diff options
Diffstat (limited to 'sys_util/poll_token_derive')
-rw-r--r-- | sys_util/poll_token_derive/Cargo.toml | 8 | ||||
-rw-r--r-- | sys_util/poll_token_derive/poll_token_derive.rs | 463 | ||||
-rw-r--r-- | sys_util/poll_token_derive/tests.rs | 257 |
3 files changed, 728 insertions, 0 deletions
diff --git a/sys_util/poll_token_derive/Cargo.toml b/sys_util/poll_token_derive/Cargo.toml new file mode 100644 index 0000000..f874ce5 --- /dev/null +++ b/sys_util/poll_token_derive/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "poll_token_derive" +version = "0.1.0" +authors = ["The Chromium OS Authors"] + +[lib] +proc-macro = true +path = "poll_token_derive.rs" diff --git a/sys_util/poll_token_derive/poll_token_derive.rs b/sys_util/poll_token_derive/poll_token_derive.rs new file mode 100644 index 0000000..7af2de5 --- /dev/null +++ b/sys_util/poll_token_derive/poll_token_derive.rs @@ -0,0 +1,463 @@ +// Copyright 2018 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +extern crate proc_macro; + +use proc_macro::TokenStream; +use std::fmt::Write; +use std::mem; +use std::str::FromStr; + +#[cfg(test)] +mod tests; + +// This file is meant to be read from top to bottom to reflect how this code processes and generates +// Rust enum definitions and implementations. The algorithm overview: +// 1) Split the rust source by whitespace (`str::split_whitespace`). +// 2a) Attempt to tokenize each piece (see: `Tokenized`, all functions starting with `matches`). +// 2b) Feed the token to the `ParseState` (see `ParseState::handle_token`). +// 3) After the source is fully processed, the `ParseState` has an `EnumModel` representing the +// input enum. +// 4) Glue together an implementation of PollToken using a template. + +// A single token after splitting input source by white space and simple stateless matching. +#[derive(Clone, Debug, PartialEq)] +enum Tokenized { + // `enum` + Enum, + + // `pub' | `pub(crate)` + Visiblity, + + // `Hello`, `index`, `data,` + Ident(String), + + // `index:`, 'first:` + FieldIdent(String), + + // `Socket(u32)`, `Client(usize),`, + IdentAndType(String, String), + + // `{` + OpenBrace, + + // `}`, `},` + CloseBrace, +} + +// Attempts to match strings of the form "identifier" with optional trailing comma. +fn matches_ident(s: &str) -> Option<String> { + let ident = s.trim_right_matches(','); + if !ident.is_empty() && ident.chars().all(char::is_alphanumeric) { + Some(ident.to_owned()) + } else { + None + } +} + +// Attempts to match strings of the form "Identifier(Type)" with optional trailing comma. If the +// given string matches, the identifier and type are returned as a 2-tuple receptively. +fn matches_ident_and_type(s: &str) -> Option<(String, String)> { + let mut buffer = String::new(); + let mut ident = String::new(); + let mut type_ = String::new(); + let mut brace_depth = 0; + for c in s.chars() { + match c { + '(' if brace_depth == 0 && !buffer.is_empty() && ident.is_empty() => { + mem::swap(&mut ident, &mut buffer); + brace_depth += 1; + } + ')' if brace_depth == 1 && !buffer.is_empty() && type_.is_empty() => { + mem::swap(&mut type_, &mut buffer); + brace_depth -= 1; + } + ',' => {} + c if c.is_alphanumeric() => buffer.push(c), + _ => return None, + } + } + if !ident.is_empty() && !type_.is_empty() { + Some((ident, type_)) + } else { + None + } +} + +// Attempts to match strings of the form "identifier:". +fn matches_field_ident(s: &str) -> Option<String> { + let field_ident = s.trim_right_matches(':'); + if s.ends_with(':') && field_ident.chars().all(char::is_alphanumeric) { + Some(field_ident.to_owned()) + } else { + None + } +} + +impl Tokenized { + fn from_str(s: &str) -> Tokenized { + if s.starts_with("pub(") { + return Tokenized::Visiblity; + } + match s { + "enum" => Tokenized::Enum, + "pub" => Tokenized::Visiblity, + "{" => Tokenized::OpenBrace, + "}" | "}," => Tokenized::CloseBrace, + _ => { + // Try to match from most specific to least specific. + if let Some(ident) = matches_field_ident(s) { + Tokenized::FieldIdent(ident) + } else if let Some((ident, type_)) = matches_ident_and_type(s) { + Tokenized::IdentAndType(ident, type_) + } else if let Some(ident) = matches_ident(s) { + Tokenized::Ident(ident) + } else { + panic!("unable to parse token: {}", s) + } + } + } + } +} + +// Data field for an enum, with possible field name. +#[derive(Debug, PartialEq)] +struct EnumVariantData { + type_: String, + name: Option<String>, +} + +// Data for one variant of an enum, with optional single data field. +#[derive(Debug, PartialEq)] +struct EnumVariant { + name: String, + data: Option<EnumVariantData>, +} + +// Data for an entire enum type. +#[derive(Debug, Default, PartialEq)] +struct EnumModel { + name: String, + variants: Vec<EnumVariant>, +} +// Note: impl for EnumModel is below the parsing code and definitions because all of the methods are +// for generating the PollToken impl. + +// Labels for each of the states in the parsing state machine. The '->` symbol means that the given +// state may transition to the state pointed to. +#[derive(PartialEq, Debug)] +enum States { + // Initial state, expecting to see visibility rules (e.g. `pub`) or `enum` keyword. + Start, // -> Ident + + // Expect to see the name of the enum field. + Ident, // -> Brace + + // Expect to see an opening brace. + Brace, // -> VariantIdent, -> End + + // Expect to see a variant's name. + VariantIdent, // -> VariantIdent, -> VariantData, -> End + + // Expect to see the field name of a variant's data. + VariantData, // -> VariantIdent, -> VariantDataType + + // Expect to see the tye name of a variant's data. + VariantDataType, // -> VariantData + + // Expect to see no more tokens. + End, +} + +// The state machine for parsing a stream of `Tokenized`. After the States::End state is reached, a +// complete `EnumModel` is ready to be used for generating an implementation. +struct ParseState { + current_state: States, + current_variant: Option<EnumVariant>, + model: EnumModel, +} + +impl ParseState { + fn new() -> ParseState { + ParseState { + current_state: States::Start, + current_variant: Default::default(), + model: Default::default(), + } + } + + // Handles the next token in the stream of tokens. + fn handle_token(&mut self, tok: Tokenized) { + match self.current_state { + States::Start => self.handle_start(tok), + States::Ident => self.handle_ident(tok), + States::Brace => self.handle_brace(tok), + States::VariantIdent => self.handle_variant_ident(tok), + States::VariantData => self.handle_variant_data(tok), + States::VariantDataType => self.handle_variant_data_type(tok), + States::End => self.handle_end(tok), + } + } + + // All the following are handlers name after the current state that handle the next token. + + fn handle_start(&mut self, tok: Tokenized) { + self.current_state = match tok { + Tokenized::Enum => States::Ident, + Tokenized::Visiblity => States::Start, + _ => panic!("derives for enum types only"), + + }; + } + + fn handle_ident(&mut self, tok: Tokenized) { + self.current_state = match tok { + Tokenized::Ident(ident) => { + self.model.name = ident; + States::Brace + } + _ => panic!("unexpected token: {:?}", tok), + }; + } + + fn handle_brace(&mut self, tok: Tokenized) { + self.current_state = match tok { + Tokenized::OpenBrace => States::VariantIdent, + Tokenized::CloseBrace => States::End, + _ => panic!("unexpected token: {:?}", tok), + }; + } + + fn handle_variant_ident(&mut self, tok: Tokenized) { + // This handler is the most complex because it has the most branches for the new + // `current_state`. Adding to that complexity is that many branches indicate a new variant + // is being handled, which means the old `current_variant` needs to be added to `variants` + // and a fresh one needs to be started with the fresh data embedded in the token. + self.current_state = match tok { + Tokenized::Ident(ident) => { + let mut variant = Some(EnumVariant { + name: ident, + data: None, + }); + mem::swap(&mut variant, &mut self.current_variant); + if let Some(variant) = variant { + self.model.variants.push(variant); + } + States::VariantIdent + } + Tokenized::IdentAndType(ident, type_) => { + let variant_data = EnumVariantData { + type_: type_, + name: None, + }; + let mut variant = Some(EnumVariant { + name: ident, + data: Some(variant_data), + }); + mem::swap(&mut variant, &mut self.current_variant); + if let Some(variant) = variant { + self.model.variants.push(variant); + } + States::VariantIdent + } + Tokenized::OpenBrace => States::VariantData, + Tokenized::CloseBrace => { + let mut variant = Default::default(); + mem::swap(&mut variant, &mut self.current_variant); + if let Some(variant) = variant { + self.model.variants.push(variant); + } + States::End + } + _ => panic!("unexpected token: {:?}", tok), + }; + } + + fn handle_variant_data(&mut self, tok: Tokenized) { + let variant = self.current_variant.as_mut().unwrap(); + self.current_state = match tok { + Tokenized::FieldIdent(ident) => { + assert!(variant.data.is_none(), + "enum variant can only have one field"); + variant.data = Some(EnumVariantData { + type_: "".to_owned(), + name: Some(ident), + }); + States::VariantDataType + } + Tokenized::CloseBrace => States::VariantIdent, + _ => panic!("unexpected token: {:?}", tok), + }; + } + + fn handle_variant_data_type(&mut self, tok: Tokenized) { + let variant = self.current_variant.as_mut().unwrap(); + let variant_data = variant.data.as_mut().unwrap(); + self.current_state = match tok { + Tokenized::Ident(ident) => { + variant_data.type_ = ident; + States::VariantData + } + _ => panic!("unexpected token: {:?}", tok), + }; + } + + fn handle_end(&mut self, tok: Tokenized) { + panic!("unexpected tokens past ending brace: {:?}", tok); + } +} + +// Continued from the above `EnumModel` definition. All methods are used for generating PollToken +// implementation. The method for packing an enum into a u64 is as follows: +// 1) Reserve the lowest "ceil(log_2(x))" bits where x is the number of enum variants. +// 2) Store the enum variant's index (0-based index based on order in the enum definition) in +// reserved bits. +// 3) If there is data in the enum variant, store the data in remaining bits. +// The method for unpacking is as follows +// 1) Mask the raw token to just the reserved bits +// 2) Match the reserved bits to the enum variant token. +// 3) If the indicated enum variant had data, extract it from the unreserved bits. +impl EnumModel { + // Calculates the number of bits needed to store the variant index. Essentially the log base 2 + // of the number of variants, rounded up. + fn variant_bits(&self) -> u32 { + // The degenerate case of no variants. + if self.variants.is_empty() { + return 0; + } + let variant_count = self.variants.len(); + (mem::size_of_val(&variant_count) as u32 * 8) - (variant_count - 1).leading_zeros() + } + + // Generates the function body for `as_raw_token`. + fn generate_as_raw_token(&self) -> String { + let variant_bits = self.variant_bits(); + let mut match_statement = "match *self {\n".to_owned(); + + // Each iteration corresponds to one variant's match arm. + for (index, variant) in self.variants.iter().enumerate() { + // The capture string is for everything between the variant identifier and the `=>` in + // the match arm: the variant's data capture. + let capture = match variant.data.as_ref() { + Some(&EnumVariantData { name: Some(ref name), .. }) => { + format!("{{ {}: data }}", name) + } + Some(&EnumVariantData { .. }) => "(data)".to_owned(), + None => "".to_owned(), + }; + + // The modifier string ORs the variant index with extra bits from the variant data + // field. + let modifer = if variant.data.is_some() { + format!(" | ((data as u64) << {})", variant_bits) + } else { + "".to_owned() + }; + + // Assembly of the match arm. + write!(match_statement, + "{}::{}{} => {}{},\n", + self.name, + variant.name, + capture, + index, + modifer) + .unwrap(); + } + match_statement.push_str("}"); + match_statement + } + + // Generates the function body for `from_raw_token`. + fn generate_from_raw_token(&self) -> String { + let variant_bits = self.variant_bits(); + let variant_mask = (1 << variant_bits) - 1; + + // The match expression only matches the bits for the variant index. + let mut match_statement = format!("match data & 0x{:02x} {{\n", variant_mask); + + // Each iteration corresponds to one variant's match arm. + for (index, variant) in self.variants.iter().enumerate() { + // The data string is for extracting the enum variant's data bits out of the raw token + // data, which includes both variant index and data bits. + let data = match variant.data.as_ref() { + Some(&EnumVariantData { + name: Some(ref name), + ref type_, + }) => format!("{{ {}: (data >> {}) as {} }}", name, variant_bits, type_), + Some(&EnumVariantData { + name: None, + ref type_, + }) => format!("((data >> {}) as {})", variant_bits, type_), + None => "".to_owned(), + }; + + // Assembly of the match arm. + write!(match_statement, + "{} => {}::{}{},\n", + index, + self.name, + variant.name, + data) + .unwrap(); + } + match_statement.push_str("_ => unreachable!()\n}"); + match_statement + } +} + +// Because unit tests cannot create `TokenStream`s (apparently), we have an inner implementation +// that deals in strings. +fn poll_token_inner(src: &str) -> String { + let src_tokens = src.split_whitespace(); + + // Parsing is done in two interleaved stages, tokenizing without context, followed by parsing + // via state machine. + let mut state = ParseState::new(); + for src_tok in src_tokens { + let tok = Tokenized::from_str(src_tok); + state.handle_token(tok); + } + + assert_eq!(state.current_state, + States::End, + "unexpected end after parsing source enum"); + + // Given our basic model of a user given enum that is suitable as a token, we generate the + // implementation. The implementation is NOT always well formed, such as when a variant's data + // type is not bit shiftable or castable to u64, but we let Rust generate such errors as it + // would be difficult to detect every kind of error. Importantly, every implementation that we + // generate here and goes on to compile succesfully is sound. + let model = state.model; + format!("impl PollToken for {} {{ + fn as_raw_token(&self) -> u64 {{ +{} + }} + + fn from_raw_token(data: u64) -> Self {{ +{} + }} +}}", + model.name, + model.generate_as_raw_token(), + model.generate_from_raw_token()) +} + +/// Implements the PollToken trait for a given `enum`. +/// +/// There are limitations on what `enum`s this custom derive will work on: +/// +/// * Each variant must be a unit variant (no data), or have a single (un)named data field. +/// * If a variant has data, it must be a primitive type castable to and from a `u64`. +/// * If a variant data has size greater than or equal to a `u64`, its most significant bits must be +/// zero. The number of bits truncated is equal to the number of bits used to store the variant +/// index plus the number of bits above 64. +#[proc_macro_derive(PollToken)] +pub fn poll_token(input: TokenStream) -> TokenStream { + // The token stream gets converted to a string in a rather regular way, which makes parsing + // simpler. In particular, whitespace from the source enum is not carried over, instead replaced + // with whatever the token stream's to_string function outputs. The rust parser has already + // validated the syntax, so we can make lots of assumptions about the source being well formed. + TokenStream::from_str(&poll_token_inner(&input.to_string())).unwrap() +} diff --git a/sys_util/poll_token_derive/tests.rs b/sys_util/poll_token_derive/tests.rs new file mode 100644 index 0000000..8c81f66 --- /dev/null +++ b/sys_util/poll_token_derive/tests.rs @@ -0,0 +1,257 @@ +// Copyright 2018 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +mod tokenized { + use Tokenized; + use Tokenized::*; + + #[test] + fn enum_() { + assert_eq!(Tokenized::from_str("enum"), Enum); + } + + #[test] + fn visibility() { + assert_eq!(Tokenized::from_str("pub"), Visiblity); + assert_eq!(Tokenized::from_str("pub(crate)"), Visiblity); + assert_eq!(Tokenized::from_str("pub(my_module)"), Visiblity); + assert_eq!(Tokenized::from_str("pub( crate )"), Visiblity); + } + + #[test] + fn ident() { + assert_eq!(Tokenized::from_str("Important"), + Ident("Important".to_owned())); + assert_eq!(Tokenized::from_str("hello,"), Ident("hello".to_owned())); + assert_eq!(Tokenized::from_str("world2"), Ident("world2".to_owned())); + assert_eq!(Tokenized::from_str("A,"), Ident("A".to_owned())); + } + + + #[test] + fn field_ident() { + assert_eq!(Tokenized::from_str("index:"), + FieldIdent("index".to_owned())); + assert_eq!(Tokenized::from_str("a:"), FieldIdent("a".to_owned())); + } + + #[test] + fn ident_and_type() { + assert_eq!(Tokenized::from_str("a(u32)"), + IdentAndType("a".to_owned(), "u32".to_owned())); + assert_eq!(Tokenized::from_str("Socket(usize),"), + IdentAndType("Socket".to_owned(), "usize".to_owned())); + } + + #[test] + fn open_brace() { + assert_eq!(Tokenized::from_str("{"), OpenBrace); + } + + #[test] + fn close_brace() { + assert_eq!(Tokenized::from_str("}"), CloseBrace); + assert_eq!(Tokenized::from_str("},"), CloseBrace); + } + + #[test] + #[should_panic] + fn empty() { + Tokenized::from_str(""); + } +} + +mod parse_state { + use {Tokenized, States, ParseState, EnumModel, EnumVariant, EnumVariantData}; + use Tokenized::*; + + fn parse_tokens(tokens: &[Tokenized]) -> EnumModel { + let mut state = ParseState::new(); + for token in tokens { + state.handle_token(token.clone()); + } + assert_eq!(state.current_state, + States::End, + "unexpected end after parsing source enum"); + state.model + } + + #[test] + fn empty_struct() { + let model = parse_tokens(&[Visiblity, + Enum, + Ident("Blarg".to_owned()), + OpenBrace, + CloseBrace]); + let expected = EnumModel { + name: "Blarg".to_string(), + variants: Vec::new(), + }; + assert_eq!(model, expected); + } + + #[test] + #[should_panic] + fn invalid_token() { + parse_tokens(&[Visiblity, + Enum, + Ident("Blarg".to_owned()), + OpenBrace, + CloseBrace, + CloseBrace]); + } + + #[test] + fn only_unit_variants() { + let model = parse_tokens(&[Enum, + Ident("Foo".to_owned()), + OpenBrace, + Ident("A".to_owned()), + Ident("B".to_owned()), + Ident("C".to_owned()), + CloseBrace]); + let expected = EnumModel { + name: "Foo".to_string(), + variants: vec![EnumVariant { + name: "A".to_owned(), + data: None, + }, + EnumVariant { + name: "B".to_owned(), + data: None, + }, + EnumVariant { + name: "C".to_owned(), + data: None, + }], + }; + assert_eq!(model, expected); + } + + #[test] + fn unnamed_data() { + let model = parse_tokens(&[Enum, + Ident("Foo".to_owned()), + OpenBrace, + IdentAndType("A".to_owned(), "u32".to_owned()), + Ident("B".to_owned()), + IdentAndType("C".to_owned(), "usize".to_owned()), + CloseBrace]); + let expected = EnumModel { + name: "Foo".to_string(), + variants: vec![EnumVariant { + name: "A".to_owned(), + data: Some(EnumVariantData { + name: None, + type_: "u32".to_owned(), + }), + }, + EnumVariant { + name: "B".to_owned(), + data: None, + }, + EnumVariant { + name: "C".to_owned(), + data: Some(EnumVariantData { + name: None, + type_: "usize".to_owned(), + }), + }], + }; + assert_eq!(model, expected); + } + + #[test] + fn named_data() { + let model = parse_tokens(&[Enum, + Ident("Foo".to_owned()), + OpenBrace, + Ident("A".to_owned()), + OpenBrace, + FieldIdent("index".to_owned()), + Ident("u16".to_owned()), + CloseBrace, + CloseBrace]); + let expected = EnumModel { + name: "Foo".to_string(), + variants: vec![EnumVariant { + name: "A".to_owned(), + data: Some(EnumVariantData { + name: Some("index".to_owned()), + type_: "u16".to_owned(), + }), + }], + }; + assert_eq!(model, expected); + } +} + +mod enum_model { + use {EnumModel, EnumVariant}; + + #[test] + fn variant_bits() { + let mut model = EnumModel { + name: "Baz".to_string(), + variants: vec![EnumVariant { + name: "A".to_owned(), + data: None, + }, + EnumVariant { + name: "B".to_owned(), + data: None, + }, + EnumVariant { + name: "C".to_owned(), + data: None, + }], + }; + assert_eq!(model.variant_bits(), 2); + for _ in 0..1021 { + model + .variants + .push(EnumVariant { + name: "Dynamic".to_owned(), + data: None, + }); + } + assert_eq!(model.variant_bits(), 10); + model + .variants + .push(EnumVariant { + name: "OneMore".to_owned(), + data: None, + }); + assert_eq!(model.variant_bits(), 11); + } +} + +#[test] +fn poll_token_e2e() { + let input = "enum Token { A, B, C, D(usize), E { foobaz: u32 }, }"; + let output = ::poll_token_inner(input); + let expected = "impl PollToken for Token { + fn as_raw_token(&self) -> u64 { +match *self { +Token::A => 0, +Token::B => 1, +Token::C => 2, +Token::D(data) => 3 | ((data as u64) << 3), +Token::E{ foobaz: data } => 4 | ((data as u64) << 3), +} + } + + fn from_raw_token(data: u64) -> Self { +match data & 0x07 { +0 => Token::A, +1 => Token::B, +2 => Token::C, +3 => Token::D((data >> 3) as usize), +4 => Token::E{ foobaz: (data >> 3) as u32 }, +_ => unreachable!() +} + } +}"; + assert_eq!(output.as_str(), expected); +} |