summary refs log tree commit diff
path: root/sys_util/poll_token_derive/poll_token_derive.rs
blob: 7af2de57fcd47ebbf49a9a7f5c4d8347c88846b3 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
// Copyright 2018 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

extern crate proc_macro;

use proc_macro::TokenStream;
use std::fmt::Write;
use std::mem;
use std::str::FromStr;

#[cfg(test)]
mod tests;

// This file is meant to be read from top to bottom to reflect how this code processes and generates
// Rust enum definitions and implementations. The algorithm overview:
// 1) Split the rust source by whitespace (`str::split_whitespace`).
// 2a) Attempt to tokenize each piece (see: `Tokenized`, all functions starting with `matches`).
// 2b) Feed the token to the `ParseState` (see `ParseState::handle_token`).
// 3) After the source is fully processed, the `ParseState` has an `EnumModel` representing the
//    input enum.
// 4) Glue together an implementation of PollToken using a template.

// A single token produced by splitting the input source on white space and applying simple
// stateless matching to each piece (see `Tokenized::from_str`). Variants that carry a `String`
// hold the matched text with its trailing punctuation (`,` or `:`) removed.
#[derive(Clone, Debug, PartialEq)]
enum Tokenized {
    // The `enum` keyword.
    Enum,

    // A visibility qualifier: `pub`, or any piece starting with `pub(` such as `pub(crate)`.
    Visiblity,

    // A bare identifier with an optional trailing comma: `Hello`, `index`, `data,`
    Ident(String),

    // A field name followed by a colon: `index:`, `first:`
    FieldIdent(String),

    // A variant name with a parenthesized type, stored as (identifier, type): `Socket(u32)`,
    // `Client(usize),`
    IdentAndType(String, String),

    // `{`
    OpenBrace,

    // `}`, `},`
    CloseBrace,
}

// Attempts to match strings of the form "identifier" with optional trailing comma(s). On a match
// the identifier is returned with the trailing commas removed.
//
// An identifier is a non-empty run of alphanumeric characters and underscores, so snake_case
// names such as `client_id` are accepted in addition to names like `Hello` or `u32`.
fn matches_ident(s: &str) -> Option<String> {
    let ident = s.trim_end_matches(',');
    let is_ident_char = |c: char| c.is_alphanumeric() || c == '_';
    // `all` is vacuously true for an empty string, hence the explicit emptiness check.
    if !ident.is_empty() && ident.chars().all(is_ident_char) {
        Some(ident.to_owned())
    } else {
        None
    }
}

// Attempts to match strings of the form "Identifier(Type)" with optional trailing comma. If the
// given string matches, the identifier and type are returned as a 2-tuple respectively.
//
// Both the identifier and the type are runs of alphanumeric characters and underscores, so
// snake_case names are accepted. Commas are skipped wherever they appear, and any other character
// (including a parenthesis in an unexpected position) makes the match fail.
fn matches_ident_and_type(s: &str) -> Option<(String, String)> {
    // Characters are accumulated in `buffer` and moved into `ident` at the opening parenthesis and
    // into `type_` at the closing one.
    let mut buffer = String::new();
    let mut ident = String::new();
    let mut type_ = String::new();
    let mut paren_depth = 0;
    for c in s.chars() {
        match c {
            // The opening parenthesis is only valid once, directly after a non-empty identifier.
            '(' if paren_depth == 0 && !buffer.is_empty() && ident.is_empty() => {
                ident = mem::replace(&mut buffer, String::new());
                paren_depth += 1;
            }
            // The closing parenthesis is only valid once, directly after a non-empty type.
            ')' if paren_depth == 1 && !buffer.is_empty() && type_.is_empty() => {
                type_ = mem::replace(&mut buffer, String::new());
                paren_depth -= 1;
            }
            ',' => {}
            c if c.is_alphanumeric() || c == '_' => buffer.push(c),
            _ => return None,
        }
    }
    if !ident.is_empty() && !type_.is_empty() {
        Some((ident, type_))
    } else {
        None
    }
}

// Attempts to match strings of the form "identifier:". On a match the identifier is returned
// without the trailing colon.
//
// The identifier must be a non-empty run of alphanumeric characters and underscores, so a bare
// `:` does not match and snake_case field names such as `client_id:` do.
fn matches_field_ident(s: &str) -> Option<String> {
    if !s.ends_with(':') {
        return None;
    }
    let field_ident = s.trim_end_matches(':');
    let is_ident_char = |c: char| c.is_alphanumeric() || c == '_';
    // `all` is vacuously true for an empty string, so emptiness has to be rejected explicitly;
    // otherwise a lone `:` would be reported as a field with an empty name.
    if !field_ident.is_empty() && field_ident.chars().all(is_ident_char) {
        Some(field_ident.to_owned())
    } else {
        None
    }
}

impl Tokenized {
    // Classifies one white-space-delimited piece of source text.
    //
    // Keywords and braces are recognized literally; every other piece is tried against the
    // matchers from most specific to least specific (field identifier, identifier with type, bare
    // identifier). Panics if nothing matches.
    fn from_str(s: &str) -> Tokenized {
        match s {
            "enum" => Tokenized::Enum,
            "pub" => Tokenized::Visiblity,
            "{" => Tokenized::OpenBrace,
            "}" | "}," => Tokenized::CloseBrace,
            // Restricted visibility such as `pub(crate)`.
            _ if s.starts_with("pub(") => Tokenized::Visiblity,
            _ => {
                matches_field_ident(s)
                    .map(Tokenized::FieldIdent)
                    .or_else(|| {
                        matches_ident_and_type(s)
                            .map(|(ident, type_)| Tokenized::IdentAndType(ident, type_))
                    })
                    .or_else(|| matches_ident(s).map(Tokenized::Ident))
                    .unwrap_or_else(|| panic!("unable to parse token: {}", s))
            }
        }
    }
}

// Data field for an enum variant, with possible field name.
#[derive(Debug, PartialEq)]
struct EnumVariantData {
    // Type of the field as written in the source, e.g. `u32`. It is used verbatim as the cast
    // target in the generated `from_raw_token`.
    type_: String,
    // Field name for variants of the form `Variant { name: Type }`; `None` for variants of the
    // form `Variant(Type)`.
    name: Option<String>,
}

// Data for one variant of an enum, with optional single data field.
#[derive(Debug, PartialEq)]
struct EnumVariant {
    // Identifier of the variant.
    name: String,
    // The variant's single data field, or `None` for a unit variant.
    data: Option<EnumVariantData>,
}

// Data for an entire enum type.
#[derive(Debug, Default, PartialEq)]
struct EnumModel {
    // Identifier of the enum.
    name: String,
    // Variants in the order they were parsed. A variant's position in this list is the index
    // that the generated code stores in the raw token.
    variants: Vec<EnumVariant>,
}
// Note: impl for EnumModel is below the parsing code and definitions because all of the methods are
// for generating the PollToken impl.

// Labels for each of the states in the parsing state machine. The `->` symbol means that the given
// state may transition to the state pointed to.
#[derive(PartialEq, Debug)]
enum States {
    // Initial state, expecting to see visibility rules (e.g. `pub`) or `enum` keyword.
    Start, // -> Start, -> Ident

    // Expect to see the name of the enum.
    Ident, // -> Brace

    // Expect to see an opening brace (a closing brace here ends parsing).
    Brace, // -> VariantIdent, -> End

    // Expect to see a variant's name, the brace opening a variant's named data, or the brace
    // closing the enum.
    VariantIdent, // -> VariantIdent, -> VariantData, -> End

    // Expect to see the field name of a variant's data, or the brace closing that data.
    VariantData, // -> VariantIdent, -> VariantDataType

    // Expect to see the type name of a variant's data.
    VariantDataType, // -> VariantData

    // Expect to see no more tokens.
    End,
}

// The state machine for parsing a stream of `Tokenized`. After the States::End state is reached, a
// complete `EnumModel` is ready to be used for generating an implementation.
struct ParseState {
    // Which kind of token the machine expects next.
    current_state: States,
    // The variant currently being assembled. It is moved into `model.variants` when the next
    // variant starts or when the enum's closing brace is seen.
    current_variant: Option<EnumVariant>,
    // The enum description accumulated so far.
    model: EnumModel,
}

impl ParseState {
    fn new() -> ParseState {
        ParseState {
            current_state: States::Start,
            current_variant: Default::default(),
            model: Default::default(),
        }
    }

    // Handles the next token in the stream of tokens.
    fn handle_token(&mut self, tok: Tokenized) {
        match self.current_state {
            States::Start => self.handle_start(tok),
            States::Ident => self.handle_ident(tok),
            States::Brace => self.handle_brace(tok),
            States::VariantIdent => self.handle_variant_ident(tok),
            States::VariantData => self.handle_variant_data(tok),
            States::VariantDataType => self.handle_variant_data_type(tok),
            States::End => self.handle_end(tok),
        }
    }

    // All the following are handlers name after the current state that handle the next token.

    fn handle_start(&mut self, tok: Tokenized) {
        self.current_state = match tok {
            Tokenized::Enum => States::Ident,
            Tokenized::Visiblity => States::Start,
            _ => panic!("derives for enum types only"),

        };
    }

    fn handle_ident(&mut self, tok: Tokenized) {
        self.current_state = match tok {
            Tokenized::Ident(ident) => {
                self.model.name = ident;
                States::Brace
            }
            _ => panic!("unexpected token: {:?}", tok),
        };
    }

    fn handle_brace(&mut self, tok: Tokenized) {
        self.current_state = match tok {
            Tokenized::OpenBrace => States::VariantIdent,
            Tokenized::CloseBrace => States::End,
            _ => panic!("unexpected token: {:?}", tok),
        };
    }

    fn handle_variant_ident(&mut self, tok: Tokenized) {
        // This handler is the most complex because it has the most branches for the new
        // `current_state`. Adding to that complexity is that many branches indicate a new variant
        // is being handled, which means the old `current_variant` needs to be added to `variants`
        // and a fresh one needs to be started with the fresh data embedded in the token.
        self.current_state = match tok {
            Tokenized::Ident(ident) => {
                let mut variant = Some(EnumVariant {
                                           name: ident,
                                           data: None,
                                       });
                mem::swap(&mut variant, &mut self.current_variant);
                if let Some(variant) = variant {
                    self.model.variants.push(variant);
                }
                States::VariantIdent
            }
            Tokenized::IdentAndType(ident, type_) => {
                let variant_data = EnumVariantData {
                    type_: type_,
                    name: None,
                };
                let mut variant = Some(EnumVariant {
                                           name: ident,
                                           data: Some(variant_data),
                                       });
                mem::swap(&mut variant, &mut self.current_variant);
                if let Some(variant) = variant {
                    self.model.variants.push(variant);
                }
                States::VariantIdent
            }
            Tokenized::OpenBrace => States::VariantData,
            Tokenized::CloseBrace => {
                let mut variant = Default::default();
                mem::swap(&mut variant, &mut self.current_variant);
                if let Some(variant) = variant {
                    self.model.variants.push(variant);
                }
                States::End
            }
            _ => panic!("unexpected token: {:?}", tok),
        };
    }

    fn handle_variant_data(&mut self, tok: Tokenized) {
        let variant = self.current_variant.as_mut().unwrap();
        self.current_state = match tok {
            Tokenized::FieldIdent(ident) => {
                assert!(variant.data.is_none(),
                        "enum variant can only have one field");
                variant.data = Some(EnumVariantData {
                                        type_: "".to_owned(),
                                        name: Some(ident),
                                    });
                States::VariantDataType
            }
            Tokenized::CloseBrace => States::VariantIdent,
            _ => panic!("unexpected token: {:?}", tok),
        };
    }

    fn handle_variant_data_type(&mut self, tok: Tokenized) {
        let variant = self.current_variant.as_mut().unwrap();
        let variant_data = variant.data.as_mut().unwrap();
        self.current_state = match tok {
            Tokenized::Ident(ident) => {
                variant_data.type_ = ident;
                States::VariantData
            }
            _ => panic!("unexpected token: {:?}", tok),
        };
    }

    fn handle_end(&mut self, tok: Tokenized) {
        panic!("unexpected tokens past ending brace: {:?}", tok);
    }
}

// Continued from the above `EnumModel` definition. All methods are used for generating PollToken
// implementation. The method for packing an enum into a u64 is as follows:
// 1) Reserve the lowest "ceil(log_2(x))" bits where x is the number of enum variants.
// 2) Store the enum variant's index (0-based index based on order in the enum definition) in
//    reserved bits.
// 3) If there is data in the enum variant, store the data in remaining bits.
// The method for unpacking is as follows
// 1) Mask the raw token to just the reserved bits
// 2) Match the reserved bits to the enum variant token.
// 3) If the indicated enum variant had data, extract it from the unreserved bits.
impl EnumModel {
    // Returns how many low bits of the raw token are needed to hold a variant index: the base 2
    // logarithm of the number of variants, rounded up. Zero or one variant needs no bits.
    fn variant_bits(&self) -> u32 {
        match self.variants.len() {
            0 => 0,
            count => {
                let usize_bits = (mem::size_of::<usize>() * 8) as u32;
                // The position of the highest set bit of the largest index is the bit count.
                usize_bits - (count - 1).leading_zeros()
            }
        }
    }

    // Builds the source text of the `as_raw_token` body: a `match *self` with one arm per
    // variant that yields the variant's index, ORed with the shifted data when there is any.
    fn generate_as_raw_token(&self) -> String {
        let shift = self.variant_bits();
        let mut body = String::from("match *self {\n");

        for (index, variant) in self.variants.iter().enumerate() {
            // Pattern text between the variant name and `=>`, binding the field to `data`.
            let capture = match variant.data {
                None => String::new(),
                Some(EnumVariantData { name: None, .. }) => "(data)".to_owned(),
                Some(EnumVariantData { name: Some(ref field), .. }) => {
                    format!("{{ {}: data }}", field)
                }
            };

            // Expression text that places the data above the index bits.
            let payload = if variant.data.is_none() {
                String::new()
            } else {
                format!(" | ((data as u64) << {})", shift)
            };

            writeln!(body,
                     "{}::{}{} => {}{},",
                     self.name,
                     variant.name,
                     capture,
                     index,
                     payload)
                    .expect("formatting into a String cannot fail");
        }

        body.push('}');
        body
    }

    // Builds the source text of the `from_raw_token` body: a `match` on the index bits of `data`
    // with one arm per variant that rebuilds the variant, casting the remaining bits to the
    // field's type when there is a field.
    fn generate_from_raw_token(&self) -> String {
        let shift = self.variant_bits();
        let index_mask = (1 << shift) - 1;

        // Only the index bits take part in the match.
        let mut body = format!("match data & 0x{:02x} {{\n", index_mask);

        for (index, variant) in self.variants.iter().enumerate() {
            // Constructor text following the variant name, extracting the data bits.
            let payload = match variant.data {
                None => String::new(),
                Some(EnumVariantData {
                         name: None,
                         ref type_,
                     }) => format!("((data >> {}) as {})", shift, type_),
                Some(EnumVariantData {
                         name: Some(ref field),
                         ref type_,
                     }) => format!("{{ {}: (data >> {}) as {} }}", field, shift, type_),
            };

            writeln!(body,
                     "{} => {}::{}{},",
                     index,
                     self.name,
                     variant.name,
                     payload)
                    .expect("formatting into a String cannot fail");
        }

        body.push_str("_ => unreachable!()\n}");
        body
    }
}

// String-based core of the derive. Unit tests cannot create `TokenStream`s (apparently), so the
// real work is done here on plain strings and `poll_token` only converts to and from token streams.
//
// Panics if a piece of `src` cannot be tokenized, if a token is not valid in the current parse
// state, or if the input ends before the enum's closing brace.
fn poll_token_inner(src: &str) -> String {
    // Tokenizing (without context) and parsing (via state machine) are interleaved: each
    // white-space-delimited piece is classified and immediately fed to the state machine.
    let mut state = ParseState::new();
    for tok in src.split_whitespace().map(Tokenized::from_str) {
        state.handle_token(tok);
    }

    assert_eq!(state.current_state,
               States::End,
               "unexpected end after parsing source enum");

    // From the model of the user's enum we generate the implementation. The generated code is NOT
    // always well formed, for example when a variant's data type cannot be bit shifted or cast to
    // u64. Such cases are left for Rust to report, since detecting every kind of error here would
    // be difficult. Importantly, every implementation that is generated here and goes on to
    // compile successfully is sound.
    let model = state.model;
    let as_raw_body = model.generate_as_raw_token();
    let from_raw_body = model.generate_from_raw_token();
    format!(concat!("impl PollToken for {name} {{\n",
                    "    fn as_raw_token(&self) -> u64 {{\n",
                    "{as_raw}\n",
                    "    }}\n",
                    "\n",
                    "    fn from_raw_token(data: u64) -> Self {{\n",
                    "{from_raw}\n",
                    "    }}\n",
                    "}}"),
            name = model.name,
            as_raw = as_raw_body,
            from_raw = from_raw_body)
}

/// Derives an implementation of the PollToken trait for an `enum`.
///
/// Only a restricted class of `enum`s is supported by this custom derive:
///
/// * Every variant is either a unit variant (no data) or carries exactly one data field, which
///   may be named or unnamed.
/// * The data of a variant, if any, is a primitive type that can be cast to and from a `u64`.
/// * When a variant's data is at least as large as a `u64`, its most significant bits must be
///   zero. The number of bits truncated is equal to the number of bits used to store the variant
///   index plus the number of bits above 64.
#[proc_macro_derive(PollToken)]
pub fn poll_token(input: TokenStream) -> TokenStream {
    // Converting the token stream to a string gives a rather regular layout: the white space of
    // the source enum is not carried over and is replaced with whatever the token stream's
    // to_string function outputs, which keeps parsing simple. The rust parser has already
    // validated the syntax, so the source can be assumed to be well formed.
    let source = input.to_string();
    let generated = poll_token_inner(&source);
    TokenStream::from_str(&generated).unwrap()
}