Class: GmailSearchSyntax::Tokenizer
- Inherits: Object
- Inheritance hierarchy:
  - Object
  - GmailSearchSyntax::Tokenizer
- Defined in:
- lib/gmail_search_syntax/tokenizer.rb
Constant Summary collapse
- OPERATORS =
%w[ from to cc bcc subject after before older newer older_than newer_than label category has list filename in is deliveredto size larger smaller rfc822msgid ].freeze
- LOGICAL_OPERATORS =
%w[OR AND AROUND].freeze
Instance Method Summary collapse
- #initialize(input) ⇒ Tokenizer (constructor): Returns a new instance of Tokenizer.
- #tokenize ⇒ Object
Constructor Details
#initialize(input) ⇒ Tokenizer
Returns a new instance of Tokenizer.
29 30 31 32 33 |
# File 'lib/gmail_search_syntax/tokenizer.rb', line 29
#
# Sets up the scanning state: the text to scan, the cursor at the start of
# that text, and an empty token list.
#
# @param input the search text that #tokenize will walk over
def initialize(input)
  @input, @position, @tokens = input, 0, []
end
Instance Method Details
#tokenize ⇒ Object
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
# File 'lib/gmail_search_syntax/tokenizer.rb', line 35
#
# Walks @input starting at @position, dispatching on the current character,
# then emits a final :eof token and returns @tokens.
#
# @return the @tokens collection
def tokenize
  # Characters that always become a one-character token of the given type.
  punctuation = {
    "(" => :lparen,
    ")" => :rparen,
    "{" => :lbrace,
    "}" => :rbrace,
    "+" => :plus,
    ":" => :colon
  }

  until @position >= @input.length
    skip_whitespace
    # skip_whitespace may have moved @position to the end of the input.
    break if @position >= @input.length

    char = current_char

    if (type = punctuation[char])
      add_token(type, char)
      advance
    elsif char == "-"
      following = peek_char
      preceding = @input[@position - 1] if @position.positive?

      # Gmail treats a hyphen differently depending on its surroundings:
      #   "Coxlee-Gammage"  → Coxlee AND Gammage      (embedded hyphen separates words)
      #   "Coxlee -Gammage" → Coxlee AND NOT Gammage  (whitespace + hyphen negates)
      at_boundary = preceding.nil? || preceding.match?(/\s/)
      starts_term = following && !following.match?(/\s/)

      if starts_term && at_boundary
        # Negation: start of input or whitespace before, non-whitespace after.
        add_token(:minus, char)
        advance
      elsif !at_boundary
        # Hyphen directly after a non-whitespace character: drop it as a word separator.
        advance
      else
        # Hyphen with nothing (or only whitespace) on both sides: hand off to read_word.
        read_word
      end
    elsif char == '"'
      read_quoted_string
    else
      read_word
    end
  end

  add_token(:eof, nil)
  @tokens
end