Class: Fop::Tokenizer
- Inherits:
- Object
- Inheritance hierarchy: Object → Fop::Tokenizer
- Defined in:
- lib/fop/tokenizer.rb
Defined Under Namespace
Constant Summary
- EXP_OPEN =
"{".freeze
- EXP_CLOSE =
"}".freeze
- ESCAPE =
"\\".freeze
- WILDCARD =
"*".freeze
- REGEX_DELIM =
"/".freeze
- REGEX_CAPTURE =
"$".freeze
- OP_REPLACE =
"=".freeze
- OP_APPEND =
">".freeze
- OP_PREPEND =
"<".freeze
- OP_ADD =
"+".freeze
- OP_SUB =
"-".freeze
- WHITESPACE =
" ".freeze
Instance Attribute Summary
-
#escape ⇒ Object
readonly
Controls which “mode” the tokenizer is currently in.
Instance Method Summary
-
#initialize(src) ⇒ Tokenizer
constructor
A new instance of Tokenizer.
- #next ⇒ Object
-
#regex_mode! ⇒ Object
Auto-escape anything you’d find in a regular expression.
-
#reset_escapes! ⇒ Object
Auto-escape operators and regex capture vars.
Constructor Details
#initialize(src) ⇒ Tokenizer
Returns a new instance of Tokenizer.
29 30 31 32 33 34 35 |
# File 'lib/fop/tokenizer.rb', line 29
# Builds a tokenizer over +src+ with the cursor at the first character.
#
# @param src the source text to tokenize; it must respond to #size
#   (and is indexed with #[] by #next).
def initialize(src)
  @src = src
  # Both the token start marker and the cursor begin at index 0.
  @start_i = @i = 0
  # Index of the last character; -1 for an empty source.
  @end = src.size - 1
  reset_escapes!
end
Instance Attribute Details
#escape ⇒ Object (readonly)
Controls which “mode” the tokenizer is currently in. This is a necessary result of the syntax lacking explicit string delimiters. That could be worked around by requiring users to escape all reserved chars, but that would be ugly. Instead, the parser continually assesses the current context and flips these flags on or off to auto-escape certain chars for the next token.
27 28 29 |
# File 'lib/fop/tokenizer.rb', line 27
# Read-only access to the auto-escape flags stored in @escape.
#
# @return the object currently held in @escape
def escape
  @escape
end
Instance Method Details
#next ⇒ Object
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
# File 'lib/fop/tokenizer.rb', line 53
# Reads the character under the cursor (@i) and produces the next token.
#
# Reserved characters become their own tokens unless the matching flag on
# @escape is set, in which case they are handed to get_str! instead. Any
# character that is not reserved is also handed to get_str!.
#
# @return a Tokens::EOF token once the cursor is past the last index (@end);
#   otherwise whatever token! or get_str! returns
def next
  return Token.new(@i, Tokens::EOF) if @i > @end

  ch = @src[@i]
  case ch
  when EXP_OPEN
    @i += 1
    token!(Tokens::EXP_OPEN)
  when EXP_CLOSE
    @i += 1
    token!(Tokens::EXP_CLOSE)
  when WILDCARD
    @i += 1
    token!(Tokens::WILDCARD, WILDCARD)
  when REGEX_DELIM
    return get_str! if @escape.regex

    @i += 1
    token!(Tokens::REG_DELIM)
  when REGEX_CAPTURE
    return get_str! if @escape.regex_capture

    # Step past the "$"; the character right after it is the token's value.
    @i += 1
    capture = token!(Tokens::REG_CAPTURE, @src[@i])
    # Step past that value character too and move @start_i up to the cursor
    # (presumably the start of the next token; token! is defined elsewhere).
    @i += 1
    @start_i = @i
    capture
  when OP_REPLACE, OP_APPEND, OP_PREPEND, OP_ADD, OP_SUB
    return get_str! if @escape.operators

    @i += 1
    token!(Tokens::OPERATOR, ch)
  when WHITESPACE
    return get_str! if @escape.whitespace

    # From here on the space is consumed in every case.
    @i += 1
    return token!(Tokens::WHITESPACE_SEP) unless @escape.whitespace_sep

    # Separator whitespace is being skipped: drop it and read the next token.
    @start_i = @i
    self.next
  else
    get_str!
  end
end
#regex_mode! ⇒ Object
Auto-escape anything you’d find in a regular expression
43 44 45 46 47 48 49 50 51 |
# File 'lib/fop/tokenizer.rb', line 43
# Auto-escape anything you'd find in a regular expression.
#
# Assigns every flag below on @escape, in the listed order.
#
# @return [true] the value of the last assignment
def regex_mode!
  {
    whitespace: true,
    regex: false,        # look for the final /
    regex_escape: true,  # pass \ through to the regex engine UNLESS it's followed by a /
    wildcards: true,
    operators: true,
    regex_capture: true,
    exp: true
  }.each do |flag, value|
    @escape.public_send("#{flag}=", value)
  end
  true
end