Class: Fop::Tokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/fop/tokenizer.rb

Defined Under Namespace

Classes: Escapes, Token

Constant Summary collapse

# Reserved single characters the tokenizer dispatches on.

# Expression delimiters.
EXP_OPEN = "{".freeze
EXP_CLOSE = "}".freeze

# A single backslash. Not referenced by #next itself; presumably handled while
# reading string tokens (confirm against get_str!).
ESCAPE = "\\".freeze

# Emitted as its own wildcard token by #next.
WILDCARD = "*".freeze

# Regex delimiter, and the marker that introduces a regex capture reference
# (the character following it becomes the capture token's value).
REGEX_DELIM = "/".freeze
REGEX_CAPTURE = "$".freeze

# Operator characters; #next emits all of them as the same operator token
# type, carrying the character itself as the value.
OP_REPLACE = "=".freeze
OP_APPEND = ">".freeze
OP_PREPEND = "<".freeze
OP_ADD = "+".freeze
OP_SUB = "-".freeze

# A single space; treated as text, a separator, or skipped depending on the
# current escape flags.
WHITESPACE = " ".freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(src) ⇒ Tokenizer

Returns a new instance of Tokenizer.



29
30
31
32
33
34
35
# File 'lib/fop/tokenizer.rb', line 29

# Prepares a tokenizer positioned at the first character of +src+.
#
# @param src [String] the source text to tokenize (must respond to #size
#   and be indexable by position)
def initialize(src)
  @src = src
  # Index of the last character; #next reports EOF once the cursor passes it.
  @end = src.size - 1
  # Both the scan cursor and the token start marker begin at position zero.
  @start_i = @i = 0
  reset_escapes!
end

Instance Attribute Details

#escape ⇒ Object (readonly)

Controls which “mode” the tokenizer is currently in. This is a necessary result of the syntax lacking explicit string delimiters. That could be worked around by requiring users to escape all reserved chars, but that’s ugly af. Instead, the parser continually assesses the current context and flips these flags on or off to auto-escape certain chars for the next token.



27
28
29
# File 'lib/fop/tokenizer.rb', line 27

# Returns the current set of auto-escape flags (the tokenizer's "mode").
#
# #next consults these flags to decide whether a reserved character is
# emitted as its own token or handed off as ordinary text. The flags are
# replaced wholesale by #reset_escapes! and adjusted by #regex_mode!.
#
# @return [Object] the flags object currently stored in @escape
def escape
  @escape
end

Instance Method Details

#next ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/fop/tokenizer.rb', line 53

# Scans and returns the next token, advancing the cursor past it.
#
# Dispatches on the character under the cursor. A reserved character yields
# its dedicated token unless the corresponding @escape flag is set, in which
# case it is handed to get_str! (defined elsewhere; presumably reads it as
# part of a string token). Any non-reserved character also goes to get_str!.
#
# Whitespace has three treatments:
# * @escape.whitespace set      -> passed to get_str! like ordinary text
# * @escape.whitespace_sep unset -> emitted as a WHITESPACE_SEP token
# * otherwise                    -> skipped entirely
#
# Skipped whitespace is consumed with a loop instead of re-entering this
# method once per space, so a very long run of spaces cannot exhaust the
# call stack.
#
# @return [Token] the next token, or a Tokens::EOF token positioned at the
#   cursor once it has moved past the last character
def next
  # Consume ignorable spaces up front; each skipped space also moves the
  # token start marker so the following token does not include it.
  if !@escape.whitespace && @escape.whitespace_sep
    while @i <= @end && @src[@i] == WHITESPACE
      @i += 1
      @start_i = @i
    end
  end
  return Token.new(@i, Tokens::EOF) if @i > @end

  char = @src[@i]
  case char
  when EXP_OPEN
    @i += 1
    token! Tokens::EXP_OPEN
  when EXP_CLOSE
    @i += 1
    token! Tokens::EXP_CLOSE
  when WILDCARD
    @i += 1
    token! Tokens::WILDCARD, WILDCARD
  when REGEX_DELIM
    if @escape.regex
      get_str!
    else
      @i += 1
      token! Tokens::REG_DELIM
    end
  when REGEX_CAPTURE
    if @escape.regex_capture
      get_str!
    else
      # The character after "$" is the token's value; step over both.
      # NOTE(review): if "$" is the final character the value is nil.
      @i += 1
      t = token! Tokens::REG_CAPTURE, @src[@i]
      @i += 1
      @start_i = @i
      t
    end
  when OP_REPLACE, OP_APPEND, OP_PREPEND, OP_ADD, OP_SUB
    if @escape.operators
      get_str!
    else
      @i += 1
      token! Tokens::OPERATOR, char
    end
  when WHITESPACE
    # Only reached when whitespace is significant in the current mode;
    # ignorable whitespace was already consumed above.
    if @escape.whitespace
      get_str!
    else
      @i += 1
      token! Tokens::WHITESPACE_SEP
    end
  else
    get_str!
  end
end

#regex_mode! ⇒ Object

Auto-escape anything you’d find in a regular expression



43
44
45
46
47
48
49
50
51
# File 'lib/fop/tokenizer.rb', line 43

# Switches the escape flags to the configuration used inside a regular
# expression: everything is auto-escaped except the regex delimiter.
#
# @return [true]
def regex_mode!
  # Keep "/" live so the closing delimiter is still recognised.
  @escape.regex = false
  # Everything else is treated as literal regex text. With regex_escape on,
  # "\" is passed through to the regex engine unless it is followed by "/".
  @escape.whitespace = @escape.regex_escape = @escape.wildcards =
    @escape.operators = @escape.regex_capture = @escape.exp = true
end

#reset_escapes! ⇒ Object

Auto-escape operators and regex capture vars. Appropriate for top-level syntax.



38
39
40
# File 'lib/fop/tokenizer.rb', line 38

# Replaces the escape flags with a fresh default set, discarding any
# mode-specific changes. Intended for top-level syntax.
#
# The first four positional flags of Escapes are switched on; which flags
# those are depends on the Escapes definition (not shown here).
#
# @return [Escapes] the newly installed flags object
def reset_escapes!
  defaults = Array.new(4, true)
  @escape = Escapes.new(*defaults)
end