Class: Fop::Parser

Inherits: Object
Defined in:
lib/fop/parser.rb

Defined Under Namespace

Classes: Error

Constant Summary

DIGIT =
/^[0-9]$/
REGEX_START =
"^".freeze
REGEX_LAZY_WILDCARD =
".*?".freeze
REGEX_MATCHES =
{
  "N" => "[0-9]+".freeze,
  "W" => "\\w+".freeze,
  "A" => "[a-zA-Z]+".freeze,
  "*" => ".*".freeze,
}.freeze
TR_REGEX =
/.*/
OPS_WITH_OPTIONAL_ARGS =
[Tokenizer::OP_REPLACE]
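
As a rough sketch of how these constants are combined (mirroring #build_regex! and #parse_exp_match! below), a match class such as "N" is looked up in REGEX_MATCHES and prefixed with either the anchor or the lazy wildcard, depending on whether a wildcard preceded it:

# Illustration only; values taken from the constants above.
src      = Fop::Parser::REGEX_MATCHES["N"]                     # => "[0-9]+"
anchored = Regexp.new(Fop::Parser::REGEX_START + src)          # => /^[0-9]+/
lazy     = Regexp.new(Fop::Parser::REGEX_LAZY_WILDCARD + src)  # => /.*?[0-9]+/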

Instance Attribute Summary

Instance Method Summary

Constructor Details

#initialize(src, debug: false) ⇒ Parser

Returns a new instance of Parser.



# File 'lib/fop/parser.rb', line 26

def initialize(src, debug: false)
  @tokenizer = Tokenizer.new(src)
  @errors = []
end
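
A minimal usage sketch (the program string is hypothetical; debug is accepted, but only the tokenizer and the error list are set up here):

parser = Fop::Parser.new("some-fop-program")
parser.errors  # => [] until #parse records problems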

Instance Attribute Details

#errors ⇒ Object (readonly)

Returns the value of attribute errors.



# File 'lib/fop/parser.rb', line 24

def errors
  @errors
end

Instance Method Details

#build_regex!(wildcard, token, src = token.val) ⇒ Object



# File 'lib/fop/parser.rb', line 166

def build_regex!(wildcard, token, src = token.val)
  Regexp.new((wildcard ? REGEX_LAZY_WILDCARD : REGEX_START) + src)
rescue RegexpError => e
  errors << Error.new(:regex, token, e.message)
  nil
end
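
For illustration, assuming a stand-in token object that responds to #val (not the gem's real Tokenizer token): with wildcard false the source is anchored with "^", with wildcard true it is prefixed with the lazy ".*?", and an invalid pattern records a :regex Error and returns nil.

FakeToken = Struct.new(:val)   # hypothetical stand-in for a Tokenizer token
parser = Fop::Parser.new("")
parser.build_regex!(false, FakeToken.new("[0-9]+"))  # => /^[0-9]+/
parser.build_regex!(true,  FakeToken.new("[0-9]+"))  # => /.*?[0-9]+/
parser.build_regex!(false, FakeToken.new("("))       # => nil; parser.errors now holds a :regex Error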

#parseObject



# File 'lib/fop/parser.rb', line 31

def parse
  nodes = []
  wildcard = false
  eof = false
  # Top-level parsing. It will always be looking for a String, Regex, or Expression.
  until eof
    @tokenizer.reset_escapes!
    t = @tokenizer.next
    case t.type
    when Tokens::WILDCARD
      errors << Error.new(:syntax, t, "Consecutive wildcards") if wildcard
      wildcard = true
    when Tokens::TEXT
      reg = build_regex!(wildcard, t, Regexp.escape(t.val))
      nodes << Nodes::Text.new(wildcard, t.val, reg)
      wildcard = false
    when Tokens::EXP_OPEN
      nodes << parse_exp!(wildcard)
      wildcard = false
    when Tokens::REG_DELIM
      nodes << parse_regex!(wildcard)
      wildcard = false
    when Tokens::EOF
      eof = true
    else
      errors << Error.new(:syntax, t, "Unexpected #{t.type}")
    end
  end
  nodes << Nodes::Text.new(true, "", TR_REGEX) if wildcard
  return nodes, @errors
end
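
#parse returns both the node list and the accumulated errors, so callers typically destructure the result. A minimal sketch with a hypothetical program string:

nodes, errors = Fop::Parser.new("some-fop-program").parse
if errors.any?
  # each Error was built with a kind (e.g. :syntax), the offending token, and a message
end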

#parse_exp!(wildcard = false) ⇒ Object



# File 'lib/fop/parser.rb', line 63

def parse_exp!(wildcard = false)
  exp = Nodes::Expression.new(wildcard)
  parse_exp_match! exp
  parse_exp_operator! exp
  if exp.operator_token
    parse_exp_arg! exp
  end
  return exp
end

#parse_exp_arg!(exp) ⇒ Object



# File 'lib/fop/parser.rb', line 110

def parse_exp_arg!(exp)
  @tokenizer.escape.whitespace = false
  @tokenizer.escape.whitespace_sep = false
  @tokenizer.escape.operators = true
  @tokenizer.escape.regex = true
  @tokenizer.escape.regex_capture = false if exp.regex_match

  arg = Nodes::Arg.new([], false)
  exp.args = []
  found_close, eof = false, false
  until found_close or eof
    t = @tokenizer.next
    case t.type
    when Tokens::TEXT
      arg.segments << t.val
    when Tokens::REG_CAPTURE
      arg.has_captures = true
      arg.segments << t.val.to_i - 1
      errors << Error.new(:syntax, t, "Invalid regex capture; must be between 0 and 9 (found #{t.val})") unless t.val =~ DIGIT
      errors << Error.new(:syntax, t, "Unexpected regex capture; expected str or '}'") if !exp.regex_match
    when Tokens::WHITESPACE_SEP
      if arg.segments.any?
        exp.args << arg
        arg = Nodes::Arg.new([])
      end
    when Tokens::EXP_CLOSE
      found_close = true
    when Tokens::EOF
      eof = true
      errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected str or '}'")
    else
      errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected str or '}'")
    end
  end
  exp.args << arg if arg.segments.any?

  #if exp.arg.size != 1 and !OPS_WITH_OPTIONAL_ARGS.include?(exp.operator)
  #  errors << Error.new(:arg, op_token, "Operator '#{op_token.val}' requires an argument")
  #end
end

#parse_exp_match!(exp) ⇒ Object



# File 'lib/fop/parser.rb', line 73

def parse_exp_match!(exp)
  @tokenizer.escape.whitespace = false
  @tokenizer.escape.operators = false
  t = @tokenizer.next
  case t.type
  when Tokens::TEXT, Tokens::WILDCARD
    exp.match = t.val
    if (src = REGEX_MATCHES[exp.match])
      reg = Regexp.new((exp.wildcard ? REGEX_LAZY_WILDCARD : REGEX_START) + src)
      exp.regex = Nodes::Regex.new(exp.wildcard, src, reg)
    else
      errors << Error.new(:name, t, "Unknown match type '#{exp.match}'") if exp.regex.nil?
    end
  when Tokens::REG_DELIM
    exp.regex = parse_regex!(exp.wildcard)
    exp.match = exp.regex&.src
    exp.regex_match = true
    @tokenizer.reset_escapes!
  else
    errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string or a regex")
  end
end

#parse_exp_operator!(exp) ⇒ Object



# File 'lib/fop/parser.rb', line 96

def parse_exp_operator!(exp)
  @tokenizer.escape.whitespace = false
  @tokenizer.escape.operators = false
  t = @tokenizer.next
  case t.type
  when Tokens::EXP_CLOSE
    # no op
  when Tokens::OPERATOR, Tokens::TEXT
    exp.operator_token = t
  else
    errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected an operator")
  end
end

#parse_regex!(wildcard) ⇒ Object



# File 'lib/fop/parser.rb', line 151

def parse_regex!(wildcard)
  @tokenizer.regex_mode!
  t = @tokenizer.next
  reg = Nodes::Regex.new(wildcard, t.val)
  if t.type == Tokens::TEXT
    reg.regex = build_regex!(wildcard, t)
  else
    errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string of regex")
  end

  t = @tokenizer.next
  errors << Error.new(:syntax, t, "Unexpected #{t.type}; expected a string of regex") unless t.type == Tokens::REG_DELIM
  reg
end