Method: ParsingMachine#step

Defined in:
lib/rpeg/parsing_machine.rb

#stepObject



153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
# File 'lib/rpeg/parsing_machine.rb', line 153

def step
  instr = @program[@i_ptr]

  case instr.op_code
  when Instruction::TEST_CHARSET
    test_char(instr.data.include?(@subject[@subject_index]), instr.offset)
  when Instruction::TEST_CHAR
    test_char(instr.data == @subject[@subject_index], instr.offset)
  when Instruction::TEST_ANY
    test_char(@subject_index < @subject_size, instr.offset)
  when Instruction::ANY
    check_char(@subject_index < @subject_size)
  when Instruction::CHARSET
    check_char(instr.data.include?(@subject[@subject_index]))
  when Instruction::CHAR
    check_char(instr.data == @subject[@subject_index])
  when Instruction::JUMP
    @i_ptr += instr.offset
  when Instruction::CHOICE
    # We push the offset for the other side of the choice
    push(:state, instr.offset)
    @i_ptr += 1
  when Instruction::CALL
    # Call is like jump, but we push the return address onto the stack first
    push(:instruction, 1)
    @i_ptr += instr.offset
  when Instruction::RETURN
    @i_ptr = pop(:instruction).i_ptr
  when Instruction::COMMIT
    # we pop and discard the top of the stack (which must be a full state) and then do the jump given by arg1. Even though we
    # are discarding it check that it was a full state for sanity.
    _ = pop(:state)
    @i_ptr += instr.offset
  when Instruction::PARTIAL_COMMIT
    # Sort of a combination of commit (which pops) and choice (which pushes), but we just tweak the top of the stack. See
    # Ierusalimschy, sec 4.3
    stack_top = peek(:state)
    raise "Empty stack for partial commit!" unless stack_top

    stack_top.subject_index = @subject_index
    stack_top.bread_count = @bread_count
    @i_ptr += instr.offset
  when Instruction::BACK_COMMIT
    # A combination of a fail and a commit. We backtrack, but then jump to the specified instruction rather than using the
    # backtrack label. It's used for the AND pattern. See Ierusalimschy, 4.4
    stack_top = pop(:state)
    @subject_index = stack_top.subject_index
    @bread_count = stack_top.bread_count
    @i_ptr += instr.offset
  when Instruction::SPAN
    # Special instruction for when we are repeating over a charset, which is common. We just consume as many maching characters
    # as there are. This never fails as we can always match at least zero.
    @subject_index += 1 while instr.data.include?(@subject[@subject_index])
    @i_ptr += 1
  when Instruction::BEHIND
    n = instr.aux # the (fixed) length of the pattern we want to match.
    if n > @subject_index
      # We can't jump so far back in the subject
      handle_fail_ptr
    else
      @subject_index -= n
      @i_ptr += 1
    end
  when Instruction::FAIL
    handle_fail_ptr
  when Instruction::FAIL_TWICE
    # An optimization for the NOT implementation. We pop the top of the stack and discard it, and then enter the fail routine
    # again. For sanity's sake we'll check that the thing we are popping is a :state entry. See Ierusalimschy, 4.4
    _ = pop(:state)
    handle_fail_ptr
  when Instruction::CLOSE_RUN_TIME
    # The LPEG code for runtime captures is very complicated. Reading through it, it appears that the complexity comes from
    # needing to carefully manage the capture breadcrumbs wrt to the Lua values living on the Lua stack to avoid memory
    # leaks. We don't have to worry about that here, as everything is in Ruby and we can leave the hard stuff to the garbage
    # collector. The remaining work is little more than we have with a function capture.
    result = run_time_capture
    handle_run_time_capture_result(result)
  when Instruction::OPEN_CAPTURE
    record_capture(instr, size: 0, subject_index: @subject_index)
  when Instruction::CLOSE_CAPTURE
    # As in LPEG: "if possible, turn capture into a full capture"
    raise "Close capture without an open" unless @bread_count.positive?

    lc = @breadcrumbs[@bread_count - 1].must_be # still on the breadcrumb list
    if lc.size.zero? && (@subject_index - lc.subject_index) < 255 # TODO: should we care about an upper bound here?
      # The previous breadcrumb was an OPEN, and we are closing it
      lc.size = @subject_index - lc.subject_index + 1
      @i_ptr += 1
    else
      record_capture(instr, size: 1, subject_index: @subject_index)
    end
  when Instruction::FULL_CAPTURE
    # We have an all-in-one match, and the "capture length" tells us how far back in the subject the match started.
    len = (instr.aux[:capture_length] || 0).must_be(Integer)
    record_capture(instr, size: 1 + len, subject_index: @subject_index - len)
  when Instruction::OP_END
    @success = true
    done!
  else
    raise "Unhandled op code #{instr.op_code}"
  end
end