Class: Regextest::Back::Result

Inherits:
Object
  • Object
show all
Includes:
Common
Defined in:
lib/regextest/back/result.rb

Constant Summary

Constants included from Common

Common::TstConstDebug, Common::TstConstRecursionMax, Common::TstConstRepeatMax, Common::TstConstRetryMax, Common::TstConstRetryMaxSecond, Common::TstConstTimeout, Common::TstConstUnicodeCharSet, Common::TstFixnumMax

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Common

#TstLog, #TstMdPrint, #TstRand, #TstShuffle, #is_random?, #reset_random_called

Constructor Details

#initialize ⇒ Result

Returns a new instance of Result.



11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/regextest/back/result.rb', line 11

# Builds an empty result: no elements, no pending look-arounds, no anchors,
# no repeat bookkeeping, a zero-width match window and no fixed strings yet.
def initialize
  # element list and deferred look-ahead / look-behind records
  @results, @look_aheads, @look_behinds = [], [], []
  # per-command / per-repeat-id offset tables
  @positional_anchors, @reluctant_repeat, @possessive_repeat = {}, {}, {}
  # match window inside @results
  @start_offset = @end_offset = 0
  # strings produced later by #fix
  @pre_match = @match = @post_match = nil
end

Instance Attribute Details

#end_offset ⇒ Object (readonly)

Returns the value of attribute end_offset.



25
26
27
# File 'lib/regextest/back/result.rb', line 25

# Reader for @end_offset. The value starts at 0 in #initialize, grows by one
# per #push_body call and is shifted by #unshift_params; #fix uses it as the
# exclusive end of the matched part of @results.
def end_offset
  @end_offset
end

#match ⇒ Object (readonly)

Returns the value of attribute match.



25
26
27
# File 'lib/regextest/back/result.rb', line 25

# Reader for @match: nil after #initialize, and set by #fix to the string
# built from @results[@start_offset..@end_offset-1].
def match
  @match
end

#positional_anchors ⇒ Object (readonly)

Returns the value of attribute positional_anchors.



25
26
27
# File 'lib/regextest/back/result.rb', line 25

# Reader for @positional_anchors: a Hash keyed by anchor command whose values
# are Arrays of offsets (filled by #add_anchor and #merge_anchors).
def positional_anchors
  @positional_anchors
end

#post_match ⇒ Object (readonly)

Returns the value of attribute post_match.



25
26
27
# File 'lib/regextest/back/result.rb', line 25

# Reader for @post_match: nil after #initialize, and set by #fix to the string
# built from the elements at and after @end_offset.
def post_match
  @post_match
end

#pre_match ⇒ Object (readonly)

Returns the value of attribute pre_match.



25
26
27
# File 'lib/regextest/back/result.rb', line 25

# Reader for @pre_match: nil after #initialize, and set by #fix to the string
# built from the elements before @start_offset.
def pre_match
  @pre_match
end

#results ⇒ Object (readonly)

Returns the value of attribute results.



25
26
27
# File 'lib/regextest/back/result.rb', line 25

# Reader for @results: the Array of elements appended by #push_body and
# extended / replaced by the merge and narrow-down steps.
def results
  @results
end

Instance Method Details

#[](offset) ⇒ Object

Returns the element stored at the given offset



37
38
39
# File 'lib/regextest/back/result.rb', line 37

# Index access into the element list.
#
# offset:: index (or anything Array#slice accepts as a single argument)
# Returns whatever @results holds at that position, nil when out of range.
def [](offset)
  @results.slice(offset)
end

#add_anchor(cmd) ⇒ Object

Adds offset of anchor



57
58
59
60
# File 'lib/regextest/back/result.rb', line 57

# Records that anchor command +cmd+ occurs at the current end offset.
#
# cmd:: anchor command used as key of @positional_anchors
# Returns the (possibly newly created) offset list for that command.
def add_anchor(cmd)
  (@positional_anchors[cmd] ||= []) << @end_offset
end

#add_look_ahead(command, sub_results) ⇒ Object

Adds results of look_ahead



47
48
49
# File 'lib/regextest/back/result.rb', line 47

# Queues a look-ahead for later merging, remembering the offset at which it
# was encountered (the current end offset).
#
# command::     look-ahead command, dispatched on later by #merge_look_ahead
# sub_results:: results generated for the look-ahead body
# Returns @look_aheads.
def add_look_ahead(command, sub_results)
  record = { offset: @end_offset, cmd: command, results: sub_results }
  @look_aheads << record
end

#add_look_behind(command, sub_results) ⇒ Object

Adds results of look_behind



52
53
54
# File 'lib/regextest/back/result.rb', line 52

# Queues a look-behind for later merging, remembering the offset at which it
# was encountered (the current end offset).
#
# command::     look-behind command, dispatched on later by #merge_look_behind
# sub_results:: results generated for the look-behind body
# Returns @look_behinds.
def add_look_behind(command, sub_results)
  record = { offset: @end_offset, cmd: command, results: sub_results }
  @look_behinds << record
end

#add_reluctant_repeat(elem) ⇒ Object

Adds reluctant / possessive repeat information



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/regextest/back/result.rb', line 63

# Records where a reluctant or possessive repeat begins / ends.
#
# elem:: object answering #command (one of the four CMD_ANC_*_BEGIN/END
#        symbols) and #param (a Hash whose :id identifies the repeat)
#
# A BEGIN command starts a fresh one-element offset list for the id; an END
# command appends the current end offset to it.
# Raises RuntimeError for an END without a preceding BEGIN and for any other
# command.
def add_reluctant_repeat(elem)
  repeat_id = elem.param[:id]

  case elem.command
  when :CMD_ANC_RELUCTANT_BEGIN
    @reluctant_repeat[repeat_id] = [@end_offset]
  when :CMD_ANC_RELUCTANT_END
    opened = @reluctant_repeat[repeat_id]
    raise "internal error, invalid reluctant_repeat_end command" unless opened
    opened.push @end_offset
  when :CMD_ANC_POSSESSIVE_BEGIN
    @possessive_repeat[repeat_id] = [@end_offset]
  when :CMD_ANC_POSSESSIVE_END
    opened = @possessive_repeat[repeat_id]
    raise "internal error, invalid possessive_repeat_end command" unless opened
    opened.push @end_offset
  else
    raise "internal error, invalid reluctant / possessive repeat command"
  end
end

#bound_process(elem1, elem2) ⇒ Object

Word-boundary process (\b)



452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
# File 'lib/regextest/back/result.rb', line 452

# Constrains two neighbouring elements so that exactly one of them is a word
# element and the other a non-word element.
#
# Whichever element already reports a word / non-word state (elem1 is asked
# first) decides the state forced on the other one; when neither does, the
# assignment is picked with TstRand(2).
#
# Returns false when either element reports empty? afterwards, true otherwise.
def bound_process(elem1, elem2)
  case
  when elem1.word_elements?     then elem2.set_non_word_elements
  when elem1.non_word_elements? then elem2.set_word_elements
  when elem2.word_elements?     then elem1.set_non_word_elements
  when elem2.non_word_elements? then elem1.set_word_elements
  else
    # nothing decided yet: choose which side becomes the word side
    setters =
      if TstRand(2) == 0
        [:set_word_elements, :set_non_word_elements]
      else
        [:set_non_word_elements, :set_word_elements]
      end
    elem1.public_send(setters[0])
    elem2.public_send(setters[1])
  end

  !(elem1.empty? || elem2.empty?)
end

#fix ⇒ Object

Fixes results



520
521
522
523
524
525
526
# File 'lib/regextest/back/result.rb', line 520

# Turns the element list into concrete strings.
#
# Stores the part before the match window in @pre_match, the window itself
# (@start_offset up to, not including, @end_offset) in @match and the rest in
# @post_match, each produced by #fix_part.
#
# Returns the three parts joined in order.
def fix
  @pre_match = fix_part(0, @start_offset - 1)
  @match = fix_part(@start_offset, @end_offset - 1)
  @post_match = fix_part(@end_offset, @results.size - 1)

  [@pre_match, @match, @post_match].inject(:+)
end

#fix_part(start_offset, end_offset) ⇒ Object

Fixes part of results



529
530
531
532
533
534
535
# File 'lib/regextest/back/result.rb', line 529

# Concatenates the random_fix value of every element from start_offset to
# end_offset (both inclusive).
#
# Returns "" when end_offset is below start_offset.
def fix_part(start_offset, end_offset)
  (start_offset..end_offset).reduce("") do |text, index|
    text + @results[index].random_fix
  end
end

#is_begin_anchor? ⇒ Boolean

Returns true if a string-begin or line-begin anchor is recorded at offset 0

Returns:

  • (Boolean)


502
503
504
505
506
507
# File 'lib/regextest/back/result.rb', line 502

# Tells whether a begin anchor sits at the very start of the results.
#
# Returns true when the first recorded offset of :CMD_ANC_STRING_BEGIN or of
# :CMD_ANC_LINE_BEGIN is 0; otherwise a falsy value (nil or false).
def is_begin_anchor?
  string_begin = @positional_anchors[:CMD_ANC_STRING_BEGIN]
  return true if string_begin && string_begin[0] == 0

  line_begin = @positional_anchors[:CMD_ANC_LINE_BEGIN]
  line_begin && line_begin[0] == 0
end

#is_end_anchor? ⇒ Boolean

Returns true if a string-end or line-end anchor is recorded at the end of the results

Returns:

  • (Boolean)


510
511
512
513
514
515
516
517
# File 'lib/regextest/back/result.rb', line 510

# Tells whether an end anchor sits right after the last element.
#
# Returns true when the last recorded offset of :CMD_ANC_STRING_END,
# :CMD_ANC_STRING_END2 or :CMD_ANC_LINE_END equals @results.size; otherwise
# a falsy value (nil or false).
def is_end_anchor?
  string_end = @positional_anchors[:CMD_ANC_STRING_END]
  return true if string_end && string_end[-1] == @results.size

  string_end2 = @positional_anchors[:CMD_ANC_STRING_END2]
  return true if string_end2 && string_end2[-1] == @results.size

  line_end = @positional_anchors[:CMD_ANC_LINE_END]
  line_end && line_end[-1] == @results.size
end

#merge ⇒ Object

Merge results of look aheads / behinds



88
89
90
91
# File 'lib/regextest/back/result.rb', line 88

# Merges the queued look-aheads and then the queued look-behinds.
#
# Look-behinds are only attempted when the look-ahead step succeeded.
# Returns the falsy result of #merge_look_ahead, otherwise whatever
# #merge_look_behind returns.
def merge
  ahead_merged = merge_look_ahead
  return ahead_merged unless ahead_merged

  merge_look_behind
end

#merge_anchors(offset, sub_results) ⇒ Object

Merge anchors



288
289
290
291
292
293
# File 'lib/regextest/back/result.rb', line 288

# Folds the anchors of a sub-result into this result.
#
# offset::      amount added to every anchor offset of the sub-result
# sub_results:: object answering #positional_anchors (command => offsets)
#
# Each command's list becomes the duplicate-free union of the existing
# offsets and the shifted ones. Returns the sub-result's anchor hash.
def merge_anchors(offset, sub_results)
  sub_results.positional_anchors.each_pair do |command, sub_offsets|
    shifted = sub_offsets.map { |sub_offset| sub_offset + offset }
    @positional_anchors[command] = (@positional_anchors[command] || []) | shifted
  end
end

#merge_look_aheadObject

Merge results of look aheads



94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# File 'lib/regextest/back/result.rb', line 94

# Applies every queued look-ahead to the results.
#
# For each record the sub-result's anchors are merged first, then the elements
# are merged according to the record's command.
#
# Returns true when all records merged, nil as soon as one fails.
# Raises RuntimeError for a command other than :CMD_LOOK_AHEAD /
# :CMD_NOT_LOOK_AHEAD.
def merge_look_ahead
  @look_aheads.each do |entry|
    offset, command, sub_results = entry.values_at(:offset, :cmd, :results)

    merge_anchors(offset, sub_results)
    merged =
      case command
      when :CMD_LOOK_AHEAD     then merge_look_ahead_elems(offset, sub_results)
      when :CMD_NOT_LOOK_AHEAD then merge_not_look_ahead_elems(offset, sub_results)
      else raise "invalid command at merge_look_ahead: #{command}"
      end
    return nil unless merged
  end
  true
end

#merge_look_ahead_elems(offset, sub_results) ⇒ Object

Merges each element of a look-ahead into the results



118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/regextest/back/result.rb', line 118

# Lays the elements of a positive look-ahead over the results, starting at
# +offset+.
#
# offset::      position in @results where the look-ahead starts
# sub_results:: indexable collection answering #size and #[]
#
# A position that already exists is intersected with the look-ahead element;
# a position past the current end is filled by appending that element.
#
# Returns true on success, nil as soon as an intersection is falsy.
def merge_look_ahead_elems(offset, sub_results)
  sub_results.size.times do |sub_index|
    position = offset + sub_index
    sub_elem = sub_results[sub_index]

    if position >= @results.size
      # beyond the elements generated so far: extend the results
      @results.push(sub_elem)
    elsif !@results[position].intersect(sub_elem)
      return nil
    end
  end
  true
end

#merge_look_behindObject

Merge results of look behind



189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
# File 'lib/regextest/back/result.rb', line 189

# Applies every queued look-behind to the results.
#
# For each record the sub-result's anchors are merged first, then the elements
# are merged according to the record's command.
#
# Returns true when all records merged, nil as soon as one fails.
# Raises RuntimeError for a command other than :CMD_LOOK_BEHIND /
# :CMD_NOT_LOOK_BEHIND.
def merge_look_behind
  @look_behinds.each do |entry|
    offset, command, sub_results = entry.values_at(:offset, :cmd, :results)

    merge_anchors(offset, sub_results)
    merged =
      case command
      when :CMD_LOOK_BEHIND     then merge_look_behind_elems(offset, sub_results)
      when :CMD_NOT_LOOK_BEHIND then merge_not_look_behind_elems(offset, sub_results)
      else raise "invalid command at merge_look_behind: #{command}"
      end
    return nil unless merged
  end
  true
end

#merge_look_behind_elems(offset, sub_results) ⇒ Object

Merges each element of a look-behind into the results



213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
# File 'lib/regextest/back/result.rb', line 213

# Lays the elements of a positive look-behind over the results so that the
# look-behind ends at +offset+.
#
# offset::      position in @results where the look-behind was encountered
# sub_results:: object answering #end_offset (number of look-behind elements)
#               and #[] (element by index)
#
# The look-behind covers positions (offset - length)...offset. Positions that
# fall before the start of @results are prepended (and every stored offset is
# shifted via #unshift_params); the remaining ones are intersected with the
# existing elements.
#
# Returns true on success, false when #unshift_params refuses the shift and
# nil when an intersection is falsy.
def merge_look_behind_elems(offset, sub_results)
  sub_length = sub_results.end_offset

  # Number of look-behind elements that do not fit in front of +offset+.
  # Same orientation as merge_not_look_behind_elems: positive means the
  # look-behind is longer than the text generated before it.
  unshift_length = sub_length - offset
  if unshift_length > 0
    # @results = sub_results[0..(unshift_length-1)] + @results
    return false unless unshift_params(unshift_length)
  else
    unshift_length = 0
  end

  # Index in the (not yet extended) @results matched by look-behind element 0;
  # negative exactly for the elements that have to be prepended.
  base = offset - sub_length

  pre_part = []
  sub_length.times do |i|
    sub_elem = sub_results[i]
    if i < unshift_length
      pre_part.push sub_elem
    elsif !@results[base + i].intersect(sub_elem)
      return nil
    end
  end
  @results = pre_part + @results
  true
end

#merge_not_look_ahead_elems(offset, sub_results) ⇒ Object

Merge each elements of not-look-aheads



138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# File 'lib/regextest/back/result.rb', line 138

# Merges a negative look-ahead: tries to make at least one position of the
# look-ahead differ from the corresponding element of the results.
#
# offset::      position in @results where the look-ahead starts
# sub_results:: either a Regextest::Back::Result or another collection
#               answering #size and #[] (e.g. the Array passed by
#               #narrow_down_by_reluctant_repeat)
#
# Returns true when a position could be excluded (and @results was replaced
# by the modified working copy), false otherwise.
def merge_not_look_ahead_elems(offset, sub_results)
  # end of the look-ahead window: a Result reports it via end_offset,
  # anything else via size
  if Regextest::Back::Result === sub_results
    term_offset = offset + sub_results.end_offset
  else
    term_offset = offset + sub_results.size
  end
  # candidate positions inside the look-ahead, in the order TstShuffle gives
  # (presumably random; TstShuffle comes from Common and is not shown here)
  try_order = TstShuffle(sub_results.size.times.to_a)
  found = false
  # exclude, at least, one element
  try_order.each do | j |
    # work on a copy of the list; NOTE(review): dup is shallow, so the
    # elements themselves are shared with @results - confirm whether
    # exclude mutates its receiver
    results_work = @results.dup
    cur_offset = offset + j
  
    # puts "offset=#{offset} term_offset=#{term_offset}"
    offset.step(term_offset-1).each do | i |
      sub_elem = sub_results[i-offset]
      
      if i < results_work.size   # it is NOT @end_offset
        if i == cur_offset
          # existing element at the chosen position: try to exclude the
          # look-ahead element from it; `next` only moves on to the next i
          if(!results_work[i].exclude(sub_elem))
            next
          else
            found = true
          end
        else
          # do nothing
        end
      else
        # position past the current end: the list has to grow
        if i == cur_offset
          # chosen position: append the reverse of the look-ahead element
          # when one is available, otherwise fall back to any character
          if(reverse_work = sub_elem.reverse)
            results_work.push reverse_work
            found = true
          else
            results_work.push(Regextest::Back::Element.any_char)
          end
        else
          results_work.push(Regextest::Back::Element.any_char)
        end
      end
    end
    # first successful candidate wins: commit the working copy and stop
    if found
      @results = results_work
      break
    end
  end
  # pp @results
  # puts "found = #{found}"
  found
end

#merge_not_look_behind_elems(offset, sub_results) ⇒ Object

Merge each elements of not look behinds



240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
# File 'lib/regextest/back/result.rb', line 240

# Merges a negative look-behind: tries to make at least one position of the
# look-behind differ from the corresponding element of the results.
#
# offset::      position in @results where the look-behind was encountered
# sub_results:: object answering #end_offset, #size and #[]
#
# Returns true when a position could be negated (and @results was replaced by
# the modified working copy), false when none could or when #unshift_params
# refused the shift.
def merge_not_look_behind_elems(offset, sub_results)
  # positive when the look-behind is longer than the text before +offset+;
  # in that case every stored offset is shifted first
  unshift_length = sub_results.end_offset - offset
  if unshift_length > 0
    if !unshift_params(unshift_length)
      return false
    end
  end
  
  # candidate positions inside the look-behind, in the order TstShuffle gives
  # (presumably random; TstShuffle comes from Common and is not shown here)
  try_order = TstShuffle(sub_results.size.times.to_a)
  found = false
  # exclude, at least, one element
  try_order.each do | j |
    # shallow copy: the list is new, the elements are shared with @results
    results_work = @results.dup

    # intersect elems
    results_offset = (unshift_length > 0)?0:(offset-sub_results.end_offset)
    sub_offset = (unshift_length >=0)?unshift_length:(-unshift_length)
    # NOTE(review): sub_offset is the absolute value of unshift_length, so the
    # prepend branch below is also taken when the look-behind is SHORTER than
    # the preceding text (unshift_length < 0) - looks unintended, confirm.
    0.step(sub_results.end_offset-1) do | i |
      sub_elem = sub_results[i]
      
      if i < sub_offset
        # NOTE(review): consecutive unshift calls put later elements in front
        # of earlier ones, and found is set without checking that
        # sub_elem.reverse is non-nil (merge_not_look_ahead_elems does check)
        # - confirm both are intended.
        if i == j
          results_work.unshift (sub_elem.reverse)
          found = true
        else
          results_work.unshift (Regextest::Back::Element.any_char)
        end
      else
        if i == j
          # chosen position lies on an existing element: try to exclude the
          # look-behind element from it; `next` only moves on to the next i
          if(!results_work[results_offset+i].exclude(sub_elem))
            next
          else
            found = true
          end
        else
          # do nothing
        end
      end
    end
    # first successful candidate wins: commit the working copy and stop
    if found
      @results = results_work
      break
    end
  end
  found
end

#narrow_downObject

Narrows down candidates by anchors and then by reluctant repeats



309
310
311
312
# File 'lib/regextest/back/result.rb', line 309

# Narrows the candidates first by positional anchors and then by reluctant
# repeats.
#
# The second step only runs when the first succeeded. Returns the falsy
# result of #narrow_down_by_anchors, otherwise whatever
# #narrow_down_by_reluctant_repeat returns.
def narrow_down
  anchors_ok = narrow_down_by_anchors
  return anchors_ok unless anchors_ok

  narrow_down_by_reluctant_repeat
end

#narrow_down_by_anchorsObject

narrow down candidate by anchors



334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
# File 'lib/regextest/back/result.rb', line 334

# Narrows the candidate elements so that every recorded positional anchor can
# hold, walking @positional_anchors (anchor command => list of offsets).
#
# Depending on the command this rejects impossible placements, forces
# neighbouring elements to be new-lines, or forces word / non-word classes on
# the two elements around a boundary (adding an element at either end of
# @results when the boundary sits at the edge and no begin / end anchor
# forbids it). :CMD_ANC_LOOK_BEHIND2 moves @start_offset.
#
# Returns true when all anchors could be satisfied, false otherwise.
# Raises RuntimeError for an anchor command that is not handled here.
def narrow_down_by_anchors
  @positional_anchors.each do | cmd, offsets |
    case cmd
    when :CMD_ANC_STRING_BEGIN, :CMD_ANC_MATCH_START
      return false if offsets.max > 0
    when :CMD_ANC_STRING_END
      # NOTE(review): an anchor one element before the end is accepted here,
      # whereas is_end_anchor? compares against @results.size - confirm.
      return false if offsets.min < (@results.size - 1)
    when :CMD_ANC_STRING_END2
      min_offset = offsets.min
      if min_offset < (@results.size - 1)
        return false
      elsif min_offset == (@results.size - 1)
        # anchor before the last element: that element must be a new-line
        if @results[min_offset].new_line?
          @results[min_offset].set_new_line
        else
          return false
        end
      end
    when :CMD_ANC_LINE_BEGIN
      offsets.each do | offset |
        if offset == 0
            # ok
        elsif @results[offset-1].new_line?
          @results[offset-1].set_new_line
        else
          return false
        end
      end
    when :CMD_ANC_LINE_END
      offsets.each do | offset |
        if offset == @results.size
            # ok
        elsif @results[offset].new_line?
          @results[offset].set_new_line
        else
          return false
        end
      end
    when :CMD_ANC_WORD_BOUND
      # De-duplicate in place, then walk by index: unshift_params rewrites
      # this very array (map!), so each offset must be re-read after a shift.
      # Indexing the de-duplicated array guarantees every distinct offset is
      # visited even when the same offset was recorded more than once.
      offsets.uniq!
      offsets.size.times do | i |
        offset = offsets[i]
        # puts "before offset:#{offset} #{@results}"
        if offset > 0 && offset < @results.size
          if !bound_process(@results[offset-1], @results[offset])
            return false
          end
        elsif @results.size == 0
          @results.push(Regextest::Back::Element.any_char)
          @results.push(Regextest::Back::Element.any_char)
          bound_process(@results[0], @results[1])
        elsif offset == @results.size
          if !is_end_anchor?
            # room to grow at the end: add a partner element for the boundary
            @results.push(Regextest::Back::Element.any_char)
            if !bound_process(@results[-2], @results[-1])
              return false
            end
          elsif !@results[-1].word_elements?
            return false
          end
        elsif offset == 0
          if !is_begin_anchor?
            # room to grow at the front: shift all offsets, then prepend
            if !unshift_params(1)
              return false
            end
            @results.unshift(Regextest::Back::Element.any_char)
            if !bound_process(@results[0], @results[1])
              return false
            end
          elsif !@results[0].word_elements?
            return false
          end
        end
      end
    when :CMD_ANC_WORD_UNBOUND
      # same traversal as for :CMD_ANC_WORD_BOUND (see the comment there)
      offsets.uniq!
      offsets.size.times do | i |
        offset = offsets[i]
        # puts "before offset:#{offset} #{@results}"
        if offset > 0 && offset < @results.size
          if !unbound_process(@results[offset-1], @results[offset])
            return false
          end
        elsif @results.size == 0
          @results.push(Regextest::Back::Element.any_char)
          @results.push(Regextest::Back::Element.any_char)
          unbound_process(@results[0], @results[1])
        elsif offset == @results.size
          if !is_end_anchor?
            @results.push(Regextest::Back::Element.any_char)
            if !unbound_process(@results[-2], @results[-1])
              return false
            end
          elsif @results[-1].word_elements?
            return false
          end
        elsif offset == 0
          if !is_begin_anchor?
            if !unshift_params(1)
              return false
            end
            @results.unshift(Regextest::Back::Element.any_char)
            if !unbound_process(@results[0], @results[1])
              return false
            end
          elsif @results[0].word_elements?
            return false
          end
        end
      end
    when :CMD_ANC_LOOK_BEHIND2
      @start_offset = offsets.max
    else
      raise "command (#{cmd}) not implemented"
    end
  end
  return true
end

#narrow_down_by_reluctant_repeatObject

narrow down candidate by reluctant repeat



315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
# File 'lib/regextest/back/result.rb', line 315

# Narrows the candidates for every recorded reluctant repeat.
#
# For each repeat, the elements that follow it are treated like a negative
# look-ahead (a reluctant repeat is equivalent to not_look_ahead) applied at
# every offset from the repeat's begin up to its end minus the length of
# that following part.
#
# Returns true when every merge succeeded, false as soon as one fails.
def narrow_down_by_reluctant_repeat
  @reluctant_repeat.each_value do |offsets|
    repeat_begin, repeat_end = offsets
    succeed_part = @results[repeat_end..-1]
    next unless succeed_part.size > 0

    repeat_begin.upto(repeat_end - succeed_part.size) do |offset|
      return false unless merge_not_look_ahead_elems(offset, succeed_part)
    end
  end
  true
end

#push_body(elem) ⇒ Object

Adds elem



31
32
33
34
# File 'lib/regextest/back/result.rb', line 31

# Appends an element to the results and advances the end offset by one.
#
# Returns the new end offset.
def push_body(elem)
  @results << elem
  @end_offset = @end_offset + 1
end

#sizeObject

size of results



42
43
44
# File 'lib/regextest/back/result.rb', line 42

# Number of elements currently held in the results.
def size
  @results.length
end

#unbound_process(elem1, elem2) ⇒ Object

Non-word-boundary process (\B)



477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
# File 'lib/regextest/back/result.rb', line 477

# Constrains two neighbouring elements so that both are word elements or both
# are non-word elements.
#
# Whichever element already reports a word / non-word state (elem1 is asked
# first) decides the state forced on the other one; when neither does, the
# common state is picked with TstRand(2).
#
# Returns false when either element reports empty? afterwards, true otherwise.
def unbound_process(elem1, elem2)
  case
  when elem1.word_elements?     then elem2.set_word_elements
  when elem1.non_word_elements? then elem2.set_non_word_elements
  when elem2.word_elements?     then elem1.set_word_elements
  when elem2.non_word_elements? then elem1.set_non_word_elements
  else
    # nothing decided yet: choose one class for both sides
    setter = TstRand(2) == 0 ? :set_word_elements : :set_non_word_elements
    [elem1, elem2].each { |elem| elem.public_send(setter) }
  end

  !(elem1.empty? || elem2.empty?)
end

#unshift_params(unshift_length) ⇒ Object

unshift parameters



296
297
298
299
300
301
302
303
304
305
306
# File 'lib/regextest/back/result.rb', line 296

# Shifts every stored offset to make room for +unshift_length+ elements that
# are about to be prepended to the results.
#
# Affects the offsets of queued look-aheads and look-behinds, all positional
# anchor offsets (rewritten in place, so callers holding one of those arrays
# see the new values), and the start / end offsets of the match window.
#
# Returns false, without changing anything, when a :CMD_ANC_STRING_BEGIN
# anchor is recorded (nothing may be placed before it); true otherwise.
def unshift_params(unshift_length)
  # Decide before mutating, so a refused shift cannot leave the look-around
  # and anchor offsets half-shifted.
  return false if @positional_anchors.key?(:CMD_ANC_STRING_BEGIN)

  @look_aheads.each { |elem| elem[:offset] += unshift_length }
  @look_behinds.each { |elem| elem[:offset] += unshift_length }
  @positional_anchors.each_value do |offsets|
    offsets.map! { |offset| offset + unshift_length }
  end
  @start_offset += unshift_length
  @end_offset += unshift_length
  true
end