Class: Metrocot::CompositePattern

Inherits:
BasePattern show all
Defined in:
lib/metrocot.rb

Overview

Matches a series of patterns

Instance Attribute Summary collapse

Attributes inherited from BasePattern

#matched, #metrocot, #name, #node_scraper, #pattern_no, #pred, #source, #succ

Instance Method Summary collapse

Methods inherited from BasePattern

#default_scanner, #dump_match_map, #log, #log_match_data, #optional, parse, #priority, #with_scanned_match_data

Constructor Details

#initialize(parts = nil) ⇒ CompositePattern

Returns a new instance of CompositePattern.



736
737
738
739
740
741
742
743
744
745
746
747
748
749
# File 'lib/metrocot.rb', line 736

# Builds a composite from an ordered list of sub-patterns.
#
# The given array is shallow-copied (the composite owns its list, the part
# objects themselves are shared) and neighbouring parts are chained together
# through their +pred+ / +succ+ accessors. The first part gets +pred = nil+;
# the +succ+ of the last part is left untouched.
#
# parts:: ordered sub-patterns, or nil for an empty composite
def initialize(parts = nil)
  super(nil)
  @parts = parts.nil? ? [] : parts.clone

  previous = nil
  @parts.each do |current|
    current.pred = previous
    previous.succ = current unless previous.nil?
    previous = current
  end
end

Instance Attribute Details

#parts ⇒ Object (readonly)

Returns the value of attribute parts.



734
735
736
# File 'lib/metrocot.rb', line 734

# Read-only accessor for the sub-patterns held by this composite.
#
# Returns the @parts array itself (not a copy), so changes made by a caller
# are visible to the composite.
def parts
  @parts
end

Instance Method Details

#description ⇒ Object



876
877
878
# File 'lib/metrocot.rb', line 876

# Short label for this pattern: the literal "comp #" followed by the
# pattern's +pattern_no+.
def description
  prefix = "comp #"
  prefix + pattern_no.to_s
end

#dump(level, out) ⇒ Object



752
753
754
755
756
757
758
# File 'lib/metrocot.rb', line 752

# Appends an indented, human-readable outline of this composite to +out+.
#
# Writes an opening line carrying the composite's priority, asks every part
# to dump itself one level deeper, then writes the closing bracket.
#
# level:: nesting depth; each level indents by two spaces
# out::   sink that is written to with <<
#
# Returns the result of the final << call.
def dump(level, out)
  indent = "  " * level
  out << "#{indent}composite p=#{priority} [\n"
  @parts.each do |part|
    part.dump(level + 1, out)
  end
  out << "#{indent}]\n"
end

#each_match(match_range, match_map) ⇒ Object



843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
# File 'lib/metrocot.rb', line 843

# Enumerates the ways this composite can match inside +match_range+.
#
# Steps:
# 1. clear the +matched+ marker on every part;
# 2. delegate to the inherited +each_match+;
# 3. hand the parts, ordered by descending priority, to +each_split_match+.
#
# For every complete placement reported by +each_split_match+ the ranges of
# all matched parts are combined (via the range's own +extend+) and the
# caller's block is invoked, inside +with_scanned_match_data+, with that
# combined range and the match data. The first truthy result coming back
# from +with_scanned_match_data+ is returned from this method immediately;
# otherwise the method returns whatever +each_split_match+ returns.
def each_match(match_range, match_map)
  @parts.each { |part| part.matched = nil }

  super(match_range, match_map)

  # Find the highest priority part and divide up the children around it.
  ordered_parts = @parts.sort_by { |part| 0 - part.priority }

  each_split_match(match_range, {}, ordered_parts, 0, {}) do |_split_range, split_map|
    # Fold the ranges of all currently matched parts into one covering range.
    covered = nil
    @parts.each do |part|
      next unless part.matched
      covered = covered ? covered.extend(part.matched) : part.matched
    end

    result = with_scanned_match_data(match_map, split_map) do |scanned_map|
      log_match_data("comp match trying", covered, scanned_map)
      yield(covered, scanned_map)
    end

    if result
      log("comp match done, returning: #{result}")
      return result
    end

    log("comp match not done")
  end
end

#each_split_match(comp_match_range, match_map, parts_by_priority, ppx, part_matches) ⇒ Object



761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
# File 'lib/metrocot.rb', line 761

# Recursive backtracking step used by each_match: places the sub-pattern at
# position +ppx+ of +parts_by_priority+, then recurses for the remaining ones.
#
# comp_match_range::  the range the whole composite may occupy
# match_map::         match data handed on to the sub-pattern and the block
# parts_by_priority:: the composite's parts in the order they are to be placed
# ppx::               index of the next part to place
# part_matches::      only passed along to the recursive call; not read here
#
# Yields (range, match_map) once every part has been placed or skipped.
# Returns the first truthy value produced further down the recursion, or nil
# when no placement of the current pattern produced one.
def each_split_match( comp_match_range, match_map, parts_by_priority, ppx, part_matches ) 

  pattern = nil

  # Skip over optional patterns whose predecessor exists but has not been
  # matched; stop at the first pattern that does not meet both conditions.
  while (ppx < parts_by_priority.size)
    pattern = parts_by_priority[ppx]
    break unless pattern.optional 
    break unless pattern.pred && ! pattern.pred.matched
    log("skipping optional #{pattern.description}")
    ppx += 1
  end

  # Every part has been placed or skipped: report the result to the caller.
  if ppx >= parts_by_priority.size
    log("comp nothing left to do")
    return yield( comp_match_range, match_map ) 
  end


  match_range = comp_match_range
  log("comp matching sub-pattern #{pattern.description} within #{match_range.describe}")

  #
  # figure out which gap this pattern is supposed to fill
  #

  # Nearest successor (in pattern order) that already has a match.
  matched_on_right = pattern.succ

  while matched_on_right && ! matched_on_right.matched
    matched_on_right = matched_on_right.succ
  end

  # Cut the search range at the start of that successor's match
  # (presumably head() keeps everything before the given position).
  if matched_on_right
    log("comp matching must be left of #{matched_on_right.description}")
    match_range = match_range.head(matched_on_right.matched.start_index, matched_on_right.matched.start_offset) 
  end

  # Nearest predecessor (in pattern order) that already has a match.
  matched_on_left = pattern.pred

  while matched_on_left && ! matched_on_left.matched
    matched_on_left = matched_on_left.pred
  end

  if matched_on_left
    # Cut the search range at the end of that predecessor's match
    # (presumably tail() keeps everything from the given position on).
    log("comp matching must be right of #{matched_on_left.description}")
    match_range = match_range.tail(matched_on_left.matched.end_index, matched_on_left.matched.end_offset) 
  elsif matched_on_right
    # Only a right-hand neighbour is fixed. If the parent of its first node
    # lies inside the current range, move the left boundary to just past that
    # parent's index so the range does not include the subtree holding the
    # right-hand match.
    right_node = match_range.hnodes[matched_on_right.matched.start_index]
    parent_of_right_node = right_node && right_node.parent
    parent_ix_of_right_node = parent_of_right_node && node_scraper.hnode_index[parent_of_right_node]
    if parent_ix_of_right_node && parent_ix_of_right_node >= match_range.start_index 
      match_range = match_range.tail(parent_ix_of_right_node + 1, 0)
      # NOTE(review): this message interpolates match_range directly while the
      # other log lines use match_range.describe — confirm that is intended.
      log("restricting left boundary to #{match_range} because would otherwise include subtree with right peer")
    end
  end

  log("comp matching sub-pattern #{pattern.description} at #{match_range.describe}")

  # Try every match of this pattern inside the narrowed range. The block
  # parameter match_map shadows the method parameter of the same name.
  pattern.each_match( match_range, match_map ) { |part_match_range, match_map| 

    # Record the placement so the recursive call can see it through
    # pred/succ when it narrows the range for the remaining patterns.
    pattern.matched = part_match_range

    log("found sub-pattern #{pattern.description} at #{part_match_range.describe}")

    # NOTE(review): the inner block forwards this level's match_map and
    # ignores sub_match_map — verify that this is deliberate.
    result = each_split_match( comp_match_range, match_map, parts_by_priority, ppx + 1, part_matches ) { |sub_match_range, sub_match_map|
      yield( sub_match_range, match_map )
    }

    # Undo the placement before trying the next candidate (backtracking).
    pattern.matched = nil

    if result
      log("comp done, returning: #{result}")
      # Returns from each_split_match itself, ending the enumeration.
      return result 
    else
      log("comp not done")
    end
  }

  return nil

end