Module: PertinentParser
- Defined in:
- lib/pertinent_parser.rb
Class Method Summary collapse
- .html(html) ⇒ Object
Uses a custom traversal function so that the HTML representation can be manipulated however we like.
- .new_replace(context, target, number, replacement) ⇒ Object
- .new_wrap(context, target, number, tag) ⇒ Object
- .offset_to_r(o) ⇒ Object
- .range_from_specification(context, target, number) ⇒ Object
- .rule(range, transform) ⇒ Object
- .text(s) ⇒ Object
- .wrap_(range, tag) ⇒ Object
Class Method Details
.html(html) ⇒ Object
Uses a custom traversal function so that the HTML representation can be manipulated however we like.
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
# File 'lib/pertinent_parser.rb', line 17
# Parses +html+ with Hpricot, builds a text object from the document's inner
# text, then walks every node of the document: text nodes advance a character
# cursor, and every other node contributes a wrap spanning its inner text.
#
# A hand-written traversal is used so the HTML representation can be
# manipulated freely.
#
# @param html [Object] markup handed to Hpricot()
# @return [Object] the object returned by text(doc.inner_text)
def html(html)
  document = Hpricot(html)
  result = text(document.inner_text)
  cursor = 0 # number of text characters consumed so far
  document.traverse_all_element do |node|
    length = node.inner_text.size
    if node.text?
      cursor += length
    else
      # The value of `+` is discarded here; presumably `+` records the wrap
      # on `result` in place -- TODO confirm against the text object's `+`.
      result + wrap_(cursor...cursor + length, node.stag)
    end
  end
  result
end
.new_replace(context, target, number, replacement) ⇒ Object
72 73 74 75 76 |
# File 'lib/pertinent_parser.rb', line 72
# Builds a Rule pairing the range of an occurrence of +target+ in +context+
# with a :replacement Transform carrying +replacement+.
#
# @param context [Object] forwarded to range_from_specification
# @param target [Object] forwarded to range_from_specification
# @param number [Object] forwarded to range_from_specification
# @param replacement [Object] payload given to Transform.new(:replacement, ...)
# @return [Rule] the newly constructed rule
def new_replace(context, target, number, replacement)
  Rule.new(
    range_from_specification(context, target, number),
    Transform.new(:replacement, replacement)
  )
end
.new_wrap(context, target, number, tag) ⇒ Object
57 58 59 60 |
# File 'lib/pertinent_parser.rb', line 57
# Resolves the range of an occurrence of +target+ in +context+ via
# range_from_specification and passes it, with +tag+, to wrap_.
#
# @param context [Object] forwarded to range_from_specification
# @param target [Object] forwarded to range_from_specification
# @param number [Object] forwarded to range_from_specification
# @param tag [Object] forwarded to wrap_
# @return [Object] whatever wrap_ returns
def new_wrap(context, target, number, tag)
  wrap_(range_from_specification(context, target, number), tag)
end
.offset_to_r(o) ⇒ Object
41 42 43 |
# File 'lib/pertinent_parser.rb', line 41
# Converts a two-element offset pair into an inclusive Range that starts at
# the first element and ends one before the second element.
#
# @param o [#[]] pair indexed at 0 and 1 (e.g. the result of MatchData#offset)
# @return [Range] o[0]..(o[1] - 1)
def offset_to_r(o)
  Range.new(o[0], o[1] - 1)
end
.range_from_specification(context, target, number) ⇒ Object
45 46 47 48 49 50 51 52 53 54 55 |
# File 'lib/pertinent_parser.rb', line 45
# Locates the +number+-th occurrence (counting from 1) of the literal string
# +target+ inside +context+. Occurrences are enumerated left to right by
# start position, so overlapping occurrences are counted separately.
#
# @param context [String] text to search
# @param target [String] literal text to find; regexp metacharacters are escaped
# @param number [Integer] which occurrence to return, counting from 1
# @return [Range, nil] offset_to_r applied to the match offsets, or nil when
#   +context+ contains fewer than +number+ occurrences
def range_from_specification(context, target, number)
  pattern = Regexp.new(Regexp.escape(target))
  count = 0
  position = 0
  while (match = context.match(pattern, position))
    offsets = match.offset(0)
    count += 1
    return offset_to_r(offsets) if count == number

    # Resume one character past the START of this match (not its end) so that
    # overlapping occurrences are still found, while never re-matching the
    # same occurrence. This keeps the scan linear instead of re-running the
    # regexp from every intermediate character position.
    position = offsets[0] + 1
  end
  nil
end
.rule(range, transform) ⇒ Object
62 63 64 |
# File 'lib/pertinent_parser.rb', line 62
# Convenience constructor: builds a Rule from +range+ and +transform+.
#
# @param range [Object] first argument passed to Rule.new
# @param transform [Object] second argument passed to Rule.new
# @return [Rule] the newly constructed rule
def rule(range, transform)
  Rule.new(range, transform)
end