Class: XMLRowFinder
- Inherits:
-
Object
- Object
- XMLRowFinder
- Defined in:
- lib/xml_row_finder.rb
Instance Attribute Summary collapse
-
#to_a ⇒ Object
readonly
Returns the value of attribute to_a.
Instance Method Summary collapse
-
#body ⇒ Object
returns the container element for all rows.
-
#body_xpath ⇒ Object
returns the xpath pointing to the container element for all rows.
-
#initialize(raws, debug: false) ⇒ XMLRowFinder
constructor
A new instance of XMLRowFinder.
-
#rows ⇒ Object
Returns the rows. Object returned: an array of Nokogiri XML Element objects.
-
#to_xpath ⇒ Object
(also: #rows_xpath)
returns the xpath pointing to the rows.
Constructor Details
#initialize(raws, debug: false) ⇒ XMLRowFinder
Returns a new instance of XMLRowFinder.
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
# File 'lib/xml_row_finder.rb', line 12

# Works out which xpath selects the repeating "row" elements of a document
# and which element contains all of those rows.
#
# @param raws [String] either a URL (a string beginning with "http", fetched
#   through Nokorexi) or raw markup that is handed to Rexle directly
# @param debug [Boolean] stored in @debug; not otherwise read by this method
def initialize(raws, debug: false)
  @debug = debug

  # \A anchors at the start of the whole string. The previous /^http/ also
  # matched at the start of *any line*, so multi-line markup containing a line
  # that began with "http" was mistaken for a URL. Leading whitespace is still
  # tolerated so input such as "\nhttp://..." keeps being treated as a URL.
  doc = if raws =~ /\A\s*http/
    fetched = Nokorexi.new(raws) do |page|
      # blank out inline event-handler attributes before the page is converted
      %w[onclick onmousedown].each do |handler|
        page.xpath("//*[@#{handler}]").each do |el|
          el.attributes[handler].value = ''
        end
      end
    end
    fetched.to_doc
  else
    Rexle.new(raws)
  end

  @doc = Rexle.new(doc.xml)

  # Collect the xpath of every element below the root.
  paths = []
  doc.root.each_recursive do |el|
    # NOTE(review): presumably clears all attributes so the generated xpaths
    # carry no attribute predicates -- confirm against Rexle's Attributes#delete
    el.attributes.delete
    paths << el.backtrack.to_xpath
  end

  # [occurrences, xpath] pairs in first-seen order. Counting in a single pass
  # replaces the former quadratic `paths.count` call per element.
  occurrences = Hash.new(0)
  paths.each { |path| occurrences[path] += 1 }
  @to_a = paths.uniq.map { |path| [occurrences[path], path] }

  # The most frequent xpath is the starting candidate for the rows.
  xpath = @to_a.max_by(&:first).last

  # Build progressively shorter candidates, moving the trailing segments into
  # a predicate each time: a/b/c -> a/b[c] -> a[b/c] -> [a/b/c]
  head = xpath.split('/')
  tail = []
  candidates = [xpath]
  until head.empty?
    tail.unshift(head.pop)
    candidates << "#{head.join('/')}[#{tail.join('/')}]"
  end

  # using Nokogiri since Rexle has a bug with xpath predicates
  #
  @doc2 = Nokogiri::XML(doc.root.xml)

  # The final candidate has an empty head ("[a/b/c]") and is skipped.
  scored = candidates[0..-2].map do |candidate|
    [@doc2.xpath(candidate).length, candidate]
  end

  # Of the candidates matching more than one node, take the shortest-headed one.
  @xpath = scored.reverse_each.find { |num, _candidate| num > 1 }.last
  last_row = @doc2.xpath(@xpath).last

  # find the container element: start from the rows xpath without its
  # predicate and walk upwards until the element's XML contains the last row
  xpath = @xpath[/^[^\[]+/]
  axpath = xpath.split('/')
  e = doc.element(xpath)

  # NOTE(review): String#include? needs a String, so this relies on the
  # Nokogiri node converting implicitly (presumably to its text content) --
  # TODO confirm
  until e.xml.include?(last_row)
    axpath.pop
    e = doc.element(axpath.join('/'))
  end

  @cont_xpath = axpath.join('/')
end
Instance Attribute Details
#to_a ⇒ Object (readonly)
Returns the value of attribute to_a.
10 11 12 |
# File 'lib/xml_row_finder.rb', line 10

# Reader for the to_a attribute.
#
# @return [Object] whatever is currently held in @to_a
def to_a
  @to_a
end
Instance Method Details
#body ⇒ Object
returns the container element for all rows
83 84 85 |
# File 'lib/xml_row_finder.rb', line 83

# Returns the container element for all rows.
#
# The element found at @cont_xpath inside @doc is serialised via #xml and
# wrapped in a fresh Rexle instance.
#
# @return [Rexle]
def body
  container = @doc.element(@cont_xpath)
  Rexle.new(container.xml)
end
#body_xpath ⇒ Object
returns the xpath pointing to the container element for all rows
89 90 91 |
# File 'lib/xml_row_finder.rb', line 89

# Returns the xpath pointing to the container element for all rows.
#
# @return [Object] the value held in @cont_xpath
def body_xpath
  @cont_xpath
end
#rows ⇒ Object
Returns the rows. Object returned: an array of Nokogiri XML Element objects.
96 97 98 |
# File 'lib/xml_row_finder.rb', line 96

# Returns the rows.
#
# Evaluates the stored rows xpath (@xpath) against @doc2 and hands back
# whatever that query yields.
#
# @return [Object] the result of @doc2.xpath(@xpath)
def rows
  @doc2.xpath(@xpath)
end
#to_xpath ⇒ Object Also known as: rows_xpath
returns the xpath pointing to the rows
102 103 104 |
# File 'lib/xml_row_finder.rb', line 102

# Returns the xpath pointing to the rows.
#
# @return [Object] the value held in @xpath
def to_xpath
  @xpath
end