Module: MediaartsScraper::Page::CommonTableParser
- Included in:
- PageBase
- Defined in:
- lib/mediaarts_scraper/page/common_table_parser.rb
Constant Summary collapse
- KEY_SEPARATOR =
"/"
Instance Method Summary collapse
Instance Method Details
#parse_common_key_value_table(table) ⇒ Object
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/mediaarts_scraper/page/common_table_parser.rb', line 6

# Flattens a key/value style table into a single Hash.
#
# Each "tbody/tr" row must contain as many <th> cells as <td> cells; the
# stripped <th> texts become keys and the matching <td> contents become values.
#
# A <td> that has exactly one direct <p> child is treated as a "rich" cell:
# its value is the stripped text of that <p>, and every <dl> found under
# "div/div/dl" contributes extra entries keyed
# "<row key><KEY_SEPARATOR><dt text>" whose value is the <dd> at the same
# position. Any other <td> contributes its whole stripped text.
#
# @param table [#xpath] table node (presumably a Nokogiri element; anything
#   answering #xpath with enumerable nodes that respond to #text works)
# @return [Hash{String => String}] flattened key/value pairs
# @raise [ParseError] when a row's <th>/<td> counts differ, or when a <dl>
#   has a different number of <dt> and <dd> children
def parse_common_key_value_table(table)
  result = {}

  table.xpath("tbody/tr").each do |tr|
    ths = tr.xpath("th")
    tds = tr.xpath("td")
    raise ParseError unless ths.count == tds.count

    keys = ths.map { |th| th.text.strip }

    values = tds.each_with_index.map do |td, i|
      paragraphs = td.xpath("p")
      next td.text.strip unless paragraphs.count == 1

      td.xpath("div/div/dl").each do |dl|
        dts = dl.xpath("dt").map { |dt| dt.text.strip }
        dds = dl.xpath("dd").map { |dd| dd.text.strip }
        raise ParseError unless dts.count == dds.count

        # Pair each <dt> with the <dd> at the same index. Nesting the two
        # loops would leave every term mapped to the last description.
        dts.zip(dds).each do |dt, dd|
          result[keys[i] + KEY_SEPARATOR + dt] = dd
        end
      end

      paragraphs.first.text.strip
    end

    # Row-level keys are written after the nested <dl> keys, so a row key
    # wins if it ever collides with a generated sub-key.
    keys.zip(values).each do |key, value|
      result[key] = value
    end
  end

  result
end
#parse_common_serial_rows_table(table) ⇒ Object
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
# File 'lib/mediaarts_scraper/page/common_table_parser.rb', line 54

# Converts a header + rows table into an Array of Hashes, one per body row.
#
# Keys are the stripped texts of the "thead/tr/th" cells. Each value is the
# stripped text of the corresponding <td>'s first child node, or of the <td>
# itself when it has no child. Cells beyond the number of headers are
# dropped; headers without a matching cell map to nil.
#
# When a cell of the row has a direct <a> child, the href attribute of the
# first such anchor is stored under the extra "href" key. An anchor without
# an href attribute is ignored instead of raising NoMethodError.
#
# @param table [#xpath] table node (presumably a Nokogiri element; cells must
#   respond to #child, #text and #xpath, anchors to #attributes)
# @return [Array<Hash{String => String, nil}>] one Hash per "tbody/tr" row
def parse_common_serial_rows_table(table)
  header = table.xpath("thead/tr/th").map { |th| th.text.strip }

  table.xpath("tbody/tr").map do |tr|
    tds = tr.xpath("td")
    data = tds.map do |td|
      child = td.child
      (child || td).text.strip
    end
    tr_result = header.zip(data).to_h

    # Locate the first direct <a> of the first cell that has one, running the
    # XPath query only once per cell.
    anchor = nil
    tds.each do |td|
      anchor = td.xpath("a").first
      break if anchor
    end

    href = anchor && anchor.attributes["href"]
    tr_result["href"] = href.value if href
    tr_result
  end
end