Module: RemoteTable::ProcessedByNokogiri
- Defined in:
- lib/remote_table/processed_by_nokogiri.rb
Overview
Mixed in to process XML and XHTML.
Constant Summary collapse
- WHITESPACE =
/\s+/
- SINGLE_SPACE =
' '
- SOFT_HYPHEN =
'­'
Instance Method Summary collapse
-
#_each ⇒ Object
Yield each row using Nokogiri.
Instance Method Details
#_each ⇒ Object
Yield each row using Nokogiri.
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/remote_table/processed_by_nokogiri.rb', line 9

# Yield each row using Nokogiri.
#
# Rows are selected with +row_css+ (preferred when set) or +row_xpath+; the
# cells of a row come from +column_css+, else +column_xpath+, else the row
# node itself is treated as the only cell. Each cell's text has runs of
# whitespace collapsed to a single space and is stripped.
#
# When +headers+ is +:first_row+, the first selected row is consumed as the
# header list (blank cells dropped) and is not yielded.
#
# @yield [row] the array of cell strings when +headers+ is falsy, otherwise
#   whatever +zip(current_headers, values)+ returns
# @raise [ArgumentError] if neither +row_css+ nor +row_xpath+ is set
def _each
  # Loaded lazily, only when XML/HTML processing is actually requested.
  require 'nokogiri'
  require 'cgi'

  # Work on a local copy: it is swapped for the real header list once the
  # first row has been read (the :first_row case below).
  current_headers = headers

  unless row_css || row_xpath
    raise ::ArgumentError, "[remote_table] Need :row_css or :row_xpath in order to process XML or HTML"
  end

  delete_harmful!
  transliterate_whole_file_to_utf8!

  xml = nokogiri_class.parse(unescaped_xml_without_soft_hyphens, nil, RemoteTable::EXTERNAL_ENCODING)
  rows = row_css ? xml.css(row_css) : xml.xpath(row_xpath)

  rows.each do |row|
    cells =
      if column_css
        row.css(column_css)
      elsif column_xpath
        row.xpath(column_xpath)
      else
        [row]
      end

    has_content = false
    values = cells.map do |cell|
      text = assume_utf8(cell.content.dup)
      text.gsub!(WHITESPACE, SINGLE_SPACE)
      text.strip!
      # Blank-row detection is skipped entirely when blank rows are kept.
      has_content ||= !keep_blank_rows && text.present?
      text
    end

    if current_headers == :first_row
      current_headers = values.select(&:present?)
      next
    end

    next unless keep_blank_rows || has_content

    if headers
      yield zip(current_headers, values)
    else
      yield values
    end
  end
ensure
  local_copy.cleanup
end