Module: RemoteTable::ProcessedByNokogiri
- Defined in:
- lib/remote_table/processed_by_nokogiri.rb
Overview
Mixed in to process XML and XHTML.
Constant Summary collapse
- SOFT_HYPHEN =
'­'
Instance Method Summary collapse
- #_each ⇒ Object
Yield each row using Nokogiri.
- #preprocess! ⇒ Object
Instance Method Details
#_each ⇒ Object
Yield each row using Nokogiri.
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
# File 'lib/remote_table/processed_by_nokogiri.rb', line 12
#
# Yield each row using Nokogiri.
#
# Rows are selected with +row_css+ (preferred when set) or +row_xpath+.
# Within a row, cells are selected with +column_css+, then +column_xpath+;
# when neither is set the row node itself is treated as the single cell.
# Each cell's content is duplicated, passed through +assume_utf8+ and
# +RemoteTable.normalize_whitespace+.
#
# When +headers+ is +:first_row+, the first row is consumed as the header
# list (only its present values are kept) and is not yielded.
#
# Rows in which no cell value is present are skipped unless
# +keep_blank_rows+ is set.
#
# @yield [row] the array of cell values when +headers+ is falsy, otherwise
#   the result of +zip(current_headers, values)+
# @raise [ArgumentError] if neither +row_css+ nor +row_xpath+ is set
def _each
  # Loaded lazily, only when this processor is actually used.
  require 'nokogiri'
  require 'cgi'

  # Keep a local copy because it is replaced inside the loop when the
  # first row supplies the headers.
  active_headers = headers

  if !row_css && !row_xpath
    raise ::ArgumentError, "[remote_table] Need :row_css or :row_xpath in order to process XML or HTML"
  end

  document = nokogiri_class.parse(unescaped_xml_without_soft_hyphens, nil, RemoteTable::EXTERNAL_ENCODING)
  row_nodes = row_css ? document.css(row_css) : document.xpath(row_xpath)

  row_nodes.each do |row_node|
    cell_nodes =
      if column_css
        row_node.css(column_css)
      elsif column_xpath
        row_node.xpath(column_xpath)
      else
        [row_node]
      end

    saw_value = false
    values = cell_nodes.map do |cell_node|
      text = RemoteTable.normalize_whitespace(assume_utf8(cell_node.content.dup))
      # Tracking is only needed when blank rows are going to be dropped.
      saw_value = true if !saw_value && !keep_blank_rows && text.present?
      text
    end

    if active_headers == :first_row
      active_headers = values.select(&:present?)
      next
    end

    next unless keep_blank_rows || saw_value

    if headers
      yield zip(active_headers, values)
    else
      yield values
    end
  end
ensure
  # Runs whether iteration finished normally or an exception was raised.
  local_copy.cleanup
end
#preprocess! ⇒ Object
6 7 8 9 |
# File 'lib/remote_table/processed_by_nokogiri.rb', line 6
#
# Prepares the data before rows are read: first calls +delete_harmful!+,
# then +transliterate_whole_file_to_utf8!+, in that order.
#
# @return [Object] whatever +transliterate_whole_file_to_utf8!+ returns
def preprocess!
  delete_harmful!()
  transliterate_whole_file_to_utf8!()
end