Module: RemoteTable::ProcessedByNokogiri

Defined in:
lib/remote_table/processed_by_nokogiri.rb

Overview

Mixed in to process XML and XHTML.

Constant Summary collapse

SOFT_HYPHEN =
'­'

Instance Method Summary collapse

Instance Method Details

#_each ⇒ Object

Yield each row using Nokogiri.



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/remote_table/processed_by_nokogiri.rb', line 12

# Yield each row using Nokogiri.
#
# Rows are selected with +row_css+ (used when set) or else +row_xpath+. Cells
# within a row come from +column_css+, else +column_xpath+, else the row node
# itself is treated as the single cell. Each cell's text is passed through
# +assume_utf8+ and +RemoteTable.normalize_whitespace+.
#
# @yield [Object] the array of cleaned cell values when +headers+ is falsy,
#   otherwise the result of +zip(current headers, values)+
# @raise [ArgumentError] if neither +row_css+ nor +row_xpath+ is set
def _each
  require 'nokogiri'
  require 'cgi'

  # Kept in a local because :first_row is swapped for the real header names
  # once the first row has been read.
  active_headers = headers

  if !row_css && !row_xpath
    raise ::ArgumentError, "[remote_table] Need :row_css or :row_xpath in order to process XML or HTML"
  end

  document = nokogiri_class.parse(unescaped_xml_without_soft_hyphens, nil, RemoteTable::EXTERNAL_ENCODING)
  row_nodes = row_css ? document.css(row_css) : document.xpath(row_xpath)

  row_nodes.each do |row_node|
    cell_nodes =
      if column_css
        row_node.css(column_css)
      elsif column_xpath
        row_node.xpath(column_xpath)
      else
        [row_node]
      end

    row_has_content = false
    values = cell_nodes.map do |cell_node|
      text = RemoteTable.normalize_whitespace(assume_utf8(cell_node.content.dup))
      # Only tracked when blank rows are dropped; stops checking after the first hit.
      row_has_content ||= !keep_blank_rows && text.present?
      text
    end

    if active_headers == :first_row
      # The first row supplies the header names (blank cells dropped); it is not yielded.
      active_headers = values.select(&:present?)
      next
    end

    next unless keep_blank_rows || row_has_content

    yield(headers ? zip(active_headers, values) : values)
  end
ensure
  # Always clean up the local copy, even when parsing or the caller's block raises.
  local_copy.cleanup
end

#preprocess! ⇒ Object



6
7
8
9
# File 'lib/remote_table/processed_by_nokogiri.rb', line 6

# Runs the two preparation steps in a fixed order: +delete_harmful!+ first,
# then +transliterate_whole_file_to_utf8!+. Both helpers are defined outside
# this listing; presumably they rewrite the downloaded local copy before it is
# parsed -- confirm against their definitions.
#
# @return [Object] whatever +transliterate_whole_file_to_utf8!+ returns
def preprocess!
  delete_harmful!
  transliterate_whole_file_to_utf8!
end