Class: RemoteTable::Format::HTML

Inherits:
RemoteTable::Format show all
Includes:
Textual
Defined in:
lib/remote_table/format/html.rb

Constant Summary

Constants included from Textual

Textual::USELESS_CHARACTERS

Instance Attribute Summary

Attributes inherited from RemoteTable::Format

#t

Instance Method Summary collapse

Methods included from Textual

#convert_file_to_utf8!, #crop_rows!, #cut_columns!, #remove_useless_characters!, #skip_rows!

Methods inherited from RemoteTable::Format

#initialize

Constructor Details

This class inherits a constructor from RemoteTable::Format

Instance Method Details

#each(&blk) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/remote_table/format/html.rb', line 7

# Parses the local file as HTML and yields one record per matching row.
#
# The file is first run through convert_file_to_utf8! and
# remove_useless_characters!. Rows are selected with t.properties.row_xpath
# and, within each row, cells with t.properties.column_xpath. Every cell's
# text has runs of whitespace collapsed to a single space and is stripped.
#
# When t.properties.headers is an Array it supplies the column names;
# otherwise the first matching row is consumed as the header row and is not
# yielded. Each later row is combined with the headers via +zip+ and yielded,
# unless none of its values is present? and t.properties.keep_blank_rows is
# falsy.
#
# t.local_file is always deleted on the way out, even if parsing or the
# caller's block raises.
#
# NOTE: the block is invoked through +yield+; the +blk+ parameter itself is
# not referenced.
def each(&blk)
  convert_file_to_utf8!
  remove_useless_characters!

  declared_headers = t.properties.headers
  column_names = declared_headers.is_a?(::Array) ? t.properties.headers : nil

  document = ::Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8')
  document.xpath(t.properties.row_xpath).each do |row_node|
    cells = row_node.xpath(t.properties.column_xpath).map do |cell|
      cell.content.gsub(/\s+/, ' ').strip
    end

    # No explicit headers were configured: the first row names the columns.
    if column_names.nil?
      column_names = cells
      next
    end

    record = zip column_names, cells
    next unless t.properties.keep_blank_rows || record.any? { |_name, value| value.present? }

    yield record
  end
ensure
  t.local_file.delete
end