Class: CSVKit
- Inherits:
-
Object
- Object
- CSVKit
- Defined in:
- lib/csvkit/csvkit.rb,
lib/csvkit/version.rb,
lib/csvkit/middleware.rb
Defined Under Namespace
Classes: Middleware
Constant Summary collapse
- VERSION =
"0.1.4"
Instance Attribute Summary collapse
-
#content ⇒ Object
Returns the value of attribute content.
-
#options ⇒ Object
readonly
Returns the value of attribute options.
Instance Method Summary collapse
- #clean_cell_string(cell_string) ⇒ Object
- #header_with_colspan?(cell) ⇒ Boolean
-
#initialize(body_content, options = {}) ⇒ CSVKit
constructor
A new instance of CSVKit.
- #is_numeric?(string) ⇒ Boolean
- #to_csv ⇒ Object
Constructor Details
#initialize(body_content, options = {}) ⇒ CSVKit
Returns a new instance of CSVKit.
9 10 11 12 13 14 |
# File 'lib/csvkit/csvkit.rb', line 9
# Builds a converter for the given HTML.
#
# @param body_content [String] HTML markup; stored as-is in @content
# @param options [Hash, nil] overrides merged over the default
#   {:show_link => false}; nil is treated as "no overrides"
def initialize(body_content, options = {})
  @options = {:show_link => false}.merge(options || {})
  @content = body_content
end
Instance Attribute Details
#content ⇒ Object
Returns the value of attribute content.
5 6 7 |
# File 'lib/csvkit/csvkit.rb', line 5
# Reader for the content attribute.
#
# @return [Object] the current value of @content
def content
  @content
end
#options ⇒ Object (readonly)
Returns the value of attribute options.
7 8 9 |
# File 'lib/csvkit/csvkit.rb', line 7
# Read-only accessor for the options attribute.
#
# @return [Object] the current value of @options
def options
  @options
end
Instance Method Details
#clean_cell_string(cell_string) ⇒ Object
46 47 48 49 50 51 52 53 |
# File 'lib/csvkit/csvkit.rb', line 46
# Normalizes the text of one table cell before it is written out.
#
# Runs of whitespace (anything matched by [[:space:]]) become a single space,
# double quotes are backslash-escaped, and commas and dollar signs are removed.
#
# @param cell_string [String] raw cell text
# @return [Float, String] a Float when is_numeric? accepts the cleaned text,
#   otherwise the cleaned text with surrounding whitespace stripped
def clean_cell_string(cell_string)
  # One pass replaces the former "each space -> ' '" / "newline -> ' '" /
  # "collapse runs" sequence; the result is the same.
  cleaned = cell_string.gsub(/[[:space:]]+/, ' ')
  cleaned = cleaned.gsub('"', '\"')
  cleaned = cleaned.gsub(/[,$]/, '').strip

  # NOTE(review): this branch previously called String#to_b, which core Ruby
  # does not define. Converting to a number is presumed to be the intent,
  # since "$" and "," are stripped just above -- confirm.
  is_numeric?(cleaned) ? Float(cleaned) : cleaned
end
#header_with_colspan?(cell) ⇒ Boolean
55 56 57 |
# File 'lib/csvkit/csvkit.rb', line 55
# Tells whether a cell is a header cell that declares a colspan attribute.
#
# @param cell [#name, #key?] a node responding to name and key?
# @return [Boolean] true only for a 'th' node that has a 'colspan' key
def header_with_colspan?(cell)
  cell.name == 'th' && cell.key?('colspan')
end
#is_numeric?(string) ⇒ Boolean
59 60 61 62 |
# File 'lib/csvkit/csvkit.rb', line 59
# Tells whether the given value can be parsed as a number by Kernel#Float.
#
# The check is made on the argument itself. Plain digit strings need no
# separate test because Float() already accepts them.
#
# @param string [String, nil] candidate text
# @return [Boolean] true when Float(string) succeeds, false otherwise
def is_numeric?(string)
  Float(string)
  true
rescue ArgumentError, TypeError
  # ArgumentError: text is not a number; TypeError: nil or another
  # non-convertible object.
  false
end
#to_csv ⇒ Object
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
# File 'lib/csvkit/csvkit.rb', line 16
# Converts every table row found in @content into a tab-separated document.
#
# Each <tr> under any <table> becomes one output row. A <th> cell carrying a
# colspan attribute is repeated that many times. When the :show_link option
# is truthy, a cell containing a link contributes the link's href instead of
# its text.
#
# @return [String] the TSV data encoded as UTF-16LE, prefixed with a byte
#   order mark
# @raise [RuntimeError] "command failed" when the generated TSV is blank
def to_csv
  doc = Nokogiri::HTML(@content)
  show_link = @options[:show_link]

  tsv_str = CSV.generate(col_sep: "\t", headers: :first_row, encoding: 'utf-8') do |tsv|
    doc.xpath('//table//tr').each do |row|
      tsv_row = []
      row.xpath('td | th').each do |cell|
        # An empty or zero colspan still occupies one column, so never repeat
        # fewer than once.
        repeat = header_with_colspan?(cell) ? [cell['colspan'].to_i, 1].max : 1

        # If a link, show href instead of text. Fall back to the cell text
        # when the anchor has no href so the cleaner never receives nil.
        a_link = show_link ? cell.search('a').first : nil
        text = (a_link && a_link['href']) || cell.text

        # The cleaned value is the same for every repeat; compute it once.
        cleaned = clean_cell_string(text)
        repeat.times { tsv_row << cleaned }
      end
      tsv << tsv_row
    end
  end
  raise "command failed" if tsv_str.to_s.strip.empty?

  # "\uFEFF" is the byte order mark; non-mutating encode is used so that no
  # string literal is modified in place.
  bom = "\uFEFF".encode(Encoding::UTF_16LE)
  bom + tsv_str.encode(Encoding::UTF_16LE, Encoding::UTF_8, invalid: :replace, undef: :replace, replace: '')
end