Class: Sinew::CSV
- Inherits:
-
Object
- Object
- Sinew::CSV
- Defined in:
- lib/sinew/csv.rb
Constant Summary collapse
# Matches a non-empty string consisting only of whitespace and the ASCII
# characters with codes 33..126, excluding '&'. Used by #normalize to skip the
# Sterile clean-up for strings that match.
ASCII_ONLY =
  begin
    printable = (33..126).map(&:chr)
    printable.delete('&')
    /\A[#{Regexp.escape(printable.join)}\s]+\Z/
  end.freeze
Instance Attribute Summary collapse
-
#columns ⇒ Object
readonly
Returns the value of attribute columns.
-
#count ⇒ Object
readonly
Returns the value of attribute count.
-
#csv ⇒ Object
readonly
Returns the value of attribute csv.
-
#path ⇒ Object
readonly
Returns the value of attribute path.
-
#tally ⇒ Object
readonly
Returns the value of attribute tally.
Instance Method Summary collapse
-
#emit(row) ⇒ Object
append a row.
-
#initialize(path) ⇒ CSV
constructor
A new instance of CSV.
- #normalize(s) ⇒ Object
-
#start(columns) ⇒ Object
start writing the csv.
-
#started? ⇒ Boolean
Has this csv been started?
Constructor Details
#initialize(path) ⇒ CSV
Returns a new instance of CSV.
8 9 10 11 12 |
# File 'lib/sinew/csv.rb', line 8
# Set up a writer for the csv file at +path+. Nothing is opened here: @csv
# stays nil (so the writer is not yet "started") and the row count is zero.
#
# @param path the location later handed to ::CSV.open by #start
def initialize(path)
  @path = path
  @csv = nil
  @count = 0
end
Instance Attribute Details
#columns ⇒ Object (readonly)
Returns the value of attribute columns.
6 7 8 |
# File 'lib/sinew/csv.rb', line 6
# Read-only accessor for the column list.
#
# @return [Object] the value of @columns (assigned by #start; nil before that)
def columns
  @columns
end
#count ⇒ Object (readonly)
Returns the value of attribute count.
6 7 8 |
# File 'lib/sinew/csv.rb', line 6
# Read-only accessor for the row counter.
#
# @return [Object] the value of @count (0 after construction, incremented by
#   every #emit)
def count
  @count
end
#csv ⇒ Object (readonly)
Returns the value of attribute csv.
6 7 8 |
# File 'lib/sinew/csv.rb', line 6
# Read-only accessor for the underlying csv handle.
#
# @return [Object] the value of @csv (nil until #start opens the file)
def csv
  @csv
end
#path ⇒ Object (readonly)
Returns the value of attribute path.
6 7 8 |
# File 'lib/sinew/csv.rb', line 6
# Read-only accessor for the output location.
#
# @return [Object] the value of @path, as given to the constructor
def path
  @path
end
#tally ⇒ Object (readonly)
Returns the value of attribute tally.
6 7 8 |
# File 'lib/sinew/csv.rb', line 6
# Read-only accessor for the per-column counters.
#
# @return [Object] the value of @tally (a column => count Hash built by
#   #start and bumped by #emit for each non-blank value; nil before #start)
def tally
  @tally
end
Instance Method Details
#emit(row) ⇒ Object
append a row
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/sinew/csv.rb', line 31
# Append a row to the csv.
#
# Every column in +columns+ is looked up in +row+ and run through #normalize.
# The normalized values (nil for blank ones) are written as one csv line, in
# column order, and the csv is flushed straight away. Columns with a non-blank
# value also have their +tally+ entry incremented.
#
# @param row [#[]] the values to write, indexed by column
# @return [Hash] column => normalized value, containing only the non-blank
#   columns (handy for pretty printing)
# @raise [RuntimeError] if the csv has not been started yet
def emit(row)
  # Fail with a clear message instead of a NoMethodError on nil columns.
  raise 'not started' unless csv

  # convert row to cols, and construct printable (our return value)
  printable = {}
  values = columns.map do |column|
    value = normalize(row[column])
    if value
      printable[column] = value
      tally[column] += 1
    end
    value
  end
  @count += 1

  # emit
  csv << values
  csv.flush

  # return in case someone wants to pretty print this
  printable
end
#normalize(s) ⇒ Object
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
# File 'lib/sinew/csv.rb', line 57
# Turn an arbitrary value into a clean, single-line string.
#
# Objects responding to +inner_html+ contribute their inner HTML, Arrays are
# joined with '|', and anything else goes through +to_s+. Tags are replaced
# with spaces, text that is not plain ASCII is cleaned up with Sterile, and
# runs of whitespace are collapsed to a single space.
#
# @param s [Object] the raw value (may be nil)
# @return [String, nil] the normalized string, or nil if nothing is left
def normalize(s)
  # nokogiri/array/misc => string
  s = if s.respond_to?(:inner_html)
    s.inner_html
  elsif s.is_a?(Array)
    s.join('|')
  else
    s.to_s
  end
  return if s.empty?

  # simple attempt to strip tags. Note that we replace tags with spaces
  s = s.gsub(/<[^>]+>/, ' ')

  # match? answers the yes/no question without allocating MatchData or
  # touching $~; the Sterile passes are only needed for non-ASCII_ONLY text.
  unless ASCII_ONLY.match?(s)
    # Converts MS Word 'smart punctuation' to ASCII
    s = Sterile.plain_format(s)
    # decode HTML entities (á, & etc.)
    s = Sterile.decode_entities(s)
    # "šţɽĩɳģ" => "string"
    s = Sterile.transliterate(s)
  end

  # squish
  s = s.strip.gsub(/\s+/, ' ')
  s unless s.empty?
end
#start(columns) ⇒ Object
start writing the csv
15 16 17 18 19 20 21 22 23 |
# File 'lib/sinew/csv.rb', line 15
# Start writing the csv: remember the columns, reset the per-column tally to
# zero, open the file at +path+ for writing and emit the header row.
#
# @param columns [Array] the column names, also written as the header row
# @return [Object] the opened csv handle
# @raise [RuntimeError] if the csv has already been started
def start(columns)
  raise 'started twice' if started?

  @columns = columns
  @tally = columns.to_h { [_1, 0] }

  handle = ::CSV.open(path, 'wb')
  begin
    handle << columns
  rescue StandardError
    # Don't leak the open file if the header row can't be written; @csv is
    # still nil at this point so the writer remains "not started".
    handle.close
    raise
  end
  @csv = handle
end
#started? ⇒ Boolean
has this csv been started?
26 27 28 |
# File 'lib/sinew/csv.rb', line 26
# Has this csv been started?
#
# @return [Boolean] true once @csv holds a handle, false while it is nil
def started?
  !@csv.nil?
end