Class: Sinew::CSV

Inherits:
Object
  • Object
show all
Defined in:
lib/sinew/csv.rb

Constant Summary collapse

# Matches a non-empty string made up only of whitespace and the printable
# ASCII characters with codes 33..126, '&' excluded. normalize consults this
# to decide whether the Sterile clean-up passes can be skipped.
# NOTE(review): '&' is presumably left out so that text which may contain
# HTML entities never takes the fast path - confirm.
ASCII_ONLY =
begin
  printable = (33..126).map(&:chr).reject { _1 == '&' }
  /\A[#{Regexp.escape(printable.join)}\s]+\Z/
end.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(path) ⇒ CSV

Returns a new instance of CSV.



8
9
10
11
12
# File 'lib/sinew/csv.rb', line 8

# Records where the CSV will be written. No file is opened here: @csv stays
# nil (which is what started? checks) and the row counter begins at zero.
#
# @param path [String] destination handed to ::CSV.open by #start
def initialize(path)
  @path = path
  @csv = nil
  @count = 0
end

Instance Attribute Details

#columns ⇒ Object (readonly)

Returns the value of attribute columns.



6
7
8
# File 'lib/sinew/csv.rb', line 6

# The column list that was handed to #start. Nothing assigns it before then,
# so it reads as nil until #start has been called.
def columns
  @columns
end

#count ⇒ Object (readonly)

Returns the value of attribute count.



6
7
8
# File 'lib/sinew/csv.rb', line 6

# Number of rows emitted so far: set to 0 in #initialize and incremented by
# one on every #emit call.
def count
  @count
end

#csv ⇒ Object (readonly)

Returns the value of attribute csv.



6
7
8
# File 'lib/sinew/csv.rb', line 6

# The writer returned by ::CSV.open in #start; nil until #start has run.
def csv
  @csv
end

#path ⇒ Object (readonly)

Returns the value of attribute path.



6
7
8
# File 'lib/sinew/csv.rb', line 6

# The path given to #initialize; #start passes it to ::CSV.open.
def path
  @path
end

#tally ⇒ Object (readonly)

Returns the value of attribute tally.



6
7
8
# File 'lib/sinew/csv.rb', line 6

# Hash built by #start with one zero entry per column. #emit adds one to a
# column's entry each time it writes a non-nil value for that column.
# Reads as nil until #start has been called.
def tally
  @tally
end

Instance Method Details

#emit(row) ⇒ Object

append a row



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/sinew/csv.rb', line 31

# Appends one row to the CSV.
#
# Every column listed in #columns is looked up in +row+ and run through
# #normalize. A cell that normalizes to nil is still written (as nil), so the
# row keeps one cell per column, but it is left out of the returned hash and
# does not bump #tally.
#
# @param row [#[]] source values, indexed by column
# @return [Hash] column => normalized value, for the non-nil cells only
def emit(row)
  shown = {}
  cells = columns.map do |column|
    normalize(row[column]).tap do |cell|
      next unless cell

      shown[column] = cell
      tally[column] += 1
    end
  end
  @count += 1

  # write the row and flush straight away
  csv << cells
  csv.flush

  # handed back in case the caller wants to pretty print the row
  shown
end

#normalize(s) ⇒ Object



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/sinew/csv.rb', line 57

# Turns a scraped value into a clean single-line string, or nil when nothing
# is left.
#
# @param s [Object] anything responding to inner_html (the nokogiri case),
#   an Array (joined with '|'), or any other object (converted with to_s)
# @return [String, nil] the cleaned text; nil if it is empty before or after
#   cleaning
def normalize(s)
  text =
    if s.respond_to?(:inner_html)
      s.inner_html
    elsif s.is_a?(Array)
      s.join('|')
    else
      s.to_s
    end
  return if text.empty?

  # crude tag stripping - every tag is swapped for a space, not removed
  text = text.gsub(/<[^>]+>/, ' ')

  # Anything beyond plain ASCII (or containing '&') goes through Sterile:
  #   1. plain_format    - MS Word 'smart punctuation' to ASCII
  #   2. decode_entities - &aacute; &amp; etc.
  #   3. transliterate   - "šţɽĩɳģ" => "string"
  unless text.match?(ASCII_ONLY)
    text = Sterile.plain_format(text)
              .then { Sterile.decode_entities(_1) }
              .then { Sterile.transliterate(_1) }
  end

  # squish: trim the ends and collapse internal whitespace runs
  squished = text.strip.gsub(/\s+/, ' ')
  squished unless squished.empty?
end

#start(columns) ⇒ Object

start writing the csv



15
16
17
18
19
20
21
22
23
# File 'lib/sinew/csv.rb', line 15

# Starts writing the CSV: remembers the columns, zeroes the per-column tally,
# opens +path+ for writing and emits the header row.
#
# @param columns [Array] header names, also used later as the keys #emit
#   looks up in each row
# @return [CSV] the freshly opened writer (also stored in @csv)
# @raise [RuntimeError] 'started twice' if the CSV has already been started
def start(columns)
  raise 'started twice' if started?

  @columns = columns
  @tally = columns.to_h { |column| [column, 0] }

  # @csv is only assigned once the header row has gone out
  writer = ::CSV.open(path, 'wb')
  writer << columns
  @csv = writer
end

#started? ⇒ Boolean

has this csv been started?

Returns:

  • (Boolean)


26
27
28
# File 'lib/sinew/csv.rb', line 26

# Has this csv been started? True as soon as @csv holds anything other than
# nil, which is what #start assigns after opening the file.
#
# @return [Boolean]
def started?
  !@csv.nil?
end