Class: Reckon::App

Inherits:
Object
  • Object
show all
Defined in:
lib/reckon/app.rb

Constant Summary collapse

VERSION =
"Reckon 0.1"

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ App

Returns a new instance of App.



8
9
10
11
12
13
14
15
16
17
# File 'lib/reckon/app.rb', line 8

# Builds a new App and immediately runs the whole loading pipeline.
#
# options - Hash of settings, stored through the options writer and read by
#           the steps below (for example options[:file] in #parse and
#           options[:existing_ledger_file] in #learn!).
#
# Side effects: resets tokens, accounts and seen to empty hashes, then calls
# learn!, parse, filter_csv and detect_columns in that order.
def initialize(options = {})
  self.options = options
  self.tokens = {}
  self.accounts = {}
  self.seen = {}
  learn!
  parse
  filter_csv
  detect_columns
end

Instance Attribute Details

#accounts ⇒ Object

Returns the value of attribute accounts.



6
7
8
# File 'lib/reckon/app.rb', line 6

# Reader for @accounts, which #initialize resets to an empty Hash.
def accounts
  @accounts
end

#csv_data ⇒ Object

Returns the value of attribute csv_data.



6
7
8
# File 'lib/reckon/app.rb', line 6

# Reader for @csv_data, the parsed CSV rows assigned by #parse.
def csv_data
  @csv_data
end

#date_column_index ⇒ Object

Returns the value of attribute date_column_index.



6
7
8
# File 'lib/reckon/app.rb', line 6

# Reader for @date_column_index, the column index chosen as the date column
# by #detect_columns.
def date_column_index
  @date_column_index
end

#description_column_indices ⇒ Object

Returns the value of attribute description_column_indices.



6
7
8
# File 'lib/reckon/app.rb', line 6

# Reader for @description_column_indices, the column indices left over in
# #detect_columns once the money and date columns have been removed.
def description_column_indices
  @description_column_indices
end

#money_column_indices ⇒ Object

Returns the value of attribute money_column_indices.



6
7
8
# File 'lib/reckon/app.rb', line 6

# Reader for @money_column_indices, an Array of one or two column indices
# assigned by #detect_columns.
def money_column_indices
  @money_column_indices
end

#options ⇒ Object

Returns the value of attribute options.



6
7
8
# File 'lib/reckon/app.rb', line 6

# Reader for @options, the settings Hash handed to #initialize.
def options
  @options
end

#seen ⇒ Object

Returns the value of attribute seen.



6
7
8
# File 'lib/reckon/app.rb', line 6

# Reader for @seen, which #initialize resets to an empty Hash. #already_seen?
# looks entries up as seen[pretty_date][pretty_money].
def seen
  @seen
end

#tokens ⇒ Object

Returns the value of attribute tokens.



6
7
8
# File 'lib/reckon/app.rb', line 6

# Reader for @tokens, which #initialize resets to an empty Hash.
def tokens
  @tokens
end

Class Method Details

.parse_opts(args = ARGV) ⇒ Object



342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
# File 'lib/reckon/app.rb', line 342

# Parses command-line switches into an options Hash and interactively asks
# for anything essential that is still missing.
#
# args - Array of command-line arguments (defaults to ARGV). It is consumed
#        destructively by OptionParser#parse!.
#
# Returns the options Hash. :output_file defaults to STDOUT; :file and
# :bank_account are always set on return.
# Exits the process for --help, --version, or when no CSV file is supplied.
# NOTE(review): `ask` is not defined in the visible code — presumably the
# HighLine prompt helper; confirm against the file's requires.
def self.parse_opts(args = ARGV)
  options = { :output_file => STDOUT }
  parser = OptionParser.new do |opts|
    opts.banner = "Usage: Reckon.rb [options]"
    opts.separator ""

    opts.on("-f", "--file FILE", "The CSV file to parse") do |file|
      options[:file] = file
    end

    opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
      options[:verbose] = v
    end

    opts.on("-p", "--print-table", "Print out the parsed CSV in table form") do |p|
      options[:print_table] = p
    end

    # The output file is opened for appending as soon as the switch is parsed.
    opts.on("-o", "--output-file FILE", "The ledger file to append to") do |o|
      options[:output_file] = File.open(o, 'a')
    end

    opts.on("-l", "--learn-from FILE", "An existing ledger file to learn accounts from") do |l|
      options[:existing_ledger_file] = l
    end

    # Column numbers are 1-based and stored as Integers.
    opts.on("", "--ignore-columns 1,2,5", "Columns to ignore in the CSV file - the first column is column 1") do |ignore|
      options[:ignore_columns] = ignore.split(",").map { |i| i.to_i }
    end

    opts.on("", "--contains-header", "The first row of the CSV is a header and should be skipped") do |contains_header|
      options[:contains_header] = contains_header
    end

    opts.on("", "--csv-separator ','", "Separator for parsing the CSV - default is comma.") do |csv_separator|
      options[:csv_separator] = csv_separator
    end

    opts.on("", "--comma-separates-cents", "Use comma instead of period to deliminate dollars from cents when parsing ($100,50 instead of $100.50)") do |c|
      options[:comma_separates_cents] = c
    end

    opts.on_tail("-h", "--help", "Show this message") do
      puts opts
      exit
    end

    opts.on_tail("--version", "Show version") do
      puts VERSION
      exit
    end

    opts.parse!(args)
  end

  # No -f switch: prompt for the file, and give up with the usage text if the
  # answer is empty.
  unless options[:file]
    options[:file] = ask("What CSV file should I parse? ")
    unless options[:file].length > 0
      puts "\nYou must provide a CSV file to parse.\n"
      puts parser
      exit
    end
  end

  # No switch above sets :bank_account, so this prompt runs whenever the
  # options come from the command line alone.
  unless options[:bank_account]
    options[:bank_account] = ask("What is the account name of this bank account in Ledger? ") do |q|
      q.validate = /^.{2,}$/
      q.default = "Assets:Bank:Checking"
    end
  end

  options
end

.settings ⇒ Object



418
419
420
# File 'lib/reckon/app.rb', line 418

# Returns the class-level @settings instance variable.
# NOTE(review): nothing in the visible code assigns @settings, yet
# #detect_columns indexes the result (settings[:testing]) — confirm where it
# is initialised.
def self.settings
  @settings
end

Instance Method Details

#already_seen?(row) ⇒ Boolean

Returns:

  • (Boolean)


40
41
42
# File 'lib/reckon/app.rb', line 40

# Tells whether a row with the same date and amount has been recorded in #seen.
#
# row - Hash providing :pretty_date and :pretty_money.
#
# Returns the value stored under seen[pretty_date][pretty_money], or a falsy
# value when either level is missing.
def already_seen?(row)
  date_key = row[:pretty_date]
  money_key = row[:pretty_money]
  seen[date_key] && seen[date_key][money_key]
end

#columns ⇒ Object



318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
# File 'lib/reckon/app.rb', line 318

# Transposes the parsed CSV rows into an Array of columns (memoised).
#
# Rows whose cells are all nil or empty are dropped. Every remaining cell is
# stripped, with nil treated as ''. Rows shorter than the widest row are
# padded with '' so that columns[c][r] always refers to the same row r in
# every column; previously a short row left later columns one entry short and
# shifted the following rows' cells onto the wrong row index.
#
# Returns an Array of Arrays of Strings ([] when there are no usable rows).
def columns
  @columns ||= begin
    rows = csv_data.reject { |row| row.all? { |cell| cell.nil? || cell.length == 0 } }
    width = rows.map { |row| row.length }.max || 0
    (0...width).map do |index|
      rows.map { |row| (row[index] || '').strip }
    end
  end
end

#date_for(index) ⇒ Object



173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# File 'lib/reckon/app.rb', line 173

# Parses the date cell of row +index+, taken from the detected date column.
#
# Three known bank formats are first rewritten into slash-separated
# year/month/day text; the value is then handed to Chronic.parse with
# :context => :past.
#
# Returns whatever Chronic.parse returns. If parsing raises, a warning is
# printed and the method returns nil (the value of the final puts).
# NOTE(review): #pretty_date_for calls strftime on the result without a nil
# check — confirm a nil return is acceptable to callers.
def date_for(index)
  value = columns[date_column_index][index]
  # Each rewrite only fires when its pattern matches; $1..$3 come from that match.
  value = [$1, $2, $3].join("/") if value =~ /^(\d{4})(\d{2})(\d{2})\d+\[\d+\:GMT\]$/ # chase format
  value = [$3, $2, $1].join("/") if value =~ /^(\d{2})\.(\d{2})\.(\d{4})$/            # german format
  value = [$3, $2, $1].join("/") if value =~ /^(\d{2})\-(\d{2})\-(\d{4})$/            # nordea format
  begin
    guess = Chronic.parse(value, :context => :past)
    # 953236800 is an epoch-seconds cutoff (presumably mid-March 2000 — TODO confirm).
    # An earlier guess on a slash-separated value is re-parsed with 2000 added
    # to its last component, i.e. the last component is treated as a two-digit year.
    if guess.to_i < 953236800 && value =~ /\//
      guess = Chronic.parse((value.split("/")[0...-1] + [(2000 + value.split("/").last.to_i).to_s]).join("/"), :context => :past)
    end
    guess
  rescue
    puts "I'm having trouble parsing #{value}, which I thought was a date.  Please report this so that we"
    puts "can make this parser better!"
  end
end

#description_for(index) ⇒ Object



194
195
196
# File 'lib/reckon/app.rb', line 194

# Builds the description text for row +index+ from every description column.
#
# The cells are joined with "; ", runs of spaces are squeezed to one, any run
# of two or more "; " separators is removed entirely, and the result is
# stripped.
#
# Returns a String.
def description_for(index)
  cells = description_column_indices.map { |column_index| columns[column_index][index] }
  joined = cells.join("; ").squeeze(" ")
  joined.gsub(/(;\s+){2,}/, '').strip
end

#detect_columns ⇒ Object



274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
# File 'lib/reckon/app.rb', line 274

# Decides which columns hold money, the date and the description, and stores
# the answers in money_column_indices, date_column_index and
# description_column_indices.
#
# The money column is the one with the highest :money_score from
# #evaluate_columns. When no column was flagged as a likely money column,
# each adjacent pair of columns is merged and re-scored to look for a
# split debit/credit layout; the first pair that is flagged wins.
# The date column is the best :date_score among the remaining columns, and
# everything left over becomes description columns.
#
# Messages are printed unless settings[:testing] is truthy.
def detect_columns
  results, found_likely_money_column = evaluate_columns(columns)
  self.money_column_indices = [ results.sort { |a, b| b[:money_score] <=> a[:money_score] }.first[:index] ]

  if !found_likely_money_column
    found_likely_double_money_columns = false
    # Try every adjacent pair (i, i+1) as one merged money column.
    0.upto(columns.length - 2) do |i|
      _, found_likely_double_money_columns = evaluate_columns(merge_columns(i, i+1))

      if found_likely_double_money_columns
        self.money_column_indices = [ i, i+1 ]
        unless settings[:testing]
          puts "It looks like this CSV has two seperate columns for money, one of which shows positive"
          puts "changes and one of which shows negative changes.  If this is true, great.  Otherwise,"
          puts "please report this issue to us so we can take a look!\n"
        end
        break
      end
    end

    # Nothing convincing was found: keep the single best-scoring guess from above.
    if !found_likely_double_money_columns && !settings[:testing]
      puts "I didn't find a high-likelyhood money column, but I'm taking my best guess with column #{money_column_indices.first + 1}."
    end
  end

  # Money columns cannot also be the date column; the date column cannot be a description.
  results.reject! {|i| money_column_indices.include?(i[:index]) }
  self.date_column_index = results.sort { |a, b| b[:date_score] <=> a[:date_score] }.first[:index]
  results.reject! {|i| i[:index] == date_column_index }

  self.description_column_indices = results.map { |i| i[:index] }
end

#each_row_backwards ⇒ Object



306
307
308
309
310
311
312
313
314
315
316
# File 'lib/reckon/app.rb', line 306

# Yields one Hash per CSV row, ordered by ascending :date.
#
# Each Hash carries :date, :pretty_date, :pretty_money,
# :pretty_money_negated, :money and :description, all computed from the row
# index through the corresponding *_for helpers. The row count is taken from
# the first column.
#
# Returns the sorted Array of row Hashes.
def each_row_backwards
  row_count = columns.first.length
  records = (0...row_count).map do |index|
    { :date => date_for(index), :pretty_date => pretty_date_for(index),
      :pretty_money => pretty_money_for(index), :pretty_money_negated => pretty_money_for(index, :negate),
      :money => money_for(index), :description => description_for(index) }
  end
  records.sort { |a, b| a[:date] <=> b[:date] }.each { |record| yield record }
end

#evaluate_columns(cols) ⇒ Object



208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
# File 'lib/reckon/app.rb', line 208

# Scores every column for how much it looks like a money column and how much
# it looks like a date column.
#
# cols - Array of columns, each an Array of String cells.
#
# Returns a two-element Array:
#   [0] Array of { :index, :money_score, :date_score } Hashes, one per column;
#   [1] true when at least one column had both negative-looking and
#       positive-looking amounts in more than a fifth of its cells.
def evaluate_columns(cols)
  results = []
  found_likely_money_column = false
  cols.each_with_index do |column, index|
    money_score = date_score = possible_neg_money_count = possible_pos_money_count = 0
    last = nil
    # Walk the column bottom-up so each cell can be compared with the one below it.
    column.reverse.each_with_index do |entry, row_from_bottom|
      # NOTE(review): assumes cols and csv_data have the same number of rows;
      # otherwise this lookup refers to a different row (or nil) — confirm.
      row = csv_data[csv_data.length - 1 - row_from_bottom]
      entry = entry.strip
      # Money heuristics: currency sign, two-decimal number, short and mostly numeric.
      money_score += 20 if entry[/^[\-\+\(]{0,2}\$/]
      money_score += 20 if entry[/^\$?\-?\$?\d+[\.,\d]*?[\.,]\d\d$/]
      money_score += entry.gsub(/[^\d\.\-\+,\(\)]/, '').length if entry.length < 7
      money_score -= entry.length if entry.length > 8
      money_score -= 20 if entry !~ /^[\$\+\.\-,\d\(\)]+$/
      possible_neg_money_count += 1 if entry =~ /^\$?[\-\(]\$?\d+/
      possible_pos_money_count += 1 if entry =~ /^\+?\$?\+?\d+/
      # Date heuristics: month names, date-like punctuation, d/m/y shapes.
      date_score += 10 if entry =~ /\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)/i
      date_score += 5 if entry =~ /^[\-\/\.\d:\[\]]+$/
      date_score += entry.gsub(/[^\-\/\.\d:\[\]]/, '').length if entry.gsub(/[^\-\/\.\d:\[\]]/, '').length > 3
      date_score -= entry.gsub(/[\-\/\.\d:\[\]]/, '').length
      date_score += 30 if entry =~ /^\d+[:\/\.]\d+[:\/\.]\d+([ :]\d+[:\/\.]\d+)?$/
      date_score += 10 if entry =~ /^\d+\[\d+:GMT\]$/i

      # Try to determine if this is a balance column
      # (a cell equal to the previously visited cell plus some other cell of
      # the same CSV row is penalised as a running balance).
      entry_as_num = entry.gsub(/[^\-\d\.]/, '').to_f
      if last && entry_as_num != 0 && last != 0
        row.each do |row_entry|
          row_entry = row_entry.to_s.gsub(/[^\-\d\.]/, '').to_f
          if row_entry != 0 && last + row_entry == entry_as_num
             money_score -= 10
             break
          end
        end
      end
      last = entry_as_num
    end

    # A column mixing enough negative and positive amounts gets a large bonus.
    if possible_neg_money_count > (column.length / 5.0) && possible_pos_money_count > (column.length / 5.0)
      money_score += 10 * column.length
      found_likely_money_column = true
    end

    results << { :index => index, :money_score => money_score, :date_score => date_score }
  end

  return [results, found_likely_money_column]
end

#filter_csv ⇒ Object



19
20
21
22
23
24
25
26
27
# File 'lib/reckon/app.rb', line 19

# Drops the columns listed in options[:ignore_columns] (1-based column
# numbers) and replaces the memoised @columns with what remains.
#
# Returns the filtered Array of columns, or nil when no columns are ignored.
def filter_csv
  ignored = options[:ignore_columns]
  return unless ignored

  kept = []
  columns.each_with_index do |column, position|
    next if ignored.include?(position + 1)
    kept << column
  end
  @columns = kept
end

#finish ⇒ Object



110
111
112
113
114
# File 'lib/reckon/app.rb', line 110

# Closes the output file (unless it is STDOUT), prints "Exiting." and
# terminates the process via exit.
def finish
  sink = options[:output_file]
  sink.close unless sink == STDOUT
  puts "Exiting."
  exit
end

#guess_account(row) ⇒ Object



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/reckon/app.rb', line 121

# Guesses the most likely account for a row by comparing its description
# tokens with the tokens learned for each known account (tf-idf weights,
# ranked by dot product).
#
# row - Hash providing :description.
#
# Returns the best-scoring account key from #accounts, or nil when no
# accounts are known.
def guess_account(row)
  query_tokens = tokenize(row[:description])

  search_vector = []
  account_vectors = {}

  query_tokens.each do |token|
    # idf: tokens associated with fewer accounts weigh more.
    idf = Math.log((accounts.keys.length + 1) / ((tokens[token] || {}).keys.length.to_f + 1))
    tf = 1.0 / query_tokens.length.to_f
    search_vector << tf*idf

    # One weight per query token for every account, in the same order as search_vector.
    accounts.each do |account, total_terms|
      tf = (tokens[token] && tokens[token][account]) ? tokens[token][account] / total_terms.to_f : 0
      account_vectors[account] ||= []
      account_vectors[account] << tf*idf
    end
  end

  # Should I normalize the vectors?  Probably unnecessary due to tf-idf and short documents.

  account_vectors = account_vectors.to_a.map do |account, account_vector|
    { :cosine => (0...account_vector.length).to_a.inject(0) { |m, i| m + search_vector[i] * account_vector[i] },
      :account => account }
  end

  account_vectors.sort! {|a, b| b[:cosine] <=> a[:cosine] }
  account_vectors.first && account_vectors.first[:account]
end

#learn! ⇒ Object



44
45
46
47
48
49
50
# File 'lib/reckon/app.rb', line 44

# Feeds the ledger file named by options[:existing_ledger_file] to
# #learn_from. Does nothing when that option is not set.
#
# Uses File.exist? — the File.exists? alias was deprecated and then removed
# in Ruby 3.2.
#
# Raises RuntimeError when the file does not exist.
# Returns the result of #learn_from, or nil when no file is configured.
def learn!
  ledger_path = options[:existing_ledger_file]
  return unless ledger_path

  fail "#{ledger_path} doesn't exist!" unless File.exist?(ledger_path)
  learn_from(File.read(ledger_path))
end

#learn_about_account(account, data) ⇒ Object



52
53
54
55
56
57
58
59
60
# File 'lib/reckon/app.rb', line 52

# Records the tokens of +data+ as evidence for +account+.
#
# account - key identifying the account (used in both #accounts and #tokens).
# data    - String that is split with #tokenize.
#
# For every token, tokens[token][account] and accounts[account] are each
# incremented by one, so accounts[account] ends up as the total number of
# tokens learned for that account.
def learn_about_account(account, data)
  accounts[account] ||= 0
  tokenize(data).each do |token|
    tokens[token] ||= {}
    tokens[token][account] ||= 0
    tokens[token][account] += 1
    accounts[account] += 1
  end
end

#learn_from(ledger) ⇒ Object



29
30
31
32
33
34
35
36
37
38
# File 'lib/reckon/app.rb', line 29

# Learns account/description associations from ledger text.
#
# ledger - ledger text handed to LedgerParser.
#
# For every posting of every parsed entry:
#   * unless the posting belongs to options[:bank_account], its account name
#     is trained on "<entry description> <posting amount>";
#   * the entry date and the pretty-printed amount are marked in #seen — this
#     happens for every posting, including the bank account's own.
def learn_from(ledger)
  LedgerParser.new(ledger).entries.each do |entry|
    entry[:accounts].each do |account|
      learn_about_account( account[:name],
                           [entry[:desc], account[:amount]].join(" ") ) unless account[:name] == options[:bank_account]
      seen[entry[:date]] ||= {}
      seen[entry[:date]][pretty_money(account[:amount])] = true
    end
  end
end

#ledger_format(row, line1, line2) ⇒ Object



150
151
152
153
154
155
# File 'lib/reckon/app.rb', line 150

# Renders one ledger entry as text.
#
# row   - Hash providing :pretty_date and :description for the header line.
# line1 - two-element list [account, amount] for the first posting.
# line2 - two-element list [account, amount] for the second posting.
#
# Returns a String: the header line, one tab-indented line per posting
# (account and amount separated by five tabs), then a blank line.
def ledger_format(row, line1, line2)
  header = "#{row[:pretty_date]}\t#{row[:description]}\n"
  postings = [line1, line2].map do |posting|
    "\t#{posting.first}\t\t\t\t\t#{posting.last}\n"
  end
  header + postings.join + "\n"
end

#merge_columns(a, b) ⇒ Object



256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
# File 'lib/reckon/app.rb', line 256

# Returns a copy of the column list in which column +a+ has each cell of
# column +b+ appended (separated by one space) and column +b+ is left out.
# A missing cell in +b+ is treated as ''. All other columns are passed
# through unchanged. #columns itself is not modified.
def merge_columns(a, b)
  merged = []
  columns.each_with_index do |column, position|
    if position == a
      partner = columns[b]
      merged << (0...column.length).map do |row_index|
        column[row_index] + " " + (partner[row_index] || '')
      end
    elsif position != b
      merged << column
    end
  end
  merged
end

#money_for(index) ⇒ Object



157
158
159
160
161
162
163
# File 'lib/reckon/app.rb', line 157

# Converts the money cell(s) of row +index+ into a Float.
#
# The text of all money columns is concatenated. With
# options[:comma_separates_cents], periods are removed and commas become
# periods. Everything except digits and periods is then discarded before the
# conversion; the result is negated when the text contains "(" or "-".
def money_for(index)
  raw = money_column_indices.inject("") { |text, column_index| text + columns[column_index][index] }
  raw = raw.gsub(/\./, '').gsub(/,/, '.') if options[:comma_separates_cents]
  amount = raw.gsub(/[^\d\.]/, '').to_f
  raw =~ /[\(\-]/ ? amount * -1 : amount
end

#output(ledger_line) ⇒ Object



116
117
118
119
# File 'lib/reckon/app.rb', line 116

# Writes +ledger_line+ to options[:output_file] and flushes it straight away.
def output(ledger_line)
  sink = options[:output_file]
  sink.puts ledger_line
  sink.flush
end

#output_table ⇒ Object



198
199
200
201
202
203
204
205
206
# File 'lib/reckon/app.rb', line 198

# Prints every row (date, amount, description) as a Terminal::Table, in the
# order produced by #each_row_backwards.
def output_table
  table = Terminal::Table.new do |t|
    t.headings = ['Date', 'Amount', 'Description']
    each_row_backwards do |row|
      t << row.values_at(:pretty_date, :pretty_money, :description)
    end
  end
  puts table
end

#parse ⇒ Object



335
336
337
338
339
340
# File 'lib/reckon/app.rb', line 335

# Parses the CSV input into @csv_data.
#
# The text comes from options[:string] when given, otherwise from the file
# named by options[:file]. The column separator is options[:csv_separator]
# (default ','). The first row is dropped when options[:contains_header] is
# set.
#
# FasterCSV is only used on Ruby versions older than 1.9; every later Ruby
# uses the standard CSV library. The previous check matched 1.9 only, so
# Ruby 2.0 and newer fell through to FasterCSV.
#
# Returns the Array of parsed rows.
def parse
  data = options[:string] || File.read(options[:file])
  csv_library = RUBY_VERSION < "1.9" ? FasterCSV : CSV
  @csv_data = csv_library.parse(data.strip, :col_sep => options[:csv_separator] || ',')
  csv_data.shift if options[:contains_header]
  csv_data
end

#pretty_date_for(index) ⇒ Object



190
191
192
# File 'lib/reckon/app.rb', line 190

# Formats the date of row +index+ as "YYYY/MM/DD".
#
# Calls strftime on whatever #date_for returns, so it raises if that is nil.
def pretty_date_for(index)
  parsed = date_for(index)
  parsed.strftime("%Y/%m/%d")
end

#pretty_money(amount, negate = false) ⇒ Object



169
170
171
# File 'lib/reckon/app.rb', line 169

# Formats an amount with two decimals and a "$" placed after any minus sign.
#
# amount - Numeric value.
# negate - when truthy, the printed value is amount * -1.
#
# A leading space is added whenever +amount+ itself (before any negation) is
# zero or positive.
#
# Returns a String such as " $5.00" or "-$5.00".
def pretty_money(amount, negate = false)
  padding = amount >= 0 ? " " : ""
  sign = negate ? -1 : 1
  digits = sprintf("%0.2f", amount * sign)
  padding + digits.gsub(/^((\-)|)(?=\d)/, '\1$')
end

#pretty_money_for(index, negate = false) ⇒ Object



165
166
167
# File 'lib/reckon/app.rb', line 165

# Formats the amount of row +index+ via #pretty_money, passing +negate+
# straight through.
def pretty_money_for(index, negate = false)
  amount = money_for(index)
  pretty_money(amount, negate)
end

#settings ⇒ Object



422
423
424
# File 'lib/reckon/app.rb', line 422

# Instance-level shortcut for the class-level settings (self.class.settings).
def settings
  self.class.settings
end

#tokenize(str) ⇒ Object



62
63
64
# File 'lib/reckon/app.rb', line 62

# Splits +str+ into lower-case tokens on every whitespace character or hyphen.
#
# Adjacent separators yield empty-string tokens.
#
# Returns an Array of Strings.
def tokenize(str)
  lowered = str.downcase
  lowered.split(/[\s\-]/)
end

#walk_backwards ⇒ Object



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/reckon/app.rb', line 66

# Interactively walks through every row from #each_row_backwards, asks which
# account each transaction belongs to, and writes the resulting ledger entry.
#
# For each row:
#   * the row is printed as a one-line table;
#   * rows already present in #seen are skipped until the first unseen row
#     has been met (afterwards they are only flagged with a note);
#   * the user is asked for the counterpart account, with #guess_account as
#     the default answer; "quit"/"q" calls #finish and "skip"/"s" moves on;
#   * the formatted entry is passed to #learn_from and then to #output.
def walk_backwards
  seen_anything_new = false
  each_row_backwards do |row|
    puts Terminal::Table.new(:rows => [ [ row[:pretty_date], row[:pretty_money], row[:description] ] ])

    if already_seen?(row)
      puts "NOTE: This row is very similar to a previous one!"
      if !seen_anything_new
        puts "Skipping..."
        next
      end
    else
      seen_anything_new = true
    end

    ledger = if row[:money] > 0
      # Income: money moves out of the chosen account into the bank account.
      out_of_account = ask("Which account provided this income? ([account]/[q]uit/[s]kip) ") { |q| q.default = guess_account(row) }
      finish if out_of_account == "quit" || out_of_account == "q"
      if out_of_account == "skip" || out_of_account == "s"
        puts "Skipping"
        next
      end

      ledger_format( row,
                     [options[:bank_account], row[:pretty_money]],
                     [out_of_account, row[:pretty_money_negated]] )
    else
      # Expense: money moves out of the bank account into the chosen account.
      into_account = ask("To which account did this money go? ([account]/[q]uit/[s]kip) ") { |q| q.default = guess_account(row) }
      finish if into_account == "quit" || into_account == 'q'
      if into_account == "skip" || into_account == 's'
        puts "Skipping"
        next
      end

      ledger_format( row,
                     [into_account, row[:pretty_money_negated]],
                     [options[:bank_account], row[:pretty_money]] )
    end

    learn_from(ledger)
    output(ledger)
  end
end