Class: Reckon::App
- Inherits: Object
- Inheritance chain: Object → Reckon::App
- Defined in:
- lib/reckon/app.rb
Constant Summary collapse
- VERSION = "Reckon 0.1"
Instance Attribute Summary collapse
-
#accounts ⇒ Object
Returns the value of attribute accounts.
-
#csv_data ⇒ Object
Returns the value of attribute csv_data.
-
#date_column_index ⇒ Object
Returns the value of attribute date_column_index.
-
#description_column_indices ⇒ Object
Returns the value of attribute description_column_indices.
-
#money_column_indices ⇒ Object
Returns the value of attribute money_column_indices.
-
#options ⇒ Object
Returns the value of attribute options.
-
#seen ⇒ Object
Returns the value of attribute seen.
-
#tokens ⇒ Object
Returns the value of attribute tokens.
Class Method Summary collapse
- .parse_opts(args = ARGV) ⇒ Object
- .settings ⇒ Object
Instance Method Summary collapse
- #already_seen?(row) ⇒ Boolean
- #columns ⇒ Object
- #date_for(index) ⇒ Object
- #description_for(index) ⇒ Object
- #detect_columns ⇒ Object
- #each_row_backwards ⇒ Object
- #evaluate_columns(cols) ⇒ Object
- #filter_csv ⇒ Object
- #finish ⇒ Object
- #guess_account(row) ⇒ Object
- #initialize(options = {}) ⇒ App (constructor): A new instance of App.
- #learn! ⇒ Object
- #learn_about_account(account, data) ⇒ Object
- #learn_from(ledger) ⇒ Object
- #ledger_format(row, line1, line2) ⇒ Object
- #merge_columns(a, b) ⇒ Object
- #money_for(index) ⇒ Object
- #output(ledger_line) ⇒ Object
- #output_table ⇒ Object
- #parse ⇒ Object
- #pretty_date_for(index) ⇒ Object
- #pretty_money(amount, negate = false) ⇒ Object
- #pretty_money_for(index, negate = false) ⇒ Object
- #settings ⇒ Object
- #tokenize(str) ⇒ Object
- #walk_backwards ⇒ Object
Constructor Details
#initialize(options = {}) ⇒ App
Returns a new instance of App.
8 9 10 11 12 13 14 15 16 17 |
# File 'lib/reckon/app.rb', line 8
#
# Builds a new App: stores the options, resets the learning state and then
# runs the start-up pipeline (learn!, parse, filter_csv, detect_columns).
#
# @param options [Hash] run-time options; other methods of this class read
#   keys such as :file, :string, :existing_ledger_file and :ignore_columns
def initialize(options = {})
  self.options = options
  # Learning state starts empty; learn! fills it when a ledger is configured.
  self.tokens = {}
  self.accounts = {}
  self.seen = {}
  learn!
  parse
  filter_csv
  detect_columns
end
Instance Attribute Details
#accounts ⇒ Object
Returns the value of attribute accounts.
6 7 8 |
# File 'lib/reckon/app.rb', line 6
#
# Returns the value of attribute accounts.
# learn_about_account stores, per account name, the number of tokens learned.
#
# @return [Object] the current @accounts value
def accounts
  @accounts
end
#csv_data ⇒ Object
Returns the value of attribute csv_data.
6 7 8 |
# File 'lib/reckon/app.rb', line 6
#
# Returns the value of attribute csv_data (assigned by #parse).
#
# @return [Object] the current @csv_data value
def csv_data
  @csv_data
end
#date_column_index ⇒ Object
Returns the value of attribute date_column_index.
6 7 8 |
# File 'lib/reckon/app.rb', line 6
#
# Returns the value of attribute date_column_index (assigned by
# #detect_columns).
#
# @return [Object] the current @date_column_index value
def date_column_index
  @date_column_index
end
#description_column_indices ⇒ Object
Returns the value of attribute description_column_indices.
6 7 8 |
# File 'lib/reckon/app.rb', line 6
#
# Returns the value of attribute description_column_indices (assigned by
# #detect_columns).
#
# @return [Object] the current @description_column_indices value
def description_column_indices
  @description_column_indices
end
#money_column_indices ⇒ Object
Returns the value of attribute money_column_indices.
6 7 8 |
# File 'lib/reckon/app.rb', line 6
#
# Returns the value of attribute money_column_indices (assigned by
# #detect_columns; holds one index, or two for split debit/credit columns).
#
# @return [Object] the current @money_column_indices value
def money_column_indices
  @money_column_indices
end
#options ⇒ Object
Returns the value of attribute options.
6 7 8 |
# File 'lib/reckon/app.rb', line 6
#
# Returns the value of attribute options.
#
# @return [Object] the current @options value
def options
  @options
end
#seen ⇒ Object
Returns the value of attribute seen.
6 7 8 |
# File 'lib/reckon/app.rb', line 6
#
# Returns the value of attribute seen.
# learn_from fills it as seen[date][pretty_money] = true.
#
# @return [Object] the current @seen value
def seen
  @seen
end
#tokens ⇒ Object
Returns the value of attribute tokens.
6 7 8 |
# File 'lib/reckon/app.rb', line 6
#
# Returns the value of attribute tokens.
# learn_about_account fills it as tokens[token][account] = occurrence count.
#
# @return [Object] the current @tokens value
def tokens
  @tokens
end
Class Method Details
.parse_opts(args = ARGV) ⇒ Object
342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 |
# File 'lib/reckon/app.rb', line 342
#
# Parses command-line arguments into an options hash, interactively asking
# (via `ask`) for the CSV file when -f was not given and for the bank account
# name, which has no command-line flag.
#
# @param args [Array<String>] arguments to parse; consumed by
#   OptionParser#parse!
# @return [Hash] the collected options; :output_file defaults to STDOUT
def self.parse_opts(args = ARGV)
  options = { :output_file => STDOUT }
  parser = OptionParser.new do |opts|
    opts.banner = "Usage: Reckon.rb [options]"
    opts.separator ""

    opts.on("-f", "--file FILE", "The CSV file to parse") do |file|
      options[:file] = file
    end

    opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
      options[:verbose] = v
    end

    opts.on("-p", "--print-table", "Print out the parsed CSV in table form") do |p|
      options[:print_table] = p
    end

    opts.on("-o", "--output-file FILE", "The ledger file to append to") do |o|
      # Opened in append mode; #finish closes it.
      options[:output_file] = File.open(o, 'a')
    end

    opts.on("-l", "--learn-from FILE", "An existing ledger file to learn accounts from") do |l|
      options[:existing_ledger_file] = l
    end

    opts.on("", "--ignore-columns 1,2,5", "Columns to ignore in the CSV file - the first column is column 1") do |ignore|
      options[:ignore_columns] = ignore.split(",").map { |i| i.to_i }
    end

    opts.on("", "--contains-header", "The first row of the CSV is a header and should be skipped") do |contains_header|
      options[:contains_header] = contains_header
    end

    opts.on("", "--csv-separator ','", "Separator for parsing the CSV - default is comma.") do |csv_separator|
      options[:csv_separator] = csv_separator
    end

    opts.on("", "--comma-separates-cents", "Use comma instead of period to deliminate dollars from cents when parsing ($100,50 instead of $100.50)") do |c|
      options[:comma_separates_cents] = c
    end

    opts.on_tail("-h", "--help", "Show this message") do
      puts opts
      exit
    end

    opts.on_tail("--version", "Show version") do
      puts VERSION
      exit
    end

    opts.parse!(args)
  end

  unless options[:file]
    options[:file] = ask("What CSV file should I parse? ")
    unless options[:file].length > 0
      puts "\nYou must provide a CSV file to parse.\n"
      puts parser
      exit
    end
  end

  unless options[:bank_account]
    options[:bank_account] = ask("What is the account name of this bank account in Ledger? ") do |q|
      q.validate = /^.{2,}$/
      q.default = "Assets:Bank:Checking"
    end
  end

  options
end
.settings ⇒ Object
418 419 420 |
# File 'lib/reckon/app.rb', line 418
#
# Class-level settings reader.
#
# @return [Object] the class-level @settings value (nil if never assigned)
def self.settings
  @settings
end
Instance Method Details
#already_seen?(row) ⇒ Boolean
40 41 42 |
# File 'lib/reckon/app.rb', line 40
#
# Tells whether a row with the same date and formatted amount was already
# recorded in `seen`.
#
# @param row [Hash] must provide :pretty_date and :pretty_money
# @return [Object] truthy when the date/amount pair was recorded; nil when
#   either the date or the amount is unknown
def already_seen?(row)
  seen[row[:pretty_date]] && seen[row[:pretty_date]][row[:pretty_money]]
end
#columns ⇒ Object
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 |
# File 'lib/reckon/app.rb', line 318
#
# Transposes csv_data (rows) into an array of columns, memoized in @columns.
# Rows whose cells are all nil or empty are skipped; every kept cell is
# stripped and nil cells become "".
#
# Rows of differing lengths are tolerated: the check that used to fail with
# "Input CSV must have consistent row lengths." is disabled in the original.
#
# @return [Array<Array<String>>] one array of cell strings per CSV column
def columns
  @columns ||= csv_data.inject([]) do |memo, row|
    unless row.all? { |i| i.nil? || i.length == 0 }
      row.each_with_index do |entry, index|
        memo[index] ||= []
        memo[index] << (entry || '').strip
      end
    end
    memo
  end
end
#date_for(index) ⇒ Object
173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 |
# File 'lib/reckon/app.rb', line 173
#
# Parses the date cell of row +index+ with Chronic after normalising a few
# bank-specific formats into slash-separated form.
#
# @param index [Integer] row index into the date column
# @return [Object, nil] whatever Chronic.parse returns; nil when parsing
#   raised (a message is printed instead)
def date_for(index)
  value = columns[date_column_index][index]
  # chase format
  value = [$1, $2, $3].join("/") if value =~ /^(\d{4})(\d{2})(\d{2})\d+\[\d+\:GMT\]$/
  # german format
  value = [$3, $2, $1].join("/") if value =~ /^(\d{2})\.(\d{2})\.(\d{4})$/
  # nordea format
  value = [$3, $2, $1].join("/") if value =~ /^(\d{2})\-(\d{2})\-(\d{4})$/
  begin
    guess = Chronic.parse(value, :context => :past)
    # NOTE(review): 953236800 looks like a Unix timestamp in March 2000; a
    # slash-separated date landing before it is presumably a two-digit year,
    # so it is re-parsed with 2000 added to the last component. Confirm intent.
    if guess.to_i < 953236800 && value =~ /\//
      guess = Chronic.parse((value.split("/")[0...-1] + [(2000 + value.split("/").last.to_i).to_s]).join("/"), :context => :past)
    end
    guess
  rescue
    puts "I'm having trouble parsing #{value}, which I thought was a date. Please report this so that we"
    puts "can make this parser better!"
  end
end
#description_for(index) ⇒ Object
194 195 196 |
# File 'lib/reckon/app.rb', line 194
#
# Builds the description of row +index+ by joining all description columns
# with "; ", collapsing repeated spaces, removing runs of two or more
# "; " separators (left behind by empty cells) and stripping the result.
#
# @param index [Integer] row index
# @return [String] the combined description
def description_for(index)
  parts = description_column_indices.map { |i| columns[i][index] }
  parts.join("; ").squeeze(" ").gsub(/(;\s+){2,}/, '').strip
end
#detect_columns ⇒ Object
274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 |
# File 'lib/reckon/app.rb', line 274
#
# Decides which CSV columns hold money, the date and the description, and
# stores the result in money_column_indices, date_column_index and
# description_column_indices.
#
# The best-scoring money column is chosen first. When no column looks like a
# likely money column, adjacent column pairs are merged and re-evaluated to
# detect split debit/credit columns. Messages are printed unless
# settings[:testing] is set.
#
# @return [Array<Integer>] the description column indices
def detect_columns
  results, found_likely_money_column = evaluate_columns(columns)
  self.money_column_indices = [ results.sort { |a, b| b[:money_score] <=> a[:money_score] }.first[:index] ]

  if !found_likely_money_column
    found_likely_double_money_columns = false
    0.upto(columns.length - 2) do |i|
      _, found_likely_double_money_columns = evaluate_columns(merge_columns(i, i+1))

      if found_likely_double_money_columns
        self.money_column_indices = [ i, i+1 ]
        unless settings[:testing]
          puts "It looks like this CSV has two seperate columns for money, one of which shows positive"
          puts "changes and one of which shows negative changes. If this is true, great. Otherwise,"
          puts "please report this issue to us so we can take a look!\n"
        end
        break
      end
    end

    if !found_likely_double_money_columns && !settings[:testing]
      puts "I didn't find a high-likelyhood money column, but I'm taking my best guess with column #{money_column_indices.first + 1}."
    end
  end

  # Money columns are excluded before picking the date column, and the date
  # column is excluded before the remainder becomes the description.
  results.reject! {|i| money_column_indices.include?(i[:index]) }
  self.date_column_index = results.sort { |a, b| b[:date_score] <=> a[:date_score] }.first[:index]
  results.reject! {|i| i[:index] == date_column_index }

  self.description_column_indices = results.map { |i| i[:index] }
end
#each_row_backwards ⇒ Object
306 307 308 309 310 311 312 313 314 315 316 |
# File 'lib/reckon/app.rb', line 306
#
# Builds one hash per CSV row and yields the rows sorted by ascending :date.
#
# @yieldparam row [Hash] with keys :date, :pretty_date, :pretty_money,
#   :pretty_money_negated, :money and :description
# @return [Array<Hash>] the sorted rows
def each_row_backwards
  rows = []
  (0...columns.first.length).to_a.each do |index|
    rows << { :date => date_for(index),
              :pretty_date => pretty_date_for(index),
              :pretty_money => pretty_money_for(index),
              :pretty_money_negated => pretty_money_for(index, :negate),
              :money => money_for(index),
              :description => description_for(index) }
  end
  rows.sort { |a, b| a[:date] <=> b[:date] }.each do |row|
    yield row
  end
end
#evaluate_columns(cols) ⇒ Object
208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 |
# File 'lib/reckon/app.rb', line 208
#
# Scores every column in +cols+ for how much it looks like a money column
# and how much it looks like a date column, using regex heuristics on each
# entry.
#
# @param cols [Array<Array<String>>] columns to score (see #columns and
#   #merge_columns)
# @return [Array] a pair [results, found_likely_money_column]; results is an
#   array of hashes with :index, :money_score and :date_score, and the flag is
#   true when at least one column has more than a fifth of its entries
#   looking negative and more than a fifth looking positive
def evaluate_columns(cols)
  results = []
  found_likely_money_column = false
  cols.each_with_index do |column, index|
    money_score = date_score = possible_neg_money_count = possible_pos_money_count = 0
    last = nil
    # Walk the column bottom-up, pairing each entry with the CSV row at the
    # same distance from the end of csv_data.
    column.reverse.each_with_index do |entry, row_from_bottom|
      row = csv_data[csv_data.length - 1 - row_from_bottom]
      entry = entry.strip
      money_score += 20 if entry[/^[\-\+\(]{0,2}\$/]
      money_score += 20 if entry[/^\$?\-?\$?\d+[\.,\d]*?[\.,]\d\d$/]
      money_score += entry.gsub(/[^\d\.\-\+,\(\)]/, '').length if entry.length < 7
      money_score -= entry.length if entry.length > 8
      money_score -= 20 if entry !~ /^[\$\+\.\-,\d\(\)]+$/
      possible_neg_money_count += 1 if entry =~ /^\$?[\-\(]\$?\d+/
      possible_pos_money_count += 1 if entry =~ /^\+?\$?\+?\d+/
      date_score += 10 if entry =~ /\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)/i
      date_score += 5 if entry =~ /^[\-\/\.\d:\[\]]+$/
      date_score += entry.gsub(/[^\-\/\.\d:\[\]]/, '').length if entry.gsub(/[^\-\/\.\d:\[\]]/, '').length > 3
      date_score -= entry.gsub(/[\-\/\.\d:\[\]]/, '').length
      date_score += 30 if entry =~ /^\d+[:\/\.]\d+[:\/\.]\d+([ :]\d+[:\/\.]\d+)?$/
      date_score += 10 if entry =~ /^\d+\[\d+:GMT\]$/i

      # Try to determine if this is a balance column: penalise the column
      # when the previously visited value plus some cell of this row equals
      # this entry.
      entry_as_num = entry.gsub(/[^\-\d\.]/, '').to_f
      if last && entry_as_num != 0 && last != 0
        row.each do |row_entry|
          row_entry = row_entry.to_s.gsub(/[^\-\d\.]/, '').to_f
          if row_entry != 0 && last + row_entry == entry_as_num
            money_score -= 10
            break
          end
        end
      end
      last = entry_as_num
    end

    if possible_neg_money_count > (column.length / 5.0) && possible_pos_money_count > (column.length / 5.0)
      money_score += 10 * column.length
      found_likely_money_column = true
    end

    results << { :index => index, :money_score => money_score, :date_score => date_score }
  end

  return [results, found_likely_money_column]
end
#filter_csv ⇒ Object
19 20 21 22 23 24 25 26 27 |
# File 'lib/reckon/app.rb', line 19
#
# Drops the columns listed in options[:ignore_columns] from the memoized
# column set. Does nothing when the option is absent.
#
# @return [Array, nil] the remaining columns, or nil when nothing is ignored
def filter_csv
  return unless options[:ignore_columns]

  kept_columns = []
  columns.each_with_index do |column, index|
    # :ignore_columns is 1-based while indices are 0-based.
    kept_columns << column unless options[:ignore_columns].include?(index + 1)
  end
  @columns = kept_columns
end
#finish ⇒ Object
110 111 112 113 114 |
# File 'lib/reckon/app.rb', line 110
#
# Closes the output file (unless it is STDOUT), prints "Exiting." and
# terminates the process via `exit`.
def finish
  options[:output_file].close unless options[:output_file] == STDOUT
  puts "Exiting."
  exit
end
#guess_account(row) ⇒ Object
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
# File 'lib/reckon/app.rb', line 121
#
# Guesses the account for a row by scoring its description tokens against
# the per-account token counts learned so far (tf-idf weighted dot product).
#
# @param row [Hash] must provide :description
# @return [Object, nil] the best-scoring account name, or nil when no
#   account vector was built
def guess_account(row)
  query_tokens = tokenize(row[:description])

  search_vector = []
  account_vectors = {}

  query_tokens.each do |token|
    # idf: fewer accounts containing the token => higher weight.
    idf = Math.log((accounts.keys.length + 1) / ((tokens[token] || {}).keys.length.to_f + 1))
    tf = 1.0 / query_tokens.length.to_f
    search_vector << tf*idf

    accounts.each do |account, total_terms|
      tf = (tokens[token] && tokens[token][account]) ? tokens[token][account] / total_terms.to_f : 0
      account_vectors[account] ||= []
      account_vectors[account] << tf*idf
    end
  end

  # Should I normalize the vectors?  Probably unnecessary due to tf-idf and short documents.

  account_vectors = account_vectors.to_a.map do |account, account_vector|
    { :cosine => (0...account_vector.length).to_a.inject(0) { |m, i| m + search_vector[i] * account_vector[i] },
      :account => account }
  end

  account_vectors.sort! {|a, b| b[:cosine] <=> a[:cosine] }
  account_vectors.first && account_vectors.first[:account]
end
#learn! ⇒ Object
44 45 46 47 48 49 50 |
# File 'lib/reckon/app.rb', line 44
#
# Learns accounts from the ledger named by options[:existing_ledger_file].
# Does nothing when that option is not set.
#
# @raise [RuntimeError] when the configured ledger file does not exist
# @return [Object, nil] the result of learn_from, or nil when no ledger file
#   is configured
def learn!
  ledger_path = options[:existing_ledger_file]
  return unless ledger_path

  # File.exist? instead of File.exists?: the latter was removed in Ruby 3.2.
  fail "#{ledger_path} doesn't exist!" unless File.exist?(ledger_path)
  learn_from(File.read(ledger_path))
end
#learn_about_account(account, data) ⇒ Object
52 53 54 55 56 57 58 59 60 |
# File 'lib/reckon/app.rb', line 52
#
# Records every token of +data+ against +account+: increments
# tokens[token][account] and the account's total in accounts[account].
#
# @param account [Object] account name used as hash key
# @param data [String] text to tokenize
# @return [Array<String>] the tokens of +data+
def learn_about_account(account, data)
  accounts[account] ||= 0
  tokenize(data).each do |token|
    tokens[token] ||= {}
    tokens[token][account] ||= 0
    tokens[token][account] += 1
    accounts[account] += 1
  end
end
#learn_from(ledger) ⇒ Object
29 30 31 32 33 34 35 36 37 38 |
# File 'lib/reckon/app.rb', line 29
#
# Parses ledger text with LedgerParser and learns from every entry: each
# account other than options[:bank_account] is associated with the entry's
# description and amount, and every (date, formatted amount) pair is marked
# in `seen`.
#
# @param ledger [String] ledger text handed to LedgerParser.new
def learn_from(ledger)
  LedgerParser.new(ledger).entries.each do |entry|
    entry[:accounts].each do |account|
      # The bank account itself is not learned, only its counterparts.
      unless account[:name] == options[:bank_account]
        learn_about_account(account[:name], [entry[:desc], account[:amount]].join(" "))
      end
      seen[entry[:date]] ||= {}
      seen[entry[:date]][pretty_money(account[:amount])] = true
    end
  end
end
#ledger_format(row, line1, line2) ⇒ Object
150 151 152 153 154 155 |
# File 'lib/reckon/app.rb', line 150
#
# Formats one ledger entry: a header line with date and description followed
# by two tab-indented posting lines and a blank line.
#
# @param row [Hash] must provide :pretty_date and :description
# @param line1 [Array] first posting; its first and last elements are
#   printed as account and amount
# @param line2 [Array] second posting, same shape as +line1+
# @return [String] the formatted entry
def ledger_format(row, line1, line2)
  out = "#{row[:pretty_date]}\t#{row[:description]}\n"
  out += "\t#{line1.first}\t\t\t\t\t#{line1.last}\n"
  out += "\t#{line2.first}\t\t\t\t\t#{line2.last}\n\n"
  out
end
#merge_columns(a, b) ⇒ Object
256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 |
# File 'lib/reckon/app.rb', line 256
#
# Returns a copy of the column list in which column +a+ is replaced by the
# cell-wise concatenation "a-cell b-cell" and column +b+ is removed.
# Missing cells of column +b+ are treated as "".
#
# @param a [Integer] index of the column that receives the merged cells
# @param b [Integer] index of the column merged into +a+ and dropped
# @return [Array<Array<String>>] the merged column list
def merge_columns(a, b)
  output_columns = []
  columns.each_with_index do |column, index|
    if index == a
      new_column = []
      column.each_with_index do |row, row_index|
        new_column << row + " " + (columns[b][row_index] || '')
      end
      output_columns << new_column
    elsif index == b
      # skip
    else
      output_columns << column
    end
  end
  output_columns
end
#money_for(index) ⇒ Object
157 158 159 160 161 162 163 |
# File 'lib/reckon/app.rb', line 157
#
# Returns the numeric amount of row +index+. The cells of all money columns
# are concatenated; with options[:comma_separates_cents] periods are dropped
# and commas become the decimal point. The amount is negative when the text
# contains "(" or "-".
#
# @param index [Integer] row index
# @return [Float] the signed amount
def money_for(index)
  value = money_column_indices.inject("") { |m, i| m + columns[i][index] }
  value = value.gsub(/\./, '').gsub(/,/, '.') if options[:comma_separates_cents]
  cleaned_value = value.gsub(/[^\d\.]/, '').to_f
  cleaned_value *= -1 if value =~ /[\(\-]/
  cleaned_value
end
#output(ledger_line) ⇒ Object
116 117 118 119 |
# File 'lib/reckon/app.rb', line 116
#
# Writes +ledger_line+ to options[:output_file] and flushes immediately.
#
# @param ledger_line [String] text to append
def output(ledger_line)
  options[:output_file].puts ledger_line
  options[:output_file].flush
end
#output_table ⇒ Object
198 199 200 201 202 203 204 205 206 |
# File 'lib/reckon/app.rb', line 198
#
# Prints all rows as a Terminal::Table with Date, Amount and Description
# columns, in the order produced by each_row_backwards.
def output_table
  output = Terminal::Table.new do |t|
    t.headings = 'Date', 'Amount', 'Description'
    each_row_backwards do |row|
      t << [ row[:pretty_date], row[:pretty_money], row[:description] ]
    end
  end
  puts output
end
#parse ⇒ Object
335 336 337 338 339 340 |
# File 'lib/reckon/app.rb', line 335
#
# Reads the CSV text from options[:string] or, failing that, from the file
# options[:file], parses it into @csv_data and drops the first row when
# options[:contains_header] is set. The column separator is
# options[:csv_separator], defaulting to ','.
#
# @return [Array<Array>] the parsed rows
def parse
  data = options[:string] || File.read(options[:file])
  # FasterCSV is only needed on Ruby 1.8; from 1.9 on the standard CSV
  # library is used. The original test (`=~ /^1\.9/ ? CSV : FasterCSV`)
  # wrongly selected FasterCSV on every Ruby >= 2.0.
  csv_library = RUBY_VERSION =~ /^1\.8/ ? FasterCSV : CSV
  @csv_data = csv_library.parse(data.strip, :col_sep => options[:csv_separator] || ',')
  csv_data.shift if options[:contains_header]
  csv_data
end
#pretty_date_for(index) ⇒ Object
190 191 192 |
# File 'lib/reckon/app.rb', line 190
#
# Formats the date of row +index+ as "YYYY/MM/DD".
#
# @param index [Integer] row index
# @return [String] the formatted date
def pretty_date_for(index)
  date_for(index).strftime("%Y/%m/%d")
end
#pretty_money(amount, negate = false) ⇒ Object
169 170 171 |
# File 'lib/reckon/app.rb', line 169
#
# Formats an amount with two decimals and a dollar sign placed after any
# minus sign, e.g. " $12.50" or "-$3.00".
#
# The leading space depends on the sign of +amount+ as passed in, not on the
# sign after negation, so pretty_money(12.5, true) yields " -$12.50".
#
# @param amount [Numeric] amount to format
# @param negate [Object] when truthy, the amount is multiplied by -1 first
# @return [String] the formatted amount
def pretty_money(amount, negate = false)
  padding = amount >= 0 ? " " : ""
  formatted = sprintf("%0.2f", amount * (negate ? -1 : 1))
  # Insert "$" after an optional leading "-" and before the first digit.
  padding + formatted.gsub(/^((\-)|)(?=\d)/, '\1$')
end
#pretty_money_for(index, negate = false) ⇒ Object
165 166 167 |
# File 'lib/reckon/app.rb', line 165
#
# Formats the amount of row +index+ with pretty_money.
#
# @param index [Integer] row index
# @param negate [Object] passed through to pretty_money
# @return [String] the formatted amount
def pretty_money_for(index, negate = false)
  pretty_money(money_for(index), negate)
end
#settings ⇒ Object
422 423 424 |
# File 'lib/reckon/app.rb', line 422
#
# Instance-level shortcut for the class-level settings.
#
# @return [Object] whatever self.class.settings returns
def settings
  self.class.settings
end
#tokenize(str) ⇒ Object
62 63 64 |
# File 'lib/reckon/app.rb', line 62
#
# Splits a string into lower-case tokens on whitespace and hyphens.
#
# @param str [String] text to tokenize
# @return [Array<String>] the tokens
def tokenize(str)
  str.downcase.split(/[\s\-]/)
end
#walk_backwards ⇒ Object
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
# File 'lib/reckon/app.rb', line 66
#
# Interactive main loop: shows every row (in each_row_backwards order), asks
# which account it belongs to (defaulting to guess_account), then formats,
# learns from and outputs the resulting ledger entry.
#
# Rows matching a previously seen date/amount are skipped automatically
# until the first unseen row has been encountered. Answering "q"/"quit"
# calls finish; "s"/"skip" skips the row.
def walk_backwards
  seen_anything_new = false
  each_row_backwards do |row|
    puts Terminal::Table.new(:rows => [ [ row[:pretty_date], row[:pretty_money], row[:description] ] ])

    if already_seen?(row)
      puts "NOTE: This row is very similar to a previous one!"
      if !seen_anything_new
        puts "Skipping..."
        next
      end
    else
      seen_anything_new = true
    end

    ledger = if row[:money] > 0
      # Income: the bank account receives the amount, the chosen account
      # gets the negated amount.
      out_of_account = ask("Which account provided this income? ([account]/[q]uit/[s]kip) ") { |q| q.default = guess_account(row) }
      finish if out_of_account == "quit" || out_of_account == "q"
      if out_of_account == "skip" || out_of_account == "s"
        puts "Skipping"
        next
      end

      ledger_format( row,
                     [options[:bank_account], row[:pretty_money]],
                     [out_of_account, row[:pretty_money_negated]] )
    else
      # Expense: the chosen account gets the negated amount, the bank
      # account the original amount.
      into_account = ask("To which account did this money go? ([account]/[q]uit/[s]kip) ") { |q| q.default = guess_account(row) }
      finish if into_account == "quit" || into_account == 'q'
      if into_account == "skip" || into_account == 's'
        puts "Skipping"
        next
      end

      ledger_format( row,
                     [into_account, row[:pretty_money_negated]],
                     [options[:bank_account], row[:pretty_money]] )
    end

    learn_from(ledger)
    output(ledger)
  end
end