Class: Reckon::CSVParser
- Inherits:
-
Object
- Object
- Reckon::CSVParser
- Defined in:
- lib/reckon/csv_parser.rb
Instance Attribute Summary collapse
-
#csv_data ⇒ Object
Returns the value of attribute csv_data.
-
#date_column ⇒ Object
Returns the value of attribute date_column.
-
#date_column_index ⇒ Object
Returns the value of attribute date_column_index.
-
#description_column_indices ⇒ Object
Returns the value of attribute description_column_indices.
-
#money_column ⇒ Object
Returns the value of attribute money_column.
-
#money_column_indices ⇒ Object
Returns the value of attribute money_column_indices.
-
#options ⇒ Object
Returns the value of attribute options.
Class Method Summary collapse
Instance Method Summary collapse
- #columns ⇒ Object
- #date_for(index) ⇒ Object
- #description_for(index) ⇒ Object
- #detect_columns ⇒ Object
-
#detect_sign_column ⇒ Object
In some CSV files, whether an amount is negative or positive is indicated in a separate column.
- #evaluate_columns(cols) ⇒ Object
- #evaluate_two_money_columns(columns, id1, id2, unmerged_results) ⇒ Object
- #filter_csv ⇒ Object
- #found_double_money_column(id1, id2) ⇒ Object
-
#initialize(options = {}) ⇒ CSVParser
constructor
A new instance of CSVParser.
- #merge_columns(a, b) ⇒ Object
- #money_for(index) ⇒ Object
- #parse ⇒ Object
- #pretty_date_for(index) ⇒ Object
- #pretty_money(amount, negate = false) ⇒ Object
- #pretty_money_for(index, negate = false) ⇒ Object
- #settings ⇒ Object
Constructor Details
#initialize(options = {}) ⇒ CSVParser
Returns a new instance of CSVParser.
8 9 10 11 12 13 14 |
# File 'lib/reckon/csv_parser.rb', line 8
# Builds a parser and immediately runs the whole pipeline: parse the CSV,
# filter out ignored columns, then detect the money/date/description columns.
#
# @param options [Hash] parser options; :currency is set to '$' when it is
#   missing or falsy (the hash passed in is the one that gets updated)
# @return [CSVParser] a new instance of CSVParser
def initialize(options = {})
  self.options = options
  self.options[:currency] ||= '$'
  parse
  filter_csv
  detect_columns
end
Instance Attribute Details
#csv_data ⇒ Object
Returns the value of attribute csv_data.
6 7 8 |
# File 'lib/reckon/csv_parser.rb', line 6
# Returns the value of attribute csv_data.
#
# @return [Object] whatever is currently stored in @csv_data (nil when unset)
def csv_data
  @csv_data
end
#date_column ⇒ Object
Returns the value of attribute date_column.
6 7 8 |
# File 'lib/reckon/csv_parser.rb', line 6
# Returns the value of attribute date_column.
#
# @return [Object] whatever is currently stored in @date_column (nil when unset)
def date_column
  @date_column
end
#date_column_index ⇒ Object
Returns the value of attribute date_column_index.
6 7 8 |
# File 'lib/reckon/csv_parser.rb', line 6
# Returns the value of attribute date_column_index.
#
# @return [Object] whatever is currently stored in @date_column_index (nil when unset)
def date_column_index
  @date_column_index
end
#description_column_indices ⇒ Object
Returns the value of attribute description_column_indices.
6 7 8 |
# File 'lib/reckon/csv_parser.rb', line 6
# Returns the value of attribute description_column_indices.
#
# @return [Object] whatever is currently stored in @description_column_indices
#   (nil when unset)
def description_column_indices
  @description_column_indices
end
#money_column ⇒ Object
Returns the value of attribute money_column.
6 7 8 |
# File 'lib/reckon/csv_parser.rb', line 6
# Returns the value of attribute money_column.
#
# @return [Object] whatever is currently stored in @money_column (nil when unset)
def money_column
  @money_column
end
#money_column_indices ⇒ Object
Returns the value of attribute money_column_indices.
6 7 8 |
# File 'lib/reckon/csv_parser.rb', line 6
# Returns the value of attribute money_column_indices.
#
# @return [Object] whatever is currently stored in @money_column_indices
#   (nil when unset)
def money_column_indices
  @money_column_indices
end
#options ⇒ Object
Returns the value of attribute options.
6 7 8 |
# File 'lib/reckon/csv_parser.rb', line 6
# Returns the value of attribute options.
#
# @return [Object] whatever is currently stored in @options (nil when unset)
def options
  @options
end
Class Method Details
.settings ⇒ Object
250 251 252 |
# File 'lib/reckon/csv_parser.rb', line 250
# Class-level reader for the @settings instance variable of the receiver.
#
# @return [Object] the current value of @settings (nil when it was never set)
def self.settings
  @settings
end
Instance Method Details
#columns ⇒ Object
214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 |
# File 'lib/reckon/csv_parser.rb', line 214
# Transposes csv_data (an array of rows) into an array of columns and
# memoizes the result in @columns.
#
# Rows whose cells are all nil or empty are skipped. Every kept cell is
# stripped of surrounding whitespace, with nil cells stored as ''.
# Row lengths are not validated: a short row simply contributes nothing to
# the columns it lacks, so those columns end up with fewer entries.
#
# @return [Array<Array<String>>] one array of cell strings per column
def columns
  @columns ||= csv_data.each_with_object([]) do |row, by_column|
    next if row.all? { |cell| cell.nil? || cell.length == 0 }

    row.each_with_index do |entry, index|
      (by_column[index] ||= []) << (entry || '').strip
    end
  end
end
#date_for(index) ⇒ Object
38 39 40 |
# File 'lib/reckon/csv_parser.rb', line 38
# Looks up the date for a row by delegating to @date_column.for.
#
# @param index [Integer] row index passed straight through to the date column
# @return [Object] whatever @date_column.for(index) returns
def date_for(index)
  @date_column.for(index)
end
#description_for(index) ⇒ Object
46 47 48 |
# File 'lib/reckon/csv_parser.rb', line 46
# Builds the description text for one row by joining the row's cells from
# every description column with "; ".
#
# Missing (nil) and empty cells are left out, runs of spaces are squeezed to
# one, any run of two or more consecutive "; " separators is removed, and the
# result is stripped.
#
# @param index [Integer] row index within each column
# @return [String] the combined description ('' when every cell is blank)
def description_for(index)
  cells = description_column_indices.map { |i| columns[i][index] }
  # A column can be shorter than the row index, in which case the lookup
  # yields nil; treat that the same as an empty cell instead of raising.
  present = cells.reject { |cell| cell.nil? || cell.empty? }
  present.join("; ").squeeze(" ").gsub(/(;\s+){2,}/, '').strip
end
#detect_columns ⇒ Object
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 |
# File 'lib/reckon/csv_parser.rb', line 161
# Decides which columns hold money, the date and the description, and stores
# the outcome in money_column_indices, date_column_index, @date_column,
# @money_column and description_column_indices.
#
# Steps:
# 1. Score every column with evaluate_columns and provisionally pick the one
#    with the highest money score.
# 2. If no column was flagged as a likely money column, look for two adjacent
#    columns that merge into one money column: first by re-scoring the merged
#    columns, then with the more specific evaluate_two_money_columns test.
# 3. From the remaining columns pick the one with the highest date score.
# 4. Every column left over is treated as a description column.
#
# NOTE(review): the second argument to DateColumn.new / MoneyColumn.new is
# assumed to be the parser options -- confirm against those classes.
def detect_columns
  results, found_likely_money_column = evaluate_columns(columns)
  self.money_column_indices = [results.sort { |a, b| b[:money_score] <=> a[:money_score] }.first[:index]]

  unless found_likely_money_column
    found_likely_double_money_columns = false

    # First attempt: merge each adjacent pair and re-score the merged layout.
    0.upto(columns.length - 2) do |i|
      next unless MoneyColumn.new(columns[i]).merge!(MoneyColumn.new(columns[i + 1]))

      _, found_likely_double_money_columns = evaluate_columns(merge_columns(i, i + 1))
      if found_likely_double_money_columns
        found_double_money_column(i, i + 1)
        break
      end
    end

    unless found_likely_double_money_columns
      # Second attempt: try a more specific test on each adjacent pair.
      0.upto(columns.length - 2) do |i|
        next unless MoneyColumn.new(columns[i]).merge!(MoneyColumn.new(columns[i + 1]))

        _, found_likely_double_money_columns = evaluate_two_money_columns(columns, i, i + 1, results)
        if found_likely_double_money_columns
          found_double_money_column(i, i + 1)
          break
        end
      end
    end

    if !found_likely_double_money_columns && !settings[:testing]
      puts "I didn't find a high-likelyhood money column, but I'm taking my best guess with column #{money_column_indices.first + 1}."
    end
  end

  # Money columns can no longer be the date column, and the date column
  # cannot be a description column.
  results.reject! { |i| money_column_indices.include?(i[:index]) }
  self.date_column_index = results.sort { |a, b| b[:date_score] <=> a[:date_score] }.first[:index]
  results.reject! { |i| i[:index] == date_column_index }
  @date_column = DateColumn.new(columns[date_column_index], options)

  @money_column = MoneyColumn.new(columns[money_column_indices[0]], options)
  if money_column_indices.length == 1
    detect_sign_column if @money_column.positive?
  else
    @money_column.merge!(MoneyColumn.new(columns[money_column_indices[1]], options))
  end

  self.description_column_indices = results.map { |i| i[:index] }
end
#detect_sign_column ⇒ Object
In some CSV files, whether an amount is negative or positive is indicated in a separate column.
136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
# File 'lib/reckon/csv_parser.rb', line 136
# Looks for a column next to the money column that holds exactly two distinct
# values and uses it to negate the matching entries of @money_column in place.
#
# The column to the left of the money column is tried first; the column to
# the right is used when the left one is absent or does not have exactly two
# distinct values. By default the first distinct value marks negative
# amounts. When the first distinct value is "Bij" or starts with "cr"
# (case-insensitive), the second distinct value marks them instead.
#
# @return [nil, Object] nil when nothing was changed
def detect_sign_column
  # This test requires more than two rows, otherwise it leads to false positives.
  return if columns[0].length <= 2

  money_index = @money_column_indices[0]
  signs = []
  if money_index > 0
    column = columns[money_index - 1]
    signs = column.uniq
  end
  if signs.length != 2 && money_index + 1 < columns.length
    column = columns[money_index + 1]
    signs = column.uniq
  end
  return unless signs.length == 2

  # Look for known indicators to decide which of the two values means "negative".
  negative_first = !(signs[0] == "Bij" || signs[0].downcase =~ /^cr/)
  negative_sign = negative_first ? signs[0] : signs[1]

  @money_column.each_with_index do |money, i|
    @money_column[i] = -money if column[i] == negative_sign
  end
end
#evaluate_columns(cols) ⇒ Object
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
# File 'lib/reckon/csv_parser.rb', line 50
# Scores each column for how much it looks like a money column and how much
# it looks like a date column.
#
# Each column is walked from its last entry to its first. The money score
# starts from Money::likelihood of each entry, and the date score is built
# from a set of pattern heuristics. An entry that equals the previously
# visited entry (the one below it) plus some value found in the matching
# csv_data row looks like a running balance, which costs 10 money points.
# A column in which more than a fifth of the entries look negative and more
# than a fifth look positive gets a bonus of 10 points per entry and marks
# the whole evaluation as having found a likely money column.
#
# @param cols [Array<Array<String>>] columns to score
# @return [Array(Array<Hash>, Boolean)] the per-column results
#   ({ :index, :money_score, :date_score }) and the likely-money-column flag
def evaluate_columns(cols)
  results = []
  found_likely_money_column = false

  cols.each_with_index do |column, index|
    money_score = date_score = possible_neg_money_count = possible_pos_money_count = 0
    last = nil

    column.reverse.each_with_index do |entry, row_from_bottom|
      # Rows are looked up by counting back from the end of csv_data.
      row = csv_data[csv_data.length - 1 - row_from_bottom]
      entry = entry.strip
      money_score += Money::likelihood(entry)
      possible_neg_money_count += 1 if entry =~ /^\$?[\-\(]\$?\d+/
      possible_pos_money_count += 1 if entry =~ /^\+?\$?\+?\d+/

      # Number of characters that can appear in a date/time stamp.
      date_char_count = entry.gsub(/[^\-\/\.\d:\[\]]/, '').length
      date_score += 10 if entry =~ /\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)/i
      date_score += 5 if entry =~ /^[\-\/\.\d:\[\]]+$/
      date_score += date_char_count if date_char_count > 3
      date_score -= entry.gsub(/[\-\/\.\d:\[\]]/, '').length
      date_score += 30 if entry =~ /^\d+[:\/\.-]\d+[:\/\.-]\d+([ :]\d+[:\/\.]\d+)?$/
      date_score += 10 if entry =~ /^\d+\[\d+:GMT\]$/i

      # Try to determine if this is a balance column
      entry_as_num = entry.gsub(/[^\-\d\.]/, '').to_f
      if last && entry_as_num != 0 && last != 0
        row.each do |row_entry|
          row_entry = row_entry.to_s.gsub(/[^\-\d\.]/, '').to_f
          if row_entry != 0 && last + row_entry == entry_as_num
            money_score -= 10
            break
          end
        end
      end
      last = entry_as_num
    end

    threshold = column.length / 5.0
    if possible_neg_money_count > threshold && possible_pos_money_count > threshold
      money_score += 10 * column.length
      found_likely_money_column = true
    end

    results << { :index => index, :money_score => money_score, :date_score => date_score }
  end

  [results, found_likely_money_column]
end
#evaluate_two_money_columns(columns, id1, id2, unmerged_results) ⇒ Object
111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
# File 'lib/reckon/csv_parser.rb', line 111
# Tests whether columns id1 and id2 together behave like a single money
# column.
#
# The two columns are merged (via merge_columns) and the merged layout is
# scored with evaluate_columns. If that does not already flag a likely money
# column, the pair is still accepted when the merged column's money score
# beats the money score of both original columns.
#
# @param columns [Object] not read by this method; merge_columns works on the
#   parser's own columns
# @param id1 [Integer] index of the first column (the merged column keeps it)
# @param id2 [Integer] index of the second column
# @param unmerged_results [Array<Hash>] evaluate_columns results for the
#   unmerged columns
# @return [Array(Array<Hash>, Boolean)] merged results and the likely flag
def evaluate_two_money_columns(columns, id1, id2, unmerged_results)
  merged_columns = merge_columns(id1, id2)
  results, found_likely_money_column = evaluate_columns(merged_columns)

  unless found_likely_money_column
    merged_score = results.find { |el| el[:index] == id1 }[:money_score]
    first_score = unmerged_results.find { |el| el[:index] == id1 }[:money_score]
    second_score = unmerged_results.find { |el| el[:index] == id2 }[:money_score]
    found_likely_money_column = true if merged_score > first_score && merged_score > second_score
  end

  [results, found_likely_money_column]
end
#filter_csv ⇒ Object
16 17 18 19 20 21 22 23 24 |
# File 'lib/reckon/csv_parser.rb', line 16
# Drops the columns listed in options[:ignore_columns] and stores the
# remaining ones in @columns. Does nothing when the option is not set.
#
# The option holds 1-based column numbers, so column index 0 is removed by
# listing 1.
#
# @return [Array, nil] the filtered columns, or nil when nothing was filtered
def filter_csv
  ignored = options[:ignore_columns]
  return unless ignored

  @columns = columns.each_with_index
                    .reject { |_column, index| ignored.include?(index + 1) }
                    .map { |column, _index| column }
end
#found_double_money_column(id1, id2) ⇒ Object
126 127 128 129 130 131 132 133 |
# File 'lib/reckon/csv_parser.rb', line 126
# Records that money is split across two columns and, unless
# settings[:testing] is set, prints a notice about it.
#
# @param id1 [Integer] index of the first money column
# @param id2 [Integer] index of the second money column
def found_double_money_column(id1, id2)
  self.money_column_indices = [id1, id2]
  return if settings[:testing]

  puts "It looks like this CSV has two seperate columns for money, one of which shows positive"
  puts "changes and one of which shows negative changes. If this is true, great. Otherwise,"
  puts "please report this issue to us so we can take a look!\n"
end
#merge_columns(a, b) ⇒ Object
94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
# File 'lib/reckon/csv_parser.rb', line 94
# Returns a copy of the column list in which column a is replaced by the
# merge of columns a and b, and column b is left out.
#
# The merged column is produced by MoneyColumn#merge! and then turned back
# into strings via each entry's amount. All other columns are passed through
# unchanged.
#
# @param a [Integer] index of the column that receives the merged values
# @param b [Integer] index of the column merged into a and then dropped
# @return [Array<Array>] the new list of columns (one fewer than before)
def merge_columns(a, b)
  columns.each_with_index.each_with_object([]) do |(column, index), output_columns|
    if index == a
      merged = MoneyColumn.new(column).merge!(MoneyColumn.new(columns[b]))
      output_columns << merged.map { |m| m.amount.to_s }
    elsif index != b
      output_columns << column
    end
  end
end
#money_for(index) ⇒ Object
26 27 28 |
# File 'lib/reckon/csv_parser.rb', line 26
# Returns the money entry for a row by indexing into @money_column.
#
# @param index [Integer] row index
# @return [Object] the entry stored at that index of @money_column
def money_for(index)
  @money_column[index]
end
#parse ⇒ Object
231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 |
# File 'lib/reckon/csv_parser.rb', line 231
# Reads the CSV input and stores the parsed rows in @csv_data.
#
# Options read:
# - :string          raw CSV text; when absent the text is read from :file
# - :file            path of the CSV file to read
# - :encoding        source encoding of the data (defaults to 'BINARY')
# - :csv_separator   column separator (defaults to ',')
# - :contains_header number of leading rows to discard
#
# On Ruby 1.9 and later the data is transcoded to UTF-8, replacing invalid or
# undefined bytes with '?', and parsed with CSV. Older Rubies fall back to
# FasterCSV.
#
# @return [Array<Array>] the parsed rows, without the discarded header rows
def parse
  data = options[:string] || File.read(options[:file])

  # Compare the version numerically so that Ruby 3 and later take the CSV
  # branch too; matching on a "1.9" or "2" prefix sent them to FasterCSV.
  if (RUBY_VERSION.split('.').map(&:to_i) <=> [1, 9]) >= 0
    # Work on a copy so the caller's string is not re-tagged in place.
    data = data.dup.force_encoding(options[:encoding] || 'BINARY')
               .encode('UTF-8', :invalid => :replace, :undef => :replace, :replace => '?')
    csv_engine = CSV
  else
    csv_engine = FasterCSV
  end

  @csv_data = csv_engine.parse(data.strip, :col_sep => options[:csv_separator] || ',')
  @csv_data.shift(options[:contains_header]) if options[:contains_header]
  @csv_data
end
#pretty_date_for(index) ⇒ Object
42 43 44 |
# File 'lib/reckon/csv_parser.rb', line 42
# Returns the display form of a row's date by delegating to
# @date_column.pretty_for.
#
# @param index [Integer] row index passed straight through to the date column
# @return [Object] whatever @date_column.pretty_for(index) returns
def pretty_date_for(index)
  @date_column.pretty_for(index)
end
#pretty_money(amount, negate = false) ⇒ Object
34 35 36 |
# File 'lib/reckon/csv_parser.rb', line 34
# Formats an amount by wrapping it in a Money and calling #pretty on it.
#
# NOTE(review): the second argument to Money.new is assumed to be the parser
# options -- confirm against Money#initialize.
#
# @param amount [Object] value handed to Money.new
# @param negate [Boolean] passed through to Money#pretty
# @return [Object] whatever Money#pretty returns
def pretty_money(amount, negate = false)
  Money.new(amount, options).pretty(negate)
end
#pretty_money_for(index, negate = false) ⇒ Object
30 31 32 |
# File 'lib/reckon/csv_parser.rb', line 30
# Formats the money entry of a row by calling #pretty on money_for(index).
#
# @param index [Integer] row index
# @param negate [Boolean] passed through to the entry's #pretty
# @return [Object] whatever the entry's #pretty returns
def pretty_money_for(index, negate = false)
  money_for(index).pretty(negate)
end
#settings ⇒ Object
254 255 256 |
# File 'lib/reckon/csv_parser.rb', line 254
# Instance-level shortcut to the class-level settings.
#
# @return [Object] whatever self.class.settings returns
def settings
  self.class.settings
end