Class: FnbPdfToCsv
- Inherits:
-
Object
- Object
- FnbPdfToCsv
- Defined in:
- lib/fnb_pdf_to_csv.rb,
lib/fnb_pdf_to_csv/version.rb
Constant Summary collapse
- AMOUNT =
'\(?[0-9][0-9,]*\.[0-9]{2}\)?\s?(Cr)?'
- DATE =
'\d{2} \w{3}'
- VERSION =
"0.0.3"
Instance Attribute Summary collapse
-
#lines ⇒ Object
readonly
Returns the value of attribute lines.
Class Method Summary collapse
Instance Method Summary collapse
- #clean_amount(amount) ⇒ Object
- #clean_date(date) ⇒ Object
- #clean_line(line) ⇒ Object
-
#initialize(file) ⇒ FnbPdfToCsv
constructor
A new instance of FnbPdfToCsv.
- #mangle_line!(arr) ⇒ Object
- #output(file, separator = ',') ⇒ Object
- #parse ⇒ Object
- #parse_line(line) ⇒ Object
- #parse_page(page) ⇒ Object
- #statement(file) ⇒ Object
- #statement_line(line, count) ⇒ Object
Constructor Details
#initialize(file) ⇒ FnbPdfToCsv
Returns a new instance of FnbPdfToCsv.
11 12 13 14 |
# File 'lib/fnb_pdf_to_csv.rb', line 11
# Builds a parser for one PDF document.
#
# Only the reader is created here; no page is read until #parse is called.
#
# @param file the PDF source handed straight to PDF::Reader.new
def initialize(file)
  @lines = []
  @reader = ::PDF::Reader.new(file)
end
Instance Attribute Details
#lines ⇒ Object (readonly)
Returns the value of attribute lines.
6 7 8 |
# File 'lib/fnb_pdf_to_csv.rb', line 6
# Read-only access to the rows collected so far.
#
# @return [Array] the array stored in @lines (the same object, not a copy)
def lines
  @lines
end
Class Method Details
.parse(file) ⇒ Object
16 17 18 19 20 |
# File 'lib/fnb_pdf_to_csv.rb', line 16
# Convenience constructor: builds a parser for +file+, runs #parse on it
# and hands back the parser (not the result of #parse).
#
# @param file passed unchanged to .new
# @return the freshly created, already parsed instance
def self.parse(file)
  new(file).tap(&:parse)
end
Instance Method Details
#clean_amount(amount) ⇒ Object
75 76 77 78 79 80 81 82 83 |
# File 'lib/fnb_pdf_to_csv.rb', line 75
# Converts an amount string captured from the statement into a signed Float.
#
# Sign rules:
# * "(1,234.56)" - parenthesised - becomes negative
# * "1,234.56Cr" / "1,234.56 Cr" - becomes positive
# * anything else - becomes negative
#
# Thousands separators (",") are removed in every branch. Previously the
# parenthesised branch skipped this, so "(1,234.56)" was read as -1.0.
#
# @param amount [String, nil] raw amount text
# @return [Float, nil] the signed value, or nil when +amount+ is nil
def clean_amount(amount)
  return amount if amount.nil?

  text = amount.strip
  if text.start_with?('(') && text.end_with?(')')
    return 0 - text[1..-2].tr(',', '').to_f
  end

  if text.end_with?('Cr')
    # String#to_f ignores the blank that may sit between the digits and "Cr".
    text[0..-3].tr(',', '').to_f
  else
    # "0 - x" rather than "-x" so that a zero amount stays 0.0 instead of -0.0.
    0 - text.tr(',', '').to_f
  end
end
#clean_date(date) ⇒ Object
70 71 72 73 |
# File 'lib/fnb_pdf_to_csv.rb', line 70
# Turns a "DD Mon" statement date (e.g. "05 Jan") into "YYYY-MM-DD".
#
# The date text carries no year, so one has to be supplied. It defaults to
# the current year, which is what this method always used before; pass
# +year+ explicitly when converting a statement from another year.
#
# @param date [String] day of month and three-letter month name,
#   separated by whitespace
# @param year [Integer] year to place in the result
# @return [String] the date formatted as "%Y-%m-%d"
def clean_date(date, year = Time.new.year)
  # Bare #split skips leading blanks; split(/\s/) would yield an empty first
  # field for " 05 Jan" and shift day/month by one position.
  day, month = date.split
  Time.new(year, month, day.to_i).strftime('%Y-%m-%d')
end
#clean_line(line) ⇒ Object
85 86 87 88 89 90 91 92 93 |
# File 'lib/fnb_pdf_to_csv.rb', line 85
# Produces a normalised copy of one parsed row: position 0 goes through
# #clean_date and positions 4, 5 and 6 go through #clean_amount.
# The row passed in is left untouched.
#
# @param line [Array] a row as stored in #lines
# @return [Array] a new array with the converted values
def clean_line(line)
  cleaned = line.dup
  cleaned[0] = clean_date(cleaned[0])
  (4..6).each { |idx| cleaned[idx] = clean_amount(cleaned[idx]) }
  cleaned
end
#mangle_line!(arr) ⇒ Object
95 96 97 98 99 100 101 102 103 104 105 106 107 |
# File 'lib/fnb_pdf_to_csv.rb', line 95
# Reshapes, in place, the array form of a regex match into a seven-column row.
#
# Steps, in order:
# 1. drop element 0;
# 2. strip every non-nil element;
# 3. delete the elements at index 3, then 4, then 5 (each index is applied
#    to the already shortened array);
# 4. split element 1 on runs of two or more whitespace characters and put
#    its first three pieces where element 1 was (missing pieces become nil,
#    extra pieces are discarded).
#
# @param arr [Array<String, nil>] MatchData#to_a as passed by #parse_line
# @return [Array] the same +arr+ object, modified
def mangle_line!(arr)
  arr.shift
  arr.map! { |field| field.nil? ? nil : field.strip }

  # Cleanup
  [3, 4, 5].each { |idx| arr.delete_at(idx) }

  # The three descriptions arrive as one string: split them up and
  # splice them back in as three separate columns.
  first, second, third = arr[1].split(/\s{2,}/)
  arr[1, 1] = [first, second, third]
  arr
end
#output(file, separator = ',') ⇒ Object
26 27 28 29 30 31 32 33 |
# File 'lib/fnb_pdf_to_csv.rb', line 26
# Writes the collected rows to +file+ as CSV: one header row followed by
# every row from #lines after it has been passed through #clean_line.
#
# The file is opened with the block form of File.open so the handle is
# flushed and closed even if a row fails to convert. It used to be opened
# with File.new and never closed.
#
# @param file [String] path of the file to create or overwrite
# @param separator [String] column separator passed to to_csv as col_sep
# @return [Array] the value of #lines
def output(file, separator = ',')
  header = [
    'Date', 'Description1', 'Description2', 'Description3',
    'Amount', 'Balance', 'Accrued Charges'
  ]

  File.open(file, 'w') do |f|
    f.write header.to_csv(col_sep: separator)
    lines.each { |line| f.write clean_line(line).to_csv(col_sep: separator) }
  end
end
#parse ⇒ Object
22 23 24 |
# File 'lib/fnb_pdf_to_csv.rb', line 22
# Walks every page exposed by the reader and feeds each one to #parse_page.
#
# @return the collection returned by @reader.pages
def parse
  pages = @reader.pages
  pages.each do |page|
    parse_page(page)
  end
end
#parse_line(line) ⇒ Object
64 65 66 67 68 |
# File 'lib/fnb_pdf_to_csv.rb', line 64
# Tests one line of page text against the transaction pattern
# (a DATE, free text, two AMOUNTs and an optional third AMOUNT).
# On a match, the match array is run through #mangle_line! and appended
# to @lines; any other line is ignored.
#
# @param line [String] a single line of text
# @return [Array, nil] @lines after the append, or nil when nothing matched
def parse_line(line)
  pattern = /^\s*(#{DATE})(.*?)(#{AMOUNT})\s+(#{AMOUNT})(\s+#{AMOUNT})?$/
  found = line.match(pattern)
  return nil if found.nil?

  @lines.push(mangle_line!(found.to_a))
end
#parse_page(page) ⇒ Object
60 61 62 |
# File 'lib/fnb_pdf_to_csv.rb', line 60
# Splits a page's text into lines and hands each one to #parse_line.
#
# @param page an object responding to #text
# @return the page text, as returned by each_line
def parse_page(page)
  content = page.text
  content.each_line do |text_line|
    parse_line(text_line)
  end
end
#statement(file) ⇒ Object
35 36 37 38 39 40 41 42 43 44 |
# File 'lib/fnb_pdf_to_csv.rb', line 35
# Writes the collected rows to +file+ in the "statement" layout: a fixed
# header line, then one comma-joined line per row as built by
# #statement_line, numbered from 1.
#
# The file is opened with the block form of File.open so the handle is
# flushed and closed when writing ends, including on error. It used to be
# opened with File.new and never closed.
#
# @param file [String] path of the file to create or overwrite
# @return [Array] the value of #lines
def statement(file)
  File.open(file, 'w') do |f|
    f.write "5,'Number','Date','Description1','Description2','Description3','Amount','Balance','Accrued Charges'\n"
    lines.each_with_index do |line, index|
      f.write statement_line(line, index + 1).join(',') + "\n"
    end
  end
end
#statement_line(line, count) ⇒ Object
46 47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/fnb_pdf_to_csv.rb', line 46
# Builds one output row for #statement from a parsed row.
#
# Result layout: the literal 5, +count+, the date wrapped in single quotes,
# the three descriptions wrapped in double quotes (nil or empty ones are
# left as they are), then amount and balance converted by #clean_amount.
# Anything after the balance is copied through unchanged.
#
# NOTE(review): the accrued-charges column (index 8) is not passed through
# #clean_amount here although #clean_line does convert it - confirm whether
# that difference is intended.
#
# @param line [Array] a row as stored in #lines; not modified
# @param count [Integer] sequence number placed in the second column
# @return [Array] the new row
def statement_line(line, count)
  row = [5, count] + line
  row[2] = "'#{row[2]}'"

  (3..5).each do |idx|
    text = row[idx]
    next if text.nil? || text == ''

    row[idx] = '"' + text + '"'
  end

  [6, 7].each { |idx| row[idx] = clean_amount(row[idx]) }
  row
end