Class: ExtractDates
- Inherits:
-
Object
- Object
- ExtractDates
- Defined in:
- lib/extractdates.rb
Instance Method Summary collapse
-
#addItem(date, file, title, description, blob, regex) ⇒ Object
Adds an item to the output list unless an equivalent entry is already present.
- #chunk(file) ⇒ Object
- #dateExtract(blob, file, title, description) ⇒ Object
-
#initialize(text) ⇒ ExtractDates
constructor
A new instance of ExtractDates.
Constructor Details
#initialize(text) ⇒ ExtractDates
Returns a new instance of ExtractDates.
8 9 10 11 |
# File 'lib/extractdates.rb', line 8
# Builds an extractor around the given text.
#
# @param text [Object] the content later walked by #chunk; stored as-is
def initialize(text)
  @text = text
  # Accumulates the result hashes pushed by #addItem.
  @output = []
end
Instance Method Details
#addItem(date, file, title, description, blob, regex) ⇒ Object
Adds an item to the output list unless an equivalent entry is already present.
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
# File 'lib/extractdates.rb', line 60
# Records one extracted date and then continues scanning the remaining text.
#
# An entry is appended to @output only when no existing entry has the same
# date, the same file, and the same title (titles are compared via #to_s).
# Afterwards the matched text is removed from +blob+ IN PLACE and
# #dateExtract is called again on what is left.
#
# @param date [Object] value stored under :date
# @param file [Object] value stored under :file
# @param title [Object] value stored under :title
# @param description [Object] value stored under :description
# @param blob [String] text being scanned; mutated by slice!
# @param regex [String, Regexp] the matched text to cut out of +blob+
# @return [Object] whatever the follow-up #dateExtract call returns
def addItem(date, file, title, description, blob, regex)
  entry = { date: date, file: file, title: title, description: description }

  already_recorded = @output.any? do |existing|
    existing[:date] == entry[:date] &&
      existing[:file] == entry[:file] &&
      existing[:title].to_s == entry[:title].to_s
  end
  @output.push(entry) unless already_recorded

  # Remove the handled match so the next pass sees only the remaining text.
  blob.slice!(regex)
  dateExtract(blob, file, title, description)
end
#chunk(file) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
# File 'lib/extractdates.rb', line 13
# Walks the stored text and runs date extraction on every segment.
#
# For each element yielded by @text.chunk, paragraph(element).segment is
# iterated and each segment is handed to #dateExtract as both the blob and
# the title, with the enclosing element as the description.
# NOTE(review): #chunk on @text, #paragraph and #segment are not defined in
# the visible source — presumably supplied by a text-processing library.
#
# @param file [Object] identifier passed through to every #dateExtract call
# @return [Array] the accumulated @output list
def chunk(file)
  return @output unless @text

  begin
    @text.chunk.each do |section|
      paragraph(section).segment.each do |sentence|
        dateExtract(sentence, file, sentence, section)
      end
    end
  rescue StandardError
    # Best effort: any failure while walking the text is ignored and whatever
    # was collected so far is still returned.
  end

  @output
end
#dateExtract(blob, file, title, description) ⇒ Object
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
# File 'lib/extractdates.rb', line 29
# Looks for the first date-like substring in +blob+ and records it via
# #addItem. #addItem cuts the matched text out of the string and calls this
# method again, so the two methods together visit every date in the text.
#
# Patterns are tried in a fixed priority order; the first one that matches
# decides which substring is removed. The recorded date itself comes from
# DateTime.parse applied to the whole remaining text, not only the match.
#
# When +blob+ is a String it is modified in place (matched dates are sliced
# out by #addItem); callers that pass the same object as +title+ will see
# that change there as well.
#
# Any StandardError (an unparsable date, for instance) ends extraction for
# this blob silently; entries recorded before the error are kept.
#
# @param blob [#to_s] text to scan
# @param file [Object] passed through to #addItem
# @param title [Object] passed through to #addItem
# @param description [Object] passed through to #addItem
# @return [nil]
def dateExtract(blob, file, title, description)
  blobstring = blob.to_s

  # Month names, long or abbreviated. October was missing from the original
  # alternation, so "October 2015" style dates were never detected.
  months = 'Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|' \
           'Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?'

  patterns = [
    /(\d{1,2})\/(\d{1,2})\/(\d{2,4})/,                      # 1/2/2003
    /(\d{1,2})-(\d{1,2})-(\d{2,4})/,                        # 1-2-2003
    /(.+?)(\w+ \d{1,2}(st|nd|rd|th|), \d{4})/,              # ... March 3rd, 2004
    /(.+?) ((?:#{months}) \d{2}(st|nd|rd|th|)( |\)|\]))/,   # ... March 03
    # Month plus four-digit year. The class was "[0,9]", which also accepted
    # a literal comma as the second character of the year.
    /((?:#{months}) [1-2][09]\d{2}( |\)|\]))/,
    # NOTE(review): anything matching the two ISO-style patterns below also
    # matches one of the first two patterns (on the last two digits of the
    # year), so these are effectively shadowed. Order kept as in the original.
    /(\d{4})-(\d{2})-(\d{2})/,                              # 2003-04-05
    /(\d{4})\/(\d{2})\/(\d{2})/                             # 2003/04/05
  ]

  found = nil
  patterns.each do |pattern|
    found = blobstring.match(pattern)
    break if found
  end
  return unless found

  # Parse the string form: the original passed +blob+ itself, which raised a
  # TypeError (silently swallowed) whenever blob was not already a String.
  parsed = DateTime.parse(blobstring).to_s
  addItem(parsed, file, title, description, blobstring, found.to_s)
  nil
rescue StandardError
  # Deliberate best effort: stop scanning this blob on any error.
  nil
end