Class: ExtractDates

Inherits:
Object
  • Object
show all
Defined in:
lib/extractdates.rb

Instance Method Summary collapse

Constructor Details

#initialize(text) ⇒ ExtractDates

Returns a new instance of ExtractDates.



8
9
10
11
# File 'lib/extractdates.rb', line 8

# Builds an extractor around +text+.
#
# @param text the document to scan; stored as-is and read later by #chunk
def initialize(text)
  # Source document, untouched until #chunk is called.
  @text = text
  # Accumulates one hash per distinct date found (filled by #addItem).
  @output = []
end

Instance Method Details

#addItem(date, file, title, description, blob, regex) ⇒ Object

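Adds an item to the output array unless an entry with the same date, file and title is already present, then removes the matched text from the blob and continues extracting dates from what remains.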



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/extractdates.rb', line 60

# Records one extracted date and then continues scanning the remaining text.
#
# An entry is appended to @output only when no existing entry has the same
# date, file and title (titles are compared by their string form).
#
# @param date        value stored under :date
# @param file        value stored under :file
# @param title       value stored under :title
# @param description value stored under :description
# @param blob        text the date was found in; MUTATED in place
# @param regex       the text (or pattern) to remove from +blob+
# @return whatever the follow-up #dateExtract call returns
def addItem(date, file, title, description, blob, regex)
  entry = { date: date, file: file, title: title, description: description }

  already_recorded = @output.any? do |existing|
    existing[:date] == date &&
      existing[:file] == file &&
      existing[:title].to_s == title.to_s
  end
  @output << entry unless already_recorded

  # Strip the first occurrence of the matched text so the recursive scan
  # below cannot find the same date again.
  blob.slice!(regex)
  dateExtract(blob, file, title, description)
end

#chunk(file) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/extractdates.rb', line 13

# Walks the stored text piece by piece and collects every date found.
#
# Each element yielded by @text.chunk is passed to +paragraph+, and each
# element of the resulting +segment+ list is handed to #dateExtract as both
# the text to scan and the entry title, with the enclosing chunk as the
# description. (+chunk+, +paragraph+ and +segment+ are defined outside this
# method.)
#
# @param file value recorded as :file on every entry
# @return [Array<Hash>] the accumulated @output, also when @text is unset or
#   when processing fails part-way
def chunk(file)
  return @output unless @text

  begin
    @text.chunk.each do |section|
      paragraph(section).segment.each do |sentence|
        dateExtract(sentence, file, sentence, section)
      end
    end
  rescue StandardError
    # Best effort: any failure stops the scan but keeps what was collected.
  end

  @output
end

#dateExtract(blob, file, title, description) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/extractdates.rb', line 29

# Looks for the first recognisable date in +blob+ and records it via #addItem.
#
# The patterns below are tried in order and the first one that matches wins.
# #addItem removes the matched text from the string and calls back into this
# method, so the scan repeats until no pattern matches any more.
#
# The recorded date is DateTime.parse of the WHOLE text, not of the matched
# substring; the match only decides what gets removed before the next pass.
#
# @param blob [#to_s]  text to scan; when it is a String it is mutated by #addItem
# @param file          passed through to #addItem
# @param title         passed through to #addItem
# @param description   passed through to #addItem
# @return [nil]
def dateExtract(blob, file, title, description)
  blobstring = blob.to_s

  # Month names, short or long. October was missing from the original list.
  months = 'Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|' \
           'Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?'

  # NOTE(review): any text matching the two year-first patterns at the end also
  # matches one of the first two patterns (e.g. "20-01-15" inside
  # "2020-01-15"), so they look unreachable. Order kept as-is because changing
  # it alters which text is removed between passes.
  patterns = [
    %r{(\d{1,2})/(\d{1,2})/(\d{2,4})},                       # 1/2/2020
    /(\d{1,2})-(\d{1,2})-(\d{2,4})/,                         # 1-2-2020
    /(.+?)(\w+ \d{1,2}(st|nd|rd|th|), \d{4})/,               # ... January 2nd, 2020
    /(.+?) ((?:#{months}) \d{2}(st|nd|rd|th|)( |\)|\]))/,    # ... Oct 12th)
    /((?:#{months}) [1-2][09]\d{2}( |\)|\]))/,               # Oct 2019) -- year class no longer accepts ","
    /(\d{4})-(\d{2})-(\d{2})/,                               # 2020-01-02
    %r{(\d{4})/(\d{2})/(\d{2})}                              # 2020/01/02
  ]

  patterns.each do |pattern|
    matched = blobstring[pattern]
    next unless matched

    # Parse the string form so non-String input does not raise TypeError.
    addItem(DateTime.parse(blobstring).to_s, file, title, description, blobstring, matched)
    break
  end
  nil
rescue StandardError
  # Deliberately best effort: text that looks like a date but cannot be parsed
  # (or any failure further down the recursion) simply ends this scan.
  nil
end