Class: SportDb::DateFinder

Inherits:
Object
  • Object
show all
Includes:
LogUtils::Logging
Defined in:
lib/sportdb/finders/date.rb

Constant Summary collapse

MONTH_EN =

todo: make more generic for reuse

'January|Jan|'+
'February|Feb|'+
'March|Mar|'+
'April|Apr|'+
'May|'+
'June|Jun|'+
'July|Jul|'+
'August|Aug|'+
'September|Sept|Sep|'+
'October|Oct|'+
'November|Nov|'+
'December|Dec'
MONTH_EN_TO_MM =
{
'Jan' => '1', 'January' => '1',
'Feb' => '2', 'February' => '2',
'Mar' => '3', 'March' => '3',
'Apr' => '4', 'April' => '4',
'May' => '5',
'Jun' => '6', 'June' => '6',
'Jul' => '7', 'July' => '7',
'Aug' => '8', 'August' => '8',
'Sep' => '9', 'Sept' => '9', 'September' => '9',
'Oct' => '10', 'October' => '10',
'Nov' => '11', 'November' => '11',
'Dec' => '12', 'December' =>'12' }
MONTH_ES =
'Enero|Ene|Feb|Marzo|Mar|Abril|Abr|Mayo|May|Junio|Jun|Julio|Jul|Agosto|Ago|Sept|Set|Sep|Oct|Nov|Dic'
MONTH_ES_TO_MM =
{
'Ene' => '1', 'Enero' => '1',
'Feb' => '2',
'Mar' => '3', 'Marzo' => '3',
'Abr' => '4', 'Abril' => '4',
'May' => '5', 'Mayo' => '5',
'Jun' => '6', 'Junio' => '6',
'Jul' => '7', 'Julio' => '7',
'Ago' => '8', 'Agosto' => '8',
'Sep' => '9', 'Set' => '9', 'Sept' => '9',
'Oct' => '10',
'Nov' => '11',
'Dic' => '12' }
DB__DATE_TIME_REGEX =

e.g. 2012-09-14 20:30 => YYYY-MM-DD HH:MM

nb: allow 2012-9-3 7:30 e.g. no leading zero required

regex_db

/\b
                 (?<year>\d{4})
 -
                 (?<month>\d{1,2})
 -
                 (?<day>\d{1,2})
\s+
                 (?<hours>\d{1,2})
 :
                 (?<minutes>\d{2})
\b/x
DB__DATE_REGEX =

e.g. 2012-09-14 w/ implied hours (set to 12:00)

nb: allow 2012-9-3 e.g. no leading zero required

regex_db2

/\b
                    (?<year>\d{4})
  -
                    (?<month>\d{1,2})
  -
                    (?<day>\d{1,2})
\b/x
DD_MM_YYYY__DATE_TIME_REGEX =

e.g. 14.09.2012 20:30 => DD.MM.YYYY HH:MM

nb: allow 2.3.2012 e.g. no leading zero required
nb: allow hour as 20.30

regex_de

/\b
                          (?<day>\d{1,2})
\.
                          (?<month>\d{1,2})
\.
                          (?<year>\d{4})
\s+
                          (?<hours>\d{1,2})
[:.]
                          (?<minutes>\d{2})
\b/x
DD_MM__DATE_TIME_REGEX =

e.g. 14.09. 20:30 => DD.MM. HH:MM

nb: allow 2.3. e.g. no leading zero required
nb: allow hour as 20.30  or 3.30 instead of 03.30

regex_de2

/\b
                        (?<day>\d{1,2})
 \.
                        (?<month>\d{1,2})
 \.
 \s+
                        (?<hours>\d{1,2})
 [:.]
                        (?<minutes>\d{2})
\b/x
DD_MM_YYYY__DATE_REGEX =

e.g. 14.09.2012 => DD.MM.YYYY w/ implied hours (set to 12:00) regex_de3

/\b
                    (?<day>\d{1,2})
\.
                    (?<month>\d{1,2})
\.
                    (?<year>\d{4})
\b/x
DD_MM__DATE_REGEX =

e.g. 14.09. => DD.MM. w/ implied year and implied hours (set to 12:00)

note: allow end delimiter ] e.g. [Sa 12.01.] or end-of-string ($) too
note: we use a lookahead for the last part e.g. (?=\s+|$|[\]]) - do NOT consume

regex_de4

/\b
(?<day>\d{1,2})
   \.
(?<month>\d{1,2})
   \.
(?=\s+|$|[\]])/x
EN__DD_MONTH_YYYY__DATE_TIME_REGEX =

e.g. 12 May 2013 14:00 => D|DD.MMM.YYYY H|HH:MM

/\b
                (?<day>\d{1,2})
 \s
                (?<month_en>#{MONTH_EN})
 \s
                (?<year>\d{4})
 \s+
                (?<hours>\d{1,2})
:
                (?<minutes>\d{2})
\b/x
EN__DD_MONTH__DATE_REGEX =

e.g. 12 May => D|DD.MMM w/ implied year and implied hours

/\b
                (?<day>\d{1,2})
\s
                (?<month_en>#{MONTH_EN})
\b/x
EN__MONTH_DD_YYYY__DATE_TIME_REGEX =

e.g. Jun/12 2011 14:00

/\b
                   (?<month_en>#{MONTH_EN})
\/
                   (?<day>\d{1,2})
\s
                   (?<year>\d{4})
\s+
                   (?<hours>\d{1,2})
:
                   (?<minutes>\d{2})
\b/x
EN__MONTH_DD__DATE_TIME_REGEX =

e.g. Jun/12 14:00 w/ implied year H|HH:MM

/\b
                   (?<month_en>#{MONTH_EN})
\/
                   (?<day>\d{1,2})
\s+
                   (?<hours>\d{1,2})
:
                   (?<minutes>\d{2})
\b/x
EN__MONTH_DD_YYYY__DATE_REGEX =

e.g. Jun/12 2013 w/ implied hours (set to 12:00)

/\b
                (?<month_en>#{MONTH_EN})
 \/
                (?<day>\d{1,2})
 \s
                (?<year>\d{4})
\b/x
EN__MONTH_DD__DATE_REGEX =

e.g. Jun/12 w/ implied year and implied hours (set to 12:00)

/\b
                   (?<month_en>#{MONTH_EN})
 \/
                   (?<day>\d{1,2})
\b/x
ES__DD_MONTH__DATE_REGEX =

e.g. 12 Ene w/ implied year and implied hours (set to 12:00)

/\b
                   (?<day>\d{1,2})
\s
                   (?<month_es>#{MONTH_ES})
\b/x
FORMATS =

map table - 1) tag, 2) regex - note: order matters; first come-first matched/served

[
  [ '[YYYY_MM_DD_hh_mm]',        DB__DATE_TIME_REGEX         ],
  [ '[YYYY_MM_DD]',              DB__DATE_REGEX              ],
  [ '[DD_MM_YYYY_hh_mm]',        DD_MM_YYYY__DATE_TIME_REGEX ],
  [ '[DD_MM_hh_mm]',             DD_MM__DATE_TIME_REGEX ],
  [ '[DD_MM_YYYY]',              DD_MM_YYYY__DATE_REGEX ],
  [ '[DD_MM]',                   DD_MM__DATE_REGEX ],
  [ '[EN_DD_MONTH_YYYY_hh_mm]',  EN__DD_MONTH_YYYY__DATE_TIME_REGEX ],
  [ '[EN_MONTH_DD_YYYY_hh_mm]',  EN__MONTH_DD_YYYY__DATE_TIME_REGEX ],
  [ '[EN_MONTH_DD_hh_mm]',       EN__MONTH_DD__DATE_TIME_REGEX ],
  [ '[EN_MONTH_DD_YYYY]',        EN__MONTH_DD_YYYY__DATE_REGEX ],
  [ '[EN_MONTH_DD]',             EN__MONTH_DD__DATE_REGEX ],
  [ '[EN_DD_MONTH]',             EN__DD_MONTH__DATE_REGEX ],
  [ '[ES_DD_MONTH]',             ES__DD_MONTH__DATE_REGEX ]
]

Instance Method Summary collapse

Constructor Details

#initialize ⇒ DateFinder

Returns a new instance of DateFinder.



230
231
232
# File 'lib/sportdb/finders/date.rb', line 230

# Creates a new finder; there is no state to set up for now.
def initialize; end

Instance Method Details

#find!(line, opts = {}) ⇒ Object



234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
# File 'lib/sportdb/finders/date.rb', line 234

# Finds the first date in +line+ and replaces it in place with a format tag.
#
# The entries of FORMATS are tried in order; the first pattern that matches
# wins. The match is handed to +parse_date_time+ (defined elsewhere in this
# class) and the matched text in +line+ is replaced by the entry's tag,
# e.g. "[DD_MM]".
#
# @param line [String] text to scan; MUTATED in place when a date is found
# @param opts [Hash] passed through unchanged to +parse_date_time+
# @return [Object, nil] whatever +parse_date_time+ returns for the match,
#   or nil when no format matches (+line+ is left untouched in that case)
#
# TODO: use more lookahead for all required trailing spaces.
# TODO (carried over from the original notes): a date inside brackets such as
#   "[Sa 12.01.]" becomes "[Sa [DD_MM]]"; code that later strips the tag
#   may leave the trailing "]" behind - needs a unit test.
def find!( line, opts={} )
  FORMATS.each do |tag, pattern|
    md = pattern.match( line )
    next unless md   # no match; try next pattern

    # parse first, while line still holds the text the match was made on
    date = parse_date_time( md, opts )

    # Replace exactly the matched span. Searching for md[0] as a plain
    # substring (as sub! would) can hit an earlier occurrence that the regex
    # itself rejected (e.g. the "12.01." inside "112.01.").
    line[ md.begin(0)...md.end(0) ] = tag

    return date
  end

  nil  # no match found
end