Module: Embulk::Guess::TimeFormatGuess

Defined in:
lib/embulk/guess/time_format_guess.rb

Defined Under Namespace

Modules: Parts, StandardPatterns
Classes: GuessMatch, GuessPattern, RegexpPattern, Rfc2822Pattern, SimpleMatch

Constant Summary collapse

# Pattern objects consulted by TimeFormatGuess.guess. Every entry is asked to
# `match` each input text, in the order listed here, and any non-nil result
# becomes a candidate match.
PATTERNS =
[
  GuessPattern.new,
  Rfc2822Pattern.new,
  # The two RegexpPattern entries pair a StandardPatterns constant with a
  # fixed format string (presumably the format reported when that pattern
  # matches -- confirm against RegexpPattern).
  RegexpPattern.new(StandardPatterns::APACHE_CLF, "%d/%b/%Y:%H:%M:%S %z"),
  RegexpPattern.new(StandardPatterns::ANSI_C_ASCTIME, "%a %b %e %H:%M:%S %Y"),
]

Class Method Summary collapse

Class Method Details

.guess(texts) ⇒ Object



375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
# File 'lib/embulk/guess/time_format_guess.rb', line 375

# Picks a format for the given sample text(s) by consulting every entry of
# PATTERNS.
#
# Each sample is converted with `to_s`; samples that end up as the empty
# string are ignored. Every pattern is asked to `match` every remaining
# sample, and all non-nil results are collected as candidates.
#
# @param texts [Object, Array<Object>, nil] one sample or a list of samples
#   (coerced with `Array()`)
# @return [Object, nil] the `format` of the winning match, or nil when no
#   pattern matched any sample
def self.guess(texts)
  samples = Array(texts).map(&:to_s).reject { |sample| sample == "" }

  # Gather candidates sample by sample, pattern by pattern, skipping nils.
  candidates = []
  samples.each do |sample|
    PATTERNS.each do |pattern|
      hit = pattern.match(sample)
      candidates << hit unless hit.nil?
    end
  end

  return nil if candidates.empty?
  return candidates.first.format if candidates.size == 1

  # Bucket candidates by their mergeable group and keep the bucket that sorts
  # last by size, i.e. one of the most populated buckets.
  buckets = candidates.group_by(&:mergeable_group).values
  winners = buckets.sort_by(&:size).last

  # Fold the remaining members of the winning bucket into its first member,
  # then report that member's format.
  representative = winners.first
  winners.drop(1).each { |other| representative.merge!(other) }
  representative.format
end