Module: DateExtractor

Defined in:
lib/date_extractor.rb,
lib/date_extractor/version.rb

Constant Summary collapse

RANGE_RE =
/
  [-~〜~ー]
/x
NUMBER_RE =
/
  (?:\d+)|(?:[0-9]+)
/x
START_CHINESE_CHARACTER_TIME_RE =

NOTE: The negative lookahead `(?!間)` rejects durations such as `〜時間`, so that only clock times are matched.

/
  (?<start_hour>#{NUMBER_RE})時(?!間)
  (?:
    (?<start_min>#{NUMBER_RE}分)
    |
    (?<start_half_hour_unit>半)
  )?
/x
END_CHINESE_CHARACTER_TIME_RE =
/
  (?<end_hour>#{NUMBER_RE})時(?!間)
  (?:
    (?<end_min>#{NUMBER_RE}分)
    |
    (?<end_half_hour_unit>半)
  )?
/x
TIMESLOT_RE1 =
/
  (?<start_hour>#{NUMBER_RE}+)[:;](?<start_min>#{NUMBER_RE})
  \s*
  #{RANGE_RE}?
  \s*
  (?:
    (?<end_hour>#{NUMBER_RE})[:;](?<end_min>#{NUMBER_RE})
  )?
/x
TIMESLOT_RE2 =
/
  #{START_CHINESE_CHARACTER_TIME_RE}以降
/x
TIMESLOT_RE3 =
/
  #{START_CHINESE_CHARACTER_TIME_RE}
  \s*
  #{RANGE_RE}?
  \s*
  (?:#{END_CHINESE_CHARACTER_TIME_RE})?
/x
TIMESLOT_RE4 =
/
  (?:朝)?
    #{RANGE_RE}
    \s*
    (?:
      (?<end_hour>#{NUMBER_RE})[:;](?<end_min>#{NUMBER_RE})
    )
/x
TIMESLOT_RE =
/
  (?:#{TIMESLOT_RE1})|(?:#{TIMESLOT_RE2})|(?:#{TIMESLOT_RE3}|(?:#{TIMESLOT_RE4}))
/x
WDAY_RE =
/
  (?:
    \([^()]+\)
  )
  |
  (?:
   ([^()]+)
  )
/x
DAY_RE1 =
/
  (?<year>#{NUMBER_RE})\/(?<month>#{NUMBER_RE})\/(?<day>#{NUMBER_RE})
    \s*
    (?:#{WDAY_RE})?
    \s*
    (?:#{TIMESLOT_RE})?
/x
DAY_RE2 =
/
  (?<month>#{NUMBER_RE})\/(?<day>#{NUMBER_RE})
    \s*
    (?:#{WDAY_RE})?
    \s*
    (?:#{TIMESLOT_RE})?
/x
DAY_RE3 =
/
  (?<month>#{NUMBER_RE})月(?<day>#{NUMBER_RE})日
  \s*
  (?:#{WDAY_RE})?
  \s*
  (?:#{TIMESLOT_RE})?
/x
DAY_RE =
/(?:#{DAY_RE1})|(?:#{DAY_RE2})|(?:#{DAY_RE3})/x
ONLY_DAY_RE =

NOTE: The negative lookahead `(?!(?:間)|(?:ほど))` rejects durations such as `~日間` and `~日ほど`, so that only days of the month are matched.

/
  (?<day>#{NUMBER_RE})日
    (?!(?:間)|(?:ほど))
  \s*
  (?:#{WDAY_RE})?
  \s*
  (?:#{TIMESLOT_RE})?
/x
RE =
/(?:#{DAY_RE})|(?:#{ONLY_DAY_RE})/x
VERSION =
"0.1.1"

Class Method Summary collapse

Class Method Details

.extract(body, fallback_month: nil, fallback_year: nil, debug: false) ⇒ [String], [[Date, DateTime | NilClass, DateTime | NilClass]] matched strings and dates

Returns [String], [[Date, DateTime | NilClass, DateTime | NilClass]] matched strings and dates.

Parameters:

  • body (String)
  • fallback_month (Integer | NilClass) (defaults to: nil)
  • fallback_year (Integer | NilClass) (defaults to: nil)
  • debug (Boolean) (defaults to: false)

Returns:

  • (Array) — a two-element array `[strings, dates]`:
    `strings` is a `[String]` holding the matched strings, and
    `dates` is a `[[Date, DateTime | NilClass, DateTime | NilClass]]` holding,
    for each matched string, its day, start time and end time.



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# File 'lib/date_extractor.rb', line 120

# Extracts days, and the time slots that belong to them, from free text.
#
# Every match of RE in +body+ is a day candidate and every match of
# TIMESLOT_RE is a time-slot candidate. A time slot is attached to a day when
# it starts at or after the end of that day's match and ends before the start
# of the next day whose Date could be built (or anywhere after the day, when
# no such next day exists). Days whose Date is nil never receive time slots
# and never act as a boundary, however many of them appear in a row.
#
# @param body [String] text to scan
# @param fallback_month [Integer, nil] forwarded to days_from_matches;
#   defaults to the current month
# @param fallback_year [Integer, nil] forwarded to days_from_matches;
#   defaults to the current year
# @param debug [Boolean] when true, entries whose Date is nil are kept in the
#   result instead of being dropped
# @return [Array] two index-aligned arrays: the matched strings, and
#   +[day, start_time, end_time]+ triples
def extract(body, fallback_month: nil, fallback_year: nil, debug: false)
  # Read the clock once so both fallbacks are taken from the same day.
  today = Date.today
  fallback_month ||= today.month
  fallback_year  ||= today.year

  day_matches = get_match_and_positions(body, RE)  # [[MatchData, start, end], [...], ...]

  day_with_hours = days_from_matches(day_matches.map(&:first), fallback_month, fallback_year, debug: debug)  # [[MatchData, Date, DateTime, DateTime], [MatchData, Date, DateTime, nil]...]
  day_count = day_matches.size

  timeslots_container = Array.new(day_count) { [] }  # contains timeslots in each day

  timeslot_matches = get_match_and_positions(body, TIMESLOT_RE)  # [[MatchData, start, end], [...], ...]

  day_count.times do |i|
    left_day = day_with_hours[i]
    next if left_day[1].nil?  # `Date.new(~)` failed for this day, so it owns no timeslots

    # The right-hand boundary is the next day that has a valid Date; any
    # number of invalid days in between is skipped.
    right_day   = day_with_hours.drop(i + 1).find { |day| !day[1].nil? }
    left_end    = left_day[0].end(0)
    right_begin = right_day && right_day[0].begin(0)

    timeslot_matches.each do |(timeslot_match, start_pos, end_pos)|
      next if start_pos < left_end                            # timeslot is not to the right of left_day
      next if right_begin && (end_pos - 1) >= right_begin     # timeslot reaches into right_day

      timeslots_container[i].push timeslot_match
    end
  end

  days_from_timeslots = days_from_timeslot_matches(timeslots_container, day_with_hours)  # each entry shares its index with timeslots_container

  # Unless debugging, drop the entries for which no Date could be built.
  days_from_timeslots = days_from_timeslots.reject { |(_, day, _, _)| day.nil? } unless debug

  result_strs      = days_from_timeslots.map { |(match, _, _, _)| match&.[](0) }
  result_datetimes = days_from_timeslots.map { |(_, day, start_t, end_t)| [day, start_t, end_t] }

  [result_strs, result_datetimes]
end