Module: DateExtractor
- Defined in:
- lib/date_extractor.rb,
lib/date_extractor/version.rb
Constant Summary collapse
- RANGE_RE =
/ [-~〜~ー] /x
- NUMBER_RE =
/ (?:\d+)|(?:[0-9]+) /x
- START_CHINESE_CHARACTER_TIME_RE =
NOTE: Use `(?!間)` to reject `〜時間`
/ (?<start_hour>#{NUMBER_RE})時(?!間) (?: (?<start_min>#{NUMBER_RE}分) | (?<start_half_hour_unit>半) )? /x
- END_CHINESE_CHARACTER_TIME_RE =
/ (?<end_hour>#{NUMBER_RE})時(?!間) (?: (?<end_min>#{NUMBER_RE}分) | (?<end_half_hour_unit>半) )? /x
- TIMESLOT_RE1 =
/ (?<start_hour>#{NUMBER_RE}+)[:;](?<start_min>#{NUMBER_RE}) \s* #{RANGE_RE}? \s* (?: (?<end_hour>#{NUMBER_RE})[:;](?<end_min>#{NUMBER_RE}) )? /x
- TIMESLOT_RE2 =
/ #{START_CHINESE_CHARACTER_TIME_RE}以降 /x
- TIMESLOT_RE3 =
/ #{START_CHINESE_CHARACTER_TIME_RE} \s* #{RANGE_RE}? \s* (?:#{END_CHINESE_CHARACTER_TIME_RE})? /x
- TIMESLOT_RE4 =
/ (?:朝)? #{RANGE_RE} \s* (?: (?<end_hour>#{NUMBER_RE})[:;](?<end_min>#{NUMBER_RE}) ) /x
- TIMESLOT_RE =
/ (?:#{TIMESLOT_RE1})|(?:#{TIMESLOT_RE2})|(?:#{TIMESLOT_RE3}|(?:#{TIMESLOT_RE4})) /x
- WDAY_RE =
/ (?: \([^()]+\) ) | (?: ([^()]+) ) /x
- DAY_RE1 =
/ (?<year>#{NUMBER_RE})\/(?<month>#{NUMBER_RE})\/(?<day>#{NUMBER_RE}) \s* (?:#{WDAY_RE})? \s* (?:#{TIMESLOT_RE})? /x
- DAY_RE2 =
/ (?<month>#{NUMBER_RE})\/(?<day>#{NUMBER_RE}) \s* (?:#{WDAY_RE})? \s* (?:#{TIMESLOT_RE})? /x
- DAY_RE3 =
/ (?<month>#{NUMBER_RE})月(?<day>#{NUMBER_RE})日 \s* (?:#{WDAY_RE})? \s* (?:#{TIMESLOT_RE})? /x
- DAY_RE =
/(?:#{DAY_RE1})|(?:#{DAY_RE2})|(?:#{DAY_RE3})/x
- ONLY_DAY_RE =
NOTE: Use `(?!(?:間)|(?:ほど))` to reject `~日間` and `~日ほど`
/ (?<day>#{NUMBER_RE})日 (?!(?:間)|(?:ほど)) \s* (?:#{WDAY_RE})? \s* (?:#{TIMESLOT_RE})? /x
- RE =
/(?:#{DAY_RE})|(?:#{ONLY_DAY_RE})/x
- VERSION =
"0.1.1"
Class Method Summary collapse
-
.extract(body, fallback_month: nil, fallback_year: nil, debug: false) ⇒ [String], [[Date, DateTime | NilClass, DateTime | NilClass]] matched strings and dates
- Returns ([String], [[Date, DateTime | NilClass, DateTime | NilClass]]):
matched strings and dates.
Class Method Details
.extract(body, fallback_month: nil, fallback_year: nil, debug: false) ⇒ [String], [[Date, DateTime | NilClass, DateTime | NilClass]] matched strings and dates
Returns [String], [[Date, DateTime | NilClass, DateTime | NilClass]] matched strings and dates.
120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
# File 'lib/date_extractor.rb', line 120

# Finds date expressions (and the time slots that follow them) in +body+.
#
# Every time slot found in +body+ is attributed to the closest preceding day
# whose Date could be built, provided the slot ends before the next such day
# begins. Days whose Date could not be built are never used as an owner or as
# a boundary.
#
# @param body [String] text to scan
# @param fallback_month [Integer, nil] passed to `days_from_matches`;
#   defaults to the current month
# @param fallback_year [Integer, nil] passed to `days_from_matches`;
#   defaults to the current year
# @param debug [Boolean] when true, entries whose Date is nil are kept in the
#   result (and the flag is forwarded to `days_from_matches`)
# @return [[String], [[Date, DateTime | NilClass, DateTime | NilClass]]]
#   matched strings and dates
def extract(body, fallback_month: nil, fallback_year: nil, debug: false)
  # Take a single snapshot so month and year always come from the same day.
  today = Date.today
  fallback_month ||= today.month
  fallback_year ||= today.year

  day_matches = get_match_and_positions(body, RE) # [[MatchData, start, end], [...], ...]
  # [[MatchData, Date, DateTime, DateTime], [MatchData, Date, DateTime, nil], ...]
  # The Date slot is nil when `Date.new(~)` failed for that match.
  day_with_hours = days_from_matches(day_matches.map(&:first), fallback_month, fallback_year, debug: debug)
  day_count = day_matches.size

  # timeslots_container[i] collects the timeslot matches that belong to day i.
  timeslots_container = Array.new(day_count) { [] }

  # Indices of days that have a usable Date.
  valid_indices = (0...day_count).reject { |i| day_with_hours[i][1].nil? }

  # For each valid day: [day index, end offset of its match, begin offset of
  # the NEXT VALID day's match (nil when it is the last valid day)].
  # Looking up the next valid day (instead of just i+1 / i+2) keeps a run of
  # several unparseable days from cutting the window short.
  windows = valid_indices.each_with_index.map do |day_index, k|
    following = valid_indices[k + 1]
    [
      day_index,
      day_with_hours[day_index][0].end(0),
      following && day_with_hours[following][0].begin(0)
    ]
  end

  timeslot_matches = get_match_and_positions(body, TIMESLOT_RE) # [[MatchData, start, end], [...], ...]
  timeslot_matches.each do |(timeslot_match, start_pos, end_pos)|
    windows.each do |(day_index, window_start, window_end)|
      # The timeslot must start at or after the end of the day expression ...
      next if start_pos < window_start
      # ... and finish before the next valid day expression begins.
      next if window_end && (end_pos - 1) >= window_end

      timeslots_container[day_index].push(timeslot_match)
    end
  end

  # The returned days have the same indices as timeslots_container.
  days_from_timeslots = days_from_timeslot_matches(timeslots_container, day_with_hours)
  result_strs = days_from_timeslots.map { |(match, _, _, _)| match&.[](0) }
  result_datetimes = days_from_timeslots.map { |(_, day, start_t, end_t)| [day, start_t, end_t] }

  unless debug
    # Reject entries whose Date is nil, keeping both arrays aligned.
    present = result_datetimes.map { |(day, _, _)| !day.nil? }
    result_strs = result_strs.select.with_index { |_, i| present[i] }
    result_datetimes = result_datetimes.select.with_index { |_, i| present[i] }
  end

  [result_strs, result_datetimes]
end