Class: Webhookdb::Replicator::IcalendarCalendarV1::EventProcessor

Inherits:
Object
  • Object
show all
Defined in:
lib/webhookdb/replicator/icalendar_calendar_v1.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(io:, upserter:, headers:) ⇒ EventProcessor

Returns a new instance of EventProcessor.



399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
# File 'lib/webhookdb/replicator/icalendar_calendar_v1.rb', line 399

def initialize(io:, upserter:, headers:)
  @io = io
  @upserter = upserter
  @headers = headers
  # Keep track of everything we upsert. For any rows we aren't upserting,
  # delete them if they're recurring, or cancel them if they're not recurring.
  # If doing it this way is slow, we could invert this (pull down all IDs and pop from the set).
  @upserted_identities = []
  # Keep track of all upserted recurring items.
  # If we find a RECURRENCE-ID on a later item,
  # we need to modify the item from the sequence by stealing its compound identity.
  @expanded_events_by_uid = {}
  # Delete 'extra' recurring event rows.
  # We need to keep track of how many events each UID spawns,
  # so we can delete any with a higher count.
  @max_sequence_num_by_uid = {}
  # Keep track of the bytes we've read from the file.
  # Never trust Content-Length headers for ical feeds.
  @read_bytes = 0
  @feed_md5 = Digest::MD5.new
end

Instance Attribute Details

#headers ⇒ Object (readonly)

Returns the value of attribute headers.



397
398
399
# File 'lib/webhookdb/replicator/icalendar_calendar_v1.rb', line 397

# Reader for the +headers+ value stored on this processor.
def headers = @headers

#read_bytes ⇒ Object (readonly)

Returns the value of attribute read_bytes.



397
398
399
# File 'lib/webhookdb/replicator/icalendar_calendar_v1.rb', line 397

# Reader for the running total accumulated while lines are read from the feed.
def read_bytes = @read_bytes

#upserted_identities ⇒ Object (readonly)

Returns the value of attribute upserted_identities.



397
398
399
# File 'lib/webhookdb/replicator/icalendar_calendar_v1.rb', line 397

# Reader for the list of identities recorded for every upserted item.
def upserted_identities = @upserted_identities

Instance Method Details

#_ical_entry_from_ruby(r, entry, is_date) ⇒ Object

We need is_date because the recurrence/IceCube schedule may be using times, not dates.



583
584
585
586
587
588
589
590
591
592
593
594
595
# File 'lib/webhookdb/replicator/icalendar_calendar_v1.rb', line 583

# Turn a Ruby date/time +r+ back into an ical entry hash, using the original
# +entry+ as a template for timezone handling.
#
# Resolution order:
# 1. +is_date+ is truthy: a bare date value.
# 2. +r+ reports the "UTC" zone: a Z-suffixed timestamp.
# 3. +entry+ has a TZID: a local timestamp paired with that TZID.
# 4. The template value already ends in "Z": the template value itself is reused.
# 5. The template value is a floating timestamp: it is assumed to be UTC (and a warning is logged).
#
# We need +is_date+ because the recurrence/IceCube schedule may be using times, not dates.
#
# @param r the occurrence date or time; must respond to +strftime+ (and +zone+ unless +is_date+).
# @param entry [Hash] the original ical entry ("v", optionally "TZID").
# @param is_date whether the event uses dates rather than times.
# @return [Hash] the new ical entry.
# @raise [RuntimeError] when none of the cases above apply.
def _ical_entry_from_ruby(r, entry, is_date)
  if is_date
    {"v" => r.strftime("%Y%m%d")}
  elsif r.zone == "UTC"
    {"v" => r.strftime("%Y%m%dT%H%M%SZ")}
  elsif (zone_id = entry["TZID"])
    {"v" => r.strftime("%Y%m%dT%H%M%S"), "TZID" => zone_id}
  else
    template = entry.fetch("v")
    if template.end_with?("Z")
      {"v" => template}
    elsif /^\d{8}T\d{6}$/.match?(template)
      @upserter.upserting_replicator.logger.warn "ical_assuming_utc_time", ical_entry: entry, ruby_time: r
      {"v" => "#{template}Z"}
    else
      raise "Cannot create ical entry from: '#{r}', #{entry}"
    end
  end
end

#_icecube_rule_from_ical(ical) ⇒ Object



597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
# File 'lib/webhookdb/replicator/icalendar_calendar_v1.rb', line 597

# Parse an RRULE string into an IceCube rule, first stripping rule parts that make
# the rule ambiguous and that IceCube refuses to parse.
#
# Known ambiguous shapes (more will be added as they are found):
#
# - FREQ=WEEKLY together with BYMONTHDAY, e.g. BYMONTHDAY=4.
#   Apple interprets this as every 2 weeks; rrule.js interprets it as on the 4th of the month.
#   IceCube errors, because `day_of_month` isn't valid on a WeeklyRule, so BYMONTHDAY is dropped.
# - FREQ=MONTHLY with both BYYEARDAY and BYMONTHDAY,
#   e.g. FREQ=MONTHLY;INTERVAL=3;BYYEARDAY=14;BYMONTHDAY=14.
#   Apple interprets this as monthly on the 14th; rrule.js interprets this as never happening.
#   'day_of_year' isn't valid on a MonthlyRule, so BYYEARDAY is dropped.
#
# BYMONTHDAY and BYYEARDAY values may be signed (RFC 5545 allows e.g. BYMONTHDAY=-1),
# so the removal pattern accepts '+' and '-' as well as digits and commas;
# otherwise a signed value would be left behind (fully or partially).
#
# The argument is never mutated.
#
# @param ical [String] the RRULE value, like "FREQ=WEEKLY;BYDAY=MO".
# @return whatever IceCube::IcalParser.rule_from_ical returns for the sanitized string.
def _icecube_rule_from_ical(ical)
  offending_part =
    if ical.include?("FREQ=WEEKLY") && ical.include?("BYMONTHDAY=")
      /BYMONTHDAY=[-+\d,]+/
    elsif ical.include?("FREQ=MONTHLY") && ical.include?("BYYEARDAY=") && ical.include?("BYMONTHDAY=")
      /BYYEARDAY=[-+\d,]+/
    end
  if offending_part
    # Removing a part can leave doubled, leading, or trailing separators behind.
    # Collapse runs of ';' first so a single trim at each end is always enough.
    ical = ical.gsub(offending_part, "").squeeze(";").delete_prefix(";").delete_suffix(";")
  end
  return IceCube::IcalParser.rule_from_ical(ical)
end

#_time_array(h) ⇒ Object



621
622
623
624
625
626
627
628
629
# File 'lib/webhookdb/replicator/icalendar_calendar_v1.rb', line 621

# Expand a (possibly comma-separated) ical entry into an array of parsed values.
#
# Each comma-separated piece of h["v"] is parsed as its own entry, keeping every
# other key of +h+. Parsed values that are a Date are returned unchanged;
# anything else is converted with +utc+, since the icalendar code mostly
# does not work with ActiveSupport timezones.
#
# @param h [Hash] ical entry whose "v" may hold several comma-separated values.
# @return [Array] one parsed value per piece, in order.
def _time_array(h)
  h["v"].split(",").map do |piece|
    single_entry = h.merge("v" => piece)
    value, _got_tz = Webhookdb::Replicator::IcalendarEventV1.entry_to_date_or_datetime(single_entry)
    value.is_a?(Date) ? value : value.utc
  end
end

#delete_condition ⇒ Object



423
424
425
426
427
428
# File 'lib/webhookdb/replicator/icalendar_calendar_v1.rb', line 423

# Build the condition matching 'extra' recurring rows: for every tracked UID,
# rows belonging to that recurring event whose sequence number is greater than
# the highest one recorded. The per-UID clauses are OR'ed together.
#
# @return [nil] when no recurring UID has been tracked.
# @return the combined Sequel expression otherwise.
def delete_condition
  return nil if @max_sequence_num_by_uid.empty?
  per_uid_clauses = @max_sequence_num_by_uid.map do |uid, highest_seq|
    belongs_to_series = Sequel[recurring_event_id: uid]
    beyond_last_seen = Sequel[:recurring_event_sequence] > highest_seq
    belongs_to_series & beyond_last_seen
  end
  return per_uid_clauses.reduce { |combined, clause| combined | clause }
end

#each_feed_event ⇒ Object



631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
# File 'lib/webhookdb/replicator/icalendar_calendar_v1.rb', line 631

# Read the feed from @io line by line and yield one parsed hash per VEVENT.
#
# Every line read is added to @read_bytes (by byte length) and to the running
# @feed_md5 digest exactly as it came from the stream, before any stripping.
# Lines between BEGIN:VEVENT and END:VEVENT (inclusive) are collected and handed to
# Webhookdb::Replicator::IcalendarEventV1.vevent_to_hash; lines outside a VEVENT are ignored.
#
# A parsed VEVENT is only yielded if it has both "DTSTART" and "UID".
# The UIDs of the others ("[missing]" when there is no UID) are collected
# and logged as a single warning once the feed has been fully read.
#
# @yieldparam h [Hash] the parsed VEVENT.
def each_feed_event
  bad_event_uids = Set.new
  vevent_lines = []
  in_vevent = false
  while (line = @io.gets)
    # Count bytes, not characters: String#size would under-count
    # any line containing multi-byte characters.
    @read_bytes += line.bytesize
    @feed_md5.update(line)
    begin
      line.rstrip!
    rescue Encoding::CompatibilityError
      # We occasionally get incorrectly encoded files.
      # For example, the response may have a header:
      #   Content-Type: text/calendar; charset=UTF-8
      # but the actual encoding is not:
      #   file -I <filename>
      #   <filename>: text/calendar; charset=iso-8859-1
      # In these cases, there's not much we can do.
      # We can use chardet, but it's a big library and this issue
      # isn't common enough. Instead, try to force the encoding to utf-8,
      # which may break some things, but we'll see what happens.
      line = line.force_encoding("utf-8")
      line = line.scrub
      line = line.rstrip
    end
    if line == "BEGIN:VEVENT"
      in_vevent = true
      vevent_lines << line
    elsif line == "END:VEVENT"
      in_vevent = false
      vevent_lines << line
      h = Webhookdb::Replicator::IcalendarEventV1.vevent_to_hash(vevent_lines)
      # Reuse the same buffer for the next VEVENT.
      vevent_lines.clear
      if h.key?("DTSTART") && h.key?("UID")
        yield h
      else
        bad_event_uids << h.fetch("UID", {}).fetch("v", "[missing]")
      end
    elsif in_vevent
      vevent_lines << line
    end
  end
  return if bad_event_uids.empty?
  @upserter.upserting_replicator.logger.warn("invalid_vevent_hash", vevent_uids: bad_event_uids.sort)
end

#each_projected_event(h) ⇒ Object

Raises:

  • (LocalJumpError)


444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
# File 'lib/webhookdb/replicator/icalendar_calendar_v1.rb', line 444

# Turn one parsed VEVENT hash into the event hash(es) that should be upserted,
# yielding each of them to the block.
#
# There are three paths:
#
# - The event has a RECURRENCE-ID (it overrides one occurrence of a recurring event).
#   It is yielded exactly once: unchanged when no expanded events are known for its UID;
#   re-keyed onto the matching expanded occurrence when one has the same start;
#   or appended to the sequence under a new sequence number when none matches.
# - The event has no RRULE: it is yielded unchanged.
# - The event has an RRULE: it is expanded through an IceCube schedule, and each occurrence
#   is yielded as a copy with its own UID ("<uid>-<index>"), sequence number, DTSTART, and DTEND.
#   Suspicious events (start year before 1000, end before start) are yielded unchanged instead.
#
# Note that +h+ itself is modified on several paths ("UID", "recurring_event_id",
# "recurring_event_sequence", "LAST-MODIFIED").
#
# @param h [Hash] the parsed VEVENT. "UID" is required; "DTSTART" is required when an RRULE is present.
# @yieldparam event [Hash] an event hash to upsert.
# @return [nil]
# @raise [LocalJumpError] if no block is given.
def each_projected_event(h)
  raise LocalJumpError unless block_given?

  uid = h.fetch("UID").fetch("v")

  if (recurrence_id = h["RECURRENCE-ID"])
    # Track down the original item in the projected sequence, so we can update it.
    # The RECURRENCE-ID decides whether we compare against the row's date or its timestamp.
    if Webhookdb::Replicator::IcalendarEventV1.value_is_date_str?(recurrence_id.fetch("v"))
      start = Webhookdb::Replicator::IcalendarEventV1.entry_to_date(recurrence_id)
      startfield = :start_date
    else
      startfield = :start_at
      start = Webhookdb::Replicator::IcalendarEventV1.entry_to_datetime(recurrence_id).first
    end
    candidates = @expanded_events_by_uid[uid]
    if candidates.nil?
      # We can have no recurring events, even with the exclusion date.
      # Not much we can do here- just treat it as a standalone event.
      yield h
      return
    end
    unless (match = candidates.find { |c| c[startfield] == start })
      # There are some providers (like Apple) where an excluded event
      # will be outside the bounds of the RRULE of its owner.
      # Usually the RRULE has an UNTIL that is before the RECURRENCE-ID datetime.
      #
      # In these cases, we can use the event as-is, but we need to
      # make sure it is treated as part of the sequence.
      # So increment the last-seen sequence number for the UID and use that.
      # NOTE(review): this assumes a max sequence number was already recorded for this UID
      # (it is written at the end of the RRULE expansion below); `+= 1` on a missing
      # entry would raise NoMethodError. Confirm that candidates always implies an entry.
      max_seq_num = @max_sequence_num_by_uid[uid] += 1
      h["UID"] = {"v" => "#{uid}-#{max_seq_num}"}
      h["recurring_event_id"] = uid
      h["recurring_event_sequence"] = max_seq_num
      yield h
      return
    end

    # Steal the UID to overwrite the original, and record where it came from.
    # Note that all other fields, like categories, will be overwritten with the fields in this exclusion.
    # This seems to be correct, but we should keep an eye open in case we need to merge
    # these exclusion events into the originals.
    h["UID"] = {"v" => match[:uid]}
    h["recurring_event_sequence"] = match[:recurring_event_sequence]
    # Usually the recurrent event and exclusion have the same last-modified.
    # But we need to set the last-modified to AFTER the original,
    # to make sure it replaces what's in the database (the original un-excluded event
    # may already be present in the database).
    h["LAST-MODIFIED"] = match.fetch(:last_modified_at) + 1.second
    yield h
    return
  end

  # Not recurring: nothing to project.
  unless h["RRULE"]
    yield h
    return
  end

  # We need to convert relevant parsed ical lines back to a string for use in ice_cube.
  # There are other ways to handle this, but this is fine for now.
  ical_params = {}
  # NOTE: despite the local's name, these are the RDATE entries (extra occurrence times).
  if (exdates = h["RDATE"])
    ical_params[:rtimes] = exdates.map { |d| self._time_array(d) }.flatten
  end
  if (exdates = h["EXDATE"])
    ical_params[:extimes] = exdates.map { |d| self._time_array(d) }.flatten
  end
  # The `if` is always true here, since we returned above when there was no RRULE.
  ical_params[:rrules] = [self._icecube_rule_from_ical(h["RRULE"]["v"])] if h["RRULE"]
  # DURATION is not supported

  start_entry = h.fetch("DTSTART")
  ev_replicator = Webhookdb::Replicator::IcalendarEventV1
  is_date = ev_replicator.entry_is_date_str?(start_entry)
  # Use actual Times for start/end since ice_cube doesn't parse them well
  ical_params[:start_time] = ev_replicator.entry_to_date_or_datetime(start_entry).first
  if ical_params[:start_time].year < 1000
    # This is almost definitely a misconfiguration. Yield it as non-recurring and move on.
    yield h
    return
  end
  has_end_time = false
  if (end_entry = h["DTEND"])
    # the end date is optional. If we don't have one, we should never store one.
    has_end_time = true
    ical_params[:end_time] = ev_replicator.entry_to_date_or_datetime(end_entry).first
    if ical_params[:end_time] < ical_params[:start_time]
      # This is an invalid event. Not sure what it'll do to IceCube so don't send it there.
      # Yield it as a non-recurring event and move on.
      yield h
      return
    end
  end

  schedule = IceCube::Schedule.from_hash(ical_params)
  dont_project_before = Webhookdb::Icalendar.oldest_recurring_event
  dont_project_after = @upserter.now + RECURRENCE_PROJECTION

  # Just like google, track the original event id.
  h["recurring_event_id"] = uid
  # Stays at -1 if no occurrence ends up being yielded.
  final_sequence = -1
  begin
    # Pass in a 'closing time' to avoid a denial of service for an impossible rrule.
    # It is further into the future than the "don't project after"
    # since using something too short causes the calculation to be short-circuited before it should
    # (I'm unclear what the ideal value is, but tests will fail with much less than the number here).
    # This still results in a slow calculation, but there's not much we can do for now.
    # In the future perhaps we should try to pre-validate common problems.
    # See spec for examples.
    dos_cutoff = dont_project_after + 210.days
    # NOTE(review): enumerate_occurrences is invoked through `send`, presumably because it is not
    # part of ice_cube's public API; re-check this call when upgrading ice_cube.
    schedule.send(:enumerate_occurrences, schedule.start_time, dos_cutoff).each_with_index do |occ, idx|
      # Skipped occurrences still consume an index, so sequence numbers count from the schedule start.
      next if occ.start_time < dont_project_before
      # Given the original hash, we will modify some fields.
      # This is a shallow copy; only top-level keys are replaced below.
      e = h.dup
      # Keep track of how many events we're managing.
      e["recurring_event_sequence"] = idx
      # The new UID has the sequence number.
      e["UID"] = {"v" => "#{uid}-#{idx}"}
      e["DTSTART"] = self._ical_entry_from_ruby(occ.start_time, start_entry, is_date)
      if has_end_time
        if !is_date && end_entry["VALUE"] == "DATE"
          # It's possible that DTSTART is a time, but DTEND is a date. This makes no sense,
          # so skip setting an end date. It will be in the :data column at least.
        else
          e["DTEND"] = self._ical_entry_from_ruby(occ.end_time, end_entry, is_date)
        end
      end
      yield e
      final_sequence = idx
      # The check comes after the yield, so the first occurrence past the cutoff is still emitted.
      break if occ.start_time > dont_project_after
    end
  rescue Date::Error
    # It's possible we yielded some recurring events too, in that case, treat them as normal,
    # in addition to yielding the event as non-recurring.
    yield h
  end
  # Remember the highest sequence number we produced for this UID.
  @max_sequence_num_by_uid[uid] = final_sequence
  return
end

#feed_hash ⇒ Object



421
# File 'lib/webhookdb/replicator/icalendar_calendar_v1.rb', line 421

# Hex digest of everything fed into the feed's MD5 so far.
#
# @return [String]
def feed_hash
  @feed_md5.hexdigest
end

#process ⇒ Object



430
431
432
433
434
435
436
437
438
439
440
441
442
# File 'lib/webhookdb/replicator/icalendar_calendar_v1.rb', line 430

# Run the whole feed through the upserter.
#
# Each VEVENT from the feed is projected into one or more events; every projected
# event is handed to the upserter and its identity is recorded. Upserted rows that
# belong to a recurring event are additionally grouped by their recurring event id,
# so later feed items can look them up. Pending inserts are flushed at the end.
#
# @return the result of the upserter's +flush_pending_inserts+.
def process
  each_feed_event do |vevent|
    each_projected_event(vevent) do |projected|
      identity, row = @upserter.handle_item(projected)
      @upserted_identities.push(identity)
      series_uid = row.fetch(:recurring_event_id)
      next unless series_uid
      (@expanded_events_by_uid[series_uid] ||= []).push(row)
    end
  end
  @upserter.flush_pending_inserts
end