Class: Webhookdb::Replicator::IcalendarEventV1

Inherits:
Base
  • Object
show all
Includes:
Appydays::Loggable
Defined in:
lib/webhookdb/replicator/icalendar_event_v1.rb

Direct Known Subclasses

IcalendarEventV1Partitioned

Defined Under Namespace

Classes: StaleRowDeleter

Constant Summary collapse

# Builds the compound remote key for an event row:
# "<calendar_external_id>-<UID value>", both read from the resource.
# The first positional argument (the extracted column value) is ignored.
CONV_REMOTE_KEY =
Webhookdb::Replicator::Column::IsomorphicProc.new(
  ruby: lambda do |_, resource:, **_|
    calendar_id = resource.fetch("calendar_external_id")
    event_uid = resource.fetch("UID").fetch("v")
    "#{calendar_id}-#{event_uid}"
  end,
  # Because this is a non-nullable key, we never need this in SQL
  sql: ->(_) { Sequel.lit("'do not use'") },
)
# Converts an entry hash into a Date, but only when its "v" value is a date
# string (see .entry_is_date_str?). Anything else converts to nil.
CONV_DATE =
Webhookdb::Replicator::Column::IsomorphicProc.new(
  ruby: lambda do |entry, **|
    next nil unless entry.is_a?(Hash)
    next nil unless self.entry_is_date_str?(entry)
    self.entry_to_date(entry)
  end,
  sql: Webhookdb::Replicator::Column::NOT_IMPLEMENTED,
)
# Converts an entry hash into a Time. Date-only entries convert to nil
# (they are handled by CONV_DATE); non-Hash values are passed through.
CONV_DATETIME =
Webhookdb::Replicator::Column::IsomorphicProc.new(
  ruby: lambda do |entry, **|
    # Entry may be a time if this was from the defaulter
    next entry unless entry.is_a?(Hash)
    next nil if self.entry_is_date_str?(entry)
    # entry_to_datetime returns [time, tz_parsed]; only the time is stored here.
    self.entry_to_datetime(entry).first
  end,
  sql: ->(_) { raise NotImplementedError },
)
# True when the entry is a datetime (not a date) whose timezone could not be
# parsed by .entry_to_datetime; false for dates and non-Hash values.
CONV_MISSING_TZ =
Webhookdb::Replicator::Column::IsomorphicProc.new(
  ruby: lambda do |entry, **|
    next false unless entry.is_a?(Hash)
    next false if self.entry_is_date_str?(entry)
    _time, tz_parsed = self.entry_to_datetime(entry)
    !tz_parsed
  end,
  sql: ->(_) { Sequel[false] },
)
# Converter taking element 0 of a ";"-separated value as a DECIMAL.
# Used for the geo_lat column, which reads the GEO property value.
CONV_GEO_LAT =
Webhookdb::Replicator::Column.converter_array_element(index: 0, sep: ";", cls: DECIMAL)
# Converter taking element 1 of a ";"-separated value as a DECIMAL.
# Used for the geo_lng column, which reads the GEO property value.
CONV_GEO_LNG =
Webhookdb::Replicator::Column.converter_array_element(index: 1, sep: ";", cls: DECIMAL)
# Flattens a list of entry hashes into a single array of strings by splitting
# each entry's "v" value on commas and stripping whitespace from every piece.
# A nil entry converts to an empty array.
CONV_COMMA_SEP_ARRAY =
Webhookdb::Replicator::Column::IsomorphicProc.new(
  ruby: lambda do |entry, **|
    next [] if entry.nil?
    entry.flat_map { |e| e.fetch("v").split(",").map(&:strip) }
  end,
  sql: ->(_) { raise NotImplementedError },
)
# Property names whose entries are collected into an Array by .vevent_to_hash
# (every other property keeps only the last entry seen).
# NOTE(review): "RSTATUS", "RELATED", "X-PROP" and "IANA-PROP" look like the ABNF
# rule names from RFC 5545 section 3.6.1 rather than the property names that appear
# in feeds (REQUEST-STATUS, RELATED-TO, X-..., etc.) -- confirm whether those
# properties should also be treated as repeatable.
ARRAY_KEYS =

datatracker.ietf.org/doc/html/rfc5545#section-3.6.1 The following are OPTIONAL, and MAY occur more than once.

[
  "ATTACH",
  "ATTENDEE",
  "CATEGORIES",
  "COMMENT",
  "CONTACT",
  "EXDATE",
  "RSTATUS",
  "RELATED",
  "RESOURCES",
  "RDATE",
  "X-PROP",
  "IANA-PROP",
].freeze
# Regex source fragments, composed into LINE and used by ._parse_line.
# NAME: a property or parameter name (letters, digits, dashes).
NAME =
"[-a-zA-Z0-9]+"
# QSTR: a double-quoted parameter value (may contain ';', ':' and ',').
QSTR =
'"[^"]*"'
# PTEXT: an unquoted parameter value; may be empty.
PTEXT =
'[^";:,]*'
# PVALUE: a single parameter value, quoted or unquoted (non-capturing).
PVALUE =
"(?:#{QSTR}|#{PTEXT})".freeze
# PARAM: name=value[,value...]; captures the name and the full value list.
PARAM =
"(#{NAME})=(#{PVALUE}(?:,#{PVALUE})*)".freeze
# VALUE: everything after the colon.
VALUE =
".*"
# LINE: a full content line, with named groups name, params (each prefixed by ';') and value.
LINE =
"(?<name>#{NAME})(?<params>(?:;#{PARAM})*):(?<value>#{VALUE})".freeze

Constants inherited from Base

Base::MAX_INDEX_NAME_LENGTH

Constants included from DBAdapter::ColumnTypes

DBAdapter::ColumnTypes::BIGINT, DBAdapter::ColumnTypes::BIGINT_ARRAY, DBAdapter::ColumnTypes::BOOLEAN, DBAdapter::ColumnTypes::COLUMN_TYPES, DBAdapter::ColumnTypes::DATE, DBAdapter::ColumnTypes::DECIMAL, DBAdapter::ColumnTypes::DOUBLE, DBAdapter::ColumnTypes::FLOAT, DBAdapter::ColumnTypes::INTEGER, DBAdapter::ColumnTypes::INTEGER_ARRAY, DBAdapter::ColumnTypes::OBJECT, DBAdapter::ColumnTypes::TEXT, DBAdapter::ColumnTypes::TEXT_ARRAY, DBAdapter::ColumnTypes::TIMESTAMP, DBAdapter::ColumnTypes::UUID

Instance Attribute Summary

Attributes inherited from Base

#service_integration

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Base

#_any_subscriptions_to_notify?, #_backfill_state_change_fields, #_backfillers, #_clear_backfill_information, #_clear_webook_information, #_coalesce_excluded_on_update, #_enqueue_backfill_jobs, #_fetch_enrichment, #_find_dependency_candidate, #_notify_dependents, #_parallel_backfill, #_publish_rowupsert, #_store_enrichment_body?, #_to_json, #_upsert_conflict_target, #_upsert_update_expr, #_upsert_webhook, #_upsert_webhook_single_resource, #_verify_backfill_err_msg, #_webhook_state_change_fields, #admin_dataset, #avoid_writes?, #backfill, #calculate_and_backfill_state_machine, #calculate_backfill_state_machine, #calculate_dependency_state_machine_step, #calculate_preferred_create_state_machine, chunked_row_update_bounds, #clear_backfill_information, #clear_webhook_information, #create_table, #create_table_modification, #create_table_partitions, #data_column, #dbadapter_table, #denormalized_columns, #descriptor, #dispatch_request_to, #enqueue_sync_targets, #enrichment_column, #ensure_all_columns, #ensure_all_columns_modification, #existing_partitions, #find_dependent, #find_dependent!, #indices, #initialize, #on_backfill_error, #partition?, #partitioning, #preferred_create_state_machine_method, #preprocess_headers_for_logging, #primary_key_column, #process_state_change, #process_webhooks_synchronously?, #qualified_table_sequel_identifier, #readonly_dataset, #remote_key_column, #requires_sequence?, #resource_name_plural, #resource_name_singular, #schema_and_table_symbols, #storable_columns, #synchronous_processing_response_body, #timestamp_column, #upsert_has_deps?, #upsert_webhook, #upsert_webhook_body, #verify_backfill_credentials, #webhook_endpoint, #webhook_response, #with_advisory_lock

Constructor Details

This class inherits a constructor from Webhookdb::Replicator::Base

Class Method Details

._compact_vevent_lines(lines) ⇒ String

Parameters:

  • lines (Array<String>)

Returns:

  • (String)


238
239
240
241
242
243
244
245
246
247
248
249
250
251
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 238

# Unfold continuation lines (lines starting with whitespace) into the line
# before them, then join everything with "\n".
#
# Note this works in place: continuation strings are left-stripped, appended
# onto the preceding string, and removed from +lines+.
#
# @param lines [Array<String>]
# @return [String]
def self._compact_vevent_lines(lines)
  # Go from the last line to the second, so that a run of continuation lines
  # collapses bottom-up and deleting an index never shifts unvisited entries.
  (lines.length - 1).downto(1) do |i|
    continuation = lines[i]
    next unless continuation.start_with?(/\s+/)
    continuation.lstrip!
    lines[i - 1] << continuation
    lines.delete_at(i)
  end
  lines.join("\n")
end

._parse_line(input) ⇒ Object

Parameters:

  • input (String)


327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 327

# Split a single (already unfolded) content line into its parts.
#
# @param input [String]
# @return [Array] tuple of [name, value, params]. When the line does not match
#   LINE, the tuple is [input, nil, {}].
def self._parse_line(input)
  matched = /#{LINE}/o.match(input)
  return input, nil, {} if matched.nil?
  params = {}
  matched[:params].scan(/#{PARAM}/o) do |(param_name, raw_values)|
    raw_values.scan(/#{PVALUE}/o) do |raw|
      # PVALUE can match the empty string (between and after values); skip those.
      next unless raw.size.positive?
      # Surrounding quotes are dropped. With several values, the last one wins.
      params[param_name] = raw.gsub(/\A"|"\z/, "")
    end
  end
  [matched[:name], matched[:value], params]
end

._parse_time_with_tzid(value, tzid) ⇒ Object

Given a tzid and value for a timestamp, return a Time (with a timezone). While there’s no formal naming scheme, we see the following forms:

  • valid names like America/Los_Angeles, US/Eastern

  • dashes, like America-Los_Angeles, US-Eastern

  • Offsets, like GMT-0700

In theory this can be any value, and must be given in the calendar feed (VTIMEZONE). However that is extremely difficult; even the icalendar gem doesn’t seem to do it 100% right. We can solve for this if needed; in the meantime, log it in Sentry and use UTC.

If the zone cannot be parsed, assume UTC.

Return a tuple of [Time, true if the zone could be parsed]. If the zone cannot be parsed, you usually want to log or store it.



359
360
361
362
363
364
365
366
367
368
369
370
371
372
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 359

# Parse +value+ as a time in the zone named by +tzid+.
#
# Zones are resolved in this order:
# 1. A zone name known to Time.find_zone, after replacing dashes with slashes
#    (so "America-Los_Angeles" is looked up as "America/Los_Angeles").
# 2. A fixed offset of the form "GMT+hhmm" / "UTC-hhmm".
# 3. A name present in Webhookdb::WindowsTZ.windows_name_to_tz.
# 4. Otherwise UTC is assumed.
#
# @param value [String] timestamp string without zone information
# @param tzid [String] the TZID parameter from the feed
# @return [Array] tuple of [parsed time, true if the zone could be resolved]
def self._parse_time_with_tzid(value, tzid)
  if (zone = Time.find_zone(tzid.tr("-", "/")))
    return [zone.parse(value), true]
  end
  # Anchor with \A/\z rather than ^/$: the latter match at line boundaries,
  # so a multi-line tzid such as "junk\nGMT-0700" used to be accepted here,
  # and the offset sliced below was then garbage.
  if /\A(GMT|UTC)[+-]\d\d\d\d\z/.match?(tzid)
    # Drop the 3-letter prefix, leaving "+hhmm" / "-hhmm".
    offset = tzid[3..]
    return [Time.parse(value + offset), true]
  end
  if (zone = Webhookdb::WindowsTZ.windows_name_to_tz[tzid])
    return [zone.parse(value), true]
  end
  zone = Time.find_zone!("UTC")
  return [zone.parse(value), false]
end

.descriptor ⇒ Webhookdb::Replicator::Descriptor



13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 13

# Describes this replicator: its name, how to construct it, and the
# icalendar_calendar_v1 replicator it depends on.
#
# @return [Webhookdb::Replicator::Descriptor]
def self.descriptor
  Webhookdb::Replicator::Descriptor.new(
    name: "icalendar_event_v1",
    resource_name_singular: "iCalendar Event",
    description: "Individual events in an icalendar. See icalendar_calendar_v1.",
    api_docs_url: "https://icalendar.org/",
    ctor: ->(sint) { Webhookdb::Replicator::IcalendarEventV1.new(sint) },
    dependency_descriptor: Webhookdb::Replicator::IcalendarCalendarV1.descriptor,
    feature_roles: [],
    supports_webhooks: true,
  )
end

.entry_is_date_str?(e) ⇒ Boolean

Returns:

  • (Boolean)


42
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 42

# True if the entry's "v" value is a date string (see .value_is_date_str?).
# Raises KeyError if the entry has no "v" key.
def self.entry_is_date_str?(e)
  self.value_is_date_str?(e.fetch("v"))
end

.entry_to_date(entry) ⇒ Date?

Returns:

  • (Date, nil)

Raises:

  • (ArgumentError)


59
60
61
62
63
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 59

# Parse the entry's "v" value (format YYYYMMDD) into a Date.
#
# @param entry [Hash]
# @return [Date]
# @raise [ArgumentError] if the value is not a date string
def self.entry_to_date(entry)
  raw = entry.fetch("v")
  return Date.strptime(raw, "%Y%m%d") if self.value_is_date_str?(raw)
  raise ArgumentError, "must pass a date string"
end

.entry_to_date_or_datetime(entry) ⇒ Array<Time,Date,true,false,nil>

Return tuple of parsed date or datetime, and a boolean of whether the timezone could be parsed (true if date was parsed).

Returns:

  • (Array<Time,Date,true,false,nil>)


37
38
39
40
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 37

# Parse an entry as either a Date or a Time, depending on its "v" value.
#
# @param entry [Hash]
# @return [Array] tuple of [Date or Time, whether the timezone could be parsed].
#   The flag is always true for dates.
def self.entry_to_date_or_datetime(entry)
  if self.value_is_date_str?(entry.fetch("v"))
    [self.entry_to_date(entry), true]
  else
    self.entry_to_datetime(entry)
  end
end

.entry_to_datetime(entry) ⇒ Array<Time,true,false,nil>

Return tuple of parsed datetime, and a boolean of whether the timezone could be parsed.

Returns:

  • (Array<Time,true,false,nil>)

Raises:

  • (ArgumentError)


48
49
50
51
52
53
54
55
56
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 48

# Parse the entry's "v" value into a Time.
#
# - Values ending in "Z" are parsed directly, with the zone taken from the value.
# - Otherwise, if the entry has a "TZID", it is resolved by ._parse_time_with_tzid.
# - Otherwise the value is parsed as UTC and the zone is reported as not parsed.
#
# @param entry [Hash]
# @return [Array] tuple of [Time, whether the timezone could be parsed]
# @raise [ArgumentError] if the value is a date string
def self.entry_to_datetime(entry)
  raw = entry.fetch("v")
  raise ArgumentError, "do not pass a date string" if self.value_is_date_str?(raw)
  if raw.end_with?("Z")
    [Time.strptime(raw, "%Y%m%dT%H%M%S%Z"), true]
  elsif (tzid = entry["TZID"])
    self._parse_time_with_tzid(raw, tzid)
  else
    [Time.find_zone!("UTC").parse(raw), false]
  end
end

.value_is_date_str?(v) ⇒ Boolean

Returns:

  • (Boolean)


43
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 43

# A value is treated as a date (rather than a datetime) when it is
# exactly 8 characters long, as in "YYYYMMDD".
def self.value_is_date_str?(v)
  v.length == 8
end

.vevent_to_hash(vevent_lines) ⇒ Object



253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 253

# Convert the raw lines of a VEVENT into a Hash keyed by property name.
#
# Each property becomes an entry hash of {"v" => value} merged with the
# property's parameters. Properties named in ARRAY_KEYS accumulate into an
# Array of entries; for all others, the last entry seen wins.
# BEGIN/END lines are never stored, and anything nested below the
# outermost component is skipped.
#
# @param vevent_lines [Array<String>] passed to ._compact_vevent_lines, which modifies it
# @return [Hash]
def self.vevent_to_hash(vevent_lines)
  parsed = {}
  depth = 0
  self._compact_vevent_lines(vevent_lines).each_line do |raw|
    if raw.start_with?("BEGIN")
      depth += 1
      next
    end
    if raw.start_with?("END")
      depth -= 1
      next
    end
    # Only properties directly inside the outermost component are kept.
    next if depth > 1
    content = raw.strip
    next if content.empty?
    keyname, value, params = self._parse_line(content)
    if value
      # Turn the escaped sequences in the text into the real control characters.
      value.gsub!("\\r\\n", "\r\n")
      value.gsub!("\\n", "\n")
      value.gsub!("\\t", "\t")
      # This line is not tested, since replicating issues with HTTP body encoding
      # is really tricky (while I love Ruby's unicode handling, trying to replicate
      # invalid data from other sources is a pain).
      # However we do get invalid unicode sequences, like:
      #   DESCRIPTION:\r\nNFL regional registration opens 9/25 and ends 11/20\XAOwww.nflflag.com
      # which cannot be encoded in JSON:
      #   Invalid Unicode [a0 77 77 77 2e] at 52 (JSON::GeneratorError)
      # The only way I can think to handle this is with replacing invalid utf-8 chars
      # (with the unicode questionmark icon), so they can be represented as JSON.
      # This fix is here, and not in `Base#_to_json` (like the null char fixes),
      # since I think this is an issue with feeds like icalendar,
      # and not something to handle generally
      # (we may also see this in something like Atom, but perhaps because
      # atom is XML, not a 'plain' text format, it'll be more rare).
      value.encode!("UTF-8", invalid: :replace, undef: :replace)
    end
    entry = {"v" => value}.merge(params)
    if ARRAY_KEYS.include?(keyname)
      (parsed[keyname] ||= []) << entry
    else
      parsed[keyname] = entry
    end
  end
  parsed
end

Instance Method Details

#_denormalized_columns ⇒ Object



124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 124

# Columns extracted from each event, in addition to the remote key and data columns.
# Timestamp columns use CONV_DATETIME and date columns use CONV_DATE, so for a
# given DTSTART/DTEND only one of the *_at / *_date pair is populated.
def _denormalized_columns
  column = Webhookdb::Replicator::Column
  [
    column.new(:calendar_external_id, TEXT, index: true),
    column.new(:uid, TEXT, data_key: ["UID", "v"], index: true),
    column.new(:row_updated_at, TIMESTAMP, index: true),
    column.new(
      :last_modified_at,
      TIMESTAMP,
      index: true,
      data_key: "LAST-MODIFIED",
      defaulter: :now,
      optional: true,
      converter: CONV_DATETIME,
    ),
    column.new(:created_at, TIMESTAMP, optional: true, data_key: "CREATED", converter: CONV_DATETIME),
    column.new(
      :start_at,
      TIMESTAMP,
      index: true,
      index_not_null: true,
      data_key: "DTSTART",
      converter: CONV_DATETIME,
    ),
    # This is True when start/end at fields are missing timezones in the underlying feed.
    # Their timestamps are in UTC.
    column.new(:missing_timezone, BOOLEAN, data_key: "DTSTART", converter: CONV_MISSING_TZ),
    column.new(
      :end_at,
      TIMESTAMP,
      index: true,
      index_not_null: true,
      data_key: "DTEND",
      optional: true,
      converter: CONV_DATETIME,
    ),
    column.new(
      :start_date,
      DATE,
      index: true,
      index_not_null: true,
      data_key: "DTSTART",
      converter: CONV_DATE,
    ),
    column.new(
      :end_date,
      DATE,
      index: true,
      index_not_null: true,
      data_key: "DTEND",
      optional: true,
      converter: CONV_DATE,
    ),
    column.new(:status, TEXT, data_key: ["STATUS", "v"], optional: true),
    column.new(:categories, TEXT_ARRAY, data_key: ["CATEGORIES"], optional: true, converter: CONV_COMMA_SEP_ARRAY),
    column.new(:priority, INTEGER, data_key: ["PRIORITY", "v"], optional: true, converter: column::CONV_TO_I),
    column.new(:geo_lat, DECIMAL, data_key: ["GEO", "v"], optional: true, converter: CONV_GEO_LAT),
    column.new(:geo_lng, DECIMAL, data_key: ["GEO", "v"], optional: true, converter: CONV_GEO_LNG),
    column.new(:classification, TEXT, data_key: ["CLASS", "v"], optional: true),
    column.new(:recurring_event_id, TEXT, optional: true, index: true, index_not_null: true),
    column.new(:recurring_event_sequence, INTEGER, optional: true),
  ]
end

#_extra_index_specs ⇒ Array<Webhookdb::Replicator::IndexSpec>



209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 209

# Partial indices beyond the per-column ones:
# - calendar + start/end timestamps, for non-cancelled rows that have a start_at
# - calendar + start/end dates, for non-cancelled rows that have a start_date
# - row_updated_at, for cancelled rows only
#
# @return [Array<Webhookdb::Replicator::IndexSpec>]
def _extra_index_specs
  spec = Webhookdb::Replicator::IndexSpec
  not_cancelled = Sequel[:status].is_distinct_from("CANCELLED")
  [
    spec.new(
      columns: [:calendar_external_id, :start_at, :end_at],
      where: not_cancelled & (Sequel[:start_at] !~ nil),
    ),
    spec.new(
      columns: [:calendar_external_id, :start_date, :end_date],
      where: not_cancelled & (Sequel[:start_date] !~ nil),
    ),
    spec.new(
      columns: [:row_updated_at],
      where: Sequel[status: "CANCELLED"],
      identifier: "cancelled_row_updated_at",
    ),
  ]
end

#_prepare_for_insert(resource, event, request, enrichment) ⇒ Object



173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 173

# Builds the row via the parent implementation, then fills in an implicit end
# when the event has a start but no end.
def _prepare_for_insert(resource, event, request, enrichment)
  row = super
  # Events can have a DTSTART, but no DTEND.
  # https://icalendar.org/iCalendar-RFC-5545/3-6-1-event-component.html
  # In these cases, we need to:
  # - Use the duration, given.
  # - Dates default to the next day.
  # - Times default to start time.
  if row[:start_at] && !row[:end_at]
    self._set_implicit_end_at(resource, row)
  elsif row[:start_date] && !row[:end_date]
    self._set_implicit_end_date(resource, row)
  end
  row
end

#_remote_key_column ⇒ Object



114
115
116
117
118
119
120
121
122
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 114

# The unique key column. Its value is computed by CONV_REMOTE_KEY from the
# resource, so the data_key below is only a placeholder.
def _remote_key_column
  Webhookdb::Replicator::Column.new(
    :compound_identity,
    TEXT,
    converter: CONV_REMOTE_KEY,
    data_key: "<compound key, see converter>",
    optional: true, # This is done via the converter, data_key never exists
  )
end

#_resource_and_event(request) ⇒ Object



160
161
162
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 160

# The request body is the resource itself; there is no separate event.
#
# @return [Array] tuple of [request.body, nil]
def _resource_and_event(request)
  [request.body, nil]
end

#_resource_to_data(r) ⇒ Object



164
165
166
167
168
169
170
171
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 164

# Returns a copy of the resource without the bookkeeping keys that are stored
# in their own columns. The resource passed in is not modified.
def _resource_to_data(r, *)
  data = r.dup
  ["calendar_external_id", "recurring_event_id", "recurring_event_sequence", "row_updated_at"].each do |key|
    data.delete(key)
  end
  data
end

#_set_implicit_end_at(resource, h) ⇒ Object



199
200
201
202
203
204
205
206
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 199

# Sets h[:end_at] for an event with a start time but no end time:
# start plus DURATION when the resource has one, otherwise the start time itself.
def _set_implicit_end_at(resource, h)
  duration_entry = resource["DURATION"]
  if duration_entry
    h[:end_at] = h[:start_at] + ActiveSupport::Duration.parse(duration_entry.fetch("v"))
    nil
  else
    h[:end_at] = h[:start_at]
  end
end

#_set_implicit_end_date(resource, h) ⇒ Object



189
190
191
192
193
194
195
196
197
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 189

# Sets h[:end_date] for an event with a start date but no end date:
# start plus DURATION when the resource has one, otherwise the day after the start.
def _set_implicit_end_date(resource, h)
  duration_entry = resource["DURATION"]
  if duration_entry
    # See https://icalendar.org/iCalendar-RFC-5545/3-3-6-duration.html
    h[:end_date] = h[:start_date] + ActiveSupport::Duration.parse(duration_entry.fetch("v"))
    nil
  else
    h[:end_date] = h[:start_date] + 1.day
  end
end

#_timestamp_column_name ⇒ Object



158
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 158

# Name of the column used as this replicator's timestamp column.
def _timestamp_column_name
  :last_modified_at
end

#_update_where_expr ⇒ Object



227
228
229
230
231
232
233
234
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 227

# Condition under which an upsert updates an existing row:
# only when the stored data differs from the incoming (excluded) data.
def _update_where_expr
  # Compare against data to avoid the constant writes. JSONB != operations are very fast,
  # so this should not be any real performance issue.
  # last_modified_at is unreliable because LAST-MODIFIED is unreliable,
  # even in feeds it is set. There are cases, such as adding an EXDATE to an RRULE,
  # that do not trigger LAST-MODIFIED changes.
  stored_data = self.qualified_table_sequel_identifier[:data]
  incoming_data = Sequel[:excluded][:data]
  stored_data !~ incoming_data
end

#_webhook_response(_request) ⇒ Object



112
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 112

# Every request gets an OK response; the request itself is not inspected.
def _webhook_response(_request)
  Webhookdb::WebhookResponse.ok
end

#backfill_not_supported_message ⇒ Object



405
406
407
408
409
410
411
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 405

# User-facing explanation of why backfilling is unavailable, pointing to the
# setup guide and to 'SYNC' messages as the way to force a refresh.
#
# @return [String]
def backfill_not_supported_message
  message = %(#{self.resource_name_singular} does not support backfilling.
See https://docs.webhookdb.com/guides/icalendar/ for instructions on setting up your integration.

You can POST 'SYNC' messages to WebhookDB to force-sync a user's feed,
though keep in mind calendar providers only refresh feeds periodically.)
  message
end

#calculate_webhook_state_machine ⇒ Object



392
393
394
395
396
397
398
399
400
401
402
403
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 392

# Returns the dependency setup step if one is still pending; otherwise a
# completed step telling the user setup is done and how to query the data.
def calculate_webhook_state_machine
  dependency_step = self.calculate_dependency_state_machine_step(dependency_help: "")
  return dependency_step if dependency_step
  done = Webhookdb::Replicator::StateMachineStep.new
  query_help = self._query_help_output(prefix: "Once data is available, you can query #{self.resource_name_plural}")
  done.output = %(Great! You are all set.
Refer to https://docs.webhookdb.com/guides/icalendar/ for detailed instructions
on syncing data from iCalendar/ics feeds.

#{query_help})
  done.completed
end

#documentation_url ⇒ Object



10
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 10

# Documentation link shared by the iCalendar replicators.
def documentation_url
  Webhookdb::Icalendar::DOCUMENTATION_URL
end

#on_dependency_webhook_upsert(_ical_svc, _ical_row) ⇒ Object



374
375
376
377
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 374

# Deliberately does nothing and returns nil.
# We use an async job to sync when the dependency syncs.
def on_dependency_webhook_upsert(_ical_svc, _ical_row, **)
  nil
end

#stale_row_deleter ⇒ Object



390
# File 'lib/webhookdb/replicator/icalendar_event_v1.rb', line 390

# Builds a new StaleRowDeleter bound to this replicator.
def stale_row_deleter
  StaleRowDeleter.new(self)
end