Module: Oneboxer

Defined in:
lib/oneboxer.rb

Defined Under Namespace

Classes: Result

Constant Summary collapse

# CSS class used to select onebox anchors (see the "a.<class>" selector in
# each_onebox_link).
ONEBOX_CSS_CLASS =
"onebox"
# Matches a whole, dot-prefixed audio file extension, case-insensitively.
AUDIO_REGEX =
/\A\.(mp3|og[ga]|opus|wav|m4[abpr]|aac|flac)\z/i
# Matches a whole, dot-prefixed video file extension, case-insensitively.
# Each extension is listed exactly once.
VIDEO_REGEX =
/\A\.(mov|mp4|webm|m4v|3gp|ogv|avi|mpeg)\z/i

Class Method Summary collapse

Class Method Details

.allowed_post_types ⇒ Object



79
80
81
# File 'lib/oneboxer.rb', line 79

# Returns the post type values for regular posts and moderator actions.
#
# The array is built once from Post.types and memoized in
# @allowed_post_types, so every call returns the same Array instance.
#
# @return [Array] [Post.types[:regular], Post.types[:moderator_action]]
def self.allowed_post_types
  return @allowed_post_types if @allowed_post_types

  post_types = Post.types
  @allowed_post_types = [post_types[:regular], post_types[:moderator_action]]
end

.amazon_domains ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/oneboxer.rb', line 37

# Builds the list of Amazon storefront origins, one per known domain suffix.
#
# A fresh Array is built on every call.
#
# @return [Array<String>] origins of the form "https://www.amazon.<suffix>"
def self.amazon_domains
  %w[com com.br ca cn fr de in it co.jp com.mx nl pl sa sg es se com.tr ae co.uk].map do |tld|
    "https://www.amazon.#{tld}"
  end
end

.apply(string_or_doc, extra_paths: nil) ⇒ Object



208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# File 'lib/oneboxer.rb', line 208

# Replaces every onebox link in a document with the HTML supplied by the
# caller's block.
#
# @param string_or_doc [String, Object] an HTML string (parsed here with
#   Loofah.html5_fragment) or an already-parsed document that is used as-is
# @param extra_paths [Array<String>, nil] additional CSS selectors, forwarded
#   unchanged to each_onebox_link
# @yield [url, element] called for each onebox link; the first value of the
#   block's result is used as the replacement HTML (blank means "skip")
# @return [Result] Result.new(doc, changed), where changed is true once at
#   least one link has been replaced
def self.apply(string_or_doc, extra_paths: nil)
  doc = string_or_doc
  doc = Loofah.html5_fragment(doc) if doc.is_a?(String)
  changed = false

  each_onebox_link(doc, extra_paths: extra_paths) do |url, element|
    # The block may return an array; only its first value is used.
    onebox, _ = yield(url, element)
    next if onebox.blank?

    parsed_onebox = Loofah.html5_fragment(onebox)
    next if parsed_onebox.children.blank?

    changed = true

    parent = element.parent
    # Special handling when the link sits in a <p> and the onebox contains a
    # block-level element (HTML5_BLOCK_ELEMENTS is defined outside this
    # method) — presumably to avoid nesting block content inside a paragraph.
    if parent&.node_name&.downcase == "p" &&
         parsed_onebox.children.any? { |child|
           HTML5_BLOCK_ELEMENTS.include?(child.node_name.downcase)
         }
      siblings = parent.children
      element_idx = siblings.find_index(element)
      # Nearest sibling on each side that is neither a <br> nor
      # whitespace-only text; out-of-range values (-1 / siblings.size) mean
      # nothing significant exists on that side.
      before_idx = first_significant_element_index(siblings, element_idx - 1, -1)
      after_idx = first_significant_element_index(siblings, element_idx + 1, +1)

      if before_idx < 0 && after_idx >= siblings.size
        # The link is the only significant content: replace the paragraph.
        parent.replace parsed_onebox
      elsif before_idx < 0
        # Nothing significant before the link: keep the trailing content in
        # the paragraph and put the onebox in front of it.
        parent.children = siblings[after_idx..siblings.size]
        parent.add_previous_sibling(parsed_onebox)
      elsif after_idx >= siblings.size
        # Nothing significant after the link: keep the leading content in
        # the paragraph and put the onebox after it.
        parent.children = siblings[0..before_idx]
        parent.add_next_sibling(parsed_onebox)
      else
        # Content on both sides: split the paragraph in two around the onebox.
        parent_rest = parent.dup

        parent.children = siblings[0..before_idx]
        parent_rest.children = siblings[after_idx..siblings.size]

        # Both nodes are inserted directly after parent, so the second
        # insertion (the onebox) ends up between parent and parent_rest.
        parent.add_next_sibling(parent_rest)
        parent.add_next_sibling(parsed_onebox)
      end
    else
      element.replace parsed_onebox
    end
  end

  Result.new(doc, changed)
end

.cache_failed!(url) ⇒ Object



295
296
297
# File 'lib/oneboxer.rb', line 295

# Records in Discourse.cache that oneboxing the given url failed.
#
# The marker (true) is stored under onebox_failed_cache_key(url) and expires
# after one hour.
#
# @param url [String] the url whose failure is recorded
# @return [Object] whatever Discourse.cache.write returns
def self.cache_failed!(url)
  failure_key = onebox_failed_cache_key(url)
  Discourse.cache.write(failure_key, true, expires_in: 1.hour)
end

.cache_response_body(uri, response) ⇒ Object



139
140
141
142
# File 'lib/oneboxer.rb', line 139

# Stores a response body in Redis (outside the namespace) for one minute.
#
# @param uri [Object] used to derive the key via redis_cached_response_body_key
# @param response [Object] the value to store
# @return [Object] whatever the Redis client's setex returns
def self.cache_response_body(uri, response)
  ttl_seconds = 1.minutes.to_i
  Discourse.redis.without_namespace.setex(
    redis_cached_response_body_key(uri),
    ttl_seconds,
    response,
  )
end

.cache_response_body?(uri) ⇒ Boolean

Returns:

  • (Boolean)


128
129
130
131
132
133
134
135
136
137
# File 'lib/oneboxer.rb', line 128

# Tells whether response bodies fetched from the given uri should be cached.
#
# True only when SiteSetting.cache_onebox_response_body? is enabled and the
# uri's hostname ends with one of the "|"-separated entries of
# SiteSetting.cache_onebox_response_body_domains.
#
# Always returns a real boolean: false when the setting is disabled, when the
# uri has no hostname, or when no domain matches. Empty entries in the domain
# list are ignored, because every string ends with "".
#
# NOTE(review): this is a plain suffix match, so "notexample.com" matches the
# entry "example.com" — confirm whether a dot-boundary check is wanted.
#
# @param uri [String, URI::Generic] strings are parsed with URI.parse
# @return [Boolean]
# @raise [URI::InvalidURIError] when a String uri cannot be parsed
def self.cache_response_body?(uri)
  uri = URI.parse(uri) if uri.is_a?(String)
  return false unless SiteSetting.cache_onebox_response_body?

  host = uri.hostname
  return false if host.nil? || host.empty?

  SiteSetting
    .cache_onebox_response_body_domains
    .split("|")
    .reject(&:empty?)
    .any? { |domain| host.end_with?(domain) }
end

.cached_onebox(url) ⇒ Object



103
104
105
106
107
108
109
110
111
# File 'lib/oneboxer.rb', line 103

# Looks up the cached onebox HTML for a url.
#
# @param url [String] the url whose cache entry is read
# @return [Object, nil] the :onebox value of the cached entry, nil when
#   nothing is cached, or "" when reading the entry raised (in which case the
#   entry is invalidated and a warning is logged)
def self.cached_onebox(url)
  entry = Discourse.cache.read(onebox_cache_key(url))
  entry[:onebox] if entry
rescue => error
  invalidate(url)
  Rails.logger.warn("invalid cached onebox for #{url} #{error}")
  ""
end

.cached_preview(url) ⇒ Object



113
114
115
116
117
118
119
120
121
# File 'lib/oneboxer.rb', line 113

# Looks up the cached preview HTML for a url.
#
# @param url [String] the url whose cache entry is read
# @return [Object, nil] the :preview value of the cached entry, nil when
#   nothing is cached, or "" when reading the entry raised (in which case the
#   entry is invalidated and a warning is logged)
def self.cached_preview(url)
  entry = Discourse.cache.read(onebox_cache_key(url))
  entry[:preview] if entry
rescue => error
  invalidate(url)
  Rails.logger.warn("invalid cached preview for #{url} #{error}")
  ""
end

.cached_response_body_exists?(uri) ⇒ Boolean

Returns:

  • (Boolean)


144
145
146
147
# File 'lib/oneboxer.rb', line 144

# Tells whether a cached response body is currently stored for the uri.
#
# Queries Redis (outside the namespace) for the key derived by
# redis_cached_response_body_key and treats a positive count as "exists".
#
# @param uri [Object] used to derive the Redis key
# @return [Boolean]
def self.cached_response_body_exists?(uri)
  hits = Discourse.redis.without_namespace.exists(redis_cached_response_body_key(uri))
  hits.to_i.positive?
end

.each_onebox_link(doc, extra_paths: []) ⇒ Object

Parse URLs out of HTML, returning the document when finished.



159
160
161
162
163
164
165
166
# File 'lib/oneboxer.rb', line 159

# Yields every onebox link found in the document, then returns the document.
#
# Links are selected with the CSS selector "a.<ONEBOX_CSS_CLASS>" plus any
# extra selectors; links without a present "href" are skipped.
#
# @param doc [Object] a parsed document responding to #css
# @param extra_paths [Array<String>, nil] additional CSS selectors splatted
#   into the #css call
# @yield [href, link] the link's href and the link node itself
# @return [Object] the same doc that was passed in
def self.each_onebox_link(doc, extra_paths: [])
  anchors = doc.css("a.#{ONEBOX_CSS_CLASS}", *extra_paths)
  return doc if anchors.blank?

  anchors.each do |anchor|
    href = anchor["href"]
    yield(href, anchor) if href.present?
  end

  doc
end

.engine(url) ⇒ Object



284
285
286
287
288
289
# File 'lib/oneboxer.rb', line 284

# Finds the onebox engine for a url.
#
# Builds an Onebox::Matcher for the url, passing the regexes derived from
# allowed_iframe_origins as the :allowed_iframe_regexes option, and returns
# what the matcher's #oneboxed reports.
#
# @param url [String] the url to match
# @return [Object] the result of Onebox::Matcher#oneboxed
def self.engine(url)
  iframe_regexes = Onebox::Engine.origins_to_regexes(allowed_iframe_origins)
  matcher = Onebox::Matcher.new(url, { allowed_iframe_regexes: iframe_regexes })
  matcher.oneboxed
end

.fetch_cached_response_body(uri) ⇒ Object



149
150
151
152
# File 'lib/oneboxer.rb', line 149

# Reads the cached response body for a uri from Redis (outside the namespace).
#
# @param uri [Object] used to derive the key via redis_cached_response_body_key
# @return [Object] whatever the Redis client's get returns for that key
def self.fetch_cached_response_body(uri)
  Discourse.redis.without_namespace.get(redis_cached_response_body_key(uri))
end

.first_significant_element_index(elements, index, step) ⇒ Object



257
258
259
260
261
262
263
264
265
266
267
268
269
270
# File 'lib/oneboxer.rb', line 257

# Walks from index in the direction of step, skipping insignificant nodes.
#
# A node is insignificant when it is a "br" element, or a "text" node whose
# HTML is blank after stripping. The walk stops at the first other node or as
# soon as the index leaves the bounds of elements.
#
# @param elements [#[], #size] the nodes to scan
# @param index [Integer] the position to start from
# @param step [Integer] the increment applied per skipped node (e.g. +1 / -1)
# @return [Integer] the index of the first significant node, or the first
#   out-of-range index reached (e.g. -1 or elements.size)
def self.first_significant_element_index(elements, index, step)
  insignificant =
    lambda do |node|
      name = node.node_name.downcase
      name == "br" || (name == "text" && node.to_html.strip.blank?)
    end

  index += step while index.between?(0, elements.size - 1) && insignificant.call(elements[index])

  index
end

.force_custom_user_agent_hosts ⇒ Object



75
76
77
# File 'lib/oneboxer.rb', line 75

# Returns the entries of the "|"-separated force_custom_user_agent_hosts
# site setting.
#
# @return [Array<String>]
def self.force_custom_user_agent_hosts
  raw_setting = SiteSetting.force_custom_user_agent_hosts
  raw_setting.split("|")
end

.force_get_hosts ⇒ Object



62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/oneboxer.rb', line 62

# Builds the de-duplicated list of origins collected from three sources, in
# this order:
#
# 1. each entry of the force_get_hosts site setting, prefixed "https://"
# 2. each entry of the cache_onebox_response_body_domains site setting,
#    prefixed "https://www."
# 3. everything returned by amazon_domains
#
# Both settings are "|"-separated strings.
#
# @return [Array<String>]
def self.force_get_hosts
  forced = SiteSetting.force_get_hosts.split("|").map { |domain| "https://#{domain}" }
  cached =
    SiteSetting
      .cache_onebox_response_body_domains
      .split("|")
      .map { |domain| "https://www.#{domain}" }

  [*forced, *cached, *amazon_domains].uniq
end

.ignore_redirects ⇒ Object



26
27
28
29
30
31
32
33
34
35
# File 'lib/oneboxer.rb', line 26

# Returns a fixed list of origins plus Discourse.base_url.
#
# The array is built on first use and memoized in @ignore_redirects, so
# Discourse.base_url is read only once and every call returns the same Array
# instance.
#
# @return [Array<String>]
def self.ignore_redirects
  return @ignore_redirects if @ignore_redirects

  @ignore_redirects = [
    "http://www.dropbox.com",
    "http://store.steampowered.com",
    "http://vimeo.com",
    "https://www.youtube.com",
    "https://twitter.com",
    Discourse.base_url,
  ]
end

.invalidate(url) ⇒ Object



123
124
125
126
# File 'lib/oneboxer.rb', line 123

# Removes both cache entries kept for a url: the one under
# onebox_cache_key(url) and the failure marker under
# onebox_failed_cache_key(url).
#
# @param url [String] the url whose cache entries are deleted
# @return [Object] the result of the second Discourse.cache.delete call
def self.invalidate(url)
  cache = Discourse.cache
  cache.delete(onebox_cache_key(url))
  cache.delete(onebox_failed_cache_key(url))
end

.is_previewing?(user_id) ⇒ Boolean

Returns:

  • (Boolean)


272
273
274
# File 'lib/oneboxer.rb', line 272

# Tells whether the preview flag is currently set for a user, i.e. whether
# the Redis value under preview_key(user_id) is the string "1".
#
# @param user_id [Object] passed to preview_key to derive the Redis key
# @return [Boolean]
def self.is_previewing?(user_id)
  flag = Discourse.redis.get(preview_key(user_id))
  flag == "1"
end

.local_handlers ⇒ Object



83
84
85
# File 'lib/oneboxer.rb', line 83

# Returns the handler registry, a Hash memoized in @local_handlers and
# created empty on first access.
#
# @return [Hash]
def self.local_handlers
  @local_handlers = {} unless @local_handlers
  @local_handlers
end

.onebox(url, options = nil) ⇒ Object



97
98
99
100
101
# File 'lib/oneboxer.rb', line 97

# Returns the :onebox part of onebox_raw's result for a url.
#
# When options[:invalidate_oneboxes] is truthy, the url's cache entries are
# invalidated first.
#
# @param url [String] the url to onebox
# @param options [Hash, nil] forwarded to onebox_raw; nil is treated as {}
# @return [Object] onebox_raw(url, options)[:onebox]
def self.onebox(url, options = nil)
  opts = options || {}
  invalidate(url) if opts[:invalidate_oneboxes]
  onebox_raw(url, opts)[:onebox]
end

.onebox_previewed!(user_id) ⇒ Object



280
281
282
# File 'lib/oneboxer.rb', line 280

# Clears the preview flag for a user by deleting the Redis key returned by
# preview_key(user_id).
#
# @param user_id [Object] passed to preview_key to derive the Redis key
# @return [Object] whatever the Redis client's del returns
def self.onebox_previewed!(user_id)
  flag_key = preview_key(user_id)
  Discourse.redis.del(flag_key)
end

.preview(url, options = nil) ⇒ Object



91
92
93
94
95
# File 'lib/oneboxer.rb', line 91

# Returns the :preview part of onebox_raw's result for a url.
#
# When options[:invalidate_oneboxes] is truthy, the url's cache entries are
# invalidated first.
#
# @param url [String] the url to preview
# @param options [Hash, nil] forwarded to onebox_raw; nil is treated as {}
# @return [Object] onebox_raw(url, options)[:preview]
def self.preview(url, options = nil)
  opts = options || {}
  invalidate(url) if opts[:invalidate_oneboxes]
  onebox_raw(url, opts)[:preview]
end

.preview_onebox!(user_id) ⇒ Object



276
277
278
# File 'lib/oneboxer.rb', line 276

# Sets the preview flag for a user: stores "1" in Redis under
# preview_key(user_id) with a one-minute expiry.
#
# @param user_id [Object] passed to preview_key to derive the Redis key
# @return [Object] whatever the Redis client's setex returns
def self.preview_onebox!(user_id)
  flag_key = preview_key(user_id)
  Discourse.redis.setex(flag_key, 1.minute, "1")
end

.recently_failed?(url) ⇒ Boolean

Returns:

  • (Boolean)


291
292
293
# File 'lib/oneboxer.rb', line 291

# Tells whether a failure marker is currently cached for the url, i.e.
# whether Discourse.cache holds a present value under
# onebox_failed_cache_key(url).
#
# @param url [String] the url to check
# @return [Boolean]
def self.recently_failed?(url)
  failure_marker = Discourse.cache.read(onebox_failed_cache_key(url))
  failure_marker.present?
end

.redis_cached_response_body_key(uri) ⇒ Object



154
155
156
# File 'lib/oneboxer.rb', line 154

# Builds the Redis key under which a uri's response body is cached: the
# uri's string form prefixed with "CACHED_RESPONSE_".
#
# @param uri [#to_s] the uri the key is derived from
# @return [String]
def self.redis_cached_response_body_key(uri)
  format("CACHED_RESPONSE_%s", uri)
end

.register_local_handler(controller, &handler) ⇒ Object



87
88
89
# File 'lib/oneboxer.rb', line 87

# Registers a block as the handler for a controller by storing it in
# local_handlers under the controller key; an existing entry is overwritten.
#
# @param controller [Object] the key the handler is stored under
# @yield the handler block (stored as a Proc; nil when no block is given)
# @return [Proc, nil] the stored handler
def self.register_local_handler(controller, &handler)
  local_handlers.store(controller, handler)
end