Class: StringTools::HTML::LinksRemoveScrubber

Inherits:
Object
  • Object
show all
Defined in:
lib/string_tools/html.rb

Instance Method Summary collapse

Constructor Details

#initialize(options) ⇒ LinksRemoveScrubber

Returns a new instance of LinksRemoveScrubber.



58
59
60
61
62
# File 'lib/string_tools/html.rb', line 58

# Builds the scrubber from an options hash.
#
# @param options [Hash] configuration; read via +fetch+
# @option options [Object] :whitelist (required) collection queried with
#   +include?+ when deciding whether a host is allowed
# @option options [Boolean] :remove_without_host (true) whether links whose
#   URI has no host are removed as well
# @raise [KeyError] when +:whitelist+ is not present in +options+
def initialize(options)
  allowed_hosts = options.fetch(:whitelist)
  drop_hostless = options.fetch(:remove_without_host) { true }

  @whitelist = allowed_hosts
  @remove_without_host = drop_hostless
  # Starts out false; read back through #done_changes?.
  @is_have_done_changes = false
end

Instance Method Details

#call(node) ⇒ Object



68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/string_tools/html.rb', line 68

# Inspects a node's +href+ and hands the node to +replace_with_content+
# when the link should not be kept.
#
# A node is passed to +replace_with_content+ when:
# * its URI has no host and +@remove_without_host+ is truthy, or
# * its host (converted to Unicode form) is not whitelisted, or
# * the +href+ cannot be parsed/normalized as a URI.
#
# Nodes with a blank +href+ are left alone.
#
# @param node [Object] element supporting +node['href']+
#   (presumably a Nokogiri node passed in by Loofah — confirm against caller)
# @return [Object, nil] whatever +replace_with_content+ returns, or nil when
#   the node is left untouched
def call(node)
  href = node['href']
  return if href.blank?

  uri = Addressable::URI.parse(href).normalize
  should_unwrap =
    if uri.host
      # Whitelist entries are compared against the Unicode form of the host.
      !whitelisted?(SimpleIDN.to_unicode(uri.host))
    else
      @remove_without_host
    end
  replace_with_content(node) if should_unwrap
rescue Addressable::URI::InvalidURIError
  # An unparseable link is treated the same as a non-whitelisted one.
  replace_with_content(node)
end

#done_changes? ⇒ Boolean

Returns:

  • (Boolean)


64
65
66
# File 'lib/string_tools/html.rb', line 64

# Reports the scrubber's "changes were made" flag.
#
# The flag is initialised to false by the constructor; the code that flips it
# is not visible here (presumably +replace_with_content+ — confirm).
#
# @return [Boolean] current value of +@is_have_done_changes+
def done_changes?
  @is_have_done_changes
end

#whitelisted?(domain) ⇒ Boolean

Returns:

  • (Boolean)


81
82
83
84
85
86
87
88
89
90
# File 'lib/string_tools/html.rb', line 81

# Checks whether +domain+ or any of its parent domains is in the whitelist.
#
# Every dot-separated suffix of +domain+ with at least two labels is tried,
# shortest first (e.g. "example.com", then "www.example.com"). A bare
# single-label name such as "com" is never looked up.
#
# @param domain [String] host name to test
# @return [Boolean] true as soon as one suffix is found in +@whitelist+,
#   false otherwise
def whitelisted?(domain)
  labels = domain.split('.')
  2.upto(labels.length).any? do |label_count|
    @whitelist.include?(labels.last(label_count).join('.'))
  end
end