Class: Ficon

Inherits:
Object
  • Object
show all
Defined in:
lib/ficon.rb,
lib/ficon/cache.rb,
lib/ficon/image.rb,
lib/ficon/version.rb

Defined Under Namespace

Classes: Cache, Image

Constant Summary collapse

ALIVE =

URL health status constants

'alive'
DEAD =
'dead'
SICK =
'sick'
BLOCKED =
'blocked'
VERSION =
"0.6"

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(uri, user_agent: nil) ⇒ Ficon

Returns a new instance of Ficon.



22
23
24
25
26
27
28
29
# File 'lib/ficon.rb', line 22

# Builds a Ficon for +uri+ and immediately runs #process.
#
# @param uri the address to inspect; handed to Addressable::URI.heuristic_parse
# @param user_agent [String, nil] overrides the default "FiconBot/<VERSION> ..." agent string
def initialize(uri, user_agent: nil)
  # The final URI starts out identical to the parsed input URI.
  @final_uri = @uri = Addressable::URI.heuristic_parse(uri)
  @site = {}
  @url_status = nil

  @user_agent = user_agent
  @user_agent ||= "FiconBot/#{VERSION} (Ruby icon finder; https://github.com/dkam/ficon)"

  process
end

Instance Attribute Details

#final_uri ⇒ Object (readonly)

Returns the value of attribute final_uri.



13
14
15
# File 'lib/ficon.rb', line 13

# Reader for @final_uri (the constructor initialises it to the parsed input URI).
def final_uri = @final_uri

#site ⇒ Object (readonly)

Returns the value of attribute site.



13
14
15
# File 'lib/ficon.rb', line 13

# Reader for @site, the hash the constructor creates empty and #process fills in.
def site = @site

#url_status ⇒ Object (readonly)

Returns the value of attribute url_status.



13
14
15
# File 'lib/ficon.rb', line 13

# Reader for @url_status; the constructor sets it to nil.
# NOTE(review): presumably one of ALIVE / DEAD / SICK / BLOCKED once a fetch has been classified — confirm.
def url_status = @url_status

#user_agent ⇒ Object

Returns the value of attribute user_agent.



14
15
16
# File 'lib/ficon.rb', line 14

# Reader for @user_agent (the caller-supplied string, or the FiconBot default chosen by the constructor).
def user_agent = @user_agent

Class Method Details

.clear_cache ⇒ Object



99
100
101
# File 'lib/ficon.rb', line 99

# Delegates to Cache.clear_cache and returns whatever it returns.
def self.clear_cache = Cache.clear_cache

.normalise(base, candidate) ⇒ Object



132
133
134
135
136
137
138
139
140
# File 'lib/ficon.rb', line 132

# Turns +candidate+ into an absolute URL string, borrowing whatever it lacks
# (host, port, scheme) from +base+.
#
# @param base [URI, #to_str] the page URL the candidate was found on
# @param candidate [String] an absolute, host-relative ("/x.png") or
#   scheme-relative ("//cdn/x.png") reference
# @return [String] the candidate with missing host/port/scheme filled in
# @raise [URI::InvalidURIError] if either argument cannot be parsed
def self.normalise(base, candidate)
  parsed_candidate = URI(candidate)
  base = URI(base) unless base.is_a? URI

  if parsed_candidate.host.nil?
    # Relative reference: it lives on the same server as the base page.
    parsed_candidate.host = base.host
    # Keep a non-default port (e.g. localhost:3000); the default port is
    # skipped so the result never carries a redundant ":80" / ":443".
    parsed_candidate.port = base.port unless base.port == base.default_port
  end
  parsed_candidate.scheme = base.scheme if parsed_candidate.scheme.nil? # Set the scheme if missing

  parsed_candidate.to_s
end

.page_images(uri, doc) ⇒ Object



126
127
128
129
130
# File 'lib/ficon.rb', line 126

# Collects the page's Open Graph images (<meta property="og:image">).
#
# Only the +content+ attribute is read. Taking every attribute value of the
# tag would also pick up the literal property name "og:image" and turn it
# into a bogus "/og:image" image URL.
#
# @param uri the page URL, used as the base when absolutising relative paths
# @param doc a parsed document responding to +xpath+
# @return [Array<Image>] unique images, largest +area+ first
def self.page_images(uri, doc)
  doc.xpath("//meta[@property='og:image']")
    .filter_map { |meta| meta["content"] }
    .reject(&:empty?)
    .map { |value| (value[/^http/] || value[/^\//]) ? value : "/" + value }
    .map { |path| normalise(uri, path) }
    .uniq
    .map { |url| Image.new(url) }
    .sort_by(&:area)
    .reverse
end

.site_images(uri, doc) ⇒ Object



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/ficon.rb', line 110

# Collects site icons: favicon <link>s, apple-touch icons and the Windows
# tile image.
#
# Whether a URL came from the msapplication-TileImage <meta> tag is recorded
# while the candidates are gathered. This avoids interpolating the URL into a
# second XPath query, which breaks on URLs containing an apostrophe, had an
# `and`/`or` precedence slip, and ran once per URL.
#
# @param uri the page URL, used as the base when absolutising relative paths
# @param doc a parsed document responding to +xpath+ and +at_xpath+
# @return [Array<Image>] unique images, largest +area+ first; tile images
#   carry the page's msapplication-TileColor, all others get nil
def self.site_images(uri, doc)
  # Get tile color for Windows tiles
  tile_color = doc.at_xpath("//meta[@name='msapplication-TileColor']/@content")&.value

  paths = "//meta[@name='msapplication-TileImage']|//link[@type='image/ico' or @type='image/vnd.microsoft.icon']|//link[@rel='icon' or @rel='shortcut icon' or @rel='apple-touch-icon-precomposed' or @rel='apple-touch-icon']"
  icon_file = /\.png$|\.jpg$|\.gif$|\.ico$|\.svg$|\.ico\?\d*$/

  urls = []
  tile_urls = []
  doc.xpath(paths).each do |element|
    from_tile = element["name"] == "msapplication-TileImage"

    # Only attribute values that look like image files are candidates.
    element.values.grep(icon_file).each do |value|
      path = (value[/^http/] || value[/^\//]) ? value : "/" + value
      url = normalise(uri, path)
      urls << url
      tile_urls << url if from_tile
    end
  end

  urls.uniq
    .map { |url| Image.new(url, tile_urls.include?(url) ? tile_color : nil) }
    .sort_by(&:area)
    .reverse
end

Instance Method Details

#classify_exception_status(exception) ⇒ Object



157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/ficon.rb', line 157

# Maps an exception raised while fetching to a URL health status.
#
# DNS resolution failures (SocketError, Resolv::ResolvError) yield DEAD.
# Everything else yields SICK, i.e. worth retrying: network issues such as
# Net::HTTPError, Timeout::Error and Errno::ECONNREFUSED, SSL certificate
# errors, and any unknown error.
#
# @param exception [Exception]
# @return [String] DEAD or SICK
def classify_exception_status(exception)
  dns_failure = [SocketError, Resolv::ResolvError].any? { |klass| exception.is_a?(klass) }
  dns_failure ? DEAD : SICK
end

#classify_response_status(response) ⇒ Object



142
143
144
145
146
147
148
149
150
151
152
153
154
155
# File 'lib/ficon.rb', line 142

# Maps an HTTP response to a URL health status based on its numeric code.
#
#   2xx           -> ALIVE
#   404, 410      -> DEAD
#   401, 403, 429 -> BLOCKED
#   anything else -> SICK (5xx and every unlisted code)
#
# @param response an object whose +code+ converts with +to_i+
# @return [String] one of ALIVE, DEAD, BLOCKED, SICK
def classify_response_status(response)
  status = response.code.to_i

  return ALIVE if status.between?(200, 299)
  return DEAD if [404, 410].include?(status)
  return BLOCKED if [401, 403, 429].include?(status)

  SICK
end

#description ⇒ Object



97
# File 'lib/ficon.rb', line 97

# The page description stored under @site[:description], or nil if none was recorded.
def description
  @site[:description]
end

#doc ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/ficon.rb', line 31

# Returns the parsed HTML document for the page, or nil when it cannot be
# obtained.
#
# The page is fetched with fetch_url unless @data is already populated. The
# body is read from or written to a Cache keyed on @final_uri, then parsed
# with Nokogiri::HTML and memoised in @doc.
#
# Net::HTTPError, SocketError, TypeError and RuntimeError are reported on
# standard output and turned into a nil return.
def doc
  # First try to fetch to determine final URL
  response = fetch_url(@uri) unless @data
  return nil if response.nil? && @data.nil?

  # Use final URL for caching
  # NOTE(review): presumably fetch_url updates @final_uri when redirected — confirm.
  cache = Cache.new(@final_uri)

  @data ||= cache.data

  if @data.nil? && response
    @data = response.body.force_encoding("UTF-8")
    cache.data = @data
    cache.etag = response["etag"] if response["etag"]
    cache.not_before = response["last-modified"] if response["last-modified"]
  end

  @doc ||= Nokogiri::HTML(@data)
  @doc
rescue Net::HTTPError, SocketError => e
  puts "HTTP Error: #{e.inspect}"
  nil
rescue TypeError => e
  if /^http/.match?(@uri.to_s)
    puts e.inspect
    # Double quotes are required: '\n' in single quotes is a literal
    # backslash followed by "n", which glued all frames onto one line.
    puts e.backtrace.join("\n")
  else
    puts "Please prepend http:// or https:// to the URL"
  end
  nil
rescue RuntimeError => e
  puts e.message
  nil
end

#other_page_data(document) ⇒ Object



103
104
105
106
107
108
# File 'lib/ficon.rb', line 103

# Records title, description and canonical URL from +document+ into @site.
#
# * :title       - og:title content, falling back to the stripped <title> text
# * :description - og:description content (nil when absent)
# * :canonical   - <link rel="canonical"> href, stored only when it differs
#                  from @uri
#
# @param document a parsed document responding to +at_xpath+
def other_page_data(document)
  og_title = document.at_xpath("//meta[@property='og:title']/@content")&.value
  @site[:title] = og_title || document.at_xpath("//title")&.text&.strip

  og_description = document.at_xpath("//meta[@property='og:description']/@content")
  @site[:description] = og_description&.value

  canonical_href = document.at_xpath("//link[@rel='canonical']/@href")&.value
  return if canonical_href == @uri.to_s

  @site[:canonical] = canonical_href
end

#page_images ⇒ Object



93
# File 'lib/ficon.rb', line 93

# The images stored under @site[:page_images]; an empty array when none are recorded.
def page_images
  @site[:page_images] || []
end

#process ⇒ Object



66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/ficon.rb', line 66

# Loads the document via #doc and fills @site from it.
#
# With a document, @site[:images] and @site[:page_images] come from the
# class-level extractors and #other_page_data adds the remaining fields.
# Without one, both image lists are set to empty arrays.
#
# @return [nil]
def process
  document = doc

  unless document
    @site[:images] = []
    @site[:page_images] = []
    return nil
  end

  @site[:images] = self.class.site_images(@uri, document) || []
  @site[:page_images] = self.class.page_images(@uri, document) || []
  other_page_data(document)
  nil
end

#report ⇒ Object



79
80
81
82
83
84
85
86
87
88
89
# File 'lib/ficon.rb', line 79

# Builds a plain-text summary of what was found, one "Label: value" per line.
#
# The first site icon, first page icon, title and description are always
# listed. Final URL appears only when it differs from the requested URL;
# canonical URL and URL status appear only when set.
#
# @return [String] newline-terminated report
def report
  lines = [
    "Site icon: #{@site[:images].first}",
    "Page icon: #{@site[:page_images].first}",
    "Page title: #{@site[:title]}",
    "Page description: #{@site[:description]}"
  ]
  lines.push("Final URL: #{@final_uri}") unless @final_uri.to_s == @uri.to_s
  lines.push("Canonical URL: #{@site[:canonical]}") if @site[:canonical]
  lines.push("URL Status: #{@url_status}") if @url_status

  lines.map { |line| "#{line}\n" }.join
end

#site_icons ⇒ Object



91
# File 'lib/ficon.rb', line 91

# The icons stored under @site[:images]; an empty array when none are recorded.
def site_icons
  @site[:images] || []
end

#title ⇒ Object



95
# File 'lib/ficon.rb', line 95

# The page title stored under @site[:title], or nil if none was recorded.
def title
  @site[:title]
end