Class: Ficon
- Inherits:
-
Object
- Object
- Ficon
- Defined in:
- lib/ficon.rb,
lib/ficon/cache.rb,
lib/ficon/image.rb,
lib/ficon/version.rb
Defined Under Namespace
Constant Summary collapse
- ALIVE =
URL health status constants
'alive'
- DEAD =
'dead'
- SICK =
'sick'
- BLOCKED =
'blocked'
- VERSION =
"0.6"
Instance Attribute Summary collapse
-
#final_uri ⇒ Object
readonly
Returns the value of attribute final_uri.
-
#site ⇒ Object
readonly
Returns the value of attribute site.
-
#url_status ⇒ Object
readonly
Returns the value of attribute url_status.
-
#user_agent ⇒ Object
Returns the value of attribute user_agent.
Class Method Summary collapse
- .clear_cache ⇒ Object
- .normalise(base, candidate) ⇒ Object
- .page_images(uri, doc) ⇒ Object
- .site_images(uri, doc) ⇒ Object
Instance Method Summary collapse
- #classify_exception_status(exception) ⇒ Object
- #classify_response_status(response) ⇒ Object
- #description ⇒ Object
- #doc ⇒ Object
-
#initialize(uri, user_agent: nil) ⇒ Ficon
constructor
A new instance of Ficon.
- #other_page_data(document) ⇒ Object
- #page_images ⇒ Object
- #process ⇒ Object
- #report ⇒ Object
- #site_icons ⇒ Object
- #title ⇒ Object
Constructor Details
#initialize(uri, user_agent: nil) ⇒ Ficon
Returns a new instance of Ficon.
22 23 24 25 26 27 28 29 |
# File 'lib/ficon.rb', line 22
# Returns a new instance of Ficon and immediately runs #process on it.
#
# @param uri the address to inspect; handed to Addressable::URI.heuristic_parse
# @param user_agent optional User-Agent string; when nil (or false) a default
#   "FiconBot/<VERSION> ..." identifier is used instead
def initialize(uri, user_agent: nil)
  # Until something else updates it, the final URI is the requested one.
  @final_uri = @uri = Addressable::URI.heuristic_parse(uri)
  @site = {}
  @url_status = nil
  @user_agent = user_agent || "FiconBot/#{VERSION} (Ruby icon finder; https://github.com/dkam/ficon)"
  process
end
Instance Attribute Details
#final_uri ⇒ Object (readonly)
Returns the value of attribute final_uri.
13 14 15 |
# File 'lib/ficon.rb', line 13
# Returns the value of attribute final_uri.
def final_uri = @final_uri
#site ⇒ Object (readonly)
Returns the value of attribute site.
13 14 15 |
# File 'lib/ficon.rb', line 13
# Returns the value of attribute site.
def site = @site
#url_status ⇒ Object (readonly)
Returns the value of attribute url_status.
13 14 15 |
# File 'lib/ficon.rb', line 13
# Returns the value of attribute url_status.
def url_status = @url_status
#user_agent ⇒ Object
Returns the value of attribute user_agent.
14 15 16 |
# File 'lib/ficon.rb', line 14
# Returns the value of attribute user_agent.
def user_agent = @user_agent
Class Method Details
.clear_cache ⇒ Object
99 100 101 |
# File 'lib/ficon.rb', line 99
# Delegates to Cache.clear_cache and returns whatever that call returns.
def self.clear_cache = Cache.clear_cache
.normalise(base, candidate) ⇒ Object
132 133 134 135 136 137 138 139 140 |
# File 'lib/ficon.rb', line 132
# Turns +candidate+ into an absolute URL string, borrowing whatever it lacks
# (host, non-default port, scheme) from +base+.
#
# @param base a URI object or anything Kernel#URI can parse
# @param candidate a URL string: absolute, protocol-relative ("//host/x") or
#   host-relative ("/x")
# @return [String] the completed URL
# @raise [URI::InvalidURIError] if +candidate+ (or a String +base+) cannot be parsed
def self.normalise(base, candidate)
  parsed_candidate = URI(candidate)
  base = URI(base) unless base.is_a? URI

  if parsed_candidate.host.nil?
    # Set relative URLs to absolute
    parsed_candidate.host = base.host
    # Keep an explicit, non-default port (e.g. localhost:3000); without this the
    # result would silently point at the default port of the scheme.
    parsed_candidate.port = base.port unless base.port == base.default_port
  end

  # Set the scheme if missing
  parsed_candidate.scheme = base.scheme if parsed_candidate.scheme.nil?
  parsed_candidate.to_s
end
.page_images(uri, doc) ⇒ Object
126 127 128 129 130 |
# File 'lib/ficon.rb', line 126
# Builds Image objects for every og:image declared in +doc+, largest area first.
#
# Only the +content+ attribute of each matching <meta> tag is read. Taking all
# attribute values of the node would also pick up the literal "og:image"
# property value and turn it into a bogus "/og:image" candidate.
#
# @param uri base URL used by normalise to complete relative candidates
# @param doc parsed document responding to #xpath
# @return [Array] Image instances sorted by descending #area
def self.page_images(uri, doc)
  doc.xpath("//meta[@property='og:image']/@content")
    .map(&:value)
    .reject(&:empty?)
    .map { |v| (v[/^http/] || v[/^\//]) ? v : "/" + v } # root-anchor bare relative paths
    .map { |candidate| normalise(uri, candidate) }
    .uniq
    .map { |url| Image.new(url) }
    .sort_by(&:area)
    .reverse
end
.site_images(uri, doc) ⇒ Object
110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
# File 'lib/ficon.rb', line 110
# Collects site-level icon candidates (favicon links, apple-touch icons and the
# Windows tile image) from +doc+ and returns them as Image objects, largest
# area first.
#
# @param uri base URL used by normalise to complete relative candidates
# @param doc parsed document responding to #xpath / #at_xpath
# @return [Array] Image instances sorted by descending #area; the tile image,
#   if any, is constructed with the msapplication-TileColor value
def self.site_images(uri, doc)
  # Get tile color for Windows tiles
  tile_color = doc.at_xpath("//meta[@name='msapplication-TileColor']/@content")&.value

  # Content values of the msapplication-TileImage tags, looked up once.
  # Comparing in Ruby (instead of interpolating each URL into an XPath string)
  # avoids XPath syntax errors on URLs containing quotes and guarantees that
  # only TileImage tags are considered.
  tile_sources = doc.xpath("//meta[@name='msapplication-TileImage']/@content").map(&:value)

  paths = "//meta[@name='msapplication-TileImage']|//link[@type='image/ico' or @type='image/vnd.microsoft.icon']|//link[@rel='icon' or @rel='shortcut icon' or @rel='apple-touch-icon-precomposed' or @rel='apple-touch-icon']"

  # Keep only attribute values that look like image files, then root-anchor
  # bare relative paths.
  candidates = doc.xpath(paths)
    .flat_map { |e| e.values.select { |v| v =~ /\.png$|\.jpg$|\.gif$|\.ico$|\.svg$|\.ico\?\d*$/ } }
    .map { |v| (v[/^http/] || v[/^\//]) ? v : "/" + v }

  candidates.map { |candidate| normalise(uri, candidate) }.uniq.map do |url|
    # A tile image may have been declared absolutely or relative to the site.
    is_tile = tile_sources.include?(url) || tile_sources.include?(url.sub(uri.to_s, ""))
    Image.new(url, is_tile ? tile_color : nil)
  end.sort_by(&:area).reverse
end
Instance Method Details
#classify_exception_status(exception) ⇒ Object
157 158 159 160 161 162 163 164 165 166 167 168 |
# File 'lib/ficon.rb', line 157
# Maps an exception raised while fetching a URL to one of the URL health
# status constants.
#
# @param exception the rescued exception
# @return DEAD for DNS resolution failures, SICK for everything else
def classify_exception_status(exception)
  case exception
  when SocketError, Resolv::ResolvError then DEAD # DNS resolution failures
  when Net::HTTPError, Timeout::Error, Errno::ECONNREFUSED then SICK # Network issues worth retrying
  when OpenSSL::SSL::SSLError then SICK # SSL certificate errors
  else SICK # Default to retryable for unknown errors
  end
end
#classify_response_status(response) ⇒ Object
142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
# File 'lib/ficon.rb', line 142
# Maps an HTTP response to one of the URL health status constants, based on
# its numeric status code.
#
# @param response object whose #code converts to an integer via #to_i
# @return ALIVE for 2xx, DEAD for 404/410, BLOCKED for 401/403/429,
#   SICK for 5xx and for any other code
def classify_response_status(response)
  code = response.code.to_i

  return ALIVE if (200..299).cover?(code)
  return DEAD if [404, 410].include?(code)
  return BLOCKED if [401, 403, 429].include?(code)

  # 5xx and every unrecognised code are both treated as SICK.
  SICK
end
#description ⇒ Object
97 |
# File 'lib/ficon.rb', line 97
# Returns the :description entry of the site data (nil when absent).
def description
  @site[:description]
end
#doc ⇒ Object
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/ficon.rb', line 31
# Fetches (or loads from cache) the page body and returns it parsed by
# Nokogiri::HTML. The parsed document is memoised in @doc.
#
# Errors are reported on stdout and turned into a nil return value rather than
# propagated.
#
# @return the parsed document, or nil when nothing could be fetched or an
#   HTTP / socket / type / runtime error occurred
def doc
  # First try to fetch to determine final URL
  response = fetch_url(@uri) unless @data

  return nil if response.nil? && @data.nil?

  # Use final URL for caching
  cache = Cache.new(@final_uri)

  @data ||= cache.data

  if @data.nil? && response
    @data = response.body.force_encoding("UTF-8")
    cache.data = @data
    cache.etag = response["etag"] if response["etag"]
    cache.not_before = response["last-modified"] if response["last-modified"]
  end

  @doc ||= Nokogiri::HTML(@data)
rescue Net::HTTPError, SocketError => e
  puts "HTTP Error: #{e.inspect}"
  nil
rescue TypeError => e
  if /^http/.match?(@uri.to_s)
    puts e.inspect
    # Double-quoted "\n" so frames are separated by real newlines; a
    # single-quoted '\n' would print a literal backslash-n between them.
    puts e.backtrace.join("\n")
  else
    puts "Please prepend http:// or https:// to the URL"
  end
  nil
rescue RuntimeError => e
  puts e.message
  nil
end
#other_page_data(document) ⇒ Object
103 104 105 106 107 108 |
# File 'lib/ficon.rb', line 103
# Extracts title, description and canonical URL from +document+ into @site.
#
# The title prefers og:title and falls back to the stripped <title> text.
# The canonical URL is stored only when it differs from the requested URI.
#
# @param document parsed page responding to #at_xpath
def other_page_data(document)
  og_title = document.at_xpath("//meta[@property='og:title']/@content")&.value
  @site[:title] = og_title || document.at_xpath("//title")&.text&.strip

  @site[:description] = document.at_xpath("//meta[@property='og:description']/@content")&.value

  canonical_href = document.at_xpath("//link[@rel='canonical']/@href")&.value
  return if canonical_href == @uri.to_s

  @site[:canonical] = canonical_href
end
#page_images ⇒ Object
93 |
# File 'lib/ficon.rb', line 93
# Returns the :page_images entry of the site data, or an empty array when it
# has not been set.
def page_images
  @site[:page_images] || []
end
#process ⇒ Object
66 67 68 69 70 71 72 73 74 75 76 77 |
# File 'lib/ficon.rb', line 66
# Loads the document via #doc and fills @site with the images and page
# metadata found in it. When no document is available both image lists are
# set to empty arrays.
#
# @return [nil] always
def process
  page = doc

  unless page
    @site[:images] = []
    @site[:page_images] = []
    return nil
  end

  @site[:images] = self.class.site_images(@uri, page) || []
  @site[:page_images] = self.class.page_images(@uri, page) || []
  other_page_data(page)
  nil
end
#report ⇒ Object
79 80 81 82 83 84 85 86 87 88 89 |
# File 'lib/ficon.rb', line 79
# Builds a plain-text summary of what was found, one "Label: value" entry per
# line, terminated by a newline.
#
# The final URL, canonical URL and URL status lines are included only when
# they carry information (final URL differs from the requested one, canonical
# is set, status is set).
#
# @return [String] the multi-line report
def report
  lines = [
    "Site icon: #{@site[:images].first}",
    "Page icon: #{@site[:page_images].first}",
    "Page title: #{@site[:title]}",
    "Page description: #{@site[:description]}"
  ]
  lines.push("Final URL: #{@final_uri}") unless @final_uri.to_s == @uri.to_s
  lines.push("Canonical URL: #{@site[:canonical]}") if @site[:canonical]
  lines.push("URL Status: #{@url_status}") if @url_status

  lines.map { |line| "#{line}\n" }.join
end
#site_icons ⇒ Object
91 |
# File 'lib/ficon.rb', line 91
# Returns the :images entry of the site data, or an empty array when it has
# not been set.
def site_icons
  @site[:images] || []
end
#title ⇒ Object
95 |
# File 'lib/ficon.rb', line 95
# Returns the :title entry of the site data (nil when absent).
def title
  @site[:title]
end