Class: Trawler::Parser
- Inherits:
-
Object
- Object
- Trawler::Parser
- Defined in:
- lib/trawler/parser.rb
Instance Attribute Summary
-
#meta_data ⇒ Object
readonly
Returns the value of attribute meta_data.
-
#url ⇒ Object
readonly
Returns the value of attribute url.
Instance Method Summary
- #description ⇒ Object
- #document ⇒ Object
- #images ⇒ Object
-
#initialize(options) ⇒ Parser
constructor
A new instance of Parser.
- #normalize_url(uri) ⇒ Object
- #title ⇒ Object
- #video ⇒ Object
- #video_still ⇒ Object
Constructor Details
#initialize(options) ⇒ Parser
Returns a new instance of Parser.
9 10 11 12 13 14 |
# File 'lib/trawler/parser.rb', line 9
#
# Builds a parser from an options collection.
#
# @param options [Hash] read with the symbol keys :page, :url and
#   :image_size (presumably a Hash; anything answering #[] works)
# @return [Parser] a new instance of Parser
def initialize(options)
  @page = options[:page]                   # raw markup, parsed lazily by #document
  @url = options[:url]                     # base that #normalize_url joins against
  @min_image_size = options[:image_size]
  @meta_data = Hashr.new
end
Instance Attribute Details
#meta_data ⇒ Object (readonly)
Returns the value of attribute meta_data.
7 8 9 |
# File 'lib/trawler/parser.rb', line 7
#
# Returns the value of attribute meta_data.
#
# @return [Object] whatever is stored in @meta_data
def meta_data
  @meta_data
end
#url ⇒ Object (readonly)
Returns the value of attribute url.
6 7 8 |
# File 'lib/trawler/parser.rb', line 6
#
# Returns the value of attribute url.
#
# @return [Object] whatever is stored in @url
def url
  @url
end
Instance Method Details
#description ⇒ Object
20 21 22 |
# NOTE(review): this listing is incomplete. The receiver of `.nil?` and the
# ternary's else-branch were lost when the page was extracted. What remains
# shows only that html_description is returned when the missing value is nil.
# Restore the missing tokens from lib/trawler/parser.rb (lines 20-22).
# File 'lib/trawler/parser.rb', line 20 def description .nil? ? html_description : end |
#document ⇒ Object
48 49 50 51 52 |
# File 'lib/trawler/parser.rb', line 48
#
# Parses @page with Nokogiri::HTML on first use and caches the result in
# @document. The encoding is assigned once, when the document is built,
# instead of being reassigned on every call.
#
# @return [Object] the memoized value returned by Nokogiri::HTML
def document
  @document ||= Nokogiri::HTML(@page).tap { |doc| doc.encoding = "UTF-8" }
end
#images ⇒ Object
24 25 26 27 28 29 30 31 32 |
# File 'lib/trawler/parser.rb', line 24
#
# Collects whatever find_images returns, cleans the entries up and passes
# each one through normalize_url.
#
# The earlier version stripped a throwaway copy (`images.flatten.map!`), so
# surrounding whitespace was never removed from the URLs. Stripping now
# takes effect and runs before the empty-string filter, which means
# whitespace-only entries are dropped as well.
#
# @return [Array] unique results of normalize_url, in first-seen order
def images
  [find_images]
    .flatten
    .compact
    .map(&:strip)
    .reject(&:empty?)
    .map { |img| normalize_url(img) }
    .uniq
end
#normalize_url(uri) ⇒ Object
34 35 36 37 38 39 40 41 42 |
# File 'lib/trawler/parser.rb', line 34
#
# Turns a link into an absolute, normalized URL string.
#
# A link that already starts with "<word characters>:" is returned
# untouched. Anything else is joined onto #url with Addressable and
# normalized. The prefix check is anchored with \A so that only the very
# start of the string is inspected, not the start of any line.
#
# @param uri [String] link as found in the page
# @return [String, nil] the resolved URL, or nil when the link cannot be
#   parsed (the failure is reported through add_fatal_error)
def normalize_url(uri)
  return uri if uri =~ /\A\w*:/

  Addressable::URI.join(url, uri).normalize.to_s
rescue URI::InvalidURIError, Addressable::URI::InvalidURIError => e
  add_fatal_error "Link parsing exception: #{e.message}"
  nil
end
#title ⇒ Object
16 17 18 |
# NOTE(review): this listing is incomplete. The receiver of `.nil?` and the
# ternary's else-branch were lost when the page was extracted. What remains
# shows only that html_title is returned when the missing value is nil.
# Restore the missing tokens from lib/trawler/parser.rb (lines 16-18).
# File 'lib/trawler/parser.rb', line 16 def title .nil? ? html_title : end |
#video ⇒ Object
44 45 46 |
# NOTE(review): the listing spans source lines 44-46 yet shows no body, so
# the statement on line 45 was presumably lost in extraction. Confirm
# against lib/trawler/parser.rb before relying on this method returning nil.
# File 'lib/trawler/parser.rb', line 44 def video end |
#video_still ⇒ Object
54 55 56 |
# NOTE(review): the listing spans source lines 54-56 yet shows no body, so
# the statement on line 55 was presumably lost in extraction. Confirm
# against lib/trawler/parser.rb before relying on this method returning nil.
# File 'lib/trawler/parser.rb', line 54 def video_still end |