Class: Trawler::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/trawler/parser.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options) ⇒ Parser

Returns a new instance of Parser.



9
10
11
12
13
14
# File 'lib/trawler/parser.rb', line 9

# Builds a parser from an options collection.
#
# @param options [#[]] read with the keys :page, :url and :image_size
#   - :page       is stored in @page (later handed to Nokogiri::HTML)
#   - :url        is stored in @url (exposed through #url)
#   - :image_size is stored in @min_image_size
def initialize(options)
  @page = options[:page]
  @url = options[:url]
  @min_image_size = options[:image_size]
  # Starts out as a fresh, empty Hashr instance.
  @meta_data = Hashr.new
end

Instance Attribute Details

#meta_data ⇒ Object (readonly)

Returns the value of attribute meta_data.



7
8
9
# File 'lib/trawler/parser.rb', line 7

# Reader for the meta_data attribute.
#
# @return [Object] the current value of @meta_data
def meta_data
  @meta_data
end

#url ⇒ Object (readonly)

Returns the value of attribute url.



6
7
8
# File 'lib/trawler/parser.rb', line 6

# Reader for the url attribute.
#
# @return [Object] the current value of @url
def url
  @url
end

Instance Method Details

#description ⇒ Object



20
21
22
# File 'lib/trawler/parser.rb', line 20

# Picks the page description: the result of +meta_description+ unless it is
# nil, in which case +html_description+ is used instead.
#
# @return [Object] the result of whichever helper was selected
def description
  return html_description if meta_description.nil?

  meta_description
end

#document ⇒ Object



47
48
49
# File 'lib/trawler/parser.rb', line 47

# Parses @page with Nokogiri::HTML on first use and caches the result in
# @document, so later calls reuse the same parsed document.
#
# @return [Object] the value returned by Nokogiri::HTML(@page)
def document
  return @document if @document

  @document = Nokogiri::HTML(@page)
end

#images ⇒ Object



24
25
26
27
28
29
30
31
# File 'lib/trawler/parser.rb', line 24

# Collects candidate image URLs from +meta_image+ and +find_images+ and
# returns them normalized.
#
# Candidates are flattened into one list, nil entries are dropped, each
# entry is stripped of surrounding whitespace, blank entries are removed and
# duplicates are collapsed before every remaining entry is passed through
# +normalize_url+.
#
# @return [Array] one +normalize_url+ result per unique, non-blank candidate
def images
  candidates = [meta_image, find_images].flatten.compact
  # Strip before filtering so whitespace-only entries are discarded, and keep
  # the result: the previous code stripped and de-duplicated a throwaway copy.
  candidates = candidates.map { |img| img.strip }.reject { |img| img.empty? }.uniq
  candidates.map { |img| normalize_url(img) }
end

#normalize_url(uri) ⇒ Object



33
34
35
36
37
38
39
40
41
# File 'lib/trawler/parser.rb', line 33

# Turns a link into an absolute URL string.
#
# A value that starts with word characters followed by a colon (for example
# "http:") is returned unchanged. Anything else is joined onto +url+ with
# Addressable::URI.join and normalized.
#
# @param uri [String] the link to normalize
# @return [String, nil] the absolute URL, or nil when the link cannot be
#   parsed (the failure is reported through +add_fatal_error+)
def normalize_url(uri)
  # \A anchors at the start of the string; ^ would also match after a
  # newline and misclassify multi-line values as absolute.
  return uri if uri =~ /\A\w*:/

  Addressable::URI.join(url, uri).normalize.to_s
rescue URI::InvalidURIError, Addressable::URI::InvalidURIError => e
  add_fatal_error "Link parsing exception: #{e.message}"
  nil
end

#title ⇒ Object



16
17
18
# File 'lib/trawler/parser.rb', line 16

# Picks the page title: the result of +meta_title+ unless it is nil, in
# which case +html_title+ is used instead.
#
# @return [Object] the result of whichever helper was selected
def title
  return html_title if meta_title.nil?

  meta_title
end

#video ⇒ Object



43
44
45
# File 'lib/trawler/parser.rb', line 43

# Video for the page; delegates directly to +meta_video+.
#
# @return [Object] whatever +meta_video+ returns
def video
  meta_video
end