Class: Trawler::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/trawler/parser.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options) ⇒ Parser

Returns a new instance of Parser.



9
10
11
12
13
14
# File 'lib/trawler/parser.rb', line 9

# Builds a parser from an options collection.
#
# @param options [#[]] presumably a Hash; the keys :page, :url and
#   :image_size are read (the last one is stored as @min_image_size)
def initialize(options)
  @page = options[:page]
  @url = options[:url]
  @min_image_size = options[:image_size]
  # Starts out as an empty Hashr; nothing in this method populates it.
  @meta_data = Hashr.new
end

Instance Attribute Details

#meta_data ⇒ Object (readonly)

Returns the value of attribute meta_data.



7
8
9
# File 'lib/trawler/parser.rb', line 7

# Reader for @meta_data, the Hashr instance the constructor creates.
#
# @return [Object] the current value of @meta_data
def meta_data
  @meta_data
end

#url ⇒ Object (readonly)

Returns the value of attribute url.



6
7
8
# File 'lib/trawler/parser.rb', line 6

# Reader for @url, which the constructor fills from options[:url].
# #normalize_url uses this value as the base when resolving links.
def url
  @url
end

Instance Method Details

#description ⇒ Object



20
21
22
# File 'lib/trawler/parser.rb', line 20

# Page description. The value of meta_description wins whenever it is not
# nil; only a nil meta_description falls back to html_description.
def description
  return html_description if meta_description.nil?

  meta_description
end

#document ⇒ Object



48
49
50
51
52
# File 'lib/trawler/parser.rb', line 48

# Parsed representation of @page.
#
# The page is handed to Nokogiri::HTML the first time this is called and
# the result is cached in @document. The encoding is (re)assigned to
# "UTF-8" on every call, not just the first one.
#
# @return [Object] the cached document
def document
  (@document ||= Nokogiri::HTML(@page)).tap do |doc|
    doc.encoding = "UTF-8"
  end
end

#images ⇒ Object



24
25
26
27
28
29
30
31
32
# File 'lib/trawler/parser.rb', line 24

# Collects the candidate image URLs for the page.
#
# The list starts with meta_image, followed by everything find_images
# returns (nested arrays are flattened). It is then cleaned up:
# nil entries are dropped, surrounding whitespace is stripped, blank
# strings are discarded, every entry goes through normalize_url, and
# duplicates are removed while keeping first-seen order.
#
# @return [Array] unique, normalized image URLs
def images
  candidates = [meta_image, find_images].flatten.compact
  # Strip BEFORE the emptiness check: the previous code stripped a throwaway
  # copy, so padded URLs were never cleaned and whitespace-only entries
  # slipped through.
  cleaned = candidates.map { |img| img.strip }.reject { |img| img.empty? }
  # normalize_url may return nil for links it cannot parse; keep those out.
  cleaned.map { |img| normalize_url(img) }.compact.uniq
end

#normalize_url(uri) ⇒ Object



34
35
36
37
38
39
40
41
42
# File 'lib/trawler/parser.rb', line 34

# Turns a link found on the page into an absolute URL.
#
# A link that begins with "<word characters>:" (a scheme-like prefix) is
# returned untouched. Anything else is joined onto #url with
# Addressable::URI.join, normalized and returned as a String.
#
# @param uri [String] the link to resolve
# @return [String, nil] the resolved link, or nil when parsing fails (the
#   failure is passed to add_fatal_error, a helper defined outside this
#   excerpt)
def normalize_url(uri)
  # \A rather than ^: only a prefix at the very start of the string counts,
  # not one that follows an embedded newline.
  return uri if uri =~ /\A\w*\:/i

  Addressable::URI.join(url, uri).normalize.to_s
rescue URI::InvalidURIError, Addressable::URI::InvalidURIError => e
  add_fatal_error "Link parsing exception: #{e.message}"
  # Explicit nil so the result does not depend on what add_fatal_error returns.
  nil
end

#title ⇒ Object



16
17
18
# File 'lib/trawler/parser.rb', line 16

# Page title. The value of meta_title is used whenever it is not nil;
# only a nil meta_title falls back to html_title.
def title
  return html_title if meta_title.nil?

  meta_title
end

#video ⇒ Object



44
45
46
# File 'lib/trawler/parser.rb', line 44

# Video for the page. Simply forwards to meta_video, a helper defined
# outside this excerpt, and returns whatever it returns.
def video
  meta_video
end

#video_still ⇒ Object



54
55
56
# File 'lib/trawler/parser.rb', line 54

# Still image for the video. Forwards to meta_image, the same helper
# #images uses for its first candidate, and returns its result unchanged.
def video_still
  meta_image
end