Class: ContentPreview::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/content-preview/parser.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(images = []) ⇒ Parser

Returns a new instance of Parser.



8
9
10
# File 'lib/content-preview/parser.rb', line 8

# Builds a parser, optionally seeded with an initial list of images.
#
# @param images [Array] starting value for the +images+ attribute; it is
#   handed to the +images=+ writer as-is (no copy is made here)
def initialize(images = [])
  self.images = images
end

Instance Attribute Details

#description ⇒ Object

Returns the value of attribute description.



6
7
8
# File 'lib/content-preview/parser.rb', line 6

# Reader for the +description+ attribute.
#
# @return [Object] the current value of @description (nil if never assigned)
def description
  @description
end

#images ⇒ Object

Returns the value of attribute images.



6
7
8
# File 'lib/content-preview/parser.rb', line 6

# Reader for the +images+ attribute.
#
# @return [Object] the current value of @images (nil if never assigned)
def images
  @images
end

#title ⇒ Object

Returns the value of attribute title.



6
7
8
# File 'lib/content-preview/parser.rb', line 6

# Reader for the +title+ attribute.
#
# @return [Object] the current value of @title (nil if never assigned)
def title
  @title
end

#video ⇒ Object

Returns the value of attribute video.



6
7
8
# File 'lib/content-preview/parser.rb', line 6

# Reader for the +video+ attribute.
#
# @return [Object] the current value of @video (nil if never assigned)
def video
  @video
end

Instance Method Details

#process(url) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/content-preview/parser.rb', line 12

# Fetches +url+ and extracts preview data from the returned HTML.
#
# Open Graph tags are read first; ordinary HTML metadata is then used to fill
# in whatever is still missing.
#
# @param url [String] absolute http:// or https:// URL of the page to preview
# @return [Hash{String => Object}, nil] a hash with the keys 'title',
#   'description', 'images' and 'video', or nil when +url+ is not an HTTP(S)
#   URL string or when fetching/parsing fails
def process(url)
  # \A (not ^) so the scheme must open the string: ^ also matches after a
  # newline, which would let "|cmd\nhttp://..." through to Kernel#open, and
  # Kernel#open runs a leading "|" as a shell command.
  return unless url.is_a?(String) && url =~ %r{\Ahttps?://}

  # NOTE(review): this relies on open-uri patching Kernel#open; Ruby 3.0+
  # needs URI.open instead -- confirm the Ruby versions this gem targets.
  document = Nokogiri::HTML(open(url))
  process_open_graph(document)
  process_meta_data(document, url)

  # Return computed data
  {
    'title' => title,
    'description' => description,
    'images' => images,
    'video' => video
  }
rescue StandardError
  # Best effort: network and parse failures yield no preview. StandardError
  # (rather than Exception) lets Interrupt/SystemExit propagate.
  nil
end

#process_meta_data(document, url) ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/content-preview/parser.rb', line 53

# Fills in title, images and description from ordinary HTML markup, but only
# for the attributes that are still unset/empty when this is called.
#
# @param document [#css, #traverse, #xpath] parsed HTML document
# @param url [String] URL of the page; relative image links are resolved
#   against it
# @return [void]
def process_meta_data(document, url)
  unless title
    title_nodes = document.css('title')
    self.title = title_nodes.text unless title_nodes.empty?
  end

  if images.empty?
    found = []
    document.traverse do |node|
      [node[:src], node[:href]].grep(/\.jpg\z/i).each do |link|
        begin
          found << URI.join(url, link).to_s
        rescue URI::Error
          # Skip links that cannot be resolved instead of failing the
          # whole preview because of one malformed attribute.
        end
      end
    end

    # Cap the whole list; the previous per-node first(10) never limited
    # anything because a node contributes at most two candidates.
    self.images = found.first(10)
  end

  unless description
    # Look the name attribute up directly rather than relying on it being
    # the first attribute of the tag. When several description tags exist
    # the last one wins.
    document.xpath('//meta[@name]').each do |tag|
      self.description = tag['content'] if tag['name'] == 'description'
    end
  end
end

#process_open_graph(document) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/content-preview/parser.rb', line 32

# Reads Open Graph <meta> tags (property starting with "og:") and stores the
# recognised ones on the parser: og:title, og:description and og:video
# overwrite the matching attribute, og:image is appended to +images+.
# Other og:* properties are ignored.
#
# @param document [#xpath] parsed HTML document
# @return [void]
def process_open_graph(document)
  document.xpath('//meta[starts-with(@property, "og:")]').each do |tag|
    content = tag['content']

    # Read the property attribute by name; taking the tag's first attribute
    # would miss tags written as <meta content="..." property="og:...">.
    case tag['property']
    when 'og:title'
      self.title = content
    when 'og:description'
      self.description = content
    when 'og:image'
      # An og:image tag without content carries no usable URL.
      images << content if content
    when 'og:video'
      self.video = content
    end
  end
end