Class: Ogo::Parsers::Base

Inherits:
Object
  • Object
show all
Defined in:
lib/ogo/parsers/base.rb

Direct Known Subclasses

Opengraph

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(parseable) ⇒ Base

Returns a new instance of Base.



7
8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/ogo/parsers/base.rb', line 7

# Builds a parser from either raw HTML or a location to fetch.
#
# Input containing the literal substring '</html>' is treated as markup: it
# is handed straight to Ogo::PageSource and the URL is recorded as an empty
# string. Any other input is passed to Ogo::Utils::RedirectFollower, and the
# resolved body, charset and URL are used to build the page source.
#
# @page ends up holding whatever PageSource#parse! returns, and @type is
# always initialised to 'website'.
#
# @param parseable [String] an HTML document or something RedirectFollower
#   can resolve (presumably a URL; only #include? is called on it here)
def initialize(parseable)
  if parseable.include?('</html>')
    # Raw markup: there is no remote location to remember.
    @url = ''
    @page = Ogo::PageSource.new(parseable).parse!
  else
    resolved = Ogo::Utils::RedirectFollower.new(parseable).resolve
    source = Ogo::PageSource.new(resolved.body, charset: resolved.charset, url: resolved.url)
    # Record the resolved URL before parsing, as the parse step may raise.
    @url = resolved.url
    @page = source.parse!
  end
  @type = 'website'
end

Instance Attribute Details

#page ⇒ Object (readonly)

Returns the value of attribute page.



5
6
7
# File 'lib/ogo/parsers/base.rb', line 5

# Reader for the page object stored in @page by the constructor.
#
# @return [Object] the current value of @page
def page
  @page
end

#url ⇒ Object (readonly)

Returns the value of attribute url.



5
6
7
# File 'lib/ogo/parsers/base.rb', line 5

# Reader for the URL recorded by the constructor: an empty string when the
# parser was built from raw HTML, otherwise the URL reported by the redirect
# follower.
#
# @return [Object] the current value of @url
def url
  @url
end

Instance Method Details

#all_images ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/ogo/parsers/base.rb', line 43

# Collects every candidate image referenced by the page and memoizes the
# result in @all_images.
#
# Lookups run in a fixed priority order: itemprop "image" and "logo" metas,
# og:image, twitter:image:src, link rel="image_src", and finally every <img>
# tag. The combined results are flattened, nil entries are dropped and
# duplicates removed (first occurrence wins). Each remaining path goes
# through fix_image_path against the parsed page URL and is wrapped in an
# Ogo::ImageInfo.
#
# @return [Array<Ogo::ImageInfo>] one entry per distinct image reference
def all_images
  @all_images ||= begin
    # [xpath, attribute] pairs, highest priority first.
    lookups = [
      ["//head//meta[@itemprop='image']", "content"],
      ["//head//meta[@itemprop='logo']", "content"],
      ["//head//meta[@property='og:image']", "content"],
      ["//head//meta[@property='twitter:image:src']", "content"],
      ["//head//link[@rel='image_src']", "href"],
      ["//img", "src"]
    ]
    found = lookups.map { |xpath, attr| fetch_images(xpath, attr) }.reduce(:+)
    candidates = found.flatten.compact.uniq
    base_uri = Addressable::URI.parse(url)
    candidates.map do |path|
      Ogo::ImageInfo.new(url: fix_image_path(path, base_uri))
    end
  end
end

#description(fallback = false) ⇒ Object



26
27
28
29
30
31
32
33
# File 'lib/ogo/parsers/base.rb', line 26

# Returns the page description.
#
# The content attribute of the first <meta name="description"> under <head>
# is used, with surrounding whitespace stripped. When that tag is missing or
# its content is blank, the result of fetch_first_text is returned instead.
#
# @param fallback [Boolean] accepted but not consulted by this implementation
# @return [Object] the stripped meta description, or whatever
#   fetch_first_text returns
def description(fallback=false)
  meta_node = page.doc.xpath("//head//meta[@name='description']").first
  summary = meta_node.attribute("content").to_s.strip if meta_node
  return summary unless summary.nil? || summary.empty?

  fetch_first_text
end

#image(fallback = false) ⇒ Object



35
36
37
# File 'lib/ogo/parsers/base.rb', line 35

# Picks the primary image for the page: the first entry of all_images.
#
# @param fallback [Boolean] accepted but not used by this implementation
# @return [Object, nil] the first element of all_images, or nil when that
#   list is empty
def image(fallback=false)
  all_images.first
end

#metadata(fallback = false) ⇒ Object



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/ogo/parsers/base.rb', line 61

# Assembles the page's summary metadata into a single hash.
#
# The :image entry stays nil when no image is available; otherwise it is a
# nested hash with the image's url, width, height and type.
#
# @param fallback [Boolean] accepted but not used by this implementation
# @return [Hash] with keys :title, :description, :type and :image
def metadata(fallback=false)
  summary = {
    title: title,
    description: description,
    type: type,
    image: nil
  }
  # Look the image up once so all four attributes come from the same object.
  primary = image
  if primary
    summary[:image] = {
      url:    primary.url,
      width:  primary.width,
      height: primary.height,
      type:   primary.type
    }
  end
  summary
end

#title(fallback = false) ⇒ Object



21
22
23
24
# File 'lib/ogo/parsers/base.rb', line 21

# Returns the text of the first <title> element under <head>, with
# surrounding whitespace stripped.
#
# @param fallback [Boolean] accepted but not used by this implementation
# @return [String, nil] the stripped title, or nil when no <title> is found
def title(fallback=false)
  heading = page.doc.xpath('//head//title').first
  return nil unless heading

  heading.text.to_s.strip
end

#type(fallback = false) ⇒ Object



39
40
41
# File 'lib/ogo/parsers/base.rb', line 39

# Returns the page type stored in @type, which the constructor sets to
# 'website'.
#
# @param fallback [Boolean] accepted but not used by this implementation
# @return [Object] the current value of @type
def type(fallback=false)
  @type
end