Class: LinkOracle::Extractor::Body

Inherits:
Base
  • Object
show all
Defined in:
lib/link_oracle/extractor/body.rb

Instance Attribute Summary

Attributes inherited from Base

#link_data, #parsed_body, #url

Instance Method Summary

Methods inherited from Base

#get_content, #initialize

Constructor Details

This class inherits a constructor from LinkOracle::Extractor::Base

Instance Method Details

#descriptions ⇒ Object



49
50
51
# File 'lib/link_oracle/extractor/body.rb', line 49

# Collects the text of the leading paragraph text nodes of the parsed body.
#
# Looks up "//p/text()" on parsed_body, keeps at most the first three
# results, drops nil entries and reads each remaining node's `content`.
# The result is memoized in @description.
#
# @return [Array] up to three `content` values
def descriptions
  return @description if @description

  paragraph_nodes = parsed_body.xpath("//p/text()").first(3)
  @description = paragraph_nodes.compact.map(&:content)
end

#first_valid_size_image ⇒ Object



36
37
38
39
40
41
# File 'lib/link_oracle/extractor/body.rb', line 36

# Finds the first entry of formatted_images whose size, as reported by
# image_size, is at least 100 in both dimensions.
#
# Entries for which image_size returns nil (or a pair with a missing
# component) are skipped. The outcome is memoized, including a nil
# outcome, so image_size is called at most once per candidate no matter
# how often this method is invoked.
#
# @return [Object, nil] the first qualifying image, or nil when none qualifies
def first_valid_size_image
  # `||=` would repeat the whole search (one image_size call per candidate)
  # on every invocation whenever the cached answer is nil.
  return @first_valid_size_image if defined?(@first_valid_size_image)

  @first_valid_size_image = formatted_images.find do |image|
    width, height = image_size(image)
    width && height && width >= 100 && height >= 100
  end
end

#formatted_images ⇒ Object



32
33
34
# File 'lib/link_oracle/extractor/body.rb', line 32

# Passes every entry of parsed_images, together with the page url, through
# ::Utils::ImageUrlFormatter and collects the `perform` results.
#
# The list is memoized in @formatted_images.
#
# @return [Array] one formatter result per parsed image, in the same order
def formatted_images
  return @formatted_images if @formatted_images

  @formatted_images = parsed_images.map do |src|
    formatter = ::Utils::ImageUrlFormatter.new(url, src)
    formatter.perform
  end
end

#image_size(image) ⇒ Object



43
44
45
46
47
# File 'lib/link_oracle/extractor/body.rb', line 43

# Asks ::FastImage for the size of the given image.
#
# @param image [Object] the image location handed straight to ::FastImage.size
# @return [Object] whatever ::FastImage.size returns (presumably a
#   [width, height] pair, or nil when the size cannot be determined —
#   confirm against the FastImage version in use), or [0, 0] when the
#   lookup raises ::URI::InvalidURIError
def image_size(image)
  ::FastImage.size(image)
rescue ::URI::InvalidURIError
  # An unparseable location is reported as a zero-sized image instead of
  # propagating the error to the caller.
  [0, 0]
end

#images ⇒ Object



22
23
24
# File 'lib/link_oracle/extractor/body.rb', line 22

# Wraps the result of first_valid_size_image in an array.
#
# The array is memoized in @images.
#
# @return [Array] a one-element array holding the first valid-size image,
#   or an empty array when there is none
def images
  return @images if @images

  candidate = first_valid_size_image
  @images =
    if candidate
      [candidate]
    else
      []
    end
end

#parsed_images ⇒ Object



26
27
28
29
30
# File 'lib/link_oracle/extractor/body.rb', line 26

# Collects the `src` attribute of the <img> elements in the parsed body
# that pass the XPath filter below.
#
# The filter keeps a src that contains "://" or "/" and rejects any src
# containing "ads.", "ad.", "?" or ".gif". These are plain substring
# checks, so e.g. any src containing "ad." anywhere is rejected.
# The list is memoized in @parsed_images.
#
# @return [Array] the `src` values of the matching nodes
def parsed_images
  return @parsed_images if @parsed_images

  image_nodes = parsed_body.xpath(
    "//img[@src[(contains(.,'://') or contains(., '/')) and not(contains(.,'ads.') or contains(.,'ad.') or contains(.,'?') or contains(.,'.gif'))]]"
  )
  @parsed_images = image_nodes.map { |img| img['src'] }
end

#perform ⇒ Object



8
9
10
11
12
13
14
# File 'lib/link_oracle/extractor/body.rb', line 8

# Hands the extracted titles, images and descriptions to link_data.
#
# The values are passed to link_data.assign as a single Hash with the keys
# :titles, :image_urls and :descriptions.
#
# @return [Object] whatever link_data.assign returns
def perform
  target = link_data
  extracted = {
    titles: titles,
    image_urls: images,
    descriptions: descriptions
  }
  target.assign(extracted)
end

#titles ⇒ Object



16
17
18
19
20
# File 'lib/link_oracle/extractor/body.rb', line 16

# Collects the text of the leading heading text nodes of the parsed body.
#
# Looks up the text nodes of <h1>, <h2> and <h3> elements on parsed_body,
# keeps at most the first three results, drops nil entries and reads each
# remaining node's `content`. The result is memoized in @titles.
#
# @return [Array] up to three `content` values
def titles
  return @titles if @titles

  heading_nodes = parsed_body.xpath(
    "//h1/text() | //h2/text() | //h3/text()"
  ).first(3)
  @titles = heading_nodes.compact.map(&:content)
end

#type ⇒ Object



4
5
6
# File 'lib/link_oracle/extractor/body.rb', line 4

# Identifies this extractor.
#
# @return [Symbol] always :body
def type
  :body
end