Class: LinkOracle::Extractor::Body
- Inherits:
-
Base
- Object
- Base
- LinkOracle::Extractor::Body
show all
- Defined in:
- lib/link_oracle/extractor/body.rb
Instance Attribute Summary
Attributes inherited from Base
#link_data, #parsed_body, #url
Instance Method Summary
collapse
Methods inherited from Base
#get_content, #initialize
Instance Method Details
#descriptions ⇒ Object
49
50
51
|
# File 'lib/link_oracle/extractor/body.rb', line 49
# Content of the first three paragraph text nodes found in the parsed body.
#
# The result is memoized in @descriptions, so the instance variable is named
# after the method like the other memoized readers here (@titles, @images,
# @parsed_images, ...).
#
# @return [Array] the +content+ of up to three nodes matched by //p/text()
def descriptions
  @descriptions ||= parsed_body.xpath("//p/text()").first(3).compact.map { |text| text.content }
end
|
#first_valid_size_image ⇒ Object
36
37
38
39
40
41
|
# File 'lib/link_oracle/extractor/body.rb', line 36
# First entry of +formatted_images+ whose measured width and height are both
# at least 100, or nil when no image qualifies.
#
# The outcome is cached even when it is nil. +image_size+ is invoked once per
# candidate image, so a plain `||=` would measure every image again on each
# call whenever nothing qualified.
#
# @return [Object, nil] the first sufficiently large image, if any
def first_valid_size_image
  return @first_valid_size_image if defined?(@first_valid_size_image)

  @first_valid_size_image = formatted_images.find do |image|
    width, height = image_size(image)
    # image_size may return nil (size unknown); such images never qualify.
    next false unless width && height

    width >= 100 && height >= 100
  end
end
|
#formatted_images ⇒ Object
32
33
34
|
# File 'lib/link_oracle/extractor/body.rb', line 32
# Each entry of +parsed_images+ passed, together with the page +url+, through
# ::Utils::ImageUrlFormatter#perform. Memoized in @formatted_images.
#
# @return [Array] one formatter result per parsed image, in the same order
def formatted_images
  @formatted_images ||= parsed_images.map do |src|
    formatter = ::Utils::ImageUrlFormatter.new(url, src)
    formatter.perform
  end
end
|
#image_size(image) ⇒ Object
43
44
45
46
47
|
# File 'lib/link_oracle/extractor/body.rb', line 43
# Looks up the dimensions of +image+ through ::FastImage.size.
#
# @param image [Object] image location, handed to FastImage unchanged
# @return [Object] whatever ::FastImage.size returns, or [0, 0] when it
#   raises ::URI::InvalidURIError
def image_size(image)
  begin
    ::FastImage.size(image)
  rescue ::URI::InvalidURIError
    # An unparsable URL is reported as a zero-sized image instead of an error.
    [0, 0]
  end
end
|
#images ⇒ Object
22
23
24
|
# File 'lib/link_oracle/extractor/body.rb', line 22
# Image list for this extractor: a one-element array holding
# +first_valid_size_image+, or an empty array when there is none.
# Memoized in @images.
#
# @return [Array] zero or one image
def images
  return @images if @images

  @images =
    if first_valid_size_image
      [first_valid_size_image]
    else
      []
    end
end
|
#parsed_images ⇒ Object
26
27
28
29
30
|
# File 'lib/link_oracle/extractor/body.rb', line 26
# The +src+ attribute of every <img> in the parsed body that passes the XPath
# filter below. Memoized in @parsed_images.
#
# The filter keeps a src that contains '://' or '/', and drops any src that
# contains 'ads.', 'ad.', '?' or '.gif'. These are plain substring tests, so
# for example a host such as "upload.example.com" is dropped by the 'ad.' rule.
#
# @return [Array] the src values of the matching <img> nodes
def parsed_images
  return @parsed_images if @parsed_images

  query = "//img[@src[(contains(.,'://') or contains(., '/')) and not(contains(.,'ads.') or contains(.,'ad.') or contains(.,'?') or contains(.,'.gif'))]]"
  @parsed_images = parsed_body.xpath(query).map { |img| img['src'] }
end
|
#perform ⇒ Object
8
9
10
11
12
13
14
|
# File 'lib/link_oracle/extractor/body.rb', line 8
# Collects the titles, images and descriptions extracted from the body and
# hands them to +link_data.assign+ in a single hash.
#
# @return [Object] whatever +link_data.assign+ returns
def perform
  target = link_data
  extracted = {
    titles: titles,
    image_urls: images,
    descriptions: descriptions
  }
  target.assign(extracted)
end
|
#titles ⇒ Object
16
17
18
19
20
|
# File 'lib/link_oracle/extractor/body.rb', line 16
# Content of the first three heading text nodes (h1, h2 or h3) returned by
# the XPath query on the parsed body. Memoized in @titles.
#
# @return [Array] the +content+ of up to three matched heading text nodes
def titles
  return @titles if @titles

  heading_texts = parsed_body.xpath("//h1/text() | //h2/text() | //h3/text()")
  @titles = heading_texts.first(3).compact.map(&:content)
end
|
#type ⇒ Object
4
5
6
|
# File 'lib/link_oracle/extractor/body.rb', line 4
# Identifier for this extractor.
#
# @return [Symbol] always :body
def type
:body
end
|