Class: LinkOracle::Extractor::Body

Inherits:
Object
  • Object
show all
Defined in:
lib/link_oracle/extractor/body.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(parsed_body) ⇒ Body

Returns a new instance of Body.



6
7
8
9
# File 'lib/link_oracle/extractor/body.rb', line 6

# Sets up the extractor state.
#
# @param parsed_body [Object] document stored in @parsed_body; the other
#   methods of this class call #xpath on it
def initialize(parsed_body)
  # Fresh, empty result container stored in @link_data.
  @link_data = LinkData::Data.new
  @parsed_body = parsed_body
end

Instance Attribute Details

#link_data ⇒ Object (readonly)

Returns the value of attribute link_data.



4
5
6
# File 'lib/link_oracle/extractor/body.rb', line 4

# Reader for the @link_data instance variable, which the constructor
# initializes with a LinkData::Data instance.
#
# @return [Object] the current value of @link_data
def link_data
  @link_data
end

#parsed_body ⇒ Object (readonly)

Returns the value of attribute parsed_body.



4
5
6
# File 'lib/link_oracle/extractor/body.rb', line 4

# Reader for the @parsed_body instance variable, which the constructor
# fills with the argument it was given.
#
# @return [Object] the current value of @parsed_body
def parsed_body
  @parsed_body
end

Instance Method Details

#descriptions ⇒ Object



32
33
34
# File 'lib/link_oracle/extractor/body.rb', line 32

# Collects the content of up to three paragraph text nodes.
#
# Queries parsed_body with the XPath "//p/text()", keeps the first three
# matches, drops nil entries and reads #content from each remaining node.
# The result is memoized, so parsed_body is queried at most once.
#
# @return [Array] the #content of each kept node, in match order
def descriptions
  # Cached under the plural name so the ivar matches the method name,
  # the same way @titles and @images do.
  @descriptions ||= parsed_body.xpath("//p/text()").first(3).compact.map(&:content)
end

#images ⇒ Object



26
27
28
29
30
# File 'lib/link_oracle/extractor/body.rb', line 26

# Collects the src attribute of up to three <img> elements.
#
# The XPath keeps only images whose src contains "://" and does not contain
# "ads.", "ad." or "?". The result is memoized.
#
# NOTE(review): contains() is a plain substring test, so the "ad." check also
# rejects sources such as "http://upload.example.com/..." - confirm intended.
#
# @return [Array] the src value of each kept node, in match order
def images
  @images ||= begin
    image_nodes = parsed_body.xpath(
      "//img[@src[contains(.,'://') and not(contains(.,'ads.') or contains(.,'ad.') or contains(.,'?'))]]"
    )
    # Truncate first, then drop nils, then read the attribute.
    image_nodes.first(3).compact.map { |img| img['src'] }
  end
end

#perform ⇒ Object



11
12
13
14
15
16
17
18
# File 'lib/link_oracle/extractor/body.rb', line 11

# Runs the three extractions and hands the results to link_data.
#
# @return [Object] whatever link_data.assign returns
def perform
  extracted = {
    titles: titles,
    image_urls: images,
    descriptions: descriptions
  }
  link_data.assign(extracted)
end

#titles ⇒ Object



20
21
22
23
24
# File 'lib/link_oracle/extractor/body.rb', line 20

# Collects the content of up to three heading text nodes.
#
# Text nodes directly under <h1>, <h2> and <h3> are matched with a single
# XPath union; the first three matches are kept, nil entries are dropped and
# #content is read from each. The result is memoized.
#
# @return [Array] the #content of each kept node, in match order
def titles
  return @titles if @titles

  heading_query = "//h1/text() | //h2/text() | //h3/text()"
  heading_nodes = parsed_body.xpath(heading_query)
  @titles = heading_nodes.first(3).compact.map(&:content)
end