Class: Jaleb::Model::Amazon

Inherits:
Base
  • Object
show all
Defined in:
lib/jaleb/models/amazon.rb

Overview

Provides a regular expression for determining whether a URL belongs to a specific service/website (here, Amazon).

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Base

#parse_price

Class Method Details

.regexp ⇒ Object



3
4
5
# File 'lib/jaleb/models/amazon.rb', line 3

# Pattern used to decide whether a URL belongs to this model.
#
# The expression is unanchored, so it matches any string that contains
# the literal text "amazon.com".
#
# @return [Regexp]
def self.regexp
  %r{amazon\.com}
end

Instance Method Details

#parse(data) ⇒ Object

Parses the given data and assigns the attributes found in it (name, description, price, images) to this object.



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/jaleb/models/amazon.rb', line 8

# Parse data and assign the attributes found in it to this object.
#
# After delegating to the parent implementation, inspects +doc+ and, when it
# is a Nokogiri HTML document, fills in +name+, +description+, the price
# (via +parse_price+), +images+ and +image+. Each attribute is tried against
# several selectors in order; the first one that yields a value wins.
#
# NOTE(review): +first_string+ and +attribute_array+ are helpers defined
# elsewhere; this method assumes +first_string+ returns nil when nothing
# matched and tolerates +attribute_array+ returning either nil or an empty
# collection for "no images" -- confirm against their definitions.
#
# @param data [Object] raw input, passed unchanged to the parent +parse+
# @return [Object, nil] the first image found, or nil when there is none or
#   when +doc+ is not a Nokogiri::HTML::Document
def parse(data)
  super(data)

  case doc
  when Nokogiri::HTML::Document
    # Get Name, falling back to the first " - " separated part of <title>
    self.name = doc.css("h1#title").first_string
    self.name = doc.xpath("string(//title)").split(" - ").first unless self.name

    # Get Description
    self.description = doc.css(".productDescriptionWrapper").first_string

    # Get description from meta title if not found
    self.description = doc.xpath("//meta[@name='description']/@content").first_string if description.nil?

    # Get Price: the first lookup always runs, the rest only while no price is set
    parse_price(doc.css("#actualPriceValue").first_string)
    parse_price(doc.css("#priceblock_ourprice").first_string) unless self.price
    parse_price(doc.css("#priceblock_saleprice").first_string) unless self.price
    parse_price(doc.xpath("//span[contains(@id, 'price')]").first_string) unless self.price

    # Get Unqualified Price
    parse_price(doc.xpath("//*[contains(@id, 'unqualifiedBuyBox')]//span").first_string) unless self.price

    # Get Used Price
    parse_price(doc.xpath("//*[contains(@id, 'secondaryUsedAndNew')]//*[@class='price']").first_string) unless self.price

    # Get Images. Queries are tried in priority order: main image, image
    # block, in-house products (kindle, etc.), third-party sellers.
    image_queries = [
      "//*[@data-action='main-image-click']//img",
      "//*[@id='imageBlock']//img",
      "//*[@id='kib-ma-container-0']//img",
      "//*[@id='prodImageContainer']//img"
    ]

    # Treat nil and empty results alike so every fallback is reachable and a
    # nil result can never blow up on +empty?+.
    image_queries.each do |query|
      self.images = doc.xpath(query).attribute_array
      break unless images.nil? || images.empty?
    end

    # Guard against +images+ still being nil when no query matched.
    self.image = images && images.first
  end
end