Class: AmazonDP::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/amazon_dp/parser.rb

Defined Under Namespace

Classes: Amazon18xError, ParseError

Constant Summary collapse

# Logger held in the class variable @@logger. It starts out as a Logger built
# with a nil log device and can be swapped out through Parser.logger=.
@@logger =
Logger.new(nil)

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.logger=(logger) ⇒ Object



29
30
31
# File 'lib/amazon_dp/parser.rb', line 29

# Replaces the logger stored in the class variable @@logger, which
# #parse_html sends its +info+ messages to.
#
# @param logger [Object] the new logger; #parse_html calls +info+ on it
# @return [Object] the logger that was passed in
def self.logger=(logger)
  @@logger = logger
end

Instance Method Details

#extract_iine(doc) ⇒ Object



69
70
71
72
73
# File 'lib/amazon_dp/parser.rb', line 69

# Reads the like ("iine") count shown inside the
# span.amazonLikeCountContainer element.
#
# @param doc [Object] parsed document that answers +search+ with a CSS selector
# @return [Integer, nil] the count with commas removed (0 when the text does
#   not start with digits), or nil when any step of the lookup raises
def extract_iine(doc)
  like_spans = doc.search("span.amazonLikeCountContainer > span")
  raw_count = like_spans.children.first.to_s
  raw_count.gsub(/,/, "").to_i
rescue
  nil
end

#extract_kindle_pages(doc) ⇒ Object



53
54
55
# File 'lib/amazon_dp/parser.rb', line 53

# Reads the page count from the a#pageCountAvailable list item.
#
# @param doc [Object] parsed document that answers +search+ with a CSS selector
# @return [Integer] the integer value of the digits/dots at the start of the
#   stripped text, or 0 when the text does not start with a number
def extract_kindle_pages(doc)
  label = doc.search("li.listItem > a#pageCountAvailable > span").text.strip
  leading_number = label[/^[\d.]+/]
  leading_number.to_s.to_i
end

#extract_kindle_price(doc) ⇒ Object



49
50
51
# File 'lib/amazon_dp/parser.rb', line 49

# Reads the price shown in the .priceLarge element.
#
# @param doc [Object] parsed document that answers +search+ with a CSS selector
# @return [Float] the number at the end of the stripped price text, or 0.0
#   when the text does not end with a number
def extract_kindle_price(doc)
  price_text = doc.search(".priceLarge").text.strip
  # Remove thousands separators before matching; otherwise a price such as
  # "1,234" would match only the digits after the last comma ("234").
  price_text.delete(",")[/[\d.]+$/].to_s.to_f
end

#extract_reviews(doc) ⇒ Object



63
64
65
66
67
# File 'lib/amazon_dp/parser.rb', line 63

# Reads the review count from the first child of the "div.tiny > b" match.
#
# @param doc [Object] parsed document that answers +search+ with a CSS selector
# @return [Integer, nil] the number at the start of the text (0 when the text
#   does not start with digits), or nil when any step of the lookup raises
def extract_reviews(doc)
  review_text = doc.search("div.tiny > b").children.first.to_s
  # Remove thousands separators before matching, as #extract_iine does;
  # otherwise "1,234" would be read as 1.
  review_text.delete(",")[/^[\d.]+/].to_s.to_i
rescue
  nil
end

#extract_stars(doc) ⇒ Object



57
58
59
60
61
# File 'lib/amazon_dp/parser.rb', line 57

# Reads the average star rating from the +title+ attribute of the first
# span.swSprite found under span.crAvgStars.
#
# @param doc [Object] parsed document that answers +search+ with a CSS selector
# @return [Float, nil] the number at the end of the title text (0.0 when the
#   title does not end with a number), or nil when the element or attribute is
#   missing or any other step raises
def extract_stars(doc)
  sprite = doc.search("span.crAvgStars > span.asinReviewsSummary > a > span.swSprite")[0]
  title = sprite.attributes["title"].value
  title.match(/[\d.]+$/).to_s.to_f
rescue
  nil
end

#is_adult_notice_page?(doc) ⇒ Boolean

Returns:

  • (Boolean)


33
34
35
# File 'lib/amazon_dp/parser.rb', line 33

# Tells whether the document contains at least one span.alert element.
#
# @param doc [Object] parsed document that answers +search+ with a CSS selector
# @return [Boolean] true when the search result is not empty
def is_adult_notice_page?(doc)
  alert_spans = doc.search("span.alert")
  !alert_spans.empty?
end

#is_adult_product_page?(doc) ⇒ Boolean

Returns:

  • (Boolean)


37
38
39
40
41
# File 'lib/amazon_dp/parser.rb', line 37

# Tells whether the last child of the first span.highlight element reads
# "[アダルト]" once surrounding whitespace is stripped.
#
# @param doc [Object] parsed document that answers +search+ with a CSS selector
# @return [Boolean] true on an exact match; false otherwise, including when
#   the element is missing or any step of the lookup raises
def is_adult_product_page?(doc)
  first_highlight = doc.search("span.highlight").first
  trailing_label = first_highlight.children.last.text.strip
  trailing_label == "[アダルト]"
rescue
  false
end

#is_kindle_product_page?(doc) ⇒ Boolean

Returns:

  • (Boolean)


43
44
45
46
47
# File 'lib/amazon_dp/parser.rb', line 43

# Tells whether the last child of the first matching content list item reads
# "Kindle版" once surrounding whitespace is stripped.
#
# @param doc [Object] parsed document that answers +search+ with a CSS selector
# @return [Boolean] true on an exact match; false otherwise, including when
#   the list item is missing or any step of the lookup raises
def is_kindle_product_page?(doc)
  first_item = doc.search("table > tr > td > div.content > ul > li").first
  trailing_label = first_item.children.last.text.strip
  trailing_label == "Kindle版"
rescue
  false
end

#parse_html(html_data) ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/amazon_dp/parser.rb', line 75

# Parses a product page and gathers the extracted fields into a PageInfo.
#
# The Kindle price and page count are only extracted when
# #is_kindle_product_page? reports true; otherwise both are nil.
#
# @param html_data [String] page HTML; it is transcoded from cp932 to UTF-8
#   before parsing, and the caller's string is left unmodified
# @return [PageInfo] built with the keys :is_adult, :is_kindle, :kindle_price,
#   :kindle_pages, :stars, :reviews and :iine
# @raise [Amazon18xError] when #is_adult_notice_page? reports true
def parse_html(html_data)
  @@logger.info "try to parse html data"
  # String#encode returns a new string rather than converting in place, so the
  # transcoded copy must be kept and handed to the parser.
  utf8_html = html_data.encode("UTF-8", "cp932")
  # State the encoding explicitly so the parser does not fall back to a
  # charset declared inside the (now transcoded) markup.
  doc = Nokogiri::HTML(utf8_html, nil, "UTF-8")
  if is_adult_notice_page?(doc)
    @@logger.info "this page is adult amazon page"
    raise Amazon18xError
  end
  kindle_flag = is_kindle_product_page?(doc)
  PageInfo.new(
    :is_adult => is_adult_product_page?(doc),
    :is_kindle => kindle_flag,
    :kindle_price => (kindle_flag ? extract_kindle_price(doc) : nil),
    :kindle_pages => (kindle_flag ? extract_kindle_pages(doc) : nil),
    :stars => extract_stars(doc),
    :reviews => extract_reviews(doc),
    :iine => extract_iine(doc)
  )
end