Class: AmazonDP::Parser
- Inherits: Object
show all
- Defined in: lib/amazon_dp/parser.rb
Defined Under Namespace
Classes: Amazon18xError, ParseError
Constant Summary
collapse
- @@logger = Logger.new(nil)
Class Method Summary
collapse
Instance Method Summary
collapse
Class Method Details
.logger=(logger) ⇒ Object
29
30
31
|
# File 'lib/amazon_dp/parser.rb', line 29
# Replaces the logger stored in the class variable @@logger.
#
# @param logger [Object] the object to store; parse_html calls #info on it,
#   so it is presumably a Logger-compatible object — confirm against callers
# @return [Object] the value that was assigned
def self.logger=(logger)
@@logger = logger
end
|
Instance Method Details
69
70
71
72
73
|
# File 'lib/amazon_dp/parser.rb', line 69
# Reads the number shown inside "span.amazonLikeCountContainer > span".
#
# NOTE(review): the method name is missing from the `def` line in this listing
# (it looks like it was lost during extraction); restore it from
# lib/amazon_dp/parser.rb before using this code. Presumably this feeds the
# :iine field built in parse_html — confirm.
#
# @param doc [#search] parsed document queried with a CSS selector
#   (parse_html builds one with Nokogiri::HTML)
# @return [Integer, nil] the parsed count, or nil if any StandardError is
#   raised while navigating the document
def (doc)
# First child of the matched spans as a string, thousands separators removed,
# then converted; String#to_i gives 0 when the text has no leading digits.
doc.search("span.amazonLikeCountContainer > span").children.first.to_s.gsub(/,/, "").to_i
rescue
nil
end
|
53
54
55
|
# File 'lib/amazon_dp/parser.rb', line 53
# Reads the number at the start of the text found under
# "li.listItem > a#pageCountAvailable > span".
#
# NOTE(review): the method name is missing from the `def` line in this listing
# (it looks like it was lost during extraction); restore it from
# lib/amazon_dp/parser.rb before using this code. Presumably this feeds the
# :kindle_pages field built in parse_html — confirm.
#
# @param doc [#search] parsed document queried with a CSS selector
# @return [Integer] the leading digits/dots run converted with to_i; 0 when
#   the regex does not match (nil.to_s is "" and "".to_i is 0)
def (doc)
doc.search("li.listItem > a#pageCountAvailable > span").text.strip.match(/^[\d.]+/).to_s.to_i
end
|
49
50
51
|
# File 'lib/amazon_dp/parser.rb', line 49
# Reads the number at the end of the text of the ".priceLarge" elements.
#
# NOTE(review): the method name is missing from the `def` line in this listing
# (it looks like it was lost during extraction); restore it from
# lib/amazon_dp/parser.rb before using this code. Presumably this feeds the
# :kindle_price field built in parse_html — confirm.
#
# NOTE(review): the regex accepts only digits and dots, so text containing a
# thousands separator such as "1,234" would yield only the part after the last
# comma (234.0). The like-count method in this file strips commas first —
# confirm whether the same is needed here.
#
# @param doc [#search] parsed document queried with a CSS selector
# @return [Float] the trailing digits/dots run converted with to_f; 0.0 when
#   the regex does not match
def (doc)
doc.search(".priceLarge").text.strip.match(/[\d.]+$/).to_s.to_f
end
|
63
64
65
66
67
|
# File 'lib/amazon_dp/parser.rb', line 63
# Reads the number at the start of the first child of "div.tiny > b".
#
# NOTE(review): the method name is missing from the `def` line in this listing
# (it looks like it was lost during extraction); restore it from
# lib/amazon_dp/parser.rb before using this code. Presumably this feeds the
# :reviews field built in parse_html — confirm.
#
# NOTE(review): the regex accepts only digits and dots, so a value written
# with a thousands separator such as "1,234" would be read as 1 — confirm
# whether commas should be removed first, as the like-count method does.
#
# @param doc [#search] parsed document queried with a CSS selector
# @return [Integer, nil] the leading digits/dots run converted with to_i
#   (0 when nothing matches), or nil if any StandardError is raised
def (doc)
doc.search("div.tiny > b").children.first.to_s.match(/^[\d.]+/).to_s.to_i
rescue
nil
end
|
57
58
59
60
61
|
# File 'lib/amazon_dp/parser.rb', line 57
# Reads the number at the end of the "title" attribute of the first
# "span.swSprite" element found under the average-stars review summary link.
#
# NOTE(review): the method name is missing from the `def` line in this listing
# (it looks like it was lost during extraction); restore it from
# lib/amazon_dp/parser.rb before using this code. Presumably this feeds the
# :stars field built in parse_html — confirm.
#
# @param doc [#search] parsed document queried with a CSS selector
# @return [Float, nil] the trailing digits/dots run of the title converted
#   with to_f, or nil if any StandardError is raised (for example when no
#   element matches and [0] returns nil)
def (doc)
doc.search("span.crAvgStars > span.asinReviewsSummary > a > span.swSprite")[0].attributes["title"].value.match(/[\d.]+$/).to_s.to_f
rescue
nil
end
|
#is_adult_notice_page?(doc) ⇒ Boolean
33
34
35
|
# File 'lib/amazon_dp/parser.rb', line 33
# Tells whether the document contains an alert banner, which parse_html treats
# as the adult-content notice page.
#
# @param doc [#search] parsed document queried with a CSS selector
# @return [Boolean] true when at least one "span.alert" element is matched
def is_adult_notice_page?(doc)
  alerts = doc.search("span.alert")
  !alerts.empty?
end
|
#is_adult_product_page?(doc) ⇒ Boolean
37
38
39
40
41
|
# File 'lib/amazon_dp/parser.rb', line 37
# Tells whether the product page carries the "[アダルト]" (adult) marker.
#
# The marker is looked up in the last child of the first "span.highlight"
# element. Any StandardError raised on the way (for example when no such
# element exists) is treated as "not an adult page".
#
# @param doc [#search] parsed document queried with a CSS selector
# @return [Boolean] true only when the stripped text equals "[アダルト]"
def is_adult_product_page?(doc)
  highlight = doc.search("span.highlight").first
  marker = highlight.children.last.text.strip
  marker == "[アダルト]"
rescue
  false
end
|
#is_kindle_product_page?(doc) ⇒ Boolean
43
44
45
46
47
|
# File 'lib/amazon_dp/parser.rb', line 43
# Tells whether the product page is labelled as a Kindle edition.
#
# The label is looked up in the last child of the first list item under the
# "div.content" product-details table cell. Any StandardError raised on the
# way (for example when no list item is matched) is treated as "not Kindle".
#
# @param doc [#search] parsed document queried with a CSS selector
# @return [Boolean] true only when the stripped text equals "Kindle版"
def is_kindle_product_page?(doc)
  first_item = doc.search("table > tr > td > div.content > ul > li").first
  format_label = first_item.children.last.text.strip
  format_label == "Kindle版"
rescue
  false
end
|
#parse_html(html_data) ⇒ Object
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
|
# File 'lib/amazon_dp/parser.rb', line 75
# Parses the HTML of an Amazon product page into a PageInfo.
#
# NOTE(review): the helper method names in the bare `(doc)` expressions below
# are missing from this listing (they look like they were lost during
# extraction); restore the real calls from lib/amazon_dp/parser.rb before use.
#
# @param html_data [String] page HTML whose bytes are CP932-encoded
# @return [PageInfo] built from :is_adult, :is_kindle, :kindle_price,
#   :kindle_pages, :stars, :reviews and :iine; the two kindle_* values are nil
#   unless the page is detected as a Kindle product page
# @raise [Amazon18xError] when the document is the adult-content notice page
# @raise [EncodingError] when html_data cannot be transcoded from CP932
def parse_html(html_data)
  @@logger.info "try to parse html data"
  # String#encode returns a new string and leaves the receiver untouched, so
  # the result has to be kept; the original discarded it and handed the
  # untranscoded bytes to Nokogiri.
  html_data = html_data.encode("UTF-8", "cp932")
  doc = Nokogiri::HTML(html_data)
  if is_adult_notice_page?(doc)
    @@logger.info "this page is adult amazon page"
    raise Amazon18xError
  end
  kindle_flag = is_kindle_product_page?(doc)
  PageInfo.new(
    :is_adult => is_adult_product_page?(doc),
    :is_kindle => kindle_flag,
    :kindle_price => (kindle_flag ? (doc) : nil),
    :kindle_pages => (kindle_flag ? (doc) : nil),
    :stars => (doc),
    :reviews => (doc),
    :iine => (doc)
  )
end
|