Module: ParseHtml
Instance Method Summary
collapse
Methods included from Scraping
#scraping
#create_session
Methods included from UserAgent
#user_agent, #user_agents
Methods included from CreateUri
#all_type_product_uri
Instance Method Details
#amazon_points ⇒ Object
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
# File 'lib/amazomania/parse_html.rb', line 92
# Builds the list of Amazon point values from the parsed offer page.
#
# Walks every span with class "a-color-secondary". A span whose markup
# contains "color: #990000;" is treated as a points span: its text is
# stripped, commas are removed and everything from "pt" onwards is dropped.
# The span immediately following a points span is ignored entirely.
# Every other span contributes "0".
#
# @return [Array<String>] one string per non-skipped span
def amazon_points
  spans = @nokogiried_doc.xpath("//*/span[@class='a-color-secondary']")
  skip_following = false

  spans.each_with_object([]) do |span, points|
    if skip_following
      # This span directly follows a points span; consume the flag and move on.
      skip_following = false
    elsif span.to_s.include?("color: #990000;")
      raw_text = span.inner_text
      points.push(raw_text.strip.gsub(/,/, '').sub(/([0-9]+)pt.*/, '\1'))
      skip_following = true
    else
      points.push("0")
    end
  end
end
|
#main_conditions ⇒ Object
43
44
45
46
47
48
49
50
51
52
53
54
55
56
|
# File 'lib/amazomania/parse_html.rb', line 43
# Extracts the main condition label of every offer on the parsed page.
#
# Reads each span with class "a-size-medium olpCondition a-text-bold".
# If the span text (spaces and newlines removed) contains "中古品", the
# label is normalised to exactly "中古品"; otherwise the label is the span
# text with spaces removed and surrounding whitespace stripped.
#
# @return [Array<String>] one label per condition span, in document order
def main_conditions
  nodesets = @nokogiried_doc.xpath("//*/span[@class='a-size-medium olpCondition a-text-bold']")

  nodesets.map do |nodeset|
    text = nodeset.inner_text.delete(" ")
    # Newlines are ignored only for the "中古品" check; the fallback label
    # keeps inner newlines and just strips the edges.
    text.delete("\n").include?("中古品") ? "中古品" : text.strip
  end
end
|
#parse_html(asin) ⇒ Object
6
7
8
9
10
11
12
13
14
|
# File 'lib/amazomania/parse_html.rb', line 6
# Fetches the page for the given ASIN via #scraping and caches every
# extracted field in an instance variable of the same name.
#
# The document is stored in @nokogiried_doc first because each extractor
# reads it. The extractors then run in the order listed below.
#
# @param asin [Object] product identifier, passed unchanged to #scraping
# @return [Object] the value stored in @amazon_points
def parse_html(asin)
  @nokogiried_doc = scraping(asin)

  # Each name is both the extractor method to call and the instance
  # variable (@shop_names, @prices, ...) that receives its result.
  %i[
    shop_names
    prices
    main_conditions
    sub_conditions
    shipping_prices
    amazon_points
  ].each do |field|
    instance_variable_set("@#{field}", send(field))
  end

  @amazon_points
end
|
#prices ⇒ Object
34
35
36
37
38
39
40
41
|
# File 'lib/amazomania/parse_html.rb', line 34
# Extracts the offer prices from the parsed page as digit-only strings.
#
# Reads each span with class
# "a-size-large a-color-price olpOfferPrice a-text-bold" and removes every
# non-digit character from its text.
#
# @return [Array<String>] one digit string per price span, in document order
def prices
  price_spans = @nokogiried_doc.xpath("//*/span[@class='a-size-large a-color-price olpOfferPrice a-text-bold']")
  price_spans.map { |span| span.inner_text.lstrip.gsub(/[^\d]/, "") }
end
|
#shipping_prices ⇒ Object
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
|
# File 'lib/amazomania/parse_html.rb', line 74
# Extracts the shipping price of each offer as a digit-only string.
#
# Walks every span with class "a-color-secondary":
# * spans whose markup contains "color: #990000;" are skipped and produce
#   no entry;
# * spans whose markup contains "olpShippingPrice" contribute their text
#   with every non-digit character removed;
# * every other span contributes "0".
#
# @return [Array<String>] one string per non-skipped span, in document order
def shipping_prices
  nodesets = @nokogiried_doc.xpath("//span[@class='a-color-secondary']")

  nodesets.each_with_object([]) do |nodeset, shipping_prices|
    html = nodeset.to_s # serialise once; both checks below look at the markup
    next if html.include?("color: #990000;")

    shipping_prices <<
      if html.include?("olpShippingPrice")
        # Removing all non-digits also removes spaces and line breaks, so no
        # separate whitespace clean-up is needed.
        nodeset.inner_text.gsub(/[^\d]/, "")
      else
        "0"
      end
  end
end
|
#shop_names ⇒ Object
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
# File 'lib/amazomania/parse_html.rb', line 16
# Extracts the seller name of each offer on the parsed page.
#
# Walks every h3 with class "a-spacing-none olpSellerName":
# * if its markup contains "seller", the name is the text of the next
#   unused "span/a" link found under those h3 elements (links are consumed
#   in document order, one per such h3);
# * else if its markup contains "amazon.co.jp/shops/", the name is
#   "Amazonアウトレット";
# * otherwise the name is "Amazon.co.jp".
#
# @return [Array<String>] one name per seller heading, in document order
# @raise [NoMethodError] if a heading mentions "seller" but no matching
#   link is left
def shop_names
  nodesets = @nokogiried_doc.xpath("//*/h3[@class='a-spacing-none olpSellerName']")
  # Looked up once: the query does not depend on the heading being visited.
  seller_links = @nokogiried_doc.xpath("//*/h3[@class='a-spacing-none olpSellerName']/span/a")
  seller_count = 0

  nodesets.map do |nodeset|
    html = nodeset.to_s
    if html.include?("seller")
      shop_name = seller_links[seller_count].inner_text
      seller_count += 1
      shop_name
    elsif html.include?("amazon.co.jp/shops/")
      "Amazonアウトレット"
    else
      "Amazon.co.jp"
    end
  end
end
|
#sub_conditions ⇒ Object
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
|
# File 'lib/amazomania/parse_html.rb', line 58
# Extracts the sub-condition of every offer on the parsed page.
#
# Reads each span with class "a-size-medium olpCondition a-text-bold" and
# removes spaces and newlines from its text. When the result contains
# "中古品", the sub-condition is whatever follows "中古品-" (nil when that
# pattern is absent); otherwise it is "新品".
#
# @return [Array<String, nil>] one entry per condition span, in document order
def sub_conditions
  nodesets = @nokogiried_doc.xpath("//*/span[@class='a-size-medium olpCondition a-text-bold']")

  nodesets.map do |nodeset|
    condition = nodeset.inner_text.delete(" \n")
    if condition.include?("中古品")
      # String#[] with a capture index yields the group, or nil on no match.
      condition[/中古品-(.*)/, 1]
    else
      "新品"
    end
  end
end
|