Module: ParseHtml

Includes:
Scraping
Included in:
SalesData
Defined in:
lib/amazomania/parse_html.rb

Instance Method Summary

Methods included from Scraping

#scraping

Methods included from CreateSession

#create_session

Methods included from UserAgent

#user_agent, #user_agents

Methods included from CreateUri

#all_type_product_uri

Instance Method Details

#amazon_points ⇒ Object



92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/amazomania/parse_html.rb', line 92

# Collects one Amazon point value per entry from the
# <span class="a-color-secondary"> elements of @nokogiried_doc.
#
# A span whose serialized markup contains "color: #990000;" is treated as a
# points label: its text is stripped, commas are removed, and everything from
# the "<digits>pt" part onward is reduced to the digits. The span immediately
# following a points label is skipped. Every other span contributes "0".
#
# NOTE(review): @nokogiried_doc is presumably a Nokogiri document set by
# parse_html — confirm against the caller.
#
# @return [Array<String>] point values as strings, "0" where no points label
#   was found
def amazon_points
  skip_following = false
  spans = @nokogiried_doc.xpath("//*/span[@class='a-color-secondary']")
  spans.each_with_object([]) do |span, points|
    if skip_following
      skip_following = false
      next
    end
    if span.to_s.include?("color: #990000;")
      label = span.inner_text.strip
      points << label.gsub(/,/, '').sub(/([0-9]+)pt.*/, '\1')
      # "Points" and "shipping" share class="a-color-secondary", and the
      # points span comes first, so the span right after it is skipped.
      skip_following = true
    else
      points << "0"
    end
  end
end

#main_conditions ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/amazomania/parse_html.rb', line 43

# Extracts the main condition label for each
# <span class="a-size-medium olpCondition a-text-bold"> element of
# @nokogiried_doc.
#
# If the span text (with spaces and newlines removed) contains "中古品", the
# result for that span is exactly "中古品". Otherwise the result is the span
# text with spaces removed and surrounding whitespace stripped.
#
# NOTE(review): @nokogiried_doc is presumably a Nokogiri document set by
# parse_html — confirm against the caller.
#
# @return [Array<String>] one condition label per matching span
def main_conditions
  condition_spans = @nokogiried_doc.xpath("//*/span[@class='a-size-medium olpCondition a-text-bold']")
  condition_spans.map do |span|
    text = span.inner_text.gsub(" ", "")
    # Substring matching on the flattened text is a crude way to detect
    # the "used" condition, kept as in the original implementation.
    if text.gsub(/\n/, "").include?("中古品")
      "中古品"
    else
      # This used to be a bare `elsif` whose "condition" was the assignment
      # itself; it only worked because a String is always truthy.
      text.strip
    end
  end
end

#parse_html(asin) ⇒ Object



6
7
8
9
10
11
12
13
14
# File 'lib/amazomania/parse_html.rb', line 6

# Scrapes the page for the given ASIN and caches every parsed column in an
# instance variable.
#
# The document must be stored in @nokogiried_doc first, because each of the
# extraction methods called afterwards reads it.
#
# @param asin [Object] identifier passed straight through to #scraping
#   (presumably an ASIN string — confirm against #scraping)
# @return [Object] the value returned by #amazon_points (the last assignment)
def parse_html(asin)
  @nokogiried_doc = scraping(asin)

  # Seller and price columns.
  @shop_names = shop_names
  @prices = prices

  # Item condition columns.
  @main_conditions = main_conditions
  @sub_conditions = sub_conditions

  # Shipping and point columns.
  @shipping_prices = shipping_prices
  @amazon_points = amazon_points
end

#prices ⇒ Object



34
35
36
37
38
39
40
41
# File 'lib/amazomania/parse_html.rb', line 34

# Extracts the offer price from each
# <span class="a-size-large a-color-price olpOfferPrice a-text-bold">
# element of @nokogiried_doc, keeping only the digit characters.
#
# NOTE(review): @nokogiried_doc is presumably a Nokogiri document set by
# parse_html — confirm against the caller.
#
# @return [Array<String>] one digits-only string per matching span (empty
#   string when the span text contains no digits)
def prices
  price_spans = @nokogiried_doc.xpath("//*/span[@class='a-size-large a-color-price olpOfferPrice a-text-bold']")
  price_spans.map { |span| span.inner_text.lstrip.gsub(/[^\d]/, "") }
end

#shipping_prices ⇒ Object



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/amazomania/parse_html.rb', line 74

# Extracts the shipping price for each <span class="a-color-secondary">
# element of @nokogiried_doc.
#
# Spans whose serialized markup contains "color: #990000;" are ignored
# entirely. Of the remaining spans, those whose markup mentions
# "olpShippingPrice" yield the digits of their text; all others yield "0".
#
# NOTE(review): @nokogiried_doc is presumably a Nokogiri document set by
# parse_html — confirm against the caller.
#
# @return [Array<String>] digits-only shipping prices, "0" where the span
#   carries no shipping price
def shipping_prices
  spans = @nokogiried_doc.xpath("//span[@class='a-color-secondary']")
  spans.each_with_object([]) do |span, fees|
    # Matching on the serialized markup is a crude check, kept as-is.
    markup = span.to_s
    next if markup.include?("color: #990000;")

    unless markup.include?("olpShippingPrice")
      fees << "0"
      next
    end

    flattened = span.inner_text.chomp.gsub(" ", "").gsub(/(\r\n|\r|\n|\f)/,"")
    fees << flattened.gsub(/[^\d]/, "")
  end
end

#shop_names ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/amazomania/parse_html.rb', line 16

# Determines the shop name for each
# <h3 class="a-spacing-none olpSellerName"> element of @nokogiried_doc.
#
# For every heading, in document order:
# * if its serialized markup contains "seller", the name is the text of the
#   next not-yet-used <a> found under h3/span (the n-th such heading uses
#   the n-th link);
# * else if the markup contains "amazon.co.jp/shops/", the name is
#   "Amazonアウトレット";
# * otherwise the name is "Amazon.co.jp".
#
# The seller links are queried lazily and at most once, rather than once per
# seller heading.
#
# NOTE(review): @nokogiried_doc is presumably a Nokogiri document set by
# parse_html — confirm against the caller.
#
# @return [Array<String>] one shop name per heading
# @raise [NoMethodError] if there are more "seller" headings than h3/span/a
#   links (the indexed lookup returns nil)
def shop_names
  # The heading query has to stay shallow: a deeper path could not tell
  # Amazon's own listings / Amazon Prime apart from third-party sellers.
  heading_xpath = "//*/h3[@class='a-spacing-none olpSellerName']"
  headings = @nokogiried_doc.xpath(heading_xpath)

  seller_links = nil
  seller_count = 0
  headings.map do |heading|
    # Matching on the serialized markup is a crude check, kept as-is.
    markup = heading.to_s
    if markup.include?("seller")
      seller_links ||= @nokogiried_doc.xpath("#{heading_xpath}/span/a")
      shop_name = seller_links[seller_count].inner_text
      seller_count += 1
      shop_name
    elsif markup.include?("amazon.co.jp/shops/") # Amazon Outlet
      "Amazonアウトレット"
    else
      "Amazon.co.jp" # Amazon's own listing or Amazon Prime
    end
  end
end

#sub_conditions ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/amazomania/parse_html.rb', line 58

# Extracts the sub-condition for each
# <span class="a-size-medium olpCondition a-text-bold"> element of
# @nokogiried_doc.
#
# The span text is flattened by removing spaces and newlines. If it contains
# "中古品", the sub-condition is whatever follows "中古品-" (nil when that
# pattern does not match); otherwise it is "新品".
#
# NOTE(review): @nokogiried_doc is presumably a Nokogiri document set by
# parse_html — confirm against the caller.
#
# @return [Array<String, nil>] one sub-condition per matching span
def sub_conditions
  condition_spans = @nokogiried_doc.xpath("//*/span[@class='a-size-medium olpCondition a-text-bold']")
  condition_spans.map do |span|
    label = span.inner_text.gsub(" ", "").gsub(/\n/, "")
    # Substring matching on the flattened text is a crude check, kept as-is.
    label.include?("中古品") ? label[/中古品\-(.*)/, 1] : "新品"
  end
end