Class: TeaShopper::SongScraper

Inherits:
Object
  • Object
show all
Defined in:
lib/song_scraper.rb

Constant Summary collapse

BASE_URL =

Path definitions

"https://songtea.com"
INDEX_URL =
BASE_URL + "/pages/tea-by-type"

Instance Method Summary collapse

Instance Method Details

#scrape_profile_page(profile_url) ⇒ Object

  1. Scrape individual tea pages, such as songtea.com/collections/oolong-tea/products/dragon-phoenix-tender-heart

Example return values: self.scrape_profile_page(profile_url)

:size=>30.0, 
:price=>19.0, 
:price_per_oz=>20.10618, 
:flavors=>"Notes of orchid, spruce, and ghee.", 
:date=>"2019", 
:region=>"Taiwan",

Removed for now

:detailed_instructions=>"This tea accommodates a range of brew styles...", 
:instructions=>"Brew: 6 grams・150 ml・203° F・2 min", 

:description=>"2019 marks our first year offering this oolong from Taiwan’s Dragon Phoenix Gorge. The cooler temperatures and mist-shrouded gardens of this region product tea with clarity, aromatics, and texture.\nDragon Phoenix Tender Heart is produced by a small farm operated by the Zhang family..."



73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/song_scraper.rb', line 73

# Scrape an individual tea product page and extract its profile details.
#
# @param profile_url [String] absolute URL of the product page
# @return [Hash{Symbol => Object}] profile with the keys:
#   :size         [Float]         grams in the first size/price option (0.0 if not found)
#   :price        [Float, String] price of that option, or "Sold Out" when no price is found
#   :price_per_oz [Float, String] price divided by the size in ounces, rounded to
#                                 2 places; "Sold Out" when the price is
#   :flavors      [String, nil]   first description paragraph
#   :date         [String, nil]   text after "・" in the second paragraph
#   :region       [String, nil]   text after "from " in the second paragraph
#   :description  [String]        remaining paragraphs joined by blank lines
def scrape_profile_page(profile_url)
  # URI.open (from open-uri) is used rather than Kernel#open: Kernel#open no
  # longer fetches URLs as of Ruby 3.0 and treats a leading "|" as a command.
  doc = Nokogiri::HTML(URI.open(profile_url))
  container = doc.css("div#ProductSection div.product-single")

  # First entry of the size/price select list, split on " - ".
  first_option = container.css("form#AddToCartForm option").first
  option_parts = first_option ? first_option.text.strip.split(" - ") : []

  # Size: leading number of the first part (the unit suffix is dropped).
  size = option_parts.first.to_s[/\d+(?:\.\d+)?/].to_f

  # Price: number in the last part, keeping any decimal places. When no number
  # is present the match is nil, nil.to_f is 0.0, and the tea is "Sold Out".
  amount = option_parts.last.to_s[/\d+(?:\.\d+)?/].to_f
  price = amount.zero? ? "Sold Out" : amount

  # Price per ounce = price / (grams * 0.035274 oz per gram).
  price_per_oz =
    if price.is_a?(String)
      price
    elsif size.zero?
      0.0 # size unknown: avoid dividing by zero
    else
      (price / (size * 0.035274)).round(2)
    end

  # Description paragraphs, in page order: flavors, then "... from REGION・DATE",
  # then the free-form description.
  paragraphs = container.css("div.product-description p").map(&:text)
  flavors = paragraphs.shift
  origin_parts = paragraphs.shift.to_s.split("・")

  # Future: to separate steep instructions, pop :detailed_instructions and then
  # :instructions off the end of `paragraphs` before joining the description.

  {
    size: size,
    price: price,
    price_per_oz: price_per_oz,
    flavors: flavors,
    date: origin_parts[1],
    region: origin_parts.first.to_s[/(?<=from ).*/],
    description: paragraphs.join("\n\n")
  }
end

#scrape_teas ⇒ Object

  1. Scrape teas from Song Teas by Type page: songtea.com/pages/tea-by-type

Example return values:

:name=>"Aged Baozhong, 1960s", 
:type=>"aged", 
:shop_name=>"Song Tea & Ceramics", 
:url=>"https://songtea.com/collections/aged-tea/products/aged-baozhong-1960s", 
:stock=>""



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/song_scraper.rb', line 18

# Scrape the teas listed on the index page (INDEX_URL), grouped by tea type.
#
# @return [Array<Hash{Symbol => Object}>] one hash per tea link with the keys:
#   :name      [String]      text of the link's "p.grid-link__title"
#   :type      [String, nil] first "-"-separated word of the last "/" segment of
#                            the section id; "red" is reported as "black/red"
#   :shop_name [String, nil] content of the og:site_name meta tag ("" if absent)
#   :url       [String]      BASE_URL followed by the link's href
#   :stock     [String]      "sold out" when the link's badge says "Sold Out", else ""
def scrape_teas
  # URI.open (from open-uri) is used rather than Kernel#open: Kernel#open no
  # longer fetches URLs as of Ruby 3.0 and treats a leading "|" as a command.
  doc = Nokogiri::HTML(URI.open(INDEX_URL))

  # Shop name comes from the og:site_name meta tag; the last match wins.
  site_meta = doc.css("meta").select { |meta| meta.attr("property") == "og:site_name" }.last
  shop_name = site_meta ? site_meta.attr("content") : ""

  # One section per tea type; each section holds the links to its teas.
  doc.css("div.product-section").flat_map do |section|
    # The type depends only on the section, so derive it once per section.
    # to_s guards against a section without an id attribute.
    tea_type = section.attr("id").to_s.split("/").last.to_s.split("-").first
    # Replace "red" with "black/red" to remove user confusion.
    tea_type = "black/red" if tea_type == "red"

    section.css(".grid__item a.grid-link").map do |tea|
      sold_out = tea.css("span.badge").text.include?("Sold Out")

      {
        name: tea.css("p.grid-link__title").text,
        type: tea_type,
        shop_name: shop_name,
        # Interpolation tolerates a link without an href attribute.
        url: "#{BASE_URL}#{tea.attr('href')}",
        stock: sold_out ? "sold out" : ""
      }
    end
  end
end