Class: JoyceShop::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/joyceshop/scraper.rb

Overview

extract_data class uses XPath selectors to get attributes

Constant Summary collapse

BASE_URL =
'https://www.joyce-shop.com'
BASE_SCRAPE_URL =
"#{BASE_URL}/PDList.asp?"
LATEST_URI =
"#{BASE_SCRAPE_URL}brand=01&item1=&item2=&ya19=&keyword=&recommand=1412170001&ob=F"
POPULAR_URI =
"#{BASE_SCRAPE_URL}brand=01&item1=&item2=&ya19=&keyword=&recommand=1305080002&ob=F"
TOPS_URI =
"#{BASE_SCRAPE_URL}brand=01&item1=110&item2=111&ya19=&keyword=&recommand=&ob=F"
PANTS_URI =
"#{BASE_SCRAPE_URL}brand=01&item1=120&item2=121&ya19=&keyword=&recommand=&ob=F"
ACCESSORIES_URI =
"#{BASE_SCRAPE_URL}brand=01&item1=140&item2=141&ya19=&keyword=&recommand=&ob=F"
ITEM_SELECTOR =

XPath selectors that will be used to scrape data

"//div[contains(@class, 'NEW_shop_list')]/ul/li/div[contains(@class, 'NEW_shop_list_pic')]"
ITEM_INFO_SELECTOR =
"div[contains(@class, 'NEW_shop_list_info')]"
TITLE_SELECTOR =
"#{ITEM_INFO_SELECTOR}/div[1]"
IMAGE_SELECTOR =
"a/img[contains(@class, 'lazyload')]/@src"
PRICE_SELECTOR =
"#{ITEM_INFO_SELECTOR}/span"
"a[1]/@href"
TITLE_REGEX =

Regular expression

/([.\p{Han}[a-zA-Z]]+)/

Instance Method Summary collapse

Instance Method Details

#accessories(page, options = {}) ⇒ Object



50
51
52
53
# File 'lib/joyceshop/scraper.rb', line 50

# Runs a request against the URI derived from ACCESSORIES_URI.
#
# build_uri combines ACCESSORIES_URI with +options+, uri_with_options then
# applies +page+, and the result is handed to process_request along with
# +options+.
#
# @param page [Object] passed to uri_with_options (presumably a page
#   number -- confirm against that helper)
# @param options [Hash] forwarded unchanged to build_uri and process_request
# @return [Object] whatever process_request returns
def accessories(page, options = {})
  category_uri = build_uri(ACCESSORIES_URI, options)
  process_request(uri_with_options(category_uri, page), options)
end

#latest(page, options = {}) ⇒ Object



30
31
32
33
# File 'lib/joyceshop/scraper.rb', line 30

# Runs a request against the URI derived from LATEST_URI.
#
# build_uri combines LATEST_URI with +options+, uri_with_options then
# applies +page+, and the result is handed to process_request along with
# +options+.
#
# @param page [Object] passed to uri_with_options (presumably a page
#   number -- confirm against that helper)
# @param options [Hash] forwarded unchanged to build_uri and process_request
# @return [Object] whatever process_request returns
def latest(page, options = {})
  listing_uri = build_uri(LATEST_URI, options)
  process_request(uri_with_options(listing_uri, page), options)
end

#pants(page, options = {}) ⇒ Object



45
46
47
48
# File 'lib/joyceshop/scraper.rb', line 45

# Runs a request against the URI derived from PANTS_URI.
#
# build_uri combines PANTS_URI with +options+, uri_with_options then
# applies +page+, and the result is handed to process_request along with
# +options+.
#
# @param page [Object] passed to uri_with_options (presumably a page
#   number -- confirm against that helper)
# @param options [Hash] forwarded unchanged to build_uri and process_request
# @return [Object] whatever process_request returns
def pants(page, options = {})
  category_uri = build_uri(PANTS_URI, options)
  process_request(uri_with_options(category_uri, page), options)
end


#popular(page, options = {}) ⇒ Object



35
36
37
38
# File 'lib/joyceshop/scraper.rb', line 35

# Runs a request against the URI derived from POPULAR_URI.
#
# build_uri combines POPULAR_URI with +options+, uri_with_options then
# applies +page+, and the result is handed to process_request along with
# +options+.
#
# @param page [Object] passed to uri_with_options (presumably a page
#   number -- confirm against that helper)
# @param options [Hash] forwarded unchanged to build_uri and process_request
# @return [Object] whatever process_request returns
def popular(page, options = {})
  listing_uri = build_uri(POPULAR_URI, options)
  process_request(uri_with_options(listing_uri, page), options)
end

#scrape(type, options = {}) ⇒ Object



60
61
62
63
64
65
66
# File 'lib/joyceshop/scraper.rb', line 60

# Validates the requested scrape type and delegates to scrape_what.
#
# +type+ is accepted when its +to_sym+ form is one of :tops, :popular,
# :pants, :accessories, :latest or :search. Anything else -- including
# values that cannot be converted with +to_sym+, such as nil -- terminates
# the process via Kernel#abort with an error message.
#
# @param type [Symbol, String] the kind of scrape to run
# @param options [Hash] forwarded unchanged to scrape_what
# @return [Object] whatever scrape_what returns
# @raise [SystemExit] (via abort) when +type+ is not a recognised scrape type
def scrape(type, options = {})
  valid_types = %i[tops popular pants accessories latest search]

  # Guard the conversion so a nil/Integer type takes the same "invalid
  # parameter" path instead of surfacing as a NoMethodError from to_sym.
  recognised = type.respond_to?(:to_sym) && valid_types.include?(type.to_sym)
  abort 'invalid parameter - scrape type' unless recognised

  # The original +type+ (not its symbol form) is what gets passed on.
  scrape_what(type, options)
end

#search(page, options = {}) ⇒ Object



55
56
57
58
# File 'lib/joyceshop/scraper.rb', line 55

# Runs a request against the URI derived from BASE_SCRAPE_URL.
#
# build_uri combines BASE_SCRAPE_URL with +options+, uri_with_options then
# applies +page+, and the result is handed to process_request along with
# +options+.
#
# @param page [Object] passed to uri_with_options (presumably a page
#   number -- confirm against that helper)
# @param options [Hash] forwarded unchanged to build_uri and process_request
#   (presumably carries the search terms -- confirm against build_uri)
# @return [Object] whatever process_request returns
def search(page, options = {})
  query_uri = build_uri(BASE_SCRAPE_URL, options)
  process_request(uri_with_options(query_uri, page), options)
end

#tops(page, options = {}) ⇒ Object



40
41
42
43
# File 'lib/joyceshop/scraper.rb', line 40

# Runs a request against the URI derived from TOPS_URI.
#
# build_uri combines TOPS_URI with +options+, uri_with_options then
# applies +page+, and the result is handed to process_request along with
# +options+.
#
# @param page [Object] passed to uri_with_options (presumably a page
#   number -- confirm against that helper)
# @param options [Hash] forwarded unchanged to build_uri and process_request
# @return [Object] whatever process_request returns
def tops(page, options = {})
  category_uri = build_uri(TOPS_URI, options)
  process_request(uri_with_options(category_uri, page), options)
end