Class: JoyceShop::Scraper
- Inherits:
-
Object
- Object
- JoyceShop::Scraper
- Defined in:
- lib/joyceshop/scraper.rb
Overview
The scraper class uses XPath selectors to extract item attributes.
Constant Summary collapse
- BASE_URL =
'https://www.joyce-shop.com'- BASE_SCRAPE_URL =
"#{BASE_URL}/PDList.asp?"- LATEST_URI =
"#{BASE_SCRAPE_URL}brand=01&item1=&item2=&ya19=&keyword=&recommand=1412170001&ob=F"- POPULAR_URI =
"#{BASE_SCRAPE_URL}brand=01&item1=&item2=&ya19=&keyword=&recommand=1305080002&ob=F"- TOPS_URI =
"#{BASE_SCRAPE_URL}brand=01&item1=110&item2=111&ya19=&keyword=&recommand=&ob=F"- PANTS_URI =
"#{BASE_SCRAPE_URL}brand=01&item1=120&item2=121&ya19=&keyword=&recommand=&ob=F"- ACCESSORIES_URI =
"#{BASE_SCRAPE_URL}brand=01&item1=140&item2=141&ya19=&keyword=&recommand=&ob=F"- ITEM_SELECTOR =
XPath selectors that will be used to scrape data
"//div[contains(@class, 'NEW_shop_list')]/ul/li/div[contains(@class, 'NEW_shop_list_pic')]"- ITEM_INFO_SELECTOR =
"div[contains(@class, 'NEW_shop_list_info')]"- TITLE_SELECTOR =
"#{ITEM_INFO_SELECTOR}/div[1]"- IMAGE_SELECTOR =
"a/img[contains(@class, 'lazyload')]/@src"- PRICE_SELECTOR =
"#{ITEM_INFO_SELECTOR}/span"- LINK_SELECTOR =
"a[1]/@href"- TITLE_REGEX =
Regular expression for matching titles
/([.\p{Han}[a-zA-Z]]+)/
Instance Method Summary collapse
- #accessories(page, options = {}) ⇒ Object
- #latest(page, options = {}) ⇒ Object
- #pants(page, options = {}) ⇒ Object
- #popular(page, options = {}) ⇒ Object
- #scrape(type, options = {}) ⇒ Object
- #search(page, options = {}) ⇒ Object
- #tops(page, options = {}) ⇒ Object
Instance Method Details
#accessories(page, options = {}) ⇒ Object
50 51 52 53 |
# File 'lib/joyceshop/scraper.rb', line 50 def accessories(page, = {}) uri = (build_uri(ACCESSORIES_URI, ), page) process_request(uri, ) end |
#latest(page, options = {}) ⇒ Object
30 31 32 33 |
# File 'lib/joyceshop/scraper.rb', line 30 def latest(page, = {}) uri = (build_uri(LATEST_URI, ), page) process_request(uri, ) end |
#pants(page, options = {}) ⇒ Object
45 46 47 48 |
# File 'lib/joyceshop/scraper.rb', line 45 def pants(page, = {}) uri = (build_uri(PANTS_URI, ), page) process_request(uri, ) end |
#popular(page, options = {}) ⇒ Object
35 36 37 38 |
# File 'lib/joyceshop/scraper.rb', line 35 def popular(page, = {}) uri = (build_uri(POPULAR_URI, ), page) process_request(uri, ) end |
#scrape(type, options = {}) ⇒ Object
60 61 62 63 64 65 66 |
# File 'lib/joyceshop/scraper.rb', line 60 def scrape(type, = {}) records = [] valid_args = [:tops, :popular, :pants, :pants, :accessories, :latest, :search] abort 'invalid parameter - scrape type' unless valid_args.include?(type.to_sym) scrape_what(type, ) end |
#search(page, options = {}) ⇒ Object
55 56 57 58 |
# File 'lib/joyceshop/scraper.rb', line 55 def search(page, = {}) uri = (build_uri(BASE_SCRAPE_URL, ), page) process_request(uri, ) end |
#tops(page, options = {}) ⇒ Object
40 41 42 43 |
# File 'lib/joyceshop/scraper.rb', line 40 def tops(page, = {}) uri = (build_uri(TOPS_URI, ), page) process_request(uri, ) end |