Class: GameListScraper
- Inherits:
-
Object
- Object
- GameListScraper
- Defined in:
- lib/steam_scraper/game_list_scraper.rb
Overview
Core scraping class
Instance Method Summary
- #get_page_contents(url) ⇒ Object
- #get_review_contents(entry) ⇒ Object
- #init_last_page_num ⇒ Object
-
#initialize(*_args) ⇒ GameListScraper
constructor
A new instance of GameListScraper.
- #scrape(first_page = 1, last_page = nil) ⇒ Object
- #scrape_entry(entry) ⇒ Object
- #scrape_icon_url(entry) ⇒ Object
- #scrape_name(entry) ⇒ Object
- #scrape_number_of_reviews(entry) ⇒ Object
- #scrape_page(current_page) ⇒ Object
- #scrape_platforms(entry) ⇒ Object
- #scrape_price(entry) ⇒ Object
- #scrape_release_date(entry) ⇒ Object
- #scrape_review_score(entry) ⇒ Object
- #scrape_url(entry) ⇒ Object
- #search_results(page_number) ⇒ Object
- #site ⇒ Object
Constructor Details
#initialize(*_args) ⇒ GameListScraper
Returns a new instance of GameListScraper.
5 6 7 8 9 |
# File 'lib/steam_scraper/game_list_scraper.rb', line 5
# Sets up a scraper with a fresh PageRetriever and an empty game list, then
# determines the last search-result page via #init_last_page_num.
#
# @param _args [Array] accepted for compatibility and ignored
def initialize(*_args)
  @page_retriever = PageRetriever.new
  @game_list = []
  init_last_page_num
end
Instance Method Details
#get_page_contents(url) ⇒ Object
24 25 26 |
# File 'lib/steam_scraper/game_list_scraper.rb', line 24
# Hands the URL to the scraper's page retriever and returns its result.
#
# @param url [String] address to fetch
# @return [Object] whatever @page_retriever.retrieve returns; other methods
#   of this class call #xpath on it
def get_page_contents(url)
  retriever = @page_retriever
  retriever.retrieve(url)
end
#get_review_contents(entry) ⇒ Object
86 87 88 89 90 91 |
# File 'lib/steam_scraper/game_list_scraper.rb', line 86
# Reads the review tooltip text attached to a search-result entry.
#
# @param entry [#xpath] a single search-result node
# @return [String, nil] value of the 'data-store-tooltip' attribute on the
#   review-summary span, or nil when the span or the attribute is missing
def get_review_contents(entry)
  summary = entry.xpath(".//span[contains(@class, 'search_review_summary')]")
  return nil if summary.empty?

  # The span can exist without a tooltip; avoid calling #value on nil.
  tooltip = summary.attribute('data-store-tooltip')
  tooltip && tooltip.value
end
#init_last_page_num ⇒ Object
11 12 13 14 15 16 17 18 |
# File 'lib/steam_scraper/game_list_scraper.rb', line 11
# Loads the first search page and stores the last number found in its
# pagination block in @last_page_num (0 when the block contains no digits).
#
# Any StandardError raised along the way is announced on stdout and then
# re-raised to the caller.
#
# @return [Integer] the value assigned to @last_page_num
# @raise [StandardError] whatever the fetch or parse step raised
def init_last_page_num
  first_page = get_page_contents("#{site}1")
  pagination = first_page.xpath("//div[contains(@class, 'search_pagination_right')]")
  page_numbers = pagination.text.scan(/(\d+)/i).flatten
  @last_page_num = page_numbers.last.to_i
rescue StandardError => e
  puts 'Could not connect to Steam Store'
  raise e
end
#scrape(first_page = 1, last_page = nil) ⇒ Object
35 36 37 38 39 40 41 42 43 44 |
# File 'lib/steam_scraper/game_list_scraper.rb', line 35
# Scrapes a range of search-result pages in parallel worker processes and
# appends everything that was found to @game_list.
#
# @param first_page [Integer] first page number to scrape (inclusive)
# @param last_page [Integer, nil] last page number (inclusive); falls back to
#   @last_page_num when nil
# @return [Array] @game_list after the newly scraped entries were appended
def scrape(first_page = 1, last_page = nil)
  last_page ||= @last_page_num
  # Interpolate both bounds: joining a one-element array that holds the Range
  # rendered "1..5" and never used the ' to ' separator.
  progress_label = "Scraping Steam Store pages #{first_page} to #{last_page}"
  pages = Parallel.map(first_page..last_page, progress: progress_label, in_processes: 8) do |page|
    items_on_page = search_results(page)
    # search_results yields nil when a page could not be fetched; count that
    # page as empty instead of failing on nil inside the worker.
    items_on_page.nil? ? [] : scrape_page(items_on_page)
  end
  @game_list.concat(pages.flatten)
end
#scrape_entry(entry) ⇒ Object
107 108 109 110 111 112 113 114 115 116 117 118 |
# File 'lib/steam_scraper/game_list_scraper.rb', line 107
# Collects every scraped attribute of one search-result entry into a hash.
#
# @param entry [Object] a single search-result node, passed unchanged to each
#   scrape_* helper
# @return [Hash{Symbol => Object}] keys :url, :name, :price, :release_date,
#   :platforms, :icon_url, :review_score and :number_of_reviews
def scrape_entry(entry)
  {
    url: scrape_url(entry),
    name: scrape_name(entry),
    price: scrape_price(entry),
    release_date: scrape_release_date(entry),
    platforms: scrape_platforms(entry),
    icon_url: scrape_icon_url(entry),
    review_score: scrape_review_score(entry),
    number_of_reviews: scrape_number_of_reviews(entry)
  }
end
#scrape_icon_url(entry) ⇒ Object
82 83 84 |
# File 'lib/steam_scraper/game_list_scraper.rb', line 82
# Extracts the capsule image URL of a search-result entry.
#
# @param entry [#xpath] a single search-result node
# @return [String, nil] the image's 'src' value, or nil when the entry has no
#   capsule image or the image carries no 'src' attribute
def scrape_icon_url(entry)
  image = entry.xpath(".//div[contains(@class, 'search_capsule')]/img")
  # A missing image or attribute yields nil here; do not call #value on it,
  # otherwise one incomplete entry aborts the whole scrape.
  source = image.attribute('src')
  source && source.value
end
#scrape_name(entry) ⇒ Object
58 59 60 |
# File 'lib/steam_scraper/game_list_scraper.rb', line 58
# Reads the game title of a search-result entry.
#
# @param entry [#xpath] a single search-result node
# @return [String] text of the span whose class is exactly 'title'
def scrape_name(entry)
  title_nodes = entry.xpath(".//span[@class='title']")
  title_nodes.text
end
#scrape_number_of_reviews(entry) ⇒ Object
100 101 102 103 104 105 |
# File 'lib/steam_scraper/game_list_scraper.rb', line 100
# Extracts the number of user reviews from an entry's review tooltip.
#
# @param entry [Object] a single search-result node, passed to
#   #get_review_contents
# @return [String, nil] the review count exactly as written in the tooltip
#   (thousands separators included), or nil when there is no tooltip or it
#   does not contain a "<N>% of the <count> user" phrase
def scrape_number_of_reviews(entry)
  tooltip = get_review_contents(entry)
  return nil if tooltip.nil?

  # \d+ instead of \d\d so single-digit percentages ("5% of the 20 user
  # reviews") are recognised as well.
  match = /\d+% of the ([0-9,]*) user/i.match(tooltip)
  match && match[1]
end
#scrape_page(current_page) ⇒ Object
46 47 48 49 50 51 52 |
# File 'lib/steam_scraper/game_list_scraper.rb', line 46
# Scrapes every entry of one search-result page.
#
# @param current_page [#map, nil] the entries of a page; nil is accepted
#   because #search_results returns nil when a page could not be loaded
# @return [Array] one #scrape_entry result per entry, in page order; empty
#   when current_page is nil or has no entries
def scrape_page(current_page)
  return [] if current_page.nil?

  current_page.map { |entry| scrape_entry(entry) }
end
#scrape_platforms(entry) ⇒ Object
73 74 75 76 77 78 79 80 |
# File 'lib/steam_scraper/game_list_scraper.rb', line 73
# Lists the platforms advertised on a search-result entry.
#
# A platform is reported when the entry contains a span whose class
# attribute contains the platform's marker text.
#
# @param entry [#xpath] a single search-result node
# @return [Array<String>] any of 'Windows', 'macOS', 'Linux', 'Steamplay',
#   in that order
def scrape_platforms(entry)
  markers = [
    ['Windows', 'win'],
    ['macOS', 'mac'],
    ['Linux', 'linux'],
    ['Steamplay', 'steamplay']
  ]
  supported = markers.reject do |_label, marker|
    entry.xpath(".//span[contains(@class, '#{marker}')]").empty?
  end
  supported.map(&:first)
end
#scrape_price(entry) ⇒ Object
62 63 64 65 |
# File 'lib/steam_scraper/game_list_scraper.rb', line 62
# Reads the price text of a search-result entry.
#
# The text of the price div (excluding the combined discount container) is
# stripped and split on '$'; the last segment is returned.
#
# @param entry [#xpath] a single search-result node
# @return [String, nil] the last '$'-separated segment of the price text, or
#   nil when the price text is empty
def scrape_price(entry)
  price_nodes = entry.xpath(".//div[contains(@class, 'search_price') and not(contains(@class, 'search_price_discount_combined'))]")
  price_text = price_nodes.text.strip
  price_text.split('$').last
end
#scrape_release_date(entry) ⇒ Object
67 68 69 70 71 |
# File 'lib/steam_scraper/game_list_scraper.rb', line 67
# Parses the release date shown on a search-result entry.
#
# Only parse failures are turned into nil; any other error (for example a
# missing Date constant or a broken entry) is allowed to surface instead of
# being silently swallowed.
#
# @param entry [#xpath] a single search-result node
# @return [Date, nil] the parsed date, or nil when the text is not a date
def scrape_release_date(entry)
  released = entry.xpath(".//div[contains(@class, 'search_released')]").text
  Date.parse(released)
rescue ArgumentError
  # Date.parse reports unparsable text with ArgumentError (Date::Error, where
  # it exists, is a subclass of it).
  nil
end
#scrape_review_score(entry) ⇒ Object
93 94 95 96 97 98 |
# File 'lib/steam_scraper/game_list_scraper.rb', line 93
# Extracts the positive-review percentage from an entry's review tooltip.
#
# @param entry [Object] a single search-result node, passed to
#   #get_review_contents
# @return [String, nil] the digits that precede the first '%' in the tooltip
#   (e.g. "94"), or nil when there is no tooltip or no percentage in it
def scrape_review_score(entry)
  tooltip = get_review_contents(entry)
  return nil if tooltip.nil?

  # No leading greedy ".*" and no fixed two-digit width: the old pattern
  # captured "00" for "100%" and missed single-digit scores entirely.
  match = /(\d+)%/.match(tooltip)
  match && match[1]
end
#scrape_url(entry) ⇒ Object
54 55 56 |
# File 'lib/steam_scraper/game_list_scraper.rb', line 54
# Reads the link target of a search-result entry.
#
# @param entry [#attribute] a single search-result node
# @return [String] value of the entry's 'href' attribute
def scrape_url(entry)
  link = entry.attribute('href')
  link.value
end
#search_results(page_number) ⇒ Object
28 29 30 31 32 33 |
# File 'lib/steam_scraper/game_list_scraper.rb', line 28
# Fetches one search page and selects the result links inside the
# 'search_result_container' div.
#
# @param page_number [Integer] page to load; appended to #site
# @return [Object, nil] the nodes matched by the XPath query, or nil when
#   fetching or querying raised a StandardError
def search_results(page_number)
  page = get_page_contents("#{site}#{page_number}")
  page.xpath("//div[@id='search_result_container']/div/a")
rescue StandardError
  nil
end
#site ⇒ Object
20 21 22 |
# File 'lib/steam_scraper/game_list_scraper.rb', line 20
# Base URL of the store search; callers append the page number to it.
#
# @return [String] the search URL ending in the 'page=' query parameter
def site
  'http://store.steampowered.com/search?page='
end