Class: GamePageScraper
- Inherits:
-
Object
- Object
- GamePageScraper
- Defined in:
- lib/steam_scraper/game_page_scraper.rb
Overview
Class that scrapes a game's actual page
Instance Method Summary collapse
- #get_page_contents(url) ⇒ Object
-
#initialize(*_args) ⇒ GamePageScraper
constructor
A new instance of GamePageScraper.
- #scrape(games_hash) ⇒ Object
- #scrape_developer(page_contents) ⇒ Object
- #scrape_game(game, url) ⇒ Object
- #scrape_game_with_valid_contents(game, page_contents) ⇒ Object
- #scrape_genres(page_contents) ⇒ Object
- #scrape_metacritic(page_contents) ⇒ Object
- #scrape_min_spec(page_contents) ⇒ Object
- #scrape_publisher(page_contents) ⇒ Object
- #scrape_recommended_spec(page_contents) ⇒ Object
- #scrape_spec(node) ⇒ Object
- #scrape_tags(page_contents) ⇒ Object
Constructor Details
#initialize(*_args) ⇒ GamePageScraper
Returns a new instance of GamePageScraper.
6 7 8 |
# File 'lib/steam_scraper/game_page_scraper.rb', line 6
#
# Builds a scraper together with the PageRetriever it uses to fetch pages.
#
# @param _args [Array] accepted for call compatibility and ignored
def initialize(*_args)
  @page_retriever = PageRetriever.new
end
Instance Method Details
#get_page_contents(url) ⇒ Object
39 40 41 |
# File 'lib/steam_scraper/game_page_scraper.rb', line 39
#
# Fetches the page at +url+ by delegating to the retriever created in
# the constructor.
#
# @param url [Object] passed straight through to the retriever
# @return [Object] whatever the retriever's +retrieve+ returns
def get_page_contents(url)
  retriever = @page_retriever
  retriever.retrieve(url)
end
#scrape(games_hash) ⇒ Object
10 11 12 13 14 15 16 17 18 |
# File 'lib/steam_scraper/game_page_scraper.rb', line 10
#
# Scrapes the detail page of every game that has a :url, using 8 worker
# processes, then appends the per-game results to +games_hash+ and
# flattens it in place.
#
# NOTE(review): the results are appended to the input rather than
# replacing it, and games without a :url contribute nil. This looks like
# it leaves both the original and the scraped entry in the collection;
# confirm against callers before changing.
#
# @param games_hash [Array<Hash>] games; each may carry a :url key
# @return [Array] +games_hash+ itself, after the append and flatten
def scrape(games_hash)
  scraped = Parallel.map(games_hash,
                         progress: 'Scraping additional per game data',
                         in_processes: 8) do |game|
    page_url = game[:url]
    page_url.nil? ? nil : scrape_game(game, page_url)
  end

  games_hash.push(scraped)
  games_hash.flatten!
end
#scrape_developer(page_contents) ⇒ Object
67 68 69 70 |
# File 'lib/steam_scraper/game_page_scraper.rb', line 67
#
# Extracts the developer name(s) from the page's details block.
#
# Every link whose href contains 'developer' is read individually, so
# several developers are returned as "A, B" instead of being run
# together as "AB" (which is what calling +text+ on the whole set does).
#
# @param page_contents [#xpath] parsed game page
# @return [String] stripped developer names joined by ", "; "" when none
def scrape_developer(page_contents)
  details = page_contents.xpath("//div[@class='details_block']")
  names = details.xpath(".//a[contains(@href, 'developer')]").map { |link| link.text.strip }
  # Drop blanks and repeats so the same link matched twice is listed once.
  names.reject(&:empty?).uniq.join(', ')
end
#scrape_game(game, url) ⇒ Object
20 21 22 23 24 25 |
# File 'lib/steam_scraper/game_page_scraper.rb', line 20
#
# Fetches the page at +url+ and, when contents came back, enriches
# +game+ with the scraped fields.
#
# @param game [Hash] game record to enrich
# @param url [Object] location of the game's page
# @return [Object] +game+ unchanged when no page contents were returned,
#   otherwise the result of scrape_game_with_valid_contents
def scrape_game(game, url)
  contents = get_page_contents(url)
  return game if contents.nil?

  scrape_game_with_valid_contents(game, contents)
end
#scrape_game_with_valid_contents(game, page_contents) ⇒ Object
27 28 29 30 31 32 33 34 35 36 37 |
# File 'lib/steam_scraper/game_page_scraper.rb', line 27
#
# Fills +game+ with every field this scraper can read from an already
# fetched page: metacritic score, tags, genres, developer, publisher and
# the minimum / recommended system specs.
#
# @param game [Hash] game record; mutated in place
# @param page_contents [#xpath] parsed game page
# @return [Hash] the same +game+ hash
def scrape_game_with_valid_contents(game, page_contents)
  game[:metacritic] = scrape_metacritic(page_contents)
  game[:tags] = scrape_tags(page_contents)
  game[:genres] = scrape_genres(page_contents)
  game[:developer] = scrape_developer(page_contents)
  game[:publisher] = scrape_publisher(page_contents)
  game[:min_spec] = scrape_min_spec(page_contents)
  game[:recommended_spec] = scrape_recommended_spec(page_contents)
  game
end
#scrape_genres(page_contents) ⇒ Object
57 58 59 60 61 62 63 64 65 |
# File 'lib/steam_scraper/game_page_scraper.rb', line 57
#
# Collects the genre names linked from the page's details block.
#
# @param page_contents [#xpath] parsed game page
# @return [Array<String>] stripped text of every link whose href
#   contains 'genre'; empty when there are none
def scrape_genres(page_contents)
  genre_links = page_contents
                .xpath("//div[@class='details_block']")
                .xpath(".//a[contains(@href, 'genre')]")
  genre_links.map { |link| link.text.strip }
end
#scrape_metacritic(page_contents) ⇒ Object
43 44 45 46 |
# File 'lib/steam_scraper/game_page_scraper.rb', line 43
#
# Reads the metacritic score from the first span inside the
# 'game_area_metascore' div.
#
# @param page_contents [#xpath] parsed game page
# @return [Integer, nil] the score converted with +to_i+, or nil when
#   the page has no metascore element
def scrape_metacritic(page_contents)
  score_element = page_contents.xpath("//div[@id='game_area_metascore']/span").first
  return if score_element.nil?

  score_element.text.to_i
end
#scrape_min_spec(page_contents) ⇒ Object
77 78 79 80 81 82 83 |
# File 'lib/steam_scraper/game_page_scraper.rb', line 77
#
# Parses the minimum system requirements from the data-os='win' block.
# The left column is tried first; when it matches nothing, the
# full-width requirements block is used instead.
#
# @param page_contents [#xpath] parsed game page
# @return [Hash] result of scrape_spec for the selected nodes
def scrape_min_spec(page_contents)
  left_column = page_contents.xpath("//div[@data-os='win']/div[@class='game_area_sys_req_leftCol']/ul/ul")
  spec_block =
    if left_column.empty?
      page_contents.xpath("//div[@data-os='win']/div[@class='game_area_sys_req_full']/ul/ul")
    else
      left_column
    end
  scrape_spec(spec_block)
end
#scrape_publisher(page_contents) ⇒ Object
72 73 74 75 |
# File 'lib/steam_scraper/game_page_scraper.rb', line 72
#
# Extracts the publisher name(s) from the page's details block.
#
# Every link whose href contains 'publisher' is read individually, so
# several publishers are returned as "A, B" instead of being run
# together as "AB" (which is what calling +text+ on the whole set does).
#
# @param page_contents [#xpath] parsed game page
# @return [String] stripped publisher names joined by ", "; "" when none
def scrape_publisher(page_contents)
  details = page_contents.xpath("//div[@class='details_block']")
  names = details.xpath(".//a[contains(@href, 'publisher')]").map { |link| link.text.strip }
  # Drop blanks and repeats so the same link matched twice is listed once.
  names.reject(&:empty?).uniq.join(', ')
end
#scrape_recommended_spec(page_contents) ⇒ Object
85 86 87 88 |
# File 'lib/steam_scraper/game_page_scraper.rb', line 85
#
# Parses the recommended system requirements from the right column of
# the data-os='win' block.
#
# @param page_contents [#xpath] parsed game page
# @return [Hash] result of scrape_spec for the matched nodes
def scrape_recommended_spec(page_contents)
  scrape_spec(
    page_contents.xpath("//div[@data-os='win']/div[@class='game_area_sys_req_rightCol']/ul/ul")
  )
end
#scrape_spec(node) ⇒ Object
90 91 92 93 94 95 96 97 98 99 100 101 102 |
# File 'lib/steam_scraper/game_page_scraper.rb', line 90
#
# Turns the text of a requirements node into a Hash of "Label: value"
# pairs, one pair per "\r"-separated entry.
#
# Each entry is split on its first colon only, so values that contain
# colons themselves (URLs, "a: b" notes) are kept whole. Labels are
# stripped before becoming keys, which removes the "\n" left over from
# "\r\n" line endings, and blank entries are ignored.
#
# @param node [#text] node (or node set) holding the requirements list
# @return [Hash{Symbol => String}] label => stripped value; an entry
#   without a colon maps its stripped text to itself
def scrape_spec(node)
  node.text.split("\r").each_with_object({}) do |entry, spec_hash|
    label, value = entry.split(':', 2)
    next if label.nil?

    label = label.strip
    next if label.empty?

    # No colon in the entry: keep the historic "text maps to itself" result.
    spec_hash[label.to_sym] = (value || label).strip
  end
end
#scrape_tags(page_contents) ⇒ Object
48 49 50 51 52 53 54 55 |
# File 'lib/steam_scraper/game_page_scraper.rb', line 48
#
# Collects the tags listed in the page's 'popular_tags' container.
#
# @param page_contents [#xpath] parsed game page
# @return [Array<String>] stripped text of each tag link, in page order;
#   empty when the page has no tags
def scrape_tags(page_contents)
  tag_links = page_contents.xpath("//div[contains(@class, 'popular_tags')]/a")
  tag_links.map { |tag| tag.text.strip }
end