Class: GameListScraper

Inherits:
Object
  • Object
show all
Defined in:
lib/steam_scraper/game_list_scraper.rb

Overview

Core scraping class

Instance Method Summary collapse

Constructor Details

#initialize(*_args) ⇒ GameListScraper

Returns a new instance of GameListScraper.



5
6
7
8
9
# File 'lib/steam_scraper/game_list_scraper.rb', line 5

# Sets up a scraper: an empty list of scraped games, a page retriever, and the
# number of the last search-result page.
#
# @param _args [Array] accepted and ignored
def initialize(*_args)
  @page_retriever = PageRetriever.new
  @game_list = []
  # Must run after @page_retriever is set: the lookup fetches a page through it.
  init_last_page_num
end

Instance Method Details

#get_page_contents(url) ⇒ Object



24
25
26
# File 'lib/steam_scraper/game_list_scraper.rb', line 24

# Fetches a page by delegating to the scraper's page retriever.
#
# @param url [String] address of the page to fetch
# @return [Object] whatever the retriever's #retrieve returns for +url+
def get_page_contents(url)
  retriever = @page_retriever
  retriever.retrieve(url)
end

#get_review_contents(entry) ⇒ Object



86
87
88
89
90
91
# File 'lib/steam_scraper/game_list_scraper.rb', line 86

# Reads the review tooltip text attached to a search-result entry.
#
# @param entry [#xpath] search-result node (presumably a Nokogiri node — confirm)
# @return [String, nil] value of the `data-store-tooltip` attribute of the
#   review-summary span, or nil when the span or the attribute is missing
def get_review_contents(entry)
  summary = entry.xpath(".//span[contains(@class, 'search_review_summary')]")
  return nil if summary.empty?

  # A summary span without the tooltip attribute yields nil here; `&.` returns
  # nil instead of raising NoMethodError on `nil.value`.
  summary.attribute('data-store-tooltip')&.value
end

#init_last_page_num ⇒ Object



11
12
13
14
15
16
17
18
# File 'lib/steam_scraper/game_list_scraper.rb', line 11

# Fetches the first search page and stores the last number found in its
# pagination block in @last_page_num (0 when the block contains no digits).
#
# Any StandardError is reported on standard output and then re-raised.
#
# @return [Integer] the value stored in @last_page_num
def init_last_page_num
  first_page = get_page_contents(site + '1')
  pagination_text = first_page.xpath("//div[contains(@class, 'search_pagination_right')]").text
  page_numbers = pagination_text.scan(/(\d+)/i).flatten
  @last_page_num = page_numbers.last.to_i
rescue StandardError
  puts 'Could not connect to Steam Store'
  # Bare raise re-raises the exception currently being handled.
  raise
end

#scrape(first_page = 1, last_page = nil) ⇒ Object



35
36
37
38
39
40
41
42
43
44
# File 'lib/steam_scraper/game_list_scraper.rb', line 35

# Scrapes a range of search-result pages in parallel and appends every scraped
# game to @game_list.
#
# @param first_page [Integer] first page number to scrape
# @param last_page [Integer, nil] last page number to scrape; defaults to
#   @last_page_num when nil
# @return [Array] @game_list after the new results have been appended
def scrape(first_page = 1, last_page = nil)
  last_page ||= @last_page_num
  # Interpolate both bounds: joining an array that holds a single Range would
  # print "1..5" rather than the intended "1 to 5".
  progress_title = "Scraping Steam Store pages #{first_page} to #{last_page}"
  scraped_pages = Parallel.map(first_page..last_page,
                               progress: progress_title,
                               in_processes: 8) do |page|
    scrape_page(search_results(page))
  end
  # Each page yields its own array of games; flatten them into the running list.
  @game_list.concat(scraped_pages.flatten)
end

#scrape_entry(entry) ⇒ Object



107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/steam_scraper/game_list_scraper.rb', line 107

# Collects every scraped field of one search-result entry into a hash.
#
# @param entry [Object] search-result node handed to each scrape_* helper
# @return [Hash{Symbol => Object}] keys :url, :name, :price, :release_date,
#   :platforms, :icon_url, :review_score and :number_of_reviews, in that order
def scrape_entry(entry)
  {
    url: scrape_url(entry),
    name: scrape_name(entry),
    price: scrape_price(entry),
    release_date: scrape_release_date(entry),
    platforms: scrape_platforms(entry),
    icon_url: scrape_icon_url(entry),
    review_score: scrape_review_score(entry),
    number_of_reviews: scrape_number_of_reviews(entry)
  }
end

#scrape_icon_url(entry) ⇒ Object



82
83
84
# File 'lib/steam_scraper/game_list_scraper.rb', line 82

# Extracts the capsule image URL of a search-result entry.
#
# @param entry [#xpath] search-result node (presumably a Nokogiri node — confirm)
# @return [String, nil] the `src` of the image inside the search capsule, or
#   nil when the entry has no such image or attribute
def scrape_icon_url(entry)
  capsule_images = entry.xpath(".//div[contains(@class, 'search_capsule')]/img")
  # `&.` keeps an entry without a capsule image from raising NoMethodError.
  capsule_images.attribute('src')&.value
end

#scrape_name(entry) ⇒ Object



58
59
60
# File 'lib/steam_scraper/game_list_scraper.rb', line 58

# Extracts the game title of a search-result entry.
#
# @param entry [#xpath] search-result node
# @return [String] text of the span(s) whose class is exactly `title`
def scrape_name(entry)
  title_nodes = entry.xpath(".//span[@class='title']")
  title_nodes.text
end

#scrape_number_of_reviews(entry) ⇒ Object



100
101
102
103
104
105
# File 'lib/steam_scraper/game_list_scraper.rb', line 100

# Extracts the number of user reviews from an entry's review tooltip.
#
# @param entry [Object] search-result node passed to get_review_contents
# @return [String, nil] the review count exactly as written in the tooltip
#   (digits and commas, e.g. "1,234"), or nil when there is no tooltip or it
#   does not contain the "<n>% of the <count> user" phrase
def scrape_number_of_reviews(entry)
  tooltip = get_review_contents(entry)
  return nil if tooltip.nil?

  # `\d+` accepts one-, two- and three-digit percentages; requiring exactly
  # two digits would drop the count for tooltips such as "9% of the 57 user".
  tooltip[/\d+% of the ([0-9,]*) user/i, 1]
end

#scrape_page(current_page) ⇒ Object



46
47
48
49
50
51
52
# File 'lib/steam_scraper/game_list_scraper.rb', line 46

# Scrapes every entry of one search-result page.
#
# @param current_page [Enumerable, nil] entries of the page; nil is what
#   search_results returns when fetching the page failed
# @return [Array] one scrape_entry result per entry, or an empty array when
#   +current_page+ is nil
def scrape_page(current_page)
  # A failed page fetch arrives here as nil; treat it as a page without entries
  # instead of raising NoMethodError.
  return [] if current_page.nil?

  current_page.map { |entry| scrape_entry(entry) }
end

#scrape_platforms(entry) ⇒ Object



73
74
75
76
77
78
79
80
# File 'lib/steam_scraper/game_list_scraper.rb', line 73

# Lists the platforms advertised by a search-result entry.
#
# A platform is reported when the entry contains a span whose class attribute
# contains the platform's token.
#
# @param entry [#xpath] search-result node
# @return [Array<String>] subset of 'Windows', 'macOS', 'Linux', 'Steamplay',
#   in that order
def scrape_platforms(entry)
  labels_by_class_token = {
    'win' => 'Windows',
    'mac' => 'macOS',
    'linux' => 'Linux',
    'steamplay' => 'Steamplay'
  }
  labels_by_class_token.each_with_object([]) do |(token, label), found|
    spans = entry.xpath(".//span[contains(@class, '#{token}')]")
    found.push(label) unless spans.empty?
  end
end

#scrape_price(entry) ⇒ Object



62
63
64
65
# File 'lib/steam_scraper/game_list_scraper.rb', line 62

# Extracts the price shown for a search-result entry.
#
# The text of the price div is stripped and split on '$'; the last piece is
# returned, so a text holding several '$' amounts yields the final one.
#
# @param entry [#xpath] search-result node
# @return [String, nil] text after the last '$', the whole stripped text when
#   it contains no '$', or nil when the text is empty
def scrape_price(entry)
  price_nodes = entry.xpath(".//div[contains(@class, 'search_price')
              and not(contains(@class, 'search_price_discount_combined'))]")
  price_text = price_nodes.text.strip
  price_text.split('$').last
end

#scrape_release_date(entry) ⇒ Object



67
68
69
70
71
# File 'lib/steam_scraper/game_list_scraper.rb', line 67

# Parses the release date shown for a search-result entry.
#
# @param entry [#xpath] search-result node
# @return [Date, nil] the parsed date, or nil when any StandardError is raised
#   while reading or parsing the text (for example an unparseable date)
def scrape_release_date(entry)
  released_text = entry.xpath(".//div[contains(@class, 'search_released')]").text
  Date.parse(released_text)
rescue StandardError
  nil
end

#scrape_review_score(entry) ⇒ Object



93
94
95
96
97
98
# File 'lib/steam_scraper/game_list_scraper.rb', line 93

# Extracts the positive-review percentage from an entry's review tooltip.
#
# @param entry [Object] search-result node passed to get_review_contents
# @return [String, nil] the digits preceding the first '%' in the tooltip
#   (e.g. "94", "100", "7"), or nil when there is no tooltip or no percentage
def scrape_review_score(entry)
  tooltip = get_review_contents(entry)
  return nil if tooltip.nil?

  # Capture the whole run of digits before '%'. A greedy `.*` followed by
  # exactly two digits would yield "00" for "100%" and miss single-digit scores.
  tooltip[/(\d+)%/, 1]
end

#scrape_url(entry) ⇒ Object



54
55
56
# File 'lib/steam_scraper/game_list_scraper.rb', line 54

# Extracts the link target of a search-result entry.
#
# @param entry [#attribute] search-result node carrying an `href` attribute
# @return [String] value of the entry's `href` attribute
def scrape_url(entry)
  href = entry.attribute('href')
  href.value
end

#search_results(page_number) ⇒ Object



28
29
30
31
32
33
# File 'lib/steam_scraper/game_list_scraper.rb', line 28

# Fetches one search page and selects its result links.
#
# @param page_number [#to_s] number of the search page to fetch
# @return [Object, nil] the nodes matched by the result-link XPath, or nil when
#   any StandardError is raised while fetching or querying the page
def search_results(page_number)
  page_url = site + page_number.to_s
  get_page_contents(page_url).xpath("//div[@id='search_result_container']/div/a")
rescue StandardError
  nil
end

#site ⇒ Object



20
21
22
# File 'lib/steam_scraper/game_list_scraper.rb', line 20

# Base URL of the store search; callers append the page number to it.
#
# @return [String] the search URL ending in `page=`
def site
  search_url_prefix = 'http://store.steampowered.com/search?page='
  search_url_prefix
end