Class: GamePageScraper

Inherits:
Object
  • Object
show all
Defined in:
lib/steam_scraper/game_page_scraper.rb

Overview

Class that scrapes a game's actual page

Instance Method Summary collapse

Constructor Details

#initialize(*_args) ⇒ GamePageScraper

Returns a new instance of GamePageScraper.



6
7
8
# File 'lib/steam_scraper/game_page_scraper.rb', line 6

# Builds a scraper with its own PageRetriever.
#
# Any positional arguments are accepted but never read.
#
# @param _args [Array] ignored
def initialize(*_args)
  @page_retriever = PageRetriever.new
end

Instance Method Details

#get_page_contents(url) ⇒ Object



39
40
41
# File 'lib/steam_scraper/game_page_scraper.rb', line 39

# Fetches the page behind +url+ by delegating to the page retriever.
#
# @param url [Object] passed through unchanged to PageRetriever#retrieve
# @return [Object] whatever PageRetriever#retrieve returns
#   NOTE(review): scrape_game treats a nil result as "no page" and otherwise
#   hands the result to the xpath-based scrapers; confirm against PageRetriever.
def get_page_contents(url)
  @page_retriever.retrieve(url)
end

#scrape(games_hash) ⇒ Object



10
11
12
13
14
15
16
17
18
# File 'lib/steam_scraper/game_page_scraper.rb', line 10

# Scrapes the per-game page of every entry via Parallel.map and appends the
# outcome to the collection that was passed in.
#
# Entries whose :url is nil contribute nil to the mapped results. The mapped
# results are pushed onto +games_hash+, which is then flattened in place, so
# the argument itself is mutated and keeps its original entries.
#
# NOTE(review): the original entries stay in the collection next to the
# scraped results (and any nils); confirm callers expect that shape.
#
# @param games_hash [Array] game entries answering +[:url]+
# @return [Array, nil] the result of +flatten!+ on +games_hash+
def scrape(games_hash)
  parallel_options = { progress: 'Scraping additional per game data', in_processes: 8 }

  scraped = Parallel.map(games_hash, **parallel_options) do |game|
    game_url = game[:url]
    game_url.nil? ? nil : scrape_game(game, game_url)
  end

  games_hash.push(scraped)
  games_hash.flatten!
end

#scrape_developer(page_contents) ⇒ Object



67
68
69
70
# File 'lib/steam_scraper/game_page_scraper.rb', line 67

# Reads the developer name from the page's details block.
#
# NOTE(review): when several links match, their text is presumably joined
# without a separator (Nokogiri NodeSet#text) -- confirm.
#
# @param page_contents [#xpath] parsed game page
# @return [String] stripped text of the links whose href contains 'developer'
def scrape_developer(page_contents)
  developer_links = page_contents
                    .xpath("//div[@class='details_block']")
                    .xpath(".//a[contains(@href, 'developer')]")
  developer_links.text.strip
end

#scrape_game(game, url) ⇒ Object



20
21
22
23
24
25
# File 'lib/steam_scraper/game_page_scraper.rb', line 20

# Retrieves the page at +url+ and, when one came back, enriches +game+ with
# the data scraped from it.
#
# @param game [Object] game entry to enrich
# @param url [Object] location handed to get_page_contents
# @return [Object] +game+ untouched when no page was retrieved, otherwise the
#   result of scrape_game_with_valid_contents
def scrape_game(game, url)
  page_contents = get_page_contents(url)
  return game if page_contents.nil?

  scrape_game_with_valid_contents(game, page_contents)
end

#scrape_game_with_valid_contents(game, page_contents) ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
# File 'lib/steam_scraper/game_page_scraper.rb', line 27

# Fills +game+ with every per-page field by calling the matching scrape_*
# method for each one, in a fixed order.
#
# @param game [#[]=] game entry; mutated in place
# @param page_contents [Object] parsed game page handed to each scraper
# @return [Object] the same +game+ object
def scrape_game_with_valid_contents(game, page_contents)
  fields = %i[metacritic tags genres developer publisher min_spec recommended_spec]

  # Each field :foo is filled by the method scrape_foo.
  fields.each do |field|
    game[field] = send("scrape_#{field}", page_contents)
  end

  game
end

#scrape_genres(page_contents) ⇒ Object



57
58
59
60
61
62
63
64
65
# File 'lib/steam_scraper/game_page_scraper.rb', line 57

# Collects the genre names linked from the page's details block.
#
# @param page_contents [#xpath] parsed game page
# @return [Array<String>] stripped text of every link whose href contains
#   'genre', in the order the query returns them; empty when none match
def scrape_genres(page_contents)
  details = page_contents.xpath("//div[@class='details_block']")
  details.xpath(".//a[contains(@href, 'genre')]").map { |genre| genre.text.strip }
end

#scrape_metacritic(page_contents) ⇒ Object



43
44
45
46
# File 'lib/steam_scraper/game_page_scraper.rb', line 43

# Reads the Metacritic score shown on the page, if there is one.
#
# @param page_contents [#xpath] parsed game page
# @return [Integer, nil] text of the first metascore span converted with
#   +to_i+, or nil when the page has no such span
def scrape_metacritic(page_contents)
  score_element = page_contents.xpath("//div[@id='game_area_metascore']/span").first
  return if score_element.nil?

  score_element.text.to_i
end

#scrape_min_spec(page_contents) ⇒ Object



77
78
79
80
81
82
83
# File 'lib/steam_scraper/game_page_scraper.rb', line 77

# Parses the Windows system requirements found in the left column of the
# requirements area, falling back to the full-width block when the left
# column query matches nothing.
#
# @param page_contents [#xpath] parsed game page
# @return [Object] whatever scrape_spec returns for the selected nodes
def scrape_min_spec(page_contents)
  left_column = page_contents.xpath("//div[@data-os='win']/div[@class='game_area_sys_req_leftCol']/ul/ul")
  return scrape_spec(left_column) unless left_column.empty?

  scrape_spec(page_contents.xpath("//div[@data-os='win']/div[@class='game_area_sys_req_full']/ul/ul"))
end

#scrape_publisher(page_contents) ⇒ Object



72
73
74
75
# File 'lib/steam_scraper/game_page_scraper.rb', line 72

# Reads the publisher name from the page's details block.
#
# NOTE(review): when several links match, their text is presumably joined
# without a separator (Nokogiri NodeSet#text) -- confirm.
#
# @param page_contents [#xpath] parsed game page
# @return [String] stripped text of the links whose href contains 'publisher'
def scrape_publisher(page_contents)
  publisher_links = page_contents
                    .xpath("//div[@class='details_block']")
                    .xpath(".//a[contains(@href, 'publisher')]")
  publisher_links.text.strip
end


85
86
87
88
# File 'lib/steam_scraper/game_page_scraper.rb', line 85

# Parses the Windows system requirements found in the right column of the
# requirements area.
#
# @param page_contents [#xpath] parsed game page
# @return [Object] whatever scrape_spec returns for the selected nodes
def scrape_recommended_spec(page_contents)
  scrape_spec(
    page_contents.xpath("//div[@data-os='win']/div[@class='game_area_sys_req_rightCol']/ul/ul")
  )
end

#scrape_spec(node) ⇒ Object



90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/steam_scraper/game_page_scraper.rb', line 90

# Turns the text of a requirements node into a hash of spec entries.
#
# The text is cut into entries at every "\r"; each entry is then split at its
# FIRST colon into a key (converted to a Symbol as-is) and a value (stripped).
#
# * Colons inside the value are preserved ("Notes: a: b" => "a: b").
# * "Key:" with nothing after the colon yields an empty string value.
# * An entry without any colon is stored with its text as the key and the
#   stripped text as the value.
# * Entries with an empty key (blank entries, ":" and similar) are skipped.
#
# NOTE(review): keys are not stripped, so surrounding whitespace in an entry
# ends up in the Symbol; confirm whether callers rely on that.
#
# @param node [#text] node (or node set) whose text holds the spec lines
# @return [Hash{Symbol => String}] parsed entries; later duplicates of a key
#   overwrite earlier ones
def scrape_spec(node)
  node.text.split("\r").each_with_object({}) do |entry, spec_hash|
    # A limit of 2 splits at the first colon only, keeping the rest intact.
    key, value = entry.split(':', 2)
    next if key.nil? || key.empty?

    # No colon at all: the entry's text doubles as the value.
    value = key if value.nil?
    spec_hash[key.to_sym] = value.strip
  end
end

#scrape_tags(page_contents) ⇒ Object



48
49
50
51
52
53
54
55
# File 'lib/steam_scraper/game_page_scraper.rb', line 48

# Collects the tag names shown in the page's popular-tags area.
#
# @param page_contents [#xpath] parsed game page
# @return [Array<String>] stripped text of every link directly under a div
#   whose class contains 'popular_tags', in the order the query returns them;
#   empty when none match
def scrape_tags(page_contents)
  page_contents.xpath("//div[contains(@class, 'popular_tags')]/a").map { |tag| tag.text.strip }
end