Class: Gogdb::Engine

Inherits:
Object
  • Object
show all
Defined in:
lib/gogdb/engine.rb

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Engine

Returns a new instance of Engine.



4
5
6
7
# File 'lib/gogdb/engine.rb', line 4

# Builds a new Engine and wires up its collaborators.
#
# @param options [Hash] handed unchanged to Logger.new
def initialize(options = {})
  # The logger is created first because Utils receives it as its argument.
  @logger = Logger.new(options)
  @utils  = Utils.new(@logger)
end

Instance Method Details

#fetch(options) ⇒ Object

Downloads data from source and updates database with changes

Parameters:

  • options (Hash)

    Additional options such as :limit and :type



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/gogdb/engine.rb', line 12

# Downloads data from the selected sources, one item at a time.
#
# @param options [Hash] additional options:
#   * :type         - "games", "movies", "all" or nil (nil behaves like "all");
#                     any other value selects no source at all
#   * :limit        - maximum number of items to fetch per source;
#                     0 or nil means "no limit"
#   * :requestLimit - maximum number of item requests per minute; used to
#                     derive the pause between requests (0 or nil disables
#                     the pause)
# @return [Array] the list of source URLs that were selected
def fetch(options)
  # Work out which listing URLs have to be crawled for the requested type.
  sources =
    case options[:type]
    when "games"    then [GOG_GAMES_URL]
    when "movies"   then [GOG_MOVIES_URL]
    when "all", nil then [GOG_GAMES_URL, GOG_MOVIES_URL]
    else []
    end

  limit = options[:limit].to_i

  # Float division so that limits above 60 requests/minute still throttle
  # (integer division would truncate the pause to 0), and a missing or zero
  # request limit no longer raises.
  request_limit = options[:requestLimit].to_f
  sleep_time = request_limit > 0 ? 60.0 / request_limit : 0

  sources.each do |source|
    pages_number = get_pages_number(source).to_i
    @logger.debug "Pages received: #{pages_number}"
    next if pages_number < 1 # Skip this source if there are no pages

    # The counter is reset for every source, so :limit applies per source.
    count = 0
    catch :limitReached do
      pages_number.times do
        # NOTE(review): the same URL is requested on every iteration; no page
        # number is passed along, so each pass presumably returns the same
        # listing page - confirm the paging URL scheme and add it here.
        items_data = get_data(source)['products'] || []

        # Fetch the details of every product listed on the page.
        items_data.each do |item|
          item_data = get_data("#{GOG_URL}#{item['url']}")['gameProductData']

          @logger.debug "Item received: #{item_data['title']}"

          count += 1
          # Leave both loops as soon as the limit is reached.
          throw :limitReached if limit > 0 && count >= limit

          sleep(sleep_time)
        end
      end
    end
  end
end

#get_data(url) ⇒ hash

Gets data from the source and parses the global JavaScript object.

Parameters:

  • url (string)

    Page URL

Returns:

  • (hash)


71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/gogdb/engine.rb', line 71

# Downloads the page at +url+ and parses the JSON assigned to the global
# `gogData` JavaScript variable found in it.
#
# Any StandardError raised while downloading or parsing is logged, after
# which the whole attempt is repeated.
# NOTE(review): the retry is unbounded - a page that never contains a
# parsable `gogData` assignment keeps this method looping every 15 seconds.
#
# @param url [String] page URL
# @return [Hash] parsed `gogData` object
def get_data(url)
  html = Net::HTTP.get(URI(url))
  payload = html[/(?<=var gogData = )(.*)(?=;)/, 1]
  JSON.parse(payload)
rescue => error
  @logger.warning "Cannot retrieve or parse data from gog.com"
  @logger.error error.message

  # Find out whether the failure was caused by the site being unreachable.
  if Net::Ping::HTTP.new(url).ping?
    # The site answers, so pause briefly before asking again.
    @logger.warning "Connection to gog.com established successfully. Retrying..."
    sleep(15)
  else
    @logger.warning "Cannot establish connection to gog.com. Retrying..."

    # Hand over to the connection helper before the next attempt.
    @utils.retryConnection(url)
  end

  retry
end

#get_pages_number(url) ⇒ integer

Gets the number of pages.

Parameters:

  • url (string)

    Page URL

Returns:

  • (integer)

    number of pages



57
58
59
60
61
62
63
64
65
# File 'lib/gogdb/engine.rb', line 57

# Reads the "totalPages" value from the data found at +url+.
#
# The value is coerced with Integer(), so a missing or non-numeric
# "totalPages" takes the same failure path as any other error instead of
# leaking nil to the caller.
#
# @param url [String] page URL
# @return [Integer] number of pages, or 0 when it cannot be determined
def get_pages_number(url)
  Integer(get_data(url)["totalPages"])
rescue => e
  @logger.error "Cannot retrieve pages number."
  @logger.debug e.message
  0
end