Class: GScraper::Search::AJAXQuery

Inherits:
Query
  • Object
show all
Includes:
HasPages
Defined in:
lib/gscraper/search/ajax_query.rb

Overview

Represents a Query through the Google AJAX search API.

Constant Summary collapse

RESULTS_PER_PAGE =

Maximum results per-page

8
PATH =

AJAX API Path

'/uds/GwebSearch'
QUERY =

AJAX API Query string

'callback=google.search.WebSearch.RawCompletion&context=0&lstkp=0&rsz=large'
DEFAULT_SIG =

Default signature

'582c1116317355adf613a6a843f19ece'
DEFAULT_KEY =

Default key

'notsupplied'
DEFAULT_VERSION =

Default version

'1.0'

Constants inherited from Query

Query::DEFAULT_HOST, Query::SUB_DOMAIN

Instance Attribute Summary collapse

Attributes inherited from Query

#allintext, #allintitle, #allinurl, #define, #exact_phrase, #filetype, #info, #intext, #intitle, #inurl, #language, #link, #numeric_range, #query, #related, #search_host, #site, #with_words, #without_words

Class Method Summary collapse

Instance Method Summary collapse

Methods included from HasPages

#[], #each, #each_on_page, #each_on_pages, #each_page, #first_page, #page_cache, #page_index_of, #pages, #result_index_of, #result_offset_of

Methods inherited from Query

#expression, #format_modifier, #format_options

Constructor Details

#initialize(options = {}) {|query| ... } ⇒ AJAXQuery

Creates a new AJAX query.

Options Hash (options):

  • :search_host (String) — default: www.google.com

    The host to submit queries to.

  • :language (String, Symbol) — default: Languages.native

    The search language.

  • :sig (String) — default: '582c1116317355adf613a6a843f19ece'

    The search signature.

  • :key (String, Symbol) — default: 'notsupplied'

    The search key.

  • :version (String, Float) — default: '1.0'

    The desired API version.

Yields:

  • (query)

    If a block is given, the new AJAX query will be passed to it.

Yield Parameters:



94
95
96
97
98
99
100
101
102
# File 'lib/gscraper/search/ajax_query.rb', line 94

# Creates a new AJAX query.
#
# @param [Hash] options
#   Query options. The :sig, :key and :version entries are read here; the
#   whole Hash is also handed to GScraper.web_agent and to the superclass
#   constructor.
#
# @yield [query]
#   The block, if given, is forwarded to the superclass constructor.
def initialize(options={},&block)
  @agent = GScraper.web_agent(options)

  # Hash#fetch only falls back when the key is absent; a key that is
  # present with a nil value is stored as nil.
  @sig     = options.fetch(:sig)     { DEFAULT_SIG }
  @key     = options.fetch(:key)     { DEFAULT_KEY }
  @version = options.fetch(:version) { DEFAULT_VERSION }

  super(options,&block)
end

Instance Attribute Details

#key ⇒ Object

The search key



62
63
64
# File 'lib/gscraper/search/ajax_query.rb', line 62

# The search key.
#
# @return [Object] the current value of @key (nil if it was never set).
def key
  return @key
end

#sig ⇒ Object

The search signature



59
60
61
# File 'lib/gscraper/search/ajax_query.rb', line 59

# The search signature.
#
# @return [Object] the current value of @sig (nil if it was never set).
def sig
  return @sig
end

#version ⇒ Object

The API version



65
66
67
# File 'lib/gscraper/search/ajax_query.rb', line 65

# The API version.
#
# @return [Object] the current value of @version (nil if it was never set).
def version
  return @version
end

Class Method Details

.from_url(url, options = {}) {|query| ... } ⇒ AJAXQuery

Creates a new AJAX query from the specified URL.

Yields:

  • (query)

    If a block is given, it will be passed the new AJAX query.

Yield Parameters:

See Also:

  • AJAXQuery.new


124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/gscraper/search/ajax_query.rb', line 124

# Creates a new AJAX query from the specified URL.
#
# The 'hl' and 'q' query parameters of the URL always become the :language
# and :query options. The 'sig', 'key' and 'v' parameters become :sig, :key
# and :version only when the URL actually carries them, so a URL without
# them no longer replaces the constructor defaults with nil.
#
# @param [URI, String, #to_s] url
#   The URL to extract the query parameters from.
#
# @param [Hash] options
#   Additional options for AJAXQuery.new. The Hash is not modified.
#
# @yield [query]
#   The block, if given, is forwarded to AJAXQuery.new.
#
# @return [AJAXQuery]
#   The new AJAX query.
def AJAXQuery.from_url(url,options={},&block)
  url    = URI(url.to_s)
  params = url.query_params

  # Hash#merge builds a copy, leaving the caller's Hash untouched.
  options = options.merge(
    :language => params['hl'],
    :query    => params['q']
  )

  # The constructor uses Hash#fetch for these, which treats an explicit nil
  # as a supplied value; only set them when the URL provides one.
  {:sig => 'sig', :key => 'key', :version => 'v'}.each do |option,param|
    value = params[param]

    options[option] = value unless value.nil?
  end

  return AJAXQuery.new(options,&block)
end

Instance Method Details

#page(page_index) ⇒ Page<Result>

A page containing results at the specified page index.



200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# File 'lib/gscraper/search/ajax_query.rb', line 200

# Requests the page at the given index and collects its results.
#
# The response body is expected to contain a JSON object, possibly wrapped
# in other text; the first `{...}` span found is parsed. A body without such
# a span, or a parsed value without a 'results' entry, yields an empty page.
#
# @param [Integer] page_index
#   The index of the page to fetch.
#
# @return [Page<Result>]
#   A page holding one Result per entry of the 'results' array.
#
# @raise [JSON::ParserError]
#   If the extracted span is not valid JSON.
def page(page_index)
  Page.new do |new_page|
    body = @agent.get(page_url(page_index)).body

    # No {...} span means there is nothing to parse; JSON.parse(nil) would
    # raise a TypeError.
    json = body[/\{.*\}/]
    hash = JSON.parse(json) if json

    rank_offset = result_offset_of(page_index)

    if (hash.kind_of?(Hash) && hash['results'])
      # URI.escape was removed in Ruby 3.0; the RFC 2396 parser provides the
      # same escaping on every Ruby that still ships it.
      parser = if defined?(URI::RFC2396_PARSER) then URI::RFC2396_PARSER
               else                                  URI::DEFAULT_PARSER
               end

      hash['results'].each_with_index do |result,index|
        rank  = rank_offset + (index + 1)
        title = Nokogiri::HTML(result['title']).inner_text
        url   = URI(parser.escape(result['unescapedUrl']))

        # A missing (nil) content is treated like an empty one.
        content = result['content'].to_s
        summary = if content.empty? then ''
                  else                   Nokogiri::HTML(content).inner_text
                  end

        # URI(nil) raises; leave the cached URL nil when none was returned.
        cache_url  = result['cacheUrl']
        cached_url = URI(cache_url) unless cache_url.nil?

        new_page << Result.new(rank,title,url,summary,cached_url)
      end
    end
  end
end

#page_url(page_index) ⇒ URI::HTTP

The URL that represents the query at a specific page index.



181
182
183
184
185
186
187
188
189
# File 'lib/gscraper/search/ajax_query.rb', line 181

# Builds the URL for the query at a specific page index.
#
# Starts from #search_url and, for any page index above 1, adds a 'start'
# query parameter set to the result offset of that page.
#
# @param [Integer] page_index
#   The index of the page.
#
# @return [URI::HTTP]
#   The URL for the given page.
def page_url(page_index)
  search_url.tap do |url|
    url.query_params['start'] = result_offset_of(page_index) if page_index > 1
  end
end

#results_per_page ⇒ Integer

The results per page.

See Also:



145
146
147
# File 'lib/gscraper/search/ajax_query.rb', line 145

# The number of results on each page.
#
# @return [Integer] the RESULTS_PER_PAGE constant.
def results_per_page
  return RESULTS_PER_PAGE
end

#search_url ⇒ URI::HTTP

The URL that represents the query.



155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/gscraper/search/ajax_query.rb', line 155

# Builds the URL that represents the query.
#
# The URL points at PATH on the search host, starts from the fixed QUERY
# string and then receives the per-query parameters (language, expression,
# signature, key and version).
#
# @return [URI::HTTP]
#   The URL for the query.
def search_url
  url = URI::HTTP.build(:host => search_host, :path => PATH, :query => QUERY)

  # Assigned one by one, in this order.
  {
    'hl'  => @language,
    'gss' => '.com',
    'q'   => expression,
    'sig' => @sig,
    'key' => @key,
    'v'   => @version
  }.each { |name,value| url.query_params[name] = value }

  return url
end