Module: WikipediaWrapper

Extended by:
WikipediaWrapper
Included in:
WikipediaWrapper
Defined in:
lib/wikipedia_wrapper.rb,
lib/wikipedia_wrapper/page.rb,
lib/wikipedia_wrapper/util.rb,
lib/wikipedia_wrapper/image.rb,
lib/wikipedia_wrapper/version.rb,
lib/wikipedia_wrapper/exception.rb,
lib/wikipedia_wrapper/configuration.rb

Overview

Author:

Defined Under Namespace

Classes: Configuration, ConfigurationError, DisambiguationError, FormatError, HTTPTimeoutError, Image, InvalidRequestError, MultiplePagesError, Page, PageError, RedirectError, WikiImage, WikipediaError

Constant Summary collapse

VERSION =
"0.1.0"

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.check_results(term, raw_results) ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/wikipedia_wrapper/util.rb', line 46

# Validate the raw API response of a query that is expected to describe
# exactly one existing, non-disambiguation page.
#
# @param term [String] the requested title; passed on to the raised errors
# @param raw_results [Hash] parsed API response; raw_results['query']['pages']
#   is read as a Hash keyed by page id
# @raise [WikipediaWrapper::MultiplePagesError] if more than one page is present
# @raise [WikipediaWrapper::PageError] if no page is present or the only page
#   is keyed '-1'
# @raise [WikipediaWrapper::DisambiguationError] if the page has a
#   'disambiguation' entry in its 'pageprops'
# @return [nil]
def self.check_results(term, raw_results)
  pages = raw_results['query']['pages']

  if pages.length > 1
    # pages is keyed by page id, so the titles have to be read from the values
    titles = pages.values.map { |page| page['title'] }
    raise WikipediaWrapper::MultiplePagesError.new(titles, term)
  elsif pages.length < 1
    raise WikipediaWrapper::PageError.new(term)
  end

  key, page_info = pages.first
  raise WikipediaWrapper::PageError.new(term) if key == '-1'

  # Check for disambiguation pages
  if page_info['pageprops'] && page_info['pageprops']['disambiguation']
    raise WikipediaWrapper::DisambiguationError.new(term)
  end

  nil
end

.fetch(params) ⇒ Hash

Given the request parameters, params, fetch the response from the API URL and parse it as JSON. Raise an InvalidRequestError if an error occurs.

Parameters:

  • params (Hash{Symbol => String})

    hash of the properties that should be added to the request URL

Returns:

  • (Hash)

    the JSON response of the server converted into a hash

Raises:



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/wikipedia_wrapper/util.rb', line 18

# Build the request URL from +params+, fetch it (through the cache) and
# return the parsed JSON response.
#
# @param params [Hash{Symbol => String}] request properties; :action defaults
#   to 'query' and :format is always set to 'json'. The hash passed in is
#   not modified. Entries with an empty value are sent as a bare key.
# @raise [WikipediaWrapper::InvalidRequestError] if the action is not
#   'opensearch' and the response contains an 'error' key
# @return [Hash, Array] the parsed JSON response
def self.fetch(params)
  # Work on a copy so the caller's hash is left untouched
  request = params.dup
  request[:action] = 'query' unless request.key?(:action) # default action
  request[:format] = 'json' # always return json format

  # FIXME: deal with continuation
  # request[:continue] = '' # does not work for autocomplete

  # encode_www_form escapes reserved characters such as '&' and '=' inside
  # keys and values; a nil value produces a bare key without '='
  pairs = request.map do |key, value|
    text = value.to_s
    [key.to_s, text.empty? ? nil : text]
  end
  endpoint_url = "#{WikipediaWrapper.config.api_url}?#{URI.encode_www_form(pairs)}"

  raw_results = cache.fetch(endpoint_url) do
    # The block form closes the underlying stream once the body has been read
    URI.parse(endpoint_url).open('User-Agent' => config.user_agent) do |response|
      JSON.parse(response.read)
    end
  end

  # The key? check is skipped for 'opensearch' responses
  if request[:action] != 'opensearch' && raw_results.key?('error')
    raise WikipediaWrapper::InvalidRequestError.new(endpoint_url, raw_results['error']['info'])
  end

  raw_results
end

Instance Method Details

#autocomplete(term, limit: 10, redirect: true) ⇒ Hash{String=>String}

Returns a hash where the keys are the titles of the articles and the values are a short description of the page.

Parameters:

  • term (String)

    the term to get the autocompletions for (used as a prefix)

  • limit (Integer) (defaults to: 10)

    the maximum number of results to return (may not exceed 100)

  • redirect (Boolean) (defaults to: true)

    whether redirects should be followed for suggestions

Returns:

  • (Hash{String=>String})

    a hash where the keys are the titles of the articles and the values are a short description of the page

Raises:

See Also:



168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/wikipedia_wrapper.rb', line 168

# Ask the 'opensearch' API action for completions of +term+.
#
# @param term [String] the text sent as the 'search' parameter
# @param limit [Integer] maximum number of results; values above 100 are
#   sent as 100
# @param redirect [Boolean] sends 'resolve' as the redirects mode when
#   truthy, 'return' otherwise
# @raise [WikipediaWrapper::FormatError] if the response does not have
#   exactly 4 elements
# @return [Hash] the entries of the second response element as keys, each
#   mapped to the entry at the same index of the third response element
def autocomplete(term, limit: 10, redirect: true)
  limit = 100 if limit > 100
  redirect_mode = redirect ? 'resolve' : 'return'

  request = {
    action: 'opensearch',
    search: term,
    redirects: redirect_mode,
    limit: limit.to_s
  }
  response = fetch(request)

  unless response.length == 4
    raise WikipediaWrapper::FormatError.new("autocomplete", "array had length of #{response.length} instead of 4")
  end

  # Pair every title with the description stored at the same position
  response[1].each_with_index.each_with_object({}) do |(title, position), suggestions|
    suggestions[title] = response[2][position]
  end
end

#cache ⇒ Cache

Retrieve the cache for this module if it is already defined, otherwise create a new Cache, defaulting to an in-memory cache

Returns:

  • (Cache)

    the cache



35
36
37
38
39
40
41
# File 'lib/wikipedia_wrapper.rb', line 35

# Return the cache stored in @cache. When none has been set yet, a new
# Cache is created and its default TTL is copied from config.default_ttl.
#
# @return [Cache] the cache
def cache
  return @cache unless @cache.nil?

  @cache = Cache.new
  @cache.config.default_ttl = config.default_ttl
  @cache
end

#cache=(raw_client, timeout: config.default_ttl) ⇒ Object

Define the caching client

Examples:

WikipediaWrapper.cache = Memcached.new('127.0.0.1:11211', :binary_protocol => true)
WikipediaWrapper.cache = Dalli::Client.new
WikipediaWrapper.cache = Redis.new
WikipediaWrapper.cache = Rails.cache

Parameters:

  • raw_client (Memcached, Dalli::Client, Redis, memcache-client)

    a caching client (Memcached, Dalli, memcache-client, redis)

  • timeout (Integer) (defaults to: config.default_ttl)

    default timeout for the cache entries [in seconds]

See Also:



53
54
55
56
# File 'lib/wikipedia_wrapper.rb', line 53

# Replace the cache with +raw_client+ wrapped through Cache.wrap and set
# the default TTL of the wrapped cache.
#
# @param raw_client [Object] the caching client handed to Cache.wrap
# @param timeout [Integer] default TTL for cache entries; defaults to
#   config.default_ttl. Plain assignment syntax passes a single argument,
#   so a custom value has to be supplied via an explicit send/public_send.
def cache=(raw_client, timeout: config.default_ttl)
  @cache = Cache.wrap(raw_client)
  cache_settings = @cache.config
  cache_settings.default_ttl = timeout
end

#check_page(term) ⇒ String

Function to determine whether there is a page with that term. It uses the search and suggestion functionality to find a possible match and raises a PageError if no page could be found.

Parameters:

  • term (String)

    the term for which we want a page

Returns:

  • (String)

    the actual title of the page

Raises:



202
203
204
205
206
207
208
209
210
211
212
213
214
# File 'lib/wikipedia_wrapper.rb', line 202

# Resolve +term+ to a page title using a single-result search with
# suggestions enabled. A suggestion takes precedence over the search hit.
#
# @param term [String] the term for which a page is wanted
# @raise [WikipediaWrapper::PageError] if there is no suggestion and the
#   search did not return exactly one result
# @return [String] the suggestion, or else the title of the only result
def check_page(term)
  hits, corrected = search(term, limit: 1, suggestion: true)

  return corrected unless corrected.nil?
  raise WikipediaWrapper::PageError.new(term) unless hits.length == 1

  title, _snippet = hits.first
  title
end

#config ⇒ Object



14
15
16
# File 'lib/wikipedia_wrapper.rb', line 14

# Return the configuration stored in @config, creating a new
# Configuration the first time it is needed.
#
# @return [Configuration] the configuration object
def config
  @config = Configuration.new unless @config
  @config
end

#configure {|config| ... } ⇒ Object

Set up configuration options

Examples:

WikipediaWrapper.configure do |config|
  config.api_url = 'http://en.wikipedia.org/w/api.php'
  config.user_agent = 'WikipediaWrapper/0.0.1 (http://sykaeh.github.com/wikipedia_wrapper/) Ruby/2.2.1'
  config.default_ttl = 604800
end

Yield Parameters:



27
28
29
30
# File 'lib/wikipedia_wrapper.rb', line 27

# Yield the configuration object to the given block so that options can
# be set on it. A new Configuration is created first if @config is unset.
#
# @yieldparam config the object returned by #config
# @return [Object] the value of the block
def configure
  @config = Configuration.new unless @config
  yield config
end

#page(term, auto_suggest: true, redirect: true) ⇒ WikipediaWrapper::Page

Convenience function to retrieve a Wikipedia page

Parameters:

  • term (String)

    the title of the page

  • auto_suggest (Boolean) (defaults to: true)

    whether the search and autocorrect suggestion should be used to find a valid term (default: true)

  • redirect (Boolean) (defaults to: true)

    whether redirects should be followed automatically (default: true)

Returns:



65
66
67
68
69
70
71
72
73
# File 'lib/wikipedia_wrapper.rb', line 65

# Convenience function that builds a WikipediaWrapper::Page for +term+.
#
# @param term [String] the title of the page
# @param auto_suggest [Boolean] when truthy, the term is first replaced by
#   the result of check_page
# @param redirect [Boolean] passed on to WikipediaWrapper::Page.new as the
#   redirect option
# @return [WikipediaWrapper::Page] the page object
def page(term, auto_suggest: true, redirect: true)
  title = auto_suggest ? check_page(term) : term
  WikipediaWrapper::Page.new(title, redirect: redirect)
end

#search(term, limit: 10, suggestion: false) ⇒ {String => String}, Array<{String => String}, <String, nil>>

Do a Wikipedia search for the given term

Parameters:

  • limit (Integer) (defaults to: 10)

    the maximum number of results returned

  • suggestion (Boolean) (defaults to: false)

    set to true if you want an autocorrect suggestion

Returns:

  • ({String => String})

    if suggestion is false, return a Hash of the suggestions (as keys) and a snippet of the search result as values

  • (Array<{String => String}, <String, nil>>)

    if suggestion is true, return a Hash of the suggestions (as keys) and a snippet of the search result as values in the first position of the array and in the second position a proposed suggestion or nil if there was no suggestion



132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'lib/wikipedia_wrapper.rb', line 132

# Do a Wikipedia search for the given term.
#
# @param term [String] the search term (sent as 'srsearch')
# @param limit [Integer] the maximum number of results (sent as 'srlimit')
# @param suggestion [Boolean] set to true to also get the autocorrect
#   suggestion of the search
# @return [Hash{String => String}] if suggestion is false: result titles
#   mapped to their snippets, with the highlighting <span> tags removed
# @return [Array] if suggestion is true: that Hash in the first position and
#   the suggested term (or nil when the response has none) in the second
def search(term, limit: 10, suggestion: false)
  search_params = {
    list: 'search',
    srprop: 'snippet',
    srlimit: limit.to_s,
    srsearch: term
  }

  query = fetch(search_params)['query']

  results = query['search'].each_with_object({}) do |hit, found|
    # [^>]* keeps each match inside a single opening tag; a greedy .* would
    # run on to the last span and drop the text between two highlights
    found[hit['title']] = hit['snippet'].gsub(/<span [^>]*>(?<term>[^<]*)<\/span>/, '\k<term>')
  end

  return results unless suggestion

  # Treat a response without a searchinfo section as "no suggestion"
  search_info = query['searchinfo'] || {}
  [results, search_info['suggestion']]
end

#summary(term, html: false, sentences: 0, chars: 0) ⇒ String

Note:

This is a convenience wrapper - auto_suggest and redirect are enabled by default

Plain text or basic HTML summary of the page. Redirects are always followed automatically.

Parameters:

  • term (String)

    the title of the page

  • html (Boolean) (defaults to: false)

    if true, return basic HTML instead of plain text

  • sentences (Integer) (defaults to: 0)

    if set, return the first `sentences` sentences (can be no greater than 10).

  • chars (Integer) (defaults to: 0)

    if set, return only the first `chars` characters (actual text returned may be slightly longer).

Returns:

  • (String)

    the plain text or basic HTML summary of that page

Raises:



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/wikipedia_wrapper.rb', line 87

# Plain text or basic HTML summary of the page for +term+. The term is
# first resolved through check_page and the request always carries the
# 'redirects' flag.
#
# @param term [String] the title of the page
# @param html [Boolean] if true, basic HTML is requested instead of plain
#   text (the 'explaintext' flag is omitted)
# @param sentences [Integer] if greater than 0, request that many sentences
#   (capped at 10)
# @param chars [Integer] if greater than 0 and sentences is not, request
#   that many characters
# @raise whatever check_page and check_results raise for the term/response
# @return [String] the 'extract' of the first page in the response
def summary(term, html: false, sentences: 0, chars: 0)
  # get auto_suggest
  term = check_page(term)

  query_params = {
    redirects: '',
    prop: 'extracts|pageprops',
    titles: term,
    ppprop: 'disambiguation'
  }

  query_params[:explaintext] = '' unless html

  # 0 is truthy in Ruby, so the "not set" defaults have to be detected with
  # an explicit comparison; to_i also lets nil count as "not set"
  sentence_count = sentences.to_i
  char_count = chars.to_i

  if sentence_count > 0
    query_params[:exsentences] = [sentence_count, 10].min.to_s
  elsif char_count > 0
    query_params[:exchars] = char_count.to_s
  else
    # Neither limit given: ask for the intro section only
    query_params[:exintro] = ''
  end

  raw_results = fetch(query_params)
  check_results(term, raw_results)

  _page_id, info = raw_results['query']['pages'].first
  info['extract']
end