Module: Wikipedia

Defined in:
lib/wikipedia.rb

Constant Summary collapse

URL =
"http://%LANG%.wikipedia.org/w/api.php?action=parse&page="

Class Method Summary collapse

Class Method Details

.article(n, lang = :en) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/wikipedia.rb', line 29

# Fetches a Wikipedia article through the parse API and returns the text of
# its paragraphs.
#
# @param n [String] article title; it is passed through +escape+ before being
#   appended to the request URL
# @param lang [Symbol, String] value substituted for the "%LANG%" placeholder
#   in URL (defaults to :en)
# @return [Array<String>] one entry per <p> element found in the response,
#   each passed through +escape_text+
def self.article( n, lang = :en )
  endpoint = URL.gsub("%LANG%", lang.to_s) + escape(n)

  # NOTE(review): a bare open() on a URL only works where open-uri patches
  # Kernel#open -- confirm against the Ruby version this library targets.
  response = open( endpoint ).read

  # The payload is entity-decoded twice before parsing, and newlines are
  # removed so each paragraph's text ends up on a single line.
  decoder = HTMLEntities.new
  markup  = decoder.decode( decoder.decode( response ) ).gsub("\n", "")

  Hpricot(markup).search('p').map do |paragraph|
    escape_text( paragraph.inner_text )
  end
end

.escape(s) ⇒ Object



49
50
51
52
53
54
55
# File 'lib/wikipedia.rb', line 49

# Turns an article title into a URL-safe query value.
#
# The title is capitalized with String#capitalize_every_word! (defined
# elsewhere in the project; presumably it upcases the first letter of each
# word in place -- confirm) and then percent-encoded with CGI.escape.
#
# The work is done on a copy, so the caller's string is left untouched and
# frozen strings are accepted.
#
# @param s [String] raw article title
# @return [String] the capitalized, CGI-escaped title
def self.escape(s)
  title = s.dup # dup is never frozen, so the bang method below is safe
  title.capitalize_every_word!
  CGI.escape( title )
end

.escape_text(s) ⇒ Object



57
58
59
60
61
62
63
64
65
# File 'lib/wikipedia.rb', line 57

# Removes bracketed citation markers such as "[0]" or "[1]" from a piece of
# paragraph text.
#
# Each bracketed group is matched on its own (non-greedy), so text that sits
# between two markers is preserved. The input is not modified; a new string
# is returned, which also makes frozen strings acceptable.
#
# @param s [String] paragraph text, e.g. the result of Hpricot's inner_text
# @return [String] the text with every "[...]" group removed
def self.escape_text(s)
  # ".*?" stops at the first closing bracket; a greedy ".*" would swallow
  # everything from the first "[" to the last "]" on the line.
  s.gsub( /\[.*?\]/, '' )
end