Class: WikiGetter

Inherits:
Object
  • Object
show all
Defined in:
lib/cogibara/operators/wiki/wiki_getter.rb

Instance Method Summary collapse

Instance Method Details

#checkRedirect(article) ⇒ Object



8
9
10
11
12
13
14
# File 'lib/cogibara/operators/wiki/wiki_getter.rb', line 8

# Detects a redirect stub and extracts the title it points to.
#
# Surrounding whitespace is ignored. The text counts as a redirect when it
# begins with the word "redirect" in any letter case ("redirect", "REDIRECT",
# "Redirect", ...). Everything after that marker, with surrounding whitespace
# removed, is returned as the target.
#
# @param article [String] plain article text to inspect
# @return [String, nil] the redirect target, or nil when the text does not
#   start with a redirect marker
def checkRedirect(article)
  text = article.strip
  return nil unless text =~ /\Aredirect/i

  # The pattern is anchored with \A, so it can match at most once.
  text.sub(/\Aredirect/i, '').strip
end

#getArticle(title) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/cogibara/operators/wiki/wiki_getter.rb', line 16

# Fetches the Wikipedia article for a title and returns it as cleaned text.
#
# The wikitext of the first revision in the response is rendered to HTML with
# WikiCloth and passed through Sanitize.clean. If checkRedirect reports that
# the result is a redirect stub, the target article is fetched instead.
# Otherwise "[1]"-style reference markers, "[edit]" labels and bracketed http
# links are removed from the text.
#
# @param title [String] article title handed to Wikipedia.find
# @return [String] the cleaned article text, or
#   "Could not find article, sorry" when the response carries no revisions
def getArticle(title)
  response = JSON.parse(Wikipedia.find(title).json)
  pages = (response["query"] || {})["pages"]
  # Pages are keyed by id in the response; only the first entry is used.
  _page_id, page = pages.to_a[0]
  revisions = page && page["revisions"]
  return "Could not find article, sorry" unless revisions && revisions[0]

  raw = revisions[0]["*"]
  html = WikiCloth::Parser.new({:data => raw}).to_html
  sanitized = Sanitize.clean(html)

  redirect = checkRedirect(sanitized)
  if redirect
    # NOTE(review): redirects are followed without a depth limit, so a
    # redirect cycle would recurse until the stack overflows.
    puts "redirecting to #{redirect}"
    return getArticle(redirect)
  end

  # Strip footnote markers, section-edit labels and bracketed external links.
  sanitized.gsub(/\[\d+\]/, '').gsub(/\[edit\]/, '').gsub(/\[http.*?\]/m, '')
end