Class: WikiGetter
- Inherits: Object
- Inheritance chain: Object → WikiGetter
- Defined in:
- lib/cogibara/operators/wiki/wiki_getter.rb
Instance Method Summary
Instance Method Details
#checkRedirect(article) ⇒ Object
8 9 10 11 12 13 14 |
# File 'lib/cogibara/operators/wiki/wiki_getter.rb', line 8
#
# Detects whether the given article text is a redirect stub and, if so,
# extracts the redirect target.
#
# The text is treated as a redirect when, after trimming surrounding
# whitespace, it begins with the word "redirect" in any letter case.
# The keyword is removed and the remainder is returned with surrounding
# whitespace stripped, regardless of the keyword's case.
#
# @param article [String] article text to inspect
# @return [String, nil] the text following the redirect keyword, or nil
#   when the text does not start with the keyword
def checkRedirect(article)
  text = article.strip
  return nil unless text =~ /\Aredirect/i

  # \A anchors at the start of the string, so at most one occurrence can
  # match; `sub` is sufficient.
  text.sub(/\Aredirect/i, '').strip
end
#getArticle(title) ⇒ Object
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/cogibara/operators/wiki/wiki_getter.rb', line 16
#
# Looks up +title+ via Wikipedia.find, renders the first revision's wikitext
# to HTML with WikiCloth, strips the markup with Sanitize and returns the
# resulting text with a few bracketed leftovers removed.
#
# When the sanitized text is recognised by checkRedirect as a redirect, the
# method prints a notice and calls itself with the redirect target. There is
# no limit on how many redirects are followed.
#
# @param title [String] article title passed to Wikipedia.find
# @return [String] the cleaned article text, or the literal message
#   "Could not find article, sorry" when the first page in the response has
#   no "revisions" entry
def getArticle(title)
  response = JSON.parse(Wikipedia.find(title).json)
  # Only the first page of the query result is considered.
  revisions = response["query"]["pages"].to_a[0][1]["revisions"]
  return "Could not find article, sorry" unless revisions

  wikitext = revisions[0]["*"]
  renderer = WikiCloth::Parser.new({:data => wikitext})
  plain_text = Sanitize.clean(renderer.to_html)

  target = checkRedirect(plain_text)
  if target
    puts "redirecting to #{target}"
    return getArticle(target)
  end

  # Drop numeric reference markers such as "[12]" and "[edit]" section links.
  without_markers = plain_text.gsub(/\[\d+\]/,'').gsub(/\[edit\]/,'')
  # NOTE(review): this pattern only matches a bracketed "htt" followed by one
  # or more "p" and an optional whitespace character (e.g. "[http]"); it was
  # possibly meant to strip bracketed URLs -- confirm intent before changing.
  without_markers.gsub(/\[http+\s?\]/m,'')
end