Class: Wikipedia::Page
- Inherits:
-
Object
- Object
- Wikipedia::Page
- Defined in:
- lib/wikipedia/page.rb
Class Method Summary collapse
- .sanitize(s) ⇒ Object
Instance Method Summary collapse
- #categories ⇒ Object
- #content ⇒ Object
- #coordinates ⇒ Object
- #editurl ⇒ Object
- #extlinks ⇒ Object
- #fullurl ⇒ Object
- #image_descriptionurl ⇒ Object
- #image_descriptionurls ⇒ Object
- #image_metadata ⇒ Object
- #image_url ⇒ Object
- #image_urls ⇒ Object
- #images ⇒ Object
-
#initialize(json) ⇒ Page
constructor
A new instance of Page.
- #json ⇒ Object
- #links ⇒ Object
- #page ⇒ Object
- #raw_data ⇒ Object
- #redirect? ⇒ Boolean
- #redirect_title ⇒ Object
- #sanitized_content ⇒ Object
- #templates ⇒ Object
- #title ⇒ Object
Constructor Details
#initialize(json) ⇒ Page
Returns a new instance of Page.
3 4 5 6 7 |
# File 'lib/wikipedia/page.rb', line 3

# Builds a page wrapper around a JSON API response.
#
# The source is kept verbatim in @json and its parsed form in @data.
# The payload comes from an external service, so it is parsed with
# `create_additions: false`: a "json_class" key inside the response must
# never cause arbitrary Ruby objects to be instantiated. JSON.load (rather
# than JSON.parse) is kept so that nil/blank sources still parse to nil
# instead of raising.
#
# @param json [String, nil] JSON source to wrap
# @return [Page] a new instance of Page
def initialize(json)
  require 'json' # idempotent; only the first call actually loads the library
  @json = json
  @data = JSON.load(json, nil, create_additions: false)
end
Class Method Details
.sanitize(s) ⇒ Object
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
# File 'lib/wikipedia/page.rb', line 101

# Reduces raw wikitext to lightly formatted text with minimal HTML.
#
# Works on a copy, so the caller's string is never mutated. In order, it
# removes templates, the first table block, image/file links, <ref> tags
# and HTML comments; rewrites internal links to their visible text;
# converts wiki bold/italic quotes to <b>/<i>; normalises non-breaking
# spaces; and, when the text contains blank-line separated sections, wraps
# each section in <p>...</p>.
#
# @param s [String, nil] wikitext to clean
# @return [String, nil] the cleaned text, or nil when +s+ is nil/false
def self.sanitize(s)
  return unless s

  text = s.dup

  strip_media = lambda do
    text.gsub!(/\[\[Image:[^\[\]]+?\]\]/, '')
    text.gsub!(/\[\[File:[^\[\]]+?\]\]/, '')
  end

  # Strip anything inside curly braces. Each pass only removes innermost
  # templates, so repeat until gsub! reports (via nil) that nothing changed.
  loop { break unless text.gsub!(/\{\{[^\{\}]+?\}\}/, '') }

  # Strip info box (first table block only).
  text.sub!(/^\{\|[^\{\}]+?\n\|\}\n/, '')

  # Strip simple image/file links BEFORE internal links are rewritten;
  # otherwise "[[File:a.jpg|thumb]]" is treated as an internal link and
  # leaks "thumb" (or the file name) into the output.
  strip_media.call

  # Replace internal links with their visible text.
  text.gsub!(/\[\[([^\]\|]+?)\|([^\]\|]+?)\]\]/, '\2')
  text.gsub!(/\[\[([^\]\|]+?)\]\]/, '\1')

  # Second pass: media links whose caption contained internal links only
  # become matchable once those inner links have been flattened.
  strip_media.call

  # Convert bold/italic to html (longest quote run first).
  text.gsub!(/'''''(.+?)'''''/, '<b><i>\1</i></b>')
  text.gsub!(/'''(.+?)'''/, '<b>\1</b>')
  text.gsub!(/''(.+?)''/, '<i>\1</i>')

  # Self-closing refs must go first: treated as an opening tag they would
  # swallow all text up to the next </ref>. \b keeps <references/> intact.
  text.gsub!(/<ref\b[^<>]*\/>/, '')
  text.gsub!(/<ref\b[^<>]*>[\s\S]*?<\/ref>/, '')

  # HTML comments, including ones that contain '>'.
  text.gsub!(/<!--[\s\S]*?-->/, '')

  # Non-breaking spaces, as an entity and as the literal U+00A0 character.
  text.gsub!('&nbsp;', ' ')
  text.gsub!("\u00A0", ' ') if text.encoding == Encoding::UTF_8

  text.strip!

  # Create paragraphs only when there is more than one section.
  paragraphs = text.split("\n\n")
  return text if paragraphs.size <= 1

  paragraphs.map { |para| "<p>#{para.strip}</p>" }.join("\n")
end
Instance Method Details
#categories ⇒ Object
43 44 45 |
# File 'lib/wikipedia/page.rb', line 43

# Lists the 'title' of every entry under the page's 'categories' key.
#
# @return [Array, nil] the titles, or nil when the page has no 'categories'
def categories
  entries = page['categories']
  return unless entries

  entries.map { |entry| entry['title'] }
end
#content ⇒ Object
13 14 15 |
# File 'lib/wikipedia/page.rb', line 13

# Reads the '*' entry of the first element under the page's 'revisions' key.
#
# @return [Object, nil] that entry, or nil when the page has no 'revisions'
def content
  revisions = page['revisions']
  return unless revisions

  revisions.first['*']
end
#coordinates ⇒ Object
75 76 77 |
# File 'lib/wikipedia/page.rb', line 75

# Returns the values of the first entry under the page's 'coordinates' key.
#
# @return [Array, nil] the values, or nil when the page has no 'coordinates'
def coordinates
  coords = page['coordinates']
  return unless coords

  coords.first.values
end
#editurl ⇒ Object
39 40 41 |
# File 'lib/wikipedia/page.rb', line 39

# Looks up the 'editurl' entry of the page data.
#
# @return [Object, nil] the stored value, or nil when the key is absent
def editurl
  current = page
  current['editurl']
end
#extlinks ⇒ Object
51 52 53 |
# File 'lib/wikipedia/page.rb', line 51

# Lists the '*' value of every entry under the page's 'extlinks' key.
#
# @return [Array, nil] the values, or nil when the page has no 'extlinks'
def extlinks
  entries = page['extlinks']
  return unless entries

  entries.map { |entry| entry['*'] }
end
#fullurl ⇒ Object
35 36 37 |
# File 'lib/wikipedia/page.rb', line 35

# Looks up the 'fullurl' entry of the page data.
#
# @return [Object, nil] the stored value, or nil when the key is absent
def fullurl
  current = page
  current['fullurl']
end
#image_descriptionurl ⇒ Object
63 64 65 |
# File 'lib/wikipedia/page.rb', line 63

# Reads 'descriptionurl' from the first entry under the page's 'imageinfo' key.
#
# @return [Object, nil] that value, or nil when the page has no 'imageinfo'
def image_descriptionurl
  info = page['imageinfo']
  return unless info

  info.first['descriptionurl']
end
#image_descriptionurls ⇒ Object
71 72 73 |
# File 'lib/wikipedia/page.rb', line 71

# Collects the description URL of every image returned by #image_metadata.
#
# The listing had lost the `image_metadata` receiver in front of `.map`,
# which left the method body syntactically invalid; it is restored here.
#
# @return [Array] one #image_descriptionurl result per image metadata object
def image_descriptionurls
  image_metadata.map(&:image_descriptionurl)
end
#image_metadata ⇒ Object
83 84 85 86 87 88 89 90 91 |
# File 'lib/wikipedia/page.rb', line 83

# Fetches metadata for the page's images through Wikipedia.find_image.
#
# Only titles that contain a ':' and end in jpg/jpeg/png/gif
# (case-insensitive) are looked up, and titles containing "LinkFA-star" are
# skipped. The result is memoised in @cached_image_metadata, so the lookups
# run at most once per instance (an empty result is cached as well).
#
# The listing had lost the method name after `def`; it is restored here.
#
# @return [Array] the Wikipedia.find_image results; empty when #images is nil
def image_metadata
  @cached_image_metadata ||= begin
    titles = images || []
    wanted = titles.select do |title|
      title =~ /:.+\.(jpg|jpeg|png|gif)$/i && !title.include?("LinkFA-star")
    end
    wanted.map { |title| Wikipedia.find_image(title) }
  end
end
#image_url ⇒ Object
59 60 61 |
# File 'lib/wikipedia/page.rb', line 59

# Reads 'url' from the first entry under the page's 'imageinfo' key.
#
# @return [Object, nil] that value, or nil when the page has no 'imageinfo'
def image_url
  info = page['imageinfo']
  return unless info

  info.first['url']
end
#image_urls ⇒ Object
67 68 69 |
# File 'lib/wikipedia/page.rb', line 67

# Collects the URL of every image returned by #image_metadata.
#
# The listing had lost the `image_metadata` receiver in front of `.map`,
# which left the method body syntactically invalid; it is restored here.
#
# @return [Array] one #image_url result per image metadata object
def image_urls
  image_metadata.map(&:image_url)
end
#images ⇒ Object
55 56 57 |
# File 'lib/wikipedia/page.rb', line 55

# Lists the 'title' of every entry under the page's 'images' key.
#
# @return [Array, nil] the titles, or nil when the page has no 'images'
def images
  entries = page['images']
  return unless entries

  entries.map { |entry| entry['title'] }
end
#json ⇒ Object
97 98 99 |
# File 'lib/wikipedia/page.rb', line 97

# Plain reader for the @json instance variable.
#
# @return [Object] whatever is currently stored in @json
def json
  @json
end
#links ⇒ Object
47 48 49 |
# File 'lib/wikipedia/page.rb', line 47

# Lists the 'title' of every entry under the page's 'links' key.
#
# @return [Array, nil] the titles, or nil when the page has no 'links'
def links
  entries = page['links']
  return unless entries

  entries.map { |entry| entry['title'] }
end
#page ⇒ Object
9 10 11 |
# File 'lib/wikipedia/page.rb', line 9

# Picks the first value stored under @data['query']['pages'].
#
# @return [Object, nil] the first page entry, or nil when 'pages' is empty
def page
  pages_by_key = @data['query']['pages']
  pages_by_key.values.first
end
#raw_data ⇒ Object
79 80 81 |
# File 'lib/wikipedia/page.rb', line 79

# Plain reader for the @data instance variable.
#
# @return [Object] whatever is currently stored in @data
def raw_data
  @data
end
#redirect? ⇒ Boolean
21 22 23 |
# File 'lib/wikipedia/page.rb', line 21

# Tests whether #content contains a "#REDIRECT [[target]]" directive
# (case-insensitive).
#
# The return value is truthy/falsy rather than strictly true/false: on a
# match it is the match object, whose first capture is the text between
# the double brackets.
#
# @return [MatchData, nil, false] the match, or a falsy value otherwise
def redirect?
  text = content
  text && text.match(/\#REDIRECT\s*\[\[(.*?)\]\]/i)
end
#redirect_title ⇒ Object
25 26 27 28 29 |
# File 'lib/wikipedia/page.rb', line 25

# Extracts the first capture of the #redirect? match.
#
# @return [String, nil] the captured text, or nil when #redirect? is falsy
def redirect_title
  match = redirect?
  match[1] if match
end
#sanitized_content ⇒ Object
17 18 19 |
# File 'lib/wikipedia/page.rb', line 17

# Passes #content through the class-level sanitize method.
#
# @return [Object] whatever self.class.sanitize returns for #content
def sanitized_content
  raw = content
  self.class.sanitize(raw)
end
#templates ⇒ Object
93 94 95 |
# File 'lib/wikipedia/page.rb', line 93

# Lists the 'title' of every entry under the page's 'templates' key.
#
# @return [Array, nil] the titles, or nil when the page has no 'templates'
def templates
  entries = page['templates']
  return unless entries

  entries.map { |entry| entry['title'] }
end
#title ⇒ Object
31 32 33 |
# File 'lib/wikipedia/page.rb', line 31

# Looks up the 'title' entry of the page data.
#
# @return [Object, nil] the stored value, or nil when the key is absent
def title
  current = page
  current['title']
end