Class: Wikipedia::Page
- Inherits:
-
Object
- Object
- Wikipedia::Page
- Defined in:
- lib/wikipedia/page.rb
Instance Attribute Summary collapse
-
#json ⇒ Object
readonly
Returns the value of attribute json.
Class Method Summary collapse
-
.sanitize(s) ⇒ Object
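Strips wiki markup (templates, info boxes, links, images, refs, HTML comments) from s and converts bold/italic markup and paragraphs to HTML. (Source carries rubocop:disable Metrics/MethodLength and rubocop:disable Metrics/AbcSize.)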
Instance Method Summary collapse
- #categories ⇒ Object
- #content ⇒ Object
- #coordinates ⇒ Object
- #editurl ⇒ Object
- #extlinks ⇒ Object
- #fullurl ⇒ Object
- #image_descriptionurl ⇒ Object
- #image_descriptionurls ⇒ Object
- #image_metadata ⇒ Object
- #image_url ⇒ Object
- #image_urls ⇒ Object
- #images ⇒ Object
-
#initialize(json) ⇒ Page
constructor
A new instance of Page.
- #links ⇒ Object
- #page ⇒ Object
- #raw_data ⇒ Object
- #redirect? ⇒ Boolean
- #redirect_title ⇒ Object
- #sanitized_content ⇒ Object
- #summary ⇒ Object
- #templates ⇒ Object
- #text ⇒ Object
- #title ⇒ Object
Constructor Details
#initialize(json) ⇒ Page
5 6 7 8 9 |
# File 'lib/wikipedia/page.rb', line 5
# Builds a page from a JSON string.
#
# The string is kept untouched in @json and its parsed form in @data.
#
# @param json [String] JSON document to wrap
def initialize(json)
  require 'json' # required here, inside the constructor, as in the original

  @json = json
  @data = JSON.parse(json)
end
Instance Attribute Details
#json ⇒ Object (readonly)
Returns the value of attribute json.
3 4 5 |
# File 'lib/wikipedia/page.rb', line 3
# Read-only accessor for the json attribute.
#
# @return [Object] the current value of @json
def json
  @json
end
Class Method Details
.sanitize(s) ⇒ Object
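Strips wiki markup (templates, info boxes, links, images, refs, HTML comments) from s and converts bold/italic markup and paragraphs to HTML. (Source carries rubocop:disable Metrics/MethodLength and rubocop:disable Metrics/AbcSize.)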
106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
# File 'lib/wikipedia/page.rb', line 106
# Strips wiki markup from +s+ and converts what remains to simple HTML.
#
# Steps, in order: remove {{templates}} (innermost first), remove the first
# {| ... |} info box, remove Image:/File: links, unwrap internal links,
# convert ''italic'' / '''bold''' quotes to <i>/<b>, drop <ref> blocks and
# HTML comments, collapse runs of spaces, and wrap blank-line separated
# paragraphs in <p> tags.
#
# The argument is never modified; a new string is returned, so frozen
# strings are accepted.
#
# @param s [String, nil] raw wiki text
# @return [String, nil] sanitized text, or nil when +s+ is nil/false
def self.sanitize(s)
  return unless s

  # Work on a copy: the bang methods below would otherwise rewrite the
  # caller's string in place (and raise on a frozen one).
  text = s.dup

  # strip anything inside curly braces! Each pass removes the innermost
  # templates, so repeat until nothing is left.
  template = /\{\{[^\{\}]+?\}\}/
  text.gsub!(template, '') while text =~ template

  # strip info box
  text.sub!(/^\{\|[^\{\}]+?\n\|\}\n/, '')

  # strip images and file links that contain no nested links. This has to
  # happen before link unwrapping, which would otherwise turn
  # [[File:a.jpg]] into the text "File:a.jpg".
  media = /\[\[(?:Image|File):[^\[\]]+?\]\]/
  text.gsub!(media, '')

  # strip internal links
  text.gsub!(/\[\[([^\]\|]+?)\|([^\]\|]+?)\]\]/, '\2')
  text.gsub!(/\[\[([^\]\|]+?)\]\]/, '\1')

  # second media pass: captions that contained links are bracket-free only
  # now that the inner links have been unwrapped.
  text.gsub!(media, '')

  # convert bold/italic to html
  text.gsub!(/'''''(.+?)'''''/, '<b><i>\1</i></b>')
  text.gsub!(/'''(.+?)'''/, '<b>\1</b>')
  text.gsub!(/''(.+?)''/, '<i>\1</i>')

  # misc
  text.gsub!(/<ref[^<>]*>[\s\S]*?<\/ref>/, '')
  text.gsub!(/<!--[^>]+?-->/, '')
  # NOTE(review): the listing showed gsub!(' ', ' '), a no-op that looks like
  # a whitespace-collapsed "two spaces -> one"; runs of spaces are squeezed
  # here - confirm against lib/wikipedia/page.rb.
  text.squeeze!(' ')
  text.strip!

  # create paragraphs
  sections = text.split("\n\n")
  return text unless sections.size > 1

  sections.map { |paragraph| "<p>#{paragraph.strip}</p>" }.join("\n")
end
Instance Method Details
#categories ⇒ Object
51 52 53 |
# File 'lib/wikipedia/page.rb', line 51
# Titles of the entries stored under the page's 'categories' key.
#
# @return [Array, nil] each entry's 'title', or nil when the key is absent
def categories
  entries = page['categories']
  entries.map { |category| category['title'] } if entries
end
#content ⇒ Object
15 16 17 |
# File 'lib/wikipedia/page.rb', line 15
# The '*' field of the first entry under the page's 'revisions' key.
#
# @return [Object, nil] nil when the page has no 'revisions' key
def content
  revisions = page['revisions']
  revisions.first['*'] if revisions
end
#coordinates ⇒ Object
83 84 85 |
# File 'lib/wikipedia/page.rb', line 83
# Values of the first entry under the page's 'coordinates' key.
#
# @return [Array, nil] nil when the page has no 'coordinates' key
def coordinates
  entries = page['coordinates']
  entries.first.values if entries
end
#editurl ⇒ Object
39 40 41 |
# File 'lib/wikipedia/page.rb', line 39
# Value stored under the page's 'editurl' key.
#
# @return [Object, nil] nil when the key is absent
def editurl
  page['editurl']
end
#extlinks ⇒ Object
59 60 61 |
# File 'lib/wikipedia/page.rb', line 59
# The '*' field of every entry under the page's 'extlinks' key.
#
# @return [Array, nil] nil when the page has no 'extlinks' key
def extlinks
  entries = page['extlinks']
  entries.map { |link| link['*'] } if entries
end
#fullurl ⇒ Object
35 36 37 |
# File 'lib/wikipedia/page.rb', line 35
# Value stored under the page's 'fullurl' key.
#
# @return [Object, nil] nil when the key is absent
def fullurl
  page['fullurl']
end
#image_descriptionurl ⇒ Object
71 72 73 |
# File 'lib/wikipedia/page.rb', line 71
# The 'descriptionurl' of the first entry under the page's 'imageinfo' key.
#
# @return [Object, nil] nil when the page has no 'imageinfo' key
def image_descriptionurl
  info = page['imageinfo']
  info.first['descriptionurl'] if info
end
#image_descriptionurls ⇒ Object
79 80 81 |
# File 'lib/wikipedia/page.rb', line 79
# Collects #image_descriptionurl from every object returned by
# #image_metadata.
#
# NOTE(review): the receiver of +.map+ was missing from the extracted
# listing; +image_metadata+ is restored here - confirm against
# lib/wikipedia/page.rb.
#
# @return [Array] empty when #image_metadata returns nil
def image_descriptionurls
  # image_metadata returns nil when the page has no images
  (image_metadata || []).map(&:image_descriptionurl)
end
#image_metadata ⇒ Object
91 92 93 94 95 96 97 98 |
# File 'lib/wikipedia/page.rb', line 91
# Looks up every image title of this page through Wikipedia.find_image and
# memoizes the result list.
#
# Only titles ending in jpg/jpeg/png/gif/svg (case-insensitive) that do not
# contain 'LinkFA-star' are looked up.
#
# NOTE(review): the method name and the memo variable were missing from the
# extracted listing ("def unless ... = filtered.map ... end || [] end");
# +image_metadata+ / +@image_metadata+ are restored from the method's
# heading - confirm against lib/wikipedia/page.rb.
#
# @return [Array, nil] one Wikipedia.find_image result per matching title;
#   nil when #images is nil
def image_metadata
  unless @image_metadata
    return if images.nil?

    filtered = images.select { |i| i =~ /:.+\.(jpg|jpeg|png|gif|svg)$/i && !i.include?('LinkFA-star') }
    @image_metadata = filtered.map { |title| Wikipedia.find_image(title) }
  end
  @image_metadata || []
end
#image_url ⇒ Object
67 68 69 |
# File 'lib/wikipedia/page.rb', line 67
# The 'url' of the first entry under the page's 'imageinfo' key.
#
# @return [Object, nil] nil when the page has no 'imageinfo' key
def image_url
  info = page['imageinfo']
  info.first['url'] if info
end
#image_urls ⇒ Object
75 76 77 |
# File 'lib/wikipedia/page.rb', line 75
# Collects #image_url from every object returned by #image_metadata.
#
# NOTE(review): the receiver of +.map+ was missing from the extracted
# listing; +image_metadata+ is restored here - confirm against
# lib/wikipedia/page.rb.
#
# @return [Array] empty when #image_metadata returns nil
def image_urls
  # image_metadata returns nil when the page has no images
  (image_metadata || []).map(&:image_url)
end
#images ⇒ Object
63 64 65 |
# File 'lib/wikipedia/page.rb', line 63
# Titles of the entries stored under the page's 'images' key.
#
# @return [Array, nil] each entry's 'title', or nil when the key is absent
def images
  entries = page['images']
  entries.map { |image| image['title'] } if entries
end
#links ⇒ Object
55 56 57 |
# File 'lib/wikipedia/page.rb', line 55
# Titles of the entries stored under the page's 'links' key.
#
# @return [Array, nil] each entry's 'title', or nil when the key is absent
def links
  entries = page['links']
  entries.map { |link| link['title'] } if entries
end
#page ⇒ Object
11 12 13 |
# File 'lib/wikipedia/page.rb', line 11
# First entry of the 'pages' hash found under the 'query' key of @data.
#
# Returns nil instead of raising when @data has no 'query' key or the
# query has no 'pages' key.
#
# @return [Object, nil] first value of @data['query']['pages'], or nil
def page
  query = @data['query']
  pages = query && query['pages']
  pages.values.first if pages
end
#raw_data ⇒ Object
87 88 89 |
# File 'lib/wikipedia/page.rb', line 87
# Exposes the @data instance variable as-is.
#
# @return [Object] the current value of @data
def raw_data
  @data
end
#redirect? ⇒ Boolean
23 24 25 |
# File 'lib/wikipedia/page.rb', line 23
# Tests whether #content holds a "#REDIRECT [[Target]]" directive
# (case-insensitive).
#
# @return [MatchData, nil] the match (capture 1 is the text between the
#   brackets) when found; a falsy value otherwise
def redirect?
  wikitext = content
  wikitext && wikitext.match(/\#REDIRECT\s*\[\[(.*?)\]\]/i)
end
#redirect_title ⇒ Object
27 28 29 |
# File 'lib/wikipedia/page.rb', line 27
# First capture of the #redirect? match, i.e. the text between the brackets
# of the redirect directive.
#
# @return [String, nil] nil when #redirect? is falsy or any StandardError
#   is raised while evaluating it
def redirect_title
  redirect?[1]
rescue StandardError
  nil
end
#sanitized_content ⇒ Object
19 20 21 |
# File 'lib/wikipedia/page.rb', line 19
# Runs #content through the class-level sanitize method.
#
# @return [Object] whatever self.class.sanitize returns for #content
def sanitized_content
  raw = content
  self.class.sanitize(raw)
end
#summary ⇒ Object
47 48 49 |
# File 'lib/wikipedia/page.rb', line 47
# The part of the page's 'extract' that precedes the first '==', stripped
# of surrounding whitespace.
#
# @return [String, nil] nil when the extract is missing or empty; an empty
#   string when nothing precedes the first '=='
def summary
  extract = page['extract']
  return unless extract && extract != ''

  # split yields [] for an extract made only of '=' characters, so the
  # first element may be nil; to_s keeps strip from raising.
  extract.split('==').first.to_s.strip
end
#templates ⇒ Object
100 101 102 |
# File 'lib/wikipedia/page.rb', line 100
# Titles of the entries stored under the page's 'templates' key.
#
# @return [Array, nil] each entry's 'title', or nil when the key is absent
def templates
  entries = page['templates']
  entries.map { |template| template['title'] } if entries
end
#text ⇒ Object
43 44 45 |
# File 'lib/wikipedia/page.rb', line 43
# Value stored under the page's 'extract' key.
#
# @return [Object, nil] nil when the key is absent
def text
  page['extract']
end
#title ⇒ Object
31 32 33 |
# File 'lib/wikipedia/page.rb', line 31
# Value stored under the page's 'title' key.
#
# @return [Object, nil] nil when the key is absent
def title
  page['title']
end