Class: Wiki::Yggdrasil::Article

Inherits:
Object
  • Object
show all
Defined in:
lib/wiki/article.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(uri:) ⇒ Article

Returns a new instance of Article.

Raises:

  • (ArgumentError)


9
10
11
12
13
14
15
16
# File 'lib/wiki/article.rb', line 9

# Builds an article for the given Wikipedia URI.
#
# All derived data (@summary, @child_links, @name, @checksum) starts out nil.
#
# @param uri [String] address of the article; must satisfy
#   Wiki::Yggdrasil::Article.is_valid_wiki_article?
# @raise [ArgumentError] if the URI is rejected by the validator
def initialize(uri:)
  unless Wiki::Yggdrasil::Article.is_valid_wiki_article?(uri: uri)
    # Include the offending value so the failure can be diagnosed from the message alone.
    raise ArgumentError, "not a Wikipedia article URI: #{uri.inspect}"
  end

  @uri         = uri
  @summary     = nil
  @child_links = nil
  @name        = nil
  @checksum    = nil
end

Instance Attribute Details

#uri ⇒ Object (readonly)

Returns the value of attribute uri.



7
8
9
# File 'lib/wiki/article.rb', line 7

# Reader for the @uri instance variable (assigned in the constructor from
# its uri: argument).
#
# @return [Object] the current value of @uri
def uri
  @uri
end

Class Method Details

.is_valid_wiki_article?(uri:) ⇒ Boolean

Returns:

  • (Boolean)


55
56
57
# File 'lib/wiki/article.rb', line 55

# Tells whether +uri+ looks like a Wikipedia article address, i.e. contains
# "wikipedia.org/wiki/" followed by at least one more character.
#
# The pattern is not anchored, so any string that merely contains such a
# fragment is accepted.
#
# @param uri [String, nil] candidate address
# @return [Boolean] true when the pattern matches; false otherwise, including
#   for nil and other values that do not respond to #match?
def self.is_valid_wiki_article?(uri:)
  # match? yields a real boolean without building MatchData or touching $~;
  # the respond_to? guard keeps non-string input from raising.
  uri.respond_to?(:match?) && uri.match?(%r{wikipedia\.org/wiki/.+})
end

.remove_italic_tags(uri_list) ⇒ Object



51
52
53
# File 'lib/wiki/article.rb', line 51

# Placeholder: the body is empty, so this ignores +uri_list+ and returns nil.
# NOTE(review): presumably intended to strip italic markup from a list of
# link URIs -- confirm the intent before implementing.
#
# @param uri_list [Object] currently unused
# @return [nil]
def self.remove_italic_tags(uri_list)
  
end

Instance Method Details

#checksum ⇒ Object



22
23
24
# File 'lib/wiki/article.rb', line 22

# MD5 hex digest of the string form of @summary.
#
# Reads the instance variable directly rather than calling #summary, so
# nothing is fetched here; while @summary is still nil the digest is that
# of the empty string.
#
# @return [String] hexadecimal MD5 digest
def checksum
  summary_text = @summary.to_s
  Digest::MD5.hexdigest(summary_text)
end


26
27
28
29
30
31
# File 'lib/wiki/article.rb', line 26

# Links from this article that pass
# Wiki::Yggdrasil::Article.is_valid_wiki_article?.
#
# The list is built from #format_links on the first call and cached in
# @child_links; later calls return the cached list without scraping or
# filtering again.
#
# @param help [Boolean] accepted for backward compatibility; it is not used
#   by this method
# @return [Array<String>] absolute URIs of the linked articles
def child_links(help: false)
  @child_links ||= format_links.select do |link|
    Wiki::Yggdrasil::Article.is_valid_wiki_article?(uri: link)
  end
end


42
43
44
45
46
47
48
49
# File 'lib/wiki/article.rb', line 42

# Turns scraped anchors into absolute en.wikipedia.org URIs.
#
# Each anchor's 'href' value is appended to "https://en.wikipedia.org" as-is.
# Anchors that are nil or have a nil href are skipped: nil href attributes
# are often self refs (but possibly not always), so they are ignored.
#
# TODO: take care of this in #scrape_links with a css selector (like the
# Help:IPA links)
#
# @param anchors [Enumerable] objects answering ['href']; defaults to the
#   result of #scrape_links
# @return [Array<String>] one absolute URI per anchor that has an href
def format_links(anchors: self.scrape_links)
  base_url = 'https://en.wikipedia.org'

  uris = anchors.map do |anchor|
    href = anchor['href'] unless anchor.nil?
    # Interpolate rather than `<<` so no string literal is mutated
    # (keeps working when string literals are frozen).
    "#{base_url}#{href}" unless href.nil?
  end

  uris.compact
end

#name ⇒ Object



37
38
39
40
# File 'lib/wiki/article.rb', line 37

# Inner HTML of the page element(s) matching '#firstHeading'.
#
# The page is fetched and parsed on the first call; the result is cached in
# @name.
#
# The address is parsed into a URI object and opened through open-uri
# rather than handed to Kernel#open: Kernel#open treats a string starting
# with "|" as a command to run and does not fetch URLs on Ruby 3.0+.
# The block form closes the downloaded stream once parsing is done.
#
# @return [String] inner HTML of the matched heading
def name
  @name ||= URI.parse(uri).open do |page|
    Nokogiri::HTML(page).css('#firstHeading').inner_html
  end
  ## TODO: Cleanup
end

#scrape_links(help_links: false) ⇒ Object

TODO: test the help_links param in spec.



33
34
35
# File 'lib/wiki/article.rb', line 33

# Selects anchor nodes from #summary with a CSS query.
#
# @param help_links [Boolean] when truthy, query 'p a'; otherwise query the
#   same anchors minus those whose href is "/wiki/Help:IPA/English"
# @return [Object] whatever summary.css returns for the chosen selector
def scrape_links(help_links: false) ## TODO test help_links param in spec
  selector = help_links ? 'p a' : 'p a[href!="/wiki/Help:IPA/English"]'
  summary.css(selector)
end

#summary ⇒ Object



18
19
20
# File 'lib/wiki/article.rb', line 18

# The <p> elements found in the part of the page that precedes the table of
# contents.
#
# The page is fetched and serialized, cut at the first
# '<div id="toc" class="toc">' marker, and the leading part is re-parsed.
# The result is cached in @summary.
#
# The address is parsed into a URI object and opened through open-uri
# rather than handed to Kernel#open: Kernel#open treats a string starting
# with "|" as a command to run and does not fetch URLs on Ruby 3.0+.
# The block form closes the downloaded stream once parsing is done.
#
# @return [Object] result of css('p') on the re-parsed leading section
def summary
  @summary ||= begin
    page_html = URI.parse(uri).open { |page| Nokogiri::HTML(page).to_s }
    lead_html = page_html.split('<div id="toc" class="toc">')[0]
    Nokogiri::HTML(lead_html).css('p') ## TODO: Cleanup
  end
end