Class: Alexandria::BookProviders::WebsiteBasedProvider

Inherits:
GenericProvider show all
Defined in:
lib/alexandria/book_providers/web.rb

Direct Known Subclasses

ThaliaProvider, WorldCatProvider

Instance Attribute Summary

Attributes inherited from AbstractProvider

#fullname, #name, #prefs

Instance Method Summary collapse

Methods inherited from AbstractProvider

#<=>, abstract?, #abstract?, #action_name, #enabled, #reinitialize, #remove, #toggle_enabled, #transport, unabstract, #variable_name

Constructor Details

#initialize(name, fullname = nil) ⇒ WebsiteBasedProvider

Returns a new instance of WebsiteBasedProvider.



13
14
15
16
# File 'lib/alexandria/book_providers/web.rb', line 13

# Builds the provider and prepares the HTML-entity decoder that
# html_to_doc relies on.
#
# @param name [Object] forwarded unchanged to the parent initializer
# @param fullname [Object, nil] forwarded unchanged to the parent initializer
def initialize(name, fullname = nil)
  # Bare `super` forwards this method's own arguments (name, fullname).
  super
  @htmlentities = HTMLEntities.new
end

Instance Method Details

#html_to_doc(html, source_data_charset = "ISO-8859-1") ⇒ Object



18
19
20
21
22
23
# File 'lib/alexandria/book_providers/web.rb', line 18

# Turns raw HTML text into a parsed document.
#
# The input is labelled with +source_data_charset+, transcoded to UTF-8,
# passed through the HTML-entity decoder, and finally handed to
# Nokogiri.parse. The caller's string is left untouched.
#
# @param html [String] page content whose bytes are in +source_data_charset+
# @param source_data_charset [String] encoding name used to interpret +html+
# @return [Object] the result of Nokogiri.parse on the normalized markup
def html_to_doc(html, source_data_charset = "ISO-8859-1")
  # Work on a copy: force_encoding relabels its receiver in place, which
  # would alter the caller's string and raise FrozenError on frozen input.
  labelled_html = html.dup.force_encoding(source_data_charset)
  utf8_html = labelled_html.encode("utf-8")
  # NOTE(review): entities are decoded before parsing, so escaped markup
  # (e.g. "&lt;") presumably reaches the parser as literal markup -- confirm
  # this is intended.
  normalized_html = @htmlentities.decode(utf8_html)
  Nokogiri.parse(normalized_html)
end

#text_of(node) ⇒ Object

from Palatina



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/alexandria/book_providers/web.rb', line 26

# Recursively collects the textual content beneath +node+.
#
# - nil input yields nil.
# - A text node yields its own +to_html+ output, unmodified.
# - An element yields the concatenated results of its children, with
#   surrounding whitespace stripped and runs of spaces collapsed to one;
#   nil is returned when the element reports nil children.
# - Any other kind of node yields nil.
#
# @param node [Object, nil] object responding to text?, elem?, to_html and children
# @return [String, nil]
def text_of(node)
  return if node.nil?
  return node.to_html if node.text?
  return unless node.elem?

  descendants = node.children
  return if descendants.nil?

  # Children that produce nil contribute an empty string to the join.
  descendants.map { |child| text_of(child) }.join.strip.squeeze(" ")
end