Class: Epuber::Compiler::XHTMLProcessor

Inherits:

Object

Object
Epuber::Compiler::XHTMLProcessor

show all

Defined in: lib/epuber/compiler/xhtml_processor.rb

Defined Under Namespace

Classes: UnparseableLinkError

Class Method Summary collapse

.add_missing_root_elements(xhtml_doc, title, epub_version) ⇒ Object

Method to add all missing items in XML root.
.add_styles(xhtml_doc, styles) ⇒ Object

Method for adding style sheets with links, method will not add duplicate items.
.add_viewport(xhtml_doc, viewport_size) ⇒ Object

Adds a viewport meta tag to the head of the document, but only if no viewport meta tag already exists.
.resolve_images(xhtml_doc, file_path, file_resolver) ⇒ Object

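Rewrites the `src` attribute of every `img` element to its resolved path; the documented return value is nil.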
.resolve_links(xhtml_doc, file_path, file_finder) ⇒ Array<URI>

Resolves all links to files in XHTML document and returns the valid and resolved versions.
.resolve_links_for(xhtml_doc, tag_name, attribute_name, groups, file_path, file_finder) ⇒ Array<URI>

Resolves all links to files in XHTML document and returns the valid and resolved versions.
.resolved_link_to_file(path, groups, file_path, file_finder) ⇒ URI

Method which will resolve path to file from pattern.
.using_javascript?(xhtml_doc) ⇒ Bool
.xml_document_from_string(text, file_path = nil) ⇒ Nokogiri::XML::Document

Method for parsing incomplete XML, supports multiple root elements.

Class Method Details

.add_missing_root_elements(xhtml_doc, title, epub_version) ⇒ `Object`

Method to add all missing items in XML root

Required items:

- html (with all namespaces and other attributes)
  - head (with title)
  - body

Parameters:

xhtml_doc (Nokogiri::XML::Document) —

input XML document to work with
title (String) —

title of this document, since this is required by EPUB specification
epub_version (Epuber::Version) —

version of result EPUB

Returns:

# File 'lib/epuber/compiler/xhtml_processor.rb', line 65

# Adds the root-level elements an XHTML document is missing.
#
# Three independent checks are performed, in this order:
#
# 1. no `body` element: `surround_with_element('body')` is called on the current root
# 2. no `html` element: `surround_with_element('html', attrs)` is called on the current
#    root, with the XHTML namespace and, for EPUB version 3 and newer, the EPUB namespace
# 3. no `html > head` element: a `head` holding a `title` (and, for EPUB version 3 and
#    newer, a `meta charset="utf-8"`) becomes the first child of `html`
#
# NOTE(review): `surround_with_element` is defined elsewhere; it presumably wraps the
# receiver in a new element of the given name -- confirm against its definition.
#
# @param xhtml_doc [Nokogiri::XML::Document] input XML document to work with, modified in place
# @param title [String] title of this document, since this is required by EPUB specification
# @param epub_version [Epuber::Version] version of result EPUB
#
# @return [Object] nil when a head already exists, otherwise the result of the head insertion
#
def self.add_missing_root_elements(xhtml_doc, title, epub_version)
  # add missing body element
  xhtml_doc.root.surround_with_element('body') if xhtml_doc.at_css('body').nil?

  # add missing root html element
  if xhtml_doc.at_css('html').nil?
    attrs               = {}
    attrs['xmlns']      = 'http://www.w3.org/1999/xhtml'
    attrs['xmlns:epub'] = 'http://www.idpf.org/2007/ops' if epub_version >= 3
    xhtml_doc.root.surround_with_element('html', attrs)
  end

  # add missing head in html
  return unless xhtml_doc.at_css('html > head').nil?

  html = xhtml_doc.css('html').first
  head = xhtml_doc.create_element('head')
  head << xhtml_doc.create_element('title', title)
  head << xhtml_doc.create_element('meta', charset: 'utf-8') if epub_version >= 3.0

  # An `html` element without any children has no node to insert before,
  # so the head is appended instead of raising NoMethodError on nil.
  first_child = html.children.first
  if first_child.nil?
    html << head
  else
    first_child.before(head)
  end
end

.add_styles(xhtml_doc, styles) ⇒ `Object`

Method for adding style sheets with links, method will not add duplicate items

Parameters:

xhtml_doc (Nokogiri::XML::Document) —

input XML document to work with
styles (Array<String>) —

links to files

Returns:

# File 'lib/epuber/compiler/xhtml_processor.rb', line 97

# Adds stylesheet `link` elements to the head of the document, skipping every path
# that is already linked and every path that is repeated within `styles`.
#
# @param xhtml_doc [Nokogiri::XML::Document] input XML document to work with, modified in place
# @param styles [Array<String>] links to files
#
# @return [Array<String>] the paths for which a `link` element was added
#
def self.add_styles(xhtml_doc, styles)
  head = xhtml_doc.at_css('html > head')
  linked_paths = head.css('link[rel="stylesheet"]').map { |node| node['href'] }

  # `uniq` keeps a path listed twice in `styles` from being linked twice
  links_to_add = styles.uniq - linked_paths

  links_to_add.each do |path|
    head << xhtml_doc.create_element('link', href: path, rel: 'stylesheet', type: 'text/css')
  end
end

.add_viewport(xhtml_doc, viewport_size) ⇒ `Object`

Adds a viewport meta tag to the head of the document, but only if no viewport meta tag already exists

Parameters:

xhtml_doc (Nokogiri::XML::Document)
viewport_size (Epuber::Size)

# File 'lib/epuber/compiler/xhtml_processor.rb', line 113

# Appends `<meta name="viewport" content="width=W,height=H">` to the document head
# unless the head already holds a viewport meta tag.
#
# @param xhtml_doc [Nokogiri::XML::Document] document to modify in place
# @param viewport_size [Epuber::Size] object whose `width` and `height` are written into the tag
#
# @return [Object] nil when a viewport tag already exists, otherwise the result of `head <<`
#
def self.add_viewport(xhtml_doc, viewport_size)
  head = xhtml_doc.at_css('html > head')

  if head.at_css("meta[name='viewport']").nil?
    content = "width=#{viewport_size.width},height=#{viewport_size.height}"
    head << xhtml_doc.create_element('meta', name: 'viewport', content: content)
  end
end

.resolve_images(xhtml_doc, file_path, file_resolver) ⇒ `Object`

Rewrites the `src` attribute of every `img` element to its resolved path. The documented return value is nil.

Parameters:

xhtml_doc (Nokogiri::XML::Document)
file_path (String) —

path of referring file
file_resolver (FileResolver)

Returns:

# File 'lib/epuber/compiler/xhtml_processor.rb', line 222

# Rewrites the `src` attribute of every `img` element in the document.
#
# For each image the destination finder of `file_resolver` is asked first. When that
# lookup raises one of the rescued errors, the source finder is tried through
# `resolved_link_to_file`; on success a new `FileTypes::ImageFile` with path type
# `:manifest` is registered via `file_resolver.add_file`. When the second lookup fails
# as well, a warning is reported and the image is left untouched.
#
# @param xhtml_doc [Nokogiri::XML::Document] document to modify in place
# @param file_path [String] path of referring file
# @param file_resolver [FileResolver] provides `dest_finder`, `source_finder`, `source_path` and `add_file`
#
# @return [Object] result of iterating over the `img` nodes; not meaningful to callers
#
def self.resolve_images(xhtml_doc, file_path, file_resolver)
  base_dir = File.dirname(file_path)

  xhtml_doc.css('img').each do |img|
    src = img['src']
    next if src.nil?

    resolved_src =
      begin
        file_resolver.dest_finder.find_file(src, groups: :image, context_path: base_dir)
      rescue UnparseableLinkError, FileFinders::FileNotFoundError, FileFinders::MultipleFilesFoundError
        begin
          source_link = resolved_link_to_file(src, :image, base_dir, file_resolver.source_finder).to_s

          # path of the found image relative to the resolver's source path
          absolute_path = File.expand_path(source_link, base_dir).unicode_normalize
          source_root = Pathname.new(file_resolver.source_path)
          package_path = Pathname.new(absolute_path).relative_path_from(source_root).to_s

          image_file = FileTypes::ImageFile.new(package_path)
          image_file.path_type = :manifest
          file_resolver.add_file(image_file)

          source_link
        rescue UnparseableLinkError, FileFinders::FileNotFoundError, FileFinders::MultipleFilesFoundError => e
          UI.warning(e.to_s, location: img)

          next
        end
      end

    img['src'] = resolved_src
  end
end

.resolve_links(xhtml_doc, file_path, file_finder) ⇒ `Array<URI>`

Resolves all links to files in XHTML document and returns the valid and resolved versions

Parameters:

xhtml_doc (Nokogiri::XML::Document) —

input XML document to work with
file_path (String) —

path of the file from which the other file is being searched for
file_finder (Epuber::Compiler::FileFinder) —

finder for searching for files

Returns:

(Array<URI>) —

resolved links

# File 'lib/epuber/compiler/xhtml_processor.rb', line 201

# Resolves the `href` of every `a` and `map > area` element by delegating to
# `resolve_links_for` with the `:text` group, and returns all results as one flat list.
#
# @param xhtml_doc [Nokogiri::XML::Document] input XML document to work with
# @param file_path [String] path of the file the links are resolved relative to
# @param file_finder [Epuber::Compiler::FileFinder] finder for searching for files
#
# @return [Array<URI>] resolved links
#
def self.resolve_links(xhtml_doc, file_path, file_finder)
  link_selectors = ['a', 'map > area']

  per_selector = link_selectors.map do |selector|
    resolve_links_for(xhtml_doc, selector, 'href', :text, file_path, file_finder)
  end

  per_selector.flatten
end

.resolve_links_for(xhtml_doc, tag_name, attribute_name, groups, file_path, file_finder) ⇒ `Array<URI>`

Resolves all links to files in XHTML document and returns the valid and resolved versions

Parameters:

xhtml_doc (Nokogiri::XML::Document) —

input XML document to work with
tag_name (String) —

CSS selector for tag
attribute_name (String) —

name of attribute
groups (Symbol | Array<Symbol>) —

groups of the file being searched for, for example :image when searching for a file referenced by an <img> tag
file_path (String) —

path of the file from which the other file is being searched for
file_finder (Epuber::Compiler::FileFinder) —

finder for searching for files

Returns:

(Array<URI>) —

resolved links

# File 'lib/epuber/compiler/xhtml_processor.rb', line 168

# Resolves one attribute on every node matching `tag_name[attribute_name]`.
#
# Each attribute value is passed to `resolved_link_to_file`; on success the result is
# collected and written back to the attribute as a string. When one of the rescued
# errors is raised, a warning is reported for that node and processing continues
# with the next node.
#
# @param xhtml_doc [Nokogiri::XML::Document] input XML document to work with, modified in place
# @param tag_name [String] CSS selector for tag
# @param attribute_name [String] name of attribute
# @param groups [Symbol, Array<Symbol>] groups passed through to `resolved_link_to_file`
# @param file_path [String] path of the file the links are resolved relative to
# @param file_finder [Epuber::Compiler::FileFinder] finder for searching for files
#
# @return [Array<URI>] resolved links
#
def self.resolve_links_for(xhtml_doc, tag_name, attribute_name, groups, file_path, file_finder)
  selector = "#{tag_name}[#{attribute_name}]"

  xhtml_doc.css(selector).each_with_object([]) do |node, resolved_links|
    begin
      raw_link = node[attribute_name]
      next if raw_link.nil?

      resolved = resolved_link_to_file(raw_link, groups, file_path, file_finder)
      resolved_links << resolved

      node[attribute_name] = resolved.to_s
    rescue UnparseableLinkError, FileFinders::FileNotFoundError, FileFinders::MultipleFilesFoundError => e
      # files that cannot be found are reported and skipped
      UI.warning(e.to_s, location: node)
    end
  end
end

.resolved_link_to_file(path, groups, file_path, file_finder) ⇒ `URI`

Method which will resolve path to file from pattern

Parameters:

path (String) —

pattern or path of the file
groups (Symbol | Array<Symbol>) —

groups of the file being searched for, for example :image when searching for a file referenced by an <img> tag
file_path (String) —

path of the file from which the other file is being searched for
file_finder (Epuber::Compiler::FileFinder) —

finder for searching for files

Returns:

(URI) —

resolved path to file or remote web page

Raises:

UnparseableLinkError, FileFinders::FileNotFoundError, FileFinders::MultipleFilesFoundError

# File 'lib/epuber/compiler/xhtml_processor.rb', line 132

# Resolves a link found in a document to a URI.
#
# The link is parsed as a URI; when parsing fails it is percent-encoded and parsed once
# more. URIs with a scheme (links to web pages) and fragment-only URIs are returned as
# parsed. For every other URI the path component is replaced by the result of
# `file_finder.find_file`.
#
# @param path [String] pattern or path of the file
# @param groups [Symbol, Array<Symbol>] groups passed to `file_finder.find_file`
# @param file_path [String] passed to `file_finder.find_file` as `context_path`
# @param file_finder [Epuber::Compiler::FileFinder] finder for searching for files
#
# @return [URI] resolved path to file or remote web page
#
# @raise [FileFinders::FileNotFoundError] when `path` is empty
# @raise [UnparseableLinkError] when `path` is not a valid URI even after percent-encoding
#
def self.resolved_link_to_file(path, groups, file_path, file_finder)
  raise FileFinders::FileNotFoundError.new(path, file_path) if path.empty?

  begin
    uri = URI(path)
  rescue URI::InvalidURIError
    begin
      # `URI.encode` was removed in Ruby 3.0. The RFC 2396 parser's `escape` is the
      # implementation it delegated to; newer Rubies expose that parser as
      # `URI::RFC2396_PARSER`, older ones as `URI::DEFAULT_PARSER`.
      escaper = defined?(URI::RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER
      uri = URI(escaper.escape(path))
    rescue URI::InvalidURIError
      # skip not valid uri
      raise UnparseableLinkError, "Unparseable link `#{path}`"
    end
  end

  # skip uri with scheme (links to web pages)
  return uri unless uri.scheme.nil?

  # skip empty path
  return uri if uri.path.empty? && !uri.fragment.nil? && !uri.fragment.empty?

  uri.path = file_finder.find_file(uri.path, groups: groups, context_path: file_path)

  uri
end

.using_javascript?(xhtml_doc) ⇒ `Bool`

Parameters:

xhtml_doc (Nokogiri::XML::Document) —

input XML document to work with

Returns:

(Bool)



212
213
214

# File 'lib/epuber/compiler/xhtml_processor.rb', line 212

# Tells whether the document contains at least one `script` element.
#
# @param xhtml_doc [Nokogiri::XML::Document] input XML document to work with
#
# @return [Bool] true when a `script` element is found
#
def self.using_javascript?(xhtml_doc)
  script_node = xhtml_doc.at_css('script')
  !script_node.nil?
end

.xml_document_from_string(text, file_path = nil) ⇒ `Nokogiri::XML::Document`

Method for parsing incomplete XML, supports multiple root elements

Parameters:

text (String) —

input XHTML text

Returns:

(Nokogiri::XML::Document) —

parsed document

# File 'lib/epuber/compiler/xhtml_processor.rb', line 22

# Parses XHTML text that may be incomplete or may have several top-level elements.
#
# The text is parsed twice: once as a document and once as a fragment. The fragment
# decides what becomes the root of the returned document:
#
# - exactly one top-level element: that element becomes the root
# - otherwise, when the fragment holds no `body`: a new `body` root is created and the
#   fragment's element, comment and text children are added to it
# - otherwise the root produced by the document parse is kept
#
# When whitespace precedes the XML header, a warning is reported and the leading
# whitespace is stripped before parsing.
#
# @param text [String] input XHTML text
# @param file_path [String, nil] stored on the document and used as the warning location
#
# @return [Nokogiri::XML::Document] parsed document
#
def self.xml_document_from_string(text, file_path = nil)
  header_after_whitespace = /\A[\n\r ]+(<\?xml)/ =~ text

  if header_after_whitespace
    UI.warning('XML header must be at the beginning of document', location: UI::Location.new(file_path, 1))

    text = text.lstrip
  end

  doc = Nokogiri::XML(text)
  doc.encoding = 'UTF-8'
  doc.file_path = file_path

  fragment = Nokogiri::XML.fragment(text)
  top_level_elements = fragment.children.select(&:element?)

  if top_level_elements.count == 1
    doc.root = top_level_elements.first
  elsif fragment.at_css('body').nil?
    doc.root = doc.create_element('body')

    movable = fragment.children.select { |child| child.element? || child.comment? || child.text? }
    movable.each { |child| doc.root.add_child(child) }
  end

  doc
end

Class: Epuber::Compiler::XHTMLProcessor

Defined Under Namespace

Class Method Summary collapse

Class Method Details

.add_missing_root_elements(xhtml_doc, title, epub_version) ⇒ Object

.add_styles(xhtml_doc, styles) ⇒ Object

.add_viewport(xhtml_doc, viewport_size) ⇒ Object

.resolve_images(xhtml_doc, file_path, file_resolver) ⇒ Object

.resolve_links(xhtml_doc, file_path, file_finder) ⇒ Array<URI>

.resolve_links_for(xhtml_doc, tag_name, attribute_name, groups, file_path, file_finder) ⇒ Array<URI>

.resolved_link_to_file(path, groups, file_path, file_finder) ⇒ URI

.using_javascript?(xhtml_doc) ⇒ Bool

.xml_document_from_string(text, file_path = nil) ⇒ Nokogiri::XML::Document

.add_missing_root_elements(xhtml_doc, title, epub_version) ⇒ `Object`

.add_styles(xhtml_doc, styles) ⇒ `Object`

.add_viewport(xhtml_doc, viewport_size) ⇒ `Object`

.resolve_images(xhtml_doc, file_path, file_resolver) ⇒ `Object`

.resolve_links(xhtml_doc, file_path, file_finder) ⇒ `Array<URI>`

.resolve_links_for(xhtml_doc, tag_name, attribute_name, groups, file_path, file_finder) ⇒ `Array<URI>`

.resolved_link_to_file(path, groups, file_path, file_finder) ⇒ `URI`

.using_javascript?(xhtml_doc) ⇒ `Bool`

.xml_document_from_string(text, file_path = nil) ⇒ `Nokogiri::XML::Document`