Class: ReVIEW::Epub2Html

Inherits:

Object

Object
ReVIEW::Epub2Html

show all

Defined in: lib/review/epub2html.rb

Class Method Summary collapse

.execute(*args) ⇒ Object

Instance Method Summary collapse

#execute(*args) ⇒ Object
#initialize ⇒ Epub2Html constructor

A new instance of Epub2Html.
#join_html(reffile) ⇒ Object
#make_list ⇒ Object
#modify_html(fname, html) ⇒ Object
#parse_epub(epubname) ⇒ Object
#sanitize(s) ⇒ Object
#take_headtail(html) ⇒ Object

Constructor Details

#initialize ⇒ `Epub2Html`

Returns a new instance of Epub2Html.

# File 'lib/review/epub2html.rb', line 54

# Sets up an empty converter: no OPF document parsed yet, no HTML files
# collected, no header/footer captured, and inline footnotes not requested.
def initialize
  @htmls = {}
  @opfxml = @head = @tail = @inline_footnote = nil
end

Class Method Details

.execute(*args) ⇒ `Object`



22
23
24

# File 'lib/review/epub2html.rb', line 22

# Convenience entry point: creates a fresh instance and forwards every
# argument to its #execute, returning whatever that call returns.
def self.execute(*args)
  converter = new
  converter.execute(*args)
end

Instance Method Details

#execute(*args) ⇒ `Object`

# File 'lib/review/epub2html.rb', line 26

# Command-line driver: parses the options in +args+, reads the EPUB and
# prints the joined HTML to standard output.
#
# Recognized options are --help (print usage, exit 0) and --inline-footnote
# (sets @inline_footnote). After option parsing, the first remaining argument
# is the EPUB path and the optional second one names the file used for the
# header/footer. When the EPUB path is absent or does not exist, the usage
# text is printed and the process exits with status 1.
def execute(*args)
  parser = OptionParser.new

  parser.banner = <<EOT
Usage: review-epub2html [options] EPUBfile [file_for_head_and_foot] > HTMLfile
   file_for_head_and_foot: HTML file to extract header and footer area.
                           This file must be contained in the EPUB.
                           If omitted, the first found file is used.

EOT
  parser.version = ReVIEW::VERSION
  parser.on('--help', 'Prints this message and quit.') do
    puts parser.help
    exit 0
  end
  parser.on('--inline-footnote', 'Embed footnote blocks in paragraph.') do
    @inline_footnote = true
  end

  # parse! strips the recognized options out of args in place.
  parser.parse!(args)

  epubfile, reffile = args
  unless epubfile && File.exist?(epubfile)
    puts parser.help
    exit 1
  end

  parse_epub(epubfile)
  puts join_html(reffile)
end

#join_html(reffile) ⇒ `Object`

# File 'lib/review/epub2html.rb', line 145

# Concatenates every file returned by make_list into one HTML string.
#
# The header and footer are captured (via take_headtail) once, from the first
# listed file when +reffile+ is nil, otherwise from the listed file whose name
# equals +reffile+. If no file qualifies, @head and @tail keep their current
# values and are interpolated as they are.
#
# Returns "<head>\n<bodies joined by newlines>\n<tail>".
def join_html(reffile)
  sections = make_list.map do |fname|
    source = @htmls[fname]
    take_headtail(source) if @head.nil? && (reffile.nil? || reffile == fname)
    modify_html(fname, source)
  end

  "#{@head}\n#{sections.join("\n")}\n#{@tail}"
end

#make_list ⇒ `Object`

# File 'lib/review/epub2html.rb', line 157

# Lists the XHTML files of the book in spine (reading) order.
#
# The manifest is scanned for items whose media type is
# application/xhtml+xml, giving an id => href table; each spine itemref is
# then resolved through that table.
#
# An itemref that does not resolve to an XHTML manifest item (for example a
# spine entry of another media type, or a dangling idref) is skipped. It used
# to contribute nil to the result, which made the callers fail when they
# tried to look up and sanitize a nil file name.
#
# @return [Array<String>] hrefs exactly as written in the manifest
def make_list
  hrefs_by_id = {}
  @opfxml.each_element("/package/manifest/item[@media-type='application/xhtml+xml']") do |item|
    hrefs_by_id[item.attributes['id']] = item.attributes['href']
  end

  files = []
  @opfxml.each_element('/package/spine/itemref') do |itemref|
    href = hrefs_by_id[itemref.attributes['idref']]
    files << href if href
  end

  files
end

#modify_html(fname, html) ⇒ `Object`

# File 'lib/review/epub2html.rb', line 88

# Rewrites the HTML of one file so that it can be concatenated with the
# other files into a single document.
#
# * Every id is replaced by "<sanitized file name>_<sanitized id>"; an 'E' is
#   appended until the result is unique within this file.
# * Every <a href> that does not start with http:, https:, ftp:, ftps: or
#   mailto: is turned into a fragment link built the same way, so that it
#   points at the rewritten id (or at the <section> of the target file when
#   there is no fragment). An empty href is treated as a link to this file.
# * When @inline_footnote is set, each <div class="footnote"> is renamed to
#   <span> and moved to the position of the <a class="noteref"> that refers
#   to it, and that link is removed. A noteref without a matching footnote
#   (or without href) is left untouched.
# * Everything up to and including the <body ...> tag is replaced by
#   <section id="<sanitized file name>">, and </body> with everything after
#   it by </section>.
#
# @param fname [String] file name used as the id prefix
# @param html [String] source of the file; it is parsed by REXML
# @return [String] the rewritten markup
def modify_html(fname, html)
  doc = REXML::Document.new(html)
  doc.context[:attribute_quote] = :quote
  file_prefix = sanitize(fname)

  ids = {}
  doc.each_element('//*[@id]') do |e|
    sid = "#{file_prefix}_#{sanitize(e.attributes['id'])}"
    sid += 'E' while ids[sid]
    ids[sid] = true
    e.attributes['id'] = sid
  end

  doc.each_element('//a[@href]') do |e|
    href = e.attributes['href']
    next if href.start_with?('http:', 'https:', 'ftp:', 'ftps:', 'mailto:')

    # ''.split('#', 2) returns [], so file can be nil (href="") as well as
    # empty (href="#anchor"); both mean "this file".
    file, anc = href.split('#', 2)
    target = file.nil? || file.empty? ? file_prefix : sanitize(file)
    target = "#{target}_#{sanitize(anc)}" if anc

    e.attributes['href'] = "##{target}"
  end

  if @inline_footnote
    # Move each footnote block to the place where it is referenced.
    footnotes = {}

    doc.each_element("//div[@class='footnote']") do |e|
      e.name = 'span'
      e.attributes.delete('epub:type')
      footnotes[e.attributes['id']] = e
      e.remove
    end

    doc.each_element("//a[@class='noteref']") do |e|
      note = footnotes[e.attributes['href'].to_s.sub('#', '')]
      # Nothing to inline for a dangling reference: keep the link as it is
      # rather than failing on a nil footnote.
      next unless note

      e.parent.insert_after(e, note)
      e.remove
    end
  end

  doc.to_s.
    sub(/.*(<body.*?>)/m, %Q(<section id="#{file_prefix}">)).
    sub(%r{(</body>).*}m, '</section>')
end

#parse_epub(epubname) ⇒ `Object`

# File 'lib/review/epub2html.rb', line 62

# Reads the EPUB archive and collects what the converter needs.
#
# An entry whose name ends in .opf is parsed into @opfxml. An entry whose
# name ends in .html or .xhtml is stored in @htmls as a UTF-8 string, keyed
# by the entry name with the first 'OEBPS/' removed. Other entries are
# ignored.
#
# @param epubname [String] path handed to Zip::File.open
# @return [nil]
def parse_epub(epubname)
  Zip::File.open(epubname) do |archive|
    archive.each do |entry|
      case entry.name
      when /.+\.opf\Z/
        @opfxml = REXML::Document.new(entry.get_input_stream.read)
      when /.+\.x?html\Z/
        key = entry.name.sub('OEBPS/', '')
        @htmls[key] = entry.get_input_stream.read.force_encoding('utf-8')
      end
    end
  end
  nil
end

#sanitize(s) ⇒ `Object`

# File 'lib/review/epub2html.rb', line 81

# Builds an identifier from a file name or an id value.
#
# A trailing .html/.xhtml extension and a leading "./" are dropped, the rest
# is CGI-escaped, every '.', ',', '+' and '%' in the escaped text becomes
# '_', and the result is prefixed with 's_'.
#
# @param s [String]
# @return [String]
def sanitize(s)
  stem = s.sub(/\.x?html\Z/, '')
  stem = stem.sub(%r{\A\./}, '')
  escaped = CGI.escape(stem)
  's_' + escaped.gsub(/[.,+%]/, '_')
end

#take_headtail(html) ⇒ `Object`

# File 'lib/review/epub2html.rb', line 76

# Captures the header and footer used to wrap the joined output.
#
# @head becomes +html+ cut right after its first <body ...> tag, and @tail
# becomes +html+ from its last </body> onwards. When a tag is not found the
# corresponding variable receives +html+ unchanged.
#
# @param html [String]
# @return [String] the value stored in @tail
def take_headtail(html)
  @head = html.sub(/(<body.*?>).*/m) { Regexp.last_match(1) }
  @tail = html.sub(%r{.*(</body>)}m) { Regexp.last_match(1) }
end

Class: ReVIEW::Epub2Html

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Epub2Html

Class Method Details

.execute(*args) ⇒ Object

Instance Method Details

#execute(*args) ⇒ Object

#join_html(reffile) ⇒ Object

#make_list ⇒ Object

#modify_html(fname, html) ⇒ Object

#parse_epub(epubname) ⇒ Object

#sanitize(s) ⇒ Object

#take_headtail(html) ⇒ Object

#initialize ⇒ `Epub2Html`

.execute(*args) ⇒ `Object`

#execute(*args) ⇒ `Object`

#join_html(reffile) ⇒ `Object`

#make_list ⇒ `Object`

#modify_html(fname, html) ⇒ `Object`

#parse_epub(epubname) ⇒ `Object`

#sanitize(s) ⇒ `Object`

#take_headtail(html) ⇒ `Object`