Class: ReVIEW::Epub2Html

Inherits:

Object

Object
ReVIEW::Epub2Html

show all

Defined in: lib/review/epub2html.rb

Class Method Summary collapse

.execute(*args) ⇒ Object

Instance Method Summary collapse

#execute(*args) ⇒ Object
#initialize ⇒ Epub2Html constructor

A new instance of Epub2Html.
#join_html(reffile) ⇒ Object
#make_list ⇒ Object
#modify_html(fname, html) ⇒ Object
#parse_epub(epubname) ⇒ Object
#sanitize(s) ⇒ Object
#take_headtail(html) ⇒ Object

Constructor Details

#initialize ⇒ `Epub2Html`

Returns a new instance of Epub2Html.

# File 'lib/review/epub2html.rb', line 34

# Sets up an empty converter: no OPF document, no collected HTML files and no
# header/footer fragments yet.
def initialize
  # Parsed OPF package document; assigned by #parse_epub.
  @opfxml = nil
  # HTML sources keyed by file name; filled by #parse_epub.
  @htmls = {}
  # Header and footer fragments; assigned by #take_headtail.
  @head, @tail = nil
end

Class Method Details

.execute(*args) ⇒ `Object`



15
16
17

# File 'lib/review/epub2html.rb', line 15

# Convenience entry point: builds a fresh instance and forwards all arguments
# to its #execute method.
#
# @param args [Array] arguments passed through unchanged to #execute
# @return [Object] whatever the instance-level #execute returns
def self.execute(*args)
  converter = new
  converter.execute(*args)
end

Instance Method Details

#execute(*args) ⇒ `Object`

# File 'lib/review/epub2html.rb', line 19

# Command entry point: validates the arguments, parses the EPUB and prints the
# joined HTML to standard output.
#
# When the first argument is missing or does not name an existing file, a
# usage message is written to standard error and the process exits with
# status 1.
#
# @param args [Array<String>] the EPUB path, optionally followed by the name
#   of the HTML file whose header and footer should wrap the output
# @return [nil] the result of +puts+
def execute(*args)
  epubname, reffile = args
  if epubname.nil? || !File.exist?(epubname)
    # The program name is interpolated for real here; an escaped "\#{...}"
    # would print the placeholder text literally.
    usage = "Usage: #{File.basename($PROGRAM_NAME)} EPUBfile [file_for_head_and_foot] > HTMLfile\n" \
            "   file_for_head_and_foot: HTML file to extract header and footer area.\n" \
            "                           This file must be contained in the EPUB.\n" \
            "                           If omitted, the first found file is used.\n"
    $stderr.puts usage
    exit 1
  end

  parse_epub(epubname)
  puts join_html(reffile)
end

#join_html(reffile) ⇒ `Object`

# File 'lib/review/epub2html.rb', line 107

# Concatenates every file returned by #make_list into a single HTML string.
#
# The header/footer pair is taken (via #take_headtail) from the first listed
# file when +reffile+ is nil, otherwise from the listed file whose name equals
# +reffile+. It is taken at most once, while @head is still nil.
#
# @param reffile [String, nil] name of the file supplying header and footer
# @return [String] header, the converted sections and footer joined by newlines
def join_html(reffile)
  sections = make_list.map do |fname|
    source = @htmls[fname]
    wanted = reffile.nil? || reffile == fname
    take_headtail(source) if @head.nil? && wanted
    modify_html(fname, source)
  end

  merged = sections.join("\n")
  "#{@head}\n#{merged}\n#{@tail}"
end

#make_list ⇒ `Object`

# File 'lib/review/epub2html.rb', line 119

def make_list
  items = {}
  @opfxml.each_element("/package/manifest/item[@media-type='application/xhtml+xml']") do |e|
    items[e.attributes['id']] = e.attributes['href']
  end

  files = []
  @opfxml.each_element('/package/spine/itemref') do |e|
    files.push(items[e.attributes['idref']])
  end

  files
end

#modify_html(fname, html) ⇒ `Object`

# File 'lib/review/epub2html.rb', line 67

# Rewrites one HTML document so it can be embedded as a <section> of the
# joined output.
#
# * Every id attribute is prefixed with the sanitized file name; an 'E' is
#   appended until the id is unique within this document.
# * Every <a href> that does not start with http:, https:, ftp:, ftps: or
#   mailto: is turned into a fragment link built from the sanitized target
#   file and anchor. A href without a file part (including an empty href)
#   points at the current file.
# * Everything up to and including the <body ...> tag is replaced by
#   <section id="...">, and </body> plus everything after it by </section>.
#
# @param fname [String] file name of the document
# @param html [String] XHTML source of the document
# @return [String] the rewritten markup wrapped in a <section> element
def modify_html(fname, html)
  doc = REXML::Document.new(html)
  doc.context[:attribute_quote] = :quote
  prefix = sanitize(fname)

  used_ids = {}
  doc.each_element('//*[@id]') do |e|
    sid = "#{prefix}_#{sanitize(e.attributes['id'])}"
    sid += 'E' while used_ids[sid]
    used_ids[sid] = true
    e.attributes['id'] = sid
  end

  doc.each_element('//a[@href]') do |e|
    href = e.attributes['href']
    next if href.start_with?('http:', 'https:', 'ftp:', 'ftps:', 'mailto:')

    # "".split('#', 2) returns [], so file may be nil for an empty href.
    file, anc = href.split('#', 2)
    target = file.nil? || file.empty? ? prefix : sanitize(file)
    target = "#{target}_#{sanitize(anc)}" if anc

    e.attributes['href'] = "##{target}"
  end

  doc.to_s.
    sub(/.*(<body.*?>)/m, %Q(<section id="#{prefix}">)).
    sub(%r{(</body>).*}m, '</section>')
end

#parse_epub(epubname) ⇒ `Object`

# File 'lib/review/epub2html.rb', line 41

# Reads the EPUB archive and collects what the converter needs.
#
# An entry whose name ends in .opf is parsed into @opfxml. An entry whose name
# ends in .html or .xhtml is stored in @htmls, keyed by the entry name with
# its first 'OEBPS/' occurrence removed and with the content tagged as UTF-8.
# All other entries are ignored.
#
# @param epubname [String] path of the EPUB file to open
# @return [nil]
def parse_epub(epubname)
  Zip::File.open(epubname) do |archive|
    archive.each do |entry|
      case entry.name
      when /.+\.opf\Z/
        package_xml = entry.get_input_stream.read
        @opfxml = REXML::Document.new(package_xml)
      when /.+\.x?html\Z/
        key = entry.name.sub('OEBPS/', '')
        @htmls[key] = entry.get_input_stream.read.force_encoding('utf-8')
      end
    end
  end
  nil
end

#sanitize(s) ⇒ `Object`

# File 'lib/review/epub2html.rb', line 60

# Turns a file name or anchor into a token used for generated ids.
#
# A trailing .html/.xhtml extension and a leading "./" are removed, the rest
# is escaped with CGI.escape, and the characters . , + % are replaced by
# underscores. The result is always prefixed with "s_".
#
# @param s [String] file name or anchor text
# @return [String] the sanitized token
def sanitize(s)
  stem = s.sub(/\.x?html\Z/, '')
  stem = stem.sub(%r{\A\./}, '')
  escaped = CGI.escape(stem).gsub(/[.,+%]/, '_')
  "s_#{escaped}"
end

#take_headtail(html) ⇒ `Object`

# File 'lib/review/epub2html.rb', line 55

# Extracts the header and footer fragments from one HTML document.
#
# @head becomes the text up to and including the first <body ...> tag; @tail
# becomes the last </body> and everything after it. If a tag is not found,
# the corresponding variable holds the whole input.
#
# @param html [String] HTML source to take the fragments from
# @return [String] the footer fragment (the value assigned to @tail)
def take_headtail(html)
  after_body_open = /(<body.*?>).*/m
  before_body_close = %r{.*(</body>)}m

  @head = html.sub(after_body_open) { Regexp.last_match(1) }
  @tail = html.sub(before_body_close) { Regexp.last_match(1) }
end

Class: ReVIEW::Epub2Html

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Epub2Html

Class Method Details

.execute(*args) ⇒ Object

Instance Method Details

#execute(*args) ⇒ Object

#join_html(reffile) ⇒ Object

#make_list ⇒ Object

#modify_html(fname, html) ⇒ Object

#parse_epub(epubname) ⇒ Object

#sanitize(s) ⇒ Object

#take_headtail(html) ⇒ Object

#initialize ⇒ `Epub2Html`

.execute(*args) ⇒ `Object`

#execute(*args) ⇒ `Object`

#join_html(reffile) ⇒ `Object`

#make_list ⇒ `Object`

#modify_html(fname, html) ⇒ `Object`

#parse_epub(epubname) ⇒ `Object`

#sanitize(s) ⇒ `Object`

#take_headtail(html) ⇒ `Object`