Module: IsoDoc::WordFunction::Postprocess

Included in:: IsoDoc::WordConvert

Defined in:: lib/isodoc/word_function/postprocess.rb,
lib/isodoc/word_function/postprocess_cover.rb

Constant Summary collapse

WORD_NOKOHEAD = # adds the namespaces needed for Word fragments

"<!DOCTYPE html SYSTEM \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\"\nxmlns:v=\"urn:schemas-microsoft-com:vml\" xmlns:o=\"urn:schemas-microsoft-com:office:office\"\nxmlns:w=\"urn:schemas-microsoft-com:office:word\"\nxmlns:m=\"http://schemas.microsoft.com/office/2004/12/omml\">\n<head> <title></title> <meta charset=\"UTF-8\" /> </head>\n<body> </body> </html>\n".freeze

WORD_TOC_SUFFIX1 =

"<p class=\"MsoToc1\"><span lang=\"EN-GB\"><span\n  style='mso-element:field-end'></span></span><span\n  lang=\"EN-GB\"><o:p>&nbsp;</o:p></span></p>\n".freeze

Instance Method Summary collapse

Instance Method Details

#authority_cleanup(docxml) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess_cover.rb', line 92

# Runs authority_cleanup1 once for each boilerplate kind, in the order
# copyright, license, legal, feedback.
#
# @param docxml the document handed unchanged to authority_cleanup1
# @return [Array<String>] the list of kinds that was iterated
def authority_cleanup(docxml)
  kinds = %w(copyright license legal feedback)
  kinds.each { |kind| authority_cleanup1(docxml, kind) }
end

#authority_cleanup1(docxml, klass) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess_cover.rb', line 80

# Moves one boilerplate block into its destination placeholder.
#
# The block is the first div whose id or class is "boilerplate-<klass>";
# the placeholder is the div with id "boilerplate-<klass>-destination".
# Inside the block, h1/h2 elements without text are removed and the
# remaining h1/h2 elements become <p class="TitlePageSubhead">. The
# placeholder is replaced by the block only when both were found.
#
# @param docxml document queried with #at
# @param klass [String] boilerplate kind, interpolated into the XPath
# @return the result of the replacement, or a falsy value when either
#   the placeholder or the block is missing
def authority_cleanup1(docxml, klass)
  placeholder = docxml.at("//div[@id = 'boilerplate-#{klass}-destination']")
  block = docxml.at("//div[@id = 'boilerplate-#{klass}' "\
                    "or @class = 'boilerplate-#{klass}']")
  if block
    block.xpath(".//h1[not(text())] | .//h2[not(text())]")&.each(&:remove)
    block.xpath(".//h1 | .//h2")&.each do |heading|
      heading.name = "p"
      heading["class"] = "TitlePageSubhead"
    end
  end
  placeholder && block && placeholder.replace(block.remove)
end

#colgroup_widths(table) ⇒ `Object`

Assumes the column widths are expressed as percentages.

# File 'lib/isodoc/word_function/postprocess.rb', line 125

# Reads the column widths declared under a table's <colgroup>.
#
# Widths are assumed to be percentages: a trailing "%" is dropped before
# the value is converted with #to_f. A <col> without a width attribute
# yields 0.0 rather than raising NoMethodError on nil.
#
# @param table table node; must respond to #xpath
# @return [Array<Float>] one width per ./colgroup/col, in document order
def colgroup_widths(table)
  table.xpath("./colgroup/col").map do |col|
    col["width"].to_s.sub(/%$/, "").to_f
  end
end

#generate_header(filename, _dir) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess_cover.rb', line 98

# Renders the Liquid header template named by @header into a temp file.
#
# The render parameters are @meta.get, with :labels taken from @labels and
# then from @meta.labels when those are set, plus :filename; all keys are
# converted to strings before rendering.
#
# @param filename value stored under the "filename" template parameter
# @param _dir unused
# @return [Tempfile, nil] the temp file holding the rendered header, or
#   nil when @header is not set
def generate_header(filename, _dir)
  return nil unless @header

  template = IsoDoc::Common.liquid(File.read(@header, encoding: "UTF-8"))
  # the first merge always returns a copy, so @meta.get's own hash is not
  # modified by the :filename assignment below
  settings = @meta.get.merge(@labels ? { labels: @labels } : {})
  settings = settings.merge(labels: @meta.labels) if @meta.labels
  settings[:filename] = filename
  render_params = settings.transform_keys(&:to_s)
  Tempfile.open(%w(header html), encoding: "utf-8") do |file|
    file.write(template.render(render_params))
    file
  end
end

#insert_toc(intro, docxml, level) ⇒ `Object`



# File 'lib/isodoc/word_function/postprocess_cover.rb', line 26

# Replaces the first WORDTOC marker in +intro+ with the generated Word
# table of contents.
#
# The TOC is built before the substitution and inserted through the block
# form of String#sub, so it is copied verbatim. Passing it as a
# replacement string would make backslash sequences in heading text
# (e.g. "\0" or "\\") be interpreted as back-references or escapes.
#
# @param intro [String] intro page markup containing the WORDTOC marker
# @param docxml document passed to make_WordToC
# @param level deepest heading level passed to make_WordToC
# @return [String] a copy of +intro+ with the marker replaced
def insert_toc(intro, docxml, level)
  toc = make_WordToC(docxml, level)
  intro.sub(/WORDTOC/) { toc }
end

#list_add(xpath, lvl) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess.rb', line 164

# Wraps list continuation blocks in ListContLevelN divs, recursing into
# nested lists with the level increased by one.
#
# For every list in +xpath+, only the items that are not inside a nested
# ol/ul are visited. Within an item, each direct p/div/table child except
# the first is wrapped in <div class="ListContLevel<lvl>"/>.
#
# @param xpath collection of list nodes to process
# @param lvl [Integer] nesting level used in the wrapper class name
# @return the collection that was iterated
def list_add(xpath, lvl)
  xpath.each do |list|
    own_items = list.xpath(".//li") - list.xpath(".//ol//li | .//ul//li")
    own_items.each do |item|
      item.xpath("./p | ./div | ./table").each_with_index do |block, idx|
        block.wrap(%{<div class="ListContLevel#{lvl}"/>}) unless idx.zero?
      end
      # outermost nested lists only; deeper ones are reached by recursion
      nested_ul = item.xpath(".//ul") - item.xpath(".//ul//ul | .//ol//ul")
      list_add(nested_ul, lvl + 1)
      nested_ol = item.xpath(".//ol") - item.xpath(".//ul//ol | .//ol//ol")
      list_add(nested_ol, lvl + 1)
    end
  end
end

#make_WordToC(docxml, level) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess_cover.rb', line 69

# Builds the Word table-of-contents markup for headings h1..h<level>.
#
# Each heading found becomes one word_toc_entry, using the digit in its
# element name as the TOC level. The TOC field preface is inserted right
# after the first <p class="MsoToc1"> opening tag and WORD_TOC_SUFFIX1 is
# appended at the end.
#
# @param docxml document queried with #xpath
# @param level [Integer] deepest heading level to include
# @return [String] the TOC markup
def make_WordToC(docxml, level)
  headings = (1..level).map { |i| "//h#{i}" }.join(" | ")
  entries = docxml.xpath(headings).map do |h|
    word_toc_entry(h.name[1].to_i, header_strip(h))
  end
  entries.join.sub(/(<p class="MsoToc1">)/,
                   %{\\1#{word_toc_preface(level)}}) + WORD_TOC_SUFFIX1
end

#postprocess(result, filename, dir) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess.rb', line 32

# Turns the converted document into a Word file and removes the files
# queued in @files_to_delete.
#
# A trailing ".doc" is stripped from +filename+, the header is generated,
# +result+ goes through textcleanup and cleanup (round-tripped through
# to_xhtml/from_xhtml), and the outcome is handed to toWord.
#
# @param result the converted document markup
# @param filename [String] output file name, with or without ".doc"
# @param dir output directory passed on to generate_header and toWord
# @return the @files_to_delete collection
def postprocess(result, filename, dir)
  basename = filename.sub(/\.doc$/, "")
  header = generate_header(basename, dir)
  xhtml = to_xhtml(textcleanup(result))
  cleaned = from_xhtml(cleanup(xhtml))
  toWord(cleaned, basename, dir, header)
  @files_to_delete.each { |path| FileUtils.rm_f(path) }
end

#split_at_section_break(docxml, sect, brk, idx) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess_cover.rb', line 132

# Moves the content that follows a section break into a new div placed
# after the section div.
#
# @param docxml document searched for the div whose class is +sect+
# @param sect [String] class name of the section div being split
# @param brk the break node; content following its parent is moved
# @param idx index appended to +sect+ to form the new div's class name
def split_at_section_break(docxml, sect, brk, idx)
  # candidates: nodes after the break's parent in document order that are
  # also element descendants of a div of class +sect+
  move = brk.parent.xpath("following::node()") &
    brk.document.xpath("//div[@class = '#{sect}']//*")
  # insert <div class='<sect>_<idx>'/> after the first div of class +sect+
  # and take the element that now follows it as the move target
  ins = docxml.at("//div[@class = '#{sect}']")
    .after("<div class='#{sect}_#{idx}'/>").next_element
  move.each do |m|
    # skip nodes that already sit under the new div -- presumably because
    # an ancestor was moved earlier in this loop
    next if m.at("./ancestor::div[@class = '#{sect}_#{idx}']")

    ins << m.remove
  end
end

#style_update(node, css) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess.rb', line 139

# Appends +css+ to a node's style attribute.
#
# When the node already has a style, the end of the value (including an
# optional trailing ";") is replaced by ";" followed by +css+; otherwise
# the style is set to +css+ alone. Does nothing for a nil/false node.
#
# @param node node responding to #[] and #[]=, or nil
# @param css [String] declaration(s) to append
# @return [String, nil] the new style value, or nil when +node+ is falsy
def style_update(node, css)
  return unless node

  current = node["style"]
  node["style"] = if current
                    current.sub(/;?$/, ";#{css}")
                  else
                    css
                  end
end

#table_note_cleanup(docxml) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess.rb', line 23

# Extends the inherited table_note_cleanup: after calling super, every <p>
# that has no class attribute and sits inside an element of class "Note"
# gets class "Note".
#
# @param docxml document queried with #xpath
# @return the collection of paragraphs that was iterated
def table_note_cleanup(docxml)
  super
  # preempt html2doc putting MsoNormal there
  unclassed = docxml.xpath("//p[not(self::*[@class])]" \
                           "[ancestor::*[@class = 'Note']]")
  unclassed.each { |para| para["class"] = "Note" }
end

#to_word_xhtml_fragment(xml) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess.rb', line 18

# Parses +xml+ as a document fragment in the context of the root element
# of a document parsed from WORD_NOKOHEAD.
#
# @param xml [String] markup to turn into a fragment
# @return [Nokogiri::XML::DocumentFragment]
def to_word_xhtml_fragment(xml)
  shell = ::Nokogiri::XML.parse(WORD_NOKOHEAD)
  context = shell.root
  ::Nokogiri::XML::DocumentFragment.new(shell, xml, context)
end

#toWord(result, filename, dir, header) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess.rb', line 40

# Runs the Word-specific cleanup over +result+ and hands it to Html2Doc.
#
# The header file and, when it is a Tempfile, the stylesheet are unlinked
# in an +ensure+ clause, so they are removed even if cleanup or
# Html2Doc.process raises. The original only unlinked them on success.
#
# @param result the document markup to convert
# @param filename output file name passed to Html2Doc
# @param dir output directory passed to Html2Doc
# @param header object responding to #path and #unlink, or nil
# @return the value of Html2Doc.process (the original returned the
#   incidental result of the last unlink)
def toWord(result, filename, dir, header)
  result = from_xhtml(word_cleanup(to_xhtml(result)))
  @wordstylesheet = wordstylesheet_update
  Html2Doc.process(
    result,
    filename: filename,
    imagedir: @localdir,
    stylesheet: @wordstylesheet&.path,
    header_file: header&.path, dir: dir,
    asciimathdelims: [@openmathdelim, @closemathdelim],
    liststyles: { ul: @ulstyle, ol: @olstyle }
  )
ensure
  header&.unlink
  @wordstylesheet.unlink if @wordstylesheet.is_a?(Tempfile)
end

#word_admonition_images(docxml) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess.rb', line 71

# Resizes every image inside an Admonition div.
#
# The width and height attributes are set from Html2Doc.image_resize,
# called with the image, its local file, @maxheight and 300.
#
# @param docxml document queried with #xpath
# @return the collection of images that was iterated
def word_admonition_images(docxml)
  docxml.xpath("//div[@class = 'Admonition']//img").each do |img|
    width, height =
      Html2Doc.image_resize(img, image_localfile(img), @maxheight, 300)
    img["width"] = width
    img["height"] = height
  end
end

#word_annex_cleanup(docxml) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess.rb', line 197

# No-op in this module: the body is empty and +docxml+ is left untouched.
# word_cleanup calls it before any other cleanup step.
#
# @param docxml the document (unused here)
# @return [nil]
def word_annex_cleanup(docxml); end

#word_cleanup(docxml) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess.rb', line 78

# Applies every Word-specific cleanup step to +docxml+, in a fixed order,
# and returns the same document.
#
# @param docxml the document passed to each step
# @return +docxml+
def word_cleanup(docxml)
  # order matters: each step is called with the document left by the
  # previous one
  steps = %i(
    word_annex_cleanup
    word_preface
    word_nested_tables
    word_colgroup
    word_table_align
    word_table_separator
    word_admonition_images
    word_list_continuations
    word_example_cleanup
    word_pseudocode_cleanup
    word_image_caption
    word_section_breaks
    authority_cleanup
    word_footnote_format
  )
  steps.each { |step| send(step, docxml) }
  docxml
end

#word_colgroup(docxml) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess.rb', line 96

# Sets a percentage width on every cell of each table that has a
# <colgroup>, by summing the colgroup widths of the columns it spans.
#
# Column positions are found with an occupancy grid that accounts for
# rowspan and colspan. The grid is rebuilt for every table and creates
# rows on demand, so a rowspan that runs past the last row no longer
# fails on a missing row. Columns beyond those declared in the colgroup
# count as width 0 instead of raising on nil.
#
# @param docxml document queried with #xpath
# @return the collection of tables that was iterated
def word_colgroup(docxml)
  docxml.xpath("//table[colgroup]").each do |table|
    widths = colgroup_widths(table)
    occupied = Hash.new { |grid, row| grid[row] = {} }
    table.xpath(".//tr").each_with_index do |tr, r|
      tr.xpath("./td | ./th").each do |cell|
        rowspan = cell.attr("rowspan")&.to_i || 1
        colspan = cell.attr("colspan")&.to_i || 1
        # first column in this row not taken by an earlier cell or rowspan
        col = 0
        col += 1 while occupied[r][col]
        cols = col...(col + colspan)
        (r...(r + rowspan)).each do |y|
          cols.each { |x| occupied[y][x] = 1 }
        end
        # plain left-to-right addition, so the width text matches the
        # original's float arithmetic
        total = cols.inject(0) { |sum, x| sum + widths[x].to_f }
        cell["width"] = "#{total}%"
      end
    end
  end
end

#word_cover(docxml) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess_cover.rb', line 9

# Inserts the populated cover page before the first child of the
# WordSection1 div.
#
# The cover template is read from @wordcoverpage, filled in with
# populate_template, parsed as a Word XHTML fragment and serialised as
# US-ASCII before insertion.
#
# @param docxml document queried with #at
# @return [String] the serialised cover markup that was inserted
def word_cover(docxml)
  template = File.read(@wordcoverpage, encoding: "UTF-8")
  fragment = to_word_xhtml_fragment(populate_template(template, :word))
  first_node = docxml.at('//div[@class="WordSection1"]').children.first
  first_node.previous = fragment.to_xml(encoding: "US-ASCII")
end

#word_example_cleanup(docxml) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess.rb', line 199

# Gives class "example" to every <p> without a class attribute that sits
# inside a div of class "example".
#
# @param docxml document queried with #xpath
# @return the collection of paragraphs that was iterated
def word_example_cleanup(docxml)
  unclassed = docxml.xpath("//div[@class = 'example']//p[not(@class)]")
  unclassed.each { |para| para["class"] = "example" }
end

#word_footnote_format(docxml) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess.rb', line 223

# Formats footnote references for Word and returns the document.
#
# The footnote content lives in a[@epub:type = 'footnote']//sup, but Word
# needs content injected around its autonumbered footnote reference, so
# each such anchor goes through footnote_reference_format. Table footnote
# references (a or span of class TableFootnoteRef) go through
# table_footnote_reference_format.
#
# @param docxml document queried with #xpath
# @return +docxml+
def word_footnote_format(docxml)
  docxml.xpath("//a[@epub:type = 'footnote']")
    .each { |ref| footnote_reference_format(ref) }
  table_refs = "//a[@class = 'TableFootnoteRef'] | "\
               "//span[@class = 'TableFootnoteRef']"
  docxml.xpath(table_refs)
    .each { |ref| table_footnote_reference_format(ref) }
  docxml
end

#word_image_caption(docxml) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess.rb', line 146

# Keeps figure and source titles on the same page as what precedes them.
#
# For each FigureTitle/SourceTitle paragraph: when the preceding element
# is an <img>, it is swapped for <p class='figure'> containing that image;
# then "page-break-after:avoid;" is appended to the style of whatever
# element now precedes the title.
#
# @param docxml document queried with #xpath
# @return the collection of title paragraphs that was iterated
def word_image_caption(docxml)
  titles = docxml.xpath("//p[@class = 'FigureTitle' or @class = 'SourceTitle']")
  titles.each do |title|
    before = title.previous_element
    if before&.name == "img"
      before.swap("<p class='figure'>#{before.to_xml}</p>")
    end
    # looked up again: after the swap the previous element is the wrapper
    style_update(title.previous_element, "page-break-after:avoid;")
  end
end

#word_intro(docxml, level) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess_cover.rb', line 17

# Inserts the populated intro page, with its table of contents, before the
# first child of the WordSection2 div.
#
# The intro template is read from @wordintropage, its TOC marker is filled
# in by insert_toc, the result is populated with populate_template, parsed
# as a Word XHTML fragment and serialised as US-ASCII before insertion.
#
# @param docxml document queried with #at and passed to insert_toc
# @param level TOC depth passed to insert_toc
# @return [String] the serialised intro markup that was inserted
def word_intro(docxml, level)
  template = File.read(@wordintropage, encoding: "UTF-8")
  with_toc = insert_toc(template, docxml, level)
  fragment = to_word_xhtml_fragment(populate_template(with_toc, :word))
  first_node = docxml.at('//div[@class="WordSection2"]').children.first
  first_node.previous = fragment.to_xml(encoding: "US-ASCII")
end

#word_list_continuations(docxml) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess.rb', line 157

# Starts list_add at level 1 for every top-level list: first all <ul>,
# then all <ol>, that have no ul/ol ancestor.
#
# @param docxml document queried with #xpath
# @return the result of the second list_add call
def word_list_continuations(docxml)
  top_level = "[not(ancestor::ul) and not(ancestor::ol)]"
  unordered = docxml.xpath("//ul#{top_level}")
  list_add(unordered, 1)
  ordered = docxml.xpath("//ol#{top_level}")
  list_add(ordered, 1)
end

#word_nested_tables(docxml) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess.rb', line 131

# Moves nested tables out of their enclosing table.
#
# For every table, each descendant table is removed and re-inserted
# directly after it. The descendants are handled in reverse order, so
# they end up after the outer table in their original order.
#
# @param docxml document queried with #xpath
# @return the collection of tables that was iterated
def word_nested_tables(docxml)
  docxml.xpath("//table").each do |outer|
    nested = outer.xpath(".//table").reverse
    nested.each { |inner| outer.next = inner.remove }
  end
end

#word_preface(docxml) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess_cover.rb', line 4

# Adds the cover page when @wordcoverpage is set and the intro page (with
# @wordToClevels TOC levels) when @wordintropage is set.
#
# @param docxml document passed to word_cover and word_intro
# @return the result of word_intro, or nil when @wordintropage is unset
def word_preface(docxml)
  @wordcoverpage && word_cover(docxml)
  return unless @wordintropage

  word_intro(docxml, @wordToClevels)
end

#word_pseudocode_cleanup(docxml) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess.rb', line 205

# Gives class "pseudocode" to every <p> without a class attribute that
# sits inside a div of class "pseudocode".
#
# @param docxml document queried with #xpath
# @return the collection of paragraphs that was iterated
def word_pseudocode_cleanup(docxml)
  unclassed = docxml.xpath("//div[@class = 'pseudocode']//p[not(@class)]")
  unclassed.each { |para| para["class"] = "pseudocode" }
end

#word_remove_pb_before_annex(docxml) ⇒ `Object`

Applies to <div class="WordSectionN_M"><p><pagebreak/></p>…

# File 'lib/isodoc/word_function/postprocess.rb', line 212

# Drops a leading page-break paragraph from split section divs.
#
# A div is affected only when its class looks like WordSectionN_M, its
# first element is a non-empty <p>, and that paragraph's first element is
# a <br> whose style is exactly the Word page-break style. In that case
# the paragraph is removed.
#
# @param docxml document queried with #xpath
# @return the collection of divs that was iterated
def word_remove_pb_before_annex(docxml)
  page_break = "mso-special-character:line-break;page-break-before:always"
  docxml.xpath("//div[p/br]").each do |div|
    next unless /^WordSection\d+_\d+$/.match(div["class"])

    first = div.elements[0]
    next unless first.name == "p" && !first.elements.empty?

    lead = first.elements[0]
    next unless lead.name == "br" && lead["style"] == page_break

    first.remove
  end
end

#word_section_breaks(docxml) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess_cover.rb', line 112

# Splits WordSection2 and WordSection3 at their orientation breaks.
#
# Resets @landscapestyle to an empty string, runs word_section_breaks1 for
# both sections, removes leading page breaks with
# word_remove_pb_before_annex, and finally deletes the orientation
# attribute from every <br>.
#
# @param docxml document queried with #xpath
# @return the collection of <br> elements that was iterated
def word_section_breaks(docxml)
  @landscapestyle = ""
  %w(WordSection2 WordSection3).each do |sect|
    word_section_breaks1(docxml, sect)
  end
  word_remove_pb_before_annex(docxml)
  oriented = docxml.xpath("//br[@orientation]")
  oriented.each { |br| br.delete("orientation") }
end

#word_section_breaks1(docxml, sect) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess_cover.rb', line 122

# Splits one section at each orientation break, last break first.
#
# For every <br orientation=...> inside the div of class +sect+, taken in
# reverse document order, a CSS rule for div.<sect>_<i> is appended to
# @landscapestyle (page <sect>L for landscape, <sect>P otherwise) and the
# section is split at that break.
#
# @param docxml document queried with #xpath
# @param sect [String] class name of the section div
# @return the reversed collection of breaks that was iterated
def word_section_breaks1(docxml, sect)
  breaks = docxml.xpath("//div[@class = '#{sect}']//br[@orientation]").reverse
  breaks.each_with_index do |br, i|
    page = br["orientation"] == "landscape" ? "L" : "P"
    @landscapestyle += "\ndiv.#{sect}_#{i} {page:#{sect}#{page};}\n"
    split_at_section_break(docxml, sect, br, i)
  end
end

#word_table_align(docxml) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess.rb', line 180

# Copies a cell's align attribute onto its direct <p> children as a
# text-align style, skipping paragraphs that carry their own align
# attribute.
#
# @param docxml document queried with #xpath
# @return the collection of paragraphs that was iterated
def word_table_align(docxml)
  docxml.xpath("//td[@align]/p | //th[@align]/p").each do |para|
    unless para["align"]
      style_update(para, "text-align: #{para.parent['align']}")
    end
  end
end

#word_table_separator(docxml) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess.rb', line 188

# Turns empty TableTitle paragraphs into zero-height separators.
#
# Every <p class="TableTitle"> without children gets "font-size:0pt;"
# appended to its style and a non-breaking space as content. The style is
# updated through style_update, which also covers a paragraph that has no
# style attribute yet; the original called #sub on nil in that case.
#
# @param docxml document queried with #xpath
# @return the collection of paragraphs that was iterated
def word_table_separator(docxml)
  docxml.xpath("//p[@class = 'TableTitle']").each do |t|
    next unless t.children.empty?

    style_update(t, "font-size:0pt;")
    t.children = "&nbsp;"
  end
end

#word_toc_entry(toclevel, heading) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess_cover.rb', line 30

# Builds the Word markup for one table-of-contents entry.
#
# The entry is a <p class="MsoToc<toclevel>"> holding a hyperlink to
# #_Toc<bookmark> and a PAGEREF field for the same bookmark, where the
# bookmark id comes from bookmarkid.
#
# @param toclevel [Integer] TOC level used in the paragraph class
# @param heading [String] heading markup shown as the entry text
# @return [String] the entry markup
def word_toc_entry(toclevel, heading)
  bookmark = bookmarkid # Random.rand(1000000000)
  <<~TOC
    <p class="MsoToc#{toclevel}"><span class="MsoHyperlink"><span
    lang="EN-GB" style='mso-no-proof:yes'>
    <a href="#_Toc#{bookmark}">#{heading}<span lang="EN-GB"
    class="MsoTocTextSpan">
    <span style='mso-tab-count:1 dotted'>. </span>
    </span><span lang="EN-GB" class="MsoTocTextSpan">
    <span style='mso-element:field-begin'></span></span>
    <span lang="EN-GB"
    class="MsoTocTextSpan"> PAGEREF _Toc#{bookmark} \\h </span>
      <span lang="EN-GB" class="MsoTocTextSpan"><span
      style='mso-element:field-separator'></span></span><span
      lang="EN-GB" class="MsoTocTextSpan">1</span>
      <span lang="EN-GB"
      class="MsoTocTextSpan"></span><span
      lang="EN-GB" class="MsoTocTextSpan"><span
      style='mso-element:field-end'></span></span></a></span></span></p>

  TOC
end

#word_toc_preface(level) ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess_cover.rb', line 53

# Builds the opening of the Word TOC field: the field-begin marker, the
# TOC instruction covering heading levels 1 to +level+, and the
# field-separator marker.
#
# @param level [Integer] deepest heading level named in the instruction
# @return [String] the frozen preface markup
def word_toc_preface(level)
  <<~TOC.freeze
    <span lang="EN-GB"><span
      style='mso-element:field-begin'></span><span
      style='mso-spacerun:yes'>&#xA0;</span>TOC
      \\o &quot;1-#{level}&quot; \\h \\z \\u <span
      style='mso-element:field-separator'></span></span>
  TOC
end

#wordstylesheet_update ⇒ `Object`

# File 'lib/isodoc/word_function/postprocess.rb', line 56

# Appends the landscape rules and the override stylesheet to the Word
# stylesheet file.
#
# The file is opened in append mode with the block form of File.open, so
# the handle is closed even if a write raises. @landscapestyle is written
# when it is not empty; when @wordstylesheet_override is set, its content
# is appended and the override is closed.
#
# NOTE(review): the original also had an
# `elsif @wordstylesheet_override && !@wordstylesheet` branch adopting the
# override as the stylesheet. It could never run because of the nil guard
# below, so it is dropped here. An override without a base stylesheet is
# therefore still ignored -- confirm that this is intended.
#
# @return the @wordstylesheet object, or nil when it is not set
def wordstylesheet_update
  return if @wordstylesheet.nil?

  File.open(@wordstylesheet.path, "a") do |f|
    f.write(@landscapestyle) unless @landscapestyle.empty?
    if @wordstylesheet_override
      f.write(@wordstylesheet_override.read)
      @wordstylesheet_override.close
    end
  end
  @wordstylesheet
end

Module: IsoDoc::WordFunction::Postprocess

Constant Summary collapse

Instance Method Summary collapse

Instance Method Details

#authority_cleanup(docxml) ⇒ Object

#authority_cleanup1(docxml, klass) ⇒ Object

#colgroup_widths(table) ⇒ Object

#generate_header(filename, _dir) ⇒ Object

#insert_toc(intro, docxml, level) ⇒ Object

#list_add(xpath, lvl) ⇒ Object

#make_WordToC(docxml, level) ⇒ Object

#postprocess(result, filename, dir) ⇒ Object

#split_at_section_break(docxml, sect, brk, idx) ⇒ Object

#style_update(node, css) ⇒ Object

#table_note_cleanup(docxml) ⇒ Object

#to_word_xhtml_fragment(xml) ⇒ Object

#toWord(result, filename, dir, header) ⇒ Object

#word_admonition_images(docxml) ⇒ Object

#word_annex_cleanup(docxml) ⇒ Object

#word_cleanup(docxml) ⇒ Object

#word_colgroup(docxml) ⇒ Object

#word_cover(docxml) ⇒ Object

#word_example_cleanup(docxml) ⇒ Object

#word_footnote_format(docxml) ⇒ Object

#word_image_caption(docxml) ⇒ Object

#word_intro(docxml, level) ⇒ Object

#word_list_continuations(docxml) ⇒ Object

#word_nested_tables(docxml) ⇒ Object

#word_preface(docxml) ⇒ Object

#word_pseudocode_cleanup(docxml) ⇒ Object

#word_remove_pb_before_annex(docxml) ⇒ Object

#word_section_breaks(docxml) ⇒ Object

#word_section_breaks1(docxml, sect) ⇒ Object

#word_table_align(docxml) ⇒ Object

#word_table_separator(docxml) ⇒ Object

#word_toc_entry(toclevel, heading) ⇒ Object

#word_toc_preface(level) ⇒ Object

#wordstylesheet_update ⇒ Object

#authority_cleanup(docxml) ⇒ `Object`

#authority_cleanup1(docxml, klass) ⇒ `Object`

#colgroup_widths(table) ⇒ `Object`

#generate_header(filename, _dir) ⇒ `Object`

#insert_toc(intro, docxml, level) ⇒ `Object`

#list_add(xpath, lvl) ⇒ `Object`

#make_WordToC(docxml, level) ⇒ `Object`

#postprocess(result, filename, dir) ⇒ `Object`

#split_at_section_break(docxml, sect, brk, idx) ⇒ `Object`

#style_update(node, css) ⇒ `Object`

#table_note_cleanup(docxml) ⇒ `Object`

#to_word_xhtml_fragment(xml) ⇒ `Object`

#toWord(result, filename, dir, header) ⇒ `Object`

#word_admonition_images(docxml) ⇒ `Object`

#word_annex_cleanup(docxml) ⇒ `Object`

#word_cleanup(docxml) ⇒ `Object`

#word_colgroup(docxml) ⇒ `Object`

#word_cover(docxml) ⇒ `Object`

#word_example_cleanup(docxml) ⇒ `Object`

#word_footnote_format(docxml) ⇒ `Object`

#word_image_caption(docxml) ⇒ `Object`

#word_intro(docxml, level) ⇒ `Object`

#word_list_continuations(docxml) ⇒ `Object`

#word_nested_tables(docxml) ⇒ `Object`

#word_preface(docxml) ⇒ `Object`

#word_pseudocode_cleanup(docxml) ⇒ `Object`

#word_remove_pb_before_annex(docxml) ⇒ `Object`

#word_section_breaks(docxml) ⇒ `Object`

#word_section_breaks1(docxml, sect) ⇒ `Object`

#word_table_align(docxml) ⇒ `Object`

#word_table_separator(docxml) ⇒ `Object`

#word_toc_entry(toclevel, heading) ⇒ `Object`

#word_toc_preface(level) ⇒ `Object`

#wordstylesheet_update ⇒ `Object`