Module: IsoDoc::WordFunction::Postprocess
- Included in:
- IsoDoc::WordConvert
- Defined in:
- lib/isodoc/word_function/postprocess.rb,
lib/isodoc/word_function/postprocess_cover.rb
Constant Summary collapse
- WORD_NOKOHEAD =
add namespaces for Word fragments
<<~HERE.freeze <!DOCTYPE html SYSTEM "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml"> <head> <title></title> <meta charset="UTF-8" /> </head> <body> </body> </html> HERE
- WORD_TOC_SUFFIX1 =
<<~TOC.freeze <p class="MsoToc1"><span lang="EN-GB"><span style='mso-element:field-end'></span></span><span lang="EN-GB"><o:p> </o:p></span></p> TOC
Instance Method Summary collapse
- #authority_cleanup(docxml) ⇒ Object
- #authority_cleanup1(docxml, klass) ⇒ Object
- #generate_header(filename, _dir) ⇒ Object
- #insert_toc(intro, docxml, level) ⇒ Object
- #list_add(xpath, lvl) ⇒ Object
- #make_WordToC(docxml, level) ⇒ Object
- #postprocess(result, filename, dir) ⇒ Object
- #split_at_section_break(docxml, sect, br, i) ⇒ Object
- #style_update(node, css) ⇒ Object
- #table_note_cleanup(docxml) ⇒ Object
- #to_word_xhtml_fragment(xml) ⇒ Object
- #toWord(result, filename, dir, header) ⇒ Object
- #word_admonition_images(docxml) ⇒ Object
- #word_annex_cleanup(docxml) ⇒ Object
- #word_cleanup(docxml) ⇒ Object
- #word_cover(docxml) ⇒ Object
- #word_example_cleanup(docxml) ⇒ Object
- #word_footnote_format(docxml) ⇒ Object
- #word_image_caption(docxml) ⇒ Object
- #word_intro(docxml, level) ⇒ Object
- #word_list_continuations(docxml) ⇒ Object
- #word_nested_tables(docxml) ⇒ Object
- #word_preface(docxml) ⇒ Object
- #word_pseudocode_cleanup(docxml) ⇒ Object
-
#word_remove_pb_before_annex(docxml) ⇒ Object
Applies to <div class="WordSectionN_M"><p><pagebreak/></p>…
- #word_section_breaks(docxml) ⇒ Object
- #word_section_breaks1(docxml, sect) ⇒ Object
- #word_table_align(docxml) ⇒ Object
-
#word_table_separator(docxml) ⇒ Object
EMPTY_PARA = "<p style='margin-top:0cm;margin-right:0cm;"\ "margin-bottom:0cm;margin-left:0.0pt;margin-bottom:.0001pt;"\ "line-height:1.0pt;mso-line-height-rule:exactly'>"\ "<span lang=EN-GB style='display:none;mso-hide:all'> </span></p>".
- #word_toc_entry(toclevel, heading) ⇒ Object
- #word_toc_preface(level) ⇒ Object
Instance Method Details
#authority_cleanup(docxml) ⇒ Object
100 101 102 103 104 |
# File 'lib/isodoc/word_function/postprocess.rb', line 100
# Applies #authority_cleanup1 to each boilerplate class in turn:
# copyright, license, legal, feedback.
#
# @param docxml document tree, handed unchanged to #authority_cleanup1
# @return [Array<String>] the list of boilerplate class names iterated over
def authority_cleanup(docxml)
  %w[copyright license legal feedback].each do |klass|
    authority_cleanup1(docxml, klass)
  end
end
#authority_cleanup1(docxml, klass) ⇒ Object
89 90 91 92 93 94 95 96 97 98 |
# File 'lib/isodoc/word_function/postprocess.rb', line 89
# Moves one boilerplate block into its placeholder.
#
# Looks up the placeholder div with id "boilerplate-KLASS-destination" and the
# boilerplate div whose id or class is "boilerplate-KLASS". Inside the
# boilerplate div, h1/h2 elements without a text node child are removed, and
# every remaining h1/h2 is renamed to <p class="TitlePageSubhead">. When both
# divs exist, the placeholder is replaced by the (detached) boilerplate div.
#
# @param docxml document tree responding to #at
# @param klass [String] boilerplate kind, interpolated into the XPath queries
# @return the result of the replacement, or a falsy value when either div is
#   missing
def authority_cleanup1(docxml, klass)
  dest = docxml.at("//div[@id = 'boilerplate-#{klass}-destination']")
  auth = docxml.at("//div[@id = 'boilerplate-#{klass}' or " \
                   "@class = 'boilerplate-#{klass}']")
  auth&.xpath(".//h1[not(text())] | .//h2[not(text())]")&.each(&:remove)
  auth&.xpath(".//h1 | .//h2")&.each do |h|
    h.name = "p"
    h["class"] = "TitlePageSubhead"
  end
  # Nothing is moved unless both the placeholder and the boilerplate exist.
  dest && auth && dest.replace(auth.remove)
end
#generate_header(filename, _dir) ⇒ Object
183 184 185 186 187 188 189 190 191 192 193 |
# File 'lib/isodoc/word_function/postprocess.rb', line 183
# Renders the header template named by @header into a temporary file.
#
# The template parameters are @meta.get merged with @labels and then
# @meta.labels (either may be nil), plus :filename; all keys are converted to
# strings before rendering.
#
# @param filename [String] value exposed to the template as "filename"
# @param _dir unused
# @return [Tempfile, nil] the temp file holding the rendered header (the block
#   form of Tempfile.open hands back the block's value), or nil when @header
#   is not set
def generate_header(filename, _dir)
  return nil unless @header

  template = IsoDoc::Common.liquid(File.read(@header, encoding: "UTF-8"))
  meta = @meta.get.merge(@labels || {}).merge(@meta.labels || {})
  meta[:filename] = filename
  params = meta.map { |k, v| [k.to_s, v] }.to_h
  Tempfile.open(%w[header html], encoding: "utf-8") do |f|
    f.write(template.render(params))
    f
  end
end
#insert_toc(intro, docxml, level) ⇒ Object
25 26 27 |
# File 'lib/isodoc/word_function/postprocess_cover.rb', line 25
# Substitutes the first WORDTOC placeholder in +intro+ with the generated
# table of contents.
#
# The block form of String#sub is used so the ToC markup is inserted
# literally: with a string replacement, backslash sequences such as \1, \& or
# \\ coming from heading text would be interpreted as back-references.
#
# @param intro [String] intro page markup containing the WORDTOC placeholder
# @param docxml document tree passed on to #make_WordToC
# @param level [Integer] deepest heading level passed on to #make_WordToC
# @return [String] a copy of +intro+ with the first placeholder replaced
def insert_toc(intro, docxml, level)
  # Built up front so the ToC is generated even when no placeholder is present.
  toc = make_WordToC(docxml, level)
  intro.sub(/WORDTOC/) { toc }
end
#list_add(xpath, lvl) ⇒ Object
127 128 129 130 131 132 133 134 135 136 137 138 |
# File 'lib/isodoc/word_function/postprocess.rb', line 127
# Wraps continuation blocks of list items and recurses into nested lists.
#
# For every list in +xpath+, the items considered are its li descendants minus
# those that sit inside a nested ol/ul. Within each such item, every direct
# p/div/table child except the first is wrapped in
# <div class="ListContLevelN"/>, N being +lvl+. The item's outermost nested ul
# and ol lists are then processed with +lvl+ + 1.
#
# @param xpath collection of list nodes
# @param lvl [Integer] nesting depth used in the wrapper class name
def list_add(xpath, lvl)
  xpath.each do |list|
    own_items = list.xpath(".//li") - list.xpath(".//ol//li | .//ul//li")
    own_items.each do |item|
      # The first block belongs to the item itself; the rest are continuations.
      item.xpath("./p | ./div | ./table").drop(1).each do |block|
        block.wrap(%{<div class="ListContLevel#{lvl}"/>})
      end
      nested_ul = item.xpath(".//ul") - item.xpath(".//ul//ul | .//ol//ul")
      list_add(nested_ul, lvl + 1)
      nested_ol = item.xpath(".//ol") - item.xpath(".//ul//ol | .//ol//ol")
      list_add(nested_ol, lvl + 1)
    end
  end
end
#make_WordToC(docxml, level) ⇒ Object
68 69 70 71 72 73 74 75 76 77 |
# File 'lib/isodoc/word_function/postprocess_cover.rb', line 68
# Assembles the Word table-of-contents markup.
#
# Selects every heading from h1 down to h<level>, renders each one with
# #word_toc_entry (level taken from the digit in the tag name, text from
# #header_strip), inserts #word_toc_preface right after the first
# <p class="MsoToc1"> opening tag, and appends WORD_TOC_SUFFIX1.
#
# @param docxml document tree responding to #xpath
# @param level [Integer] deepest heading level to include
# @return [String] the ToC markup
def make_WordToC(docxml, level)
  # docxml.xpath("//h1 | //h2[not(ancestor::*[@class = 'Section3'])]").
  xpath = (1..level).map { |i| "//h#{i}" }.join(" | ")
  toc = docxml.xpath(xpath).map do |h|
    word_toc_entry(h.name[1].to_i, header_strip(h))
  end.join
  # \1 re-emits the matched opening tag so the preface lands just inside it.
  toc.sub(/(<p class="MsoToc1">)/, %{\\1#{word_toc_preface(level)}}) +
    WORD_TOC_SUFFIX1
end
#postprocess(result, filename, dir) ⇒ Object
34 35 36 37 38 39 40 |
# File 'lib/isodoc/word_function/postprocess.rb', line 34
# Drives the Word output stage.
#
# Strips a trailing ".doc" from +filename+, generates the header file, runs
# +result+ through textcleanup -> to_xhtml -> cleanup -> from_xhtml, hands the
# outcome to #toWord, and finally force-removes every path listed in
# @files_to_delete.
#
# @param result the converted document, as accepted by #textcleanup
# @param filename [String] output name, with or without the .doc extension
# @param dir passed through to #generate_header and #toWord
def postprocess(result, filename, dir)
  basename = filename.sub(/\.doc$/, "")
  header = generate_header(basename, dir)
  xhtml = to_xhtml(textcleanup(result))
  cleaned = from_xhtml(cleanup(xhtml))
  toWord(cleaned, basename, dir, header)
  @files_to_delete.each { |path| FileUtils.rm_f(path) }
end
#split_at_section_break(docxml, sect, br, i) ⇒ Object
212 213 214 215 216 217 218 219 220 221 |
# File 'lib/isodoc/word_function/postprocess.rb', line 212
# Moves the content that follows a section break into a new sibling div.
#
# The nodes to move are those that both follow the break's parent and are
# element descendants of <div class="SECT">. A new <div class="SECT_I"/> is
# inserted after the first <div class="SECT">, and each selected node is
# detached and appended to it, unless it already sits inside the new div
# (because one of its ancestors was moved first).
#
# @param docxml document tree responding to #at
# @param sect [String] class name of the section div being split
# @param br the break node at which to split
# @param i [Integer] index used as suffix of the new div's class
def split_at_section_break(docxml, sect, br, i)
  after_break = br.parent.xpath("following::node()")
  in_section = br.document.xpath("//div[@class = '#{sect}']//*")
  to_move = after_break & in_section
  section = docxml.at("//div[@class = '#{sect}']")
  target = section.after("<div class='#{sect}_#{i}'/>").next_element
  to_move.each do |node|
    next if node.at("./ancestor::div[@class = '#{sect}_#{i}']")

    target << node.remove
  end
end
#style_update(node, css) ⇒ Object
106 107 108 109 |
# File 'lib/isodoc/word_function/postprocess.rb', line 106
# Appends +css+ to a node's style attribute.
#
# The existing value has surrounding whitespace and one trailing ";" removed
# before +css+ is joined on with ";". The join is made at the very end of the
# value (not at the end of its first line), so a multi-line style keeps all
# of its declarations ahead of the appended one. A missing or blank style is
# simply set to +css+, without a leading ";".
#
# @param node object responding to #[] and #[]=, or nil
# @param css [String] declaration(s) to append
# @return [String, nil] the new style value, or nil when +node+ is nil
def style_update(node, css)
  return unless node

  current = node["style"].to_s.strip.sub(/;\z/, "")
  node["style"] = current.empty? ? css : "#{current};#{css}"
end
#table_note_cleanup(docxml) ⇒ Object
25 26 27 28 29 30 31 32 |
# File 'lib/isodoc/word_function/postprocess.rb', line 25
# Runs the inherited table-note cleanup, then gives class "Note" to every <p>
# that has no class attribute and has an ancestor of class "Note".
#
# @param docxml document tree responding to #xpath
def table_note_cleanup(docxml)
  super
  # preempt html2doc putting MsoNormal there
  unclassed = "//p[not(self::*[@class])][ancestor::*[@class = 'Note']]"
  docxml.xpath(unclassed).each { |para| para["class"] = "Note" }
end
#to_word_xhtml_fragment(xml) ⇒ Object
19 20 21 22 23 |
# File 'lib/isodoc/word_function/postprocess.rb', line 19
# Parses +xml+ as a document fragment in the context of the root element of a
# document built from WORD_NOKOHEAD.
#
# @param xml [String] markup to parse
# @return [Nokogiri::XML::DocumentFragment]
def to_word_xhtml_fragment(xml)
  host = ::Nokogiri::XML.parse(WORD_NOKOHEAD)
  ::Nokogiri::XML::DocumentFragment.new(host, xml, host.root)
end
#toWord(result, filename, dir, header) ⇒ Object
42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
# File 'lib/isodoc/word_function/postprocess.rb', line 42
# Runs the Word-specific cleanup over +result+ and hands it to Html2Doc.
#
# When @landscapestyle is non-empty and a @wordstylesheet exists, the
# landscape rules are written into the stylesheet first. After
# Html2Doc.process returns, the header file and the stylesheet are unlinked.
#
# @param result document markup accepted by #to_xhtml
# @param filename [String] passed to Html2Doc as filename:
# @param dir passed to Html2Doc as dir:
# @param header header file object responding to #path and #unlink, or nil
def toWord(result, filename, dir, header)
  result = from_xhtml(word_cleanup(to_xhtml(result)))
  if !@landscapestyle.empty? && @wordstylesheet
    # NOTE(review): if @wordstylesheet is a Tempfile, #open reopens it in "r+"
    # mode, so this write would start at offset 0 — confirm that overwriting
    # rather than appending is intended.
    @wordstylesheet.open
    @wordstylesheet.write(@landscapestyle)
    @wordstylesheet.close
  end
  Html2Doc.process(
    result,
    filename: filename,
    stylesheet: @wordstylesheet&.path,
    header_file: header&.path,
    dir: dir,
    asciimathdelims: [@openmathdelim, @closemathdelim],
    liststyles: { ul: @ulstyle, ol: @olstyle }
  )
  header&.unlink
  @wordstylesheet&.unlink
end
#word_admonition_images(docxml) ⇒ Object
57 58 59 60 61 62 |
# File 'lib/isodoc/word_function/postprocess.rb', line 57
# Sets width and height on every <img> inside <div class="Admonition"> to the
# pair returned by Html2Doc.image_resize, called with the image node, its
# local file (from #image_localfile), @maxheight and 300.
#
# @param docxml document tree responding to #xpath
def word_admonition_images(docxml)
  docxml.xpath("//div[@class = 'Admonition']//img").each do |img|
    width, height =
      Html2Doc.image_resize(img, image_localfile(img), @maxheight, 300)
    img["width"] = width
    img["height"] = height
  end
end
#word_annex_cleanup(docxml) ⇒ Object
168 169 |
# File 'lib/isodoc/word_function/postprocess.rb', line 168
# Does nothing in this module; #word_cleanup calls it first.
# NOTE(review): presumably an override point for including classes — confirm.
#
# @param docxml document tree (unused here)
# @return [nil]
def word_annex_cleanup(docxml)
  # intentionally empty
end
#word_cleanup(docxml) ⇒ Object
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
# File 'lib/isodoc/word_function/postprocess.rb', line 64
# Runs every Word-specific cleanup pass over +docxml+, in a fixed order.
#
# NOTE(review): the names of the two calls surrounding #word_section_breaks
# were missing from this rendering of the source. They are restored here as
# #word_image_caption (before) and #authority_cleanup (after), the two
# single-argument passes of this module that are not otherwise invoked —
# confirm the order against the repository.
#
# @param docxml document tree, mutated in place by the passes
# @return the same +docxml+ object
def word_cleanup(docxml)
  word_annex_cleanup(docxml)
  word_preface(docxml)
  word_nested_tables(docxml)
  word_table_align(docxml)
  word_table_separator(docxml)
  word_admonition_images(docxml)
  word_list_continuations(docxml)
  word_example_cleanup(docxml)
  word_pseudocode_cleanup(docxml)
  word_image_caption(docxml)
  word_section_breaks(docxml)
  authority_cleanup(docxml)
  word_footnote_format(docxml)
  docxml
end
#word_cover(docxml) ⇒ Object
8 9 10 11 12 13 14 |
# File 'lib/isodoc/word_function/postprocess_cover.rb', line 8
# Inserts the cover page at the start of <div class="WordSection1">.
#
# Reads the file named by @wordcoverpage as UTF-8, fills it in with
# #populate_template (format :word), parses it with #to_word_xhtml_fragment,
# and assigns its US-ASCII serialisation as the previous sibling of the
# section's first child.
#
# @param docxml document tree responding to #at
def word_cover(docxml)
  template = File.read(@wordcoverpage, encoding: "UTF-8")
  markup = populate_template(template, :word)
  fragment = to_word_xhtml_fragment(markup)
  first_node = docxml.at('//div[@class="WordSection1"]').children.first
  first_node.previous = fragment.to_xml(encoding: "US-ASCII")
end
#word_example_cleanup(docxml) ⇒ Object
171 172 173 174 175 |
# File 'lib/isodoc/word_function/postprocess.rb', line 171
# Gives class "example" to every <p> without a class attribute that sits
# inside <div class="example">.
#
# @param docxml document tree responding to #xpath
def word_example_cleanup(docxml)
  unclassed = docxml.xpath("//div[@class = 'example']//p[not(@class)]")
  unclassed.each { |para| para["class"] = "example" }
end
#word_footnote_format(docxml) ⇒ Object
234 235 236 237 238 239 240 241 242 243 244 245 |
# File 'lib/isodoc/word_function/postprocess.rb', line 234
# Applies #footnote_reference_format to every <a epub:type="footnote"> and
# #table_footnote_reference_format to every <a> or <span> of class
# "TableFootnoteRef".
#
# @param docxml document tree responding to #xpath
# @return the same +docxml+ object
def word_footnote_format(docxml)
  # the content is in a[@epub:type = 'footnote']//sup, but in Word,
  # we need to inject content around the autonumbered footnote reference
  docxml.xpath("//a[@epub:type = 'footnote']").each do |ref|
    footnote_reference_format(ref)
  end
  table_refs = "//a[@class = 'TableFootnoteRef'] | " \
               "//span[@class = 'TableFootnoteRef']"
  docxml.xpath(table_refs).each do |ref|
    table_footnote_reference_format(ref)
  end
  docxml
end
#word_image_caption(docxml) ⇒ Object
111 112 113 114 115 116 117 118 119 120 |
# File 'lib/isodoc/word_function/postprocess.rb', line 111
# Keeps figure/source titles on the same page as the element before them.
#
# For every <p> of class "FigureTitle" or "SourceTitle": if the preceding
# element is an <img>, that image is first swapped for the image wrapped in
# <p class='figure'>. Whatever element then precedes the title (possibly
# none) is passed to #style_update with "page-break-after:avoid;".
#
# @param docxml document tree responding to #xpath
def word_image_caption(docxml)
  titles = "//p[@class = 'FigureTitle' or @class = 'SourceTitle']"
  docxml.xpath(titles).each do |t|
    if t.previous_element&.name == "img"
      img = t.previous_element
      img.swap("<p class='figure'>#{img.to_xml}</p>")
    end
    # Re-read previous_element: after the swap it is the new wrapper.
    style_update(t.previous_element, "page-break-after:avoid;")
  end
end
#word_intro(docxml, level) ⇒ Object
16 17 18 19 20 21 22 23 |
# File 'lib/isodoc/word_function/postprocess_cover.rb', line 16
# Inserts the intro page at the start of <div class="WordSection2">.
#
# Reads the file named by @wordintropage as UTF-8, lets #insert_toc put the
# table of contents into it, fills it in with #populate_template (format
# :word), parses it with #to_word_xhtml_fragment, and assigns its US-ASCII
# serialisation as the previous sibling of the section's first child.
#
# @param docxml document tree responding to #at
# @param level [Integer] deepest heading level for the table of contents
def word_intro(docxml, level)
  template = File.read(@wordintropage, encoding: "UTF-8")
  with_toc = insert_toc(template, docxml, level)
  markup = populate_template(with_toc, :word)
  fragment = to_word_xhtml_fragment(markup)
  first_node = docxml.at('//div[@class="WordSection2"]').children.first
  first_node.previous = fragment.to_xml(encoding: "US-ASCII")
end
#word_list_continuations(docxml) ⇒ Object
122 123 124 125 |
# File 'lib/isodoc/word_function/postprocess.rb', line 122
# Starts #list_add at level 1 for all <ul> and then all <ol> elements that
# have no <ul> or <ol> ancestor.
#
# @param docxml document tree responding to #xpath
def word_list_continuations(docxml)
  top_level = "[not(ancestor::ul) and not(ancestor::ol)]"
  list_add(docxml.xpath("//ul#{top_level}"), 1)
  list_add(docxml.xpath("//ol#{top_level}"), 1)
end
#word_nested_tables(docxml) ⇒ Object
81 82 83 84 85 86 87 |
# File 'lib/isodoc/word_function/postprocess.rb', line 81
# Un-nests tables: for each <table>, every descendant <table> is detached and
# made the next sibling of that table. The descendants are visited in reverse
# order, each one being placed immediately after the outer table.
#
# @param docxml document tree responding to #xpath
def word_nested_tables(docxml)
  docxml.xpath("//table").each do |outer|
    outer.xpath(".//table").reverse.each { |inner| outer.next = inner.remove }
  end
end
#word_preface(docxml) ⇒ Object
3 4 5 6 |
# File 'lib/isodoc/word_function/postprocess_cover.rb', line 3
# Adds the cover page when @wordcoverpage is set, then the intro page (with a
# table of contents down to @wordToClevels) when @wordintropage is set.
#
# @param docxml document tree passed to #word_cover and #word_intro
def word_preface(docxml)
  if @wordcoverpage
    word_cover(docxml)
  end
  if @wordintropage
    word_intro(docxml, @wordToClevels)
  end
end
#word_pseudocode_cleanup(docxml) ⇒ Object
177 178 179 180 181 |
# File 'lib/isodoc/word_function/postprocess.rb', line 177
# Gives class "pseudocode" to every <p> without a class attribute that sits
# inside <div class="pseudocode">.
#
# @param docxml document tree responding to #xpath
def word_pseudocode_cleanup(docxml)
  unclassed = docxml.xpath("//div[@class = 'pseudocode']//p[not(@class)]")
  unclassed.each { |para| para["class"] = "pseudocode" }
end
#word_remove_pb_before_annex(docxml) ⇒ Object
Applies to <div class="WordSectionN_M"><p><pagebreak/></p>…
224 225 226 227 228 229 230 231 232 |
# File 'lib/isodoc/word_function/postprocess.rb', line 224
# Removes a leading page-break paragraph from split section divs.
#
# Considers each <div> that has a <p> child containing a <br>. The div's first
# element is removed only when all of these hold: the div's class matches
# WordSection<digits>_<digits>; its first element is a <p> with at least one
# element child; and that <p>'s first element child is a <br> whose style is
# exactly "mso-special-character:line-break;page-break-before:always".
#
# @param docxml document tree responding to #xpath
def word_remove_pb_before_annex(docxml)
  docxml.xpath("//div[p/br]").each do |div|
    next unless /^WordSection\d+_\d+$/.match(div["class"])

    first = div.elements[0]
    next unless first.name == "p" && !first.elements.empty?

    lead = first.elements[0]
    next unless lead.name == "br" &&
                lead["style"] == "mso-special-character:line-break;page-break-before:always"

    first.remove
  end
end
#word_section_breaks(docxml) ⇒ Object
195 196 197 198 199 200 201 |
# File 'lib/isodoc/word_function/postprocess.rb', line 195
# Resets @landscapestyle to an empty string, splits WordSection2 and then
# WordSection3 at their orientation breaks via #word_section_breaks1, runs
# #word_remove_pb_before_annex, and finally deletes the orientation attribute
# from every <br> that carries one.
#
# @param docxml document tree responding to #xpath
def word_section_breaks(docxml)
  @landscapestyle = ""
  %w[WordSection2 WordSection3].each do |sect|
    word_section_breaks1(docxml, sect)
  end
  word_remove_pb_before_annex(docxml)
  docxml.xpath("//br[@orientation]").each { |br| br.delete("orientation") }
end
#word_section_breaks1(docxml, sect) ⇒ Object
203 204 205 206 207 208 209 210 |
# File 'lib/isodoc/word_function/postprocess.rb', line 203
# Splits one section at each <br orientation="..."> it contains.
#
# The breaks inside <div class="SECT"> are visited in reverse order, numbered
# from 0. For each one a CSS rule "div.SECT_I {page:SECTL;}" (landscape) or
# "div.SECT_I {page:SECTP;}" (any other orientation) is appended to
# @landscapestyle, and #split_at_section_break is called with the same index.
#
# @param docxml document tree responding to #xpath
# @param sect [String] class name of the section div
def word_section_breaks1(docxml, sect)
  breaks = docxml.xpath("//div[@class = '#{sect}']//br[@orientation]").reverse
  breaks.each_with_index do |br, i|
    page = br["orientation"] == "landscape" ? "L" : "P"
    @landscapestyle += "\ndiv.#{sect}_#{i} {page:#{sect}#{page};}\n"
    split_at_section_break(docxml, sect, br, i)
  end
end
#word_table_align(docxml) ⇒ Object
140 141 142 143 144 145 |
# File 'lib/isodoc/word_function/postprocess.rb', line 140
# Copies cell alignment down to paragraphs: every <p> directly inside a <td>
# or <th> that has an align attribute gets "text-align: <cell align>" added to
# its style via #style_update, unless the <p> has an align attribute itself.
#
# @param docxml document tree responding to #xpath
def word_table_align(docxml)
  docxml.xpath("//td[@align]/p | //th[@align]/p").each do |para|
    unless para["align"]
      style_update(para, "text-align: #{para.parent["align"]}")
    end
  end
end
#word_table_separator(docxml) ⇒ Object
EMPTY_PARA = "<p style='margin-top:0cm;margin-right:0cm;"\
"margin-bottom:0cm;margin-left:0.0pt;margin-bottom:.0001pt;"\
"line-height:1.0pt;mso-line-height-rule:exactly'>"\
"<span lang=EN-GB style='display:none;mso-hide:all'> </span></p>"
def table_after_table(docxml)
docxml.xpath("//table[following-sibling::*[1]/self::table]").each do |t|
t.add_next_sibling(EMPTY_PARA)
end
end
160 161 162 163 164 165 166 |
# File 'lib/isodoc/word_function/postprocess.rb', line 160
# Collapses empty table titles: every <p class="TableTitle"> with no children
# gets "font-size:0pt;" appended to its style and a single-character text
# child.
#
# The style is updated through #style_update so that a title without any
# style attribute is handled too, instead of raising on nil.
#
# @param docxml document tree responding to #xpath
def word_table_separator(docxml)
  docxml.xpath("//p[@class = 'TableTitle']").each do |t|
    next unless t.children.empty?

    style_update(t, "font-size:0pt;")
    # NOTE(review): this literal may have been a non-breaking-space entity
    # before the page was extracted — confirm against the repository.
    t.children = " "
  end
end
#word_toc_entry(toclevel, heading) ⇒ Object
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# Builds the markup for one table-of-contents line.
#
# The result is a <p class="MsoToc<toclevel>"> holding a hyperlink to
# "#_Toc<bookmark>" whose content is the heading followed by a dotted tab and
# a PAGEREF field for the same bookmark. The bookmark value comes from
# bookmarkid, which is defined outside this listing.
#
# @param toclevel [Integer] ToC level, used in the paragraph's class name
# @param heading [String] heading markup placed inside the link
# @return [String] the ToC entry markup
#
# NOTE(review): the line breaks inside the heredoc are not recoverable from
# this single-line rendering, so the listing below is left exactly as
# extracted.
# File 'lib/isodoc/word_function/postprocess_cover.rb', line 29 def word_toc_entry(toclevel, heading) bookmark = bookmarkid # Random.rand(1000000000) <<~TOC <p class="MsoToc#{toclevel}"><span class="MsoHyperlink"><span lang="EN-GB" style='mso-no-proof:yes'> <a href="#_Toc#{bookmark}">#{heading}<span lang="EN-GB" class="MsoTocTextSpan"> <span style='mso-tab-count:1 dotted'>. </span> </span><span lang="EN-GB" class="MsoTocTextSpan"> <span style='mso-element:field-begin'></span></span> <span lang="EN-GB" class="MsoTocTextSpan"> PAGEREF _Toc#{bookmark} \\h </span> <span lang="EN-GB" class="MsoTocTextSpan"><span style='mso-element:field-separator'></span></span><span lang="EN-GB" class="MsoTocTextSpan">1</span> <span lang="EN-GB" class="MsoTocTextSpan"></span><span lang="EN-GB" class="MsoTocTextSpan"><span style='mso-element:field-end'></span></span></a></span></span></p> TOC end |
#word_toc_preface(level) ⇒ Object
52 53 54 55 56 57 58 59 60 |
# Returns the frozen markup that opens the Word TOC field: a field-begin
# marker, the field instruction TOC \o "1-<level>" \h \z \u, and a
# field-separator marker.
#
# @param level [Integer] deepest heading level, used as the upper bound of
#   the \o range
# @return [String] frozen field-opening markup
#
# NOTE(review): the line breaks inside the heredoc are not recoverable from
# this single-line rendering, so the listing below is left exactly as
# extracted.
# File 'lib/isodoc/word_function/postprocess_cover.rb', line 52 def word_toc_preface(level) <<~TOC.freeze <span lang="EN-GB"><span style='mso-element:field-begin'></span><span style='mso-spacerun:yes'> </span>TOC \\o "1-#{level}" \\h \\z \\u <span style='mso-element:field-separator'></span></span> TOC end |