Module: IsoDoc::Function::Cleanup

Included in:
Common
Defined in:
lib/isodoc/function/cleanup.rb

Constant Summary collapse

# XPath selecting every div of class "figure" that has an aside descendant
# and does not itself contain another div of class "figure".
FIGURE_WITH_FOOTNOTES = [
  "//div[@class = 'figure']",
  "[descendant::aside]",
  "[not(descendant::div[@class = 'figure'])]",
].join.freeze

Instance Method Summary collapse

Instance Method Details

#admonition_cleanup(docxml) ⇒ Object



72
73
74
75
76
77
78
79
# File 'lib/isodoc/function/cleanup.rb', line 72

# Folds the title of each titled admonition into the element that follows it.
#
# For every div of class "Admonition" with a +title+ child, the title is
# removed and its text, followed by an em dash, is inserted ahead of the
# first child of the title's next sibling element. When there is no next
# element, or it has no children, the title is left untouched.
#
# @param docxml [Object] document queried through +xpath+
# @return [Object] the same +docxml+
def admonition_cleanup(docxml)
  docxml.xpath("//div[@class = 'Admonition'][title]").each do |admonition|
    heading = admonition.at("./title")
    anchor = heading.next_element&.children&.first
    next if anchor.nil?

    anchor.add_previous_sibling(heading.remove.text + "—")
  end
  docxml
end

#break_up_long_strings(text) ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/isodoc/function/cleanup.rb', line 50

# Inserts break opportunities into long unbroken runs of +text+.
#
# The text is split in front of every whitespace character; any resulting
# piece of 30 or more characters is cut into chunks of at most 30 characters,
# and each full 30-character chunk is passed to #break_up_long_strings1.
# Shorter pieces are kept as they are.
#
# @param text [String] text to process
# @return [String] +text+ itself when it is entirely whitespace (or empty),
#   otherwise the re-joined pieces
def break_up_long_strings(text)
  # \A...\z tests the whole string. ^...$ would also match a single blank
  # line inside longer text and wrongly skip the processing below.
  return text if /\A\s*\z/.match?(text)

  text.split(/(?=\s)/).map do |word|
    next word if word.size < 30

    # /m lets "." consume the leading newline a piece may start with, and
    # {1,30} rules out empty matches; with /.{,30}/ String#scan skipped that
    # newline and it vanished from the output.
    word.scan(/.{1,30}/m).map do |chunk|
      chunk.size < 30 ? chunk : break_up_long_strings1(chunk)
    end.join
  end.join
end

#break_up_long_strings1(text) ⇒ Object



63
64
65
66
67
68
69
70
# File 'lib/isodoc/function/cleanup.rb', line 63

# Adds one space to +text+ as a break opportunity.
#
# The text is split after each of the characters , . ? + ; / =. When that
# yields a single piece, the space is appended to the end of the text;
# otherwise the space is placed in front of the last piece.
#
# @param text [String] chunk to break
# @return [String] the chunk with one space added
def break_up_long_strings1(text)
  pieces = text.split(%r{(?<=[,.?+;/=])})
  return "#{text} " if pieces.size == 1

  pieces[-1] = " #{pieces[-1]}"
  pieces.join
end

#cleanup(docxml) ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
# File 'lib/isodoc/function/cleanup.rb', line 24

# Runs the individual cleanup passes over +docxml+ in a fixed order.
#
# @i18n is initialised from @lang and @script first if it is not yet set.
# Every pass receives the same +docxml+ object.
#
# @param docxml [Object] document handed to each pass
# @return [Object] whatever the final pass, #admonition_cleanup, returns
def cleanup(docxml)
  @i18n ||= i18n_init(@lang, @script)
  passes = %i[
    comment_cleanup
    footnote_cleanup
    inline_header_cleanup
    figure_cleanup
    table_cleanup
    symbols_cleanup
    example_cleanup
    admonition_cleanup
  ]
  passes.map { |pass| send(pass, docxml) }.last
end

#example_cleanup(docxml) ⇒ Object



81
82
83
84
85
86
# File 'lib/isodoc/function/cleanup.rb', line 81

# Gives every class-less paragraph inside a table of class "example" the
# class "example".
#
# @param docxml [Object] document queried through +xpath+
# @return [Object] the same +docxml+
def example_cleanup(docxml)
  unclassed = docxml.xpath("//table[@class = 'example']//p[not(@class)]")
  unclassed.each { |para| para["class"] = "example" }
  docxml
end

#figure_aside_process(elem, aside, key) ⇒ Object



101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/isodoc/function/cleanup.rb', line 101

# Moves one footnote of a figure into the figure's key list.
#
# A direct +a+ child of +elem+ with class "TableFootnoteRef" is removed if
# present. A new +dt+ and +dd+ are appended to +key+; the parent of the first
# span of class "TableFootnoteRef" found under +elem+ is moved into the +dt+,
# and every paragraph of +aside+ loses its class and is moved into the +dd+.
#
# @param elem [Object] the figure element
# @param aside [Object] the footnote aside being processed
# @param key [Object] the definition list receiving the new entry
# @return [Object] the collection of paragraphs found in +aside+
def figure_aside_process(elem, aside, key)
  # The footnote link is already shown in the diagram, so drop it here.
  link = elem.at("./a[@class='TableFootnoteRef']")
  link&.remove
  marker = elem.at(".//span[@class='TableFootnoteRef']/..")
  term = key.add_child("<dt></dt>").first
  definition = key.add_child("<dd></dd>").first
  marker.parent = term
  aside.xpath(".//p").each do |para|
    para.delete("class")
    para.parent = definition
  end
end

#figure_cleanup(docxml) ⇒ Object

Move footnotes into the key, and remove the footnote reference, since it already appears in the diagram.



116
117
118
119
120
121
122
123
124
125
126
# File 'lib/isodoc/function/cleanup.rb', line 116

# Moves figure footnotes into each figure's key.
#
# Figures matched by FIGURE_WITH_FOOTNOTES are skipped unless they contain at
# least one aside outside a paragraph of class "FigureTitle". For the rest,
# the key list is fetched or created once and every aside of the figure is
# handed to #figure_aside_process.
#
# @param docxml [Object] document queried through +xpath+
# @return [Object] the same +docxml+
def figure_cleanup(docxml)
  docxml.xpath(FIGURE_WITH_FOOTNOTES).each do |figure|
    next unless figure.at(".//aside[not(ancestor::p[@class = 'FigureTitle'])]")

    key = figure_get_or_make_dl(figure)
    footnotes = figure.xpath(".//aside")
    footnotes.each { |footnote| figure_aside_process(figure, footnote, key) }
  end
  docxml
end

#figure_get_or_make_dl(elem) ⇒ Object



88
89
90
91
92
93
94
95
# File 'lib/isodoc/function/cleanup.rb', line 88

# Returns the first +dl+ found under +elem+, creating one if there is none.
#
# When no +dl+ exists, a bold paragraph holding @i18n.key followed by an
# empty +dl+ is appended to +elem+, and the lookup is repeated.
#
# @param elem [Object] element searched with +at+ and extended with +add_child+
# @return [Object] the existing or newly added +dl+
def figure_get_or_make_dl(elem)
  existing = elem.at(".//dl")
  return existing unless existing.nil?

  elem.add_child("<p><b>#{@i18n.key}</b></p><dl></dl>")
  elem.at(".//dl")
end

#footnote_cleanup(docxml) ⇒ Object



141
142
143
144
145
146
# File 'lib/isodoc/function/cleanup.rb', line 141

# Renumbers footnote references sequentially.
#
# The content of each +sup+ directly under an +a+ of class "FootnoteRef" is
# replaced by its 1-based position among the matches.
#
# @param docxml [Object] document queried through +xpath+
# @return [Object] the same +docxml+
def footnote_cleanup(docxml)
  markers = docxml.xpath('//a[@class = "FootnoteRef"]/sup')
  markers.each_with_index { |sup, index| sup.content = index.succ.to_s }
  docxml
end

#footnote_reference_format(link) ⇒ Object



222
223
224
# File 'lib/isodoc/function/cleanup.rb', line 222

# Returns +link+ unchanged.
#
# NOTE(review): presumably an override point for formatting footnote
# references; no caller or override is visible here, so confirm.
#
# @param link [Object] the footnote reference
# @return [Object] the same +link+
def footnote_reference_format(link)
  link
end

#inline_header_cleanup(docxml) ⇒ Object



128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/isodoc/function/cleanup.rb', line 128

# Moves inline headers into the element that follows them.
#
# Each span of class "zzMoveToFollowing" has its class removed. With no
# following sibling element it is renamed to +p+ and stays in place;
# otherwise it is detached and becomes the first child of that sibling.
#
# @param docxml [Object] document queried through +xpath+
# @return [Object] the same +docxml+
def inline_header_cleanup(docxml)
  docxml.xpath('//span[@class="zzMoveToFollowing"]').each do |span|
    span.delete("class")
    target = span.next_element
    if target.nil?
      span.name = "p"
    elsif (first = target.children.first)
      first.previous = span.remove
    else
      # An empty following element has no first child to insert before;
      # calling previous= on nil used to raise NoMethodError here.
      target.add_child(span.remove)
    end
  end
  docxml
end

#merge_fnref_into_fn_text(elem) ⇒ Object



148
149
150
151
152
# File 'lib/isodoc/function/cleanup.rb', line 148

# Moves a table footnote's reference marker into the text that follows it.
#
# The parent of the first span of class "TableFootnoteRef" under +elem+ is
# detached and inserted in front of the first child of its next sibling
# element. Nothing is moved when there is no next element or it has no
# children.
#
# @param elem [Object] the footnote element
# @return [Object, nil] result of the insertion, or nil when nothing moved
def merge_fnref_into_fn_text(elem)
  marker = elem.at('.//span[@class="TableFootnoteRef"]/..')
  anchor = marker.next_element&.children&.first
  return if anchor.nil?

  anchor.add_previous_sibling(marker.remove)
end

#new_fullcolspan_row(table, tfoot) ⇒ Object



186
187
188
189
190
191
192
193
194
195
196
# File 'lib/isodoc/function/cleanup.rb', line 186

# Appends a row with a single full-width cell to +tfoot+ and returns that
# cell.
#
# The width is the sum of the colspans (1 where none is given) of the +td+
# and +th+ cells in the first row found under +table+. The new cell gets no
# top border and a 1.5pt bottom border styled with
# IsoDoc::Function::Table::SW.
#
# @param table [Object] table whose first row defines the column count
# @param tfoot [Object] footer receiving the new row
# @return [Object] the last +td+ found under +tfoot+ after the insertion
def new_fullcolspan_row(table, tfoot)
  first_row_cells = table.at(".//tr").xpath("./td | ./th")
  cols = first_row_cells.sum { |cell| (cell["colspan"] || 1).to_i }
  style =
    %{border-top:0pt;border-bottom:#{IsoDoc::Function::Table::SW} 1.5pt;}
  tfoot.add_child("<tr><td colspan='#{cols}' style='#{style}'/></tr>")
  tfoot.xpath(".//td").last
end

#passthrough_cleanup(docxml) ⇒ Object



17
18
19
20
21
22
# File 'lib/isodoc/function/cleanup.rb', line 17

# Strips passthrough tags from +docxml+ and HTML-entity-decodes what was
# between them.
#
# The string is split on the opening and closing passthrough tags and read
# four pieces at a time: text outside, opening tag, enclosed text, closing
# tag. The tags are dropped, the enclosed text is decoded with HTMLEntities,
# and the outside text is kept as is.
#
# @param docxml [String] serialised document
# @return [String] the document without passthrough tags
def passthrough_cleanup(docxml)
  pieces = docxml.split(%r{(<passthrough>|</passthrough>)})
  pieces.each_slice(4).flat_map do |outside, _open_tag, inside, _close_tag|
    [outside, inside && HTMLEntities.new.decode(inside)]
  end.join
end

#remove_bottom_border(cell) ⇒ Object



170
171
172
173
# File 'lib/isodoc/function/cleanup.rb', line 170

# Sets every border-bottom declaration in the cell's inline style to 0pt.
#
# A cell with no +style+ attribute is left untouched; calling +gsub+ on the
# missing attribute used to raise NoMethodError.
#
# @param cell [Object] element whose "style" attribute is read and written
#   through [] and []=
# @return [String, nil] the rewritten style, or nil when the cell has none
def remove_bottom_border(cell)
  style = cell["style"]
  return if style.nil?

  cell["style"] = style.gsub(/border-bottom:[^;]+;/, "border-bottom:0pt;")
end

#symbols_cleanup(docxml) ⇒ Object



216
# File 'lib/isodoc/function/cleanup.rb', line 216

# Does nothing with +docxml+ and returns nil.
#
# NOTE(review): presumably a placeholder for includers that need their own
# symbols cleanup; no override is visible here, so confirm.
#
# @param docxml [Object] ignored
# @return [nil]
def symbols_cleanup(docxml); end

#table_cleanup(docxml) ⇒ Object



209
210
211
212
213
214
# File 'lib/isodoc/function/cleanup.rb', line 209

# Runs the table cleanup passes over +docxml+: footnotes first, then notes,
# then long strings.
#
# @param docxml [Object] document handed to each pass
# @return [Object] the same +docxml+
def table_cleanup(docxml)
  %i[table_footnote_cleanup table_note_cleanup table_long_strings_cleanup]
    .each { |pass| send(pass, docxml) }
  docxml
end

#table_footnote_cleanup(docxml) ⇒ Object

Preempt html2doc putting MsoNormal under the TableFootnote class.



155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/isodoc/function/cleanup.rb', line 155

# Turns the asides inside tables into TableFootnote divs at the end of their
# table.
#
# Each aside in a table first has its reference marker merged into its text
# (#merge_fnref_into_fn_text), then is renamed to +div+, given the class
# "TableFootnote", detached, and appended to the table. Afterwards every
# class-less paragraph under an element of class "TableFootnote" gets that
# class too.
#
# @param docxml [Object] document queried through +xpath+
# @return [Object] the collection of paragraphs matched by the final query
def table_footnote_cleanup(docxml)
  docxml.xpath("//table[descendant::aside]").each do |table|
    table.xpath(".//aside").each do |footnote|
      merge_fnref_into_fn_text(footnote)
      footnote.name = "div"
      footnote["class"] = "TableFootnote"
      table << footnote.remove
    end
  end
  unclassed = "//p[not(self::*[@class])][ancestor::*[@class = 'TableFootnote']]"
  docxml.xpath(unclassed).each { |para| para["class"] = "TableFootnote" }
end

#table_footnote_reference_format(link) ⇒ Object



218
219
220
# File 'lib/isodoc/function/cleanup.rb', line 218

# Returns +link+ unchanged.
#
# NOTE(review): presumably an override point for formatting table footnote
# references; no caller or override is visible here, so confirm.
#
# @param link [Object] the table footnote reference
# @return [Object] the same +link+
def table_footnote_reference_format(link)
  link
end

#table_get_or_make_tfoot(table) ⇒ Object



175
176
177
178
179
180
181
182
183
184
# File 'lib/isodoc/function/cleanup.rb', line 175

# Returns the +tfoot+ of +table+, creating an empty one if needed.
#
# An existing footer has the bottom border of all its +td+ and +th+ cells
# removed via #remove_bottom_border. A missing footer is appended to the
# table and then looked up again.
#
# @param table [Object] table searched with +at+ and extended with +add_child+
# @return [Object] the existing or newly added +tfoot+
def table_get_or_make_tfoot(table)
  existing = table.at(".//tfoot")
  unless existing.nil?
    existing.xpath(".//td | .//th").each { |cell| remove_bottom_border(cell) }
    return existing
  end

  table.add_child("<tfoot></tfoot>")
  table.at(".//tfoot")
end

#table_long_strings_cleanup(docxml) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/isodoc/function/cleanup.rb', line 36

# Breaks up long strings in the text of table cells.
#
# Runs only when @break_up_urls_in_tables is exactly +true+. Every text node
# under each +td+ and +th+ is replaced by its text passed through
# #break_up_long_strings and then entity-encoded with HTMLEntities.
#
# @param docxml [Object] document queried through +xpath+
# @return [Object, nil] the collection of cells visited, or nil when the
#   feature is switched off
def table_long_strings_cleanup(docxml)
  return unless @break_up_urls_in_tables == true

  docxml.xpath("//td | //th").each do |cell|
    cell.traverse do |node|
      next unless node.text?

      broken = break_up_long_strings(node.text)
      node.replace(HTMLEntities.new.encode(broken))
    end
  end
end

#table_note_cleanup(docxml) ⇒ Object



198
199
200
201
202
203
204
205
206
207
# File 'lib/isodoc/function/cleanup.rb', line 198

# Moves the notes and footnotes of each table into a full-width footer cell.
#
# For every table with a direct +div+ child of class "Note" or
# "TableFootnote", the footer is fetched or created, a full-width row is
# appended to it, and each such +div+ is re-parented into that row's cell.
#
# @param docxml [Object] document queried through +xpath+
# @return [Object] the collection of tables matched
def table_note_cleanup(docxml)
  notes = "div[@class = 'Note' or @class = 'TableFootnote']"
  docxml.xpath("//table[#{notes}]").each do |table|
    footer = table_get_or_make_tfoot(table)
    cell = new_fullcolspan_row(table, footer)
    table.xpath(notes).each { |note| note.parent = cell }
  end
end

#termref_cleanup(docxml) ⇒ Object



7
8
9
10
11
12
13
14
15
# File 'lib/isodoc/function/cleanup.rb', line 7

# Rewrites the [TERMREF] / [MODIFICATION] placeholders in +docxml+ into
# their final text.
#
# The substitutions run in this order, each on the previous result:
# 1. a [MODIFICATION] directly before [/TERMREF] becomes ", <modified>";
# 2. a [/TERMREF] ... [TERMREF] pair across a paragraph break becomes "; ";
# 3. [TERMREF] becomes "[<source>: ";
# 4. [/TERMREF] becomes "]";
# 5. any remaining [MODIFICATION] becomes ", <modified> &mdash; ".
# <modified> and <source> come from @i18n; every replacement except "; " is
# passed through +l10n+.
#
# @param docxml [String] serialised document
# @return [String] the document with the placeholders replaced
def termref_cleanup(docxml)
  # Replacements are lambdas so each is built just before its own gsub.
  rewrites = [
    [/\s*\[MODIFICATION\]\s*\[\/TERMREF\]/,
     -> { l10n(", #{@i18n.modified} [/TERMREF]") }],
    [%r{\s*\[/TERMREF\]\s*</p>\s*<p>\s*\[TERMREF\]}, -> { "; " }],
    [/\[TERMREF\]\s*/, -> { l10n("[#{@i18n.source}: ") }],
    [%r{\s*\[/TERMREF\]\s*}, -> { l10n("]") }],
    [/\s*\[MODIFICATION\]/, -> { l10n(", #{@i18n.modified} &mdash; ") }],
  ]
  rewrites.reduce(docxml) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement.call)
  end
end

#textcleanup(docxml) ⇒ Object



3
4
5
# File 'lib/isodoc/function/cleanup.rb', line 3

# Applies the string-level cleanups to +docxml+: #passthrough_cleanup first,
# then #termref_cleanup on its result.
#
# @param docxml [String] serialised document
# @return [String] the cleaned-up document
def textcleanup(docxml)
  without_passthrough = passthrough_cleanup(docxml)
  termref_cleanup(without_passthrough)
end