Module: Metanorma::Utils

Defined in:
lib/utils/image.rb,
lib/utils/log.rb,
lib/utils/xml.rb,
lib/utils/main.rb,
lib/utils/version.rb,
lib/utils/namespace.rb,
lib/utils/hash_transform_keys.rb,
lib/utils/hash_transform_keys.rb

Overview

Image methods were moved to the Vectory gem

Defined Under Namespace

Modules: Array, Hash Classes: Log, Namespace

Constant Summary collapse

# Body of a regexp character class. to_ncname replaces every character that
# matches it (or "#") with "_", so despite the name these are the characters
# it treats as NOT allowed in a name.
# "\\u005b" is a literal backslash-u sequence left for the regexp engine to
# resolve (presumably so that "[" does not appear raw inside the class).
NAMECHAR =
"\u0000-\u002c\u002f\u003a-\u0040\\u005b-\u005e" \
"\u0060\u007b-\u00b6\u00b8-\u00bf\u00d7\u00f7\u037e" \
"\u2000-\u200b" \
"\u200e-\u203e\u2041-\u206f\u2190-\u2bff\u2ff0-\u3000".freeze
# Body of a regexp character class. When the first character of a tag matches
# it (or "#"), to_ncname keeps the character but prefixes it with "_".
NAMESTARTCHAR =
"\\u002d\u002e\u0030-\u0039\u00b7\u0300-\u036f" \
"\u203f-\u2040".freeze
# Skeleton XHTML document parsed by noko, noko_html and to_xhtml_fragment to
# obtain a document in which fragments are created.
NOKOHEAD =
"<!DOCTYPE html SYSTEM\n\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head> <title></title> <meta charset=\"UTF-8\" /> </head>\n<body> </body> </html>\n".freeze
# Default `threshold` argument of break_up_long_str: words shorter than this
# are left alone, longer ones are cut into chunks of this many characters.
LONGSTR_THRESHOLD =
10
# Default `nopunct` argument of break_up_long_str: a chunk with no break
# point gets a forced soft hyphen when its 1-based index is a multiple of this.
LONGSTR_NOPUNCT =
2
# Zero-width positions at which break_up_long_str2 first tries to split a
# chunk (the break is then marked with a zero-width space).
STR_BREAKUP_RE =
%r{
 (?<=[=_—–\u2009→?+;]) | # break after any of these
 (?<=[,.:])(?!\d) | # break on punct only if not preceding digit
 (?<=[>])(?![>]) | # > not >->
 (?<=[\]])(?![\]]) | # ] not ]-]
 (?<=//) | # //
 (?<=[/])(?![/]) | # / not /-/
 (?<![<])(?=[<]) | # < not <-<
 (?<=\p{L})(?=[(\{\[]\p{L}) # letter and bracket, followed by letter
}x.freeze
# Fallback split positions used by break_up_long_str2 when STR_BREAKUP_RE
# finds none (the break is then marked with a soft hyphen).
CAMEL_CASE_RE =
%r{
  (?<=\p{Ll}\p{Ll})(?=\p{Lu}\p{Ll}\p{Ll}) # 2 lowerc / upperc, 2 lowerc
}x.freeze
# Version string of this library.
VERSION =
"1.7.7".freeze

Class Method Summary collapse

Class Method Details

.anchor_attributesObject

all element/attribute pairs that are ID anchors in Metanorma



108
109
110
111
112
# File 'lib/utils/xml.rb', line 108

# Lists the element/attribute pairs that act as ID anchors in Metanorma.
#
# @return [Array<Array<String>>] [element name, attribute name] pairs; the
#   element name "*" is returned as-is (presumably meaning "any element")
def anchor_attributes
  by_element = {
    "*" => %w[id bibitemid],
    "review" => %w[from to],
    "index" => %w[to],
    "xref" => %w[target],
    "callout" => %w[target],
    "location" => %w[target],
  }
  by_element.flat_map do |element, attributes|
    attributes.map { |attribute| [element, attribute] }
  end
end

.anchor_or_uuid(node = nil) ⇒ Object



43
44
45
46
# File 'lib/utils/xml.rb', line 43

# Returns the id of node, or a freshly generated "_<uuid>" anchor when node
# is nil or its id is nil or empty.
#
# The UUID is only generated when it is actually needed, so nodes that
# already carry an id do not pay for random UUID generation.
#
# @param node [#id, nil] object responding to #id
# @return [String] the existing id, or "_" followed by a random UUID
def anchor_or_uuid(node = nil)
  id = node&.id
  return id unless id.nil? || id.empty?

  "_#{UUIDTools::UUID.random_create}"
end

.asciidoc_sub(text, flavour = :standoc) ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
# File 'lib/utils/main.rb', line 20

# Applies Asciidoctor substitutions to the source of the first block parsed
# from text.
#
# @param text [String, nil] AsciiDoc source
# @param flavour [Symbol] passed to Asciidoctor as the backend
# @return [String, nil] nil for nil input, "" for empty input, otherwise the
#   result of apply_subs on the first parsed block
def asciidoc_sub(text, flavour = :standoc)
  if text.nil? then nil
  elsif text.empty? then ""
  else
    options = { header_footer: false, backend: flavour }
    document = Asciidoctor::Document.new(text.lines.entries, options)
    # only the first block of the parsed document is processed
    first_block = document.parse.blocks.first
    first_block.apply_subs(first_block.source)
  end
end

.attr_code(attributes) ⇒ Object



24
25
26
27
28
# File 'lib/utils/xml.rb', line 24

# Drops nil-valued attributes and decodes HTML entities in String values;
# values of any other type are passed through untouched.
#
# @param attributes [Hash] attribute name => value
# @return [Hash] new hash without nil values
def attr_code(attributes)
  present = attributes.compact
  present.transform_values do |value|
    next value unless value.is_a?(String)

    HTMLEntities.new.decode(value)
  end
end

.break_up_long_str(text, threshold = LONGSTR_THRESHOLD, nopunct = LONGSTR_NOPUNCT) ⇒ Object

Break on punctuation every LONGSTR_THRESHOLD chars, with a zero-width space; if punctuation fails, try to break on camel case, with a soft hyphen; break regardless every LONGSTR_THRESHOLD * LONGSTR_NOPUNCT chars, with a soft hyphen.



136
137
138
139
140
141
142
143
144
145
146
# File 'lib/utils/main.rb', line 136

# Inserts optional break points into the long words of text.
#
# text is split in front of every whitespace character or hyphen; each piece
# of at least `threshold` characters is cut into chunks of `threshold`
# characters and every full-size chunk is handed to break_up_long_str1.
#
# Fixes over the previous version:
# * the chunking regexp no longer carries the `o` flag, which froze the very
#   first threshold ever passed and ignored later ones;
# * the chunking regexp is multiline, so a piece that starts with a newline
#   no longer loses that newline (String#scan skips one character after an
#   empty match, and "." did not match "\n");
# * the per-word blank check against the whole text was dead code (the guard
#   on the first line has already returned in that case) and is removed.
#
# @param text [String]
# @param threshold [Integer] chunk size, and minimum length of a word that
#   gets broken up
# @param nopunct [Integer] passed through to break_up_long_str1
# @return [String]
def break_up_long_str(text, threshold = LONGSTR_THRESHOLD, nopunct = LONGSTR_NOPUNCT)
  # NOTE(review): ^ and $ are line anchors, so this also returns early when
  # any single line of text is blank; kept as-is to preserve behaviour.
  /^\s*$/.match?(text) and return text
  chunk_re = /.{,#{threshold}}/m
  text.split(/(?=(?:\s|-))/).map do |w|
    next w if w.size < threshold

    w.scan(chunk_re).map.with_index do |w1, i|
      w1.size < threshold ? w1 : break_up_long_str1(w1, i + 1, nopunct)
    end.join
  end.join
end

.break_up_long_str1(text, iteration, nopunct) ⇒ Object



163
164
165
166
167
168
169
170
171
172
173
# File 'lib/utils/main.rb', line 163

# Adds a break point to a single chunk.
#
# If break_up_long_str2 can split the chunk, the separator it returns is
# inserted in front of the last piece. Otherwise the chunk is returned as-is,
# with a soft hyphen appended when iteration is a multiple of nopunct.
#
# @param text [String] chunk to process
# @param iteration [Integer] 1-based index of the chunk within its word
# @param nopunct [Integer] force a soft hyphen every nopunct-th chunk
# @return [String]
def break_up_long_str1(text, iteration, nopunct)
  pieces, separator = break_up_long_str2(text)
  unless pieces.size == 1
    pieces[-1] = "#{separator}#{pieces[-1]}"
    return pieces.join
  end

  # could not break up: force a soft hyphen on every nopunct-th chunk
  (iteration % nopunct).zero? ? text + "\u00ad" : text
end

.break_up_long_str2(text) ⇒ Object



175
176
177
178
179
180
181
182
183
# File 'lib/utils/main.rb', line 175

# Splits text at the STR_BREAKUP_RE positions; when that yields a single
# piece, falls back to splitting at CAMEL_CASE_RE positions.
#
# @param text [String]
# @return [Array(Array<String>, String)] the pieces, and the separator that
#   goes with the split that was used: zero-width space (U+200B) for
#   STR_BREAKUP_RE, soft hyphen (U+00AD) for the CAMEL_CASE_RE fallback
def break_up_long_str2(text)
  by_punct = text.split(STR_BREAKUP_RE, -1)
  return [by_punct, "\u200b"] unless by_punct.size == 1

  [text.split(CAMEL_CASE_RE), "\u00ad"]
end

.case_transform_xml(xml, kase) ⇒ Object



139
140
141
142
143
144
145
146
# File 'lib/utils/xml.rb', line 139

# Applies a String case method to every text node of an XML fragment.
#
# The fragment is wrapped in a temporary <root> element for parsing; only the
# children of that root are serialised back.
#
# @param xml [String] XML fragment
# @param kase [Symbol, String] name of the method sent to each text node's
#   text (e.g. :upcase)
# @return [String] the transformed fragment as XML
def case_transform_xml(xml, kase)
  doc = Nokogiri::XML("<root>#{xml}</root>")
  doc.traverse do |node|
    next unless node.text?

    node.replace(node.text.send(kase))
  end
  doc.root.children.to_xml
end

.create_namespace(xmldoc) ⇒ Object



21
22
23
# File 'lib/utils/namespace.rb', line 21

# Builds a Namespace helper for the given document.
#
# @param xmldoc [Object] passed unchanged to Namespace.new
# @return [Namespace]
def create_namespace(xmldoc)
  Namespace.new(xmldoc)
end

.csv_split(text, delim = ";") ⇒ Object

, " => ," : the CSV definition does not deal with a space followed by a quote at the start of a field, so that space is removed before parsing



13
14
15
16
17
18
# File 'lib/utils/main.rb', line 13

# Splits one line of delimiter-separated text into stripped fields.
#
# A space between a delimiter and an opening quote is removed before parsing,
# because the CSV parser does not treat `; "foo"` as a quoted field.
#
# The delimiter is regexp-escaped before being interpolated into the
# pattern, so delimiters that are regexp metacharacters ("|", ".", "+", ...)
# are matched literally instead of corrupting the input.
#
# @param text [String, nil] line to split
# @param delim [String] column separator
# @return [Array<String>, nil] stripped, non-nil fields; [] for nil or empty
#   text; nil if CSV.parse_line returns nil
def csv_split(text, delim = ";")
  return [] if text.nil? || text.empty?

  unspaced = text.gsub(/#{Regexp.escape(delim)} "(?!")/, "#{delim}\"")
  fields = CSV.parse_line(unspaced, liberal_parsing: true, col_sep: delim)
  fields&.compact&.map(&:strip)
end

.default_script(lang) ⇒ Object



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/utils/main.rb', line 109

# Returns the script code used by default for a language code.
#
# @param lang [String, nil] language code such as "ar" or "zh"
# @return [String] four-letter script code; "Latn" for any language not
#   listed here
def default_script(lang)
  scripts = {
    "ar" => "Arab", "fa" => "Arab",
    "ur" => "Aran",
    "ru" => "Cyrl", "bg" => "Cyrl",
    "hi" => "Deva",
    "el" => "Grek",
    "zh" => "Hans",
    "ko" => "Kore",
    "he" => "Hebr",
    "ja" => "Jpan"
  }
  scripts.fetch(lang, "Latn")
end

.dl_to_attrs(elem, dlist, name) ⇒ Object

convert definition list term/value pair into Nokogiri XML attribute



115
116
117
118
119
# File 'lib/utils/xml.rb', line 115

# Copies the value of the definition-list term `name` onto elem as the
# attribute `name`.
#
# The value is the text of the first ./following::dd/p found from the
# matching dt, or of the first ./following::dd when there is no such p.
# NOTE(review): `following::` is not restricted to the dd that belongs to the
# matched dt; confirm the lists passed in always make that the first hit.
#
# @param elem [#[]=] element receiving the attribute
# @param dlist [#at] definition list searched for ./dt[text()='name']
# @param name [String] term text, also used as the attribute name
# @return [String, nil] the assigned text, or nil when the term or its value
#   is missing (elem is then left unchanged)
def dl_to_attrs(elem, dlist, name)
  term = dlist.at("./dt[text()='#{name}']")
  return unless term

  value = term.at("./following::dd/p") || term.at("./following::dd")
  return unless value

  elem[name] = value.text
end

.dl_to_elems(ins, elem, dlist, name) ⇒ Object

convert definition list term/value pairs into Nokogiri XML elements



122
123
124
125
126
127
128
129
# File 'lib/utils/xml.rb', line 122

# Converts every definition-list term called `name` into an element, via
# dl_to_elems1, chaining each new element after the previous one.
#
# Insertion starts after the last existing `name` child of elem when there is
# one, otherwise after ins.
#
# @param ins [Object] default insertion point
# @param elem [#at] element searched for ./name[last()]
# @param dlist [#xpath] definition list searched for ./dt[text()='name']
# @param name [String] term text / element name
# @return [Object] the last insertion point (the starting point when no term
#   matched)
def dl_to_elems(ins, elem, dlist, name)
  last_existing = elem.at("./#{name}[last()]")
  start = last_existing || ins
  dlist.xpath("./dt[text()='#{name}']").reduce(start) do |position, term|
    dl_to_elems1(term, name, position)
  end
end

.dl_to_elems1(term, name, ins) ⇒ Object



131
132
133
134
135
136
137
# File 'lib/utils/xml.rb', line 131

# Turns the value of one definition-list term into an element called `name`
# and places it right after ins.
#
# The value is the first ./following::dd from term; when that dd has exactly
# one child element and it is a <p>, the <p> is used instead of the dd.
#
# @param term [#at] the dt node
# @param name [String] new element name
# @param ins [#next=, #next] node after which the value is inserted
# @return [Object] ins.next after the insertion
def dl_to_elems1(term, name, ins)
  value = term.at("./following::dd")
  children = value.elements
  if children && children.size == 1 && children.first.name == "p"
    value = children.first
  end
  value.name = name
  ins.next = value
  ins.next
end

.endash_date(elem) ⇒ Object



48
49
50
51
52
53
54
# File 'lib/utils/main.rb', line 48

# Replaces double hyphens in every text node under elem with the en dash
# entity "&#8211;". A single or double hyphen surrounded by whitespace is
# replaced together with that whitespace.
#
# @param elem [#traverse] node whose descendants are rewritten in place
# @return [Object] whatever elem.traverse returns
def endash_date(elem)
  elem.traverse do |node|
    if node.text?
      unspaced = node.text.gsub(/\s+--?\s+/, "&#8211;")
      node.replace(unspaced.gsub("--", "&#8211;"))
    end
  end
end

.external_path(path) ⇒ Object



98
99
100
101
102
103
104
105
106
107
# File 'lib/utils/main.rb', line 98

# Converts a path to the form expected by external commands on the current
# platform.
#
# On Windows (RUBY_PLATFORM ending in win32/win64/w32/w64, or containing
# mswin or mingw) forward slashes become backslashes and the result is
# wrapped in double quotes when it contains whitespace. On other platforms
# the path is returned unchanged.
#
# The conversion works on a copy: the previous version used gsub!, which
# modified the caller's string and raised FrozenError on frozen strings.
#
# @param path [String]
# @return [String]
def external_path(path)
  windows = RUBY_PLATFORM.match?(/(win|w)(32|64)$/) ||
    RUBY_PLATFORM.match?(/mswin|mingw/)
  return path unless windows

  native = path.gsub("/", "\\")
  native.match?(/\s/) ? "\"#{native}\"" : native
end

.guid_anchor?(id) ⇒ Boolean



148
149
150
151
# File 'lib/utils/xml.rb', line 148

# Tells whether id has the shape of a generated GUID anchor: an underscore
# followed by a UUID in 8-4-4-4-12 hexadecimal form (case-insensitive).
#
# The pattern is anchored with \A and \z so the whole string must match;
# with ^ and $ a multi-line string passed as soon as one of its lines
# looked like a GUID anchor.
#
# @param id [String, nil]
# @return [Boolean] false for nil
def guid_anchor?(id)
  /\A_[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}\z/i
    .match?(id)
end

.localdir(node) ⇒ Object



32
33
34
35
# File 'lib/utils/main.rb', line 32

# Returns the directory of the document's source file, with a trailing
# slash.
#
# @param node [#attr] object whose "docfile" attribute holds the file path
# @return [String] "./" when there is no docfile attribute, otherwise the
#   parent directory of docfile followed by "/"
def localdir(node)
  docfile = node.attr("docfile")
  return "./" if docfile.nil?

  "#{Pathname.new(docfile).parent}/"
end

.noko(script = "Latn", &block) ⇒ Object

Block for processing XML document fragments as XHTML, to allow for HTML entities. Unescapes special chars used in Asciidoctor substitution processing.



51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/utils/xml.rb', line 51

# Builds an XML fragment with a Nokogiri builder block and returns it as
# US-ASCII serialised lines.
#
# Per line: a newline directly after ">" is dropped; any other trailing
# newline becomes a space, or nothing for the Hans, Hant and Jpan scripts;
# the character references &#150;/&#x96; and &#151;/&#x97; are turned back
# into the characters U+0096 and U+0097.
#
# @param script [String] script code of the text being built
# @yield Nokogiri::XML::Builder block that populates the fragment
# @return [Array<String>] the processed lines
def noko(script = "Latn", &block)
  fragment = ::Nokogiri::XML.parse(NOKOHEAD).fragment("")
  ::Nokogiri::XML::Builder.with(fragment, &block)
  line_end = %w(Hans Hant Jpan).include?(script) ? "" : " "
  serialised = fragment.to_xml(
    encoding: "US-ASCII", indent: 0,
    save_with: Nokogiri::XML::Node::SaveOptions::AS_XML
  )
  serialised.lines.map do |line|
    joined = line.gsub(/>\n$/, ">").gsub(/\n$/m, line_end)
    joined.gsub("&#150;", "\u0096").gsub("&#151;", "\u0097")
      .gsub("&#x96;", "\u0096").gsub("&#x97;", "\u0097")
  end
end

.noko_html(&block) ⇒ Object



65
66
67
68
69
70
71
72
73
74
# File 'lib/utils/xml.rb', line 65

# Builds an XML fragment with a Nokogiri builder block and returns it as
# UTF-8 serialised lines, each with its newline (and any whitespace directly
# before it) removed.
#
# @yield Nokogiri::XML::Builder block that populates the fragment
# @return [Array<String>] the processed lines
def noko_html(&block)
  fragment = ::Nokogiri::XML.parse(NOKOHEAD).fragment("")
  ::Nokogiri::XML::Builder.with(fragment, &block)
  serialised = fragment.to_xml(
    encoding: "UTF-8", indent: 0,
    save_with: Nokogiri::XML::Node::SaveOptions::AS_XML
  )
  serialised.lines.map { |line| line.gsub(/\s*\n/, "") }
end

.ns(xpath) ⇒ Object



81
82
83
84
85
86
# File 'lib/utils/xml.rb', line 81

# Prefixes the element names of an XPath expression with "xmlns:".
#
# A name is prefixed when it follows "/" or "::", or when it opens a
# predicate ("[") and is followed by "=", "/", "[" or "]".
# NOTE(review): the character range A-z also covers [ \ ] ^ _ and the
# backtick; left unchanged here.
#
# @param xpath [String]
# @return [String] the rewritten expression
def ns(xpath)
  rewrites = [
    [%r{/([a-zA-z])}, "/xmlns:\\1"],
    [%r{::([a-zA-z])}, "::xmlns:\\1"],
    [%r{\[([a-zA-z][a-z0-9A-Z@/-]* ?=)}, "[xmlns:\\1"],
    [%r{\[([a-zA-z][a-z0-9A-Z@/-]*[/\[\]])}, "[xmlns:\\1"],
  ]
  rewrites.reduce(xpath) do |path, (pattern, replacement)|
    path.gsub(pattern, replacement)
  end
end

.numeric_escapes(xml) ⇒ Object



88
89
90
91
92
93
94
95
96
# File 'lib/utils/xml.rb', line 88

# Rewrites named entity references (e.g. "&amp;") in xml as hexadecimal
# numeric references, by decoding and re-encoding each one with
# HTMLEntities. References containing "#" are left untouched.
#
# @param xml [String]
# @return [String]
def numeric_escapes(xml)
  coder = HTMLEntities.new
  pieces = xml.split(/(&[^ \r\n\t#;]+;)/)
  pieces.map do |piece|
    next piece unless /^(&[^ \t\r\n#;]+;)/.match?(piece)

    coder.encode(coder.decode(piece), :hexadecimal)
  end.join
end

.rtl_script?(script) ⇒ Boolean



125
126
127
# File 'lib/utils/main.rb', line 125

# Tells whether script is one of the scripts listed here as right-to-left:
# Arab, Aran or Hebr.
#
# @param script [String, nil] script code
# @return [Boolean]
def rtl_script?(script)
  %w[Arab Aran Hebr].include?(script)
end

.set_nested_value(hash, keys, new_val) ⇒ Object

Set a hash value using a path of keys; modified from stackoverflow.com/a/42425884



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/utils/main.rb', line 58

# Stores new_val in hash under the path of keys, creating intermediate
# hashes as needed. A value already present at the final key is not
# overwritten: the old and new values are collected in an array.
#
# @param hash [Hash] hash modified in place
# @param keys [Array] path of keys; the last one receives the value
# @param new_val [Object] value to store
# @return [Hash] hash itself
def set_nested_value(hash, keys, new_val)
  key = keys[0]
  if keys.length == 1
    # final key: append to an existing array, set when unset, otherwise pair
    # the existing value with the new one in an array
    hash[key] = if hash[key].is_a?(::Array) then (hash[key] << new_val)
                else hash[key].nil? ? new_val : [hash[key], new_val]
                end
  elsif hash[key].is_a?(::Array)
    # intermediate array: descend into its last element, making sure that
    # element is a hash (a nil last element is replaced, anything else that
    # is not a hash gets a new hash appended after it)
    hash[key][-1] = {} if !hash[key].empty? && hash[key][-1].nil?
    hash[key] << {} if hash[key].empty? || !hash[key][-1].is_a?(::Hash)
    set_nested_value(hash[key][-1], keys[1..-1], new_val)
  elsif hash[key].nil? || hash[key].empty?
    # nothing usable at this level yet: start a fresh hash
    hash[key] = {}
    set_nested_value(hash[key], keys[1..-1], new_val)
  elsif hash[key].is_a?(::Hash) && !hash[key][keys[1]]
    # existing hash without a value for the next key: descend into it
    set_nested_value(hash[key], keys[1..-1], new_val)
  elsif !hash[key][keys[1]]
    # existing non-hash value: keep it and add a new hash beside it
    hash[key] = [hash[key], {}]
    set_nested_value(hash[key][-1], keys[1..-1], new_val)
  else
    # the next key already has a value: descend and let the deeper call
    # decide how to combine it
    set_nested_value(hash[key], keys[1..-1], new_val)
  end
  hash
end

.smartformat(text) ⇒ Object

TODO needs internationalisation



38
39
40
41
42
43
44
45
46
# File 'lib/utils/main.rb', line 38

# Converts ASCII dashes to em dashes, applies String#smart_format, and
# re-encodes the result with HTMLEntities' :basic instruction.
#
# " -- " and " - " become an em dash between thin spaces
# (&#8201;&#8212;&#8201;); any remaining "--" becomes a plain em dash.
#
# TODO needs internationalisation
#
# @param text [String]
# @return [String]
def smartformat(text)
  coder = HTMLEntities.new
  dashed = text.gsub(/ --? /, "&#8201;&#8212;&#8201;")
    .gsub("--", "&#8212;")
  coder.encode(coder.decode(dashed).smart_format, :basic)
end

.strict_capitalize_first(str) ⇒ Object



90
91
92
93
94
95
96
# File 'lib/utils/main.rb', line 90

# Upcases the first character of str and leaves every other character as it
# is (unlike String#capitalize, nothing is downcased).
#
# The string is split on single spaces and re-joined, so trailing spaces are
# dropped, as before. An empty first word (str starting with a space) is now
# left alone instead of raising NoMethodError on nil.
#
# @param str [String]
# @return [String]
def strict_capitalize_first(str)
  words = str.split(/ /)
  first = words.first
  unless first.nil? || first.empty?
    words[0] = first[0].upcase + first[1..-1]
  end
  words.join(" ")
end

.strict_capitalize_phrase(str) ⇒ Object



82
83
84
85
86
87
88
# File 'lib/utils/main.rb', line 82

# Upcases the first character of every space-separated word of str and
# leaves all other characters as they are (nothing is downcased).
#
# The string is split on single spaces and re-joined, so trailing spaces are
# dropped, as before. Empty words (from leading or consecutive spaces) are
# now kept as-is instead of raising NoMethodError on nil.
#
# @param str [String]
# @return [String]
def strict_capitalize_phrase(str)
  str.split(/ /).map do |word|
    word.empty? ? word : word[0].upcase + word[1..-1]
  end.join(" ")
end

.to_ncname(tag, asciionly: true) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/utils/xml.rb', line 30

# Converts tag into a string usable as a name by replacing or prefixing
# characters it treats as illegal.
#
# * every character matching NAMECHAR (or "#") becomes "_";
# * a first character matching NAMESTARTCHAR (or "#") is kept but prefixed
#   with "_".
#
# @param tag [String]
# @param asciionly [Boolean] when true, tag is first encoded with
#   HTMLEntities (:basic, :hexadecimal)
# @return [String]
def to_ncname(tag, asciionly: true)
  tag = HTMLEntities.new.encode(tag, :basic, :hexadecimal) if asciionly
  first = tag[0]
  prefix =
    if %r([#{NAMECHAR}#])o.match?(first) then "_"
    elsif %r([#{NAMESTARTCHAR}#])o.match?(first) then "_#{first}"
    else first
    end
  rest = tag[1..-1] || ""
  (prefix || "") + rest.gsub(%r([#{NAMECHAR}#])o, "_")
end

.to_xhtml_fragment(xml) ⇒ Object



76
77
78
79
# File 'lib/utils/xml.rb', line 76

# Parses xml as a fragment belonging to a document parsed from NOKOHEAD.
#
# @param xml [String] markup to parse
# @return [Object] the fragment returned by Nokogiri's Document#fragment
def to_xhtml_fragment(xml)
  ::Nokogiri::XML.parse(NOKOHEAD).fragment(xml)
end

.wrap_in_para(node, out) ⇒ Object

if the contents of node are blocks, output them to out; else, wrap them in <p>



100
101
102
103
104
105
# File 'lib/utils/xml.rb', line 100

# Writes the content of node to out: directly when node.blocks? is true,
# otherwise inside an element created with out.p.
#
# @param node [#blocks?, #content] source node
# @param out [#<<, #p] builder receiving the output
# @return [Object] the result of out << or out.p
def wrap_in_para(node, out)
  return out << node.content if node.blocks?

  out.p { |para| para << node.content }
end