Module: Metanorma::Utils

Defined in:: lib/utils/image.rb,
lib/utils/log.rb,
lib/utils/xml.rb,
lib/utils/main.rb,
lib/utils/version.rb,
lib/utils/namespace.rb,
lib/utils/hash_transform_keys.rb,
lib/utils/hash_transform_keys.rb

Overview

Image methods were moved to the Vectory gem

Defined Under Namespace

Modules: Array, Hash Classes: Log, Namespace

Constant Summary collapse

NAMECHAR =

"\u0000-\u002c\u002f\u003a-\u0040\\u005b-\u005e" \
"\u0060\u007b-\u00b6\u00b8-\u00bf\u00d7\u00f7\u037e" \
"\u2000-\u200b" \
"\u200e-\u203e\u2041-\u206f\u2190-\u2bff\u2ff0-\u3000".freeze

NAMESTARTCHAR =

"\\u002d\u002e\u0030-\u0039\u00b7\u0300-\u036f" \
"\u203f-\u2040".freeze

NOKOHEAD =

"<!DOCTYPE html SYSTEM\n\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head> <title></title> <meta charset=\"UTF-8\" /> </head>\n<body> </body> </html>\n".freeze

LONGSTR_THRESHOLD =

LONGSTR_NOPUNCT =

STR_BREAKUP_RE =

%r{
 (?<=[=_—–\u2009→?+;]) | # break after any of these
 (?<=[,.:])(?!\d) | # break on punct only if not preceding digit
 (?<=[>])(?![>]) | # > not >->
 (?<=[\]])(?![\]]) | # ] not ]-]
 (?<=//) | # //
 (?<=[/])(?![/]) | # / not /-/
 (?<![<])(?=[<]) | # < not <-<
 (?<=\p{L})(?=[(\{\[]\p{L}) # letter and bracket, followed by letter
}x.freeze

CAMEL_CASE_RE =

%r{
  (?<=\p{Ll}\p{Ll})(?=\p{Lu}\p{Ll}\p{Ll}) # 2 lowerc / upperc, 2 lowerc
}x.freeze

VERSION =

"1.7.7".freeze

Class Method Summary collapse

.anchor_attributes ⇒ Object

all element/attribute pairs that are ID anchors in Metanorma.
.anchor_or_uuid(node = nil) ⇒ Object
.asciidoc_sub(text, flavour = :standoc) ⇒ Object
.attr_code(attributes) ⇒ Object
.break_up_long_str(text, threshold = LONGSTR_THRESHOLD, nopunct = LONGSTR_NOPUNCT) ⇒ Object

Break on punctuation every LONGSTR_THRESHOLD characters, with a zero-width space; if punctuation fails, try breaking on camel case, with a soft hyphen; break regardless every LONGSTR_THRESHOLD * LONGSTR_NOPUNCT characters, with a soft hyphen.
.break_up_long_str1(text, iteration, nopunct) ⇒ Object
.break_up_long_str2(text) ⇒ Object
.case_transform_xml(xml, kase) ⇒ Object
.create_namespace(xmldoc) ⇒ Object
.csv_split(text, delim = ";") ⇒ Object

Rewrites `, "` as `,"` before parsing: the CSV definition does not deal with a space followed by a quote at the start of a field.
.default_script(lang) ⇒ Object
.dl_to_attrs(elem, dlist, name) ⇒ Object

convert definition list term/value pair into Nokogiri XML attribute.
.dl_to_elems(ins, elem, dlist, name) ⇒ Object

convert definition list term/value pairs into Nokogiri XML elements.
.dl_to_elems1(term, name, ins) ⇒ Object
.endash_date(elem) ⇒ Object
.external_path(path) ⇒ Object
.guid_anchor?(id) ⇒ Boolean
.localdir(node) ⇒ Object
.noko(script = "Latn", &block) ⇒ Object

Block for processing XML document fragments as XHTML, to allow for HTML entities. Unescapes special characters used in Asciidoctor substitution processing.
.noko_html(&block) ⇒ Object
.ns(xpath) ⇒ Object
.numeric_escapes(xml) ⇒ Object
.rtl_script?(script) ⇒ Boolean
.set_nested_value(hash, keys, new_val) ⇒ Object

Set a hash value using a path of keys; modified from stackoverflow.com/a/42425884.
.smartformat(text) ⇒ Object

TODO needs internationalisation.
.strict_capitalize_first(str) ⇒ Object
.strict_capitalize_phrase(str) ⇒ Object
.to_ncname(tag, asciionly: true) ⇒ Object
.to_xhtml_fragment(xml) ⇒ Object
.wrap_in_para(node, out) ⇒ Object

if the contents of node are blocks, output them to out; else, wrap them in <p>.

Class Method Details

.anchor_attributes ⇒ `Object`

all element/attribute pairs that are ID anchors in Metanorma

# File 'lib/utils/xml.rb', line 108

# Lists all element/attribute pairs that are ID anchors in Metanorma.
#
# @return [Array<Array<String>>] two-element arrays of
#   [element name, attribute name]; the first two entries use "*" in the
#   element position (presumably a wildcard for any element).
def anchor_attributes
  [
    ["*", "id"],
    ["*", "bibitemid"],
    ["review", "from"],
    ["review", "to"],
    ["index", "to"],
    ["xref", "target"],
    ["callout", "target"],
    ["location", "target"],
  ]
end

.anchor_or_uuid(node = nil) ⇒ `Object`

# File 'lib/utils/xml.rb', line 43

# Returns the node's own id, or a freshly generated "_<uuid>" anchor when
# the node is nil or its id is nil or empty.
#
# The random UUID is only generated when it is actually needed, so calls
# on nodes that already carry an id do no UUID work.
#
# @param node [#id, nil] object responding to #id, or nil
# @return [String] the existing id, or "_" followed by a random UUID
def anchor_or_uuid(node = nil)
  id = node&.id
  return id unless id.nil? || id.empty?

  "_#{UUIDTools::UUID.random_create}"
end

.asciidoc_sub(text, flavour = :standoc) ⇒ `Object`

# File 'lib/utils/main.rb', line 20

# Runs Asciidoctor substitutions over a snippet of text.
#
# The text is loaded into a headerless Asciidoctor document using the given
# backend; the substitutions of the first parsed block are then applied to
# that block's source.
#
# @param text [String, nil] Asciidoc source
# @param flavour [Symbol] passed to Asciidoctor as the :backend option
# @return [String, nil] nil for nil input, "" for empty input, otherwise
#   the result of apply_subs on the first block
def asciidoc_sub(text, flavour = :standoc)
  return if text.nil?
  return "" if text.empty?

  options = { header_footer: false, backend: flavour }
  document = Asciidoctor::Document.new(text.lines.entries, options)
  first_block = document.parse.blocks.first
  first_block.apply_subs(first_block.source)
end

.attr_code(attributes) ⇒ `Object`

# File 'lib/utils/xml.rb', line 24

# Prepares an attribute hash: entries whose value is nil are dropped, and
# String values have their HTML entities decoded. Other values are passed
# through untouched.
#
# @param attributes [Hash] attribute name => value
# @return [Hash] a new hash; the argument is not modified
def attr_code(attributes)
  attributes.compact.to_h do |name, value|
    decoded = case value
              when String then HTMLEntities.new.decode(value)
              else value
              end
    [name, decoded]
  end
end

.break_up_long_str(text, threshold = LONGSTR_THRESHOLD, nopunct = LONGSTR_NOPUNCT) ⇒ `Object`

Break on punctuation every LONGSTR_THRESHOLD characters, with a zero-width space; if punctuation fails, try breaking on camel case, with a soft hyphen; break regardless every LONGSTR_THRESHOLD * LONGSTR_NOPUNCT characters, with a soft hyphen.

# File 'lib/utils/main.rb', line 136

# Inserts break opportunities into long unbroken runs of text.
#
# The text is split in front of every whitespace character or hyphen; each
# resulting word of at least `threshold` characters is cut into chunks of at
# most `threshold` characters, and every full-size chunk is handed to
# break_up_long_str1 together with its 1-based position in the word.
#
# Changes from the previous implementation:
# * the chunking pattern is no longer compiled with /o, which froze the
#   first `threshold` ever seen for all later calls;
# * the chunking pattern uses /m so that a newline at the start of a long
#   word is kept instead of being silently skipped by String#scan;
# * the blank check is anchored on the whole string (\A..\z) rather than on
#   any single line, and the redundant per-word re-check of `text` is gone.
#
# @param text [String] text to process
# @param threshold [Integer] minimum word length that triggers breaking,
#   and the chunk size
# @param nopunct [Integer] forwarded to break_up_long_str1
# @return [String] text with break characters inserted
def break_up_long_str(text, threshold = LONGSTR_THRESHOLD, nopunct = LONGSTR_NOPUNCT)
  return text if /\A\s*\z/.match?(text)

  # Built per call: the pattern depends on the threshold argument.
  chunk_re = /.{,#{threshold}}/m
  text.split(/(?=(?:\s|-))/).map do |w|
    next w if w.size < threshold

    w.scan(chunk_re).map.with_index do |w1, i|
      w1.size < threshold ? w1 : break_up_long_str1(w1, i + 1, nopunct)
    end.join
  end.join
end

.break_up_long_str1(text, iteration, nopunct) ⇒ `Object`

# File 'lib/utils/main.rb', line 163

# Breaks a single chunk of text, using the pieces and separator proposed by
# break_up_long_str2.
#
# When the chunk could be split, the separator is inserted before the last
# piece. When it could not, a soft hyphen is appended, but only if
# `iteration` is a multiple of `nopunct`.
#
# @param text [String] chunk to break
# @param iteration [Integer] 1-based position of the chunk in its word
# @param nopunct [Integer] a forced soft hyphen is added every `nopunct`
#   chunks
# @return [String]
def break_up_long_str1(text, iteration, nopunct)
  pieces, separator = break_up_long_str2(text)
  unless pieces.size == 1
    pieces[-1] = "#{separator}#{pieces[-1]}"
    return pieces.join
  end
  # could not break up: force a soft hyphen on every nopunct-th chunk
  return text unless (iteration % nopunct).zero?

  text + "\u00ad"
end

.break_up_long_str2(text) ⇒ `Object`

# File 'lib/utils/main.rb', line 175

# Proposes a way to split a chunk of text.
#
# First tries STR_BREAKUP_RE, paired with a zero-width space (U+200B). If
# that yields exactly one piece, falls back to CAMEL_CASE_RE, paired with a
# soft hyphen (U+00AD).
#
# @param text [String]
# @return [Array(Array<String>, String)] the pieces and the separator to
#   use between them
def break_up_long_str2(text)
  on_punct = text.split(STR_BREAKUP_RE, -1)
  return [on_punct, "\u200b"] unless on_punct.size == 1

  [text.split(CAMEL_CASE_RE), "\u00ad"]
end

.case_transform_xml(xml, kase) ⇒ `Object`

# File 'lib/utils/xml.rb', line 139

# Applies a String case method to every text node of an XML fragment.
#
# The fragment is wrapped in a temporary <root> element for parsing; each
# text node is replaced by the result of sending `kase` to its text, and
# the children of the root are serialised again.
#
# @param xml [String] XML fragment
# @param kase [Symbol, String] method name sent to each text node's text
#   (e.g. :upcase)
# @return [String] the transformed fragment
def case_transform_xml(xml, kase)
  doc = Nokogiri::XML("<root>#{xml}</root>")
  doc.traverse do |node|
    node.replace(node.text.send(kase)) if node.text?
  end
  doc.root.children.to_xml
end

.create_namespace(xmldoc) ⇒ `Object`



21
22
23

# File 'lib/utils/namespace.rb', line 21

# Wraps the given XML document in a new Namespace instance.
#
# @param xmldoc [Object] passed unchanged to Namespace.new
# @return [Namespace]
def create_namespace(xmldoc)
  Namespace.new(xmldoc)
end

.csv_split(text, delim = ";") ⇒ `Object`

Rewrites `, "` as `,"` before parsing: the CSV definition does not deal with a space followed by a quote at the start of a field

# File 'lib/utils/main.rb', line 13

# Splits a delimited line into trimmed fields using the CSV parser.
#
# Before parsing, a delimiter followed by a space and an opening quote is
# rewritten to delimiter + quote, because the CSV definition does not deal
# with a space followed by a quote at the start of a field.
#
# The delimiter is escaped before being used in the pattern, so delimiters
# that are regex metacharacters (such as "|" or ".") are handled literally.
#
# @param text [String, nil] line to split
# @param delim [String] column separator
# @return [Array<String>, nil] stripped non-nil fields; [] for nil or empty
#   input; nil if the CSV parser returns nil
def csv_split(text, delim = ";")
  return [] if text.nil? || text.empty?

  # Block form keeps the replacement literal even if delim has backslashes.
  normalised = text.gsub(/#{Regexp.escape(delim)} "(?!")/) { "#{delim}\"" }
  fields = CSV.parse_line(normalised, liberal_parsing: true, col_sep: delim)
  fields&.compact&.map(&:strip)
end

.default_script(lang) ⇒ `Object`

# File 'lib/utils/main.rb', line 109

# Maps a language code to the script code used for it by default.
#
# @param lang [String] language code such as "ar" or "zh"
# @return [String] four-letter script code; "Latn" for any language not
#   listed
def default_script(lang)
  {
    "ar" => "Arab", "fa" => "Arab",
    "ur" => "Aran",
    "ru" => "Cyrl", "bg" => "Cyrl",
    "hi" => "Deva",
    "el" => "Grek",
    "zh" => "Hans",
    "ko" => "Kore",
    "he" => "Hebr",
    "ja" => "Jpan"
  }.fetch(lang, "Latn")
end

.dl_to_attrs(elem, dlist, name) ⇒ `Object`

convert definition list term/value pair into Nokogiri XML attribute

# File 'lib/utils/xml.rb', line 115

# Converts a definition list term/value pair into an attribute on `elem`.
#
# Looks up the <dt> whose text is `name`; the attribute value is the text
# of the following dd/p if there is one, otherwise of the following dd.
# Nothing is set when the term or its value cannot be found.
#
# @param elem [#[]=] receives the attribute
# @param dlist [#at] definition list node
# @param name [String] term text, also used as the attribute name
# @return [String, nil] the assigned text, or nil when nothing was set
def dl_to_attrs(elem, dlist, name)
  term = dlist.at("./dt[text()='#{name}']")
  return unless term

  value = term.at("./following::dd/p") || term.at("./following::dd")
  return unless value

  elem[name] = value.text
end

.dl_to_elems(ins, elem, dlist, name) ⇒ `Object`

convert definition list term/value pairs into Nokogiri XML elements

# File 'lib/utils/xml.rb', line 122

# Converts definition list term/value pairs into XML elements.
#
# Insertion starts after the last existing `name` child of `elem` if there
# is one, otherwise after `ins`. Every <dt> whose text is `name` is passed
# to dl_to_elems1, whose return value becomes the next insertion point.
#
# @param ins [Object] default insertion point
# @param elem [#at] element searched for existing `name` children
# @param dlist [#xpath] definition list node
# @param name [String] term text and element name
# @return [Object] the final insertion point
def dl_to_elems(ins, elem, dlist, name)
  start = elem.at("./#{name}[last()]") || ins
  dlist.xpath("./dt[text()='#{name}']").inject(start) do |cursor, term|
    dl_to_elems1(term, name, cursor)
  end
end

.dl_to_elems1(term, name, ins) ⇒ `Object`

# File 'lib/utils/xml.rb', line 131

# Turns the value of one definition list term into an element named `name`
# and places it directly after `ins`.
#
# The value is the <dd> following the term; if that <dd> has exactly one
# child element and it is a <p>, the <p> is used instead.
#
# @param term [#at] the <dt> node
# @param name [String] new element name
# @param ins [#next=, #next] node after which the value is inserted
# @return [Object] the node now following `ins`
def dl_to_elems1(term, name, ins)
  value = term.at("./following::dd")
  children = value.elements
  if children && children.size == 1 && children.first.name == "p"
    value = children.first
  end
  value.name = name
  ins.next = value
  ins.next
end

.endash_date(elem) ⇒ `Object`

# File 'lib/utils/main.rb', line 48

# Replaces hyphen date separators with the en dash entity (&#8211;) in
# every text node under `elem`.
#
# One or two hyphens surrounded by whitespace are replaced together with
# that whitespace; any remaining double hyphen is replaced afterwards.
#
# @param elem [#traverse] node whose text nodes are rewritten in place
# @return [Object] whatever elem.traverse returns
def endash_date(elem)
  elem.traverse do |node|
    if node.text?
      spaced = node.text.gsub(/\s+--?\s+/, "&#8211;")
      node.replace(spaced.gsub("--", "&#8211;"))
    end
  end
end

.external_path(path) ⇒ `Object`

# File 'lib/utils/main.rb', line 98

# Formats a path for use outside Ruby on the current platform.
#
# On Windows (detected from RUBY_PLATFORM) forward slashes become
# backslashes and the result is wrapped in double quotes if it contains
# whitespace. On other platforms the path is returned as is.
#
# The conversion works on a copy: the caller's string is never modified,
# so frozen strings are accepted.
#
# @param path [String]
# @return [String]
def external_path(path)
  windows = RUBY_PLATFORM.match?(/(win|w)(32|64)$/) ||
    RUBY_PLATFORM.match?(/mswin|mingw/)
  return path unless windows

  # Block form: the replacement is a literal backslash, no escape handling.
  converted = path.gsub("/") { "\\" }
  converted.match?(/\s/) ? "\"#{converted}\"" : converted
end

.guid_anchor?(id) ⇒ `Boolean`

# File 'lib/utils/xml.rb', line 148

# Tells whether an id has the form of a generated GUID anchor: an
# underscore followed by a UUID in 8-4-4-4-12 hexadecimal form (either
# case).
#
# The pattern is anchored on the whole string (\A..\z), so a multi-line
# string that merely contains such a line is rejected.
#
# @param id [String, nil]
# @return [Boolean]
def guid_anchor?(id)
  /\A_\h{8}-\h{4}-\h{4}-\h{4}-\h{12}\z/.match?(id)
end

.localdir(node) ⇒ `Object`

# File 'lib/utils/main.rb', line 32

# Returns the directory of the document file, with a trailing slash.
#
# @param node [#attr] object whose "docfile" attribute holds the document
#   path
# @return [String] the parent directory of docfile followed by "/", or
#   "./" when docfile is nil
def localdir(node)
  docfile = node.attr("docfile")
  return "./" if docfile.nil?

  "#{Pathname.new(docfile).parent}/"
end

.noko(script = "Latn", &block) ⇒ `Object`

Block for processing XML document fragments as XHTML, to allow for HTML entities. Unescapes special characters used in Asciidoctor substitution processing

# File 'lib/utils/xml.rb', line 51

# Builds an XML fragment with a Nokogiri builder block and returns it as
# an array of serialised lines.
#
# The fragment is created inside a document parsed from NOKOHEAD and
# serialised as US-ASCII XML without indentation. For each line, a newline
# directly after ">" is removed, any other trailing newline becomes a space
# (or nothing for the Hans, Hant and Jpan scripts), and the character
# references &#150;, &#151;, &#x96; and &#x97; are turned into the
# characters U+0096 / U+0097.
#
# @param script [String] script code; decides what replaces line ends
# @yield builder block, passed to Nokogiri::XML::Builder.with
# @return [Array<String>] processed lines
def noko(script = "Latn", &block)
  fragment = ::Nokogiri::XML.parse(NOKOHEAD).fragment("")
  ::Nokogiri::XML::Builder.with(fragment, &block)
  line_end = %w(Hans Hant Jpan).include?(script) ? "" : " "
  char_refs = [["&#150;", "\u0096"], ["&#151;", "\u0097"],
               ["&#x96;", "\u0096"], ["&#x97;", "\u0097"]]
  serialised = fragment.to_xml(
    encoding: "US-ASCII", indent: 0,
    save_with: Nokogiri::XML::Node::SaveOptions::AS_XML
  )
  serialised.lines.map do |line|
    joined = line.gsub(/>\n$/, ">").gsub(/\n$/m, line_end)
    char_refs.reduce(joined) { |acc, (ref, char)| acc.gsub(ref, char) }
  end
end

.noko_html(&block) ⇒ `Object`

# File 'lib/utils/xml.rb', line 65

# Builds an XML fragment with a Nokogiri builder block and returns it as
# an array of serialised lines, with trailing whitespace and newlines
# removed from each line.
#
# The fragment is created inside a document parsed from NOKOHEAD and
# serialised as UTF-8 XML without indentation.
#
# @yield builder block, passed to Nokogiri::XML::Builder.with
# @return [Array<String>] processed lines
def noko_html(&block)
  fragment = ::Nokogiri::XML.parse(NOKOHEAD).fragment("")
  ::Nokogiri::XML::Builder.with(fragment, &block)
  markup = fragment.to_xml(
    encoding: "UTF-8", indent: 0,
    save_with: Nokogiri::XML::Node::SaveOptions::AS_XML
  )
  markup.lines.map { |line| line.gsub(/\s*\n/, "") }
end

.ns(xpath) ⇒ `Object`

# File 'lib/utils/xml.rb', line 81

# Adds the "xmlns:" prefix to the element names of an XPath expression.
#
# Names are prefixed when they follow "/", follow an axis separator "::",
# or open a predicate ("[name =", "[name/", "[name[", "[name]").
#
# A name must start with an ASCII letter or underscore. The previous
# character class `a-zA-z` was a typo for `a-zA-Z` and also matched the
# punctuation between "Z" and "a" (`[`, `\`, `]`, `^`, backtick); the
# underscore, which that range happened to cover, is kept explicitly.
#
# @param xpath [String] XPath without namespace prefixes
# @return [String] XPath with "xmlns:" prefixes
def ns(xpath)
  xpath.gsub(%r{/([a-zA-Z_])}, "/xmlns:\\1")
    .gsub(%r{::([a-zA-Z_])}, "::xmlns:\\1")
    .gsub(%r{\[([a-zA-Z_][a-z0-9A-Z@/-]* ?=)}, "[xmlns:\\1")
    .gsub(%r{\[([a-zA-Z_][a-z0-9A-Z@/-]*[/\[\]])}, "[xmlns:\\1")
end

.numeric_escapes(xml) ⇒ `Object`

# File 'lib/utils/xml.rb', line 88

# Rewrites named entity references in a string as hexadecimal numeric
# references.
#
# The string is split on "&...;" references that contain no whitespace,
# "#" or ";" in their name (so numeric references are left alone); each
# such reference is decoded and re-encoded in hexadecimal form, and
# everything else is passed through unchanged.
#
# @param xml [String]
# @return [String]
def numeric_escapes(xml)
  coder = HTMLEntities.new
  tokens = xml.split(/(&[^ \r\n\t#;]+;)/)
  tokens.map do |token|
    next token unless /^(&[^ \t\r\n#;]+;)/.match?(token)

    coder.encode(coder.decode(token), :hexadecimal)
  end.join
end

.rtl_script?(script) ⇒ `Boolean`



125
126
127

# File 'lib/utils/main.rb', line 125

# Tells whether the script code is one of Arab, Aran or Hebr.
#
# @param script [String] script code
# @return [Boolean]
def rtl_script?(script)
  case script
  when "Arab", "Aran", "Hebr" then true
  else false
  end
end

.set_nested_value(hash, keys, new_val) ⇒ `Object`

Set a hash value using a path of keys; modified from stackoverflow.com/a/42425884

# File 'lib/utils/main.rb', line 58

# Sets a value inside a nested hash, following a path of keys and creating
# intermediate containers as needed. Existing values are not overwritten:
# a second value stored under the same path turns the entry into an array.
#
# @param hash [Hash] hash to modify in place
# @param keys [Array] path of keys; the last key receives the value
# @param new_val [Object] value to store
# @return [Hash] the same `hash` object
def set_nested_value(hash, keys, new_val)
  key = keys[0]
  if keys.length == 1
    # Leaf: append to an existing array, set an unset key directly, or wrap
    # the existing scalar and the new value together in an array.
    hash[key] = if hash[key].is_a?(::Array) then (hash[key] << new_val)
                else hash[key].nil? ? new_val : [hash[key], new_val]
                end
  elsif hash[key].is_a?(::Array)
    # Intermediate array: descend into its last element, making sure that
    # element is a hash (a nil last slot is replaced, otherwise a new hash
    # is appended).
    hash[key][-1] = {} if !hash[key].empty? && hash[key][-1].nil?
    hash[key] << {} if hash[key].empty? || !hash[key][-1].is_a?(::Hash)
    set_nested_value(hash[key][-1], keys[1..-1], new_val)
  elsif hash[key].nil? || hash[key].empty?
    # Nothing usable stored yet: start a fresh hash for the rest of the path.
    hash[key] = {}
    set_nested_value(hash[key], keys[1..-1], new_val)
  elsif hash[key].is_a?(::Hash) && !hash[key][keys[1]]
    # Existing hash without the next key: descend into it.
    set_nested_value(hash[key], keys[1..-1], new_val)
  elsif !hash[key][keys[1]]
    # Existing non-hash value: keep it and add a new hash beside it in an
    # array, then descend into the new hash.
    hash[key] = [hash[key], {}]
    set_nested_value(hash[key][-1], keys[1..-1], new_val)
  else
    # The next key already has a value: descend and let the deeper call
    # decide how to combine it with the new value.
    set_nested_value(hash[key], keys[1..-1], new_val)
  end
  hash
end

.smartformat(text) ⇒ `Object`

TODO needs internationalisation

# File 'lib/utils/main.rb', line 38

# Applies smart typography to a string.
#
# One or two hyphens surrounded by single spaces become an em dash between
# thin spaces (as character references), and any remaining double hyphen
# becomes an em dash. The result is entity-decoded, passed through
# String#smart_format, and re-encoded with the :basic entity set.
#
# TODO needs internationalisation
#
# @param text [String]
# @return [String]
def smartformat(text)
  coder = HTMLEntities.new
  dashed = text.gsub(/ --? /, "&#8201;&#8212;&#8201;").gsub("--", "&#8212;")
  formatted = coder.decode(dashed).smart_format
  coder.encode(formatted, :basic)
end

.strict_capitalize_first(str) ⇒ `Object`

# File 'lib/utils/main.rb', line 90

# Upcases the first character of the first space-separated word, leaving
# every other character as it is (no downcasing of the rest).
#
# An empty first word (string starting with a space) is left untouched;
# previously this raised NoMethodError.
#
# @param str [String]
# @return [String] a new string; the words are re-joined with single
#   spaces
def strict_capitalize_first(str)
  str.split(/ /).each_with_index.map do |w, i|
    next w unless i.zero?

    letters = w.chars
    letters.first&.upcase! # nil when the word is empty
    letters.join
  end.join(" ")
end

.strict_capitalize_phrase(str) ⇒ `Object`

# File 'lib/utils/main.rb', line 82

# Upcases the first character of every space-separated word, leaving every
# other character as it is (no downcasing of the rest).
#
# Empty words, produced by leading or consecutive spaces, are left
# untouched; previously they raised NoMethodError.
#
# @param str [String]
# @return [String] a new string; the words are re-joined with single
#   spaces
def strict_capitalize_phrase(str)
  str.split(/ /).map do |w|
    letters = w.chars
    letters.first&.upcase! # nil when the word is empty
    letters.join
  end.join(" ")
end

.to_ncname(tag, asciionly: true) ⇒ `Object`

# File 'lib/utils/xml.rb', line 30

# Converts a string into an XML NCName-style identifier.
#
# With asciionly, the tag is first entity-encoded (:basic, :hexadecimal).
# Then the first character is replaced by "_" if it is in the NAMECHAR set
# (or is "#"), prefixed with "_" if it is in the NAMESTARTCHAR set (or is
# "#"), and kept otherwise; in the rest of the string every character in
# the NAMECHAR set (or "#") is replaced by "_".
#
# @param tag [String]
# @param asciionly [Boolean] entity-encode the tag first
# @return [String]
def to_ncname(tag, asciionly: true)
  tag = HTMLEntities.new.encode(tag, :basic, :hexadecimal) if asciionly
  illegal = %r([#{NAMECHAR}#])o
  head = tag[0]
  prefix =
    if illegal.match?(head) then "_"
    elsif %r([#{NAMESTARTCHAR}#])o.match?(head) then "_#{head}"
    else head
    end
  rest = tag[1..-1] || ""
  (prefix || "") + rest.gsub(illegal, "_")
end

.to_xhtml_fragment(xml) ⇒ `Object`

# File 'lib/utils/xml.rb', line 76

# Parses an XML string as a fragment of a document built from NOKOHEAD.
#
# @param xml [String] markup for the fragment
# @return [Object] the result of calling #fragment on the parsed document
def to_xhtml_fragment(xml)
  ::Nokogiri::XML.parse(NOKOHEAD).fragment(xml)
end

.wrap_in_para(node, out) ⇒ `Object`

if the contents of node are blocks, output them to out; else, wrap them in <p>

# File 'lib/utils/xml.rb', line 100

# Writes the content of `node` to `out`: directly if the node reports
# blocks, otherwise wrapped in a <p> element.
#
# @param node [#blocks?, #content] source node
# @param out [#<<, #p] builder receiving the output
# @return [Object] the result of the write on `out`
def wrap_in_para(node, out)
  return out << node.content if node.blocks?

  out.p { |para| para << node.content }
end

Module: Metanorma::Utils

Overview

Defined Under Namespace

Constant Summary collapse

Class Method Summary collapse

Class Method Details

.anchor_attributes ⇒ Object

.anchor_or_uuid(node = nil) ⇒ Object

.asciidoc_sub(text, flavour = :standoc) ⇒ Object

.attr_code(attributes) ⇒ Object

.break_up_long_str(text, threshold = LONGSTR_THRESHOLD, nopunct = LONGSTR_NOPUNCT) ⇒ Object

.break_up_long_str1(text, iteration, nopunct) ⇒ Object

.break_up_long_str2(text) ⇒ Object

.case_transform_xml(xml, kase) ⇒ Object

.create_namespace(xmldoc) ⇒ Object

.csv_split(text, delim = ";") ⇒ Object

.default_script(lang) ⇒ Object

.dl_to_attrs(elem, dlist, name) ⇒ Object

.dl_to_elems(ins, elem, dlist, name) ⇒ Object

.dl_to_elems1(term, name, ins) ⇒ Object

.endash_date(elem) ⇒ Object

.external_path(path) ⇒ Object

.guid_anchor?(id) ⇒ Boolean

.localdir(node) ⇒ Object

.noko(script = "Latn", &block) ⇒ Object

.noko_html(&block) ⇒ Object

.ns(xpath) ⇒ Object

.numeric_escapes(xml) ⇒ Object

.rtl_script?(script) ⇒ Boolean

.set_nested_value(hash, keys, new_val) ⇒ Object

.smartformat(text) ⇒ Object

.strict_capitalize_first(str) ⇒ Object

.strict_capitalize_phrase(str) ⇒ Object

.to_ncname(tag, asciionly: true) ⇒ Object

.to_xhtml_fragment(xml) ⇒ Object

.wrap_in_para(node, out) ⇒ Object