Class: Metanorma::Collection::FileLookup

Inherits:
Object
  • Object
show all
Defined in:
lib/metanorma/collection/filelookup/filelookup.rb,
lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(path, parent) ⇒ FileLookup

Builds a hash that maps each document identifier in the collection to: the document reference (fileref or id), the type of that document reference, and the bibdata entry for that file.



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 15

# Set up the lookup from the collection renderer and populate it from the
# collection manifest.
#
# @param path [Object] stored as @path
# @param parent [Object] source of the xml, isodoc, isodoc_presxml, compile,
#   documents and manifest values copied below
def initialize(path, parent)
  # Own state
  @c = HTMLEntities.new
  @files = {}
  @path = path

  # State borrowed from the parent
  @parent = parent
  @xml = parent.xml
  @isodoc = parent.isodoc
  @isodoc_presxml = parent.isodoc_presxml
  @compile = parent.compile
  @documents = parent.documents

  # Bookkeeping for output files
  @files_to_delete = []
  @disambig = Util::DisambigFiles.new

  # Populate @files last: reading entries relies on the state set above
  @manifest = parent.manifest
  read_files(@manifest.entry, parent.manifest)
  # warn pp @files
end

Instance Attribute Details

#files_to_delete ⇒ Object

Returns the value of attribute files_to_delete.



9
10
11
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 9

# Reader for @files_to_delete, the array that the section-split methods
# append file references to.
def files_to_delete
  @files_to_delete
end

#parent ⇒ Object

Returns the value of attribute parent.



9
10
11
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 9

# Reader for @parent, the object handed to the constructor.
def parent
  @parent
end

Instance Method Details

#add_cover_one_doc_coll(manifest, sectionsplit_manifest, key, entry) ⇒ Object



60
61
62
63
64
65
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 60

# Copy the cover file to "index.html" next to the sectionsplit manifest and
# register that copy in the manifest under "<key>:index1.html".
#
# @param manifest [Hash] manifest being built; receives the new entry
# @param sectionsplit_manifest [#file] its file location fixes the target dir
# @param key [String] identifier used to build the manifest key
# @param entry [Hash] cover entry; entry[:ref] is the file that is copied
# @return [Hash] the entry stored in the manifest
def add_cover_one_doc_coll(manifest, sectionsplit_manifest, key, entry)
  target_dir = File.dirname(sectionsplit_manifest.file)
  index_path = File.join(target_dir, "index.html")
  FileUtils.cp(entry[:ref], index_path)
  index_entry = entry.merge(out_path: "index.html", ref: index_path)
  manifest["#{key}:index1.html"] = index_entry
end

#add_document_suffix(identifier, doc) ⇒ Object



133
134
135
136
137
138
139
140
141
142
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 133

# Derive a suffix from the identifier, apply it to the anchor/id attributes
# of the document and to url(#...) references in style attributes, and
# append it to the root element's "document_suffix" attribute.
#
# @param identifier [String] document identifier the suffix is derived from
# @param doc [Object] parsed XML document, modified in place
# @return [String] the updated "document_suffix" attribute value
def add_document_suffix(identifier, doc)
  suffix = Metanorma::Utils::to_ncname(identifier)
  Util::anchor_id_attributes.each do |tag_name, attr_name|
    Util::add_suffix_to_attrs(doc, suffix, tag_name, attr_name, @isodoc)
  end
  url_in_css_styles(doc, suffix)
  root = doc.root
  # Append rather than overwrite: an existing suffix on the root is kept
  root["document_suffix"] = (root["document_suffix"] || "") + suffix
end

#add_section_split ⇒ Object



7
8
9
10
11
12
13
14
15
16
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 7

# Rebuild @files so that every entry flagged :sectionsplit (and not
# :attachment) is preceded by the entries produced by splitting it.
#
# @return [Hash] the new value of @files
def add_section_split
  expanded = {}
  # Iterate over a snapshot of the keys, as the original did
  @files.keys.each do |ident|
    entry = @files[ident]
    if entry[:sectionsplit] && !entry[:attachment]
      process_section_split_instance(ident, expanded)
      cleanup_section_split_instance(ident, expanded)
    end
    expanded[ident] = @files[ident]
  end
  @files = expanded
end

#add_section_split_attachments(manifest, ident) ⇒ Object



77
78
79
80
81
82
83
84
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 77

# Ask the current sectionsplit object for its attachments location (relative
# to the manifest's directory) and build a manifest entry for it.
#
# @param manifest [#file] sectionsplit manifest; its directory is the base
# @param ident [String] key into @files whose :out_path is updated
# @return [Hash, nil] the attachments entry, or nil if there are none
def add_section_split_attachments(manifest, ident)
  base_dir = File.dirname(manifest.file)
  attachments = @sectionsplit.section_split_attachments(out: base_dir)
  return unless attachments

  @files[ident][:out_path] = attachments
  {
    attachment: true,
    index: false,
    out_path: attachments,
    ref: File.join(base_dir, attachments),
  }
end

#add_section_split_cover(manifest, sectionsplit_manifest, ident) ⇒ Object



47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 47

# Generate the cover page for a section-split document, record it as the
# document's :out_path and register it in the manifest under
# "<ident>:index.html". For a one-document collection the cover is also
# registered as the collection index via add_cover_one_doc_coll.
#
# @param manifest [Hash] manifest being built
# @param sectionsplit_manifest [#file] manifest of the split document
# @param ident [String] key of the split document in @files
def add_section_split_cover(manifest, sectionsplit_manifest, ident)
  dir_name = @parent.dir_name_cleanse(ident)
  cover = @sectionsplit.section_split_cover(sectionsplit_manifest, dir_name,
                                            one_doc_collection?)
  @files[ident][:out_path] = cover
  cover_entry = {
    attachment: true,
    index: false,
    out_path: cover,
    ref: File.join(File.dirname(sectionsplit_manifest.file), cover),
  }
  manifest["#{ident}:index.html"] = cover_entry
  one_doc_collection? &&
    add_cover_one_doc_coll(manifest, sectionsplit_manifest, ident,
                           cover_entry)
end

#add_section_split_instance(file, manifest, key, idx) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 86

# Register one section-split output file in the manifest and queue its URL
# for deletion.
#
# @param file [Hash] split output descriptor; :url (and :title, via the prep
#   step) are read
# @param manifest [Hash] manifest being built; receives the new entry
# @param key [String] key of the parent document in @files
# @param idx [Integer] position of this file; all but the first get :bare
# @return [Array] @files_to_delete after the append
def add_section_split_instance(file, manifest, key, idx)
  presfile, newkey, xml = add_section_split_instance_prep(file, key)
  anchors = read_anchors(xml)
  url = file[:url]
  source = @files[key]
  entry = {
    parentid: key,
    presentationxml: true,
    type: "fileref",
    rel_path: url,
    out_path: File.basename(url),
    anchors: anchors,
    anchors_lookup: anchors_lookup(anchors),
    ids: read_ids(xml),
    format: source[:format],
    sectionsplit_output: true,
    indirect_key: @sectionsplit.key,
    bibdata: source[:bibdata],
    ref: presfile,
  }
  entry[:bare] = true unless idx.zero?
  manifest[newkey] = entry
  @files_to_delete << url
end

#add_section_split_instance_prep(file, key) ⇒ Object



100
101
102
103
104
105
106
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 100

# Work out where a section-split output file lives, derive its lookup key,
# and parse it.
#
# @param file [Hash] split output descriptor; :url and :title are read
# @param key [String] key of the parent document in @files
# @return [Array] [path of the split file, new lookup key, parsed XML]
def add_section_split_instance_prep(file, key)
  # The split file sits in the same directory as the parent document
  source_dir = File.dirname(@files[key][:ref])
  split_path = File.join(source_dir, File.basename(file[:url]))
  # key(...) with parentheses calls the #key method, not the parameter
  split_key = key("#{key.strip} #{file[:title]}")
  parsed = Nokogiri::XML(File.read(split_path), &:huge)
  [split_path, split_key, parsed]
end

#anchors_lookup(anchors) ⇒ Object



78
79
80
81
82
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 78

# Flatten a two-level anchors hash into a set-like hash of its innermost
# values.
#
# @param anchors [Hash{Object => Hash}] e.g. the result of read_anchors
# @return [Hash{Object => true}] every inner value mapped to true
def anchors_lookup(anchors)
  lookup = {}
  anchors.each_value do |by_label|
    by_label.each_value { |anchor_id| lookup[anchor_id] = true }
  end
  lookup
end

#bibdata_extract(xml) ⇒ Object



84
85
86
87
88
89
90
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 84

# Collect the per-document lookup data from a parsed document.
#
# @param xml [Object] parsed XML document
# @return [Hash] :anchors, :anchors_lookup, :ids, :bibdata (the bibdata
#   element, if any) and :document_suffix (root attribute)
def bibdata_extract(xml)
  anchors = read_anchors(xml)
  lookup = anchors_lookup(anchors)
  ids = read_ids(xml)
  bibdata = xml.at(ns("//bibdata"))
  suffix = xml.root["document_suffix"]
  { anchors: anchors, anchors_lookup: lookup, ids: ids,
    bibdata: bibdata, document_suffix: suffix }
end

#bibdata_process(entry, ident) ⇒ Object



66
67
68
69
70
71
72
73
74
75
76
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 66

# Fill in the bibliographic data of a file entry.
# Attachments get a bibitem generated from the identifier; documents are
# read, suffixed with the identifier, and mined via bibdata_extract.
#
# @param entry [Hash] file entry, modified in place
# @param ident [String] document identifier
def bibdata_process(entry, ident)
  unless entry[:attachment]
    contents, _filename = targetfile(entry, read: true)
    doc = Nokogiri::XML(contents, &:huge)
    add_document_suffix(ident, doc)
    return entry.merge!(bibdata_extract(doc))
  end

  entry[:bibdata] =
    Metanorma::Collection::Document.attachment_bibitem(ident).root
end

#bibitem_process(entry) ⇒ Object



92
93
94
95
96
97
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 92

# Derive entry[:bibitem] from a copy of entry[:bibdata]: renamed to
# "bibitem", marked hidden="true", with any child "ext" element removed.
#
# @param entry [Hash] file entry; :bibdata is read, :bibitem is written
def bibitem_process(entry)
  bibitem = entry[:bibdata].dup
  entry[:bibitem] = bibitem
  bibitem.name = "bibitem"
  bibitem["hidden"] = "true"
  ext = bibitem.at("./*[local-name() = 'ext']")
  ext&.remove
end

#cleanup_section_split_instance(key, manifest) ⇒ Object



41
42
43
44
45
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 41

# After a document has been split: queue its cover file for deletion and
# record the sectionsplit key on the original entry.
#
# @param key [String] key of the split document in @files
# @param manifest [Hash] manifest holding the "<key>:index.html" cover entry
def cleanup_section_split_instance(key, manifest)
  cover = manifest["#{key}:index.html"]
  @files_to_delete.push(cover[:ref])
  # @files[key].delete(:ids).delete(:anchors)
  @files[key][:indirect_key] = @sectionsplit.key
end

#derive_format(entry, parent) ⇒ Object



40
41
42
43
44
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 40

# Settle the output formats of a manifest entry: inherit from the parent
# (or default to xml/presentation/html) when unset, and always include
# "xml" and "presentation". Attachments are left alone.
#
# @param entry [#attachment, #format, #format=] manifest entry, modified
# @param parent [#format] enclosing manifest entry
# @return [Array<String>, nil] the resulting formats; nil for attachments
def derive_format(entry, parent)
  return if entry.attachment

  unless entry.format
    entry.format = parent.format || %w(xml presentation html)
  end
  entry.format = entry.format | ["xml", "presentation"]
end

#each ⇒ Object



258
259
260
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 258

# Iterate over the [identifier, entry] pairs of @files.
#
# The block is forwarded explicitly: previously a block given to #each was
# silently ignored, because @files.each was called without one.
#
# @yield [ident, entry] each key/value pair of @files
# @return [Enumerator] when no block is given
# @return [Hash] @files, when a block is given
def each(&block)
  @files.each(&block)
end

#each_with_index ⇒ Object



262
263
264
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 262

# Iterate over the [identifier, entry] pairs of @files together with their
# position.
#
# The block is forwarded explicitly: previously a block given to
# #each_with_index was silently ignored.
#
# @yield [pair, index] each [ident, entry] pair and its zero-based index
# @return [Enumerator] when no block is given
# @return [Hash] @files, when a block is given
def each_with_index(&block)
  @files.each_with_index(&block)
end

#file_entry(ref, identifier) ⇒ Object

ref is the absolute source file address. rel_path is the relative source file address, relative to the YAML location. out_path is the destination file address, with any references outside the working directory (../../…) truncated, and based on the relative path. identifier is the id with only spaces, no nbsp.



104
105
106
107
108
109
110
111
112
113
114
115
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 104

# Build the @files entry for a manifest entry that points at a file.
#
# Manifest entries without a file are skipped by the guard on the first
# line, so every entry produced here is of type "fileref". (The former
# `else { type: "id", ... }` branch could never run after that guard and
# has been removed; behaviour is unchanged.)
#
# @param ref [Object] manifest entry; file, url and format are read, plus
#   whatever file_entry_copy and output_file_path read
# @param identifier [String] sanitised identifier, used to look up the
#   document (and hence the source file address) in @documents
# @return [Hash, nil] the entry with nil values dropped; nil if ref has no
#   file
def file_entry(ref, identifier)
  ref.file or return
  abs = @documents[Util::key identifier].file
  ret = { type: "fileref", ref: abs, rel_path: ref.file, url: ref.url,
          out_path: output_file_path(ref),
          format: ref.format&.map(&:to_sym) }.compact
  file_entry_copy(ref, ret)
  # file_entry_copy may have stored nil attributes; drop them
  ret.compact
end

#file_entry_copy(ref, ret) ⇒ Object



125
126
127
128
129
130
131
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 125

# Copy the optional manifest attributes that ref responds to into ret.
# Hyphens are dropped from the attribute name to form the hash key
# (e.g. "presentation-xml" is stored under :presentationxml).
#
# @param ref [Object] manifest entry
# @param ret [Hash] file entry, modified in place
def file_entry_copy(ref, ret)
  %w(attachment sectionsplit index presentation-xml url
     bare-after-first).each do |attribute|
    next unless ref.respond_to?(attribute.to_sym)

    ret[attribute.delete("-").to_sym] = ref.send(attribute)
  end
end

#get(ident, attr = nil) ⇒ Object



248
249
250
251
252
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 248

# Look up the entry for an identifier, or one attribute of it.
#
# An unknown identifier yields nil in both forms. Previously the attribute
# form raised NoMethodError on nil for an unknown identifier, although the
# entry form returned nil and #url? depends on that nil.
#
# @param ident [String] document identifier; normalised with #key
# @param attr [Symbol, nil] attribute to read; nil returns the whole entry
# @return [Object, nil] the entry, the attribute value, or nil
def get(ident, attr = nil)
  entry = @files[key(ident)]
  return entry unless attr && entry

  entry[attr]
end

#key(ident) ⇒ Object



239
240
241
242
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 239

# Normalise an identifier into a lookup key: decode it with @c, collapse
# each run of Unicode space separators into one plain space, and strip a
# leading "metanorma-collection " prefix.
#
# @param ident [String] raw identifier
# @return [String] normalised key
def key(ident)
  decoded = @c.decode(ident)
  single_spaced = decoded.gsub(/(\p{Zs})+/, " ")
  single_spaced.sub(/^metanorma-collection /, "")
end

#keys ⇒ Object



244
245
246
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 244

# The keys of @files, i.e. the normalised identifiers stored by read_file
# plus any keys added by section splitting.
def keys
  @files.keys
end

#ns(xpath) ⇒ Object



266
267
268
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 266

# Delegates to @isodoc.ns; used in this class to prepare XPath expressions
# before they are passed to #at.
def ns(xpath)
  @isodoc.ns(xpath)
end

#one_doc_collection? ⇒ Boolean



67
68
69
70
71
72
73
74
75
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 67

# Does the collection hold at most one document? Attachments and entries
# flagged :presentationxml are not counted.
#
# @return [Boolean]
def one_doc_collection?
  documents = @files.each_value.count do |entry|
    !entry[:attachment] && !entry[:presentationxml]
  end
  documents <= 1
end

#output_file_path(ref) ⇒ Object

TODO: make the output file location reflect the source location universally, not just for attachments (i.e. no File.basename).



119
120
121
122
123
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 119

# Destination filename for a manifest entry: attachments keep their full
# source path, everything else is reduced to its basename; the result is
# then passed through the filename disambiguator.
#
# @param ref [#file, #attachment] manifest entry
# @return [Object] whatever @disambig.source2dest_filename returns
def output_file_path(ref)
  basename = File.basename(ref.file)
  source = ref.attachment ? ref.file : basename
  @disambig.source2dest_filename(source)
end

#process_section_split_instance(key, manifest) ⇒ Object



18
19
20
21
22
23
24
25
26
27
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 18

# Split one document into sections and register every resulting file, the
# attachments entry (if any) and the cover page in the manifest.
#
# @param key [String] key of the document in @files
# @param manifest [Hash] manifest being built
def process_section_split_instance(key, manifest)
  sections, sectionsplit_manifest = sectionsplit(key)
  # section_split_instance_threads(sections, manifest, key)
  sections.each.with_index do |section, position|
    add_section_split_instance(section, manifest, key, position)
  end
  attachments = add_section_split_attachments(sectionsplit_manifest, key)
  manifest["#{key}:attachments"] = attachments if attachments
  add_section_split_cover(manifest, sectionsplit_manifest, key)
end

#read_anchors(xml) ⇒ Object

Maps locality type and label (e.g. “clause” “1”) to id = anchor for a document. Note: only clauses are keyed by label, since they have an unambiguous reference label in their locality. Notes, examples, etc. with containers are just registered against UUIDs, so that their IDs can at least be tracked as existing.



209
210
211
212
213
214
215
216
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 209

# Run the isodoc cross-reference parser over a document and regroup its
# anchors by type and label via read_anchors1.
#
# NOTE(review): @lang, @script and @locale are not assigned in the code
# visible here — confirm where they are set.
#
# @param xml [Object] parsed XML document
# @return [Hash{String => Hash}] type => { label or UUID => anchor id }
def read_anchors(xml)
  xrefs = @isodoc.xref_init(@lang, @script, @isodoc, @isodoc.i18n,
                            { locale: @locale })
  xrefs.parse(xml)
  anchors = {}
  xrefs.get.each do |anchor_id, details|
    read_anchors1(anchor_id, details, anchors)
  end
  anchors
end

#read_anchors1(key, val, ret) ⇒ Object



218
219
220
221
222
223
224
225
226
227
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 218

# File one anchor under its type (default "clause") in ret.
# The index is the anchor's label with markup stripped; anchors that have
# a container or no usable label are indexed under a random UUID instead.
# The anchor's :value, if any, is added as a second index.
#
# @param key [String] anchor id
# @param val [Hash] anchor details; :type is defaulted in place
# @param ret [Hash] accumulator: type => { index => anchor id }
def read_anchors1(key, val, ret)
  type = (val[:type] ||= "clause")
  bucket = (ret[type] ||= {})
  label = val[:label]
  unlabelled = val[:container] || label.nil? || label.empty?
  index = if unlabelled then UUIDTools::UUID.random_create.to_s
          else label.gsub(%r{<[^>]+>}, "")
          end
  bucket[index] = key
  value = val[:value]
  value and bucket[value.gsub(%r{<[^>]+>}, "")] = key
end

#read_file(manifest) ⇒ Object



46
47
48
49
50
51
52
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 46

# Register one manifest entry in @files: build its file entry, attach
# bibdata and bibitem, and store it under the normalised raw identifier.
# Entries for which file_entry yields nothing are skipped.
#
# @param manifest [Object] manifest entry
def read_file(manifest)
  raw_id, sanitised_id = read_file_idents(manifest)
  entry = file_entry(manifest, sanitised_id)
  return unless entry

  bibdata_process(entry, raw_id)
  bibitem_process(entry)
  @files[key(raw_id)] = entry
end

#read_file_idents(manifest) ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 54

# Return the manifest entry's identifier in two forms: as given, and
# sanitised (docid_prefix with an empty type, then #key).
#
# The identifier is deliberately taken from the manifest only: the original
# code carries a disabled block with the note "NO, DO NOT FISH FOR THE
# GENUINE IDENTIFIER IN BIBDATA".
#
# @param manifest [#identifier] manifest entry
# @return [Array] [identifier, sanitised identifier]
def read_file_idents(manifest)
  identifier = manifest.identifier
  prefixed = @isodoc.docid_prefix("", manifest.identifier.dup)
  [identifier, key(prefixed)]
end

#read_files(entries, parent) ⇒ Object



32
33
34
35
36
37
38
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 32

# Walk the manifest tree depth-first: settle each entry's formats, register
# it if it points at a file, then recurse into its children.
#
# @param entries [Array, Object, nil] manifest entries at this level
# @param parent [Object] the entry (or manifest) that contains them
def read_files(entries, parent)
  Array(entries).each do |entry|
    derive_format(entry, parent)
    read_file(entry) if entry.file
    read_files(entry.entry, entry)
  end
end

#read_ids(xml) ⇒ Object

Also parse all ids in doc (including ones which won’t be xref targets)



230
231
232
233
234
235
236
237
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 230

# Collect every "id" attribute value found on the non-text nodes of a
# document.
#
# @param xml [#traverse] parsed XML document
# @return [Hash{String => true}] each id mapped to true
def read_ids(xml)
  ids = {}
  xml.traverse do |node|
    next if node.text?

    id = node["id"]
    ids[id] = true if id
  end
  ids
end

#ref_file(ref, out, read, doc) ⇒ Object



190
191
192
193
194
195
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 190

# Contents and output filename for a file reference.
#
# @param ref [String] path of the source file
# @param out [String] output path; never modified
# @param read [Boolean] whether to read the source file (as UTF-8)
# @param doc [Boolean] whether a trailing ".xml" becomes ".html"
# @return [Array] [file contents or nil, output filename]
def ref_file(ref, out, read, doc)
  contents = read ? File.read(ref, encoding: "utf-8") : nil
  target = doc ? out.sub(/\.xml$/, ".html") : out.dup
  [contents, target]
end

#section_split_instance_threads(s, manifest, key) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 29

# Threaded variant of the registration loop in
# process_section_split_instance: posts one add_section_split_instance job
# per split file to a pool of four threads and waits for them to finish.
#
# NOTE(review): @mutex is created but not used in this method, and the jobs
# write to manifest and @files_to_delete concurrently. The only call visible
# here is commented out — confirm thread-safety before re-enabling it.
#
# @param s [Enumerable] split output descriptors
# @param manifest [Hash] manifest being built
# @param key [String] key of the parent document in @files
def section_split_instance_threads(s, manifest, key)
  @mutex = Mutex.new
  pool = Concurrent::FixedThreadPool.new(4)
  s.each.with_index do |section, position|
    pool.post { add_section_split_instance(section, manifest, key, position) }
  end
  pool.shutdown
  pool.wait_for_termination
end

#sectionsplit(ident) ⇒ Object



108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb', line 108

# Split the document registered under ident into sections.
# Stores the Sectionsplit instance in @sectionsplit for the other
# section-split methods to use.
#
# @param ident [String] key of the document in @files
# @return [Array] [split output descriptors sorted by :order, the
#   collection manifest built for them]
def sectionsplit(ident)
  entry = @files[ident]
  file = entry[:ref]
  dir = File.dirname(file)
  @sectionsplit = ::Metanorma::Collection::Sectionsplit.new(
    input: file, base: entry[:out_path],
    dir: dir, output: entry[:out_path],
    compile_opts: @parent.compile_options, ident: ident,
    fileslookup: self, isodoc: @isodoc,
    isodoc_presxml: @isodoc_presxml,
    document_suffix: entry[:document_suffix]
  )
  sections = @sectionsplit.sectionsplit.sort_by { |f| f[:order] }
  xml = Nokogiri::XML(File.read(file, encoding: "UTF-8"), &:huge)
  split_manifest = @sectionsplit.collection_manifest(
    File.basename(file), sections, xml, nil, dir
  )
  [sections, split_manifest]
end

#set(ident, attr, value) ⇒ Object



254
255
256
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 254

# Set one attribute on the entry registered for an identifier.
#
# @param ident [String] document identifier; normalised with #key
# @param attr [Symbol] attribute to write
# @param value [Object] value to store
# @return [Object] value
def set(ident, attr, value)
  entry = @files[key(ident)]
  entry[attr] = value
end

#targetfile(data, options) ⇒ Array<String, nil>

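Returns the file contents and the output filename for a file in the collection, given its docref entry. Options: read (read the file contents); doc (the file is a document, so its URL should end with html or pdf or whatever the output format is); relative (use the path formed relative to the YAML file, rather than the input path relative to the calling function).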



176
177
178
179
180
181
182
183
184
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 176

# Contents and output filename for a file entry.
# "fileref" entries are served from disk via ref_file; anything else is
# looked up by id via xml_file.
#
# NOTE(review): the non-fileref path reads data[:id]; no code visible here
# stores an :id key on an entry — confirm against callers.
#
# @param data [Hash] file entry
# @param options [Hash] :read, :doc and :relative; the defaults are
#   read: false, doc: true, relative: false
# @return [Array] [file contents or nil, output filename]
def targetfile(data, options)
  opts = { read: false, doc: true, relative: false }.merge(options)
  unless data[:type] == "fileref"
    return xml_file(data[:id], opts[:read])
  end

  source = opts[:relative] ? data[:rel_path] : data[:ref]
  ref_file(source, data[:out_path], opts[:read], opts[:doc])
end

#targetfile_id(ident, options) ⇒ Object



186
187
188
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 186

# Same as #targetfile, but takes an identifier and looks the entry up first.
#
# @param ident [String] document identifier
# @param options [Hash] passed through to #targetfile
def targetfile_id(ident, options)
  entry = get(ident)
  targetfile(entry, options)
end

#url(ident, options) ⇒ Object

Returns the citation URL for the file; for a document, the URL should end with html or pdf or whatever the output format is.



155
156
157
158
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 155

# Citation URL for a file: the entry's explicit :url if it has one,
# otherwise the output filename computed by #targetfile.
#
# @param ident [String] document identifier
# @param options [Hash] passed through to #targetfile
# @return [String] URL or output filename
def url(ident, options)
  data = get(ident)
  explicit = data[:url]
  return explicit if explicit

  _contents, filename = targetfile(data, options)
  filename
end

#url?(ident) ⇒ Boolean

are references to the file to be linked to a file in the collection, or externally? Determines whether file suffix anchors are to be used



162
163
164
165
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 162

# Does the entry for ident carry an explicit :url?
#
# @param ident [String] document identifier
# @return [Object] false when no entry is registered for ident; otherwise
#   the entry's :url value (truthy when set, nil when not)
def url?(ident)
  data = get(ident)
  return false unless data

  data[:url]
end

#url_in_css_styles(doc, document_suffix) ⇒ Object

update relative URLs, url(#…), in CSS in @style attrs (including SVG)



145
146
147
148
149
150
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 145

# Append "_<document_suffix>" to the fragment of every url(#...) reference
# found in a style attribute of the document.
#
# @param doc [#xpath] parsed XML document, modified in place
# @param document_suffix [String] suffix to append to each fragment
def url_in_css_styles(doc, document_suffix)
  suffixed = "url(#\\1_#{document_suffix})"
  doc.xpath("//*[@style]").each do |node|
    node["style"] = node["style"].gsub(%r{url\(#([^()]+)\)}, suffixed)
  end
end

#xml_file(id, read) ⇒ Object



197
198
199
200
201
# File 'lib/metanorma/collection/filelookup/filelookup.rb', line 197

# Contents and output filename for a document embedded in the collection
# XML as a doc-container with the given id.
#
# When no doc-container matches, the contents are nil; previously this
# raised NoMethodError from calling to_xml on nil.
#
# @param id [String] id of the doc-container element
# @param read [Boolean] whether to serialise the element
# @return [Array] [serialised XML or nil, "<id>.html"]
def xml_file(id, read)
  file = @xml.at(ns("//doc-container[@id = '#{id}']"))&.to_xml if read
  filename = "#{id}.html"
  [file, filename]
end