Class: Metanorma::Compile

Inherits:
Object
  • Object
show all
Includes:
CompileOptions, Flavor, Validator, Writeable
Defined in:
lib/metanorma/compile/flavor.rb,
lib/metanorma/compile/compile.rb,
lib/metanorma/compile/extract.rb,
lib/metanorma/compile/validator.rb,
lib/metanorma/compile/writeable.rb,
lib/metanorma/compile/relaton_drop.rb,
lib/metanorma/compile/compile_options.rb,
lib/metanorma/compile/output_filename.rb,
lib/metanorma/compile/output_filename_config.rb

Defined Under Namespace

Modules: CompileOptions, Extract, Flavor, Validator, Writeable Classes: OutputFilename, OutputFilenameConfig, RelatonDrop

Constant Summary collapse

DEFAULT_NUM_WORKERS =
3

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Writeable

#export_output

Methods included from Flavor

#load_flavor, #stdtype2flavor

Methods included from CompileOptions

#extract_extensions, #extract_options, #extract_xml_options, #font_install, #get_extensions, #require_libraries

Methods included from Validator

#validate_format!, #validate_options!, #validate_type!

Constructor Details

#initialize ⇒ Compile

Returns a new instance of Compile.



35
36
37
38
39
40
41
# File 'lib/metanorma/compile/compile.rb', line 35

# Sets up a compiler with empty state: an empty error list, the
# @fontist_installed flag cleared, the registry instance, a default IsoDoc
# converter and a fresh log object.
def initialize
  @errors = []
  @fontist_installed = false
  @registry = Metanorma::Registry.instance
  @isodoc = IsoDoc::Convert.new({})
  @log = Metanorma::Utils::Log.new
end

Instance Attribute Details

#errors ⇒ Array<String> (readonly)

Returns:



32
33
34
# File 'lib/metanorma/compile/compile.rb', line 32

# Read-only accessor for the error list held in @errors (initialised to an
# empty array by the constructor).
#
# @return [Array<String>] the collected errors
def errors
  @errors
end

#processor ⇒ Object (readonly)

Returns the value of attribute processor.



33
34
35
# File 'lib/metanorma/compile/compile.rb', line 33

# Read-only accessor for @processor, which #compile assigns from the
# registry lookup for the requested document type. It is nil until
# #compile has run.
#
# @return [Object, nil] the processor selected for the current compilation
def processor
  @processor
end

Instance Method Details

#clean_exit(options) ⇒ Object



89
90
91
92
# File 'lib/metanorma/compile/compile.rb', line 89

# Writes out the log unless the :novalid option is set.
#
# @param options [Hash] compilation options
# @return [Object, nil] result of the log write, or nil when skipped
def clean_exit(options)
  @log.write unless options[:novalid]
end

#compile(filename, options = {}) ⇒ Object

Main compile method that orchestrates the document conversion process

Parameters:

  • filename (String)

    path to the input file

  • options (Hash) (defaults to: {})

    compilation options



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/metanorma/compile/compile.rb', line 46

# Main compile method that orchestrates the document conversion process.
#
# Returns nil early when no Semantic XML could be produced or when no output
# extensions are resolved. #clean_exit always runs (via +ensure+), including
# on early return and when an exception propagates.
#
# @param filename [String] path to the input file
# @param options [Hash] compilation options
# @return [Object, nil] result of #generate_outputs, or nil on early exit
def compile(filename, options = {})
  process_options!(filename, options)
  @processor = @registry.find_processor(options[:type].to_sym)

  # Step 1: Generate Semantic XML
  semantic_result = generate_semantic_xml(filename, options)
  return nil unless semantic_result

  source_file, semantic_xml = semantic_result

  # Step 2: Prepare output paths
  xml = Nokogiri::XML(semantic_xml, &:huge)
  # Downstream steps expect the <bibdata> element, not the whole document,
  # so it has to be looked up explicitly here.
  bibdata = extract_relaton_metadata(xml)
  output_paths = prepare_output_paths(filename, bibdata, options)

  # Step 3: Determine which output formats to generate
  extensions = get_extensions(options)
  return nil unless extensions

  # Step 4: Extract information from Semantic XML if requested
  extract_information(semantic_xml, bibdata, options)

  # Step 5: Generate output formats from Semantic XML
  generate_outputs(
    source_file,
    semantic_xml,
    bibdata,
    extensions,
    output_paths,
    options,
  )
ensure
  clean_exit(options)
end

#export_relaton_from_bibdata(bibdata, options) ⇒ Object

Export given bibliographic data to Relaton XML on disk

Parameters:

  • bibdata (Nokogiri::XML::Element)

    the bibliographic data element

  • options (Hash)

    compilation options



242
243
244
245
246
247
248
# File 'lib/metanorma/compile/compile.rb', line 242

# Export given bibliographic data to Relaton XML on disk.
#
# Does nothing when no :relaton target is configured; otherwise the
# serialised bibdata is written to the path given in options[:relaton].
#
# @param bibdata [Nokogiri::XML::Element] the bibliographic data element
# @param options [Hash] compilation options
# @return [Object, nil] result of #export_output, or nil when skipped
def export_relaton_from_bibdata(bibdata, options)
  target = options[:relaton]
  export_output(target, bibdata.to_xml) if target
end

#extract_information(semantic_xml, bibdata, options) ⇒ Object

Step 4: Extract information from Semantic XML

Parameters:

  • semantic_xml (String)

    semantic XML content

  • options (Hash)

    compilation options



141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/metanorma/compile/compile.rb', line 141

# Step 4: Extract information from Semantic XML.
#
# Exports Relaton bibliographic data when :relaton is set, then extracts
# other components (sourcecode, images, requirements) when :extract is set.
#
# @param semantic_xml [String] semantic XML content
# @param bibdata [Nokogiri::XML::Element] the bibliographic data element
# @param options [Hash] compilation options
# @return [Object, nil] result of Extract.extract, or nil when not requested
def extract_information(semantic_xml, bibdata, options)
  export_relaton_from_bibdata(bibdata, options) if options[:relaton]
  return unless options[:extract]

  Extract.extract(semantic_xml, options[:extract], options[:extract_type])
end

#extract_relaton_metadata(xml) ⇒ Nokogiri::XML::Element

Returns the bibliographic data element.

Parameters:

  • xml (Nokogiri::XML::Document)

    the XML document

Returns:

  • (Nokogiri::XML::Element)

    the bibliographic data element



252
253
254
# File 'lib/metanorma/compile/compile.rb', line 252

# Returns the bibliographic data element.
#
# Looks for an un-namespaced <bibdata> first and falls back to one in the
# document's default namespace.
#
# @param xml [Nokogiri::XML::Document] the XML document
# @return [Nokogiri::XML::Element, nil] the bibliographic data element, or
#   nil when neither lookup matches
def extract_relaton_metadata(xml)
  xml.at("//bibdata") || xml.at("//xmlns:bibdata")
end

#gather_and_install_fonts(source_file, options, extensions) ⇒ Object

isodoc is Raw Metanorma XML



267
268
269
270
271
272
# File 'lib/metanorma/compile/compile.rb', line 267

# Installs fonts for every requested extension, visiting them in the order
# given by Util.sort_extensions_execution. For each extension the isodoc
# options are merged with the compile options (compile options win on key
# clashes) and handed to #font_install.
#
# @param source_file [String] source file content
# @param options [Hash] compilation options
# @param extensions [Array<Symbol>] output formats to generate
# @return [Object] the sorted extension list that was iterated
def gather_and_install_fonts(source_file, options, extensions)
  ordered = Util.sort_extensions_execution(extensions)
  ordered.each do |format|
    format_options = get_isodoc_options(source_file, options, format)
    font_install(format_options.merge(options))
  end
end

#generate_outputs(source_file, semantic_xml, bibdata, extensions, output_paths, options) ⇒ Object

Step 5: Generate output formats from Semantic XML

Parameters:

  • source_file (String)

    source file content

  • semantic_xml (String)

    semantic XML content

  • bibdata (Nokogiri::XML::Element)

    the bibliographic data element

  • extensions (Array<Symbol>)

    output formats to generate

  • output_paths (Hash)

    paths for output files

  • options (Hash)

    compilation options



162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# File 'lib/metanorma/compile/compile.rb', line 162

# Step 5: Generate output formats from Semantic XML.
#
# When the only requested extension is :presentation, just the presentation
# XML is generated; every other extension list goes through the parallel
# path.
#
# @param source_file [String] source file content
# @param semantic_xml [String] semantic XML content
# @param bibdata [Nokogiri::XML::Element] the bibliographic data element
# @param extensions [Array<Symbol>] output formats to generate
# @param output_paths [Hash] paths for output files
# @param options [Hash] compilation options
# @return [Object] result of the chosen generation path
def generate_outputs(
  source_file, semantic_xml, bibdata, extensions, output_paths, options
)
  presentation_only = extensions == [:presentation]
  unless presentation_only
    return generate_outputs_parallel(
      source_file, semantic_xml, bibdata, extensions, output_paths, options
    )
  end

  generate_presentation_xml(
    source_file, semantic_xml, bibdata, output_paths, options
  )
end

#generate_outputs_parallel(source_file, semantic_xml, bibdata, extensions, output_paths, options) ⇒ Object

Generate multiple output formats with parallel processing



188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# File 'lib/metanorma/compile/compile.rb', line 188

# Generate multiple output formats with parallel processing.
#
# The pool size is taken from the METANORMA_PARALLEL environment variable
# when it parses to a positive integer; otherwise DEFAULT_NUM_WORKERS is
# used. The pool is shut down both on success and when font installation or
# extension processing raises a StandardError (which is then re-raised).
#
# @param source_file [String] source file content
# @param semantic_xml [String] semantic XML content
# @param bibdata [Nokogiri::XML::Element] the bibliographic data element
# @param extensions [Array<Symbol>] output formats to generate
# @param output_paths [Hash] paths for output files
# @param options [Hash] compilation options
# @return [Object] result of shutting down the worker pool
def generate_outputs_parallel(
  source_file, semantic_xml, bibdata, extensions, output_paths, options
)
  # nil.to_i and String#to_i on non-numeric text both give 0, so an unset,
  # zero, negative or malformed value falls back to the default instead of
  # requesting a pool with no workers.
  requested = ENV["METANORMA_PARALLEL"].to_i
  workers = requested.positive? ? requested : DEFAULT_NUM_WORKERS
  @queue = ::Metanorma::Util::WorkersPool.new(workers)

  begin
    # Install required fonts for all extensions
    gather_and_install_fonts(source_file, options.dup, extensions)

    # Process each extension in order
    process_extensions_in_order(
      source_file, semantic_xml, bibdata, extensions, output_paths, options
    )
  rescue StandardError
    # NOTE(review): presumably the pool owns worker threads that would be
    # left running if shutdown were skipped on failure — confirm against
    # WorkersPool.
    @queue.shutdown
    raise
  end

  @queue.shutdown
end

#generate_presentation_xml(source_file, semantic_xml, bibdata, output_paths, options) ⇒ Object

Generate presentation XML from semantic XML



179
180
181
182
183
184
185
# File 'lib/metanorma/compile/compile.rb', line 179

# Generate presentation XML from semantic XML by running #process_ext for
# the :presentation extension only.
#
# @param source_file [String] source file content
# @param semantic_xml [String] semantic XML content
# @param bibdata [Nokogiri::XML::Element] the bibliographic data element
# @param output_paths [Hash] paths for output files
# @param options [Hash] compilation options
# @return [Object] result of #process_ext
def generate_presentation_xml(
  source_file, semantic_xml, bibdata, output_paths, options
)
  process_ext(
    :presentation,
    source_file,
    semantic_xml,
    bibdata,
    output_paths,
    options,
  )
end

#generate_semantic_xml(filename, options) ⇒ Array?

Step 1: Generate Semantic XML from input file

Parameters:

  • filename (String)

    input file path

  • options (Hash)

    compilation options

Returns:

  • (Array, nil)

    tuple of [source_file, semantic_xml] or nil on failure



98
99
100
101
102
103
104
105
106
107
# File 'lib/metanorma/compile/compile.rb', line 98

# Step 1: Generate Semantic XML from input file.
#
# Dispatches on the file extension: ".adoc" and ".xml" inputs are handled;
# anything else is logged as an error.
#
# @param filename [String] input file path
# @param options [Hash] compilation options
# @return [Array, nil] tuple of [source_file, semantic_xml] or nil on failure
def generate_semantic_xml(filename, options)
  suffix = File.extname(filename)
  return process_input_adoc(filename, options) if suffix == ".adoc"
  return process_input_xml(filename, options) if suffix == ".xml"

  Util.log("[metanorma] Error: file extension #{suffix} " \
           "is not supported.", :error)
  nil
end

#prepare_output_paths(filename, bibdata, options) ⇒ Hash

Step 2: Prepare output paths for generated files. Use default filename template if empty string is provided.

Parameters:

  • filename (String)

    input file path

  • bibdata (Nokogiri::XML::Element)

    the bibliographic data element

  • options (Hash)

    compilation options

Returns:

  • (Hash)

    paths for different output formats



116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# File 'lib/metanorma/compile/compile.rb', line 116

# Step 2: Prepare output paths for generated files.
#
# The base name comes from the :filename_template option whenever that
# option is non-nil (rendered against the bibdata); otherwise it is the
# input filename with its last extension removed. The resulting
# OutputFilename is kept in @output_filename.
#
# @param filename [String] input file path
# @param bibdata [Nokogiri::XML::Element] the bibliographic data element
# @param options [Hash] compilation options
# @return [Hash] :xml, :orig_filename and :presentationxml paths
def prepare_output_paths(filename, bibdata, options)
  template = options[:filename_template]
  basename =
    if template.nil?
      filename.sub(/\.[^.]+$/, "")
    else
      drop = RelatonDrop.new(bibdata)
      OutputFilenameConfig.new(template).generate_filename(drop)
    end

  @output_filename =
    OutputFilename.new(basename, options[:output_dir], @processor)

  {
    xml: @output_filename.semantic_xml,
    orig_filename: filename,
    presentationxml: @output_filename.presentation_xml,
  }
end

#process_ext(ext, source_file, semantic_xml, bibdata, output_paths, options) ⇒ Object

Process a single extension (output format)



275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
# File 'lib/metanorma/compile/compile.rb', line 275

# Process a single extension (output format).
#
# Records the format's file suffix and output path in output_paths (:ext and
# :out), then tries the special-case handlers. Formats not handled there are
# either routed through presentation XML or rendered directly from semantic
# XML, depending on what the processor reports for the extension.
#
# @param ext [Symbol] output format
# @param source_file [String] source file content
# @param semantic_xml [String] semantic XML content
# @param bibdata [Nokogiri::XML::Element] the bibliographic data element
# @param output_paths [Hash] paths for output files (mutated)
# @param options [Hash] compilation options
# @return [Object] true for special cases, else the chosen handler's result
def process_ext(
  ext, source_file, semantic_xml, bibdata, output_paths, options
)
  suffix = @processor.output_formats[ext]
  output_paths[:ext] = suffix
  # Fall back to the semantic XML path with its extension swapped.
  output_paths[:out] = @output_filename.for_format(ext) ||
    output_paths[:xml].sub(/\.[^.]+$/, ".#{suffix}")
  isodoc_options = get_isodoc_options(source_file, options, ext)

  handled = process_ext_special(
    ext, semantic_xml, bibdata, output_paths, options, isodoc_options
  )
  return true if handled

  unless @processor.use_presentation_xml(ext)
    # Format can be generated directly from semantic XML
    return process_from_semantic_xml(
      ext, output_paths, semantic_xml, isodoc_options
    )
  end

  # Format requires presentation XML first, then convert to final format
  process_via_presentation_xml(ext, output_paths, options, isodoc_options)
end

#process_ext_special(ext, semantic_xml, bibdata, output_paths, options, isodoc_options) ⇒ Object

Process special extensions with custom handling



300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
# File 'lib/metanorma/compile/compile.rb', line 300

# Process special extensions with custom handling.
#
# Three cases are handled here: Relaton export (:rxl), passing presentation
# XML through unchanged (:presentation with :passthrough_presentation_xml),
# and splitting HTML into sections (:html with :sectionsplit).
#
# @param ext [Symbol] output format
# @param semantic_xml [String] semantic XML content
# @param bibdata [Nokogiri::XML::Element] the bibliographic data element
# @param output_paths [Hash] paths for output files
# @param options [Hash] compilation options
# @param isodoc_options [Hash] options passed on to sectionsplit conversion
# @return [Boolean] true when the extension was handled here, else false
def process_ext_special(
  ext, semantic_xml, bibdata, output_paths, options, isodoc_options
)
  case ext
  when :rxl
    # Special case: Relaton export
    relaton_options = options.merge(relaton: output_paths[:out])
    export_relaton_from_bibdata(bibdata, relaton_options)
    true
  when :presentation
    return false unless options[:passthrough_presentation_xml]

    # Special case: Pass through presentation XML. Copy the original input
    # when it exists on disk, otherwise the semantic XML path.
    original = output_paths[:orig_filename]
    source = File.exist?(original) ? original : output_paths[:xml]
    FileUtils.cp(source, output_paths[:presentationxml])
    true
  when :html
    return false unless options[:sectionsplit]

    # Special case: Split HTML into sections
    sectionsplit_convert(
      output_paths[:xml], semantic_xml, output_paths[:out], isodoc_options
    )
    true
  else
    false
  end
end

#process_extensions_in_order(source_file, semantic_xml, bibdata, extensions, output_paths, options) ⇒ Object



206
207
208
209
210
211
212
213
214
# File 'lib/metanorma/compile/compile.rb', line 206

# Runs #process_ext for each extension in the order given by
# Util.sort_extensions_execution, stopping at the first extension whose
# processing returns a falsy value.
#
# @param source_file [String] source file content
# @param semantic_xml [String] semantic XML content
# @param bibdata [Nokogiri::XML::Element] the bibliographic data element
# @param extensions [Array<Symbol>] output formats to generate
# @param output_paths [Hash] paths for output files
# @param options [Hash] compilation options
# @return [Object, nil] the sorted extension list, or nil when stopped early
def process_extensions_in_order(
  source_file, semantic_xml, bibdata, extensions, output_paths, options
)
  ordered = Util.sort_extensions_execution(extensions)
  ordered.each do |format|
    succeeded = process_ext(
      format, source_file, semantic_xml, bibdata, output_paths, options
    )
    break unless succeeded
  end
end

#process_from_semantic_xml(ext, output_paths, semantic_xml, isodoc_options) ⇒ Object

Process format directly from semantic XML



356
357
358
359
360
361
362
363
364
365
# File 'lib/metanorma/compile/compile.rb', line 356

# Process format directly from semantic XML.
#
# Rendering errors are passed to #isodoc_error_process. They are treated as
# strict for the :presentation extension or when the :strict isodoc option
# is enabled. The option is accepted both as boolean true and as the string
# "true", so this handler agrees with the presentation-XML handler.
#
# @param ext [Symbol] output format
# @param output_paths [Hash] paths for output files (:xml and :out are read)
# @param semantic_xml [String] semantic XML content
# @param isodoc_options [Hash] options handed to the processor
# @return [Boolean] true on success; after a handled error, false only for
#   the :presentation extension
def process_from_semantic_xml(ext, output_paths, semantic_xml,
                              isodoc_options)
  @processor.output(semantic_xml, output_paths[:xml], output_paths[:out],
                    ext, isodoc_options)
  true # Return as Thread equivalent
rescue StandardError => e
  strict = ext == :presentation ||
    [true, "true"].include?(isodoc_options[:strict])
  isodoc_error_process(e, strict, true)
  # A failed presentation XML stops the remaining extensions; any other
  # failed format lets the caller carry on.
  ext != :presentation
end

#process_input_adoc(filename, options) ⇒ Object



216
217
218
219
220
221
222
223
224
225
226
# File 'lib/metanorma/compile/compile.rb', line 216

# Reads an AsciiDoc input file and converts it to Semantic XML.
#
# With the :asciimath option, a ":mn-keep-asciimath:" attribute line is
# inserted after the first line that starts with "=". When the file lives
# outside the current directory, line-initial include:: and embed::
# targets are prefixed with the file's directory.
#
# @param filename [String] input file path
# @param options [Hash] compilation options
# @return [Array] tuple of [adjusted source text, semantic XML]
def process_input_adoc(filename, options)
  Util.log("[metanorma] Processing: AsciiDoc input.", :info)
  source = read_file(filename)
  if options[:asciimath]
    source.sub!(/^(=[^\n]+\n)/, "\\1:mn-keep-asciimath:\n")
  end

  base_dir = File.dirname(filename)
  unless base_dir == "."
    source = source.gsub(/^include::/, "include::#{base_dir}/")
    source = source.gsub(/^embed::/, "embed::#{base_dir}/")
  end

  [source, @processor.input_to_isodoc(source, filename, options)]
end

#process_input_xml(filename, _options) ⇒ Object



228
229
230
231
232
233
# File 'lib/metanorma/compile/compile.rb', line 228

# Reads a Metanorma XML input file. No conversion is needed, so the file
# content is returned as the semantic XML with an empty source text.
#
# @param filename [String] input file path
# @param _options [Hash] compilation options (unused)
# @return [Array] tuple of ["", semantic XML]
def process_input_xml(filename, _options)
  Util.log("[metanorma] Processing: Metanorma XML input.", :info)
  # TODO NN: this is a hack -- we should provide/bridge the
  # document attributes in Metanorma XML
  semantic_xml = read_file(filename)
  ["", semantic_xml]
end

#process_options!(filename, options) ⇒ Object



81
82
83
84
85
86
87
# File 'lib/metanorma/compile/compile.rb', line 81

# Loads required libraries, resolves and validates the compile options,
# points the log at the output location and stores the log under :log in
# the resolved options.
#
# NOTE(review): #compile reads options[:type] right after this call, so
# #extract_options presumably returns (or mutates) the caller's hash —
# confirm against CompileOptions.
#
# @param filename [String] input file path
# @param options [Hash] compilation options
# @return [Object] the log object
def process_options!(filename, options)
  require_libraries(options)
  resolved = extract_options(filename, options)
  validate_options!(resolved)
  @log.save_to(filename, resolved[:output_dir])
  resolved[:log] = @log
end

#process_output_from_presentation_xml(ext, output_paths, options, isodoc_options) ⇒ Object

Generate output format from presentation XML



345
346
347
348
349
350
351
352
353
# File 'lib/metanorma/compile/compile.rb', line 345

# Generate output format from presentation XML.
#
# Rendering errors are passed to #isodoc_error_process. They are treated as
# strict for the :presentation extension or when the :strict isodoc option
# is enabled. The option is accepted both as boolean true and as the string
# "true", so this handler agrees with the semantic-XML handler.
#
# @param ext [Symbol] output format
# @param output_paths [Hash] paths for output files (:presentationxml, :out
#   and :ext are read)
# @param options [Hash] compilation options
# @param isodoc_options [Hash] options handed to the processor
# @return [Object] result of #wrap_html, or of the error handler on failure
def process_output_from_presentation_xml(ext, output_paths, options,
                                         isodoc_options)
  @processor.output(nil, output_paths[:presentationxml],
                    output_paths[:out], ext, isodoc_options)
  wrap_html(options, output_paths[:ext], output_paths[:out])
rescue StandardError => e
  strict = ext == :presentation ||
    [true, "true"].include?(isodoc_options[:strict])
  isodoc_error_process(e, strict, false)
end

#process_via_presentation_xml(ext, output_paths, options, isodoc_options) ⇒ Object

Process format that requires presentation XML



337
338
339
340
341
342
# File 'lib/metanorma/compile/compile.rb', line 337

# Process format that requires presentation XML.
#
# Schedules the conversion on @queue. The job receives shallow copies of the
# hashes, so later changes to the caller's top-level keys (for example the
# per-extension :out path) do not reach an already scheduled job.
#
# @param ext [Symbol] output format
# @param output_paths [Hash] paths for output files
# @param options [Hash] compilation options
# @param isodoc_options [Hash] options handed to the processor
# @return [Object] result of scheduling the job on the queue
def process_via_presentation_xml(ext, output_paths, options, isodoc_options)
  job_args = [ext, output_paths.dup, options.dup, isodoc_options.dup]
  @queue.schedule(*job_args) do |format, paths, compile_opts, isodoc_opts|
    process_output_from_presentation_xml(
      format, paths, compile_opts, isodoc_opts
    )
  end
end

#read_file(filename) ⇒ Object



235
236
237
# File 'lib/metanorma/compile/compile.rb', line 235

# Reads a file as UTF-8 text and converts Windows line endings (CRLF) to
# plain LF. Lone carriage returns are left untouched.
#
# @param filename [String] path of the file to read
# @return [String] file content with "\r\n" replaced by "\n"
def read_file(filename)
  raw = File.read(filename, encoding: "utf-8")
  raw.gsub("\r\n", "\n")
end

#sectionsplit_convert(input_filename, file, output_filename = nil, opts = {}) ⇒ Object

assume we pass in Presentation XML, but we want to recover Semantic XML



368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
# File 'lib/metanorma/compile/compile.rb', line 368

# Builds a section-split collection from an XML file.
#
# assume we pass in Presentation XML, but we want to recover Semantic XML
#
# The input path gets an ".xml" suffix when it lacks one. If no file exists
# at that path, the given content is written there first. The output
# location defaults to the filename reported by the converter's
# convert_init.
#
# @param input_filename [String] path of the XML input
# @param file [String] XML content, written out when the input is missing
# @param output_filename [String, nil] output location override
# @param opts [Hash] compile options handed to the sectionsplit builder
# @return [Object] result of Sectionsplit#build_collection
def sectionsplit_convert(input_filename, file, output_filename = nil,
                         opts = {})
  @isodoc ||= IsoDoc::PresentationXMLConvert.new({})
  xml_path =
    input_filename.match?(/\.xml$/) ? input_filename : "#{input_filename}.xml"
  export_output(xml_path, file) unless File.exist?(xml_path)
  presxml = File.read(xml_path, encoding: "utf-8")
  _xml, filename, dir = @isodoc.convert_init(presxml, xml_path, false)
  target = output_filename || filename

  splitter = ::Metanorma::Collection::Sectionsplit.new(
    input: xml_path,
    isodoc: @isodoc,
    xml: presxml,
    base: File.basename(target),
    output: target,
    dir: dir,
    compile_opts: opts,
  )
  splitter.build_collection
end

#wrap_html(options, file_extension, outfilename) ⇒ Object



256
257
258
259
260
261
262
263
# File 'lib/metanorma/compile/compile.rb', line 256

# Moves an HTML output and its "_images" directory into a wrapper directory
# named after the output file without its ".html" suffix.
#
# Only acts when the :wrapper option is set and the file extension ends in
# "html". A missing images directory is tolerated (force: true).
#
# @param options [Hash] compilation options
# @param file_extension [String] extension of the generated file
# @param outfilename [String] path of the generated file
# @return [Object, nil] nil when nothing was wrapped
def wrap_html(options, file_extension, outfilename)
  return unless options[:wrapper] && /html$/.match?(file_extension)

  wrapper_dir = outfilename.sub(/\.html$/, "")
  FileUtils.mkdir_p(wrapper_dir)
  FileUtils.mv("#{wrapper_dir}.html", wrapper_dir)
  FileUtils.mv("#{wrapper_dir}_images", wrapper_dir, force: true)
end