Class: RDF::CLI

Inherits:
Object
  • Object
show all
Defined in:
lib/rdf/cli.rb

Overview

Individual formats can modify options by updating Reader.options or Writer.options. Format-specific commands are taken from Format.cli_commands for each loaded format, which returns a hash mapping each command name to either an options hash (including a :lambda entry) or a bare lambda taking arguments and options.

Other than `help`, all commands parse an input file.

Multiple commands may be added in sequence to execute a pipeline.

Format-specific commands should verify that the reader and/or output format are appropriate for the command.

Examples:

Creating Reader-specific options:

# Example reader that advertises its own command-line options.
class Reader
  # Options this reader contributes to the command line.
  #
  # Each entry names the key stored in the parsed options (+symbol+), the
  # switch definition (+on+), a description, and a block that converts the
  # raw switch argument into the stored value.
  #
  # @return [Array<RDF::CLI::Option>]
  def self.options
    [
      RDF::CLI::Option.new(
        symbol: :canonicalize,
        datatype: TrueClass,
        on: ["--canonicalize"],
        description: "Canonicalize input/output.") {true},
      RDF::CLI::Option.new(
        symbol: :uri,
        datatype: RDF::URI,
        on: ["--uri STRING"],
        description: "URI.") {|v| RDF::URI(v)},
    ]
  end
end

Creating Format-specific commands:

# Example format that contributes a format-specific command.
class Format
  # Commands this format adds to the command line, keyed by command name.
  #
  # @return [Hash{Symbol => Hash}] each value holds a :description, a :parse
  #   flag (whether input must be parsed before the command runs) and the
  #   :lambda that is called with (argv, opts)
  def self.cli_commands
    {
      count: {
        description: "",
        parse: true,
        lambda: ->(argv, opts) {}
      },
    }
  end
end

Adding a command manually

# Example of registering a command by hand, without going through a format.
class MyCommand
  # The block is stored as the command's lambda and receives (argv, opts).
  RDF::CLI.add_command(:count, description: "Count statements") do |argv, opts|
    total = 0
    RDF::CLI.parse(argv, opts) do |reader|
      # Tally every statement the reader yields
      reader.each_statement { total += 1 }
    end
    $stdout.puts "Parsed #{total} statements"
  end
end

Defined Under Namespace

Classes: Option

Constant Summary collapse

# Built-in commands, keyed by command name.
#
# Each entry is a Hash with:
#   :description - short text describing the command
#   :parse       - when true, exec parses the input into the repository
#                  before any command lambda runs
#   :help        - optional usage text shown by `help <command>`
#   :lambda      - callable invoked as lambda.call(argv, opts)
#
# The Hash is deliberately left unfrozen: add_command inserts new entries.
COMMANDS =
{
  count: {
    description: "Count statements in parsed input",
    parse: false,
    help: "count [options] [args...]\nreturns number of parsed statements",
    lambda: ->(argv, opts) do
      # Skip counting when the repository already holds statements
      unless repository.count > 0
        start = Time.new
        count = 0
        self.parse(argv, opts) do |reader|
          reader.each_statement do |statement|
            count += 1
          end
        end
        secs = Time.new - start
        # @readers may still be nil when no reader was recorded; Array() keeps
        # the report from raising NoMethodError in that case.
        $stdout.puts "Parsed #{count} statements with #{Array(@readers).join(', ')} in #{secs} seconds @ #{count/secs} statements/second."
      end
    end
  },
  help: {
    description: "This message",
    parse: false,
    lambda: ->(argv, opts) {self.usage(self.options)}
  },
  lengths: {
    description: "Lengths of each parsed statement",
    parse: true,
    help: "lengths [options] [args...]\nreturns statement lengths",
    lambda: ->(argv, opts) do
      repository.each_statement do |statement|
        $stdout.puts statement.to_s.size
      end
    end
  },
  objects: {
    description: "Serialize each parsed object to N-Triples",
    parse: true,
    help: "objects [options] [args...]\nreturns unique objects",
    lambda: ->(argv, opts) do
      $stdout.puts "Objects"
      repository.each_object do |object|
        $stdout.puts object.to_ntriples
      end
    end
  },
  predicates: {
    description: "Serialize each parsed predicate to N-Triples",
    parse: true,
    help: "predicates [options] [args...]\nreturns unique predicates",
    lambda: ->(argv, opts) do
      $stdout.puts "Predicates"
      repository.each_predicate do |predicate|
        $stdout.puts predicate.to_ntriples
      end
    end
  },
  serialize: {
    description: "Serialize each parsed statement to N-Triples, or the specified output format",
    parse: true,
    help: "serialize [options] [args...]\nserialize output using specified format (or n-triples if not specified)",
    lambda: ->(argv, opts) do
      # Fall back to N-Triples when no writer matches the requested format
      writer_class = RDF::Writer.for(opts[:output_format]) || RDF::NTriples::Writer
      out = opts[:output] || $stdout
      # Start from an empty prefix map and ask the writer for standard prefixes
      writer_opts = opts.merge(prefixes: {}, standard_prefixes: true)
      writer_class.new(out, writer_opts) do |writer|
        writer << repository
      end
    end
  },
  subjects: {
    description: "Serialize each parsed subject to N-Triples",
    parse: true,
    help: "subjects [options] [args...]\nreturns unique subjects",
    lambda: ->(argv, opts) do
      $stdout.puts "Subjects"
      repository.each_subject do |subject|
        $stdout.puts subject.to_ntriples
      end
    end
  },
  validate: {
    description: "Validate parsed input",
    parse: true,
    help: "validate [options] [args...]\nvalidates parsed input (may also be used with --validate)",
    lambda: ->(argv, opts) do
      $stdout.puts "Input is #{repository.valid? ? '' : 'in'}valid"
    end
  }
}

Class Attribute Summary collapse

Class Method Summary collapse

Class Attribute Details

.repository ⇒ RDF::Repository

Repository containing parsed statements

Returns:



215
216
217
# File 'lib/rdf/cli.rb', line 215

# Reader for the repository held in @repository.
#
# @return [RDF::Repository, nil] whatever is currently stored in @repository;
#   exec assigns a new RDF::Repository there before running commands
def repository
  @repository
end

Class Method Details

.abort(msg) ⇒ void

This method returns an undefined value.

Parameters:

  • msg (String)


458
459
460
# File 'lib/rdf/cli.rb', line 458

# Terminate the process via Kernel.abort, prefixing the message with the
# program's basename.
#
# @param  [String] msg text to report
# @return [void]
def self.abort(msg)
  message = Kernel.format("%s: %s", basename, msg)
  Kernel.abort(message)
end

.add_command(command, options = {}) {|argv, opts| ... } ⇒ Object

Add a command.

Parameters:

  • command (#to_sym)
  • options (Hash{Symbol => String}) (defaults to: {})

Options Hash (options):

  • description (String)
  • help (String)

    string to display for help

  • parse (Boolean)

    whether input files are parsed into the Repository before the command runs

  • options (Array<RDF::CLI::Option>)

    specific to this command

Yields:

  • argv, opts

Yield Parameters:

  • argv (Array<String>)
  • opts (Hash)

Yield Returns:

  • (void)


412
413
414
415
# File 'lib/rdf/cli.rb', line 412

# Register a command under the given name.
#
# An already registered command of the same name is left untouched. When a
# block is given it becomes the command's :lambda; it is merged into a copy so
# the caller's +options+ hash is never modified (and may be frozen).
#
# @param  [#to_sym] command name of the command
# @param  [Hash{Symbol => Object}] options command description (:description,
#   :help, :parse, :options, :lambda)
# @yield  [argv, opts] body of the command
# @return [Hash] the options registered for +command+
def self.add_command(command, options = {}, &block)
  options = options.merge(lambda: block) if block_given?
  COMMANDS[command.to_sym] ||= options
end

.basename ⇒ String

Returns:

  • (String)


220
# File 'lib/rdf/cli.rb', line 220

# Name of the running program, without its directory part.
#
# @return [String]
def self.basename
  File.basename($PROGRAM_NAME)
end

.commands ⇒ Array<String>

Returns list of executable commands.

Returns:

  • (Array<String>)

    list of executable commands



385
386
387
388
389
390
391
392
393
394
395
396
397
# File 'lib/rdf/cli.rb', line 385

# Names of every executable command, sorted alphabetically.
#
# The first call also registers the commands contributed by each format;
# @commands_loaded remembers that this has been done.
#
# @return [Array<String>] list of executable commands
def self.commands
  @commands_loaded ||= begin
    RDF::Format.each do |format|
      format.cli_commands.each do |name, spec|
        # A bare callable is shorthand for {lambda: callable}
        spec = {lambda: spec} unless spec.is_a?(Hash)
        add_command(name, spec)
      end
    end
    true
  end
  COMMANDS.each_key.map(&:to_s).sort
end

.exec(args, options = {}) ⇒ Boolean

Execute one or more commands, parsing input as necessary

Parameters:

  • args (Array<String>)

Returns:

  • (Boolean)


342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
# File 'lib/rdf/cli.rb', line 342

# Execute one or more commands, parsing input first when any of them needs it.
#
# Elements of +args+ that name a known command are run in order; the
# remaining elements are passed to parse and to each command lambda.
# An ArgumentError raised along the way is reported through abort.
#
# @param  [Array<String>] args command names mixed with input arguments
# @param  [Hash{Symbol => Object}] options
# @option options [IO] :output stream whose encoding is set to UTF-8 on JRuby
# @option options [OptionParser] :option_parser parser used for usage output;
#   self.options is only built when this is absent
# @return [Array<String>, nil] the executed command names, or nil after `help`
def self.exec(args, options = {})
  out = options[:output] || $stdout
  out.set_encoding(Encoding::UTF_8) if out.respond_to?(:set_encoding) && RUBY_PLATFORM == "java"
  cmds, args = args.partition {|e| commands.include?(e.to_s)}

  if cmds.empty?
    # Block form: self.options parses ARGV, so only build it when really needed
    usage(options.fetch(:option_parser) { self.options })
    abort "No command given"
  end

  if cmds.first == 'help'
    on_cmd = cmds[1]
    if on_cmd && COMMANDS.fetch(on_cmd.to_sym, {})[:help]
      usage(options.fetch(:option_parser) { self.options }, banner: "Usage: #{self.basename.split('/').last} #{COMMANDS[on_cmd.to_sym][:help]}")
    else
      usage(options.fetch(:option_parser) { self.options })
    end
    return
  end

  @repository = RDF::Repository.new
  # Start each run with an empty reader list so the report below neither fails
  # on nil nor repeats readers recorded by an earlier run.
  @readers = []

  # Parse input files if any command requires it
  if cmds.any? {|c| COMMANDS[c.to_sym][:parse]}
    start = Time.new
    self.parse(args, options) do |reader|
      @repository << reader
    end
    secs = Time.new - start
    # Rate is based on what was actually loaded into the repository
    total = repository.count
    $stdout.puts "Parsed #{total} statements with #{@readers.join(', ')} in #{secs} seconds @ #{total/secs} statements/second."
  end

  # Run each command in sequence
  cmds.each do |command|
    COMMANDS[command.to_sym][:lambda].call(args, options)
  end
rescue ArgumentError => e
  abort e.message
end

.formats(reader: false, writer: false) ⇒ Array<String>

Returns list of available formats.

Returns:

  • (Array<String>)

    list of available formats



419
420
421
422
423
424
425
426
427
# File 'lib/rdf/cli.rb', line 419

# List the available formats as "symbol: name" lines, with the symbols
# right-aligned to the longest one.
#
# @param  [Boolean] reader only include formats that have a reader
# @param  [Boolean] writer only include formats that have a writer (ignored
#   when +reader+ is set)
# @return [Array<String>] list of available formats
def self.formats(reader: false, writer: false)
  chosen = RDF::Format.sort_by(&:to_sym).select do |fmt|
    if reader
      fmt.reader
    elsif writer
      fmt.writer
    else
      fmt.reader || fmt.writer
    end
  end

  names = chosen.each_with_object({}) do |fmt, table|
    table[fmt.to_sym] = fmt.name
  end

  width = names.keys.map {|sym| sym.to_s.length}.max
  names.map {|sym, title| "%*s: %s" % [width, sym, title]}
end

.options {|options| ... } ⇒ OptionParser

Yields:

Yield Parameters:

Returns:



226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
# File 'lib/rdf/cli.rb', line 226

# Build the command-line option parser and parse ARGV with it.
#
# Reader and Writer options are registered first, then any options added by
# the block, then the built-in switches. Selecting --input-format or
# --output-format registers that format's own options while parsing.
# An invalid option is reported through abort.
#
# @yield  [options] called with the parser when the block takes one argument;
#   otherwise the block is instance_eval'd on the parser
# @yieldparam [OptionParser] options
# @return [OptionParser] the parser; the parsed values are in its +options+ hash
def self.options(&block)
  options = OptionParser.new
  logger = Logger.new($stderr)
  logger.level = Logger::ERROR
  logger.formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
  opts = options.options = {
    debug:          false,
    evaluate:       nil,
    format:         nil,
    output:         $stdout,
    output_format:  :ntriples,
    logger:         logger
  }

  # Registers every option in cli_opts whose symbol is not already present in
  # opts. The switch list is copied before the description is appended, so the
  # Option object's own `on` array is never modified by repeated calls.
  register = lambda do |cli_opts|
    cli_opts.each do |cli_opt|
      next if opts.key?(cli_opt.symbol)
      on_args = Array(cli_opt.on).dup
      on_args << cli_opt.description if cli_opt.description
      options.on(*on_args) do |value|
        opts[cli_opt.symbol] = cli_opt.call(value)
      end
    end
  end

  # Add default Reader and Writer options
  register.call(RDF::Reader.options)
  register.call(RDF::Writer.options)

  # Command-specific options
  if block_given?
    case block.arity
      when 1 then block.call(options)
      else options.instance_eval(&block)
    end
  end
  options.banner = "Usage: #{self.basename} command+ [options] [args...]"

  options.on('-d', '--debug',   'Enable debug output for troubleshooting.') do
    opts[:logger].level = Logger::DEBUG
  end

  options.on("-e", "--evaluate STRING", "Evaluate argument as RDF input, if no files are specified") do |arg|
    opts[:evaluate] = arg
  end

  options.on("--input-format FORMAT", "--format FORMAT", "Format of input file, uses heuristic if not specified") do |arg|
    fmt = arg.downcase.to_sym
    reader = RDF::Reader.for(fmt)
    unless reader
      self.abort "No reader found for #{fmt}. Available readers:\n  #{self.formats(reader: true).join("\n  ")}"
    end

    # Add format-specific reader options
    register.call(reader.options)
    opts[:format] = fmt
  end

  options.on("-o", "--output FILE", "File to write output, defaults to STDOUT") do |arg|
    # The file is left open on purpose: it is returned to the caller as opts[:output]
    opts[:output] = File.open(arg, "w")
  end

  options.on("--output-format FORMAT", "Format of output file, defaults to NTriples") do |arg|
    fmt = arg.downcase.to_sym
    writer = RDF::Writer.for(fmt)
    unless writer
      self.abort "No writer found for #{fmt}. Available writers:\n  #{self.formats(writer: true).join("\n  ")}"
    end

    # Add format-specific writer options
    register.call(writer.options)
    opts[:output_format] = fmt
  end

  options.on_tail("-h", "--help", "Show this message") do
    self.usage(options)
    exit(0)
  end

  begin
    options.parse!
  rescue OptionParser::InvalidOption => e
    abort e
  end

  options
end

.parse(files, options = {}) {|reader| ... } ⇒ nil

Parse each file, $stdin, or the string given in `options[:evaluate]`, yielding a reader for each

Parameters:

  • files (Array<String>)

Yields:

  • (reader)

Yield Parameters:

Returns:

  • (nil)


437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
# File 'lib/rdf/cli.rb', line 437

# Parse each file, or $stdin / the string in options[:evaluate] when no files
# are given, yielding a reader for each input.
#
# The class name of every reader that is yielded is appended to @readers, for
# both kinds of input, so later reporting can always list them.
#
# @param  [Array<String>] files inputs to open with RDF::Reader.open
# @param  [Hash{Symbol => Object}] options passed on to the reader
# @option options [String] :evaluate text to parse when +files+ is empty
# @option options [Symbol] :format reader format for non-file input
#   (defaults to :ntriples)
# @option options [Encoding] :encoding encoding applied to non-file input
#   (defaults to UTF-8)
# @yield  [reader]
# @yieldparam [RDF::Reader] reader
def self.parse(files, options = {}, &block)
  if files.empty?
    # With no files, read options[:evaluate] when present, otherwise $stdin
    input = options[:evaluate] ? StringIO.new(options[:evaluate]) : $stdin
    input.set_encoding(options.fetch(:encoding, Encoding::UTF_8))
    RDF::Reader.for(options[:format] || :ntriples).new(input, options) do |reader|
      (@readers ||= []) << reader.class.to_s
      yield(reader)
    end
  else
    files.each do |file|
      RDF::Reader.open(file, options) do |reader|
        (@readers ||= []) << reader.class.to_s
        yield(reader)
      end
    end
  end
end

.usage(options, banner: nil) ⇒ Object

Output usage message



329
330
331
332
333
334
335
# File 'lib/rdf/cli.rb', line 329

# Print the usage message to $stdout: the parser's own help text, a note
# about format-dependent options, and the available commands and formats.
#
# @param  [#banner=, #to_s] options parser whose help text is printed
# @param  [String, nil] banner replacement banner, applied when given
# @return [void]
def self.usage(options, banner: nil)
  banner && (options.banner = banner)

  out = $stdout
  out.puts options
  out.puts "Note: available commands and options may be different depending on selected --input-format and/or --output-format."
  out.puts "Available commands:\n\t" + self.commands.join("\n\t")
  out.puts "Available formats:\n\t" + self.formats.join("\n\t")
end