Class: PROIEL::Commands::Convert

Inherits:
PROIEL::Command show all
Defined in:
lib/proiel/cli/commands/convert.rb

Class Method Summary collapse

Methods inherited from PROIEL::Command

inherited, subclasses

Class Method Details

.init_with_program(prog) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/proiel/cli/commands/convert.rb', line 5

# Registers the +convert+ command and one subcommand per output format on
# the given command-line program object.
#
# Every format subcommand hands its arguments and options to +process+
# together with the converter class for that format. Invoking +convert+
# without a recognised format subcommand prints an error message to STDERR
# and exits with status 1.
#
# NOTE(review): the DSL used here (+command+, +syntax+, +description+,
# +option+, +action+) looks like the Mercenary gem's API - confirm against
# the CLI entry point.
#
# @param prog [Object] program object that responds to +command+ and
#   yields a command builder
# @return [Object] whatever +prog.command+ returns
def init_with_program(prog)
  prog.command(:convert) do |c|
    c.syntax 'convert format'
    c.description 'Convert to a different format'

    c.command(:proielxml) do |f|
      f.syntax '[options] [filename(s)]'
      f.description 'Convert to PROIEL XML format'
      f.option 'remove-not-annotated', '--remove-not-annotated', 'Remove sentences that have not been annotated'
      f.option 'remove-not-reviewed', '--remove-not-reviewed', 'Remove sentences that have not been reviewed'
      f.option 'remove-morphology', '--remove-morphology', 'Remove morphological annotation (part of speech, morphology and lemma)'
      f.option 'remove-syntax', '--remove-syntax', 'Remove syntactic annotation (relation, head ID and slashes)'
      f.option 'remove-information-structure', '--remove-information-structure', 'Remove information structure annotation (antecedent ID, information status and contrast group)'
      f.option 'remove-status', '--remove-status', 'Remove sentence status (i.e. revert all sentences to unannotated status)'
      f.option 'remove-alignments', '--remove-alignments', 'Remove alignments'
      f.option 'remove-annotator', '--remove-annotator', 'Remove annotator information'
      f.option 'remove-reviewer', '--remove-reviewer', 'Remove reviewer information'
      f.option 'remove-empty-divs', '--remove-empty-divs', 'Remove div elements that do not contain any sentences'
      f.action { |args, options| process(args, options, PROIEL::Converter::PROIELXML) }
    end

    c.command(:tnt) do |f|
      f.syntax '[options] filename(s)'
      f.description 'Convert to TNT/hunpos format'
      f.option 'morphology', '-m', '--morphology', 'Include morphological tags'
      f.action { |args, options| process(args, options, PROIEL::Converter::TNT) }
    end

    c.command(:"conll-x") do |f|
      f.syntax 'filename(s)'
      f.description 'Convert to CoNLL-X format'
      f.action { |args, options| process(args, options, PROIEL::Converter::CoNLLX) }
    end

    c.command(:"conll-u") do |f|
      f.syntax 'filename(s)'
      f.description 'Convert to CoNLL-U format'
      f.action { |args, options| process(args, options, PROIEL::Converter::CoNLLU) }
    end

    c.command(:tiger) do |f|
      f.syntax 'filename(s)'
      f.description 'Convert to TIGER XML format'
      f.action { |args, options| process(args, options, PROIEL::Converter::Tiger) }
    end

    c.command(:tiger2) do |f|
      f.syntax 'filename(s)'
      f.description 'Convert to TIGER2 format'
      f.action { |args, options| process(args, options, PROIEL::Converter::Tiger2) }
    end

    c.command(:text) do |f|
      f.syntax 'filename(s)'
      f.description 'Convert to plain text (UTF-8 with Unix line-endings)'
      f.option 'diffable', '-d', '--diffable', 'Make the output diffable'
      f.action { |args, options| process(args, options, PROIEL::Converter::Text) }
    end

    c.command(:lexc) do |f|
      f.syntax '[options] filename(s)'
      f.description 'Convert to lexc format'
      f.option 'morphology', '-m', '--morphology', 'Include morphological tags'
      f.action { |args, options| process(args, options, PROIEL::Converter::Lexc) }
    end

    # Fallback for a bare `convert` with no format subcommand: report the
    # problem and exit with a failure status.
    c.action do |_, _|
      STDERR.puts 'Missing or invalid format. Use --help for more information.'
      exit 1
    end
  end
end

.process(args, options, converter) ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/proiel/cli/commands/convert.rb', line 78

# Loads PROIEL XML into a fresh treebank and hands it to a converter.
#
# With no filenames the treebank is loaded from STDIN; otherwise each
# filename is loaded in the order given. When +options['verbose']+ is
# truthy a progress line is written to STDERR before each load.
#
# @param args [Array] filenames to load; empty means read from STDIN
# @param options [Hash] command options, passed through to the converter
# @param converter [#process] object called as +process(treebank, options)+
# @return [Object] whatever +converter.process+ returns
def process(args, options, converter)
  treebank = PROIEL::Treebank.new

  # Progress messages are only emitted in verbose mode.
  announce = lambda do |message|
    STDERR.puts message.green if options['verbose']
  end

  if args.empty?
    announce.call('Reading from standard input...')
    treebank.load_from_xml(STDIN)
  else
    args.each do |path|
      announce.call("Reading #{path}...")
      treebank.load_from_xml(path)
    end
  end

  converter.process(treebank, options)
end