Class: PROIEL::Commands::CountWords

Inherits:
PROIEL::Command show all
Defined in:
lib/proiel/cli/commands/info.rb

Class Method Summary collapse

Methods inherited from PROIEL::Command

inherited, subclasses

Class Method Details

.init_with_program(prog) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
# File 'lib/proiel/cli/commands/info.rb', line 5

# Registers the +info+ command on the given program object.
#
# The command's action hands the positional arguments and options to
# +process+; when no filenames were given it prints a hint on STDERR
# instead.
#
# @param prog [Object] program object that responds to +command+
def init_with_program(prog)
  prog.command(:info) do |command|
    command.syntax 'info [options] filename(s)'
    command.description 'Show information about the treebank'

    command.action do |filenames, opts|
      # Normal path: at least one filename was supplied.
      next process(filenames, opts) unless filenames.empty?

      STDERR.puts 'Missing filename(s). Use --help for more information.'
    end
  end
end

.pretty_electronic_text_info(source) ⇒ Object



104
105
106
107
108
109
110
# File 'lib/proiel/cli/commands/info.rb', line 104

# Builds a one-line, comma-separated description of the electronic text
# a source is based on.
#
# Fields that are nil are left out; the editor, when present, is
# prefixed with "ed. ".
#
# @param source [#electronic_text_title, #electronic_text_editor,
#   #electronic_text_publisher, #electronic_text_place, #electronic_text_date]
# @return [String] the joined description (empty when every field is nil)
def pretty_electronic_text_info(source)
  details = []
  details << source.electronic_text_title

  editor = source.electronic_text_editor
  details << "ed. #{editor}" if editor

  details << source.electronic_text_publisher
  details << source.electronic_text_place
  details << source.electronic_text_date

  details.compact.join(', ')
end

.pretty_language(source) ⇒ Object



87
88
89
90
91
92
93
94
# File 'lib/proiel/cli/commands/info.rb', line 87

# Returns a human-readable name for the language of a source.
#
# The language is identified by the string returned from
# +source.language+; the recognised values below are ISO 639-3 codes.
# Any other value falls back to a message that echoes the raw code.
#
# @param source [#language] object exposing a language code
# @return [String] the language name, or "Unknown (language code ...)"
def pretty_language(source)
  case source.language
  when 'lat'
    'Latin'
  when 'grc'
    'Ancient Greek'
  when 'got'
    'Gothic'
  when 'xcl'
    'Classical Armenian'
  when 'chu'
    'Old Church Slavonic'
  else
    "Unknown (language code #{source.language})"
  end
end

.pretty_license(source) ⇒ Object



112
113
114
115
116
117
118
# File 'lib/proiel/cli/commands/info.rb', line 112

# Formats the license of a source for display.
#
# @param source [#license, #license_url]
# @return [String, nil] the license followed by its URL in parentheses
#   when a URL is set, otherwise +source.license+ unchanged
def pretty_license(source)
  return source.license unless source.license_url

  "#{source.license} (#{source.license_url})"
end

.pretty_printed_text_info(source) ⇒ Object



96
97
98
99
100
101
102
# File 'lib/proiel/cli/commands/info.rb', line 96

# Builds a one-line, comma-separated description of the printed text a
# source is based on.
#
# Fields that are nil are left out; the editor, when present, is
# prefixed with "ed. ".
#
# @param source [#printed_text_title, #printed_text_editor,
#   #printed_text_publisher, #printed_text_place, #printed_text_date]
# @return [String] the joined description (empty when every field is nil)
def pretty_printed_text_info(source)
  editor = source.printed_text_editor
  editor_note = editor ? "ed. #{editor}" : nil

  fields = [
    source.printed_text_title,
    editor_note,
    source.printed_text_publisher,
    source.printed_text_place,
    source.printed_text_date
  ]

  fields.compact.join(', ')
end

.pretty_title(source) ⇒ Object



120
121
122
# File 'lib/proiel/cli/commands/info.rb', line 120

# Combines the author and title of a source into a single heading.
#
# @param source [#author, #title]
# @return [String] "author, title", or whichever of the two is not nil
def pretty_title(source)
  heading_parts = [source.author, source.title]
  heading_parts.reject(&:nil?).join(', ')
end

.process(args, options) ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/proiel/cli/commands/info.rb', line 20

# Loads the given treebank files into one treebank and prints a summary
# to standard output: overall sentence/token totals followed by a
# numbered entry per source with its metadata, size and annotation
# percentages.
#
# @param args [Array<String>] filenames passed to +load_from_xml+
# @param options [Hash] command options; a truthy 'verbose' entry makes
#   the method announce each file on STDERR before loading it
def process(args, options)
  tb = PROIEL::Treebank.new

  args.each do |filename|
    STDERR.puts "Reading #{filename}...".green if options['verbose']

    tb.load_from_xml(filename)
  end

  t = treebank_statistics(tb)

  puts "Loaded treebank files contain #{tb.sources.count} source(s)".yellow
  puts "   Overall size: #{t.sentence_count} sentence(s), #{t.token_count} token(s)"
  puts

  tb.sources.each_with_index do |source, i|
    s = source_statistics(source)
    n = s.sentence_count

    # A source without sentences would otherwise evaluate 0.0 / 0, which
    # is NaN and would be printed as "NaN%"; report 0% instead.
    r = n.zero? ? 0.0 : s.reviewed_sentence_count * 100.0 / n
    a = n.zero? ? 0.0 : s.annotated_sentence_count * 100.0 / n

    puts "#{i + 1}. #{pretty_title(source)}".yellow
    puts "   Version:      #{source.date}"
    puts "   License:      #{pretty_license(source)}"
    puts "   Language:     #{pretty_language(source)}"
    puts "   Printed text: #{pretty_printed_text_info(source)}"
    puts "   Electr. text: #{pretty_electronic_text_info(source)}"
    puts "   Size:         #{n} sentence(s), #{s.token_count} token(s)"
    puts format('   Annotation:   %.2f%% reviewed, %.2f%% annotated', r, a)
  end
end

.source_statistics(source) ⇒ Object



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/proiel/cli/commands/info.rb', line 68

# Tallies sentence and token counts for a single source by walking all
# of its divs.
#
# @param source [#divs] object whose divs respond to +sentences+
# @return [OpenStruct] with +sentence_count+, +token_count+,
#   +annotated_sentence_count+ and +reviewed_sentence_count+
def source_statistics(source)
  sentence_total = 0
  token_total = 0
  annotated_total = 0
  reviewed_total = 0

  source.divs.each do |div|
    sentences = div.sentences

    token_total += sentences.inject(0) { |acc, sentence| acc + sentence.tokens.count }
    sentence_total += sentences.count
    annotated_total += sentences.count(&:annotated?)
    reviewed_total += sentences.count(&:reviewed?)
  end

  OpenStruct.new(
    sentence_count: sentence_total,
    token_count: token_total,
    annotated_sentence_count: annotated_total,
    reviewed_sentence_count: reviewed_total
  )
end

.treebank_statistics(tb) ⇒ Object



52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/proiel/cli/commands/info.rb', line 52

# Sums the per-source statistics of every source in a treebank.
#
# @param tb [#sources] treebank whose sources are passed to
#   +source_statistics+
# @return [OpenStruct] with +sentence_count+, +token_count+,
#   +annotated_sentence_count+ and +reviewed_sentence_count+ totals
def treebank_statistics(tb)
  OpenStruct.new.tap do |s|
    s.sentence_count = 0
    s.token_count = 0
    s.annotated_sentence_count = 0
    s.reviewed_sentence_count = 0

    tb.sources.each do |source|
      # Compute the statistics once per source and reuse them; each call
      # to source_statistics walks every div and sentence of the source.
      stats = source_statistics(source)

      s.token_count += stats.token_count
      s.sentence_count += stats.sentence_count
      s.annotated_sentence_count += stats.annotated_sentence_count
      s.reviewed_sentence_count += stats.reviewed_sentence_count
    end
  end
end