Class: Opener::LanguageIdentifier

Inherits:
Object
  • Object
show all
Defined in:
lib/opener/language_identifier.rb,
lib/opener/language_identifier/cli.rb,
lib/opener/language_identifier/server.rb,
lib/opener/language_identifier/version.rb,
lib/opener/language_identifier/detector.rb,
lib/opener/language_identifier/kaf_builder.rb

Overview

Language identifier class that can detect various languages such as Dutch, German and Swedish.

Defined Under Namespace

Classes: CLI, Detector, KafBuilder, Server

Constant Summary collapse

DEFAULT_OPTIONS =

Hash containing the default options to use.

Returns:

  • (Hash)
{
  :args      => [],
  :kaf       => true,
  :benchmark => false
}.freeze
VERSION =
"3.1.6"

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ LanguageIdentifier

Returns a new instance of LanguageIdentifier.

Parameters:

  • options (Hash) (defaults to: {})

Options Hash (options):

  • :args (Array)

    Arbitrary arguments to pass to the underlying kernel.

  • :kaf (TrueClass|FalseClass)

    When set to `true` the results will be displayed as KAF.

  • :benchmark (TrueClass|FalseClass)

    When set to `true` benchmarking output will be added to the KAF document.



49
50
51
52
# File 'lib/opener/language_identifier.rb', line 49

##
# Creates an identifier whose options are DEFAULT_OPTIONS overlaid with the
# options supplied by the caller.
#
# @param [Hash] options Values that take precedence over the defaults.
#
def initialize(options = {})
  merged    = DEFAULT_OPTIONS.merge(options)
  @options  = merged
  @detector = Detector.instance
end

Instance Attribute Details

#options ⇒ Hash (readonly)

Returns:

  • (Hash)


23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/opener/language_identifier.rb', line 23

class LanguageIdentifier
  ##
  # The options in effect: DEFAULT_OPTIONS merged with the options given to
  # the constructor.
  #
  # @return [Hash]
  #
  attr_reader :options

  ##
  # Hash containing the default options to use.
  #
  # @return [Hash]
  #
  DEFAULT_OPTIONS = {
    :args      => [],
    :kaf       => true,
    :benchmark => false
  }.freeze

  ##
  # @param [Hash] options
  #
  # @option options [Array] :args Arbitrary arguments to pass to the
  #  underlying kernel.
  #
  # @option options [TrueClass|FalseClass] :kaf When set to `true` the
  #  results will be displayed as KAF.
  #
  # @option options [TrueClass|FalseClass] :benchmark When set to `true`
  #  benchmarking output will be added to the KAF document.
  #
  # @option options [TrueClass|FalseClass] :probs When set to `true` the
  #  detector's probabilities are returned instead of a single language.
  #  Not part of DEFAULT_OPTIONS, so it is off unless given.
  #
  def initialize(options = {})
    @options  = DEFAULT_OPTIONS.merge(options)
    @detector = Detector.instance
  end

  ##
  # Detects the language of the input and returns the result in the format
  # selected by the options:
  #
  # * with `:probs` set, whatever the detector's `probabilities` returns;
  # * otherwise, with `:kaf` set, a KAF document (with benchmarking output
  #   added when `:benchmark` is also set);
  # * otherwise the value returned by the detector's `detect`.
  #
  # Failures raised while processing are passed to
  # `Opener::Core::ErrorLayer` and the result of its `add` call is returned
  # instead. `SystemExit`, `SignalException` and `NoMemoryError` are not
  # treated as processing failures and are re-raised.
  #
  # @param [String] input The text of which to detect the language.
  # @return [Object] The detection result as described above.
  #
  def run(input)
    output    = nil
    benchmark = Opener::Core::Benchmark.new('opener-language-identifier')

    # With :probs set the output is the probabilities rather than a KAF
    # document, so there is no document to add benchmarking output to.
    as_kaf = options[:kaf] && !options[:probs]

    results = benchmark.measure do
      if options[:probs]
        output = @detector.probabilities(input)
      else
        output = @detector.detect(input)
        output = build_kaf(input, output) if as_kaf
      end
    end

    output = benchmark.write(output, results) if as_kaf && options[:benchmark]

    output
  rescue SystemExit, SignalException, NoMemoryError
    # Exit requests, signals and memory exhaustion concern the process as a
    # whole; let them propagate instead of turning them into an error result.
    raise
  rescue Exception => error
    # Deliberately broad: every other failure, including ones that are not a
    # StandardError, keeps being reported through the error layer.
    Opener::Core::ErrorLayer.new(input, error.message, self.class).add
  end

  alias identify run

  protected

  ##
  # Builds a KAF document containing the input and the correct XML language
  # tag based on the output of the kernel.
  #
  # @param [String] input The input text.
  # @param [String] language The detected language
  # @return [String]
  #
  def build_kaf(input, language)
    builder = KafBuilder.new(input, language)
    builder.build

    builder.to_s
  end
end

Instance Method Details

#run(input) ⇒ Array Also known as: identify

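Detects the language of the input and returns the result: the detector's probabilities when the :probs option is set, otherwise a KAF document when :kaf is set (with benchmarking output added when :benchmark is also set), otherwise the detected language. If processing raises an error, the result of Opener::Core::ErrorLayer#add is returned instead.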

Parameters:

  • input (String)

    The text of which to detect the language.

Returns:

  • (Array)


61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/opener/language_identifier.rb', line 61

##
# Detects the language of the input and returns the result in the format
# selected by the options:
#
# * with `:probs` set, whatever the detector's `probabilities` returns;
# * otherwise, with `:kaf` set, a KAF document (with benchmarking output
#   added when `:benchmark` is also set);
# * otherwise the value returned by the detector's `detect`.
#
# Failures raised while processing are passed to `Opener::Core::ErrorLayer`
# and the result of its `add` call is returned instead. `SystemExit`,
# `SignalException` and `NoMemoryError` are not treated as processing
# failures and are re-raised.
#
# @param [String] input The text of which to detect the language.
# @return [Object] The detection result as described above.
#
def run(input)
  output    = nil
  benchmark = Opener::Core::Benchmark.new('opener-language-identifier')

  # With :probs set the output is the probabilities rather than a KAF
  # document, so there is no document to add benchmarking output to.
  as_kaf = options[:kaf] && !options[:probs]

  results = benchmark.measure do
    if options[:probs]
      output = @detector.probabilities(input)
    else
      output = @detector.detect(input)
      output = build_kaf(input, output) if as_kaf
    end
  end

  output = benchmark.write(output, results) if as_kaf && options[:benchmark]

  output
rescue SystemExit, SignalException, NoMemoryError
  # Exit requests, signals and memory exhaustion concern the process as a
  # whole; let them propagate instead of turning them into an error result.
  raise
rescue Exception => error
  # Deliberately broad: every other failure, including ones that are not a
  # StandardError, keeps being reported through the error layer.
  Opener::Core::ErrorLayer.new(input, error.message, self.class).add
end