Class: Opener::LanguageIdentifier

Inherits:
Object
  • Object
show all
Defined in:
lib/opener/language_identifier.rb,
lib/opener/language_identifier/cli.rb,
lib/opener/language_identifier/server.rb,
lib/opener/language_identifier/version.rb,
lib/opener/language_identifier/detector.rb,
lib/opener/language_identifier/kaf_builder.rb

Overview

Language identifier class that can detect various languages such as Dutch, German and Swedish.

Defined Under Namespace

Classes: CLI, Detector, KafBuilder, Server

Constant Summary collapse

DEFAULT_OPTIONS =

Hash containing the default options to use.

{
  :args  => [],
  :kaf   => true,
  :probs => false
}.freeze
VERSION =
'4.2.1'

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ LanguageIdentifier

Returns a new instance of LanguageIdentifier.

Options Hash (options):

  • :args (Array)

    Arbitrary arguments to pass to the underlying kernel.

  • :kaf (TrueClass|FalseClass)

    When set to `true` the results will be displayed as KAF.

  • :probs (TrueClass|FalseClass)

    When set, the probabilities are returned instead of the language/KAF.



46
47
48
49
# File 'lib/opener/language_identifier.rb', line 46

# Stores the effective options (the caller's values layered over
# DEFAULT_OPTIONS) and creates a Detector instance.
#
# @param [Hash] options Overrides for DEFAULT_OPTIONS; keys given here win.
def initialize(options = {})
  # Hash#merge returns a new Hash, so DEFAULT_OPTIONS itself is left untouched.
  @options  = DEFAULT_OPTIONS.merge(options)
  @detector = Detector.new
end

Instance Attribute Details

#options ⇒ Hash (readonly)



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/opener/language_identifier.rb', line 20

class LanguageIdentifier
  ##
  # Option values used for every key the caller does not supply.
  #
  # @return [Hash]
  #
  DEFAULT_OPTIONS = { args: [], kaf: true, probs: false }.freeze

  ##
  # The effective options: DEFAULT_OPTIONS overlaid with whatever was handed
  # to the constructor.
  #
  # @return [Hash]
  #
  attr_reader :options

  ##
  # @param [Hash] options Values overriding DEFAULT_OPTIONS.
  #
  # @option options [Array] :args Arbitrary arguments to pass to the
  #  underlying kernel. (Stored in the options Hash; not read by this class
  #  itself.)
  #
  # @option options [TrueClass|FalseClass] :kaf When set to `true` the
  #  detected language is wrapped in a KAF document.
  #
  # @option options [TrueClass|FalseClass] :probs When set, the probabilities
  #  are returned instead of the language/KAF.
  #
  def initialize(options = {})
    # Hash#merge builds a new Hash, leaving the frozen defaults untouched.
    @options  = DEFAULT_OPTIONS.merge(options)
    @detector = Detector.new
  end

  ##
  # Runs language detection on the given text.
  #
  # The `:probs` option takes precedence over `:kaf`: when it is set the
  # detector's probabilities are returned as-is and no KAF is built.
  #
  # @param [String] input The text of which to detect the language.
  # @return [Object] The result of `Detector#probabilities` when `:probs` is
  #  set; otherwise the result of `Detector#detect`, or the String produced by
  #  {#build_kaf} for that result when `:kaf` is set.
  #
  def run(input)
    return @detector.probabilities(input) if options[:probs]

    language = @detector.detect(input)

    options[:kaf] ? build_kaf(input, language) : language
  end

  alias_method :identify, :run

  protected

  ##
  # Wraps the input text and the detected language in a KAF document by
  # delegating to KafBuilder.
  #
  # @param [String] input The input text.
  # @param [String] language The detected language.
  # @return [String] The builder's `to_s` output after `build` has run.
  #
  def build_kaf(input, language)
    document = KafBuilder.new(input, language)
    document.build

    document.to_s
  end
end

Instance Method Details

#run(input) ⇒ Object Also known as: identify

Processes the input and returns the detection result: the detector's probabilities when the :probs option is set; otherwise the detected language, wrapped in a KAF document (a String) when the :kaf option is set.



58
59
60
61
62
63
64
65
66
67
# File 'lib/opener/language_identifier.rb', line 58

# Runs language detection on +input+.
#
# When the :probs option is set the detector's probabilities are returned
# directly. Otherwise the language is detected and, if the :kaf option is set,
# handed to build_kaf together with the input; the result of that call (or
# the bare detection result) is returned.
def run(input)
  return @detector.probabilities(input) if options[:probs]

  language = @detector.detect(input)

  options[:kaf] ? build_kaf(input, language) : language
end