Class: Opener::Ners::Base

Inherits:
Object
  • Object
show all
Defined in:
lib/opener/ners/base.rb,
lib/opener/ners/base/version.rb

Overview

Base NER class that supports various languages such as Dutch and English.

Constant Summary collapse

MODELS_PATH =

The default models directory.

File.expand_path('../../../../models', __FILE__)
VERSION =
'3.1.0'

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Base

Returns a new instance of Base.

Parameters:

  • options (Hash) (defaults to: {})

Options Hash (options):

  • :enable_time (TrueClass|FalseClass)

    Whether or not to enable dynamic timestamps (enabled by default).



30
31
32
33
34
# File 'lib/opener/ners/base.rb', line 30

# Builds a new instance.
#
# The models directory is read from the NER_BASE_MODELS_PATH environment
# variable when it is set and falls back to MODELS_PATH otherwise.
#
# @param options [Hash]
# @option options [TrueClass|FalseClass] :enable_time Whether or not to
#   enable dynamic timestamps (enabled by default).
def initialize(options = {})
  # Block form keeps the fallback lazy: MODELS_PATH is only looked up when
  # the environment variable is absent.
  @models      = ENV.fetch('NER_BASE_MODELS_PATH') { MODELS_PATH }
  @enable_time = options.fetch(:enable_time) { true }
end

Instance Attribute Details

#enable_time ⇒ TrueClass|FalseClass (readonly)

Returns:

  • (TrueClass|FalseClass)


22
23
24
# File 'lib/opener/ners/base.rb', line 22

# Reader for the flag the constructor stores from the :enable_time option.
#
# @return [TrueClass|FalseClass]
def enable_time
  @enable_time
end

#models ⇒ String (readonly)

Returns:

  • (String)


19
20
21
# File 'lib/opener/ners/base.rb', line 19

# Reader for the models directory chosen by the constructor
# (ENV['NER_BASE_MODELS_PATH'] when set, otherwise MODELS_PATH).
#
# @return [String]
def models
  @models
end

Instance Method Details

#language_from_kaf(input) ⇒ String

Returns the language for the given KAF document.

Parameters:

  • input (String)

Returns:

  • (String)


73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/opener/ners/base.rb', line 73

# Extracts the language of a KAF document.
#
# The document is pull-parsed and parsing stops at the first `KAF` element;
# the value of its `xml:lang` attribute is used as the language.
#
# @param input [String] The KAF document.
# @return [String] The value of the `xml:lang` attribute.
# @raise [Core::UnsupportedLanguageError] When no language was found, or
#   when it contains anything other than letters, dashes and underscores.
def language_from_kaf(input)
  found = nil

  Oga::XML::PullParser.new(input).parse do |node|
    next unless node.is_a?(Oga::XML::Element) && node.name == 'KAF'

    found = node.get('xml:lang')
    break
  end

  # Only plain language codes pass, so a value such as "../../foo" can never
  # be injected.
  return found if found =~ /\A[a-zA-Z\-_]+\z/

  raise Core::UnsupportedLanguageError, found
end

#new_kaf_document(input) ⇒ Java::ixa.kaflib.KAFDocument

Parameters:

  • input (String)

    The input KAF document as a string.

Returns:

  • (Java::ixa.kaflib.KAFDocument)


60
61
62
63
64
65
# File 'lib/opener/ners/base.rb', line 60

# Builds a kaflib document from a KAF string by exposing the string as a
# Java input stream and handing a reader over it to KAFDocument.
#
# NOTE(review): the single-argument InputStreamReader constructor decodes
# with the JVM default charset - confirm that matches the encoding of
# `input`.
#
# @param input [String] The input KAF document as a string.
# @return [Java::ixa.kaflib.KAFDocument]
def new_kaf_document(input)
  stream = StringIO.new(input).to_inputstream

  Java::ixa.kaflib.KAFDocument.create_from_stream(
    Java::java.io.InputStreamReader.new(stream)
  )
end

#run(input) ⇒ Array

Annotates the given KAF document using the model for the document's language and returns the annotator's result.

Parameters:

  • input (String)

    The input to process.

Returns:

  • (Array)

Raises:

  • (Core::UnsupportedLanguageError)


43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/opener/ners/base.rb', line 43

# Annotates a KAF document using the model that belongs to its language.
#
# The model is expected at `<models>/<language>.bin`.
#
# @param input [String] The input to process.
# @return The value returned by the annotator's `annotate_kaf` call
#   (the existing API docs list Array - TODO confirm).
# @raise [Core::UnsupportedLanguageError] When the document's language is
#   rejected by #language_from_kaf or no model file exists for it.
def run(input)
  lang       = language_from_kaf(input)
  model_file = File.join(models, "#{lang}.bin")

  unless File.file?(model_file)
    raise Core::UnsupportedLanguageError, lang
  end

  document = new_kaf_document(input)
  settings = build_properties(lang, model_file)

  Java::eus.ixa.ixa.pipe.nerc.Annotate.new(settings)
    .annotate_kaf(enable_time, document)
end