Class: Opener::Ners::Base
- Inherits: Object
  - Object
    - Opener::Ners::Base
- Defined in:
- lib/opener/ners/base.rb,
lib/opener/ners/base/version.rb
Overview
Base NER class that supports various languages such as Dutch and English.
Constant Summary collapse
- MODELS_PATH =
The default models directory.
File.expand_path('../../../../models', __FILE__)
- VERSION =
'3.1.0'
Instance Attribute Summary collapse
- #enable_time ⇒ TrueClass|FalseClass readonly
- #models ⇒ String readonly
Instance Method Summary collapse
-
#initialize(options = {}) ⇒ Base
constructor
A new instance of Base.
-
#language_from_kaf(input) ⇒ String
Returns the language for the given KAF document.
- #new_kaf_document(input) ⇒ Java::ixa.kaflib.KAFDocument
-
#run(input) ⇒ Array
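Runs the NER annotator for the language of the given KAF document and returns the annotator's result. (The earlier wording, "returns the output of STDOUT, STDERR and the process information", appears to be outdated: the source listing under Instance Method Details no longer runs an external command.)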
Constructor Details
#initialize(options = {}) ⇒ Base
Returns a new instance of Base.
30 31 32 33 34 |
# File 'lib/opener/ners/base.rb', line 30

def initialize(options = {})
  @models = ENV['NER_BASE_MODELS_PATH'] || MODELS_PATH
  @enable_time = options.fetch(:enable_time, true)
end
Instance Attribute Details
#enable_time ⇒ TrueClass|FalseClass (readonly)
22 23 24 |
# File 'lib/opener/ners/base.rb', line 22

def enable_time
  @enable_time
end
#models ⇒ String (readonly)
19 20 21 |
# File 'lib/opener/ners/base.rb', line 19

def models
  @models
end
Instance Method Details
#language_from_kaf(input) ⇒ String
Returns the language for the given KAF document.
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
# File 'lib/opener/ners/base.rb', line 73

def language_from_kaf(input)
  parser = Oga::XML::PullParser.new(input)
  language = nil

  parser.parse do |node|
    if node.is_a?(Oga::XML::Element) and node.name == 'KAF'
      language = node.get('xml:lang')
      break
    end
  end

  # Make sure nobody can _somehow_ inject a language such as "../../foo".
  unless language =~ /\A[a-zA-Z\-_]+\z/
    raise Core::UnsupportedLanguageError, language
  end

  language
end
#new_kaf_document(input) ⇒ Java::ixa.kaflib.KAFDocument
60 61 62 63 64 65 |
# File 'lib/opener/ners/base.rb', line 60

def new_kaf_document(input)
  input_io = StringIO.new(input)
  reader = Java::java.io.InputStreamReader.new(input_io.to_inputstream)

  Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
end
#run(input) ⇒ Array
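Runs the NER annotator for the language of the given KAF document and returns the annotator's result. Raises Core::UnsupportedLanguageError when no model file exists for that language. Note: the earlier description ("returns the output of STDOUT, STDERR and the process information") and the declared Array return type appear to be outdated, since the listing below calls the Java annotator directly instead of running an external command; confirm the actual return type against the annotator.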
43 44 45 46 47 48 49 50 51 52 53 54 |
# File 'lib/opener/ners/base.rb', line 43

def run(input)
  lang = language_from_kaf(input)
  model = File.join(models, "#{lang}.bin")

  raise(Core::UnsupportedLanguageError, lang) unless File.file?(model)

  kaf = new_kaf_document(input)
  properties = build_properties(lang, model)
  annotator = Java::eus.ixa.ixa.pipe.nerc.Annotate.new(properties)

  annotator.annotate_kaf(enable_time, kaf)
end