Class: NerRuby::Recognizer
- Inherits: Object
- Inheritance chain: Object → NerRuby::Recognizer
- Defined in:
- lib/ner_ruby/recognizer.rb
Constant Summary collapse
- @@cache = ModelCache.new
Class Method Summary collapse
- .clear_cache ⇒ Object
-
.from_pretrained(name) ⇒ Object
Load a recognizer from a registered model name.
Instance Method Summary collapse
-
#initialize(model: nil, tokenizer: nil, label_map: nil, backend: nil, provider: nil, api_key: nil) ⇒ Recognizer
constructor
A new instance of Recognizer.
- #recognize(text, labels: nil) ⇒ Object
- #recognize_batch(texts, labels: nil) ⇒ Object
Constructor Details
#initialize(model: nil, tokenizer: nil, label_map: nil, backend: nil, provider: nil, api_key: nil) ⇒ Recognizer
Returns a new instance of Recognizer.
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
# File 'lib/ner_ruby/recognizer.rb', line 7

# Builds a recognizer backed either by a remote API or by a local ONNX model.
#
# With +backend: :api+ only the API client is created; no local model,
# tokenizer, decoder or pipeline is set up. Otherwise the model and tokenizer
# paths are taken from the arguments, falling back to the defaults on
# NerRuby.configuration. If either path is still missing, the instance is
# left without a pipeline.
#
# NOTE(review): cache entries are keyed by the model path alone, so a cache
# hit reuses the cached tokenizer even when a different +tokenizer+ path was
# passed — confirm this is intended.
#
# @param model [String, nil] path to the model file
# @param tokenizer [String, nil] path handed to load_tokenizer
# @param label_map [Object, nil] overrides the label map reported by the model
# @param backend [Symbol, nil] +:api+ selects the API backend
# @param provider [Symbol, nil] API provider; +:openai+ when omitted
# @param api_key [String, nil] forwarded to Models::Api
# @raise [ModelNotFoundError] if the resolved model path does not exist on disk
def initialize(model: nil, tokenizer: nil, label_map: nil, backend: nil, provider: nil, api_key: nil)
  settings = NerRuby.configuration

  if backend == :api
    @api_model = Models::Api.new(provider: provider || :openai, api_key: api_key)
    return
  end

  model_file = model || settings.default_model_path
  tokenizer_file = tokenizer || settings.default_tokenizer_path
  # Without both paths there is nothing to load.
  return unless model_file && tokenizer_file

  unless File.exist?(model_file)
    raise ModelNotFoundError, "Model not found: #{model_file}"
  end

  if settings.enable_cache && @@cache.has?(model_file)
    # Reuse the already-loaded model/tokenizer pair.
    entry = @@cache.get(model_file)
    @model = entry[:model]
    @tokenizer = entry[:tokenizer]
  else
    @model = Models::Onnx.new(model_path: model_file)
    @tokenizer = load_tokenizer(tokenizer_file)
    if settings.enable_cache
      @@cache.set(model_file, { model: @model, tokenizer: @tokenizer })
    end
  end

  # An explicit label map takes precedence over the one the model reports.
  @decoder = Decoder.new(label_map: label_map || @model.label_map)
  @pipeline = Pipeline.new(model: @model, tokenizer: @tokenizer, decoder: @decoder)
end
Class Method Details
.clear_cache ⇒ Object
84 85 86 |
# File 'lib/ner_ruby/recognizer.rb', line 84

# Empties the class-wide cache shared by all Recognizer instances.
#
# @return [Object] whatever the cache's +clear+ returns
def self.clear_cache
  @@cache.clear
end
.from_pretrained(name) ⇒ Object
Load a recognizer from a registered model name.
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/ner_ruby/recognizer.rb', line 37

# Load a recognizer from a registered model name.
#
# Looks the name up in the configured model registry. When the registry entry
# carries both a model path and a tokenizer path, a local recognizer is built
# from them (plus the entry's label map). Otherwise an API-backed recognizer
# using the +:huggingface+ provider is returned; neither +name+ nor an API key
# is forwarded in that case.
#
# @param name [Object] key understood by the model registry
# @return [Recognizer]
# @raise [Error] if the registry has no entry for +name+
def self.from_pretrained(name)
  settings = NerRuby.configuration
  entry = settings.model_registry.get(name)

  unless entry
    known = settings.model_registry.available.join(', ')
    raise Error, "Unknown model: #{name}. Available: #{known}"
  end

  has_local_files = entry[:model_path] && entry[:tokenizer_path]
  # API-based fallback
  return new(backend: :api, provider: :huggingface) unless has_local_files

  new(
    model: entry[:model_path],
    tokenizer: entry[:tokenizer_path],
    label_map: entry[:label_map]
  )
end
Instance Method Details
#recognize(text, labels: nil) ⇒ Object
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
# File 'lib/ner_ruby/recognizer.rb', line 54

# Extracts entities from a single text.
#
# Steps, in order: validate +labels+ (when given), run the API model or the
# local pipeline, optionally merge adjacent entities (controlled by
# NerRuby.configuration.merge_adjacent), keep only entities whose label is in
# +labels+ (when given), then apply score filtering.
#
# @param text [String, nil] input text; nil or whitespace-only yields +[]+
# @param labels [Array<#to_sym>, nil] restricts results to these labels
# @return [Object] the result of filter_by_score (presumably an Array of
#   entities — confirm against that helper), or +[]+ for blank input
# @raise [Error] if neither an API model nor a local pipeline is available
def recognize(text, labels: nil)
  return [] if text.nil? || text.strip.empty?

  validate_labels!(labels) if labels

  found =
    if @api_model
      @api_model.recognize(text, labels: labels)
    elsif @pipeline
      @pipeline.call(text)
    else
      raise Error, "No model loaded. Provide model and tokenizer paths."
    end

  # Merging happens before label filtering, so filtering sees merged entities.
  found = merge_adjacent_entities(found) if NerRuby.configuration.merge_adjacent

  if labels
    wanted = labels.map(&:to_sym)
    found = found.select { |entity| wanted.include?(entity.label) }
  end

  # Final pass: per-type or global min_score filtering.
  filter_by_score(found)
end
#recognize_batch(texts, labels: nil) ⇒ Object
80 81 82 |
# File 'lib/ner_ruby/recognizer.rb', line 80

# Runs #recognize on every text and returns the results in input order.
#
# A nil batch yields an empty result instead of raising NoMethodError, which
# matches how #recognize treats a nil text.
#
# @param texts [Enumerable<String, nil>, nil] texts to process
# @param labels [Array<#to_sym>, nil] forwarded unchanged to each #recognize call
# @return [Array] one #recognize result per input text; +[]+ when +texts+ is nil
def recognize_batch(texts, labels: nil)
  return [] if texts.nil?

  texts.map { |text| recognize(text, labels: labels) }
end