Class: NerRuby::Recognizer

Inherits:
Object
  • Object
show all
Defined in:
lib/ner_ruby/recognizer.rb

Constant Summary collapse

@@cache =
ModelCache.new

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(model: nil, tokenizer: nil, label_map: nil, backend: nil, provider: nil, api_key: nil) ⇒ Recognizer

Returns a new instance of Recognizer.



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/ner_ruby/recognizer.rb', line 7

# Builds a recognizer backed either by a remote API model or by a local
# ONNX model plus tokenizer.
#
# When no model/tokenizer pair can be resolved (neither passed in nor set in
# the configuration) the instance is created without a pipeline.
#
# @param model [String, nil] model path; falls back to the configured default_model_path
# @param tokenizer [String, nil] tokenizer path; falls back to the configured default_tokenizer_path
# @param label_map [Object, nil] overrides the label map reported by the loaded model
# @param backend [Symbol, nil] pass :api to use the API-backed model instead of local files
# @param provider [Symbol, nil] API provider; :openai is used when omitted
# @param api_key [String, nil] handed to the API model as-is
# @raise [ModelNotFoundError] if the resolved model path does not exist on disk
def initialize(model: nil, tokenizer: nil, label_map: nil, backend: nil, provider: nil, api_key: nil)
  settings = NerRuby.configuration

  # API backend needs no local artefacts, so stop right after wiring it up.
  if backend == :api
    @api_model = Models::Api.new(provider: provider || :openai, api_key: api_key)
    return
  end

  model_file = model || settings.default_model_path
  tokenizer_file = tokenizer || settings.default_tokenizer_path
  return unless model_file && tokenizer_file

  raise ModelNotFoundError, "Model not found: #{model_file}" unless File.exist?(model_file)

  # NOTE(review): cache entries are keyed by the model path only.
  cache_hit = settings.enable_cache && @@cache.has?(model_file)
  if cache_hit
    entry = @@cache.get(model_file)
    @model = entry[:model]
    @tokenizer = entry[:tokenizer]
  else
    @model = Models::Onnx.new(model_path: model_file)
    @tokenizer = load_tokenizer(tokenizer_file)
    if settings.enable_cache
      @@cache.set(model_file, { model: @model, tokenizer: @tokenizer })
    end
  end

  # An explicit label_map wins over the one the model reports.
  @decoder = Decoder.new(label_map: label_map || @model.label_map)
  @pipeline = Pipeline.new(model: @model, tokenizer: @tokenizer, decoder: @decoder)
end

Class Method Details

.clear_cache ⇒ Object



84
85
86
# File 'lib/ner_ruby/recognizer.rb', line 84

# Clears the class-level model cache held in @@cache.
#
# @return [Object] whatever the cache's #clear returns
def self.clear_cache
  @@cache.clear
end

.from_pretrained(name) ⇒ Object

Load a recognizer from a registered model name

Raises:

  • (Error) — if no model is registered under the given name



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/ner_ruby/recognizer.rb', line 37

# Load a recognizer from a registered model name.
#
# Registry entries that carry both :model_path and :tokenizer_path produce a
# locally-backed recognizer; any other entry falls back to the API backend.
#
# @param name [Object] key looked up in the configured model registry
# @return [Recognizer]
# @raise [Error] if the registry has no entry for +name+
def self.from_pretrained(name)
  settings = NerRuby.configuration
  entry = settings.model_registry.get(name)

  unless entry
    known_models = settings.model_registry.available.join(', ')
    raise Error, "Unknown model: #{name}. Available: #{known_models}"
  end

  # API-based fallback when the entry lacks local artefacts
  return new(backend: :api, provider: :huggingface) unless entry[:model_path] && entry[:tokenizer_path]

  new(model: entry[:model_path], tokenizer: entry[:tokenizer_path], label_map: entry[:label_map])
end

Instance Method Details

#recognize(text, labels: nil) ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/ner_ruby/recognizer.rb', line 54

# Extracts entities from a single piece of text.
#
# Steps, in order: obtain entities from the API model (if configured) or the
# local pipeline, optionally merge adjacent entities, keep only the requested
# labels, then apply score filtering.
#
# @param text [String, nil] input text; nil or whitespace-only yields []
# @param labels [Array, nil] labels to keep (compared as symbols); nil keeps all
# @return [Array] the entities that survive filtering
# @raise [Error] if neither an API model nor a local pipeline is available
def recognize(text, labels: nil)
  return [] if text.nil? || text.strip.empty?

  validate_labels!(labels) if labels

  found =
    if @api_model
      @api_model.recognize(text, labels: labels)
    elsif @pipeline
      @pipeline.call(text)
    else
      raise Error, "No model loaded. Provide model and tokenizer paths."
    end

  # Merge adjacent entities of the same type
  found = merge_adjacent_entities(found) if NerRuby.configuration.merge_adjacent

  # Keep only the requested labels
  if labels
    wanted = labels.map(&:to_sym)
    found = found.select { |entity| wanted.include?(entity.label) }
  end

  # Filter by per-type or global min_score
  filter_by_score(found)
end

#recognize_batch(texts, labels: nil) ⇒ Object



80
81
82
# File 'lib/ner_ruby/recognizer.rb', line 80

# Runs #recognize over each text in turn, using the same label filter for all.
#
# @param texts [Enumerable] texts to process
# @param labels [Array, nil] forwarded unchanged to #recognize
# @return [Array] one #recognize result per input text, in input order
def recognize_batch(texts, labels: nil)
  texts.map do |document|
    recognize(document, labels: labels)
  end
end