Class: NerRuby::Decoder

Inherits:
Object
  • Object
show all
Defined in:
lib/ner_ruby/decoder.rb

Constant Summary collapse

# Built-in lookup tables, keyed by name, each mapping an integer prediction id
# to its tag string. Tags use "B-"/"I-" prefixes followed by an entity type,
# with "O" as the non-entity tag.
#
# Both the outer table and each inner map are frozen: the inner map is handed
# out as the default label map by the constructor, so a shallow freeze would
# let one caller's mutation leak into every decoder using the default.
LABEL_MAPS = {
  "bert-base-NER" => {
    0 => "O",
    1 => "B-MISC",
    2 => "I-MISC",
    3 => "B-PER",
    4 => "I-PER",
    5 => "B-ORG",
    6 => "I-ORG",
    7 => "B-LOC",
    8 => "I-LOC"
  }.freeze
}.freeze

Instance Method Summary collapse

Constructor Details

#initialize(label_map: nil) ⇒ Decoder



19
20
21
# File 'lib/ner_ruby/decoder.rb', line 19

# Builds a decoder around a prediction-id => tag lookup.
#
# @param label_map [Hash, nil] lookup consulted by #decode for each
#   prediction; when omitted (or otherwise falsy) the built-in
#   LABEL_MAPS["bert-base-NER"] table is used instead.
def initialize(label_map: nil)
  label_map ||= LABEL_MAPS["bert-base-NER"]
  @label_map = label_map
end

Instance Method Details

#decode(tokens, predictions, scores: nil, original_text: nil) ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/ner_ruby/decoder.rb', line 23

# Groups per-token tag predictions into entities.
#
# Tokens are walked in order; tokens for which special_token? is true are
# skipped entirely and do not interrupt a span that is in progress. For every
# other token the prediction at the same index is looked up in the label map
# (unknown ids count as "O") and handled as follows:
#
# * "B-<type>" closes any span in progress and opens a new one of <type>.
# * "I-<type>" extends the span in progress when its type matches.
# * Anything else closes the span in progress. This includes "O", an "I-" tag
#   with no span in progress, and an "I-" tag whose type differs from the
#   span in progress; in all of these cases the token itself is not kept.
#
# Each closed span is passed to build_entity together with original_text, and
# whatever build_entity returns is collected.
#
# @param tokens [Array] tokens, index-aligned with predictions
# @param predictions [#[]] per-token prediction ids (label map keys)
# @param scores [#[], nil] optional per-token scores; every token gets 1.0
#   when this is not supplied
# @param original_text [Object, nil] forwarded unchanged to build_entity
# @return [Array] build_entity results, in order of appearance
def decode(tokens, predictions, scores: nil, original_text: nil)
  finished = []
  open_span = nil

  # Emits the span in progress (if there is one) and resets the cursor.
  close_span = lambda do
    finished << build_entity(open_span, original_text) if open_span
    open_span = nil
  end

  tokens.each_with_index do |tok, idx|
    next if special_token?(tok)

    tag = @label_map[predictions[idx]] || "O"
    confidence = scores ? scores[idx] : 1.0

    if tag.start_with?("B-")
      close_span.call
      open_span = { raw_tokens: [tok], label: tag.delete_prefix("B-"), scores: [confidence] }
    elsif open_span && tag.start_with?("I-") && tag.delete_prefix("I-") == open_span[:label]
      open_span[:raw_tokens] << tok
      open_span[:scores] << confidence
    else
      # "O", orphan "I-", or type-mismatched "I-": end the span, drop the token.
      close_span.call
    end
  end

  close_span.call
  finished
end