23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
|
# File 'lib/ner_ruby/decoder.rb', line 23
# Groups per-token BIO-style labels into entity spans.
#
# Each prediction is translated through @label_map (falling back to "O" when
# the lookup yields nil). A "B-<type>" label opens a new span, an "I-<type>"
# label extends the open span only when <type> equals the span's type, and
# any other label closes the open span. An "I-" label with a different type
# closes the open span and its token is not included in any span.
#
# Tokens for which special_token? is truthy are skipped entirely; they do not
# close a span that is currently open.
#
# @param tokens [Array] tokens, positionally aligned with +predictions+
# @param predictions [Array] per-token keys looked up in @label_map
# @param scores [Array, nil] optional per-token scores; 1.0 is recorded for
#   every token when omitted
# @param original_text [Object, nil] passed through unchanged to build_entity
# @return [Array] one build_entity result per span, in order of appearance
def decode(tokens, predictions, scores: nil, original_text: nil)
  spans = []
  open_span = nil

  # Emits the span under construction (if there is one) and clears it.
  close_span = lambda do
    spans << build_entity(open_span, original_text) if open_span
    open_span = nil
  end

  tokens.each_with_index do |token, idx|
    next if special_token?(token)

    tag = @label_map[predictions[idx]] || "O"
    confidence = scores ? scores[idx] : 1.0

    if tag.start_with?("B-")
      # A begin tag always finishes whatever came before it.
      close_span.call
      open_span = { raw_tokens: [token], label: tag.delete_prefix("B-"), scores: [confidence] }
    elsif open_span && tag.start_with?("I-") && tag.delete_prefix("I-") == open_span[:label]
      open_span[:raw_tokens] << token
      open_span[:scores] << confidence
    else
      # "O", unknown labels, orphan "I-" tags and type-mismatched "I-" tags
      # all end the open span without starting a new one.
      close_span.call
    end
  end

  # Flush a span that runs up to the last token.
  close_span.call
  spans
end
|