Class: Mitie::NER

Inherits:
Object
  • Object
show all
Defined in:
lib/mitie/ner.rb

Instance Method Summary collapse

Constructor Details

#initialize(path) ⇒ NER

Returns a new instance of NER.



3
4
5
6
# File 'lib/mitie/ner.rb', line 3

# Loads a MITIE named entity extractor model and keeps the native handle.
#
# @param path [String] location of the model file passed to the native loader
# @raise [ArgumentError] if no file exists at +path+
def initialize(path)
  # Fail early with a readable error instead of handing a bad path to native code.
  raise ArgumentError, "File does not exist" unless File.exist?(path)

  @pointer = FFI.mitie_load_named_entity_extractor(path)
  # Register whatever self.class.finalize builds for this handle
  # (presumably a proc that releases the native model -- confirm in the class).
  ObjectSpace.define_finalizer(self, self.class.finalize(pointer))
end

Instance Method Details

#entities(text) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/mitie/ner.rb', line 36

# Runs entity extraction over +text+ and describes every detection.
#
# @param text [String] the text to analyse
# @return [Array<Hash>] one hash per detection with the keys :text, :tag,
#   :score and :offset; :offset is the offset recorded for the first token of
#   the detection, as reported by tokens_with_offset_with_ptr
def entities(text)
  tokens, tokens_ptr = tokens_with_offset_with_ptr(text)
  detections = FFI.mitie_extract_entities(pointer, tokens_ptr)

  FFI.mitie_ner_get_num_detections(detections).times.map do |index|
    position = FFI.mitie_ner_get_detection_position(detections, index)
    length = FFI.mitie_ner_get_detection_length(detections, index)
    label = FFI.mitie_ner_get_detection_tagstr(detections, index).to_s
    confidence = FFI.mitie_ner_get_detection_score(detections, index)

    # Each token entry is a [token, offset] pair; the detection covers
    # +length+ consecutive entries starting at +position+.
    span = tokens[position, length]
    start = span.first[1]
    last_token, last_offset = span.last
    stop = last_offset + last_token.size

    { text: text[start...stop], tag: label, score: confidence, offset: start }
  end
ensure
  # Both native results are freed even when extraction raises part-way through.
  FFI.mitie_free(tokens_ptr) if tokens_ptr
  FFI.mitie_free(detections) if detections
end

#tags ⇒ Object



8
9
10
11
12
# File 'lib/mitie/ner.rb', line 8

# Lists the tag labels known to the loaded model.
#
# @return [Array<String>] one label per tag index, in index order
def tags
  tag_count = FFI.mitie_get_num_possible_ner_tags(pointer)
  (0...tag_count).map { |index| FFI.mitie_get_named_entity_tagstr(pointer, index).to_s }
end

#tokens(text) ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/mitie/ner.rb', line 14

# Splits +text+ into tokens using the native tokenizer.
#
# @param text [String] the text to tokenize
# @return [Array<String>] the tokens, each tagged with the encoding of +text+
def tokens(text)
  words = []
  ptr = FFI.mitie_tokenize(text)
  index = 0
  # The result is walked as consecutive pointer-sized slots; a null entry ends the list.
  until (entry = (ptr + index * Fiddle::SIZEOF_VOIDP).ptr).null?
    words << entry.to_s.force_encoding(text.encoding)
    index += 1
  end
  words
ensure
  # Release the native array whether or not the walk completed.
  FFI.mitie_free(ptr) if ptr
end

#tokens_with_offset(text) ⇒ Object



29
30
31
32
33
34
# File 'lib/mitie/ner.rb', line 29

# Tokenizes +text+ and returns the token list produced by
# tokens_with_offset_with_ptr, releasing the native array that comes with it.
#
# @param text [String] the text to tokenize
# @return [Object] the first element returned by tokens_with_offset_with_ptr
#   (presumably [token, offset] pairs -- confirm against that helper)
def tokens_with_offset(text)
  pairs, native_array = tokens_with_offset_with_ptr(text)
  pairs
ensure
  # The caller only needs the Ruby-side list, so the native array is freed here.
  FFI.mitie_free(native_array) if native_array
end