Class: Mitie::Document
- Inherits:
-
Object
- Object
- Mitie::Document
- Defined in:
- lib/mitie/document.rb
Instance Attribute Summary collapse
-
#model ⇒ Object
readonly
Returns the value of attribute model.
-
#text ⇒ Object
readonly
Returns the value of attribute text.
Instance Method Summary collapse
- #entities ⇒ Object
-
#initialize(model, text) ⇒ Document
constructor
A new instance of Document.
- #tokens ⇒ Object
- #tokens_with_offset ⇒ Object
Constructor Details
#initialize(model, text) ⇒ Document
Returns a new instance of Document.
5 6 7 8 |
# File 'lib/mitie/document.rb', line 5

# Builds a document by remembering the model and the text it was given.
# No processing happens here; both arguments are stored untouched.
#
# @param model [Object] stored in @model
# @param text [Object] stored in @text; #tokens_with_offset treats an Array
#   as pre-split tokens and anything else as raw text
def initialize(model, text)
  @model = model
  @text = text
end
Instance Attribute Details
#model ⇒ Object (readonly)
Returns the value of attribute model.
3 4 5 |
# File 'lib/mitie/document.rb', line 3

# Read-only accessor.
#
# @return [Object] the value held in @model
def model
  @model
end
#text ⇒ Object (readonly)
Returns the value of attribute text.
3 4 5 |
# File 'lib/mitie/document.rb', line 3

# Read-only accessor.
#
# @return [Object] the value held in @text
def text
  @text
end
Instance Method Details
#entities ⇒ Object
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/mitie/document.rb', line 34

# Runs entity extraction over the document's tokens and returns one hash per
# detection. The result is memoized in @entities.
#
# Each hash contains:
#   :text         - the slice of `text` covered by the detection when token
#                   offsets are known, otherwise the Array of token strings
#   :tag          - tag string reported for the detection
#   :score        - score reported for the detection
#   :offset       - offset of the first token (only present when known)
#   :token_index  - index of the first token of the detection
#   :token_length - number of tokens in the detection
#
# @return [Array<Hash>] detected entities, in the order reported by FFI
def entities
  @entities ||= begin
    entities = []
    tokens = tokens_with_offset
    detections = FFI.mitie_extract_entities(pointer, tokens_ptr)
    num_detections = FFI.mitie_ner_get_num_detections(detections)

    num_detections.times do |i|
      pos = FFI.mitie_ner_get_detection_position(detections, i)
      len = FFI.mitie_ner_get_detection_length(detections, i)
      # The function name was missing here (`FFI.(detections, i)`), which
      # Ruby parses as `FFI.call(...)`; the tag string accessor is intended.
      tag = FFI.mitie_ner_get_detection_tagstr(detections, i).to_s
      score = FFI.mitie_ner_get_detection_score(detections, i)

      # Tokens covered by this detection, as [token, offset] pairs.
      tok = tokens[pos, len]
      offset = tok[0][1]

      entity = {}
      if offset
        # Slice the original text from the start of the first token to the
        # end of the last one, so inner whitespace is preserved.
        finish = tok[-1][1] + tok[-1][0].bytesize
        entity[:text] = text.byteslice(offset...finish)
      else
        # Offsets are nil when the document was built from tokens.
        entity[:text] = tok.map(&:first)
      end
      entity[:tag] = tag
      entity[:score] = score
      entity[:offset] = offset if offset
      entity[:token_index] = pos
      entity[:token_length] = len
      entities << entity
    end

    entities
  ensure
    # Hand the detections handle back to the library even if a step above raised.
    FFI.mitie_free(detections) if detections
  end
end
#tokens ⇒ Object
10 11 12 |
# File 'lib/mitie/document.rb', line 10

# Token strings of the document, without their offsets. Memoized in @tokens.
#
# @return [Array] first element of every pair from #tokens_with_offset
def tokens
  @tokens ||= tokens_with_offset.map(&:first)
end
#tokens_with_offset ⇒ Object
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
# File 'lib/mitie/document.rb', line 14

# Tokens of the document paired with their offsets, memoized in
# @tokens_with_offset.
#
# When `text` is an Array it is taken as already tokenized and every offset
# is nil. Otherwise tokens are read from `tokens_ptr` and offsets from
# `offsets_ptr`.
#
# @return [Array<Array>] list of [token, offset] pairs; offset is nil when
#   the document was built from an Array of tokens
def tokens_with_offset
  @tokens_with_offset ||=
    if text.is_a?(Array)
      # offsets are unknown when given tokens
      text.map { |v| [v, nil] }
    else
      i = 0
      tokens = []
      loop do
        # tokens_ptr is walked as consecutive pointer-sized slots; a null
        # entry ends the list.
        token = (tokens_ptr + i * Fiddle::SIZEOF_VOIDP).ptr
        break if token.null?

        # Read the i-th native unsigned long from the offsets buffer.
        # NOTE(review): presumably a byte offset into `text`, since #entities
        # feeds it to byteslice - confirm against the native library.
        offset = (offsets_ptr.ptr + i * Fiddle::SIZEOF_LONG).to_s(Fiddle::SIZEOF_LONG).unpack1("L!")

        # Strings read from the pointer are relabelled with the encoding of
        # the source text.
        tokens << [token.to_s.force_encoding(text.encoding), offset]
        i += 1
      end
      tokens
    end
end