Class: NER

Inherits:
Object
  • Object
show all
Defined in:
lib/rbbt/ner/NER.rb,
lib/rbbt/ner/rner.rb

Instance Method Summary collapse

Constructor Details

#initialize(model = nil) ⇒ NER

Returns a new instance of NER.



167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/rbbt/ner/rner.rb', line 167

# Sets up the feature parser and the CRF++ tagger for a model.
#
# @param model [String, nil] path prefix of the model. "<model>.config" is
#   handed to NERFeatures and the prefix itself to CRFPP::Tagger via "-m".
#   When nil, 'ner/model/BC2' under Rbbt.datadir is used.
def initialize(model = nil)
  begin
    require 'CRFPP'
  rescue LoadError
    # Only a failed load should trigger the fallback; rescuing Exception
    # would also trap interrupts and exit requests.
    require File.join(Rbbt.datadir, 'third_party/crf++/ruby/CRFPP')
  end

  model ||= File.join(Rbbt.datadir, 'ner/model/BC2')

  @parser  = NERFeatures.new(model + '.config')
  @reverse = @parser.reverse
  @tagger  = CRFPP::Tagger.new("-m #{ model } -v 3 -n2")
end

Instance Method Details

#entities(text, protect = false, *args) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/rbbt/ner/NER.rb', line 6

# Finds entity matches in +text+.
#
# * An Array is processed element by element; every match that has an
#   offset is shifted by its element's offset (when the element has one),
#   and the per-element results are flattened into a single list.
# * A Segmented text, when +protect+ is set, is first split with
#   split_segments(true) and the pieces are processed as an Array.
# * Anything else is passed straight to #match together with +args+.
def entities(text, protect = false, *args)
  if Array === text
    per_element = text.map do |element|
      hits = entities(element, protect, *args)
      hits.each do |hit|
        # Offsets are only shifted when both sides actually carry one.
        next unless hit.offset && element.offset
        hit.offset += element.offset
      end
      hits
    end
    per_element.flatten
  elsif Segmented === text && protect
    pieces = text.split_segments(true)
    entities(pieces, protect, *args)
  else
    match(text, *args)
  end
end

#extract(text) ⇒ Object



181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
# File 'lib/rbbt/ner/rner.rb', line 181

# Tags +text+ with the CRF tagger and returns the mentions it found.
#
# Each feature row produced by the parser is joined with spaces and fed to
# the tagger. After parsing, tokens are grouped by their label:
#   1 - appended to the current mention; if the current mention is
#       "special" (as a whole or in any of its words) it is closed first
#   2 - appended to the current mention
#   0 - closes the current mention
# A ')' token is never interpreted by label: it is appended only when the
# current mention already contains a '(', and is otherwise dropped.
#
# Returns an Array of Strings, one per mention, words joined by a space
# (word order reversed when @reverse is set).
def extract(text)
  feature_rows = @parser.text_features(text)

  @tagger.clear
  feature_rows.each do |row|
    @tagger.add(row.join(" "))
  end

  @tagger.parse

  mentions = []
  current  = []

  @tagger.size.times do |idx|
    tag   = @tagger.y(idx)
    token = @tagger.x(idx, 0)

    if token == ')'
      current << ')' if current.join =~ /\(/
      next
    end

    case tag
    when 0
      mentions << current if current.any?
      current = []
    when 2
      current << token
    when 1
      # A special mention is closed before this token is appended.
      closes_special = current.any? &&
        (current.join(" ").is_special? || current.any? { |part| part.is_special? })
      if closes_special
        mentions << current
        current = []
      end
      current << token
    end
  end

  mentions << current if current.any?

  mentions.map do |words|
    ordered = @reverse ? words.reverse : words
    ordered.join(" ")
  end
end