Class: NER
- Inherits: Object
  - Object
    - NER
- Defined in:
- lib/rbbt/ner/NER.rb,
lib/rbbt/ner/rner.rb
Direct Known Subclasses
Abner, Banner, ChemicalTagger, NGramPrefixDictionary, OSCAR3, OSCAR4, RegExpNER, TokenTrieNER
Instance Method Summary collapse
- #entities(text, protect = false, *args) ⇒ Object
- #extract(text) ⇒ Object
- #initialize(model = nil) ⇒ NER (constructor): A new instance of NER.
Constructor Details
#initialize(model = nil) ⇒ NER
Returns a new instance of NER.
167 168 169 170 171 172 173 174 175 176 177 178 179 |
# File 'lib/rbbt/ner/rner.rb', line 167

# Sets up the feature parser and the CRF++ tagger for a model.
#
# The CRFPP Ruby bindings are required from the regular load path first;
# if that load fails, the copy under Rbbt.datadir
# ('third_party/crf++/ruby/CRFPP') is required instead.
#
# @param model [String, nil] model path; when nil it defaults to
#   'ner/model/BC2' under Rbbt.datadir. The same path with '.config'
#   appended is handed to NERFeatures.new.
def initialize(model = nil)
  begin
    require 'CRFPP'
  rescue LoadError
    # Only a failed load should trigger the bundled fallback. Rescuing
    # Exception here would also swallow Interrupt, SystemExit, etc.
    require File.join(Rbbt.datadir, 'third_party/crf++/ruby/CRFPP')
  end

  model ||= File.join(Rbbt.datadir, 'ner/model/BC2')

  @parser  = NERFeatures.new(model + '.config')
  # Remembered so #extract can restore word order when the parser reverses it.
  @reverse = @parser.reverse
  @tagger  = CRFPP::Tagger.new("-m #{ model } -v 3 -n2")
end
Instance Method Details
#entities(text, protect = false, *args) ⇒ Object
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
# File 'lib/rbbt/ner/NER.rb', line 6

# Finds entities in +text+, dispatching on the kind of input:
#
# * Array: every element is processed recursively and the results are
#   flattened into a single list. When an element exposes a non-nil
#   +offset+, that value is added to the offset of each of its matches
#   that has one. Elements without an +offset+ method (plain Strings,
#   nested Arrays) leave their matches untouched.
# * Segmented text with +protect+ set: the text is split with
#   split_segments(true) and the pieces are processed as an Array.
# * Anything else: delegated to #match (not defined in this listing;
#   presumably supplied by subclasses) together with +args+.
#
# @param text [Object] a text, or an Array of texts
# @param protect [Boolean] whether Segmented input is split before matching
# @param args extra arguments forwarded to #match
# @return [Array] the matches found
def entities(text, protect = false, *args)
  if Array === text
    text.collect do |element|
      matches = entities(element, protect, *args)

      # Not every element carries positional information; only shift
      # matches when the element actually provides an offset.
      base = element.offset if element.respond_to?(:offset)
      matches.each { |match| match.offset += base if match.offset } if base

      matches
    end.flatten
  elsif Segmented === text && protect
    entities(text.split_segments(true), protect, *args)
  else
    match(text, *args)
  end
end
#extract(text) ⇒ Object
181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 |
# File 'lib/rbbt/ner/rner.rb', line 181

# Runs the tagger over +text+ and returns the mentions it found.
#
# The feature rows produced by @parser.text_features are fed to the
# tagger one per token; after parsing, tokens are grouped into mentions
# according to the label the tagger assigned to each position:
#
# * 1 - adds the token to the current mention; if the current mention is
#       non-empty and counts as "special" (String#is_special?, defined
#       outside this listing), that mention is closed first.
# * 2 - adds the token to the current mention.
# * 0 - closes the current mention, if any.
#
# (Labels presumably follow a begin/inside/outside scheme - confirm
# against the model.)
#
# A ')' token is handled regardless of its label: it is appended only
# when the current mention already contains a '(' and is dropped otherwise.
#
# @param text [String] text to analyse
# @return [Array<String>] mentions, each with its tokens joined by a
#   single space (token order is reversed first when @reverse is set)
def extract(text)
  features = @parser.text_features(text)

  @tagger.clear
  features.each do |token_features|
    @tagger.add(token_features.join(" "))
  end
  @tagger.parse

  mentions = []
  current  = []

  (0...@tagger.size).each do |index|
    tag   = @tagger.y(index)
    token = @tagger.x(index, 0)

    if token == ')'
      # Keep the closing parenthesis only when it balances an opening one.
      current << ')' if current.join =~ /\(/
      next
    end

    case tag
    when 0
      mentions << current if current.any?
      current = []
    when 1
      pending = current.any?
      if pending && (current.join(" ").is_special? || current.select(&:is_special?).any?)
        mentions << current
        current = []
      end
      current << token
    when 2
      current << token
    end
  end

  # Flush a mention that runs up to the end of the input.
  mentions << current if current.any?

  mentions.map do |words|
    ordered = @reverse ? words.reverse : words
    ordered.join(" ")
  end
end