Class: RegExpNER
- Inherits:
-
Object
- Object
- RegExpNER
- Defined in:
- lib/rbbt/ner/regexpNER.rb
Class Method Summary
- .build_re(names, ignorecase = true) ⇒ Object
- .build_re_old(names, ignorecase = true) ⇒ Object
- .match_re(text, res) ⇒ Object
Instance Method Summary
-
#initialize(lexicon, options = {}) ⇒ RegExpNER
constructor
A new instance of RegExpNER.
- #match(text) ⇒ Object
- #match_hash(text) ⇒ Object
Constructor Details
#initialize(lexicon, options = {}) ⇒ RegExpNER
Returns a new instance of RegExpNER.
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
# File 'lib/rbbt/ner/regexpNER.rb', line 33
#
# Builds the per-code regular-expression index (@index) from a lexicon.
#
# lexicon:: passed, together with the merged options, to Open.to_hash; the
#           result is iterated as (code, names) pairs.
# options:: overrides for the defaults :flatten => true, :ignorecase => true
#           and :stopwords => nil. :stopwords may be a collection of names to
#           leave out; nil or true selects the global $stopwords when set.
#
# Entries whose code is nil or "" are skipped. When :ignorecase is on, names
# are downcased before being looked up in the stopword collection.
def initialize(lexicon, options = {})
  options = {:flatten => true, :ignorecase => true, :stopwords => nil}.merge(options)

  stopwords = options[:stopwords]
  stopwords = $stopwords if $stopwords && (stopwords.nil? || stopwords == true)
  # A bare `true` with no global list (or nil/false) means "no stopwords".
  # Without this normalisation `true.any?` below raised NoMethodError.
  stopwords = [] if !stopwords || stopwords == true
  options[:stopwords] = stopwords

  data = Open.to_hash(lexicon, options)

  stopwords  = options[:stopwords]
  ignorecase = options[:ignorecase]

  @index = {}
  data.each do |code, names|
    next if code.nil? || code == ""

    if stopwords.any?
      names = names.reject { |n| stopwords.include?(ignorecase ? n.downcase : n) }
    end

    @index[code] = RegExpNER.build_re(names, ignorecase)
  end
end
Class Method Details
.build_re(names, ignorecase = true) ⇒ Object
22 23 24 25 26 27 28 29 30 |
# File 'lib/rbbt/ner/regexpNER.rb', line 22
#
# Compiles a list of names into a single Regexp of the form
# /\b(name1|name2|...)\b/ with one capture group.
#
# names::      array of strings; nil and "" entries are ignored.
# ignorecase:: when truthy (the default) the regexp is case-insensitive.
#
# Names are regexp-escaped, and every whitespace character inside a name is
# replaced by \s+ so that any run of whitespace in the text matches it.
# Longer names come first so the alternation prefers the longest name.
#
# Returns a Regexp. When no usable name remains, the result never matches
# (instead of matching the empty string at every word boundary).
def self.build_re(names, ignorecase=true)
  alternatives = names.compact.
    reject { |n| n == "" }.
    sort_by { |n| -n.length }.
    # Block form keeps the replacement literal (no backslash processing).
    map { |n| Regexp.quote(n).gsub(/\\?\s/) { '\s+' } }

  # An empty alternation would be /\b()\b/, which matches "" everywhere.
  return /(?!)/ if alternatives.empty?

  source = "\\b(" + alternatives.join("|") + ")\\b"
  Regexp.new(source, ignorecase ? Regexp::IGNORECASE : 0)
end
.build_re_old(names, ignorecase = true) ⇒ Object
14 15 16 17 18 19 20 |
# File 'lib/rbbt/ner/regexpNER.rb', line 14
#
# Older variant of build_re: returns one regexp *source string* per name
# instead of a single compiled Regexp.
#
# names::      array of strings; nil and "" entries are ignored.
# ignorecase:: accepted for signature compatibility; not used here.
#
# Each name is regexp-escaped and every whitespace character in it is
# replaced by \s+. The resulting strings are ordered longest name first.
def self.build_re_old(names, ignorecase=true)
  names.compact.
    reject { |n| n == "" }.
    sort_by { |n| -n.length }.
    # Block form keeps the replacement literal (no backslash processing).
    map { |n| Regexp.quote(n).gsub(/\\?\s/) { '\s+' } }
end
.match_re(text, res) ⇒ Object
6 7 8 9 10 11 12 |
# File 'lib/rbbt/ner/regexpNER.rb', line 6
#
# Scans +text+ with one pattern or an array of patterns and returns every
# hit as a single flat array.
#
# text:: the string to scan.
# res::  a pattern, or an Array of patterns, each handed to String#scan.
#
# Because String#scan yields nested arrays for patterns with capture groups,
# the combined result is fully flattened before being returned.
def self.match_re(text, res)
  patterns =
    case res
    when Array then res
    else [res]
    end

  hits = patterns.map { |pattern| text.scan(pattern) }
  hits.flatten
end
Instance Method Details
#match(text) ⇒ Object
65 66 67 |
# File 'lib/rbbt/ner/regexpNER.rb', line 65
#
# Passes +text+ to #match_hash and returns that method's result unchanged.
def match(text)
  match_hash(text)
end