Class: RegExpNER

Inherits:
Object
  • Object
show all
Defined in:
lib/rbbt/ner/regexpNER.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(lexicon, options = {}) ⇒ RegExpNER

Returns a new instance of RegExpNER.



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/rbbt/ner/regexpNER.rb', line 33

# Builds the per-code regexp index (@index) from a lexicon.
#
# @param lexicon handed unchanged to Open.to_hash
# @param options [Hash] merged over the defaults
#   {:flatten => true, :ignorecase => true, :stopwords => nil}; the merged
#   hash is also passed to Open.to_hash.
#   * :stopwords  - collection of names to discard; nil or true selects the
#     global $stopwords list when one is set, otherwise no names are discarded.
#   * :ignorecase - names are downcased before the stopword lookup, and the
#     flag is forwarded to RegExpNER.build_re.
def initialize(lexicon, options = {})
  options = {:flatten => true, :ignorecase => true, :stopwords => nil}.merge options

  options[:stopwords] = $stopwords if $stopwords && (options[:stopwords].nil? || options[:stopwords] == true)
  # `true` only requests the global list. With no global list available it
  # must degrade to "no stopwords"; leaving `true` in place would raise
  # NoMethodError on the `any?` call below.
  options[:stopwords] = [] if options[:stopwords] == true
  options[:stopwords] ||= []

  # NOTE(review): Open.to_hash is defined elsewhere; the loop below treats its
  # result as a code => list-of-names mapping -- confirm against its docs.
  data = Open.to_hash(lexicon, options)
  stopwords = options[:stopwords]

  @index = {}
  data.each do |code, names|
    next if code.nil? || code == ""
    if stopwords.any?
      # nil names are left in place (build_re compacts them) instead of
      # crashing on nil.downcase.
      names = names.reject do |n|
        n && stopwords.include?(options[:ignorecase] ? n.downcase : n)
      end
    end
    @index[code] = RegExpNER.build_re(names, options[:ignorecase])
  end
end

Class Method Details

.build_re(names, ignorecase = true) ⇒ Object



22
23
24
25
26
27
28
29
30
# File 'lib/rbbt/ner/regexpNER.rb', line 22

# Compiles a list of names into a single word-bounded alternation regexp.
#
# nil and empty names are discarded. Names are ordered longest first so that,
# when several alternatives could match at the same position, the longer name
# is tried before the shorter one. Any whitespace character inside a name
# matches a run of one or more whitespace characters in the text.
#
# @param names [Array<String, nil>] candidate names
# @param ignorecase [Boolean] when true (the default) the regexp is
#   case-insensitive
# @return [Regexp] regexp with one capture group holding the matched name;
#   a regexp that never matches when no usable name is left
def self.build_re(names, ignorecase=true)
  patterns = names.compact.reject { |n| n == "" }.
    sort_by { |n| -n.length }.
    map { |n| Regexp.quote(n).gsub(/\\?\s/, '\s+') }

  # An empty alternation would compile to /\b()\b/, which matches the empty
  # string at every word boundary; return a pattern that cannot match instead.
  return /(?!)/ if patterns.empty?

  Regexp.new("\\b(#{ patterns.join('|') })\\b", ignorecase ? Regexp::IGNORECASE : 0)
end

.build_re_old(names, ignorecase = true) ⇒ Object



14
15
16
17
18
19
20
# File 'lib/rbbt/ner/regexpNER.rb', line 14

# Legacy variant of the regexp builder: returns one escaped pattern string
# per name instead of a compiled regexp.
#
# nil and empty names are dropped, the rest are ordered longest first, and
# each whitespace character in a name is replaced by the pattern `\s+`.
#
# @param names [Array<String, nil>] candidate names
# @param ignorecase accepted but not used by this method
# @return [Array<String>] regexp source fragments, longest name first
def self.build_re_old(names, ignorecase=true)
  usable = names.compact.reject { |name| name == "" }
  longest_first = usable.sort { |x, y| y.length <=> x.length }
  longest_first.map { |name| Regexp.quote(name).gsub(/\\?\s/, '\s+') }
end

.match_re(text, res) ⇒ Object



6
7
8
9
10
11
12
# File 'lib/rbbt/ner/regexpNER.rb', line 6

# Scans +text+ with one or several regexps and returns every hit in a single
# flat list.
#
# @param text [String] text to scan
# @param res [Regexp, Array<Regexp>] a single pattern or a list of patterns
# @return [Array<String>] all scan results, in pattern order; capture-group
#   results are flattened into the same list
def self.match_re(text, res)
  patterns = res.is_a?(Array) ? res : [res]
  # Deep flatten: scan yields nested arrays when a pattern has capture groups.
  patterns.map { |pattern| text.scan(pattern) }.flatten
end

Instance Method Details

#match(text) ⇒ Object



65
66
67
# File 'lib/rbbt/ner/regexpNER.rb', line 65

# Convenience entry point: delegates directly to #match_hash.
#
# @param text forwarded unchanged to #match_hash
# @return whatever #match_hash returns for +text+
def match(text)
  match_hash(text)
end

#match_hash(text) ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
# File 'lib/rbbt/ner/regexpNER.rb', line 53

# Runs every indexed regexp against +text+ and groups the hits by code.
#
# @param text [String, nil] text to scan
# @return [Hash] code => list of matched strings; codes without any hit are
#   absent, and nil or empty text yields an empty hash
def match_hash(text)
  return {} if text.nil? || text == ""

  found = {}
  @index.each do |code, re|
    hits = RegExpNER.match_re(text, re)
    next if hits.empty?
    (found[code] ||= []).concat(hits)
  end
  found
end