Class: RegExpNER
- Includes:
- SimpleDSL
- Defined in:
- lib/rbbt/ner/regexpNER.rb
Instance Attribute Summary collapse
-
#regexps ⇒ Object
Returns the value of attribute regexps.
Class Method Summary collapse
- .match_regexp(text, regexp, type = nil) ⇒ Object
- .match_regexp_hash(text, regexp_hash) ⇒ Object
- .match_regexp_list(text, regexp_list, type = nil) ⇒ Object
Instance Method Summary collapse
- #__define_regexp_hook(name, regexp, *args) ⇒ Object
- #add_regexp(list = {}) ⇒ Object
- #define_regexp(*args, &block) ⇒ Object
-
#initialize(regexps = {}) ⇒ RegExpNER
constructor
A new instance of RegExpNER.
- #match(text) ⇒ Object
- #token_score(*args) ⇒ Object
Methods inherited from NER
Constructor Details
#initialize(regexps = {}) ⇒ RegExpNER
Returns a new instance of RegExpNER.
70 71 72 |
# File 'lib/rbbt/ner/regexpNER.rb', line 70
#
# Builds a recognizer from an enumerable of regexp definitions.
#
# Every entry yielded by +regexps+ is copied, in order, into a new Array
# stored in @regexps; a Hash therefore becomes a list of [key, value] pairs.
#
# @param regexps [#each] definitions to start with (empty Hash by default)
def initialize(regexps = {})
  @regexps = regexps.each_with_object([]) { |entry, pairs| pairs << entry }
end
Instance Attribute Details
#regexps ⇒ Object
Returns the value of attribute regexps.
69 70 71 |
# File 'lib/rbbt/ner/regexpNER.rb', line 69
#
# Reader for the @regexps instance variable.
#
# @return [Object] the current value of @regexps
def regexps
  @regexps
end
Class Method Details
.match_regexp(text, regexp, type = nil) ⇒ Object
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/rbbt/ner/regexpNER.rb', line 8
#
# Scans +text+ left to right for occurrences of +regexp+ and returns them as
# strings passed through NamedEntity.setup(match, offset, type).
#
# When the first capture group took part in the match, that group (not the
# whole match) is reported, and scanning resumes right after the group.
# Otherwise the whole match is reported. Empty matches are never reported.
#
# Positions are taken from MatchData#begin / MatchData#end rather than by
# re-splitting the match on the captured text. Re-splitting treated the
# capture as a regexp (breaking on metacharacters such as "+" or "("),
# mis-placed captures that occur more than once inside the match, and raised
# when the first group was nil while a later group matched.
#
# @param text [String] text to scan; it is not modified
# @param regexp [Regexp] pattern to look for
# @param type [Object, nil] passed through to NamedEntity.setup
# @return [Array<String>] matched strings in order of appearance; offsets are
#   character positions relative to the start of +text+
def self.match_regexp(text, regexp, type = nil)
  matches = []
  start = 0 # characters of the original text already consumed

  while (matchdata = text.match(regexp))
    # Group 1 wins only if it actually participated in this match.
    group = matchdata[1] ? 1 : 0
    from = matchdata.begin(group)
    to = matchdata.end(group)

    match = text[from...to]
    unless match.empty?
      NamedEntity.setup(match, start + from, type)
      matches << match
    end

    # Nothing left to scan; a further pass could only yield empty matches.
    break if to >= text.length

    # A zero-width match at position 0 would otherwise loop forever.
    consumed = to.zero? ? 1 : to
    start += consumed
    text = text[consumed..-1]
  end

  matches
end
.match_regexp_hash(text, regexp_hash) ⇒ Object
51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
# File 'lib/rbbt/ner/regexpNER.rb', line 51
#
# Applies a collection of (type, regexp-or-list) entries to +text+.
#
# For each entry, +text+ is first cut with Segment.split(text, found) using
# the matches collected so far, and every resulting chunk is scanned with
# match_regexp_list. Offsets of the new matches are shifted by the offset of
# the chunk they were found in before being collected.
#
# @param text [String] text to scan
# @param regexp_hash [#each] yields (type, regexp) or (type, [regexp, ...])
# @return [Array] all matches, in the order they were found
def self.match_regexp_hash(text, regexp_hash)
  regexp_hash.each_with_object([]) do |(type, regexp_list), found|
    # A lone regexp is treated as a one-element list.
    regexp_list = [regexp_list] unless Array === regexp_list

    Segment.split(text, found).each do |chunk|
      base = chunk.offset
      match_regexp_list(chunk, regexp_list, type).each do |entity|
        entity.offset = entity.offset + base
        found << entity
      end
    end
  end
end
.match_regexp_list(text, regexp_list, type = nil) ⇒ Object
37 38 39 40 41 42 43 44 45 46 47 48 49 |
# File 'lib/rbbt/ner/regexpNER.rb', line 37
#
# Applies each regexp in +regexp_list+ to +text+ in order.
#
# Before each regexp is tried, +text+ is cut with Segment.split(text, found)
# using the matches collected so far; every chunk is scanned with
# match_regexp, and each new match has the chunk's offset added to its own
# before it is collected.
#
# @param text [String] text to scan
# @param regexp_list [#each] regexps to apply, in priority order
# @param type [Object, nil] passed through to match_regexp
# @return [Array] all matches, in the order they were found
def self.match_regexp_list(text, regexp_list, type = nil)
  regexp_list.each_with_object([]) do |regexp, found|
    Segment.split(text, found).each do |chunk|
      match_regexp(chunk, regexp, type).each do |entity|
        entity.offset += chunk.offset
        found << entity
      end
    end
  end
end
Instance Method Details
#__define_regexp_hook(name, regexp, *args) ⇒ Object
78 79 80 |
# File 'lib/rbbt/ner/regexpNER.rb', line 78
#
# Appends a [name, regexp] pair to @regexps. Any extra arguments are
# accepted and ignored.
#
# @param name [Object] label stored as the first element of the pair
# @param regexp [Object] pattern stored as the second element of the pair
# @return [Array] @regexps, after the pair has been appended
def __define_regexp_hook(name, regexp, *args)
  pair = [name, regexp]
  @regexps.push(pair)
end
#add_regexp(list = {}) ⇒ Object
86 87 88 |
# File 'lib/rbbt/ner/regexpNER.rb', line 86
#
# Appends every entry of +list+ to @regexps.
#
# The entries are materialised with #to_a before concatenation. Calling
# #collect without a block returns an Enumerator on current Rubies, which
# Array#concat rejects with a TypeError.
#
# @param list [#to_a] definitions to add; a Hash contributes [key, value] pairs
# @return [Array] @regexps, after the entries have been appended
def add_regexp(list = {})
  @regexps.concat(list.to_a)
end
#define_regexp(*args, &block) ⇒ Object
82 83 84 |
# File 'lib/rbbt/ner/regexpNER.rb', line 82
#
# Forwards its arguments and block to load_config, naming
# "__define_regexp_hook" as the first argument.
#
# NOTE(review): load_config is not defined in this view; presumably it comes
# from the included SimpleDSL module -- confirm.
#
# @return [Object] whatever load_config returns
def define_regexp(*args, &block)
  hook_name = "__define_regexp_hook"
  load_config(hook_name, *args, &block)
end
#match(text) ⇒ Object
90 91 92 93 94 95 |
# File 'lib/rbbt/ner/regexpNER.rb', line 90
#
# Runs the regexps held in @regexps over +text+ via
# RegExpNER.match_regexp_hash and passes every hit through NamedEntity.setup
# with its own offset and type, using the hit itself as the :code.
#
# @param text [String] text to scan
# @return [Array] one NamedEntity.setup result per hit, in match order
def match(text)
  hits = RegExpNER.match_regexp_hash(text, @regexps)
  hits.map do |hit|
    NamedEntity.setup(hit, :offset => hit.offset, :type => hit.type, :code => hit)
  end
end
#token_score(*args) ⇒ Object
74 75 76 |
# File 'lib/rbbt/ner/regexpNER.rb', line 74
#
# Constant score: accepts any arguments and always answers 1.
#
# @return [Integer] 1
def token_score(*args)
  1
end