Module: Token
- Defined in:
- lib/rbbt/ner/segment/token.rb
Instance Attribute Summary
- #offset ⇒ Object: Returns the value of attribute offset.
- #original ⇒ Object: Returns the value of attribute original.
Class Method Summary
- .all_annotations ⇒ Object
- .setup(text, start, original = nil) ⇒ Object
- .tokenize(text, split_at = /\s|(\(|\)|[-."':,])/, start = 0) ⇒ Object
Instance Method Summary
- #end ⇒ Object
- #id ⇒ Object
- #info ⇒ Object
- #range ⇒ Object
Instance Attribute Details
#offset ⇒ Object
Returns the value of attribute offset.
5 6 7 |
# File 'lib/rbbt/ner/segment/token.rb', line 5
# Reader for the +offset+ attribute.
#
# @return [Object] whatever is stored in @offset (nil when it was never set)
def offset
  @offset
end
#original ⇒ Object
Returns the value of attribute original.
5 6 7 |
# File 'lib/rbbt/ner/segment/token.rb', line 5
# Reader for the +original+ attribute.
#
# @return [Object] whatever is stored in @original (nil when it was never set)
def original
  @original
end
Class Method Details
.all_annotations ⇒ Object
7 8 9 |
# File 'lib/rbbt/ner/segment/token.rb', line 7
# Names of the annotation attributes carried by a token.
#
# @return [Array<Symbol>] always [:offset, :original], in that order
def self.all_annotations
  %i[offset original]
end
.setup(text, start, original = nil) ⇒ Object
11 12 13 14 15 16 |
# File 'lib/rbbt/ner/segment/token.rb', line 11
# Turns +text+ into a token in place: the object is extended with the Token
# module and annotated with its offset and original form.
#
# @param text [Object] object to extend (mutated, not copied)
# @param start [Object] value stored as the token's offset
# @param original [Object, nil] value stored as the token's original
# @return [Object] the same +text+ object that was passed in
def self.setup(text, start, original = nil)
  text.extend(Token)
  text.tap do |token|
    token.offset = start
    token.original = original
  end
end
.tokenize(text, split_at = /\s|(\(|\)|[-."':,])/, start = 0) ⇒ Object
34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/rbbt/ner/segment/token.rb', line 34 def self.tokenize(text, split_at = /\s|(\(|\)|[-."':,])/, start = 0) tokens = [] while matchdata = text.match(split_at) tokens << Token.setup(matchdata.pre_match, start) unless matchdata.pre_match.empty? tokens << Token.setup(matchdata.captures.first, start + matchdata.begin(1)) if matchdata.captures.any? and not matchdata.captures.first.empty? start += matchdata.end(0) text = matchdata.post_match end tokens << Token.setup(text, start) unless text.empty? tokens end |
Instance Method Details
#end ⇒ Object
26 27 28 |
# File 'lib/rbbt/ner/segment/token.rb', line 26
# Offset of the token's last character (inclusive).
#
# @return [Integer] offset + length - 1; for an empty token this is offset - 1
def end
  past_the_end = offset + length
  past_the_end - 1
end
#id ⇒ Object
22 23 24 |
# File 'lib/rbbt/ner/segment/token.rb', line 22
# Identifier derived from the token's annotations and the token itself.
#
# @return [Object] the result of Misc.hash2md5 applied to #info merged with
#   a :self entry (presumably an MD5 digest, going by the helper's name --
#   confirm against Misc)
def id
  fingerprint = info.merge(:self => self)
  Misc.hash2md5(fingerprint)
end
#info ⇒ Object
18 19 20 |
# File 'lib/rbbt/ner/segment/token.rb', line 18
# The token's annotations gathered into a hash.
#
# @return [Hash{Symbol => Object}] :original and :offset, in that order
def info
  { original: original, offset: offset }
end
#range ⇒ Object
30 31 32 |
# File 'lib/rbbt/ner/segment/token.rb', line 30
# Inclusive span covered by the token.
#
# @return [Range] from #offset up to and including #end
def range
  Range.new(offset, self.end)
end