Module: Token

Defined in:
lib/rbbt/ner/segment/token.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#offset ⇒ Object

Returns the value of attribute offset.



5
6
7
# File 'lib/rbbt/ner/segment/token.rb', line 5

# Reader for the +offset+ attribute.
#
# @return [Object] whatever is currently stored in @offset (nil when it was
#   never assigned)
def offset; @offset; end

#original ⇒ Object

Returns the value of attribute original.



5
6
7
# File 'lib/rbbt/ner/segment/token.rb', line 5

# Reader for the +original+ attribute.
#
# @return [Object] whatever is currently stored in @original (nil when it was
#   never assigned)
def original; @original; end

Class Method Details

.all_annotations ⇒ Object



7
8
9
# File 'lib/rbbt/ner/segment/token.rb', line 7

# Lists the names of the attributes a token carries.
#
# @return [Array<Symbol>] always +[:offset, :original]+, in that order
def self.all_annotations
  %i[offset original]
end

.setup(text, start, original = nil) ⇒ Object



11
12
13
14
15
16
# File 'lib/rbbt/ner/segment/token.rb', line 11

# Turns +text+ into a token in place: the object is extended with Token and
# its +offset+ and +original+ attributes are assigned.
#
# @param text [Object] object to extend; it is modified, not copied
# @param start [Object] value stored as the token's offset
# @param original [Object, nil] value stored as the token's original
# @return [Object] the same +text+ object that was passed in
def self.setup(text, start, original = nil)
  # Object#extend returns its receiver, so tap yields (and returns) +text+.
  text.extend(Token).tap do |token|
    token.offset = start
    token.original = original
  end
end

.tokenize(text, split_at = /\s|(\(|\)|[-."':,])/, start = 0) ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/rbbt/ner/segment/token.rb', line 34

# Splits +text+ into tokens, each set up through Token.setup with the position
# at which it starts (counted from +start+).
#
# The text is consumed left to right. On every match of +split_at+:
# * the text before the match becomes a token (unless empty);
# * if the first capture group matched a non-empty string, that string becomes
#   a token of its own (with the default pattern: parentheses and the
#   punctuation characters <tt>- . " ' : ,</tt>); matched text outside group 1
#   (whitespace, with the default pattern) is dropped;
# * scanning continues with the text after the match.
# Whatever remains after the last match becomes the final token.
#
# @param text [String] text to split
# @param split_at [Regexp] separator pattern; only capture group 1 is emitted
#   as a token
# @param start [Integer] offset assigned to the first character of +text+
# @return [Array<String>] the tokens in order of appearance
# @raise [ArgumentError] if +split_at+ matches an empty string at the start of
#   the remaining text, since scanning could then never advance
def self.tokenize(text, split_at = /\s|(\(|\)|[-."':,])/, start = 0)
  tokens = []

  while (matchdata = text.match(split_at))
    # A match ending at position 0 consumed nothing: post_match would equal
    # the current text and the loop would spin forever.
    if matchdata.end(0).zero?
      raise ArgumentError, "split_at matched an empty string; tokenization cannot advance"
    end

    leading = matchdata.pre_match
    tokens << Token.setup(leading, start) unless leading.empty?

    # matchdata[1] is nil both when the pattern has no group 1 and when group 1
    # did not take part in this match (e.g. another alternative matched).
    separator = matchdata[1]
    unless separator.nil? || separator.empty?
      tokens << Token.setup(separator, start + matchdata.begin(1))
    end

    start += matchdata.end(0)
    text = matchdata.post_match
  end

  unless text.empty?
    # When nothing matched, +text+ is still the caller's own object and
    # Token.setup extends its argument in place; copy it if it is frozen so
    # frozen input does not raise.
    text = text.dup if text.frozen?
    tokens << Token.setup(text, start)
  end

  tokens
end

Instance Method Details

#end ⇒ Object



26
27
28
# File 'lib/rbbt/ner/segment/token.rb', line 26

# Position of the token's last character (inclusive): offset plus length,
# minus one.
#
# @return [Integer] for an empty token this is one less than +offset+
def end
  past_the_end = offset + length
  past_the_end - 1
end

#id ⇒ Object



22
23
24
# File 'lib/rbbt/ner/segment/token.rb', line 22

# Identifier derived from the token's info hash together with the token itself.
#
# @return [Object] whatever Misc.hash2md5 returns for +info+ merged with
#   +:self => self+ (presumably an MD5 digest; the helper is defined outside
#   this file, so confirm there)
def id
  fingerprint_source = info.merge(:self => self)
  Misc.hash2md5(fingerprint_source)
end

#info ⇒ Object



18
19
20
# File 'lib/rbbt/ner/segment/token.rb', line 18

# Collects the token's attributes into a hash.
#
# @return [Hash{Symbol => Object}] +:original+ and +:offset+, in that order,
#   mapped to the current attribute values
def info
  { original: original, offset: offset }
end

#range ⇒ Object



30
31
32
# File 'lib/rbbt/ner/segment/token.rb', line 30

# Span covered by the token as an inclusive range.
#
# @return [Range] from +offset+ to +self.end+, both ends included
def range
  Range.new(offset, self.end)
end