11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
|
# File 'lib/worte/tokenizer.rb', line 11
# Splits +str+ into tokens, one line at a time, recording each token's
# (line, column) start position and flagging the last token of every
# line with +newline+.
#
# A character ends the current word when it matches +FS+ (presumably a
# field-separator regexp — confirm against the class constants) or is
# listed in +BREAK+; everything else is accumulated into @buf_word,
# with @start_word remembering the column where the word began.
#
# NOTE(review): token emission and buffer clearing happen inside
# add_token_from_buffer, defined elsewhere in this class.
#
# @param str [String] the text to tokenize
# @return [Array<Token>] the accumulated token list (@tokens)
def tokenize(str)
  reset
  str.split("\n").each_with_index do |line, row|
    @start_word = -1
    @buf_word = ''
    @last_token = nil
    line.each_char.with_index do |char, col|
      if FS.match(char) || BREAK.include?(char)
        # Separator: flush whatever word has accumulated so far.
        add_token_from_buffer(row)
      else
        # First character of a new word — remember its column.
        @start_word = col if @start_word == -1
        @buf_word += char
      end
    end
    # Flush a word that ran to the end of the line.
    add_token_from_buffer(row)
    unless @last_token
      # Blank line (no tokens emitted): synthesize an empty token so the
      # newline can still be recorded.
      @last_token = Token.new('', [row, 0])
      @tokens << @last_token
    end
    @last_token.newline = true
  end
  @tokens
end
|