23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
# File 'lib/rbbt/nlp/spaCy.rb', line 23
# Builds one SpaCyToken-annotated string per token of +text+.
#
# For every token returned by +self.tokens(text, lang)+ an info hash is
# assembled from each entry of PROPERTIES plus :type, :offset and :dep, and,
# when +text+ is a Document, its :docid and :corpus (only if non-nil).
# The token's text and that hash are handed to SpaCyToken.setup.
#
# @param text the input passed through to +self.tokens+; a Document
#   additionally contributes its docid and corpus
# @param lang language code forwarded to +self.tokens+ (defaults to 'en')
# @return the result of SpaCyToken.setup applied to the collected tokens,
#   with :corpus set to the document corpus (nil when +text+ is not a Document)
def self.segments(text, lang = 'en')
  source_docid = nil
  source_corpus = nil
  if Document === text
    source_docid = text.docid
    source_corpus = text.corpus
  end

  annotated = self.tokens(text, lang).map do |tok|
    # Each property name is evaluated as code against the token itself.
    # NOTE(review): a String passed to instance_eval can also see this
    # method's local variables, so a property named like a local would
    # resolve to the local rather than the token attribute.
    attributes = PROPERTIES.each_with_object({}) do |property, acc|
      acc[property] = tok.instance_eval(property.to_s)
    end

    attributes[:type] = "SpaCy"
    attributes[:offset] = tok.idx
    # Dependency label followed by the idx of the head token.
    attributes[:dep] = tok.dep_ + "->" + tok.head.idx.to_s
    attributes[:docid] = source_docid if source_docid
    attributes[:corpus] = source_corpus if source_corpus

    SpaCyToken.setup(tok.text, attributes)
  end

  SpaCyToken.setup(annotated, :corpus => source_corpus)
end
|