Module: SpaCy

Defined in:
lib/rbbt/nlp/spaCy.rb

Constant Summary collapse

# Token attribute names that .segments reads from each token and stores (as
# string keys) in the token's info hash. Frozen so the shared list cannot be
# mutated by accident.
PROPERTIES = %w[lemma_ is_punct is_space shape_ pos_ tag_].freeze

Class Method Summary collapse

Class Method Details

.segments(text, lang = 'en') ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/rbbt/nlp/spaCy.rb', line 23

# Builds SpaCyToken segments for every token that .tokens returns for +text+.
#
# Each token's text is set up as a SpaCyToken with an info hash holding:
# one entry per name in PROPERTIES (string keys, read from the token),
# :type ("SpaCy"), :offset (the token's +idx+), :dep (the token's +dep_+
# joined by "->" to the +idx+ of its head) and, when +text+ is a Document
# with non-nil values, its :docid and :corpus.
#
# @param text [String, Document] text to analyse; docid and corpus are only
#   read from it when it is a Document
# @param lang [String] passed through unchanged to .tokens
# @return [Object] the result of SpaCyToken.setup applied to the list of
#   annotated tokens, with :corpus set to the document's corpus (or nil)
def self.segments(text, lang = 'en')
  docid = corpus = nil
  if Document === text
    docid = text.docid
    corpus = text.corpus
  end

  segments = self.tokens(text, lang).map do |token|
    # Dynamic dispatch by name; avoids evaluating a code string against the token.
    info = PROPERTIES.each_with_object({}) do |property, attributes|
      attributes[property] = token.public_send(property)
    end
    info[:type] = "SpaCy"
    info[:offset] = token.idx
    info[:dep] = "#{token.dep_}->#{token.head.idx}"
    info[:docid] = docid if docid
    info[:corpus] = corpus if corpus
    SpaCyToken.setup(token.text, info)
  end

  SpaCyToken.setup(segments, :corpus => corpus)
end

.tokens(text, lang = 'en') ⇒ Object



10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/rbbt/nlp/spaCy.rb', line 10

# Loads the spaCy model named +lang+, applies it to +text+ and returns the
# items of the resulting doc as a Ruby array.
#
# @param text [String] text handed to the loaded model
# @param lang [String] name given to spacy.load
# @return [Array] the doc's items, in index order
def self.tokens(text, lang = 'en')
  collected = []

  RbbtPython.run 'spacy' do
    parsed = spacy.load(lang).call(text)
    # Walk the doc by index using its __len__ / __getitem__ methods.
    (0...parsed.__len__).each do |position|
      collected.push(parsed.__getitem__(position))
    end
  end

  collected
end