Class: Furigana::Mecab

Inherits:
Object
  • Object
show all
Defined in:
lib/furigana/mecab.rb

Class Method Summary collapse

Class Method Details

.tokenize(text) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/furigana/mecab.rb', line 6

# Runs +text+ through the external `mecab -Ochasen` command and turns the
# output into a list of token hashes.
#
# The text is passed through +sanitize_text+ before being fed to mecab on
# stdin, and mecab's stdout is broken into lines by +split_stdout+. Lines are
# consumed up to (not including) the first line that is exactly 'EOS'; any
# lines after that marker are ignored. Each consumed line is split on tabs,
# and its first two columns become the token's surface form and reading.
#
# NOTE(review): mecab's stderr and exit status are captured but not
# inspected, so a failed run is not reported here — confirm this is the
# intended best-effort behaviour.
#
# @param text [String] text to tokenize (presumably a String; it is handed
#   to +sanitize_text+ unchanged)
# @return [Array<Hash>] one hash per output line with the keys
#   +:surface_form+ and +:reading+; a value is nil when the line has no such
#   column
def tokenize(text)
  # Underscore-prefixed names mark the outputs that are deliberately unused.
  stdout, _stderr, _status =
    Open3.capture3("mecab -Ochasen", :stdin_data => sanitize_text(text))

  split_stdout(stdout)
    .take_while { |line| line != 'EOS' }
    .map do |line|
      # Only the first two tab-separated columns are needed; extras are dropped.
      surface_form, reading = line.split("\t")
      { :surface_form => surface_form, :reading => reading }
    end
end