Module: CabochaParser
- Defined in: lib/kokugo_tagger/parser.rb
Class Method Summary
- .parse(line) ⇒ Object
- .parse_chunk(line) ⇒ Object
- .parse_excab(line) ⇒ Object
- .parse_token(line) ⇒ Object
Class Method Details
.parse(line) ⇒ Object
5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
# File 'lib/kokugo_tagger/parser.rb', line 5
#
# Classifies one input line and hands it to the matching parser.
#
# Dispatch rules, applied to the line with its trailing newline removed:
# * empty line          -> nil
# * exactly "EOS"       -> {type: 'EOS'}
# * "#" or "*" + TAB    -> parse_token (the symbol is the token text itself)
# * starts with "#"     -> parse_excab
# * starts with "*"     -> parse_chunk
# * anything else       -> parse_token
#
# @param line [String] a single line, with or without a trailing newline
# @return [Hash, nil] the parsed record, or nil for an empty line (or when
#   the delegated parser returns nil)
def parse(line)
  case line.chomp
  when ''
    nil
  when 'EOS'
    { type: 'EOS' }
  when /\A[#*]\t/
    # parse_token splits on TAB, so a leading "#" or "*" directly followed by
    # a TAB is a token whose text is that symbol, not an annotation or chunk
    # header. Must be tested before the two prefix checks below.
    parse_token(line)
  when /\A#/
    parse_excab(line)
  when /\A\*/
    parse_chunk(line)
  else
    parse_token(line)
  end
end
.parse_chunk(line) ⇒ Object
32 33 34 35 36 37 38 |
# File 'lib/kokugo_tagger/parser.rb', line 32
#
# Parses a chunk header line of the form "* ID DEP HEAD/FUNC SCORE".
#
# The line is split on whitespace. DEP is divided into everything but its
# last character (:link) and the last character (:rel); HEAD/FUNC is split
# on "/". All values are kept as strings; nothing is converted to a number.
# Fields that are absent from the line come back as nil instead of raising.
#
# @param line [String] a chunk header line, with or without a trailing newline
# @return [Hash] {type: 'CHUNK', id:, link:, rel:, head:, func:, score:}
def parse_chunk(line)
  # The first field is the leading "*" marker and is not used.
  _marker, id, dep, part, score = line.chomp.split(' ')

  # Guard against short headers that omit the dependency or head/func field.
  link, rel = dep ? [dep[0..-2], dep[-1]] : [nil, nil]
  head, func = part.to_s.split('/')

  { type: 'CHUNK', id: id, link: link, rel: rel, head: head, func: func, score: score }
end
.parse_excab(line) ⇒ Object
19 20 21 22 23 24 25 26 27 28 29 30 31 |
# File 'lib/kokugo_tagger/parser.rb', line 19
#
# Parses an annotation line (one starting with "#") into a hash.
#
# The line is read as space-separated CSV, so double-quoted fields may
# contain spaces. The first field (the "#..." marker) is ignored, the second
# selects the record type, and the rest are the record's data:
#
# * SEGMENT, SEGMENT_S, LINK, LINK_S -> {type:, name:, start:, end:, comment:}
#   (start and end are converted with to_i)
# * GROUP, GROUP_S -> {type:, name:, member:, comment:}
#   (member is every data field between the name and the last field)
# * ATTR -> {type:, name:, value:}
#
# @param line [String] an annotation line, with or without a trailing newline
# @return [Hash, nil] the parsed record, or nil when the type is not one of
#   those listed above
# @raise [CSV::MalformedCSVError] if the line is not parseable as CSV
def parse_excab(line)
  # CSV escapes an embedded quote by doubling it (""), so rewrite
  # backslash-escaped quotes (\") into that form before parsing.
  csv_ready = line.gsub('\"', '""').chomp
  _marker, type, *data = CSV.parse_line(csv_ready, col_sep: ' ')

  case type
  when 'SEGMENT', 'SEGMENT_S', 'LINK', 'LINK_S'
    { type: type, name: data[0], start: data[1].to_i, end: data[2].to_i, comment: data[3] }
  when 'GROUP', 'GROUP_S'
    { type: type, name: data[0], member: data[1..-2], comment: data[-1] }
  when 'ATTR'
    { type: type, name: data[0], value: data[1] }
  end
end
.parse_token(line) ⇒ Object
39 40 41 42 43 44 45 |
# File 'lib/kokugo_tagger/parser.rb', line 39
#
# Parses a token line of the form "TEXT<TAB>FEATURES<TAB>NE".
#
# FEATURES is read as comma-separated CSV. The first four features are
# joined with "-" to form :pos, skipping blank ones; the fifth and sixth
# become :ctype and :cform. A missing FEATURES or NE column yields an empty
# :pos and nil values rather than an error.
#
# @param line [String] a token line, with or without a trailing newline
# @return [Hash] {type: 'TOKEN', text:, ne:, pos:, ctype:, cform:}
# @raise [CSV::MalformedCSVError] if FEATURES is not parseable as CSV
def parse_token(line)
  text, raw_attrs, ne = line.chomp.split("\t")

  # CSV.parse_line returns nil for an empty string, hence the fallback.
  attrs = CSV.parse_line(raw_attrs.to_s, col_sep: ',') || []

  # CSV yields nil (not "") for empty unquoted fields, so both must be skipped.
  pos = attrs[0, 4].reject { |item| item.nil? || item.empty? }.join('-')

  { type: 'TOKEN', text: text, ne: ne, pos: pos, ctype: attrs[4], cform: attrs[5] }
end