Module: CabochaParser

Defined in:
lib/kokugo_tagger/parser.rb

Class Method Summary collapse

Class Method Details

.parse(line) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
# File 'lib/kokugo_tagger/parser.rb', line 5

# Classifies a single input line and hands it to the matching sub-parser.
#
# The line is inspected with its trailing newline removed, but the
# sub-parsers receive the original, unmodified line.
#
# @param line [String] one line of input
# @return [Object, nil] the result of parse_excab for lines starting with
#   '#', of parse_chunk for lines starting with '*', {type: 'EOS'} for the
#   literal line 'EOS', nil for an empty line, and the result of
#   parse_token for anything else
def parse(line)
  content = line.chomp
  if content =~ /^#/
    parse_excab(line)
  elsif content =~ /^\*/
    parse_chunk(line)
  elsif content == 'EOS'
    { type: 'EOS' }
  elsif content == ''
    nil
  else
    parse_token(line)
  end
end

.parse_chunk(line) ⇒ Object



32
33
34
35
36
37
38
# File 'lib/kokugo_tagger/parser.rb', line 32

# Parses a chunk header line (the kind `parse` routes here when the line
# starts with '*') into a CHUNK hash.
#
# The line is split on whitespace. The first field is ignored; the next
# four are read as: id, a dependency field, a "head/func" pair, and score.
# The last character of the dependency field becomes :rel and everything
# before it becomes :link. All values are kept as strings.
#
# @param line [String] the chunk line, with or without a trailing newline
# @return [Hash] {type: 'CHUNK', id:, link:, rel:, head:, func:, score:}
def parse_chunk(line)
  fields = line.chomp.split(' ')
  id = fields[1]
  dep = fields[2]
  part = fields[3]
  score = fields[4]

  # The dependency field is "<link><rel>", where rel is its final character.
  link = dep[0...-1]
  rel = dep[-1]

  head, func = part.split('/')

  { type: 'CHUNK', id: id, link: link, rel: rel, head: head, func: func, score: score }
end

.parse_excab(line) ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/kokugo_tagger/parser.rb', line 19

# Parses an annotation line (the kind `parse` routes here when the line
# starts with '#') into a hash describing the annotation.
#
# The line is read as space-separated CSV, so double-quoted fields may
# contain spaces. The first field is ignored, the second is the record
# type, and the remaining fields are interpreted according to that type.
#
# @param line [String] the annotation line, with or without a trailing newline
# @return [Hash, nil] a hash for SEGMENT/SEGMENT_S/LINK/LINK_S, GROUP/GROUP_S
#   and ATTR records; nil for any other record type
def parse_excab(line)
  # Backslash-escaped quotes are rewritten as doubled quotes, which is the
  # escape form the CSV parser understands inside a quoted field.
  escaped = line.gsub('\"', '""')
  _marker, type, *data = CSV.parse_line(escaped.chomp, col_sep: ' ')

  case type
  when 'SEGMENT', 'SEGMENT_S', 'LINK', 'LINK_S'
    name, first, last, comment = data
    { type: type, name: name, start: first.to_i, end: last.to_i, comment: comment }
  when 'GROUP', 'GROUP_S'
    # Everything between the name and the final field is the member list.
    members = data[1..-2]
    { type: type, name: data[0], member: members, comment: data[-1] }
  when 'ATTR'
    { type: type, name: data[0], value: data[1] }
  end
end

.parse_token(line) ⇒ Object



39
40
41
42
43
44
45
# File 'lib/kokugo_tagger/parser.rb', line 39

# Parses a tab-separated token line into a TOKEN hash.
#
# The line is split on tabs into the surface text, a comma-separated
# attribute list, and a third column that is returned as :ne. The first
# four attributes, skipping blank ones, are joined with '-' to form :pos;
# the fifth and sixth attributes become :ctype and :cform.
#
# @param line [String] the token line, with or without a trailing newline
# @return [Hash] {type: 'TOKEN', text:, ne:, pos:, ctype:, cform:}
def parse_token(line)
  text, attrs, ne = line.chomp.split("\t")

  # CSV.parse_line returns nil for an empty string, so a missing or empty
  # attribute column is treated as an empty attribute list instead of
  # raising.
  attrs = CSV.parse_line(attrs.to_s, col_sep: ',') || []

  # Empty unquoted CSV fields come back as nil (only quoted "" yields an
  # empty string), so both forms must be dropped before joining.
  pos = attrs[0, 4].reject { |item| item.nil? || item.empty? }.join('-')

  { type: 'TOKEN', text: text, ne: ne, pos: pos, ctype: attrs[4], cform: attrs[5] }
end