Class: JapaneseNames::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/japanese_names/parser.rb

Overview

Provides methods for parsing Japanese name strings.

Instance Method Summary collapse

Instance Method Details

#split(kanji, kana) ⇒ Object

Given a kanji and a kana representation of a name, splits it into family and given names.

The choice to prioritize family name is arbitrary. Further analysis is needed for whether given or family name should be prioritized.

Returns Array [[kanji_fam, kanji_giv], [kana_fam, kana_giv]] if there was a match. Returns nil if there was no match.



16
17
18
# File 'lib/japanese_names/parser.rb', line 16

# Splits a full name into family and given parts.
#
# The family-name strategy (split_fam) is tried first; the given-name
# strategy (split_giv) is only consulted when the first one yields nothing.
#
# @param kanji [String, nil] kanji form of the full name
# @param kana [String, nil] kana form of the full name
# @return [Array, nil] whatever the first successful strategy returns,
#   or the result of split_giv (possibly nil) when split_fam finds no match
def split(kanji, kana)
  family_first = split_fam(kanji, kana)
  return family_first if family_first

  split_giv(kanji, kana)
end

#split_fam(kanji, kana) ⇒ Object



31
32
33
34
35
36
37
38
39
40
# File 'lib/japanese_names/parser.rb', line 31

# Splits a name by matching a dictionary entry against the start of the name.
#
# Dictionary entries are looked up for window_left(kanji) and tried from the
# longest kanji candidate to the shortest. The first entry whose reading
# pattern (hk(entry[1])) matches the very beginning of the kana string wins.
#
# @param kanji [String, nil] kanji form of the full name
# @param kana [String, nil] kana form of the full name
# @return [Array, nil] [[kanji_fam, kanji_giv], [kana_fam, kana_giv]] on a
#   match; nil when either argument is nil or no entry matches
def split_fam(kanji, kana)
  return nil unless kanji && kana

  kanji = kanji.strip
  kana = kana.strip

  # sort_by builds a new array, so the object handed back by Enamdict.find is
  # not reordered in place (the lookup result may be shared with other callers).
  candidates = Enamdict.find(kanji: window_left(kanji)).sort_by { |entry| -entry[0].size }

  candidates.each do |entry|
    # \A anchors at the start of the whole string; ^ would also match after an
    # embedded newline and accept a reading found in the middle of the input.
    # NOTE(review): hk is assumed to return a regex source string, so it is
    # interpolated unescaped exactly as before — confirm against its definition.
    kana_match = kana[/\A#{hk(entry[1])}/]
    next unless kana_match

    return [[entry[0], mask_left(kanji, entry[0])], [kana_match, mask_left(kana, kana_match)]]
  end

  nil
end

#split_giv(kanji, kana) ⇒ Object



20
21
22
23
24
25
26
27
28
29
# File 'lib/japanese_names/parser.rb', line 20

# Splits a name by matching a dictionary entry against the end of the name.
#
# Dictionary entries are looked up for window_right(kanji) and tried from the
# longest kanji candidate to the shortest. The first entry whose reading
# pattern (hk(entry[1])) matches the very end of the kana string wins.
#
# @param kanji [String, nil] kanji form of the full name
# @param kana [String, nil] kana form of the full name
# @return [Array, nil] [[kanji_fam, kanji_giv], [kana_fam, kana_giv]] on a
#   match; nil when either argument is nil or no entry matches
def split_giv(kanji, kana)
  return nil unless kanji && kana

  kanji = kanji.strip
  kana = kana.strip

  # sort_by builds a new array, so the object handed back by Enamdict.find is
  # not reordered in place (the lookup result may be shared with other callers).
  candidates = Enamdict.find(kanji: window_right(kanji)).sort_by { |entry| -entry[0].size }

  candidates.each do |entry|
    # \z anchors at the end of the whole string; $ would also match before an
    # embedded newline and accept a reading found in the middle of the input.
    # NOTE(review): hk is assumed to return a regex source string, so it is
    # interpolated unescaped exactly as before — confirm against its definition.
    kana_match = kana[/#{hk(entry[1])}\z/]
    next unless kana_match

    return [[mask_right(kanji, entry[0]), entry[0]], [mask_right(kana, kana_match), kana_match]]
  end

  nil
end