Method: Ting::HanyuPinyinParser#parse_cluster

Defined in:
lib/ting/hanyu_pinyin_parser.rb

#parse_cluster(pinyin) ⇒ Object

Raises:

  • (ArgumentError)


41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/ting/hanyu_pinyin_parser.rb', line 41

# Splits an uninterrupted run of pinyin into its individual syllables.
#
# The cluster is consumed from the right: each pass re-applies
# +pinyin_regexp+ to whatever is still unconsumed, peels the final
# syllable off, and repeats until the regexp stops matching.
#
# @param pinyin [String] the pinyin cluster to split
# @return [Array<String>] the syllables in their original left-to-right
#   order; a trailing erhua 'r' is reported as a separate 'er' entry
# @raise [ArgumentError] if text is left over once the regexp no longer
#   matches
def parse_cluster(pinyin)
  remainder = pinyin
  ordered = []

  # A capture group only remembers the *last* thing it matched, so one match
  # can only ever hand back the final syllable. Hence the repeated matching.
  until (found = pinyin_regexp.match(remainder)).nil?
    # Group 3 holding 'r' means everything before it parsed as syllables and
    # the cluster ends in erhua: record it as 'er' and drop the 'r'.
    if found[3] == 'r'
      ordered.unshift('er')
      remainder = remainder.chop
    end

    # Prefer group 2 and fall back to group 1 for the final syllable.
    tail = found[2] || found[1]
    ordered.unshift(tail)

    # Keep only the part of the cluster that precedes the syllable just taken.
    keep = remainder.length - tail.length
    remainder = remainder[0, keep]
  end

  # Anything still left here could not be matched by the regexp.
  raise ArgumentError, "Unparseable pinyin fragment encountered: #{remainder}" unless remainder.empty?

  ordered
end