Module: ZhongwenTools::Romanization::Pinyin

Defined in:
lib/zhongwen_tools/romanization/pinyin.rb

Class Method Summary

Class Method Details

.add_hyphens_to_pyn(str) ⇒ Object



100
101
102
103
104
105
106
# File 'lib/zhongwen_tools/romanization/pinyin.rb', line 100

# Public: joins the pieces that split_pyn finds in each word with hyphens.
#
# str - the String to process; it is broken into words on whitespace.
#
# Returns a String in which each word has been replaced by its split_pyn
# pieces joined with '-', and the words are joined by single spaces.
def self.add_hyphens_to_pyn(str)
  str
    .split(' ')
    .map { |word| split_pyn(word).join('-') }
    .join(' ')
end

.py?(str) ⇒ Boolean

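Public: checks if a string is pinyin written with tone marks. A string that contains neither tone marks nor the digits 1-5 is checked with pyn? instead.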

http://en.wikipedia.org/wiki/Pinyin

Examples

py?('nǐ hǎo')
# => true

Returns Boolean.

Returns:

  • (Boolean)


72
73
74
75
76
77
78
79
80
81
82
# File 'lib/zhongwen_tools/romanization/pinyin.rb', line 72

# Public: checks if a string is pinyin.
#
# str - the String to check.
#
# A string with no match for ZhongwenTools::Regex.only_tones and no digit
# 1-5 is handed to pyn?. Any other string is downcased, and everything
# matched by the punctuation regex, the py regex, whitespace or a hyphen is
# removed; the string passes when nothing but whitespace remains.
#
# Examples
#
#   py?('nǐ hǎo')
#   # => true
#
# Returns Boolean.
def self.py?(str)
  has_tone_info = str[ZhongwenTools::Regex.only_tones] || str[/[1-5]/]
  return pyn?(str) unless has_tone_info

  # NOTE: py regex does not include capitals with tones.
  removable = /(#{ ZhongwenTools::Regex.punc }|#{ ZhongwenTools::Regex.py }|[\s\-])/
  # NOTE: Special Case "fǎnguāng" should be "fǎn" + "guāng"
  separated = str.gsub('ngu', 'n-gu')

  ZhongwenTools::Caps.downcase(separated).gsub(removable, '').strip.empty?
end

.pyn?(str) ⇒ Boolean

Public: checks if a string is pinyin written with tone numbers (e.g. 'pin1-yin1').

Examples

pyn?('pin1-yin1')
# => true

Returns Boolean.

Returns:

  • (Boolean)


91
92
93
94
95
96
97
98
# File 'lib/zhongwen_tools/romanization/pinyin.rb', line 91

# Public: checks if a string is pinyin.
#
# str - the String to check.
#
# Punctuation, whitespace and hyphens are removed and the result is
# downcased before it is split with split_pyn. The string passes when both
# pyn_matches_properly? and are_all_pyn_syllables_complete? accept the split.
#
# Examples
#
#   pyn?('pin1-yin1')
#   # => true
#
# Returns Boolean.
def self.pyn?(str)
  # FIXME: use strip_punctuation method
  stripped = str.gsub(ZhongwenTools::Regex.punc, '').gsub(/[\s\-]/, '')
  normalized_str = ZhongwenTools::Caps.downcase(stripped)
  # split_pyn already returns a fresh Array, so no identity map is needed.
  pyn_arr = split_pyn(normalized_str)

  pyn_matches_properly?(pyn_arr, normalized_str) &&
    are_all_pyn_syllables_complete?(pyn_arr)
end

.split_py(str) ⇒ Object



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/zhongwen_tools/romanization/pinyin.rb', line 45

# Public: splits a string into the pieces find_py returns for it.
#
# str - the String to split; it is broken into words on whitespace first.
#
# Each word goes through normalize_pinyin, gets hyphens inserted for two
# special cases, is cut at apostrophes and hyphens, and every piece is passed
# to find_py. recapitalize is then applied to the flattened pieces of the word.
#
# Returns a flat Array with the results for all words, in order.
def self.split_py(str)
  per_word = str.split(' ').map do |token|
    normalized, is_capitalized = normalize_pinyin(token)

    # NOTE: Special Case "fǎnguāng" should be "fǎn" + "guāng"
    separated = normalized.gsub('ngu', 'n-gu')

    # NOTE: Special Case "yìnián" should be "yì" + "nián"
    boundary = /([#{ ZhongwenTools::Regex.only_tones }])(ni[#{ ZhongwenTools::Regex.py_tones['a'] }])/
    separated = separated.gsub(boundary) do
      "#{ Regexp.last_match(1) }-#{ Regexp.last_match(2) }"
    end

    pieces = separated.split(/['\-]/).map { |chunk| find_py(chunk) }

    recapitalize(pieces.flatten, is_capitalized)
  end

  per_word.flatten
end

.split_pyn(str) ⇒ Object



38
39
40
41
42
43
# File 'lib/zhongwen_tools/romanization/pinyin.rb', line 38

# Public: splits a string into the substrings matched by the pinyin regexes.
#
# str - the String to scan.
#
# When str contains a digit 1-5 it is scanned with the pyn regex, with the
# toneless regex as the second alternative; otherwise only the toneless
# regex is used.
#
# Returns an Array of Strings, one per match, each stripped of surrounding
# whitespace and cut off at its first hyphen.
def self.split_pyn(str)
  # FIXME: ignore punctuation
  syllable_regex =
    if str[/[1-5]/]
      /(#{ZhongwenTools::Regex.pyn}|#{ZhongwenTools::Regex.pinyin_toneless})/
    else
      /(#{ZhongwenTools::Regex.pinyin_toneless})/
    end

  # NOTE: taking the leading run of non-hyphen characters with
  #       [/[^\-]*/].to_s was measured as 25% faster than gsub('-', '').
  str.scan(syllable_regex).map do |captures|
    captures.first.strip[/[^\-]*/].to_s
  end
end