Module: Romaji

Defined in:
lib/romaji.rb,
lib/romaji/version.rb,
lib/romaji/constants.rb,
lib/romaji/string_extension.rb

Defined Under Namespace

Modules: StringExtension

Constant Summary

# Version of the library.
VERSION = '0.2.2'
# Widest romaji lookup window, in characters. romaji2kana tries substrings
# from this length down to 1, so it must be at least as long as the longest
# ROMAJI2KANA key ("xtsu", four letters); with a smaller value that key can
# never be matched.
ROMAJI_MAX_LENGTH = 4
# Longest kana key in KANA2ROMAJI, in characters (e.g. "キャ").
KANA_MAX_LENGTH = 2
# Maps a lower-case romaji syllable to its katakana spelling.
#
# Keys are one to four ASCII characters; values are one or two katakana.
# Several romaji spellings may map to the same kana (e.g. "si"/"shi").
#
# NOTE(review): the single-kana values were reconstructed from the romaji
# keys and the digraph rows of this table. The less conventional keys follow
# the patterns visible here: the "l" row mirrors the "r" row (cf. "lya" =>
# "リャ"), "ca/cu/co" mirror "ka/ku/ko", and "yi"/"wu" fall back to the plain
# vowels. Confirm these against the released gem.
ROMAJI2KANA =
{
  "a"=>"ア", "i"=>"イ", "u"=>"ウ", "e"=>"エ", "o"=>"オ", "-"=>"ー",
  "xa"=>"ァ", "xi"=>"ィ", "xu"=>"ゥ", "xe"=>"ェ", "xo"=>"ォ",
  "ka"=>"カ", "ki"=>"キ", "ku"=>"ク", "ke"=>"ケ", "ko"=>"コ",
  "ca"=>"カ", "cu"=>"ク", "co"=>"コ",
  "ga"=>"ガ", "gi"=>"ギ", "gu"=>"グ", "ge"=>"ゲ", "go"=>"ゴ",
  "sa"=>"サ", "si"=>"シ", "su"=>"ス", "se"=>"セ", "so"=>"ソ",
  "za"=>"ザ", "zi"=>"ジ", "zu"=>"ズ", "ze"=>"ゼ", "zo"=>"ゾ",
  "ja"=>"ジャ","ji"=>"ジ", "ju"=>"ジュ","je"=>"ジェ","jo"=>"ジョ",
  "ta"=>"タ", "ti"=>"チ", "tu"=>"ツ", "te"=>"テ", "to"=>"ト",
  "da"=>"ダ", "di"=>"ヂ", "du"=>"ヅ", "de"=>"デ", "do"=>"ド",
  "na"=>"ナ", "ni"=>"ニ", "nu"=>"ヌ", "ne"=>"ネ", "no"=>"ノ",
  "ha"=>"ハ", "hi"=>"ヒ", "hu"=>"フ", "he"=>"ヘ", "ho"=>"ホ",
  "ba"=>"バ", "bi"=>"ビ", "bu"=>"ブ", "be"=>"ベ", "bo"=>"ボ",
  "pa"=>"パ", "pi"=>"ピ", "pu"=>"プ", "pe"=>"ペ", "po"=>"ポ",
  "va"=>"ヴァ","vi"=>"ヴィ","vu"=>"ヴ", "ve"=>"ヴェ","vo"=>"ヴォ",
  "fa"=>"ファ","fi"=>"フィ","fu"=>"フ", "fe"=>"フェ","fo"=>"フォ",
  "ma"=>"マ", "mi"=>"ミ", "mu"=>"ム", "me"=>"メ", "mo"=>"モ",
  "ya"=>"ヤ", "yi"=>"イ", "yu"=>"ユ", "ye"=>"イェ", "yo"=>"ヨ",
  "ra"=>"ラ", "ri"=>"リ", "ru"=>"ル", "re"=>"レ", "ro"=>"ロ",
  "la"=>"ラ", "li"=>"リ", "lu"=>"ル", "le"=>"レ", "lo"=>"ロ",
  "wa"=>"ワ", "wi"=>"ウィ", "wu"=>"ウ", "we"=>"ウェ", "wo"=>"ヲ",
  "nn"=>"ン", "n"=>"ン",
  "tsu"=>"ツ",
  "xka"=>"ヵ", "xke"=>"ヶ",
  "xwa"=>"ヮ", "xtsu"=>"ッ",   "xya"=>"ャ",  "xyu"=>"ュ",  "xyo"=>"ョ",
  "kya"=>"キャ", "kyi"=>"キィ", "kyu"=>"キュ", "kye"=>"キェ", "kyo"=>"キョ",
  "gya"=>"ギャ", "gyi"=>"ギィ", "gyu"=>"ギュ", "gye"=>"ギェ", "gyo"=>"ギョ",
  "sya"=>"シャ", "syi"=>"シィ", "syu"=>"シュ", "sye"=>"シェ", "syo"=>"ショ",
  "sha"=>"シャ", "shi"=>"シ",  "shu"=>"シュ", "she"=>"シェ", "sho"=>"ショ",
  "zya"=>"ジャ", "zyi"=>"ジィ", "zyu"=>"ジュ", "zye"=>"ジェ", "zyo"=>"ジョ",
  "jya"=>"ジャ", "jyi"=>"ジィ", "jyu"=>"ジュ", "jye"=>"ジェ", "jyo"=>"ジョ",
  "tya"=>"チャ", "tyi"=>"チィ", "tyu"=>"チュ", "tye"=>"チェ", "tyo"=>"チョ",
  "cya"=>"チャ", "cyi"=>"チィ", "cyu"=>"チュ", "cye"=>"チェ", "cyo"=>"チョ",
  "cha"=>"チャ", "chi"=>"チ",  "chu"=>"チュ", "che"=>"チェ", "cho"=>"チョ",
  "tha"=>"テャ", "thi"=>"ティ", "thu"=>"テュ", "the"=>"テェ", "tho"=>"テョ",
  "dya"=>"ヂャ", "dyi"=>"ヂィ", "dyu"=>"ヂュ", "dye"=>"ヂェ", "dyo"=>"ヂョ",
  "dha"=>"デャ", "dhi"=>"ディ", "dhu"=>"デュ", "dhe"=>"デェ", "dho"=>"デョ",
  "nya"=>"ニャ", "nyi"=>"ニィ", "nyu"=>"ニュ", "nye"=>"ニェ", "nyo"=>"ニョ",
  "hya"=>"ヒャ", "hyi"=>"ヒィ", "hyu"=>"ヒュ", "hye"=>"ヒェ", "hyo"=>"ヒョ",
  "bya"=>"ビャ", "byi"=>"ビィ", "byu"=>"ビュ", "bye"=>"ビェ", "byo"=>"ビョ",
  "pya"=>"ピャ", "pyi"=>"ピィ", "pyu"=>"ピュ", "pye"=>"ピェ", "pyo"=>"ピョ",
  "mya"=>"ミャ", "myi"=>"ミィ", "myu"=>"ミュ", "mye"=>"ミェ", "myo"=>"ミョ",
  "rya"=>"リャ", "ryi"=>"リィ", "ryu"=>"リュ", "rye"=>"リェ", "ryo"=>"リョ",
  "lya"=>"リャ", "lyi"=>"リィ", "lyu"=>"リュ", "lye"=>"リェ", "lyo"=>"リョ"
}
# Maps a katakana syllable (one or two characters) to its accepted romaji
# spellings. The first spelling in each list is the one kana2romaji emits.
#
# NOTE(review): the single-kana keys were reconstructed from their romaji
# values (e.g. ['shi', 'si'] => 'シ', ['di'] => 'ヂ'); confirm against the
# released gem.
KANA2ROMAJI =
{
  'ア' => ['a'], 'イ' => ['i'], 'ウ' => ['u'], 'エ' => ['e'], 'オ' => ['o'],
  'カ' => ['ka', 'ca'], 'キ' => ['ki'], 'ク' => ['ku'], 'ケ' => ['ke'], 'コ' => ['ko', 'co'],
  'サ' => ['sa'], 'シ' => ['shi', 'si'], 'ス' => ['su'], 'セ' => ['se'], 'ソ' => ['so'],
  'タ' => ['ta'], 'チ' => ['chi', 'ti', 'ci'], 'ツ' => ['tsu', 'tu'], 'テ' => ['te'], 'ト' => ['to'],
  'ナ' => ['na'], 'ニ' => ['ni'], 'ヌ' => ['nu'], 'ネ' => ['ne'], 'ノ' => ['no'],
  'ハ' => ['ha'], 'ヒ' => ['hi'], 'フ' => ['fu', 'hu'], 'ヘ' => ['he'], 'ホ' => ['ho'],
  'マ' => ['ma'], 'ミ' => ['mi'], 'ム' => ['mu'], 'メ' => ['me'], 'モ' => ['mo'],
  'ヤ' => ['ya'], 'ユ' => ['yu'], 'ヨ' => ['yo'],
  'ラ' => ['ra'], 'リ' => ['ri'], 'ル' => ['ru'], 'レ' => ['re'], 'ロ' => ['ro'],
  'ワ' => ['wa'], 'ウィ' => ['wi'], 'ウェ' => ['we'], 'ヲ' => ['wo'], 'ー' => ['-'],
  'ガ' => ['ga'], 'ギ' => ['gi'], 'グ' => ['gu'], 'ゲ' => ['ge'], 'ゴ' => ['go'],
  'ザ' => ['za'], 'ジ' => ['ji', 'zi'], 'ズ' => ['zu'], 'ゼ' => ['ze'], 'ゾ' => ['zo'],
  'ダ' => ['da'], 'ヂ' => ['di'], 'ヅ' => ['du'], 'デ' => ['de'], 'ド' => ['do'],
  'バ' => ['ba'], 'ビ' => ['bi'], 'ブ' => ['bu'], 'ベ' => ['be'], 'ボ' => ['bo'],
  'パ' => ['pa'], 'ピ' => ['pi'], 'プ' => ['pu'], 'ペ' => ['pe'], 'ポ' => ['po'],
  'キャ' => ['kya'], 'キュ' => ['kyu'], 'キェ' => ['kye'], 'キョ' => ['kyo'],
  'ギャ' => ['gya'], 'ギュ' => ['gyu'], 'ギェ' => ['gye'], 'ギョ' => ['gyo'],
  'シャ' => ['sha', 'sya'], 'シュ' => ['shu', 'syu'], 'シェ' => ['she', 'sye'], 'ショ' => ['sho', 'syo'],
  'ジャ' => ['ja', 'jya', 'zya'], 'ジュ' => ['ju', 'jyu', 'zyu'], 'ジェ' => ['je', 'jye', 'zye'], 'ジョ' => ['jo', 'jyo', 'zyo'],
  'チャ' => ['cha', 'cya', 'tya'], 'チュ' => ['chu', 'cyu', 'tyu'], 'チェ' => ['che', 'cye', 'tye'], 'チョ' => ['cho', 'cyo', 'tyo'],
  'ヂャ' => ['dya'], 'ヂュ' => ['dyu'], 'ヂェ' => ['dye'], 'ヂョ' => ['dyo'],
  'テャ' => ['tha'], 'ティ' => ['thi'], 'テュ' => ['thu'], 'テョ' => ['tho'],
  'ニャ' => ['nya'], 'ニュ' => ['nyu'], 'ニェ' => ['nye'], 'ニョ' => ['nyo'],
  'ヒャ' => ['hya'], 'ヒュ' => ['hyu'], 'ヒェ' => ['hye'], 'ヒョ' => ['hyo'],
  'ビャ' => ['bya'], 'ビュ' => ['byu'], 'ビェ' => ['bye'], 'ビョ' => ['byo'],
  'ピャ' => ['pya'], 'ピュ' => ['pyu'], 'ピェ' => ['pye'], 'ピョ' => ['pyo'],
  'ファ' => ['fa'], 'フィ' => ['fi'], 'フェ' => ['fe'], 'フォ' => ['fo'],
  'ミャ' => ['mya'], 'ミュ' => ['myu'], 'ミェ' => ['mye'], 'ミョ' => ['myo'],
  'リャ' => ['rya'], 'リュ' => ['ryu'], 'リェ' => ['rye'], 'リョ' => ['ryo'],
  'ヴァ' => ['va'], 'ヴィ' => ['vi'], 'ヴ' => ['vu'], 'ヴェ' => ['ve'], 'ヴォ' => ['vo'],
  'デャ' => ['dha'], 'ディ' => ['dhi'], 'デュ' => ['dhu'], 'デェ' => ['dhe'], 'デョ' => ['dho']
}

Class Method Summary

Class Method Details

.hira2kata(text) ⇒ Object



101
102
103
# File 'lib/romaji.rb', line 101

# Converts the hiragana in +text+ to katakana using NKF.
#
# The "-W" and "-w" flags tell NKF to read and write UTF-8.
#
# @param text [String] UTF-8 text
# @return [String] the text as returned by NKF
def self.hira2kata(text)
  nkf_options = "--katakana -Ww"
  NKF.nkf(nkf_options, text)
end

.kana2romaji(text) ⇒ Object



52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/romaji.rb', line 52

# Transliterates the kana in +text+ to romaji.
#
# The input is passed through normalize and hira2kata first, then scanned
# left to right. At each position the first applicable rule wins:
#
# * ン becomes "m" when the first romaji spelling of the next character
#   starts with p, b or m; otherwise "n".
# * ッ repeats the first letter of the next character's romaji. When it is
#   the last character, or the next character is a/i/u/e/o/n or has no
#   KANA2ROMAJI entry, it is written out as "xtsu".
# * Otherwise the longest KANA2ROMAJI key starting here is replaced by its
#   first listed spelling. Characters without an entry are copied unchanged.
#
# @param text [String] text to convert
# @return [String] the romaji rendering
def self.kana2romaji(text)
  chars = hira2kata(self.normalize(text)).split(//u)
  romaji = ''.dup
  pos = 0

  while pos < chars.size
    current = chars[pos]

    if current == 'ン' || current == 'ッ'
      following = chars[pos + 1]
      following_romaji = KANA2ROMAJI[following]
      # First letter of the next syllable's preferred spelling, if any.
      initial = following_romaji && following_romaji.first[0, 1]

      piece =
        if current == 'ン'
          %w[p b m].include?(initial) ? 'm' : 'n'
        elsif initial.nil? || %w[a i u e o n].include?(following)
          'xtsu'
        else
          initial
        end
      romaji << piece
      pos += 1
      next
    end

    # Kana keys are at most KANA_MAX_LENGTH characters, so that is the widest
    # window worth trying; longer matches take precedence over shorter ones.
    spellings = nil
    width = KANA_MAX_LENGTH.downto(1).find do |len|
      spellings = KANA2ROMAJI[chars[pos, len].join]
    end

    if width
      romaji << spellings.first
      pos += width
    else
      romaji << current
      pos += 1
    end
  end

  romaji
end

.kata2hira(text) ⇒ Object



105
106
107
# File 'lib/romaji.rb', line 105

# Converts the katakana in +text+ to hiragana using NKF.
#
# The "-W" and "-w" flags tell NKF to read and write UTF-8.
#
# @param text [String] UTF-8 text
# @return [String] the text as returned by NKF
def self.kata2hira(text)
  nkf_options = "--hiragana -Ww"
  NKF.nkf(nkf_options, text)
end

.normalize(text) ⇒ Object



109
110
111
# File 'lib/romaji.rb', line 109

# Runs +text+ through NKF with the '-mZ0Wwh0' option string and lower-cases
# the result.
#
# NOTE(review): per the NKF documentation, -Z0 folds full-width alphanumerics
# to ASCII and -W/-w select UTF-8 input/output; confirm the intent of the
# remaining flags (-m, -h0) against the NKF manual.
#
# @param text [String] UTF-8 text
# @return [String] the NKF output, downcased
def self.normalize(text)
  converted = NKF.nkf('-mZ0Wwh0', text)
  converted.downcase
end

.romaji2kana(text, options = {}) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/romaji.rb', line 9

# Transliterates the romaji in +text+ to kana.
#
# The input is passed through normalize and hira2kata first, then scanned
# left to right. At each position the first applicable rule wins:
#
# * "m" directly before p, b or m becomes ン.
# * A doubled lower-case letter other than a/i/u/e/o/n becomes ッ; only the
#   first of the pair is consumed, so the second starts the next syllable.
# * Otherwise the longest ROMAJI2KANA key starting here (up to
#   ROMAJI_MAX_LENGTH characters) is replaced by its kana. Characters that
#   match nothing are copied unchanged.
#
# @param text [String] text to convert
# @param options [Hash] :kana_type selects the output script: :katakana
#   (the default) or :hiragana; any value responding to to_sym is accepted
# @return [String] the kana rendering
def self.romaji2kana(text, options = {})
  chars = hira2kata(self.normalize(text)).split(//u)
  kana = ''.dup
  pos = 0

  while pos < chars.size
    current = chars[pos]
    following = chars[pos + 1]

    if current == 'm' && %w[p b m].include?(following)
      kana << 'ン'
      pos += 1
    elsif current == following && current =~ /[a-z]/ && !%w[a i u e o n].include?(current)
      kana << 'ッ'
      pos += 1
    else
      # Try the widest window first so "kya" wins over "k" + "ya".
      converted = nil
      width = ROMAJI_MAX_LENGTH.downto(1).find do |len|
        converted = ROMAJI2KANA[chars[pos, len].join]
      end

      if width
        kana << converted
        pos += width
      else
        kana << current
        pos += 1
      end
    end
  end

  kana_type = options[:kana_type] || :katakana
  kana_type.to_sym == :hiragana ? kata2hira(kana) : kana
end