Module: ITRANS

Defined in:
lib/wiki_lyrics/utils/itrans.rb

Constant Summary collapse

# Directory of the bundled itrans files: "../itrans" relative to the directory
# containing this source file. ITRANS.to_devanagari runs the "itrans"
# executable from here.
@@itrans_dir = "#{File.dirname( File.expand_path( __FILE__ ) )}/../itrans"
# Sink that ITRANS.to_devanagari redirects the itrans command's stderr to.
@@null_dev = "/dev/null"
# First substitution table used by ITRANS.from_devanagari!: every occurrence
# of a key in the text is replaced by the ITRANS spelling stored as its value.
# ITRANS.from_devanagari! walks this table before the per-consonant table, so
# the multi-character sequences at the bottom are rewritten before their
# individual letters are.
# Vowels are listed in pairs that share one spelling (e.g. 0x0906 and 0x093E
# both give "aa"); in the Unicode Devanagari block the 0x0905-0x0914 code
# points are the independent letters and 0x093E-0x094C the vowel signs.
# Trailing comments such as "# /A" look like alternative ITRANS spellings of
# the same sound -- TODO confirm.
@@devanagari2itrans =
{
	# mapped to the empty string, i.e. the character is removed from the text
	ITRANS.unicode( 0x0901 ) => "",
	# vowels:
	ITRANS.unicode( 0x0905 ) => "a",
	ITRANS.unicode( 0x0906 ) => "aa", # /A
	ITRANS.unicode( 0x093E ) => "aa", # /A
	ITRANS.unicode( 0x0907 ) => "i",
	ITRANS.unicode( 0x093F ) => "i",
	ITRANS.unicode( 0x0908 ) => "ii", # /I
	ITRANS.unicode( 0x0940 ) => "ii", # /I
	ITRANS.unicode( 0x0909 ) => "u",
	ITRANS.unicode( 0x0941 ) => "u",
	ITRANS.unicode( 0x090A ) => "uu", # /U
	ITRANS.unicode( 0x0942 ) => "uu", # /U
	ITRANS.unicode( 0x090B ) => "RRi", # R^i
	ITRANS.unicode( 0x0943 ) => "RRi", # R^i
	ITRANS.unicode( 0x090C ) => "LLi", # L^i
	ITRANS.unicode( 0x0944 ) => "LLi", # L^i
	ITRANS.unicode( 0x090F ) => "e",
	ITRANS.unicode( 0x0947 ) => "e",
	ITRANS.unicode( 0x0910 ) => "ai",
	ITRANS.unicode( 0x0948 ) => "ai",
	ITRANS.unicode( 0x0913 ) => "o",
	ITRANS.unicode( 0x094B ) => "o",
	ITRANS.unicode( 0x0914 ) => "au",
	ITRANS.unicode( 0x094C ) => "au",
	# itrans irregular
	# (multi-character sequences written as literals rather than code points)
	"क्ष"=> "kSh", # x / kS
	"त्र"=> "tr",
	"ज्ञ"=> "j~n", # GY / dny
	"श्र"=> "shr",
}
# Second substitution table used by ITRANS.from_devanagari!: maps each
# consonant (and a few other signs) to its ITRANS spelling WITHOUT the
# trailing "a"; ITRANS.from_devanagari! appends that "a" itself where the
# letter is not followed by a vowel.
# Entries are applied in table order by ITRANS.from_devanagari!.
# Trailing comments such as "# w" look like alternative ITRANS spellings --
# TODO confirm.
@@devanagari2itrans_consonants =
{
	# gutturals:
	ITRANS.unicode( 0x0915 ) => "k",
	ITRANS.unicode( 0x0916 ) => "kh",
#		ITRANS.unicode( 0x0916 ) => ".Nkh",
	ITRANS.unicode( 0x0917 ) => "g",
	ITRANS.unicode( 0x0918 ) => "gh",
	# U+0919 (NGA). This entry used to repeat key 0x0918, which silently
	# replaced the "gh" mapping above and left U+0919 unmapped.
	ITRANS.unicode( 0x0919 ) => "~N",
	# palatals:
	ITRANS.unicode( 0x091A ) => "ch",
	ITRANS.unicode( 0x091B ) => "Ch",
	ITRANS.unicode( 0x091C ) => "j",
	ITRANS.unicode( 0x091D ) => "jh",
	ITRANS.unicode( 0x091E ) => "~n", # JN
	# retroflexes:
	ITRANS.unicode( 0x091F ) => "T",
	ITRANS.unicode( 0x0920 ) => "Th",
	ITRANS.unicode( 0x0921 ) => "D",
	ITRANS.unicode( 0x0922 ) => "Dh",
#		ITRANS.unicode( 0x0922 ) => ".Dh", # Rh (valid?)
	ITRANS.unicode( 0x0923 ) => "N",
	# dentals:
	ITRANS.unicode( 0x0924 ) => "t",
	ITRANS.unicode( 0x0925 ) => "th",
	ITRANS.unicode( 0x0926 ) => "d",
	ITRANS.unicode( 0x0927 ) => "dh",
	ITRANS.unicode( 0x0928 ) => "n",
	# labials:
	ITRANS.unicode( 0x092A ) => "p",
	ITRANS.unicode( 0x092B ) => "ph",
	ITRANS.unicode( 0x092C ) => "b",
	ITRANS.unicode( 0x092D ) => "bh",
	ITRANS.unicode( 0x092E ) => "m",
	# semi-vowels:
	ITRANS.unicode( 0x092F ) => "y",
	ITRANS.unicode( 0x0930 ) => "r",
	ITRANS.unicode( 0x0932 ) => "l",
	ITRANS.unicode( 0x0935 ) => "v", # w
	# sibilants:
	ITRANS.unicode( 0x0936 ) => "sh",
	ITRANS.unicode( 0x0937 ) => "Sh", # shh
	ITRANS.unicode( 0x0938 ) => "s",
	# miscellaneous:
	ITRANS.unicode( 0x0939 ) => "h",
	ITRANS.unicode( 0x0902 ) => ".n", # M / .m
	ITRANS.unicode( 0x0903 ) => "H", # .h
	ITRANS.unicode( 0x0950 ) => "OM", # AUM
	# other consonants:
	# NOTE(review): if the literals below are stored as base letter + nukta
	# sequences, the plain base letters above are substituted first and these
	# entries can no longer match -- confirm the literals' code points.
	"क़" => "q",
	ITRANS.unicode( 0x0958 ) => "q",
	"ख़" => "Kh",
	"ग़" => "G",
	"ज़" => "z",
	ITRANS.unicode( 0x095B ) => "z",
	"फ़" => "f",
	"ड़" => ".D", # R
	ITRANS.unicode( 0x095C ) => ".D", # R (valid?)
	"ढ़" => ".Dh", # Rh
}

Class Method Summary collapse

Class Method Details

.from_devanagari(text) ⇒ Object



58
59
60
# File 'lib/wiki_lyrics/utils/itrans.rb', line 58

# Non-destructive variant of ITRANS.from_devanagari!: converts a fresh copy
# of +text+ and leaves the argument itself untouched.
#
# @param text [String] text containing Devanagari characters
# @return [String] the converted copy
def ITRANS.from_devanagari( text )
	copy = String.new( text )
	ITRANS.from_devanagari!( copy )
end

.from_devanagari!(text) ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/wiki_lyrics/utils/itrans.rb', line 44

# Rewrites +text+ in place, replacing Devanagari characters by their ITRANS
# spellings.
#
# Two passes are made: first every key of @@devanagari2itrans is substituted
# verbatim, then each entry of @@devanagari2itrans_consonants is substituted,
# with an "a" appended where the consonant stands alone or is not followed by
# a vowel.
#
# @param text [String] string to convert; it is modified
# @return [String] the same +text+ object
def ITRANS.from_devanagari!( text )
	@@devanagari2itrans.each_pair do |symbol, latin|
		text.gsub!( symbol, latin )
	end

	@@devanagari2itrans_consonants.each_pair do |symbol, latin|
		# the consonant is the only symbol in the 'word' --> append an 'a':
		lone_symbol = /(^|[ ""\.:;\(\[])#{symbol}([,;:?!\)\]\s]|$)/
		text.gsub!( lone_symbol, "\\1#{latin}a\\2" )

		# the consonant is not followed by a vowel --> append an 'a':
		before_non_vowel = /#{symbol}([^aeiouAEIOU,;:?!\)\]\s])/
		text.gsub!( before_non_vowel, "#{latin}a\\1" )

		# every remaining occurrence gets the bare spelling:
		text.gsub!( symbol, latin )
	end

	text
end

.normalize(text) ⇒ Object



26
27
28
# File 'lib/wiki_lyrics/utils/itrans.rb', line 26

# Round-trips +text+: converts it with ITRANS.to_devanagari and then converts
# that result back with ITRANS.from_devanagari!.
#
# @param text [String] input handed to ITRANS.to_devanagari
# @return [String] the round-tripped text
def ITRANS.normalize( text )
	devanagari = ITRANS.to_devanagari( text )
	ITRANS.from_devanagari!( devanagari )
end

.to_devanagari(text) ⇒ Object



34
35
36
37
38
39
40
41
42
# File 'lib/wiki_lyrics/utils/itrans.rb', line 34

# Converts +text+ by piping it through the external "itrans" executable that
# lives in @@itrans_dir.
#
# The text is wrapped in an "#indian" ... "#endindian" section that selects
# the "udvng.ifm" font file, echoed through the shell into "itrans -U" (run
# with @@itrans_dir as working directory), and the command's stderr is sent to
# @@null_dev. Everything from a "%" up to the end of its line is then removed
# from the output and surrounding whitespace is stripped.
#
# NOTE(review): "-U" presumably selects UTF-8 output -- confirm against the
# itrans manual.
#
# @param text [String] text to convert; it is not modified
# @return [String] the cleaned-up output of the itrans command
def ITRANS.to_devanagari( text )
	input = "#indianifm=udvng.ifm\n #indian\n#{text}\n#endindian"
	# The block form of Dir.chdir restores the previous working directory even
	# when the command raises; the former manual restore was skipped in that case.
	trans = Dir.chdir( @@itrans_dir ) do
		`echo #{Strings.shell_quote( input )} | #{@@itrans_dir}/itrans -U 2>#{@@null_dev}`
	end
	trans.gsub!( /%[^\n]*/, "" ) # TODO search line
	trans.strip!()
	return trans
end

.to_devanagari!(text) ⇒ Object



30
31
32
# File 'lib/wiki_lyrics/utils/itrans.rb', line 30

# Destructive variant of ITRANS.to_devanagari: overwrites the contents of
# +text+ with the converted result.
#
# @param text [String] string to convert; it is modified
# @return [String] the same +text+ object
def ITRANS.to_devanagari!( text )
	converted = ITRANS.to_devanagari( text )
	text.replace( converted )
end

.unicode(codepoint) ⇒ Object



62
63
64
# File 'lib/wiki_lyrics/utils/itrans.rb', line 62

# Builds the string for a single Unicode code point by packing it with the
# "U*" (UTF-8) directive.
#
# @param codepoint [Integer] Unicode code point, e.g. 0x0915
# @return [String] the packed character
def ITRANS.unicode( codepoint )
	codepoints = [codepoint]
	return codepoints.pack( "U*" )
end