Module: Dphil::Transliterate
- Defined in:
- lib/dphil/transliterate.rb
Overview
Transliteration module for basic romanization formats.
Class Method Summary
- .default_script ⇒ Object
- .default_script=(scr) ⇒ Object
- .detect(str) ⇒ Object
- .iast_kh(str) ⇒ Object
- .iast_slp1(str) ⇒ Object
- .kh_iast(str) ⇒ Object
- .normalize_iast(word) ⇒ Object
- .normalize_slp1(st) ⇒ Object
- .script_supported?(script) ⇒ Boolean
- .slp1_iast(str) ⇒ Object
- .to_ascii(str) ⇒ Object
- .transliterate(str, first, second = nil) ⇒ Object (also: t)
- .unicode_downcase(st, ignore_control = false) ⇒ Object
- .unicode_downcase!(str, ignore_control = false) ⇒ Object
Class Method Details
.default_script ⇒ Object
14 15 16 |
# File 'lib/dphil/transliterate.rb', line 14
#
# Reader for the default script setting.
#
# @return [Object] whatever is currently held in the +@default_script+
#   instance variable (+nil+ if nothing has been stored there)
def default_script
  @default_script
end
.default_script=(scr) ⇒ Object
18 19 20 21 22 23 24 25 |
# File 'lib/dphil/transliterate.rb', line 18
#
# Sets the default script after checking it with +script_supported?+.
#
# The argument is converted with +to_sym+ before the check. When the check
# fails, the stored value is left alone and a warning is emitted via +warn+.
#
# @param scr [#to_sym] name of the script to use as the default
# @return [Symbol, nil] the stored symbol, or +nil+ when the script is rejected
def default_script=(scr)
  script = scr.to_sym
  return warn("Script unsupported [:#{script}]") unless script_supported?(script)

  @default_script = script
end
.detect(str) ⇒ Object
62 63 64 |
# File 'lib/dphil/transliterate.rb', line 62
#
# Delegates scheme detection to +Sanscript::Detect.detect_scheme+.
#
# @param str [String] text to inspect
# @return [Object] whatever +Sanscript::Detect.detect_scheme+ returns for +str+
def detect(str)
  detector = Sanscript::Detect
  detector.detect_scheme(str)
end
.iast_kh(str) ⇒ Object
46 47 48 |
# File 'lib/dphil/transliterate.rb', line 46
#
# Convenience wrapper: calls +transliterate+ with +:iast+ as the first
# scheme and +:kh+ as the second (presumably source and target, in that
# order -- confirm against Sanscript).
#
# @param str [String] text to convert
# @return [Object] the result of +transliterate(str, :iast, :kh)+
def iast_kh(str)
  transliterate(
    str,
    :iast,
    :kh
  )
end
.iast_slp1(str) ⇒ Object
54 55 56 |
# File 'lib/dphil/transliterate.rb', line 54
#
# Convenience wrapper: calls +transliterate+ with +:iast+ as the first
# scheme and +:slp1+ as the second (presumably source and target, in that
# order -- confirm against Sanscript).
#
# @param str [String] text to convert
# @return [Object] the result of +transliterate(str, :iast, :slp1)+
def iast_slp1(str)
  transliterate(
    str,
    :iast,
    :slp1
  )
end
.kh_iast(str) ⇒ Object
50 51 52 |
# File 'lib/dphil/transliterate.rb', line 50
#
# Convenience wrapper: calls +transliterate+ with +:kh+ as the first
# scheme and +:iast+ as the second (presumably source and target, in that
# order -- confirm against Sanscript).
#
# @param str [String] text to convert
# @return [Object] the result of +transliterate(str, :kh, :iast)+
def kh_iast(str)
  transliterate(
    str,
    :kh,
    :iast
  )
end
.normalize_iast(word) ⇒ Object
84 85 86 87 |
# File 'lib/dphil/transliterate.rb', line 84
#
# Normalizes a word by first passing it through +iast_slp1+ and then
# through +normalize_slp1+.
#
# @param word [String] input text
# @return [Object] the result of +normalize_slp1+ applied to the output of +iast_slp1+
def normalize_iast(word)
  normalize_slp1(iast_slp1(word))
end
.normalize_slp1(st) ⇒ Object
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
# File 'lib/dphil/transliterate.rb', line 66
#
# Produces a normalized copy of an SLP1 string; the argument is not mutated.
#
# Two passes are made over a duplicate of +st+:
#
# 1. Every match of +Constants::TRANS_CTRL_WORD+ whose content (capture 1 of
#    +Constants::TRANS_CTRL_WORD_CONTENT+) does not already match
#    +Constants::TRANS_CTRL_WORD_PROCESSED+ is replaced by
#    <tt>{##<sha1-hex-of-content>##}</tt>.
# 2. The string is handed to +process_string!+, and each yielded token has
#    the substitutions below applied in order (the order matters: later
#    patterns see the output of earlier ones).
#
# @param st [String] SLP1 text
# @return [Object] whatever +process_string!+ returns for the processed copy
def normalize_slp1(st)
  out = st.dup

  out.gsub!(Constants::TRANS_CTRL_WORD) do |match|
    control_content = match[Constants::TRANS_CTRL_WORD_CONTENT, 1]

    # Pass the control word through untouched when it has no extractable
    # content (hashing nil would raise TypeError) or was already hashed.
    next match if control_content.nil? || control_content.match(Constants::TRANS_CTRL_WORD_PROCESSED)

    # A SHA1 hex digest is always 40 characters, so no padding is needed.
    "{###{Digest::SHA1.hexdigest(control_content)}##}"
  end

  process_string!(out) do |token|
    token.tr!("b", "v")
    token.gsub!(/['‘]\b/, "") # Avagraha
    token.gsub!(/\B[NYRnm]/, "M") # Medial and final nasals
    token.gsub!(/\B[Hrs]\b/, "") # Final visarga/r/s
    token.gsub!(%r{[\.\-\_\\\/]}, "") # Punctuation
    token
  end
end
.script_supported?(script) ⇒ Boolean
34 35 36 |
# File 'lib/dphil/transliterate.rb', line 34
#
# Checks whether +script+ appears in +Sanscript::Transliterate.scheme_names+.
#
# @param script [Object] scheme identifier to look up (compared as-is; no
#   conversion is applied here)
# @return [Boolean] +true+ when the identifier is listed
def script_supported?(script)
  known_schemes = Sanscript::Transliterate.scheme_names
  known_schemes.include?(script)
end
.slp1_iast(str) ⇒ Object
58 59 60 |
# File 'lib/dphil/transliterate.rb', line 58
#
# Convenience wrapper: calls +transliterate+ with +:slp1+ as the first
# scheme and +:iast+ as the second (presumably source and target, in that
# order -- confirm against Sanscript).
#
# @param str [String] text to convert
# @return [Object] the result of +transliterate(str, :slp1, :iast)+
def slp1_iast(str)
  transliterate(
    str,
    :slp1,
    :iast
  )
end
.to_ascii(str) ⇒ Object
38 39 40 41 42 43 44 |
# File 'lib/dphil/transliterate.rb', line 38
#
# Strips a string down to ASCII.
#
# Each string yielded by +process_string+ is NFD-normalized in place, which
# splits accented letters into base letter + combining mark, and then every
# run of characters outside U+0000..U+007F is deleted.
#
# @param str [String] text to reduce
# @return [Object] whatever +process_string+ returns for the processed text
def to_ascii(str)
  process_string(str) do |segment|
    segment.tap do |text|
      text.unicode_normalize!(:nfd)
      text.gsub!(/[^\u0000-\u007F]+/, "")
    end
  end
end
.transliterate(str, first, second = nil) ⇒ Object Also known as: t
27 28 29 30 31 32 |
# File 'lib/dphil/transliterate.rb', line 27
#
# Forwards to +Sanscript.transliterate+, supplying this module's
# +default_script+ as the +default_script:+ keyword.
#
# A +RuntimeError+ raised by the call is logged through +Dphil.logger+ and
# the input is returned unchanged instead of propagating the error.
#
# @param str [String] text to transliterate
# @param first [Object] first scheme argument, passed through to Sanscript
# @param second [Object, nil] second scheme argument, passed through to Sanscript
# @return [Object] the Sanscript result, or +str+ itself when a RuntimeError occurred
def transliterate(str, first, second = nil)
  Sanscript.transliterate(str, first, second, default_script: default_script)
rescue RuntimeError => error
  Dphil.logger.error "Transliteration Error: #{error}"
  str
end
.unicode_downcase(st, ignore_control = false) ⇒ Object
94 95 96 |
# File 'lib/dphil/transliterate.rb', line 94
#
# Non-mutating counterpart of +unicode_downcase!+: runs it on a duplicate
# of +st+ so the caller's string is left untouched.
#
# @param st [String] text to downcase
# @param ignore_control [Boolean] forwarded unchanged to +unicode_downcase!+
# @return [Object] the result of +unicode_downcase!+ on the duplicate
def unicode_downcase(st, ignore_control = false)
  copy = st.dup
  unicode_downcase!(copy, ignore_control)
end
.unicode_downcase!(str, ignore_control = false) ⇒ Object
89 90 91 92 |
# File 'lib/dphil/transliterate.rb', line 89
#
# Downcases +str+ using +UNICODE_DOWNCASE_PROC+.
#
# With +ignore_control+ truthy the proc is called on the whole string
# directly; otherwise it is supplied as the block to +process_string!+
# (presumably so control words are handled separately -- confirm against
# +process_string!+).
#
# @param str [String] text to downcase
# @param ignore_control [Boolean] when truthy, bypass +process_string!+
# @return [Object] the proc's result, or the result of +process_string!+
def unicode_downcase!(str, ignore_control = false)
  if ignore_control
    UNICODE_DOWNCASE_PROC.call(str)
  else
    process_string!(str, &UNICODE_DOWNCASE_PROC)
  end
end