Module: Dphil::Transliterate

Defined in:
lib/dphil/transliterate.rb

Overview

Transliteration module for basic romanization formats.

Class Method Summary

Class Method Details

.default_script ⇒ Object



14
15
16
# File 'lib/dphil/transliterate.rb', line 14

# Reads the default script currently stored on the receiver.
#
# @return [Object, nil] the value held in +@default_script+, or nil when
#   nothing has been assigned yet
def default_script
  instance_variable_get(:@default_script)
end

.default_script=(scr) ⇒ Object



18
19
20
21
22
23
24
25
# File 'lib/dphil/transliterate.rb', line 18

# Stores a new default script after coercing the argument with +to_sym+.
#
# The value is only stored when +script_supported?+ accepts the coerced
# name; otherwise the previous default is left untouched and a warning is
# emitted through +warn+.
#
# @param scr [#to_sym] name of the script to use as the default
# @return [Symbol, nil] the stored symbol, or nil when the script was
#   rejected (only observable when the method is invoked via +send+)
def default_script=(scr)
  script_name = scr.to_sym
  return warn("Script unsupported [:#{script_name}]") unless script_supported?(script_name)

  @default_script = script_name
end

.detect(str) ⇒ Object



62
63
64
# File 'lib/dphil/transliterate.rb', line 62

# Hands the string to Sanscript's scheme detector.
#
# @param str [String] text whose scheme should be detected
# @return [Object] whatever +Sanscript::Detect.detect_scheme+ returns for
#   +str+
def detect(str)
  detector = Sanscript::Detect
  detector.detect_scheme(str)
end

.iast_kh(str) ⇒ Object



46
47
48
# File 'lib/dphil/transliterate.rb', line 46

# Shortcut for +transliterate+ with the source scheme fixed to +:iast+ and
# the target scheme fixed to +:kh+.
#
# @param str [String] text to convert
# @return [Object] the result of +transliterate+
def iast_kh(str)
  from_scheme = :iast
  to_scheme = :kh
  transliterate(str, from_scheme, to_scheme)
end

.iast_slp1(str) ⇒ Object



54
55
56
# File 'lib/dphil/transliterate.rb', line 54

# Shortcut for +transliterate+ with the source scheme fixed to +:iast+ and
# the target scheme fixed to +:slp1+.
#
# @param str [String] text to convert
# @return [Object] the result of +transliterate+
def iast_slp1(str)
  from_scheme = :iast
  to_scheme = :slp1
  transliterate(str, from_scheme, to_scheme)
end

.kh_iast(str) ⇒ Object



50
51
52
# File 'lib/dphil/transliterate.rb', line 50

# Shortcut for +transliterate+ with the source scheme fixed to +:kh+ and
# the target scheme fixed to +:iast+.
#
# @param str [String] text to convert
# @return [Object] the result of +transliterate+
def kh_iast(str)
  from_scheme = :kh
  to_scheme = :iast
  transliterate(str, from_scheme, to_scheme)
end

.normalize_iast(word) ⇒ Object



84
85
86
87
# File 'lib/dphil/transliterate.rb', line 84

# Normalizes a word by first converting it with +iast_slp1+ and then
# feeding that result to +normalize_slp1+.
#
# @param word [String] text to normalize
# @return [Object] the result of +normalize_slp1+
def normalize_iast(word)
  normalize_slp1(iast_slp1(word))
end

.normalize_slp1(st) ⇒ Object



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/dphil/transliterate.rb', line 66

# Produces a normalized copy of a string; the argument is not modified.
#
# Two passes run over a duplicate of +st+:
#
# 1. Every match of +Constants::TRANS_CTRL_WORD+ whose captured content does
#    not match +Constants::TRANS_CTRL_WORD_PROCESSED+ is replaced by
#    <tt>{##<sha1-hex>##}</tt>, where the digest is taken over that content.
#    Matches whose content already looks processed are kept verbatim.
# 2. +process_string!+ is called on the result, and each token it yields is
#    rewritten in place by the ordered substitutions below.
#
# NOTE(review): how +process_string!+ splits the string into tokens is
# defined elsewhere — confirm before relying on token boundaries.
#
# @param st [String] text to normalize
# @return [Object] whatever +process_string!+ returns
def normalize_slp1(st)
  normalized = st.dup

  normalized.gsub!(Constants::TRANS_CTRL_WORD) do |ctrl_word|
    content = ctrl_word[Constants::TRANS_CTRL_WORD_CONTENT, 1]
    if content&.match(Constants::TRANS_CTRL_WORD_PROCESSED)
      ctrl_word
    else
      digest = Digest::SHA1.hexdigest(content).rjust(40, "0")
      "{###{digest}##}"
    end
  end

  # Applied strictly in this order; later patterns see earlier results.
  substitutions = [
    [/['‘]\b/, ""], # Avagraha
    [/\B[NYRnm]/, "M"], # Medial and final nasals
    [/\B[Hrs]\b/, ""], # Final visarga/r/s
    [%r{[\.\-\_\\\/]}, ""], # Punctuation
  ]

  process_string!(normalized) do |token|
    token.tr!("b", "v")
    substitutions.each { |pattern, replacement| token.gsub!(pattern, replacement) }
    token
  end
end

.script_supported?(script) ⇒ Boolean

Returns:

  • (Boolean)


34
35
36
# File 'lib/dphil/transliterate.rb', line 34

# Tells whether a script name is among the schemes Sanscript reports.
#
# The argument is compared as given (no coercion), using +include?+ on the
# collection returned by +Sanscript::Transliterate.scheme_names+.
#
# @param script [Object] candidate script name
# @return [Boolean] result of the membership check
def script_supported?(script)
  known_schemes = Sanscript::Transliterate.scheme_names
  known_schemes.include?(script)
end

.slp1_iast(str) ⇒ Object



58
59
60
# File 'lib/dphil/transliterate.rb', line 58

# Shortcut for +transliterate+ with the source scheme fixed to +:slp1+ and
# the target scheme fixed to +:iast+.
#
# @param str [String] text to convert
# @return [Object] the result of +transliterate+
def slp1_iast(str)
  from_scheme = :slp1
  to_scheme = :iast
  transliterate(str, from_scheme, to_scheme)
end

.to_ascii(str) ⇒ Object



38
39
40
41
42
43
44
# File 'lib/dphil/transliterate.rb', line 38

# Reduces text to ASCII characters.
#
# Each chunk yielded by +process_string+ is decomposed with NFD
# normalization, so accented letters split into a base letter plus
# combining marks, and then every run of characters outside
# U+0000..U+007F is removed.
#
# NOTE(review): what +process_string+ yields and returns is defined
# elsewhere — confirm before relying on chunk boundaries.
#
# @param str [String] text to convert
# @return [Object] whatever +process_string+ returns
def to_ascii(str)
  non_ascii = /[^\u0000-\u007F]+/

  process_string(str) do |chunk|
    chunk.unicode_normalize!(:nfd)
    chunk.gsub!(non_ascii, "")
    chunk
  end
end

.transliterate(str, first, second = nil) ⇒ Object Also known as: t



27
28
29
30
31
32
# File 'lib/dphil/transliterate.rb', line 27

# Transliterates text through +Sanscript.transliterate+, passing the
# receiver's +default_script+ along as the +default_script:+ option.
#
# A +RuntimeError+ raised during the call is logged via
# +Dphil.logger.error+ and the untouched input is returned instead, so
# callers never see that exception.
#
# @param str [String] text to convert
# @param first [Symbol] first scheme argument, forwarded as-is
# @param second [Symbol, nil] second scheme argument, forwarded as-is
# @return [Object] Sanscript's result, or +str+ when a RuntimeError occurred
def transliterate(str, first, second = nil)
  fallback_script = default_script
  Sanscript.transliterate(str, first, second, default_script: fallback_script)
rescue RuntimeError => error
  Dphil.logger.error("Transliteration Error: #{error}")
  str
end

.unicode_downcase(st, ignore_control = false) ⇒ Object



94
95
96
# File 'lib/dphil/transliterate.rb', line 94

# Non-destructive variant of +unicode_downcase!+: the work is done on a
# duplicate, so +st+ itself is left unchanged.
#
# @param st [String] text to downcase
# @param ignore_control [Boolean] forwarded unchanged to +unicode_downcase!+
# @return [Object] the result of +unicode_downcase!+
def unicode_downcase(st, ignore_control = false)
  copy = st.dup
  unicode_downcase!(copy, ignore_control)
end

.unicode_downcase!(str, ignore_control = false) ⇒ Object



89
90
91
92
# File 'lib/dphil/transliterate.rb', line 89

# Applies +UNICODE_DOWNCASE_PROC+ to a string.
#
# With +ignore_control+ set, the proc is called once on the whole string.
# Otherwise the string goes through +process_string!+ with the proc as its
# block.
#
# NOTE(review): the "!" suggests +str+ is modified in place, but that
# depends on +UNICODE_DOWNCASE_PROC+ and +process_string!+ — confirm.
#
# @param str [String] text to downcase
# @param ignore_control [Boolean] when truthy, bypass +process_string!+
# @return [Object] the proc's result, or the result of +process_string!+
def unicode_downcase!(str, ignore_control = false)
  if ignore_control
    UNICODE_DOWNCASE_PROC.call(str)
  else
    process_string!(str, &UNICODE_DOWNCASE_PROC)
  end
end