Module: Interscript

Extended by:: Fs, Opal

Defined in:: lib/interscript.rb,
lib/interscript/fs.rb,
lib/interscript/opal.rb,
lib/interscript/command.rb,
lib/interscript/mapping.rb,
lib/interscript/version.rb,
lib/interscript/opal_map_translate.rb

Overview

Transliteration

Defined Under Namespace

Modules: Fs, Opal, OpalMapTranslate

Classes: Command, ExternalProcessNotRecognizedError, ExternalProcessUnavailableError, InvalidSystemError, Mapping

Constant Summary collapse

VERSION = "0.1.5"

Constants included from Opal

Opal::ALPHA_REGEXP

Constants included from Fs

Fs::ALPHA_REGEXP

Class Method Summary collapse

.transliterate(system_code, string, maps = {}) ⇒ Object

Methods included from Opal

external_processing, mkregexp, sub_replace

Methods included from Fs

external_process, external_processing, import_python_modules, root_path, sub_replace, transliterate_file

Class Method Details

.transliterate(system_code, string, maps = {}) ⇒ `Object`

# File 'lib/interscript.rb', line 23

# Transliterates +string+ using the mapping registered under +system_code+.
#
# Processing order:
# 1. every system listed in the mapping's +chain+ is applied first (recursively);
# 2. +external_processing+ is run on the text;
# 3. the longest dictionary entries (found through the mapping's trie) are replaced;
# 4. the mapping's +rules+ are applied with +gsub+;
# 5. the character map is applied, optionally upcasing each replacement;
# 6. the mapping's +postrules+ are applied with +gsub+;
# 7. title-casing and word-separator clean-up are applied, and the result is
#    Unicode-normalized.
#
# @param system_code [Object] key identifying the mapping to use
# @param string [String, nil] text to transliterate
# @param maps [Hash] cache of already loaded mappings keyed by system code;
#   a missing entry is loaded with +Interscript::Mapping.for+ and stored here
# @return [String, nil] the transliterated text, or +nil+ when there is no
#   text left to process after the rule stages
def transliterate(system_code, string, maps = {})
  maps[system_code] = Interscript::Mapping.for(system_code) unless maps.key?(system_code)
  mapping = maps[system_code]

  # First, apply chained transliteration as specified in the list `chain`.
  # The list is only read, so the cached mapping is left untouched.
  mapping.chain.each { |code| string = transliterate(code, string, maps) }

  # Then, apply the rest of the map
  separator = mapping.character_separator || ""
  word_separator = mapping.word_separator || ""
  title_case = mapping.title_case
  downcase = mapping.downcase

  charmap = mapping.characters_hash
  dictmap = mapping.dictionary_hash
  trie = mapping.dictionary_trie

  string = external_processing(mapping, string)

  pos = 0
  while pos < string.to_s.size
    # Using the trie, find the longest dictionary entry that starts at `pos`
    wordmatch = ""
    m = 0
    while pos + m < string.to_s.size
      candidate = string[pos..pos + m]
      break unless trie.partial_word?(candidate)

      wordmatch = candidate if trie.word?(candidate)
      m += 1
    end

    if wordmatch.empty?
      pos += 1
    else
      repl = dictmap[wordmatch]
      string = sub_replace(string, pos, wordmatch.length, repl)
      # Continue after the replacement so it is not scanned again
      pos += repl.length
    end
  end

  output = string.clone

  mapping.rules.each do |r|
    next unless output

    output = output.gsub(mkregexp(r["pattern"]), r["result"])
  end

  charmap.each do |k, v|
    next unless output

    # If more than one replacement is listed, choose the first one. This must
    # happen before any case conversion: an Array does not respond to `upcase`.
    replacement = v.is_a?(Array) ? v[0] : v

    # The key is used as regular-expression source (it is not escaped);
    # compile it once per key instead of once per match.
    pattern = /#{k}/
    while (match = output&.match(pattern))
      pos = match.offset(0).first
      upcase = !downcase && up_case_around?(output, pos)
      result = upcase ? replacement.upcase : replacement

      output = sub_replace(
        output,
        pos,
        match[0].size,
        add_separator(separator, pos, result)
      )
    end
  end

  mapping.postrules.each do |r|
    next unless output

    output = output.gsub(mkregexp(r["pattern"]), r["result"])
  end

  return unless output

  output = output.sub(/^(.)/, &:upcase) if title_case
  if word_separator != ""
    # Drop a character separator that directly follows a word separator
    output = output.gsub(/#{word_separator}#{separator}/u, word_separator)

    output = output.gsub(/#{word_separator}(.)/u, &:upcase) if title_case
  end

  output.unicode_normalize
end

Module: Interscript

Overview

Defined Under Namespace

Constant Summary collapse

Constants included from Opal

Constants included from Fs

Class Method Summary collapse

Methods included from Opal

Methods included from Fs

Class Method Details

.transliterate(system_code, string, maps = {}) ⇒ Object

.transliterate(system_code, string, maps = {}) ⇒ `Object`