Module: UnicodeScript

Defined in:
lib/unicode-script.rb,
lib/unicode_script/core.rb,
lib/unicode_script/charts.rb,
lib/unicode_script/version.rb

Defined Under Namespace

Modules: Version

Constant Summary collapse

# Lookup table from a lowercase chart name to the inclusive range of Unicode
# code points (Integers) that the chart covers.
#
# The table is frozen so that it cannot be modified by accident at runtime,
# and every key appears exactly once (a second, identical 'hiragana' entry
# used to trigger Ruby's "duplicated key" warning).
#
# NOTE(review): 'cyrilic' and 'cyrilic supplement' are spelled with a single
#   "l" while the 'cyrillic extended-*' keys use two. The keys are left
#   untouched because they are part of the public lookup names.
# NOTE(review): the 'fullwidth ascii' range lies entirely inside 'basic latin',
#   and 'halfwidth hangul' has the same range as 'hangul compatibility jamo'.
#   This looks unintended; confirm against the Unicode charts before changing.
CHARTS = {
  'armenian' => (0x0530..0x058f),
  'coptic' => (0x2c80..0x2cff),
  'greek and coptic' => (0x0370..0x03ff),
  'cypriot syllabary' => (0x10800..0x1083f),
  'cyrilic' => (0x0400..0x04ff),
  'cyrilic supplement' => (0x0500..0x052f),
  'cyrillic extended-a' => (0x2de0..0x2dff),
  'cyrillic extended-b' => (0xa640..0xa69f),
  'georgian' => (0x10a0..0x10ff),
  'georgian supplement' => (0x2d00..0x2d2f),
  'hiragana' => (0x3040..0x309f),
  'glagolitic' => (0x2c00..0x2c5f),
  'gothic' => (0x10330..0x1034f),
  'greek extended' => (0x1f00..0x1fff),
  'basic latin' => (0x0000..0x007f),
  'c1 controls and latin-1 supplement' => (0x0080..0x00ff),
  'latin extended-a' => (0x0100..0x017f),
  'latin extended-b' => (0x0180..0x024f),
  'latin extended-c' => (0x2c60..0x2c7f),
  'latin extended-d' => (0xa720..0xa7ff),
  'latin extended additional' => (0x1e00..0x1eff),
  'fullwidth ascii' => (0x0020..0x007e),
  'halfwidth cjk punctuation' => (0x3000..0x303f),
  'halfwidth hangul' => (0x3130..0x318f),
  'linear b syllabary' => (0x10000..0x1007f),
  'linear b ideograms' => (0x10080..0x100ff),
  'ogham' => (0x1680..0x169f),
  'old italic' => (0x10300..0x1032f),
  'phaistos disc' => (0x101d0..0x101ff),
  'runic' => (0x16a0..0x16ff),
  'shavian' => (0x10450..0x1047f),
  'ipa extensions' => (0x0250..0x02af),
  'phonetic extensions' => (0x1d00..0x1d7f),
  'phonetic extensions supplement' => (0x1d80..0x1dbf),
  'modifier tone letters' => (0xa700..0xa71f),
  'spacing modifier letters' => (0x02b0..0x02ff),
  'superscripts and subscripts' => (0x2070..0x209f),
  'combining diacritical marks' => (0x0300..0x036f),
  'combining diacritical marks supplement' => (0x1dc0..0x1dff),
  'combining half marks' => (0xfe20..0xfe2f),
  'bamum' => (0xa6a0..0xa6ff),
  'bamum supplement' => (0x16800..0x16a3f),
  'egyptian hieroglyphs' => (0x13000..0x1342f),
  'ethiopic' => (0x1200..0x137f),
  'ethiopic supplement' => (0x1380..0x139f),
  'ethiopic extended' => (0x2d80..0x2ddf),
  'ethiopic extended-a' => (0xab00..0xab2f),
  'meroitic cursive' => (0x109a0..0x109ff),
  'meroitic hieroglyphs' => (0x10980..0x1099f),
  'nko' => (0x07c0..0x07ff),
  'osmanya' => (0x10480..0x104af),
  'tifinagh' => (0x2d30..0x2d7f),
  'vai' => (0xa500..0xa63f),
  'arabic' => (0x0600..0x06ff),
  'arabic supplement' => (0x0750..0x077f),
  'arabic extended-a' => (0x08a0..0x08ff),
  'arabic presentation forms-a' => (0xfb50..0xfdff),
  'arabic presentation forms-b' => (0xfe70..0xfeff),
  'imperial aramaic' => (0x10840..0x1085f),
  'avestan' => (0x10b00..0x10b3f),
  'carian' => (0x102a0..0x102df),
  'cuneiform' => (0x12000..0x123ff),
  'cuneiform numbers and punctuation' => (0x12400..0x1247f),
  'old persian' => (0x103a0..0x103df),
  'ugaritic' => (0x10380..0x1039f),
  'hebrew' => (0x0590..0x05ff),
  'lycian' => (0x10280..0x1029f),
  'lydian' => (0x10920..0x1093f),
  'mandaic' => (0x0840..0x085f),
  'old south arabian' => (0x10a60..0x10a7f),
  'inscriptional pahlavi' => (0x10b60..0x10b7f),
  'inscriptional parthian' => (0x10b40..0x10b5f),
  'phoenician' => (0x10900..0x1091f),
  'samaritan' => (0x0800..0x083f),
  'syriac' => (0x0700..0x074f),
  'mongolian' => (0x1800..0x18af),
  'old turkic' => (0x10c00..0x10c4f),
  'phags-pa' => (0xa840..0xa87f),
  'tibetan' => (0x0f00..0x0fff),
  'bengali' => (0x0980..0x09ff),
  'brahmi' => (0x11000..0x1107f),
  'chakma' => (0x11100..0x1114f),
  'devanagari' => (0x0900..0x097f),
  'devanagari extended' => (0xa8e0..0xa8ff),
  'gujarati' => (0x0a80..0x0aff),
  'gurmukhi' => (0x0a00..0x0a7f),
  'kaithi' => (0x11080..0x110cf),
  'kannada' => (0x0c80..0x0cff),
  'kharoshthi' => (0x10a00..0x10a5f),
  'lepcha' => (0x1c00..0x1c4f),
  'limbu' => (0x1900..0x194f),
  'malayalam' => (0x0d00..0x0d7f),
  'meetei mayek' => (0xabc0..0xabff),
  'meetei mayek extensions' => (0xaae0..0xaaff),
  'ol chiki' => (0x1c50..0x1c7f),
  'oriya' => (0x0b00..0x0b7f),
  'saurashtra' => (0xa880..0xa8df),
  'sharada' => (0x11180..0x111df),
  'sinhala' => (0x0d80..0x0dff),
  'sora sompeng' => (0x110d0..0x110ff),
  'syloti nagri' => (0xa800..0xa82f),
  'takri' => (0x11680..0x116cf),
  'tamil' => (0x0b80..0x0bff),
  'telugu' => (0x0c00..0x0c7f),
  'thaana' => (0x0780..0x07bf),
  'vedic extensions' => (0x1cd0..0x1cff),
  'balinese' => (0x1b00..0x1b7f),
  'batak' => (0x1bc0..0x1bff),
  'buginese' => (0x1a00..0x1a1f),
  'cham' => (0xaa00..0xaa5f),
  'javanese' => (0xa980..0xa9df),
  'kayah li' => (0xa900..0xa92f),
  'khmer' => (0x1780..0x17ff),
  'khmer symbols' => (0x19e0..0x19ff),
  'lao' => (0x0e80..0x0eff),
  'myanmar' => (0x1000..0x109f),
  'myanmar extended-a' => (0xaa60..0xaa7f),
  'new tai lue' => (0x1980..0x19df),
  'rejang' => (0xa930..0xa95f),
  'sundanese' => (0x1b80..0x1bbf),
  'sundanese supplement' => (0x1cc0..0x1ccf),
  'tai le' => (0x1950..0x197f),
  'tai tham' => (0x1a20..0x1aaf),
  'tai viet' => (0xaa80..0xaadf),
  'thai' => (0x0e00..0x0e7f),
  'buhid' => (0x1740..0x175f),
  'hanunoo' => (0x1720..0x173f),
  'tagalog' => (0x1700..0x171f),
  'tagbanwa' => (0x1760..0x177f),
  'bopomofo' => (0x3100..0x312f),
  'bopomofo extended' => (0x31a0..0x31bf),
  'cjk unified ideographs' => (0x4e00..0x9fcc),
  'cjk unified ideographs extension a' => (0x3400..0x4db5),
  'cjk unified ideographs extension b' => (0x20000..0x2a6d6),
  'cjk unified ideographs extension c' => (0x2a700..0x2b734),
  'cjk unified ideographs extension d' => (0x2b740..0x2b81d),
  'cjk compatibility ideographs' => (0xf900..0xfaff),
  'cjk compatibility ideographs supplement' => (0x2f800..0x2fa1f),
  'kangxi radicals' => (0x2f00..0x2fdf),
  'cjk radicals supplement' => (0x2e80..0x2eff),
  'cjk strokes' => (0x31c0..0x31ef),
  'hangul jamo' => (0x1100..0x11ff),
  'hangul jamo extended-a' => (0xa960..0xa97f),
  'hangul jamo extended-b' => (0xd7b0..0xd7ff),
  'hangul compatibility jamo' => (0x3130..0x318f),
  'katakana' => (0x30a0..0x30ff),
  'katakana phonetic extensions' => (0x31f0..0x31ff),
  'kana supplement' => (0x1b000..0x1b0ff),
  'kanbun' => (0x3190..0x319f),
  'lisu' => (0xa4d0..0xa4ff),
  'miao' => (0x16f00..0x16f9f),
  'yi syllables' => (0xa000..0xa48f),
  'yi radicals' => (0xa490..0xa4cf),
  'cherokee' => (0x13a0..0x13ff),
  'deseret' => (0x10400..0x1044f),
  'unified canadian aboriginal syllabics' => (0x1400..0x167f),
  'unified canadian aboriginal syllabics extended' => (0x18b0..0x18ff)
}.freeze

Class Method Summary collapse

Class Method Details

.detect(string) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/unicode_script/core.rb', line 3

# Splits +string+ into the scripts its characters belong to.
#
# Spaces are ignored. Every remaining character is looked up by code point
# with +find_script+; characters for which no script is found are skipped.
# Characters sharing a script are concatenated in their order of appearance.
#
# The argument is never modified, so frozen strings are accepted, and
# characters are taken straight from the string rather than rebuilt with
# Integer#chr, so code points above 0xFF work without raising RangeError.
#
# @param string [String] the text to analyse
# @return [Array<Hash>] one <tt>{script:, value:}</tt> hash per detected
#   script, ordered by the first appearance of each script in +string+
def self.detect(string)
  grouped = {}
  # String#delete returns a copy; the caller's string stays untouched.
  string.delete(' ').each_char do |char|
    script = find_script(char.ord)
    next unless script

    # Hash insertion order keeps scripts in first-seen order.
    (grouped[script] ||= []) << char
  end
  grouped.map { |script, chars| { script: script, value: chars.join } }
end

.method_missing(method, val) ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/unicode_script/core.rb', line 24

# Implements the dynamic script predicates, e.g.
# <tt>UnicodeScript.old_italic?(text)</tt>.
#
# The script name is derived from the method name by turning underscores
# into spaces and dropping the final character (normally the trailing "?").
# If that name is charted, the call answers whether every code point of the
# argument lies inside the script's CHARTS range; an empty string is
# therefore reported as +true+. Any other method name is passed to +super+.
#
# Arguments are accepted variadically so that an unknown method called with
# zero or several arguments reaches +super+ and raises NoMethodError instead
# of an unrelated arity error.
#
# @param method [Symbol] name of the missing method
# @param args [Array] arguments of the call; a script predicate takes
#   exactly one String
# @return [Boolean] whether all code points belong to the script
# @raise [ArgumentError] if a script predicate is not given exactly one
#   argument
# @raise [NoMethodError] if the method does not name a charted script
def self.method_missing(method, *args, &block)
  script_name = method.to_s.tr('_', ' ').chop
  return super unless charted?(script_name)

  unless args.size == 1
    raise ArgumentError, "wrong number of arguments (given #{args.size}, expected 1)"
  end

  range = CHARTS[script_name]
  args.first.codepoints.all? { |point| range.include?(point) }
end

.respond_to_missing?(method, include_private = false) ⇒ Boolean

Returns:

  • (Boolean)


37
38
39
# File 'lib/unicode_script/core.rb', line 37

# Reports the dynamic script predicates handled by +method_missing+.
#
# The method name is mapped to a script name by replacing underscores with
# spaces and dropping the final character; if that name is charted the
# result of +charted?+ is returned, otherwise the decision is left to +super+.
#
# @param method [Symbol] the method name being queried
# @param include_private [Boolean] forwarded unchanged to +super+
# @return [Boolean]
def self.respond_to_missing?(method, include_private = false)
  script_name = method.to_s.tr('_', ' ').chop
  known = charted?(script_name)
  return known if known

  super
end