Module: UnicodeScript

Defined in:
lib/unicode-script.rb,
lib/unicode_script/core.rb,
lib/unicode_script/charts.rb,
lib/unicode_script/version.rb

Defined Under Namespace

Modules: Version

Constant Summary collapse

# Table of named code point ranges used for script lookup.
#
# Each entry is a Hash with two keys:
#   :name  - String label of the range. The module's dynamic predicates
#            compare the lowercased label with the called method name, so
#            changing a label changes the public API.
#   :range - inclusive Range of Integer code points.
#
# The array and every entry are frozen so the shared table cannot be
# modified by accident at runtime. The label strings are left unfrozen.
CHARTS = [
  {:name => 'Armenian', :range => (0x0530..0x058F)},
  {:name => 'Coptic', :range => (0x2C80..0x2CFF)},
  {:name => 'Greek and Coptic', :range => (0x0370..0x03FF)},
  {:name => 'Cypriot Syllabary', :range => (0x10800..0x1083F)},
  # NOTE(review): 'Cyrilic' (single "l") in the next two labels looks like a
  # typo for 'Cyrillic'; kept as-is because the labels are runtime identifiers.
  {:name => 'Cyrilic', :range => (0x0400..0x04FF)},
  {:name => 'Cyrilic Supplement', :range => (0x0500..0x052F)},
  {:name => 'Cyrillic Extended-A', :range => (0x2DE0..0x2DFF)},
  {:name => 'Cyrillic Extended-B', :range => (0xA640..0xA69F)},
  {:name => 'Georgian', :range => (0x10A0..0x10FF)},
  {:name => 'Georgian Supplement', :range => (0x2D00..0x2D2F)},
  # NOTE(review): 'Hiragana' is listed a second time further down with the
  # same range; one of the two entries is redundant.
  {:name => 'Hiragana', :range => (0x3040..0x309F)},
  {:name => 'Glagolitic', :range => (0x2C00..0x2C5F)},
  {:name => 'Gothic', :range => (0x10330..0x1034F)},
  {:name => 'Greek Extended', :range => (0x1F00..0x1FFF)},
  {:name => 'Basic Latin', :range => (0x0000..0x007F)},
  {:name => 'C1 Controls and Latin-1 Supplement', :range => (0x0080..0x00FF)},
  {:name => 'Latin Extended-A', :range => (0x0100..0x017F)},
  {:name => 'Latin Extended-B', :range => (0x0180..0x024F)},
  {:name => 'Latin Extended-C', :range => (0x2C60..0x2C7F)},
  {:name => 'Latin Extended-D', :range => (0xA720..0xA7FF)},
  {:name => 'Latin Extended Additional', :range => (0x1E00..0x1EFF)},
  # NOTE(review): this range lies entirely inside 'Basic Latin' above;
  # presumably the fullwidth forms were intended -- confirm before changing.
  {:name => 'Fullwidth ASCII', :range => (0x0020..0x007E)},
  {:name => 'Halfwidth CJK punctuation', :range => (0x3000..0x303F)},
  # NOTE(review): same range as 'Hangul Compatibility Jamo' below.
  {:name => 'Halfwidth Hangul', :range => (0x3130..0x318F)},
  {:name => 'Linear B Syllabary', :range => (0x10000..0x1007F)},
  {:name => 'Linear B Ideograms', :range => (0x10080..0x100FF)},
  {:name => 'Ogham', :range => (0x1680..0x169F)},
  {:name => 'Old Italic', :range => (0x10300..0x1032F)},
  {:name => 'Phaistos Disc', :range => (0x101D0..0x101FF)},
  {:name => 'Runic', :range => (0x16A0..0x16FF)},
  {:name => 'Shavian', :range => (0x10450..0x1047F)},
  {:name => 'IPA Extensions', :range => (0x0250..0x02AF)},
  {:name => 'Phonetic Extensions', :range => (0x1D00..0x1D7F)},
  {:name => 'Phonetic Extensions Supplement', :range => (0x1D80..0x1DBF)},
  {:name => 'Modifier Tone Letters', :range => (0xA700..0xA71F)},
  {:name => 'Spacing Modifier Letters', :range => (0x02B0..0x02FF)},
  {:name => 'Superscripts and Subscripts', :range => (0x2070..0x209F)},
  {:name => 'Combining Diacritical Marks', :range => (0x0300..0x036F)},
  {:name => 'Combining Diacritical Marks Supplement', :range => (0x1DC0..0x1DFF)},
  {:name => 'Combining Half Marks', :range => (0xFE20..0xFE2F)},
  {:name => 'Bamum', :range => (0xA6A0..0xA6FF)},
  {:name => 'Bamum Supplement', :range => (0x16800..0x16A3F)},
  {:name => 'Egyptian Hieroglyphs', :range => (0x13000..0x1342F)},
  {:name => 'Ethiopic', :range => (0x1200..0x137F)},
  {:name => 'Ethiopic Supplement', :range => (0x1380..0x139F)},
  {:name => 'Ethiopic Extended', :range => (0x2D80..0x2DDF)},
  {:name => 'Ethiopic Extended-A', :range => (0xAB00..0xAB2F)},
  {:name => 'Meroitic Cursive', :range => (0x109A0..0x109FF)},
  {:name => 'Meroitic Hieroglyphs', :range => (0x10980..0x1099F)},
  {:name => 'NKo', :range => (0x07C0..0x07FF)},
  {:name => 'Osmanya', :range => (0x10480..0x104AF)},
  {:name => 'Tifinagh', :range => (0x2D30..0x2D7F)},
  {:name => 'Vai', :range => (0xA500..0xA63F)},
  {:name => 'Arabic', :range => (0x0600..0x06FF)},
  {:name => 'Arabic Supplement', :range => (0x0750..0x077F)},
  {:name => 'Arabic Extended-A', :range => (0x08A0..0x08FF)},
  {:name => 'Arabic Presentation Forms-A', :range => (0xFB50..0xFDFF)},
  {:name => 'Arabic Presentation Forms-B', :range => (0xFE70..0xFEFF)},
  {:name => 'Imperial Aramaic', :range => (0x10840..0x1085F)},
  {:name => 'Avestan', :range => (0x10B00..0x10B3F)},
  {:name => 'Carian', :range => (0x102A0..0x102DF)},
  {:name => 'Cuneiform', :range => (0x12000..0x123FF)},
  {:name => 'Cuneiform Numbers and Punctuation', :range => (0x12400..0x1247F)},
  {:name => 'Old Persian', :range => (0x103A0..0x103DF)},
  {:name => 'Ugaritic', :range => (0x10380..0x1039F)},
  {:name => 'Hebrew', :range => (0x0590..0x05FF)},
  {:name => 'Lycian', :range => (0x10280..0x1029F)},
  {:name => 'Lydian', :range => (0x10920..0x1093F)},
  {:name => 'Mandaic', :range => (0x0840..0x085F)},
  {:name => 'Old South Arabian', :range => (0x10A60..0x10A7F)},
  {:name => 'Inscriptional Pahlavi', :range => (0x10B60..0x10B7F)},
  {:name => 'Inscriptional Parthian', :range => (0x10B40..0x10B5F)},
  {:name => 'Phoenician', :range => (0x10900..0x1091F)},
  {:name => 'Samaritan', :range => (0x0800..0x083F)},
  {:name => 'Syriac', :range => (0x0700..0x074F)},
  {:name => 'Mongolian', :range => (0x1800..0x18AF)},
  {:name => 'Old Turkic', :range => (0x10C00..0x10C4F)},
  {:name => 'Phags-pa', :range => (0xA840..0xA87F)},
  {:name => 'Tibetan', :range => (0x0F00..0x0FFF)},
  {:name => 'Bengali', :range => (0x0980..0x09FF)},
  {:name => 'Brahmi', :range => (0x11000..0x1107F)},
  {:name => 'Chakma', :range => (0x11100..0x1114F)},
  {:name => 'Devanagari', :range => (0x0900..0x097F)},
  {:name => 'Devanagari Extended', :range => (0xA8E0..0xA8FF)},
  {:name => 'Gujarati', :range => (0x0A80..0x0AFF)},
  {:name => 'Gurmukhi', :range => (0x0A00..0x0A7F)},
  {:name => 'Kaithi', :range => (0x11080..0x110CF)},
  {:name => 'Kannada', :range => (0x0C80..0x0CFF)},
  {:name => 'Kharoshthi', :range => (0x10A00..0x10A5F)},
  {:name => 'Lepcha', :range => (0x1C00..0x1C4F)},
  {:name => 'Limbu', :range => (0x1900..0x194F)},
  {:name => 'Malayalam', :range => (0x0D00..0x0D7F)},
  {:name => 'Meetei Mayek', :range => (0xABC0..0xABFF)},
  {:name => 'Meetei Mayek Extensions', :range => (0xAAE0..0xAAFF)},
  {:name => 'Ol Chiki', :range => (0x1C50..0x1C7F)},
  {:name => 'Oriya', :range => (0x0B00..0x0B7F)},
  {:name => 'Saurashtra', :range => (0xA880..0xA8DF)},
  {:name => 'Sharada', :range => (0x11180..0x111DF)},
  {:name => 'Sinhala', :range => (0x0D80..0x0DFF)},
  {:name => 'Sora Sompeng', :range => (0x110D0..0x110FF)},
  {:name => 'Syloti Nagri', :range => (0xA800..0xA82F)},
  {:name => 'Takri', :range => (0x11680..0x116CF)},
  {:name => 'Tamil', :range => (0x0B80..0x0BFF)},
  {:name => 'Telugu', :range => (0x0C00..0x0C7F)},
  {:name => 'Thaana', :range => (0x0780..0x07BF)},
  {:name => 'Vedic Extensions', :range => (0x1CD0..0x1CFF)},
  {:name => 'Balinese', :range => (0x1B00..0x1B7F)},
  {:name => 'Batak', :range => (0x1BC0..0x1BFF)},
  {:name => 'Buginese', :range => (0x1A00..0x1A1F)},
  {:name => 'Cham', :range => (0xAA00..0xAA5F)},
  {:name => 'Javanese', :range => (0xA980..0xA9DF)},
  {:name => 'Kayah Li', :range => (0xA900..0xA92F)},
  {:name => 'Khmer', :range => (0x1780..0x17FF)},
  {:name => 'Khmer Symbols', :range => (0x19E0..0x19FF)},
  {:name => 'Lao', :range => (0x0E80..0x0EFF)},
  {:name => 'Myanmar', :range => (0x1000..0x109F)},
  {:name => 'Myanmar Extended-A', :range => (0xAA60..0xAA7F)},
  {:name => 'New Tai Lue', :range => (0x1980..0x19DF)},
  {:name => 'Rejang', :range => (0xA930..0xA95F)},
  {:name => 'Sundanese', :range => (0x1B80..0x1BBF)},
  {:name => 'Sundanese Supplement', :range => (0x1CC0..0x1CCF)},
  {:name => 'Tai Le', :range => (0x1950..0x197F)},
  {:name => 'Tai Tham', :range => (0x1A20..0x1AAF)},
  {:name => 'Tai Viet', :range => (0xAA80..0xAADF)},
  {:name => 'Thai', :range => (0x0E00..0x0E7F)},
  {:name => 'Buhid', :range => (0x1740..0x175F)},
  {:name => 'Hanunoo', :range => (0x1720..0x173F)},
  {:name => 'Tagalog', :range => (0x1700..0x171F)},
  {:name => 'Tagbanwa', :range => (0x1760..0x177F)},
  {:name => 'Bopomofo', :range => (0x3100..0x312F)},
  {:name => 'Bopomofo Extended', :range => (0x31A0..0x31BF)},
  {:name => 'CJK Unified Ideographs', :range => (0x4E00..0x9FCC)},
  {:name => 'CJK Unified Ideographs Extension A', :range => (0x3400..0x4DB5)},
  {:name => 'CJK Unified Ideographs Extension B', :range => (0x20000..0x2A6D6)},
  {:name => 'CJK Unified Ideographs Extension C', :range => (0x2A700..0x2B734)},
  {:name => 'CJK Unified Ideographs Extension D', :range => (0x2B740..0x2B81D)},
  {:name => 'CJK Compatibility Ideographs', :range => (0xF900..0xFAFF)},
  {:name => 'CJK Compatibility Ideographs Supplement', :range => (0x2F800..0x2FA1F)},
  {:name => 'Kangxi Radicals', :range => (0x2F00..0x2FDF)},
  {:name => 'CJK Radicals Supplement', :range => (0x2E80..0x2EFF)},
  {:name => 'CJK Strokes', :range => (0x31C0..0x31EF)},
  {:name => 'Hangul Jamo', :range => (0x1100..0x11FF)},
  {:name => 'Hangul Jamo Extended-A', :range => (0xA960..0xA97F)},
  {:name => 'Hangul Jamo Extended-B', :range => (0xD7B0..0xD7FF)},
  {:name => 'Hangul Compatibility Jamo', :range => (0x3130..0x318F)},
  {:name => 'Hiragana', :range => (0x3040..0x309F)},
  {:name => 'Katakana', :range => (0x30A0..0x30FF)},
  {:name => 'Katakana Phonetic Extensions', :range => (0x31F0..0x31FF)},
  {:name => 'Kana Supplement', :range => (0x1B000..0x1B0FF)},
  {:name => 'Kanbun', :range => (0x3190..0x319F)},
  {:name => 'Lisu', :range => (0xA4D0..0xA4FF)},
  {:name => 'Miao', :range => (0x16F00..0x16F9F)},
  {:name => 'Yi Syllables', :range => (0xA000..0xA48F)},
  {:name => 'Yi Radicals', :range => (0xA490..0xA4CF)},
  {:name => 'Cherokee', :range => (0x13A0..0x13FF)},
  {:name => 'Deseret', :range => (0x10400..0x1044F)},
  {:name => 'Unified Canadian Aboriginal Syllabics', :range => (0x1400..0x167F)},
  {:name => 'Unified Canadian Aboriginal Syllabics Extended', :range => (0x18B0..0x18FF)}
].each(&:freeze).freeze

Class Method Summary collapse

Class Method Details

.detect(string) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/unicode_script/core.rb', line 3

# Groups the characters of +string+ by the script reported for each of them.
#
# Space characters (U+0020) are dropped first. Every remaining character is
# classified by passing its code point to +find_script+ (defined elsewhere in
# this module); characters for which it returns nil or false are skipped.
#
# The argument itself is never modified, so frozen strings are accepted.
#
# @param string [String] text to analyse
# @return [Array<Hash>] one Hash per script, in order of first appearance:
#   +:script+ holds the value returned by +find_script+ and +:value+ holds the
#   String of all characters attributed to that script, in input order
def self.detect(string)
  res = []
  # String#delete returns a copy; the caller's string stays untouched.
  string.delete(' ').each_char do |char|
    # Work with the character itself rather than Integer#chr, which raises
    # RangeError for code points above 255 when no encoding is supplied.
    script = find_script(char.ord)
    next unless script

    entry = res.find { |e| e[:script] == script }
    if entry
      entry[:value] << char
    else
      res.push({:script => script, :value => char.dup})
    end
  end
  res
end

.method_missing(method, val) ⇒ Object



25
26
27
28
29
30
31
32
33
34
35
# File 'lib/unicode_script/core.rb', line 25

# Resolves a dynamic predicate name to its CHARTS entry.
#
# The name must end in "?". The part before the "?" is compared with each
# chart label lowercased, and also with the lowercased label in which every
# run of non-alphanumeric characters is replaced by "_", so that labels with
# spaces or hyphens can be written as ordinary method names
# (e.g. "greek_extended?" for 'Greek Extended').
#
# @api private
# @param method [Symbol, String] the called method name
# @return [Hash, nil] the first matching chart entry, or nil when none matches
def self.chart_for_predicate(method)
  name = method.to_s
  return nil unless name.end_with?('?')

  wanted = name.chomp('?')
  CHARTS.find do |chart|
    label = chart[:name].downcase
    label == wanted || label.gsub(/[^a-z0-9]+/, '_') == wanted
  end
end

# Implements the dynamic "<chart name>?" predicates.
#
# A predicate returns true when every code point of the given string lies
# inside the range of the matching chart (an empty string therefore yields
# true) and false otherwise. Any other method name is forwarded to +super+,
# which raises NoMethodError.
#
# @param method [Symbol] the called method name
# @param args [Array] arguments of the call; predicates take exactly one String
# @return [Boolean]
# @raise [ArgumentError] if a predicate is called without exactly one argument
# @raise [NoMethodError] if +method+ is not a known predicate
def self.method_missing(method, *args, &block)
  chart = chart_for_predicate(method)
  return super unless chart

  unless args.size == 1
    raise ArgumentError, "wrong number of arguments (given #{args.size}, expected 1)"
  end

  range = chart[:range]
  args.first.codepoints.all? { |point| range.include?(point) }
end

# Keeps +respond_to?+ consistent with the predicates handled above.
#
# @param method [Symbol] the method name being queried
# @param include_private [Boolean] forwarded to +super+ unchanged
# @return [Boolean]
def self.respond_to_missing?(method, include_private = false)
  !chart_for_predicate(method).nil? || super
end