Module: Unicode::DisplayWidth::IndexBuilder

Defined in:
lib/unicode/display_width/index_builder.rb

Constant Summary collapse

# Remote location of the East Asian Width data file; downloaded by fetch!.
EAST_ASIAN_WIDTH_DATA_URL = "http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt".freeze

# Local path of the data file: written by fetch!, read by build!.
EAST_ASIAN_WIDTH_DATA_FILENAME = (DATA_DIRECTORY + 'EastAsianWidth.txt').freeze

# Category tags whose data lines build! skips entirely.
IGNORE_CATEGORIES = ['Cs', 'Co', 'Cn'].freeze

# Category tags that is_zero_width? considers zero width
# (format characters belonging to the Arabic script are excepted there).
ZERO_WIDTH_CATEGORIES = ['Mn', 'Me', 'Cf'].freeze

# Codepoints that is_zero_width? considers zero width regardless of category.
ZERO_WIDTH_CODEPOINTS = (0x1160..0x11FF).to_a.freeze

# Explicit per-codepoint widths. build! merges these into the index last,
# so they take precedence over any value derived from the data file.
SPECIAL_WIDTHS = {
  0x0000 =>  0, # \0 NULL
  0x0005 =>  0, #    ENQUIRY
  0x0007 =>  0, # \a BELL
  0x0008 => -1, # \b BACKSPACE
  0x000A =>  0, # \n LINE FEED
  0x000B =>  0, # \v LINE TABULATION
  0x000C =>  0, # \f FORM FEED
  0x000D =>  0, # \r CARRIAGE RETURN
  0x000E =>  0, #    SHIFT OUT
  0x000F =>  0, #    SHIFT IN
  0x00AD =>  1, #    SOFT HYPHEN
  0x2E3A =>  2, #    TWO-EM DASH
  0x2E3B =>  3, #    THREE-EM DASH
}.freeze

Class Method Summary collapse

Class Method Details

.build! ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/unicode/display_width/index_builder.rb', line 34

# Builds the codepoint => width index from the local East Asian Width data
# file and writes it, marshalled, to INDEX_FILENAME.
#
# Every data line shaped like "<codepoint or range>;<width> # <category> ..."
# contributes one entry per codepoint: 0 when is_zero_width? says so,
# otherwise the width tag as a Symbol. Lines that do not match that shape
# (comments, blanks) and lines whose category is in IGNORE_CATEGORIES are
# skipped. SPECIAL_WIDTHS is merged last and overrides computed entries.
#
# Raises Errno::ENOENT when the data file is missing (run fetch! first).
# Returns whatever the final File.open block yields (the Marshal.dump result).
def self.build!
  index = {}

  # File.foreach closes the data file when the block finishes, even on error.
  File.foreach(EAST_ASIAN_WIDTH_DATA_FILENAME) do |line|
    match = /^(\S+?);(\S+)\s+#\s(\S+).*$/.match(line)
    next unless match

    cps, width, category = match.captures
    next if IGNORE_CATEGORIES.include?(category)

    codepoints =
      if cps.include?('..')
        # "XXXX..YYYY" denotes an inclusive range of hexadecimal codepoints.
        Range.new(*cps.split('..').map { |cp| cp.to_i(16) })
      else
        [cps.to_i(16)]
      end

    codepoints.each do |cp|
      index[cp] = is_zero_width?(category, cp) ? 0 : width.to_sym
    end
  end

  index.merge! SPECIAL_WIDTHS

  # Dir.exist? replaces Dir.exists?, which was removed in Ruby 3.2.
  Dir.mkdir(DATA_DIRECTORY) unless Dir.exist?(DATA_DIRECTORY)
  File.open(INDEX_FILENAME, 'wb') { |f| Marshal.dump(index, f) }
end

.fetch! ⇒ Object



27
28
29
30
31
32
# File 'lib/unicode/display_width/index_builder.rb', line 27

# Downloads the East Asian Width data file from EAST_ASIAN_WIDTH_DATA_URL
# and stores it at EAST_ASIAN_WIDTH_DATA_FILENAME, replacing any existing copy.
#
# Returns the value of File.write (number of bytes written).
def self.fetch!
  # Lazy-loaded on purpose: the network dependency is only needed here.
  require 'open-uri'

  # URI.open is required: since Ruby 3.0, Kernel#open no longer accepts URLs
  # and would treat the string as a local path.
  URI.open(EAST_ASIAN_WIDTH_DATA_URL) do |f|
    File.write(EAST_ASIAN_WIDTH_DATA_FILENAME, f.read)
  end
end

.is_zero_width?(category, cp) ⇒ Boolean

Returns:

  • (Boolean)


61
62
63
64
65
# File 'lib/unicode/display_width/index_builder.rb', line 61

# Decides whether a codepoint gets width 0 in the index.
#
# A codepoint is zero width when either
# * its category is listed in ZERO_WIDTH_CATEGORIES and the character is not
#   a format (Cf) character of the Arabic script, or
# * it is listed in ZERO_WIDTH_CODEPOINTS.
#
# category - category tag String taken from the data file (e.g. "Mn").
# cp       - Integer codepoint.
#
# Returns true or false.
def self.is_zero_width?(category, cp)
  if ZERO_WIDTH_CATEGORIES.include?(category)
    character = [cp].pack('U')
    # The lookbehind re-tests the character just matched by \p{Cf}, so the
    # pattern only matches a single character that is both Cf and Arabic.
    return true unless character =~ /\p{Cf}(?<=\p{Arabic})/
  end

  ZERO_WIDTH_CODEPOINTS.include?(cp)
end