Class: Unicoder::Builder::DisplayWidth

Inherits:
Object
  • Object
show all
Includes:
Unicoder::Builder, MultiDimensionalArrayBuilder
Defined in:
lib/unicoder/builders/display_width.rb

Constant Summary collapse

# General categories whose data lines parse! skips entirely
# (Cs = surrogate, Co = private use, Cn = unassigned).
IGNORE_CATEGORIES = %w[Cs Co Cn].freeze

# General categories that determine_width maps to width 0
# (Mn = nonspacing mark, Me = enclosing mark, Cf = format).
ZERO_WIDTH_CATEGORIES = %w[Mn Me Cf].freeze

# Individual codepoints that determine_width maps to width 0 regardless of
# their category: every codepoint from U+1160 through U+11FF, inclusive.
ZERO_WIDTH_CODEPOINTS = (0x1160..0x11FF).to_a.freeze

# Explicit per-codepoint widths that parse! assigns after the data file has
# been processed.
SPECIAL_WIDTHS = {
  0x0000 =>  0, # \0 NULL
  0x0005 =>  0, #    ENQUIRY
  0x0007 =>  0, # \a BELL
  0x0008 => -1, # \b BACKSPACE
  0x000A =>  0, # \n LINE FEED
  0x000B =>  0, # \v LINE TABULATION
  0x000C =>  0, # \f FORM FEED
  0x000D =>  0, # \r CARRIAGE RETURN
  0x000E =>  0, #    SHIFT OUT
  0x000F =>  0, #    SHIFT IN
  0x00AD =>  1, #    SOFT HYPHEN
  0x2E3A =>  2, #    TWO-EM DASH
  0x2E3B =>  3, #    THREE-EM DASH
}.freeze

Instance Attribute Summary

Attributes included from Unicoder::Builder

#index

Instance Method Summary collapse

Methods included from MultiDimensionalArrayBuilder

#assign_codepoint, #compress!

Methods included from Unicoder::Builder

#assign_codepoint, build, #export, #initialize, #parse_file

Instance Method Details

#determine_width(codepoint, category, east_asian_width) ⇒ Object



56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/unicoder/builders/display_width.rb', line 56

# Maps one codepoint to the width value stored in the index.
#
# @param codepoint [Integer] the codepoint being classified
# @param category [String] general category abbreviation taken from the data line
# @param east_asian_width [String] East Asian Width abbreviation taken from the data line
# @return [Integer, Symbol, nil] 0 for zero-width codepoints, 2 for "F"/"W",
#   :A for "A", and nil for everything else
def determine_width(codepoint, category, east_asian_width)
  # A zero-width category only counts when the character is not a Cf
  # character in the Arabic script; the pattern matches exactly that
  # combination, so a match vetoes the category-based zero width.
  zero_width_by_category =
    ZERO_WIDTH_CATEGORIES.include?(category) &&
    [codepoint].pack('U') !~ /\p{Cf}(?<=\p{Arabic})/

  return 0 if zero_width_by_category || ZERO_WIDTH_CODEPOINTS.include?(codepoint)

  # Falls through to nil when no branch applies.
  case east_asian_width
  when "F", "W" then 2
  when "A" then :A
  end
end

#initialize_index ⇒ Object



26
27
28
# File 'lib/unicoder/builders/display_width.rb', line 26

# Resets the index to a fresh, empty Array.
#
# @return [Array] the newly assigned empty index
def initialize_index
  @index = Array.new
end

#parse! ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/unicoder/builders/display_width.rb', line 30

# Fills the index from the :east_asian_width data file, then applies the
# SPECIAL_WIDTHS entries and compresses the result.
#
# Every data line that matches the line format contributes either a single
# codepoint ("XXXX") or an inclusive range ("XXXX..YYYY"); lines whose
# category is listed in IGNORE_CATEGORIES are skipped.
#
# @return [Object] the value of @index after compression
def parse!
  # Optional whitespace is accepted on both sides of the `;` separator, so
  # both "0020;Na  # Zs" and "0020  ; Na # Zs" style lines are recognised.
  line_format = /^(?<codepoints>\S+?)\s*;\s*(?<width>\S+)\s+#\s(?<category>\S+).*$/

  parse_file :east_asian_width, :line, regex: line_format do |line|
    next if IGNORE_CATEGORIES.include?(line["category"])

    # A single codepoint yields only `first`; it is then treated as a
    # one-element range.
    first, last = line["codepoints"].split('..').map { |hex| hex.to_i(16) }

    (first..(last || first)).each do |codepoint|
      assign_codepoint codepoint, determine_width(codepoint, line["category"], line["width"])
    end
  end

  # Assigned after the file data has been processed.
  SPECIAL_WIDTHS.each { |codepoint, value| assign_codepoint codepoint, value }

  # NOTE(review): compress! comes from MultiDimensionalArrayBuilder; the
  # reason for exactly four passes is not visible here - confirm before
  # changing the count.
  4.times { compress! }

  # Hand the index back to the caller without echoing it to stdout.
  @index
end