Class: Unicoder::Builder::DisplayWidth
- Inherits:
- 
      Object
      
        - Object
- Unicoder::Builder::DisplayWidth
 
- Includes:
- Unicoder::Builder, MultiDimensionalArrayBuilder
- Defined in:
- lib/unicoder/builders/display_width.rb
Constant Summary collapse
- ZERO_WIDTH_CATEGORIES =
- %w[Mn Me Zl Zp Cf].freeze 
- ZERO_WIDTH_HANGUL =
- [ *0x1160..0x11FF, # HANGUL JUNGSEONG *0xD7B0..0xD7FF, # HANGUL JUNGSEONG ].freeze 
- WIDE_RANGES =
- [ *0x3400..0x4DBF, *0x4E00..0x9FFF, *0xF900..0xFAFF, *0x20000..0x2FFFD, *0x30000..0x3FFFD, ].freeze 
- SPECIAL_WIDTHS =
- { 0x0 => 0, # \0 NULL 0x5 => 0, # ENQUIRY 0x7 => 0, # \a BELL 0x8 => -1, # \b BACKSPACE 0xA => 0, # \n LINE FEED 0xB => 0, # \v LINE TABULATION 0xC => 0, # \f FORM FEED 0xD => 0, # \r CARRIAGE RETURN 0xE => 0, # SHIFT OUT 0xF => 0, # SHIFT IN # 0x85 => 0, # NEXT LINE 0xAD => nil, # SOFT HYPHEN, nil = 1 (default) 0x2E3A => 2, # TWO-EM DASH 0x2E3B => 3, # THREE-EM DASH }.freeze 
Instance Attribute Summary
Attributes included from Unicoder::Builder
Instance Method Summary collapse
- #determine_width(codepoint, category, east_asian_width, ambiguous) ⇒ Object
- #initialize_index ⇒ Object
- #parse! ⇒ Object
Methods included from MultiDimensionalArrayBuilder
#assign_codepoint, #compress!, #remove_trailing_nils!
Methods included from Unicoder::Builder
#assign, #assign_codepoint, build, #export, #initialize, #meta, #parse_file
Instance Method Details
#determine_width(codepoint, category, east_asian_width, ambiguous) ⇒ Object
# File 'lib/unicoder/builders/display_width.rb', line 103

# Decides the display width stored for a single codepoint.
#
# @param codepoint [Integer] the codepoint being classified
# @param category [String] general category abbreviation (compared against
#   ZERO_WIDTH_CATEGORIES)
# @param east_asian_width [String] East Asian Width value; only "F", "W"
#   and "A" are treated specially here
# @param ambiguous [Integer] width to use for ambiguous ("A") codepoints
# @return [Integer, nil] 0 or 2, the ambiguous width, or nil when no
#   explicit width is stored (including an ambiguous width of 1)
def determine_width(codepoint, category, east_asian_width, ambiguous)
  # The lookbehind re-tests the character that \p{Cf} just consumed, so the
  # pattern only matches format characters belonging to the Arabic script;
  # those are exempt from the zero-width rule. `match?` is used because the
  # MatchData is never needed.
  if ZERO_WIDTH_CATEGORIES.include?(category) &&
     ![codepoint].pack('U').match?(/\p{Cf}(?<=\p{Arabic})/)
    0
  elsif east_asian_width == "F" || east_asian_width == "W"
    2
  elsif east_asian_width == "A"
    # A width of 1 is not stored explicitly; nil stands in for it.
    ambiguous == 1 ? nil : ambiguous
  end
end
#initialize_index ⇒ Object
# File 'lib/unicoder/builders/display_width.rb', line 39

# Resets the builder state: @index gets one empty array per width table
# (:WIDTH_ONE and :WIDTH_TWO, in that order) and @ignorable becomes an
# empty list.
#
# @return [Array] the fresh, empty @ignorable array
def initialize_index
  @index = %i[WIDTH_ONE WIDTH_TWO].each_with_object({}) do |table, index|
    index[table] = [] # each table needs its own array object
  end
  @ignorable = []
end
#parse! ⇒ Object
# File 'lib/unicoder/builders/display_width.rb', line 47

# Reads the source data files and fills the :WIDTH_ONE and :WIDTH_TWO
# tables of @index.
#
# Steps, in order (later assignments to the same codepoint come after
# earlier ones):
# 1. collect Default_Ignorable_Code_Point codepoints into @ignorable
# 2. assign a width per codepoint from the East Asian Width data, once with
#    ambiguous width 1 and once with ambiguous width 2
# 3. assign 0 to ZERO_WIDTH_HANGUL and the ignorable codepoints
# 4. assign 2 to WIDE_RANGES
# 5. assign the SPECIAL_WIDTHS table
# 6. compress both tables and strip their trailing nils
def parse!
  tables = %i[WIDTH_ONE WIDTH_TWO]

  # Expands a codepoint field, either a single hex value ("00AD") or an
  # inclusive hex range ("200B..200F"), into an enumerable of Integers.
  expand_codepoints = lambda do |field|
    if field.include?('..')
      Range.new(*field.split('..').map { |hex| hex.to_i(16) })
    else
      [field.to_i(16)]
    end
  end

  # Stores the same width for a codepoint in both tables.
  assign_both = lambda do |codepoint, width|
    tables.each { |table| assign table, codepoint, width }
  end

  # Find Ignorables
  parse_file :core_properties, :line,
    begin: /^# Derived Property: Default_Ignorable_Code_Point$/,
    end:   /^# ================================================$/,
    regex: /^(?<codepoints>\S+)\s+; Default_Ignorable_Code_Point.*$/ do |line|
    # Append in place instead of rebuilding the whole list for every line.
    @ignorable.concat(expand_codepoints.call(line["codepoints"]).to_a)
  end

  # Assign based on East Asian Width
  parse_file :east_asian_width, :line, regex: /^(?<codepoints>\S+?)\s*;\s*(?<width>\S+)\s+#\s(?<category>\S+).*$/ do |line|
    expand_codepoints.call(line["codepoints"]).each do |codepoint|
      assign :WIDTH_ONE, codepoint, determine_width(codepoint, line["category"], line["width"], 1)
      assign :WIDTH_TWO, codepoint, determine_width(codepoint, line["category"], line["width"], 2)
    end
  end

  # Assign Ranges

  ## Zero-width
  (ZERO_WIDTH_HANGUL | @ignorable).each { |codepoint| assign_both.call(codepoint, 0) }

  ## Full-width
  WIDE_RANGES.each { |codepoint| assign_both.call(codepoint, 2) }

  ## Table (assigned last, after all the rules above)
  SPECIAL_WIDTHS.each { |codepoint, value| assign_both.call(codepoint, value) }

  # Compress Index
  tables.each { |table| 4.times { compress! @index[table] } }
  tables.each { |table| remove_trailing_nils! @index[table] }
end