Class: Unicoder::Builder::Categories

Inherits:
Object
  • Object
show all
Includes:
Unicoder::Builder, MultiDimensionalArrayBuilder
Defined in:
lib/unicoder/builders/categories.rb

Overview

Assigns a general category to every codepoint, using a multi-dimensional Array index structure.

Instance Attribute Summary

Attributes included from Unicoder::Builder

#formats, #index, #option

Instance Method Summary collapse

Methods included from MultiDimensionalArrayBuilder

#assign_codepoint, #compress!, #remove_trailing_nils!

Methods included from Unicoder::Builder

#assign, #assign_codepoint, build, #export, #initialize, #meta, #parse_file

Instance Method Details

#initialize_index ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/unicoder/builders/categories.rb', line 8

# Resets the builder state: installs a fresh, empty index in @index and
# clears @range_start.
#
# The index has three entries:
#   CATEGORIES     - empty Array, later filled per codepoint by #parse!
#   CATEGORY_NAMES - empty Hash, later filled by #parse!
#   OFFSETS        - [0x10000, 0x1000, 0x100, 0x10]
#                    NOTE(review): presumably the span covered by each level of
#                    the multi-dimensional array - confirm against
#                    MultiDimensionalArrayBuilder.
#
# Returns nil (the value of the final assignment).
def initialize_index
  # Descending powers of sixteen: 16**4, 16**3, 16**2, 16**1
  level_offsets = 4.downto(1).map { |exponent| 16**exponent }

  @index = { CATEGORIES: [], CATEGORY_NAMES: {}, OFFSETS: level_offsets }
  @range_start = nil
end

#parse! ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/unicoder/builders/categories.rb', line 22

# Fills @index from two data files and returns it.
#
# 1. :general_categories - every matching line names either a single codepoint
#    ("0020 ; Zs") or an inclusive range ("0041..005A ; Lu") followed by a
#    category. Each codepoint is handed to assign_codepoint together with
#    @index[:CATEGORIES]; the category "Cn" is passed as nil instead of a
#    String. Afterwards compress! is applied four times and
#    remove_trailing_nils! once.
#    NOTE(review): the four compress! passes presumably correspond to the four
#    OFFSETS levels - confirm against MultiDimensionalArrayBuilder.
#
# 2. :property_value_aliases - every "gc" row with a two-character short name
#    adds short name => long name to @index[:CATEGORY_NAMES].
#
# Returns the @index Hash.
def parse!
  # The range separator is a literal "..", so both dots are escaped; an
  # unescaped ".." would accept any two characters between the codepoints.
  category_line = /^(?<from>[^. ]+)(?:\.\.(?<to>\S+))?\s*; (?<category>\S+).*$/

  parse_file :general_categories, :line, regex: category_line do |line|
    category = line["category"] == "Cn" ? nil : line["category"]
    first = line["from"].to_i(16)
    # A line without "..<to>" describes one codepoint: a one-element range
    last = line["to"] ? line["to"].to_i(16) : first

    (first..last).each do |codepoint|
      assign_codepoint(codepoint, category, @index[:CATEGORIES])
    end
  end

  4.times { compress! @index[:CATEGORIES] }
  remove_trailing_nils! @index[:CATEGORIES]

  # Exactly two non-space characters. Ruby's regex engine reads "\S{2}?" as an
  # optional group (not a lazy quantifier), which would also allow an empty
  # short name.
  alias_line = /^gc ; (?<short>\S{2}) *; (?<long>\S+).*$/

  parse_file :property_value_aliases, :line, regex: alias_line do |line|
    @index[:CATEGORY_NAMES][line["short"]] = line["long"]
  end

  @index
end