Class: Unicoder::Builder::Categories

Inherits:
Object
  • Object
show all
Includes:
Unicoder::Builder, MultiDimensionalArrayBuilder
Defined in:
lib/unicoder/builders/categories.rb

Overview

Assigns categories to every codepoint using a multi-dimensional Array index structure.

Instance Attribute Summary

Attributes included from Unicoder::Builder

#index

Instance Method Summary collapse

Methods included from MultiDimensionalArrayBuilder

#assign_codepoint, #compress!

Methods included from Unicoder::Builder

#assign_codepoint, build, #export, #initialize, #parse_file

Instance Method Details

#initialize_index ⇒ Object



8
9
10
11
12
13
14
# File 'lib/unicoder/builders/categories.rb', line 8

# Resets the builder's working state.
#
# Sets @index to a Hash holding an empty Array under :CATEGORIES and an
# empty Hash under :CATEGORY_NAMES, and clears @range_start, which parse!
# uses to remember the first codepoint of a range.
#
# @return [nil]
def initialize_index
  @index = { CATEGORIES: [], CATEGORY_NAMES: {} }
  @range_start = nil
end

#parse! ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/unicoder/builders/categories.rb', line 16

# Reads the :unicode_data and :property_value_aliases files through
# parse_file and fills @index.
#
# Pass 1 (:unicode_data): every matched line hands its codepoint (parsed as
# hexadecimal) and category to assign_codepoint, targeting
# @index[:CATEGORIES]. A pair of lines whose name field is "<..., First>" /
# "<..., Last>" describes a whole range; every codepoint from the First to
# the Last one (inclusive) receives the category found on the Last line.
# Names starting with "<control" are deliberately excluded from range
# handling by the regex and are treated as single codepoints.
#
# Pass 2 (:property_value_aliases): lines matching the "gc ; <short> ; <long>"
# pattern are stored as @index[:CATEGORY_NAMES][short] = long.
#
# @return [Hash] the populated @index
# @raise [ArgumentError] if a range marker is neither First nor Last, or if
#   a Last marker appears without a preceding, still open First marker
def parse!
  parse_file :unicode_data, :line, regex: /^(?<codepoint>.+?);(?<range><(?!control).+>)?.*?;(?<category>.+?);.*$/ do |line|
    range_marker = line["range"]
    codepoint = line["codepoint"].to_i(16)
    category = line["category"]

    if range_marker.nil?
      assign_codepoint(codepoint, category, @index[:CATEGORIES])
    elsif range_marker =~ /First/
      @range_start = codepoint
    elsif range_marker =~ /Last/ && @range_start
      (@range_start..codepoint).each do |member|
        assign_codepoint(member, category, @index[:CATEGORIES])
      end
      # The range is closed. Forget its start so that a later "Last" without
      # its own "First" is reported as inconsistent instead of silently
      # re-using this stale start.
      @range_start = nil
    else
      raise ArgumentError, "inconsistent range found in data, don't know what to do"
    end
  end

  # NOTE(review): four compress! passes, presumably one per nesting level of
  # the multi-dimensional index - confirm against MultiDimensionalArrayBuilder.
  4.times { compress!(@index[:CATEGORIES]) }

  parse_file :property_value_aliases, :line, regex: /^gc ; (?<short>\S{2}?) *; (?<long>\S+).*$/ do |line|
    @index[:CATEGORY_NAMES][line["short"]] = line["long"]
  end

  @index
end