Module: Unicoder::Builder

Included in:
Blocks, Categories, Confusable, DisplayWidth, Scripts
Defined in:
lib/unicoder/builder.rb,
lib/unicoder/builders/blocks.rb,
lib/unicoder/builders/scripts.rb,
lib/unicoder/builders/categories.rb,
lib/unicoder/builders/confusable.rb,
lib/unicoder/builders/display_width.rb

Overview

A builder defines a parse function which translates one (or more) Unicode data files into an index hash.

Defined Under Namespace

Classes: Blocks, Categories, Confusable, DisplayWidth, Scripts

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#index ⇒ Object (readonly)

Returns the value of attribute index.



7
8
9
# File 'lib/unicoder/builder.rb', line 7

# Reader for the @index instance variable.
#
# @return the current value of @index
def index
  @index
end

Class Method Details

.build(identifier, **options) ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/unicoder/builder.rb', line 58

# Builds the index for one builder and writes it to disk.
#
# Loads "builders/<identifier>", looks up the matching CamelCase constant,
# instantiates it, runs its parse! step, exports the index and writes the
# result to "<destination>.<format>" (plus ".gz" when gzip is requested).
#
# @param identifier [Symbol, String] snake_case builder name, e.g. :display_width
# @param options [Hash] recognised keys:
#   :format (default :marshal), :unicode_version (default
#   CURRENT_UNICODE_VERSION), :destination (default: the identifier), :gzip;
#   the whole hash is also forwarded to the builder's #export
# @return [nil] the result of the final puts
def self.build(identifier, **options)
  format = options[:format] || :marshal
  require_relative "builders/#{identifier}"
  # "display_width" -> "DisplayWidth"
  class_name = identifier.to_s.gsub(/(?:^|_)([a-z])/) { Regexp.last_match(1).upcase }
  builder = const_get(class_name).new(options[:unicode_version] || CURRENT_UNICODE_VERSION)
  puts "Building index for #{identifier}…"
  builder.parse!
  # Splat explicitly: #export only takes keywords, and Ruby 3 no longer
  # converts a positional Hash into keyword arguments.
  index_file = builder.export(**options)

  destination = "#{options[:destination] || identifier}.#{format}"
  destination += ".gz" if options[:gzip]
  # Marshal and gzip output is binary, so avoid text-mode newline/encoding
  # translation when writing.
  bytes = File.binwrite(destination, index_file)

  puts "Index created at: #{destination} (#{bytes} bytes written)"
end

Instance Method Details

#assign_codepoint(codepoint, value, index = @index) ⇒ Object



18
19
20
# File 'lib/unicoder/builder.rb', line 18

# Stores +value+ under the key +codepoint+ in +index+.
#
# @param codepoint the key to assign
# @param value the value to store
# @param index the container written to; defaults to @index
# @return the assigned value
def assign_codepoint(codepoint, value, index = @index)
  index[codepoint] = value
end

#export(format: :marshal, **options) ⇒ Object



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/unicoder/builder.rb', line 40

# Serializes the index into a string, optionally gzip-compressed.
#
# @param format [Symbol, String] :marshal (default) or :json
# @param options [Hash] :verbose prints the index before serializing;
#   :gzip compresses the serialized result
# @return [String] the serialized (and possibly compressed) index
# @raise [ArgumentError] if +format+ is neither :marshal nor :json
def export(format: :marshal, **options)
  p index if options[:verbose]

  index_file =
    case format.to_sym
    when :marshal then Marshal.dump(index)
    when :json    then JSON.dump(index)
    else
      # An unknown format used to fall through and yield nil, which only
      # surfaced later as an empty or broken output file.
      raise ArgumentError, "Unknown export format: #{format.inspect}"
    end

  options[:gzip] ? Gem.gzip(index_file) : index_file
end

#initialize(unicode_version = nil) ⇒ Object



9
10
11
12
# File 'lib/unicoder/builder.rb', line 9

# Stores the given Unicode version in @unicode_version and then calls
# #initialize_index to set up the index.
#
# @param unicode_version the version to remember (nil when omitted)
def initialize(unicode_version = nil)
  @unicode_version = unicode_version
  initialize_index
end

#initialize_index ⇒ Object



14
15
16
# File 'lib/unicoder/builder.rb', line 14

# Sets @index to a new, empty Hash.
#
# @return [Hash] the freshly assigned empty hash
def initialize_index
  @index = {}
end

#parse! ⇒ Object

Raises:

  • (ArgumentError)


22
23
24
# File 'lib/unicoder/builder.rb', line 22

# Placeholder implementation that always raises.
# NOTE(review): presumably each including builder overrides this with its
# real parsing logic — confirm against the builder classes.
#
# @raise [ArgumentError] always, with the message "abstract"
def parse!
  raise ArgumentError, "abstract"
end

#parse_file(identifier, parse_mode, **parse_options) ⇒ Object

Raises:

  • (ArgumentError)


26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/unicoder/builder.rb', line 26

# Reads one Unicode data file and yields its matching lines.
#
# The file name template is looked up in UNICODE_FILES, "VERSION" is replaced
# with @unicode_version, and the file is read from LOCAL_DATA_DIRECTORY
# (Downloader.fetch is called first when it is not present there).
#
# @param identifier [Symbol, String] key into UNICODE_FILES
# @param parse_mode [Symbol] only :line does anything; other modes are a no-op
# @param parse_options [Hash] :regex is matched against each line in :line mode
# @yield [Hash] capture-group name => captured text, for every matching line
# @raise [ArgumentError] if +identifier+ has no entry in UNICODE_FILES
def parse_file(identifier, parse_mode, **parse_options)
  template = UNICODE_FILES[identifier.to_sym]
  raise ArgumentError, "No valid file identifier or filename given" unless template

  # Non-destructive sub: the template is shared through UNICODE_FILES, so
  # mutating it would leak this version into later lookups (and fails outright
  # on frozen strings).
  filename = template.sub('VERSION', @unicode_version)
  # Check the same path that is read below, not one relative to the cwd.
  path = LOCAL_DATA_DIRECTORY + filename
  Downloader.fetch(identifier) unless File.exist?(path)
  file = File.read(path)

  return unless parse_mode == :line

  regex = parse_options[:regex]
  file.each_line do |line|
    next unless line =~ regex

    match = Regexp.last_match
    yield match.names.zip(match.captures).to_h
  end
end