Module: Unicoder::Builder

Included in:
Blocks, Categories, Confusable, DisplayWidth, Emoji, Name, NumericValue, Scripts, SequenceName, Types
Defined in:
lib/unicoder/builder.rb,
lib/unicoder/builders/name.rb,
lib/unicoder/builders/emoji.rb,
lib/unicoder/builders/types.rb,
lib/unicoder/builders/blocks.rb,
lib/unicoder/builders/scripts.rb,
lib/unicoder/builders/categories.rb,
lib/unicoder/builders/confusable.rb,
lib/unicoder/builders/display_width.rb,
lib/unicoder/builders/numeric_value.rb,
lib/unicoder/builders/sequence_name.rb

Overview

A builder defines a parse function which translates one (or more) Unicode data files into an index hash.

Defined Under Namespace

Classes: Blocks, Categories, Confusable, DisplayWidth, Emoji, Name, NumericValue, Scripts, SequenceName, Types

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#formats ⇒ Object (readonly)

Returns the value of attribute formats.



8
9
10
# File 'lib/unicoder/builder.rb', line 8

# Reader for the @formats instance variable.
#
# @return [Object] the current value of @formats (nil if it was never assigned)
def formats
  @formats
end

#index ⇒ Object (readonly)

Returns the value of attribute index.



8
9
10
# File 'lib/unicoder/builder.rb', line 8

# Reader for the @index instance variable.
#
# @return [Object] the current value of @index (nil if it was never assigned)
def index
  @index
end

#option ⇒ Object

Returns the value of attribute option.



8
9
10
# File 'lib/unicoder/builder.rb', line 8

# Reader for the @option instance variable.
#
# @return [Object] the current value of @option (nil if it was never assigned)
def option
  @option
end

Class Method Details

.build(identifier, **options) ⇒ Object



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# File 'lib/unicoder/builder.rb', line 113

# Builds the index for +identifier+ and writes it to a file.
#
# Loads builders/<identifier>.rb, instantiates the builder class whose name is
# the camel-cased identifier, runs its parse! step, exports the index and
# writes the exported data to the destination path.
#
# @param identifier [Symbol, String] snake_case builder name
# @param options [Hash] keys read here: :format (falls back to :marshal),
#   :unicode_version, :emoji_version, :option, :destination (falls back to
#   the identifier) and :gzip; the whole hash is also forwarded to #export
# @return [nil] the result of the final puts
def self.build(identifier, **options)
  format = options[:format] || :marshal
  require_relative "builders/#{identifier}"
  # require "unicoder/builders/#{identifier}"
  class_name = identifier.to_s.gsub(/(?:^|_)([a-z])/) { Regexp.last_match(1).upcase }
  builder = const_get(class_name).new(
    options[:unicode_version],
    options[:emoji_version],
    format
  )
  puts "Building index for #{identifier}…"
  builder.option = options[:option] if options[:option]
  builder.parse!
  # Hand over the resolved format explicitly: an explicit `format: nil` in
  # options would otherwise replace export's own default and make it fail.
  index_file = builder.export(**options, format: format)

  destination = options[:destination] || identifier.to_s
  destination += builder.formats.dig(format.to_sym, :ext).to_s
  destination += ".gz" if options[:gzip]
  # Marshal and gzip output is binary, so write it without any newline
  # translation or encoding conversion.
  bytes = File.binwrite(destination, index_file)

  puts "Index created at: #{destination} (#{bytes} bytes written)"
end

Instance Method Details

#assign(sub_index_name, codepoint, value) ⇒ Object



55
56
57
# File 'lib/unicoder/builder.rb', line 55

# Stores +value+ for +codepoint+ inside the sub-index found under
# +sub_index_name+ in the index.
#
# @param sub_index_name [Object] key used to look up the sub-index
# @param codepoint [Object] codepoint passed on to #assign_codepoint
# @param value [Object] value to store
# @return [Object] whatever #assign_codepoint returns
def assign(sub_index_name, codepoint, value)
  target = index[sub_index_name]
  assign_codepoint(codepoint, value, target)
end

#assign_codepoint(codepoint, value, idx = @index) ⇒ Object



47
48
49
50
51
52
53
# File 'lib/unicoder/builder.rb', line 47

# Stores +value+ in +idx+ under a key derived from +codepoint+.
#
# When the current option matches /charkeys/, the key is the codepoint packed
# into a string with the "U*" directive; otherwise the codepoint itself is
# used as the key.
#
# @param codepoint [Integer] codepoint to store the value for
# @param value [Object] value to store
# @param idx [Hash] target hash, @index unless given
# @return [Object] the stored value
def assign_codepoint(codepoint, value, idx = @index)
  key = option =~ /charkeys/ ? [codepoint].pack("U*") : codepoint
  idx[key] = value
end

#export(format: :marshal, **options) ⇒ Object



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/unicoder/builder.rb', line 87

# Serializes the index into the requested format.
#
# @param format [Symbol, String] one of :marshal, :json or :esm
# @param options [Hash] :verbose prints the index with p, :meta merges the
#   index into the hash returned by #meta before dumping, :gzip compresses the
#   result with Gem::Util.gzip
# @return [String] the serialized (and optionally gzipped) index
# @raise [ArgumentError] if +format+ is none of the supported formats
def export(format: :marshal, **options)
  p index if options[:verbose]

  idx = options[:meta] ? meta.merge(index) : index

  index_file =
    case format.to_sym
    when :marshal
      Marshal.dump(idx)
    when :json
      JSON.dump(idx)
    when :esm
      "export default " + JSON.dump(idx)
    else
      # Without this branch an unknown format silently produced nil.
      raise ArgumentError, "Unsupported export format: #{format.inspect}"
    end

  options[:gzip] ? Gem::Util.gzip(index_file) : index_file
end

#initialize(unicode_version = nil, emoji_version = nil, format = nil) ⇒ Object



36
37
38
39
40
41
# File 'lib/unicoder/builder.rb', line 36

# Sets up versions, the format-specific option string and an empty index.
#
# @param unicode_version [String, nil] falls back to CURRENT_UNICODE_VERSION
# @param emoji_version [String, nil] falls back to CURRENT_EMOJI_VERSION
# @param format [Symbol, String, nil] key looked up in #formats; its :option
#   entry becomes @option, or "" when the format or the entry is missing
def initialize(unicode_version = nil, emoji_version = nil, format = nil)
  @unicode_version = unicode_version || CURRENT_UNICODE_VERSION
  @emoji_version = emoji_version || CURRENT_EMOJI_VERSION
  # format defaults to nil and nil has no to_sym, so only look the format up
  # when one was actually given.
  format_spec = format && formats[format.to_sym]
  @option = (format_spec && format_spec[:option]) || ""
  initialize_index
end

#initialize_index ⇒ Object



43
44
45
# File 'lib/unicoder/builder.rb', line 43

# Resets @index to a new, empty hash.
#
# @return [Hash] the new empty index
def initialize_index
  @index = Hash.new
end

#meta ⇒ Object



27
28
29
30
31
32
33
34
# File 'lib/unicoder/builder.rb', line 27

# Describes the generator and the Unicode version in use.
#
# @return [Hash] a hash with the single key :META, whose value holds
#   :generator ("unicoder v" plus Unicoder::VERSION) and :unicodeVersion
#   (the value of @unicode_version)
def meta
  details = {}
  details[:generator] = "unicoder v#{Unicoder::VERSION}"
  details[:unicodeVersion] = @unicode_version
  { META: details }
end

#parse! ⇒ Object

Raises:

  • (ArgumentError)


59
60
61
# File 'lib/unicoder/builder.rb', line 59

# Placeholder for the parsing step; this implementation always raises.
# NOTE(review): presumably every including builder overrides this — confirm.
#
# @raise [ArgumentError] always, with the message "abstract"
def parse!
  raise ArgumentError, "abstract"
end

#parse_file(identifier, parse_mode, **parse_options) ⇒ Object

Raises:

  • (ArgumentError)


63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/unicoder/builder.rb', line 63

# Reads the data file registered for +identifier+ and yields its contents.
#
# The file name template comes from UNICODE_FILES. The placeholders
# UNICODE_VERSION, EMOJI_VERSION and EMOJI_RELATED_VERSION are filled in, the
# first ".zip" and a leading "http:/", "https:/" or "ftp:/" are removed, and the
# result is read relative to LOCAL_DATA_DIRECTORY. Downloader.fetch is called
# first when that file does not exist.
#
# @param identifier [Symbol, String] key into UNICODE_FILES
# @param parse_mode [Symbol] :line yields one hash of named captures per line
#   matching parse_options[:regex]; :xml yields the document returned by
#   Oga.parse_xml; any other value yields the file contents as a string
# @param parse_options [Hash] :regex is used in :line mode
# @yield [Hash, Object, String] see +parse_mode+
# @raise [ArgumentError] if UNICODE_FILES has no entry for +identifier+
def parse_file(identifier, parse_mode, **parse_options)
  filename = UNICODE_FILES[identifier.to_sym]
  raise ArgumentError, "No valid file identifier or filename given" unless filename

  filename = filename.dup
  filename.sub!('UNICODE_VERSION', @unicode_version)
  filename.sub!('EMOJI_VERSION', @emoji_version)
  # String#sub! type-checks its replacement before looking for a match, so only
  # do the lookup-based substitution when the placeholder is really there;
  # otherwise a missing mapping would raise for files that never use it.
  if filename.include?('EMOJI_RELATED_VERSION')
    filename.sub!('EMOJI_RELATED_VERSION', EMOJI_RELATED_UNICODE_VERSIONS[@emoji_version])
  end
  filename.sub!('.zip', '')
  filename.sub!(/\A(https?|ftp):\//, "")

  local_path = LOCAL_DATA_DIRECTORY + filename
  Downloader.fetch(identifier) unless File.exist?(local_path)
  file = File.read(local_path)

  case parse_mode
  when :line
    pattern = parse_options[:regex]
    file.each_line do |line|
      next unless line =~ pattern

      match = Regexp.last_match
      yield match.names.zip(match.captures).to_h
    end
  when :xml
    require "oga" # loaded lazily, only XML sources need it
    yield Oga.parse_xml(file)
  else
    yield file
  end
end