Class: Wordmap

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/wordmap.rb,
lib/wordmap/access.rb,
lib/wordmap/builder.rb,
lib/wordmap/version.rb,
lib/wordmap/file_access.rb,
lib/wordmap/index_value.rb

Defined Under Namespace

Modules: Access, Builder, FileAccess, IndexValue

Constant Summary collapse

SPACER =
"\0".freeze
LTRIM_REGEX =
/\A#{SPACER}+/.freeze
VERSION =
'0.3.0'

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(path) ⇒ Wordmap

Returns a new instance of Wordmap.



50
51
52
53
54
# File 'lib/wordmap.rb', line 50

# Opens the wordmap stored in the directory at +path+.
#
# Descriptors are loaded from the "vec*" and "data" entries, indexes from
# the "i-*" entries, and the size is taken from the data descriptor's
# :cell_count metadata.
#
# @param path [String] directory containing the wordmap files
def initialize(path)
  descriptor_paths = Dir["#{path}/{vec*,data}"]
  @descriptors = Access.load_descriptors(descriptor_paths, SPACER)

  index_paths = Dir["#{path}/i-*"]
  @indexes = load_indexes(index_paths)

  data_meta = @descriptors['data'][:meta]
  @size = data_meta[:cell_count]
end

Instance Attribute Details

#size ⇒ Object (readonly)

Returns the value of attribute size.



10
11
12
# File 'lib/wordmap.rb', line 10

# Read-only accessor for @size.
#
# @return [Object] the value stored in @size (set by #initialize from the
#   data descriptor's :cell_count metadata)
def size
  @size
end

Class Method Details

.create(path, hash, index_names = []) ⇒ Object

Raises:

  • (ArgumentError)


16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/wordmap.rb', line 16

# Builds a new wordmap at +path+ from +hash+.
#
# All files are first written into a temporary directory, which is copied
# to +path+ only once every vector, the data file and all indexes have been
# produced.
#
# @param path [String] destination; nothing may exist there yet
# @param hash [Hash] entries to store (handed to Builder.build_vectors and
#   Builder.write_data)
# @param index_names [Array] names of the indexes to build; every index that
#   ends up non-empty is stored as a nested wordmap named "i-<name>.wmap"
# @yield [name, key, value] invoked for each index name and each entry that
#   Builder.write_data reports; only called when +index_names+ is non-empty
# @yieldreturn [Object, Array, nil] index key(s) for that entry; nils are
#   discarded
# @raise [ArgumentError] if a file or directory already exists at +path+
def create(path, hash, index_names = [])
  # File.exist? rather than Dir.exist?: a regular file at +path+ must be
  # rejected here too, otherwise the build runs to completion and only the
  # final FileUtils.cp_r fails.
  raise ArgumentError, "Path already exists: #{path}" if File.exist?(path)

  index_data = index_names.map { |name| [name, {}] }.to_h
  vecs = Builder.build_vectors(hash)
  # Product of all vector sizes.
  cells_c = vecs.map(&:size).reduce(:*)

  Dir.mktmpdir do |dirpath|
    vecs.each_with_index do |vec, i|
      Builder.write_vector("#{dirpath}/vec#{i}", vec, SPACER)
    end

    Builder.write_data(dirpath, vecs, cells_c, hash, SPACER) do |k, v, i|
      index_names.each do |name|
        # The block may return a single key, several keys, or nil.
        Array(yield(name, k, v)).compact.each do |index_key|
          (index_data[name][index_key] ||= []) << i
        end
      end
    end

    index_data.each do |name, data|
      next if data.empty?

      data.transform_values! { |v| IndexValue.pack(v) }
      # Each index is itself a wordmap (created without sub-indexes).
      create("#{dirpath}/i-#{name}.wmap", data)
    end

    FileUtils.cp_r(dirpath, path)
  end
end

Instance Method Details

#[](*key, trace: nil) ⇒ Object



83
84
85
# File 'lib/wordmap.rb', line 83

# Looks up +key+ and returns everything found as an array.
#
# @param key [Array] key parts, forwarded as one array to Access.each_by_key
# @param trace [Object, nil] forwarded unchanged to Access.each_by_key
# @return [Array] the collected results of Access.each_by_key
def [](*key, trace: nil)
  matches = Access.each_by_key(@descriptors, key, LTRIM_REGEX, trace)
  matches.to_a
end

#each(vec_or_index = nil, trace: nil) ⇒ Object



87
88
89
90
# File 'lib/wordmap.rb', line 87

# Iterates over whatever Access.each produces for +vec_or_index+.
#
# @param vec_or_index [Object, nil] forwarded unchanged to Access.each
# @param trace [Object, nil] forwarded unchanged to Access.each
# @yield [value] each value of the underlying enumeration
# @return [Object] the enumeration itself when no block is given, otherwise
#   the result of iterating it
def each(vec_or_index = nil, trace: nil)
  entries = Access.each(@descriptors, @indexes, vec_or_index, LTRIM_REGEX, trace)
  return entries unless block_given?

  entries.each { |entry| yield(entry) }
end

#query(*query, trace: nil) ⇒ Object

Query consists of one or more clauses. Each clause is an array.

Clauses can have 2 shapes:

1. ['key1', 'key2', ...] # match any of these main keys
2. [:index_name, 'key1', 'key2', ...] # match by any of these index keys
  • OR logic is used inside a clause (matches are unioned)

  • AND logic is used between clauses (matches are intersected)

Example 1:

query(['horse1', 'horse2', 'horse3'], [:trait, 'fluffy'])

Out of horse1, horse2, horse3 return only the fluffy ones.

Example 2:

query([:color, 'orange', 'green'], [:type, 'vegetable', 'fruit'])

Return every entry that is orange or green and is also a fruit or a vegetable.



77
78
79
80
81
# File 'lib/wordmap.rb', line 77

# Runs +query+ through Access.each_by_query.
#
# @param query [Array<Array>] the clauses, forwarded as one array
# @param trace [Object, nil] forwarded unchanged to Access.each_by_query
# @yield [value] each value of the underlying enumeration
# @return [Object] the enumeration itself when no block is given, otherwise
#   the result of iterating it
def query(*query, trace: nil)
  matches = Access.each_by_query(
    @descriptors, @indexes, query, LTRIM_REGEX, trace
  )
  return matches unless block_given?

  matches.each { |match| yield(match) }
end