Module: Twins

Defined in:
lib/twins.rb,
lib/twins/version.rb,
lib/twins/utilities.rb

Defined Under Namespace

Modules: Utilities

Constant Summary collapse

VERSION =
"0.0.4"

Class Method Summary collapse

Class Method Details

.consolidate(collection, options = {}) ⇒ HashWithIndifferentAccess, Nil

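Consolidates each key across the collection, choosing its mode or, when a priority is given for that key, the value with the lowest distance from that priority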

Parameters:

  • collection (Enumerable)

    A collection of Hash or Hash-like objects

  • options (Hash) (defaults to: {})

Returns:

  • (HashWithIndifferentAccess, Nil)


11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/twins.rb', line 11

# Consolidates a collection of Hash or Hash-like objects into a single hash,
# choosing one value per key.
#
# For every key found in any element:
# * if the value is a Hash, the nested hashes of all elements that have the
#   key are consolidated recursively (the recursive call receives no options,
#   so priorities are not propagated to nested keys);
# * otherwise, if options[:priority] holds a truthy entry for the key, the
#   first value with the smallest Twins::Utilities.distance from that
#   priority wins;
# * otherwise the result of Twins::Utilities.mode over the values is used.
#
# @param collection [Enumerable] a collection of Hash or Hash-like objects
#   (non-Hash elements are converted with #to_hash)
# @param options [Hash] optional settings; only :priority is read here
# @return [HashWithIndifferentAccess, nil] the consolidated hash, or nil
#   when collection.any? is false
def consolidate(collection, options = {})
  return nil unless collection.any?
  ensure_collection_uniformity!(collection)

  hashes = collection.first.is_a?(Hash) ? collection : collection.map(&:to_hash)
  priorities = options.with_indifferent_access[:priority]
  consolidated = {}

  hashes.each do |element|
    element.each_pair do |key, value|
      # The outcome for a key depends only on the whole collection, so it is
      # computed once. Revisiting it for later elements used to overwrite a
      # recursively consolidated nested hash with a plain mode.
      next if consolidated.key?(key)

      # Filter elements without a given key to avoid unintentionally nil values
      values = hashes.select { |el| el.has_key?(key) }.map { |el| el[key] }

      consolidated[key] =
        if value.is_a?(Hash)
          # Recursively consolidate nested hashes
          consolidate(values)
        elsif (priority = priorities.try(:[], key))
          # The best candidate is the first element with the shortest distance
          values.min_by { |candidate| Twins::Utilities.distance(priority, candidate) }
        else
          # The best candidate is the mode or the first one
          Twins::Utilities.mode(values)
        end
    end
  end

  consolidated.with_indifferent_access
end

.pick(collection, options = {}) ⇒ Object, Nil

Find element with the highest count of modes or the lowest overall distances

Parameters:

  • collection (Enumerable)

    A collection of Hash or Hash-like objects

  • options (Hash) (defaults to: {})

Returns:

  • (Object, Nil)


56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/twins.rb', line 56

# Picks a single element from the collection, delegating to
# pick_by_priority when options[:priority] is truthy and to pick_by_mode
# otherwise.
#
# @param collection [Enumerable] a collection of Hash or Hash-like objects
# @param options [Hash] optional settings; only :priority is read here
# @return [Object, nil] whatever the delegate returns, or nil when
#   collection.any? is false
def pick(collection, options = {})
  return nil unless collection.any?
  ensure_collection_uniformity!(collection)

  priorities = options.with_indifferent_access[:priority]
  return pick_by_priority(collection, priorities) if priorities

  pick_by_mode(collection)
end

.pick_by_mode(collection) ⇒ Object, Nil

Find the element with the highest count of modes

Parameters:

  • collection (Enumerable)

Returns:

  • (Object, Nil)


74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/twins.rb', line 74

# Returns the element of the collection whose values coincide with the
# per-key mode (as computed by Twins::Utilities.mode) for the largest
# number of its own keys.
#
# Non-Hash elements are compared through #to_hash.with_indifferent_access,
# but the element returned is always the original object.
#
# @param collection [Enumerable] a collection of Hash or Hash-like objects
# @return [Object, nil] the best-scoring element, or nil when
#   collection.any? is false
def pick_by_mode(collection)
  return nil unless collection.any?

  already_hashes = collection.first.is_a?(Hash)
  normalize = lambda do |element|
    already_hashes ? element : element.to_hash.with_indifferent_access
  end
  candidates = already_hashes ? collection : collection.map(&normalize)

  collection.max_by do |element|
    candidate = normalize.call(element)

    # Score: how many of this element's keys carry the collection-wide mode
    candidate.count do |key, _value|
      # Only elements that actually have the key contribute to its mode,
      # so missing keys do not show up as nil values
      peers = candidates.select { |el| el.has_key?(key) }.map { |el| el[key] }
      candidate[key] == Twins::Utilities.mode(peers)
    end
  end
end

.pick_by_priority(collection, priorities) ⇒ Object, Nil

Find the element with the lowest overall distances

Parameters:

  • collection (Enumerable)
  • priorities (Hash)

Returns:

  • (Object, Nil)

Raises:

  • (ArgumentError)


109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/twins.rb', line 109

# Returns the element of the collection with the smallest total distance
# from the given priorities, where the total is the sum of
# Twins::Utilities.distance(priority, element[key]) over every priority key.
#
# Non-Hash elements are read through #to_hash.with_indifferent_access, but
# the element returned is always the original object.
#
# @param collection [Enumerable] a collection of Hash or Hash-like objects
# @param priorities [Hash] the preferred value for each key of interest
# @return [Object, nil] the closest element, or nil when collection.any?
#   is false (checked before priorities is validated)
# @raise [ArgumentError] if priorities is not a Hash
def pick_by_priority(collection, priorities)
  return nil unless collection.any?
  raise ArgumentError, "priorities must be a Hash, got #{priorities.class}" unless priorities.is_a?(Hash)

  # Whether elements need converting does not change per element
  already_hashes = collection.first.is_a?(Hash)

  collection.min_by do |element|
    attributes = already_hashes ? element : element.to_hash.with_indifferent_access

    priorities.sum do |key, target|
      Twins::Utilities.distance(target, attributes[key])
    end
  end
end