Class: Hyperll::HyperLogLog
- Inherits:
-
Object
- Object
- Hyperll::HyperLogLog
- Defined in:
- lib/hyperll/hyper_log_log.rb
Constant Summary collapse
- INT_SIZE =
32- INT_HASH =
0xFFFFFFFF
Instance Attribute Summary collapse
-
#log2m ⇒ Object
readonly
Returns the value of attribute log2m.
Class Method Summary collapse
Instance Method Summary collapse
- #cardinality ⇒ Object
-
#initialize(log2m, register_set = nil) ⇒ HyperLogLog
constructor
Constructs a new HyperLogLog instance.
- #merge(*others) ⇒ Object
- #offer(obj) ⇒ Object
- #offer_hashed(value) ⇒ Object
- #serialize ⇒ Object
Constructor Details
#initialize(log2m, register_set = nil) ⇒ HyperLogLog
Constructs a new HyperLogLog instance
log2m - accuracy of the counter; larger values are more accurate
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
# File 'lib/hyperll/hyper_log_log.rb', line 19 def initialize(log2m, register_set = nil) @log2m = log2m @count = 2 ** log2m @register_set = register_set || RegisterSet.new(@count) case log2m when 4 @alphaMM = 0.673 * @count * @count when 5 @alphaMM = 0.697 * @count * @count when 6 @alphaMM = 0.709 * @count * @count else @alphaMM = (0.7213 / (1 + 1.079 / @count)) * @count * @count end end |
Instance Attribute Details
#log2m ⇒ Object (readonly)
Returns the value of attribute log2m.
9 10 11 |
# File 'lib/hyperll/hyper_log_log.rb', line 9 def log2m @log2m end |
Class Method Details
.unserialize(serialized) ⇒ Object
11 12 13 14 |
# File 'lib/hyperll/hyper_log_log.rb', line 11 def self.unserialize(serialized) log2m, rs_size, *rs_values = serialized.unpack("N*") new(log2m, RegisterSet.new(2 ** log2m, rs_values)) end |
Instance Method Details
#cardinality ⇒ Object
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
# File 'lib/hyperll/hyper_log_log.rb', line 46 def cardinality register_sum = 0.0 zeros = 0.0 @register_set.each do |value| register_sum += 1.0 / (1 << value) zeros += 1 if value == 0 end estimate = @alphaMM * (1 / register_sum) if estimate <= (5.0 / 2.0) * @count # small range estimate (@count * Math.log(@count / zeros)).round else estimate.round end end |
#merge(*others) ⇒ Object
63 64 65 66 67 68 69 70 71 |
# File 'lib/hyperll/hyper_log_log.rb', line 63 def merge(*others) raise "Cannot merge hyperloglogs of different sizes" unless others.all? { |o| o.log2m == log2m } others.each do |other| @register_set.merge(other.register_set) end self end |
#offer(obj) ⇒ Object
36 37 38 |
# File 'lib/hyperll/hyper_log_log.rb', line 36 def offer(obj) offer_hashed(MurmurHash.hash(obj)) end |
#offer_hashed(value) ⇒ Object
40 41 42 43 44 |
# File 'lib/hyperll/hyper_log_log.rb', line 40 def offer_hashed(value) j = value >> (INT_SIZE - @log2m) r = number_of_leading_zeros(((value << @log2m) & INT_HASH) | (1 << (@log2m - 1)) + 1) + 1 @register_set.update_if_greater(j, r) end |
#serialize ⇒ Object
73 74 75 |
# File 'lib/hyperll/hyper_log_log.rb', line 73 def serialize [@log2m, @register_set.size * 4].pack("N*") + @register_set.serialize end |