Module: PgHashFunc::Hasher

Defined in:
lib/pg_hash_func/hasher.rb

Overview

Internal implementation of PostgreSQL hashing logic.

Constant Summary collapse

HASH_PARTITION_SEED =

Constants derived from PostgreSQL source/behavior

0x7A5B22367996DCFD
PARTITION_MAGIC_CONSTANT =
0x4992394d24f64163
UINT32_MASK =
0xFFFFFFFF
UINT64_MASK =
0xFFFFFFFFFFFFFFFF

Class Method Summary collapse

Class Method Details

.calculate_partition_index_bigint(value:, seed:, magic_constant:, num_partitions:) ⇒ Object

Calculates the target partition index for a given bigint value.

Raises:

  • (ArgumentError)


118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/pg_hash_func/hasher.rb', line 118

def self.calculate_partition_index_bigint(value:, seed:, magic_constant:, num_partitions:)
  raise ArgumentError, "Number of partitions must be positive" unless num_partitions.positive?

  hash_val = hashint8extended(value: value, seed: seed)

  # First, interpret the 64-bit hash as signed, matching PostgreSQL's
  # behavior where the C function's uint64 return value is received in SQL
  # as a signed int8.
  signed_hash = hash_val >= 0x8000_0000_0000_0000 ? hash_val - (1 << 64) : hash_val

  # Now add the magic constant in signed 64-bit arithmetic (two's-
  # complement wrap-around). We keep the wrap-around by masking back to
  # 64-bits as PostgreSQL does with uint64 arithmetic before the cast.
  unsigned_sum = (signed_hash + magic_constant) & UINT64_MASK

  # Cast that wrapped result back to signed 64-bit for the final modulo.
  signed_sum = unsigned_sum >= 0x8000_0000_0000_0000 ? unsigned_sum - (1 << 64) : unsigned_sum

  # Follow the expression that postgres uses internally:
  rem = signed_sum.remainder(num_partitions)
  idx = (rem + num_partitions) % num_partitions
  idx.to_i
end

.calculate_partition_index_int4(value:, seed:, num_partitions:) ⇒ Object

Calculates the target partition index for a given int4 value.

Raises:

  • (ArgumentError)


143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'lib/pg_hash_func/hasher.rb', line 143

def self.calculate_partition_index_int4(value:, seed:, num_partitions:)
  raise ArgumentError, "Number of partitions must be positive" unless num_partitions.positive?

  hash_val = hashint4extended(value: value, seed: seed)

  signed_hash = hash_val >= 0x8000_0000_0000_0000 ? hash_val - (1 << 64) : hash_val

  # PostgreSQL does *not* add the partition magic constant for int2/int4
  # hash partitioning (see get_hash_partition_greatest_modulus_int4 in the
  # backend). Only bigint types add the magic. Therefore we skip it here.
  unsigned_sum = signed_hash & UINT64_MASK

  signed_sum = unsigned_sum >= 0x8000_0000_0000_0000 ? unsigned_sum - (1 << 64) : unsigned_sum

  rem = signed_sum.remainder(num_partitions)
  idx = (rem + num_partitions) % num_partitions
  idx.to_i
end

.final(state) ⇒ Object

Corresponds to final(a, b, c) macro in hashfn.c



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/pg_hash_func/hasher.rb', line 48

def self.final(state)
  a, b, c = state
  c ^= b
  c = (c - rot(b, 14)) & UINT32_MASK
  a ^= c
  a = (a - rot(c, 11)) & UINT32_MASK
  b ^= a
  b = (b - rot(a, 25)) & UINT32_MASK
  c ^= b
  c = (c - rot(b, 16)) & UINT32_MASK
  a ^= c
  a = (a - rot(c, 4)) & UINT32_MASK
  b ^= a
  b = (b - rot(a, 14)) & UINT32_MASK
  c ^= b
  c = (c - rot(b, 24)) & UINT32_MASK
  [a, b, c]
end

.hash_uint32_extended(key_value, seed) ⇒ Object

Corresponds to hash_bytes_uint32_extended(uint32 k, uint64 seed) This implementation is based on analysis of specific PostgreSQL code paths related to partitioning, and may differ slightly from a general lookup3 implementation.



70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/pg_hash_func/hasher.rb', line 70

def self.hash_uint32_extended(key_value, seed)
  key_value &= UINT32_MASK
  seed &= UINT64_MASK

  initval = 0x9e3779b9 + 4 + 3_923_095
  a = b = c = initval & UINT32_MASK

  # Perturb state with seed parts and mix if seed is non-zero
  if seed != 0
    a = (a + (seed >> 32)) & UINT32_MASK
    b = (b + (seed & UINT32_MASK)) & UINT32_MASK
    a, b, c = mix([a, b, c])
  end

  a = (a + key_value) & UINT32_MASK
  _, b, c = final([a, b, c])

  (((b.to_i << 32) | c.to_i) & UINT64_MASK)
end

.hashint4extended(value:, seed:) ⇒ Object

Corresponds to hashint4extended(int32 val, uint64 seed) logic



112
113
114
115
# File 'lib/pg_hash_func/hasher.rb', line 112

def self.hashint4extended(value:, seed:)
  val32 = value.to_i & UINT32_MASK
  hash_uint32_extended(val32, seed & UINT64_MASK)
end

.hashint8extended(value:, seed:) ⇒ Object

Corresponds to hashint8extended(int64 val, uint64 seed) logic



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/pg_hash_func/hasher.rb', line 91

def self.hashint8extended(value:, seed:)
  val = value.to_i
  seed &= UINT64_MASK

  val_masked64 = val & UINT64_MASK
  lohalf = (val_masked64 & UINT32_MASK)
  hihalf = ((val_masked64 >> 32) & UINT32_MASK)

  val_int64 = val_masked64 > 0x7FFFFFFFFFFFFFFF ? (val_masked64 - (1 << 64)) : val_masked64
  is_positive_or_zero_int64 = (val_int64 >= 0)

  lohalf ^= if is_positive_or_zero_int64
              hihalf
            else
              (~hihalf & UINT32_MASK)
            end

  hash_uint32_extended(lohalf, seed)
end

.mix(state) ⇒ Object

Corresponds to mix(a, b, c) macro in hashfn.c



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/pg_hash_func/hasher.rb', line 24

def self.mix(state)
  a, b, c = state
  a = (a - c) & UINT32_MASK
  a ^= rot(c, 4)
  c = (c + b) & UINT32_MASK
  b = (b - a) & UINT32_MASK
  b ^= rot(a, 6)
  a = (a + c) & UINT32_MASK
  c = (c - b) & UINT32_MASK
  c ^= rot(b, 8)
  b = (b + a) & UINT32_MASK
  a = (a - c) & UINT32_MASK
  a ^= rot(c, 16)
  c = (c + b) & UINT32_MASK
  b = (b - a) & UINT32_MASK
  b ^= rot(a, 19)
  a = (a + c) & UINT32_MASK
  c = (c - b) & UINT32_MASK
  c ^= rot(b, 4)
  b = (b + a) & UINT32_MASK
  [a, b, c]
end

.rot(value, rotation_bits) ⇒ Object

Corresponds to rot(x, k) -> pg_rotate_left32(x, k)



18
19
20
21
# File 'lib/pg_hash_func/hasher.rb', line 18

def self.rot(value, rotation_bits)
  value &= UINT32_MASK
  (((value << rotation_bits) | (value >> (32 - rotation_bits))) & UINT32_MASK)
end