Class: IdPack::IdPacker

Inherits:
Object
  • Object
show all
Defined in:
lib/id_pack/id_packer.rb

Overview

This class encodes an integer array into our compressed string format and decodes it back. Its two core methods are encode and decode.

Usage:

encode:
  a usual use case of encode is to provide the server with object ids
  that have already been fetched and hence we don't need their data to
  be returned

  Example:

    IdPack::IdPacker.new.encode([5, 6, 21, 23, 25]) # => "_F~C_P.V"

decode:
  mainly used by the server to convert the compressed string back into
  the integer array

  Example:

    IdPack::IdPacker.new.decode("_F~C_P.V") # => [5, 6, 21, 23, 25]

Defined Under Namespace

Classes: InvalidEncodedCharException

Constant Summary collapse

SPACES_PREFIX =
'_'.freeze
BINARY_PREFIX =
'.'.freeze
RANGE_PREFIX =
'~'.freeze
WINDOW_SIZE =
10
EXCLUDE_NIL =
true
ENCODED_NUMBER_CHARS =
"#{(('A'..'Z').to_a + ('a'..'z').to_a + ('0'..'9').to_a).join}-".freeze

Instance Method Summary collapse

Instance Method Details

#decode(encoded_caches) ⇒ Object

"_F~C_P.V" => [5, 6, 21, 23, 25]



136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/id_pack/id_packer.rb', line 136

# Decodes a compressed id string back into the list of ids.
#
# The input is read one character at a time. A prefix character
# (SPACES_PREFIX, BINARY_PREFIX or RANGE_PREFIX) opens a new segment and the
# characters that follow it form that segment's encoded number. Every
# finished segment is handed to convert_encoded_number_to_ids together with
# the running start id, and the ids it returns are appended to the result.
#
# Example (from the class overview):
#
#   IdPack::IdPacker.new.decode("_F~C_P.V") # => [5, 6, 21, 23, 25]
#
# @param encoded_caches [String] the compressed string
# @return [Array] the decoded ids, or [] when an
#   InvalidEncodedCharException is raised while decoding
def decode(encoded_caches)
  active_prefix = nil
  token = ''
  cursor = 0
  decoded = []

  # Converts the segment collected so far (if there is one) and moves the
  # cursor past it. The cursor is not bumped by one when the next segment is
  # a spaces segment.
  close_segment = lambda do |spaces_follow|
    next if active_prefix.nil?

    segment_ids, last_id = convert_encoded_number_to_ids(
      active_prefix, token, cursor
    )
    decoded.concat(segment_ids)
    cursor = last_id + (spaces_follow ? 0 : 1)
  end

  encoded_caches.each_char do |char|
    if [SPACES_PREFIX, BINARY_PREFIX, RANGE_PREFIX].include?(char)
      close_segment.call(char == SPACES_PREFIX)
      active_prefix = char
      token = ''
    else
      token += char
    end
  end

  # The last segment has no following prefix to trigger its conversion.
  close_segment.call(false)

  decoded
rescue InvalidEncodedCharException
  # corrupted encoded_caches, assume nothing cached
  []
end

#decode_sync_str(sync_str, base_timestamp = 0) ⇒ Object



220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
# File 'lib/id_pack/id_packer.rb', line 220

# Decodes a sync string into a map of primary key => last synced timestamp.
#
# Layout of sync_str (comma separated, each field LZString-compressed):
#
#   min_last_synced_at,
#   encoded_keys_0, diff_0,
#   encoded_keys_1, diff_1, ...
#
# For every (encoded_keys, diff) pair the timestamp is
# min_last_synced_at + diff + base_timestamp. When a key shows up in more
# than one pair, the largest timestamp is kept.
#
# Keys are read as integers when the first comma-separated token is a plain
# integer; otherwise the decompressed text is cut into 32-character chunks
# and each chunk is re-hyphenated as 8-4-4-4-rest.
#
# @param sync_str [String] the encoded sync string
# @param base_timestamp [Numeric] offset added to every timestamp
# @return [Hash] key => timestamp; {} when any StandardError is raised
def decode_sync_str(sync_str, base_timestamp = 0)
  # Replace invalid byte sequences so the split below cannot choke on them.
  sanitized = sync_str.encode('UTF-8', 'UTF-8', invalid: :replace)

  header, *payload = sanitized.split(',')
  floor_timestamp = LZString.decompress_from_encoded_uri_component(header).to_i

  # The payload alternates encoded keys and encoded diff, so walk it in pairs.
  payload.each_slice(2).each_with_object({}) do |(packed_keys, packed_diff), result|
    raw_keys = LZString.decompress_from_encoded_uri_component(packed_keys)
    tokens = raw_keys.split(",")
    head = tokens.first

    keys =
      if head.to_i.to_s == head
        tokens.map(&:to_i)
      else
        raw_keys.scan(/.{32}/).map do |hex|
          [[0, 8], [8, 4], [12, 4], [16, 4], [20, 16]]
            .map { |offset, length| hex[offset, length] }
            .join("-")
        end
      end

    diff = LZString.decompress_from_encoded_uri_component(packed_diff).to_i
    stamp = floor_timestamp + diff + base_timestamp

    keys.each do |key|
      result[key] = [result[key], stamp].compact.max
    end
  end
rescue StandardError
  # invalid sync_str, return empty map
  {}
end

#encode(array, window_size = WINDOW_SIZE, _exclude_nil = EXCLUDE_NIL, output_charset = ENCODED_NUMBER_CHARS) ⇒ Object

[5, 6, 21, 23, 25] => "_F~C_P.V"



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/id_pack/id_packer.rb', line 38

# Encodes a list of integers into the compressed string format.
#
# The input is de-duplicated, sorted and turned into ranges by
# convert_numbers_to_ranges. The output is a concatenation of pieces, each
# made of a prefix plus a number encoded with encode_decimal_number:
#
# * SPACES_PREFIX + gap between the previous range's end and this range's begin
# * RANGE_PREFIX  + size of a single range
# * BINARY_PREFIX + decimal value of the bit pattern produced by
#   convert_ranges_to_binary_number for a group of ranges
#
# Ranges shorter than window_size are collected into a group; the group is
# written out once the span from its first begin reaches window_size, or at
# the end of the input.
#
# Example (from the class overview):
#
#   IdPack::IdPacker.new.encode([5, 6, 21, 23, 25]) # => "_F~C_P.V"
#
# @param array [Array<Integer>] ids to encode
# @param window_size [Integer] span at which a group of ranges is closed
# @param _exclude_nil [Boolean] accepted but not used by this method
# @param output_charset [String] alphabet passed to encode_decimal_number
# @return [String] the encoded string
def encode(array, window_size = WINDOW_SIZE, _exclude_nil = EXCLUDE_NIL, output_charset = ENCODED_NUMBER_CHARS)
  output = ''
  pending = []
  window_start = 1
  grouping = false
  last_end = 0

  # Appends one "<prefix><encoded number>" piece to the output.
  append = lambda do |prefix, number|
    output += prefix + encode_decimal_number(number, output_charset)
  end

  # Appends a group of ranges as a single binary piece.
  append_bitmap = lambda do |group|
    bits = convert_ranges_to_binary_number group
    append.call(BINARY_PREFIX, convert_binary_number_to_decimal_number(bits))
  end

  # Appends a group: a lone range is written as a range piece, anything else
  # as a binary piece.
  append_group = lambda do |group|
    if group.length == 1
      append.call(RANGE_PREFIX, group.first.size)
    else
      append_bitmap.call(group)
    end
  end

  # Handles a range that starts fresh (not joined to an open group): long
  # ranges are written immediately, short ones open a new group.
  open_range = lambda do |range|
    if range.size >= window_size
      append.call(RANGE_PREFIX, range.size)
      grouping = false
    else
      pending.push range
      window_start = range.begin
      grouping = true
    end
  end

  convert_numbers_to_ranges(array.uniq.sort).each do |range|
    gap = range.begin - last_end

    if grouping
      span = range.end - window_start + 1

      if span < window_size
        # Still fits in the current window: keep collecting.
        pending.push range
      elsif span == window_size
        # Fills the window exactly: close the group including this range.
        pending.push range
        append_bitmap.call(pending)
        pending = []
        grouping = false
      else
        # Overflows the window: close the group, then start over here.
        append_group.call(pending)
        pending = []
        append.call(SPACES_PREFIX, gap)
        open_range.call(range)
      end
    else
      append.call(SPACES_PREFIX, gap) if gap >= 0
      open_range.call(range)
    end

    last_end = range.end
  end

  # Whatever is still collected at the end has not been written yet.
  append_group.call(pending) unless pending.empty?

  output
end

#encode_sync_str(id_synced_at) ⇒ Object

Input: id_synced_at:

1 => synced_at_1_timestamp,
2 => synced_at_2_timestamp,
10 => synced_at_10_timestamp, ...

Expected output of sync_str: min_last_synced_at, "encoded_0",diff_last_synced_at_0, "encoded_1",diff_last_synced_at_1, "encoded_2",diff_last_synced_at_2, …



186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# File 'lib/id_pack/id_packer.rb', line 186

# Builds a sync string from a map of id => synced_at timestamp.
#
# Output layout (comma separated, every field LZString-compressed):
#
#   min_synced_at,
#   encoded_ids_0, diff_synced_at_0,
#   encoded_ids_1, diff_synced_at_1, ...
#
# Ids that share a timestamp are emitted as one group, and each group's
# timestamp is stored as its difference from the smallest timestamp.
#
# Within a group, an id whose string form is a plain integer is treated as
# an integer. If the first id of a group is a string, the group's ids are
# concatenated with no separator and hyphens are removed; otherwise the ids
# are joined with commas.
#
# @param id_synced_at [Hash] id => synced_at timestamp
# @return [String] the encoded sync string
def encode_sync_str(id_synced_at)
  earliest = id_synced_at.values.min
  segments = [LZString.compress_to_encoded_uri_component(earliest.to_s)]

  by_timestamp = id_synced_at.group_by { |_id, stamp| stamp }

  by_timestamp.each do |stamp, entries|
    keys = entries.map do |raw_id, _stamp|
      text = raw_id.to_s
      number = text.to_i
      number.to_s == text ? number : text
    end

    payload =
      if keys.first.is_a?(String)
        keys.join("").delete("-")
      else
        keys.join(",")
      end

    packed_keys = LZString.compress_to_encoded_uri_component(payload)
    offset = stamp - earliest
    packed_offset = LZString.compress_to_encoded_uri_component(offset.to_s)

    segments << "#{packed_keys},#{packed_offset}"
  end

  segments.join(",")
end