Class: TimezoneFinder::TimezoneFinder

Inherits:
Object
  • Object
show all
Defined in:
lib/timezone_finder/timezone_finder.rb

Overview

This class lets you quickly find the timezone of a point on earth. It keeps the binary file with the timezonefinder open in reading mode to enable fast consequent access. In the file currently used there are two shortcuts stored per degree of latitude and one per degree of longitude (tests evaluated this to be the fastest setup when being used with numba)

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initializeTimezoneFinder

Returns a new instance of TimezoneFinder.



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/timezone_finder/timezone_finder.rb', line 14

def initialize
  # open the file in binary reading mode
  @binary_file = open(File.join(File.dirname(__FILE__), 'timezone_data.bin'), 'rb')

  # for more info on what is stored how in the .bin please read the comments in file_converter
  # read the first 2byte int (= number of polygons stored in the .bin)
  @nr_of_entries = @binary_file.read(2).unpack('S<')[0]

  # set addresses
  # the address where the shortcut section starts (after all the polygons) this is 34 433 054
  @shortcuts_start = @binary_file.read(4).unpack('L<')[0]

  @amount_of_holes = @binary_file.read(2).unpack('S<')[0]

  @hole_area_start = @binary_file.read(4).unpack('L<')[0]

  @nr_val_start_address = 2 * @nr_of_entries + 12
  @adr_start_address = 4 * @nr_of_entries + 12
  @bound_start_address = 8 * @nr_of_entries + 12
  # @poly_start_address = 40 * @nr_of_entries + 12
  @first_shortcut_address = @shortcuts_start + 259_200

  @nr_val_hole_address = @hole_area_start + @amount_of_holes * 2
  @adr_hole_address = @hole_area_start + @amount_of_holes * 4
  # @hole_data_start = @hole_area_start + @amount_of_holes * 8

  # for store for which polygons (how many) holes exits and the id of the first of those holes
  @hole_registry = {}
  last_encountered_line_nr = 0
  first_hole_id = 0
  amount_of_holes = 0
  @binary_file.seek(@hole_area_start)
  (0...@amount_of_holes).each do |i|
    related_line = @binary_file.read(2).unpack('S<')[0]
    # puts(related_line)
    if related_line == last_encountered_line_nr
      amount_of_holes += 1
    else
      if i != 0
        @hole_registry.update(last_encountered_line_nr => [amount_of_holes, first_hole_id])
      end

      last_encountered_line_nr = related_line
      first_hole_id = i
      amount_of_holes = 1
    end
  end

  # write the entry for the last hole(s) in the registry
  @hole_registry.update(last_encountered_line_nr => [amount_of_holes, first_hole_id])

  ObjectSpace.define_finalizer(self, self.class.__del__(@binary_file))
end

Class Method Details

.__del__(file) ⇒ Object



68
69
70
71
72
# File 'lib/timezone_finder/timezone_finder.rb', line 68

def self.__del__(file)
  proc do
    file.close
  end
end

Instance Method Details

#_holes_of_line(line = 0) ⇒ Object



137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/timezone_finder/timezone_finder.rb', line 137

def _holes_of_line(line = 0)
  amount_of_holes, hole_id = @hole_registry.fetch(line)

  (0...amount_of_holes).each do |_i|
    @binary_file.seek(@nr_val_hole_address + 2 * hole_id)
    nr_of_values = @binary_file.read(2).unpack('S<')[0]

    @binary_file.seek(@adr_hole_address + 4 * hole_id)
    @binary_file.seek(@binary_file.read(4).unpack('L<')[0])

    yield [Helpers.fromfile(@binary_file, false, 4, nr_of_values),
           Helpers.fromfile(@binary_file, false, 4, nr_of_values)]

    hole_id += 1
  end
rescue KeyError
end

#certain_timezone_at(lng: 0.0, lat: 0.0) ⇒ Object

this function looks up in which polygon the point certainly is included this is much slower than ‘timezone_at’! :param lng: longitude of the point in degree :param lat: latitude in degree :return: the timezone name of the polygon the point is included in or None



452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
# File 'lib/timezone_finder/timezone_finder.rb', line 452

def certain_timezone_at(lng: 0.0, lat: 0.0)
  if lng > 180.0 or lng < -180.0 or lat > 90.0 or lat < -90.0
    raise "The coordinates are out ouf bounds: (#{lng}, #{lat})"
  end

  possible_polygons = shortcuts_of(lng, lat)

  # x = longitude  y = latitude  both converted to 8byte int
  x = Helpers.coord2int(lng)
  y = Helpers.coord2int(lat)

  possible_polygons.each do |polygon_nr|
    # get boundaries
    @binary_file.seek((@bound_start_address + 16 * polygon_nr))
    boundaries = Helpers.fromfile(@binary_file, false, 4, 4)
    unless x > boundaries[0] or x < boundaries[1] or y > boundaries[2] or y < boundaries[3]

      outside_all_holes = true
      # when the point is within a hole of the polygon this timezone doesn't need to be checked
      _holes_of_line(polygon_nr) do |hole_coordinates|
        if Helpers.inside_polygon(x, y, hole_coordinates)
          outside_all_holes = false
          break
        end
      end

      if outside_all_holes
        if Helpers.inside_polygon(x, y, coords_of(polygon_nr))
          return TIMEZONE_NAMES[id_of(polygon_nr)]
        end
      end
    end
  end
  nil
end

#closest_timezone_at(lng: nil, lat: nil, delta_degree: 1, exact_computation: false, return_distances: false, force_evaluation: false) ⇒ Object

This function searches for the closest polygon in the surrounding shortcuts. Make sure that the point does not lie within a polygon (for that case the algorithm is simply wrong!) Note that the algorithm won’t find the closest polygon when it’s on the ‘other end of earth’ (it can’t search beyond the 180 deg lng border yet) this checks all the polygons within [delta_degree] degree lng and lat Keep in mind that x degrees lat are not the same distance apart than x degree lng! This is also the reason why there could still be a closer polygon even though you got a result already. order to make sure to get the closest polygon, you should increase the search radius until you get a result and then increase it once more (and take that result). This should only make a difference in really rare cases however. :param lng: longitude of the point in degree :param lat: latitude in degree :param delta_degree: the ‘search radius’ in degree :param exact_computation: when enabled the distance to every polygon edge is computed (way more complicated), instead of only evaluating the distances to all the vertices (=default). This only makes a real difference when polygons are very close. :param return_distances: when enabled the output looks like this: ( ‘tz_name_of_the_closest_polygon’,[ distances to all polygons in km], [tz_names of all polygons]) :param force_evaluation: :return: the timezone name of the closest found polygon, the list of distances or None



266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
# File 'lib/timezone_finder/timezone_finder.rb', line 266

def closest_timezone_at(lng: nil, lat: nil, delta_degree: 1, exact_computation: false, return_distances: false, force_evaluation: false)
  exact_routine = lambda do |polygon_nr|
    coords = coords_of(polygon_nr)
    nr_points = coords[0].length
    empty_array = [[0.0] * nr_points, [0.0] * nr_points]
    Helpers.distance_to_polygon_exact(lng, lat, nr_points, coords, empty_array)
  end

  normal_routine = lambda do |polygon_nr|
    coords = coords_of(polygon_nr)
    nr_points = coords[0].length
    Helpers.distance_to_polygon(lng, lat, nr_points, coords)
  end

  if lng > 180.0 or lng < -180.0 or lat > 90.0 or lat < -90.0
    raise "The coordinates are out ouf bounds: (#{lng}, #{lat})"
  end

  routine = if exact_computation
              exact_routine
            else
              normal_routine
            end

  # the maximum possible distance is half the perimeter of earth pi * 12743km = 40,054.xxx km
  min_distance = 40_100
  # transform point X into cartesian coordinates
  current_closest_id = nil
  central_x_shortcut = (lng + 180).floor.to_i
  central_y_shortcut = ((90 - lat) * 2).floor.to_i

  lng = Helpers.radians(lng)
  lat = Helpers.radians(lat)

  possible_polygons = []

  # there are 2 shortcuts per 1 degree lat, so to cover 1 degree two shortcuts (rows) have to be checked
  # the highest shortcut is 0
  top = [central_y_shortcut - 2 * delta_degree, 0].max
  # the lowest shortcut is 360 (= 2 shortcuts per 1 degree lat)
  bottom = [central_y_shortcut + 2 * delta_degree, 360].min

  # the most left shortcut is 0
  left = [central_x_shortcut - delta_degree, 0].max
  # the most right shortcut is 360 (= 1 shortcuts per 1 degree lng)
  right = [central_x_shortcut + delta_degree, 360].min

  # select all the polygons from the surrounding shortcuts
  (left..right).each do |x|
    (top..bottom).each do |y|
      polygons_of_shortcut(x, y).each do |p|
        possible_polygons << p if possible_polygons.index(p).nil?
      end
    end
  end

  polygons_in_list = possible_polygons.length

  return nil if polygons_in_list == 0

  # initialize the list of ids
  # TODO sorting doesn't give a bonus here?!
  possible_polygons, ids, zones_are_equal = compile_id_list(possible_polygons, polygons_in_list,
                                                            dont_sort: true)

  # if all the polygons in this shortcut belong to the same zone return it
  if zones_are_equal
    return TIMEZONE_NAMES[ids[0]] unless return_distances || force_evaluation
  end

  distances = [nil] * polygons_in_list
  pointer = 0
  if force_evaluation
    possible_polygons.each do |possible_polygon|
      distance = routine.call(possible_polygon)
      distances[pointer] = distance
      if distance < min_distance
        min_distance = distance
        current_closest_id = ids[pointer]
      end
      pointer += 1
    end
  else
    # stores which polygons have been checked yet
    already_checked = [false] * polygons_in_list
    polygons_checked = 0

    while polygons_checked < polygons_in_list
      # only check a polygon when its id is not the closest a the moment!
      if already_checked[pointer] or ids[pointer] == current_closest_id
        # go to the next polygon
        polygons_checked += 1

      else
        # this polygon has to be checked
        distance = routine.call(possible_polygons[pointer])
        distances[pointer] = distance

        already_checked[pointer] = true
        if distance < min_distance # rubocop:disable Metrics/BlockNesting
          min_distance = distance
          current_closest_id = ids[pointer]
          # whole list has to be searched again!
          polygons_checked = 1
        end
      end
      pointer = (pointer + 1) % polygons_in_list
    end
  end

  if return_distances
    return TIMEZONE_NAMES[current_closest_id], distances, ids.map { |x| TIMEZONE_NAMES[x] }
  end

  TIMEZONE_NAMES[current_closest_id]
end

#compile_id_list(polygon_id_list, nr_of_polygons, dont_sort: false) ⇒ Object

sorts the polygons_id list from least to most occurrences of the zone ids (->speed up) approx. 0.24% of all realistic points benefit from sorting (0.4% for random points)

percentage of sorting usage for 100k points

in most of those cases there are only two types of zones (= entries in counted_zones) and one of them has only one entry. That means after checking one polygon timezone_at() already stops. Sorting only really makes sense for closest_timezone_at(). :param polygon_id_list: :param nr_of_polygons: length of polygon_id_list :param dont_sort: if this is set to True, the sorting algorithms is skipped :return: sorted list of polygon_ids, sorted list of zone_ids, boolean: do all entries belong to the same zone



165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
# File 'lib/timezone_finder/timezone_finder.rb', line 165

def compile_id_list(polygon_id_list, nr_of_polygons, dont_sort: false)
  all_equal = lambda do |input_data|
    x = nil
    for x in input_data
      # first_val = x
      break
    end
    input_data.each do |y|
      return false if x != y
    end
    true
  end

  # print(polygon_id_list)
  # print(zone_id_list)
  zone_id_list = [0] * nr_of_polygons
  if dont_sort
    pointer_local = 0
    first_id = id_of(polygon_id_list[0])
    equal = true
    polygon_id_list.each do |polygon_id|
      zone_id = id_of(polygon_id)
      equal = false if zone_id != first_id
      zone_id_list[pointer_local] = zone_id
      pointer_local += 1
    end

    return polygon_id_list, zone_id_list, equal
  end

  counted_zones = {}
  pointer_local = 0
  polygon_id_list.each do |polygon_id|
    zone_id = id_of(polygon_id)
    zone_id_list[pointer_local] = zone_id
    pointer_local += 1
    counted_zones[zone_id] = counted_zones.fetch(zone_id, 0) + 1
  end
  # print(counted_zones)

  return polygon_id_list, zone_id_list, true if counted_zones.length == 1

  if all_equal.call(counted_zones.values)
    return polygon_id_list, zone_id_list, false
  end

  counted_zones_sorted = counted_zones.sort_by { |_key, value| value }
  # print(counted_zones_sorted)

  sorted_polygon_id_list = [0] * nr_of_polygons
  sorted_zone_id_list = [0] * nr_of_polygons

  pointer_output = 0
  pointer_output2 = 0
  counted_zones_sorted.each do |zone_id, amount|
    # write all polygons from this zone in the new list
    pointer_local = 0
    detected_polygons = 0
    while detected_polygons < amount
      if zone_id_list[pointer_local] == zone_id
        # the polygon at the pointer has the wanted zone_id
        detected_polygons += 1
        sorted_polygon_id_list[pointer_output] = polygon_id_list[pointer_local]
        pointer_output += 1
      end

      pointer_local += 1
    end

    (0...amount).each do |_pointer_local|
      sorted_zone_id_list[pointer_output2] = zone_id
      pointer_output2 += 1
    end
  end

  # print(sorted_polygon_id_list)
  # print(sorted_zone_id_list)

  [sorted_polygon_id_list, sorted_zone_id_list, false]
end

#coords_of(line = 0) ⇒ Object



123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/timezone_finder/timezone_finder.rb', line 123

def coords_of(line = 0)
  @binary_file.seek((@nr_val_start_address + 2 * line))
  nr_of_values = @binary_file.read(2).unpack('S<')[0]

  @binary_file.seek(@adr_start_address + 4 * line)
  @binary_file.seek(@binary_file.read(4).unpack('L<')[0])

  # return [Helpers.fromfile(@binary_file, false, 8, nr_of_values),
  #         Helpers.fromfile(@binary_file, false, 8, nr_of_values)]

  [Helpers.fromfile(@binary_file, false, 4, nr_of_values),
   Helpers.fromfile(@binary_file, false, 4, nr_of_values)]
end

#id_of(line = 0) ⇒ Object



74
75
76
77
78
# File 'lib/timezone_finder/timezone_finder.rb', line 74

def id_of(line = 0)
  # ids start at address 6. per line one unsigned 2byte int is used
  @binary_file.seek((12 + 2 * line))
  @binary_file.read(2).unpack('S<')[0]
end

#ids_of(iterable) ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/timezone_finder/timezone_finder.rb', line 80

def ids_of(iterable)
  id_array = [0] * iterable.length

  i = 0
  iterable.each do |line_nr|
    @binary_file.seek((12 + 2 * line_nr))
    id_array[i] = @binary_file.read(2).unpack('S<')[0]
    i += 1
  end

  id_array
end

#polygons_of_shortcut(x = 0, y = 0) ⇒ Object



110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/timezone_finder/timezone_finder.rb', line 110

def polygons_of_shortcut(x = 0, y = 0)
  # get the address of the first entry in this shortcut
  # offset: 180 * number of shortcuts per lat degree * 2bytes = entries per column of x shortcuts
  # shortcuts are stored: (0,0) (0,1) (0,2)... (1,0)...
  @binary_file.seek(@shortcuts_start + 720 * x + 2 * y)

  nr_of_polygons = @binary_file.read(2).unpack('S<')[0]

  @binary_file.seek(@first_shortcut_address + 1440 * x + 4 * y)
  @binary_file.seek(@binary_file.read(4).unpack('L<')[0])
  Helpers.fromfile(@binary_file, true, 2, nr_of_polygons)
end

#shortcuts_of(lng = 0.0, lat = 0.0) ⇒ Object



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/timezone_finder/timezone_finder.rb', line 93

def shortcuts_of(lng = 0.0, lat = 0.0)
  # convert coords into shortcut
  x = (lng + 180).floor.to_i
  y = ((90 - lat) * 2).floor.to_i

  # get the address of the first entry in this shortcut
  # offset: 180 * number of shortcuts per lat degree * 2bytes = entries per column of x shortcuts
  # shortcuts are stored: (0,0) (0,1) (0,2)... (1,0)...
  @binary_file.seek(@shortcuts_start + 720 * x + 2 * y)

  nr_of_polygons = @binary_file.read(2).unpack('S<')[0]

  @binary_file.seek(@first_shortcut_address + 1440 * x + 4 * y)
  @binary_file.seek(@binary_file.read(4).unpack('L<')[0])
  Helpers.fromfile(@binary_file, true, 2, nr_of_polygons)
end

#timezone_at(lng: 0.0, lat: 0.0) ⇒ Object

this function looks up in which polygons the point could be included to speed things up there are shortcuts being used (stored in the binary file) especially for large polygons it is expensive to check if a point is really included, so certain simplifications are made and even when you get a hit the point might actually not be inside the polygon (for example when there is only one timezone nearby) if you want to make sure a point is really inside a timezone use ‘certain_timezone_at’ :param lng: longitude of the point in degree (-180 to 180) :param lat: latitude in degree (90 to -90) :return: the timezone name of the matching polygon or None



392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
# File 'lib/timezone_finder/timezone_finder.rb', line 392

def timezone_at(lng: 0.0, lat: 0.0)
  if lng > 180.0 or lng < -180.0 or lat > 90.0 or lat < -90.0
    raise "The coordinates are out ouf bounds: (#{lng}, #{lat})"
  end

  possible_polygons = shortcuts_of(lng, lat)

  # x = longitude  y = latitude  both converted to 8byte int
  x = Helpers.coord2int(lng)
  y = Helpers.coord2int(lat)

  nr_possible_polygons = possible_polygons.length

  return nil if nr_possible_polygons == 0

  return TIMEZONE_NAMES[id_of(possible_polygons[0])] if nr_possible_polygons == 1

  # initialize the list of ids
  # and sort possible_polygons from least to most occurrences of zone_id
  possible_polygons, ids, only_one_zone = compile_id_list(possible_polygons, nr_possible_polygons)

  return TIMEZONE_NAMES[ids[0]] if only_one_zone

  # otherwise check if the point is included for all the possible polygons
  (0...nr_possible_polygons).each do |i|
    polygon_nr = possible_polygons[i]

    # get the boundaries of the polygon = (lng_max, lng_min, lat_max, lat_min)
    @binary_file.seek((@bound_start_address + 16 * polygon_nr))
    boundaries = Helpers.fromfile(@binary_file, false, 4, 4)
    # only run the expensive algorithm if the point is withing the boundaries
    unless x > boundaries[0] or x < boundaries[1] or y > boundaries[2] or y < boundaries[3]

      outside_all_holes = true
      # when the point is within a hole of the polygon, this timezone doesn't need to be checked
      _holes_of_line(polygon_nr) do |hole_coordinates|
        if Helpers.inside_polygon(x, y, hole_coordinates)
          outside_all_holes = false
          break
        end
      end

      if outside_all_holes
        if Helpers.inside_polygon(x, y, coords_of(polygon_nr))
          return TIMEZONE_NAMES[ids[i]]
        end
      end
    end
    # when after the current polygon only polygons from the same zone appear, return this zone
    same_element = Helpers.all_the_same(i + 1, nr_possible_polygons, ids)
    return TIMEZONE_NAMES[same_element] if same_element != -1
  end
  nil
end