Class: Geoptima::Dataset

Inherits:
Object
  • Object
show all
Includes:
ErrorCounter
Defined in:
lib/geoptima/data.rb

Instance Attribute Summary collapse

Attributes included from ErrorCounter

#errors

Class Method Summary collapse

Instance Method Summary collapse

Methods included from ErrorCounter

#combine_errors, #incr_error, #report_errors

Constructor Details

#initialize(name, options = {}) ⇒ Dataset

Returns a new instance of Dataset.



863
864
865
866
867
868
869
870
871
# File 'lib/geoptima/data.rb', line 863

# Creates an empty dataset called +name+.
#
# The whole +options+ hash is retained, and three of its keys are read here:
#   :time_range          - kept as the time filter; when absent a DateRange
#                          from Config[:min_datetime] to Config[:max_datetime]
#                          is built
#   :location_range      - kept as the location filter; when absent
#                          LocationRange.everywhere is used
#   :geolocation_options - kept for later use; defaults to an empty hash
def initialize(name, options = {})
  @name    = name
  @options = options
  @data    = []
  @time_range =
    options[:time_range] || DateRange.new(Config[:min_datetime], Config[:max_datetime])
  @location_range =
    options[:location_range] || LocationRange.everywhere
  @geolocation_options =
    options[:geolocation_options] || {}
  @fields = {}
end

Instance Attribute Details

#name ⇒ Object (readonly)

Returns the value of attribute name.



861
862
863
# File 'lib/geoptima/data.rb', line 861

# Reader for @name, the name given to the constructor.
def name
  @name
end

#options ⇒ Object (readonly)

Returns the value of attribute options.



861
862
863
# File 'lib/geoptima/data.rb', line 861

# Reader for @options, the options hash given to the constructor.
def options
  @options
end

Class Method Details

.add_directory_to_datasets(datasets, directory, options = {}) ⇒ Object



1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
# File 'lib/geoptima/data.rb', line 1133

# Recursively scans +directory+ and feeds matching files into +datasets+.
#
# Entries whose name starts with "." are skipped (this also covers "." and
# ".."). Sub-directories are scanned recursively. Files whose name contains
# ".json" (case-insensitive) are passed to add_file_to_datasets; every other
# file is reported on STDOUT and ignored.
#
# datasets  - collection handed through unchanged to add_file_to_datasets
# directory - path of the directory to scan
# options   - handed through unchanged to add_file_to_datasets
def self.add_directory_to_datasets(datasets, directory, options = {})
  # Dir.foreach releases the directory handle when iteration finishes or
  # raises; Dir.open(...).each without a block kept it open until GC.
  Dir.foreach(directory) do |file|
    next if file =~ /^\./
    path = "#{directory}/#{file}"
    if File.directory?(path)
      add_directory_to_datasets(datasets, path, options)
    elsif file =~ /\.json/i
      add_file_to_datasets(datasets, path, options)
    else
      puts "Ignoring files without JSON extension: #{path}"
    end
  end
end

.add_file_to_datasets(datasets, file, options = {}) ⇒ Object



1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
# File 'lib/geoptima/data.rb', line 1118

# Adds one path to the +datasets+ hash.
#
# A directory is delegated to add_directory_to_datasets. Anything else is
# loaded as a Geoptima::Data: an invalid file is reported on STDOUT and
# dropped, a valid one is appended to the Dataset stored under its id (or
# under the single key 'all' when options[:combine_all] is set). The Dataset
# is created on first use.
def self.add_file_to_datasets(datasets, file, options = {})
  return add_directory_to_datasets(datasets, file, options) if File.directory?(file)

  geoptima = Geoptima::Data.new(file)
  if geoptima.valid?
    key = options[:combine_all] ? 'all' : geoptima.id
    dataset = (datasets[key] ||= Geoptima::Dataset.new(key, options))
    dataset << geoptima
  else
    puts "INVALID: #{geoptima.start}\t#{file}\n\n"
  end
end

.make_datasets(files, options = {}) ⇒ Object



1147
1148
1149
1150
1151
1152
1153
# File 'lib/geoptima/data.rb', line 1147

# Builds a fresh hash of datasets by passing every entry of +files+ (together
# with +options+) to add_file_to_datasets, and returns that hash.
def self.make_datasets(files, options = {})
  files.each_with_object({}) do |file, datasets|
    add_file_to_datasets(datasets, file, options)
  end
end

Instance Method Details

#<<(data) ⇒ Object



873
874
875
876
# File 'lib/geoptima/data.rb', line 873

# Appends +data+ to this dataset and returns the internal data array.
#
# Every value derived from @data is memoized elsewhere in this class (the
# sorted event lists, per-field value lists, statistics, waypoint traces and
# the IMEI/IMSI lists), so all of those caches are dropped here, not just
# @sorted. Otherwise readers would keep seeing results computed before the
# new data arrived.
def <<(data)
  @sorted = nil
  @stats = nil
  @waypoints = nil
  @imeis = nil
  @imsis = nil
  @fields = {} # #[] expects a hash here, so reset to empty rather than nil
  @data << data
end

#[](key) ⇒ Object



943
944
945
# File 'lib/geoptima/data.rb', line 943

# Returns the distinct non-nil values that the data entries hold for +key+.
# The result is memoized in @fields under the down-cased key, while the
# lookup on each entry uses +key+ exactly as given.
def [](key)
  cache_key = key.downcase
  @fields[cache_key] ||= @data.map { |entry| entry[key] }.compact.uniq
end

#description ⇒ Object



1114
1115
1116
# File 'lib/geoptima/data.rb', line 1114

# One-line, comma-separated summary of the dataset: its name, IMEIs, IMSIs,
# platforms, models, OSes, number of files and number of sorted events.
def description
  [
    "Dataset:#{name}",
    "IMEI:#{imeis.join(',')}",
    "IMSI:#{imsis.join(',')}",
    "Platform:#{platforms.join(',')}",
    "Model:#{models.join(',')}",
    "OS:#{oses.join(',')}",
    "Files:#{file_count}",
    "Events:#{(events = sorted) && events.length}"
  ].join(', ')
end

#dump_timers(out = STDOUT) ⇒ Object



1025
1026
1027
1028
1029
1030
1031
# File 'lib/geoptima/data.rb', line 1025

# Writes one line per registered timer to +out+ (STDOUT by default), ordered
# by timer name, preceded by a header line with the timer count.
#
# @timers is only created lazily by #timer, so it may still be nil here; that
# case is reported as zero timers instead of raising NoMethodError.
def dump_timers(out = STDOUT)
  timers = @timers || {}
  out.puts "Printing timer information for #{timers.length} timers:"
  timers.keys.sort.each do |key|
    out.puts "\t#{timers[key].describe}"
  end
end

#each_trace {|trace| ... } ⇒ Object

Yields:

  • (trace)


1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
# File 'lib/geoptima/data.rb', line 1084

# Groups the sorted 'gps' events into Trace objects and yields each one.
#
# Events are appended to the current trace until trace.too_far(gps) is true
# for the next event; the current trace is then yielded and a new one is
# started with that event. The last, still open trace is yielded at the end.
# Nothing is yielded when there are no 'gps' events.
#
# Returns an Enumerator when called without a block. The 'each_trace' timer
# is stopped in an ensure clause so that an exception or break in the
# caller's block cannot leave it running.
def each_trace
  return enum_for(:each_trace) unless block_given?

  timer('each_trace').start
  begin
    trace = nil
    sorted('gps').each do |gps|
      trace ||= Trace.new(self)
      if trace.too_far(gps)
        yield trace
        trace = Trace.new(self)
      end
      trace << gps
    end
    yield trace if trace
  ensure
    timer('each_trace').stop
  end
end

#events_names ⇒ Object



1016
1017
1018
# File 'lib/geoptima/data.rb', line 1016

# Sorted list of the distinct event names reported by all data entries.
def events_names
  per_file = @data.map(&:events_names)
  per_file.flatten.uniq.sort
end

#file_count ⇒ Object



878
879
880
# File 'lib/geoptima/data.rb', line 878

# Number of data entries that have been appended to this dataset.
def file_count
  @data.size
end

#first ⇒ Object



959
960
961
962
# File 'lib/geoptima/data.rb', line 959

# First entry of the merged, sorted list of all events (nil when that list is
# empty). The merge is triggered on demand if it has not happened yet.
def first
  @sorted || merge_events
  all_events = @sorted[nil]
  all_events[0]
end

#header(names = nil) ⇒ Object



985
986
987
988
989
990
# File 'lib/geoptima/data.rb', line 985

# Flat list of header fields, taken from the first sorted event of each event
# type in +names+ (all known event names when +names+ is nil). An event type
# without any events contributes a single nil entry.
def header(names = nil)
  merge_events unless @sorted
  event_types = names || events_names
  per_type = event_types.map do |event_type|
    sample = sorted(event_type)[0]
    sample && sample.header
  end
  per_type.flatten
end

#imei ⇒ Object



890
891
892
# File 'lib/geoptima/data.rb', line 890

# First entry of #imeis (nil when that list is empty).
def imei
  imeis[0]
end

#imeis ⇒ Object



894
895
896
# File 'lib/geoptima/data.rb', line 894

# Memoized list of the distinct 'imei' metadata values of this dataset, as
# produced by make_all_from_metadata.
#
# The method name had been lost from the call, leaving `('imei')`, which
# memoized the literal String 'imei' instead of a list.
def imeis
  @imeis ||= make_all_from_metadata('imei')
end

#imsi ⇒ Object



882
883
884
# File 'lib/geoptima/data.rb', line 882

# First entry of #imsis (nil when that list is empty).
def imsi
  imsis[0]
end

#imsis ⇒ Object



886
887
888
# File 'lib/geoptima/data.rb', line 886

# Memoized list of the distinct 'imsi' metadata values of this dataset, as
# produced by make_all_from_metadata.
#
# The method name had been lost from the call, leaving `('imsi')`, which
# memoized the literal String 'imsi' instead of a list.
def imsis
  @imsis ||= make_all_from_metadata('imsi')
end

#last ⇒ Object



964
965
966
967
# File 'lib/geoptima/data.rb', line 964

# Last entry of the merged, sorted list of all events (nil when that list is
# empty). The merge is triggered on demand if it has not happened yet.
def last
  @sorted || merge_events
  all_events = @sorted[nil]
  all_events[-1]
end

#length ⇒ Object



969
970
971
# File 'lib/geoptima/data.rb', line 969

# Number of events in the merged, sorted list of all events.
def length
  all_events = sorted
  all_events.size
end

#locate_events ⇒ Object



1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
# File 'lib/geoptima/data.rb', line 1099

# Runs a Geoptima::Locator over all sorted events, using the geolocation
# options given to the constructor, and prints a before/after summary on
# STDOUT.
#
# The 'locate.all' timer is stopped in an ensure clause so a failing locate
# cannot leave it running, and the summary line now closes its "(timed: ..."
# parenthesis.
#
# NOTE(review): the original guards were `true || $debug`, i.e. always true,
# so the output stays unconditional here. Presumably it was meant to become
# debug-only at some point; confirm before restricting it.
def locate_events
  events = sorted
  puts "Locating #{events.length} events"
  locator = Geoptima::Locator.new(events, @geolocation_options)
  total = timer("locate.all")
  total.start
  begin
    locator.locate
  ensure
    total.stop
  end
  puts "Located #{locator.located.length} / #{events.length} events (timed: #{total})"
end

#make_all_from_metadata(field_name) ⇒ Object



898
899
900
901
902
903
904
905
906
907
908
909
# File 'lib/geoptima/data.rb', line 898

# Returns the distinct non-nil values of the metadata field +field_name+
# across all data entries, ordered by descending weight. The weight of a
# value is the sum of count.to_i over the entries that carry it.
#
# The method name had been lost from the `def` line, which made the
# definition a syntax error; it is restored here.
def make_all_from_metadata(field_name)
  totals = @data.inject({}) do |counts, d|
    value = d[field_name]
    counts[value] = (counts[value] || 0) + d.count.to_i
    counts
  end
  ranked = totals.to_a.sort { |a, b| b[1] <=> a[1] }
  ranked.map { |value, _total| value }.compact.uniq
end

#merge_events ⇒ Object



1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
# File 'lib/geoptima/data.rb', line 1033

# Builds (once) the merged list of all events from all data entries and
# stores it in @sorted[nil]. Returns the @sorted hash itself, not the list.
#
# An event is kept when its time lies inside @time_range; 'gps' events must
# additionally lie inside @location_range unless that range is nil. The
# resulting list is ordered by the string "<time_key> <event name>".
# If options[:locate] is set, locate_events is run after sorting.
# Does nothing when @sorted[nil] already exists.
def merge_events
  @sorted ||= {}
  unless @sorted[nil]
    timer('merge_events').start
    # Keyed by "<time_key> <name>": two events of the same name with the same
    # time_key occupy the same slot, so the one processed later wins.
    event_hash = {}
    puts "Creating sorted maps for #{self}" if($debug)
    events_names.each do |name|
      is_gps = name == 'gps'
      puts "Preparing maps for #{name}" if($debug)
      @data.each do |data|
        puts "Processing #{(e=data.events[name]) && e.length} events for #{name}" if($debug)
        # data.events[name] may be nil for an entry without this event type
        (events = data.events[name]) && events.each do |event|
          puts "\t\tTesting #{event.time} inside #{@time_range}" if($debug)
          if @time_range.include?(event.time)
            puts "\t\t\tEvent at #{event.time} is inside #{@time_range}" if($debug)
            # The location filter is applied to 'gps' events only
            if !is_gps || @location_range.nil? || @location_range.include?(event.location)
              key = "#{event.time_key} #{name}"
              event_hash[key] = event
            end
          end
        end
        # NOTE(review): this sits inside the events_names loop, so it runs once
        # per (event name, data entry) pair rather than once per data entry.
        # Confirm that combine_errors tolerates repeated calls for the same data.
        combine_errors data
      end
      puts "After adding #{name} events, maps are #{event_hash.length} long" if($debug)
    end
    puts "Merging and sorting #{event_hash.keys.length} maps" if($debug)
    timer('merge_events.sort').start
    # Plain string sort of the keys; the ordering therefore relies on time_key
    # being sortable as text.
    @sorted[nil] = event_hash.keys.sort.map{|k| event_hash[k]}
    timer('merge_events.sort').stop
    puts "Sorted #{@sorted[nil].length} events" if($debug)
    # The locate timer is started and stopped even when locating is disabled
    timer('merge_events.locate').start
    locate_events if(options[:locate])
    timer('merge_events.locate').stop
    timer('merge_events').stop
  end
  @sorted
end

#models ⇒ Object



951
952
953
# File 'lib/geoptima/data.rb', line 951

# Distinct non-nil 'Model' values of the data entries (see #[]).
def models
  self['Model']
end

#oses ⇒ Object



955
956
957
# File 'lib/geoptima/data.rb', line 955

# Distinct non-nil 'OS' values of the data entries (see #[]).
def oses
  self['OS']
end

#platforms ⇒ Object



947
948
949
# File 'lib/geoptima/data.rb', line 947

# Distinct non-nil 'Platform' values of the data entries (see #[]).
def platforms
  self['Platform']
end

#recent(event, key, seconds = 60) ⇒ Object



911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
# File 'lib/geoptima/data.rb', line 911

# Returns event[key]. If the event has no value for +key+, tries first to
# fill it in from an earlier event.
#
# +key+ is split on "." into an event type and a property name. Starting at
# the position of +event+ in the sorted list of all events, earlier events are
# visited one by one. When the visit stops on an event of the wanted type,
# that event's property is copied into event[key].
#
# event   - the event to complete; must respond to [], []=, file and time
# key     - "<event type>.<property>"
# seconds - search window, default 60
def recent(event,key,seconds=60)
  unless event[key]
    timer("export.event.recent").start
    timer("export.event.recent.#{key}").start
    # Without an IMEI on the event's file no search is attempted
    if imei = event.file.imei
      puts "Searching for recent values for '#{key}' starting at event #{event}" if($debug)
      ev,prop=key.split(/\./)
      ar=sorted
      puts "\tSearching through #{ar && ar.length} events for event type #{ev} and property #{prop}" if($debug)
      if i=ar.index(event)
        # The while expression evaluates to the value passed to break, or to
        # nil when index 0 is reached without a break.
        afe = while(i>0)
          fe = ar[i-=1]
          puts "\t\tTesting event[#{i}]: #{fe}" if($debug)
          # Stop on: a nil entry, an event outside the time window, or an event
          # of the wanted type from the same IMEI.
          # presumably SPERDAY is seconds-per-day and time differences are in
          # days -- TODO confirm, SPERDAY is defined outside this method.
          # NOTE(review): the time-window stop also returns fe, so an event of
          # the wanted type that is too old (or from another IMEI) still passes
          # the name check below -- confirm this is intended.
          break(fe) if(fe.nil? || (event.time - fe.time) * SPERDAY > seconds || (fe.name == ev && fe.file.imei == imei))
        end
        if afe && afe.name == ev
          puts "\t\tFound event[#{i}] with #{prop} => #{afe[prop]} and time gap of #{(event.time - fe.time) * SPERDAY} seconds" if($debug)
          event[key] = afe[prop]
        end
      else
        puts "Event not found in search for recent '#{key}': #{event}"
      end
    else
      puts "Not searching for correlated data without imei: #{event}"
    end
    timer("export.event.recent.#{key}").stop
    timer("export.event.recent").stop
  end
#      @recent[key] ||= ''
  event[key]
end

#sorted(event_type = nil) ⇒ Object



973
974
975
976
977
978
979
980
981
982
983
# File 'lib/geoptima/data.rb', line 973

# Returns the sorted events of +event_type+, or all sorted events when
# +event_type+ is nil. Merging happens on demand, and the per-type list is
# filtered from the full list on first request and then cached in @sorted.
def sorted(event_type = nil)
  merge_events unless @sorted
  if !event_type.nil? && !@sorted[event_type]
    stopwatch = "sorted.#{event_type}"
    timer(stopwatch).start
    @sorted[event_type] = @sorted[nil].select { |event| event.name == event_type }
    timer(stopwatch).stop
  end
  @sorted[event_type]
end

#stats ⇒ Object



992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
# File 'lib/geoptima/data.rb', line 992

# Returns, memoized, a hash mapping "<event name>.<field>" to a hash of
# value => occurrence count over all sorted events.
#
# Fields with too many distinct values are dropped: more than 500, or more
# than 10 when that also exceeds half the number of events.
#
# The pruning now runs once, inside the build-once branch. It used to run on
# every call, but event_count is only assigned while building. On later calls
# it was nil, so `event_count / 2` raised NoMethodError as soon as a retained
# field had more than 10 distinct values.
def stats
  merge_events unless @sorted
  unless @stats
    timer('stats').start
    @stats = {}
    event_count = 0
    sorted.each do |event|
      event_count += 1
      event.header.each do |field|
        values = (@stats["#{event.name}.#{field}"] ||= {})
        value = event[field]
        values[value] = (values[value] || 0) + 1
      end
    end
    timer('stats').stop
    @stats.reject! do |_key, values|
      values.length > 500 || (values.length > 10 && values.length > event_count / 2)
    end
  end
  @stats
end

#timer(name) ⇒ Object



1020
1021
1022
1023
# File 'lib/geoptima/data.rb', line 1020

# Returns the Geoptima::Timer registered under +name+, creating and
# remembering it (in @timers) on first use.
def timer(name)
  registry = (@timers ||= {})
  registry[name] ||= Geoptima::Timer.new(name)
end

#to_s ⇒ Object



1110
1111
1112
# File 'lib/geoptima/data.rb', line 1110

# The dataset name when there is no IMEI or the name already is the first
# IMEI; otherwise all IMEIs joined with commas.
def to_s
  primary = imei
  if primary.to_s.empty? || name == primary
    name
  else
    imeis.join(',')
  end
end

#waypoints(waypoints = nil) ⇒ Object



1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
# File 'lib/geoptima/data.rb', line 1071

# Returns a 'ways' Trace named "waypoints-<dataset name>" holding the sorted
# events of the requested type, each converted with to_type and kept only
# when valid_gpx? is true. The argument 'all' (like nil) selects all events.
# Traces are cached per event type in @waypoints.
def waypoints(waypoints = nil)
  @waypoints ||= {}
  event_type = (waypoints == 'all') ? nil : waypoints
  cached = @waypoints[event_type]
  return cached if cached

  # Register the trace before filling it, as the cache lookup above relies on it
  trace = Trace.new(self, :type => 'ways', :name => "waypoints-#{self.name}")
  @waypoints[event_type] = trace
  sorted(event_type).each do |event|
    typed = event.to_type
    @waypoints[event_type] << typed if typed.valid_gpx?
  end
  @waypoints[event_type]
end