Class: CitySDK::FileReader

Inherits:
Object
  • Object
show all
Defined in:
lib/citysdk/file_reader.rb

Constant Summary collapse

# Property names treated as the y coordinate of a point (second coordinate in findGeometry).
RE_Y =
/lat|(y.*coord)|(y.*loc(atie|ation)?)/i
# Property names treated as the x coordinate of a point (first coordinate in findGeometry).
RE_X =
/lon|lng|(x.*coord)|(x.*loc(atie|ation)?)/i
# Property names that findGeometry inspects for a ready-made geometry value.
# NOTE(review): `^` applies only to the first alternative and `$` only to the last, so
# "location" matches anywhere inside a name -- confirm this is intended.
RE_GEO =
/^geom(etry)?|location|locatie$/i
# Loose name match used by guessName: the field name merely contains one of these words.
RE_NAME =
/(title|titel|naam|name)/i
# Strict name match used by guessName: the field name is exactly one of these words.
RE_A_NAME =
/^(naam|name|title|titel)$/i

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(pars) ⇒ FileReader

Returns a new instance of FileReader.



21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/citysdk/file_reader.rb', line 21

# Reads the file described by +pars+ and fills in the metadata that was not supplied.
#
# @param pars [Hash] options. :file_path is required; :originalfile, when present,
#   supplies the extension used to choose a reader. :fields, :name, :srid, :unique_id,
#   :hasaddress and :hasgeometry are only guessed when they are not already set.
# @raise [RuntimeError] when the extension is not supported or no reader produced data.
def initialize(pars)
  @params = pars
  file_path = File.expand_path(@params[:file_path])
  if File.extname(file_path) == '.csdk'
    readCsdk(file_path)
  else
    ext = File.extname(@params[:originalfile] || file_path)
    case ext
      when /\.zip/i
        readZip(file_path)
      when /\.(geo)?json/i
        readJSON(file_path)
      when /\.shp/i
        readShape(file_path)
      # Grouped so the dot applies to both alternatives (was /\.csv|tsv/).
      when /\.(csv|tsv)/i
        readCsv(file_path)
      when /\.csdk/i
        readCsdk(file_path)
      else
        raise "Unknown or unsupported file type: #{ext}."
    end
  end

  # A reader may finish without producing anything (e.g. an archive that holds
  # no supported file); report that instead of failing on nil below.
  raise "No readable data found in #{file_path}." if @content.nil?

  @params[:rowcount] = @content.length
  has_rows = !@content.empty?
  # The content-based guesses index @content[0], so they are skipped for empty data.
  unless @params[:fields]
    has_rows ? getFields : @params[:fields] = []
  end
  guessName unless @params[:name]
  guessSRID if has_rows && !@params[:srid]
  findUniqueField if has_rows && !@params[:unique_id]
  getAddress unless @params[:hasaddress]
  @params[:hasgeometry] = 'unknown' if @params[:hasgeometry].nil?
end

Instance Attribute Details

#contentObject (readonly)

Returns the value of attribute content.



19
20
21
# File 'lib/citysdk/file_reader.rb', line 19

# Reader for @content, the array of row hashes filled in by the read* methods.
def content
  @content
end

#fileObject (readonly)

Returns the value of attribute file.



19
20
21
# File 'lib/citysdk/file_reader.rb', line 19

# Reader for @file, the path most recently handed to readCsv, readJSON or readShape.
def file
  @file
end

#paramsObject (readonly)

Returns the value of attribute params.



19
20
21
# File 'lib/citysdk/file_reader.rb', line 19

# Reader for @params, the options hash given to the constructor (replaced by the
# stored :config when a .csdk file is loaded) and extended with guessed metadata.
def params
  @params
end

Instance Method Details

#findColSep(f) ⇒ Object



142
143
144
145
146
147
148
149
# File 'lib/citysdk/file_reader.rb', line 142

# Guesses the column separator of delimited text from its first two lines.
#
# Reads (and therefore consumes) up to two lines from +f+.
#
# @param f [#gets] line-oriented reader positioned at the start of the data.
# @return [String] ";", "\t" or "|" when that character splits both lines into the
#   same number (more than one) of fields; "," otherwise, including for empty input.
def findColSep(f)
  header = f.gets
  return ',' if header.nil? # empty input: nothing to inspect

  # With only a header line available, judge the separator on that line alone.
  sample = f.gets || header
  [";", "\t", "|"].each do |sep|
    width = sample.split(sep).length
    return sep if width > 1 && header.split(sep).length == width
  end
  ','
end

#findGeometryObject



179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# File 'lib/citysdk/file_reader.rb', line 179

# Looks for geometry in the properties of @content and moves it to each row's :geometry.
#
# The property names of the first row decide which strategy is used:
# * a name matching RE_GEO is tried as hex EWKB, then as GeoJSON; failing both, the raw
#   value is still moved to :geometry and :hasgeometry becomes 'maybe';
# * otherwise an x field and a y field are paired into a Point.
#
# Updates @params[:hasgeometry], @params[:srid] and @params[:geomtry_type] (the key is
# spelled that way in this file and is kept as is).
#
# @return [true] when geometry was found with certainty
# @return [nil] when a RE_GEO field was moved without being recognised ('maybe')
# @return [false] when nothing was found, or when there are no rows
def findGeometry
  first_row = @content && @content[0]
  return false if first_row.nil? || first_row[:properties].nil?

  # *_open stays true until an exact longitude/latitude name has claimed the field.
  xfield = nil; x_open = true
  yfield = nil; y_open = true
  first_row[:properties].each do |k, v|
    next if k.nil?
    if k.to_s =~ RE_GEO
      srid, g_type = isWkbGeometry(v)
      if srid
        @params[:srid] = srid
        @params[:geomtry_type] = g_type
        @content.each do |h|
          _srid, _type, geom = isWkbGeometry(h[:properties][k])
          h[:geometry] = geom
          h[:properties].delete(k)
        end
        @params[:hasgeometry] = 'certain'
        return true
      end

      srid, g_type = isGeoJSON(v)
      if srid
        @params[:srid] = srid
        @params[:geomtry_type] = g_type
        @content.each do |h|
          h[:geometry] = h[:properties][k]
          h[:properties].delete(k)
        end
        @params[:hasgeometry] = 'certain'
        return true
      end

      @content.each do |h|
        h[:geometry] = h[:properties][k]
        h[:properties].delete(k)
      end
      @params[:hasgeometry] = 'maybe'
      return
    end

    # Compare as a String: a Symbol never equals 'lon'/'lat', which used to make
    # the exact-name priority below unreachable.
    hdc = k.to_s.downcase
    if hdc == 'longitude' || hdc == 'lon'
      xfield = k; x_open = false
    end
    if hdc == 'latitude' || hdc == 'lat'
      yfield = k; y_open = false
    end
    xfield = k if x_open && hdc =~ RE_X
    yfield = k if y_open && hdc =~ RE_Y
  end

  if xfield && yfield && (xfield != yfield)
    # Accepts numbers as well as text that uses a decimal comma.
    to_number = lambda do |raw|
      raw.is_a?(Numeric) ? raw.to_f : raw.to_s.tr(',', '.').to_f
    end
    @params[:hasgeometry] = 'certain'
    @content.each do |h|
      props = h[:properties]
      h[:geometry] = {:type => 'Point', :coordinates => [to_number.call(props[xfield]), to_number.call(props[yfield])]}
      props.delete(yfield)
      props.delete(xfield)
    end
    @params[:geomtry_type] = 'Point'
    return true
  end
  false
end

#findUniqueFieldObject



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/citysdk/file_reader.rb', line 77

# Finds the first property whose value differs in every row and uses it as the row id.
#
# Does nothing when there are no rows or when the first row already has an :id.
# On success @params[:unique_id] is set to the property name and each row's :id to its
# value for that property. The row count is taken from @content itself, so the method
# also works before @params[:rowcount] has been filled in.
def findUniqueField
  return if @content.nil? || @content.empty?
  return if @content[0][:id]

  total = @content.length
  # property name => set (hash keys) of the distinct values seen for it
  seen = {}
  @content.each do |h|
    (h[:properties] || {}).each do |k, v|
      (seen[k] ||= {})[v] = true
    end
  end

  # As many distinct values as rows means every row has its own value.
  unfield = seen.keys.find { |k| seen[k].length == total }
  return if unfield.nil?

  @params[:unique_id] = unfield
  @content.each do |h|
    h[:id] = h[:properties][unfield]
  end
end

#getAddressObject



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/citysdk/file_reader.rb', line 54

# Guesses from the field names in @params[:fields] whether the data carries addresses.
#
# Sets @params[:hasaddress] to 'certain', 'maybe' or 'unknown'. When a street or
# house-number field exists, @params[:housenumber] is set (the number field is
# preferred) and, if a postcode-like field exists, @params[:postcode] as well.
def getAddress
  names = @params[:fields]
  # For each pattern the earliest matching field in @params[:fields] is the one used.
  pick = lambda { |pattern| names.find { |f| f.to_s =~ pattern } }

  loose_postcode = pick.call(/postcode|post/i)
  exact_postcode = pick.call(/^(post|zip)code$/i)
  number_field   = pick.call(/huisnummer|housenumber|(house|huis)(nr|no)|number/i)
  street_field   = pick.call(/address|street|straat|adres/i)

  @params[:hasaddress] = 'unknown'
  return unless street_field || number_field

  if exact_postcode
    @params[:hasaddress] = 'certain'
    @params[:postcode] = exact_postcode
  elsif loose_postcode
    @params[:hasaddress] = 'maybe'
    @params[:postcode] = loose_postcode
  end
  @params[:housenumber] = number_field || street_field
end

#getFieldsObject



117
118
119
120
121
122
# File 'lib/citysdk/file_reader.rb', line 117

# Stores the property names of the first row in @params[:fields].
#
# Names are converted to symbols where the key supports it; other keys are kept
# unchanged. With no rows (or a first row without :properties) the list is empty.
def getFields
  first_row = @content && @content[0]
  props = first_row && first_row[:properties]
  keys = props ? props.keys : []
  @params[:fields] = keys.map { |k| k.respond_to?(:to_sym) ? k.to_sym : k }
end

#guessNameObject



105
106
107
108
109
110
111
112
113
114
115
# File 'lib/citysdk/file_reader.rb', line 105

# Picks the field most likely to hold a display name and stores it in @params[:name].
#
# A field whose whole name matches RE_A_NAME wins (the last such field in
# @params[:fields]); otherwise the first field that merely contains a RE_NAME word is
# used. @params[:name] is left untouched when nothing matches.
def guessName
  candidates = @params[:fields]

  exact = candidates.reverse.find { |field| field.to_s =~ RE_A_NAME }
  if exact
    @params[:name] = exact
    return
  end

  loose = candidates.find { |field| field.to_s =~ RE_NAME }
  @params[:name] = loose if loose
end

#guessSRIDObject



124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/citysdk/file_reader.rb', line 124

# Guesses the spatial reference of the data from the first coordinate of the first row.
#
# Leaves @params[:srid] alone when there are no rows or the first row has no geometry.
# Otherwise it defaults to 4326 and switches to 28992 when the coordinate lies inside
# the value ranges checked below. Geometry whose first position cannot be read (not a
# hash, no :coordinates, non-numeric values) keeps the 4326 default.
def guessSRID
  first_row = @content && @content[0]
  geometry = first_row && first_row[:geometry]
  return if geometry.nil?

  @params[:srid] = 4326
  return unless geometry.is_a?(Hash)

  # Descend through nested coordinate arrays down to the first position.
  pos = geometry[:coordinates]
  pos = pos[0] while pos.is_a?(Array) && pos[0].is_a?(Array)
  return unless pos.is_a?(Array)

  lon, lat = pos[0], pos[1]
  return unless lon.is_a?(Numeric) && lat.is_a?(Numeric)

  if lon > -7000.0 && lon < 300000.0 && lat > 289000.0 && lat < 629000.0
    # Dutch new rd system
    @params[:srid] = 28992
  end
end

#isGeoJSON(s) ⇒ Object



163
164
165
166
167
168
169
170
171
172
173
174
175
176
# File 'lib/citysdk/file_reader.rb', line 163

# Checks whether +s+ is a GeoJSON geometry object (a Hash with symbol keys).
#
# @param s [Object] candidate value.
# @return [Array, nil] [srid, type, s] when s[:type] is one of the geometry types listed
#   below, nil otherwise. srid is 4326 unless s[:crs] is an 'OGC' crs whose urn names an
#   EPSG code; an incomplete crs no longer disqualifies the geometry.
def isGeoJSON(s)
  return nil unless s.is_a?(Hash)

  geometry_types = %w[Point MultiPoint LineString MultiLineString Polygon MultiPolygon GeometryCollection]
  return nil unless geometry_types.include?(s[:type])

  srid = 4326
  crs = s[:crs]
  if crs.is_a?(Hash) && crs[:type] == 'OGC'
    props = crs[:properties]
    # The EPSG test below reads the fifth and the last colon-separated parts of the urn.
    urn = props.is_a?(Hash) ? props[:urn].to_s.split(':') : []
    srid = urn.last.to_i if urn[4] == 'EPSG'
  end
  [srid, s[:type], s]
end

#isWkbGeometry(s) ⇒ Object



151
152
153
154
155
156
157
158
159
160
161
# File 'lib/citysdk/file_reader.rb', line 151

# Tries to parse +s+ with GeoRuby's HexEWKBParser.
#
# @param s [Object] candidate value; anything that is not a String is rejected up front.
# @return [Array, nil] [srid, type, geometry] where geometry is the object produced by
#   the GeoRuby factory and type comes from its as_json[:type]; nil when parsing fails.
def isWkbGeometry(s)
  return nil unless s.is_a?(String)

  factory = GeoRuby::SimpleFeatures::GeometryFactory.new
  GeoRuby::SimpleFeatures::HexEWKBParser.new(factory).parse(s)
  geom = factory.geometry
  [geom.srid, geom.as_json[:type], geom]
rescue StandardError
  # A parse failure simply means "not EWKB". Only StandardError is caught, so
  # interrupts and exit requests still propagate.
  nil
end

#readCsdk(path) ⇒ Object



345
346
347
348
349
# File 'lib/citysdk/file_reader.rb', line 345

# Restores @params and @content from a .csdk dump (the format produced by #write).
#
# @param path [String] file holding a marshalled {:config => ..., :content => ...} hash.
# @raise [RuntimeError] when the file does not contain a Hash.
def readCsdk(path)
  # SECURITY: Marshal.load can instantiate arbitrary objects; only load .csdk files
  # from a trusted source.
  # Opened in binary mode so the marshalled bytes are read back untranslated.
  h = File.open(path, 'rb') { |fd| Marshal.load(fd) }
  raise "Invalid .csdk file: #{path}." unless h.is_a?(Hash)

  @params = h[:config]
  @content = h[:content]
end

#readCsv(path) ⇒ Object



243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
# File 'lib/citysdk/file_reader.rb', line 243

# Reads a delimited text file into @content, one {:properties => {...}} hash per row.
#
# Unless @params[:utf8_fixed] is true the text is run through CharlockHolmes and
# converted to UTF-8. The separator is guessed with findColSep and stored in
# @params[:colsep]; header names become symbol keys where possible. Geometry detection
# runs afterwards when at least one row was read.
#
# @param path [String] file to read.
def readCsv(path)
  @file = path
  text = File.open(path, "r:bom|utf-8") { |fd| fd.read }
  unless @params[:utf8_fixed] == true
    detected = CharlockHolmes::EncodingDetector.detect(text)
    # Convert only when an encoding name was actually reported; the detector can
    # return a result without one.
    if detected && detected[:encoding]
      text = CharlockHolmes::Converter.convert(text, detected[:encoding], 'UTF-8')
    end
  end
  text = text.force_encoding('utf-8')
  @content = []
  @params[:colsep] = findColSep(StringIO.new(text))
  csv = CSV.new(text, :col_sep => @params[:colsep], :headers => true, :skip_blanks => true)
  csv.each do |row|
    props = {}
    row.to_hash.each do |k, v|
      # Headers that cannot be symbolised (e.g. a missing header, nil) are kept as they are.
      props[k.respond_to?(:to_sym) ? k.to_sym : k] = v
    end
    @content << {:properties => props}
  end
  findGeometry unless @content.empty?
end

#readJSON(path) ⇒ Object



269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
# File 'lib/citysdk/file_reader.rb', line 269

# Reads a JSON or GeoJSON file into @content.
#
# A FeatureCollection contributes its features (with :type removed) and marks geometry
# as 'certain'. Any other document is searched for rows: a top-level array is used as
# is, and for a top-level object the longest array among its values is used. Each row
# is wrapped as {:properties => row} and geometry detection runs when rows were found.
#
# @param path [String] file to read; parsed with CitySDK::parseJson.
def readJSON(path)
  @content = []
  @file = path
  raw = File.open(path, "r:bom|utf-8") { |fd| fd.read }
  hash = CitySDK::parseJson(raw)

  if hash.is_a?(Hash) && hash[:type] == 'FeatureCollection'
    # A missing or malformed :features entry is treated as an empty collection.
    features = hash[:features].is_a?(Array) ? hash[:features] : []
    features.each do |f|
      f.delete(:type)
      @content << f
    end
    @params[:hasgeometry] = 'certain'
    findUniqueField if !@content.empty? && @content[0][:id].nil?
  else
    rows = nil
    if hash.is_a?(Array)
      rows = hash
    elsif hash.is_a?(Hash)
      # Keep the first array that is strictly longer than the ones seen so far.
      hash.each_value do |v|
        rows = v if v.is_a?(Array) && v.length > (rows ? rows.length : 0)
      end
    end

    (rows || []).each do |h|
      @content << {:properties => h}
    end
    findGeometry unless @content.empty?
  end
end

#readShape(path) ⇒ Object



320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
# File 'lib/citysdk/file_reader.rb', line 320

# Reads an ESRI shapefile into @content using GeoRuby's Shp4r reader.
#
# When a .prj file sits next to the shapefile and no :srid was supplied, sridFromPrj is
# asked to resolve it. Each shape becomes {:geometry => ..., :properties => {...}} with
# the geometry round-tripped through JSON and the attribute names as symbol keys.
#
# @param path [String] path of the .shp file.
def readShape(path)
  @content = []
  @file = path

  # Strip only a real ".shp" suffix (the dot used to be an unescaped wildcard).
  prj_path = path.sub(/\.shp\z/i, '') + '.prj'
  # File.exist? replaces File.exists?, which newer Rubies no longer provide.
  prj = File.exist?(prj_path) ? File.read(prj_path) : nil
  sridFromPrj(prj) if prj && @params[:srid].nil?

  @params[:hasgeometry] = 'certain'

  GeoRuby::Shp4r::ShpFile.open(path) do |shp|
    shp.each do |shape|
      h = {}
      h[:geometry] = CitySDK::parseJson(shape.geometry.to_json) #a GeoRuby SimpleFeature
      h[:properties] = {}
      att_data = shape.data #a Hash
      shp.fields.each do |field|
        h[:properties][field.name.to_sym] = att_data[field.name]
      end
      @content << h
    end
  end
end

#readZip(path) ⇒ Object



351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
# File 'lib/citysdk/file_reader.rb', line 351

# Unpacks a zip archive into a temporary directory and reads the first supported file
# found at its top level (JSON/GeoJSON, shapefile, CSV/TSV). Entries whose name starts
# with a dot are skipped; nothing is read when no supported file is present.
#
# @param path [String] path of the archive; the external `unzip` command must be available.
# @raise whatever CitySDK::Exception(...) produces, wrapping the message of any error
#   raised while unpacking or reading.
def readZip(path)
  begin
    Dir.mktmpdir("cdkfi_#{File.basename(path)}") do |dir|
      # Argument-list form: no shell is involved, so quotes or spaces in the
      # path can neither break nor alter the command.
      unpacked = system('unzip', path, '-d', dir, :out => File::NULL, :err => File::NULL)
      raise "Error unzipping #{path}." unless unpacked
      Dir.foreach(dir) do |f|
        next if f =~ /^\./
        entry = File.join(dir, f)
        case File.extname(f)
          when /\.(geo)?json/i
            readJSON(entry)
            return
          when /\.shp/i
            readShape(entry)
            return
          # Grouped so the dot applies to both alternatives (was /\.csv|tsv/).
          when /\.(csv|tsv)/i
            readCsv(entry)
            return
        end
      end
    end
  rescue StandardError => e
    # NOTE(review): this calls a method named Exception on CitySDK; its definition is
    # not visible here -- confirm it returns something `raise` accepts.
    raise CitySDK::Exception(e.message, {:originalfile => path}, __FILE__,__LINE__)
  end
end

#sridFromPrj(str) ⇒ Object



308
309
310
311
312
313
314
315
316
317
318
# File 'lib/citysdk/file_reader.rb', line 308

# Asks the prj2epsg.org web service for the EPSG code that matches the projection text.
#
# Best effort: on an HTTP 200 answer the first returned code is stored in
# @params[:srid]; any other status, or any error along the way, leaves it unchanged.
#
# @param str [String] contents of a .prj file, sent as the search terms.
def sridFromPrj(str)
  client = Faraday.new :url => "http://prj2epsg.org"
  reply = client.get('/search.json', {:mode => 'wkt', :terms => str})
  return nil unless reply.status == 200

  parsed = CitySDK::parseJson reply.body
  @params[:srid] = parsed[:codes][0][:code].to_i
rescue StandardError
  nil
end

#write(path = nil) ⇒ Object



375
376
377
378
379
380
381
382
383
384
385
386
# File 'lib/citysdk/file_reader.rb', line 375

# Dumps @params and @content to "<path>.csdk" in the format readCsdk loads.
#
# @param path [String, nil] base path; when nil, falls back to @file_path, then
#   @params[:file_path], then @file.
# @return [String, nil] the path written, or nil when no base path is known or the
#   file could not be written.
def write(path=nil)
  # @file_path is not assigned anywhere in the code shown on this page, so further
  # fallbacks are needed for the no-argument call to work.
  path = @file_path || (@params && @params[:file_path]) || @file if path.nil?
  return nil if path.nil?

  path = path + '.csdk'
  begin
    # Binary mode: the marshalled bytes must reach the disk untranslated.
    File.open(path,"wb") do |fd|
      fd.write( Marshal.dump({:config=>@params, :content=>@content}) )
    end
  rescue
    return nil
  end
  return path
end