Class: Export::Dwca::Data

Inherits:
Object
  • Object
show all
Defined in:
lib/export/dwca/data.rb

Overview

Wrapper to build DWCA zipfiles for a specific project. See tasks/accessions/report/dwc_controller.rb for use.

With help from thinkingeek.com/2013/11/15/create-temporary-zip-file-send-response-rails/

Usage:

begin
 data = Dwca::Data.new(core_scope: DwcOccurrence.where(project_id: sessions_current_project_id))
ensure
 data.cleanup
end

Always use the ensure/data.cleanup pattern!

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(core_scope: nil, extension_scopes: {}, predicate_extensions: {}) ⇒ Data

Returns a new instance of Data.

Parameters:

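  • core_scope (String, ActiveRecord::Relation) — required; an ArgumentError is raised when it is nil
  • extension_scopes (Hash) — optional scopes, read from the keys :biological_associations and :media
  • predicate_extensions (Hash) — merged over the defaults { collection_object_predicate_id: [], collecting_event_predicate_id: [] }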

Raises:

  • (ArgumentError)


55
56
57
58
59
60
61
62
63
64
# File 'lib/export/dwca/data.rb', line 55

# Build an exporter around a required core scope plus optional extensions.
#
# @param core_scope [String, ActiveRecord::Relation] required; nil is rejected
# @param extension_scopes [Hash] optional scopes, read from the keys
#   :biological_associations and :media
# @param predicate_extensions [Hash] merged over the default (empty)
#   :collection_object_predicate_id / :collecting_event_predicate_id lists
# @raise [ArgumentError] when core_scope is nil
def initialize(core_scope: nil, extension_scopes: {}, predicate_extensions: {})
  raise ArgumentError, 'must pass a core_scope' if core_scope.nil?

  default_predicate_ids = {
    collection_object_predicate_id: [],
    collecting_event_predicate_id: []
  }

  @core_scope = core_scope

  # NOTE(review): the original author flagged the biological associations
  # value as a String ("#! STring") — confirm the expected type.
  @biological_associations_extension = extension_scopes[:biological_associations]
  @media_extension = extension_scopes[:media]

  # Caller-supplied predicate ids win over the empty defaults.
  @data_predicate_ids = default_predicate_ids.merge(predicate_extensions)
end

Instance Attribute Details

#all_dataObject

Returns Tempfile.

Returns:

  • Tempfile



52
53
54
# File 'lib/export/dwca/data.rb', line 52

# Reader for @all_data.
#
# @return [Tempfile]
def all_data
  @all_data
end

#biological_associations_extensionScope?

Returning BiologicalAssociation

Returns:

  • (Scope, nil)

    Returning BiologicalAssociation



35
36
37
# File 'lib/export/dwca/data.rb', line 35

# Reader for the optional biological associations extension, assigned in
# #initialize from extension_scopes[:biological_associations].
#
# @return [Scope, nil] a scope returning BiologicalAssociation, or nil
def biological_associations_extension
  @biological_associations_extension
end

#core_scopeObject

!params core_scope [String, ActiveRecord::Relation]

String is fully formed SQL


31
32
33
# File 'lib/export/dwca/data.rb', line 31

# Reader for the core scope passed to #initialize.
#
# @return [String, ActiveRecord::Relation] a String is fully formed SQL
def core_scope
  @core_scope
end

#dataTempfile

Returns the csv data as a tempfile.

Returns:

  • (Tempfile)

    the csv data as a tempfile



119
120
121
# File 'lib/export/dwca/data.rb', line 119

# Reader for @data.
#
# @return [Tempfile] the csv data as a tempfile
def data
  @data
end

#data_predicate_idsObject

collection_object_predicate_id: [], collecting_event_predicate_id: []

Returns:

  • Hash



49
50
51
# File 'lib/export/dwca/data.rb', line 49

# Reader for the predicate id lists built in #initialize.
#
# @return [Hash] keyed by :collection_object_predicate_id and
#   :collecting_event_predicate_id (both default to [])
def data_predicate_ids
  @data_predicate_ids
end

#emlTempfile

This is a stub, and only half-heartedly done. You should be using IPT for the time being. See also

https://github.com/gbif/ipt/wiki/
https://github.com/gbif/ipt/wiki/#exemplar-datasets

TODO: reference biological_resource_extension.csv

Returns:

  • (Tempfile)

    metadata about this dataset



250
251
252
# File 'lib/export/dwca/data.rb', line 250

# Reader for @eml. The EML support is documented as a stub; IPT is the
# recommended tool for the time being.
#
# @return [Tempfile] metadata about this dataset
def eml
  @eml
end

#filenameString (readonly)

the name of zipfile

Returns:

  • (String)


422
423
424
# File 'lib/export/dwca/data.rb', line 422

# Reader for @filename.
#
# @return [String] the name of the zipfile
def filename
  @filename
end

#media_extensionScope?

Returns a scope of Image(?) records, or nil.

Returns:

  • (Scope, nil)

    a scope of Image(?) records, or nil



39
40
41
# File 'lib/export/dwca/data.rb', line 39

# Reader for the optional media extension, assigned in #initialize from
# extension_scopes[:media].
#
# @return [Scope, nil] documented as returning Image(?) — TODO confirm
def media_extension
  @media_extension
end

#metaObject

Returns the value of attribute meta.



25
26
27
# File 'lib/export/dwca/data.rb', line 25

# Reader for @meta.
#
# @return [Object] the value of attribute meta; callers use its #path, #close
#   and #unlink, so presumably a Tempfile — TODO confirm
def meta
  @meta
end

#predicate_dataObject

Returns the value of attribute predicate_data.



45
46
47
# File 'lib/export/dwca/data.rb', line 45

# Reader for @predicate_data.
#
# @return [Object] the value of attribute predicate_data; #cleanup closes and
#   unlinks it, so presumably a Tempfile — TODO confirm
def predicate_data
  @predicate_data
end

#totalObject

TODO update



41
42
43
# File 'lib/export/dwca/data.rb', line 41

# Reader for @total. #no_records? compares this value against 0.
#
# @return [Object] presumably the number of records in core_scope — TODO confirm
def total
  @total
end

#zipfileTempfile

Returns the zipfile.

Returns:

  • (Tempfile)

    the zipfile



413
414
415
# File 'lib/export/dwca/data.rb', line 413

# Reader for @zipfile.
#
# @return [Tempfile] the zipfile
def zipfile
  @zipfile
end

Instance Method Details

#biological_associations_resource_relationshipObject



337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
# File 'lib/export/dwca/data.rb', line 337

# Build (once) the tempfile holding the biological associations
# resource-relationship extension.
#
# The file is memoized. Previously every call created a brand-new Tempfile, so
# a caller invoking this method more than once (for example once for `.close`
# and again for `.unlink`) operated on different files and left the earlier
# ones on disk.
#
# @return [Tempfile, nil] the rewound extension file, or nil when no
#   biological associations extension was provided
def biological_associations_resource_relationship
  return nil if biological_associations_extension.nil?

  memoized = @biological_associations_resource_relationship
  return memoized if memoized

  # Compute the content before creating the file so a failing export does not
  # leave an empty tempfile behind. A lone newline stands in for the content
  # when there are no records.
  content =
    if no_records?
      "\n"
    else
      Export::Csv::Dwc::Extension::BiologicalAssociations.csv(biological_associations_extension)
    end

  file = Tempfile.new('biological_resource_relationship.xml')
  file.write(content)
  file.flush
  file.rewind
  @biological_associations_resource_relationship = file
end

#build_zipObject



394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
# File 'lib/export/dwca/data.rb', line 394

# Assemble the archive from the individual export files.
#
# @return [Tempfile] the tempfile backing the zip archive
def build_zip
  t = Tempfile.new(filename)

  # NOTE(review): presumably writes an empty archive into the tempfile so that
  # Zip::File.open below finds a valid zip — confirm against the rubyzip docs.
  Zip::OutputStream.open(t) { |zos| }

  Zip::File.open(t.path, Zip::File::CREATE) do |zip|
    zip.add('data.tsv', all_data.path)

    # Extension files are only added when the matching scope was supplied.
    # NOTE(review): `media` is not defined in the visible source — verify it exists.
    zip.add('media.csv', media.path) if media_extension
    zip.add('resource_relationships.tsv', biological_associations_resource_relationship.path) if biological_associations_extension

    zip.add('meta.xml', meta.path)
    zip.add('eml.xml', eml.path)
  end
  t
end

#cleanupTrue

Closes and deletes all temporary files.

Returns:

  • (True)

    close and delete all temporary files



429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
# File 'lib/export/dwca/data.rb', line 429

# Close and delete all temporary files created for this export.
#
# Each file is fetched once, then closed and unlinked. The resource
# relationship file is taken from its instance variable rather than through
# #biological_associations_resource_relationship: that method can build a new
# Tempfile when called, so going through it here could create additional files
# instead of removing the one that was written into the archive.
#
# @return [True]
def cleanup
  discard = lambda do |file|
    file.close
    file.unlink
  end

  discard.call(zipfile)
  discard.call(meta)
  discard.call(eml)
  discard.call(data)

  # Only present when the extension file was actually built.
  relationship_file = @biological_associations_resource_relationship
  discard.call(relationship_file) if relationship_file

  discard.call(predicate_data) if predicate_options_present?
  discard.call(all_data)
  true
end

#csvCSV

Returns the data as a CSV object.

Returns:

  • (CSV)

    the data as a CSV object



99
100
101
102
103
104
105
106
107
108
109
# File 'lib/export/dwca/data.rb', line 99

# Generate the CSV for the core scope's computed columns.
#
# @return [CSV] documented as the data as a CSV object; the value is whatever
#   ::Export::Csv.generate_csv returns
def csv
  scope = core_scope.computed_columns

  options = {
    # TODO: check to see if we need this
    exclude_columns: ::DwcOccurrence.excluded_columns,
    column_order: ::CollectionObject::DWC_OCCURRENCE_MAP.keys, # TODO: add other maps here
    trim_columns: true, # going to have to be optional
    trim_rows: false,
    header_converters: [:dwc_headers]
  }

  ::Export::Csv.generate_csv(scope, **options)
end

#meta_fieldsArray

id, and non-standard DwC columns are handled elsewhere

Returns:

  • (Array)

    use the temporarily written, and refined, CSV file to read off the existing headers so we can use them in writing meta.xml



359
360
361
362
363
364
# File 'lib/export/dwca/data.rb', line 359

# Read the header row of the written all_data file and return its column
# names, minus the first column.
#
# @return [Array<String>] tab-separated headers after the first; empty when
#   there are no records or the file has no first line
def meta_fields
  return [] if no_records?

  header_line = File.open(all_data) { |io| io.gets }
  return [] if header_line.nil?

  # The leading column is skipped (documented as "id ... handled elsewhere").
  header_line.strip.split("\t").drop(1)
end

#no_records?Boolean

Returns true if provided core_scope returns no records.

Returns:

  • (Boolean)

    true if provided core_scope returns no records



113
114
115
# File 'lib/export/dwca/data.rb', line 113

# Whether the export has nothing to write, i.e. #total equals 0.
#
# @return [Boolean] true if provided core_scope returns no records
def no_records?
  total == 0
end

#package_download(download) ⇒ Download

Returns a download instance.

Parameters:

Returns:



455
456
457
458
# File 'lib/export/dwca/data.rb', line 455

# Point the given download at the zipfile and hand the same instance back.
#
# @param download [Download] updated via update! with source_file_path set to
#   the zipfile's path
# @return [Download] the download passed in
def package_download(download)
  download.tap { |record| record.update!(source_file_path: zipfile.path) }
end

#predicate_options_present?Boolean

Returns:

  • (Boolean)


89
90
91
# File 'lib/export/dwca/data.rb', line 89

# Whether any predicate ids were supplied for collection objects or
# collecting events.
#
# @return [Boolean]
def predicate_options_present?
  %i[collection_object_predicate_id collecting_event_predicate_id].any? do |key|
    data_predicate_ids[key].present?
  end
end