Class: Datasets::Rdatasets

Inherits:
Dataset
  • Object
show all
Defined in:
lib/datasets/rdatasets.rb

Instance Attribute Summary

Attributes inherited from Dataset

#metadata

Instance Method Summary collapse

Methods inherited from Dataset

#clear_cache!, #to_table

Constructor Details

#initialize(package_name, dataset_name) ⇒ Rdatasets

Returns a new instance of Rdatasets.


59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/datasets/rdatasets.rb', line 59

# Builds a dataset backed by a single entry of the Rdatasets list.
#
# The entry is looked up via RdatasetsList#filter with the given package and
# dataset names; its +csv+ and +title+ values are copied into the metadata.
#
# @param package_name name of the package the dataset belongs to
# @param dataset_name name of the dataset; also names the cached CSV file
#   (it is concatenated with ".csv", so presumably a String - confirm with callers)
# @raise [ArgumentError] if the list has no entry for the package/dataset pair
def initialize(package_name, dataset_name)
  entry = RdatasetsList.new.filter(package: package_name, dataset: dataset_name).first
  raise ArgumentError, "Unable to locate dataset #{package_name}/#{dataset_name}" unless entry

  # The parent constructor must run before @metadata is touched below.
  super()

  meta = @metadata
  meta.id = "rdatasets-#{package_name}-#{dataset_name}"
  meta.name = "Rdatasets: #{package_name}: #{dataset_name}"
  meta.url = entry.csv
  meta.licenses = ["GPL-3"]
  meta.description = entry.title

  # The CSV is stored inside the cache directory as "<dataset_name>.csv".
  csv_file_name = dataset_name + ".csv"
  @data_path = cache_dir_path + csv_file_name

  @package_name = package_name
  @dataset_name = dataset_name
end

Instance Method Details

#each(&block) ⇒ Object


81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/datasets/rdatasets.rb', line 81

# Iterates over the rows of the dataset's CSV file, yielding each as a Hash.
#
# The file is fetched with +download+ only when +@data_path+ does not exist
# yet. The first CSV line supplies the headers and all built-in CSV converters
# are applied to the field values. For every row, the entry stored under the
# empty-string header is dropped and the remaining headers become Symbol keys.
#
# @yieldparam record [Hash] one row, keyed by symbolized header
# @return [Enumerator] when no block is given
def each(&block)
  return enum_for(__method__) unless block_given?

  unless @data_path.exist?
    download(@data_path, @metadata.url)
  end

  CSV.foreach(@data_path, headers: :first_row, converters: :all) do |row|
    # Drop the column whose header is "" before symbolizing the keys.
    named_fields = row.to_h.reject { |header, _value| header == "" }
    yield named_fields.transform_keys(&:to_sym)
  end
end