Module: RDatasets

Defined in:
lib/rdatasets.rb,
lib/rdatasets/version.rb

Overview

Module for RDatasets

Constant Summary collapse

VERSION =
'0.7.0'

Class Method Summary collapse

Class Method Details

.df ⇒ Daru::DataFrame

Returns a data frame with information about all available datasets.

Returns:

  • (Daru::DataFrame)

83
84
85
86
# File 'lib/rdatasets.rb', line 83

# Builds a data frame from the bundled dataset index (`datasets.csv`).
#
# The CSV is looked up in the `data` directory that sits beside the
# directory containing this file, and it is parsed again on every call.
#
# @return [Daru::DataFrame] the result of Daru::DataFrame.from_csv for the index file
def df
  Daru::DataFrame.from_csv(File.expand_path('../data/datasets.csv', __dir__))
end

.get_file_path(package_name, dataset_name) ⇒ String

Get the file path of a certain dataset.

Parameters:

  • package_name (String, Symbol)

    :R package name

  • dataset_name (String, Symbol)

    :R dataset name

Returns:

  • (String)


69
70
71
72
73
74
75
76
77
78
79
# File 'lib/rdatasets.rb', line 69

# Builds the path of the CSV file that backs a dataset.
#
# The path is only computed here; nothing checks that the file exists.
#
# @param package_name [String, Symbol] R package name
# @param dataset_name [String, Symbol] R dataset name
# @return [String] `<data directory>/<package>/<dataset>.csv`
def get_file_path(package_name, dataset_name)
  package_dir = package_name.is_a?(Symbol) ? package_name.to_s : package_name
  dataset = dataset_name.is_a?(Symbol) ? dataset_name.to_s : dataset_name

  # The "car" package directory is a symbolic link to "carData".
  # Symbolic links can cause errors on Windows, so point at the real directory.
  package_dir = 'carData' if package_dir == 'car'

  File.join(File.expand_path('../data', __dir__), package_dir, dataset + '.csv')
end

.load(package_name, dataset_name = nil) ⇒ Daru::DataFrame

Loads a dataset and returns it as a data frame. When dataset_name is omitted, returns the list of datasets in the package instead.

Parameters:

  • package_name (String, Symbol)

    :R package name

  • dataset_name (String, Symbol) (defaults to: nil)

    :R dataset name

Returns:

  • (Daru::DataFrame, Array<Symbol>)

48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/rdatasets.rb', line 48

# Loads a dataset as a data frame, or lists a package's datasets.
#
# When +dataset_name+ is nil (or false) the call is delegated to
# +package+ and that listing is returned instead of a data frame.
#
# @param package_name [String, Symbol] R package name
# @param dataset_name [String, Symbol, nil] R dataset name
# @return [Daru::DataFrame] the dataset, when +dataset_name+ is given
# @return [Object] whatever +package(package_name)+ returns otherwise
# @raise [RuntimeError] if the dataset's CSV file does not exist
def load(package_name, dataset_name = nil)
  return package(package_name) unless dataset_name

  csv_path = get_file_path(package_name, dataset_name)
  raise "No such file -- #{csv_path}" unless File.exist?(csv_path)

  frame = Daru::DataFrame.from_csv(csv_path)
  return frame unless original_index_is_sequential?(frame)

  # Promote the first column to the index, then drop that column.
  # Done by hand because `frame.set_index` is slow.
  frame.index = frame.at(0)
  frame.delete_vector(frame.at(0).name)
  frame
end

.method_missing(package_name) ⇒ Object



32
33
34
35
36
# File 'lib/rdatasets.rb', line 32

# Resolves calls named after a package (e.g. `RDatasets.datasets`) to a
# Package object built from that name.
#
# Any extra call arguments are accepted in the signature so that a call to an
# unknown name with arguments reaches +super+ and fails with the usual
# NoMethodError, instead of an ArgumentError raised by this hook itself.
#
# @param package_name [Symbol] name of the method that was called
# @param args [Array] arguments of the original call; must be empty for a package
# @return [Package] when +package_name+ is listed in RDatasets.packages
# @raise [NoMethodError] (via +super+) when the name is not a known package
# @raise [ArgumentError] when a known package is called with arguments
def self.method_missing(package_name, *args)
  # Bare `super` forwards the name, the arguments and any block unchanged.
  return super unless RDatasets.packages.include?(package_name)

  unless args.empty?
    raise ArgumentError, "wrong number of arguments (given #{args.size}, expected 0)"
  end

  Package.new(package_name)
end

.package(package_name) ⇒ Array<Symbol>

Show a list of datasets included in the package.

Parameters:

  • package_name (String, Symbol)

    :R package name

Returns:

  • (Array<Symbol>)


97
98
99
100
# File 'lib/rdatasets.rb', line 97

# Lists the datasets that belong to a package.
#
# The dataset index is loaded once and reused for both the 'Package' mask and
# the row selection, so the CSV behind +df+ is not parsed twice per call.
#
# @param package_name [String, Symbol] R package name (compared via +to_s+)
# @return [Array<Symbol>] the 'Item' values of the matching rows, as symbols
def package(package_name)
  catalog = df
  in_package = catalog['Package'].eq(package_name.to_s)
  catalog.where(in_package)['Item'].to_a.map(&:to_sym)
end

.packages ⇒ Array<Symbol>

Show a list of all packages.

Returns:

  • (Array<Symbol>)


90
91
92
# File 'lib/rdatasets.rb', line 90

# Lists every distinct package named in the dataset index.
#
# Reads the 'Package' column of +df+, drops duplicates (keeping first
# occurrences in order) and converts each remaining name to a symbol.
#
# @return [Array<Symbol>] the package names
def packages
  package_column = df['Package'].to_a
  package_column.uniq.map { |name| name.to_sym }
end

.respond_to_missing?(package_name, include_private) ⇒ Boolean

Returns:

  • (Boolean)


38
39
40
# File 'lib/rdatasets.rb', line 38

# Companion of +method_missing+: reports that the module responds to any
# method named after a package listed in RDatasets.packages.
#
# @param package_name [Symbol] method name being queried
# @param include_private [Boolean] forwarded to +super+ for all other names
# @return [Boolean] true for known packages, otherwise the default answer
def self.respond_to_missing?(package_name, include_private)
  return true if RDatasets.packages.include?(package_name)

  super
end

.search(pattern) ⇒ Daru::DataFrame

Search the available datasets by item name and title. If the argument is a string, the match is case-insensitive.

Parameters:

  • pattern (String, Regexp)

    :The pattern to search for

Returns:

  • (Daru::DataFrame)

106
107
108
109
110
111
# File 'lib/rdatasets.rb', line 106

# Filters the dataset index down to rows whose 'Item' or 'Title' matches.
#
# A String is compiled into a case-insensitive regular expression, so regex
# metacharacters in it keep their special meaning. Any other pattern object
# is used as given.
#
# @param pattern [String, Regexp] the pattern to search for
# @return [Daru::DataFrame] the result of filtering +df+ by row
def search(pattern)
  regex = pattern.is_a?(String) ? Regexp.new(pattern, Regexp::IGNORECASE) : pattern
  df.filter(:row) { |entry| entry['Item'] =~ regex || entry['Title'] =~ regex }
end