Module: Rdatasets

Defined in:
lib/rdatasets.rb,
lib/rdatasets/version.rb

Overview

Module for Rdatasets

Constant Summary collapse

VERSION =
"0.8.0"

Class Method Summary collapse

Class Method Details

.df ⇒ Polars::DataFrame

Display information of all data sets.

Returns:

  • (Polars::DataFrame)

87
88
89
90
# File 'lib/rdatasets.rb', line 87

# Reads the bundled catalogue that describes every available data set.
#
# @return [Object] whatever read_gzip_dataframe returns for the catalogue
#   file (documented on this page as a Polars::DataFrame)
def df
  # The catalogue lives in data/datasets.csv.gz, one level above this file's directory.
  catalog_path = File.expand_path("../data/datasets.csv.gz", __dir__)
  read_gzip_dataframe(catalog_path)
end

.get_file_path(package_name, dataset_name) ⇒ String

Get the file path of a certain dataset.

Parameters:

  • package_name (String, Symbol)

    :R package name

  • dataset_name (String, Symbol)

    :R dataset name

Returns:

  • (String)


73
74
75
76
77
78
79
80
81
82
83
# File 'lib/rdatasets.rb', line 73

# Builds the path of the gzipped CSV file for a dataset.
#
# @param package_name [String, Symbol] R package name
# @param dataset_name [String, Symbol] R dataset name
# @return [String] path of the form <data/csv>/<package>/<dataset>.csv.gz
def get_file_path(package_name, dataset_name)
  csv_root = File.expand_path("../data/csv", __dir__)
  # Only Symbols are converted; any other value is used as given.
  package_dir = package_name.is_a?(Symbol) ? package_name.to_s : package_name
  dataset = dataset_name.is_a?(Symbol) ? dataset_name.to_s : dataset_name

  # The "car" package directory is a symbolic link to "carData".
  # Symbolic links are avoided because they can cause errors on Windows.
  package_dir = "carData" if package_dir == "car"

  File.join(csv_root, package_dir, dataset + ".csv.gz")
end

.load(package_name, dataset_name = nil) ⇒ Polars::DataFrame

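Loads a certain dataset and returns it as a data frame. When dataset_name is omitted, returns the list of datasets included in the package instead.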

Parameters:

  • package_name (String, Symbol)

    :R package name

  • dataset_name (String, Symbol) (defaults to: nil)

    :R dataset name

Returns:

  • (Polars::DataFrame, Array<Symbol>)

50
51
52
53
54
55
56
57
58
59
# File 'lib/rdatasets.rb', line 50

# Loads one dataset, or lists a package's datasets when no dataset is named.
#
# @param package_name [String, Symbol] R package name
# @param dataset_name [String, Symbol, nil] R dataset name
# @return [Object] the result of read_gzip_dataframe for the dataset file,
#   or the result of package(package_name) when dataset_name is nil/false
# @raise [RuntimeError] if the dataset file does not exist
def load(package_name, dataset_name = nil)
  # Without a dataset name, fall back to listing the package contents.
  return package(package_name) unless dataset_name

  dataset_path = get_file_path(package_name, dataset_name)
  raise "No such file -- #{dataset_path}" unless File.exist?(dataset_path)

  read_gzip_dataframe(dataset_path)
end

.method_missing(package_name) ⇒ Object



34
35
36
37
38
# File 'lib/rdatasets.rb', line 34

# Lets a package be reached as a module-level method: a bare call whose name
# is listed in Rdatasets.packages returns a Package for that name.
#
# Extra arguments and a block are accepted so that unknown calls carrying
# arguments fall through to `super` and raise the usual NoMethodError instead
# of an ArgumentError caused by this method's own arity.
#
# @param package_name [Symbol] name of the missing method
# @param args [Array] arguments given to the missing method
# @param block [Proc, nil] block given to the missing method
# @return [Package] when called bare with a known package name
# @raise [NoMethodError] for every other call (raised by `super`)
def self.method_missing(package_name, *args, &block)
  # Only a bare call is a package lookup; this also avoids reading the
  # package list for calls that cannot be a lookup.
  bare_call = args.empty? && block.nil?
  return Package.new(package_name) if bare_call && Rdatasets.packages.include?(package_name)

  super
end

.package(package_name) ⇒ Array<Symbol>

Show a list of datasets included in the package.

Parameters:

  • package_name (String, Symbol)

    :R package name

Returns:

  • (Array<Symbol>)


101
102
103
104
# File 'lib/rdatasets.rb', line 101

# Lists the datasets that belong to one package.
#
# @param package_name [String, Symbol] R package name (compared via to_s)
# @return [Array<Symbol>] the "Item" values of the catalogue rows whose
#   "Package" column equals the given name
def package(package_name)
  catalog = df
  package_rows = catalog[Polars.col("Package") == package_name.to_s]
  package_rows["Item"].to_a.map { |item| item.to_sym }
end

.packages ⇒ Array<Symbol>

Show a list of all packages.

Returns:

  • (Array<Symbol>)


94
95
96
# File 'lib/rdatasets.rb', line 94

# Lists every package named in the catalogue, without duplicates.
#
# @return [Array<Symbol>] distinct values of the "Package" column, in
#   first-seen order
def packages
  package_column = df["Package"].to_a
  package_column.uniq.map { |name| name.to_sym }
end

.read_gzip_dataframe(file_path) ⇒ Object



61
62
63
64
65
66
67
# File 'lib/rdatasets.rb', line 61

# Opens a gzip-compressed CSV file and hands the decompressed stream to
# Polars.read_csv.
#
# @param file_path [String] path of the .csv.gz file
# @return [Object] the value returned by Polars.read_csv
# @raise [RuntimeError] wrapping the message of any StandardError raised
#   while opening or reading the file
def read_gzip_dataframe(file_path)
  begin
    Zlib::GzipReader.open(file_path) do |stream|
      Polars.read_csv(stream, infer_schema_length: 10_000)
    end
  rescue StandardError => error
    raise "Failed to read Gzip file: #{error.message}"
  end
end

.respond_to_missing?(package_name, include_private) ⇒ Boolean

Returns:

  • (Boolean)


40
41
42
# File 'lib/rdatasets.rb', line 40

# Reports the dynamically handled package methods to respond_to?.
#
# @param package_name [Symbol] method name being queried
# @param include_private [Boolean] forwarded to `super` for unknown names
# @return [Boolean] true when the name is listed in Rdatasets.packages,
#   otherwise whatever `super` answers
def self.respond_to_missing?(package_name, include_private)
  return true if Rdatasets.packages.include?(package_name)

  super
end

.search(pattern) ⇒ Polars::DataFrame

Search the available datasets by item name and title. If the argument is a String, the match ignores case.

Parameters:

  • pattern (String, Regexp)

    :The pattern to search for

Returns:

  • (Polars::DataFrame)

110
111
112
113
114
115
116
117
118
119
120
# File 'lib/rdatasets.rb', line 110

# Searches the catalogue for datasets whose "Item" or "Title" matches.
#
# A String is used as a regular expression and matched case-insensitively.
# A Regexp is converted with Regexp#to_s, which carries its own flags
# (for example "(?-mix:...)" or "(?i-mx:...)").
#
# @param pattern [String, Regexp] the pattern to search for
# @return [Object] the result of df.filter for rows where either column
#   matches (documented on this page as a Polars::DataFrame)
# @raise [ArgumentError] if pattern is neither a String nor a Regexp
def search(pattern)
  regex =
    case pattern
    when String
      # A scoped group keeps the ignore-case flag applied to the whole
      # pattern, including any top-level alternation.
      "(?i:#{pattern})"
    when Regexp
      pattern.to_s
    else
      raise ArgumentError, "Invalid argument type: #{pattern.class}"
    end

  item_matches = Polars.col("Item").str.contains(regex)
  title_matches = Polars.col("Title").str.contains(regex)
  df.filter(item_matches | title_matches)
end