Class: Kaggle::Client

Inherits:
Object
  • Object
show all
Includes:
HTTParty
Defined in:
lib/kaggle/client.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(username: nil, api_key: nil, credentials_file: nil, download_path: nil, cache_path: nil, timeout: nil, cache_only: false) ⇒ Client

Returns a new instance of Client.



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/kaggle/client.rb', line 9

# Builds a client, resolving credentials and filesystem locations.
#
# Credentials are resolved by load_credentials; every other setting falls back
# to the matching Constants::DEFAULT_* value when the argument is nil/false.
#
# @param username [String, nil] forwarded to load_credentials
# @param api_key [String, nil] forwarded to load_credentials
# @param credentials_file [String, nil] forwarded to load_credentials
# @param download_path [String, nil] overrides Constants::DEFAULT_DOWNLOAD_PATH
# @param cache_path [String, nil] overrides Constants::DEFAULT_CACHE_PATH
# @param timeout [Object, nil] overrides Constants::DEFAULT_TIMEOUT
# @param cache_only [Boolean] when truthy, credentials are not required and
#   setup_httparty_options is skipped
# @raise [AuthenticationError] when cache_only is falsy and either @username or
#   @api_key fails valid_credential?
def initialize(username: nil, api_key: nil, credentials_file: nil, download_path: nil, cache_path: nil,
               timeout: nil, cache_only: false)
  load_credentials(username, api_key, credentials_file)

  @cache_only = cache_only
  @timeout = timeout || Constants::DEFAULT_TIMEOUT
  @cache_path = cache_path || Constants::DEFAULT_CACHE_PATH
  @download_path = download_path || Constants::DEFAULT_DOWNLOAD_PATH

  # Cache-only clients may run without credentials; everyone else needs both.
  may_proceed = cache_only || (valid_credential?(@username) && valid_credential?(@api_key))
  unless may_proceed
    raise AuthenticationError,
          'Username and API key are required (or set cache_only: true for cache-only access)'
  end

  ensure_directories_exist
  return if cache_only

  setup_httparty_options
end

Instance Attribute Details

#api_key ⇒ Object (readonly)

Returns the value of attribute api_key.



7
8
9
# File 'lib/kaggle/client.rb', line 7

# Read-only accessor for the value held in @api_key.
#
# NOTE(review): @api_key is not assigned in the visible constructor body;
# presumably load_credentials sets it — confirm.
#
# @return [Object] the current value of @api_key
def api_key
  @api_key
end

#cache_only ⇒ Object (readonly)

Returns the value of attribute cache_only.



7
8
9
# File 'lib/kaggle/client.rb', line 7

# Read-only accessor for the cache-only flag.
#
# The constructor stores its `cache_only:` keyword (default false) here.
#
# @return [Object] the current value of @cache_only
def cache_only
  @cache_only
end

#cache_path ⇒ Object (readonly)

Returns the value of attribute cache_path.



7
8
9
# File 'lib/kaggle/client.rb', line 7

# Read-only accessor for the cache location.
#
# The constructor stores its `cache_path:` keyword here, falling back to
# Constants::DEFAULT_CACHE_PATH when none is given.
#
# @return [Object] the current value of @cache_path
def cache_path
  @cache_path
end

#download_path ⇒ Object (readonly)

Returns the value of attribute download_path.



7
8
9
# File 'lib/kaggle/client.rb', line 7

# Read-only accessor for the download location.
#
# The constructor stores its `download_path:` keyword here, falling back to
# Constants::DEFAULT_DOWNLOAD_PATH when none is given.
#
# @return [Object] the current value of @download_path
def download_path
  @download_path
end

#timeout ⇒ Object (readonly)

Returns the value of attribute timeout.



7
8
9
# File 'lib/kaggle/client.rb', line 7

# Read-only accessor for the configured timeout.
#
# The constructor stores its `timeout:` keyword here, falling back to
# Constants::DEFAULT_TIMEOUT when none is given.
# NOTE(review): the unit is not visible here — confirm against Constants.
#
# @return [Object] the current value of @timeout
def timeout
  @timeout
end

#username ⇒ Object (readonly)

Returns the value of attribute username.



7
8
9
# File 'lib/kaggle/client.rb', line 7

# Read-only accessor for the value held in @username.
#
# NOTE(review): @username is not assigned in the visible constructor body;
# presumably load_credentials sets it — confirm.
#
# @return [Object] the current value of @username
def username
  @username
end

Instance Method Details

#dataset_files(dataset_owner, dataset_name) ⇒ Object



76
77
78
79
80
81
82
83
84
85
# File 'lib/kaggle/client.rb', line 76

# Requests the file listing for a dataset and returns the decoded response body.
#
# @param dataset_owner [#to_s] owner segment of the dataset path
# @param dataset_name [#to_s] name segment of the dataset path
# @return [Object] whatever Oj.load produces from the response body
# @raise [DatasetNotFoundError] when the response does not report success
# @raise [ParseError] when Oj raises Oj::ParseError while this method runs
def dataset_files(dataset_owner, dataset_name)
  slug = "#{dataset_owner}/#{dataset_name}"
  endpoint = "#{Constants::DATASET_ENDPOINTS[:files]}/#{slug}"
  reply = authenticated_request(:get, endpoint)

  return Oj.load(reply.body) if reply.success?

  raise DatasetNotFoundError, "Dataset not found or accessible: #{slug}"
rescue Oj::ParseError => parse_failure
  # Surface decoder failures as the library's own error type.
  raise ParseError, "Failed to parse dataset files response: #{parse_failure.message}"
end

#download_dataset(dataset_owner, dataset_name, options = {}) ⇒ Object

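Raises:

  • (CacheNotFoundError) — in cache-only mode, when nothing usable is cached and options[:force_cache] is set
  • (DownloadError) — when the download request does not succeed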



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/kaggle/client.rb', line 26

# Downloads a dataset archive, extracts it, and returns the processed result.
#
# Resolution order:
# 1. With both :use_cache and :parse_csv set, a previously cached parsed result
#    is returned when one exists.
# 2. With :use_cache set, an existing non-empty extraction directory is reused
#    via handle_existing_dataset.
# 3. In cache-only mode nothing is downloaded: the method returns nil, or
#    raises when :force_cache is set.
# 4. Otherwise the archive is downloaded, saved, extracted, and handed to
#    handle_extracted_dataset. The saved zip is removed afterwards, even when
#    extraction fails.
#
# @param dataset_owner [#to_s] owner segment of the dataset path
# @param dataset_name [#to_s] name segment of the dataset path
# @param options [Hash] behavior flags; also forwarded to the handle_* helpers
# @option options [Boolean] :use_cache consult and populate the local caches
# @option options [Boolean] :parse_csv together with :use_cache, enables the
#   parsed-data cache
# @option options [Boolean] :force_cache in cache-only mode, raise rather than
#   return nil on a cache miss
# @return [Object, nil] the cached or freshly handled result; nil on a
#   cache-only miss without :force_cache
# @raise [CacheNotFoundError] cache-only miss with :force_cache set
# @raise [DownloadError] when the download response does not report success
def download_dataset(dataset_owner, dataset_name, options = {})
  dataset_path = "#{dataset_owner}/#{dataset_name}"
  use_cache = options[:use_cache]
  use_parsed_cache = use_cache && options[:parse_csv]

  # Check the parsed-data cache first.
  if use_parsed_cache
    cache_key = generate_cache_key(dataset_path)
    return load_from_cache(cache_key) if cached_file_exists?(cache_key)
  end

  # Reuse files extracted by an earlier download of the same dataset.
  extracted_dir = get_extracted_dir(dataset_path)
  if use_cache && Dir.exist?(extracted_dir) && !Dir.empty?(extracted_dir)
    return handle_existing_dataset(extracted_dir, options)
  end

  # Cache-only clients never reach the network.
  if @cache_only
    if options[:force_cache]
      raise CacheNotFoundError, "Dataset '#{dataset_path}' not found in cache and force_cache is enabled"
    end

    return nil
  end

  response = authenticated_request(:get, "#{Constants::DATASET_ENDPOINTS[:download]}/#{dataset_path}")

  raise DownloadError, "Failed to download dataset: #{response.message}" unless response.success?

  zip_file = save_zip_file(dataset_path, response.body)
  begin
    extract_zip_file(zip_file, extracted_dir)
  ensure
    # The archive is only a transport artifact; remove it even when extraction
    # raises so failed downloads do not pile up in the download directory.
    File.delete(zip_file) if File.exist?(zip_file)
  end

  result = handle_extracted_dataset(extracted_dir, options)

  # Only Hash/Array results are stored in the parsed-data cache.
  if use_parsed_cache && (result.is_a?(Hash) || result.is_a?(Array))
    cache_parsed_data(cache_key, result)
  end

  result
end

#parse_csv_to_json(file_path) ⇒ Object



87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/kaggle/client.rb', line 87

# Reads a CSV file and returns its rows as an array of hashes keyed by header.
#
# The file is opened in "bom|utf-8" mode so that a leading UTF-8 byte order
# mark is dropped instead of becoming part of the first header name. The file
# is therefore read as UTF-8.
#
# @param file_path [String] path to the CSV file
# @return [Array<Hash>] one hash per data row; the first line supplies the keys
# @raise [Error] when the path does not exist or csv_file? rejects it
# @raise [ParseError] when the CSV library raises CSV::MalformedCSVError
def parse_csv_to_json(file_path)
  raise Error, "File does not exist: #{file_path}" unless File.exist?(file_path)
  raise Error, "File is not a CSV: #{file_path}" unless csv_file?(file_path)

  CSV.foreach(file_path, headers: true, encoding: 'bom|utf-8').map(&:to_h)
rescue CSV::MalformedCSVError => e
  raise ParseError, "Failed to parse CSV file: #{e.message}"
end