Class: Kaggle::Client
Instance Attribute Summary
-
#api_key ⇒ Object
readonly
Returns the value of attribute api_key.
-
#cache_only ⇒ Object
readonly
Returns the value of attribute cache_only.
-
#cache_path ⇒ Object
readonly
Returns the value of attribute cache_path.
-
#download_path ⇒ Object
readonly
Returns the value of attribute download_path.
-
#timeout ⇒ Object
readonly
Returns the value of attribute timeout.
-
#username ⇒ Object
readonly
Returns the value of attribute username.
Instance Method Summary
- #dataset_files(dataset_owner, dataset_name) ⇒ Object
- #download_dataset(dataset_owner, dataset_name, options = {}) ⇒ Object
-
#initialize(username: nil, api_key: nil, credentials_file: nil, download_path: nil, cache_path: nil, timeout: nil, cache_only: false) ⇒ Client
constructor
A new instance of Client.
- #parse_csv_to_json(file_path) ⇒ Object
Constructor Details
#initialize(username: nil, api_key: nil, credentials_file: nil, download_path: nil, cache_path: nil, timeout: nil, cache_only: false) ⇒ Client
Returns a new instance of Client.
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
# File 'lib/kaggle/client.rb', line 9
#
# Builds a client, resolving paths and timeout against the library defaults.
#
# @param username [String, nil] forwarded to load_credentials
# @param api_key [String, nil] forwarded to load_credentials
# @param credentials_file [String, nil] forwarded to load_credentials
#   (presumably a path to a credentials file; confirm against load_credentials)
# @param download_path [String, nil] falls back to Constants::DEFAULT_DOWNLOAD_PATH
# @param cache_path [String, nil] falls back to Constants::DEFAULT_CACHE_PATH
# @param timeout [Object, nil] falls back to Constants::DEFAULT_TIMEOUT
# @param cache_only [Boolean] when truthy, credentials are not validated and
#   ensure_directories_exist is not called
# @raise [AuthenticationError] when cache_only is falsy and either @username or
#   @api_key fails valid_credential?
def initialize(username: nil, api_key: nil, credentials_file: nil, download_path: nil, cache_path: nil,
               timeout: nil, cache_only: false)
  load_credentials(username, api_key, credentials_file)

  @download_path = download_path || Constants::DEFAULT_DOWNLOAD_PATH
  @cache_path = cache_path || Constants::DEFAULT_CACHE_PATH
  @timeout = timeout || Constants::DEFAULT_TIMEOUT
  @cache_only = cache_only

  # Cache-only clients skip both the credential check and directory setup.
  return if cache_only

  unless valid_credential?(@username) && valid_credential?(@api_key)
    raise AuthenticationError,
          'Username and API key are required (or set cache_only: true for cache-only access)'
  end

  ensure_directories_exist
end
Instance Attribute Details
#api_key ⇒ Object (readonly)
Returns the value of attribute api_key.
7 8 9 |
# File 'lib/kaggle/client.rb', line 7
#
# Returns the value of attribute api_key.
def api_key
  @api_key
end
#cache_only ⇒ Object (readonly)
Returns the value of attribute cache_only.
7 8 9 |
# File 'lib/kaggle/client.rb', line 7
#
# Returns the value of attribute cache_only.
def cache_only
  @cache_only
end
#cache_path ⇒ Object (readonly)
Returns the value of attribute cache_path.
7 8 9 |
# File 'lib/kaggle/client.rb', line 7
#
# Returns the value of attribute cache_path.
def cache_path
  @cache_path
end
#download_path ⇒ Object (readonly)
Returns the value of attribute download_path.
7 8 9 |
# File 'lib/kaggle/client.rb', line 7
#
# Returns the value of attribute download_path.
def download_path
  @download_path
end
#timeout ⇒ Object (readonly)
Returns the value of attribute timeout.
7 8 9 |
# File 'lib/kaggle/client.rb', line 7
#
# Returns the value of attribute timeout.
def timeout
  @timeout
end
#username ⇒ Object (readonly)
Returns the value of attribute username.
7 8 9 |
# File 'lib/kaggle/client.rb', line 7
#
# Returns the value of attribute username.
def username
  @username
end
Instance Method Details
#dataset_files(dataset_owner, dataset_name) ⇒ Object
76 77 78 79 80 81 82 83 84 85 |
# File 'lib/kaggle/client.rb', line 76
#
# Requests the file listing for a dataset and returns the decoded response body.
#
# @param dataset_owner [String] first segment of the "owner/name" dataset path
# @param dataset_name [String] second segment of the "owner/name" dataset path
# @return [Object] whatever Oj.load produces from the response body
# @raise [DatasetNotFoundError] when the response does not report success
# @raise [ParseError] when Oj cannot parse the response body
def dataset_files(dataset_owner, dataset_name)
  dataset_path = "#{dataset_owner}/#{dataset_name}"
  response = authenticated_request(:get, "#{Constants::DATASET_ENDPOINTS[:files]}/#{dataset_path}")

  raise DatasetNotFoundError, "Dataset not found or accessible: #{dataset_path}" unless response.success?

  Oj.load(response.body)
rescue Oj::ParseError => e
  # Re-raise as the library's own error type, keeping the parser's message.
  raise ParseError, "Failed to parse dataset files response: #{e.message}"
end
#download_dataset(dataset_owner, dataset_name, options = {}) ⇒ Object
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
# File 'lib/kaggle/client.rb', line 26
#
# Returns a dataset from the local cache when allowed, otherwise downloads the
# zip archive, extracts it and hands the extracted directory to
# handle_extracted_dataset.
#
# @param dataset_owner [String] first segment of the "owner/name" dataset path
# @param dataset_name [String] second segment of the "owner/name" dataset path
# @param options [Hash] keys read here are :use_cache, :parse_csv and
#   :force_cache; the whole hash is also forwarded to handle_existing_dataset
#   and handle_extracted_dataset
# @return [Object, nil] the cached or freshly handled result; nil when the
#   client is cache-only, nothing usable was found locally and :force_cache is
#   not set
# @raise [CacheNotFoundError] when the client is cache-only, nothing usable was
#   found locally and :force_cache is set
# @raise [DownloadError] when the download response does not report success
def download_dataset(dataset_owner, dataset_name, options = {})
  dataset_path = "#{dataset_owner}/#{dataset_name}"

  # Check cache first for parsed data
  if options[:use_cache] && options[:parse_csv]
    cache_key = generate_cache_key(dataset_path)
    return load_from_cache(cache_key) if cached_file_exists?(cache_key)
  end

  # Check if we already have extracted files for this dataset
  extracted_dir = get_extracted_dir(dataset_path)
  if options[:use_cache] && Dir.exist?(extracted_dir) && !Dir.empty?(extracted_dir)
    return handle_existing_dataset(extracted_dir, options)
  end

  # In cache_only mode nothing is downloaded: either fail loudly or return nil.
  if @cache_only
    if options[:force_cache]
      raise CacheNotFoundError, "Dataset '#{dataset_path}' not found in cache and force_cache is enabled"
    end

    return nil
  end

  # Download the zip file
  response = authenticated_request(:get, "#{Constants::DATASET_ENDPOINTS[:download]}/#{dataset_path}")
  raise DownloadError, "Failed to download dataset: #{response.message}" unless response.success?

  zip_file = save_zip_file(dataset_path, response.body)
  begin
    extract_zip_file(zip_file, extracted_dir)
  ensure
    # Remove the archive even when extraction raises, so a failed run does not
    # leave the downloaded zip behind.
    File.delete(zip_file) if File.exist?(zip_file)
  end

  result = handle_extracted_dataset(extracted_dir, options)

  # Cache parsed CSV data if requested
  if options[:use_cache] && options[:parse_csv] && (result.is_a?(Hash) || result.is_a?(Array))
    cache_parsed_data(generate_cache_key(dataset_path), result)
  end

  result
end
#parse_csv_to_json(file_path) ⇒ Object
87 88 89 90 91 92 93 94 95 96 97 98 99 |
# File 'lib/kaggle/client.rb', line 87
#
# Reads a CSV file with a header row and returns its data rows as hashes.
#
# @param file_path [String] path to the CSV file
# @return [Array<Hash>] one hash per data row, keyed by the header names
# @raise [Error] when the file does not exist or csv_file? rejects it
# @raise [ParseError] when the CSV library reports malformed content
def parse_csv_to_json(file_path)
  raise Error, "File does not exist: #{file_path}" unless File.exist?(file_path)
  raise Error, "File is not a CSV: #{file_path}" unless csv_file?(file_path)

  # Without a block, CSV.foreach returns an enumerator over the rows.
  CSV.foreach(file_path, headers: true).map(&:to_hash)
rescue CSV::MalformedCSVError => e
  raise ParseError, "Failed to parse CSV file: #{e.message}"
end