Class: EasyML::Data::Datasource::S3Datasource
- Inherits:
- Object
- Inheritance chain: Object → EasyML::Data::Datasource::S3Datasource
- Includes:
- GlueGun::DSL
- Defined in:
- lib/easy_ml/data/datasource/s3_datasource.rb
Instance Method Summary
Instance Method Details
#data ⇒ Object
53 54 55 56 57 58 59 60 61 62 63 64 65 |
# File 'lib/easy_ml/data/datasource/s3_datasource.rb', line 53

# Returns the combined dataset.
#
# Calls +pull+ with a block that receives a +did_sync+ flag. When the flag is
# truthy, the frame is rebuilt with +merge_data+ and written to
# "combined_data.csv" under +root_dir+. Otherwise the previously written CSV
# is read back with +Polars.read_csv+ using +polars_args+.
#
# NOTE(review): assumes +pull+ yields to the block before it returns and that
# a truthy flag means new data was synced — confirm against +pull+.
#
# @return [Object, nil] the frame produced by +merge_data+ or
#   +Polars.read_csv+; nil if +pull+ never yields
def data
  # Declared outside the block on purpose: a local first assigned inside a
  # block is scoped to that block and would not be visible on the last line.
  combined_df = nil

  pull do |did_sync|
    output_path = File.join(root_dir, "combined_data.csv")

    if did_sync
      combined_df = merge_data
      combined_df.write_csv(output_path)
    else
      # Keep the loaded frame so it can be returned to the caller.
      combined_df = Polars.read_csv(output_path, **polars_args)
    end
  end

  combined_df
end
#in_batches(of: 10_000) ⇒ Object
40 41 42 43 44 45 46 47 |
# File 'lib/easy_ml/data/datasource/s3_datasource.rb', line 40

# Yields the contents of each entry in +files+, one at a time.
#
# Calls +pull+ first, then reads each file with +Polars.read_csv+ (passing
# +polars_args+) and yields the result to the block.
#
# @param of [Integer] requested batch size. Currently ignored: one yield is
#   made per file regardless of its row count.
# @yield [csv] the value returned by +Polars.read_csv+ for one file
# @return [Enumerator] when called without a block
# @return [Object] the result of +files.each+ when called with a block
def in_batches(of: 10_000)
  # Currently ignores batch size, TODO: implement
  # Without a block, hand back an Enumerator instead of failing at `yield`.
  return enum_for(:in_batches, of: of) unless block_given?

  pull
  files.each do |file|
    yield Polars.read_csv(file, **polars_args)
  end
end
#refresh! ⇒ Object
49 50 51 |
# File 'lib/easy_ml/data/datasource/s3_datasource.rb', line 49

# Triggers a sync by delegating to +synced_directory.sync+.
#
# @return [Object] whatever +synced_directory.sync+ returns
def refresh!
  synced_directory.sync
end
#s3_prefix=(arg) ⇒ Object
19 20 21 |
# File 'lib/easy_ml/data/datasource/s3_datasource.rb', line 19

# Normalizes the prefix before handing it to the inherited writer.
#
# The argument is converted with +to_s+, then one leading and one trailing
# "/" are removed. The result is passed to +super+.
#
# @param arg [#to_s] raw prefix (nil becomes an empty string)
# @return [Object] whatever the inherited writer returns
def s3_prefix=(arg)
  # \A and \z anchor to the whole string. ^ and $ match at every line
  # boundary, so they would also strip slashes next to embedded newlines.
  super(arg.to_s.gsub(%r{\A/|/\z}, ""))
end