Class: EasyML::Data::Datasource::S3Datasource

Inherits:
Object
  • Object
show all
Includes:
GlueGun::DSL
Defined in:
lib/easy_ml/data/datasource/s3_datasource.rb

Instance Method Summary collapse

Instance Method Details

#data ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/easy_ml/data/datasource/s3_datasource.rb', line 53

# Returns the combined dataset for this datasource.
#
# Calls +pull+ with a block that receives a +did_sync+ flag. When the flag is
# truthy, the result of +merge_data+ is written to "combined_data.csv" under
# +root_dir+; otherwise that file is read back through Polars using
# +polars_args+.
# NOTE(review): +did_sync+ presumably reports whether +pull+ fetched new
# files -- confirm against +pull+.
#
# @return [Object, nil] the frame produced by whichever branch ran, or +nil+
#   if +pull+ never invokes the block
def data
  # Declared before the block: a local first assigned inside a block is scoped
  # to that block and would not be visible on the final line.
  combined_df = nil

  pull do |did_sync|
    output_path = File.join(root_dir, "combined_data.csv")

    if did_sync
      combined_df = merge_data
      combined_df.write_csv(output_path)
    else
      # Keep the frame that was read; previously this result was discarded.
      combined_df = Polars.read_csv(output_path, **polars_args)
    end
  end

  combined_df
end

#in_batches(of: 10_000) ⇒ Object



40
41
42
43
44
45
46
47
# File 'lib/easy_ml/data/datasource/s3_datasource.rb', line 40

# Yields the contents of every entry in +files+, one Polars CSV read per file.
#
# +pull+ is invoked once before iteration starts.
#
# @param of [Integer] requested batch size; accepted but currently unused
# @yield [csv] the value returned by +Polars.read_csv+ for one file
# @return [Enumerator] when called without a block; otherwise whatever
#   +files.each+ returns
def in_batches(of: 10_000)
  # A bare +yield+ with no block raises LocalJumpError, so hand back an
  # enumerator over the same sequence instead.
  return enum_for(:in_batches, of: of) unless block_given?

  # Currently ignores batch size, TODO: implement
  pull
  files.each do |file|
    yield Polars.read_csv(file, **polars_args)
  end
end

#refresh! ⇒ Object



49
50
51
# File 'lib/easy_ml/data/datasource/s3_datasource.rb', line 49

# Triggers a sync on the object returned by +synced_directory+.
#
# @return [Object] whatever that +sync+ call returns
def refresh!
  directory = synced_directory
  directory.sync
end

#s3_prefix=(arg) ⇒ Object



19
20
21
# File 'lib/easy_ml/data/datasource/s3_datasource.rb', line 19

# Writer for the S3 prefix that normalises the value before storing it.
#
# The argument is converted with +to_s+, a "/" sitting at the start or at the
# end of a line is removed (one character per position, so "//a//" becomes
# "/a/"), and the cleaned string is handed to the inherited writer.
#
# @param arg [#to_s] raw prefix; +nil+ becomes the empty string
def s3_prefix=(arg)
  trimmed = arg.to_s.gsub(%r{^/|/$}, "")
  super(trimmed)
end