Class: OpenC3::BucketUtilities

Inherits:
Object
  • Object
show all
Defined in:
lib/openc3/utilities/bucket_utilities.rb

Constant Summary collapse

FILE_TIMESTAMP_FORMAT =
"%Y%m%d%H%M%S%N"
DIRECTORY_TIMESTAMP_FORMAT =
"%Y%m%d"

Class Method Summary collapse

Class Method Details

.bucket_load(*args, scope: $openc3_scope) ⇒ Object

Raises:

  • (LoadError)


33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/openc3/utilities/bucket_utilities.rb', line 33

# Load and evaluate a TARGET script stored in the bucket, mimicking
# Kernel#require / Kernel#load semantics for bucket-resident files.
#
# @param args [Array] First element is the relative TARGET file path
# @param scope [String] Scope to load from (falls back to OPENC3_SCOPE then DEFAULT)
# @return [true] on a successful load
# @raise [LoadError] if the path is not a relative TARGET path or the file is missing
def self.bucket_load(*args, scope: $openc3_scope)
  scope ||= ENV['OPENC3_SCOPE']
  scope ||= 'DEFAULT'
  path = args[0]

  # Only support TARGET files: must be relative and start with an
  # all-uppercase target directory name
  top_dir = path.split('/')[0]
  if path.start_with?('/') or top_dir.to_s.upcase != top_dir
    raise LoadError, "only relative TARGET files are allowed -- #{path}"
  end
  path += '.rb' if File.extname(path).empty?

  # Retrieve the text of the script from the bucket
  text = TargetFile.body(scope, path)
  raise LoadError, "Bucket file #{path} not found for scope #{scope}" unless text

  # Execute the script directly without instrumentation because we are doing require/load
  Object.class_eval(text, path, 1)

  # Successful load/require returns true
  true
end

.compress_file(filename, chunk_size = 50_000_000) ⇒ Object



129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/openc3/utilities/bucket_utilities.rb', line 129

# Gzip compress a local file, reading it in chunks so arbitrarily large
# files never have to fit in memory.
#
# @param filename [String] Path of the file to compress
# @param chunk_size [Integer] Bytes read per iteration (defaults to 50MB)
# @return [String] Path of the gzipped file (filename with '.gz' appended)
def self.compress_file(filename, chunk_size = 50_000_000)
  # NOTE: fixed garbled interpolation — the zipped name is the original
  # filename plus '.gz' (callers append '.gz' to the bucket key to match)
  zipped = "#{filename}.gz"

  Zlib::GzipWriter.open(zipped) do |gz|
    # Preserve the original mtime and name in the gzip header
    gz.mtime = File.mtime(filename)
    gz.orig_name = filename
    File.open(filename, 'rb') do |file|
      while chunk = file.read(chunk_size) do
        gz.write(chunk)
      end
    end
  end

  return zipped
end

.directory_in_time_range(directory, start_time, end_time) ⇒ Object



169
170
171
172
173
174
175
176
177
178
# File 'lib/openc3/utilities/bucket_utilities.rb', line 169

# True if the YYYYMMDD-named directory overlaps the given time range.
# A nil start_time or end_time means unbounded on that side.
#
# @param directory [String] Path whose basename is a DIRECTORY_TIMESTAMP_FORMAT date
# @param start_time [Time|nil] Range start (inclusive of overlapping days)
# @param end_time [Time|nil] Range end
# @return [Boolean] whether the directory's day intersects the range
def self.directory_in_time_range(directory, start_time, end_time)
  day_start = DateTime.strptime(File.basename(directory), DIRECTORY_TIMESTAMP_FORMAT).to_time
  day_end = day_start + Time::SEC_PER_DAY
  starts_before_day_ends = !start_time || start_time < day_end
  ends_after_day_starts = !end_time || end_time >= day_start
  starts_before_day_ends && ends_after_day_starts
end

.file_in_time_range(bucket_path, start_time, end_time, overlap:) ⇒ Object



192
193
194
195
196
197
198
199
200
201
202
203
204
# File 'lib/openc3/utilities/bucket_utilities.rb', line 192

# True if the timestamped file at bucket_path falls within the time range.
# With overlap: true, any partial intersection counts; otherwise the file
# must be entirely contained within the range. nil bounds are unbounded.
#
# @param bucket_path [String] Path whose basename encodes start/end timestamps
# @param start_time [Time|nil] Range start
# @param end_time [Time|nil] Range end
# @param overlap [Boolean] Whether partially-overlapping files count
# @return [Boolean]
def self.file_in_time_range(bucket_path, start_time, end_time, overlap:)
  file_start, file_end = get_file_times(bucket_path)
  # Overlap compares against the far edges; containment against the near edges
  lower, upper = overlap ? [file_end, file_start] : [file_start, file_end]
  (!start_time || start_time <= lower) && (!end_time || end_time >= upper)
end

.files_between_time(bucket, prefix, start_time, end_time, file_suffix: nil, overlap: false, max_request: 1000, max_total: 100_000) ⇒ Object

Parameters:

  • bucket (String)

    Name of the bucket to list

  • prefix (String)

    Prefix to filter all files by

  • start_time (Time|nil)

    Ruby time to find files after. nil means no start (first file on).

  • end_time (Time|nil)

    Ruby time to find files before. nil means no end (up to last file).

  • overlap (Boolean) (defaults to: false)

    Whether to include files which overlap the start and end time

  • max_request (Integer) (defaults to: 1000)

    How many files to request in each API call

  • max_total (Integer) (defaults to: 100_000)

    Total number of files before stopping API requests



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/openc3/utilities/bucket_utilities.rb', line 63

# List all files under bucket/prefix whose timestamped names fall between
# start_time and end_time. Directories are pre-filtered by day before the
# per-directory object listings are requested.
#
# @param bucket [String] Name of the bucket to list
# @param prefix [String] Prefix to filter all files by
# @param start_time [Time|nil] nil means no start (first file on)
# @param end_time [Time|nil] nil means no end (up to last file)
# @param file_suffix [String|nil] Optional suffix files must end with
# @param overlap [Boolean] Include files which overlap the start and end time
# @param max_request [Integer] How many files to request in each API call
# @param max_total [Integer] Total number of files before stopping API requests
# @return [Array<String>] Matching file keys
def self.files_between_time(bucket, prefix, start_time, end_time, file_suffix: nil,
                            overlap: false, max_request: 1000, max_total: 100_000)
  client = Bucket.getClient()

  # Return nothing if bucket doesn't exist (it won't at the very beginning)
  return [] unless client.exist?(bucket)

  directories = client.list_files(bucket: bucket, path: prefix, only_directories: true)
  in_range = filter_directories_to_time_range(directories, start_time, end_time)
  in_range.flat_map do |directory|
    directory_files = client.list_objects(bucket: bucket, prefix: "#{prefix}/#{directory}", max_request: max_request, max_total: max_total)
    filter_files_to_time_range(directory_files, start_time, end_time, file_suffix: file_suffix, overlap: overlap)
  end
end

.filter_directories_to_time_range(directories, start_time, end_time) ⇒ Object

Private methods



161
162
163
164
165
166
167
# File 'lib/openc3/utilities/bucket_utilities.rb', line 161

# Select only the directories whose date falls within the time range.
#
# @param directories [Array<String>] YYYYMMDD directory names
# @param start_time [Time|nil] Range start (nil = unbounded)
# @param end_time [Time|nil] Range end (nil = unbounded)
# @return [Array<String>] The in-range directories, original order preserved
def self.filter_directories_to_time_range(directories, start_time, end_time)
  directories.select { |dir| directory_in_time_range(dir, start_time, end_time) }
end

.filter_files_to_time_range(files, start_time, end_time, file_suffix: nil, overlap: false) ⇒ Object



180
181
182
183
184
185
186
187
188
189
190
# File 'lib/openc3/utilities/bucket_utilities.rb', line 180

# Select the keys of files matching the optional suffix whose timestamps
# fall within the time range.
#
# @param files [Array] Bucket objects responding to #key
# @param start_time [Time|nil] Range start (nil = unbounded)
# @param end_time [Time|nil] Range end (nil = unbounded)
# @param file_suffix [String|nil] If given, keys must end with this suffix
# @param overlap [Boolean] Whether partially-overlapping files count
# @return [Array<String>] Matching file keys, original order preserved
def self.filter_files_to_time_range(files, start_time, end_time, file_suffix: nil, overlap: false)
  files.filter_map do |file|
    key = file.key.to_s
    next if file_suffix && !key.end_with?(file_suffix)
    key if file_in_time_range(key, start_time, end_time, overlap: overlap)
  end
end

.get_cache_control(filename) ⇒ Object



121
122
123
124
125
126
127
# File 'lib/openc3/utilities/bucket_utilities.rb', line 121

# Determine the Cache-Control value for a file based on its name.
# Files carrying a version number (e.g. 1.2.3) or a 20 character hex
# content hash are immutable per-name, so they may be cached (nil);
# everything else gets 'no-store'.
#
# @param filename [String] Name of the file being served
# @return [String|nil] 'no-store', or nil when caching is allowed
def self.get_cache_control(filename)
  versioned = filename =~ /(-|_|\.)\d+(-|_|\.)\d+(-|_|\.)\d+\./
  content_hashed = filename =~ /\.[a-f0-9]{20}\./
  (versioned || content_hashed) ? nil : 'no-store'
end

.get_file_times(bucket_path) ⇒ Object



206
207
208
209
210
211
212
# File 'lib/openc3/utilities/bucket_utilities.rb', line 206

# Parse the start and end Times out of a log file name of the form
# <start>__<end>__<rest>, where the timestamps use FILE_TIMESTAMP_FORMAT.
#
# @param bucket_path [String] Bucket key; only the basename is parsed
# @return [Array(Time, Time)] The file's start and end times
def self.get_file_times(bucket_path)
  start_stamp, end_stamp, _ = File.basename(bucket_path).split("__")
  [start_stamp, end_stamp].map do |stamp|
    DateTime.strptime(stamp, FILE_TIMESTAMP_FORMAT).to_time
  end
end

.move_log_file_to_bucket(filename, bucket_key, metadata: {}) ⇒ Object



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/openc3/utilities/bucket_utilities.rb', line 83

# Move a local log file into the logs bucket on a background thread,
# gzipping anything that isn't a .txt file first. Local files are
# deleted after a successful upload.
#
# @param filename [String] Local path of the log file to upload
# @param bucket_key [String] Key (path) in the bucket to write to
# @param metadata [Hash] Optional metadata to store with the object
# @return [Thread] The thread performing the upload
def self.move_log_file_to_bucket(filename, bucket_key, metadata: {})
  Thread.new do
    client = Bucket.getClient()

    orig_filename = nil
    # Everything except plain .txt logs is compressed before upload
    if File.extname(filename) != '.txt'
      orig_filename = filename
      filename = compress_file(filename)
      bucket_key += '.gz'
    end

    retry_count = 0
    begin
      # We want to open this as a file and pass that to put_object to allow
      # this to work with really large files. Otherwise the entire file has
      # to be held in memory!
      File.open(filename, 'rb') do |file|
        client.put_object(bucket: ENV['OPENC3_LOGS_BUCKET'], key: bucket_key, body: file, metadata: metadata)
      end
    rescue => err
      # Try to upload file three times
      retry_count += 1
      raise err if retry_count >= 3
      # NOTE: fixed garbled interpolation — log the filename that failed
      Logger.warn("Error saving log file to bucket - retry #{retry_count}: #{filename}\n#{err.formatted}")
      sleep(1)
      retry
    end

    Logger.debug "wrote #{ENV['OPENC3_LOGS_BUCKET']}/#{bucket_key}"
    ReducerModel.add_file(bucket_key) # Record the new file for data reduction

    File.delete(orig_filename) if orig_filename
    File.delete(filename)
  rescue => err
    # Final failure (including the re-raised upload error) is logged, not raised,
    # because this runs on a detached thread
    Logger.error("Error saving log file to bucket: #{filename}\n#{err.formatted}")
  end
end

.uncompress_file(filename, chunk_size = 50_000_000) ⇒ Object



145
146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/openc3/utilities/bucket_utilities.rb', line 145

# Gunzip a .gz file in chunks so large files never have to fit in memory.
#
# @param filename [String] Path of the gzipped file (must end in .gz)
# @param chunk_size [Integer] Bytes read per iteration (defaults to 50MB)
# @return [String] Path of the uncompressed file (filename minus '.gz')
def self.uncompress_file(filename, chunk_size = 50_000_000)
  unzipped = filename[0..-4] # Drop .gz

  File.open(unzipped, 'wb') do |out|
    Zlib::GzipReader.open(filename) do |gz|
      until (chunk = gz.read(chunk_size)).nil?
        out.write(chunk)
      end
    end
  end

  unzipped
end