Class: OpenC3::BucketUtilities

Inherits:
Object
  • Object
show all
Defined in:
lib/openc3/utilities/bucket_utilities.rb

Constant Summary collapse

FILE_TIMESTAMP_FORMAT =
"%Y%m%d%H%M%S%N"
DIRECTORY_TIMESTAMP_FORMAT =
"%Y%m%d"

Class Method Summary collapse

Class Method Details

.bucket_load(*args, scope: $openc3_scope) ⇒ Object

Raises:

  • (LoadError)


33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/openc3/utilities/bucket_utilities.rb', line 33

# Load and evaluate a TARGET script stored in the bucket, mimicking
# Kernel#require / Kernel#load semantics for bucket-resident files.
#
# @param args [Array] First element is the relative TARGET file path
# @param scope [String] Scope to load from (falls back to OPENC3_SCOPE then DEFAULT)
# @return [true] on a successful load
# @raise [LoadError] if the path is not a relative TARGET path or the file is missing
def self.bucket_load(*args, scope: $openc3_scope)
  scope ||= ENV['OPENC3_SCOPE']
  scope ||= 'DEFAULT'
  path = args[0]

  # Only support TARGET files: must be relative and start with an
  # all-uppercase target directory name
  top_dir = path.split('/')[0]
  if path.start_with?('/') or top_dir.to_s.upcase != top_dir
    raise LoadError, "only relative TARGET files are allowed -- #{path}"
  end
  path += '.rb' if File.extname(path).empty?

  # Retrieve the text of the script from the bucket
  text = TargetFile.body(scope, path)
  raise LoadError, "Bucket file #{path} not found for scope #{scope}" unless text

  # Execute the script directly without instrumentation because we are doing require/load
  Object.class_eval(text, path, 1)

  # Successful load/require returns true
  true
end

.compress_file(filename, chunk_size = 50_000_000) ⇒ Object



129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/openc3/utilities/bucket_utilities.rb', line 129

# Gzip compress a local file, reading it in chunks so arbitrarily large
# files never have to fit in memory.
#
# @param filename [String] Path of the file to compress
# @param chunk_size [Integer] Bytes read per iteration (defaults to 50MB)
# @return [String] Path of the gzipped file (filename with '.gz' appended)
def self.compress_file(filename, chunk_size = 50_000_000)
  # NOTE: fixed garbled interpolation — the zipped name is the original
  # filename plus '.gz' (callers append '.gz' to the bucket key to match)
  zipped = "#{filename}.gz"

  Zlib::GzipWriter.open(zipped) do |gz|
    # Preserve the original mtime and name in the gzip header
    gz.mtime = File.mtime(filename)
    gz.orig_name = filename
    File.open(filename, 'rb') do |file|
      while chunk = file.read(chunk_size) do
        gz.write(chunk)
      end
    end
  end

  return zipped
end

.directory_in_time_range(directory, start_time, end_time) ⇒ Object



169
170
171
172
173
174
175
176
177
178
# File 'lib/openc3/utilities/bucket_utilities.rb', line 169

# True if the YYYYMMDD-named directory overlaps the given time range.
# A nil start_time or end_time means unbounded on that side.
#
# @param directory [String] Path whose basename is a DIRECTORY_TIMESTAMP_FORMAT date
# @param start_time [Time|nil] Range start (inclusive of overlapping days)
# @param end_time [Time|nil] Range end
# @return [Boolean] whether the directory's day intersects the range
def self.directory_in_time_range(directory, start_time, end_time)
  day_start = DateTime.strptime(File.basename(directory), DIRECTORY_TIMESTAMP_FORMAT).to_time
  day_end = day_start + Time::SEC_PER_DAY
  starts_before_day_ends = !start_time || start_time < day_end
  ends_after_day_starts = !end_time || end_time >= day_start
  starts_before_day_ends && ends_after_day_starts
end

.file_in_time_range(bucket_path, start_time, end_time, overlap:) ⇒ Object



192
193
194
195
196
197
198
199
200
201
202
203
204
# File 'lib/openc3/utilities/bucket_utilities.rb', line 192

# True if the timestamped file at bucket_path falls within the time range.
# With overlap: true, any partial intersection counts; otherwise the file
# must be entirely contained within the range. nil bounds are unbounded.
#
# @param bucket_path [String] Path whose basename encodes start/end timestamps
# @param start_time [Time|nil] Range start
# @param end_time [Time|nil] Range end
# @param overlap [Boolean] Whether partially-overlapping files count
# @return [Boolean]
def self.file_in_time_range(bucket_path, start_time, end_time, overlap:)
  file_start, file_end = get_file_times(bucket_path)
  # Overlap compares against the far edges; containment against the near edges
  lower, upper = overlap ? [file_end, file_start] : [file_start, file_end]
  (!start_time || start_time <= lower) && (!end_time || end_time >= upper)
end

.files_between_time(bucket, prefix, start_time, end_time, file_suffix: nil, overlap: false, max_request: 1000, max_total: 100_000) ⇒ Object

Parameters:

  • bucket (String)

    Name of the bucket to list

  • prefix (String)

    Prefix to filter all files by

  • start_time (Time|nil)

    Ruby time to find files after. nil means no start (first file on).

  • end_time (Time|nil)

    Ruby time to find files before. nil means no end (up to last file).

  • overlap (Boolean) (defaults to: false)

    Whether to include files which overlap the start and end time

  • max_request (Integer) (defaults to: 1000)

    How many files to request in each API call

  • max_total (Integer) (defaults to: 100_000)

    Total number of files before stopping API requests



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/openc3/utilities/bucket_utilities.rb', line 63

# List all files under bucket/prefix whose timestamped names fall between
# start_time and end_time. Directories are pre-filtered by day before the
# per-directory object listings are requested.
#
# @param bucket [String] Name of the bucket to list
# @param prefix [String] Prefix to filter all files by
# @param start_time [Time|nil] nil means no start (first file on)
# @param end_time [Time|nil] nil means no end (up to last file)
# @param file_suffix [String|nil] Optional suffix files must end with
# @param overlap [Boolean] Include files which overlap the start and end time
# @param max_request [Integer] How many files to request in each API call
# @param max_total [Integer] Total number of files before stopping API requests
# @return [Array<String>] Matching file keys
def self.files_between_time(bucket, prefix, start_time, end_time, file_suffix: nil,
                            overlap: false, max_request: 1000, max_total: 100_000)
  client = Bucket.getClient()

  # Return nothing if bucket doesn't exist (it won't at the very beginning)
  return [] unless client.exist?(bucket)

  directories = client.list_files(bucket: bucket, path: prefix, only_directories: true)
  in_range = filter_directories_to_time_range(directories, start_time, end_time)
  in_range.flat_map do |directory|
    directory_files = client.list_objects(bucket: bucket, prefix: "#{prefix}/#{directory}", max_request: max_request, max_total: max_total)
    filter_files_to_time_range(directory_files, start_time, end_time, file_suffix: file_suffix, overlap: overlap)
  end
end

.filter_directories_to_time_range(directories, start_time, end_time) ⇒ Object

Private methods



161
162
163
164
165
166
167
# File 'lib/openc3/utilities/bucket_utilities.rb', line 161

# Select only the directories whose date falls within the time range.
#
# @param directories [Array<String>] YYYYMMDD directory names
# @param start_time [Time|nil] Range start (nil = unbounded)
# @param end_time [Time|nil] Range end (nil = unbounded)
# @return [Array<String>] The in-range directories, original order preserved
def self.filter_directories_to_time_range(directories, start_time, end_time)
  directories.select { |dir| directory_in_time_range(dir, start_time, end_time) }
end

.filter_files_to_time_range(files, start_time, end_time, file_suffix: nil, overlap: false) ⇒ Object



180
181
182
183
184
185
186
187
188
189
190
# File 'lib/openc3/utilities/bucket_utilities.rb', line 180

# Select the keys of files matching the optional suffix whose timestamps
# fall within the time range.
#
# @param files [Array] Bucket objects responding to #key
# @param start_time [Time|nil] Range start (nil = unbounded)
# @param end_time [Time|nil] Range end (nil = unbounded)
# @param file_suffix [String|nil] If given, keys must end with this suffix
# @param overlap [Boolean] Whether partially-overlapping files count
# @return [Array<String>] Matching file keys, original order preserved
def self.filter_files_to_time_range(files, start_time, end_time, file_suffix: nil, overlap: false)
  files.filter_map do |file|
    key = file.key.to_s
    next if file_suffix && !key.end_with?(file_suffix)
    key if file_in_time_range(key, start_time, end_time, overlap: overlap)
  end
end

.get_cache_control(filename) ⇒ Object



121
122
123
124
125
126
127
# File 'lib/openc3/utilities/bucket_utilities.rb', line 121

# Determine the Cache-Control value for a file based on its name.
# Files carrying a version number (e.g. 1.2.3) or a 20 character hex
# content hash are immutable per-name, so they may be cached (nil);
# everything else gets 'no-store'.
#
# @param filename [String] Name of the file being served
# @return [String|nil] 'no-store', or nil when caching is allowed
def self.get_cache_control(filename)
  versioned = filename =~ /(-|_|\.)\d+(-|_|\.)\d+(-|_|\.)\d+\./
  content_hashed = filename =~ /\.[a-f0-9]{20}\./
  (versioned || content_hashed) ? nil : 'no-store'
end

.get_file_times(bucket_path) ⇒ Object



206
207
208
209
210
211
212
# File 'lib/openc3/utilities/bucket_utilities.rb', line 206

# Parse the start and end Times out of a log file name of the form
# <start>__<end>__<rest>, where the timestamps use FILE_TIMESTAMP_FORMAT.
#
# @param bucket_path [String] Bucket key; only the basename is parsed
# @return [Array(Time, Time)] The file's start and end times
def self.get_file_times(bucket_path)
  start_stamp, end_stamp, _ = File.basename(bucket_path).split("__")
  [start_stamp, end_stamp].map do |stamp|
    DateTime.strptime(stamp, FILE_TIMESTAMP_FORMAT).to_time
  end
end

.move_log_file_to_bucket(filename, bucket_key, metadata: {}) ⇒ Object



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/openc3/utilities/bucket_utilities.rb', line 83

# Move a local log file into the logs bucket on a background thread,
# gzipping anything that isn't a .txt file first. Local files are
# deleted after a successful upload.
#
# @param filename [String] Local path of the log file to upload
# @param bucket_key [String] Key (path) in the bucket to write to
# @param metadata [Hash] Optional metadata to store with the object
# @return [Thread] The thread performing the upload
def self.move_log_file_to_bucket(filename, bucket_key, metadata: {})
  Thread.new do
    client = Bucket.getClient()

    orig_filename = nil
    # Everything except plain .txt logs is compressed before upload
    if File.extname(filename) != '.txt'
      orig_filename = filename
      filename = compress_file(filename)
      bucket_key += '.gz'
    end

    retry_count = 0
    begin
      # We want to open this as a file and pass that to put_object to allow
      # this to work with really large files. Otherwise the entire file has
      # to be held in memory!
      File.open(filename, 'rb') do |file|
        client.put_object(bucket: ENV['OPENC3_LOGS_BUCKET'], key: bucket_key, body: file, metadata: metadata)
      end
    rescue => err
      # Try to upload file three times
      retry_count += 1
      raise err if retry_count >= 3
      # NOTE: fixed garbled interpolation — log the filename that failed
      Logger.warn("Error saving log file to bucket - retry #{retry_count}: #{filename}\n#{err.formatted}")
      sleep(1)
      retry
    end

    Logger.debug "wrote #{ENV['OPENC3_LOGS_BUCKET']}/#{bucket_key}"
    ReducerModel.add_file(bucket_key) # Record the new file for data reduction

    File.delete(orig_filename) if orig_filename
    File.delete(filename)
  rescue => err
    # Final failure (including the re-raised upload error) is logged, not raised,
    # because this runs on a detached thread
    Logger.error("Error saving log file to bucket: #{filename}\n#{err.formatted}")
  end
end

.uncompress_file(filename, chunk_size = 50_000_000) ⇒ Object



145
146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/openc3/utilities/bucket_utilities.rb', line 145

# Gunzip a .gz file in chunks so large files never have to fit in memory.
#
# @param filename [String] Path of the gzipped file (must end in .gz)
# @param chunk_size [Integer] Bytes read per iteration (defaults to 50MB)
# @return [String] Path of the uncompressed file (filename minus '.gz')
def self.uncompress_file(filename, chunk_size = 50_000_000)
  unzipped = filename[0..-4] # Drop .gz

  File.open(unzipped, 'wb') do |out|
    Zlib::GzipReader.open(filename) do |gz|
      until (chunk = gz.read(chunk_size)).nil?
        out.write(chunk)
      end
    end
  end

  unzipped
end