Class: Aspera::Cli::Plugins::Preview

Inherits:
BasicAuth show all
Defined in:
lib/aspera/cli/plugins/preview.rb

Constant Summary collapse

ACTIONS =
%i[scan events trevents check test show].freeze

Constants inherited from Base

Base::ALL_OPS, Base::GLOBAL_OPS, Base::INSTANCE_OPS, Base::MAX_ITEMS, Base::MAX_PAGES, Base::REGEX_LOOKUP_ID_BY_FIELD

Instance Attribute Summary collapse

Attributes inherited from Base

#context

Instance Method Summary collapse

Methods inherited from BasicAuth

#basic_auth_api, #basic_auth_params, declare_options

Methods inherited from Base

#add_manual_header, #config, declare_options, #do_bulk_operation, #entity_execute, #formatter, #instance_identifier, #list_entities_limit_offset_total_count, #lookup_entity_by_field, #options, #persistency, #query_read_delete, #transfer, #value_create_modify

Constructor Details

#initialize(**_) ⇒ Preview



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/aspera/cli/plugins/preview.rb', line 53

# Build the plugin instance: set generation defaults, declare the CLI options
# of this plugin, parse them, then create a private temporary work folder.
# Keyword arguments are not used here; they are forwarded unchanged to the
# parent class by the bare `super`.
def initialize(**_)
  super
  # conversion types to skip (replaced by the `skip_types` option setter)
  @skip_types = []
  @default_transfer_spec = nil
  # by default generate all supported formats (clone, as altered by options)
  @preview_formats_to_generate = Aspera::Preview::Generator::PREVIEW_FORMATS.clone
  # options for generation
  @gen_options = Aspera::Preview::Options.new
  # used to trigger periodic processing
  @periodic = TimerLimiter.new(LOG_LIMITER_SEC)
  # Proc called with an entry to decide if it is processed (nil until an action sets it)
  @filter_block = nil
  # link CLI options to gen_info attributes
  # options with a `handler` are read/written through accessors of the given object (`o`) named by `m`
  options.declare(
    :skip_format, 'Skip this preview format (multiple possible)', values: Aspera::Preview::Generator::PREVIEW_FORMATS,
    handler: {o: self, m: :option_skip_format}, default: []
  )
  options.declare(
    :folder_reset_cache, 'Force detection of generated preview by refresh cache',
    values: %i[no header read],
    handler: {o: self, m: :option_folder_reset_cache},
    default: :no
  )
  options.declare(:skip_types, 'Skip types in comma separated list', handler: {o: self, m: :option_skip_types})
  options.declare(:previews_folder, 'Preview folder in storage root', handler: {o: self, m: :option_previews_folder}, default: DEFAULT_PREVIEWS_FOLDER)
  options.declare(:skip_folders, 'List of folder to skip', handler: {o: self, m: :option_skip_folders}, default: [])
  options.declare(:base, 'Basename of output for for test')
  options.declare(:scan_path, 'Subpath in folder id to start scan in (default=/)')
  options.declare(:scan_id, 'Folder id in storage to start scan in, default is access key main folder id')
  options.declare(:mimemagic, 'Use Mime type detection of gem mimemagic', values: :bool, default: false)
  options.declare(:overwrite, 'When to overwrite result file', values: %i[always never mtime], handler: {o: self, m: :option_overwrite}, default: :mtime)
  options.declare(
    :file_access, 'How to read and write files in repository',
    values: %i[local remote],
    handler: {o: self, m: :option_file_access},
    default: :local
  )

  # add other options for generator (and set default values)
  Aspera::Preview::Options::DESCRIPTIONS.each do |opt|
    # allowed values: the explicit list if the description has one,
    # else :bool when the default is a simple boolean, else nil (no `else` branch)
    values = if opt.key?(:values)
      opt[:values]
    elsif Cli::Manager::BOOLEAN_SIMPLE.include?(opt[:default])
      :bool
    end
    # these options are stored directly in @gen_options, using the option name as accessor
    options.declare(opt[:name], opt[:description].capitalize, values: values, handler: {o: @gen_options, m: opt[:name]}, default: opt[:default])
  end

  options.parse_options!
  # `skip_folders` is pushed to and iterated later, so it must be an Array
  Aspera.assert_type(@option_skip_folders, Array){'skip_folder'}
  # unique work folder for this run, located in the global temp folder
  @tmp_folder = File.join(TempFileManager.instance.global_temp, "#{TMP_DIR_PREFIX}.#{SecureRandom.uuid}")
  FileUtils.mkdir_p(@tmp_folder)
  Log.log.debug{"tmpdir: #{@tmp_folder}"}
end

Instance Attribute Details

#option_file_accessObject

Returns the value of attribute option_file_access.



51
52
53
# File 'lib/aspera/cli/plugins/preview.rb', line 51

# Reader for the `file_access` option.
# @return [Object] current content of @option_file_access (nil until assigned)
def option_file_access
  return @option_file_access
end

#option_folder_reset_cacheObject

Returns the value of attribute option_folder_reset_cache.



51
52
53
# File 'lib/aspera/cli/plugins/preview.rb', line 51

# Reader for the `folder_reset_cache` option.
# @return [Object] current content of @option_folder_reset_cache (nil until assigned)
def option_folder_reset_cache
  return @option_folder_reset_cache
end

#option_overwriteObject

Returns the value of attribute option_overwrite.



51
52
53
# File 'lib/aspera/cli/plugins/preview.rb', line 51

# Reader for the `overwrite` option.
# @return [Object] current content of @option_overwrite (nil until assigned)
def option_overwrite
  return @option_overwrite
end

#option_previews_folderObject

Returns the value of attribute option_previews_folder. (Note: option_skip_format is not a plain attribute; it has special accessors.)



50
51
52
# File 'lib/aspera/cli/plugins/preview.rb', line 50

# Reader for the `previews_folder` option.
# @return [Object] current content of @option_previews_folder (nil until assigned)
def option_previews_folder
  return @option_previews_folder
end

#option_skip_foldersObject

Returns the value of attribute option_skip_folders.



51
52
53
# File 'lib/aspera/cli/plugins/preview.rb', line 51

# Reader for the `skip_folders` option.
# @return [Object] current content of @option_skip_folders (nil until assigned)
def option_skip_folders
  return @option_skip_folders
end

Instance Method Details

#do_transfer(direction, folder_id, source_filename, destination = '/') ⇒ Object



218
219
220
221
222
223
224
225
226
227
# File 'lib/aspera/cli/plugins/preview.rb', line 218

# Transfer one source path from/to a folder of the node and report the result.
# @param direction       transfer direction (compared to Transfer::Spec::DIRECTION_RECEIVE)
# @param folder_id       id of the node folder used to build the transfer spec
# @param source_filename source path placed in the transfer spec `paths`
# @param destination     destination root; must not be nil when receiving
# @return result of Main.result_transfer for the started transfer
def do_transfer(direction, folder_id, source_filename, destination = '/')
  receive_without_destination = destination.nil? && direction.eql?(Transfer::Spec::DIRECTION_RECEIVE)
  Aspera.assert(!receive_without_destination)
  # tag the transfer as a preview generator transfer
  spec_additions = {
    'paths' => [{'source' => source_filename}],
    'tags'  => {Transfer::Spec::TAG_RESERVED => {PREV_GEN_TAG => true}}
  }
  t_spec = @api_node.transfer_spec_gen4(folder_id, direction, spec_additions)
  # the destination is forced through the transfer agent, else it gets overwritten
  # (do not do: t_spec['destination_root']=destination)
  transfer.option_transfer_spec_deep_merge({'destination_root' => destination})
  transfer_status = transfer.start(t_spec)
  Main.result_transfer(transfer_status)
end

#entry_preview_folder_name(entry) ⇒ Object

defined by node api



266
267
268
# File 'lib/aspera/cli/plugins/preview.rb', line 266

# Name of the folder holding the previews of one entry: the entry id followed
# by PREVIEW_FOLDER_SUFFIX.
# @param entry [Hash] entry with key 'id'
# @return [String]
def entry_preview_folder_name(entry)
  entry_id = entry['id']
  return "#{entry_id}#{PREVIEW_FOLDER_SUFFIX}"
end

#execute_actionObject



401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
# File 'lib/aspera/cli/plugins/preview.rb', line 401

# Entry point of the plugin: read the next command from the command line
# (one of ACTIONS) and execute it.
# For commands other than check/test/show, the node API is first contacted and
# the previews folder is validated (remotely or locally depending on `file_access`).
# The temporary work folder is always removed when leaving (see `ensure`).
# @return the value of Main.result_status for the executed command
# @raise [Cli::Error] if the previews folder is missing
def execute_action
  command = options.get_next_command(ACTIONS)
  unless %i[check test show].include?(command)
    # this will use node api
    @api_node = Api::Node.new(**basic_auth_params)
    @transfer_server_address = URI.parse(@api_node.base_url).host
    # get current access key
    @access_key_self = @api_node.read('access_keys/self')
    # TODO: check events is activated here:
    # note that docroot is good to look at as well
    node_info = @api_node.read('info')
    Log.log.debug{"root: #{node_info['docroot']}"}
    @access_remote = @option_file_access.eql?(:remote)
    Log.log.debug{"remote: #{@access_remote}"}
    Log.log.debug{"access key info: #{@access_key_self}"}
    # TODO: can the previews folder parameter be read from node api ?
    # never generate previews of previews: the previews folder is added to the skip list
    @option_skip_folders.push("/#{@option_previews_folder}")
    if @access_remote
      # NOTE: the filter "name", it's why we take the first one
      @previews_folder_entry = get_folder_entries(@access_key_self['root_file_id'], {name: @option_previews_folder}).first
      raise Cli::Error, "Folder #{@option_previews_folder} does not exist on node. " \
        'Please create it in the storage root, or specify an alternate name.' if @previews_folder_entry.nil?
    else
      Aspera.assert(@access_key_self['storage']['type'].eql?('local')){'only local storage allowed in this mode'}
      @local_storage_root = @access_key_self['storage']['path']
      # TODO: option to override @local_storage_root='xxx'
      # remove the PVCL_LOCAL_STORAGE prefix, if present, to get a plain path
      @local_storage_root = @local_storage_root[PVCL_LOCAL_STORAGE.length..-1] if @local_storage_root.start_with?(PVCL_LOCAL_STORAGE)
      # TODO: windows could have "C:" ?
      Aspera.assert(@local_storage_root.start_with?('/')){"not local storage: #{@local_storage_root}"}
      Aspera.assert(File.directory?(@local_storage_root), type: Cli::Error){"Local storage root folder #{@local_storage_root} does not exist."}
      @local_preview_folder = File.join(@local_storage_root, @option_previews_folder)
      raise Cli::Error, "Folder #{@local_preview_folder} does not exist locally. " \
        'Please create it, or specify an alternate name.' unless File.directory?(@local_preview_folder)
      # protection to avoid clash of file id for two different access keys
      marker_file = File.join(@local_preview_folder, AK_MARKER_FILE)
      Log.log.debug{"marker file: #{marker_file}"}
      if File.exist?(marker_file)
        # the folder was already used: it must have been by the same access key
        ak = File.read(marker_file).chomp
        Aspera.assert(@access_key_self['id'].eql?(ak)){"mismatch access key in #{marker_file}: contains #{ak}, using #{@access_key_self['id']}"}
      else
        # first use of this folder: record the access key id
        File.write(marker_file, @access_key_self['id'])
      end
    end
  end
  Aspera::Preview::FileTypes.instance.use_mimemagic = options.get_option(:mimemagic, mandatory: true)
  # check tools that are anyway required for all cases
  Aspera::Preview::Utils.check_tools(@skip_types)
  case command
  when :scan
    scan_path = options.get_option(:scan_path)
    scan_id = options.get_option(:scan_id)
    # by default start at root
    folder_info =
      if scan_id.nil?
        {
          'id'   => @access_key_self['root_file_id'],
          'name' => '/',
          'type' => 'folder',
          'path' => '/'
        }
      else
        @api_node.read("files/#{scan_id}")
      end
    @filter_block = Api::Node.file_matcher_from_argument(options)
    scan_folder_files(folder_info, scan_path)
    return Main.result_status('scan finished')
  when :events, :trevents
    @filter_block = Api::Node.file_matcher_from_argument(options)
    # nil means: no persistency, all available events are processed each time
    iteration_persistency = nil
    if options.get_option(:once_only, mandatory: true)
      # persisted state is identified by command, node url and user name
      iteration_persistency = PersistencyActionOnce.new(
        manager: persistency,
        data:    [],
        id:      IdGenerator.from_list([
          'preview_iteration',
          command.to_s,
          options.get_option(:url, mandatory: true),
          options.get_option(:username, mandatory: true)
        ])
      )
    end
    # call processing method specified by command line command
    send(:"process_#{command}", iteration_persistency)
    return Main.result_status("#{command} finished")
  when :check
    # tools were checked above, nothing more to do
    return Main.result_status('Tools validated')
  when :test, :show
    # generate a single preview of a local file, without using the node
    source = options.get_next_argument('source file')
    format = options.get_next_argument('format', accept_list: Aspera::Preview::Generator::PREVIEW_FORMATS, default: :png)
    generated_file_path = preview_filename(format, options.get_option(:base))
    g = Aspera::Preview::Generator.new(source, generated_file_path, @gen_options, @tmp_folder, nil)
    g.generate
    if command.eql?(:show)
      # `query` option provides the parameters of the terminal rendering
      terminal_options = options.get_option(:query, default: {}).symbolize_keys
      Log.log.debug{"preview: #{generated_file_path}"}
      formatter.display_status(Aspera::Preview::Terminal.build(File.read(generated_file_path), **terminal_options))
    end
    return Main.result_status("generated: #{generated_file_path}")
  else Aspera.error_unexpected_value(command)
  end
ensure
  # executed on normal return and on exception
  Log.log.debug{"cleaning up temp folder #{@tmp_folder}"}
  FileUtils.rm_rf(@tmp_folder)
end

#generate_preview(entry) ⇒ Object

Generate preview files for one folder entry (file), if necessary. The entry must contain “parent_file_id” when file access is remote.



278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
# File 'lib/aspera/cli/plugins/preview.rb', line 278

# Generate the preview files for one folder entry (file), if necessary.
# Formats for which a preview is already present are filtered according to the
# overwrite policy; formats with no supported conversion, or whose conversion
# type is in the skip list, are dropped.
# Any error is logged and ignored, so that processing continues with next entries.
# @param entry [Hash] file entry, uses keys: 'id', 'name', 'path', 'content_type',
#   and 'parent_file_id' (mandatory when access is remote)
# @return [nil, Object] no meaningful value
def generate_preview(entry)
  # prepare generic information
  gen_infos = @preview_formats_to_generate.map do |preview_format|
    {
      preview_format: preview_format,
      base_dest:      preview_filename(preview_format)
    }
  end
  # lets gather some infos on possibly existing previews
  # it depends if files access locally or remotely
  # folder where previews will be generated for this particular entry
  local_entry_preview_dir = @access_remote ? get_infos_remote(gen_infos, entry) : get_infos_local(gen_infos, entry)
  # here we have the status on preview files
  # let's find if they need generation
  gen_infos.select! do |gen_info|
    # if it exists, what about overwrite policy ?
    if gen_info[:preview_exist]
      case @option_overwrite
      when :always
        # continue: generate
      when :never
        # never overwrite
        next false
      when :mtime
        # skip if preview is newer than original
        next false if gen_info[:preview_newer_than_original]
      end
    end
    begin
      # need generator for further checks
      gen_info[:generator] = Aspera::Preview::Generator.new(gen_info[:src], gen_info[:dst], @gen_options, @tmp_folder, entry['content_type'])
    rescue
      # no conversion supported
      next false
    end
    # shall we skip it ?
    next false if @skip_types.include?(gen_info[:generator].conversion_type)
    # ok we need to generate
    true
  end
  return if gen_infos.empty?
  # create folder if needed
  FileUtils.mkdir_p(local_entry_preview_dir)
  if @access_remote
    Aspera.assert(!entry['parent_file_id'].nil?){'missing parent_file_id in entry'}
    #  download original file to temp folder
    do_transfer(Transfer::Spec::DIRECTION_RECEIVE, entry['parent_file_id'], entry['name'], @tmp_folder)
  end
  Log.log.info{"source: #{entry['id']}: #{entry['path']}"}
  gen_infos.each do |gen_info|
    # best effort: a failure for one format must not prevent other formats,
    # but it is reported instead of being silently discarded
    begin
      gen_info[:generator].generate
    rescue StandardError => e
      Log.log.warn{"Preview generation failed for #{entry['path']} (#{gen_info[:preview_format]}): #{e.message}"}
    end
  end
  if @access_remote
    # upload
    do_transfer(Transfer::Spec::DIRECTION_SEND, @previews_folder_entry['id'], local_entry_preview_dir)
    # cleanup after upload
    FileUtils.rm_rf(local_entry_preview_dir)
    File.delete(File.join(@tmp_folder, entry['name']))
  end
  # force read file updated previews
  @api_node.read("files/#{entry['id']}") if @option_folder_reset_cache.eql?(:read)
rescue StandardError => e
  Log.log.error{"Ignore: #{e.message}"}
  Log.log.debug(e.backtrace.join("\n").red)
end

#get_folder_entries(file_id, request_args = nil) ⇒ Object

/files/id/files is normally cached in redis, but the cache can be bypassed on request; /files/id is not cached.



131
132
133
134
135
136
137
138
139
140
# File 'lib/aspera/cli/plugins/preview.rb', line 131

# List the content of a folder using node API `files/<id>/files`.
# When option `folder_reset_cache` is :header, a no-cache header is added to the request.
# @param file_id      id of the folder
# @param request_args optional query parameters of the request
# @return the :data part of the API call result
def get_folder_entries(file_id, request_args = nil)
  request_headers = {'Accept' => Rest::MIME_JSON}
  bypass_cache = @option_folder_reset_cache.eql?(:header)
  request_headers['X-Aspera-Cache-Control'] = 'no-cache' if bypass_cache
  response = @api_node.call(
    operation: 'GET',
    subpath:   "files/#{file_id}/files",
    headers:   request_headers,
    query:     request_args
  )
  response[:data]
end

#get_infos_local(gen_infos, entry) ⇒ Object



229
230
231
232
233
234
235
236
237
238
239
240
241
# File 'lib/aspera/cli/plugins/preview.rb', line 229

# Local file access: complete each generation info with source and destination
# paths and with the status of a possibly existing preview file.
# @param gen_infos [Array<Hash>] one Hash per format, with key :base_dest; modified in place
#   (keys :src, :dst, :preview_exist, :preview_newer_than_original are set)
# @param entry [Hash] file entry, uses 'path' (and what entry_preview_folder_name uses)
# @return [String] folder where previews of this entry are located
def get_infos_local(gen_infos, entry)
  source_path = File.join(@local_storage_root, entry['path'])
  source_mtime = File.mtime(source_path)
  preview_dir = File.join(@local_preview_folder, entry_preview_folder_name(entry))
  gen_infos.each do |info|
    preview_path = File.join(preview_dir, info[:base_dest])
    info[:src] = source_path
    info[:dst] = preview_path
    already_there = File.exist?(preview_path)
    info[:preview_exist] = already_there
    # modification time of the preview is read only if the preview exists
    info[:preview_newer_than_original] = already_there && File.mtime(preview_path) > source_mtime
  end
  preview_dir
end

#get_infos_remote(gen_infos, entry) ⇒ Object



243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
# File 'lib/aspera/cli/plugins/preview.rb', line 243

# Remote file access: complete each generation info with paths located in the
# temporary folder and with the preview status reported by the node for the file.
# @param gen_infos [Array<Hash>] one Hash per format, with key :base_dest; modified in place
#   (keys :src, :dst, :preview_exist, :preview_newer_than_original are set)
# @param entry [Hash] file entry, uses 'id' and 'name'
# @return [String] temporary folder where previews of this entry are generated
def get_infos_remote(gen_infos, entry)
  # the original file is stored directly in the temp folder
  # (the modification time of the original, entry['modified_time'], is not used)
  downloaded_path = File.join(@tmp_folder, entry['name'])
  preview_dir = File.join(@tmp_folder, entry_preview_folder_name(entry))
  node_file_info = @api_node.read("files/#{entry['id']}")
  # TODO: listing the previews folder does not work because it is hidden in api (gen4)
  # TODO: use gen3 api to list files and get date
  gen_infos.each do |info|
    info[:src] = downloaded_path
    info[:dst] = File.join(preview_dir, info[:base_dest])
    # TODO: use actual list of preview files (but it's hidden)
    has_preview = node_file_info.key?('preview')
    info[:preview_exist] = has_preview
    # TODO: get change time and compare, useful ?
    info[:preview_newer_than_original] = has_preview
  end
  preview_dir
end

#option_skip_formatObject



125
126
127
# File 'lib/aspera/cli/plugins/preview.rb', line 125

# Reader associated with the `skip_format` option.
# Note that it reports the formats that remain to be generated (not the skipped ones).
# @return [String] comma separated list of @preview_formats_to_generate
def option_skip_format
  format_names = @preview_formats_to_generate.map { |preview_format| preview_format.to_s }
  format_names.join(',')
end

#option_skip_format=(value) ⇒ Object



121
122
123
# File 'lib/aspera/cli/plugins/preview.rb', line 121

# Writer associated with the `skip_format` option: each call removes one format
# from the list of formats to generate (no effect if the format is not in the list).
# @param value format to skip
def option_skip_format=(value)
  formats_to_generate = @preview_formats_to_generate
  formats_to_generate.delete(value)
end

#option_skip_typesObject



117
118
119
# File 'lib/aspera/cli/plugins/preview.rb', line 117

# Reader associated with the `skip_types` option.
# @return [String] comma separated list of the conversion types in @skip_types
def option_skip_types
  type_names = @skip_types.map { |conversion_type| conversion_type.to_s }
  type_names.join(',')
end

#option_skip_types=(value) ⇒ Object



108
109
110
111
112
113
114
115
# File 'lib/aspera/cli/plugins/preview.rb', line 108

# Writer associated with the `skip_types` option: replaces the list of skipped
# conversion types.
# Each element is checked against Aspera::Preview::FileTypes::CONVERSION_TYPES
# using Aspera.assert_values.
# @param value [String] comma separated list of conversion types
def option_skip_types=(value)
  @skip_types = []
  value.split(',').each do |type_name|
    conversion_type = type_name.to_sym
    Aspera.assert_values(conversion_type, Aspera::Preview::FileTypes::CONVERSION_TYPES){'skip_types'}
    @skip_types << conversion_type
  end
end

#preview_filename(preview_format, base_name = nil) ⇒ Object

Generate a file name based on basename and format (extension)



271
272
273
274
# File 'lib/aspera/cli/plugins/preview.rb', line 271

# Build a file name from a base name and a format, used as extension.
# @param preview_format format of the preview, used as file extension
# @param base_name      base name of the file; PREVIEW_BASENAME is used if not provided
# @return [String] "<base name>.<format>"
def preview_filename(preview_format, base_name = nil)
  stem = base_name || PREVIEW_BASENAME
  "#{stem}.#{preview_format}"
end

#process_events(iteration_persistency) ⇒ Object

requests recent events on node api and process newly modified folders



186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
# File 'lib/aspera/cli/plugins/preview.rb', line 186

# Request file events of the current access key on the node API and generate
# previews for the files they refer to.
# Files that cannot be read on the node, or located in a skipped folder, are ignored.
# Events flagged `file.deleted` are only reported (not supported yet): the preview
# is generated for the other file events.
# @param iteration_persistency nil, or an object with `data` (Array whose first
#   element is the iteration token) and `save`, used to resume after the last processed event
# @return [nil, Object] no meaningful value
def process_events(iteration_persistency)
  # get new file creation by access key (TODO: what if file already existed?)
  events_filter = {
    'access_key' => @access_key_self['id'],
    'type'       => 'file.*'
  }
  # optionally add iteration token from persistency
  events_filter['iteration_token'] = iteration_persistency.data.first unless iteration_persistency.nil?
  events = @api_node.read('events', events_filter)
  return if events.empty?
  events.each do |event|
    # process only files
    if event.dig('data', 'type').eql?('file')
      # best effort: the file may not exist anymore when the event is processed
      file_entry =
        begin
          @api_node.read("files/#{event['data']['id']}")
        rescue StandardError => e
          Log.log.debug{"Cannot read file #{event['data']['id']}: #{e.message}"}
          nil
        end
      if !file_entry.nil? &&
          @option_skip_folders.none?{ |d| file_entry['path'].start_with?(d)}
        file_entry['parent_file_id'] = event['data']['parent_file_id']
        if event['types'].include?('file.deleted')
          # TODO: removal of previews of deleted files is not implemented
          Log.log.error('TODO'.red)
        else
          generate_preview(file_entry)
        end
      end
    end
    # log/persist periodically or last one
    next unless @periodic.trigger? || event.equal?(events.last)
    Log.log.info{"Processing event #{event['id']}"}
    # save checkpoint to avoid losing processing in case of error
    if !iteration_persistency.nil?
      iteration_persistency.data[0] = event['id'].to_s
      iteration_persistency.save
    end
  end
end

#process_trevents(iteration_persistency) ⇒ Object

old version based on folders



144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'lib/aspera/cli/plugins/preview.rb', line 144

# Old version, based on folders: request `download.ended` events of the current
# access key on the node API, and scan the folder referenced by each successfully
# completed transfer that was not started by the preview generator itself.
# If the node rejects the iteration token, the request is retried once without it.
# @param iteration_persistency nil, or an object with `data` (Array whose first
#   element is the iteration token) and `save`, used to resume after the last processed event
# @return [nil, Object] no meaningful value
# @raise [RestCallError] if events cannot be read
def process_trevents(iteration_persistency)
  events_filter = {
    'access_key' => @access_key_self['id'],
    'type'       => 'download.ended'
  }
  # optionally add iteration token from persistency
  events_filter['iteration_token'] = iteration_persistency.data.first unless iteration_persistency.nil?
  begin
    events = @api_node.read('events', events_filter)
  rescue RestCallError => e
    # bounded retry: only when there is still a token to remove from the request,
    # else the same error would loop forever
    raise unless events_filter.key?('iteration_token') && e.message.include?('Invalid iteration_token')
    Log.log.warn{"Retrying without iteration token: #{e}"}
    events_filter.delete('iteration_token')
    retry
  end
  return if events.empty?
  events.each do |event|
    # the tag PREV_GEN_TAG identifies transfers of the preview generator: those are ignored
    if event['data']['direction'].eql?(Transfer::Spec::DIRECTION_RECEIVE) &&
        event['data']['status'].eql?('completed') &&
        event['data']['error_code'].eql?(0) &&
        event['data'].dig('tags', Transfer::Spec::TAG_RESERVED, PREV_GEN_TAG).nil?
      # folder id is taken from transfer tags, else from the event data
      folder_id = event.dig('data', 'tags', Transfer::Spec::TAG_RESERVED, 'node', 'file_id')
      folder_id ||= event.dig('data', 'file_id')
      if !folder_id.nil?
        # best effort: ignore folders that cannot be read
        folder_entry = @api_node.read("files/#{folder_id}") rescue nil
        scan_folder_files(folder_entry) unless folder_entry.nil?
      end
    end
    # log/persist periodically or last one
    next unless @periodic.trigger? || event.equal?(events.last)
    Log.log.info{"Processed event #{event['id']}"}
    # save checkpoint to avoid losing processing in case of error
    if !iteration_persistency.nil?
      iteration_persistency.data[0] = event['id'].to_s
      iteration_persistency.save
    end
  end
end

#scan_folder_files(top_entry, top_path = nil) ⇒ Object

scan all files in provided folder entry



346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
# File 'lib/aspera/cli/plugins/preview.rb', line 346

# Scan all files located under the provided folder entry and generate their previews.
# Folders are walked using a work list (no recursion). Folders in the skip list and
# links are ignored; files are processed only if accepted by @filter_block.
# An error on one entry is logged and the scan continues with the next entry.
# @param top_entry [Hash] entry where the scan starts, uses keys 'id', 'path', 'type'
# @param top_path [String, nil] if provided, only entries located on that path (its
#   parent folders and its content) are processed; '/' or an empty value means everything
# @return [nil]
def scan_folder_files(top_entry, top_path = nil)
  unless top_path.nil?
    # canonical path: start with / and ends with /
    # the root path must be exactly '/' (and not '//', which would match no sub entry)
    path_elements = top_path.split('/').reject(&:empty?)
    top_path = path_elements.empty? ? '/' : "/#{path_elements.join('/')}/"
  end
  Log.log.debug{"scan: #{top_entry} : #{top_path}".green}
  # don't use recursive call, use list instead
  entries_to_process = [top_entry]
  until entries_to_process.empty?
    entry = entries_to_process.shift
    # process this entry only if it is within the top_path
    entry_path_with_slash = entry['path']
    Log.log.info{"processing entry #{entry_path_with_slash}"} if @periodic.trigger?
    entry_path_with_slash = "#{entry_path_with_slash}/" unless entry_path_with_slash.end_with?('/')
    # keep the entry if it is a parent of top_path, or if it is located under top_path
    if !top_path.nil? && !top_path.start_with?(entry_path_with_slash) && !entry_path_with_slash.start_with?(top_path)
      Log.log.debug{"#{entry['path']} folder (skip start)".bg_red}
      next
    end
    Log.log.debug{"item:#{entry}"}
    begin
      case entry['type']
      when 'file'
        if @filter_block.call(entry)
          generate_preview(entry)
        else
          Log.log.debug('skip by filter')
        end
      when 'link'
        Log.log.debug('Ignoring link.')
      when 'folder'
        if @option_skip_folders.include?(entry['path'])
          Log.log.debug{"#{entry['path']} folder (skip list)".bg_red}
        else
          Log.log.debug{"#{entry['path']} folder".green}
          # get folder content
          folder_entries = get_folder_entries(entry['id'])
          # process all items in current folder
          folder_entries.each do |folder_entry|
            # add path for older versions of ES
            folder_entry['path'] = entry_path_with_slash + folder_entry['name'] if !folder_entry.key?('path')
            folder_entry['parent_file_id'] = entry['id']
            entries_to_process.push(folder_entry)
          end
        end
      else
        Log.log.warn{"unknown entry type: #{entry['type']}"}
      end
    rescue StandardError => e
      Log.log.warn{"An error occurred: #{e}, ignoring"}
    end
  end
end