Class: Remi::Extractor::SftpFile

Inherits:
FileSystem show all
Defined in:
lib/remi/data_subjects/sftp_file.rb

Overview

SFTP file extractor. Used to extract files from an SFTP server.

class MyJob < Remi::Job
  source :some_file do
    extractor Remi::Extractor::SftpFile.new(
      credentials: {
        host: 'coolserver.com',
        username: 'myself',
        password: 'secret'
      },
      remote_path: '/',
      pattern: /^some_file_\d{14}\.csv/,
      most_recent_only: true
    )

    parser Remi::Parser::CsvFile.new(
      csv_options: {
        headers: true,
        col_sep: ','
      }
    )
  end
end

job = MyJob.new
job.some_file.df
# =>
#      id  name
#   0   1  Albert
#   1   2  Betsy
#   2   3  Camu

Constant Summary collapse

N_RETRY =
3

Instance Attribute Summary collapse

Attributes inherited from FileSystem

#created_within, #group_by, #local_path, #most_recent_by, #most_recent_only, #pattern, #remote_path

Attributes inherited from Remi::Extractor

#logger

Instance Method Summary collapse

Methods inherited from FileSystem

#entries, #get_created_within, #matching_entries, #most_recent_matching_entry, #most_recent_matching_entry_in_group

Constructor Details

#initialize(*args, **kargs, &block) ⇒ SftpFile

Returns a new instance of SftpFile.

Parameters:

  • credentials (Hash)

    Options hash containing login credentials

  • credentials[:host] (String)

    SFTP host (e.g., coolserver.com)

  • credentials[:username] (String)

    SFTP username

  • credentials[:password] (String)

    SFTP password

  • credentials[:port] (String)

    SFTP port (default: 22)



45
46
47
48
# File 'lib/remi/data_subjects/sftp_file.rb', line 45

# Creates the extractor: first runs the parent class initializer, then the
# SFTP-specific setup helper, handing both the arguments received here.
#
# @param args [Array] positional arguments, passed along unchanged
# @param kargs [Hash] keyword arguments, passed along unchanged
# @param block [Proc] optional block; only the bare `super` forwards it
def initialize(*args, **kargs, &block)
  # Bare `super` (no parentheses) re-sends args, kargs and block as received.
  super
  # The setup helper gets the positional and keyword arguments, but not the block.
  init_sftp_extractor(*args, **kargs)
end

Instance Attribute Details

#host ⇒ Object (readonly)

Returns the value of attribute host.



50
51
52
# File 'lib/remi/data_subjects/sftp_file.rb', line 50

# Read-only accessor for the +@host+ instance variable.
#
# @return [Object] the current value of +@host+
def host; @host; end

#password ⇒ Object (readonly)

Returns the value of attribute password.



52
53
54
# File 'lib/remi/data_subjects/sftp_file.rb', line 52

# Read-only accessor for the +@password+ instance variable.
#
# @return [Object] the current value of +@password+
def password; @password; end

#port ⇒ Object (readonly)

Returns the value of attribute port.



53
54
55
# File 'lib/remi/data_subjects/sftp_file.rb', line 53

# Read-only accessor for the +@port+ instance variable.
#
# @return [Object] the current value of +@port+
def port; @port; end

#username ⇒ Object (readonly)

Returns the value of attribute username.



51
52
53
# File 'lib/remi/data_subjects/sftp_file.rb', line 51

# Read-only accessor for the +@username+ instance variable.
#
# @return [Object] the current value of +@username+
def username; @username; end

Instance Method Details

#all_entries ⇒ Array<Extractor::FileSystemEntry>

Returns the (memoized) list of entries found in the remote path on the SFTP server.

Returns:



70
71
72
# File 'lib/remi/data_subjects/sftp_file.rb', line 70

# Cached variant of #all_entries!: the listing is fetched on the first call
# and the stored result is handed back on every later call.
#
# @return [Array<Extractor::FileSystemEntry>] the cached listing
def all_entries
  return @all_entries if @all_entries

  @all_entries = all_entries!
end

#all_entries! ⇒ Array<Extractor::FileSystemEntry>

Returns the list of entries found in the remote path on the SFTP server. Not memoized: every call queries the server again.

Returns:



75
76
77
78
79
80
81
82
83
84
85
# File 'lib/remi/data_subjects/sftp_file.rb', line 75

# Lists +@remote_path+ through the SFTP connection and wraps every listed
# item in a FileSystemEntry. Nothing is cached here.
#
# @return [Array<Extractor::FileSystemEntry>] one entry per listed item
def all_entries!
  listing = connection { |sftp| sftp.dir.entries(@remote_path) }

  listing.map do |item|
    attrs = item.attributes

    # Attribute objects without a createtime reader fall back to the
    # modification time (early protocol versions lack a create time).
    created =
      if attrs.respond_to?(:createtime)
        attrs.createtime
      else
        attrs.mtime
      end

    FileSystemEntry.new(
      pathname: File.join(@remote_path, item.name),
      create_time: created,
      modified_time: attrs.mtime
    )
  end
end

#extract ⇒ Array<String>

Called to extract files from the source filesystem.

Returns:

  • (Array<String>)

    An array of paths to a local copy of the files extracted



57
58
59
60
61
62
63
64
65
66
67
# File 'lib/remi/data_subjects/sftp_file.rb', line 57

# Downloads every entry returned by +entries+ from +@remote_path+ into
# +@local_path+ over a single SFTP connection. Each transfer is logged and
# wrapped in +retry_download+.
#
# @return [Array<String>] local paths of the downloaded files, as produced by
#   the block handed to +connection+
def extract
  connection do |sftp|
    entries.map do |remote_entry|
      file_name = remote_entry.name

      # tap makes the local destination path the value collected by map.
      File.join(@local_path, file_name).tap do |destination|
        logger.info "Downloading #{file_name} to #{destination}"
        source = File.join(@remote_path, file_name)
        retry_download { sftp.download!(source, destination) }
      end
    end
  end
end