Class: Remi::Extractor::FileSystem

Inherits:
Remi::Extractor show all
Defined in:
lib/remi/data_subjects/file_system.rb

Overview

Parent class used to describe things that behave like file systems (e.g., local file systems, ftp servers, S3 objects) to be used for extraction.

Direct Known Subclasses

Gsheet, LocalFile, S3File, SftpFile

Defined Under Namespace

Classes: FileNotFoundError

Instance Attribute Summary collapse

Attributes inherited from Remi::Extractor

#logger

Instance Method Summary collapse

Constructor Details

#initialize(*args, **kargs, &block) ⇒ FileSystem

Returns a new instance of FileSystem.



42
43
44
45
# File 'lib/remi/data_subjects/file_system.rb', line 42

# Builds a new FileSystem extractor.
#
# All arguments (and the block) are forwarded to the parent initializer via
# bare `super`; the same positional and keyword arguments are then passed to
# init_file_system, which is defined outside this excerpt.
def initialize(*args, **kargs, &block)
  super
  init_file_system(*args, **kargs)
end

Instance Attribute Details

#created_within ⇒ Object (readonly)

Returns the value of attribute created_within.



53
54
55
# File 'lib/remi/data_subjects/file_system.rb', line 53

# Reader for @created_within, the age threshold that get_created_within
# compares each entry's age against (its error messages describe it in hours).
def created_within
  @created_within
end

#group_by ⇒ Object (readonly)

Returns the value of attribute group_by.



51
52
53
# File 'lib/remi/data_subjects/file_system.rb', line 51

# Reader for @group_by, the pattern that most_recent_matching_entry_in_group
# matches against entry names; its captures form the group key.
def group_by
  @group_by
end

#local_path ⇒ Object (readonly)

Returns the value of attribute local_path.



49
50
51
# File 'lib/remi/data_subjects/file_system.rb', line 49

# Reader for @local_path.
# NOTE(review): presumably the local destination for extracted files; it is
# not used by any method visible in this excerpt - confirm against subclasses.
def local_path
  @local_path
end

#most_recent_by ⇒ Object (readonly)

Returns the value of attribute most_recent_by.



52
53
54
# File 'lib/remi/data_subjects/file_system.rb', line 52

# Reader for @most_recent_by, the name of the entry method that is sent to
# each entry to rank entries by recency.
def most_recent_by
  @most_recent_by
end

#most_recent_only ⇒ Object (readonly)

Returns the value of attribute most_recent_only.



50
51
52
# File 'lib/remi/data_subjects/file_system.rb', line 50

# Reader for @most_recent_only, the flag that makes #entries return only the
# single most recent matching entry.
def most_recent_only
  @most_recent_only
end

#pattern ⇒ Object (readonly)

Returns the value of attribute pattern.



48
49
50
# File 'lib/remi/data_subjects/file_system.rb', line 48

# Reader for @pattern, which matching_entries matches against each entry's
# name to decide whether the entry is a candidate for extraction.
def pattern
  @pattern
end

#remote_path ⇒ Object (readonly)

Returns the value of attribute remote_path.



47
48
49
# File 'lib/remi/data_subjects/file_system.rb', line 47

# Reader for @remote_path, the path that all_entries implementations are
# responsible for matching entries against.
def remote_path
  @remote_path
end

Instance Method Details

#all_entries ⇒ Object

Public: Returns an array of all FileSystemEntry instances that are in the remote_path. NOTE: all_entries is responsible for matching the path using @remote_path

Raises:

  • (NoMethodError)


65
66
67
# File 'lib/remi/data_subjects/file_system.rb', line 65

# Public: Placeholder for listing every entry found in the remote path.
#
# This base implementation never returns; it only signals that the concrete
# class has not provided its own all_entries.
#
# Raises NoMethodError always, naming this method and the receiver's class.
def all_entries
  raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
end

#entries ⇒ Object

Public: Returns just the entries that are to be extracted.



70
71
72
73
74
75
76
77
78
79
80
# File 'lib/remi/data_subjects/file_system.rb', line 70

# Public: Picks the entries that should be extracted.
#
# The first configured option wins, in this order: @group_by,
# @most_recent_only, @created_within. With none set, every entry that matches
# the pattern is returned.
#
# NOTE(review): because @most_recent_only is tested before @created_within,
# get_created_within is only reached from here when @most_recent_only is falsy.
def entries
  return most_recent_matching_entry_in_group if @group_by
  return Array(most_recent_matching_entry) if @most_recent_only
  return get_created_within if @created_within

  matching_entries
end

#extract ⇒ Object

Public: Called to extract files from the source filesystem.

Returns an array containing the paths to all files extracted.

Raises:

  • (NoMethodError)


59
60
61
# File 'lib/remi/data_subjects/file_system.rb', line 59

# Public: Placeholder for extracting files from the source filesystem.
#
# This base implementation never returns; it only signals that the concrete
# class has not provided its own extract.
#
# Raises NoMethodError always, naming this method and the receiver's class.
def extract
  raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
end

#get_created_within ⇒ Object



90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/remi/data_subjects/file_system.rb', line 90

# Returns the matching entries that were created less than @created_within
# hours ago.
#
# An entry's age is the time elapsed between its create_time and the moment
# this method is called, expressed in hours in both branches. When
# @most_recent_only is set, only the newest matching entry (ranked by
# @most_recent_by) is considered.
#
# Returns a non-empty Array of entries.
# Raises RuntimeError if no entry qualifies, including when nothing matches
# the pattern at all.
def get_created_within
  now = Time.now
  # Time - Time yields seconds as a Float; dividing by 3600.0 converts to hours
  # in plain Ruby.
  recent = ->(entry) { (now - Time.at(entry.create_time)) / 3600.0 < @created_within }

  if @most_recent_only
    newest = matching_entries.max_by { |e| e.send(@most_recent_by) }
    # newest is nil when there are no matching entries at all.
    unless newest && recent.call(newest)
      raise "No file Found. All files are older than #{@created_within} hrs"
    end

    # A literal array avoids Array() exploding an entry that responds to to_a.
    [newest]
  else
    recent_entries = matching_entries.select { |e| recent.call(e) }
    if recent_entries.empty?
      raise "No files Found. All files are older than #{@created_within} hrs"
    end

    recent_entries
  end
end

#matching_entries ⇒ Object



82
83
84
# File 'lib/remi/data_subjects/file_system.rb', line 82

# Returns the subset of all_entries whose name is matched by @pattern.
def matching_entries
  candidates = all_entries
  candidates.select do |candidate|
    @pattern.match(candidate.name)
  end
end

#most_recent_matching_entry ⇒ Object



86
87
88
# File 'lib/remi/data_subjects/file_system.rb', line 86

# Returns the matching entry that ranks highest by @most_recent_by, or nil
# when there are no matching entries.
def most_recent_matching_entry
  oldest_to_newest = matching_entries.sort_by do |candidate|
    candidate.send(@most_recent_by)
  end
  oldest_to_newest.last
end

#most_recent_matching_entry_in_group ⇒ Object



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/remi/data_subjects/file_system.rb', line 113

# Returns one entry per group: the one ranking highest by @most_recent_by.
#
# A group is identified by the captures of @group_by applied to the entry
# name; entries whose name does not match @group_by are ignored. The result
# is ordered by group key, descending.
def most_recent_matching_entry_in_group
  # Pair every entry that matches @group_by with its capture-group key.
  keyed = matching_entries.each_with_object([]) do |candidate, acc|
    captures = candidate.name.match(@group_by)
    acc << { group: captures.to_a[1..-1], entry: candidate } if captures
  end

  # Descending by (group, recency), so each group's newest entry comes first.
  ordered = keyed.sort_by { |item| [item[:group], item[:entry].send(@most_recent_by)] }.reverse

  previous_group = nil
  ordered.each_with_object([]) do |item, winners|
    next if item[:group] == previous_group

    previous_group = item[:group]
    winners << item[:entry]
  end
end