Class: Keep::Manifest

Inherits:
Object
  • Object
show all
Defined in:
lib/arvados/keep.rb

Instance Method Summary collapse

Constructor Details

#initialize(manifest_text) ⇒ Manifest

Class to parse a manifest text and provide common views of that data.



101
102
103
104
# File 'lib/arvados/keep.rb', line 101

# Remember the raw manifest text; nothing is parsed at construction time.
#
# manifest_text is stored unchanged in @text.
def initialize(manifest_text)
  # nil marks the per-file summary as "not computed yet".
  @files = nil
  @text = manifest_text
end

Instance Method Details

#each_file_spec ⇒ Object



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/arvados/keep.rb', line 147

# Yield one file spec for every file token in the manifest text.
#
# Each yielded value is an array made of the unescaped stream name followed
# by the three fields returned by split_file_token for that token.
#
# On each line the first whitespace-separated token is the stream name.
# The tokens after it are skipped for as long as Locator.valid? accepts
# them; every token from the first rejected one onward is handed to
# split_file_token.  Lines without any token yield nothing.
#
# Returns an Enumerator when no block is given, otherwise true.
def each_file_spec
  return to_enum(__method__) unless block_given?
  @text.each_line do |line|
    stream_token, *remaining = line.scan(/\S+/)
    # A line with no tokens at all has no stream and therefore no files.
    next if stream_token.nil?
    stream_name = unescape(stream_token)
    file_tokens = remaining.drop_while { |tok| Locator.valid?(tok) }
    file_tokens.each do |file_token|
      yield [stream_name] + split_file_token(file_token)
    end
  end
  true
end

#each_line ⇒ Object



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/arvados/keep.rb', line 106

# Yield [stream_name, block_tokens, file_tokens] for each non-blank line of
# the manifest text.
#
# The first whitespace-separated token of a line is the stream name
# (unescaped).  The tokens that follow are collected verbatim as block
# tokens for as long as Locator.valid? accepts them; from the first
# rejected token onward every token is unescaped and collected as a file
# token.  Blank lines are skipped.
#
# Returns an Enumerator when no block is given.
def each_line
  return to_enum(__method__) unless block_given?
  @text.each_line do |line|
    stream_token, *remaining = line.scan(/\S+/)
    # Ignore blank lines
    next if stream_token.nil?
    stream_name = unescape(stream_token)
    # Index of the first token that is not a locator; everything before it
    # is a block token, everything from it onward is a file token.
    boundary = remaining.index { |tok| !Locator.valid?(tok) } || remaining.size
    block_tokens = remaining.first(boundary)
    file_tokens = remaining.drop(boundary).map { |tok| unescape(tok) }
    yield [stream_name, block_tokens, file_tokens]
  end
end

#exact_file_count?(want_count) ⇒ Boolean

Returns:

  • (Boolean)


199
200
201
# File 'lib/arvados/keep.rb', line 199

# True when files_count reports exactly want_count files.
#
# files_count is asked to stop after want_count + 1 files, so a larger
# manifest is told apart from an exact match without being counted in full.
def exact_file_count?(want_count)
  counted = files_count(want_count + 1)
  counted == want_count
end

#files ⇒ Object



164
165
166
167
168
169
170
171
172
173
174
175
# File 'lib/arvados/keep.rb', line 164

# Return an array of [stream_name, file_name, size] entries, one per
# distinct (stream, file name) pair reported by each_file_spec.
#
# The sizes of all specs that share a stream and file name are added
# together.  The result is cached in @files, so the manifest is only
# walked on the first call.
def files
  return @files unless @files.nil?
  # Missing keys start at 0 so sizes can be accumulated directly.
  totals = Hash.new(0)
  each_file_spec do |stream, _start, size, name|
    totals[[stream, name]] += size
  end
  @files = totals.map { |(stream, name), total| [stream, name, total] }
end

#files_count(stop_after = nil) ⇒ Object



177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/arvados/keep.rb', line 177

# Return the number of files represented in this manifest.
#
# If stop_after is provided, files_count reads the manifest incrementally
# and returns stop_after as soon as that many distinct files have been
# seen.  This can help you avoid parsing the entire manifest if you just
# want to check whether a small number of files are specified.
#
# When stop_after is nil, or the file list has already been computed and
# cached in @files, the full count (files.size) is returned instead.
def files_count(stop_after=nil)
  return files.size if stop_after.nil? || !@files.nil?
  # Keys are [stream, file name] pairs, so repeated specs for the same
  # file are only counted once.
  seen = {}
  each_file_spec do |stream, _start, _size, name|
    seen[[stream, name]] = true
    return stop_after if seen.size >= stop_after
  end
  seen.size
end

#files_size ⇒ Object



194
195
196
197
# File 'lib/arvados/keep.rb', line 194

# Return the total size of all files in this manifest, i.e. the sum of the
# size field of every entry returned by files (0 when there are none).
def files_size
  sizes = files.map { |_stream, _name, size| size }
  sizes.inject(0) { |total, size| total + size }
end

#has_file?(want_stream, want_file = nil) ⇒ Boolean

Returns:

  • (Boolean)


207
208
209
210
211
212
213
214
215
216
217
# File 'lib/arvados/keep.rb', line 207

# True when each_file_spec reports a file whose stream name and file name
# equal want_stream and want_file.
#
# When want_file is omitted, want_stream is treated as a path and split
# with File.split into its stream and file name parts.  The scan stops at
# the first match.
def has_file?(want_stream, want_file=nil)
  want_stream, want_file = File.split(want_stream) if want_file.nil?
  each_file_spec do |stream, _start, _size, name|
    return true if stream == want_stream && name == want_file
  end
  false
end

#minimum_file_count?(want_count) ⇒ Boolean

Returns:

  • (Boolean)


203
204
205
# File 'lib/arvados/keep.rb', line 203

# True when files_count reports at least want_count files.
#
# files_count is asked to stop after want_count files, so counting can end
# as soon as the minimum has been reached.
def minimum_file_count?(want_count)
  counted = files_count(want_count)
  counted >= want_count
end

#split_file_token(token) ⇒ Object



139
140
141
142
143
144
145
# File 'lib/arvados/keep.rb', line 139

# Split a "start:size:name" file token into [start_pos, filesize, filename].
#
# Only the first two colons separate fields, so the name itself may contain
# colons.  The two leading fields are converted with to_i and the name is
# passed through unescape.
#
# Raises ArgumentError when the token has fewer than three fields.
def split_file_token token
  fields = token.split(':', 3)
  raise ArgumentError, "Invalid file token '#{token}'" if fields.length < 3
  start_pos, filesize, filename = fields
  [start_pos.to_i, filesize.to_i, unescape(filename)]
end

#unescape(s) ⇒ Object



127
128
129
130
131
132
133
134
135
136
137
# File 'lib/arvados/keep.rb', line 127

# Parse backslash escapes in a Keep manifest stream or file name.
#
# A doubled backslash becomes a single backslash, and a backslash followed
# by exactly three octal digits becomes the character with that octal code.
# All other text is returned unchanged.
def unescape(s)
  s.gsub(/\\(\\|[0-7]{3})/) do
    escaped = Regexp.last_match(1)
    if escaped == '\\'
      '\\'
    else
      # Three octal digits: convert the code to its character.
      escaped.to_i(8).chr
    end
  end
end