Class: Keep::Manifest
- Inherits:
-
Object
- Object
- Keep::Manifest
- Defined in:
- lib/arvados/keep.rb
Constant Summary collapse
- STRICT_STREAM_TOKEN_REGEXP =
/^(\.)(\/[^\/\s]+)*$/
- STRICT_FILE_TOKEN_REGEXP =
/^[[:digit:]]+:[[:digit:]]+:([^\s\/]+(\/[^\s\/]+)*)$/
Class Method Summary collapse
- .valid?(manifest) ⇒ Boolean
- .validate!(manifest) ⇒ Object
Verify that a given manifest is valid according to arvados.org/projects/arvados/wiki/Keep_manifest_format.
Instance Method Summary collapse
- #each_file_spec ⇒ Object
- #each_line ⇒ Object
- #exact_file_count?(want_count) ⇒ Boolean
- #files ⇒ Object
- #files_count(stop_after = nil) ⇒ Object
- #files_size ⇒ Object
- #has_file?(want_stream, want_file = nil) ⇒ Boolean
- #initialize(manifest_text) ⇒ Manifest (constructor)
Class to parse a manifest text and provide common views of that data.
- #minimum_file_count?(want_count) ⇒ Boolean
- #split_file_token(token) ⇒ Object
- #unescape(s) ⇒ Object
Constructor Details
#initialize(manifest_text) ⇒ Manifest
Class to parse a manifest text and provide common views of that data.
108 109 110 111 |
# File 'lib/arvados/keep.rb', line 108
# Wrap a manifest text so the other instance methods can read it.
# Nothing is parsed here; the text is only stored.
#
# manifest_text - the manifest text to keep in @text.
def initialize(manifest_text)
  @text = manifest_text
  # Cache slot filled in by #files on first use; nil means "not computed yet".
  @files = nil
end
Class Method Details
.valid?(manifest) ⇒ Boolean
284 285 286 287 288 289 290 291 |
# File 'lib/arvados/keep.rb', line 284
# Non-raising counterpart of validate!.
#
# Returns true when validate! completes for the given manifest, and
# false when validate! raises ArgumentError. Any other exception is
# not rescued here and propagates to the caller.
def self.valid?(manifest)
  validate!(manifest)
  true
rescue ArgumentError
  false
end
.validate!(manifest) ⇒ Object
Verify that a given manifest is valid according to arvados.org/projects/arvados/wiki/Keep_manifest_format
237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 |
# File 'lib/arvados/keep.rb', line 237
# Verify that a given manifest is valid according to
# arvados.org/projects/arvados/wiki/Keep_manifest_format
#
# Each line must consist of, in order and separated by single spaces:
# one stream name, one or more locators, and one or more file tokens.
#
# Returns true for an empty manifest or when every line passes.
# Raises ArgumentError describing the first problem found; per-line
# messages carry the 1-based line number.
def self.validate!(manifest)
  raise ArgumentError, "No manifest found" unless manifest
  return true if manifest.empty?
  unless manifest.end_with?("\n")
    raise ArgumentError, "Invalid manifest: does not end with newline"
  end

  manifest.each_line.with_index(1) do |line, lineno|
    # Strip the final character (the line terminator) before splitting.
    tokens = line[0..-2].split(/ /)
    if tokens.empty?
      raise ArgumentError, "Manifest invalid for stream #{lineno}: missing stream name"
    end

    # 1. Stream name: must match the strict pattern and must not contain
    #    a "." or ".." path component after the leading ".".
    stream = tokens.shift
    stream_ok = stream =~ STRICT_STREAM_TOKEN_REGEXP && stream !~ /\/\.\.?(\/|$)/
    unless stream_ok
      raise ArgumentError, "Manifest invalid for stream #{lineno}: missing or invalid stream name #{stream.inspect if stream}"
    end

    # 2. Locators: consume every leading token that looks like a locator.
    token = tokens.shift
    locators_seen = 0
    while token =~ Locator::LOCATOR_REGEXP
      locators_seen += 1
      token = tokens.shift
    end
    if locators_seen == 0
      raise ArgumentError, "Manifest invalid for stream #{lineno}: missing or invalid locator #{token.inspect if token}"
    end

    # 3. File tokens: consume tokens whose name part has no "." or ".."
    #    path component. token is nil once the line is exhausted.
    file_tokens_seen = 0
    while (m = STRICT_FILE_TOKEN_REGEXP.match(token)) &&
          (m[1].split('/') & ['..', '.']).empty?
      file_tokens_seen += 1
      token = tokens.shift
    end

    # Anything left over at this point is a token that was not accepted.
    if token
      raise ArgumentError, "Manifest invalid for stream #{lineno}: invalid file token #{token.inspect}"
    elsif file_tokens_seen == 0
      raise ArgumentError, "Manifest invalid for stream #{lineno}: no file tokens"
    end

    # Ruby's split() method silently drops trailing empty tokens
    # (which are not allowed by the manifest format) so we have to
    # check trailing spaces manually.
    if line.end_with?(" \n")
      raise ArgumentError, "Manifest invalid for stream #{lineno}: trailing space"
    end
  end
  true
end
Instance Method Details
#each_file_spec ⇒ Object
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 |
# File 'lib/arvados/keep.rb', line 154
# Walk the manifest text and yield one array per file token:
#   [stream_name, start_pos, file_size, file_name]
# When a file name contains '/', its directory part is moved onto the
# end of the yielded stream name and only the last component is kept
# as the file name.
#
# Returns an enumerator when called without a block, true otherwise.
def each_file_spec
  return to_enum(__method__) unless block_given?

  @text.each_line do |line|
    stream = nil
    past_locators = false
    line.scan(/\S+/) do |token|
      if stream.nil?
        # First token on the line is the stream name.
        stream = unescape(token)
        next
      end
      # Once one non-locator token has been seen, every later token on
      # the line is treated as a file token without asking Locator again.
      next unless past_locators || !Locator.valid?(token)
      past_locators = true

      start_pos, size, name = split_file_token(token)
      # rpartition yields empty dir and separator when there is no '/',
      # so the stream name and file name are then left unchanged.
      dir, slash, base = name.rpartition('/')
      yield [stream + slash + dir, start_pos, size, base]
    end
  end
  true
end
#each_line ⇒ Object
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
# File 'lib/arvados/keep.rb', line 113
# Walk the manifest text and yield one array per non-blank line:
#   [stream_name, block_tokens, file_tokens]
# The stream name and the file tokens are passed through unescape;
# block tokens are yielded as written.
#
# Returns an enumerator when called without a block.
def each_line
  return to_enum(__method__) unless block_given?

  @text.each_line do |line|
    tokens = line.scan(/\S+/)
    # Ignore blank lines
    next if tokens.empty?

    stream_name = unescape(tokens.shift)
    block_tokens = []
    file_tokens = []
    tokens.each do |token|
      # After the first non-locator token, everything else on the line
      # is collected as a file token.
      if file_tokens.empty? && Locator.valid?(token)
        block_tokens << token
      else
        file_tokens << unescape(token)
      end
    end
    yield [stream_name, block_tokens, file_tokens]
  end
end
#exact_file_count?(want_count) ⇒ Boolean
215 216 217 |
# File 'lib/arvados/keep.rb', line 215
# Report whether the manifest holds exactly want_count files.
#
# Asks files_count to stop one past the target, so a manifest with more
# files than wanted is told apart from one with exactly that many.
def exact_file_count?(want_count)
  counted = files_count(want_count + 1)
  counted == want_count
end
#files ⇒ Object
180 181 182 183 184 185 186 187 188 189 190 191 |
# File 'lib/arvados/keep.rb', line 180
# Return an array of [stream_name, file_name, total_size] entries, one
# per distinct (stream_name, file_name) pair reported by each_file_spec.
# Sizes of repeated pairs are added together. The result is cached in
# @files, so the manifest is only walked on the first call.
def files
  return @files unless @files.nil?

  totals = Hash.new(0)
  each_file_spec do |stream, _, size, name|
    totals[[stream, name]] += size
  end
  @files = totals.map { |(stream, name), total| [stream, name, total] }
end
#files_count(stop_after = nil) ⇒ Object
193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 |
# File 'lib/arvados/keep.rb', line 193
# Return the number of files represented in this manifest.
#
# If stop_after is provided, files_count will read the manifest
# incrementally, and return immediately when it counts that number of
# files. This can help you avoid parsing the entire manifest if you
# just want to check if a small number of files are specified.
# When the full file list has already been cached in @files, that list
# is used and stop_after is ignored.
def files_count(stop_after = nil)
  return files.size if stop_after.nil? || !@files.nil?

  distinct = {}
  each_file_spec do |stream, _, _, name|
    distinct[[stream, name]] = true
    return stop_after if distinct.size >= stop_after
  end
  distinct.size
end
#files_size ⇒ Object
210 211 212 213 |
# File 'lib/arvados/keep.rb', line 210
# Return the total size of all files in this manifest, i.e. the sum of
# the size element of every entry returned by #files (0 when empty).
def files_size
  total = 0
  files.each { |_, _, size| total += size }
  total
end
#has_file?(want_stream, want_file = nil) ⇒ Boolean
223 224 225 226 227 228 229 230 231 232 233 |
# File 'lib/arvados/keep.rb', line 223
# Report whether the manifest contains the given file.
#
# want_stream - stream name to look for, or a full path when want_file
#               is omitted.
# want_file   - file name within the stream. When nil, want_stream is
#               split with File.split into stream and file parts.
#
# Stops at the first each_file_spec entry whose stream name and file
# name both match.
def has_file?(want_stream, want_file = nil)
  want_stream, want_file = File.split(want_stream) if want_file.nil?

  each_file_spec do |stream, _, _, name|
    return true if stream == want_stream && name == want_file
  end
  false
end
#minimum_file_count?(want_count) ⇒ Boolean
219 220 221 |
# File 'lib/arvados/keep.rb', line 219
# Report whether the manifest holds at least want_count files.
#
# Passes want_count to files_count as its stop_after limit, so counting
# can end as soon as the target is reached.
def minimum_file_count?(want_count)
  counted = files_count(want_count)
  counted >= want_count
end
#split_file_token(token) ⇒ Object
146 147 148 149 150 151 152 |
# File 'lib/arvados/keep.rb', line 146
# Break a "start:size:name" file token into its three fields.
#
# Only the first two colons separate fields, so the name may itself
# contain ':'. The numeric fields are converted with to_i and the name
# is passed through unescape.
#
# Returns [start_pos, file_size, file_name].
# Raises ArgumentError when the token has fewer than three fields.
def split_file_token(token)
  fields = token.split(':', 3)
  raise ArgumentError, "Invalid file token '#{token}'" if fields.length < 3

  [fields[0].to_i, fields[1].to_i, unescape(fields[2])]
end
#unescape(s) ⇒ Object
134 135 136 137 138 139 140 141 142 143 144 |
# File 'lib/arvados/keep.rb', line 134
# Parse backslash escapes in a Keep manifest stream or file name.
#
# Two escape forms are recognised:
#   \\    -> a single backslash
#   \ooo  -> the character whose code is the three-digit octal number,
#            for \000 through \377 only
#
# The first octal digit is limited to 0-3 because a larger value
# (\400 through \777) is above 255 and makes Integer#chr raise
# RangeError. Such sequences are left in the output unchanged, as is
# any other text that is not a recognised escape.
#
# NOTE(review): for codes 128..255, Integer#chr without an encoding
# argument normally returns a binary (ASCII-8BIT) string; confirm that
# callers comparing the result with UTF-8 names handle this.
#
# s - the escaped string.
#
# Returns a new string with the escapes replaced.
def unescape(s)
  s.gsub(/\\(\\|[0-3][0-7]{2})/) do
    escaped = Regexp.last_match(1)
    if escaped == '\\'
      '\\'
    else
      escaped.to_i(8).chr
    end
  end
end