Class: Keep::Manifest
- Inherits:
-
Object
- Object
- Keep::Manifest
- Defined in:
- lib/arvados/keep.rb
Instance Method Summary collapse
- #each_file_spec(speclist) ⇒ Object
- #each_line ⇒ Object
- #exact_file_count?(want_count) ⇒ Boolean
- #files ⇒ Object
- #files_count(stop_after = nil) ⇒ Object
- #has_file?(want_stream, want_file = nil) ⇒ Boolean
-
#initialize(manifest_text) ⇒ Manifest
constructor
Class to parse a manifest text and provide common views of that data.
- #minimum_file_count?(want_count) ⇒ Boolean
- #unescape(s) ⇒ Object
Constructor Details
#initialize(manifest_text) ⇒ Manifest
Class to parse a manifest text and provide common views of that data.
95 96 97 98 |
# File 'lib/arvados/keep.rb', line 95

# Wrap a manifest text so it can be queried through the methods below.
#
# manifest_text is stored as given; nothing is parsed at construction
# time. The per-file summary returned by #files is built on first use,
# so its cache starts out as nil.
def initialize(manifest_text)
  @files = nil
  @text = manifest_text
end
Instance Method Details
#each_file_spec(speclist) ⇒ Object
126 127 128 129 130 131 132 |
# File 'lib/arvados/keep.rb', line 126

# Split every "position:size:name" token in speclist and yield it as
# [position, size, name], with position and size converted via to_i.
# Only the first two colons separate fields, so a name may itself
# contain colons. Returns an Enumerator when no block is given.
def each_file_spec(speclist)
  unless block_given?
    return to_enum(__method__, speclist)
  end

  speclist.each do |token|
    offset, length, name = token.split(':', 3)
    yield [offset.to_i, length.to_i, name]
  end
end
#each_line ⇒ Object
100 101 102 103 104 105 106 107 108 109 110 111 112 |
# File 'lib/arvados/keep.rb', line 100

# Iterate over the manifest text one line at a time.
#
# For each non-blank line, yields [stream_name, blocks, file_tokens]:
# stream_name is the unescaped first token, blocks collects the result
# of Locator.parse for every leading token it accepts, and file_tokens
# holds the remaining tokens, unescaped. Returns an Enumerator when no
# block is given.
def each_line
  return to_enum(__method__) unless block_given?

  @text.each_line do |line|
    tokens = line.split
    # A blank or whitespace-only line has no stream name; skip it rather
    # than handing nil to unescape.
    next if tokens.empty?

    stream_name = unescape(tokens.shift)
    blocks = []
    # Leading tokens are consumed for as long as Locator.parse accepts
    # them; whatever is left over is treated as the file list.
    while (loc = Locator.parse(tokens.first))
      blocks << loc
      tokens.shift
    end
    yield [stream_name, blocks, tokens.map { |s| unescape(s) }]
  end
end
#exact_file_count?(want_count) ⇒ Boolean
168 169 170 |
# File 'lib/arvados/keep.rb', line 168

# True when files_count reports exactly want_count files.
# The count is capped at want_count + 1, the smallest cap that still
# distinguishes "exactly want_count" from "more than want_count".
def exact_file_count?(want_count)
  cap = want_count + 1
  files_count(cap) == want_count
end
#files ⇒ Object
134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
# File 'lib/arvados/keep.rb', line 134

# Return one [stream_name, file_name, size] entry per distinct
# (stream, file name) pair in the manifest. Sizes of file tokens that
# share the same pair are added together. The list is built on the
# first call and cached in @files for later calls.
def files
  return @files unless @files.nil?

  totals = Hash.new(0)
  each_line do |stream, _blocks, specs|
    each_file_spec(specs) do |_pos, length, name|
      totals[[stream, name]] += length
    end
  end
  @files = totals.map { |(stream, name), total| [stream, name, total] }
end
#files_count(stop_after = nil) ⇒ Object
149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
# File 'lib/arvados/keep.rb', line 149

# Return the number of files represented in this manifest.
#
# When stop_after is given and the file list has not been cached yet,
# the manifest is read incrementally and stop_after is returned as soon
# as that many distinct files have been seen. This avoids parsing the
# whole manifest when the caller only needs to know whether a small
# number of files is present. Otherwise the size of the full (cached)
# file list is returned.
def files_count(stop_after=nil)
  incremental = !stop_after.nil? && @files.nil?
  return files.size unless incremental

  seen = {}
  each_line do |stream, _blocks, specs|
    each_file_spec(specs) do |_pos, _size, name|
      seen[[stream, name]] = true
      return stop_after if seen.size >= stop_after
    end
  end
  seen.size
end
#has_file?(want_stream, want_file = nil) ⇒ Boolean
176 177 178 179 180 181 182 183 184 185 186 187 |
# File 'lib/arvados/keep.rb', line 176

# True when the manifest lists want_file inside the stream want_stream.
# Called with a single argument, that argument is treated as a path and
# divided into stream and file name with File.split.
def has_file?(want_stream, want_file=nil)
  want_stream, want_file = File.split(want_stream) if want_file.nil?

  each_line do |stream_name, _blocks, specs|
    next unless stream_name == want_stream
    return true if each_file_spec(specs).any? { |_pos, _size, name| name == want_file }
  end
  false
end
#minimum_file_count?(want_count) ⇒ Boolean
172 173 174 |
# File 'lib/arvados/keep.rb', line 172

# True when files_count reports at least want_count files.
# want_count is passed as the counting cap, so counting may stop as soon
# as the threshold is reached.
def minimum_file_count?(want_count)
  counted = files_count(want_count)
  counted >= want_count
end
#unescape(s) ⇒ Object
114 115 116 117 118 119 120 121 122 123 124 |
# File 'lib/arvados/keep.rb', line 114

# Parse backslash escapes in a Keep manifest stream or file name.
#
# A doubled backslash becomes a single backslash, and a backslash
# followed by three octal digits becomes the character with that code.
# Octal values above 0377 do not fit in one byte; those sequences are
# left in the result unchanged instead of raising.
def unescape(s)
  s.gsub(/\\(\\|[0-7]{3})/) do |seq|
    escaped = Regexp.last_match(1)
    if escaped == '\\'
      '\\'
    else
      code = escaped.to_i(8)
      # Integer#chr can raise RangeError for codes above 0xFF, which
      # would abort parsing on a single malformed name.
      code > 0xFF ? seq : code.chr
    end
  end
end