Class: Keep::Manifest
- Inherits:
-
Object
- Object
- Keep::Manifest
- Defined in:
- lib/arvados/keep.rb
Instance Method Summary
- #each_file_spec ⇒ Object
- #each_line ⇒ Object
- #exact_file_count?(want_count) ⇒ Boolean
- #files ⇒ Object
- #files_count(stop_after = nil) ⇒ Object
- #has_file?(want_stream, want_file = nil) ⇒ Boolean
- #initialize(manifest_text) ⇒ Manifest (constructor)
Class to parse a manifest text and provide common views of that data.
- #minimum_file_count?(want_count) ⇒ Boolean
- #split_file_token(token) ⇒ Object
- #unescape(s) ⇒ Object
Constructor Details
#initialize(manifest_text) ⇒ Manifest
Class to parse a manifest text and provide common views of that data.
101 102 103 104 |
# File 'lib/arvados/keep.rb', line 101
# Wrap a manifest text so it can be parsed on demand.
#
# manifest_text - the manifest text; it is stored as-is in @text and
#                 read line by line by the iteration methods.
def initialize(manifest_text)
  @text = manifest_text
  # Cache for the per-file size summary; stays nil until #files builds it.
  @files = nil
end
Instance Method Details
#each_file_spec ⇒ Object
144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
# File 'lib/arvados/keep.rb', line 144
# Iterate over every file token in the manifest text.
#
# Each line is split on whitespace. The first token is the stream name
# (passed through unescape). The following tokens are skipped for as long
# as Locator.valid? accepts them; the first token it rejects, and every
# token after it on the same line, is treated as a file token.
#
# Yields one array per file token: the stream name followed by the
# elements returned by split_file_token for that token.
#
# Returns an Enumerator when called without a block, true otherwise.
def each_file_spec
  return to_enum(__method__) unless block_given?

  @text.each_line do |manifest_line|
    name_token, *rest = manifest_line.scan(/\S+/)
    next if name_token.nil? # blank line: nothing to yield

    stream_name = unescape(name_token)
    rest.drop_while { |tok| Locator.valid?(tok) }.each do |file_token|
      yield [stream_name] + split_file_token(file_token)
    end
  end
  true
end
#each_line ⇒ Object
106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
# File 'lib/arvados/keep.rb', line 106
# Iterate over the non-blank lines of the manifest text.
#
# Each line is split on whitespace. The first token is the stream name
# (passed through unescape). The leading run of following tokens accepted
# by Locator.valid? are the block tokens, kept verbatim; every remaining
# token is a file token and is passed through unescape.
#
# Yields [stream_name, block_tokens, file_tokens] once per non-blank line.
# Returns an Enumerator when called without a block.
def each_line
  return to_enum(__method__) unless block_given?

  @text.each_line do |manifest_line|
    name_token, *rest = manifest_line.scan(/\S+/)
    # Ignore blank lines
    next if name_token.nil?

    stream_name = unescape(name_token)
    block_tokens = rest.take_while { |tok| Locator.valid?(tok) }
    file_tokens = rest.drop(block_tokens.size).map { |tok| unescape(tok) }
    yield [stream_name, block_tokens, file_tokens]
  end
end
#exact_file_count?(want_count) ⇒ Boolean
191 192 193 |
# File 'lib/arvados/keep.rb', line 191
# Report whether the manifest describes exactly want_count files.
#
# Counting is allowed to stop one past want_count: that is enough to tell
# "exactly want_count" apart from "more than want_count".
def exact_file_count?(want_count)
  counted = files_count(want_count + 1)
  counted == want_count
end
#files ⇒ Object
161 162 163 164 165 166 167 168 169 170 171 172 |
# File 'lib/arvados/keep.rb', line 161
# Return the files described by the manifest as an array of
# [stream_name, file_name, total_size] triples.
#
# Sizes of file tokens sharing the same stream name and file name are
# added together. The result is computed once and cached in @files.
def files
  return @files unless @files.nil?

  totals = Hash.new(0)
  each_file_spec do |stream, _start, size, name|
    totals[[stream, name]] += size
  end
  @files = totals.map { |key, total| [*key, total] }
end
#files_count(stop_after = nil) ⇒ Object
174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 |
# File 'lib/arvados/keep.rb', line 174
# Return the number of files represented in this manifest.
#
# If stop_after is provided, files_count will read the manifest
# incrementally, and return immediately when it counts that number of
# files. This can help you avoid parsing the entire manifest if you
# just want to check if a small number of files are specified.
#
# When no limit is given, or the full file list is already cached in
# @files, the size of the full list is returned instead.
def files_count(stop_after=nil)
  incremental = !stop_after.nil? && @files.nil?
  return files.size unless incremental

  distinct = {}
  each_file_spec do |stream, _start, _size, name|
    distinct[[stream, name]] = true
    return stop_after if distinct.size >= stop_after
  end
  distinct.size
end
#has_file?(want_stream, want_file = nil) ⇒ Boolean
199 200 201 202 203 204 205 206 207 208 209 |
# File 'lib/arvados/keep.rb', line 199
# Report whether the manifest contains a file token for the given file.
#
# want_stream - the stream name, or a full path when want_file is omitted.
# want_file   - the file name within the stream. When nil, want_stream is
#               split into stream and file name with File.split.
#
# Scanning stops at the first matching file token.
def has_file?(want_stream, want_file=nil)
  want_stream, want_file = File.split(want_stream) if want_file.nil?

  # Each spec is [stream_name, start_pos, file_size, file_name].
  each_file_spec.any? do |spec|
    spec[0] == want_stream && spec[3] == want_file
  end
end
#minimum_file_count?(want_count) ⇒ Boolean
195 196 197 |
# File 'lib/arvados/keep.rb', line 195
# Report whether the manifest describes at least want_count files.
#
# Counting is allowed to stop as soon as want_count files have been seen.
def minimum_file_count?(want_count)
  counted = files_count(want_count)
  counted >= want_count
end
#split_file_token(token) ⇒ Object
139 140 141 142 |
# File 'lib/arvados/keep.rb', line 139
# Split a manifest file token of the form "start:size:name" into its parts.
#
# token - the raw file token. Only the first two colons separate fields,
#         so the name itself may contain colons.
#
# Returns [start_pos, file_size, file_name] with the two numbers converted
# via to_i. The name is passed through unescape so that backslash escapes
# (such as \040 for a space) are decoded, matching how stream names and
# the file tokens yielded by each_line are treated. A token with fewer
# than three fields yields a nil name, as before.
def split_file_token(token)
  start_pos, filesize, filename = token.split(':', 3)
  filename = unescape(filename) unless filename.nil?
  [start_pos.to_i, filesize.to_i, filename]
end
#unescape(s) ⇒ Object
127 128 129 130 131 132 133 134 135 136 137 |
# File 'lib/arvados/keep.rb', line 127
# Parse backslash escapes in a Keep manifest stream or file name.
#
# s - the escaped name.
#
# Two escape forms are decoded: a doubled backslash becomes a single
# backslash, and a backslash followed by three octal digits becomes the
# character with that code. Only octal values up to \377 are decoded:
# Integer#chr raises RangeError above 255, so sequences such as \777 are
# left in the result untouched instead of aborting the parse.
#
# Returns a new string; s itself is not modified.
def unescape(s)
  s.gsub(/\\(\\|[0-3][0-7]{2})/) do
    escaped = Regexp.last_match(1)
    escaped == '\\' ? '\\' : escaped.to_i(8).chr
  end
end