Class: Keep::Manifest

Inherits:
Object
  • Object
show all
Defined in:
lib/arvados/keep.rb

Instance Method Summary collapse

Constructor Details

#initialize(manifest_text) ⇒ Manifest

Class to parse a manifest text and provide common views of that data.



95
96
97
98
# File 'lib/arvados/keep.rb', line 95

def initialize(manifest_text)
  # Wrap the raw manifest text.  Nothing is parsed here: @files is the
  # cache slot for the per-file summary and stays nil until #files
  # fills it in on first use.
  @files = nil
  @text = manifest_text
end

Instance Method Details

#each_file_spec(speclist) ⇒ Object



126
127
128
129
130
131
132
# File 'lib/arvados/keep.rb', line 126

def each_file_spec(speclist)
  # Yield one [start_position, size, name] triple per file token in
  # speclist.  Each token has the form "position:size:name"; only the
  # first two colons are separators, so the name may itself contain
  # colons.  Position and size are converted with to_i.
  # Without a block, an Enumerator over the same triples is returned.
  if block_given?
    speclist.each do |spec|
      fields = spec.split(':', 3)
      yield [fields[0].to_i, fields[1].to_i, fields[2]]
    end
  else
    to_enum(__method__, speclist)
  end
end

#each_line ⇒ Object



100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/arvados/keep.rb', line 100

def each_line
  # Parse the manifest text one line at a time and yield
  # [stream_name, blocks, file_tokens] for each stream line:
  #   * stream_name is the first token, with backslash escapes decoded;
  #   * blocks holds the values Locator.parse returned for the tokens
  #     that follow, up to the first token it rejects;
  #   * file_tokens holds every remaining token, unescaped.
  # Without a block, an Enumerator over the same triples is returned.
  return to_enum(__method__) unless block_given?
  @text.each_line do |line|
    tokens = line.split
    # Ignore blank or whitespace-only lines: they carry no stream name,
    # and unescape(nil) would otherwise raise NoMethodError.
    next if tokens.empty?
    stream_name = unescape(tokens.shift)
    blocks = []
    # tokens.first is nil once the tokens run out; this loop relies on
    # Locator.parse returning a falsy value in that case (presumably
    # nil -- confirm against Locator).
    while (loc = Locator.parse(tokens.first))
      blocks << loc
      tokens.shift
    end
    yield [stream_name, blocks, tokens.map { |s| unescape(s) }]
  end
end

#exact_file_count?(want_count) ⇒ Boolean



168
169
170
# File 'lib/arvados/keep.rb', line 168

def exact_file_count?(want_count)
  # True when the manifest names exactly want_count files.  Counting is
  # allowed to stop at want_count + 1, which is enough to tell "exactly
  # want_count" apart from "more than want_count".
  limit = want_count + 1
  counted = files_count(limit)
  counted == want_count
end

#files ⇒ Object



134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/arvados/keep.rb', line 134

def files
  # Return an array of [stream_name, file_name, size] entries, one per
  # distinct (stream, file) pair in the manifest.  A file that appears
  # in several file tokens gets the sum of those tokens' sizes.
  # The array is built on the first call and cached in @files.
  return @files unless @files.nil?
  totals = Hash.new(0)
  each_line do |stream, _blocks, specs|
    each_file_spec(specs) do |_position, length, name|
      totals[[stream, name]] += length
    end
  end
  @files = totals.map { |key, total| [key[0], key[1], total] }
end

#files_count(stop_after = nil) ⇒ Object



149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/arvados/keep.rb', line 149

def files_count(stop_after=nil)
  # Count the distinct files named in this manifest.
  #
  # With no stop_after, or when the file list has already been built
  # and cached, the answer is simply the size of #files.  Otherwise the
  # manifest is scanned incrementally and stop_after is returned as soon
  # as that many distinct files have been seen, so a caller that only
  # needs to know whether a few files exist does not have to parse the
  # whole manifest.
  return files.size if stop_after.nil? || !@files.nil?

  distinct = {}
  each_line do |stream, _blocks, specs|
    each_file_spec(specs) do |_position, _length, name|
      distinct[[stream, name]] = true
      # Early exit from the whole method, not just from this block.
      return stop_after if distinct.size >= stop_after
    end
  end
  distinct.size
end

#has_file?(want_stream, want_file = nil) ⇒ Boolean



176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/arvados/keep.rb', line 176

def has_file?(want_stream, want_file=nil)
  # Report whether the manifest names want_file inside want_stream.
  # When called with a single argument, that argument is treated as a
  # full path and divided into stream and file name with File.split.
  # Scanning stops at the first match.
  want_stream, want_file = File.split(want_stream) if want_file.nil?
  each_line do |stream, _blocks, specs|
    next unless stream == want_stream
    found = each_file_spec(specs).any? do |_position, _length, name|
      name == want_file
    end
    return true if found
  end
  false
end

#minimum_file_count?(want_count) ⇒ Boolean



172
173
174
# File 'lib/arvados/keep.rb', line 172

def minimum_file_count?(want_count)
  # True when the manifest names at least want_count files.  Counting
  # is allowed to stop as soon as want_count files have been seen.
  counted = files_count(want_count)
  counted >= want_count
end

#unescape(s) ⇒ Object



114
115
116
117
118
119
120
121
122
123
124
# File 'lib/arvados/keep.rb', line 114

def unescape(s)
  # Decode backslash escapes in a Keep manifest stream or file name.
  #
  # Two escapes are recognized: a doubled backslash, which becomes a
  # single backslash, and a backslash followed by three octal digits,
  # which becomes the character with that code.  The first octal digit
  # is limited to 0-3 so the value never exceeds 0377 (255); Integer#chr
  # raises RangeError for anything larger.  A sequence such as \777 is
  # therefore left in the result unchanged instead of aborting the parse.
  #
  # Returns a new string; s itself is not modified.
  s.gsub(/\\(\\|[0-3][0-7]{2})/) do
    escaped = Regexp.last_match(1)
    escaped == '\\' ? '\\' : escaped.to_i(8).chr
  end
end