Class: Ole::Storage

Inherits:
Object
  • Object
show all
Defined in:
lib/ole/storage/base.rb,
lib/ole/storage/version.rb,
lib/ole/storage/meta_data.rb,
lib/ole/storage/file_system.rb

Overview

:nodoc:

Defined Under Namespace

Classes: AllocationTable, DirClass, Dirent, FileClass, FormatError, Header, MetaData, RangesIOMigrateable, RangesIOResizeable

Constant Summary collapse

VERSION =
'1.2.12'

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(arg, mode = nil, params = {}) ⇒ Storage

arg should be either a filename, or an IO object, and needs to be seekable. mode is optional, and should be a regular mode string.



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/ole/storage/base.rb', line 40

# Opens (or creates) an ole storage on top of +arg+.
#
# arg::    a filename (String), or an io object; it needs to be seekable.
# mode::   optional regular mode string, only allowed together with a
#          filename (defaults to 'rb'). A Hash given here is taken as +params+.
# params:: options hash; :update_timestamps defaults to true.
#
# If the io already has data (size > 0) it is loaded, otherwise the storage
# is cleared. Raises ArgumentError if a mode string is combined with an io
# object.
def initialize arg, mode=nil, params={}
	params, mode = mode, nil if Hash === mode
	params = {:update_timestamps => true}.merge(params)
	@params = params

	# get the io object
	@close_parent, @io = if String === arg
		mode ||= 'rb'
		# File.open, not Kernel#open: the latter runs a command when the name
		# starts with '|', which is never wanted for a storage filename.
		[true, File.open(arg, mode)]
	else
		raise ArgumentError, 'unable to specify mode string with io object' if mode
		[false, arg]
	end

	ready = false
	begin
		# force encoding, to avoid picking up source encoding with StringIO or files in text mode
		@io.set_encoding Encoding::ASCII_8BIT if @io.respond_to?(:set_encoding)
		# do we have this file opened for writing? use mode when provided,
		# otherwise try no-op methods which will raise if read-only
		@writeable = begin
			if mode
				IOMode.new(mode).writeable?
			else
				# works on mri 1.8 & jruby
				@io.flush
				begin
					# works on mri 1.9 & rubinius, throws EBADF on windows
					@io.write_nonblock('') if @io.respond_to?(:write_nonblock)
				rescue Errno::EBADF
					# for windows
					@io.syswrite('')
				end
				true
			end
		rescue IOError
			false
		end
		# silence undefined warning in clear
		@sb_file = nil
		# if the io object has data, we should load it, otherwise start afresh
		# this should be based on the mode string rather.
		@io.size > 0 ? load : clear
		ready = true
	ensure
		# don't leak a file we opened ourselves when set-up failed part way;
		# the original exception keeps propagating.
		@io.close if @close_parent && !ready && !@io.closed?
	end
end

Instance Attribute Details

#bbat ⇒ Object (readonly)

Low level internals, you probably shouldn’t need to mess with these



36
37
38
# File 'lib/ole/storage/base.rb', line 36

# Reader for @bbat. #load and #clear assign an AllocationTable::Big here.
def bbat; @bbat; end

#close_parent ⇒ Object (readonly)

The underlying io object to/from which the ole object is serialized, whether we should close it, and whether it is writeable



34
35
36
# File 'lib/ole/storage/base.rb', line 34

# Reader for @close_parent. #initialize sets it to true when it opened the
# io itself from a filename, and to false when an io object was passed in.
def close_parent; @close_parent; end

#dirents ⇒ Object (readonly)

The tree structure in its original flattened form. only valid after #load, or #flush.



31
32
33
# File 'lib/ole/storage/base.rb', line 31

# Reader for @dirents, the flat list of directory entries. It is assigned
# by #load, #clear and #flush.
def dirents; @dirents; end

#header ⇒ Object (readonly)

Low level internals, you probably shouldn’t need to mess with these



36
37
38
# File 'lib/ole/storage/base.rb', line 36

# Reader for @header. #load and #clear assign a Header instance here.
def header; @header; end

#io ⇒ Object (readonly)

The underlying io object to/from which the ole object is serialized, whether we should close it, and whether it is writeable



34
35
36
# File 'lib/ole/storage/base.rb', line 34

# Reader for @io, the underlying io object chosen in #initialize (either
# opened from a filename or passed in by the caller).
def io; @io; end

#params ⇒ Object (readonly)

options used at creation time



27
28
29
# File 'lib/ole/storage/base.rb', line 27

# Reader for @params, the options hash stored by #initialize (the caller's
# params merged over the :update_timestamps => true default).
def params; @params; end

#root ⇒ Object (readonly)

The top of the ole tree structure



29
30
31
# File 'lib/ole/storage/base.rb', line 29

# Reader for @root, the top Dirent of the tree. It is assigned by #load
# and #clear.
def root; @root; end

#sb_file ⇒ Object (readonly)

Low level internals, you probably shouldn’t need to mess with these



36
37
38
# File 'lib/ole/storage/base.rb', line 36

# Reader for @sb_file. #initialize starts it as nil; #load and #clear
# assign a RangesIOResizeable built on the bbat.
def sb_file; @sb_file; end

#sbat ⇒ Object (readonly)

Low level internals, you probably shouldn’t need to mess with these



36
37
38
# File 'lib/ole/storage/base.rb', line 36

# Reader for @sbat. #load and #clear assign an AllocationTable::Small here.
def sbat; @sbat; end

#writeable ⇒ Object (readonly)

The underlying io object to/from which the ole object is serialized, whether we should close it, and whether it is writeable



34
35
36
# File 'lib/ole/storage/base.rb', line 34

# Reader for @writeable, the flag worked out in #initialize from the mode
# string or, without one, by probing the io with no-op writes.
def writeable; @writeable; end

Class Method Details

.open(arg, mode = nil, params = {}) ⇒ Object

somewhat similar to File.open, the open class method allows a block form where the Ole::Storage object is automatically closed on completion of the block.



84
85
86
87
88
89
90
91
92
# File 'lib/ole/storage/base.rb', line 84

# Builds a new storage from the given arguments (see #initialize).
#
# Without a block the new object is returned and the caller must close it.
# With a block the object is yielded, #close is always called afterwards
# (even if the block raises), and the block's value is returned.
def self.open arg, mode=nil, params={}
	storage = new arg, mode, params
	return storage unless block_given?

	begin
		yield storage
	ensure
		storage.close
	end
end

Instance Method Details

#bat_for_size(size) ⇒ Object



333
334
335
336
# File 'lib/ole/storage/base.rb', line 333

# Picks the allocation table for a stream of the given size: @bbat when
# size is at or above @header.threshold (note >=, not >), otherwise @sbat.
def bat_for_size size
	return @bbat if size >= @header.threshold
	@sbat
end

#clear ⇒ Object



295
296
297
298
299
300
301
302
303
304
305
306
307
308
# File 'lib/ole/storage/base.rb', line 295

# Resets this storage to the in-memory equivalent of an empty ole document
# and truncates the underlying io to zero bytes.
#
# A warning is logged (but the reset still proceeds) when @writeable is
# false. Any existing small-block file is closed before it is replaced.
def clear
	# initialize to equivalent of loading an empty ole document.
	Log.warn 'creating new ole storage object on non-writable io' unless @writeable
	@header = Header.new
	@bbat = AllocationTable::Big.new self
	# a fresh tree consisting only of the root entry, at index 0
	@root = Dirent.new self, :type => :root, :name => 'Root Entry'
	@dirents = [@root]
	@root.idx = 0
	# @sb_file is nil on the first call from #initialize, hence the guard
	@sb_file.close if @sb_file
	@sb_file = RangesIOResizeable.new @bbat, :first_block => AllocationTable::EOC
	@sbat = AllocationTable::Small.new self
	# throw everything else the hell away
	@io.truncate 0
end

#close ⇒ Object



164
165
166
167
168
# File 'lib/ole/storage/base.rb', line 164

# Closes the storage and returns nil.
#
# The small-block file is closed first, then #flush writes out the meta
# data if the storage is writeable. The underlying io is closed only when
# this object opened it itself (@close_parent). That close happens in an
# ensure clause, so a failing #flush does not leak the handle; the
# exception still propagates to the caller.
def close
	@sb_file.close
	flush if @writeable
	nil
ensure
	@io.close if @close_parent
end

#dir ⇒ Object



40
41
42
# File 'lib/ole/storage/file_system.rb', line 40

# Returns the DirClass helper bound to this storage. It is created on
# first use and cached in @dir.
def dir
	return @dir if @dir
	@dir = DirClass.new(self)
end

#dirent_from_path(path) ⇒ Object

Tries to get a dirent for path. Returns nil if it doesn’t exist (change it).



46
47
48
49
50
51
52
53
54
55
56
# File 'lib/ole/storage/file_system.rb', line 46

# Walks from @root down the '/'-separated +path+ (expanded first via
# file.expand_path) and returns the dirent found there.
#
# Empty path segments are skipped. Returns nil when a segment has to be
# looked up inside a dirent that is a file, or when the lookup finds nothing.
def dirent_from_path path
	segments = file.expand_path(path).split('/').reject { |seg| seg.empty? }
	segments.inject(@root) do |node, seg|
		return nil if node.file?
		child = node / seg
		return nil unless child
		child
	end
end

#file ⇒ Object



36
37
38
# File 'lib/ole/storage/file_system.rb', line 36

# Returns the FileClass helper bound to this storage. It is created on
# first use and cached in @file.
def file
	return @file if @file
	@file = FileClass.new(self)
end

#flush ⇒ Object

the flush method is the main “save” method. all file contents are always written directly to the file by the RangesIO objects, all this method does is write out all the file meta data - dirents, allocation tables, file header etc.

maybe add an option to zero the padding, and any remaining avail blocks in the allocation table.

TODO: long and overly complex. simplify and test better. eg, perhaps move serialization of bbat to AllocationTable::Big.



180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
# File 'lib/ole/storage/base.rb', line 180

# Writes the storage's meta data to the underlying io: the dirents, the
# sbat, the bbat, any mbat blocks, and finally the header.
#
# Steps, in order:
# 1. refresh the root dirent from the small-block file and flatten the tree
# 2. serialize the dirents, zero-padded to a whole number of big blocks
# 3. serialize the sbat and record its start block and chain length
# 4. release all BAT/META_BAT entries, then loop until the space reserved
#    for the bbat (plus any mbat blocks beyond the 109 header slots) is stable
# 5. write the bbat, the mbat blocks (if any), then the header, and flush @io
def flush
	# update root dirent, and flatten dirent tree
	@root.name = 'Root Entry'
	@root.first_block = @sb_file.first_block
	@root.size = @sb_file.size
	@dirents = @root.flatten

	# serialize the dirents using the bbat
	RangesIOResizeable.open @bbat, 'w', :first_block => @header.dirent_start do |io|
		io.write @dirents.map { |dirent| dirent.to_s }.join
		# number of zero bytes needed to round the stream up to a block boundary
		padding = (io.size / @bbat.block_size.to_f).ceil * @bbat.block_size - io.size
		io.write 0.chr * padding
		@header.dirent_start = io.first_block
	end

	# serialize the sbat
	# perhaps the blocks used by the sbat should be marked with BAT?
	RangesIOResizeable.open @bbat, 'w', :first_block => @header.sbat_start do |io|
		io.write @sbat.to_s
		@header.sbat_start = io.first_block
		@header.num_sbat = @bbat.chain(@header.sbat_start).length
	end

	# create RangesIOResizeable hooked up to the bbat. use that to claim bbat blocks using
	# truncate. then when its time to write, convert that chain and some chunk of blocks at
	# the end, into META_BAT blocks. write out the chain, and those meta bat blocks, and its
	# done.
	# this is perhaps not good, as we reclaim all bat blocks here, which
	# may include the sbat we just wrote. FIXME
	@bbat.map! do |b|
		b == AllocationTable::BAT || b == AllocationTable::META_BAT ? AllocationTable::AVAIL : b
	end

	# currently we use a loop. this could be better, but basically,
	# the act of writing out the bat, itself requires blocks which get
	# recorded in the bat.
	#
	# i'm sure that there'd be some simpler closed form solution to this. solve
	# recursive func:
	#
	#   num_mbat_blocks = ceil(max((mbat_len - 109) * 4 / block_size, 0))
	#   bbat_len = initial_bbat_len + num_mbat_blocks
	#   mbat_len = ceil(bbat_len * 4 / block_size)
	#
	# the actual bbat allocation table is itself stored throughout the file, and that chain
	# is stored in the initial blocks, and the mbat blocks.
	num_mbat_blocks = 0
	io = RangesIOResizeable.new @bbat, 'w', :first_block => AllocationTable::EOC
	# truncate now, so that we can simplify size calcs - the mbat blocks will be appended in a
	# contiguous chunk at the end.
	# hmmm, i think this truncate should be matched with a truncate of the underlying io. if you
	# delete a lot of stuff, and free up trailing blocks, the file size never shrinks. this can
	# be fixed easily, add an io truncate
	@bbat.truncate!
	# the + 1 accounts for the header block that precedes the data blocks
	@io.truncate @bbat.block_size * (@bbat.length + 1)
	while true
		# get total bbat size. equivalent to @bbat.to_s.length, but for the factoring in of
		# the mbat blocks. we can't just add the mbat blocks directly to the bbat, as as this iteration
		# progresses, more blocks may be needed for the bat itself (if there are no more gaps), and the
		# mbat must remain contiguous.
		bbat_data_len = ((@bbat.length + num_mbat_blocks) * 4 / @bbat.block_size.to_f).ceil * @bbat.block_size
		# now storing the excess mbat blocks also increases the size of the bbat:
		new_num_mbat_blocks = ([bbat_data_len / @bbat.block_size - 109, 0].max * 4 / (@bbat.block_size.to_f - 4)).ceil
		if new_num_mbat_blocks != num_mbat_blocks
			# need more space for the mbat.
			num_mbat_blocks = new_num_mbat_blocks
		elsif io.size != bbat_data_len
			# need more space for the bat
			# this may grow the bbat, depending on existing available blocks
			io.truncate bbat_data_len
		else
			# both sizes are stable, so the reservation is final
			break
		end
	end

	# now extract the info we want:
	ranges = io.ranges
	bbat_chain = @bbat.chain io.first_block
	io.close
	bbat_chain.each { |b| @bbat[b] = AllocationTable::BAT }
	# tack on the mbat stuff
	@header.num_bat = bbat_chain.length
	mbat_blocks = (0...num_mbat_blocks).map do
		block = @bbat.free_block
		@bbat[block] = AllocationTable::META_BAT
		block
	end
	@header.mbat_start = mbat_blocks.first || AllocationTable::EOC

	# now finally write the bbat, using a not resizable io.
	# the mode here will be 'r', which allows write atm. 
	RangesIO.open(@io, :ranges => ranges) { |f| f.write @bbat.to_s }

	# this is the mbat. pad it out.
	bbat_chain += [AllocationTable::AVAIL] * [109 - bbat_chain.length, 0].max
	@header.num_mbat = num_mbat_blocks
	if num_mbat_blocks != 0
		# write out the mbat blocks now. first of all, where are they going to be?
		mbat_data = bbat_chain[109..-1]
		# expand the mbat_data to include the linked list forward pointers.
		mbat_data = mbat_data.to_enum(:each_slice, @bbat.block_size / 4 - 1).to_a.
			zip(mbat_blocks[1..-1] + [nil]).map { |a, b| b ? a + [b] : a }
		# pad out the last one.
		mbat_data.last.push(*([AllocationTable::AVAIL] * (@bbat.block_size / 4 - mbat_data.last.length)))
		RangesIO.open @io, :ranges => @bbat.ranges(mbat_blocks) do |f|
			f.write mbat_data.flatten.pack('V*')
		end
	end

	# now seek back and write the header out
	# (the first 109 chain entries are written immediately after the header)
	@io.seek 0
	@io.write @header.to_s + bbat_chain[0, 109].pack('V*')
	@io.flush
end

#inspect ⇒ Object



338
339
340
# File 'lib/ole/storage/base.rb', line 338

# Short description made of the class name plus the inspect output of the
# underlying io and of the root dirent.
def inspect
	io_desc = @io.inspect
	root_desc = @root.inspect
	"#<#{self.class} io=#{io_desc} root=#{root_desc}>"
end

#load ⇒ Object

load document from file.

TODO: implement various allocationtable checks, maybe as an AllocationTable#fsck function :)

  1. reterminate any chain not ending in EOC. compare file size with actually allocated blocks per file.

  2. pass through all chain heads looking for collisions, and making sure nothing points to them (ie they are really heads). in both sbat and mbat

  3. we know the locations of the bbat data, and mbat data. ensure that there are placeholder blocks in the bat for them.

  4. maybe a check of excess data. if there is data outside the bbat.truncate.length + 1 * block_size, (eg what is used for truncate in #flush), then maybe add some sort of message about that. it will be automatically thrown away at close time.



107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/ole/storage/base.rb', line 107

# Reads an existing ole document from @io and rebuilds the in-memory state:
# @header, @bbat, @dirents, @root, @sb_file and @sbat.
#
# Raises FormatError when a directory entry is reached a second time while
# the flat dirent list is being linked into a tree.
def load
	# we always read 512 for the header block. if the block size ends up being different,
	# what happens to the 109 fat entries. are there more/less entries?
	@io.rewind
	header_block = @io.read 512
	@header = Header.new header_block

	# create an empty bbat.
	@bbat = AllocationTable::Big.new self
	# the bytes after the header proper hold the first part of the bbat chain
	bbat_chain = header_block[Header::SIZE..-1].unpack 'V*'
	mbat_block = @header.mbat_start
	@header.num_mbat.times do
		blocks = @bbat.read([mbat_block]).unpack 'V*'
		# the last entry of each mbat block is the pointer to the next mbat block
		mbat_block = blocks.pop
		bbat_chain += blocks
	end
	# am i using num_bat in the right way?
	@bbat.load @bbat.read(bbat_chain[0, @header.num_bat])
	
	# get block chain for directories, read it, then split it into chunks and load the
	# directory entries. semantics changed - used to cut at first dir where dir.type == 0
	@dirents = @bbat.read(@header.dirent_start).to_enum(:each_chunk, Dirent::SIZE).
		map { |str| Dirent.new self, str }

	# now reorder from flat into a tree
	# links are stored in some kind of balanced binary tree
	# check that everything is visited at least, and at most once
	# similarly with the blocks of the file.
	# was thinking of moving this to Dirent.to_tree instead.
	class << @dirents
		# returns the dirents reachable from idx through prev/next links, in order,
		# after attaching each one's children; idx is set as a "visited" marker.
		def to_tree idx=0
			return [] if idx == Dirent::EOT
			d = self[idx]
			to_tree(d.child).each { |child| d << child }
			raise FormatError, "directory #{d.inspect} used twice" if d.idx
			d.idx = idx
			to_tree(d.prev) + [d] + to_tree(d.next)
		end
	end

	@root = @dirents.to_tree.first
	@dirents.reject! { |d| d.type_id == 0 }
	# silence this warning by default, its not really important (issue #5).
	# fairly common one appears to be "R" (from office OS X?) which smells
	# like some kind of UTF16 snafu, but scottwillson also has had some kanji...
	#Log.warn "root name was #{@root.name.inspect}" unless @root.name == 'Root Entry'
	# entries never given an idx were not reached from the root
	unused = @dirents.reject(&:idx).length
	Log.warn "#{unused} unused directories" if unused > 0

	# FIXME i don't currently use @header.num_sbat which i should
	# hmm. nor do i write it. it means what exactly again?
	# which mode to use here?
	@sb_file = RangesIOResizeable.new @bbat, :first_block => @root.first_block, :size => @root.size
	@sbat = AllocationTable::Small.new self
	@sbat.load @bbat.read(@header.sbat_start)
end

#meta_data ⇒ Object



145
146
147
# File 'lib/ole/storage/meta_data.rb', line 145

# Returns the MetaData helper bound to this storage. It is created on first
# use and cached in @meta_data.
def meta_data
	@meta_data ||= MetaData.new(self)
end

#repack(temp = :file) ⇒ Object

could be useful with mis-behaving ole documents. or to just clean them up.



311
312
313
314
315
316
317
318
319
320
321
# File 'lib/ole/storage/base.rb', line 311

# Repacks the storage via a scratch io handed to #repack_using_io.
#
# temp:: :file (default) uses a Tempfile switched to binary mode,
#        :mem uses an in-memory StringIO.
#
# Returns whatever #repack_using_io returns. Raises ArgumentError for any
# other +temp+ value.
def repack temp=:file
	if :file == temp
		Tempfile.open('ole-repack') do |scratch|
			scratch.binmode
			repack_using_io scratch
		end
	elsif :mem == temp
		StringIO.open('') { |scratch| repack_using_io scratch }
	else
		raise ArgumentError, "unknown temp backing #{temp.inspect}"
	end
end

#repack_using_io(temp_io) ⇒ Object



323
324
325
326
327
328
329
330
331
# File 'lib/ole/storage/base.rb', line 323

# Copies the current contents of @io into +temp_io+, clears this storage,
# then opens +temp_io+ as a storage (with this object's @params) and copies
# its root dirent tree back onto our own root via Dirent.copy.
def repack_using_io temp_io
	@io.rewind
	IO.copy @io, temp_io
	clear
	Storage.open(temp_io, nil, @params) { |source| Dirent.copy source.root, root }
end