Class: Roo::OpenOffice

Inherits:
Base
  • Object
show all
Defined in:
lib/roo/open_office.rb

Direct Known Subclasses

LibreOffice

Constant Summary

Constants inherited from Base

Base::MAX_ROW_COL, Base::MIN_ROW_COL, Base::TEMP_PREFIX

Instance Attribute Summary

Attributes inherited from Base

#header_line, #headers

Instance Method Summary collapse

Methods inherited from Base

#cell_type_by_value, #clean_sheet_if_need, #collect_last_row_col_for_sheet, #column, #default_sheet, #default_sheet=, #each, #each_with_pagename, #empty?, #find, #first_column_as_letter, #first_last_row_col_for_sheet, #info, #inspect, #last_column_as_letter, #parse, #reload, #row, #row_with, #search_or_set_header, #set, #sheet, #to_csv, #to_matrix, #to_xml, #to_yaml

Constructor Details

#initialize(filename, options = {}) ⇒ OpenOffice

initialization and opening of a spreadsheet file values for packed: :zip



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/roo/open_office.rb', line 10

def initialize(filename, options={})
  packed = options[:packed]
  file_warning = options[:file_warning] || :error

  file_type_check(filename,'.ods','an Roo::OpenOffice', file_warning, packed)
  @tmpdir = make_tmpdir(filename.split('/').last, options[:tmpdir_root])
  @filename = local_filename(filename, @tmpdir, packed)
  #TODO: @cells_read[:default] = false
  Zip::File.open(@filename) do |zip_file|
    if content_entry = zip_file.glob("content.xml").first
      roo_content_xml_path = File.join(@tmpdir, 'roo_content.xml')
      content_entry.extract(roo_content_xml_path)
      decrypt_if_necessary(
        zip_file,
        content_entry,
        roo_content_xml_path,
        options
      )
    else
      raise ArgumentError, 'file missing required content.xml'
    end
  end
  super(filename, options)
  @formula = Hash.new
  @style = Hash.new
  @style_defaults = Hash.new { |h,k| h[k] = [] }
  @style_definitions = Hash.new
  @comment = Hash.new
  @comments_read = Hash.new
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(m, *args) ⇒ Object



254
255
256
257
258
259
260
261
262
263
264
# File 'lib/roo/open_office.rb', line 254

def method_missing(m,*args)
  read_labels
  # is method name a label name
  if @label.has_key?(m.to_s)
    row,col = label(m.to_s)
    cell(row,col)
  else
    # call super for methods like #a1
    super
  end
end

Instance Method Details

#cell(row, col, sheet = nil) ⇒ Object

Returns the content of a spreadsheet-cell. (1,1) is the upper left corner. (1,1), (1,‘A’), (‘A’,1), (‘a’,1) all refers to the cell at the first line and first row.



270
271
272
273
274
275
276
277
278
279
# File 'lib/roo/open_office.rb', line 270

def cell(row, col, sheet=nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row,col = normalize(row,col)
  if celltype(row,col,sheet) == :date
    yyyy,mm,dd = @cell[sheet][[row,col]].to_s.split('-')
    return Date.new(yyyy.to_i,mm.to_i,dd.to_i)
  end
  @cell[sheet][[row,col]]
end

#celltype(row, col, sheet = nil) ⇒ Object

returns the type of a cell:

  • :float

  • :string

  • :date

  • :percentage

  • :formula

  • :time

  • :datetime



323
324
325
326
327
328
329
330
331
332
# File 'lib/roo/open_office.rb', line 323

def celltype(row,col,sheet=nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row,col = normalize(row,col)
  if @formula[sheet][[row,col]]
    return :formula
  else
    @cell_type[sheet][[row,col]]
  end
end

#comment(row, col, sheet = nil) ⇒ Object

returns the comment at (row/col) nil if there is no comment



386
387
388
389
390
391
392
# File 'lib/roo/open_office.rb', line 386

def comment(row,col,sheet=nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row,col = normalize(row,col)
  return nil unless @comment[sheet]
  @comment[sheet][[row,col]]
end

#comments(sheet = nil) ⇒ Object

returns each comment in the selected sheet as an array of elements

row, col, comment


396
397
398
399
400
401
402
403
404
405
406
# File 'lib/roo/open_office.rb', line 396

def comments(sheet=nil)
  sheet ||= default_sheet
  read_comments(sheet) unless @comments_read[sheet]
  if @comment[sheet]
    @comment[sheet].each.collect do |elem|
      [elem[0][0],elem[0][1],elem[1]]
    end
  else
    []
  end
end

#decrypt(content_entry, cipher) ⇒ Object

Block decrypt raw bytes from the zip file based on the cipher



225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
# File 'lib/roo/open_office.rb', line 225

def decrypt(content_entry, cipher)
  # Zip::Entry.extract writes a 0-length file when trying
  # to extract an encrypted stream, so we read the
  # raw bytes based on the offset and lengths
  decrypted = ""
  File.open(@filename, "rb") do |zipfile|
    zipfile.seek(
      content_entry.local_header_offset +
        content_entry.calculate_local_header_size
    )
    total_to_read = content_entry.compressed_size
    block_size = 4096
    if block_size > total_to_read
      block_size = total_to_read
    end
    while buffer = zipfile.read(block_size)
      decrypted += cipher.update(buffer)
      total_to_read -= buffer.length
      if total_to_read == 0
        break
      end
      if block_size > total_to_read
        block_size = total_to_read
      end
    end
  end
  decrypted + cipher.final
end

#decrypt_if_necessary(zip_file, content_entry, roo_content_xml_path, options) ⇒ Object

If the ODS file has an encryption-data element, then try to decrypt. If successful, the temporary content.xml will be overwritten with decrypted contents.



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/roo/open_office.rb', line 44

def decrypt_if_necessary(
  zip_file,
  content_entry,
  roo_content_xml_path, options
)
  # Check if content.xml is encrypted by extracting manifest.xml
  # and searching for a manifest:encryption-data element
  
  if manifest_entry = zip_file.glob("META-INF/manifest.xml").first
    roo_manifest_xml_path = File.join(@tmpdir, "roo_manifest.xml")
    manifest_entry.extract(roo_manifest_xml_path)
    manifest = ::Roo::Utils.load_xml(roo_manifest_xml_path)
    
    # XPath search for manifest:encryption-data only for the content.xml
    # file
    
    encryption_data = manifest.xpath(
      "//manifest:file-entry[@manifest:full-path='content.xml']"\
      "/manifest:encryption-data"
    ).first
    
    # If XPath returns a node, then we know content.xml is encrypted
    
    if !encryption_data.nil?
      
      # Since we know it's encrypted, we check for the password option
      # and if it doesn't exist, raise an argument error
      
      password = options[:password]
      if !password.nil?
        perform_decryption(
          encryption_data,
          password,
          content_entry,
          roo_content_xml_path
        )
      else
        raise ArgumentError,
            'file is encrypted but password was not supplied'
      end
    end
  else
    raise ArgumentError, 'file missing required META-INF/manifest.xml'
  end
end

#find_cipher(algorithm, key_derivation_name, hashed_password, salt, iteration_count, iv) ⇒ Object

Create a cipher based on an ODS algorithm URI from manifest.xml



175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# File 'lib/roo/open_office.rb', line 175

def find_cipher(
  algorithm,
  key_derivation_name,
  hashed_password,
  salt,
  iteration_count,
  iv
)
  cipher = nil
  if algorithm.eql? "http://www.w3.org/2001/04/xmlenc#aes256-cbc"
    cipher = OpenSSL::Cipher.new('AES-256-CBC')
    cipher.decrypt
    cipher.padding = 0
    cipher.key = find_cipher_key(
      cipher,
      key_derivation_name,
      hashed_password,
      salt,
      iteration_count
    )
    cipher.iv = iv
  else
    raise ArgumentError, 'Unknown algorithm ' + algorithm
  end
  cipher
end

#find_cipher_key(cipher, key_derivation_name, hashed_password, salt, iteration_count) ⇒ Object

Create a cipher key based on an ODS algorithm string from manifest.xml



203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
# File 'lib/roo/open_office.rb', line 203

def find_cipher_key(
  cipher,
  key_derivation_name,
  hashed_password,
  salt,
  iteration_count
)
  if key_derivation_name.eql? "PBKDF2"
    key = OpenSSL::PKCS5.pbkdf2_hmac_sha1(
      hashed_password,
      salt,
      iteration_count,
      cipher.key_len
    )
  else
    raise ArgumentError, 'Unknown key derivation name ' +
        key_derivation_name
  end
  key
end

#font(row, col, sheet = nil) ⇒ Object

Given a cell, return the cell’s style



307
308
309
310
311
312
313
# File 'lib/roo/open_office.rb', line 307

def font(row, col, sheet=nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row,col = normalize(row,col)
  style_name = @style[sheet][[row,col]] || @style_defaults[sheet][col - 1] || 'Default'
  @style_definitions[style_name]
end

#formula(row, col, sheet = nil) ⇒ Object Also known as: formula?

Returns the formula at (row,col). Returns nil if there is no formula. The method #formula? checks if there is a formula.



284
285
286
287
288
289
# File 'lib/roo/open_office.rb', line 284

def formula(row,col,sheet=nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row,col = normalize(row,col)
  @formula[sheet][[row,col]]
end

#formulas(sheet = nil) ⇒ Object

returns each formula in the selected sheet as an array of elements

row, col, formula


294
295
296
297
298
299
300
301
302
303
304
# File 'lib/roo/open_office.rb', line 294

def formulas(sheet=nil)
  sheet ||= default_sheet
  read_cells(sheet)
  if @formula[sheet]
    @formula[sheet].each.collect do |elem|
      [elem[0][0], elem[0][1], elem[1]]
    end
  else
    []
  end
end

#label(labelname) ⇒ Object

returns the row,col values of the labelled cell (nil,nil) if label is not defined



357
358
359
360
361
362
363
364
365
366
367
368
369
# File 'lib/roo/open_office.rb', line 357

def label(labelname)
  read_labels
  unless @label.size > 0
    return nil,nil,nil
  end
  if @label.has_key? labelname
    return @label[labelname][1].to_i,
      ::Roo::Utils.letter_to_number(@label[labelname][2]),
      @label[labelname][0]
  else
    return nil,nil,nil
  end
end

#labels(sheet = nil) ⇒ Object

Returns an array which all labels. Each element is an array with

labelname, [row,col,sheetname]


373
374
375
376
377
378
379
380
381
382
# File 'lib/roo/open_office.rb', line 373

def labels(sheet=nil)
  read_labels
  @label.map do |label|
    [ label[0], # name
      [ label[1][1].to_i, # row
        ::Roo::Utils.letter_to_number(label[1][2]), # column
        label[1][0], # sheet
      ] ]
  end
end

#officeversionObject

version of the Roo::OpenOffice document at 2007 this is always “1.0”



342
343
344
345
# File 'lib/roo/open_office.rb', line 342

def officeversion
  oo_version
  @officeversion
end

#perform_decryption(encryption_data, password, content_entry, roo_content_xml_path) ⇒ Object

Process the ODS encryption manifest and perform the decryption



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/roo/open_office.rb', line 91

def perform_decryption(
  encryption_data,
  password,
  content_entry,
  roo_content_xml_path
)
  # Extract various expected attributes from the manifest that
  # describe the encryption
  
  algorithm_node = encryption_data.xpath("manifest:algorithm").first
  key_derivation_node =
      encryption_data.xpath("manifest:key-derivation").first
  start_key_generation_node =
      encryption_data.xpath("manifest:start-key-generation").first
  
  # If we have all the expected elements, then we can perform
  # the decryption.
  
  if !algorithm_node.nil? && !key_derivation_node.nil? &&
      !start_key_generation_node.nil?
    
    # The algorithm is a URI describing the algorithm used
    algorithm = algorithm_node['manifest:algorithm-name']
    
    # The initialization vector is base-64 encoded
    iv = Base64.decode64(
      algorithm_node['manifest:initialisation-vector']
    )
    key_derivation_name =
        key_derivation_node['manifest:key-derivation-name']
    key_size = key_derivation_node['manifest:key-size'].to_i
    iteration_count =
        key_derivation_node['manifest:iteration-count'].to_i
    salt = Base64.decode64(key_derivation_node['manifest:salt'])
    
    # The key is hashed with an algorithm represented by this URI
    key_generation_name =
        start_key_generation_node[
          'manifest:start-key-generation-name'
        ]
    key_generation_size =
        start_key_generation_node['manifest:key-size'].to_i
    
    hashed_password = password
    key = nil
    
    if key_generation_name.eql?(
      "http://www.w3.org/2000/09/xmldsig#sha256"
    )
      hashed_password = Digest::SHA256.digest(password)
    else
      raise ArgumentError, 'Unknown key generation algorithm ' +
          key_generation_name
    end
    
    cipher = find_cipher(
      algorithm,
      key_derivation_name,
      hashed_password,
      salt,
      iteration_count,
      iv
    )
    
    begin
      decrypted = decrypt(content_entry, cipher)
      
      # Finally, inflate the decrypted stream and overwrite
      # content.xml
      IO.binwrite(
        roo_content_xml_path,
        Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(decrypted)
      )
    rescue StandardError => error
      raise ArgumentError,
          'Invalid password or other data error: ' + error.to_s
    end
  else
    raise ArgumentError,
        'manifest.xml missing encryption-data elements'
  end
end

#sheetsObject



334
335
336
337
338
# File 'lib/roo/open_office.rb', line 334

def sheets
  doc.xpath("//*[local-name()='table']").map do |sheet|
    sheet.attributes["name"].value
  end
end

#to_s(sheet = nil) ⇒ Object

shows the internal representation of all cells mainly for debugging purposes



349
350
351
352
353
# File 'lib/roo/open_office.rb', line 349

def to_s(sheet=nil)
  sheet ||= default_sheet
  read_cells(sheet)
  @cell[sheet].inspect
end