Module: Workbook::Readers::XlsxReader

Includes:
XlsShared
Included in:
Book
Defined in:
lib/workbook/readers/xlsx_reader.rb

Constant Summary

Constants included from XlsShared

Workbook::Readers::XlsShared::XLS_COLORS

Instance Method Summary collapse

Methods included from XlsShared

#html_color_to_xls_color, #ms_formatting_to_strftime, #num_fmt_id_to_ms_formatting, #strftime_to_ms_format, #xls_number_to_date, #xls_number_to_time

Instance Method Details

#extract_xlsx_backgrounds(styles) ⇒ Object



116
117
118
119
120
121
122
123
124
# File 'lib/workbook/readers/xlsx_reader.rb', line 116

def extract_xlsx_backgrounds styles
  styles.css("fills fill").collect do |fill|
    hash = {}
    patternFill = fill.css("patternFill").first
    # TODO: convert to html-hex
    hash[:background] = patternFill.attr("patternType") if patternFill
    hash
  end
end

#extract_xlsx_number_formats(styles) ⇒ Object

Extracts number formats from styles.xml



106
107
108
109
110
111
112
113
114
# File 'lib/workbook/readers/xlsx_reader.rb', line 106

def extract_xlsx_number_formats styles
  hash = {}
  styles.css("numFmts numFmt").each do |fmt|
    format_id = fmt.attr("numFmtId").to_i
    parsed_format_string = ms_formatting_to_strftime(fmt.attr("formatCode"))
    hash[format_id] = parsed_format_string
  end
  hash
end

#load_xlsm(file_obj, options = {}) ⇒ Object



12
13
14
# File 'lib/workbook/readers/xlsx_reader.rb', line 12

def load_xlsm file_obj, options={}
  self.load_xlsx file_obj, options
end

#load_xlsx(file_obj, options = {}) ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/workbook/readers/xlsx_reader.rb', line 15

def load_xlsx file_obj, options={}
  file_obj = file_obj.path if file_obj.is_a? File
  sheets = {}
  shared_string_file = ""
  styles = ""
  workbook = ""
  workbook_rels = ""
  Zip::File.open(file_obj) do |zipfile|
    zipfile.entries.each do |file|
      if file.name.match(/^xl\/worksheets\/(.*)\.xml$/)
        sheets[file.name.sub(/^xl\//,'')] = zipfile.read(file.name)
      elsif file.name.match(/xl\/sharedStrings.xml$/)
        shared_string_file = zipfile.read(file.name)
      elsif file.name.match(/xl\/workbook.xml$/)
        workbook = zipfile.read(file.name)
      elsif file.name.match(/xl\/_rels\/workbook.xml.rels$/)
        workbook_rels = zipfile.read(file.name)
      elsif file.name.match(/xl\/styles.xml$/)
        styles = zipfile.read(file.name)
      end
      # content = zipfile.read(file.name) if file.name == "content.xml"
    end
  end

  parse_xlsx_styles(styles)


  relation_file = {}
  Nokogiri::XML(workbook_rels).css("Relationships Relationship").each do |relship|
    relation_file[relship.attr("Id")]=relship.attr("Target")
  end

  @shared_strings = parse_shared_string_file(shared_string_file)

  Nokogiri::XML(workbook).css("sheets sheet").each do |sheet|
    name = sheet.attr("name")
    filename = relation_file[sheet.attr("r:id")]
    state = sheet.attr("state")
    if state != "hidden"
      sheet = sheets[filename]
      self.push parse_xlsx_sheet(sheet)
      self.last.name = name
    end
  end

  @shared_strings = nil
  self.each do |sheet|
    sheet.each do |table|
      table.trim!
    end
  end
end

#pad_xlsx_row(row) ⇒ Object



163
164
165
166
167
168
# File 'lib/workbook/readers/xlsx_reader.rb', line 163

def pad_xlsx_row(row)
  row.each_with_index do |cell, index|
    row[index]=Workbook::Cell.new(nil) if cell.nil? and !cell.is_a?(Workbook::Cell)
  end
  row
end

#parse_shared_string_file(file) ⇒ Object



126
127
128
# File 'lib/workbook/readers/xlsx_reader.rb', line 126

def parse_shared_string_file file
  Nokogiri::XML(file).css("sst si").collect{|a| a.text}
end

#parse_xlsxObject



206
207
# File 'lib/workbook/readers/xlsx_reader.rb', line 206

def parse_xlsx
end

#parse_xlsx_cell(cell) ⇒ Object



170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/workbook/readers/xlsx_reader.rb', line 170

def parse_xlsx_cell cell
  # style_id = cell.attr('s')
  # p cell
  type = cell.attr('t')
  formatIndex = cell.attr('s').to_i
  position = cell.attr('r')
  formula = cell.css('f').text()
  value = cell.text
  fmt = template.formats[formatIndex]

  # puts type
  if type == "n" or type == nil
    if fmt.derived_type == :date
      value = xls_number_to_date(value)
    elsif fmt.derived_type == :time
      value = xls_number_to_time(value)
    elsif formula == "TRUE()"
      value = true
    elsif formula == "FALSE()"
      value = false
    elsif type == "n"
      value = value.match(/\./) ? value.to_f : value.to_i
    end
  elsif type == "b"
    if value.to_s == "0"
      value = false
    elsif value.to_s == "1"
      value = true
    end
  elsif type == "s"
    value = @shared_strings[value.to_i]
  end
  cell = Workbook::Cell.new(value, format: fmt)
  cell.formula = formula
  {cell: cell, position: position}
end

#parse_xlsx_column(column) ⇒ Object



146
147
148
149
150
# File 'lib/workbook/readers/xlsx_reader.rb', line 146

def parse_xlsx_column column
  col = Workbook::Column.new()
  col.width = column.attr("width").to_f
  col
end

#parse_xlsx_fonts(styles) ⇒ Object

Extracts fonts descriptors from styles.xml



96
97
98
99
100
101
102
103
# File 'lib/workbook/readers/xlsx_reader.rb', line 96

def parse_xlsx_fonts styles
  styles.css("fonts font").collect do |font|
    hash = {}
    hash[:font_family] = font.css("name").first.attr("val") if font.css("name")
    hash[:font_size] = font.css("sz").first.attr("val").to_i if font.css("name")
    hash
  end
end

#parse_xlsx_row(row) ⇒ Object



151
152
153
154
155
156
157
158
159
160
161
# File 'lib/workbook/readers/xlsx_reader.rb', line 151

def parse_xlsx_row row
  cells_with_pos = row.css('c').collect{|a| parse_xlsx_cell(a)}
  row = Workbook::Row.new
  cells_with_pos.each do |cell_with_pos|
    position = cell_with_pos[:position]
    col = position.match(/^[A-Z]*/).to_s
    row[col] = cell_with_pos[:cell]
  end
  row = pad_xlsx_row(row)
  row
end

#parse_xlsx_sheet(sheet_xml) ⇒ Object



129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/workbook/readers/xlsx_reader.rb', line 129

def parse_xlsx_sheet sheet_xml
  sheet = Workbook::Sheet.new
  table = sheet.table

  noko_xml = Nokogiri::XML(sheet_xml)

  rows = noko_xml.css('sheetData row').collect{|row| parse_xlsx_row(row)}
  rows.each do |row|
    table << row
  end

  columns = noko_xml.css('cols col').collect{|col| parse_xlsx_column(col) }
  table.columns = columns

  sheet
end

#parse_xlsx_styles(styles) ⇒ Object



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/workbook/readers/xlsx_reader.rb', line 68

def parse_xlsx_styles(styles)
  styles = Nokogiri::XML(styles)

  fonts = parse_xlsx_fonts styles
  backgrounds = extract_xlsx_backgrounds styles
  customNumberFormats = extract_xlsx_number_formats styles


  styles.css("cellXfs xf").each do |cellformat|
    hash = {}
    # <xf numFmtId="0" fontId="1" fillId="2" borderId="0" xfId="0" applyFont="1" applyFill="1" applyAlignment="1">
    background_hash = backgrounds[cellformat.attr("applyFill").to_i]
    hash.merge!(background_hash) if background_hash

    font_hash = fonts[cellformat.attr("applyFill").to_i]
    hash.merge!(font_hash) if font_hash

    id = (cellformat.attr("numFmtId")).to_i
    if id >= 164
      hash[:numberformat] = customNumberFormats[id]
    else
      hash[:numberformat] = ms_formatting_to_strftime(id)
    end
    self.template.add_format Workbook::Format.new(hash)
  end
end