Class: Mspire::Mzml

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/mspire/mzml.rb,
lib/mspire/mzml/cv.rb,
lib/mspire/mzml/run.rb,
lib/mspire/mzml/list.rb,
lib/mspire/mzml/scan.rb,
lib/mspire/mzml/plms1.rb,
lib/mspire/mzml/sample.rb,
lib/mspire/mzml/contact.rb,
lib/mspire/mzml/product.rb,
lib/mspire/mzml/software.rb,
lib/mspire/mzml/spectrum.rb,
lib/mspire/mzml/component.rb,
lib/mspire/mzml/precursor.rb,
lib/mspire/mzml/scan_list.rb,
lib/mspire/mzml/activation.rb,
lib/mspire/mzml/data_array.rb,
lib/mspire/mzml/index_list.rb,
lib/mspire/mzml/scan_window.rb,
lib/mspire/mzml/source_file.rb,
lib/mspire/mzml/chromatogram.rb,
lib/mspire/mzml/file_content.rb,
lib/mspire/mzml/selected_ion.rb,
lib/mspire/mzml/scan_settings.rb,
lib/mspire/mzml/spectrum_list.rb,
lib/mspire/mzml/data_processing.rb,
lib/mspire/mzml/file_description.rb,
lib/mspire/mzml/isolation_window.rb,
lib/mspire/mzml/chromatogram_list.rb,
lib/mspire/mzml/processing_method.rb,
lib/mspire/mzml/instrument_configuration.rb,
lib/mspire/mzml/data_array_container_like.rb,
lib/mspire/mzml/referenceable_param_group.rb

Overview

Reading an mzML file:

# Open the file and hand the Mspire::Mzml object to the block.
Mspire::Mzml.open("somefile.mzML") do |mzml|
  # each yields one spectrum at a time
  mzml.each do |spectrum|
    scan = spectrum.scan
    spectrum.mzs                  # array of m/zs
    spectrum.intensities          # array of intensities
    # points yields [m/z, intensity] pairs
    spectrum.points.each do |mz,intensity|
      puts "mz: #{mz} intensity: #{intensity}" 
    end
  end
end

Note that the mzml object supports random spectrum access (even if the mzml was not indexed):

mzml[22]  # retrieve spectrum at index 22

Writing an mzml file from scratch:

# Build a spectrum with id 'scan=1', described by two controlled-vocabulary
# params, and configure it inside the block.
spec1 = Mspire::Mzml::Spectrum.new('scan=1', params: ['MS:1000127', ['MS:1000511', 1]]) do |spec|
  # presumably [m/z values, intensities] -- confirm against Spectrum#data_arrays
  spec.data_arrays = [[1,2,3], [4,5,6]]
  # the scan list receives a single scan
  spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
    scan = Mspire::Mzml::Scan.new do |scan|
      # retention time of 40 seconds
      scan.describe! ['MS:1000016', 40.0, 'UO:0000010']
    end
    sl << scan
  end
end

# Assemble a complete Mzml object in the constructor block: id, cvs, file
# description, instrument configuration, software, data processing and run.
mzml = Mspire::Mzml.new do |mzml|
  mzml.id = 'the_little_example'
  mzml.cvs = Mspire::Mzml::CV::DEFAULT_CVS
  mzml.file_description = Mspire::Mzml::FileDescription.new  do |fd|
    fd.file_content = Mspire::Mzml::FileContent.new
    fd.source_files << Mspire::Mzml::SourceFile.new
  end
  # the same instrument configuration object is listed on the mzml and
  # passed to the run below
  default_instrument_config = Mspire::Mzml::InstrumentConfiguration.new("IC",[], params: ['MS:1000031'])
  mzml.instrument_configurations << default_instrument_config
  software = Mspire::Mzml::Software.new
  mzml.software_list << software
  # the same data processing object is listed on the mzml and passed to the
  # spectrum list below
  default_data_processing = Mspire::Mzml::DataProcessing.new("did_nothing")
  mzml.data_processing_list << default_data_processing
  mzml.run = Mspire::Mzml::Run.new("little_run", default_instrument_config) do |run|
    spectrum_list = Mspire::Mzml::SpectrumList.new(default_data_processing)
    # spec1 is the spectrum built in the previous example
    spectrum_list.push(spec1)
    run.spectrum_list = spectrum_list
  end
end

Defined Under Namespace

Modules: Component, DataArrayContainerLike, Default, List, Parser Classes: Activation, CV, Chromatogram, ChromatogramList, Contact, DataArray, DataProcessing, FileContent, FileDescription, Index, IndexList, InstrumentConfiguration, IsolationWindow, Precursor, ProcessingMethod, Product, ReferenceableParamGroup, Run, Sample, Scan, ScanList, ScanNumbersNotFound, ScanNumbersNotUnique, ScanSettings, ScanWindow, SelectedIon, Software, SourceFile, Spectrum, SpectrumList

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(arg = nil, &block) ⇒ Mzml

arg must be an IO object for automatic index and header parsing to occur. If arg is a hash, then attributes are set. In addition (or alternatively), a block may be given; it is called with self so that the object can be set up.

io must respond_to?(:size), giving the size of the io object in bytes which allows seeking. get_index_list is called to get or create the index list.



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/mspire/mzml.rb', line 147

# Builds an Mzml object.
#
# @param arg [IO, Hash, nil] an IO triggers encoding detection, index list
#   retrieval and header parsing; a Hash assigns each key through the
#   matching "key=" writer; anything else is ignored
# @yield [self] optional block that receives the new object for further setup
def initialize(arg=nil, &block)
  # the list attributes below always start out as (distinct) empty arrays
  ['cvs', 'software_list', 'instrument_configurations', 'data_processing_list'].each do |list_attr|
    self.send("#{list_attr}=", [])
  end

  case arg
  when Hash
    arg.each_pair { |attribute, value| self.send("#{attribute}=", value) }
  when IO
    @io = arg
    # the encoding is read from the encoding="..." attribute on the first line
    @encoding = @io.bookmark(true) do |stream|
      stream.readline.match(/encoding=["'](.*?)["']/)[1]
    end
    @index_list = get_index_list
    read_header!
  end

  block.call(self) if block
end

Instance Attribute Details

#accessionObject

(optional) e.g. a PRIDE accession number



98
99
100
# File 'lib/mspire/mzml.rb', line 98

# Reader for the (optional) accession, e.g. a PRIDE accession number.
def accession
  @accession
end

#cvsObject

(required) an array of Mspire::Mzml::CV objects



105
106
107
# File 'lib/mspire/mzml.rb', line 105

# Reader for the (required) array of Mspire::Mzml::CV objects.
def cvs
  @cvs
end

#data_processing_listObject

(required) an array of Mspire::Mzml::DataProcessing objects



126
127
128
# File 'lib/mspire/mzml.rb', line 126

# Reader for the (required) array of Mspire::Mzml::DataProcessing objects.
def data_processing_list
  @data_processing_list
end

#encodingObject

Returns the value of attribute encoding.



138
139
140
# File 'lib/mspire/mzml.rb', line 138

# Reader for @encoding. When the object is constructed from an IO, this is
# the value of the encoding="..." attribute found on the first line read.
def encoding
  @encoding
end

#file_descriptionObject

(required) an Mspire::Mzml::FileDescription



108
109
110
# File 'lib/mspire/mzml.rb', line 108

# Reader for the (required) Mspire::Mzml::FileDescription.
def file_description
  @file_description
end

#idObject

(optional) an id for accessing from external files



92
93
94
# File 'lib/mspire/mzml.rb', line 92

# Reader for the (optional) id used for accessing from external files.
def id
  @id
end

#index_listObject

Returns the value of attribute index_list.



137
138
139
# File 'lib/mspire/mzml.rb', line 137

# Reader for @index_list. When the object is constructed from an IO, this is
# assigned the result of get_index_list.
def index_list
  @index_list
end

#instrument_configurationsObject

(required) an array of Mspire::Mzml::InstrumentConfiguration objects



123
124
125
# File 'lib/mspire/mzml.rb', line 123

# Reader for the (required) array of Mspire::Mzml::InstrumentConfiguration
# objects.
def instrument_configurations
  @instrument_configurations
end

#ioObject

Returns the value of attribute io.



136
137
138
# File 'lib/mspire/mzml.rb', line 136

# Reader for @io, the IO object handed to the constructor (the constructor
# only assigns it when its argument is an IO).
def io
  @io
end

#referenceable_param_groupsObject

(optional) an array of CV::ReferenceableParamGroup objects



111
112
113
# File 'lib/mspire/mzml.rb', line 111

# Reader for the (optional) array of CV::ReferenceableParamGroup objects.
def referenceable_param_groups
  @referenceable_param_groups
end

#runObject

(required) an Mspire::Mzml::Run object



129
130
131
# File 'lib/mspire/mzml.rb', line 129

# Reader for the (required) Mspire::Mzml::Run object.
def run
  @run
end

#samplesObject

(optional) an array of Mspire::Mzml::Sample objects



114
115
116
# File 'lib/mspire/mzml.rb', line 114

# Reader for the (optional) array of Mspire::Mzml::Sample objects.
def samples
  @samples
end

#scan_settings_listObject

(optional) an array of Mspire::Mzml::ScanSettings objects



120
121
122
# File 'lib/mspire/mzml.rb', line 120

# Reader for the (optional) array of Mspire::Mzml::ScanSettings objects.
def scan_settings_list
  @scan_settings_list
end

#software_listObject

(required) an array of Mspire::Mzml::Software objects



117
118
119
# File 'lib/mspire/mzml.rb', line 117

# Reader for the (required) array of Mspire::Mzml::Software objects.
def software_list
  @software_list
end

#versionObject

(required) the Mzml document version



95
96
97
# File 'lib/mspire/mzml.rb', line 95

# Reader for the (required) Mzml document version.
def version
  @version
end

Class Method Details

.foreach(filename, &block) ⇒ Object



216
217
218
219
220
221
# File 'lib/mspire/mzml.rb', line 216

# Opens filename and passes every item produced by the opened object's
# #each to the block.
#
# @param filename [String] path of the file to open
# @return an enumerator over the same items when no block is given
def foreach(filename, &block)
  return enum_for(__method__, filename) unless block
  open(filename) { |mzml| mzml.each(&block) }
end

.open(filename, &block) ⇒ Object

read-only right now



210
211
212
213
214
# File 'lib/mspire/mzml.rb', line 210

# Opens filename (read-only right now), wraps the file handle in a new
# object and passes that object to the block. The file is closed when the
# block returns.
#
# @param filename [String] path of the file to read
# @return whatever the block returns
def open(filename, &block)
  File.open(filename) do |handle|
    mzml = self.new(handle)
    block.call(mzml)
  end
end

Instance Method Details

#create_index_listMspire::Mzml::IndexList

Reads through and captures start bytes



321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
# File 'lib/mspire/mzml.rb', line 321

# Scans @io line by line from the beginning and records, for every line
# containing an opening <spectrum ...> or <chromatogram ...> tag with an id,
# the byte offset at which that tag starts.
#
# @return [Mspire::Mzml::IndexList] one Index per element type (:spectrum
#   then :chromatogram), each holding start bytes with a parallel ids array
def create_index_list
  offsets_by_type = @io.bookmark(true) do |_inner_io|   # sets to beginning of file
    found = {:spectrum => {}, :chromatogram => {}}
    bytes_seen = 0
    @io.each do |line|
      md = %r{<(spectrum|chromatogram).*?id=['"](.*?)['"][ >]}.match(line)
      # the tag starts after whatever precedes it on this line
      found[md[1].to_sym][md[2]] = bytes_seen + md.pre_match.bytesize if md
      bytes_seen += line.bytesize
    end
    found
  end

  index_objects = offsets_by_type.map do |type, id_to_offset|
    index = Index.new
    id_to_offset.each_value { |start_byte| index << start_byte }
    index.ids = id_to_offset.keys
    index.name = type
    index
  end
  IndexList.new(index_objects)
end

#each_spectrum(&block) ⇒ Object Also known as: each



238
239
240
241
242
243
244
245
246
247
# File 'lib/mspire/mzml.rb', line 238

# Passes each spectrum, in index order, to the block.
#
# @return an enumerator when no block is given
def each_spectrum(&block)
  return enum_for(__method__) unless block
  spectrum_count = @index_list[:spectrum].size
  (0...spectrum_count).each { |idx| block.call(spectrum(idx)) }
end

#each_spectrum_node(&block) ⇒ Object

returns the Nokogiri::XML::Node object associated with each spectrum



250
251
252
253
254
# File 'lib/mspire/mzml.rb', line 250

# Passes the XML node of each spectrum (built by
# spectrum_node_from_start_byte from the recorded start byte) to the block.
#
# @return an enumerator when no block is given, matching each_spectrum
def each_spectrum_node(&block)
  block or return enum_for(__method__)
  @index_list[:spectrum].each do |start_byte|
    block.call spectrum_node_from_start_byte(start_byte)
  end
end

#get_index_listArray

reads or creates an index list

Returns:

  • (Array)

    an array of indices



345
346
347
# File 'lib/mspire/mzml.rb', line 345

# Returns the result of read_index_list, or, when that is nil/false, the
# result of create_index_list.
def get_index_list
  from_file = read_index_list
  from_file ? from_file : create_index_list
end

#get_xml_string(start_byte, name = :spectrum) ⇒ Object

name can be :spectrum or :chromatogram



225
226
227
228
229
230
231
232
233
234
235
236
# File 'lib/mspire/mzml.rb', line 225

# Seeks io to start_byte and collects whole lines up to and including the
# first line that contains the closing tag </name> (or up to end of input
# if no such line is found).
#
# @param start_byte [Integer] position to seek to before reading
# @param name [Symbol, String] element name; can be :spectrum or :chromatogram
# @return [String] the collected lines joined together
def get_xml_string(start_byte, name=:spectrum)
  io.seek(start_byte)
  closing_tag = %r{</#{name}>}
  lines = []
  io.each_line do |line|
    lines.push(line)
    break if line =~ closing_tag
  end
  lines.join
end

#read_header!Object



164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/mspire/mzml.rb', line 164

# Parses the leading part of the document in @io and assigns self.cvs and
# self.file_description from it.
#
# Only the text up to (and including the chunk containing) the first tag
# whose name starts with "spectrum" or "chromatogram" is read and handed to
# Nokogiri. The elements following the file description are walked up to
# 'run', but the `when` branches are placeholders and set nothing yet.
def read_header!
  @io.rewind
  chunk_size = 2**12
  # loc tracks where the chunk appended in the current iteration begins
  loc = 0
  string = ''
  while chunk = @io.read(chunk_size)
    string << chunk
    # start 20 positions before the new chunk so that a tag split across two
    # chunks is still found
    start_looking = ((loc-20) < 0) ? 0 : (loc-20)
    break if string[start_looking..-1] =~ /<(spectrum|chromatogram)/
    loc += chunk_size
  end
  doc = Nokogiri::XML.parse(string, nil, @encoding, Parser::NOBLANKS)
  mzml_n = doc.root
  # an indexed file wraps the mzML element in an indexedmzML root
  if mzml_n.name == 'indexedmzML'
    mzml_n = mzml_n.child
  end
  # positional lookup: the first child is taken to be the cv list and its
  # next sibling the file description (names are not checked)
  cv_list_n = mzml_n.child
  file_description_n = cv_list_n.next
  self.cvs = cv_list_n.children.map do |cv_n|
    Mspire::Mzml::CV.from_xml(cv_n)
  end
  self.file_description = Mspire::Mzml::FileDescription.from_xml(file_description_n)
  next_n = file_description_n.next
  # NOTE(review): if no 'run' sibling exists in the parsed text, next_n
  # becomes nil and next_n.name raises -- confirm inputs always contain it
  loop do
    case next_n.name
    when 'referenceableParamGroupList'
      # get a hash ready
    when 'sampleList'
      # set objects
    when 'softwareList'  # required
      # set objects
    when 'instrumentConfigurationList'
      # set objects
    when 'dataProcessingList'
      # set objects
    when 'run'
      # get defaults ready
      break
    end
    next_n = next_n.next
  end
end

#read_index_listMspire::Mzml::IndexList

Reads the index list of an indexed mzML file (returns nil if no index offset is found).

Returns:



298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
# File 'lib/mspire/mzml.rb', line 298

# Reads the index list from @io, starting at the offset reported by
# Mspire::Mzml::Index.index_offset and parsing everything from there to the
# end of the input.
#
# @return [Mspire::Mzml::IndexList, nil] one Index per child of the parsed
#   root (name taken from its 'name' attribute, start bytes from the text of
#   its children, ids from their 'idRef' attributes); nil when no offset is
#   reported
def read_index_list
  offset = Mspire::Mzml::Index.index_offset(@io)
  return nil unless offset

  @io.seek(offset)
  xml = Nokogiri::XML.parse(@io.read, nil, @encoding, Parser::NOBLANKS)
  indices = xml.root.children.map do |index_n|
    index = Index.new
    index.name = index_n['name'].to_sym
    ids = []
    # start bytes and ids are kept as parallel arrays
    index_n.children.each do |offset_n|
      index << offset_n.text.to_i
      ids << offset_n['idRef']
    end
    index.ids = ids
    index
  end
  IndexList.new(indices)
end

#sizeObject

returns the number of spectra



278
279
280
# File 'lib/mspire/mzml.rb', line 278

# returns the number of spectra (the size of the :spectrum index)
def size
  spectrum_index = @index_list[:spectrum]
  spectrum_index.size
end

#spectrum(arg) ⇒ Mspire::Spectrum Also known as: []

Returns a spectrum object.

Parameters:

  • arg (Object)

    an index number (Integer) or id string (String)

Returns:



271
272
273
274
275
# File 'lib/mspire/mzml.rb', line 271

# Returns a spectrum object.
#
# The start byte is looked up in the :spectrum index by name (as size,
# each_spectrum and spectrum_node do) rather than by position, so the
# lookup does not depend on the spectrum index being first in the list.
#
# @param arg [Object] an index number (Integer) or id string (String)
# @return [Mspire::Spectrum] built by Mspire::Mzml::Spectrum.from_xml
def spectrum(arg)
  start_byte = index_list[:spectrum].start_byte(arg)
  spec_n = spectrum_node_from_start_byte(start_byte)
  Mspire::Mzml::Spectrum.from_xml(spec_n)
end

#spectrum_from_scan_num(scan_num) ⇒ Mspire::Spectrum

Returns a spectrum object, or nil if not found.

Parameters:

  • scan_num (Integer)

    the scan number

Returns:

Raises:



289
290
291
292
293
294
# File 'lib/mspire/mzml.rb', line 289

# Returns a spectrum object, or nil if the scan number is not found.
#
# NOTE(review): the scan-number map is built with create_scan_to_index, the
# name to_plms1 calls on the same :spectrum index -- confirm that this is
# the method the Index class actually defines.
#
# @param scan_num [Integer] the scan number
# @return [Mspire::Spectrum, nil]
# @raise [ScanNumbersNotUnique] if the scan-number map is false
# @raise [ScanNumbersNotFound] if the scan-number map is nil
def spectrum_from_scan_num(scan_num)
  @scan_to_index ||= @index_list[:spectrum].create_scan_to_index
  raise ScanNumbersNotUnique if @scan_to_index == false
  raise ScanNumbersNotFound if @scan_to_index.nil?
  index = @scan_to_index[scan_num]
  # an unknown scan number yields nil instead of a lookup with a nil index
  index && spectrum(index)
end

#spectrum_node(index) ⇒ Object

returns the nokogiri xml node for the spectrum at that index



259
260
261
# File 'lib/mspire/mzml.rb', line 259

# returns the nokogiri xml node for the spectrum at that index
def spectrum_node(index)
  start_byte = @index_list[:spectrum][index]
  spectrum_node_from_start_byte(start_byte)
end

#spectrum_node_from_start_byte(start_byte) ⇒ Object



263
264
265
266
267
# File 'lib/mspire/mzml.rb', line 263

# Reads the spectrum XML that begins at start_byte, parses it with Nokogiri
# (using @encoding and Parser::NOBLANKS) and returns the document's root.
def spectrum_node_from_start_byte(start_byte)
  spectrum_xml = get_xml_string(start_byte, :spectrum)
  Nokogiri::XML.parse(spectrum_xml, nil, @encoding, Parser::NOBLANKS).root
end

#to_plms1(use_scan_nums = true) ⇒ Object

will use scan numbers if use_scan_nums is true, otherwise it will use index numbers in place of scan nums



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/mspire/mzml/plms1.rb', line 8

# Builds an Mspire::Plms1 from this object.
#
# @param use_scan_nums [Boolean] when true the scan numbers are the keys of
#   the spectrum index's create_scan_to_index result; otherwise the index
#   numbers 0...size are used in their place
# @return [Mspire::Plms1] constructed from (scan_nums, retention_times, self)
# @raise [RuntimeError] if a spectrum has no retention time cvParam
#   (accession MS:1000016) or its unitName is neither 'minute' nor 'second'
def to_plms1(use_scan_nums=true)
  spectrum_index = self.index_list[:spectrum]
  scan_nums = use_scan_nums ? spectrum_index.create_scan_to_index.keys : (0...spectrum_index.size).to_a

  # retention times are collected in seconds
  retention_times = self.enum_for(:each_spectrum_node).map do |spectrum_n|
    rt_param = spectrum_n.xpath("scanList/scan/cvParam[@accession='MS:1000016']")[0]
    raise 'no retention time xml node' unless rt_param
    rt_value = rt_param['value'].to_f
    unit = rt_param['unitName']
    if unit == 'minute'
      rt_value * 60
    elsif unit == 'second'
      rt_value
    else
      raise 'retention time must be in minutes or seconds (or add some code to handle)'
    end
  end

  # plms1 only requires that the object respond to :each, giving a spectrum
  # object, so an Mzml object will work.
  Mspire::Plms1.new(scan_nums, retention_times, self)
end

#to_xml(filename = nil) ⇒ Object

Because mzml files are often very large, we try to avoid storing the entire object tree in memory before writing.

Takes a filename and uses Builder to write to it (returning self). If no filename is given, returns the XML as a string.



354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
# File 'lib/mspire/mzml.rb', line 354

# Serializes the object as mzML using Builder.
#
# The cvs check runs before the output is opened, so an invalid object does
# not create or truncate the target file, and the file handle is closed
# even if writing raises part-way through.
#
# @param filename [String, nil] file to write to; when nil the XML is
#   built in memory
# @return [self, String] self when a filename is given, otherwise the XML
#   string
# @raise [RuntimeError] if @cvs is empty
def to_xml(filename=nil)
  # TODO: support indexed mzml files
  raise "#{self.class}#cvs must have > 0 Mspire::Mzml::CV objects" unless @cvs.size > 0

  io = filename ? File.open(filename, 'w') : StringIO.new
  begin
    xml = Builder::XmlMarkup.new(:target => io, :indent => 2)
    xml.instruct!

    mzml_atts = Default::NAMESPACE.dup
    mzml_atts[:version] = @version || Default::VERSION
    mzml_atts[:accession] = @accession if @accession
    mzml_atts[:id] = @id if @id

    xml.mzML(mzml_atts) do |mzml_n|
      # the 'if' statements capture whether or not the list is required or not
      Mspire::Mzml::CV.list_xml(@cvs, mzml_n)
      @file_description.to_xml(mzml_n)
      if @referenceable_param_groups
        Mspire::Mzml::ReferenceableParamGroup.list_xml(@referenceable_param_groups, mzml_n)
      end
      if @samples
        Mspire::Mzml::Sample.list_xml(@samples, mzml_n)
      end
      Mspire::Mzml::Software.list_xml(@software_list, mzml_n)
      if @scan_settings_list && @scan_settings_list.size > 0
        Mspire::Mzml::ScanSettings.list_xml(@scan_settings_list, mzml_n)
      end
      Mspire::Mzml::InstrumentConfiguration.list_xml(@instrument_configurations, mzml_n)
      Mspire::Mzml::DataProcessing.list_xml(@data_processing_list, mzml_n)
      @run.to_xml(mzml_n)
    end
  ensure
    # only a real file needs closing; the StringIO is still read below
    io.close if filename
  end

  filename ? self : io.string
end