Class: Mspire::Mzml

Inherits:
Object
  • Object
show all
Includes:
Enumerable, Convenience, Reader
Defined in:
lib/mspire/mzml.rb,
lib/mspire/mzml/cv.rb,
lib/mspire/mzml/run.rb,
lib/mspire/mzml/list.rb,
lib/mspire/mzml/scan.rb,
lib/mspire/mzml/index.rb,
lib/mspire/mzml/plms1.rb,
lib/mspire/mzml/parser.rb,
lib/mspire/mzml/reader.rb,
lib/mspire/mzml/reader.rb,
lib/mspire/mzml/sample.rb,
lib/mspire/mzml/contact.rb,
lib/mspire/mzml/product.rb,
lib/mspire/mzml/io_index.rb,
lib/mspire/mzml/software.rb,
lib/mspire/mzml/spectrum.rb,
lib/mspire/mzml/component.rb,
lib/mspire/mzml/precursor.rb,
lib/mspire/mzml/scan_list.rb,
lib/mspire/mzml/activation.rb,
lib/mspire/mzml/data_array.rb,
lib/mspire/mzml/index_list.rb,
lib/mspire/mzml/scan_window.rb,
lib/mspire/mzml/source_file.rb,
lib/mspire/mzml/chromatogram.rb,
lib/mspire/mzml/file_content.rb,
lib/mspire/mzml/selected_ion.rb,
lib/mspire/mzml/scan_settings.rb,
lib/mspire/mzml/spectrum_list.rb,
lib/mspire/mzml/data_processing.rb,
lib/mspire/mzml/file_description.rb,
lib/mspire/mzml/isolation_window.rb,
lib/mspire/mzml/chromatogram_list.rb,
lib/mspire/mzml/io_indexable_list.rb,
lib/mspire/mzml/processing_method.rb,
lib/mspire/mzml/instrument_configuration.rb,
lib/mspire/mzml/data_array_container_like.rb,
lib/mspire/mzml/referenceable_param_group.rb

Overview

Reading an mzml file:

# Open an mzML file, then walk every spectrum and every chromatogram in it.
Mspire::Mzml.open("somefile.mzML") do |mzml|
  mzml.each do |spectrum|
    scan = spectrum.scan
    spectrum.mzs                  # array of m/zs
    spectrum.intensities          # array of intensities
    # the block receives each peak as an m/z and its intensity
    spectrum.peaks do |mz,intensity|
      puts "mz: #{mz} intensity: #{intensity}" 
    end

    spectrum.params  # list all the params associated with an object

    # fetch_by_acc gives true if the accession is present without a value,
    # the value itself if one is present, or false otherwise
    if spectrum.fetch_by_acc('MS:1000128')
      puts "this is a profile spectrum!"
    end

    # only MS2 spectra are inspected for their first scan window
    if spectrum.ms_level == 2
      low_mz = spectrum.scan_list.first.scan_windows.first.to_i
      puts "begin scan at #{low_mz} m/z"
    end
  end

  # chromatograms are iterated separately from spectra
  mzml.each_chromatogram do |chrm|
    chrm.times
    chrm.intensities
  end
end

Note that the mzml object supports random spectrum access (even if the mzml was not indexed):

mzml[22]  # retrieve spectrum at index 22

Writing an mzml file from scratch:

# Build the first spectrum (id 'scan=1') with two data arrays and one scan.
spec1 = Mspire::Mzml::Spectrum.new('scan=1') do |spec|
  # profile and ms_level 1
  spec.describe_many!(['MS:1000128', ['MS:1000511', 1]])
  spec.data_arrays = [
    Mspire::Mzml::DataArray[1,2,3].describe!('MS:1000514'),  
    Mspire::Mzml::DataArray[4,5,6].describe!('MS:1000515')   
  ]
  spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
    scan = Mspire::Mzml::Scan.new do |scan|
      # retention time of 40 seconds (the value passed here is 40.0)
      scan.describe! 'MS:1000016', 40.0, 'UO:0000010'
    end
    sl << scan
  end
end

# Build the second spectrum (id 'scan=2'), whose precursor refers back to spec1.
spec2 = Mspire::Mzml::Spectrum.new('scan=2') do |spec| 
  # centroid, ms_level 2, MSn spectrum
  spec.describe_many!(['MS:1000127', ['MS:1000511', 2], "MS:1000580"])
  spec.data_arrays = [
    Mspire::Mzml::DataArray[1,2,3.5].describe!('MS:1000514'),  
    Mspire::Mzml::DataArray[5,6,5].describe!('MS:1000515')   
  ]
  spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
    scan = Mspire::Mzml::Scan.new do |scan|
      # retention time of 45 seconds (the value passed here is 45.0)
      scan.describe! 'MS:1000016', 45.0, 'UO:0000010'
    end
    sl << scan
  end
  # the precursor is constructed from the id of spec1
  precursor = Mspire::Mzml::Precursor.new( spec1.id )
  si = Mspire::Mzml::SelectedIon.new
  # the selected ion m/z:
  si.describe! "MS:1000744", 2.0
  # the selected ion charge state
  si.describe! "MS:1000041", 2
  # the selected ion intensity
  si.describe! "MS:1000042", 5
  precursor.selected_ions = [si]
  spec.precursors = [precursor]
end

# Assemble the document around the two spectra built above (spec1, spec2).
mzml = Mspire::Mzml.new do |mzml|
  mzml.id = 'ms1_and_ms2'
  mzml.cvs = Mspire::Mzml::CV::DEFAULT_CVS
  mzml.file_description = Mspire::Mzml::FileDescription.new  do |fd|
    fd.file_content = Mspire::Mzml::FileContent.new
    fd.source_files << Mspire::Mzml::SourceFile.new
  end
  # one instrument configuration, also handed to the run below
  default_instrument_config = Mspire::Mzml::InstrumentConfiguration.new("IC").describe!('MS:1000031')
  mzml.instrument_configurations << default_instrument_config
  software = Mspire::Mzml::Software.new
  mzml.software_list << software
  # one data processing entry, also handed to the spectrum list below
  default_data_processing = Mspire::Mzml::DataProcessing.new("did_nothing")
  mzml.data_processing_list << default_data_processing
  mzml.run = Mspire::Mzml::Run.new("little_run", default_instrument_config) do |run|
    spectrum_list = Mspire::Mzml::SpectrumList.new(default_data_processing, [spec1, spec2])
    run.spectrum_list = spectrum_list
  end
end

# write is an alias of to_xml; with a filename the XML is written to that file
mzml.write("writtenxml.mzML")

Defined Under Namespace

Modules: Component, Convenience, DataArrayContainerLike, Default, List, Parser, Reader

Classes: Activation, Analyzer, CV, Chromatogram, ChromatogramList, Contact, DataArray, DataProcessing, Detector, FileContent, FileDescription, IOIndex, IOIndexableList, Index, IndexList, InstrumentConfiguration, IsolationWindow, Precursor, ProcessingMethod, Product, ReferenceableParamGroup, Run, Sample, Scan, ScanList, ScanNumbersNotFound, ScanNumbersNotUnique, ScanSettings, ScanWindow, SelectedIon, Software, Source, SourceFile, Spectrum, SpectrumList

Instance Attribute Summary collapse

Attributes included from Reader

#link

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Reader

#get_default_data_processing_ids, #get_header_string, #read_header!, #set_from_xml_io!

Methods included from Convenience

#chromatogram, #each_chromatogram, #each_spectrum, #length, #num_chromatograms, #spectrum, #spectrum_from_scan_num

Methods included from Enumerable

#index_by, #uniq_by

Constructor Details

#initialize(arg = nil, &block) ⇒ Mzml

arg must be an IO object for automatic index and header parsing to occur. If arg is a Hash, each key/value pair is used to set the attribute of that name. In addition (or alternatively), a block may be given; it is called with self so that the object can be set up.

io must respond_to?(:size), giving the size of the io object in bytes which allows seeking. get_index_list is called to get or create the index list.



202
203
204
205
206
207
208
209
210
211
212
# File 'lib/mspire/mzml.rb', line 202

# Builds a new Mzml object.
#
# The list attributes cvs, software_list, instrument_configurations, samples
# and data_processing_list are first set to an empty array each. Then:
# * if arg is an IO, the object is populated through set_from_xml_io!
# * if arg is a Hash, every key/value pair is assigned through the
#   matching "key=" setter
# * any other arg (including nil) is ignored
# Finally, when a block is given it is called with the new object.
def initialize(arg=nil, &block)
  %w(cvs software_list instrument_configurations samples data_processing_list).each do |list_name|
    # a fresh array per attribute, so the lists never share storage
    send("#{list_name}=", [])
  end

  case arg
  when IO then set_from_xml_io!(arg)
  when Hash then arg.each_pair { |name, value| send("#{name}=", value) }
  end

  yield(self) if block
end

Instance Attribute Details

#accessionObject

(optional) e.g. a PRIDE accession number



151
152
153
# File 'lib/mspire/mzml.rb', line 151

# Returns @accession: (optional) e.g. a PRIDE accession number.
def accession
  @accession
end

#cvsObject

(required) an array of Mspire::Mzml::CV objects



158
159
160
# File 'lib/mspire/mzml.rb', line 158

# Returns @cvs: (required) an array of Mspire::Mzml::CV objects.
def cvs
  @cvs
end

#data_processing_listObject

(required) an array of Mspire::Mzml::DataProcessing objects



179
180
181
# File 'lib/mspire/mzml.rb', line 179

# Returns @data_processing_list: (required) an array of
# Mspire::Mzml::DataProcessing objects.
def data_processing_list
  @data_processing_list
end

#encodingObject

xml file encoding



192
193
194
# File 'lib/mspire/mzml.rb', line 192

def encoding
  @encoding
end

#file_descriptionObject

(required) an Mspire::Mzml::FileDescription



161
162
163
# File 'lib/mspire/mzml.rb', line 161

# Returns @file_description: (required) an Mspire::Mzml::FileDescription.
def file_description
  @file_description
end

#idObject

(optional) an id for accessing from external files



145
146
147
# File 'lib/mspire/mzml.rb', line 145

# Returns @id: (optional) an id for accessing from external files.
def id
  @id
end

#index_listObject

Mspire::Mzml::IndexList object associated with the file (only expected when reading mzml files at the moment)



189
190
191
# File 'lib/mspire/mzml.rb', line 189

# Returns @index_list: the Mspire::Mzml::IndexList object associated with the
# file (only expected when reading mzml files at the moment).
def index_list
  @index_list
end

#instrument_configurationsObject

(required) an array of Mspire::Mzml::InstrumentConfiguration objects



176
177
178
# File 'lib/mspire/mzml.rb', line 176

# Returns @instrument_configurations: (required) an array of
# Mspire::Mzml::InstrumentConfiguration objects.
def instrument_configurations
  @instrument_configurations
end

#ioObject

the io object of the mzml file



185
186
187
# File 'lib/mspire/mzml.rb', line 185

# Returns @io: the io object of the mzml file.
def io
  @io
end

#referenceable_param_groupsObject

(optional) an array of CV::ReferenceableParamGroup objects



164
165
166
# File 'lib/mspire/mzml.rb', line 164

# Returns @referenceable_param_groups: (optional) an array of
# CV::ReferenceableParamGroup objects.
def referenceable_param_groups
  @referenceable_param_groups
end

#runObject

(required) an Mspire::Mzml::Run object



182
183
184
# File 'lib/mspire/mzml.rb', line 182

# Returns @run: (required) an Mspire::Mzml::Run object.
def run
  @run
end

#samplesObject

(optional) an array of Mspire::Mzml::Sample objects



167
168
169
# File 'lib/mspire/mzml.rb', line 167

# Returns @samples: (optional) an array of Mspire::Mzml::Sample objects.
def samples
  @samples
end

#scan_settings_listObject

(optional) an array of Mspire::Mzml::ScanSettings objects



173
174
175
# File 'lib/mspire/mzml.rb', line 173

# Returns @scan_settings_list: (optional) an array of
# Mspire::Mzml::ScanSettings objects.
def scan_settings_list
  @scan_settings_list
end

#software_listObject

(required) an array of Mspire::Mzml::Software objects



170
171
172
# File 'lib/mspire/mzml.rb', line 170

# Returns @software_list: (required) an array of Mspire::Mzml::Software objects.
def software_list
  @software_list
end

#versionObject

(required) the Mzml document version



148
149
150
# File 'lib/mspire/mzml.rb', line 148

# Returns @version: (required) the Mzml document version.
def version
  @version
end

Class Method Details

.foreach(filename, &block) ⇒ Object



121
122
123
124
125
126
# File 'lib/mspire/mzml.rb', line 121

# Opens +filename+ with open and passes every element yielded by the opened
# object's #each to the given block.
#
# Without a block, returns an enumerator over the same call instead.
def foreach(filename, &block)
  if block
    open(filename) { |mzml| mzml.each(&block) }
  else
    to_enum(__method__, filename)
  end
end

.open(filename, &block) ⇒ Object

read-only right now



115
116
117
118
119
# File 'lib/mspire/mzml.rb', line 115

# Opens the file at +filename+ for reading (read-only right now), wraps the
# open File in a new instance via self.new(io) and calls the block with that
# instance. File.open closes the file once the block has returned.
#
# Returns the value of the block.
# Raises ArgumentError when no block is given.
def open(filename, &block)
  # Fail before the file is opened and the instance is built: without a block
  # there is nothing to hand the instance to.
  raise ArgumentError, "a block is required" unless block

  File.open(filename) do |io|
    block.call(self.new(io))
  end
end

Instance Method Details

#to_plms1(use_scan_nums = true) ⇒ Object

Uses scan numbers if use_scan_nums is true (these typically start with one); otherwise uses index numbers (which start with zero).



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/mspire/mzml/plms1.rb', line 8

# Collects every spectrum whose ms_level is 1 into an Mspire::Plms1.
#
# use_scan_nums - when true (the default) each collected spectrum is
#                 numbered with the scan number found at its position in the
#                 keys of the spectrum index's create_scan_to_index; when
#                 false it is numbered with its zero-based position.
#
# Returns Mspire::Plms1.new(nums, rts, spectra), where rts holds each
# collected spectrum's retention_time.
def to_plms1(use_scan_nums=true)
  # The index list is only needed to look up scan numbers, so it is not
  # touched (and need not exist) when plain index numbers are requested.
  scan_nums = self.index_list[:spectrum].create_scan_to_index.keys if use_scan_nums

  nums = [] ; rts = [] ; spectra = []

  self.each_with_index do |spec, index|
    next unless spec.ms_level == 1
    nums << (use_scan_nums ? scan_nums[index] : index)
    spectra << spec
    rts << spec.retention_time
  end
  Mspire::Plms1.new(nums, rts, spectra)
end

#to_xml(filename = nil) ⇒ Object Also known as: write

Because mzml files are often very large, we try to avoid storing the entire object tree in memory before writing.

Takes a filename and uses Builder to write to it (returning self). If no filename is given, returns the XML as a string.



267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
# File 'lib/mspire/mzml.rb', line 267

# Serializes the object as mzML using Builder::XmlMarkup.
#
# filename - path of the file to write; when nil the XML is built in a
#            StringIO instead.
#
# Returns self when a filename was given, otherwise the XML as a String.
# Raises RuntimeError when @cvs holds no entries.
def to_xml(filename=nil)
  # TODO: support indexed mzml files

  # Validate before opening the target, so that a failure neither truncates
  # an existing file nor leaves an open handle behind.
  raise "#{self.class}#cvs must have > 0 Mspire::Mzml::CV objects" unless @cvs.size > 0

  io = filename ? File.open(filename, 'w') : StringIO.new
  begin
    xml = Builder::XmlMarkup.new(:target => io, :indent => 2)
    xml.instruct!

    mzml_atts = Default::NAMESPACE.dup
    mzml_atts[:version] = @version || Default::VERSION
    mzml_atts[:accession] = @accession if @accession
    mzml_atts[:id] = @id if @id

    xml.mzML(mzml_atts) do |mzml_n|
      # the 'if' statements capture whether or not the list is required or not
      Mspire::Mzml::CV.list_xml(@cvs, mzml_n)
      @file_description.to_xml(mzml_n)
      if @referenceable_param_groups
        Mspire::Mzml::ReferenceableParamGroup.list_xml(@referenceable_param_groups, mzml_n)
      end
      if @samples && @samples.size > 0
        Mspire::Mzml::Sample.list_xml(@samples, mzml_n)
      end
      Mspire::Mzml::Software.list_xml(@software_list, mzml_n)
      if @scan_settings_list && @scan_settings_list.size > 0
        Mspire::Mzml::ScanSettings.list_xml(@scan_settings_list, mzml_n)
      end
      Mspire::Mzml::InstrumentConfiguration.list_xml(@instrument_configurations, mzml_n)
      Mspire::Mzml::DataProcessing.list_xml(@data_processing_list, mzml_n)
      @run.to_xml(mzml_n)
    end
  ensure
    # close the file even when serialization raises part-way through
    io.close if filename
  end

  filename ? self : io.string
end