Class: Ms::Msrun::Axml::Mzxml

Inherits:
Object
  • Object
show all
Defined in:
lib/ms/msrun/axml/mzxml.rb

Constant Summary collapse

NetworkOrder =
true

Instance Method Summary collapse

Instance Method Details

#add_scan_nodes(nodes, scans, scn_index, scans_by_num, version, io) ⇒ Object

Assumes that nodes contains scan nodes, and checks each scan node for child scan nodes.



96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/ms/msrun/axml/mzxml.rb', line 96

# Builds a scan for every node in +nodes+ and, for versions after '1.0',
# descends into each node's nested scan elements right after storing it.
#
# nodes        - enumerable of scan nodes
# scans        - container filled at consecutive indices
# scn_index    - index at which the first scan is stored
# scans_by_num - container filled at position scan[0] for each scan
# version      - version string (compared as a string)
# io           - handed through unchanged to create_scan
#
# Returns the index that follows the last scan stored.
def add_scan_nodes(nodes, scans, scn_index, scans_by_num, version, io)
  next_index = scn_index
  nodes.each do |scan_node|
    built = create_scan(scan_node, scans_by_num, io)
    scans[next_index] = built
    scans_by_num[built[0]] = built
    next_index += 1

    # only versions after 1.0 are searched for nested scans
    next unless version > '1.0'

    nested = scan_node.find('child::scan')
    next unless nested.size > 0

    next_index = add_scan_nodes(nested, scans, next_index, scans_by_num, version, io)
  end
  next_index
end

#create_scan(scan_n, scans_by_num, io = nil) ⇒ Object

Takes a scan node and creates a scan object. The parent scan is the one directly above it in mslevel.



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/ms/msrun/axml/mzxml.rb', line 68

# Builds a scan from +scan_n+ (via new_scan_from_node) and fills in its
# precursor (position 7) and spectrum (position 8) from the child nodes.
#
# scan_n       - scan node; iterating it yields its child nodes
# scans_by_num - container indexed by scan number, used to look up the
#                scan named by a precursor's 'precursorScanNum' attribute
# io           - handed to Ms::Data::LazyIO for the 'peaks' child
#
# Raises RuntimeError when more than one 'precursorMz' child is found.
# Returns the scan.
def create_scan(scan_n, scans_by_num, io=nil)
  scan = new_scan_from_node(scan_n)
  precursor = nil

  scan_n.each do |child|
    tag = child.name
    if tag == 'precursorMz'
      # should be able to do this!!!
      #scan[5] = scan_n.find('child::precursorMz').map do |prec_n|
      raise "the msrun object can only handle one precursor!" unless precursor.nil?

      precursor = Ms::Precursor.new
      precursor[1] = child['precursorIntensity'].to_f
      precursor[0] = child.content.to_f
      parent_num = child['precursorScanNum']
      precursor[2] = scans_by_num[parent_num.to_i] if parent_num
    elsif tag == 'peaks'
      # assumes that parsing was done with a LazyPeaks parser!
      span = child.text
      head = span.first
      tail = span.last
      code = Ms::Data::LazyIO.unpack_code(child['precision'].to_i, NetworkOrder)
      lazy = Ms::Data::LazyIO.new(io, head, tail, code)
      scan[8] = Ms::Spectrum.new(Ms::Data::Interleaved.new(lazy))
    end
  end

  scan[7] = precursor
  scan
end

#msrun_node(node, version) ⇒ Object



114
115
116
117
118
119
120
121
122
# File 'lib/ms/msrun/axml/mzxml.rb', line 114

# Picks the node to be treated as the msrun element.
#
# For versions below '2.0' (string comparison) +node+ itself is returned.
# Otherwise the single child of +node+ named 'msRun' is returned (nil when
# there is none).
#
# Raises NotImplementedError when +node+ has more than one 'msRun' child.
def msrun_node(node, version)
  return node unless version >= '2.0'

  runs = node.children.select { |child| child.name == 'msRun' }
  raise(NotImplementedError, "one msrun per doc right now") if runs.size > 1

  runs.first
end

#new_scan_from_node(node) ⇒ Object



124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/ms/msrun/axml/mzxml.rb', line 124

# Creates an Ms::Scan and fills its leading positions from the attributes
# of +node+:
#
#   0 - 'num' as an integer
#   1 - 'msLevel' as an integer
#   2 - 'retentionTime' with its first two characters and last character
#       dropped, as a float (only when the attribute is present)
#   3 - 'startMz' as a float
#   4 - 'endMz' as a float
#   5 - 'peaksCount' as an integer
#   6 - 'totIonCurrent' as a float
#
# Positions 3 through 6 are only set when 'startMz' is present.
# Returns the scan.
def new_scan_from_node(node)
  Ms::Scan.new.tap do |scan|  # array class creates one with 9 positions
    scan[0] = node['num'].to_i
    scan[1] = node['msLevel'].to_i

    retention = node['retentionTime']
    scan[2] = retention[2...-1].to_f if retention

    start_mz = node['startMz']
    if start_mz
      scan[3] = start_mz.to_f
      scan[4] = node['endMz'].to_f
      scan[5] = node['peaksCount'].to_i
      scan[6] = node['totIonCurrent'].to_f
    end
  end
end

#parse(msrun_obj, io, version) ⇒ Object

version is a string



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/ms/msrun/axml/mzxml.rb', line 20

# Parses the document read from +io+ and fills in +msrun_obj+.
#
# msrun_obj - receives parent_basename, parent_location, scan_count,
#             start_time, end_time and scans
# io        - handed to AXML.parse and on to the scan builders
# version   - version string (compared as a string)
#
# Returns the array of scans (the value of the final assignment).
def parse(msrun_obj, io, version)
  root = AXML.parse(io, :text_indices => 'peaks', :parser => :xmlparser)
  msrun_n = msrun_node(root, version)

  # The filename.  Backslashes are normalized on a copy so the attribute
  # string owned by the parsed node is never modified in place (an in-place
  # gsub! would also raise if that string were frozen).
  parent_n = msrun_n.find_first_child('parentFile')
  fn = parent_n['fileName'].gsub(/\\/, '/')
  msrun_obj.parent_basename = File.basename(fn)
  dn = File.dirname(fn)
  # File.dirname gives '.' for a bare filename; report that as "no location"
  # while keeping an explicit './' prefix
  dn = nil if dn == '.' && !fn.include?('/')
  msrun_obj.parent_location = dn

  ## HEADER
  scan_count = msrun_n['scanCount'].to_i
  msrun_obj.scan_count = scan_count

  # one extra slot: this array is indexed by scan number (see add_scan_nodes)
  scans_by_num = Array.new(scan_count + 1)

  ## SPECTRUM
  scans = Array.new( scan_count )
  scn_index = 0

  if version >= '3.0'
    warn '[version 3.0 parsing may fail if > 1 peak list per scan]'
    # note that mzXML version 3.0 *can* have more than one peak...
    # I'm not sure how to deal with that since I have one spectrum/scan
  end

  scan_nodes = msrun_n.find_children('scan')
  add_scan_nodes(scan_nodes, scans, scn_index, scans_by_num, version, io)

  ## update the scan's parents
  Ms::Msrun.add_parent_scan(scans)

  # note that startTime and endTime are optional AND in >2.2 are dateTime
  # instead of duration types!, so we will just use scan times...
  # Also, note that startTime and endTime are BROKEN on readw -> mzXML 2.0
  # export.  They give the start and end time in seconds, but they are
  # really minutes.  All the more reason to use the first and last scans!
  # NOTE(review): this still assumes at least one scan and that the last
  # slot of scans was filled -- confirm behavior for scanCount mismatches
  msrun_obj.start_time = scans.first.time
  msrun_obj.end_time = scans.last.time
  msrun_obj.scans = scans
end