Class: Ms::Sequest::Params

Inherits:
Object
  • Object
show all
Defined in:
lib/ms/sequest/params.rb

Overview

1) provides a reader and simple parameter lookup for SEQUEST params files supporting Bioworks 3.1-3.3.1.

params = Ms::Sequest::Params.new("sequest.params") # filename by default
params = Ms::Sequest::Params.new.parse_io(some_io_object)

params.some_parameter  # => any parameter defined has a method
params.nonexistent_parameter # => nil

Provides consistent behavior across different versions for important info:

# some basic methods shared by all versions:
params.version              # => '3.1' | '3.2' | '3.3'
params.enzyme               # => enzyme name with no parentheses
params.min_number_termini 
params.database             # => first_database_name 
params.enzyme_specificity   # => [offset, cleave_at, except_if_after]
params.precursor_mass_type  # => "average" | "monoisotopic"
params.fragment_mass_type   # => "average" | "monoisotopic"

# some backwards/forwards compatibility methods:
params.max_num_internal_cleavages  # == max_num_internal_cleavage_sites
params.fragment_ion_tol     # => fragment_ion_tolerance

Constant Summary collapse

# Enzyme table used for Bioworks 3.1 params files, indexed by the integer
# value of the 'enzyme_number' option (the trailing comment on each row is
# that index).
#
# Each row is [name, offset, cleave_at, except_if_after]:
# #enzyme reads column 0 and #enzyme_specificity reads columns 1..3.
# A '-' entry is turned into '' (empty string) by #enzyme_specificity.
Bioworks31_Enzyme_Info_Array =
[
  ['No_Enzyme', 0, '-', '-'],   # 0
  ['Trypsin', 1, 'KR', '-'],  # 1
  ['Trypsin(KRLNH)', 1, 'KRLNH', '-'],  # 2
  ['Chymotrypsin', 1, 'FWYL', '-'],  # 3
  ['Chymotrypsin(FWY)', 1, 'FWY', 'P'],  # 4
  ['Clostripain', 1, 'R', '-'],  # 5
  ['Cyanogen_Bromide', 1, 'M', '-'],  # 6
  ['IodosoBenzoate', 1, 'W', '-'],  # 7
  ['Proline_Endopept', 1, 'P', '-'],  # 8
  ['Staph_Protease', 1, 'E', '-'],  # 9
  ['Trypsin_K', 1, 'K', 'P'],  # 10
  ['Trypsin_R', 1, 'R', 'P'],  # 11
  ['GluC', 1, 'ED', '-'],  # 12
  ['LysC', 1, 'K', '-'],  # 13
  ['AspN', 0, 'D', '-'],  # 14
  ['Elastase', 1, 'ALIV', 'P'],  # 15
  ['Elastase/Tryp/Chymo', 1, 'ALIVKRWFY', 'P'],  # 16
]
@@param_re =

current attributes supported are: bioworks 3.2:

/ = ?/o
@@param_two_split =
';'
@@sequest_line =
/\[SEQUEST\]/o

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(file = nil) ⇒ Params

all keys and values stored as strings! will accept a sequest.params file or .srf file


67
68
69
70
71
# File 'lib/ms/sequest/params.rb', line 67

# Builds a Params object, optionally parsing a params file right away.
#
# file - path handed to #parse_file; when nil (the default) nothing is
#        parsed and the object stays empty until #parse_file / #parse_io
#        is called.
def initialize(file=nil)
  parse_file(file) if file
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(name, *args) ⇒ Object


209
210
211
212
213
214
215
# File 'lib/ms/sequest/params.rb', line 209

# Exposes every parsed parameter as a reader method.
#
# The method name is looked up (as a String) first in the general options
# and then in the static mods. Unknown names return nil instead of raising.
# This also holds before anything has been parsed, when @opts and @mods are
# still nil.
#
# name - Symbol/String name of the missing method.
# args - ignored.
#
# Returns the stored value (a String) or nil.
def method_missing(name, *args)
  key = name.to_s
  if @opts && @opts.key?(key)
    @opts[key]
  elsif @mods && @mods.key?(key)
    @mods[key]
  end
end

# Keeps respond_to? truthful for the parameter readers served by
# method_missing: true for any key present in the options or the mods,
# otherwise defers to the default answer.
def respond_to_missing?(name, include_private = false)
  key = name.to_s
  (@opts && @opts.key?(key)) || (@mods && @mods.key?(key)) || super
end

Instance Attribute Details

#modsObject

the static weights added to amino acids


63
64
65
# File 'lib/ms/sequest/params.rb', line 63

# Reader for the static weights added to amino acids: the hash of 'add_*'
# entries that #parse_io moves out of the general options.
# Returns whatever @mods currently holds (nil until something is parsed).
def mods
  @mods
end

#optsObject

the general options


61
62
63
# File 'lib/ms/sequest/params.rb', line 61

# Reader for the general options hash filled in by #parse_io
# (keys and values are Strings).
# Returns whatever @opts currently holds (nil until something is parsed).
def opts
  @opts
end

Instance Method Details

#_sys_ind_basename(file) ⇒ Object

Returns a system-independent basename. Splits on "\" or "/".


222
223
224
# File 'lib/ms/sequest/params.rb', line 222

# Returns the last path component of +file+, treating both "\" and "/" as
# separators so Windows and Unix style paths work on any platform.
#
# file - String path.
#
# Returns the trailing component, or nil when the split yields nothing.
def _sys_ind_basename(file)
  file.split(%r{[\\/]}).last
end

#databaseObject


233
234
235
# File 'lib/ms/sequest/params.rb', line 233

# Version-independent name for the database option.
# Returns the 'first_database_name' entry of the options (a String; the
# trailing .hdr has already been removed by #parse_io).
def database
  @opts["first_database_name"]
end

#database_path=(newpath) ⇒ Object

changes the path of the database


227
228
229
230
231
# File 'lib/ms/sequest/params.rb', line 227

# Re-homes the database: keeps the database file's basename but replaces its
# directory with +newpath+.
#
# newpath - String directory to join in front of the existing basename.
#
# Stores the joined path back under 'first_database_name'.
def database_path=(newpath)
  basename = _sys_ind_basename(@opts["first_database_name"])
  @opts["first_database_name"] = File.join(newpath, basename)
end

#enzymeObject

returns the enzyme name (but no parentheses connected with the name). this will likely be capitalized.


294
295
296
297
298
299
300
301
302
303
# File 'lib/ms/sequest/params.rb', line 294

# Returns the enzyme name without any parenthesized suffix attached to it
# (e.g. 'Trypsin' for 'Trypsin(KR)'). This will likely be capitalized.
#
# Bioworks 3.1 files name the enzyme through the 'enzyme_number' index into
# Bioworks31_Enzyme_Info_Array. 3.2+ files carry a whitespace separated
# 'enzyme_info' string whose first field is the name (the later fields are
# read by #min_number_termini and #enzyme_specificity), so only that first
# field is used. Without this, a name lacking parentheses would come back
# with the trailing fields still attached.
#
# Raises ArgumentError when the params version is not recognized, matching
# #enzyme_specificity.
def enzyme
  v = version
  basic_name =
    if v == '3.1'
      Bioworks31_Enzyme_Info_Array[@opts['enzyme_number'].to_i][0]
    elsif v && v >= '3.2'
      # keep just the leading name field of e.g. "Trypsin(KR) 1 1 KR -"
      @opts['enzyme_info'].to_s.split(' ').first
    else
      raise ArgumentError, "don't recognize anything but Bioworks 3.1--3.3"
    end
  basic_name.to_s.split('(')[0]
end

#enzyme_specificityObject

Returns [offset, cleave_at, except_if_after]. offset is an Integer specifying how far after an amino acid to cut; cleave_at is a string of all amino acids that should be cut at; except_if_after is a string of amino acids that prevent the cut (no cutting after those). Normal tryptic behavior would be: [1, 'KR', 'P']. NOTE: a '-' in a params file is returned as '' (empty string), so AspN is [0, 'D', ''].


149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/ms/sequest/params.rb', line 149

# Returns [offset, cleave_at, except_if_after] for the configured enzyme.
#
# offset          - Integer (how far after an amino acid to cut)
# cleave_at       - String of the amino acids that are cut at
# except_if_after - String of amino acids that suppress the cut
#
# Normal tryptic behavior would be [1, 'KR', 'P']. A '-' placeholder is
# returned as '' (empty string), so AspN is [0, 'D', ''].
#
# Bioworks 3.1 files are resolved through Bioworks31_Enzyme_Info_Array;
# 3.2+ files use the third to fifth whitespace separated fields of
# 'enzyme_info'.
#
# Raises ArgumentError when the params version is not recognized. The nil
# check makes that branch reachable; comparing nil with >= used to fail
# first.
def enzyme_specificity
  v = version
  enzyme_ar =
    if v == '3.1'
      Bioworks31_Enzyme_Info_Array[@opts['enzyme_number'].to_i][1,3]
    elsif v && v >= '3.2'
      fields = @opts['enzyme_info'].split(/\s+/)[2,3]
      fields[0] = fields[0].to_i
      fields
    else
      raise ArgumentError, "don't recognize anything but Bioworks 3.1--3.3"
    end
  # '-' means "nothing" in the params file
  enzyme_ar.map { |field| field == '-' ? '' : field }
end

#fragment_ion_tolObject


322
323
324
# File 'lib/ms/sequest/params.rb', line 322

# Compatibility alias: returns the 'fragment_ion_tolerance' option
# (a String, or nil when the option is absent).
def fragment_ion_tol
  @opts["fragment_ion_tolerance"]
end

#fragment_mass_typeObject


200
201
202
203
204
205
206
207
# File 'lib/ms/sequest/params.rb', line 200

# Translates the 'mass_type_fragment' option into a readable name.
#
# Returns "average" for '0' and "monoisotopic" for '1'.
# Any other value aborts the program with an error message
# (same contract as #precursor_mass_type).
def fragment_mass_type
  case @opts['mass_type_fragment']
  when '0' then "average"
  when '1' then "monoisotopic"
  else abort "error in mass_type_fragment in sequest!"
  end
end

#grab_params(fh) ⇒ Object

returns hash of params up until add_U_user_amino_acid


74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/ms/sequest/params.rb', line 74

# Reads "key = value ; comment" lines from +fh+ into a hash of Strings.
#
# Reading continues through the block of add_* lines and stops at the first
# line after that block that is not an add_* line. The io is rewound so that
# this line is the next one read by the caller.
#
# Lines containing no word characters are ignored. So are lines that have no
# "key = value" separator; those used to raise NoMethodError on nil.
# Anything after the comment delimiter is dropped and trailing whitespace is
# stripped from the value. A value that is nothing but a comment yields ''.
#
# fh - an IO-like object responding to gets, pos and pos=.
#
# Returns the Hash of parameter name => value.
def grab_params(fh)
  hash = {}
  in_add_amino_acid_section = false
  add_section_re = /^\s*add_/
  prev_pos = nil
  while (line = fh.gets)
    is_add_line = (line =~ add_section_re)
    in_add_amino_acid_section = true if is_add_line
    if in_add_amino_acid_section && !is_add_line
      # first line past the add_* block: un-read it and stop
      fh.pos = prev_pos
      break
    end
    prev_pos = fh.pos
    next unless line =~ /\w+/
    key, value = line.split(@@param_re)
    # no "key = value" separator on this line: nothing to store
    next if value.nil?
    # first is nil when the value is only the delimiter, hence to_s
    value = value.split(@@param_two_split).first.to_s
    hash[key] = value.rstrip
  end
  hash
end

#mass_index(based_on = :precursor) ⇒ Object

returns the appropriate aminoacid mass lookup table from Ms::Mass::AA based_on may be :precursor or :fragment


239
240
241
242
243
244
245
246
247
248
249
250
# File 'lib/ms/sequest/params.rb', line 239

# Picks the amino acid mass lookup table from Ms::Mass::AA that matches the
# configured mass type.
#
# based_on - :precursor (default) to follow #precursor_mass_type, or
#            :fragment to follow #fragment_mass_type.
#
# Returns Ms::Mass::AA::AVG for 'average', Ms::Mass::AA::MONO for
# 'monoisotopic', and nil for any other based_on or mass type.
def mass_index(based_on=:precursor)
  mass_type =
    if :precursor == based_on
      precursor_mass_type
    elsif :fragment == based_on
      fragment_mass_type
    end

  if 'average' == mass_type
    Ms::Mass::AA::AVG
  elsif 'monoisotopic' == mass_type
    Ms::Mass::AA::MONO
  end
end

#max_num_differential_AA_per_modObject


326
327
328
# File 'lib/ms/sequest/params.rb', line 326

# Returns the 'max_num_differential_AA_per_mod' option, falling back to
# 'max_num_differential_per_peptide' when the former is not set.
def max_num_differential_AA_per_mod
  per_mod = @opts["max_num_differential_AA_per_mod"]
  return per_mod if per_mod
  @opts["max_num_differential_per_peptide"]
end

#max_num_internal_cleavagesObject


305
306
307
# File 'lib/ms/sequest/params.rb', line 305

# Compatibility alias: returns the 'max_num_internal_cleavage_sites' option
# (a String, or nil when the option is absent).
def max_num_internal_cleavages
  @opts["max_num_internal_cleavage_sites"]
end

#min_number_terminiObject

at least in Bioworks 3.2, the First number after the enzyme is the indication of the enzymatic end stringency (required):

1 = Fully enzymatic
2 = Either end
3 = N terminal only
4 = C terminal only

So, to get min_number_termini we map like this:

1 => 2
2 => 1

261
262
263
264
265
266
267
268
269
270
# File 'lib/ms/sequest/params.rb', line 261

# Derives the minimum number of enzymatic termini from the second field of
# the 'enzyme_info' option (the end stringency):
#
#   "1" (fully enzymatic) => "2"
#   "2" (either end)      => "1"
#
# For every other case (no 'enzyme_info', or any other stringency value) a
# warning is written and "1" is returned.
#
# Returns a String.
def min_number_termini
  e_info = @opts["enzyme_info"]
  if e_info
    stringency = e_info.split(" ")[1]
    return "2" if "1" == stringency
    return "1" if "2" == stringency
  end
  warn "No Enzyme termini info, using min_number_termini = '1'"
  "1"
end

#parse_file(file) ⇒ Object

Parses the file and drops the .hdr suffix found on indexed FASTA database names. Returns self. Can read a sequest.params file or an .srf file.


135
136
137
138
139
140
# File 'lib/ms/sequest/params.rb', line 135

# Opens +file+ and feeds the handle to #parse_io; the file is closed again
# when parsing finishes.
#
# file - path of the file to read.
#
# Returns self (regardless of what #parse_io returned).
def parse_file(file)
  File.open(file) { |io| parse_io(io) }
  self
end

#parse_io(fh) ⇒ Object

returns self or nil if no sequest found in the io


98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/ms/sequest/params.rb', line 98

# Locates the SEQUEST parameter section in +fh+ and loads it.
#
# The io is scanned for a line matching @@sequest_line that is directly
# followed by a line starting with first_database_name. The parameters are
# then read with #grab_params, 'search_engine' is set to "SEQUEST", the
# add_* entries are moved from the options into the mods, and a trailing
# .hdr is removed from the database name.
#
# fh - an IO-like object (gets, pos, pos=); set_encoding is used if present.
#
# Returns self, or nil if the io ends before a SEQUEST section is found.
def parse_io(fh)
  # this mimics ruby1.8 behavior as we read in the file
  fh.set_encoding('ASCII-8BIT') if fh.respond_to?(:set_encoding)

  # seek to the SEQUEST section
  at_params = false
  until at_params
    header = fh.gets
    # we return nil if we reach the end of the file without seeing sequest params
    return nil if header.nil?
    next unless header =~ @@sequest_line
    # double check that we are in a sequest params file:
    params_start = fh.pos
    if fh.gets =~ /^first_database_name/
      fh.pos = params_start
      at_params = true
    end
  end

  @opts = grab_params(fh)
  @opts["search_engine"] = "SEQUEST"

  # extract out the mods (iterate over a snapshot of the keys while deleting)
  @mods = {}
  @opts.keys.each do |key|
    @mods[key] = @opts.delete(key) if key =~ /^add_/
  end

  ## this gets rid of the .hdr postfix on indexed databases
  db_name = @opts["first_database_name"]
  @opts["first_database_name"] = db_name.sub(/\.hdr$/, '')
  self
end

#peptide_mass_tolObject

my take on peptide_mass_units: (see www.ionsource.com/tutorial/isotopes/slide2.htm) amu = atomic mass units = (mass_real - mass_measured).abs (??abs??) mmu = milli mass units (amu / 1000) ppm = parts per million = 10^6 * ∆m_accuracy / mass_measured [ where ∆m_accuracy = mass_real – mass_measured ]


315
316
317
318
319
320
# File 'lib/ms/sequest/params.rb', line 315

# Returns the 'peptide_mass_tolerance' option unchanged.
#
# When 'peptide_mass_units' is anything other than "0" a warning is printed
# to standard output, because the returned value is not converted.
def peptide_mass_tol
  puts "WARNING: peptide_mass_tol units need to be adjusted!" unless @opts["peptide_mass_units"] == "0"
  @opts["peptide_mass_tolerance"]
end

#precursor_mass_typeObject


192
193
194
195
196
197
198
# File 'lib/ms/sequest/params.rb', line 192

# Translates the 'mass_type_parent' option into a readable name.
#
# Returns "average" for '0' and "monoisotopic" for '1'.
# Any other value aborts the program with an error message.
def precursor_mass_type
  mass_type = @opts['mass_type_parent']
  if '0' == mass_type
    "average"
  elsif '1' == mass_type
    "monoisotopic"
  else
    abort "error in mass_type_parent in sequest!"
  end
end

#sequenceObject

I'm not sure if this is the right mapping for sequence_search_constraint?


186
187
188
189
190
# File 'lib/ms/sequest/params.rb', line 186

# Returns the 'partial_sequence' option, or "0" when that option is missing
# or empty.
def sequence
  partial = @opts['partial_sequence']
  (!partial || partial == "") ? "0" : partial
end

#static_modsObject

Returns a hash, keyed by add_<whatever>, of any static mods != 0. The values are still strings.


332
333
334
335
336
337
338
339
340
# File 'lib/ms/sequest/params.rb', line 332

# Returns a new hash holding only the mods whose value is numerically
# non-zero (judged with to_f). Keys are the add_<whatever> names and the
# values are left as Strings.
def static_mods
  @mods.reject { |_name, weight| weight.to_f == 0.0 }
end

#versionObject

Returns the version of the sequest.params file: String “3.3” if it contains “fragment_ion_units”, String “3.2” if it contains “enzyme_info”, String “3.1” if it contains “enzyme_number”.


172
173
174
175
176
177
# File 'lib/ms/sequest/params.rb', line 172

# Infers the Bioworks version from which options are present, checking the
# newest marker first:
#
#   'fragment_ion_units' => '3.3'
#   'enzyme_info'        => '3.2'
#   'enzyme_number'      => '3.1'
#
# Returns the version String, or nil when none of the markers is set.
def version
  markers = [
    ['fragment_ion_units', '3.3'],
    ['enzyme_info', '3.2'],
    ['enzyme_number', '3.1'],
  ]
  markers.each do |option, bioworks_version|
    return bioworks_version if @opts[option]
  end
  nil
end
end