Class: MS::Sequest::Params

Inherits:
Object
  • Object
show all
Defined in:
lib/ms/sequest/params.rb,
lib/ms/sequest/pepxml/params.rb

Overview

1) provides a reader and simple parameter lookup for SEQUEST params files supporting Bioworks 3.1-3.3.1.

params = MS::Sequest::Params.new("sequest.params") # filename by default
params = MS::Sequest::Params.new.parse_io(some_io_object)

params.some_parameter  # => any parameter defined has a method
params.nonexistent_parameter # => nil

Provides consistent behavior across the different versions for the important information:

# some basic methods shared by all versions:
params.version              # => '3.1' | '3.2' | '3.3'
params.enzyme               # => enzyme name with no parentheses
params.min_number_termini 
params.database             # => first_database_name 
params.enzyme_specificity   # => [offset, cleave_at, except_if_after]
params.precursor_mass_type  # => "average" | "monoisotopic"
params.fragment_mass_type   # => "average" | "monoisotopic"

# some backwards/forwards compatibility methods:
params.max_num_internal_cleavages  # == max_num_internal_cleavage_sites
params.fragment_ion_tol     # => fragment_ion_tolerance

Constant Summary collapse

# Enzyme lookup table used when the params file is detected as version '3.1'.
# Rows are indexed by the integer value of the 'enzyme_number' option (the
# trailing comment on each row is that index). Each row is
#   [name, offset, cleave_at, except_if_after]
# where element 0 is read by #enzyme and elements 1..3 by #enzyme_specificity;
# a '-' entry is turned into an empty string by #enzyme_specificity.
Bioworks31_Enzyme_Info_Array =
[
  ['No_Enzyme', 0, '-', '-'],   # 0
  ['Trypsin', 1, 'KR', '-'],  # 1
  ['Trypsin(KRLNH)', 1, 'KRLNH', '-'],  # 2
  ['Chymotrypsin', 1, 'FWYL', '-'],  # 3
  ['Chymotrypsin(FWY)', 1, 'FWY', 'P'],  # 4
  ['Clostripain', 1, 'R', '-'],  # 5
  ['Cyanogen_Bromide', 1, 'M', '-'],  # 6
  ['IodosoBenzoate', 1, 'W', '-'],  # 7
  ['Proline_Endopept', 1, 'P', '-'],  # 8
  ['Staph_Protease', 1, 'E', '-'],  # 9
  ['Trypsin_K', 1, 'K', 'P'],  # 10
  ['Trypsin_R', 1, 'R', 'P'],  # 11
  ['GluC', 1, 'ED', '-'],  # 12
  ['LysC', 1, 'K', '-'],  # 13
  ['AspN', 0, 'D', '-'],  # 14
  ['Elastase', 1, 'ALIV', 'P'],  # 15
  ['Elastase/Tryp/Chymo', 1, 'ALIVKRWFY', 'P'],  # 16
]
@@param_re =

current attributes supported are: bioworks 3.2:

/ = ?/o
@@param_two_split =
';'
@@sequest_line =
/\[SEQUEST\]/o

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(file = nil) ⇒ Params

all keys and values stored as strings! will accept a sequest.params file or .srf file



67
68
69
70
71
# File 'lib/ms/sequest/params.rb', line 67

# Creates a params object. When +file+ is given it is handed straight to
# parse_file; when it is nil (the default) nothing is parsed.
def initialize(file=nil)
  parse_file(file) if file
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(name, *args) ⇒ Object



209
210
211
212
213
214
215
# File 'lib/ms/sequest/params.rb', line 209

# Dynamic parameter lookup: every parameter name can be called as a method.
# The general options (@opts) are consulted first, then the static mods
# (@mods). Returns the stored value, or nil when the name is unknown.
# Any extra arguments are ignored. If nothing has been parsed yet (@opts /
# @mods still unset) the lookup simply answers nil instead of failing on nil.
def method_missing(name, *args)
  key = name.to_s
  [@opts, @mods].each do |table|
    return table[key] if table && table.key?(key)
  end
  nil
end

# Keeps respond_to? consistent with method_missing: true for any name that
# is a key of @opts or @mods, otherwise defers to the default answer.
def respond_to_missing?(name, include_private = false)
  key = name.to_s
  [@opts, @mods].any? { |table| table && table.key?(key) } || super
end

Instance Attribute Details

#modsObject

the static weights added to amino acids



63
64
65
# File 'lib/ms/sequest/params.rb', line 63

# Reader for the static weights added to amino acids: the "add_*" entries
# that parse_io moves out of the general options into @mods.
def mods
  @mods
end

#optsObject

the general options



61
62
63
# File 'lib/ms/sequest/params.rb', line 61

# Reader for the general options hash filled in by parse_io (every parsed
# parameter whose name does not start with "add_", plus "search_engine").
def opts
  @opts
end

Instance Method Details

#_sys_ind_basename(file) ⇒ Object

Returns a system-independent basename. Splits on "\" or "/".



222
223
224
# File 'lib/ms/sequest/params.rb', line 222

# Returns the last path component of +file+, treating both backslash and
# forward slash as separators so the result does not depend on the host OS.
def _sys_ind_basename(file)
  segments = file.split(/[\\\/]/)
  segments.last
end

#databaseObject



233
234
235
# File 'lib/ms/sequest/params.rb', line 233

# Version-independent name for the "first_database_name" option.
def database
  db_key = "first_database_name"
  @opts[db_key]
end

#database_path=(newpath) ⇒ Object

changes the path of the database



227
228
229
230
231
# File 'lib/ms/sequest/params.rb', line 227

# Re-homes the database: keeps the file name of "first_database_name"
# (extracted with _sys_ind_basename) and joins it onto +newpath+, storing
# the result back into @opts.
def database_path=(newpath)
  basename = _sys_ind_basename(@opts["first_database_name"])
  @opts["first_database_name"] = File.join(newpath, basename)
end

#enzyme(split_on = /[_\(]/) ⇒ Object

returns the enzyme name (but no parentheses connected with the name). this will likely be capitalized. the regular expression splits the name and returns the first part (or just the name if not found)



276
277
278
279
280
281
282
283
284
285
# File 'lib/ms/sequest/params.rb', line 276

# Returns the bare enzyme name.
#
# For version '3.1' the name comes from Bioworks31_Enzyme_Info_Array (row
# selected by the 'enzyme_number' option); otherwise it is taken from the
# "enzyme_info" option. Everything after the first space is dropped, then
# the name is cut at the first match of +split_on+ (by default an
# underscore or an opening parenthesis) and the leading part is returned.
def enzyme(split_on=/[_\(]/)
  full_name =
    case version
    when '3.1' then Bioworks31_Enzyme_Info_Array[@opts['enzyme_number'].to_i].first
    else @opts["enzyme_info"]   # v >= '3.2'
    end
  first_word = full_name.split(' ',2).first
  first_word.split(split_on,2).first
end

#enzyme_specificityObject

Returns [offset, cleave_at, except_if_after]. offset is an Integer specifying how far after an amino acid to cut; cleave_at is a string of all amino acids that should be cut at; except_if_after is a string of amino acids that prevent the cut when they follow. Normal tryptic behavior would be [1, 'KR', 'P']. NOTE: a '-' in a params file is returned as '' (empty string), so AspN is [0, 'D', ''].



149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/ms/sequest/params.rb', line 149

# Returns [offset, cleave_at, except_if_after] for the configured enzyme.
#
# * version '3.1': columns 1..3 of the Bioworks31_Enzyme_Info_Array row
#   selected by the 'enzyme_number' option.
# * version >= '3.2': whitespace-separated fields 2..4 of the
#   'enzyme_info' option, with the offset converted to an Integer.
#
# Any '-' entry is returned as '' (empty string).
#
# Raises ArgumentError when the version cannot be determined (previously a
# nil version blew up with NoMethodError on the '>=' comparison, so the
# ArgumentError branch was unreachable).
def enzyme_specificity
  ver = version
  enzyme_ar =
    if ver == '3.1'
      # slice returns a fresh array, so the constant table is never mutated
      Bioworks31_Enzyme_Info_Array[@opts['enzyme_number'].to_i][1,3]
    elsif ver && ver >= '3.2'
      fields = @opts['enzyme_info'].split(/\s+/)[2,3]
      fields[0] = fields[0].to_i
      fields
    else
      raise ArgumentError, "don't recognize anything but Bioworks 3.1--3.3"
    end
  enzyme_ar.map { |field| field == '-' ? '' : field }
end

#fragment_ion_tolObject



304
305
306
# File 'lib/ms/sequest/params.rb', line 304

# Compatibility alias: returns the "fragment_ion_tolerance" option.
def fragment_ion_tol
  tolerance_key = "fragment_ion_tolerance"
  @opts[tolerance_key]
end

#fragment_mass_typeObject



200
201
202
203
204
205
206
207
# File 'lib/ms/sequest/params.rb', line 200

# Translates the 'mass_type_fragment' option into a readable name:
# '0' => "average", '1' => "monoisotopic". Any other value aborts the
# process with an error message.
def fragment_mass_type
  case @opts['mass_type_fragment']
  when '0' then "average"
  when '1' then "monoisotopic"
  else abort "error in mass_type_fragment in sequest!"
  end
end

#grab_params(fh) ⇒ Object

returns hash of params up until add_U_user_amino_acid



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/ms/sequest/params.rb', line 74

# Reads "name = value" parameter lines from +fh+ into a hash of strings.
#
# Reading continues through the block of lines starting with "add_" and
# stops at the first line after that block that is not an "add_" line; the
# io is rewound so that line can be read again by the caller. Reading also
# stops at end of input.
#
# For each line, the text after the name/value separator (@@param_re) is
# cut at the comment delimiter (@@param_two_split) and right-stripped.
# Lines without any separator (for example a comment-only line) are
# skipped, and a separator with nothing after it yields an empty string;
# both cases used to raise NoMethodError on nil.
def grab_params(fh)
  hash = {}
  in_add_amino_acid_section = false
  add_section_re = /^\s*add_/
  prev_pos = nil
  while (line = fh.gets)
    is_add_line = (line =~ add_section_re)
    in_add_amino_acid_section = true if is_add_line
    if in_add_amino_acid_section && !is_add_line
      # first line past the add_ section: un-read it and stop
      fh.pos = prev_pos
      break
    end
    prev_pos = fh.pos
    next unless line =~ /\w+/
    next unless line =~ @@param_re   # not a "name = value" line
    key, raw_value = line.split @@param_re
    # raw_value is nil when nothing follows the separator; the split result
    # is empty when the value itself is empty
    value = raw_value.to_s.split(@@param_two_split).first.to_s
    hash[key] = value.rstrip
  end
  hash
end

#mass_index(based_on = :precursor) ⇒ Object

returns the appropriate aminoacid mass lookup table from MS::Mass::AA based_on may be :precursor or :fragment



239
240
241
242
243
244
245
246
247
248
249
250
# File 'lib/ms/sequest/params.rb', line 239

# Picks the amino acid mass table matching the configured mass type.
# +based_on+ selects which setting is consulted: :precursor (default) uses
# precursor_mass_type, :fragment uses fragment_mass_type. Returns
# MS::Mass::AA::AVG for 'average', MS::Mass::AA::MONO for 'monoisotopic',
# and nil for anything else (including an unrecognized +based_on+).
def mass_index(based_on=:precursor)
  mass_type =
    if :precursor == based_on
      precursor_mass_type
    elsif :fragment == based_on
      fragment_mass_type
    end
  if mass_type == 'average'
    MS::Mass::AA::AVG
  elsif mass_type == 'monoisotopic'
    MS::Mass::AA::MONO
  end
end

#max_num_differential_AA_per_modObject



308
309
310
# File 'lib/ms/sequest/params.rb', line 308

# Returns the "max_num_differential_AA_per_mod" option, falling back to
# "max_num_differential_per_peptide" when the former is not set.
def max_num_differential_AA_per_mod
  per_mod = @opts["max_num_differential_AA_per_mod"]
  return per_mod if per_mod
  @opts["max_num_differential_per_peptide"]
end

#max_num_internal_cleavagesObject



287
288
289
# File 'lib/ms/sequest/params.rb', line 287

# Compatibility alias: returns the "max_num_internal_cleavage_sites" option.
def max_num_internal_cleavages
  sites_key = "max_num_internal_cleavage_sites"
  @opts[sites_key]
end

#min_number_terminiObject

at least in Bioworks 3.2, the First number after the enzyme is the indication of the enzymatic end stringency (required):

1 = Fully enzymatic
2 = Either end
3 = N terminal only
4 = C terminal only

So, to get min_number_termini we map like this:

1 => 2
2 => 1


261
262
263
264
265
266
267
268
269
270
# File 'lib/ms/sequest/params.rb', line 261

# Derives the minimum number of enzymatic termini from the second
# space-separated field of the "enzyme_info" option:
#   "1" (fully enzymatic) => "2"
#   "2" (either end)      => "1"
# For any other value, or when "enzyme_info" is absent, a warning is
# emitted and "1" is returned.
def min_number_termini
  e_info = @opts["enzyme_info"]
  stringency = e_info && e_info.split(" ")[1]
  return "2" if stringency == "1"
  return "1" if stringency == "2"
  warn "No Enzyme termini info, using min_number_termini = '1'"
  "1"
end

#parse_file(file) ⇒ Object

parses file and drops the .hdr behind indexed fasta files returns self can read sequest.params file or .srf file handle



135
136
137
138
139
140
# File 'lib/ms/sequest/params.rb', line 135

# Opens +file+ and hands the open handle to parse_io; the handle is closed
# when the block finishes. Always returns self.
def parse_file(file)
  File.open(file) { |io| parse_io(io) }
  self
end

#parse_io(fh) ⇒ Object

returns self or nil if no sequest found in the io



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/ms/sequest/params.rb', line 98

# Parses SEQUEST parameters out of the io +fh+.
#
# Scans forward for a line matching @@sequest_line that is immediately
# followed by a line starting with "first_database_name", then reads the
# parameters with grab_params. Entries whose name starts with "add_" go
# into @mods; everything else, plus "search_engine" => "SEQUEST", goes
# into @opts. A trailing ".hdr" on "first_database_name" is removed.
#
# Returns self, or nil if the end of the io is reached without finding the
# SEQUEST section (in which case @opts and @mods are left untouched).
def parse_io(fh)
  if fh.respond_to?(:set_encoding)
    # this mimics ruby1.8 behavior as we read in the file
    fh.set_encoding('ASCII-8BIT')
  end
  # seek to the SEQUEST section
  loop do
    line = fh.gets
    return nil if line.nil?  # end of input without seeing sequest params
    next unless line =~ @@sequest_line
    # double check that we are in a sequest params file:
    pos = fh.pos
    if fh.gets =~ /^first_database_name/
      fh.pos = pos   # un-read the first parameter line for grab_params
      break
    end
  end
  params = grab_params(fh)
  params["search_engine"] = "SEQUEST"
  # split into general options and mods without deleting from a hash that
  # is being iterated
  @opts = {}
  @mods = {}
  params.each do |k,v|
    (k =~ /^add_/ ? @mods : @opts)[k] = v
  end

  ## this gets rid of the .hdr postfix on indexed databases
  db = @opts["first_database_name"]
  @opts["first_database_name"] = db.sub(/\.hdr$/, '') if db
  self
end

#peptide_mass_tolObject

my take on peptide_mass_units: (see www.ionsource.com/tutorial/isotopes/slide2.htm) amu = atomic mass units = (mass_real - mass_measured).abs (??abs??) mmu = milli mass units (amu / 1000) ppm = parts per million = 10^6 * ∆m_accuracy / mass_measured [ where ∆m_accuracy = mass_real – mass_measured ]



297
298
299
300
301
302
# File 'lib/ms/sequest/params.rb', line 297

# Returns the "peptide_mass_tolerance" option unchanged.
#
# When "peptide_mass_units" is anything other than "0" a warning is emitted
# because the returned value has not been converted. The warning goes
# through Kernel#warn (stderr) so that it does not mix into the program's
# regular output, matching how min_number_termini reports its warning.
def peptide_mass_tol
  unless @opts["peptide_mass_units"] == "0"
    warn "WARNING: peptide_mass_tol units need to be adjusted!"
  end
  @opts["peptide_mass_tolerance"]
end

#precursor_mass_typeObject



192
193
194
195
196
197
198
# File 'lib/ms/sequest/params.rb', line 192

# Translates the 'mass_type_parent' option into a readable name:
# '0' => "average", '1' => "monoisotopic". Any other value aborts the
# process with an error message.
def precursor_mass_type
  code = @opts['mass_type_parent']
  return "average" if code == '0'
  return "monoisotopic" if code == '1'
  abort "error in mass_type_parent in sequest!"
end

#sample_enzymeObject

returns a MS::Ident::Pepxml::SampleEnzyme object



8
9
10
# File 'lib/ms/sequest/pepxml/params.rb', line 8

# Builds a MS::Ident::Pepxml::SampleEnzyme from the hash produced by
# sample_enzyme_hash.
def sample_enzyme
  enzyme_attributes = sample_enzyme_hash
  MS::Ident::Pepxml::SampleEnzyme.new(enzyme_attributes)
end

#sample_enzyme_hashObject

returns a hash suitable for setting a MS::Ident::Pepxml::SampleEnzyme object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/ms/sequest/pepxml/params.rb', line 13

# Returns a hash suitable for setting up a MS::Ident::Pepxml::SampleEnzyme:
#
#   :name   => enzyme name as returned by #enzyme
#   :cut    => residues to cleave at (nil when empty)
#   :no_cut => residues that prevent cleavage when they follow (nil when empty)
#   :sense  => 'C' for offset 1, 'N' for offset 0, nil when there is no enzyme
#
# "No enzyme" is recognized by the absence of cleavage residues as well as
# by the literal name "No_Enzyme". The name test alone never matched,
# because #enzyme by default cuts the name at the first underscore
# ("No_Enzyme" => "No"), so no-enzyme searches were reported with sense 'N'.
def sample_enzyme_hash
  offset, cleave_at, except_if_after = enzyme_specificity.map do |v|
    v == '' ? nil : v
  end
  name = enzyme
  sense =
    if name == "No_Enzyme" || cleave_at.nil?
      nil
    elsif offset == 1
      'C'
    elsif offset == 0
      'N'
    end
  { :name => name, :cut => cleave_at, :no_cut => except_if_after, :sense => sense }
end

#sequenceObject

I’m not sure if this is the right mapping for sequence_search_constraint?



186
187
188
189
190
# File 'lib/ms/sequest/params.rb', line 186

# Returns the 'partial_sequence' option, or "0" when that option is
# missing or an empty string.
def sequence
  partial = @opts['partial_sequence']
  return "0" unless partial
  partial == "" ? "0" : partial
end

#static_modsObject

returns a hash by add_<whatever> of any static mods != 0 the values are still as strings



314
315
316
317
318
319
320
321
322
# File 'lib/ms/sequest/params.rb', line 314

# Returns a new hash holding only those @mods entries whose value is
# non-zero when converted with to_f. Values are passed through unchanged
# (i.e. still strings).
def static_mods
  @mods.reject { |_name, weight| weight.to_f == 0.0 }
end

#versionObject

Returns the version of the sequest.params file. Returns String "3.3" if it contains "fragment_ion_units". Returns String "3.2" if it contains "enzyme_info". Returns String "3.1" if it contains "enzyme_number".



172
173
174
175
176
177
# File 'lib/ms/sequest/params.rb', line 172

# Infers the params-file version from which options are present, checked
# in this order:
#   'fragment_ion_units' => '3.3'
#   'enzyme_info'        => '3.2'
#   'enzyme_number'      => '3.1'
# Returns nil when none of them is set.
def version
  return '3.3' if @opts['fragment_ion_units']
  return '3.2' if @opts['enzyme_info']
  return '3.1' if @opts['enzyme_number']
  nil
end