Class: Mascot::DAT

Inherits:
Object
  • Object
show all
Defined in:
lib/mascot/dat.rb,
lib/mascot/dat/psm.rb,
lib/mascot/dat/query.rb,
lib/mascot/dat/enzyme.rb,
lib/mascot/dat/masses.rb,
lib/mascot/dat/summary.rb,
lib/mascot/dat/version.rb,
lib/mascot/dat/peptides.rb,
lib/mascot/dat/proteins.rb,
lib/mascot/dat/parameters.rb,
lib/mascot/dat/header_info.rb,
lib/mascot/dat/search_databases.rb

Overview

A parser for Mascot flat file results.

NOTE: This parser creates another file that indexes the byte-position offsets of the various MIME sections of a DAT file. For some reason, a DAT file's own index records line numbers within the file rather than byte offsets, making random access more difficult than it needs to be.

<b>If you do not want this index file created, pass <code>false</code> as the <code>cache_index</code> argument.</b>

Defined Under Namespace

Classes: Enzyme, HeaderInfo, Masses, PSM, Parameters, Peptides, Proteins, Query, SearchDatabases, Summary

Constant Summary collapse

# Section names for a Mascot DAT file (presumably the MIME part names this
# parser recognises; how the list is consumed is not visible here).
SECTIONS = %w[
  summary decoy_summary et_summary parameters
  peptides decoy_peptides et_peptides
  proteins header enzyme taxonomy unimod
  quantitation masses mixture decoy_mixture index
]

# Version string of the library.
VERSION = "0.2.0"

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(dat_file_path, cache_index = true) ⇒ DAT

Returns a new instance of DAT.



35
36
37
38
39
40
41
42
# File 'lib/mascot/dat.rb', line 35

# Opens the DAT file at +dat_file_path+, resets the parser state and then
# builds the section index via parse_index.
#
# @param dat_file_path [String] path of the Mascot DAT file to open
# @param cache_index [Boolean] stored in @cache_index; per the class
#   overview, pass false to stop the parser from creating its byte-offset
#   index file
def initialize(dat_file_path, cache_index = true)
  # Plain state first; none of it depends on the file handle.
  @cache_index = cache_index
  @boundary_string = nil
  @boundary = nil
  @idx = {}

  # The handle stays open for the lifetime of the parser (see #close).
  @dat_file = File.open(dat_file_path)

  parse_index
end

Instance Attribute Details

#boundary ⇒ Object (readonly)

Returns the value of attribute boundary.



27
28
29
# File 'lib/mascot/dat.rb', line 27

# Read-only accessor for the section boundary matcher. The constructor
# resets it to nil before calling parse_index (presumably parse_index assigns
# the real value -- TODO confirm), and read_section compares each line
# against it with =~ to detect the end of a section.
#
# @return [Object] the current value of @boundary
def boundary
  @boundary
end

#boundary_string ⇒ Object (readonly)

Returns the value of attribute boundary_string.



28
29
30
# File 'lib/mascot/dat.rb', line 28

# Read-only accessor for @boundary_string. The constructor resets it to nil
# before calling parse_index.
# NOTE(review): presumably the raw boundary text from which #boundary is
# built -- confirm against parse_index.
#
# @return [Object] the current value of @boundary_string
def boundary_string
  @boundary_string
end

#dat_file ⇒ Object (readonly)

Returns the value of attribute dat_file.



29
30
31
# File 'lib/mascot/dat.rb', line 29

# Read-only accessor for the file handle that the constructor opened on the
# DAT file path. The same handle is repositioned by #goto and closed by
# #close.
#
# @return [File] the underlying DAT file handle
def dat_file
  @dat_file
end

#idx ⇒ Object (readonly)

Returns the value of attribute idx.



26
27
28
# File 'lib/mascot/dat.rb', line 26

# Read-only accessor for the section index. It starts as an empty Hash in the
# constructor; #goto looks sections up by Symbol key and assigns the stored
# value to the file position, so the values are file offsets.
#
# @return [Hash] section name (Symbol) => file offset
def idx
  @idx
end

Class Method Details

.open(dat_file_path, cache_index = true) ⇒ Object



44
45
46
# File 'lib/mascot/dat.rb', line 44

# Opens a Mascot DAT file and returns a parser for it.
#
# Without a block this simply builds and returns the parser; the caller is
# then responsible for calling #close on it. With a block, the parser is
# yielded and closed when the block exits (also when it raises), and the
# block's value is returned -- the same convention as File.open.
#
# @param dat_file_path [String] path of the DAT file
# @param cache_index [Boolean] forwarded unchanged to the constructor
# @yield [dat] optional block that receives the opened parser
# @return [DAT, Object] the parser when no block is given, otherwise the
#   value returned by the block
def self.open(dat_file_path, cache_index = true)
  dat = DAT.new(dat_file_path, cache_index)
  return dat unless block_given?

  begin
    yield dat
  ensure
    # Release the file handle no matter how the block terminated.
    dat.close
  end
end

Instance Method Details

#close ⇒ Object



48
49
50
# File 'lib/mascot/dat.rb', line 48

# Closes the file handle that the constructor opened on the DAT file.
#
# @return [Object] whatever the handle's own close call returns
def close
  @dat_file.close
end

#decoy_peptides(cache_psm_index = true) ⇒ Object



118
119
120
# File 'lib/mascot/dat.rb', line 118

# Builds a peptide results object for the decoy peptides section.
#
# @param cache_psm_index [Boolean] forwarded unchanged to
#   Mascot::DAT::Peptides.new
# @return [Mascot::DAT::Peptides] constructed from the DAT file handle, the
#   indexed offset of the :decoy_peptides section and +cache_psm_index+
def decoy_peptides(cache_psm_index = true)
  section_offset = idx[:decoy_peptides]
  Mascot::DAT::Peptides.new(dat_file, section_offset, cache_psm_index)
end

#enzyme ⇒ Mascot::DAT::Enzyme

Parse the enzyme information from the DAT file

Returns:

  • (Mascot::DAT::Enzyme)



91
92
93
# File 'lib/mascot/dat.rb', line 91

# Returns the enzyme information of the DAT file. The :enzyme section is read
# and wrapped on the first call only; the result is kept in @enzyme and
# handed back on later calls.
#
# @return [Mascot::DAT::Enzyme]
def enzyme
  return @enzyme if @enzyme

  @enzyme = Mascot::DAT::Enzyme.new(read_section(:enzyme))
end

#goto(key) ⇒ Object

Go to a section of the Mascot DAT file



62
63
64
65
66
67
68
# File 'lib/mascot/dat.rb', line 62

# Moves the file cursor of the DAT file to the start of a section.
#
# @param key [String, Symbol] section name; converted with to_sym before the
#   index lookup
# @return [Object] the indexed offset that was assigned to the file position
# @raise [ArgumentError] when the index has no entry for +key+. This used to
#   be a bare Exception, which a plain `rescue` (StandardError) cannot catch;
#   ArgumentError is still caught by callers that rescue Exception.
def goto(key)
  section = key.to_sym
  unless @idx.key?(section)
    raise ArgumentError, "Invalid DAT section \"#{key}\""
  end

  @dat_file.pos = @idx[section]
end

#masses ⇒ Mascot::DAT::Masses

Parse the masses section of the DAT file

Returns:

  • (Mascot::DAT::Masses)



97
98
99
# File 'lib/mascot/dat.rb', line 97

# Returns the masses information of the DAT file. The :masses section is read
# and wrapped on the first call only; the result is kept in @masses and
# handed back on later calls.
#
# @return [Mascot::DAT::Masses]
def masses
  return @masses if @masses

  @masses = Mascot::DAT::Masses.new(read_section(:masses))
end

#parameters ⇒ Mascot::DAT::Parameters

Parses parameters from DAT file



102
103
104
# File 'lib/mascot/dat.rb', line 102

# Returns the search parameters of the DAT file. The :parameters section is
# read and wrapped on the first call only; the result is kept in @params and
# handed back on later calls.
#
# @return [Mascot::DAT::Parameters]
def parameters
  return @params if @params

  @params = Mascot::DAT::Parameters.new(read_section(:parameters))
end

#peptides(cache_psm_index = true) ⇒ Mascot::DAT::Peptides

Puts the IO cursor at the beginning of peptide result section. Returns an iterator/parser for PSM results



114
115
116
# File 'lib/mascot/dat.rb', line 114

# Builds a peptide results object for the peptides section.
#
# @param cache_psm_index [Boolean] forwarded unchanged to
#   Mascot::DAT::Peptides.new
# @return [Mascot::DAT::Peptides] constructed from the DAT file handle, the
#   indexed offset of the :peptides section and +cache_psm_index+
def peptides(cache_psm_index = true)
  section_offset = idx[:peptides]
  Mascot::DAT::Peptides.new(dat_file, section_offset, cache_psm_index)
end

#proteins(cache_protein_byteoffsets = true) ⇒ Object



123
124
125
# File 'lib/mascot/dat.rb', line 123

# Builds a protein results object for the proteins section.
#
# @param cache_protein_byteoffsets [Boolean] forwarded unchanged to
#   Mascot::DAT::Proteins.new
# @return [Mascot::DAT::Proteins] constructed from the DAT file handle, the
#   indexed offset of the :proteins section and +cache_protein_byteoffsets+
def proteins(cache_protein_byteoffsets = true)
  section_offset = idx[:proteins]
  Mascot::DAT::Proteins.new(dat_file, section_offset, cache_protein_byteoffsets)
end

#query(n) ⇒ Mascot::DAT::Query Also known as: spectrum

Return a specific query spectrum from the DAT file

Parameters:

  • n

    The query spectrum numerical index

Returns:

  • (Mascot::DAT::Query)



56
57
58
# File 'lib/mascot/dat.rb', line 56

# Returns one query spectrum of the DAT file.
#
# @param n [Object] the query number; interpolated into the section name
#   "query<n>"
# @return [Mascot::DAT::Query] built from the text of that section
def query(n)
  section_key = "query#{n}".to_sym
  Mascot::DAT::Query.new(read_section(section_key))
end

#read_section(key) ⇒ String

Read a section of the DAT file into memory. THIS IS NOT RECOMMENDED UNLESS YOU KNOW WHAT YOU ARE DOING.

Parameters:

  • key (String or Symbol)

    The section name

Returns:

  • (String)

    The section of the DAT file as a String. The section includes the MIME boundary and content type definition lines.



77
78
79
80
81
82
83
84
85
86
# File 'lib/mascot/dat.rb', line 77

# Reads one whole section of the DAT file into memory.
#
# @param key [String, Symbol] the section name
# @return [String] the section text, starting with the line at the section's
#   indexed offset and ending just before the next line that matches the
#   boundary
def read_section(key)
  goto(key.to_sym)

  # The first line is the section's opening boundary marker; it is kept as
  # the start of the result and never tested against the boundary.
  section = @dat_file.readline

  # Append lines until the next boundary; that boundary line is read from the
  # file but not added to the result.
  while (line = @dat_file.gets)
    break if line =~ @boundary

    section << line
  end

  section
end

#search_databases ⇒ Mascot::DAT::SearchDatabases

Parses and returns the search databases from the DAT file



108
109
110
# File 'lib/mascot/dat.rb', line 108

# Returns the search databases of the DAT file. The object is built from
# #parameters on the first call only; the result is kept in
# @search_databases and handed back on later calls.
#
# @return [Mascot::DAT::SearchDatabases]
def search_databases
  return @search_databases if @search_databases

  @search_databases = Mascot::DAT::SearchDatabases.new(parameters)
end