Class: Bio::GCG::Msf

Inherits:
Object show all
Defined in:
lib/bio/appl/gcg/msf.rb

Overview

MSF is a multiple sequence alignment format developed by Wisconsin (the GCG Wisconsin Package). Bio::GCG::Msf is a parser for the MSF format.

Constant Summary collapse

DELIMITER =

delimiter used by Bio::FlatFile

RS = nil

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(str) ⇒ Msf

Creates a new Msf object.



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/bio/appl/gcg/msf.rb', line 31

# Creates a new Msf object from the text of a single MSF entry.
#
# Leading line breaks are dropped and the text is split at the first line
# that consists only of "//". The part before it (the preamble) is parsed
# here; the part after it is stored unparsed in @data (nil when there is no
# "//" line).
#
# From the preamble this sets:
# * @heading     - the "!!XX_MULTIPLE_ALIGNMENT ..." line, or nil
# * @description - header text with the heading and the "MSF:" line removed
# * @entry_id, @length, @seq_type, @date, @checksum - fields of the "MSF:"
#   line; all nil when that line is missing or cannot be parsed
# * @seq_info    - one Hash of "Key: value" pairs per "Name: " line
#
# An empty (or newline-only) string is accepted and yields an object with
# empty/nil fields instead of raising NoMethodError.
#
# ---
# *Arguments*:
# * (required) _str_: String containing the MSF entry
def initialize(str)
  str = str.sub(/\A[\r\n]+/, '')
  preamble, @data = str.split(/^\/\/$/, 2)
  # String#split returns [] for an empty string, leaving preamble nil.
  preamble = preamble.to_s

  heading_re = /\A\!\![A-Z]+\_MULTIPLE\_ALIGNMENT.*/
  @heading = preamble[heading_re] # '!!NA_MULTIPLE_ALIGNMENT 1.0' or like this
  preamble = preamble.sub(heading_re, '')

  # Everything up to the last ".." that ends a line is the header block;
  # what remains of the preamble holds the "Name:" lines.
  header = ''
  m = /.*\.\.\s*$/m.match(preamble)
  if m
    header = m[0]
    preamble = m.pre_match + m.post_match
  end

  # Inside the header block, the first line ending in ".." is the "MSF:"
  # line; the rest of the block is the free-text description.
  msf_line = ''
  @description = header
  m = /^.*\.\.\s*$/.match(header)
  if m
    msf_line = m[0]
    @description = m.pre_match + m.post_match
  end
  @description = @description.sub(/\A(\r\n|\r|\n)/, '')

  @entry_id = @length = @seq_type = @date = @checksum = nil
  m = /^(?:(.+)\s+)?MSF\:\s+(\d+)\s+Type\:\s+(\w)\s+(.+)\s+(Comp)?Check\:\s+(\d+)/.match(msf_line)
  if m
    @entry_id = m[1].to_s.strip
    @length   = m[2].to_i
    @seq_type = m[3]
    @date     = m[4].to_s.strip
    @checksum = m[6].to_i
  end

  @seq_info = []
  preamble.each_line do |line|
    next unless /Name\: / =~ line
    info = {}
    line.scan(/(\S+)\: +(\S*)/) { |key, value| info[key] = value }
    @seq_info << info
  end

  @align = nil
end

Instance Attribute Details

#checksumObject (readonly)

checksum



76
77
78
# File 'lib/bio/appl/gcg/msf.rb', line 76

# Read-only accessor for @checksum.
# initialize stores here, as an Integer, the digits that follow "Check:"
# (or "CompCheck:") on the "MSF:" header line; the value is nil when that
# line could not be parsed.
def checksum
  @checksum
end

#dateObject (readonly)

date



73
74
75
# File 'lib/bio/appl/gcg/msf.rb', line 73

# Read-only accessor for @date.
# initialize stores here the whitespace-stripped text found between the
# "Type:" field and the "Check:" field of the "MSF:" header line. It is kept
# as a String (no date parsing is done) and is nil when the line could not
# be parsed.
def date
  @date
end

#descriptionObject (readonly)

description



61
62
63
# File 'lib/bio/appl/gcg/msf.rb', line 61

# Read-only accessor for @description.
# initialize stores here the header text of the entry after removing the
# "!!..._MULTIPLE_ALIGNMENT" heading, the first line that ends in "..", and
# one leading line break. It is an empty String when no line ending in ".."
# is found.
def description
  @description
end

#entry_idObject (readonly)

ID of the alignment



64
65
66
# File 'lib/bio/appl/gcg/msf.rb', line 64

# Read-only accessor for @entry_id (ID of the alignment).
# initialize stores here the whitespace-stripped text that precedes "MSF:"
# on the header line (an empty String when nothing precedes it); the value
# is nil when the header line could not be parsed.
def entry_id
  @entry_id
end

#headingObject (readonly)

heading line (for example, ‘!!NA_MULTIPLE_ALIGNMENT 1.0’)



80
81
82
# File 'lib/bio/appl/gcg/msf.rb', line 80

# Read-only accessor for @heading.
# initialize stores here the leading "!!<LETTERS>_MULTIPLE_ALIGNMENT ..."
# line of the entry (without its line break), or nil when the entry does not
# start with such a line.
def heading
  @heading
end

#lengthObject (readonly)

alignment length



67
68
69
# File 'lib/bio/appl/gcg/msf.rb', line 67

# Read-only accessor for @length (alignment length).
# initialize stores here, as an Integer, the digits that follow "MSF:" on
# the header line; the value is nil when that line could not be parsed.
def length
  @length
end

#seq_typeObject (readonly)

sequence type (“N” for DNA/RNA or “P” for protein)



70
71
72
# File 'lib/bio/appl/gcg/msf.rb', line 70

# Read-only accessor for @seq_type.
# initialize stores here the single word character that follows "Type:" on
# the "MSF:" header line, as a String; the value is nil when that line could
# not be parsed.
def seq_type
  @seq_type
end

Instance Method Details

#alignmentObject

returns Bio::Alignment object.



176
177
178
179
# File 'lib/bio/appl/gcg/msf.rb', line 176

# Runs do_parse (defined elsewhere in this class) and then returns @align.
# initialize leaves @align as nil, so do_parse is presumably what builds the
# alignment object -- confirm against its definition.
def alignment
  do_parse
  @align
end

#compcheckObject

CompCheck field



118
119
120
121
122
123
124
125
126
127
# File 'lib/bio/appl/gcg/msf.rb', line 118

# CompCheck field of the description.
#
# On the first call the description is searched for "CompCheck: <digits>";
# the digits are stored as an Integer, or nil is stored when the field is
# absent. Later calls return the stored value without searching again.
def compcheck
  return @compcheck if defined?(@compcheck)

  found = /CompCheck\: +(\d+)/.match(@description)
  @compcheck = found && found[1].to_i
end

#gap_length_weightObject

gap length weight



109
110
111
112
113
114
115
# File 'lib/bio/appl/gcg/msf.rb', line 109

# Gap length weight taken from the description.
#
# On the first call the description is searched for
# "GapLengthWeight: <value>"; the value is stored as a String, or nil is
# stored when the field is absent. Later calls return the stored value.
def gap_length_weight
  return @gap_length_weight if defined?(@gap_length_weight)

  found = /GapLengthWeight\: +(\S+)/.match(@description)
  @gap_length_weight = found ? found[1] : nil
end

#gap_weightObject

gap weight



100
101
102
103
104
105
106
# File 'lib/bio/appl/gcg/msf.rb', line 100

# Gap weight taken from the description.
#
# On the first call the description is searched for "GapWeight: <value>";
# the value is stored as a String, or nil is stored when the field is
# absent. Later calls return the stored value.
def gap_weight
  return @gap_weight if defined?(@gap_weight)

  found = /GapWeight\: +(\S+)/.match(@description)
  @gap_weight = found ? found[1] : nil
end

#seq_dataObject

Gets the sequence data (used internally; this method will become obsolete).



182
183
184
185
# File 'lib/bio/appl/gcg/msf.rb', line 182

# Runs do_parse (defined elsewhere in this class) and then returns @seq_data.
# @seq_data is not assigned in initialize, so do_parse is presumably what
# fills it -- confirm against its definition.
def seq_data
  do_parse
  @seq_data
end

#symbol_comparison_tableObject

symbol comparison table



91
92
93
94
95
96
97
# File 'lib/bio/appl/gcg/msf.rb', line 91

# Symbol comparison table named in the description.
#
# On the first call the description is searched for
# "Symbol comparison table: <value>"; the value is stored as a String, or
# nil is stored when the field is absent. Later calls return the stored
# value.
def symbol_comparison_table
  return @symbol_comparison_table if defined?(@symbol_comparison_table)

  found = /Symbol comparison table\: +(\S+)/.match(@description)
  @symbol_comparison_table = found ? found[1] : nil
end

#validate_checksumObject

validates checksum



188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/bio/appl/gcg/msf.rb', line 188

# Validates the checksums of the entry.
#
# After do_parse, each element of @seq_data is passed to
# Bio::GCG::Seq.calc_checksum and the result is compared with the "Check"
# value of the @seq_info entry at the same index. The first mismatch makes
# the method return false.
#
# When every sequence matches, the sum of the per-sequence checksums modulo
# 10000 must equal @checksum, unless @checksum is 0, in which case the
# overall check is skipped.
#
# *Returns*:: true or false
def validate_checksum
  do_parse
  total = 0
  @seq_data.each_with_index do |seq, idx|
    sum = Bio::GCG::Seq.calc_checksum(seq)
    return false unless sum == @seq_info[idx]['Check'].to_i
    total += sum
  end
  # "Check:" field of BioPerl is always 0
  return true if @checksum == 0
  (total % 10000) == @checksum
end