Method: Bio::Alignment::Output#__output_phylip_common

Defined in:
lib/bio/alignment.rb

#__output_phylip_common(options = {}) ⇒ Object

Common routine shared by the interleaved and non-interleaved PHYLIP output formats.



1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
# File 'lib/bio/alignment.rb', line 1099

# Common routine for interleaved/non-interleaved PHYLIP output.
#
# Builds the pieces that a PHYLIP writer assembles into the final text.
#
# Options (all optional):
# :replace_space   :: when truthy, whitespace in names is replaced with '_'
# :escape          :: (default: on) replaces ':', ';', ',', '(' and ')' in
#                     names with '_'
# :split           :: (default: on) keeps only the first whitespace-separated
#                     word of each name
# :avoid_same_name :: (default: on) passes the names through
#                     __clustal_avoid_same_name (presumably to make the
#                     truncated names unique -- confirm against that helper)
# :width           :: residues per output row (default 60); rounded down to
#                     a multiple of 10 and never less than 10
# :gap_char        :: string substituted for every gap_regexp match and used
#                     to pad short sequences (default '-')
# :case            :: anything whose to_s matches /lower/i or /upper/i
#                     converts the sequences to that case
#
# Returns a three-element Array:
# 1. an Array holding the header line (" <number of sequences> <length>\n"),
# 2. an Array with one entry per sequence; each entry is an Array of
#    newline-terminated rows, the first row starting with the name padded or
#    truncated to 10 characters and the following rows with 10 spaces,
# 3. the number of rows needed to print one sequence.
def __output_phylip_common(options = {})
  namewidth = 10
  len = alignment_length
  aln = [" #{number_of_sequences} #{len}\n"]

  # Sanitize the sequence names; line breaks and NUL would corrupt the layout.
  sn = sequence_names.map { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
  sn.map! { |x| x.gsub(/\s/, '_') } if options[:replace_space]
  sn.map! { |x| x.gsub(/[\:\;\,\(\)]/, '_') } if options.fetch(:escape, true)
  sn.map! { |x| x.split(/\s/)[0].to_s } if options.fetch(:split, true)
  if options.fetch(:avoid_same_name, true)
    sn = __clustal_avoid_same_name(sn, namewidth)
  end

  # Residues are printed in blocks of 10, so the row width is rounded down to
  # a multiple of 10. Widths below 10 used to round to 0 (ZeroDivisionError
  # when counting rows) or to a negative value (invalid regexp); clamp to 10.
  seqwidth = [(options[:width] || 60).div(10) * 10, 10].max
  # Each block of 10 residues is preceded by one space, hence 11 characters.
  seqregexp = Regexp.new("(.{1,#{seqwidth.div(10) * 11}})")
  gchar = options[:gap_char] || '-'

  # gsub returns new strings, so the in-place edits below never touch the
  # sequences stored in the alignment.
  aseqs = []
  each_seq { |s| aseqs << s.to_s.gsub(gap_regexp, gchar) }

  case options[:case].to_s
  when /lower/i
    aseqs.each { |s| s.downcase! }
  when /upper/i
    aseqs.each { |s| s.upcase! }
  end

  head2 = ' ' * namewidth
  aseqs.map! do |s|
    head = sprintf("%*s", -namewidth, sn.shift.to_s)[0, namewidth]
    s << (gchar * (len - s.length))   # pad to the alignment length
    s.gsub!(/(.{1,10})/n, " \\1")     # space-separated blocks of 10
    s.gsub!(seqregexp, "\\1\n")       # break into rows of seqwidth residues
    rows = s.split(/^/)
    # An empty sequence yields no rows at all; keep the name row
    # newline-terminated instead of failing on nil.
    head += (rows.shift || "\n")
    rows.map! { |x| head2 + x }
    rows.unshift(head)
  end

  lines = (len + seqwidth - 1).div(seqwidth)
  [aln, aseqs, lines]
end