Class: Bio::MAF::Block

Inherits:
Object
  • Object
show all
Defined in:
lib/bio/maf/maf.rb

Overview

A MAF alignment block.

Constant Summary collapse

GAP =
/-+/

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(vars, sequences, offset, size, filtered) ⇒ Block

Returns a new instance of Block.



70
71
72
73
74
75
76
# File 'lib/bio/maf/maf.rb', line 70

# Builds a block from already-parsed components; each argument is stored
# in the instance variable of the same name.
#
# @param vars [Object] parameters from the 'a' line that starts the block
# @param sequences [Array] sequences of the block, one per 's' or 'e' line
# @param offset [Integer] byte offset of the block within the MAF file
# @param size [Integer, nil] size of the block within the MAF file, in bytes
# @param filtered [Boolean] whether a parser filter has modified the block
def initialize(vars, sequences, offset, size, filtered)
  @vars, @sequences = vars, sequences
  @offset, @size    = offset, size
  @filtered = filtered
end

Instance Attribute Details

#offset ⇒ Integer

Offset of the alignment block within the MAF file, in bytes.

Returns:

  • (Integer)


61
62
63
# File 'lib/bio/maf/maf.rb', line 61

# Offset of the alignment block within the MAF file, in bytes.
#
# @return [Integer] current value of @offset
def offset
  @offset
end

#orig_text ⇒ String

Original text of the MAF block. Only available if the :retain_text parser option is set.

Returns:

  • (String)


68
69
70
# File 'lib/bio/maf/maf.rb', line 68

# Original text of the MAF block. Only available if the :retain_text
# parser option is set.
#
# @return [String, nil] current value of @orig_text; the constructor does
#   not assign it, so it is nil until something else sets it
def orig_text
  @orig_text
end

#sequences ⇒ Array<Sequence> (readonly)

Sequences, one per 's' or 'e' line.

Returns:

  • (Array<Sequence>)



58
59
60
# File 'lib/bio/maf/maf.rb', line 58

# Sequences of the block, one per 's' or 'e' line.
#
# @return [Array<Sequence>] the collection given to the constructor
#   (the same object, not a copy)
def sequences
  @sequences
end

#size ⇒ Integer (readonly)

Size of the alignment block within the MAF file, in bytes.

Returns:

  • (Integer)


64
65
66
# File 'lib/bio/maf/maf.rb', line 64

# Size of the alignment block within the MAF file, in bytes.
#
# @return [Integer, nil] value of @size; nil for blocks produced by #join,
#   which passes nil as the size
def size
  @size
end

#vars ⇒ Object (readonly)

Parameters from the 'a' line starting the alignment block.



55
56
57
# File 'lib/bio/maf/maf.rb', line 55

# Parameters from the 'a' line starting the alignment block.
#
# @return [Object] value of @vars; #join indexes a copy of it with :score,
#   so it is presumably a Hash keyed by symbols (confirm against the parser)
def vars
  @vars
end

Instance Method Details

#_slice(interval) ⇒ Object



177
178
179
180
181
182
183
184
# File 'lib/bio/maf/maf.rb', line 177

# Builds a new Block restricted to the alignment text columns that
# correspond to +interval+ on the reference sequence.
#
# @param interval [Object] interval handed to #_slice_text_range to find
#   the range of text columns to keep
# @return [Block] new block holding each sequence sliced to that range and
#   a copy of #vars; offset, size and the filtered flag are carried over
def _slice(interval)
  columns = _slice_text_range(interval)
  sliced  = sequences.map { |seq| seq.slice(columns) }
  # The score is carried over unchanged (resetting it to '0.0' is disabled).
  # TODO: should the filtered param be #modified? instead?
  Block.new(vars.dup, sliced, offset, size, @filtered)
end

#_slice_text_range(interval) ⇒ Object



186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/bio/maf/maf.rb', line 186

def _slice_text_range(interval)
  i_start  = interval.zero_start
  i_end    = interval.zero_end
  g_pos    = ref_seq.start
  t_start  = nil
  t_end    = nil
  ref_seq.text.each_char.each_with_index do |c, t_pos|
    if c != '-'
      # non-gap
      if g_pos == i_start
        t_start = t_pos
      end
      g_pos += 1
      if t_start && g_pos == i_end
        t_end = t_pos + 1
        break
      end
    end
  end
  unless t_start && t_end
    raise "did not find start and end for #{interval} in #{ref_seq.inspect}!"
  end
  return t_start...t_end
end

#each_raw_seq ⇒ Object



86
87
88
# File 'lib/bio/maf/maf.rb', line 86

# Yields each entry of #sequences, in order.
#
# @yieldparam seq [Object] one entry of #sequences
# @return [Object, Enumerator] the result of sequences.each when a block is
#   given; an Enumerator over the same entries when called without a block
def each_raw_seq
  # Without a block, hand back an Enumerator rather than failing with
  # LocalJumpError on the first yield.
  return enum_for(:each_raw_seq) unless block_given?

  sequences.each { |seq| yield seq }
end

#filtered? ⇒ Boolean

Whether this block has been modified by a parser filter.

Returns:

  • (Boolean)


103
104
105
# File 'lib/bio/maf/maf.rb', line 103

# Whether this block has been modified by a parser filter.
#
# @return [Boolean] the filtered flag exactly as it was given to the
#   constructor (no coercion is applied)
def filtered?
  @filtered
end

#find_gaps ⇒ Object

Find gaps present in all sequences. These would generally occur when some sequences have been filtered out.



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'lib/bio/maf/maf.rb', line 126

# Find gaps present in all sequences. These would generally occur when
# some sequences have been filtered out.
#
# Each run of dashes in the first sequence is a candidate. It is reported
# when every other non-empty sequence also has a run of dashes starting at
# the same text column; the reported length is the shortest of those runs.
#
# NOTE(review): a shared gap that starts part-way through a dash run of the
# first sequence is not detected by this scan — confirm that is acceptable.
#
# @return [Array<Array<Integer>>] [text column, length] pairs, in ascending
#   column order
def find_gaps
  ref_s = StringScanner.new(sequences.first.text)
  others = sequences.drop(1).reject { |s| s.empty? }.map { |s| StringScanner.new(s.text) }
  gaps = []
  while ref_s.scan_until(GAP)
    gap_start = ref_s.pos - ref_s.matched_size
    others.each { |s| s.pos = gap_start }
    # every other sequence must also have a gap beginning at this column
    next unless others.all? { |s| s.scan(GAP) }

    # Take the minimum over one flat list so that a block with no other
    # non-empty sequences does not end up comparing an Integer with nil.
    gap_size = others.map { |s| s.matched_size }.push(ref_s.matched_size).min
    gaps << [gap_start, gap_size]
  end
  gaps
end

#join(other) ⇒ Object



229
230
231
232
233
234
235
236
237
# File 'lib/bio/maf/maf.rb', line 229

# Joins this block with +other+, pairing sequences by source.
#
# For every sequence here, the partner is looked up with
# other.seq_from(source, index) and passed to that sequence's #join.
#
# @param other [Block] block supplying the partner sequences
# @return [Block] new block with the joined sequences, a copy of #vars whose
#   :score is set to '0.0', this block's offset, a nil size and this
#   block's filtered flag
def join(other)
  joined = sequences.each_with_index.map do |mine, idx|
    mine.join(other.seq_from(mine.source, idx))
  end
  # the score of the original block no longer applies to the joined block
  joined_vars = vars.dup.tap { |v| v[:score] = '0.0' }
  Block.new(joined_vars, joined, offset, nil, @filtered)
end

#joinable_with?(other) ⇒ Boolean

Returns:

  • (Boolean)


211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# File 'lib/bio/maf/maf.rb', line 211

# Whether +other+ can be joined onto the end of this block.
#
# Requires the same number of sequences, the same reference source, and
# the other reference to start exactly where this one ends. In addition
# every sequence here needs a partner in +other+ (looked up by source)
# that it reports as joinable.
#
# @param other [Block] candidate block to append
# @return [Boolean]
def joinable_with?(other)
  return false unless sequences.size == other.sequences.size

  r1 = ref_seq
  r2 = other.ref_seq
  return false if r1.source != r2.source
  return false if r1.end != r2.start

  # Every sequence, the reference included, is checked. Advancing an
  # enumerator with #next does not affect a later #find/#all? on it, so
  # the reference was never actually skipped; that behaviour is kept here
  # without the ineffective external-iteration step.
  sequences.each_with_index.all? do |s1, i|
    s2 = other.seq_from(s1.source, i)
    s2 && s1.joinable_with?(s2)
  end
end

#raw_seq(i) ⇒ Object



82
83
84
# File 'lib/bio/maf/maf.rb', line 82

# Returns the sequence at position +i+ in #sequences.
#
# Uses #fetch rather than #[], so for an Array an out-of-range index raises
# IndexError instead of returning nil.
#
# @param i [Integer] index into #sequences
# @return [Object] the entry stored at that index
def raw_seq(i)
  sequences.fetch(i)
end

#ref_seq ⇒ Object



78
79
80
# File 'lib/bio/maf/maf.rb', line 78

# Returns the first entry of #sequences, which the slicing and joining
# methods of this class treat as the reference sequence.
#
# @return [Object, nil] the first sequence, or nil when there are none
def ref_seq
  sequences[0]
end

#remove_gaps! ⇒ Object

Remove gaps present in all sequences. These would generally occur when some sequences have been filtered out.



148
149
150
151
152
153
154
155
156
# File 'lib/bio/maf/maf.rb', line 148

# Remove gaps present in all sequences. These would generally occur when
# some sequences have been filtered out.
#
# Deletes the text of every gap reported by #find_gaps from each sequence,
# in place.
#
# @return [Integer] number of gaps removed
def remove_gaps!
  found = find_gaps
  # Work from the last gap backwards so that deleting one gap does not
  # shift the column offsets of the gaps still to be removed.
  found.reverse_each do |gap_start, gap_len|
    sequences.each { |seq| seq.delete_text(gap_start, gap_len) }
  end
  found.size
end

#seq_from(src, pos_guess) ⇒ Object



239
240
241
242
243
244
245
246
# File 'lib/bio/maf/maf.rb', line 239

# Finds the sequence whose source is +src+.
#
# +pos_guess+ is only a hint: the entry at that index is tried first, and
# if it is missing or has a different source the whole list is searched.
#
# @param src [Object] source to look for (compared with ==)
# @param pos_guess [Integer] index in #sequences where the match is expected
# @return [Object, nil] the matching sequence, or nil if none has that source
def seq_from(src, pos_guess)
  guess = sequences[pos_guess]
  # an out-of-range guess yields nil; fall through to the search instead of
  # calling #source on nil
  return guess if guess && guess.source == src

  sequences.find { |s| s.source == src }
end

#slice(interval) ⇒ Block

Returns a new Block covering only the region where it overlaps the given interval.

Parameters:

  • interval — the interval to intersect with this block's reference sequence

Returns:

  • (Block)

    block covering intersection with interval



162
163
164
165
166
167
168
169
170
171
172
173
174
175
# File 'lib/bio/maf/maf.rb', line 162

# Returns a Block covering only the region where this block overlaps
# +interval+.
#
# @param interval [Object] interval to slice to; must respond to #compare,
#   #intersection and #chrom
# @return [Block] this block itself when the interval equals the reference
#   sequence's interval, otherwise a new block for the intersection
# @raise [RuntimeError] if the interval does not overlap the block, lies on
#   a different chromosome, or #compare returns an unrecognised result
def slice(interval)
  block_interval = ref_seq.interval
  relation = interval.compare(block_interval)
  case relation
  when :equal
    self
  when :contains, :contained_by, :left_overlapped, :right_overlapped
    _slice(interval.intersection(block_interval))
  when :left_adjacent, :right_adjacent, :left_off, :right_off
    raise "Cannot slice a block with a non-overlapping interval! Block #{block_interval}, interval #{interval}"
  when :different_chrom
    raise "Cannot slice a block with reference sequence #{ref_seq.source} using an interval on #{interval.chrom}!"
  else
    raise "Unhandled comparison result: #{relation}"
  end
end

#text_size ⇒ Object

Text size of the alignment block. This is the number of text characters in each line of sequence data, including dashes and other gaps in the sequence.



93
94
95
# File 'lib/bio/maf/maf.rb', line 93

# Text size of the alignment block: the number of text characters in each
# line of sequence data, including dashes and other gaps in the sequence.
#
# Measured on the first sequence only.
#
# @return [Integer] size of the first sequence's text
def text_size
  sequences.first.text.size
end

#to_bio_alignment ⇒ Object



107
108
109
110
# File 'lib/bio/maf/maf.rb', line 107

# Converts the block into a Bio::BioAlignment::Alignment.
#
# Each sequence is converted with its own #to_bio_alignment and the
# results are passed, in order, to the Alignment constructor.
#
# @return [Bio::BioAlignment::Alignment]
def to_bio_alignment
  rows = sequences.map(&:to_bio_alignment)
  Bio::BioAlignment::Alignment.new(rows)
end

#to_s ⇒ Object



112
113
114
115
116
117
# File 'lib/bio/maf/maf.rb', line 112

# Renders the block as text by writing it through a Writer into an
# in-memory buffer.
#
# @return [String] everything Writer#write_block wrote for this block
def to_s
  StringIO.new.tap { |io| Writer.new(io).write_block(self) }.string
end

#upcase! ⇒ Object



97
98
99
# File 'lib/bio/maf/maf.rb', line 97

# Calls #upcase! on every sequence of the block, modifying them in place.
#
# @return [Object] the result of sequences.each (the sequence collection)
def upcase!
  sequences.each(&:upcase!)
end