Class: Bio::MAF::Block

Inherits:
Object
  • Object
show all
Defined in:
lib/bio/maf/maf.rb

Overview

A MAF alignment block.

Constant Summary collapse

# Matches a run of one or more consecutive gap ('-') characters.
GAP =
/-+/

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(vars, sequences, offset, size, filtered) ⇒ Block

Returns a new instance of Block.



66
67
68
69
70
71
72
# File 'lib/bio/maf/maf.rb', line 66

# Creates a block from its already-parsed components; every argument is
# stored as given, without copying or validation.
#
# @param vars parameters from the 'a' line starting the alignment block
# @param sequences [Array<Sequence>] sequences, one per 's' or 'e' line
# @param offset [Integer] offset of the block within the MAF file, in bytes
# @param size [Integer, nil] size of the block within the MAF file, in
#   bytes (#join passes nil here for the block it builds)
# @param filtered whether the block has been modified by a parser filter
def initialize(vars, sequences, offset, size, filtered)
  @vars = vars
  @sequences = sequences
  @offset = offset
  @size = size
  @filtered = filtered
end

Instance Attribute Details

#offset ⇒ Integer (readonly)

Offset of the alignment block within the MAF file, in bytes.

Returns:

  • (Integer)


61
62
63
# File 'lib/bio/maf/maf.rb', line 61

# Offset of the alignment block within the MAF file, in bytes.
#
# @return [Integer]
def offset
  @offset
end

#sequences ⇒ Array<Sequence> (readonly)

Sequences, one per 's' or 'e' line.

Returns:

  • (Array<Sequence>)



58
59
60
# File 'lib/bio/maf/maf.rb', line 58

# Sequences of the block, one per 's' or 'e' line.
#
# @return [Array<Sequence>]
def sequences
  @sequences
end

#size ⇒ Integer (readonly)

Size of the alignment block within the MAF file, in bytes.

Returns:

  • (Integer)


64
65
66
# File 'lib/bio/maf/maf.rb', line 64

# Size of the alignment block within the MAF file, in bytes.
#
# @return [Integer]
def size
  @size
end

#vars ⇒ Object (readonly)

Parameters from the 'a' line starting the alignment block.



55
56
57
# File 'lib/bio/maf/maf.rb', line 55

# Parameters from the 'a' line starting the alignment block.
def vars
  @vars
end

Instance Method Details

#_slice(interval) ⇒ Object



160
161
162
163
164
165
166
167
# File 'lib/bio/maf/maf.rb', line 160

# Builds a new Block covering only +interval+.
#
# Every sequence is cut down to the text columns that correspond to
# +interval+ on the reference sequence. The 'a' line variables are
# copied; offset, size and the filtered flag are carried over unchanged.
#
# @param interval interval on the reference sequence to keep
# @return [Block]
def _slice(interval)
  columns = _slice_text_range(interval)
  trimmed = sequences.map { |seq| seq.slice(columns) }
  block_vars = vars.dup
  # The score is left as-is; resetting it stays disabled:
  #block_vars[:score] = '0.0'
  # TODO: should the filtered param be #modified? instead?
  Block.new(block_vars, trimmed, offset, size, @filtered)
end

#_slice_text_range(interval) ⇒ Object



169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/bio/maf/maf.rb', line 169

# Translates an interval on the reference sequence into the matching
# range of text (column) positions within the alignment.
#
# The reference text is walked once. Only non-gap characters advance the
# sequence position counter, which starts at the reference sequence's
# start; gap columns are skipped.
#
# @param interval object responding to #zero_start and #zero_end
# @return [Range] end-exclusive range of text positions
# @raise [RuntimeError] if the start or end position is never reached
def _slice_text_range(interval)
  wanted_start = interval.zero_start
  wanted_end = interval.zero_end
  seq_pos = ref_seq.start
  first_col = nil
  past_last_col = nil
  ref_seq.text.each_char.with_index do |char, col|
    # gap columns do not consume a position on the reference sequence
    next if char == '-'

    first_col = col if seq_pos == wanted_start
    seq_pos += 1
    next unless first_col && seq_pos == wanted_end

    past_last_col = col + 1
    break
  end
  if first_col.nil? || past_last_col.nil?
    raise "did not find start and end for #{interval} in #{ref_seq.inspect}!"
  end
  first_col...past_last_col
end

#each_raw_seq ⇒ Object



82
83
84
# File 'lib/bio/maf/maf.rb', line 82

# Yields every sequence of the block to the given block, in order.
#
# @yield [seq] each element of #sequences
# @return the result of iterating #sequences with #each
def each_raw_seq
  sequences.each do |seq|
    yield seq
  end
end

#filtered? ⇒ Boolean

Whether this block has been modified by a parser filter.

Returns:

  • (Boolean)


95
96
97
# File 'lib/bio/maf/maf.rb', line 95

# Whether this block has been modified by a parser filter.
#
# @return [Boolean] the filtered flag given to the constructor
def filtered?
  @filtered
end

#find_gaps ⇒ Object

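Find gaps present in all sequences. These would generally occur when some sequences have been filtered out. Nothing is removed here; the result is an array of [text offset, gap length] pairs, which #remove_gaps! uses to delete the gaps.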



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/bio/maf/maf.rb', line 110

# Finds runs of gap characters that occur at the same text position in
# the first sequence and in every other non-empty sequence of the block.
#
# Each gap run of the first sequence is a candidate. It is reported only
# if all other non-empty sequences also have a gap starting at that
# position, and its length is the shortest of the runs found there.
# When there are no other non-empty sequences (for example after all of
# them have been filtered out), every gap run of the first sequence is
# reported.
#
# @return [Array<Array(Integer, Integer)>] [text offset, gap length]
#   pairs, in ascending offset order
def find_gaps
  ref_s = StringScanner.new(sequences.first.text)
  others = sequences.drop(1).reject { |s| s.empty? }.collect { |s| StringScanner.new(s.text) }
  gaps = []
  while ref_s.scan_until(GAP)
    # scan_until leaves pos just past the match; step back to its start.
    gap_start = ref_s.pos - ref_s.matched_size
    others.each { |s| s.pos = gap_start }
    next unless others.all? { |s| s.scan(GAP) }

    # All matched. Splat the other sizes into one list so that an empty
    # `others` yields the reference size instead of comparing with nil.
    gap_size = [ref_s.matched_size, *others.map { |s| s.matched_size }].min
    gaps << [gap_start, gap_size]
  end
  gaps
end

#join(other) ⇒ Object



212
213
214
215
216
217
218
219
220
# File 'lib/bio/maf/maf.rb', line 212

# Joins this block with +other+, producing a new Block.
#
# Each sequence is joined with the sequence of +other+ that has the same
# source; the sequence's own index is passed to +other+ as the position
# guess. The new block gets a copy of this block's 'a' line variables
# with the score reset to '0.0', this block's offset and filtered flag,
# and no size.
#
# @param other [Block] block to append
# @return [Block]
def join(other)
  joined = sequences.each_with_index.map do |seq, idx|
    counterpart = other.seq_from(seq.source, idx)
    seq.join(counterpart)
  end
  block_vars = vars.dup
  block_vars[:score] = '0.0'
  Block.new(block_vars, joined, offset, nil, @filtered)
end

#joinable_with?(other) ⇒ Boolean

Returns:

  • (Boolean)


194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
# File 'lib/bio/maf/maf.rb', line 194

# Whether +other+ can be joined onto the end of this block.
#
# The blocks are joinable when they have the same number of sequences,
# their reference sequences share a source, this block's reference
# sequence ends exactly where the other's starts, and every sequence of
# this block has a same-source counterpart in +other+ that it reports
# as joinable.
#
# NOTE: the reference sequence pair also goes through the per-sequence
# #joinable_with? check. An earlier `Enumerator#next` call looked as if
# it skipped the first element, but external enumeration does not affect
# the following internal iteration, so nothing was ever skipped. The dead
# call has been removed; the outcome is unchanged.
#
# @param other [Block]
# @return [Boolean]
def joinable_with?(other)
  return false unless sequences.size == other.sequences.size

  r1 = ref_seq
  r2 = other.ref_seq
  return false if r1.source != r2.source
  return false if r1.end != r2.start

  sequences.each_with_index.all? do |s1, i|
    s2 = other.seq_from(s1.source, i)
    s2 && s1.joinable_with?(s2)
  end
end

#raw_seq(i) ⇒ Object



78
79
80
# File 'lib/bio/maf/maf.rb', line 78

# Returns the sequence at index +i+ of #sequences.
#
# Uses #fetch rather than #[], so with an Array an index outside the
# bounds raises IndexError instead of returning nil.
#
# @param i [Integer] index into #sequences
def raw_seq(i)
  sequences.fetch(i)
end

#ref_seq ⇒ Object



74
75
76
# File 'lib/bio/maf/maf.rb', line 74

# The reference sequence of the block, i.e. the first entry of
# #sequences (nil when there are none).
def ref_seq
  sequences.first
end

#remove_gaps! ⇒ Object

Remove gaps present in all sequences. These would generally occur when some sequences have been filtered out.



131
132
133
134
135
136
137
138
139
# File 'lib/bio/maf/maf.rb', line 131

# Removes gaps present in all sequences, as reported by #find_gaps, from
# every sequence of the block.
#
# @return [Integer] the number of gaps removed
def remove_gaps!
  shared_gaps = find_gaps
  # Delete from the last gap backwards so that the offsets of the gaps
  # still to be processed are not shifted by earlier deletions.
  shared_gaps.reverse_each do |start, length|
    sequences.each { |seq| seq.delete_text(start, length) }
  end
  shared_gaps.length
end

#seq_from(src, pos_guess) ⇒ Object



222
223
224
225
226
227
228
229
# File 'lib/bio/maf/maf.rb', line 222

# Finds the sequence of this block whose source is +src+.
#
# +pos_guess+ is tried first as an index into #sequences, which avoids a
# search when both blocks list their sequences in the same order. If
# that slot does not exist or holds a different source, the sequences
# are searched by source instead.
#
# @param src source name to look for
# @param pos_guess [Integer] index where the sequence is expected
# @return the matching sequence, or nil if none has that source
def seq_from(src, pos_guess)
  guess = sequences[pos_guess]
  # The guess may point past the end of the list (nil); fall through to
  # the search instead of calling #source on nil.
  return guess if guess && guess.source == src

  sequences.find { |s| s.source == src }
end

#slice(interval) ⇒ Block

Returns a new Block covering only the region where it overlaps the given interval.

Parameters:

  • interval — the interval to intersect with the interval of the block's reference sequence

Returns:

  • (Block)

    block covering intersection with interval



145
146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/bio/maf/maf.rb', line 145

# Returns a Block covering only the region where this block overlaps
# +interval+.
#
# +interval+ is compared with the interval of the reference sequence.
# An identical interval returns this block itself, any kind of overlap
# returns a new block cut down to the intersection, and anything else
# is an error.
#
# @param interval interval to slice the block to
# @return [Block] block covering the intersection with +interval+
# @raise [RuntimeError] if +interval+ does not overlap the block, is on
#   a different chromosome, or compares in an unrecognised way
def slice(interval)
  block_interval = ref_seq.interval
  relation = interval.compare(block_interval)
  case relation
  when :equal
    self
  when :contains, :contained_by, :left_overlapped, :right_overlapped
    overlap = interval.intersection(block_interval)
    _slice(overlap)
  when :left_adjacent, :right_adjacent, :left_off, :right_off
    raise "Cannot slice a block with a non-overlapping interval! Block #{block_interval}, interval #{interval}"
  when :different_chrom
    raise "Cannot slice a block with reference sequence #{ref_seq.source} using an interval on #{interval.chrom}!"
  else
    raise "Unhandled comparison result: #{relation}"
  end
end

#text_size ⇒ Object

Text size of the alignment block. This is the number of text characters in each line of sequence data, including dashes and other gaps in the sequence.



89
90
91
# File 'lib/bio/maf/maf.rb', line 89

# Text size of the alignment block: the number of text characters in
# the first sequence's line of sequence data, including dashes and
# other gaps.
def text_size
  first_seq = sequences.first
  first_seq.text.length
end

#to_bio_alignment ⇒ Object



99
100
101
102
# File 'lib/bio/maf/maf.rb', line 99

# Converts the block into a Bio::BioAlignment::Alignment, built from
# the result of calling #to_bio_alignment on each sequence, in order.
#
# @return [Bio::BioAlignment::Alignment]
def to_bio_alignment
  converted = sequences.map { |seq| seq.to_bio_alignment }
  Bio::BioAlignment::Alignment.new(converted)
end