Class: Bio::MAF::Block
- Inherits:
-
Object
- Object
- Bio::MAF::Block
- Defined in:
- lib/bio/maf/maf.rb
Overview
A MAF alignment block.
Constant Summary collapse
- GAP =
/-+/
Instance Attribute Summary collapse
-
#offset ⇒ Integer
readonly
Offset of the alignment block within the MAF file, in bytes.
-
#sequences ⇒ Array<Sequence>
readonly
Sequences, one per 's' or 'e' line.
-
#size ⇒ Integer
readonly
Size of the alignment block within the MAF file, in bytes.
-
#vars ⇒ Object
readonly
Parameters from the 'a' line starting the alignment block.
Instance Method Summary collapse
- #_slice(interval) ⇒ Object
- #_slice_text_range(interval) ⇒ Object
- #each_raw_seq ⇒ Object
-
#filtered? ⇒ Boolean
Whether this block has been modified by a parser filter.
-
#find_gaps ⇒ Object
Find gaps present in all sequences.
-
#initialize(vars, sequences, offset, size, filtered) ⇒ Block
constructor
A new instance of Block.
- #join(other) ⇒ Object
- #joinable_with?(other) ⇒ Boolean
- #raw_seq(i) ⇒ Object
- #ref_seq ⇒ Object
-
#remove_gaps! ⇒ Object
Remove gaps present in all sequences.
- #seq_from(src, pos_guess) ⇒ Object
-
#slice(interval) ⇒ Block
Returns a new Block covering only the region where it overlaps the given interval.
-
#text_size ⇒ Object
Text size of the alignment block.
- #to_bio_alignment ⇒ Object
Constructor Details
#initialize(vars, sequences, offset, size, filtered) ⇒ Block
Returns a new instance of Block.
66 67 68 69 70 71 72 73 |
# File 'lib/bio/maf/maf.rb', line 66
# Build a block from its already-parsed parts.
#
# @param vars parameters from the 'a' line starting the alignment block
# @param sequences [Array<Sequence>] sequences, one per 's' or 'e' line
# @param offset [Integer] offset of the block within the MAF file, in bytes
# @param size [Integer] size of the block within the MAF file, in bytes
# @param filtered whether the block has been modified by a parser filter
def initialize(vars, sequences, offset, size, filtered)
  # A check for a missing or empty sequence list is present but disabled:
  #raise ArgumentError, "no sequences given for block at offset #{offset}!" unless sequences && sequences.first
  @vars, @sequences = vars, sequences
  @offset, @size = offset, size
  @filtered = filtered
end
Instance Attribute Details
#offset ⇒ Integer (readonly)
Offset of the alignment block within the MAF file, in bytes.
61 62 63 |
# File 'lib/bio/maf/maf.rb', line 61
# @return [Integer] offset of the alignment block within the MAF file,
#   in bytes
def offset
  @offset
end
#sequences ⇒ Array<Sequence> (readonly)
Sequences, one per 's' or 'e' line.
58 59 60 |
# File 'lib/bio/maf/maf.rb', line 58
# @return [Array<Sequence>] sequences, one per 's' or 'e' line
def sequences
  @sequences
end
#size ⇒ Integer (readonly)
Size of the alignment block within the MAF file, in bytes.
64 65 66 |
# File 'lib/bio/maf/maf.rb', line 64
# @return [Integer] size of the alignment block within the MAF file,
#   in bytes
def size
  @size
end
#vars ⇒ Object (readonly)
Parameters from the 'a' line starting the alignment block.
55 56 57 |
# File 'lib/bio/maf/maf.rb', line 55
# @return parameters from the 'a' line starting the alignment block
def vars
  @vars
end
Instance Method Details
#_slice(interval) ⇒ Object
161 162 163 164 165 166 167 168 |
# File 'lib/bio/maf/maf.rb', line 161
# Build a new Block whose sequences are each sliced to the text range
# corresponding to +interval+ (see #_slice_text_range).
#
# The new block gets a copy of this block's vars and keeps this block's
# offset, size and filtered flag.
#
# @param interval interval on the reference sequence to keep
# @return [Block]
def _slice(interval)
  text_range = _slice_text_range(interval)
  sliced = sequences.map { |seq| seq.slice(text_range) }
  params = vars.dup
  #v2[:score] = '0.0'
  # TODO: should the filtered param be #modified? instead?
  Block.new(params, sliced, offset, size, @filtered)
end
#_slice_text_range(interval) ⇒ Object
170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
# File 'lib/bio/maf/maf.rb', line 170
# Translate an interval on the reference sequence into a range of text
# (column) positions within the reference sequence's alignment text.
#
# Walks the reference text, counting only non-gap characters from
# ref_seq.start, and records the text column where the interval's
# zero_start is reached and the column just past the one where zero_end is
# reached.
#
# @param interval object responding to #zero_start and #zero_end
# @return [Range] exclusive range of text positions
# @raise [RuntimeError] if either end of the interval is not reached
def _slice_text_range(interval)
  first_base = interval.zero_start
  last_base = interval.zero_end
  genome_pos = ref_seq.start
  text_start = nil
  text_end = nil
  ref_seq.text.each_char.with_index do |char, text_pos|
    # Gap columns do not advance the position in the reference sequence.
    next if char == '-'

    text_start = text_pos if genome_pos == first_base
    genome_pos += 1
    if text_start && genome_pos == last_base
      text_end = text_pos + 1
      break
    end
  end
  if text_start.nil? || text_end.nil?
    raise "did not find start and end for #{interval} in #{ref_seq.inspect}!"
  end
  text_start...text_end
end
#each_raw_seq ⇒ Object
83 84 85 |
# File 'lib/bio/maf/maf.rb', line 83
# Yield each sequence of this block in order.
#
# When called without a block, returns an Enumerator over the sequences
# instead of raising LocalJumpError.
#
# @yield [sequence] each element of #sequences
# @return the result of sequences.each when a block is given, otherwise
#   an Enumerator
def each_raw_seq
  return enum_for(:each_raw_seq) unless block_given?

  sequences.each { |seq| yield seq }
end
#filtered? ⇒ Boolean
Whether this block has been modified by a parser filter.
96 97 98 |
# File 'lib/bio/maf/maf.rb', line 96
# Whether this block has been modified by a parser filter.
#
# @return the filtered flag given to the constructor, unchanged
def filtered?
  @filtered
end
#find_gaps ⇒ Object
Find gaps present in all sequences, returning their offsets and sizes without modifying the block. Such gaps generally occur when some sequences have been filtered out.
111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
# File 'lib/bio/maf/maf.rb', line 111
# Find gaps present in all sequences, without modifying the block.
#
# Each run of gap characters in the first sequence is located, and every
# other non-empty sequence is checked for a gap run starting at the same
# text offset. When all of them have one, the shortest of the matching
# runs is recorded.
#
# Blocks with no other non-empty sequences are supported: every gap run
# of the first sequence is then reported.
#
# @return [Array<Array>] [offset, length] pairs in ascending offset order
def find_gaps
  ref_s = StringScanner.new(sequences.first.text)
  others = sequences.drop(1)
                    .reject { |s| s.empty? }
                    .collect { |s| StringScanner.new(s.text) }
  gaps = []
  while ref_s.scan_until(GAP)
    # Named gap_start rather than offset to avoid shadowing the #offset reader.
    gap_start = ref_s.pos - ref_s.matched_size
    others.each { |s| s.pos = gap_start }
    next unless others.all? { |s| s.scan(GAP) }

    # Taking the minimum over one combined list keeps this safe when
    # `others` is empty (the original compared an Integer with nil there).
    gap_size = [ref_s.matched_size, *others.map { |s| s.matched_size }].min
    gaps << [gap_start, gap_size]
  end
  gaps
end
#join(other) ⇒ Object
213 214 215 216 217 218 219 220 221 |
# File 'lib/bio/maf/maf.rb', line 213
# Build a new Block by joining each of this block's sequences with the
# sequence from +other+ that has the same source.
#
# The new block gets a copy of this block's vars with :score set to '0.0',
# this block's offset and filtered flag, and a nil size.
#
# @param other [Block] block to append
# @return [Block]
def join(other)
  joined = sequences.each_with_index.map do |mine, idx|
    theirs = other.seq_from(mine.source, idx)
    mine.join(theirs)
  end
  params = vars.dup
  params[:score] = '0.0'
  Block.new(params, joined, offset, nil, @filtered)
end
#joinable_with?(other) ⇒ Boolean
195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 |
# File 'lib/bio/maf/maf.rb', line 195
# Whether +other+ can be joined onto the end of this block.
#
# Requires that both blocks have the same number of sequences, that the
# reference sequences share a source and are adjacent (this one's end
# equals the other's start), and that every sequence here, including the
# reference sequence, has a same-source counterpart in +other+ that it
# reports as joinable.
#
# @param other [Block]
# @return [Boolean]
def joinable_with?(other)
  return false unless sequences.size == other.sequences.size

  r1 = ref_seq
  r2 = other.ref_seq
  return false if r1.source != r2.source
  return false if r1.end != r2.start

  # The original called `next` on the enumerator before `find`, which
  # does not skip anything: internal iteration always restarts from the
  # first element. All sequences are therefore checked, as before.
  sequences.each_with_index.all? do |s1, i|
    s2 = other.seq_from(s1.source, i)
    s2 && s1.joinable_with?(s2)
  end
end
#raw_seq(i) ⇒ Object
79 80 81 |
# File 'lib/bio/maf/maf.rb', line 79
# Fetch the sequence at position +i+ of #sequences.
#
# Uses #fetch, so an out-of-range index raises rather than returning nil.
#
# @param i [Integer] index into #sequences
# @return the sequence at that index
def raw_seq(i)
  all_seqs = sequences
  all_seqs.fetch(i)
end
#ref_seq ⇒ Object
75 76 77 |
# File 'lib/bio/maf/maf.rb', line 75
# The reference sequence of this block, i.e. the first entry of
# #sequences.
#
# @return the first sequence, or nil when there are none
def ref_seq
  sequences.first
end
#remove_gaps! ⇒ Object
Remove gaps present in all sequences. These would generally occur when some sequences have been filtered out.
132 133 134 135 136 137 138 139 140 |
# File 'lib/bio/maf/maf.rb', line 132
# Remove gaps present in all sequences, as reported by #find_gaps.
#
# Gaps are deleted from last to first so that the offsets of the gaps
# still to be processed stay valid.
#
# @return [Integer] the number of gaps removed
def remove_gaps!
  found = find_gaps
  found.reverse_each do |gap_start, gap_len|
    sequences.each { |seq| seq.delete_text(gap_start, gap_len) }
  end
  found.size
end
#seq_from(src, pos_guess) ⇒ Object
223 224 225 226 227 228 229 230 |
# File 'lib/bio/maf/maf.rb', line 223
# Look up the sequence with the given source.
#
# +pos_guess+ is tried first as a shortcut; if the sequence at that
# index is missing or has a different source, all sequences are searched.
#
# @param src source identifier to match against each sequence's #source
# @param pos_guess [Integer] index where the sequence is expected to be
# @return the matching sequence, or nil if none has that source
def seq_from(src, pos_guess)
  guess = sequences[pos_guess]
  # The guess may be nil when pos_guess is out of range; fall back to a
  # search instead of failing on nil.source.
  return guess if guess && guess.source == src

  sequences.find { |s| s.source == src }
end
#slice(interval) ⇒ Block
Returns a new Block covering only the region where it overlaps the given interval.
146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
# File 'lib/bio/maf/maf.rb', line 146
# Returns a new Block covering only the region where it overlaps the
# given interval.
#
# If the interval compares as :equal to the reference sequence's interval,
# the block itself is returned rather than a copy.
#
# @param interval interval to slice by; must respond to #compare,
#   #intersection and #chrom
# @return [Block]
# @raise [RuntimeError] if the interval does not overlap the reference
#   sequence, is on a different chromosome, or compares in an unhandled way
def slice(interval)
  relation = interval.compare(ref_seq.interval)
  case relation
  when :equal
    self
  when :contains, :contained_by, :left_overlapped, :right_overlapped
    overlap = interval.intersection(ref_seq.interval)
    _slice(overlap)
  when :left_adjacent, :right_adjacent, :left_off, :right_off
    raise "Cannot slice a block with a non-overlapping interval! Block #{ref_seq.interval}, interval #{interval}"
  when :different_chrom
    raise "Cannot slice a block with reference sequence #{ref_seq.source} using an interval on #{interval.chrom}!"
  else
    raise "Unhandled comparison result: #{interval.compare(ref_seq.interval)}"
  end
end
#text_size ⇒ Object
Text size of the alignment block. This is the number of text characters in each line of sequence data, including dashes and other gaps in the sequence.
90 91 92 |
# File 'lib/bio/maf/maf.rb', line 90
# Text size of the alignment block: the number of text characters in each
# line of sequence data, including dashes and other gaps in the sequence.
# Measured on the first sequence's text.
#
# @return [Integer]
def text_size
  first_text = sequences[0].text
  first_text.length
end
#to_bio_alignment ⇒ Object
100 101 102 103 |
# File 'lib/bio/maf/maf.rb', line 100
# Convert this block to a Bio::BioAlignment::Alignment built from each
# sequence's own #to_bio_alignment result.
#
# @return [Bio::BioAlignment::Alignment]
def to_bio_alignment
  converted = sequences.map { |seq| seq.to_bio_alignment }
  Bio::BioAlignment::Alignment.new(converted)
end