Class: Bio::MAF::Block
- Inherits:
-
Object
- Object
- Bio::MAF::Block
- Defined in:
- lib/bio/maf/maf.rb
Overview
A MAF alignment block.
Constant Summary collapse
- GAP =
/-+/
Instance Attribute Summary collapse
-
#offset ⇒ Integer
readonly
Offset of the alignment block within the MAF file, in bytes.
-
#sequences ⇒ Array<Sequence>
readonly
Sequences, one per 's' or 'e' line.
-
#size ⇒ Integer
readonly
Size of the alignment block within the MAF file, in bytes.
-
#vars ⇒ Object
readonly
Parameters from the 'a' line starting the alignment block.
Instance Method Summary collapse
- #_slice(interval) ⇒ Object
- #_slice_text_range(interval) ⇒ Object
- #each_raw_seq ⇒ Object
-
#filtered? ⇒ Boolean
Whether this block has been modified by a parser filter.
-
#find_gaps ⇒ Object
Find gaps present in all sequences.
-
#initialize(vars, sequences, offset, size, filtered) ⇒ Block
constructor
A new instance of Block.
- #join(other) ⇒ Object
- #joinable_with?(other) ⇒ Boolean
- #raw_seq(i) ⇒ Object
- #ref_seq ⇒ Object
-
#remove_gaps! ⇒ Object
Remove gaps present in all sequences.
- #seq_from(src, pos_guess) ⇒ Object
-
#slice(interval) ⇒ Block
Returns a new Block covering only the region where it overlaps the given interval.
-
#text_size ⇒ Object
Text size of the alignment block.
- #to_bio_alignment ⇒ Object
Constructor Details
#initialize(vars, sequences, offset, size, filtered) ⇒ Block
Returns a new instance of Block.
66 67 68 69 70 71 72 |
# File 'lib/bio/maf/maf.rb', line 66
# Builds a block from its already-parsed parts; each argument is stored
# unchanged in the instance variable of the same name.
#
# @param vars [Object] parameters from the 'a' line starting the block
# @param sequences [Array<Sequence>] sequences, one per 's' or 'e' line
# @param offset [Integer] offset of the block within the MAF file, in bytes
# @param size [Integer] size of the block within the MAF file, in bytes
# @param filtered [Boolean] whether a parser filter has modified this block
def initialize(vars, sequences, offset, size, filtered)
  @vars, @sequences = vars, sequences
  @offset, @size = offset, size
  @filtered = filtered
end
Instance Attribute Details
#offset ⇒ Integer (readonly)
Offset of the alignment block within the MAF file, in bytes.
61 62 63 |
# File 'lib/bio/maf/maf.rb', line 61
# Offset of the alignment block within the MAF file, in bytes.
#
# @return [Integer] the value given to the constructor as +offset+
def offset
  @offset
end
#sequences ⇒ Array<Sequence> (readonly)
Sequences, one per 's' or 'e' line.
58 59 60 |
# File 'lib/bio/maf/maf.rb', line 58
# Sequences of this block, one per 's' or 'e' line.
#
# @return [Array<Sequence>] the same object that was given to the constructor
def sequences
  @sequences
end
#size ⇒ Integer (readonly)
Size of the alignment block within the MAF file, in bytes.
64 65 66 |
# File 'lib/bio/maf/maf.rb', line 64
# Size of the alignment block within the MAF file, in bytes.
#
# @return [Integer, nil] the value given to the constructor as +size+
#   (blocks produced by #join are built with nil here)
def size
  @size
end
#vars ⇒ Object (readonly)
Parameters from the 'a' line starting the alignment block.
55 56 57 |
# File 'lib/bio/maf/maf.rb', line 55
# Parameters from the 'a' line starting the alignment block.
#
# @return [Object] the value given to the constructor as +vars+
def vars
  @vars
end
Instance Method Details
#_slice(interval) ⇒ Object
160 161 162 163 164 165 166 167 |
# File 'lib/bio/maf/maf.rb', line 160
# Builds a new Block whose sequences are cut down to the text columns that
# correspond to +interval+ on the reference sequence.
#
# The new block gets a copy of #vars and keeps this block's #offset, #size
# and filtered flag. The score is left as-is (resetting it to '0.0' is
# disabled in the original source).
#
# @param interval [Object] interval handed on to #_slice_text_range
# @return [Block] the sliced block
def _slice(interval)
  text_range = _slice_text_range(interval)
  trimmed = sequences.map { |seq| seq.slice(text_range) }
  # TODO: should the filtered param be #modified? instead?
  Block.new(vars.dup, trimmed, offset, size, @filtered)
end
#_slice_text_range(interval) ⇒ Object
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 |
# File 'lib/bio/maf/maf.rb', line 169
# Translates +interval+ into a range of text columns of the reference
# sequence. Columns holding '-' are gaps and do not advance the sequence
# position; every other column advances it by one.
#
# @param interval [Object] must respond to +zero_start+ and +zero_end+
# @return [Range] end-exclusive range of text columns
# @raise [RuntimeError] if the start or the end position is never reached
#   while walking the reference text
def _slice_text_range(interval)
  wanted_start = interval.zero_start
  wanted_end = interval.zero_end
  seq_pos = ref_seq.start
  first_col = nil
  end_col = nil
  ref_seq.text.each_char.with_index do |char, col|
    # gap columns consume no sequence position
    next if char == '-'
    first_col = col if seq_pos == wanted_start
    seq_pos += 1
    next unless first_col && seq_pos == wanted_end
    # the column just consumed is the last one inside the interval
    end_col = col + 1
    break
  end
  if first_col.nil? || end_col.nil?
    raise "did not find start and end for #{interval} in #{ref_seq.inspect}!"
  end
  first_col...end_col
end
#each_raw_seq ⇒ Object
82 83 84 |
# File 'lib/bio/maf/maf.rb', line 82
# Yields every sequence of the block, in order.
#
# @yieldparam seq [Sequence] one entry of #sequences
# @return [Array<Sequence>] the result of iterating #sequences
def each_raw_seq
  sequences.each do |seq|
    yield seq
  end
end
#filtered? ⇒ Boolean
Whether this block has been modified by a parser filter.
95 96 97 |
# File 'lib/bio/maf/maf.rb', line 95
# Whether this block has been modified by a parser filter.
#
# @return [Boolean] the +filtered+ value given to the constructor
def filtered?
  @filtered
end
#find_gaps ⇒ Object
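Find gaps present in all sequences. These would generally occur when some sequences have been filtered out. Returns an array of [offset, length] pairs and does not modify the block; use #remove_gaps! to delete the gaps.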
110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
# File 'lib/bio/maf/maf.rb', line 110
# Finds gaps present in all sequences. These would generally occur when some
# sequences have been filtered out. Nothing is modified; see #remove_gaps!.
#
# Every run of '-' in the first sequence is a candidate. A candidate is kept
# only if each other non-empty sequence also has a gap starting at the same
# text offset; the reported length is the shortest of those runs.
#
# When there is no other non-empty sequence, every gap run of the first
# sequence is reported (previously this case raised ArgumentError because
# the minimum of an empty list is nil).
#
# NOTE(review): when another sequence's run is shorter than the reference
# run, scanning resumes after the whole reference run, so a second shared
# gap inside that same run is not reported.
#
# @return [Array<Array(Integer, Integer)>] [text offset, length] pairs in
#   ascending offset order
def find_gaps
  ref_scanner = StringScanner.new(sequences.first.text)
  other_scanners = sequences.drop(1)
                            .reject { |seq| seq.empty? }
                            .map { |seq| StringScanner.new(seq.text) }
  gaps = []
  while ref_scanner.scan_until(GAP)
    gap_start = ref_scanner.pos - ref_scanner.matched_size
    other_scanners.each { |scanner| scanner.pos = gap_start }
    # scan (unlike scan_until) only matches at the current position
    next unless other_scanners.all? { |scanner| scanner.scan(GAP) }
    # splat so that an empty list of other scanners cannot contribute nil
    gap_size = [ref_scanner.matched_size,
                *other_scanners.map { |scanner| scanner.matched_size }].min
    gaps << [gap_start, gap_size]
  end
  gaps
end
#join(other) ⇒ Object
212 213 214 215 216 217 218 219 220 |
# File 'lib/bio/maf/maf.rb', line 212
# Builds a new Block by joining each sequence of this block with the
# sequence of +other+ that has the same source.
#
# The result gets a copy of #vars with :score set to '0.0', this block's
# #offset and filtered flag, and a nil size.
#
# @param other [Block] the block to append
# @return [Block] the joined block
def join(other)
  joined = sequences.each_with_index.map do |mine, idx|
    mine.join(other.seq_from(mine.source, idx))
  end
  merged_vars = vars.dup
  merged_vars[:score] = '0.0'
  Block.new(merged_vars, joined, offset, nil, @filtered)
end
#joinable_with?(other) ⇒ Boolean
194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 |
# File 'lib/bio/maf/maf.rb', line 194
# Whether +other+ can be joined onto the end of this block.
#
# Requires the same number of sequences, reference sequences with the same
# source where this one's end equals the other's start, and for every
# sequence here a same-source sequence in +other+ that it reports as
# joinable.
#
# The original code called Enumerator#next before #find to skip the
# reference row. External iteration does not affect #find, so every row was
# checked anyway; that no-op is removed and all rows are still checked.
#
# @param other [Block] candidate block to join with
# @return [Boolean]
def joinable_with?(other)
  return false unless sequences.size == other.sequences.size

  r1 = ref_seq
  r2 = other.ref_seq
  return false if r1.source != r2.source
  return false if r1.end != r2.start

  sequences.each_with_index.all? do |s1, i|
    s2 = other.seq_from(s1.source, i)
    s2 && s1.joinable_with?(s2)
  end
end
#raw_seq(i) ⇒ Object
78 79 80 |
# File 'lib/bio/maf/maf.rb', line 78
# Returns the sequence at position +i+ via +fetch+ on #sequences, so an
# invalid position raises rather than returning nil.
#
# @param i [Integer] position within #sequences
# @return [Sequence]
def raw_seq(i)
  sequences.fetch(i)
end
#ref_seq ⇒ Object
74 75 76 |
# File 'lib/bio/maf/maf.rb', line 74
# The reference sequence, i.e. the first entry of #sequences.
#
# @return [Sequence, nil] nil when the block has no sequences
def ref_seq
  sequences.first
end
#remove_gaps! ⇒ Object
Remove gaps present in all sequences. These would generally occur when some sequences have been filtered out.
131 132 133 134 135 136 137 138 139 |
# File 'lib/bio/maf/maf.rb', line 131
# Remove gaps present in all sequences. These would generally occur when
# some sequences have been filtered out.
#
# Gaps come from #find_gaps and are deleted from every sequence, last gap
# first, presumably so the offsets of earlier gaps stay valid.
#
# @return [Integer] number of gaps removed
def remove_gaps!
  found = find_gaps
  found.reverse_each do |start, length|
    sequences.each { |seq| seq.delete_text(start, length) }
  end
  found.size
end
#seq_from(src, pos_guess) ⇒ Object
222 223 224 225 226 227 228 229 |
# File 'lib/bio/maf/maf.rb', line 222
# Finds the sequence whose source is +src+.
#
# +pos_guess+ is tried first. If that position holds a different source, or
# no sequence at all, the sequences are searched in order instead.
# (Previously a position with no sequence raised NoMethodError on nil.)
#
# @param src [Object] source to look for, compared with ==
# @param pos_guess [Integer] position in #sequences to try first
# @return [Sequence, nil] the matching sequence, or nil if none has +src+
def seq_from(src, pos_guess)
  guess = sequences[pos_guess]
  return guess if guess && guess.source == src

  sequences.find { |s| s.source == src }
end
#slice(interval) ⇒ Block
Returns a new Block covering only the region where it overlaps the given interval.
145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
# File 'lib/bio/maf/maf.rb', line 145
# Returns a new Block covering only the region where it overlaps the given
# interval.
#
# The interval is compared with the reference sequence's interval. An equal
# interval returns this block itself; an overlapping one is sliced to the
# intersection of the two.
#
# @param interval [Object] must respond to +compare+, +intersection+ and
#   +chrom+
# @return [Block] self, or a new sliced block
# @raise [RuntimeError] if the interval does not overlap the block, is on a
#   different chromosome, or the comparison result is not recognised
def slice(interval)
  relation = interval.compare(ref_seq.interval)
  case relation
  when :equal
    self
  when :contains, :contained_by, :left_overlapped, :right_overlapped
    _slice(interval.intersection(ref_seq.interval))
  when :left_adjacent, :right_adjacent, :left_off, :right_off
    raise "Cannot slice a block with a non-overlapping interval! Block #{ref_seq.interval}, interval #{interval}"
  when :different_chrom
    raise "Cannot slice a block with reference sequence #{ref_seq.source} using an interval on #{interval.chrom}!"
  else
    raise "Unhandled comparison result: #{relation}"
  end
end
#text_size ⇒ Object
Text size of the alignment block. This is the number of text characters in each line of sequence data, including dashes and other gaps in the sequence.
89 90 91 |
# File 'lib/bio/maf/maf.rb', line 89
# Text size of the alignment block. This is the number of text characters
# in each line of sequence data, including dashes and other gaps in the
# sequence. It is read from the first sequence only.
#
# @return [Integer]
def text_size
  first_row = sequences.first
  first_row.text.size
end
#to_bio_alignment ⇒ Object
99 100 101 102 |
# File 'lib/bio/maf/maf.rb', line 99
# Converts the block into a Bio::BioAlignment::Alignment by converting each
# sequence with its own +to_bio_alignment+.
#
# @return [Bio::BioAlignment::Alignment]
def to_bio_alignment
  rows = sequences.map(&:to_bio_alignment)
  Bio::BioAlignment::Alignment.new(rows)
end