Class: Bio::MAF::Sequence
- Inherits:
-
Object
- Object
- Bio::MAF::Sequence
- Defined in:
- lib/bio/maf/maf.rb
Overview
A sequence within an alignment block.
Direct Known Subclasses
Constant Summary collapse
- I_STATUS =
{ 'C' => :contiguous, 'I' => :intervening, 'N' => :first, 'n' => :first_bridged, 'M' => :missing_data, 'T' => :tandem }
Instance Attribute Summary collapse
-
#i_data ⇒ Array<String>
Array of raw synteny information from 'i' line.
-
#quality ⇒ String
Quality string from 'q' line.
-
#size ⇒ Integer
readonly
Size of aligning region in source sequence.
-
#source ⇒ String
readonly
Source sequence name.
-
#src_size ⇒ Integer
(also: #source_size)
readonly
Size of the entire source sequence, not just the aligning region.
-
#start ⇒ Integer
readonly
Zero-based start position.
-
#strand ⇒ Symbol
readonly
:+ or :-, indicating which strand the alignment is to.
-
#text ⇒ String
readonly
Sequence data for the alignment, including insertions.
Instance Method Summary collapse
- #decode_status_char(c) ⇒ Object
- #delete_text(offset, len) ⇒ Object
-
#empty? ⇒ Boolean
Whether this sequence is empty.
- #end ⇒ Object
- #fasta_desc ⇒ Object
- #gapped? ⇒ Boolean
-
#initialize(source, start, size, strand, src_size, text) ⇒ Sequence
constructor
A new instance of Sequence.
- #interval ⇒ Object
- #join(o) ⇒ Object
- #joinable_with?(o) ⇒ Boolean
- #left_count ⇒ Object
- #left_status ⇒ Object
- #left_status_char ⇒ Object
- #right_count ⇒ Object
- #right_status ⇒ Object
- #right_status_char ⇒ Object
- #slice(range) ⇒ Object
- #species ⇒ Object
-
#text_range(range) ⇒ Object
Maps the given zero-based genomic range onto a range of string offsets, suitable for extracting the text for the given range from #text.
- #to_bio_alignment ⇒ Object
Constructor Details
#initialize(source, start, size, strand, src_size, text) ⇒ Sequence
Returns a new instance of Sequence.
262 263 264 265 266 267 268 269 |
# File 'lib/bio/maf/maf.rb', line 262
# Stores the six fields describing one sequence of an alignment block.
#
# @param source [String] source sequence name
# @param start [Integer] zero-based start position
# @param size [Integer] size of the aligning region in the source sequence
# @param strand [Symbol] :+ or :-
# @param src_size [Integer] size of the entire source sequence
# @param text [String] sequence data for the alignment, including insertions
def initialize(source, start, size, strand, src_size, text)
  @source, @start, @size = source, start, size
  @strand, @src_size, @text = strand, src_size, text
end
Instance Attribute Details
#i_data ⇒ Array<String>
Array of raw synteny information from 'i' line.
256 257 258 |
# File 'lib/bio/maf/maf.rb', line 256
# @return [Array<String>] raw synteny information from the 'i' line
def i_data
  @i_data
end
#quality ⇒ String
Quality string from 'q' line.
259 260 261 |
# File 'lib/bio/maf/maf.rb', line 259
# @return [String] quality string from the 'q' line
def quality
  @quality
end
#size ⇒ Integer (readonly)
Returns the size of the aligning region in the source sequence.
243 244 245 |
# File 'lib/bio/maf/maf.rb', line 243
# @return [Integer] size of the aligning region in the source sequence
def size
  @size
end
#source ⇒ String (readonly)
Returns the source sequence name.
239 240 241 |
# File 'lib/bio/maf/maf.rb', line 239
# @return [String] source sequence name
def source
  @source
end
#src_size ⇒ Integer (readonly) Also known as: source_size
Size of the entire source sequence, not just the aligning region.
250 251 252 |
# File 'lib/bio/maf/maf.rb', line 250
# @return [Integer] size of the entire source sequence, not just the
#   aligning region
def src_size
  @src_size
end
#start ⇒ Integer (readonly)
Returns the zero-based start position.
241 242 243 |
# File 'lib/bio/maf/maf.rb', line 241
# @return [Integer] zero-based start position
def start
  @start
end
#strand ⇒ Symbol (readonly)
:+ or :-, indicating which strand the alignment is to.
246 247 248 |
# File 'lib/bio/maf/maf.rb', line 246
# @return [Symbol] :+ or :-, indicating which strand the alignment is to
def strand
  @strand
end
#text ⇒ String (readonly)
Sequence data for the alignment, including insertions.
253 254 255 |
# File 'lib/bio/maf/maf.rb', line 253
# @return [String] sequence data for the alignment, including insertions
def text
  @text
end
Instance Method Details
#decode_status_char(c) ⇒ Object
317 318 319 |
# File 'lib/bio/maf/maf.rb', line 317
# Translates a status character into the symbol registered for it in
# I_STATUS.
#
# @param c [String] status character to look up
# @return [Symbol] the I_STATUS entry for +c+
# @raise [RuntimeError] when I_STATUS has no entry for +c+
def decode_status_char(c)
  status = I_STATUS[c]
  raise("Unsupported status character #{c}!") unless status
  status
end
#delete_text(offset, len) ⇒ Object
350 351 352 353 354 355 356 357 |
# File 'lib/bio/maf/maf.rb', line 350
# Removes +len+ characters starting at +offset+ from the alignment text
# in place, and from the quality string as well when one is present.
# Does nothing when the sequence reports itself as empty.
#
# @param offset [Integer] string offset of the first character to remove
# @param len [Integer] number of characters to remove
def delete_text(offset, len)
  return if empty?

  text.slice!(offset, len)
  quality.slice!(offset, len) if quality
end
#empty? ⇒ Boolean
Whether this sequence is empty. Only true for EmptySequence instances from 'e' lines.
300 301 302 |
# File 'lib/bio/maf/maf.rb', line 300
# Whether this sequence is empty. This implementation always answers
# false; per the class documentation only EmptySequence instances
# (from 'e' lines) are empty.
#
# @return [Boolean] always false here
def empty?
  false
end
#end ⇒ Object
271 272 273 |
# File 'lib/bio/maf/maf.rb', line 271
# End coordinate of the aligning region, computed as start + size.
#
# @return [Integer] start position plus size
def end
  start + size
end
#fasta_desc ⇒ Object
363 364 365 |
# File 'lib/bio/maf/maf.rb', line 363
# Builds a description string of the form "source:start-stop", where
# stop is start + size.
#
# @return [String]
def fasta_desc
  stop = start + size
  "#{source}:#{start}-#{stop}"
end
#gapped? ⇒ Boolean
304 305 306 |
# File 'lib/bio/maf/maf.rb', line 304
# Whether the alignment text length differs from the sequence size.
#
# @return [Boolean] true when text.size and size disagree
def gapped?
  text.size != size
end
#interval ⇒ Object
275 276 277 |
# File 'lib/bio/maf/maf.rb', line 275
# Builds a GenomicInterval from this sequence's source, start and end
# using GenomicInterval.zero_based.
#
# @return [Object] whatever GenomicInterval.zero_based returns
def interval
  GenomicInterval.zero_based(source, start, self.end)
end
#join(o) ⇒ Object
373 374 375 376 377 378 379 380 381 382 383 384 |
# File 'lib/bio/maf/maf.rb', line 373
# Builds a new Sequence covering this sequence followed by +o+: sizes are
# added and texts concatenated, while source, start, strand and src_size
# are taken from the receiver. Quality strings are concatenated only when
# both sequences have one.
#
# @param o [Sequence] the sequence to append
# @return [Sequence] the combined sequence
def join(o)
  joined = Sequence.new(source, start, size + o.size, strand, src_size, text + o.text)
  joined.quality = quality + o.quality if quality && o.quality
  joined
end
#joinable_with?(o) ⇒ Boolean
367 368 369 370 371 |
# File 'lib/bio/maf/maf.rb', line 367
# Whether +o+ can be appended directly to this sequence.
#
# The two sequences must share a source, +o+ must start exactly where
# this sequence ends, both must be on the same strand, and both must
# agree on emptiness. The source comparison matters because #join keeps
# only the receiver's source; without it, sequences from unrelated
# sources with coincidentally matching coordinates would be reported as
# joinable.
#
# @param o [Sequence] candidate sequence to follow this one
# @return [Boolean]
def joinable_with?(o)
  source == o.source &&
    self.end == o.start &&
    strand == o.strand &&
    empty? == o.empty?
end
#left_count ⇒ Object
329 330 331 |
# File 'lib/bio/maf/maf.rb', line 329
# Integer value of the second field of the 'i' line data.
#
# @return [Integer, nil] i_data[1] converted with to_i, or nil when no
#   synteny data is set
def left_count
  data = i_data
  data && data[1].to_i
end
#left_status ⇒ Object
325 326 327 |
# File 'lib/bio/maf/maf.rb', line 325
# Decoded form of the left status character.
#
# @return [Symbol, nil] result of decode_status_char for the left status
#   character, or nil when no synteny data is set
def left_status
  data = i_data
  data && decode_status_char(left_status_char)
end
#left_status_char ⇒ Object
321 322 323 |
# File 'lib/bio/maf/maf.rb', line 321
# First field of the 'i' line data, undecoded.
#
# @return [String, nil] i_data[0], or nil when no synteny data is set
def left_status_char
  data = i_data
  data && data[0]
end
#right_count ⇒ Object
341 342 343 |
# File 'lib/bio/maf/maf.rb', line 341
# Integer value of the fourth field of the 'i' line data.
#
# @return [Integer, nil] i_data[3] converted with to_i, or nil when no
#   synteny data is set
def right_count
  data = i_data
  data && data[3].to_i
end
#right_status ⇒ Object
337 338 339 |
# File 'lib/bio/maf/maf.rb', line 337
# Decoded form of the right status character.
#
# @return [Symbol, nil] result of decode_status_char for the right status
#   character, or nil when no synteny data is set
def right_status
  data = i_data
  data && decode_status_char(right_status_char)
end
#right_status_char ⇒ Object
333 334 335 |
# File 'lib/bio/maf/maf.rb', line 333
# Third field of the 'i' line data, undecoded.
#
# @return [String, nil] i_data[2], or nil when no synteny data is set
def right_status_char
  data = i_data
  data && data[2]
end
#slice(range) ⇒ Object
279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 |
# File 'lib/bio/maf/maf.rb', line 279
# Extracts the given range of text offsets as a new Sequence.
#
# The new start is advanced by the number of non-gap characters that
# precede the range, and the new size is the number of non-gap characters
# inside it. The quality string, when present, is sliced with the same
# range. Synteny data is not carried over.
#
# @param range [Range] range of offsets into #text
# @return [Sequence] the sliced sequence
# @raise [RuntimeError] when the range cannot be extracted from #text
def slice(range)
  leading = text[0...range.begin]
  skipped_bases = leading.delete('-').length
  piece = text[range]
  raise "could not extract slice #{range} from #{self.inspect}!" unless piece

  kept_bases = piece.delete('-').length
  sliced = Sequence.new(source, start + skipped_bases, kept_bases, strand, src_size, piece)
  sliced.quality = quality[range] if quality
  # TODO: what to do with synteny data?
  sliced
end
#species ⇒ Object
345 346 347 348 |
# File 'lib/bio/maf/maf.rb', line 345
# The part of the source name before its first '.', if there is one.
#
# @return [String, nil] leading component of source, or nil when source
#   contains no '.'
def species
  prefix, remainder = source.split('.', 2)
  remainder.nil? ? nil : prefix
end
#text_range(range) ⇒ Object
Maps the given zero-based genomic range onto a range of string offsets, suitable for extracting the text for the given range from #text.
391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 |
# File 'lib/bio/maf/maf.rb', line 391
# Maps the given zero-based genomic range onto a range of string offsets,
# suitable for extracting the text for that range from #text.
#
# @param range [Range] zero-based genomic range; inclusive and exclusive
#   ends are both accepted
# @return [Range] end-exclusive range of offsets into #text
# @raise [RuntimeError] when the range lies outside the sequence bounds or
#   no matching stretch of text is found
def text_range(range)
  # Normalise to an exclusive end so both range flavours share one path.
  r_end = range.exclude_end? ? range.end : range.end + 1

  # Whole-sequence request: the entire text, gaps included.
  return 0...text.size if range.begin == start && r_end - range.begin == size

  if range.begin < start || r_end > self.end
    raise "Range #{range} outside sequence bounds; start #{start}, size #{size}"
  end

  # Without gaps, genomic positions and text offsets differ only by start.
  return (range.begin - start)...(r_end - start) unless gapped?

  genomic_pos = start  # genomic position where the current run begins
  text_pos = 0         # text offset where the current run begins
  match_begin = nil
  match = nil
  text.scan(/(\w+|-+)/) do |groups|
    run = groups.first
    unless run[0] == '-'
      # Sequence run: it advances the genomic position; gap runs do not.
      run_end = genomic_pos + run.size
      if (genomic_pos...run_end).cover?(range.begin)
        match_begin = range.begin - genomic_pos + text_pos
      end
      if (genomic_pos..run_end).cover?(r_end)
        raise "reached end before start!" unless match_begin
        match = match_begin...(r_end - genomic_pos + text_pos)
        break
      end
      genomic_pos = run_end
    end
    text_pos += run.size
  end
  raise "no match found!" unless match
  match
end
#to_bio_alignment ⇒ Object
359 360 361 |
# File 'lib/bio/maf/maf.rb', line 359
# Wraps this sequence's source name and alignment text in a
# Bio::BioAlignment::Sequence.
#
# @return [Bio::BioAlignment::Sequence]
def to_bio_alignment
  Bio::BioAlignment::Sequence.new(source, text)
end