Class: Bio::MAF::Sequence
- Inherits:
-
Object
- Object
- Bio::MAF::Sequence
- Defined in:
- lib/bio/maf/maf.rb
Overview
A sequence within an alignment block.
Direct Known Subclasses
Constant Summary collapse
- I_STATUS =
{ 'C' => :contiguous, 'I' => :intervening, 'N' => :first, 'n' => :first_bridged, 'M' => :missing_data, 'T' => :tandem }
Instance Attribute Summary collapse
-
#i_data ⇒ Array<String>
Array of raw synteny information from 'i' line.
-
#quality ⇒ String
Quality string from 'q' line.
-
#size ⇒ Integer
readonly
Size of aligning region in source sequence.
-
#source ⇒ String
readonly
Source sequence name.
-
#src_size ⇒ Integer
(also: #source_size)
readonly
Size of the entire source sequence, not just the aligning region.
-
#start ⇒ Integer
readonly
Zero-based start position.
-
#strand ⇒ Symbol
readonly
:+ or :-, indicating which strand the alignment is on.
-
#text ⇒ String
readonly
Sequence data for the alignment, including insertions.
Instance Method Summary collapse
- #decode_status_char(c) ⇒ Object
- #delete_text(offset, len) ⇒ Object
-
#empty? ⇒ Boolean
Whether this sequence is empty.
- #end ⇒ Object
- #gapped? ⇒ Boolean
-
#initialize(*args) ⇒ Sequence
constructor
A new instance of Sequence.
- #interval ⇒ Object
- #join(o) ⇒ Object
- #joinable_with?(o) ⇒ Boolean
- #left_count ⇒ Object
- #left_status ⇒ Object
- #left_status_char ⇒ Object
- #right_count ⇒ Object
- #right_status ⇒ Object
- #right_status_char ⇒ Object
- #slice(range) ⇒ Object
- #species ⇒ Object
-
#text_range(range) ⇒ Object
Maps the given zero-based genomic range onto a range of string offsets, suitable for extracting the text for the given range from #text.
- #to_bio_alignment ⇒ Object
- #write_fasta(writer) ⇒ Object
Constructor Details
#initialize(*args) ⇒ Sequence
Returns a new instance of Sequence.
261 262 263 |
# File 'lib/bio/maf/maf.rb', line 261
# Builds a sequence from positional arguments, taken in this order:
# source, start, size, strand, src_size, text.
#
# Arguments that are not supplied leave the matching attribute nil;
# arguments beyond the sixth are ignored.
#
# @param args [Array] positional field values
def initialize(*args)
  @source   = args[0]
  @start    = args[1]
  @size     = args[2]
  @strand   = args[3]
  @src_size = args[4]
  @text     = args[5]
end
Instance Attribute Details
#i_data ⇒ Array<String>
Array of raw synteny information from 'i' line.
255 256 257 |
# File 'lib/bio/maf/maf.rb', line 255
# Reader for the raw synteny information taken from the 'i' line.
#
# @return [Array<String>] the stored synteny fields, returned as-is
def i_data
  @i_data
end
#quality ⇒ String
Quality string from 'q' line.
258 259 260 |
# File 'lib/bio/maf/maf.rb', line 258
# Reader for the quality string taken from the 'q' line.
#
# @return [String] the stored quality string, returned as-is
def quality
  @quality
end
#size ⇒ Integer (readonly)
Returns the size of the aligning region in the source sequence.
242 243 244 |
# File 'lib/bio/maf/maf.rb', line 242
# Reader for the size of the aligning region in the source sequence.
#
# @return [Integer] the stored size
def size
  @size
end
#source ⇒ String (readonly)
Returns the source sequence name.
238 239 240 |
# File 'lib/bio/maf/maf.rb', line 238
# Reader for the source sequence name.
#
# @return [String] the stored source name
def source
  @source
end
#src_size ⇒ Integer (readonly) Also known as: source_size
Size of the entire source sequence, not just the aligning region.
249 250 251 |
# File 'lib/bio/maf/maf.rb', line 249
# Reader for the size of the entire source sequence, not just the
# aligning region.
#
# @return [Integer] the stored source size
def src_size
  @src_size
end
#start ⇒ Integer (readonly)
Returns the zero-based start position.
240 241 242 |
# File 'lib/bio/maf/maf.rb', line 240
# Reader for the zero-based start position.
#
# @return [Integer] the stored start position
def start
  @start
end
#strand ⇒ Symbol (readonly)
:+ or :-, indicating which strand the alignment is on.
245 246 247 |
# File 'lib/bio/maf/maf.rb', line 245
# Reader for the strand marker (:+ or :-).
#
# @return [Symbol] the stored strand
def strand
  @strand
end
#text ⇒ String (readonly)
Sequence data for the alignment, including insertions.
252 253 254 |
# File 'lib/bio/maf/maf.rb', line 252
# Reader for the sequence data of the alignment, including insertions.
#
# @return [String] the stored alignment text
def text
  @text
end
Instance Method Details
#decode_status_char(c) ⇒ Object
311 312 313 |
# File 'lib/bio/maf/maf.rb', line 311
# Translates a one-character status code into its symbolic name by
# looking it up in I_STATUS.
#
# @param c [String] status character
# @return [Symbol] the I_STATUS entry for c
# @raise [RuntimeError] if c is not a key of I_STATUS
def decode_status_char(c)
  I_STATUS.fetch(c) { raise("Unsupported status character #{c}!") }
end
#delete_text(offset, len) ⇒ Object
344 345 346 347 348 349 350 351 |
# File 'lib/bio/maf/maf.rb', line 344
# Removes len characters starting at offset from the alignment text,
# in place, and from the quality string as well when one is present.
# Does nothing when the sequence reports itself as empty.
#
# @param offset [Integer] string offset at which deletion starts
# @param len [Integer] number of characters to delete
def delete_text(offset, len)
  return if empty?

  text.slice!(offset, len)
  # Keep the quality string aligned with the text, column for column.
  quality.slice!(offset, len) if quality
end
#empty? ⇒ Boolean
Whether this sequence is empty. Only true for EmptySequence instances from 'e' lines.
294 295 296 |
# File 'lib/bio/maf/maf.rb', line 294
# Whether this sequence is empty. This implementation always answers
# false; per the class documentation, only EmptySequence instances
# (from 'e' lines) report true.
#
# @return [Boolean] always false
def empty?
  false
end
#end ⇒ Object
265 266 267 |
# File 'lib/bio/maf/maf.rb', line 265
# Position just past the aligning region: start plus size.
#
# @return [Integer] start + size
def end
  offset = start
  offset + size
end
#gapped? ⇒ Boolean
298 299 300 |
# File 'lib/bio/maf/maf.rb', line 298
# Whether the alignment text length differs from the size of the
# aligning region.
#
# @return [Boolean] true when size and text.size are not equal
def gapped?
  region_size = size
  region_size != text.size
end
#interval ⇒ Object
269 270 271 |
# File 'lib/bio/maf/maf.rb', line 269
# Builds the interval covered by this sequence by handing source,
# start and end to GenomicInterval.zero_based.
#
# @return [Object] whatever GenomicInterval.zero_based returns
def interval
  GenomicInterval.zero_based(source,
                             start,
                             self.end)
end
#join(o) ⇒ Object
368 369 370 371 372 373 374 375 376 377 378 379 |
# File 'lib/bio/maf/maf.rb', line 368
# Builds a new Sequence covering this sequence followed by o.
#
# Source, start, strand and src_size are taken from the receiver; size
# and text are the receiver's plus o's. The quality strings are
# concatenated only when both sequences have one, otherwise the result's
# quality is left unset.
#
# @param o [Sequence] the sequence to append
# @return [Sequence] the newly built, joined sequence
def join(o)
  combined_size = size + o.size
  combined_text = text + o.text
  Sequence.new(source, start, combined_size, strand, src_size, combined_text).tap do |joined|
    joined.quality = quality + o.quality if quality && o.quality
  end
end
#joinable_with?(o) ⇒ Boolean
362 363 364 365 366 |
# File 'lib/bio/maf/maf.rb', line 362
# Whether o can be joined onto the end of this sequence: o must start
# where this sequence ends, be on the same strand, and agree with this
# sequence on empty?.
#
# @param o [Sequence] candidate following sequence
# @return [Boolean]
def joinable_with?(o)
  return false unless self.end == o.start
  return false unless strand == o.strand

  empty? == o.empty?
end
#left_count ⇒ Object
323 324 325 |
# File 'lib/bio/maf/maf.rb', line 323
# Second field of the synteny data (index 1), converted with to_i.
#
# @return [Integer, nil] the count, or the falsy i_data value when no
#   synteny data is present
def left_count
  fields = i_data
  return fields unless fields

  fields[1].to_i
end
#left_status ⇒ Object
319 320 321 |
# File 'lib/bio/maf/maf.rb', line 319
# Decoded form of the left status character, obtained by passing
# left_status_char to decode_status_char.
#
# @return [Object, nil] the decoded status, or the falsy i_data value
#   when no synteny data is present
def left_status
  fields = i_data
  return fields unless fields

  decode_status_char(left_status_char)
end
#left_status_char ⇒ Object
315 316 317 |
# File 'lib/bio/maf/maf.rb', line 315
# First field of the synteny data (index 0).
#
# @return [String, nil] the raw field, or the falsy i_data value when
#   no synteny data is present
def left_status_char
  fields = i_data
  return fields unless fields

  fields[0]
end
#right_count ⇒ Object
335 336 337 |
# File 'lib/bio/maf/maf.rb', line 335
# Fourth field of the synteny data (index 3), converted with to_i.
#
# @return [Integer, nil] the count, or the falsy i_data value when no
#   synteny data is present
def right_count
  fields = i_data
  return fields unless fields

  fields[3].to_i
end
#right_status ⇒ Object
331 332 333 |
# File 'lib/bio/maf/maf.rb', line 331
# Decoded form of the right status character, obtained by passing
# right_status_char to decode_status_char.
#
# @return [Object, nil] the decoded status, or the falsy i_data value
#   when no synteny data is present
def right_status
  fields = i_data
  return fields unless fields

  decode_status_char(right_status_char)
end
#right_status_char ⇒ Object
327 328 329 |
# File 'lib/bio/maf/maf.rb', line 327
# Third field of the synteny data (index 2).
#
# @return [String, nil] the raw field, or the falsy i_data value when
#   no synteny data is present
def right_status_char
  fields = i_data
  return fields unless fields

  fields[2]
end
#slice(range) ⇒ Object
273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 |
# File 'lib/bio/maf/maf.rb', line 273
# Builds a new Sequence holding the part of the alignment text selected
# by range, where range is expressed in text (string) offsets.
#
# The new start is advanced by the number of non-gap characters that
# precede the slice, and the new size is the number of non-gap
# characters inside it. The quality string, when present, is sliced
# with the same range.
#
# @param range [Range] string offsets into #text
# @return [Sequence] the sliced sequence
# @raise [RuntimeError] if the range cannot be extracted from the text
def slice(range)
  leading = text[0...range.begin]
  bases_skipped = leading.count('^-')
  piece = text[range]
  raise "could not extract slice #{range} from #{self.inspect}!" if piece.nil?

  sliced = Sequence.new(source, start + bases_skipped, piece.count('^-'),
                        strand, src_size, piece)
  own_quality = quality
  sliced.quality = own_quality[range] if own_quality
  # TODO: what to do with synteny data?
  sliced
end
#species ⇒ Object
339 340 341 342 |
# File 'lib/bio/maf/maf.rb', line 339
# The part of the source name before its first '.', if there is one.
#
# @return [String, nil] the leading component of source, or nil when
#   source contains no '.'
def species
  head, dot, _rest = source.partition('.')
  dot.empty? ? nil : head
end
#text_range(range) ⇒ Object
Maps the given zero-based genomic range onto a range of string offsets, suitable for extracting the text for the given range from #text.
386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 |
# File 'lib/bio/maf/maf.rb', line 386
# Maps the given zero-based genomic range onto a range of string
# offsets, suitable for extracting the text for the given range from
# #text.
#
# Every character other than '-' counts as one genomic position, which
# is the same convention #slice applies when it counts non-gap text.
#
# @param range [Range] zero-based genomic range; the end may be
#   inclusive or exclusive
# @return [Range] end-exclusive range of offsets into #text
# @raise [RuntimeError] if the range extends outside the sequence
#   bounds, or cannot be located within the text
def text_range(range)
  r_end = range.exclude_end? ? range.end : range.end + 1
  r_size = r_end - range.begin
  # special case, entire text
  return 0...text.size if range.begin == start && r_size == size

  if range.begin < start || r_end > self.end
    raise "Range #{range} outside sequence bounds; start #{start}, size #{size}"
  end
  # no gaps, can map indexes directly
  return (range.begin - start)...(r_end - start) unless gapped?

  # gaps present: walk alternating runs of sequence text and gaps
  g_start = start     # genomic position of the start of the current run
  t_start = 0         # text position of the start of the current run
  m_begin = nil       # text offset where the requested range begins
  match = nil
  # Runs are split on '-' only, so every non-gap character advances the
  # genomic position, whatever character it is.
  text.scan(/[^-]+|-+/) do |part|
    unless part.start_with?('-')
      # sequence text
      g_end = g_start + part.size
      if g_start <= range.begin && range.begin < g_end
        m_begin = (range.begin - g_start) + t_start
      end
      if g_start <= r_end && r_end <= g_end
        raise "reached end before start!" unless m_begin
        match = m_begin...((r_end - g_start) + t_start)
        break
      end
      g_start = g_end
    end
    # gaps consume text positions but no genomic positions
    t_start += part.size
  end
  raise "no match found!" unless match
  match
end
#to_bio_alignment ⇒ Object
353 354 355 |
# File 'lib/bio/maf/maf.rb', line 353
# Wraps this sequence's source name and text in a new
# Bio::BioAlignment::Sequence.
#
# @return [Bio::BioAlignment::Sequence]
def to_bio_alignment
  label = source
  Bio::BioAlignment::Sequence.new(label, text)
end
#write_fasta(writer) ⇒ Object
357 358 359 360 |
# File 'lib/bio/maf/maf.rb', line 357
# Sends this sequence to writer as one record: a header of the form
# "source:start-end" (end being start + size) followed by the text.
#
# @param writer [#write] receives the header and the text as two
#   arguments to a single write call
# @return [Object] whatever writer.write returns
def write_fasta(writer)
  first = start
  header = "#{source}:#{first}-#{first + size}"
  writer.write(header, text)
end