Class: Bio::MAF::Sequence

Inherits:
Object
  • Object
show all
Defined in:
lib/bio/maf/maf.rb

Overview

A sequence within an alignment block.

Direct Known Subclasses

EmptySequence

Constant Summary collapse

# Lookup table from the single-character status codes stored in the
# 'i' line data to their symbolic names. Frozen so the shared table
# cannot be modified at runtime.
I_STATUS =
{
  'C' => :contiguous,
  'I' => :intervening,
  'N' => :first,
  'n' => :first_bridged,
  'M' => :missing_data,
  'T' => :tandem
}.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(source, start, size, strand, src_size, text) ⇒ Sequence

Returns a new instance of Sequence.



277
278
279
280
281
282
283
284
# File 'lib/bio/maf/maf.rb', line 277

# Stores the given fields on the new sequence.
#
# @param source [String] source sequence name
# @param start [Integer] zero-based start position
# @param size [Integer] size of the aligning region in the source sequence
# @param strand [Symbol] :+ or :-, the strand the alignment is to
# @param src_size [Integer] size of the entire source sequence
# @param text [String] sequence data for the alignment, including insertions
def initialize(source, start, size, strand, src_size, text)
  @source, @start, @size = source, start, size
  @strand, @src_size = strand, src_size
  @text = text
end

Instance Attribute Details

#i_data ⇒ Array<String>

Array of raw synteny information from 'i' line.

Returns:

  • (Array<String>)


271
272
273
# File 'lib/bio/maf/maf.rb', line 271

# Reader for the raw synteny information taken from the 'i' line.
#
# @return [Array<String>] the stored @i_data value
def i_data
  @i_data
end

#quality ⇒ String

Quality string from 'q' line.

Returns:

  • (String)


274
275
276
# File 'lib/bio/maf/maf.rb', line 274

# Reader for the quality string taken from the 'q' line.
#
# @return [String] the stored @quality value
def quality
  @quality
end

#size ⇒ Integer (readonly)

Returns Size of aligning region in source sequence.

Returns:

  • (Integer)

    Size of aligning region in source sequence.



258
259
260
# File 'lib/bio/maf/maf.rb', line 258

# Reader for the size of the aligning region in the source sequence.
#
# @return [Integer] the stored @size value
def size
  @size
end

#source ⇒ String (readonly)

Returns Source sequence name.

Returns:

  • (String)

    Source sequence name.



254
255
256
# File 'lib/bio/maf/maf.rb', line 254

# Reader for the source sequence name.
#
# @return [String] the stored @source value
def source
  @source
end

#src_size ⇒ Integer (readonly) Also known as: source_size

Size of the entire source sequence, not just the aligning region.

Returns:

  • (Integer)


265
266
267
# File 'lib/bio/maf/maf.rb', line 265

# Reader for the size of the entire source sequence, not just the
# aligning region.
#
# @return [Integer] the stored @src_size value
def src_size
  @src_size
end

#start ⇒ Integer (readonly)

Returns Zero-based start position.

Returns:

  • (Integer)

    Zero-based start position.



256
257
258
# File 'lib/bio/maf/maf.rb', line 256

# Reader for the zero-based start position.
#
# @return [Integer] the stored @start value
def start
  @start
end

#strand ⇒ Symbol (readonly)

:+ or :-, indicating which strand the alignment is to.

Returns:

  • (Symbol)


261
262
263
# File 'lib/bio/maf/maf.rb', line 261

# Reader for the strand the alignment is to.
#
# @return [Symbol] the stored @strand value, :+ or :-
def strand
  @strand
end

#text ⇒ String (readonly)

Sequence data for the alignment, including insertions.

Returns:

  • (String)


268
269
270
# File 'lib/bio/maf/maf.rb', line 268

# Reader for the sequence data of the alignment, including insertions.
#
# @return [String] the stored @text value
def text
  @text
end

Instance Method Details

#decode_status_char(c) ⇒ Object



332
333
334
# File 'lib/bio/maf/maf.rb', line 332

# Translates a raw status character into its symbolic status name.
#
# @param c [String] status character to look up in I_STATUS
# @return [Symbol] the I_STATUS entry for the character
# @raise [RuntimeError] if I_STATUS has no entry for the character
def decode_status_char(c)
  I_STATUS.fetch(c) do
    raise("Unsupported status character #{c}!")
  end
end

#delete_text(offset, len) ⇒ Object



365
366
367
368
369
370
371
372
# File 'lib/bio/maf/maf.rb', line 365

# Removes +len+ characters starting at +offset+ from the text, and from
# the quality string too when one is present. Both strings are modified
# in place. Does nothing when the sequence reports itself as empty.
#
# @param offset [Integer] string offset at which removal starts
# @param len [Integer] number of characters to remove
# @return [String, nil] the characters removed from the quality string,
#   or nil when there is no quality string or the sequence is empty
def delete_text(offset, len)
  return if empty?

  text.slice!(offset, len)
  quality.slice!(offset, len) if quality
end

#empty? ⇒ Boolean

Whether this sequence is empty. Only true for EmptySequence instances from 'e' lines.

Returns:

  • (Boolean)


315
316
317
# File 'lib/bio/maf/maf.rb', line 315

# Whether this sequence is empty. Always false here; only true for
# EmptySequence instances from 'e' lines.
#
# @return [Boolean] false
def empty?
  false
end

#end ⇒ Object



286
287
288
# File 'lib/bio/maf/maf.rb', line 286

# End position of the aligning region, computed as start plus size.
#
# @return [Integer] start + size
def end
  start + size
end

#fasta_desc ⇒ Object



382
383
384
# File 'lib/bio/maf/maf.rb', line 382

# Builds a description string of the form "source:start-end", where the
# end is start plus size.
#
# @return [String] e.g. "hg18.chr1:10-14" for start 10 and size 4
def fasta_desc
  format('%s:%s-%s', source, start, start + size)
end

#gapped? ⇒ Boolean

Returns:

  • (Boolean)


319
320
321
# File 'lib/bio/maf/maf.rb', line 319

# Whether the alignment text contains anything beyond the aligned
# region itself: true when size differs from the length of text.
#
# @return [Boolean]
def gapped?
  size != text.size
end

#interval ⇒ Object



290
291
292
# File 'lib/bio/maf/maf.rb', line 290

# Builds the zero-based genomic interval covered by this sequence.
#
# @return [Object] whatever GenomicInterval.zero_based returns for this
#   sequence's source, start and end
def interval
  name, from, to = source, start, self.end
  GenomicInterval.zero_based(name, from, to)
end

#join(o) ⇒ Object



392
393
394
395
396
397
398
399
400
401
402
403
# File 'lib/bio/maf/maf.rb', line 392

# Concatenates this sequence with +o+ into a new Sequence. The result
# keeps this sequence's source, start, strand and source size; sizes and
# texts are added together. Quality strings are concatenated only when
# both sequences have one.
#
# @param o [Sequence] the sequence to append
# @return [Sequence] a new, combined sequence
def join(o)
  combined_size = size + o.size
  combined_text = text + o.text
  Sequence.new(source, start, combined_size, strand, src_size, combined_text).tap do |joined|
    joined.quality = quality + o.quality if quality && o.quality
  end
end

#joinable_with?(o) ⇒ Boolean

Returns:

  • (Boolean)


386
387
388
389
390
# File 'lib/bio/maf/maf.rb', line 386

# Whether +o+ can be joined onto the end of this sequence: it must start
# exactly where this one ends, be on the same strand, and agree with
# this sequence on empty?.
#
# @param o [Sequence] the candidate sequence
# @return [Boolean]
def joinable_with?(o)
  return false unless self.end == o.start
  return false unless strand == o.strand

  empty? == o.empty?
end

#left_count ⇒ Object



344
345
346
# File 'lib/bio/maf/maf.rb', line 344

# Second field of the 'i' line data, converted with to_i.
#
# @return [Integer, nil] the converted value, or nil when there is no
#   'i' line data
def left_count
  i_data[1].to_i if i_data
end

#left_status ⇒ Object



340
341
342
# File 'lib/bio/maf/maf.rb', line 340

# Symbolic form of the left status character, obtained by passing
# left_status_char through decode_status_char.
#
# @return [Symbol, nil] the decoded status, or nil when there is no
#   'i' line data
def left_status
  decode_status_char(left_status_char) if i_data
end

#left_status_char ⇒ Object



336
337
338
# File 'lib/bio/maf/maf.rb', line 336

# First field of the 'i' line data: the raw left status character.
#
# @return [String, nil] the raw character, or nil when there is no
#   'i' line data
def left_status_char
  i_data[0] if i_data
end

#right_count ⇒ Object



356
357
358
# File 'lib/bio/maf/maf.rb', line 356

# Fourth field of the 'i' line data, converted with to_i.
#
# @return [Integer, nil] the converted value, or nil when there is no
#   'i' line data
def right_count
  i_data[3].to_i if i_data
end

#right_status ⇒ Object



352
353
354
# File 'lib/bio/maf/maf.rb', line 352

# Symbolic form of the right status character, obtained by passing
# right_status_char through decode_status_char.
#
# @return [Symbol, nil] the decoded status, or nil when there is no
#   'i' line data
def right_status
  decode_status_char(right_status_char) if i_data
end

#right_status_char ⇒ Object



348
349
350
# File 'lib/bio/maf/maf.rb', line 348

# Third field of the 'i' line data: the raw right status character.
#
# @return [String, nil] the raw character, or nil when there is no
#   'i' line data
def right_status_char
  i_data[2] if i_data
end

#slice(range) ⇒ Object



294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
# File 'lib/bio/maf/maf.rb', line 294

# Extracts the given range of text offsets as a new Sequence.
#
# The new start is this sequence's start advanced by the number of
# non-gap characters preceding the slice, and the new size is the number
# of non-gap characters inside it. The quality string, when present, is
# sliced with the same range. Synteny ('i' line) data is not carried
# over.
#
# @param range [Range] range of string offsets into the text
# @return [Sequence] a new sequence covering the requested slice
# @raise [RuntimeError] if the range cannot be extracted from the text
def slice(range)
  # A beginless range has a nil begin; it starts at offset 0, so nothing
  # precedes the slice. Without this, 0...nil would select the whole text
  # and shift the new start by every non-gap character in it.
  first = range.begin || 0
  before = text.slice(0...first)
  non_gap_prev = before.delete("-").size
  new_text = text.slice(range)
  unless new_text
    raise "could not extract slice #{range} from #{self.inspect}!"
  end
  non_gap_text = new_text.delete("-").size
  s2 = Sequence.new(source,
                    start + non_gap_prev,
                    non_gap_text,
                    strand,
                    src_size,
                    new_text)
  s2.quality = quality.slice(range) if quality
  # TODO: what to do with synteny data?
  s2
end

#species ⇒ Object



360
361
362
363
# File 'lib/bio/maf/maf.rb', line 360

# The part of the source name before its first '.', when the name
# contains one.
#
# @return [String, nil] the leading component, or nil when the source
#   name has no '.'
def species
  prefix, remainder = source.split('.', 2)
  prefix if remainder
end

#text_range(range) ⇒ Object

Maps the given zero-based genomic range onto a range of string offsets, suitable for extracting the text for the given range from #text.

See Also:

  • String#slice


410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
# File 'lib/bio/maf/maf.rb', line 410

# Maps the given zero-based genomic range onto a range of string
# offsets, suitable for extracting the text for the given range from
# #text.
#
# @param range [Range] genomic range; an inclusive range is converted to
#   an exclusive end internally
# @return [Range] end-exclusive range of offsets into #text
# @raise [RuntimeError] if the range lies outside start...self.end, or if
#   no offsets could be determined while walking a gapped text
# @see String#slice
def text_range(range)
  # Normalize to an exclusive end so both range forms are handled alike.
  r_end = range.exclude_end? ? range.end : range.end + 1
  r_size = r_end - range.begin
  if range.begin == start && r_size == size
    # special case, entire text
    0...text.size
  else
    if range.begin < start || r_end > self.end
      raise "Range #{range} outside sequence bounds; start #{start}, size #{size}"
    end
    if ! gapped?
      # no gaps, can map indexes directly
      (range.begin - start)...(r_end - start)
    else
      # gaps present: walk the text as alternating runs of word
      # characters and '-' characters, tracking the genomic position and
      # the text offset in parallel.
      g_start = start     # genomic position of the start
      t_start = 0         # text position of the start
      m_begin = nil       # beginning of match
      match = nil
      text.scan(/(\w+|-+)/) do |parts|
        part = parts[0]
        if part[0] != '-'
          # sequence text
          g_end = g_start + part.size
          # Does the requested range begin inside this run?
          if g_start <= range.begin && range.begin < g_end
            offset_in_part = range.begin - g_start
            m_begin = offset_in_part + t_start
          end
          # Does the requested range end inside, or exactly at the end
          # of, this run? If so the match is complete.
          if g_start <= r_end && r_end <= g_end
            raise "reached end before start!" unless m_begin
            offset_in_part = r_end - g_start
            m_end = offset_in_part + t_start
            match = m_begin...m_end
            break
          end
          # Only sequence runs advance the genomic position.
          g_start = g_end
        else
          # gap
        end
        # Every run, gap or not, advances the text offset.
        t_start += part.size
      end
      raise "no match found!" unless match
      return match
    end
  end
end

#to_bio_alignment ⇒ Object



378
379
380
# File 'lib/bio/maf/maf.rb', line 378

# Builds a Bio::BioAlignment::Sequence from this sequence's source name
# and alignment text.
#
# @return [Bio::BioAlignment::Sequence]
def to_bio_alignment
  Bio::BioAlignment::Sequence.new(source, text)
end

#upcase! ⇒ Object



374
375
376
# File 'lib/bio/maf/maf.rb', line 374

# Upcases the alignment text in place by calling upcase! on it.
#
# @return [Object] whatever text.upcase! returns (assuming text is a
#   String, that is nil when nothing changed — confirm before relying on it)
def upcase!
  text.upcase!
end