Class: Bio::MAF::Sequence

Inherits:
Object
  • Object
show all
Defined in:
lib/bio/maf/maf.rb

Overview

A sequence within an alignment block.

Direct Known Subclasses

EmptySequence

Constant Summary collapse

# Maps the single-character status codes found in the synteny data
# (see #left_status_char / #right_status_char) to symbolic status names.
# Frozen so the shared lookup table cannot be modified at runtime.
I_STATUS = {
  'C' => :contiguous,
  'I' => :intervening,
  'N' => :first,
  'n' => :first_bridged,
  'M' => :missing_data,
  'T' => :tandem
}.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(*args) ⇒ Sequence

Returns a new instance of Sequence.



261
262
263
# File 'lib/bio/maf/maf.rb', line 261

# Builds a sequence from positional arguments, in this order:
# source, start, size, strand, src_size, text.
#
# Arguments that are not supplied leave the corresponding instance
# variable set to nil; arguments beyond the sixth are ignored.
#
# @param args [Array] positional field values
def initialize(*args)
  @source   = args[0]
  @start    = args[1]
  @size     = args[2]
  @strand   = args[3]
  @src_size = args[4]
  @text     = args[5]
end

Instance Attribute Details

#i_data ⇒ Array<String>

Array of raw synteny information from 'i' line.

Returns:

  • (Array<String>)


255
256
257
# File 'lib/bio/maf/maf.rb', line 255

# Raw synteny information from the 'i' line.
#
# The constructor does not assign this value, so it is nil until set.
#
# @return [Array<String>, nil] the stored @i_data value
def i_data
  @i_data
end

#quality ⇒ String

Quality string from 'q' line.

Returns:

  • (String)


258
259
260
# File 'lib/bio/maf/maf.rb', line 258

# Quality string from the 'q' line.
#
# The constructor does not assign this value, so it is nil until set.
#
# @return [String, nil] the stored @quality value
def quality
  @quality
end

#size ⇒ Integer (readonly)

Returns the size of the aligning region in the source sequence.

Returns:

  • (Integer)

    Size of aligning region in source sequence.



242
243
244
# File 'lib/bio/maf/maf.rb', line 242

# Size of the aligning region in the source sequence.
#
# @return [Integer] the @size value assigned by the constructor
def size
  @size
end

#source ⇒ String (readonly)

Returns the source sequence name.

Returns:

  • (String)

    Source sequence name.



238
239
240
# File 'lib/bio/maf/maf.rb', line 238

# Source sequence name.
#
# @return [String] the @source value assigned by the constructor
def source
  @source
end

#src_size ⇒ Integer (readonly) Also known as: source_size

Size of the entire source sequence, not just the aligning region.

Returns:

  • (Integer)


249
250
251
# File 'lib/bio/maf/maf.rb', line 249

# Size of the entire source sequence, not just the aligning region.
#
# @return [Integer] the @src_size value assigned by the constructor
def src_size
  @src_size
end

#start ⇒ Integer (readonly)

Returns the zero-based start position.

Returns:

  • (Integer)

    Zero-based start position.



240
241
242
# File 'lib/bio/maf/maf.rb', line 240

# Zero-based start position of the aligning region.
#
# @return [Integer] the @start value assigned by the constructor
def start
  @start
end

#strand ⇒ Symbol (readonly)

:+ or :-, indicating which strand the alignment is to.

Returns:

  • (Symbol)


245
246
247
# File 'lib/bio/maf/maf.rb', line 245

# Strand the alignment is to: :+ or :-.
#
# @return [Symbol] the @strand value assigned by the constructor
def strand
  @strand
end

#text ⇒ String (readonly)

Sequence data for the alignment, including insertions.

Returns:

  • (String)


252
253
254
# File 'lib/bio/maf/maf.rb', line 252

# Sequence data for the alignment, including insertions.
#
# @return [String] the @text value assigned by the constructor
def text
  @text
end

Instance Method Details

#decode_status_char(c) ⇒ Object



311
312
313
# File 'lib/bio/maf/maf.rb', line 311

# Translates a status character into its symbolic name via I_STATUS.
#
# @param c [String] status character to look up
# @return [Symbol] the matching I_STATUS value
# @raise [RuntimeError] if the character has no I_STATUS entry
def decode_status_char(c)
  status = I_STATUS[c]
  raise("Unsupported status character #{c}!") unless status
  status
end

#delete_text(offset, len) ⇒ Object



344
345
346
347
348
349
350
351
# File 'lib/bio/maf/maf.rb', line 344

# Removes +len+ characters starting at +offset+ from the alignment text,
# and from the quality string when one is present. Both strings are
# modified in place. Does nothing when the sequence reports empty?.
#
# @param offset [Integer] text offset of the first character to remove
# @param len [Integer] number of characters to remove
# @return [String, nil] the removed quality characters, or nil when the
#   sequence is empty or has no quality string
def delete_text(offset, len)
  return if empty?

  text.slice!(offset, len)
  quality.slice!(offset, len) if quality
end

#empty? ⇒ Boolean

Whether this sequence is empty. Only true for EmptySequence instances from 'e' lines.

Returns:

  • (Boolean)


294
295
296
# File 'lib/bio/maf/maf.rb', line 294

# Whether this sequence is empty. Always false for this class; per the
# class documentation, only EmptySequence instances (from 'e' lines)
# report true.
#
# @return [Boolean] false
def empty?
  false
end

#end ⇒ Object



265
266
267
# File 'lib/bio/maf/maf.rb', line 265

# Position immediately past the aligning region, computed as
# start + size.
#
# @return [Integer] the sum of #start and #size
def end
  start + size
end

#gapped? ⇒ Boolean

Returns:

  • (Boolean)


298
299
300
# File 'lib/bio/maf/maf.rb', line 298

# Whether the alignment text length differs from the size of the
# aligning region — presumably because the text contains '-' gap
# characters (see #text_range, which treats runs of '-' as gaps).
#
# @return [Boolean] true when size != text.size
def gapped?
  size != text.size
end

#interval ⇒ Object



269
270
271
# File 'lib/bio/maf/maf.rb', line 269

# Builds a genomic interval for this sequence by passing the source name,
# the start position and the end position (start + size) to
# GenomicInterval.zero_based.
#
# @return [Object] whatever GenomicInterval.zero_based returns
#   (presumably a GenomicInterval — confirm against that class)
def interval
  GenomicInterval.zero_based(self.source, self.start, self.end)
end

#join(o) ⇒ Object



368
369
370
371
372
373
374
375
376
377
378
379
# File 'lib/bio/maf/maf.rb', line 368

# Concatenates this sequence with +o+ into a new Sequence.
#
# The result keeps this sequence's source, start, strand and src_size;
# its size is the sum of both sizes and its text is both texts appended.
# A quality string is set on the result only when both inputs have one.
# Synteny data (i_data) is not carried over.
#
# @param o [Sequence] the sequence to append
# @return [Sequence] a new combined sequence
def join(o)
  combined_size = size + o.size
  combined_text = text + o.text
  joined = Sequence.new(source, start, combined_size, strand, src_size, combined_text)
  joined.quality = quality + o.quality if quality && o.quality
  joined
end

#joinable_with?(o) ⇒ Boolean

Returns:

  • (Boolean)


362
363
364
365
366
# File 'lib/bio/maf/maf.rb', line 362

# Whether +o+ can be appended directly to this sequence: it must begin
# exactly where this one ends, be on the same strand, and agree with this
# sequence on empty?.
#
# @param o [Sequence] candidate following sequence
# @return [Boolean]
def joinable_with?(o)
  return false unless self.end == o.start
  return false unless strand == o.strand

  empty? == o.empty?
end

#left_count ⇒ Object



323
324
325
# File 'lib/bio/maf/maf.rb', line 323

# Second field of the synteny data (index 1), converted with to_i.
#
# @return [Integer, nil] the count, or nil when no synteny data is set
def left_count
  fields = i_data
  return fields unless fields

  fields[1].to_i
end

#left_status ⇒ Object



319
320
321
# File 'lib/bio/maf/maf.rb', line 319

# Decoded form of #left_status_char, obtained through
# #decode_status_char.
#
# @return [Symbol, nil] the decoded status, or nil when no synteny data
#   is set
# @raise [RuntimeError] propagated from #decode_status_char for an
#   unsupported character
def left_status
  return i_data unless i_data

  decode_status_char(left_status_char)
end

#left_status_char ⇒ Object



315
316
317
# File 'lib/bio/maf/maf.rb', line 315

# First field of the synteny data (index 0), undecoded.
#
# @return [String, nil] the raw status character, or nil when no synteny
#   data is set
def left_status_char
  fields = i_data
  fields ? fields[0] : fields
end

#right_count ⇒ Object



335
336
337
# File 'lib/bio/maf/maf.rb', line 335

# Fourth field of the synteny data (index 3), converted with to_i.
#
# @return [Integer, nil] the count, or nil when no synteny data is set
def right_count
  fields = i_data
  return fields unless fields

  fields[3].to_i
end

#right_status ⇒ Object



331
332
333
# File 'lib/bio/maf/maf.rb', line 331

# Decoded form of #right_status_char, obtained through
# #decode_status_char.
#
# @return [Symbol, nil] the decoded status, or nil when no synteny data
#   is set
# @raise [RuntimeError] propagated from #decode_status_char for an
#   unsupported character
def right_status
  return i_data unless i_data

  decode_status_char(right_status_char)
end

#right_status_char ⇒ Object



327
328
329
# File 'lib/bio/maf/maf.rb', line 327

# Third field of the synteny data (index 2), undecoded.
#
# @return [String, nil] the raw status character, or nil when no synteny
#   data is set
def right_status_char
  fields = i_data
  fields ? fields[2] : fields
end

#slice(range) ⇒ Object



273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
# File 'lib/bio/maf/maf.rb', line 273

# Extracts a sub-sequence covering the given range of text offsets
# (columns of the alignment text, not genomic positions).
#
# The new start is advanced by the number of non-gap characters that
# precede the slice, and the new size is the number of non-gap characters
# inside it. The quality string, when present, is sliced with the same
# range. Synteny data is not carried over.
#
# @param range [Range] range of offsets into #text
# @return [Sequence] a new sequence for the selected columns
# @raise [RuntimeError] if the range cannot be extracted from the text
def slice(range)
  leading = text[0...range.begin]
  piece = text[range]
  raise "could not extract slice #{range} from #{self.inspect}!" unless piece

  # Characters other than '-' are the ones that consume source positions.
  skipped = leading.count("^-")
  covered = piece.count("^-")
  sliced = Sequence.new(source, start + skipped, covered, strand, src_size, piece)
  sliced.quality = quality[range] if quality
  # TODO: what to do with synteny data?
  sliced
end

#species ⇒ Object



339
340
341
342
# File 'lib/bio/maf/maf.rb', line 339

# Species portion of the source name: everything before the first '.'.
#
# @return [String, nil] the text preceding the first '.', or nil when
#   the source name contains no '.'
def species
  head, dot, _rest = source.partition('.')
  dot.empty? ? nil : head
end

#text_range(range) ⇒ Object

Maps the given zero-based genomic range onto a range of string offsets, suitable for extracting the text for the given range from #text.

See Also:

  • String#slice


386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
# File 'lib/bio/maf/maf.rb', line 386

# Maps the given zero-based genomic range onto a range of string offsets
# into #text, suitable for String#slice.
#
# Three cases are handled:
# * the range covers the whole sequence: the full text range is returned;
# * the text has no gaps: offsets are the genomic positions minus start;
# * the text has gaps: the text is walked run by run (runs of word
#   characters vs. runs of '-'), tracking the genomic position and the
#   text offset in parallel until both endpoints have been located.
#
# @param range [Range] zero-based genomic range; inclusive and exclusive
#   ends are both accepted
# @return [Range] end-exclusive range of offsets into #text
# @raise [RuntimeError] if the range falls outside start...self.end, if
#   the end is located before the beginning, or if no match is found
def text_range(range)
  # Normalize to an exclusive end so the arithmetic below is uniform.
  r_end = range.exclude_end? ? range.end : range.end + 1
  r_size = r_end - range.begin
  if range.begin == start && r_size == size
    # special case, entire text
    0...text.size
  else
    if range.begin < start || r_end > self.end
      raise "Range #{range} outside sequence bounds; start #{start}, size #{size}"
    end
    if ! gapped?
      # no gaps, can map indexes directly
      (range.begin - start)...(r_end - start)
    else
      # gaps present
      g_start = start     # genomic position of the start
      t_start = 0         # text position of the start
      m_begin = nil       # beginning of match
      match = nil
      # Each scan hit is either a run of word characters or a run of '-'.
      text.scan(/(\w+|-+)/) do |parts|
        part = parts[0]
        if part[0] != '-'
          # sequence text
          g_end = g_start + part.size
          # Does the requested beginning fall inside this run?
          if g_start <= range.begin && range.begin < g_end
            offset_in_part = range.begin - g_start
            m_begin = offset_in_part + t_start
          end
          # Does the requested (exclusive) end fall inside or at the end
          # of this run?
          if g_start <= r_end && r_end <= g_end
            raise "reached end before start!" unless m_begin
            offset_in_part = r_end - g_start
            m_end = offset_in_part + t_start
            match = m_begin...m_end
            break
          end
          # Only sequence text advances the genomic position.
          g_start = g_end
        else
          # gap
        end
        # Both sequence text and gaps advance the text offset.
        t_start += part.size
      end
      raise "no match found!" unless match
      return match
    end
  end
end

#to_bio_alignment ⇒ Object



353
354
355
# File 'lib/bio/maf/maf.rb', line 353

# Converts this sequence into a Bio::BioAlignment::Sequence built from
# the source name and the alignment text.
#
# @return [Bio::BioAlignment::Sequence] the new object
def to_bio_alignment
  Bio::BioAlignment::Sequence.new(source, text)
end

#write_fasta(writer) ⇒ Object



357
358
359
360
# File 'lib/bio/maf/maf.rb', line 357

# Emits this sequence through the given writer as a single record.
#
# The record header has the form "<source>:<start>-<start + size>" and
# the record body is the alignment text.
#
# @param writer [#write] object whose write method accepts a header and
#   the sequence text
# @return [Object] whatever writer.write returns
def write_fasta(writer)
  header = "#{source}:#{start}-#{start + size}"
  writer.write(header, text)
end