Class: Bio::MAF::Sequence

Inherits:
Object
  • Object
show all
Defined in:
lib/bio/maf/maf.rb

Overview

A sequence within an alignment block.

Direct Known Subclasses

EmptySequence

Constant Summary collapse

# Lookup table translating the single-character status codes stored in the
# synteny ('i' line) data into symbolic names. Frozen so the shared table
# cannot be modified by accident at runtime.
I_STATUS =
{
  'C' => :contiguous,
  'I' => :intervening,
  'N' => :first,
  'n' => :first_bridged,
  'M' => :missing_data,
  'T' => :tandem
}.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(source, start, size, strand, src_size, text) ⇒ Sequence

Returns a new instance of Sequence.



262
263
264
265
266
267
268
269
# File 'lib/bio/maf/maf.rb', line 262

# Stores the fields describing one aligned sequence.
#
# @param source [String] source sequence name
# @param start [Integer] zero-based start position
# @param size [Integer] size of the aligning region in the source sequence
# @param strand [Symbol] :+ or :-
# @param src_size [Integer] size of the entire source sequence
# @param text [String] sequence data for the alignment
def initialize(source, start, size, strand, src_size, text)
  # Identity and orientation of the aligned region.
  @source, @strand = source, strand
  # Coordinates within, and total length of, the source sequence.
  @start, @size, @src_size = start, size, src_size
  @text = text
end

Instance Attribute Details

#i_data ⇒ Array<String>

Array of raw synteny information from 'i' line.

Returns:

  • (Array<String>)


256
257
258
# File 'lib/bio/maf/maf.rb', line 256

# Reader for the raw synteny fields taken from the 'i' line.
#
# @return [Array<String>] the stored @i_data value, unchanged
def i_data
  @i_data
end

#quality ⇒ String

Quality string from 'q' line.

Returns:

  • (String)


259
260
261
# File 'lib/bio/maf/maf.rb', line 259

# Reader for the quality string taken from the 'q' line.
#
# @return [String] the stored @quality value, unchanged
def quality
  @quality
end

#size ⇒ Integer (readonly)

Returns the size of the aligning region in the source sequence.

Returns:

  • (Integer)

    Size of aligning region in source sequence.



243
244
245
# File 'lib/bio/maf/maf.rb', line 243

# Reader for the size of the aligning region in the source sequence.
#
# @return [Integer] the stored @size value
def size
  @size
end

#source ⇒ String (readonly)

Returns the source sequence name.

Returns:

  • (String)

    Source sequence name.



239
240
241
# File 'lib/bio/maf/maf.rb', line 239

# Reader for the source sequence name.
#
# @return [String] the stored @source value
def source
  @source
end

#src_size ⇒ Integer (readonly) Also known as: source_size

Size of the entire source sequence, not just the aligning region.

Returns:

  • (Integer)


250
251
252
# File 'lib/bio/maf/maf.rb', line 250

# Reader for the size of the entire source sequence (not just the aligning
# region).
#
# @return [Integer] the stored @src_size value
def src_size
  @src_size
end

#start ⇒ Integer (readonly)

Returns the zero-based start position.

Returns:

  • (Integer)

    Zero-based start position.



241
242
243
# File 'lib/bio/maf/maf.rb', line 241

# Reader for the zero-based start position.
#
# @return [Integer] the stored @start value
def start
  @start
end

#strand ⇒ Symbol (readonly)

:+ or :-, indicating which strand the alignment is to.

Returns:

  • (Symbol)


246
247
248
# File 'lib/bio/maf/maf.rb', line 246

# Reader for the strand the alignment is to (:+ or :-).
#
# @return [Symbol] the stored @strand value
def strand
  @strand
end

#text ⇒ String (readonly)

Sequence data for the alignment, including insertions.

Returns:

  • (String)


253
254
255
# File 'lib/bio/maf/maf.rb', line 253

# Reader for the alignment's sequence data, including insertions.
#
# @return [String] the stored @text value
def text
  @text
end

Instance Method Details

#decode_status_char(c) ⇒ Object



317
318
319
# File 'lib/bio/maf/maf.rb', line 317

# Translates a status character into its symbolic name via I_STATUS.
#
# @param c [String] status character to decode
# @return [Symbol] the matching I_STATUS value
# @raise [RuntimeError] if the character has no entry in I_STATUS
def decode_status_char(c)
  status = I_STATUS[c]
  return status if status

  raise("Unsupported status character #{c}!")
end

#delete_text(offset, len) ⇒ Object



350
351
352
353
354
355
356
357
# File 'lib/bio/maf/maf.rb', line 350

# Removes +len+ characters starting at +offset+ from the alignment text and,
# when a quality string is present, from the same positions of the quality
# string. Does nothing for empty sequences.
#
# @param offset [Integer] string offset of the first character to remove
# @param len [Integer] number of characters to remove
def delete_text(offset, len)
  return if empty?

  text.slice!(offset, len)
  # Keep the quality string aligned with the text, column for column.
  quality.slice!(offset, len) if quality
end

#empty? ⇒ Boolean

Whether this sequence is empty. Only true for EmptySequence instances from 'e' lines.

Returns:

  • (Boolean)


300
301
302
# File 'lib/bio/maf/maf.rb', line 300

# Whether this sequence is empty. This implementation always answers false;
# per the class documentation only EmptySequence instances (from 'e' lines)
# are empty.
#
# @return [Boolean] always false
def empty?
  false
end

#end ⇒ Object



271
272
273
# File 'lib/bio/maf/maf.rb', line 271

# End position of the aligning region, computed as start + size (the
# position just past the last aligned base, given the zero-based start).
def end
  start + size
end

#fasta_desc ⇒ Object



363
364
365
# File 'lib/bio/maf/maf.rb', line 363

# Builds a description string of the form "source:start-stop", where stop
# is start + size.
#
# @return [String]
def fasta_desc
  stop = start + size
  "#{source}:#{start}-#{stop}"
end

#gapped? ⇒ Boolean

Returns:

  • (Boolean)


304
305
306
# File 'lib/bio/maf/maf.rb', line 304

# Whether the alignment text length differs from the size of the aligning
# region, i.e. the text holds characters beyond the aligned bases.
#
# @return [Boolean]
def gapped?
  text.size != size
end

#interval ⇒ Object



275
276
277
# File 'lib/bio/maf/maf.rb', line 275

# Builds a GenomicInterval from this sequence's source name and its
# zero-based start and end positions.
def interval
  # `end` is a keyword, so that call needs an explicit receiver.
  GenomicInterval.zero_based(source, start, self.end)
end

#join(o) ⇒ Object



373
374
375
376
377
378
379
380
381
382
383
384
# File 'lib/bio/maf/maf.rb', line 373

# Creates a new Sequence that concatenates this sequence with +o+: same
# source, start, strand and source size as the receiver, with sizes and
# texts added together. The quality strings are concatenated only when
# both sequences have one.
#
# @param o [Sequence] sequence to append
# @return [Sequence] the combined sequence
def join(o)
  joined = Sequence.new(source, start, size + o.size, strand, src_size, text + o.text)
  joined.quality = quality + o.quality if quality && o.quality
  joined
end

#joinable_with?(o) ⇒ Boolean

Returns:

  • (Boolean)


367
368
369
370
371
# File 'lib/bio/maf/maf.rb', line 367

# Whether +o+ can be appended directly to this sequence: it must begin
# where this one ends, be on the same strand, and agree on emptiness.
#
# @param o [Sequence] candidate following sequence
# @return [Boolean]
def joinable_with?(o)
  return false unless self.end == o.start
  return false unless strand == o.strand

  empty? == o.empty?
end

#left_count ⇒ Object



329
330
331
# File 'lib/bio/maf/maf.rb', line 329

# Second synteny field (index 1) converted to an integer, or the falsy
# i_data value itself when no synteny data is present.
def left_count
  fields = i_data
  fields && fields[1].to_i
end

#left_status ⇒ Object



325
326
327
# File 'lib/bio/maf/maf.rb', line 325

# Decoded form of the left status character (see #decode_status_char), or
# the falsy i_data value itself when no synteny data is present.
def left_status
  fields = i_data
  fields && decode_status_char(left_status_char)
end

#left_status_char ⇒ Object



321
322
323
# File 'lib/bio/maf/maf.rb', line 321

# First synteny field (index 0), or the falsy i_data value itself when no
# synteny data is present.
def left_status_char
  fields = i_data
  fields && fields[0]
end

#right_count ⇒ Object



341
342
343
# File 'lib/bio/maf/maf.rb', line 341

# Fourth synteny field (index 3) converted to an integer, or the falsy
# i_data value itself when no synteny data is present.
def right_count
  fields = i_data
  fields && fields[3].to_i
end

#right_status ⇒ Object



337
338
339
# File 'lib/bio/maf/maf.rb', line 337

# Decoded form of the right status character (see #decode_status_char), or
# the falsy i_data value itself when no synteny data is present.
def right_status
  fields = i_data
  fields && decode_status_char(right_status_char)
end

#right_status_char ⇒ Object



333
334
335
# File 'lib/bio/maf/maf.rb', line 333

# Third synteny field (index 2), or the falsy i_data value itself when no
# synteny data is present.
def right_status_char
  fields = i_data
  fields && fields[2]
end

#slice(range) ⇒ Object



279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
# File 'lib/bio/maf/maf.rb', line 279

# Extracts the given range of text columns as a new Sequence.
#
# The new start is advanced by the number of non-gap characters before the
# slice, and the new size is the number of non-gap characters inside it.
# The quality string, when present, is sliced with the same range.
#
# @param range [Range] range of string offsets into #text
# @return [Sequence] the sliced sequence
# @raise [RuntimeError] if the range cannot be extracted from the text
def slice(range)
  # Gap characters ('-') do not consume source positions, so only the
  # non-gap characters ahead of the slice shift the start coordinate.
  leading = text.slice(0...(range.begin))
  bases_before = leading.delete("-").size

  piece = text.slice(range)
  raise "could not extract slice #{range} from #{self.inspect}!" unless piece

  sliced = Sequence.new(source,
                        start + bases_before,
                        piece.delete("-").size,
                        strand,
                        src_size,
                        piece)
  sliced.quality = quality.slice(range) if quality
  # TODO: what to do with synteny data?
  sliced
end

#species ⇒ Object



345
346
347
348
# File 'lib/bio/maf/maf.rb', line 345

# The part of the source name before its first '.', or nil when the source
# name contains no '.'.
#
# @return [String, nil]
def species
  prefix, remainder = source.split('.', 2)
  # A second piece exists only when the name contained a '.'.
  prefix if remainder
end

#text_range(range) ⇒ Object

Maps the given zero-based genomic range onto a range of string offsets, suitable for extracting the text for the given range from #text.

See Also:

  • String#slice


391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
# File 'lib/bio/maf/maf.rb', line 391

# Maps a zero-based genomic range onto the range of string offsets in #text
# that covers it, suitable for String#slice.
#
# @param range [Range] genomic range; inclusive and exclusive ends are both
#   accepted
# @return [Range] end-exclusive range of offsets into #text
# @raise [RuntimeError] if the range lies outside the sequence bounds or no
#   matching text span is found
def text_range(range)
  g_first = range.begin
  # Normalise to an exclusive end so both range flavours are handled alike.
  g_stop = range.exclude_end? ? range.end : range.end + 1

  # Special case: the request covers the entire sequence.
  return 0...text.size if g_first == start && (g_stop - g_first) == size

  if g_first < start || g_stop > self.end
    raise "Range #{range} outside sequence bounds; start #{start}, size #{size}"
  end

  # Without gaps, genomic positions map directly onto text offsets.
  return (g_first - start)...(g_stop - start) unless gapped?

  # Gaps present: walk the alternating runs of sequence text and gaps,
  # tracking the genomic position and the text offset side by side.
  g_pos = start       # genomic position at the start of the current run
  t_pos = 0           # text offset at the start of the current run
  t_first = nil       # text offset where the requested range begins
  found = nil
  text.scan(/(\w+|-+)/) do |captures|
    run = captures.first
    unless run[0] == '-'
      # Sequence text: only these runs advance the genomic position.
      run_end = g_pos + run.size
      if g_pos <= g_first && g_first < run_end
        t_first = (g_first - g_pos) + t_pos
      end
      if g_pos <= g_stop && g_stop <= run_end
        raise "reached end before start!" unless t_first
        found = t_first...((g_stop - g_pos) + t_pos)
        break
      end
      g_pos = run_end
    end
    # Every run, gap or not, advances the text offset.
    t_pos += run.size
  end
  raise "no match found!" unless found
  found
end

#to_bio_alignment ⇒ Object



359
360
361
# File 'lib/bio/maf/maf.rb', line 359

# Builds a Bio::BioAlignment::Sequence from this sequence's source name and
# alignment text.
def to_bio_alignment
  Bio::BioAlignment::Sequence.new(source, text)
end