Class: Bio::MAF::Sequence

Inherits:
Object
  • Object
show all
Defined in:
lib/bio/maf/maf.rb

Overview

A sequence within an alignment block.

Direct Known Subclasses

EmptySequence

Constant Summary collapse

# Lookup table from the single-character status codes stored in the
# 'i' line synteny data to their symbolic names. Frozen so that the
# shared table cannot be modified at runtime.
I_STATUS = {
  'C' => :contiguous,
  'I' => :intervening,
  'N' => :first,
  'n' => :first_bridged,
  'M' => :missing_data,
  'T' => :tandem
}.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(source, start, size, strand, src_size, text) ⇒ Sequence

Returns a new instance of Sequence.



260
261
262
263
264
265
266
267
# File 'lib/bio/maf/maf.rb', line 260

# Stores each argument in the instance variable of the same name.
#
# @param source [String] source sequence name
# @param start [Integer] zero-based start position
# @param size [Integer] size of the aligning region in the source sequence
# @param strand [Symbol] :+ or :-, the strand the alignment is to
# @param src_size [Integer] size of the entire source sequence, not just
#   the aligning region
# @param text [String] sequence data for the alignment, including insertions
def initialize(source, start, size, strand, src_size, text)
  @source = source
  @start = start
  @size = size
  @strand = strand
  @src_size = src_size
  @text = text
end

Instance Attribute Details

#i_data ⇒ Array<String>

Array of raw synteny information from 'i' line.

Returns:

  • (Array<String>)


254
255
256
# File 'lib/bio/maf/maf.rb', line 254

# Raw synteny information from the 'i' line.
#
# @return [Array<String>] the stored @i_data value
def i_data
  @i_data
end

#quality ⇒ String

Quality string from 'q' line.

Returns:

  • (String)


257
258
259
# File 'lib/bio/maf/maf.rb', line 257

# Quality string from the 'q' line.
#
# @return [String] the stored @quality value
def quality
  @quality
end

#size ⇒ Integer (readonly)

Returns the size of the aligning region in the source sequence.

Returns:

  • (Integer)

    Size of aligning region in source sequence.



241
242
243
# File 'lib/bio/maf/maf.rb', line 241

# Size of the aligning region in the source sequence.
#
# @return [Integer]
def size
  @size
end

#source ⇒ String (readonly)

Returns the source sequence name.

Returns:

  • (String)

    Source sequence name.



237
238
239
# File 'lib/bio/maf/maf.rb', line 237

# Source sequence name.
#
# @return [String]
def source
  @source
end

#src_size ⇒ Integer (readonly) Also known as: source_size

Size of the entire source sequence, not just the aligning region.

Returns:

  • (Integer)


248
249
250
# File 'lib/bio/maf/maf.rb', line 248

# Size of the entire source sequence, not just the aligning region.
#
# @return [Integer]
def src_size
  @src_size
end

#start ⇒ Integer (readonly)

Returns the zero-based start position.

Returns:

  • (Integer)

    Zero-based start position.



239
240
241
# File 'lib/bio/maf/maf.rb', line 239

# Zero-based start position.
#
# @return [Integer]
def start
  @start
end

#strand ⇒ Symbol (readonly)

:+ or :-, indicating which strand the alignment is to.

Returns:

  • (Symbol)


244
245
246
# File 'lib/bio/maf/maf.rb', line 244

# Strand the alignment is to.
#
# @return [Symbol] :+ or :-
def strand
  @strand
end

#text ⇒ String (readonly)

Sequence data for the alignment, including insertions.

Returns:

  • (String)


251
252
253
# File 'lib/bio/maf/maf.rb', line 251

# Sequence data for the alignment, including insertions.
#
# @return [String]
def text
  @text
end

Instance Method Details

#decode_status_char(c) ⇒ Object



315
316
317
# File 'lib/bio/maf/maf.rb', line 315

# Translates a one-character status code into its symbolic name using
# the I_STATUS table.
#
# @param c [String] status character to look up
# @return [Symbol] the matching I_STATUS value
# @raise [RuntimeError] if the character has no entry in I_STATUS
def decode_status_char(c)
  status = I_STATUS[c]
  raise "Unsupported status character #{c}!" unless status
  status
end

#delete_text(offset, len) ⇒ Object



348
349
350
351
352
353
354
355
# File 'lib/bio/maf/maf.rb', line 348

# Removes +len+ characters starting at +offset+ from the text in place,
# and from the quality string as well when one is present. Does nothing
# when the sequence reports itself as empty.
#
# @param offset [Integer] index of the first character to remove
# @param len [Integer] number of characters to remove
# @return [String, nil] the removed portion of the quality string, or
#   nil when the sequence is empty or has no quality string
def delete_text(offset, len)
  return if empty?

  text.slice!(offset, len)
  quality.slice!(offset, len) if quality
end

#empty? ⇒ Boolean

Whether this sequence is empty. Only true for EmptySequence instances from 'e' lines.

Returns:

  • (Boolean)


298
299
300
# File 'lib/bio/maf/maf.rb', line 298

# Whether this sequence is empty. This implementation always answers
# false; per the class documentation only EmptySequence instances
# (from 'e' lines) are empty.
#
# @return [Boolean] always false here
def empty?
  false
end

#end ⇒ Object



269
270
271
# File 'lib/bio/maf/maf.rb', line 269

# Sum of #start and #size.
# NOTE(review): since start is documented as zero-based, this is
# presumably the exclusive end coordinate of the aligning region.
#
# @return [Integer] start + size
def end
  start + size
end

#fasta_desc ⇒ Object



361
362
363
# File 'lib/bio/maf/maf.rb', line 361

# Formats the location of this sequence as "source:start-end", where
# the end value is start + size.
#
# @return [String]
def fasta_desc
  format('%s:%s-%s', source, start, start + size)
end

#gapped? ⇒ Boolean

Returns:

  • (Boolean)


302
303
304
# File 'lib/bio/maf/maf.rb', line 302

# Whether the alignment text has a different length than the size of
# the aligning region, i.e. the text holds characters that do not
# count towards #size.
#
# @return [Boolean]
def gapped?
  column_count = text.size
  size != column_count
end

#interval ⇒ Object



273
274
275
# File 'lib/bio/maf/maf.rb', line 273

# Passes this sequence's source, start and end to
# GenomicInterval.zero_based and returns the result.
# NOTE(review): GenomicInterval is defined outside this view; the
# return value is presumably a GenomicInterval instance -- confirm.
def interval
  GenomicInterval.zero_based(self.source, self.start, self.end)
end

#join(o) ⇒ Object



371
372
373
374
375
376
377
378
379
380
381
382
# File 'lib/bio/maf/maf.rb', line 371

# Builds a new Sequence that appends +o+ to this one. The result keeps
# this sequence's source, start, strand and src_size; the sizes are
# summed and the texts concatenated. A quality string is set on the
# result only when both sequences have one.
#
# @param o [Sequence] sequence to append; must respond to size, text
#   and quality
# @return [Sequence] the newly built sequence
def join(o)
  combined_size = size + o.size
  combined_text = text + o.text
  joined = Sequence.new(source, start, combined_size, strand, src_size, combined_text)
  joined.quality = quality + o.quality if quality && o.quality
  joined
end

#joinable_with?(o) ⇒ Boolean

Returns:

  • (Boolean)


365
366
367
368
369
# File 'lib/bio/maf/maf.rb', line 365

# Whether +o+ can be appended directly to this sequence: it must start
# exactly where this one ends, be on the same strand, and agree with
# this sequence on emptiness.
#
# @param o [Sequence] candidate following sequence
# @return [Boolean]
def joinable_with?(o)
  adjacent = (self.end == o.start)
  adjacent && (strand == o.strand) && (empty? == o.empty?)
end

#left_count ⇒ Object



327
328
329
# File 'lib/bio/maf/maf.rb', line 327

# Second field of the 'i' line synteny data, converted with #to_i.
#
# @return [Integer, nil] the count, or the falsy i_data value when no
#   synteny data is present
def left_count
  synteny = i_data
  synteny && synteny[1].to_i
end

#left_status ⇒ Object



323
324
325
# File 'lib/bio/maf/maf.rb', line 323

# Decoded form of the left status character, obtained by passing
# #left_status_char through #decode_status_char.
#
# @return [Symbol, nil] the decoded status, or the falsy i_data value
#   when no synteny data is present
def left_status
  synteny = i_data
  synteny && decode_status_char(left_status_char)
end

#left_status_char ⇒ Object



319
320
321
# File 'lib/bio/maf/maf.rb', line 319

# First field of the 'i' line synteny data, undecoded.
#
# @return [String, nil] the raw field, or the falsy i_data value when
#   no synteny data is present
def left_status_char
  synteny = i_data
  synteny && synteny[0]
end

#right_count ⇒ Object



339
340
341
# File 'lib/bio/maf/maf.rb', line 339

# Fourth field of the 'i' line synteny data, converted with #to_i.
#
# @return [Integer, nil] the count, or the falsy i_data value when no
#   synteny data is present
def right_count
  synteny = i_data
  synteny && synteny[3].to_i
end

#right_status ⇒ Object



335
336
337
# File 'lib/bio/maf/maf.rb', line 335

# Decoded form of the right status character, obtained by passing
# #right_status_char through #decode_status_char.
#
# @return [Symbol, nil] the decoded status, or the falsy i_data value
#   when no synteny data is present
def right_status
  synteny = i_data
  synteny && decode_status_char(right_status_char)
end

#right_status_char ⇒ Object



331
332
333
# File 'lib/bio/maf/maf.rb', line 331

# Third field of the 'i' line synteny data, undecoded.
#
# @return [String, nil] the raw field, or the falsy i_data value when
#   no synteny data is present
def right_status_char
  synteny = i_data
  synteny && synteny[2]
end

#slice(range) ⇒ Object



277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
# File 'lib/bio/maf/maf.rb', line 277

# Extracts the given range of text columns as a new Sequence.
#
# The new start is this sequence's start advanced by the number of
# non-gap characters in front of the range, and the new size is the
# number of non-gap characters inside it. Quality data is sliced with
# the same range when present; synteny data is not carried over.
#
# @param range [Range] range of offsets into #text
# @return [Sequence] a new sequence covering just those columns
# @raise [RuntimeError] if String#slice yields nothing for the range
def slice(range)
  leading = text.slice(0...range.begin)
  # '-' marks a gap column, which does not consume a source position.
  bases_skipped = leading.size - leading.count("-")

  columns = text.slice(range)
  raise "could not extract slice #{range} from #{self.inspect}!" unless columns

  bases_kept = columns.size - columns.count("-")
  piece = Sequence.new(source, start + bases_skipped, bases_kept, strand, src_size, columns)
  piece.quality = quality.slice(range) if quality
  # TODO: what to do with synteny data?
  piece
end

#species ⇒ Object



343
344
345
346
# File 'lib/bio/maf/maf.rb', line 343

# Portion of the source name in front of its first '.'.
#
# @return [String, nil] the prefix, or nil when the source name
#   contains no '.'
def species
  prefix, remainder = source.split('.', 2)
  remainder.nil? ? nil : prefix
end

#text_range(range) ⇒ Object

Maps the given zero-based genomic range onto a range of string offsets, suitable for extracting the text for the given range from #text.

See Also:

  • String#slice


389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
# File 'lib/bio/maf/maf.rb', line 389

# Maps the given zero-based genomic range onto a range of string
# offsets, suitable for extracting the text for that range from #text.
#
# @param range [Range] zero-based genomic range; an inclusive end is
#   converted to an exclusive one before any comparison
# @return [Range] end-exclusive range of offsets into #text
# @raise [RuntimeError] if the range lies outside start...end, if the
#   end position is located before the begin position has been, or if
#   no run of sequence characters contains the end position
# @see String#slice
def text_range(range)
  r_end = range.exclude_end? ? range.end : range.end + 1
  wanted = r_end - range.begin

  # Whole-sequence request: no coordinate mapping needed.
  return (0...text.size) if range.begin == start && wanted == size

  if range.begin < start || r_end > self.end
    raise "Range #{range} outside sequence bounds; start #{start}, size #{size}"
  end

  # Without gaps, genomic positions and text offsets differ only by start.
  return ((range.begin - start)...(r_end - start)) unless gapped?

  # Gaps present: walk the alternating runs of sequence and gap characters.
  genome_pos = start    # genomic position where the current run begins
  text_pos = 0          # text offset where the current run begins
  first_offset = nil    # text offset of range.begin, once located
  found = nil
  text.scan(/(\w+|-+)/) do |groups|
    run = groups.first
    unless run[0] == '-'
      run_end = genome_pos + run.size
      if (genome_pos...run_end).cover?(range.begin)
        first_offset = text_pos + (range.begin - genome_pos)
      end
      if r_end.between?(genome_pos, run_end)
        raise "reached end before start!" unless first_offset
        found = first_offset...(text_pos + (r_end - genome_pos))
        break
      end
      # Only sequence runs advance the genomic position; gap runs do not.
      genome_pos = run_end
    end
    text_pos += run.size
  end
  raise "no match found!" unless found
  found
end

#to_bio_alignment ⇒ Object



357
358
359
# File 'lib/bio/maf/maf.rb', line 357

# Builds a Bio::BioAlignment::Sequence from this sequence's source name
# and alignment text.
# NOTE(review): Bio::BioAlignment is defined outside this view; start,
# size, strand and quality are not passed along.
def to_bio_alignment
  Bio::BioAlignment::Sequence.new(source, text)
end