Class: Bio::MAF::Sequence

Inherits:

Object

Object
Bio::MAF::Sequence

show all

Defined in: lib/bio/maf/maf.rb

Overview

A sequence within an alignment block.

Direct Known Subclasses

EmptySequence

Constant Summary collapse

# Lookup table from a one-character status code (as decoded by
# #decode_status_char for the 'i' line status fields) to its symbolic name.
# Frozen so the shared table cannot be altered at runtime.
I_STATUS = {
  'C' => :contiguous,
  'I' => :intervening,
  'N' => :first,
  'n' => :first_bridged,
  'M' => :missing_data,
  'T' => :tandem
}.freeze

Instance Attribute Summary collapse

#i_data ⇒ Array<String>
Array of raw synteny information from 'i' line.
#quality ⇒ String
Quality string from 'q' line.
#size ⇒ Integer readonly
Size of aligning region in source sequence.
#source ⇒ String readonly
Source sequence name.
#src_size ⇒ Integer (also: #source_size) readonly
Size of the entire source sequence, not just the aligning region.
#start ⇒ Integer readonly
Zero-based start position.
#strand ⇒ Symbol readonly
:+ or :-, indicating which strand the alignment is to.
#text ⇒ String readonly
Sequence data for the alignment, including insertions.

Instance Method Summary collapse

#decode_status_char(c) ⇒ Object
#delete_text(offset, len) ⇒ Object
#empty? ⇒ Boolean
Whether this sequence is empty.
#end ⇒ Object
#fasta_desc ⇒ Object
#gapped? ⇒ Boolean
#initialize(source, start, size, strand, src_size, text) ⇒ Sequence constructor
A new instance of Sequence.
#interval ⇒ Object
#join(o) ⇒ Object
#joinable_with?(o) ⇒ Boolean
#left_count ⇒ Object
#left_status ⇒ Object
#left_status_char ⇒ Object
#right_count ⇒ Object
#right_status ⇒ Object
#right_status_char ⇒ Object
#slice(range) ⇒ Object
#species ⇒ Object
#text_range(range) ⇒ Object
Maps the given zero-based genomic range onto a range of string offsets, suitable for extracting the text for the given range from #text.
#to_bio_alignment ⇒ Object

Constructor Details

#initialize(source, start, size, strand, src_size, text) ⇒ `Sequence`

Returns a new instance of Sequence.

# File 'lib/bio/maf/maf.rb', line 262

# Builds a sequence record for one row of an alignment block.
#
# @param source [String] source sequence name
# @param start [Integer] zero-based start position
# @param size [Integer] size of the aligning region in the source sequence
# @param strand [Symbol] :+ or :-
# @param src_size [Integer] size of the entire source sequence
# @param text [String] sequence data for the alignment, including insertions
def initialize(source, start, size, strand, src_size, text)
  # Location of the aligned region.
  @source, @start, @size = source, start, size
  # Orientation, total source length and the aligned text itself.
  @strand, @src_size, @text = strand, src_size, text
end

Instance Attribute Details

#i_data ⇒ `Array<String>`

Array of raw synteny information from 'i' line.

Returns:

(Array<String>)



256
257
258

# File 'lib/bio/maf/maf.rb', line 256

# Raw synteny information from the 'i' line.
# The constructor does not assign it, so it is nil until set.
#
# @return [Array<String>]
def i_data
  @i_data
end

#quality ⇒ `String`

Quality string from 'q' line.

Returns:

(String)



259
260
261

# File 'lib/bio/maf/maf.rb', line 259

# Quality string from the 'q' line.
# The constructor does not assign it, so it is nil until set.
#
# @return [String]
def quality
  @quality
end

#size ⇒ `Integer` (readonly)

Returns the size of the aligning region in the source sequence.

Returns:

(Integer) —
Size of aligning region in source sequence.



243
244
245

# File 'lib/bio/maf/maf.rb', line 243

# Size of the aligning region in the source sequence, as given to the
# constructor.
#
# @return [Integer]
def size
  @size
end

#source ⇒ `String` (readonly)

Returns the source sequence name.

Returns:

(String) —
Source sequence name.



239
240
241

# File 'lib/bio/maf/maf.rb', line 239

# Source sequence name, as given to the constructor.
#
# @return [String]
def source
  @source
end

#src_size ⇒ `Integer` (readonly) Also known as: source_size

Size of the entire source sequence, not just the aligning region.

Returns:

(Integer)



250
251
252

# File 'lib/bio/maf/maf.rb', line 250

# Size of the entire source sequence, not just the aligning region.
# Also available as #source_size.
#
# @return [Integer]
def src_size
  @src_size
end

#start ⇒ `Integer` (readonly)

Returns the zero-based start position.

Returns:

(Integer) —
Zero-based start position.



241
242
243

# File 'lib/bio/maf/maf.rb', line 241

# Zero-based start position, as given to the constructor.
#
# @return [Integer]
def start
  @start
end

#strand ⇒ `Symbol` (readonly)

:+ or :-, indicating which strand the alignment is to.

Returns:

(Symbol)



246
247
248

# File 'lib/bio/maf/maf.rb', line 246

# Strand the alignment is to: :+ or :-.
#
# @return [Symbol]
def strand
  @strand
end

#text ⇒ `String` (readonly)

Sequence data for the alignment, including insertions.

Returns:

(String)



253
254
255

# File 'lib/bio/maf/maf.rb', line 253

# Sequence data for the alignment, including insertions.
#
# @return [String]
def text
  @text
end

Instance Method Details

#decode_status_char(c) ⇒ `Object`



317
318
319

# File 'lib/bio/maf/maf.rb', line 317

# Translates a status character into its symbolic name via I_STATUS.
#
# @param c [String] status character to look up
# @return [Symbol] the matching I_STATUS value
# @raise [RuntimeError] if the character has no I_STATUS entry
def decode_status_char(c)
  status = I_STATUS[c]
  raise "Unsupported status character #{c}!" unless status

  status
end

#delete_text(offset, len) ⇒ `Object`

# File 'lib/bio/maf/maf.rb', line 350

# Removes +len+ characters starting at +offset+ from #text in place, and
# from #quality as well when a quality string is present. Does nothing
# when #empty? is true.
#
# @param offset [Integer] string offset at which to start deleting
# @param len [Integer] number of characters to delete
# @return [String, nil] the characters removed from #quality, or nil when
#   there is no quality string or the sequence is empty
def delete_text(offset, len)
  return if empty?

  text.slice!(offset, len)
  quality.slice!(offset, len) if quality
end

#empty? ⇒ `Boolean`

Whether this sequence is empty. Only true for EmptySequence instances from 'e' lines.

Returns:

(Boolean)



300
301
302

# File 'lib/bio/maf/maf.rb', line 300

# Whether this sequence is empty. Always false here; only true for
# EmptySequence instances from 'e' lines.
#
# @return [Boolean]
def empty?
  false
end

#end ⇒ `Object`



271
272
273

# File 'lib/bio/maf/maf.rb', line 271

# Sum of #start and #size.
# NOTE(review): since #start is zero-based this looks like an exclusive end
# coordinate — confirm against callers.
def end
  start + size
end

#fasta_desc ⇒ `Object`



363
364
365

# File 'lib/bio/maf/maf.rb', line 363

# Builds a description string of the form "source:start-end", where the
# end value is start plus size.
#
# @return [String]
def fasta_desc
  stop = start + size
  format('%s:%s-%s', source, start, stop)
end

#gapped? ⇒ `Boolean`

Returns:

(Boolean)



304
305
306

# File 'lib/bio/maf/maf.rb', line 304

# True when #size differs from the length of #text.
# NOTE(review): presumably the difference comes from gap characters in the
# aligned text — confirm.
#
# @return [Boolean]
def gapped?
  size != text.size
end

#interval ⇒ `Object`



275
276
277

# File 'lib/bio/maf/maf.rb', line 275

# Builds a GenomicInterval from this sequence's source name, start and end
# using GenomicInterval.zero_based.
#
# @return [Object] whatever GenomicInterval.zero_based returns
def interval
  name = source
  first = start
  last = self.end # `end` is a keyword, so the explicit receiver is required
  GenomicInterval.zero_based(name, first, last)
end

#join(o) ⇒ `Object`

# File 'lib/bio/maf/maf.rb', line 373

# Concatenates this sequence with +o+ into a new Sequence.
#
# The result keeps this sequence's source, start, strand and src_size; its
# size and text are the sums/concatenations of both operands. Quality is
# concatenated only when both sequences carry a quality string.
#
# @param o [Sequence] sequence to append
# @return [Sequence] the combined sequence
def join(o)
  combined_size = size + o.size
  combined_text = text + o.text
  Sequence.new(source, start, combined_size, strand, src_size, combined_text).tap do |joined|
    joined.quality = quality + o.quality if quality && o.quality
  end
end

#joinable_with?(o) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/bio/maf/maf.rb', line 367

# Whether +o+ can be joined onto the end of this sequence: it must start
# exactly where this one ends, be on the same strand, and agree with this
# sequence on #empty?.
#
# @param o [Sequence] candidate sequence
# @return [Boolean]
def joinable_with?(o)
  return false unless self.end == o.start
  return false unless strand == o.strand

  empty? == o.empty?
end

#left_count ⇒ `Object`



329
330
331

# File 'lib/bio/maf/maf.rb', line 329

# Integer value of the second #i_data field (index 1).
#
# @return [Integer, nil] the count, or the falsy #i_data value itself when
#   no synteny data is present
def left_count
  fields = i_data
  fields ? fields[1].to_i : fields
end

#left_status ⇒ `Object`



325
326
327

# File 'lib/bio/maf/maf.rb', line 325

# Decoded form of #left_status_char, obtained through #decode_status_char.
#
# @return [Object, nil] the decoded status, or the falsy #i_data value
#   itself when no synteny data is present
def left_status
  fields = i_data
  return fields unless fields

  decode_status_char(left_status_char)
end

#left_status_char ⇒ `Object`



321
322
323

# File 'lib/bio/maf/maf.rb', line 321

# First #i_data field (index 0), undecoded.
#
# @return [String, nil] the raw field, or the falsy #i_data value itself
#   when no synteny data is present
def left_status_char
  fields = i_data
  fields ? fields[0] : fields
end

#right_count ⇒ `Object`



341
342
343

# File 'lib/bio/maf/maf.rb', line 341

# Integer value of the fourth #i_data field (index 3).
#
# @return [Integer, nil] the count, or the falsy #i_data value itself when
#   no synteny data is present
def right_count
  fields = i_data
  fields ? fields[3].to_i : fields
end

#right_status ⇒ `Object`



337
338
339

# File 'lib/bio/maf/maf.rb', line 337

# Decoded form of #right_status_char, obtained through #decode_status_char.
#
# @return [Object, nil] the decoded status, or the falsy #i_data value
#   itself when no synteny data is present
def right_status
  fields = i_data
  return fields unless fields

  decode_status_char(right_status_char)
end

#right_status_char ⇒ `Object`



333
334
335

# File 'lib/bio/maf/maf.rb', line 333

# Third #i_data field (index 2), undecoded.
#
# @return [String, nil] the raw field, or the falsy #i_data value itself
#   when no synteny data is present
def right_status_char
  fields = i_data
  fields ? fields[2] : fields
end

#slice(range) ⇒ `Object`

# File 'lib/bio/maf/maf.rb', line 279

# Extracts the given range of text columns as a new Sequence.
#
# +range+ indexes into #text, gap characters ("-") included. The new
# sequence's start is advanced by the number of non-gap characters that
# precede the range, and its size is the number of non-gap characters inside
# the range. The quality string, when present, is sliced with the same range.
# Synteny data (#i_data) is not carried over.
#
# @param range [Range] string offsets into #text
# @return [Sequence] the sliced sequence
# @raise [RuntimeError] if the range cannot be extracted from #text
def slice(range)
  # A beginless range has a nil begin. Nothing precedes such a range, so
  # treat it as 0; otherwise 0...nil would be an endless range selecting the
  # whole text and the new start would be shifted by every non-gap base.
  first_col = range.begin || 0
  before = text.slice(0...first_col)
  non_gap_prev = before.delete("-").size
  new_text = text.slice(range)
  unless new_text
    raise "could not extract slice #{range} from #{self.inspect}!"
  end
  non_gap_text = new_text.delete("-").size
  s2 = Sequence.new(source,
                    start + non_gap_prev,
                    non_gap_text,
                    strand,
                    src_size,
                    new_text)
  s2.quality = quality.slice(range) if quality
  # TODO: what to do with synteny data?
  s2
end

#species ⇒ `Object`

# File 'lib/bio/maf/maf.rb', line 345

# Portion of #source before its first '.', or nil when #source contains no
# '.' at all.
#
# @return [String, nil]
def species
  prefix, remainder = source.split('.', 2)
  # With a limit of 2, a second element exists exactly when a '.' was found.
  remainder ? prefix : nil
end

#text_range(range) ⇒ `Object`

Maps the given zero-based genomic range onto a range of string offsets, suitable for extracting the text for the given range from #text.

#to_bio_alignment ⇒ `Object`



359
360
361

# File 'lib/bio/maf/maf.rb', line 359

# Builds a Bio::BioAlignment::Sequence from this sequence's #source name
# and #text.
def to_bio_alignment
  Bio::BioAlignment::Sequence.new(source, text)
end

Class: Bio::MAF::Sequence

Overview

Direct Known Subclasses

Constant Summary collapse

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(source, start, size, strand, src_size, text) ⇒ Sequence

Instance Attribute Details

#i_data ⇒ Array<String>

#quality ⇒ String

#size ⇒ Integer (readonly)

#source ⇒ String (readonly)

#src_size ⇒ Integer (readonly) Also known as: source_size

#start ⇒ Integer (readonly)

#strand ⇒ Symbol (readonly)

#text ⇒ String (readonly)

Instance Method Details

#decode_status_char(c) ⇒ Object

#delete_text(offset, len) ⇒ Object

#empty? ⇒ Boolean

#end ⇒ Object

#fasta_desc ⇒ Object

#gapped? ⇒ Boolean

#interval ⇒ Object

#join(o) ⇒ Object

#joinable_with?(o) ⇒ Boolean

#left_count ⇒ Object

#left_status ⇒ Object

#left_status_char ⇒ Object

#right_count ⇒ Object

#right_status ⇒ Object

#right_status_char ⇒ Object

#slice(range) ⇒ Object

#species ⇒ Object

#text_range(range) ⇒ Object

#to_bio_alignment ⇒ Object

#initialize(source, start, size, strand, src_size, text) ⇒ `Sequence`

#i_data ⇒ `Array<String>`

#quality ⇒ `String`

#size ⇒ `Integer` (readonly)

#source ⇒ `String` (readonly)

#src_size ⇒ `Integer` (readonly) Also known as: source_size

#start ⇒ `Integer` (readonly)

#strand ⇒ `Symbol` (readonly)

#text ⇒ `String` (readonly)

#decode_status_char(c) ⇒ `Object`

#delete_text(offset, len) ⇒ `Object`

#empty? ⇒ `Boolean`

#end ⇒ `Object`

#fasta_desc ⇒ `Object`

#gapped? ⇒ `Boolean`

#interval ⇒ `Object`

#join(o) ⇒ `Object`

#joinable_with?(o) ⇒ `Boolean`

#left_count ⇒ `Object`

#left_status ⇒ `Object`

#left_status_char ⇒ `Object`

#right_count ⇒ `Object`

#right_status ⇒ `Object`

#right_status_char ⇒ `Object`

#slice(range) ⇒ `Object`

#species ⇒ `Object`

#text_range(range) ⇒ `Object`

#to_bio_alignment ⇒ `Object`