Class: Bio::Assembly::Contig

Inherits:
Object
  • Object
show all
Defined in:
lib/bio-assembly/contig.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(str = "") ⇒ Contig

Returns a new instance of Contig.



8
9
10
11
12
13
# File 'lib/bio-assembly/contig.rb', line 8

# Creates a contig with no reads around the given consensus sequence text.
#
# @param str [String] sequence text passed to Bio::Sequence::NA.new
def initialize(str = "")
  @seq = Bio::Sequence::NA.new(str)
  @reads = {}
  # running counter for RD identifiers
  @rds_parsed = 0
end

Instance Attribute Details

#from ⇒ Object

Returns the value of attribute from.



5
6
7
# File 'lib/bio-assembly/contig.rb', line 5

# Reader for the +from+ attribute.
#
# @return [Object, nil] the current value of @from; nil if it has not been
#   assigned (the constructor does not set it)
def from
  @from
end

#name ⇒ Object

Returns the value of attribute name.



5
6
7
# File 'lib/bio-assembly/contig.rb', line 5

# Reader for the +name+ attribute.
#
# to_ace writes this value as the second field of the "CO" header line.
#
# @return [Object, nil] the current value of @name; nil if it has not been
#   assigned (the constructor does not set it)
def name
  @name
end

#orientation ⇒ Object

Returns the value of attribute orientation.



5
6
7
# File 'lib/bio-assembly/contig.rb', line 5

# Reader for the +orientation+ attribute.
#
# to_ace writes this value as the last field of the "CO" header line.
#
# @return [Object, nil] the current value of @orientation; nil if it has not
#   been assigned (the constructor does not set it)
def orientation
  @orientation
end

#quality ⇒ Object

Returns the value of attribute quality.



5
6
7
# File 'lib/bio-assembly/contig.rb', line 5

# Reader for the +quality+ attribute.
#
# to_ace treats this value as an Array-like list (it asks for its size, slices
# it and joins the slices with spaces) when writing the "BQ" section.
#
# @return [Object, nil] the current value of @quality; nil if it has not been
#   assigned (the constructor does not set it)
def quality
  @quality
end

#reads ⇒ Object

Returns the value of attribute reads.



5
6
7
# File 'lib/bio-assembly/contig.rb', line 5

# Reader for the +reads+ attribute.
#
# The constructor initialises @reads to an empty Hash and add_read stores each
# read in it under read.name.
#
# @return [Hash] the current value of @reads
def reads
  @reads
end

#seq ⇒ Object Also known as: consensus_seq

Returns the value of attribute seq.



5
6
7
# File 'lib/bio-assembly/contig.rb', line 5

# Reader for the +seq+ attribute (the page lists consensus_seq as an alias).
#
# The constructor sets @seq to Bio::Sequence::NA.new(str).
#
# @return [Object] the current value of @seq
def seq
  @seq
end

#to ⇒ Object

Returns the value of attribute to.



5
6
7
# File 'lib/bio-assembly/contig.rb', line 5

# Reader for the +to+ attribute.
#
# @return [Object, nil] the current value of @to; nil if it has not been
#   assigned (the constructor does not set it)
def to
  @to
end

Instance Method Details

#add_read(read) ⇒ Object



38
39
40
41
# File 'lib/bio-assembly/contig.rb', line 38

# Registers a read with this contig, indexed by its name.
#
# A read whose name is already present replaces the earlier entry.
#
# @param read [#name] the read to store
# @return [Object] the read that was passed in
def add_read(read)
  # TODO do some checks for pos location
  @reads.store(read.name, read)
end

#each_read ⇒ Object



43
44
45
# File 'lib/bio-assembly/contig.rb', line 43

# Iterates over every read stored in this contig.
#
# Called without a block it returns an Enumerator instead of raising
# LocalJumpError, so callers can chain Enumerable methods
# (e.g. each_read.map { ... }).
#
# @yieldparam read [Object] each value of @reads, in the Hash's iteration order
# @return [Hash, Enumerator] @reads when a block is given, otherwise an
#   Enumerator over the reads
def each_read
  return enum_for(:each_read) unless block_given?

  @reads.each_value { |read| yield read }
end

#find_read_by_name(name) ⇒ Object



15
16
17
# File 'lib/bio-assembly/contig.rb', line 15

# Looks up a read by the key it is stored under in @reads (add_read uses
# read.name as that key).
#
# @param name [Object] key to look up
# @return [Object, nil] the result of @reads[name]; nil for an unknown name
#   when @reads is the plain Hash created by the constructor
def find_read_by_name(name)
  @reads[name]
end

#find_reads_in_range(clear_range_from, clear_range_to) ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/bio-assembly/contig.rb', line 19

# Collects the reads whose clear range overlaps the given region.
#
# For each read the span is taken as
#   start = read.from + read.clear_range_from
#   stop  = read.to   + read.clear_range_to
# and the read is selected when that span overlaps the region, i.e.
# start < clear_range_to and stop > clear_range_from.
#
# The previous three-branch test (starts inside / ends inside / encompasses,
# all with strict comparisons) skipped reads aligned to a region boundary,
# for example a read covering exactly the region, or one that starts at
# clear_range_from and runs past clear_range_to. Those reads are now returned.
# A read that merely touches the region (stop == clear_range_from or
# start == clear_range_to) is still not returned, as before.
#
# NOTE(review): the stop coordinate is read.to + read.clear_range_to, kept
# from the original code; confirm that read.from + read.clear_range_to was
# not intended.
#
# @param clear_range_from [Numeric] lower bound of the region
# @param clear_range_to [Numeric] upper bound of the region
# @return [Array] the overlapping reads, in each_read order
def find_reads_in_range(clear_range_from, clear_range_to)
  reads_in_range = []
  each_read do |read|
    read_start = read.from + read.clear_range_from
    read_stop = read.to + read.clear_range_to
    # Interval-overlap test; assumes read_start <= read_stop and
    # clear_range_from <= clear_range_to.
    reads_in_range << read if read_start < clear_range_to && read_stop > clear_range_from
  end
  reads_in_range
end

#num_base_segments ⇒ Object



55
56
57
58
59
60
61
# File 'lib/bio-assembly/contig.rb', line 55

# Counts the base segments of all reads in the contig.
#
# Reads whose base_sequences is nil contribute nothing to the total.
#
# @return [Integer] sum of read.base_sequences.size over every read
def num_base_segments
  total = 0
  each_read do |read|
    segments = read.base_sequences
    next if segments.nil?

    total += segments.size
  end
  total
end

#num_bases ⇒ Object



51
52
53
# File 'lib/bio-assembly/contig.rb', line 51

# Number of bases in the consensus sequence.
#
# @return [Integer] the length of seq
def num_bases
  consensus = seq
  consensus.size
end

#num_reads ⇒ Object



47
48
49
# File 'lib/bio-assembly/contig.rb', line 47

# Number of reads currently stored in the contig.
#
# @return [Integer] how many entries @reads holds
def num_reads
  @reads.length
end

#to_ace ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/bio-assembly/contig.rb', line 63

# Serialises the contig and its reads as ACE-formatted text.
#
# Layout of the returned string:
# 1. a "CO" header line: name, num_bases, num_reads, num_base_segments and
#    orientation separated by single spaces;
# 2. the consensus sequence wrapped at 50 characters per line, then a blank
#    line;
# 3. a "BQ" line followed by the quality values, 50 per line, each line
#    prefixed with one space, then a blank line;
# 4. for the reads in sorted order: every read's to_ace_af output, then every
#    read's to_ace_bs output, a blank line, then every read's to_ace_rest
#    output.
#
# Quality values are chunked with each_slice, so no empty " " line is written
# when the number of values is a multiple of 50 (or zero); the previous
# index arithmetic always ran one extra iteration in that case.
#
# @return [String] the ACE representation
def to_ace
  ace = String.new("")
  ace << ['CO', name, num_bases, num_reads, num_base_segments, orientation].join(' ') << "\n"
  ace << seq.to_s.gsub(/.{1,50}/, "\\0\n") << "\n"
  ace << "BQ\n"
  quality.each_slice(50) do |scores|
    ace << ' ' << scores.join(' ') << "\n"
  end
  ace << "\n"

  # holds BS data for reads
  bs_str = String.new("")
  # holds RD, QA, and DS data for reads
  rest_str = String.new("")
  # sorting relies on the reads implementing <=>
  @reads.values.sort.each do |read|
    ace << read.to_ace_af
    bs_str << read.to_ace_bs
    rest_str << read.to_ace_rest
  end

  # compile data in correct order: AF lines are already in place
  ace << bs_str << "\n" << rest_str
end