Class: Transrate::Contig

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Includes:
Enumerable
Defined in:
lib/transrate/contig.rb,
ext/transrate/transrate.c

Overview

A contig in a transcriptome assembly.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(seq, name: nil) ⇒ Contig

Returns a new instance of Contig.



18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/transrate/contig.rb', line 18

# Build a contig around a sequence record.
#
# seq  - record object; must respond to #seq (returning a mutable string)
#        and to #data=. When it also responds to #entry_id, that value
#        becomes the contig name.
# name - fallback name, used only when seq has no #entry_id.
#
# The record passed in is mutated: NUL characters are stripped from its
# sequence and its data attribute is cleared.
def initialize(seq, name: nil)
  seq.seq.gsub!("\0", "") # there is probably a better fix than this
  @seq = seq
  @seq.data = nil # no need to store raw fasta string
  @name =
    if seq.respond_to?(:entry_id)
      seq.entry_id
    else
      name
    end
  @hits = []
  # reference-based metrics
  @collapse_factor = @reference_coverage = 0
  @has_crb = @is_chimera = false
  # read-based metrics
  @mean_coverage = @in_bridges = 0
end

Instance Attribute Details

#collapse_factorObject

reference-based metrics



15
16
17
# File 'lib/transrate/contig.rb', line 15

# Reference-based metric: returns @collapse_factor, which the constructor
# initialises to 0.
def collapse_factor
  @collapse_factor
end

#coverageObject

read-based metrics



13
14
15
# File 'lib/transrate/contig.rb', line 13

# Read-based metric: returns @coverage. The constructor does not assign it,
# so it is nil until something sets it; read_metrics treats a falsy value
# as "no read data" and reports "NA".
def coverage
  @coverage
end

#has_crbObject

reference-based metrics



15
16
17
# File 'lib/transrate/contig.rb', line 15

# Reference-based metric: returns @has_crb (false after construction).
# comparative_metrics reports "NA" values while this flag is falsy.
def has_crb
  @has_crb
end

#hitsObject

Returns the value of attribute hits.



16
17
18
# File 'lib/transrate/contig.rb', line 16

# Returns @hits, an Array that starts empty. comparative_metrics calls
# #target on each element.
def hits
  @hits
end

#in_bridgesObject

read-based metrics



13
14
15
# File 'lib/transrate/contig.rb', line 13

# Read-based metric: returns @in_bridges (0 after construction).
# read_metrics reports it whether or not coverage is available.
def in_bridges
  @in_bridges
end

#is_chimeraObject

reference-based metrics



15
16
17
# File 'lib/transrate/contig.rb', line 15

# Reference-based metric: returns @is_chimera (false after construction).
def is_chimera
  @is_chimera
end

#mean_coverageObject

read-based metrics



13
14
15
# File 'lib/transrate/contig.rb', line 13

# Read-based metric: returns @mean_coverage (0 after construction).
def mean_coverage
  @mean_coverage
end

#nameObject

Returns the value of attribute name.



11
12
13
# File 'lib/transrate/contig.rb', line 11

# Returns the contig name: the sequence record's entry_id when it has one,
# otherwise the name: argument given to the constructor.
def name
  @name
end

#reference_coverageObject

reference-based metrics



15
16
17
# File 'lib/transrate/contig.rb', line 15

# Reference-based metric: returns @reference_coverage (0 after construction).
def reference_coverage
  @reference_coverage
end

#seqObject

Returns the value of attribute seq.



11
12
13
# File 'lib/transrate/contig.rb', line 11

# Returns the sequence record handed to the constructor. Other methods call
# #seq on this record to obtain the sequence string itself.
def seq
  @seq
end

#uncovered_basesObject

read-based metrics



13
14
15
# File 'lib/transrate/contig.rb', line 13

# Read-based metric: returns @uncovered_bases. The constructor does not
# assign it, so it is nil until something sets it.
def uncovered_bases
  @uncovered_bases
end

Instance Method Details

#at_skewObject

AT skew



184
185
186
# File 'lib/transrate/contig.rb', line 184

# AT skew: (A - T) / (A + T) as a Float.
# Yields NaN when the contig has neither A nor T (0 divided by 0.0).
def at_skew
  a_count = bases_a
  t_count = bases_t
  (a_count - t_count).fdiv(a_count + t_count)
end

#base_compositionObject

Base composition of the contig

The result is computed lazily. On the first call, when @base_composition is nil, the C method is invoked to count the bases and dibases in the sequence; the counts are then read out of the C arrays and stored in two hashes (@base_composition and @dibase_composition). Subsequent calls simply return the cached @base_composition hash.



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/transrate/contig.rb', line 84

# Base composition of the contig, as a Hash of Symbol => count with the
# keys :a, :c, :g, :t and :n.
#
# The result is memoised. The first call hands the sequence string to the
# C method `composition`, then reads the per-base counts via `base_count`
# and the per-dibase counts via `dibase_count`. As a side effect it also
# fills @dibase_composition (keys :aa, :ac, ... :nn).
def base_composition
  return @base_composition if @base_composition

  # not cached yet: run the C method
  composition(@seq.seq)
  letters = %w[a c g t n]
  @base_composition = {}
  @dibase_composition = {}
  # Indices passed to the C readers follow the alphabet order above.
  letters.each_with_index do |letter, idx|
    @base_composition[letter.to_sym] = base_count(idx)
  end
  # Dibase indices enumerate every ordered pair, first letter varying slowest.
  letters.product(letters).each_with_index do |(first, second), idx|
    @dibase_composition[:"#{first}#{second}"] = dibase_count(idx)
  end
  @base_composition
end

#base_countObject



15
# File 'ext/transrate/transrate.c', line 15

/* Prototype for the C function listed on this page under Contig#base_count;
 * its body is not shown here. Ruby code calls base_count(i) with a single
 * index argument (see base_composition). */
VALUE method_base_count(VALUE,VALUE);

#bases_aObject

Number of bases that are A



142
143
144
# File 'lib/transrate/contig.rb', line 142

# Number of bases that are A: the :a entry of base_composition, which is
# computed on first use and cached.
def bases_a
  base_composition[:a]
end

#bases_cObject

Number of bases that are C



122
123
124
# File 'lib/transrate/contig.rb', line 122

# Number of bases that are C: the :c entry of base_composition, which is
# computed on first use and cached.
def bases_c
  base_composition[:c]
end

#bases_gObject

Number of bases that are G



132
133
134
# File 'lib/transrate/contig.rb', line 132

# Number of bases that are G: the :g entry of base_composition, which is
# computed on first use and cached.
def bases_g
  base_composition[:g]
end

#bases_gcObject

Number of bases that are G or C



170
171
172
# File 'lib/transrate/contig.rb', line 170

# Number of bases that are G or C (bases_g plus bases_c).
def bases_gc
  g_count = bases_g
  c_count = bases_c
  g_count + c_count
end

#bases_nObject



161
162
163
# File 'lib/transrate/contig.rb', line 161

# Count stored under :n in base_composition (computed on first use and
# cached).
def bases_n
  base_composition[:n]
end

#bases_tObject

Number of bases that are T



152
153
154
# File 'lib/transrate/contig.rb', line 152

# Number of bases that are T: the :t entry of base_composition, which is
# computed on first use and cached.
def bases_t
  base_composition[:t]
end

#basic_metricsObject

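Get the basic sequence metrics for this contig: length, GC proportion, GC and AT skew, CpG count and ratio, ORF length, and linguistic complexity at k = 6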



37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/transrate/contig.rb', line 37

# Basic sequence metrics for this contig.
#
# Returns a Hash with the keys :length, :prop_gc, :gc_skew, :at_skew,
# :cpg_count, :cpg_ratio, :orf_length and :linguistic_complexity_6, each
# holding the result of the method of the same name (linguistic complexity
# is evaluated with k = 6).
def basic_metrics
  # The hash literal is the return value; the former `basic =` local was
  # never read and only produced an "assigned but unused variable" warning.
  {
    :length => length,
    :prop_gc => prop_gc,
    :gc_skew => gc_skew,
    :at_skew => at_skew,
    :cpg_count => cpg_count,
    :cpg_ratio => cpg_ratio,
    :orf_length => orf_length,
    :linguistic_complexity_6 => linguistic_complexity(6)
  }
end

#comparative_metricsObject



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/transrate/contig.rb', line 62

# Reference-based metrics for this contig, as a Hash with the keys
# :has_crb, :collapse_factor, :reference_coverage, :is_chimera and :hits.
#
# When @has_crb is truthy the values come from the matching readers, and
# :hits is the #target of every hit joined with ";". Otherwise :has_crb is
# false and every other value is the string "NA".
def comparative_metrics
  # The former `reference =` local was never read; the hash is returned
  # directly instead.
  if @has_crb
    {
      :has_crb => has_crb,
      :collapse_factor => collapse_factor,
      :reference_coverage => reference_coverage,
      :is_chimera => is_chimera,
      :hits => hits.map(&:target).join(";")
    }
  else
    {
      :has_crb => false,
      :collapse_factor => "NA",
      :reference_coverage => "NA",
      :is_chimera => "NA",
      :hits => "NA"
    }
  end
end

#compositionObject

VALUE TestInit(VALUE, VALUE, VALUE, VALUE, VALUE);



14
# File 'ext/transrate/transrate.c', line 14

/* Prototype for the C function listed on this page under Contig#composition;
 * its body is not shown here. Ruby code calls composition(@seq.seq) with the
 * sequence string before reading counts (see base_composition). */
VALUE method_composition(VALUE, VALUE);

#cpg_countObject

CpG count



189
190
191
# File 'lib/transrate/contig.rb', line 189

# CpG count: the :cg dibase count plus the :gc dibase count.
# NOTE(review): CpG usually denotes C followed by G only; confirm that
# including the :gc dinucleotide here is intended.
def cpg_count
  dibases = dibase_composition
  dibases[:cg] + dibases[:gc]
end

#cpg_ratioObject

observed-to-expected CpG (C-phosphate-G) ratio



194
195
196
197
198
199
# File 'lib/transrate/contig.rb', line 194

# Observed-to-expected CpG (C-phosphate-G) ratio, computed as
#   (cg + gc dibase counts) / (C count * G count) * (length - N count)
# The division is done in floating point, so a contig with no C or no G
# produces Infinity or NaN rather than raising.
def cpg_ratio
  dibases = dibase_composition
  observed = dibases[:cg] + dibases[:gc]
  cg_product = (bases_c * bases_g).to_f
  observed / cg_product * (length - bases_n)
end

#dibase_compositionObject

Dibase composition of the contig



113
114
115
116
117
118
119
# File 'lib/transrate/contig.rb', line 113

# Dibase composition of the contig, as a Hash keyed by two-letter Symbols.
# The hash is populated as a side effect of base_composition, so that
# method is invoked first when nothing has been cached yet.
def dibase_composition
  base_composition unless @dibase_composition
  @dibase_composition
end

#dibase_countObject



16
# File 'ext/transrate/transrate.c', line 16

/* Prototype for the C function listed on this page under Contig#dibase_count;
 * its body is not shown here. Ruby code calls dibase_count(i) with a single
 * index argument (see base_composition). */
VALUE method_dibase_count(VALUE,VALUE);

#each(&block) ⇒ Object



32
33
34
# File 'lib/transrate/contig.rb', line 32

# Iterate over the characters of the underlying sequence string, yielding
# each one to the given block. Delegates to each_char on @seq.seq, so the
# return value is whatever each_char returns.
#
# The block argument is parenthesised in both the definition and the call
# so that `&` cannot be read as a binary operator (the unparenthesised form
# draws an "interpreted as argument prefix" warning).
def each(&block)
  @seq.seq.each_char(&block)
end

#gc_skewObject

GC skew



179
180
181
# File 'lib/transrate/contig.rb', line 179

# GC skew: (G - C) / (G + C) as a Float.
# Yields NaN when the contig has neither G nor C (0 divided by 0.0).
def gc_skew
  g_count = bases_g
  c_count = bases_c
  (g_count - c_count).fdiv(g_count + c_count)
end

#kmer_countObject



17
# File 'ext/transrate/transrate.c', line 17

/* Prototype for the C function listed on this page under Contig#kmer_count;
 * its body is not shown here. Ruby code calls kmer_count(k, @seq.seq)
 * (see linguistic_complexity). */
VALUE method_kmer_count(VALUE,VALUE,VALUE);

#linguistic_complexity(k) ⇒ Object



208
209
210
# File 'lib/transrate/contig.rb', line 208

# Linguistic complexity for word size k: the value returned by the C
# method kmer_count for this sequence, divided by 4**k.
#
# k - word size passed through to kmer_count.
def linguistic_complexity(k)
  observed = kmer_count(k, @seq.seq) # call to C
  possible = (4**k).to_f
  observed / possible
end

#longest_orfObject



18
# File 'ext/transrate/transrate.c', line 18

/* Prototype for the C function listed on this page under Contig#longest_orf;
 * its body is not shown here. Ruby code calls longest_orf(@seq.seq) and
 * caches the result (see orf_length). */
VALUE method_longest_orf(VALUE, VALUE);

#orf_lengthObject

Find the longest orf in the contig



202
203
204
205
206
# File 'lib/transrate/contig.rb', line 202

# Find the longest orf in the contig.
# The value comes from the C method longest_orf and is cached in
# @orf_length, so the C call happens at most once per contig.
def orf_length
  @orf_length ||= longest_orf(@seq.seq) # call to C
end

#prop_aObject

Proportion of bases that are A



147
148
149
# File 'lib/transrate/contig.rb', line 147

# Proportion of bases that are A: bases_a divided by the contig length,
# as a Float.
def prop_a
  bases_a.fdiv(length)
end

#prop_cObject

Proportion of bases that are C



127
128
129
# File 'lib/transrate/contig.rb', line 127

# Proportion of bases that are C: bases_c divided by the contig length,
# as a Float.
def prop_c
  bases_c.fdiv(length)
end

#prop_gObject

Proportion of bases that are G



137
138
139
# File 'lib/transrate/contig.rb', line 137

# Proportion of bases that are G: bases_g divided by the contig length,
# as a Float.
def prop_g
  bases_g.fdiv(length)
end

#prop_gcObject



174
175
176
# File 'lib/transrate/contig.rb', line 174

# Proportion of bases that are G or C, obtained by adding the two
# separately computed proportions prop_g and prop_c.
def prop_gc
  g_share = prop_g
  c_share = prop_c
  g_share + c_share
end

#prop_nObject



165
166
167
# File 'lib/transrate/contig.rb', line 165

# Proportion of the contig counted under :n: bases_n divided by the contig
# length, as a Float.
def prop_n
  bases_n.fdiv(length)
end

#prop_tObject

Proportion of bases that are T



157
158
159
# File 'lib/transrate/contig.rb', line 157

# Proportion of bases that are T: bases_t divided by the contig length,
# as a Float.
def prop_t
  bases_t.fdiv(length)
end

#read_metricsObject



50
51
52
53
54
55
56
57
58
59
60
# File 'lib/transrate/contig.rb', line 50

# Read-based metrics for this contig, as a Hash with the keys
# :uncovered_bases, :mean_coverage and :in_bridges.
#
# When @coverage is truthy the values come from the matching readers.
# Otherwise :uncovered_bases and :mean_coverage are the string "NA";
# :in_bridges is still taken from its reader in both cases.
def read_metrics
  # The former `read =` local was never read; the hash is returned directly.
  if @coverage
    {
      :uncovered_bases => uncovered_bases,
      :mean_coverage => mean_coverage,
      :in_bridges => in_bridges
    }
  else
    {
      :uncovered_bases => "NA",
      :mean_coverage => "NA",
      :in_bridges => in_bridges
    }
  end
end