Class: GeneValidator::OpenReadingFrameValidation

Inherits:
ValidationTest show all
Defined in:
lib/genevalidator/validation_open_reading_frame.rb

Overview

This class contains the methods necessary for checking whether there is a main Open Reading Frame in the predicted sequence

Instance Attribute Summary collapse

Attributes inherited from ValidationTest

#cli_name, #description, #header, #hits, #prediction, #running_time, #short_header, #type, #validation_report

Instance Method Summary collapse

Constructor Details

#initialize(type, prediction, hits, filename) ⇒ OpenReadingFrameValidation

Initializes the object. Params: type: type of the predicted sequence (:nucleotide or :protein); prediction: a Sequence object representing the blast query; hits: a vector of Sequence objects (representing blast hits); filename: name of the input file, used when making plot files.



69
70
71
72
73
74
75
76
77
78
# File 'lib/genevalidator/validation_open_reading_frame.rb', line 69

# Sets up the validation and the labels under which it is reported.
#
# Params:
# +type+, +prediction+, +hits+, +filename+:: all four are handed, unchanged,
#   to the parent constructor by the bare +super+ call
# +filename+:: additionally stored in @filename
def initialize(type, prediction, hits, filename)
  super
  @filename     = filename
  @cli_name     = 'orf'
  @short_header = 'ORF'
  @header       = 'Main ORF'
  @description  = 'Check whether there is a single main Open Reading Frame in' \
                  ' the predicted gene. Applicable only for nucleotide queries.'
end

Instance Attribute Details

#filename ⇒ Object (readonly)

Returns the value of attribute filename.



60
61
62
# File 'lib/genevalidator/validation_open_reading_frame.rb', line 60

# Reader for @filename, the value the constructor stores from its
# +filename+ argument.
def filename
  @filename
end

Instance Method Details

#get_orfs(_orf_length = 100, prediction = @prediction) ⇒ Object

Find open reading frames in the original sequence. Applied only to nucleotide sequences. Params: orf_length: minimum ORF length, default 100; prediction: Sequence object. Output: Hash containing the data on ORFs.



124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# File 'lib/genevalidator/validation_open_reading_frame.rb', line 124

# Find open reading frames in all six translation frames of a sequence.
#
# The raw sequence is wrapped in a Bio::Sequence::NA and translated in
# frames 1..6. Every run of at least 30 word characters in a translation is
# recorded as one ORF.
#
# Params:
# +_orf_length+:: accepted for backward compatibility but NOT used; the
#   minimum ORF length is fixed at 30 by the scan pattern below
# +prediction+:: object responding to +raw_sequence+ (defaults to
#   @prediction)
# Output:
# Hash keyed by a running index starting at 1. Each value is a Hash with
#   :frame::             1..3, or -1..-3 for translation frames 4..6
#   :orf_start::         1-based start of the ORF in the translation
#   :orf_end::           end of the ORF, shortened for plotting (see below)
#   :coverage::          ORF length as a percentage of the translation
#                        length, rounded up
#   :translated_length:: translation length, shortened for plotting
def get_orfs(_orf_length = 100, prediction = @prediction)
  # NOTE(review): this method used to open with
  #   '-' if prediction.type != 'nucleotide'
  # whose value was discarded, so it never guarded anything. It is dropped
  # rather than turned into a real early return, because that would change
  # results for existing callers; callers must check the sequence type.
  seq = Bio::Sequence::NA.new(prediction.raw_sequence)

  result = {}
  (1..6).each do |translation_frame|
    s = seq.translate(translation_frame)
    # Translation frames 4, 5 and 6 are reported as -1, -2 and -3.
    frame = translation_frame > 3 ? 3 - translation_frame : translation_frame

    # Reduce the orf_end and the translated length by 2% to increase the
    # width between ORFs on the plot. Both depend only on the frame, so
    # they are computed once per translation.
    chopping          = s.length * 0.02
    translated_length = (s.length - chopping).ceil

    s.scan(/(\w{30,})/) do
      match     = Regexp.last_match
      orf_start = match.begin(0) + 1
      orf_end   = match.end(0) + 1
      coverage  = (((orf_end - orf_start) / s.length.to_f) * 100).ceil

      result[result.size + 1] = { frame: frame, orf_start: orf_start,
                                  orf_end: (orf_end.to_f - chopping).floor,
                                  coverage: coverage,
                                  translated_length: translated_length }
    end
  end
  result
end

#plot_orfs(orfs, translated_length, output = "#{@filename}_orfs.json") ⇒ Object

Plots the regions corresponding to open reading frames. Params: orfs: Hash containing the open reading frames; translated_length: length of the translated sequence, used as the end of the gray background line drawn for each frame; output: location where the plot data will be saved (JSON file).



160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# File 'lib/genevalidator/validation_open_reading_frame.rb', line 160

# Writes the data for the ORF plot to +output+ as JSON and returns the Plot
# object that refers to it.
#
# The JSON document is an array of line descriptions: first one gray
# background line per frame (-3..-1 and 1..3) running from 1 to
# +translated_length+, then one red line per ORF.
#
# Params:
# +orfs+:: Hash whose values provide :frame, :orf_start and :orf_end
# +translated_length+:: 'stop' value of every background line
# +output+:: path of the JSON file to write
#   (default "#{@filename}_orfs.json")
# Output:
# Plot object created with the base name of +output+
# Raises:
# QueryError if +orfs+ is not a Hash
def plot_orfs(orfs, translated_length, output = "#{@filename}_orfs.json")
  fail QueryError unless orfs.is_a? Hash

  # Gray background line for each of the six frames (there is no frame 0).
  background = [-3, -2, -1, 1, 2, 3].map do |frame|
    { 'y' => frame, 'start' => 1, 'stop' => translated_length,
      'color' => 'gray' }
  end

  # Red line for each ORF, drawn on the row of its frame.
  orf_lines = orfs.map do |_key, h|
    { 'y' => h[:frame], 'start' => h[:orf_start],
      'stop' => h[:orf_end], 'color' => 'red' }
  end

  # Block form closes the file even when serialising or writing raises.
  File.open(output, 'w') { |f| f.write((background + orf_lines).to_json) }

  Plot.new(File.basename(output),
           :lines,
           'Open Reading Frames in all 6 Frames',
           'Open Reading Frame (Minimimum Length: 30 amino acids),red',
           'Offset in the Prediction',
           'Reading Frame',
           14)
end

#run ⇒ Object

Check whether there is a main reading frame Output: ORFValidationOutput object



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/genevalidator/validation_open_reading_frame.rb', line 84

# Check whether the prediction has a main Open Reading Frame.
#
# * When the validation type is not 'nucleotide' the report is a
#   ValidationReport flagged :unapplicable.
# * Otherwise the ORFs are collected with get_orfs, the one with the highest
#   coverage is taken as the main ORF, the plot data is written with
#   plot_orfs, and the report is an ORFValidationOutput.
# * Any StandardError raised along the way (including the case where no ORF
#   was found) produces a ValidationReport flagged :error.
#
# Output:
# the report, which is also stored in @validation_report, on every path
def run
  if type.to_s != 'nucleotide'
    @validation_report = ValidationReport.new('', :unapplicable)
    return @validation_report
  end

  # Raised as a StandardError subclass so the rescue below still turns it
  # into an error report.
  fail TypeError, 'not a Sequence' unless prediction.is_a?(Sequence)

  start = Time.now
  orfs  = get_orfs

  # sort_by yields [key, data] pairs; the last one has the highest coverage.
  # With no ORFs this is nil and the lookups below raise, ending in the
  # error report.
  _key, longest_orf = orfs.sort_by { |_k, orf| orf[:coverage] }.last
  longest_orf_frame = longest_orf[:frame]
  coverage          = longest_orf[:coverage]
  translated_length = longest_orf[:translated_length]
  plot1             = plot_orfs(orfs, translated_length)

  @validation_report = ORFValidationOutput.new(@short_header, @header,
                                               @description, orfs,
                                               coverage, longest_orf_frame)
  @validation_report.running_time = Time.now - start

  @validation_report.plot_files.push(plot1)
  @validation_report
rescue StandardError
  # StandardError only: interrupts and exit requests must not be swallowed.
  @validation_report = ValidationReport.new('Unexpected error', :error,
                                            @short_header, @header,
                                            @description, @approach,
                                            @explanation, @conclusion)
  @validation_report.errors.push 'Unexpected Error'
  # Return the report itself rather than the errors array.
  @validation_report
end