Class: GeneValidator::OpenReadingFrameValidation

Inherits:
ValidationTest show all
Extended by:
Forwardable
Defined in:
lib/genevalidator/validation_open_reading_frame.rb

Overview

This class contains the methods necessary for checking whether there is a main Open Reading Frame in the predicted sequence

Instance Attribute Summary

Attributes inherited from ValidationTest

#cli_name, #description, #header, #hits, #prediction, #run_time, #short_header, #type, #validation_report

Instance Method Summary collapse

Constructor Details

#initialize(prediction, hits) ⇒ OpenReadingFrameValidation

Initializes the object. Params: prediction: a Sequence object representing the BLAST query; hits: a vector of Sequence objects (representing BLAST hits).



76
77
78
79
80
81
82
83
84
85
# File 'lib/genevalidator/validation_open_reading_frame.rb', line 76

# Builds the validation and fills in the labels that identify it.
#
# Params:
# +prediction+: passed on unchanged to the superclass constructor
# +hits+: passed on unchanged to the superclass constructor
def initialize(prediction, hits)
  # Bare +super+ forwards both arguments as received.
  super
  @cli_name     = 'orf'
  @short_header = 'MainORF'
  @header       = 'Main ORF'
  @description  = 'Check whether there is a single main Open Reading Frame in ' \
                  'the predicted gene. This validation does not require any ' \
                  'BLAST hits.'
  # +config+ is defined outside this method; only its :type entry is read.
  @type         = config[:type]
end

Instance Method Details

#get_orfs(_orf_length = 100, prediction = @prediction) ⇒ Object

Find open reading frames in the original sequence. Applied only to nucleotide sequences. Params: orf_length: minimum ORF length, default 100; prediction: Sequence object. Output: Hash containing the data on ORFs.



130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/genevalidator/validation_open_reading_frame.rb', line 130

# Finds candidate open reading frames in the six translations of a
# nucleotide prediction.
#
# Params:
# +_orf_length+: accepted for backward compatibility but not used; the
#                minimum run length is fixed at 30 by the scan pattern
# +prediction+: object responding to +type+ and +raw_sequence+
#               (defaults to @prediction)
# Output:
# Hash keyed by a counter starting at 1. Each value is a Hash with the keys
# :frame, :orf_start, :orf_end, :coverage and :translated_length. An empty
# Hash is returned when the prediction type is not 'nucleotide'.
def get_orfs(_orf_length = 100, prediction = @prediction)
  # Compare via to_s so that both 'nucleotide' and :nucleotide are accepted.
  return {} if prediction.type.to_s != 'nucleotide'

  seq = Bio::Sequence::NA.new(prediction.raw_sequence)

  result = {}
  (1..6).each do |index|
    translation = seq.translate(index)
    # Translation frames 4, 5 and 6 are reported as -1, -2 and -3.
    frame = index > 3 ? 3 - index : index
    # Reduce the orf_end and the translated length by 2% to increase the
    # width between ORFs on the plot.
    chopping = translation.length * 0.02
    translated_length = (translation.length - chopping).ceil

    # Every run of 30 or more word characters counts as one ORF.
    translation.scan(/\w{30,}/) do
      match_start, match_end = Regexp.last_match.offset(0)
      orf_start = match_start + 1
      orf_end   = match_end + 1
      # Coverage is computed from the untrimmed coordinates.
      coverage = (((orf_end - orf_start) / translation.length.to_f) * 100).ceil
      result[result.length + 1] = { frame: frame, orf_start: orf_start,
                                    orf_end: (orf_end.to_f - chopping).floor,
                                    coverage: coverage,
                                    translated_length: translated_length }
    end
  end
  result
end

#plot_orfs(orfs, translated_length) ⇒ Object

Plots the regions corresponding to open reading frames. Params: orfs: Hash containing the open reading frames; output: location where the plot will be saved in JPEG file format; prediction: Sequence objects.

Raises:



166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# File 'lib/genevalidator/validation_open_reading_frame.rb', line 166

# Assembles the plot description for the open reading frames.
#
# Params:
# +orfs+: Hash whose values provide :frame, :orf_start and :orf_end
# +translated_length+: used as the 'stop' value of every background line
# Output:
# The object returned by Plot.new
# Raises:
# QueryError when +orfs+ is not a Hash
def plot_orfs(orfs, translated_length)
  raise QueryError unless orfs.is_a?(Hash)

  # One gray background line per frame, -3 to 3 without 0.
  background = (-3..3).reject(&:zero?).map do |frame|
    { 'y' => frame, 'start' => 1, 'stop' => translated_length,
      'color' => 'gray' }
  end

  # One red line per ORF, drawn on the row of its frame.
  highlights = orfs.map do |_key, orf|
    { 'y' => orf[:frame], 'start' => orf[:orf_start],
      'stop' => orf[:orf_end], 'color' => 'red' }
  end

  Plot.new(background + highlights,
           :lines,
           'Open Reading Frames in all 6 Frames',
           'Open Reading Frame (Minimimum Length: 30 amino acids),red',
           'Offset in the Prediction',
           'Reading Frame',
           14)
end

#run ⇒ Object

Check whether there is a main reading frame. Output: ORFValidationOutput object.



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/genevalidator/validation_open_reading_frame.rb', line 91

# Checks whether the prediction has a main open reading frame.
#
# Output:
# The report that is also stored in @validation_report:
# * a ValidationReport flagged :unapplicable when +type+ is not 'nucleotide'
# * an ORFValidationOutput when the check completes
# * a ValidationReport flagged :error when any StandardError is raised
#   (this includes a prediction that is not a Query and the case where
#   get_orfs yields no ORF at all)
def run
  if type.to_s != 'nucleotide'
    @validation_report = ValidationReport.new('', :unapplicable)
    return @validation_report
  end

  # A bare raise produces a RuntimeError, which the rescue below turns into
  # an error report.
  raise unless prediction.is_a?(Query)

  start = Time.new
  orfs = get_orfs

  # The "longest" ORF is the one with the highest coverage.
  longest_orf       = orfs.max_by { |_key, hash| hash[:coverage] }
  longest_orf_frame = longest_orf[1][:frame]
  coverage          = longest_orf[1][:coverage]
  translated_length = longest_orf[1][:translated_length]
  plot1             = plot_orfs(orfs, translated_length)

  @validation_report = ORFValidationOutput.new(@short_header, @header,
                                               @description, orfs,
                                               coverage, longest_orf_frame)
  @validation_report.run_time = Time.now - start

  @validation_report.plot_files.push(plot1)
  @validation_report
rescue StandardError
  @validation_report = ValidationReport.new('Unexpected error', :error,
                                            @short_header, @header,
                                            @description)
  @validation_report.errors.push 'Unexpected Error'
  # Return the report itself, not the errors array that push returns, so
  # every path of this method yields the same kind of object.
  @validation_report
end