Class: GeneValidator::LengthRankValidation

Inherits:
ValidationTest show all
Extended by:
Forwardable
Defined in:
lib/genevalidator/validation_length_rank.rb

Overview

This class contains the methods necessary for length validation, which works by ranking the prediction length among the hit lengths.

Constant Summary collapse

THRESHOLD =
20

Instance Attribute Summary

Attributes inherited from ValidationTest

#cli_name, #description, #header, #hits, #prediction, #run_time, #short_header, #type, #validation_report

Instance Method Summary collapse

Constructor Details

#initialize(prediction, hits) ⇒ LengthRankValidation

Initializes the object. Params: `prediction`: a Sequence object representing the BLAST query; `hits`: a vector of Sequence objects (representing BLAST hits).



94
95
96
97
98
99
100
101
# File 'lib/genevalidator/validation_length_rank.rb', line 94

# Builds the validation and sets the labels used when reporting it.
#
# Both arguments are forwarded unchanged to the parent constructor through
# zero-arity +super+.
#
# Params:
# +prediction+: the object representing the BLAST query
# +hits+: the collection of objects representing the BLAST hits
def initialize(prediction, hits)
  super
  @cli_name     = 'lenr'
  @short_header = 'LengthRank'
  @header       = 'Length Rank'
  @description  = 'Check whether the rank of the prediction length lies among 80% of all the BLAST hit lengths.'
end

Instance Method Details

#run(hits = @hits, prediction = @prediction) ⇒ Object

Calculates a percentage based on the rank of the prediction among the hit lengths. Params: `hits` (optional): a vector of Sequence objects; `prediction` (optional): a Sequence object. Output: a LengthRankValidationOutput object.



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/genevalidator/validation_length_rank.rb', line 111

# Ranks the prediction length among the BLAST hit lengths and stores the
# resulting report in @validation_report.
#
# When the standard deviation of the hit lengths is at most 5 the ranking is
# skipped: the percentage is set to 100 and no extreme-hit count is computed
# (nil is passed on). Otherwise the hits lying beyond the prediction length
# (shorter hits when the prediction is below the rounded median, longer hits
# otherwise) are counted and expressed as a rounded percentage of all hits.
# The 'too short' / 'too long' message is kept only when that percentage is
# below THRESHOLD.
#
# Params:
# +hits+ (optional): a vector of Query objects (the BLAST hits)
# +prediction+ (optional): a Query object (the BLAST query)
# Output:
# the report stored in @validation_report: a +LengthRankValidationOutput+
# on success, or a +ValidationReport+ when there are fewer hits than
# opt[:min_blast_hits] or when any other StandardError is raised. No
# exception derived from StandardError escapes this method.
def run(hits = @hits, prediction = @prediction)
  raise NotEnoughHitsError if hits.length < opt[:min_blast_hits]
  # A bare raise yields a RuntimeError, handled by the generic rescue below.
  raise unless prediction.is_a?(Query) && hits[0].is_a?(Query)

  start = Time.now

  hits_lengths = hits.map { |hit| hit.length_protein.to_i }.sort

  no_of_hits   = hits_lengths.length
  median       = hits_lengths.median.round
  query_length = prediction.length_protein
  mean         = hits_lengths.mean.round

  # The lengths are sorted ascending, so the extremes sit at both ends.
  smallest_hit = hits_lengths.first
  largest_hit  = hits_lengths.last

  if hits_lengths.standard_deviation <= 5
    # Nearly uniform hit lengths: ranking is skipped, nothing is counted.
    extreme_hits = nil
    percentage   = 100
    msg          = ''
  else
    too_short    = query_length < median
    extreme_hits = hits_lengths.count do |len|
      too_short ? len < query_length : len > query_length
    end
    percentage   = ((extreme_hits.to_f / no_of_hits) * 100).round
    msg          = too_short ? 'too&nbsp;short' : 'too&nbsp;long'
  end

  # Enough hits are more extreme than the prediction: nothing to flag.
  msg = '' if percentage >= THRESHOLD

  @validation_report = LengthRankValidationOutput.new(@short_header,
                                                      @header, @description,
                                                      msg, query_length,
                                                      no_of_hits, median,
                                                      mean, smallest_hit,
                                                      largest_hit,
                                                      extreme_hits,
                                                      percentage)
  @validation_report.run_time = Time.now - start
  @validation_report
rescue NotEnoughHitsError
  @validation_report = ValidationReport.new('Not enough evidence', :warning,
                                            @short_header, @header,
                                            @description)
rescue StandardError
  @validation_report = ValidationReport.new('Unexpected error', :error,
                                            @short_header, @header,
                                            @description)
  @validation_report.errors.push 'Unexpected Error'
  # Return the report itself; Array#push would otherwise make this branch
  # return the errors array, unlike every other exit of the method.
  @validation_report
end