Class: NpSearch::ArgValidators

Inherits:
Object
  • Object
show all
Defined in:
lib/npsearch/arg_validator.rb

Instance Method Summary collapse

Constructor Details

#initialize(verbose_opt) ⇒ ArgValidators

Changes the logger level to output extra info when the verbose option is

true.


7
8
9
# File 'lib/npsearch/arg_validator.rb', line 7

# Switches the shared logger to INFO level when verbose output is requested.
#
# @param verbose_opt [Object] verbose flag; only a value equal to `true`
#   changes the logger level, anything else leaves the logger untouched.
def initialize(verbose_opt)
  return unless verbose_opt == true

  LOG.level = Logger::INFO
end

Instance Method Details

#arg(motif, input, output_dir, orf_min_length, extract_orf, signalp_file, help_banner) ⇒ Object

Runs all the argument validation methods…



12
13
14
15
16
17
18
19
20
# File 'lib/npsearch/arg_validator.rb', line 12

# Runs every argument validator in order and reports the detected input type.
#
# The compulsory-argument check runs first, then the input type is guessed
# and the remaining validators are run against that type.
#
# @return [Object] whatever guess_input_type returned for the input file
def arg(motif, input, output_dir, orf_min_length, extract_orf,
        signalp_file, help_banner)
  comp_arg(input, motif, output_dir, extract_orf, help_banner)
  guess_input_type(input).tap do |detected_type|
    extract_orf_conflict(detected_type, extract_orf)
    input_sp_file_conflict(detected_type, signalp_file)
    # Parentheses force a method call; the bare name is the local parameter.
    orf_min_length(orf_min_length)
  end
end

#comp_arg(input, motif, output_dir, extract_orf, help_banner) ⇒ Object

Ensures that the compulsory input arguments are supplied…



23
24
25
26
27
28
29
30
# File 'lib/npsearch/arg_validator.rb', line 23

# Ensures that the compulsory arguments are supplied. A usage error is
# printed for each missing argument; if any is missing, the help banner is
# printed and the program exits.
#
# The query motif is only compulsory when extract_orf is exactly `false`.
#
# @param input [String, nil] input file ("-i" option)
# @param motif [String, nil] query motif ("-m" option)
# @param output_dir [String, nil] output folder ("-o" option)
# @param extract_orf [Boolean] whether the extract ORF option is in use
# @param help_banner [String] usage text printed before exiting
# @return [nil] when every compulsory argument is present
def comp_arg(input, motif, output_dir, extract_orf, help_banner)
  motif_required = extract_orf == false
  comp_arg_error(motif, 'Query Motif ("-m" option)') if motif_required
  comp_arg_error(input, 'Input file ("-i" option)')
  comp_arg_error(output_dir, 'Output Folder ("-o" option)')
  # A missing output folder is reported as a usage error above, so it must
  # stop the run just like the other compulsory arguments.
  missing = input.nil? || output_dir.nil? || (motif_required && motif.nil?)
  return unless missing
  puts help_banner
  exit
end

#comp_arg_error(arg, message) ⇒ Object

Ensures that a message is provided for all missing compulsory args.

Run from comp_arg method


34
35
36
# File 'lib/npsearch/arg_validator.rb', line 34

# Prints a usage error naming a compulsory argument when it is missing.
#
# @param arg [Object, nil] the argument value; only `nil` counts as missing
# @param message [String] description of the argument used in the output
# @return [nil]
def comp_arg_error(arg, message)
  return unless arg.nil?

  notice = 'Usage Error: No ' + message + ' is supplied'
  puts notice
end

#extract_orf_conflict(input_type, extract_orf) ⇒ Object

Ensures that the extract_orf option is only used with genetic data.



77
78
79
80
81
82
83
84
# File 'lib/npsearch/arg_validator.rb', line 77

# Ensures that the extract_orf option is only used with genetic data.
#
# @param input_type [String, nil] detected input type; a conflict exists
#   only when this is 'protein'
# @param extract_orf [Boolean] whether the Extract_ORF ("-e") option is set
# @return [nil] when there is no conflict
# @raise [ArgumentError] when protein input is combined with extract_orf
def extract_orf_conflict(input_type, extract_orf)
  return unless input_type == 'protein' && extract_orf == true
  # Pass the class and message separately: `ArgumentError(...)` is a call to
  # an undefined method and would surface as NoMethodError instead.
  fail ArgumentError, 'Usage Error: Conflicting arguments detected:' \
                      ' Protein data detected within the input file,' \
                      ' when using the Extract_ORF option (option' \
                      ' "-e"). This option is only available when' \
                      ' input file contains genetic data.'
end

#guess_input_type(input_file) ⇒ Object

Guesses the type of data within the input file based on the first 100 lines of

the file, ignoring all identifiers (lines that start with a '>').
It uses an 80% threshold.


41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/npsearch/arg_validator.rb', line 41

# Guesses the type of data in the input file from its first 100 lines,
# skipping identifier lines (those starting with '>'). The guess is made by
# Bio::Sequence#guess with a threshold of 0.8.
#
# The file is validated with input_file_format before it is read.
#
# @param input_file [String] path to the input file
# @return [String, nil] 'genetic' for a nucleic acid guess, 'protein' for an
#   amino acid guess, nil if the guess matches neither
def guess_input_type(input_file)
  input_file_format(input_file)
  # Stream the file and stop after 100 lines instead of loading it whole
  # (the previous `readlines[0..100]` also kept 101 lines, not 100).
  sequences = File.foreach(input_file).first(100).reject do |line|
    line.start_with?('>')
  end
  type = Bio::Sequence.new(sequences).guess(0.8)
  # Compare with ==: the guess is compared against the NA/AA constants
  # themselves, so `case/when` (which uses ===) must not be used here.
  if type == Bio::Sequence::NA
    'genetic'
  elsif type == Bio::Sequence::AA
    'protein'
  end
end

#input_file_format(input_file) ⇒ Object

Ensures that the input file a) exists b) is not empty and c) is a fasta

file. Run from the guess_input_type method.


60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/npsearch/arg_validator.rb', line 60

# Ensures that the input file a) exists, b) is not empty and c) passes the
# File.probably_fasta? check. Run from the guess_input_type method.
#
# @param input_file [String] path to the input file
# @return [nil] when all three checks pass
# @raise [ArgumentError] when any of the checks fails
def input_file_format(input_file)
  # Each failure passes the class and message separately: the former
  # `ArgumentError(...)` form called an undefined method and raised
  # NoMethodError instead of the intended ArgumentError.
  unless File.exist?(input_file)
    fail ArgumentError, "Critical Error: The input file '#{input_file}'" \
                        ' does not exist.'
  end
  if File.zero?(input_file)
    fail ArgumentError, "Critical Error: The input file '#{input_file}'" \
                        ' is empty.'
  end
  # File.probably_fasta? is not part of Ruby's File API; it is expected to
  # be provided elsewhere in the project.
  unless File.probably_fasta?(input_file)
    fail ArgumentError, "Critical Error: The input file '#{input_file}'" \
                        ' does not seem to be in fasta format. Only' \
                        ' input files in fasta format are supported.'
  end
end

#input_sp_file_conflict(input_type, signalp_file) ⇒ Object

Ensures that the protein data (or open reading frames) are supplied as

the input file when the signal p output file is passed.


88
89
90
91
92
93
94
95
96
97
# File 'lib/npsearch/arg_validator.rb', line 88

# Ensures that a Signal P output file ("-s" option) is not combined with
# genetic input data.
#
# @param input_type [String, nil] detected input type; a conflict exists
#   only when this is 'genetic'
# @param signalp_file [Object, nil] the Signal P result file, or nil when
#   the option was not used
# @return [nil] when there is no conflict
# @raise [ArgumentError] when genetic input is combined with a Signal P file
def input_sp_file_conflict(input_type, signalp_file)
  return unless input_type == 'genetic' && !signalp_file.nil?
  # Pass the class and message separately: `ArgumentError(...)` is a call to
  # an undefined method and would surface as NoMethodError instead.
  fail ArgumentError, 'Usage Error: Conflicting arguments detected' \
                      ': Genetic data detected within the input file' \
                      ' when using the Signal P Input Option (Option' \
                      ' "-s"). The Signal P input Option requires the' \
                      ' input of two files: the Signal P Script Result' \
                      ' files (at the "-s" option) and the protein' \
                      ' data file used to run the Signal P Script.'
end

#orf_min_length(orf_min_length) ⇒ Object

Ensures that the ORF minimum length is a number. Any digits after the

decimal place are ignored.


101
102
103
104
105
# File 'lib/npsearch/arg_validator.rb', line 101

# Ensures that the ORF minimum length converts to a positive integer. The
# value is converted with #to_i, so any digits after a decimal place are
# ignored and values that convert to 0 (such as nil or non-numeric text)
# are rejected.
#
# @param orf_min_length [#to_i] the requested minimum ORF length
# @return [nil] when the value is acceptable
# @raise [ArgumentError] when the converted value is less than 1
def orf_min_length(orf_min_length)
  return unless orf_min_length.to_i < 1
  # Pass the class and message separately: `ArgumentError(...)` is a call to
  # an undefined method and would surface as NoMethodError instead.
  fail ArgumentError, 'Usage Error: The Open Reading Frames minimum' \
                      ' length can only be a full integer.'
end