Module: GeneValidator

Defined in:
lib/genevalidator.rb,
lib/genevalidator/hsp.rb,
lib/genevalidator/blast.rb,
lib/genevalidator/query.rb,
lib/genevalidator/output.rb,
lib/genevalidator/version.rb,
lib/genevalidator/ext/array.rb,
lib/genevalidator/exceptions.rb,
lib/genevalidator/validation.rb,
lib/genevalidator/output_files.rb,
lib/genevalidator/arg_validation.rb,
lib/genevalidator/clusterization.rb,
lib/genevalidator/tabular_parser.rb,
lib/genevalidator/validation_test.rb,
lib/genevalidator/get_raw_sequences.rb,
lib/genevalidator/validation_report.rb,
lib/genevalidator/json_to_gv_results.rb,
lib/genevalidator/validation_maker_qi.rb,
lib/genevalidator/validation_alignment.rb,
lib/genevalidator/validation_gene_merge.rb,
lib/genevalidator/validation_duplication.rb,
lib/genevalidator/validation_length_rank.rb,
lib/genevalidator/validation_length_cluster.rb,
lib/genevalidator/validation_open_reading_frame.rb,
lib/genevalidator/validation_blast_reading_frame.rb

Overview

Top level module / namespace.

Defined Under Namespace

Modules: ExtraArrayMethods Classes: AliasDuplicationError, AlignmentValidation, AlignmentValidationOutput, BLASTDBError, BlastRFValidationOutput, BlastReadingFrameValidation, BlastUtils, ClasspathError, Cluster, DuplicationValidation, DuplicationValidationOutput, FetchRawSequences, FileNotFoundException, GVArgValidation, GeneMergeValidation, GeneMergeValidationOutput, HierarchicalClusterization, Hsp, InconsistentTabularFormat, JsonToGVResults, LengthClusterValidation, LengthClusterValidationOutput, LengthRankValidation, LengthRankValidationOutput, MakerQIValidation, MakerQIValidationOutput, NoInternetError, NoMafftInstallationError, NoPIdentError, NoValidationError, NotEnoughEvidence, NotEnoughHitsError, ORFValidationOutput, OpenReadingFrameValidation, OtherError, Output, OutputFiles, Pair, Pair1, PairCluster, Plot, Query, QueryError, RawSequences, ReadingFrameError, ReportClassError, SequenceTypeError, TabularParser, Validate, ValidationClassError, ValidationReport, ValidationTest, Validations

Constant Summary collapse

VERSION =
'2.1.5'.freeze

Class Attribute Summary collapse

Class Method Summary collapse

Class Attribute Details

.config ⇒ Object

Returns the value of attribute config.



17
18
19
# File 'lib/genevalidator.rb', line 17

# Reader for the config attribute: returns @config exactly as stored.
# `init` assigns it from setup_config(start_idx, number_of_sequences).
def config
  @config
end

.dirs ⇒ Object

Returns the value of attribute dirs.



17
18
19
# File 'lib/genevalidator.rb', line 17

# Reader for the dirs attribute: returns @dirs exactly as stored.
# `init` assigns it from setup_dirnames, which builds a hash of
# directory and result-file paths keyed by symbol.
def dirs
  @dirs
end

.mutex ⇒ Object

Returns the value of attribute mutex.



22
23
24
# File 'lib/genevalidator.rb', line 22

# Reader for the mutex attribute: returns @mutex exactly as stored.
# `init` assigns a new Mutex to it.
def mutex
  @mutex
end

.mutex_array ⇒ Object

Returns the value of attribute mutex_array.



22
23
24
# File 'lib/genevalidator.rb', line 22

# Reader for the mutex_array attribute: returns @mutex_array exactly as
# stored. `init` assigns a new Mutex to it (separate from @mutex).
def mutex_array
  @mutex_array
end

.opt ⇒ Object

Returns the value of attribute opt.



17
18
19
# File 'lib/genevalidator.rb', line 17

# Reader for the opt attribute: returns @opt exactly as stored.
# `init` sets it to the options object it was called with.
def opt
  @opt
end

.overview ⇒ Object

Returns the value of attribute overview.



17
18
19
# File 'lib/genevalidator.rb', line 17

# Reader for the overview attribute: returns @overview exactly as stored.
# `produce_output` assigns it from Output.generate_overview.
def overview
  @overview
end

.query_idx ⇒ Object (readonly)

array of indexes for the start offsets of each query in the fasta file



21
22
23
# File 'lib/genevalidator.rb', line 21

# Reader for query_idx: the array of start offsets of each query in the
# input FASTA file. extract_input_fasta_sequence uses consecutive entries
# to work out where a record starts and how many bytes it spans.
def query_idx
  @query_idx
end

.raw_seq_file_index ⇒ Object (readonly)

Returns the value of attribute raw_seq_file_index.



18
19
20
# File 'lib/genevalidator.rb', line 18

# Reader for the raw_seq_file_index attribute: returns @raw_seq_file_index
# exactly as stored.
# NOTE(review): the assignment is not visible in this listing; presumably
# it is populated when the raw sequence file is indexed -- confirm.
def raw_seq_file_index
  @raw_seq_file_index
end

.raw_seq_file_load ⇒ Object (readonly)

Returns the value of attribute raw_seq_file_load.



19
20
21
# File 'lib/genevalidator.rb', line 19

# Reader for the raw_seq_file_load attribute: returns @raw_seq_file_load
# exactly as stored.
# NOTE(review): the assignment is not visible in this listing; confirm
# where and when it is set before relying on it.
def raw_seq_file_load
  @raw_seq_file_load
end

Class Method Details

.extract_input_fasta_sequence(index) ⇒ Object



93
94
95
96
97
# File 'lib/genevalidator.rb', line 93

# Reads the raw text of a single query record from the input FASTA file.
#
# index - position of the query within @query_idx.
#
# The record starts at @query_idx[index] and runs up to (not including)
# @query_idx[index + 1], so @query_idx needs an entry after `index`.
# Returns the record as a binary String.
def extract_input_fasta_sequence(index)
  record_offset = @query_idx[index]
  record_length = @query_idx[index + 1] - record_offset
  # IO.binread takes the length first and the offset second.
  IO.binread(@opt[:input_fasta_file], record_length, record_offset)
end

.init(opt, start_idx = 1) ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/genevalidator.rb', line 24

# Prepares module state for a run.
#
# opt       - options object; stored in @opt and validated by
#             GVArgValidation.validate_args.
# start_idx - forwarded to setup_config together with the number of
#             sequences reported by index_the_input (default: 1).
#
# Sets @opt, @config, @dirs, @mutex and @mutex_array. Resumes a previous
# run when opt[:resumable] is non-nil, and indexes the raw sequence file
# when @opt[:raw_sequences] is set.
def init(opt, start_idx = 1)
  warn '==> Analysing input arguments'
  @opt = opt
  GVArgValidation.validate_args # works on @opt, so it must already be set
  sequence_count = index_the_input

  @config = setup_config(start_idx, sequence_count)
  @dirs   = setup_dirnames(@opt[:input_fasta_file])

  @mutex = Mutex.new
  @mutex_array = Mutex.new

  previous_run = opt[:resumable]
  resume_from_previous_run(previous_run) unless previous_run.nil?

  RawSequences.index_raw_seq_file if @opt[:raw_sequences]
end

.parse_blast_output_file ⇒ Object

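Params: none; the BLAST output location is read from the module options (opt[:blast_xml_file]). Returns an iterator: an enumerator over the BLAST XML file when that option is set, otherwise a new TabularParser.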



68
69
70
71
72
73
74
75
# File 'lib/genevalidator.rb', line 68

# Builds the object used to walk the BLAST results.
#
# Returns the enumerator of a Bio::BlastXMLParser::XmlIterator over
# @opt[:blast_xml_file] when that option is set; otherwise a new
# TabularParser.
#
# TODO: Add a rescue statement - e.g. if unable to create the object.
def parse_blast_output_file
  xml_path = @opt[:blast_xml_file]
  return TabularParser.new unless xml_path

  Bio::BlastXMLParser::XmlIterator.new(xml_path).to_enum
end

.produce_output ⇒ Object



99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/genevalidator.rb', line 99

# Summarises the run and writes every output artefact.
#
# Stores the overview built from @config[:json_output] and
# @opt[:min_blast_hits] in @overview, prints the console footer, then asks
# a new OutputFiles instance to write the JSON, HTML, CSV and summary files
# and the best-FASTA output, in that order.
#
# Returns whatever OutputFiles#print_best_fasta returns.
def produce_output
  json_results = @config[:json_output]
  @overview = Output.generate_overview(json_results, @opt[:min_blast_hits])

  evaluation = Output.generate_evaluation_text(@overview)
  Output.print_console_footer(evaluation, @opt)

  writer = OutputFiles.new
  writer.write_json
  writer.write_html(evaluation) # the HTML report embeds the evaluation text
  writer.write_csv
  writer.write_summary
  writer.print_best_fasta
end

.run ⇒ Object

Parse the blast output and run validations



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/genevalidator.rb', line 43

# Parses the BLAST output and runs the validations.
#
# Steps, in order:
#   1. If neither @opt[:blast_xml_file] nor @opt[:blast_tabular_file] is
#      set, point opt[:blast_xml_file] at "<filename>.blast_xml" inside the
#      tmp dir and run BLAST on the input file.
#   2. If the 'align' or 'dup' validation is requested and no raw sequences
#      were supplied, run RawSequences to obtain them.
#   3. Parse the BLAST output, run the validations, produce the output and
#      print the directory locations.
def run
  blast_output_given = @opt[:blast_xml_file] || @opt[:blast_tabular_file]
  unless blast_output_given
    xml_basename = "#{dirs[:filename]}.blast_xml"
    opt[:blast_xml_file] = File.join(dirs[:tmp_dir], xml_basename)
    BlastUtils.run_blast_on_input_file
  end

  # The align and dup validations need a fasta file of all BLAST hits.
  wants_hit_sequences = %w[align dup].any? do |validation|
    @opt[:validations].include?(validation)
  end
  RawSequences.run if wants_hit_sequences && !@opt[:raw_sequences]

  blast_results = parse_blast_output_file
  Validations.new.run_validations(blast_results)
  produce_output
  print_directories_locations
end

.setup_dirnames(input_file) ⇒ Object

Also called by json_to_gv script



78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/genevalidator.rb', line 78

# Builds the table of directories and result-file paths for one input file.
# (Also called by the json_to_gv script.)
#
# input_file - path of the input file; its basename without the extension
#              is used as the prefix of every result file.
#
# Calls setup_output_dir with that basename and returns a Hash with the
# keys :filename, :output_dir, :tmp_dir, :json_dir, :html_file, :json_file,
# :csv_file, :summary_file, :fasta_file and :aux_dir.
def setup_dirnames(input_file)
  extension = File.extname(input_file)
  stem      = File.basename(input_file, extension)
  root      = setup_output_dir(stem)
  in_root   = ->(relative) { File.join(root, relative) }

  {
    filename: stem,
    output_dir: root,
    tmp_dir: in_root.call('tmp'),
    json_dir: in_root.call('tmp/json'),
    # The '*' is part of the stored path string, not expanded here.
    html_file: in_root.call("#{stem}_results*.html"),
    json_file: in_root.call("#{stem}_results.json"),
    csv_file: in_root.call("#{stem}_results.csv"),
    summary_file: in_root.call("#{stem}_summary.csv"),
    fasta_file: in_root.call("#{stem}_results.fa"),
    aux_dir: File.expand_path('../aux', __dir__)
  }
end