Class: GeneValidator::RawSequences

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/genevalidator/get_raw_sequences.rb

Overview

Gets the raw sequences for each hit in a BLAST output file

Class Method Summary collapse

Class Method Details

.index_raw_seq_file(raw_seq_file = opt[:raw_sequences]) ⇒ Object

Index the raw sequences file…



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/genevalidator/get_raw_sequences.rb', line 46

# Reduces every FASTA header in +raw_seq_file+ to its identifier (everything
# from the first space to the end of each line is dropped), rewrites the file
# in place, and builds an index mapping each identifier to a
# [start_offset, end_offset] pair within the rewritten content.
#
# The index is stored in config[:raw_seq_file_load] and dumped as YAML to
# "<tmp_dir>/<basename>.idx", whose path is stored in
# config[:raw_seq_file_index].
#
# @param raw_seq_file [String] path to the FASTA file (rewritten in place)
# @return [nil]
def index_raw_seq_file(raw_seq_file = opt[:raw_sequences])
  # leave only the identifiers in the fasta description.
  # File.binread closes its handle before returning; a bare
  # File.open(...).read would keep the descriptor open until GC.
  content = File.binread(raw_seq_file).gsub(/ .*/, '')
  # Write in binary mode so the bytes on disk line up exactly with the
  # offsets computed from +content+ below (no newline translation).
  File.binwrite(raw_seq_file, content)

  # index the fasta file: identifiers, and the offset at which each
  # '>'-introduced record starts
  keys   = content.scan(/>(.*)\n/).flatten
  values = content.enum_for(:scan, /(>[^>]+)/).map { Regexp.last_match.begin(0) }

  # make an index hash: a record ends where the next one starts; the last
  # record ends at the final position of the content (content.length - 1)
  last_record = values.length - 1
  index_hash = keys.each_with_index.each_with_object({}) do |(id, i), index|
    finish = i == last_record ? content.length - 1 : values[i + 1]
    index[id] = [values[i], finish]
  end

  # create FASTA index
  fname = File.basename(raw_seq_file)
  config[:raw_seq_file_index] = File.join(dirs[:tmp_dir], "#{fname}.idx")
  config[:raw_seq_file_load]  = index_hash

  File.open(config[:raw_seq_file_index], 'w') do |f|
    YAML.dump(index_hash, f)
  end
  nil
end

.init ⇒ Object



18
19
20
21
22
23
24
25
26
27
28
# File 'lib/genevalidator/get_raw_sequences.rb', line 18

# Prints a progress notice via +warn+ and derives the working file paths
# from the BLAST output file name.
#
# The BLAST file is taken from opt[:blast_tabular_file] when set, otherwise
# from opt[:blast_xml_file]; if neither is set, @blast_file keeps whatever
# value it already had. Sets opt[:raw_sequences] and @index_file to paths
# inside dirs[:tmp_dir].
#
# @return [String] the index file path (value of the final assignment)
def init
  warn '==> Extracting fasta sequences for each BLAST HSP from the BLAST database'

  # tabular output wins over XML when both options are present
  chosen = opt[:blast_tabular_file] || opt[:blast_xml_file]
  @blast_file = chosen if chosen

  base_name = File.basename(@blast_file)
  tmp_dir   = dirs[:tmp_dir]
  opt[:raw_sequences] = File.join(tmp_dir, "#{base_name}.raw_seq")
  @index_file = File.join(tmp_dir, "#{base_name}.index")
end

.run ⇒ Object

Obtains raw_sequences from BLAST output file…



32
33
34
35
36
37
38
39
40
41
# File 'lib/genevalidator/get_raw_sequences.rb', line 32

# Entry point: prepares paths via +init+, produces the raw sequences file,
# then indexes it.
#
# When opt[:db] matches /remote/ the raw sequence file is written directly
# with the 'remote' mode; otherwise an index file is written with the
# 'local' mode and FetchRawSequences.extract_from_local_db is invoked with
# it. In both cases opt[:raw_sequences] is indexed afterwards.
#
# @return [Object] whatever index_raw_seq_file returns
def run
  init

  uses_local_db = !opt[:db].match?(/remote/)
  if uses_local_db
    write_an_index_file(@index_file, 'local')
    FetchRawSequences.extract_from_local_db(true, nil, @index_file)
  else
    write_a_raw_seq_file(opt[:raw_sequences], 'remote')
  end

  index_raw_seq_file(opt[:raw_sequences])
end