Class: GeneValidator::FetchRawSequences

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/genevalidator/get_raw_sequences.rb

Class Method Summary collapse

Class Method Details

.batch_raw_seq_cmd(index_file) ⇒ Object



186
187
188
189
# File 'lib/genevalidator/get_raw_sequences.rb', line 186

# Builds the blastdbcmd command line that fetches every entry listed in
# +index_file+ from opt[:db] and writes the result to opt[:raw_sequences].
#
# Every interpolated value is wrapped in single quotes. A single quote inside
# a value is rewritten as '\'' (close quote, escaped quote, reopen quote) so
# that it cannot end the quoted shell argument early.
#
# @param index_file [String] path passed to blastdbcmd's -entry_batch option
# @return [String] the complete shell command
def batch_raw_seq_cmd(index_file)
  # Block form of gsub avoids backslash handling in the replacement string.
  quote = ->(value) { "'" + value.to_s.gsub("'") { "'\\''" } + "'" }
  "blastdbcmd -entry_batch #{quote.call(index_file)}" \
  " -db #{quote.call(opt[:db])} -outfmt '%f'" \
  " -out #{quote.call(opt[:raw_sequences])}"
end

.extract_from_index(identifier) ⇒ Object

Gets the raw sequence for a FASTA identifier by looking it up in a FASTA index file. Params: identifier: String. Output: String with the nucleotide sequence corresponding to the identifier.



158
159
160
161
162
163
164
165
# File 'lib/genevalidator/get_raw_sequences.rb', line 158

# Reads the record for +identifier+ out of opt[:raw_sequences], using the
# offsets stored under config[:raw_seq_file_load][identifier], and returns the
# sequence letters of the first FASTA record found in that slice with the
# newlines removed.
#
# @param identifier [String] key into config[:raw_seq_file_load]
# @return [String] the sequence, or the literal 'Error' when anything raises
def extract_from_index(identifier)
  offsets = config[:raw_seq_file_load][identifier]
  start   = offsets[0]
  # The slice runs from offsets[0] up to (not including) offsets[1].
  record  = IO.binread(opt[:raw_sequences], offsets[1] - start, start)
  fasta   = record.match(/>([^\n]*)\n([A-Za-z\n]*)/)
  # Capture 1 is the header line; capture 2 is the (possibly wrapped) sequence.
  fasta[2].delete("\n")
rescue StandardError
  # Sentinel value: callers test for it and fall back to another fetch method.
  'Error'
end

.extract_from_local_db(batch, accno = nil, idx_file = nil) ⇒ Object

Gets raw sequence by accession number from a given database. Params: accno: accession number as String; db: database as String. Output: String with the nucleotide sequence corresponding to the accession.



174
175
176
177
178
179
180
181
182
183
184
# File 'lib/genevalidator/get_raw_sequences.rb', line 174

# Runs blastdbcmd (batch or single-entry form) and returns everything the
# command printed on stdout and stderr.
#
# Output is captured with the POSIX `2>&1` redirection straight from the
# backticks. The previous `&>file` form is a bash extension: backticks run
# /bin/sh, and a POSIX sh parses `cmd &>file` as "run cmd in the background,
# then truncate file", which left nothing to read back.
#
# @param batch [Boolean] true to run the -entry_batch command for +idx_file+,
#   false to fetch the single entry +accno+
# @param accno [String, nil] accession used when +batch+ is false
# @param idx_file [String, nil] batch index file used when +batch+ is true
# @return [String] combined stdout/stderr of the command; for a single entry
#   this contains the sequence
def extract_from_local_db(batch, accno = nil, idx_file = nil)
  cmd = batch ? batch_raw_seq_cmd(idx_file) : single_raw_seq_cmd(accno)
  raw_seqs = `#{cmd} 2>&1`
  # In batch mode, hand any reported errors to the remote-fetch fallback.
  failed_raw_sequences(raw_seqs) if batch && raw_seqs =~ /Error/
  raw_seqs # when obtaining a single raw_seq, this contains the sequence
end

.extract_from_remote_db(accession, db_seq_type = 'protein') ⇒ Object



208
209
210
211
212
213
214
215
216
217
218
219
# File 'lib/genevalidator/get_raw_sequences.rb', line 208

# Fetches a FASTA record from NCBI E-utilities in two HTTP requests: an
# esearch call whose response supplies QueryKey and WebEnv, followed by an
# efetch call that uses those two values.
#
# @param accession [String] search term sent to esearch
# @param db_seq_type [String] value of the `db` query parameter
# @return [String] the efetch response body with its final character removed
# @raise [NoMethodError] when the esearch response has no QueryKey or WebEnv
def extract_from_remote_db(accession, db_seq_type = 'protein')
  query_key_tag = %r{<\bQueryKey\b>([\w\W\d]+)</\bQueryKey\b>}
  web_env_tag   = %r{<\bWebEnv\b>([\w\W\d]+)</\bWebEnv\b>}

  search_url = 'https://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?' \
               "db=#{db_seq_type}&retmax=1&usehistory=y&term=#{accession}/"
  search_xml = Net::HTTP.get(URI.parse(search_url))
  query_key  = search_xml.match(query_key_tag)[1]
  web_env    = search_xml.match(web_env_tag)[1]

  fetch_url = 'https://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?' \
              'rettype=fasta&retmode=text&retstart=0&retmax=1&' \
              "db=#{db_seq_type}&query_key=#{query_key}&WebEnv=#{web_env}"
  fasta = Net::HTTP.get(URI.parse(fetch_url))
  # Drop the last character of the body.
  fasta[0..fasta.length - 2]
end

.failed_raw_sequences(blast_output) ⇒ Object



195
196
197
198
199
200
201
202
203
204
205
206
# File 'lib/genevalidator/get_raw_sequences.rb', line 195

# Scans blastdbcmd output for "Error: <acc>: OID not found" lines and, for
# each accession reported that way, warns and appends the result of
# extract_from_remote_db(acc) to opt[:raw_sequences].
#
# Lines that do not match that pattern (blank lines, other messages) are
# skipped; previously the first such line raised NoMethodError and aborted
# the recovery of every remaining accession.
#
# @param blast_output [String] combined output of a batch blastdbcmd run
# @return [String] +blast_output+, as returned by String#each_line
def failed_raw_sequences(blast_output)
  blast_output.each_line do |line|
    failure = line.match(/Error: (\w+): OID not found/)
    next unless failure

    acc = failure[1]
    warn "\nCould not find sequence '#{acc}' within the" \
                 ' BLAST database.'
    warn "Attempting to obtain sequence '#{acc}' from" \
                 ' remote BLAST databases.'
    File.open(opt[:raw_sequences], 'a+') do |f|
      f.puts extract_from_remote_db(acc)
    end
  end
end

.run(identifier, accession) ⇒ Object



137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/genevalidator/get_raw_sequences.rb', line 137

# Obtains the raw sequence for one hit, trying the sources in order:
# the pre-built raw-sequence index (only when opt[:raw_sequences] is set),
# then either the remote or the local BLAST database depending on whether
# opt[:db] matches /remote/.
#
# @param identifier [String] key used for the index lookup
# @param accession [String] accession used for the database lookup
# @return [String, nil] the sequence, or nil when the final result is missing
#   or contains "Error" (case-insensitive)
def run(identifier, accession)
  # A result is unusable when it is absent or carries the in-band error marker.
  unusable = ->(seq) { seq.nil? || seq =~ /Error/i }

  raw_seq = opt[:raw_sequences] ? extract_from_index(identifier) : nil

  if unusable.call(raw_seq)
    raw_seq = if opt[:db] =~ /remote/
                extract_from_remote_db(accession)
              else
                extract_from_local_db(false, accession)
              end
  end

  unusable.call(raw_seq) ? nil : raw_seq
end

.single_raw_seq_cmd(accession) ⇒ Object



191
192
193
# File 'lib/genevalidator/get_raw_sequences.rb', line 191

# Builds the blastdbcmd command line that fetches the single entry
# +accession+ from opt[:db] with -outfmt '%s'.
#
# Both interpolated values are wrapped in single quotes. A single quote inside
# a value is rewritten as '\'' (close quote, escaped quote, reopen quote) so
# that it cannot end the quoted shell argument early.
#
# @param accession [String] entry identifier passed to blastdbcmd's -entry
# @return [String] the complete shell command
def single_raw_seq_cmd(accession)
  # Block form of gsub avoids backslash handling in the replacement string.
  quote = ->(value) { "'" + value.to_s.gsub("'") { "'\\''" } + "'" }
  "blastdbcmd -entry #{quote.call(accession)} -db #{quote.call(opt[:db])}" \
  " -outfmt '%s'"
end