Class: Genfrag::App::SearchCommand::ProcessFile

Inherits:
Object
Defined in:
lib/genfrag/app/search_command/process_file.rb

Class Method Summary

Class Method Details

.process_db_fasta_file(db_normalized_fasta) ⇒ Object

Process the standardized Fasta file (sqlite3 format)



22
23
24
25
26
27
28
29
# File 'lib/genfrag/app/search_command/process_file.rb', line 22

# Load every normalized FASTA record from a database handle into a Hash.
#
# Runs "select * from db_normalized_fasta" through
# db_normalized_fasta.execute and reads each yielded row positionally:
# column 0 is converted with #to_i and used as the key, column 1 is parsed
# with CSV.parse_line into :definitions, and column 2 is stored unchanged
# as :sequence.
#
# Returns a Hash of id => {:definitions => Array, :sequence => column 2}.
def process_db_fasta_file(db_normalized_fasta)
  by_id = {}
  query = "select * from db_normalized_fasta"
  db_normalized_fasta.execute(query) do |record|
    fasta_id, raw_definitions, bases = record[0], record[1], record[2]
    by_id[fasta_id.to_i] = {
      :definitions => CSV.parse_line(raw_definitions),
      :sequence    => bases
    }
  end
  by_id
end

.process_db_freq_lookup(db_freq_lookup) ⇒ Object

Process the fragment frequency file (sqlite3 format)



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/genfrag/app/search_command/process_file.rb', line 55

# Load the fragment-frequency table from a database handle into a Hash
# keyed by fragment size.
#
# Runs "select * from db_freq_lookup" through db_freq_lookup.execute. For
# each yielded row, column 1 (converted with #to_i) is the size and column 2
# is a ", "-separated list of "offset fasta_id" pairs. Column 0 is not used.
#
# Returns a Hash of size => Array of
# {:offset => Integer, :fasta_id => Integer, :raw_size => size}.
# A later row with the same size replaces the earlier entry.
def process_db_freq_lookup(db_freq_lookup)
  by_size = {}
  db_freq_lookup.execute("select * from db_freq_lookup") do |record|
    fragment_size = record[1].to_i
    positions = record[2].split(', ').map do |entry|
      offset, fasta_id = entry.split(' ')
      {
        :offset   => offset.to_i,
        :fasta_id => fasta_id.to_i,
        :raw_size => fragment_size
      }
    end
    by_size[fragment_size] = positions
  end
  by_size
end

.process_tdf_adapters(f_adapters, adapter5_name = nil, adapter3_name = nil) ⇒ Object

Process the adapter file (tdf format)



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/genfrag/app/search_command/process_file.rb', line 76

# Look up the requested 5' and/or 3' adapters in a tab-delimited adapter table.
#
# f_adapters    - Array of lines. The first element is skipped and never
#                 parsed (presumably a header row). Each remaining non-blank
#                 line is split on tabs into: name, worksense, sequence,
#                 specificity.
# adapter5_name - pattern for the 5' adapter name, or nil to skip it.
# adapter3_name - pattern for the 3' adapter name, or nil to skip it.
#
# The names are interpolated into a case-insensitive Regexp, so they match
# as substrings and regexp metacharacters in them are live.
#
# The FIRST row whose worksense and name match wins for each adapter; later
# matching rows never overwrite it, regardless of whether the other adapter
# has been found yet. Scanning stops once every requested adapter is found,
# so rows after that point are not validated.
#
# Returns a Hash with :adapter5_sequence, :adapter5_specificity,
# :adapter3_sequence and :adapter3_specificity (nil for adapters that were
# not requested).
#
# Raises RuntimeError when a scanned row's worksense column does not start
# with '5' or '3' (including when it is empty or missing), or when a
# requested adapter is not found (including when f_adapters is empty).
def process_tdf_adapters(f_adapters, adapter5_name=nil, adapter3_name=nil)
  adapter5_sequence = nil
  adapter3_sequence = nil
  adapter5_specificity = nil
  adapter3_specificity = nil
  adapter5_needs_to_be_found = !adapter5_name.nil?
  adapter3_needs_to_be_found = !adapter3_name.nil?

  # Array#[] with a range starting past the end returns nil (e.g. [][1..-1]),
  # so fall back to an empty list and let the "not found" checks below report it.
  (f_adapters[1..-1] || []).each do |line|
    break unless adapter5_needs_to_be_found || adapter3_needs_to_be_found
    fields = line.chomp.split("\t")
    next if fields.empty?
    name = fields[0]
    # to_s + [0, 1] turns a missing or empty column into 0, which fails the
    # validation below with the intended message instead of a NoMethodError.
    worksense = fields[1].to_s[0, 1].to_i
    unless worksense == 3 || worksense == 5
      raise "Unknown worksense value \"#{fields[1]}\". First character of column must be a '5' or a '3'."
    end
    # Drop a trailing "|" followed by any run of N/n characters.
    sequence = fields[2].gsub(/\|N*$/i, '')
    specificity = fields[3] # what it's supposed to match

    if adapter5_name && adapter5_needs_to_be_found && worksense == 5 && name =~ /#{adapter5_name}/i
      adapter5_sequence = sequence
      adapter5_specificity = specificity
      adapter5_needs_to_be_found = false
    elsif adapter3_name && adapter3_needs_to_be_found && worksense == 3 && name =~ /#{adapter3_name}/i
      adapter3_sequence = sequence
      adapter3_specificity = specificity
      adapter3_needs_to_be_found = false
    end
  end

  if adapter5_name && adapter5_needs_to_be_found
    raise "named-adapter5 ('#{adapter5_name}') with the worksense '5' not found."
  elsif adapter3_name && adapter3_needs_to_be_found
    raise "named-adapter3 ('#{adapter3_name}') with the worksense '3' not found."
  end

  return {
    :adapter5_sequence    => adapter5_sequence,
    :adapter5_specificity => adapter5_specificity,
    :adapter3_sequence    => adapter3_sequence,
    :adapter3_specificity => adapter3_specificity
  }
end

.process_tdf_fasta_file(f_normalized_fasta) ⇒ Object

Process the standardized Fasta file (tdf format)



11
12
13
14
15
16
17
18
19
# File 'lib/genfrag/app/search_command/process_file.rb', line 11

# Build a Hash of normalized FASTA records from tab-delimited lines.
#
# f_normalized_fasta - Array of lines. The first element is skipped and never
#                      parsed (presumably a header row). Each remaining
#                      non-blank line is split on tabs into: id, a
#                      CSV-encoded definitions list, and the sequence.
#
# Blank lines are skipped, and an empty Array yields an empty Hash.
#
# Returns a Hash of id (Integer, via #to_i) =>
# {:definitions => Array from CSV.parse_line, :sequence => third column}.
def process_tdf_fasta_file(f_normalized_fasta)
  sequences = {}
  # Array#[] with a range starting past the end returns nil (e.g. [][1..-1]).
  (f_normalized_fasta[1..-1] || []).each do |line|
    fields = line.chomp.split("\t")
    next if fields.empty?
    sequences[fields[0].to_i] = {
      :definitions => CSV.parse_line(fields[1]),
      :sequence    => fields[2]
    }
  end
  sequences
end

.process_tdf_freq_lookup(f_freq_lookup) ⇒ Object

Process the fragment frequency file (tdf format)



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/genfrag/app/search_command/process_file.rb', line 33

# Build the fragment-frequency lookup from tab-delimited lines.
#
# f_freq_lookup - Array of lines. The first element is skipped and never
#                 parsed (presumably a header row). Each remaining non-blank
#                 line is split on tabs; column 1 (via #to_i) is the size and
#                 column 2 is a ", "-separated list of "offset fasta_id"
#                 pairs. Column 0 is not used.
#
# Blank lines are skipped, and an empty Array yields an empty Hash.
#
# Returns a Hash of size => Array of
# {:offset => Integer, :fasta_id => Integer, :raw_size => size}.
# A later line with the same size replaces the earlier entry.
def process_tdf_freq_lookup(f_freq_lookup)
  sizes = {}
  # Array#[] with a range starting past the end returns nil (e.g. [][1..-1]).
  (f_freq_lookup[1..-1] || []).each do |line|
    fields = line.chomp.split("\t")
    next if fields.empty?
    size = fields[1].to_i
    sizes[size] = fields[2].split(', ').map { |entry|
      offset, fasta_id = entry.split(' ')
      {:offset => offset.to_i, :fasta_id => fasta_id.to_i, :raw_size => size}
    }
  end
  sizes
end