Class: Bio::FinishM::ORFsFinder

Inherits:
Object
  • Object
show all
Includes:
Logging
Defined in:
lib/finishm/orfs_finder.rb

Constant Summary

# Defaults merged into the options hash by #add_options.
DEFAULT_OPTIONS = { :min_orf_length => 96 }

Instance Method Summary

Methods included from Logging

#log

Instance Method Details

#add_options(optparse_object, options) ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/finishm/orfs_finder.rb', line 8

# Register the command-line options of `finishm find_orfs` on an option parser.
#
# optparse_object - the option parser to configure; its banner is replaced and
#                   separators/switches are appended to it.
# options         - Hash mutated in place: Visualise's defaults and this
#                   class's DEFAULT_OPTIONS are merged in immediately, and the
#                   switch callbacks store parsed values into it later.
#
# Raises RuntimeError (from the --min-orf-length callback, at parse time) when
# the supplied length is not a positive integer written in canonical form.
def add_options(optparse_object, options)
  options.merge! Bio::FinishM::Visualise::DEFAULT_OPTIONS
  options.merge! DEFAULT_OPTIONS
  optparse_object.banner = "\nUsage: finishm find_orfs [--orf-amino-acids OUTPUT_FAA --orf-nucleotides OUTPUT_FNA]

  Find possible open reading frames in assembly graph
  \n\n"

  optparse_object.separator "\nOutput sequence files\n\n"
  # Help text mirrors #run: when neither output is requested only the
  # nucleotide file 'orfs.fna' is written.
  optparse_object.on("--orf-amino-acids OUTPUT_FAA", "Output ORF amino acid sequences [default: not written]") do |arg|
    options[:output_faa] = arg
  end
  optparse_object.on("--orf-nucleotides OUTPUT_FNA", "Output ORF nucleotide sequences [default: orfs.fna unless --orf-amino-acids is specified]") do |arg|
    options[:output_fna] = arg
  end

  optparse_object.separator "\nInput genome information"
  optparse_object.separator "\nIf an assembly is to be done, there must be some definition of reads:\n\n" #TODO improve this help
  Bio::FinishM::ReadInput.new.add_options(optparse_object, options)

  optparse_object.separator "\nOptional arguments:\n\n"
  # The LENGTH placeholder is what tells OptionParser this switch takes a
  # value; without it the callback receives `true` instead of the string.
  optparse_object.on("--min-orf-length LENGTH", "Minimum ORF length [default: #{DEFAULT_OPTIONS[:min_orf_length]}]") do |arg|
    length = arg.to_i
    # Round-tripping through to_s rejects anything that is not a plain
    # integer literal (e.g. "12abc", "1.5", "").
    if length.to_s != arg || length < 1
      raise "Unable to parse minimum orf length parameter #{arg}, cannot continue"
    end
    options[:min_orf_length] = length
  end


  optparse_object.separator "\nOptional graph-exploration arguments:\n\n"
  Bio::FinishM::Visualise.new.add_probe_options(optparse_object, options)

  optparse_object.separator "\nOptional graph-related arguments:\n\n"
  Bio::FinishM::GraphGenerator.new.add_options(optparse_object, options)
end

#find_orfs_in_graph(finishm_graph, initial_onodes, options = {}) ⇒ Object



94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/finishm/orfs_finder.rb', line 94

# Search the graph for ORFs starting from the given oriented nodes.
#
# finishm_graph  - object whose #graph is handed to the ORF finder.
# initial_onodes - oriented nodes; each one seeds its own single-node trail.
# options        - Hash read for :min_orf_length and :range.
#
# Returns whatever AllOrfsFinder#orf_sequences_from_trails returns for the
# trails found.
def find_orfs_in_graph(finishm_graph, initial_onodes, options={})
  min_length = options[:min_orf_length]

  # One fresh trail per starting node.
  seed_trails = initial_onodes.map do |start_node|
    Bio::Velvet::Graph::OrientedNodeTrail.new.tap do |trail|
      trail.add_oriented_node start_node
    end
  end

  finder = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new
  trails = finder.find_orfs_in_graph(
    finishm_graph.graph, seed_trails, min_length, options[:range]
  )
  finder.orf_sequences_from_trails(trails, min_length)
end

#orf_to_settable(path, start_index, start_offset, end_index, end_offset) ⇒ Object



127
128
129
# File 'lib/finishm/orfs_finder.rb', line 127

# Build a two-element array describing an ORF: the #to_settable form of every
# path element from start_index through end_index (inclusive), followed by
# the [start_offset, end_offset] pair.
def orf_to_settable(path, start_index, start_offset, end_index, end_offset)
  settable_nodes = path[start_index..end_index].map(&:to_settable)
  offsets = [start_offset, end_offset]
  [settable_nodes, offsets]
end

#run(options, argv) ⇒ Object



52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/finishm/orfs_finder.rb', line 52

# Entry point for the find_orfs mode: obtain a graph, choose the node range
# to search, find ORFs in it and write them to the requested output file(s).
#
# options - Hash of parsed options. It is also written to: :range is set, and
#           :output_fna is defaulted to 'orfs.fna' when no output was chosen.
# argv    - remaining command-line arguments (not used in this method).
def run(options, argv)
  read_input = Bio::FinishM::ReadInput.new
  read_input.parse_options options

  visualise = Bio::FinishM::Visualise.new

  # Pick the graph source. When no probes, nodes or assembly files were
  # given, a graph is generated without probes and no interesting node ids
  # are available.
  finishm_graph, interesting_node_ids =
    if options[:interesting_probes] || options[:interesting_probe_names]
      visualise.generate_graph_from_probes(read_input, options)
    elsif options[:interesting_nodes]
      [visualise.generate_graph_from_nodes(read_input, options), options[:interesting_nodes]]
    elsif options[:assembly_files]
      visualise.generate_graph_from_assembly(read_input, options)
    else
      [Bio::FinishM::GraphGenerator.new.generate_graph([], read_input, options), nil]
    end

  # Restrict the search to the leash neighbourhood when a leash length was
  # given, otherwise search every node of the graph.
  options[:range] =
    if options[:graph_search_leash_length]
      nodes_within_leash, node_ids_at_leash =
        visualise.get_nodes_within_leash(finishm_graph, interesting_node_ids, options)
      # The option is re-read here, after the leash search has been handed
      # the options hash.
      if options[:graph_search_leash_length]
        log.info "Found #{node_ids_at_leash.length} nodes at the end of the #{options[:graph_search_leash_length] }bp leash"
      end
      nodes_within_leash
    else
      finishm_graph.graph.nodes
    end

  initial_onodes = Bio::FinishM::PathCounter.new.get_leash_start_nodes(finishm_graph, options[:range])
  orfs = find_orfs_in_graph(finishm_graph, initial_onodes, options)
  log.info "Found #{orfs.length} open reading frames longer than #{options[:min_orf_length]}."

  # With no explicit output requested, fall back to a nucleotide file.
  options[:output_fna] = 'orfs.fna' unless options[:output_fna] || options[:output_faa]

  write_orfs_to_file(orfs, options[:output_fna]) if options[:output_fna]
  write_orfs_to_file(orfs, options[:output_faa], true) if options[:output_faa]
end

#validate_options(options, argv) ⇒ Object



45
46
47
48
49
50
# File 'lib/finishm/orfs_finder.rb', line 45

# Run the Visualise validators in order (argv length, probe options, assembly
# options) and return the first truthy result; later validators are not
# called once one returns a truthy value. When none does, the result of the
# last validator is returned.
def validate_options(options, argv)
  checker = Bio::FinishM::Visualise.new

  problem = checker.validate_argv_length(argv)
  problem ||= checker.validate_probe_options(options)
  problem ||= checker.validate_assembly_options(options)
  problem
end

#write_orfs_to_file(found_orfs, orfs_file, translate = false) ⇒ Object



108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/finishm/orfs_finder.rb', line 108

# Write ORFs to a FASTA-style file, overwriting any existing file.
#
# found_orfs - collection whose elements expose the name at [0] and the
#              sequence at [1].
# orfs_file  - path of the file to write.
# translate  - when truthy, each sequence minus its last three characters is
#              passed through AllOrfsFinder#sequence2AA before being written;
#              otherwise the sequence is written as-is.
#
# Records are numbered from 1 in iteration order, with header lines of the
# form ">finishm_orf_<n> <name>".
def write_orfs_to_file(found_orfs, orfs_file, translate=false)
  translator = translate ? Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new : nil

  File.open(orfs_file, 'w') do |out|
    found_orfs.each_with_index do |record, index|
      out.puts ">finishm_orf_#{index + 1} #{record[0]}"
      sequence = record[1]
      out.puts(translate ? translator.sequence2AA(sequence[0...-3]) : sequence)
    end
  end
end