Class: TreeBunchStarter

Inherits:
Object
  • Object
show all
Defined in:
lib/starter.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts) ⇒ TreeBunchStarter

Returns a new instance of TreeBunchStarter.



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/starter.rb', line 54

# Stores the run options and derives every path the starter works with.
# Only path strings are computed here; nothing is created on disk.
#
# opts - Hash. Keys read: :phylip, :base_dir, :prev_dir, :update_id
#        (0 when missing), :num_threads (0 when missing), :cluster_batch.
def initialize(opts)
  @phylip      = opts[:phylip]
  @base_dir    = opts[:base_dir]
  @prev_dir    = opts[:prev_dir]
  @update_id   = opts[:update_id] || 0
  @num_threads = opts[:num_threads] || 0

  # every working directory hangs off the base dir
  under_base = lambda { |*parts| File.join(@base_dir, *parts) }
  @alignment_dir           = under_base.call("alignments")
  @parsimony_trees_dir     = under_base.call("parsimony_trees")
  @parsimony_trees_out_dir = File.join(@parsimony_trees_dir, "output")
  @ml_trees_dir            = under_base.call("ml_trees")
  @bestML_trees_dir        = under_base.call("best_ml_trees")

  # alignment file name used for this update id
  @phylip_updated = File.join(@alignment_dir, "phy_#{@update_id}")

  # default tree counts: half as many best ML trees as parsimony trees
  @num_parsi_trees  = 4
  @num_bestML_trees = @num_parsi_trees / 2

  @CAT_topology_bunch       = File.join(@ml_trees_dir, "CAT_topology_bunch.nw")
  @CAT_topology_bunch_order = File.join(@ml_trees_dir, "CAT_topology_bunch_order.txt")
  @bestML_bunch             = File.join(@bestML_trees_dir, "best_bunch.nw")

  # the previous round's bunch is only known when a previous dir was given
  unless @prev_dir.nil?
    @prev_bestML_bunch = File.join(@prev_dir, "best_ml_trees", "best_bunch.nw")
  end

  @cluster_batch = opts[:cluster_batch]
  @logpath = under_base.call("starter.log")
end

Instance Attribute Details

#cluster_batchObject (readonly)

Given an initial alignment, it creates an initial bunch of ML trees in the bunch_0 dir and should log the results.



53
54
55
# File 'lib/starter.rb', line 53

# Read-only accessor for the value held in @cluster_batch.
# Returns nil while that instance variable has not been assigned.
def cluster_batch
  @cluster_batch
end

Instance Method Details

#add_update(opts) ⇒ Object



107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# File 'lib/starter.rb', line 107

# Runs update round @update_id, seeded with the best ML trees of the
# previous round.
#
# opts - Hash, first handed to check_options. Keys read here:
#        :num_parsi_trees and :num_bestML_trees (each falls back to the
#        instance default) and :exp_name (used for the cluster run).
#
# Returns "cluster" when the round is handed to CycleController#run_as_batch,
# otherwise the value returned by score_ML_trees.
#
# Raises RuntimeError when the previous best bunch is missing (including
# when no previous dir was configured) or when more best-ML trees are
# requested than this round produces. Every exception raised inside the
# round is logged through logput and then re-raised unchanged.
def add_update(opts)
  check_options(opts)
  begin
    num_parsi_trees = opts[:num_parsi_trees] || @num_parsi_trees
    num_bestML_trees = opts[:num_bestML_trees] || @num_bestML_trees
    # prepare the parsimony starting trees
    # (nil guard: File.exist?(nil) would raise an opaque TypeError instead)
    if @prev_bestML_bunch.nil? || !File.exist?(@prev_bestML_bunch)
      raise "prev bunch not ready #{@prev_bestML_bunch}"
    end
    last_best_bunch = NewickFile.new(@prev_bestML_bunch)
    # TODO: this save_each_newick_as should be wrapped around something that considers a list of
    # outliers and prunes them out before saving as "prev_parsi_tree", to stay compatible with
    # the next round
    last_best_bunch.save_each_newick_as(File.join(@parsimony_trees_dir, 'prev_parsi_tree'), "nw")
    prev_trees = Dir.entries(@parsimony_trees_dir).select { |f| f =~ /^prev_parsi_tree/ }
    if num_bestML_trees > num_parsi_trees * prev_trees.size
      raise "#bestML trees (#{num_bestML_trees}) is too higher than trees from previous round"
    end
    # a missing (nil) or empty cluster batch both mean "run on this machine"
    if @cluster_batch.nil? || @cluster_batch.empty?
      logput "****** Start update no #{@update_id} ********"
      logput "step 1 of 3 : Parsimony starting trees #{num_parsi_trees} each\n----"
      update_parsimony_trees(num_parsi_trees, prev_trees)
      # raxml light phase (2/3): use threads here? / each tree could be computed in parallel
      logput "step 2 of 3 : ML trees\n----"
      generate_ML_trees(@parsimony_trees_out_dir, @phylip_updated)
      # raxml scoring of initial bunch (needs to be done after step 2, or not?)
      logput "step 3 of 3 : Score bunch of initial ML trees and select best #{num_bestML_trees}\n----"
      best_lh = score_ML_trees(num_bestML_trees, @phylip_updated)
      logput "Bunch of initial ML trees #{num_bestML_trees}, ready at #{@bestML_bunch}\n----"
      best_lh
    else
      logput "Exp #{opts[:exp_name]}, your cluster will take care of this update no #{@update_id}. stay tuned"
      c = CycleController.new(:iter => @update_id,
                              :phy => @phylip_updated,
                              :num_parsi_trees => num_parsi_trees,
                              :num_ptrees => num_parsi_trees * prev_trees.size,
                              :num_bestML_trees => num_bestML_trees,
                              :base_dir => @base_dir,
                              :exp_name => opts[:exp_name])
      c.run_as_batch(@cluster_batch, @logpath)
      "cluster"
    end
  rescue Exception => e
    # Deliberately broad: the failure is only recorded in the log and then
    # re-raised, so nothing (signals and exits included) is swallowed.
    logput(e, true)
    raise
  end
end

#generate_initial_bunch(opts) ⇒ Object



170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
# File 'lib/starter.rb', line 170

# Builds the initial bunch of ML trees from the starting alignment @phylip.
#
# opts - Hash, first handed to check_options. Keys read here:
#        :num_parsi_trees and :num_bestML_trees (each falls back to the
#        instance default) and :exp_name (used for the cluster run).
#
# Returns "cluster" when the work is handed to CycleController#run_as_batch,
# otherwise the value returned by score_ML_trees.
#
# Raises RuntimeError when more best-ML trees than parsimony trees are
# requested. Every exception raised inside is logged through logput and
# then re-raised unchanged.
def generate_initial_bunch(opts)
  check_options(opts)
  begin
    num_parsi_trees = opts[:num_parsi_trees] || @num_parsi_trees
    num_bestML_trees = opts[:num_bestML_trees] || @num_bestML_trees
    if num_bestML_trees > num_parsi_trees
      raise "#bestML trees (#{num_bestML_trees}) cant be higher than #parsi trees(#{num_parsi_trees})"
    end
    # phases 2 and 3 can be done in parallel
    # a missing (nil) or empty cluster batch both mean "run on this machine"
    if @cluster_batch.nil? || @cluster_batch.empty?
      logput "Start generating initial bunch"
      logput "step 1 of 3 : Parsimony starting trees #{num_parsi_trees}\n----"
      generate_parsimony_trees(num_parsi_trees)
      # raxml light phase (2/3): use threads here? / each tree could be computed in parallel
      logput "step 2 of 3 : ML trees\n----"
      generate_ML_trees(@parsimony_trees_dir, @phylip)
      # raxml scoring of initial bunch (needs to be done after step 2, or not?)
      logput "step 3 of 3 : Score bunch of initial ML trees and select best #{num_bestML_trees}\n----"
      best_lh = score_ML_trees(num_bestML_trees, @phylip)
      logput "Bunch of initial ML trees #{num_bestML_trees}, ready at #{@bestML_bunch}\n----"
      best_lh
    else
      logput "your cluster will take care of this start #{@update_id}. Stay tuned"
      c = CycleController.new(:iter => 0,
                              :phy => @phylip,
                              :num_parsi_trees => num_parsi_trees,
                              :num_bestML_trees => num_bestML_trees,
                              :base_dir => @base_dir,
                              :exp_name => opts[:exp_name])
      c.run_as_batch(@cluster_batch, @logpath)
      "cluster"
    end
  rescue Exception => e
    # Deliberately broad: the failure is only recorded in the log and then
    # re-raised, so nothing (signals and exits included) is swallowed.
    logput(e, true)
    raise
  end
end

#logput(msg, error = false) ⇒ Object



78
79
80
81
82
83
84
85
86
# File 'lib/starter.rb', line 78

# Writes msg to the log file at @logpath and echoes it on standard output.
#
# msg   - the message (or exception) to record.
# error - when truthy the entry is logged at error level, otherwise at info.
#
# The Logger is created on first use and reused afterwards.
def logput(msg, error = false)
  @logger ||= Logger.new(@logpath)
  error ? @logger.error(msg) : @logger.info(msg)
  puts msg
end

#ready?Boolean

Returns:

  • (Boolean)


87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/starter.rb', line 87

# Makes sure the working directories exist and copies the alignment in.
#
# Each of the five working directories is checked in turn: a missing one
# is created, an existing one is only reported. The alignment is then
# copied: into the alignment dir when @update_id is 0, otherwise to
# @phylip_updated.
#
# Returns true only when none of the directories existed beforehand.
def ready?
  found_existing = false
  working_dirs = [@alignment_dir, @parsimony_trees_dir, @parsimony_trees_out_dir,
                  @ml_trees_dir, @bestML_trees_dir]
  working_dirs.each do |dir|
    if File.exist?(dir)
      logput "Exists #{dir}"
      found_existing = true
    else
      FileUtils.mkdir_p dir
      logput "Created #{dir}"
    end
  end

  if @update_id == 0
    FileUtils.cp @phylip, @alignment_dir
  else
    logput "Copying new update alignment (not expanding) from #{@phylip} to #{@phylip_updated}"
    FileUtils.cp @phylip, @phylip_updated
  end

  !found_existing
end

#search_std(num_gamma_trees = nil) ⇒ Object



153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# File 'lib/starter.rb', line 153

# Runs a standard GAMMA ML search from scratch on @phylip, writing into
# @ml_trees_dir, and extracts the best likelihood from the search output.
#
# num_gamma_trees - number of trees for the search; 1 is used when nil.
#
# :num_threads is only passed to RaxmlGammaSearch when @num_threads is
# greater than zero.
#
# Returns the text following the last "tree" on the
# "Final GAMMA-based Score of best ..." line of the search's stdout file
# (leading whitespace is not stripped).
# Raises RuntimeError when that file contains no such line.
def search_std(num_gamma_trees = nil)
  # resolve the default once so the search and the log lines agree
  tree_count = num_gamma_trees || 1
  search_opts = {
    :phylip => @phylip,
    :outdir => @ml_trees_dir,
    :num_gamma_trees => tree_count,
    :stderr => File.join(@ml_trees_dir, "err"),
    :stdout => File.join(@ml_trees_dir, "info"),
    :name => "std_GAMMA_search"
  }
  search_opts[:num_threads] = @num_threads if @num_threads.to_i > 0
  r = RaxmlGammaSearch.new(search_opts)
  logput "Start ML search from scratch with #{tree_count} trees"
  r.run
  # block form closes the file; find stops at the first matching line
  score_line = File.open(r.stdout) do |f|
    f.find { |l| l =~ /^Final GAMMA-based Score of best/ }
  end
  if score_line.nil?
    raise "no 'Final GAMMA-based Score of best' line found in #{r.stdout}"
  end
  bestLH = score_line.chomp.split("tree").last
  logput "Done ML search from scratch with #{tree_count} trees"
  bestLH
end