Top Level Namespace

Defined Under Namespace

Modules: Bio

Instance Method Summary collapse

Instance Method Details

#show_usage ⇒ Object



90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# File 'bin/biosge.rb', line 90

# Prints the command-line help text to standard output and terminates
# the process.
#
# The program name shown in the synopsis and in the examples is the
# basename of the running script ($0), so the text stays correct if the
# script is renamed or invoked through a different path.
#
# Identifiers such as '#{query}' are written as '\#{query}' inside the
# literal so that they are printed verbatim instead of being interpolated
# here; only #{prog} is expanded.
#
# This method does not return to its caller: it ends with Kernel#exit.
def show_usage
  prog  = File.basename($0)
  usage = %Q[
Usage:
    % #{prog} \[options...\] -q input_file -t db_file -c 'command --opts \#{query} \#{target}'

Options:
    -q or --query file
       Specify a flatfile including multiple entries.
    -t or --target file
       Specify a database file to be used.
    -c or --command 'string'
       Specify a command line to be executed.
       The following identifiers can be used in the command line 'string'.
         '\#{query}'       fragmented query file name (== input_file)
         '\#{target}'      target database file name
         '\#{work_dir}'    current working directory
         '\#{task_id}'     SGE_TASK_ID
         '\#{slice}'       -- task_id / @@slice (integer >= 0)
         '\#{input_file}'  -- "input/\#{slice}/\#{task_id}"
         '\#{output_file}' -- "output/\#{slice}/\#{task_id}"
         '\#{error_file}'  -- "error/\#{slice}/\#{task_id}"
    -o or --sge_opts 'string'
       Additional options for the qsub command.
         '-l s_vmem=16G -l mem_req=16' to reserve 16GB RAM for each job
         '-l cpu_arch=xeon'            to limit to use xeon CPUs only
       Resource reservation and backfill options:
         '-R y -l s_rt=12:0:0'         to limit max exec time to 12h (SIGUSR1)
         '-R y -l h_rt=12:0:0'         to limit max exec time to 12h (SIGKILL)
         '-R y -pe mpi-fillup 4'       to reserve 4 threads for MPI
    -m or --task_min integer
       Start number of tasks (default is 1, increase to start from halfway).
    -M or --task_max integer
       Last value (default is a total number of entries in query).
    -s or --task_step integer
       Number of processes per one job (default is 1000). Large value is
       recommended for short tasks with a large number of queries, and
       a small value (minimum is 1) can be used for time consuming tasks
       with a small number of queries.
    -h or --help
       Print this help message.
    --clear
       Remove a SGE script and output/error/log directories
    --clean
       Remove a count file and the extracted input directory
    --distclean
       Exec both of --clear and --clean

Examples:
    % #{prog} -q data/query.pep -t data/target.pep -c 'blastall -p blastp -i \#{query} -d \#{target}' -o '-l cpu_arch=xeon'
    % #{prog} -q data/query.nuc -t /usr/local/db/blast/ncbi/nr -c 'blastall -p blastx -s 10 -i \#{query} -d \#{target}' -o '-l cpu_arch=xeon -l sjob -l s_vmem=4G,mem_req=4'
    % #{prog} -q data/dme.nuc -t data/dme.genome -s 1 -c 'exonerate --bestn 1 --model est2genome --showtargetgff 1 --showvulgar yes \#{query} \#{target}'
    % #{prog} -q data/hsa.pep -t data/Pfam-A.hmm -m 1000 -M 2000 -s 10 -c 'hmmscan --tblout output/\#{slice}/\#{task_id}.tbl \#{target} \#{query}'
    % #{prog} -q data/refseq.gb -c 'bp_genbank2gff3.pl -out stdout \#{query}'
    % #{prog} --distclean

See also:
    http://kanehisa.hgc.jp/~k/sge/

]
  puts usage
  # Showing help is the whole job of this invocation; stop here.
  exit
end