Module: MiGA::Cli::Action::Download::Ncbi
- Includes:
- Base
- Included in:
- NcbiGet
- Defined in:
- lib/miga/cli/action/download/ncbi.rb
Overview
Helper module including download functions for the ncbi_get action
Instance Method Summary
collapse
Methods included from Base
#cli_base_flags, #cli_filters, #cli_save_actions, #discard_excluded, #download_entries, #finalize_tasks, #generic_perform, #impose_limit, #load_tasks, #save_entry, #unlink_entries
Instance Method Details
#cli_name_modifiers(opt) ⇒ Object
31
32
33
34
35
36
37
38
39
40
41
42
|
# File 'lib/miga/cli/action/download/ncbi.rb', line 31
# Registers the command-line options that control how dataset names are
# built.
#
# +opt+ is the option parser receiving the switches. The value yielded for
# +--no-version-name+ is stored in +cli[:add_version]+, and +--legacy-name+
# is registered through +cli.opt_flag+ under the +:legacy_name+ key.
# Returns whatever +cli.opt_flag+ returns.
def cli_name_modifiers(opt)
  version_help = [
    'Do not add sequence version to the dataset name',
    'Only affects --complete and --chromosome'
  ]
  opt.on('--no-version-name', *version_help) do |flag|
    cli[:add_version] = flag
  end

  legacy_help =
    'Use dataset names based on chromosome entries instead of assembly'
  cli.opt_flag(opt, 'legacy-name', legacy_help, :legacy_name)
end
|
#cli_task_flags(opt) ⇒ Object
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
# File 'lib/miga/cli/action/download/ncbi.rb', line 11
# Registers the command-line flags that select which genomes to download.
#
# +opt+ is the option parser receiving the switches. One flag is registered
# through +cli.opt_flag+ per assembly status (plus +reference+), and +--all+
# turns on every status flag except +reference+.
# Returns the value of the final +opt.on+ call.
def cli_task_flags(opt)
  flag_help = {
    'reference' => 'Download all reference genomes (ignore any other status)',
    'complete' => 'Download complete genomes',
    'chromosome' => 'Download complete chromosomes',
    'scaffold' => 'Download genomes in scaffolds',
    'contig' => 'Download genomes in contigs'
  }
  flag_help.each { |flag, help| cli.opt_flag(opt, flag, help) }

  opt.on('--all', 'Download all genomes (in any status)') do
    %i[complete chromosome scaffold contig].each { |level| cli[level] = true }
    # Keep the handler's value equal to that of the last assignment
    true
  end
end
|
#parse_csv_as_datasets(doc) ⇒ Object
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
|
# File 'lib/miga/cli/action/download/ncbi.rb', line 69
# Parses the CSV genome table +doc+ (with a header row) into a Hash of
# dataset definitions keyed by dataset name.
#
# Rows whose +assembly+ field is missing, empty, or '-' are skipped. Each
# remaining row yields a Hash with +:ids+, +:db+, +:universe+ and +:md+
# (metadata) entries. The +:ncbi_nuccore+ and +:release_date+ metadata are
# only set when the row provides replicons or a release date. Rows that
# resolve to the same name overwrite earlier ones.
def parse_csv_as_datasets(doc)
  CSV.parse(doc, headers: true).each_with_object({}) do |row, datasets|
    assembly = row['assembly']
    next if [nil, '', '-'].include?(assembly)

    replicons = remote_row_replicons(row)
    # The name is resolved before the entry is built, as in the original
    # statement order
    name = remote_row_name(row, replicons, assembly)

    metadata = { type: :genome, ncbi_asm: assembly, strain: row['strain'] }
    metadata[:ncbi_nuccore] = replicons.join(',') unless replicons.nil?
    released = row['release_date']
    metadata[:release_date] = Time.parse(released).to_s unless released.nil?

    datasets[name] = {
      ids: [assembly], db: :assembly, universe: :ncbi, md: metadata
    }
  end
end
|
#remote_list ⇒ Object
54
55
56
57
58
59
60
61
62
63
64
65
66
67
|
# File 'lib/miga/cli/action/download/ncbi.rb', line 54
# Obtains the genome table and returns it parsed by +parse_csv_as_datasets+.
#
# When +cli[:ncbi_table_file]+ is set, the table is read from that local
# file; otherwise it is downloaded from the URL built by +remote_list_url+.
# Returns whatever +parse_csv_as_datasets+ returns.
def remote_list
  if cli[:ncbi_table_file]
    cli.say 'Reading genome list from file'
    # The block form guarantees that the handle is closed even if parsing
    # raises, which a trailing explicit close would not
    File.open(cli[:ncbi_table_file], 'r') do |fh|
      parse_csv_as_datasets(fh)
    end
  else
    cli.say 'Downloading genome list'
    parse_csv_as_datasets(MiGA::RemoteDataset.download_url(remote_list_url))
  end
end
|
#remote_list_url ⇒ Object
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
# File 'lib/miga/cli/action/download/ncbi.rb', line 114
# Builds the URL used to request the genome table for +cli[:taxon]+.
#
# Double quotes in the taxon are replaced by single quotes before it is
# embedded in the query. With +cli[:reference]+ the query is restricted to
# representative genomes; otherwise it is restricted to the assembly levels
# whose flags are set in +cli+.
# Returns the base URL followed by the form-encoded parameters.
def remote_list_url
  taxon = cli[:taxon]&.tr('"', "'")
  query = '[display()].from(GenomeAssemblies).' \
          'usingschema(/schema/GenomeAssemblies).' \
          "matching(tab==[\"Prokaryotes\"] and q==\"#{taxon}\""

  if cli[:reference]
    query += ' and refseq_category==["representative"]'
  else
    # NOTE(review): the leading space in ' Chromosome' is kept exactly as
    # found; presumably the remote service expects it. Confirm before
    # changing.
    level_names = {
      complete: 'Complete', chromosome: ' Chromosome',
      scaffold: 'Scaffold', contig: 'Contig'
    }
    requested = level_names.select { |flag, _name| cli[flag] }.values
    quoted = requested.map { |name| "\"#{name}\"" }
    query += " and level==[#{quoted.join(',')}]"
  end
  query += ')'

  columns = %w[organism assembly replicons level release_date strain]
  params = {
    q: query,
    fields: columns.map { |col| "#{col}|#{col}" }.join(','),
    nolimit: 'on'
  }
  'https://www.ncbi.nlm.nih.gov/genomes/solr2txt.cgi?' +
    URI.encode_www_form(params)
end
|
#remote_row_name(r, rep, asm) ⇒ Object
102
103
104
105
106
107
108
109
110
111
112
|
# File 'lib/miga/cli/action/download/ncbi.rb', line 102
# Builds the dataset name for the table row +r+.
#
# +rep+ is the list of replicon accessions for the row (or nil) and +asm+ is
# the assembly accession. Neither argument is modified.
#
# - With both +cli[:legacy_name]+ and +cli[:reference]+ the name is just the
#   organism.
# - With +cli[:legacy_name]+ and a level of 'Complete' or ' Chromosome', the
#   organism is suffixed with the first replicon accession (an empty string
#   if there is none).
# - Otherwise the organism is suffixed with the assembly accession.
#
# Unless +cli[:add_version]+ is set, a trailing +.N+ version is removed from
# the accession. The result is passed through +String#miga_name+, which is
# defined outside this listing.
def remote_row_name(r, rep, asm)
  return r['#organism'].miga_name if cli[:legacy_name] && cli[:reference]

  acc =
    if cli[:legacy_name] && ['Complete', ' Chromosome'].include?(r['level'])
      # Array() + to_s also cover an empty replicon list, not only nil
      Array(rep).first.to_s
    else
      asm
    end
  # Non-destructive on purpose: +acc+ aliases the caller's +asm+ (or
  # +rep.first+), which the caller still uses with the version included
  acc = acc.sub(/\.\d+\Z/, '') unless cli[:add_version]
  "#{r['#organism']}_#{acc}".miga_name
end
|
#remote_row_replicons(r) ⇒ Object
93
94
95
96
97
98
99
100
|
# File 'lib/miga/cli/action/download/ncbi.rb', line 93
# Extracts the replicon accessions from the +replicons+ field of row +r+.
#
# The field is split on '; '. For each entry, everything up to and including
# the last colon is dropped, as is everything from the first slash onwards.
# Returns an Array of Strings, or nil when the row has no +replicons+ value.
def remote_row_replicons(r)
  field = r['replicons']
  return nil if field.nil?

  field.split('; ').map do |entry|
    entry.gsub(/.*:/, '').gsub(%r{/.*}, '')
  end
end
|
#sanitize_cli ⇒ Object
44
45
46
47
48
49
50
51
52
|
# File 'lib/miga/cli/action/download/ncbi.rb', line 44
# Validates the parsed command-line options.
#
# Requires the taxon parameter through +cli.ensure_par+ and raises a
# RuntimeError unless at least one genome type flag is set. In dry mode
# (+cli[:dry]+), +cli[:save_every]+ is set to 1.
def sanitize_cli
  cli.ensure_par(taxon: '-T')

  genome_types = %i[reference complete chromosome scaffold contig]
  requested = genome_types.any? { |type| cli[type] }
  raise 'No action requested: pick at least one type of genome' unless requested

  cli[:save_every] = 1 if cli[:dry]
end
|