Module: MiGA::Cli::Action::Download::Base
Overview
Helper module including download functions for the *_get actions
Instance Method Summary collapse
- #cli_base_flags(opt) ⇒ Object
- #cli_filters(opt) ⇒ Object
- #cli_save_actions(opt) ⇒ Object
- #discard_excluded(ds) ⇒ Object
- #download_entries(ds, p) ⇒ Object
- #finalize_tasks(d, downloaded) ⇒ Object
- #generic_perform ⇒ Object
- #impose_limit(ds) ⇒ Object
- #load_tasks ⇒ Object
- #save_entry(name, body, p) ⇒ Object
Saves the (generic remote) entry identified by name with body into the project p, and returns true on success and false otherwise.
- #unlink_entries(p, unlink) ⇒ Object
Instance Method Details
#cli_base_flags(opt) ⇒ Object
10 11 12 13 14 15 16 17 18 19 20 |
# File 'lib/miga/cli/action/download/base.rb', line 10
#
# Registers the options shared by the download actions on +opt+
# (presumably an OptionParser; confirm against callers):
# - <tt>--max INT</tt> stores the parsed Integer in <tt>cli[:max_datasets]</tt>
# - <tt>-m, --metadata STRING</tt> stores the string in <tt>cli[:metadata]</tt>
#
# Returns whatever the last +opt.on+ call returns.
def cli_base_flags(opt)
  max_help = 'Maximum number of datasets to download (by default: unlimited)'
  opt.on('--max INT', Integer, max_help) do |limit|
    cli[:max_datasets] = limit
  end

  metadata_help = [
    'Metadata as key-value pairs separated by = and delimited by comma',
    'Values are saved as strings except for booleans (true / false) or nil'
  ]
  opt.on('-m', '--metadata STRING', *metadata_help) do |pairs|
    cli[:metadata] = pairs
  end
end
#cli_filters(opt) ⇒ Object
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/miga/cli/action/download/base.rb', line 22
#
# Registers the dataset-filtering options on +opt+, in this order:
# <tt>--exclude</tt>, the +dry+ flag, <tt>--ignore-until</tt>,
# <tt>--ignore-removed</tt>, and the +get-metadata+ flag.
#
# Options registered through +opt.on+ store their value in +cli+ under
# +:exclude+, +:ignore_until+ and +:ignore_removed+. The two flags are
# delegated to +cli.opt_flag+ (the last one is passed +:get_md+, presumably
# the key it is stored under; confirm against +opt_flag+).
def cli_filters(opt)
  opt.on('--exclude PATH', 'A file with dataset names to exclude') do |path|
    cli[:exclude] = path
  end

  cli.opt_flag(opt, 'dry', 'Do not download or save the datasets')

  until_help =
    'Ignores all datasets until a name is found (useful for large reruns)'
  opt.on('--ignore-until STRING', until_help) do |name|
    cli[:ignore_until] = name
  end

  removed_help =
    'Ignores entries removed from NCBI (by default fails on removed entries)'
  opt.on('--ignore-removed', removed_help) do |flag|
    cli[:ignore_removed] = flag
  end

  get_md_help = 'Only download and update metadata for existing datasets'
  cli.opt_flag(opt, 'get-metadata', get_md_help, :get_md)
end
#cli_save_actions(opt) ⇒ Object
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/miga/cli/action/download/base.rb', line 42
#
# Registers the options that control how downloaded datasets are saved,
# in this order: the +only-metadata+ flag (via +cli.opt_flag+, passed
# +:only_md+), <tt>--save-every</tt>, <tt>-q/--query</tt>,
# <tt>-u/--unlink</tt> and <tt>-R/--remote-list</tt>.
#
# The +opt.on+ options store their value in +cli+ under +:save_every+,
# +:query+, +:unlink+ and +:remote_list+. The help text of
# <tt>--save-every</tt> embeds the value of <tt>cli[:save_every]</tt> at the
# time this method runs.
def cli_save_actions(opt)
  only_md_help = 'Create datasets without input data but retrieve all metadata'
  cli.opt_flag(opt, 'only-metadata', only_md_help, :only_md)

  save_every_help = [
    'Save project every this many downloaded datasets',
    'If zero, it saves the project only once upon completion',
    "By default: #{cli[:save_every]}"
  ]
  opt.on('--save-every INT', Integer, *save_every_help) do |count|
    cli[:save_every] = count
  end

  query_help = 'Register the datasets as queries, not reference datasets'
  opt.on('-q', '--query', query_help) do |flag|
    cli[:query] = flag
  end

  unlink_help =
    'Unlink all datasets in the project missing from the download list'
  opt.on('-u', '--unlink', unlink_help) do |flag|
    cli[:unlink] = flag
  end

  list_help =
    'Path to an output file with the list of all datasets listed remotely'
  opt.on('-R', '--remote-list PATH', list_help) do |path|
    cli[:remote_list] = path
  end
end
#discard_excluded(ds) ⇒ Object
102 103 104 105 106 107 108 109 110 111 |
# File 'lib/miga/cli/action/download/base.rb', line 102
#
# Removes from +ds+ (in place) every key listed in the file at
# <tt>cli[:exclude]</tt>, one name per line. Lines starting with +#+ are
# skipped, and the trailing line terminator is removed before the lookup.
#
# Does nothing when <tt>cli[:exclude]</tt> is +nil+. Always returns +ds+.
def discard_excluded(ds)
  exclude_path = cli[:exclude]
  return ds if exclude_path.nil?

  cli.say "Discarding datasets in #{exclude_path}"
  File.readlines(exclude_path).each do |line|
    next if line =~ /^#/ # comment line in the exclusion file

    ds.delete(line.chomp)
  end
  ds
end
#download_entries(ds, p) ⇒ Object
123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
# File 'lib/miga/cli/action/download/base.rb', line 123
#
# Iterates over the +name+ => +body+ pairs in +ds+ and saves each eligible
# entry into the project +p+ through +save_entry+.
#
# An entry is skipped (but still listed) when:
# - <tt>cli[:ignore_until]</tt> is set and that name has not been reached yet
# - <tt>p.dataset(name).nil?</tt> equals <tt>cli[:get_md]</tt>, i.e. the
#   dataset is missing while updating metadata, or already exists otherwise
#
# With <tt>cli[:dry]</tt> eligible entries are counted but not saved. An
# entry for which +save_entry+ returns a falsy value is neither counted nor
# kept in the list.
#
# Unless <tt>cli[:save_every]</tt> is 1, +p.do_not_save+ is switched on for
# the duration of the loop, the project is saved every
# <tt>cli[:save_every]</tt> saved entries (when greater than 1), and once
# more at the end.
#
# Returns a two-element Array: the listed names and the number of entries
# downloaded (or that would be downloaded in a dry run).
def download_entries(ds, p)
  noun = ds.size == 1 ? 'entry' : 'entries'
  cli.say "Downloading #{ds.size} #{noun}"
  p.do_not_save = true unless cli[:save_every] == 1

  skipping = !cli[:ignore_until].nil?
  count = 0
  listed = []
  ds.each do |name, body|
    listed << name
    cli.puts name
    # Stop skipping as soon as the requested name shows up
    skipping &&= name != cli[:ignore_until]
    next if skipping || p.dataset(name).nil? == cli[:get_md]

    if cli[:dry]
      count += 1
      next
    end

    unless save_entry(name, body, p)
      # Entries that could not be saved are dropped from the list
      listed.pop
      next
    end

    count += 1
    every = cli[:save_every]
    p.save! if every > 1 && (count % every).zero?
  end

  p.do_not_save = false
  p.save! unless cli[:save_every] == 1
  [listed, count]
end
#finalize_tasks(d, downloaded) ⇒ Object
86 87 88 89 90 91 92 93 94 95 |
# File 'lib/miga/cli/action/download/base.rb', line 86
#
# Reports the number of listed datasets (+d.size+) and the number of
# datasets +downloaded+ (worded as "to download" when <tt>cli[:dry]</tt> is
# set). When <tt>cli[:remote_list]</tt> is not +nil+, it also writes every
# element of +d+ to that path, one per line, overwriting the file.
def finalize_tasks(d, downloaded)
  cli.say "Datasets listed: #{d.size}"
  verb = if cli[:dry] then 'to download' else 'downloaded' end
  cli.say "Datasets #{verb}: #{downloaded}"
  return if cli[:remote_list].nil?

  File.open(cli[:remote_list], 'w') do |out|
    d.each { |name| out.puts name }
  end
end
#generic_perform ⇒ Object
68 69 70 71 72 73 74 75 |
# File 'lib/miga/cli/action/download/base.rb', line 68
#
# Runs the whole download workflow: loads the project and the task list
# (+load_tasks+), downloads the entries (+download_entries+), reports the
# outcome (+finalize_tasks+) and, when <tt>cli[:unlink]</tt> is set, unlinks
# every dataset of the project that was not in the downloaded list.
def generic_perform
  project, tasks = load_tasks
  listed, count = download_entries(tasks, project)

  # Finalize
  finalize_tasks(listed, count)
  return unless cli[:unlink]

  unlink_entries(project, project.dataset_names - listed)
end
#impose_limit(ds) ⇒ Object
113 114 115 116 117 118 119 120 121 |
# File 'lib/miga/cli/action/download/base.rb', line 113
#
# Randomly subsamples +ds+ (in place) down to <tt>cli[:max_datasets]</tt>
# entries when that limit is set and smaller than the current size. The
# surviving entries keep their original relative order.
#
# A limit of +nil+ or zero means unlimited. A negative limit is passed
# unchanged to +Array#sample+, which rejects it with an ArgumentError.
#
# Always returns +ds+.
def impose_limit(ds)
  max = cli[:max_datasets].to_i
  return ds if max.zero? || max >= ds.size

  cli.say "Subsampling list from #{ds.size} to #{max} datasets"
  # Index the sampled keys in a Hash so that each membership test below is a
  # constant-time lookup instead of a linear scan of the sample
  keep = {}
  ds.keys.sample(max).each { |key| keep[key] = true }
  ds.select! { |key, _| keep.key?(key) }
  ds
end
#load_tasks ⇒ Object
77 78 79 80 81 82 83 84 |
# File 'lib/miga/cli/action/download/base.rb', line 77
#
# Prepares a download run: calls +sanitize_cli+, loads the project through
# +cli.load_project+, obtains the remote list from +remote_list+, and
# narrows it with +discard_excluded+ followed by +impose_limit+.
#
# Returns a two-element Array: the project and the resulting list.
def load_tasks
  sanitize_cli
  project = cli.load_project
  tasks = impose_limit(discard_excluded(remote_list))
  [project, tasks]
end
#save_entry(name, body, p) ⇒ Object
Saves the (generic remote) entry identified by name with body into the project p, and returns true on success and false otherwise
153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
# File 'lib/miga/cli/action/download/base.rb', line 153
#
# Saves the (generic remote) entry identified by +name+ with +body+ into the
# project +p+, and returns +true+ on success and +false+ otherwise.
#
# +body+ is read through the keys +:ids+, +:db+, +:universe+ and +:md+.
# When <tt>cli[:only_md]</tt> is set, <tt>body[:md][:metadata_only]</tt> is
# set to +true+ (this mutates +body+).
#
# With <tt>cli[:get_md]</tt> the existing dataset <tt>p.dataset(name)</tt>
# is updated through +update_metadata+ on the remote dataset. Otherwise the
# remote dataset is saved into the project with +save_to+ (as a reference
# unless <tt>cli[:query]</tt> is set), and the dataset returned by
# <tt>p.add_dataset(name)</tt> is passed to +cli.add_metadata+.
#
# A MiGA::RemoteDataMissingError is re-raised unless
# <tt>cli[:ignore_removed]</tt> is set, in which case the entry is reported
# as ignored and +false+ is returned.
def save_entry(name, body, p)
  cli.say "  Locating remote dataset: #{name}"
  body[:md][:metadata_only] = true if cli[:only_md]
  rd = MiGA::RemoteDataset.new(body[:ids], body[:db], body[:universe])
  if cli[:get_md]
    cli.say '  Updating dataset'
    rd.update_metadata(p.dataset(name), body[:md])
  else
    cli.say '  Creating dataset'
    rd.save_to(p, name, !cli[:query], body[:md])
    cli.add_metadata(p.add_dataset(name))
  end
  true
rescue MiGA::RemoteDataMissingError => e
  raise e unless cli[:ignore_removed]

  cli.say "    Removed dataset ignored: #{name}"
  false
end
#unlink_entries(p, unlink) ⇒ Object
97 98 99 100 |
# File 'lib/miga/cli/action/download/base.rb', line 97
#
# Unlinks from the project +p+ every dataset named in +unlink+, calling
# +remove!+ on the object returned by each +p.unlink_dataset+ call, and then
# reports how many names were processed.
def unlink_entries(p, unlink)
  unlink.each do |name|
    p.unlink_dataset(name).remove!
  end
  cli.say "Datasets unlinked: #{unlink.size}"
end