Module: MiGA::RemoteDataset::Base
- Included in:
- MiGA::RemoteDataset
- Defined in:
- lib/miga/remote_dataset/base.rb
Constant Summary collapse
- @@_NCBI_DATASETS =
'https://api.ncbi.nlm.nih.gov/datasets/v2alpha/'
- @@_EUTILS =
'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
- @@_EBI_API =
'https://www.ebi.ac.uk/Tools/'
- @@_GTDB_API =
'https://gtdb-api.ecogenomic.org/'
- @@_SEQCODE_API =
'https://disc-genomics.uibk.ac.at/seqcode/'
- @@_EUTILS_BUILD =
lambda { |service, q| q[:api_key] = ENV['NCBI_API_KEY'] if ENV['NCBI_API_KEY'] uri_safe_join(@@_EUTILS, "#{service}.fcgi") .tap { |uri| uri.query = URI.encode_www_form(q) } }
- @@UNIVERSE =
Structure of the different database Universes or containers. The structure is a Hash with universe names (Symbol) as keys, where each value is a Hash with the following supported keys (Symbol):
- :dbs => Hash with keys being the database name and the values a Hash of properties such as stage, format, map_to, and getter.
- uri => Function producing a parsed URI object, accepting one parameter: a Hash of options.
- method => Method used to query the URL. Only :rest and :net are listed as currently supported; note, however, that the universes defined below use :get, :post, and :net.
- map_to_universe => Universe where results map to. Currently unsupported.
- scheme => Function returning the scheme used as a String (ftp, http, https). Mandatory if method is :net.
-
{ web: { dbs: { assembly: { stage: :assembly, format: :fasta }, assembly_gz: { stage: :assembly, format: :fasta_gz }, text: { stage: :metadata, format: :text } }, uri: lambda { |opts| URI.parse(opts[:ids][0]) }, scheme: lambda { |opts| opts[:ids][0].split(':', 2)[0] }, method: :net }, ebi: { dbs: { embl: { stage: :assembly, format: :fasta } }, uri: lambda do |opts| uri_safe_join( @@_EBI_API, 'dbfetch', 'dbfetch', opts[:db], opts[:ids], opts[:format] ) end, method: :get }, gtdb: { dbs: { # This is a dummy entry plugged directly to +ncbi_asm_get+ assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm }, # The 'taxon' namespace actually returns a list of genomes (+format+) taxon: { stage: :metadata, format: :genomes, map_to: [:assembly], extra: { sp_reps_only: false } }, # The 'genome' namespace actually returns the taxonomy (+format+) genome: { stage: :metadata, format: 'taxon-history' } }, uri: lambda do |opts| uri_safe_join(@@_GTDB_API, opts[:db], opts[:ids], opts[:format]) .tap { |uri| uri.query = URI.encode_www_form(opts[:extra]) } end, method: :get, map_to_universe: :ncbi, headers: lambda { |_opts| { 'Accept' => 'application/json' } } }, seqcode: { dbs: { # These are dummy entries plugged directly to +ncbi_*_get+ assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm }, nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb }, # This is the list of type genomes :'type-genomes' => { stage: :metadata, format: :json } }, uri: lambda do |opts| uri_safe_join(@@_SEQCODE_API, "#{opts[:db]}.json") .tap { |uri| uri.query = URI.encode_www_form(opts[:extra]) } end, method: :get, map_to_universe: :ncbi }, ncbi: { dbs: { nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb }, assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm }, taxonomy: { stage: :metadata, format: :xml } }, uri: lambda do |opts| @@_EUTILS_BUILD[:efetch, db: opts[:db], id: opts[:ids], rettype: opts[:format], retmode: :text ] end, method: :get }, ncbi_map: { 
dbs: { nuccore: { stage: :metadata, map_to: [:biosample, :assembly], format: :json }, biosample: { stage: :metadata, map_to: [:assembly], format: :json } }, uri: lambda do |opts| @@_EUTILS_BUILD[:elink, { dbfrom: opts[:db], id: opts[:ids], retmode: opts[:format] }.merge(opts[:extra])] end, method: :get, map_to_universe: :ncbi }, ncbi_summary: { dbs: { assembly: { stage: :metadata, format: :json } }, uri: lambda do |opts| @@_EUTILS_BUILD[:esummary, db: opts[:db], id: opts[:ids], retmode: opts[:format] ] end, method: :get }, ncbi_search: { dbs: { assembly: { stage: :metadata, format: :json }, taxonomy: { stage: :metadata, format: :json } }, uri: lambda do |opts| @@_EUTILS_BUILD[:esearch, db: opts[:db], term: opts[:ids], retmode: opts[:format] ] end, method: :get }, ncbi_datasets_download: { dbs: { genome: { stage: :assembly, format: :zip } }, uri: lambda do |opts| q = { include_annotation_type: 'GENOME_FASTA' } uri_safe_join( @@_NCBI_DATASETS, opts[:db], :accession, opts[:ids], :download ).tap { |uri| uri.query = URI.encode_www_form(q) } end, method: :get, headers: lambda do |opts| {}.tap do |h| h['Accept'] = 'application/zip' if opts[:format] == :zip h['api-key'] = ENV['NCBI_API_KEY'] if ENV['NCBI_API_KEY'] end end }, ncbi_datasets: { dbs: { genome: { stage: :metadata, format: :json, extra: { action: 'dataset_report' } } }, uri: lambda do |opts| uri_safe_join(@@_NCBI_DATASETS, opts[:db], opts[:extra][:action]) end, payload: lambda do |opts| query = opts[:ids][0] q = { filters: { assembly_version: 'current', exclude_paired_reports: true }.merge(query[:filters] || {}), page_size: query[:page_size] || 1_000, returned_content: 'COMPLETE' } q[:page_token] = query[:page_token] if query[:page_token] q[:taxons] = query[:taxons] if query[:taxons] MiGA::Json.generate_plain(q) end, headers: lambda do |opts| {}.tap do |h| h['api-key'] = ENV['NCBI_API_KEY'] if ENV['NCBI_API_KEY'] h['Content-Type'] = 'application/json' if opts[:format] == :json end end, method: :post } }