Class: MiGA::RemoteDataset
- Inherits:
-
Object
- Object
- MiGA::RemoteDataset
- Defined in:
- lib/miga/remote_dataset.rb
Constant Summary collapse
- @@UNIVERSE =
Class
{ ebi:{ dbs: { embl:{stage: :assembly, format: :fasta} }, url: "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/%1$s/%2$s/%3$s", method: :rest }, ncbi:{ dbs: { nuccore:{stage: :assembly, format: :fasta} }, url: "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + "efetch.fcgi?db=%1$s&id=%2$s&rettype=%3$s&retmode=text", method: :rest }, ncbi_map:{ dbs: { assembly:{map_to: :nuccore, format: :text} }, url: "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + "elink.fcgi?dbfrom=%1$s&id=%2$s&db=%3$s - - - - -", method: :rest, map_to_universe: :ncbi } }
Instance Attribute Summary collapse
-
#db ⇒ Object
readonly
Instance.
-
#ids ⇒ Object
readonly
Instance.
-
#universe ⇒ Object
readonly
Instance.
Class Method Summary collapse
Instance Method Summary collapse
- #download(file) ⇒ Object
- #get_metadata(metadata = {}) ⇒ Object
- #get_ncbi_taxid ⇒ Object
- #get_ncbi_taxonomy ⇒ Object
-
#initialize(ids, db, universe) ⇒ RemoteDataset
constructor
A new instance of RemoteDataset.
- #save_to(project, name = nil, is_ref = true, metadata = {}) ⇒ Object
Constructor Details
#initialize(ids, db, universe) ⇒ RemoteDataset
Returns a new instance of RemoteDataset.
60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
# File 'lib/miga/remote_dataset.rb', line 60
#
# Builds a handle to one or more remote entries.
#
# @param ids      a single id or an Array of ids; a single id is wrapped
#                 into a one-element Array.
# @param db       database name (anything responding to #to_sym); must be a
#                 key of the selected universe's :dbs table.
# @param universe universe name (anything responding to #to_sym); must be a
#                 key of @@UNIVERSE.
# @raise [RuntimeError] when the universe or the database is not registered.
def initialize(ids, db, universe)
  @ids = ids.is_a?(Array) ? ids : [ids]
  @db = db.to_sym
  @universe = universe.to_sym
  unless @@UNIVERSE.key?(@universe)
    raise "Unknown Universe: #{@universe}. Try one of: #{@@UNIVERSE.keys}"
  end
  dbs = @@UNIVERSE[@universe][:dbs]
  # List only the database names, mirroring the universe message above,
  # instead of dumping the whole configuration hash.
  raise "Unknown Database: #{@db}. Try one of: #{dbs.keys}" unless dbs.key?(@db)
  unless dbs[@db][:map_to].nil?
    # FIXME: .download requires (universe, db, ids, format); this bare call
    # raises ArgumentError for every mapped database (e.g. ncbi_map/assembly).
    # The id-mapping step appears unfinished -- supply the arguments and use
    # the response once the intended mapping is defined.
    RemoteDataset.download
  end
end
Instance Attribute Details
#db ⇒ Object (readonly)
Instance
59 60 61 |
# File 'lib/miga/remote_dataset.rb', line 59
#
# Reader for the database name stored by the constructor.
#
# @return the value of @db
def db
  @db
end
#ids ⇒ Object (readonly)
Instance
59 60 61 |
# File 'lib/miga/remote_dataset.rb', line 59
#
# Reader for the list of remote ids stored by the constructor.
#
# @return the value of @ids
def ids
  @ids
end
#universe ⇒ Object (readonly)
Instance
59 60 61 |
# File 'lib/miga/remote_dataset.rb', line 59
#
# Reader for the universe name stored by the constructor.
#
# @return the value of @universe
def universe
  @universe
end
Class Method Details
.download(universe, db, ids, format, file = nil) ⇒ Object
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
# File 'lib/miga/remote_dataset.rb', line 34
#
# Fetches one or more entries from a remote universe and returns the
# response body as a String.
#
# @param universe key of @@UNIVERSE selecting the remote source.
# @param db       database name, substituted as %1$s in the URL template.
#                 It does not need to be registered under :dbs (a missing
#                 entry simply yields a nil map_to).
# @param ids      a single id or an Array of ids; joined with "," for %2$s.
# @param format   substituted as %3$s in the URL template.
# @param file     optional path; when given, the body is also written there.
# @return [String] the downloaded document.
# @raise [RuntimeError] on a non-200 response code or when the universe's
#                 :method is not :rest.
def self.download(universe, db, ids, format, file = nil)
  ids = [ids] unless ids.is_a? Array
  source = @@UNIVERSE[universe]
  case source[:method]
  when :rest
    db_entry = source[:dbs][db]
    map_to = db_entry.nil? ? nil : db_entry[:map_to]
    # map_to is passed as the 4th positional argument (%4$s) for templates
    # that want it; templates that do not reference it ignore it.
    url = sprintf source[:url], db, ids.join(","), format, map_to
    response = RestClient::Request.execute(:method=>:get, :url=>url,
      :timeout=>600)
    raise "Unable to reach #{universe} client, error code "+
      "#{response.code}." unless response.code == 200
    doc = response.to_s
  else
    raise "Unexpected error: Unsupported download method for Universe "+
      "#{universe}."
  end
  # Block form guarantees the handle is closed even if the write raises.
  File.open(file, "w") { |ofh| ofh.print doc } unless file.nil?
  doc
end
.UNIVERSE ⇒ Object
33 |
# File 'lib/miga/remote_dataset.rb', line 33
#
# Class-level accessor exposing the @@UNIVERSE registry.
#
# @return the @@UNIVERSE object itself (not a copy)
def self.UNIVERSE
  @@UNIVERSE
end
Instance Method Details
#download(file) ⇒ Object
113 114 115 116 |
# File 'lib/miga/remote_dataset.rb', line 113
#
# Downloads this dataset's entries into +file+, using the :format registered
# in @@UNIVERSE for the current universe and database.
#
# @param file destination path handed to RemoteDataset.download.
# @return whatever RemoteDataset.download returns (the downloaded document).
def download(file)
  format = @@UNIVERSE[universe][:dbs][db][:format]
  RemoteDataset.download(universe, db, ids, format, file)
end
#get_metadata(metadata = {}) ⇒ Object
102 103 104 105 106 107 108 109 110 111 112 |
# File 'lib/miga/remote_dataset.rb', line 102
#
# Adds remotely-derived fields to a metadata hash.
#
# For the :ebi and :ncbi universes the NCBI taxonomy is stored under :tax;
# any other universe leaves the hash untouched.
#
# @param metadata [Hash] hash to extend; it is modified in place.
# @return [Hash] the same +metadata+ object.
def get_metadata(metadata = {})
  case universe
  when :ebi, :ncbi
    # Get taxonomy
    metadata[:tax] = get_ncbi_taxonomy
  end
  metadata
end
#get_ncbi_taxid ⇒ Object
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
# File 'lib/miga/remote_dataset.rb', line 117
#
# Looks up the NCBI taxonomy id of this dataset by downloading its annotated
# record and scanning it for a taxon cross-reference.
#
# * :ebi  -- downloads the :annot format and uses the first
#   `FT ... /db_xref="taxon:` line, falling back to the first
#   `OX NCBI_TaxID=` line.
# * :ncbi -- downloads the :gb format and uses the first
#   `/db_xref="taxon:` line.
#
# @return [String, nil] the id as a String of digits, or nil when no
#   matching line exists or the extracted value is not purely numeric.
# @raise [RuntimeError] for any other universe.
def get_ncbi_taxid
  case universe
  when :ebi
    lines = RemoteDataset.download(universe, db, ids, :annot).split(/\n/)
    hit = lines.grep(/^FT\s+\/db_xref="taxon:/).first ||
      lines.grep(/^OX\s+NCBI_TaxID=/).first
    return nil if hit.nil?
    taxid = hit.sub(/.*(?:"taxon:|NCBI_TaxID=)(\d+)["; ].*/, "\\1")
  when :ncbi
    lines = RemoteDataset.download(universe, db, ids, :gb).split(/\n/)
    hit = lines.grep(/^\s+\/db_xref="taxon:/).first
    return nil if hit.nil?
    taxid = hit.sub(/.*(?:"taxon:)(\d+)["; ].*/, "\\1")
  else
    raise "I don't know how to extract ncbi_taxids from #{universe}."
  end
  # If the substitution did not match, the whole line is still here; only a
  # bare run of digits counts as a valid id.
  taxid =~ /^\d+$/ ? taxid : nil
end
#get_ncbi_taxonomy ⇒ Object
138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
# File 'lib/miga/remote_dataset.rb', line 138
#
# Builds the taxonomic lineage of this dataset by walking parent links in the
# EBI :taxonomy database, starting from #get_ncbi_taxid.
#
# Each downloaded record contributes one rank => scientific-name pair. The
# walk stops when the current id is nil, "0" or "1". A "no rank" entry seen
# before anything else has been recorded is stored under "dataset".
#
# @return [Taxonomy] a Taxonomy built from the collected rank => name Hash.
def get_ncbi_taxonomy
  lineage = {}
  tax_id = get_ncbi_taxid
  until tax_id.nil? || %w{0 1}.include?(tax_id)
    record = RemoteDataset.download(:ebi, :taxonomy, tax_id, "")
    name = record[/SCIENTIFIC NAME\s+:\s+(.+)/, 1]
    rank = record[/RANK\s+:\s+(.+)/, 1]
    rank = "dataset" if lineage.empty? && rank == "no rank"
    lineage[rank] = name unless rank.nil?
    # Move up one level; a record without a parent ends the walk.
    tax_id = record[/PARENT ID\s+:\s+(.+)/, 1]
  end
  Taxonomy.new(lineage)
end
#save_to(project, name = nil, is_ref = true, metadata = {}) ⇒ Object
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
# File 'lib/miga/remote_dataset.rb', line 74
#
# Downloads this remote dataset into a project and registers it there.
#
# @param project  a Project, or a String that is passed to Project.new.
# @param name     dataset name; defaults to the ids joined with "_" and
#                 passed through String#miga_name.
# @param is_ref   forwarded to Dataset.new.
# @param metadata [Hash] initial metadata; extended by #get_metadata before
#                 the dataset is created.
# @return the Dataset that was created.
# @raise [RuntimeError] when the dataset already exists in the project, when
#                 the database's :stage is not :assembly, or when no result
#                 could be registered for the downloaded files.
def save_to(project, name = nil, is_ref = true, metadata = {})
  name = ids.join("_").miga_name if name.nil?
  project = Project.new(project) if project.is_a? String
  raise "Dataset #{name} exists in the project, aborting..." if
    Dataset.exist?(project, name)
  metadata = get_metadata(metadata)
  stage = @@UNIVERSE[universe][:dbs][db][:stage]
  case stage
  when :assembly
    base = project.path + "/data/" + Dataset.RESULT_DIRS[:assembly] +
      "/" + name
    # Timestamp markers bracket the download; the block form of File.open
    # closes each handle even if the write raises.
    File.open("#{base}.start", "w") { |ofh| ofh.puts Time.now.to_s }
    download("#{base}.LargeContigs.fna")
    File.symlink("#{base}.LargeContigs.fna", "#{base}.AllContigs.fna")
    File.open("#{base}.done", "w") { |ofh| ofh.puts Time.now.to_s }
  else
    raise "Unexpected error: Unsupported result for database #{db}."
  end
  dataset = Dataset.new(project, name, is_ref, metadata)
  project.add_dataset(dataset.name)
  result = dataset.add_result stage
  raise "Empty dataset created: seed result was not added due to "+
    "incomplete files." if result.nil?
  dataset
end