Class: MiGA::RemoteDataset

Inherits:
Object
  • Object
show all
Defined in:
lib/miga/remote_dataset.rb

Constant Summary collapse

@@UNIVERSE =

Class

{
  # Registry of remote sources ("universes"), keyed by universe name.
  # Each entry holds:
  #   dbs::    databases of that universe, each with the options read by this
  #            class (+:stage+, +:format+ and, for mapping entries, +:map_to+).
  #   url::    sprintf template; .download fills the positional arguments as
  #            1 = database, 2 = comma-joined IDs, 3 = format.
  #   method:: transfer method; .download only implements +:rest+.
  # All endpoints use HTTPS: NCBI E-utilities no longer answer plain HTTP.
  ebi: {
    dbs: { embl: { stage: :assembly, format: :fasta } },
    url: "https://www.ebi.ac.uk/Tools/dbfetch/dbfetch/%1$s/%2$s/%3$s",
    method: :rest
  },
  ncbi: {
    dbs: { nuccore: { stage: :assembly, format: :fasta } },
    url: "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/" +
      "efetch.fcgi?db=%1$s&id=%2$s&rettype=%3$s&retmode=text",
    method: :rest
  },
  # Mapping universe: links IDs from one database to another (+:map_to+) in
  # the universe named by +:map_to_universe+.
  # NOTE(review): the trailing " - - - - -" in this URL looks like a
  # placeholder and is kept verbatim -- confirm before relying on this entry.
  ncbi_map: {
    dbs: { assembly: { map_to: :nuccore, format: :text } },
    url: "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/" +
      "elink.fcgi?dbfrom=%1$s&id=%2$s&db=%3$s - - - - -",
    method: :rest,
    map_to_universe: :ncbi
  }
}

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(ids, db, universe) ⇒ RemoteDataset

Returns a new instance of RemoteDataset.



60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/miga/remote_dataset.rb', line 60

# Prepares a handle for one or more entries of a remote database.
#
# ids::      A single identifier or an Array of identifiers.
# db::       Database name (anything responding to +to_sym+); must be listed
#            under the universe's +:dbs+ in @@UNIVERSE.
# universe:: Universe name (anything responding to +to_sym+); must be a key
#            of @@UNIVERSE.
#
# Raises RuntimeError if the universe or the database is unknown, or if the
# database is a mapping entry (+:map_to+), which is not implemented.
def initialize(ids, db, universe)
  @ids = ids.is_a?(Array) ? ids : [ids]
  @db = db.to_sym
  @universe = universe.to_sym
  raise "Unknown Universe: #{@universe}. Try one of: " +
    "#{@@UNIVERSE.keys}" unless @@UNIVERSE.key? @universe
  dbs = @@UNIVERSE[@universe][:dbs]
  raise "Unknown Database: #{@db}. Try one of: " +
    "#{dbs}" unless dbs.key? @db
  # Mapping databases used to reach an argument-less RemoteDataset.download
  # call here, which can only fail with an arity error. Fail explicitly
  # instead, with a message that names the actual problem.
  map_to = dbs[@db][:map_to]
  raise "Unsupported Database: #{@db}. Mapping to #{map_to} is not " +
    "implemented." unless map_to.nil?
end

Instance Attribute Details

#db ⇒ Object (readonly)

Instance



59
60
61
# File 'lib/miga/remote_dataset.rb', line 59

# Reader for @db, the database name stored by the constructor.
def db ; @db ; end

#ids ⇒ Object (readonly)

Instance



59
60
61
# File 'lib/miga/remote_dataset.rb', line 59

# Reader for @ids, the list of identifiers stored by the constructor.
def ids ; @ids ; end

#universe ⇒ Object (readonly)

Instance



59
60
61
# File 'lib/miga/remote_dataset.rb', line 59

# Reader for @universe, the universe name stored by the constructor.
def universe ; @universe ; end

Class Method Details

.download(universe, db, ids, format, file = nil) ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/miga/remote_dataset.rb', line 34

# Retrieves entries from a remote universe and returns the response body.
#
# universe:: Key of @@UNIVERSE selecting the service.
# db::       Database name, used as the first URL argument. It does not
#            need to be registered under the universe's +:dbs+.
# ids::      A single identifier or an Array; joined with commas in the URL.
# format::   Third URL argument (e.g. the return type requested).
# file::     Optional path; when given, the body is also written there.
#
# Returns the response body as a String.
# Raises RuntimeError if the response code is not 200 or if the universe's
# +:method+ is not supported.
def self.download(universe, db, ids, format, file=nil)
  ids = [ids] unless ids.is_a? Array
  source = @@UNIVERSE[universe]
  case source[:method]
  when :rest
    db_opts = source[:dbs][db]
    map_to = db_opts.nil? ? nil : db_opts[:map_to]
    # map_to is passed as a fourth argument, although none of the URL
    # templates currently reference it.
    url = sprintf(source[:url], db, ids.join(","), format, map_to)
    response = RestClient::Request.execute(:method=>:get, :url=>url,
      :timeout=>600)
    raise "Unable to reach #{universe} client, error code " +
      "#{response.code}." unless response.code == 200
    doc = response.to_s
  else
    raise "Unexpected error: Unsupported download method for Universe " +
      "#{universe}."
  end
  # Block form guarantees the handle is closed even if the write fails.
  File.open(file, "w") { |ofh| ofh.print doc } unless file.nil?
  doc
end

.UNIVERSE ⇒ Object



33
# File 'lib/miga/remote_dataset.rb', line 33

# Class-level accessor for the @@UNIVERSE registry.
def self.UNIVERSE
  @@UNIVERSE
end

Instance Method Details

#download(file) ⇒ Object



113
114
115
116
# File 'lib/miga/remote_dataset.rb', line 113

# Downloads this dataset into +file+, using the format registered for its
# database in @@UNIVERSE. Delegates to RemoteDataset.download and returns
# its result.
def download(file)
  format = @@UNIVERSE[universe][:dbs][db][:format]
  RemoteDataset.download(universe, db, ids, format, file)
end

#get_metadata(metadata = {}) ⇒ Object



102
103
104
105
106
107
108
109
110
111
112
# File 'lib/miga/remote_dataset.rb', line 102

# Adds remotely-derived entries to +metadata+ and returns it.
#
# For the +:ebi+ and +:ncbi+ universes, +metadata[:tax]+ is set to the result
# of #get_ncbi_taxonomy. For any other universe the hash is returned as is.
#
# metadata:: Hash to complete; it is modified in place.
def get_metadata(metadata={})
  case universe
  when :ebi, :ncbi
    # Get taxonomy
    metadata[:tax] = get_ncbi_taxonomy
  end
  metadata
end

#get_ncbi_taxid ⇒ Object



117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/miga/remote_dataset.rb', line 117

# Looks up the NCBI taxonomy ID of this dataset in its remote entry.
#
# For +:ebi+ the entry is requested in +:annot+ format and searched for an
# FT db_xref "taxon:" line, falling back to an OX NCBI_TaxID line. For
# +:ncbi+ the entry is requested in +:gb+ format and searched for a db_xref
# "taxon:" line.
#
# Returns the ID as a String of digits, or nil if no usable line is found.
# Raises RuntimeError for any other universe.
def get_ncbi_taxid
  case universe
  when :ebi
    entry = RemoteDataset.download(universe, db, ids, :annot).split(/\n/)
    hit = entry.find { |l| l =~ /^FT\s+\/db_xref="taxon:/ } ||
      entry.find { |l| l =~ /^OX\s+NCBI_TaxID=/ }
    return nil if hit.nil?
    taxid = hit.sub(/.*(?:"taxon:|NCBI_TaxID=)(\d+)["; ].*/, "\\1")
    # If the substitution did not apply, the line is not purely numeric.
    taxid =~ /^\d+$/ ? taxid : nil
  when :ncbi
    entry = RemoteDataset.download(universe, db, ids, :gb).split(/\n/)
    hit = entry.find { |l| l =~ /^\s+\/db_xref="taxon:/ }
    return nil if hit.nil?
    taxid = hit.sub(/.*(?:"taxon:)(\d+)["; ].*/, "\\1")
    taxid =~ /^\d+$/ ? taxid : nil
  else
    raise "I don't know how to extract ncbi_taxids from #{universe}."
  end
end

#get_ncbi_taxonomy ⇒ Object



138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/miga/remote_dataset.rb', line 138

# Builds the lineage of this dataset by following parent links.
#
# Starts from #get_ncbi_taxid and requests each taxon from the +:ebi+
# universe (+:taxonomy+ database), recording rank => scientific name until
# the ID is nil, "0" or "1". A first taxon ranked "no rank" is recorded under
# "dataset".
#
# Returns a Taxonomy built from the collected lineage Hash.
def get_ncbi_taxonomy
  lineage = {}
  tax_id = get_ncbi_taxid
  until tax_id.nil? || %w{0 1}.include?(tax_id)
    entry = RemoteDataset.download(:ebi, :taxonomy, tax_id, "")
    name = entry[/SCIENTIFIC NAME\s+:\s+(.+)/, 1]
    rank = entry[/RANK\s+:\s+(.+)/, 1]
    rank = "dataset" if rank == "no rank" && lineage.empty?
    lineage[rank] = name unless rank.nil?
    tax_id = entry[/PARENT ID\s+:\s+(.+)/, 1]
  end
  Taxonomy.new(lineage)
end

#save_to(project, name = nil, is_ref = true, metadata = {}) ⇒ Object



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/miga/remote_dataset.rb', line 74

# Downloads this remote dataset into a project and registers it there.
#
# project::  A project object, or a String handed to Project.new.
# name::     Dataset name; defaults to the IDs joined by "_" and passed
#            through String#miga_name (defined elsewhere in the project).
# is_ref::   Forwarded unchanged to Dataset.new.
# metadata:: Initial metadata Hash, completed by #get_metadata.
#
# Only databases whose +:stage+ in @@UNIVERSE is +:assembly+ are supported:
# the sequences are saved as <base>.LargeContigs.fna, symlinked as
# <base>.AllContigs.fna, and bracketed by timestamped <base>.start and
# <base>.done marker files.
#
# Returns the new Dataset.
# Raises RuntimeError if the dataset already exists in the project, if the
# database's stage is unsupported, or if the seed result is not added.
def save_to(project, name=nil, is_ref=true, metadata={})
  name = ids.join("_").miga_name if name.nil?
  project = Project.new(project) if project.is_a? String
  raise "Dataset #{name} exists in the project, aborting..." if
    Dataset.exist?(project, name)
  metadata = get_metadata(metadata)
  stage = @@UNIVERSE[universe][:dbs][db][:stage]
  case stage
  when :assembly
    base = project.path + "/data/" + Dataset.RESULT_DIRS[:assembly] +
      "/" + name
    # Block form closes each marker file even if the write raises.
    File.open("#{base}.start", "w") { |ofh| ofh.puts Time.now.to_s }
    download("#{base}.LargeContigs.fna")
    File.symlink("#{base}.LargeContigs.fna", "#{base}.AllContigs.fna")
    File.open("#{base}.done", "w") { |ofh| ofh.puts Time.now.to_s }
  else
    raise "Unexpected error: Unsupported result for database #{db}."
  end
  dataset = Dataset.new(project, name, is_ref, metadata)
  project.add_dataset(dataset.name)
  result = dataset.add_result stage
  raise "Empty dataset created: seed result was not added due to " +
    "incomplete files." if result.nil?
  dataset
end