Module: Ensembl::FTP
- Defined in:
- lib/rbbt/sources/ensembl_ftp.rb
Constant Summary
- SERVER = "ftp.ensembl.org"
Class Method Summary
- ._get_file(organism, table, extension) ⇒ Object
- ._get_gz(url) ⇒ Object
- .base_url(organism) ⇒ Object
- .ensembl_tsv(organism, table, key_field = nil, fields = nil, options = {}) ⇒ Object
- .fields_for(organism, table) ⇒ Object
- .ftp_directory_for(organism) ⇒ Object
- .ftp_name_for(organism) ⇒ Object
- .has_table?(organism, table) ⇒ Boolean
- .mysql_path(release) ⇒ Object
- .url_for(organism, table, extension) ⇒ Object
Class Method Details
._get_file(organism, table, extension) ⇒ Object
67 68 69 70 |
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 67
# Fetches the decompressed contents of one dump file for an organism.
#
# Builds the remote location with url_for and hands it to _get_gz.
#
# @param organism passed through to url_for
# @param table name of the file (without extension) inside the dump directory
# @param extension file extension placed before ".gz" by url_for
# @return whatever _get_gz returns for the resulting URL
def self._get_file(organism, table, extension)
  _get_gz(url_for(organism, table, extension))
end
._get_gz(url) ⇒ Object
59 60 61 62 63 64 65 |
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 59
# Downloads a gzip-compressed file with wget and returns the result of
# reading the decompressed stream.
#
# If the first pipeline raises (any StandardError), a second attempt is made
# against "<url>.bz2", piping through bunzip2 and then gunzip.
#
# NOTE(review): url is interpolated into a shell command inside single
# quotes without escaping -- confirm callers never pass untrusted input.
#
# @param url location of the ".gz" file
# @return the value of `read` on the command output
def self._get_gz(url)
  CMD.cmd("wget '#{url}' -O - | gunzip").read
rescue
  # Fallback: the same resource wrapped in an additional bzip2 layer.
  CMD.cmd("wget '#{url}.bz2' -O - | bunzip2 | gunzip").read
end
.base_url(organism) ⇒ Object
51 52 53 |
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 51
# Builds the FTP URL of the dump directory for an organism.
#
# @param organism passed through to ftp_directory_for
# @return [String] "ftp://" + SERVER joined with the organism's directory
def self.base_url(organism)
  server_root = "ftp://#{SERVER}"
  File.join(server_root, ftp_directory_for(organism))
end
.ensembl_tsv(organism, table, key_field = nil, fields = nil, options = {}) ⇒ Object
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 83
# Downloads the "txt" dump of a table and opens it as a TSV.
#
# When both key_field and fields are given, their names are translated into
# column positions (using the column order reported by fields_for) and passed
# to TSV.open as the :key_field and :fields options. The caller's options
# hash is not modified; a merged copy is used instead.
#
# After opening, the TSV's key_field and fields are set to the names given
# (nil when they were not supplied).
#
# @param organism passed through to fields_for / _get_file
# @param table name of the table dump to load
# @param key_field [String, nil] name of the column to use as key
# @param fields [Array<String>, nil] names of the columns to keep
# @param options [Hash] extra options forwarded to TSV.open
# @return the object returned by TSV.open
def self.ensembl_tsv(organism, table, key_field = nil, fields = nil, options = {})
  if key_field && fields
    all_fields = fields_for(organism, table)
    # Column names become positions in the dump. Names that are not columns
    # of the table yield nil, as Array#index does.
    options = options.merge(
      :key_field => all_fields.index(key_field),
      :fields => fields.collect { |f| all_fields.index(f) }
    )
  end

  tsv = TSV.open(StringIO.new(_get_file(organism, table, "txt")), options)
  tsv.key_field = key_field
  tsv.fields = fields
  tsv
end
.fields_for(organism, table) ⇒ Object
77 78 79 80 81 |
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 77
# Lists the column names of a table, in the order they appear in the
# CREATE TABLE statement of the organism's SQL schema dump.
#
# The schema file is the one named after the last path component of
# base_url(organism), with extension "sql".
#
# @param organism passed through to _get_file / base_url
# @param table name of the table to look up (matched literally)
# @return [Array<String>] backtick-quoted identifiers that start a line of
#   the table definition
# @raise [ArgumentError] if no CREATE TABLE statement for the table is found
def self.fields_for(organism, table)
  sql_file = _get_file(organism, File.basename(base_url(organism)), 'sql')
  # Only the `m` flag (dot matches newline) is used. In Ruby a trailing `s`
  # flag selects the Windows-31J encoding and makes the match raise on
  # non-ASCII UTF-8 input. The table name is escaped so that regexp
  # metacharacters in it are matched literally.
  definition = sql_file.match(/^CREATE TABLE .#{Regexp.escape(table.to_s)}. \((.*?)^\)/m)
  raise ArgumentError, "Table #{table} not found in the schema of #{organism}" if definition.nil?

  definition[1].scan(/^\s+`(.*?)`/).flatten
end
.ftp_directory_for(organism) ⇒ Object
42 43 44 45 46 47 48 49 |
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 42
# Returns the absolute FTP directory holding an organism's MySQL dump.
#
# The release and dump name come from ftp_name_for. The 'current' release
# lives under /pub/current_mysql, any other under /pub/<release>/mysql.
#
# @param organism passed through to ftp_name_for
# @return [String] directory path on the FTP server
def self.ftp_directory_for(organism)
  release, ftp_name = ftp_name_for(organism)
  segments = release == 'current' ? ['current_mysql'] : [release, 'mysql']
  File.join('/pub/', *segments, ftp_name)
end
.ftp_name_for(organism) ⇒ Object
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 16
# Finds the release and the core database dump name for an organism by
# listing the Ensembl FTP server.
#
# organism has the form "<code>" or "<code>/<build>". Without a build (or
# with build "current") the listing is taken from pub/current_mysql and the
# release is 'current'. Otherwise the release is looked up in
# Ensembl.releases and the listing is taken from pub/<release>/mysql, with
# entries ending in ".gz" ignored.
#
# The FTP connection is always closed, including when login, chdir or list
# raises.
#
# @param organism [String] organism code, optionally followed by "/<build>"
# @return [Array] two elements: the release and the last listed name that
#   matches "<scientific_name>_core_*" (nil when nothing matches)
def self.ftp_name_for(organism)
  _code, build = organism.split "/"
  build ||= "current"
  current = build.to_s == "current"

  release = current ? 'current' : Ensembl.releases[build]
  directory = current ? File.join('pub', 'current_mysql') : File.join('pub', release, 'mysql')
  pattern = Organism.scientific_name(organism).downcase.gsub(" ", '_') + "_core_*"

  ftp = Net::FTP.new(Ensembl::FTP::SERVER)
  begin
    ftp.passive = true
    ftp.login
    ftp.chdir(directory)
    listing = ftp.list(pattern)
  ensure
    # Release the connection even if the session above failed.
    ftp.close
  end

  # Only versioned releases skip the ".gz" entries.
  listing = listing.reject { |line| line =~ /\.gz$/ } unless current
  # The name is the last whitespace-separated column of a listing line.
  file = listing.collect { |line| line.split(" ").last }.last

  [release, file]
end
.has_table?(organism, table) ⇒ Boolean
72 73 74 75 |
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 72
# Tells whether the organism's SQL schema dump defines the given table.
#
# The schema file is the one named after the last path component of
# base_url(organism), with extension "sql".
#
# @param organism passed through to _get_file / base_url
# @param table name of the table to look for (matched literally)
# @return [Boolean] true when a CREATE TABLE statement for the table exists
def self.has_table?(organism, table)
  sql_file = _get_file(organism, File.basename(base_url(organism)), 'sql')
  # Only the `m` flag (dot matches newline) is used. In Ruby a trailing `s`
  # flag selects the Windows-31J encoding and makes the match raise on
  # non-ASCII UTF-8 input. The table name is escaped so that regexp
  # metacharacters in it are matched literally.
  !sql_file.match(/^CREATE TABLE .#{Regexp.escape(table.to_s)}. \((.*?)^\)/m).nil?
end
.mysql_path(release) ⇒ Object
13 14 |
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 13
# Placeholder with no implementation: the argument is ignored and nil is
# returned.
#
# @param release unused
# @return [nil]
def self.mysql_path(release)
  nil
end
.url_for(organism, table, extension) ⇒ Object
55 56 57 |
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 55
# Builds the URL of a gzip-compressed file inside an organism's dump
# directory: <base_url>/<table>.<extension>.gz
#
# @param organism passed through to base_url
# @param table file name without extension
# @param extension extension placed before ".gz"
# @return [String] the complete URL
def self.url_for(organism, table, extension)
  location = File.join(base_url(organism), table)
  "#{location}.#{extension}.gz"
end