Module: Ensembl::FTP

Defined in:
lib/rbbt/sources/ensembl_ftp.rb

Constant Summary

# Host name of the Ensembl FTP server.
SERVER = "ftp.ensembl.org"

Class Method Summary

Class Method Details

.base_url(organism) ⇒ Object



51
52
53
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 51

# Builds the FTP URL of the directory that ftp_directory_for reports for
# +organism+, rooted at SERVER.
#
# @param organism passed unchanged to ftp_directory_for
# @return [String] "ftp://" followed by SERVER, joined with that directory
def self.base_url(organism)
  server_root = "ftp://#{SERVER}"
  remote_dir = ftp_directory_for(organism)
  File.join(server_root, remote_dir)
end

.ensembl_tsv(organism, table, key_field = nil, fields = nil, options = {}) ⇒ Object



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 71

# Opens the dump of +table+ for +organism+ through TSV.open and labels the
# result with the requested field names.
#
# When both +key_field+ and +fields+ are given, their names are translated
# into column positions (looked up in fields_for) and passed to TSV.open as
# the :key_field and :fields options.
#
# @param organism passed to url_for and fields_for
# @param table table name, passed to url_for and fields_for
# @param key_field name of the key column (optional)
# @param fields [Array] names of the value columns (optional)
# @param options [Hash] extra options forwarded to TSV.open; never modified
# @return the object returned by TSV.open, after assigning its key_field and
#   fields attributes
def self.ensembl_tsv(organism, table, key_field = nil, fields = nil, options = {})
  url = url_for(organism, table)

  if key_field && fields
    all_fields = fields_for(organism, table)

    # Merge into a copy instead of writing into the caller's hash: a hash
    # reused across calls would otherwise carry stale column positions into
    # a later call that passes no key_field/fields.
    # NOTE(review): a name missing from all_fields yields a nil position here.
    options = options.merge(
      :key_field => all_fields.index(key_field),
      :fields    => fields.map { |field| all_fields.index(field) }
    )
  end

  tsv = TSV.open(url, options)
  # These are assigned unconditionally, so they become nil when the caller
  # did not supply names.
  tsv.key_field = key_field
  tsv.fields = fields
  tsv
end

.fields_for(organism, table) ⇒ Object



64
65
66
67
68
69
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 64

# Lists the column names declared for +table+ in the organism's SQL schema
# dump.
#
# Reads "<base_url>/<basename of base_url>.sql.gz" through Open.read, finds
# the CREATE TABLE statement for +table+ and collects the backquoted
# identifier that starts each line of its body.
#
# @param organism passed to base_url
# @param table [#to_s] table name, matched literally
# @return [Array<String>] column names in the order they appear
# @raise [ArgumentError] when the dump has no CREATE TABLE for +table+
def self.fields_for(organism, table)
  # Resolve base_url once and reuse it for both the directory and file name.
  root = base_url(organism)
  sql_file = Open.read("#{root}/#{File.basename(root)}.sql.gz")

  # Only the `m` flag is used (dot also matches newlines). In Ruby `s` is an
  # encoding flag (Windows-31J), which makes the match raise on non-ASCII
  # input in another encoding. The table name is escaped so regexp
  # metacharacters in it cannot match a different table.
  definition = sql_file.match(/^CREATE TABLE .#{Regexp.escape(table.to_s)}. \((.*?)^\)/m)
  raise ArgumentError, "No CREATE TABLE statement found for table #{table}" if definition.nil?

  definition[1].scan(/^\s+`(.*?)`/).flatten
end

.ftp_directory_for(organism) ⇒ Object



42
43
44
45
46
47
48
49
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 42

# Returns the server path of the MySQL dump directory for +organism+.
#
# The release and directory name come from ftp_name_for. The 'current'
# release lives under /pub/current_mysql; any other release lives under
# /pub/<release>/mysql.
#
# @param organism passed unchanged to ftp_name_for
# @return [String] path starting with "/pub/"
def self.ftp_directory_for(organism)
  release, ftp_name = ftp_name_for(organism)
  release_segments = release == 'current' ? ['current_mysql'] : [release, 'mysql']
  File.join('/pub/', *release_segments, ftp_name)
end

.ftp_name_for(organism) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 16

# Finds the release and the core database directory name for +organism+ by
# listing the Ensembl FTP server.
#
# +organism+ has the form "code" or "code/build". Without a build (or with
# build "current") the listing is taken from pub/current_mysql; otherwise the
# build is translated through Ensembl.releases and the listing is taken from
# pub/<release>/mysql.
#
# @param organism [String] organism code, optionally followed by "/build"
# @return [Array] two elements: the release ('current' or the value from
#   Ensembl.releases) and the last entry matching
#   "<scientific_name>_core_*", or nil when the listing is empty
def self.ftp_name_for(organism)
  _code, build = organism.split("/")
  build ||= "current"

  if build.to_s == "current"
    release = 'current'
    remote_dir = File.join('pub', 'current_mysql')
  else
    release = Ensembl.releases[build]
    remote_dir = File.join('pub', release, 'mysql')
  end

  name = Organism.scientific_name(organism)
  pattern = name.downcase.gsub(" ", '_') + "_core_*"

  ftp = Net::FTP.new(Ensembl::FTP::SERVER)
  begin
    ftp.passive = true
    # Log in before issuing commands; login with no arguments is anonymous.
    ftp.login
    ftp.chdir(remote_dir)
    # Each listing line ends with the entry name; keep the last entry listed.
    file = ftp.list(pattern).collect { |line| line.split(" ").last }.last
  ensure
    # Close the connection even when chdir or list raises.
    ftp.close
  end

  [release, file]
end

.has_table?(organism, table) ⇒ Boolean

Returns:

  • (Boolean)


59
60
61
62
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 59

# Tells whether the organism's SQL schema dump declares +table+.
#
# Reads "<base_url>/<basename of base_url>.sql.gz" through Open.read and
# looks for a CREATE TABLE statement for +table+.
#
# @param organism passed to base_url
# @param table [#to_s] table name, matched literally
# @return [Boolean] true when a matching CREATE TABLE statement is found
def self.has_table?(organism, table)
  # Resolve base_url once and reuse it for both the directory and file name.
  root = base_url(organism)
  sql_file = Open.read("#{root}/#{File.basename(root)}.sql.gz")

  # Only the `m` flag is used (dot also matches newlines). In Ruby `s` is an
  # encoding flag (Windows-31J), which makes the match raise on non-ASCII
  # input in another encoding. The table name is escaped so regexp
  # metacharacters in it cannot match a different table.
  !sql_file.match(/^CREATE TABLE .#{Regexp.escape(table.to_s)}. \((.*?)^\)/m).nil?
end

.mysql_path(release) ⇒ Object



13
14
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 13

# Placeholder with no implementation: ignores +release+ and returns nil.
#
# @param release unused
# @return [nil]
def self.mysql_path(release)
  nil
end

.url_for(organism, table) ⇒ Object



55
56
57
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 55

# Builds the URL of the gzipped text dump of +table+ inside the organism's
# base_url directory.
#
# @param organism passed unchanged to base_url
# @param table table name; its string form becomes the file name stem
# @return [String] "<base_url>/<table>.txt.gz"
def self.url_for(organism, table)
  directory = base_url(organism)
  file_name = "#{table}.txt.gz"
  "#{directory}/#{file_name}"
end