Class: Bio::KEGG::Keggtab

Inherits:
Object show all
Defined in:
lib/bio/db/kegg/keggtab.rb

Overview

Description

Parses the ‘keggtab’ KEGG database definition file, which also includes the taxonomic categories of the KEGG organisms.

References

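The ‘keggtab’ file is included in the KEGG GENES distribution archives (the list of download locations that followed this sentence is missing from this page).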

Format

The file format looks like this:

# KEGGTAB
#
# name            type            directory                    abbreviation
#
enzyme            enzyme          $BIOROOT/db/ideas/ligand     ec
ec                alias           enzyme
(snip)
# Human
h.sapiens         genes           $BIOROOT/db/kegg/genes       hsa
H.sapiens         alias           h.sapiens
hsa               alias           h.sapiens
(snip)
#
# Taxonomy
#
(snip)
animals           alias           hsa+mmu+rno+dre+dme+cel
eukaryotes        alias           animals+plants+protists+fungi
genes             alias           eubacteria+archaea+eukaryotes

Defined Under Namespace

Classes: DB

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(file_path, bioroot = nil) ⇒ Keggtab

Takes the path to the keggtab file and, optionally, the bioroot top directory. The BIOROOT environment variable, when set, overrides the bioroot argument.



54
55
56
57
58
59
60
61
62
# File 'lib/bio/db/kegg/keggtab.rb', line 54

# Builds a Keggtab from the keggtab file found at +file_path+.
#
# +bioroot+ is the optional BIOROOT top directory; when the BIOROOT
# environment variable is set, its value is used instead of the argument.
# The file is read in one go and its contents are handed to parse_keggtab.
def initialize(file_path, bioroot = nil)
  @bioroot  = ENV['BIOROOT'] || bioroot
  @db_names = {}
  @database = {}
  @taxonomy = {}
  parse_keggtab(File.open(file_path, &:read))
end

Instance Attribute Details

#bioroot ⇒ Object (readonly)

Returns a string of the BIOROOT path prefix.



65
66
67
# File 'lib/bio/db/kegg/keggtab.rb', line 65

# Reader for @bioroot, the BIOROOT path prefix.
# The constructor sets it from ENV['BIOROOT'] or, failing that, from its
# +bioroot+ argument, so this returns nil when neither was supplied.
def bioroot
  @bioroot
end

#db_names ⇒ Object (readonly)

Returns the value of attribute db_names.



66
67
68
# File 'lib/bio/db/kegg/keggtab.rb', line 66

# Reader for @db_names, the Hash the constructor creates empty before
# parsing the keggtab file.
# The deprecated lookup methods (alias_list, db_path, db_by_abbrev) index it
# by database name and call aliases / path / abbrev on its values.
def db_names
  @db_names
end

Instance Method Details

#alias_list(db_name) ⇒ Object

deprecated



141
142
143
144
145
# File 'lib/bio/db/kegg/keggtab.rb', line 141

# Deprecated. Looks +db_name+ up in @db_names and returns that entry's
# aliases, or nil when no entry is registered under the name.
def alias_list(db_name)
  entry = @db_names[db_name]
  entry.aliases if entry
end

#aliases(db_abbrev) ⇒ Object

Returns an Array containing all alias names for the database. (e.g. ‘hsa’ -> [“H.sapiens”, “hsa”], ‘hpj’ -> [“H.pylori_J99”, “hpj”])



112
113
114
115
116
# File 'lib/bio/db/kegg/keggtab.rb', line 112

# Returns the alias names recorded for the database registered under
# +db_abbrev+ (e.g. 'hsa' -> ["H.sapiens", "hsa"]), or nil when @database
# has no entry for that abbreviation.
def aliases(db_abbrev)
  entry = @database[db_abbrev]
  entry.aliases if entry
end

#child_nodes(node = 'genes') ⇒ Object



196
197
198
# File 'lib/bio/db/kegg/keggtab.rb', line 196

# Returns whatever the Taxonomy table (@taxonomy) stores under the label
# +node+; the label defaults to 'genes'. Yields nil for an unknown label.
def child_nodes(node = 'genes')
  @taxonomy[node]
end

#database(db_abbrev = nil) ⇒ Object

Returns a hash containing DB definition section of the keggtab file. If database name is given as an argument, returns a Keggtab::DB object.



102
103
104
105
106
107
108
# File 'lib/bio/db/kegg/keggtab.rb', line 102

# Without an argument, returns the whole @database Hash (the DB definition
# section of the keggtab file). With +db_abbrev+, returns only the entry
# stored under that abbreviation, which is nil when there is none.
def database(db_abbrev = nil)
  return @database unless db_abbrev

  @database[db_abbrev]
end

#db_by_abbrev(db_abbrev) ⇒ Object

deprecated



157
158
159
160
161
162
# File 'lib/bio/db/kegg/keggtab.rb', line 157

# Deprecated. Scans the values of @db_names in insertion order and returns
# the first one whose abbrev equals +db_abbrev+; nil when none matches.
def db_by_abbrev(db_abbrev)
  @db_names.each_value.find { |entry| entry.abbrev == db_abbrev }
end

#db_path(db_name) ⇒ Object

deprecated



148
149
150
151
152
153
154
# File 'lib/bio/db/kegg/keggtab.rb', line 148

# Deprecated. Returns "<directory>/<db_name>" for the database registered in
# @db_names under +db_name+.
#
# When @bioroot is set, the first "$BIOROOT" in the directory is replaced by
# it. The replacement is supplied through a block so that @bioroot is
# inserted verbatim: with the two-argument form of String#sub, sequences
# such as "\0", "\1" or "\\" inside the root (e.g. a Windows-style path)
# would be interpreted as back-references and corrupt the result.
#
# Raises NoMethodError when +db_name+ is not registered, as before.
def db_path(db_name)
  directory = @db_names[db_name].path
  directory = directory.sub(/\$BIOROOT/) { @bioroot } if @bioroot
  "#{directory}/#{db_name}"
end

#db_path_by_abbrev(db_abbrev) ⇒ Object

deprecated



170
171
172
173
# File 'lib/bio/db/kegg/keggtab.rb', line 170

# Deprecated. Resolves +db_abbrev+ to a database name via name_by_abbrev and
# returns the db_path of that name.
def db_path_by_abbrev(db_abbrev)
  db_path(name_by_abbrev(db_abbrev))
end

#korg2taxo(keggorg) ⇒ Object Also known as: keggorg2taxo, korg2taxonomy, keggorg2taxonomy

Returns an array of taxonomy names the organism belongs to. (e.g. ‘eco’ -> [‘proteogamma’,‘proteobacteria’,‘eubacteria’,‘genes’]) This method has aliases as keggorg2taxo, korg2taxonomy, keggorg2taxonomy.



225
226
227
228
229
230
231
232
233
234
235
236
237
238
# File 'lib/bio/db/kegg/keggtab.rb', line 225

# Returns the chain of taxonomy labels that contain +keggorg+, innermost
# first (e.g. 'eco' -> ['proteogamma', 'proteobacteria', 'eubacteria',
# 'genes']). Returns an empty Array when no label lists +keggorg+.
#
# At each step the first @taxonomy entry (in insertion order) whose member
# list includes the current label is taken as its parent.
#
# The walk is iterative and stops as soon as it would revisit a label that
# is already in the result, so a cyclic alias definition ends the walk
# instead of recursing until the stack overflows.
def korg2taxo(keggorg)
  lineage = []
  current = keggorg
  loop do
    parent, = @taxonomy.find { |_label, members| members.include?(current) }
    # No enclosing label, or a cycle back into the lineage: we are done.
    break if parent.nil? || lineage.include?(parent)

    lineage.push(parent)
    current = parent
  end
  lineage
end

#name(db_abbrev) ⇒ Object

Returns a canonical database name for the abbreviation. (e.g. ‘ec’ -> ‘enzyme’, ‘hsa’ -> ‘h.sapiens’, …)



120
121
122
123
124
# File 'lib/bio/db/kegg/keggtab.rb', line 120

# Returns the name of the database registered in @database under
# +db_abbrev+ (e.g. 'ec' -> 'enzyme'), or nil when the abbreviation is
# not registered.
def name(db_abbrev)
  entry = @database[db_abbrev]
  entry.name if entry
end

#name_by_abbrev(db_abbrev) ⇒ Object

deprecated



165
166
167
# File 'lib/bio/db/kegg/keggtab.rb', line 165

# Deprecated. Returns the name of the entry that db_by_abbrev finds for
# +db_abbrev+. No nil check is made, so an abbreviation that db_by_abbrev
# cannot resolve results in a NoMethodError.
def name_by_abbrev(db_abbrev)
  entry = db_by_abbrev(db_abbrev)
  entry.name
end

#path(db_abbrev) ⇒ Object

Returns an absolute path for the flat file database. (e.g. ‘/bio/db/kegg/genes’, …)



128
129
130
131
132
133
134
135
136
137
# File 'lib/bio/db/kegg/keggtab.rb', line 128

# Returns "<directory>/<name>" for the database registered in @database
# under +db_abbrev+ (e.g. '/bio/db/kegg/genes/h.sapiens'), or nil when the
# abbreviation is not registered.
#
# When @bioroot is set, the first "$BIOROOT" in the directory is replaced by
# it. The replacement is supplied through a block so that @bioroot is
# inserted verbatim: with the two-argument form of String#sub, sequences
# such as "\0", "\1" or "\\" inside the root (e.g. a Windows-style path)
# would be interpreted as back-references and corrupt the result.
def path(db_abbrev)
  entry = @database[db_abbrev]
  return unless entry

  directory = entry.path
  directory = directory.sub(/\$BIOROOT/) { @bioroot } if @bioroot
  "#{directory}/#{entry.name}"
end

#taxa_list ⇒ Object

List of all node labels from the Taxonomy section. (e.g. [“actinobacteria”, “animals”, “archaea”, “bacillales”, …])



192
193
194
# File 'lib/bio/db/kegg/keggtab.rb', line 192

# Returns every label (key) of the Taxonomy table as a new, sorted Array.
def taxa_list
  @taxonomy.each_key.sort
end

#taxo2korgs(node = 'genes') ⇒ Object Also known as: taxo2keggorgs, taxon2korgs, taxon2keggorgs

Returns an array of organism names included in the specified taxon label. (e.g. ‘proteobeta’ -> [“nme”, “nma”, “rso”]) This method has taxo2keggorgs, taxon2korgs, and taxon2keggorgs aliases.



203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# File 'lib/bio/db/kegg/keggtab.rb', line 203

# Expands the taxonomy label +node+ (default 'genes') into the organism
# codes below it.
#
# * A label of exactly three characters is treated as an organism code and
#   returned unchanged (as a String).
# * Any other label registered in @taxonomy is expanded recursively, one
#   result per child, so the returned Array mirrors the hierarchy: a label
#   whose children are organism codes gives a flat Array
#   (e.g. 'proteobeta' -> ["nme", "nma", "rso"]), while a higher-level label
#   gives nested Arrays.
# * An unregistered label yields nil (also as an element of a parent's
#   result).
def taxo2korgs(node = 'genes')
  return node if node.length == 3

  children = @taxonomy[node]
  return nil unless children

  children.map { |child| taxo2korgs(child) }
end

#taxonomy(node = nil) ⇒ Object

Returns a hash containing the Taxonomy section of the keggtab file. If an argument is given, returns a list of all child nodes belonging to the label node. (e.g. “eukaryotes” -> [“animals”, “plants”, “protists”, “fungi”], …)



182
183
184
185
186
187
188
# File 'lib/bio/db/kegg/keggtab.rb', line 182

# Without an argument, returns the whole Taxonomy table (@taxonomy). With a
# label +node+, returns only what is stored under that label
# (e.g. "eukaryotes" -> ["animals", "plants", "protists", "fungi"]), which
# is nil for an unknown label.
def taxonomy(node = nil)
  node ? @taxonomy[node] : @taxonomy
end