Class: Taxonomy

Inherits:
Object
  • Object
show all
Defined in:
lib/ncbi_taxonomy.rb

Instance Method Summary

Constructor Details

#initialize ⇒ Taxonomy

Returns a new instance of Taxonomy.



16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/ncbi_taxonomy.rb', line 16

# Opens the local NCBI Taxonomy SQLite database.
#
# The database file is looked up at "<home>/.ncbi_taxonomy/taxonomy.db".
# When SQLite cannot open it, a hint plus the driver's message is written to
# STDERR and the process exits with status 1.
def initialize
  @home_dir = Dir.home
  @work_dir = "#{@home_dir}/.ncbi_taxonomy"
  @taxdb_release = "#{@work_dir}/taxonomy.db"
  @in_memory = false
  @db = SQLite3::Database.new(@taxdb_release)
rescue SQLite3::CantOpenException => e
  STDERR.puts "Please download the NCBI Taxonomy database using 'ncbi_taxonomy update' command."
  STDERR.puts "[MSG]#{e.message}"
  exit 1
end

Instance Method Details

#check_sqlite_version ⇒ Object



43
44
45
46
# File 'lib/ncbi_taxonomy.rb', line 43

# Reports whether the SQLite library behind @db is version 3.8.3 or newer.
# get_allrank_by_id uses the answer to choose between its recursive CTE
# query and its row-by-row fallback.
#
# @return [Boolean]
def check_sqlite_version
  minimum = Gem::Version.new('3.8.3')
  installed = @db.execute("SELECT SQLITE_VERSION()").first.first
  Gem::Version.new(installed) >= minimum
end

#get_all_names_by_id(id) ⇒ Object



68
69
70
71
72
# File 'lib/ncbi_taxonomy.rb', line 68

# Fetches every name_txt recorded in the names table for a taxon.
#
# @param id [#to_i] taxon id; coerced with to_i before it reaches the SQL
# @return [Array<Array>] one single-column row per name
def get_all_names_by_id(id)
  tax_id = id.to_i
  @db.execute("SELECT name_txt FROM names WHERE tax_id=#{tax_id}")
end

#get_allrank_by_id(id) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/ncbi_taxonomy.rb', line 86

# Returns the lineage of a taxon as [rank, scientific name] pairs, ordered
# from the top of the lineage down to the taxon itself.
#
# When check_sqlite_version is true the lineage comes from one recursive CTE
# query; otherwise it is walked parent by parent through
# get_rank_ptaxonid_scientificname_by_id.
#
# If nothing is found for the id, it is resolved through get_missing_id and
# the lookup is retried with the replacement id. An id that cannot be
# resolved yields an empty array.
#
# @param id [#to_i] taxon id
# @return [Array<Array(String, String)>] [rank, name] pairs, top-down
def get_allrank_by_id(id)
  id = id.to_i
  out = []
  if check_sqlite_version
    rs = @db.execute "WITH RECURSIVE allrank (id, pid, rank, name) AS ( VALUES (0, #{id}, 'no_rank', 'Homo sapiens javamintus') UNION ALL SELECT nodes.tax_id, nodes.parent_tax_id, nodes.rank, names.name_txt FROM nodes, names, allrank WHERE nodes.tax_id=allrank.pid AND names.tax_id = nodes.tax_id AND names.name_class='scientific name' AND nodes.tax_id<>1) SELECT * FROM allrank;"
    # Row 0 is the seed VALUES row; the final row is dropped as well.
    # NOTE(review): for a lineage whose top node sits directly below tax_id 1
    # this also discards that top node -- confirm that is intended.
    rs[1..-2].each { |row| out << [row[2], row[3]] }
  else
    loop do
      # Stop at the root, mirroring the CTE's "nodes.tax_id<>1"; without this
      # a lineage that has no 'superkingdom' node would never terminate.
      break if id == 1

      rs = get_rank_ptaxonid_scientificname_by_id(id)
      # Unknown id: leave the loop so the merged/deleted lookup below can run
      # instead of failing on nil.
      break if rs.nil?

      out << [rs[1], rs[2]]
      break if rs[1] == 'superkingdom'

      id = rs[0].to_i
    end
  end

  # `out` is bottom-up here. The recursive call already returns top-down, so
  # it is reversed once to survive the final reverse below.
  out = get_allrank_by_id(get_missing_id(id)).reverse if out.empty? && id > -1
  out.reverse
end

#get_allrank_by_name(name) ⇒ Object



141
142
143
144
145
# File 'lib/ncbi_taxonomy.rb', line 141

# Resolves a name to its taxon ids and returns the full lineage of each.
#
# @param name [String] name to look up via get_taxonids_by_name
# @return [Array<Array>] one get_allrank_by_id result per matching taxon id
def get_allrank_by_name(name)
  get_taxonids_by_name(name).map { |tax_id| get_allrank_by_id(tax_id) }
end

#get_fixedrank_by_id(id) ⇒ Object



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/ncbi_taxonomy.rb', line 105

# Projects a taxon's lineage (from get_allrank_by_id) onto the ranks listed
# in tax_rank_fixed, in that table's order.
#
# * A fixed rank present in the lineage contributes [rank, name].
# * Fixed ranks skipped before a present one get the placeholder
#   "@<alt_name>_<rank>", where alt_name is the most recently remembered
#   name.
# * A 'no rank' entry met once seven fixed ranks are filled is recorded as
#   the strain.
# * Ranks that appear in neither tax_rank_fixed nor tax_rank_all are
#   skipped; previously they raised ArgumentError on the Integer/nil
#   comparison.
# * Trailing fixed ranks that were never reached are emitted with a nil
#   name. If exactly seven ranks were filled, the strain repeats the last
#   name.
#
# @param id [#to_i] taxon id
# @return [Array<Array(String, String|nil)>] [rank, name] pairs
def get_fixedrank_by_id(id)
  id = id.to_i
  fixed_ranks = tax_rank_fixed.to_a # [[rank, number], ...] in fixed order
  all_numbers = tax_rank_all
  no_rank_number = 28 # number that tax_rank_all assigns to "no rank"

  arr = []
  pos = 0
  alt_name = ''
  get_allrank_by_id(id).each do |rank, name|
    fixed_index = fixed_ranks.index { |fixed_rank, _| fixed_rank == rank }
    rank_all_no = all_numbers[rank]

    if fixed_index
      # Pad every fixed rank that was skipped on the way to this one.
      (arr.size...fixed_index).each do |x|
        arr << [fixed_ranks[x][0], "@#{alt_name}_#{fixed_ranks[x][0]}"]
        pos += 1
      end
      arr << [rank, name]
      pos += 1
      alt_name = name
    elsif rank_all_no.nil?
      # Rank unknown to both tables: nothing to compare against, so skip it.
      next
    elsif arr.size == 7 && rank_all_no == no_rank_number
      arr << ['strain', name]
    elsif rank_all_no != no_rank_number
      # pos - 1 is -1 before any fixed rank is placed, which selects the last
      # fixed entry -- same indexing as before.
      alt_name = name if fixed_ranks[pos - 1][1] > rank_all_no
    end
  end

  if arr.size < 7
    (arr.size..7).each { |x| arr << [fixed_ranks[x][0], nil] }
  elsif arr.size == 7
    arr << ['strain', arr[-1][1]]
  end
  arr
end

#get_fixedrank_by_name(name) ⇒ Object



147
148
149
150
151
# File 'lib/ncbi_taxonomy.rb', line 147

# Resolves a name to its taxon ids and returns the fixed-rank lineage of each.
#
# @param name [String] name to look up via get_taxonids_by_name
# @return [Array<Array>] one get_fixedrank_by_id result per matching taxon id
def get_fixedrank_by_name(name)
  get_taxonids_by_name(name).map { |tax_id| get_fixedrank_by_id(tax_id) }
end

#get_missing_id(id) ⇒ Object



153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/ncbi_taxonomy.rb', line 153

# Looks up the replacement for a taxon id that is no longer in use.
#
# @param id [#to_i] taxon id; coerced with to_i (as the sibling lookups do)
#   so that only an integer is ever interpolated into the SQL
# @return [Integer] the new_tax_id from the merged table, or -1 when the id
#   is listed in delnodes or merged does not hold exactly one row for it
def get_missing_id(id)
  id = id.to_i
  deleted = @db.execute "SELECT tax_id FROM delnodes WHERE tax_id='#{id}'"
  return -1 unless deleted.empty?

  merged = @db.execute "SELECT new_tax_id FROM merged WHERE old_tax_id=#{id}"
  merged.size == 1 ? merged[0][0].to_i : -1
end

#get_names_by_taxonid(id) ⇒ Object



54
55
56
# File 'lib/ncbi_taxonomy.rb', line 54

# Fetches every (name_class, name_txt) pair stored for a taxon.
#
# @param id [#to_i] taxon id; coerced with to_i (as the sibling lookups do)
#   so that only an integer is ever interpolated into the SQL
# @return [Array<Array>] rows of [name_class, name_txt]
def get_names_by_taxonid(id)
  tax_id = id.to_i
  @db.execute "SELECT name_class, name_txt FROM names WHERE tax_id=#{tax_id}"
end

#get_rank_ptaxonid_by_id(id) ⇒ Object



74
75
76
77
78
# File 'lib/ncbi_taxonomy.rb', line 74

# Fetches the parent id and rank of a taxon from the nodes table.
#
# @param id [#to_i] taxon id; coerced with to_i before it reaches the SQL
# @return [Array, nil] [parent_tax_id, rank] of the first matching row, or
#   nil when no row matches
def get_rank_ptaxonid_by_id(id)
  tax_id = id.to_i
  rows = @db.execute("SELECT parent_tax_id, rank FROM nodes WHERE tax_id=#{tax_id}")
  rows.first
end

#get_rank_ptaxonid_scientificname_by_id(id) ⇒ Object



80
81
82
83
84
# File 'lib/ncbi_taxonomy.rb', line 80

# Fetches the parent id, rank and scientific name of a taxon by joining the
# nodes and names tables.
#
# @param id [#to_i] taxon id; coerced with to_i before it reaches the SQL
# @return [Array, nil] [parent_tax_id, rank, name_txt] of the first matching
#   row, or nil when no row matches
def get_rank_ptaxonid_scientificname_by_id(id)
  tax_id = id.to_i
  sql = "SELECT nodes.parent_tax_id, nodes.rank, names.name_txt FROM nodes, names WHERE nodes.tax_id=#{tax_id} AND names.tax_id=#{tax_id} AND names.name_class='scientific name'"
  @db.execute(sql).first
end

#get_scientific_name_by_id(id) ⇒ Object



62
63
64
65
66
# File 'lib/ncbi_taxonomy.rb', line 62

# Fetches the scientific name of a taxon.
#
# @param id [#to_i] taxon id; coerced with to_i before it reaches the SQL
# @return [String, nil] name_txt of the first 'scientific name' row, or nil
#   when the taxon has none (previously this raised NoMethodError)
def get_scientific_name_by_id(id)
  id = id.to_i
  rows = @db.execute "SELECT name_txt FROM names WHERE tax_id=#{id} AND name_class='scientific name'"
  first_row = rows.first
  first_row && first_row[0]
end

#get_scientific_name_by_names(names) ⇒ Object



58
59
60
# File 'lib/ncbi_taxonomy.rb', line 58

# Picks the scientific name out of a list of [name_class, name_txt] rows,
# such as the rows returned by get_names_by_taxonid.
#
# @param names [Array<Array>] rows whose first element is the name class and
#   whose second element is the name text
# @return [String, nil] text of the first row whose class is
#   'scientific name', or nil when there is none (previously the input array
#   itself was returned in that case)
def get_scientific_name_by_names(names)
  match = names.find { |entry| entry[0] == 'scientific name' }
  match && match[1]
end

#get_taxonids_by_name(name) ⇒ Object



48
49
50
51
52
# File 'lib/ncbi_taxonomy.rb', line 48

# Finds the distinct taxon ids whose name_txt equals the given name.
#
# @param name [String] name to match; passed through SQLite3::Database.quote
#   before being placed in the SQL
# @return [Array] flat list of matching tax_id values
def get_taxonids_by_name(name)
  escaped = SQLite3::Database.quote(name)
  @db.execute("SELECT DISTINCT tax_id FROM names WHERE name_txt='#{escaped}'").flatten
end

#memory ⇒ Object



30
31
32
33
34
35
36
37
# File 'lib/ncbi_taxonomy.rb', line 30

# Copies the currently open database into a fresh ':memory:' database via
# SQLite3::Backup and switches @db over to that copy.
#
# @return [true] the value assigned to @in_memory
def memory
  in_memory_db = SQLite3::Database.new(':memory:')
  copier = SQLite3::Backup.new(in_memory_db, 'main', @db, 'main')
  copier.step(-1)
  copier.finish
  @db = in_memory_db
  @in_memory = true
end

#memory? ⇒ Boolean

Returns:

  • (Boolean)


39
40
41
# File 'lib/ncbi_taxonomy.rb', line 39

# Reports whether the database has been copied into memory.
#
# @return [Boolean] the @in_memory flag: false as set by #initialize, true
#   once #memory has run
def memory?
  @in_memory
end

#tax_rank_all ⇒ Object



12
13
14
# File 'lib/ncbi_taxonomy.rb', line 12

# Maps every rank name known to this class to an ordinal number; "no rank"
# gets the largest number (28). A new Hash is built on every call.
#
# @return [Hash{String => Integer}]
def tax_rank_all
  {
    "superkingdom" => 0,
    "kingdom" => 1,
    "subkingdom" => 2,
    "superphylum" => 3,
    "phylum" => 4,
    "subphylum" => 5,
    "superclass" => 6,
    "class" => 7,
    "infraclass" => 8,
    "subclass" => 9,
    "superorder" => 10,
    "order" => 11,
    "suborder" => 12,
    "infraorder" => 13,
    "parvorder" => 14,
    "superfamily" => 15,
    "family" => 16,
    "subfamily" => 17,
    "tribe" => 18,
    "subtribe" => 19,
    "genus" => 20,
    "subgenus" => 21,
    "species group" => 22,
    "species subgroup" => 23,
    "species" => 24,
    "subspecies" => 25,
    "varietas" => 26,
    "forma" => 27,
    "no rank" => 28
  }
end

#tax_rank_fixed ⇒ Object



8
9
10
# File 'lib/ncbi_taxonomy.rb', line 8

# Lists the ranks that get_fixedrank_by_id reports, in output order, each
# mapped to its ordinal number. A new Hash is built on every call.
#
# The numbers follow tax_rank_all; "strain" shares 28 with "no rank".
# "genus" is 20 here: it used to be 21, which is the number tax_rank_all
# gives to "subgenus".
#
# @return [Hash{String => Integer}]
def tax_rank_fixed
  {
    "superkingdom" => 0,
    "phylum" => 4,
    "class" => 7,
    "order" => 11,
    "family" => 16,
    "genus" => 20,
    "species" => 24,
    "strain" => 28
  }
end

#using_unique_name ⇒ Object



4
5
6
# File 'lib/ncbi_taxonomy.rb', line 4

# Returns a table of names flagged with 1. A new Hash is built on every call.
# NOTE(review): no method shown here reads this table -- presumably it marks
# names needing a unique-name lookup; confirm against callers.
#
# @return [Hash{String => Integer}]
def using_unique_name
  {
    "Ponticoccus" => 1,
    "Bacillus" => 1
  }
end