Class: Taxonomy
- Inherits:
-
Object
- Object
- Taxonomy
- Defined in:
- lib/ncbi_taxonomy.rb
Instance Method Summary collapse
- #check_sqlite_version ⇒ Object
- #get_all_names_by_id(id) ⇒ Object
- #get_allrank_by_id(id) ⇒ Object
- #get_allrank_by_name(name) ⇒ Object
- #get_fixedrank_by_id(id) ⇒ Object
- #get_fixedrank_by_name(name) ⇒ Object
- #get_missing_id(id) ⇒ Object
- #get_names_by_taxonid(id) ⇒ Object
- #get_rank_ptaxonid_by_id(id) ⇒ Object
- #get_rank_ptaxonid_scientificname_by_id(id) ⇒ Object
- #get_scientific_name_by_id(id) ⇒ Object
- #get_scientific_name_by_names(names) ⇒ Object
- #get_taxonids_by_name(name) ⇒ Object
-
#initialize ⇒ Taxonomy
constructor
A new instance of Taxonomy.
- #memory ⇒ Object
- #memory? ⇒ Boolean
- #mget_allrank_by_id(id_arr) ⇒ Object
- #mget_allrank_by_name(name_arr) ⇒ Object
- #mget_fixedrank_by_id(id_arr) ⇒ Object
- #mget_fixedrank_by_name(name_arr) ⇒ Object
- #mrun(cmd, arr) ⇒ Object
- #tax_rank_all ⇒ Object
- #tax_rank_fixed ⇒ Object
- #using_unique_name ⇒ Object
Constructor Details
#initialize ⇒ Taxonomy
Returns a new instance of Taxonomy.
16 17 18 19 20 21 22 23 24 25 26 27 28 |
# File 'lib/ncbi_taxonomy.rb', line 16
# Opens the taxonomy database located under the current user's home directory.
#
# The database file is expected at "<home>/.ncbi_taxonomy/taxonomy.db".
# When SQLite cannot open that file, a download hint and the underlying error
# message are written to STDERR and the process exits with status 1.
#
# @return [Taxonomy] a new instance of Taxonomy
def initialize
  @home_dir = Dir.home
  @work_dir = @home_dir + "/.ncbi_taxonomy"
  @taxdb_release = @work_dir + "/taxonomy.db"
  @in_memory = false
  begin
    @db = SQLite3::Database.new @taxdb_release
  rescue SQLite3::CantOpenException => e
    STDERR.puts "Please download the NCBI Taxonomy database using 'ncbi_taxonomy update' command."
    # Append the exception's text. A bare `e.` would chain onto the next
    # line and be parsed as `e.exit(1)`, so the message was never printed.
    STDERR.puts "[MSG]" + e.message
    exit 1
  end
end
Instance Method Details
#check_sqlite_version ⇒ Object
43 44 45 46 |
# File 'lib/ncbi_taxonomy.rb', line 43
# Reports whether the SQLite version reported by @db is 3.8.3 or newer.
#
# The version string is obtained by running "SELECT SQLITE_VERSION()".
# get_allrank_by_id uses the result to decide whether it may run its
# WITH RECURSIVE query.
#
# @return [Boolean] true when the reported version is >= 3.8.3
def check_sqlite_version
  minimum = Gem::Version.new('3.8.3')
  reported = @db.execute("SELECT SQLITE_VERSION()")[0][0]
  Gem::Version.new(reported) >= minimum
end
#get_all_names_by_id(id) ⇒ Object
68 69 70 71 72 |
# File 'lib/ncbi_taxonomy.rb', line 68
# Fetches every name_txt value stored in the names table for a taxon id.
#
# @param id [#to_i] taxon id; coerced with to_i before being placed in the SQL
# @return [Array<Array>] the rows returned by @db.execute (one column each)
def get_all_names_by_id(id)
  @db.execute("SELECT name_txt FROM names WHERE tax_id=#{id.to_i}")
end
#get_allrank_by_id(id) ⇒ Object
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
# File 'lib/ncbi_taxonomy.rb', line 86
# Builds the lineage of a taxon as [rank, scientific name] pairs, ordered
# from the most general entry to the taxon itself.
#
# When check_sqlite_version is true, the lineage is collected with a single
# WITH RECURSIVE query; otherwise it is walked one parent at a time through
# get_rank_ptaxonid_scientificname_by_id. If nothing is found for the id,
# the id is looked up through get_missing_id and the lineage of the returned
# id is used instead (get_missing_id yields -1 when there is no replacement,
# which ends the recursion with an empty result).
#
# @param id [#to_i] taxon id
# @return [Array<Array(String, String)>] [rank, name] pairs; empty when the
#   id cannot be resolved
def get_allrank_by_id(id)
  id = id.to_i
  out = []
  if check_sqlite_version
    rs = @db.execute "WITH RECURSIVE allrank (id, pid, rank, name) AS ( " \
                     "VALUES (0, #{id}, 'no_rank', 'Homo sapiens javamintus') " \
                     "UNION ALL " \
                     "SELECT nodes.tax_id, nodes.parent_tax_id, nodes.rank, names.name_txt " \
                     "FROM nodes, names, allrank " \
                     "WHERE nodes.tax_id=allrank.pid AND names.tax_id = nodes.tax_id " \
                     "AND names.name_class='scientific name' AND nodes.tax_id<>1) " \
                     "SELECT * FROM allrank;"
    # The first row is the synthetic VALUES seed; the last row is dropped as
    # well, exactly as before. `|| []` guards the slice against a nil result.
    (rs[1..-2] || []).each { |row| out << [row[2], row[3]] }
  else
    current = id
    # Stop at tax_id 1, mirroring the `nodes.tax_id<>1` filter of the query
    # above. Without this check a lineage lacking a 'superkingdom' entry
    # would never terminate once the walk reaches that id.
    until current == 1
      row = get_rank_ptaxonid_scientificname_by_id(current)
      # Unknown id: leave `out` as it is so the missing-id lookup below runs
      # instead of raising NoMethodError on nil.
      break if row.nil?
      out << [row[1], row[2]]
      break if row[1] == 'superkingdom'
      current = row[0].to_i
    end
  end
  return get_allrank_by_id(get_missing_id(id)) if out.empty? && id > -1
  out.reverse
end
#get_allrank_by_name(name) ⇒ Object
141 142 143 144 145 |
# File 'lib/ncbi_taxonomy.rb', line 141
# Resolves a name to its taxon ids and returns the full lineage of each.
#
# @param name [String] name passed to get_taxonids_by_name
# @return [Array<Array>] one get_allrank_by_id result per matching taxon id,
#   in the order the ids were returned; empty when no id matches
def get_allrank_by_name(name)
  get_taxonids_by_name(name).map { |tax_id| get_allrank_by_id(tax_id) }
end
#get_fixedrank_by_id(id) ⇒ Object
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
# File 'lib/ncbi_taxonomy.rb', line 105
# Projects the lineage of a taxon onto the eight ranks listed by
# tax_rank_fixed (superkingdom ... species, strain).
#
# The lineage from get_allrank_by_id is scanned in order:
# * a rank present in tax_rank_fixed is stored in its slot; any fixed slots
#   skipped on the way are filled with "@<alt_name>_<rank>" placeholders,
#   where alt_name is the most recently remembered name;
# * once seven slots are filled, the first 'no rank' entry (28 in
#   tax_rank_all) is stored as the 'strain';
# * other ranks known to tax_rank_all may only update alt_name;
# * ranks unknown to tax_rank_all are ignored.
# Afterwards, a result with fewer than seven entries is padded with
# [rank, nil] pairs up to and including 'strain'; a result with exactly
# seven entries gets a 'strain' entry that repeats the last stored name.
#
# @param id [#to_i] taxon id
# @return [Array<Array(String, String|nil)>] [rank, name] pairs
def get_fixedrank_by_id(id)
  id = id.to_i
  # Both tables are rebuilt on every call of their methods, so fetch them once.
  fixed = tax_rank_fixed.to_a
  all_ranks = tax_rank_all
  arr = []
  pos = 0
  alt_name = ''
  get_allrank_by_id(id).each do |rank, name|
    rank_all_no = all_ranks[rank]
    slot = fixed.index { |fixed_rank, _no| fixed_rank == rank }
    if slot
      # Fill every fixed rank that was skipped before this one.
      (arr.size...slot).each do |x|
        arr << [fixed[x][0], "@#{alt_name}_#{fixed[x][0]}"]
        pos += 1
      end
      arr << [rank, name]
      pos += 1
      alt_name = name
    elsif arr.size == 7 && rank_all_no == 28
      arr << ['strain', name]
    elsif rank_all_no && rank_all_no != 28
      # `rank_all_no &&` skips ranks missing from tax_rank_all; comparing an
      # Integer with nil would raise ArgumentError.
      alt_name = name if fixed[pos - 1][1] > rank_all_no
    end
  end
  if arr.size < 7
    (arr.size..7).each { |x| arr << [fixed[x][0], nil] }
  elsif arr.size == 7
    arr << ['strain', arr[-1][1]]
  end
  arr
end
#get_fixedrank_by_name(name) ⇒ Object
147 148 149 150 151 |
# File 'lib/ncbi_taxonomy.rb', line 147
# Resolves a name to its taxon ids and returns the fixed-rank lineage of each.
#
# @param name [String] name passed to get_taxonids_by_name
# @return [Array<Array>] one get_fixedrank_by_id result per matching taxon
#   id, in the order the ids were returned; empty when no id matches
def get_fixedrank_by_name(name)
  get_taxonids_by_name(name).map { |tax_id| get_fixedrank_by_id(tax_id) }
end
#get_missing_id(id) ⇒ Object
153 154 155 156 157 158 159 160 161 162 163 164 165 |
# File 'lib/ncbi_taxonomy.rb', line 153
# Looks up the replacement for a taxon id that is absent from the main tables.
#
# The delnodes table is checked first: an id listed there yields -1.
# Otherwise the merged table is queried for old_tax_id; exactly one matching
# row yields its new_tax_id, anything else yields -1.
#
# @param id [#to_i] taxon id; coerced with to_i before it is placed in the
#   SQL text, like the other *_by_id lookups, so arbitrary strings cannot
#   alter the statements
# @return [Integer] the new taxon id, or -1 when there is none
def get_missing_id(id)
  id = id.to_i
  deleted = @db.execute "SELECT tax_id FROM delnodes WHERE tax_id='#{id}'"
  return -1 unless deleted.empty?

  merged = @db.execute "SELECT new_tax_id FROM merged WHERE old_tax_id=#{id}"
  merged.size == 1 ? merged[0][0].to_i : -1
end
#get_names_by_taxonid(id) ⇒ Object
54 55 56 |
# File 'lib/ncbi_taxonomy.rb', line 54
# Fetches the (name_class, name_txt) pairs stored for a taxon id.
#
# @param id [#to_i] taxon id; coerced with to_i before it is placed in the
#   SQL text, like the other *_by_id lookups, so arbitrary strings cannot
#   alter the statement
# @return [Array<Array>] the rows returned by @db.execute
def get_names_by_taxonid(id)
  @db.execute "SELECT name_class, name_txt FROM names WHERE tax_id=#{id.to_i}"
end
#get_rank_ptaxonid_by_id(id) ⇒ Object
74 75 76 77 78 |
# File 'lib/ncbi_taxonomy.rb', line 74
# Fetches the parent taxon id and rank of a taxon from the nodes table.
#
# @param id [#to_i] taxon id; coerced with to_i before being placed in the SQL
# @return [Array, nil] the first result row ([parent_tax_id, rank]), or nil
#   when the query returns no rows
def get_rank_ptaxonid_by_id(id)
  tax_id = id.to_i
  rows = @db.execute("SELECT parent_tax_id, rank FROM nodes WHERE tax_id=#{tax_id}")
  rows.first
end
#get_rank_ptaxonid_scientificname_by_id(id) ⇒ Object
80 81 82 83 84 |
# File 'lib/ncbi_taxonomy.rb', line 80
# Fetches parent taxon id, rank and scientific name of a taxon in one query
# over the nodes and names tables.
#
# @param id [#to_i] taxon id; coerced with to_i before being placed in the SQL
# @return [Array, nil] the first result row
#   ([parent_tax_id, rank, name_txt]), or nil when the query returns no rows
def get_rank_ptaxonid_scientificname_by_id(id)
  tax_id = id.to_i
  sql = "SELECT nodes.parent_tax_id, nodes.rank, names.name_txt " \
        "FROM nodes, names " \
        "WHERE nodes.tax_id=#{tax_id} AND names.tax_id=#{tax_id} " \
        "AND names.name_class='scientific name'"
  @db.execute(sql).first
end
#get_scientific_name_by_id(id) ⇒ Object
62 63 64 65 66 |
# File 'lib/ncbi_taxonomy.rb', line 62
# Fetches the scientific name recorded for a taxon id.
#
# @param id [#to_i] taxon id; coerced with to_i before being placed in the SQL
# @return [String, nil] name_txt of the first matching row, or nil when the
#   names table holds no scientific name for the id (previously this case
#   raised NoMethodError on nil)
def get_scientific_name_by_id(id)
  id = id.to_i
  rows = @db.execute "SELECT name_txt FROM names WHERE tax_id=#{id} AND name_class='scientific name'"
  first_row = rows.first
  first_row && first_row[0]
end
#get_scientific_name_by_names(names) ⇒ Object
58 59 60 |
# File 'lib/ncbi_taxonomy.rb', line 58
# Picks the scientific name out of a list of (name_class, name_txt) rows,
# such as the rows returned by get_names_by_taxonid.
#
# @param names [Enumerable<Array>] rows whose element 0 is the name class
#   and element 1 the name text
# @return [String, nil] the text of the first row whose class is
#   'scientific name', or nil when there is none (the previous `each`-based
#   version returned the whole input collection in that case)
def get_scientific_name_by_names(names)
  match = names.find { |row| row[0] == 'scientific name' }
  match && match[1]
end
#get_taxonids_by_name(name) ⇒ Object
48 49 50 51 52 |
# File 'lib/ncbi_taxonomy.rb', line 48
# Finds the distinct taxon ids whose names table entry matches a name exactly.
#
# The name is escaped with SQLite3::Database.quote before it is embedded in
# the statement.
#
# @param name [String] name text to look up
# @return [Array] flattened list of matching tax_id values
def get_taxonids_by_name(name)
  escaped = SQLite3::Database.quote(name)
  rows = @db.execute("SELECT DISTINCT tax_id FROM names WHERE name_txt='#{escaped}'")
  rows.flatten
end
#memory ⇒ Object
30 31 32 33 34 35 36 37 |
# File 'lib/ncbi_taxonomy.rb', line 30
# Replaces @db with an in-memory database populated from the current @db.
#
# A ':memory:' SQLite3::Database is created and filled through
# SQLite3::Backup, copying the 'main' database of @db into the 'main'
# database of the new handle. Afterwards @db refers to the in-memory handle
# and @in_memory is set.
#
# @return [true] the value assigned to @in_memory
def memory
  mem_db = SQLite3::Database.new(':memory:')
  copier = SQLite3::Backup.new(mem_db, 'main', @db, 'main')
  # NOTE(review): in the SQLite backup API a negative page count means
  # "copy all remaining pages" — confirm against the sqlite3 gem docs.
  copier.step(-1)
  copier.finish
  @db = mem_db
  @in_memory = true
end
#memory? ⇒ Boolean
39 40 41 |
# File 'lib/ncbi_taxonomy.rb', line 39
# Tells whether the database has been switched to the in-memory copy.
#
# @return [Boolean] the current value of @in_memory (set to false by
#   initialize and to true by memory)
def memory?
  @in_memory
end
#mget_allrank_by_id(id_arr) ⇒ Object
183 184 185 |
# File 'lib/ncbi_taxonomy.rb', line 183
# Runs get_allrank_by_id for every element of id_arr via mrun.
#
# @param id_arr [Array] taxon ids
# @return [Array] whatever mrun returns for 'get_allrank_by_id' and id_arr
def mget_allrank_by_id(id_arr)
  mrun('get_allrank_by_id', id_arr)
end
#mget_allrank_by_name(name_arr) ⇒ Object
191 192 193 |
# File 'lib/ncbi_taxonomy.rb', line 191
# Runs get_allrank_by_name for every element of name_arr via mrun.
#
# @param name_arr [Array] names to resolve
# @return [Array] whatever mrun returns for 'get_allrank_by_name' and name_arr
def mget_allrank_by_name(name_arr)
  mrun('get_allrank_by_name', name_arr)
end
#mget_fixedrank_by_id(id_arr) ⇒ Object
187 188 189 |
# File 'lib/ncbi_taxonomy.rb', line 187
# Runs get_fixedrank_by_id for every element of id_arr via mrun.
#
# @param id_arr [Array] taxon ids
# @return [Array] whatever mrun returns for 'get_fixedrank_by_id' and id_arr
def mget_fixedrank_by_id(id_arr)
  mrun('get_fixedrank_by_id', id_arr)
end
#mget_fixedrank_by_name(name_arr) ⇒ Object
195 196 197 |
# File 'lib/ncbi_taxonomy.rb', line 195
# Runs get_fixedrank_by_name for every element of name_arr via mrun.
#
# @param name_arr [Array] names to resolve
# @return [Array] whatever mrun returns for 'get_fixedrank_by_name' and
#   name_arr
def mget_fixedrank_by_name(name_arr)
  mrun('get_fixedrank_by_name', name_arr)
end
#mrun(cmd, arr) ⇒ Object
167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 |
# File 'lib/ncbi_taxonomy.rb', line 167
# Calls the method named cmd once per element of arr, each call in its own
# Thread, and collects the results in input order.
#
# All threads are started before any of them is waited for.
#
# @param cmd [String, Symbol] name of the method to invoke on self
# @param arr [Array] arguments; each element is passed to one call
# @return [Array] the call results, positioned like their arguments in arr
def mrun(cmd, arr)
  workers = arr.map do |element|
    Thread.new(element) { |arg| method(cmd).call(arg) }
  end
  # Thread#value waits for the thread and hands back the block's result.
  workers.map(&:value)
end
#tax_rank_all ⇒ Object
12 13 14 |
# File 'lib/ncbi_taxonomy.rb', line 12
# Returns the table of all rank names known to this class, each mapped to its
# position in the list below (0 for 'superkingdom' through 28 for 'no rank').
#
# A new Hash is built on every call.
#
# @return [Hash{String => Integer}] rank name => position
def tax_rank_all
  [
    "superkingdom", "kingdom", "subkingdom",
    "superphylum", "phylum", "subphylum",
    "superclass", "class", "infraclass", "subclass",
    "superorder", "order", "suborder", "infraorder", "parvorder",
    "superfamily", "family", "subfamily",
    "tribe", "subtribe",
    "genus", "subgenus",
    "species group", "species subgroup", "species", "subspecies",
    "varietas", "forma",
    "no rank"
  ].each_with_index.to_h
end
#tax_rank_fixed ⇒ Object
8 9 10 |
# File 'lib/ncbi_taxonomy.rb', line 8
# Returns the eight ranks reported by get_fixedrank_by_id, in output order,
# each mapped to a rank number.
#
# A new Hash is built on every call.
#
# NOTE(review): 'genus' is 21 here but 20 in tax_rank_all (where 21 is
# 'subgenus') — confirm whether the difference is intended.
#
# @return [Hash{String => Integer}] rank name => rank number
def tax_rank_fixed
  [
    ["superkingdom", 0],
    ["phylum", 4],
    ["class", 7],
    ["order", 11],
    ["family", 16],
    ["genus", 21],
    ["species", 24],
    ["strain", 28]
  ].to_h
end
#using_unique_name ⇒ Object
4 5 6 |
# File 'lib/ncbi_taxonomy.rb', line 4
# Returns a table with the keys "Ponticoccus" and "Bacillus", each mapped
# to 1.
#
# NOTE(review): presumably a list of names that need special handling
# because they are not unique — no caller is visible here; confirm.
#
# @return [Hash{String => Integer}]
def using_unique_name
  {
    "Ponticoccus" => 1,
    "Bacillus" => 1
  }
end