Class: Taxonomy

Inherits:
Object
  • Object
show all
Defined in:
lib/ncbi_taxonomy.rb

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Taxonomy

Returns a new instance of Taxonomy.



16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/ncbi_taxonomy.rb', line 16

# Opens the taxonomy database kept under the current user's home directory.
#
# The database file is "<home>/.ncbi_taxonomy/taxonomy.db". If SQLite cannot
# open it, a hint plus the underlying error message is printed to STDERR and
# the process exits with status 1.
def initialize
	@home_dir = Dir.home
	@work_dir = "#{@home_dir}/.ncbi_taxonomy"
	@taxdb_release = "#{@work_dir}/taxonomy.db"
	@in_memory = false
	@db = SQLite3::Database.new(@taxdb_release)
rescue SQLite3::CantOpenException => e
	STDERR.puts "Please download the NCBI Taxonomy database using 'ncbi_taxonomy update' command."
	STDERR.puts "[MSG]#{e.message}"
	exit 1
end

Instance Method Details

#check_sqlite_version ⇒ Object



43
44
45
46
# File 'lib/ncbi_taxonomy.rb', line 43

# Reports whether the SQLite library behind @db is version 3.8.3 or newer.
#
# @return [Boolean] true when SQLITE_VERSION() >= 3.8.3
def check_sqlite_version
	minimum = Gem::Version.new('3.8.3')
	version_row = @db.execute("SELECT SQLITE_VERSION()").first
	Gem::Version.new(version_row.first) >= minimum
end

#get_all_names_by_id(id) ⇒ Object



68
69
70
71
72
# File 'lib/ncbi_taxonomy.rb', line 68

# Fetches every name_txt stored in the names table for one taxon.
#
# @param id [#to_i] taxon id; coerced to Integer before it is placed in the SQL
# @return [Array<Array>] the rows returned by the query (one name_txt per row)
def get_all_names_by_id id
	tax_id = id.to_i
	@db.execute "SELECT name_txt FROM names WHERE tax_id=#{tax_id}"
end

#get_allrank_by_id(id) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/ncbi_taxonomy.rb', line 86

# Returns the lineage of a taxon as [rank, scientific name] pairs.
#
# When check_sqlite_version is true the lineage is read with one recursive
# query; otherwise the parent chain is walked node by node through
# get_rank_ptaxonid_scientificname_by_id. If nothing is found for +id+, the id
# is passed to get_missing_id and the lineage of the id it returns is used.
#
# @param id [#to_i] taxon id
# @return [Array<Array>] [rank, name] pairs, ordered from the top of the
#   lineage down to +id+; empty when the id cannot be resolved
def get_allrank_by_id id
	id = id.to_i
	out = []
	if check_sqlite_version
		rs = @db.execute "WITH RECURSIVE allrank (id, pid, rank, name) AS ( VALUES (0, #{id}, 'no_rank', 'Homo sapiens javamintus') UNION ALL SELECT nodes.tax_id, nodes.parent_tax_id, nodes.rank, names.name_txt FROM nodes, names, allrank WHERE nodes.tax_id=allrank.pid AND names.tax_id = nodes.tax_id AND names.name_class='scientific name' AND nodes.tax_id<>1) SELECT * FROM allrank;"
		# Row 0 is the VALUES seed, not a real node.
		# NOTE(review): the final row is dropped too — presumably the top-most node; confirm.
		rs[1..-2].each { |x| out << [ x[2], x[3] ] }
	else
		loop do
			rs = get_rank_ptaxonid_scientificname_by_id id
			# Unknown id: stop instead of indexing into nil, so the
			# get_missing_id recovery below can run.
			break if rs.nil?
			parent_id = rs[0].to_i
			# A node that is its own parent would otherwise loop forever;
			# the recursive query excludes tax_id 1 in the same spirit.
			break if parent_id == id
			out << [ rs[1], rs[2] ]
			break if rs[1] == 'superkingdom'
			id = parent_id
		end
	end

	# The nested result is already in final order; reverse it once here so the
	# reverse below restores it.
	out = get_allrank_by_id(get_missing_id(id)).reverse if out.empty? && id > -1
	out.reverse
end

#get_allrank_by_name(name) ⇒ Object



141
142
143
144
145
# File 'lib/ncbi_taxonomy.rb', line 141

# Resolves a name to its taxon ids and returns one lineage per id.
#
# @param name [String] name passed to get_taxonids_by_name
# @return [Array] the get_allrank_by_id result for every matching id, in
#   the order the ids were returned
def get_allrank_by_name name
	get_taxonids_by_name(name).map { |tax_id| get_allrank_by_id(tax_id) }
end

#get_fixedrank_by_id(id) ⇒ Object



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/ncbi_taxonomy.rb', line 105

# Projects the lineage of a taxon onto the rank list from tax_rank_fixed.
#
# Walks the pairs from get_allrank_by_id in order. A fixed rank missing before
# a present one is filled with the placeholder "@<alt_name>_<rank>", where
# alt_name is the most recently remembered name. Fixed ranks missing at the
# end are appended with a nil name. When exactly seven entries exist, a
# 'strain' entry is added, named after a following "no rank" node if there is
# one and after the last entry otherwise.
#
# Ranks absent from tax_rank_all are skipped; they used to raise
# ArgumentError when compared against a rank number.
#
# @param id [#to_i] taxon id
# @return [Array<Array>] [rank, name] pairs following tax_rank_fixed order
def get_fixedrank_by_id id
	id = id.to_i
	ranks = get_allrank_by_id id
	# Build the lookup tables once instead of on every use.
	fixed = tax_rank_fixed
	fixed_list = fixed.to_a
	all = tax_rank_all
	arr = []
	pos = 0
	alt_name = ''
	ranks.each do |rank, name|
		rank_fixed_no = fixed[rank]
		rank_all_no = all[rank]
		if rank_fixed_no
			this_rank_fixed = fixed_list.index [rank, rank_fixed_no]
			# Pad every fixed rank skipped since the last entry (empty range when none).
			(arr.size...this_rank_fixed).each do |x|
				arr << [ fixed_list[x][0], "@#{alt_name}_#{fixed_list[x][0]}" ]
				pos += 1
			end
			arr << [ rank, name ]
			pos += 1
			alt_name = name
		elsif arr.size == 7 && rank_all_no == 28
			# 28 is the number tax_rank_all gives to "no rank".
			arr << [ 'strain', name ]
		elsif rank_all_no && rank_all_no != 28
			alt_name = name if fixed_list[pos - 1][1] > rank_all_no
		end
	end
	if arr.size < 7
		# The range is fixed before arr grows, so this fills up to index 7 ('strain').
		(arr.size..7).each { |x| arr << [ fixed_list[x][0], nil ] }
	elsif arr.size == 7
		arr << [ 'strain', arr[-1][1] ]
	end
	arr
end

#get_fixedrank_by_name(name) ⇒ Object



147
148
149
150
151
# File 'lib/ncbi_taxonomy.rb', line 147

# Resolves a name to its taxon ids and returns one fixed-rank table per id.
#
# @param name [String] name passed to get_taxonids_by_name
# @return [Array] the get_fixedrank_by_id result for every matching id, in
#   the order the ids were returned
def get_fixedrank_by_name name
	get_taxonids_by_name(name).map { |tax_id| get_fixedrank_by_id(tax_id) }
end

#get_missing_id(id) ⇒ Object



153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/ncbi_taxonomy.rb', line 153

# Looks up what became of a taxon id that is not found in the live tables.
#
# @param id [#to_i] taxon id; coerced to Integer before it is placed in the
#   SQL, so arbitrary text can no longer reach the query
# @return [Integer] -1 when the id is listed in delnodes; the new_tax_id when
#   merged holds exactly one row for it; -1 otherwise
def get_missing_id id
	id = id.to_i
	deleted = @db.execute "SELECT tax_id FROM delnodes WHERE tax_id='#{id}'"
	return -1 unless deleted.empty?

	merged = @db.execute "SELECT new_tax_id FROM merged WHERE old_tax_id=#{id}"
	merged.size == 1 ? merged[0][0].to_i : -1
end

#get_names_by_taxonid(id) ⇒ Object



54
55
56
# File 'lib/ncbi_taxonomy.rb', line 54

# Fetches the (name_class, name_txt) rows stored for one taxon.
#
# @param id [#to_i] taxon id; coerced to Integer like the other id-based
#   lookups, so arbitrary text can no longer reach the query
# @return [Array<Array>] the rows returned by the query
def get_names_by_taxonid id
	id = id.to_i
	@db.execute "SELECT name_class, name_txt FROM names WHERE tax_id=#{id}"
end

#get_rank_ptaxonid_by_id(id) ⇒ Object



74
75
76
77
78
# File 'lib/ncbi_taxonomy.rb', line 74

# Looks up the parent id and rank of one node.
#
# @param id [#to_i] taxon id; coerced to Integer before it is placed in the SQL
# @return [Array, nil] the first [parent_tax_id, rank] row, or nil when the
#   query returns no rows
def get_rank_ptaxonid_by_id id
	rows = @db.execute "SELECT parent_tax_id, rank FROM nodes WHERE tax_id=#{id.to_i}"
	rows.first
end

#get_rank_ptaxonid_scientificname_by_id(id) ⇒ Object



80
81
82
83
84
# File 'lib/ncbi_taxonomy.rb', line 80

# Looks up the parent id, rank and scientific name of one node.
#
# @param id [#to_i] taxon id; coerced to Integer before it is placed in the SQL
# @return [Array, nil] the first [parent_tax_id, rank, name_txt] row, or nil
#   when the query returns no rows
def get_rank_ptaxonid_scientificname_by_id id
	tax_id = id.to_i
	sql = "SELECT nodes.parent_tax_id, nodes.rank, names.name_txt FROM nodes, names WHERE nodes.tax_id=#{tax_id} AND names.tax_id=#{tax_id} AND names.name_class='scientific name'"
	@db.execute(sql).first
end

#get_scientific_name_by_id(id) ⇒ Object



62
63
64
65
66
# File 'lib/ncbi_taxonomy.rb', line 62

# Returns the scientific name recorded for a taxon.
#
# @param id [#to_i] taxon id; coerced to Integer before it is placed in the SQL
# @return [String, nil] name_txt of the first matching row, or nil when the
#   query returns no rows (this case used to raise NoMethodError)
def get_scientific_name_by_id id
	id = id.to_i
	row = @db.execute("SELECT name_txt FROM names WHERE tax_id=#{id} AND name_class='scientific name'").first
	row && row[0]
end

#get_scientific_name_by_names(names) ⇒ Object



58
59
60
# File 'lib/ncbi_taxonomy.rb', line 58

# Picks the scientific name out of a list of [name_class, name_txt] rows.
#
# @param names [Array<Array>] rows whose first element is the name class and
#   whose second element is the name text
# @return [String, nil] the text of the first row whose class is
#   'scientific name', or nil when there is none (the input array itself used
#   to be returned in that case)
def get_scientific_name_by_names names
	match = names.find { |x| x[0] == 'scientific name' }
	match && match[1]
end

#get_taxonids_by_name(name) ⇒ Object



48
49
50
51
52
# File 'lib/ncbi_taxonomy.rb', line 48

# Finds the distinct taxon ids whose name_txt equals +name+.
#
# @param name [String] name to search for; escaped with
#   SQLite3::Database.quote before it is placed in the SQL
# @return [Array] the flattened query result (one tax_id per element)
def get_taxonids_by_name name
	escaped = SQLite3::Database.quote(name)
	rows = @db.execute("SELECT DISTINCT tax_id FROM names WHERE name_txt='#{escaped}'")
	rows.flatten
end

#memory ⇒ Object



30
31
32
33
34
35
36
37
# File 'lib/ncbi_taxonomy.rb', line 30

# Copies the current database into a new in-memory database and switches all
# later queries to that copy.
#
# The connection that was copied from is closed once the switch is done; it
# used to be dropped while still open.
#
# @return [true]
def memory
	tmp = SQLite3::Database.new ':memory:'
	backup = SQLite3::Backup.new tmp, 'main', @db, 'main'
	begin
		# -1 asks the backup to copy everything in a single step.
		backup.step(-1)
	ensure
		# Release the backup handle even when the copy fails.
		backup.finish
	end
	source = @db
	@db = tmp
	@in_memory = true
	# The copy is complete, so the source connection is no longer needed.
	source.close
	true
end

#memory? ⇒ Boolean

Returns:

  • (Boolean)


39
40
41
# File 'lib/ncbi_taxonomy.rb', line 39

# Tells whether the database has been switched to an in-memory copy.
#
# @return [Boolean] the @in_memory flag: false after construction, true once
#   #memory has run
def memory?
	@in_memory
end

#mget_allrank_by_id(id_arr) ⇒ Object



183
184
185
# File 'lib/ncbi_taxonomy.rb', line 183

# Runs get_allrank_by_id for every id through mrun.
#
# @param id_arr [Array] taxon ids
# @return [Array] one get_allrank_by_id result per id, in input order
def mget_allrank_by_id(id_arr)
	mrun('get_allrank_by_id', id_arr)
end

#mget_allrank_by_name(name_arr) ⇒ Object



191
192
193
# File 'lib/ncbi_taxonomy.rb', line 191

# Runs get_allrank_by_name for every name through mrun.
#
# @param name_arr [Array] names to resolve
# @return [Array] one get_allrank_by_name result per name, in input order
def mget_allrank_by_name(name_arr)
	mrun('get_allrank_by_name', name_arr)
end

#mget_fixedrank_by_id(id_arr) ⇒ Object



187
188
189
# File 'lib/ncbi_taxonomy.rb', line 187

# Runs get_fixedrank_by_id for every id through mrun.
#
# @param id_arr [Array] taxon ids
# @return [Array] one get_fixedrank_by_id result per id, in input order
def mget_fixedrank_by_id(id_arr)
	mrun('get_fixedrank_by_id', id_arr)
end

#mget_fixedrank_by_name(name_arr) ⇒ Object



195
196
197
# File 'lib/ncbi_taxonomy.rb', line 195

# Runs get_fixedrank_by_name for every name through mrun.
#
# @param name_arr [Array] names to resolve
# @return [Array] one get_fixedrank_by_name result per name, in input order
def mget_fixedrank_by_name(name_arr)
	mrun('get_fixedrank_by_name', name_arr)
end

#mrun(cmd, arr) ⇒ Object



167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# File 'lib/ncbi_taxonomy.rb', line 167

# Calls the method named +cmd+ once per element of +arr+, each call in its
# own thread, and collects the results.
#
# @param cmd [String, Symbol] name of the method to call on this object
# @param arr [Array] arguments, one call per element
# @return [Array] the call results, in the same order as +arr+
def mrun cmd, arr
	# Start every worker first, then wait for them in order; Thread#value
	# joins the thread and hands back what its block returned.
	workers = arr.map do |el|
		Thread.new(el) { |myel| method(cmd).call(myel) }
	end
	workers.map(&:value)
end

#tax_rank_all ⇒ Object



12
13
14
# File 'lib/ncbi_taxonomy.rb', line 12

# Numbers every known rank name from 0 ("superkingdom") to 28 ("no rank").
#
# @return [Hash{String => Integer}] a freshly built rank-name => number table
def tax_rank_all
	# Each rank's number is its position in this list.
	[
		"superkingdom", "kingdom", "subkingdom",
		"superphylum", "phylum", "subphylum",
		"superclass", "class", "infraclass", "subclass",
		"superorder", "order", "suborder", "infraorder", "parvorder",
		"superfamily", "family", "subfamily",
		"tribe", "subtribe",
		"genus", "subgenus",
		"species group", "species subgroup", "species", "subspecies",
		"varietas", "forma",
		"no rank"
	].each_with_index.to_h
end

#tax_rank_fixed ⇒ Object



8
9
10
# File 'lib/ncbi_taxonomy.rb', line 8

# Lists the eight ranks used for fixed-rank output, in output order, each with
# a rank number.
#
# NOTE(review): "genus" is 21 here but 20 in tax_rank_all — confirm which value
# is intended.
#
# @return [Hash{String => Integer}] a freshly built rank-name => number table
def tax_rank_fixed
	{
		"superkingdom" => 0,
		"phylum" => 4,
		"class" => 7,
		"order" => 11,
		"family" => 16,
		"genus" => 21,
		"species" => 24,
		"strain" => 28
	}
end

#using_unique_name ⇒ Object



4
5
6
# File 'lib/ncbi_taxonomy.rb', line 4

# Returns a table of names flagged with 1.
# NOTE(review): nothing in the visible code reads this table; presumably it
# marks names that need the unique-name form — confirm against callers.
#
# @return [Hash{String => Integer}] a freshly built name => 1 table
def using_unique_name
	{
		"Ponticoccus" => 1,
		"Bacillus" => 1
	}
end