Module: Roebe::SqlParadise::Commands
Overview
Roebe::SqlParadise::Commands
Constant Summary collapse
- NAMES_SQL =
#
Some Safeguards next.
#
'names.sql'
- NODES_SQL =
'nodes.sql'
- FASTA_SQL =
'fasta.sql'
- N =
#
N
#
"\n"
- BE_VERBOSE =
#
BE_VERBOSE
#
false
- FASTA_ENTRIES =
#
FASTA_ENTRIES
These are the entries for the Taxonomy Fasta file, the main reference.
It has 10 entries.
path is the filepath. taxid is an integer.
#
' name varchar(80), taxid integer, type smallint, lineage_ids varchar(500), lineage_scientific_name varchar(2500), comment_field varchar(8000), filesize integer, modification_time varchar(28), n_accession_numbers integer, path varchar(130) '
Class Method Summary collapse
-
.create_and_save_table(i, be_verbose = false) ⇒ Object
# === Roebe::SqlParadise::Commands.create_and_save_table.
-
.create_table(i) ⇒ Object
# === Roebe::SqlParadise::Commands.create_table.
-
.set_temp_dir(i) ⇒ Object
# === Roebe::SqlParadise::Commands.set_temp_dir ========================================================================= #.
-
.temp_dir? ⇒ Boolean
# === Roebe::SqlParadise::Commands.temp_dir? ========================================================================= #.
Instance Method Summary collapse
-
#create(i = 'nodes.sql') ⇒ Object
# === create.
-
#create_and_save_table(i, be_verbose = false) ⇒ Object
# === create_and_save_table ========================================================================= #.
-
#create_names_table(be_verbose = BE_VERBOSE) ⇒ Object
(also: #names_table?, #table?, #create_names)
# === create_names_table.
-
#create_nodes_table ⇒ Object
(also: #nodes_table?)
# === create_nodes_table.
-
#create_populate_and_save_table(i) ⇒ Object
# === create_populate_and_save_table ========================================================================= #.
-
#create_sql_command_for(i = :table_names) ⇒ Object
# === create_sql_command_for.
-
#create_sqlite_database(i = :nodes, be_verbose = true) ⇒ Object
# === create_sqlite_database.
-
#create_table_names ⇒ Object
# === create_table_names.
-
#do_precreate_actions(i) ⇒ Object
# === do_precreate_actions.
-
#esystem(i) ⇒ Object
# === esystem ========================================================================= #.
-
#populate(i = :nodes, be_verbose = false, store_here = nil) ⇒ Object
# === populate (populate tag) ========================================================================= #.
-
#populate_citations_table ⇒ Object
# === populate_citations_table.
-
#populate_into(i, be_verbose = false) ⇒ Object
# === populate_into ========================================================================= #.
-
#populate_names_table(be_verbose = BE_VERBOSE, save_here = NAMES_SQL) ⇒ Object
(also: #populate_table, #create_names_database, #populate_names)
# === populate_names_table.
-
#populate_nodes_table(be_verbose = BE_VERBOSE, save_here = NODES_SQL) ⇒ Object
(also: #populate_nodes, #create_nodes)
# === populate_nodes_table.
-
#sql(i = :nothing) ⇒ Object
# === sql.
-
#sql_names ⇒ Object
# === sql_names ========================================================================= #.
-
#table(i) ⇒ Object
# === table ========================================================================= #.
-
#temp_dir? ⇒ Boolean
# === temp_dir? ========================================================================= #.
Class Method Details
.create_and_save_table(i, be_verbose = false) ⇒ Object
#
Roebe::SqlParadise::Commands.create_and_save_table
This method will not only create the table but also store it somewhere, into a .sql file. By default, we will store into the TEMP_DIR directory.
#
423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 |
# File 'lib/roebe/sql_paradise/commands.rb', line 423 def self.create_and_save_table( i, be_verbose = false ) case be_verbose # ======================================================================= # # === :be_quiet # ======================================================================= # when :be_quiet be_verbose = false end i = i.to_s.dup i << '.sql' unless i.end_with? '.sql' result = Commands.create_table(i).to_s+N i = temp_dir?+i unless i.include?('/') if be_verbose e "Now storing into file #{sfile(i)}." end # ======================================================================= # # Next, save it. # ======================================================================= # append_what_into(result, i) end |
.create_table(i) ⇒ Object
#
Roebe::SqlParadise::Commands.create_table
This method will not actually create the table - it will merely give back the proper SQL command which we use to create that table.
#
452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 |
# File 'lib/roebe/sql_paradise/commands.rb', line 452 def self.create_table(i) i = i.to_s if i.include?('.') and !i.include?('/') i = i[0, i.index('.')] end i = i.to_s case i # case tag # ======================================================================= # # === names # ======================================================================= # when 'names', '1','taxonomy_names' return Roebe::SqlParadise::CreateTable[:names, 'taxid int, name_txt varchar(155), unique_name varchar(100), name_class varchar(25) ' ] # ======================================================================= # # === nodes # ======================================================================= # when 'nodes', '2','taxonomy_nodes' return Roebe::SqlParadise::CreateTable[:nodes, 'taxid int, parent_taxid int, rank varchar(25)' ] # ======================================================================= # # The Fasta table comes next. It has these entries: # # (1) name # (2) DNA or Protein (0 is DNA, 1 is Protein) # (3) lineage_ids # (4) lineage_scientific_name # (5) accession_number # (6) dataset (optional for now) # # ======================================================================= # when 'fasta', '3','taxonomy_fasta', /fasta\.?sql$/ return Roebe::SqlParadise::CreateTable[ :fasta, FASTA_ENTRIES # This constant is defined at the top of this file here. ] else ewarn 'In method create_table(): We did not find the input: `'+ sfancy(i)+swarn('`') end end |
.set_temp_dir(i) ⇒ Object
#
Roebe::SqlParadise::Commands.set_temp_dir
#
63 64 65 |
# File 'lib/roebe/sql_paradise/commands.rb', line 63 def self.set_temp_dir(i) @temp_dir = i end |
.temp_dir? ⇒ Boolean
#
Roebe::SqlParadise::Commands.temp_dir?
#
70 71 72 |
# File 'lib/roebe/sql_paradise/commands.rb', line 70 def self.temp_dir? @temp_dir end |
Instance Method Details
#create(i = 'nodes.sql') ⇒ Object
#
create
Use this method to create a specific .sql file.
#
319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 |
# File 'lib/roebe/sql_paradise/commands.rb', line 319 def create(i = 'nodes.sql') i = i.to_s case i # === nodes.sql when 'nodes.sql' remove_file(i) append_what_into(create_table(:nodes), i) populate_into(i) e 'Finished writing into '+sfile(i) # esystem 'sqlite3 '+i.to_s+'.db "'+sql_command+'"' # === names.sql when 'names.sql' remove_file(i) append_what_into(create_table(:names), i) populate_into(i) e 'Finished writing into '+sfile(i) end end |
#create_and_save_table(i, be_verbose = false) ⇒ Object
#
create_and_save_table
#
586 587 588 |
# File 'lib/roebe/sql_paradise/commands.rb', line 586 def create_and_save_table(i, be_verbose = false) Roebe::SqlParadise::Commands.create_and_save_table(i, be_verbose) end |
#create_names_table(be_verbose = BE_VERBOSE) ⇒ Object Also known as: names_table?, table?, create_names
#
create_names_table
This will fetch the information from names.dmp and create a corresponding SQL database.
Let’s keep in mind that the taxonomy names file has these fields:
taxid -- the id of node associated with this name
name_txt -- name itself
unique name -- the unique variant of this name if name not unique
name class -- (synonym, common name)
#
200 201 202 203 204 205 206 |
# File 'lib/roebe/sql_paradise/commands.rb', line 200 def create_names_table(be_verbose = BE_VERBOSE) sql_command = create_table_names e sql_command if be_verbose # The next line is only required when we wish to initialize our table from scratch.' # append_what_into(sql_command+N, 'create_names_database.sql') populate_names_table(be_verbose) end |
#create_nodes_table ⇒ Object Also known as: nodes_table?
#
create_nodes_table
This will create the nodes table.
We need only three entries - tax id, parent id and rank.
taxid -- node id in GenBank taxonomy database
parent taxid -- parent node id in GenBank taxonomy database
rank -- rank of this node (superkingdom, kingdom)
#
178 179 180 181 182 183 184 |
# File 'lib/roebe/sql_paradise/commands.rb', line 178 def create_nodes_table sql_command = create_sql_command_for(:nodes) e sql_command if BE_VERBOSE # The next line is only required when we wish to initialize our table from scratch.' # append_what_into(sql_command+N, 'create_nodes_database.sql') populate_nodes_table end |
#create_populate_and_save_table(i) ⇒ Object
#
create_populate_and_save_table
#
341 342 343 344 |
# File 'lib/roebe/sql_paradise/commands.rb', line 341 def create_populate_and_save_table(i) create_and_save_table(i) populate(i) end |
#create_sql_command_for(i = :table_names) ⇒ Object
#
create_sql_command_for
This creates the SQL command for initializing the name. The default layout for these tables can also be seen here:
-
The first table (nodes) has int int varchar structure. Typical values for the second entry are like ‘335928’.
-
The second table has 4x varchars.
#
243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 |
# File 'lib/roebe/sql_paradise/commands.rb', line 243 def create_sql_command_for( i = :table_names ) sql_command = '' case i # ======================================================================= # # === :table_nodes # ======================================================================= # when :table_nodes, :nodes, 1 # nodes tag. sql_command = Commands.create_table :nodes # ======================================================================= # # === :table_names # ======================================================================= # when :table_names, :names, 2 # names tag. sql_command = Commands.create_table :names end return sql_command # We return that command. end |
#create_sqlite_database(i = :nodes, be_verbose = true) ⇒ Object
#
create_sqlite_database
Use this method to create a sqlite database.
#
279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 |
# File 'lib/roebe/sql_paradise/commands.rb', line 279 def create_sqlite_database( i = :nodes, be_verbose = true ) case i # ======================================================================= # # === :nodes # ======================================================================= # when :nodes store_into = i.to_s+'.db' remove_file(store_into) sql_command = Commands.create_table :nodes # Run the sqlite3-command next: esystem 'sqlite3 '+i.to_s+'.db "'+sql_command+'"' readlines(:nodes).each {|entry| entry = tokenize(entry)[0..2] taxid = entry[0] parent_taxid = entry[1] rank = pad_with_single_quotes(entry[2]).chomp sql_command = InsertInto[i, taxid+', '+parent_taxid+', '+rank] esystem 'sqlite3 '+i.to_s+'.db "'+sql_command+'"' } 'Finished creating '+sfile(i.to_s+'.db') # ======================================================================= # # === :names # ======================================================================= # when :names store_into = i.to_s+'.db' remove_file(store_into) _ = create_sql_command_for(i) esystem 'sqlite3 '+i.to_s+'.db "'+_+'"' populate_names_table(false, i.to_s+'.db') e 'Finished creating '+sfile(i.to_s+'.db') end end |
#create_table_names ⇒ Object
#
create_table_names
This method will create the SQL table.
#
215 216 217 |
# File 'lib/roebe/sql_paradise/commands.rb', line 215 def create_table_names create_sql_command_for(:table_names) end |
#do_precreate_actions(i) ⇒ Object
#
do_precreate_actions
This method will delete an already existing .sql file before continuing. That way we can clean up from scratch.
#
121 122 123 124 125 126 127 128 129 130 |
# File 'lib/roebe/sql_paradise/commands.rb', line 121 def do_precreate_actions(i) if File.exist? i e 'An older file seems to already exist in this '+ 'directory ('+sdir(Dir.pwd)+') at: '+sfile(i) e 'We will delete that old file before continuing.' remove_file(i) e 'Ok, done deleting. Ready to continue. (This may take '+ 'a while, please be patient)' end end |
#esystem(i) ⇒ Object
#
esystem
#
269 270 271 272 |
# File 'lib/roebe/sql_paradise/commands.rb', line 269 def esystem(i) e i system i end |
#populate(i = :nodes, be_verbose = false, store_here = nil) ⇒ Object
#
populate (populate tag)
#
399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 |
# File 'lib/roebe/sql_paradise/commands.rb', line 399 def populate(i = :nodes, be_verbose = false, store_here = nil) i = i.to_s case i when 'nodes', '1', 'nodes.sql' store_here = NODES_SQL if store_here.nil? populate_nodes_table(be_verbose, store_here) when 'names', '2', 'names.sql' store_here = NAMES_SQL if store_here.nil? populate_names_table(be_verbose, store_here) when 'fasta', '3', 'fasta.sql' store_here = NAMES_SQL if store_here.nil? populate_fasta_table(be_verbose, store_here) else ewarn 'In populate(), not found the input file: `'+sfancy(i)+'`' end end |
#populate_citations_table ⇒ Object
#
populate_citations_table
We use this method to populate the citations table with proper data.
#
105 106 107 108 109 110 111 112 113 |
# File 'lib/roebe/sql_paradise/commands.rb', line 105 def populate_citations_table citations?.each {|entry| sql_command = 'INSERT INTO citations '\ '(cit_id, cit_key, pubmed_id, medline_id, url, text, taxid_list) '\ "VALUES ('"+pad(entry)+"')" append_what_into(sql_command, 'citations.sql') e sql_command if BE_VERBOSE } end |
#populate_into(i, be_verbose = false) ⇒ Object
#
populate_into
#
349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 |
# File 'lib/roebe/sql_paradise/commands.rb', line 349 def populate_into(i, be_verbose = false) Roebe::SqlParadise.be_silent i = i.to_s if i.include? '.' _ = i[0, i.index('.')] else _ = i end case _ when 'names' # This has: # taxid, name_txt, unique_name, name_class readlines(_).each {|entry| e 'DEBUG: '+entry if be_verbose entry = split_at(entry) if be_verbose e 'DEBUG: '; pp entry end # Now entry will look like: # ["228", "ATCC 14393", "", "type material", ""] taxid = entry[0].to_s # First entry is an Integer. name_txt = pad_with_single_quotes(entry[1]) unique_name = pad_with_single_quotes(entry[2]) name_class = pad_with_single_quotes(entry[3].delete("'")) sql_command = InsertInto[_, taxid+', '+name_txt+', '+unique_name+', '+name_class] append_what_into(sql_command+N, i) } when 'nodes' readlines(_).each {|entry| e 'DEBUG: '+entry if be_verbose entry = tokenize(entry)[0..2] if be_verbose e 'DEBUG: '; pp entry end taxid = entry.first parent_taxid = pad_with_single_quotes(entry[1]) rank = pad_with_single_quotes(entry[2]).chomp sql_command = InsertInto[_, taxid+', '+parent_taxid+', '+rank] append_what_into(sql_command+N, i) } else # else assume we populate from a .fasta file here (default tag) # This has name, type, lineage_ids, lineage_scientific_name, accession_number, dataset pp i pp '^^^ populating into fasta now.' end end |
#populate_names_table(be_verbose = BE_VERBOSE, save_here = NAMES_SQL) ⇒ Object Also known as: populate_table, create_names_database, populate_names
#
populate_names_table
The names table can be huge, 1_672_079 entries in total.
Specific entries look like this:
INSERT INTO names (taxid, name_txt, unique_name, name_class)
VALUES ('1457285','Anatrichosomatidae','','scientific name');
The first entry is int, the other three are varchars.
#
512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 |
# File 'lib/roebe/sql_paradise/commands.rb', line 512 def populate_names_table( be_verbose = BE_VERBOSE, save_here = NAMES_SQL # This will be "names.sql" ) be_verbose = true if be_verbose == :be_verbose max_entry = 1 readlines(:names).each {|entry| entry.chomp! # First, we eliminate those pesky newlines. # Next, we must split at the tabulator and | token => "\t|" entry = split_at(entry) taxid = entry.first.to_s # First entry is integer. sql_command = InsertInto[:names, '(taxid, name_txt, unique_name, name_class)', taxid.to_s+','+ pad_with_single_quotes(entry[1])+','+ pad_with_single_quotes(entry[2])+','+ pad_with_single_quotes(entry[3]) ]+N e sql_command if be_verbose append_what_into(sql_command, save_here) max_entry += 1 } max_entry = sfancy(max_entry.to_s) save_here = Dir.pwd+'/'+save_here unless save_here.include? '/' e 'Finished storing into '+sfile(save_here) if be_verbose e 'We found '+max_entry+' lines in the names table.' end |
#populate_nodes_table(be_verbose = BE_VERBOSE, save_here = NODES_SQL) ⇒ Object Also known as: populate_nodes, create_nodes
#
populate_nodes_table
We need only three entries here - taxid, parent_taxid and rank. Right now these are int, int, varchar.
#
548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 |
# File 'lib/roebe/sql_paradise/commands.rb', line 548 def populate_nodes_table( be_verbose = BE_VERBOSE, save_here = NODES_SQL ) be_verbose = true if be_verbose == :be_verbose max_entry = 1 readlines(:nodes).each {|entry| entry = tokenize(entry)[0..2] # We need only the first 3 entries here. taxid = entry[0] # taxid parent_taxid = entry[1] # parent_taxid rank = pad_with_single_quotes( entry[2] ) # rank sql_command = InsertInto[:nodes, '(taxid, parent_taxid, rank)', taxid.to_s+','+ parent_taxid+','+ rank ]+N e sql_command if be_verbose append_what_into(sql_command, save_here) max_entry += 1 } max_entry = sfancy(max_entry.to_s) save_here = Dir.pwd+'/'+save_here unless save_here.include? '/' e 'Finished storing into '+sfile(save_here) if be_verbose e 'We found '+max_entry+' lines in the nodes table.' end |
#sql(i = :nothing) ⇒ Object
#
sql
Default is to do nothing. Shortcuts exist as input 1 and 2, which will create either names (1) or nodes (2).
#
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
# File 'lib/roebe/sql_paradise/commands.rb', line 138 def sql( i = :nothing ) i = i.to_sym if i.is_a? String e 'Debug in method sql() - input was `'+i.to_s+'`.' case i # ======================================================================= # # === :names # ======================================================================= # when :names, 1, nil # This is the default. i = 'names.sql' do_precreate_actions(i) create_names_table e 'All statements were written into the file `'+i+'`.' # ======================================================================= # # === :nodes # ======================================================================= # when :nodes, 2 i = 'nodes.sql' do_precreate_actions(i) create_nodes_table e "All statements were written into the file `#{i}`." end end |
#sql_names ⇒ Object
#
sql_names
#
229 230 231 |
# File 'lib/roebe/sql_paradise/commands.rb', line 229 def sql_names create_sql_command_for :names end |
#table(i) ⇒ Object
#
table
#
222 223 224 |
# File 'lib/roebe/sql_paradise/commands.rb', line 222 def table(i) e create_sql_command_for(i) # table tag end |
#temp_dir? ⇒ Boolean
#
temp_dir?
#
579 580 581 |
# File 'lib/roebe/sql_paradise/commands.rb', line 579 def temp_dir? ::Command.temp_dir? end |