Module: Roebe::SqlParadise::Commands

Extended by:
Commands
Included in:
Commands
Defined in:
lib/roebe/sql_paradise/commands.rb

Overview

Roebe::SqlParadise::Commands

Constant Summary collapse

NAMES_SQL =
#

Some Safeguards next.

#
'names.sql'
NODES_SQL =
'nodes.sql'
FASTA_SQL =
'fasta.sql'
N =
#

N

#
"\n"
BE_VERBOSE =
#

BE_VERBOSE

#
false
FASTA_ENTRIES =
#

FASTA_ENTRIES

These are the entries for the Taxonomy Fasta file, the main reference.

It has 10 entries.

path is the filepath.
taxid is an integer.
#
'
  name varchar(80),
  taxid integer,
  type smallint,
  lineage_ids varchar(500),
  lineage_scientific_name varchar(2500),
  comment_field varchar(8000),
  filesize integer,
  modification_time varchar(28),
  n_accession_numbers integer,
  path varchar(130)
'

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.create_and_save_table(i, be_verbose = false) ⇒ Object

#

Roebe::SqlParadise::Commands.create_and_save_table

This method will not only create the table but also store it somewhere, into a .sql file. By default, we will store into the TEMP_DIR directory.

#


423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
# File 'lib/roebe/sql_paradise/commands.rb', line 423

def self.create_and_save_table(
    i, be_verbose = false
  )
  case be_verbose
  # ======================================================================= #
  # === :be_quiet
  # ======================================================================= #
  when :be_quiet
    be_verbose = false
  end
  i = i.to_s.dup
  i << '.sql' unless i.end_with? '.sql'
  result = Commands.create_table(i).to_s+N
  i = temp_dir?+i unless i.include?('/')
  if be_verbose
    e "Now storing into file #{sfile(i)}."
  end
  # ======================================================================= #
  # Next, save it.
  # ======================================================================= #
  append_what_into(result, i)
end

.create_table(i) ⇒ Object

#

Roebe::SqlParadise::Commands.create_table

This method will not actually create the table - it will merely give back the proper SQL command which we use to create that table.

#


452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
# File 'lib/roebe/sql_paradise/commands.rb', line 452

def self.create_table(i)
  i = i.to_s
  if i.include?('.') and !i.include?('/')
    i = i[0, i.index('.')]
  end
  i = i.to_s
  case i # case tag
  # ======================================================================= #
  # === names
  # ======================================================================= #
  when 'names', '1','taxonomy_names'
    return Roebe::SqlParadise::CreateTable[:names,
     'taxid       int,
      name_txt    varchar(155),
      unique_name varchar(100),
      name_class  varchar(25)
     '
    ]
  # ======================================================================= #
  # === nodes
  # ======================================================================= #
  when 'nodes', '2','taxonomy_nodes'
    return Roebe::SqlParadise::CreateTable[:nodes,
      'taxid int, parent_taxid int, rank varchar(25)'
    ]
  # ======================================================================= #
  # The Fasta table comes next. It has these entries:
  #
  #   (1) name 
  #   (2) DNA or Protein (0 is DNA, 1 is Protein)
  #   (3) lineage_ids
  #   (4) lineage_scientific_name
  #   (5) accession_number
  #   (6) dataset (optional for now)
  #
  # ======================================================================= #
  when 'fasta', '3','taxonomy_fasta',
       /fasta\.?sql$/
    return Roebe::SqlParadise::CreateTable[
      :fasta,
      FASTA_ENTRIES # This constant is defined at the top of this file here.
    ]
  else
    ewarn 'In method create_table(): We did not find the input: `'+
          sfancy(i)+swarn('`')
  end
end

.set_temp_dir(i) ⇒ Object

#

Roebe::SqlParadise::Commands.set_temp_dir

#


63
64
65
# File 'lib/roebe/sql_paradise/commands.rb', line 63

def self.set_temp_dir(i)
  @temp_dir = i
end

.temp_dir?Boolean

#

Roebe::SqlParadise::Commands.temp_dir?

#

Returns:

  • (Boolean)


70
71
72
# File 'lib/roebe/sql_paradise/commands.rb', line 70

def self.temp_dir?
  @temp_dir
end

Instance Method Details

#create(i = 'nodes.sql') ⇒ Object

#

create

Use this method to create a specific .sql file.

#


319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
# File 'lib/roebe/sql_paradise/commands.rb', line 319

def create(i = 'nodes.sql')
  i = i.to_s
  case i
  # === nodes.sql
  when 'nodes.sql'
    remove_file(i)
    append_what_into(create_table(:nodes), i)
    populate_into(i)
    e 'Finished writing into '+sfile(i)
    # esystem 'sqlite3 '+i.to_s+'.db "'+sql_command+'"'
  # === names.sql
  when 'names.sql'
    remove_file(i)
    append_what_into(create_table(:names), i)
    populate_into(i)
    e 'Finished writing into '+sfile(i)
  end
end

#create_and_save_table(i, be_verbose = false) ⇒ Object

#

create_and_save_table

#


586
587
588
# File 'lib/roebe/sql_paradise/commands.rb', line 586

def create_and_save_table(i, be_verbose = false)
  Roebe::SqlParadise::Commands.create_and_save_table(i, be_verbose)
end

#create_names_table(be_verbose = BE_VERBOSE) ⇒ Object Also known as: names_table?, table?, create_names

#

create_names_table

This will fetch the information from names.dmp and create a corresponding SQL database.

Let’s keep in mind that the taxonomy names file has these fields:

taxid -- the id of node associated with this name
name_txt -- name itself
unique name -- the unique variant of this name if name not unique
name class -- (synonym, common name)
#


200
201
202
203
204
205
206
# File 'lib/roebe/sql_paradise/commands.rb', line 200

def create_names_table(be_verbose = BE_VERBOSE)
  sql_command = create_table_names
  e sql_command if be_verbose
  # The next line is only required when we wish to initialize our table from scratch.'
  # append_what_into(sql_command+N, 'create_names_database.sql')
  populate_names_table(be_verbose)
end

#create_nodes_tableObject Also known as: nodes_table?

#

create_nodes_table

This will create the nodes table.

We need only three entries - tax id, parent id and rank.

taxid -- node id in GenBank taxonomy database
parent taxid -- parent node id in GenBank taxonomy database
rank -- rank of this node (superkingdom, kingdom)
#


178
179
180
181
182
183
184
# File 'lib/roebe/sql_paradise/commands.rb', line 178

def create_nodes_table
  sql_command = create_sql_command_for(:nodes)
  e sql_command if BE_VERBOSE
  # The next line is only required when we wish to initialize our table from scratch.'
  # append_what_into(sql_command+N, 'create_nodes_database.sql')
  populate_nodes_table
end

#create_populate_and_save_table(i) ⇒ Object

#

create_populate_and_save_table

#


341
342
343
344
# File 'lib/roebe/sql_paradise/commands.rb', line 341

def create_populate_and_save_table(i)
  create_and_save_table(i)
  populate(i)
end

#create_sql_command_for(i = :table_names) ⇒ Object

#

create_sql_command_for

This creates the SQL command for initializing the name. The default layout for these tables can also be seen here:

  • The first table (nodes) has int int varchar structure. Typical values for the second entry are like ‘335928’.

  • The second table has 4x varchars.

#


243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
# File 'lib/roebe/sql_paradise/commands.rb', line 243

def create_sql_command_for(
    i = :table_names
  )
  sql_command = ''
  case i
  # ======================================================================= #
  # === :table_nodes
  # ======================================================================= #
  when :table_nodes,
       :nodes,
       1 # nodes tag.
    sql_command = Commands.create_table :nodes
  # ======================================================================= #
  # === :table_names
  # ======================================================================= #
  when :table_names,
       :names,
       2 # names tag.
    sql_command = Commands.create_table :names
  end
  return sql_command # We return that command.
end

#create_sqlite_database(i = :nodes, be_verbose = true) ⇒ Object

#

create_sqlite_database

Use this method to create a sqlite database.

#


279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
# File 'lib/roebe/sql_paradise/commands.rb', line 279

def create_sqlite_database(
    i = :nodes, be_verbose = true
  )
  case i
  # ======================================================================= #
  # === :nodes
  # ======================================================================= #
  when :nodes
    store_into = i.to_s+'.db'
    remove_file(store_into)
    sql_command = Commands.create_table :nodes
    # Run the sqlite3-command next:
    esystem 'sqlite3 '+i.to_s+'.db "'+sql_command+'"'
    readlines(:nodes).each {|entry|
      entry = tokenize(entry)[0..2]
      taxid = entry[0]
      parent_taxid = entry[1]
      rank = pad_with_single_quotes(entry[2]).chomp
      sql_command = InsertInto[i, taxid+', '+parent_taxid+', '+rank]
      esystem 'sqlite3 '+i.to_s+'.db "'+sql_command+'"'
    }
    'Finished creating '+sfile(i.to_s+'.db')
  # ======================================================================= #
  # === :names
  # ======================================================================= #
  when :names
    store_into = i.to_s+'.db'
    remove_file(store_into)
    _ = create_sql_command_for(i)
    esystem 'sqlite3 '+i.to_s+'.db "'+_+'"'
    populate_names_table(false, i.to_s+'.db')
    e 'Finished creating '+sfile(i.to_s+'.db')
  end
end

#create_table_namesObject

#

create_table_names

This method will create the SQL table.

#


215
216
217
# File 'lib/roebe/sql_paradise/commands.rb', line 215

def create_table_names
  create_sql_command_for(:table_names)
end

#do_precreate_actions(i) ⇒ Object

#

do_precreate_actions

This method will delete an already existing .sql file before continuing. That way we can clean up from scratch.

#


121
122
123
124
125
126
127
128
129
130
# File 'lib/roebe/sql_paradise/commands.rb', line 121

def do_precreate_actions(i)
  if File.exist? i
    e 'An older file seems to already exist in this '+
      'directory ('+sdir(Dir.pwd)+') at: '+sfile(i)
    e 'We will delete that old file before continuing.'
    remove_file(i)
    e 'Ok, done deleting. Ready to continue. (This may take '+
    'a while, please be patient)'
  end
end

#esystem(i) ⇒ Object

#

esystem

#


269
270
271
272
# File 'lib/roebe/sql_paradise/commands.rb', line 269

def esystem(i)
  e i
  system i
end

#populate(i = :nodes, be_verbose = false, store_here = nil) ⇒ Object

#

populate (populate tag)

#


399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
# File 'lib/roebe/sql_paradise/commands.rb', line 399

def populate(i = :nodes, be_verbose = false, store_here = nil)
  i = i.to_s
  case i
  when 'nodes', '1', 'nodes.sql'
    store_here = NODES_SQL if store_here.nil?
    populate_nodes_table(be_verbose, store_here)
  when 'names', '2', 'names.sql'
    store_here = NAMES_SQL if store_here.nil?
    populate_names_table(be_verbose, store_here)
  when 'fasta', '3', 'fasta.sql'
    store_here = NAMES_SQL if store_here.nil?
    populate_fasta_table(be_verbose, store_here)
  else
    ewarn 'In populate(), not found the input file: `'+sfancy(i)+'`'
  end
end

#populate_citations_tableObject

#

populate_citations_table

We use this method to populate the citations table with proper data.

#


105
106
107
108
109
110
111
112
113
# File 'lib/roebe/sql_paradise/commands.rb', line 105

def populate_citations_table
  citations?.each {|entry|
    sql_command = 'INSERT INTO citations '\
      '(cit_id, cit_key, pubmed_id, medline_id, url, text, taxid_list) '\
      "VALUES ('"+pad(entry)+"')"
    append_what_into(sql_command, 'citations.sql')
    e sql_command if BE_VERBOSE
  }
end

#populate_into(i, be_verbose = false) ⇒ Object

#

populate_into

#


349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
# File 'lib/roebe/sql_paradise/commands.rb', line 349

def populate_into(i, be_verbose = false)
  Roebe::SqlParadise.be_silent
  i = i.to_s
  if i.include? '.'
    _ = i[0, i.index('.')]
  else
    _ = i
  end
  case _
  when 'names'
    # This has:
    #   taxid, name_txt, unique_name, name_class
    readlines(_).each {|entry|
      e 'DEBUG: '+entry if be_verbose
      entry = split_at(entry)
      if be_verbose
        e 'DEBUG: '; pp entry
      end
      # Now entry will look like:
      #   ["228", "ATCC 14393", "", "type material", ""]
      taxid      = entry[0].to_s # First entry is an Integer.
      name_txt    = pad_with_single_quotes(entry[1])
      unique_name = pad_with_single_quotes(entry[2])
      name_class  = pad_with_single_quotes(entry[3].delete("'"))
      sql_command = InsertInto[_, taxid+', '+name_txt+', '+unique_name+', '+name_class]
      append_what_into(sql_command+N, i)
    }
  when 'nodes'
    readlines(_).each {|entry|
      e 'DEBUG: '+entry if be_verbose
      entry = tokenize(entry)[0..2]
      if be_verbose
        e 'DEBUG: '; pp entry
      end        
      taxid = entry.first
      parent_taxid = pad_with_single_quotes(entry[1])
      rank = pad_with_single_quotes(entry[2]).chomp
      sql_command = InsertInto[_, taxid+', '+parent_taxid+', '+rank]
      append_what_into(sql_command+N, i)
    }
  else # else assume we populate from a .fasta file here (default tag)
    # This has name, type, lineage_ids, lineage_scientific_name, accession_number, dataset
    pp i
    pp '^^^ populating into fasta now.'
  end
end

#populate_names_table(be_verbose = BE_VERBOSE, save_here = NAMES_SQL) ⇒ Object Also known as: populate_table, create_names_database, populate_names

#

populate_names_table

The names table can be huge, 1_672_079 entries in total.

Specific entries look like this:

INSERT INTO names (taxid, name_txt, unique_name, name_class)
VALUES ('1457285','Anatrichosomatidae','','scientific name');

The first entry is int, the other three are varchars.

#


512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
# File 'lib/roebe/sql_paradise/commands.rb', line 512

def populate_names_table(
    be_verbose = BE_VERBOSE,
    save_here  = NAMES_SQL # This will be "names.sql"
  )
  be_verbose = true if be_verbose == :be_verbose
  max_entry = 1
  readlines(:names).each {|entry|
    entry.chomp! # First, we eliminate those pesky newlines.
    # Next, we must split at the tabulator and | token => "\t|"
    entry = split_at(entry)
    taxid = entry.first.to_s # First entry is integer.
    sql_command = InsertInto[:names,
      '(taxid, name_txt, unique_name, name_class)',
      taxid.to_s+','+
      pad_with_single_quotes(entry[1])+','+
      pad_with_single_quotes(entry[2])+','+
      pad_with_single_quotes(entry[3])
    ]+N
    e sql_command if be_verbose
    append_what_into(sql_command, save_here)
    max_entry += 1
  }
  max_entry = sfancy(max_entry.to_s)
  save_here = Dir.pwd+'/'+save_here unless save_here.include? '/'
  e 'Finished storing into '+sfile(save_here) if be_verbose
  e 'We found '+max_entry+' lines in the names table.'
end

#populate_nodes_table(be_verbose = BE_VERBOSE, save_here = NODES_SQL) ⇒ Object Also known as: populate_nodes, create_nodes

#

populate_nodes_table

We need only three entries here - taxid, parent_taxid and rank. Right now these are int, int, varchar.

#


548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
# File 'lib/roebe/sql_paradise/commands.rb', line 548

def populate_nodes_table(
    be_verbose = BE_VERBOSE,
    save_here = NODES_SQL
  )
  be_verbose = true if be_verbose == :be_verbose
  max_entry = 1
  readlines(:nodes).each {|entry|
    entry = tokenize(entry)[0..2] # We need only the first 3 entries here.
    taxid         = entry[0]                        # taxid
    parent_taxid  = entry[1]                        # parent_taxid
    rank          = pad_with_single_quotes( entry[2] ) # rank
    sql_command = InsertInto[:nodes,
      '(taxid, parent_taxid, rank)',
      taxid.to_s+','+
      parent_taxid+','+
      rank
    ]+N
    e sql_command if be_verbose
    append_what_into(sql_command, save_here)
    max_entry += 1
  }
  max_entry = sfancy(max_entry.to_s)
  save_here = Dir.pwd+'/'+save_here unless save_here.include? '/'
  e 'Finished storing into '+sfile(save_here) if be_verbose
  e 'We found '+max_entry+' lines in the nodes table.'
end

#sql(i = :nothing) ⇒ Object

#

sql

Default is to do nothing. Shortcuts exist as input 1 and 2, which will create either names (1) or nodes (2).

#


138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/roebe/sql_paradise/commands.rb', line 138

def sql(
    i = :nothing
  )
  i = i.to_sym if i.is_a? String
  e 'Debug in method sql() - input was `'+i.to_s+'`.'
  case i
  # ======================================================================= #
  # === :names
  # ======================================================================= #
  when :names,
       1,
       nil # This is the default.
    i = 'names.sql'
    do_precreate_actions(i)
    create_names_table
    e 'All statements were written into the file `'+i+'`.'
  # ======================================================================= #
  # === :nodes
  # ======================================================================= #
  when :nodes,
       2
    i = 'nodes.sql'
    do_precreate_actions(i)
    create_nodes_table
    e "All statements were written into the file `#{i}`."
  end
end

#sql_namesObject

#

sql_names

#


229
230
231
# File 'lib/roebe/sql_paradise/commands.rb', line 229

def sql_names
  create_sql_command_for :names
end

#table(i) ⇒ Object

#

table

#


222
223
224
# File 'lib/roebe/sql_paradise/commands.rb', line 222

def table(i)
  e create_sql_command_for(i) # table tag
end

#temp_dir?Boolean

#

temp_dir?

#

Returns:

  • (Boolean)


579
580
581
# File 'lib/roebe/sql_paradise/commands.rb', line 579

def temp_dir?
  ::Command.temp_dir?
end