Class: Bioroebe::GenbankParser
Overview
Constant Summary
collapse
- UPCASE_THE_SEQUENCE =
#
UPCASE_THE_SEQUENCE
Setting this constant to true will cause this class to store the FASTA sequence in an upcased variant, e.g. “AGCAGCTA” rather than “agcagcta”.
#
true
- TEST_STRING =
#
TEST_STRING
Our example test-string, which shows what such a GenBank file usually looks like.
This will contain two different FASTA sequences.
#
' /note="internal transcribed spacer 2"
ORIGIN
1 cgtaacaagg tttccgtagg tgaaccttcg gaaggatcat tgttgagacc cccaaaaaaa
61 cgatcgagtt aatccggagg accggtgtag tttggtctcc caggggcttt ggctactgtg
121 gtggccgtga atttccgtcg aacctccttg ggagaattct tgatggcaat tgaacccttg
181 gcccggcgca gtttcgcccc aagtcaaatg agatggaacc ggcggagggc atcgtcctcc
241 atggaaccgg ggagggccgg cgttcttccg ttccccccat gaattttttt ttgacaactc
301 tcggcaacgg atatctcggc tctttgcatc cgatgaaaga acccagcgaa atgtgataag
361 tggtgtgaat tgcagaatcc cgtgaaccat cgagtctttg aacgcaagtt gcgcccgagg
421 ccatcaggct aagggcacgc ctgcctgggc gttgcgtgct gcatctctct cccattgcta
481 aggctgaaca ggcatactgt tcggccggcg cggatgagtg tttggcccct tgttcttcgg
541 tgcgatgggt ccaagacctg ggcttttgac ggccggaaat ccggcaagag gtggacggac
601 ggtggctgcg acgaagctgt cgtgcgaatg ccctacgctg tcgtatttga tgggccggaa
661 taaatccctt ttgagcccca ttggaggcac gtcaacccgt gggcggtcga cggccatttg
721 gatgcaaccc caggtcaggt gagga
//
LOCUS Z78510 750 bp DNA linear PLN 30-NOV-2006
DEFINITION P.caricinum 5.8S rRNA gene and ITS1 and ITS2 DNA.
ACCESSION Z78510
VERSION Z78510.1 GI:2765635
KEYWORDS 5.8S ribosomal RNA; 5.8S rRNA gene; internal transcribed spacer;
ITS1; ITS2.
SOURCE Phragmipedium caricinum
ORGANISM Phragmipedium caricinum
Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Orchidaceae;
Cypripedioideae; Phragmipedium.
REFERENCE 1
AUTHORS Cox,A.V., Pridgeon,A.M., Albert,V.A. and Chase,M.W.
TITLE Phylogenetics of the slipper orchids (Cypripedioideae:
Orchidaceae): nuclear rDNA ITS sequences
JOURNAL Unpublished
REFERENCE 2 (bases 1 to 750)
AUTHORS Cox,A.V.
TITLE Direct Submission
JOURNAL Submitted (19-AUG-1996) Cox A.V., Royal Botanic Gardens, Kew,
Richmond, Surrey TW9 3AB, UK
FEATURES Location/Qualifiers
source 1..750
/organism="Phragmipedium caricinum"
/mol_type="genomic DNA"
/db_xref="taxon:53127"
misc_feature 1..380
/note="internal transcribed spacer 1"
gene 381..550
/gene="5.8S rRNA"
rRNA 381..550
/gene="5.8S rRNA"
/product="5.8S ribosomal RNA"
misc_feature 551..750
/note="internal transcribed spacer 2"
ORIGIN
1 ctaaccaggg ttccgaggtg accttcggga ggattccttt ttaagccccc gaaaaaacga
61 tcgaattaaa ccggaggacc ggtttaattt ggtctcccca ggggctttcc ccccttggtg
121 gccgtgaatt tccatcgaac ccccctggga gaattcttgg tggccaatgg acccttggcc
181 cggcgcaatt tcccccccaa tcaaatgaga taggaccggc agggggcgtc cccccccatg
241 gaaccgggga gggccggcat tcttccgttc ccccctcgga ttttttgaca actctcgcaa
301 cggatatctc gcctctttgc atcggatgga agaacgcagc gaaatgtgat aagtggtgtg
361 aattgcagaa tcccgtgaac catcgagtct ttgaacgcaa gttgcgcccg aggccatcag
421 gctaagggca cgcctgcctg ggcgttgcgt gctgcatctc tcccattgct aaggttgaac
481 gggcatactg ttcggccggc gcggatgaga gattggcccc ttgttcttcg gtgcgatggg
541 tccaagacct gggcttttga cggtccaaaa tccggcaaga ggtggacgga cggtggctgc
601 gacaaagctg tcgtgcgaat gccctgcgtt gtcgtttttg atgggccgga ataaatccct
661 tttgaacccc attggaggca cgtcaaccca tgggcggttg acggccattt ggatgcaacc
721 ccaggtcagg tgagccaccc gctgagttta
//
LOCUS Z78509 731 bp DNA linear PLN 30-NOV-2006
DEFINITION P.pearcei 5.8S rRNA gene and ITS1 and ITS2 DNA.
ACCESSION Z78509
VERSION Z78509.1 GI:2765634
KEYWORDS 5.8S ribosomal RNA; 5.8S rRNA gene; internal transcribed spacer;
ITS1; ITS2.
SOURCE Phragmipedium pearcei
ORGANISM Phragmipedium pearcei
Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Orchidaceae;
Cypripedioideae; Phragmipedium.
REFERENCE 1
AUTHORS Cox,A.V., Pridgeon,A.M., Albert,V.A. and Chase,M.W.
TITLE Phylogenetics of the slipper orchids (Cypripedioideae:
Orchidaceae): nuclear rDNA ITS sequences
JOURNAL Unpublished
REFERENCE 2 (bases 1 to 731)
AUTHORS Cox,A.V.
TITLE Direct Submission
JOURNAL Submitted (19-AUG-1996) Cox A.V., Royal Botanic Gardens, Kew,
Richmond, Surrey TW9 3AB, UK
FEATURES Location/Qualifiers
source 1..731
/organism="Phragmipedium pearcei"
/mol_type="genomic DNA"
/db_xref="taxon:53135"
misc_feature 1..380
/note="internal transcribed spacer 1"
gene 381..550
/gene="5.8S rRNA"
rRNA 381..550
/gene="5.8S rRNA"
/product="5.8S ribosomal RNA"
misc_feature 551..731
/note="internal transcribed spacer 2"
ORIGIN
1 cgtaacaagg tttccgtagg tgaacctgcg gaaggatcat tgttgagacc gccaaatata
61 cgatcgagtt aatccggagg accggtgtag tttggtctcc caggggcttt cgccgctgtg
121 gtgaccgtga tttgccatcg agcctccttg ggagatttct tgatggcaat tgaacccttg
181 gcccggcgca gtttcgcgcc aagtcatatg agatagaacc ggcggagggc gtcgtcctcc
241 atggagcggg gagggccggc atgctccgtg cccccccatg aatttttctg acaactctcg
301 gcaacggacg taacaaggtt taaatgtgat aagcaggtgt gaattgcaga atcccgtgaa
361 ccatcgagtc tttgaacgca agttgcgccc gaggccatca ggttaagggc acgcctgcct
421 gggcgttgcg tgctgcatct ctcccattgc taaggttgaa cgggcatact gttcggccgg
481 cgcggatgag agtttggccc cttgttcttc ggtgcgatgg gtccaagacc tgggcttttg
541 acggtccaaa atccggcaag aggtggacgg acggtggctg cgacagagct gtcgtgcgaa
601 tgccctacgt tgtcgttttt gatgggccag aataaatccc ttttgaaccc cattggaggc
661 acgtcaaccc aatggggggt gacgggcatt tggttaaccc cggcaagtta aggcacccgt
721 taattttagg a
//
LOCUS Z78508 741 bp DNA linear PLN 30-NOV-2006'
CommandlineApplication::OLD_VERBOSE_VALUE
ColoursForBase::ARRAY_HTML_COLOURS_IN_USE
Constants inherited
from Base
Base::NAMESPACE
Class Method Summary
collapse
-
.[](i = ARGV) ⇒ Object
# === Bioroebe::GenbankParser[] ========================================================================= #.
Instance Method Summary
collapse
#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opnerev, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #runmode?, #set_be_verbose, #set_runmode, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #verbose_truth, #was_or_were, #without_extname, #write_what_into
Methods included from BaseModule
#absolute_path, #default_file_read, #file_readlines
#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first?, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens, #set_commandline_arguments
#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_will_we_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?
Methods inherited from Base
#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into
#internal_hash?, #reset_the_internal_hash
#infer_the_namespace, #namespace?
Constructor Details
#initialize(commandline_arguments = nil, run_already = true) ⇒ GenbankParser
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 175
# Prepares a new parser: resets the internal state, stores the
# commandline arguments and optionally starts the run.
#
# @param commandline_arguments handed unchanged to
#   set_commandline_arguments (defaults to nil)
# @param run_already when truthy, run is called as the last step
# @yield an optional block; when it evaluates to
#   :do_not_report_anything, the :report_the_dataset entry of
#   @internal_hash is switched to false
def initialize(commandline_arguments = nil, run_already = true)
  reset
  set_commandline_arguments(commandline_arguments)
  # The block is evaluated once, and only when one was supplied.
  if block_given? && :do_not_report_anything == yield
    @internal_hash[:report_the_dataset] = false
  end
  run if run_already
end
|
Class Method Details
.[](i = ARGV) ⇒ Object
#
Bioroebe::GenbankParser[]
#
380
381
382
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 380
# Bracket shortcut for the constructor: the argument is forwarded as-is
# to .new.
#
# @param i forwarded to .new; ARGV is used when omitted
# @return whatever .new returns
def self.[](i = ARGV)
  self.new(i)
end
|
Instance Method Details
#analyse_this_dataset(dataset) ⇒ Object
Also known as:
determine_dataset
283
284
285
286
287
288
289
290
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 283
# Scans a GenBank-formatted string for sequence chunks, records how many
# were found and passes them on for extraction and reporting.
#
# A chunk is every stretch of text that starts at an ORIGIN keyword and
# runs up to and including the next "VERSION <id>" entry.
#
# @param dataset [String] the raw content to analyse
# @return the result of consider_reporting_our_findings_to_the_user
def analyse_this_dataset(dataset)
  use_this_regex =
    /ORIGIN[\/\-\.\s0-9a-zA-Z]+VERSION\s*[\.0-9A-Z]+/
  scanned = dataset.scan(use_this_regex)
  # This key is initialised with the number 0 in reset, so keep it
  # numeric: store how many chunks matched, not the chunks themselves.
  @internal_hash[:n_FASTA_entries_in_the_file] = scanned.size
  discover_the_corresponding_FASTA_entries_from_this_dataset(scanned)
  consider_reporting_our_findings_to_the_user
end
|
#consider_reporting_our_findings_to_the_user ⇒ Object
Also known as:
report, report_the_dataset
#
consider_reporting_our_findings_to_the_user (report tag)
#
348
349
350
351
352
353
354
355
356
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 348
# Prints every id/sequence pair of the main dataset, but only when
# reporting is enabled and the dataset is present and not empty.
#
# For each pair, the id followed by a colon is shown via steelblue, the
# sequence via lightblue, and then a call to e without arguments follows.
#
# @return [nil] when nothing is reported; otherwise the result of each_pair
def consider_reporting_our_findings_to_the_user
  return unless report_the_dataset?
  return unless dataset?
  return if dataset?.empty?

  main_dataset?.each_pair do |id, sequence|
    e steelblue("#{id}:")
    e lightblue(sequence)
    e
  end
end
|
#dataset? ⇒ Boolean
Also known as:
main_dataset?
295
296
297
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 295
# Reader for the collected entries.
#
# @return the object stored in @internal_hash under
#   :dataset_from_all_FASTA_entries_as_a_hash
def dataset?
  key = :dataset_from_all_FASTA_entries_as_a_hash
  @internal_hash[key]
end
|
#discover_the_corresponding_FASTA_entries_from_this_dataset(i) ⇒ Object
#
discover_the_corresponding_FASTA_entries_from_this_dataset
#
317
318
319
320
321
322
323
324
325
326
327
328
329
330
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 317
# Extracts the id and the nucleotide sequence from every chunk and stores
# them in @internal_hash[:dataset_from_all_FASTA_entries_as_a_hash],
# keyed by the id.
#
# The id is whatever follows the VERSION keyword in the chunk (an empty
# String when there is none). The sequence is assembled from all lines
# that start with a position number; it is upcased when
# UPCASE_THE_SEQUENCE is set.
#
# @param i [Array<String>] the chunks to inspect; any other object is
#   ignored
# @return [Array, nil] the given Array, or nil when i is not an Array
def discover_the_corresponding_FASTA_entries_from_this_dataset(i)
  return unless i.is_a? Array

  regex_to_use_for_the_id = /VERSION\s*([\.A-Za-z0-9]+)/
  # The capture must stay on its own line. If it consumed the newline and
  # the leading blanks of the following line, "^" could not match there
  # anymore and every second, right-justified line would be skipped.
  regex_for_one_sequence_line = /^\s*\d+([ \ta-zA-Z]+)/
  i.each { |this_dataset|
    use_this_id = this_dataset[regex_to_use_for_the_id, 1].to_s.dup
    # Keep letters only, so blanks, tabs and stray carriage returns
    # never end up in the stored sequence.
    use_this_FASTA_sequence = this_dataset.scan(
      regex_for_one_sequence_line
    ).flatten.join.delete('^a-zA-Z')
    use_this_FASTA_sequence.upcase! if UPCASE_THE_SEQUENCE
    @internal_hash[:dataset_from_all_FASTA_entries_as_a_hash][use_this_id] = use_this_FASTA_sequence
  }
end
|
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 231
# Reacts to commandline flags: the help variants call show_help and the
# test variants analyse the embedded TEST_STRING; both exit afterwards.
#
# NOTE(review): the method name is missing after `def` in this listing,
# presumably lost during extraction - restore it before using this code.
#
# i defaults to commandline_arguments_containing_leading_hyphens? and may
# be a single entry or an Array of entries.
def (
i = commandline_arguments_containing_leading_hyphens?
)
if i.is_a? Array
# An Array is processed one entry at a time.
# NOTE(review): the callee name in front of `(entry)` is missing as well;
# presumably this is a recursive call of this very method - confirm.
i.each {|entry| (entry) }
else
# "help", "-help" and "--help" (case-insensitive): show the help and quit.
case i when /^-?-?help$/i
show_help
exit
# "test", "test-string", "test_string" and "teststring", each with up to
# two leading hyphens: run the parser on TEST_STRING and quit.
when /^-?-?test$/i,
/^-?-?test(-|_)?string$/i
analyse_this_dataset(TEST_STRING)
exit
end
end
end
|
#report_the_dataset? ⇒ Boolean
267
268
269
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 267
# Tells whether the findings shall be reported.
#
# @return the value stored in @internal_hash under :report_the_dataset
def report_the_dataset?
  key = :report_the_dataset
  @internal_hash[key]
end
|
#reset ⇒ Object
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 200
# Restores the default state: runs the inherited reset, infers the
# namespace and then (re)sets the entries this class keeps in
# @internal_hash.
#
# @return [Hash] the fresh, empty Hash that was stored under
#   :dataset_from_all_FASTA_entries_as_a_hash
def reset
  super()
  infer_the_namespace
  {
    work_on_this_file: nil,
    report_the_dataset: true,
    n_FASTA_entries_in_the_file: 0
  }.each_pair { |key, default_value| @internal_hash[key] = default_value }
  # Assigned last, so that it stays the return value of the method.
  @internal_hash[:dataset_from_all_FASTA_entries_as_a_hash] = {}
end
|
#run ⇒ Object
362
363
364
365
366
367
368
369
370
371
372
373
374
375
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 362
# Entry point: registers the first argument as the file to work on,
# reads that file and analyses it when it contains the GenBank keywords
# ORIGIN and VERSION.
#
# Nothing is read when verbose_check_whether_the_file_exists returns a
# falsy value. A file without both keywords leads to a notification
# instead of an analysis.
def run
  set_work_on_this_file(first_argument?)
  if verbose_check_whether_the_file_exists
    # The content has to be read before it can be inspected; the check
    # below is a separate statement, not a modifier of the assignment.
    original_dataset = File.read(@internal_hash[:work_on_this_file])
    if original_dataset.include?('ORIGIN') && original_dataset.include?('VERSION ')
      analyse_this_dataset(original_dataset)
    else
      opnn; e 'No keywords ORIGIN and VERSION were found in this file.'
    end
  end
end
|
#sequence? ⇒ Boolean
Also known as:
coding_sequence?, cds
309
310
311
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 309
# Returns the first of the stored sequences.
#
# @return the first element of sequences?, or nil when there is none
def sequence?
  all_sequences = sequences?
  all_sequences.first
end
|
#sequences? ⇒ Boolean
302
303
304
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 302
# Returns all stored sequences, without their ids.
#
# @return the values of the object returned by dataset?
def sequences?
  entries_by_id = dataset?
  entries_by_id.values
end
|
#set_work_on_this_file(i = first_argument?
) ⇒ Object
#
set_work_on_this_file
#
274
275
276
277
278
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 274
# Remembers which file shall be processed.
#
# @param i the value to store under :work_on_this_file in
#   @internal_hash; first_argument? is used when omitted
# @return the stored value
def set_work_on_this_file(i = first_argument?)
  key = :work_on_this_file
  @internal_hash[key] = i
end
|
#verbose_check_whether_the_file_exists ⇒ Object
#
verbose_check_whether_the_file_exists
#
335
336
337
338
339
340
341
342
343
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 335
# Checks whether the file registered under :work_on_this_file exists and
# notifies the user when it does not.
#
# @return [Boolean] true when a path is set and File.exist? confirms it,
#   false otherwise
def verbose_check_whether_the_file_exists
  path = @internal_hash[:work_on_this_file]
  return true if path && File.exist?(path)

  opnn
  e('No file exists at ' + sfile(path))
  false
end
|
#work_on_which_file? ⇒ Boolean
260
261
262
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 260
# Reader for the file that was registered for processing.
#
# @return the value stored in @internal_hash under :work_on_this_file
def work_on_which_file?
  key = :work_on_this_file
  @internal_hash[key]
end
|