Class: Bioroebe::GenbankParser
Overview
Constant Summary
collapse
- UPCASE_THE_SEQUENCE =
#
UPCASE_THE_SEQUENCE
Setting this constant to true will cause this class to store the FASTA sequence in an upcased variant, e.g. “ACGATCAG” rather than “acgatcag”.
#
true
- TEST_STRING =
#
TEST_STRING
Our example test string, showing what such a GenBank file usually looks like.
This will contain two different FASTA sequences.
#
' /note="internal transcribed spacer 2"
ORIGIN
1 cgtaacaagg tttccgtagg tgaaccttcg gaaggatcat tgttgagacc cccaaaaaaa
61 cgatcgagtt aatccggagg accggtgtag tttggtctcc caggggcttt ggctactgtg
121 gtggccgtga atttccgtcg aacctccttg ggagaattct tgatggcaat tgaacccttg
181 gcccggcgca gtttcgcccc aagtcaaatg agatggaacc ggcggagggc atcgtcctcc
241 atggaaccgg ggagggccgg cgttcttccg ttccccccat gaattttttt ttgacaactc
301 tcggcaacgg atatctcggc tctttgcatc cgatgaaaga acccagcgaa atgtgataag
361 tggtgtgaat tgcagaatcc cgtgaaccat cgagtctttg aacgcaagtt gcgcccgagg
421 ccatcaggct aagggcacgc ctgcctgggc gttgcgtgct gcatctctct cccattgcta
481 aggctgaaca ggcatactgt tcggccggcg cggatgagtg tttggcccct tgttcttcgg
541 tgcgatgggt ccaagacctg ggcttttgac ggccggaaat ccggcaagag gtggacggac
601 ggtggctgcg acgaagctgt cgtgcgaatg ccctacgctg tcgtatttga tgggccggaa
661 taaatccctt ttgagcccca ttggaggcac gtcaacccgt gggcggtcga cggccatttg
721 gatgcaaccc caggtcaggt gagga
//
LOCUS Z78510 750 bp DNA linear PLN 30-NOV-2006
DEFINITION P.caricinum 5.8S rRNA gene and ITS1 and ITS2 DNA.
ACCESSION Z78510
VERSION Z78510.1 GI:2765635
KEYWORDS 5.8S ribosomal RNA; 5.8S rRNA gene; internal transcribed spacer;
ITS1; ITS2.
SOURCE Phragmipedium caricinum
ORGANISM Phragmipedium caricinum
Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Orchidaceae;
Cypripedioideae; Phragmipedium.
REFERENCE 1
AUTHORS Cox,A.V., Pridgeon,A.M., Albert,V.A. and Chase,M.W.
TITLE Phylogenetics of the slipper orchids (Cypripedioideae:
Orchidaceae): nuclear rDNA ITS sequences
JOURNAL Unpublished
REFERENCE 2 (bases 1 to 750)
AUTHORS Cox,A.V.
TITLE Direct Submission
JOURNAL Submitted (19-AUG-1996) Cox A.V., Royal Botanic Gardens, Kew,
Richmond, Surrey TW9 3AB, UK
FEATURES Location/Qualifiers
source 1..750
/organism="Phragmipedium caricinum"
/mol_type="genomic DNA"
/db_xref="taxon:53127"
misc_feature 1..380
/note="internal transcribed spacer 1"
gene 381..550
/gene="5.8S rRNA"
rRNA 381..550
/gene="5.8S rRNA"
/product="5.8S ribosomal RNA"
misc_feature 551..750
/note="internal transcribed spacer 2"
ORIGIN
1 ctaaccaggg ttccgaggtg accttcggga ggattccttt ttaagccccc gaaaaaacga
61 tcgaattaaa ccggaggacc ggtttaattt ggtctcccca ggggctttcc ccccttggtg
121 gccgtgaatt tccatcgaac ccccctggga gaattcttgg tggccaatgg acccttggcc
181 cggcgcaatt tcccccccaa tcaaatgaga taggaccggc agggggcgtc cccccccatg
241 gaaccgggga gggccggcat tcttccgttc ccccctcgga ttttttgaca actctcgcaa
301 cggatatctc gcctctttgc atcggatgga agaacgcagc gaaatgtgat aagtggtgtg
361 aattgcagaa tcccgtgaac catcgagtct ttgaacgcaa gttgcgcccg aggccatcag
421 gctaagggca cgcctgcctg ggcgttgcgt gctgcatctc tcccattgct aaggttgaac
481 gggcatactg ttcggccggc gcggatgaga gattggcccc ttgttcttcg gtgcgatggg
541 tccaagacct gggcttttga cggtccaaaa tccggcaaga ggtggacgga cggtggctgc
601 gacaaagctg tcgtgcgaat gccctgcgtt gtcgtttttg atgggccgga ataaatccct
661 tttgaacccc attggaggca cgtcaaccca tgggcggttg acggccattt ggatgcaacc
721 ccaggtcagg tgagccaccc gctgagttta
//
LOCUS Z78509 731 bp DNA linear PLN 30-NOV-2006
DEFINITION P.pearcei 5.8S rRNA gene and ITS1 and ITS2 DNA.
ACCESSION Z78509
VERSION Z78509.1 GI:2765634
KEYWORDS 5.8S ribosomal RNA; 5.8S rRNA gene; internal transcribed spacer;
ITS1; ITS2.
SOURCE Phragmipedium pearcei
ORGANISM Phragmipedium pearcei
Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Orchidaceae;
Cypripedioideae; Phragmipedium.
REFERENCE 1
AUTHORS Cox,A.V., Pridgeon,A.M., Albert,V.A. and Chase,M.W.
TITLE Phylogenetics of the slipper orchids (Cypripedioideae:
Orchidaceae): nuclear rDNA ITS sequences
JOURNAL Unpublished
REFERENCE 2 (bases 1 to 731)
AUTHORS Cox,A.V.
TITLE Direct Submission
JOURNAL Submitted (19-AUG-1996) Cox A.V., Royal Botanic Gardens, Kew,
Richmond, Surrey TW9 3AB, UK
FEATURES Location/Qualifiers
source 1..731
/organism="Phragmipedium pearcei"
/mol_type="genomic DNA"
/db_xref="taxon:53135"
misc_feature 1..380
/note="internal transcribed spacer 1"
gene 381..550
/gene="5.8S rRNA"
rRNA 381..550
/gene="5.8S rRNA"
/product="5.8S ribosomal RNA"
misc_feature 551..731
/note="internal transcribed spacer 2"
ORIGIN
1 cgtaacaagg tttccgtagg tgaacctgcg gaaggatcat tgttgagacc gccaaatata
61 cgatcgagtt aatccggagg accggtgtag tttggtctcc caggggcttt cgccgctgtg
121 gtgaccgtga tttgccatcg agcctccttg ggagatttct tgatggcaat tgaacccttg
181 gcccggcgca gtttcgcgcc aagtcatatg agatagaacc ggcggagggc gtcgtcctcc
241 atggagcggg gagggccggc atgctccgtg cccccccatg aatttttctg acaactctcg
301 gcaacggacg taacaaggtt taaatgtgat aagcaggtgt gaattgcaga atcccgtgaa
361 ccatcgagtc tttgaacgca agttgcgccc gaggccatca ggttaagggc acgcctgcct
421 gggcgttgcg tgctgcatct ctcccattgc taaggttgaa cgggcatact gttcggccgg
481 cgcggatgag agtttggccc cttgttcttc ggtgcgatgg gtccaagacc tgggcttttg
541 acggtccaaa atccggcaag aggtggacgg acggtggctg cgacagagct gtcgtgcgaa
601 tgccctacgt tgtcgttttt gatgggccag aataaatccc ttttgaaccc cattggaggc
661 acgtcaaccc aatggggggt gacgggcatt tggttaaccc cggcaagtta aggcacccgt
721 taattttagg a
//
LOCUS Z78508 741 bp DNA linear PLN 30-NOV-2006'
CommandlineApplication::OLD_VERBOSE_VALUE
ColoursForBase::ARRAY_HTML_COLOURS_IN_USE
Constants inherited
from Base
Base::NAMESPACE
Class Method Summary
collapse
-
.[](i = '') ⇒ Object
# === Bioroebe::GenbankParser[] ========================================================================= #.
Instance Method Summary
collapse
#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opne, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #set_be_verbose, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #verbose_truth, #was_or_were, #without_extname, #write_what_into
#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first?, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens, #set_commandline_arguments
#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?
Methods inherited from Base
#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #file_readlines, #infer_the_namespace, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #namespace?, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into
Constructor Details
#initialize(commandline_arguments = nil, run_already = true) ⇒ GenbankParser
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 175
# Set up a new parser: restore the default state, remember the
# commandline arguments and, unless told otherwise, start processing.
#
# An optional block may be given; when it yields the Symbol
# :do_not_report_anything the :report_the_dataset flag is switched off
# before +run+ is invoked.
#
# @param commandline_arguments [Object, nil] handed unchanged to
#   +set_commandline_arguments+
# @param run_already [Boolean] when truthy, +run+ is called at the end
def initialize(
    commandline_arguments = nil,
    run_already           = true
  )
  reset
  set_commandline_arguments(commandline_arguments)
  if block_given?
    # The block is evaluated exactly once; only this one Symbol is acted upon.
    @internal_hash[:report_the_dataset] = false if :do_not_report_anything == yield
  end
  run if run_already
end
|
Class Method Details
.[](i = '') ⇒ Object
#
Bioroebe::GenbankParser[]
#
384
385
386
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 384
# Shortcut constructor, so that +GenbankParser[input]+ can be written
# in place of +GenbankParser.new(input)+.
#
# @param i [Object] forwarded unchanged as the only argument of +new+
#   (defaults to an empty String)
# @return [Object] whatever +new+ returns
def self.[](i = '')
  fresh_instance = new(i)
  fresh_instance
end
|
Instance Method Details
#analyse_this_dataset(dataset) ⇒ Object
Also known as:
determine_dataset
287
288
289
290
291
292
293
294
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 287
# Split a GenBank-formatted String into chunks and process them.
#
# Every chunk starts at the keyword ORIGIN and ends right after the
# token that follows the next VERSION keyword. The number of chunks is
# stored under :n_FASTA_entries_in_the_file (an Integer, matching the 0
# that +reset+ puts there), the chunks are handed to
# +discover_the_corresponding_FASTA_entries_from_this_dataset+ and
# finally +consider_reporting_our_findings_to_the_user+ is called.
#
# @param dataset [String] the content to scan
# @return [Object] the return value of
#   +consider_reporting_our_findings_to_the_user+
def analyse_this_dataset(dataset)
  use_this_regex =
    /ORIGIN[\/\-\.\s0-9a-zA-Z]+VERSION\s*[\.0-9A-Z]+/
  # The regex literal and the scan are two separate statements.
  scanned = dataset.scan(use_this_regex)
  # Store the count rather than the Array of matches, as the key name says.
  @internal_hash[:n_FASTA_entries_in_the_file] = scanned.size
  discover_the_corresponding_FASTA_entries_from_this_dataset(scanned)
  consider_reporting_our_findings_to_the_user
end
|
#consider_reporting_our_findings_to_the_user ⇒ Object
Also known as:
report, report_the_dataset
#
consider_reporting_our_findings_to_the_user (report tag)
#
352
353
354
355
356
357
358
359
360
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 352
# Print every stored entry, but only when reporting is enabled and the
# dataset is present and not empty.
#
# For each key/value pair of +main_dataset?+ three lines are emitted via
# +e+: the key followed by a colon (passed through +steelblue+), the
# value (passed through +lightblue+) and an empty line.
#
# @return [Object, nil] nil when nothing is reported, otherwise the
#   result of +each_pair+
def consider_reporting_our_findings_to_the_user
  return unless report_the_dataset? && dataset? && !dataset?.empty?
  main_dataset?.each_pair do |identifier, sequence|
    e steelblue("#{identifier}:")
    e lightblue(sequence)
    e
  end
end
|
#dataset? ⇒ Boolean
Also known as:
main_dataset?
299
300
301
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 299
# Reader for the collected entries.
#
# @return [Object] whatever is stored in @internal_hash under
#   :dataset_from_all_FASTA_entries_as_a_hash (+reset+ puts an empty
#   Hash there)
def dataset?
  storage_key = :dataset_from_all_FASTA_entries_as_a_hash
  @internal_hash[storage_key]
end
|
#discover_the_corresponding_FASTA_entries_from_this_dataset(i) ⇒ Object
#
discover_the_corresponding_FASTA_entries_from_this_dataset
#
321
322
323
324
325
326
327
328
329
330
331
332
333
334
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 321
# Extract the identifier and the nucleotide sequence from every chunk
# and store them in the Hash kept under
# :dataset_from_all_FASTA_entries_as_a_hash (identifier => sequence).
#
# The identifier is the token following the keyword VERSION. The
# sequence is assembled from all lines that begin with a position
# number, with blanks removed; it is upcased when UPCASE_THE_SEQUENCE
# is true.
#
# @param i [Array<String>] chunks to inspect; any other input is ignored
# @return [Array<String>, nil] +i+ itself for Array input, otherwise nil
def discover_the_corresponding_FASTA_entries_from_this_dataset(i)
  return unless i.is_a? Array
  regex_to_use_for_the_id = /VERSION\s*([\.A-Za-z0-9]+)/
  # Only blanks and letters are captured, never a newline: were the
  # capture to swallow the line break together with the indentation of
  # the following line, "^" could not match there anymore and every
  # second sequence line of an indented file would be lost.
  regex_for_one_sequence_line = /^\s*\d{1,100}([ \ta-zA-Z]+)/
  i.each { |this_dataset|
    use_this_id = this_dataset[regex_to_use_for_the_id, 1].to_s
    # A line starting with "//" ends the sequence block; the header
    # lines of the next record must not be searched for sequence data.
    sequence_section = this_dataset.split(%r{^//}, 2).first.to_s
    use_this_FASTA_sequence = sequence_section.scan(
      regex_for_one_sequence_line
    ).flatten.join.delete(" \t")
    use_this_FASTA_sequence.upcase! if UPCASE_THE_SEQUENCE
    @internal_hash[:dataset_from_all_FASTA_entries_as_a_hash][use_this_id] = use_this_FASTA_sequence
  }
end
|
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 235
# Handle commandline flags such as --help or --test.
#
# NOTE(review): the method name is missing after `def` in this listing
# (it appears to have been lost when the page was extracted), so the
# block does not parse as shown. Restore the name from
# lib/bioroebe/parsers/genbank_parser.rb, line 235, before relying on it.
#
# The argument defaults to the return value of
# commandline_arguments_containing_leading_hyphens?.
def (
i = commandline_arguments_containing_leading_hyphens?
)
if i.is_a? Array
# NOTE(review): as written, `(entry)` only evaluates the element and
# does nothing with it; presumably this was a recursive call of this
# very method whose name got lost as well - confirm against the file.
i.each {|entry| (entry) }
else
# Up to two leading hyphens are optional; matching ignores case.
case i when /^-?-?help$/i
show_help
exit
# Accepts "test" as well as "teststring", "test-string" and "test_string".
when /^-?-?test$/i,
/^-?-?test(-|_)?string$/i
# Run the parser on the built-in example data, then terminate.
analyse_this_dataset(TEST_STRING)
exit
end
end
end
|
#report_the_dataset? ⇒ Boolean
271
272
273
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 271
# Tells whether findings are to be reported.
#
# @return [Object] the value stored in @internal_hash under
#   :report_the_dataset (+reset+ sets it to true)
def report_the_dataset?
  reporting_flag = @internal_hash[:report_the_dataset]
  reporting_flag
end
|
#reset ⇒ Object
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 200
# Bring the instance back into its initial state.
#
# Calls the parent implementation and +infer_the_namespace+ first, then
# stores the defaults of this class in @internal_hash: no file to work
# on, reporting enabled, zero entries counted and an empty result Hash.
#
# @return [Hash] the new, empty result Hash
def reset
  super()
  infer_the_namespace
  @internal_hash.update(
    work_on_this_file:           nil,
    report_the_dataset:          true,
    n_FASTA_entries_in_the_file: 0
  )
  # A new Hash per call, so results of earlier runs are never shared.
  @internal_hash[:dataset_from_all_FASTA_entries_as_a_hash] = {}
end
|
#run ⇒ Object
366
367
368
369
370
371
372
373
374
375
376
377
378
379
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 366
# Entry point: read the file named by the first argument and analyse it.
#
# The file is only read when +verbose_check_whether_the_file_exists+
# confirms it. Its content is analysed only when it contains both the
# substrings 'ORIGIN' and 'VERSION ' (with a trailing blank); otherwise
# a notice is printed via +e+.
#
# @return [Object, nil] nil when the file check fails, otherwise the
#   result of the branch that was taken
def run
  set_work_on_this_file(first_argument?)
  if verbose_check_whether_the_file_exists
    # Reading the file and testing its content are separate steps; the
    # content must exist before it can be inspected.
    original_dataset = File.read(@internal_hash[:work_on_this_file])
    if original_dataset.include?('ORIGIN') && original_dataset.include?('VERSION ')
      analyse_this_dataset(original_dataset)
    else
      opnn; e 'No keywords ORIGIN and VERSION were found in this file.'
    end
  end
end
|
#sequence? ⇒ Boolean
Also known as:
coding_sequence?, cds
313
314
315
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 313
# Convenience reader for the first stored sequence.
#
# @return [Object, nil] the first element of +sequences?+
def sequence?
  all_sequences = sequences?
  all_sequences.first
end
|
#sequences? ⇒ Boolean
306
307
308
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 306
# Reader for all stored sequences, without their identifiers.
#
# @return [Array] the values of +dataset?+
def sequences?
  collected_entries = dataset?
  collected_entries.values
end
|
#set_work_on_this_file(i = first_argument?
) ⇒ Object
#
set_work_on_this_file
#
278
279
280
281
282
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 278
# Remember which file is to be processed.
#
# @param i [Object] stored under :work_on_this_file; defaults to the
#   return value of +first_argument?+
# @return [Object] the value that was stored
def set_work_on_this_file(i = first_argument?)
  @internal_hash.store(:work_on_this_file, i)
end
|
#verbose_check_whether_the_file_exists ⇒ Object
#
verbose_check_whether_the_file_exists
#
339
340
341
342
343
344
345
346
347
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 339
# Check whether the file stored under :work_on_this_file exists and
# tell the user when it does not.
#
# @return [Boolean] true when a path is set and File.exist? confirms
#   it; false otherwise, after a notice was printed via +opnn+ and +e+
def verbose_check_whether_the_file_exists
  target_file = @internal_hash[:work_on_this_file]
  return true if target_file && File.exist?(target_file)
  opnn
  e 'No file exists at '+sfile(target_file)
  false
end
|
#work_on_which_file? ⇒ Boolean
264
265
266
|
# File 'lib/bioroebe/parsers/genbank_parser.rb', line 264
# Reader for the file that is to be processed.
#
# @return [Object, nil] the value stored in @internal_hash under
#   :work_on_this_file (nil after +reset+)
def work_on_which_file?
  chosen_file = @internal_hash[:work_on_this_file]
  chosen_file
end
|