Module: Bioroebe

Included in:
Taxonomy::Interactive
Defined in:
lib/bioroebe/svg/page.rb,
lib/bioroebe/base/base.rb,
lib/bioroebe/base/misc.rb,
lib/bioroebe/cell/cell.rb,
lib/bioroebe/gene/gene.rb,
lib/bioroebe/ncbi/ncbi.rb,
lib/bioroebe/svg/glyph.rb,
lib/bioroebe/svg/svgee.rb,
lib/bioroebe/svg/track.rb,
lib/bioroebe/misc/ruler.rb,
lib/bioroebe/shell/menu.rb,
lib/bioroebe/shell/misc.rb,
lib/bioroebe/colours/rev.rb,
lib/bioroebe/ncbi/efetch.rb,
lib/bioroebe/parsers/gff.rb,
lib/bioroebe/shell/shell.rb,
lib/bioroebe/siRNA/siRNA.rb,
lib/bioroebe/base/colours.rb,
lib/bioroebe/colours/sdir.rb,
lib/bioroebe/colours/simp.rb,
lib/bioroebe/sequence/dna.rb,
lib/bioroebe/blosum/blosum.rb,
lib/bioroebe/codons/codons.rb,
lib/bioroebe/colours/sfile.rb,
lib/bioroebe/colours/swarn.rb,
lib/bioroebe/genome/genome.rb,
lib/bioroebe/svg/primitive.rb,
lib/bioroebe/taxonomy/edit.rb,
lib/bioroebe/taxonomy/menu.rb,
lib/bioroebe/taxonomy/node.rb,
lib/bioroebe/base/namespace.rb,
lib/bioroebe/biomart/filter.rb,
lib/bioroebe/biomart/server.rb,
lib/bioroebe/colours/sfancy.rb,
lib/bioroebe/constants/GUIs.rb,
lib/bioroebe/constants/misc.rb,
lib/bioroebe/constants/urls.rb,
lib/bioroebe/count/count_at.rb,
lib/bioroebe/count/count_gc.rb,
lib/bioroebe/taxonomy/chart.rb,
lib/bioroebe/base/initialize.rb,
lib/bioroebe/biomart/biomart.rb,
lib/bioroebe/biomart/dataset.rb,
lib/bioroebe/colours/colours.rb,
lib/bioroebe/constants/regex.rb,
lib/bioroebe/constants/roebe.rb,
lib/bioroebe/project/project.rb,
lib/bioroebe/regexes/regexes.rb,
lib/bioroebe/shell/help/help.rb,
lib/bioroebe/taxonomy/shared.rb,
lib/bioroebe/version/version.rb,
lib/bioroebe/biomart/database.rb,
lib/bioroebe/dotplots/dotplot.rb,
lib/bioroebe/sequence/protein.rb,
lib/bioroebe/shell/help/class.rb,
lib/bioroebe/svg/mini_feature.rb,
lib/bioroebe/taxonomy/colours.rb,
lib/bioroebe/biomart/attribute.rb,
lib/bioroebe/constants/newline.rb,
lib/bioroebe/constants/unicode.rb,
lib/bioroebe/encoding/encoding.rb,
lib/bioroebe/readline/readline.rb,
lib/bioroebe/sequence/sequence.rb,
lib/bioroebe/taxonomy/taxonomy.rb,
lib/bioroebe/codons/codon_table.rb,
lib/bioroebe/gui/gtk3/gene/gene.rb,
lib/bioroebe/parsers/parse_embl.rb,
lib/bioroebe/pdb/parse_pdb_file.rb,
lib/bioroebe/sequence/alignment.rb,
lib/bioroebe/taxonomy/constants.rb,
lib/bioroebe/taxonomy/help/help.rb,
lib/bioroebe/taxonomy/info/info.rb,
lib/bioroebe/toplevel_methods/e.rb,
lib/bioroebe/base/prototype/misc.rb,
lib/bioroebe/codons/codon_tables.rb,
lib/bioroebe/codons/start_codons.rb,
lib/bioroebe/colours/use_colours.rb,
lib/bioroebe/constants/tabulator.rb,
lib/bioroebe/www/sinatra/sinatra.rb,
lib/bioroebe/base/prototype/mkdir.rb,
lib/bioroebe/base/prototype/reset.rb,
lib/bioroebe/colours/misc_colours.rb,
lib/bioroebe/misc/useful_formulas.rb,
lib/bioroebe/patterns/rgg_scanner.rb,
lib/bioroebe/pdb/parse_mmCIF_file.rb,
lib/bioroebe/taxonomy/info/is_dna.rb,
lib/bioroebe/taxonomy/interactive.rb,
lib/bioroebe/taxonomy/parse_fasta.rb,
lib/bioroebe/toplevel_methods/cat.rb,
lib/bioroebe/toplevel_methods/opn.rb,
lib/bioroebe/toplevel_methods/rds.rb,
lib/bioroebe/toplevel_methods/url.rb,
lib/bioroebe/constants/nucleotides.rb,
lib/bioroebe/exceptions/exceptions.rb,
lib/bioroebe/parsers/blosum_parser.rb,
lib/bioroebe/parsers/stride_parser.rb,
lib/bioroebe/pdb/download_this_pdb.rb,
lib/bioroebe/requires/require_yaml.rb,
lib/bioroebe/shell/colours/colours.rb,
lib/bioroebe/toplevel_methods/misc.rb,
lib/bioroebe/constants/codon_tables.rb,
lib/bioroebe/genomes/genome_pattern.rb,
lib/bioroebe/parsers/biolang_parser.rb,
lib/bioroebe/parsers/genbank_parser.rb,
lib/bioroebe/taxonomy/class_methods.rb,
lib/bioroebe/taxonomy/help/helpline.rb,
lib/bioroebe/toplevel_methods/blast.rb,
lib/bioroebe/toplevel_methods/infer.rb,
lib/bioroebe/toplevel_methods/parse.rb,
lib/bioroebe/base/prototype/e_and_ee.rb,
lib/bioroebe/codons/show_codon_usage.rb,
lib/bioroebe/configuration/constants.rb,
lib/bioroebe/shell/readline/readline.rb,
lib/bioroebe/toplevel_methods/cliner.rb,
lib/bioroebe/toplevel_methods/digest.rb,
lib/bioroebe/toplevel_methods/editor.rb,
lib/bioroebe/toplevel_methods/remove.rb,
lib/bioroebe/cleave_and_digest/cleave.rb,
lib/bioroebe/codons/show_codon_tables.rb,
lib/bioroebe/constants/base_directory.rb,
lib/bioroebe/constants/row_terminator.rb,
lib/bioroebe/genomes/genome_retriever.rb,
lib/bioroebe/gui/gtk3/sizeseq/sizeseq.rb,
lib/bioroebe/patterns/profile_pattern.rb,
lib/bioroebe/patterns/scan_for_repeat.rb,
lib/bioroebe/requires/require_colours.rb,
lib/bioroebe/toplevel_methods/esystem.rb,
lib/bioroebe/toplevel_methods/extract.rb,
lib/bioroebe/toplevel_methods/matches.rb,
lib/bioroebe/toplevel_methods/verbose.rb,
lib/bioroebe/utility_scripts/pathways.rb,
lib/bioroebe/www/embeddable_interface.rb,
lib/bioroebe/cleave_and_digest/trypsin.rb,
lib/bioroebe/constants/carriage_return.rb,
lib/bioroebe/dotplots/advanced_dotplot.rb,
lib/bioroebe/electron_microscopy/flipy.rb,
lib/bioroebe/raw_sequence/raw_sequence.rb,
lib/bioroebe/toplevel_methods/rnalfold.rb,
lib/bioroebe/toplevel_methods/taxonomy.rb,
lib/bioroebe/toplevel_methods/truncate.rb,
lib/bioroebe/utility_scripts/compacter.rb,
lib/bioroebe/utility_scripts/find_gene.rb,
lib/bioroebe/viennarna/rnafold_wrapper.rb,
lib/bioroebe/colours/colourize_sequence.rb,
lib/bioroebe/enzymes/restriction_enzyme.rb,
lib/bioroebe/toplevel_methods/databases.rb,
lib/bioroebe/toplevel_methods/delimiter.rb,
lib/bioroebe/aminoacids/codon_percentage.rb,
lib/bioroebe/cleave_and_digest/digestion.rb,
lib/bioroebe/codons/detect_minimal_codon.rb,
lib/bioroebe/configuration/configuration.rb,
lib/bioroebe/gui/gtk3/three_to_one/title.rb,
lib/bioroebe/sequence/reverse_complement.rb,
lib/bioroebe/string_matching/levensthein.rb,
lib/bioroebe/toplevel_methods/complement.rb,
lib/bioroebe/toplevel_methods/shuffleseq.rb,
lib/bioroebe/toplevel_methods/statistics.rb,
lib/bioroebe/utility_scripts/showorf/run.rb,
lib/bioroebe/codons/show_this_codon_table.rb,
lib/bioroebe/colours/colour_schemes/score.rb,
lib/bioroebe/constants/database_constants.rb,
lib/bioroebe/fasta_and_fastq/fasta_parser.rb,
lib/bioroebe/gui/gtk3/alignment/alignment.rb,
lib/bioroebe/toplevel_methods/ad_hoc_task.rb,
lib/bioroebe/toplevel_methods/frequencies.rb,
lib/bioroebe/toplevel_methods/is_on_roebe.rb,
lib/bioroebe/toplevel_methods/levensthein.rb,
lib/bioroebe/toplevel_methods/nucleotides.rb,
lib/bioroebe/toplevel_methods/palindromes.rb,
lib/bioroebe/utility_scripts/permutations.rb,
lib/bioroebe/utility_scripts/showorf/help.rb,
lib/bioroebe/utility_scripts/showorf/menu.rb,
lib/bioroebe/utility_scripts/showorf/show.rb,
lib/bioroebe/colours/colour_schemes/simple.rb,
lib/bioroebe/gui/libui/alignment/alignment.rb,
lib/bioroebe/ngs/phred_quality_score_table.rb,
lib/bioroebe/palindromes/palindrome_finder.rb,
lib/bioroebe/protein_structure/alpha_helix.rb,
lib/bioroebe/taxonomy/info/check_available.rb,
lib/bioroebe/toplevel_methods/rna_splicing.rb,
lib/bioroebe/toplevel_methods/to_camelcase.rb,
lib/bioroebe/utility_scripts/dot_alignment.rb,
lib/bioroebe/utility_scripts/mirror_repeat.rb,
lib/bioroebe/utility_scripts/punnet/punnet.rb,
lib/bioroebe/utility_scripts/showorf/reset.rb,
lib/bioroebe/aminoacids/show_hydrophobicity.rb,
lib/bioroebe/calculate/calculate_gc_content.rb,
lib/bioroebe/fasta_and_fastq/download_fasta.rb,
lib/bioroebe/gui/gtk3/controller/controller.rb,
lib/bioroebe/gui/gtk3/show_codon_table/misc.rb,
lib/bioroebe/gui/gtk3/www_finder/www_finder.rb,
lib/bioroebe/requires/require_all_pdb_files.rb,
lib/bioroebe/string_matching/smith_waterman.rb,
lib/bioroebe/toplevel_methods/log_directory.rb,
lib/bioroebe/toplevel_methods/time_and_date.rb,
lib/bioroebe/utility_scripts/parse_taxonomy.rb,
lib/bioroebe/codons/determine_optimal_codons.rb,
lib/bioroebe/codons/sanitize_codon_frequency.rb,
lib/bioroebe/constants/files_and_directories.rb,
lib/bioroebe/matplotlib/matplotlib_generator.rb,
lib/bioroebe/protein_structure/helical_wheel.rb,
lib/bioroebe/utility_scripts/compseq/compseq.rb,
lib/bioroebe/utility_scripts/showorf/showorf.rb,
lib/bioroebe/aminoacids/aminoacids_mass_table.rb,
lib/bioroebe/base/commandline_application/opn.rb,
lib/bioroebe/calculate/calculate_blosum_score.rb,
lib/bioroebe/count/count_amount_of_aminoacids.rb,
lib/bioroebe/electron_microscopy/fix_pos_file.rb,
lib/bioroebe/enzymes/restriction_enzymes_file.rb,
lib/bioroebe/enzymes/show_restriction_enzymes.rb,
lib/bioroebe/gui/tk/three_to_one/three_to_one.rb,
lib/bioroebe/palindromes/palindrome_generator.rb,
lib/bioroebe/requires/require_all_codon_files.rb,
lib/bioroebe/requires/require_all_count_files.rb,
lib/bioroebe/string_matching/hamming_distance.rb,
lib/bioroebe/toplevel_methods/chunked_display.rb,
lib/bioroebe/toplevel_methods/fasta_and_fastq.rb,
lib/bioroebe/toplevel_methods/open_in_browser.rb,
lib/bioroebe/toplevel_methods/three_delimiter.rb,
lib/bioroebe/aminoacids/aminoacid_substitution.rb,
lib/bioroebe/base/commandline_application/misc.rb,
lib/bioroebe/colours/colour_schemes/nucleotide.rb,
lib/bioroebe/constants/aminoacids_and_proteins.rb,
lib/bioroebe/count/count_amount_of_nucleotides.rb,
lib/bioroebe/electron_microscopy/read_file_xmd.rb,
lib/bioroebe/pdb/fetch_fasta_sequence_from_pdb.rb,
lib/bioroebe/requires/require_all_parser_files.rb,
lib/bioroebe/toplevel_methods/base_composition.rb,
lib/bioroebe/toplevel_methods/hamming_distance.rb,
lib/bioroebe/toplevel_methods/number_of_clones.rb,
lib/bioroebe/utility_scripts/showorf/constants.rb,
lib/bioroebe/aminoacids/display_aminoacid_table.rb,
lib/bioroebe/base/commandline_application/reset.rb,
lib/bioroebe/fasta_and_fastq/show_fasta_headers.rb,
lib/bioroebe/gui/experimental/snapgene/snapgene.rb,
lib/bioroebe/gui/gtk3/three_to_one/three_to_one.rb,
lib/bioroebe/requires/require_all_dotplot_files.rb,
lib/bioroebe/requires/require_all_enzymes_files.rb,
lib/bioroebe/requires/require_all_pattern_files.rb,
lib/bioroebe/requires/require_cleave_and_digest.rb,
lib/bioroebe/utility_scripts/consensus_sequence.rb,
lib/bioroebe/utility_scripts/showorf/initialize.rb,
lib/bioroebe/aminoacids/create_random_aminoacids.rb,
lib/bioroebe/enzymes/has_this_restriction_enzyme.rb,
lib/bioroebe/gui/libui/three_to_one/three_to_one.rb,
lib/bioroebe/misc/quiz/three_letter_to_aminoacid.rb,
lib/bioroebe/palindromes/palindrome_2D_structure.rb,
lib/bioroebe/requires/require_all_sequence_files.rb,
lib/bioroebe/requires/require_all_taxonomy_files.rb,
lib/bioroebe/toplevel_methods/atomic_composition.rb,
lib/bioroebe/toplevel_methods/convert_global_env.rb,
lib/bioroebe/toplevel_methods/exponential_growth.rb,
lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb,
lib/bioroebe/annotations/create_annotation_format.rb,
lib/bioroebe/base/commandline_application/extract.rb,
lib/bioroebe/colours/colour_schemes/colour_scheme.rb,
lib/bioroebe/conversions/convert_aminoacid_to_dna.rb,
lib/bioroebe/databases/download_taxonomy_database.rb,
lib/bioroebe/nucleotides/complementary_dna_strand.rb,
lib/bioroebe/nucleotides/show_nucleotide_sequence.rb,
lib/bioroebe/requires/require_all_calculate_files.rb,
lib/bioroebe/toplevel_methods/calculate_n50_value.rb,
lib/bioroebe/toplevel_methods/open_reading_frames.rb,
lib/bioroebe/toplevel_methods/sum_of_odd_integers.rb,
lib/bioroebe/base/commandline_application/warnings.rb,
lib/bioroebe/conversions/dna_to_aminoacid_sequence.rb,
lib/bioroebe/electron_microscopy/parse_coordinates.rb,
lib/bioroebe/fasta_and_fastq/show_fasta_statistics.rb,
lib/bioroebe/requires/require_all_aminoacids_files.rb,
lib/bioroebe/requires/require_the_toplevel_methods.rb,
lib/bioroebe/utility_scripts/analyse_local_dataset.rb,
lib/bioroebe/base/colours_for_base/colours_for_base.rb,
lib/bioroebe/base/commandline_application/directory.rb,
lib/bioroebe/fasta_and_fastq/fastq_format_explainer.rb,
lib/bioroebe/gui/gtk3/parse_pdb_file/parse_pdb_file.rb,
lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.rb,
lib/bioroebe/patterns/analyse_glycosylation_pattern.rb,
lib/bioroebe/requires/require_all_nucleotides_files.rb,
lib/bioroebe/requires/require_all_palindromes_files.rb,
lib/bioroebe/string_matching/find_longest_substring.rb,
lib/bioroebe/string_matching/simple_string_comparer.rb,
lib/bioroebe/toplevel_methods/searching_and_finding.rb,
lib/bioroebe/utility_scripts/show_this_dna_sequence.rb,
lib/bioroebe/base/commandline_application/aminoacids.rb,
lib/bioroebe/calculate/calculate_melting_temperature.rb,
lib/bioroebe/electron_microscopy/coordinate_analyzer.rb,
lib/bioroebe/electron_microscopy/generate_em2em_file.rb,
lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb,
lib/bioroebe/fasta_and_fastq/parse_fastq/parse_fastq.rb,
lib/bioroebe/gui/libui/protein_to_DNA/protein_to_DNA.rb,
lib/bioroebe/calculate/calculate_levensthein_distance.rb,
lib/bioroebe/gui/gtk3/random_sequence/random_sequence.rb,
lib/bioroebe/gui/tk/hamming_distance/hamming_distance.rb,
lib/bioroebe/nucleotides/sanitize_nucleotide_sequence.rb,
lib/bioroebe/patterns/is_this_sequence_a_EGF2_pattern.rb,
lib/bioroebe/requires/require_all_colour_scheme_files.rb,
lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb,
lib/bioroebe/toplevel_methods/download_and_fetch_data.rb,
lib/bioroebe/toplevel_methods/phred_error_probability.rb,
lib/bioroebe/utility_scripts/create_batch_entrez_file.rb,
lib/bioroebe/codons/possible_codons_for_this_aminoacid.rb,
lib/bioroebe/colours/colour_schemes/colour_scheme_demo.rb,
lib/bioroebe/gui/libui/random_sequence/random_sequence.rb,
lib/bioroebe/toplevel_methods/longest_common_substring.rb,
lib/bioroebe/utility_scripts/align_open_reading_frames.rb,
lib/bioroebe/utility_scripts/determine_antigenic_areas.rb,
lib/bioroebe/genbank/genbank_flat_file_format_generator.rb,
lib/bioroebe/gui/gtk3/format_converter/format_converter.rb,
lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.rb,
lib/bioroebe/gui/gtk3/show_codon_table/show_codon_table.rb,
lib/bioroebe/gui/gtk3/show_codon_usage/show_codon_usage.rb,
lib/bioroebe/requires/require_all_fasta_and_fastq_files.rb,
lib/bioroebe/requires/require_all_string_matching_files.rb,
lib/bioroebe/requires/require_all_utility_scripts_files.rb,
lib/bioroebe/toplevel_methods/colourize_related_methods.rb,
lib/bioroebe/utility_scripts/download_files_from_rebase.rb,
lib/bioroebe/codons/convert_this_codon_to_that_aminoacid.rb,
lib/bioroebe/fasta_and_fastq/fasta_defline/fasta_defline.rb,
lib/bioroebe/fasta_and_fastq/fasta_to_yaml/fasta_to_yaml.rb,
lib/bioroebe/gui/libui/hamming_distance/hamming_distance.rb,
lib/bioroebe/gui/libui/show_codon_table/show_codon_table.rb,
lib/bioroebe/gui/libui/show_codon_usage/show_codon_usage.rb,
lib/bioroebe/nucleotides/molecular_weight_of_nucleotides.rb,
lib/bioroebe/base/commandline_application/write_what_into.rb,
lib/bioroebe/gui/gtk3/anti_sense_strand/anti_sense_strand.rb,
lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb,
lib/bioroebe/toplevel_methods/map_ncbi_entry_to_eutils_id.rb,
lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb,
lib/bioroebe/electron_microscopy/electron_microscopy_module.rb,
lib/bioroebe/electron_microscopy/simple_star_file_generator.rb,
lib/bioroebe/gui/gtk3/fasta_table_widget/fasta_table_widget.rb,
lib/bioroebe/requires/require_all_electron_microscopy_files.rb,
lib/bioroebe/fasta_and_fastq/length_modifier/length_modifier.rb,
lib/bioroebe/gui/gtk3/aminoacid_composition/customized_dialog.rb,
lib/bioroebe/gui/gtk3/nucleotide_analyser/nucleotide_analyser.rb,
lib/bioroebe/gui/gtk3/restriction_enzymes/restriction_enzymes.rb,
lib/bioroebe/gui/tk/blosum_matrix_viewer/blosum_matrix_viewer.rb,
lib/bioroebe/gui/tk/levensthein_distance/levensthein_distance.rb,
lib/bioroebe/utility_scripts/display_open_reading_frames/menu.rb,
lib/bioroebe/utility_scripts/display_open_reading_frames/misc.rb,
lib/bioroebe/utility_scripts/display_open_reading_frames/reset.rb,
lib/bioroebe/utility_scripts/move_file_to_its_correct_location.rb,
lib/bioroebe/base/commandline_application/commandline_arguments.rb,
lib/bioroebe/gui/gtk3/blosum_matrix_viewer/blosum_matrix_viewer.rb,
lib/bioroebe/gui/gtk3/levensthein_distance/levensthein_distance.rb,
lib/bioroebe/gui/gtk3/primer_design_widget/primer_design_widget.rb,
lib/bioroebe/gui/tk/aminoacid_composition/aminoacid_composition.rb,
lib/bioroebe/pdb/report_secondary_structures_from_this_pdb_file.rb,
lib/bioroebe/toplevel_methods/return_source_code_of_this_method.rb,
lib/bioroebe/utility_scripts/display_open_reading_frames/report.rb,
lib/bioroebe/gui/jruby/blosum_matrix_viewer/blosum_matrix_viewer.rb,
lib/bioroebe/gui/libui/blosum_matrix_viewer/blosum_matrix_viewer.rb,
lib/bioroebe/gui/libui/levensthein_distance/levensthein_distance.rb,
lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb,
lib/bioroebe/base/commandline_application/commandline_application.rb,
lib/bioroebe/fasta_and_fastq/return_fasta_subsection_of_this_file.rb,
lib/bioroebe/gui/gtk3/aminoacid_composition/aminoacid_composition.rb,
lib/bioroebe/gui/shared_code/protein_to_DNA/protein_to_DNA_module.rb,
lib/bioroebe/toplevel_methods/return_subsequence_based_on_indices.rb,
lib/bioroebe/colours/colour_schemes/array_available_colour_schemes.rb,
lib/bioroebe/fasta_and_fastq/compact_fasta_file/compact_fasta_file.rb,
lib/bioroebe/utility_scripts/display_open_reading_frames/determine.rb,
lib/bioroebe/utility_scripts/display_open_reading_frames/initialize.rb,
lib/bioroebe/fasta_and_fastq/autocorrect_the_name_of_this_fasta_file.rb,
lib/bioroebe/gui/unified_widgets/anti_sense_strand/anti_sense_strand.rb,
lib/bioroebe/calculate/calculate_the_position_specific_scoring_matrix.rb,
lib/bioroebe/gui/gtk3/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb,
lib/bioroebe/gui/shared_code/show_codon_table/show_codon_table_module.rb,
lib/bioroebe/gui/shared_code/show_codon_usage/show_codon_usage_module.rb,
lib/bioroebe/string_matching/find_longest_substring_via_LCS_algorithm.rb,
lib/bioroebe/utility_scripts/determine_missing_nucleotides_percentage.rb,
lib/bioroebe/gui/libui/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb,
lib/bioroebe/utility_scripts/check_for_mismatches/check_for_mismatches.rb,
lib/bioroebe/enzymes/return_sequence_that_is_cut_via_restriction_enzyme.rb,
lib/bioroebe/aminoacids/colourize_hydrophilic_and_hydrophobic_aminoacids.rb,
lib/bioroebe/enzymes/return_restriction_enzyme_sequence_and_cut_position.rb,
lib/bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb,
lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/menu.rb,
lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/misc.rb,
lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/reset.rb,
lib/bioroebe/toplevel_methods/leading_five_prime_and_trailing_three_prime.rb,
lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/constants.rb,
lib/bioroebe/gui/shared_code/blosum_matrix_viewer/blosum_matrix_viewer_module.rb,
lib/bioroebe/gui/shared_code/levensthein_distance/levensthein_distance_module.rb,
lib/bioroebe/fasta_and_fastq/display_how_many_fasta_entries_are_in_this_directory.rb,
lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb,
lib/bioroebe/utility_scripts/display_open_reading_frames/display_open_reading_frames.rb,
lib/bioroebe/calculate/calculate_melting_temperature_for_more_than_thirteen_nucleotides.rb,
lib/bioroebe/gui/gtk3/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb,
lib/bioroebe/gui/libui/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb,
lib/bioroebe/gui/gtk3/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb,
lib/bioroebe/gui/libui/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb,
lib/bioroebe/gui/shared_code/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget_module.rb,
lib/bioroebe/gui/shared_code/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria_module.rb,
lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/split_this_fasta_file_into_chromosomes.rb

Overview

#

Bioroebe::GUI::CalculateCellNumbersOfBacteriaModule

#

require 'bioroebe/gui/shared_code/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria_module.rb'
include Bioroebe::GUI::CalculateCellNumbersOfBacteriaModule

#

Defined Under Namespace

Modules: Biomart, Blosum, CodonTable, CodonTables, CodonTablesFrequencies, ColourScheme, Colourize, ColoursForBase, CommandlineArguments, Configuration, ElectronMicroscopy, EmbeddableInterface, GUI, MolecularWeightOfNucleotides, NucleotideModule, Parser, Postgresql, Quiz, Taxonomy, VerboseTruth

Classes: AdvancedDotplot, AlignOpenReadingFrames, Alignment, AlphaHelix, AminoacidSubstitution, AminoacidsMassTable, AnalyseGlycosylationPattern, AnalyseLocalDataset, AutocorrectTheNameOfThisFastaFile, Base, BiolangParser, BlosumParser, CalculateBlosumScore, CalculateGCContent, CalculateMeltingTemperature, CalculateMeltingTemperatureForMoreThanThirteenNucleotides, CalculateThePositionSpecificScoringMatrix, Cell, CheckForMismatches, CodonPercentage, ColourSchemeDemo, ColourizeHydrophilicAndHydrophobicAminoacids, ColourizeSequence, CommandlineApplication, CompactFastaFile, Compacter, ComplementaryDnaStrand, Compseq, ConsensusSequence, ConvertAminoacidToDNA, CountAmountOfAminoacids, CountAmountOfNucleotides, CreateAnnotationFormat, CreateBatchEntrezFile, CreateRandomAminoacids, DNA, DeduceAminoacidSequence, DetectMinimalCodon, DetermineAntigenicAreas, DetermineMissingNucleotidesPercentage, DetermineOptimalCodons, Digestion, DisplayAminoacidTable, DisplayHowManyFastaEntriesAreInThisDirectory, DisplayOpenReadingFrames, DnaToAminoacidSequence, DotAlignment, Dotplot, DownloadFasta, DownloadFilesFromRebase, DownloadTaxonomyDatabase, FastaDefline, FastaParser, FastaToYaml, FastqFormatExplainer, FetchFastaSequenceFromPdb, FindGene, FindLongestSubstring, FindLongestSubstringViaLCSalgorithm, GenbankFlatFileFormatGenerator, GenbankParser, Gene, Genome, GenomePattern, GenomeRetriever, HammingDistance, HelixWheel, InvalidAminoacid, LengthModifier, Levensthein, MatplotlibGenerator, Matrix, MirrorRepeat, MostLikelyNucleotideSequenceForThisAminoacidSequence, MoveFileToItsCorrectLocation, Ncbi, Palindrome2DStructure, PalindromeFinder, PalindromeGenerator, ParseEMBL, ParseFasta, ParseFastq,
ParseFrequencyTable, ParsePdbFile, ParseTaxonomy, ParsemmCIFFile, Pathways, Permutations, PhredQualityScoreTable, PossibleCodonsForThisAminoacid, ProfilePattern, Protein, Punnet, RGG_Scanner, RNALfoldWrapper, RawSequence, ReportSecondaryStructuresFromThisPdbFile, RestrictionEnzyme, ReverseComplement, Ruler, SVG, SanitizeCodonFrequency, SanitizeNucleotideSequence, ScanForRepeat, Sequence, Shell, ShowCodonTables, ShowCodonUsage, ShowFastaHeaders, ShowFastaStatistics, ShowHydrophobicity, ShowNucleotideSequence, ShowOrf, ShowRestrictionEnzymes, ShowThisCodonTable, ShowThisDNASequence, SiRNA, SimpleStringComparer, SimplifyFastaHeader, Sinatra, SmithWaterman, SplitThisFastaFileIntoChromosomes, StrideParser, Trypsin, UsefulFormulas

Constant Summary

BE_VERBOSE =
#

BE_VERBOSE

#
true
TOKEN =
#

TOKEN (TOKEN tag)

#
'|'
VALID_WAYS_TO_EXIT =
#

VALID_WAYS_TO_EXIT

All ways to exit will be recorded here.

If you need to use more ways, simply append to this Array.

This constant may have to be moved into the bio-shell part eventually.

#
%w(
  quit q exit qq :q qt
  bye
  rda
  r2
  tq
  sq
  exit_program
  exitprogram
)
NAMES_ENTRIES =
#

NAMES_ENTRIES

This used to belong to the Taxonomy submodule.

#
'names.sql'
NODES_ENTRIES =
#

NODES_ENTRIES

This used to belong to the Taxonomy submodule.

#
'nodes.sql'
FASTA_ENTRIES =
#

FASTA_ENTRIES

This used to belong to the Taxonomy submodule.

#
'fasta.sql'
SHALL_WE_LOG_LAST_UPDATE =
#

SHALL_WE_LOG_LAST_UPDATE

This constant exists specifically for the taxonomy-component of the Bioroebe project.

#
true
TAXONOMY_NCBI_DATABASE_LAST_UPDATE_LOG_FILE =
#

TAXONOMY_NCBI_DATABASE_LAST_UPDATE_LOG_FILE

This constant is used specifically for the taxonomy-component of the Bioroebe project.

#
"#{::Bioroebe.log_dir?}taxonomy_ncbi_database_last_update.log"
NAME_OF_BIO_SHELL =
#

NAME_OF_BIO_SHELL

This constant can be used as the default prompt for the bioshell component.

#
'BIO SHELL> '
DEFAULT_DNA_SEQUENCE =
#

DEFAULT_DNA_SEQUENCE

This is a default “test” DNA sequence, in the sense that it can be used to quickly test functionality within the bioroebe project.

It was added in May 2020, but it may be that we have to remove it at a later time, or move it into a separate .yml file. For the time being, though, it will reside here.

#
'CGGCCCGATTTGGGTTTCGGAGCGATCGAAATACCAGCACTACCATGAATTCTAT'\
'ATGGCTGCCGTTCACAGCCTTAATTTTAGGCTTTCCACCTGATCACTCTTTAATC'\
'TCCATTGTTTCTGGTACGCAGAAATTGACGCTTCCCATTCATTCACGGCTAAAAT'\
'CAAGGATTCCACCAGAATCGCGGGCCGCGTGGGTGCGCCGTCGACCTCCTCGGCC'\
'AAATAAGAACGGGCAGGTAAGAGACTAGGGTACTCAAGAT'
DEFAULT_LENGTH_FOR_DNA =
#

DEFAULT_LENGTH_FOR_DNA

How long our DNA-generated strings should be by default.

This may be used by some scripts, so it provides a default value for use in these scripts.

150 nucleotides are the current default.

#
150
FIELD_TERMINATOR =
#

FIELD_TERMINATOR

#
"#{TABULATOR}|#{TABULATOR}"
MAIN_DELIMITER =

An alias to the above.

DELIMITER = FIELD_TERMINATOR
BIOROEBE_AT_HOME =
#

BIOROEBE_AT_HOME

#
"#{RUBY_SRC}bioroebe/lib/bioroebe/"
LOCALHOST =
#

LOCALHOST

#
'http://localhost/'
PATH_TO_THE_RELION_BINARY =
#

PATH_TO_THE_RELION_BINARY

This constant can be set to determine where relion resides. It is mostly an ad-hoc constant.

#
'/opt/RELION/relion-1.3/bin/relion'
ARRAY_REGISTERED_ACTIONS =
#

ARRAY_REGISTERED_ACTIONS

ARRAY_REGISTERED_ACTIONS becomes @registered_actions.

#
%w(
  to_rna
  to_dna
  rest
  pubmed
  blosum
  restriction
  translate
  quit
  shorten_aminoacid
)
FILE_BIO_LANG =
#

FILE_BIO_LANG

#
"#{USERS_X}data/personal/yaml/bio_lang/bio_lang.md"
FTP_NCBI_TAXONOMY_DATABASE =
#

FTP_NCBI_TAXONOMY_DATABASE

This constant refers to the taxonomy-database from NCBI. This is the file that can be downloaded from the NCBI homepage (actually, the ftp-listing).

Take note that this database, in .tar.gz format, is about 50 MB in size or even larger these days. So only download it if you really need it locally.

#
'ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz'
URL_TO_TAXONOMY_ARCHIVE =
#

URL_TO_TAXONOMY_ARCHIVE

An “alias” to the above ^^^ constant.

#
FTP_NCBI_TAXONOMY_DATABASE
NCBI_NUCCORE =
#

NCBI_NUCCORE

#
'https://www.ncbi.nlm.nih.gov/nuccore/'
NCBI_GENE =
#

NCBI_GENE

#
'https://www.ncbi.nlm.nih.gov/gene/'
USE_THIS_COLOUR_FOR_DNA =
#

Bioroebe::USE_THIS_COLOUR_FOR_DNA

The following constant will denote which colour we will use for DNA sequences by default, in this case, the HTML colour called steelblue.

#
:steelblue
REGEX_FOR_N_GLYCOSYLATION_PATTERN =
#

REGEX_FOR_N_GLYCOSYLATION_PATTERN

See rubular at:

https://rubular.com/r/D95Cq7oR5x
#
/(?=(N[^P][ST][^P]))/
REGEX_PROSITE_FOR_ANY_AMINOACID =
#

REGEX_PROSITE_FOR_ANY_AMINOACID

#
/x\((\d+)\)/
EMAIL =
#

EMAIL

My email address - not terribly useful for other people, but it may nonetheless be useful to display it, in particular for GUI-related components of the bioroebe-project and for simple feedback in the long run.

#
'[email protected]'
USERS_X =
#

USERS_X

#
'/home/x/'
RUBY_SRC =
#

RUBY_SRC

This constant is only useful on my home directory. Most other users will not need it, ever.

#
"#{USERS_X}programming/ruby/src/"
FILE_EXPAND_CD_ALIASES =
#

FILE_EXPAND_CD_ALIASES

#
"#{@project_base_directory}hash_expand_cd_aliases.rb"
CONFIGURATION_DIRECTORY =
#

CONFIGURATION_DIRECTORY

#
"#{project_yaml_directory?}configuration/"
TOPLEVEL_METHODS_DIRECTORY =
#

TOPLEVEL_METHODS_DIRECTORY

#
"#{@project_base_directory}toplevel_methods/"
CODON_TABLES_DIRECTORY =
#

CODON_TABLES_DIRECTORY

#
"#{@project_base_directory}codon_tables/"
CLEAVE_AND_DIGEST_DIRECTORY =
#

CLEAVE_AND_DIGEST_DIRECTORY

#
"#{@project_base_directory}cleave_and_digest/"
ELECTRON_MICROSCOPY_DIRECTORY =
#

ELECTRON_MICROSCOPY_DIRECTORY

#
"#{@project_base_directory}electron_microscopy/"
CODON_TABLES_DIRECTORY_FREQUENCY =
#

CODON_TABLES_DIRECTORY_FREQUENCY

#
"#{CODON_TABLES_DIRECTORY}frequencies/"
PDB_DIRECTORY =
#

PDB_DIRECTORY

#
"#{@project_base_directory}pdb/"
PARSERS_DIRECTORY =
#

PARSERS_DIRECTORY

#
"#{@project_base_directory}parsers/"
ENZYMES_DIRECTORY =
#

ENZYMES_DIRECTORY

#
"#{@project_base_directory}enzymes/"
PALINDROMES_DIRECTORY =
#

PALINDROMES_DIRECTORY

#
"#{@project_base_directory}palindromes/"
PATTERN_DIRECTORY =
#

PATTERN_DIRECTORY

#
"#{@project_base_directory}pattern/"
NUCLEOTIDES_DIRECTORY =
#

NUCLEOTIDES_DIRECTORY

#
"#{@project_base_directory}nucleotides/"
COUNT_DIRECTORY =
#

COUNT_DIRECTORY

#
"#{@project_base_directory}count/"
AMINOACIDS_DIRECTORY =
#

AMINOACIDS_DIRECTORY

#
"#{@project_base_directory}aminoacids/"
BLOSUM_DIRECTORY =
#

BLOSUM_DIRECTORY

#
"#{project_yaml_directory?}blosum/"
CALCULATE_DIRECTORY =
#

CALCULATE_DIRECTORY

#
"#{@project_base_directory}calculate/"
CODONS_DIRECTORY =
#

CODONS_DIRECTORY

#
"#{@project_base_directory}codons/"
DOTPLOTS_DIRECTORY =
#

DOTPLOTS_DIRECTORY

#
"#{@project_base_directory}dotplots/"
SEQUENCE_DIRECTORY =
#

SEQUENCE_DIRECTORY

#
"#{@project_base_directory}sequence/"
PATHWAYS_DIRECTORY =
#

PATHWAYS_DIRECTORY

This constant will point to e.g. “/Programs/Ruby/2.6.4/lib/ruby/site_ruby/2.6.0/bioroebe/yaml/pathways/”.

#
"#{project_yaml_directory?}pathways/"
BIOROEBE_YAML_AMINOACIDS_DIRECTORY =
#

BIOROEBE_YAML_AMINOACIDS_DIRECTORY

#
"#{project_yaml_directory?}aminoacids/"
STRING_MATCHING_DIRECTORY =
#

STRING_MATCHING_DIRECTORY

#
"#{@project_base_directory}string_matching/"
FASTA_AND_FASTQ_DIRECTORY =
#

FASTA_AND_FASTQ_DIRECTORY

#
"#{@project_base_directory}fasta_and_fastq/"
VERSION =
#

VERSION

#
'0.11.32'
LAST_UPDATE =
#

LAST_UPDATE

This constant keeps track of when the bioroebe project was last updated. The notation is: DD.MM.YYYY

#
'25.09.2022'
URL_TO_THE_DOCUMENTATION =
#

URL_TO_THE_DOCUMENTATION

Keeps track of where the documentation for BioRoebe is hosted.

#
"https://www.rubydoc.info/gems/#{self.to_s.downcase}/#{VERSION}"
Aminoacids =
#

The following “alias” was added in May 2022.

#
Protein
N =
#

N

#
"\n"
UNICODE_HORIZONTAL_BAR =
#

UNICODE_HORIZONTAL_BAR

#
''
UTF_ENCODING =
#

Bioroebe::UTF_ENCODING

#
'UTF-8'
USE_THIS_ENCODING =
#

Bioroebe::USE_THIS_ENCODING

#
UTF_ENCODING
Seq =
#

Usage example

x = Bioroebe::Seq.new('AGTACACTGGT')

#
Sequence
TABULATOR =
#

TABULATOR

#
"\t"
STOP_CODONS =
#

Bioroebe::STOP_CODONS

The STOP codons that can be found in humans, in RNA format.

#
%w(
  UAA UAG UGA
)
RNA_NUCLEOTIDES =
#

Bioroebe::RNA_NUCLEOTIDES

This will refer to an Array including all four RNA nucleotides, that is A, U, G and C.

#
%w( A U G C )
ALLOWED_RNA_NUCLEOTIDES =

ALLOWED_RNA_NUCLEOTIDES

RNA_NUCLEOTIDES
POSSIBLE_RNA_NUCLEOTIDES =
#

Bioroebe::POSSIBLE_RNA_NUCLEOTIDES

This is a bit different from RNA_NUCLEOTIDES in that N is also a part of it. It is not entirely clear whether this array will be kept, though.

#
%w(
  A U G C N
)
DNA_NUCLEOTIDES =
#

Bioroebe::DNA_NUCLEOTIDES

This is the variant without N.

#
%w( A T G C )
HASH_DNA_NUCLEOTIDES =
#

Bioroebe::HASH_DNA_NUCLEOTIDES

As of 20.04.2014, Uracil is also part of this Hash. While this is, strictly speaking, not absolutely correct, it does simplify some downstream code. However, this may be re-evaluated in the future.

This Hash may be helpful when the user wishes to find a complement to a nucleotide. There is a method that does the same, but this Hash should be faster than a method call, so use it in particular if you need to focus more on speed.

#
{
  'A' => 'T',
  'T' => 'A',
  'G' => 'C',
  'C' => 'G',
  'U' => 'A'
}
POSSIBLE_DNA_NUCLEOTIDES =
#

POSSIBLE_DNA_NUCLEOTIDES

This constant will keep all possible DNA nucleotides.

N is also a valid entry; for example, 'Yarrowia_lipolytica_genome.fa' includes it. However, only the nucleotides listed here are allowed in DNA sequences.

To scope to this, do:

Bioroebe::POSSIBLE_DNA_NUCLEOTIDES
#
%w(
  A T G C N
)
ARRAY_VALID_DNA_SEQUENCES =

ARRAY_VALID_DNA_SEQUENCES

POSSIBLE_DNA_NUCLEOTIDES
ROW_TERMINATOR =
#

Bioroebe::ROW_TERMINATOR

This constant is not often in use, though.

#
"\t|\n"
R =
#

R

#
"\r"
LOCAL_DIRECTORY_FOR_UNIPROT =
#

LOCAL_DIRECTORY_FOR_UNIPROT

This denotes the directory for uniprot-files.

#
"#{@log_directory}uniprot/"
AUTOGENERATED_SQL_FILES_DIR =
#

Bioroebe::AUTOGENERATED_SQL_FILES_DIR

#
"#{@log_directory}autogenerated_sql_files/"
FILE_HYDROPATHY_TABLE =
#

FILE_HYDROPATHY_TABLE

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}hydropathy_table.yml"
FILE_NUCLEAR_LOCALIZATION_SEQUENCES =
#

FILE_NUCLEAR_LOCALIZATION_SEQUENCES

#
"#{project_yaml_directory?}nuclear_localization_sequences.yml"
FILE_DEFAULT_COLOURS_FOR_THE_AMINOACIDS =
#

FILE_DEFAULT_COLOURS_FOR_THE_AMINOACIDS

#
"#{project_yaml_directory?}configuration/default_colours_for_the_aminoacids.yml"
FILE_BROWSER =
#

FILE_BROWSER

#
"#{project_yaml_directory?}configuration/browser.yml"
FILE_AMINOACIDS_MOLECULAR_FORMULA =
#

FILE_AMINOACIDS_MOLECULAR_FORMULA

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_molecular_formula.yml"
FILE_AMINOACIDS_THREE_TO_ONE =
#

FILE_AMINOACIDS_THREE_TO_ONE

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_three_to_one.yml"
FILE_WEIGHT_OF_COMMON_PROTEINS =
#

FILE_WEIGHT_OF_COMMON_PROTEINS

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}weight_of_common_proteins.yml"
FILE_AMINO_ACIDS_LONG_NAME_TO_ONE_LETTER =
#

FILE_AMINO_ACIDS_LONG_NAME_TO_ONE_LETTER

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_long_name_to_one_letter.yml"
FILE_AMINO_ACIDS_MOLECULAR_FORMULA =
#

FILE_AMINO_ACIDS_MOLECULAR_FORMULA

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_molecular_formula.yml"
FILE_AMINO_ACIDS_MASS_TABLE =
#

FILE_AMINO_ACIDS_MASS_TABLE

bl $BIOROEBE_YAML/aminoacids/amino_acids_monoisotopic_mass_table.yml
#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_monoisotopic_mass_table.yml"
FILE_AMINO_ACIDS =
#

FILE_AMINO_ACIDS

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids.yml"
FILE_AMINO_ACIDS_ABBREVIATIONS =
#

FILE_AMINO_ACIDS_ABBREVIATIONS

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_abbreviations.yml"
DIRECTORY_CODON_TABLES_FREQUENCIES =
#

DIRECTORY_CODON_TABLES_FREQUENCIES

This constant may point to a directory such as:

/home/Programs/Ruby/2.7.0/lib/ruby/site_ruby/2.7.0/bioroebe/codon_tables/frequencies/
#
"#{CODON_TABLES_DIRECTORY}frequencies/"
FILE_NUCLEOTIDES =
#

FILE_NUCLEOTIDES

#
"#{project_yaml_directory?}nucleotides/nucleotides.yml"
NUCLEOTIDES =
nil
FILE_GFP_SEQUENCE =
#

FILE_GFP_SEQUENCE

#
"#{project_yaml_directory?}sequences/"\
"JX472995_Green_fluorescent_protein_from_Aequorea_victoria.fasta"
FILE_RESTRICTION_ENZYMES =
#

FILE_RESTRICTION_ENZYMES

bl $BIOROEBE/yaml/restriction/enzymes/restriction_enzymes.yml

#
"#{project_yaml_directory?}restriction_enzymes/restriction_enzymes.yml"
FILE_COLOURIZE_FASTA_SEQUENCES =
#

FILE_COLOURIZE_FASTA_SEQUENCES

This constant points to the .yml file that holds the information on how to colourize the FASTA sequences.

#
"#{project_yaml_directory?}configuration/colourize_fasta_sequences.yml"
FILE_BLOSUM45 =
#

FILE_BLOSUM45

#
"#{BLOSUM_DIRECTORY}/blosum45.yml"
FILE_BLOSUM50 =
#

FILE_BLOSUM50

#
"#{BLOSUM_DIRECTORY}/blosum50.yml"
FILE_BLOSUM62 =
#

FILE_BLOSUM62

#
"#{BLOSUM_DIRECTORY}/blosum62.yml"
FILE_BLOSUM80 =
#

FILE_BLOSUM80

#
"#{BLOSUM_DIRECTORY}/blosum80.yml"
FILE_BLOSUM90 =
#

FILE_BLOSUM90

#
"#{BLOSUM_DIRECTORY}/blosum90.yml"
FILE_BLOSUM_MATRIX =
#

BLOSUM_MATRIX

#
"#{BLOSUM_DIRECTORY}blosum_matrix.yml"
HYDROPATHY_TABLE =
YAML.load_file(
  FILE_HYDROPATHY_TABLE
)
FILE_CHROMOSOME_NUMBERS =
#

FILE_CHROMOSOME_NUMBERS

#
"#{project_yaml_directory?}chromosomes/chromosome_numbers.yml"
FILE_AMINO_ACIDS_FREQUENCY =
#

FILE_AMINO_ACIDS_FREQUENCY

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_frequency.yml"
FILE_AMINO_ACIDS_RESTE_YAML =
#

FILE_AMINO_ACIDS_RESTE_YAML

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_reste.yml"
FILE_AMINO_ACIDS_THREE_TO_ONE =
#

FILE_AMINO_ACIDS_THREE_TO_ONE

We'll keep the keys downcased.

bl $RUBY_SRC/bioroebe/lib/bioroebe/yaml/aminoacids/amino_acids_three_to_one.yml
#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_three_to_one.yml"
FILE_AMINO_ACIDS_AVERAGE_MASS_TABLE =
#

FILE_AMINO_ACIDS_AVERAGE_MASS_TABLE

This will point to the file amino_acids_average_mass_table.yml.

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_average_mass_table.yml"
FILE_NUCLEOTIDES_WEIGHT =
#

FILE_NUCLEOTIDES_WEIGHT

The path to the file that holds the weight of the nucleotides.

#
"#{project_yaml_directory?}nucleotides/nucleotides_weight.yml"
AMINO_ACIDS_MOLECULAR_FORMULA =
YAML.load_file(
  FILE_AMINO_ACIDS_MOLECULAR_FORMULA
)
AMINO_ACIDS_RESTE =
{}
AMINO_ACIDS_LONG_NAME_TO_ONE_LETTER =
YAML.load_file(_)
AMINO_ACIDS_MASS_TABLE =

Else hardcode the AminoAcid table here. This may no longer be necessary, though.

{
  'A' =>  71.03711, 'C' => 103.00919, 'D' => 115.02694,
  'E' => 129.04259, 'F' => 147.06841, 'G' =>  57.02146,
  'H' => 137.05891, 'I' => 113.08406, 'K' => 128.09496,
  'L' => 113.08406, 'M' => 131.04049, 'N' => 114.04293,
  'P' =>  97.05276, 'Q' => 128.05858, 'R' => 156.10111,
  'S' =>  87.03203, 'T' => 101.04768, 'V' =>  99.06841,
  'W' => 186.07931, 'Y' => 163.06333
}
AMINO_ACIDS_AVERAGE_MONOISOTOPIC_TABLE =

An alias.

AMINO_ACIDS_MASS_TABLE
AMINO_ACIDS =
#

Bioroebe::AMINO_ACIDS

Currently listing 21 AminoAcids from amino_acids.yml

bl $BIOROEBE/yaml/aminoacids/amino_acids.yml
#
YAML.load_file(
  FILE_AMINO_ACIDS
)
FILE_AMINO_ACIDS_ENGLISH =
#

::Bioroebe::AMINO_ACIDS_ENGLISH

#
YAML.load_file("#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_english.yml")
AMINO_ACIDS_ENGLISH =

AMINO_ACIDS_ENGLISH

FILE_AMINO_ACIDS_ENGLISH
AMINO_ACIDS_AVERAGE_MASS_TABLE =

Else simply hardcode the AminoAcid table here.

{
  'A' =>  71.0788,
  'C' => 103.1388,
  'D' => 115.0886,
  'E' => 129.1155,
  'F' => 147.1766,
  'G' =>  57.0519,
  'H' => 137.1411,
  'I' => 113.1594,
  'K' => 128.1741,
  'L' => 113.1594,
  'M' => 131.1926,
  'N' => 114.1038,
  'P' =>  97.1167,
  'Q' => 128.1307,
  'R' => 156.1875,
  'S' =>  87.0782,
  'T' => 101.1051,
  'V' =>  99.1326,
  'W' => 186.2132,
  'Y' => 163.1760
}
AMINO_ACIDS_THREE_TO_ONE =
hash
NUCLEAR_LOCALIZATION_SEQUENCES =
''
ARRAY_NLS_SEQUENCES =
[]
USE_THIS_BROWSER =

opera # Hardcoded value in this case..

'firefox'
ARRAY_AMINOACIDS_THAT_CAN_BE_PHOSPHORYLATED =
#

ARRAY_AMINOACIDS_THAT_CAN_BE_PHOSPHORYLATED

Just list the aminoacids that can typically be phosphorylated.

#
%w(
  S Y T
)
ENGLISH_LONG_NAMES_FOR_THE_AMINO_ACIDS =
#

ENGLISH_LONG_NAMES_FOR_THE_AMINO_ACIDS

We have to keep the long names for the amino acids in one constant, so that we can do queries later on.

#
(%w( 
  Alanine
  Arginine
  Asparagine
  Cysteine
  Glutamine
  Glycine
  Histidine
  Isoleucine
  Leucine
  Lysine
  Methionine
  Phenylalanine
  Proline
  Serine
  Threonine
  Tryptophane
  Tyrosine
  Valine
) << 'Aspartic acid' << 'Glutamic acid').sort
POSSIBLE_AMINO_ACIDS =
#

POSSIBLE_AMINO_ACIDS

Which Aminoacids are possible/allowed? We will list them here:

ACDEFGHIKLMNPQRSTVWY

Note that this is distinct from the constant AMINO_ACIDS, which is instead loaded from a local .yml file. This constant includes all the 20 canonical aminoacids, whereas AMINO_ACIDS may also include pyrrolysine and selenocysteine.

#
'ACDEFGHIKLMNPQRSTVWY'
TWENTY_CANONICAL_AMINOACIDS =

TWENTY_CANONICAL_AMINOACIDS

POSSIBLE_AMINO_ACIDS
ARRAY_AMINO_ACIDS_ALPHABET =
#

ARRAY_AMINO_ACIDS_ALPHABET

This keeps an Array with all aminoacids, in one-letter format.

So it is equivalent to:

["A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"]
#
POSSIBLE_AMINO_ACIDS.chars
AMINOACID_FAMILIES =
#

AMINOACID_FAMILIES

#
{
  'citratzyklus' => {
    # Alpha-Ketoglutarat: EPQR
    'alpha-ketoglutarat' => %w( E P Q R ),
    # Oxalacetat: DMN-KTI
    'oxalacetat' => %w( D N K M T I ),
  },
  'glykolyse' => {
    'pyruvat' => %w( A V L ),                 # AVL
    '3-phosphoglycerinsäure' => %w( S G C ), # SGC
    },
    'chorismat' => {
      'aromatische_familie' => %w( F Y W )       # FYW
    },
    'ribose-5-p' => {
      'histidinol' => %w( H ) # Histidine.
    },
}
ProteinToDNA =
#

Bioroebe::ProteinToDNA

Use an “alias” to the other name.

#
ConvertAminoacidToDNA
Fasta =

Add an “alias” constant to class ParseFasta.

ParseFasta

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.[](i = nil) ⇒ Object

#

Bioroebe[]

Assign a sequence through the [] method.

Note that some aliases to this method are allowed; see the variants that use self.instance_eval below this method definition.

This method here could be compared to methods such as Integer(). Biopython uses something similar, by the way.

For instance, you can do this too:

Bioroebe << 'ATT'
x = Bioroebe['ATT']
x = Bioroebe << 'ATT'
#

674
675
676
# File 'lib/bioroebe/sequence/sequence.rb', line 674

# ========================================================================= #
# === Bioroebe[]
#
# Shortcut constructor: the given input (nil by default) is handed to
# Bioroebe::Sequence.new and the freshly created object is returned.
# ========================================================================= #
def self.[](i = nil)
  return Bioroebe::Sequence.new(i)
end

.ad_hoc_task(this_file = '/root/Bioroebe/table_ids.md') ⇒ Object

#

Bioroebe.ad_hoc_task

This method can be used to specifically run an “ad-hoc” task.

An ad-hoc task is something that we just quickly “hack” together, in order to solve some existing bioinformatics-related problem.

Presently, in May 2021, this was for a university course that required us to work with MEGA X and compare different proteins from a phylogenetics point of view.

#

24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/bioroebe/toplevel_methods/ad_hoc_task.rb', line 24

# ========================================================================= #
# === Bioroebe.ad_hoc_task
#
# Quick "ad-hoc" helper: changes into the log directory, downloads the
# FASTA entries referenced by this_file, changes into the "fasta/"
# subdirectory of the log directory and then passes every *.fasta file
# found there to Bioroebe.overwrite_fasta_header().
#
# An Array given as this_file is joined with ' ' first.
# ========================================================================= #
def self.ad_hoc_task(
    this_file = '/root/Bioroebe/table_ids.md'
  )
  require 'bioroebe/fasta_and_fastq/download_fasta.rb'
  require 'bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb'
  this_file = this_file.join(' ') if this_file.is_a? Array
  # Work from within the log-directory.
  cd(::Bioroebe.log_dir?)
  e('Now downloading some FASTA files, based on this file: '+this_file)
  # (1) Download the remote FASTA dataset.
  download_fasta(this_file)
  # (2) Enter the fasta directory.
  cd(::Bioroebe.log_dir?+'fasta/')
  # (3) Simplify the header of every .fasta file found there.
  Dir['*.fasta'].each do |fasta_file|
    Bioroebe.overwrite_fasta_header(fasta_file)
  end
end

.align_this_string_via_multiple_sequence_alignment(this_string = "PSRARRDAVG--DH--PAVEALP----PQSGPHKKEISFFTVRKEEAADADLWFPS PGGASK--VGQTDNDPQAIKDLP----PQGED------------------------ ") ⇒ Object

#

Bioroebe.align_this_string_via_multiple_sequence_alignment

This method will simply return an Array.

#

152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/bioroebe/toplevel_methods/misc.rb', line 152

# ========================================================================= #
# === Bioroebe.align_this_string_via_multiple_sequence_alignment
#
# Turns a multi-line alignment String into an Array holding one entry
# per line. Leading/trailing whitespace is stripped and all ' '
# characters are removed before the String is split on "\n".
#
# An Array as input is joined via "\n" first.
#
# The String passed in is never modified; the method works on copies,
# so callers can keep using their (frozen or unfrozen) input.
#
# Returns an Array of Strings.
# ========================================================================= #
def self.align_this_string_via_multiple_sequence_alignment(
    this_string =
      "PSRARRDAVG--DH--PAVEALP----PQSGPHKKEISFFTVRKEEAADADLWFPS
       PGGASK--VGQTDNDPQAIKDLP----PQGED------------------------
      "
  )
  this_string = this_string.join("\n") if this_string.is_a? Array
  # Non-destructive variants are used on purpose: strip!/delete! would
  # alter the caller's String as a side effect.
  this_string.strip.delete(' ').split("\n")
end

.all_aminoacids?Boolean

#

Bioroebe.all_aminoacids?

This method will return all available aminoacids.

Example:

Bioroebe.all_aminoacids? # => ["A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"]
#

Returns:

  • (Array<String>)

83
84
85
# File 'lib/bioroebe/constants/aminoacids_and_proteins.rb', line 83

# ========================================================================= #
# === Bioroebe.all_aminoacids?
#
# Query method that hands back the constant ARRAY_AMINO_ACIDS_ALPHABET
# (the same object, not a copy).
# ========================================================================= #
def self.all_aminoacids?
  return ARRAY_AMINO_ACIDS_ALPHABET
end

.allowed_dna_nucleotides?Boolean

#

Bioroebe.allowed_dna_nucleotides?

This will return an Array with valid DNA nucleotides.

#

Returns:

  • (Boolean)

106
107
108
# File 'lib/bioroebe/constants/nucleotides.rb', line 106

# ========================================================================= #
# === Bioroebe.allowed_dna_nucleotides?
#
# Query method that hands back the constant ALLOWED_DNA_NUCLEOTIDES.
#
# NOTE(review): presumably this constant is an alias to one of the
# DNA-nucleotide Arrays; verify against its definition.
# ========================================================================= #
def self.allowed_dna_nucleotides?
  return ALLOWED_DNA_NUCLEOTIDES
end

.amino_acid_average_mass(i) ⇒ Object

#

Bioroebe.amino_acid_average_mass

The input to this method should be in the form of the one-letter code for aminoacids. Several aminoacids can be input, of course, such as 'AGL'.

Do note that since as of March 2020 a float will be returned by this method, if the input was found to be a valid aminoacid.

Usage example:

Bioroebe.amino_acid_average_mass('F') # => 147.1766
#

233
234
235
236
237
238
239
240
241
# File 'lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb', line 233

# ========================================================================= #
# === Bioroebe.amino_acid_average_mass
#
# Sums up the average mass of all aminoacids given in one-letter code.
#
# A String is split into its characters, an Array is used as-is and
# anything else is treated as a single entry. Every entry is looked up
# in AMINO_ACIDS_AVERAGE_MASS_TABLE; entries missing from that table
# contribute 0.0 (nil.to_f).
#
# Returns a Float, limited to five decimal places.
# ========================================================================= #
def self.amino_acid_average_mass(i)
  aminoacids =
    case i
    when String then i.split(//)
    when Array  then i
    else [i]
    end
  masses = aminoacids.map {|aminoacid|
    AMINO_ACIDS_AVERAGE_MASS_TABLE[aminoacid].to_f
  }
  # Round-trip through '%.5f' in order to cut off at five decimal places.
  format('%.5f', masses.sum).to_f
end

.amino_acid_monoisotopic_mass(this_aminoacid) ⇒ Object

#

Bioroebe.amino_acid_monoisotopic_mass

We require the monoisotopic table for this method, and return the corresponding match to the given aminoacid.

The input format should be in the one-letter aminoacid abbreviation.

Invocation example:

Bioroebe.amino_acid_monoisotopic_mass 'L' # => 113.08406
Bioroebe.amino_acid_monoisotopic_mass 'K' # => 128.09496
#

257
258
259
260
261
# File 'lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb', line 257

# ========================================================================= #
# === Bioroebe.amino_acid_monoisotopic_mass
#
# Looks up the given aminoacid (one-letter code) in the table
# AMINO_ACIDS_AVERAGE_MONOISOTOPIC_TABLE and returns the stored value
# as Float. An entry that is not part of the table yields 0.0.
# ========================================================================= #
def self.amino_acid_monoisotopic_mass(this_aminoacid)
  AMINO_ACIDS_AVERAGE_MONOISOTOPIC_TABLE[this_aminoacid].to_f
end

.aminoacid_families?Boolean

#

Bioroebe.aminoacid_families?

Feedback which aminoacid-families we know of.

Usage example:

pp Bioroebe.aminoacid_families?; ''
#

Returns:

  • (Hash)

139
140
141
# File 'lib/bioroebe/constants/aminoacids_and_proteins.rb', line 139

# ========================================================================= #
# === Bioroebe.aminoacid_families?
#
# Query method that hands back the constant AMINOACID_FAMILIES (the
# same object, not a copy).
# ========================================================================= #
def self.aminoacid_families?
  return AMINOACID_FAMILIES
end

.aminoacid_frequency(of_this_sequence = '') ⇒ Object

#

Bioroebe.aminoacid_frequency

Usage example:

Bioroebe.aminoacid_frequency('MVTDEGAIYFTKDAARNWKAAVEETVSATLNRTVSSGITGASYYTGTFST')

Would yield the following Hash:

{"M"=>1, "V"=>4, "T"=>9, "D"=>2, "E"=>3, "G"=>4, "A"=>7, "I"=>2, "Y"=>3, "F"=>2, "K"=>2, "R"=>2, "N"=>2, "W"=>1, "S"=>5, "L"=>1}
#

74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/bioroebe/toplevel_methods/frequencies.rb', line 74

# ========================================================================= #
# === Bioroebe.aminoacid_frequency
#
# Counts how often each character occurs in the given sequence.
#
# If an Array is given then only its first element is used. The input
# is converted via .to_s, so an empty Array or nil yields an empty
# Hash rather than raising NoMethodError.
#
# Returns a Hash (character => count) whose default value is 0, so
# asking for a character that did not occur returns 0.
#
# Usage example:
#
#   Bioroebe.aminoacid_frequency('AAB') # => {"A"=>2, "B"=>1}
#
# ========================================================================= #
def self.aminoacid_frequency(
    of_this_sequence = ''
  )
  of_this_sequence = of_this_sequence.first if of_this_sequence.is_a? Array
  of_this_sequence.to_s.chars.each_with_object(Hash.new(0)) {|this_char, hash|
    hash[this_char] += 1
  }
end

.aminoacid_substitution(from_this_sequence = :default) ⇒ Object

#

Bioroebe.aminoacid_substitution

#

102
103
104
# File 'lib/bioroebe/aminoacids/aminoacid_substitution.rb', line 102

# ========================================================================= #
# === Bioroebe.aminoacid_substitution
#
# Convenience wrapper: instantiates Bioroebe::AminoacidSubstitution
# with the given argument (:default if omitted) and returns the new
# object.
# ========================================================================= #
def self.aminoacid_substitution(from_this_sequence = :default)
  return Bioroebe::AminoacidSubstitution.new(from_this_sequence)
end

.aminoacids?Boolean

#

Bioroebe.aminoacids?

Note that this will return a Hash that looks like this:

{"A"=>{"ala"=>"alanine", "d
#

Returns:

  • (Boolean)

440
441
442
# File 'lib/bioroebe/constants/files_and_directories.rb', line 440

# ========================================================================= #
# === Bioroebe.aminoacids?
#
# Query method that hands back the constant AMINO_ACIDS (the same
# object, not a copy).
# ========================================================================= #
def self.aminoacids?
  return AMINO_ACIDS
end

.append_what_into(what = 'Hello world!', into = 'test.md') ⇒ Object

#

Bioroebe.append_what_into

This method can be used to append content onto a file.

#

70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb', line 70

# ========================================================================= #
# === Bioroebe.append_what_into
#
# Appends the content "what" onto the file "into".
#
# If that file does not exist yet, it is created first via
# create_file(); a missing parent directory is created beforehand via
# create_directory(). Both steps are reported to the user.
#
# Returns whatever File.open() returns for the append-block.
# ========================================================================= #
def self.append_what_into(
    what = 'Hello world!',
    into = 'test.md'
  )
  unless File.exist?(into)
    parent_directory = File.dirname(into)
    unless File.directory?(parent_directory)
      notice_about_the_directory = rev+
        'No directory exists at '+sdir(parent_directory)+
        rev+'. Thus creating it now.'
      e notice_about_the_directory
      create_directory(parent_directory)
    end
    notice_about_the_file = rev+
      'No file exists at '+sfile(into)+rev+
      '. Thus creating it now.'
    e notice_about_the_file
    create_file(into)
  end
  File.open(into, 'a') do |handle|
    handle << what
  end
end

.array_colourize_this_aminoacidObject

#

Bioroebe.array_colourize_this_aminoacid

Query as to which aminoacid we will colourize, if any at all.

#

42
43
44
# File 'lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb', line 42

# ========================================================================= #
# === Bioroebe.array_colourize_this_aminoacid
#
# Reader for the instance variable @array_colourize_this_aminoacid;
# nil is returned when it has not been assigned.
# ========================================================================= #
def self.array_colourize_this_aminoacid
  return @array_colourize_this_aminoacid
end

.atomic_composition(of = 'GGGGA') ⇒ Object

#

Bioroebe.atomic_composition

This method will return the composition of atoms in a given protein, via Hash, such as:

{"C"=>11, "H"=>19, "N"=>5, "O"=>6, "S"=>0}

The Hash keeps track of 11 C atoms, 19 H atoms, 5 N atoms, 6 O atoms and 0 S atoms.

This hash can then be formatted via the method:

Bioroebe.show_atomic_composition()

Which can be found below.

Presently this method works on aminoacids only, but in theory the code could be extended to work with DNA nucleotides and RNA nucleotides as well.

Either way, the one letter abbreviation should be used as input to this method.

When we use aminoacids, we need to remember that a peptide bond deducts 1x H₂O (water). This will have to be deducted from the formula, but only if it is an internal aminoacid. In other words, the only two aminoacids that will behave differently are the first one (since it will miss one -OH group) and the last aminoacid (as this one will lack one -H atom).

Remember that the input sequence to this method should be the one-letter code for the aminoacid sequence at hand.

#

50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/bioroebe/toplevel_methods/atomic_composition.rb', line 50

# ========================================================================= #
# === Bioroebe.atomic_composition
#
# Returns a Hash that keeps track of how many C, H, N, O and S atoms
# the given aminoacid sequence (one-letter code) consists of, e. g.:
#
#   Bioroebe.atomic_composition('GGGGA')
#   # => {"C"=>11, "H"=>19, "N"=>5, "O"=>6, "S"=>0}
#
# Accepted input: a String (split into characters), an Array (an empty
# Array re-instates the default 'GGGGA'; if the first element is a
# String longer than one character, that element is split into
# characters) or any other object, which is wrapped into an Array.
#
# Every aminoacid is translated via AMINO_ACIDS_ENGLISH and its formula
# is read from the YAML file FILE_AMINOACIDS_MOLECULAR_FORMULA.
#
# Whenever the sequence consists of more than one aminoacid, the water
# that is lost per peptide bond is deducted: the first aminoacid loses
# 'OH', the last one loses 'H' and every internal one loses 'H2O'. A
# single aminoacid is counted with its full formula.
#
# The formula handling is delegated to the ChemistryParadise project.
# A LoadError while requiring it is tolerated here (only reported if
# is_on_roebe? is true); the calls to ::ChemistryParadise further down
# will still fail if that constant is unavailable.
# ========================================================================= #
def self.atomic_composition(
    of = 'GGGGA' # ← This should be the aminoacid sequence.
  )
  begin
    require 'chemistry_paradise/split_molecule_names.rb'
    require 'chemistry_paradise/toplevel_methods/remove_this_molecule_from.rb'
  rescue LoadError
    if is_on_roebe?
      puts 'Two files from the chemistry_paradise gem are not available.'
    end
  end
  # ======================================================================= #
  # Load up the molecular formula for each aminoacid next. This will
  # be used as our reference-point for calculating the composition.
  # ======================================================================= #
  dataset_molecular_formula_for_the_aminoacids = YAML.load_file(
    FILE_AMINOACIDS_MOLECULAR_FORMULA
  )
  # ======================================================================= #
  # Normalize the input into an Array of one-letter codes.
  # ======================================================================= #
  if of.is_a?(Array)
    if of.empty?
      of = 'GGGGA' # In this case reinstate the default.
    elsif of.first.is_a?(String) && of.first.size > 1
      of = of.first.split(//) # Split it on a per-character basis here.
    end
  end
  of = of.split(//) if of.is_a? String
  of = [of] unless of.is_a? Array
  # ======================================================================= #
  # Build up the default values, for the atoms C, H, N, O and S.
  # ======================================================================= #
  hash_keeping_track_of_the_atomic_composition = {
    'C' => 0,
    'H' => 0,
    'N' => 0,
    'O' => 0,
    'S' => 0
  }
  # ======================================================================= #
  # Peptide bonds only exist if there are at the least two aminoacids.
  # The number of aminoacids has to be checked here, not the length of
  # the first one-letter code - the latter is always 1.
  # ======================================================================= #
  has_peptide_bonds = (of.size > 1)
  position_of_the_last_aminoacid = of.size - 1
  of.each_with_index {|this_amino_acid, position_of_that_aminoacid|
    # ===================================================================== #
    # Next, we have to obtain the formula for this amino acid.
    # ===================================================================== #
    this_amino_acid = AMINO_ACIDS_ENGLISH[this_amino_acid]
    formula_for_this_amino_acid = dataset_molecular_formula_for_the_aminoacids[this_amino_acid]
    if has_peptide_bonds
      case position_of_that_aminoacid # case tag
      when 0 # This is the first aminoacid. It loses only one 'OH' group.
        formula_for_this_amino_acid =
          ::ChemistryParadise.remove_this_molecule_from('OH', formula_for_this_amino_acid)
      when position_of_the_last_aminoacid # The last entry loses only one 'H'.
        formula_for_this_amino_acid =
          ::ChemistryParadise.remove_this_molecule_from('H', formula_for_this_amino_acid)
      else
        # ================================================================= #
        # Else it will lose a full H₂O group.
        # ================================================================= #
        formula_for_this_amino_acid =
          ::ChemistryParadise.remove_this_molecule_from('H2O', formula_for_this_amino_acid)
      end
    end
    array_chemical_formula = ::ChemistryParadise.split_this_molecular_formula_into_a_hash(
      formula_for_this_amino_acid
    )
    array_chemical_formula.each {|molecule_and_number| # e. g. 'H13'
      if molecule_and_number =~ /\d+/ # If it has at the least one number.
        molecule_and_number =~ /([A-Z]+)(\d{1,2})/ # See: https://rubular.com/r/nCojEDcY6g
        molecule = $1.to_s.dup
        n_times  = $2.to_s.dup.to_i
        hash_keeping_track_of_the_atomic_composition[molecule] += n_times
      else # else it must be 1, since there is no other number, such as 'N'.
        hash_keeping_track_of_the_atomic_composition[molecule_and_number] += 1
      end
    }
  }
  return hash_keeping_track_of_the_atomic_composition
end

.automatically_rename_this_fasta_file(fasta_file) ⇒ Object

#

Bioroebe.automatically_rename_this_fasta_file

This method will automatically (try to) rename an existing fasta file, by tapping into the method called .return_new_filename_based_on_fasta_identifier().

#

177
178
179
180
181
182
183
184
185
186
187
188
189
# File 'lib/bioroebe/toplevel_methods/fasta_and_fastq.rb', line 177

# ========================================================================= #
# === Bioroebe.automatically_rename_this_fasta_file
#
# Accepts one path or a (possibly nested) Array of paths; nil entries
# are dropped. Every existing file is renamed via Bioroebe.rename() to
# the name returned by return_new_filename_based_on_fasta_identifier(),
# after a short notice was shown. A path that does not exist is
# reported via no_file_exists_at() instead.
#
# Returns the flattened Array of paths that were processed.
# ========================================================================= #
def self.automatically_rename_this_fasta_file(fasta_file)
  all_paths = [fasta_file].flatten.compact
  all_paths.each do |path|
    unless File.exist?(path)
      no_file_exists_at(path)
      next
    end
    new_filename = return_new_filename_based_on_fasta_identifier(path)
    erev "Renaming #{sfile(path)}#{rev} "\
         "to #{sfile(new_filename)} #{rev}next."
    Bioroebe.rename(path, new_filename)
  end
end

.available_blosum_matrices?Boolean

#

Bioroebe.available_blosum_matrices?

This method will return an Array of all available blosum matrices.

Example output:

["blosum45", "blosum50", "blosum62", "blosum80", "blosum90", "blosum_matrix"]
#

Returns:

  • (Array<String>)

78
79
80
81
82
# File 'lib/bioroebe/blosum/blosum.rb', line 78

# ========================================================================= #
# === Bioroebe.available_blosum_matrices?
#
# Takes the entries returned by Bioroebe::Blosum.available_blosum_files?
# and reduces each of them to its basename without a trailing '.yml'.
#
# Returns an Array such as ["blosum45", "blosum50"].
# ========================================================================= #
def self.available_blosum_matrices?
  yaml_files = Bioroebe::Blosum.available_blosum_files?
  yaml_files.map do |path|
    File.basename(path).delete_suffix('.yml')
  end
end

.available_codon_tables?Boolean

#

Bioroebe.available_codon_tables?

#

Returns:

  • (Boolean)

124
125
126
# File 'lib/bioroebe/codons/show_codon_tables.rb', line 124

# ========================================================================= #
# === Bioroebe.available_codon_tables?
#
# Returns the values of ::Bioroebe::CodonTables.definitions? in their
# original order; the result is deliberately not sorted.
# ========================================================================= #
def self.available_codon_tables?
  definitions = ::Bioroebe::CodonTables.definitions?
  definitions.values # Do not sort this.
end

.base_composition(i = '52%GC') ⇒ Object

#

Bioroebe.base_composition

This method can be used to query the composition of a given DNA sequence, that is, in percentage, the values for A, T, C and G.

This method will then return a Hash, consisting of the percentage values of A, T, C and G in the given DNA sequence at hand.

Note that the input to this method has to include a '%' character, at the least up until March 2020. Past March 2020 this requirement was dropped, but I still think it is visually more elegant to include a '%' character.

#

29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/bioroebe/toplevel_methods/base_composition.rb', line 29

# ========================================================================= #
# === Bioroebe.base_composition
#
# Returns a Hash with the percentage values for A, T, C and G.
#
# Two input formats are supported:
#
#   (1) A percentage notation such as '52%GC'. The nucleotides named
#       after the '%' each receive half of the given percentage; the
#       nucleotides that were NOT named each receive half of the
#       remainder. Integer division is used, so '52%GC' yields
#       {'A'=>24, 'T'=>24, 'C'=>26, 'G'=>26}.
#
#   (2) A plain sequence such as 'AATT'. Here the characters are
#       counted and the percentage per character (rounded to two
#       decimal places) is stored; characters other than A, T, C and G
#       show up as additional keys.
#
# If the input is the path of an existing file then that file is read
# and all lines starting with '>' (FASTA headers) are ignored.
#
# An Array is joined via ' '; an empty Array as well as nil fall back
# to the default '52%GC'.
# ========================================================================= #
def self.base_composition(
    i = '52%GC'
  )
  if i.is_a? Array
    i = i.empty? ? '52%GC' : i.join(' ').strip
  end
  i = '52%GC' if i.nil? # Same default as above.
  # ======================================================================= #
  # Add support for Files here.
  # ======================================================================= #
  if File.exist?(i)
    i = File.readlines(i).reject {|line| line.start_with? '>' }.join("\n").delete("\n")
  end
  hash = {
    'A' => 0,
    'T' => 0,
    'C' => 0,
    'G' => 0,
  }
  if i.include? '%'
    splitted = i.split('%').map(&:strip)
    frequency = splitted.first.to_i
    opposite_frequency = 100 - frequency
    characters = splitted.last.split(//)
    # ===================================================================== #
    # Determine the missing nucleotides by name. Checking for a value of
    # 0 instead would wrongly overwrite the named nucleotides whenever
    # their share is 0, as in '0%GC' or '1%GC'.
    # ===================================================================== #
    missing_nucleotides = hash.keys - characters
    characters.each {|this_nucleotide|
      hash[this_nucleotide] = frequency / 2
    }
    missing_nucleotides.each {|this_nucleotide|
      hash[this_nucleotide] = opposite_frequency / 2
    }
  else
    frequency = i.chars.each_with_object(Hash.new(0)) {|entry, counted|
      counted[entry] += 1
    }
    sum = frequency.values.sum
    frequency.each_pair {|this_nucleotide, value|
      hash[this_nucleotide] = ((value * 100.0) / sum).round(2)
    }
  end
  return hash
end

.base_directory?Boolean

#

Bioroebe.base_directory?

This method will return the stored value for the @base_directory, which is typically a String such as “/root/Bioroebe/”.

#

Returns:

  • (Boolean)

28
29
30
# File 'lib/bioroebe/constants/base_directory.rb', line 28

# ========================================================================= #
# === Bioroebe.base_directory?
#
# Reader for the instance variable @base_directory; nil is returned
# when it has not been assigned.
# ========================================================================= #
def self.base_directory?
  return @base_directory
end

.batch_create_windows_executablesObject

#

Bioroebe.batch_create_windows_executables

This method is only useful for windows. We will use “ocra” to create various .exe files that have the desired widget-functionality.

Note that the functionality depends on the roebe-gem.

#

371
372
373
374
375
376
377
378
379
380
381
382
383
384
# File 'lib/bioroebe/toplevel_methods/misc.rb', line 371

# ========================================================================= #
# === Bioroebe.batch_create_windows_executables
#
# Passes every file of a hardcoded list to Roebe.ocra_build(), using
# the basename of the file without '.rb' as second argument.
#
# A LoadError while requiring the roebe custom methods is ignored
# here; the call to Roebe.ocra_build() will still fail if the Roebe
# constant is unavailable.
# ========================================================================= #
def self.batch_create_windows_executables
  begin
    require 'roebe/custom_methods/custom_methods.rb'
  rescue LoadError; end
  %w(
    /home/x/programming/ruby/src/bioroebe/lib/bioroebe/gui/libui/hamming_distance/hamming_distance.rb
  ).each do |ruby_file|
    Roebe.ocra_build(
      ruby_file,
      File.basename(ruby_file).delete_suffix('.rb')
    )
  end
end

.be_verbose?Boolean

#

Bioroebe.be_verbose?

#

Returns:

  • (Boolean)

23
24
25
# File 'lib/bioroebe/toplevel_methods/verbose.rb', line 23

# ========================================================================= #
# === Bioroebe.be_verbose?
#
# Reader for the instance variable @be_verbose; nil is returned when
# it has not been assigned.
# ========================================================================= #
def self.be_verbose?
  return @be_verbose
end

.bisulfite_treatment(i) ⇒ Object

#

Bioroebe.bisulfite_treatment

Simply convert all C into U.

#

356
357
358
359
360
361
# File 'lib/bioroebe/toplevel_methods/misc.rb', line 356

# ========================================================================= #
# === Bioroebe.bisulfite_treatment
#
# Returns a copy of the given sequence in which every 'C' has been
# replaced by 'U'. An Array is joined (without separator) and stripped
# first; other input is used as-is.
# ========================================================================= #
def self.bisulfite_treatment(i)
  sequence = i.is_a?(Array) ? i.join('').strip : i
  sequence.tr('C', 'U')
end

.blast_neighborhood(this_mer = 'CTC', optional_apply_filter_for_score_higher_than = nil) ⇒ Object

#

Bioroebe.blast_neighborhood

The second argument to this method is a score-filter, e. g. to select only entries that have a score higher than 1.

#

15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# File 'lib/bioroebe/toplevel_methods/blast.rb', line 15

# ========================================================================= #
# === Bioroebe.blast_neighborhood
#
# Compares this_mer, position by position, against all 64 DNA trimers
# (AAA, AAT, AAC ... GGG) and prints the score of each trimer via e().
# Each of the three positions adds +2 on a match and -2 on a mismatch.
#
# If the second argument is given then only trimers whose score is
# higher than that value are printed.
#
# An Array given as this_mer is reduced to its first element; nil is
# replaced by the default 'CTC'.
#
# Returns the Array of all 64 trimers.
# ========================================================================= #
def self.blast_neighborhood(
    this_mer                                    = 'CTC',
    optional_apply_filter_for_score_higher_than = nil
  )
  require 'bioroebe/toplevel_methods/e.rb'
  this_mer = this_mer.first if this_mer.is_a? Array
  this_mer = 'CTC' if this_mer.nil? # Set the same default as above.
  match_score     =  2
  mis_match_score = -2
  # ======================================================================= #
  # All 64 trimers, in the order AAA, AAT, AAC, AAG, ATA ... GGG.
  # ======================================================================= #
  compare_these_sequences = %w( A T C G ).repeated_permutation(3).map(&:join)
  compare_these_sequences.each do |this_sequence|
    score = this_sequence.chars.each_with_index.sum {|nucleotide, position|
      nucleotide == this_mer[position] ? match_score : mis_match_score
    }
    report = "#{this_sequence}: score of #{score.to_s.rjust(3)}"
    if optional_apply_filter_for_score_higher_than
      e report if score > optional_apply_filter_for_score_higher_than
    else
      e report
    end
  end
end

.blosum_directory?Boolean

#

Bioroebe.blosum_directory?

#

Returns:

  • (Boolean)

343
344
345
# File 'lib/bioroebe/constants/files_and_directories.rb', line 343

# Returns the path of the "blosum/" subdirectory as a String, built by
# appending 'blosum/' to whatever project_yaml_directory? returns. Despite
# the trailing "?" in the method name, the result is not a boolean.
# NOTE(review): assumes project_yaml_directory? ends with a '/' - confirm.
def self.blosum_directory?
  "#{project_yaml_directory?}blosum/"
end

.blosum_matrix(i = FILE_BLOSUM_MATRIX) ⇒ Object

#

Bioroebe.blosum_matrix

#

250
251
252
# File 'lib/bioroebe/constants/files_and_directories.rb', line 250

# Parses the YAML file found at the path `i` (FILE_BLOSUM_MATRIX by
# default) and returns the resulting dataset.
# NOTE(review): YAML.load_file should only be pointed at trusted files.
def self.blosum_matrix(i = FILE_BLOSUM_MATRIX)
  YAML.load_file(i)
end

.calculate_exponential_growth(number_of_cells = 10, number_of_divisions = 10) ⇒ Object

#

Bioroebe.calculate_exponential_growth

This method can be used to calculate how many bacteria will exist after n cell divisions (provided that we know, and supply to this method, how many bacteria existed when we started our calculation).

#

16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/bioroebe/toplevel_methods/exponential_growth.rb', line 16

# ========================================================================= #
# === Bioroebe.calculate_exponential_growth
#
# Returns how many cells exist after n divisions, that is:
#
#   number_of_cells * (2 ** number_of_divisions)
#
# Both arguments are converted via .to_i, so Strings (such as entries
# from ARGV) are accepted; nil falls back to the default value of 10.
#
# The first argument may also be a Hash. The recognized keys are
# :number_of_cells or :n_cells, and :n_divisions or :number_of_divisions.
# The Hash is only read, never modified. If the Hash does not contain
# a cell count then the default value of 10 is used.
#
# Usage examples:
#
#   Bioroebe.calculate_exponential_growth(3, 2) # => 12
#   Bioroebe.calculate_exponential_growth({ n_cells: 3, n_divisions: 2 }) # => 12
# ========================================================================= #
def self.calculate_exponential_growth(
    number_of_cells     = 10,
    number_of_divisions = 10
  )
  number_of_cells     = 10 if number_of_cells.nil?     # Default value.
  number_of_divisions = 10 if number_of_divisions.nil? # Default value.
  # ======================================================================= #
  # === Hashes
  #
  # Handle Hash as input given. We read from the Hash rather than
  # delete from it, so that the Hash of the caller stays untouched.
  # ======================================================================= #
  if number_of_cells.is_a? Hash
    options = number_of_cells
    if options.has_key? :n_divisions
      number_of_divisions = options[:n_divisions]
    elsif options.has_key? :number_of_divisions
      number_of_divisions = options[:number_of_divisions]
    end
    number_of_cells =
      if options.has_key? :number_of_cells
        options[:number_of_cells]
      elsif options.has_key? :n_cells
        options[:n_cells]
      else
        10 # No cell count was given in the Hash, so use the default.
      end
  end
  # ======================================================================= #
  # We need numbers, aka integers - there are no "1.3" cells.
  # ======================================================================= #
  number_of_cells.to_i * (2 ** number_of_divisions.to_i)
end

.calculate_levensthein_distance(string1 = 'TTACCC', string2 = 'TTTCCC', be_verbose = true) ⇒ Object

#

Bioroebe.calculate_levensthein_distance

The following method is based on

http://rosettacode.org/wiki/Levenshtein_distance#Ruby, slightly modified.

To test this code, do:

[ ['kitten','sitting'], ['saturday','sunday'], ["rosettde", "raisethyrd"] ].each { |s,t|
  puts "calculate_levensthein_distance('#{s}', '#{t}') = #{Bioroebe.calculate_levensthein_distance(s, t)}"
}

Note, however, that RubyGems ships a Levenshtein variant too.

#

28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/bioroebe/calculate/calculate_levensthein_distance.rb', line 28

# ========================================================================= #
# === Bioroebe.calculate_levensthein_distance
#
# Returns the edit distance between two strings, that is the minimum
# number of single-character deletions, insertions and substitutions
# needed to turn one string into the other.
#
# The input can be given in three ways:
#
#   - two separate Strings
#   - an Array as first argument; its first two entries are compared
#     (the Array itself is not modified)
#   - a single String containing a ' ', such as 'kitten sitting'; the
#     first and the last word are compared
#
# The third argument controls whether the result is also reported on
# the commandline. Pass false or :be_quiet to suppress the report. If
# one of the two strings is empty then the length of the other string
# is returned right away, without any report.
# ========================================================================= #
def self.calculate_levensthein_distance(
    string1    = 'TTACCC',
    string2    = 'TTTCCC',
    be_verbose = true
  )
  be_verbose = false if be_verbose == :be_quiet
  if string1.is_a?(Array)
    # ===================================================================== #
    # Destructure rather than shift, so that the Array of the caller
    # is left alone and the order of the two strings is retained.
    # ===================================================================== #
    first_entry, second_entry = string1
    string2 = second_entry unless second_entry.nil?
    string1 = first_entry.to_s
  elsif string1.is_a?(String) and string1.include?(' ')
    splitted = string1.split(' ')
    string2  = splitted.last
    string1  = splitted.first
  end
  m = string1.length
  n = string2.length
  return m if n == 0 # Stop at 0.
  return n if m == 0 # Stop at 0.
  # ======================================================================= #
  # Only two rows of the matrix are needed at any given moment in time:
  # the row for the previous character of string1 and the current row.
  # ======================================================================= #
  previous_row = (0 .. n).to_a
  string1.each_char.with_index(1) {|char1, i|
    current_row = [i]
    string2.each_char.with_index(1) {|char2, j|
      current_row <<
        if char1 == char2
          previous_row[j-1]       # no operation required
        else
          [ previous_row[j],      # deletion     operation
            current_row[j-1],     # insertion    operation
            previous_row[j-1]     # substitution operation
          ].min + 1
        end
    }
    previous_row = current_row
  }
  result = previous_row[n]
  if be_verbose
    e rev+'The two strings '+simp(string1.to_s)+rev+' and '+
      simp(string2.to_s)+rev+' have n differences ('+
      steelblue('edit distance')+rev+'):'
    e "  #{simp(result.to_s)}"
  end
  return result
end

.calculate_melting_temperature_for_more_than_thirteen_nucleotides(i) ⇒ Object

#

Bioroebe.calculate_melting_temperature_for_more_than_thirteen_nucleotides

An alias exists for this method, called Bioroebe.melting_temperature().

Usage example for the latter:

x = Bioroebe.melting_temperature('CCGTGTCGTACATCG')
#

278
279
280
# File 'lib/bioroebe/calculate/calculate_melting_temperature_for_more_than_thirteen_nucleotides.rb', line 278

# Convenience entry point: passes `i` on to a new instance of
# Bioroebe::CalculateMeltingTemperatureForMoreThanThirteenNucleotides
# and returns that instance (not a plain number).
# NOTE(review): presumably `i` is a nucleotide sequence String, as in the
# documented usage example - confirm against the class.
def self.calculate_melting_temperature_for_more_than_thirteen_nucleotides(i)
  ::Bioroebe::CalculateMeltingTemperatureForMoreThanThirteenNucleotides.new(i)
end

.calculate_n50_value(i = [ 1989, 1934, 1841, 1785, 1737, 1649, 1361, 926, 848, 723 ]) ⇒ Object

#

Bioroebe.calculate_n50_value

This method will calculate the N50 value of the given input. The input to this method should be a sorted Array.

#

16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/bioroebe/toplevel_methods/calculate_n50_value.rb', line 16

# ========================================================================= #
# === Bioroebe.calculate_n50_value
#
# Walks through the given contig lengths in the order in which they
# were given, adds them up, and returns the first length at which the
# running sum exceeds half of the total sum. nil is returned if no
# such entry exists (e. g. for an empty Array).
#
# The input is expected to be already sorted; this method does not
# sort it. Entries are converted via .to_i, so Strings (such as the
# entries of ARGV) are fine. The conversion happens on a copy: the
# Array of the caller is never modified, and may thus be frozen.
# ========================================================================= #
def self.calculate_n50_value(
    i = [
      1989, 1934, 1841,
      1785, 1737, 1649,
      1361,  926,  848,
       723
    ]
  )
  # ======================================================================= #
  # The following conversion is necessary because ARGV will contain only
  # String objects, not integer-values. We use .map rather than .map!
  # so that the input is not changed in place.
  # ======================================================================= #
  contig_lengths = Array(i).map {|entry| entry.to_i }
  half = contig_lengths.sum / 2.0
  running_sum = 0
  # ======================================================================= #
  # Compare the running sum with the half-sum; .find stops at the
  # first contig that pushes the running sum above it.
  # ======================================================================= #
  contig_lengths.find {|this_number|
    running_sum += this_number
    running_sum > half
  }
end

.calculate_original_amount_of_cells_of_exponential_growth(number_of_cells = 1600, number_of_divisions = 5) ⇒ Object

#

Bioroebe.calculate_original_amount_of_cells_of_exponential_growth

The first argument, number_of_cells, means “how many cells do we have now/currently”. This is necessary, in order to calculate how many cells we used to have initially.

#

58
59
60
61
62
63
64
65
66
67
# File 'lib/bioroebe/toplevel_methods/exponential_growth.rb', line 58

# ========================================================================= #
# === Bioroebe.calculate_original_amount_of_cells_of_exponential_growth
#
# The reverse of an exponential growth calculation: given how many
# cells exist now, and how many divisions took place, return how many
# cells existed initially. Both arguments are converted via .to_i, and
# the division is an Integer division.
# ========================================================================= #
def self.calculate_original_amount_of_cells_of_exponential_growth(
    number_of_cells     = 1600, # 1600 cells to start with.
    number_of_divisions =    5  #    5 generations by default.
  )
  current_amount  = number_of_cells.to_i
  growth_factor   = 2 ** number_of_divisions.to_i
  current_amount / growth_factor
end

.calculate_the_frequencies_of_this_species(i = :homo_sapiens) ⇒ Object

#

Bioroebe.calculate_the_frequencies_of_this_species

#

17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/bioroebe/toplevel_methods/frequencies.rb', line 17

# ========================================================================= #
# === Bioroebe.calculate_the_frequencies_of_this_species
#
# Loads a YAML file that maps codons to numbers (such as "GAC" => 25.1),
# sums these numbers up per aminoacid (via Bioroebe.to_aa) and reports
# the sums, divided by ten and formatted with two decimals, on the
# commandline.
#
# The input is either one of the Symbols :homo_sapiens, :homo or
# :human (which select the bundled Homo sapiens file), or the path to
# a .yml file. An Array is also accepted: its first entry is used, and
# an empty Array has :homo_sapiens appended to it.
# ========================================================================= #
def self.calculate_the_frequencies_of_this_species(
    i = :homo_sapiens
  )
  require 'bioroebe/sequence/dna.rb'
  require 'yaml'
  if i.is_a? Array
    i << :homo_sapiens if i.empty?
    i = i.first
  end
  # ======================================================================= #
  # === :homo_sapiens
  # ======================================================================= #
  if %i[homo_sapiens homo human].include?(i.to_sym)
    i = "#{::Bioroebe.project_base_directory?}"\
        "codon_tables/frequencies/9606_Homo_sapiens.yml"
  end
  sum_per_aminoacid = Hash.new(0)
  # The entries of the file look like this: "GAC"=>25.1
  YAML.load_file(i).each_pair {|codon, frequency|
    sum_per_aminoacid[Bioroebe.to_aa(codon)] += frequency
  }
  e
  # ======================================================================= #
  # Convert it into percent:
  # ======================================================================= #
  sum_per_aminoacid.each_pair {|aminoacid, summed_frequency|
    rounded    = ((summed_frequency * 100.0) / 1000.0).round(3).to_s
    percentage = format('%.2f', rounded)
    e '  '+
      steelblue(aminoacid).to_s+' '+
      royalblue(
        percentage.rjust(6)+'%'
      )
  }
  e
end

.calculate_weight_of_the_aminoacids_in_this_fasta_file(fasta_file) ⇒ Object

#

Bioroebe.calculate_weight_of_the_aminoacids_in_this_fasta_file

This method will return a Hash containing the weight of the aminoacids in a .fasta file.

Usage example:

x = Bioroebe.calculate_weight_of_the_aminoacids_in_this_fasta_file('viruses.fa')

This may yield a Hash such as the following:

{ "sp|P23046|NSP5_ROTBV"  => 21647.5341,
  "sp|Q81835|SHDAG_HDVU2" => 22030.6392,
  "sp|A5HBD7|ST_POVWU"    => 23433.3773,
  "sp|Q91FT8|234R_IIV6"   => 21076.778 }
#

70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/bioroebe/toplevel_methods/fasta_and_fastq.rb', line 70

# ========================================================================= #
# === Bioroebe.calculate_weight_of_the_aminoacids_in_this_fasta_file
#
# Parses the given .fasta file via Bioroebe.parse_fasta_quietly and
# returns a Hash that maps each short header to the summed-up weight
# of all aminoacids of the corresponding sequence, rounded to four
# decimals. If the file does not exist, a notification is shown
# instead and no Hash is returned.
# ========================================================================= #
def self.calculate_weight_of_the_aminoacids_in_this_fasta_file(fasta_file)
  unless File.exist? fasta_file
    return e("No file exists at #{fasta_file}.")
  end
  parsed        = Bioroebe.parse_fasta_quietly(fasta_file)
  short_headers = parsed.short_headers?
  sequences     = parsed.sequences?
  weights       = {}
  short_headers.each_with_index {|header, position|
    # Convert the sequence at this position into its total mass.
    total_mass = sequences[position].chars.inject(0) {|mass, aminoacid|
      mass + Bioroebe.weight_of_this_aminoacid?(aminoacid)
    }
    weights[header] = total_mass.round(4)
  }
  weights
end

.cat(i = nil) ⇒ Object

#

Bioroebe.cat (cat tag)

A variant of cat to use here.

#

21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/bioroebe/toplevel_methods/cat.rb', line 21

# ========================================================================= #
# === Bioroebe.cat (cat tag)
#
# Displays the content of the given file on the commandline, one
# padded line at a time.
#
# Input containing a '$' is passed through convert_global_env() first.
# Input consisting only of digits is treated as a (1-based) position
# into the listing of the current directory, Dir['*'].
#
# Directories, missing files and a missing argument lead to a
# notification. Files ending in .fasta or .fa are not displayed as-is
# but are handed to Bioroebe::ParseFasta instead.
# ========================================================================= #
def self.cat(i = nil)
  if i
    i = convert_global_env(i) if i.include? '$'
    i = Dir['*'][i.to_i - 1] if i =~ /^\d+$/
  end
  if i.nil?
    return erev('Please provide an argument to Bioroebe.cat() (the name of a file)')
  end
  # ======================================================================= #
  # === Handle directories next
  # ======================================================================= #
  if File.directory? i
    return erev("We can not read from `#{sdir(i)}#{rev}` as it is a directory.")
  end
  # ======================================================================= #
  # === Handle missing files next
  # ======================================================================= #
  unless File.exist?(i)
    e "#{swarn('Trying to display the file `')}#{sfile(i)}#{swarn('`')}"
    return e(swarn('but it does not exist.'))
  end
  # ======================================================================= #
  # From here on the file is known to exist.
  # ======================================================================= #
  extension = File.extname(i).delete('.')
  if %w( fasta fa ).include? extension
    e 'This is a fasta file, so rather than cat-ing the content,'
    e 'we will send this dataset to the ParseFasta class.'
    require 'bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb'
    Bioroebe::ParseFasta.new(i)
  else # The default here.
    e "Now displaying the file `#{sfile(i)}`."
    File.readlines(i).each {|line|
      e "  #{line.chomp}" # With a bit of padding.
    }
  end
end

.change_directory(i = '$HOME', be_verbose = false) ⇒ Object

#

Bioroebe.change_directory

This method allows us to change the directory.

Bioroebe.cd() is an alias to the method here.

#

139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# File 'lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb', line 139

# ========================================================================= #
# === Bioroebe.change_directory
#
# Changes the working directory of the running process to the given
# target, if that target is an existing directory. Otherwise nothing
# is changed.
#
# The first argument is either a path or one of several shortcuts:
#
#   :home_directory, :default, nil           => log_dir?
#   :download_dir, :download_directory       => download_dir?
#   'base', 'logdir', :bioroebe_log_directory => save_dir?
#     (only if no file of that name exists)
#
# The second argument controls verbosity; true, :be_verbose and
# :do_report_current_directory all enable it.
#
# The argument given by the caller is never modified: the trailing
# '/' is appended to a new String.
#
# NOTE(review): the default value '$HOME' is used verbatim; nothing in
# this method expands environment variables - confirm whether callers
# are expected to expand it first.
# ========================================================================= #
def self.change_directory(
    i          = '$HOME',
    be_verbose = false
  )
  case be_verbose
  # ======================================================================= #
  # === :do_report_current_directory
  # ======================================================================= #
  when :do_report_current_directory,
       :be_verbose
    be_verbose = true
  end
  case i # Do some sanitizing here. (case tag)
  # ======================================================================= #
  # === :home_directory
  # ======================================================================= #
  when :home_directory,
       :default,
       nil # ← Nil is also assumed to refer to this :default value.
    i = log_dir?
  # ======================================================================= #
  # === :download_dir
  # ======================================================================= #
  when :download_dir,':download_dir',
       :download_directory,':download_directory'
    i = download_dir?
  # ======================================================================= #
  # === 'base'
  # ======================================================================= #
  when 'base',
       'logdir',
       :bioroebe_log_directory
    # ===================================================================== #
    # Enter the main log dir, unless a file exists with the same name.
    # ===================================================================== #
    i = save_dir? unless File.exist?(i.to_s) # .to_s to avoid Symbols here.
  end
  # ======================================================================= #
  # .to_s guards against Symbols that were not handled above. The
  # interpolation builds a new String rather than appending in place.
  # ======================================================================= #
  target = i.to_s
  target = "#{target}/" unless target.end_with? '/'
  if File.directory? target
    e sdir(target) if be_verbose # Also colourize the directory and output it.
    Dir.chdir(target)
  elsif be_verbose
    erev "No directory called `#{sdir(target)}#{rev}` exists,"
    erev 'thus we can not cd to this target.'
  end
end

.clear_array_colourize_this_aminoacidObject

#

Bioroebe.clear_array_colourize_this_aminoacid

#

33
34
35
# File 'lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb', line 33

# Resets @array_colourize_this_aminoacid to a new, empty Array and
# returns that Array.
def self.clear_array_colourize_this_aminoacid
  @array_colourize_this_aminoacid = []
end

.clear_stop_codonsObject

#

Bioroebe.clear_stop_codons

#

256
257
258
# File 'lib/bioroebe/codons/codons.rb', line 256

# Resets @stop_codons to a new, empty Array and returns that Array.
def self.clear_stop_codons
  @stop_codons = []
end

.cleave(with = :with_trypsin, i = ARGV) ⇒ Object

#

Bioroebe.cleave (cleave tag)

This is the general entry-point for “cleave-related” activities, such as cleaving a polypeptide or a DNA strand via an enzyme.

#

56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/bioroebe/cleave_and_digest/cleave.rb', line 56

# ========================================================================= #
# === Bioroebe.cleave (cleave tag)
#
# General entry-point for cleave-related activities. The first
# argument selects the enzyme; right now only trypsin is handled
# (:with_trypsin, :trypsin or :default), in which case the input is
# delegated to Bioroebe.cleave_with_trypsin. Any other value leads to
# nil being returned.
# ========================================================================= #
def self.cleave(
    with = :with_trypsin,
    i    = ARGV
  )
  trypsin_aliases = %i[with_trypsin trypsin default]
  return Bioroebe.cleave_with_trypsin(i) if trypsin_aliases.include?(with)
  nil
end

.cleave_with_trypsin(this_sequence = ARGV) ⇒ Object

#

Bioroebe.cleave_with_trypsin

Trypsin cleaves peptides on the C-terminal side of lysine and arginine amino acid residues. If a proline residue is on the carboxyl side of the cleavage site, the cleavage will not occur. If an acidic residue is on either side of the cleavage site, the rate of hydrolysis has been shown to be slower.

This method will return an Array.

#

21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/bioroebe/cleave_and_digest/cleave.rb', line 21

# ========================================================================= #
# === Bioroebe.cleave_with_trypsin
#
# Cleaves the given aminoacid sequence on the C-terminal side of every
# lysine (K) and arginine (R), unless the next residue is a proline
# (P). The fragments are returned as an Array of Strings.
#
# If an Array is given (such as ARGV) then its first entry is used.
# Empty fragments are never part of the result: a sequence ending in
# K or R does not yield a trailing '', and nil or '' yields [].
#
# Usage example:
#
#   Bioroebe.cleave_with_trypsin('AKPGRT') # => ["AKPGR", "T"]
# ========================================================================= #
def self.cleave_with_trypsin(
    this_sequence = ARGV
  )
  # ======================================================================= #
  # === Handle Arrays first
  # ======================================================================= #
  this_sequence = this_sequence.first if this_sequence.is_a? Array
  # ======================================================================= #
  # Split at every position that is preceded by K or R and that is not
  # followed by P. The regex is zero-width, so no character is lost.
  # .split also drops an empty String at the end of the result.
  # ======================================================================= #
  this_sequence.to_s.split(/(?<=[KR])(?!P)/)
end

.cliner(use_this_token = :default_token, how_many_times = 80, use_this_colour = nil) ⇒ Object

#

Bioroebe.cliner

The first argument denotes which token we will use, such as '#', for the line that is to be displayed.

#

15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/bioroebe/toplevel_methods/cliner.rb', line 15

# ========================================================================= #
# === Bioroebe.cliner
#
# Prints a line consisting of a repeated token, such as '=' or '#'.
#
# The first argument is the token; :default_token and :default both
# stand for '='. It may also be a Hash with the keys :length and
# :token - these entries are removed from the Hash when used. The
# second argument states how often the token is repeated. The third
# argument, if given, is the name of a method on ::Colours that is
# used to colourize the line.
#
# A block may return a Hash with the key :colour or :colours in order
# to set the colour as well.
# ========================================================================= #
def self.cliner(
    use_this_token  = :default_token,
    how_many_times  = 80,
    use_this_colour = nil
  )
  require 'bioroebe/colours/colours.rb'
  # ======================================================================= #
  # === Handle Hashes first
  # ======================================================================= #
  if use_this_token.is_a? Hash
    options = use_this_token
    how_many_times = options.delete(:length) if options.has_key? :length
    use_this_token = options.delete(:token)  if options.has_key? :token
    # No usable token was found in the Hash, so use the default one.
    use_this_token = :default if use_this_token.is_a? Hash
  end
  # ======================================================================= #
  # This check must come after the Hashes were handled above.
  # ======================================================================= #
  if %i[default_token default].include?(use_this_token)
    use_this_token = '='
  end
  # ======================================================================= #
  # === handle blocks next
  # ======================================================================= #
  if block_given?
    yielded = yield
    if yielded.is_a?(Hash)
      # :colour takes precedence over :colours.
      colour_key = %i[colour colours].find {|key| yielded.has_key? key }
      use_this_colour = yielded[colour_key] if colour_key
    end
  end
  line = use_this_token * how_many_times
  if use_this_colour
    puts ::Colours.send(use_this_colour, line)
  else
    puts line
  end
end

.codon_frequencies_of_this_sequence(i = ARGV) ⇒ Object

#

Bioroebe.codon_frequencies_of_this_sequence

#

189
190
191
# File 'lib/bioroebe/codons/show_codon_usage.rb', line 189

# Passes `i` (ARGV by default) on to a new Bioroebe::ShowCodonUsage
# instance, together with a block that returns :be_quiet, and returns
# that instance.
# NOTE(review): presumably :be_quiet suppresses the output of
# ShowCodonUsage - confirm against that class.
def self.codon_frequencies_of_this_sequence(i = ARGV)
  Bioroebe::ShowCodonUsage.new(i) { :be_quiet }
end

.codon_frequency_of_this_string(i = 'ATTCGTACGATCGACTGACTGACAGTCATTCGTAGTACGATCGACTGACTGACAGTCATTCGTAC'\ 'GATCGACTGACTGACAAGTCATTCGTACGATCGACTGACTTGACAGTCATAA', automatically_convert_into_a_RNA_sequence = true) ⇒ Object

#

Bioroebe.codon_frequency_of_this_string

The input to this method should ideally be a String. It will be assumed to be a RNA string, e. g. mRNA. Thus, all T are replaced with U by default. This can be toggled via the second argument of this method.

This method will return a Hash.

Usage example:

Bioroebe.codon_frequency_of_this_string
Bioroebe.codon_frequency_of_this_string 'ATTCGTACGATCGACTACTACT' # => {"UAC"=>2, "GAC"=>1, "AUC"=>1, "ACG"=>1, "CGU"=>1, "AUU"=>1}
#

112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/bioroebe/toplevel_methods/misc.rb', line 112

# ========================================================================= #
# === Bioroebe.codon_frequency_of_this_string
#
# Splits the given sequence into consecutive triplets and returns a
# Hash that maps each triplet to the number of times it occurs, with
# the most frequent triplets coming first. Trailing characters that
# do not make up a full triplet are ignored.
#
# By default every 'T' is replaced with 'U' first, as the input is
# assumed to be RNA; pass false as second argument to disable this.
# An Array as input is joined into a single String. The input of the
# caller is never modified.
#
# Usage example:
#
#   Bioroebe.codon_frequency_of_this_string('ATTATTCGT') # => {"AUU"=>2, "CGU"=>1}
# ========================================================================= #
def self.codon_frequency_of_this_string(
    i = 'ATTCGTACGATCGACTGACTGACAGTCATTCGTAGTACGATCGACTGACTGACAGTCATTCGTAC'\
        'GATCGACTGACTGACAAGTCATTCGTACGATCGACTGACTTGACAGTCATAA',
    automatically_convert_into_a_RNA_sequence = true
  )
  sequence = i.is_a?(Array) ? i.join : i
  if automatically_convert_into_a_RNA_sequence
    # .tr returns a new String, whereas .tr! would change the input.
    sequence = sequence.tr('T','U')
  end
  tally = sequence.scan(/.../).tally
  # ======================================================================= #
  # We still have to sort it, highest count first.
  # ======================================================================= #
  Hash[tally.sort_by {|_codon, count| count }.reverse]
end

.codon_table_dataset?Boolean

#

Bioroebe.codon_table_dataset?

This method will return the “codon table dataset”, as a Hash.

This Hash will contain entries like this:

{"TAA"=>"*", "TGA"=>'*',"CCA"=>"P", ...

and so forth.

#

Returns:

  • (Boolean)

39
40
41
# File 'lib/bioroebe/codons/codon_table.rb', line 39

# Returns the current value of @codon_table_dataset. Despite the
# trailing "?" in the method name this is a plain reader and not a
# boolean query.
def self.codon_table_dataset?
  @codon_table_dataset
end

.codon_table_in_use?Boolean

#

Bioroebe.codon_table_in_use?

Query method to return the currently used codon table.

#

Returns:

  • (Boolean)

80
81
82
# File 'lib/bioroebe/codons/codon_table.rb', line 80

# Returns the current value of @codon_table_in_use. Despite the
# trailing "?" in the method name this is a plain reader and not a
# boolean query.
def self.codon_table_in_use?
  @codon_table_in_use
end

.codon_tablesObject

#

Bioroebe.codon_tables

This method will return all codon tables that we have registered.

This is probably not so terribly useful for most projects, but in the event that you do need all codon tables, you can use this method.

The result will be a Hash having key->value pairs such as:

"9" => {"TAA"=>"*", "TAG"=>"*"
#

30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/bioroebe/codons/codon_tables.rb', line 30

# ========================================================================= #
# === Bioroebe.codon_tables
#
# Loads every .yml file found in the codon_tables/ subdirectory of the
# yaml directory, except for files ending in 'overview.yml', and
# returns a Hash. The keys are the file names without the '.yml'
# suffix, the values are the parsed datasets.
# ========================================================================= #
def self.codon_tables
  require 'bioroebe/requires/require_yaml.rb'
  pattern    = "#{yaml_directory?}codon_tables/*.yml"
  yaml_files = Dir[pattern].sort.reject {|path|
    path.end_with? 'overview.yml' # We reject this one here.
  }
  yaml_files.each_with_object({}) {|path, tables|
    table_name = File.basename(path).delete_suffix('.yml')
    tables[table_name] = YAML.load_file(path)
  }
end

.codons_for_this_aminoacid?(i = ARGV) ⇒ Boolean

#

Bioroebe.codons_for_this_aminoacid?

This method will return all possible DNA codons for a specific aminoacid, as an Array.

So for example, for the aminoacid serine, this method would return an Array containing all 6 codons that code for this aminoacid (if the eukaryotic codon table is used, which also includes humans).

This method supports querying only ONE aminoacid at a time.

Currently the method relies on the file called “codons_of_the_aminoacids.yml”. In the future, the method here will probably be changed to add support for different codon tables.

Specific invocation examples:

Bioroebe.codons_for?(:serine)
Bioroebe.codons_for?(:tyrosine)
Bioroebe.codons_for?(:threonine)
Bioroebe.codons_for?('T')

To test this for another organism, try:

Bioroebe.use_this_codon_table(:yeast_mitochondria)
Bioroebe.codons_for?('T')
Bioroebe.decode_this_aminoacid 'K' # => ["AAA", "AAG"]
#

Returns:

  • (Boolean)

322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
# File 'lib/bioroebe/codons/codons.rb', line 322

# ========================================================================= #
# === Bioroebe.codons_for_this_aminoacid?
#
# Returns an Array containing all codons that @codon_table_dataset
# maps to the given aminoacid.
#
# The aminoacid can be given as one-letter String such as 'K', as a
# Symbol such as :lysine, or as a String starting with ':' such as
# ':lysine'. Symbols are shortened to their first three letters and
# then looked up in AMINO_ACIDS_THREE_TO_ONE. If an Array is given
# then its first entry is used; nil and :default stand for :lysine.
# ========================================================================= #
def self.codons_for_this_aminoacid?(
    i = ARGV
  )
  # ======================================================================= #
  # First, convert the input a bit and sanitize it.
  # ======================================================================= #
  i = i.first if i.is_a? Array
  i = i.delete(':').to_sym if i.is_a?(String) and i.start_with?(':')
  i = :lysine if i.nil? or (i == :default)
  if i.is_a? Symbol
    # ===================================================================== #
    # === Convert e. g. :serine into 'ser', and then into 'S'
    # ===================================================================== #
    three_letters = i.to_s.downcase[0 .. 2]
    i = AMINO_ACIDS_THREE_TO_ONE[three_letters]
  end
  # ======================================================================= #
  # @codon_table_dataset is a Hash that has entries such as
  #
  #   "TTC" => "F"
  #
  # so we collect all keys whose value is our one-letter abbreviation.
  # ======================================================================= #
  @codon_table_dataset.each_with_object([]) {|(codon, aminoacid), codons|
    codons << codon if aminoacid == i
  }
end

.colourize_aa(i, array_colourize_these_aminoacids = array_colourize_this_aminoacid? ) ⇒ Object

#

Bioroebe.colourize_aa

Use this method if you wish to colourize an aminoacid, in a red colour.

The input should be the specific aminoacid sequence in question that you wish to see being colourized here.

This currently only works for aminoacids, and only in red. Perhaps at a later time it will become more flexible, but for now, it will be exclusive for aminoacids alone.

Usage example:

puts Bioroebe.colourize_aa 'STGYGGCTR', 'S T Y'
#

149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb', line 149

# ========================================================================= #
# === Bioroebe.colourize_aa
#
# Colourizes, via swarn(), every occurrence of the given aminoacids
# within the sequence `i`. The sequence is modified in place (through
# .gsub!) and is also returned.
#
# The aminoacids that are to be colourized are given as an Array, or
# as a String that is split on ' ', such as 'S T Y'.
#
# Nothing is colourized if colours are disabled, or if there are no
# aminoacids to colourize. A nil sequence leads to a notification.
# ========================================================================= #
def self.colourize_aa(
    i,
    array_colourize_these_aminoacids = array_colourize_this_aminoacid?
  )
  targets = array_colourize_these_aminoacids
  targets = targets.split(' ') if targets.is_a? String # Split it into an Array.
  return i unless use_colours? # But only if we use colours.
  return i if targets.empty?
  if i.nil?
    puts 'You first have to assign a sequence.'
    return i
  end
  # ======================================================================= #
  # Only touch the sequence if at least one aminoacid is to be
  # colourized.
  # ======================================================================= #
  needs_colours = i.chars.any? {|aminoacid| targets.include? aminoacid }
  if needs_colours
    targets.each {|aminoacid|
      i.gsub!(/(#{aminoacid})/, swarn('\\1')+rev)
    }
  end
  i
end

.colourize_this_aminoacid_sequence_for_the_commandline(i) ⇒ Object

#

Bioroebe.colourize_this_aminoacid_sequence_for_the_commandline

This method uses some hardcoded colour assignments to the 20 different aminoacids.

Usage example:

puts Bioroebe.colourize_this_aminoacid_sequence_for_the_commandline('NLKRSPTHY')
#

113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb', line 113

# ========================================================================= #
# === Bioroebe.colourize_this_aminoacid_sequence_for_the_commandline
#
# Returns a new String in which every known aminoacid letter of the
# input was passed through a colour method. Which colour method is
# used for which aminoacid is read from the YAML file at
# FILE_DEFAULT_COLOURS_FOR_THE_AMINOACIDS. All other characters are
# retained as they are. An Array as input is joined first.
# ========================================================================= #
def self.colourize_this_aminoacid_sequence_for_the_commandline(i)
  i = i.join if i.is_a? Array
  colour_for = YAML.load_file(FILE_DEFAULT_COLOURS_FOR_THE_AMINOACIDS)
  known_aminoacids = %w( A R N D B C E Q Z G H I L K M F P S T W Y V )
  i.chars.each_with_object(''.dup) {|aminoacid, coloured_sequence|
    if known_aminoacids.include? aminoacid
      # The dataset holds the name of the colour method to call.
      aminoacid = send(colour_for[aminoacid.to_s], aminoacid)
    end
    coloured_sequence << aminoacid
  }
end

.colourize_this_fasta_dna_sequence(i = nil, &block) ⇒ Object

#

Bioroebe.colourize_this_fasta_dna_sequence

This toplevel method can be used to colourize a FASTA (DNA) sequence, e. g. “ATGCGCGTATTA” and so forth.

Note that this is intended for the commandline, that is to be displayed on e. g. a KDE Konsole terminal.

Usage examples:

puts Bioroebe.colourize_this_fasta_dna_sequence('ATGCGCATGCGCGTATTAGTATTAATGCGCGTATTAATGCGCGTATTA')
puts Bioroebe.colourize_this_fasta_dna_sequence('ATGCGCATGCGCGTATTAGTATTAATGCGCGTATTAATGCGCGTATTA') { :with_ruler }
puts Bioroebe.colourize_this_fasta_dna_sequence('TGCGCGTATTAGTATTAATGCGCGTATTAATGCGCGTATTA') { :with_ruler_steelblue_colour }
#

274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
# File 'lib/bioroebe/toplevel_methods/fasta_and_fastq.rb', line 274

# ========================================================================= #
# === Bioroebe.colourize_this_fasta_dna_sequence
#
# Returns a colourized copy of the given nucleotide sequence. The
# input may be a String, an Array (joined via ' ') or the path of an
# existing file (whose content is read). nil leads to a notification
# and to nil being returned.
#
# The sequence is only colourized if use_colours? is true.
#
# A block may be given in order to prepend a ruler line: it should
# return :with_ruler, :add_ruler or :ruler for an uncoloured ruler, or
# a Symbol such as :with_ruler_steelblue_colour for a coloured one.
# ========================================================================= #
def self.colourize_this_fasta_dna_sequence(
    i = nil,
    &block
  )
  # Only load the ruler code if it was not yet made available.
  unless ::Bioroebe.respond_to?(:ruler_return_as_string_without_colours)
    require 'bioroebe/misc/ruler.rb'
  end
  if i.nil?
    e 'Please provide a valid FASTA sequence as input to '\
      'Bioroebe.colourize_this_fasta_dna_sequence()'
    return
  end
  if i.is_a? Array
    # ===================================================================== #
    # Arrays will be joined together.
    # ===================================================================== #
    i = i.join(' ').strip
  end
  # ======================================================================= #
  # Check for existing files next:
  # ======================================================================= #
  if i and File.file?(i)
    i = File.read(i)
  end
  # The uncoloured copy is what the ruler is calculated from further below.
  original_input = i.dup
  i = i.dup # Always dup it here.
  if i.is_a? String
    # ===================================================================== #
    # The colours are either defined in a file called
    # 'colourize_fasta_sequences.yml' or they are simply hardcoded.
    #
    # The preferred (and thus default) way is to simply make use
    # of that .yml file. That works on my home system, so it
    # should work for other people as well.
    # ===================================================================== #
    if use_colours?
      this_file = FILE_COLOURIZE_FASTA_SEQUENCES
      if File.exist? this_file
        # Each pair maps a nucleotide to the name of a method on Colours.
        dataset_for_the_colours = YAML.load_file(this_file)
        dataset_for_the_colours.each_pair {|this_nucleotide, this_colour_to_be_used|
          i.gsub!(
            /#{this_nucleotide}/,
            Colours.send(this_colour_to_be_used, this_nucleotide)+
            rev
          )
        }
      else
        # Hardcoded fallback colours, used if the .yml file is missing.
        i.gsub!(/A/, "#{teal('A')}#{rev}")
        i.gsub!(/C/, "#{slateblue('C')}#{rev}")
        i.gsub!(/G/, "#{royalblue('G')}#{rev}")
        i.gsub!(/T/, "#{steelblue('T')}#{rev}")
        i.gsub!(/U/, "#{steelblue('U')}#{rev}") # Uracil is just the same as Thymine.
      end
    end
  end
  # ======================================================================= #
  # === Handle blocks next
  # ======================================================================= #
  if block_given?
    yielded = yield
    case yielded
    # ===================================================================== #
    # === with_ruler
    # ===================================================================== #
    when :with_ruler,
         :add_ruler,
         :ruler
      i.prepend(
        ::Bioroebe.ruler_return_as_string_without_colours(original_input)+
        "\n"
      )
    else # Assume something like:
         #   :with_ruler_steelblue_colour
      if yielded.to_s.include? 'colo' # This assumes "colour" or "color".
        # Strip the "_colour" suffix and the "with_ruler_" prefix; what
        # remains is used as the name of the colour method to call.
        use_this_colour = yielded.to_s.sub(/_colou?r/,'').
                                       sub(/with_ruler_/,'')
        this_string = send(use_this_colour,
          ::Bioroebe.ruler_return_as_string_without_colours(original_input)+
          "\n"
        )
        i.prepend(this_string)
      end
    end
  end
  return i
end

.colours(enable_or_disable = '+') ⇒ Object

#

Bioroebe.colours

This method can be used to quickly enable or disable colours, by passing '+' or '-'.

#

80
81
82
83
84
85
86
87
88
89
90
# File 'lib/bioroebe/colours/colours.rb', line 80

# Quickly enable or disable colours.
#
# The argument is converted to a String first: '+' or 'true' calls
# enable_colours; '-', 'false' or an empty string (which includes nil)
# calls disable_colours. Any other value does nothing and returns nil.
def self.colours(enable_or_disable = '+')
  toggle = enable_or_disable.to_s
  return enable_colours  if ['+', 'true'].include?(toggle)
  return disable_colours if ['-', 'false', ''].include?(toggle)
  nil
end

.complement(i = nil) ⇒ Object

#

Bioroebe.complement

This method will return the complementary DNA strand.

Ambiguous nucleotide codes are expanded into their possibilities: N becomes "(A/T/G/C)", R becomes "(A/G)" and Y becomes "(T/C)".

Usage example:

Bioroebe.complement 'ATGGGTCCC' # => "TACCCAGGG"
#

21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/bioroebe/toplevel_methods/complement.rb', line 21

# Return the complementary DNA strand for the given input.
#
# If an Array is given, only its first element is used. If the input
# names an existing file, the content of that file is used instead.
# Characters found in HASH_DNA_NUCLEOTIDES are replaced by their partner;
# the ambiguity codes n, r and y are expanded into their possibilities;
# every other character is dropped. Returns nil when no input was given.
def self.complement(
    i = nil
  )
  require 'bioroebe/constants/nucleotides.rb'
  partner_for = HASH_DNA_NUCLEOTIDES # Refer to the main Hash.
  ambiguity_codes = {
    'n' => '(A/T/G/C)', # Means any.
    'r' => '(A/G)',     # Means a purine.     (larger)
    'y' => '(T/C)'      # Means a pyrimidine. (smaller)
  }
  i = i.first if i.is_a? Array
  return nil unless i
  i = File.readlines(i).join(' ').strip if File.exist?(i)
  i.each_char.with_object(''.dup) { |char, result|
    upcased = char.upcase
    if partner_for.has_key?(upcased)
      result << partner_for[upcased]
    elsif (expanded = ambiguity_codes[upcased.downcase])
      result << expanded
    end
  }
end

.complementary_dna_strand(i = ARGV) ⇒ Object

#

Bioroebe.complementary_dna_strand

This method will simply return the corresponding (complementary) DNA strand.

Usage example:

Bioroebe.complementary_dna_strand('ATCATCATC') # => "TAGTAGTAG"
#

155
156
157
# File 'lib/bioroebe/nucleotides/complementary_dna_strand.rb', line 155

# Return the complementary DNA strand for the given input.
#
# This delegates to Bioroebe::ComplementaryDnaStrand and returns whatever
# its result? method yields.
def self.complementary_dna_strand(i = ARGV)
  return Bioroebe::ComplementaryDnaStrand.new(i).result?
end

.complementary_rna_strand(i) ⇒ Object

#

Bioroebe.complementary_rna_strand

This method will simply return the corresponding (complementary) RNA strand.

Usage example:

Bioroebe.complementary_rna_strand('ATCATCATC') # => "UAGUAGUAG"
#

33
34
35
36
37
38
39
# File 'lib/bioroebe/toplevel_methods/nucleotides.rb', line 33

# Return the complementary RNA strand for the given input.
#
# If an Array is given, only its first element is used. Every character
# is looked up in partner_nucleotide_hash (characters without an entry
# vanish from the result), then every 'T' is replaced by 'U'.
def self.complementary_rna_strand(i)
  i = i.first if i.is_a? Array
  partners = partner_nucleotide_hash
  dna_complement = i.each_char.map { |nucleotide| partners[nucleotide] }.join
  dna_complement.tr('T', 'U')
end

.compseq(i = ARGV) ⇒ Object

#

Bioroebe.compseq

#

512
513
514
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 512

# Run Bioroebe::Compseq on the given input and return its result as a
# String. The instance is handed a block yielding
# :disable_colours_and_be_quiet.
def self.compseq(i = ARGV)
  instance = Bioroebe::Compseq.new(i) { :disable_colours_and_be_quiet }
  instance.result_as_string?
end

.convert_global_env(i) ⇒ Object

#

Bioroebe.convert_global_env

Note that the method will pick only the first argument given to it if an Array is supplied.

#

16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/bioroebe/toplevel_methods/convert_global_env.rb', line 16

# Convert the given name via the external ConvertGlobalEnv gem.
#
# If an Array is given, only its first element is used. When the
# ConvertGlobalEnv constant is not available (and the gem cannot be
# required), the input is returned unchanged. Otherwise a leading '$' is
# added if missing and the result of ConvertGlobalEnv.convert is returned.
#
# The caller's String is never modified; a new String is built when the
# '$' prefix has to be added.
def self.convert_global_env(i)
  i = i.first if i.is_a? Array
  unless Object.const_defined? :ConvertGlobalEnv
    begin # Require an external gem in this case.
      require 'convert_global_env'
    rescue LoadError
      # The gem is optional; fall through and return the input as-is.
    end
  end
  return i unless Object.const_defined? :ConvertGlobalEnv
  i = "$#{i}" if i && !i.start_with?('$')
  ConvertGlobalEnv.convert(i, :do_not_report_errors) # Handle ENV variables.
end

.convert_one_letter_to_full(i) ⇒ Object

#

Bioroebe.convert_one_letter_to_full

Convert one aminoacid to the real name.

Usage example:

Bioroebe.convert_one_letter_to_full('T') # => "threonine"
#

207
208
209
210
211
212
213
214
215
216
# File 'lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb', line 207

# Convert a one-letter aminoacid code into the full aminoacid name.
#
# The letter is first turned into its three-letter code via
# convert_one_letter_to_three, which is then looked up in
# AMINO_ACIDS_ABBREVIATIONS. When an Array is given, every entry is
# converted and an Array with the converted names is returned.
def self.convert_one_letter_to_full(i)
  if i.is_a? Array
    # map (rather than each) so the converted names are actually returned.
    return i.map {|entry| convert_one_letter_to_full(entry) }
  end
  three_letters = convert_one_letter_to_three(i.to_s.downcase)
  AMINO_ACIDS_ABBREVIATIONS[three_letters]
end

.convert_one_letter_to_three(i) ⇒ Object

#

Bioroebe.convert_one_letter_to_three

Convert a one-letter-code for an aminoacid into the slightly longer three-letter-code variant for that particular aminoacid.

Note that this method will return the result in a downcased variant, such as “gly” for “glycine”.

Returns:

A string of three characters, if it is a valid one-letter aminoacid.

Usage example for an aminoacid such as Glycine:

Bioroebe.convert_one_letter_to_three('G') # => "gly"
#

519
520
521
# File 'lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb', line 519

# Convert a one-letter aminoacid code into its three-letter code.
#
# The lookup inverts AMINO_ACIDS_THREE_TO_ONE and uses the upcased input
# as key; nil is returned when there is no such entry.
def self.convert_one_letter_to_three(i)
  one_to_three = AMINO_ACIDS_THREE_TO_ONE.invert
  one_to_three[i.upcase]
end

.convert_this_codon_to_that_aminoacid(i = ARGV, shall_we_replace_stop_codons = :default, use_this_codon_table = codon_table_dataset?.dup) ⇒ Object

#

Bioroebe.convert_this_codon_to_that_aminoacid

This method can be used to convert a particular codon, such as 'ATG', into the corresponding aminoacid, in this case being 'M' for Methionin. A String will be returned by this method.

The characters in that String will correspond to the one-letter format of aminoacids, such as the example shown above 'M' for Methionin.

The method has to handle invalid input arguments as well, to some extent.

The method supports both DNA and mRNA as “input”.

In order for the method to do its job properly, the method will make use of the codon table stored in:

Bioroebe.aa_to_rna_codon_table

Also note that this method can easily be used to translate other frames - simply provide the proper input to it.

Note that this method will return the result - it will NOT output the result.

Usage examples:

puts Bioroebe.codon_to_aminoacid('ATG') # => "M"
puts Bioroebe.codon_to_aminoacid('GUG') # => "V"
puts Bioroebe.decode_this_codon('CCG')  # => "P"
puts Bioroebe.codon_to_aminoacid('GUGGUG') # => "VV"
puts Bioroebe.codon_to_aminoacid('ATGA') # => "M"
puts Bioroebe.convert_dna_to_protein_sequence('ATGA') # => "M"
puts Bioroebe.convert_this_codon_to_that_aminoacid('ATGA') # => "M"
#

49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/bioroebe/codons/convert_this_codon_to_that_aminoacid.rb', line 49

# Translate a nucleotide sequence, codon by codon, into a String.
#
# i                            - String or Array (an Array is joined). The
#                                input is upcased and every 'U' becomes 'T'.
# shall_we_replace_stop_codons - when true (or :default), every 'STOP' in a
#                                translated entry is replaced by '*'.
# use_this_codon_table         - Hash mapping a DNA codon to its translation.
#
# The sequence is cut into triplets; a trailing incomplete triplet is
# ignored. Triplets that are not a key of the codon table are kept as-is.
#
# Neither the input nor the values stored in the codon table are modified.
def self.convert_this_codon_to_that_aminoacid(
    i                            = ARGV,
    shall_we_replace_stop_codons = :default,
    use_this_codon_table         = codon_table_dataset?.dup
  )
  shall_we_replace_stop_codons = true if shall_we_replace_stop_codons == :default
  i = i.join if i.is_a? Array
  sequence = i.to_s.upcase.tr('U', 'T') # We need DNA here, so U becomes T.
  translated = sequence.scan(/.../).map {|potential_codon|
    if use_this_codon_table.has_key? potential_codon
      potential_codon = use_this_codon_table[potential_codon]
    end
    # Use the non-mutating gsub: the value may be the very String object
    # stored in the codon table (the table is only copied shallowly).
    if shall_we_replace_stop_codons && potential_codon.include?('STOP')
      potential_codon = potential_codon.gsub(/STOP/, '*')
    end
    potential_codon
  }
  return translated.join
end

.count_amount_of_aminoacids(i) ⇒ Object

#

Bioroebe.count_amount_of_aminoacids

#

342
343
344
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 342

# Create a new CountAmountOfAminoacids instance for the given input and
# return that instance.
def self.count_amount_of_aminoacids(i)
  counter = CountAmountOfAminoacids.new(i)
  return counter
end

.count_amount_of_nucleotides(i) ⇒ Object

#

Bioroebe.count_amount_of_nucleotides

This method will always return the result in the form of a single line. The order is: A C G T

This can also be used to solve a problem listed at Rosalind.

Invocation examples:

Bioroebe.count_amount_of_nucleotides 'AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC' => "20 17 12 21"
Bioroebe.count_amount_of_nucleotides File.read('/rosalind_dna.txt').strip
#

483
484
485
486
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 483

# Count the nucleotides of the given input and return the short,
# single-line form.
#
# A Bioroebe::CountAmountOfNucleotides instance is created with
# :do_not_run_yet and a block yielding :display_short_form; its
# single-line short form is returned.
def self.count_amount_of_nucleotides(i)
  counter = ::Bioroebe::CountAmountOfNucleotides.new(i, :do_not_run_yet) {
    :display_short_form
  }
  counter.return_the_amount_of_nucleotides_in_short_form_on_a_single_line
end

.count_AT(i = ARGV) ⇒ Object

#

Bioroebe.count_AT

This method will count how many characters in a given String are “A” or “T”, in total. The method will assume that an Array passed to it is meant to be a String.

So, every time this method encounters a “A” or a “T” in that string, we will “add” +1 to the number that will be returned by that method.

Usage example:

Bioroebe.count_AT 'ATTATATACCGCGCCCATATAAA' # => 15
#

25
26
27
28
29
# File 'lib/bioroebe/count/count_at.rb', line 25

# Count how many characters of the input are 'A' or 'T', ignoring case.
#
# An Array is joined with ' ' and stripped before counting.
def self.count_AT(i = ARGV)
  i = i.join(' ').strip if i.is_a? Array
  i.upcase.count('AT')
end

.count_GC(i = ARGV) ⇒ Object

#

Bioroebe.count_GC

This method will count how many characters in a given String are “G” or “C”, in total. The method will assume that an Array passed to it is meant to be a String.

So, every time this method encounters a “G” or a “C” in that string, we will “add” +1 to the number that will be returned by that method.

Specific usage examples:

Bioroebe.count_GC 'ATTATTATGGCCAATATA' # => 4
Bioroebe.count_GC 'ATG' # => 1
#

27
28
29
30
31
# File 'lib/bioroebe/count/count_gc.rb', line 27

# Count how many characters of the input are 'G' or 'C', ignoring case.
#
# An Array is joined with ' ' and stripped before counting.
def self.count_GC(i = ARGV)
  i = i.join(' ').strip if i.is_a? Array
  i.upcase.count('GC')
end

.create_file(i) ⇒ Object

#

Bioroebe.create_file

This method can be used to create a file.

#

61
62
63
# File 'lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb', line 61

# Create the file at the given path via FileUtils.touch, unless a regular
# file already exists there (in which case nothing happens and nil is
# returned).
def self.create_file(i)
  return if File.file?(i)
  FileUtils.touch(i)
end

.create_jar_archiveObject

#

Bioroebe.create_jar_archive

This method will create a .jar file.

To invoke it from the commandline do:

bioroebe --jar

To execute a .jar file do:

java -jar foobar.jar
#

33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/bioroebe/toplevel_methods/misc.rb', line 33

# Build bioroebe.jar via the external `jar` command and move it into the
# directory the method was called from.
#
# NOTE(review): the source directory below is a hard-coded absolute path;
# the method can presumably only work on a machine with that layout.
def self.create_jar_archive
  e 'Creating a .jar archive next:'
  e
  # Remember where we started, so we can return there afterwards.
  original_dir = return_pwd
  cd '/home/x/programming/ruby/src/bioroebe/lib/bioroebe/java/bioroebe/src/main/java/'
  esystem 'jar cf bioroebe.jar bioroebe/'
  # Resolve the absolute path while we are still inside the java/ directory.
  target_file = File.absolute_path('bioroebe.jar')
  cd original_dir
  if File.exist? target_file
    e 'Moving the created .jar file into the current working '\
      'directory next.'
    move_file(target_file, original_dir)
    e 'It should now be at:'
    e
    # NOTE(review): no separator is inserted here, so this presumably
    # relies on return_pwd yielding a path with a trailing '/' - confirm.
    e sfile("  #{original_dir}#{File.basename(target_file)}")
    e
  end
#   esystem 'jar cfe bioroebe.jar myClass myClass.class'
  e
end

.create_new_sequence(i = ARGV, &block) ⇒ Object

#

Bioroebe.create_new_sequence

Create a new Bioroebe::Sequence object. It will also assign to the @sequence module-level instance variable.

#

716
717
718
# File 'lib/bioroebe/sequence/sequence.rb', line 716

# Create a new ::Bioroebe::Sequence from the given input (forwarding the
# block, if any), store it in the module-level @sequence variable and
# return it.
def self.create_new_sequence(i = ARGV, &block)
  new_sequence = ::Bioroebe::Sequence.new(i, &block)
  @sequence = new_sequence
end

.create_random_aminoacids(how_many_aminoacids = CREATE_N_AMINOACIDS, split_at = nil, be_verbose = false, &block) ⇒ Object

#

Bioroebe.create_random_aminoacids

This method will create a random chain of aminoacids.

The first argument to this method shall denote how many aminoacids are to be generated, e. g. 25 would mean to create “25 aminoacids”.

If the second argument, called `split_at`, is not nil and is a number, then this method will add a newline into the returned String.

This method will return a String, consisting of the random aminoacids.

Usage Examples:

Bioroebe.create_random_aminoacids 125
Bioroebe.create_random_aminoacids  25 # => "SQHWVGGGVSRCWLMWAPECMYVWW"
Bioroebe.create_random_aminoacids  15 # => "CLKHMLMGLVAEEKA"
Bioroebe.random_aminoacids(5) # => "STRRM"
Bioroebe.random_aminoacids(8) # => "TRTQHSNN"
#

200
201
202
203
204
205
206
207
208
209
210
211
212
213
# File 'lib/bioroebe/aminoacids/create_random_aminoacids.rb', line 200

# Create a random chain of aminoacids and return it.
#
# All arguments, and the block, are forwarded unchanged to
# ::Bioroebe::CreateRandomAminoacids.new; the value of that instance's
# amino_acid_sequence method is returned.
def self.create_random_aminoacids(
    how_many_aminoacids = CREATE_N_AMINOACIDS,
    split_at            = nil,
    be_verbose          = false,
    &block
  )
  generator = ::Bioroebe::CreateRandomAminoacids.new(
    how_many_aminoacids, split_at, be_verbose, &block
  )
  generator.amino_acid_sequence
end

.create_the_pdf_tutorial(read_from_this_file = '/home/x/programming/ruby/src/bioroebe/README.md', store_where = '/Depot/j/example.pdf') ⇒ Object

#

Bioroebe.create_the_pdf_tutorial

This method can be used to quickly turn the README.md file into a .pdf file, for whatever the reason the user may want this.

#

392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
# File 'lib/bioroebe/toplevel_methods/misc.rb', line 392

# Turn a text file (by default the project's README.md) into a .pdf file,
# using the prawn gem.
#
# read_from_this_file - path of the file whose content becomes the body.
# store_where         - path where the generated .pdf file is written.
#
# NOTE(review): both defaults are absolute paths that presumably only
# exist on the original author's machine.
def self.create_the_pdf_tutorial(
    read_from_this_file = '/home/x/programming/ruby/src/bioroebe/README.md',
    store_where         = '/Depot/j/example.pdf'
  )

  require 'prawn'

  Prawn::Fonts::AFM.hide_m17n_warning = true # Hide a useless warning.

  pdf = Prawn::Document.new(
          page_size: 'A4',
          page_layout: :landscape
        )
  # First page: the title.
  pdf.text "The Bioroebe Project", size: 80
  # Second page: the subtitle.
  pdf.start_new_page
  pdf.bounding_box [50, 600], width: 200 do
    pdf.fill_color '000000'
    pdf.text "making bioinformatics great again:", size: 15
  end
  # Remaining pages: the content of the input file.
  pdf.start_new_page
  dataset = File.read(read_from_this_file, encoding: UTF_ENCODING)
  # Re-encode to Windows-1252, replacing whatever cannot be represented;
  # presumably required by the built-in AFM fonts - confirm.
  dataset = dataset.encode("Windows-1252", invalid: :replace, undef: :replace)

  pdf.text(dataset)
  e 'Storing at this location: '+store_where
  pdf.render_file store_where
end

.decode_this_aminoacid_sequence(i = 'KKKA') ⇒ Object

#

Bioroebe.decode_this_aminoacid_sequence

This method can be used as means to decode an aminoacid sequence, such as a String like 'KKKA'.

The input to this method may also be in the form of an Array, such as ['K','K','K','A']. Only valid one-letter aminoacids will be honoured by this method; invalid letters will be silently dropped.

After that, this method will replace all valid letters, that is valid aminoacids (in single letter code), with the corresponding codon. It will return all possibilities.

Invocation example:

Bioroebe.decode_this_aminoacid_sequence('KKKA') # => [["AAG", "AAA"], ["AAG", "AAA"], ["AAG", "AAA"], ["GCT", "GCC", "GCA", "GCG"]]
#

385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
# File 'lib/bioroebe/codons/codons.rb', line 385

# Decode an aminoacid sequence into the codons of each aminoacid.
#
# An Array is joined into a String first. A String containing '-' is
# treated as three-letter codes separated by '-', each converted via
# ::Bioroebe.three_to_one; any other String is split into single
# characters. Every resulting entry is then passed to
# ::Bioroebe.decode_this_aminoacid and the Array of results is returned.
def self.decode_this_aminoacid_sequence(
    i = 'KKKA'
  )
  i = i.join if i.is_a? Array
  if i.is_a? String
    i = if i.include?('-') # We may have a 3-letter code too.
          i.split('-').map {|entry| ::Bioroebe.three_to_one(entry) }
        else
          i.split(//)
        end
  end
  [i].flatten.map {|entry| ::Bioroebe.decode_this_aminoacid(entry) }
end

.deduce_aminoacid_sequence(from_this_sequence = :default) ⇒ Object

#

Bioroebe.deduce_aminoacid_sequence

#

439
440
441
442
443
# File 'lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb', line 439

# Create a new Bioroebe::DeduceAminoacidSequence instance for the given
# sequence and return that instance.
def self.deduce_aminoacid_sequence(
    from_this_sequence = :default
  )
  deducer = Bioroebe::DeduceAminoacidSequence.new(from_this_sequence)
  return deducer
end

.deduce_most_likely_aminoacid_sequence(from_this_sequence = :default) ⇒ Object

#

Bioroebe.deduce_most_likely_aminoacid_sequence

#

144
145
146
# File 'lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb', line 144

# Create a new
# Bioroebe::MostLikelyNucleotideSequenceForThisAminoacidSequence instance
# for the given sequence and return that instance.
def self.deduce_most_likely_aminoacid_sequence(from_this_sequence = :default)
  instance = Bioroebe::MostLikelyNucleotideSequenceForThisAminoacidSequence.new(
    from_this_sequence
  )
  return instance
end

.deduce_most_likely_aminoacid_sequence_as_string(i, use_this_codon_tables_frequencies = :default) ⇒ Object

#

Bioroebe.deduce_most_likely_aminoacid_sequence_as_string

This method will attempt to deduce the most likely codon (nucleotide) sequence for a given aminoacid sequence, and return it as a String.

Usage example:

Bioroebe.deduce_most_likely_aminoacid_sequence_as_string('KKKA') # => "AAGAAGAAGGCC"
#

452
453
454
455
456
457
458
459
460
461
# File 'lib/bioroebe/codons/codons.rb', line 452

# Return the most likely codon sequence for the given aminoacid sequence.
#
# The work is delegated to
# return_the_most_likely_codon_sequence_for_this_aminoacid_sequence; if
# that yields an Array, the Array is joined into a single String.
def self.deduce_most_likely_aminoacid_sequence_as_string(
    i, use_this_codon_tables_frequencies = :default
  )
  codons = return_the_most_likely_codon_sequence_for_this_aminoacid_sequence(
    i, use_this_codon_tables_frequencies
  )
  codons.is_a?(Array) ? codons.join : codons
end

.default_colour?Boolean

#

Bioroebe.default_colour?

#

Returns:

  • (Boolean)

45
46
47
# File 'lib/bioroebe/colours/colours.rb', line 45

# Return the current value of the module-level @default_colour variable.
def self.default_colour?
  return @default_colour
end

.delimiter?Boolean

#

Bioroebe.delimiter?

This is simply the delimiter used for reading “multiline input” of the Bioroebe::Shell component.

#

Returns:

  • (Boolean)

15
16
17
# File 'lib/bioroebe/toplevel_methods/delimiter.rb', line 15

# Return the delimiter String ('___') used for reading multiline input
# in the Bioroebe::Shell component.
def self.delimiter?
  return '___'
end

.determine_n_glycosylation_matches(of_this_protein_sequence = 'MKNKFKTQEELVNHLKTVGFVFANSEIYNGLANAWDYGPLGVLLKNNLKNLWWKEFVTKQKDV'\ 'VGLDSAIILNPLVWKASGHLDNFSDPLIDCKNCKARYRADKLIESFDENIHIAENSSNEEFAK'\ 'VLNDYEISCPTCKQFNWTEIRHFNLMFKTYQGVIEDAKNVVYLRPETAQGIFVNFKNVQRSMR'\ 'LHLPFGIAQIGKSFRNEITPGNFIFRTREFEQMEIEFFLKEESAYDIFDKYLNQIENWLVSAC'\ 'GLSLNNLRKHEHPKEELSHYSKKTIDFEYNFLHGFSELYGIAYRTNYDLSVHMNLSKKDLTYF'\ 'DEQTKEKYVPHVIEPSVGVERLLYAILTEATFIEKLENDDERILMDLKYDLAPYKIAVMPLVN'\ 'KLKDKAEEIYGKILDLNISATFDNSGSIGKRYRRQDAIGTIYCLTIDFDSLDDQQDPSFTIRE'\ 'RNSMAQKRIKLSELPLYLNQKAHEDFQRQCQK') ⇒ Object

#

Bioroebe.determine_n_glycosylation_matches

This method can be used to determine N-Glycosylation patterns in a protein.

The input to this method should be an aminoacid chain - aka a protein sequence.

This method will return an Array. This Array holds the indices where a N-glycosylation pattern begins.

Usage example:

Bioroebe.determine_n_glycosylation_matches # => [85, 118, 142, 306, 395]
#

36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/bioroebe/toplevel_methods/matches.rb', line 36

# Determine where N-glycosylation patterns begin in a protein sequence.
#
# of_this_protein_sequence - an aminoacid String, or an Array of such
#                            Strings.
#
# Returns an Array holding the 1-based start position of every match of
# REGEX_FOR_N_GLYCOSYLATION_PATTERN, in order of appearance. A motif that
# occurs several times is reported once per occurrence. For an Array as
# input, an Array with one such result per sequence is returned.
def self.determine_n_glycosylation_matches(
    of_this_protein_sequence =
      'MKNKFKTQEELVNHLKTVGFVFANSEIYNGLANAWDYGPLGVLLKNNLKNLWWKEFVTKQKDV'\
      'VGLDSAIILNPLVWKASGHLDNFSDPLIDCKNCKARYRADKLIESFDENIHIAENSSNEEFAK'\
      'VLNDYEISCPTCKQFNWTEIRHFNLMFKTYQGVIEDAKNVVYLRPETAQGIFVNFKNVQRSMR'\
      'LHLPFGIAQIGKSFRNEITPGNFIFRTREFEQMEIEFFLKEESAYDIFDKYLNQIENWLVSAC'\
      'GLSLNNLRKHEHPKEELSHYSKKTIDFEYNFLHGFSELYGIAYRTNYDLSVHMNLSKKDLTYF'\
      'DEQTKEKYVPHVIEPSVGVERLLYAILTEATFIEKLENDDERILMDLKYDLAPYKIAVMPLVN'\
      'KLKDKAEEIYGKILDLNISATFDNSGSIGKRYRRQDAIGTIYCLTIDFDSLDDQQDPSFTIRE'\
      'RNSMAQKRIKLSELPLYLNQKAHEDFQRQCQK'
  )
  if of_this_protein_sequence.is_a? Array
    # map (rather than each) so the per-sequence results are returned.
    return of_this_protein_sequence.map {|this_sequence|
      determine_n_glycosylation_matches(this_sequence)
    }
  end
  # Take the position from the match itself instead of searching for the
  # matched substring again; the latter only ever finds the first
  # occurrence of a repeated motif.
  of_this_protein_sequence.to_enum(
    :scan, REGEX_FOR_N_GLYCOSYLATION_PATTERN
  ).map {
    Regexp.last_match.begin(0)+1 # +1 because ruby starts at 0.
  }
end

.determine_start_codons_from_the_codon_table(this_codon_table_dataset = @codon_table_dataset) ⇒ Object

#

Bioroebe.determine_start_codons_from_the_codon_table

#

61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/bioroebe/codons/codons.rb', line 61

# Determine the start codon from the given codon table and register it
# via set_start_codon.
#
# The value stored under the key 'START' is used; nil is passed on when
# the table has no such key.
def self.determine_start_codons_from_the_codon_table(
    this_codon_table_dataset = @codon_table_dataset
  )
  start_entries = this_codon_table_dataset.select {|key, _value|
    key == 'START' # The start codon is stored under this key.
  }
  set_start_codon(start_entries.values.first)
end

.determine_stop_codons_from_the_codon_table(this_codon_table_dataset = @codon_table_dataset) ⇒ Object

#

Bioroebe.determine_stop_codons_from_the_codon_table

This method will determine the stop codons in use for the given species/organism, depending on the proper codon table.

#

45
46
47
48
49
50
51
52
53
54
55
# File 'lib/bioroebe/codons/codons.rb', line 45

# Determine the stop codons from the given codon table and register them
# via set_stop_codons.
#
# Every key whose value is '*' counts as a stop codon.
def self.determine_stop_codons_from_the_codon_table(
    this_codon_table_dataset = @codon_table_dataset
  )
  stop_entries = this_codon_table_dataset.select {|_codon, aminoacid|
    aminoacid == '*' # '*' refers to a stop codon.
  }
  set_stop_codons(stop_entries.keys)
end

.digest_this_dna(this_DNA_sequence, hash = {}) ⇒ Object

#

Bioroebe.digest_this_dna

Usage examples:

Bioroebe.digest_this_dna(:lambda_genome, with: :EcoRI)
Bioroebe.digest_this_dna("/root/Bioroebe/fasta/NC_001416.1_Enterobacteria_phage_lambda_complete_genome.fasta", with: :EcoRI)
#

18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/bioroebe/toplevel_methods/digest.rb', line 18

# Report how often the recognition sequence of a restriction enzyme
# occurs in a DNA sequence read from a FASTA file, and print the sizes of
# the resulting fragments (largest first).
#
# this_DNA_sequence - path to a FASTA file; for an Array only the first
#                     element is used.
# hash              - options; the value of :with names the restriction
#                     enzyme to use.
#
# NOTE(review): nucleotide_sequence is only assigned when the input is a
# String naming an existing file. For any other input (such as the
# :lambda_genome Symbol shown in the usage example) it stays nil and the
# include? call further below raises NoMethodError - confirm the intended
# handling.
def self.digest_this_dna(
    this_DNA_sequence, hash = {}
  )
  require 'bioroebe/toplevel_methods/e.rb'
  require 'bioroebe/constants/files_and_directories.rb'
  require 'bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb'
  restriction_enzymes = Bioroebe.load_and_return_the_restriction_enzymes
  this_restriction_enzyme = nil
  nucleotide_sequence = nil
  if this_DNA_sequence.is_a? Array
    this_DNA_sequence = this_DNA_sequence.first
  end
  if this_DNA_sequence.is_a?(String) and File.exist?(this_DNA_sequence)
    nucleotide_sequence = ::Bioroebe::ParseFasta.new(this_DNA_sequence).sequence?
  end
  # ======================================================================= #
  # === Handle the hash next (and ensure that it is a Hash)
  # ======================================================================= #
  if hash.is_a? Hash
    # ===================================================================== #
    # === :with
    # ===================================================================== #
    if hash.has_key? :with
      # Note that delete removes the :with entry from the caller's Hash.
      this_restriction_enzyme = hash.delete(:with).to_s
    end
  end
  # NOTE(review): when no :with was given, or the enzyme is unknown, the
  # lookup presumably yields nil and the code below cannot work - confirm.
  target_sequence = restriction_enzymes[this_restriction_enzyme].dup
  if target_sequence =~ /\d$/ # If it ends with a number.
    # Drop that trailing digit and any surrounding whitespace.
    target_sequence.chop!
    target_sequence.strip!
  end
  if nucleotide_sequence.include?(target_sequence)
    e 'Yes, this sequence is there.'
    scanned = nucleotide_sequence.scan(
      /#{target_sequence}/
    )
    e "It can be found #{scanned.size.to_s} times."
    # Cut the sequence at every occurrence of the recognition sequence.
    sub_sequences = nucleotide_sequence.split(/#{target_sequence}/)
    sub_sequences.sort_by {|entry| entry.size }.reverse.each {|sequence|
      pp sequence.size
    }
  else
    e 'Nothing found.'
  end
end

.directory_frequencies?(codon_tables_directory = CODON_TABLES_DIRECTORY) ⇒ Boolean

#

Bioroebe.directory_frequencies?

Preferentially use this method past the year 2022 - it is a tiny bit more flexible than the above constant.

#

Returns:

  • (Boolean)

133
134
135
136
137
# File 'lib/bioroebe/constants/files_and_directories.rb', line 133

# Return the path of the frequencies/ subdirectory: the given codon
# tables directory with 'frequencies/' appended. No separator is added,
# so the given directory has to end with one.
def self.directory_frequencies?(
    codon_tables_directory = CODON_TABLES_DIRECTORY
  )
  format('%sfrequencies/', codon_tables_directory)
end

.disable_colours(be_verbose = false) ⇒ Object

#

Bioroebe.disable_colours

Use this method if you wish to disable colours for the whole Bioroebe project.

#

98
99
100
101
102
103
# File 'lib/bioroebe/colours/colours.rb', line 98

# Disable colours for the whole Bioroebe project by setting the
# module-level @use_colours variable to false.
#
# be_verbose - when truthy, a short notification is printed first.
def self.disable_colours(be_verbose = false)
  e 'Disabling colours.' if be_verbose
  @use_colours = false
end

.display_all_open_reading_frames_from_this_sequence(i = ARGV) ⇒ Object

#

Bioroebe.display_all_open_reading_frames_from_this_sequence

#

173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
# File 'lib/bioroebe/toplevel_methods/open_reading_frames.rb', line 173

# Display all open reading frames (ORFs) found in the given sequence.
#
# With an empty input, the ORFs returned by
# Bioroebe.return_all_open_reading_frames_from_this_sequence (called
# without arguments) are pretty-printed, followed by the result of
# Bioroebe.to_aa for them. Otherwise the ORFs of the given input are
# listed, one per line and numbered starting at 1.
#
# The input has to respond to empty? and join, as an Array does.
def self.display_all_open_reading_frames_from_this_sequence(i = ARGV)
  require 'bioroebe/colours/colours.rb'
  if i.empty?
    orfs = Bioroebe.return_all_open_reading_frames_from_this_sequence
    pp orfs
    pp Bioroebe.to_aa(orfs)
  else
    orfs = return_all_open_reading_frames_from_this_sequence(i)
    joined_sequence = i.join
    if orfs.empty?
      e "No open reading from has been found from "\
        "this sequence: #{joined_sequence}"
    else
      e rev+
        'The following ORFs have been found in this sequence: '
      e
      e "  #{Colours.lightgreen(joined_sequence)}"
      e
      orfs.each.with_index(1) {|sequence, number|
        name_for_the_ORF = "ORF number #{number}"
        e "  #{Colours.steelblue(sequence.ljust(50))} "\
          "#{Colours.lightslategrey('#')} "\
          "#{Colours.mediumseagreen(name_for_the_ORF)}"
      }
      e
    end
  end
end

.dna_sequence(i) ⇒ Object

#

Bioroebe.dna_sequence

Usage example:

dna = Bioroebe.dna_sequence('ATTCGGU')
#

199
200
201
202
203
# File 'lib/bioroebe/sequence/dna.rb', line 199

# Create a new ::Bioroebe::DNA object from the given sequence.
#
# If an Array is given, only its first element is used. Every 'U'
# (Uracil) is removed before the object is created. The caller's String
# is left untouched, so frozen Strings are accepted as well.
def self.dna_sequence(i)
  i = i.first if i.is_a? Array
  ::Bioroebe::DNA.new(i.delete('U')) # Reject Uracil there.
end

.dna_to_aminoacid_sequence(i = ARGV) ⇒ Object

#

Bioroebe.dna_to_aminoacid_sequence

Usage example:

Bioroebe.dna_to_aminoacid_sequence('ATGGGGCCC') # => "MGP"
#

533
534
535
536
537
# File 'lib/bioroebe/conversions/dna_to_aminoacid_sequence.rb', line 533

# Convert the given DNA input via ::Bioroebe::DnaToAminoacidSequence
# (handed a block yielding :be_quiet) and return the value of that
# instance's sequence? method.
def self.dna_to_aminoacid_sequence(
    i = ARGV
  )
  converter = ::Bioroebe::DnaToAminoacidSequence.new(i) { :be_quiet }
  converter.sequence?
end

.do_not_truncateObject

#

Bioroebe.do_not_truncate

Do not truncate any “too long” output. This method disables the truncate-functionality.

#

44
45
46
# File 'lib/bioroebe/toplevel_methods/truncate.rb', line 44

# Disable the truncate-functionality by setting the module-level
# @truncate variable to false. Returns false.
def self.do_not_truncate
  @truncate = false
end

.do_truncateObject

#

Bioroebe.do_truncate

#

34
35
36
# File 'lib/bioroebe/toplevel_methods/truncate.rb', line 34

# Enable the truncate-functionality by setting the module-level
# @truncate variable to true. Returns true.
def self.do_truncate
  @truncate = true
end

.dotplot_array(dna_x, dna_y) ⇒ Object

#

Bioroebe.dotplot_array

This method can be used to return a 2D dotplot-array of two input sequences. Be careful with large data as input - the RAM usage may go up, so this method has NOT been optimized for such situations. It is deliberately kept simple.

#

215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'lib/bioroebe/dotplots/advanced_dotplot.rb', line 215

# Build a 2D dotplot matrix for two sequences.
#
# dna_x - the sequence along the x-axis (columns).
# dna_y - the sequence along the y-axis (rows).
#
# Returns an Array of rows, one per character of dna_y. Each row holds
# one Integer per character of dna_x: 1 where both characters are equal,
# 0 otherwise. Every row is its own Array.
def self.dotplot_array(dna_x, dna_y)
  x_chars = dna_x.chars # Split only once, not once per row.
  dna_y.chars.map {|y_value|
    x_chars.map {|x_value| x_value == y_value ? 1 : 0 }
  }
end

.downcase_chunked_display(i, group_together_n_nucleotides = 10) ⇒ Object

#

Bioroebe.downcase_chunked_display

This is similar to the regular chunked display, but will return the nucleotides in a downcased manner, aka “A” will become “a” and so forth.

In the past this functionality resided in its own .rb file. In March 2020 a bin/ executable was added, so that the functionality can be called more easily once the bioroebe gem is installed.

Usage example:

Bioroebe.downcase_chunked_display 'ATGGGGCCTGCAATGGGGCCTGCAATGGGGCCTGCAATGGGGCCTGCAATGGGGCCTGCAATGGGGCCTGCAATGGGGCCTGCAATGGGGCCTGCA'
#

78
79
80
81
82
83
84
# File 'lib/bioroebe/toplevel_methods/chunked_display.rb', line 78

# Return the chunked display of the given sequence, downcased.
#
# The chunking itself is done by ::Bioroebe.return_chunked_display, which
# receives both arguments unchanged.
def self.downcase_chunked_display(
    i,
    group_together_n_nucleotides = 10
  )
  chunked = ::Bioroebe.return_chunked_display(i, group_together_n_nucleotides)
  chunked.downcase
end

.download(from_these_URLs) ⇒ Object

#

Bioroebe.download

#

114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/bioroebe/toplevel_methods/download_and_fetch_data.rb', line 114

# Download every file listed at the given remote URL(s).
#
# from_these_URLs - a single URL or an Array of URLs; nil entries are
#                   ignored.
#
# For each URL the remote file listing is obtained through the external
# `curl` program; each listed file is then handed to wget_download.
# Returns the Array of URLs that were processed.
def self.download(
    from_these_URLs
  )
  require 'open-uri'
  array_these_urls = [from_these_URLs].flatten.compact
  array_these_urls.each {|remote_url|
    # ===================================================================== #
    # First, we must determine the remote file listing here.
    # Due to convenience we will simply use curl here. The command is
    # passed as an argument list, so no shell is involved and the URL
    # cannot be interpreted as shell syntax.
    # ===================================================================== #
    listing = IO.popen(
      ['curl', '-s', remote_url.to_s, '--list-only'], &:read
    )
    listing.split("\n").each {|this_remote_file|
      target = remote_url+this_remote_file
      e "Downloading `#{this_remote_file}` next. '"\
        "(Full target: '#{target})"
      wget_download(target)
    }
  }
end

.download_directory?Boolean

#

Bioroebe.download_directory?

#

Returns:

  • (Boolean)

77
78
79
# File 'lib/bioroebe/toplevel_methods/log_directory.rb', line 77

# Return the path of the download directory: the module-level
# @log_directory with 'Downloads/' appended.
def self.download_directory?
  format('%sDownloads/', @log_directory)
end

.download_fasta(i) ⇒ Object

#

Bioroebe.download_fasta

Easier wrapper-method to download fasta files.

#

239
240
241
# File 'lib/bioroebe/fasta_and_fastq/download_fasta.rb', line 239

# Wrapper to download FASTA files: creates a ::Bioroebe::DownloadFasta
# instance for the given input and returns the value of its location?
# method.
def self.download_fasta(i)
  downloader = ::Bioroebe::DownloadFasta.new(i)
  downloader.location?
end

.download_human_genome(from_this_URL = 'https://bioconductor.org/packages/release/data/annotation/src/contrib/BSgenome.Hsapiens.UCSC.hg38_1.4.4.tar.gz') ⇒ Object

#

Bioroebe.download_human_genome

#

298
299
300
301
302
303
304
305
# File 'lib/bioroebe/toplevel_methods/misc.rb', line 298

# Download the human genome archive from the given URL via wget, then
# extract the downloaded archive (identified by the last path component
# of the URL).
def self.download_human_genome(
    from_this_URL = 'https://bioconductor.org/packages/release/data/annotation/src/contrib/BSgenome.Hsapiens.UCSC.hg38_1.4.4.tar.gz'
  )
  esystem "wget #{from_this_URL}"
  archive_name = File.basename(from_this_URL)
  extract(archive_name)
end

.download_taxonomy_database(i = ::Bioroebe::FTP_NCBI_TAXONOMY_DATABASE) ⇒ Object

#

Bioroebe.download_taxonomy_database

#

92
93
94
95
96
# File 'lib/bioroebe/databases/download_taxonomy_database.rb', line 92

# Create a new DownloadTaxonomyDatabase instance for the given location
# (by default ::Bioroebe::FTP_NCBI_TAXONOMY_DATABASE) and return that
# instance.
def self.download_taxonomy_database(
    i = ::Bioroebe::FTP_NCBI_TAXONOMY_DATABASE
  )
  downloader = DownloadTaxonomyDatabase.new(i)
  return downloader
end

.download_this_pdb(i = '355D') ⇒ Object

#

Bioroebe.download_this_pdb

This method can be used to download a remote .pdb file to the local file-system. If the default pdb/ directory exists as well locally then the downloaded .pdb file will be relocated into that directory.

An example for a remote URL to a .pdb file would be:

https://files.rcsb.org/view/2BTS.pdb
https://files.rcsb.org/view/355D.pdb
#

30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/bioroebe/pdb/download_this_pdb.rb', line 30

# ========================================================================= #
# Download one or several .pdb files from files.rcsb.org via wget.
#
# The input may be a single PDB id or an Array of ids (nested Arrays are
# flattened, nil entries are skipped). Each id is upcased; a trailing
# ".pdb" extension is accepted in any letter case and is normalized
# to lowercase ".pdb".
#
# The caller's objects are never modified: all normalization happens on
# a fresh String.
#
# If the downloaded file exists in the current working directory
# afterwards, it is handed to
# ::Bioroebe.move_file_to_its_correct_location().
# ========================================================================= #
def self.download_this_pdb(
    i = '355D'
  )
  # ======================================================================= #
  # Treat all input as an Array past the next point.
  # ======================================================================= #
  [i].flatten.compact.each {|this_entry|
    # ===================================================================== #
    # Strip an optional extension (case-insensitive, anchored at the very
    # end of the String), upcase the id and re-append ".pdb". sub() and
    # upcase() both return new Strings, so the input stays untouched.
    # ===================================================================== #
    pdb_id   = this_entry.to_s.sub(/\.pdb\z/i, '').upcase
    pdb_file = "#{pdb_id}.pdb"
    e pdb_file
    # ===================================================================== #
    # Build up our remote URL next:
    # ===================================================================== #
    remote_url = "https://files.rcsb.org/view/#{pdb_file}"
    e steelblue(remote_url)
    esystem "wget #{remote_url}"
    local_file = File.basename(remote_url)
    if File.exist? local_file
      ::Bioroebe.move_file_to_its_correct_location(local_file)
    end
  }
end

.e(i = '') ⇒ Object

#

Bioroebe.e

#

12
13
14
# File 'lib/bioroebe/toplevel_methods/e.rb', line 12

# Shorthand for writing the given object, followed by a newline, to
# standard output.
def self.e(i = '')
  $stdout.puts(i)
end

.editor?Boolean

#

Bioroebe.editor?

#

Returns:

  • (Boolean)

12
13
14
15
# File 'lib/bioroebe/toplevel_methods/editor.rb', line 12

# Returns ::Bioroebe::Configuration::DEFAULT_EDITOR_TO_USE. The file
# that defines this constant is loaded lazily on first use.
def self.editor?
  require 'bioroebe/configuration/constants.rb'
  return ::Bioroebe::Configuration::DEFAULT_EDITOR_TO_USE
end

.embeddable_interfaceObject

#

Bioroebe.embeddable_interface

#

729
730
731
732
733
# File 'lib/bioroebe/www/embeddable_interface.rb', line 729

# Returns a fresh, otherwise empty Object that has been extended with
# the ::Bioroebe::EmbeddableInterface module.
def self.embeddable_interface
  # Object#extend returns its receiver, so the chain yields the object.
  Object.new.extend(::Bioroebe::EmbeddableInterface)
end

.enable_coloursObject

#

Bioroebe.enable_colours

Use this method to enable colours for the whole Bioroebe project.

All classes that are part of the Bioroebe project should honour this setting (if it is a class that may make use of colours; some smaller classes do not need colours, and hence have no need for the method here).

#

115
116
117
# File 'lib/bioroebe/colours/colours.rb', line 115

# Sets the @use_colours instance variable to true and returns true.
def self.enable_colours
  return(@use_colours = true)
end

.ensure_that_the_base_directories_existObject

#

Bioroebe.ensure_that_the_base_directories_exist

This method will ensure that the base directories for the Bioroebe project exist.

#

209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
# File 'lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb', line 209

# ========================================================================= #
# Creates, when missing, the directories the project writes into:
#
#   - the log directory itself (as reported by log_dir?)
#   - its Downloads/ and pdb/ subdirectories
#   - the directory named by AUTOGENERATED_SQL_FILES_DIR
#
# Every creation is announced to the user beforehand via erev.
# ========================================================================= #
def self.ensure_that_the_base_directories_exist
  base_dir = log_dir?
  if !File.exist?(base_dir)
    erev "The base directory at `#{sdir(base_dir)}#{rev}` does not exist."
    erev 'It will thus be created next.'
    mkdir base_dir
  end
  # ======================================================================= #
  # === Ensure that the Downloads/ and pdb/ directories exist
  # ======================================================================= #
  ['Downloads/', 'pdb/'].each {|subdirectory|
    target = "#{base_dir}#{subdirectory}"
    next if File.exist? target
    erev "The directory at `#{sdir(target)}#{rev}` does not exist."
    erev 'It will thus be created next.'
    mkdir target
  }
  # ======================================================================= #
  # === Ensure that the directory for autogenerated SQL files exists
  # ======================================================================= #
  sql_dir = AUTOGENERATED_SQL_FILES_DIR
  return if Dir.exist? sql_dir
  erev 'The directory at `'+sdir(sql_dir)+
       rev+'` does not exist.'
  erev 'It will thus be created next.'
  mkdir(sql_dir)
end

.erev(i = '') ⇒ Object

#

Bioroebe.erev

#

37
38
39
# File 'lib/bioroebe/colours/rev.rb', line 37

# Prints the given input, prefixed with whatever rev() returns, followed
# by a newline.
def self.erev(i = '')
  puts(rev.to_s + i.to_s)
end

.esystem(i) ⇒ Object

#

Bioroebe.esystem

#

12
13
14
15
# File 'lib/bioroebe/toplevel_methods/esystem.rb', line 12

# Echoes the given command and then runs it through Kernel#system,
# returning system's result.
def self.esystem(i)
  command = i.to_s
  puts command
  system(command)
end

.every_reverse_palindrome_in_this_string(i = 'TCAATGCATGCGGGTCTATATGCAT', min_length = 4, max_length = 12) ⇒ Object

#

Bioroebe.every_reverse_palindrome_in_this_string

This method can return every reverse palindrome in the given input String.

The output will be an Array such as this:

[[4, 6], [5, 4], [6, 6], [7, 4], [17, 4], [18, 4], [4, 6], [5, 4]]
#

34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/bioroebe/toplevel_methods/palindromes.rb', line 34

# ========================================================================= #
# Return every reverse palindrome (a sequence that is equal to its own
# reverse complement) found in the given sequence.
#
# Arguments:
#
#   i          - the sequence; alternatively an Array (only its first
#                element is used) or the path of an existing file, whose
#                lines not starting with ">" are stripped and joined.
#   min_length - shortest palindrome length to report
#   max_length - longest palindrome length to report
#
# Returns an Array of [position, length] pairs. Positions are 1-based
# and refer to where that palindrome starts in the sequence. An empty
# Array is returned for nil input.
#
# Every (start, length) window is examined directly, so a palindrome
# that occurs several times is reported at each of its own positions
# rather than at the position of its first occurrence.
# ========================================================================= #
def self.every_reverse_palindrome_in_this_string(
    i          = 'TCAATGCATGCGGGTCTATATGCAT',
    min_length =  4,
    max_length = 12
  )
  require 'bioroebe/sequence/reverse_complement.rb'
  if i.is_a? Array # Arrays will become Strings - or rather, whatever is the first argument.
    i = i.first
  end
  return [] if i.nil?
  if File.exist?(i)
    i = File.readlines(i).reject {|entry|
      entry.start_with?('>')
    }.map {|inner_entry| inner_entry.strip }.join
  end
  sequence = i
  array_containing_starting_index_and_length_of_reverse_palindromes = []
  sequence.size.times {|start_index|
    (min_length..max_length).each {|length|
      # Longer windows would run past the end of the sequence.
      break if (start_index + length) > sequence.size
      candidate = sequence[start_index, length]
      if Bioroebe.reverse_complement(candidate) == candidate
        array_containing_starting_index_and_length_of_reverse_palindromes <<
          [start_index + 1, length]
      end
    }
  }
  return array_containing_starting_index_and_length_of_reverse_palindromes
end

.ewarn(i = '') ⇒ Object

#

Bioroebe.ewarn

#

24
25
26
27
# File 'lib/bioroebe/colours/swarn.rb', line 24

# Passes the given input through swarn() and prints the result via e().
def self.ewarn(i = '')
  require 'bioroebe/toplevel_methods/e.rb'
  warning = swarn(i)
  e warning
end

.extract(i = ARGV) ⇒ Object

#

Bioroebe.extract

This method can be used to quickly extract a local archive.

#

18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/bioroebe/toplevel_methods/extract.rb', line 18

# ========================================================================= #
# Extract a local tar archive by shelling out to "tar".
#
# Arguments:
#
#   i - path of the archive; an Array is joined with ' '. A name without
#       any "/" that does not exist as given is looked up below
#       return_pwd instead.
#
# Names ending in bz2 or xz, as well as .tar.gz, .tgz and plain .tar
# archives, are handled. A warning is emitted via ewarn when the file
# does not exist or when its format is not recognized.
#
# The path is shell-escaped before it is put into the command line, so
# that names containing spaces or shell metacharacters are treated as a
# single argument.
# ========================================================================= #
def self.extract(
    i = ARGV
  )
  require 'shellwords'
  require 'bioroebe/colours/sfancy.rb'
  require 'bioroebe/colours/swarn.rb'
  if i.is_a? Array
    i = i.join(' ').strip
  end
  unless i.include?('/')
    unless File.exist? i
      i = return_pwd+
          File.basename(i)
    end
  end
  unless File.exist? i
    return ewarn("Can not extract #{sfile(i)} because it does "\
                 "not appear to exist.")
  end
  case i
  when /bz2$/,
       /xz$/,
       /\.tar\.gz$/,
       /\.tgz$/,
       /\.tar$/
    # The -f option consumes the next word as the archive name, so it
    # has to come last in the bundled option group.
    _ = "tar -xvf #{Shellwords.escape(i)}"
  else
    return ewarn("Can not extract #{sfile(i)} because its archive "\
                 "format is not supported.")
  end
  if be_verbose?
    e "Now extracting `#{sfancy((i).squeeze('/'))}`."
    esystem(_)
    e 'Done extracting!'
  else
    system _
  end
end

.fasta_dir?Boolean

#

Bioroebe.fasta_dir?

#

Returns:

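  • (String) — the path of the fasta/ directory; despite the trailing "?" in the method name, this is not a boolean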

170
171
172
# File 'lib/bioroebe/constants/files_and_directories.rb', line 170

# Returns, as a String, the value of Bioroebe.log_dir? followed by the
# literal "fasta/".
def self.fasta_dir?
  format('%sfasta/', Bioroebe.log_dir?)
end

.fasta_directory?Boolean

#

Bioroebe.fasta_directory?

This method will return a path such as “/root/Bioroebe/fasta/”.

#

Returns:

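  • (String) — the path of the fasta/ directory; despite the trailing "?" in the method name, this is not a boolean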

99
100
101
# File 'lib/bioroebe/toplevel_methods/log_directory.rb', line 99

# Returns, as a String, the value of ::Bioroebe.log_directory? followed
# by the literal "fasta/", e. g. "/root/Bioroebe/fasta/".
def self.fasta_directory?
  [::Bioroebe.log_directory?, 'fasta/'].join
end

.fetch_data_from_uniprot(i = 'B5ZC00', do_perform_rename_action = true) ⇒ Object

#

Bioroebe.fetch_data_from_uniprot

The first argument to this method should be the name of the protein at hand. Alternatively it can also be just the full URL.

The second argument specifies whether we will automatically try to rename this FASTA file. By default this is enabled, largely because it makes it a bit easier to know what a particular fasta sequence contains.

#

27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/bioroebe/toplevel_methods/download_and_fetch_data.rb', line 27

# ========================================================================= #
# Download a FASTA dataset from UniProt and store it locally below
# LOCAL_DIRECTORY_FOR_UNIPROT.
#
# Arguments:
#
#   i                        - a UniProt identifier such as 'B5ZC00', a
#                              full URL (anything starting with "http"),
#                              or an Array of either.
#   do_perform_rename_action - when true, the stored file is renamed
#                              afterwards, based on
#                              ::Bioroebe.return_new_filename_based_on_fasta_identifier()
#
# Returns the path of the stored (or renamed) file. For Array input an
# Array with one such path per entry is returned.
#
# The caller's String is never modified: stripping and the appending of
# ".fasta" both happen on a private copy.
# ========================================================================= #
def self.fetch_data_from_uniprot(
    i = 'B5ZC00',
    do_perform_rename_action = true
  )
  require 'open-uri'
  # ======================================================================= #
  # Support Array as input:
  # ======================================================================= #
  if i.is_a? Array
    return i.map {|entry|
      fetch_data_from_uniprot(entry, do_perform_rename_action)
    }
  end
  # ======================================================================= #
  # String#strip returns a new String, which also makes it safe to
  # append to remote_url further below.
  # ======================================================================= #
  i = i.to_s.strip
  if i.start_with? 'http'
    remote_url = i
  else
    remote_url = "https://www.uniprot.org/uniprot/#{i}".dup
  end
  # ======================================================================= #
  # Next, we will append a trailing ".fasta" part for uniprot, if
  # the very last part does NOT include a '.' character.
  # ======================================================================= #
  unless File.basename(remote_url).include?('.')
    remote_url << '.fasta' unless remote_url.end_with? '.fasta'
  end
  # ======================================================================= #
  # Store this protein sequence at an appropriate location:
  # ======================================================================= #
  e
  erev "Trying to obtain remote data from "\
       "#{sfancy(remote_url)} #{rev}next:"
  e
  store_here = LOCAL_DIRECTORY_FOR_UNIPROT+
               File.basename(remote_url)
  # ======================================================================= #
  # Next create the directory unless it already exists.
  # ======================================================================= #
  unless File.directory? LOCAL_DIRECTORY_FOR_UNIPROT
    mkdir LOCAL_DIRECTORY_FOR_UNIPROT
  end
  # ======================================================================= #
  # The block form closes the remote handle as soon as it was read.
  # ======================================================================= #
  dataset = URI.open(remote_url) {|remote| remote.read }
  e "The remote dataset will be stored here: "\
    "#{sfile(store_here)}"
  e
  write_what_into(dataset, store_here)
  # ======================================================================= #
  # Next comes a rename action - this will make the .fasta file more
  # verbose, but this may still be helpful, in particular if there are
  # a lot of .fasta files. The second argument controls this setting.
  # ======================================================================= #
  return store_here unless do_perform_rename_action
  # ======================================================================= #
  # The initially assumed filename may be like this:
  #
  #   /root/Bioroebe/A2Z669_CSPLT_ORYSI_CASP-like_protein_5A2_OS=Oryza_sativa_subsp_indica_OX=39946_GN=OsI_33147_PE=3_SV=1.fasta
  #
  # ======================================================================= #
  assumed_filename = ::Bioroebe.return_new_filename_based_on_fasta_identifier(store_here)
  new_filename = LOCAL_DIRECTORY_FOR_UNIPROT+
                 File.basename(assumed_filename)
  erev "Next renaming `#{sfile(store_here)}#{rev}` to"
  erev "  `#{sfile(new_filename)}`."
  ::Bioroebe.mv(store_here, new_filename)
  return new_filename
end

.fetch_fasta_sequence_from_pdb(i = ARGV) ⇒ Object

#

Bioroebe.fetch_fasta_sequence_from_pdb

#

126
127
128
# File 'lib/bioroebe/pdb/fetch_fasta_sequence_from_pdb.rb', line 126

# Creates a new Bioroebe::FetchFastaSequenceFromPdb object for the given
# input (ARGV by default) and returns that object.
def self.fetch_fasta_sequence_from_pdb(i = ARGV)
  return Bioroebe::FetchFastaSequenceFromPdb.new(i)
end

.file_amino_acidsObject

#

Bioroebe.file_amino_acids

#

86
87
88
# File 'lib/bioroebe/constants/files_and_directories.rb', line 86

# Returns the value of the FILE_AMINO_ACIDS constant.
def self.file_amino_acids
  return FILE_AMINO_ACIDS
end

.file_amino_acids_abbreviationsObject

#

Bioroebe.file_amino_acids_abbreviations

#

99
100
101
# File 'lib/bioroebe/constants/files_and_directories.rb', line 99

# Returns the value of the FILE_AMINO_ACIDS_ABBREVIATIONS constant.
def self.file_amino_acids_abbreviations
  return FILE_AMINO_ACIDS_ABBREVIATIONS
end

.file_amino_acids_frequencyObject

#

Bioroebe.file_amino_acids_frequency

#

279
280
281
# File 'lib/bioroebe/constants/files_and_directories.rb', line 279

# Returns the path of "amino_acids_frequency.yml" below the directory
# named by BIOROEBE_YAML_AMINOACIDS_DIRECTORY.
def self.file_amino_acids_frequency
  format('%samino_acids_frequency.yml', BIOROEBE_YAML_AMINOACIDS_DIRECTORY)
end

.file_amino_acids_long_name_to_one_letterObject

#

Bioroebe.file_amino_acids_long_name_to_one_letter

This method will return a String such as:

"/home/Programs/Ruby/3.1.2/lib/ruby/site_ruby/3.1.0/bioroebe/yaml/aminoacids/amino_acids_long_name_to_one_letter.yml"
#

579
580
581
# File 'lib/bioroebe/constants/files_and_directories.rb', line 579

# Returns the path of the yaml file "amino_acids_long_name_to_one_letter.yml",
# located in the aminoacids/ subdirectory of project_yaml_directory?.
def self.file_amino_acids_long_name_to_one_letter
  [
    project_yaml_directory?,
    'aminoacids/amino_acids_long_name_to_one_letter.yml'
  ].join
end

.file_fastq_quality_schemesObject

#

Bioroebe.file_fastq_quality_schemes

This method will return the path to a file in a location such as this one here:

/Programs/Ruby/2.6.4/lib/ruby/site_ruby/2.6.0/bioroebe/yaml/fastq_quality_schemes.yml
#

182
183
184
# File 'lib/bioroebe/constants/files_and_directories.rb', line 182

# Returns the path of "fastq_quality_schemes.yml", located in the
# fasta_and_fastq/ subdirectory of project_yaml_directory?.
def self.file_fastq_quality_schemes
  format('%sfasta_and_fastq/fastq_quality_schemes.yml', project_yaml_directory?)
end

.file_molecular_weightObject

#

Bioroebe.file_molecular_weight

#

567
568
569
# File 'lib/bioroebe/constants/files_and_directories.rb', line 567

# Returns the path of "molecular_weight.yml", located in the
# aminoacids/ subdirectory of project_yaml_directory?.
def self.file_molecular_weight
  [project_yaml_directory?, 'aminoacids/molecular_weight.yml'].join
end

.file_restriction_enzymesObject

#

Bioroebe.file_restriction_enzymes

#

329
330
331
# File 'lib/bioroebe/constants/files_and_directories.rb', line 329

# Returns the value of the FILE_RESTRICTION_ENZYMES constant.
def self.file_restriction_enzymes
  return FILE_RESTRICTION_ENZYMES
end

.file_statistics?Boolean

#

Bioroebe.file_statistics?

This file can normally be found here:

$BIOROEBE/yaml/statistics.yml
#

Returns:

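  • (String) — the path of the statistics.yml file; despite the trailing "?" in the method name, this is not a boolean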

163
164
165
# File 'lib/bioroebe/constants/files_and_directories.rb', line 163

# Returns, as a String, the value of Bioroebe.log_dir? followed by the
# literal "statistics.yml".
def self.file_statistics?
  format('%sstatistics.yml', Bioroebe.log_dir?)
end

.file_talensObject

#

Bioroebe.file_talens

#

189
190
191
# File 'lib/bioroebe/constants/files_and_directories.rb', line 189

# Returns the path of "talens.yml" directly below project_yaml_directory?.
def self.file_talens
  [project_yaml_directory?, 'talens.yml'].join
end

.filter_away_invalid_aminoacids(i) ⇒ Object

#

Bioroebe.filter_away_invalid_aminoacids

Usage example:

Bioroebe.filter_away_invalid_aminoacids('ATMÜ') # => "ATM"
#

95
96
97
98
# File 'lib/bioroebe/constants/aminoacids_and_proteins.rb', line 95

# Returns a new String that contains only those characters of the input
# that are included in the collection returned by all_aminoacids?. The
# order of the remaining characters is preserved.
def self.filter_away_invalid_aminoacids(i)
  permitted = all_aminoacids?
  i.each_char.select {|character| permitted.include?(character) }.join
end

.filter_away_invalid_nucleotides(i, preserve_uracil = false) ⇒ Object

#

Bioroebe.filter_away_invalid_nucleotides

This method can be used to filter away invalid nucleotides. An “invalid” nucleotide is, for example, if you work with DNA sequences, any character that is not allowed to be part of DNA. For example, Uracil, which can be found (almost exclusively) only in RNA.

As for now, the behaviour is to upcase the given input before removing the disallowed letters via the .tr() method on the given String.

Usage example:

Bioroebe.filter_away_invalid_nucleotides 'ATGCCGGAGGAGANNN' # => "ATGCCGGAGGAGA"
#

363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
# File 'lib/bioroebe/toplevel_methods/nucleotides.rb', line 363

# ========================================================================= #
# Upcases the input and removes every letter that is not a nucleotide:
# only A, C, G and T survive; U survives as well when preserve_uracil is
# true (or the Symbol :preserve_uracil). Commas are removed too, since
# they are part of the deletion set. Other non-letter characters are
# left in place.
#
# An Array as input is joined via ' ' first. A new String is returned.
# ========================================================================= #
def self.filter_away_invalid_nucleotides(
    i,
    preserve_uracil = false
  )
  i = i.join(' ').strip if i.is_a? Array
  # ======================================================================= #
  # Map the two Symbol shortcuts onto plain booleans.
  # ======================================================================= #
  if :preserve_uracil == preserve_uracil
    preserve_uracil = true
  elsif :preserve_nothing == preserve_uracil
    preserve_uracil = false
  end
  unwanted_characters =
    if preserve_uracil
      'B,D-F,H-S,V-Z' # Keeps: A T C G U
    else
      'B,D-F,H-S,U-Z' # Keeps: A T C G
    end
  return i.to_s.upcase.delete(unwanted_characters)
end

.find_substring(full_string = 'GATATATGCATATACTT', this_substring = :default) ⇒ Object

#

Bioroebe.find_substring

This method can be used to find a substring within a larger String.

For example, in the below default values, the substring “ATAT” would exist at the positions 2, 4 and 10, if compared to the larger parent string “GATATATGCATATACTT”.

The following display may help you see this more easily, in regards to the substring matches:

GATATATGCATATACTT
 ATATAT  ATAT

If you look closely, you will be able to see that “ATAT” can be found three times in the string above.

Indices in this context start at position 1, not 0. This is mostly done to refer to nucleotides or aminoacids, which also typically start at the first letter. Position 0 makes no sense for a nucleotide - what would “nucleotide 0” even refer to?

The first argument to this method may also be the path to a locally existing file, such as “/rosalind_subs.txt”. In fact this method has been largely motivated by Rosalind tasks.

The method will return an Array with the positions of all substrings that are found in the full_string variable. See the usage example below for how this may be.

Usage example:

Bioroebe.find_substring 'GATATATGCATATACTT', 'ATAT' # => [2, 4, 10]
#

51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/bioroebe/toplevel_methods/searching_and_finding.rb', line 51

# ========================================================================= #
# Find all (possibly overlapping) occurrences of a substring within a
# larger String.
#
# Arguments:
#
#   full_string    - the String to search in. An Array is reduced to its
#                    first element. If this names an existing file and
#                    this_substring was left at :default, then the first
#                    line of that file is used as the full String and the
#                    last line as the substring.
#   this_substring - the substring to search for; :default, nil and ''
#                    all fall back to 'ATAT'.
#
# Returns an Array of 1-based start positions, or nil if the substring
# does not occur at all.
#
# The search continues until String#index reports no further match, so
# a match at the very end of the String (including the case where the
# substring is as long as the full String) is reported as well.
# ========================================================================= #
def self.find_substring(
    full_string    = 'GATATATGCATATACTT', # ← The full String comes here.
    this_substring = :default             # ← The substring we are searching for comes here.
  )
  if full_string.is_a? Array
    # ===================================================================== #
    # Presently this method will only work on the first member of an Array.
    # ===================================================================== #
    full_string = full_string.first
  end
  if full_string and File.file?(full_string) and
     this_substring == :default
    # ===================================================================== #
    # In this case it is ok to read from that file.
    # ===================================================================== #
    splitted = File.read(full_string).split("\n")
    full_string    = splitted.first
    this_substring = splitted.last
  end
  case this_substring
  # ======================================================================= #
  # Use a default value in this case. In reality users should supply
  # their own substring when they use this method here.
  # ======================================================================= #
  when :default,
       nil
    this_substring = 'ATAT'
  else
    if this_substring.empty?
      this_substring = 'ATAT'
    end
  end
  if full_string.nil? or full_string.empty?
    full_string = 'GATATATGCATATACTT' # ← Use the default in this case.
  end
  haystack = full_string.to_s
  needle   = this_substring.to_s
  result = []
  search_from = 0
  # ======================================================================= #
  # Resume right behind the start of each match, so that overlapping
  # matches are found too. The needle is never empty at this point,
  # which guarantees that the loop terminates.
  # ======================================================================= #
  while (index = haystack.index(needle, search_from))
    result << index + 1 # Positions are reported 1-based.
    search_from = index + 1
  end
  result = nil if result.empty? # ← We will try this here; could also return an empty Array, though.
  result # Return our findings here.
end

.format_this_nucleotide_sequence(i = ARGV, &block) ⇒ Object

#

Bioroebe.format_this_nucleotide_sequence

#

545
546
547
548
549
550
551
552
# File 'lib/bioroebe/nucleotides/show_nucleotide_sequence.rb', line 545

# Builds a ::Bioroebe::ShowNucleotideSequence object in the
# :do_not_report_anything mode (forwarding the optional block), clears
# its padding, formats it and returns what .formatted_sequence? yields.
def self.format_this_nucleotide_sequence(i = ARGV, &block)
  formatter = ::Bioroebe::ShowNucleotideSequence.new(
    i, :do_not_report_anything, &block
  )
  formatter.clear_padding
  formatter.format
  return formatter.formatted_sequence?
end

.frequency_per_thousand(i) ⇒ Object

#

Bioroebe.frequency_per_thousand

The input to this method should be a String ideally. If an Array is input then it will simply be .join()-ed.

This method will return a String, if all goes well.

#

62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/bioroebe/toplevel_methods/misc.rb', line 62

# ========================================================================= #
# Build a codon usage table for the given sequence.
#
# Arguments:
#
#   i - the sequence as a String; an Array is .join()-ed first. Newlines
#       are ignored. The input itself is not modified, so frozen Strings
#       are fine.
#
# The codon counts come from ::Bioroebe.codon_frequency_of_this_string().
#
# Returns a String: a header line, followed by four groups (one per
# first base, in the order U, C, A, G) of four rows each (one per third
# base); within a row the second base varies. Every cell has the form
#
#   CODON<thousand_percentage(count, total)>(     <count>)
#
# and cells are separated by two spaces.
# ========================================================================= #
def self.frequency_per_thousand(i)
  result = "fields: [triplet] [frequency: per thousand] ([number])\n".dup # This String will be returned.
  if i.is_a? Array
    i = i.join
  end
  i = i.delete("\n") # Non-destructive: leave the caller's String alone.
  hash = ::Bioroebe.codon_frequency_of_this_string(i)
  hash.default = 0 # Codons that do not occur count as 0.
  total_n_elements = hash.values.sum
  bases = %w( U C A G )
  groups = bases.map {|first_base|
    bases.map {|third_base|
      bases.map {|second_base|
        codon = "#{first_base}#{second_base}#{third_base}"
        "#{codon}#{thousand_percentage(hash[codon], total_n_elements)}(     #{hash[codon]})"
      }.join('  ')+"\n"
    }.join
  }
  # ======================================================================= #
  # One empty line precedes the table, and one separates the groups.
  # ======================================================================= #
  result << "\n" << groups.join("\n")
  return result
end

.gc_content(of_this_sequence, round_to_n_positions = 3) ⇒ Object

#

Bioroebe.gc_content

This is a convenience method that will return back the GC content, as a percentage value, of the input-given sequence (nucleotide sequence).

So for instance, the following example will correctly return 50.0 because the G and C content of the sequence is exactly 50%.

The second argument can be used for denoting where to round.

Usage example:

Bioroebe.gc_content('ATCG') # => 50.0
#

280
281
282
283
284
285
286
287
288
289
290
291
292
293
# File 'lib/bioroebe/calculate/calculate_gc_content.rb', line 280

# ========================================================================= #
# Return the GC content of a nucleotide sequence, as computed by
# ::Bioroebe::CalculateGCContent.gc_percentage().
#
# Arguments:
#
#   of_this_sequence     - a sequence, or an Array of sequences
#   round_to_n_positions - forwarded to gc_percentage() unchanged
#
# For an Array, each entry is evaluated on its own and an Array holding
# one result per entry is returned.
# ========================================================================= #
def self.gc_content(
    of_this_sequence,
    round_to_n_positions = 3
  )
  if of_this_sequence.is_a? Array
    # Recurse per entry - not with the whole Array, which never ends.
    of_this_sequence.map {|entry|
      gc_content(entry, round_to_n_positions)
    }
  else
    ::Bioroebe::CalculateGCContent.gc_percentage(
      of_this_sequence, round_to_n_positions
    )
  end
end

.genbank_to_fasta(this_file, be_verbose = :be_verbose) ⇒ Object

#

Bioroebe.genbank_to_fasta

This method will convert from a genbank file, to a .fasta file.

Invocation example:

Bioroebe.genbank_to_fasta('/home/x/DATA/PROGRAMMING_LANGUAGES/RUBY/src/bioroebe/lib/bioroebe/data/genbank/sample_file.genbank')
#

1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
# File 'lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb', line 1399

# ========================================================================= #
# Save the content of a genbank file into a FASTA file, by delegating to
# Bioroebe::ParseFasta.
#
# Arguments:
#
#   this_file  - path of the input file; for an Array only the first
#                element is used. If the file does not exist, the parser
#                is prepared through .set_data and
#                .split_into_proper_sections instead.
#   be_verbose - passed on to .save_into_a_fasta_file(); the Symbol
#                :be_quiet is translated into false first.
#
# Returns whatever .save_into_a_fasta_file() returns.
# ========================================================================= #
def self.genbank_to_fasta(
    this_file,
    be_verbose = :be_verbose
  )
  be_verbose = false if :be_quiet == be_verbose
  this_file = this_file.first if this_file.is_a? Array
  if File.exist? this_file
    parser = Bioroebe::ParseFasta.new(this_file) { :be_quiet }
  else
    parser = Bioroebe::ParseFasta.new(:do_not_run_yet) { :be_quiet }
    parser.set_data # This will use the default file.
    parser.split_into_proper_sections
  end
  return parser.save_into_a_fasta_file(be_verbose)
end

.generate_nucleotide_sequence_based_on_these_frequencies(n_nucleotides = 1061, hash_frequencies = { A: 0.3191430, C: 0.2086633, G: 0.2580345, T: 0.2141593 }) ⇒ Object

#

Bioroebe.generate_nucleotide_sequence_based_on_these_frequencies

The second argument to this method should be a Hash.

The default output may be a String such as this one here:

AACTGAACATTTTAGGAGATATCAAGACCCTCTGATTCTCAAGGAATAATTAGCTAATTT

Usage example:

Bioroebe.generate_nucleotide_sequence_based_on_these_frequencies(:default, { A: 0.25, C: 0.25, G: 0.25, T: 0.25 })
#

479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
# File 'lib/bioroebe/toplevel_methods/nucleotides.rb', line 479

# ========================================================================= #
# Generate a random DNA sequence in which the four nucleotides occur
# with the given relative frequencies.
#
# Arguments:
#
#   n_nucleotides    - length of the sequence; :default means 500
#   hash_frequencies - Hash with the keys :A, :C, :G and :T. The values
#                      are treated as relative weights, so they need not
#                      sum to exactly 1 (fractions and percentages both
#                      work). A missing key counts as weight 0.
#
# Returns a String of exactly n_nucleotides characters.
#
# Raises ArgumentError if the weights do not sum to a positive number,
# as no nucleotide could be drawn then.
# ========================================================================= #
def self.generate_nucleotide_sequence_based_on_these_frequencies(
    n_nucleotides = 1061, # Denote how many nucleotides to use.
    hash_frequencies = {
      A: 0.3191430,
      C: 0.2086633,
      G: 0.2580345,
      T: 0.2141593
    }
  )
  case n_nucleotides
  # ======================================================================= #
  # === :default
  # ======================================================================= #
  when :default
    n_nucleotides = 500
  end
  weights = %w( A C G T ).map {|nucleotide|
    [nucleotide, hash_frequencies[nucleotide.to_sym].to_f]
  }
  total = weights.sum {|_nucleotide, weight| weight }
  unless total > 0
    raise ArgumentError,
          'The frequencies for A, C, G and T must sum to a positive number.'
  end
  # ======================================================================= #
  # Safety net for floating point rounding: if the running sum ends up
  # marginally below the drawn number, use the last nucleotide that can
  # actually occur.
  # ======================================================================= #
  fallback = weights.reverse.find {|_nucleotide, weight| weight > 0 }
  result = ''.dup
  n_nucleotides.times {
    # ===================================================================== #
    # Scale the random number to the sum of all weights, then pick the
    # first nucleotide whose cumulative weight exceeds it.
    # ===================================================================== #
    threshold  = rand * total
    cumulative = 0.0
    chosen = weights.find {|_nucleotide, weight|
      (cumulative += weight) > threshold
    }
    result << (chosen || fallback).first
  }
  return result
end

.generate_pdf_tutorialObject

#

Bioroebe.generate_pdf_tutorial

#

11244
11245
11246
# File 'lib/bioroebe/shell/shell.rb', line 11244

# Delegates to ::Bioroebe::Shell.generate_pdf_tutorial and returns its
# result.
def self.generate_pdf_tutorial
  return ::Bioroebe::Shell.generate_pdf_tutorial
end

.generate_random_dna_sequence(i = ARGV, optional_hash_with_the_frequencies = {}) ⇒ Object

#

Bioroebe.generate_random_dna_sequence

This method will “generate” a random DNA sequence (as a String).

A String will be returned by this method.

The second argument to this method can be a Hash, specifying the percentage likelihood for each of the nucleotides. See the following usage examples to find out how to use this.

Usage examples:

Bioroebe.random_dna 15 # => "TTGGTAAGCTCTTTA"
Bioroebe.random_dna 25 # => "TTAGCACAAGCATGGACGGACCAGA"
Bioroebe.random_dna(50, { A: 10, T: 10, C: 10, G: 70}) # => "GGGGTGGGGAGGGTATGCGGAGGAAGGGCGGGAAGGGCGGGGGCTGGGCG"
Bioroebe.random_dna(20, 'ATGGGGGGGG') # => "TGAGGGGGGGGGTGGGAGGG"
Bioroebe.random_dna(20, 'ATGGGGGGGG') # => "GGTAGGGGGGGGTAGGGGGG"
#

277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
# File 'lib/bioroebe/toplevel_methods/nucleotides.rb', line 277

# ========================================================================= #
# Build a random DNA sequence and return it as a new, unfrozen String.
#
# i ................. how many nucleotides to generate. An Array (such as
#                     ARGV) is joined without a separator and converted
#                     via .to_i; the Symbol :default means 250. Values
#                     that convert to 0 or less yield an empty String.
# optional_hash_with_the_frequencies
#                     - nil, {} or '': sample uniformly from
#                       Bioroebe::DNA_NUCLEOTIDES.
#                     - a non-empty String: its characters are used as a
#                       shuffled pool that is refilled once exhausted.
#                     - a Hash with the keys :A, :T, :C and :G holding
#                       percentages. Missing keys count as 0. When the
#                       values add up to less than 100 they are treated
#                       as relative weights, so every roll yields a
#                       nucleotide.
#
# Raises ArgumentError when a frequency Hash has no positive total.
# ========================================================================= #
def self.generate_random_dna_sequence(
    i                                  = ARGV,
    optional_hash_with_the_frequencies = {} # ← This may be a String too, mind you.
  )
  result = ''.dup # This String will be returned by the method.
  i = i.join.strip if i.is_a? Array
  i = 250 if i == :default
  i = i.to_i # This is "n times".
  return result unless i > 0
  frequencies = optional_hash_with_the_frequencies
  if frequencies.nil? || frequencies.empty?
    # ===================================================================== #
    # This is the default clause: all four nucleotides are equally likely.
    # ===================================================================== #
    nucleotides = Bioroebe::DNA_NUCLEOTIDES
    i.times { result << nucleotides.sample }
  elsif frequencies.is_a? String
    # ===================================================================== #
    # The user passed a String: draw from a shuffled pool of its characters.
    # ===================================================================== #
    pool = []
    i.times {
      pool = frequencies.chars.shuffle if pool.empty?
      result << pool.pop
    }
  else
    # ===================================================================== #
    # Else, the user wants to use a frequency hash. The order A, T, C, G
    # determines how the cumulative thresholds are stacked.
    # ===================================================================== #
    weights = %w( A T C G ).map { |letter|
      [letter, frequencies[letter.to_sym] || 0]
    }
    total = weights.sum { |_letter, weight| weight }
    unless total > 0
      raise ArgumentError,
            'The nucleotide frequencies must add up to a positive number.'
    end
    # Rolls stay within 1..100 as before; a smaller total shrinks the
    # range so that no roll can fall outside of the thresholds.
    ceiling = total < 100 ? total : 100
    running = 0
    thresholds = weights.map { |letter, weight| [letter, running += weight] }
    # Only reachable through Float rounding of the cumulative sums.
    fallback = weights.reverse.find { |_letter, weight| weight > 0 }.first
    i.times {
      roll =
        if ceiling.is_a? Integer
          rand(ceiling) + 1
        else
          ceiling - (rand * ceiling) # Float weights: a value in (0, ceiling].
        end
      letter, = thresholds.find { |_letter, upper| roll <= upper }
      result << (letter || fallback)
    }
  end
  result
end

.generate_random_rna_sequence(i = ARGV) ⇒ Object

#

Bioroebe.generate_random_rna_sequence

The input-argument should be a number, an Integer, such as 10.

Usage example:

Bioroebe.generate_random_rna_sequence(10)
#

632
633
634
635
636
637
638
639
640
641
642
# File 'lib/bioroebe/toplevel_methods/nucleotides.rb', line 632

# ========================================================================= #
# Return a random RNA sequence as a new String.
#
# The input is normalised first: an Array (such as ARGV) is joined with
# spaces and stripped, then the value goes through .to_s.to_i to obtain
# the number of nucleotides. Every nucleotide is sampled independently
# from Bioroebe::RNA_NUCLEOTIDES.
# ========================================================================= #
def self.generate_random_rna_sequence(i = ARGV)
  i = i.join(' ').strip if i.is_a? Array
  allowed_nucleotides = Bioroebe::RNA_NUCLEOTIDES
  how_many = i.to_s.to_i
  how_many.times.each_with_object(''.dup) { |_run, sequence|
    sequence << allowed_nucleotides.sample
  }
end

.guess_format(i) ⇒ Object

#

Bioroebe.guess_format

#

310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
# File 'lib/bioroebe/toplevel_methods/misc.rb', line 310

# ========================================================================= #
# Guess the sequence file format from the extension of the given name.
#
# Returns 'fasta' for names ending in .fa, .fna, .faa or .fasta, 'fastq'
# for names ending in .fq or .fastq, an empty String when the name
# contains ".fx", and nil for everything else.
#
# The dots are escaped so that only a real file extension matches; a
# name such as "alfalfa" is no longer reported as 'fasta'.
# ========================================================================= #
def self.guess_format(i)
  case i
  # ======================================================================= #
  # === fasta
  # ======================================================================= #
  when /\.fa$/,
       /\.fna$/,
       /\.faa$/,
       /\.fasta$/
    'fasta'
  # ======================================================================= #
  # === fastq
  # ======================================================================= #
  when /\.fq$/,
       /\.fastq$/
    'fastq'
  # ======================================================================= #
  # === fx
  #
  # NOTE(review): this entry is not anchored to the end of the name and
  # yields an empty String; the intent is not visible here - confirm.
  # ======================================================================= #
  when /\.fx/
    ''
  end
end

.hamming_distance(sequence1 = 'ATCG', sequence2 = 'ATCC') ⇒ Object

#

Bioroebe.hamming_distance

This method will return an Integer, aka a number, which represents the hamming distance between two sequences of equal length. This will state how many differences exist between two same-sized sequences (aka sequences that have the same length).

Do note that a second implementation may exist for the hamming distance, in the Bioroebe project.

Usage example:

Bioroebe.hamming_distance('ATCG','ATCC') # => 1
#

27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/bioroebe/toplevel_methods/hamming_distance.rb', line 27

# ========================================================================= #
# Return the hamming distance between two sequences as an Integer, that
# is the number of positions at which the two sequences differ.
#
# Both arguments may be Strings (split into single characters) or Arrays
# of characters. The sequences are expected to have the same length; if
# they do not, every surplus position of the longer sequence counts as
# one difference, regardless of which argument is the longer one.
#
# When sequence1 is nil a notification is shown via e() and nil is
# returned.
# ========================================================================= #
def self.hamming_distance(
    sequence1 = 'ATCG',
    sequence2 = 'ATCC'
  )
  if sequence1.nil?
    e 'Please provide a sequence (String) as input to this method.'
    return
  end
  sequence1 = sequence1.split(//) if sequence1.is_a? String
  sequence2 = sequence2.split(//) if sequence2.is_a? String
  array_sequence1 = [sequence1].flatten
  array_sequence2 = [sequence2].flatten
  # ======================================================================= #
  # Always zip from the longer sequence: Array#zip pads the shorter side
  # with nil but silently drops surplus elements of its argument, which
  # would make the result depend on the order of the two arguments.
  # ======================================================================= #
  if array_sequence1.size >= array_sequence2.size
    longer, shorter = array_sequence1, array_sequence2
  else
    longer, shorter = array_sequence2, array_sequence1
  end
  longer.zip(shorter).count { |left, right| left != right }
end

.has_this_restriction_enzyme?(name_of_restriction_enzyme) ⇒ Boolean

#

Bioroebe.has_this_restriction_enzyme?

Determine whether we have a specific restriction enzyme or whether we do not. We will downcase all keys in use to simplify finding a matching entry.

Usage example:

Bioroebe.has_this_restriction_enzyme? 'MvnI'    # => true
Bioroebe.has_this_restriction_enzyme? 'EcoRI'   # => true
Bioroebe.has_this_restriction_enzyme? 'EcoRII'  # => true
Bioroebe.has_this_restriction_enzyme? 'EcoRIII' # => false
#

Returns:

  • (Boolean)

26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/bioroebe/enzymes/has_this_restriction_enzyme.rb', line 26

# ========================================================================= #
# Report whether a restriction enzyme with the given name is registered.
#
# Any '?' characters are removed from the name and the comparison against
# the keys of ::Bioroebe.restriction_enzymes? ignores case. The argument
# is never modified; the method works on a copy of it.
#
# Returns true or false.
# ========================================================================= #
def self.has_this_restriction_enzyme?(
    name_of_restriction_enzyme
  )
  # delete() and downcase() return new Strings, so the caller's object
  # stays untouched (the in-place variants used to alter it).
  wanted = name_of_restriction_enzyme.to_s.delete('?').downcase
  ::Bioroebe.restriction_enzymes?.each_pair.any? { |key, _value|
    key.downcase == wanted
  }
end

.hash_codon_tables?Boolean

#

Bioroebe.hash_codon_tables?

#

Returns:

  • (Boolean)

115
116
117
# File 'lib/bioroebe/codons/codon_tables.rb', line 115

# Query method that forwards to ::Bioroebe::CodonTables.definitions? and
# returns its result unchanged.
# NOTE(review): the name suggests the result is a Hash of codon tables
# rather than a Boolean - confirm against CodonTables.definitions?.
def self.hash_codon_tables?
  ::Bioroebe::CodonTables.definitions?
end

.index_this_fasta_file(i = ARGV) ⇒ Object

#

Bioroebe.index_this_fasta_file

This method will use samtools faidx to index files.

#

97
98
99
100
101
102
103
# File 'lib/bioroebe/toplevel_methods/fasta_and_fastq.rb', line 97

# ========================================================================= #
# Run "samtools faidx" once for every given file.
#
# The input may be a single file name or an Array of names (ARGV by
# default); nested Arrays are flattened and nil entries are skipped.
# Each name is shell-escaped before it becomes part of the command line,
# so names containing spaces or shell metacharacters reach samtools as
# one literal argument.
# ========================================================================= #
def self.index_this_fasta_file(i = ARGV)
  require 'shellwords' # Only needed here; loading it twice is harmless.
  [i].flatten.compact.each {|this_file|
    e
    esystem "samtools faidx #{Shellwords.escape(this_file)}"
    e
  }
end

.infer_type_from_this_sequence(i = 'ATGGTACGACAC') ⇒ Object

#

Bioroebe.infer_type_from_this_sequence

This method will try to infer the type from a given sequence.

The three valid return types are the following symbols:

:dna
:rna
:protein

Note that this may not work 100% reliably, so do not depend too much on this method working perfectly.

#

29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/bioroebe/toplevel_methods/infer.rb', line 29

# ========================================================================= #
# Infer the kind of a sequence and return :rna, :dna or :protein.
#
# An Array is joined into one String first. The checks run in this
# order: any "U" in the input means :rna; input matching the pattern
# /^[ATCG]+$/ means :dna; anything else is assumed to be a :protein.
# ========================================================================= #
def self.infer_type_from_this_sequence(
    i = 'ATGGTACGACAC'
  )
  i = i.join if i.is_a? Array
  # ======================================================================= #
  # === :rna
  # ======================================================================= #
  return :rna if i.include? 'U'
  # ======================================================================= #
  # === :dna
  # ======================================================================= #
  return :dna if i =~ /^[ATCG]+$/
  # ======================================================================= #
  # === :protein
  # ======================================================================= #
  :protein # Nothing else matched, so simply assume a protein.
end

.initialize_codonsObject

#

Bioroebe.initialize_codons

This method will first initialize the stop-codons, and then determine the start codons in use.

#

82
83
84
85
# File 'lib/bioroebe/codons/codons.rb', line 82

# Set up the codon state in two steps, in this order: first call
# initialize_stop_codons, then determine_start_codons_from_the_codon_table.
# The return value is whatever the second call returns.
def self.initialize_codons
  initialize_stop_codons
  determine_start_codons_from_the_codon_table
end

.initialize_default_stop_codonsObject

#

Bioroebe.initialize_default_stop_codons

This method will initialize the default stop codons. These default to the stop codons that can be found in the human genome.

Note that this method will NOT work if @stop_codons already contains elements; this is a tiny “safeguard” to prevent erroneous use. If you wish to not be handicapped then clear it by yourself first, via:

Bioroebe.clear_stop_codons
#

246
247
248
249
250
251
# File 'lib/bioroebe/codons/codons.rb', line 246

# ========================================================================= #
# Fill @stop_codons with the default stop codons TAG, TAA and TGA.
#
# Nothing happens (and nil is returned) when @stop_codons already holds
# entries; otherwise the three codons are appended and @stop_codons is
# returned.
# ========================================================================= #
def self.initialize_default_stop_codons
  return unless @stop_codons.empty?
  # Append the default stop codons one by one.
  %w( TAG TAA TGA ).each { |stop_codon| @stop_codons << stop_codon }
  @stop_codons
end

.input_as_dna(i) ⇒ Object

#

Bioroebe.input_as_dna

This method will only accept input that is DNA, that is, the short letter variant (thus, A, T, C or G). Any other input will be stripped away, aka discarded, so this method acts as a filter - a forward-filter for DNA.

The method will return a “String” that is assumed to be a “DNA string”. You can expect only DNA nucleotides to be part of that string.

Usage example:

Bioroebe.input_as_dna 'UUTGAGGACCA' # => "TGAGGACCA"
#

402
403
404
405
406
407
408
409
410
411
# File 'lib/bioroebe/toplevel_methods/nucleotides.rb', line 402

def self.input_as_dna(