Module: Bioroebe

Included in:
Taxonomy::Interactive
Defined in:
lib/bioroebe/svg/page.rb,
lib/bioroebe/base/base.rb,
lib/bioroebe/cell/cell.rb,
lib/bioroebe/gene/gene.rb,
lib/bioroebe/ncbi/ncbi.rb,
lib/bioroebe/svg/glyph.rb,
lib/bioroebe/svg/svgee.rb,
lib/bioroebe/svg/track.rb,
lib/bioroebe/misc/ruler.rb,
lib/bioroebe/shell/menu.rb,
lib/bioroebe/ncbi/efetch.rb,
lib/bioroebe/parsers/gff.rb,
lib/bioroebe/shell/shell.rb,
lib/bioroebe/siRNA/siRNA.rb,
lib/bioroebe/virus/virus.rb,
lib/bioroebe/base/colours.rb,
lib/bioroebe/sequence/dna.rb,
lib/bioroebe/sequence/rna.rb,
lib/bioroebe/blosum/blosum.rb,
lib/bioroebe/codons/codons.rb,
lib/bioroebe/genome/genome.rb,
lib/bioroebe/svg/primitive.rb,
lib/bioroebe/taxonomy/edit.rb,
lib/bioroebe/taxonomy/menu.rb,
lib/bioroebe/taxonomy/node.rb,
lib/bioroebe/biomart/filter.rb,
lib/bioroebe/biomart/server.rb,
lib/bioroebe/constants/GUIs.rb,
lib/bioroebe/count/count_at.rb,
lib/bioroebe/count/count_gc.rb,
lib/bioroebe/shell/readline.rb,
lib/bioroebe/taxonomy/chart.rb,
lib/bioroebe/biomart/biomart.rb,
lib/bioroebe/biomart/dataset.rb,
lib/bioroebe/colours/colours.rb,
lib/bioroebe/project/project.rb,
lib/bioroebe/regexes/regexes.rb,
lib/bioroebe/taxonomy/shared.rb,
lib/bioroebe/version/version.rb,
lib/bioroebe/biomart/database.rb,
lib/bioroebe/dotplots/dotplot.rb,
lib/bioroebe/sequence/protein.rb,
lib/bioroebe/shell/help/class.rb,
lib/bioroebe/svg/mini_feature.rb,
lib/bioroebe/taxonomy/colours.rb,
lib/bioroebe/abstract/features.rb,
lib/bioroebe/biomart/attribute.rb,
lib/bioroebe/encoding/encoding.rb,
lib/bioroebe/readline/readline.rb,
lib/bioroebe/sequence/sequence.rb,
lib/bioroebe/taxonomy/taxonomy.rb,
lib/bioroebe/codons/codon_table.rb,
lib/bioroebe/parsers/parse_embl.rb,
lib/bioroebe/sequence/alignment.rb,
lib/bioroebe/taxonomy/constants.rb,
lib/bioroebe/taxonomy/help/help.rb,
lib/bioroebe/taxonomy/info/info.rb,
lib/bioroebe/codons/codon_tables.rb,
lib/bioroebe/codons/start_codons.rb,
lib/bioroebe/colours/use_colours.rb,
lib/bioroebe/constants/constants.rb,
lib/bioroebe/misc/useful_formulas.rb,
lib/bioroebe/patterns/rgg_scanner.rb,
lib/bioroebe/taxonomy/info/is_dna.rb,
lib/bioroebe/taxonomy/interactive.rb,
lib/bioroebe/taxonomy/parse_fasta.rb,
lib/bioroebe/exceptions/exceptions.rb,
lib/bioroebe/parsers/blosum_parser.rb,
lib/bioroebe/parsers/stride_parser.rb,
lib/bioroebe/requires/require_yaml.rb,
lib/bioroebe/genomes/genome_pattern.rb,
lib/bioroebe/parsers/biolang_parser.rb,
lib/bioroebe/parsers/genbank_parser.rb,
lib/bioroebe/taxonomy/class_methods.rb,
lib/bioroebe/taxonomy/help/helpline.rb,
lib/bioroebe/toplevel_methods/roebe.rb,
lib/bioroebe/codons/show_codon_usage.rb,
lib/bioroebe/configuration/constants.rb,
lib/bioroebe/sinatra/sinatra_wrapper.rb,
lib/bioroebe/base/prototype/prototype.rb,
lib/bioroebe/cleave_and_digest/cleave.rb,
lib/bioroebe/codons/show_codon_tables.rb,
lib/bioroebe/genomes/genome_retriever.rb,
lib/bioroebe/patterns/profile_pattern.rb,
lib/bioroebe/patterns/scan_for_repeat.rb,
lib/bioroebe/requires/require_colours.rb,
lib/bioroebe/www/embeddable_interface.rb,
lib/bioroebe/cleave_and_digest/trypsin.rb,
lib/bioroebe/dotplots/advanced_dotplot.rb,
lib/bioroebe/electron_microscopy/flipy.rb,
lib/bioroebe/raw_sequence/raw_sequence.rb,
lib/bioroebe/sinatra/sinatra_interface.rb,
lib/bioroebe/toplevel_methods/taxonomy.rb,
lib/bioroebe/utility_scripts/find_gene.rb,
lib/bioroebe/viennarna/rnafold_wrapper.rb,
lib/bioroebe/colours/colourize_sequence.rb,
lib/bioroebe/enzymes/restriction_enzyme.rb,
lib/bioroebe/aminoacids/codon_percentage.rb,
lib/bioroebe/cleave_and_digest/digestion.rb,
lib/bioroebe/codons/detect_minimal_codon.rb,
lib/bioroebe/configuration/configuration.rb,
lib/bioroebe/log_directory/log_directory.rb,
lib/bioroebe/sequence/reverse_complement.rb,
lib/bioroebe/string_matching/levensthein.rb,
lib/bioroebe/base/base_module/base_module.rb,
lib/bioroebe/codons/show_this_codon_table.rb,
lib/bioroebe/colours/colour_schemes/score.rb,
lib/bioroebe/fasta_and_fastq/fasta_parser.rb,
lib/bioroebe/cell/specialized_cells/B_cell.rb,
lib/bioroebe/cell/specialized_cells/T_cell.rb,
lib/bioroebe/colours/colour_schemes/simple.rb,
lib/bioroebe/ngs/phred_quality_score_table.rb,
lib/bioroebe/palindromes/palindrome_finder.rb,
lib/bioroebe/taxonomy/info/check_available.rb,
lib/bioroebe/utility_scripts/punnet/punnet.rb,
lib/bioroebe/aminoacids/show_hydrophobicity.rb,
lib/bioroebe/calculate/calculate_gc_content.rb,
lib/bioroebe/colours/colourize_related_code.rb,
lib/bioroebe/fasta_and_fastq/download_fasta.rb,
lib/bioroebe/requires/require_all_pdb_files.rb,
lib/bioroebe/string_matching/smith_waterman.rb,
lib/bioroebe/codons/determine_optimal_codons.rb,
lib/bioroebe/codons/sanitize_codon_frequency.rb,
lib/bioroebe/gui/universal_widgets/gene/gene.rb,
lib/bioroebe/matplotlib/matplotlib_generator.rb,
lib/bioroebe/utility_scripts/compseq/compseq.rb,
lib/bioroebe/utility_scripts/showorf/showorf.rb,
lib/bioroebe/aminoacids/aminoacids_mass_table.rb,
lib/bioroebe/calculate/calculate_blosum_score.rb,
lib/bioroebe/count/count_amount_of_aminoacids.rb,
lib/bioroebe/electron_microscopy/fix_pos_file.rb,
lib/bioroebe/enzymes/restriction_enzymes_file.rb,
lib/bioroebe/enzymes/show_restriction_enzymes.rb,
lib/bioroebe/palindromes/palindrome_generator.rb,
lib/bioroebe/requires/require_all_codon_files.rb,
lib/bioroebe/requires/require_all_count_files.rb,
lib/bioroebe/string_matching/hamming_distance.rb,
lib/bioroebe/toplevel_methods/fasta_and_fastq.rb,
lib/bioroebe/aminoacids/aminoacid_substitution.rb,
lib/bioroebe/cell/specialized_cells/Macrophage.rb,
lib/bioroebe/colours/colour_schemes/nucleotide.rb,
lib/bioroebe/count/count_amount_of_nucleotides.rb,
lib/bioroebe/electron_microscopy/read_file_xmd.rb,
lib/bioroebe/gui/universal_widgets/shell/shell.rb,
lib/bioroebe/requires/require_all_parser_files.rb,
lib/bioroebe/toplevel_methods/toplevel_methods.rb,
lib/bioroebe/utility_scripts/pathways/pathways.rb,
lib/bioroebe/aminoacids/display_aminoacid_table.rb,
lib/bioroebe/fasta_and_fastq/show_fasta_headers.rb,
lib/bioroebe/gui/experimental/snapgene/snapgene.rb,
lib/bioroebe/requires/require_all_dotplot_files.rb,
lib/bioroebe/requires/require_all_enzymes_files.rb,
lib/bioroebe/requires/require_all_pattern_files.rb,
lib/bioroebe/requires/require_cleave_and_digest.rb,
lib/bioroebe/aminoacids/create_random_aminoacids.rb,
lib/bioroebe/enzymes/has_this_restriction_enzyme.rb,
lib/bioroebe/misc/quiz/three_letter_to_aminoacid.rb,
lib/bioroebe/palindromes/palindrome_2D_structure.rb,
lib/bioroebe/requires/require_all_sequence_files.rb,
lib/bioroebe/requires/require_all_taxonomy_files.rb,
lib/bioroebe/utility_scripts/compacter/compacter.rb,
lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb,
lib/bioroebe/annotations/create_annotation_format.rb,
lib/bioroebe/colours/colour_schemes/colour_scheme.rb,
lib/bioroebe/conversions/convert_aminoacid_to_dna.rb,
lib/bioroebe/databases/download_taxonomy_database.rb,
lib/bioroebe/nucleotides/complementary_dna_strand.rb,
lib/bioroebe/requires/require_all_calculate_files.rb,
lib/bioroebe/conversions/dna_to_aminoacid_sequence.rb,
lib/bioroebe/electron_microscopy/parse_coordinates.rb,
lib/bioroebe/fasta_and_fastq/show_fasta_statistics.rb,
lib/bioroebe/gui/universal_widgets/sizeseq/sizeseq.rb,
lib/bioroebe/pdb_and_protein_structure/alpha_helix.rb,
lib/bioroebe/requires/require_all_aminoacids_files.rb,
lib/bioroebe/requires/require_the_toplevel_methods.rb,
lib/bioroebe/utility_scripts/analyse_local_dataset.rb,
lib/bioroebe/base/colours_for_base/colours_for_base.rb,
lib/bioroebe/enzymes/restriction_enzymes/statistics.rb,
lib/bioroebe/fasta_and_fastq/fastq_format_explainer.rb,
lib/bioroebe/patterns/analyse_glycosylation_pattern.rb,
lib/bioroebe/requires/require_all_nucleotides_files.rb,
lib/bioroebe/requires/require_all_palindromes_files.rb,
lib/bioroebe/string_matching/find_longest_substring.rb,
lib/bioroebe/string_matching/simple_string_comparer.rb,
lib/bioroebe/calculate/calculate_melting_temperature.rb,
lib/bioroebe/electron_microscopy/coordinate_analyzer.rb,
lib/bioroebe/electron_microscopy/generate_em2em_file.rb,
lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb,
lib/bioroebe/fasta_and_fastq/parse_fastq/parse_fastq.rb,
lib/bioroebe/pdb_and_protein_structure/helical_wheel.rb,
lib/bioroebe/calculate/calculate_levensthein_distance.rb,
lib/bioroebe/nucleotides/sanitize_nucleotide_sequence.rb,
lib/bioroebe/patterns/is_this_sequence_a_EGF2_pattern.rb,
lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb,
lib/bioroebe/requires/require_all_colour_scheme_files.rb,
lib/bioroebe/codons/possible_codons_for_this_aminoacid.rb,
lib/bioroebe/colours/colour_schemes/colour_scheme_demo.rb,
lib/bioroebe/gui/universal_widgets/alignment/alignment.rb,
lib/bioroebe/utility_scripts/align_open_reading_frames.rb,
lib/bioroebe/utility_scripts/permutations/permutations.rb,
lib/bioroebe/genbank/genbank_flat_file_format_generator.rb,
lib/bioroebe/pdb_and_protein_structure/parse_mmCIF_file.rb,
lib/bioroebe/requires/require_all_fasta_and_fastq_files.rb,
lib/bioroebe/requires/require_all_string_matching_files.rb,
lib/bioroebe/requires/require_all_utility_scripts_files.rb,
lib/bioroebe/codons/convert_this_codon_to_that_aminoacid.rb,
lib/bioroebe/fasta_and_fastq/fasta_defline/fasta_defline.rb,
lib/bioroebe/fasta_and_fastq/fasta_to_yaml/fasta_to_yaml.rb,
lib/bioroebe/gui/universal_widgets/controller/controller.rb,
lib/bioroebe/gui/universal_widgets/www_finder/www_finder.rb,
lib/bioroebe/nucleotides/molecular_weight_of_nucleotides.rb,
lib/bioroebe/pdb_and_protein_structure/download_this_pdb.rb,
lib/bioroebe/utility_scripts/dot_alignment/dot_alignment.rb,
lib/bioroebe/utility_scripts/mirror_repeat/mirror_repeat.rb,
lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb,
lib/bioroebe/utility_scripts/parse_taxonomy/parse_taxonomy.rb,
lib/bioroebe/virus/individual_viruses/tobacco_mosaic_virus.rb,
lib/bioroebe/base/internal_hash_module/internal_hash_module.rb,
lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb,
lib/bioroebe/electron_microscopy/electron_microscopy_module.rb,
lib/bioroebe/electron_microscopy/simple_star_file_generator.rb,
lib/bioroebe/requires/require_all_electron_microscopy_files.rb,
lib/bioroebe/fasta_and_fastq/length_modifier/length_modifier.rb,
lib/bioroebe/gui/universal_widgets/three_to_one/three_to_one.rb,
lib/bioroebe/utility_scripts/display_open_reading_frames/menu.rb,
lib/bioroebe/utility_scripts/display_open_reading_frames/misc.rb,
lib/bioroebe/utility_scripts/display_open_reading_frames/reset.rb,
lib/bioroebe/utility_scripts/move_file_to_its_correct_location.rb,
lib/bioroebe/base/commandline_application/commandline_arguments.rb,
lib/bioroebe/utility_scripts/display_open_reading_frames/report.rb,
lib/bioroebe/gui/universal_widgets/parse_pdb_file/parse_pdb_file.rb,
lib/bioroebe/gui/universal_widgets/protein_to_DNA/protein_to_DNA.rb,
lib/bioroebe/base/commandline_application/commandline_application.rb,
lib/bioroebe/fasta_and_fastq/return_fasta_subsection_of_this_file.rb,
lib/bioroebe/colours/colour_schemes/array_available_colour_schemes.rb,
lib/bioroebe/fasta_and_fastq/compact_fasta_file/compact_fasta_file.rb,
lib/bioroebe/gui/universal_widgets/random_sequence/random_sequence.rb,
lib/bioroebe/utility_scripts/consensus_sequence/consensus_sequence.rb,
lib/bioroebe/utility_scripts/display_open_reading_frames/determine.rb,
lib/bioroebe/utility_scripts/compare_these_two_sequences_via_blosum.rb,
lib/bioroebe/utility_scripts/display_open_reading_frames/initialize.rb,
lib/bioroebe/fasta_and_fastq/autocorrect_the_name_of_this_fasta_file.rb,
lib/bioroebe/gui/universal_widgets/format_converter/format_converter.rb,
lib/bioroebe/gui/universal_widgets/hamming_distance/hamming_distance.rb,
lib/bioroebe/gui/universal_widgets/show_codon_table/show_codon_table.rb,
lib/bioroebe/gui/universal_widgets/show_codon_usage/show_codon_usage.rb,
lib/bioroebe/pdb_and_protein_structure/fetch_fasta_sequence_from_pdb.rb,
lib/bioroebe/calculate/calculate_the_position_specific_scoring_matrix.rb,
lib/bioroebe/string_matching/find_longest_substring_via_LCS_algorithm.rb,
lib/bioroebe/gui/universal_widgets/anti_sense_strand/anti_sense_strand.rb,
lib/bioroebe/utility_scripts/check_for_mismatches/check_for_mismatches.rb,
lib/bioroebe/base/infer_the_namespace_module/infer_the_namespace_module.rb,
lib/bioroebe/enzymes/return_sequence_that_is_cut_via_restriction_enzyme.rb,
lib/bioroebe/aminoacids/colourize_hydrophilic_and_hydrophobic_aminoacids.rb,
lib/bioroebe/enzymes/return_restriction_enzyme_sequence_and_cut_position.rb,
lib/bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb,
lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/menu.rb,
lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/misc.rb,
lib/bioroebe/gui/universal_widgets/fasta_table_widget/fasta_table_widget.rb,
lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/reset.rb,
lib/bioroebe/gui/universal_widgets/aminoacid_composition/customized_dialog.rb,
lib/bioroebe/gui/universal_widgets/nucleotide_analyser/nucleotide_analyser.rb,
lib/bioroebe/gui/universal_widgets/restriction_enzymes/restriction_enzymes.rb,
lib/bioroebe/nucleotides/show_nucleotide_sequence/show_nucleotide_sequence.rb,
lib/bioroebe/utility_scripts/show_this_dna_sequence/show_this_dna_sequence.rb,
lib/bioroebe/gui/universal_widgets/blosum_matrix_viewer/blosum_matrix_viewer.rb,
lib/bioroebe/gui/universal_widgets/levensthein_distance/levensthein_distance.rb,
lib/bioroebe/gui/universal_widgets/primer_design_widget/primer_design_widget.rb,
lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb,
lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/constants.rb,
lib/bioroebe/gui/universal_widgets/aminoacid_composition/aminoacid_composition.rb,
lib/bioroebe/utility_scripts/create_batch_entrez_file/create_batch_entrez_file.rb,
lib/bioroebe/utility_scripts/determine_antigenic_areas/determine_antigenic_areas.rb,
lib/bioroebe/fasta_and_fastq/display_how_many_fasta_entries_are_in_this_directory.rb,
lib/bioroebe/gui/universal_widgets/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb,
lib/bioroebe/utility_scripts/download_files_from_rebase/download_files_from_rebase.rb,
lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb,
lib/bioroebe/utility_scripts/display_open_reading_frames/display_open_reading_frames.rb,
lib/bioroebe/pdb_and_protein_structure/report_secondary_structures_from_this_pdb_file.rb,
lib/bioroebe/calculate/calculate_melting_temperature_for_more_than_thirteen_nucleotides.rb,
lib/bioroebe/gui/universal_widgets/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb,
lib/bioroebe/gui/universal_widgets/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb,
lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/split_this_fasta_file_into_chromosomes.rb,
lib/bioroebe/utility_scripts/determine_missing_nucleotides_percentage/determine_missing_nucleotides_percentage.rb

Overview

#

require ‘bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/constants.rb’

#

Defined Under Namespace

Modules: BaseModule, Biomart, Blosum, CodonTable, CodonTables, CodonTablesFrequencies, ColourScheme, Colourize, ColoursForBase, CommandlineArguments, Configuration, ElectronMicroscopy, EmbeddableInterface, Features, GUI, InferTheNamespaceModule, InternalHashModule, MolecularWeightOfNucleotides, NucleotideModule, Parser, Postgresql, Quiz, RestrictionEnzymes, SinatraInterface, Taxonomy, VerboseTruth Classes: AdvancedDotplot, AlignOpenReadingFrames, Alignment, AlphaHelix, AminoacidSubstitution, AminoacidsMassTable, AnalyseGlycosylationPattern, AnalyseLocalDataset, AutocorrectTheNameOfThisFastaFile, B_cell, Base, BiolangParser, BlosumParser, CalculateBlosumScore, CalculateGCContent, CalculateMeltingTemperature, CalculateMeltingTemperatureForMoreThanThirteenNucleotides, CalculateThePositionSpecificScoringMatrix, Cell, CheckForMismatches, CodonPercentage, ColourSchemeDemo, ColourizeHydrophilicAndHydrophobicAminoacids, ColourizeSequence, CommandlineApplication, CompactFastaFile, Compacter, CompareTheseTwoSequencesViaBlosum, ComplementaryDnaStrand, Compseq, ConsensusSequence, ConvertAminoacidToDNA, ConvertThisCodonToThatAminoacid, CountAmountOfAminoacids, CountAmountOfNucleotides, CreateAnnotationFormat, CreateBatchEntrezFile, CreateRandomAminoacids, DNA, DeduceAminoacidSequence, DetectMinimalCodon, DetermineAntigenicAreas, DetermineMissingNucleotidesPercentage, DetermineOptimalCodons, Digestion, DisplayAminoacidTable, DisplayHowManyFastaEntriesAreInThisDirectory, DisplayOpenReadingFrames, DnaToAminoacidSequence, DotAlignment, Dotplot, DownloadFasta, DownloadFilesFromRebase, DownloadTaxonomyDatabase, FastaDefline, FastaParser, FastaToYaml, FastqFormatExplainer, FetchDataFromUniprot, FetchFastaSequenceFromPdb, FindGene, FindLongestSubstring, FindLongestSubstringViaLCSalgorithm, GenbankFlatFileFormatGenerator, GenbankParser, Gene, Genome, GenomePattern, GenomeRetriever, HammingDistance, HelixWheel, InvalidAminoacid, LengthModifier, Levensthein, Macrophage, 
MatplotlibGenerator, Matrix, MirrorRepeat, MostLikelyNucleotideSequenceForThisAminoacidSequence, MoveFileToItsCorrectLocation, Ncbi, Palindrome2DStructure, PalindromeFinder, PalindromeGenerator, ParseEMBL, ParseFasta, ParseFastq, ParseFrequencyTable, ParsePdbFile, ParseTaxonomy, ParsemmCIFFile, Pathways, Permutations, PhredQualityScoreTable, PossibleCodonsForThisAminoacid, ProfilePattern, Protein, Punnet, RGG_Scanner, RNA, RNALfoldWrapper, RawSequence, ReportSecondaryStructuresFromThisPdbFile, RestrictionEnzyme, ReverseComplement, Ruler, SVG, SanitizeCodonFrequency, SanitizeNucleotideSequence, ScanForRepeat, Sequence, Shell, ShowCodonTables, ShowCodonUsage, ShowFastaHeaders, ShowFastaStatistics, ShowHydrophobicity, ShowNucleotideSequence, ShowOrf, ShowRestrictionEnzymes, ShowThisCodonTable, ShowThisDNASequence, SiRNA, SimpleStringComparer, SimplifyFastaHeader, SinatraWrapper, SmithWaterman, SplitThisFastaFileIntoChromosomes, StrideParser, T_cell, TobaccoMosaicVirus, Trypsin, UsefulFormulas, Virus

Constant Summary collapse

USE_THIS_COLOUR_FOR_DNA =
#

Bioroebe::USE_THIS_COLOUR_FOR_DNA

The following constant will denote which colour we will use for DNA sequences by default, in this case, the HTML colour called steelblue.

#
:steelblue
FILE_EXPAND_CD_ALIASES =
#

FILE_EXPAND_CD_ALIASES

#
"#{@project_base_directory}hash_expand_cd_aliases.rb"
CONFIGURATION_DIRECTORY =
#

CONFIGURATION_DIRECTORY

#
"#{project_yaml_directory?}configuration/"
TOPLEVEL_METHODS_DIRECTORY =
#

TOPLEVEL_METHODS_DIRECTORY

#
"#{@project_base_directory}toplevel_methods/"
CODON_TABLES_DIRECTORY =
#

CODON_TABLES_DIRECTORY

#
"#{@project_base_directory}codon_tables/"
CLEAVE_AND_DIGEST_DIRECTORY =
#

CLEAVE_AND_DIGEST_DIRECTORY

#
"#{@project_base_directory}cleave_and_digest/"
ELECTRON_MICROSCOPY_DIRECTORY =
#

ELECTRON_MICROSCOPY_DIRECTORY

#
"#{@project_base_directory}electron_microscopy/"
CODON_TABLES_DIRECTORY_FREQUENCY =
#

CODON_TABLES_DIRECTORY_FREQUENCY

#
"#{CODON_TABLES_DIRECTORY}frequencies/"
PDB_DIRECTORY =
#

PDB_DIRECTORY

This directory has been called pdb_and_protein_structure/ since November 2023.

#
"#{@project_base_directory}pdb_and_protein_structure/"
PARSERS_DIRECTORY =
#

PARSERS_DIRECTORY

#
"#{@project_base_directory}parsers/"
ENZYMES_DIRECTORY =
#

ENZYMES_DIRECTORY

#
"#{@project_base_directory}enzymes/"
PALINDROMES_DIRECTORY =
#

PALINDROMES_DIRECTORY

#
"#{@project_base_directory}palindromes/"
PATTERN_DIRECTORY =
#

PATTERN_DIRECTORY

#
"#{@project_base_directory}pattern/"
NUCLEOTIDES_DIRECTORY =
#

NUCLEOTIDES_DIRECTORY

#
"#{@project_base_directory}nucleotides/"
COUNT_DIRECTORY =
#

COUNT_DIRECTORY

#
"#{@project_base_directory}count/"
AMINOACIDS_DIRECTORY =
#

AMINOACIDS_DIRECTORY

#
"#{@project_base_directory}aminoacids/"
BLOSUM_DIRECTORY =
#

BLOSUM_DIRECTORY

#
"#{project_yaml_directory?}blosum/"
CALCULATE_DIRECTORY =
#

CALCULATE_DIRECTORY

#
"#{@project_base_directory}calculate/"
CODONS_DIRECTORY =
#

CODONS_DIRECTORY

#
"#{@project_base_directory}codons/"
DOTPLOTS_DIRECTORY =
#

DOTPLOTS_DIRECTORY

#
"#{@project_base_directory}dotplots/"
SEQUENCE_DIRECTORY =
#

SEQUENCE_DIRECTORY

#
"#{@project_base_directory}sequence/"
PATHWAYS_DIRECTORY =
#

PATHWAYS_DIRECTORY

This constant will point to e.g. “/Programs/Ruby/2.6.4/lib/ruby/site_ruby/2.6.0/bioroebe/yaml/pathways/”.

#
"#{project_yaml_directory?}pathways/"
BIOROEBE_YAML_AMINOACIDS_DIRECTORY =
#

BIOROEBE_YAML_AMINOACIDS_DIRECTORY

#
"#{project_yaml_directory?}aminoacids/"
STRING_MATCHING_DIRECTORY =
#

STRING_MATCHING_DIRECTORY

#
"#{@project_base_directory}string_matching/"
FASTA_AND_FASTQ_DIRECTORY =
#

FASTA_AND_FASTQ_DIRECTORY

#
"#{@project_base_directory}fasta_and_fastq/"
VERSION =
#

VERSION

#
'0.13.31'
LAST_UPDATE =
#

LAST_UPDATE

This constant keeps track of when the bioroebe project was last updated. The notation is: DD.MM.YYYY

#
'22.02.2024'
URL_TO_THE_DOCUMENTATION =
#

URL_TO_THE_DOCUMENTATION

Keeps track of where the documentation for Bioroebe is hosted.

#
"https://www.rubydoc.info/gems/#{self.to_s.downcase}/#{VERSION}"
Aminoacids =
#

The following “alias” was added in May 2022.

#
Protein
UTF_ENCODING =
#

Bioroebe::UTF_ENCODING

#
'UTF-8'
USE_THIS_ENCODING =
#

Bioroebe::USE_THIS_ENCODING

#
UTF_ENCODING
Seq =
#

Usage example

x = Bioroebe::Seq.new(‘AGTACACTGGT’)

#
Sequence
N =
#

Bioroebe::N

#
"\n"
R =
#

Bioroebe::R

#
"\r"
TABULATOR =
#

TABULATOR

#
"\t"
ROW_TERMINATOR =
#

Bioroebe::ROW_TERMINATOR

This constant is not often in use, though.

#
"#{TABULATOR}|\n"
BE_VERBOSE =
#

BE_VERBOSE

#
true
TOKEN =
#

TOKEN (TOKEN tag)

#
'|'
ARRAY_AMINOACIDS_THAT_CAN_BE_PHOSPHORYLATED =
#

ARRAY_AMINOACIDS_THAT_CAN_BE_PHOSPHORYLATED

Just list the aminoacids that can typically be phosphorylated.

#
%w(
  S Y T
)
ENGLISH_LONG_NAMES_FOR_THE_AMINO_ACIDS =
#

ENGLISH_LONG_NAMES_FOR_THE_AMINO_ACIDS

We have to keep the long names for the amino acids in one constant, so that we can do queries later on.

#
(%w( 
  Alanine
  Arginine
  Asparagine
  Cysteine
  Glutamine
  Glycine
  Histidine
  Isoleucine
  Leucine
  Lysine
  Methionine
  Phenylalanine
  Proline
  Serine
  Threonine
  Tryptophane
  Tyrosine
  Valine
) << 'Aspartic acid' << 'Glutamic acid').sort
POSSIBLE_AMINO_ACIDS =
#

POSSIBLE_AMINO_ACIDS

Which Aminoacids are possible/allowed? We will list them here:

ACDEFGHIKLMNPQRSTVWY

Note that this is distinct from the constant AMINO_ACIDS, which is instead loaded from a local .yml file. This constant includes only the 20 canonical aminoacids (so no U), whereas AMINO_ACIDS may also include pyrrolysine and selenocysteine.

#
'ACDEFGHIKLMNPQRSTVWY'
TWENTY_CANONICAL_AMINOACIDS =

TWENTY_CANONICAL_AMINOACIDS

POSSIBLE_AMINO_ACIDS
ARRAY_AMINO_ACIDS_ALPHABET =
#

ARRAY_AMINO_ACIDS_ALPHABET

This keeps an Array with all aminoacids, in one-letter format.

So it is equivalent to:

["A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"]
#
POSSIBLE_AMINO_ACIDS.chars
VERTICAL_UNICODE_BAR =
#

VERTICAL_UNICODE_BAR

#
'|'
AMINOACID_FAMILIES =
#

AMINOACID_FAMILIES

#
{
  'citratzyklus' => {
    # Alpha-Ketoglutarat: EPQR
    'alpha-ketoglutarat' => %w( E P Q R ),
    # Oxalacetat: DMN-KTI
    'oxalacetat' => %w( D N K M T I ),
  },
  'glykolyse' => {
    'pyruvat' => %w( A V L ),                 # AVL
    '3-phosphoglycerinsäure' => %w( S G C ), # SGC
    },
    'chorismat' => {
      'aromatische_familie' => %w( F Y W )       # FYW
    },
    'ribose-5-p' => {
      'histidinol' => %w( H ) # Histidine.
    },
}
VALID_WAYS_TO_EXIT =
#

VALID_WAYS_TO_EXIT

All ways to exit will be recorded here.

If you need to use more ways, simply append to this Array.

This constant may have to be moved into the bio-shell part eventually.

#
%w(
  quit q exit qq :q qt
  bye
  rda
  r2
  tq
  sq
  exit_program
  exitprogram
)
NAMES_ENTRIES =
#

NAMES_ENTRIES

This used to belong to the Taxonomy submodule.

#
'names.sql'
NODES_ENTRIES =
#

NODES_ENTRIES

This used to belong to the Taxonomy submodule.

#
'nodes.sql'
FASTA_ENTRIES =
#

FASTA_ENTRIES

This used to belong to the Taxonomy submodule.

#
'fasta.sql'
SHALL_WE_LOG_LAST_UPDATE =
#

SHALL_WE_LOG_LAST_UPDATE

This constant exists specifically for the taxonomy-component of the Bioroebe project.

#
true
NAME_OF_BIO_SHELL =
#

NAME_OF_BIO_SHELL

This constant can be used as the default prompt for the bioshell component.

#
'BIO SHELL> '
DEFAULT_DNA_SEQUENCE =
#

DEFAULT_DNA_SEQUENCE

This is a default “test” DNA sequence, in the sense that it can be used to quickly test functionality within the bioroebe project.

It was added in May 2020, but it may be that we have to remove it at a later time, or move it into a separate .yml file. For the time being, though, it will reside here.

#
'CGGCCCGATTTGGGTTTCGGAGCGATCGAAATACCAGCACTACCATGAATTCTAT'\
'ATGGCTGCCGTTCACAGCCTTAATTTTAGGCTTTCCACCTGATCACTCTTTAATC'\
'TCCATTGTTTCTGGTACGCAGAAATTGACGCTTCCCATTCATTCACGGCTAAAAT'\
'CAAGGATTCCACCAGAATCGCGGGCCGCGTGGGTGCGCCGTCGACCTCCTCGGCC'\
'AAATAAGAACGGGCAGGTAAGAGACTAGGGTACTCAAGAT'
DEFAULT_LENGTH_FOR_DNA =
#

DEFAULT_LENGTH_FOR_DNA

How long our DNA-generated strings should be by default.

This may be used by some scripts, so it provides a default value for use in these scripts.

150 nucleotides are the current default.

#
150
FIELD_TERMINATOR =
#

FIELD_TERMINATOR

#
"#{TABULATOR}|#{TABULATOR}"
MAIN_DELIMITER =

An alias to the above.

DELIMITER = FIELD_TERMINATOR
USERS_X =
#

USERS_X

#
'/home/x/'
HOME_DIRECTORY_OF_USER_X =

HOME_DIRECTORY_OF_USER_X

USERS_X
RUBY_SRC =
#

RUBY_SRC

This constant is only useful on my home system. Most other users will never need it.

#
"#{USERS_X}programming/ruby/src/"
BIOROEBE_AT_HOME =
#

BIOROEBE_AT_HOME

#
"#{RUBY_SRC}bioroebe/lib/bioroebe/"
LOCALHOST =
#

LOCALHOST

#
'http://localhost/'
PATH_TO_THE_RELION_BINARY =
#

PATH_TO_THE_RELION_BINARY

This constant can be set to determine where relion resides. It is mostly an ad-hoc constant.

#
'/opt/RELION/relion-1.3/bin/relion'
ARRAY_REGISTERED_ACTIONS =
#

ARRAY_REGISTERED_ACTIONS

ARRAY_REGISTERED_ACTIONS becomes @registered_actions.

#
%w(
  to_rna
  to_dna
  rest
  pubmed
  blosum
  restriction
  translate
  quit
  shorten_aminoacid
)
FILE_BIO_LANG =
#

FILE_BIO_LANG

#
"#{USERS_X}data/personal/yaml/bio_lang/bio_lang.md"
EMAIL =
#

EMAIL

My email address - not too terribly useful for other people, but nonetheless it may be useful to display it, in particular for GUI-related components of the bioroebe-project and simple feedback in the long run.

#
'[email protected]'
REGEX_FOR_N_GLYCOSYLATION_PATTERN =
#

REGEX_FOR_N_GLYCOSYLATION_PATTERN

See rubular at:

https://rubular.com/r/D95Cq7oR5x
#
/(?=(N[^P][ST][^P]))/
REGEX_PROSITE_FOR_ANY_AMINOACID =
#

REGEX_PROSITE_FOR_ANY_AMINOACID

#
/x\((\d+)\)/
STOP_CODONS =
#

Bioroebe::STOP_CODONS

The STOP codons that can be found in humans, in RNA format.

#
%w(
  UAA UAG UGA
)
RNA_NUCLEOTIDES =
#

Bioroebe::RNA_NUCLEOTIDES

This will refer to an Array including all four RNA nucleotides, that is A, U, G and C.

#
%w( A U G C )
ALLOWED_RNA_NUCLEOTIDES =

ALLOWED_RNA_NUCLEOTIDES

RNA_NUCLEOTIDES
POSSIBLE_RNA_NUCLEOTIDES =
#

Bioroebe::POSSIBLE_RNA_NUCLEOTIDES

This differs from RNA_NUCLEOTIDES in that N is also part of it. It is not entirely clear whether this array will be kept, though.

#
%w(
  A U G C N
)
DNA_NUCLEOTIDES =
#

Bioroebe::DNA_NUCLEOTIDES

This is the variant without N.

#
%w( A T G C )
HASH_DNA_NUCLEOTIDES =
#

Bioroebe::HASH_DNA_NUCLEOTIDES

As of 20.04.2014, Uracil is also part of this Hash. While this is, strictly speaking, not absolutely correct, it does simplify some downstream code. However, this may be re-evaluated in the future.

This Hash may be helpful when the user wishes to find a complement to a nucleotide. There is a method that does the same, but this Hash should be faster than a method call, so use it in particular if you need to focus more on speed.

#
{
  'A' => 'T',
  'T' => 'A',
  'G' => 'C',
  'C' => 'G',
  'U' => 'A'
}
POSSIBLE_DNA_NUCLEOTIDES =
#

POSSIBLE_DNA_NUCLEOTIDES

This constant will keep all possible DNA nucleotides.

N is also a valid entry; for example, ‘Yarrowia_lipolytica_genome.fa’ includes it.

Only these nucleotides are allowed in DNA.

To scope to this, do:

Bioroebe::POSSIBLE_DNA_NUCLEOTIDES
#
%w(
  A T G C N
)
ARRAY_VALID_DNA_SEQUENCES =

ARRAY_VALID_DNA_SEQUENCES

POSSIBLE_DNA_NUCLEOTIDES
FTP_NCBI_TAXONOMY_DATABASE =
#

FTP_NCBI_TAXONOMY_DATABASE

This constant refers to the taxonomy-database from NCBI. This is the file that can be downloaded from the NCBI homepage (actually, the ftp-listing).

Take note that this database, in .tar.gz format, is about 50 MB in size or even larger these days. So only download it if you really need it locally.

#
'ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz'
URL_TO_TAXONOMY_ARCHIVE =
#

URL_TO_TAXONOMY_ARCHIVE

An “alias” to the above ^^^ constant.

#
FTP_NCBI_TAXONOMY_DATABASE
NCBI_NUCCORE =
#

NCBI_NUCCORE

#
'https://www.ncbi.nlm.nih.gov/nuccore/'
NCBI_GENE =
#

NCBI_GENE

#
'https://www.ncbi.nlm.nih.gov/gene/'
FILE_HYDROPATHY_TABLE =
#

FILE_HYDROPATHY_TABLE

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}hydropathy_table.yml"
FILE_NUCLEAR_LOCALIZATION_SEQUENCES =
#

FILE_NUCLEAR_LOCALIZATION_SEQUENCES

#
"#{project_yaml_directory?}nuclear_localization_sequences.yml"
FILE_DEFAULT_COLOURS_FOR_THE_AMINOACIDS =
#

FILE_DEFAULT_COLOURS_FOR_THE_AMINOACIDS

#
"#{project_yaml_directory?}configuration/default_colours_for_the_aminoacids.yml"
FILE_BROWSER =
#

FILE_BROWSER

#
"#{project_yaml_directory?}configuration/browser.yml"
FILE_AMINOACIDS_MOLECULAR_FORMULA =
#

FILE_AMINOACIDS_MOLECULAR_FORMULA

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_molecular_formula.yml"
FILE_AMINOACIDS_THREE_TO_ONE =
#

FILE_AMINOACIDS_THREE_TO_ONE

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_three_to_one.yml"
FILE_WEIGHT_OF_COMMON_PROTEINS =
#

FILE_WEIGHT_OF_COMMON_PROTEINS

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}weight_of_common_proteins.yml"
FILE_AMINO_ACIDS_LONG_NAME_TO_ONE_LETTER =
#

FILE_AMINO_ACIDS_LONG_NAME_TO_ONE_LETTER

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_long_name_to_one_letter.yml"
FILE_AMINO_ACIDS_MOLECULAR_FORMULA =
#

FILE_AMINO_ACIDS_MOLECULAR_FORMULA

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_molecular_formula.yml"
FILE_AMINO_ACIDS_MASS_TABLE =
#

FILE_AMINO_ACIDS_MASS_TABLE

bl $BIOROEBE_YAML/aminoacids/amino_acids_monoisotopic_mass_table.yml
#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_monoisotopic_mass_table.yml"
FILE_AMINO_ACIDS =
#

FILE_AMINO_ACIDS

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids.yml"
FILE_AMINO_ACIDS_ABBREVIATIONS =
#

FILE_AMINO_ACIDS_ABBREVIATIONS

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_abbreviations.yml"
DIRECTORY_CODON_TABLES_FREQUENCIES =
#

DIRECTORY_CODON_TABLES_FREQUENCIES

This constant may point to a directory such as:

/home/Programs/Ruby/2.7.0/lib/ruby/site_ruby/2.7.0/bioroebe/codon_tables/frequencies/
#
"#{CODON_TABLES_DIRECTORY}frequencies/"
FILE_NUCLEOTIDES =
#

FILE_NUCLEOTIDES

#
"#{project_yaml_directory?}nucleotides/nucleotides.yml"
NUCLEOTIDES =
nil
FILE_GFP_SEQUENCE =
#

FILE_GFP_SEQUENCE

#
"#{project_yaml_directory?}sequences/"\
"JX472995_Green_fluorescent_protein_from_Aequorea_victoria.fasta"
FILE_RESTRICTION_ENZYMES =
#

FILE_RESTRICTION_ENZYMES

bl $BIOROEBE/yaml/restriction_enzymes/restriction_enzymes.yml

#
"#{project_yaml_directory?}restriction_enzymes/restriction_enzymes.yml"
FILE_COLOURIZE_FASTA_SEQUENCES =
#

FILE_COLOURIZE_FASTA_SEQUENCES

This constant points to the .yml file that holds information on how to colourize the FASTA sequences.

#
"#{project_yaml_directory?}configuration/colourize_fasta_sequences.yml"
FILE_BLOSUM45 =
#

FILE_BLOSUM45

#
"#{BLOSUM_DIRECTORY}/blosum45.yml"
FILE_BLOSUM50 =
#

FILE_BLOSUM50

#
"#{BLOSUM_DIRECTORY}/blosum50.yml"
FILE_BLOSUM62 =
#

FILE_BLOSUM62

#
"#{BLOSUM_DIRECTORY}/blosum62.yml"
FILE_BLOSUM80 =
#

FILE_BLOSUM80

#
"#{BLOSUM_DIRECTORY}/blosum80.yml"
FILE_BLOSUM90 =
#

FILE_BLOSUM90

#
"#{BLOSUM_DIRECTORY}/blosum90.yml"
FILE_BLOSUM_MATRIX =
#

BLOSUM_MATRIX

#
"#{BLOSUM_DIRECTORY}blosum_matrix.yml"
HYDROPATHY_TABLE =
YAML.load_file(
  FILE_HYDROPATHY_TABLE
)
FILE_CHROMOSOME_NUMBERS =
#

FILE_CHROMOSOME_NUMBERS

#
"#{project_yaml_directory?}chromosomes/chromosome_numbers.yml"
FILE_AMINO_ACIDS_FREQUENCY =
#

FILE_AMINO_ACIDS_FREQUENCY

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_frequency.yml"
FILE_AMINO_ACIDS_RESTE_YAML =
#

FILE_AMINO_ACIDS_RESTE_YAML

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_reste.yml"
FILE_AMINO_ACIDS_THREE_TO_ONE =
#

FILE_AMINO_ACIDS_THREE_TO_ONE

We’ll keep the keys downcased.

bl $RUBY_SRC/bioroebe/lib/bioroebe/yaml/aminoacids/amino_acids_three_to_one.yml
#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_three_to_one.yml"
FILE_AMINO_ACIDS_AVERAGE_MASS_TABLE =
#

FILE_AMINO_ACIDS_AVERAGE_MASS_TABLE

This will point to the file amino_acids_average_mass_table.yml.

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_average_mass_table.yml"
FILE_NUCLEOTIDES_WEIGHT =
#

FILE_NUCLEOTIDES_WEIGHT

The path to the file that holds the weight of the nucleotides.

#
"#{project_yaml_directory?}nucleotides/nucleotides_weight.yml"
UNICODE_HORIZONTAL_BAR =
#

UNICODE_HORIZONTAL_BAR

#
''
AMINO_ACIDS_MOLECULAR_FORMULA =
YAML.load_file(
  FILE_AMINO_ACIDS_MOLECULAR_FORMULA
)
AMINO_ACIDS_RESTE =
{}
AMINO_ACIDS_LONG_NAME_TO_ONE_LETTER =
YAML.load_file(_)
AMINO_ACIDS_MASS_TABLE =

Else hardcode the AminoAcid table here. This may no longer be necessary, though.

{
  'A' =>  71.03711, 'C' => 103.00919, 'D' => 115.02694,
  'E' => 129.04259, 'F' => 147.06841, 'G' =>  57.02146,
  'H' => 137.05891, 'I' => 113.08406, 'K' => 128.09496,
  'L' => 113.08406, 'M' => 131.04049, 'N' => 114.04293,
  'P' =>  97.05276, 'Q' => 128.05858, 'R' => 156.10111,
  'S' =>  87.03203, 'T' => 101.04768, 'V' =>  99.06841,
  'W' => 186.07931, 'Y' => 163.06333
}
AMINO_ACIDS_AVERAGE_MONOISOTOPIC_TABLE =

An alias.

AMINO_ACIDS_MASS_TABLE
AMINO_ACIDS =
#

Bioroebe::AMINO_ACIDS

Currently listing 21 AminoAcids from amino_acids.yml

bl $BIOROEBE/yaml/aminoacids/amino_acids.yml
#
YAML.load_file(
  FILE_AMINO_ACIDS
)
FILE_AMINO_ACIDS_ENGLISH =
#

::Bioroebe::AMINO_ACIDS_ENGLISH

#
YAML.load_file("#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_english.yml")
AMINO_ACIDS_ENGLISH =

AMINO_ACIDS_ENGLISH

FILE_AMINO_ACIDS_ENGLISH
AMINO_ACIDS_AVERAGE_MASS_TABLE =

Else simply hardcode the AminoAcid table here.

{
  'A' =>  71.0788,
  'C' => 103.1388,
  'D' => 115.0886,
  'E' => 129.1155,
  'F' => 147.1766,
  'G' =>  57.0519,
  'H' => 137.1411,
  'I' => 113.1594,
  'K' => 128.1741,
  'L' => 113.1594,
  'M' => 131.1926,
  'N' => 114.1038,
  'P' =>  97.1167,
  'Q' => 128.1307,
  'R' => 156.1875,
  'S' =>  87.0782,
  'T' => 101.1051,
  'V' =>  99.1326,
  'W' => 186.2132,
  'Y' => 163.1760
}
AMINO_ACIDS_THREE_TO_ONE =
hash
NUCLEAR_LOCALIZATION_SEQUENCES =
''
ARRAY_NLS_SEQUENCES =
[]
LOCAL_DIRECTORY_FOR_UNIPROT =
#

LOCAL_DIRECTORY_FOR_UNIPROT

This denotes the directory for uniprot-files.

#
"#{log_directory?}uniprot/"
AUTOGENERATED_SQL_FILES_DIR =
#

Bioroebe::AUTOGENERATED_SQL_FILES_DIR

#
"#{log_directory?}autogenerated_sql_files/"
USE_THIS_BROWSER =

opera # Hardcoded value in this case..

'firefox'
ProteinToDNA =
#

Bioroebe::ProteinToDNA

Use an “alias” to the other name.

#
ConvertAminoacidToDNA
Fasta =

Add an “alias” constant to class ParseFasta.

ParseFasta

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.[](i = nil) ⇒ Object

#

Bioroebe[]

Assign a sequence through the [] method.

Note that some aliases to this method exist; see the variants that use self.instance_eval below this method definition.

This method here could be compared to methods such as Integer(). Biopython uses something similar, by the way.

For instance, you can do this too:

Bioroebe << 'ATT'
x = Bioroebe['ATT']
x = Bioroebe << 'ATT'
#


685
686
687
# File 'lib/bioroebe/sequence/sequence.rb', line 685

# ========================================================================= #
# === Bioroebe[]
#
# Hands the given input to Bioroebe::Sequence.new() and returns the
# freshly created object. The argument is optional and defaults to nil.
# ========================================================================= #
def self.[](i = nil)
  new_sequence = Bioroebe::Sequence.new(i)
  return new_sequence
end

.ad_hoc_task(this_file = '/root/Bioroebe/table_ids.md') ⇒ Object

#

Bioroebe.ad_hoc_task

This method can be used to specifically run an “ad-hoc” task.

An ad-hoc task is something that we just quickly “hack” together, in order to solve some existing bioinformatics-related problem or another problem that may exist right now.

For instance, in May 2021, this was used for a university course that required us to work with MEGA X and compare different proteins from a phylogenetics point of view.

#


4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4074

# ========================================================================= #
# === Bioroebe.ad_hoc_task
#
# Runs a small three-step pipeline: download the FASTA entries listed in
# this_file, change into the fasta/ subdirectory of the log directory,
# and then call Bioroebe.overwrite_fasta_header() on every .fasta file
# found there.
#
# An Array given as input is joined via ' ' before it is used.
# ========================================================================= #
def self.ad_hoc_task(
    this_file = '/root/Bioroebe/table_ids.md'
  )
  require 'bioroebe/fasta_and_fastq/download_fasta.rb'
  require 'bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb'
  this_file = this_file.join(' ') if this_file.is_a? Array
  # ======================================================================= #
  # All work happens relative to the log-directory, so enter it first.
  # ======================================================================= #
  cd ::Bioroebe.log_dir?
  e 'Now downloading some FASTA files, based on this file: '+
    this_file
  # ======================================================================= #
  # (1) Fetch the remote FASTA dataset.
  # ======================================================================= #
  download_fasta this_file
  # ======================================================================= #
  # (2) Enter the directory that holds the FASTA files.
  # ======================================================================= #
  cd ::Bioroebe.log_dir?+'fasta/'
  # ======================================================================= #
  # (3) Simplify the header of every .fasta file found in that directory.
  # ======================================================================= #
  Dir['*.fasta'].each do |path_to_the_fasta_file|
    Bioroebe.overwrite_fasta_header(path_to_the_fasta_file)
  end
end

.align_this_string_via_multiple_sequence_alignment(this_string = "PSRARRDAVG--DH--PAVEALP----PQSGPHKKEISFFTVRKEEAADADLWFPS PGGASK--VGQTDNDPQAIKDLP----PQGED------------------------ ") ⇒ Object

#

Bioroebe.align_this_string_via_multiple_sequence_alignment

This method will simply return an Array.

#


957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 957

# ========================================================================= #
# === Bioroebe.align_this_string_via_multiple_sequence_alignment
#
# Splits a multi-line alignment into an Array holding one entry per line.
# Surrounding whitespace is stripped and every ' ' character is removed
# before the split on "\n" happens.
#
# An Array given as input is joined via "\n" first.
#
# The input String is never modified: non-destructive String methods are
# used, so frozen and non-frozen input is treated the same way and the
# String of the caller stays intact.
#
# Returns an Array of Strings.
# ========================================================================= #
def self.align_this_string_via_multiple_sequence_alignment(
    this_string =
      "PSRARRDAVG--DH--PAVEALP----PQSGPHKKEISFFTVRKEEAADADLWFPS
       PGGASK--VGQTDNDPQAIKDLP----PQGED------------------------
      "
  )
  this_string = this_string.join("\n") if this_string.is_a? Array
  this_string.strip.delete(' ').split("\n")
end

.all_aminoacids?Boolean

#

Bioroebe.all_aminoacids?

This method will return all available aminoacids.

Example:

Bioroebe.all_aminoacids? # => ["A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"]
#

Returns:

  • (Boolean)


162
163
164
# File 'lib/bioroebe/constants/constants.rb', line 162

# ========================================================================= #
# === Bioroebe.all_aminoacids?
#
# Query-method that hands back the constant ARRAY_AMINO_ACIDS_ALPHABET
# as-is (no copy is made).
# ========================================================================= #
def self.all_aminoacids?
  return ARRAY_AMINO_ACIDS_ALPHABET
end

.allowed_dna_nucleotides?Boolean

#

Bioroebe.allowed_dna_nucleotides?

This will return an Array with valid DNA nucleotides.

#

Returns:

  • (Boolean)


522
523
524
# File 'lib/bioroebe/constants/constants.rb', line 522

# ========================================================================= #
# === Bioroebe.allowed_dna_nucleotides?
#
# Returns a new Array containing every entry of POSSIBLE_DNA_NUCLEOTIDES
# except for 'N'.
# ========================================================================= #
def self.allowed_dna_nucleotides?
  POSSIBLE_DNA_NUCLEOTIDES.reject {|this_nucleotide| this_nucleotide == 'N' }
end

.amino_acid_average_mass(i) ⇒ Object

#

Bioroebe.amino_acid_average_mass

The input to this method should be in the form of the one-letter code for aminoacids. Several aminoacids can be input, of course, such as ‘AGL’.

Note that, as of March 2020, this method returns a Float if the input was found to be a valid aminoacid.

Usage example:

Bioroebe.amino_acid_average_mass('F') # => 147.1766
#


2008
2009
2010
2011
2012
2013
2014
2015
2016
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2008

# ========================================================================= #
# === Bioroebe.amino_acid_average_mass
#
# Sums up the entries of AMINO_ACIDS_AVERAGE_MASS_TABLE for every
# one-letter code found in the input. A String is split into its
# characters; anything that is not an Array is wrapped into one.
#
# Codes that are missing from the table contribute 0.0 (nil.to_f).
#
# Returns a Float, limited to 5 decimal places via '%.5f'.
# ========================================================================= #
def self.amino_acid_average_mass(i)
  one_letter_codes = i.is_a?(String) ? i.split(//) : i
  one_letter_codes = [one_letter_codes] unless one_letter_codes.is_a? Array
  individual_masses = one_letter_codes.map {|this_code|
    AMINO_ACIDS_AVERAGE_MASS_TABLE[this_code].to_f
  }
  formatted_sum = '%.5f' % individual_masses.sum
  formatted_sum.to_f
end

.amino_acid_monoisotopic_mass(this_aminoacid) ⇒ Object

#

Bioroebe.amino_acid_monoisotopic_mass

We require the monoisotopic table for this method, and return the corresponding match to the given aminoacid.

The input format should be in the one-letter aminoacid abbreviation.

Invocation example:

Bioroebe.amino_acid_monoisotopic_mass 'L' # => 113.08406
Bioroebe.amino_acid_monoisotopic_mass 'K' # => 128.09496
#


2032
2033
2034
2035
2036
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2032

# ========================================================================= #
# === Bioroebe.amino_acid_monoisotopic_mass
#
# Looks up this_aminoacid in AMINO_ACIDS_AVERAGE_MONOISOTOPIC_TABLE and
# returns the entry as a Float. A key that is not part of the table
# yields 0.0, because nil.to_f is 0.0.
# ========================================================================= #
def self.amino_acid_monoisotopic_mass(this_aminoacid)
  mass_of_this_aminoacid = AMINO_ACIDS_AVERAGE_MONOISOTOPIC_TABLE[this_aminoacid]
  return mass_of_this_aminoacid.to_f
end

.aminoacid_families?Boolean

#

Bioroebe.aminoacid_families?

Feedback which aminoacid-families we know of.

Usage example:

pp Bioroebe.aminoacid_families?; ''
#

Returns:

  • (Boolean)


223
224
225
# File 'lib/bioroebe/constants/constants.rb', line 223

# ========================================================================= #
# === Bioroebe.aminoacid_families?
#
# Query-method that hands back the constant AMINOACID_FAMILIES as-is.
# ========================================================================= #
def self.aminoacid_families?
  return AMINOACID_FAMILIES
end

.aminoacid_frequency(of_this_sequence = '') ⇒ Object

#

Bioroebe.aminoacid_frequency

Usage example:

Bioroebe.aminoacid_frequency('MVTDEGAIYFTKDAARNWKAAVEETVSATLNRTVSSGITGASYYTGTFST')

Would yield the following Hash:

{"M"=>1, "V"=>4, "T"=>9, "D"=>2, "E"=>3, "G"=>4, "A"=>7, "I"=>2, "Y"=>3, "F"=>2, "K"=>2, "R"=>2, "N"=>2, "W"=>1, "S"=>5, "L"=>1}
#


2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2538

# ========================================================================= #
# === Bioroebe.aminoacid_frequency
#
# Counts how often each character occurs in the given sequence.
#
# If an Array is given then only its first element is evaluated.
#
# Returns a Hash (character => count) whose default value is 0, so
# asking for a character that did not occur yields 0 rather than nil.
# ========================================================================= #
def self.aminoacid_frequency(
    of_this_sequence = ''
  )
  sequence = of_this_sequence.is_a?(Array) ? of_this_sequence.first : of_this_sequence
  sequence.split(//).each_with_object(Hash.new(0)) {|this_residue, tally|
    tally[this_residue] += 1
  }
end

.aminoacid_substitution(from_this_sequence = :default) ⇒ Object

#

Bioroebe.aminoacid_substitution

#


102
103
104
# File 'lib/bioroebe/aminoacids/aminoacid_substitution.rb', line 102

# ========================================================================= #
# === Bioroebe.aminoacid_substitution
#
# Convenience wrapper that instantiates Bioroebe::AminoacidSubstitution
# with the given sequence (:default if none was given) and returns the
# new object.
# ========================================================================= #
def self.aminoacid_substitution(from_this_sequence = :default)
  substitution = Bioroebe::AminoacidSubstitution.new(from_this_sequence)
  return substitution
end

.aminoacids?Boolean

#

Bioroebe.aminoacids?

Note that this will return a Hash that looks like this:

{"A"=>{"ala"=>"alanine", "d
#

Returns:

  • (Boolean)


995
996
997
# File 'lib/bioroebe/constants/constants.rb', line 995

# ========================================================================= #
# === Bioroebe.aminoacids?
#
# Query-method that hands back the constant AMINO_ACIDS as-is.
# ========================================================================= #
def self.aminoacids?
  return AMINO_ACIDS
end

.append_what_into(what = 'Hello world!', into = 'test.md') ⇒ Object

#

Bioroebe.append_what_into

This method can be used to append content onto a file.

#


1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 1227

# ========================================================================= #
# === Bioroebe.append_what_into
#
# Appends the content (what) to the file at the given location (into).
#
# If the target file does not exist yet it is created first via
# create_file(); should its parent directory be missing as well, that
# directory is created beforehand via create_directory(). Both steps
# are reported to the user.
# ========================================================================= #
def self.append_what_into(
    what = 'Hello world!',
    into = 'test.md'
  )
  if !File.exist?(into)
    parent_directory = File.dirname(into)
    if !File.directory?(parent_directory)
      e rev+
      'No directory exists at '+sdir(parent_directory)+
      rev+'. Thus creating it now.'
      create_directory(parent_directory)
    end
    e rev+
      'No file exists at '+sfile(into)+rev+
      '. Thus creating it now.'
    create_file(into)
  end
  # ======================================================================= #
  # Mode 'a' opens the file for appending; the block-form closes it again.
  # ======================================================================= #
  File.open(into, 'a') do |file_handle|
    file_handle << what
  end
end

.array_colourize_this_aminoacidObject

#

Bioroebe.array_colourize_this_aminoacid

Query as to which aminoacid we will colourize, if any at all.

#


1291
1292
1293
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 1291

# ========================================================================= #
# === Bioroebe.array_colourize_this_aminoacid
#
# Reader-method for the instance variable @array_colourize_this_aminoacid.
# Returns nil if that variable was never assigned.
# ========================================================================= #
def self.array_colourize_this_aminoacid
  return @array_colourize_this_aminoacid
end

.atomic_composition(of = 'GGGGA') ⇒ Object

#

Bioroebe.atomic_composition

This method will return the composition of atoms in a given protein, via Hash, such as:

{"C"=>11, "H"=>19, "N"=>5, "O"=>6, "S"=>0}

The Hash keeps track of 11 C atoms, 19 H atoms, 5 N atoms, 6 O atoms and 0 S atoms.

This hash can then be formatted via the method:

Bioroebe.show_atomic_composition()

Which can be found below.

Presently this method works on aminoacids only, but in theory the code could be extended to work with DNA nucleotides and RNA nucleotides as well.

Either way, the one letter abbreviation should be used as input to this method.

When we use aminoacids, we need to remember that each peptide bond releases one H₂O (water). This has to be deducted from the formula: an internal aminoacid loses a full H₂O, whereas the two terminal aminoacids behave differently. The first one only loses an -OH group, and the last one only loses an -H atom.

Remember that the input sequence to this method should be the one-letter code for the aminoacid sequence at hand.

#


2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2145

# ========================================================================= #
# === Bioroebe.atomic_composition
#
# Returns a Hash that counts the atoms C, H, N, O and S of the given
# aminoacid sequence, such as:
#
#   {"C"=>11, "H"=>19, "N"=>5, "O"=>6, "S"=>0}
#
# The input should be the one-letter code of the aminoacids, either as
# a String ('GGGGA') or as an Array. An empty Array reinstates the
# default 'GGGGA'.
#
# For every sequence that is longer than one aminoacid the water lost
# through the peptide bonds is deducted: the first aminoacid loses 'OH',
# the last one loses 'H' and every internal one loses 'H2O'. A single
# aminoacid is reported with its full formula.
#
# The molecular formulas are read from a .yml file and are processed
# via the ChemistryParadise project. If that gem is not available the
# LoadError is rescued here, and the later calls into ::ChemistryParadise
# will fail instead.
# ========================================================================= #
def self.atomic_composition(
    of = 'GGGGA' # ← This should be the aminoacid sequence.
  )
  begin
    require 'chemistry_paradise/split_molecule_names.rb'
    require 'chemistry_paradise/toplevel_methods/remove_this_molecule_from.rb'
  rescue LoadError
    if is_on_roebe?
      puts 'Two files from the chemistry_paradise gem are not available.'
    end
  end
  # ======================================================================= #
  # Load up the molecular formula for each aminoacid next. This will
  # be used as our reference-point for calculating things such as the
  # composition, or weight.
  #
  # NOTE(review): other constants in this project are spelled
  # FILE_AMINO_ACIDS_MOLECULAR_FORMULA - confirm that the variant used
  # here is defined as well.
  # ======================================================================= #
  dataset_molecular_formula_for_the_aminoacids = YAML.load_file(
    FILE_AMINOACIDS_MOLECULAR_FORMULA
  )
  # ======================================================================= #
  # Normalize the input into an Array of one-letter codes.
  # ======================================================================= #
  if of.is_a?(Array)
    if of.empty?
      of = 'GGGGA' # In this case reinstate the default.
    elsif of.first.is_a?(String) && (of.first.size > 1)
      of = of.first.split(//) # Split it on a per-character basis here.
    end
  end
  of = of.split(//) if of.is_a? String
  of = [of] unless of.is_a? Array
  # ======================================================================= #
  # Build up the default values, for the atoms C, H, N, O and S.
  # ======================================================================= #
  hash_keeping_track_of_the_atomic_composition = {
    'C' => 0,
    'H' => 0,
    'N' => 0,
    'O' => 0,
    'S' => 0
  }
  # ======================================================================= #
  # Peptide bonds only exist if there are at the least two aminoacids.
  # This must be decided based on the number of aminoacids, not on the
  # length of the first entry, which is a single letter at this point.
  # ======================================================================= #
  has_peptide_bonds = (of.size > 1)
  position_of_the_last_aminoacid = of.size - 1
  # ======================================================================= #
  # Next obtain the formula from the ChemistryParadise project. We
  # do so by iterating over the given input, and we assume that
  # this input is always an Array.
  # ======================================================================= #
  of.each_with_index {|this_amino_acid, position_of_that_aminoacid|
    # ===================================================================== #
    # Next, we have to obtain the formula for this amino acid. The
    # one-letter code is translated via AMINO_ACIDS_ENGLISH first, and
    # the result is the key into the molecular-formula dataset.
    # ===================================================================== #
    this_amino_acid = AMINO_ACIDS_ENGLISH[this_amino_acid]
    formula_for_this_amino_acid = dataset_molecular_formula_for_the_aminoacids[this_amino_acid]
    # ===================================================================== #
    # The next case-menu will handle the position of the aminoacid at hand.
    # We will skip doing so if there is only one aminoacid though.
    # ===================================================================== #
    if has_peptide_bonds
      lost_through_the_peptide_bond =
        case position_of_that_aminoacid # case tag
        when 0 # This is the first aminoacid. It loses only one 'OH' group.
          'OH'
        when position_of_the_last_aminoacid # The last entry. It loses only one 'H'.
          'H'
        else # Else it will lose a full H₂O group.
          'H2O'
        end
      formula_for_this_amino_acid =
        ::ChemistryParadise.remove_this_molecule_from(
          lost_through_the_peptide_bond, formula_for_this_amino_acid
        )
    end
    array_chemical_formula = ::ChemistryParadise.split_this_molecular_formula_into_a_hash(
      formula_for_this_amino_acid
    )
    array_chemical_formula.each {|molecule_and_number| # e. g. 'H13'
      if molecule_and_number =~ /\d+/ # If it has at the least one number.
        molecule_and_number =~ /([A-Z]+)(\d{1,2})/ # See: https://rubular.com/r/nCojEDcY6g
        molecule = Regexp.last_match(1).to_s
        n_times  = Regexp.last_match(2).to_i
        hash_keeping_track_of_the_atomic_composition[molecule] += n_times
      else # else it must be 1, since there is no other number, such as 'N'.
        hash_keeping_track_of_the_atomic_composition[molecule_and_number] += 1
      end
    }
  }
  return hash_keeping_track_of_the_atomic_composition
end

.automatically_rename_this_fasta_file(fasta_file) ⇒ Object

#

Bioroebe.automatically_rename_this_fasta_file

This method will automatically (try to) rename an existing fasta file, by tapping into the method called .return_new_filename_based_on_fasta_identifier().

#


135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/bioroebe/toplevel_methods/fasta_and_fastq.rb', line 135

# ========================================================================= #
# === Bioroebe.automatically_rename_this_fasta_file
#
# Renames every given FASTA file to the name that is returned by
# return_new_filename_based_on_fasta_identifier().
#
# The input may be a single path or a (nested) Array of paths; nil
# entries are ignored. Paths that do not exist are reported through
# no_file_exists_at() and are skipped.
# ========================================================================= #
def self.automatically_rename_this_fasta_file(fasta_file)
  all_candidates = [fasta_file].flatten.compact
  all_candidates.each do |path_to_the_fasta_file|
    unless File.exist? path_to_the_fasta_file
      no_file_exists_at(path_to_the_fasta_file)
      next
    end
    renamed_target = return_new_filename_based_on_fasta_identifier(path_to_the_fasta_file)
    erev "Renaming #{sfile(path_to_the_fasta_file)}#{rev} "\
         "to #{sfile(renamed_target)} #{rev}next."
    Bioroebe.rename(path_to_the_fasta_file, renamed_target)
  end
end

.available_blosum_matrices?Boolean

#

Bioroebe.available_blosum_matrices?

This method will return an Array of all available blosum matrices.

Example output:

["blosum45", "blosum50", "blosum62", "blosum80", "blosum90", "blosum_matrix"]
#

Returns:

  • (Boolean)


78
79
80
81
82
# File 'lib/bioroebe/blosum/blosum.rb', line 78

# ========================================================================= #
# === Bioroebe.available_blosum_matrices?
#
# Turns every path returned by Bioroebe::Blosum.available_blosum_files?
# into a short name: the directory part is dropped and a trailing
# '.yml' is removed.
#
# Returns an Array of Strings.
# ========================================================================= #
def self.available_blosum_matrices?
  all_blosum_files = Bioroebe::Blosum.available_blosum_files?
  all_blosum_files.map do |path_to_the_yaml_file|
    name_of_the_file = File.basename(path_to_the_yaml_file)
    name_of_the_file.delete_suffix('.yml')
  end
end

.available_codon_tables?Boolean

#

Bioroebe.available_codon_tables?

#

Returns:

  • (Boolean)


125
126
127
# File 'lib/bioroebe/codons/show_codon_tables.rb', line 125

# ========================================================================= #
# === Bioroebe.available_codon_tables?
#
# Returns the values of ::Bioroebe::CodonTables.definitions? in their
# existing order. The result is deliberately not sorted.
# ========================================================================= #
def self.available_codon_tables?
  all_definitions = ::Bioroebe::CodonTables.definitions?
  return all_definitions.values # Do not sort this.
end

.base_composition(i = '52%GC') ⇒ Object

#

Bioroebe.base_composition

This method can be used to query the composition of a given DNA sequence, that is, in percentage, the values for A, T, C and G.

This method will then return a Hash, consisting of the percentage values of A, T, C and G in the given DNA sequence at hand.

Note that the input to this method has to include a ‘%’ character, at the least up until March 2020. Past March 2020 this requirement was dropped, but I still think it is visually more elegant to include a ‘%’ character.

#


3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 3931

# ========================================================================= #
# === Bioroebe.base_composition
#
# Returns a Hash with the percentage values of the nucleotides A, T, C
# and G.
#
# Three kinds of input are supported:
#
#   (1) A percentage notation such as '52%GC'. The percentage is split
#       evenly among the nucleotides named after the '%' character
#       and the remainder (100 - percentage) is split evenly among
#       the other nucleotides. Integer division by 2 is used, so this
#       notation assumes a pair of nucleotides, such as GC or AT, and
#       odd percentages are rounded down.
#   (2) A path to an existing file. Lines starting with '>' (FASTA
#       headers) are ignored; the remaining content is the sequence.
#   (3) A plain sequence, such as 'AATT'. The percentage of each
#       character is calculated and rounded to two decimal places.
#
# An Array is joined via ' '. nil and an empty Array both fall back to
# the default '52%GC'.
# ========================================================================= #
def self.base_composition(
    i = '52%GC'
  )
  default_input = '52%GC'
  if i.is_a? Array
    i = i.empty? ? default_input : i.join(' ').strip
  end
  i = default_input if i.nil? # A nil would otherwise fail on .include? below.
  # ======================================================================= #
  # Add support for Files here.
  # ======================================================================= #
  if File.exist?(i)
    i = File.readlines(i).reject {|line| line.start_with? '>' }.join("\n").delete("\n")
  end
  # ======================================================================= #
  # We must use a Hash for this.
  # ======================================================================= #
  hash = {
    'A' => 0,
    'T' => 0,
    'C' => 0,
    'G' => 0,
  }
  if i.include? '%'
    splitted = i.split('%').map(&:strip)
    frequency = splitted.first.to_i
    opposite_frequency = 100 - frequency
    named_nucleotides = splitted.last.split(//)
    named_nucleotides.each {|this_nucleotide|
      hash[this_nucleotide] = frequency / 2
    }
    # ===================================================================== #
    # Next calculate the missing nucleotides. These are determined by
    # name rather than by looking for a value of 0, since a named
    # nucleotide may legitimately have a share of 0, as in '0%GC'.
    # ===================================================================== #
    (hash.keys - named_nucleotides).each {|this_nucleotide|
      hash[this_nucleotide] = opposite_frequency / 2
    }
  else
    frequency = Hash.new(0)
    i.each_char {|this_nucleotide| frequency[this_nucleotide] += 1 }
    sum = frequency.values.sum
    frequency.each_pair {|this_nucleotide, n_occurrences|
      hash[this_nucleotide] = ((n_occurrences * 100.0) / sum).round(2)
    }
  end
  return hash
end

.batch_create_windows_executablesObject

#

Bioroebe.batch_create_windows_executables

This method is only useful on Windows. We will use “ocra” to create various .exe files that have the desired widget-functionality.

Note that the functionality depends on the roebe-gem.

#


2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2860

# ========================================================================= #
# === Bioroebe.batch_create_windows_executables
#
# Passes every file of a hardcoded list to Roebe.ocra_build(), together
# with the name that is to be used, which is the basename of the file
# without the trailing '.rb'.
#
# The roebe gem is required on a best-effort basis: a LoadError is
# deliberately ignored here.
# ========================================================================= #
def self.batch_create_windows_executables
  begin
    require 'roebe/custom_methods/module.rb'
  rescue LoadError; end
  build_from_these_files = [
    '/home/x/programming/ruby/src/bioroebe/lib/bioroebe/gui/libui/hamming_distance/hamming_distance.rb'
  ]
  build_from_these_files.each do |path_to_the_ruby_file|
    name_of_the_executable = File.basename(path_to_the_ruby_file).delete_suffix('.rb')
    Roebe.ocra_build(path_to_the_ruby_file, name_of_the_executable)
  end
end

.be_verbose?Boolean

#

Bioroebe.be_verbose?

#

Returns:

  • (Boolean)


164
165
166
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 164

# ========================================================================= #
# === Bioroebe.be_verbose?
#
# Reader-method for the instance variable @be_verbose. Returns nil if
# that variable was never assigned.
# ========================================================================= #
def self.be_verbose?
  return @be_verbose
end

.bisulfite_treatment(i) ⇒ Object

#

Bioroebe.bisulfite_treatment

Simply convert all C into U. The underlying idea here is that bisulfite will convert unmethylated cytosines into uracil.

Usage example:

Bioroebe.bisulfite_treatment('CCCGCAATGCATACCTCGCCG') # => "UUUGUAATGUATAUUTUGUUG"
#


2845
2846
2847
2848
2849
2850
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2845

# ========================================================================= #
# === Bioroebe.bisulfite_treatment
#
# Returns a copy of the given sequence in which every 'C' has been
# replaced with 'U'. All other characters, including 'T', are kept.
#
# An Array given as input is joined without a separator and stripped
# before the replacement happens.
# ========================================================================= #
def self.bisulfite_treatment(i)
  sequence = i.is_a?(Array) ? i.join('').strip : i
  return sequence.tr('C', 'U')
end

.blast_neighborhood(this_mer = 'CTC', optional_apply_filter_for_score_higher_than = nil) ⇒ Object

#

Bioroebe.blast_neighborhood

The second argument to this method is a score-filter, e. g. to select only entries that have a score higher than 1.

#


4271
4272
4273
4274
4275
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
4319
4320
4321
4322
4323
4324
4325
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398
4399
4400
4401
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4271

# ========================================================================= #
# === Bioroebe.blast_neighborhood
#
# Compares this_mer against all 64 DNA triplets (built from A, T, C
# and G, in that order) and reports a score for each triplet via e().
#
# Each of the three positions contributes +2 if the characters are the
# same and -2 otherwise, so the score lies between -6 and 6.
#
# If the second argument is given then only triplets whose score is
# higher than that value are reported.
#
# An Array given as first argument is reduced to its first element;
# nil falls back to the default 'CTC'.
# ========================================================================= #
def self.blast_neighborhood(
    this_mer                                    = 'CTC',
    optional_apply_filter_for_score_higher_than = nil
  )
  this_mer = this_mer.first if this_mer.is_a? Array
  this_mer = 'CTC' if this_mer.nil? # Set the same default as above.
  match_score     =  2
  mis_match_score = -2
  # ======================================================================= #
  # Build all 64 triplets: AAA, AAT, AAC, AAG, ATA, ... up to GGG.
  # ======================================================================= #
  all_possible_triplets = %w( A T C G ).repeated_permutation(3).map(&:join)
  all_possible_triplets.each do |this_triplet|
    # ===================================================================== #
    # Position-wise comparison: a match adds match_score, anything else
    # adds mis_match_score.
    # ===================================================================== #
    score = this_triplet.chars.each_with_index.sum {|this_nucleotide, position|
      (this_nucleotide == this_mer[position]) ? match_score : mis_match_score
    }
    right_aligned_score = score.to_s.rjust(3)
    if optional_apply_filter_for_score_higher_than
      next unless score.to_i > optional_apply_filter_for_score_higher_than
      e "#{this_triplet}: score of "\
        "#{right_aligned_score}"
    else
      e this_triplet+': score of '+
        right_aligned_score
    end
  end
end

.blosum_directory?Boolean

#

Bioroebe.blosum_directory?

#

Returns:

  • (Boolean)


899
900
901
# File 'lib/bioroebe/constants/constants.rb', line 899

# ========================================================================= #
# === Bioroebe.blosum_directory?
#
# Returns the result of project_yaml_directory? with 'blosum/' appended
# to it.
# ========================================================================= #
def self.blosum_directory?
  yaml_base_directory = project_yaml_directory?
  return "#{yaml_base_directory}blosum/"
end

.blosum_matrix(i = FILE_BLOSUM_MATRIX) ⇒ Object

#

Bioroebe.blosum_matrix

#


801
802
803
# File 'lib/bioroebe/constants/constants.rb', line 801

# ========================================================================= #
# === Bioroebe.blosum_matrix
#
# Loads the given YAML file via YAML.load_file() and returns the parsed
# dataset. The file defaults to FILE_BLOSUM_MATRIX.
# ========================================================================= #
def self.blosum_matrix(i = FILE_BLOSUM_MATRIX)
  parsed_dataset = YAML.load_file(i)
  return parsed_dataset
end

.calculate_exponential_growth(number_of_cells = 10, number_of_divisions = 10) ⇒ Object

#

Bioroebe.calculate_exponential_growth

This method can be used to calculate how many bacteria will exist after n cell divisions (provided that we know, and supply to this method, how many bacteria existed when we started our calculation).

#


4774
4775
4776
4777
4778
4779
4780
4781
4782
4783
4784
4785
4786
4787
4788
4789
4790
4791
4792
4793
4794
4795
4796
4797
4798
4799
4800
4801
4802
4803
4804
4805
4806
4807
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4774

# ========================================================================= #
# === Bioroebe.calculate_exponential_growth
#
# Calculates how many cells exist after n cell divisions, assuming
# that every division doubles the population:
#
#   number_of_cells * (2 ** number_of_divisions)
#
# Both arguments default to 10; nil is treated the same as the default.
# Both values are converted via .to_i, as there are no "1.3" cells.
#
# Alternatively a Hash can be given as first argument. The recognized
# keys are :n_divisions for the number of divisions, and either
# :number_of_cells or :n_cells for the number of cells. A missing
# cell-count key falls back to the default of 10, and the Hash of the
# caller is left unmodified.
#
# Usage examples:
#
#   Bioroebe.calculate_exponential_growth(1, 3) # => 8
#   Bioroebe.calculate_exponential_growth(n_cells: 3, n_divisions: 2) # => 12
# ========================================================================= #
def self.calculate_exponential_growth(
    number_of_cells     = 10,
    number_of_divisions = 10
  )
  default_value = 10
  number_of_cells     = default_value if number_of_cells.nil?
  number_of_divisions = default_value if number_of_divisions.nil?
  # ======================================================================= #
  # === Hashes
  #
  # Handle Hash as input given. The values are only read, never deleted,
  # so that the Hash can be re-used by the caller.
  # ======================================================================= #
  if number_of_cells.is_a? Hash
    options = number_of_cells
    number_of_divisions = options[:n_divisions] if options.key? :n_divisions
    number_of_cells =
      if options.key? :number_of_cells
        options[:number_of_cells]
      elsif options.key? :n_cells
        options[:n_cells]
      else
        default_value
      end
  end
  # ======================================================================= #
  # We need numbers, aka integers - there are no "1.3" cells.
  # ======================================================================= #
  number_of_cells.to_i * (2 ** number_of_divisions.to_i)
end

.calculate_levensthein_distance(string1 = 'TTACCC', string2 = 'TTTCCC', be_verbose = true) ⇒ Object

#

Bioroebe.calculate_levensthein_distance

The following method is based on

http://rosettacode.org/wiki/Levenshtein_distance#Ruby, slightly modified.

To test this code, do:

[ ['kitten','sitting'], ['saturday','sunday'], ["rosettde", "raisethyrd"] ].each { |s,t|
  puts "calculate_levensthein_distance('#{s}', '#{t}') = #{Bioroebe.calculate_levensthein_distance(s, t)}"
}

Note, however, that RubyGems provides a Levenshtein variant too.

#


27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/bioroebe/calculate/calculate_levensthein_distance.rb', line 27

# ========================================================================= #
# === Bioroebe.calculate_levensthein_distance
#
# Compute the Levenshtein (edit) distance between two strings, using the
# classic dynamic-programming matrix.
#
# string1 may also be an Array holding both strings, or a single String
# containing both words separated by whitespace; string2 is then
# overwritten. The caller's Array is never modified.
#
# Pass false or :be_quiet as third argument to suppress the report.
#
# Returns the edit distance as an Integer.
# ========================================================================= #
def self.calculate_levensthein_distance(
    string1    = 'TTACCC',
    string2    = 'TTTCCC',
    be_verbose = true
  )
  be_verbose = false if be_verbose == :be_quiet
  if string1.is_a?(Array) and (string1.size > 1)
    # ===================================================================== #
    # Read both entries by index rather than via .shift, so that the
    # Array of the caller stays untouched and the order is preserved.
    # ===================================================================== #
    string1, string2 = string1[0], string1[1]
  elsif string1.is_a?(String) and string1.include?(' ')
    splitted = string1.split(' ')
    # .to_s guards against a whitespace-only String, where split() is empty.
    string2  = splitted.last.to_s
    string1  = splitted.first.to_s
  end
  m = string1.length
  n = string2.length
  return m if n == 0 # Stop at 0.
  return n if m == 0 # Stop at 0.
  arrays = Array.new(m+1) { Array.new(n+1) }
  # ======================================================================= #
  # Initialize the first column and the first row: transforming a prefix
  # into the empty string costs as many edits as the prefix is long.
  # ======================================================================= #
  (0 .. m).each {|i| arrays[i][0] = i}
  (0 .. n).each {|j| arrays[0][j] = j}
  # ======================================================================= #
  # Now, iterate through:
  # ======================================================================= #
  (1 .. n).each {|j|
    (1 .. m).each {|i|
      arrays[i][j] =
        if string1[i-1] == string2[j-1] # adjust index into string
          arrays[i-1][j-1]       # no operation required
        else
           [ arrays[i-1][j]+1,   # deletion     operation
             arrays[i][j-1]+1,   # insertion    operation
             arrays[i-1][j-1]+1, # substitution operation
           ].min
        end
    }
  }
  result = arrays[m][n]
  if be_verbose
    e rev+'The two strings '+simp(string1.to_s)+rev+' and '+
      simp(string2.to_s)+rev+' have n differences ('+
      steelblue('edit distance')+rev+'):'
    e "  #{simp(result.to_s)}"
  end
  return result
end

.calculate_melting_temperature_for_more_than_thirteen_nucleotides(i) ⇒ Object

#

Bioroebe.calculate_melting_temperature_for_more_than_thirteen_nucleotides

An alias exists for this method, called Bioroebe.melting_temperature().

Usage example for the latter:

x = Bioroebe.melting_temperature('CCGTGTCGTACATCG')
#


269
270
271
# File 'lib/bioroebe/calculate/calculate_melting_temperature_for_more_than_thirteen_nucleotides.rb', line 269

# Thin toplevel wrapper: hands the input to a new instance of
# Bioroebe::CalculateMeltingTemperatureForMoreThanThirteenNucleotides
# and returns that instance.
def self.calculate_melting_temperature_for_more_than_thirteen_nucleotides(i)
  calculator = ::Bioroebe::CalculateMeltingTemperatureForMoreThanThirteenNucleotides
  calculator.new(i)
end

.calculate_n50_value(i = [ 1989, 1934, 1841, 1785, 1737, 1649, 1361, 926, 848, 723 ]) ⇒ Object

#

Bioroebe.calculate_n50_value

This method will calculate the N50 value of the given input. The input to this method should be a sorted Array.

#


3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 3060

# ========================================================================= #
# === Bioroebe.calculate_n50_value
#
# Walk over the given contig lengths in the order given and return the
# first length at which the running sum exceeds half of the total sum.
# The input is expected to be sorted already; it is not sorted here.
#
# The entries may be Strings (e. g. coming from ARGV); they are converted
# via .to_i into a new Array, so the input of the caller is not modified
# and frozen Arrays are accepted as well.
#
# Returns the matching Integer, or nil if the input is empty.
# ========================================================================= #
def self.calculate_n50_value(
    i = [
      1989, 1934, 1841,
      1785, 1737, 1649,
      1361,  926,  848,
       723
    ]
  )
  # ======================================================================= #
  # The following conversion is necessary because ARGV will contain only
  # String objects, not integer-values. Use the non-destructive .map here.
  # ======================================================================= #
  contig_lengths = Array(i).map {|entry| entry.to_i }
  half = contig_lengths.sum / 2.0
  running_sum = 0
  # ======================================================================= #
  # .find stops at the first contig that pushes the running sum past the
  # half-sum and yields nil when no contig does so.
  # ======================================================================= #
  contig_lengths.find {|this_number|
    running_sum += this_number
    running_sum > half
  }
end

.calculate_original_amount_of_cells_of_exponential_growth(number_of_cells = 1600, number_of_divisions = 5) ⇒ Object

#

Bioroebe.calculate_original_amount_of_cells_of_exponential_growth

The first argument, number_of_cells, means “how many cells do we have now/currently”. This is necessary, in order to calculate how many cells we used to have initially.

#


4816
4817
4818
4819
4820
4821
4822
4823
4824
4825
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4816

# Reverse an exponential growth: given the current amount of cells and
# the number of divisions that happened, return how many cells existed
# initially. Both arguments are converted via .to_i first.
def self.calculate_original_amount_of_cells_of_exponential_growth(
    number_of_cells     = 1600, # The amount of cells we have right now.
    number_of_divisions =    5  # How many generations have passed.
  )
  current_cells = number_of_cells.to_i
  growth_factor = 2 ** number_of_divisions.to_i # Each division doubles.
  current_cells / growth_factor
end

.calculate_the_frequencies_of_this_species(i = :homo_sapiens) ⇒ Object

#

Bioroebe.calculate_the_frequencies_of_this_species

#


2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2477

# Load a YAML file of codon frequencies (codon => number, e. g.
# "GAC"=>25.1), add the numbers up per aminoacid via Bioroebe.to_aa()
# and print one line per aminoacid, with the sum divided by 10 and
# shown as percent.
#
# The input may be a path to a .yml file or one of the symbols
# :homo_sapiens, :homo, :human or :default (nil is treated as :default),
# which select the bundled 9606_Homo_sapiens.yml file. For an Array
# the first entry is used; an empty Array receives :homo_sapiens.
def self.calculate_the_frequencies_of_this_species(
    i = :homo_sapiens
  )
  require 'bioroebe/sequence/dna.rb'
  require 'yaml'
  i = :default if i.nil?
  if i.is_a? Array
    i << :homo_sapiens if i.empty?
    i = i.first
  end
  # ======================================================================= #
  # === :homo_sapiens
  # ======================================================================= #
  if [:homo_sapiens, :homo, :human, :default].include? i.to_sym
    i = "#{project_base_directory?}codon_tables/frequencies/9606_Homo_sapiens.yml"
  end
  per_aminoacid = Hash.new(0) # Missing aminoacids start at 0.
  YAML.load_file(i).each_pair {|codon, frequency|
    per_aminoacid[Bioroebe.to_aa(codon)] += frequency
  }
  e
  # ======================================================================= #
  # Convert it into percent:
  # ======================================================================= #
  per_aminoacid.each_pair {|aminoacid, per_thousand|
    percentage = ((per_thousand * 100.0) / 1000.0).round(3).to_s
    percentage = '%.2f' % percentage
    e '  '+
      steelblue(aminoacid).to_s+' '+
      royalblue(percentage.rjust(6)+'%')
  }
  e
end

.calculate_weight_of_the_aminoacids_in_this_fasta_file(fasta_file) ⇒ Object

#

Bioroebe.calculate_weight_of_the_aminoacids_in_this_fasta_file

This method will return a Hash containing the weight of the aminoacids in a .fasta file.

Usage example:

x = Bioroebe.calculate_weight_of_the_aminoacids_in_this_fasta_file('viruses.fa')

This may yield a Hash such as the following:

{ "sp|P23046|NSP5_ROTBV"  => 21647.5341,
  "sp|Q81835|SHDAG_HDVU2" => 22030.6392,
  "sp|A5HBD7|ST_POVWU"    => 23433.3773,
  "sp|Q91FT8|234R_IIV6"   => 21076.778 }
#


42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/bioroebe/toplevel_methods/fasta_and_fastq.rb', line 42

# Parse the given FASTA file and return a Hash that maps each short
# header to the summed weight of the aminoacids of its sequence,
# rounded to four digits. If the file does not exist a notification is
# printed instead.
def self.calculate_weight_of_the_aminoacids_in_this_fasta_file(fasta_file)
  unless File.exist? fasta_file
    return e('No file exists at '+fasta_file.to_s+'.')
  end
  parsed    = Bioroebe.parse_fasta_quietly(fasta_file)
  headers   = parsed.short_headers?
  sequences = parsed.sequences?
  weights   = {}
  headers.each_with_index {|header, position|
    # Add up the mass of every single aminoacid of this sequence.
    total = sequences[position].chars.inject(0) {|mass, aminoacid|
      mass + Bioroebe.weight_of_this_aminoacid?(aminoacid)
    }
    weights[header] = total.round(4)
  }
  weights
end

.can_base_pair_with?(a, b) ⇒ Boolean

#

Bioroebe.can_base_pair_with?

Usage example:

Bioroebe.can_base_pair_with?('A','T') # => true
Bioroebe.can_base_pair_with?('A','G') # => false
#

Returns:

  • (Boolean)


4916
4917
4918
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4916

# Returns true if the partner nucleotide of a, as reported by
# Bioroebe.partner_nucleotide(), is equal to b.
def self.can_base_pair_with?(a, b)
  expected_partner = ::Bioroebe.partner_nucleotide(a)
  expected_partner == b
end

.cat(i = nil) ⇒ Object

#

Bioroebe.cat (cat tag)

A variant of cat to use here.

#


5012
5013
5014
5015
5016
5017
5018
5019
5020
5021
5022
5023
5024
5025
5026
5027
5028
5029
5030
5031
5032
5033
5034
5035
5036
5037
5038
5039
5040
5041
5042
5043
5044
5045
5046
5047
5048
5049
5050
5051
5052
5053
5054
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 5012

# A cat-like helper: display the content of the given file, line by
# line and with two spaces of padding.
#
# For an Array the first entry is used. Input containing '$' is passed
# through convert_global_env(); input consisting of digits only is
# treated as a position (starting at 1) into Dir['*'].
# FASTA files (.fasta / .fa) are handed to Bioroebe::ParseFasta instead.
def self.cat(
    i = nil
  )
  i = i.first if i.is_a? Array
  if i
    i = convert_global_env(i) if i.include? '$'
    i = Dir['*'][i.to_i - 1] if i =~ /^\d+$/
  end
  if i.nil?
    return erev('Please provide an argument to Bioroebe.cat() (the name of a file)')
  end
  # ======================================================================= #
  # === Handle directories next
  # ======================================================================= #
  if File.directory? i
    return erev("We can not read from `#{sdir(i)}#{rev}` as it is a directory.")
  end
  # ======================================================================= #
  # === Handle missing files next
  # ======================================================================= #
  unless File.exist?(i)
    e "#{swarn('Trying to display the file `')}#{sfile(i)}#{swarn('`')}"
    return e(swarn('but it does not exist.'))
  end
  # ======================================================================= #
  # From here on the file is known to exist.
  # ======================================================================= #
  extension = File.extname(i).delete('.')
  if ['fasta', 'fa'].include? extension
    e 'This is a fasta file, so rather than cat-ing the content,'
    e 'we will send this dataset to the ParseFasta class.'
    require 'bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb'
    Bioroebe::ParseFasta.new(i)
  else # The default here.
    e "Now displaying the file `#{sfile(i)}`."
    File.readlines(i).each {|line| e "  #{line.chomp}" } # With a bit of padding.
  end
end

.change_directory(i = '$HOME', be_verbose = false) ⇒ Object

#

Bioroebe.change_directory

This method allows us to change the directory.

Bioroebe.cd() is an alias to the method here.

#


4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
4231
4232
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261
4262
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4210

# ========================================================================= #
# === Bioroebe.change_directory
#
# Change into the given directory, if it exists.
#
# Some symbols and keywords are mapped to project directories first:
# :home_directory, :default and nil map to log_dir?, the download
# variants to download_dir?, and 'base', 'logdir' as well as
# :bioroebe_log_directory map to save_dir? (unless a file of that
# name exists).
#
# Pass true, :be_verbose or :do_report_current_directory as second
# argument to report what happened.
#
# The path is normalized on a private copy, so the String passed in by
# the caller is never modified.
#
# NOTE(review): the default '$HOME' is not expanded in this method, so
# it only works if a directory literally called '$HOME' exists - confirm
# whether callers expand it beforehand.
# ========================================================================= #
def self.change_directory(
    i          = '$HOME',
    be_verbose = false
  )
  case be_verbose
  # ======================================================================= #
  # === :do_report_current_directory
  # ======================================================================= #
  when :do_report_current_directory,
       :be_verbose
    be_verbose = true
  end
  case i # Do some sanitizing here. (case tag)
  # ======================================================================= #
  # === :home_directory
  # ======================================================================= #
  when :home_directory,
       :default,
       nil # ← Nil is also assumed to refer to this :default value.
    i = log_dir?
  # ======================================================================= #
  # === :download_dir
  # ======================================================================= #
  when :download_dir,':download_dir',
       :download_directory,':download_directory'
    i = download_dir?
  # ======================================================================= #
  # === 'base'
  # ======================================================================= #
  when 'base',
       'logdir',
       :bioroebe_log_directory
    # ===================================================================== #
    # Enter the save dir, unless a file exists with the same name.
    # ===================================================================== #
    i = save_dir? unless File.exist?(i.to_s) # .to_s to avoid Symbols here.
  end
  # ======================================================================= #
  # Build the target on a new String rather than appending to the input;
  # .to_s additionally tolerates Symbols that were not handled above.
  # ======================================================================= #
  target = i.to_s
  target = "#{target}/" unless target.end_with? '/'
  if File.directory? target
    e sdir(target) if be_verbose # Also colourize the directory and output it.
    Dir.chdir(target)
  else
    if be_verbose
      erev "No directory called `#{sdir(target)}#{rev}` exists,"
      erev 'thus we can not cd to this target.'
    end
  end
end

.clear_array_colourize_this_aminoacidObject

#

Bioroebe.clear_array_colourize_this_aminoacid

#


1282
1283
1284
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 1282

# Reset @array_colourize_this_aminoacid to a new, empty Array and
# return that Array.
def self.clear_array_colourize_this_aminoacid
  @array_colourize_this_aminoacid = Array.new
end

.clear_stop_codonsObject

#

Bioroebe.clear_stop_codons

#


256
257
258
# File 'lib/bioroebe/codons/codons.rb', line 256

# Reset @stop_codons to a new, empty Array and return that Array.
def self.clear_stop_codons
  @stop_codons = Array.new
end

.cleave(with = :with_trypsin, i = ARGV) ⇒ Object

#

Bioroebe.cleave (cleave tag)

This is the general entry-point for “cleave-related” activities, such as cleaving a polypeptide or a DNA strand via an enzyme.

#


56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/bioroebe/cleave_and_digest/cleave.rb', line 56

# General entry point for cleave-related actions. Right now only
# trypsin is handled (:with_trypsin, :trypsin or :default), which
# delegates to Bioroebe.cleave_with_trypsin(); any other value for
# the first argument yields nil.
def self.cleave(
    with = :with_trypsin,
    i    = ARGV
  )
  # ======================================================================= #
  # === :with_trypsin
  # ======================================================================= #
  trypsin_aliases = [:with_trypsin, :trypsin, :default]
  return nil unless trypsin_aliases.include?(with)
  Bioroebe.cleave_with_trypsin(i)
end

.cleave_with_trypsin(this_sequence = ARGV) ⇒ Object

#

Bioroebe.cleave_with_trypsin

Trypsin cleaves peptides on the C-terminal side of lysine and arginine amino acid residues. If a proline residue is on the carboxyl side of the cleavage site, the cleavage will not occur. If an acidic residue is on either side of the cleavage site, the rate of hydrolysis has been shown to be slower.

This method will return an Array.

#


21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/bioroebe/cleave_and_digest/cleave.rb', line 21

# ========================================================================= #
# === Bioroebe.cleave_with_trypsin
#
# Split the given aminoacid sequence after every K (lysine) or R
# (arginine), unless the next residue is P (proline).
#
# For an Array the first entry is used. Returns an Array of fragments;
# nil or empty input yields an empty Array, and no empty trailing
# fragment is emitted when the sequence ends in K or R.
# ========================================================================= #
def self.cleave_with_trypsin(
    this_sequence = ARGV
  )
  # ======================================================================= #
  # === Handle Arrays first
  # ======================================================================= #
  if this_sequence.is_a? Array
    this_sequence = this_sequence.first
  end
  array_cleave_positions = [] # This is the Array that will be returned.
  return array_cleave_positions if this_sequence.nil? # e. g. empty ARGV.
  subrange = ''.dup
  this_sequence.size.times {|index|
    this_char = this_sequence[index, 1]
    subrange << this_char
    next unless (this_char == 'K') or (this_char == 'R')
    next if this_sequence[index+1, 1] == 'P' # Exclude Proline.
    array_cleave_positions << subrange
    subrange = ''.dup
  }
  # ======================================================================= #
  # Only keep the remainder if there is one; otherwise a sequence ending
  # in K or R would gain a bogus, empty last fragment.
  # ======================================================================= #
  array_cleave_positions << subrange unless subrange.empty?
  return array_cleave_positions
end

.cliner(use_this_token = :default_token, how_many_times = 80, use_this_colour = nil) ⇒ Object

#

Bioroebe.cliner

The first character denotes which token we will use, such as ‘#’, for the line that is to be displayed.

#


2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2578

# Print a separator line that consists of a token repeated n times.
#
# The first argument is the token (:default_token and :default mean '=')
# or a Hash with the keys :length and/or :token; these keys are removed
# from the Hash. The second argument is the number of repetitions. A
# colour may be given as third argument or via a block that returns a
# Hash with the key :colour or :colours; the line is then sent through
# the method of that name on ::Colours.
def self.cliner(
    use_this_token  = :default_token,
    how_many_times  = 80,
    use_this_colour = nil
  )
  require 'bioroebe/colours/colours.rb'
  # ======================================================================= #
  # === Handle Hashes first
  # ======================================================================= #
  if use_this_token.is_a? Hash
    # === :length
    how_many_times = use_this_token.delete(:length) if use_this_token.has_key? :length
    # === :token
    use_this_token = use_this_token.delete(:token)  if use_this_token.has_key? :token
    # Still a Hash? Then no usable token was given.
    use_this_token = :default if use_this_token.is_a? Hash
  end
  # ======================================================================= #
  # The following check must come after the check for Hashes above.
  # ======================================================================= #
  use_this_token = '=' if [:default_token, :default].include?(use_this_token)
  # ======================================================================= #
  # === handle blocks next
  # ======================================================================= #
  if block_given?
    yielded = yield
    if yielded.is_a?(Hash)
      # :colour takes precedence over :colours.
      colour_key = [:colour, :colours].find {|key| yielded.has_key? key }
      use_this_colour = yielded[colour_key] if colour_key
    end
  end
  if use_this_colour
    e ::Colours.send(use_this_colour, use_this_token * how_many_times)
  else
    e use_this_token * how_many_times
  end
end

.codon_frequencies_of_this_sequence(i = ARGV) ⇒ Object

#

Bioroebe.codon_frequencies_of_this_sequence

Usage example:

x = Bioroebe.codon_frequencies_of_this_sequence('ATGGGCGGGGTGATGGCAATGCCTTTAATGCCGCCAAAAAAAAAAAAAAAA')

Will yield this Hash:

{"AAA"=>5, "ATG"=>4, "CCA"=>1, "CCG"=>1, "TTA"=>1, "CCT"=>1, "GCA"=>1, "GTG"=>1, "GGG"=>1, "GGC"=>1}
#


198
199
200
# File 'lib/bioroebe/codons/show_codon_usage.rb', line 198

# Thin toplevel wrapper: runs Bioroebe::ShowCodonUsage on the input,
# passing :be_quiet via the block, and returns what .result? reports.
def self.codon_frequencies_of_this_sequence(i = ARGV)
  codon_usage = Bioroebe::ShowCodonUsage.new(i) { :be_quiet }
  codon_usage.result?
end

.codon_frequency_of_this_string(i = 'ATTCGTACGATCGACTGACTGACAGTCATTCGTAGTACGATCGACTGACTGACAGTCATTCGTAC'\ 'GATCGACTGACTGACAAGTCATTCGTACGATCGACTGACTTGACAGTCATAA', automatically_convert_into_a_RNA_sequence = true) ⇒ Object

#

Bioroebe.codon_frequency_of_this_string

The input to this method should ideally be a String. It will be assumed to be an RNA string, e. g. mRNA. Thus, all T are replaced with U by default. This can be toggled via the second argument of this method.

This method will return a Hash.

Usage example:

Bioroebe.codon_frequency_of_this_string
Bioroebe.codon_frequency_of_this_string 'ATTCGTACGATCGACTACTACT' # => {"UAC"=>2, "GAC"=>1, "AUC"=>1, "ACG"=>1, "CGU"=>1, "AUU"=>1}
#


917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 917

# ========================================================================= #
# === Bioroebe.codon_frequency_of_this_string
#
# Cut the input into consecutive triplets and count how often each
# triplet occurs. Leftover characters at the end that do not form a
# full triplet are ignored. An Array is joined first.
#
# By default every 'T' is replaced with 'U' beforehand; pass false as
# second argument to keep the input as it is. The replacement happens
# on a copy, so the String of the caller is never modified.
#
# Returns a Hash (triplet => count), most frequent triplet first.
# ========================================================================= #
def self.codon_frequency_of_this_string(
    i = 'ATTCGTACGATCGACTGACTGACAGTCATTCGTAGTACGATCGACTGACTGACAGTCATTCGTAC'\
        'GATCGACTGACTGACAAGTCATTCGTACGATCGACTGACTTGACAGTCATAA',
    automatically_convert_into_a_RNA_sequence = true
  )
  i = i.join if i.is_a? Array
  # ======================================================================= #
  # Use the non-destructive .tr() here: .tr!() would turn the DNA String
  # of the caller into RNA behind their back.
  # ======================================================================= #
  sequence = automatically_convert_into_a_RNA_sequence ? i.tr('T','U') : i
  tally = sequence.scan(/.../).tally
  # ======================================================================= #
  # We still have to sort it, highest count first.
  # ======================================================================= #
  sorted_hash = Hash[tally.sort_by {|_codon, count| count }.reverse]
  return sorted_hash
end

.codon_table_dataset?Boolean

#

Bioroebe.codon_table_dataset?

This method will return the “codon table dataset”, as a Hash.

This Hash will contain entries like this:

{"TAA"=>"*", "TGA"=>'*',"CCA"=>"P", ...

and so forth.

#

Returns:

  • (Hash)


39
40
41
# File 'lib/bioroebe/codons/codon_table.rb', line 39

# Query method: hands back whatever is currently stored in
# @codon_table_dataset.
def self.codon_table_dataset?
  return @codon_table_dataset
end

.codon_table_in_use?Boolean

#

Bioroebe.codon_table_in_use?

Query method to return the currently used codon table.

#

Returns:

  • (Boolean)


83
84
85
# File 'lib/bioroebe/codons/codon_table.rb', line 83

# Query method: hands back whatever is currently stored in
# @codon_table_in_use.
def self.codon_table_in_use?
  return @codon_table_in_use
end

.codon_tablesObject

#

Bioroebe.codon_tables

This method will return all codon tables that we have registered.

This is probably not so terribly useful for most projects, but in the event that you do need all codon tables, you can use this method.

The result will be a Hash having key->value pairs such as:

"9" => {"TAA"=>"*", "TAG"=>"*"
#


30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/bioroebe/codons/codon_tables.rb', line 30

# Load every .yml file found in the codon_tables/ subdirectory of
# yaml_directory? (in sorted order, skipping files that end with
# 'overview.yml') and return one Hash that maps the file name without
# the '.yml' suffix to the content of that file.
def self.codon_tables
  require 'bioroebe/requires/require_yaml.rb'
  pattern = "#{yaml_directory?}codon_tables/*.yml"
  Dir[pattern].sort.each_with_object({}) {|yaml_file, tables|
    next if yaml_file.end_with? 'overview.yml' # We reject this one here.
    dataset      = YAML.load_file(yaml_file)
    entry_number = File.basename(yaml_file).delete_suffix('.yml')
    tables[entry_number] = dataset
  }
end

.codons_for_this_aminoacid?(i = ARGV) ⇒ Boolean

#

Bioroebe.codons_for_this_aminoacid?

This method will return all possible DNA codons for a specific aminoacid, as an Array.

So for example, for the aminoacid serine, this method would return an Array containing all 6 codons that code for this aminoacid (if the eukaryotic codon table is used, which also includes humans).

This method supports querying only ONE aminoacid at a time.

Currently the method relies on the file called “codons_of_the_aminoacids.yml”. In the future, the method here will probably be changed to add support for different codon tables.

Specific invocation examples:

Bioroebe.codons_for?(:serine)
Bioroebe.codons_for?(:tyrosine)
Bioroebe.codons_for?(:threonine)
Bioroebe.codons_for?('T')

To test this for another organism, try:

Bioroebe.use_this_codon_table(:yeast_mitochondria)
Bioroebe.codons_for?('T')
Bioroebe.decode_this_aminoacid 'K' # => ["AAA", "AAG"]
#

Returns:

  • (Array)


322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
# File 'lib/bioroebe/codons/codons.rb', line 322

# Return an Array with all codons of @codon_table_dataset that code
# for the given aminoacid.
#
# The aminoacid may be given as one-letter String (such as 'K'), as
# Symbol (such as :serine) or as String starting with ':' (such as
# ':serine'). Symbols are shortened to their first three letters and
# looked up in AMINO_ACIDS_THREE_TO_ONE. For an Array the first entry
# is used; nil and :default mean :lysine.
def self.codons_for_this_aminoacid?(
    i = ARGV
  )
  # ======================================================================= #
  # First, convert the input a bit and sanitize it.
  # ======================================================================= #
  i = i.first if i.is_a? Array
  i = i.delete(':').to_sym if i.is_a?(String) and i.start_with?(':')
  i = :lysine if (:default == i) or i.nil?
  if i.is_a? Symbol
    # ===================================================================== #
    # === Convert e. g. :serine into 'ser' and then into 'S'
    # ===================================================================== #
    three_letters = i.to_s.downcase[0 .. 2]
    i = AMINO_ACIDS_THREE_TO_ONE[three_letters]
  end
  # ======================================================================= #
  # @codon_table_dataset is a Hash with entries such as "TTC" => "F".
  # Collect every codon whose aminoacid is the one we look for.
  # ======================================================================= #
  @codon_table_dataset.each_with_object([]) {|(codon, aminoacid), matches|
    matches << codon if aminoacid == i
  }
end

.colourize_aa(i, array_colourize_these_aminoacids = array_colourize_this_aminoacid? ) ⇒ Object

#

Bioroebe.colourize_aa

Use this method if you wish to colourize an aminoacid, in a red colour.

The input should be the specific aminoacid sequence in question that you wish to see being colourized here.

This currently only works for aminoacids, and only in red. Perhaps at a later time it will become more flexible, but for now, it will be exclusive for aminoacids alone.

Usage example:

puts Bioroebe.colourize_aa 'STGYGGCTR', 'S T Y'
#


1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 1924

# Wrap every occurrence of the given aminoacids in the sequence i into
# swarn(), followed by rev, and return i.
#
# The aminoacids may be an Array or a String of space-separated letters
# such as 'S T Y'. Nothing happens if use_colours? is false or if no
# aminoacids were given.
#
# Note that the sequence is modified in place via .gsub!().
def self.colourize_aa(
    i,
    array_colourize_these_aminoacids = array_colourize_this_aminoacid?
  )
  targets = array_colourize_these_aminoacids
  targets = targets.split(' ') if targets.is_a? String # Split it into an Array.
  return i unless use_colours? # But only if we use colours.
  return i if targets.empty?
  if i.nil?
    puts 'You first have to assign a sequence.'
    return i
  end
  # ======================================================================= #
  # Only touch the sequence if at least one of the aminoacids is present.
  # ======================================================================= #
  if i.chars.any? {|aminoacid| targets.include? aminoacid }
    targets.each {|target|
      i.gsub!(/(#{target})/, swarn('\\1')+rev)
    }
  end
  i
end

.colourize_this_aminoacid_sequence_for_the_commandline(i) ⇒ Object

#

Bioroebe.colourize_this_aminoacid_sequence_for_the_commandline

This method uses some hardcoded colour assignments to the 20 different aminoacids.

Usage example:

puts Bioroebe.colourize_this_aminoacid_sequence_for_the_commandline('NLKRSPTHY')
#


1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 1717

# Colourize every known aminoacid letter of the given sequence. The
# name of the colour method for each letter is read from the YAML file
# FILE_DEFAULT_COLOURS_FOR_THE_AMINOACIDS and invoked via send();
# letters that are not part of the list below are kept as they are.
#
# An Array is joined first. Returns a new String.
def self.colourize_this_aminoacid_sequence_for_the_commandline(i)
  i = i.join if i.is_a? Array
  colour_for   = YAML.load_file(FILE_DEFAULT_COLOURS_FOR_THE_AMINOACIDS)
  colourizable = %w( A R N D B C E Q Z G H I L K M F P S T W Y V )
  i.chars.each_with_object(''.dup) {|aminoacid, result|
    if colourizable.include? aminoacid
      result << send(colour_for[aminoacid.to_s], aminoacid)
    else # else it will not be colourized.
      result << aminoacid
    end
  }
end

.colourize_this_fasta_dna_sequence(i = nil, &block) ⇒ Object

#

Bioroebe.colourize_this_fasta_dna_sequence

This toplevel method can be used to colourize a FASTA (DNA) sequence, e. g. “ATGCGCGTATTA” and so forth.

Note that this is intended for the commandline, that is to be displayed on e. g. a KDE Konsole terminal.

Usage examples:

puts Bioroebe.colourize_this_fasta_dna_sequence('ATGCGCATGCGCGTATTAGTATTAATGCGCGTATTAATGCGCGTATTA')
puts Bioroebe.colourize_this_fasta_dna_sequence('ATGCGCATGCGCGTATTAGTATTAATGCGCGTATTAATGCGCGTATTA') { :with_ruler }
puts Bioroebe.colourize_this_fasta_dna_sequence('TGCGCGTATTAGTATTAATGCGCGTATTAATGCGCGTATTA') { :with_ruler_steelblue_colour }
#


232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
# File 'lib/bioroebe/toplevel_methods/fasta_and_fastq.rb', line 232

# Colourize the nucleotides of a FASTA (DNA) sequence for display on
# the commandline and return the result as a new String.
#
# i may be a String, an Array (joined via ' ') or the path of an
# existing file, whose content is then used. For nil a notification is
# printed and nil is returned.
#
# The block may return :with_ruler, :add_ruler or :ruler in order to
# prepend an uncoloured ruler line, or a symbol such as
# :with_ruler_steelblue_colour to prepend a ruler in that colour.
def self.colourize_this_fasta_dna_sequence(
    i = nil,
    &block
  )
  # Load the ruler code lazily, only if it is not available yet.
  unless ::Bioroebe.respond_to?(:ruler_return_as_string_without_colours)
    require 'bioroebe/misc/ruler.rb'
  end
  if i.nil?
    e 'Please provide a valid FASTA sequence as input to '\
      'Bioroebe.colourize_this_fasta_dna_sequence()'
    return
  end
  if i.is_a? Array
    # ===================================================================== #
    # Arrays will be joined together.
    # ===================================================================== #
    i = i.join(' ').strip
  end
  # ======================================================================= #
  # Check for existing files next:
  # ======================================================================= #
  if i and File.file?(i)
    i = File.read(i)
  end
  # Keep an uncoloured copy around: the ruler below is built from it.
  original_input = i.dup
  i = i.dup # Always dup it here.
  if i.is_a? String
    # ===================================================================== #
    # The colours are either defined in a file called
    # 'colourize_fasta_sequences.yml' or they are simply hardcoded.
    #
    # The preferred (and thus default) way is to simply make use
    # of that .yml file. That works on my home system, so it
    # should work for other people as well.
    # ===================================================================== #
    if use_colours?
      this_file = FILE_COLOURIZE_FASTA_SEQUENCES
      if File.exist? this_file
        dataset_for_the_colours = YAML.load_file(this_file)
        # Each pass works on the output of the previous pass, so the
        # order of the entries in the .yml file may matter.
        dataset_for_the_colours.each_pair {|this_nucleotide, this_colour_to_be_used|
          i.gsub!(
            /#{this_nucleotide}/,
            Colours.send(this_colour_to_be_used, this_nucleotide)+
            rev
          )
        }
      else
        # Hardcoded fallback colours, if the .yml file is missing.
        i.gsub!(/A/, "#{teal('A')}#{rev}")
        i.gsub!(/C/, "#{slateblue('C')}#{rev}")
        i.gsub!(/G/, "#{royalblue('G')}#{rev}")
        i.gsub!(/T/, "#{steelblue('T')}#{rev}")
        i.gsub!(/U/, "#{steelblue('U')}#{rev}") # Uracil is just the same as Thymine.
      end
    end
  end
  # ======================================================================= #
  # === Handle blocks next
  # ======================================================================= #
  if block_given?
    yielded = yield
    case yielded
    # ===================================================================== #
    # === with_ruler
    # ===================================================================== #
    when :with_ruler,
         :add_ruler,
         :ruler
      i.prepend(
        ::Bioroebe.ruler_return_as_string_without_colours(original_input)+
        "\n"
      )
    else # Assume something like:
         #   :with_ruler_steelblue_colour
      if yielded.to_s.include? 'colo' # This assumes "colour" or "color".
        # Strip the "_colour" / "_color" part and the "with_ruler_" prefix,
        # so that only the name of the colour remains, e. g. "steelblue".
        use_this_colour = yielded.to_s.sub(/_colou?r/,'').
                                       sub(/with_ruler_/,'')
        # The colour name is then invoked as a method, via send().
        this_string = send(use_this_colour,
          ::Bioroebe.ruler_return_as_string_without_colours(original_input)+
          "\n"
        )
        i.prepend(this_string)
      end
    end
  end
  return i
end

.colours(enable_or_disable = '+') ⇒ Object

#

Bioroebe.colours

This method can be used to quickly enable or disable colours, by passing ‘+’ or ‘-’.

#


131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/bioroebe/colours/colours.rb', line 131

# Quickly enable or disable colours: '+' and 'true' call
# enable_colours, whereas '-', 'false' and the empty String call
# disable_colours. The argument is compared after .to_s, so true,
# false and nil work as well. Any other value does nothing and
# yields nil.
def self.colours(
    enable_or_disable = '+'
  )
  toggle = enable_or_disable.to_s
  if ['+', 'true'].include?(toggle)
    enable_colours
  elsif ['-', 'false', ''].include?(toggle)
    disable_colours
  end
end

.compacter(i = ARGV) ⇒ Object

#

Bioroebe.compacter

Note that this variant will NEVER ask for user-input of the Bioroebe::Compacter class.

#


243
244
245
246
247
# File 'lib/bioroebe/utility_scripts/compacter/compacter.rb', line 243

# ========================================================================= #
# === Bioroebe.compacter
#
# Creates a new Bioroebe::Compacter for the given input (ARGV by default)
# and passes it a block that yields :do_not_ask_for_user_input. The new
# instance is returned.
# ========================================================================= #
def self.compacter(i = ARGV)
  Bioroebe::Compacter.new(i) do
    :do_not_ask_for_user_input
  end
end

.complement(i = nil) ⇒ Object

#

Bioroebe.complement

This method will return the complementary DNA strand.

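Ambiguous nucleotide codes (N, R and Y) are not resolved to a single base; instead the possibilities are listed, e.g. 'N' yields '(A/T/G/C)'.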

Usage example:

Bioroebe.complement 'ATGGGTCCC' # => "TACCCAGGG"
#


3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 3997

# ========================================================================= #
# === Bioroebe.complement
#
# Returns the complementary strand for the given sequence, using the
# mapping stored in HASH_DNA_NUCLEOTIDES.
#
# - If an Array is given, only its first element is used.
# - If the input names an existing regular file, the content of that
#   file is used as the sequence.
# - Characters are upcased before the lookup. The ambiguity codes N, R
#   and Y are expanded to a "(…/…)" list; all other unknown characters
#   are dropped silently.
#
# Returns a String, or nil if no input was given.
#
# NOTE(review): 'R' is emitted as '(A/G)' and 'Y' as '(T/C)', which are
# the bases the code stands for rather than their complements - confirm
# whether this is intended.
# ========================================================================= #
def self.complement(
    i = nil
  )
  hash = HASH_DNA_NUCLEOTIDES
  i = i.first if i.is_a? Array
  return nil unless i
  # ======================================================================= #
  # Only read from regular files. File.exist? would also be true for a
  # directory that happens to be named like the sequence, and reading
  # a directory raises Errno::EISDIR.
  # ======================================================================= #
  if File.file?(i)
    i = File.readlines(i).join(' ').strip
  end
  result = ''.dup
  i.each_char { |char|
    char = char.upcase
    if hash.has_key? char
      result << hash[char]
    else
      case char.downcase # case tag
      when 'n' # Means any.
        result << '(A/T/G/C)'
      when 'r' # Means a purine.     (larger)
        result << '(A/G)'
      when 'y' # Means a pyrimidine. (smaller)
        result << '(T/C)'
      end
    end
  }
  return result
end

.complementary_dna_strand(i = ARGV) ⇒ Object

#

Bioroebe.complementary_dna_strand

This method will simply return the corresponding (complementary) DNA strand.

Usage example:

Bioroebe.complementary_dna_strand('ATCATCATC') # => "TAGTAGTAG"
#


152
153
154
# File 'lib/bioroebe/nucleotides/complementary_dna_strand.rb', line 152

# ========================================================================= #
# === Bioroebe.complementary_dna_strand
#
# Hands the input (ARGV by default) to Bioroebe::ComplementaryDnaStrand
# and returns whatever its result? method yields.
# ========================================================================= #
def self.complementary_dna_strand(i = ARGV)
  return Bioroebe::ComplementaryDnaStrand.new(i).result?
end

.complementary_rna_strand(i) ⇒ Object

#

Bioroebe.complementary_rna_strand

This method will simply return the corresponding (complementary) RNA strand.

Usage example:

Bioroebe.complementary_rna_strand('ATCATCATC') # => "UAGUAGUAG"
#


588
589
590
591
592
593
594
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 588

# ========================================================================= #
# === Bioroebe.complementary_rna_strand
#
# Maps every character of the input through partner_nucleotide_hash and
# then replaces each 'T' of the result with 'U'. If an Array is given,
# only its first element is used. Characters without an entry in the
# hash contribute nothing to the result.
# ========================================================================= #
def self.complementary_rna_strand(i)
  i = i.first if i.is_a? Array
  partners = partner_nucleotide_hash
  complementary_dna = i.each_char.map { |nucleotide| partners[nucleotide] }.join
  complementary_dna.tr('T', 'U')
end

.compseq(i = ARGV) ⇒ Object

#

Bioroebe.compseq

#


514
515
516
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 514

# ========================================================================= #
# === Bioroebe.compseq
#
# Runs Bioroebe::Compseq on the input (ARGV by default), passing a block
# that yields :disable_colours_and_be_quiet, and returns the value of
# result_as_string? of that instance.
# ========================================================================= #
def self.compseq(i = ARGV)
  instance = Bioroebe::Compseq.new(i) { :disable_colours_and_be_quiet }
  instance.result_as_string?
end

.contains_an_inverted_repeat?(i = 'TTACGAAAAAACGTAA') ⇒ Boolean

#

Bioroebe.contains_an_inverted_repeat?

We assume an inverted repeat to exist if at least 2 consecutive nucleotides of the first half can base-pair with their counterparts in the reversed second half, so a total of 4 matching nucleotides. This assumption may not necessarily be correct and we may have to fine-tune this at a later time.

For testing purpose, the sequence ‘TTACGAAAAAACGTAA’ can be used.

#

Returns:

  • (Boolean)


532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 532

# ========================================================================= #
# === Bioroebe.contains_an_inverted_repeat?
#
# Splits the sequence at its middle, reverses the second half and then
# walks along the first half, asking can_base_pair_with? for every
# position. Returns true if at least two consecutive positions can
# base-pair, and false otherwise.
# ========================================================================= #
def self.contains_an_inverted_repeat?(
    #i = 'AGCCCCGCAAAAAAGGCGGGCU'
    i = 'TTACGAAAAAACGTAA' # This is in the 5'→3' direction.
  )
  midpoint = i.size / 2
  left_half            = i[0 .. (midpoint-1)]
  reversed_right_half  = i[midpoint .. -1].reverse # Work via the reverse sequence.
  best_run    = 0
  running_run = 0
  left_half.each_char.with_index do |nucleotide, position|
    if can_base_pair_with?(reversed_right_half[position], nucleotide)
      running_run += 1
      best_run = running_run if running_run > best_run
    else
      running_run = 0 # The stretch was interrupted; start counting anew.
    end
  end
  best_run >= 2
end

.convert_global_env(i) ⇒ Object

#

Bioroebe.convert_global_env

Note that the method will pick only the first argument given to it if an Array is supplied.

#


801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 801

# ========================================================================= #
# === Bioroebe.convert_global_env
#
# Expands an environment-variable name through the optional
# ConvertGlobalEnv constant (from the convert_global_env gem).
#
# - If an Array is given, only its first element is used.
# - If ConvertGlobalEnv is not available (and cannot be required), the
#   input is returned unchanged.
# - Otherwise a leading '$' is added when it is missing and the result
#   of ConvertGlobalEnv.convert(i, :do_not_report_errors) is returned.
#
# The String passed in by the caller is never modified.
# ========================================================================= #
def self.convert_global_env(i)
  i = i.first if i.is_a? Array
  unless Object.const_defined? :ConvertGlobalEnv
    begin # Require an external gem in this case.
      require 'convert_global_env'
    rescue LoadError; end # The gem is optional; carry on without it.
  end
  return i unless Object.const_defined? :ConvertGlobalEnv
  # ======================================================================= #
  # Build a new String instead of calling .prepend on the argument, so
  # that the caller's object is left untouched.
  # ======================================================================= #
  i = "$#{i}" if i and !i.start_with?('$')
  return ConvertGlobalEnv.convert(i, :do_not_report_errors) # Handle ENV variables.
end

.convert_one_letter_to_full(i) ⇒ Object

#

Bioroebe.convert_one_letter_to_full

Convert one aminoacid to the real name.

Usage example:

Bioroebe.convert_one_letter_to_full('T') # => "threonine"
#


1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 1982

# ========================================================================= #
# === Bioroebe.convert_one_letter_to_full
#
# Converts a one-letter aminoacid code into the full name, by first
# asking convert_one_letter_to_three for the three-letter code and then
# looking that code up in AMINO_ACIDS_ABBREVIATIONS.
#
# If an Array is given, every element is converted and an Array with
# the converted names is returned (previously the results were thrown
# away and the input Array itself was returned).
#
# Returns nil for an unknown letter.
# ========================================================================= #
def self.convert_one_letter_to_full(i)
  if i.is_a? Array
    return i.map {|entry| convert_one_letter_to_full(entry) }
  end
  one_letter = i.to_s.downcase # need it to be downcased.
  three_letters = convert_one_letter_to_three(one_letter)
  AMINO_ACIDS_ABBREVIATIONS[three_letters]
end

.convert_one_letter_to_three(i) ⇒ Object

#

Bioroebe.convert_one_letter_to_three

Convert a one-letter-code for an aminoacid into the slightly longer three-letter-code variant for that particular aminoacid.

Note that this method will return the result in a downcased variant, such as “gly” for “glycine”.

Returns:

A string of three characters, if it is a valid one-letter aminoacid.

Usage example for an aminoacid such as Glycine:

Bioroebe.convert_one_letter_to_three('G') # => "gly"
#


1634
1635
1636
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 1634

# ========================================================================= #
# === Bioroebe.convert_one_letter_to_three
#
# Looks up the upcased input among the values of
# AMINO_ACIDS_THREE_TO_ONE (by inverting that Hash) and returns the
# matching key, or nil if there is none.
# ========================================================================= #
def self.convert_one_letter_to_three(i)
  one_to_three = AMINO_ACIDS_THREE_TO_ONE.invert
  one_letter_code = i.upcase
  one_to_three[one_letter_code]
end

.convert_this_codon_to_that_aminoacid(i = ARGV, &block) ⇒ Object

#

Bioroebe.convert_this_codon_to_that_aminoacid

#


225
226
227
228
229
230
# File 'lib/bioroebe/codons/convert_this_codon_to_that_aminoacid.rb', line 225

# ========================================================================= #
# === Bioroebe.convert_this_codon_to_that_aminoacid
#
# Creates a Bioroebe::ConvertThisCodonToThatAminoacid for the input
# (ARGV by default), passing a block that yields :be_quiet, and returns
# its result? converted via .to_s.
#
# NOTE(review): the &block parameter is accepted but not forwarded.
# ========================================================================= #
def self.convert_this_codon_to_that_aminoacid(i = ARGV, &block)
  converter = Bioroebe::ConvertThisCodonToThatAminoacid.new(i) { :be_quiet }
  converter.result?.to_s
end

.count_amount_of_aminoacids(i) ⇒ Object

#

Bioroebe.count_amount_of_aminoacids

#


344
345
346
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 344

# ========================================================================= #
# === Bioroebe.count_amount_of_aminoacids
#
# Returns a new CountAmountOfAminoacids instance for the given input.
# ========================================================================= #
def self.count_amount_of_aminoacids(
    i
  )
  return CountAmountOfAminoacids.new(i)
end

.count_amount_of_nucleotides(i) ⇒ Object

#

Bioroebe.count_amount_of_nucleotides

This method will always return the result in the form of a single line. The order is: A C G T

This can also be used to solve a problem listed at Rosalind.

Invocation examples:

Bioroebe.count_amount_of_nucleotides 'AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC' => "20 17 12 21"
Bioroebe.count_amount_of_nucleotides File.read('/rosalind_dna.txt').strip
#


483
484
485
486
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 483

# ========================================================================= #
# === Bioroebe.count_amount_of_nucleotides
#
# Creates a ::Bioroebe::CountAmountOfNucleotides with the arguments
# (i, :do_not_run_yet) and a block yielding :display_short_form, then
# returns the value of
# return_the_amount_of_nucleotides_in_short_form_on_a_single_line.
# ========================================================================= #
def self.count_amount_of_nucleotides(i)
  ::Bioroebe::CountAmountOfNucleotides.new(i, :do_not_run_yet) {
    :display_short_form
  }.return_the_amount_of_nucleotides_in_short_form_on_a_single_line
end

.count_AT(i = ARGV) ⇒ Object

#

Bioroebe.count_AT

This method will count how many characters in a given String are “A” or “T”, in total. The method will assume that an Array passed to it is meant to be a String.

So, every time this method encounters a “A” or a “T” in that string, we will “add” +1 to the number that will be returned by that method.

Usage example:

Bioroebe.count_AT 'ATTATATACCGCGCCCATATAAA' # => 15
#


25
26
27
28
29
# File 'lib/bioroebe/count/count_at.rb', line 25

# ========================================================================= #
# === Bioroebe.count_AT
#
# Returns how many characters of the input are 'A' or 'T', ignoring
# case. An Array is joined (with ' ') into one String first.
# ========================================================================= #
def self.count_AT(i = ARGV)
  if i.is_a? Array
    i = i.join(' ').strip
  end
  i.upcase.count('AT') # String#count treats 'AT' as the set {A, T}.
end

.count_GC(i = ARGV) ⇒ Object

#

Bioroebe.count_GC

This method will count how many characters in a given String are “G” or “C”, in total. The method will assume that an Array passed to it is meant to be a String.

So, every time this method encounters a “G” or a “C” in that string, we will “add” +1 to the number that will be returned by that method.

Specific usage examples:

Bioroebe.count_GC 'ATTATTATGGCCAATATA' # => 4
Bioroebe.count_GC 'ATG' # => 1
#


27
28
29
30
31
# File 'lib/bioroebe/count/count_gc.rb', line 27

# ========================================================================= #
# === Bioroebe.count_GC
#
# Returns how many characters of the input are 'G' or 'C', ignoring
# case. An Array is joined (with ' ') into one String first.
# ========================================================================= #
def self.count_GC(i = ARGV)
  if i.is_a? Array
    i = i.join(' ').strip
  end
  i.upcase.count('GC') # String#count treats 'GC' as the set {G, C}.
end

.count_non_DNA_bases_in_this_sequence(i, array = Bioroebe.return_DNA_nucleotides) ⇒ Object

#

Bioroebe.count_non_DNA_bases_in_this_sequence

Usage example:

Bioroebe.count_non_DNA_bases_in_this_sequence('ATCGF')
#


3044
3045
3046
3047
3048
3049
3050
3051
3052
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 3044

# ========================================================================= #
# === Bioroebe.count_non_DNA_bases_in_this_sequence
#
# Removes every entry of the second argument (by default the result of
# Bioroebe.return_DNA_nucleotides) from a copy of the sequence and
# returns how many characters remain. The input itself is not modified.
# ========================================================================= #
def self.count_non_DNA_bases_in_this_sequence(
    i, array = Bioroebe.return_DNA_nucleotides
  )
  leftover = array.reduce(i) {|remaining, this_nucleotide|
    remaining.delete(this_nucleotide)
  }
  leftover.size
end

.create_file(i) ⇒ Object

#

Bioroebe.create_file

This method can be used to create a file.

#


1218
1219
1220
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 1218

# ========================================================================= #
# === Bioroebe.create_file
#
# Creates the file via FileUtils.touch, unless a regular file already
# exists at that path (in which case nothing happens and nil is
# returned).
# ========================================================================= #
def self.create_file(i)
  return if File.file?(i)
  FileUtils.touch(i)
end

.create_jar_archiveObject

#

Bioroebe.create_jar_archive

This method will create a .jar file.

To invoke it from the commandline do:

bioroebe --jar

To execute a .jar file do:

java -jar foobar.jar
#


3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 3154

# ========================================================================= #
# === Bioroebe.create_jar_archive
#
# Runs the external command "jar cf bioroebe.jar bioroebe/" inside a
# hard-coded source directory and, if bioroebe.jar exists afterwards,
# moves it into the directory that return_pwd reported when the method
# was entered. Progress is reported via e().
#
# NOTE(review): the source directory below is an absolute path that is
# specific to one machine.
# ========================================================================= #
def self.create_jar_archive
  e 'Creating a .jar archive next:'
  e
  original_dir = return_pwd # Remembered so that we can go back afterwards.
  cd '/home/x/programming/ruby/src/bioroebe/lib/bioroebe/java/bioroebe/src/main/java/'
  esystem 'jar cf bioroebe.jar bioroebe/'
  # Determined before going back to original_dir; presumably cd() changes
  # the working directory of the process, so this points into java/.
  target_file = File.absolute_path('bioroebe.jar')
  cd original_dir
  if File.exist? target_file
    e 'Moving the created .jar file into the current working '\
      'directory next.'
    move_file(target_file, original_dir)
    e 'It should now be at:'
    e
    # NOTE(review): no path separator is inserted here; presumably
    # return_pwd yields a path with a trailing '/' - confirm.
    e sfile("  #{original_dir}#{File.basename(target_file)}")
    e
  end
  #   esystem 'jar cfe bioroebe.jar myClass myClass.class'
  e
end

.create_new_sequence(i = ARGV, &block) ⇒ Object

#

Bioroebe.create_new_sequence

Create a new Bioroebe::Sequence object. It will also assign to the @sequence module-level instance variable.

#


727
728
729
# File 'lib/bioroebe/sequence/sequence.rb', line 727

# ========================================================================= #
# === Bioroebe.create_new_sequence
#
# Creates a new ::Bioroebe::Sequence from the input (ARGV by default),
# forwarding an optional block to it. The new object is stored in the
# @sequence instance variable and returned.
# ========================================================================= #
def self.create_new_sequence(
    i = ARGV,
    &block
  )
  new_sequence = ::Bioroebe::Sequence.new(i, &block)
  @sequence = new_sequence
end

.create_random_aminoacids(how_many_aminoacids = CREATE_N_AMINOACIDS, split_at = nil, be_verbose = false, &block) ⇒ Object

#

Bioroebe.create_random_aminoacids

This method will create a random chain of aminoacids.

The first argument to this method shall denote how many aminoacids are to be generated, e. g. 25 would mean to create “25 aminoacids”.

If the second argument, called `split_at`, is not nil and is a number, then this method will add a newline into the returned String.

This method will return a String, consisting of the random aminoacids.

Usage Examples:

Bioroebe.create_random_aminoacids 125
Bioroebe.create_random_aminoacids  25 # => "SQHWVGGGVSRCWLMWAPECMYVWW"
Bioroebe.create_random_aminoacids  15 # => "CLKHMLMGLVAEEKA"
Bioroebe.random_aminoacids(5) # => "STRRM"
Bioroebe.random_aminoacids(8) # => "TRTQHSNN"
#


203
204
205
206
207
208
209
210
211
212
213
214
215
216
# File 'lib/bioroebe/aminoacids/create_random_aminoacids.rb', line 203

# ========================================================================= #
# === Bioroebe.create_random_aminoacids
#
# Forwards all three arguments and the optional block to
# ::Bioroebe::CreateRandomAminoacids.new and returns the value of
# amino_acid_sequence of that instance.
# ========================================================================= #
def self.create_random_aminoacids(
    how_many_aminoacids = CREATE_N_AMINOACIDS,
    split_at            = nil,
    be_verbose          = false,
    &block
  )
  ::Bioroebe::CreateRandomAminoacids.new(
    how_many_aminoacids, split_at, be_verbose, &block
  ).amino_acid_sequence
end

.create_the_pdf_tutorial(read_from_this_file = '/home/x/programming/ruby/src/bioroebe/README.md', store_where = '/Depot/j/example.pdf') ⇒ Object

#

Bioroebe.create_the_pdf_tutorial

This method can be used to quickly turn the README.md file into a .pdf file, for whatever the reason the user may want this.

#


2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2881

# ========================================================================= #
# === Bioroebe.create_the_pdf_tutorial
#
# Renders a landscape A4 .pdf file via the prawn gem: a title page, a
# page with a short slogan and then the content of the given file as
# plain text.
#
# Arguments:
#
#   read_from_this_file: path of the text file to embed
#   store_where:         path of the .pdf file that will be written
#
# NOTE(review): both default paths are absolute and specific to one
# machine.
# ========================================================================= #
def self.create_the_pdf_tutorial(
    read_from_this_file = '/home/x/programming/ruby/src/bioroebe/README.md',
    store_where         = '/Depot/j/example.pdf'
  )

  require 'prawn'

  Prawn::Fonts::AFM.hide_m17n_warning = true # Hide a useless warning.

  pdf = Prawn::Document.new(
          page_size: 'A4',
          page_layout: :landscape
        )
  # Page 1: the title.
  pdf.text "The Bioroebe Project", size: 80
  pdf.start_new_page
  # Page 2: the slogan, inside a bounding box.
  pdf.bounding_box [50, 600], width: 200 do
    pdf.fill_color '000000'
    pdf.text "making bioinformatics great again:", size: 15
  end
  pdf.start_new_page
  # Page 3 and onwards: the content of the input file.
  dataset = File.read(read_from_this_file, encoding: UTF_ENCODING)
  # Characters that cannot be converted to Windows-1252 are replaced
  # rather than raising; presumably this is needed for the AFM fonts
  # that prawn uses here - confirm.
  dataset = dataset.encode("Windows-1252", invalid: :replace, undef: :replace)

  pdf.text(dataset)
  e 'Storing at this location: '+store_where
  pdf.render_file store_where
end

.decode_this_aminoacid_sequence(i = 'KKKA') ⇒ Object

#

Bioroebe.decode_this_aminoacid_sequence

This method can be used as means to decode an aminoacid sequence, such as a String like ‘KKKA’.

The input to this method may also be in the form of an Array, such as [‘K’,‘K’,‘K’,‘A’]. Only valid one-letter aminoacids will be honoured by this method; invalid letters will be silently dropped.

After that, this method will replace all valid letters, that is valid aminoacids (in single letter code), with the corresponding codon. It will return all possibilities.

Invocation example:

Bioroebe.decode_this_aminoacid_sequence('KKKA') # => [["AAG", "AAA"], ["AAG", "AAA"], ["AAG", "AAA"], ["GCT", "GCC", "GCA", "GCG"]]
#


385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
# File 'lib/bioroebe/codons/codons.rb', line 385

# ========================================================================= #
# === Bioroebe.decode_this_aminoacid_sequence
#
# Splits the input into individual aminoacids and returns an Array that
# holds, per aminoacid, the result of ::Bioroebe.decode_this_aminoacid.
#
# - An Array is joined into one String first.
# - A String that contains '-' is treated as three-letter codes
#   separated by '-'; each code is converted via ::Bioroebe.three_to_one.
# - Any other String is split into single characters.
# ========================================================================= #
def self.decode_this_aminoacid_sequence(i = 'KKKA')
  i = i.join if i.is_a? Array
  if i.is_a? String
    i = if i.include? '-'
          # We may have a 3-letter code too, so handle that first.
          i.split('-').map {|three_letter_code|
            ::Bioroebe.three_to_one(three_letter_code)
          }
        else
          i.chars
        end
  end
  [i].flatten.map {|aminoacid| ::Bioroebe.decode_this_aminoacid(aminoacid) }
end

.deduce_aminoacid_sequence(from_this_sequence = :default) ⇒ Object

#

Bioroebe.deduce_aminoacid_sequence

#


465
466
467
468
469
# File 'lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb', line 465

# ========================================================================= #
# === Bioroebe.deduce_aminoacid_sequence
#
# Returns a new Bioroebe::DeduceAminoacidSequence instance for the given
# input (:default if nothing was passed).
# ========================================================================= #
def self.deduce_aminoacid_sequence(from_this_sequence = :default)
  return Bioroebe::DeduceAminoacidSequence.new(from_this_sequence)
end

.deduce_most_likely_aminoacid_sequence(from_this_sequence = :default) ⇒ Object

#

Bioroebe.deduce_most_likely_aminoacid_sequence

#


140
141
142
# File 'lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb', line 140

# ========================================================================= #
# === Bioroebe.deduce_most_likely_aminoacid_sequence
#
# Returns a new instance of
# Bioroebe::MostLikelyNucleotideSequenceForThisAminoacidSequence for the
# given input (:default if nothing was passed).
# ========================================================================= #
def self.deduce_most_likely_aminoacid_sequence(
    from_this_sequence = :default
  )
  return Bioroebe::MostLikelyNucleotideSequenceForThisAminoacidSequence.new(
    from_this_sequence
  )
end

.deduce_most_likely_aminoacid_sequence_as_string(i, use_this_codon_tables_frequencies = :default) ⇒ Object

#

Bioroebe.deduce_most_likely_aminoacid_sequence_as_string

This method will attempt to deduce the most likely codon (nucleotide) sequence for a given aminoacid sequence, and return it as a String.

Usage example:

Bioroebe.deduce_most_likely_aminoacid_sequence_as_string('KKKA') # => "AAGAAGAAGGCC"
#


452
453
454
455
456
457
458
459
460
461
# File 'lib/bioroebe/codons/codons.rb', line 452

# ========================================================================= #
# === Bioroebe.deduce_most_likely_aminoacid_sequence_as_string
#
# Calls return_the_most_likely_codon_sequence_for_this_aminoacid_sequence
# with both arguments. If that yields an Array, it is joined into one
# String; any other result is returned as-is.
# ========================================================================= #
def self.deduce_most_likely_aminoacid_sequence_as_string(
    i, use_this_codon_tables_frequencies = :default
  )
  codons = return_the_most_likely_codon_sequence_for_this_aminoacid_sequence(
    i, use_this_codon_tables_frequencies
  )
  codons.is_a?(Array) ? codons.join : codons
end

.default_colour?Boolean

#

Bioroebe.default_colour?

#

Returns:

  • (Boolean)


96
97
98
# File 'lib/bioroebe/colours/colours.rb', line 96

# ========================================================================= #
# === Bioroebe.default_colour?
#
# Query-method that returns the current value of the @default_colour
# instance variable (nil if it was never assigned).
# ========================================================================= #
def self.default_colour?
  @default_colour
end

.delimiter?Boolean

#

Bioroebe.delimiter?

This is simply the primary delimiter used for reading “multiline input” of the Bioroebe::Shell component.

#

Returns:

  • (Boolean)


413
414
415
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 413

# ========================================================================= #
# === Bioroebe.delimiter?
#
# Returns the delimiter String, which is three underscores. Despite the
# trailing '?' this method returns a String, not true or false.
# ========================================================================= #
def self.delimiter?
  '___'
end

.determine_n_glycosylation_matches(of_this_protein_sequence = 'MKNKFKTQEELVNHLKTVGFVFANSEIYNGLANAWDYGPLGVLLKNNLKNLWWKEFVTKQKDV'\ 'VGLDSAIILNPLVWKASGHLDNFSDPLIDCKNCKARYRADKLIESFDENIHIAENSSNEEFAK'\ 'VLNDYEISCPTCKQFNWTEIRHFNLMFKTYQGVIEDAKNVVYLRPETAQGIFVNFKNVQRSMR'\ 'LHLPFGIAQIGKSFRNEITPGNFIFRTREFEQMEIEFFLKEESAYDIFDKYLNQIENWLVSAC'\ 'GLSLNNLRKHEHPKEELSHYSKKTIDFEYNFLHGFSELYGIAYRTNYDLSVHMNLSKKDLTYF'\ 'DEQTKEKYVPHVIEPSVGVERLLYAILTEATFIEKLENDDERILMDLKYDLAPYKIAVMPLVN'\ 'KLKDKAEEIYGKILDLNISATFDNSGSIGKRYRRQDAIGTIYCLTIDFDSLDDQQDPSFTIRE'\ 'RNSMAQKRIKLSELPLYLNQKAHEDFQRQCQK') ⇒ Object

#

Bioroebe.determine_n_glycosylation_matches

This method can be used to determine N-Glycosylation patterns in a protein.

The input to this method should be an aminoacid chain - aka a protein sequence.

This method will return an Array. This Array holds the indices where a N-glycosylation pattern begins.

Usage example:

Bioroebe.determine_n_glycosylation_matches # => [85, 118, 142, 306, 395]
#


2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2086

# ========================================================================= #
# === Bioroebe.determine_n_glycosylation_matches
#
# Scans the protein sequence with REGEX_FOR_N_GLYCOSYLATION_PATTERN and
# returns a sorted Array of the 1-based positions at which one of the
# matched motifs begins.
#
# Every occurrence of a matched motif is reported. Previously only the
# first position was returned when the very same motif appeared more
# than once in the sequence.
#
# If an Array of sequences is given, an Array holding one result per
# sequence is returned (previously the results were thrown away and the
# input Array itself was returned).
# ========================================================================= #
def self.determine_n_glycosylation_matches(
    of_this_protein_sequence =
      'MKNKFKTQEELVNHLKTVGFVFANSEIYNGLANAWDYGPLGVLLKNNLKNLWWKEFVTKQKDV'\
      'VGLDSAIILNPLVWKASGHLDNFSDPLIDCKNCKARYRADKLIESFDENIHIAENSSNEEFAK'\
      'VLNDYEISCPTCKQFNWTEIRHFNLMFKTYQGVIEDAKNVVYLRPETAQGIFVNFKNVQRSMR'\
      'LHLPFGIAQIGKSFRNEITPGNFIFRTREFEQMEIEFFLKEESAYDIFDKYLNQIENWLVSAC'\
      'GLSLNNLRKHEHPKEELSHYSKKTIDFEYNFLHGFSELYGIAYRTNYDLSVHMNLSKKDLTYF'\
      'DEQTKEKYVPHVIEPSVGVERLLYAILTEATFIEKLENDDERILMDLKYDLAPYKIAVMPLVN'\
      'KLKDKAEEIYGKILDLNISATFDNSGSIGKRYRRQDAIGTIYCLTIDFDSLDDQQDPSFTIRE'\
      'RNSMAQKRIKLSELPLYLNQKAHEDFQRQCQK'
  )
  if of_this_protein_sequence.is_a? Array
    return of_this_protein_sequence.map {|this_sequence|
      determine_n_glycosylation_matches(this_sequence)
    }
  end
  motifs = of_this_protein_sequence.scan(
    REGEX_FOR_N_GLYCOSYLATION_PATTERN
  ).flatten.uniq
  positions = []
  motifs.each {|motif|
    # ===================================================================== #
    # Walk through all occurrences of this motif, not just the first.
    # ===================================================================== #
    offset = 0
    while (hit = of_this_protein_sequence.index(motif, offset))
      positions << hit+1 # +1 because ruby starts at 0.
      offset = hit+1
    end
  }
  positions.uniq.sort
end

.determine_start_codons_from_the_codon_table(this_codon_table_dataset = @codon_table_dataset) ⇒ Object

#

Bioroebe.determine_start_codons_from_the_codon_table

#


61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/bioroebe/codons/codons.rb', line 61

# ========================================================================= #
# === Bioroebe.determine_start_codons_from_the_codon_table
#
# Picks the value stored under the key 'START' in the codon table
# dataset (by default @codon_table_dataset) and passes it on to
# set_start_codon. nil is passed if there is no such key.
# ========================================================================= #
def self.determine_start_codons_from_the_codon_table(
    this_codon_table_dataset = @codon_table_dataset
  )
  start_entry = this_codon_table_dataset.find {|key, _value|
    key == 'START'
  }
  use_these_start_codons = start_entry ? start_entry.last : nil
  set_start_codon(use_these_start_codons)
end

.determine_stop_codons_from_the_codon_table(this_codon_table_dataset = @codon_table_dataset) ⇒ Object

#

Bioroebe.determine_stop_codons_from_the_codon_table

This method will determine the stop codons in use for the given species/organism, depending on the proper codon table.

#


45
46
47
48
49
50
51
52
53
54
55
# File 'lib/bioroebe/codons/codons.rb', line 45

# ========================================================================= #
# === Bioroebe.determine_stop_codons_from_the_codon_table
#
# Collects all keys of the codon table dataset (by default
# @codon_table_dataset) whose value is '*' and passes them, as an
# Array, on to set_stop_codons.
# ========================================================================= #
def self.determine_stop_codons_from_the_codon_table(
    this_codon_table_dataset = @codon_table_dataset
  )
  use_these_stop_codons = []
  this_codon_table_dataset.each_pair {|codon, aminoacid|
    # '*' refers to a stop codon.
    use_these_stop_codons << codon if aminoacid == '*'
  }
  set_stop_codons(use_these_stop_codons)
end

.digest_this_dna(this_DNA_sequence, hash = {}) ⇒ Object

#

Bioroebe.digest_this_dna

This method depends on the file bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb.

Usage examples:

x = Bioroebe.digest_this_dna(:lambda_genome, with: :EcoRI)
x = Bioroebe.digest_this_dna("/root/Bioroebe/fasta/NC_001416.1_Enterobacteria_phage_lambda_complete_genome.fasta", with: :EcoRI)
x = Bioroebe.digest_this_dna("/Depot/j/foobar.fasta", with: :PvuII)
#


3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 3796

# ========================================================================= #
# === Bioroebe.digest_this_dna
#
# Reads a nucleotide sequence from a FASTA file and splits it at every
# occurrence of the recognition sequence of one restriction enzyme.
#
# Arguments:
#
#   this_DNA_sequence: path to a FASTA file; if an Array is given, only
#                      its first element is used
#   hash:              options; the key :with names the restriction
#                      enzyme (it is removed from the Hash passed in)
#
# Returns the Array of fragments if the recognition sequence was found.
# Otherwise 'Nothing found.' is printed and the return value of e() is
# returned.
#
# NOTE(review): only a String naming an existing file yields a
# nucleotide sequence here. A Symbol such as :lambda_genome, as shown
# in the usage examples, is not handled by the code visible in this
# method.
# ========================================================================= #
def self.digest_this_dna(
    this_DNA_sequence,
    hash = {}
  )
  require 'bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb'
  restriction_enzymes = Bioroebe.load_and_return_the_restriction_enzymes
  this_restriction_enzyme = nil
  nucleotide_sequence = nil
  if this_DNA_sequence.is_a? Array
    this_DNA_sequence = this_DNA_sequence.first
  end
  # For any input other than the path of an existing file,
  # nucleotide_sequence stays nil.
  if this_DNA_sequence.is_a?(String) and File.exist?(this_DNA_sequence)
    nucleotide_sequence = ::Bioroebe::ParseFasta.new(this_DNA_sequence).sequence?
  end
  # ======================================================================= #
  # === Handle the hash next (and ensure that it is a Hash)
  # ======================================================================= #
  if hash.is_a? Hash
    # ===================================================================== #
    # === :with
    # ===================================================================== #
    if hash.has_key? :with
      this_restriction_enzyme = hash.delete(:with).to_s
    end
  end
  # NOTE(review): without a :with entry the lookup key is nil here;
  # presumably callers always pass :with - confirm.
  target_sequence = restriction_enzymes[this_restriction_enzyme].dup
  if target_sequence =~ /\d$/ # If it ends with a number.
    target_sequence.chop!
    target_sequence.strip!
  end
  if nucleotide_sequence and
     nucleotide_sequence.include?(target_sequence)
    print rev+'Yes, the restriction-sequence '+
          lightblue(target_sequence)+
          rev+
          ' is found in the given sequence. '
    # NOTE(review): target_sequence is interpolated into the regex
    # without escaping, both here and in the split() below.
    scanned = nucleotide_sequence.scan(
      /#{target_sequence}/
    )
    erev "It can be found #{steelblue(scanned.size.to_s)}#{rev} "\
         "times, at these positions:"
    e
    sub_sequences = nucleotide_sequence.split(/#{target_sequence}/)
    # What is listed next are the sizes of the fragments, largest first,
    # rather than positions within the sequence.
    sub_sequences.sort_by {|entry| entry.size }.reverse.each {|sequence|
      erev "  #{sequence.size}"
    }
    e
    return sub_sequences
  else
    e 'Nothing found.'
  end
end

.directory_frequencies?(codon_tables_directory = CODON_TABLES_DIRECTORY) ⇒ Boolean

#

Bioroebe.directory_frequencies?

Preferentially use this method past the year 2022 - it is a tiny bit more flexible than the above constant.

#

Returns:

  • (Boolean)


685
686
687
688
689
# File 'lib/bioroebe/constants/constants.rb', line 685

# ========================================================================= #
# === Bioroebe.directory_frequencies?
#
# Returns the given directory (CODON_TABLES_DIRECTORY by default) with
# 'frequencies/' appended. No path separator is inserted, so the
# directory is expected to end with a '/'. Despite the trailing '?'
# this method returns a String.
# ========================================================================= #
def self.directory_frequencies?(codon_tables_directory = CODON_TABLES_DIRECTORY)
  codon_tables_directory.to_s + 'frequencies/'
end

.disable_colours(be_verbose = false) ⇒ Object

#

Bioroebe.disable_colours

Use this method if you wish to disable colours for the whole Bioroebe project.

#


186
187
188
189
190
191
# File 'lib/bioroebe/colours/colours.rb', line 186

# ========================================================================= #
# === Bioroebe.disable_colours
#
# Sets @use_colours to false and returns false. If be_verbose is truthy
# then a short notification is printed first, via e().
# ========================================================================= #
def self.disable_colours(
    be_verbose = false
  )
  e 'Disabling colours.' if be_verbose
  @use_colours = false
end

.display_all_open_reading_frames_from_this_sequence(i = ARGV) ⇒ Object

#

Bioroebe.display_all_open_reading_frames_from_this_sequence

#


1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 1560

# ========================================================================= #
# === Bioroebe.display_all_open_reading_frames_from_this_sequence
#
# Prints all open reading frames (ORFs) that
# return_all_open_reading_frames_from_this_sequence finds.
#
# - For empty input the ORFs of the default sequence and the result of
#   Bioroebe.to_aa for them are pretty-printed.
# - Otherwise the sequence and each ORF are printed, one ORF per line,
#   numbered from 1; a notification is shown if no ORF was found.
#
# The input may be an Array (such as ARGV, the default) or a plain
# String; a String used to raise NoMethodError on .join.
# ========================================================================= #
def self.display_all_open_reading_frames_from_this_sequence(i = ARGV)
  require 'bioroebe/colours/colours.rb'
  if i.empty?
    array = Bioroebe.return_all_open_reading_frames_from_this_sequence
    pp array
    pp Bioroebe.to_aa(array)
  else
    array = return_all_open_reading_frames_from_this_sequence(i)
    # Array() wraps a plain String, so that .join works for both kinds
    # of input.
    this_sequence = Array(i).join
    if array.empty?
      e "No open reading frame has been found from "\
        "this sequence: #{this_sequence}"
    else
      e rev+
        'The following ORFs have been found in this sequence: '
      e
      e "  #{Colours.lightgreen(this_sequence)}"
      e
      array.each_with_index {|sequence, index| index += 1
        name_for_the_ORF = "ORF number #{index}"
        e "  #{Colours.steelblue(sequence.ljust(50))} "\
          "#{Colours.lightslategrey('#')} "\
          "#{Colours.mediumseagreen(name_for_the_ORF)}"
      }
      e
    end
  end
end

.dna_sequence(i) ⇒ Object

#

Bioroebe.dna_sequence

Usage example:

dna = Bioroebe.dna_sequence('ATTCGGU')
#


200
201
202
203
204
# File 'lib/bioroebe/sequence/dna.rb', line 200

# ========================================================================= #
# === Bioroebe.dna_sequence
#
# Returns a new ::Bioroebe::DNA object for the input, after removing
# every 'U' (Uracil) from it. If an Array is given, only its first
# element is used.
#
# The String passed in is not modified, so frozen Strings (such as
# literals under "frozen_string_literal: true") are fine too.
# ========================================================================= #
def self.dna_sequence(i)
  i = i.first if i.is_a? Array
  # Non-destructive variant: delete! would change the caller's String
  # and raise FrozenError for a frozen one.
  i = i.delete('U') # Reject Uracil there.
  ::Bioroebe::DNA.new(i)
end

.dna_to_aminoacid_sequence(i = ARGV) ⇒ Object

#

Bioroebe.dna_to_aminoacid_sequence

Usage example:

Bioroebe.dna_to_aminoacid_sequence('ATGGGGCCC') # => "MGP"
#


610
611
612
613
614
# File 'lib/bioroebe/conversions/dna_to_aminoacid_sequence.rb', line 610

# ========================================================================= #
# === Bioroebe.dna_to_aminoacid_sequence
#
# Creates a ::Bioroebe::DnaToAminoacidSequence for the input (ARGV by
# default), passing a block that yields :be_quiet, and returns the
# value of sequence? of that instance.
# ========================================================================= #
def self.dna_to_aminoacid_sequence(i = ARGV)
  converter = ::Bioroebe::DnaToAminoacidSequence.new(i) { :be_quiet }
  converter.sequence?
end

.do_not_truncateObject

#

Bioroebe.do_not_truncate

Do not truncate any “too long” output. This method disables the truncate-functionality.

#


146
147
148
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 146

# ========================================================================= #
# === Bioroebe.do_not_truncate
#
# Sets the @truncate instance variable to false and returns false.
# ========================================================================= #
def self.do_not_truncate
  @truncate = false
end

.do_truncateObject

#

Bioroebe.do_truncate

#


136
137
138
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 136

# ========================================================================= #
# === Bioroebe.do_truncate
#
# Sets the @truncate instance variable to true and returns true.
# ========================================================================= #
def self.do_truncate
  @truncate = true
end

.dotplot_array(dna_x, dna_y) ⇒ Object

#

Bioroebe.dotplot_array

This method can be used to return a 2D dotplot-array of two input sequences. Be careful with large data as input - the RAM usage may go up, so this method has NOT been optimized for such situations. It is deliberately kept simple.

#


215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'lib/bioroebe/dotplots/advanced_dotplot.rb', line 215

# ========================================================================= #
# === Bioroebe.dotplot_array
#
# Returns a 2D Array (an Array of rows) for two input sequences. There
# is one row per character of dna_y and one column per character of
# dna_x; a cell is 1 if both characters are equal, and 0 otherwise.
#
# Every row is a separate Array. The matrix is built in a single pass;
# the former extra matrix that was allocated and immediately discarded
# is gone.
# ========================================================================= #
def self.dotplot_array(dna_x, dna_y)
  x_values = dna_x.chars # Split only once; reused for every row.
  # ======================================================================= #
  # We work from top-to-bottom: one row per character of dna_y.
  # ======================================================================= #
  dna_y.chars.map {|y_value|
    x_values.map {|x_value| x_value == y_value ? 1 : 0 }
  }
end

.downcase_chunked_display(i, group_together_n_nucleotides = 10) ⇒ Object

#

Bioroebe.downcase_chunked_display

This is similar to the regular chunked display, but will return the nucleotides in a downcased manner, aka “A” will become “a” and so forth.

In the past this functionality resided in its own .rb file. In March 2020 a bin/ executable was added, so that the functionality can be called more easily once the bioroebe gem is installed.

Usage example:

Bioroebe.downcase_chunked_display 'ATGGGGCCTGCAATGGGGCCTGCAATGGGGCCTGCAATGGGGCCTGCAATGGGGCCTGCAATGGGGCCTGCAATGGGGCCTGCAATGGGGCCTGCA'
#


4154
4155
4156
4157
4158
4159
4160
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4154

# ========================================================================= #
# === Bioroebe.downcase_chunked_display
#
# Returns the downcased result of ::Bioroebe.return_chunked_display for
# the given sequence and group size (10 by default).
# ========================================================================= #
def self.downcase_chunked_display(
    i,
    group_together_n_nucleotides = 10
  )
  chunked = ::Bioroebe.return_chunked_display(i, group_together_n_nucleotides)
  chunked.downcase
end

.download(from_these_URLs) ⇒ Object

#

Bioroebe.download

#


4415
4416
4417
4418
4419
4420
4421
4422
4423
4424
4425
4426
4427
4428
4429
4430
4431
4432
4433
4434
4435
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4415

# ========================================================================= #
# === Bioroebe.download
#
# For every given URL the remote file listing is obtained via
# "curl -s URL --list-only", and each listed file is then handed to
# wget_download. nil entries are ignored; a single URL may be passed
# instead of an Array.
#
# curl is started without a shell, so characters in the URL that are
# special to a shell are passed on verbatim rather than interpreted.
#
# Returns the Array of URLs that were processed.
# ========================================================================= #
def self.download(
    from_these_URLs
  )
  require 'open-uri'
  array_these_urls = [from_these_URLs].flatten.compact
  array_these_urls.each {|remote_url|
    # ===================================================================== #
    # First, we must determine the remote file listing here.
    # Due to convenience we will simply use curl here. The Array form of
    # IO.popen bypasses the shell.
    # ===================================================================== #
    listing = IO.popen(
      ['curl', '-s', remote_url.to_s, '--list-only'], &:read
    )
    remote_files = listing.split("\n")
    remote_files.each {|this_remote_file|
      target = remote_url+this_remote_file
      e "Downloading `#{this_remote_file}` next. "\
        "(Full target: '#{target}')"
      wget_download(target)
    }
  }
end

.download_directory?Boolean

#

Bioroebe.download_directory?

#

Returns:

  • (Boolean)


171
172
173
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 171

# ========================================================================= #
# === Bioroebe.download_directory?
#
# Returns the value of log_directory? with 'Downloads/' appended.
# Despite the trailing '?' this method returns a String.
# ========================================================================= #
def self.download_directory?
  base_directory = log_directory?
  base_directory.to_s + 'Downloads/'
end

.download_fasta(i) ⇒ Object

#

Bioroebe.download_fasta

Easier wrapper-method to download fasta files.

#


233
234
235
# File 'lib/bioroebe/fasta_and_fastq/download_fasta.rb', line 233

# ========================================================================= #
# === Bioroebe.download_fasta
#
# Creates a ::Bioroebe::DownloadFasta for the input and returns the
# value of location? of that instance.
# ========================================================================= #
def self.download_fasta(i)
  downloader = ::Bioroebe::DownloadFasta.new(i)
  downloader.location?
end

.download_human_genome(from_this_URL = 'https://bioconductor.org/packages/release/data/annotation/src/contrib/BSgenome.Hsapiens.UCSC.hg38_1.4.4.tar.gz') ⇒ Object

#

Bioroebe.download_human_genome

#


2781
2782
2783
2784
2785
2786
2787
2788
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2781

# ========================================================================= #
# === Bioroebe.download_human_genome
#
# Runs "wget URL" via esystem and then calls extract() with the file
# name part of that URL. The return value of extract() is returned.
# ========================================================================= #
def self.download_human_genome(
    from_this_URL = 'https://bioconductor.org/packages/release/data/annotation/src/contrib/BSgenome.Hsapiens.UCSC.hg38_1.4.4.tar.gz'
  )
  esystem "wget #{from_this_URL}"
  name_of_the_archive = File.basename(from_this_URL)
  extract(name_of_the_archive)
end

.download_taxonomy_database(i = ::Bioroebe::FTP_NCBI_TAXONOMY_DATABASE) ⇒ Object

#

Bioroebe.download_taxonomy_database

#


92
93
94
95
96
# File 'lib/bioroebe/databases/download_taxonomy_database.rb', line 92

# ========================================================================= #
# === Bioroebe.download_taxonomy_database
#
# Returns a new DownloadTaxonomyDatabase instance for the given input
# (by default ::Bioroebe::FTP_NCBI_TAXONOMY_DATABASE).
# ========================================================================= #
def self.download_taxonomy_database(i = ::Bioroebe::FTP_NCBI_TAXONOMY_DATABASE)
  return DownloadTaxonomyDatabase.new(i)
end

.download_this_pdb(i = '355D') ⇒ Object

#

Bioroebe.download_this_pdb

This method can be used to download a remote .pdb file to the local file-system. If the default pdb/ directory exists locally as well, then the downloaded .pdb file will be relocated into that directory.

An example for a remote URL to a .pdb file would be:

https://files.rcsb.org/view/2BTS.pdb
https://files.rcsb.org/view/355D.pdb
#


29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/bioroebe/pdb_and_protein_structure/download_this_pdb.rb', line 29

# Downloads one or several remote .pdb files via wget.
#
# +i+ may be a single PDB id or an Array of ids; nil entries are ignored.
# Each id is normalised to "<UPPERCASE ID>.pdb": an existing ".pdb"
# extension is removed first, regardless of its case, so that "2bts",
# "2bts.pdb" and "2BTS.PDB" all resolve to "2BTS.pdb".
#
# The caller's objects are never modified; normalisation always works on
# a fresh String.
#
# If the downloaded file exists in the current directory afterwards, it
# is handed to ::Bioroebe.move_file_to_its_correct_location.
#
# Returns the flattened, compacted Array of the given input entries.
def self.download_this_pdb(
    i = '355D'
  )
  # ======================================================================= #
  # Treat all input as an Array past the next point.
  # ======================================================================= #
  [i].flatten.compact.each {|this_entry|
    # sub() and upcase() are the non-destructive variants, so frozen and
    # non-frozen input are both left untouched.
    pdb_id = this_entry.to_s.strip.sub(/\.pdb\z/i, '').upcase
    next if pdb_id.empty? # Nothing sensible to download in this case.
    file_name = "#{pdb_id}.pdb"
    e file_name
    # ===================================================================== #
    # Build up our remote URL next:
    # ===================================================================== #
    remote_url = "https://files.rcsb.org/view/#{file_name}"
    e steelblue(remote_url)
    esystem "wget #{remote_url}"
    local_file = File.basename(remote_url)
    if File.exist? local_file
      ::Bioroebe.move_file_to_its_correct_location(local_file)
    end
  }
end

.e(i = '') ⇒ Object

#

Bioroebe.e

#


246
247
248
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 246

# Short alias for output: prints +i+ (an empty String by default) via puts.
def self.e(i = '')
  puts i
end

.editor?Boolean

#

Bioroebe.editor?

This method will determine which editor is to be used, if we have to use an editor for the bioroebe project.

#

Returns:

  • (Boolean)


442
443
444
445
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 442

# Loads bioroebe/configuration/constants.rb on demand and returns the
# value of ::Bioroebe::Configuration::DEFAULT_EDITOR_TO_USE.
def self.editor?
  require 'bioroebe/configuration/constants.rb'
  ::Bioroebe::Configuration::DEFAULT_EDITOR_TO_USE
end

.embeddable_interfaceObject

#

Bioroebe.embeddable_interface

#


775
776
777
778
779
# File 'lib/bioroebe/www/embeddable_interface.rb', line 775

# Hands back a fresh, otherwise empty Object whose singleton has been
# extended with ::Bioroebe::EmbeddableInterface.
def self.embeddable_interface
  # Object#extend returns its receiver, so the call can be chained.
  Object.new.extend(::Bioroebe::EmbeddableInterface)
end

.enable_coloursObject

#

Bioroebe.enable_colours

Use this method to enable colours for the whole Bioroebe project.

All classes that are part of the Bioroebe project should honour this setting (if it is a class that may make use of colours; some smaller classes do not need colours, and hence have no need for the method here).

#


203
204
205
# File 'lib/bioroebe/colours/colours.rb', line 203

# Sets the instance variable @use_colours on the receiver to true and
# returns true.
def self.enable_colours
  @use_colours = true
end

.ensure_that_the_base_directories_existObject

#

Bioroebe.ensure_that_the_base_directories_exist

This method will ensure that the base directories for the Bioroebe project exist.

#


3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 3282

# Creates every directory the project expects, announcing each creation
# via erev beforehand. Checked in order: the base log directory, its
# Downloads/ and pdb/ subdirectories, and finally the directory named by
# AUTOGENERATED_SQL_FILES_DIR.
def self.ensure_that_the_base_directories_exist
  # ======================================================================= #
  # === The base (log) directory comes first
  # ======================================================================= #
  base_dir = log_dir?
  unless File.exist? base_dir
    erev "The base directory at `#{sdir(base_dir)}#{rev}` does not exist."
    erev 'It will thus be created next.'
    mkdir base_dir
  end
  # ======================================================================= #
  # === Ensure that the Downloads/ and the pdb/ directory exist
  # ======================================================================= #
  %w( Downloads/ pdb/ ).each {|subdirectory|
    target = "#{base_dir}#{subdirectory}"
    next if File.exist? target
    erev "The directory at `#{sdir(target)}#{rev}` does not exist."
    erev 'It will thus be created next.'
    mkdir target
  }
  # ======================================================================= #
  # === Ensure that the directory for autogenerated SQL files exists
  # ======================================================================= #
  sql_dir = AUTOGENERATED_SQL_FILES_DIR
  return if Dir.exist? sql_dir
  erev 'The directory at `'+sdir(sql_dir)+
       rev+'` does not exist.'
  erev 'It will thus be created next.'
  mkdir(sql_dir)
end

.erev(i = '') ⇒ Object

#

Bioroebe.erev

#


69
70
71
# File 'lib/bioroebe/colours/colours.rb', line 69

# Prints +i+ via puts, prefixed with whatever rev returns.
def self.erev(i = '')
  puts "#{rev}#{i}"
end

.esystem(i) ⇒ Object

#

Bioroebe.esystem

#


253
254
255
256
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 253

# Echoes the command +i+ (converted via to_s) with puts and then runs
# that same String through system. The return value is the one of the
# system call.
def self.esystem(i)
  puts i.to_s
  system i.to_s
end

.every_reverse_palindrome_in_this_string(i = 'TCAATGCATGCGGGTCTATATGCAT', min_length = 4, max_length = 12) ⇒ Object

#

Bioroebe.every_reverse_palindrome_in_this_string

This method can return every reverse palindrome in the given input String.

The output will be an Array such as this:

[[4, 6], [5, 4], [6, 6], [7, 4], [17, 4], [18, 4], [4, 6], [5, 4]]
#


4971
4972
4973
4974
4975
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991
4992
4993
4994
4995
4996
4997
4998
4999
5000
5001
5002
5003
5004
5005
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4971

# Collects [position, length] pairs for every substring of +i+ whose
# reverse complement equals the substring itself and whose size lies
# between +min_length+ and +max_length+ (both inclusive).
#
# +i+ may be a String, an Array (only its first element is used) or the
# path to an existing file; in the latter case the lines starting with
# ">" are dropped and the remaining lines are stripped and joined.
#
# Returns an Array of [1-based position, length] Arrays.
#
# NOTE(review): the position is computed via i.index(entry), which is the
# FIRST occurrence of that substring in the full sequence. A palindrome
# that occurs more than once is thus reported at its first position every
# time - this seems to be why the documented example output ends with a
# repeated [4, 6], [5, 4]. Confirm whether that is intended.
def self.every_reverse_palindrome_in_this_string(
    i          = 'TCAATGCATGCGGGTCTATATGCAT',
    min_length =  4,
    max_length = 12
  )
  require 'bioroebe/sequence/reverse_complement.rb'
  if i.is_a? Array # Arrays will become Strings - or rather, whatever is the first argument.
    i = i.first
  end
  if i and File.exist?(i)
    # Read the sequence from the file, skipping FASTA-style ">" header lines.
    i = File.readlines(i).reject {|entry|
      entry.start_with?('>')
    }.map {|inner_entry| inner_entry.strip }.join
  end
  # ======================================================================= #
  # How do we find all subsequences that are relevant? Well - we
  # need to find all the sequences between min_length and
  # max_length, e. g. 4 and 12.
  # ======================================================================= #
  string = i.dup # Working copy; it loses its first character on every pass.
  array_containing_starting_index_and_length_of_reverse_palindromes = []
  i.size.times {
    # What exactly the helper returns is not visible here; presumably the
    # substrings that start at the beginning of +string+ - TODO confirm.
    substrings = return_every_substring_from_this_sequence(string)
    substrings.each {|entry|
      next if entry.size > max_length
      if (entry.size >= min_length) and
         (Bioroebe.reverse_complement(entry) == entry)
        array_containing_starting_index_and_length_of_reverse_palindromes << 
          [i.index(entry)+1, entry.size]
      end
    }
    string[0,1] = '' # Chop off the leading character before the next pass.
  }
  return array_containing_starting_index_and_length_of_reverse_palindromes
end

.ewarn(i = '') ⇒ Object

#

Bioroebe.ewarn

#


168
169
170
# File 'lib/bioroebe/colours/colours.rb', line 168

# Passes +i+ through swarn and prints the result via e.
def self.ewarn(i = '')
  e swarn(i)
end

.extract(i = ARGV) ⇒ Object

#

Bioroebe.extract

This method can be used to quickly extract a local archive.

#


2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2321

# Extracts a local tar archive by shelling out to tar.
#
# +i+ is the path of the archive (an Array such as ARGV is joined with
# spaces first). A bare file name that does not exist relative to the
# current directory is looked up below return_pwd instead.
#
# Names ending in bz2 or xz, as well as .tar.gz, .tgz and .tar, are
# extracted with "tar -xvf"; the path is shell-escaped so that names
# containing spaces work. A warning is emitted - and nothing is run - if
# the file does not exist or if the archive format is not recognized.
#
# In verbose mode (be_verbose?) the command and a short notice are
# printed as well.
def self.extract(
    i = ARGV
  )
  require 'shellwords'
  require 'bioroebe/colours/sdir_sfancy_sfile_simp_swarn.rb'
  if i.is_a? Array
    i = i.join(' ').strip
  end
  unless i.include?('/') or File.exist?(i)
    i = return_pwd+
        File.basename(i)
  end
  unless File.exist? i
    ewarn "Can not extract #{sfile(i)} because it does "\
          "not appear to exist."
    return
  end
  case i
  when /bz2$/,
       /xz$/,
       /\.tar\.gz$/,
       /\.tgz$/,
       /\.tar$/
    # The archive name has to follow -f directly, hence -xvf rather
    # than -xfv (which would treat "v" as the archive name).
    command = "tar -xvf #{Shellwords.escape(i)}"
  else
    ewarn "Can not extract #{sfile(i)} because its archive "\
          "format is not supported."
    return
  end
  if be_verbose?
    e "Now extracting `#{sfancy((i).squeeze('/'))}`."
    esystem(command)
    e 'Done extracting!'
  else
    system command
  end
end

.extractseq(i = 'AAAGGGTTT', *regions) ⇒ Object

#

Bioroebe.extractseq

Bioroebe.extractseq reads a sequence and writes sub-sequences from it to file. The set of regions to extract is specified on the command-line or in a file as pairs of start and end positions. The regions are written in the order in which they are specified. Thus, if the sequence AAAGGGTTT has been input and the regions: 7-9, 3-4 have been specified, then the output sequence will be:

TTTAG

See the next ruler for that:

012345678 # real index
123456789 # desired index
AAAGGGTTT

Usage example

Bioroebe.extractseq('AAAGGGTTT', '7-9','3-4') # => TTTAG
#


282
283
284
285
286
287
288
289
290
291
292
293
294
295
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 282

# Builds a new String out of the given +regions+ of the sequence +i+.
#
# Every region is a "start-end" String using 1-based, inclusive
# positions; the slices are appended in the order in which the regions
# were given. Without any region an empty String is returned.
def self.extractseq(
    i = 'AAAGGGTTT',
    *regions
  )
  regions.each_with_object(''.dup) {|region, assembled|
    start_token, stop_token = region.split('-') # A '-' is assumed to be present.
    from = start_token.to_i - 1 # Convert to the 0-based index.
    upto = stop_token.to_i - 1
    assembled << i[from .. upto]
  }
end

.fasta_dir?Boolean

#

Bioroebe.fasta_dir?

#

Returns:

  • (Boolean)


721
722
723
# File 'lib/bioroebe/constants/constants.rb', line 721

# Returns a String: the value of Bioroebe.log_dir? with "fasta/" appended.
def self.fasta_dir?
  "#{Bioroebe.log_dir?}fasta/"
end

.fasta_directory?Boolean

#

Bioroebe.fasta_directory?

This method will return a path such as “/root/Bioroebe/fasta/”.

#

Returns:

  • (Boolean)


193
194
195
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 193

# Returns a String: the value of ::Bioroebe.log_directory? with "fasta/"
# appended.
def self.fasta_directory?
  "#{::Bioroebe.log_directory?}fasta/"
end

.fetch_data_from_uniprot(i = ARGV) ⇒ Object

#

Bioroebe.fetch_data_from_uniprot

#


259
260
261
# File 'lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb', line 259

# Creates a new Bioroebe::FetchDataFromUniprot for +i+ (ARGV by default)
# and returns that instance.
def self.fetch_data_from_uniprot(i = ARGV)
  Bioroebe::FetchDataFromUniprot.new(i)
end

.fetch_fasta_sequence_from_pdb(i = ARGV) ⇒ Object

#

Bioroebe.fetch_fasta_sequence_from_pdb

#


126
127
128
# File 'lib/bioroebe/pdb_and_protein_structure/fetch_fasta_sequence_from_pdb.rb', line 126

# Creates a new Bioroebe::FetchFastaSequenceFromPdb for +i+ (ARGV by
# default) and returns that instance.
def self.fetch_fasta_sequence_from_pdb(i = ARGV)
  Bioroebe::FetchFastaSequenceFromPdb.new(i)
end

.file_amino_acidsObject

#

Bioroebe.file_amino_acids

#


638
639
640
# File 'lib/bioroebe/constants/constants.rb', line 638

# Returns the value of the constant FILE_AMINO_ACIDS.
def self.file_amino_acids
  FILE_AMINO_ACIDS
end

.file_amino_acids_abbreviationsObject

#

Bioroebe.file_amino_acids_abbreviations

#


651
652
653
# File 'lib/bioroebe/constants/constants.rb', line 651

# Returns the value of the constant FILE_AMINO_ACIDS_ABBREVIATIONS.
def self.file_amino_acids_abbreviations
  FILE_AMINO_ACIDS_ABBREVIATIONS
end

.file_amino_acids_frequencyObject

#

Bioroebe.file_amino_acids_frequency

#


830
831
832
# File 'lib/bioroebe/constants/constants.rb', line 830

# Returns a String: BIOROEBE_YAML_AMINOACIDS_DIRECTORY with
# "amino_acids_frequency.yml" appended.
def self.file_amino_acids_frequency
  "#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_frequency.yml"
end

.file_amino_acids_long_name_to_one_letterObject

#

Bioroebe.file_amino_acids_long_name_to_one_letter

This method will return a String such as:

"/home/Programs/Ruby/3.1.2/lib/ruby/site_ruby/3.1.0/bioroebe/yaml/aminoacids/amino_acids_long_name_to_one_letter.yml"
#


1134
1135
1136
# File 'lib/bioroebe/constants/constants.rb', line 1134

# Returns a String: the value of project_yaml_directory? with
# "aminoacids/amino_acids_long_name_to_one_letter.yml" appended.
def self.file_amino_acids_long_name_to_one_letter
  "#{project_yaml_directory?}aminoacids/amino_acids_long_name_to_one_letter.yml"
end

.file_fastq_quality_schemesObject

#

Bioroebe.file_fastq_quality_schemes

This method will return a path to a location such as this one here:

/Programs/Ruby/2.6.4/lib/ruby/site_ruby/2.6.0/bioroebe/yaml/fastq_quality_schemes.yml
#


733
734
735
# File 'lib/bioroebe/constants/constants.rb', line 733

# Returns a String: the value of project_yaml_directory? with
# "fasta_and_fastq/fastq_quality_schemes.yml" appended.
def self.file_fastq_quality_schemes
  "#{project_yaml_directory?}fasta_and_fastq/fastq_quality_schemes.yml"
end

.file_molecular_weightObject

#

Bioroebe.file_molecular_weight

#


1122
1123
1124
# File 'lib/bioroebe/constants/constants.rb', line 1122

# Returns a String: the value of project_yaml_directory? with
# "aminoacids/molecular_weight.yml" appended.
def self.file_molecular_weight
  "#{project_yaml_directory?}aminoacids/molecular_weight.yml"
end

.file_restriction_enzymesObject

#

Bioroebe.file_restriction_enzymes

#


885
886
887
# File 'lib/bioroebe/constants/constants.rb', line 885

# Returns the value of the constant FILE_RESTRICTION_ENZYMES.
def self.file_restriction_enzymes
  FILE_RESTRICTION_ENZYMES
end

.file_statistics?Boolean

#

Bioroebe.file_statistics?

This file can normally be found here:

$BIOROEBE/yaml/statistics.yml
#

Returns:

  • (Boolean)


714
715
716
# File 'lib/bioroebe/constants/constants.rb', line 714

# Returns a String: the value of Bioroebe.log_dir? with "statistics.yml"
# appended - that is, a path below the log directory.
def self.file_statistics?
  "#{Bioroebe.log_dir?}statistics.yml"
end

.file_talensObject

#

Bioroebe.file_talens

#


740
741
742
# File 'lib/bioroebe/constants/constants.rb', line 740

# Returns a String: the value of project_yaml_directory? with
# "talens.yml" appended.
def self.file_talens
  "#{project_yaml_directory?}talens.yml"
end

.filter_away_invalid_aminoacids(i) ⇒ Object

#

Bioroebe.filter_away_invalid_aminoacids

Usage example:

Bioroebe.filter_away_invalid_aminoacids('ATMÜ') # => "ATM"
#


174
175
176
177
# File 'lib/bioroebe/constants/constants.rb', line 174

# Keeps only those characters of +i+ that are included in the collection
# returned by all_aminoacids? and returns them, in their original order,
# as a new String.
def self.filter_away_invalid_aminoacids(i)
  permitted = all_aminoacids?
  i.each_char.select {|letter| permitted.include?(letter) }.join
end

.filter_away_invalid_nucleotides(i, preserve_uracil = false) ⇒ Object

#

Bioroebe.filter_away_invalid_nucleotides

This method can be used to filter away invalid nucleotides. An “invalid” nucleotide is, for example, if you work with DNA sequences, any character that is not allowed to be part of DNA. For example, Uracil, which can be found (almost exclusively) only in RNA.

As for now, the behaviour is to upcase the given input before applying the .tr() method on the given String.

Usage example:

Bioroebe.filter_away_invalid_nucleotides 'ATGCCGGAGGAGANNN' # => "ATGCCGGAGGAGA"
#


3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 3865

# Upcases +i+ and removes every character that is part of the "invalid"
# character set below (which also contains the comma). An Array is
# joined with a space and stripped first.
#
# +preserve_uracil+ may be true / :preserve_uracil to keep U in the
# result (A T C G U), or false / :preserve_nothing to drop it as
# well (A T C G).
#
# Returns a new, upcased String.
def self.filter_away_invalid_nucleotides(
    i,
    preserve_uracil = false
  )
  i = i.join(' ').strip if i.is_a? Array
  keep_uracil =
    case preserve_uracil
    when :preserve_uracil  then true
    when :preserve_nothing then false
    else preserve_uracil
    end
  unwanted_characters =
    if keep_uracil
      'B,D-F,H-S,V-Z' # A T C G U
    else
      'B,D-F,H-S,U-Z' # A T C G
    end
  i.to_s.upcase.delete(unwanted_characters)
end

.find_substring(full_string = 'GATATATGCATATACTT', this_substring = :default) ⇒ Object

#

Bioroebe.find_substring

This method can be used to find a substring within a larger String.

For example, in the below default values, the substring “ATAT” would exist at the positions 2, 4 and 10, if compared to the larger parent string “GATATATGCATATACTT”.

The following display may help you see this more easily, in regards to the substring matches:

GATATATGCATATACTT
 ATATAT  ATAT

If you look closely, you will be able to see that “ATAT” can be found three times in the string above.

Indices in this context start at position 1, not 0. This is mostly done to refer to nucleotides or aminoacids, which also typically start at the first letter. Position 0 makes no sense for a nucleotide - what would “nucleotide 0” even refer to?

The first argument to this method may also be the path to a locally existing file, such as “/rosalind_subs.txt”. In fact this method has been largely motivated by Rosalind tasks.

The method will return an Array with the positions of all substrings that are found in the full_string variable. See the usage example below for how this may be.

Usage example:

Bioroebe.find_substring 'GATATATGCATATACTT', 'ATAT' # => [2, 4, 10]
#


2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2424

# Finds every (possibly overlapping) occurrence of +this_substring+
# within +full_string+.
#
# +full_string+ may be a String, an Array (only its first member is used)
# or the path to an existing file. A file is only read when no substring
# was given; its first line then becomes the full string and its last
# line the substring.
#
# +this_substring+ defaults to 'ATAT' when it is :default, nil or empty.
# A nil or empty +full_string+ falls back to 'GATATATGCATATACTT'.
#
# Returns an Array of 1-based start positions, or nil if the substring
# does not occur at all.
def self.find_substring(
    full_string    = 'GATATATGCATATACTT', # ← The full String comes here.
    this_substring = :default             # ← The substring we are searching for comes here.
  )
  if full_string.is_a? Array
    # ===================================================================== #
    # Presently this method will only work on the first member of an Array.
    # ===================================================================== #
    full_string = full_string.first
  end
  if full_string and File.file?(full_string) and
     this_substring == :default
    # ===================================================================== #
    # In this case it is ok to read from that file.
    # ===================================================================== #
    lines = File.read(full_string).split("\n")
    full_string    = lines.first
    this_substring = lines.last
  end
  # ======================================================================= #
  # Use a default value if no usable substring was given. In reality users
  # should supply their own substring when they use this method here.
  # ======================================================================= #
  if this_substring.nil? or this_substring == :default or
     this_substring.empty?
    this_substring = 'ATAT'
  end
  if full_string.nil? or full_string.empty?
    full_string = 'GATATATGCATATACTT' # ← Use the default in this case.
  end
  haystack = full_string.to_s
  needle   = this_substring.to_s
  positions = []
  offset = 0
  # ======================================================================= #
  # String#index returns nil once no further match exists, which also
  # covers a match that starts at the very last possible position. The
  # search resumes one character after each hit, so that overlapping
  # matches are found as well.
  # ======================================================================= #
  while (hit = haystack.index(needle, offset))
    positions << hit + 1 # Report 1-based positions.
    offset = hit + 1
  end
  positions.empty? ? nil : positions # nil is returned if nothing was found.
end

.format_this_nucleotide_sequence(i = ARGV, &block) ⇒ Object

#

Bioroebe.format_this_nucleotide_sequence

#


660
661
662
663
664
665
666
667
668
669
670
# File 'lib/bioroebe/nucleotides/show_nucleotide_sequence/show_nucleotide_sequence.rb', line 660

# Creates a ::Bioroebe::ShowNucleotideSequence for +i+ (passing
# :do_not_report_anything and the optional block along), calls
# clear_padding and format on it, and returns the result of
# formatted_sequence?.
def self.format_this_nucleotide_sequence(
    i = ARGV,
    &block
  )
  _ = ::Bioroebe::ShowNucleotideSequence.new(
    i, :do_not_report_anything, &block
  )
  _.clear_padding
  _.format
  _.formatted_sequence?
end

.frequency_per_thousand(i) ⇒ Object

#

Bioroebe.frequency_per_thousand

The input to this method should be a String ideally. If an Array is input then it will simply be .join()-ed.

This method will return a String, if all goes well.

#


867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 867

# Builds a textual codon-frequency table for the sequence +i+.
#
# +i+ should be a String; an Array is joined first. Newlines are removed
# before counting. The input itself is left untouched, so frozen Strings
# are fine, too.
#
# The codon counts come from ::Bioroebe.codon_frequency_of_this_string;
# codons missing from that Hash count as 0. Every cell has the form
#
#   <codon><thousand_percentage(count, total)>(     <count>)
#
# and the 64 codons are laid out in four groups (first base U, C, A, G),
# each group having four rows (third base U, C, A, G) of four cells
# (second base U, C, A, G). Groups are separated by an empty line.
#
# Returns the table as a String, beginning with a "fields: ..." header
# line.
def self.frequency_per_thousand(i)
  bases = %w( U C A G )
  result = "fields: [triplet] [frequency: per thousand] ([number])\n".dup # This String will be returned.
  if i.is_a? Array
    i = i.join
  end
  i = i.delete("\n") # Non-destructive: do not modify the caller's String.
  hash = ::Bioroebe.codon_frequency_of_this_string(i)
  hash.default = 0
  total_n_elements = hash.values.sum
  bases.each {|first_base|
    result << "\n" # An empty line precedes every group.
    bases.each {|third_base|
      row = bases.map {|second_base|
        codon = "#{first_base}#{second_base}#{third_base}"
        "#{codon}#{thousand_percentage(hash[codon], total_n_elements)}(     #{hash[codon]})"
      }
      result << row.join('  ') << "\n"
    }
  }
  return result
end

.gc_content(of_this_sequence, round_to_n_positions = 3) ⇒ Object

#

Bioroebe.gc_content

This is a convenience method that will return back the GC content, as a percentage value, of the input-given sequence (nucleotide sequence).

So for instance, the following example will correctly return 50.0 because the G and C content of the sequence is exactly 50%.

The second argument can be used for denoting where to round.

Usage example:

Bioroebe.gc_content('ATCG') # => 50.0
#


280
281
282
283
284
285
286
287
288
289
290
291
292
293
# File 'lib/bioroebe/calculate/calculate_gc_content.rb', line 280

# Returns the GC content, as a percentage value, of the given nucleotide
# sequence, as calculated by ::Bioroebe::CalculateGCContent.gc_percentage.
#
# +of_this_sequence+ is usually a String. If an Array is given, then
# every member is processed on its own and an Array holding the
# individual results (in the same order) is returned.
#
# +round_to_n_positions+ is handed to gc_percentage as-is and denotes
# where to round.
def self.gc_content(
    of_this_sequence,
    round_to_n_positions = 3
  )
  if of_this_sequence.is_a? Array
    # Recurse on each member, not on the Array itself - the latter
    # would never terminate.
    of_this_sequence.map {|entry|
      gc_content(entry, round_to_n_positions)
    }
  else
    ::Bioroebe::CalculateGCContent.gc_percentage(
      of_this_sequence, round_to_n_positions
    )
  end
end

.genbank_to_fasta(this_file, be_verbose = :be_verbose) ⇒ Object

#

Bioroebe.genbank_to_fasta

This method will convert from a genbank file, to a .fasta file.

Invocation example:

Bioroebe.genbank_to_fasta('/home/x/programming/ruby/src/bioroebe/lib/bioroebe/data/genbank/sample_file.genbank')
#


1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
# File 'lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb', line 1457

# Converts a genbank file into a .fasta file by way of
# Bioroebe::ParseFasta and returns whatever save_into_a_fasta_file
# returns (named file_path in the original code).
#
# +this_file+ is the path to the genbank file (for an Array the first
# member is used). If it does not exist, the parser is created with
# :do_not_run_yet and set_data plus split_into_proper_sections are
# called on it instead.
#
# +be_verbose+ is passed on to save_into_a_fasta_file; :be_quiet is
# translated into false beforehand.
def self.genbank_to_fasta(
    this_file,
    be_verbose = :be_verbose
  )
  be_verbose = false if be_verbose == :be_quiet
  this_file = this_file.first if this_file.is_a? Array
  parser =
    if File.exist? this_file
      Bioroebe::ParseFasta.new(this_file) { :be_quiet }
    else
      fallback = Bioroebe::ParseFasta.new(:do_not_run_yet) { :be_quiet }
      fallback.set_data # This will use the default file.
      fallback.split_into_proper_sections
      fallback
    end
  parser.save_into_a_fasta_file(be_verbose)
end

.generate_nucleotide_sequence_based_on_these_frequencies(n_nucleotides = 1061, hash_frequencies = { A: 0.3191430, C: 0.2086633, G: 0.2580345, T: 0.2141593 }) ⇒ Object

#

Bioroebe.generate_nucleotide_sequence_based_on_these_frequencies

The second argument to this method should be a Hash.

The default output may be a String such as this one here:

AACTGAACATTTTAGGAGATATCAAGACCCTCTGATTCTCAAGGAATAATTAGCTAATTT

Usage example:

Bioroebe.generate_nucleotide_sequence_based_on_these_frequencies(:default, { A: 0.25, C: 0.25, G: 0.25, T: 0.25 })
#


4662
4663
4664
4665
4666
4667
4668
4669
4670
4671
4672
4673
4674
4675
4676
4677
4678
4679
4680
4681
4682
4683
4684
4685
4686
4687
4688
4689
4690
4691
4692
4693
4694
4695
4696
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4662

# Generates a random nucleotide String according to the given frequencies.
#
# +n_nucleotides+ denotes how many draws are made; :default stands
# for 500. +hash_frequencies+ maps :A, :C, :G and :T to their respective
# share. For every draw a random Float in [0, 1) is compared against the
# running totals A, A+C, A+C+G and A+C+G+T, in that order; a draw that
# exceeds all of them appends nothing.
#
# Returns the generated String.
def self.generate_nucleotide_sequence_based_on_these_frequencies(
    n_nucleotides = 1061, # Denote how many nucleotides to use.
    hash_frequencies = {
      A: 0.3191430,
      C: 0.2086633,
      G: 0.2580345,
      T: 0.2141593
    }
  )
  # ======================================================================= #
  # === :default
  # ======================================================================= #
  n_nucleotides = 500 if n_nucleotides == :default
  share_a, share_c, share_g, share_t =
    hash_frequencies.values_at(:A, :C, :G, :T)
  n_nucleotides.times.each_with_object(''.dup) {|_run, sequence|
    roll = rand
    # The running totals are summed up lazily, only when they are needed.
    if roll <= share_a
      sequence << 'A'
    elsif roll <= (share_a+share_c)
      sequence << 'C'
    elsif roll <= (share_a+share_c+share_g)
      sequence << 'G'
    elsif roll <= (share_a+share_c+share_g+share_t)
      sequence << 'T'
    end
  }
end

.generate_pdf_tutorialObject

#

Bioroebe.generate_pdf_tutorial

#


11918
11919
11920
# File 'lib/bioroebe/shell/shell.rb', line 11918

# Delegates to ::Bioroebe::Shell.generate_pdf_tutorial and returns its
# result.
def self.generate_pdf_tutorial
  ::Bioroebe::Shell.generate_pdf_tutorial
end

.generate_random_dna_sequence(i = ARGV, optional_hash_with_the_frequencies = {}) ⇒ Object

#

Bioroebe.generate_random_dna_sequence

This method will “generate” a random DNA sequence (as a String).

A String will be returned by this method.

The second argument to this method can be a Hash, specifying the percentage likelihood for each of the nucleotides. See the following usage examples to find out how to use this.

Usage examples:

Bioroebe.random_dna 15 # => "TTGGTAAGCTCTTTA"
Bioroebe.random_dna 25 # => "TTAGCACAAGCATGGACGGACCAGA"
Bioroebe.random_dna(50, { A: 10, T: 10, C: 10, G: 70}) # => "GGGGTGGGGAGGGTATGCGGAGGAAGGGCGGGAAGGGCGGGGGCTGGGCG"
Bioroebe.random_dna(20, 'ATGGGGGGGG') # => "TGAGGGGGGGGGTGGGAGGG"
Bioroebe.random_dna(20, 'ATGGGGGGGG') # => "GGTAGGGGGGGGTAGGGGGG"
#


3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 3669

# Generates a random DNA sequence and returns it as a String.
#
# +i+ is the desired length (an Array such as ARGV is joined first;
# :default stands for 250).
#
# +optional_hash_with_the_frequencies+ controls the distribution:
#
#   - nil or empty:  every member of Bioroebe::DNA_NUCLEOTIDES is
#                    equally likely.
#   - a String:      its characters form a pool which is shuffled and
#                    consumed, then refilled once it is exhausted.
#   - a Hash:        percentage values for :A, :T, :C and :G. Missing
#                    keys count as 0.
#
# If the percentages of the Hash sum up to less than 100, some draws will
# not match any nucleotide. Such a draw is reported via e() and skipped,
# which means that the result is then shorter than +i+.
def self.generate_random_dna_sequence(
    i                                  = ARGV,
    optional_hash_with_the_frequencies = {} # ← This may be a String too, mind you.
  )
  # ======================================================================= #
  # First define our result-String. This one will be returned by this
  # method.
  # ======================================================================= #
  result = ''.dup
  nucleotides = Bioroebe::DNA_NUCLEOTIDES # Get a handle to the four DNA nucleotides.
  if i.is_a? Array
    i = i.join.strip
  end
  i = 250 if i == :default
  i = i.to_i # This is "n times".
  frequencies = optional_hash_with_the_frequencies
  if frequencies.nil? or frequencies.empty?
    # ===================================================================== #
    # This is the default clause. An empty String ends up here as well,
    # since there is no pool to draw from.
    # ===================================================================== #
    i.times {
      result << nucleotides.sample
    }
  elsif frequencies.is_a? String
    # ===================================================================== #
    # Handle the case where the user passed a String:
    # ===================================================================== #
    pool = frequencies.chars.shuffle
    i.times {
      pool = frequencies.chars.shuffle if pool.empty?
      result << pool.pop
    }
  else
    # ===================================================================== #
    # Else, the user wants to use a frequency hash. Build up the
    # cumulative upper bounds in the order A, T, C, G:
    # ===================================================================== #
    running_total = 0
    upper_bounds = %i( A T C G ).map {|nucleotide|
      running_total += (frequencies[nucleotide] || 0)
      [nucleotide.to_s, running_total]
    }
    i.times {
      percentage = rand(100)+1
      match = upper_bounds.find {|_nucleotide, bound| percentage <= bound }
      if match
        result << match.first
      else
        # Never append nil - that would raise a TypeError.
        e 'Not found a match for '+percentage.to_s
      end
    }
  end
  result
end

.generate_random_rna_sequence(i = ARGV) ⇒ Object

#

Bioroebe.generate_random_rna_sequence

The input-argument should be a number, an Integer, such as 10.

Usage example:

Bioroebe.generate_random_rna_sequence(10)
#


2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2685

# ========================================================================= #
# === Bioroebe.generate_random_rna_sequence
#
# Builds a String of randomly sampled RNA nucleotides. The entries are
# taken from Bioroebe::RNA_NUCLEOTIDES.
#
# The argument states how many nucleotides are wanted. An Array (such as
# ARGV) is joined via ' ' and stripped first; whatever remains is
# converted via .to_s.to_i, so non-numeric input yields an empty String.
#
# Usage example:
#
#   Bioroebe.generate_random_rna_sequence(10)
#
# ========================================================================= #
def self.generate_random_rna_sequence(i = ARGV)
  i = i.join(' ').strip if i.is_a? Array
  n_nucleotides = i.to_s.to_i
  allowed_nucleotides = Bioroebe::RNA_NUCLEOTIDES
  # A negative count simply iterates zero times, leaving the String empty.
  n_nucleotides.times.each_with_object(''.dup) { |_index, rna|
    rna << allowed_nucleotides.sample
  }
end

.guess_format(i) ⇒ Object

#

Bioroebe.guess_format

#


2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2793

# ========================================================================= #
# === Bioroebe.guess_format
#
# Guesses a sequence-file format from the file extension of the given
# name.
#
# Returns 'fasta' for names ending in .fa, .fna, .faa or .fasta, returns
# 'fastq' for names ending in .fq or .fastq, returns an empty String for
# names containing .fx, and returns nil when nothing matches.
#
# The dot in front of each extension is matched literally, so a name
# such as 'alfa' is not mistaken for a FASTA file.
# ========================================================================= #
def self.guess_format(i)
  case i
  # ======================================================================= #
  # === fasta
  # ======================================================================= #
  when /\.(?:fa|fna|faa|fasta)$/
    'fasta'
  # ======================================================================= #
  # === fastq
  # ======================================================================= #
  when /\.(?:fq|fastq)$/
    'fastq'
  # ======================================================================= #
  # === fx
  #
  # This extension is recognised, but maps to an empty format name.
  # ======================================================================= #
  when /\.fx/
    ''
  end
end

.hamming_distance(sequence1 = 'ATCG', sequence2 = 'ATCC') ⇒ Object

#

Bioroebe.hamming_distance

This method returns an Integer: the Hamming distance between two sequences of equal length, that is, the number of positions at which the two sequences differ.

Do note that a second implementation may exist for the hamming distance, in the Bioroebe project.

Usage example:

Bioroebe.hamming_distance('ATCG','ATCC') # => 1
#


1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 1062

# ========================================================================= #
# === Bioroebe.hamming_distance
#
# Counts the positions at which two sequences differ and returns that
# count as an Integer.
#
# Both arguments may be Strings (split into single characters) or Arrays
# (flattened). The comparison walks over the positions of the first
# sequence: positions missing in a shorter second sequence count as
# differences, while surplus positions of a longer second sequence are
# ignored.
#
# When the first sequence is nil a notification is shown and nil is
# returned.
#
# Usage example:
#
#   Bioroebe.hamming_distance('ATCG','ATCC') # => 1
#
# ========================================================================= #
def self.hamming_distance(
    sequence1 = 'ATCG',
    sequence2 = 'ATCC'
  )
  if sequence1.nil?
    e 'Please provide a sequence (String) as input to this method.'
    return
  end
  # ======================================================================= #
  # Normalize both inputs into flat Arrays of single elements.
  # ======================================================================= #
  first, second = [sequence1, sequence2].map { |sequence|
    elements = sequence.is_a?(String) ? sequence.split(//) : sequence
    [elements].flatten
  }
  # ======================================================================= #
  # Pair up the positions and count the pairs that do not agree.
  # ======================================================================= #
  first.zip(second).count { |left, right| left != right }
end

.has_this_restriction_enzyme?(name_of_restriction_enzyme) ⇒ Boolean

#

Bioroebe.has_this_restriction_enzyme?

This method will determine whether we have a specific restriction enzyme registered in the yaml file restriction_enzymes.yml or whether we do not. That way we can query whether a restriction enzyme is registered (and thus available) or whether it is not.

The method will downcase all keys in use to simplify finding a matching entry.

Usage example:

Bioroebe.has_this_restriction_enzyme? 'MvnI'    # => true
Bioroebe.has_this_restriction_enzyme? 'EcoRI'   # => true
Bioroebe.has_this_restriction_enzyme? 'EcoRII'  # => true
Bioroebe.has_this_restriction_enzyme? 'EcoRIII' # => false
Bioroebe.has_this_restriction_enzyme? 'PvuI'    # => true
Bioroebe.has_this_restriction_enzyme? 'PvuII'   # => true
Bioroebe.has_this_restriction_enzyme? 'PvuIII'  # => false
#

Returns:

  • (Boolean)


33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/bioroebe/enzymes/has_this_restriction_enzyme.rb', line 33

# ========================================================================= #
# === Bioroebe.has_this_restriction_enzyme?
#
# Returns true if the given restriction enzyme is among the keys of
# ::Bioroebe.restriction_enzymes?, otherwise false.
#
# The comparison is case-insensitive and ignores any '?' characters in
# the given name. The argument itself is never modified: the method
# works on a normalized copy, so callers can pass in any String (frozen
# or not) without seeing it change.
#
# Usage example:
#
#   Bioroebe.has_this_restriction_enzyme? 'EcoRI'   # => true
#   Bioroebe.has_this_restriction_enzyme? 'EcoRIII' # => false
#
# ========================================================================= #
def self.has_this_restriction_enzyme?(
    name_of_restriction_enzyme
  )
  # ======================================================================= #
  # The non-bang variants return new Strings, leaving the input untouched.
  # ======================================================================= #
  wanted_name = name_of_restriction_enzyme.to_s.delete('?').downcase
  ::Bioroebe.restriction_enzymes?.each_pair.any? { |key, _value|
    key.downcase == wanted_name
  }
end

.hash_codon_tables? ⇒ Boolean

#

Bioroebe.hash_codon_tables?

#

Returns:

  • (Boolean)


115
116
117
# File 'lib/bioroebe/codons/codon_tables.rb', line 115

# ========================================================================= #
# === Bioroebe.hash_codon_tables?
#
# Thin query method: forwards to ::Bioroebe::CodonTables.definitions?
# and returns the result of that call unchanged.
#
# NOTE(review): the method name suggests that a Hash of codon tables is
# returned rather than a strict true/false value - confirm against
# CodonTables.definitions?.
# ========================================================================= #
def self.hash_codon_tables?
  ::Bioroebe::CodonTables.definitions?
end

.index_this_fasta_file(i = ARGV) ⇒ Object

#

Bioroebe.index_this_fasta_file

This method will use samtools faidx to index files.

#


569
570
571
572
573
574
575
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 569

# ========================================================================= #
# === Bioroebe.index_this_fasta_file
#
# Runs "samtools faidx" once for every given file.
#
# The argument may be a single entry or an (optionally nested) Array of
# entries; nil entries are skipped. Each command is wrapped by two calls
# to e.
#
# NOTE(review): the entries are interpolated into the command line as
# they are, without any quoting.
# ========================================================================= #
def self.index_this_fasta_file(i = ARGV)
  fasta_files = [i].flatten.compact
  fasta_files.each do |fasta_file|
    e
    esystem "samtools faidx #{fasta_file}"
    e
  end
end

.infer_type_from_this_sequence(i = 'ATGGTACGACAC') ⇒ Object

#

Bioroebe.infer_type_from_this_sequence

This method will try to infer the type from a given sequence.

The three valid return types are the following symbols:

:dna
:rna
:protein

Note that this may not work 100% reliably, so do not depend too much on this method working absolutely perfectly.

#


4460
4461
4462
4463
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482
4483
4484
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4460

# ========================================================================= #
# === Bioroebe.infer_type_from_this_sequence
#
# Tries to infer the type of the given sequence and returns one of the
# Symbols :rna, :dna or :protein.
#
# The checks run in this order:
#
#   1) any sequence containing an uppercase 'U' is reported as :rna
#   2) a sequence with a line consisting solely of A, T, C and G is
#      reported as :dna
#   3) everything else is assumed to be a :protein
#
# An Array is joined into a single String before the checks run.
# ========================================================================= #
def self.infer_type_from_this_sequence(
    i = 'ATGGTACGACAC'
  )
  sequence = i.is_a?(Array) ? i.join : i
  # ======================================================================= #
  # === :rna
  # ======================================================================= #
  return :rna if sequence.include? 'U'
  # ======================================================================= #
  # === :dna
  # ======================================================================= #
  return :dna if sequence =~ /^[ATCG]+$/
  # ======================================================================= #
  # === :protein
  # ======================================================================= #
  :protein
end

.initialize_codons ⇒ Object

#

Bioroebe.initialize_codons

This method will first initialize the stop-codons, and then determine the start codons in use.

#


82
83
84
85
# File 'lib/bioroebe/codons/codons.rb', line 82

# ========================================================================= #
# === Bioroebe.initialize_codons
#
# Runs the two codon-related setup steps in a fixed order: the stop
# codons are initialized first, then the start codons are determined
# from the codon table.
# ========================================================================= #
def self.initialize_codons
  initialize_stop_codons # Step 1: the stop codons.
  determine_start_codons_from_the_codon_table # Step 2: the start codons.
end

.initialize_default_stop_codons ⇒ Object

#

Bioroebe.initialize_default_stop_codons

This method will initialize the default stop codons. These default to the stop codons that can be found in the human genome (TAG, TAA and TGA).

Note that this method will NOT work if @stop_codons already contains elements; this is a tiny “safeguard” to prevent erroneous use. If you wish to not be handicapped then clear it by yourself first, via:

Bioroebe.clear_stop_codons
#


246
247
248
249
250
251
# File 'lib/bioroebe/codons/codons.rb', line 246

# ========================================================================= #
# === Bioroebe.initialize_default_stop_codons
#
# Fills @stop_codons with the three default stop codons TAG, TAA and
# TGA - but only if @stop_codons is currently empty. If it already holds
# entries, nothing is changed and nil is returned.
# ========================================================================= #
def self.initialize_default_stop_codons
  return unless @stop_codons.empty? # Safeguard: never touch existing entries.
  @stop_codons.concat(%w( TAG TAA TGA )) # <- Add the default stop codons here.
end

.input_as_dna(i) ⇒ Object

#

Bioroebe.input_as_dna

This method will only accept input that is DNA, that is, the short letter variant (thus, A, T, C or G). Any other input will be stripped away, aka discarded, so this method acts as a filter - a forward-filter for DNA.

The method will return a “String” that is assumed to be a “DNA string”. You can expect only DNA nucleotides to be part of that string.

Usage example:

Bioroebe.input_as_dna 'UUTGAGGACCA' # => "TGAGGACCA"
#


4503
4504
4505
4506
4507
4508
4509
4510
4511
4512
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4503

def self.input_as_dna(i)
  i = <