Module: Bioroebe

Included in:
Taxonomy::Interactive
Defined in:
lib/bioroebe/svg/page.rb,
lib/bioroebe/base/base.rb,
lib/bioroebe/cell/cell.rb,
lib/bioroebe/gene/gene.rb,
lib/bioroebe/ncbi/ncbi.rb,
lib/bioroebe/svg/glyph.rb,
lib/bioroebe/svg/svgee.rb,
lib/bioroebe/svg/track.rb,
lib/bioroebe/misc/ruler.rb,
lib/bioroebe/shell/menu.rb,
lib/bioroebe/ncbi/efetch.rb,
lib/bioroebe/parsers/gff.rb,
lib/bioroebe/shell/shell.rb,
lib/bioroebe/siRNA/siRNA.rb,
lib/bioroebe/virus/virus.rb,
lib/bioroebe/base/colours.rb,
lib/bioroebe/sequence/dna.rb,
lib/bioroebe/sequence/rna.rb,
lib/bioroebe/blosum/blosum.rb,
lib/bioroebe/codons/codons.rb,
lib/bioroebe/genome/genome.rb,
lib/bioroebe/svg/primitive.rb,
lib/bioroebe/taxonomy/edit.rb,
lib/bioroebe/taxonomy/menu.rb,
lib/bioroebe/taxonomy/node.rb,
lib/bioroebe/biomart/filter.rb,
lib/bioroebe/biomart/server.rb,
lib/bioroebe/constants/GUIs.rb,
lib/bioroebe/count/count_at.rb,
lib/bioroebe/count/count_gc.rb,
lib/bioroebe/shell/readline.rb,
lib/bioroebe/taxonomy/chart.rb,
lib/bioroebe/biomart/biomart.rb,
lib/bioroebe/biomart/dataset.rb,
lib/bioroebe/colours/colours.rb,
lib/bioroebe/project/project.rb,
lib/bioroebe/regexes/regexes.rb,
lib/bioroebe/taxonomy/shared.rb,
lib/bioroebe/version/version.rb,
lib/bioroebe/biomart/database.rb,
lib/bioroebe/dotplots/dotplot.rb,
lib/bioroebe/sequence/protein.rb,
lib/bioroebe/shell/help/class.rb,
lib/bioroebe/svg/mini_feature.rb,
lib/bioroebe/taxonomy/colours.rb,
lib/bioroebe/abstract/features.rb,
lib/bioroebe/biomart/attribute.rb,
lib/bioroebe/encoding/encoding.rb,
lib/bioroebe/readline/readline.rb,
lib/bioroebe/sequence/sequence.rb,
lib/bioroebe/taxonomy/taxonomy.rb,
lib/bioroebe/codons/codon_table.rb,
lib/bioroebe/parsers/parse_embl.rb,
lib/bioroebe/sequence/alignment.rb,
lib/bioroebe/taxonomy/constants.rb,
lib/bioroebe/taxonomy/help/help.rb,
lib/bioroebe/taxonomy/info/info.rb,
lib/bioroebe/codons/codon_tables.rb,
lib/bioroebe/codons/start_codons.rb,
lib/bioroebe/colours/use_colours.rb,
lib/bioroebe/constants/constants.rb,
lib/bioroebe/misc/useful_formulas.rb,
lib/bioroebe/patterns/rgg_scanner.rb,
lib/bioroebe/taxonomy/info/is_dna.rb,
lib/bioroebe/taxonomy/interactive.rb,
lib/bioroebe/taxonomy/parse_fasta.rb,
lib/bioroebe/exceptions/exceptions.rb,
lib/bioroebe/parsers/blosum_parser.rb,
lib/bioroebe/parsers/stride_parser.rb,
lib/bioroebe/requires/require_yaml.rb,
lib/bioroebe/genomes/genome_pattern.rb,
lib/bioroebe/parsers/biolang_parser.rb,
lib/bioroebe/parsers/genbank_parser.rb,
lib/bioroebe/taxonomy/class_methods.rb,
lib/bioroebe/taxonomy/help/helpline.rb,
lib/bioroebe/toplevel_methods/roebe.rb,
lib/bioroebe/codons/show_codon_usage.rb,
lib/bioroebe/configuration/constants.rb,
lib/bioroebe/sinatra/sinatra_wrapper.rb,
lib/bioroebe/base/prototype/prototype.rb,
lib/bioroebe/cleave_and_digest/cleave.rb,
lib/bioroebe/codons/show_codon_tables.rb,
lib/bioroebe/genomes/genome_retriever.rb,
lib/bioroebe/patterns/profile_pattern.rb,
lib/bioroebe/patterns/scan_for_repeat.rb,
lib/bioroebe/requires/require_colours.rb,
lib/bioroebe/www/embeddable_interface.rb,
lib/bioroebe/cleave_and_digest/trypsin.rb,
lib/bioroebe/dotplots/advanced_dotplot.rb,
lib/bioroebe/electron_microscopy/flipy.rb,
lib/bioroebe/raw_sequence/raw_sequence.rb,
lib/bioroebe/sinatra/sinatra_interface.rb,
lib/bioroebe/toplevel_methods/taxonomy.rb,
lib/bioroebe/utility_scripts/find_gene.rb,
lib/bioroebe/viennarna/rnafold_wrapper.rb,
lib/bioroebe/colours/colourize_sequence.rb,
lib/bioroebe/enzymes/restriction_enzyme.rb,
lib/bioroebe/aminoacids/codon_percentage.rb,
lib/bioroebe/cleave_and_digest/digestion.rb,
lib/bioroebe/codons/detect_minimal_codon.rb,
lib/bioroebe/configuration/configuration.rb,
lib/bioroebe/log_directory/log_directory.rb,
lib/bioroebe/sequence/reverse_complement.rb,
lib/bioroebe/string_matching/levensthein.rb,
lib/bioroebe/base/base_module/base_module.rb,
lib/bioroebe/codons/show_this_codon_table.rb,
lib/bioroebe/colours/colour_schemes/score.rb,
lib/bioroebe/fasta_and_fastq/fasta_parser.rb,
lib/bioroebe/cell/specialized_cells/B_cell.rb,
lib/bioroebe/cell/specialized_cells/T_cell.rb,
lib/bioroebe/colours/colour_schemes/simple.rb,
lib/bioroebe/ngs/phred_quality_score_table.rb,
lib/bioroebe/palindromes/palindrome_finder.rb,
lib/bioroebe/taxonomy/info/check_available.rb,
lib/bioroebe/utility_scripts/punnet/punnet.rb,
lib/bioroebe/aminoacids/show_hydrophobicity.rb,
lib/bioroebe/calculate/calculate_gc_content.rb,
lib/bioroebe/colours/colourize_related_code.rb,
lib/bioroebe/fasta_and_fastq/download_fasta.rb,
lib/bioroebe/requires/require_all_pdb_files.rb,
lib/bioroebe/string_matching/smith_waterman.rb,
lib/bioroebe/codons/determine_optimal_codons.rb,
lib/bioroebe/codons/sanitize_codon_frequency.rb,
lib/bioroebe/gui/universal_widgets/gene/gene.rb,
lib/bioroebe/matplotlib/matplotlib_generator.rb,
lib/bioroebe/utility_scripts/compseq/compseq.rb,
lib/bioroebe/utility_scripts/showorf/showorf.rb,
lib/bioroebe/aminoacids/aminoacids_mass_table.rb,
lib/bioroebe/calculate/calculate_blosum_score.rb,
lib/bioroebe/count/count_amount_of_aminoacids.rb,
lib/bioroebe/electron_microscopy/fix_pos_file.rb,
lib/bioroebe/enzymes/restriction_enzymes_file.rb,
lib/bioroebe/enzymes/show_restriction_enzymes.rb,
lib/bioroebe/palindromes/palindrome_generator.rb,
lib/bioroebe/requires/require_all_codon_files.rb,
lib/bioroebe/requires/require_all_count_files.rb,
lib/bioroebe/string_matching/hamming_distance.rb,
lib/bioroebe/toplevel_methods/fasta_and_fastq.rb,
lib/bioroebe/aminoacids/aminoacid_substitution.rb,
lib/bioroebe/cell/specialized_cells/Macrophage.rb,
lib/bioroebe/colours/colour_schemes/nucleotide.rb,
lib/bioroebe/count/count_amount_of_nucleotides.rb,
lib/bioroebe/electron_microscopy/read_file_xmd.rb,
lib/bioroebe/gui/universal_widgets/shell/shell.rb,
lib/bioroebe/requires/require_all_parser_files.rb,
lib/bioroebe/toplevel_methods/toplevel_methods.rb,
lib/bioroebe/utility_scripts/pathways/pathways.rb,
lib/bioroebe/aminoacids/display_aminoacid_table.rb,
lib/bioroebe/fasta_and_fastq/show_fasta_headers.rb,
lib/bioroebe/gui/experimental/snapgene/snapgene.rb,
lib/bioroebe/requires/require_all_dotplot_files.rb,
lib/bioroebe/requires/require_all_enzymes_files.rb,
lib/bioroebe/requires/require_all_pattern_files.rb,
lib/bioroebe/requires/require_cleave_and_digest.rb,
lib/bioroebe/aminoacids/create_random_aminoacids.rb,
lib/bioroebe/enzymes/has_this_restriction_enzyme.rb,
lib/bioroebe/misc/quiz/three_letter_to_aminoacid.rb,
lib/bioroebe/palindromes/palindrome_2D_structure.rb,
lib/bioroebe/requires/require_all_sequence_files.rb,
lib/bioroebe/requires/require_all_taxonomy_files.rb,
lib/bioroebe/utility_scripts/compacter/compacter.rb,
lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb,
lib/bioroebe/annotations/create_annotation_format.rb,
lib/bioroebe/colours/colour_schemes/colour_scheme.rb,
lib/bioroebe/conversions/convert_aminoacid_to_dna.rb,
lib/bioroebe/databases/download_taxonomy_database.rb,
lib/bioroebe/nucleotides/complementary_dna_strand.rb,
lib/bioroebe/requires/require_all_calculate_files.rb,
lib/bioroebe/conversions/dna_to_aminoacid_sequence.rb,
lib/bioroebe/electron_microscopy/parse_coordinates.rb,
lib/bioroebe/fasta_and_fastq/show_fasta_statistics.rb,
lib/bioroebe/gui/universal_widgets/sizeseq/sizeseq.rb,
lib/bioroebe/pdb_and_protein_structure/alpha_helix.rb,
lib/bioroebe/requires/require_all_aminoacids_files.rb,
lib/bioroebe/requires/require_the_toplevel_methods.rb,
lib/bioroebe/utility_scripts/analyse_local_dataset.rb,
lib/bioroebe/base/colours_for_base/colours_for_base.rb,
lib/bioroebe/enzymes/restriction_enzymes/statistics.rb,
lib/bioroebe/fasta_and_fastq/fastq_format_explainer.rb,
lib/bioroebe/patterns/analyse_glycosylation_pattern.rb,
lib/bioroebe/requires/require_all_nucleotides_files.rb,
lib/bioroebe/requires/require_all_palindromes_files.rb,
lib/bioroebe/string_matching/find_longest_substring.rb,
lib/bioroebe/string_matching/simple_string_comparer.rb,
lib/bioroebe/calculate/calculate_melting_temperature.rb,
lib/bioroebe/electron_microscopy/coordinate_analyzer.rb,
lib/bioroebe/electron_microscopy/generate_em2em_file.rb,
lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb,
lib/bioroebe/fasta_and_fastq/parse_fastq/parse_fastq.rb,
lib/bioroebe/pdb_and_protein_structure/helical_wheel.rb,
lib/bioroebe/calculate/calculate_levensthein_distance.rb,
lib/bioroebe/nucleotides/sanitize_nucleotide_sequence.rb,
lib/bioroebe/patterns/is_this_sequence_a_EGF2_pattern.rb,
lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb,
lib/bioroebe/requires/require_all_colour_scheme_files.rb,
lib/bioroebe/codons/possible_codons_for_this_aminoacid.rb,
lib/bioroebe/colours/colour_schemes/colour_scheme_demo.rb,
lib/bioroebe/gui/universal_widgets/alignment/alignment.rb,
lib/bioroebe/utility_scripts/align_open_reading_frames.rb,
lib/bioroebe/utility_scripts/permutations/permutations.rb,
lib/bioroebe/genbank/genbank_flat_file_format_generator.rb,
lib/bioroebe/pdb_and_protein_structure/parse_mmCIF_file.rb,
lib/bioroebe/requires/require_all_fasta_and_fastq_files.rb,
lib/bioroebe/requires/require_all_string_matching_files.rb,
lib/bioroebe/requires/require_all_utility_scripts_files.rb,
lib/bioroebe/codons/convert_this_codon_to_that_aminoacid.rb,
lib/bioroebe/fasta_and_fastq/fasta_defline/fasta_defline.rb,
lib/bioroebe/fasta_and_fastq/fasta_to_yaml/fasta_to_yaml.rb,
lib/bioroebe/gui/universal_widgets/controller/controller.rb,
lib/bioroebe/gui/universal_widgets/www_finder/www_finder.rb,
lib/bioroebe/nucleotides/molecular_weight_of_nucleotides.rb,
lib/bioroebe/pdb_and_protein_structure/download_this_pdb.rb,
lib/bioroebe/utility_scripts/dot_alignment/dot_alignment.rb,
lib/bioroebe/utility_scripts/mirror_repeat/mirror_repeat.rb,
lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb,
lib/bioroebe/utility_scripts/parse_taxonomy/parse_taxonomy.rb,
lib/bioroebe/virus/individual_viruses/tobacco_mosaic_virus.rb,
lib/bioroebe/base/internal_hash_module/internal_hash_module.rb,
lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb,
lib/bioroebe/electron_microscopy/electron_microscopy_module.rb,
lib/bioroebe/electron_microscopy/simple_star_file_generator.rb,
lib/bioroebe/requires/require_all_electron_microscopy_files.rb,
lib/bioroebe/fasta_and_fastq/length_modifier/length_modifier.rb,
lib/bioroebe/gui/universal_widgets/three_to_one/three_to_one.rb,
lib/bioroebe/utility_scripts/display_open_reading_frames/menu.rb,
lib/bioroebe/utility_scripts/display_open_reading_frames/misc.rb,
lib/bioroebe/utility_scripts/display_open_reading_frames/reset.rb,
lib/bioroebe/utility_scripts/move_file_to_its_correct_location.rb,
lib/bioroebe/base/commandline_application/commandline_arguments.rb,
lib/bioroebe/utility_scripts/display_open_reading_frames/report.rb,
lib/bioroebe/gui/universal_widgets/parse_pdb_file/parse_pdb_file.rb,
lib/bioroebe/gui/universal_widgets/protein_to_DNA/protein_to_DNA.rb,
lib/bioroebe/base/commandline_application/commandline_application.rb,
lib/bioroebe/fasta_and_fastq/return_fasta_subsection_of_this_file.rb,
lib/bioroebe/colours/colour_schemes/array_available_colour_schemes.rb,
lib/bioroebe/fasta_and_fastq/compact_fasta_file/compact_fasta_file.rb,
lib/bioroebe/gui/universal_widgets/random_sequence/random_sequence.rb,
lib/bioroebe/utility_scripts/consensus_sequence/consensus_sequence.rb,
lib/bioroebe/utility_scripts/display_open_reading_frames/determine.rb,
lib/bioroebe/utility_scripts/compare_these_two_sequences_via_blosum.rb,
lib/bioroebe/utility_scripts/display_open_reading_frames/initialize.rb,
lib/bioroebe/fasta_and_fastq/autocorrect_the_name_of_this_fasta_file.rb,
lib/bioroebe/gui/universal_widgets/format_converter/format_converter.rb,
lib/bioroebe/gui/universal_widgets/hamming_distance/hamming_distance.rb,
lib/bioroebe/gui/universal_widgets/show_codon_table/show_codon_table.rb,
lib/bioroebe/gui/universal_widgets/show_codon_usage/show_codon_usage.rb,
lib/bioroebe/pdb_and_protein_structure/fetch_fasta_sequence_from_pdb.rb,
lib/bioroebe/calculate/calculate_the_position_specific_scoring_matrix.rb,
lib/bioroebe/string_matching/find_longest_substring_via_LCS_algorithm.rb,
lib/bioroebe/gui/universal_widgets/anti_sense_strand/anti_sense_strand.rb,
lib/bioroebe/utility_scripts/check_for_mismatches/check_for_mismatches.rb,
lib/bioroebe/base/infer_the_namespace_module/infer_the_namespace_module.rb,
lib/bioroebe/enzymes/return_sequence_that_is_cut_via_restriction_enzyme.rb,
lib/bioroebe/aminoacids/colourize_hydrophilic_and_hydrophobic_aminoacids.rb,
lib/bioroebe/enzymes/return_restriction_enzyme_sequence_and_cut_position.rb,
lib/bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb,
lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/menu.rb,
lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/misc.rb,
lib/bioroebe/gui/universal_widgets/fasta_table_widget/fasta_table_widget.rb,
lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/reset.rb,
lib/bioroebe/gui/universal_widgets/aminoacid_composition/customized_dialog.rb,
lib/bioroebe/gui/universal_widgets/nucleotide_analyser/nucleotide_analyser.rb,
lib/bioroebe/gui/universal_widgets/restriction_enzymes/restriction_enzymes.rb,
lib/bioroebe/nucleotides/show_nucleotide_sequence/show_nucleotide_sequence.rb,
lib/bioroebe/utility_scripts/show_this_dna_sequence/show_this_dna_sequence.rb,
lib/bioroebe/gui/universal_widgets/blosum_matrix_viewer/blosum_matrix_viewer.rb,
lib/bioroebe/gui/universal_widgets/levensthein_distance/levensthein_distance.rb,
lib/bioroebe/gui/universal_widgets/primer_design_widget/primer_design_widget.rb,
lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb,
lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/constants.rb,
lib/bioroebe/gui/universal_widgets/aminoacid_composition/aminoacid_composition.rb,
lib/bioroebe/utility_scripts/create_batch_entrez_file/create_batch_entrez_file.rb,
lib/bioroebe/utility_scripts/determine_antigenic_areas/determine_antigenic_areas.rb,
lib/bioroebe/fasta_and_fastq/display_how_many_fasta_entries_are_in_this_directory.rb,
lib/bioroebe/gui/universal_widgets/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb,
lib/bioroebe/utility_scripts/download_files_from_rebase/download_files_from_rebase.rb,
lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb,
lib/bioroebe/utility_scripts/display_open_reading_frames/display_open_reading_frames.rb,
lib/bioroebe/pdb_and_protein_structure/report_secondary_structures_from_this_pdb_file.rb,
lib/bioroebe/calculate/calculate_melting_temperature_for_more_than_thirteen_nucleotides.rb,
lib/bioroebe/gui/universal_widgets/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb,
lib/bioroebe/gui/universal_widgets/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb,
lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/split_this_fasta_file_into_chromosomes.rb,
lib/bioroebe/utility_scripts/determine_missing_nucleotides_percentage/determine_missing_nucleotides_percentage.rb

Overview

#

require 'bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/constants.rb'

#

Defined Under Namespace

Modules: BaseModule, Biomart, Blosum, CodonTable, CodonTables, CodonTablesFrequencies, ColourScheme, Colourize, ColoursForBase, CommandlineArguments, Configuration, ElectronMicroscopy, EmbeddableInterface, Features, GUI, InferTheNamespaceModule, InternalHashModule, MolecularWeightOfNucleotides, NucleotideModule, Parser, Postgresql, Quiz, RestrictionEnzymes, SinatraInterface, Taxonomy, VerboseTruth Classes: AdvancedDotplot, AlignOpenReadingFrames, Alignment, AlphaHelix, AminoacidSubstitution, AminoacidsMassTable, AnalyseGlycosylationPattern, AnalyseLocalDataset, AutocorrectTheNameOfThisFastaFile, B_cell, Base, BiolangParser, BlosumParser, CalculateBlosumScore, CalculateGCContent, CalculateMeltingTemperature, CalculateMeltingTemperatureForMoreThanThirteenNucleotides, CalculateThePositionSpecificScoringMatrix, Cell, CheckForMismatches, CodonPercentage, ColourSchemeDemo, ColourizeHydrophilicAndHydrophobicAminoacids, ColourizeSequence, CommandlineApplication, CompactFastaFile, Compacter, CompareTheseTwoSequencesViaBlosum, ComplementaryDnaStrand, Compseq, ConsensusSequence, ConvertAminoacidToDNA, ConvertThisCodonToThatAminoacid, CountAmountOfAminoacids, CountAmountOfNucleotides, CreateAnnotationFormat, CreateBatchEntrezFile, CreateRandomAminoacids, DNA, DeduceAminoacidSequence, DetectMinimalCodon, DetermineAntigenicAreas, DetermineMissingNucleotidesPercentage, DetermineOptimalCodons, Digestion, DisplayAminoacidTable, DisplayHowManyFastaEntriesAreInThisDirectory, DisplayOpenReadingFrames, DnaToAminoacidSequence, DotAlignment, Dotplot, DownloadFasta, DownloadFilesFromRebase, DownloadTaxonomyDatabase, FastaDefline, FastaParser, FastaToYaml, FastqFormatExplainer, FetchDataFromUniprot, FetchFastaSequenceFromPdb, FindGene, FindLongestSubstring, FindLongestSubstringViaLCSalgorithm, GenbankFlatFileFormatGenerator, GenbankParser, Gene, Genome, GenomePattern, GenomeRetriever, HammingDistance, HelixWheel, InvalidAminoacid, LengthModifier, Levensthein, Macrophage, 
MatplotlibGenerator, Matrix, MirrorRepeat, MostLikelyNucleotideSequenceForThisAminoacidSequence, MoveFileToItsCorrectLocation, Ncbi, Palindrome2DStructure, PalindromeFinder, PalindromeGenerator, ParseEMBL, ParseFasta, ParseFastq, ParseFrequencyTable, ParsePdbFile, ParseTaxonomy, ParsemmCIFFile, Pathways, Permutations, PhredQualityScoreTable, PossibleCodonsForThisAminoacid, ProfilePattern, Protein, Punnet, RGG_Scanner, RNA, RNALfoldWrapper, RawSequence, ReportSecondaryStructuresFromThisPdbFile, RestrictionEnzyme, ReverseComplement, Ruler, SVG, SanitizeCodonFrequency, SanitizeNucleotideSequence, ScanForRepeat, Sequence, Shell, ShowCodonTables, ShowCodonUsage, ShowFastaHeaders, ShowFastaStatistics, ShowHydrophobicity, ShowNucleotideSequence, ShowOrf, ShowRestrictionEnzymes, ShowThisCodonTable, ShowThisDNASequence, SiRNA, SimpleStringComparer, SimplifyFastaHeader, SinatraWrapper, SmithWaterman, SplitThisFastaFileIntoChromosomes, StrideParser, T_cell, TobaccoMosaicVirus, Trypsin, UsefulFormulas, Virus

Constant Summary collapse

USE_THIS_COLOUR_FOR_DNA =
#

Bioroebe::USE_THIS_COLOUR_FOR_DNA

The following constant will denote which colour we will use for DNA sequences by default, in this case, the HTML colour called steelblue.

#
:steelblue
FILE_EXPAND_CD_ALIASES =
#

FILE_EXPAND_CD_ALIASES

#
"#{@project_base_directory}hash_expand_cd_aliases.rb"
CONFIGURATION_DIRECTORY =
#

CONFIGURATION_DIRECTORY

#
"#{project_yaml_directory?}configuration/"
TOPLEVEL_METHODS_DIRECTORY =
#

TOPLEVEL_METHODS_DIRECTORY

#
"#{@project_base_directory}toplevel_methods/"
CODON_TABLES_DIRECTORY =
#

CODON_TABLES_DIRECTORY

#
"#{@project_base_directory}codon_tables/"
CLEAVE_AND_DIGEST_DIRECTORY =
#

CLEAVE_AND_DIGEST_DIRECTORY

#
"#{@project_base_directory}cleave_and_digest/"
ELECTRON_MICROSCOPY_DIRECTORY =
#

ELECTRON_MICROSCOPY_DIRECTORY

#
"#{@project_base_directory}electron_microscopy/"
CODON_TABLES_DIRECTORY_FREQUENCY =
#

CODON_TABLES_DIRECTORY_FREQUENCY

#
"#{CODON_TABLES_DIRECTORY}frequencies/"
PDB_DIRECTORY =
#

PDB_DIRECTORY

This directory has been called pdb_and_protein_structure/ since November 2023.

#
"#{@project_base_directory}pdb_and_protein_structure/"
PARSERS_DIRECTORY =
#

PARSERS_DIRECTORY

#
"#{@project_base_directory}parsers/"
ENZYMES_DIRECTORY =
#

ENZYMES_DIRECTORY

#
"#{@project_base_directory}enzymes/"
PALINDROMES_DIRECTORY =
#

PALINDROMES_DIRECTORY

#
"#{@project_base_directory}palindromes/"
PATTERN_DIRECTORY =
#

PATTERN_DIRECTORY

#
"#{@project_base_directory}pattern/"
NUCLEOTIDES_DIRECTORY =
#

NUCLEOTIDES_DIRECTORY

#
"#{@project_base_directory}nucleotides/"
COUNT_DIRECTORY =
#

COUNT_DIRECTORY

#
"#{@project_base_directory}count/"
AMINOACIDS_DIRECTORY =
#

AMINOACIDS_DIRECTORY

#
"#{@project_base_directory}aminoacids/"
BLOSUM_DIRECTORY =
#

BLOSUM_DIRECTORY

#
"#{project_yaml_directory?}blosum/"
CALCULATE_DIRECTORY =
#

CALCULATE_DIRECTORY

#
"#{@project_base_directory}calculate/"
CODONS_DIRECTORY =
#

CODONS_DIRECTORY

#
"#{@project_base_directory}codons/"
DOTPLOTS_DIRECTORY =
#

DOTPLOTS_DIRECTORY

#
"#{@project_base_directory}dotplots/"
SEQUENCE_DIRECTORY =
#

SEQUENCE_DIRECTORY

#
"#{@project_base_directory}sequence/"
PATHWAYS_DIRECTORY =
#

PATHWAYS_DIRECTORY

This constant will point to a path such as “/Programs/Ruby/2.6.4/lib/ruby/site_ruby/2.6.0/bioroebe/yaml/pathways/”.

#
"#{project_yaml_directory?}pathways/"
BIOROEBE_YAML_AMINOACIDS_DIRECTORY =
#

BIOROEBE_YAML_AMINOACIDS_DIRECTORY

#
"#{project_yaml_directory?}aminoacids/"
STRING_MATCHING_DIRECTORY =
#

STRING_MATCHING_DIRECTORY

#
"#{@project_base_directory}string_matching/"
FASTA_AND_FASTQ_DIRECTORY =
#

FASTA_AND_FASTQ_DIRECTORY

#
"#{@project_base_directory}fasta_and_fastq/"
VERSION =
#

VERSION

#
'0.13.32'
LAST_UPDATE =
#

LAST_UPDATE

This constant keeps track of when the bioroebe project was last updated. The notation is: DD.MM.YYYY

#
'09.04.2024'
URL_TO_THE_DOCUMENTATION =
#

URL_TO_THE_DOCUMENTATION

Keeps track of where the documentation for BioRoebe is hosted.

#
"https://www.rubydoc.info/gems/#{self.to_s.downcase}/#{VERSION}"
Aminoacids =
#

The following “alias” was added in May 2022.

#
Protein
UTF_ENCODING =
#

Bioroebe::UTF_ENCODING

#
'UTF-8'
USE_THIS_ENCODING =
#

Bioroebe::USE_THIS_ENCODING

#
UTF_ENCODING
Seq =
#

Usage example

x = Bioroebe::Seq.new('AGTACACTGGT')

#
Sequence
N =
#

Bioroebe::N

#
"\n"
R =
#

Bioroebe::R

#
"\r"
TABULATOR =
#

TABULATOR

#
"\t"
ROW_TERMINATOR =
#

Bioroebe::ROW_TERMINATOR

This constant is not often in use, though.

#
"#{TABULATOR}|\n"
BE_VERBOSE =
#

BE_VERBOSE

#
true
TOKEN =
#

TOKEN (TOKEN tag)

#
'|'
ARRAY_AMINOACIDS_THAT_CAN_BE_PHOSPHORYLATED =
#

ARRAY_AMINOACIDS_THAT_CAN_BE_PHOSPHORYLATED

Just list the aminoacids that can typically be phosphorylated.

#
%w(
  S Y T
)
ENGLISH_LONG_NAMES_FOR_THE_AMINO_ACIDS =
#

ENGLISH_LONG_NAMES_FOR_THE_AMINO_ACIDS

We keep the long names of the amino acids in one constant, so that we can query them later on.

#
(%w( 
  Alanine
  Arginine
  Asparagine
  Cysteine
  Glutamine
  Glycine
  Histidine
  Isoleucine
  Leucine
  Lysine
  Methionine
  Phenylalanine
  Proline
  Serine
  Threonine
  Tryptophane
  Tyrosine
  Valine
) << 'Aspartic acid' << 'Glutamic acid').sort
POSSIBLE_AMINO_ACIDS =
#

POSSIBLE_AMINO_ACIDS

Which Aminoacids are possible/allowed? We will list them here:

ACDEFGHIKLMNPQRSTVWY

Note that this is distinct from the constant AMINO_ACIDS, which is instead loaded from a local .yml file. This constant includes all the 20 canonical aminoacids, whereas AMINO_ACIDS may also include pyrrolysine and selenocysteine.

#
'ACDEFGHIKLMNPQRSTVWY'
TWENTY_CANONICAL_AMINOACIDS =

TWENTY_CANONICAL_AMINOACIDS

POSSIBLE_AMINO_ACIDS
ARRAY_AMINO_ACIDS_ALPHABET =
#

ARRAY_AMINO_ACIDS_ALPHABET

This keeps an Array with all aminoacids, in one-letter format.

So it is equivalent to:

["A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"]
#
POSSIBLE_AMINO_ACIDS.chars
VERTICAL_UNICODE_BAR =
#

VERTICAL_UNICODE_BAR

#
'|'
AMINOACID_FAMILIES =
#

AMINOACID_FAMILIES

#
{
  'citratzyklus' => {
    # Alpha-Ketoglutarat: EPQR
    'alpha-ketoglutarat' => %w( E P Q R ),
    # Oxalacetat: DMN-KTI
    'oxalacetat' => %w( D N K M T I ),
  },
  'glykolyse' => {
    'pyruvat' => %w( A V L ),                 # AVL
    '3-phosphoglycerinsäure' => %w( S G C ), # SGC
    },
    'chorismat' => {
      'aromatische_familie' => %w( F Y W )       # FYW
    },
    'ribose-5-p' => {
      'histidinol' => %w( H ) # Histidine.
    },
}
VALID_WAYS_TO_EXIT =
#

VALID_WAYS_TO_EXIT

All ways to exit will be recorded here.

If you need to use more ways, simply append to this Array.

This constant may have to be moved into the bio-shell part eventually.

#
%w(
  quit q exit qq :q qt
  bye
  rda
  r2
  tq
  sq
  exit_program
  exitprogram
)
NAMES_ENTRIES =
#

NAMES_ENTRIES

This used to belong to the Taxonomy submodule.

#
'names.sql'
NODES_ENTRIES =
#

NODES_ENTRIES

This used to belong to the Taxonomy submodule.

#
'nodes.sql'
FASTA_ENTRIES =
#

FASTA_ENTRIES

This used to belong to the Taxonomy submodule.

#
'fasta.sql'
SHALL_WE_LOG_LAST_UPDATE =
#

SHALL_WE_LOG_LAST_UPDATE

This constant exists specifically for the taxonomy-component of the Bioroebe project.

#
true
NAME_OF_BIO_SHELL =
#

NAME_OF_BIO_SHELL

This constant can be used as the default prompt for the bioshell component.

#
'BIO SHELL> '
DEFAULT_DNA_SEQUENCE =
#

DEFAULT_DNA_SEQUENCE

This is a default “test” DNA sequence, in the sense that it can be used to quickly test functionality within the bioroebe project.

It was added in May 2020, but it may be that we have to remove it at a later time, or move it into a separate .yml file. For the time being, though, it will reside here.

#
'CGGCCCGATTTGGGTTTCGGAGCGATCGAAATACCAGCACTACCATGAATTCTAT'\
'ATGGCTGCCGTTCACAGCCTTAATTTTAGGCTTTCCACCTGATCACTCTTTAATC'\
'TCCATTGTTTCTGGTACGCAGAAATTGACGCTTCCCATTCATTCACGGCTAAAAT'\
'CAAGGATTCCACCAGAATCGCGGGCCGCGTGGGTGCGCCGTCGACCTCCTCGGCC'\
'AAATAAGAACGGGCAGGTAAGAGACTAGGGTACTCAAGAT'
DEFAULT_LENGTH_FOR_DNA =
#

DEFAULT_LENGTH_FOR_DNA

How long our DNA-generated strings should be by default.

This may be used by some scripts, so it provides a default value for use in these scripts.

150 nucleotides are the current default.

#
150
FIELD_TERMINATOR =
#

FIELD_TERMINATOR

#
"#{TABULATOR}|#{TABULATOR}"
MAIN_DELIMITER =

An alias to the above.

DELIMITER = FIELD_TERMINATOR
USERS_X =
#

USERS_X

#
'/home/x/'
HOME_DIRECTORY_OF_USER_X =

HOME_DIRECTORY_OF_USER_X

USERS_X
RUBY_SRC =
#

RUBY_SRC

This constant is only useful on my home directory. Most other users will not need it, ever.

#
"#{USERS_X}programming/ruby/src/"
BIOROEBE_AT_HOME =
#

BIOROEBE_AT_HOME

#
"#{RUBY_SRC}bioroebe/lib/bioroebe/"
LOCALHOST =
#

LOCALHOST

#
'http://localhost/'
PATH_TO_THE_RELION_BINARY =
#

PATH_TO_THE_RELION_BINARY

This constant can be set to determine where relion resides. It is mostly an ad-hoc constant.

#
'/opt/RELION/relion-1.3/bin/relion'
ARRAY_REGISTERED_ACTIONS =
#

ARRAY_REGISTERED_ACTIONS

ARRAY_REGISTERED_ACTIONS becomes @registered_actions.

#
%w(
  to_rna
  to_dna
  rest
  pubmed
  blosum
  restriction
  translate
  quit
  shorten_aminoacid
)
FILE_BIO_LANG =
#

FILE_BIO_LANG

#
"#{USERS_X}data/personal/yaml/bio_lang/bio_lang.md"
EMAIL =
#

EMAIL

My email address - not too terribly useful for other people, but nonetheless it may be useful to display it, in particular for GUI-related components of the bioroebe-project and simple feedback in the long run.

#
'[email protected]'
REGEX_FOR_N_GLYCOSYLATION_PATTERN =
#

REGEX_FOR_N_GLYCOSYLATION_PATTERN

See rubular at:

https://rubular.com/r/D95Cq7oR5x
#
/(?=(N[^P][ST][^P]))/
REGEX_PROSITE_FOR_ANY_AMINOACID =
#

REGEX_PROSITE_FOR_ANY_AMINOACID

#
/x\((\d+)\)/
STOP_CODONS =
#

Bioroebe::STOP_CODONS

The STOP codons that can be found in Humans, in RNA format.

#
%w(
  UAA UAG UGA
)
RNA_NUCLEOTIDES =
#

Bioroebe::RNA_NUCLEOTIDES

This will refer to an Array including all four RNA nucleotides, that is A, U, G and C.

#
%w( A U G C )
ALLOWED_RNA_NUCLEOTIDES =

ALLOWED_RNA_NUCLEOTIDES

RNA_NUCLEOTIDES
POSSIBLE_RNA_NUCLEOTIDES =
#

Bioroebe::POSSIBLE_RNA_NUCLEOTIDES

This is a bit different to RNA_NUCLEOTIDES in that N is also a part of it. It is not entirely clear whether this array here is kept, though.

#
%w(
  A U G C N
)
DNA_NUCLEOTIDES =
#

Bioroebe::DNA_NUCLEOTIDES

This is the variant without N.

#
%w( A T G C )
HASH_DNA_NUCLEOTIDES =
#

Bioroebe::HASH_DNA_NUCLEOTIDES

As of 20.04.2014, Uracil is also part of this Hash. While this is, strictly speaking, not correct for DNA, it does simplify some downstream code. However, this may be re-evaluated in the future.

This Hash may be helpful when the user wishes to find a complement to a nucleotide. There is a method that does the same, but this Hash should be faster than a method call, so use it in particular if you need to focus more on speed.

#
{
  'A' => 'T',
  'T' => 'A',
  'G' => 'C',
  'C' => 'G',
  'U' => 'A'
}
POSSIBLE_DNA_NUCLEOTIDES =
#

POSSIBLE_DNA_NUCLEOTIDES

This constant will keep all possible DNA nucleotides.

N is also a valid entry; for example, ‘Yarrowia_lipolytica_genome.fa’ includes it.

However, only the nucleotides listed here are allowed in DNA.

To scope to this, do:

Bioroebe::POSSIBLE_DNA_NUCLEOTIDES
#
%w(
  A T G C N
)
ARRAY_VALID_DNA_SEQUENCES =

ARRAY_VALID_DNA_SEQUENCES

POSSIBLE_DNA_NUCLEOTIDES
FTP_NCBI_TAXONOMY_DATABASE =
#

FTP_NCBI_TAXONOMY_DATABASE

This constant refers to the taxonomy-database from NCBI. This is the file that can be downloaded from the NCBI homepage (actually, the ftp-listing).

Take note that this database, in .tar.gz format, is about 50 MB in size or even larger these days. So only download it if you really need it locally.

#
'ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz'
URL_TO_TAXONOMY_ARCHIVE =
#

URL_TO_TAXONOMY_ARCHIVE

An “alias” to the above ^^^ constant.

#
FTP_NCBI_TAXONOMY_DATABASE
NCBI_NUCCORE =
#

NCBI_NUCCORE

#
'https://www.ncbi.nlm.nih.gov/nuccore/'
NCBI_GENE =
#

NCBI_GENE

#
'https://www.ncbi.nlm.nih.gov/gene/'
FILE_HYDROPATHY_TABLE =
#

FILE_HYDROPATHY_TABLE

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}hydropathy_table.yml"
FILE_NUCLEAR_LOCALIZATION_SEQUENCES =
#

FILE_NUCLEAR_LOCALIZATION_SEQUENCES

#
"#{project_yaml_directory?}nuclear_localization_sequences.yml"
FILE_DEFAULT_COLOURS_FOR_THE_AMINOACIDS =
#

FILE_DEFAULT_COLOURS_FOR_THE_AMINOACIDS

#
"#{project_yaml_directory?}configuration/default_colours_for_the_aminoacids.yml"
FILE_BROWSER =
#

FILE_BROWSER

#
"#{project_yaml_directory?}configuration/browser.yml"
FILE_AMINOACIDS_MOLECULAR_FORMULA =
#

FILE_AMINOACIDS_MOLECULAR_FORMULA

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_molecular_formula.yml"
FILE_AMINOACIDS_THREE_TO_ONE =
#

FILE_AMINOACIDS_THREE_TO_ONE

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_three_to_one.yml"
FILE_WEIGHT_OF_COMMON_PROTEINS =
#

FILE_WEIGHT_OF_COMMON_PROTEINS

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}weight_of_common_proteins.yml"
FILE_AMINO_ACIDS_LONG_NAME_TO_ONE_LETTER =
#

FILE_AMINO_ACIDS_LONG_NAME_TO_ONE_LETTER

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_long_name_to_one_letter.yml"
FILE_AMINO_ACIDS_MOLECULAR_FORMULA =
#

FILE_AMINO_ACIDS_MOLECULAR_FORMULA

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_molecular_formula.yml"
FILE_AMINO_ACIDS_MASS_TABLE =
#

FILE_AMINO_ACIDS_MASS_TABLE

bl $BIOROEBE_YAML/aminoacids/amino_acids_monoisotopic_mass_table.yml
#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_monoisotopic_mass_table.yml"
FILE_AMINO_ACIDS =
#

FILE_AMINO_ACIDS

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids.yml"
FILE_AMINO_ACIDS_ABBREVIATIONS =
#

FILE_AMINO_ACIDS_ABBREVIATIONS

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_abbreviations.yml"
DIRECTORY_CODON_TABLES_FREQUENCIES =
#

DIRECTORY_CODON_TABLES_FREQUENCIES

This constant may point to a directory such as:

/home/Programs/Ruby/2.7.0/lib/ruby/site_ruby/2.7.0/bioroebe/codon_tables/frequencies/
#
"#{CODON_TABLES_DIRECTORY}frequencies/"
FILE_NUCLEOTIDES =
#

FILE_NUCLEOTIDES

#
"#{project_yaml_directory?}nucleotides/nucleotides.yml"
NUCLEOTIDES =
nil
FILE_GFP_SEQUENCE =
#

FILE_GFP_SEQUENCE

#
"#{project_yaml_directory?}sequences/"\
"JX472995_Green_fluorescent_protein_from_Aequorea_victoria.fasta"
FILE_RESTRICTION_ENZYMES =
#

FILE_RESTRICTION_ENZYMES

bl $BIOROEBE/yaml/restriction_enzymes/restriction_enzymes.yml

#
"#{project_yaml_directory?}restriction_enzymes/restriction_enzymes.yml"
FILE_COLOURIZE_FASTA_SEQUENCES =
#

FILE_COLOURIZE_FASTA_SEQUENCES

This constant points to the .yml file that holds information on how to colourize the FASTA sequences.

#
"#{project_yaml_directory?}configuration/colourize_fasta_sequences.yml"
FILE_BLOSUM45 =
#

FILE_BLOSUM45

#
"#{BLOSUM_DIRECTORY}/blosum45.yml"
FILE_BLOSUM50 =
#

FILE_BLOSUM50

#
"#{BLOSUM_DIRECTORY}/blosum50.yml"
FILE_BLOSUM62 =
#

FILE_BLOSUM62

#
"#{BLOSUM_DIRECTORY}/blosum62.yml"
FILE_BLOSUM80 =
#

FILE_BLOSUM80

#
"#{BLOSUM_DIRECTORY}/blosum80.yml"
FILE_BLOSUM90 =
#

FILE_BLOSUM90

#
"#{BLOSUM_DIRECTORY}/blosum90.yml"
FILE_BLOSUM_MATRIX =
#

FILE_BLOSUM_MATRIX

#
"#{BLOSUM_DIRECTORY}blosum_matrix.yml"
HYDROPATHY_TABLE =
YAML.load_file(
  FILE_HYDROPATHY_TABLE
)
FILE_CHROMOSOME_NUMBERS =
#

FILE_CHROMOSOME_NUMBERS

#
"#{project_yaml_directory?}chromosomes/chromosome_numbers.yml"
FILE_AMINO_ACIDS_FREQUENCY =
#

FILE_AMINO_ACIDS_FREQUENCY

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_frequency.yml"
FILE_AMINO_ACIDS_RESTE_YAML =
#

FILE_AMINO_ACIDS_RESTE_YAML

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_reste.yml"
FILE_AMINO_ACIDS_THREE_TO_ONE =
#

FILE_AMINO_ACIDS_THREE_TO_ONE

We’ll keep the keys downcased.

bl $RUBY_SRC/bioroebe/lib/bioroebe/yaml/aminoacids/amino_acids_three_to_one.yml
#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_three_to_one.yml"
FILE_AMINO_ACIDS_AVERAGE_MASS_TABLE =
#

FILE_AMINO_ACIDS_AVERAGE_MASS_TABLE

This will point to the file amino_acids_average_mass_table.yml.

#
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_average_mass_table.yml"
FILE_NUCLEOTIDES_WEIGHT =
#

FILE_NUCLEOTIDES_WEIGHT

The path to the file that holds the weight of the nucleotides.

#
"#{project_yaml_directory?}nucleotides/nucleotides_weight.yml"
UNICODE_HORIZONTAL_BAR =
#

UNICODE_HORIZONTAL_BAR

#
''
AMINO_ACIDS_MOLECULAR_FORMULA =
YAML.load_file(
  FILE_AMINO_ACIDS_MOLECULAR_FORMULA
)
AMINO_ACIDS_RESTE =
{}
AMINO_ACIDS_LONG_NAME_TO_ONE_LETTER =
YAML.load_file(_)
AMINO_ACIDS_MASS_TABLE =

Else hardcode the AminoAcid table here. This may no longer be necessary, though.

{
  'A' =>  71.03711, 'C' => 103.00919, 'D' => 115.02694,
  'E' => 129.04259, 'F' => 147.06841, 'G' =>  57.02146,
  'H' => 137.05891, 'I' => 113.08406, 'K' => 128.09496,
  'L' => 113.08406, 'M' => 131.04049, 'N' => 114.04293,
  'P' =>  97.05276, 'Q' => 128.05858, 'R' => 156.10111,
  'S' =>  87.03203, 'T' => 101.04768, 'V' =>  99.06841,
  'W' => 186.07931, 'Y' => 163.06333
}
AMINO_ACIDS_AVERAGE_MONOISOTOPIC_TABLE =

An alias.

AMINO_ACIDS_MASS_TABLE
AMINO_ACIDS =
#

Bioroebe::AMINO_ACIDS

Currently listing 21 AminoAcids from amino_acids.yml

bl $BIOROEBE/yaml/aminoacids/amino_acids.yml
#
YAML.load_file(
  FILE_AMINO_ACIDS
)
FILE_AMINO_ACIDS_ENGLISH =
#

::Bioroebe::AMINO_ACIDS_ENGLISH

#
YAML.load_file("#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_english.yml")
AMINO_ACIDS_ENGLISH =

AMINO_ACIDS_ENGLISH

FILE_AMINO_ACIDS_ENGLISH
AMINO_ACIDS_AVERAGE_MASS_TABLE =

Else simply hardcode the AminoAcid table here.

{
  'A' =>  71.0788,
  'C' => 103.1388,
  'D' => 115.0886,
  'E' => 129.1155,
  'F' => 147.1766,
  'G' =>  57.0519,
  'H' => 137.1411,
  'I' => 113.1594,
  'K' => 128.1741,
  'L' => 113.1594,
  'M' => 131.1926,
  'N' => 114.1038,
  'P' =>  97.1167,
  'Q' => 128.1307,
  'R' => 156.1875,
  'S' =>  87.0782,
  'T' => 101.1051,
  'V' =>  99.1326,
  'W' => 186.2132,
  'Y' => 163.1760
}
AMINO_ACIDS_THREE_TO_ONE =
hash
NUCLEAR_LOCALIZATION_SEQUENCES =
''
ARRAY_NLS_SEQUENCES =
[]
LOCAL_DIRECTORY_FOR_UNIPROT =
#

LOCAL_DIRECTORY_FOR_UNIPROT

This denotes the directory for uniprot-files.

#
"#{log_directory?}uniprot/"
AUTOGENERATED_SQL_FILES_DIR =
#

Bioroebe::AUTOGENERATED_SQL_FILES_DIR

#
"#{log_directory?}autogenerated_sql_files/"
USE_THIS_BROWSER =

opera # Hardcoded value in this case..

'firefox'
ProteinToDNA =
#

Bioroebe::ProteinToDNA

Use an “alias” to the other name.

#
ConvertAminoacidToDNA
Fasta =

Add an “alias” constant to class ParseFasta.

ParseFasta

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.[](i = nil) ⇒ Object

#

Bioroebe[]

Assign a sequence through the [] method.

Note that some aliases to this method exist; see the variants that use self.instance_eval below this method definition.

This method here could be compared to methods such as Integer(). Biopython uses something similar, by the way.

For instance, you can do this too:

Bioroebe << 'ATT'
x = Bioroebe['ATT']
x = Bioroebe << 'ATT'
#


685
686
687
# File 'lib/bioroebe/sequence/sequence.rb', line 685

# ========================================================================= #
# === Bioroebe[]
#
# Passes the given input (nil by default) on to Bioroebe::Sequence.new
# and returns the freshly created object.
# ========================================================================= #
def self.[](i = nil)
  Bioroebe::Sequence.new(i)
end

.ad_hoc_task(this_file = '/root/Bioroebe/table_ids.md') ⇒ Object

#

Bioroebe.ad_hoc_task

This method can be used to specifically run an “ad-hoc” task.

An ad-hoc task is something that we just quickly “hack” together, in order to solve some existing bioinformatics-related problem or another problem that may exist right now.

For instance, in May 2021, this was used for a university course that required us to work with MEGA X and compare different proteins from a phylogenetics point of view.

#


4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4051

# ========================================================================= #
# === Bioroebe.ad_hoc_task
#
# Runs a quickly hacked-together FASTA workflow: the file given as
# argument is handed to download_fasta(), afterwards every .fasta file
# found in the fasta/ subdirectory of the log directory has its header
# rewritten via Bioroebe.overwrite_fasta_header().
#
# An Array as input is joined with ' ' first. The working directory of
# the process is changed by this method (via cd).
# ========================================================================= #
def self.ad_hoc_task(
    this_file = '/root/Bioroebe/table_ids.md'
  )
  require 'bioroebe/fasta_and_fastq/download_fasta.rb'
  require 'bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb'
  this_file = this_file.join(' ') if this_file.is_a? Array
  # (1) Enter the log directory and download the remote FASTA dataset.
  cd(::Bioroebe.log_dir?)
  e('Now downloading some FASTA files, based on this file: ' + this_file)
  download_fasta(this_file)
  # (2) Enter the fasta/ subdirectory of the log directory.
  cd(::Bioroebe.log_dir? + 'fasta/')
  # (3) Batch-rewrite the header of every .fasta file found there.
  Dir['*.fasta'].each do |fasta_path|
    Bioroebe.overwrite_fasta_header(fasta_path)
  end
end

.align_this_string_via_multiple_sequence_alignment(this_string = "PSRARRDAVG--DH--PAVEALP----PQSGPHKKEISFFTVRKEEAADADLWFPS PGGASK--VGQTDNDPQAIKDLP----PQGED------------------------ ") ⇒ Object

#

Bioroebe.align_this_string_via_multiple_sequence_alignment

This method will simply return an Array.

#


957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 957

# ========================================================================= #
# === Bioroebe.align_this_string_via_multiple_sequence_alignment
#
# Turns a multi-line alignment into an Array with one entry per line.
# Surrounding whitespace is stripped and every ' ' character is removed
# before the String is split on newlines.
#
# An Array as input is joined via "\n" first. The String passed in is
# never modified; only non-destructive String methods are used here.
#
# Returns an Array of Strings.
# ========================================================================= #
def self.align_this_string_via_multiple_sequence_alignment(
    this_string =
      "PSRARRDAVG--DH--PAVEALP----PQSGPHKKEISFFTVRKEEAADADLWFPS
       PGGASK--VGQTDNDPQAIKDLP----PQGED------------------------
      "
  )
  this_string = this_string.join("\n") if this_string.is_a? Array
  # strip/delete (rather than strip!/delete!) leave the caller's String
  # untouched, so no dup-if-frozen dance is necessary either.
  this_string.strip.delete(' ').split("\n")
end

.all_aminoacids?Boolean

#

Bioroebe.all_aminoacids?

This method will return all available aminoacids.

Example:

Bioroebe.all_aminoacids? # => ["A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"]
#

Returns:

  • (Boolean)


162
163
164
# File 'lib/bioroebe/constants/constants.rb', line 162

# ========================================================================= #
# === Bioroebe.all_aminoacids?
#
# Query method: hands back the constant ARRAY_AMINO_ACIDS_ALPHABET as-is
# (no copy is made), rather than a true/false value.
# ========================================================================= #
def self.all_aminoacids?
  ARRAY_AMINO_ACIDS_ALPHABET
end

.allowed_dna_nucleotides?Boolean

#

Bioroebe.allowed_dna_nucleotides?

This will return an Array with valid DNA nucleotides.

#

Returns:

  • (Boolean)


522
523
524
# File 'lib/bioroebe/constants/constants.rb', line 522

# ========================================================================= #
# === Bioroebe.allowed_dna_nucleotides?
#
# Returns POSSIBLE_DNA_NUCLEOTIDES minus the 'N' entry. The subtraction
# builds a new object, so the constant itself is left untouched.
# NOTE(review): POSSIBLE_DNA_NUCLEOTIDES is presumably an Array of
# one-letter Strings - it is defined outside of this listing.
# ========================================================================= #
def self.allowed_dna_nucleotides?
  POSSIBLE_DNA_NUCLEOTIDES - ['N']
end

.amino_acid_average_mass(i) ⇒ Object

#

Bioroebe.amino_acid_average_mass

The input to this method should be in the form of the one-letter code for aminoacids. Several aminoacids can be input, of course, such as ‘AGL’.

Do note that, as of March 2020, this method returns a Float if the input was found to be a valid aminoacid.

Usage example:

Bioroebe.amino_acid_average_mass('F') # => 147.1766
#


1985
1986
1987
1988
1989
1990
1991
1992
1993
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 1985

# ========================================================================= #
# === Bioroebe.amino_acid_average_mass
#
# Sums up the average mass of all aminoacids in the given input, using
# AMINO_ACIDS_AVERAGE_MASS_TABLE as lookup table.
#
# The input should use the one-letter code. It may be a String such
# as 'AGL', or an Array whose String entries may each hold one or
# several letters (['A', 'G', 'L'] as well as ['AGL']).
#
# Entries that are not found in the table contribute 0.0.
#
# Returns a Float, rounded to five decimal places.
# ========================================================================= #
def self.amino_acid_average_mass(i)
  use_this_table = AMINO_ACIDS_AVERAGE_MASS_TABLE
  i = [i] unless i.is_a? Array
  # Split every String entry into its individual letters, so that
  # ['AGL'] is treated exactly like 'AGL'.
  one_letter_codes = i.flatten.flat_map {|entry|
    entry.is_a?(String) ? entry.chars : [entry]
  }
  total = one_letter_codes.sum {|entry| use_this_table[entry].to_f }
  ('%.5f' % total).to_f # ← This is our properly formatted result.
end

.amino_acid_monoisotopic_mass(this_aminoacid) ⇒ Object

#

Bioroebe.amino_acid_monoisotopic_mass

We require the monoisotopic table for this method, and return the corresponding match to the given aminoacid.

The input format should be in the one-letter aminoacid abbreviation.

Invocation example:

Bioroebe.amino_acid_monoisotopic_mass 'L' # => 113.08406
Bioroebe.amino_acid_monoisotopic_mass 'K' # => 128.09496
#


2009
2010
2011
2012
2013
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2009

# ========================================================================= #
# === Bioroebe.amino_acid_monoisotopic_mass
#
# Looks up the given one-letter aminoacid in
# AMINO_ACIDS_AVERAGE_MONOISOTOPIC_TABLE and returns the stored mass as
# a Float. A key that is missing from the table yields 0.0, because nil
# is converted via .to_f.
# ========================================================================= #
def self.amino_acid_monoisotopic_mass(this_aminoacid)
  mass = AMINO_ACIDS_AVERAGE_MONOISOTOPIC_TABLE[this_aminoacid]
  mass.to_f
end

.aminoacid_families?Boolean

#

Bioroebe.aminoacid_families?

Feedback which aminoacid-families we know of.

Usage example:

pp Bioroebe.aminoacid_families?; ''
#

Returns:

  • (Boolean)


223
224
225
# File 'lib/bioroebe/constants/constants.rb', line 223

# ========================================================================= #
# === Bioroebe.aminoacid_families?
#
# Query method: returns the constant AMINOACID_FAMILIES as-is, not a
# true/false value.
# ========================================================================= #
def self.aminoacid_families?
  AMINOACID_FAMILIES
end

.aminoacid_frequency(of_this_sequence = '') ⇒ Object

#

Bioroebe.aminoacid_frequency

Usage example:

Bioroebe.aminoacid_frequency('MVTDEGAIYFTKDAARNWKAAVEETVSATLNRTVSSGITGASYYTGTFST')

Would yield the following Hash:

{"M"=>1, "V"=>4, "T"=>9, "D"=>2, "E"=>3, "G"=>4, "A"=>7, "I"=>2, "Y"=>3, "F"=>2, "K"=>2, "R"=>2, "N"=>2, "W"=>1, "S"=>5, "L"=>1}
#


2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2515

# ========================================================================= #
# === Bioroebe.aminoacid_frequency
#
# Counts how often every character occurs in the given sequence.
#
# If an Array is given then only its first element is evaluated. An
# empty Array, as well as nil, is treated like an empty sequence rather
# than raising an error.
#
# Returns a Hash that maps each character onto its count. The Hash has
# a default value of 0, so characters that do not occur report 0.
# ========================================================================= #
def self.aminoacid_frequency(
    of_this_sequence = ''
  )
  of_this_sequence = of_this_sequence.first if of_this_sequence.is_a? Array
  # .to_s guards against nil, e. g. stemming from an empty Array.
  of_this_sequence.to_s.chars.each_with_object(Hash.new(0)) {|this_char, hash|
    hash[this_char] += 1
  }
end

.aminoacid_substitution(from_this_sequence = :default) ⇒ Object

#

Bioroebe.aminoacid_substitution

#


102
103
104
# File 'lib/bioroebe/aminoacids/aminoacid_substitution.rb', line 102

# ========================================================================= #
# === Bioroebe.aminoacid_substitution
#
# Convenience wrapper: instantiates Bioroebe::AminoacidSubstitution with
# the given sequence (the Symbol :default if nothing was passed) and
# returns the new instance.
# ========================================================================= #
def self.aminoacid_substitution(from_this_sequence = :default)
  Bioroebe::AminoacidSubstitution.new(from_this_sequence)
end

.aminoacids?Boolean

#

Bioroebe.aminoacids?

Note that this will return a Hash that looks like this:

{"A"=>{"ala"=>"alanine", "d
#

Returns:

  • (Boolean)


995
996
997
# File 'lib/bioroebe/constants/constants.rb', line 995

# ========================================================================= #
# === Bioroebe.aminoacids?
#
# Query method: returns the constant AMINO_ACIDS as-is (no copy is
# made), not a true/false value.
# ========================================================================= #
def self.aminoacids?
  AMINO_ACIDS
end

.append_what_into(what = 'Hello world!', into = 'test.md') ⇒ Object

#

Bioroebe.append_what_into

This method can be used to append content onto a file.

#


1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 1204

# ========================================================================= #
# === Bioroebe.append_what_into
#
# Appends the first argument onto the file given as second argument.
#
# If the target file does not exist yet, it is created first - and so
# is its parent directory, if that is missing too. Both steps are
# reported to the user via e().
# ========================================================================= #
def self.append_what_into(
    what = 'Hello world!',
    into = 'test.md'
  )
  unless File.exist?(into)
    parent_directory = File.dirname(into)
    unless File.directory?(parent_directory)
      e "#{rev}No directory exists at #{sdir(parent_directory)}"\
        "#{rev}. Thus creating it now."
      create_directory(parent_directory)
    end
    e "#{rev}No file exists at #{sfile(into)}#{rev}"\
      ". Thus creating it now."
    create_file(into)
  end
  # Mode 'a' appends; the block form closes the file handle again.
  File.open(into, 'a') do |file|
    file << what
  end
end

.array_colourize_this_aminoacidObject

#

Bioroebe.array_colourize_this_aminoacid

Query as to which aminoacid we will colourize, if any at all.

#


1268
1269
1270
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 1268

# ========================================================================= #
# === Bioroebe.array_colourize_this_aminoacid
#
# Plain reader for the module-level instance variable
# @array_colourize_this_aminoacid. Returns nil if that variable has not
# been assigned yet.
# ========================================================================= #
def self.array_colourize_this_aminoacid
  @array_colourize_this_aminoacid
end

.atomic_composition(of = 'GGGGA') ⇒ Object

#

Bioroebe.atomic_composition

This method will return the composition of atoms in a given protein, via Hash, such as:

{"C"=>11, "H"=>19, "N"=>5, "O"=>6, "S"=>0}

The Hash keeps track of 11 C atoms, 19 H atoms, 5 N atoms, 6 O atoms and 0 S atoms.

This hash can then be formatted via the method:

Bioroebe.show_atomic_composition()

Which can be found below.

Presently this method works on aminoacids only, but in theory the code could be extended to work with DNA nucleotides and RNA nucleotides as well.

Either way, the one letter abbreviation should be used as input to this method.

When we use aminoacids, we need to remember that a peptide bond deducts 1x H₂O (water). This will have to be deducted from the formula, but only if it is an internal aminoacid. In other words, the only two aminoacids that will behave differently are the first one (since it will miss one -OH group) and the last aminoacid (as this one will lack a -H atom).

Remember that the input sequence to this method should be the one-letter code for the aminoacid sequence at hand.

#


2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2122

# ========================================================================= #
# === Bioroebe.atomic_composition
#
# Returns the atomic composition of the given aminoacid sequence
# (one-letter code) as a Hash such as:
#
#   {"C"=>11, "H"=>19, "N"=>5, "O"=>6, "S"=>0}
#
# The input may be a String ('GGGGA'), an Array of one-letter Strings,
# or an Array whose first entry is a longer String (only that first
# entry is used then). An empty Array falls back onto 'GGGGA'.
#
# Every peptide bond releases one H₂O. For sequences longer than one
# aminoacid the first residue thus loses 'OH', the last one loses 'H'
# and every internal residue loses a full 'H2O'. A single aminoacid is
# counted with its complete formula.
#
# The formulas are read from the YAML file at
# FILE_AMINOACIDS_MOLECULAR_FORMULA; the actual formula arithmetic is
# delegated to the ChemistryParadise module.
# ========================================================================= #
def self.atomic_composition(
    of = 'GGGGA' # ← This should be the aminoacid sequence.
  )
  begin
    require 'chemistry_paradise/split_molecule_names.rb'
    require 'chemistry_paradise/toplevel_methods/remove_this_molecule_from.rb'
  rescue LoadError
    if is_on_roebe?
      puts 'Two files from the chemistry_paradise gem are not available.'
    end
  end
  # ======================================================================= #
  # Load up the molecular formula for each aminoacid next. This will
  # be used as our reference-point for calculating the composition.
  #
  # NOTE(review): the constant listing of this module spells a similar
  # constant FILE_AMINO_ACIDS_MOLECULAR_FORMULA - confirm that the
  # name used here is defined as well.
  # ======================================================================= #
  dataset_molecular_formula_for_the_aminoacids = YAML.load_file(
    FILE_AMINOACIDS_MOLECULAR_FORMULA
  )
  # ======================================================================= #
  # Normalize the input into an Array of one-letter entries.
  # ======================================================================= #
  if of.is_a?(Array)
    if of.empty?
      of = 'GGGGA' # In this case reinstate the default.
    elsif of.first.is_a?(String) and of.first.size > 1
      of = of.first.split(//) # Split it on a per-character basis here.
    end
  end
  of = of.split(//) if of.is_a? String
  of = [of] unless of.is_a? Array
  # ======================================================================= #
  # Build up the default values, for the atoms C, H, N, O and S.
  # ======================================================================= #
  hash_keeping_track_of_the_atomic_composition = {
    'C' => 0, 'H' => 0, 'N' => 0, 'O' => 0, 'S' => 0
  }
  # ======================================================================= #
  # The deduction of water only makes sense if at the least one peptide
  # bond exists, that is, if we have more than one aminoacid. This has
  # to look at the number of aminoacids - the individual entries are
  # always just one letter long at this point.
  # ======================================================================= #
  has_peptide_bonds = (of.size > 1)
  position_of_the_last_aminoacid = of.size - 1
  of.each_with_index {|this_amino_acid, position_of_that_aminoacid|
    # ===================================================================== #
    # Next, we have to obtain the formula for this amino acid.
    # ===================================================================== #
    name_of_the_amino_acid = AMINO_ACIDS_ENGLISH[this_amino_acid]
    formula_for_this_amino_acid =
      dataset_molecular_formula_for_the_aminoacids[name_of_the_amino_acid]
    if has_peptide_bonds
      remove_this_molecule =
        case position_of_that_aminoacid # case tag
        when 0 # This is the first aminoacid. It loses only one 'OH' group.
          'OH'
        when position_of_the_last_aminoacid # The last entry loses only one 'H'.
          'H'
        else # Else it will lose a full H₂O group.
          'H2O'
        end
      formula_for_this_amino_acid = ::ChemistryParadise.remove_this_molecule_from(
        remove_this_molecule, formula_for_this_amino_acid
      )
    end
    array_chemical_formula = ::ChemistryParadise.split_this_molecular_formula_into_a_hash(
      formula_for_this_amino_acid
    )
    array_chemical_formula.each {|molecule_and_number| # e. g. 'H13'
      if molecule_and_number =~ /\d+/ # If it has at the least one number.
        molecule_and_number =~ /([A-Z]+)(\d{1,2})/ # See: https://rubular.com/r/nCojEDcY6g
        molecule = Regexp.last_match(1).to_s
        n_times  = Regexp.last_match(2).to_i
        hash_keeping_track_of_the_atomic_composition[molecule] += n_times
      else # else it must be 1, since there is no other number, such as 'N'.
        hash_keeping_track_of_the_atomic_composition[molecule_and_number] += 1
      end
    }
  }
  return hash_keeping_track_of_the_atomic_composition
end

.automatically_rename_this_fasta_file(fasta_file) ⇒ Object

#

Bioroebe.automatically_rename_this_fasta_file

This method will automatically (try to) rename an existing fasta file, by tapping into the method called .return_new_filename_based_on_fasta_identifier().

#


135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/bioroebe/toplevel_methods/fasta_and_fastq.rb', line 135

# ========================================================================= #
# === Bioroebe.automatically_rename_this_fasta_file
#
# Renames every given FASTA file to the name that is returned by
# return_new_filename_based_on_fasta_identifier().
#
# The argument may be a single path or a (nested) Array of paths; nil
# entries are dropped. Paths that do not exist are reported through
# no_file_exists_at() and otherwise skipped.
#
# Returns the flattened Array of paths that was worked on.
# ========================================================================= #
def self.automatically_rename_this_fasta_file(fasta_file)
  candidates = [fasta_file].flatten.compact
  candidates.each do |fasta_path|
    unless File.exist?(fasta_path)
      no_file_exists_at(fasta_path)
      next
    end
    target_name = return_new_filename_based_on_fasta_identifier(fasta_path)
    erev "Renaming #{sfile(fasta_path)}#{rev} "\
         "to #{sfile(target_name)} #{rev}next."
    Bioroebe.rename(fasta_path, target_name)
  end
end

.available_blosum_matrices?Boolean

#

Bioroebe.available_blosum_matrices?

This method will return an Array of all available blosum matrices.

Example output:

["blosum45", "blosum50", "blosum62", "blosum80", "blosum90", "blosum_matrix"]
#

Returns:

  • (Boolean)


78
79
80
81
82
# File 'lib/bioroebe/blosum/blosum.rb', line 78

# ========================================================================= #
# === Bioroebe.available_blosum_matrices?
#
# Returns an Array holding the names of all BLOSUM files reported by
# Bioroebe::Blosum.available_blosum_files?, reduced to the basename
# with a trailing '.yml' removed - e. g. "blosum45".
# ========================================================================= #
def self.available_blosum_matrices?
  yaml_files = Bioroebe::Blosum.available_blosum_files?
  yaml_files.map do |path|
    filename = File.basename(path)
    filename.delete_suffix('.yml')
  end
end

.available_codon_tables?Boolean

#

Bioroebe.available_codon_tables?

#

Returns:

  • (Boolean)


125
126
127
# File 'lib/bioroebe/codons/show_codon_tables.rb', line 125

# ========================================================================= #
# === Bioroebe.available_codon_tables?
#
# Returns the values of whatever ::Bioroebe::CodonTables.definitions?
# hands back, in their original order. This is a query for data, not a
# true/false value.
# ========================================================================= #
def self.available_codon_tables?
  ::Bioroebe::CodonTables.definitions?.values # Do not sort this.
end

.base_composition(i = '52%GC') ⇒ Object

#

Bioroebe.base_composition

This method can be used to query the composition of a given DNA sequence, that is, in percentage, the values for A, T, C and G.

This method will then return a Hash, consisting of the percentage values of A, T, C and G in the given DNA sequence at hand.

Note that the input to this method has to include a ‘%’ character, at the least up until March 2020. Past March 2020 this requirement was dropped, but I still think it is visually more elegant to include a ‘%’ character.

#


3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 3908

# ========================================================================= #
# === Bioroebe.base_composition
#
# Returns a Hash with the percentage values for A, T, C and G.
#
# Two kinds of input are supported:
#
#   (1) A shorthand such as '52%GC'. The nucleotides named after the
#       '%' share the given percentage evenly, the remaining ones of
#       A, T, C and G share the rest evenly.
#   (2) A plain sequence such as 'ATTGC', or the path to an existing
#       file holding such a sequence (lines starting with '>' are
#       ignored). Here the characters are counted.
#
# An Array is joined via ' '; an empty Array and nil fall back onto the
# default '52%GC'.
#
# Shares that divide evenly remain Integers (as before); uneven shares
# become Floats rounded to two decimals rather than being truncated.
# ========================================================================= #
def self.base_composition(
    i = '52%GC'
  )
  if i.is_a? Array
    i = i.empty? ? '52%GC' : i.join(' ').strip
  end
  i = '52%GC' if i.nil? # Same default as above.
  # ======================================================================= #
  # Add support for Files here. File.file? (rather than File.exist?)
  # makes sure that a directory of that name is not read.
  # ======================================================================= #
  if File.file?(i)
    i = File.readlines(i, chomp: true).reject {|line|
      line.start_with? '>'
    }.join
  end
  # ======================================================================= #
  # We must use a Hash for this.
  # ======================================================================= #
  hash = {
    'A' => 0,
    'T' => 0,
    'C' => 0,
    'G' => 0,
  }
  if i.include? '%'
    # Divide a percentage evenly; stay Integer whenever that is exact.
    share = lambda {|total, n_parts|
      (total % n_parts).zero? ? total / n_parts : (total.to_f / n_parts).round(2)
    }
    splitted = i.split('%').map(&:strip)
    frequency = splitted.first.to_i
    opposite_frequency = 100 - frequency
    specified_nucleotides = splitted.last.chars.uniq
    specified_nucleotides.each {|this_nucleotide|
      hash[this_nucleotide] = share.call(frequency, specified_nucleotides.size)
    }
    # ===================================================================== #
    # Next calculate the missing nucleotides. These are the ones that
    # were not named by the user - a value of 0 is not a reliable
    # indicator, as '0%GC' is perfectly valid input.
    # ===================================================================== #
    missing_nucleotides = hash.keys - specified_nucleotides
    missing_nucleotides.each {|this_nucleotide|
      hash[this_nucleotide] = share.call(opposite_frequency, missing_nucleotides.size)
    }
  else
    frequency = Hash.new(0)
    i.each_char {|entry| frequency[entry] += 1 }
    sum = frequency.values.sum
    frequency.each_pair {|this_nucleotide, value|
      hash[this_nucleotide] = ((value * 100.0) / sum).round(2)
    }
  end
  return hash
end

.batch_create_windows_executablesObject

#

Bioroebe.batch_create_windows_executables

This method is only useful on Windows. We will use “ocra” to create various .exe files that have the desired widget-functionality.

Note that the functionality depends on the roebe-gem.

#


2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2837

# ========================================================================= #
# === Bioroebe.batch_create_windows_executables
#
# Hands every file of a hardcoded list to Roebe.ocra_build(), together
# with the name to use - the basename of the file without '.rb'.
#
# A LoadError while requiring the roebe add-on is ignored on purpose;
# NOTE(review): if the Roebe module is not available for another
# reason either, then the call below will fail.
# ========================================================================= #
def self.batch_create_windows_executables
  begin
    require 'roebe/custom_methods/module.rb'
  rescue LoadError
    # Deliberately ignored - see above.
  end
  [
    '/home/x/programming/ruby/src/bioroebe/lib/bioroebe/gui/libui/hamming_distance/hamming_distance.rb'
  ].each do |ruby_file|
    executable_name = File.basename(ruby_file).delete_suffix('.rb')
    Roebe.ocra_build(ruby_file, executable_name)
  end
end

.be_verbose?Boolean

#

Bioroebe.be_verbose?

#

Returns:

  • (Boolean)


164
165
166
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 164

# ========================================================================= #
# === Bioroebe.be_verbose?
#
# Plain reader for the module-level instance variable @be_verbose.
# Returns nil if that variable has not been assigned yet.
# ========================================================================= #
def self.be_verbose?
  @be_verbose
end

.bisulfite_treatment(i) ⇒ Object

#

Bioroebe.bisulfite_treatment

Simply convert all C into U. The underlying idea here is that bisulfite will convert unmethylated cytosines into uracil.

Usage example:

Bioroebe.bisulfite_treatment('CCCGCAATGCATACCTCGCCG') # => "UUUGUAATGUATAUUTUGUUG"
#


2822
2823
2824
2825
2826
2827
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2822

# ========================================================================= #
# === Bioroebe.bisulfite_treatment
#
# Returns a copy of the sequence in which every uppercase 'C' has been
# replaced by 'U'. All other characters - lowercase 'c' included - are
# kept as they are.
#
# An Array is joined without separator and stripped first; a String is
# used unmodified.
# ========================================================================= #
def self.bisulfite_treatment(i)
  sequence = i.is_a?(Array) ? i.join('').strip : i
  sequence.tr('C', 'U')
end

.blast_neighborhood(this_mer = 'CTC', optional_apply_filter_for_score_higher_than = nil) ⇒ Object

#

Bioroebe.blast_neighborhood

The second argument to this method is a score-filter, e. g. to select only entries that have a score higher than 1.

#


4248
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261
4262
4263
4264
4265
4266
4267
4268
4269
4270
4271
4272
4273
4274
4275
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
4319
4320
4321
4322
4323
4324
4325
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4248

# ========================================================================= #
# === Bioroebe.blast_neighborhood
#
# Compares the given 3-mer against all 64 possible DNA triplets and
# reports a score for each of them via e(). Every matching position
# adds +2 to the score, every mismatch adds -2, so the score ranges
# from -6 to 6.
#
# The first argument may also be an Array (then its first element is
# used); nil falls back onto 'CTC'.
#
# The optional second argument acts as filter: only triplets with a
# score higher than this value are reported. A String such as '1',
# e. g. coming from the commandline, is converted into a number.
#
# Returns the Array of all 64 triplets, in the order A, T, C, G.
# ========================================================================= #
def self.blast_neighborhood(
    this_mer                                    = 'CTC',
    optional_apply_filter_for_score_higher_than = nil
  )
  this_mer = this_mer.first if this_mer.is_a? Array
  this_mer = 'CTC' if this_mer.nil? # Set the same default as above.
  minimum_score = optional_apply_filter_for_score_higher_than
  # Integer > String would raise an ArgumentError, thus convert here.
  minimum_score = minimum_score.to_f if minimum_score.is_a? String
  match_score     =  2
  mis_match_score = -2
  # ======================================================================= #
  # All 64 triplets: AAA, AAT, AAC, AAG, ATA, ... up to GGG.
  # ======================================================================= #
  compare_these_sequences = %w(A T C G).repeated_permutation(3).map(&:join)
  compare_these_sequences.each {|this_sequence|
    score = (0..2).sum {|position|
      if this_sequence[position] == this_mer[position]
        match_score
      else
        mis_match_score
      end
    }
    # Skip entries that do not pass the (optional) filter.
    next if minimum_score && !(score > minimum_score)
    e "#{this_sequence}: score of #{score.to_s.rjust(3)}"
  }
end

.blosum_directory?Boolean

#

Bioroebe.blosum_directory?

#

Returns:

  • (Boolean)


899
900
901
# File 'lib/bioroebe/constants/constants.rb', line 899

# ========================================================================= #
# === Bioroebe.blosum_directory?
#
# Returns the path of the blosum directory as a String: 'blosum/'
# appended to the result of project_yaml_directory?. The returned path
# therefore ends with a '/'.
# ========================================================================= #
def self.blosum_directory?
  "#{project_yaml_directory?}blosum/"
end

.blosum_matrix(i = FILE_BLOSUM_MATRIX) ⇒ Object

#

Bioroebe.blosum_matrix

#


801
802
803
# File 'lib/bioroebe/constants/constants.rb', line 801

# ========================================================================= #
# === Bioroebe.blosum_matrix
#
# Parses the YAML file at the given path (FILE_BLOSUM_MATRIX by default)
# and returns the resulting dataset. The file is read again on every
# call; errors raised by YAML.load_file are not rescued here.
# ========================================================================= #
def self.blosum_matrix(i = FILE_BLOSUM_MATRIX)
  YAML.load_file(i)
end

.calculate_exponential_growth(number_of_cells = 10, number_of_divisions = 10) ⇒ Object

#

Bioroebe.calculate_exponential_growth

This method can be used to calculate how many bacteria will exist after n cell divisions (provided that we know, and supply to this method, how many bacteria existed when we started our calculation).

#


4787
4788
4789
4790
4791
4792
4793
4794
4795
4796
4797
4798
4799
4800
4801
4802
4803
4804
4805
4806
4807
4808
4809
4810
4811
4812
4813
4814
4815
4816
4817
4818
4819
4820
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4787

# ========================================================================= #
# === Bioroebe.calculate_exponential_growth
#
# Calculates how many cells exist after n cell divisions, that is:
#
#   number_of_cells * (2 ** number_of_divisions)
#
# Both arguments default to 10, also if nil is passed. They are
# converted via .to_i, as there are no "1.3" cells.
#
# Alternatively a Hash can be given as first argument. It understands
# the keys :number_of_cells (or :n_cells) and :n_divisions (or
# :number_of_divisions). A missing cell-key means 10 cells. The Hash
# is only read, never modified.
# ========================================================================= #
def self.calculate_exponential_growth(
    number_of_cells     = 10,
    number_of_divisions = 10
  )
  number_of_cells     = 10 if number_of_cells.nil?     # Default value.
  number_of_divisions = 10 if number_of_divisions.nil? # Default value.
  # ======================================================================= #
  # === Hashes
  #
  # Handle Hash as input given. We use lookups rather than .delete() so
  # that the Hash of the caller stays intact.
  # ======================================================================= #
  if number_of_cells.is_a? Hash
    options = number_of_cells
    if options.has_key? :n_divisions
      number_of_divisions = options[:n_divisions]
    elsif options.has_key? :number_of_divisions
      number_of_divisions = options[:number_of_divisions]
    end
    number_of_cells =
      if options.has_key? :number_of_cells
        options[:number_of_cells]
      elsif options.has_key? :n_cells
        options[:n_cells]
      else
        10 # No cell-key was given, so use the default value.
      end
  end
  number_of_cells.to_i * (2 ** number_of_divisions.to_i)
end

.calculate_levensthein_distance(string1 = 'TTACCC', string2 = 'TTTCCC', be_verbose = true) ⇒ Object

#

Bioroebe.calculate_levensthein_distance

The following method is based on

http://rosettacode.org/wiki/Levenshtein_distance#Ruby, slightly modified.

To test this code, do:

[ ['kitten','sitting'], ['saturday','sunday'], ["rosettde", "raisethyrd"] ].each { |s,t|
  puts "calculate_levensthein_distance('#{s}', '#{t}') = #{Bioroebe.calculate_levensthein_distance(s, t)}"
}

Note, however, that rubygems offers a Levenshtein variant too.

#


27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/bioroebe/calculate/calculate_levensthein_distance.rb', line 27

# ========================================================================= #
# === Bioroebe.calculate_levensthein_distance
#
# Returns the Levenshtein (edit) distance between two strings, that is
# the minimal number of single-character insertions, deletions and
# substitutions needed to turn one string into the other.
#
# Accepted input:
#
#   - two Strings (string1, string2)
#   - an Array with at least two entries as first argument; the first
#     two entries are compared and string2 is ignored
#   - one String containing a space ("kitten sitting"); the first and
#     the last word are compared and string2 is ignored
#
# Pass :be_quiet or false as third argument to suppress the report that
# is otherwise printed. The Array given by the caller is never modified.
# ========================================================================= #
def self.calculate_levensthein_distance(
    string1    = 'TTACCC',
    string2    = 'TTTCCC',
    be_verbose = true
  )
  be_verbose = false if be_verbose == :be_quiet
  if string1.is_a?(Array) && (string1.size > 1)
    # ===================================================================== #
    # Read both entries by index rather than via .shift, so that the
    # Array of the caller (for instance ARGV) is left untouched.
    # ===================================================================== #
    string1, string2 = string1[0], string1[1]
  elsif string1.is_a?(String) && string1.include?(' ')
    splitted = string1.split(' ')
    string2  = splitted.last
    string1  = splitted.first
  end
  m = string1.length
  n = string2.length
  return m if n == 0 # The distance to an empty string is the other length.
  return n if m == 0
  # ======================================================================= #
  # arrays[i][j] will hold the distance between the first i characters
  # of string1 and the first j characters of string2.
  # ======================================================================= #
  arrays = Array.new(m + 1) { Array.new(n + 1) }
  (0 .. m).each {|i| arrays[i][0] = i }
  (0 .. n).each {|j| arrays[0][j] = j }
  (1 .. n).each {|j|
    (1 .. m).each {|i|
      arrays[i][j] =
        if string1[i - 1] == string2[j - 1] # adjust index into string
          arrays[i - 1][j - 1]         # no operation required
        else
          [
            arrays[i - 1][j] + 1,      # deletion     operation
            arrays[i][j - 1] + 1,      # insertion    operation
            arrays[i - 1][j - 1] + 1   # substitution operation
          ].min
        end
    }
  }
  result = arrays[m][n]
  if be_verbose
    e rev+'The two strings '+simp(string1.to_s)+rev+' and '+
      simp(string2.to_s)+rev+' have n differences ('+
      steelblue('edit distance')+rev+'):'
    e "  #{simp(result.to_s)}"
  end
  return result
end

.calculate_melting_temperature_for_more_than_thirteen_nucleotides(i) ⇒ Object

#

Bioroebe.calculate_melting_temperature_for_more_than_thirteen_nucleotides

An alias exists for this method, called Bioroebe.melting_temperature().

Usage example for the latter:

x = Bioroebe.melting_temperature('CCGTGTCGTACATCG')
#


269
270
271
# File 'lib/bioroebe/calculate/calculate_melting_temperature_for_more_than_thirteen_nucleotides.rb', line 269

# ========================================================================= #
# === Bioroebe.calculate_melting_temperature_for_more_than_thirteen_nucleotides
#
# Convenience wrapper: instantiates
# Bioroebe::CalculateMeltingTemperatureForMoreThanThirteenNucleotides
# with the given input and returns that new instance.
# ========================================================================= #
def self.calculate_melting_temperature_for_more_than_thirteen_nucleotides(i)
  calculator_class =
    ::Bioroebe::CalculateMeltingTemperatureForMoreThanThirteenNucleotides
  calculator_class.new(i)
end

.calculate_n50_value(i = [ 1989, 1934, 1841, 1785, 1737, 1649, 1361, 926, 848, 723 ]) ⇒ Object

#

Bioroebe.calculate_n50_value

This method will calculate the N50 value of the given input. The input to this method should be a sorted Array.

#


3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 3037

# ========================================================================= #
# === Bioroebe.calculate_n50_value
#
# Returns the N50 value of the given contig lengths: walking through
# the lengths in the order given, this is the first length at which the
# running total reaches at least half of the sum of all lengths.
#
# The input should be an Array sorted from the longest to the shortest
# contig. Entries may be Strings (as they are when coming from ARGV);
# they are converted via .to_i on a copy, so the Array of the caller is
# not modified.
#
# Returns nil for an empty Array.
# ========================================================================= #
def self.calculate_n50_value(
    i = [
      1989, 1934, 1841,
      1785, 1737, 1649,
      1361,  926,  848,
       723
    ]
  )
  contig_lengths = i.map(&:to_i) # .map rather than .map! - keep the input intact.
  half = contig_lengths.sum / 2.0
  running_total = 0
  contig_lengths.find {|this_length|
    running_total += this_length
    # ===================================================================== #
    # N50 is defined via "at least 50%", thus >= rather than >.
    # ===================================================================== #
    running_total >= half
  }
end

.calculate_original_amount_of_cells_of_exponential_growth(number_of_cells = 1600, number_of_divisions = 5) ⇒ Object

#

Bioroebe.calculate_original_amount_of_cells_of_exponential_growth

The first argument, number_of_cells, means “how many cells do we have now/currently”. This is necessary, in order to calculate how many cells we used to have initially.

#


4829
4830
4831
4832
4833
4834
4835
4836
4837
4838
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4829

# ========================================================================= #
# === Bioroebe.calculate_original_amount_of_cells_of_exponential_growth
#
# Given the current number of cells and the number of divisions that
# happened, return how many cells there were at the beginning. Both
# arguments are converted via .to_i first; the result is
# current_cells / (2 ** divisions).
# ========================================================================= #
def self.calculate_original_amount_of_cells_of_exponential_growth(
    number_of_cells     = 1600, # 1600 cells to start with.
    number_of_divisions =    5  #    5 generations by default.
  )
  current_amount_of_cells = number_of_cells.to_i
  growth_factor = 2 ** number_of_divisions.to_i # Each division doubles the cells.
  current_amount_of_cells / growth_factor
end

.calculate_the_frequencies_of_this_species(i = :homo_sapiens) ⇒ Object

#

Bioroebe.calculate_the_frequencies_of_this_species

#


2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2454

# ========================================================================= #
# === Bioroebe.calculate_the_frequencies_of_this_species
#
# Loads a YAML file mapping codons to frequencies (such as "GAC"=>25.1),
# sums these frequencies per aminoacid and prints one line per
# aminoacid, with the summed value divided by 10 and shown as percent.
#
# The input may be :homo_sapiens, :homo, :human or :default (or nil, or
# an empty Array), all of which select the bundled Homo sapiens file.
# Any other input is handed to YAML.load_file() as-is, after .to_sym
# has been called on it for the comparison above.
# ========================================================================= #
def self.calculate_the_frequencies_of_this_species(
    i = :homo_sapiens
  )
  require 'bioroebe/sequence/dna.rb'
  require 'yaml'
  i = :default if i.nil?
  i << :homo_sapiens if i.is_a?(Array) && i.empty?
  i = i.first if i.is_a?(Array)
  # ======================================================================= #
  # === :homo_sapiens
  # ======================================================================= #
  if %i( homo_sapiens homo human default ).include?(i.to_sym)
    i = "#{project_base_directory?}"\
        "codon_tables/frequencies/9606_Homo_sapiens.yml"
  end
  frequency_per_aminoacid = Hash.new(0)
  # ======================================================================= #
  # The dataset has entries such as: "GAC"=>25.1
  # ======================================================================= #
  YAML.load_file(i).each_pair {|codon, frequency|
    frequency_per_aminoacid[Bioroebe.to_aa(codon)] += frequency
  }
  e
  # ======================================================================= #
  # Convert it into percent:
  # ======================================================================= #
  frequency_per_aminoacid.each_pair {|aminoacid, summed_frequency|
    percentage = ((summed_frequency * 100.0) / 1000.0).round(3).to_s
    percentage = '%.2f' % percentage
    e '  '+
      steelblue(aminoacid).to_s+' '+
      royalblue(
        percentage.rjust(6)+'%'
      )
  }
  e
end

.calculate_weight_of_the_aminoacids_in_this_fasta_file(fasta_file) ⇒ Object

#

Bioroebe.calculate_weight_of_the_aminoacids_in_this_fasta_file

This method will return a Hash containing the weight of the aminoacids in a .fasta file.

Usage example:

x = Bioroebe.calculate_weight_of_the_aminoacids_in_this_fasta_file('viruses.fa')

This may yield a Hash such as the following:

{ "sp|P23046|NSP5_ROTBV"  => 21647.5341,
  "sp|Q81835|SHDAG_HDVU2" => 22030.6392,
  "sp|A5HBD7|ST_POVWU"    => 23433.3773,
  "sp|Q91FT8|234R_IIV6"   => 21076.778 }
#


42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/bioroebe/toplevel_methods/fasta_and_fastq.rb', line 42

# ========================================================================= #
# === Bioroebe.calculate_weight_of_the_aminoacids_in_this_fasta_file
#
# Parses the given FASTA file via Bioroebe.parse_fasta_quietly() and
# returns a Hash that maps each short header to the summed weight of
# all characters of its sequence, rounded to four decimal places. The
# weight of each character comes from Bioroebe.weight_of_this_aminoacid?().
#
# If no file exists at the given path, a message is printed instead.
# ========================================================================= #
def self.calculate_weight_of_the_aminoacids_in_this_fasta_file(fasta_file)
  unless File.exist? fasta_file
    return e('No file exists at '+fasta_file.to_s+'.')
  end
  parsed_fasta = Bioroebe.parse_fasta_quietly(fasta_file)
  headers      = parsed_fasta.short_headers?
  sequences    = parsed_fasta.sequences?
  weight_per_header = {}
  headers.each_with_index {|header, position|
    # Headers and sequences are matched by their position.
    total_weight = sequences[position].chars.inject(0) {|total, aminoacid|
      total + Bioroebe.weight_of_this_aminoacid?(aminoacid)
    }
    weight_per_header[header] = total_weight.round(4)
  }
  weight_per_header
end

.can_base_pair_with?(a, b) ⇒ Boolean

#

Bioroebe.can_base_pair_with?

Usage example:

Bioroebe.can_base_pair_with?('A','T') # => true
Bioroebe.can_base_pair_with?('A','G') # => false
#

Returns:

  • (Boolean)


4929
4930
4931
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4929

# ========================================================================= #
# === Bioroebe.can_base_pair_with?
#
# Returns true if b is equal to what Bioroebe.partner_nucleotide()
# returns for a, and false otherwise.
# ========================================================================= #
def self.can_base_pair_with?(a, b)
  expected_partner = ::Bioroebe.partner_nucleotide(a)
  expected_partner == b
end

.cat(i = nil) ⇒ Object

#

Bioroebe.cat (cat tag)

A variant of cat to use here.

#


5025
5026
5027
5028
5029
5030
5031
5032
5033
5034
5035
5036
5037
5038
5039
5040
5041
5042
5043
5044
5045
5046
5047
5048
5049
5050
5051
5052
5053
5054
5055
5056
5057
5058
5059
5060
5061
5062
5063
5064
5065
5066
5067
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 5025

# ========================================================================= #
# === Bioroebe.cat (cat tag)
#
# Displays the content of a file, line by line and with two spaces of
# padding. Files ending in .fasta or .fa are handed to
# Bioroebe::ParseFasta instead.
#
# The input may be an Array (only the first entry is used), may contain
# '$' (in which case it is passed through convert_global_env()), or may
# consist of digits only, in which case it is treated as a 1-based
# position into Dir['*'].
# ========================================================================= #
def self.cat(
    i = nil
  )
  i = i.first if i.is_a? Array
  if i
    i = convert_global_env(i) if i.include? '$'
    i = Dir['*'][i.to_i - 1] if i =~ /^\d+$/
  end
  if i.nil?
    return erev('Please provide an argument to Bioroebe.cat() (the name of a file)')
  end
  # ======================================================================= #
  # === Handle directories next
  # ======================================================================= #
  if File.directory? i
    return erev("We can not read from `#{sdir(i)}#{rev}` as it is a directory.")
  end
  # ======================================================================= #
  # === Handle non-existing files next
  # ======================================================================= #
  unless File.exist?(i)
    e "#{swarn('Trying to display the file `')}#{sfile(i)}#{swarn('`')}"
    return e(swarn('but it does not exist.'))
  end
  # ======================================================================= #
  # From here on the file is known to exist.
  # ======================================================================= #
  extension = File.extname(i).delete('.')
  if %w( fasta fa ).include?(extension)
    e 'This is a fasta file, so rather than cat-ing the content,'
    e 'we will send this dataset to the ParseFasta class.'
    require 'bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb'
    Bioroebe::ParseFasta.new(i)
  else # The default here.
    e "Now displaying the file `#{sfile(i)}`."
    File.readlines(i).each {|line| e "  #{line.chomp}" } # With a bit of padding.
  end
end

.change_directory(i = '$HOME', be_verbose = false) ⇒ Object

#

Bioroebe.change_directory

This method allows us to change the directory.

Bioroebe.cd() is an alias to the method here.

#


4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
4231
4232
4233
4234
4235
4236
4237
4238
4239
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4187

# ========================================================================= #
# === Bioroebe.change_directory
#
# Changes the working directory of the current process to the given
# target, if that target is an existing directory.
#
# The first argument may be a path or one of these shortcuts:
#
#   :home_directory, :default, nil        # => log_dir?
#   :download_dir, :download_directory    # => download_dir?
#   'base', 'logdir',
#   :bioroebe_log_directory               # => save_dir? (unless a file
#                                         #    with that name exists)
#
# Pass :be_verbose or :do_report_current_directory (or true) as second
# argument to report the directory, or the failure to enter it.
#
# The argument of the caller is never modified: the trailing '/' is
# added to a new String.
# ========================================================================= #
def self.change_directory(
    i          = '$HOME',
    be_verbose = false
  )
  case be_verbose
  # ======================================================================= #
  # === :do_report_current_directory
  # ======================================================================= #
  when :do_report_current_directory,
       :be_verbose
    be_verbose = true
  end
  case i # Do some sanitizing here. (case tag)
  # ======================================================================= #
  # === :home_directory
  # ======================================================================= #
  when :home_directory,
       :default,
       nil # ← Nil is also assumed to refer to this :default value.
    i = log_dir?
  # ======================================================================= #
  # === :download_dir
  # ======================================================================= #
  when :download_dir,':download_dir',
       :download_directory,':download_directory'
    i = download_dir?
  # ======================================================================= #
  # === :bioroebe_log_directory
  # ======================================================================= #
  when 'base',
       'logdir',
       :bioroebe_log_directory
    # ===================================================================== #
    # Enter the main log dir, unless a file exists with the same name.
    # ===================================================================== #
    i = save_dir? unless File.exist?(i.to_s) # .to_s to avoid Symbols here.
  end
  # ======================================================================= #
  # Build a new String here rather than appending to the argument via <<.
  # This leaves the String of the caller untouched and also copes with
  # Symbols that matched none of the entries above. An empty target is
  # left empty, so that it is never turned into '/'.
  # ======================================================================= #
  target = i.to_s
  target = "#{target}/" unless target.empty? || target.end_with?('/')
  if File.directory? target
    e sdir(target) if be_verbose # Also colourize the directory and output it.
    Dir.chdir(target)
  elsif be_verbose
    erev "No directory called `#{sdir(target)}#{rev}` exists,"
    erev 'thus we can not cd to this target.'
  end
end

.clear_array_colourize_this_aminoacidObject

#

Bioroebe.clear_array_colourize_this_aminoacid

#


1259
1260
1261
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 1259

# ========================================================================= #
# === Bioroebe.clear_array_colourize_this_aminoacid
#
# Resets @array_colourize_this_aminoacid to a new, empty Array and
# returns that Array.
# ========================================================================= #
def self.clear_array_colourize_this_aminoacid
  @array_colourize_this_aminoacid = Array.new
end

.clear_stop_codonsObject

#

Bioroebe.clear_stop_codons

#


256
257
258
# File 'lib/bioroebe/codons/codons.rb', line 256

# ========================================================================= #
# === Bioroebe.clear_stop_codons
#
# Resets @stop_codons to a new, empty Array and returns that Array.
# ========================================================================= #
def self.clear_stop_codons
  @stop_codons = Array.new
end

.cleave(with = :with_trypsin, i = ARGV) ⇒ Object

#

Bioroebe.cleave (cleave tag)

This is the general entry-point for “cleave-related” activities, such as cleaving a polypeptide or a DNA strand via an enzyme.

#


56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/bioroebe/cleave_and_digest/cleave.rb', line 56

# ========================================================================= #
# === Bioroebe.cleave (cleave tag)
#
# General entry point for cleave-related actions. Right now only
# trypsin is handled: for :with_trypsin, :trypsin and :default the
# input is passed to Bioroebe.cleave_with_trypsin(). Any other value
# for the first argument yields nil.
# ========================================================================= #
def self.cleave(
    with = :with_trypsin,
    i    = ARGV
  )
  # ======================================================================= #
  # === :with_trypsin
  # ======================================================================= #
  trypsin_aliases = [:with_trypsin, :trypsin, :default]
  return nil unless trypsin_aliases.include?(with)
  Bioroebe.cleave_with_trypsin(i)
end

.cleave_with_trypsin(this_sequence = ARGV) ⇒ Object

#

Bioroebe.cleave_with_trypsin

Trypsin cleaves peptides on the C-terminal side of lysine and arginine amino acid residues. If a proline residue is on the carboxyl side of the cleavage site, the cleavage will not occur. If an acidic residue is on either side of the cleavage site, the rate of hydrolysis has been shown to be slower.

This method will return an Array.

#


21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/bioroebe/cleave_and_digest/cleave.rb', line 21

# ========================================================================= #
# === Bioroebe.cleave_with_trypsin
#
# Splits the given aminoacid sequence after every K (lysine) and every
# R (arginine), unless the next residue is P (proline). The fragments
# are returned as an Array of Strings, in their original order.
#
# If an Array is given then only its first entry is used. nil and an
# empty String yield an empty Array. A sequence that ends with a
# cleavage site does not produce a trailing empty fragment.
# ========================================================================= #
def self.cleave_with_trypsin(
    this_sequence = ARGV
  )
  # ======================================================================= #
  # === Handle Arrays first
  # ======================================================================= #
  this_sequence = this_sequence.first if this_sequence.is_a? Array
  sequence = this_sequence.to_s # nil becomes '' here.
  array_cleave_positions = [] # This is the Array that will be returned.
  subrange = ''.dup
  sequence.each_char.with_index {|this_char, index|
    subrange << this_char
    next unless (this_char == 'K') || (this_char == 'R')
    next if sequence[index + 1] == 'P' # Exclude Proline.
    array_cleave_positions << subrange
    subrange = ''.dup
  }
  # ======================================================================= #
  # Whatever follows the last cleavage site is the final fragment - but
  # only if there is anything left at all.
  # ======================================================================= #
  array_cleave_positions << subrange unless subrange.empty?
  array_cleave_positions
end

.cliner(use_this_token = :default_token, how_many_times = 80, use_this_colour = nil) ⇒ Object

#

Bioroebe.cliner

The first argument denotes which token we will use, such as ‘#’, for the line that is to be displayed.

#


2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2555

# ========================================================================= #
# === Bioroebe.cliner
#
# Prints a line that consists of one token repeated several times.
#
# The first argument is the token; :default_token and :default stand
# for '='. It may also be a Hash with the keys :length and :token; both
# keys are removed from that Hash when they are used.
#
# The third argument is the name of a method on ::Colours that is used
# to colourize the line. A block may return a Hash with the key :colour
# or :colours to set that name as well.
# ========================================================================= #
def self.cliner(
    use_this_token  = :default_token,
    how_many_times  = 80,
    use_this_colour = nil
  )
  require 'bioroebe/colours/colours.rb'
  # ======================================================================= #
  # === Handle Hashes first
  # ======================================================================= #
  if use_this_token.is_a? Hash
    options = use_this_token
    # ===================================================================== #
    # === :length
    # ===================================================================== #
    how_many_times = options.delete(:length) if options.has_key?(:length)
    # ===================================================================== #
    # === :token
    # ===================================================================== #
    use_this_token = options.delete(:token) if options.has_key?(:token)
    # No usable token was found, so fall back to the default token.
    use_this_token = :default if use_this_token.is_a? Hash
  end
  # ======================================================================= #
  # This check must come after the Hash has been handled above.
  # ======================================================================= #
  if [:default_token, :default].include?(use_this_token)
    use_this_token = '='
  end
  # ======================================================================= #
  # === Handle blocks next
  # ======================================================================= #
  if block_given?
    yielded = yield
    if yielded.is_a?(Hash)
      # =================================================================== #
      # :colour has precedence over :colours.
      # =================================================================== #
      colour_key = [:colour, :colours].find {|key| yielded.has_key?(key) }
      use_this_colour = yielded[colour_key] if colour_key
    end
  end
  line = use_this_token * how_many_times
  line = ::Colours.send(use_this_colour, line) if use_this_colour
  e line
end

.codon_frequencies_of_this_sequence(i = ARGV) ⇒ Object

#

Bioroebe.codon_frequencies_of_this_sequence

Usage example:

x = Bioroebe.codon_frequencies_of_this_sequence('ATGGGCGGGGTGATGGCAATGCCTTTAATGCCGCCAAAAAAAAAAAAAAAA')

Will yield this Hash:

{"AAA"=>5, "ATG"=>4, "CCA"=>1, "CCG"=>1, "TTA"=>1, "CCT"=>1, "GCA"=>1, "GTG"=>1, "GGG"=>1, "GGC"=>1}
#


198
199
200
# File 'lib/bioroebe/codons/show_codon_usage.rb', line 198

# ========================================================================= #
# === Bioroebe.codon_frequencies_of_this_sequence
#
# Creates a new Bioroebe::ShowCodonUsage instance for the given input,
# passing :be_quiet via the block, and returns what .result? yields.
# ========================================================================= #
def self.codon_frequencies_of_this_sequence(i = ARGV)
  codon_usage = Bioroebe::ShowCodonUsage.new(i) { :be_quiet }
  codon_usage.result?
end

.codon_frequency_of_this_string(i = 'ATTCGTACGATCGACTGACTGACAGTCATTCGTAGTACGATCGACTGACTGACAGTCATTCGTAC'\ 'GATCGACTGACTGACAAGTCATTCGTACGATCGACTGACTTGACAGTCATAA', automatically_convert_into_a_RNA_sequence = true) ⇒ Object

#

Bioroebe.codon_frequency_of_this_string

The input to this method should ideally be a String. It will be assumed to be a RNA string, e. g. mRNA. Thus, all T are replaced with U by default. This can be toggled via the second argument of this method.

This method will return a Hash.

Usage example:

Bioroebe.codon_frequency_of_this_string
Bioroebe.codon_frequency_of_this_string 'ATTCGTACGATCGACTACTACT' # => {"UAC"=>2, "GAC"=>1, "AUC"=>1, "ACG"=>1, "CGU"=>1, "AUU"=>1}
#


917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 917

# ========================================================================= #
# === Bioroebe.codon_frequency_of_this_string
#
# Splits the input into consecutive triplets and returns a Hash that
# maps each triplet to the number of times it occurs, ordered from the
# most frequent triplet to the least frequent one. Trailing characters
# that do not form a full triplet are ignored.
#
# An Array as input is joined first. Unless the second argument is
# false or nil, every T is replaced with U. This happens on a copy, so
# the String of the caller is not modified.
# ========================================================================= #
def self.codon_frequency_of_this_string(
    i = 'ATTCGTACGATCGACTGACTGACAGTCATTCGTAGTACGATCGACTGACTGACAGTCATTCGTAC'\
        'GATCGACTGACTGACAAGTCATTCGTACGATCGACTGACTTGACAGTCATAA',
    automatically_convert_into_a_RNA_sequence = true
  )
  sequence = i.is_a?(Array) ? i.join : i
  if automatically_convert_into_a_RNA_sequence
    sequence = sequence.tr('T','U') # .tr rather than .tr! - keep the input intact.
  end
  tally = sequence.scan(/.../).tally
  # ======================================================================= #
  # We still have to sort it, the most frequent codon coming first.
  # ======================================================================= #
  tally.sort_by {|_codon, count| count }.reverse.to_h
end

.codon_table_dataset?Boolean

#

Bioroebe.codon_table_dataset?

This method will return the “codon table dataset”, as a Hash.

This Hash will contain entries like this:

{"TAA"=>"*", "TGA"=>'*',"CCA"=>"P", ...

and so forth.

#

Returns:

  • (Boolean)


39
40
41
# File 'lib/bioroebe/codons/codon_table.rb', line 39

# ========================================================================= #
# === Bioroebe.codon_table_dataset?
#
# Query method: returns whatever is stored in @codon_table_dataset.
# ========================================================================= #
def self.codon_table_dataset?
  return @codon_table_dataset
end

.codon_table_in_use?Boolean

#

Bioroebe.codon_table_in_use?

Query method to return the currently used codon table.

#

Returns:

  • (Boolean)


83
84
85
# File 'lib/bioroebe/codons/codon_table.rb', line 83

# ========================================================================= #
# === Bioroebe.codon_table_in_use?
#
# Query method: returns whatever is stored in @codon_table_in_use.
# ========================================================================= #
def self.codon_table_in_use?
  return @codon_table_in_use
end

.codon_tablesObject

#

Bioroebe.codon_tables

This method will return all codon tables that we have registered.

This is probably not so terribly useful for most projects, but in the event that you do need all codon tables, you can use this method.

The result will be a Hash having key->value pairs such as:

"9" => {"TAA"=>"*", "TAG"=>"*"
#


30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/bioroebe/codons/codon_tables.rb', line 30

# ========================================================================= #
# === Bioroebe.codon_tables
#
# Loads every .yml file found in the codon_tables/ subdirectory of
# yaml_directory? - except for files ending with "overview.yml" - and
# returns one Hash. The keys are the file names without the .yml
# suffix, the values are the datasets loaded from these files.
# ========================================================================= #
def self.codon_tables
  require 'bioroebe/requires/require_yaml.rb'
  yaml_files = Dir["#{yaml_directory?}codon_tables/*.yml"].sort
  yaml_files.each_with_object({}) {|yaml_file, all_tables|
    next if yaml_file.end_with? 'overview.yml' # We reject this one here.
    dataset = YAML.load_file(yaml_file)
    name_of_the_table = File.basename(yaml_file).delete_suffix('.yml')
    all_tables[name_of_the_table] = dataset
  }
end

.codons_for_this_aminoacid?(i = ARGV) ⇒ Boolean

#

Bioroebe.codons_for_this_aminoacid?

This method will return all possible DNA codons for a specific aminoacid, as an Array.

So for example, for the aminoacid serine, this method would return an Array containing all 6 codons that code for this aminoacid (if the eukaryotic codon table is used, which also includes humans).

This method supports querying only ONE aminoacid at a time.

Currently the method relies on the file called “codons_of_the_aminoacids.yml”. In the future, the method here will probably be changed to add support for different codon tables.

Specific invocation examples:

Bioroebe.codons_for?(:serine)
Bioroebe.codons_for?(:tyrosine)
Bioroebe.codons_for?(:threonine)
Bioroebe.codons_for?('T')

To test this for another organism, try:

Bioroebe.use_this_codon_table(:yeast_mitochondria)
Bioroebe.codons_for?('T')
Bioroebe.decode_this_aminoacid 'K' # => ["AAA", "AAG"]
#

Returns:

  • (Boolean)


322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
# File 'lib/bioroebe/codons/codons.rb', line 322

# ========================================================================= #
# === Bioroebe.codons_for_this_aminoacid?
#
# Returns an Array of all codons in @codon_table_dataset whose value is
# the given aminoacid (in one-letter form, such as 'K').
#
# Accepted input:
#
#   - a one-letter String such as 'T'
#   - a Symbol such as :serine, or a String such as ':serine'; the
#     first three letters, downcased, are looked up in
#     AMINO_ACIDS_THREE_TO_ONE
#   - an Array, of which only the first entry is used
#   - :default or nil, which both stand for :lysine
# ========================================================================= #
def self.codons_for_this_aminoacid?(
    i = ARGV
  )
  # ======================================================================= #
  # First, convert the input a bit and sanitize it.
  # ======================================================================= #
  i = i.first if i.is_a? Array
  i = i.delete(':').to_sym if i.is_a?(String) && i.start_with?(':')
  i = :lysine if i.nil? || (:default == i)
  if i.is_a? Symbol
    # ===================================================================== #
    # === Convert e. g. :serine into 'ser'
    # ===================================================================== #
    three_letter_code = i.to_s.downcase[0 .. 2]
    i = AMINO_ACIDS_THREE_TO_ONE[three_letter_code]
  end
  # ======================================================================= #
  # @codon_table_dataset is a Hash with entries such as "TTC" => "F".
  # Collect every codon that points at the wanted aminoacid.
  # ======================================================================= #
  matching_codons = []
  @codon_table_dataset.each_pair {|codon, aminoacid|
    matching_codons << codon if aminoacid == i
  }
  matching_codons
end

.colourize_aa(i, array_colourize_these_aminoacids = array_colourize_this_aminoacid? ) ⇒ Object

#

Bioroebe.colourize_aa

Use this method if you wish to colourize an aminoacid, in a red colour.

The input should be the specific aminoacid sequence in question that you wish to see being colourized here.

This currently only works for aminoacids, and only in red. Perhaps at a later time it will become more flexible, but for now, it will be exclusive for aminoacids alone.

Usage example:

puts Bioroebe.colourize_aa 'STGYGGCTR', 'S T Y'
#


1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 1901

# ========================================================================= #
# === Bioroebe.colourize_aa
#
# Wraps every occurrence of the given aminoacids in the sequence i via
# swarn(), followed by rev, and returns i. The sequence is modified in
# place.
#
# The second argument is an Array of aminoacids, or a String in which
# they are separated by spaces, such as 'S T Y'.
#
# Nothing is changed if colours are not in use or if no aminoacid was
# given. If i is nil then a hint is printed and nil is returned.
# ========================================================================= #
def self.colourize_aa(
    i,
    array_colourize_these_aminoacids = array_colourize_this_aminoacid?
  )
  aminoacids = array_colourize_these_aminoacids
  aminoacids = aminoacids.split(' ') if aminoacids.is_a? String # Split it into an Array.
  return i unless use_colours? # But only if we use colours.
  return i if aminoacids.empty?
  if i.nil?
    puts 'You first have to assign a sequence.'
    return i
  end
  # ======================================================================= #
  # Only touch the sequence if at least one aminoacid occurs in it.
  # ======================================================================= #
  if i.each_char.any? {|residue| aminoacids.include?(residue) }
    aminoacids.each {|this_aminoacid|
      i.gsub!(/(#{this_aminoacid})/, swarn('\\1')+rev)
    }
  end
  i
end

.colourize_this_aminoacid_sequence_for_the_commandline(i) ⇒ Object

#

Bioroebe.colourize_this_aminoacid_sequence_for_the_commandline

This method uses some hardcoded colour assignments to the 20 different aminoacids.

Usage example:

puts Bioroebe.colourize_this_aminoacid_sequence_for_the_commandline('NLKRSPTHY')
#


1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 1694

# ========================================================================= #
# === Bioroebe.colourize_this_aminoacid_sequence_for_the_commandline
#
# Returns a new String in which every known aminoacid letter of the
# input has been replaced by the result of calling a method on that
# letter. The name of that method is looked up, per letter, in the YAML
# file at FILE_DEFAULT_COLOURS_FOR_THE_AMINOACIDS. All other characters
# are copied unchanged. An Array as input is joined first.
# ========================================================================= #
def self.colourize_this_aminoacid_sequence_for_the_commandline(i)
  i = i.join if i.is_a? Array
  method_name_for = YAML.load_file(FILE_DEFAULT_COLOURS_FOR_THE_AMINOACIDS)
  colourizable_aminoacids = %w( A R N D B C E Q Z G H I L K M F P S T W Y V )
  i.each_char.with_object(''.dup) {|this_aminoacid, result|
    if colourizable_aminoacids.include?(this_aminoacid)
      this_aminoacid = send(method_name_for[this_aminoacid.to_s], this_aminoacid)
    end # else it will not be colourized.
    result << this_aminoacid
  }
end

.colourize_this_fasta_dna_sequence(i = nil, &block) ⇒ Object

#

Bioroebe.colourize_this_fasta_dna_sequence

This toplevel method can be used to colourize a FASTA (DNA) sequence, e. g. “ATGCGCGTATTA” and so forth.

Note that this is intended for the commandline, that is to be displayed on e. g. a KDE Konsole terminal.

Usage examples:

puts Bioroebe.colourize_this_fasta_dna_sequence('ATGCGCATGCGCGTATTAGTATTAATGCGCGTATTAATGCGCGTATTA')
puts Bioroebe.colourize_this_fasta_dna_sequence('ATGCGCATGCGCGTATTAGTATTAATGCGCGTATTAATGCGCGTATTA') { :with_ruler }
puts Bioroebe.colourize_this_fasta_dna_sequence('TGCGCGTATTAGTATTAATGCGCGTATTAATGCGCGTATTA') { :with_ruler_steelblue_colour }
#


232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
# File 'lib/bioroebe/toplevel_methods/fasta_and_fastq.rb', line 232

# ========================================================================= #
# === Bioroebe.colourize_this_fasta_dna_sequence
#
# Returns a colourized copy of the given nucleotide sequence.
#
# The input may be a String, an Array (joined with ' ' and stripped) or
# the path of an existing file, whose content is used instead. For nil
# a hint is printed and nil is returned.
#
# The block may return :with_ruler, :add_ruler or :ruler to prepend an
# uncoloured ruler line, or a value whose name contains "colo" (such as
# :with_ruler_steelblue_colour) to prepend a ruler that is passed
# through the method named in that value.
# ========================================================================= #
def self.colourize_this_fasta_dna_sequence(
    i = nil,
    &block
  )
  # Load the ruler code only if it is not available yet.
  unless ::Bioroebe.respond_to?(:ruler_return_as_string_without_colours)
    require 'bioroebe/misc/ruler.rb'
  end
  if i.nil?
    e 'Please provide a valid FASTA sequence as input to '\
      'Bioroebe.colourize_this_fasta_dna_sequence()'
    return
  end
  if i.is_a? Array
    # ===================================================================== #
    # Arrays will be joined together.
    # ===================================================================== #
    i = i.join(' ').strip
  end
  # ======================================================================= #
  # Check for existing files next: if the input is the path of a file
  # then the content of that file is used.
  # ======================================================================= #
  if i and File.file?(i)
    i = File.read(i)
  end
  # ======================================================================= #
  # Keep an uncoloured copy; the ruler below is built from it.
  # ======================================================================= #
  original_input = i.dup
  i = i.dup # Always dup it here, as .gsub! is used below.
  if i.is_a? String
    # ===================================================================== #
    # The colours are either defined in the file that the constant
    # FILE_COLOURIZE_FASTA_SEQUENCES points at, or they are simply
    # hardcoded.
    #
    # The file is preferred; the hardcoded colours are only used if
    # that file does not exist.
    # ===================================================================== #
    if use_colours?
      this_file = FILE_COLOURIZE_FASTA_SEQUENCES
      if File.exist? this_file
        dataset_for_the_colours = YAML.load_file(this_file)
        # ================================================================= #
        # Each key is used as a regex, each value as the name of a method
        # on Colours.
        #
        # NOTE(review): the replacements run one after the other over
        # text that already contains colour codes - presumably these
        # codes never contain any of the keys; TODO confirm.
        # ================================================================= #
        dataset_for_the_colours.each_pair {|this_nucleotide, this_colour_to_be_used|
          i.gsub!(
            /#{this_nucleotide}/,
            Colours.send(this_colour_to_be_used, this_nucleotide)+
            rev
          )
        }
      else
        i.gsub!(/A/, "#{teal('A')}#{rev}")
        i.gsub!(/C/, "#{slateblue('C')}#{rev}")
        i.gsub!(/G/, "#{royalblue('G')}#{rev}")
        i.gsub!(/T/, "#{steelblue('T')}#{rev}")
        i.gsub!(/U/, "#{steelblue('U')}#{rev}") # Uracil is just the same as Thymine.
      end
    end
  end
  # ======================================================================= #
  # === Handle blocks next
  # ======================================================================= #
  if block_given?
    yielded = yield
    case yielded
    # ===================================================================== #
    # === with_ruler
    #
    # Prepend the uncoloured ruler, on a line of its own.
    # ===================================================================== #
    when :with_ruler,
         :add_ruler,
         :ruler
      i.prepend(
        ::Bioroebe.ruler_return_as_string_without_colours(original_input)+
        "\n"
      )
    else # Assume something like:
         #   :with_ruler_steelblue_colour
      if yielded.to_s.include? 'colo' # This assumes "colour" or "color".
        # Strip "_colour"/"_color" and "with_ruler_"; what is left over
        # is used as the name of the method to call.
        use_this_colour = yielded.to_s.sub(/_colou?r/,'').
                                       sub(/with_ruler_/,'')
        this_string = send(use_this_colour,
          ::Bioroebe.ruler_return_as_string_without_colours(original_input)+
          "\n"
        )
        i.prepend(this_string)
      end
    end
  end
  return i
end

.colours(enable_or_disable = '+') ⇒ Object

#

Bioroebe.colours

This method can be used to quickly enable or disable colours, by passing ‘+’ or ‘-’.

#


131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/bioroebe/colours/colours.rb', line 131

# ========================================================================= #
# === Bioroebe.colours
#
# Quickly enable or disable colours. The argument is converted via
# .to_s first:
#
#   '+', 'true'       # => enable_colours is called
#   '-', 'false', ''  # => disable_colours is called
#
# Anything else does nothing and yields nil.
# ========================================================================= #
def self.colours(
    enable_or_disable = '+'
  )
  flag = enable_or_disable.to_s
  if ['+', 'true'].include?(flag)
    enable_colours
  elsif ['-', 'false', ''].include?(flag)
    disable_colours
  end
end

.compacter(i = ARGV) ⇒ Object

#

Bioroebe.compacter

Note that this variant will NEVER ask for user-input of the Bioroebe::Compacter class.

#


243
244
245
246
247
# File 'lib/bioroebe/utility_scripts/compacter/compacter.rb', line 243

# Convenience wrapper: instantiates Bioroebe::Compacter with the given
# input (ARGV by default) and returns the new instance.
def self.compacter(
    i = ARGV
  )
  # The block hands :do_not_ask_for_user_input to the class; per the
  # description of this method, this variant never prompts the user.
  Bioroebe::Compacter.new(i) { :do_not_ask_for_user_input }
end

.complement(i = nil) ⇒ Object

#

Bioroebe.complement

This method will return the complementary DNA strand.

Ambiguous nucleotide codes are expanded into their possibilities: N becomes (A/T/G/C), R becomes (A/G) and Y becomes (T/C).

Usage example:

Bioroebe.complement 'ATGGGTCCC' # => "TACCCAGGG"
#


3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 3974

# Build the complementary strand for the given nucleotide sequence.
#
# An Array is reduced to its first element. If the input names an existing
# file, the content of that file is used as the sequence. Every character is
# upcased and looked up in HASH_DNA_NUCLEOTIDES; the ambiguity codes N, R and
# Y are written out as a list of possibilities instead. All other characters
# are silently skipped.
#
# Returns the resulting String, or nil when no input was given.
#
# NOTE(review): R and Y are emitted as '(A/G)' and '(T/C)', i.e. they are
# echoed rather than complemented - confirm whether this is intended.
def self.complement(
    i = nil
  )
  partner_of = HASH_DNA_NUCLEOTIDES # The main lookup table.
  ambiguity_codes = {
    'n' => '(A/T/G/C)', # Any nucleotide.
    'r' => '(A/G)',     # A purine.
    'y' => '(T/C)'      # A pyrimidine.
  }
  complementary_strand = ''.dup
  i = i.first if i.is_a? Array
  return nil unless i
  i = File.readlines(i).join(' ').strip if File.exist?(i)
  i.each_char { |nucleotide|
    upcased = nucleotide.upcase
    if partner_of.has_key? upcased
      complementary_strand << partner_of[upcased]
    elsif ambiguity_codes.has_key? upcased.downcase
      complementary_strand << ambiguity_codes[upcased.downcase]
    end
  }
  complementary_strand
end

.complementary_dna_strand(i = ARGV) ⇒ Object

#

Bioroebe.complementary_dna_strand

This method will simply return the corresponding (complementary) DNA strand.

Usage example:

Bioroebe.complementary_dna_strand('ATCATCATC') # => "TAGTAGTAG"
#


152
153
154
# File 'lib/bioroebe/nucleotides/complementary_dna_strand.rb', line 152

# Return the complementary DNA strand for the given input.
#
# The work is delegated to Bioroebe::ComplementaryDnaStrand; this method
# returns whatever result? yields for that instance.
def self.complementary_dna_strand(i = ARGV)
  return Bioroebe::ComplementaryDnaStrand.new(i).result?
end

.complementary_rna_strand(i) ⇒ Object

#

Bioroebe.complementary_rna_strand

This method will simply return the corresponding (complementary) RNA strand.

Usage example:

Bioroebe.complementary_rna_strand('ATCATCATC') # => "UAGUAGUAG"
#


588
589
590
591
592
593
594
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 588

# Return the complementary RNA strand for the given sequence.
#
# An Array is reduced to its first element. Each character is replaced by
# its partner from partner_nucleotide_hash (characters without a partner
# vanish when the result is joined), and every T is finally turned into U.
def self.complementary_rna_strand(i)
  i = i.first if i.is_a? Array
  partner_of = partner_nucleotide_hash
  dna_complement = i.each_char.map { |nucleotide| partner_of[nucleotide] }.join
  dna_complement.tr('T', 'U')
end

.compseq(i = ARGV) ⇒ Object

#

Bioroebe.compseq

#


514
515
516
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 514

# Runs Bioroebe::Compseq on the given input (ARGV by default) and returns
# whatever result_as_string? yields for that instance.
def self.compseq(i = ARGV)
  # The block hands :disable_colours_and_be_quiet to the class; going by the
  # symbol name this presumably suppresses colours and console output.
  Bioroebe::Compseq.new(i) { :disable_colours_and_be_quiet }.result_as_string?
end

.contains_an_inverted_repeat?(i = 'TTACGAAAAAACGTAA') ⇒ Boolean

#

Bioroebe.contains_an_inverted_repeat?

We assume an inverted repeat to exist if at least 2 nucleotides match one another in the reverse, so a total of 4 matching nucleotides. This assumption may not necessarily be correct and we may have to fine-tune this at a later time.

For testing purpose, the sequence ‘TTACGAAAAAACGTAA’ can be used.

#

Returns:

  • (Boolean)


532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 532

# Heuristically decide whether the sequence contains an inverted repeat.
#
# The sequence (given in the 5'→3' direction) is cut in the middle. The
# second half is reversed and compared position by position against the
# first half via can_base_pair_with?. The longest uninterrupted run of
# pairing positions is tracked; a run of two or more counts as an
# inverted repeat.
#
# Returns true or false.
def self.contains_an_inverted_repeat?(
    i = 'TTACGAAAAAACGTAA'
  )
  midpoint = i.size / 2
  left_half           = i[0 .. (midpoint-1)]
  right_half_reversed = i[midpoint .. -1].reverse # Work via the reverse sequence.
  best_run = 0
  run      = 0
  left_half.each_char.with_index { |nucleotide, position|
    if can_base_pair_with?(right_half_reversed[position], nucleotide)
      run += 1
      best_run = run if run > best_run
    else
      run = 0 # The stretch was interrupted, so start counting anew.
    end
  }
  best_run >= 2
end

.convert_global_env(i) ⇒ Object

#

Bioroebe.convert_global_env

Note that the method will pick only the first argument given to it if an Array is supplied.

#


801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 801

# Expand an environment-variable reference through the optional
# convert_global_env gem.
#
# Only the first element of an Array is used. If the ConvertGlobalEnv
# constant is not yet known, the gem is required on demand; a missing gem
# is tolerated. Without the gem the input is handed back as it is.
# Otherwise a leading '$' is prepended when absent (a frozen String is
# duplicated first; a non-frozen String is modified in place) and the
# result of ConvertGlobalEnv.convert is returned.
def self.convert_global_env(i)
  i = i.first if i.is_a? Array
  unless Object.const_defined? :ConvertGlobalEnv
    begin # Try to pull in the external gem.
      require 'convert_global_env'
    rescue LoadError
      # The gem is optional, so simply carry on without it.
    end
  end
  return i unless Object.const_defined? :ConvertGlobalEnv
  if i && !i.start_with?('$')
    i = i.dup if i.frozen?
    i.prepend('$')
  end
  ConvertGlobalEnv.convert(i, :do_not_report_errors) # Handle ENV variables.
end

.convert_one_letter_to_full(i) ⇒ Object

#

Bioroebe.convert_one_letter_to_full

Convert one aminoacid to the real name.

Usage example:

Bioroebe.convert_one_letter_to_full('T') # => "threonine"
#


1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 1959

# Convert a one-letter aminoacid code into the full aminoacid name.
#
# The letter is first translated into its three-letter code via
# convert_one_letter_to_three, which in turn is looked up in
# AMINO_ACIDS_ABBREVIATIONS.
#
# An Array is converted element by element and the Array of results is
# returned. (Previously the Array branch iterated with each, so the
# conversions were thrown away and the unchanged input was returned.)
#
# Returns the looked-up name, or nil if the lookup yields nothing.
def self.convert_one_letter_to_full(i)
  if i.is_a? Array
    return i.map { |entry| convert_one_letter_to_full(entry) }
  end
  one_letter = i.to_s.downcase # need it to be downcased.
  three_letters = convert_one_letter_to_three(one_letter)
  AMINO_ACIDS_ABBREVIATIONS[three_letters]
end

.convert_one_letter_to_three(i) ⇒ Object

#

Bioroebe.convert_one_letter_to_three

Convert a one-letter-code for an aminoacid into the slightly longer three-letter-code variant for that particular aminoacid.

Note that this method will return the result in a downcased variant, such as “gly” for “glycine”.

Returns:

A string of three characters, if it is a valid one-letter aminoacid.

Usage example for an aminoacid such as Glycine:

Bioroebe.convert_one_letter_to_three('G') # => "gly"
#


1611
1612
1613
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 1611

# Translate a one-letter aminoacid code into its three-letter code.
#
# AMINO_ACIDS_THREE_TO_ONE is inverted, so that the one-letter codes become
# the keys, and the upcased input is looked up in the inverted table.
# Returns nil when the letter is not found.
def self.convert_one_letter_to_three(i)
  one_to_three = AMINO_ACIDS_THREE_TO_ONE.invert
  one_to_three[i.upcase]
end

.convert_this_codon_to_that_aminoacid(i = ARGV, &block) ⇒ Object

#

Bioroebe.convert_this_codon_to_that_aminoacid

#


225
226
227
228
229
230
# File 'lib/bioroebe/codons/convert_this_codon_to_that_aminoacid.rb', line 225

# Instantiates Bioroebe::ConvertThisCodonToThatAminoacid for the given
# input (ARGV by default), passing :be_quiet through the block, and returns
# result? converted via to_s - so the return value is always a String.
#
# NOTE(review): the &block parameter is accepted but never forwarded or
# called in this method.
def self.convert_this_codon_to_that_aminoacid(
    i = ARGV,
    &block
  )
  Bioroebe::ConvertThisCodonToThatAminoacid.new(i) { :be_quiet }.result?.to_s
end

.count_amount_of_aminoacids(i) ⇒ Object

#

Bioroebe.count_amount_of_aminoacids

#


344
345
346
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 344

# Instantiates CountAmountOfAminoacids with the given input. Note that the
# new instance itself is returned, not a number.
def self.count_amount_of_aminoacids(i)
  CountAmountOfAminoacids.new(i)
end

.count_amount_of_nucleotides(i) ⇒ Object

#

Bioroebe.count_amount_of_nucleotides

This method will always return the result in the form of a single line. The order is: A C G T

This can also be used to solve a problem listed at Rosalind.

Invocation examples:

Bioroebe.count_amount_of_nucleotides 'AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC' => "20 17 12 21"
Bioroebe.count_amount_of_nucleotides File.read('/rosalind_dna.txt').strip
#


483
484
485
486
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 483

# Returns the nucleotide counts of the given input on a single line.
def self.count_amount_of_nucleotides(i)
  # The instance is created with :do_not_run_yet and a block yielding
  # :display_short_form; presumably this defers the run and selects the
  # short output form - confirm in Bioroebe::CountAmountOfNucleotides.
  _ = ::Bioroebe::CountAmountOfNucleotides.new(i, :do_not_run_yet) { :display_short_form }
  # The return value of this call is the result of the method.
  _.return_the_amount_of_nucleotides_in_short_form_on_a_single_line
end

.count_AT(i = ARGV) ⇒ Object

#

Bioroebe.count_AT

This method will count how many characters in a given String are “A” or “T”, in total. The method will assume that an Array passed to it is meant to be a String.

So, every time this method encounters a “A” or a “T” in that string, we will “add” +1 to the number that will be returned by that method.

Usage example:

Bioroebe.count_AT 'ATTATATACCGCGCCCATATAAA' # => 15
#


25
26
27
28
29
# File 'lib/bioroebe/count/count_at.rb', line 25

# Count how many characters of the input are "A" or "T", ignoring case.
#
# An Array is joined with spaces into one String first. Returns the total
# as an Integer.
def self.count_AT(i = ARGV)
  i = i.join(' ').strip if i.is_a? Array
  i.upcase.count('AT') # One pass over the character set A and T.
end

.count_GC(i = ARGV) ⇒ Object

#

Bioroebe.count_GC

This method will count how many characters in a given String are “G” or “C”, in total. The method will assume that an Array passed to it is meant to be a String.

So, every time this method encounters a “G” or a “C” in that string, we will “add” +1 to the number that will be returned by that method.

Specific usage examples:

Bioroebe.count_GC 'ATTATTATGGCCAATATA' # => 4
Bioroebe.count_GC 'ATG' # => 1
#


27
28
29
30
31
# File 'lib/bioroebe/count/count_gc.rb', line 27

# Count how many characters of the input are "G" or "C", ignoring case.
#
# An Array is joined with spaces into one String first. Returns the total
# as an Integer.
def self.count_GC(i = ARGV)
  i = i.join(' ').strip if i.is_a? Array
  i.upcase.count('GC') # One pass over the character set G and C.
end

.count_non_DNA_bases_in_this_sequence(i, array = Bioroebe.return_DNA_nucleotides) ⇒ Object

#

Bioroebe.count_non_DNA_bases_in_this_sequence

Usage example:

Bioroebe.count_non_DNA_bases_in_this_sequence('ATCGF')
#


3021
3022
3023
3024
3025
3026
3027
3028
3029
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 3021

# Count the characters of the sequence that are not DNA nucleotides.
#
# Works on a copy of the input: every entry of the nucleotide list (by
# default Bioroebe.return_DNA_nucleotides) is deleted from the copy, and
# the size of what is left over is returned.
def self.count_non_DNA_bases_in_this_sequence(
    i, array = Bioroebe.return_DNA_nucleotides
  )
  leftover = i.dup # Never touch the caller's object.
  array.each { |nucleotide| leftover.delete!(nucleotide) }
  leftover.size
end

.create_file(i) ⇒ Object

#

Bioroebe.create_file

This method can be used to create a file.

#


1195
1196
1197
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 1195

# Create the file at the given path, unless a regular file already exists
# there. In the latter case nothing happens and nil is returned.
def self.create_file(i)
  return if File.file?(i)
  FileUtils.touch(i)
end

.create_jar_archiveObject

#

Bioroebe.create_jar_archive

This method will create a .jar file.

To invoke it from the commandline do:

bioroebe --jar

To execute a .jar file do:

java -jar foobar.jar
#


3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 3131

# Builds bioroebe.jar from the java sources of the project and moves the
# archive into the directory the method was invoked from.
#
# NOTE(review): the source directory is a hard-coded absolute path, so this
# presumably only works on the author's machine.
def self.create_jar_archive
  e 'Creating a .jar archive next:'
  e
  # Remember where we started, so that we can return there afterwards.
  original_dir = return_pwd
  cd '/home/x/programming/ruby/src/bioroebe/lib/bioroebe/java/bioroebe/src/main/java/'
  esystem 'jar cf bioroebe.jar bioroebe/'
  # Resolve the absolute path while still inside the java directory.
  target_file = File.absolute_path('bioroebe.jar')
  cd original_dir
  # Only report and move the archive if the jar command produced it.
  if File.exist? target_file
    e 'Moving the created .jar file into the current working '\
      'directory next.'
    move_file(target_file, original_dir)
    e 'It should now be at:'
    e
    # NOTE(review): no separator is inserted between directory and file
    # name - this assumes that return_pwd ends with a '/'; TODO confirm.
    e sfile("  #{original_dir}#{File.basename(target_file)}")
    e
  end
  #   esystem 'jar cfe bioroebe.jar myClass myClass.class'
  e
end

.create_new_sequence(i = ARGV, &block) ⇒ Object

#

Bioroebe.create_new_sequence

Create a new Bioroebe::Sequence object. It will also assign to the @sequence module-level instance variable.

#


727
728
729
# File 'lib/bioroebe/sequence/sequence.rb', line 727

# Creates a new ::Bioroebe::Sequence from the input (ARGV by default),
# forwarding an optional block to the constructor. The new object is stored
# in the @sequence instance variable and is also the return value.
def self.create_new_sequence(i = ARGV, &block)
  @sequence = ::Bioroebe::Sequence.new(i, &block)
end

.create_random_aminoacids(how_many_aminoacids = CREATE_N_AMINOACIDS, split_at = nil, be_verbose = false, &block) ⇒ Object

#

Bioroebe.create_random_aminoacids

This method will create a random chain of aminoacids.

The first argument to this method shall denote how many aminoacids are to be generated, e. g. 25 would mean to create “25 aminoacids”.

If the second argument, called `split_at`, is not nil and is a number, then this method will add a newline into the returned String.

This method will return a String, consisting of the random aminoacids.

Usage Examples:

Bioroebe.create_random_aminoacids 125
Bioroebe.create_random_aminoacids  25 # => "SQHWVGGGVSRCWLMWAPECMYVWW"
Bioroebe.create_random_aminoacids  15 # => "CLKHMLMGLVAEEKA"
Bioroebe.random_aminoacids(5) # => "STRRM"
Bioroebe.random_aminoacids(8) # => "TRTQHSNN"
#


203
204
205
206
207
208
209
210
211
212
213
214
215
216
# File 'lib/bioroebe/aminoacids/create_random_aminoacids.rb', line 203

# Creates a random aminoacid chain by delegating to
# ::Bioroebe::CreateRandomAminoacids.
#
# All three positional arguments and the optional block are passed on to
# the constructor unchanged. The return value is whatever
# amino_acid_sequence yields for the new instance.
def self.create_random_aminoacids(
    how_many_aminoacids = CREATE_N_AMINOACIDS,
    split_at            = nil,
    be_verbose          = false,
    &block
  )
  _ = ::Bioroebe::CreateRandomAminoacids.new(
        how_many_aminoacids,
        split_at,
        be_verbose,
        &block
      )
  return _.amino_acid_sequence # ← And return the aminoacid sequence here.
end

.create_the_pdf_tutorial(read_from_this_file = '/home/x/programming/ruby/src/bioroebe/README.md', store_where = '/Depot/j/example.pdf') ⇒ Object

#

Bioroebe.create_the_pdf_tutorial

This method can be used to quickly turn the README.md file into a .pdf file, for whatever the reason the user may want this.

#


2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2858

# Renders the given file (the README.md of the project by default) into a
# .pdf file via the prawn gem and stores it at store_where.
#
# NOTE(review): both default paths are hard-coded absolute paths.
def self.create_the_pdf_tutorial(
    read_from_this_file = '/home/x/programming/ruby/src/bioroebe/README.md',
    store_where         = '/Depot/j/example.pdf'
  )

  # prawn is only loaded when this method is actually called.
  require 'prawn'

  Prawn::Fonts::AFM.hide_m17n_warning = true # Hide a useless warning.

  pdf = Prawn::Document.new(
          page_size: 'A4',
          page_layout: :landscape
        )
  # Page 1: the title.
  pdf.text "The Bioroebe Project", size: 80
  pdf.start_new_page
  # Page 2: a short tagline inside a bounding box.
  pdf.bounding_box [50, 600], width: 200 do
    pdf.fill_color '000000'
    pdf.text "making bioinformatics great again:", size: 15
  end
  pdf.start_new_page
  # Page 3 and onwards: the content of the input file.
  dataset = File.read(read_from_this_file, encoding: UTF_ENCODING)
  # Re-encode to Windows-1252, replacing invalid and undefined characters;
  # presumably because the built-in AFM fonts of prawn cannot display
  # arbitrary UTF-8 - TODO confirm.
  dataset = dataset.encode("Windows-1252", invalid: :replace, undef: :replace)

  pdf.text(dataset)
  e 'Storing at this location: '+store_where
  pdf.render_file store_where
end

.decode_this_aminoacid_sequence(i = 'KKKA') ⇒ Object

#

Bioroebe.decode_this_aminoacid_sequence

This method can be used as means to decode an aminoacid sequence, such as a String like ‘KKKA’.

The input to this method may also be in the form of an Array, such as [‘K’,‘K’,‘K’,‘A’]. Only valid one-letter aminoacids will be honoured by this method; invalid letters will be silently dropped.

After that, this method will replace all valid letters, that is valid aminoacids (in single letter code), with the corresponding codon. It will return all possibilities.

Invocation example:

Bioroebe.decode_this_aminoacid_sequence('KKKA') # => [["AAG", "AAA"], ["AAG", "AAA"], ["AAG", "AAA"], ["GCT", "GCC", "GCA", "GCG"]]
#


385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
# File 'lib/bioroebe/codons/codons.rb', line 385

# Decode an aminoacid sequence into the codons behind each aminoacid.
#
# An Array is joined into one String. A String containing '-' is treated
# as a chain of three-letter codes: it is split at the hyphens and each
# part is translated via ::Bioroebe.three_to_one. Any other String is
# split into its single characters.
#
# Every resulting entry is then handed to ::Bioroebe.decode_this_aminoacid
# and the Array of these results is returned.
def self.decode_this_aminoacid_sequence(
    i = 'KKKA'
  )
  i = i.join if i.is_a? Array
  if i.is_a? String
    i = if i.include?('-') # This looks like the three-letter code.
          i.split('-').map { |three_letter_code|
            ::Bioroebe.three_to_one(three_letter_code)
          }
        else
          i.split(//)
        end
  end
  [i].flatten.map { |one_letter_code|
    ::Bioroebe.decode_this_aminoacid(one_letter_code)
  }
end

.deduce_aminoacid_sequence(from_this_sequence = :default) ⇒ Object

#

Bioroebe.deduce_aminoacid_sequence

#


465
466
467
468
469
# File 'lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb', line 465

# Instantiates Bioroebe::DeduceAminoacidSequence with the given sequence
# (the symbol :default if omitted) and returns the new instance.
def self.deduce_aminoacid_sequence(
    from_this_sequence = :default
  )
  Bioroebe::DeduceAminoacidSequence.new(from_this_sequence)
end

.deduce_most_likely_aminoacid_sequence(from_this_sequence = :default) ⇒ Object

#

Bioroebe.deduce_most_likely_aminoacid_sequence

#


140
141
142
# File 'lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb', line 140

# Instantiates
# Bioroebe::MostLikelyNucleotideSequenceForThisAminoacidSequence with the
# given sequence (the symbol :default if omitted) and returns the new
# instance.
#
# NOTE(review): the method name speaks of an aminoacid sequence, whereas
# the class that is used is named after a nucleotide sequence - confirm
# which of the two is meant.
def self.deduce_most_likely_aminoacid_sequence(from_this_sequence = :default)
  Bioroebe::MostLikelyNucleotideSequenceForThisAminoacidSequence.new(from_this_sequence)
end

.deduce_most_likely_aminoacid_sequence_as_string(i, use_this_codon_tables_frequencies = :default) ⇒ Object

#

Bioroebe.deduce_most_likely_aminoacid_sequence_as_string

This method will attempt to deduce the most likely aminoacid sequence for a given protein, as a String.

Usage example:

Bioroebe.deduce_most_likely_aminoacid_sequence_as_string('KKKA') # => "AAGAAGAAGGCC"
#


452
453
454
455
456
457
458
459
460
461
# File 'lib/bioroebe/codons/codons.rb', line 452

# Return the most likely codon sequence for an aminoacid sequence, as one
# String.
#
# The lookup is delegated to
# return_the_most_likely_codon_sequence_for_this_aminoacid_sequence. An
# Array result is joined into a single String; any other result is
# returned unchanged.
def self.deduce_most_likely_aminoacid_sequence_as_string(
    i, use_this_codon_tables_frequencies = :default
  )
  codons = return_the_most_likely_codon_sequence_for_this_aminoacid_sequence(
    i, use_this_codon_tables_frequencies
  )
  codons.is_a?(Array) ? codons.join : codons
end

.default_colour?Boolean

#

Bioroebe.default_colour?

#

Returns:

  • (Boolean)


96
97
98
# File 'lib/bioroebe/colours/colours.rb', line 96

# Query method for the @default_colour instance variable. The stored value
# is returned as it is (nil if it was never assigned), so the result is not
# necessarily a strict true or false.
def self.default_colour?
  @default_colour
end

.delimiter?Boolean

#

Bioroebe.delimiter?

This is simply the primary delimiter used for reading “multiline input” of the Bioroebe::Shell component.

#

Returns:

  • (Boolean)


413
414
415
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 413

# Returns the delimiter String '___' (three underscores). Despite the
# trailing question mark in the name, this yields a String rather than a
# boolean value.
def self.delimiter?
  '___'
end

.determine_n_glycosylation_matches(of_this_protein_sequence = 'MKNKFKTQEELVNHLKTVGFVFANSEIYNGLANAWDYGPLGVLLKNNLKNLWWKEFVTKQKDV'\ 'VGLDSAIILNPLVWKASGHLDNFSDPLIDCKNCKARYRADKLIESFDENIHIAENSSNEEFAK'\ 'VLNDYEISCPTCKQFNWTEIRHFNLMFKTYQGVIEDAKNVVYLRPETAQGIFVNFKNVQRSMR'\ 'LHLPFGIAQIGKSFRNEITPGNFIFRTREFEQMEIEFFLKEESAYDIFDKYLNQIENWLVSAC'\ 'GLSLNNLRKHEHPKEELSHYSKKTIDFEYNFLHGFSELYGIAYRTNYDLSVHMNLSKKDLTYF'\ 'DEQTKEKYVPHVIEPSVGVERLLYAILTEATFIEKLENDDERILMDLKYDLAPYKIAVMPLVN'\ 'KLKDKAEEIYGKILDLNISATFDNSGSIGKRYRRQDAIGTIYCLTIDFDSLDDQQDPSFTIRE'\ 'RNSMAQKRIKLSELPLYLNQKAHEDFQRQCQK') ⇒ Object

#

Bioroebe.determine_n_glycosylation_matches

This method can be used to determine N-Glycosylation patterns in a protein.

The input to this method should be an aminoacid chain - aka a protein sequence.

This method will return an Array. This Array holds the indices where a N-glycosylation pattern begins.

Usage example:

Bioroebe.determine_n_glycosylation_matches # => [85, 118, 142, 306, 395]
#


2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2063

# Determine where N-glycosylation patterns begin in a protein sequence.
#
# The sequence is scanned with REGEX_FOR_N_GLYCOSYLATION_PATTERN and the
# start position of every match is collected. Positions are 1-based.
#
# The positions are taken straight from the match data rather than by
# searching for the matched substring afterwards; this way a motif that
# occurs several times in the sequence is reported at every position, not
# just at its first one.
#
# If an Array of sequences is given, each sequence is analysed on its own
# and an Array holding one position-Array per sequence is returned.
#
# Returns an Array of Integers (or an Array of such Arrays, see above).
def self.determine_n_glycosylation_matches(
    of_this_protein_sequence =
      'MKNKFKTQEELVNHLKTVGFVFANSEIYNGLANAWDYGPLGVLLKNNLKNLWWKEFVTKQKDV'\
      'VGLDSAIILNPLVWKASGHLDNFSDPLIDCKNCKARYRADKLIESFDENIHIAENSSNEEFAK'\
      'VLNDYEISCPTCKQFNWTEIRHFNLMFKTYQGVIEDAKNVVYLRPETAQGIFVNFKNVQRSMR'\
      'LHLPFGIAQIGKSFRNEITPGNFIFRTREFEQMEIEFFLKEESAYDIFDKYLNQIENWLVSAC'\
      'GLSLNNLRKHEHPKEELSHYSKKTIDFEYNFLHGFSELYGIAYRTNYDLSVHMNLSKKDLTYF'\
      'DEQTKEKYVPHVIEPSVGVERLLYAILTEATFIEKLENDDERILMDLKYDLAPYKIAVMPLVN'\
      'KLKDKAEEIYGKILDLNISATFDNSGSIGKRYRRQDAIGTIYCLTIDFDSLDDQQDPSFTIRE'\
      'RNSMAQKRIKLSELPLYLNQKAHEDFQRQCQK'
  )
  if of_this_protein_sequence.is_a? Array
    return of_this_protein_sequence.map {|this_sequence|
      determine_n_glycosylation_matches(this_sequence)
    }
  end
  positions = []
  of_this_protein_sequence.scan(REGEX_FOR_N_GLYCOSYLATION_PATTERN) {
    # Regexp.last_match refers to the match of the current iteration.
    positions << (Regexp.last_match.begin(0) + 1) # +1 because ruby starts at 0.
  }
  positions
end

.determine_start_codons_from_the_codon_table(this_codon_table_dataset = @codon_table_dataset) ⇒ Object

#

Bioroebe.determine_start_codons_from_the_codon_table

#


61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/bioroebe/codons/codons.rb', line 61

# Determine the start codon(s) from a codon table and register them.
#
# Only the entry stored under the key 'START' is kept from the dataset
# (by default @codon_table_dataset). The first of the remaining values is
# passed on to set_start_codon, whose return value is also the return
# value of this method.
def self.determine_start_codons_from_the_codon_table(
    this_codon_table_dataset = @codon_table_dataset
  )
  start_entries = this_codon_table_dataset.select { |name_of_the_entry, _|
    name_of_the_entry == 'START' # The start codons live under this key.
  }
  candidates = start_entries.values
  candidates = candidates.first if candidates.is_a? Array
  set_start_codon(candidates)
end

.determine_stop_codons_from_the_codon_table(this_codon_table_dataset = @codon_table_dataset) ⇒ Object

#

Bioroebe.determine_stop_codons_from_the_codon_table

This method will determine the stop codons in use for the given species/organism, depending on the proper codon table.

#


45
46
47
48
49
50
51
52
53
54
55
# File 'lib/bioroebe/codons/codons.rb', line 45

# Determine the stop codons from a codon table and register them.
#
# Every entry of the dataset (by default @codon_table_dataset) whose value
# is '*' denotes a stop codon. The keys of these entries are passed on to
# set_stop_codons, whose return value is also the return value of this
# method.
def self.determine_stop_codons_from_the_codon_table(
    this_codon_table_dataset = @codon_table_dataset
  )
  stop_entries = this_codon_table_dataset.select { |_codon, aminoacid|
    aminoacid == '*' # '*' refers to a stop codon.
  }
  set_stop_codons(stop_entries.keys)
end

.digest_this_dna(this_DNA_sequence, hash = {}) ⇒ Object

#

Bioroebe.digest_this_dna

This method depends on the file bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb.

Usage examples:

x = Bioroebe.digest_this_dna(:lambda_genome, with: :EcoRI)
x = Bioroebe.digest_this_dna("/root/Bioroebe/fasta/NC_001416.1_Enterobacteria_phage_lambda_complete_genome.fasta", with: :EcoRI)
x = Bioroebe.digest_this_dna("/Depot/j/foobar.fasta", with: :PvuII)
#


3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 3773

# Digests a DNA sequence, read from a FASTA file, with a restriction enzyme.
#
# Parameters:
#   this_DNA_sequence - path to a FASTA file; for an Array only the first
#                       element is used.
#   hash              - options; the key :with names the restriction enzyme.
#
# Returns the Array of sub-sequences obtained by splitting the sequence at
# the recognition sequence. If the recognition sequence does not occur, or
# if no sequence could be read, 'Nothing found.' is printed instead.
#
# NOTE(review): a sequence is only loaded when the first argument is a
# String naming an existing file. Other input (such as the symbol
# :lambda_genome from the usage examples) leaves nucleotide_sequence at nil
# within this method - confirm whether that is handled elsewhere.
def self.digest_this_dna(
    this_DNA_sequence,
    hash = {}
  )
  # The FASTA parser is only loaded when this method is called.
  require 'bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb'
  restriction_enzymes = Bioroebe.load_and_return_the_restriction_enzymes
  this_restriction_enzyme = nil
  nucleotide_sequence = nil
  if this_DNA_sequence.is_a? Array
    this_DNA_sequence = this_DNA_sequence.first
  end
  if this_DNA_sequence.is_a?(String) and File.exist?(this_DNA_sequence)
    nucleotide_sequence = ::Bioroebe::ParseFasta.new(this_DNA_sequence).sequence?
  end
  # ======================================================================= #
  # === Handle the hash next (and ensure that it is a Hash)
  # ======================================================================= #
  if hash.is_a? Hash
    # ===================================================================== #
    # === :with
    # ===================================================================== #
    if hash.has_key? :with
      # Note that delete() removes the :with entry from the caller's Hash.
      this_restriction_enzyme = hash.delete(:with).to_s
    end
  end
  # NOTE(review): if no enzyme was given, or if it is unknown, the lookup
  # presumably yields nil - the include? check further below would then
  # fail for a loaded sequence; TODO confirm.
  target_sequence = restriction_enzymes[this_restriction_enzyme].dup
  # Entries ending in a digit get that last character, and any whitespace
  # left around the remainder, removed.
  if target_sequence =~ /\d$/ # If it ends with a number.
    target_sequence.chop!
    target_sequence.strip!
  end
  if nucleotide_sequence and
     nucleotide_sequence.include?(target_sequence)
    print rev+'Yes, the restriction-sequence '+
          lightblue(target_sequence)+
          rev+
          ' is found in the given sequence. '
    # The recognition sequence is interpolated into the regex unescaped.
    scanned = nucleotide_sequence.scan(
      /#{target_sequence}/
    )
    erev "It can be found #{steelblue(scanned.size.to_s)}#{rev} "\
         "times, at these positions:"
    e
    sub_sequences = nucleotide_sequence.split(/#{target_sequence}/)
    # What is printed here are the sizes of the fragments, largest first -
    # not the positions of the restriction sites.
    sub_sequences.sort_by {|entry| entry.size }.reverse.each {|sequence|
      erev "  #{sequence.size}"
    }
    e
    return sub_sequences
  else
    e 'Nothing found.'
  end
end

.directory_frequencies?(codon_tables_directory = CODON_TABLES_DIRECTORY) ⇒ Boolean

#

Bioroebe.directory_frequencies?

Preferentially use this method past the year 2022 - it is a tiny bit more flexible than the above constant.

#

Returns:

  • (Boolean)


685
686
687
688
689
# File 'lib/bioroebe/constants/constants.rb', line 685

# Return the path of the frequencies/ subdirectory inside the codon tables
# directory (CODON_TABLES_DIRECTORY by default). The subdirectory name is
# appended directly, without adding a separator.
def self.directory_frequencies?(
    codon_tables_directory = CODON_TABLES_DIRECTORY
  )
  format('%sfrequencies/', codon_tables_directory)
end

.disable_colours(be_verbose = false) ⇒ Object

#

Bioroebe.disable_colours

Use this method if you wish to disable colours for the whole Bioroebe project.

#


186
187
188
189
190
191
# File 'lib/bioroebe/colours/colours.rb', line 186

# Disable colours by setting @use_colours to false. When be_verbose is
# truthy, a short notification is printed first. Returns false.
def self.disable_colours(be_verbose = false)
  e 'Disabling colours.' if be_verbose
  @use_colours = false
end

.display_all_open_reading_frames_from_this_sequence(i = ARGV) ⇒ Object

#

Bioroebe.display_all_open_reading_frames_from_this_sequence

#


1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 1537

# Print all open reading frames (ORFs) found in the given sequence.
#
# With empty input, the ORFs returned by
# Bioroebe.return_all_open_reading_frames_from_this_sequence (called
# without arguments) and their translation via Bioroebe.to_aa are
# pretty-printed.
#
# Otherwise the ORFs of the given sequence are determined and listed, one
# per line. The input may be an Array (such as ARGV) or a plain String;
# for display purposes it is joined into a single String.
def self.display_all_open_reading_frames_from_this_sequence(i = ARGV)
  require 'bioroebe/colours/colours.rb'
  if i.empty?
    array = Bioroebe.return_all_open_reading_frames_from_this_sequence
    pp array
    pp Bioroebe.to_aa(array)
  else
    this_sequence = i
    array = return_all_open_reading_frames_from_this_sequence(this_sequence)
    # Wrap and flatten first, so that a String is accepted as well - a
    # String does not respond to .join.
    this_sequence = [this_sequence].flatten.join
    if array.empty?
      e "No open reading frame has been found from "\
        "this sequence: #{this_sequence}"
    else
      e rev+
        'The following ORFs have been found in this sequence: '
      e
      e "  #{Colours.lightgreen(this_sequence)}"
      e
      array.each_with_index {|sequence, index| index += 1
        # The ORFs are numbered starting at 1.
        name_for_the_ORF = "ORF number #{index}"
        e "  #{Colours.steelblue(sequence.ljust(50))} "\
          "#{Colours.lightslategrey('#')} "\
          "#{Colours.mediumseagreen(name_for_the_ORF)}"
      }
      e
    end
  end
end

.dna_sequence(i) ⇒ Object

#

Bioroebe.dna_sequence

Usage example:

dna = Bioroebe.dna_sequence('ATTCGGU')
#


200
201
202
203
204
# File 'lib/bioroebe/sequence/dna.rb', line 200

# Build a ::Bioroebe::DNA object from the given sequence.
#
# An Array is reduced to its first element. Every "U" (Uracil) is removed,
# as it has no place in DNA. The removal works on a copy, so the caller's
# String is left untouched and frozen Strings are accepted as well.
#
# Returns the new ::Bioroebe::DNA instance.
def self.dna_sequence(i)
  i = i.first if i.is_a? Array
  without_uracil = i.delete('U') # Reject Uracil there.
  ::Bioroebe::DNA.new(without_uracil)
end

.dna_to_aminoacid_sequence(i = ARGV) ⇒ Object

#

Bioroebe.dna_to_aminoacid_sequence

Usage example:

Bioroebe.dna_to_aminoacid_sequence('ATGGGGCCC') # => "MGP"
#


650
651
652
653
654
# File 'lib/bioroebe/conversions/dna_to_aminoacid_sequence.rb', line 650

# Instantiates ::Bioroebe::DnaToAminoacidSequence for the given input (ARGV
# by default), passing :be_quiet through the block, and returns whatever
# sequence? yields for that instance.
def self.dna_to_aminoacid_sequence(
    i = ARGV
  )
  ::Bioroebe::DnaToAminoacidSequence.new(i) { :be_quiet }.sequence?
end

.do_not_truncateObject

#

Bioroebe.do_not_truncate

Do not truncate any “too long” output. This method disables the truncate-functionality.

#


146
147
148
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 146

# Sets the @truncate instance variable to false, which is also the return
# value. This is the counterpart to do_truncate.
def self.do_not_truncate
  @truncate = false
end

.do_truncateObject

#

Bioroebe.do_truncate

#


136
137
138
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 136

# Sets the @truncate instance variable to true, which is also the return
# value. This is the counterpart to do_not_truncate.
def self.do_truncate
  @truncate = true
end

.dotplot_array(dna_x, dna_y) ⇒ Object

#

Bioroebe.dotplot_array

This method can be used to return a 2D dotplot-array of two input sequences. Be careful with large data as input - the RAM usage may go up, so this method has NOT been optimized for such situations. It is deliberately kept simple.

#


215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'lib/bioroebe/dotplots/advanced_dotplot.rb', line 215

# Return a 2D dotplot matrix for two sequences.
#
# The matrix has one row per character of dna_y and one column per
# character of dna_x. A cell holds 1 if the two characters are identical
# and 0 otherwise. Every row is an Array of its own, so modifying one row
# never affects another one.
#
# The former preliminary allocation via Array.new(size, Array.new(...))
# was dropped: it was overwritten right away, and that form would have
# made all rows share one and the same Array.
def self.dotplot_array(dna_x, dna_y)
  x_values = dna_x.chars # Determined once, then re-used for every row.
  # ======================================================================= #
  # We work from top-to-bottom: one row per character of dna_y.
  # ======================================================================= #
  dna_y.chars.map { |y_value|
    x_values.map { |x_value| x_value == y_value ? 1 : 0 }
  }
end

.downcase_chunked_display(i, group_together_n_nucleotides = 10) ⇒ Object

#

Bioroebe.downcase_chunked_display

This is similar to the regular chunked display, but will return the nucleotides in a downcased manner, aka “A” will become “a” and so forth.

In the past this functionality resided in its own .rb file, but as of March 2020 a bin/ executable was added so that the functionality can be called more easily when the bioroebe gem is installed.

Usage example:

Bioroebe.downcase_chunked_display 'ATGGGGCCTGCAATGGGGCCTGCAATGGGGCCTGCAATGGGGCCTGCAATGGGGCCTGCAATGGGGCCTGCAATGGGGCCTGCAATGGGGCCTGCA'
#


4131
4132
4133
4134
4135
4136
4137
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4131

# Same as the regular chunked display, but with the result downcased.
#
# The chunking itself is delegated to ::Bioroebe.return_chunked_display,
# which receives the sequence and the group size (10 by default).
def self.downcase_chunked_display(
    i,
    group_together_n_nucleotides = 10
  )
  chunked = ::Bioroebe.return_chunked_display(i, group_together_n_nucleotides)
  chunked.downcase
end

.download(from_these_URLs) ⇒ Object

#

Bioroebe.download

#


4428
4429
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447
4448
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4428

# Downloads every file listed at the given remote URL(s).
#
# The argument may be a single URL or an Array of URLs; nil entries are
# ignored. For each URL the remote file listing is obtained via curl, and
# every listed file is then handed to wget_download.
def self.download(
    from_these_URLs
  )
  require 'open-uri'
  array_these_urls = [from_these_URLs].flatten.compact
  array_these_urls.each {|remote_url|
    # ===================================================================== #
    # First, we must determine the remote file listing here.
    # Due to convenience we will simply use curl here.
    # ===================================================================== #
    # NOTE(review): the URL is interpolated into a shell command as it is,
    # so it must come from a trusted source.
    cmd = "curl -s \"#{remote_url}\" --list-only"
    # e cmd
    remote_files = `#{cmd}`.split("\n")
    remote_files.each {|this_remote_file|
      # The file name is appended directly, without a separator - this
      # assumes that remote_url ends with a '/'; TODO confirm.
      target = remote_url+this_remote_file
      # NOTE(review): the single quotes in this message look unbalanced.
      e "Downloading `#{this_remote_file}` next. '"\
        "(Full target: '#{target})"
      wget_download(target)
    }
  }
end

.download_directory?Boolean

#

Bioroebe.download_directory?

#

Returns:

  • (Boolean)


171
172
173
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 171

# Return the path of the Downloads/ subdirectory inside the log directory.
# The subdirectory name is appended directly to whatever log_directory?
# returns, without adding a separator.
def self.download_directory?
  base_directory = log_directory?
  "#{base_directory}Downloads/"
end

.download_fasta(i) ⇒ Object

#

Bioroebe.download_fasta

Easier wrapper-method to download fasta files.

#


233
234
235
# File 'lib/bioroebe/fasta_and_fastq/download_fasta.rb', line 233

# Instantiates ::Bioroebe::DownloadFasta with the given input and returns
# whatever location? yields for that instance.
def self.download_fasta(i)
  ::Bioroebe::DownloadFasta.new(i).location?
end

.download_human_genome(from_this_URL = 'https://bioconductor.org/packages/release/data/annotation/src/contrib/BSgenome.Hsapiens.UCSC.hg38_1.4.4.tar.gz') ⇒ Object

#

Bioroebe.download_human_genome

#


2758
2759
2760
2761
2762
2763
2764
2765
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2758

# Download the human genome archive via wget and extract it afterwards.
#
# The archive that is handed to extract is identified by the last path
# component of the URL. The return value is that of extract.
def self.download_human_genome(
    from_this_URL = 'https://bioconductor.org/packages/release/data/annotation/src/contrib/BSgenome.Hsapiens.UCSC.hg38_1.4.4.tar.gz'
  )
  esystem "wget #{from_this_URL}"
  name_of_the_archive = File.basename(from_this_URL)
  extract(name_of_the_archive)
end

.download_taxonomy_database(i = ::Bioroebe::FTP_NCBI_TAXONOMY_DATABASE) ⇒ Object

#

Bioroebe.download_taxonomy_database

#


92
93
94
95
96
# File 'lib/bioroebe/databases/download_taxonomy_database.rb', line 92

# Instantiates DownloadTaxonomyDatabase with the given location (by default
# ::Bioroebe::FTP_NCBI_TAXONOMY_DATABASE) and returns the new instance.
def self.download_taxonomy_database(
    i = ::Bioroebe::FTP_NCBI_TAXONOMY_DATABASE
  )
  DownloadTaxonomyDatabase.new(i)
end

.download_this_pdb(i = '355D') ⇒ Object

#

Bioroebe.download_this_pdb

This method can be used to download a remote .pdb file to the local file-system. If the default pdb/ directory exists locally as well, then the downloaded .pdb file will be relocated into that directory.

An example for a remote URL to a .pdb file would be:

https://files.rcsb.org/view/2BTS.pdb
https://files.rcsb.org/view/355D.pdb
#


29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/bioroebe/pdb_and_protein_structure/download_this_pdb.rb', line 29

# Downloads one or several .pdb files from files.rcsb.org via wget.
#
# The input may be a single identifier or an Array of identifiers; nil
# entries are ignored. A trailing ".pdb" (in any letter case) is optional.
# The identifier is upcased and ".pdb" is appended before the remote URL is
# built. If the downloaded file exists in the current directory afterwards,
# it is handed to ::Bioroebe.move_file_to_its_correct_location.
#
# The Strings passed in by the caller are never modified.
def self.download_this_pdb(
    i = '355D'
  )
  # ======================================================================= #
  # Treat all input as an Array past the next point.
  # ======================================================================= #
  [i].flatten.compact.each {|this_entry|
    # ===================================================================== #
    # Work on a fresh String so that the caller's object stays untouched.
    # The suffix is stripped case-insensitively, as the name is upcased
    # next and ".pdb" is appended again afterwards.
    # ===================================================================== #
    pdb_id    = this_entry.to_s.sub(/\.pdb\z/i, '').upcase
    file_name = "#{pdb_id}.pdb"
    e file_name
    # ===================================================================== #
    # Build up our remote URL next:
    # ===================================================================== #
    remote_url = "https://files.rcsb.org/view/#{file_name}"
    e steelblue(remote_url)
    esystem "wget #{remote_url}"
    if File.exist? file_name
      ::Bioroebe.move_file_to_its_correct_location(file_name)
    end
  }
end

.e(i = '') ⇒ Object

#

Bioroebe.e

#


246
247
248
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 246

# Prints the given argument (an empty line by default) to standard output.
def self.e(i = '')
  $stdout.puts(i)
end

.edit_C_to_U(of_this_sequence, retain_hyphens = false) ⇒ Object

#

Bioroebe.edit_C_to_U

This method attempts to do an RNA editing job, such as may be done in mitochondrial DNA.

Usage examples:

Bioroebe.edit_C_to_U('AGG-GGU-GCU-UCG-GAU-CGG-GAG')                  # => "AGGGGUGUUUUGGAUUGGGAG"
Bioroebe.edit_C_to_U('AGG-GGU-GCU-UCG-GAU-CGG-GAG', :retain_hyphens) # => "AGG-GGU-GUU-UUG-GAU-UGG-GAG"
Bioroebe.to_aa(Bioroebe.edit_C_to_U('AGG-GGU-GCU-UCG-GAU-CGG-GAG', false)) # translates the edited, hyphen-free sequence "AGGGGUGUUUUGGAUUGGGAG"
#


4402
4403
4404
4405
4406
4407
4408
4409
4410
4411
4412
4413
4414
4415
4416
4417
4418
4419
4420
4421
4422
4423
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4402

# Returns a copy of the given sequence in which every "C" has been replaced
# by "U".
#
# of_this_sequence - a String, or an Array whose first member is used.
# retain_hyphens   - true or :retain_hyphens keeps "-" characters; any
#                    falsy value (the default) removes them first.
#
# The String passed in by the caller is never modified.
def self.edit_C_to_U(
    of_this_sequence,
    retain_hyphens = false
  )
  # ======================================================================= #
  # === :retain_hyphens
  # ======================================================================= #
  retain_hyphens = true if retain_hyphens == :retain_hyphens
  if of_this_sequence.is_a? Array
    of_this_sequence = of_this_sequence.first
  end
  sequence = of_this_sequence.to_s
  # ======================================================================= #
  # String#delete and String#tr both return new Strings, so the input
  # is left untouched.
  # ======================================================================= #
  sequence = sequence.delete('-') unless retain_hyphens
  sequence.tr('C','U')
end

.editor?Boolean

#

Bioroebe.editor?

This method will determine which editor is to be used, if we have to use an editor for the bioroebe project.

#

Returns:

  • (Boolean)


442
443
444
445
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 442

# Returns the value of ::Bioroebe::Configuration::DEFAULT_EDITOR_TO_USE.
# The file defining the configuration constants is required on demand.
def self.editor?
  require 'bioroebe/configuration/constants.rb'
  configuration = ::Bioroebe::Configuration
  configuration::DEFAULT_EDITOR_TO_USE
end

.embeddable_interfaceObject

#

Bioroebe.embeddable_interface

#


775
776
777
778
779
# File 'lib/bioroebe/www/embeddable_interface.rb', line 775

# Returns a fresh, plain Object that has been extended with the
# ::Bioroebe::EmbeddableInterface module.
def self.embeddable_interface
  Object.new.extend(::Bioroebe::EmbeddableInterface)
end

.enable_coloursObject

#

Bioroebe.enable_colours

Use this method to enable colours for the whole Bioroebe project.

All classes that are part of the Bioroebe project should honour this setting (if it is a class that may make use of colours; some smaller classes do not need colours, and hence have no need for the method here).

#


203
204
205
# File 'lib/bioroebe/colours/colours.rb', line 203

# Switches colour support on by setting @use_colours to true on the
# receiver. Returns true.
def self.enable_colours
  instance_variable_set(:@use_colours, true)
end

.ensure_that_the_base_directories_existObject

#

Bioroebe.ensure_that_the_base_directories_exist

This method will ensure that the base directories for the Bioroebe project exist.

#


3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 3259

# Makes sure that the directories the project works with are present. In
# this order the following locations are checked and, when missing,
# announced via erev and created via mkdir:
#
#   - the log directory itself (log_dir?)
#   - its Downloads/ subdirectory
#   - its pdb/ subdirectory
#   - the directory named by AUTOGENERATED_SQL_FILES_DIR
def self.ensure_that_the_base_directories_exist
  base_directory = log_dir?
  # ======================================================================= #
  # === Ensure that the base directory exists
  # ======================================================================= #
  if !File.exist?(base_directory)
    erev "The base directory at `#{sdir(base_directory)}#{rev}` does not exist."
    erev 'It will thus be created next.'
    mkdir base_directory
  end
  # ======================================================================= #
  # === Ensure that the Downloads/ and the pdb/ directory exist
  # ======================================================================= #
  ['Downloads/', 'pdb/'].each {|name_of_the_subdirectory|
    target = "#{base_directory}#{name_of_the_subdirectory}"
    next if File.exist? target
    erev "The directory at `#{sdir(target)}#{rev}` does not exist."
    erev 'It will thus be created next.'
    mkdir target
  }
  # ======================================================================= #
  # === Ensure that the directory for autogenerated SQL files exists
  # ======================================================================= #
  sql_directory = AUTOGENERATED_SQL_FILES_DIR
  return if Dir.exist? sql_directory
  erev 'The directory at `'+sdir(sql_directory)+
       rev+'` does not exist.'
  erev 'It will thus be created next.'
  mkdir(sql_directory)
end

.erev(i = '') ⇒ Object

#

Bioroebe.erev

#


69
70
71
# File 'lib/bioroebe/colours/colours.rb', line 69

# Prints the given argument to standard output, prefixed with whatever
# rev returns.
def self.erev(i = '')
  line = "#{rev}#{i}"
  $stdout.puts(line)
end

.esystem(i) ⇒ Object

#

Bioroebe.esystem

#


253
254
255
256
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 253

# Echoes the given command on standard output and then runs it through
# Kernel#system, whose result is returned.
def self.esystem(i)
  command = i.to_s
  puts command
  system command
end

.every_reverse_palindrome_in_this_string(i = 'TCAATGCATGCGGGTCTATATGCAT', min_length = 4, max_length = 12) ⇒ Object

#

Bioroebe.every_reverse_palindrome_in_this_string

This method can return every reverse palindrome in the given input String.

The output will be an Array such as this:

[[4, 6], [5, 4], [6, 6], [7, 4], [17, 4], [18, 4], [4, 6], [5, 4]]
#


4984
4985
4986
4987
4988
4989
4990
4991
4992
4993
4994
4995
4996
4997
4998
4999
5000
5001
5002
5003
5004
5005
5006
5007
5008
5009
5010
5011
5012
5013
5014
5015
5016
5017
5018
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4984

# Finds every reverse palindrome (a subsequence that is identical to its
# own reverse complement) in the given sequence.
#
# i          - the sequence as a String, an Array whose first member is
#              used, or the path to an existing file. When a file is read,
#              lines starting with ">" are skipped and the remaining lines
#              are stripped and joined.
# min_length - shortest subsequence to consider.
# max_length - longest subsequence to consider.
#
# Returns an Array of [position, length] pairs, where position is the
# 1-based start of the palindrome within the sequence. The pairs are
# ordered by position and then by length. For the default sequence the
# result is:
#
#   [[4, 6], [5, 4], [6, 6], [7, 4], [17, 4], [18, 4], [20, 6], [21, 4]]
#
# Every pair carries the position at which the palindrome was actually
# found; looking the subsequence up again in the full sequence would always
# report the first occurrence of a palindrome that appears more than once.
def self.every_reverse_palindrome_in_this_string(
    i          = 'TCAATGCATGCGGGTCTATATGCAT',
    min_length =  4,
    max_length = 12
  )
  require 'bioroebe/sequence/reverse_complement.rb'
  if i.is_a? Array # Arrays will become Strings - or rather, whatever is the first argument.
    i = i.first
  end
  return [] if i.nil?
  if File.exist?(i)
    i = File.readlines(i).reject {|entry|
      entry.start_with?('>')
    }.map {|inner_entry| inner_entry.strip }.join
  end
  array_containing_starting_index_and_length_of_reverse_palindromes = []
  # ======================================================================= #
  # Walk over every start position and test each window whose length
  # lies between min_length and max_length, e. g. 4 and 12.
  # ======================================================================= #
  i.size.times {|start_index|
    (min_length .. max_length).each {|length|
      break if (start_index + length) > i.size # The window would run past the end.
      candidate = i[start_index, length]
      if Bioroebe.reverse_complement(candidate) == candidate
        array_containing_starting_index_and_length_of_reverse_palindromes <<
          [start_index + 1, length]
      end
    }
  }
  return array_containing_starting_index_and_length_of_reverse_palindromes
end

.ewarn(i = '') ⇒ Object

#

Bioroebe.ewarn

#


168
169
170
# File 'lib/bioroebe/colours/colours.rb', line 168

# Passes the given argument through swarn and prints the result via e.
def self.ewarn(i = '')
  warning = swarn(i)
  e(warning)
end

.extract(i = ARGV) ⇒ Object

#

Bioroebe.extract

This method can be used to quickly extract a local archive.

#


2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2298

# Extracts a local tar archive via the "tar" command.
#
# i - path of the archive as a String, or an Array that is joined with
#     spaces (ARGV by default). A name without any "/" that does not exist
#     as given is looked up relative to return_pwd.
#
# Names ending in bz2, xz, .tar.gz, .tgz or .tar are handed to
# "tar -xvf"; the path is shell-escaped so that spaces and shell
# meta-characters survive. The "f" flag must come last in the bundled
# options because tar treats what follows it as the archive name.
#
# A warning is issued via ewarn, and nothing is run, if the file does not
# exist or if its name matches none of the supported endings.
def self.extract(
    i = ARGV
  )
  require 'shellwords'
  require 'bioroebe/colours/sdir_sfancy_sfile_simp_swarn.rb'
  if i.is_a? Array
    i = i.join(' ').strip
  end
  i = i.to_s
  if !i.include?('/') and !File.exist?(i)
    i = return_pwd+
        File.basename(i)
  end
  unless File.exist? i
    return ewarn("Can not extract #{sfile(i)} because it does "\
                 "not appear to exist.")
  end
  case i
  when /bz2\z/,
       /xz\z/,
       /\.tar\.gz\z/,
       /\.tgz\z/,
       /\.tar\z/
    _ = "tar -xvf #{Shellwords.escape(i)}"
  else
    return ewarn("Can not extract #{sfile(i)} because its archive "\
                 "format is not supported.")
  end
  if be_verbose?
    e "Now extracting `#{sfancy((i).squeeze('/'))}`."
    esystem(_)
    e 'Done extracting!'
  else
    system _
  end
end

.extractseq(i = 'AAAGGGTTT', *regions) ⇒ Object

#

Bioroebe.extractseq

Bioroebe.extractseq reads a sequence and writes sub-sequences from it to file. The set of regions to extract is specified on the command-line or in a file as pairs of start and end positions. The regions are written in the order in which they are specified. Thus, if the sequence AAAGGGTTT has been input and the regions: 7-9, 3-4 have been specified, then the output sequence will be:

TTTAG

See the next ruler for that:

012345678 # real index
123456789 # desired index
AAAGGGTTT

Usage example

Bioroebe.extractseq('AAAGGGTTT', '7-9','3-4') # => TTTAG
#


282
283
284
285
286
287
288
289
290
291
292
293
294
295
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 282

# Assembles a new sequence out of regions of the given sequence.
#
# i       - the source sequence.
# regions - any number of 'start-end' Strings using 1-based, inclusive
#           positions; they may also be passed as a single Array. The
#           regions are appended in the order in which they are given.
#
# A region that lies completely outside of the sequence contributes
# nothing instead of raising an error.
#
# Usage example:
#
#   Bioroebe.extractseq('AAAGGGTTT', '7-9','3-4') # => "TTTAG"
def self.extractseq(
    i = 'AAAGGGTTT',
    *regions
  )
  new_sequence = ''.dup
  regions.flatten.each {|this_region|
    splitted = this_region.to_s.split('-') # We assume a '-' must be there.
    first_position = splitted[0].to_i - 1
    last_position  = splitted[1].to_i - 1
    # String#[] returns nil when the start lies past the end of the
    # sequence, thus the .to_s.
    new_sequence << i[first_position .. last_position].to_s
  }
  return new_sequence
end

.fasta_dir?Boolean

#

Bioroebe.fasta_dir?

#

Returns:

  • (Boolean)


721
722
723
# File 'lib/bioroebe/constants/constants.rb', line 721

# Returns the path of the fasta/ subdirectory below Bioroebe.log_dir?.
# Despite the trailing "?", the return value is a String.
def self.fasta_dir?
  log_directory = Bioroebe.log_dir?
  "#{log_directory}fasta/"
end

.fasta_directory?Boolean

#

Bioroebe.fasta_directory?

This method will return a path such as “/root/Bioroebe/fasta/”.

#

Returns:

  • (Boolean)


193
194
195
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 193

# Returns the path of the fasta/ subdirectory below
# ::Bioroebe.log_directory?, such as "/root/Bioroebe/fasta/". Despite the
# trailing "?", the return value is a String.
def self.fasta_directory?
  log_directory = ::Bioroebe.log_directory?
  "#{log_directory}fasta/"
end

.fetch_data_from_uniprot(i = ARGV) ⇒ Object

#

Bioroebe.fetch_data_from_uniprot

#


259
260
261
# File 'lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb', line 259

# Instantiates Bioroebe::FetchDataFromUniprot with the given input (ARGV by
# default) and returns the new instance.
def self.fetch_data_from_uniprot(i = ARGV)
  return Bioroebe::FetchDataFromUniprot.new(i)
end

.fetch_fasta_sequence_from_pdb(i = ARGV) ⇒ Object

#

Bioroebe.fetch_fasta_sequence_from_pdb

#


126
127
128
# File 'lib/bioroebe/pdb_and_protein_structure/fetch_fasta_sequence_from_pdb.rb', line 126

# Instantiates Bioroebe::FetchFastaSequenceFromPdb with the given input
# (ARGV by default) and returns the new instance.
def self.fetch_fasta_sequence_from_pdb(i = ARGV)
  return Bioroebe::FetchFastaSequenceFromPdb.new(i)
end

.file_amino_acidsObject

#

Bioroebe.file_amino_acids

#


638
639
640
# File 'lib/bioroebe/constants/constants.rb', line 638

# Query method for the FILE_AMINO_ACIDS constant.
def self.file_amino_acids
  return FILE_AMINO_ACIDS
end

.file_amino_acids_abbreviationsObject

#

Bioroebe.file_amino_acids_abbreviations

#


651
652
653
# File 'lib/bioroebe/constants/constants.rb', line 651

# Query method for the FILE_AMINO_ACIDS_ABBREVIATIONS constant.
def self.file_amino_acids_abbreviations
  return FILE_AMINO_ACIDS_ABBREVIATIONS
end

.file_amino_acids_frequencyObject

#

Bioroebe.file_amino_acids_frequency

#


830
831
832
# File 'lib/bioroebe/constants/constants.rb', line 830

# Returns the path of the file amino_acids_frequency.yml inside the
# directory named by BIOROEBE_YAML_AMINOACIDS_DIRECTORY.
def self.file_amino_acids_frequency
  directory = BIOROEBE_YAML_AMINOACIDS_DIRECTORY
  "#{directory}amino_acids_frequency.yml"
end

.file_amino_acids_long_name_to_one_letterObject

#

Bioroebe.file_amino_acids_long_name_to_one_letter

This method will return a String such as:

"/home/Programs/Ruby/3.1.2/lib/ruby/site_ruby/3.1.0/bioroebe/yaml/aminoacids/amino_acids_long_name_to_one_letter.yml"
#


1134
1135
1136
# File 'lib/bioroebe/constants/constants.rb', line 1134

# Returns the path of the file amino_acids_long_name_to_one_letter.yml,
# which resides in the aminoacids/ subdirectory of project_yaml_directory?.
def self.file_amino_acids_long_name_to_one_letter
  yaml_directory = project_yaml_directory?
  "#{yaml_directory}aminoacids/amino_acids_long_name_to_one_letter.yml"
end

.file_fastq_quality_schemesObject

#

Bioroebe.file_fastq_quality_schemes

This method will return a path that points to a location such as this one here:

/Programs/Ruby/2.6.4/lib/ruby/site_ruby/2.6.0/bioroebe/yaml/fastq_quality_schemes.yml
#


733
734
735
# File 'lib/bioroebe/constants/constants.rb', line 733

# Returns the path of the file fastq_quality_schemes.yml, which resides in
# the fasta_and_fastq/ subdirectory of project_yaml_directory?.
def self.file_fastq_quality_schemes
  yaml_directory = project_yaml_directory?
  "#{yaml_directory}fasta_and_fastq/fastq_quality_schemes.yml"
end

.file_molecular_weightObject

#

Bioroebe.file_molecular_weight

#


1122
1123
1124
# File 'lib/bioroebe/constants/constants.rb', line 1122

# Returns the path of the file molecular_weight.yml, which resides in the
# aminoacids/ subdirectory of project_yaml_directory?.
def self.file_molecular_weight
  yaml_directory = project_yaml_directory?
  "#{yaml_directory}aminoacids/molecular_weight.yml"
end

.file_restriction_enzymesObject

#

Bioroebe.file_restriction_enzymes

#


885
886
887
# File 'lib/bioroebe/constants/constants.rb', line 885

# Query method for the FILE_RESTRICTION_ENZYMES constant.
def self.file_restriction_enzymes
  return FILE_RESTRICTION_ENZYMES
end

.file_statistics?Boolean

#

Bioroebe.file_statistics?

This file can normally be found here:

$BIOROEBE/yaml/statistics.yml
#

Returns:

  • (Boolean)


714
715
716
# File 'lib/bioroebe/constants/constants.rb', line 714

# Returns the path of the file statistics.yml inside the directory that
# Bioroebe.log_dir? reports. Despite the trailing "?", the return value is
# a String.
def self.file_statistics?
  log_directory = Bioroebe.log_dir?
  "#{log_directory}statistics.yml"
end

.file_talensObject

#

Bioroebe.file_talens

#


740
741
742
# File 'lib/bioroebe/constants/constants.rb', line 740

# Returns the path of the file talens.yml inside project_yaml_directory?.
def self.file_talens
  yaml_directory = project_yaml_directory?
  "#{yaml_directory}talens.yml"
end

.filter_away_invalid_aminoacids(i) ⇒ Object

#

Bioroebe.filter_away_invalid_aminoacids

Usage example:

Bioroebe.filter_away_invalid_aminoacids('ATMÜ') # => "ATM"
#


174
175
176
177
# File 'lib/bioroebe/constants/constants.rb', line 174

# Returns a new String that contains only those characters of the input
# that are included in the collection returned by all_aminoacids?. The
# order of the remaining characters is retained.
def self.filter_away_invalid_aminoacids(i)
  permitted = all_aminoacids?
  i.each_char.select {|character| permitted.include?(character) }.join
end

.filter_away_invalid_nucleotides(i, preserve_uracil = false) ⇒ Object

#

Bioroebe.filter_away_invalid_nucleotides

This method can be used to filter away invalid nucleotides. An “invalid” nucleotide is, for example, if you work with DNA sequences, any character that is not allowed to be part of DNA. For example, Uracil, which can be found (almost exclusively) only in RNA.

As for now, the behaviour is to upcase the given input before the invalid characters are removed from the given String.

Usage example:

Bioroebe.filter_away_invalid_nucleotides 'ATGCCGGAGGAGANNN' # => "ATGCCGGAGGAGA"
#


3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 3842

# Removes letters that do not denote a nucleotide from the given input.
#
# i               - a String, or an Array that is joined with spaces and
#                   then stripped.
# preserve_uracil - true or :preserve_uracil keeps "U"; false or
#                   :preserve_nothing (the default behaviour) removes it.
#
# The input is upcased first. Afterwards every comma and every letter other
# than A, C, G, T (and U, if it is to be preserved) is deleted. Characters
# that are neither letters nor commas are left in place.
#
# Usage example:
#
#   Bioroebe.filter_away_invalid_nucleotides 'ATGCCGGAGGAGANNN' # => "ATGCCGGAGGAGA"
def self.filter_away_invalid_nucleotides(i, preserve_uracil = false)
  i = i.join(' ').strip if i.is_a? Array
  if preserve_uracil == :preserve_uracil
    preserve_uracil = true
  elsif preserve_uracil == :preserve_nothing
    preserve_uracil = false
  end
  unwanted_characters =
    if preserve_uracil
      'B,D-F,H-S,V-Z' # Keeps A T C G U
    else
      'B,D-F,H-S,U-Z' # Keeps A T C G
    end
  i.to_s.upcase.delete(unwanted_characters)
end

.find_substring(full_string = 'GATATATGCATATACTT', this_substring = :default) ⇒ Object

#

Bioroebe.find_substring

This method can be used to find a substring within a larger String.

For example, in the below default values, the substring “ATAT” would exist at the positions 2, 4 and 10, if compared to the larger parent string “GATATATGCATATACTT”.

The following display may help you see this more easily, in regards to the substring matches:

GATATATGCATATACTT
 ATATAT  ATAT

If you look closely, you will be able to see that “ATAT” can be found three times in the string above.

Indices in this context start at position 1, not 0. This is mostly done to refer to nucleotides or aminoacids, which also typically start at the first letter. Position 0 makes no sense for a nucleotide - what would “nucleotide 0” even refer to?

The first argument to this method may also be the path to a locally existing file, such as “/rosalind_subs.txt”. In fact this method has been largely motivated by Rosalind tasks.

The method will return an Array with the positions of all substrings that are found in the full_string variable. See the usage example below for how this may be.

Usage example:

Bioroebe.find_substring 'GATATATGCATATACTT', 'ATAT' # => [2, 4, 10]
#


2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2401

# Finds every (possibly overlapping) occurrence of a substring within a
# larger String.
#
# full_string    - the String to search in, or an Array whose first member
#                  is used. If it names an existing file and no substring
#                  was given, the first line of that file becomes the full
#                  String and the last line becomes the substring.
# this_substring - the String to search for. :default, nil and an empty
#                  value all fall back to 'ATAT'.
#
# A nil or empty full_string falls back to 'GATATATGCATATACTT'.
#
# Returns an Array of 1-based start positions, or nil if the substring
# does not occur at all. A match that ends exactly at the end of the full
# String is reported as well.
#
# Usage example:
#
#   Bioroebe.find_substring 'GATATATGCATATACTT', 'ATAT' # => [2, 4, 10]
def self.find_substring(
    full_string    = 'GATATATGCATATACTT', # ← The full String comes here.
    this_substring = :default             # ← The substring we are searching for comes here.
  )
  if full_string.is_a? Array
    # ===================================================================== #
    # Presently this method will only work on the first member of an Array.
    # ===================================================================== #
    full_string = full_string.first
  end
  if full_string and File.file?(full_string) and
     this_substring == :default
    # ===================================================================== #
    # In this case it is ok to read from that file.
    # ===================================================================== #
    splitted = File.read(full_string).split("\n")
    full_string    = splitted.first
    this_substring = splitted.last
  end
  case this_substring
  # ======================================================================= #
  # Use a default value in this case. In reality users should supply
  # their own substring when they use this method here.
  # ======================================================================= #
  when :default,
       nil
    this_substring = 'ATAT'
  else
    this_substring = 'ATAT' if this_substring.empty?
  end
  if full_string.nil? or full_string.empty?
    full_string = 'GATATATGCATATACTT' # ← Use the default in this case.
  end
  haystack = full_string.to_s
  needle   = this_substring.to_s
  positions = []
  offset = 0
  # ======================================================================= #
  # String#index returns nil once no further match exists, which also
  # covers an offset beyond the end of the String. Advancing by just one
  # character allows overlapping matches to be found.
  # ======================================================================= #
  while (index = haystack.index(needle, offset))
    positions << (index + 1) # Positions start at 1, not 0.
    offset = index + 1
  end
  positions.empty? ? nil : positions # ← nil rather than an empty Array, as before.
end

.format_this_nucleotide_sequence(i = ARGV, &block) ⇒ Object

#

Bioroebe.format_this_nucleotide_sequence

#


660
661
662
663
664
665
666
667
668
669
670
# File 'lib/bioroebe/nucleotides/show_nucleotide_sequence/show_nucleotide_sequence.rb', line 660

# Creates a ::Bioroebe::ShowNucleotideSequence instance for the given input
# (ARGV by default), passing :do_not_report_anything and the optional block
# along. The padding is cleared, the sequence is formatted and the result
# of formatted_sequence? is returned.
def self.format_this_nucleotide_sequence(i = ARGV, &block)
  formatter = ::Bioroebe::ShowNucleotideSequence.new(
    i, :do_not_report_anything, &block
  )
  formatter.clear_padding
  formatter.format
  return formatter.formatted_sequence?
end

.frequency_per_thousand(i) ⇒ Object

#

Bioroebe.frequency_per_thousand

The input to this method should be a String ideally. If an Array is input then it will simply be .join()-ed.

This method will return a String, if all goes well.

#


867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 867

# Builds a textual codon usage table for the given sequence.
#
# i - the sequence as a String; an Array is joined first. Newlines are
#     ignored. The caller's String is not modified, so frozen Strings are
#     fine as input.
#
# The codon counts come from ::Bioroebe.codon_frequency_of_this_string;
# codons missing from that Hash count as 0. For each of the 64 RNA codons
# the table shows the codon, the value of thousand_percentage(count, total)
# and the count itself in parentheses.
#
# Layout: one group per first base (U, C, A, G), separated by an empty
# line; within a group one row per third base and within a row one column
# per second base.
#
# Returns the table as a String, preceded by a header line.
def self.frequency_per_thousand(i)
  result = "fields: [triplet] [frequency: per thousand] ([number])\n".dup # This String will be returned.
  if i.is_a? Array
    i = i.join
  end
  i = i.delete("\n") # Non-destructive on purpose.
  hash = ::Bioroebe.codon_frequency_of_this_string(i)
  hash.default = 0
  total_n_elements = hash.values.sum
  bases = %w( U C A G )
  groups = bases.map {|first_base|
    bases.map {|third_base|
      bases.map {|second_base|
        codon = "#{first_base}#{second_base}#{third_base}"
        "#{codon}#{thousand_percentage(hash[codon], total_n_elements)}(     #{hash[codon]})"
      }.join('  ')+"\n"
    }.join
  }
  result << "\n" << groups.join("\n")
  return result
end

.gc_content(of_this_sequence, round_to_n_positions = 3) ⇒ Object

#

Bioroebe.gc_content

This is a convenience method that will return back the GC content, as a percentage value, of the input-given sequence (nucleotide sequence).

So for instance, the following example will correctly return 50.0 because the G and C content of the sequence is exactly 50%.

The second argument can be used for denoting where to round.

Usage example:

Bioroebe.gc_content('ATCG') # => 50.0
#


280
281
282
283
284
285
286
287
288
289
290
291
292
293
# File 'lib/bioroebe/calculate/calculate_gc_content.rb', line 280

# Returns the GC content of the given nucleotide sequence, as computed by
# ::Bioroebe::CalculateGCContent.gc_percentage.
#
# of_this_sequence     - a sequence, or an Array of sequences.
# round_to_n_positions - forwarded to gc_percentage as second argument.
#
# For an Array, every member is evaluated on its own and an Array holding
# the individual results is returned.
#
# Usage example:
#
#   Bioroebe.gc_content('ATCG') # => 50.0
def self.gc_content(
    of_this_sequence,
    round_to_n_positions = 3
  )
  if of_this_sequence.is_a? Array
    # Recurse with the individual entry, not with the whole Array again.
    of_this_sequence.map {|entry|
      gc_content(entry, round_to_n_positions)
    }
  else
    ::Bioroebe::CalculateGCContent.gc_percentage(
      of_this_sequence, round_to_n_positions
    )
  end
end

.genbank_to_fasta(this_file, be_verbose = :be_verbose) ⇒ Object

#

Bioroebe.genbank_to_fasta

This method will convert from a genbank file, to a .fasta file.

Invocation example:

Bioroebe.genbank_to_fasta('/home/x/programming/ruby/src/bioroebe/lib/bioroebe/data/genbank/sample_file.genbank')
#


1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
# File 'lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb', line 1457

# Converts a genbank file into a fasta file by means of
# Bioroebe::ParseFasta and returns whatever save_into_a_fasta_file returns.
#
# this_file  - path of the genbank file, or an Array whose first member is
#              used. If the file does not exist, the parser is created
#              with :do_not_run_yet and set_data as well as
#              split_into_proper_sections are invoked on it.
# be_verbose - forwarded to save_into_a_fasta_file; :be_quiet is turned
#              into false beforehand.
def self.genbank_to_fasta(this_file, be_verbose = :be_verbose)
  be_verbose = false if be_verbose == :be_quiet
  this_file = this_file.first if this_file.is_a? Array
  if File.exist? this_file
    parser = Bioroebe::ParseFasta.new(this_file) { :be_quiet }
  else
    parser = Bioroebe::ParseFasta.new(:do_not_run_yet) { :be_quiet }
    parser.set_data # This will use the default file.
    parser.split_into_proper_sections
  end
  return parser.save_into_a_fasta_file(be_verbose)
end

.generate_nucleotide_sequence_based_on_these_frequencies(n_nucleotides = 1061, hash_frequencies = { A: 0.3191430, C: 0.2086633, G: 0.2580345, T: 0.2141593 }) ⇒ Object

#

Bioroebe.generate_nucleotide_sequence_based_on_these_frequencies

The second argument to this method should be a Hash.

The default output may be a String such as this one here:

AACTGAACATTTTAGGAGATATCAAGACCCTCTGATTCTCAAGGAATAATTAGCTAATTT

Usage example:

Bioroebe.generate_nucleotide_sequence_based_on_these_frequencies(:default, { A: 0.25, C: 0.25, G: 0.25, T: 0.25 })
#


4675
4676
4677
4678
4679
4680
4681
4682
4683
4684
4685
4686
4687
4688
4689
4690
4691
4692
4693
4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
4704
4705
4706
4707
4708
4709
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 4675

# Generates a random DNA sequence whose nucleotides are drawn according to
# the given frequencies.
#
# n_nucleotides    - length of the sequence; :default stands for 500.
# hash_frequencies - a Hash with the keys :A, :C, :G and :T. The values are
#                    relative weights and do not have to add up to 1, so
#                    { A: 25, C: 25, G: 25, T: 25 } is fine as well. Missing
#                    or non-positive weights mean that the nucleotide is
#                    never used; if no weight is positive at all, the four
#                    nucleotides are drawn with equal probability.
#
# Returns a String that always has exactly n_nucleotides characters.
def self.generate_nucleotide_sequence_based_on_these_frequencies(
    n_nucleotides = 1061, # Denote how many nucleotides to use.
    hash_frequencies = {
      A: 0.3191430,
      C: 0.2086633,
      G: 0.2580345,
      T: 0.2141593
    }
  )
  # ======================================================================= #
  # === :default
  # ======================================================================= #
  n_nucleotides = 500 if n_nucleotides == :default
  nucleotides = [:A, :C, :G, :T]
  pool = nucleotides.map {|nucleotide|
    [nucleotide, hash_frequencies[nucleotide].to_f]
  }.select {|_nucleotide, weight| weight > 0 }
  if pool.empty?
    pool = nucleotides.map {|nucleotide| [nucleotide, 1.0] }
  end
  total_weight = pool.inject(0.0) {|sum, (_nucleotide, weight)| sum + weight }
  result = ''.dup
  n_nucleotides.times {
    # ===================================================================== #
    # Scale the random number by the total weight; this way a nucleotide
    # is chosen in every run, no matter what the weights add up to.
    # ===================================================================== #
    threshold = rand * total_weight
    upper_bound = 0.0
    chosen, = pool.find {|_nucleotide, weight|
      upper_bound += weight
      threshold < upper_bound
    } || pool.last # pool.last only guards against floating point rounding.
    result << chosen.to_s
  }
  return result
end

.generate_pdf_tutorialObject

#

Bioroebe.generate_pdf_tutorial

#


11918
11919
11920
# File 'lib/bioroebe/shell/shell.rb', line 11918

def self.generate_pdf_tutorial
  ::Bioroebe::Shell.generate_pdf_tutorial
end

.generate_random_dna_sequence(i = ARGV, optional_hash_with_the_frequencies = {}) ⇒ Object

#

Bioroebe.generate_random_dna_sequence

This method will “generate” a random DNA sequence (as a String).

A String will be returned by this method.

The second argument to this method can be a Hash, specifying the percentage likelihood for each of the nucleotides. See the following usage examples to find out how to use this.

Usage examples:

Bioroebe.random_dna 15 # => "TTGGTAAGCTCTTTA"
Bioroebe.random_dna 25 # => "TTAGCACAAGCATGGACGGACCAGA"
Bioroebe.random_dna(50, { A: 10, T: 10, C: 10, G: 70}) # => "GGGGTGGGGAGGGTATGCGGAGGAAGGGCGGGAAGGGCGGGGGCTGGGCG"
Bioroebe.random_dna(20, 'ATGGGGGGGG') # => "TGAGGGGGGGGGTGGGAGGG"
Bioroebe.random_dna(20, 'ATGGGGGGGG') # => "GGTAGGGGGGGGTAGGGGGG"
#


3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 3646

# ========================================================================= #
# === Bioroebe.generate_random_dna_sequence
#
# Build a random DNA sequence and return it as a new (unfrozen) String.
#
# i
#   How many nucleotides to generate. An Array (such as ARGV) is joined
#   and stripped first; the Symbol :default means 250; any other value
#   is converted via .to_i.
#
# optional_hash_with_the_frequencies
#   Controls the composition of the sequence:
#
#   - nil, an empty Hash or an empty String: every position is sampled
#     uniformly from Bioroebe::DNA_NUCLEOTIDES.
#   - a non-empty String: its characters form a pool that is shuffled
#     and drained one character at a time; the pool is refilled (and
#     reshuffled) whenever it runs empty.
#   - a Hash: percentage likelihoods under the keys :A, :T, :C and :G.
#     A missing (or nil) key counts as 0. For every position a number
#     between 1 and 100 is drawn; if the percentages sum to less than
#     100 and the drawn number is not covered, a message is reported
#     via e() and that position is skipped, so the result may be
#     shorter than requested.
# ========================================================================= #
def self.generate_random_dna_sequence(
    i                                  = ARGV,
    optional_hash_with_the_frequencies = {} # ← This may be a String too, mind you.
  )
  result = ''.dup # This String will be returned by the method.
  i = i.join.strip if i.is_a? Array
  i = 250 if i == :default
  i = i.to_i # This is "n times".
  frequencies = optional_hash_with_the_frequencies
  if frequencies.nil? or frequencies.empty?
    # ===================================================================== #
    # This is the default clause: a uniform choice between the four
    # DNA nucleotides. An empty String pool ends up here as well, since
    # there is nothing to draw from.
    # ===================================================================== #
    nucleotides = Bioroebe::DNA_NUCLEOTIDES
    i.times { result << nucleotides.sample }
  elsif frequencies.is_a? String
    # ===================================================================== #
    # The user passed a String: draw from a shuffled pool of its
    # characters.
    # ===================================================================== #
    pool = []
    i.times {
      pool = frequencies.chars.shuffle if pool.empty?
      result << pool.pop
    }
  else
    # ===================================================================== #
    # Else, the user wants to use a frequency hash. We precompute the
    # cumulative upper bound of each nucleotide, in the order A, T, C, G.
    # ===================================================================== #
    upper_bounds  = []
    running_total = 0
    [['A', :A], ['T', :T], ['C', :C], ['G', :G]].each { |letter, key|
      running_total += (frequencies[key] || 0) # A missing key counts as 0.
      upper_bounds << [letter, running_total]
    }
    i.times {
      percentage = rand(100)+1 # A number between 1 and 100.
      letter, _upper_bound = upper_bounds.find { |_letter, upper_bound|
        percentage <= upper_bound
      }
      if letter
        result << letter
      else
        # Never append nil to the result; report and skip this position.
        e 'Not found a match for '+percentage.to_s
      end
    }
  end
  result
end

.generate_random_rna_sequence(i = ARGV) ⇒ Object

#

Bioroebe.generate_random_rna_sequence

The input-argument should be a number, an Integer, such as 10.

Usage example:

Bioroebe.generate_random_rna_sequence(10)
#


2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2662

# ========================================================================= #
# === Bioroebe.generate_random_rna_sequence
#
# Return a new String consisting of randomly sampled entries of
# Bioroebe::RNA_NUCLEOTIDES.
#
# The length is taken from the argument: an Array (such as ARGV) is
# joined with ' ' and stripped first, then the value is converted
# via .to_s.to_i. A count of zero or less yields an empty String.
# ========================================================================= #
def self.generate_random_rna_sequence(i = ARGV)
  i = i.join(' ').strip if i.is_a? Array
  alphabet = Bioroebe::RNA_NUCLEOTIDES # The allowed RNA-nucleotides.
  how_many = i.to_s.to_i
  (1..how_many).each_with_object(''.dup) { |_position, sequence|
    sequence << alphabet.sample
  }
end

.guess_format(i) ⇒ Object

#

Bioroebe.guess_format

#


2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 2770

# ========================================================================= #
# === Bioroebe.guess_format
#
# Guess the sequence format from the file extension at the end of the
# given name.
#
# Returns 'fasta' for .fa, .fna, .faa and .fasta, 'fastq' for .fq and
# .fastq, an empty String for input containing '.fx', and nil when
# nothing matches.
#
# The dot in each pattern is escaped so that it only matches a literal
# '.'; otherwise any name merely ending in "fa" (such as "sofa") would
# be reported as fasta.
# ========================================================================= #
def self.guess_format(i)
  case i
  # ======================================================================= #
  # === fasta
  # ======================================================================= #
  when /\.fa$/,
       /\.fna$/,
       /\.faa$/,
       /\.fasta$/
    'fasta'
  # ======================================================================= #
  # === fastq
  # ======================================================================= #
  when /\.fq$/,
       /\.fastq$/
    'fastq'
  when /\.fx/
    ''
  end
end

.hamming_distance(sequence1 = 'ATCG', sequence2 = 'ATCC') ⇒ Object

#

Bioroebe.hamming_distance

This method returns an Integer representing the Hamming distance between two sequences of equal length, i.e. the number of positions at which the two sequences differ.

Do note that a second implementation may exist for the hamming distance, in the Bioroebe project.

Usage example:

Bioroebe.hamming_distance('ATCG','ATCC') # => 1
#


1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 1062

# ========================================================================= #
# === Bioroebe.hamming_distance
#
# Return the number of positions (an Integer) at which the two given
# sequences differ.
#
# Both arguments may be Strings, which are compared character by
# character, or Arrays, which are flattened and compared element by
# element.
#
# The Hamming distance is only defined for sequences of equal length.
# If the lengths differ, every position that exists in only one of the
# two sequences counts as a difference, so the result does not depend
# on the order of the arguments.
#
# If sequence1 is nil, a notice is reported via e() and nil is returned.
#
# Usage example:
#
#   Bioroebe.hamming_distance('ATCG','ATCC') # => 1
# ========================================================================= #
def self.hamming_distance(
    sequence1 = 'ATCG',
    sequence2 = 'ATCC'
  )
  if sequence1.nil?
    e 'Please provide a sequence (String) as input to this method.'
    return
  end
  sequence1 = sequence1.chars if sequence1.is_a? String
  sequence2 = sequence2.chars if sequence2.is_a? String
  left  = [sequence1].flatten
  right = [sequence2].flatten
  # ======================================================================= #
  # Walk over the longer of the two sequences; an index past the end of
  # the shorter one yields nil there and thus counts as a mismatch.
  # ======================================================================= #
  n_positions = [left.size, right.size].max
  (0...n_positions).count { |index| left[index] != right[index] }
end

.has_this_restriction_enzyme?(name_of_restriction_enzyme) ⇒ Boolean

#

Bioroebe.has_this_restriction_enzyme?

This method will determine whether we have a specific restriction enzyme registered in the yaml file restriction_enzymes.yml or whether we do not. That way we can query whether a restriction enzyme is registered (and thus available) or whether it is not.

The method will downcase all keys in use to simplify finding a matching entry.

Usage example:

Bioroebe.has_this_restriction_enzyme? 'MvnI'    # => true
Bioroebe.has_this_restriction_enzyme? 'EcoRI'   # => true
Bioroebe.has_this_restriction_enzyme? 'EcoRII'  # => true
Bioroebe.has_this_restriction_enzyme? 'EcoRIII' # => false
Bioroebe.has_this_restriction_enzyme? 'PvuI'    # => true
Bioroebe.has_this_restriction_enzyme? 'PvuII'   # => true
Bioroebe.has_this_restriction_enzyme? 'PvuIII'  # => false
#

Returns:

  • (Boolean)


33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/bioroebe/enzymes/has_this_restriction_enzyme.rb', line 33

# ========================================================================= #
# === Bioroebe.has_this_restriction_enzyme?
#
# Return true if the given name matches, ignoring case, one of the keys
# yielded by ::Bioroebe.restriction_enzymes?; return false otherwise.
#
# Any '?' characters in the given name are ignored. The argument itself
# is never modified: the comparison works on a cleaned-up copy, so the
# caller's String stays untouched.
#
# Usage example:
#
#   Bioroebe.has_this_restriction_enzyme? 'EcoRI'   # => true
#   Bioroebe.has_this_restriction_enzyme? 'EcoRIII' # => false
# ========================================================================= #
def self.has_this_restriction_enzyme?(
    name_of_restriction_enzyme
  )
  # delete() and downcase() both return new Strings.
  wanted = name_of_restriction_enzyme.to_s.delete('?').downcase
  ::Bioroebe.restriction_enzymes?.each_pair.any? { |key, _value|
    key.downcase == wanted
  }
end

.hash_codon_tables? ⇒ Boolean

#

Bioroebe.hash_codon_tables?

#

Returns:

  • (Boolean)


115
116
117
# File 'lib/bioroebe/codons/codon_tables.rb', line 115

# Toplevel query method: forwards to ::Bioroebe::CodonTables.definitions?
# and returns its result unchanged.
# NOTE(review): judging by the method name this is presumably the Hash
# holding the codon tables rather than a Boolean - confirm against
# CodonTables.definitions?.
def self.hash_codon_tables?
  ::Bioroebe::CodonTables.definitions?
end

.index_this_fasta_file(i = ARGV) ⇒ Object

#

Bioroebe.index_this_fasta_file

This method will use samtools faidx to index files.

#


569
570
571
572
573
574
575
# File 'lib/bioroebe/toplevel_methods/toplevel_methods.rb', line 569

# ========================================================================= #
# === Bioroebe.index_this_fasta_file
#
# Run "samtools faidx <file>" via esystem() for every given file.
#
# The argument may be a single file name or an Array of file names
# (ARGV by default); nested Arrays are flattened and nil entries are
# skipped. e() is called without arguments before and after each
# command.
#
# Every file name is shell-escaped before it is interpolated into the
# command line, so that paths containing spaces or shell metacharacters
# are passed to samtools as exactly one argument.
# ========================================================================= #
def self.index_this_fasta_file(i = ARGV)
  require 'shellwords' # Only needed here, for Shellwords.escape().
  [i].flatten.compact.each {|this_file|
    e
    esystem "samtools faidx #{Shellwords.escape(this_file.to_s)}"
    e
  }
end

.infer_type_from_this_sequence(i = 'ATGGTACGACAC') ⇒ Object