Class: Bioroebe::ParsePdbFile

Inherits:

CommandlineApplication

Object
Base
CommandlineApplication
Bioroebe::ParsePdbFile

show all

Defined in:: lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb

Overview

Bioroebe::ParsePdbFile

Constant Summary collapse

DEFAULT_PDB_FILE = # DEFAULT_PDB_FILE #

"#{::Bioroebe.log_directory?}test.pdb"

Constants inherited from CommandlineApplication

CommandlineApplication::OLD_VERBOSE_VALUE

Constants included from ColoursForBase

ColoursForBase::ARRAY_HTML_COLOURS_IN_USE

Constants inherited from Base

Base::NAMESPACE

Instance Method Summary collapse

#aminoacid_sequence? ⇒ Boolean

# === aminoacid_sequence? ========================================================================= #.
#analyze_the_dataset(body = @body) ⇒ Object

# === analyze_the_dataset.
#body? ⇒ Boolean

# === body? ========================================================================= #.
#calculate_the_centroid_position ⇒ Object (also: #calculate_centroid)

# === calculate_the_centroid_position.
#calculate_the_distance_between_two_points(p1, p2) ⇒ Object

# === calculate_the_distance_between_two_points.
#check_whether_this_pdb_sequence_contains_dna ⇒ Object

# === check_whether_this_pdb_sequence_contains_dna ========================================================================= #.
#consider_creating_a_fasta_file ⇒ Object

# === consider_creating_a_fasta_file ========================================================================= #.
#consider_reporting_alpha_helices_that_were_found(i = @alpha_helices) ⇒ Object

# === consider_reporting_alpha_helices_that_were_found ========================================================================= #.
#consider_reporting_beta_sheet_that_were_found(i = @beta_sheets) ⇒ Object

# === consider_reporting_beta_sheet_that_were_found ========================================================================= #.
#consider_reporting_how_many_chains_are_in_this_structure ⇒ Object

# === consider_reporting_how_many_chains_are_in_this_structure.
#consider_reporting_the_aminoacid_sequence ⇒ Object

# === consider_reporting_the_aminoacid_sequence.
#consider_reporting_the_keywords(keywords = keywords? ) ⇒ Object

# === consider_reporting_the_keywords.
#consider_reporting_the_number_of_individual_aminoacids ⇒ Object

# === consider_reporting_the_number_of_individual_aminoacids ========================================================================= #.
#consider_reporting_the_number_of_residues ⇒ Object

# === consider_reporting_the_number_of_residues ========================================================================= #.
#convert_this_alphabet_character_to_number(i) ⇒ Object

# === convert_this_alphabet_character_to_number.
#header? ⇒ Boolean (also: #header)

# === header? ========================================================================= #.
#initialize(i = DEFAULT_PDB_FILE, run_already = true) ⇒ ParsePdbFile constructor

# === initialize ========================================================================= #.
#input_sequence? ⇒ Boolean

# === input_sequence?.
#keywords? ⇒ Boolean (also: #keywords)

# === keywords? ========================================================================= #.
#main_file? ⇒ Boolean (also: #return_filename)

# === main_file? ========================================================================= #.
#max_distance?(array = @body) ⇒ Boolean

# === max_distance? ========================================================================= #.
#menu(i = @commandline_arguments) ⇒ Object

# === menu (menu tag) ========================================================================= #.
#n_alpha_helices? ⇒ Boolean

# === n_alpha_helices? ========================================================================= #.
#n_aminoacids? ⇒ Boolean

# === n_aminoacids? ========================================================================= #.
#n_atoms?(i = data?) ⇒ Boolean (also: #n_atom_entries?)

# === n_atoms?.
#name_of_the_species? ⇒ Boolean

# === name_of_the_species? ========================================================================= #.
#organism_common? ⇒ Boolean

# === organism_common? ========================================================================= #.
#process_each_pdb_file ⇒ Object

# === process_each_pdb_file.
#readlines_from_this_file(file) ⇒ Object

# === readlines_from_this_file ========================================================================= #.
#report_extra_information_about_the_species_at_hand ⇒ Object

# === report_extra_information_about_the_species_at_hand ========================================================================= #.
#report_header(of_this_file = @this_file) ⇒ Object

# === report_header.
#report_n_atoms ⇒ Object

# === report_n_atoms ========================================================================= #.
#reset ⇒ Object

# === reset (reset tag) ========================================================================= #.
#reset_internal_variables ⇒ Object

# === reset_internal_variables ========================================================================= #.
#return_all_ATOM_entries ⇒ Object

# === return_all_ATOM_entries ========================================================================= #.
#return_short_filename ⇒ Object

# === return_short_filename ========================================================================= #.
#run ⇒ Object

# === run (run tag) ========================================================================= #.
#set_body(i) ⇒ Object (also: #body=)

# === set_body.
#set_header(i) ⇒ Object (also: #header=)

# === set_header.
#set_header_title_and_body(dataset) ⇒ Object

# === set_header_title_and_body.
#set_keywords(i) ⇒ Object (also: #keywords=)

# === set_keywords ========================================================================= #.
#set_pdb_files(i = DEFAULT_PDB_FILE) ⇒ Object

# === set_pdb_files.
#set_this_file(i) ⇒ Object

# === set_this_file ========================================================================= #.
#silently_determine_the_aminoacid_sequence(i) ⇒ Object

# === silently_determine_the_aminoacid_sequence.
#string? ⇒ Boolean (also: #data?)

# === string? ========================================================================= #.
#taxid? ⇒ Boolean

# === taxid? ========================================================================= #.
#taxid_of_the_species? ⇒ Boolean

# === taxid_of_the_species? ========================================================================= #.
#title=(i) ⇒ Object (also: #set_title)

# === title= ========================================================================= #.
#title? ⇒ Boolean (also: #title)

# === title? ========================================================================= #.
#try_to_determine_the_alpha_helices_in_this_protein(i) ⇒ Object

# === try_to_determine_the_alpha_helices_in_this_protein ========================================================================= #.
#try_to_determine_the_beta_sheets_in_this_protein(i) ⇒ Object

# === try_to_determine_the_beta_sheets_in_this_protein.
#try_to_determine_the_max_distance_between_the_atoms_in_this_protein?(array = @body) ⇒ Boolean (also: #try_to_determine_the_max_distance_between_the_atoms_in_this_protein)

# === try_to_determine_the_max_distance_between_the_atoms_in_this_protein? ========================================================================= #.
#try_to_determine_the_taxid_from_this_input(i) ⇒ Object

# === try_to_determine_the_taxid_from_this_input.
#try_to_report_the_organism_at_hand(i = @body) ⇒ Object

# === try_to_report_the_organism_at_hand.

Methods inherited from CommandlineApplication

#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opnerev, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #runmode?, #set_be_verbose, #set_runmode, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #verbose_truth, #was_or_were, #without_extname, #write_what_into

Methods included from BaseModule

#absolute_path, #default_file_read, #file_readlines

Methods included from CommandlineArguments

#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first?, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens, #set_commandline_arguments

Methods included from ColoursForBase

#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_will_we_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?

Methods inherited from Base

#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into

Methods included from InternalHashModule

#internal_hash?, #reset_the_internal_hash

Methods included from InferTheNamespaceModule

#infer_the_namespace, #namespace?

Constructor Details

#initialize(i = DEFAULT_PDB_FILE, run_already = true) ⇒ `ParsePdbFile`

#

initialize

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 125

# === initialize
#
# Resets the instance, splits the input into commandline flags and .pdb
# file paths, honours an optional block and then (by default) calls run().
#
# @param i [String, Array] a path, or a list of paths and flags; defaults
#   to DEFAULT_PDB_FILE
# @param run_already [Boolean, Symbol] pass false or :do_not_run_yet to
#   skip the immediate call to run()
# @yieldreturn [Symbol] when the block returns :be_silent the instance is
#   silenced, e. g. Bioroebe::ParsePdbFile.new(ARGV) { :be_silent }
def initialize(
    i           = DEFAULT_PDB_FILE,
    run_already = true
  )
  reset
  # Anything that is not yet an Array becomes a flat Array without nils.
  i = [i].flatten.compact unless i.is_a?(Array)
  # NOTE(review): return_entries_with_two_leading_hyphens_from() and
  # set_be_silent() are not part of the inherited-method lists visible in
  # the generated documentation - confirm that both are defined.
  hyphenated_entries = return_entries_with_two_leading_hyphens_from(i)
  set_commandline_arguments(hyphenated_entries)
  remaining_entries = return_entries_without_two_leading_hyphens(i)
  set_pdb_files(remaining_entries)
  # ======================================================================= #
  # === Handle blocks
  # ======================================================================= #
  if block_given?
    yielded = yield
    set_be_silent if yielded.equal?(:be_silent)
  end
  # :do_not_run_yet is treated exactly like false.
  run if run_already && !run_already.equal?(:do_not_run_yet)
end

Instance Method Details

#aminoacid_sequence? ⇒ `Boolean`

#

aminoacid_sequence?

#

Returns:

(Boolean)



523
524
525

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 523

# Reader for @aminoacid_sequence. Despite the trailing "?" this returns
# the stored value itself rather than true/false. Other methods of this
# class call .size and .each_char on it, so it is presumably a String.
def aminoacid_sequence?
  @aminoacid_sequence
end

#analyze_the_dataset(body = @body) ⇒ `Object`

#

analyze_the_dataset

This method is the “workhorse” of this class.

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 927

# === analyze_the_dataset
#
# Runs every analysis/report step for the file that is currently loaded.
# Nothing happens unless @does_the_file_exist is set, which
# process_each_pdb_file() does right before calling this method.
#
# @param body [Array] the lines of the .pdb file (defaults to @body)
def analyze_the_dataset(
    body = @body
  )
  return unless @does_the_file_exist
  # General information about the file.
  report_header
  try_to_report_the_organism_at_hand(body)
  report_n_atoms
  check_whether_this_pdb_sequence_contains_dna
  # The aminoacid sequence has to be determined before it can be reported.
  silently_determine_the_aminoacid_sequence(body)
  consider_reporting_the_aminoacid_sequence
  consider_reporting_the_number_of_individual_aminoacids
  consider_reporting_the_number_of_residues
  # Try to obtain the taxid.
  try_to_determine_the_taxid_from_this_input(body)
  # Secondary structure, distances, chains and keywords.
  try_to_determine_the_alpha_helices_in_this_protein(body)
  consider_reporting_alpha_helices_that_were_found
  try_to_determine_the_beta_sheets_in_this_protein(body)
  try_to_determine_the_max_distance_between_the_atoms_in_this_protein(body)
  consider_reporting_beta_sheet_that_were_found
  consider_reporting_how_many_chains_are_in_this_structure
  consider_reporting_the_keywords
end

#body? ⇒ `Boolean`

#

body?

#

Returns:

(Boolean)



313
314
315

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 313

# Reader for @body. Returns the stored value itself, not a boolean.
# process_each_pdb_file() stores the Array of lines read from the .pdb
# file here (via set_body); it is nil after reset_internal_variables().
def body?
  @body
end

#calculate_the_centroid_position ⇒ `Object` Also known as: calculate_centroid

#

calculate_the_centroid_position

This method calculates the centroid, i.e. the “average position of the atoms”, of the given .pdb file. Currently every atom is treated as having the same weight; in reality the weight of each atom should be taken into account as well, but this is not yet implemented by this method.

If this is ever improved, we need to include the weight of the corresponding atom as well.

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 890

# === calculate_the_centroid_position
#
# Averages the collected x-, y- and z-coordinates over the number of ATOM
# entries. All atoms are weighted equally.
#
# @return [Array] [x, y, z]; [0, 0, 0] when there are no ATOM entries
def calculate_the_centroid_position
  atom_count = n_atoms?.to_i
  return [0, 0, 0] unless atom_count > 0
  divisor = atom_count.to_f
  [@x_coordinates, @y_coordinates, @z_coordinates].map {|axis_values|
    axis_values.sum / divisor
  }
end

#calculate_the_distance_between_two_points(p1, p2) ⇒ `Object`

#

calculate_the_distance_between_two_points

Pass in two arrays to this method.

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 532

# === calculate_the_distance_between_two_points
#
# Euclidean distance between two points in 3D space.
#
# @param p1 [Array] [x, y, z] of the first point
# @param p2 [Array] [x, y, z] of the second point
# @return [Float] the distance between both points
def calculate_the_distance_between_two_points(p1, p2)
  dx2, dy2, dz2 = [0, 1, 2].map {|axis| (p2[axis] - p1[axis]) ** 2 }
  Math.sqrt(dx2 + dy2 + dz2)
end

#check_whether_this_pdb_sequence_contains_dna ⇒ `Object`

#

check_whether_this_pdb_sequence_contains_dna

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 542

# === check_whether_this_pdb_sequence_contains_dna
#
# Looks for a 'MOLECULE: DNA' entry in the file content. If one is found
# the DNA sequence is extracted, reported, and checked for being a
# palindrome.
def check_whether_this_pdb_sequence_contains_dna
  content = input_sequence?
  return unless content.include?('MOLECULE: DNA')
  erev 'This protein sequence contains at the least one DNA strand.'
  # Only the first match is used; without any match this is ''.
  raw_entry = content.scan(/ MOLECULE: DNA(.+)$/).flatten.first.to_s.strip
  # ===================================================================== #
  # The raw entry may look like this:
  #
  #   "(5'-D(*CP*GP*CP*GP*AP*AP*TP*TP*CP*GP*CP*G)-3');"
  #
  # All of the decoration around the nucleotides is stripped away.
  # ===================================================================== #
  sequence = raw_entry.tr('-','').delete("'D(P*);53")
  result = ('This DNA sequence is '+colourize_dna(sequence)+rev+'.').dup
  verdict =
    if is_this_sequence_a_palindrome?(sequence)
      ' It is a palindrome.'
    else
      ' It is NOT a palindrome.'
    end
  result << rev+verdict
  erev result
end

#consider_creating_a_fasta_file ⇒ `Object`

#

consider_creating_a_fasta_file

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 793

# === consider_creating_a_fasta_file
#
# When @do_create_a_fasta_file is set (see menu()), the aminoacid sequence
# is written into "<name of the .pdb file without .pdb>.fasta", resolved
# against the current working directory.
#
# NOTE(review): only the bare sequence is handed to write_what_into();
# no ">" FASTA header line is added here - confirm whether that is wanted.
def consider_creating_a_fasta_file
  return unless @do_create_a_fasta_file
  sequence   = aminoacid_sequence?
  fasta_name = return_short_filename.sub(/\.pdb$/,'')+'.fasta'
  target     = File.absolute_path(fasta_name)
  erev 'Storing into the file `'+sfile(target)+rev+'`.'
  write_what_into(sequence, target)
end

#consider_reporting_alpha_helices_that_were_found(i = @alpha_helices) ⇒ `Object`

#

consider_reporting_alpha_helices_that_were_found

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 449

# === consider_reporting_alpha_helices_that_were_found
#
# Reports how many alpha-helices were found, but only if there is at least
# one and the instance is verbose.
#
# @param i [Array] the alpha-helices (defaults to @alpha_helices)
def consider_reporting_alpha_helices_that_were_found(
    i = @alpha_helices
  )
  return if i.empty? || !be_verbose?
  n_helices = slateblue(i.size.to_s)
  erev "This protein contains "\
       "#{n_helices}#{rev} alpha-helices."
end

#consider_reporting_beta_sheet_that_were_found(i = @beta_sheets) ⇒ `Object`

#

consider_reporting_beta_sheet_that_were_found

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 463

# === consider_reporting_beta_sheet_that_were_found
#
# Reports how many beta-sheets were found, but only if there is at least
# one and the instance is verbose. The verbosity check mirrors
# consider_reporting_alpha_helices_that_were_found(), so that a silenced
# instance no longer prints the beta-sheet line.
#
# @param i [Array] the beta-sheets (defaults to @beta_sheets)
def consider_reporting_beta_sheet_that_were_found(i = @beta_sheets)
  return if i.empty?
  return unless be_verbose?
  e "This protein contains #{slateblue(i.size.to_s)}#{rev} beta-sheets."
end

#consider_reporting_how_many_chains_are_in_this_structure ⇒ `Object`

#

consider_reporting_how_many_chains_are_in_this_structure

A better way to report how many chains are in a structure is via:

COMPND   3 CHAIN: A, B;

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 598

# === consider_reporting_how_many_chains_are_in_this_structure
#
# Reports the number of chains, based on the COMPND records that carry a
# "CHAIN:" token, such as:
#
#   "COMPND   3 CHAIN: A, B, C, D, E, F, G, H, I, J, K, L, M, N;    \n"
#   "COMPND   8 CHAIN: O, P, Q, R, S, T, U;                         \n"
#
# The distinct chain identifiers are counted. Deriving the number from the
# highest chain letter instead would report 12 chains for "CHAIN: H, L;".
#
# Nothing is reported when @body is nil, when no chain identifier was
# found or when the instance is not verbose.
#
# NOTE(review): continuation lines of a long chain list that do not repeat
# the "CHAIN:" token are not considered.
def consider_reporting_how_many_chains_are_in_this_structure
  return if @body.nil?
  chain_ids = @body.flat_map {|entry|
    next [] unless entry.include?('COMPND  ') && entry.include?('CHAIN: ')
    # Keep only the text between "CHAIN:" and the terminating ";".
    listed = entry.split('CHAIN:', 2).last.sub(/;.*/m, '')
    listed.split(',').map(&:strip).reject(&:empty?)
  }.uniq
  return if chain_ids.empty?
  if be_verbose?
    erev "There are #{steelblue(chain_ids.size.to_s)}#{rev} chains in this molecule."
  end
end

#consider_reporting_the_aminoacid_sequence ⇒ `Object`

#

consider_reporting_the_aminoacid_sequence

This method will typically display the aminoacid sequence at hand.

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 1013

# === consider_reporting_the_aminoacid_sequence
#
# Prints the length of the aminoacid sequence followed by the sequence
# itself, provided that @report_the_aminoacid_sequence is set and the
# instance is verbose.
def consider_reporting_the_aminoacid_sequence
  return unless @report_the_aminoacid_sequence && be_verbose?
  sequence = @aminoacid_sequence
  heading  = 'The aminoacid sequence ('+
             steelblue(sequence.size.to_s)+rev+
             ' aminoacids) is:'
  erev heading
  # The sequence could be colourized per aminoacid here instead.
  erev "  #{steelblue(sequence)}"
end

#consider_reporting_the_keywords(keywords = keywords? ) ⇒ `Object`

#

consider_reporting_the_keywords

This method will report the discovered keyword entries in the given .pdb file at hand (if this .pdb file contains these keywords entries that is).

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 960

# === consider_reporting_the_keywords
#
# Reports the keywords of the .pdb file, if any were stored.
#
# @param keywords [Object, nil] the keywords to report (defaults to the
#   value returned by keywords?); nothing is reported for nil or false
def consider_reporting_the_keywords(
    keywords = keywords?
  )
  return unless keywords
  erev "The keywords are: #{steelblue(keywords)}"
end

#consider_reporting_the_number_of_individual_aminoacids ⇒ `Object`

#

consider_reporting_the_number_of_individual_aminoacids

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 980

# === consider_reporting_the_number_of_individual_aminoacids
#
# Prints one line per distinct aminoacid of @aminoacid_sequence, showing
# its upcased three-letter name and how often it occurs. Only active when
# @report_the_aminoacid_sequence is set and the instance is verbose.
def consider_reporting_the_number_of_individual_aminoacids
  return unless @report_the_aminoacid_sequence && be_verbose?
  occurrences = @aminoacid_sequence.each_char.tally
  occurrences.each_pair do |one_letter, count|
    long_name    = ::Bioroebe.one_to_three(one_letter).upcase
    padded_count = count.to_s.rjust(2,' ') # Right-align single digits.
    erev 'Total no:of '+rev+lightgreen(long_name)+rev+' - '+steelblue(padded_count)
  end
end

#consider_reporting_the_number_of_residues ⇒ `Object`

#

consider_reporting_the_number_of_residues

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 971

# === consider_reporting_the_number_of_residues
#
# Prints the length of @aminoacid_sequence, provided that
# @report_the_aminoacid_sequence is set and the instance is verbose.
def consider_reporting_the_number_of_residues
  return unless @report_the_aminoacid_sequence && be_verbose?
  residue_count = @aminoacid_sequence.size.to_s
  erev 'Total no:of residues - '+steelblue(residue_count)
end

#convert_this_alphabet_character_to_number(i) ⇒ `Object`

#

convert_this_alphabet_character_to_number

The input of “A” would mean “1”.

#



637
638
639

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 637

# === convert_this_alphabet_character_to_number
#
# Maps an uppercase letter onto its position in the alphabet, so that the
# input "A" yields 1 and "Z" yields 26.
#
# @param i [String] the character to convert (only the first one is used)
# @return [Integer] the position in the alphabet
def convert_this_alphabet_character_to_number(i)
  offset = 'A'.ord - 1 # "A" has the codepoint 65.
  i.ord - offset
end

#

header?

#

Returns:

(Boolean)



394
395
396

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 394

# Reader for @header. Returns the stored value itself, not a boolean.
# The value is assigned by set_header(), which strips the "HEADER" token
# and surrounding whitespace from the header line.
def header?
  @header
end

#input_sequence? ⇒ `Boolean`

#

input_sequence?

This will return a String.

#

Returns:

(Boolean)

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 322

# === input_sequence?
#
# Returns the content of @body as one value: an Array is joined via the
# constant N, anything else (including nil) is returned unchanged.
#
# @return [String, Object] the joined lines, or @body itself
def input_sequence?
  content = @body
  content.is_a?(Array) ? content.join(N) : content
end

#keywords? ⇒ `Boolean` Also known as: keywords

#

keywords?

#

Returns:

(Boolean)



248
249
250

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 248

# Reader for @keywords. Returns the stored value itself, not a boolean.
# The value is nil after reset_internal_variables(), i. e. whenever the
# current file had no KEYWDS line.
def keywords?
  @keywords
end

#main_file? ⇒ `Boolean` Also known as: return_filename

#

main_file?

#

Returns:

(Boolean)



813
814
815

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 813

# Reader for @this_file. Returns the stored value itself, not a boolean.
# Other methods pass this value to File.basename and File.size, so it is
# the path of the .pdb file currently being processed.
# NOTE(review): presumably assigned by set_this_file() - confirm.
def main_file?
  @this_file
end

#max_distance?(array = @body) ⇒ `Boolean`

#

max_distance?

#

Returns:

(Boolean)

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 833

# === max_distance?
#
# Returns the largest distance found between neighbouring entries of the
# given Array. Each line is split on whitespace and the fields 6, 7 and 8
# are read as the x-, y- and z-coordinate, e. g.:
#
#   "ATOM    1  N   MET A  41       1.177 -10.035  -3.493  1.00  2.04   N"
#
# Every entry is compared with its successor only; the last entry is
# compared with the first one.
#
# NOTE(review): this is not the maximum over all pairs of atoms, and lines
# that are not ATOM records are not filtered out here - confirm that the
# callers pass in suitable input.
#
# @param array [Array, nil] the lines to inspect (defaults to @body)
# @return [Numeric, nil] the largest distance; the Integer 0 if no distance
#   exceeds zero; nil for nil or empty input
def max_distance?(
    array = @body
  )
  return if array.nil? or array.empty?
  coordinates = array.map {|line|
    fields = line.split(' ')
    [fields[6].to_f, fields[7].to_f, fields[8].to_f]
  }
  # rotate(1) lines up every point with its successor and wraps the last
  # point around to the first one.
  successors = coordinates.rotate(1)
  coordinates.zip(successors).reduce(0) {|largest, (point, neighbour)|
    distance = calculate_the_distance_between_two_points(point, neighbour)
    distance > largest ? distance : largest
  }
end

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 762

# === menu (menu tag)
#
# Evaluates the commandline flags. Arrays are walked recursively, so that
# every entry ends up being matched individually.
#
# Supported flags (leading hyphens are optional, case is ignored):
#
#   parsedb 2HI4.pdb --no-colours          # disables the colours
#   parsedb 2HI4.pdb --create-fasta-file   # enables the .fasta export
#
# @param i [Array, String, Object] the flag(s) to evaluate (defaults to
#   @commandline_arguments)
def menu(
    i = @commandline_arguments
  )
  return i.each {|argument| menu(argument) } if i.is_a?(Array)
  case i
  when /^-?-?no(-|_)?colou?rs$/i               then disable_colours
  when /^-?-?create(-|_)?fasta(-|_)?file$/i    then @do_create_a_fasta_file = true
  end
end

#n_alpha_helices? ⇒ `Boolean`

#

n_alpha_helices?

#

Returns:

(Boolean)



442
443
444

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 442

# === n_alpha_helices?
#
# @return [Integer] how many entries @alpha_helices holds (a number, not a
#   boolean, despite the trailing "?")
def n_alpha_helices?
  helices = @alpha_helices
  helices.length
end

#n_aminoacids? ⇒ `Boolean`

#

n_aminoacids?

#

Returns:

(Boolean)



472
473
474

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 472

# === n_aminoacids?
#
# @return [Integer, nil] the size of @aminoacid_sequence, or nil if no
#   sequence is available (a number, not a boolean)
def n_aminoacids?
  return unless @aminoacid_sequence
  @aminoacid_sequence.length
end

#n_atoms?(i = data?) ⇒ `Boolean` Also known as: n_atom_entries?

#

n_atoms?

Returns how many ATOM entries we have in this .pdb file.

#

Returns:

(Boolean)



380
381
382

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 380

# === n_atoms?
#
# Counts the entries that start with 'ATOM'.
#
# @param i [Array, nil] the lines to inspect (defaults to data?)
# @return [Integer, nil] the number of ATOM entries; nil if no input was
#   given (a number, not a boolean)
def n_atoms?(i = data?)
  return unless i
  i.count {|line| line.start_with?('ATOM') }
end

#name_of_the_species? ⇒ `Boolean`

#

name_of_the_species?

#

Returns:

(Boolean)



686
687
688

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 686

# Reader for @name_of_the_species. Returns the stored value itself, not a
# boolean; it is nil after reset_internal_variables().
# NOTE(review): the code assigning a non-nil value is not visible in this
# part of the documentation - presumably the ORGANISM_SCIENTIFIC entry.
def name_of_the_species?
  @name_of_the_species
end

#organism_common? ⇒ `Boolean`

#

organism_common?

#

Returns:

(Boolean)

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 717

# === organism_common?
#
# Extracts the first "ORGANISM_COMMON: <name>;" entry from @body.
#
# @return [String, nil] the common name of the organism, '' if the body
#   has no such entry, or nil if there is no body at all
def organism_common?
  return unless @body
  matches = @body.join(N).scan(/ORGANISM_COMMON: (.+);/)
  matches.flatten.uniq.first.to_s
end

#process_each_pdb_file ⇒ `Object`

#

process_each_pdb_file

This method is the main workhorse method of this class.

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 732

# === process_each_pdb_file
#
# Walks over @pdb_files. For every existing regular file the per-file
# state is reset, the file is read, keywords/header/title/body are stored,
# the dataset is analyzed and a .fasta file is optionally created. Missing
# files are reported and flagged via @does_the_file_exist.
def process_each_pdb_file
  @pdb_files.each do |file|
    unless File.exist?(file) && File.file?(file)
      opnerev "No file at `#{sfile(file)}#{rev}` could be found."
      @does_the_file_exist = false
      next
    end
    reset_internal_variables # Start from a clean state for every file.
    @does_the_file_exist = true
    set_this_file(file)
    dataset = readlines_from_this_file(file)
    set_keywords(dataset) if dataset.any? {|line| line.include?('KEYWDS') }
    set_header_title_and_body(dataset)
    analyze_the_dataset
    consider_creating_a_fasta_file
  end
end

#readlines_from_this_file(file) ⇒ `Object`

#

readlines_from_this_file

#



412
413
414

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 412

# === readlines_from_this_file
#
# @param file [String] path of the file to read
# @return [Array<String>] all lines of the file, line endings included
def readlines_from_this_file(file)
  File.foreach(file).to_a
end

#report_extra_information_about_the_species_at_hand ⇒ `Object`

#

report_extra_information_about_the_species_at_hand

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 693

# === report_extra_information_about_the_species_at_hand
#
# Reports the scientific name of the organism. If a taxid is known and the
# file also contains a common name, both are appended in parentheses.
def report_extra_information_about_the_species_at_hand
  message = ''.dup
  message << 'The name of the organism (Entry: '+
              steelblue('ORGANISM_SCIENTIFIC')+
              rev+
              ') is'+N
  message << '`'+orange(name_of_the_species?)+rev+'`.'
  if @taxid_of_the_species
    common_name = organism_common?.to_s
    unless common_name.empty?
      message << rev+' (Taxid: '+
                 steelblue(@taxid_of_the_species.to_s)+
                 rev+'; '+
                 seagreen(common_name)+rev+')'
    end
  end
  erev message
end

#report_header(of_this_file = @this_file) ⇒ `Object`

#

report_header

This will also report the filename.

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 908

# === report_header
#
# Prints the header together with the short filename and the size of the
# file in (integer) kilobytes. Only active if the instance is verbose.
#
# @param of_this_file [String] path of the file whose size is reported
#   (defaults to @this_file)
def report_header(
    of_this_file = @this_file
  )
  return unless be_verbose?
  short_name = return_short_filename
  e orange(header?)+rev+
    ' (File: '+
    steelblue(short_name)+
    rev+'; Filesize: '+
    (File.size(of_this_file) / 1024).to_s+ # Integer division: whole kb only.
    'kb'+')'+rev
end

#report_n_atoms ⇒ `Object`

#

report_n_atoms

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 401

# === report_n_atoms
#
# Prints how many ATOM entries were found and the file they belong to.
# Only active if the instance is verbose.
def report_n_atoms
  return unless be_verbose?
  atom_count = sfancy(n_atoms?.to_s)
  e "#{atom_count}#{rev} ATOM entries were found "\
    "being part of the file at"
  e "`#{sfile(main_file?)}#{rev}`."
end

#reset ⇒ `Object`

#

reset (reset tag)

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 167

# === reset (reset tag)
#
# Initializes the state that has to survive the per-file reset, then
# delegates to reset_internal_variables() for everything else.
def reset
  super()
  infer_the_namespace
  # ======================================================================= #
  # === @do_create_a_fasta_file
  #
  # This flag must live outside of reset_internal_variables(): that method
  # runs again for every file, which would discard the commandline
  # argument passed in by the user.
  # ======================================================================= #
  @do_create_a_fasta_file = false
  # ======================================================================= #
  # === @x_coordinates, @y_coordinates, @z_coordinates
  #
  # Three separate Arrays. They are not touched by
  # reset_internal_variables() either.
  # ======================================================================= #
  @x_coordinates, @y_coordinates, @z_coordinates = [], [], []
  # ======================================================================= #
  # All remaining internal variables are reset through this method.
  # ======================================================================= #
  reset_internal_variables
end

#reset_internal_variables ⇒ `Object`

#

reset_internal_variables

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 200

# === reset_internal_variables
#
# Puts all per-file state back to its initial value. This is called by
# reset() and again by process_each_pdb_file() before each file.
#
# @return [nil]
def reset_internal_variables
  self.header = ''
  self.title  = ''
  # ======================================================================= #
  # === Collections: @alpha_helices, @beta_sheets
  # ======================================================================= #
  @alpha_helices, @beta_sheets = [], []
  # ======================================================================= #
  # === Counters and flags
  # ======================================================================= #
  @n_chains_are_in_this_atom     = 0
  @does_the_file_exist           = false
  @report_the_aminoacid_sequence = true
  # ======================================================================= #
  # === Values that are unknown until a file has been parsed
  # ======================================================================= #
  @body = @taxid = @name_of_the_species = @taxid_of_the_species = @keywords = nil
end

#return_all_ATOM_entries ⇒ `Object`

#

return_all_ATOM_entries

#



585
586
587

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 585

# === return_all_ATOM_entries
#
# @return [Array] all entries of @body that start with 'ATOM  ' (the
#   record name followed by two spaces)
def return_all_ATOM_entries
  atom_prefix = 'ATOM  '
  @body.select {|line| line.start_with?(atom_prefix) }
end

#return_short_filename ⇒ `Object`

#

return_short_filename

#



786
787
788

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 786

# === return_short_filename
#
# @return [String] the name of the current file (@this_file) without its
#   directory part
def return_short_filename
  full_path = @this_file
  File.basename(full_path)
end

#run ⇒ `Object`

#

run (run tag)

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 1027

# ========================================================================= #
# === run (run tag)
#
# Entry point: invokes menu, then process_each_pdb_file. When be_verbose?
# is true the centroid position (as returned by
# calculate_the_centroid_position) is reported via erev; otherwise
# nothing more happens and nil is returned.
# ========================================================================= #
def run
  menu
  process_each_pdb_file
  return unless be_verbose?
  centroid = calculate_the_centroid_position.join(', ')
  erev 'The centered position is at: '+steelblue(centroid)
end

#set_body(i) ⇒ `Object` Also known as: body=

#

set_body

This method keeps track of the main “body” of the .pdb file at hand.

#



335
336
337

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 335

# ========================================================================= #
# === set_body
#
# This method keeps track of the main "body" of the .pdb file at hand.
# The given object is stored in @body as-is (no copy is made) and is
# also the return value.
# ========================================================================= #
def set_body(i)
  @body = i
end

#set_header(i) ⇒ `Object` Also known as: header=

#

set_header

The header may have an entry such as:

HEADER    RIBOSOMAL PROTEIN/RNA                   16-APR-10   3IYQ

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 347

# ========================================================================= #
# === set_header
#
# Stores the sanitized content of a HEADER record in @header.
#
# The header may have an entry such as:
#
#   HEADER    RIBOSOMAL PROTEIN/RNA                   16-APR-10   3IYQ
#
# which will be stored as "RIBOSOMAL PROTEIN/RNA".
#
# The input may be a String or an Array of Strings; for an Array only
# the first element is used. nil / false is stored unchanged. An empty
# Array leaves @header untouched and the method returns nil.
#
# The String that is passed in is never modified. This matters because
# callers may hand over a line that is still part of the body Array;
# modifying it in place would strip the "HEADER" tag from that line.
# ========================================================================= #
def set_header(i)
  if i
    i = i.first if i.is_a? Array
    return if i.nil? # Can't work with nil-entries.
    # ===================================================================== #
    # We do a bit sanitizing here. Non-bang methods are used on purpose,
    # so that we always work on a copy (this also handles frozen Strings).
    # ===================================================================== #
    i = i.sub(/HEADER/, '').strip
    # ===================================================================== #
    # A run of three spaces indicates that more columns follow; in that
    # case only the first column is kept.
    # ===================================================================== #
    i = i.split('  ').first.strip if i.include?('   ')
  end
  @header = i
end

#set_header_title_and_body(dataset) ⇒ `Object`

#

set_header_title_and_body

The input to this method should be an Array.

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 421

# ========================================================================= #
# === set_header_title_and_body
#
# The input to this method should be an Array.
#
# All entries that include 'HEADER' are passed on to set_header(), all
# entries that include 'TITLE' are assigned via title=, and finally the
# complete dataset is handed to set_body().
# ========================================================================= #
def set_header_title_and_body(dataset)
  header_lines = dataset.select { |line| line.include?('HEADER') }
  set_header(header_lines)
  title_lines = dataset.select { |line| line.include?('TITLE') }
  self.title = title_lines
  set_body(dataset)
end

#set_keywords(i) ⇒ `Object` Also known as: keywords=

#

set_keywords

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 999

# ========================================================================= #
# === set_keywords
#
# Stores the keywords entry in @keywords, as a stripped String.
#
# If the input is an Array (possibly nested), the first entry that
# includes 'KEYWDS' is used; if there is no such entry, an empty String
# is stored. Any other input is converted via .to_s and stripped.
#
# The Array that is passed in is not modified: we flatten a copy rather
# than the caller's Array, which also allows frozen Arrays as input.
# ========================================================================= #
def set_keywords(i)
  if i.is_a? Array
    i = i.flatten.find { |entry| entry.include? 'KEYWDS' }
  end
  @keywords = i.to_s.strip
end

#set_pdb_files(i = DEFAULT_PDB_FILE) ⇒ `Object`

#

set_pdb_files

We will keep this as an Array.

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 257

# ========================================================================= #
# === set_pdb_files
#
# We will keep this as an Array. Input that is not an Array is wrapped
# into one; an Array that is passed in is modified in place (map!) and
# then stored in @pdb_files.
#
# Every entry is turned into an absolute path. Two conveniences apply:
#
#   - ':1fat' is a shortcut for the bundled data/1fat.pdb file.
#   - The user may input a String such as "1NR6" rather than the longer
#     "1NR6.pdb". If "1NR6" does not exist but "1NR6.pdb" does, then the
#     latter will be used.
# ========================================================================= #
def set_pdb_files(
    i = DEFAULT_PDB_FILE
  )
  i = [i] unless i.is_a?(Array)
  @pdb_files = i.map! do |entry|
    path = entry.to_s.dup # To avoid frozen-Strings.
    # ===================================================================== #
    # === :1fat
    # ===================================================================== #
    if path == ':1fat'
      path = "#{::Bioroebe.project_base_directory?}data/1fat.pdb"
    end
    # Append the .pdb suffix only if that is the variant that exists.
    path << '.pdb' if !File.exist?(path) && File.exist?("#{path}.pdb")
    File.absolute_path(path) # We require the full local path to the file at hand.
  end
end

#set_this_file(i) ⇒ `Object`

#

set_this_file

#



806
807
808

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 806

# ========================================================================= #
# === set_this_file
#
# Stores the given value in @this_file and returns it. @this_file is
# the value that return_short_filename() passes to File.basename.
# ========================================================================= #
def set_this_file(i)
  @this_file = i
end

#silently_determine_the_aminoacid_sequence(i) ⇒ `Object`

#

silently_determine_the_aminoacid_sequence

This method is probably not quite correct, as it does not take into consideration that there may be a succession of aminoacids.

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 482

# ========================================================================= #
# === silently_determine_the_aminoacid_sequence
#
# Builds the one-letter aminoacid sequence from the ATOM records found
# in the given Array of lines and stores it in @aminoacid_sequence
# (which is also the return value). The x/y/z coordinates of every ATOM
# record are appended to @x_coordinates, @y_coordinates and
# @z_coordinates along the way.
#
# Known limitations:
#
#   - An aminoacid is only appended when its residue number is larger
#     than the largest residue number seen so far (starting at 0), so
#     residue numbers that start over are not picked up again.
#   - The lines are split on whitespace rather than on fixed columns.
#   - Every ATOM line is stripped in place (strip!).
# ========================================================================= #
def silently_determine_the_aminoacid_sequence(i)
  this_aminoacid_sequence = ''.dup
  # ======================================================================= #
  # The record name is 'ATOM' followed by two spaces; the atom serial
  # number follows directly. Requiring a third space would drop all
  # records whose serial number has five digits, such as:
  #
  #   ATOM  10000  N   LYS A 650
  #
  # ======================================================================= #
  selection = i.select {|line| line.start_with?('ATOM  ') }
  last_number_of_aminoacid = 0
  selection.each {|line|
    line.strip!
    # ===================================================================== #
    # The line may look like this:
    #
    #   ATOM   69  CG2 THR A   8    23.165  11.137  48.942  1.00 30.40  C
    #
    # Each atom in the coordinate section is identified by a sequential
    # number in the entry file. The entry at position 5, seen below,
    # identifies the aminoacid there.
    # ===================================================================== #
    #     0    1   2    3  4   5     6       7        8     9     10    11
    # ===================================================================== #
    splitted = line.split(' ') # The tokens contain no whitespace anymore.
    @x_coordinates << splitted[6].to_f
    @y_coordinates << splitted[7].to_f
    @z_coordinates << splitted[8].to_f
    this_aminoacid = three_to_one(splitted[3])
    number_of_this_aminoacid = splitted[5].to_i
    # ===================================================================== #
    # Entries may look like this:
    #
    #   ATOM    490  HZ3 LYS A  70     4.674  -0.770  -3.751  1.00  2.07   H
    #   ATOM    491  N   LYS A  71     8.012   0.034   2.745  1.00  0.74   N
    #
    # Only the first atom of a new residue number contributes a letter.
    # ===================================================================== #
    if number_of_this_aminoacid > last_number_of_aminoacid
      this_aminoacid_sequence << this_aminoacid
      last_number_of_aminoacid = number_of_this_aminoacid
    end
  }
  @aminoacid_sequence = this_aminoacid_sequence
end

#string? ⇒ `Boolean` Also known as: data?

#

string?

#

Returns:

(Boolean)



371
372
373

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 371

# ========================================================================= #
# === string?
#
# Returns whatever is stored in @body. Despite the trailing '?', the
# stored object itself is returned rather than a strict true/false value.
# ========================================================================= #
def string?
  @body
end

#taxid? ⇒ `Boolean`

#

taxid?

#

Returns:

(Boolean)



306
307
308

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 306

# ========================================================================= #
# === taxid?
#
# Returns the value of @taxid (not a strict true/false value). This is
# nil if @taxid has not been assigned.
# ========================================================================= #
def taxid?
  @taxid
end

#taxid_of_the_species? ⇒ `Boolean`

#

taxid_of_the_species?

#

Returns:

(Boolean)



755
756
757

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 755

# ========================================================================= #
# === taxid_of_the_species?
#
# Returns the value of @taxid_of_the_species (not a strict true/false
# value).
# ========================================================================= #
def taxid_of_the_species?
  @taxid_of_the_species
end

#title=(i) ⇒ `Object` Also known as: set_title

#

title=

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 870

# ========================================================================= #
# === title=
#
# Stores the given value in @title. If the input is an Array then each
# entry is first replaced (in place, within that Array) by a copy
# without the trailing newline and without surrounding whitespace.
# ========================================================================= #
def title=(i)
  i.map! { |line| line.chomp.strip } if i.is_a?(Array)
  @title = i
end

#title? ⇒ `Boolean` Also known as: title

#

title?

#

Returns:

(Boolean)



387
388
389

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 387

# ========================================================================= #
# === title?
#
# Returns the value of @title, as assigned via title= (not a strict
# true/false value).
# ========================================================================= #
def title?
  @title
end

#try_to_determine_the_alpha_helices_in_this_protein(i) ⇒ `Object`

#

try_to_determine_the_alpha_helices_in_this_protein

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 432

# ========================================================================= #
# === try_to_determine_the_alpha_helices_in_this_protein
#
# If the input is an Array, all entries that begin with 'HELIX  ' are
# stored in @alpha_helices (and returned). Any other input is ignored
# and nil is returned.
# ========================================================================= #
def try_to_determine_the_alpha_helices_in_this_protein(i)
  return unless i.is_a?(Array)
  @alpha_helices = i.select { |record| record.start_with?('HELIX  ') }
end

#try_to_determine_the_beta_sheets_in_this_protein(i) ⇒ `Object`

#

try_to_determine_the_beta_sheets_in_this_protein

Beta-sheets begin with the word ‘SHEET ’.

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 575

# ========================================================================= #
# === try_to_determine_the_beta_sheets_in_this_protein
#
# Beta-sheets begin with the word 'SHEET  '.
#
# If the input is an Array, all entries with that prefix are stored in
# @beta_sheets (and returned). Any other input is ignored and nil is
# returned.
# ========================================================================= #
def try_to_determine_the_beta_sheets_in_this_protein(i)
  return unless i.is_a?(Array)
  @beta_sheets = i.select { |record| record.start_with?('SHEET  ') }
end

#try_to_determine_the_max_distance_between_the_atoms_in_this_protein?(array = @body) ⇒ `Boolean` Also known as: try_to_determine_the_max_distance_between_the_atoms_in_this_protein

#

try_to_determine_the_max_distance_between_the_atoms_in_this_protein?

#

Returns:

(Boolean)

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 820

# ========================================================================= #
# === try_to_determine_the_max_distance_between_the_atoms_in_this_protein?
#
# Queries max_distance?() for the given Array (@body by default) and,
# if be_verbose? is true, reports the resulting value via erev. When
# not verbose, nothing is reported and nil is returned; the computed
# value itself is not the return value of this method.
# ========================================================================= #
def try_to_determine_the_max_distance_between_the_atoms_in_this_protein?(array = @body)
  max_value = max_distance?(array)
  return unless be_verbose?
  message = 'The maximum difference between the atoms is '+
            sfancy(max_value.to_s)+rev
  erev message
end

#try_to_determine_the_taxid_from_this_input(i) ⇒ `Object`

#

try_to_determine_the_taxid_from_this_input

This method will attempt to determine the taxid entry.

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 288

# ========================================================================= #
# === try_to_determine_the_taxid_from_this_input
#
# This method will attempt to determine the taxid entry.
#
# We look for the first entry like this:
#
#   SOURCE   3 ORGANISM_TAXID: 300852;
#
# and store its last whitespace-separated token, with any ';' removed,
# in @taxid. Input that is not an Array, or that has no such entry,
# leaves @taxid untouched and yields nil.
# ========================================================================= #
def try_to_determine_the_taxid_from_this_input(i)
  return unless i.is_a?(Array)
  taxid_line = i.find { |line| line.include?('ORGANISM_TAXID:') }
  return if taxid_line.nil?
  @taxid = taxid_line.strip.delete(';').split(' ').last
end

#try_to_report_the_organism_at_hand(i = @body) ⇒ `Object`

#

try_to_report_the_organism_at_hand

This method will try to extract the organism’s name.

This entry may look like this:

SOURCE   2 ORGANISM_SCIENTIFIC: SQUALUS ACANTHIAS;

#

# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 651

# ========================================================================= #
# === try_to_report_the_organism_at_hand
#
# This method will try to extract the organism's name.
#
# This entry may look like this:
#
#   SOURCE   2 ORGANISM_SCIENTIFIC: SQUALUS ACANTHIAS;
#
# The text after the last ':' (without ';') of the first such entry is
# stored in @name_of_the_species. The first ' ORGANISM_TAXID: ' entry is
# treated the same way and ends up in @taxid_of_the_species. Both
# matching lines are stripped in place.
#
# If be_verbose? is true and a species name is known, then
# report_extra_information_about_the_species_at_hand() is invoked.
# Input that is not a non-empty Array is ignored.
# ========================================================================= #
def try_to_report_the_organism_at_hand(i = @body)
  return unless i.is_a?(Array) && !i.empty?
  # Strips the line in place, then returns the value after the last ':'.
  value_after_colon = ->(line) {
    line.strip!
    line.split(':').last.delete(';').strip
  }
  # ======================================================================= #
  # === ORGANISM_SCIENTIFIC
  # ======================================================================= #
  scientific_line = i.find { |line| line.include?('ORGANISM_SCIENTIFIC:') }
  @name_of_the_species = value_after_colon.call(scientific_line) if scientific_line
  # ======================================================================= #
  # === ORGANISM_TAXID
  #
  # Next try to find out the taxid number of the organism at hand.
  # ======================================================================= #
  taxid_line = i.find { |line| line.include?(' ORGANISM_TAXID: ') }
  @taxid_of_the_species = value_after_colon.call(taxid_line) if taxid_line
  report_extra_information_about_the_species_at_hand if be_verbose? && @name_of_the_species
end

Class: Bioroebe::ParsePdbFile

Overview

Bioroebe::ParsePdbFile

Constant Summary collapse

#

DEFAULT_PDB_FILE

#

Constants inherited from CommandlineApplication

Constants included from ColoursForBase

Constants inherited from Base

Instance Method Summary collapse

# === aminoacid_sequence? ========================================================================= #.

# === analyze_the_dataset.

# === body? ========================================================================= #.

# === calculate_the_centroid_position.

# === calculate_the_distance_between_two_points.

# === check_whether_this_pdb_sequence_contains_dna ========================================================================= #.

# === consider_creating_a_fasta_file ========================================================================= #.

# === consider_reporting_alpha_helices_that_were_found ========================================================================= #.

# === consider_reporting_beta_sheet_that_were_found ========================================================================= #.

# === consider_reporting_how_many_chains_are_in_this_structure.

# === consider_reporting_the_aminoacid_sequence.

# === consider_reporting_the_keywords.

# === consider_reporting_the_number_of_individual_aminoacids ========================================================================= #.

# === consider_reporting_the_number_of_residues ========================================================================= #.

# === convert_this_alphabet_character_to_number.

# === header? ========================================================================= #.

# === initialize ========================================================================= #.

# === input_sequence?.

# === keywords? ========================================================================= #.

# === main_file? ========================================================================= #.

# === max_distance? ========================================================================= #.

# === menu (menu tag) ========================================================================= #.

# === n_alpha_helices? ========================================================================= #.

# === n_aminoacids? ========================================================================= #.

# === n_atoms?.

# === name_of_the_species? ========================================================================= #.

# === organism_common? ========================================================================= #.

# === process_each_pdb_file.

# === readlines_from_this_file ========================================================================= #.

# === report_extra_information_about_the_species_at_hand ========================================================================= #.

# === report_header.

# === report_n_atoms ========================================================================= #.

# === reset (reset tag) ========================================================================= #.

# === reset_internal_variables ========================================================================= #.

# === return_all_ATOM_entries ========================================================================= #.

# === return_short_filename ========================================================================= #.

# === run (run tag) ========================================================================= #.

# === set_body.

# === set_header.

# === set_header_title_and_body.

# === set_keywords ========================================================================= #.

# === set_pdb_files.

# === set_this_file ========================================================================= #.

# === silently_determine_the_aminoacid_sequence.

# === string? ========================================================================= #.

# === taxid? ========================================================================= #.

# === taxid_of_the_species? ========================================================================= #.

# === title= ========================================================================= #.

# === title? ========================================================================= #.

# === try_to_determine_the_alpha_helices_in_this_protein ========================================================================= #.

# === try_to_determine_the_beta_sheets_in_this_protein.

# === try_to_determine_the_max_distance_between_the_atoms_in_this_protein? ========================================================================= #.

# === try_to_determine_the_taxid_from_this_input.

# === try_to_report_the_organism_at_hand.

Methods inherited from CommandlineApplication

Methods included from BaseModule

Methods included from CommandlineArguments

Methods included from ColoursForBase

Methods inherited from Base

Methods included from InternalHashModule

Methods included from InferTheNamespaceModule

Constructor Details

#initialize(i = DEFAULT_PDB_FILE, run_already = true) ⇒ ParsePdbFile

#

initialize

#

Instance Method Details

#aminoacid_sequence? ⇒ Boolean

#

#initialize(i = DEFAULT_PDB_FILE, run_already = true) ⇒ `ParsePdbFile`

#aminoacid_sequence? ⇒ `Boolean`

#analyze_the_dataset(body = @body) ⇒ `Object`

#body? ⇒ `Boolean`

#calculate_the_centroid_position ⇒ `Object` Also known as: calculate_centroid

#calculate_the_distance_between_two_points(p1, p2) ⇒ `Object`

#check_whether_this_pdb_sequence_contains_dna ⇒ `Object`

#consider_creating_a_fasta_file ⇒ `Object`

#consider_reporting_alpha_helices_that_were_found(i = @alpha_helices) ⇒ `Object`

#consider_reporting_beta_sheet_that_were_found(i = @beta_sheets) ⇒ `Object`

#consider_reporting_how_many_chains_are_in_this_structure ⇒ `Object`

#consider_reporting_the_aminoacid_sequence ⇒ `Object`

#consider_reporting_the_keywords(keywords = keywords? ) ⇒ `Object`

#consider_reporting_the_number_of_individual_aminoacids ⇒ `Object`

#consider_reporting_the_number_of_residues ⇒ `Object`

#convert_this_alphabet_character_to_number(i) ⇒ `Object`

#header? ⇒ `Boolean` Also known as: header

#input_sequence? ⇒ `Boolean`

#keywords? ⇒ `Boolean` Also known as: keywords