Class: Bioroebe::ParsePdbFile

Inherits:
CommandlineApplication show all
Defined in:
lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb

Overview

Bioroebe::ParsePdbFile

Constant Summary collapse

DEFAULT_PDB_FILE =
#

DEFAULT_PDB_FILE

#
"#{::Bioroebe.log_directory?}test.pdb"

Constants inherited from CommandlineApplication

CommandlineApplication::OLD_VERBOSE_VALUE

Constants included from ColoursForBase

ColoursForBase::ARRAY_HTML_COLOURS_IN_USE

Constants inherited from Base

Base::NAMESPACE

Instance Method Summary collapse

Methods inherited from CommandlineApplication

#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opnerev, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #runmode?, #set_be_verbose, #set_runmode, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #verbose_truth, #was_or_were, #without_extname, #write_what_into

Methods included from BaseModule

#absolute_path, #default_file_read, #file_readlines

Methods included from CommandlineArguments

#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first?, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens, #set_commandline_arguments

Methods included from ColoursForBase

#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_will_we_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?

Methods inherited from Base

#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into

Methods included from InternalHashModule

#internal_hash?, #reset_the_internal_hash

Methods included from InferTheNamespaceModule

#infer_the_namespace, #namespace?

Constructor Details

#initialize(i = DEFAULT_PDB_FILE, run_already = true) ⇒ ParsePdbFile

#

initialize

#


125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 125

# Builds a new instance, splits the given input via the two
# "two leading hyphens" helpers and optionally starts processing.
#
# @param i [Object] input entries; anything that is not an Array is
#   wrapped, flattened and stripped of nil values first
# @param run_already [Object] a falsy value or :do_not_run_yet prevents
#   the call to run()
# @yieldreturn [Symbol] :be_silent triggers set_be_silent
#
# Invocation example for the block variant:
#
#   Bioroebe::ParsePdbFile.new(ARGV) { :be_silent }
def initialize(
    i           = DEFAULT_PDB_FILE,
    run_already = true
  )
  reset
  i = [i].flatten.compact unless i.is_a?(Array)
  hyphenated_entries = return_entries_with_two_leading_hyphens_from(i)
  set_commandline_arguments(hyphenated_entries)
  other_entries = return_entries_without_two_leading_hyphens(i)
  set_pdb_files(other_entries)
  # ======================================================================= #
  # === Handle blocks
  # ======================================================================= #
  if block_given?
    yielded = yield
    set_be_silent if :be_silent == yielded
  end
  # :do_not_run_yet is treated exactly like false.
  run_already = false if :do_not_run_yet == run_already
  run if run_already
end

Instance Method Details

#aminoacid_sequence?Boolean

#

aminoacid_sequence?

#

Returns:

  • (Boolean)


523
524
525
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 523

# Reader for @aminoacid_sequence.
#
# Despite the trailing "?" no boolean coercion happens: the stored value
# itself is returned (nil when the variable was never assigned).
def aminoacid_sequence?
  @aminoacid_sequence
end

#analyze_the_dataset(body = @body) ⇒ Object

#

analyze_the_dataset

This method is the “workhorse” of this class.

#


927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 927

# Runs all analysis and reporting steps, in a fixed order, for the
# dataset at hand. Nothing happens unless @does_the_file_exist is set.
#
# @param body [Object] the dataset handed on to the steps that take an
#   argument; defaults to @body
def analyze_the_dataset(
    body = @body
  )
  return unless @does_the_file_exist
  # General information about the file.
  report_header
  try_to_report_the_organism_at_hand(body)
  report_n_atoms
  check_whether_this_pdb_sequence_contains_dna
  # The aminoacid sequence has to be determined before it is reported.
  silently_determine_the_aminoacid_sequence(body)
  consider_reporting_the_aminoacid_sequence
  consider_reporting_the_number_of_individual_aminoacids
  consider_reporting_the_number_of_residues
  # Try to obtain the taxid.
  try_to_determine_the_taxid_from_this_input(body)
  # Secondary structure and geometry.
  try_to_determine_the_alpha_helices_in_this_protein(body)
  consider_reporting_alpha_helices_that_were_found
  try_to_determine_the_beta_sheets_in_this_protein(body)
  try_to_determine_the_max_distance_between_the_atoms_in_this_protein(body)
  consider_reporting_beta_sheet_that_were_found
  consider_reporting_how_many_chains_are_in_this_structure
  consider_reporting_the_keywords
end

#body?Boolean

#

body?

#

Returns:

  • (Boolean)


313
314
315
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 313

# Reader for @body. The stored value itself is returned, not a boolean.
def body?
  @body
end

#calculate_the_centroid_positionObject Also known as: calculate_centroid

#

calculate_the_centroid_position

This method will calculate the centroid aka the “average position of the atoms” in that .pdb file. Currently this will only assume that each atom is the same, but in reality we should also include the weight of the atom at hand - this is currently not implemented via this method, though.

If this is ever improved, we need to include the weight of the corresponding atom as well.

#


890
891
892
893
894
895
896
897
898
899
900
901
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 890

# Calculates the centroid, i.e. the unweighted average position, of all
# coordinates collected in @x_coordinates, @y_coordinates and
# @z_coordinates.
#
# The sums are divided by the number of collected coordinates. Using the
# number of ATOM lines in @body instead is wrong whenever the two counts
# differ, as the coordinates are gathered by a different filter.
#
# Atom weights are not taken into account.
#
# @return [Array] [x, y, z]; [0, 0, 0] when no coordinates were collected
def calculate_the_centroid_position
  x_values = @x_coordinates || []
  y_values = @y_coordinates || []
  z_values = @z_coordinates || []
  n_points = x_values.size
  return [0, 0, 0] if n_points.zero?
  divisor = n_points.to_f
  [x_values, y_values, z_values].map { |values| values.sum / divisor }
end

#calculate_the_distance_between_two_points(p1, p2) ⇒ Object

#

calculate_the_distance_between_two_points

Pass in two arrays to this method.

#


532
533
534
535
536
537
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 532

# Returns the euclidean distance between two points.
#
# @param p1 [Array<Numeric>] first point; indices 0, 1 and 2 are read
# @param p2 [Array<Numeric>] second point; indices 0, 1 and 2 are read
# @return [Float]
def calculate_the_distance_between_two_points(p1, p2)
  squared_deltas = (0..2).map { |axis| (p2[axis] - p1[axis]) ** 2 }
  # Plain left-to-right addition, exactly like a + b + c.
  Math.sqrt(squared_deltas.inject(:+))
end

#check_whether_this_pdb_sequence_contains_dnaObject

#

check_whether_this_pdb_sequence_contains_dna

#


542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 542

# Reports whether the input mentions 'MOLECULE: DNA' and, if so, the
# DNA sequence that follows that marker plus whether the sequence is a
# palindrome.
def check_whether_this_pdb_sequence_contains_dna
  dataset = input_sequence?
  return unless dataset.include?('MOLECULE: DNA')
  erev 'This protein sequence contains at the least one DNA strand.'
  # ======================================================================= #
  # The captured part may look like this:
  #
  #   "(5'-D(*CP*GP*CP*GP*AP*AP*TP*TP*CP*GP*CP*G)-3');"
  #
  # Only the first match is used; hyphens and the listed decoration
  # characters are removed.
  # ======================================================================= #
  raw_match = dataset.scan(/ MOLECULE: DNA(.+)$/).flatten.first.to_s.strip
  dna = raw_match.tr('-','').delete("'D(P*);53")
  message = ('This DNA sequence is '+colourize_dna(dna)+rev+'.').dup
  if is_this_sequence_a_palindrome?(dna)
    message << rev+' It is a palindrome.'
  else
    message << rev+' It is NOT a palindrome.'
  end
  erev message
end

#consider_creating_a_fasta_fileObject

#

consider_creating_a_fasta_file

#


793
794
795
796
797
798
799
800
801
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 793

# Writes the aminoacid sequence into a .fasta file if
# @do_create_a_fasta_file is set. The target name is the short filename
# with a trailing ".pdb" replaced by ".fasta", expanded to an absolute
# path.
def consider_creating_a_fasta_file
  return unless @do_create_a_fasta_file
  sequence = aminoacid_sequence?
  fasta_name = return_short_filename.sub(/\.pdb$/,'')+'.fasta'
  target = File.absolute_path(fasta_name)
  erev 'Storing into the file `'+sfile(target)+rev+'`.'
  write_what_into(sequence, target)
end

#consider_reporting_alpha_helices_that_were_found(i = @alpha_helices) ⇒ Object

#

consider_reporting_alpha_helices_that_were_found

#


449
450
451
452
453
454
455
456
457
458
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 449

# Reports how many alpha-helices were found. Nothing is shown when the
# collection is empty or when we are not verbose.
#
# @param i [#empty?, #size] the alpha-helix entries; defaults to
#   @alpha_helices
def consider_reporting_alpha_helices_that_were_found(
    i = @alpha_helices
  )
  return if i.empty? || !be_verbose?
  n_helices = slateblue(i.size.to_s)
  erev "This protein contains #{n_helices}#{rev} alpha-helices."
end

#consider_reporting_beta_sheet_that_were_found(i = @beta_sheets) ⇒ Object

#

consider_reporting_beta_sheet_that_were_found

#


463
464
465
466
467
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 463

# Reports how many beta-sheets were found.
#
# This mirrors consider_reporting_alpha_helices_that_were_found(): the
# report is only shown when we are verbose and it is emitted via erev().
# Previously the line was printed even when the instance was silenced.
#
# @param i [#empty?, #size] the beta-sheet entries; defaults to
#   @beta_sheets
def consider_reporting_beta_sheet_that_were_found(i = @beta_sheets)
  return if i.empty? || !be_verbose?
  erev "This protein contains #{slateblue(i.size.to_s)}#{rev} beta-sheets."
end

#consider_reporting_how_many_chains_are_in_this_structureObject

#

consider_reporting_how_many_chains_are_in_this_structure

A better way to report how many chains are in a structure is via:

COMPND   3 CHAIN: A, B;
#


598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 598

# Reports how many chains are part of this structure, based on the
# COMPND entries that carry a "CHAIN: " listing, such as:
#
#   "COMPND   3 CHAIN: A, B, C, D, E, F, G, H, I, J, K, L, M, N;"
#   "COMPND   8 CHAIN: O, P, Q, R, S, T, U;"
#
# The number of distinct chain identifiers is counted. The former
# approach used the alphabet position of the highest uppercase letter,
# which is wrong for listings with gaps ("A, C") and ignores lowercase
# or numeric identifiers.
#
# Nothing is reported if @body is unset, if no chain listing exists or
# if we are not verbose.
def consider_reporting_how_many_chains_are_in_this_structure
  return unless @body
  chain_lines = @body.select { |entry|
    entry.include?('COMPND  ') && entry.include?('CHAIN: ')
  }
  chain_ids = chain_lines.flat_map { |entry|
    # Everything after "CHAIN: " is a comma separated list that may be
    # terminated by a semicolon.
    entry.split('CHAIN: ', 2).last.delete(';').split(',').map(&:strip)
  }.reject(&:empty?).uniq
  return if chain_ids.empty?
  n_chains = chain_ids.size
  if be_verbose?
    erev "There are #{steelblue(n_chains)}#{rev} chains in this molecule."
  end
end

#consider_reporting_the_aminoacid_sequenceObject

#

consider_reporting_the_aminoacid_sequence

This method will typically display the aminoacid sequence at hand.

#


1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 1013

# Displays the aminoacid sequence and its length, provided that
# @report_the_aminoacid_sequence is set and we are verbose.
def consider_reporting_the_aminoacid_sequence
  return unless @report_the_aminoacid_sequence && be_verbose?
  sequence = @aminoacid_sequence
  erev "The aminoacid sequence (#{steelblue(sequence.size.to_s)}#{rev} aminoacids) is:"
  # The sequence is shown in one colour; it is not colourized per residue.
  erev "  #{steelblue(sequence)}"
end

#consider_reporting_the_keywords(keywords = keywords? ) ⇒ Object

#

consider_reporting_the_keywords

This method will report the discovered keyword entries in the given .pdb file at hand (if this .pdb file contains these keywords entries that is).

#


960
961
962
963
964
965
966
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 960

# Reports the keywords, unless the given value is nil or false.
#
# @param keywords [Object] the value to report; defaults to keywords?
def consider_reporting_the_keywords(
    keywords = keywords?
  )
  erev "The keywords are: #{steelblue(keywords)}" if keywords
end

#consider_reporting_the_number_of_individual_aminoacidsObject

#

consider_reporting_the_number_of_individual_aminoacids

#


980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 980

# Reports, for every distinct character of @aminoacid_sequence, how
# often it occurs. The one-letter code is shown as the upcased result
# of ::Bioroebe.one_to_three(). Requires
# @report_the_aminoacid_sequence to be set and verbose mode.
def consider_reporting_the_number_of_individual_aminoacids
  return unless @report_the_aminoacid_sequence && be_verbose?
  @aminoacid_sequence.each_char.tally.each_pair do |one_letter, count|
    erev "Total no:of #{rev}" \
         "#{lightgreen(::Bioroebe.one_to_three(one_letter).upcase)}" \
         "#{rev} - #{steelblue(count.to_s.rjust(2,' '))}"
  end
end

#consider_reporting_the_number_of_residuesObject

#

consider_reporting_the_number_of_residues

#


971
972
973
974
975
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 971

# Reports the length of @aminoacid_sequence as the number of residues,
# provided that @report_the_aminoacid_sequence is set and we are verbose.
def consider_reporting_the_number_of_residues
  return unless @report_the_aminoacid_sequence && be_verbose?
  n_residues = @aminoacid_sequence.size.to_s
  erev "Total no:of residues - #{steelblue(n_residues)}"
end

#convert_this_alphabet_character_to_number(i) ⇒ Object

#

convert_this_alphabet_character_to_number

The input of “A” would mean “1”.

#


637
638
639
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 637

# Converts an alphabet character into its position, relative to "A".
#
# The input of "A" yields 1, "B" yields 2 and so forth.
#
# @param i [#ord] the character to convert
# @return [Integer]
def convert_this_alphabet_character_to_number(i)
  offset_before_a = 'A'.ord - 1 # "A" is 65, so positions start at 1.
  i.ord - offset_before_a
end

#header?Boolean Also known as: header

#

header?

#

Returns:

  • (Boolean)


394
395
396
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 394

# Reader for @header. The stored value itself is returned, not a boolean.
def header?
  @header
end

#input_sequence?Boolean

#

input_sequence?

This will return a String.

#

Returns:

  • (Boolean)


322
323
324
325
326
327
328
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 322

# Returns @body; an Array is joined via N into a single String first,
# any other value is returned unchanged.
def input_sequence?
  @body.is_a?(Array) ? @body.join(N) : @body
end

#keywords?Boolean Also known as: keywords

#

keywords?

#

Returns:

  • (Boolean)


248
249
250
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 248

# Reader for @keywords. The stored value itself is returned, not a
# boolean.
def keywords?
  @keywords
end

#main_file?Boolean Also known as: return_filename

#

main_file?

#

Returns:

  • (Boolean)


813
814
815
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 813

# Reader for @this_file. The stored value itself is returned, not a
# boolean.
def main_file?
  @this_file
end

#max_distance?(array = @body) ⇒ Boolean

#

max_distance?

#

Returns:

  • (Boolean)


833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 833

# Returns the largest distance between two neighbouring ATOM entries;
# the last entry is compared against the first one. Note that this is
# not the maximum over all pairs of atoms.
#
# Only lines starting with 'ATOM' are interpreted as coordinates.
# Other records (HEADER, REMARK and so forth) used to be parsed as if
# they were points as well, which distorted the result whenever the
# whole body of a .pdb file was passed in.
#
# The entries are expected to look like this:
#
#   "ATOM    1  N   MET A  41       1.177 -10.035  -3.493  1.00  2.04   N"
#
# The whitespace separated fields 6, 7 and 8 are used as x, y and z.
#
# @param array [Array<String>, nil] the lines to inspect; defaults to
#   @body
# @return [Numeric, nil] nil for nil or empty input; 0 if no distance
#   above zero was found
def max_distance?(
    array = @body
  )
  return if array.nil? or array.empty?
  points = array.select { |line| line.start_with?('ATOM') }.map { |line|
    fields = line.split(' ')
    [fields[6].to_f, fields[7].to_f, fields[8].to_f]
  }
  max_value = 0
  points.each_with_index { |point, index|
    # Wrap around, so that the last point is compared to the first one.
    neighbour = points[(index + 1) % points.size]
    distance = calculate_the_distance_between_two_points(point, neighbour)
    max_value = distance if distance > max_value
  }
  max_value
end
#

menu (menu tag)

#


762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 762

# Evaluates the commandline arguments. An Array is walked recursively,
# entry by entry.
#
# @param i [Object] a single argument or an Array of arguments;
#   defaults to @commandline_arguments
def menu(
    i = @commandline_arguments
  )
  return i.each { |entry| menu(entry) } if i.is_a?(Array)
  case i
  # ======================================================================= #
  # === parsedb 2HI4.pdb --no-colours
  # ======================================================================= #
  when /^-?-?no(-|_)?colou?rs$/i then disable_colours
  # ======================================================================= #
  # === parsedb 2HI4.pdb --create-fasta-file
  # ======================================================================= #
  when /^-?-?create(-|_)?fasta(-|_)?file$/i then @do_create_a_fasta_file = true
  end
end

#n_alpha_helices?Boolean

#

n_alpha_helices?

#

Returns:

  • (Boolean)


442
443
444
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 442

# Returns how many entries @alpha_helices holds.
#
# @return [Integer]
def n_alpha_helices?
  helices = @alpha_helices
  helices.size
end

#n_aminoacids?Boolean

#

n_aminoacids?

#

Returns:

  • (Boolean)


472
473
474
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 472

# Returns the size of @aminoacid_sequence, or nil if no sequence is set.
#
# @return [Integer, nil]
def n_aminoacids?
  sequence = @aminoacid_sequence
  return nil unless sequence
  sequence.size
end

#n_atoms?(i = data?) ) ⇒ Boolean Also known as: n_atom_entries?

#

n_atoms?

Returns how many ATOM entries we have in this .pdb file.

#

Returns:

  • (Boolean)


380
381
382
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 380

# Returns how many entries start with 'ATOM'.
#
# @param i [Array<String>, nil] the entries to inspect; defaults to data?
# @return [Integer, nil] nil if no entries were given
def n_atoms?(i = data?)
  return unless i
  i.count { |entry| entry.start_with?('ATOM') }
end

#name_of_the_species?Boolean

#

name_of_the_species?

#

Returns:

  • (Boolean)


686
687
688
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 686

# Reader for @name_of_the_species. The stored value itself is returned,
# not a boolean.
def name_of_the_species?
  @name_of_the_species
end

#organism_common?Boolean

#

organism_common?

#

Returns:

  • (Boolean)


717
718
719
720
721
722
723
724
725
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 717

# Returns the first distinct "ORGANISM_COMMON: ...;" value found in
# @body, as a String. An empty String is returned if there is no such
# entry; nil is returned if @body is unset.
#
# @return [String, nil]
def organism_common?
  return unless @body
  matches = @body.join(N).scan(/ORGANISM_COMMON: (.+);/)
  matches.flatten.uniq.first.to_s
end

#process_each_pdb_fileObject

#

process_each_pdb_file

This method is the main workhorse method of this class.

#


732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 732

# Walks over every entry of @pdb_files. An existing regular file is
# read, analysed and optionally converted into a .fasta file; for
# anything else a notification is shown and @does_the_file_exist is
# set to false.
def process_each_pdb_file
  @pdb_files.each do |path|
    # File.file? is false for missing paths as well.
    unless File.file?(path)
      opnerev "No file at `#{sfile(path)}#{rev}` could be found."
      @does_the_file_exist = false
      next
    end
    reset_internal_variables # Start from a clean state for every file.
    @does_the_file_exist = true
    set_this_file(path)
    lines = readlines_from_this_file(path)
    set_keywords(lines) if lines.any? { |line| line.include?('KEYWDS') }
    set_header_title_and_body(lines)
    analyze_the_dataset
    consider_creating_a_fasta_file
  end
end

#readlines_from_this_file(file) ⇒ Object

#

readlines_from_this_file

#


412
413
414
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 412

# Reads the given file and returns its lines, line endings included.
#
# @param file [String] path to the file
# @return [Array<String>]
def readlines_from_this_file(file)
  File.open(file) { |handle| handle.readlines }
end

#report_extra_information_about_the_species_at_handObject

#

report_extra_information_about_the_species_at_hand

#


693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 693

# Reports the name of the species. If @taxid_of_the_species is set and
# organism_common? yields a non-empty value, the taxid and that common
# name are appended as well.
def report_extra_information_about_the_species_at_hand
  message = "The name of the organism (Entry: #{steelblue('ORGANISM_SCIENTIFIC')}#{rev}) is#{N}".dup
  message << "`#{orange(name_of_the_species?)}#{rev}`."
  if @taxid_of_the_species
    common_name = organism_common?.to_s
    unless common_name.empty?
      message << "#{rev} (Taxid: #{steelblue(@taxid_of_the_species.to_s)}#{rev}; " \
                 "#{seagreen(common_name)}#{rev})"
    end
  end
  erev message
end

#report_header(of_this_file = @this_file) ⇒ Object

#

report_header

This will also report the filename.

#


908
909
910
911
912
913
914
915
916
917
918
919
920
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 908

# Reports the header together with the short filename and the size of
# the file in kb (integer division by 1024). Only done when verbose.
#
# @param of_this_file [String] path used for the filesize lookup;
#   defaults to @this_file
def report_header(
    of_this_file = @this_file
  )
  return unless be_verbose?
  short_name = return_short_filename
  e "#{orange(header?)}#{rev} (File: #{steelblue(short_name)}#{rev}; " \
    "Filesize: #{File.size(of_this_file) / 1024}kb)#{rev}"
end

#report_n_atomsObject

#

report_n_atoms

#


401
402
403
404
405
406
407
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 401

# Reports, on two lines, how many ATOM entries were found and in which
# file. Only done when verbose.
def report_n_atoms
  return unless be_verbose?
  atom_count = sfancy(n_atoms?.to_s)
  e "#{atom_count}#{rev} ATOM entries were found being part of the file at"
  e "`#{sfile(main_file?)}#{rev}`."
end

#resetObject

#

reset (reset tag)

#


167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 167

# Resets the instance: calls the parent reset, infers the namespace,
# initializes the variables that are not handled by
# reset_internal_variables() and finally calls that method.
def reset
  super()
  infer_the_namespace
  # ======================================================================= #
  # === @do_create_a_fasta_file
  #
  # This variable must not be part of reset_internal_variables(), as
  # every commandline argument passed in by the user would be ignored
  # otherwise.
  # ======================================================================= #
  @do_create_a_fasta_file = false
  # ======================================================================= #
  # === @x_coordinates, @y_coordinates, @z_coordinates
  #
  # Three separate, initially empty Arrays.
  # ======================================================================= #
  @x_coordinates, @y_coordinates, @z_coordinates = [], [], []
  # ======================================================================= #
  # The remaining internal variables are reset through this method.
  # ======================================================================= #
  reset_internal_variables
end

#reset_internal_variablesObject

#

reset_internal_variables

#


200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 200

# Resets all variables that describe a single .pdb file. This includes
# the coordinate accumulators, so that the coordinates of a previously
# processed file can not leak into the next one.
def reset_internal_variables
  self.header = ''
  self.title  = ''
  # ======================================================================= #
  # === @body
  # ======================================================================= #
  @body  = nil
  # ======================================================================= #
  # === @taxid
  # ======================================================================= #
  @taxid = nil
  # ======================================================================= #
  # === @n_chains_are_in_this_atom
  # ======================================================================= #
  @n_chains_are_in_this_atom = 0
  # ======================================================================= #
  # === @alpha_helices
  # ======================================================================= #
  @alpha_helices = []
  # ======================================================================= #
  # === @beta_sheets
  # ======================================================================= #
  @beta_sheets   = []
  # ======================================================================= #
  # === @x_coordinates, @y_coordinates, @z_coordinates
  #
  # These belong to one particular file and thus have to start empty
  # for every file.
  # ======================================================================= #
  @x_coordinates = []
  @y_coordinates = []
  @z_coordinates = []
  # ======================================================================= #
  # === @does_the_file_exist
  # ======================================================================= #
  @does_the_file_exist = false
  # ======================================================================= #
  # === @name_of_the_species
  # ======================================================================= #
  @name_of_the_species = nil
  # ======================================================================= #
  # === @taxid_of_the_species
  # ======================================================================= #
  @taxid_of_the_species = nil
  # ======================================================================= #
  # === @report_the_aminoacid_sequence
  # ======================================================================= #
  @report_the_aminoacid_sequence = true
  # ======================================================================= #
  # === @keywords
  # ======================================================================= #
  @keywords = nil
end

#return_all_ATOM_entriesObject

#

return_all_ATOM_entries

#


585
586
587
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 585

# Returns all entries of @body that start with 'ATOM  ' (the word ATOM
# followed by two spaces).
#
# @return [Array<String>]
def return_all_ATOM_entries
  atom_prefix = 'ATOM  '
  @body.select do |line|
    line.start_with?(atom_prefix)
  end
end

#return_short_filenameObject

#

return_short_filename

#


786
787
788
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 786

# Returns the last path component of @this_file, i.e. the filename
# without any leading directories.
def return_short_filename
  File.basename(@this_file)
end

#runObject

#

run (run tag)

#


1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 1027

# Entry point: evaluates the commandline arguments, processes every
# .pdb file and, when verbose, reports the centroid position.
def run
  menu
  process_each_pdb_file
  return unless be_verbose?
  centroid = calculate_the_centroid_position.join(', ')
  erev "The centered position is at: #{steelblue(centroid)}"
end

#set_body(i) ⇒ Object Also known as: body=

#

set_body

This method keeps track of the main “body” of the .pdb file at hand.

#


335
336
337
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 335

# Stores the given value in @body, without copying or validating it.
def set_body(i)
  @body = i
end

#set_header(i) ⇒ Object Also known as: header=

#

set_header

The header may have an entry such as:

HEADER    RIBOSOMAL PROTEIN/RNA                   16-APR-10   3IYQ
#


347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 347

# Sets @header to a sanitized variant of the input.
#
# The header may have an entry such as:
#
#   HEADER    RIBOSOMAL PROTEIN/RNA                   16-APR-10   3IYQ
#
# For this input "RIBOSOMAL PROTEIN/RNA" is stored: the first "HEADER"
# is removed, surrounding whitespace is stripped and, if three
# consecutive spaces remain, only the part before the first double
# space is kept.
#
# The input is never modified. This matters because the String handed
# in is usually the very same object that is part of the body of the
# .pdb file.
#
# @param i [String, Array<String>, nil] the header line; for an Array
#   the first entry is used. An Array without entries leaves @header
#   untouched; nil and false are stored as they are.
def set_header(i)
  if i
    i = i.first if i.is_a?(Array)
    return if i.nil? # Can't work with nil-entries.
    sanitized = i.sub(/HEADER/, '').strip
    if sanitized.include?('   ')
      sanitized = sanitized.split('  ').map(&:strip).first
    end
    i = sanitized
  end
  @header = i
end

#set_header_title_and_body(dataset) ⇒ Object

#

set_header_title_and_body

The input to this method should be an Array.

#


421
422
423
424
425
426
427
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 421

# Determines the header and the title from the dataset and stores the
# whole dataset as the body.
#
# Records are identified by the record name at the start of a line.
# Lines that merely mention "HEADER" or "TITLE" somewhere, such as a
# REMARK, are not picked up.
#
# @param dataset [Array<String>] the lines of the .pdb file
def set_header_title_and_body(dataset)
  set_header(
    dataset.select { |entry| entry.start_with?('HEADER') }
  )
  self.title = dataset.select { |entry| entry.start_with?('TITLE') }
  set_body(dataset)
end

#set_keywords(i) ⇒ Object Also known as: keywords=

#

set_keywords

#


999
1000
1001
1002
1003
1004
1005
1006
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 999

# Sets @keywords. For an Array the first entry that includes 'KEYWDS'
# is used, nested Arrays being flattened first; any other input is
# used directly. The value is converted into a String and stripped.
#
# The given Array is not modified, so frozen Arrays are fine as well.
#
# @param i [Array, Object] the dataset or the keywords entry
def set_keywords(i)
  if i.is_a? Array
    i = i.flatten.find { |entry| entry.include?('KEYWDS') }
  end
  @keywords = i.to_s.strip
end

#set_pdb_files(i = DEFAULT_PDB_FILE) ⇒ Object

#

set_pdb_files

We will keep this as an Array.

#


257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 257

# Sets @pdb_files, which is always kept as an Array of absolute paths.
#
# The given Array is not modified; a new Array is built instead, so
# frozen input is fine as well.
#
# @param i [String, Array] one entry or several entries; every entry
#   is converted into a String
# @return [Array<String>] the absolute paths
def set_pdb_files(
    i = DEFAULT_PDB_FILE
  )
  entries = i.is_a?(Array) ? i : [i]
  @pdb_files = entries.map { |entry|
    path = entry.to_s.dup # To avoid frozen-Strings.
    # ===================================================================== #
    # === :1fat
    # ===================================================================== #
    if path == ':1fat'
      path = "#{::Bioroebe.project_base_directory?}data/1fat.pdb"
    end
    # ===================================================================== #
    # The user may input a String such as "1NR6", but may not want to
    # input the longer "1NR6.pdb". In that case, if such a .pdb file
    # exists, we will use that as path instead.
    # ===================================================================== #
    path << '.pdb' if File.exist?("#{path}.pdb") && !File.exist?(path)
    File.absolute_path(path) # We require the full local path to the file at hand.
  }
end

#set_this_file(i) ⇒ Object

#

set_this_file

#


806
807
808
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 806

# Stores the given value in @this_file, without any further checks.
def set_this_file(i)
  @this_file = i
end

#silently_determine_the_aminoacid_sequence(i) ⇒ Object

#

silently_determine_the_aminoacid_sequence

This method is probably not quite correct, as it does not take into consideration that there may be a succession of aminoacids.

#


482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 482

# Determines the aminoacid sequence from the ATOM entries, stores it in
# @aminoacid_sequence and collects the coordinates of every ATOM entry
# in @x_coordinates, @y_coordinates and @z_coordinates.
#
# An entry may look like this:
#
#   ATOM   69  CG2 THR A   8    23.165  11.137  48.942  1.00 30.40  C
#     0    1    2   3  4   5      6       7       8       9    10   11
#
# The whitespace separated field 3 names the aminoacid, field 5 is the
# number of the residue and the fields 6 to 8 are the coordinates.
#
# A residue is only appended if its number is larger than every number
# seen before. This is known to be incomplete, e. g. when numbering
# starts anew.
#
# Entries are matched via 'ATOM  ' (two spaces). Requiring a third
# space would skip entries with a five-digit serial, such as
# "ATOM  52643  N   LYS N 393 ...". The given lines are not modified.
#
# @param i [Array<String>] the lines of the .pdb file
# @return [String] the aminoacid sequence in one-letter code
def silently_determine_the_aminoacid_sequence(i)
  this_aminoacid_sequence = ''.dup
  last_number_of_aminoacid = 0
  i.each { |line|
    next unless line.start_with?('ATOM  ')
    splitted = line.split(' ')
    @x_coordinates << splitted[6].to_f
    @y_coordinates << splitted[7].to_f
    @z_coordinates << splitted[8].to_f
    number_of_this_aminoacid = splitted[5].to_i
    # ===================================================================== #
    # Consecutive entries usually belong to the same residue:
    #
    #   ATOM    490  HZ3 LYS A  70     4.674  -0.770  -3.751  1.00  2.07   H
    #   ATOM    491  N   LYS A  71     8.012   0.034   2.745  1.00  0.74   N
    #
    # ===================================================================== #
    next unless number_of_this_aminoacid > last_number_of_aminoacid
    this_aminoacid_sequence << three_to_one(splitted[3])
    last_number_of_aminoacid = number_of_this_aminoacid
  }
  @aminoacid_sequence = this_aminoacid_sequence
end

#string? ⇒ Boolean Also known as: data?

#

string?

#

Returns:

  • (Boolean)


371
372
373
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 371

# ========================================================================= #
# === string?
#
# Reader for @body. The stored value is returned as-is; despite the
# trailing "?" in the method name no conversion to true/false happens.
# ========================================================================= #
def string?
  @body
end

#taxid? ⇒ Boolean

#

taxid?

#

Returns:

  • (Boolean)


306
307
308
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 306

# ========================================================================= #
# === taxid?
#
# Reader for @taxid. The stored value is returned unchanged (it is not
# coerced into true/false); nil is returned if it was never assigned.
# ========================================================================= #
def taxid?
  @taxid
end

#taxid_of_the_species? ⇒ Boolean

#

taxid_of_the_species?

#

Returns:

  • (Boolean)


755
756
757
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 755

# ========================================================================= #
# === taxid_of_the_species?
#
# Reader for @taxid_of_the_species. The stored value is returned
# unchanged (no coercion into true/false); nil is returned if it was
# never assigned.
# ========================================================================= #
def taxid_of_the_species?
  @taxid_of_the_species
end

#title=(i) ⇒ Object Also known as: set_title

#

title=

#


870
871
872
873
874
875
876
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 870

# ========================================================================= #
# === title=
#
# Setter for @title.
#
# When an Array is passed, every element is first chomped and then
# stripped, in place - the Array handed in by the caller is modified.
# Any other kind of object is stored without being touched.
# ========================================================================= #
def title=(i)
  case i
  when Array
    # Two separate in-place passes: first :chomp, then :strip.
    %i[chomp strip].each { |cleanup| i.map!(&cleanup) }
  end
  @title = i
end

#title? ⇒ Boolean Also known as: title

#

title?

#

Returns:

  • (Boolean)


387
388
389
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 387

# ========================================================================= #
# === title?
#
# Reader for @title. The stored value is returned unchanged (it is not
# coerced into true/false); nil is returned if it was never assigned.
# ========================================================================= #
def title?
  @title
end

#try_to_determine_the_alpha_helices_in_this_protein(i) ⇒ Object

#

try_to_determine_the_alpha_helices_in_this_protein

#


432
433
434
435
436
437
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 432

# ========================================================================= #
# === try_to_determine_the_alpha_helices_in_this_protein
#
# Collects all lines that start with 'HELIX  ' and stores them in
# @alpha_helices. The collected lines are also returned.
#
# Input that is not an Array is ignored: nothing is stored and nil is
# returned.
# ========================================================================= #
def try_to_determine_the_alpha_helices_in_this_protein(i)
  return unless i.is_a?(Array)
  @alpha_helices = i.select { |entry| entry.start_with?('HELIX  ') }
end

#try_to_determine_the_beta_sheets_in_this_protein(i) ⇒ Object

#

try_to_determine_the_beta_sheets_in_this_protein

Beta-sheets begin with the word ‘SHEET ’.

#


575
576
577
578
579
580
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 575

# ========================================================================= #
# === try_to_determine_the_beta_sheets_in_this_protein
#
# Collects all lines that start with 'SHEET  ' and stores them in
# @beta_sheets. The collected lines are also returned.
#
# Input that is not an Array is ignored: nothing is stored and nil is
# returned.
# ========================================================================= #
def try_to_determine_the_beta_sheets_in_this_protein(i)
  return unless i.is_a?(Array)
  @beta_sheets = i.select { |entry| entry.start_with?('SHEET  ') }
end

#try_to_determine_the_max_distance_between_the_atoms_in_this_protein?(array = @body) ⇒ Boolean Also known as: try_to_determine_the_max_distance_between_the_atoms_in_this_protein

#

try_to_determine_the_max_distance_between_the_atoms_in_this_protein?

#

Returns:

  • (Boolean)


820
821
822
823
824
825
826
827
828
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 820

# ========================================================================= #
# === try_to_determine_the_max_distance_between_the_atoms_in_this_protein?
#
# Asks max_distance?() for the maximum value of the given dataset
# (@body by default). The value is always computed, but it is only
# reported - via erev() - when be_verbose? is true.
#
# Returns whatever erev() returns in verbose mode, otherwise nil; the
# computed value itself is not returned.
# ========================================================================= #
def try_to_determine_the_max_distance_between_the_atoms_in_this_protein?(
    array = @body
  )
  largest_distance = max_distance?(array)
  return unless be_verbose?
  highlighted_distance = sfancy(largest_distance.to_s)
  erev 'The maximum difference between the atoms is ' + highlighted_distance + rev
end

#try_to_determine_the_taxid_from_this_input(i) ⇒ Object

#

try_to_determine_the_taxid_from_this_input

This method will attempt to determine the taxid entry.

#


288
289
290
291
292
293
294
295
296
297
298
299
300
301
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 288

# ========================================================================= #
# === try_to_determine_the_taxid_from_this_input
#
# Searches the given Array of lines for entries such as:
#
#   SOURCE   3 ORGANISM_TAXID: 300852;
#
# From the first matching line all ';' characters are removed and the
# last whitespace-separated token is stored in @taxid (and returned).
#
# If the input is not an Array, or no line matches, @taxid is left
# alone and nil is returned.
# ========================================================================= #
def try_to_determine_the_taxid_from_this_input(i)
  return unless i.is_a?(Array)
  taxid_lines = i.select { |entry| entry.include?('ORGANISM_TAXID:') }
  return if taxid_lines.empty?
  without_semicolons = taxid_lines.first.strip.delete(';')
  @taxid = without_semicolons.split(' ').last
end

#try_to_report_the_organism_at_hand(i = @body) ⇒ Object

#

try_to_report_the_organism_at_hand

This method will try to extract the organism’s name.

This entry may look like this:

SOURCE   2 ORGANISM_SCIENTIFIC: SQUALUS ACANTHIAS;
#


651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
# File 'lib/bioroebe/pdb_and_protein_structure/parse_pdb_file.rb', line 651

# ========================================================================= #
# === try_to_report_the_organism_at_hand
#
# Extracts the scientific name and the taxid of the organism from the
# given lines (@body by default). Nothing happens unless the input is a
# non-empty Array.
#
# For both entries the first matching line is used: the text after the
# last ':' is taken, all ';' are removed and the result is stripped.
# Note that the matching line itself is stripped in place, so the
# input Array's element is modified.
#
# Sets @name_of_the_species and @taxid_of_the_species when a matching
# line exists. In verbose mode, and only if a species name is known,
# report_extra_information_about_the_species_at_hand is invoked and its
# result is returned; otherwise nil is returned.
# ========================================================================= #
def try_to_report_the_organism_at_hand(i = @body)
  return unless i.is_a?(Array) && !i.empty?
  # ======================================================================= #
  # === ORGANISM_SCIENTIFIC
  #
  # Such a line may look like this:
  #
  #   SOURCE   2 ORGANISM_SCIENTIFIC: SQUALUS ACANTHIAS;
  # ======================================================================= #
  scientific_name_line = i.select { |entry|
    entry.include?('ORGANISM_SCIENTIFIC:')
  }.first
  if scientific_name_line
    scientific_name_line.strip!
    raw_name = scientific_name_line.split(':').last
    @name_of_the_species = raw_name.delete(';').strip
  end
  # ======================================================================= #
  # === ORGANISM_TAXID
  #
  # Next comes the taxid number of the organism at hand. The search
  # token is surrounded by one blank on either side.
  # ======================================================================= #
  taxid_line = i.select { |entry|
    entry.include?(' ORGANISM_TAXID: ')
  }.first
  if taxid_line
    taxid_line.strip!
    raw_taxid = taxid_line.split(':').last
    @taxid_of_the_species = raw_taxid.delete(';').strip
  end
  return unless be_verbose? && @name_of_the_species
  report_extra_information_about_the_species_at_hand
end