Class: Bioroebe::FetchDataFromUniprot

Inherits:
CommandlineApplication show all
Defined in:
lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb

Overview

Bioroebe::FetchDataFromUniprot

Constant Summary collapse

DEFAULT_FASTA_FILE =
#

DEFAULT_FASTA_FILE

#
'B5ZC00'

Constants inherited from CommandlineApplication

CommandlineApplication::OLD_VERBOSE_VALUE

Constants included from ColoursForBase

ColoursForBase::ARRAY_HTML_COLOURS_IN_USE

Constants inherited from Base

Base::NAMESPACE

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from CommandlineApplication

#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opnerev, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #runmode?, #set_be_verbose, #set_runmode, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #verbose_truth, #was_or_were, #without_extname, #write_what_into

Methods included from BaseModule

#absolute_path, #default_file_read, #file_readlines

Methods included from CommandlineArguments

#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first?, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens, #set_commandline_arguments

Methods included from ColoursForBase

#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_will_we_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?

Methods inherited from Base

#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into

Methods included from InternalHashModule

#internal_hash?, #reset_the_internal_hash

Methods included from InferTheNamespaceModule

#infer_the_namespace, #namespace?

Constructor Details

#initialize(i = nil, run_already = true) ⇒ FetchDataFromUniprot

#

initialize

The first argument to this class should be the name of the protein at hand. Alternatively it can also be just the full URL - both variants will work fine.

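The second argument to this class (run_already) specifies whether the instance will invoke run() immediately after it has been set up; this is enabled by default. Whether the downloaded FASTA file is renamed afterwards is controlled separately, via the internal :do_perform_rename_action setting. That setting is also enabled by default, largely because it makes it a bit easier to know what a particular fasta sequence contains.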

#


42
43
44
45
46
47
48
49
# File 'lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb', line 42

# ========================================================================= #
# === initialize
#
# Sets up a new instance: the internal state is reset first, then the
# given input is registered via set_commandline_arguments().
#
# i           - passed on unchanged to set_commandline_arguments().
# run_already - when truthy (the default), run() is invoked right away;
#               pass false to only prepare the instance.
# ========================================================================= #
def initialize(
    i           = nil,
    run_already = true
  )
  reset
  set_commandline_arguments(i)
  return unless run_already
  run
end

Class Method Details

.[](i = ARGV) ⇒ Object

#

Bioroebe::FetchDataFromUniprot[]

#


250
251
252
# File 'lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb', line 250

# ========================================================================= #
# === Bioroebe::FetchDataFromUniprot[]
#
# Shorthand constructor: hands the given input (ARGV when omitted) to
# new() and returns whatever new() returns.
# ========================================================================= #
def self.[](
    i = ARGV
  )
  new(i)
end

Instance Method Details

#do_download_this_file_from_uniprot(_) ⇒ Object

#

do_download_this_file_from_uniprot

#


154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
# File 'lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb', line 154

# ========================================================================= #
# === do_download_this_file_from_uniprot
#
# Downloads one remote dataset and stores it inside of the directory
# LOCAL_DIRECTORY_FOR_UNIPROT.
#
# The argument is either a bare identifier (such as 'B5ZC00'), which is
# turned into a URL below https://www.uniprot.org/uniprot/, or a complete
# URL starting with 'http', which is used as-is. Surrounding whitespace is
# ignored. The String passed in by the caller is never modified.
#
# Side effects, in order: the final URL is registered via
# set_use_this_remote_URL(); the target directory is created if missing;
# the downloaded data is written to disk; if do_perform_rename_action?
# is truthy the file is renamed based on its FASTA identifier; finally
# the absolute path is registered via set_stored_local_file().
#
# Nothing is rescued here, so any error raised by URI.open propagates to
# the caller.
# ========================================================================= #
def do_download_this_file_from_uniprot(_)
  # ======================================================================= #
  # String#strip returns a new String, so neither the caller's object nor
  # a (possibly frozen) constant is touched.
  # ======================================================================= #
  identifier = _.strip
  # ======================================================================= #
  # === Support omission of leading 'http' here:
  # ======================================================================= #
  remote_url =
    if identifier.start_with?('http')
      identifier
    else
      "https://www.uniprot.org/uniprot/#{identifier}"
    end
  # ======================================================================= #
  # === Ensure trailing .fasta suffix
  #
  # A trailing ".fasta" is appended only if the very last path component
  # does NOT include a '.' character, i. e. has no suffix of its own.
  # ======================================================================= #
  unless File.basename(remote_url).include?('.')
    remote_url = "#{remote_url}.fasta"
  end
  # ======================================================================= #
  # The remote URL is now as it should be, so store it:
  # ======================================================================= #
  set_use_this_remote_URL(remote_url)
  newline
  message "Trying to obtain remote data from "\
          "#{sfancy(remote_url)} #{rev}next:"
  newline
  # ======================================================================= #
  # Determine where to store the dataset. File.join yields a correct path
  # no matter whether the directory constant ends with a '/' or not.
  # ======================================================================= #
  local_uniprot_directory = LOCAL_DIRECTORY_FOR_UNIPROT
  store_here = File.join(
    local_uniprot_directory, File.basename(remote_url)
  )
  # ======================================================================= #
  # Next create the directory unless it already exists.
  # ======================================================================= #
  unless File.directory?(local_uniprot_directory)
    mkdir local_uniprot_directory
  end
  # ======================================================================= #
  # The block form of URI.open closes the remote handle once the block
  # has finished, even when reading raises.
  # ======================================================================= #
  dataset = URI.open(remote_url, &:read)
  message "The remote dataset will be stored here: "\
          "#{sfile(store_here)}"
  newline
  write_what_into(dataset, store_here)
  # ======================================================================= #
  # Next comes the optional rename action - this makes the name of the
  # .fasta file more verbose, which helps if many .fasta files are kept
  # around. The new name may look like this:
  #
  #   A2Z669_CSPLT_ORYSI_CASP-like_protein_5A2_OS=Oryza_sativa_subsp_indica_OX=39946_GN=OsI_33147_PE=3_SV=1.fasta
  #
  # ======================================================================= #
  if do_perform_rename_action?
    assumed_filename = ::Bioroebe.return_new_filename_based_on_fasta_identifier(store_here)
    new_filename = File.join(
      local_uniprot_directory, File.basename(assumed_filename)
    )
    message "Next renaming `#{sfile(store_here)}#{rev}` to"
    message "`#{sfile(new_filename)}#{rev}`."
    mv(store_here, new_filename)
    store_here = new_filename
  end
  set_stored_local_file(File.absolute_path(store_here))
end

#do_perform_rename_action?Boolean

#

do_perform_rename_action?

#

Returns:

  • (Boolean)


91
92
93
# File 'lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb', line 91

# ========================================================================= #
# === do_perform_rename_action?
#
# Returns the value stored under :do_perform_rename_action in the
# internal hash; reset() sets it to true.
# ========================================================================= #
def do_perform_rename_action?
  rename_switch = @internal_hash[:do_perform_rename_action]
  rename_switch
end

#message(i, use_opne = false) ⇒ Object Also known as: msg

#

message

#


126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# File 'lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb', line 126

# ========================================================================= #
# === message
#
# Appends the given input, followed by a newline, to the :result entry of
# the internal hash. In verbose mode (be_verbose?) the input is
# additionally handed to erev().
#
# i        - the text to record; it is interpolated into a String.
# use_opne - any truthy value causes opnn() to be called first. The
#            Symbol :use_opnn is the spelling used by callers in this
#            class; being truthy it needs no separate treatment.
#
# Returns nil when not verbose, otherwise the return value of erev().
# ========================================================================= #
def message(
    i,
    use_opne = false
  )
  opnn if use_opne
  recorded_output = @internal_hash[:result]
  recorded_output << "#{i}\n"
  return unless be_verbose?
  erev i
end

#newlineObject

#

newline

#


147
148
149
# File 'lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb', line 147

# ========================================================================= #
# === newline
#
# Sends a single "\n" through message(). As message() appends a newline
# of its own, the recorded result grows by two newline characters.
# ========================================================================= #
def newline
  message "\n"
end

#resetObject

#

reset (reset tag)

#


54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb', line 54

# ========================================================================= #
# === reset (reset tag)
#
# Calls the parent's reset() first, then registers the defaults of this
# class in the internal hash and finally delegates to
# reset_the_internal_variables().
# ========================================================================= #
def reset
  super()
  @internal_hash.update(
    # ===================================================================== #
    # === :do_perform_rename_action
    #
    # The downloaded file is renamed by default, hence true here.
    # ===================================================================== #
    do_perform_rename_action: true,
    # ===================================================================== #
    # === :result
    #
    # A fresh, unfrozen String that message() appends to.
    # ===================================================================== #
    result: +''
  )
  reset_the_internal_variables
end

#reset_the_internal_variablesObject

#

reset_the_internal_variables

#


73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb', line 73

# ========================================================================= #
# === reset_the_internal_variables
#
# Brings the per-run entries of the internal hash back to their initial
# state. The :result entry must already hold a String, since it is
# emptied in place rather than replaced.
# ========================================================================= #
def reset_the_internal_variables
  # ======================================================================= #
  # === :result
  # ======================================================================= #
  recorded_output = @internal_hash[:result]
  recorded_output.clear
  # ======================================================================= #
  # === :use_this_remote_URL
  #
  # Starts out as DEFAULT_FASTA_FILE.
  # ======================================================================= #
  @internal_hash.store(:use_this_remote_URL, DEFAULT_FASTA_FILE)
  # ======================================================================= #
  # === :stored_local_file
  #
  # No file has been stored yet, hence nil.
  # ======================================================================= #
  @internal_hash.store(:stored_local_file, nil)
end

#result?Boolean Also known as: string?

#

result?

#

Returns:

  • (String) — the text accumulated so far by message(), one entry per line


228
229
230
# File 'lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb', line 228

# ========================================================================= #
# === result?
#
# Returns the :result entry of the internal hash, i. e. the String that
# message() appends to. Despite the trailing '?', this is not a boolean.
# ========================================================================= #
def result?
  recorded_output = @internal_hash[:result]
  recorded_output
end

#runObject

#

run (run tag)

#


235
236
237
238
239
240
241
242
243
244
245
# File 'lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb', line 235

# ========================================================================= #
# === run (run tag)
#
# Main entry point: resets the per-run state, determines what to fetch
# and delegates to do_download_this_file_from_uniprot(). If
# first_argument? yields nil or something empty, DEFAULT_FASTA_FILE is
# used instead and a note about this is recorded via message().
# ========================================================================= #
def run
  reset_the_internal_variables
  target = first_argument?
  nothing_given = target.nil? || target.empty?
  if nothing_given
    target = DEFAULT_FASTA_FILE # Use a default value in this case.
    message "Making use of the default value #{steelblue(target)}#{rev} "\
            'as no specific argument was passed to this class.', :use_opnn
  end
  do_download_this_file_from_uniprot(target)
end

#set_stored_local_file(i) ⇒ Object

#

set_stored_local_file

#


112
113
114
# File 'lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb', line 112

# ========================================================================= #
# === set_stored_local_file
#
# Records the given value under :stored_local_file in the internal hash
# and returns it.
# ========================================================================= #
def set_stored_local_file(i)
  @internal_hash.store(:stored_local_file, i)
end

#set_use_this_remote_URL(i) ⇒ Object

#

set_use_this_remote_URL

#


105
106
107
# File 'lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb', line 105

# ========================================================================= #
# === set_use_this_remote_URL
#
# Records the given value under :use_this_remote_URL in the internal
# hash and returns it.
# ========================================================================= #
def set_use_this_remote_URL(i)
  @internal_hash.store(:use_this_remote_URL, i)
end

#stored_local_file?Boolean Also known as: store_here?

#

stored_local_file?

#

Returns:

  • (String, nil) — the absolute path of the stored file, or nil as long as nothing has been stored


119
120
121
# File 'lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb', line 119

# ========================================================================= #
# === stored_local_file?
#
# Returns the :stored_local_file entry of the internal hash. It is nil
# after a reset; do_download_this_file_from_uniprot() sets it to the
# absolute path of the file it stored.
# ========================================================================= #
def stored_local_file?
  local_path = @internal_hash[:stored_local_file]
  local_path
end

#use_this_remote_URL?Boolean

#

use_this_remote_URL?

#

Returns:

  • (String) — the remote URL registered for the download (DEFAULT_FASTA_FILE until one has been set)


98
99
100
# File 'lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb', line 98

# ========================================================================= #
# === use_this_remote_URL?
#
# Returns the :use_this_remote_URL entry of the internal hash. This is
# DEFAULT_FASTA_FILE after a reset, and the complete remote URL once
# do_download_this_file_from_uniprot() has registered one.
# ========================================================================= #
def use_this_remote_URL?
  remote_location = @internal_hash[:use_this_remote_URL]
  remote_location
end