Class: Bioroebe::DownloadFasta
- Inherits:
-
CommandlineApplication
- Object
- Base
- CommandlineApplication
- Bioroebe::DownloadFasta
- Defined in:
- lib/bioroebe/fasta_and_fastq/download_fasta.rb
Overview
Bioroebe::DownloadFasta
Constant Summary collapse
- BASE_URL =
#
BASE_URL
#
'https://www.ncbi.nlm.nih.gov'
- DEFAULT_URL_TO_USE =
#
DEFAULT_URL_TO_USE
Here you can designate a default file to use.
This file should contain URLs to a target nucleotide sequence.
The format is just one URL per line, then a newline.
#
'/FASTA_FILE_URL.md'
Constants inherited from CommandlineApplication
CommandlineApplication::OLD_VERBOSE_VALUE
Constants included from ColoursForBase
ColoursForBase::ARRAY_HTML_COLOURS_IN_USE
Constants inherited from Base
Instance Method Summary collapse
-
#download_remote_file(url) ⇒ Object
# === download_remote_file.
-
#enter_log_directory(be_verbose = true) ⇒ Object
# === enter_log_directory ========================================================================= #.
-
#file_is_stored_here? ⇒ Boolean
(also: #location?)
# === file_is_stored_here? ========================================================================= #.
-
#initialize(i = nil, run_already = true) ⇒ DownloadFasta
constructor
# === initialize ========================================================================= #.
-
#rename_to_fasta(i) ⇒ Object
# === rename_to_fasta ========================================================================= #.
-
#report_where_the_file_is_stored_at(i = @file_is_stored_here) ⇒ Object
# === report_where_the_file_is_stored_at ========================================================================= #.
-
#reset ⇒ Object
# === reset (reset tag) ========================================================================= #.
-
#run ⇒ Object
# === run (run tag) ========================================================================= #.
Methods inherited from CommandlineApplication
#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opnerev, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #runmode?, #set_be_verbose, #set_runmode, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #verbose_truth, #was_or_were, #without_extname, #write_what_into
Methods included from BaseModule
#absolute_path, #default_file_read, #file_readlines
Methods included from CommandlineArguments
#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first?, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens, #set_commandline_arguments
Methods included from ColoursForBase
#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_will_we_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?
Methods inherited from Base
#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into
Methods included from InternalHashModule
#internal_hash?, #reset_the_internal_hash
Methods included from InferTheNamespaceModule
#infer_the_namespace, #namespace?
Constructor Details
#initialize(i = nil, run_already = true) ⇒ DownloadFasta
#
initialize
#
45 46 47 48 49 50 51 52 |
# File 'lib/bioroebe/fasta_and_fastq/download_fasta.rb', line 45
#
# Constructor.
#
# Calls +reset+, hands +i+ over to +set_commandline_arguments+ and,
# unless +run_already+ is falsy, invokes +run+ right away.
#
# i           - the commandline arguments to register (default: nil).
# run_already - whether +run+ is triggered from the constructor
#               (default: true).
def initialize(i = nil, run_already = true)
  reset
  set_commandline_arguments(i)
  return unless run_already

  run
end
Instance Method Details
#download_remote_file(url) ⇒ Object
#
download_remote_file
The URL should refer to an NM_ entry.
The matching FASTA entry is then looked up from it.
#
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
# File 'lib/bioroebe/fasta_and_fastq/download_fasta.rb', line 114
#
# Fetches the FASTA entry that +url+ designates and returns the path
# stored in @file_is_stored_here.
#
# url - a String, or an Array of Strings. For an Array every entry is
#       processed in turn and the Array itself is returned.
#
# A String is first cleaned (surrounding whitespace and newlines are
# removed) on a copy, so the caller's object is never modified and
# frozen Strings are accepted. Afterwards:
#
# * Input consisting only of digits, input starting with 'NC_', and
#   input containing '&id=XP_', '&id=NP_', '&id=YP_', '&id=NM_' or
#   '&id=WP_' is used as-is; no page is read.
# * Any other input is opened via URI.open. For input containing 'ncbi'
#   the identifier in front of '?report=fasta' / '?report=genbank' is
#   extracted from the page; otherwise the first line of the page
#   (without '>') is used, reduced to the second '|'-separated field
#   when a '|' is present.
#
# Only Errno::ENOENT is rescued while reading the page; in that case the
# cleaned input itself is used as identifier. Other exceptions propagate.
#
# Finally, input containing 'ncbi' is delegated to
# ::Bioroebe::Ncbi.efetch_by_url; everything else is written to
# "<log_dir?><identifier>.fasta" via write_what_into.
def download_remote_file(url)
  return url.each { |entry| download_remote_file(entry) } if url.is_a?(Array)

  # Non-destructive clean-up: strip whitespace, then drop any newline.
  url = url.strip.delete("\n")

  id_markers = %w[&id=XP_ &id=NP_ &id=YP_ &id=NM_ &id=WP_]
  used_verbatim = url.match?(/\A\d+\z/) ||
                  url.start_with?('NC_') ||
                  id_markers.any? { |marker| url.include?(marker) }

  # Stays nil whenever no remote page is read.
  # NOTE(review): for verbatim input without 'ncbi' in it, this nil is
  # handed to write_what_into below - confirm that this is intended.
  remote_file_content = nil

  if used_verbatim
    match = url
  else
    opne "#{rev}We will use the URL `#{simp(url)}#{rev}`"
    remote_file_content = ''.dup
    begin
      # Read the whole remote page into remote_file_content.
      URI.open(url) { |file|
        file.each_line { |line| remote_file_content << line }
      }
      if url.include?('ncbi')
        # Look for a fasta or genbank report entry.
        # See https://rubular.com/r/hiOp9ZPBOfDGJq
        report_regex = /(\d{1,18}\.?\d{0,5})\?report=(fasta|genbank)/
        match = remote_file_content[report_regex, 1].to_s
      else
        match = remote_file_content.split("\n").first.to_s.delete('>').strip
        match = match.split('|')[1] if match.include?('|') # sp|Q9CR68
      end
    rescue Errno::ENOENT => error
      opne tomato('We encountered an error, which we will feedback next.')
      pp error
      match = url
    end
  end

  if url.include?('ncbi')
    # The url may look like this:
    #
    #   https://www.ncbi.nlm.nih.gov/protein/NP_001065289.1?report=fasta
    #
    @file_is_stored_here = ::Bioroebe::Ncbi.efetch_by_url(File.basename(match))
  else
    @file_is_stored_here = "#{log_dir?}#{match}.fasta"
    write_what_into(remote_file_content, @file_is_stored_here)
  end
  report_where_the_file_is_stored_at(@file_is_stored_here)
  @file_is_stored_here
end
#enter_log_directory(be_verbose = true) ⇒ Object
#
enter_log_directory
#
65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
# File 'lib/bioroebe/fasta_and_fastq/download_fasta.rb', line 65
#
# Changes into the directory returned by +log_dir?+, unless +return_pwd+
# already equals it. The directory is created through +mkdir+ when it
# does not exist yet.
#
# be_verbose - when truthy, the directory change is announced via +opne+
#              (default: true).
def enter_log_directory(be_verbose = true)
  target = log_dir?
  return if return_pwd == target

  opne "#{rev}Now changing directory to `#{sdir(target)}#{rev}`." if be_verbose
  mkdir(target) unless File.exist?(target)
  cd target
end
#file_is_stored_here? ⇒ Boolean Also known as: location?
#
file_is_stored_here?
#
94 95 96 |
# File 'lib/bioroebe/fasta_and_fastq/download_fasta.rb', line 94
#
# Reader for @file_is_stored_here, the value assigned by
# +download_remote_file+. Returns nil while nothing has been assigned.
def file_is_stored_here?
  @file_is_stored_here
end
#rename_to_fasta(i) ⇒ Object
#
rename_to_fasta
#
101 102 103 104 105 |
# File 'lib/bioroebe/fasta_and_fastq/download_fasta.rb', line 101
#
# Renames the file at +i+ by appending '.fa' to its name.
#
# The new location (directory of +i+, '/', basename of +i+, '.fa',
# passed through +rds+) is remembered in @file, reported via +opnerev+
# and then handed to +mv+ together with +i+.
def rename_to_fasta(i)
  directory = File.dirname(i)
  basename  = File.basename(i)
  @file = rds("#{directory}/#{basename}.fa")
  opnerev "Next renaming `#{sfile(i)}` to `#{sfile(@file)}`."
  mv(i, @file)
end
#report_where_the_file_is_stored_at(i = @file_is_stored_here) ⇒ Object
#
report_where_the_file_is_stored_at
#
83 84 85 86 87 88 89 |
# File 'lib/bioroebe/fasta_and_fastq/download_fasta.rb', line 83
#
# Announces, via +opnerev+, where the file is stored. Nothing is
# reported when +i+ is nil/false or when no file exists at +i+.
#
# i - the path to report (default: @file_is_stored_here).
def report_where_the_file_is_stored_at(i = @file_is_stored_here)
  return unless i && File.exist?(i)

  opnerev "The file is now stored at `#{sfile(i)}#{rev}`."
end
#reset ⇒ Object
#
reset (reset tag)
#
57 58 59 60 |
# File 'lib/bioroebe/fasta_and_fastq/download_fasta.rb', line 57
#
# reset (reset tag)
#
# Runs the parent class' +reset+ (explicitly without arguments) and
# then calls +infer_the_namespace+.
def reset
  super()
  infer_the_namespace
end
#run ⇒ Object
#
run (run tag)
#
198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 |
# File 'lib/bioroebe/fasta_and_fastq/download_fasta.rb', line 198
#
# run (run tag)
#
# Enters the log directory and then processes every commandline
# argument. When no argument was given, DEFAULT_URL_TO_USE is appended
# to the commandline arguments first.
#
# An argument that names an existing file is read line by line, with
# blank lines discarded. If the first remaining line contains no '/',
# every line is treated as an identifier and expanded into an efetch
# URL; surrounding whitespace is removed from the identifier before it
# is embedded. Otherwise the lines are passed on unchanged. A file
# without any non-blank line yields an empty list rather than an error.
#
# Each argument (or the list read from it) is finally handed to
# +download_remote_file+.
def run
  enter_log_directory
  # Fall back to the default URL file when the user supplied nothing.
  commandline_arguments? << DEFAULT_URL_TO_USE if commandline_arguments?.empty?

  commandline_arguments?.each do |this_file|
    this_file = this_file.to_s # To avoid follow-up problems.
    if File.exist?(this_file) # This here is mostly done for testing-purposes.
      entries = File.readlines(this_file).reject { |line| line.strip.empty? }
      # Guard against an empty list: entries.first would be nil then.
      unless entries.empty? || entries.first.include?('/')
        opnerev 'Now working on '+entries.size.to_s+' entries.'
        entries = entries.map do |entry|
          # Strip the identifier itself so no whitespace ends up in the URL.
          'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id='+
            entry.strip+'&rettype=fasta&retmode=text'
        end
      end
      this_file = entries
    end
    download_remote_file(this_file)
  end
end