Class: Bioroebe::Ncbi

Inherits:
Base
  • Object
show all
Defined in:
lib/bioroebe/ncbi/ncbi.rb,
lib/bioroebe/ncbi/efetch.rb

Overview

Bioroebe::Ncbi

Constant Summary collapse

NCBI_URL =
#

NCBI_URL

#
'https://www.ncbi.nlm.nih.gov/gene/?term='
NAMESPACE =
#

NAMESPACE

#
inspect

Constants included from ColoursForBase

ColoursForBase::ARRAY_HTML_COLOURS_IN_USE

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Base

#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into

Methods included from BaseModule

#absolute_path, #default_file_read, #file_readlines

Methods included from InternalHashModule

#internal_hash?, #reset_the_internal_hash

Methods included from ColoursForBase

#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_will_we_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?

Methods included from InferTheNamespaceModule

#infer_the_namespace, #namespace?

Constructor Details

#initialize(i = nil, run_already = true) ⇒ Ncbi

#

initialize

#


34
35
36
37
38
39
40
41
# File 'lib/bioroebe/ncbi/ncbi.rb', line 34

# ========================================================================= #
# === initialize
#
# Sets up a new instance: calls reset, stores the given input via
# set_input, and then - unless run_already is falsy - immediately
# calls run.
#
# Arguments:
#
#   i           - the input handed to set_input (nil by default)
#   run_already - when truthy (the default), run is invoked right away
# ========================================================================= #
def initialize(
    i           = nil,
    run_already = true
  )
  reset
  set_input(i)
  run if run_already
end

Class Method Details

.cd(i) ⇒ Object

#

Ncbi.cd

#


243
244
245
# File 'lib/bioroebe/ncbi/efetch.rb', line 243

# ========================================================================= #
# === Ncbi.cd
#
# Thin delegator: forwards the argument unchanged to ::Bioroebe.cd and
# returns whatever that call returns.
#
# NOTE(review): presumably this changes the working directory to i -
# confirm against ::Bioroebe.cd, which is not visible here.
# ========================================================================= #
def self.cd(i)
  ::Bioroebe.cd(i)
end

.e(i = '') ⇒ Object

#

Bioroebe::Ncbi.e

#


91
92
93
# File 'lib/bioroebe/ncbi/efetch.rb', line 91

# ========================================================================= #
# === Bioroebe::Ncbi.e
#
# Prints the given argument via puts. With the default argument (an
# empty String) this outputs a single empty line.
# ========================================================================= #
def self.e(i = '')
  puts i
end

.efetch(accession_number = 'JN556047', email_to_use = '[email protected]') ⇒ Object

#

Bioroebe::Ncbi.efetch

Allows you to obtain a query. Currently this makes use of the “bio” gem.

Usage examples:

result = Bioroebe::Ncbi.efetch 'NM_007315'
#


66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/bioroebe/ncbi/efetch.rb', line 66

# ========================================================================= #
# === Bioroebe::Ncbi.efetch
#
# Fetches a nucleotide record through the "bio" gem, by calling
# Bio::NCBI::REST::EFetch.nucleotide with the given accession number.
#
# Arguments:
#
#   accession_number - the accession number to query
#   email_to_use     - assigned to Bio::NCBI.default_email before querying
#
# Returns whatever Bio::NCBI::REST::EFetch.nucleotide returns, or nil
# (after emitting a warning on stderr) when the "bio" gem could not be
# loaded and no Bio constant is available.
#
# Usage example:
#
#   result = Bioroebe::Ncbi.efetch 'NM_007315'
#
# ========================================================================= #
def self.efetch(
    accession_number = 'JN556047',
    email_to_use     = '[email protected]'
  )
  begin
    require 'bio' # For now, until we get rid of the Bio component.
  rescue LoadError
    # Handled right below: Bio may still have been made available by
    # other means, so we only give up if the constant is truly missing.
  end
  # ======================================================================= #
  # Without the Bio namespace we cannot continue. Report this clearly
  # rather than failing with an unrelated NameError further down.
  # ======================================================================= #
  unless defined?(Bio)
    warn 'The bio gem is not available, thus efetch() can not be used. '\
         'Try: gem install bio'
    return nil
  end
  # ======================================================================= #
  # Set any email next.
  # ======================================================================= #
  Bio::NCBI.default_email = email_to_use
  Bio::NCBI::REST::EFetch.nucleotide(accession_number)
end

.efetch_by_url(target_id = :default) ⇒ Object

#

Bioroebe::Ncbi.efetch_by_url

This method is much simpler than Bioroebe::Ncbi.efetch; it just uses a URL such as:

http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=189458859&rettype=fasta&retmode=text

or

https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=NP_000092.2&rettype=fasta&retmode=text

That way we don’t even have to parse anything! Just provide it the target ID.

If you want to obtain the CDS, or a subset, you can do this:

https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=189458859&rettype=fasta&retmode=text&from=389&to=2641
/nuccore/NM_007315.3?&;feature=CDS
https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NM_007315.3&rettype=fasta&retmode=text&feature=CDS
#


117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# File 'lib/bioroebe/ncbi/efetch.rb', line 117

# ========================================================================= #
# === Bioroebe::Ncbi.efetch_by_url
#
# Downloads a FASTA record through the NCBI efetch URL API and stores it
# locally.
#
# The argument may be:
#
#   - :default (the ID '189458859' is used then)
#   - a bare ID such as 'NM_007315.3', 'NP_000092.2' or 189458859
#   - a string starting with 'efetch.fcgi' (the eutils base URL is
#     prepended)
#   - a full URL starting with 'http' (used as-is)
#   - an Array, in which case only its first element is used
#
# IDs starting with NP_, XP_ or YP_ are queried against db=protein; all
# other bare IDs (including NM_ mRNA accessions) go to db=nuccore.
#
# Side effects: changes into ::Bioroebe.log_dir? via cd, writes the
# download there, renames it via
# ::Bioroebe.return_new_filename_based_on_fasta_identifier and finally
# moves it into ::Bioroebe.fasta_directory?.
#
# Returns the final file location on success. If no ID was given, a
# message is shown and exit is called. If an OpenURI::HTTPError is raised,
# the error is reported and the value of `pp error` (the error object) is
# returned.
# ========================================================================= #
def self.efetch_by_url(
    target_id = :default
  )
  target_id = target_id.first if target_id.is_a? Array
  # ======================================================================= #
  # === :default
  # ======================================================================= #
  target_id = '189458859' if target_id == :default
  # ======================================================================= #
  # Always work on an unfrozen String copy. This also allows Integer IDs
  # and makes the "no ID given" check below reachable for nil.
  # ======================================================================= #
  target_id = target_id.to_s.dup
  if target_id.empty?
    opne 'No target id was passed. Please provide an ID.'
    exit
  end
  # ======================================================================= #
  # Next we will build up our desired URL. Since the user can also provide
  # the full remote URL, we will only add what is necessary here.
  # ======================================================================= #
  if target_id.start_with? 'http'
    use_this_url = target_id.dup
  else
    use_this_database_type = 'nuccore' # This is for a nucleotide sequence.
    # ===================================================================== #
    # Assume a protein sequence for these prefixes. Example:
    #
    #   https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=NP_000092.2&rettype=fasta&retmode=text
    #
    # NM_ is deliberately absent: it denotes mRNA and thus belongs to
    # the nuccore database.
    # ===================================================================== #
    if target_id.start_with?('NP_', 'XP_', 'YP_')
      use_this_database_type = 'protein'
    end
    if target_id.start_with? 'efetch.fcgi'
      target_id.prepend('https://eutils.ncbi.nlm.nih.gov/entrez/eutils/')
    end
    if target_id.include?('entrez/eutils/efetch.fcgi?db=') ||
       target_id.include?('&rettype=fasta&retmode=text')
      use_this_url = target_id.dup
    else
      use_this_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db="\
                     "#{use_this_database_type}&id="\
                     "#{target_id}"\
                     "&rettype=fasta&retmode=text"
    end
  end
  opnn; ::Bioroebe.erev 'We will use the following url: '
  opne "  #{::Colours.simp(use_this_url)}"
  # ======================================================================= #
  # Go to our log-directory.
  # ======================================================================= #
  cd ::Bioroebe.log_dir?
  # ======================================================================= #
  # Next, determine the name of the local file. A trailing 'report=fasta'
  # is dropped first; if an id= query parameter is present then only its
  # value is used as the file name.
  # ======================================================================= #
  local_name = File.basename(target_id.sub(/report=fasta\z/, ''))
  id_in_query = local_name[/[?&]id=([^&]+)/, 1]
  local_name = id_in_query if id_in_query
  # ======================================================================= #
  # '?', '&' and '=' are unwanted in file names. Only the file name is
  # sanitized, never the directory part.
  # ======================================================================= #
  local_name = "#{local_name}.fasta".tr('?&=', '_')
  save_into_this_file = ::Bioroebe.rds(::Bioroebe.log_dir?+local_name).dup
  opnn; ::Bioroebe.erev 'We will save into the file `'+
        ::Colours.sfile(save_into_this_file)+'`.'
  begin # We have to rescue OpenURI::HTTPError errors.
    # The block form makes sure that the remote handle is closed again.
    File.write(save_into_this_file, URI.open(use_this_url, &:read))
    # ===================================================================== #
    # Next, we will rename this file to a "better" name.
    #
    # A Fasta identifier has several entries separated by '|'.
    #
    # First, we grab the identifier:
    # ===================================================================== #
    new_filename = ::Bioroebe.return_new_filename_based_on_fasta_identifier(
      save_into_this_file
    )
    opnn; ::Bioroebe.erev 'Next renaming `'+::Colours.sfile(save_into_this_file)+
                          ::Bioroebe.rev+'` to'
    opnn; ::Bioroebe.erev "  `#{::Colours.sfile(new_filename)}`."
    ::Bioroebe.mv(save_into_this_file, new_filename)
    fasta_directory = ::Bioroebe.fasta_directory?
    unless File.directory? fasta_directory
      ::Bioroebe.mkdir_p(fasta_directory)
    end
    # ===================================================================== #
    # Move it into the fasta/ subdirectory, to keep things cleaner.
    # ===================================================================== #
    if File.exist? fasta_directory
      even_newer_filename = fasta_directory+File.basename(new_filename)
      ::Bioroebe.mv(new_filename, even_newer_filename)
      opnn; ::Bioroebe.erev "Moving into `#{::Colours.sfile(even_newer_filename)}`."
      new_filename = even_newer_filename
    end
    return new_filename # Return the new location here.
  rescue OpenURI::HTTPError => error
    ::Bioroebe.erev 'We could not download from this url: '+
                    ::Colours.simp(use_this_url)
    ::Bioroebe.erev 'The error was:'
    pp error # pp returns its argument, so the error is returned here.
  end
end

.opnn ⇒ Object

#

Bioroebe::Ncbi.opnn

#


84
85
86
# File 'lib/bioroebe/ncbi/efetch.rb', line 84

# ========================================================================= #
# === Bioroebe::Ncbi.opnn
#
# Calls ::Opn.opn, passing the NAMESPACE constant as the namespace:
# keyword argument.
#
# NOTE(review): presumably this prints a namespace prefix in front of
# subsequent output - confirm against the Opn project.
# ========================================================================= #
def self.opnn
  ::Opn.opn(namespace: NAMESPACE)
end

Instance Method Details

#input? ⇒ Boolean

#

input?

#

Returns:

  • (Boolean)


78
79
80
# File 'lib/bioroebe/ncbi/ncbi.rb', line 78

# ========================================================================= #
# === input?
#
# Query method that returns the current value of @input. Note that this
# is the stored value itself, not a strict true/false value.
# ========================================================================= #
def input?
  @input
end

#main_url? ⇒ Boolean

#

main_url?

#

Returns:

  • (Boolean)


53
54
55
# File 'lib/bioroebe/ncbi/ncbi.rb', line 53

# ========================================================================= #
# === main_url?
#
# Returns the NCBI_URL constant. Note that this is the constant's value
# itself, not a strict true/false value.
# ========================================================================= #
def main_url?
  NCBI_URL
end

#reset ⇒ Object

#

reset (reset tag)

#


46
47
48
# File 'lib/bioroebe/ncbi/ncbi.rb', line 46

# ========================================================================= #
# === reset                                                     (reset tag)
#
# Invokes the reset method of the superclass, explicitly passing no
# arguments. Nothing else is (re)initialized here.
# ========================================================================= #
def reset
  super()
end

#run ⇒ Object

#

run (run tag)

#


85
86
87
# File 'lib/bioroebe/ncbi/ncbi.rb', line 85

# ========================================================================= #
# === run                                                         (run tag)
#
# Hands the value of input? to open_in_browser.
#
# NOTE(review): open_in_browser is defined outside of this view;
# presumably it opens the given URL in a browser - confirm.
# ========================================================================= #
def run
  open_in_browser(input?)
end

#set_input(i = 'STAT3') ⇒ Object

#

set_input

#


60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/bioroebe/ncbi/ncbi.rb', line 60

# ========================================================================= #
# === set_input
#
# Builds the value stored in @input: the search term is appended to
# main_url?. An Array is joined via ' ' (and stripped) beforehand; any
# other object is converted via to_s.
#
# Should the result contain a ' ' character then it is stripped and put
# between '"' quotes.
#
# Returns the new value of @input.
# ========================================================================= #
def set_input(
    i = 'STAT3'
  )
  search_term = i.is_a?(Array) ? i.join(' ').strip : i
  full_url = "#{main_url?}#{search_term}"
  # ======================================================================= #
  # Next, add quotes if we have a ' ' token.
  # ======================================================================= #
  full_url = %("#{full_url.strip}") if full_url.include? ' '
  @input = full_url
end