Class: Bioroebe::Sequence
Constant Summary
collapse
- SHALL_WE_UPCASE =
#
SHALL_WE_UPCASE
This constant determines whether the given input at hand will be upcased or whether it will not.
Note that the value :do_upcase implies true - so it is equivalent to setting it to true. In my opinion it reads nicer than true or false, so it will be retained as it is.
#
:do_upcase
- REMOVE_INVALID_CHARACTERS =
#
REMOVE_INVALID_CHARACTERS
If the following constant is set to true then invalid characters from the given input will be eliminated.
#
true
Class Method Summary
collapse
Instance Method Summary
collapse
-
#automatic_support_for_nucleotides ⇒ Object
# === automatic_support_for_nucleotides.
-
#description? ⇒ Boolean
(also: #desc?)
# === description?.
-
#index(i) ⇒ Object
# === index ========================================================================= #.
-
#infer_type ⇒ Object
(also: #try_to_infer_the_type)
# === infer_type.
-
#initialize(this_sequence = 'ATCG', &block) ⇒ Sequence
constructor
# === initialize.
-
#is_a_protein? ⇒ Boolean
(also: #is_protein?)
# === is_a_protein? ========================================================================= #.
-
#is_a_protein_now ⇒ Object
# === is_a_protein_now.
-
#is_DNA? ⇒ Boolean
(also: #is_dna?)
# === is_DNA? ========================================================================= #.
-
#is_RNA? ⇒ Boolean
(also: #is_rna?)
# === is_RNA? ========================================================================= #.
-
#map(&block) ⇒ Object
# === map ========================================================================= #.
-
#n_uracil? ⇒ Boolean
# === n_uracil?.
-
#randomize(i = { 'A'=>1,'C'=>2,'G'=>3,'T'=>4 }) ⇒ Object
# === randomize.
-
#remove_invalid_entries_from_the_dna_sequence(i = sequence?) ⇒ Object
# === remove_invalid_entries_from_the_dna_sequence ========================================================================= #.
-
#remove_invalid_entries_from_the_dna_sequence!(i = sequence?) ⇒ Object
# === remove_invalid_entries_from_the_dna_sequence! ========================================================================= #.
-
#reset ⇒ Object
# === reset (reset tag) ========================================================================= #.
-
#return_string_nucleotides_or_aminoacids(type = type? ) ⇒ Object
(also: #nucleotides_or_aminoacids?)
# === return_string_nucleotides_or_aminoacids.
-
#sanitize_dataset(i = type? ) ⇒ Object
(also: #normalize)
# === sanitize_dataset.
-
#sanitize_rna ⇒ Object
# === sanitize_rna.
-
#save_sequence_to_this_file(into) ⇒ Object
# === save_sequence_to_this_file.
-
#set_description(i = nil) ⇒ Object
(also: #set_desc, #desc=)
# === set_description.
-
#set_dna ⇒ Object
(also: #set_dna_type, #set_DNA_type, #is_DNA_now)
# === set_dna ========================================================================= #.
-
#set_protein ⇒ Object
(also: #set_protein_type)
# === set_protein ========================================================================= #.
-
#set_rna ⇒ Object
(also: #set_rna_type, #convert_to_rna)
# === set_rna.
-
#set_save_file(i = "#{Bioroebe.log_dir?}default_sequence.fasta") ⇒ Object
# === set_save_file.
-
#set_sequence(i, upcase_downcase_or_make_no_modification = shall_we_upcase? ) ⇒ Object
(also: #set_string, #set_input, #set_this_sequence)
# === set_sequence.
-
#set_type(i = :dna) ⇒ Object
(also: #set_alphabet, #set_mode)
# === set_type.
-
#shall_we_upcase? ⇒ Boolean
# === shall_we_upcase? ========================================================================= #.
-
#size? ⇒ Boolean
# === size? ========================================================================= #.
-
#to_genbank ⇒ Object
# === to_genbank.
-
#to_regexp ⇒ Object
(also: #to_regex, #to_re)
# === to_regexp.
-
#type? ⇒ Boolean
(also: #type)
# === type?.
Methods inherited from RawSequence
#+, #<<, #[]=, #calculate_levensthein_distance, #chars?, #complement, #composition?, #count, #delete, #delete!, #downcase, #each_char, #empty?, #find_substring_indices, #first_position=, #freeze, #gsub, #gsub!, #include?, #insert_at_this_position, #prepend, #remove_n_characters_from_the_left_side, #reverse, #reverse!, #reverse_complement, #scan, #set_raw_sequence, #shuffle, #split, #start_with?, #strip, #subseq, #to_s, #to_str, #tr!, #upcase!
Constructor Details
#initialize(this_sequence = 'ATCG', &block) ⇒ Sequence
#
initialize
The first argument given to the constructor (.new()) will become the sequence.
Initialization example:
seq = Bioroebe::Sequence.new('ATTGCCG')
#
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
|
# File 'lib/bioroebe/sequence/sequence.rb', line 69
# ========================================================================= #
# === initialize
#
# The first argument given to the constructor (.new()) will become the
# sequence. It may either be the sequence itself or a Hash of options.
#
# When a Hash is given, the following keys are consumed:
#
#   :file                          -> passed to set_save_file()
#   :desc                          -> passed to set_description()
#   :alphabet / :type / :aminoacid -> passed to set_type() (first match wins)
#   :seq / :sequence               -> becomes the sequence
#
# A block may be given; if it yields :is_DNA / :is_dna the type is set
# to DNA, if it yields :aminoacid the type is set to protein.
#
# Initialization example:
#
#   seq = Bioroebe::Sequence.new('ATTGCCG')
#
# ========================================================================= #
def initialize(
    this_sequence = 'ATCG',
    &block
  )
  reset
  _ = this_sequence
  if _.is_a? Hash
    # Work on a copy, so that the delete() calls below do not strip
    # keys from the Hash owned by the caller.
    _ = _.dup
    if _.has_key? :file
      set_save_file(_.delete(:file))
    end
    if _.has_key? :desc
      set_description(_.delete(:desc))
    end
    if _.has_key? :alphabet
      set_type(_.delete(:alphabet))
    elsif _.has_key? :type
      set_type(_.delete(:type))
    elsif _.has_key? :aminoacid
      set_type(_.delete(:aminoacid))
    end
    # NOTE(review): if neither :seq nor :sequence is present, the
    # remaining Hash itself is handed to set_sequence() - confirm
    # whether that is intended.
    if _.has_key? :seq
      _ = _.delete :seq
    elsif _.has_key? :sequence
      _ = _.delete :sequence
    end
  end
  set_sequence(_)
  if block_given?
    yielded = yield
    case yielded
    when :is_DNA,
         :is_dna
      set_DNA_type
    when :aminoacid
      set_protein_type
    end
  end
end
|
Class Method Details
.[](i) ⇒ Object
#
Bioroebe::Sequence[]
Invocation example:
sequence = Bioroebe::Sequence['atgggtgggcccc']
#
654
655
656
|
# File 'lib/bioroebe/sequence/sequence.rb', line 654
# ========================================================================= #
# === Bioroebe::Sequence[]
#
# Shortcut that forwards its argument to .new().
#
# Invocation example:
#
#   sequence = Bioroebe::Sequence['atgggtgggcccc']
#
# ========================================================================= #
def self.[](i)
  self.new(i)
end
|
.sequence_from_file(this_file) ⇒ Object
#
Bioroebe::Sequence.sequence_from_file
This method can be used to read in a dataset from a file. The first argument to this method is the path of the file that is to be read.
Invocation examples:
x = Bioroebe::Sequence.sequence_from_file('/Depot/Temp/Bioroebe/vector_pBR322.fasta')
x = Bioroebe::Sequence.sequence_from_file('/home/x/DATA/PROGRAMMING_LANGUAGES/ruby/src/bioroebe/lib/bioroebe/data/alu_elements.fasta')
#
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
|
# File 'lib/bioroebe/sequence/sequence.rb', line 629
# ========================================================================= #
# === Bioroebe::Sequence.sequence_from_file
#
# Read a sequence from the file at `this_file`.
#
# Lines starting with '#' are skipped. If the first remaining line
# starts with '>' (a FASTA header) it is dropped as well. All other
# lines are joined and stored, without changing the case, in a new
# Bioroebe::Sequence, which is then returned.
#
# If the file does not exist, a message is emitted via e() and no
# sequence object is created.
# ========================================================================= #
def self.sequence_from_file(this_file)
  if File.exist? this_file
    _ = ::Bioroebe::Sequence.new
    dataset = File.readlines(this_file).map(&:chomp).reject { |line|
      line.start_with? '#'
    }
    # dataset may be empty (empty file, or a file with comments only),
    # in which case dataset.first is nil - hence the safe navigation.
    dataset.shift if dataset.first&.start_with?('>')
    sequence = dataset.join
    _.set_sequence(sequence, :do_not_downcase)
    return _
  else
    e "No file called `#{this_file}` exists."
  end
end
|
Instance Method Details
#automatic_support_for_nucleotides ⇒ Object
#
automatic_support_for_nucleotides
This adds automatic support for RNA and DNA to this sequence object.
#
519
520
521
522
|
# File 'lib/bioroebe/sequence/sequence.rb', line 519
# ========================================================================= #
# === automatic_support_for_nucleotides
#
# Load the nucleotide module and extend this particular object with
# Bioroebe::NucleotideModule.
# ========================================================================= #
def automatic_support_for_nucleotides
  require('bioroebe/sequence/nucleotide_module/nucleotide_module.rb')
  nucleotide_module = Bioroebe::NucleotideModule
  extend nucleotide_module
end
|
#description? ⇒ Boolean
Also known as:
desc?
#
description?
Give us back the description of the sequence object at hand.
#
267
268
269
|
# File 'lib/bioroebe/sequence/sequence.rb', line 267
# ========================================================================= #
# === description?
#
# Return whatever is stored under the :description key of the internal
# Hash (nil if nothing has been stored there).
# ========================================================================= #
def description?
  stored_description = @internal_hash[:description]
  stored_description
end
|
#index(i) ⇒ Object
196
197
198
|
# File 'lib/bioroebe/sequence/sequence.rb', line 196
# ========================================================================= #
# === index
#
# Delegate to the #index method of the stored sequence and return
# its result.
# ========================================================================= #
def index(i)
  stored_sequence = @sequence
  stored_sequence.index(i)
end
|
#infer_type ⇒ Object
Also known as:
try_to_infer_the_type
#
infer_type
This method attempts to determine whether the main sequence is a DNA, RNA or protein. Right now this is not very sophisticated, so we have to improve this at a later time (April 2023).
#
#is_a_protein? ⇒ Boolean
Also known as:
is_protein?
598
599
600
|
# File 'lib/bioroebe/sequence/sequence.rb', line 598
# ========================================================================= #
# === is_a_protein?
#
# Return true if the type stored in the internal Hash is :protein.
# ========================================================================= #
def is_a_protein?
  current_type = @internal_hash[:type]
  current_type == :protein
end
|
#is_a_protein_now ⇒ Object
#
is_a_protein_now
This will force the given sequence to “become” a protein - or be assumed to be a protein past this point.
#
384
385
386
|
# File 'lib/bioroebe/sequence/sequence.rb', line 384
# ========================================================================= #
# === is_a_protein_now
#
# Mark the sequence as a protein by storing :protein as the type in
# the internal Hash. The sequence itself is not touched here.
# ========================================================================= #
def is_a_protein_now
  @internal_hash.store(:type, :protein)
end
|
#is_DNA? ⇒ Boolean
Also known as:
is_dna?
391
392
393
|
# File 'lib/bioroebe/sequence/sequence.rb', line 391
# ========================================================================= #
# === is_DNA?
#
# Return true if the type stored in the internal Hash is :dna.
# ========================================================================= #
def is_DNA?
  current_type = @internal_hash[:type]
  current_type == :dna
end
|
#is_RNA? ⇒ Boolean
Also known as:
is_rna?
398
399
400
|
# File 'lib/bioroebe/sequence/sequence.rb', line 398
# ========================================================================= #
# === is_RNA?
#
# Return true if the type stored in the internal Hash is :rna.
# ========================================================================= #
def is_RNA?
  current_type = @internal_hash[:type]
  current_type == :rna
end
|
#map(&block) ⇒ Object
203
204
205
|
# File 'lib/bioroebe/sequence/sequence.rb', line 203
# ========================================================================= #
# === map
#
# Map over the individual characters of the sequence and return the
# resulting Array.
#
# set_sequence() stores the sequence as a String, and String does not
# respond to #map, so we iterate over its characters instead. Should
# @sequence already respond to #map, it is used directly.
# ========================================================================= #
def map(&block)
  elements = @sequence.respond_to?(:map) ? @sequence : @sequence.to_s.chars
  elements.map(&block)
end
|
#n_uracil? ⇒ Boolean
#
n_uracil?
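Report how many uracils can be found in the given sequence. Note that thymine (T) is converted into uracil (U) before counting, so both T and U are counted. This is more of an ad-hoc method, though.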
#
277
278
279
|
# File 'lib/bioroebe/sequence/sequence.rb', line 277
# ========================================================================= #
# === n_uracil?
#
# Return how many uracils the sequence contains once every thymine is
# read as uracil: the sequence is upcased and both 'T' and 'U' are
# counted.
# ========================================================================= #
def n_uracil?
  upcased_sequence = @sequence.to_s.upcase
  upcased_sequence.count('TU')
end
|
#randomize(i = { 'A'=>1,'C'=>2,'G'=>3,'T'=>4 }) ⇒ Object
586
587
588
589
590
591
592
593
|
# File 'lib/bioroebe/sequence/sequence.rb', line 586
# ========================================================================= #
# === randomize
#
# Ask Bioroebe.random_dna() for a sequence of the same size as the
# current one.
#
# If the argument is a Hash, every key is repeated as often as its
# value says and the pieces are joined into one String, e.g.
# { 'A'=>1,'C'=>2 } becomes "ACC". Any other argument is passed on
# unchanged.
# ========================================================================= #
def randomize(
    i = { 'A'=>1,'C'=>2,'G'=>3,'T'=>4 }
  )
  pool = i
  if pool.is_a?(Hash)
    pieces = pool.each_with_object([]) { |(letter, weight), collected|
      collected << (letter * weight).to_s
    }
    pool = pieces.join
  end
  ::Bioroebe.random_dna(size?, pool)
end
|
#remove_invalid_entries_from_the_dna_sequence(i = sequence?) ⇒ Object
#
remove_invalid_entries_from_the_dna_sequence
#
536
537
538
539
540
|
# File 'lib/bioroebe/sequence/sequence.rb', line 536
# ========================================================================= #
# === remove_invalid_entries_from_the_dna_sequence
#
# Return a new String that contains only those characters of the
# input whose upcased form is included in DNA_NUCLEOTIDES. The
# original case of the kept characters is retained; the stored
# sequence is not modified.
# ========================================================================= #
def remove_invalid_entries_from_the_dna_sequence(i = sequence?)
  kept_characters = []
  i.chars.each { |nucleotide|
    kept_characters << nucleotide if DNA_NUCLEOTIDES.include?(nucleotide.upcase)
  }
  return kept_characters.join
end
|
#remove_invalid_entries_from_the_dna_sequence!(i = sequence?) ⇒ Object
#
remove_invalid_entries_from_the_dna_sequence!
#
545
546
547
548
549
550
|
# File 'lib/bioroebe/sequence/sequence.rb', line 545
# ========================================================================= #
# === remove_invalid_entries_from_the_dna_sequence!
#
# Like the variant without the trailing "!", but the filtered result
# is handed to set_sequence(), so the stored sequence is replaced.
# ========================================================================= #
def remove_invalid_entries_from_the_dna_sequence!(i = sequence?)
  filtered = i.chars.each_with_object([]) { |nucleotide, kept_characters|
    kept_characters << nucleotide if DNA_NUCLEOTIDES.include?(nucleotide.upcase)
  }.join
  set_sequence(filtered)
end
|
#reset ⇒ Object
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
|
# File 'lib/bioroebe/sequence/sequence.rb', line 150
# ========================================================================= #
# === reset                                                     (reset tag)
#
# (Re-)create the internal Hash with its initial entries, then let
# set_save_file() and set_description() apply their defaults.
# ========================================================================= #
def reset
  @internal_hash = {
    type:            nil,
    shall_we_upcase: SHALL_WE_UPCASE,
    save_file:       nil
  }
  set_save_file
  set_description
end
|
#return_string_nucleotides_or_aminoacids(type = type?
) ⇒ Object
Also known as:
nucleotides_or_aminoacids?
#
return_string_nucleotides_or_aminoacids
This will either return the String “nucleotides” or “aminoacids”.
This functionality may be useful in downstream applications that try to display the correct terminology/word.
#
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
|
# File 'lib/bioroebe/sequence/sequence.rb', line 560
# ========================================================================= #
# === return_string_nucleotides_or_aminoacids
#
# Return the String 'nucleotides' for the types :rna and :dna, the
# String 'aminoacids' for :protein, and nil for anything else.
# ========================================================================= #
def return_string_nucleotides_or_aminoacids(
    type = type?
  )
  if %i[rna dna].include?(type)
    'nucleotides'
  elsif :protein == type
    'aminoacids'
  end
end
|
#sanitize_dataset(i = type?
) ⇒ Object
Also known as:
normalize
#
sanitize_dataset
This will sanitize the dataset, in particular for RNA and DNA.
#
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
|
# File 'lib/bioroebe/sequence/sequence.rb', line 306
# ========================================================================= #
# === sanitize_dataset
#
# Sanitize the stored sequence depending on the given type:
#
#   :rna     -> every 'T' is replaced by 'U' (in place)
#   :dna     -> every 'U' is replaced by 'T' (in place); afterwards, if
#               REMOVE_INVALID_CHARACTERS is set, the sequence is
#               replaced by the result of
#               remove_invalid_entries_from_the_dna_sequence()
#   :protein -> nothing is done
#
# Only uppercase 'T' / 'U' are translated.
# ========================================================================= #
def sanitize_dataset(
    i = type?
  )
  case i
  when :rna
    sequence?.tr!('T','U') if sequence?
  when :dna
    sequence?.tr!('U','T') if sequence?
    @sequence = remove_invalid_entries_from_the_dna_sequence if REMOVE_INVALID_CHARACTERS
  when :protein
    # Proteins are left as they are.
  end
end
|
#sanitize_rna ⇒ Object
#
sanitize_rna
This method will convert all T into U.
#
360
361
362
|
# File 'lib/bioroebe/sequence/sequence.rb', line 360
# ========================================================================= #
# === sanitize_rna
#
# Run sanitize_dataset() in :rna mode.
# ========================================================================= #
def sanitize_rna
  sanitize_dataset(:rna)
end
|
#save_sequence_to_this_file(into) ⇒ Object
#
save_sequence_to_this_file
We save to a file but we are silent about this action, unless the directory does not exist.
#
287
288
289
290
291
292
293
294
295
296
297
298
299
|
# File 'lib/bioroebe/sequence/sequence.rb', line 287
# ========================================================================= #
# === save_sequence_to_this_file
#
# Write the current sequence into the file `into` via
# Bioroebe.write_what_into(). If the directory part of `into` does not
# exist, nothing is written and a message is emitted via e() instead.
# ========================================================================= #
def save_sequence_to_this_file(into)
  what = sequence?
  base_dir = File.dirname(into)
  unless File.exist?(base_dir)
    return e("No directory at #{base_dir} exists, thus we can not save "\
             "the DNA sequence into a file.")
  end
  ::Bioroebe.write_what_into(what, into)
end
|
#set_description(i = nil) ⇒ Object
Also known as:
set_desc, desc=
#
set_description
Set a specific description for the given sequence object at hand.
If it is a DNA sequence then we can “tag” it via a specific name. This may not be hugely necessary, but nonetheless the option is there. Proteins can be named as well, of course.
#
411
412
413
|
# File 'lib/bioroebe/sequence/sequence.rb', line 411
# ========================================================================= #
# === set_description
#
# Set a specific description for the given sequence object at hand.
#
# The value is stored under the :description key of the internal Hash,
# which is where description?() reads it from. The @description
# instance variable is still assigned as well, in case other code
# reads it directly.
# ========================================================================= #
def set_description(i = nil)
  # The Hash is created on demand, so that this setter also works
  # when it is called before reset().
  (@internal_hash ||= {})[:description] = i
  @description = i
end
|
#set_dna ⇒ Object
Also known as:
set_dna_type, set_DNA_type, is_DNA_now
527
528
529
|
# File 'lib/bioroebe/sequence/sequence.rb', line 527
# ========================================================================= #
# === set_dna
#
# Set the type of this sequence to :dna via set_type().
# ========================================================================= #
def set_dna
  set_type :dna
end
|
#set_protein ⇒ Object
Also known as:
set_protein_type
498
499
500
|
# File 'lib/bioroebe/sequence/sequence.rb', line 498
# ========================================================================= #
# === set_protein
#
# Set the type of this sequence to :protein via set_type().
# ========================================================================= #
def set_protein
  set_type :protein
end
|
#set_rna ⇒ Object
Also known as:
set_rna_type, convert_to_rna
#
set_rna
Note that one alias name, the one called .convert_to_rna(), is a more explicit variant for “conversion” into RNA. It just changes one variable, though.
#
509
510
511
|
# File 'lib/bioroebe/sequence/sequence.rb', line 509
# ========================================================================= #
# === set_rna
#
# Set the type of this sequence to :rna via set_type().
# ========================================================================= #
def set_rna
  set_type :rna
end
|
#set_save_file(i = "#{Bioroebe.log_dir?}default_sequence.fasta") ⇒ Object
#
set_save_file
Set the location where the FASTA file will be saved.
The default will be into a file called “default_sequence.fasta”.
#
372
373
374
375
376
|
# File 'lib/bioroebe/sequence/sequence.rb', line 372
# ========================================================================= #
# === set_save_file
#
# Remember, under the :save_file key of the internal Hash, which file
# the sequence should be saved into.
#
# Without an argument the file "default_sequence.fasta" below
# Bioroebe.log_dir? is used.
# ========================================================================= #
def set_save_file(
    i = "#{Bioroebe.log_dir?}default_sequence.fasta"
  )
  @internal_hash.store(:save_file, i)
end
|
#set_sequence(i, upcase_downcase_or_make_no_modification = shall_we_upcase?
) ⇒ Object
Also known as:
set_string, set_input, set_this_sequence
#
set_sequence
This method sets the main sequence, aka DNA string or RNA string or protein string (aminoacids).
#
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
|
# File 'lib/bioroebe/sequence/sequence.rb', line 441
# ========================================================================= #
# === set_sequence
#
# This method sets the main sequence, aka DNA string or RNA string or
# protein string (aminoacids).
#
# The first argument is handled like this:
#
#   - an Array is joined (with ' ') into a single String
#   - anything that is not a String is converted via .to_s
#   - if the resulting String is the path of an existing file, the
#     content of that file is used instead
#   - if it consists of digits only and the type is DNA, it is handed
#     to n_random_dna()
#
# The second argument decides about the case: :do_upcase / :default
# upcase the input, :do_downcase downcases it, :do_not_downcase /
# :make_no_modification (and any other value) leave it as it is.
#
# The object passed in by the caller is never modified. After the
# sequence has been stored, sanitize_dataset() is run.
# ========================================================================= #
def set_sequence(
    i,
    upcase_downcase_or_make_no_modification = shall_we_upcase?
  )
  if i
    if i.is_a? Array
      i = i.join(' ').strip
    end
    i = i.to_s unless i.is_a? String
    if !i.empty? && File.exist?(i)
      i = File.read(i)
    end
    if i =~ /^\d+$/ && is_DNA?
      i = n_random_dna(i)
    end
    # The non-destructive variants are used on purpose: upcase! and
    # downcase! would modify the String owned by the caller.
    case upcase_downcase_or_make_no_modification
    when :do_not_downcase,
         :make_no_modification
      # Keep the input as it is.
    when :do_upcase,
         :default
      i = i.upcase
    when :do_downcase
      i = i.downcase
    end
  end
  @sequence = i.to_s.dup
  sanitize_dataset
end
|
#set_type(i = :dna) ⇒ Object
Also known as:
set_alphabet, set_mode
#
set_type
The type to use. By default, DNA.
#
347
348
349
350
351
352
|
# File 'lib/bioroebe/sequence/sequence.rb', line 347
# ========================================================================= #
# === set_type
#
# Set the type of the sequence; by default :dna.
#
# A String is downcased and converted into a Symbol, so 'DNA' and
# :dna are equivalent. The String passed in by the caller is not
# modified. When the new type is :rna, sanitize_rna() is run
# afterwards.
# ========================================================================= #
def set_type(i = :dna)
  # downcase (not downcase!) - the argument may be frozen, and it
  # belongs to the caller anyway.
  i = i.downcase if i.is_a? String
  i = i.to_sym unless i.is_a? Symbol
  @internal_hash[:type] = i
  sanitize_rna if i == :rna
end
|
#shall_we_upcase? ⇒ Boolean
258
259
260
|
# File 'lib/bioroebe/sequence/sequence.rb', line 258
# ========================================================================= #
# === shall_we_upcase?
#
# Return the value stored under the :shall_we_upcase key of the
# internal Hash.
# ========================================================================= #
def shall_we_upcase?
  upcase_setting = @internal_hash[:shall_we_upcase]
  upcase_setting
end
|
#size? ⇒ Boolean
210
211
212
|
# File 'lib/bioroebe/sequence/sequence.rb', line 210
# ========================================================================= #
# === size?
#
# Return the size of the stored sequence, as reported by its #size
# method.
# ========================================================================= #
def size?
  stored_sequence = @sequence
  stored_sequence.size
end
|
#to_genbank ⇒ Object
#
to_genbank
Convert into the genbank format.
Usage example:
x = Bioroebe::Sequence.new('aaaatgggggggggggccccgtt'); y = x.to_genbank
#
426
427
428
429
430
431
432
433
|
# File 'lib/bioroebe/sequence/sequence.rb', line 426
# ========================================================================= #
# === to_genbank
#
# Convert into the genbank format.
#
# The generator class is required on demand. The value of string? is
# passed to Bioroebe::GenbankFlatFileFormatGenerator.new() together
# with a block yielding :be_quiet, and the generator's string? result
# is returned.
#
# Usage example:
#
#   x = Bioroebe::Sequence.new('aaaatgggggggggggccccgtt'); y = x.to_genbank
#
# ========================================================================= #
def to_genbank
  generator_is_available = ::Bioroebe.const_defined?(:GenbankFlatFileFormatGenerator)
  require 'bioroebe/genbank/genbank_flat_file_format_generator.rb' unless generator_is_available
  generator = Bioroebe::GenbankFlatFileFormatGenerator.new(string?) { :be_quiet }
  generator.string?
end
|
#to_regexp ⇒ Object
Also known as:
to_regex, to_re
#
to_regexp
This method can be used to return a matching regexp-object.
#
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
|
# File 'lib/bioroebe/sequence/sequence.rb', line 219
# ========================================================================= #
# === to_regexp
#
# Build a case-insensitive Regexp out of the stored sequence.
#
# A, T, C and G are taken over literally, whereas the ambiguity codes
# B, D, H, K, M, N, R, S, V, W and Y are expanded into the matching
# character class. Every other character is skipped.
# ========================================================================= #
def to_regexp
  expansions = {
    'A' => 'A',
    'T' => 'T',
    'C' => 'C',
    'G' => 'G',
    'B' => '[TGC]',
    'D' => '[ATG]',
    'H' => '[ATC]',
    'K' => '[TG]',
    'M' => '[AC]',
    'N' => '[ATGC]',
    'R' => '[AG]',
    'S' => '[GC]',
    'V' => '[AGC]',
    'W' => '[AT]',
    'Y' => '[TC]'
  }
  pattern = @sequence.chars.each_with_object(''.dup) { |nucleotide, buffer|
    expansion = expansions[nucleotide.upcase]
    buffer << expansion if expansion
  }
  Regexp.new(pattern, Regexp::IGNORECASE)
end
|
#type? ⇒ Boolean
Also known as:
type
#
type?
The type can be :dna, :rna or :protein. The default will be :dna.
#
189
190
191
|
# File 'lib/bioroebe/sequence/sequence.rb', line 189
# ========================================================================= #
# === type?
#
# Return the value stored under the :type key of the internal Hash.
# ========================================================================= #
def type?
  current_type = @internal_hash[:type]
  current_type
end
|