Class: Bioroebe::Sequence
Constant Summary
collapse
- SHALL_WE_UPCASE =
#
SHALL_WE_UPCASE
This constant determines whether the given input at hand will be upcased or whether it will not.
Note that the value :do_upcase implies true - so it is equivalent to setting it to true. In my opinion it reads nicer than true or false, so it will be retained as it is.
#
:do_upcase
- REMOVE_INVALID_CHARACTERS =
#
REMOVE_INVALID_CHARACTERS
If the following constant is set to true then invalid characters from the given input will be eliminated.
#
true
Class Method Summary
collapse
Instance Method Summary
collapse
-
#automatic_support_for_nucleotides ⇒ Object
# === automatic_support_for_nucleotides.
-
#description? ⇒ Boolean
(also: #desc?)
# === description?.
-
#index(i) ⇒ Object
# === index ========================================================================= #.
-
#infer_type ⇒ Object
# === infer_type ========================================================================= #.
-
#initialize(this_sequence = 'ATCG', &block) ⇒ Sequence
constructor
# === initialize.
-
#is_a_protein? ⇒ Boolean
(also: #is_protein?)
# === is_a_protein? ========================================================================= #.
-
#is_a_protein_now ⇒ Object
# === is_a_protein_now.
-
#is_DNA? ⇒ Boolean
(also: #is_dna?)
# === is_DNA? ========================================================================= #.
-
#is_RNA? ⇒ Boolean
(also: #is_rna?)
# === is_RNA? ========================================================================= #.
-
#map(&block) ⇒ Object
# === map ========================================================================= #.
-
#n_uracil? ⇒ Boolean
# === n_uracil?.
-
#randomize(i = { 'A'=>1,'C'=>2,'G'=>3,'T'=>4 }) ⇒ Object
# === randomize.
-
#remove_invalid_entries_from_the_dna_sequence(i = sequence?) ) ⇒ Object
# === remove_invalid_entries_from_the_dna_sequence ========================================================================= #.
-
#remove_invalid_entries_from_the_dna_sequence!(i = sequence?) ) ⇒ Object
# === remove_invalid_entries_from_the_dna_sequence! ========================================================================= #.
-
#reset ⇒ Object
# === reset (reset tag) ========================================================================= #.
-
#return_string_nucleotides_or_aminoacids(type = type? ) ⇒ Object
(also: #nucleotides_or_aminoacids?)
# === return_string_nucleotides_or_aminoacids.
-
#sanitize_dataset(i = type? ) ⇒ Object
(also: #normalize)
# === sanitize_dataset.
-
#sanitize_rna ⇒ Object
# === sanitize_rna.
-
#save_sequence_to_this_file(into) ⇒ Object
# === save_sequence_to_this_file.
-
#set_description(i = nil) ⇒ Object
(also: #set_desc, #desc=)
# === set_description.
-
#set_dna ⇒ Object
(also: #set_dna_type, #set_DNA_type, #is_DNA_now)
# === set_dna ========================================================================= #.
-
#set_protein ⇒ Object
(also: #set_protein_type)
# === set_protein ========================================================================= #.
-
#set_rna ⇒ Object
(also: #set_rna_type, #convert_to_rna)
# === set_rna.
-
#set_save_file(i = "#{Bioroebe.log_dir?}default_sequence.fasta") ⇒ Object
# === set_save_file.
-
#set_sequence(i, upcase_downcase_or_make_no_modification = shall_we_upcase? ) ⇒ Object
(also: #set_string, #set_input, #set_this_sequence)
# === set_sequence.
-
#set_type(i = :dna) ⇒ Object
(also: #set_alphabet, #set_mode)
# === set_type.
-
#shall_we_upcase? ⇒ Boolean
# === shall_we_upcase? ========================================================================= #.
-
#size? ⇒ Boolean
# === size? ========================================================================= #.
-
#to_genbank ⇒ Object
# === to_genbank.
-
#to_regexp ⇒ Object
(also: #to_regex, #to_re)
# === to_regexp.
-
#type? ⇒ Boolean
(also: #type)
# === type?.
Methods inherited from RawSequence
#+, #<<, #[]=, #calculate_levensthein_distance, #chars?, #complement, #composition?, #count, #delete, #delete!, #downcase, #each_char, #empty?, #find_substring_indices, #first_position=, #freeze, #gsub, #gsub!, #include?, #insert_at_this_position, #prepend, #remove_n_characters_from_the_left_side, #reverse, #reverse!, #reverse_complement, #scan, #set_raw_sequence, #shuffle, #split, #start_with?, #strip, #subseq, #to_s, #to_str, #tr!, #upcase!
Constructor Details
#initialize(this_sequence = 'ATCG', &block) ⇒ Sequence
#
initialize
The first argument given to the constructor (.new()) will become the sequence.
Initialization example:
seq = Bioroebe::Sequence.new('ATTGCCG')
#
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
|
# File 'lib/bioroebe/sequence/sequence.rb', line 67
# ========================================================================= #
# === initialize
#
# The first argument becomes the sequence. It may be a plain value (handed
# to set_sequence as-is) or a Hash, in which case these keys are honoured:
#
#   :file                          -> set_save_file
#   :desc                          -> set_description
#   :alphabet, :type or :aminoacid -> set_type (first key found wins)
#   :seq or :sequence              -> the sequence itself
#
# An optional block may return :is_DNA / :is_dna or :aminoacid in order to
# force the type after the sequence has been set.
#
# Initialization example:
#
#   seq = Bioroebe::Sequence.new('ATTGCCG')
#
# ========================================================================= #
def initialize(
    this_sequence = 'ATCG',
    &block
  )
  reset
  _ = this_sequence
  if _.is_a? Hash
    # Work on a shallow copy, so that the delete() calls below do not
    # empty the Hash that the caller passed in.
    _ = _.dup
    set_save_file(_.delete(:file))   if _.has_key? :file
    set_description(_.delete(:desc)) if _.has_key? :desc
    # The type is assigned before the sequence, so that set_sequence can
    # sanitize according to it.
    if _.has_key? :alphabet
      set_type(_.delete(:alphabet))
    elsif _.has_key? :type
      set_type(_.delete(:type))
    elsif _.has_key? :aminoacid
      set_type(_.delete(:aminoacid))
    end
    if _.has_key? :seq
      _ = _.delete :seq
    elsif _.has_key? :sequence
      _ = _.delete :sequence
    else
      # No sequence key was given: do not let the leftover Hash be
      # stringified into a bogus sequence.
      _ = nil
    end
  end
  set_sequence(_)
  if block_given?
    case yield
    when :is_DNA,
         :is_dna
      set_DNA_type
    when :aminoacid
      set_protein_type
    end
  end
end
|
Class Method Details
.[](i) ⇒ Object
#
Bioroebe::Sequence[]
Invocation example:
sequence = Bioroebe::Sequence['atgggtgggcccc']
#
643
644
645
|
# File 'lib/bioroebe/sequence/sequence.rb', line 643
# ========================================================================= #
# === Bioroebe::Sequence[]
#
# Bracket shortcut for the constructor: the argument is forwarded
# unchanged to new() and the new object is returned.
# ========================================================================= #
def self.[](i)
  instance = new(i)
  instance
end
|
.sequence_from_file(this_file) ⇒ Object
#
Bioroebe::Sequence.sequence_from_file
This method can be used to read in a dataset from a file. The first argument to this method is the path to that file.
Invocation examples:
x = Bioroebe::Sequence.sequence_from_file('/Depot/Temp/Bioroebe/vector_pBR322.fasta')
x = Bioroebe::Sequence.sequence_from_file('/home/x/DATA/PROGRAMMING_LANGUAGES/ruby/src/bioroebe/lib/bioroebe/data/alu_elements.fasta')
#
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
|
# File 'lib/bioroebe/sequence/sequence.rb', line 618
# ========================================================================= #
# === Bioroebe::Sequence.sequence_from_file
#
# Reads the file at +this_file+ and returns a new Bioroebe::Sequence that
# holds its content. Lines starting with '#' are dropped; if the first
# remaining line starts with '>' (a FASTA header) it is dropped as well.
# The remaining lines are joined without a separator and stored without
# any case modification.
#
# If the file does not exist, a notification is emitted through e() and
# the result of that call is returned instead.
# ========================================================================= #
def self.sequence_from_file(this_file)
  if File.exist? this_file
    _ = ::Bioroebe::Sequence.new
    dataset = File.readlines(this_file).map(&:chomp).reject {|line|
      line.start_with? '#'
    }
    # The file may be empty or consist of comments only, in which case
    # there is no first line to inspect.
    first_line = dataset.first
    dataset.shift if first_line && first_line.start_with?('>')
    sequence = dataset.join
    _.set_sequence(sequence, :do_not_downcase)
    return _
  else
    e "No file called `#{this_file}` exists."
  end
end
|
Instance Method Details
#automatic_support_for_nucleotides ⇒ Object
#
automatic_support_for_nucleotides
This adds automatic support for RNA and DNA to this sequence object.
#
556
557
558
559
|
# File 'lib/bioroebe/sequence/sequence.rb', line 556
# ========================================================================= #
# === automatic_support_for_nucleotides
#
# Loads the nucleotide module on demand and extends this one object (not
# the whole class) with Bioroebe::NucleotideModule.
# ========================================================================= #
def automatic_support_for_nucleotides
  require 'bioroebe/sequence/nucleotide_module/nucleotide_module.rb'
  nucleotide_support = Bioroebe::NucleotideModule
  extend nucleotide_support
end
|
#description? ⇒ Boolean
Also known as:
desc?
#
description?
Give us back the description of the sequence object at hand.
#
265
266
267
|
# File 'lib/bioroebe/sequence/sequence.rb', line 265
# ========================================================================= #
# === description?
#
# Returns whatever is stored under the :description key of the internal
# Hash (nil if nothing is stored there).
# ========================================================================= #
def description?
  settings = @internal_hash
  settings[:description]
end
|
#index(i) ⇒ Object
194
195
196
|
# File 'lib/bioroebe/sequence/sequence.rb', line 194
# ========================================================================= #
# === index
#
# Delegates to the index() method of the stored sequence and returns its
# result.
# ========================================================================= #
def index(i)
  stored_sequence = @sequence
  stored_sequence.index(i)
end
|
#is_a_protein? ⇒ Boolean
Also known as:
is_protein?
399
400
401
|
# File 'lib/bioroebe/sequence/sequence.rb', line 399
# ========================================================================= #
# === is_a_protein?
#
# True if the type stored in the internal Hash is :protein.
# ========================================================================= #
def is_a_protein?
  current_type = @internal_hash[:type]
  current_type == :protein
end
|
#is_a_protein_now ⇒ Object
#
is_a_protein_now
This will force the given sequence to “become” a protein - or be assumed to be a protein past this point.
#
409
410
411
|
# File 'lib/bioroebe/sequence/sequence.rb', line 409
# ========================================================================= #
# === is_a_protein_now
#
# Marks the sequence as a protein by writing :protein into the :type slot
# of the internal Hash. The sequence string itself is not touched here.
# ========================================================================= #
def is_a_protein_now
  @internal_hash.store(:type, :protein)
end
|
#is_DNA? ⇒ Boolean
Also known as:
is_dna?
416
417
418
|
# File 'lib/bioroebe/sequence/sequence.rb', line 416
# ========================================================================= #
# === is_DNA?
#
# True if the type stored in the internal Hash is :dna.
# ========================================================================= #
def is_DNA?
  current_type = @internal_hash[:type]
  current_type == :dna
end
|
#is_RNA? ⇒ Boolean
Also known as:
is_rna?
423
424
425
|
# File 'lib/bioroebe/sequence/sequence.rb', line 423
# ========================================================================= #
# === is_RNA?
#
# True if the type stored in the internal Hash is :rna.
# ========================================================================= #
def is_RNA?
  current_type = @internal_hash[:type]
  current_type == :rna
end
|
#map(&block) ⇒ Object
201
202
203
|
# File 'lib/bioroebe/sequence/sequence.rb', line 201
# ========================================================================= #
# === map
#
# Maps the given block over the stored sequence and returns the resulting
# Array.
#
# set_sequence stores the sequence as a String, and String has no map()
# method, so the block is applied to the individual characters. Should the
# stored object respond to map() itself, the call is delegated unchanged.
# ========================================================================= #
def map(&block)
  stored_sequence = @sequence
  return stored_sequence.map(&block) if stored_sequence.respond_to?(:map)
  stored_sequence.to_s.chars.map(&block)
end
|
#n_uracil? ⇒ Boolean
#
n_uracil?
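Report how many uracil (U) characters can be found in the given String. Note that every T is converted into U before counting, so thymine is counted as well. This is more of an ad-hoc method, though.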
#
294
295
296
|
# File 'lib/bioroebe/sequence/sequence.rb', line 294
# ========================================================================= #
# === n_uracil?
#
# Counts the characters of the upcased sequence that are either 'U' or
# 'T' - that is, every T is treated as if it were a U.
# ========================================================================= #
def n_uracil?
  upcased = @sequence.to_s.upcase
  upcased.count('TU')
end
|
#randomize(i = { 'A'=>1,'C'=>2,'G'=>3,'T'=>4 }) ⇒ Object
597
598
599
600
601
602
603
604
|
# File 'lib/bioroebe/sequence/sequence.rb', line 597
# ========================================================================= #
# === randomize
#
# Asks ::Bioroebe.random_dna for a sequence of the current size (size?).
#
# The argument is passed on as the second argument of that call. If it is
# a Hash, each key is first repeated value-times and the pieces are
# joined, so the default becomes the String 'ACCGGGTTTT'.
# ========================================================================= #
def randomize(
    i = { 'A'=>1,'C'=>2,'G'=>3,'T'=>4 }
  )
  if i.is_a? Hash
    repeated = i.map { |letter, copies| (letter * copies).to_s }
    i = repeated.join
  end
  ::Bioroebe.random_dna(size?, i)
end
|
#remove_invalid_entries_from_the_dna_sequence(i = sequence?)
) ⇒ Object
#
remove_invalid_entries_from_the_dna_sequence
#
573
574
575
576
577
|
# File 'lib/bioroebe/sequence/sequence.rb', line 573
# ========================================================================= #
# === remove_invalid_entries_from_the_dna_sequence
#
# Returns a new String that contains only those characters of the input
# whose upcased form is listed in DNA_NUCLEOTIDES. The original case of
# the retained characters is kept; nothing is stored on the object.
# ========================================================================= #
def remove_invalid_entries_from_the_dna_sequence(i = sequence?)
  permitted = i.chars.select { |nucleotide|
    DNA_NUCLEOTIDES.include?(nucleotide.upcase)
  }
  return permitted.join
end
|
#remove_invalid_entries_from_the_dna_sequence!(i = sequence?)
) ⇒ Object
#
remove_invalid_entries_from_the_dna_sequence!
#
582
583
584
585
586
587
|
# File 'lib/bioroebe/sequence/sequence.rb', line 582
# ========================================================================= #
# === remove_invalid_entries_from_the_dna_sequence!
#
# Same filter as the non-bang variant (only characters whose upcased form
# is in DNA_NUCLEOTIDES survive), but the filtered String is then handed
# to set_sequence; the return value is whatever set_sequence returns.
# ========================================================================= #
def remove_invalid_entries_from_the_dna_sequence!(i = sequence?)
  retained = []
  i.chars.each { |nucleotide|
    retained << nucleotide if DNA_NUCLEOTIDES.include? nucleotide.upcase
  }
  set_sequence(retained.join)
end
|
#reset ⇒ Object
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
|
# File 'lib/bioroebe/sequence/sequence.rb', line 148
# ========================================================================= #
# === reset (reset tag)
#
# (Re)creates the internal Hash with its default entries - no type, the
# class-wide SHALL_WE_UPCASE setting and no save file - and then applies
# the defaults of set_save_file and set_description.
# ========================================================================= #
def reset
  @internal_hash = {
    type:            nil,
    shall_we_upcase: SHALL_WE_UPCASE,
    save_file:       nil
  }
  set_save_file
  set_description
end
|
#return_string_nucleotides_or_aminoacids(type = type?
) ⇒ Object
Also known as:
nucleotides_or_aminoacids?
#
return_string_nucleotides_or_aminoacids
This will either return the String “nucleotides” or “aminoacids”.
This functionality may be useful in downstream applications that try to display the correct terminology/word.
#
277
278
279
280
281
282
283
284
285
286
|
# File 'lib/bioroebe/sequence/sequence.rb', line 277
# ========================================================================= #
# === return_string_nucleotides_or_aminoacids
#
# Returns the String 'nucleotides' for :rna and :dna, 'aminoacids' for
# :protein and nil for anything else.
# ========================================================================= #
def return_string_nucleotides_or_aminoacids(
    type = type?
  )
  if [:rna, :dna].include?(type)
    'nucleotides'
  elsif :protein == type
    'aminoacids'
  end
end
|
#sanitize_dataset(i = type?
) ⇒ Object
Also known as:
normalize
#
sanitize_dataset
This will sanitize the dataset, in particular for RNA and DNA.
#
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
|
# File 'lib/bioroebe/sequence/sequence.rb', line 324
# ========================================================================= #
# === sanitize_dataset
#
# Sanitizes the stored sequence according to the given type (by default
# the current type):
#
#   :protein -> nothing is done
#   :dna     -> every U becomes T (in place); if REMOVE_INVALID_CHARACTERS
#               is set, the sequence is then replaced by its filtered
#               variant
#   :rna     -> every T becomes U (in place)
#
# Nothing happens while no sequence has been set yet.
# ========================================================================= #
def sanitize_dataset(
    i = type?
  )
  case i
  when :protein
    # Proteins are left untouched.
  when :dna
    # Guard the whole branch: the filter below would otherwise be invoked
    # on nil when no sequence exists yet.
    if sequence?
      sequence?.tr!('U','T')
      if REMOVE_INVALID_CHARACTERS
        @sequence = remove_invalid_entries_from_the_dna_sequence
      end
    end
  when :rna
    sequence?.tr!('T','U') if sequence?
  end
end
|
#sanitize_rna ⇒ Object
#
sanitize_rna
This method will convert all T into U.
#
378
379
380
|
# File 'lib/bioroebe/sequence/sequence.rb', line 378
# ========================================================================= #
# === sanitize_rna
#
# Runs sanitize_dataset in :rna mode and returns its result.
# ========================================================================= #
def sanitize_rna
  sanitize_dataset(:rna)
end
|
#save_sequence_to_this_file(into) ⇒ Object
#
save_sequence_to_this_file
We save to a file but we are silent about this action, unless the directory does not exist.
#
305
306
307
308
309
310
311
312
313
314
315
316
317
|
# File 'lib/bioroebe/sequence/sequence.rb', line 305
# ========================================================================= #
# === save_sequence_to_this_file
#
# Writes the current sequence into the file +into+ through
# ::Bioroebe.write_what_into. If the directory part of +into+ does not
# exist, nothing is written and a message is emitted through e() instead.
# ========================================================================= #
def save_sequence_to_this_file(into)
  payload = sequence?
  target_directory = File.dirname(into)
  unless File.exist?(target_directory)
    return e("No directory at #{target_directory} exists, thus we can not save "\
             "the DNA sequence into a file.")
  end
  ::Bioroebe.write_what_into(payload, into)
end
|
#set_description(i = nil) ⇒ Object
Also known as:
set_desc, desc=
#
set_description
Set a specific description for the given sequence object at hand.
If it is a DNA sequence then we can “tag” it via a specific name. This may not be hugely necessary, but nonetheless the option is there. Proteins can be named as well, of course.
#
448
449
450
|
# File 'lib/bioroebe/sequence/sequence.rb', line 448
# ========================================================================= #
# === set_description
#
# Stores a description ("tag") for this sequence and returns it.
#
# The value is written into the :description slot of the internal Hash,
# because that is the slot description? reads from. @description is still
# assigned as before.
# NOTE(review): presumably nothing else depends on @description - confirm,
# then the instance variable can be dropped.
# ========================================================================= #
def set_description(i = nil)
  @description = i
  # The Hash is created lazily so that calling this method before reset()
  # keeps working, as it did before.
  (@internal_hash ||= {})[:description] = i
end
|
#set_dna ⇒ Object
Also known as:
set_dna_type, set_DNA_type, is_DNA_now
564
565
566
|
# File 'lib/bioroebe/sequence/sequence.rb', line 564
# ========================================================================= #
# === set_dna
#
# Declares the sequence to be DNA by calling set_type with :dna.
# ========================================================================= #
def set_dna
  dna_type = :dna
  set_type(dna_type)
end
|
#set_protein ⇒ Object
Also known as:
set_protein_type
535
536
537
|
# File 'lib/bioroebe/sequence/sequence.rb', line 535
# ========================================================================= #
# === set_protein
#
# Declares the sequence to be a protein by calling set_type with :protein.
# ========================================================================= #
def set_protein
  protein_type = :protein
  set_type(protein_type)
end
|
#set_rna ⇒ Object
Also known as:
set_rna_type, convert_to_rna
#
set_rna
Note that one alias name, the one called .convert_to_rna(), is a more explicit variant for “conversion” into RNA. It just changes one variable, though.
#
546
547
548
|
# File 'lib/bioroebe/sequence/sequence.rb', line 546
# ========================================================================= #
# === set_rna
#
# Declares the sequence to be RNA by calling set_type with :rna.
# ========================================================================= #
def set_rna
  rna_type = :rna
  set_type(rna_type)
end
|
#set_save_file(i = "#{Bioroebe.log_dir?}default_sequence.fasta") ⇒ Object
#
set_save_file
Designate the file into which the sequence (for instance in FASTA format) will be saved.
The default will be into a file called “default_sequence.fasta”.
#
390
391
392
393
394
|
# File 'lib/bioroebe/sequence/sequence.rb', line 390
# ========================================================================= #
# === set_save_file
#
# Remembers, in the :save_file slot of the internal Hash, the path of the
# file to use for saving. Without an argument this is
# "default_sequence.fasta", prefixed by Bioroebe.log_dir?.
# ========================================================================= #
def set_save_file(
    i = "#{Bioroebe.log_dir?}default_sequence.fasta"
  )
  @internal_hash.store(:save_file, i)
end
|
#set_sequence(i, upcase_downcase_or_make_no_modification = shall_we_upcase?
) ⇒ Object
Also known as:
set_string, set_input, set_this_sequence
#
set_sequence
This method sets the main sequence, aka DNA string or RNA string or protein string (aminoacids).
#
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
|
# File 'lib/bioroebe/sequence/sequence.rb', line 478
# ========================================================================= #
# === set_sequence
#
# Sets the main sequence (DNA, RNA or aminoacid string) and then runs
# sanitize_dataset; the return value is whatever sanitize_dataset returns.
#
# Input handling, in this order:
#
#   - an Array is joined with ' ' and stripped
#   - anything else that is not a String is converted via to_s
#   - if the resulting String names an existing regular file, the content
#     of that file is used instead
#   - if the String consists of digits only and the type is DNA, it is
#     replaced by the result of n_random_dna
#
# The second argument controls the case modification: :do_upcase or
# :default upcases, :do_downcase downcases, :do_not_downcase or
# :make_no_modification (or any other value) leaves the input as it is.
#
# The String passed in by the caller is never modified.
# ========================================================================= #
def set_sequence(
    i,
    upcase_downcase_or_make_no_modification = shall_we_upcase?
  )
  if i
    i = i.join(' ').strip if i.is_a? Array
    # Always work on a private, unfrozen copy: upcase!/downcase! below
    # must not alter the caller's String.
    i = i.to_s.dup
    # File.file? instead of File.exist?: a directory can not be read.
    if !i.empty? && File.file?(i)
      i = File.read(i)
    end
    # \A and \Z anchor the whole input; ^ and $ would also match a single
    # digit-only line inside a multi-line sequence.
    if i =~ /\A\d+\Z/ && is_DNA?
      i = n_random_dna(i)
    end
    case upcase_downcase_or_make_no_modification
    when :do_not_downcase,
         :make_no_modification
      # Keep the input as it is.
    when :do_upcase,
         :default
      i.upcase!
    when :do_downcase
      i.downcase!
    end
  end
  @sequence = i.to_s.dup
  sanitize_dataset
end
|
#set_type(i = :dna) ⇒ Object
Also known as:
set_alphabet, set_mode
#
set_type
The type to use. By default, DNA.
#
365
366
367
368
369
370
|
# File 'lib/bioroebe/sequence/sequence.rb', line 365
# ========================================================================= #
# === set_type
#
# Sets the type (alphabet) of the sequence; :dna by default. Strings are
# downcased and every non-Symbol is converted into a Symbol before it is
# stored in the :type slot of the internal Hash.
#
# When the new type is :rna, sanitize_rna is run right away and its
# result is returned; otherwise nil is returned.
# ========================================================================= #
def set_type(i = :dna)
  # Non-destructive downcase: the caller's String stays untouched and
  # frozen Strings are accepted as well.
  i = i.downcase if i.is_a? String
  i = i.to_sym unless i.is_a? Symbol
  @internal_hash[:type] = i
  sanitize_rna if i == :rna
end
|
#shall_we_upcase? ⇒ Boolean
256
257
258
|
# File 'lib/bioroebe/sequence/sequence.rb', line 256
# ========================================================================= #
# === shall_we_upcase?
#
# Returns the value stored under :shall_we_upcase in the internal Hash.
# ========================================================================= #
def shall_we_upcase?
  settings = @internal_hash
  settings[:shall_we_upcase]
end
|
#size? ⇒ Boolean
208
209
210
|
# File 'lib/bioroebe/sequence/sequence.rb', line 208
# ========================================================================= #
# === size?
#
# Returns the size of the stored sequence, as reported by its size()
# method.
# ========================================================================= #
def size?
  stored_sequence = @sequence
  stored_sequence.size
end
|
#to_genbank ⇒ Object
#
to_genbank
Convert into the genbank format.
Usage example:
x = Bioroebe::Sequence.new('aaaatgggggggggggccccgtt'); y = x.to_genbank
#
463
464
465
466
467
468
469
470
|
# File 'lib/bioroebe/sequence/sequence.rb', line 463
# ========================================================================= #
# === to_genbank
#
# Feeds the result of string? into a
# Bioroebe::GenbankFlatFileFormatGenerator (with a block that returns
# :be_quiet) and returns the string? of that generator. The generator
# file is required only if its constant is not defined yet.
#
# Usage example:
#
#   x = Bioroebe::Sequence.new('aaaatgggggggggggccccgtt'); y = x.to_genbank
#
# ========================================================================= #
def to_genbank
  generator_is_loaded = ::Bioroebe.const_defined?(:GenbankFlatFileFormatGenerator)
  require 'bioroebe/genbank/genbank_flat_file_format_generator.rb' unless generator_is_loaded
  input = string?
  generator = Bioroebe::GenbankFlatFileFormatGenerator.new(input) { :be_quiet }
  generator.string?
end
|
#to_regexp ⇒ Object
Also known as:
to_regex, to_re
#
to_regexp
This method can be used to return a matching regexp-object.
#
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
|
# File 'lib/bioroebe/sequence/sequence.rb', line 217
# ========================================================================= #
# === to_regexp
#
# Builds a case-insensitive Regexp out of the stored sequence. A, T, C and
# G are taken over literally; the ambiguity codes B, D, H, K, M, N, R, S,
# V, W and Y are expanded into the matching character class. Any other
# character is skipped.
# ========================================================================= #
def to_regexp
  expansions = {
    'A' => 'A',
    'T' => 'T',
    'C' => 'C',
    'G' => 'G',
    'B' => '[TGC]',
    'D' => '[ATG]',
    'H' => '[ATC]',
    'K' => '[TG]',
    'M' => '[AC]',
    'N' => '[ATGC]',
    'R' => '[AG]',
    'S' => '[GC]',
    'V' => '[AGC]',
    'W' => '[AT]',
    'Y' => '[TC]'
  }
  pattern = @sequence.chars.map { |symbol|
    # Unknown symbols contribute nothing to the pattern.
    expansions.fetch(symbol.upcase, '')
  }.join
  Regexp.new(pattern, Regexp::IGNORECASE)
end
|
#type? ⇒ Boolean
Also known as:
type
#
type?
The type can be :dna, :rna or :protein. The default will be :dna.
#
187
188
189
|
# File 'lib/bioroebe/sequence/sequence.rb', line 187
# ========================================================================= #
# === type?
#
# Returns the value stored under :type in the internal Hash.
# ========================================================================= #
def type?
  settings = @internal_hash
  settings[:type]
end
|