Class: Bioroebe::Parser::GFF
Overview
Constant Summary
collapse
- INPUT_FILE =
#
This file can be used for testing purposes.
#
'/Depot/Downloads/sequence.gff3'
CommandlineApplication::OLD_VERBOSE_VALUE
ColoursForBase::ARRAY_HTML_COLOURS_IN_USE
Constants inherited
from Base
Base::NAMESPACE
Instance Method Summary
collapse
#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opne, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #set_be_verbose, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #verbose_truth, #was_or_were, #without_extname, #write_what_into
#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first?, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens, #set_commandline_arguments
#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?
Methods inherited from Base
#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #file_readlines, #infer_the_namespace, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #namespace?, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into
Constructor Details
#initialize(i = ARGV, run_already = true) ⇒ GFF
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
# File 'lib/bioroebe/parsers/gff.rb', line 81
# ========================================================================= #
# === initialize
#
# The first argument is the input (by default ARGV); the second argument
# decides whether run() is invoked at the end of the constructor.
#
# If a block is given and it yields :do_not_check_for_missing_file then
# the input is handed to do_all_actions_without_parsing_any_file() and
# run() is skipped. If no block is given, the input is registered via
# set_input_file().
# ========================================================================= #
def initialize(i = ARGV, run_already = true)
  reset
  if block_given?
    # The Symbol is kept on the left-hand side, so the comparison is
    # dispatched on the Symbol, exactly as a case/when would do.
    if :do_not_check_for_missing_file == yield
      do_all_actions_without_parsing_any_file(i)
      run_already = false
    end
  else
    set_input_file(i)
  end
  run if run_already
end
|
Instance Method Details
#accession_id? ⇒ Boolean
187
188
189
|
# File 'lib/bioroebe/parsers/gff.rb', line 187
# ========================================================================= #
# === accession_id?
#
# Query method over @accession_id. The value is assigned by
# determine_accession_id_from_this_input().
# ========================================================================= #
def accession_id?
  @accession_id
end
|
#considering_splitting_the_gff_file_into_standalone_iles ⇒ Object
#
considering_splitting_the_gff_file_into_standalone_iles
#
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
|
# File 'lib/bioroebe/parsers/gff.rb', line 210
# ========================================================================= #
# === considering_splitting_the_gff_file_into_standalone_iles
#
# If more than one accession id is registered, then one file called
# "<accession_id>.gff3" is written per accession id, containing all
# lines of @dataset that belong to that accession id, joined via N.
#
# A line belongs to an accession id only if its first tab-separated
# column is equal to that id. This is the same rule that
# find_all_unique_accession_ids() uses to collect the ids. A plain
# substring check would wrongly assign lines of "NC_10" to "NC_1".
#
# If there is at most one accession id, and an input file was set, a
# notice is printed instead.
# ========================================================================= #
def considering_splitting_the_gff_file_into_standalone_iles
  if has_more_than_one_accession_ids?
    @array_unique_accession_ids.each { |this_accession_id|
      into = this_accession_id+'.gff3'
      opnn; erev 'Storing dataset for the accession id '+
                 sfancy(this_accession_id)+
                 rev+' into the file `'+sfile(into)+'`.'
      # Exact match on the first column rather than a substring match.
      what = @dataset.select { |line|
        line.split(TABULATOR).first == this_accession_id
      }.join(N)
      write_what_into(what, into)
    }
  else
    unless @input_file.nil?
      opnn; erev 'We were instructed to split into standalone files, but we'
      opnn; erev 'can not do so, as there is not more than one accession id'
      opnn; erev 'in this file.'
    end
  end
end
|
284
285
286
287
288
289
|
# File 'lib/bioroebe/parsers/gff.rb', line 284
# ========================================================================= #
# === determine_accession_id_from_this_input
#
# Stores the accession id in @accession_id. If the input contains a
# TABULATOR then only the part before the first TABULATOR is used;
# otherwise the input is stored as-is.
#
# nil is accepted and stored as nil. The visible caller passes
# @dataset.last, which is nil when the dataset has no entries.
# ========================================================================= #
def determine_accession_id_from_this_input(i)
  if i && i.include?(TABULATOR)
    i = i.split(TABULATOR).first
  end
  @accession_id = i
end
|
275
276
277
278
279
|
# File 'lib/bioroebe/parsers/gff.rb', line 275
# ========================================================================= #
# === do_actions_past_the_parsing_of_the_input_file
#
# The steps that follow the parsing stage, in this order: collect the
# unique accession ids, report them, then run the default action.
# ========================================================================= #
def do_actions_past_the_parsing_of_the_input_file
  find_all_unique_accession_ids
  report_all_accession_ids
  do_default_action
end
|
#do_all_actions_without_parsing_any_file(i) ⇒ Object
#
do_all_actions_without_parsing_any_file
#
267
268
269
270
|
# File 'lib/bioroebe/parsers/gff.rb', line 267
# ========================================================================= #
# === do_all_actions_without_parsing_any_file
#
# Runs the post-parsing actions without reading a file from disk. It is
# invoked from initialize() when the block given there yields
# :do_not_check_for_missing_file.
# ========================================================================= #
def do_all_actions_without_parsing_any_file(i)
# NOTE(review): as written, the next line merely evaluates `i`. It looks
# like the name of the method that was called with `i` is missing here;
# presumably it is the method that fills @dataset - TODO confirm against
# lib/bioroebe/parsers/gff.rb.
(i)
do_actions_past_the_parsing_of_the_input_file
end
|
#do_default_action(i = @what_to_do) ⇒ Object
294
295
296
297
298
299
300
301
302
303
304
305
|
# File 'lib/bioroebe/parsers/gff.rb', line 294
# ========================================================================= #
# === do_default_action
#
# Dispatches on the given action, which defaults to @what_to_do. The two
# actions :split_into_standalone_files and
# :try_to_split_into_standalone_files both trigger the splitting of the
# dataset; any other value does nothing and yields nil.
# ========================================================================= #
def do_default_action(i = @what_to_do)
  splitting_was_requested = [
    :split_into_standalone_files,
    :try_to_split_into_standalone_files
  ].include?(i)
  considering_splitting_the_gff_file_into_standalone_iles if splitting_was_requested
end
|
251
252
253
254
255
256
257
258
259
260
261
262
|
# File 'lib/bioroebe/parsers/gff.rb', line 251
# ========================================================================= #
# === do_parse_the_input_file
#
# Reads the content of the input file into @original_dataset, provided
# that an input file was set and exists on disk. Otherwise a message is
# printed and @original_dataset is left untouched.
# ========================================================================= #
def do_parse_the_input_file
  path = input_file?
  is_available = path && File.exist?(path)
  if is_available
    @original_dataset = File.read(path)
  else
    opnn
    erev "The input file does not exist at #{sfancy(path)}#{rev}."
  end
end
|
#find_all_unique_accession_ids ⇒ Object
#
find_all_unique_accession_ids
This method will find all unique accession IDs.
#
196
197
198
199
200
201
202
203
204
205
|
# File 'lib/bioroebe/parsers/gff.rb', line 196
# ========================================================================= #
# === find_all_unique_accession_ids
#
# Collects every distinct first tab-separated column of the lines in
# @dataset into @array_unique_accession_ids, keeping the order of first
# appearance. Lines without a first column are skipped.
#
# @dataset is nil until a dataset has been assigned (reset() sets it to
# nil); in that case there is nothing to collect and the method returns
# without raising.
# ========================================================================= #
def find_all_unique_accession_ids
  return if @dataset.nil?
  @dataset.each { |line|
    first = line.split(TABULATOR).first
    next if first.nil? || @array_unique_accession_ids.include?(first)
    @array_unique_accession_ids << first
  }
end
|
#has_more_than_one_accession_ids? ⇒ Boolean
#
has_more_than_one_accession_ids?
This method returns true if there is more than one accession id in the .gff/.gff3 file at hand.
#
140
141
142
|
# File 'lib/bioroebe/parsers/gff.rb', line 140
# ========================================================================= #
# === has_more_than_one_accession_ids?
#
# Returns true if at least two unique accession ids were registered in
# @array_unique_accession_ids, and false otherwise.
# ========================================================================= #
def has_more_than_one_accession_ids?
  @array_unique_accession_ids.size >= 2
end
|
162
163
164
|
# File 'lib/bioroebe/parsers/gff.rb', line 162
# ========================================================================= #
# === input_file?
#
# Query method over @input_file, which is assigned by set_input_file()
# and cleared to nil by reset().
# ========================================================================= #
def input_file?
  @input_file
end
|
#report_accession_id(i = @array_unique_accession_ids) ⇒ Object
Also known as:
report_this_accession_id, report_all_accession_ids
237
238
239
240
241
242
243
244
245
|
# File 'lib/bioroebe/parsers/gff.rb', line 237
# ========================================================================= #
# === report_accession_id
#
# Prints one line per accession id. An Array is walked recursively, one
# entry at a time; anything else is converted via .to_s and reported.
# By default all entries of @array_unique_accession_ids are reported.
# ========================================================================= #
def report_accession_id(i = @array_unique_accession_ids)
  unless i.is_a?(Array)
    opnn
    return erev("The accession id is `#{sfancy(i.to_s)}#{rev}`.")
  end
  i.each { |entry| report_accession_id(entry) }
end
|
#reset ⇒ Object
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
|
# File 'lib/bioroebe/parsers/gff.rb', line 108
# ========================================================================= #
# === reset
#
# Puts the instance into its initial state: calls the parent reset,
# infers the namespace, and then initializes the instance variables of
# this class.
# ========================================================================= #
def reset
  super()
  infer_the_namespace
  # Neither an input file nor a parsed dataset is known at this point.
  @input_file = @dataset = nil
  @array_unique_accession_ids = []
  # The default action is to split the dataset into standalone files.
  @what_to_do = :split_into_standalone_files
end
|
#run ⇒ Object
310
311
312
313
|
# File 'lib/bioroebe/parsers/gff.rb', line 310
# ========================================================================= #
# === run
#
# The main entry point: first parse the input file, then perform the
# actions that follow the parsing.
# ========================================================================= #
def run
  do_parse_the_input_file
  do_actions_past_the_parsing_of_the_input_file
end
|
147
148
149
150
151
152
153
154
155
156
157
|
# File 'lib/bioroebe/parsers/gff.rb', line 147
# ========================================================================= #
# === set_input_file
#
# Assigns @input_file. An Array is joined into a single String and
# stripped first. If the result is an empty String then INPUT_FILE is
# used instead.
# ========================================================================= #
def set_input_file(i = INPUT_FILE)
  i = i.join.strip if i.is_a?(Array)
  i = INPUT_FILE if i.is_a?(String) && i.empty?
  @input_file = i
end
|
#
Work on entries lacking a leading '#'.
#
171
172
173
174
175
176
177
178
179
180
181
182
|
# File 'lib/bioroebe/parsers/gff.rb', line 171
# NOTE(review): the method name is missing between `def` and `(` here,
# so this definition can not be parsed as written - TODO restore the
# name from lib/bioroebe/parsers/gff.rb (line 171).
#
# Fills @dataset from the given input (by default @original_dataset) and
# then determines the accession id from the last remaining line.
def (
i = @original_dataset
)
# Of an Array, only the first element is used.
if i.is_a? Array
i = i.first
end
# Split on N and discard all lines that start with '#'.
@dataset = i.split(N).reject {|line| line.start_with? '#' }
determine_accession_id_from_this_input(@dataset.last)
end
|