Class: Bioroebe::Parser::GFF
Overview
Constant Summary
collapse
- INPUT_FILE =
#
This file can be used for testing purposes.
#
'/Depot/Downloads/sequence.gff3'
CommandlineApplication::OLD_VERBOSE_VALUE
ColoursForBase::ARRAY_HTML_COLOURS_IN_USE
Constants inherited
from Base
Base::NAMESPACE
Instance Method Summary
collapse
#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opnerev, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #runmode?, #set_be_verbose, #set_runmode, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #verbose_truth, #was_or_were, #without_extname, #write_what_into
Methods included from BaseModule
#absolute_path, #default_file_read, #file_readlines
#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first?, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens, #set_commandline_arguments
#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_will_we_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?
Methods inherited from Base
#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into
#internal_hash?, #reset_the_internal_hash
#infer_the_namespace, #namespace?
Constructor Details
#initialize(i = ARGV, run_already = true) ⇒ GFF
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
# File 'lib/bioroebe/parsers/gff.rb', line 81
# Builds a new GFF parser instance.
#
# The instance is reset first. If a block is given and it yields
# :do_not_check_for_missing_file, no input file is registered; instead
# do_all_actions_without_parsing_any_file(i) is called and #run is
# suppressed. In every other case (no block, or a block yielding any
# other value) the input is registered through set_input_file, so a
# block with an unrecognised return value no longer leaves @input_file
# unset while #run is still invoked.
#
# @param i [Array<String>, String] the input, ARGV by default
# @param run_already [Boolean] whether #run is invoked at the end
# @yieldreturn [Symbol] optional instruction, see above
def initialize(
    i           = ARGV,
    run_already = true
  )
  reset
  yielded = block_given? ? yield : nil
  if yielded == :do_not_check_for_missing_file
    do_all_actions_without_parsing_any_file(i)
    run_already = false
  else
    set_input_file(i)
  end
  run if run_already
end
|
Instance Method Details
#accession_id? ⇒ Boolean
187
188
189
|
# File 'lib/bioroebe/parsers/gff.rb', line 187
# Reader for @accession_id, the value assigned by
# determine_accession_id_from_this_input. Despite the trailing '?', the
# stored value itself is returned rather than a strict true/false.
def accession_id?
@accession_id
end
|
#considering_splitting_the_gff_file_into_standalone_iles ⇒ Object
#
considering_splitting_the_gff_file_into_standalone_iles
#
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
|
# File 'lib/bioroebe/parsers/gff.rb', line 210
# Splits the parsed dataset into one '<accession id>.gff3' file per
# unique accession id, provided more than one accession id was found.
#
# A line belongs to an accession id when its first tab-separated column
# equals that id. This is the same rule find_all_unique_accession_ids
# uses to collect the ids; a plain substring test would also put the
# lines of 'NC_10' into the file for 'NC_1'.
#
# When there is only one accession id (or none) and an input file was
# set, a short explanation is printed instead.
def considering_splitting_the_gff_file_into_standalone_iles
  if has_more_than_one_accession_ids?
    @array_unique_accession_ids.each { |this_accession_id|
      into = this_accession_id+'.gff3'
      opnerev 'Storing dataset for the accession id '+
              sfancy(this_accession_id)+
              rev+' into the file `'+sfile(into)+'`.'
      # Compare the whole first column, not a substring of the line.
      what = @dataset.select { |line|
        line.split(TABULATOR).first == this_accession_id
      }.join(N)
      write_what_into(what, into)
    }
  elsif !@input_file.nil?
    opnerev 'We were instructed to split into standalone files, but we'
    opnerev 'can not do so, as there is not more than one accession id'
    opnerev 'in this file.'
  end
end
|
284
285
286
287
288
289
|
# File 'lib/bioroebe/parsers/gff.rb', line 284
# Stores the accession id found in +i+ in @accession_id.
#
# If +i+ contains a tabulator, only the part before the first tabulator
# is kept; otherwise +i+ is stored unchanged. A nil input (for example
# the last element of an empty array) is stored as nil instead of
# raising NoMethodError.
#
# @param i [String, nil] a single line, or an already isolated id
# @return [String, nil] the value assigned to @accession_id
def determine_accession_id_from_this_input(i)
  i = i.split(TABULATOR).first if i && i.include?(TABULATOR)
  @accession_id = i
end
|
275
276
277
278
279
|
# File 'lib/bioroebe/parsers/gff.rb', line 275
# Runs the steps that follow the parsing of the input, in this order:
# collect the unique accession ids, report them, then run the default
# action (do_default_action with its default argument).
def do_actions_past_the_parsing_of_the_input_file
find_all_unique_accession_ids
# report_all_accession_ids is listed as an alias of report_accession_id.
report_all_accession_ids
do_default_action
end
|
#do_all_actions_without_parsing_any_file(i) ⇒ Object
#
do_all_actions_without_parsing_any_file
#
267
268
269
270
|
# File 'lib/bioroebe/parsers/gff.rb', line 267
# Entry point used by #initialize when its block yields
# :do_not_check_for_missing_file: works on +i+ directly and then runs
# the post-parsing actions.
def do_all_actions_without_parsing_any_file(i)
# NOTE(review): as written this line is only a parenthesised expression
# and has no effect. A method name appears to be missing in front of
# "(i)" - presumably the method that fills @dataset from its argument.
# Confirm against the real source file.
(i)
do_actions_past_the_parsing_of_the_input_file
end
|
#do_default_action(i = @what_to_do) ⇒ Object
294
295
296
297
298
299
300
301
302
303
304
305
|
# File 'lib/bioroebe/parsers/gff.rb', line 294
# Dispatches on the requested action.
#
# Both :split_into_standalone_files and
# :try_to_split_into_standalone_files trigger the splitting of the
# dataset into standalone files; any other value is ignored and nil is
# returned.
#
# @param i [Symbol] the action to perform, @what_to_do by default
def do_default_action(i = @what_to_do)
  splitting_requested = %i[
    split_into_standalone_files
    try_to_split_into_standalone_files
  ].include?(i)
  considering_splitting_the_gff_file_into_standalone_iles if splitting_requested
end
|
251
252
253
254
255
256
257
258
259
260
261
262
|
# File 'lib/bioroebe/parsers/gff.rb', line 251
# Reads the content of the input file into @original_dataset.
#
# If no input file is set, or nothing exists at that path, a message is
# printed and @original_dataset is left untouched.
def do_parse_the_input_file
  path = input_file?
  unless path && File.exist?(path)
    return opnerev("The input file does not exist at #{sfancy(path)}#{rev}.")
  end
  @original_dataset = File.read(path)
end
|
#find_all_unique_accession_ids ⇒ Object
#
find_all_unique_accession_ids
This method will find all unique accession IDs.
#
196
197
198
199
200
201
202
203
204
205
|
# File 'lib/bioroebe/parsers/gff.rb', line 196
# Collects every distinct accession id of @dataset into
# @array_unique_accession_ids, in order of first appearance.
#
# The accession id of a line is its first tab-separated column; lines
# that yield no first column (such as empty lines) are skipped. Ids that
# are already in the array are not added again.
#
# When @dataset is nil (nothing was parsed, for example because the
# input file does not exist) the method does nothing instead of raising
# NoMethodError.
def find_all_unique_accession_ids
  return if @dataset.nil?
  @dataset.each { |line|
    accession_id = line.split(TABULATOR).first
    next if accession_id.nil?
    next if @array_unique_accession_ids.include?(accession_id)
    @array_unique_accession_ids << accession_id
  }
end
|
#has_more_than_one_accession_ids? ⇒ Boolean
#
has_more_than_one_accession_ids?
This method returns true if there is more than one accession id in the .gff/.gff3 file at hand.
#
140
141
142
|
# File 'lib/bioroebe/parsers/gff.rb', line 140
# Tells whether at least two distinct accession ids have been collected
# in @array_unique_accession_ids.
#
# @return [Boolean]
def has_more_than_one_accession_ids?
  @array_unique_accession_ids.length >= 2
end
|
162
163
164
|
# File 'lib/bioroebe/parsers/gff.rb', line 162
# Reader for @input_file, the value stored by set_input_file (nil after
# reset). Despite the trailing '?', the stored value itself is returned
# rather than a strict true/false.
def input_file?
@input_file
end
|
#report_accession_id(i = @array_unique_accession_ids) ⇒ Object
Also known as:
report_this_accession_id, report_all_accession_ids
237
238
239
240
241
242
243
244
245
|
# File 'lib/bioroebe/parsers/gff.rb', line 237
# Prints one "The accession id is ..." line per accession id.
#
# An Array is walked element by element (nested arrays recursively) and
# is itself returned; any other value is converted with to_s and
# reported directly.
#
# @param i [Array, Object] the id(s) to report, by default all unique
#   accession ids collected so far
def report_accession_id(i = @array_unique_accession_ids)
  case i
  when Array
    i.each { |accession_id| report_accession_id(accession_id) }
  else
    opnerev "The accession id is `#{sfancy(i.to_s)}#{rev}`."
  end
end
|
#reset ⇒ Object
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
|
# File 'lib/bioroebe/parsers/gff.rb', line 108
# Puts the parser back into its initial state.
#
# Calls the parent reset and infer_the_namespace first, then clears
# every instance variable this class works with. @original_dataset and
# @accession_id are cleared as well, so that data from an earlier parse
# cannot leak into the next run.
#
# @return [Symbol] :split_into_standalone_files, the default action
def reset
  super()
  infer_the_namespace
  @input_file       = nil
  @original_dataset = nil # raw file content, set by do_parse_the_input_file
  @dataset          = nil
  @accession_id     = nil # set by determine_accession_id_from_this_input
  @array_unique_accession_ids = []
  @what_to_do = :split_into_standalone_files
end
|
#run ⇒ Object
310
311
312
313
|
# File 'lib/bioroebe/parsers/gff.rb', line 310
# Main entry point: reads the input file, then runs the post-parsing
# actions.
def run
# Only prints a message when the input file is missing; the next step
# is still executed in that case.
do_parse_the_input_file
do_actions_past_the_parsing_of_the_input_file
end
|
147
148
149
150
151
152
153
154
155
156
157
|
# File 'lib/bioroebe/parsers/gff.rb', line 147
# Stores the path of the input file in @input_file.
#
# An Array is joined into a single String and stripped of surrounding
# whitespace. An empty String falls back to INPUT_FILE. Every other
# value is stored as given.
#
# @param i [Array<String>, String] the input file, INPUT_FILE by default
# @return the value stored in @input_file
def set_input_file(i = INPUT_FILE)
  i = i.join.strip if i.is_a?(Array)
  i = INPUT_FILE   if i.is_a?(String) && i.empty?
  @input_file = i
end
|
#
Work on entries lacking a leading ‘#’.
#
171
172
173
174
175
176
177
178
179
180
181
182
|
# File 'lib/bioroebe/parsers/gff.rb', line 171
# NOTE(review): the name of this method is missing in this listing
# ("def (" is not valid Ruby) - restore it from the real source file.
#
# Turns the raw input into @dataset: the input is split on N and every
# line starting with '#' is dropped. The accession id is then determined
# from the last remaining line.
#
# The input defaults to @original_dataset; if an Array is given, only
# its first element is used.
def (
i = @original_dataset
)
if i.is_a? Array
i = i.first
end
# N is presumably the newline constant - TODO confirm.
@dataset = i.split(N).reject {|line| line.start_with? '#' }
determine_accession_id_from_this_input(@dataset.last)
end
|