Class: Bioroebe::Parser::GFF

Inherits:
CommandlineApplication show all
Defined in:
lib/bioroebe/parsers/gff.rb

Overview

Bioroebe::Parser::GFF

Constant Summary collapse

INPUT_FILE =
#

INPUT_FILE

This file can be used for testing purposes.

#
'/Depot/Downloads/sequence.gff3'

Constants inherited from CommandlineApplication

CommandlineApplication::OLD_VERBOSE_VALUE

Constants included from ColoursForBase

ColoursForBase::ARRAY_HTML_COLOURS_IN_USE

Constants inherited from Base

Base::NAMESPACE

Instance Method Summary collapse

Methods inherited from CommandlineApplication

#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opne, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #set_be_verbose, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #verbose_truth, #was_or_were, #without_extname, #write_what_into

Methods included from CommandlineArguments

#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first?, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens, #set_commandline_arguments

Methods included from ColoursForBase

#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?

Methods inherited from Base

#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #file_readlines, #infer_the_namespace, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #namespace?, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into

Constructor Details

#initialize(i = ARGV, run_already = true) ⇒ GFF

#

initialize

#

81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/bioroebe/parsers/gff.rb', line 81

# ========================================================================= #
# === initialize
#
# Resets the internal state, registers the input and - unless disabled -
# invokes run().
#
# Arguments:
#
#   i           - the input; defaults to ARGV. It is handed to
#                 set_input_file(), or to
#                 do_all_actions_without_parsing_any_file() when the
#                 block yields :do_not_check_for_missing_file.
#   run_already - when true (the default), run() is called at the end.
#
# An optional block may yield :do_not_check_for_missing_file. In that
# case the input is processed directly, without reading any file, and
# run() is skipped.
#
# Any other yielded value is not recognized here; the input is then
# registered via set_input_file() exactly as if no block had been given,
# rather than being silently dropped.
# ========================================================================= #
def initialize(
    i           = ARGV,
    run_already = true
  )
  reset
  # ======================================================================= #
  # === Handle blocks given to this method next
  # ======================================================================= #
  yielded = block_given? ? yield : nil
  case yielded
  # ======================================================================= #
  # === :do_not_check_for_missing_file
  # ======================================================================= #
  when :do_not_check_for_missing_file
    do_all_actions_without_parsing_any_file(i)
    run_already = false # All actions were already done above.
  else
    # No block, or a block yielding an unrecognized value.
    set_input_file(i)
  end
  run if run_already
end

Instance Method Details

#accession_id?Boolean

#

accession_id?

#

Returns:

  • (Boolean)

187
188
189
# File 'lib/bioroebe/parsers/gff.rb', line 187

# ========================================================================= #
# === accession_id?
#
# Query method: hands back whatever is currently stored in @accession_id.
# Despite the trailing '?' the stored value itself is returned, not a
# strict true/false.
# ========================================================================= #
def accession_id?
  @accession_id
end

#considering_splitting_the_gff_file_into_standalone_ilesObject

#

considering_splitting_the_gff_file_into_standalone_iles

#

210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
# File 'lib/bioroebe/parsers/gff.rb', line 210

# ========================================================================= #
# === considering_splitting_the_gff_file_into_standalone_iles
#
# If more than one accession id was found, every accession id gets its
# own '<accession id>.gff3' file, containing the lines of @dataset that
# belong to that id. The content is handed to write_what_into().
#
# A line belongs to an accession id when its first tab-separated column
# is equal to that id. This is the same column that
# find_all_unique_accession_ids reads. A plain substring check would
# put the lines of "NC_10" into the file of "NC_1" as well, and would
# also pick up lines that merely mention the id in another column.
#
# If there is at most one accession id, a notification is emitted
# instead - but only when an input file was registered.
# ========================================================================= #
def considering_splitting_the_gff_file_into_standalone_iles
  if has_more_than_one_accession_ids?
    # ===================================================================== #
    # In this case we can split up the dataset.
    # ===================================================================== #
    @array_unique_accession_ids.each {|this_accession_id|
      into = "#{this_accession_id}.gff3"
      opnn
      erev "Storing dataset for the accession id "\
           "#{sfancy(this_accession_id)}#{rev} into the file "\
           "`#{sfile(into)}`."
      what = @dataset.select {|line|
        # Compare the first column only, not the whole line.
        line.split(TABULATOR).first == this_accession_id
      }.join(N)
      write_what_into(what, into)
    }
  else
    unless @input_file.nil?
      opnn; erev 'We were instructed to split into standalone files, but we'
      opnn; erev 'can not do so, as there is not more than one accession id'
      opnn; erev 'in this file.'
    end
  end
end

#determine_accession_id_from_this_input(i) ⇒ Object

#

determine_accession_id_from_this_input

#

284
285
286
287
288
289
# File 'lib/bioroebe/parsers/gff.rb', line 284

# ========================================================================= #
# === determine_accession_id_from_this_input
#
# Stores the accession id in @accession_id and returns it.
#
# If the input contains a TABULATOR, only the part before the first
# TABULATOR is kept. Otherwise the input is stored unchanged.
#
# nil is tolerated and stored as nil. The caller passes the last entry
# of the dataset, which is nil when the dataset has no entries.
# ========================================================================= #
def determine_accession_id_from_this_input(i)
  i = i.split(TABULATOR).first if i && i.include?(TABULATOR)
  @accession_id = i
end

#do_actions_past_the_parsing_of_the_input_fileObject

#

do_actions_past_the_parsing_of_the_input_file

#

275
276
277
278
279
# File 'lib/bioroebe/parsers/gff.rb', line 275

# ========================================================================= #
# === do_actions_past_the_parsing_of_the_input_file
#
# The steps that follow once the dataset is available, in this order:
#
#   (1) collect the unique accession ids
#   (2) report them
#   (3) run the default action
#
# The return value is whatever do_default_action returns.
# ========================================================================= #
def do_actions_past_the_parsing_of_the_input_file
  find_all_unique_accession_ids # (1)
  report_all_accession_ids      # (2)
  do_default_action             # (3)
end

#do_all_actions_without_parsing_any_file(i) ⇒ Object

#

do_all_actions_without_parsing_any_file

#

267
268
269
270
# File 'lib/bioroebe/parsers/gff.rb', line 267

# ========================================================================= #
# === do_all_actions_without_parsing_any_file
#
# Works on the given input directly instead of reading it from a file:
# the input is passed to work_on_non_comments_in_that_file(), then the
# regular post-parsing actions are run.
# ========================================================================= #
def do_all_actions_without_parsing_any_file(i)
  work_on_non_comments_in_that_file(i) # Treat i as the raw dataset.
  do_actions_past_the_parsing_of_the_input_file
end

#do_default_action(i = @what_to_do) ⇒ Object

#

do_default_action

#

294
295
296
297
298
299
300
301
302
303
304
305
# File 'lib/bioroebe/parsers/gff.rb', line 294

# ========================================================================= #
# === do_default_action
#
# Dispatches on the requested action, which defaults to @what_to_do.
#
# Both :split_into_standalone_files and
# :try_to_split_into_standalone_files trigger the attempt to split the
# .gff3 file into standalone files. Any other value does nothing and
# yields nil.
# ========================================================================= #
def do_default_action(
    i = @what_to_do
  )
  actions_that_split = [
    :split_into_standalone_files,
    :try_to_split_into_standalone_files
  ]
  if actions_that_split.include?(i)
    considering_splitting_the_gff_file_into_standalone_iles
  end
end

#do_parse_the_input_fileObject

#

do_parse_the_input_file

#

251
252
253
254
255
256
257
258
259
260
261
262
# File 'lib/bioroebe/parsers/gff.rb', line 251

# ========================================================================= #
# === do_parse_the_input_file
#
# Reads the registered input file into @original_dataset and then calls
# work_on_non_comments_in_that_file. If no regular file exists at that
# location, a notification is emitted instead.
#
# File.file? is used rather than File.exist?, because the latter is
# also true for directories - and File.read() on a directory raises
# Errno::EISDIR.
# ========================================================================= #
def do_parse_the_input_file
  path = input_file?
  # ======================================================================= #
  # === Properly check whether the file exists before continuing
  # ======================================================================= #
  if path && File.file?(path)
    @original_dataset = File.read(path) # Read in the dataset.
    work_on_non_comments_in_that_file
  else
    opnn; erev "The input file does not exist at #{sfancy(path)}#{rev}."
  end
end

#find_all_unique_accession_idsObject

#

find_all_unique_accession_ids

This method will find all unique accession IDs.

#

196
197
198
199
200
201
202
203
204
205
# File 'lib/bioroebe/parsers/gff.rb', line 196

# ========================================================================= #
# === find_all_unique_accession_ids
#
# This method will find all unique accession IDs.
#
# For every line of @dataset the first tab-separated column is taken
# and appended to @array_unique_accession_ids, unless it is already
# registered there. Lines without any column, such as empty lines, are
# skipped.
#
# @dataset is nil until a dataset was parsed, for instance when the
# input file does not exist. In that case nothing is done and nil is
# returned, rather than failing with a NoMethodError.
# ========================================================================= #
def find_all_unique_accession_ids
  return if @dataset.nil?
  @dataset.each {|line|
    first = line.split(TABULATOR).first
    next if first.nil? || @array_unique_accession_ids.include?(first)
    @array_unique_accession_ids << first
  }
end

#has_more_than_one_accession_ids?Boolean

#

has_more_than_one_accession_ids?

This method returns true if there is more than one accession id in the .gff/.gff3 file at hand.

#

Returns:

  • (Boolean)

140
141
142
# File 'lib/bioroebe/parsers/gff.rb', line 140

# ========================================================================= #
# === has_more_than_one_accession_ids?
#
# True when at least two entries are registered in
# @array_unique_accession_ids, false otherwise.
# ========================================================================= #
def has_more_than_one_accession_ids?
  @array_unique_accession_ids.size >= 2
end

#input_file?Boolean Also known as: input?

#

input_file?

#

Returns:

  • (Boolean)

162
163
164
# File 'lib/bioroebe/parsers/gff.rb', line 162

# ========================================================================= #
# === input_file?
#
# Query method: hands back the value stored in @input_file. That is nil
# as long as no input file has been registered.
# ========================================================================= #
def input_file?
  @input_file
end

#report_accession_id(i = @array_unique_accession_ids) ⇒ Object Also known as: report_this_accession_id, report_all_accession_ids

#

report_accession_id

#

237
238
239
240
241
242
243
244
245
# File 'lib/bioroebe/parsers/gff.rb', line 237

# ========================================================================= #
# === report_accession_id
#
# Reports one accession id, or every accession id of an Array.
#
# An Array is walked entry by entry, with this method calling itself for
# each entry; the Array is then returned. Anything else is converted via
# .to_s and reported in a single message.
#
# Without an argument, @array_unique_accession_ids is reported.
# ========================================================================= #
def report_accession_id(
    i = @array_unique_accession_ids
  )
  # Arrays are handled recursively, one entry at a time.
  return i.each {|entry| report_accession_id(entry) } if i.is_a?(Array)
  opnn
  erev "The accession id is `#{sfancy(i.to_s)}#{rev}`."
end

#resetObject

#

reset

#

108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/bioroebe/parsers/gff.rb', line 108

# ========================================================================= #
# === reset
#
# Brings every instance variable used by this class into its initial
# state, after delegating to the parent's reset and calling
# infer_the_namespace.
#
# @original_dataset and @accession_id are cleared here as well, so that
# no value from an earlier run survives a reset, and so that the query
# methods never read a variable that was never assigned.
# ========================================================================= #
def reset
  super()
  infer_the_namespace
  # ======================================================================= #
  # === @input_file
  # ======================================================================= #
  @input_file = nil
  # ======================================================================= #
  # === @original_dataset
  #
  # The unmodified content of the input file, as one String.
  # ======================================================================= #
  @original_dataset = nil
  # ======================================================================= #
  # === @dataset
  # ======================================================================= #
  @dataset = nil
  # ======================================================================= #
  # === @accession_id
  # ======================================================================= #
  @accession_id = nil
  # ======================================================================= #
  # === @array_unique_accession_ids
  #
  # The following Array will store entries such as: NC_002483.1
  # ======================================================================= #
  @array_unique_accession_ids = []
  # ======================================================================= #
  # === @what_to_do
  #
  # Currently only the key-action called :split_into_standalone_files
  # is supported.
  # ======================================================================= #
  @what_to_do = :split_into_standalone_files # Specify which action to do.
end

#runObject

#

run

#

310
311
312
313
# File 'lib/bioroebe/parsers/gff.rb', line 310

# ========================================================================= #
# === run
#
# The main entry point: first the input file is parsed, then the
# post-parsing actions are carried out. The return value is the one of
# do_actions_past_the_parsing_of_the_input_file.
# ========================================================================= #
def run
  do_parse_the_input_file                       # Step 1: read the file.
  do_actions_past_the_parsing_of_the_input_file # Step 2: act on it.
end

#set_input_file(i = INPUT_FILE) ⇒ Object

#

set_input_file

#

147
148
149
150
151
152
153
154
155
156
157
# File 'lib/bioroebe/parsers/gff.rb', line 147

# ========================================================================= #
# === set_input_file
#
# Registers the input file in @input_file and returns it.
#
# An Array is joined into one String, without a separator, and then
# stripped. An empty String - including the one resulting from an empty
# Array - is replaced by INPUT_FILE. Every other value is stored as
# given; a plain String is not stripped.
# ========================================================================= #
def set_input_file(
    i = INPUT_FILE
  )
  i = i.join.strip if i.is_a?(Array)
  # Use the default in this case.
  i = INPUT_FILE if i.is_a?(String) && i.empty?
  @input_file = i
end

#work_on_non_comments_in_that_file(i = @original_dataset) ⇒ Object

#

work_on_non_comments_in_that_file

Work on entries lacking a leading '#'.

#

171
172
173
174
175
176
177
178
179
180
181
182
# File 'lib/bioroebe/parsers/gff.rb', line 171

# ========================================================================= #
# === work_on_non_comments_in_that_file
#
# Work on entries lacking a leading '#'.
#
# The input, by default @original_dataset, is split at N. All lines
# starting with '#' are discarded and the remainder is stored in
# @dataset. The last remaining line is then handed to
# determine_accession_id_from_this_input().
#
# If an Array is given, only its first element is used.
#
# Missing input (nil, or an empty Array) results in an empty @dataset
# rather than a NoMethodError. If no line remains after filtering, the
# accession id is not touched, since there is no line to derive it from.
# ========================================================================= #
def work_on_non_comments_in_that_file(
    i = @original_dataset
  )
  i = i.first if i.is_a?(Array)
  # ======================================================================= #
  # Reject all entries that start with a '#'.
  # ======================================================================= #
  @dataset = i.to_s.split(N).reject {|line| line.start_with?('#') }
  unless @dataset.empty?
    determine_accession_id_from_this_input(@dataset.last)
  end
end