Class: Bioroebe::Parser::GFF

Inherits:
CommandlineApplication show all
Defined in:
lib/bioroebe/parsers/gff.rb

Overview

Bioroebe::Parser::GFF

Constant Summary collapse

INPUT_FILE =
#

INPUT_FILE

This file can be used for testing purposes.

#
'/Depot/Downloads/sequence.gff3'

Constants inherited from CommandlineApplication

CommandlineApplication::OLD_VERBOSE_VALUE

Constants included from ColoursForBase

ColoursForBase::ARRAY_HTML_COLOURS_IN_USE

Constants inherited from Base

Base::NAMESPACE

Instance Method Summary collapse

Methods inherited from CommandlineApplication

#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opnerev, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #runmode?, #set_be_verbose, #set_runmode, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #verbose_truth, #was_or_were, #without_extname, #write_what_into

Methods included from BaseModule

#absolute_path, #default_file_read, #file_readlines

Methods included from CommandlineArguments

#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first?, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens, #set_commandline_arguments

Methods included from ColoursForBase

#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_will_we_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?

Methods inherited from Base

#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into

Methods included from InternalHashModule

#internal_hash?, #reset_the_internal_hash

Methods included from InferTheNamespaceModule

#infer_the_namespace, #namespace?

Constructor Details

#initialize(i = ARGV, run_already = true) ⇒ GFF

#

initialize

#


81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/bioroebe/parsers/gff.rb', line 81

# ========================================================================= #
# === initialize
#
# The first argument is the input (ARGV by default). The second argument
# determines whether run() is invoked at the end of the constructor.
#
# An optional block may yield :do_not_check_for_missing_file. In that
# case the input is handed to do_all_actions_without_parsing_any_file()
# and run() is skipped. For every other yielded value, as well as when
# no block is given, the input is registered via set_input_file().
# ========================================================================= #
def initialize(
    i           = ARGV,
    run_already = true
  )
  reset
  # ======================================================================= #
  # === Handle blocks given to this method next
  # ======================================================================= #
  yielded = block_given? ? yield : nil
  case yielded
  # ======================================================================= #
  # === :do_not_check_for_missing_file
  # ======================================================================= #
  when :do_not_check_for_missing_file
    do_all_actions_without_parsing_any_file(i)
    run_already = false
  else
    # ===================================================================== #
    # An unrecognised block value must not prevent the input file from
    # being registered; otherwise run() would operate on an unset
    # @input_file.
    # ===================================================================== #
    set_input_file(i)
  end
  run if run_already
end

Instance Method Details

#accession_id?Boolean

#

accession_id?

#

Returns:

  • (Boolean)


187
188
189
# File 'lib/bioroebe/parsers/gff.rb', line 187

# ========================================================================= #
# === accession_id?
#
# Query method returning whatever is currently stored in @accession_id.
# That variable is assigned by determine_accession_id_from_this_input(),
# so the return value is the stored object itself rather than a strict
# true/false.
# ========================================================================= #
def accession_id?
  @accession_id
end

#considering_splitting_the_gff_file_into_standalone_ilesObject

#

considering_splitting_the_gff_file_into_standalone_iles

#


210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
# File 'lib/bioroebe/parsers/gff.rb', line 210

# ========================================================================= #
# === considering_splitting_the_gff_file_into_standalone_iles
#
# If more than one accession id is known, then one file per accession id
# is written (named "<accession id>.gff3"), containing all lines of
# @dataset that belong to that accession id, joined via N.
#
# Otherwise a notification is shown, provided that @input_file is not nil.
# ========================================================================= #
def considering_splitting_the_gff_file_into_standalone_iles
  if has_more_than_one_accession_ids?
    # ===================================================================== #
    # In this case we can split up the dataset.
    # ===================================================================== #
    @array_unique_accession_ids.each {|this_accession_id|
      into = this_accession_id+'.gff3'
      opnerev 'Storing dataset for the accession id '+
               sfancy(this_accession_id)+
               rev+' into the file `'+sfile(into)+'`.'
      # =================================================================== #
      # Only the first column identifies the accession id. A substring
      # match on the whole line would also select lines of other ids
      # that merely contain this one (e. g. "chr1" within "chr10"), or
      # lines that mention the id in a later column.
      # =================================================================== #
      what = @dataset.select {|line|
        line.split(TABULATOR).first == this_accession_id
      }.join(N)
      write_what_into(what, into)
    }
  else
    unless @input_file.nil?
      opnerev 'We were instructed to split into standalone files, but we'
      opnerev 'can not do so, as there is not more than one accession id'
      opnerev 'in this file.'
    end
  end
end

#determine_accession_id_from_this_input(i) ⇒ Object

#

determine_accession_id_from_this_input

#


284
285
286
287
288
289
# File 'lib/bioroebe/parsers/gff.rb', line 284

# ========================================================================= #
# === determine_accession_id_from_this_input
#
# Stores the accession id found in the given input in @accession_id. If
# the input contains TABULATOR, only the part before the first TABULATOR
# is kept; otherwise the input is stored unchanged.
#
# A nil input (as obtained from the last element of an empty dataset) is
# tolerated and leads to @accession_id being nil.
# ========================================================================= #
def determine_accession_id_from_this_input(i)
  if i && i.include?(TABULATOR)
    i = i.split(TABULATOR).first
  end
  @accession_id = i
end

#do_actions_past_the_parsing_of_the_input_fileObject

#

do_actions_past_the_parsing_of_the_input_file

#


275
276
277
278
279
# File 'lib/bioroebe/parsers/gff.rb', line 275

# ========================================================================= #
# === do_actions_past_the_parsing_of_the_input_file
#
# The steps that follow the parsing stage, in this order:
#
#   (1) collect the unique accession ids from @dataset
#   (2) report these accession ids
#   (3) perform the default action, as specified by @what_to_do
#
# The order matters: steps (2) and (3) depend on the Array that is
# populated in step (1).
# ========================================================================= #
def do_actions_past_the_parsing_of_the_input_file
  find_all_unique_accession_ids
  report_all_accession_ids
  do_default_action
end

#do_all_actions_without_parsing_any_file(i) ⇒ Object

#

do_all_actions_without_parsing_any_file

#


267
268
269
270
# File 'lib/bioroebe/parsers/gff.rb', line 267

# ========================================================================= #
# === do_all_actions_without_parsing_any_file
#
# Treats the given input itself as the dataset, rather than reading a
# file: the input is passed to work_on_non_comments_in_that_file(), and
# afterwards the regular post-parsing actions are run.
# ========================================================================= #
def do_all_actions_without_parsing_any_file(i)
  work_on_non_comments_in_that_file(i)
  do_actions_past_the_parsing_of_the_input_file
end

#do_default_action(i = @what_to_do) ⇒ Object

#

do_default_action

#


294
295
296
297
298
299
300
301
302
303
304
305
# File 'lib/bioroebe/parsers/gff.rb', line 294

# ========================================================================= #
# === do_default_action
#
# Dispatches on the given action (by default @what_to_do). Both
# :split_into_standalone_files and :try_to_split_into_standalone_files
# lead to an attempt to split the .gff3 file into standalone files. Any
# other value is ignored and nil is returned.
# ========================================================================= #
def do_default_action(
    i = @what_to_do
  )
  actions_that_split = [
    :split_into_standalone_files,
    :try_to_split_into_standalone_files
  ]
  return nil unless actions_that_split.include?(i)
  considering_splitting_the_gff_file_into_standalone_iles
end

#do_parse_the_input_fileObject

#

do_parse_the_input_file

#


251
252
253
254
255
256
257
258
259
260
261
262
# File 'lib/bioroebe/parsers/gff.rb', line 251

# ========================================================================= #
# === do_parse_the_input_file
#
# Reads the input file into @original_dataset and then delegates to
# work_on_non_comments_in_that_file(). If no input file is set, or if it
# does not exist, a notification is shown instead.
# ========================================================================= #
def do_parse_the_input_file
  path = input_file?
  # ======================================================================= #
  # === Properly check whether the file exists before continuing
  # ======================================================================= #
  file_is_available = path && File.exist?(path)
  unless file_is_available
    return opnerev("The input file does not exist at #{sfancy(path)}#{rev}.")
  end
  @original_dataset = File.read(path) # Read in the dataset.
  work_on_non_comments_in_that_file
end

#find_all_unique_accession_idsObject

#

find_all_unique_accession_ids

This method will find all unique accession IDs.

#


196
197
198
199
200
201
202
203
204
205
# File 'lib/bioroebe/parsers/gff.rb', line 196

# ========================================================================= #
# === find_all_unique_accession_ids
#
# Collects the first TABULATOR-separated field of every line in @dataset
# and appends it to @array_unique_accession_ids, unless it is already
# registered there. Lines that yield no first field are skipped.
#
# @dataset may be nil (it is only assigned once some input was parsed);
# in that case nothing is collected.
# ========================================================================= #
def find_all_unique_accession_ids
  Array(@dataset).each {|line|
    first = line.split(TABULATOR).first
    next if first.nil?
    next if @array_unique_accession_ids.include? first
    @array_unique_accession_ids << first
  }
end

#has_more_than_one_accession_ids?Boolean

#

has_more_than_one_accession_ids?

This method returns true if there is more than one accession id in the .gff/.gff3 file at hand.

#

Returns:

  • (Boolean)


140
141
142
# File 'lib/bioroebe/parsers/gff.rb', line 140

# ========================================================================= #
# === has_more_than_one_accession_ids?
#
# Returns true if at least two unique accession ids were registered in
# @array_unique_accession_ids, and false otherwise.
# ========================================================================= #
def has_more_than_one_accession_ids?
  @array_unique_accession_ids.length >= 2
end

#input_file?Boolean Also known as: input?

#

input_file?

#

Returns:

  • (Boolean)


162
163
164
# File 'lib/bioroebe/parsers/gff.rb', line 162

# ========================================================================= #
# === input_file?
#
# Query method returning whatever is currently stored in @input_file,
# that is, the value registered via set_input_file(), or nil after a
# reset. The stored object itself is returned, not a strict true/false.
# ========================================================================= #
def input_file?
  @input_file
end

#report_accession_id(i = @array_unique_accession_ids) ⇒ Object Also known as: report_this_accession_id, report_all_accession_ids

#

report_accession_id

#


237
238
239
240
241
242
243
244
245
# File 'lib/bioroebe/parsers/gff.rb', line 237

# ========================================================================= #
# === report_accession_id
#
# Reports the given accession id (by default all entries stored in
# @array_unique_accession_ids). An Array is reported entry by entry,
# including nested Arrays; any other input is converted via .to_s and
# shown in one line.
# ========================================================================= #
def report_accession_id(
    i = @array_unique_accession_ids
  )
  case i
  when Array
    i.each {|entry| report_accession_id(entry) }
  else
    opnerev "The accession id is `#{sfancy(i.to_s)}#{rev}`."
  end
end

#resetObject

#

reset

#


108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/bioroebe/parsers/gff.rb', line 108

# ========================================================================= #
# === reset
#
# Resets the internal state to the defaults, after having invoked the
# reset-method of the parent and infer_the_namespace().
# ========================================================================= #
def reset
  super()
  infer_the_namespace
  # ======================================================================= #
  # === @input_file
  # ======================================================================= #
  @input_file = nil
  # ======================================================================= #
  # === @original_dataset
  #
  # The unfiltered content of the input file, as read by
  # do_parse_the_input_file(). It is cleared here so that no stale
  # content survives a reset.
  # ======================================================================= #
  @original_dataset = nil
  # ======================================================================= #
  # === @dataset
  # ======================================================================= #
  @dataset = nil
  # ======================================================================= #
  # === @accession_id
  #
  # Assigned by determine_accession_id_from_this_input(). It is
  # initialized here so that accession_id? never reports a value from a
  # previous run.
  # ======================================================================= #
  @accession_id = nil
  # ======================================================================= #
  # === @array_unique_accession_ids
  #
  # The following Array will store entries such as: NC_002483.1
  # ======================================================================= #
  @array_unique_accession_ids = []
  # ======================================================================= #
  # === @what_to_do
  #
  # Currently only the key-action called :split_into_standalone_files
  # is supported.
  # ======================================================================= #
  @what_to_do = :split_into_standalone_files # Specify which action to do.
end

#runObject

#

run

#


310
311
312
313
# File 'lib/bioroebe/parsers/gff.rb', line 310

# ========================================================================= #
# === run
#
# The main entry point: first the input file is parsed, then the
# post-parsing actions are carried out. Note that the second step is
# invoked unconditionally, that is, also when the first step merely
# reported that the input file does not exist.
# ========================================================================= #
def run
  do_parse_the_input_file
  do_actions_past_the_parsing_of_the_input_file
end

#set_input_file(i = INPUT_FILE) ⇒ Object

#

set_input_file

#


147
148
149
150
151
152
153
154
155
156
157
# File 'lib/bioroebe/parsers/gff.rb', line 147

# ========================================================================= #
# === set_input_file
#
# Registers the input file in @input_file. An Array is joined into a
# single String and stripped. An empty String is replaced by the
# default, INPUT_FILE. Any other input is stored unchanged.
# ========================================================================= #
def set_input_file(
    i = INPUT_FILE
  )
  candidate = i.is_a?(Array) ? i.join.strip : i
  if candidate.is_a?(String) && candidate.empty?
    candidate = INPUT_FILE # Use the default in this case.
  end
  @input_file = candidate
end

#work_on_non_comments_in_that_file(i = @original_dataset) ⇒ Object

#

work_on_non_comments_in_that_file

Work on entries lacking a leading ‘#’.

#


171
172
173
174
175
176
177
178
179
180
181
182
# File 'lib/bioroebe/parsers/gff.rb', line 171

# ========================================================================= #
# === work_on_non_comments_in_that_file
#
# Work on entries lacking a leading '#'.
#
# The input (by default @original_dataset) is split on N; all lines that
# start with '#' are rejected and the remainder is stored in @dataset.
# If an Array is given then only its first element is used. The
# accession id is then determined from the last remaining line.
#
# A nil input (for instance the first element of an empty Array) is
# treated like an empty String. If no line remains, @accession_id is
# set to nil, as there is nothing to derive it from.
# ========================================================================= #
def work_on_non_comments_in_that_file(
    i = @original_dataset
  )
  if i.is_a? Array
    i = i.first
  end
  # ======================================================================= #
  # Reject all entries that start with a '#'.
  # ======================================================================= #
  @dataset = i.to_s.split(N).reject {|line| line.start_with? '#' }
  if @dataset.empty?
    @accession_id = nil
  else
    determine_accession_id_from_this_input(@dataset.last)
  end
end