Class: Bioroebe::Parser::GFF

Inherits:

CommandlineApplication

Object
Base
CommandlineApplication
Bioroebe::Parser::GFF

show all

Defined in: lib/bioroebe/parsers/gff.rb

Overview

Bioroebe::Parser::GFF

Constant Summary collapse

INPUT_FILE = # INPUT_FILE This file can be used for testing purposes. #

'/Depot/Downloads/sequence.gff3'

Instance Method Summary collapse

#accession_id? ⇒ Boolean

# === accession_id? ========================================================================= #.
#considering_splitting_the_gff_file_into_standalone_iles ⇒ Object

# === considering_splitting_the_gff_file_into_standalone_iles ========================================================================= #.
#determine_accession_id_from_this_input(i) ⇒ Object

# === determine_accession_id_from_this_input ========================================================================= #.
#do_actions_past_the_parsing_of_the_input_file ⇒ Object

# === do_actions_past_the_parsing_of_the_input_file ========================================================================= #.
#do_all_actions_without_parsing_any_file(i) ⇒ Object

# === do_all_actions_without_parsing_any_file ========================================================================= #.
#do_default_action(i = @what_to_do) ⇒ Object

# === do_default_action ========================================================================= #.
#do_parse_the_input_file ⇒ Object

# === do_parse_the_input_file ========================================================================= #.
#find_all_unique_accession_ids ⇒ Object

# === find_all_unique_accession_ids.
#has_more_than_one_accession_ids? ⇒ Boolean

# === has_more_than_one_accession_ids?.
#initialize(i = ARGV, run_already = true) ⇒ GFF constructor

# === initialize ========================================================================= #.
#input_file? ⇒ Boolean (also: #input?)

# === input_file? ========================================================================= #.
#report_accession_id(i = @array_unique_accession_ids) ⇒ Object (also: #report_this_accession_id, #report_all_accession_ids)

# === report_accession_id ========================================================================= #.
#reset ⇒ Object

# === reset ========================================================================= #.
#run ⇒ Object

# === run ========================================================================= #.
#set_input_file(i = INPUT_FILE) ⇒ Object

# === set_input_file ========================================================================= #.
#work_on_non_comments_in_that_file(i = @original_dataset) ⇒ Object

# === work_on_non_comments_in_that_file.

Methods inherited from CommandlineApplication

#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opnerev, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #runmode?, #set_be_verbose, #set_runmode, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #verbose_truth, #was_or_were, #without_extname, #write_what_into

Methods included from BaseModule

#absolute_path, #default_file_read, #file_readlines

Methods included from CommandlineArguments

#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first?, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens, #set_commandline_arguments

Methods included from ColoursForBase

#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_will_we_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?

Methods inherited from Base

#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into

Methods included from InternalHashModule

#internal_hash?, #reset_the_internal_hash

Methods included from InferTheNamespaceModule

#infer_the_namespace, #namespace?

Constructor Details

#initialize(i = ARGV, run_already = true) ⇒ `GFF`

#

initialize

#

# File 'lib/bioroebe/parsers/gff.rb', line 81

# Build a new GFF parser.
#
# The instance is reset first. What happens next depends on the block:
#
# * block yields :do_not_check_for_missing_file
#     +i+ is handed to do_all_actions_without_parsing_any_file and #run
#     is suppressed.
# * no block, or the block yields anything else
#     +i+ is registered as the input file via set_input_file. (An
#     unrecognised yield value used to skip this step entirely, leaving
#     no input file set before #run was invoked.)
#
# @param i [Array, String] commandline arguments or a path; defaults to ARGV
# @param run_already [Boolean] whether #run is called at the end
def initialize(
    i           = ARGV,
    run_already = true
  )
  reset
  # ======================================================================= #
  # === Handle blocks given to this method next
  # ======================================================================= #
  yielded = block_given? ? yield : nil
  case yielded
  # ======================================================================= #
  # === :do_not_check_for_missing_file
  # ======================================================================= #
  when :do_not_check_for_missing_file
    do_all_actions_without_parsing_any_file(i)
    run_already = false
  else
    # Default path: also taken when a block yields an unknown value.
    set_input_file(i)
  end
  run if run_already
end

Instance Method Details

#accession_id? ⇒ `Boolean`

#

accession_id?

#

Returns:

(Boolean)



187
188
189

# File 'lib/bioroebe/parsers/gff.rb', line 187

# Reader for @accession_id.
#
# Despite the trailing question mark the value is not coerced to
# true/false: whatever is stored in @accession_id is handed back as-is.
# That variable is assigned by determine_accession_id_from_this_input,
# so this is nil until that method has run.
def accession_id?
  @accession_id
end

#considering_splitting_the_gff_file_into_standalone_iles ⇒ `Object`

#

considering_splitting_the_gff_file_into_standalone_iles

#

# File 'lib/bioroebe/parsers/gff.rb', line 210

# Split the parsed dataset into one file per accession id.
#
# When more than one unique accession id is known, every id gets its own
# file named "<accession id>.gff3" (written via write_what_into) that
# contains the dataset lines belonging to that id, joined by N.
#
# When there is at most one accession id nothing is written; a notice is
# printed instead, but only if an input file had been set.
def considering_splitting_the_gff_file_into_standalone_iles
  if has_more_than_one_accession_ids?
    # ===================================================================== #
    # In this case we can split up the dataset.
    # ===================================================================== #
    @array_unique_accession_ids.each {|this_accession_id|
      into = "#{this_accession_id}.gff3"
      opnerev "Storing dataset for the accession id #{sfancy(this_accession_id)}"\
              "#{rev} into the file `#{sfile(into)}`."
      # Compare against the first column only, the same way the unique ids
      # were collected. A substring test over the whole line would also
      # pick up ids that merely share a prefix (NC_1 vs. NC_10) and lines
      # that mention the id in a later column.
      what = @dataset.select {|line|
        line.split(TABULATOR).first == this_accession_id
      }.join(N)
      write_what_into(what, into)
    }
  else
    unless @input_file.nil?
      opnerev 'We were instructed to split into standalone files, but we'
      opnerev 'can not do so, as there is not more than one accession id'
      opnerev 'in this file.'
    end
  end
end

#determine_accession_id_from_this_input(i) ⇒ `Object`

#

determine_accession_id_from_this_input

#

# File 'lib/bioroebe/parsers/gff.rb', line 284

# Derive the accession id from one dataset line and store it in
# @accession_id.
#
# If the input contains a TABULATOR only the part before the first one is
# kept; otherwise the input is stored unchanged. A nil input (for example
# the last line of an empty dataset) is tolerated and stored as nil rather
# than raising NoMethodError.
#
# @param i [String, nil] a single line of the dataset
# @return [String, nil] the value now held in @accession_id
def determine_accession_id_from_this_input(i)
  if i && i.include?(TABULATOR)
    i = i.split(TABULATOR).first
  end
  @accession_id = i
end

#do_actions_past_the_parsing_of_the_input_file ⇒ `Object`

#

do_actions_past_the_parsing_of_the_input_file

#

# File 'lib/bioroebe/parsers/gff.rb', line 275

# Steps that run once a dataset is available, in this order:
#
# 1. collect the unique accession ids,
# 2. report them,
# 3. perform the default action.
#
# The order matters: the report step reads the Array that step 1 fills.
def do_actions_past_the_parsing_of_the_input_file
  find_all_unique_accession_ids
  report_all_accession_ids
  do_default_action
end

#do_all_actions_without_parsing_any_file(i) ⇒ `Object`

#

do_all_actions_without_parsing_any_file

#

# File 'lib/bioroebe/parsers/gff.rb', line 267

# Entry point for input that is supplied directly instead of being read
# from a file: +i+ is handed to work_on_non_comments_in_that_file and the
# usual post-parse actions are run afterwards.
#
# @param i the dataset to work on; it is passed through unchanged to
#   work_on_non_comments_in_that_file
def do_all_actions_without_parsing_any_file(i)
  work_on_non_comments_in_that_file(i)
  do_actions_past_the_parsing_of_the_input_file
end

#do_default_action(i = @what_to_do) ⇒ `Object`

#

do_default_action

#

# File 'lib/bioroebe/parsers/gff.rb', line 294

# Dispatch on the requested action.
#
# Only the "split into standalone files" task is known; it is reachable
# under two names. Any other value is ignored and nil is returned.
#
# @param i [Symbol] the action to perform; defaults to @what_to_do
def do_default_action(i = @what_to_do)
  # ======================================================================= #
  # Both of these names trigger splitting the .gff3 file into standalone
  # files.
  # ======================================================================= #
  splitting_tasks = %i[
    split_into_standalone_files
    try_to_split_into_standalone_files
  ]
  return unless splitting_tasks.include?(i)

  considering_splitting_the_gff_file_into_standalone_iles
end

#do_parse_the_input_file ⇒ `Object`

#

do_parse_the_input_file

#

# File 'lib/bioroebe/parsers/gff.rb', line 251

# Read the input file into @original_dataset and process its non-comment
# lines.
#
# The path comes from input_file?. If no path is set, or the path does not
# point at a regular file, a notice is printed and nothing is read. A
# directory counts as "not a file" here: File.exist? would accept it and
# File.read would then raise Errno::EISDIR.
def do_parse_the_input_file
  path = input_file?
  # ======================================================================= #
  # === Properly check whether the file exists before continuing
  # ======================================================================= #
  if path && File.file?(path)
    @original_dataset = File.read(path) # Read in the dataset.
    work_on_non_comments_in_that_file
  else
    opnerev "The input file does not exist at #{sfancy(path)}#{rev}."
  end
end

#find_all_unique_accession_ids ⇒ `Object`

#

find_all_unique_accession_ids

This method will find all unique accession IDs.

#

# File 'lib/bioroebe/parsers/gff.rb', line 196

# Collect every distinct first-column value of @dataset into
# @array_unique_accession_ids, keeping first-seen order.
#
# Lines without a first column (e.g. empty lines) are skipped. When no
# dataset has been parsed (@dataset is nil, as happens when the input
# file could not be read) the method returns without doing anything
# instead of raising NoMethodError.
def find_all_unique_accession_ids
  return if @dataset.nil?
  @dataset.each {|line|
    first = line.split(TABULATOR).first
    next if first.nil? || @array_unique_accession_ids.include?(first)
    @array_unique_accession_ids << first
  }
end

#has_more_than_one_accession_ids? ⇒ `Boolean`

#

has_more_than_one_accession_ids?

This method returns true if there is more than one accession id in the .gff/.gff3 file at hand.

#

Returns:

(Boolean)



140
141
142

# File 'lib/bioroebe/parsers/gff.rb', line 140

# True when at least two distinct accession ids have been collected in
# @array_unique_accession_ids.
#
# @return [Boolean]
def has_more_than_one_accession_ids?
  @array_unique_accession_ids.length >= 2
end

#input_file? ⇒ `Boolean` Also known as: input?

#

input_file?

#

Returns:

(Boolean)



162
163
164

# File 'lib/bioroebe/parsers/gff.rb', line 162

# Reader for @input_file.
#
# Despite the trailing question mark the value is not coerced to
# true/false: the stored value is returned as-is. It is nil after reset
# and otherwise whatever set_input_file assigned.
def input_file?
  @input_file
end

#report_accession_id(i = @array_unique_accession_ids) ⇒ `Object` Also known as: report_this_accession_id, report_all_accession_ids

#

report_accession_id

#

# File 'lib/bioroebe/parsers/gff.rb', line 237

# Print one "The accession id is ..." line per accession id.
#
# An Array is walked element by element (nested Arrays included, since
# each element is passed back into this method); any other value is
# converted with to_s and reported directly.
#
# @param i [Array, Object] id(s) to report; defaults to
#   @array_unique_accession_ids
def report_accession_id(i = @array_unique_accession_ids)
  case i
  when Array
    i.each { |accession_id| report_accession_id(accession_id) }
  else
    opnerev "The accession id is `#{sfancy(i.to_s)}#{rev}`."
  end
end

#reset ⇒ `Object`

#

reset

#

# File 'lib/bioroebe/parsers/gff.rb', line 108

# Bring the instance back to its initial state.
#
# Calls the parent reset and infer_the_namespace, then clears every
# instance variable this class works with. @original_dataset and
# @accession_id are cleared as well, so that a reused instance cannot
# report data from an earlier run.
def reset
  super()
  infer_the_namespace
  # ======================================================================= #
  # === @input_file
  # ======================================================================= #
  @input_file = nil
  # ======================================================================= #
  # === @original_dataset
  #
  # The raw, unfiltered content that was read from the input file.
  # ======================================================================= #
  @original_dataset = nil
  # ======================================================================= #
  # === @dataset
  # ======================================================================= #
  @dataset = nil
  # ======================================================================= #
  # === @accession_id
  # ======================================================================= #
  @accession_id = nil
  # ======================================================================= #
  # === @array_unique_accession_ids
  #
  # The following Array will store entries such as: NC_002483.1
  # ======================================================================= #
  @array_unique_accession_ids = []
  # ======================================================================= #
  # === @what_to_do
  #
  # Currently only the key-action called :split_into_standalone_files
  # is supported.
  # ======================================================================= #
  @what_to_do = :split_into_standalone_files # Specify which action to do.
end

#run ⇒ `Object`

#

run

#

# File 'lib/bioroebe/parsers/gff.rb', line 310

# Main entry point: parse the input file first, then run the actions that
# follow parsing (collecting and reporting accession ids, default action).
def run
  do_parse_the_input_file
  do_actions_past_the_parsing_of_the_input_file
end

#set_input_file(i = INPUT_FILE) ⇒ `Object`

#

set_input_file

#

# File 'lib/bioroebe/parsers/gff.rb', line 147

# Remember which file is to be parsed.
#
# An Array is collapsed into a single String (elements joined without a
# separator, then stripped). An empty String falls back to INPUT_FILE.
# Every other value, nil included, is stored unchanged.
#
# @param i [Array, String, nil] path or commandline arguments
# @return the value now held in @input_file
def set_input_file(i = INPUT_FILE)
  i = i.join.strip if i.is_a?(Array)
  use_default = i.is_a?(String) && i.empty?
  @input_file = use_default ? INPUT_FILE : i # Use the default for empty input.
end

#work_on_non_comments_in_that_file(i = @original_dataset) ⇒ `Object`

#

work_on_non_comments_in_that_file

Work on entries lacking a leading ‘#’.

#

# File 'lib/bioroebe/parsers/gff.rb', line 171

# Work on entries lacking a leading '#'.
#
# The input is split on N, lines starting with '#' are dropped and the
# remainder is stored in @dataset. The accession id is then derived from
# the last remaining line.
#
# Missing input is treated as an empty dataset rather than an error: nil
# (for instance an empty Array, or an unset @original_dataset) yields an
# empty @dataset. When no data line remains @accession_id is set to nil.
#
# @param i [String, Array, nil] the raw dataset, or an Array whose first
#   element is the raw dataset; defaults to @original_dataset
def work_on_non_comments_in_that_file(
    i = @original_dataset
  )
  if i.is_a? Array
    i = i.first
  end
  # ======================================================================= #
  # Reject all entries that start with a '#'.
  # ======================================================================= #
  @dataset = i.to_s.split(N).reject {|line| line.start_with? '#' }
  if @dataset.empty?
    @accession_id = nil # No data line to derive an accession id from.
  else
    determine_accession_id_from_this_input(@dataset.last)
  end
end

Class: Bioroebe::Parser::GFF

Overview

Bioroebe::Parser::GFF

Constant Summary collapse

#

INPUT_FILE

#

Constants inherited from CommandlineApplication

Constants included from ColoursForBase

Constants inherited from Base

Instance Method Summary collapse

# === accession_id? ========================================================================= #.

# === considering_splitting_the_gff_file_into_standalone_iles ========================================================================= #.

# === determine_accession_id_from_this_input ========================================================================= #.

# === do_actions_past_the_parsing_of_the_input_file ========================================================================= #.

# === do_all_actions_without_parsing_any_file ========================================================================= #.

# === do_default_action ========================================================================= #.

# === do_parse_the_input_file ========================================================================= #.

# === find_all_unique_accession_ids.

# === has_more_than_one_accession_ids?.

# === initialize ========================================================================= #.

# === input_file? ========================================================================= #.

# === report_accession_id ========================================================================= #.

# === reset ========================================================================= #.

# === run ========================================================================= #.

# === set_input_file ========================================================================= #.

# === work_on_non_comments_in_that_file.

Methods inherited from CommandlineApplication

Methods included from BaseModule

Methods included from CommandlineArguments

Methods included from ColoursForBase

Methods inherited from Base

Methods included from InternalHashModule

Methods included from InferTheNamespaceModule

Constructor Details

#initialize(i = ARGV, run_already = true) ⇒ GFF

#

initialize

#

Instance Method Details

#accession_id? ⇒ Boolean

#

accession_id?

#

#considering_splitting_the_gff_file_into_standalone_iles ⇒ Object

#

considering_splitting_the_gff_file_into_standalone_iles

#

#determine_accession_id_from_this_input(i) ⇒ Object

#

determine_accession_id_from_this_input

#

#do_actions_past_the_parsing_of_the_input_file ⇒ Object

#

do_actions_past_the_parsing_of_the_input_file

#

#do_all_actions_without_parsing_any_file(i) ⇒ Object

#

do_all_actions_without_parsing_any_file

#

#do_default_action(i = @what_to_do) ⇒ Object

#

do_default_action

#

#do_parse_the_input_file ⇒ Object

#

do_parse_the_input_file

#

#find_all_unique_accession_ids ⇒ Object

#

find_all_unique_accession_ids

#

#has_more_than_one_accession_ids? ⇒ Boolean

#

has_more_than_one_accession_ids?

#

#input_file? ⇒ Boolean Also known as: input?

#

input_file?

#

#initialize(i = ARGV, run_already = true) ⇒ `GFF`

#accession_id? ⇒ `Boolean`

#considering_splitting_the_gff_file_into_standalone_iles ⇒ `Object`

#determine_accession_id_from_this_input(i) ⇒ `Object`

#do_actions_past_the_parsing_of_the_input_file ⇒ `Object`

#do_all_actions_without_parsing_any_file(i) ⇒ `Object`

#do_default_action(i = @what_to_do) ⇒ `Object`

#do_parse_the_input_file ⇒ `Object`

#find_all_unique_accession_ids ⇒ `Object`

#has_more_than_one_accession_ids? ⇒ `Boolean`

#input_file? ⇒ `Boolean` Also known as: input?

#report_accession_id(i = @array_unique_accession_ids) ⇒ `Object` Also known as: report_this_accession_id, report_all_accession_ids

#reset ⇒ `Object`

#run ⇒ `Object`

#set_input_file(i = INPUT_FILE) ⇒ `Object`

#work_on_non_comments_in_that_file(i = @original_dataset) ⇒ `Object`