Class: GeneValidator::TabularParser

Inherits:
Object
  • Object
show all
Defined in:
lib/genevalidator/tabular_parser.rb

Overview

This class parses the tabular output of BLAST (outfmt 6 & 7)

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(filename, format, type) ⇒ TabularParser

Initializes the object. filename: path of the file containing the tabular BLAST output; format: format of the tabular output (comma/space delimited string of column names); type: :nucleotide or :mrna



21
22
23
24
25
26
# File 'lib/genevalidator/tabular_parser.rb', line 21

# Builds a parser over a tabular BLAST output file.
#
# @param filename [String] path of the tabular BLAST output; handed to
#   #analayse_tabular_file, which reads it
# @param format [String] comma- and/or space-delimited list of column names.
#   Digits and dashes are stripped first, so a leading outfmt number
#   (e.g. "6 qseqid sseqid") is tolerated
# @param type [Symbol] stored as-is and exposed through #type
def initialize(filename, format, type)
  # Split on runs of separators and drop blanks: inputs such as
  # "6 qseqid, sseqid" must not yield empty column names, because every
  # empty name would shift the real headers onto the wrong fields.
  @column_names = format.gsub(/[-\d]/, '').split(/[ ,]+/).reject(&:empty?)
  @tab_results  = analayse_tabular_file(filename)
  @rows         = @tab_results.to_enum
  @type         = type
end

Instance Attribute Details

#column_names ⇒ Object (readonly)

Returns the value of attribute column_names.



13
14
15
# File 'lib/genevalidator/tabular_parser.rb', line 13

# Reader for @column_names, the list of column names that #initialize
# derives from its +format+ argument.
def column_names
  @column_names
end

#rows ⇒ Object (readonly)

Returns the value of attribute rows.



11
12
13
# File 'lib/genevalidator/tabular_parser.rb', line 11

# Reader for @rows, the enumerator that #initialize creates over the parsed
# rows (@tab_results.to_enum).
def rows
  @rows
end

#tab_results ⇒ Object (readonly)

Returns the value of attribute tab_results.



12
13
14
# File 'lib/genevalidator/tabular_parser.rb', line 12

# Reader for @tab_results, the value #initialize obtains from
# #analayse_tabular_file (one hash per parsed row).
def tab_results
  @tab_results
end

#type ⇒ Object (readonly)

Returns the value of attribute type.



14
15
16
# File 'lib/genevalidator/tabular_parser.rb', line 14

# Reader for @type, the +type+ argument given to #initialize.
def type
  @type
end

Instance Method Details

#analayse_tabular_file(filename) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
# File 'lib/genevalidator/tabular_parser.rb', line 30

# Reads the tab-separated file at +filename+ and returns its rows as hashes.
#
# Lines matching /^#/ are skipped, and each field is keyed by the
# corresponding entry of @column_names.
#
# @param filename [String] path of the file to read
# @return [Array<Hash>] one hash per parsed row, in file order
def analayse_tabular_file(filename)
  parse_options = { col_sep: "\t", skip_lines: /^#/, headers: @column_names }
  CSV.parse(File.read(filename), **parse_options).map(&:to_hash)
end

#initialise_all_hsps(current_query_id, hits, hit_seq) ⇒ Object



89
90
91
92
93
94
95
96
# File 'lib/genevalidator/tabular_parser.rb', line 89

# Creates one Hsp for every row of +hits+ whose 'sseqid' equals
# +current_query_id+ and appends each to +hit_seq.hsp_list+.
#
# NOTE(review): despite its name, +current_query_id+ is compared against the
# 'sseqid' column, not 'qseqid'.
#
# @param current_query_id [Object] value matched against each row's 'sseqid'
# @param hits [Enumerable<Hash>] rows to scan
# @param hit_seq [#hsp_list] object whose hsp_list receives the new Hsp objects
# @return [Array<Hash>] the rows that matched
def initialise_all_hsps(current_query_id, hits, hit_seq)
  hits.select { |candidate| candidate['sseqid'] == current_query_id }.each do |candidate|
    alignment = Hsp.new.tap { |fresh| fresh.init_tabular_attribute(candidate, type) }
    hit_seq.hsp_list.push(alignment)
  end
end

#initialise_classes(hits) ⇒ Object



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/genevalidator/tabular_parser.rb', line 71

# Builds one Sequence per distinct 'sseqid' value found in +hits+.
#
# Each Sequence is initialised from the first row of its group, filled with
# HSPs via #initialise_all_hsps, and has its type set to :protein.
#
# @param hits [Enumerable<Hash>] rows to turn into hit sequences
# @return [Array<Sequence>] one entry per distinct 'sseqid', in order of
#   first appearance in +hits+
def initialise_classes(hits)
  hits.group_by { |row| row['sseqid'] }.map do |subject_id, subject_rows|
    hit_seq = Sequence.new
    hit_seq.init_tabular_attribute(subject_rows.first)

    # Hand over only this group's rows: they are exactly the rows the helper
    # selects by 'sseqid', in the same order, so re-scanning the whole of
    # +hits+ for every group is unnecessary.
    initialise_all_hsps(subject_id, subject_rows, hit_seq)

    hit_seq.type = :protein
    hit_seq
  end
end

#next ⇒ Object Also known as: move_to_next_query

Moves to the next query.



44
45
46
47
48
49
50
51
52
# File 'lib/genevalidator/tabular_parser.rb', line 44

# Advances the row enumerator past every remaining row of the current query,
# leaving it positioned on the first row of the following query (or at the
# end of the rows when there is none).
#
# @raise [StopIteration] if no rows remain when the method is called
def next
  current_entry = @rows.peek['qseqid']
  # Kernel#loop rescues StopIteration, so reaching the end of the rows simply
  # ends the loop.
  loop do
    # Test before advancing: consuming the first row of the following query
    # would skip that query altogether whenever it has only a single row.
    break unless @rows.peek['qseqid'] == current_entry
    @rows.next
  end
end

#parse_next(query_id = nil) ⇒ Object



58
59
60
61
62
63
64
65
66
67
# File 'lib/genevalidator/tabular_parser.rb', line 58

# Returns the hits of the query the row enumerator currently points at, then
# moves on to the next query.
#
# @param query_id [Object, nil] when given, it must equal the current row's
#   'qseqid'; otherwise nothing is parsed and the position is left unchanged
# @return [Array] the result of #initialise_classes for the current query's
#   rows, or [] when +query_id+ does not match or StopIteration is raised
#   (e.g. no rows remain)
def parse_next(query_id = nil)
  active_id = @rows.peek['qseqid']
  return [] unless query_id.nil? || active_id == query_id

  query_rows = @tab_results.select { |record| record['qseqid'] == active_id }
  parsed_hits = initialise_classes(query_rows)
  move_to_next_query
  parsed_hits
rescue StopIteration
  []
end