Class: Bio::EBD::Format

Inherits:
Object
  • Object
show all
Defined in:
lib/bio-express_beta_diversity/ebd_format.rb

Overview

Express Beta Diversity input “OTU table” format parser.

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Format

Returns a new instance of Format.



11
12
13
14
# File 'lib/bio-express_beta_diversity/ebd_format.rb', line 11

# Builds an empty table: no samples recorded and no OTU names known yet.
def initialize
  # Sample name => counts hash first, then the list of OTU names.
  @sample_counts, @otu_names = {}, []
end

Instance Attribute Details

#otu_names ⇒ Object

Returns the value of attribute otu_names.



5
6
7
# File 'lib/bio-express_beta_diversity/ebd_format.rb', line 5

# Reader for the OTU names held by this object.
#
# @return [Array] the value of @otu_names; parse_from_file fills it with the
#   columns of the file's first line that follow the first column
def otu_names
  @otu_names
end

#sample_counts ⇒ Object

Hash of sample names to array of counts. The counts are floats that correspond to the otu_names.



9
10
11
# File 'lib/bio-express_beta_diversity/ebd_format.rb', line 9

# Hash of sample names to array of counts. The counts are floats that
# correspond, position by position, to the otu_names.
#
# @return [Hash] the value of @sample_counts
def sample_counts
  @sample_counts
end

Class Method Details

.parse_from_file(filename) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/bio-express_beta_diversity/ebd_format.rb', line 16

# Parses an Express Beta Diversity "OTU table" file into a Format object.
#
# The file is tab-separated. The first line holds the OTU IDs (its first
# column is ignored); every following non-blank line holds a sample name
# followed by one count per OTU:
#
#            100535  1008038
#   sample1    5.0       0
#   sample2    0       1.0
#
# @param filename [String] path of the EBD format file to read
# @return [Bio::EBD::Format] object with otu_names and sample_counts filled in
# @raise [RuntimeError] if the first line has fewer than two columns, a
#   sample line has the wrong number of columns, a sample name is repeated,
#   or a count is not a number
def self.parse_from_file(filename)
  ebd = Bio::EBD::Format.new

  header_seen = false
  CSV.foreach(filename, col_sep: "\t") do |row|
    unless header_seen
      # First line is the IDs of the OTUs
      raise "EBD format file appears to be incorrectly formatted on the first line: #{row.inspect}" if row.length < 2

      ebd.otu_names = row.drop(1)
      header_seen = true
      next
    end

    next if row.empty? # Ignore empty lines

    # All other lines are the sample name and then the number of observations of each OTU
    raise "Parse exception at this row: #{row.inspect}" unless row.length == ebd.otu_names.length + 1

    sample_name = row.first
    raise "Duplicate sample name detected in EBD format: #{sample_name}" if ebd.sample_counts.key?(sample_name)

    ebd.sample_counts[sample_name] = row.drop(1).map do |count|
      # Blank cells keep their previous meaning of zero.
      next 0.0 if count.nil? || count.strip.empty?

      begin
        # Float() is strict, unlike String#to_f which silently turns
        # unparseable text into 0.0 and so hides corrupted counts.
        Float(count)
      rescue ArgumentError
        raise "Non-numeric count #{count.inspect} in EBD format at this row: #{row.inspect}"
      end
    end
  end

  ebd
end

Instance Method Details

#number_of_samples ⇒ Object



45
46
47
# File 'lib/bio-express_beta_diversity/ebd_format.rb', line 45

# Counts the samples currently stored, i.e. the entries of @sample_counts.
#
# @return [Integer] how many sample names have counts recorded
def number_of_samples
  @sample_counts.size
end