Class: Bio::IMG::Metadata

Inherits:
Array
  • Object
show all
Defined in:
lib/bio-img_metadata.rb

Overview

Acts like an array of Bio::IMG::Lineage objects

Use TaxonomyDefinitionFile.read to read in a file downloaded through the IMG export system

Constant Summary collapse

# Maps column headers found in an IMG taxonomy export to the name of the
# attribute that the column's value is assigned to. Frozen so the shared
# mapping cannot be modified by accident at runtime.
FIELD_NAMES_TO_CLASSIFICATIONS =
{
  'taxon_oid' => :taxon_id,
  'Domain' => :domain,
  'Phylum' => :phylum,
  'Class' => :class_name, # not :class, which would collide with Object#class
  'Order' => :order,
  'Family' => :family,
  'Genus' => :genus,
  'Species' => :species,
}.freeze

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.read(img_taxonomy_filename_path) ⇒ Object

Reads an IMG taxonomy file into a new TaxonomyDefinitionFile object, which then acts as an array of the Bio::IMG::Lineage objects parsed from that file.



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/bio-img_metadata.rb', line 39

# Reads a tab-separated IMG taxonomy export into a new TaxonomyDefinitionFile
# containing one Bio::IMG::Lineage per data line, in file order.
#
# The first line of the file is treated as the header row. For every data
# line, the columns named in FIELD_NAMES_TO_CLASSIFICATIONS are assigned to
# the matching lineage attribute (taxon_oid is converted with #to_i), the raw
# line is stored in #definition_line, and every column is stored by header
# name in #attributes.
#
# NOTE: if a header named in FIELD_NAMES_TO_CLASSIFICATIONS is absent from the
# header row, its index is nil and the row lookup raises a TypeError on the
# first data line.
#
# @param img_taxonomy_filename_path [String] path to the exported file
# @return [TaxonomyDefinitionFile] the lineages parsed from the file
def self.read(img_taxonomy_filename_path)
  all_lineages = TaxonomyDefinitionFile.new

  # taxon_oid       Domain  Status  Genome Name     Phylum  Class   Order   Family  Genus   Species Strain  Release Date    IMG Release
  # 650716001       Archaea Finished        Acidianus hospitalis W1 Crenarchaeota   Thermoprotei    Sulfolobales    Sulfolobaceae   Acidianus       hospitalis      W1      2011-12-01      IMG/W 3.5
  # 648028003       Archaea Finished        Acidilobus saccharovorans 345-15        Crenarchaeota   Thermoprotei    Acidilobales    Acidilobaceae   Acidilobus      saccharovorans  345-15  2011-01-01      IMG/W 3.3
  # 646564501       Archaea Finished        Aciduliprofundum boonei T469    Euryarchaeota   Thermoplasmata  Thermoplasmatales       Aciduloprofundaceae     Aciduliprofundum        boonei  T469    2010-08-01      IMG/
  #
  # Have to use a simple line#split because the regular CSV class is narky
  # and IMG metadata files aren't perfectly respectable
  headers = nil
  header_indices = {}

  # File.foreach closes the file when iteration finishes (or raises), unlike
  # File.open(path).each_line, which leaves the handle open until GC.
  File.foreach(img_taxonomy_filename_path) do |line|
    definition_line = line.chomp
    row = definition_line.split("\t")

    # The first line is the header row: remember where each column of
    # interest lives, then move on to the data lines.
    if headers.nil?
      headers = row
      FIELD_NAMES_TO_CLASSIFICATIONS.each_key do |header|
        header_indices[header] = headers.index(header)
      end
      next
    end

    lineage = Bio::IMG::Lineage.new
    lineage.definition_line = definition_line

    # Example column layout of a data row:
    # 0# 650716001
    # 1# Archaea
    # 2# Finished
    # 3# Acidianus hospitalis W1
    # 4# Crenarchaeota
    # 5# Thermoprotei
    # 6# Sulfolobales
    # 7# Sulfolobaceae
    # 8# Acidianus
    # 9# hospitalis
    # 10# W1
    # 11# 2011-12-01
    # 12# IMG/W 3.5
    FIELD_NAMES_TO_CLASSIFICATIONS.each do |header, attribute|
      value = row[header_indices[header]]
      value = value.to_i if attribute == :taxon_id
      lineage.send("#{attribute}=", value)
    end

    # Keep every column, keyed by its header, not just the classification ones.
    lineage.attributes = {}
    row.each_with_index do |col, i|
      lineage.attributes[headers[i]] = col
    end

    all_lineages.push(lineage)
  end

  all_lineages
end

Instance Method Details

#to_hash ⇒ Object

Return a hash, indexed by taxon_oid



96
97
98
99
100
101
102
# File 'lib/bio-img_metadata.rb', line 96

# Builds a lookup table of the elements of this collection keyed by each
# element's taxon_id.
#
# @return [Hash] taxon_id => element; when several elements share a
#   taxon_id, the last one iterated is the one kept
def to_hash
  each_with_object({}) do |lineage, by_taxon_id|
    by_taxon_id[lineage.taxon_id] = lineage
  end
end