Class: Bio::KEGG::GENES

Inherits:
Bio::KEGGDB show all
Defined in:
lib/bio/db/kegg/genes.rb

Constant Summary collapse

DELIMITER =
RS = "\n///\n"
TAGSIZE =
12

Instance Method Summary collapse

Methods inherited from DB

#exists?, #fetch, #get, open, #tags

Constructor Details

#initialize(entry) ⇒ GENES

Returns a new instance of GENES.



83
84
85
# File 'lib/bio/db/kegg/genes.rb', line 83

# Creates a GENES object by handing +entry+ to the superclass constructor
# together with this class's TAGSIZE constant.
#
# entry:: passed through unchanged to the superclass
#         (presumably the raw text of one GENES record -- confirm against the parent class).
def initialize(entry)
  super(entry, TAGSIZE)
end

Instance Method Details

#aalen ⇒ Object



240
241
242
# File 'lib/bio/db/kegg/genes.rb', line 240

# Amino-acid length as stated in the AASEQ field: the first run of digits
# in fetch('AASEQ'), converted to an Integer. Yields 0 when the field
# contains no digits.
def aalen
  length_token = fetch('AASEQ').slice(/\d+/)
  length_token.to_i
end

#aaseq ⇒ Object



233
234
235
236
237
238
# File 'lib/bio/db/kegg/genes.rb', line 233

# Amino-acid sequence of the entry as a Bio::Sequence::AA.
#
# The AASEQ field is read with fetch, every run of digits is removed, and
# the remainder is wrapped in Bio::Sequence::AA. The object is stored in
# @data['AASEQ'] so that later calls return the same instance.
def aaseq
  cached = @data['AASEQ']
  return cached if cached

  residues = fetch('AASEQ').gsub(/\d+/, '')
  @data['AASEQ'] = Bio::Sequence::AA.new(residues)
end

#chromosome ⇒ Object



158
159
160
161
162
163
164
165
166
# File 'lib/bio/db/kegg/genes.rb', line 158

# Chromosome part of the POSITION value.
#
# * When position contains a colon, returns everything before the first colon.
# * When position has no colon and no '..', returns position itself.
# * Otherwise (a bare range such as "10..20") returns nil.
def chromosome
  pos = position
  return pos.sub(/:.*/, '') if pos[/:/]
  return pos unless pos[/\.\./]

  nil
end

#codon_usage(codon = nil) ⇒ Object



206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# File 'lib/bio/db/kegg/genes.rb', line 206

# Codon usage table as a Hash keyed by lower-case codon strings.
#
# The 64 keys are generated in t, c, a, g order for each of the three
# positions ("ttt", "ttc", ... "ggg"), and each key is paired with the
# value at the matching index of cu_list (first*16 + second*4 + third).
# The table is built once and kept in @data['CODON_USAGE'].
#
# codon:: accepted for interface compatibility; the method body does not
#         read it, so the whole Hash is returned regardless.
def codon_usage(codon = nil)
  cached = @data['CODON_USAGE']
  return cached if cached

  counts = cu_list
  bases  = %w(t c a g)
  table  = {}
  index  = 0
  bases.each do |first|
    bases.each do |second|
      bases.each do |third|
        table[first + second + third] = counts[index]
        index += 1
      end
    end
  end
  @data['CODON_USAGE'] = table
end

#cu_list ⇒ Object



223
224
225
226
227
228
229
230
231
# File 'lib/bio/db/kegg/genes.rb', line 223

# Flat Array of Integers read from the CODON_USAGE field.
#
# The text of the first line is blanked out, then for every remaining line
# the first 11 characters are dropped and the rest is consumed in
# consecutive 4-character cells, each converted with to_i. A trailing
# fragment shorter than 4 characters is ignored.
def cu_list
  table_text = get('CODON_USAGE').sub(/.*/,'')  # blank out the header line
  counts = []
  table_text.each_line do |row|
    cells = row.chomp.sub(/^.{11}/, '').scan(/..../)
    counts.concat(cells.map { |cell| cell.to_i })
  end
  counts
end


#dblinks ⇒ Object



194
195
196
197
198
199
200
201
202
203
204
# File 'lib/bio/db/kegg/genes.rb', line 194

# Cross references listed in the DBLINKS field.
#
# Every "name: ids" occurrence in get('DBLINKS') becomes one Hash entry
# whose key is the database name and whose value is the Array of
# whitespace-separated IDs that follow on that line. The Hash is kept in
# @data['DBLINKS'] and reused on later calls.
def dblinks
  cached = @data['DBLINKS']
  return cached if cached

  links = {}
  get('DBLINKS').scan(/(\S+):\s*(.*)\n?/) do |db_name, id_text|
    links[db_name] = id_text.strip.split(/\s+/)
  end
  @data['DBLINKS'] = links
end

#definition ⇒ Object



126
127
128
# File 'lib/bio/db/kegg/genes.rb', line 126

# Returns whatever field_fetch yields for the 'DEFINITION' tag.
# NOTE(review): field_fetch is inherited and not shown here; #eclinks applies
# a Regexp to this value, so it is presumably the field text as a String.
def definition
  field_fetch('DEFINITION')
end

#division ⇒ Object



106
107
108
# File 'lib/bio/db/kegg/genes.rb', line 106

# Returns the 'division' value of the Hash built by #entry, i.e. the
# stripped text taken from columns 30..39 of the ENTRY line.
def division
  entry['division']			# CDS, tRNA etc.
end


#eclinks ⇒ Object



130
131
132
133
134
135
136
137
# File 'lib/bio/db/kegg/genes.rb', line 130

# EC numbers mentioned in the definition.
#
# Looks for the first "[EC:...]" group in #definition and returns its
# contents split on whitespace. Returns an empty Array when the definition
# carries no such group.
def eclinks
  ec_field = definition[/\[EC:(.*?)\]/, 1]
  return [] unless ec_field

  ec_field.strip.split(/\s+/)
end

#entry ⇒ Object



88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/bio/db/kegg/genes.rb', line 88

# Parses the ENTRY line into a Hash with the keys 'id', 'division' and
# 'organism', cut from fixed columns (12..29, 30..39 and 40..80) and
# stripped of surrounding whitespace.
#
# The Hash has '' as its default value, so an ENTRY line of 30 characters or
# fewer yields an empty Hash that still answers '' for every key. The result
# is stored in @data['ENTRY'] and reused on later calls.
def entry
  unless @data['ENTRY']
    hash = Hash.new('')
    e = get('ENTRY')
    if e.length > 30
      hash['id']       = e[12..29].strip
      hash['division'] = e[30..39].strip
      # A line of 31..39 characters has no organism column at all; e[40..80]
      # is nil in that case, so coerce it before stripping instead of raising.
      hash['organism'] = e[40..80].to_s.strip
    end
    @data['ENTRY'] = hash
  end
  @data['ENTRY']
end

#entry_id ⇒ Object



102
103
104
# File 'lib/bio/db/kegg/genes.rb', line 102

# Returns the 'id' value of the Hash built by #entry, i.e. the stripped
# text taken from columns 12..29 of the ENTRY line.
def entry_id
  entry['id']
end

#gbposition ⇒ Object



168
169
170
# File 'lib/bio/db/kegg/genes.rb', line 168

# POSITION value with its leading "prefix:" removed.
#
# Everything up to and including the first colon is dropped; a position
# without a colon is returned unchanged.
def gbposition
  full_position = position
  full_position.sub(/.*?:/, '')
end

#gene ⇒ Object



122
123
124
# File 'lib/bio/db/kegg/genes.rb', line 122

# First element of #genes, or nil when #genes is empty.
def gene
  all_names = genes
  all_names[0]
end

#genes ⇒ Object



118
119
120
# File 'lib/bio/db/kegg/genes.rb', line 118

# Array of names obtained by splitting #name on the two-character
# separator ", " (comma followed by a space).
def genes
  name_field = name
  name_field.split(', ')
end

#locations ⇒ Object



172
173
174
# File 'lib/bio/db/kegg/genes.rb', line 172

# Builds a new Bio::Locations from #gbposition (the POSITION value with its
# leading "prefix:" removed). A fresh object is created on every call.
def locations
  Bio::Locations.new(gbposition)
end

#motif ⇒ Object



176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/bio/db/kegg/genes.rb', line 176

# Motif IDs listed in the MOTIF field, grouped by database.
#
# Each line from lines_fetch('MOTIF') that starts with "name:" opens a new
# database; the IDs after the colon, and those on following lines without
# such a prefix, are appended to that database's Array. The resulting Hash
# (database name => Array of IDs) is kept in @data['MOTIF'].
#
# Lines seen before any "name:" prefix are collected under the nil key.
def motif
  unless @data['MOTIF']
    hash = {}
    db = nil
    lines_fetch('MOTIF').each do |line|
      if line[/^\S+:/]
        # Split at the first colon only. With the limit, a bare "name:" line
        # gives an empty string rather than nil, and IDs that themselves
        # contain a colon are kept whole.
        db, str = line.split(/:/, 2)
      else
        str = line
      end
      hash[db] ||= []
      hash[db] += str.strip.split(/\s+/)
    end
    @data['MOTIF'] = hash
  end
  @data['MOTIF']		# Hash of Array of IDs in MOTIF
end

#name ⇒ Object



114
115
116
# File 'lib/bio/db/kegg/genes.rb', line 114

# Returns whatever field_fetch yields for the 'NAME' tag.
# NOTE(review): field_fetch is inherited and not shown here; #genes calls
# split(', ') on this value, so it is presumably the field text as a String.
def name
  field_fetch('NAME')
end

#ntlen ⇒ Object (also known as: nalen)



252
253
254
# File 'lib/bio/db/kegg/genes.rb', line 252

# Nucleotide length as stated in the NTSEQ field: the first run of digits
# in fetch('NTSEQ'), converted to an Integer. Yields 0 when the field
# contains no digits.
def ntlen
  length_token = fetch('NTSEQ').slice(/\d+/)
  length_token.to_i
end

#ntseq ⇒ Object (also known as: naseq)



244
245
246
247
248
249
# File 'lib/bio/db/kegg/genes.rb', line 244

# Nucleotide sequence of the entry as a Bio::Sequence::NA.
#
# The NTSEQ field is read with fetch, every run of digits is removed, and
# the remainder is wrapped in Bio::Sequence::NA. The object is stored in
# @data['NTSEQ'] so that later calls return the same instance.
def ntseq
  cached = @data['NTSEQ']
  return cached if cached

  bases = fetch('NTSEQ').gsub(/\d+/, '')
  @data['NTSEQ'] = Bio::Sequence::NA.new(bases)
end

#organism ⇒ Object



110
111
112
# File 'lib/bio/db/kegg/genes.rb', line 110

# Returns the 'organism' value of the Hash built by #entry, i.e. the
# stripped text taken from columns 40..80 of the ENTRY line.
def organism
  entry['organism']			# H.sapiens etc.
end

#orthologs ⇒ Object



139
140
141
# File 'lib/bio/db/kegg/genes.rb', line 139

# Returns whatever lines_fetch yields for the 'ORTHOLOGY' tag.
# NOTE(review): lines_fetch is inherited and not shown here; #motif iterates
# its result with each, so it is presumably an Array of lines -- confirm.
def orthologs
  lines_fetch('ORTHOLOGY')
end

#pathway ⇒ Object



143
144
145
# File 'lib/bio/db/kegg/genes.rb', line 143

# Returns whatever field_fetch yields for the 'PATHWAY' tag.
# NOTE(review): field_fetch is inherited and not shown here; #pathways calls
# scan on this value, so it is presumably the field text as a String.
def pathway
  field_fetch('PATHWAY')
end

#pathways ⇒ Object



147
148
149
# File 'lib/bio/db/kegg/genes.rb', line 147

# Array of the identifiers found inside every "[PATH:...]" group of
# #pathway, in order of appearance. Empty when there is no such group.
def pathways
  matches = pathway.scan(/\[PATH:(.*?)\]/)
  matches.map { |captures| captures.first }
end

#position ⇒ Object



151
152
153
154
155
156
# File 'lib/bio/db/kegg/genes.rb', line 151

# POSITION field with every whitespace character removed.
#
# The cleaned string is stored in @data['POSITION'] on first use and
# returned from there afterwards.
def position
  cached = @data['POSITION']
  return cached if cached

  raw_position = fetch('POSITION')
  @data['POSITION'] = raw_position.gsub(/\s/, '')
end