Class: Bio::KEGG::GENES

Inherits:
Bio::KEGGDB show all
Includes:
Common::DblinksAsHash, Common::OrthologsAsHash, Common::PathwaysAsHash
Defined in:
lib/bio/db/kegg/genes.rb

Overview

Description

KEGG GENES entry parser.

References

Constant Summary collapse

DELIMITER = RS = "\n///\n"
TAGSIZE = 12

Instance Method Summary collapse

Methods inherited from DB

#exists?, #fetch, #get, open, #tags

Constructor Details

#initialize(entry) ⇒ GENES

Creates a new Bio::KEGG::GENES object.


Arguments:

  • (required) entry: (String) single entry as a string

Returns

Bio::KEGG::GENES object



115
116
117
# File 'lib/bio/db/kegg/genes.rb', line 115

# Creates a new Bio::KEGG::GENES object.
#
# *Arguments*:
# * (required) _entry_: (String) single entry as a string
# *Returns*:: Bio::KEGG::GENES object
def initialize(entry)
  # Forward the raw entry together with this class's TAGSIZE constant
  # to the parent class constructor.
  super(entry, TAGSIZE)
end

Instance Method Details

#aalenObject

Returns length of the amino acid sequence described in the AASEQ lines.


Returns

Integer



393
394
395
# File 'lib/bio/db/kegg/genes.rb', line 393

# Returns the length of the amino acid sequence, taken from the first run
# of digits found in the AASEQ field.
#
# *Returns*:: Integer (0 when the field contains no digits)
def aalen
  length_text = fetch('AASEQ')[/\d+/]
  length_text.to_i
end

#aaseqObject

Returns amino acid sequence described in the AASEQ lines.


Returns

Bio::Sequence::AA object



383
384
385
386
387
388
# File 'lib/bio/db/kegg/genes.rb', line 383

# Returns the amino acid sequence described in the AASEQ lines.
#
# Every run of digits is removed from the fetched field before the
# sequence object is built. The result is cached in @data['AASEQ'].
#
# *Returns*:: Bio::Sequence::AA object
def aaseq
  @data['AASEQ'] ||= Bio::Sequence::AA.new(fetch('AASEQ').gsub(/\d+/, ''))
end

#chromosomeObject

Chromosome described in the POSITION line.


Returns

String or nil



264
265
266
267
268
269
270
271
272
# File 'lib/bio/db/kegg/genes.rb', line 264

# Chromosome described in the POSITION line.
#
# * When the position contains a colon, everything before the first
#   colon is returned.
# * When it contains neither a colon nor "..", the whole position
#   string is returned.
# * Otherwise (a bare range), nil is returned.
#
# *Returns*:: String or nil
def chromosome
  pos = position
  return pos.sub(/:.*/, '') if pos[/:/]
  return nil if pos[/\.\./]
  pos
end

#codon_usage(codon = nil) ⇒ Object

Codon usage data described in the CODON_USAGE lines. (Deprecated: the CODON_USAGE lines no longer exist.)


Returns

Hash



350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
# File 'lib/bio/db/kegg/genes.rb', line 350

# Codon usage data described in the CODON_USAGE lines.
#
# Builds a Hash keyed by lowercase codon ("ttt", "ttc", ... "ggg", bases
# ordered t, c, a, g) whose values are the corresponding elements of
# cu_list. The Hash is cached in @data['CODON_USAGE'].
#
# NOTE: the _codon_ argument is accepted but not used; the whole Hash is
# always returned.
#
# *Returns*:: Hash
def codon_usage(codon = nil)
  return @data['CODON_USAGE'] if @data['CODON_USAGE']

  counts = cu_list
  bases = %w(t c a g)
  table = {}
  # product enumerates the triplets with the first base varying slowest,
  # so the running index equals i*16 + j*4 + k.
  bases.product(bases, bases).each_with_index do |triplet, idx|
    table[triplet.join] = counts[idx]
  end
  @data['CODON_USAGE'] = table
end

#cu_listObject

Codon usage data described in the CODON_USAGE lines as an array.


Returns

Array



370
371
372
373
374
375
376
377
378
# File 'lib/bio/db/kegg/genes.rb', line 370

# Codon usage data described in the CODON_USAGE lines as an array.
#
# The first line of the field is discarded; on every remaining line the
# first 11 characters are dropped and the rest is read as consecutive
# 4-character columns, each converted with to_i.
#
# *Returns*:: Array
def cu_list
  body = get('CODON_USAGE').sub(/.*/, '')   # cut 1st line
  counts = []
  body.each_line do |row|
    columns = row.chomp.sub(/^.{11}/, '').scan(/..../)
    counts.concat(columns.map { |col| col.to_i })
  end
  counts
end

#dblinks_as_hash ⇒ Object

Returns a Hash of the DB name and an Array of entry IDs in DBLINKS field.



97
# File 'lib/bio/db/kegg/genes.rb', line 97

# Returns a Hash of the DB name and an Array of entry IDs in DBLINKS field.
# Delegates entirely to the inherited/mixed-in implementation.
def dblinks_as_hash
  super
end

#dblinks_as_strings ⇒ Object

Links to other databases described in the DBLINKS lines.


Returns

Array containing String objects



332
333
334
# File 'lib/bio/db/kegg/genes.rb', line 332

# Links to other databases described in the DBLINKS lines.
#
# *Returns*:: Array containing String objects
def dblinks_as_strings
  tag = 'DBLINKS'
  lines_fetch(tag)
end

#definitionObject

Definition of the entry, described in the DEFINITION line.


Returns

String



199
200
201
# File 'lib/bio/db/kegg/genes.rb', line 199

# Definition of the entry, described in the DEFINITION line.
#
# *Returns*:: String
def definition
  tag = 'DEFINITION'
  field_fetch(tag)
end

#divisionObject

Division of the entry, described in the ENTRY line.


Returns

String



149
150
151
# File 'lib/bio/db/kegg/genes.rb', line 149

# Division of the entry (CDS, tRNA etc.), described in the ENTRY line.
#
# *Returns*:: String
def division
  parsed = entry
  parsed['division']
end

#eclinks ⇒ Object

Enzyme’s EC numbers shown in the DEFINITION line.


Returns

Array containing String



206
207
208
209
210
211
212
213
214
215
# File 'lib/bio/db/kegg/genes.rb', line 206

# Enzyme's EC numbers shown in the DEFINITION line.
#
# Looks for a "[EC:...]" group first and falls back to "(EC:...)". The
# captured text is split on whitespace. The result (an empty Array when
# neither form is present) is cached in @eclinks.
#
# *Returns*:: Array containing String
def eclinks
  return @eclinks if defined? @eclinks

  text = definition
  ec_text = text.slice(/\[EC\:([^\]]+)\]/, 1) || text.slice(/\(EC\:([^\)]+)\)/, 1)
  @eclinks = ec_text ? ec_text.strip.split(/\s+/) : []
end

#entryObject

Returns the “ENTRY” line content as a Hash. For example,

{"organism"=>"E.coli", "division"=>"CDS", "id"=>"b0356"}

Returns

Hash



125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/bio/db/kegg/genes.rb', line 125

# Returns the "ENTRY" line content as a Hash. For example,
#
#   {"organism"=>"E.coli", "division"=>"CDS", "id"=>"b0356"}
#
# The line is read by fixed columns: characters 12..29 hold the ID,
# 30..39 the division and 40..80 the organism. Lines of 30 characters or
# fewer yield an empty Hash. The Hash has '' as its default value, so a
# missing key reads as an empty string. The result is cached in
# @data['ENTRY'].
#
# *Returns*:: Hash
def entry
  unless @data['ENTRY']
    hash = Hash.new('')
    line = get('ENTRY')
    if line.length > 30
      # String#[] with a range starting past the end of the string returns
      # nil (e.g. a line that stops after the division column), so coerce
      # with to_s before stripping instead of raising NoMethodError.
      hash['id']       = line[12..29].to_s.strip
      hash['division'] = line[30..39].to_s.strip
      hash['organism'] = line[40..80].to_s.strip
    end
    @data['ENTRY'] = hash
  end
  @data['ENTRY']
end

#entry_idObject

ID of the entry, described in the ENTRY line.


Returns

String



142
143
144
# File 'lib/bio/db/kegg/genes.rb', line 142

# ID of the entry, described in the ENTRY line.
#
# *Returns*:: String
def entry_id
  parsed = entry
  parsed['id']
end

#gbpositionObject

The position in the genome described in the POSITION line as GenBank feature table location formatted string.


Returns

String



278
279
280
# File 'lib/bio/db/kegg/genes.rb', line 278

# The position in the genome described in the POSITION line as a GenBank
# feature table location formatted string. Everything up to and including
# the first colon is removed from the position string.
#
# *Returns*:: String
def gbposition
  raw = position
  raw.sub(/.*?:/, '')
end

#geneObject

The method will be deprecated. Use the first element of Bio::KEGG::GENES#names (names.first) instead.

Returns the first gene name described in the NAME line.


Returns

String



192
193
194
# File 'lib/bio/db/kegg/genes.rb', line 192

# The method will be deprecated.
#
# Returns the first gene name described in the NAME line.
#
# *Returns*:: String
def gene
  all_names = genes
  all_names.first
end

#genesObject

The method will be deprecated. Use Bio::KEGG::GENES#names.

Names of the entry as an Array, described in the NAME line.


Returns

Array containing String



182
183
184
# File 'lib/bio/db/kegg/genes.rb', line 182

# The method will be deprecated. Use Bio::KEGG::GENES#names.
#
# Names of the entry as an Array, described in the NAME line.
#
# *Returns*:: Array containing String
def genes
  list = names_as_array
  list
end

#keggclassObject

Returns CLASS field of the entry.



242
243
244
# File 'lib/bio/db/kegg/genes.rb', line 242

# Returns CLASS field of the entry.
def keggclass
  tag = 'CLASS'
  field_fetch(tag)
end

#keggclassesObject

Returns an Array of biological classes in CLASS field.



247
248
249
# File 'lib/bio/db/kegg/genes.rb', line 247

# Returns an Array of biological classes in CLASS field.
#
# Each " [..." annotation is removed up to (but not including) its closing
# bracket; the remaining "]" characters then serve as the separators.
def keggclasses
  without_ids = keggclass.gsub(/ \[[^\]]+/, '')
  without_ids.split(/\] ?/)
end

#locationsObject

The position in the genome described in the POSITION line as Bio::Locations object.


Returns

Bio::Locations object



286
287
288
# File 'lib/bio/db/kegg/genes.rb', line 286

# The position in the genome described in the POSITION line as
# Bio::Locations object, built from the gbposition string.
#
# *Returns*:: Bio::Locations object
def locations
  span = gbposition
  Bio::Locations.new(span)
end

#motifObject

The specification of the method will be changed in the future. Please use Bio::KEGG::GENES#motifs.

Motif information described in the MOTIF lines.


Returns

Hash



325
326
327
# File 'lib/bio/db/kegg/genes.rb', line 325

# The specification of the method will be changed in the future.
# Please use Bio::KEGG::GENES#motifs.
#
# Motif information described in the MOTIF lines.
#
# *Returns*:: Hash
def motif
  result = motifs
  result
end

#motifs_as_hashObject Also known as: motifs

Motif information described in the MOTIF lines.


Returns

Hash



300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
# File 'lib/bio/db/kegg/genes.rb', line 300

# Motif information described in the MOTIF lines.
#
# A line starting with "<db>:" opens a new database section; the text
# after the first colon is split on whitespace into IDs. A line without
# such a prefix is a continuation and its IDs are appended to the most
# recently seen database (nil if none has been seen yet). The result is
# cached in @data['MOTIF'].
#
# *Returns*:: Hash of Array of IDs in MOTIF
def motifs_as_hash
  return @data['MOTIF'] if @data['MOTIF']

  by_db = {}
  current_db = nil
  motifs_as_strings.each do |row|
    ids = row
    current_db, ids = row.split(/:/, 2) if row[/^\S+:/]
    (by_db[current_db] ||= []).concat(ids.strip.split(/\s+/))
  end
  @data['MOTIF'] = by_db
end

#motifs_as_stringsObject

Motif information described in the MOTIF lines.


Returns

Array containing String



293
294
295
# File 'lib/bio/db/kegg/genes.rb', line 293

# Motif information described in the MOTIF lines, as returned by
# lines_fetch.
def motifs_as_strings
  tag = 'MOTIF'
  lines_fetch(tag)
end

#nameObject

Returns the NAME line.


Returns

String



163
164
165
# File 'lib/bio/db/kegg/genes.rb', line 163

# Returns the NAME line.
#
# *Returns*:: String
def name
  tag = 'NAME'
  field_fetch(tag)
end

#names_as_arrayObject Also known as: names

Names of the entry as an Array, described in the NAME line.


Returns

Array containing String



171
172
173
# File 'lib/bio/db/kegg/genes.rb', line 171

# Names of the entry as an Array, described in the NAME line.
# The NAME text is split on the two-character separator ", ".
#
# *Returns*:: Array containing String
def names_as_array
  separator = ', '
  name.split(separator)
end

#ntlenObject Also known as: nalen

Returns nucleic acid sequence length.


Returns

Integer



411
412
413
# File 'lib/bio/db/kegg/genes.rb', line 411

# Returns nucleic acid sequence length, taken from the first run of
# digits found in the NTSEQ field.
#
# *Returns*:: Integer (0 when the field contains no digits)
def ntlen
  length_text = fetch('NTSEQ')[/\d+/]
  length_text.to_i
end

#ntseqObject Also known as: naseq

Returns nucleic acid sequence described in the NTSEQ lines.


Returns

Bio::Sequence::NA object



400
401
402
403
404
405
# File 'lib/bio/db/kegg/genes.rb', line 400

# Returns nucleic acid sequence described in the NTSEQ lines.
#
# Every run of digits is removed from the fetched field before the
# sequence object is built. The result is cached in @data['NTSEQ'].
#
# *Returns*:: Bio::Sequence::NA object
def ntseq
  @data['NTSEQ'] ||= Bio::Sequence::NA.new(fetch('NTSEQ').gsub(/\d+/, ''))
end

#organismObject

Organism name of the entry, described in the ENTRY line.


Returns

String



156
157
158
# File 'lib/bio/db/kegg/genes.rb', line 156

# Organism name of the entry (H.sapiens etc.), described in the ENTRY line.
#
# *Returns*:: String
def organism
  parsed = entry
  parsed['organism']
end

#orthologs_as_hashObject Also known as: orthologs

Returns a Hash of the orthology ID and definition in ORTHOLOGY field.



107
# File 'lib/bio/db/kegg/genes.rb', line 107

# Returns a Hash of the orthology ID and definition in ORTHOLOGY field.
# Delegates entirely to the inherited/mixed-in implementation.
def orthologs_as_hash
  super
end

#orthologs_as_stringsObject

Orthologs described in the ORTHOLOGY lines.


Returns

Array containing String



220
221
222
# File 'lib/bio/db/kegg/genes.rb', line 220

# Orthologs described in the ORTHOLOGY lines.
#
# *Returns*:: Array containing String
def orthologs_as_strings
  tag = 'ORTHOLOGY'
  lines_fetch(tag)
end

#pathwayObject

Returns the PATHWAY lines as a String.


Returns

String



227
228
229
230
231
232
# File 'lib/bio/db/kegg/genes.rb', line 227

# Returns the PATHWAY lines as a String.
#
# The fetched value is cached in @pathway, so fetch is called only on
# the first invocation.
#
# *Returns*:: String
def pathway
  return @pathway if defined? @pathway

  @pathway = fetch('PATHWAY')
end

#pathways_as_hashObject Also known as: pathways

Returns a Hash of the pathway ID and name in PATHWAY field.



102
# File 'lib/bio/db/kegg/genes.rb', line 102

# Returns a Hash of the pathway ID and name in PATHWAY field.
# Delegates entirely to the inherited/mixed-in implementation.
def pathways_as_hash
  super
end

#pathways_as_stringsObject

Pathways described in the PATHWAY lines.


Returns

Array containing String



237
238
239
# File 'lib/bio/db/kegg/genes.rb', line 237

# Pathways described in the PATHWAY lines.
#
# *Returns*:: Array containing String
def pathways_as_strings
  tag = 'PATHWAY'
  lines_fetch(tag)
end

#positionObject

The position in the genome described in the POSITION line.


Returns

String



254
255
256
257
258
259
# File 'lib/bio/db/kegg/genes.rb', line 254

# The position in the genome described in the POSITION line.
#
# All whitespace is removed from the fetched field. The result is cached
# in @data['POSITION'].
#
# *Returns*:: String
def position
  @data['POSITION'] ||= fetch('POSITION').gsub(/\s/, '')
end

#structureObject Also known as: structures

Returns structure ID information described in the STRUCTURE lines.


Returns

Array containing String



339
340
341
342
343
344
# File 'lib/bio/db/kegg/genes.rb', line 339

# Returns structure ID information described in the STRUCTURE lines.
#
# Any leading "PDB: " prefix is removed from the fetched field and the
# remainder is split on whitespace, e.g. "PDB: 1A9X 1CS0" gives
# ['1A9X', '1CS0']. The result is cached in @data['STRUCTURE'].
#
# *Returns*:: Array containing String
def structure
  return @data['STRUCTURE'] if @data['STRUCTURE']

  ids_text = fetch('STRUCTURE').sub(/(PDB: )*/, '')
  @data['STRUCTURE'] = ids_text.split(/\s+/)
end