Class: Bio::KEGG::GENES

Inherits:
Bio::KEGGDB show all
Includes:
Common::DblinksAsHash, Common::DiseasesAsHash, Common::OrthologsAsHash, Common::PathwaysAsHash
Defined in:
lib/bio/db/kegg/genes.rb

Overview

Description

KEGG GENES entry parser.

References

Constant Summary collapse

# DELIMITER and RS both name the same string: a line holding only "///".
# NOTE(review): presumably the separator between entries in a flat file — confirm.
DELIMITER = RS = "\n///\n"
# Passed to the superclass constructor by #initialize.
TAGSIZE = 12

Instance Method Summary collapse

Methods inherited from DB

#exists?, #fetch, #get, open, #tags

Constructor Details

#initialize(entry) ⇒ GENES

Creates a new Bio::KEGG::GENES object.


Arguments:

  • (required) entry: (String) single entry as a string

Returns

Bio::KEGG::GENES object



120
121
122
# File 'lib/bio/db/kegg/genes.rb', line 120

# Builds the object from one raw entry string by handing it, together with
# TAGSIZE, to the superclass constructor; no other setup happens here.
# NOTE(review): TAGSIZE is presumably the width of the tag column — confirm in the superclass.
def initialize(entry)
  super(entry, TAGSIZE)
end

Instance Method Details

#aalenObject

Returns length of the amino acid sequence described in the AASEQ lines.


Returns

Integer



419
420
421
# File 'lib/bio/db/kegg/genes.rb', line 419

# Length recorded in the AASEQ field: the first run of digits found in the
# fetched text, converted to an Integer (0 when the text has no digits).
def aalen
  length_token = fetch('AASEQ').slice(/\d+/)
  length_token.to_i
end

#aaseqObject

Returns amino acid sequence described in the AASEQ lines.


Returns

Bio::Sequence::AA object



409
410
411
412
413
414
# File 'lib/bio/db/kegg/genes.rb', line 409

# Amino acid sequence from the AASEQ field as a Bio::Sequence::AA.
# Every run of digits (e.g. the leading length) is removed from the fetched
# text before the sequence object is built; the result is cached in @data.
def aaseq
  @data['AASEQ'] ||= Bio::Sequence::AA.new(fetch('AASEQ').gsub(/\d+/, ''))
end

#chromosomeObject

Chromosome described in the POSITION line.


Returns

String or nil



290
291
292
293
294
295
296
297
298
# File 'lib/bio/db/kegg/genes.rb', line 290

# Chromosome part of the POSITION value.
#
# * With a colon present, everything before the first colon is returned.
# * Without a colon and without "..", the whole position string is returned.
# * Otherwise (a bare range such as "123..456") the result is nil.
def chromosome
  pos = position
  return pos.sub(/:.*/, '') if pos[/:/]

  pos unless pos[/\.\./]
end

#codon_usage(codon = nil) ⇒ Object

Codon usage data described in the CODON_USAGE lines. (Deprecated: the CODON_USAGE field no longer exists.)


Returns

Hash



376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
# File 'lib/bio/db/kegg/genes.rb', line 376

# Codon usage table keyed by lowercase codon ("ttt", "ttc", ... "ggg").
#
# The 64 codons are enumerated with the bases in the order t, c, a, g, and
# the n-th codon is paired with the n-th value of cu_list (nil when cu_list
# is shorter). The table is cached in @data.
#
# NOTE: the +codon+ argument is accepted but not consulted; the whole Hash
# is always returned.
def codon_usage(codon = nil)
  return @data['CODON_USAGE'] if @data['CODON_USAGE']

  counts = cu_list
  bases = %w(t c a g)
  table = {}
  bases.product(bases, bases).each_with_index do |triplet, idx|
    table[triplet.join] = counts[idx]
  end
  @data['CODON_USAGE'] = table
end

#cu_listObject

Codon usage data described in the CODON_USAGE lines as an array.


Returns

Array



396
397
398
399
400
401
402
403
404
# File 'lib/bio/db/kegg/genes.rb', line 396

# Codon usage values from the CODON_USAGE field as a flat Array of Integers.
#
# The content of the first line is blanked out, then for each remaining line
# the first 11 characters are dropped and the rest is read as consecutive
# 4-character columns, each converted with to_i.
def cu_list
  body = get('CODON_USAGE').sub(/.*/, '') # cut 1st line
  body.each_line.flat_map do |line|
    line.chomp.sub(/^.{11}/, '').scan(/..../).map { |cu| cu.to_i }
  end
end

#dblinks_as_hashObject

Returns a Hash of the DB name and an Array of entry IDs in DBLINKS field.



97
# File 'lib/bio/db/kegg/genes.rb', line 97

# Pass-through to the inherited dblinks_as_hash; no logic is added here.
# NOTE(review): the implementation presumably comes from the included
# Common::DblinksAsHash module — confirm.
def dblinks_as_hash; super; end

#dblinks_as_stringsObject

Links to other databases described in the DBLINKS lines.


Returns

Array containing String objects



358
359
360
# File 'lib/bio/db/kegg/genes.rb', line 358

# Returns whatever lines_fetch yields for the 'DBLINKS' tag.
# NOTE(review): presumably one String per DBLINKS line — confirm in lines_fetch.
def dblinks_as_strings
  lines_fetch('DBLINKS')
end

#definitionObject

Definition of the entry, described in the DEFINITION line.


Returns

String



204
205
206
# File 'lib/bio/db/kegg/genes.rb', line 204

# Returns the content of the 'DEFINITION' field as obtained from field_fetch.
# eclinks parses EC numbers out of this value.
def definition
  field_fetch('DEFINITION')
end

#diseases_as_hashObject Also known as: diseases

Returns a Hash of the disease ID and its definition



112
# File 'lib/bio/db/kegg/genes.rb', line 112

# Pass-through to the inherited diseases_as_hash; no logic is added here.
# NOTE(review): the implementation presumably comes from the included
# Common::DiseasesAsHash module — confirm.
def diseases_as_hash; super; end

#diseases_as_stringsObject

Diseases described in the DISEASE lines.


Returns

Array containing String



256
257
258
# File 'lib/bio/db/kegg/genes.rb', line 256

# Returns whatever lines_fetch yields for the 'DISEASE' tag.
# NOTE(review): presumably one String per DISEASE line — confirm in lines_fetch.
def diseases_as_strings
  lines_fetch('DISEASE')
end

#divisionObject

Division of the entry, described in the ENTRY line.


Returns

String



154
155
156
# File 'lib/bio/db/kegg/genes.rb', line 154

# Looks up the 'division' key of the Hash built by #entry.
# Because that Hash has '' as its default, an unparsed ENTRY line gives ''.
def division
  entry['division']			# CDS, tRNA etc.
end

#drug_targets_as_stringsObject

Drug targets described in the DRUG_TARGET lines.


Returns

Array containing String



263
264
265
# File 'lib/bio/db/kegg/genes.rb', line 263

# Returns whatever lines_fetch yields for the 'DRUG_TARGET' tag.
# NOTE(review): presumably one String per DRUG_TARGET line — confirm in lines_fetch.
def drug_targets_as_strings
  lines_fetch('DRUG_TARGET')
end

#eclinksObject

Enzyme’s EC numbers shown in the DEFINITION line.


Returns

Array containing String



211
212
213
214
215
216
217
218
219
220
# File 'lib/bio/db/kegg/genes.rb', line 211

# EC numbers embedded in the definition text.
#
# Looks first for a "[EC:...]" group and, failing that, a "(EC:...)" group;
# the captured text is split on whitespace. Returns an empty Array when
# neither form is present. The result is cached in @eclinks.
def eclinks
  return @eclinks if defined? @eclinks

  text = definition
  ec_text = text.slice(/\[EC\:([^\]]+)\]/, 1) || text.slice(/\(EC\:([^\)]+)\)/, 1)
  @eclinks = ec_text ? ec_text.strip.split(/\s+/) : []
end

#entryObject

Returns the “ENTRY” line content as a Hash. For example,

{"organism"=>"E.coli", "division"=>"CDS", "id"=>"b0356"}

Returns

Hash



130
131
132
133
134
135
136
137
138
139
140
141
142
# File 'lib/bio/db/kegg/genes.rb', line 130

# Parses the ENTRY line into a Hash with the keys 'id', 'division' and
# 'organism', read from fixed columns 12..29, 30..39 and 40..80 of the line.
#
# The Hash is created with '' as its default, so when the line is 30
# characters or shorter (nothing is parsed) every lookup yields ''.
# The result is cached in @data['ENTRY'].
#
# Returns a Hash, e.g.
#   {"organism"=>"E.coli", "division"=>"CDS", "id"=>"b0356"}
def entry
  unless @data['ENTRY']
    hash = Hash.new('')
    line = get('ENTRY')
    if line.length > 30
      # A slice that starts past the end of the line is nil (e.g. a line of
      # 31..39 characters has no organism column); to_s turns that into ''
      # instead of letting strip raise NoMethodError.
      hash['id']       = line[12..29].to_s.strip
      hash['division'] = line[30..39].to_s.strip
      hash['organism'] = line[40..80].to_s.strip
    end
    @data['ENTRY'] = hash
  end
  @data['ENTRY']
end

#entry_idObject

ID of the entry, described in the ENTRY line.


Returns

String



147
148
149
# File 'lib/bio/db/kegg/genes.rb', line 147

# Looks up the 'id' key of the Hash built by #entry.
# Because that Hash has '' as its default, an unparsed ENTRY line gives ''.
def entry_id
  entry['id']
end

#gbpositionObject

The position in the genome described in the POSITION line as GenBank feature table location formatted string.


Returns

String



304
305
306
# File 'lib/bio/db/kegg/genes.rb', line 304

# Returns the position string with its leading "<prefix>:" removed: the
# shortest span from the start up to and including the first colon is cut.
# A position without a colon is returned unchanged.
def gbposition
  position.sub(/.*?:/, '')
end

#geneObject

The method will be deprecated. Use entry.names.first instead.

Returns the first gene name described in the NAME line.


Returns

String



197
198
199
# File 'lib/bio/db/kegg/genes.rb', line 197

# Returns the first element of #genes (nil when that list is empty).
def gene
  genes.first
end

#genesObject

The method will be deprecated. Use Bio::KEGG::GENES#names.

Names of the entry as an Array, described in the NAME line.


Returns

Array containing String



187
188
189
# File 'lib/bio/db/kegg/genes.rb', line 187

# Older name kept for compatibility; simply returns #names_as_array.
def genes
  names_as_array
end

#keggclassObject

Returns CLASS field of the entry.



268
269
270
# File 'lib/bio/db/kegg/genes.rb', line 268

# Returns the content of the 'CLASS' field as obtained from field_fetch.
# keggclasses splits this value into individual classes.
def keggclass
  field_fetch('CLASS')
end

#keggclassesObject

Returns an Array of biological classes in CLASS field.



273
274
275
# File 'lib/bio/db/kegg/genes.rb', line 273

# Splits the CLASS text into an Array of class descriptions.
#
# Each " [..." annotation is removed up to (but not including) its closing
# bracket; the remaining "]" characters, with an optional following space,
# then serve as the separators.
def keggclasses
  without_annotations = keggclass.gsub(/ \[[^\]]+/, '')
  without_annotations.split(/\] ?/)
end

#locationsObject

The position in the genome described in the POSITION line as Bio::Locations object.


Returns

Bio::Locations object



312
313
314
# File 'lib/bio/db/kegg/genes.rb', line 312

# Wraps the string returned by #gbposition in a new Bio::Locations object.
# A fresh object is built on every call; nothing is cached here.
def locations
  Bio::Locations.new(gbposition)
end

#motifObject

The specification of the method will be changed in the future. Please use Bio::KEGG::GENES#motifs.

Motif information described in the MOTIF lines.


Returns

Hash



351
352
353
# File 'lib/bio/db/kegg/genes.rb', line 351

# Singular-named accessor that simply returns the result of calling motifs.
def motif
  motifs
end

#motifs_as_hashObject Also known as: motifs

Motif information described in the MOTIF lines.


Returns

Hash



326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
# File 'lib/bio/db/kegg/genes.rb', line 326

# Motif IDs grouped by database name, as a Hash of Arrays.
#
# A line that starts with "<name>:" opens a new database group; the text
# after the first colon holds its IDs. Any other line is treated as a
# continuation and its IDs are added to the most recently seen database
# (the key is nil if no database has been seen yet). IDs are separated by
# whitespace. The result is cached in @data['MOTIF'].
def motifs_as_hash
  return @data['MOTIF'] if @data['MOTIF']

  table = {}
  current_db = nil
  motifs_as_strings.each do |line|
    ids = line
    current_db, ids = line.split(/:/, 2) if line[/^\S+:/]
    table[current_db] = (table[current_db] || []) + ids.strip.split(/\s+/)
  end
  @data['MOTIF'] = table
end

#motifs_as_stringsObject

Motif information described in the MOTIF lines.


Returns

Array containing String



319
320
321
# File 'lib/bio/db/kegg/genes.rb', line 319

# Returns whatever lines_fetch yields for the 'MOTIF' tag.
# motifs_as_hash iterates over this result line by line.
def motifs_as_strings
  lines_fetch('MOTIF')
end

#nameObject

Returns the NAME line.


Returns

String



168
169
170
# File 'lib/bio/db/kegg/genes.rb', line 168

# Returns the content of the 'NAME' field as obtained from field_fetch.
# names_as_array splits this value into individual names.
def name
  field_fetch('NAME')
end

#names_as_arrayObject Also known as: names

Names of the entry as an Array, described in the NAME line.


Returns

Array containing String



176
177
178
# File 'lib/bio/db/kegg/genes.rb', line 176

# Splits the NAME text on the two-character separator ", " (comma + space)
# and returns the pieces as an Array.
def names_as_array
  name.split(', ')
end

#networks_as_stringsObject

Networks described in the NETWORK lines.


Returns

Array containing String



249
250
251
# File 'lib/bio/db/kegg/genes.rb', line 249

# Returns whatever lines_fetch yields for the 'NETWORK' tag.
# NOTE(review): presumably one String per NETWORK line — confirm in lines_fetch.
def networks_as_strings
  lines_fetch('NETWORK')
end

#ntlenObject Also known as: nalen

Returns nucleic acid sequence length.


Returns

Integer



437
438
439
# File 'lib/bio/db/kegg/genes.rb', line 437

# Length recorded in the NTSEQ field: the first run of digits found in the
# fetched text, converted to an Integer (0 when the text has no digits).
def ntlen
  length_token = fetch('NTSEQ').slice(/\d+/)
  length_token.to_i
end

#ntseqObject Also known as: naseq

Returns nucleic acid sequence described in the NTSEQ lines.


Returns

Bio::Sequence::NA object



426
427
428
429
430
431
# File 'lib/bio/db/kegg/genes.rb', line 426

# Nucleic acid sequence from the NTSEQ field as a Bio::Sequence::NA.
# Every run of digits (e.g. the leading length) is removed from the fetched
# text before the sequence object is built; the result is cached in @data.
def ntseq
  @data['NTSEQ'] ||= Bio::Sequence::NA.new(fetch('NTSEQ').gsub(/\d+/, ''))
end

#organismObject

Organism name of the entry, described in the ENTRY line.


Returns

String



161
162
163
# File 'lib/bio/db/kegg/genes.rb', line 161

# Looks up the 'organism' key of the Hash built by #entry.
# Because that Hash has '' as its default, an unparsed ENTRY line gives ''.
def organism
  entry['organism']			# H.sapiens etc.
end

#orthologs_as_hashObject Also known as: orthologs

Returns a Hash of the orthology ID and definition in ORTHOLOGY field.



107
# File 'lib/bio/db/kegg/genes.rb', line 107

# Pass-through to the inherited orthologs_as_hash; no logic is added here.
# NOTE(review): the implementation presumably comes from the included
# Common::OrthologsAsHash module — confirm.
def orthologs_as_hash; super; end

#orthologs_as_stringsObject

Orthologs described in the ORTHOLOGY lines.


Returns

Array containing String



225
226
227
# File 'lib/bio/db/kegg/genes.rb', line 225

# Returns whatever lines_fetch yields for the 'ORTHOLOGY' tag.
# NOTE(review): presumably one String per ORTHOLOGY line — confirm in lines_fetch.
def orthologs_as_strings
  lines_fetch('ORTHOLOGY')
end

#pathwayObject

Returns the PATHWAY lines as a String.


Returns

String



232
233
234
235
236
237
# File 'lib/bio/db/kegg/genes.rb', line 232

# PATHWAY field text as returned by fetch, cached in @pathway.
# The cache check uses `defined?`, so fetch is called only once even if it
# returned nil or false.
def pathway
  return @pathway if defined? @pathway

  @pathway = fetch('PATHWAY')
end

#pathways_as_hashObject Also known as: pathways

Returns a Hash of the pathway ID and name in PATHWAY field.



102
# File 'lib/bio/db/kegg/genes.rb', line 102

# Pass-through to the inherited pathways_as_hash; no logic is added here.
# NOTE(review): the implementation presumably comes from the included
# Common::PathwaysAsHash module — confirm.
def pathways_as_hash; super; end

#pathways_as_stringsObject

Pathways described in the PATHWAY lines.


Returns

Array containing String



242
243
244
# File 'lib/bio/db/kegg/genes.rb', line 242

# Returns whatever lines_fetch yields for the 'PATHWAY' tag.
# NOTE(review): presumably one String per PATHWAY line — confirm in lines_fetch.
def pathways_as_strings
  lines_fetch('PATHWAY')
end

#positionObject

The position in the genome described in the POSITION line.


Returns

String



280
281
282
283
284
285
# File 'lib/bio/db/kegg/genes.rb', line 280

# POSITION field text with every whitespace character removed.
# The cleaned string is cached in @data['POSITION'].
def position
  @data['POSITION'] ||= fetch('POSITION').gsub(/\s/, '')
end

#structureObject Also known as: structures

Returns structure ID information described in the STRUCTURE lines.


Returns

Array containing String



365
366
367
368
369
370
# File 'lib/bio/db/kegg/genes.rb', line 365

# Structure IDs from the STRUCTURE field as an Array of Strings.
#
# Any leading "PDB: " prefix is stripped from the fetched text, and the rest
# is split on whitespace, e.g. "PDB: 1A9X 1CE8" gives ["1A9X", "1CE8"].
# The Array is cached in @data['STRUCTURE'].
def structure
  @data['STRUCTURE'] ||= fetch('STRUCTURE').sub(/(PDB: )*/, '').split(/\s+/)
end