Class: Bio::GFFbrowser::Digest::InMemory

Inherits:
Object
  • Object
show all
Includes:
Parser, Gff3Sequence
Defined in:
lib/bio/db/gff/digest/gffinmemory.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Parser

#each_CDS, #each_CDS_seq, #each_exon, #each_exon_seq, #each_gene, #each_gene_seq, #each_mRNA, #each_mRNA_seq, #read_fasta, #show_unrecognized_features, #store_record

Methods included from Helpers::Logger

#debug, #error, #info, #log_sys_info, #warn

Methods included from Helpers::Validate

#validate_cdss, #validate_mrnas

Constructor Details

#initialize(filename, options) ⇒ InMemory

Returns a new instance of InMemory.



22
23
24
25
26
27
28
29
30
31
# File 'lib/bio/db/gff/digest/gffinmemory.rb', line 22

# Create the in-memory digest for the GFF file at +filename+.
#
# +options+ is kept in @options. Its :parser entry selects the backend
# stored in @gff:
#
# * :bioruby      - the file is read in full and handed to Bio::GFF::GFF3
# * anything else - the file name is handed to the line based
#   Bio::GFFbrowser::GFF3ParseFile
def initialize(filename, options)
  @options = options
  @gff =
    if @options[:parser] == :bioruby
      # BioRuby parser works on the complete file contents
      Bio::GFF::GFF3.new(File.read(filename))
    else
      # line parser opens the file itself
      Bio::GFFbrowser::GFF3ParseFile.new(filename)
    end
end

Instance Attribute Details

#sequencelist ⇒ Object (readonly)

Returns the value of attribute sequencelist.



20
21
22
# File 'lib/bio/db/gff/digest/gffinmemory.rb', line 20

# Read-only accessor for the @sequencelist instance variable.
#
# #parse resets @sequencelist to an empty Hash and then stores one entry
# per sequence found in the GFF data, keyed by the sequence's entry_id with
# the sequence converted to a String as value. The constructor does not
# assign it, so this presumably returns nil until #parse has been called.
def sequencelist
  @sequencelist
end

Instance Method Details

#each_item(list) ⇒ Object



62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/bio/db/gff/digest/gffinmemory.rb', line 62

# Walk +list+ (an id => records collection) and yield each entry together
# with its component.
#
# The component is looked up once per id, using the first record of the
# group. When the :no_assemble option is set every record is yielded on its
# own, wrapped in a single element Array, so the block always receives
# (id, Array of records, component). Otherwise the full group of records is
# yielded in one go.
def each_item list
  list.each do | id, recs |
    # the first record stands in for the whole group
    component = find_component(recs[0])
    if @options[:no_assemble]
      recs.each do | rec |
        yield id, [rec], component
      end
    else
      yield id, recs, component
    end
  end
end

#parse ⇒ Object

Digest mRNA from the GFFdb and store it in a Hash. Next, yield(id, seq) from the Hash.



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/bio/db/gff/digest/gffinmemory.rb', line 35

# Digest the parsed GFF data into the lookup tables of this object.
#
# All tables are recreated empty first, so calling parse again starts from
# scratch. Each record of @gff is then passed to store_record, and each
# sequence of @gff is copied into @sequencelist under its entry_id. After
# that the validation helpers run, @genelist is taken from the keys of
# @count_ids, and finally read_fasta is invoked. System information is
# logged after the GFF stage and again after the FASTA stage.
def parse
  info "---- Digest DB and store data in mRNA Hash"

  # Fresh state for this run
  @count_ids = Counter.new             # tally of ids
  @count_seqnames = Counter.new        # tally of seqnames
  @componentlist = {}                  # containers, such as genes and contigs
  @orflist = LinkedRecs.new
  @mrnalist = LinkedRecs.new           # linked mRNA records
  @cdslist = LinkedRecs.new
  @exonlist = LinkedRecs.new
  @sequencelist = {}
  @unrecognized_features = {}

  # Records first, then the sequences that came with the GFF
  @gff.records.each { |record| store_record(record) }
  @gff.sequences.each do |sequence|
    # Bio::Sequence wrapping a Bio::FastaFormat; keep the plain string
    @sequencelist[sequence.entry_id] = sequence.to_s
  end

  validate_mrnas
  validate_cdss
  show_unrecognized_features
  @genelist = @count_ids.keys
  log_sys_info("After reading GFF")

  read_fasta
  log_sys_info("After reading FASTA")
end