Class: Bio::GFFbrowser::Digest::NoCache

Inherits:
Object
  • Object
show all
Includes:
NoCacheHelpers, Parser, Gff3Sequence
Defined in:
lib/bio/db/gff/digest/gffnocache.rb

Instance Method Summary collapse

Methods included from Parser

#each_CDS, #each_CDS_seq, #each_exon, #each_exon_seq, #each_gene, #each_gene_seq, #each_mRNA, #each_mRNA_seq, #read_fasta, #show_unrecognized_features, #store_record

Methods included from Helpers::Logger

#debug, #error, #info, #log_sys_info, #warn

Methods included from Helpers::Validate

#validate_cdss, #validate_mrnas

Constructor Details

#initialize(filename, options) ⇒ NoCache

Returns a new instance of NoCache.



88
89
90
91
92
# File 'lib/bio/db/gff/digest/gffnocache.rb', line 88

# Keep the GFF3 file name and the options hash on the instance, and create
# a Bio::GFF::GFF3::FileIterator for that file name (stored in @iter).
def initialize(filename, options)
  @filename, @options = filename, options
  @iter = Bio::GFF::GFF3::FileIterator.new(filename)
end

Instance Method Details

#each_item(list) ⇒ Object



128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# File 'lib/bio/db/gff/digest/gffnocache.rb', line 128

# Fetch the records behind each stored seek list and yield them to the caller.
#
# +list+ is iterated with +each+, yielding an id together with a list of
# file positions (presumably one of the SeekLinkedRecs built by #parse --
# confirm against callers). Every position is turned into a record through
# SeekRec.fetch, reading from the iterator's file handle with the parser
# named in @options[:parser].
#
# The component is looked up once per id, from the first record. When
# @options[:no_assemble] is set, each record is yielded on its own as a
# one-element array; otherwise all records of the id are yielded together.
#
# Yields: id, records (Array), component.
# Returns an Enumerator when no block is given.
def each_item(list)
  return enum_for(:each_item, list) unless block_given?

  fh = @iter.fh
  parser = @options[:parser]
  list.each do |id, io_seeklist|
    recs = io_seeklist.map { |fpos| SeekRec.fetch(fh, fpos, parser) }
    # Without a first record there is nothing to derive a component from,
    # so ids with an empty seek list are skipped rather than crashing on nil.
    next if recs.empty?

    component = find_component(recs.first)
    if @options[:no_assemble]
      recs.each { |rec| yield id, [rec], component }
    else
      yield id, recs, component
    end
  end
end

#parse ⇒ Object

Parse the whole file once and store all seek locations, rather than the records themselves.



96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/bio/db/gff/digest/gffnocache.rb', line 96

# Walk the GFF3 file a single time, remembering where each record lives
# (its seek position) instead of keeping the parsed records in memory.
#
# Re-creates all counters and seek lists, hands every record from the
# iterator to store_record, copies the sequences reported by the iterator
# into @sequencelist (as strings), then runs the validation and reporting
# helpers. The keys of @count_ids become @genelist, and the result of
# read_fasta is the return value.
#
# Raises a RuntimeError ('Unknown parser') while processing a record when
# @options[:parser] is neither :bioruby nor :line.
def parse
  info("---- Digest DB and store data in mRNA Hash (NoCache)")

  # Counters for ids and seqnames
  @count_ids      = Counter.new
  @count_seqnames = Counter.new
  # Containers, like genes and contigs
  @componentlist  = SeekRecList.new(@iter.fh, @options[:parser])
  # Linked records: genes (ORFs), mRNAs, CDSs and exons
  @orflist  = SeekLinkedRecs.new
  @mrnalist = SeekLinkedRecs.new
  @cdslist  = SeekLinkedRecs.new
  @exonlist = SeekLinkedRecs.new
  @sequencelist = {}
  @unrecognized_features = {}

  @iter.each_rec do |seek_pos, text|
    # Pick the record class for the configured parser, then build the record
    record_class =
      case @options[:parser]
      when :bioruby then Bio::GFF::GFF3::BioRubyFileRecord
      when :line    then Bio::GFF::GFF3::FastParserFileRecord
      else raise 'Unknown parser'
      end
    store_record(record_class.new(seek_pos, text))
  end

  @iter.each_sequence { |seq_id, seq| @sequencelist[seq_id] = seq.to_s }

  validate_mrnas
  validate_cdss
  show_unrecognized_features
  @genelist = @count_ids.keys
  read_fasta
end