Module: Bio::GFFbrowser::Digest::Parser
- Includes:
- Helpers, Helpers::Error, Helpers::Validate, Gff3Component, Gff3Features
- Defined in:
- lib/bio/db/gff/digest/gffparser.rb
Overview
Both in-memory and no-cache fully digest parsers share this Parser module.
Instance Method Summary
-
#each_CDS ⇒ Object
Yield the id, recs, and containing component.
-
#each_CDS_seq ⇒ Object
Yield a unique description and the sequence.
-
#each_exon ⇒ Object
Yield the id, recs, and containing component.
-
#each_exon_seq ⇒ Object
Yield a unique description and the sequence.
-
#each_gene ⇒ Object
Yield the id, recs, and containing component of genes.
-
#each_gene_seq ⇒ Object
Yield a unique description and the sequence.
-
#each_mRNA ⇒ Object
Yield the id, recs, and containing component of mRNAs.
-
#each_mRNA_seq ⇒ Object
Yield a unique description and the sequence.
- #read_fasta ⇒ Object
- #show_unrecognized_features ⇒ Object
-
#store_record(rec) ⇒ Object
Takes a parsed record
rec
and stores items in the relevant lists/tables.
Methods included from Helpers::Error
Methods included from Helpers::Validate
#validate_cdss, #validate_mrnas
Instance Method Details
#each_CDS ⇒ Object
Yield the id, recs, and containing component
89 90 91 92 |
# File 'lib/bio/db/gff/digest/gffparser.rb', line 89
#
# Walk the CDS list and yield the id, the records and the containing
# component of every entry. +parse+ is invoked first when @cdslist has
# not been set yet.
def each_CDS
  parse unless @cdslist
  each_item(@cdslist) do |id, recs, component|
    yield id, recs, component
  end
end
#each_CDS_seq ⇒ Object
Yield a unique description and the sequence
131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
# File 'lib/bio/db/gff/digest/gffparser.rb', line 131
#
# Yield a unique description and the assembled sequence of every CDS.
#
# Entries without a containing component are skipped silently. When
# @sequencelist holds nothing under the component's seqname a warning is
# issued instead of yielding. The sequence is assembled with the
# :codonize option switched on; a result whose size is not a multiple of
# three is reported (and its records printed) but still yielded.
def each_CDS_seq
  each_CDS do |id, reclist, component|
    next unless component

    sequence = @sequencelist[component.seqname]
    next warn("No sequence information for", id) unless sequence

    seq = assemble(sequence, component.start, reclist, @options.merge(:codonize => true))
    unless (seq.size % 3).zero?
      p reclist # leave this in
      # raise "CDS size #{seq.size} is not a multiple of 3! <#{seq}>"
      warn "CDS size is not a multiple of 3", id
    end
    yield description(id, component, reclist), seq
  end
end
#each_exon ⇒ Object
Yield the id, recs, and containing component
95 96 97 98 |
# File 'lib/bio/db/gff/digest/gffparser.rb', line 95
#
# Walk the exon list and yield the id, the records and the containing
# component of every entry. +parse+ is invoked first when @exonlist has
# not been set yet.
def each_exon
  parse unless @exonlist
  each_item(@exonlist) do |id, recs, component|
    yield id, recs, component
  end
end
#each_exon_seq ⇒ Object
Yield a unique description and the sequence
152 153 154 155 156 157 158 159 160 161 162 163 164 |
# File 'lib/bio/db/gff/digest/gffparser.rb', line 152
#
# Yield a unique description and the assembled sequence of every exon.
#
# Entries without a containing component are skipped silently. When
# @sequencelist holds nothing under the component's seqname a warning is
# issued instead of yielding.
def each_exon_seq
  each_exon do |id, reclist, component|
    next unless component

    sequence = @sequencelist[component.seqname]
    next warn("No sequence information for", id) unless sequence

    seq = assemble(sequence, component.start, reclist)
    yield description(id, component, reclist), seq
  end
end
#each_gene ⇒ Object
Yield the id, recs, and containing component of genes
77 78 79 80 |
# File 'lib/bio/db/gff/digest/gffparser.rb', line 77
#
# Walk the gene (ORF) list and yield the id, the records and the
# containing component of every entry. +parse+ is invoked first when
# @orflist has not been set yet.
def each_gene
  parse unless @orflist
  each_item(@orflist) do |id, recs, component|
    yield id, recs, component
  end
end
#each_gene_seq ⇒ Object
Yield a unique description and the sequence
101 102 103 104 105 106 107 108 109 110 111 112 113 |
# File 'lib/bio/db/gff/digest/gffparser.rb', line 101
#
# Yield a unique description and the assembled sequence of every gene.
#
# Entries without a containing component are skipped silently. When
# @sequencelist holds nothing under the component's seqname a warning is
# issued instead of yielding.
def each_gene_seq
  each_gene do |id, reclist, component|
    next unless component

    sequence = @sequencelist[component.seqname]
    next warn("No sequence information for", id) unless sequence

    # Build the description before assembling, as the arguments of the
    # yield are evaluated left to right.
    label = description(id, component, reclist)
    assembled = assemble(sequence, component.start, reclist)
    yield label, assembled
  end
end
#each_mRNA ⇒ Object
Yield the id, recs, and containing component of mRNAs
83 84 85 86 |
# File 'lib/bio/db/gff/digest/gffparser.rb', line 83
#
# Walk the mRNA list and yield the id, the records and the containing
# component of every entry. +parse+ is invoked first when @mrnalist has
# not been set yet.
def each_mRNA
  parse unless @mrnalist
  each_item(@mrnalist) do |id, recs, component|
    yield id, recs, component
  end
end
#each_mRNA_seq ⇒ Object
Yield a unique description and the sequence
116 117 118 119 120 121 122 123 124 125 126 127 128 |
# File 'lib/bio/db/gff/digest/gffparser.rb', line 116
#
# Yield a unique description and the assembled sequence of every mRNA.
#
# Entries without a containing component are skipped silently. When
# @sequencelist holds nothing under the component's seqname a warning is
# issued instead of yielding.
def each_mRNA_seq
  each_mRNA do |id, reclist, component|
    next unless component

    sequence = @sequencelist[component.seqname]
    next warn("No sequence information for", id) unless sequence

    # Build the description before assembling, as the arguments of the
    # yield are evaluated left to right.
    label = description(id, component, reclist)
    assembled = assemble(sequence, component.start, reclist)
    yield label, assembled
  end
end
#read_fasta ⇒ Object
63 64 65 66 67 68 69 70 71 72 73 74 |
# File 'lib/bio/db/gff/digest/gffparser.rb', line 63
#
# Load the FASTA file named by @options[:fasta_filename], if any, and
# store every record the reader yields in @sequencelist under its id.
# Does nothing when the option is not set.
def read_fasta
  fasta_filename = @options[:fasta_filename]
  return unless fasta_filename

  File.open(fasta_filename) do |file|
    Bio::GFF::FastaReader.new(file).each do |id, fastarec|
      @sequencelist[id] = fastarec
    end
  end
end
#show_unrecognized_features ⇒ Object
57 58 59 60 61 |
# File 'lib/bio/db/gff/digest/gffparser.rb', line 57
#
# Emit one warning for each feature type recorded in
# @unrecognized_features. Keys that are nil or false are passed over.
def show_unrecognized_features
  @unrecognized_features.keys.each do |feature_type|
    next unless feature_type

    warn "Feature has no match", feature_type
  end
end
#store_record(rec) ⇒ Object
Takes a parsed record rec
and stores items in the relevant lists/tables
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
# File 'lib/bio/db/gff/digest/gffparser.rb', line 28
#
# Takes a parsed record +rec+ and stores it in the relevant lists/tables.
#
# Comment records are ignored. For every other record the formatted id
# and the seqname are counted. Records whose feature type is listed in
# COMPONENT_TYPES are registered in @componentlist, with a warning when
# the record carries no ID of its own. Genes, mRNAs, CDSs and exons are
# appended to their respective lists, matched either by name or by
# Sequence Ontology accession. Anything else that is neither a component
# nor listed in IGNORE_FEATURES is remembered in @unrecognized_features.
def store_record(rec)
  return if rec.comment # skip GFF comments

  id = Helpers::Record::formatID(rec)
  @count_ids.add(id)
  @count_seqnames.add(rec.seqname)

  feature_type = rec.feature_type
  is_component = COMPONENT_TYPES.include?(feature_type)
  if is_component
    # check for container ID
    warn("Container <#{feature_type}> has no ID, so using sequence name instead", id) if rec.id.nil?
    @componentlist[id] = rec
    info "Added #{feature_type} with component ID #{id}"
  end

  # NOTE: alternatives in a `when` must be comma separated. Writing
  # `'gene' || 'SO:0000704'` evaluates to just 'gene', which silently
  # dropped the Sequence Ontology accessions.
  case feature_type
  when 'gene', 'SO:0000704'
    @orflist.add(id, rec)
  when 'mRNA', 'SO:0000234'
    @mrnalist.add(id, rec)
  when 'CDS', 'SO:0000316'
    @cdslist.add(id, rec)
  when 'exon', 'SO:0000147'
    @exonlist.add(id, rec)
  else
    if !is_component && !IGNORE_FEATURES.include?(feature_type)
      @unrecognized_features[feature_type] = true
    end
  end
end