Module: Bio::GFFbrowser::Digest::Parser

Includes:
Helpers, Helpers::Logger, Helpers::Validate, Gff3Component, Gff3Features
Included in:
InMemory, LruCache, NoCache
Defined in:
lib/bio/db/gff/digest/gffparser.rb

Overview

The in-memory, LRU-cache, and no-cache digest parsers all share this Parser module.

Instance Method Summary collapse

Methods included from Helpers::Logger

#debug, #error, #info, #log_sys_info, #warn

Methods included from Helpers::Validate

#validate_cdss, #validate_mrnas

Instance Method Details

#each_CDS ⇒ Object

Yield the id, recs, and containing component



90
91
92
93
# File 'lib/bio/db/gff/digest/gffparser.rb', line 90

# Walk over all CDS entries, running the parser first when the CDS list
# has not been populated yet.
#
# Yields the id, the records, and the containing component of each entry,
# as delivered by each_item.
def each_CDS
  parse unless @cdslist
  each_item(@cdslist) do |cds_id, cds_recs, container|
    yield cds_id, cds_recs, container
  end
end

#each_CDS_seq ⇒ Object

Yield a unique description and the sequence



132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/bio/db/gff/digest/gffparser.rb', line 132

# Yield a unique description and the assembled sequence for every CDS.
#
# Entries without a containing component are skipped silently. Entries
# whose component has no entry in @sequencelist only produce a warning.
# The sequence is assembled with the :codonize option switched on; when its
# size is not a multiple of three the records are printed and a warning is
# issued, but the sequence is still yielded.
def each_CDS_seq
  each_CDS do |cds_id, cds_recs, container|
    next unless container

    sequence = @sequencelist[container.seqname]
    next warn("No sequence information for", cds_id) unless sequence

    codon_options = @options.merge(:codonize => true)
    seq = assemble(sequence, container.start, cds_recs, codon_options)
    unless seq.size % 3 == 0
      p cds_recs # leave this in
      # raise "CDS size #{seq.size} is not a multiple of 3! <#{seq}>"
      warn "CDS size is not a multiple of 3", cds_id
    end
    yield description(cds_id, container, cds_recs), seq
  end
end

#each_exon ⇒ Object

Yield the id, recs, and containing component



96
97
98
99
# File 'lib/bio/db/gff/digest/gffparser.rb', line 96

# Walk over all exon entries, running the parser first when the exon list
# has not been populated yet.
#
# Yields the id, the records, and the containing component of each entry,
# as delivered by each_item.
def each_exon
  parse unless @exonlist
  each_item(@exonlist) do |exon_id, exon_recs, container|
    yield exon_id, exon_recs, container
  end
end

#each_exon_seq ⇒ Object

Yield a unique description and the sequence



154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/bio/db/gff/digest/gffparser.rb', line 154

# Yield a unique description and the assembled sequence for every exon.
#
# Entries without a containing component are skipped silently. Entries
# whose component has no entry in @sequencelist only produce a warning.
def each_exon_seq
  each_exon do |exon_id, exon_recs, container|
    next unless container

    sequence = @sequencelist[container.seqname]
    next warn("No sequence information for", exon_id) unless sequence

    # Assemble before building the description, as the original did.
    assembled = assemble(sequence, container.start, exon_recs)
    yield description(exon_id, container, exon_recs), assembled
  end
end

#each_gene ⇒ Object

Yield the id, recs, and containing component of genes



78
79
80
81
# File 'lib/bio/db/gff/digest/gffparser.rb', line 78

# Walk over all gene entries, running the parser first when the gene (ORF)
# list has not been populated yet.
#
# Yields the id, the records, and the containing component of each entry,
# as delivered by each_item.
def each_gene
  parse unless @orflist
  each_item(@orflist) do |gene_id, gene_recs, container|
    yield gene_id, gene_recs, container
  end
end

#each_gene_seq ⇒ Object

Yield a unique description and the sequence



102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/bio/db/gff/digest/gffparser.rb', line 102

# Yield a unique description and the assembled sequence for every gene.
#
# Entries without a containing component are skipped silently. Entries
# whose component has no entry in @sequencelist only produce a warning.
def each_gene_seq
  each_gene do |gene_id, gene_recs, container|
    next unless container

    sequence = @sequencelist[container.seqname]
    next warn("No sequence information for", gene_id) unless sequence

    # The description is built before the sequence is assembled.
    yield description(gene_id, container, gene_recs),
          assemble(sequence, container.start, gene_recs)
  end
end

#each_mRNA ⇒ Object

Yield the id, recs, and containing component of mRNAs



84
85
86
87
# File 'lib/bio/db/gff/digest/gffparser.rb', line 84

# Walk over all mRNA entries, running the parser first when the mRNA list
# has not been populated yet.
#
# Yields the id, the records, and the containing component of each entry,
# as delivered by each_item.
def each_mRNA
  parse unless @mrnalist
  each_item(@mrnalist) do |mrna_id, mrna_recs, container|
    yield mrna_id, mrna_recs, container
  end
end

#each_mRNA_seq ⇒ Object

Yield a unique description and the sequence



117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/bio/db/gff/digest/gffparser.rb', line 117

# Yield a unique description and the assembled sequence for every mRNA.
#
# Entries without a containing component are skipped silently. Entries
# whose component has no entry in @sequencelist only produce a warning.
def each_mRNA_seq
  each_mRNA do |mrna_id, mrna_recs, container|
    next unless container

    sequence = @sequencelist[container.seqname]
    unless sequence
      next warn("No sequence information for", mrna_id)
    end

    # The description is built before the sequence is assembled.
    yield description(mrna_id, container, mrna_recs),
          assemble(sequence, container.start, mrna_recs)
  end
end

#read_fasta ⇒ Object



64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/bio/db/gff/digest/gffparser.rb', line 64

# Load the FASTA file named by @options[:fasta_filename], if any, and store
# each record in @sequencelist under the id handed out by the reader.
#
# Does nothing when no FASTA filename is configured.
def read_fasta
  fasta_path = @options[:fasta_filename]
  return unless fasta_path

  File.open(fasta_path) do |io|
    reader = Bio::GFF::FastaReader.new(io)
    reader.each { |seq_id, fastarec| @sequencelist[seq_id] = fastarec }
  end
end

#show_unrecognized_features ⇒ Object



58
59
60
61
62
# File 'lib/bio/db/gff/digest/gffparser.rb', line 58

# Emit one warning for every key recorded in @unrecognized_features,
# skipping nil/false keys.
def show_unrecognized_features
  feature_names = @unrecognized_features.keys
  feature_names.each do |feature_name|
    next unless feature_name

    warn "Unknown feature is ignored", feature_name
  end
end

#store_record(rec) ⇒ Object

Takes a parsed record rec and stores items in the relevant lists/tables



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/bio/db/gff/digest/gffparser.rb', line 28

# Takes a parsed record +rec+ and stores it in the relevant lists/tables.
#
# GFF comment records are skipped. For every other record the formatted ID
# and the sequence name are counted. Records whose upper-cased feature type
# is listed in COMPONENT_TYPES are stored in @componentlist (with a warning
# when the record carries no ID of its own). Gene, mRNA, CDS and exon
# records are added to their respective lists; each is recognized either by
# its feature name or by the Sequence Ontology accession listed next to it.
# Any other feature type that is neither a component nor listed in
# IGNORE_FEATURES is remembered in @unrecognized_features.
def store_record(rec)
  return if rec.comment # skip GFF comments

  id = Helpers::Record::formatID(rec)
  @count_ids.add(id)
  @count_seqnames.add(rec.seqname)

  is_component = COMPONENT_TYPES.include?(rec.feature_type.upcase)
  if is_component
    # check for container ID
    warn("Container <#{rec.feature_type}> has no ID, so using sequence name instead", id) if rec.id.nil?
    @componentlist[id] = rec
    info "Added feature <#{rec.feature_type}> with component ID", id
  end

  # NOTE: `when 'gene' || 'SO:0000704'` collapses to `when 'gene'` because
  # `||` returns its first truthy operand; alternatives in a `when` clause
  # have to be separated by commas for the SO accessions to match.
  case rec.feature_type
  when 'gene', 'SO:0000704'
    @orflist.add(id, rec)
  when 'mRNA', 'SO:0000234'
    @mrnalist.add(id, rec)
  when 'CDS', 'SO:0000316'
    @cdslist.add(id, rec)
  when 'exon', 'SO:0000147'
    @exonlist.add(id, rec)
  else
    if !is_component && !IGNORE_FEATURES.include?(rec.feature_type)
      @unrecognized_features[rec.feature_type] = true
    end
  end
end