Module: Bio::GFFbrowser::Helpers::Gff3Sequence

Includes:
Logger
Defined in:
lib/bio/db/gff/gffsequence.rb

Instance Method Summary collapse

Methods included from Logger

#debug, #error, #info, #log_sys_info, #warn

Instance Method Details

#assemble(sequence, startpos, reclist, options = { :phase=>false, :reverse=>true, :trim=>true, :complement=>true, :fix=>false, :debug=>false }) ⇒ Object

Patch a sequence together from a Sequence string and an array of records. Note that rec positions are 1-based coordinates, relative to the landmark given in column 1 - in this case the sequence as it is passed in. The following options are available:

:reverse      : do reverse if reverse is indicated (default true)
:complement   : do complement if reverse is indicated (default true)
:phase        : do set CDS phase (default false, normally ignore)
:trim         : make sure sequence is multiple of 3 nucleotide bps (default true)

special options:

:raw          : raw sequence (all above false)
:codonize     : codon sequence (reverse, complement, and trim are true)
:fix          : fix errors (default false)


37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/bio/db/gff/gffsequence.rb', line 37

# Patch a nucleotide sequence together from +sequence+ and the records in
# +reclist+. Record positions are treated as 1-based, inclusive coordinates
# into +sequence+.
#
# sequence:: a String-like sequence, or a Bio::FastaFormat (its +seq+ is used)
# startpos:: accepted for interface compatibility; not used by this method
# reclist::  records handed to Sections::sort; each resulting section must
#            respond to +rec+, +begin+ and +end+
# options::  per-call switches:
#            :reverse, :complement, :trim - on unless explicitly set to false
#            :phase, :fix, :debug         - off unless truthy
#            :raw      - forces phase, reverse, trim and complement off
#            :codonize - forces reverse, trim and complement on, phase off
#
# The instance-level @options hash (if set) is consulted for :fix,
# :fix_wormbase and :validate.
#
# Returns the assembled sequence. Raises RuntimeError when frame fixing or
# validation still finds more than one stop codon in the translation.
def assemble(sequence, startpos, reclist, options = { :phase=>false, :reverse=>true, :trim=>true, :complement=>true, :fix=>false, :debug=>false })
  # Tolerate an including object that never assigned @options.
  global_options = @options || {}

  # nil (falsy) when not passed in
  do_debug = options[:debug]
  do_phase = options[:phase]
  do_fix   = options[:fix]
  # true unless explicitly switched off with false
  do_reverse    = options[:reverse] != false
  do_trim       = options[:trim] != false
  do_complement = options[:complement] != false
  if options[:raw]
    do_phase = do_reverse = do_trim = do_complement = false
  elsif options[:codonize]
    do_phase = false
    do_reverse = do_trim = do_complement = true
  end

  sectionlist = Sections::sort(reclist)
  rec0 = sectionlist.first.rec
  # we assume ORF is always read in the same direction
  orf_reverse = (rec0.strand == '-')
  sectionlist = sectionlist.reverse if orf_reverse
  if do_debug
    debug options.to_s
    debug [:reverse,do_reverse].to_s
    debug [:complement,do_complement].to_s
    debug [:trim,do_trim].to_s
    debug [:orf_reverse, orf_reverse, rec0.strand].to_s
  end

  # BioRuby conversion
  sequence = sequence.seq if sequence.kind_of?(Bio::FastaFormat)

  # Cut out every section (1-based inclusive -> 0-based range) and glue the
  # pieces together in reading order.
  seq = sectionlist.map { |section|
    s = sequence[(section.begin - 1)..(section.end - 1)]
    s = s.reverse if do_reverse && orf_reverse
    phase = do_phase && section.rec.phase
    s = s[phase.to_i..-1] if phase
    s
  }.join

  # Each piece was already reversed above, so only the complement is needed.
  if do_complement && do_reverse && orf_reverse
    seq = Bio::Sequence::NA.new(seq).forward_complement.upcase
  end

  # This is the place to fix sequences (e.g. the Wormbase bug)
  if do_fix || global_options[:fix] || global_options[:fix_wormbase]
    # With :fix_wormbase, ids starting with 'gene1' are left untouched.
    unless global_options[:fix_wormbase] && rec0.id.start_with?('gene1')
      # Drop one leading base at a time (at most two) until the translation
      # holds no more than one stop codon.
      test_frame = 0
      while Bio::Sequence::NA.new(seq).translate.count('*') > 1
        raise "Validation problem #{rec0.id}" if test_frame == 2
        test_frame += 1
        seq = seq[1..-1]
      end
      if test_frame > 0
        warn rec0.id,"Frame adjusted to #{test_frame} (fix)"
      end
    end
  end

  if do_trim
    reduce = seq.size % 3
    # Use start/length slicing: a range ending at size-1-reduce becomes
    # 0..-1 (the whole string) for sequences shorter than one codon.
    seq = seq[0, seq.size - reduce] if reduce != 0
  end

  if global_options[:validate]
    aaseq = Bio::Sequence::NA.new(seq).translate
    raise "Validate translation problem #{rec0.id}\n#{seq}" if aaseq.count('*') > 1
  end

  seq
end

#assembleAA(sequence, startpos, reclist, options = { :phase=>false, :reverse=>true, :trim=>true, :complement=>true }) ⇒ Object

Patch a sequence together from a Sequence string and an array of records and translate in the correct direction and frame. The options are the same as for assemble; as there, :trim defaults to true.



137
138
139
140
141
# File 'lib/bio/db/gff/gffsequence.rb', line 137

# Assemble the nucleotide sequence with #assemble (all arguments are passed
# through unchanged) and return its translation as produced by
# Bio::Sequence::NA#translate.
def assembleAA(sequence, startpos, reclist, options = { :phase=>false, :reverse=>true, :trim=>true, :complement=>true })
  nucleotides = assemble(sequence, startpos, reclist, options)
  Bio::Sequence::NA.new(nucleotides).translate
end

#description(id, component, rec) ⇒ Object

Create a description for output



144
145
146
147
148
# File 'lib/bio/db/gff/gffsequence.rb', line 144

# Create a description line for output: the id, the component's seqname with
# its start:end span, and the first:last pair of every section returned by
# Sections::sort(rec), comma separated inside parentheses.
def description(id, component, rec)
  ordered = Sections::sort(rec)
  header = id + ' Sequence:' + component.seqname
  span = "_#{component.start}:#{component.end} ("
  ranges = ordered.map { |section| "#{section.first}:#{section.last}" }
  header + span + ranges.join(', ') + ")"
end