Module: Bio::Ngs::Cufflinks::GtfParser

Included in:
Gtf
Defined in:
lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb

Overview

TODO: use a dedicated class for each block (transcript).

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#lazyObject

Returns the value of attribute lazy.



11
12
13
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 11

# Reader for the lazy-evaluation flag stored in @lazy.
#
# @return [Object] the current value of @lazy; #set_lazy stores true and
#   #not_lazy stores false
def lazy
  @lazy
end

Instance Method Details

#annotated_isoformsObject



98
99
100
101
102
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 98

# Keeps only the transcripts whose #annotated_isoform? predicate is truthy.
#
# @return [Object] whatever #select returns for this filter
def annotated_isoforms
  select(&:annotated_isoform?)
end

#brand_new_isoformsObject



86
87
88
89
90
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 86

# Keeps only the transcripts whose #brand_new_isoform? predicate is truthy.
#
# @return [Object] whatever #select returns for this filter
def brand_new_isoforms
  select(&:brand_new_isoform?)
end

#build_idxObject

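Builds the in-memory index (@idx): the byte length of every transcript, in file order, plus a map from each transcript_id to its 1-based position.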



156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 156

# Scans every transcript and builds the index stored in @idx.
#
# The index is a Hash (with an auto-vivifying Array default) holding:
# * :transcripts - byte length of each transcript, in iteration order
# * :names       - transcript_id => 1-based position in :transcripts
# * :exons       - created empty; not populated here
#
# @return [Hash] the freshly built index (also assigned to @idx)
def build_idx
  index = Hash.new { |hash, key| hash[key] = [] }
  index[:transcripts] = []
  index[:names] = {}
  index[:exons] = []

  each_transcript do |transcript, _line_number|
    lengths = index[:transcripts]
    lengths.push(transcript.byte_length)
    # Positions are 1-based: the size after the push is this transcript's rank.
    index[:names][transcript.attributes[:transcript_id]] = lengths.size
  end

  @idx = index
end

#countObject

Returns the number of transcripts, counted by iterating over them with each_transcript.



148
149
150
151
152
153
154
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 148

# Counts the transcripts by walking through them with #each_transcript.
#
# @return [Integer] number of times #each_transcript yielded
def count
  total = 0
  each_transcript { total += 1 }
  total
end

#coverage_gt(size) ⇒ Object



104
105
106
107
108
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 104

# Keeps only the transcripts whose :cov attribute is greater than +size+.
#
# @param size [Numeric] coverage threshold (exclusive)
# @return [Object] whatever #select returns for this filter
def coverage_gt(size)
  select { |candidate| candidate.attributes[:cov] > size }
end

#dump_idx(fn = nil) ⇒ Object

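Serializes the index with Marshal to fn (default: "<source path>.idx"), building the index first if needed, and returns the file name.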



174
175
176
177
178
179
180
181
182
183
184
185
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 174

# Serializes the index to disk with Marshal.
#
# Marshal cannot dump a Hash that carries a default proc, so a proc-free copy
# of the index is written instead of mutating @idx. The previous approach
# (`@idx.default = nil`) permanently dropped the default proc and left a
# :default_hash key in the live index. The dumped Hash still contains the
# :default_hash key so that existing readers keep working.
#
# @param fn [String, nil] destination file; defaults to "#{source.path}.idx"
# @return [String] the name of the file that was written
def dump_idx(fn=nil)
  fn ||= "#{source.path}.idx"

  build_idx unless defined?(@idx) && @idx

  # {}.merge builds a plain Hash: same keys/values, no default proc.
  snapshot = {}.merge(@idx)
  snapshot[:default_hash] = @idx.default

  # Marshal output is binary, so the file must be opened in binary mode.
  File.open(fn, "wb") do |f|
    Marshal.dump(snapshot, f)
  end
  fn
end

#each_transcript(&block) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 13

# Iterates over the transcripts of the source, yielding each one together
# with the current line number of the file handle.
#
# When no lazy filters are queued (@blocks nil or empty) the file is read
# from the beginning: a line containing "\ttranscript\t" starts a new
# transcript and every other line is appended to the current transcript's
# exons. The same Transcript object is reused (cleared) between yields.
#
# When lazy filters are queued they are combined (all must be truthy), applied
# once through a non-lazy #select, and the iteration is delegated to the
# resulting object.
#
# @yieldparam transcript [Transcript] the current transcript
# @yieldparam lineno [Integer] value of @fh.lineno at the time of the yield
def each_transcript(&block)
  if @blocks.nil? || @blocks.empty?
    @fh.rewind
    first_line = @fh.gets
    return if first_line.nil? # empty source: nothing to iterate over

    transcript = Transcript.new
    transcript.tra = first_line
    @fh.each_line do |line|
      if line =~ /\ttranscript\t/
        block.call(transcript, @fh.lineno)
        transcript.clear
        transcript.tra = line
      else
        # Catch-all on purpose: the original `else line =~ /\texon\t/` never
        # filtered anything, so every non-transcript line is kept.
        # NOTE(review): switching to `elsif` would drop non-exon lines and may
        # affect byte offsets used by the index -- confirm before changing.
        transcript.exons << line
      end
    end
    # The loop only yields a transcript when the NEXT one starts, so the last
    # transcript of the file must be yielded explicitly.
    block.call(transcript, @fh.lineno)
  else #lazy
    not_lazy
    blocks_to_run = @blocks
    @blocks = []
    begin
      result = select do |transcript|
        blocks_to_run.all? { |b| b.call(transcript) }
      end
    ensure
      # Restore lazy mode even if one of the filters raised.
      set_lazy
    end
    # select returns nil when nothing matched; there is nothing to iterate.
    result.send(:each_transcript, &block) if result
  end #lazy or not?
end

#get_transcript(n = 1) ⇒ Object Also known as: []



222
223
224
225
226
227
228
229
230
231
232
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 222

# Reads the raw text of a transcript through #read_transcript and wraps it in
# a Bio::Ngs::Cufflinks::Transcript: the first line becomes +tra+, the
# remaining lines become +exons+ (each re-terminated with "\n").
#
# @param n [Integer, String] forwarded unchanged to #read_transcript
# @return [Bio::Ngs::Cufflinks::Transcript, nil] nil when #read_transcript
#   returns a falsy value
def get_transcript(n=1)
  raw = read_transcript(n)
  return nil unless raw

  header, *exon_lines = raw.split("\n")
  record = Bio::Ngs::Cufflinks::Transcript.new
  record.tra = header + "\n"
  record.exons = exon_lines.map { |exon| exon + "\n" }
  record
end

#indexObject

Returns the current index (@idx), or nil if it has not been built or loaded yet.



197
198
199
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 197

# Reader for the transcript index.
#
# @return [Hash, nil] the value of @idx as assigned by #build_idx or
#   #load_idx; nil when neither has run yet
def index
  @idx
end

#is_lazy?Boolean

Returns:

  • (Boolean)


130
131
132
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 130

# Tells whether lazy mode is on. #select consults this to decide between
# queueing a filter block and running it immediately.
#
# @return [Boolean, nil] the value of @lazy (true after #set_lazy, false
#   after #not_lazy)
def is_lazy?
  @lazy
end

#length_gt(length) ⇒ Object



79
80
81
82
83
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 79

# Keeps only the transcripts whose #size is greater than +length+.
#
# @param length [Numeric] size threshold (exclusive)
# @return [Object] whatever #select returns for this filter
def length_gt(length)
  select { |candidate| candidate.size > length }
end

#load_idxObject

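Loads the index from "<source path>.idx" if that file exists; otherwise builds the index and dumps it to disk.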



187
188
189
190
191
192
193
194
195
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 187

# Loads the index from "#{source.path}.idx" when that file exists; otherwise
# builds the index and writes it to disk with #dump_idx.
#
# Uses File.exist? (File.exists? was removed in Ruby 3.2) and reads the file
# in binary mode inside a block so the handle is always closed.
#
# @return [Object] result of the last statement of the branch taken
def load_idx
  idx_path = "#{source.path}.idx"
  if File.exist?(idx_path)
    # SECURITY NOTE: Marshal.load can instantiate arbitrary objects; only load
    # index files produced by #dump_idx from a trusted location.
    @idx = File.open(idx_path, "rb") { |f| Marshal.load(f) }
    @idx.default = @idx[:default_hash]
  else
    build_idx
    dump_idx
  end
end

#mono_exonObject



72
73
74
75
76
77
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 72

# Keeps only the transcripts whose #mono_exon? predicate is truthy.
#
# @return [Object] whatever #select returns for this filter
def mono_exon
  select(&:mono_exon?)
end

#multi_exon_with_lengh_and_coverage(length, coverage) ⇒ Object

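Selects the multi-exon transcripts whose size is greater than length and whose coverage (cov attribute) is greater than coverage.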



59
60
61
62
63
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 59

# Combined filter: multi-exon transcripts that are longer than +length+ and
# have a :cov attribute greater than +coverage+. The conditions short-circuit
# in that order. (The misspelled method name is kept for compatibility.)
#
# @param length [Numeric] size threshold (exclusive)
# @param coverage [Numeric] coverage threshold (exclusive)
# @return [Object] whatever #select returns for this filter
def multi_exon_with_lengh_and_coverage(length, coverage)
  select do |candidate|
    candidate.multi_exons? &&
      (candidate.size > length) &&
      (candidate.attributes[:cov] > coverage)
  end
end

#multi_exonsObject



65
66
67
68
69
70
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 65

# Keeps only the transcripts whose #multi_exons? predicate is truthy.
#
# @return [Object] whatever #select returns for this filter
def multi_exons
  select(&:multi_exons?)
end

#new_isoformsObject



92
93
94
95
96
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 92

# Keeps only the transcripts whose #new_isoform? predicate is truthy.
#
# @return [Object] whatever #select returns for this filter
def new_isoforms
  select(&:new_isoform?)
end

#not_lazyObject



134
135
136
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 134

# Turns lazy mode off: with @lazy false, #select filters immediately instead
# of queueing its block.
#
# @return [false] the value just assigned to @lazy
def not_lazy
  @lazy = false
end

#read_transcript(n = 1) ⇒ Object

Returns the raw text of the n-th transcript, read from the source using the index. Numbering starts from 1; n can be either a number or a transcript name.



203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 203

# Reads the raw text of one transcript from +source+ using the byte-length
# index (loaded on demand with #load_idx).
#
# @param n [Integer, String] 1-based position of the transcript, or a
#   transcript name looked up in @idx[:names]
# @return [String, nil] the transcript text; nil when the name is unknown or
#   the position is outside 1..number_of_transcripts (the original recursed
#   forever on unknown names and read arbitrary data for bad positions)
def read_transcript(n=1)
  load_idx unless defined?(@idx)

  unless n.to_s.is_numeric?
    # Not a number: treat it as a transcript name and resolve its position.
    n = @idx[:names][n]
    return nil if n.nil?
  end

  position = n.to_i
  lengths = @idx[:transcripts]
  return nil if position < 1 || position > lengths.size

  # The offset of a transcript is the sum of the lengths of all previous ones
  # (0 for the first transcript, since first(0) is empty).
  source.seek(lengths.first(position - 1).sum)
  source.read(lengths[position - 1])
end

#save(filename = nil) ⇒ Object



138
139
140
141
142
143
144
145
146
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 138

# Writes every transcript yielded by #each_transcript to a file.
#
# The index is deliberately not dumped here: the original author noted that
# doing so would save the old index when the user had called a select.
#
# @param filename [String, nil] destination; defaults to "#{@fh.path}.gtf"
# @return [Object] the value returned by #each_transcript
def save(filename=nil)
  target = filename || "#{@fh.path}.gtf"
  File.open(target, 'w') do |out|
    each_transcript { |transcript| out.write transcript }
  end
end

#select(&block) ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 42

# Filters transcripts with +block+.
#
# In lazy mode the block is only queued in @blocks and self is returned, so
# calls can be chained. Otherwise every transcript for which the block is
# truthy is written to a temporary file and a new Gtf built on that file is
# returned.
#
# @yieldparam transcript [Object] transcript under test
# @return [self, Gtf, nil] self in lazy mode; otherwise a Gtf over the
#   selected transcripts, or nil when nothing was written
def select(&block)
  if is_lazy?
    (@blocks ||= []) << block
    return self
  end

  # Find out how to concatenate multiple selections
  file = Tempfile.new("transcripts")
  each_transcript do |transcript|
    file.write transcript.to_s if block.call(transcript)
  end
  file.size == 0 ? nil : Gtf.new(file.path)
end

#set_lazyObject

Turns lazy mode on: subsequent select calls queue their blocks instead of filtering immediately.



126
127
128
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 126

# Turns lazy mode on: with @lazy true, #select queues its block instead of
# filtering immediately.
#
# @return [true] the value just assigned to @lazy
def set_lazy
  @lazy=true
end

#to_bed(only_exons = true, &block) ⇒ Object

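Yields each transcript together with its BED representation (transcript.to_bed(only_exons)) to the given block.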



120
121
122
123
124
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 120

# Converts every transcript to BED and hands the result to the block.
#
# @param only_exons [Boolean] forwarded to each transcript's #to_bed
# @yieldparam transcript [Object] the transcript being converted
# @yieldparam bed [Object] value returned by transcript.to_bed(only_exons)
# @return [Object] the value returned by #each_transcript
def to_bed(only_exons=true, &block)
  each_transcript do |transcript|
    bed = transcript.to_bed(only_exons)
    block.call(transcript, bed)
  end
end

#to_gff3(path = ".") ⇒ Object



110
111
112
113
114
115
116
117
118
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 110

# Converts "transcripts.gtf" found in +path+ to "transcripts.gff3" in the
# same directory by running GffRead.
#
# Uses File.exist? (File.exists? was removed in Ruby 3.2). The input and
# output handed to GffRead are now located inside +path+; previously the
# existence check used +path+ but the conversion always ran on the current
# directory.
#
# @param path [String] directory containing transcripts.gtf
# @return [Object] whatever GffRead#run returns
# @raise [ArgumentError] when transcripts.gtf is not present in +path+
def to_gff3(path=".")
  gtf_file = File.join(path, "transcripts.gtf")
  unless File.exist?(gtf_file)
    raise ArgumentError, "transcripts.gtf doesn't exists in #{path}"
  end

  gffread = GffRead.new
  gffread.params = {output: File.join(path, "transcripts.gff3")}
  gffread.run :arguments=>[gtf_file], :separator=>''
end