Module: Bgzf

Defined in:
lib/rbbt/util/misc/bgzf.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#block_cache_size ⇒ Object

Returns the value of attribute block_cache_size.



5
6
7
# File 'lib/rbbt/util/misc/bgzf.rb', line 5

# Reader for @block_cache_size, the threshold `_purge_cache` compares the
# number of cached blocks against. `_index` assigns it from the index length.
def block_cache_size
  @block_cache_size
end

#compressed_stream ⇒ Object

Returns the value of attribute compressed_stream.



5
6
7
# File 'lib/rbbt/util/misc/bgzf.rb', line 5

# Reader for @compressed_stream, the stream object handed to `Bgzf.setup`.
# `close` and `closed?` delegate to it.
def compressed_stream
  @compressed_stream
end

#data_offset ⇒ Object

Returns the value of attribute data_offset.



5
6
7
# File 'lib/rbbt/util/misc/bgzf.rb', line 5

# Reader for @data_offset, the current read position. `setup` starts it at 0;
# `seek`, `read` and `gets` move it.
# NOTE(review): presumably an offset into the uncompressed data (it is compared
# against the running sum of block lengths kept in `_index`) -- confirm.
def data_offset
  @data_offset
end

Class Method Details

.setup(compressed_stream) ⇒ Object



7
8
9
10
11
12
13
14
# File 'lib/rbbt/util/misc/bgzf.rb', line 7

# Wraps `compressed_stream` in a Bio::BGZF::Reader and mixes this module into
# that reader instance.
#
# compressed_stream - the stream given to Bio::BGZF::Reader.new; it is also
#                     stored on the reader through `compressed_stream=`.
#
# Returns the extended reader with its data_offset set to 0.
def self.setup(compressed_stream)
  # Loaded here rather than at file level, so the gem is only needed on use.
  require 'bio-bgzf'

  Bio::BGZF::Reader.new(compressed_stream).tap do |bgzf|
    bgzf.extend Bgzf
    bgzf.compressed_stream = compressed_stream
    bgzf.data_offset = 0
  end
end

Instance Method Details

#_get_block(vo) ⇒ Object



121
122
123
124
125
126
127
128
129
130
131
# File 'lib/rbbt/util/misc/bgzf.rb', line 121

# Returns the block stored at offset `vo`, going through the block cache.
#
# Every call appends `vo` to the @access log. A cached block is returned as
# is; on a miss `_purge_cache` runs first, then the block is loaded with
# `read_block_at` and remembered in @blocks.
#
# vo - key handed to `read_block_at` (block_offset supplies the second
#      element of an index entry here).
#
# Returns whatever `read_block_at` produced for `vo`.
def _get_block(vo)
  @blocks ||= {}
  (@access ||= []) << vo

  # key? rather than a truthiness test: a stored nil still counts as cached.
  return @blocks[vo] if @blocks.key?(vo)

  _purge_cache
  @blocks[vo] ||= read_block_at(vo)
end

#_index ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/rbbt/util/misc/bgzf.rb', line 36

# Builds (once per object) and returns the block index of the stream.
#
# The index is an Array of [pos, blockdata_offset] pairs, one per block
# returned by `read_block`: `pos` is the summed length of all blocks read
# before it, `blockdata_offset` is the value of `tell` taken just before the
# block was read. The scan runs inside Persist.persist, keyed on the filename.
# NOTE(review): presumably Persist stores the result under Rbbt.var.bgzf_index
# and reuses it on later runs -- confirm against Persist.
# NOTE(review): when `filename` is nil the key is just "BGZF index", so every
# anonymous stream appears to share one key -- confirm this is intended.
#
# Side effect: sets @block_cache_size to log(number of blocks) + 1.
#
# Returns the index Array (memoized in @_index).
# Raises RuntimeError with a hint to uncompress the file when `read_block`
# fails with a message matching /BGFZ.*expected/; any other error is re-raised
# unchanged.
def _index
  @_index ||= begin
                index = Persist.persist("BGZF index" + (filename || "").sub(/.bgz$/,''), :marshal, :dir => Rbbt.var.bgzf_index) do
                  entries = []
                  pos = 0
                  while true do
                    blockdata_offset = tell
                    block = begin
                              read_block
                            rescue Exception => e
                              # Broad rescue is deliberate: the error is always re-raised,
                              # only its message is replaced for the known failure.
                              raise "BGZF seems to be buggy so some compressed files will not decompress right. Try uncompressing #{filename}" if e.message =~ /BGFZ.*expected/
                              raise
                            end
                    break unless block
                    entries << [pos, blockdata_offset]
                    pos += block.length
                  end
                  entries
                end
                # Clamp to 1: Math.log(0) is -Infinity and Float#to_i raises
                # FloatDomainError on it, which made an empty index unusable.
                @block_cache_size = Math.log([index.length, 1].max).to_i + 1
                index
              end
end

#_purge_cache ⇒ Object



113
114
115
116
117
118
119
# File 'lib/rbbt/util/misc/bgzf.rb', line 113

# Evicts the least recently used block once @blocks holds more than
# @block_cache_size entries.
#
# @access is the log `_get_block` appends to: most recent access last, with
# repeats. It is first collapsed so each offset appears once, at the position
# of its latest access. The earliest entry that is still cached is then
# removed from both @blocks and @access. At most one block goes per call, and
# because the check is `>` the cache can hold @block_cache_size + 1 blocks
# after the caller stores the new one.
#
# The previous version deleted `@access.last`, which is the newest access
# (normally the block about to be loaded and not yet cached), so nothing was
# ever evicted.
#
# Returns nothing meaningful.
def _purge_cache
  return unless @blocks.length > @block_cache_size

  # reverse.uniq.reverse keeps the LAST occurrence of every offset, giving
  # least-recent-first order and bounding the size of the log.
  @access.replace(@access.reverse.uniq.reverse)

  oldest = @access.find { |vo| @blocks.key?(vo) }
  return if oldest.nil?

  @blocks.delete oldest
  @access.delete oldest
end

#block_offset ⇒ Object



105
106
107
108
109
110
111
# File 'lib/rbbt/util/misc/bgzf.rb', line 105

# Translates the current data_offset into a block address.
#
# Looks up the index entry chosen by `closest_page` and returns
# [page, offset]: `page` is the second element of that entry (the value
# `_index` recorded from `tell`), `offset` is the distance from the entry's
# start position to data_offset.
def block_offset
  position = data_offset
  entry = _index[closest_page(data_offset)]
  [entry[1], position - entry[0]]
end

#close ⇒ Object



26
27
28
29
30
# File 'lib/rbbt/util/misc/bgzf.rb', line 26

# Closes the underlying stream (unless it is already closed) and empties the
# block cache and the access log when they exist.
#
# Safe to call more than once.
def close
  stream = @compressed_stream
  stream.close unless stream.closed?

  # Both are created lazily by _get_block, so either may still be nil.
  if @access
    @access.clear
  end
  if @blocks
    @blocks.clear
  end
end

#closed? ⇒ Boolean

Returns:

  • (Boolean)


22
23
24
# File 'lib/rbbt/util/misc/bgzf.rb', line 22

# Reports whether the underlying stream is closed, by delegating to
# @compressed_stream.closed?.
def closed?
  @compressed_stream.closed?
end

#closest_page(pos) ⇒ Object



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/rbbt/util/misc/bgzf.rb', line 74

# Finds the index entry whose block contains position `pos`.
#
# Binary search over the start positions of all index entries (memoized in
# @_index_pos). The very first probe of each call is the index where the
# previous call stopped (@last_idx), when there is one.
#
# pos - position to locate; compared with `<=>` against entry start positions.
#
# Returns the Integer index of the last entry whose start position is <= pos,
# or -1 when the index is empty.
def closest_page(pos)
  high = _index.size - 1
  low = 0
  @_index_pos ||= _index.map { |entry| entry.first }

  return -1 if high < low

  probe = nil
  start = nil
  until high < low
    # Only the first iteration may reuse the remembered index.
    probe = (probe.nil? && @last_idx) ? @last_idx : low + (high - low) / 2
    start = @_index_pos[probe]

    order = pos <=> start
    break if order == 0

    if order == -1
      high = probe - 1
    elsif order == 1
      low = probe + 1
    end
  end

  @last_idx = probe

  # The search can stop one entry past the target; step back in that case.
  probe -= 1 if start > pos

  probe.to_i
end

#filename ⇒ Object



16
17
18
19
20
# File 'lib/rbbt/util/misc/bgzf.rb', line 16

# Name of the file behind the stream, when the stream can tell.
#
# Asks compressed_stream for `filename` if it responds to that message and
# memoizes a non-nil answer in @filename.
#
# Returns the stream's filename, or nil when the stream has no such method.
def filename
  return @filename if @filename

  stream = compressed_stream
  @filename = stream.respond_to?(:filename) ? stream.filename : nil
end

#get_block ⇒ Object



133
134
135
136
137
# File 'lib/rbbt/util/misc/bgzf.rb', line 133

# Returns the part of the current block that lies at or after data_offset.
#
# `block_offset` supplies the block key and the position inside the block;
# the block itself comes from `_get_block`. The result is a fresh slice, so
# callers may mutate it without touching the cached block. It is nil when the
# position lies beyond the end of the block.
def get_block
  page, skip = block_offset
  _get_block(page)[skip..-1]
end

#getc ⇒ Object



156
157
158
# File 'lib/rbbt/util/misc/bgzf.rb', line 156

# Reads a single character by calling read(1).
# Returns whatever `read` returns for that request: "" (not nil) once `read`
# has nothing left to give.
def getc
  read(1)
end

#gets ⇒ Object



160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/rbbt/util/misc/bgzf.rb', line 160

# Reads the next line, including its trailing "\n" when there is one.
#
# Data is pulled through `read` in chunks of 1024 until a newline shows up or
# `read` returns an empty string. Because `read` advances @data_offset past
# whole chunks, the offset is reset afterwards to just behind the returned
# line.
#
# Returns the line as a String, or nil when nothing could be read.
def gets
  start = @data_offset
  line = nil

  until (chunk = read(1024)).empty?
    line ||= ""
    cut = chunk.index("\n")
    if cut.nil?
      line << chunk
    else
      line << chunk[0..cut]
      break
    end
  end

  return nil if line.nil?

  # Rewind over whatever was read beyond the end of the line.
  @data_offset = start + line.length

  line
end

#init ⇒ Object



70
71
72
# File 'lib/rbbt/util/misc/bgzf.rb', line 70

# Forces the block index to be built (or fetched) now by calling `_index`.
# Returns the index.
def init
  _index
end

#read(size = nil) ⇒ Object



139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# File 'lib/rbbt/util/misc/bgzf.rb', line 139

# Reads up to `size` characters starting at @data_offset and advances
# @data_offset by the amount returned.
#
# size - number of characters wanted; nil delegates to `read_all`.
#
# Blocks are fetched one after another with `get_block` until the request is
# satisfied or `get_block` returns nil or an empty string.
#
# Two fixes over the previous version:
# * read(0) used `block[0..size-1]`, i.e. `block[0..-1]`, and returned the
#   whole remaining block without moving the offset. It now returns "".
# * the method used to recurse once per block; it now loops, so large reads do
#   not deepen the call stack.
#
# Returns a String, shorter than `size` (possibly "") when the data runs out.
def read(size=nil)
  return read_all if size.nil?

  result = nil
  remaining = size
  while remaining > 0
    block = get_block
    break if block.nil? or block.empty?

    # Start/length slicing: never more than `remaining`, and always a copy.
    chunk = block[0, remaining]
    @data_offset += chunk.length
    remaining -= chunk.length

    # Grow from the first chunk so the result keeps the data's own encoding.
    result = result.nil? ? chunk : (result << chunk)
  end

  result || ""
end

#read_all ⇒ Object



60
61
62
63
64
65
66
67
68
# File 'lib/rbbt/util/misc/bgzf.rb', line 60

# Concatenates every block `read_block` still returns, until it yields nil.
#
# NOTE(review): this calls `read_block` directly and neither consults nor
# updates @data_offset; presumably it continues from wherever the underlying
# reader currently stands -- confirm.
#
# Returns the collected String ("" when there are no blocks left).
def read_all
  collected = ""
  until (block = read_block).nil?
    collected << block
  end
  collected
end

#seek(off) ⇒ Object



32
33
34
# File 'lib/rbbt/util/misc/bgzf.rb', line 32

# Moves the read position by assigning `off` to @data_offset. No validation
# is done and no data is read.
# Returns `off` (the value of the assignment).
def seek(off)
  @data_offset = off
end