Module: Bio::BGZF

Included in:
Reader, Writer
Defined in:
lib/bio-bgzf/constants.rb,
lib/bio-bgzf/writer.rb,
lib/bio-bgzf/unpack.rb,
lib/bio-bgzf/reader.rb,
lib/bio-bgzf/block.rb,
lib/bio-bgzf/pack.rb,
lib/bio-bgzf/vo.rb

Defined Under Namespace

Classes: FormatError, NotBGZFError, Reader, Writer

Constant Summary collapse

ID1 =
31
ID2 =
139
CM =
8
FLG =
4
SI1 =
66
SI2 =
67
SLEN =
2
MTIME =
0
XFL =
0
OS =
255
XLEN =
6
MAX_BYTES =
65536

Class Method Summary collapse

Class Method Details

.decompress_block(f) ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/bio-bgzf/block.rb', line 42

# Reads the next BGZF block from +f+, inflates its payload and checks the
# result against the size and CRC32 stored in the block trailer.
#
# @param f [#read] stream positioned at the start of a BGZF block
# @return [String, nil] the uncompressed block contents, or nil when
#   read_bgzf_block returns nil (end of input)
# @raise [FormatError] if the inflated size or the CRC32 does not match the
#   values recorded in the block; errors raised by read_bgzf_block propagate
def decompress_block(f)
  # Captured before reading so a size mismatch can be reported with the
  # offset of the offending block; no `pos` helper is visible in this module.
  block_start = f.respond_to?(:pos) ? f.pos : 'unknown offset'

  cdata, in_size, expected_crc = read_bgzf_block(f)
  return nil if cdata.nil?

  data = unpack(cdata)
  if data.bytesize != in_size
    raise FormatError, "Expected #{in_size} bytes from BGZF block at #{block_start}, but got #{data.bytesize} bytes!"
  end

  crc = Zlib.crc32(data, 0)
  if crc != expected_crc
    raise FormatError, "CRC error: expected #{expected_crc.to_s(16)}, got #{crc.to_s(16)}"
  end

  data
end

.pack(str, level = Zlib::BEST_COMPRESSION) ⇒ Object

Packs str into a BGZF block using the given compression level.



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/bio-bgzf/pack.rb', line 7

# Packs +str+ into a single BGZF block.
#
# The result is a gzip header carrying the "BC" extra subfield, followed by
# the raw-deflate payload, the CRC32 of +str+ and its size in bytes.
#
# @param str [String] uncompressed data to store in the block
# @param level [Integer] zlib compression level
# @return [String] the serialized block
#
# NOTE(review): bsize is written as a 16-bit field and nothing here rejects
# input whose block would not fit in it (cf. MAX_BYTES) - confirm callers
# limit the input size.
def pack(str, level=Zlib::BEST_COMPRESSION)
  # Negative window bits request a raw deflate stream (no zlib wrapper).
  zs = Zlib::Deflate.new(level, -15)
  begin
    cdata = zs.deflate(str, Zlib::FINISH)
  ensure
    zs.close
  end

  crc32 = Zlib.crc32(str, 0)
  # ISIZE is a byte count; String#length would count characters and
  # under-report multi-byte input.
  isize = str.bytesize

  # Value for the BSIZE field; read_bgzf_block recovers the payload length
  # as bsize - XLEN - 19.
  bsize = cdata.bytesize + 19 + XLEN

  fields = [
    ID1, ID2, CM, FLG,
    MTIME,
    XFL, OS,
    XLEN,
    SI1, SI2,
    SLEN,
    bsize,
    cdata,
    crc32,
    isize
  ]

  fields.pack('CCCCVCCvCCvva*VV')
end

.read_bgzf_block(f) ⇒ Object

Raises: NotBGZFError, FormatError



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/bio-bgzf/block.rb', line 9

# Reads one BGZF block from +f+ without inflating its payload.
#
# @param f [#read, #seek] stream positioned at the start of a block
# @return [nil] when the stream is already at end of input
# @return [Array(String, Integer, Integer)] the compressed payload, the
#   expected uncompressed size and the expected CRC32, in that order
# @raise [NotBGZFError] if the magic bytes are wrong or no BC subfield exists
# @raise [FormatError] if the extra field is malformed or the block is truncated
def read_bgzf_block(f)
  hstart = f.read(12)
  return nil if hstart.nil? # EOF

  if hstart.bytesize < 12
    raise FormatError, "truncated BGZF block header: got #{hstart.bytesize} of 12 bytes"
  end

  # First four bytes as a little-endian word, then skip six bytes, then the
  # 16-bit length of the gzip extra field.
  magic, gzip_extra_length = hstart.unpack('Vxxxxxxv')
  raise NotBGZFError, "wrong BGZF magic: #{sprintf('%08x', magic)}" unless magic == 0x04088B1F

  len = 0
  bsize = nil
  while len < gzip_extra_length
    subfield_header = f.read(4)
    if subfield_header.nil? || subfield_header.bytesize < 4
      raise FormatError, 'unexpected end of data inside gzip extra subfield header'
    end

    si1, si2, slen = subfield_header.unpack('CCv')
    if si1 == 66 && si2 == 67 # subfield identifier "BC"
      raise FormatError, "BC subfield length is #{slen} but must be 2" if slen != 2
      raise FormatError, 'duplicate field with block size' unless bsize.nil?

      raw_bsize = f.read(2)
      if raw_bsize.nil? || raw_bsize.bytesize < 2
        raise FormatError, 'unexpected end of data inside BC subfield'
      end
      # slen is exactly 2 here, so nothing is left to skip in this subfield.
      bsize = raw_bsize.unpack('v')[0]
    else
      f.seek(slen, IO::SEEK_CUR)
    end
    len += 4 + slen
  end

  if len != gzip_extra_length
    raise FormatError, "total length of subfields is #{len} bytes but must be #{gzip_extra_length}"
  end
  raise NotBGZFError, 'block size was not found in any subfield' if bsize.nil?

  cdata_size = bsize - gzip_extra_length - 19
  if cdata_size < 0
    raise FormatError, "block size #{bsize} is too small for an extra field of #{gzip_extra_length} bytes"
  end

  compressed_data = f.read(cdata_size)
  if compressed_data.nil? || compressed_data.bytesize < cdata_size
    raise FormatError, "truncated BGZF block: expected #{cdata_size} bytes of compressed data"
  end

  trailer = f.read(8)
  if trailer.nil? || trailer.bytesize < 8
    raise FormatError, 'truncated BGZF block: missing CRC32/ISIZE trailer'
  end
  crc32, input_size = trailer.unpack('VV')

  [compressed_data, input_size, crc32]
end

.unpack(str) ⇒ Object

Unpacks compressed data, NOT a BGZF block.



5
6
7
8
# File 'lib/bio-bgzf/unpack.rb', line 5

# Inflates a raw deflate stream. +str+ is only the compressed payload of a
# block, NOT a whole BGZF block.
#
# @param str [String] raw-deflate compressed data
# @return [String] the inflated data
def unpack(str)
  # Negative window bits: the data carries no zlib header.
  zs = Zlib::Inflate.new(-15)
  begin
    zs.inflate(str)
  ensure
    # Release the underlying zlib stream right away rather than waiting for GC.
    zs.close
  end
end

.vo_block_offset(vo) ⇒ Object



2
3
4
# File 'lib/bio-bgzf/vo.rb', line 2

# Returns the part of virtual offset +vo+ above its low 16 bits.
#
# @param vo [Integer] virtual offset
# @return [Integer] +vo+ shifted right by 16 bits
def vo_block_offset(vo)
  data_offset_bits = 16
  vo >> data_offset_bits
end

.vo_data_offset(vo) ⇒ Object



7
8
9
# File 'lib/bio-bgzf/vo.rb', line 7

# Returns the low 16 bits of virtual offset +vo+.
#
# @param vo [Integer] virtual offset
# @return [Integer] +vo+ masked with 0xFFFF
def vo_data_offset(vo)
  low_bits_mask = 0xFFFF
  vo & low_bits_mask
end