Class: Zlib::GzipReader

Inherits:
GzipFile show all
Includes:
Enumerable
Defined in:
ext/zlib/zlib.c,
ext/zlib/zlib.c

Overview

Zlib::GzipReader is the class for reading a gzipped file. GzipReader should be used as an IO, or -IO-like, object.

Zlib::GzipReader.open('hoge.gz') {|gz|
  print gz.read
}

File.open('hoge.gz') do |f|
  gz = Zlib::GzipReader.new(f)
  print gz.read
  gz.close
end

Method Catalogue

The following methods in Zlib::GzipReader are just like their counterparts in IO, but they raise Zlib::Error or Zlib::GzipFile::Error exception if an error was found in the gzip file.

  • #each

  • #each_line

  • #each_byte

  • #gets

  • #getc

  • #lineno

  • #lineno=

  • #read

  • #readchar

  • #readline

  • #readlines

  • #ungetc

Be careful of the footer of the gzip file. A gzip file has the checksum of pre-compressed data in its footer. GzipReader checks all uncompressed data against that checksum at the following cases, and if it fails, raises Zlib::GzipFile::NoFooter, Zlib::GzipFile::CRCError, or Zlib::GzipFile::LengthError exception.

  • When an reading request is received beyond the end of file (the end of compressed data). That is, when Zlib::GzipReader#read, Zlib::GzipReader#gets, or some other methods for reading returns nil.

  • When Zlib::GzipFile#close method is called after the object reaches the end of file.

  • When Zlib::GzipReader#unused method is called after the object reaches the end of file.

The rest of the methods are adequately described in their own documentation.

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from GzipFile

#close, #closed?, #comment, #crc, #finish, #level, #mtime, #orig_name, #os_code, #sync, #sync=, #to_io, wrap

Constructor Details

#initialize(*args) ⇒ Object

call-seq:

Zlib::GzipReader.new(io, options = {})

Creates a GzipReader object associated with io. The GzipReader object reads gzipped data from io, and parses/decompresses it. The io must have a read method that behaves same as the IO#read.

The options hash may be used to set the encoding of the data. :external_encoding, :internal_encoding and :encoding may be set as in IO::new.

If the gzip file header is incorrect, raises an Zlib::GzipFile::Error exception.



4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
# File 'ext/zlib/zlib.c', line 4020

static VALUE
rb_gzreader_initialize(int argc, VALUE *argv, VALUE obj)
{
    VALUE io, opt = Qnil;
    struct gzfile *gz;
    int err;

    TypedData_Get_Struct(obj, struct gzfile, &gzfile_data_type, gz);
    rb_scan_args(argc, argv, "1:", &io, &opt);

    /* this is undocumented feature of zlib */
    err = inflateInit2(&gz->z.stream, -MAX_WBITS);
    if (err != Z_OK) {
  raise_zlib_error(err, gz->z.stream.msg);
    }
    gz->io = io;
    ZSTREAM_READY(&gz->z);
    gzfile_read_header(gz, Qnil);
    rb_gzfile_ecopts(gz, opt);

    if (rb_respond_to(io, id_path)) {
  /* File#path may raise IOError in case when a path is unavailable */
  rb_rescue2(gzfile_initialize_path_partial, obj, NULL, Qnil, rb_eIOError, (VALUE)0);
    }

    return obj;
}

Class Method Details

.open(*args) ⇒ Object

call-seq: Zlib::GzipReader.open(filename) {|gz| … }

Opens a file specified by filename as a gzipped file, and returns a GzipReader object associated with that file. Further details of this method are in Zlib::GzipReader.new and ZLib::GzipFile.wrap.



3943
3944
3945
3946
3947
# File 'ext/zlib/zlib.c', line 3943

static VALUE
rb_gzreader_s_open(int argc, VALUE *argv, VALUE klass)
{
    return gzfile_s_open(argc, argv, klass, "rb");
}

.zcat(*args) ⇒ Object

call-seq:

Zlib::GzipReader.zcat(io, options = {}, &block) => nil
Zlib::GzipReader.zcat(io, options = {}) => string

Decompresses all gzip data in the io, handling multiple gzip streams until the end of the io. There should not be any non-gzip data after the gzip streams.

If a block is given, it is yielded strings of uncompressed data, and the method returns nil. If a block is not given, the method returns the concatenation of all uncompressed data in all gzip streams.



3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
# File 'ext/zlib/zlib.c', line 3965

static VALUE
rb_gzreader_s_zcat(int argc, VALUE *argv, VALUE klass)
{
    VALUE io, unused, obj, buf=0, tmpbuf;
    long pos;

    rb_check_arity(argc, 1, 2);
    io = argv[0];

    do {
        obj = rb_funcallv(klass, rb_intern("new"), argc, argv);
        if (rb_block_given_p()) {
           rb_gzreader_each(0, 0, obj);
        }
        else {
            if (!buf) {
                buf = rb_str_new(0, 0);
            }
            tmpbuf = gzfile_read_all(get_gzfile(obj), Qnil);
            rb_str_cat(buf, RSTRING_PTR(tmpbuf), RSTRING_LEN(tmpbuf));
        }

        rb_gzreader_read(0, 0, obj);
        pos = NUM2LONG(rb_funcall(io, rb_intern("pos"), 0));
        unused = rb_gzreader_unused(obj);
        rb_gzfile_finish(obj);
        if (!NIL_P(unused)) {
            pos -= NUM2LONG(rb_funcall(unused, rb_intern("length"), 0));
            rb_funcall(io, rb_intern("pos="), 1, LONG2NUM(pos));
        }
    } while (pos < NUM2LONG(rb_funcall(io, rb_intern("size"), 0)));

    if (rb_block_given_p()) {
        return Qnil;
    }
    return buf;
}

Instance Method Details

#each(*args) ⇒ Object

See Zlib::GzipReader documentation for a description.



4491
4492
4493
4494
4495
4496
4497
4498
4499
4500
4501
4502
# File 'ext/zlib/zlib.c', line 4491

static VALUE
rb_gzreader_each(int argc, VALUE *argv, VALUE obj)
{
    VALUE str;

    RETURN_ENUMERATOR(obj, 0, 0);

    while (!NIL_P(str = gzreader_gets(argc, argv, obj))) {
  rb_yield(str);
    }
    return obj;
}

#each_byteObject

See Zlib::GzipReader documentation for a description.



4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
# File 'ext/zlib/zlib.c', line 4216

static VALUE
rb_gzreader_each_byte(VALUE obj)
{
    VALUE c;

    RETURN_ENUMERATOR(obj, 0, 0);

    while (!NIL_P(c = rb_gzreader_getbyte(obj))) {
  rb_yield(c);
    }
    return Qnil;
}

#each_charObject

See Zlib::GzipReader documentation for a description.



4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
# File 'ext/zlib/zlib.c', line 4198

static VALUE
rb_gzreader_each_char(VALUE obj)
{
    VALUE c;

    RETURN_ENUMERATOR(obj, 0, 0);

    while (!NIL_P(c = rb_gzreader_getc(obj))) {
  rb_yield(c);
    }
    return Qnil;
}

#each_line(*args) ⇒ Object

See Zlib::GzipReader documentation for a description.



4491
4492
4493
4494
4495
4496
4497
4498
4499
4500
4501
4502
# File 'ext/zlib/zlib.c', line 4491

static VALUE
rb_gzreader_each(int argc, VALUE *argv, VALUE obj)
{
    VALUE str;

    RETURN_ENUMERATOR(obj, 0, 0);

    while (!NIL_P(str = gzreader_gets(argc, argv, obj))) {
  rb_yield(str);
    }
    return obj;
}

#eofObject

Returns true or false whether the stream has reached the end.



3570
3571
3572
3573
3574
3575
3576
3577
3578
# File 'ext/zlib/zlib.c', line 3570

static VALUE
rb_gzfile_eof_p(VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);
    while (!ZSTREAM_IS_FINISHED(&gz->z) && ZSTREAM_BUF_FILLED(&gz->z) == 0) {
  gzfile_read_more(gz, Qnil);
    }
    return GZFILE_IS_FINISHED(gz) ? Qtrue : Qfalse;
}

#eof?Boolean

Returns true or false whether the stream has reached the end.

Returns:

  • (Boolean)


3570
3571
3572
3573
3574
3575
3576
3577
3578
# File 'ext/zlib/zlib.c', line 3570

static VALUE
rb_gzfile_eof_p(VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);
    while (!ZSTREAM_IS_FINISHED(&gz->z) && ZSTREAM_BUF_FILLED(&gz->z) == 0) {
  gzfile_read_more(gz, Qnil);
    }
    return GZFILE_IS_FINISHED(gz) ? Qtrue : Qfalse;
}

#external_encodingObject

See Zlib::GzipReader documentation for a description.



4525
4526
4527
4528
4529
# File 'ext/zlib/zlib.c', line 4525

static VALUE
rb_gzreader_external_encoding(VALUE self)
{
    return rb_enc_from_encoding(get_gzfile(self)->enc);
}

#getbyteObject

See Zlib::GzipReader documentation for a description.



4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
# File 'ext/zlib/zlib.c', line 4164

static VALUE
rb_gzreader_getbyte(VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);
    VALUE dst;

    dst = gzfile_read(gz, 1, Qnil);
    if (!NIL_P(dst)) {
  dst = INT2FIX((unsigned int)(RSTRING_PTR(dst)[0]) & 0xff);
    }
    return dst;
}

#getcObject

See Zlib::GzipReader documentation for a description.



4135
4136
4137
4138
4139
4140
4141
# File 'ext/zlib/zlib.c', line 4135

static VALUE
rb_gzreader_getc(VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);

    return gzfile_getc(gz);
}

#gets(*args) ⇒ Object

See Zlib::GzipReader documentation for a description. However, note that this method can return nil even if #eof? returns false, unlike the behavior of File#gets.



4459
4460
4461
4462
4463
4464
4465
4466
4467
4468
# File 'ext/zlib/zlib.c', line 4459

static VALUE
rb_gzreader_gets(int argc, VALUE *argv, VALUE obj)
{
    VALUE dst;
    dst = gzreader_gets(argc, argv, obj);
    if (!NIL_P(dst)) {
  rb_lastline_set(dst);
    }
    return dst;
}

#linenoObject

The line number of the last row read from this file.



3409
3410
3411
3412
3413
# File 'ext/zlib/zlib.c', line 3409

static VALUE
rb_gzfile_lineno(VALUE obj)
{
    return INT2NUM(get_gzfile(obj)->lineno);
}

#lineno=(lineno) ⇒ Object

Specify line number of the last row read from this file.



3420
3421
3422
3423
3424
3425
3426
# File 'ext/zlib/zlib.c', line 3420

static VALUE
rb_gzfile_set_lineno(VALUE obj, VALUE lineno)
{
    struct gzfile *gz = get_gzfile(obj);
    gz->lineno = NUM2INT(lineno);
    return lineno;
}

#posObject

Total number of output bytes output so far.



3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
# File 'ext/zlib/zlib.c', line 3631

static VALUE
rb_gzfile_total_out(VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);
    uLong total_out = gz->z.stream.total_out;
    long buf_filled = ZSTREAM_BUF_FILLED(&gz->z);

    if (total_out >= (uLong)buf_filled) {
        return rb_uint2inum(total_out - buf_filled);
    } else {
        return LONG2FIX(-(buf_filled - (long)total_out));
    }
}

#read(*args) ⇒ Object

See Zlib::GzipReader documentation for a description.



4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
# File 'ext/zlib/zlib.c', line 4081

static VALUE
rb_gzreader_read(int argc, VALUE *argv, VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);
    VALUE vlen, outbuf;
    long len;

    rb_scan_args(argc, argv, "02", &vlen, &outbuf);
    if (NIL_P(vlen)) {
  return gzfile_read_all(gz, outbuf);
    }

    len = NUM2INT(vlen);
    if (len < 0) {
  rb_raise(rb_eArgError, "negative length %ld given", len);
    }
    return gzfile_read(gz, len, outbuf);
}

#readbyteObject

See Zlib::GzipReader documentation for a description.



4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
# File 'ext/zlib/zlib.c', line 4182

static VALUE
rb_gzreader_readbyte(VALUE obj)
{
    VALUE dst;
    dst = rb_gzreader_getbyte(obj);
    if (NIL_P(dst)) {
  rb_raise(rb_eEOFError, "end of file reached");
    }
    return dst;
}

#readcharObject

See Zlib::GzipReader documentation for a description.



4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
# File 'ext/zlib/zlib.c', line 4148

static VALUE
rb_gzreader_readchar(VALUE obj)
{
    VALUE dst;
    dst = rb_gzreader_getc(obj);
    if (NIL_P(dst)) {
  rb_raise(rb_eEOFError, "end of file reached");
    }
    return dst;
}

#readline(*args) ⇒ Object

See Zlib::GzipReader documentation for a description.



4475
4476
4477
4478
4479
4480
4481
4482
4483
4484
# File 'ext/zlib/zlib.c', line 4475

static VALUE
rb_gzreader_readline(int argc, VALUE *argv, VALUE obj)
{
    VALUE dst;
    dst = rb_gzreader_gets(argc, argv, obj);
    if (NIL_P(dst)) {
  rb_raise(rb_eEOFError, "end of file reached");
    }
    return dst;
}

#readlines(*args) ⇒ Object

See Zlib::GzipReader documentation for a description.



4509
4510
4511
4512
4513
4514
4515
4516
4517
4518
# File 'ext/zlib/zlib.c', line 4509

static VALUE
rb_gzreader_readlines(int argc, VALUE *argv, VALUE obj)
{
    VALUE str, dst;
    dst = rb_ary_new();
    while (!NIL_P(str = gzreader_gets(argc, argv, obj))) {
  rb_ary_push(dst, str);
    }
    return dst;
}

#readpartial(*args) ⇒ Object

call-seq:

   gzipreader.readpartial(maxlen [, outbuf]) => string, outbuf

Reads at most <i>maxlen</i> bytes from the gzipped stream but
it blocks only if <em>gzipreader</em> has no data immediately available.
If the optional <i>outbuf</i> argument is present,
it must reference a String, which will receive the data.
It raises <code>EOFError</code> on end of file.


4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
# File 'ext/zlib/zlib.c', line 4112

static VALUE
rb_gzreader_readpartial(int argc, VALUE *argv, VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);
    VALUE vlen, outbuf;
    long len;

    rb_scan_args(argc, argv, "11", &vlen, &outbuf);

    len = NUM2INT(vlen);
    if (len < 0) {
  rb_raise(rb_eArgError, "negative length %ld given", len);
    }
    if (!NIL_P(outbuf))
        Check_Type(outbuf, T_STRING);
    return gzfile_readpartial(gz, len, outbuf);
}

#rewindObject

Resets the position of the file pointer to the point created the GzipReader object. The associated IO object needs to respond to the seek method.



4054
4055
4056
4057
4058
4059
4060
# File 'ext/zlib/zlib.c', line 4054

static VALUE
rb_gzreader_rewind(VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);
    gzfile_reader_rewind(gz);
    return INT2FIX(0);
}

#tellObject

Total number of output bytes output so far.



3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
# File 'ext/zlib/zlib.c', line 3631

static VALUE
rb_gzfile_total_out(VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);
    uLong total_out = gz->z.stream.total_out;
    long buf_filled = ZSTREAM_BUF_FILLED(&gz->z);

    if (total_out >= (uLong)buf_filled) {
        return rb_uint2inum(total_out - buf_filled);
    } else {
        return LONG2FIX(-(buf_filled - (long)total_out));
    }
}

#ungetbyte(ch) ⇒ Object

See Zlib::GzipReader documentation for a description.



4256
4257
4258
4259
4260
4261
4262
# File 'ext/zlib/zlib.c', line 4256

static VALUE
rb_gzreader_ungetbyte(VALUE obj, VALUE ch)
{
    struct gzfile *gz = get_gzfile(obj);
    gzfile_ungetbyte(gz, NUM2CHR(ch));
    return Qnil;
}

#ungetc(s) ⇒ Object

See Zlib::GzipReader documentation for a description.



4234
4235
4236
4237
4238
4239
4240
4241
4242
4243
4244
4245
4246
4247
4248
4249
# File 'ext/zlib/zlib.c', line 4234

static VALUE
rb_gzreader_ungetc(VALUE obj, VALUE s)
{
    struct gzfile *gz;

    if (FIXNUM_P(s))
  return rb_gzreader_ungetbyte(obj, s);
    gz = get_gzfile(obj);
    StringValue(s);
    if (gz->enc2 && gz->enc2 != rb_ascii8bit_encoding()) {
  s = rb_str_conv_enc(s, rb_enc_get(s), gz->enc2);
    }
    gzfile_ungets(gz, (const Bytef*)RSTRING_PTR(s), RSTRING_LEN(s));
    RB_GC_GUARD(s);
    return Qnil;
}

#unusedObject

Returns the rest of the data which had read for parsing gzip format, or nil if the whole gzip file is not parsed yet.



4068
4069
4070
4071
4072
4073
4074
# File 'ext/zlib/zlib.c', line 4068

static VALUE
rb_gzreader_unused(VALUE obj)
{
    struct gzfile *gz;
    TypedData_Get_Struct(obj, struct gzfile, &gzfile_data_type, gz);
    return gzfile_reader_get_unused(gz);
}