Class: Zlib::GzipReader

Inherits:
GzipFile show all
Includes:
Enumerable
Defined in:
ext/zlib/zlib.c,
ext/zlib/zlib.c

Overview

Zlib::GzipReader is the class for reading a gzipped file. GzipReader should be used as an IO, or -IO-like, object.

Zlib::GzipReader.open('hoge.gz') {|gz|
  print gz.read
}

File.open('hoge.gz') do |f|
  gz = Zlib::GzipReader.new(f)
  print gz.read
  gz.close
end

Method Catalogue

The following methods in Zlib::GzipReader are just like their counterparts in IO, but they raise Zlib::Error or Zlib::GzipFile::Error exception if an error was found in the gzip file.

  • #each

  • #each_line

  • #each_byte

  • #gets

  • #getc

  • #lineno

  • #lineno=

  • #read

  • #readchar

  • #readline

  • #readlines

  • #ungetc

Be careful of the footer of the gzip file. A gzip file has the checksum of pre-compressed data in its footer. GzipReader checks all uncompressed data against that checksum at the following cases, and if it fails, raises Zlib::GzipFile::NoFooter, Zlib::GzipFile::CRCError, or Zlib::GzipFile::LengthError exception.

  • When an reading request is received beyond the end of file (the end of compressed data). That is, when Zlib::GzipReader#read, Zlib::GzipReader#gets, or some other methods for reading returns nil.

  • When Zlib::GzipFile#close method is called after the object reaches the end of file.

  • When Zlib::GzipReader#unused method is called after the object reaches the end of file.

The rest of the methods are adequately described in their own documentation.

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from GzipFile

#close, #closed?, #comment, #crc, #finish, #level, #mtime, #orig_name, #os_code, #sync, #sync=, #to_io, wrap

Constructor Details

#initialize(*args) ⇒ Object

call-seq:

Zlib::GzipReader.new(io, options = {})

Creates a GzipReader object associated with io. The GzipReader object reads gzipped data from io, and parses/decompresses it. The io must have a read method that behaves same as the IO#read.

The options hash may be used to set the encoding of the data. :external_encoding, :internal_encoding and :encoding may be set as in IO::new.

If the gzip file header is incorrect, raises an Zlib::GzipFile::Error exception.



3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
# File 'ext/zlib/zlib.c', line 3717

static VALUE
rb_gzreader_initialize(int argc, VALUE *argv, VALUE obj)
{
    VALUE io, opt = Qnil;
    struct gzfile *gz;
    int err;

    TypedData_Get_Struct(obj, struct gzfile, &gzfile_data_type, gz);
    rb_scan_args(argc, argv, "1:", &io, &opt);

    /* this is undocumented feature of zlib */
    err = inflateInit2(&gz->z.stream, -MAX_WBITS);
    if (err != Z_OK) {
	raise_zlib_error(err, gz->z.stream.msg);
    }
    gz->io = io;
    ZSTREAM_READY(&gz->z);
    gzfile_read_header(gz);
    rb_gzfile_ecopts(gz, opt);

    if (rb_respond_to(io, id_path)) {
	gz->path = rb_funcall(gz->io, id_path, 0);
	rb_define_singleton_method(obj, "path", rb_gzfile_path, 0);
    }

    return obj;
}

Class Method Details

.open(*args) ⇒ Object

call-seq: Zlib::GzipReader.open(filename) {|gz| … }

Opens a file specified by filename as a gzipped file, and returns a GzipReader object associated with that file. Further details of this method are in Zlib::GzipReader.new and ZLib::GzipFile.wrap.



3694
3695
3696
3697
3698
# File 'ext/zlib/zlib.c', line 3694

static VALUE
rb_gzreader_s_open(int argc, VALUE *argv, VALUE klass)
{
    return gzfile_s_open(argc, argv, klass, "rb");
}

Instance Method Details

#bytesObject

This is a deprecated alias for each_byte.



3931
3932
3933
3934
3935
3936
3937
3938
# File 'ext/zlib/zlib.c', line 3931

static VALUE
rb_gzreader_bytes(VALUE obj)
{
    rb_warn("Zlib::GzipReader#bytes is deprecated; use #each_byte instead");
    if (!rb_block_given_p())
	return rb_enumeratorize(obj, ID2SYM(rb_intern("each_byte")), 0, 0);
    return rb_gzreader_each_byte(obj);
}

#each(*args) ⇒ Object

See Zlib::GzipReader documentation for a description.



4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
# File 'ext/zlib/zlib.c', line 4199

static VALUE
rb_gzreader_each(int argc, VALUE *argv, VALUE obj)
{
    VALUE str;

    RETURN_ENUMERATOR(obj, 0, 0);

    while (!NIL_P(str = gzreader_gets(argc, argv, obj))) {
	rb_yield(str);
    }
    return obj;
}

#each_byteObject

See Zlib::GzipReader documentation for a description.



3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
# File 'ext/zlib/zlib.c', line 3913

static VALUE
rb_gzreader_each_byte(VALUE obj)
{
    VALUE c;

    RETURN_ENUMERATOR(obj, 0, 0);

    while (!NIL_P(c = rb_gzreader_getbyte(obj))) {
	rb_yield(c);
    }
    return Qnil;
}

#each_charObject

See Zlib::GzipReader documentation for a description.



3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
# File 'ext/zlib/zlib.c', line 3895

static VALUE
rb_gzreader_each_char(VALUE obj)
{
    VALUE c;

    RETURN_ENUMERATOR(obj, 0, 0);

    while (!NIL_P(c = rb_gzreader_getc(obj))) {
	rb_yield(c);
    }
    return Qnil;
}

#each_line(*args) ⇒ Object

See Zlib::GzipReader documentation for a description.



4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
# File 'ext/zlib/zlib.c', line 4199

static VALUE
rb_gzreader_each(int argc, VALUE *argv, VALUE obj)
{
    VALUE str;

    RETURN_ENUMERATOR(obj, 0, 0);

    while (!NIL_P(str = gzreader_gets(argc, argv, obj))) {
	rb_yield(str);
    }
    return obj;
}

#eofObject

Returns true or false whether the stream has reached the end.



3334
3335
3336
3337
3338
3339
# File 'ext/zlib/zlib.c', line 3334

static VALUE
rb_gzfile_eof_p(VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);
    return GZFILE_IS_FINISHED(gz) ? Qtrue : Qfalse;
}

#eof?Boolean

Returns true or false whether the stream has reached the end.

Returns:

  • (Boolean)


3334
3335
3336
3337
3338
3339
# File 'ext/zlib/zlib.c', line 3334

static VALUE
rb_gzfile_eof_p(VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);
    return GZFILE_IS_FINISHED(gz) ? Qtrue : Qfalse;
}

#external_encodingObject

See Zlib::GzipReader documentation for a description.



4247
4248
4249
4250
4251
# File 'ext/zlib/zlib.c', line 4247

static VALUE
rb_gzreader_external_encoding(VALUE self)
{
    return rb_enc_from_encoding(get_gzfile(self)->enc);
}

#getbyteObject

See Zlib::GzipReader documentation for a description.



3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
# File 'ext/zlib/zlib.c', line 3861

static VALUE
rb_gzreader_getbyte(VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);
    VALUE dst;

    dst = gzfile_read(gz, 1);
    if (!NIL_P(dst)) {
	dst = INT2FIX((unsigned int)(RSTRING_PTR(dst)[0]) & 0xff);
    }
    return dst;
}

#getcObject

See Zlib::GzipReader documentation for a description.



3832
3833
3834
3835
3836
3837
3838
# File 'ext/zlib/zlib.c', line 3832

static VALUE
rb_gzreader_getc(VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);

    return gzfile_getc(gz);
}

#gets(*args) ⇒ Object

See Zlib::GzipReader documentation for a description.



4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
# File 'ext/zlib/zlib.c', line 4167

static VALUE
rb_gzreader_gets(int argc, VALUE *argv, VALUE obj)
{
    VALUE dst;
    dst = gzreader_gets(argc, argv, obj);
    if (!NIL_P(dst)) {
	rb_lastline_set(dst);
    }
    return dst;
}

#linenoObject

The line number of the last row read from this file.



3174
3175
3176
3177
3178
# File 'ext/zlib/zlib.c', line 3174

static VALUE
rb_gzfile_lineno(VALUE obj)
{
    return INT2NUM(get_gzfile(obj)->lineno);
}

#lineno=(lineno) ⇒ Object

Specify line number of the last row read from this file.



3185
3186
3187
3188
3189
3190
3191
# File 'ext/zlib/zlib.c', line 3185

static VALUE
rb_gzfile_set_lineno(VALUE obj, VALUE lineno)
{
    struct gzfile *gz = get_gzfile(obj);
    gz->lineno = NUM2INT(lineno);
    return lineno;
}

#lines(*args) ⇒ Object

This is a deprecated alias for each_line.



4217
4218
4219
4220
4221
4222
4223
4224
# File 'ext/zlib/zlib.c', line 4217

static VALUE
rb_gzreader_lines(int argc, VALUE *argv, VALUE obj)
{
    rb_warn("Zlib::GzipReader#lines is deprecated; use #each_line instead");
    if (!rb_block_given_p())
	return rb_enumeratorize(obj, ID2SYM(rb_intern("each_line")), argc, argv);
    return rb_gzreader_each(argc, argv, obj);
}

#posObject

Total number of output bytes output so far.



3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
# File 'ext/zlib/zlib.c', line 3392

static VALUE
rb_gzfile_total_out(VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);
    uLong total_out = gz->z.stream.total_out;
    long buf_filled = ZSTREAM_BUF_FILLED(&gz->z);

    if (total_out >= (uLong)buf_filled) {
        return rb_uint2inum(total_out - buf_filled);
    } else {
        return LONG2FIX(-(buf_filled - (long)total_out));
    }
}

#read(*args) ⇒ Object

See Zlib::GzipReader documentation for a description.



3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
# File 'ext/zlib/zlib.c', line 3778

static VALUE
rb_gzreader_read(int argc, VALUE *argv, VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);
    VALUE vlen;
    long len;

    rb_scan_args(argc, argv, "01", &vlen);
    if (NIL_P(vlen)) {
	return gzfile_read_all(gz);
    }

    len = NUM2INT(vlen);
    if (len < 0) {
	rb_raise(rb_eArgError, "negative length %ld given", len);
    }
    return gzfile_read(gz, len);
}

#readbyteObject

See Zlib::GzipReader documentation for a description.



3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
# File 'ext/zlib/zlib.c', line 3879

static VALUE
rb_gzreader_readbyte(VALUE obj)
{
    VALUE dst;
    dst = rb_gzreader_getbyte(obj);
    if (NIL_P(dst)) {
	rb_raise(rb_eEOFError, "end of file reached");
    }
    return dst;
}

#readcharObject

See Zlib::GzipReader documentation for a description.



3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
# File 'ext/zlib/zlib.c', line 3845

static VALUE
rb_gzreader_readchar(VALUE obj)
{
    VALUE dst;
    dst = rb_gzreader_getc(obj);
    if (NIL_P(dst)) {
	rb_raise(rb_eEOFError, "end of file reached");
    }
    return dst;
}

#readline(*args) ⇒ Object

See Zlib::GzipReader documentation for a description.



4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
# File 'ext/zlib/zlib.c', line 4183

static VALUE
rb_gzreader_readline(int argc, VALUE *argv, VALUE obj)
{
    VALUE dst;
    dst = rb_gzreader_gets(argc, argv, obj);
    if (NIL_P(dst)) {
	rb_raise(rb_eEOFError, "end of file reached");
    }
    return dst;
}

#readlines(*args) ⇒ Object

See Zlib::GzipReader documentation for a description.



4231
4232
4233
4234
4235
4236
4237
4238
4239
4240
# File 'ext/zlib/zlib.c', line 4231

static VALUE
rb_gzreader_readlines(int argc, VALUE *argv, VALUE obj)
{
    VALUE str, dst;
    dst = rb_ary_new();
    while (!NIL_P(str = gzreader_gets(argc, argv, obj))) {
	rb_ary_push(dst, str);
    }
    return dst;
}

#readpartial(*args) ⇒ Object

call-seq:

   gzipreader.readpartial(maxlen [, outbuf]) => string, outbuf

Reads at most <i>maxlen</i> bytes from the gziped stream but
it blocks only if <em>gzipreader</em> has no data immediately available.
If the optional <i>outbuf</i> argument is present,
it must reference a String, which will receive the data.
It raises <code>EOFError</code> on end of file.


3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
# File 'ext/zlib/zlib.c', line 3809

static VALUE
rb_gzreader_readpartial(int argc, VALUE *argv, VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);
    VALUE vlen, outbuf;
    long len;

    rb_scan_args(argc, argv, "11", &vlen, &outbuf);

    len = NUM2INT(vlen);
    if (len < 0) {
	rb_raise(rb_eArgError, "negative length %ld given", len);
    }
    if (!NIL_P(outbuf))
        Check_Type(outbuf, T_STRING);
    return gzfile_readpartial(gz, len, outbuf);
}

#rewindObject

Resets the position of the file pointer to the point created the GzipReader object. The associated IO object needs to respond to the seek method.



3751
3752
3753
3754
3755
3756
3757
# File 'ext/zlib/zlib.c', line 3751

static VALUE
rb_gzreader_rewind(VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);
    gzfile_reader_rewind(gz);
    return INT2FIX(0);
}

#tellObject

Total number of output bytes output so far.



3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
# File 'ext/zlib/zlib.c', line 3392

static VALUE
rb_gzfile_total_out(VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);
    uLong total_out = gz->z.stream.total_out;
    long buf_filled = ZSTREAM_BUF_FILLED(&gz->z);

    if (total_out >= (uLong)buf_filled) {
        return rb_uint2inum(total_out - buf_filled);
    } else {
        return LONG2FIX(-(buf_filled - (long)total_out));
    }
}

#ungetbyte(ch) ⇒ Object

See Zlib::GzipReader documentation for a description.



3967
3968
3969
3970
3971
3972
3973
# File 'ext/zlib/zlib.c', line 3967

static VALUE
rb_gzreader_ungetbyte(VALUE obj, VALUE ch)
{
    struct gzfile *gz = get_gzfile(obj);
    gzfile_ungetbyte(gz, NUM2CHR(ch));
    return Qnil;
}

#ungetc(s) ⇒ Object

See Zlib::GzipReader documentation for a description.



3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
# File 'ext/zlib/zlib.c', line 3945

static VALUE
rb_gzreader_ungetc(VALUE obj, VALUE s)
{
    struct gzfile *gz;

    if (FIXNUM_P(s))
	return rb_gzreader_ungetbyte(obj, s);
    gz = get_gzfile(obj);
    StringValue(s);
    if (gz->enc2 && gz->enc2 != rb_ascii8bit_encoding()) {
	s = rb_str_conv_enc(s, rb_enc_get(s), gz->enc2);
    }
    gzfile_ungets(gz, (const Bytef*)RSTRING_PTR(s), RSTRING_LEN(s));
    RB_GC_GUARD(s);
    return Qnil;
}

#unusedObject

Returns the rest of the data which had read for parsing gzip format, or nil if the whole gzip file is not parsed yet.



3765
3766
3767
3768
3769
3770
3771
# File 'ext/zlib/zlib.c', line 3765

static VALUE
rb_gzreader_unused(VALUE obj)
{
    struct gzfile *gz;
    TypedData_Get_Struct(obj, struct gzfile, &gzfile_data_type, gz);
    return gzfile_reader_get_unused(gz);
}