Class: VibeZstd::DecompressReader

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/vibe_zstd.rb,
ext/vibe_zstd/vibe_zstd.c

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(*args) ⇒ Object

Wraps ZSTD streaming decompression to read from a compressed IO object



210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
# File 'ext/vibe_zstd/streaming.c', line 210

/*
 * Initializes a DecompressReader around a compressed IO-like object.
 *
 * Ruby signature: initialize(io, options = nil)
 *   io      - any object that responds to #read; stored on the native struct
 *             and in @io.
 *   options - optional Hash. Recognized keys:
 *               :dict               - dictionary object wrapping a
 *                                     vibe_zstd_ddict; referenced by the
 *                                     decompression context.
 *               :initial_chunk_size - Integer greater than 0 used as the
 *                                     size of unbounded reads; when absent,
 *                                     0 is stored, meaning "use
 *                                     ZSTD_DStreamOutSize()".
 *
 * Raises TypeError if io does not respond to #read, if options is not a
 * Hash, or if :dict is not the expected typed-data object; ArgumentError if
 * :initial_chunk_size is 0; RuntimeError if the ZSTD context cannot be
 * created, reset, or given the dictionary. Conversion errors raised by
 * NUM2SIZET for :initial_chunk_size propagate unchanged.
 *
 * Returns self.
 */
static VALUE
vibe_zstd_reader_initialize(int argc, VALUE *argv, VALUE self) {
    VALUE io, options;
    rb_scan_args(argc, argv, "11", &io, &options);

    vibe_zstd_dstream* dstream;
    TypedData_Get_Struct(self, vibe_zstd_dstream, &vibe_zstd_dstream_type, dstream);

    // Validate IO object responds to read (duck typing)
    if (!rb_respond_to(io, rb_intern("read"))) {
        rb_raise(rb_eTypeError, "IO object must respond to read");
    }

    // Store IO object
    dstream->io = io;
    rb_ivar_set(self, rb_intern("@io"), io);

    // Parse options
    VALUE dict = Qnil;
    size_t initial_chunk_size = 0;  // 0 = use default ZSTD_DStreamOutSize()
    if (!NIL_P(options)) {
        Check_Type(options, T_HASH);
        dict = rb_hash_aref(options, ID2SYM(rb_intern("dict")));

        VALUE v_chunk_size = rb_hash_aref(options, ID2SYM(rb_intern("initial_chunk_size")));
        if (!NIL_P(v_chunk_size)) {
            initial_chunk_size = NUM2SIZET(v_chunk_size);
            if (initial_chunk_size == 0) {
                rb_raise(rb_eArgError, "initial_chunk_size must be greater than 0");
            }
        }
    }

    // Create decompression context (DStream and DCtx are the same since v1.3.0)
    dstream->dstream = ZSTD_createDStream();
    if (!dstream->dstream) {
        rb_raise(rb_eRuntimeError, "Failed to create decompression stream");
    }

    // Reset context for streaming
    size_t result = ZSTD_DCtx_reset((ZSTD_DCtx*)dstream->dstream, ZSTD_reset_session_only);
    if (ZSTD_isError(result)) {
        rb_raise(rb_eRuntimeError, "Failed to reset decompression context: %s", ZSTD_getErrorName(result));
    }

    // Set dictionary if provided
    if (!NIL_P(dict)) {
        vibe_zstd_ddict* ddict_obj;
        TypedData_Get_Struct(dict, vibe_zstd_ddict, &vibe_zstd_ddict_type, ddict_obj);
        result = ZSTD_DCtx_refDDict((ZSTD_DCtx*)dstream->dstream, ddict_obj->ddict);
        if (ZSTD_isError(result)) {
            rb_raise(rb_eRuntimeError, "Failed to set dictionary: %s", ZSTD_getErrorName(result));
        }

        // ZSTD_DCtx_refDDict only references the DDict, so the Ruby object
        // that owns it must stay reachable for as long as this reader lives.
        // Retain it the same way @io is retained above.
        rb_ivar_set(self, rb_intern("@dict"), dict);
    }

    // Initialize input buffer management: start with no pending compressed
    // input so the first read pulls a fresh chunk from the IO.
    dstream->input_data = rb_str_new(NULL, 0);
    dstream->input.src = NULL;
    dstream->input.size = 0;
    dstream->input.pos = 0;
    dstream->eof = 0;
    dstream->initial_chunk_size = initial_chunk_size;

    return self;
}

Class Method Details

.open(io, **options) {|reader| ... } ⇒ Object

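Block-based convenience constructor. With a block, yields the new reader and returns the block's result; without a block, returns the reader. No explicit cleanup step runs after the block completes.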

Yields:

  • (reader)


184
185
186
187
188
189
190
191
# File 'lib/vibe_zstd.rb', line 184

# Builds a reader for +io+ and optionally hands it to a block.
#
# @param io [#read] compressed source passed straight to +new+
# @param options [Hash] keyword options forwarded to +new+
# @yield [reader] the freshly built reader
# @return [Object] the block's result when a block is given, otherwise the reader
def self.open(io, **options)
  instance = new(io, **options)

  # The reader exposes no finish/close step here, so nothing runs after the
  # block: its value is simply handed back to the caller.
  if block_given?
    yield instance
  else
    instance
  end
end

Instance Method Details

#each(chunk_size = nil) ⇒ Object

Iterate over chunks (required for Enumerable)



208
209
210
211
212
213
214
215
# File 'lib/vibe_zstd.rb', line 208

# Yields successive decompressed chunks until +eof?+ reports true.
#
# @param chunk_size [Integer, nil] size passed to every +read+ call
# @yield [chunk] each non-nil chunk returned by +read+
# @return [Enumerator, nil] an enumerator when called without a block
def each(chunk_size = nil)
  unless block_given?
    return enum_for(:each, chunk_size)
  end

  until eof?
    data = read(chunk_size)
    # A nil read produces nothing to yield; the loop condition decides
    # whether another read is attempted.
    next unless data

    yield data
  end
end

#each_line(sep = $/) ⇒ Object

Iterate over lines



234
235
236
237
238
239
240
# File 'lib/vibe_zstd.rb', line 234

# Yields lines obtained from +gets+ until it returns nil.
#
# @param sep [String] line separator forwarded to +gets+ (defaults to $/)
# @yield [line] each line returned by +gets+
# @return [Enumerator, nil] an enumerator when called without a block
def each_line(sep = $/)
  unless block_given?
    return enum_for(:each_line, sep)
  end

  current = gets(sep)
  while current
    yield current
    current = gets(sep)
  end
end

#eof ⇒ Object

Alias for eof?



203
204
205
# File 'lib/vibe_zstd.rb', line 203

# Non-predicate spelling of +eof?+.
#
# @return [Object] whatever +eof?+ returns
def eof
  self.eof?
end

#eof? ⇒ Boolean

Returns:

  • (Boolean)


387
388
389
390
391
392
# File 'ext/vibe_zstd/streaming.c', line 387

/*
 * Reports whether the reader has flagged end of stream.
 *
 * Returns Qtrue when the struct's eof field is non-zero, Qfalse otherwise.
 */
static VALUE
vibe_zstd_reader_eof(VALUE self) {
    vibe_zstd_dstream* reader;
    TypedData_Get_Struct(self, vibe_zstd_dstream, &vibe_zstd_dstream_type, reader);

    if (reader->eof) {
        return Qtrue;
    }
    return Qfalse;
}

#gets(sep = $/) ⇒ Object Also known as: readline

Read a single line (up to the separator, newline by default, or EOF)



218
219
220
221
222
223
224
225
226
227
228
229
230
231
# File 'lib/vibe_zstd.rb', line 218

# Reads one line from the decompressed stream.
#
# Data is pulled one byte at a time via +read(1)+ so nothing beyond the
# separator is consumed. The separator is matched against the accumulated
# line rather than the last byte alone, so multi-character separators such
# as "\r\n" work.
#
# @param sep [String, nil] line separator (defaults to $/); +nil+ reads
#   everything up to EOF
# @return [String, nil] the line including its separator, the remaining
#   data when EOF comes first, or nil when nothing could be read
def gets(sep = $/)
  return nil if eof?

  line = +""
  until eof?
    chunk = read(1)
    break unless chunk

    line << chunk
    # A nil separator means "no line boundary": keep reading until EOF.
    break if sep && line.end_with?(sep)
  end

  line.empty? ? nil : line
end

#read(*args) ⇒ Object

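Reads and decompresses up to the requested number of bytes (one default-sized chunk when no size is given), which allows streaming of arbitrarily large files without loading everything into memory.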



293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
# File 'ext/vibe_zstd/streaming.c', line 293

/*
 * Decompresses and returns up to `size` bytes from the wrapped IO.
 *
 * Ruby signature: read(size = nil)
 *   size - optional maximum number of decompressed bytes. When nil, the
 *          reader's initial_chunk_size is used, or ZSTD_DStreamOutSize()
 *          if none was configured.
 *
 * Compressed input is requested from io.read in ZSTD_DStreamInSize()-sized
 * pieces, and only once the previously fetched input is fully consumed.
 *
 * Returns a String of at most `size` bytes, or nil when the reader is
 * already at EOF or no bytes could be produced.
 *
 * Raises RuntimeError when ZSTD reports a decompression error, and TypeError
 * when io.read returns a non-nil value that cannot be converted to a String.
 */
static VALUE
vibe_zstd_reader_read(int argc, VALUE *argv, VALUE self) {
    VALUE size_arg;
    rb_scan_args(argc, argv, "01", &size_arg);

    vibe_zstd_dstream* dstream;
    TypedData_Get_Struct(self, vibe_zstd_dstream, &vibe_zstd_dstream_type, dstream);

    if (dstream->eof) {
        return Qnil;
    }

    // Unbounded reads use configurable chunk size (defaults to ZSTD_DStreamOutSize() ~128KB)
    // This provides chunked streaming behavior for true streaming use cases
    size_t default_chunk_size = (dstream->initial_chunk_size > 0) ? dstream->initial_chunk_size : ZSTD_DStreamOutSize();
    size_t requested_size = NIL_P(size_arg) ? default_chunk_size : NUM2SIZET(size_arg);
    size_t inBufferSize = ZSTD_DStreamInSize();

    // NOTE(review): a requested size of 0 skips the loop below and falls
    // through to the total_read == 0 branch, which marks EOF and returns
    // nil - confirm whether that is intended.

    // Preallocate buffer for requested size
    VALUE result = rb_str_buf_new(requested_size);

    size_t total_read = 0;
    int made_progress = 0;

    while (total_read < requested_size) {
        // Refill input buffer when all compressed data consumed
        if (dstream->input.pos >= dstream->input.size) {
            VALUE chunk = rb_funcall(dstream->io, rb_intern("read"), 1, SIZET2NUM(inBufferSize));
            if (NIL_P(chunk)) {
                dstream->eof = 1;
                if (total_read == 0 && !made_progress) {
                    return Qnil;
                }
                break;
            }

            // The IO is only duck-typed (#read), so its return value is not
            // guaranteed to be a String. Convert or raise TypeError before
            // touching the raw string pointer.
            StringValue(chunk);

            // Reset input buffer with new data; input_data keeps a reference
            // to the String whose bytes input.src points into.
            dstream->input_data = chunk;
            dstream->input.src = RSTRING_PTR(chunk);
            dstream->input.size = RSTRING_LEN(chunk);
            dstream->input.pos = 0;
        }

        // An empty chunk from the IO is treated as end of input
        if (dstream->input.size == 0) {
            dstream->eof = 1;
            break;
        }

        size_t space_left = requested_size - total_read;

        ZSTD_outBuffer output = {
            .dst = RSTRING_PTR(result) + total_read,
            .size = space_left,
            .pos = 0
        };

        // ZSTD_decompressStream advances input.pos and output.pos
        // Return value: 0 = frame complete, >0 = hint for next input size.
        // Errors are encoded in the size_t result and detected with ZSTD_isError().
        size_t ret = ZSTD_decompressStream(dstream->dstream, &output, &dstream->input);
        if (ZSTD_isError(ret)) {
            rb_raise(rb_eRuntimeError, "Decompression failed: %s", ZSTD_getErrorName(ret));
        }

        if (output.pos > 0) {
            total_read += output.pos;
            made_progress = 1;
        }

        // Exit when we've read enough data
        if (total_read >= requested_size) {
            break;
        }

        // ret == 0 signals end of current frame
        if (ret == 0) {
            dstream->eof = 1;
            break;
        }

        // Otherwise loop again: either more output fits in the buffer or
        // the decoder needs more input (refilled at the top of the loop).
    }

    if (total_read == 0) {
        dstream->eof = 1;
        return Qnil;
    }

    rb_str_set_len(result, total_read);
    return result;
}

#read_all ⇒ Object

Read all remaining data



194
195
196
197
198
199
200
# File 'lib/vibe_zstd.rb', line 194

# Drains the reader by calling +read+ until it returns nil.
#
# @return [String] every chunk returned by +read+, joined in order
def read_all
  pieces = []

  piece = read
  while piece
    pieces.push(piece)
    piece = read
  end

  pieces.join
end

#readpartial(maxlen) ⇒ Object

Read up to maxlen bytes, raising EOFError when the stream is already at EOF or no data is returned

Raises:

  • (EOFError)


246
247
248
249
250
251
252
253
# File 'lib/vibe_zstd.rb', line 246

# Returns whatever a single +read(maxlen)+ call produces.
#
# @param maxlen [Integer] upper bound forwarded to +read+
# @return [String] the data returned by +read+
# @raise [EOFError] when +eof?+ is already true or +read+ returns nil
def readpartial(maxlen)
  # Skip the read entirely once the reader has reported EOF.
  chunk = eof? ? nil : read(maxlen)
  return chunk unless chunk.nil?

  raise EOFError, "end of file reached"
end