Class: Liquid::C::Tokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/liquid/c.rb,
ext/liquid_c/tokenizer.c

Constant Summary collapse

MAX_SOURCE_BYTE_SIZE =
(1 << 24) - 1

Instance Method Summary collapse

Constructor Details

#initialize(source, start_line_number, for_liquid_tag) ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'ext/liquid_c/tokenizer.c', line 46

/*
 * Liquid::C::Tokenizer#initialize(source, start_line_number, for_liquid_tag)
 *
 * Points the tokenizer at the first byte of a frozen copy of `source`.
 *
 * source            - must be a String (Check_Type raises TypeError
 *                     otherwise); it is also passed through
 *                     check_utf8_encoding() and may be at most
 *                     (1 << 24) - 1 bytes long, else ArgumentError is raised.
 * start_line_number - Integer initial line number; 0 means line numbers are
 *                     not being calculated.
 * for_liquid_tag    - stored by truthiness (RTEST).
 *
 * Returns nil.
 */
static VALUE tokenizer_initialize_method(VALUE self, VALUE source, VALUE start_line_number, VALUE for_liquid_tag)
{
    tokenizer_t *tokenizer;

    Check_Type(source, T_STRING);
    check_utf8_encoding(source, "source");

#define MAX_SOURCE_CODE_BYTES ((1 << 24) - 1)
    if (RSTRING_LEN(source) > MAX_SOURCE_CODE_BYTES) {
        rb_enc_raise(utf8_encoding, rb_eArgError, "Source too large, max %d bytes", MAX_SOURCE_CODE_BYTES);
    }
#undef MAX_SOURCE_CODE_BYTES

    // NUM2UINT rather than FIX2UINT: FIX2UINT is only valid for Fixnum values
    // and is an unchecked shift on platforms where int and long have the same
    // size, so a nil or other non-Fixnum argument would silently become a
    // garbage line number there. Converting up front also means a bad
    // argument raises before any tokenizer state has been written.
    unsigned int line_number = NUM2UINT(start_line_number);

    Tokenizer_Get_Struct(self, tokenizer);
    source = rb_str_dup_frozen(source);
    tokenizer->source = source;
    tokenizer->cursor = RSTRING_PTR(source);
    tokenizer->cursor_end = tokenizer->cursor + RSTRING_LEN(source);
    tokenizer->lstrip_flag = false;
    // tokenizer->line_number keeps track of the current line number or it is 0
    // to indicate that line numbers aren't being calculated
    tokenizer->line_number = line_number;
    tokenizer->for_liquid_tag = RTEST(for_liquid_tag);
    return Qnil;
}

Instance Method Details

#bug_compatible_whitespace_trimming! ⇒ Object

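Temporary switch used while testing the rollout of a fix for a whitespace-trimming bug; calling it keeps the tokenizer's bug-compatible trimming behaviour enabled.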



275
276
277
278
279
280
281
# File 'ext/liquid_c/tokenizer.c', line 275

/*
 * Liquid::C::Tokenizer#bug_compatible_whitespace_trimming!
 *
 * Temporary: switches on the tokenizer's bug_compatible_whitespace_trimming
 * flag. Always returns nil.
 */
static VALUE tokenizer_bug_compatible_whitespace_trimming(VALUE self)
{
    tokenizer_t *tk;

    Tokenizer_Get_Struct(self, tk);
    tk->bug_compatible_whitespace_trimming = true;

    return Qnil;
}

#for_liquid_tag ⇒ Object



265
266
267
268
269
270
271
# File 'ext/liquid_c/tokenizer.c', line 265

/*
 * Liquid::C::Tokenizer#for_liquid_tag
 *
 * Reports the for_liquid_tag flag stored on the tokenizer as a Ruby boolean:
 * true when the flag is set, false otherwise.
 */
static VALUE tokenizer_for_liquid_tag_method(VALUE self)
{
    tokenizer_t *tk;

    Tokenizer_Get_Struct(self, tk);
    if (tk->for_liquid_tag) {
        return Qtrue;
    }
    return Qfalse;
}

#line_number ⇒ Object



254
255
256
257
258
259
260
261
262
263
# File 'ext/liquid_c/tokenizer.c', line 254

/*
 * Liquid::C::Tokenizer#line_number
 *
 * Returns the tokenizer's current line number as a Ruby Integer, or nil when
 * the stored line number is 0.
 */
static VALUE tokenizer_line_number_method(VALUE self)
{
    tokenizer_t *tk;

    Tokenizer_Get_Struct(self, tk);

    return tk->line_number != 0 ? UINT2NUM(tk->line_number) : Qnil;
}

#shift ⇒ Object



223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
# File 'ext/liquid_c/tokenizer.c', line 223

/*
 * Liquid::C::Tokenizer#shift
 *
 * Advances the tokenizer by one token via tokenizer_next(). Returns the
 * token's full text as a new UTF-8 String, or nil when the resulting token
 * has a zero type.
 */
static VALUE tokenizer_shift_method(VALUE self)
{
    tokenizer_t *tk;
    token_t next_token;

    Tokenizer_Get_Struct(self, tk);
    tokenizer_next(tk, &next_token);

    if (next_token.type) {
        // Ruby receives the raw token text, whitespace and tag delimiters
        // included, so that concatenating every token reproduces the
        // original template exactly.
        return rb_enc_str_new(next_token.str_full, next_token.len_full, utf8_encoding);
    }

    return Qnil;
}