Class: StringScanner::UTF8

Inherits:
StringScanner show all
Defined in:
lib/utf8/string_scanner.rb,
ext/utf8/string_scanner_utf8.c

Instance Method Summary collapse

Methods inherited from StringScanner

#as_utf8

Instance Method Details

#as_raw ⇒ Object

Returns a non-UTF8-aware version of StringScanner wrapping your original string

NOTE: this will lose all state associated with the current StringScanner::UTF8 instance (like the current scan position)



17
18
19
# File 'lib/utf8/string_scanner.rb', line 17

# Builds a plain StringScanner around the string this scanner wraps.
#
# The new scanner is created fresh from the string alone, so nothing else
# from this instance (such as its scan position) is carried over.
#
# @return [StringScanner] a new scanner over the same string
def as_raw
  StringScanner.new(string)
end

#getch ⇒ Object

Works like StringScanner#getch but is UTF8-aware



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'ext/utf8/string_scanner_utf8.c', line 41

/*
 * UTF-8 aware counterpart of StringScanner#getch.
 *
 * Reads the scanner's string and current byte position (from the strscanner
 * struct, or from the @string / @pos instance variables on Rubinius), returns
 * the character that starts at that position as a new String, and advances the
 * stored position past it.
 *
 * Returns nil when the string is empty or the position is at/after its end.
 * Raises ArgumentError when utf8CharLen reports a negative length.
 */
static VALUE rb_cStringScanner_UTF8_getch(VALUE self) {
  unsigned char *str;
  long len = 0, pos = 0;
  VALUE utf8Str, curStr;
  int8_t lastCharLen=0;
  
#ifndef RUBINIUS
  struct strscanner *scanner;
  GET_SCANNER(self, scanner);

  curStr = scanner->str;
  pos = scanner->curr;
#else
  curStr = rb_iv_get(self, "@string");
  pos = FIX2LONG(rb_iv_get(self, "@pos"));
#endif

  str = (unsigned char *)RSTRING_PTR(curStr);
  len = RSTRING_LEN(curStr);

  if (len > 0 && len > pos) {
    /*
     * Measure the character at the current scan position, not the one at the
     * start of the string, and only offer the bytes that remain after pos.
     * Measuring from byte 0 made every call reuse the width of the first
     * character, which could split a multi-byte character or copy bytes past
     * the end of the buffer.
     */
    lastCharLen = utf8CharLen(str + pos, len - pos);
    if (lastCharLen < 0) {
      rb_raise(rb_eArgError, "invalid utf-8 byte sequence");
    }
    utf8Str = rb_str_new((char *)str+pos, lastCharLen);
    pos += lastCharLen;
    /* Persist the advanced position back to wherever it was read from. */
#ifndef RUBINIUS
    scanner->curr = pos;
#else
    rb_iv_set(self, "@pos", LONG2FIX(pos));
#endif
    /* NOTE(review): AS_UTF8 is defined elsewhere; presumably it tags the new
     * string's encoding as UTF-8 - confirm against its definition. */
    AS_UTF8(utf8Str);
    return utf8Str;
  } else {
    return Qnil;
  }
}