Class: CharacterSet

Inherits:
Object
  • Object
show all
Extended by:
PredefinedSets
Includes:
SetMethodAdapters, SharedMethods, Enumerable
Defined in:
lib/character_set/pure.rb,
lib/character_set.rb,
lib/character_set/parser.rb,
lib/character_set/writer.rb,
lib/character_set/version.rb,
lib/character_set/character.rb,
lib/character_set/ruby_fallback.rb,
lib/character_set/shared_methods.rb,
lib/character_set/predefined_sets.rb,
lib/character_set/core_ext/regexp_ext.rb,
lib/character_set/core_ext/string_ext.rb,
lib/character_set/set_method_adapters.rb,
lib/character_set/expression_converter.rb,
lib/character_set/ruby_fallback/set_methods.rb,
lib/character_set/ruby_fallback/character_set_methods.rb,
ext/character_set/character_set.c

Overview

Various methods shared by the pure-Ruby and the extended implementation.

Many of these methods are hotspots, so they are defined directly on the including classes for better performance.

Defined Under Namespace

Modules: CoreExt, ExpressionConverter, Parser, PredefinedSets, RubyFallback, SetMethodAdapters, SharedMethods, Writer

Classes: Character, Pure

Constant Summary collapse

VERSION =
'1.4.1'

Class Method Summary collapse

Instance Method Summary collapse

Methods included from PredefinedSets

build_from_cps_file

Methods included from SharedMethods

included

Class Method Details

.from_ranges(ranges) ⇒ Object

******************************



680
681
682
683
684
685
686
687
688
689
690
691
# File 'ext/character_set/character_set.c', line 680

// CharacterSet.from_ranges: creates a new instance of the receiving class and
// merges every element of `ranges` into it via cs_merge_rb_range.
//
// self   - the class the method was called on; used to instantiate the result.
// ranges - read with RARRAY_LEN / RARRAY_AREF, so it must be a Ruby Array;
//          each element is passed unchanged to cs_merge_rb_range.
//
// Returns the newly built set.
static VALUE
cs_class_method_from_ranges(VALUE self, VALUE ranges)
{
  VALUE new_cs;
  // The element count and loop index are plain C integers; keep them out of
  // VALUE, which is the type for Ruby object handles.
  long range_count, i;
  new_cs = rb_class_new_instance(0, 0, self);
  range_count = RARRAY_LEN(ranges);
  for (i = 0; i < range_count; i++)
  {
    cs_merge_rb_range(new_cs, RARRAY_AREF(ranges, i));
  }
  return new_cs;
}

.of(str) ⇒ Object



1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
# File 'ext/character_set/character_set.c', line 1048

// CharacterSet.of: allocates a set of the receiving class, then feeds every
// codepoint of `str` through add_str_cp_to_arr with the new set's data.
// Raises (via raise_arg_err_unless_string) when `str` is not a String.
static VALUE
cs_class_method_of(VALUE self, VALUE str)
{
  struct cs_data *set_data;
  VALUE result = cs_alloc(self, &set_data);

  raise_arg_err_unless_string(str);
  each_cp(str, add_str_cp_to_arr, 0, 0, set_data, 0);

  return result;
}

Instance Method Details

#&(other) ⇒ Object



331
332
333
334
335
# File 'ext/character_set/character_set.c', line 331

// C implementation listed for CharacterSet#& / #intersection.
// All work is delegated to the RETURN_COMBINED_CS macro (defined elsewhere in
// the file), which receives both operands and the C operator `&&` and, as its
// name says, returns from this function.
// NOTE(review): presumably the operator is applied per codepoint to the
// membership of `self` and `other` — confirm against the macro definition.
static VALUE
cs_method_intersection(VALUE self, VALUE other)
{
  RETURN_COMBINED_CS(self, other, &&);
}

#+(other) ⇒ Object



343
344
345
346
347
# File 'ext/character_set/character_set.c', line 343

// C implementation listed for CharacterSet#+ / #| / #union.
// All work is delegated to the RETURN_COMBINED_CS macro (defined elsewhere in
// the file), which receives both operands and the C operator `||` and, as its
// name says, returns from this function.
// NOTE(review): presumably the operator is applied per codepoint to the
// membership of `self` and `other` — confirm against the macro definition.
static VALUE
cs_method_union(VALUE self, VALUE other)
{
  RETURN_COMBINED_CS(self, other, ||);
}

#-(other) ⇒ Object



349
350
351
352
353
# File 'ext/character_set/character_set.c', line 349

// C implementation listed for CharacterSet#- / #difference.
// All work is delegated to the RETURN_COMBINED_CS macro (defined elsewhere in
// the file), which receives both operands and the C operator `>`.
// NOTE(review): if the macro applies the operator to 0/1 membership flags,
// `a > b` is true only for "in self and not in other" — confirm against the
// macro definition.
static VALUE
cs_method_difference(VALUE self, VALUE other)
{
  RETURN_COMBINED_CS(self, other, >);
}

#<(other) ⇒ Object



654
655
656
657
658
659
660
# File 'ext/character_set/character_set.c', line 654

// CharacterSet#< / #proper_subset?: Qtrue only when cs_a_subset_of_b reports
// that `self` is a subset of `other` AND sets its "proper" out-flag.
static VALUE
cs_method_proper_subset_p(VALUE self, VALUE other)
{
  int proper;

  if (!cs_a_subset_of_b(self, other, &proper))
  {
    return Qfalse;
  }
  return proper ? Qtrue : Qfalse;
}

#<<(cp_num) ⇒ Object



393
394
395
396
397
# File 'ext/character_set/character_set.c', line 393

// CharacterSet#<< / #add: forwards to cs_toggle_codepoint with flags (1, 0).
// NOTE(review): judging by the sibling add?/delete/delete? wrappers, the first
// flag selects add vs. delete and the second selects the `?` variant — confirm
// against cs_toggle_codepoint.
static VALUE
cs_method_add(VALUE self, VALUE cp_num)
{
  VALUE result;

  result = cs_toggle_codepoint(self, cp_num, 1, 0);
  return result;
}

#<=(other) ⇒ Object



648
649
650
651
652
# File 'ext/character_set/character_set.c', line 648

// CharacterSet#<= / #subset?: Qtrue when cs_a_subset_of_b(self, other) is
// non-zero. No "proper" out-flag is requested (NULL).
static VALUE
cs_method_subset_p(VALUE self, VALUE other)
{
  if (cs_a_subset_of_b(self, other, NULL))
  {
    return Qtrue;
  }
  return Qfalse;
}

#==(other) ⇒ Object



463
464
465
466
467
468
469
470
471
472
473
474
475
# File 'ext/character_set/character_set.c', line 463

// CharacterSet#== / #eql?: Qfalse unless `other` passes cs_check_type; Qtrue
// for the very same object; otherwise whatever cs_cps_eql decides.
static VALUE
cs_method_eql_p(VALUE self, VALUE other)
{
  if (cs_check_type(other))
  {
    // identical VALUE means same object, no need to compare contents
    return (self == other) ? Qtrue : cs_cps_eql(self, other);
  }
  return Qfalse;
}

#===(num) ⇒ Object



355
356
357
358
359
360
361
362
# File 'ext/character_set/character_set.c', line 355

// CharacterSet#=== / #include? / #member?: tests a single codepoint.
// `num` is converted with FIX2ULONG without a type check, so callers are
// expected to pass a Fixnum.
static VALUE
cs_method_include_p(VALUE self, VALUE num)
{
  cs_cp allocated;
  cs_ar *codepoints = cs_fetch_cps(self, &allocated);

  if (tst_cp(codepoints, allocated, FIX2ULONG(num)))
  {
    return Qtrue;
  }
  return Qfalse;
}

#>(other) ⇒ Object



668
669
670
671
672
673
674
# File 'ext/character_set/character_set.c', line 668

// CharacterSet#> / #proper_superset?: the subset test with operands swapped.
// Qtrue only when `other` is a subset of `self` AND cs_a_subset_of_b sets its
// "proper" out-flag.
static VALUE
cs_method_proper_superset_p(VALUE self, VALUE other)
{
  int proper;

  if (!cs_a_subset_of_b(other, self, &proper))
  {
    return Qfalse;
  }
  return proper ? Qtrue : Qfalse;
}

#>=(other) ⇒ Object



662
663
664
665
666
# File 'ext/character_set/character_set.c', line 662

// CharacterSet#>= / #superset?: Qtrue when cs_a_subset_of_b(other, self) is
// non-zero, i.e. the subset test with operands swapped.
static VALUE
cs_method_superset_p(VALUE self, VALUE other)
{
  if (cs_a_subset_of_b(other, self, NULL))
  {
    return Qtrue;
  }
  return Qfalse;
}

#^(other) ⇒ Object



337
338
339
340
341
# File 'ext/character_set/character_set.c', line 337

// C implementation listed for CharacterSet#^.
// All work is delegated to the RETURN_COMBINED_CS macro (defined elsewhere in
// the file), which receives both operands and the C operator `^`.
// NOTE(review): if the macro applies the operator to 0/1 membership flags,
// the result keeps codepoints present in exactly one operand — confirm
// against the macro definition.
static VALUE
cs_method_exclusion(VALUE self, VALUE other)
{
  RETURN_COMBINED_CS(self, other, ^);
}

#add(cp_num) ⇒ Object



393
394
395
396
397
# File 'ext/character_set/character_set.c', line 393

// CharacterSet#<< / #add: forwards to cs_toggle_codepoint with flags (1, 0).
// NOTE(review): judging by the sibling add?/delete/delete? wrappers, the first
// flag selects add vs. delete and the second selects the `?` variant — confirm
// against cs_toggle_codepoint.
static VALUE
cs_method_add(VALUE self, VALUE cp_num)
{
  VALUE result;

  result = cs_toggle_codepoint(self, cp_num, 1, 0);
  return result;
}

#add?(cp_num) ⇒ Boolean

Returns:

  • (Boolean)


399
400
401
402
403
# File 'ext/character_set/character_set.c', line 399

// CharacterSet#add?: forwards to cs_toggle_codepoint with flags (1, 1).
// NOTE(review): the second flag presumably switches to the `?`-style return
// value — confirm against cs_toggle_codepoint.
static VALUE
cs_method_add_p(VALUE self, VALUE cp_num)
{
  VALUE result;

  result = cs_toggle_codepoint(self, cp_num, 1, 1);
  return result;
}

#allocated_length ⇒ Object



1275
1276
1277
1278
1279
# File 'ext/character_set/character_set.c', line 1275

// CharacterSet#allocated_length: exposes the `len` field of the set's
// cs_data struct as a Fixnum.
static VALUE
cs_method_allocated_length(VALUE self)
{
  struct cs_data *set_data = cs_fetch_data(self);

  return LONG2FIX(set_data->len);
}

#case_insensitive ⇒ Object



925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
# File 'ext/character_set/character_set.c', line 925

// CharacterSet#case_insensitive: returns a new set of the same class holding
// everything in `self` plus case-fold counterparts from
// unicode_casefold_table.
//
// For each table entry: if `self` contains `from`, `to` is added; otherwise,
// if `self` contains `to`, `from` is added. Membership is always tested
// against the ORIGINAL set, never against the set being built.
//
// (An alternative based on the encoding's case_map callback with
// ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE was sketched here before; those
// flags are not public on ruby < 2.4.)
static VALUE
cs_method_case_insensitive(VALUE self)
{
  cs_cp idx, len;
  cs_ar *cps;
  VALUE folded_cs;
  struct cs_data *folded_data;

  cps = cs_fetch_cps(self, &len);
  folded_cs = cs_alloc(RBASIC(self)->klass, &folded_data);
  cs_merge_cs(folded_cs, self);

  for (idx = 0; idx < CASEFOLD_COUNT; idx++)
  {
    casefold_mapping mapping = unicode_casefold_table[idx];

    if (tst_cp(cps, len, mapping.from))
    {
      set_cp(folded_data, mapping.to);
      continue;
    }
    if (tst_cp(cps, len, mapping.to))
    {
      set_cp(folded_data, mapping.from);
    }
  }

  return folded_cs;
}

#clear ⇒ Object



267
268
269
270
271
272
273
274
275
# File 'ext/character_set/character_set.c', line 267

// CharacterSet#clear: zeroes the set's codepoint storage in place and returns
// `self`. Raises if the receiver is frozen. The allocation is kept; only
// CS_MSIZE(len) bytes are overwritten with 0.
static VALUE
cs_method_clear(VALUE self)
{
  struct cs_data *set_data;

  rb_check_frozen(self);

  set_data = cs_fetch_data(self);
  memset(set_data->cps, 0, CS_MSIZE(set_data->len));

  return self;
}

#count_in(str) ⇒ Object



1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
# File 'ext/character_set/character_set.c', line 1069

// CharacterSet#count_in: walks `str` with each_cp, letting count_str_cp
// accumulate into a local counter, and returns that counter as a Ruby
// Integer. Raises (via raise_arg_err_unless_string) for non-String input.
// The counter is narrowed to `int` before conversion.
static VALUE
cs_method_count_in(VALUE self, VALUE str)
{
  struct cs_data *set_data;
  VALUE matches = 0;

  raise_arg_err_unless_string(str);
  set_data = cs_fetch_data(self);
  each_cp(str, count_str_cp, set_data->cps, set_data->len, set_data, &matches);

  return INT2NUM((int)matches);
}

#cover?(str) ⇒ Boolean

Returns:

  • (Boolean)


1087
1088
1089
1090
1091
1092
1093
1094
# File 'ext/character_set/character_set.c', line 1087

// CharacterSet#cover?: returns whatever each_cp yields when run over `str`
// with the str_cp_in_arr callback and this set's data.
// Raises (via raise_arg_err_unless_string) for non-String input.
static VALUE
cs_method_cover_p(VALUE self, VALUE str)
{
  struct cs_data *set_data;
  VALUE verdict;

  raise_arg_err_unless_string(str);
  set_data = cs_fetch_data(self);
  verdict = each_cp(str, str_cp_in_arr, set_data->cps, set_data->len, set_data, 0);

  return verdict;
}

#delete(cp_num) ⇒ Object



405
406
407
408
409
# File 'ext/character_set/character_set.c', line 405

// CharacterSet#delete: forwards to cs_toggle_codepoint with flags (0, 0).
// NOTE(review): the first flag presumably selects delete (0) vs. add (1) —
// confirm against cs_toggle_codepoint.
static VALUE
cs_method_delete(VALUE self, VALUE cp_num)
{
  VALUE result;

  result = cs_toggle_codepoint(self, cp_num, 0, 0);
  return result;
}

#delete?(cp_num) ⇒ Boolean

Returns:

  • (Boolean)


411
412
413
414
415
# File 'ext/character_set/character_set.c', line 411

// CharacterSet#delete?: forwards to cs_toggle_codepoint with flags (0, 1).
// NOTE(review): the second flag presumably switches to the `?`-style return
// value — confirm against cs_toggle_codepoint.
static VALUE
cs_method_delete_p(VALUE self, VALUE cp_num)
{
  VALUE result;

  result = cs_toggle_codepoint(self, cp_num, 0, 1);
  return result;
}

#delete_if ⇒ Object



253
254
255
256
257
258
# File 'ext/character_set/character_set.c', line 253

// CharacterSet#delete_if.
// RETURN_SIZED_ENUMERATOR is the Ruby C API macro that returns an Enumerator
// (sized through cs_enumerator_length) when the method is called without a
// block; with a block, execution continues to the call below.
// The actual work is done by cs_delete_if_block_result with flag 1
// (#keep_if passes 0).
static VALUE
cs_method_delete_if(VALUE self)
{
  RETURN_SIZED_ENUMERATOR(self, 0, 0, cs_enumerator_length);
  return cs_delete_if_block_result(self, 1);
}

#delete_in(str) ⇒ Object



1251
1252
1253
1254
1255
# File 'ext/character_set/character_set.c', line 1251

// CharacterSet#delete_in: forwards to cs_apply_to_str with flags (1, 0).
// NOTE(review): across the four *_in wrappers the first flag is 1 for delete
// and 0 for keep, the second is 1 only for the `!` variants — confirm against
// cs_apply_to_str.
static VALUE
cs_method_delete_in(VALUE self, VALUE str)
{
  VALUE result;

  result = cs_apply_to_str(self, str, 1, 0);
  return result;
}

#delete_in!(str) ⇒ Object



1257
1258
1259
1260
1261
# File 'ext/character_set/character_set.c', line 1257

// CharacterSet#delete_in!: forwards to cs_apply_to_str with flags (1, 1).
// NOTE(review): across the four *_in wrappers the first flag is 1 for delete
// and 0 for keep, the second is 1 only for the `!` variants — confirm against
// cs_apply_to_str.
static VALUE
cs_method_delete_in_bang(VALUE self, VALUE str)
{
  VALUE result;

  result = cs_apply_to_str(self, str, 1, 1);
  return result;
}

#difference(other) ⇒ Object



349
350
351
352
353
# File 'ext/character_set/character_set.c', line 349

// C implementation listed for CharacterSet#- / #difference.
// All work is delegated to the RETURN_COMBINED_CS macro (defined elsewhere in
// the file), which receives both operands and the C operator `>`.
// NOTE(review): if the macro applies the operator to 0/1 membership flags,
// `a > b` is true only for "in self and not in other" — confirm against the
// macro definition.
static VALUE
cs_method_difference(VALUE self, VALUE other)
{
  RETURN_COMBINED_CS(self, other, >);
}

#disjoint?(other) ⇒ Boolean

Returns:

  • (Boolean)


434
435
436
437
438
# File 'ext/character_set/character_set.c', line 434

// CharacterSet#disjoint?: the negation of #intersect?.
//
// cs_method_intersect_p returns a Ruby boolean (Qtrue / Qfalse), not a C
// truth value, so the result is compared against Qtrue explicitly instead of
// relying on Qfalse being represented as 0.
static VALUE
cs_method_disjoint_p(VALUE self, VALUE other)
{
  return cs_method_intersect_p(self, other) == Qtrue ? Qfalse : Qtrue;
}

#each ⇒ Object

`Set` compatibility methods



175
176
177
178
179
180
181
# File 'ext/character_set/character_set.c', line 175

// CharacterSet#each.
// RETURN_SIZED_ENUMERATOR is the Ruby C API macro that returns an Enumerator
// (sized through cs_enumerator_length) when no block is given.
// With a block, FOR_EACH_ACTIVE_CODEPOINT (defined elsewhere in the file) runs
// the given statement with `cp` in scope; here each `cp` is yielded to the
// block as a Fixnum. Returns `self`.
// NOTE(review): presumably the macro visits exactly the codepoints contained
// in `self`, in ascending order — confirm against its definition.
static VALUE
cs_method_each(VALUE self)
{
  RETURN_SIZED_ENUMERATOR(self, 0, 0, cs_enumerator_length);
  FOR_EACH_ACTIVE_CODEPOINT(rb_yield(LONG2FIX(cp)));
  return self;
}

#empty? ⇒ Boolean

Returns:

  • (Boolean)


206
207
208
209
210
211
# File 'ext/character_set/character_set.c', line 206

// CharacterSet#empty?.
// The statement handed to FOR_EACH_ACTIVE_CODEPOINT returns Qfalse as soon as
// the macro runs it once; if it never runs, the function falls through and
// returns Qtrue.
// NOTE(review): presumably the macro runs its statement once per codepoint
// contained in `self` — confirm against its definition.
static VALUE
cs_method_empty_p(VALUE self)
{
  FOR_EACH_ACTIVE_CODEPOINT(return Qfalse);
  return Qtrue;
}

#eql?(other) ⇒ Boolean

Returns:

  • (Boolean)


463
464
465
466
467
468
469
470
471
472
473
474
475
# File 'ext/character_set/character_set.c', line 463

// CharacterSet#== / #eql?: Qfalse unless `other` passes cs_check_type; Qtrue
// for the very same object; otherwise whatever cs_cps_eql decides.
static VALUE
cs_method_eql_p(VALUE self, VALUE other)
{
  if (cs_check_type(other))
  {
    // identical VALUE means same object, no need to compare contents
    return (self == other) ? Qtrue : cs_cps_eql(self, other);
  }
  return Qfalse;
}

#ext_count_in_section(from, upto) ⇒ Object



774
775
776
777
778
779
780
# File 'ext/character_set/character_set.c', line 774

// CharacterSet#ext_count_in_section: converts both bounds with FIX2ULONG (no
// type check, Fixnums expected) and returns the result of
// cs_active_cp_count_in_section as a Fixnum.
static VALUE
cs_method_ext_count_in_section(VALUE self, VALUE from, VALUE upto)
{
  cs_cp count = cs_active_cp_count_in_section(self, FIX2ULONG(from), FIX2ULONG(upto));

  return LONG2FIX(count);
}

#ext_inversion(*args) ⇒ Object



889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
# File 'ext/character_set/character_set.c', line 889

// CharacterSet#ext_inversion(include_surrogates = false, upto = nil):
// returns a new set of the same class containing every codepoint that is NOT
// in `self`, limited to 0..upto (inclusive).
//
// argv[0] - surrogates are included only when this is exactly Qtrue;
//           otherwise codepoints failing NON_SURROGATE are skipped.
// argv[1] - upper bound; used only when it is a Fixnum, otherwise the whole
//           Unicode range (UNICODE_CP_COUNT) is covered.
//
// Raises ArgumentError (rb_check_arity) for more than two arguments.
static VALUE
cs_method_ext_inversion(int argc, VALUE *argv, VALUE self)
{
  int inc_surr;
  cs_cp upto, cp, len, stop;
  cs_ar *cps;
  VALUE new_cs;
  struct cs_data *new_data;

  rb_check_arity(argc, 0, 2);

  cps = cs_fetch_cps(self, &len);
  inc_surr = argc && argv[0] == Qtrue;
  new_cs = cs_alloc(RBASIC(self)->klass, &new_data);
  upto = argc > 1 && FIXNUM_P(argv[1]) ? FIX2ULONG(argv[1]) : UNICODE_CP_COUNT;

  // Exclusive loop bound: one past `upto`, capped at the Unicode range. This
  // stops the scan at the requested bound instead of re-checking `cp <= upto`
  // for every remaining codepoint.
  stop = upto < UNICODE_CP_COUNT ? upto + 1 : UNICODE_CP_COUNT;

  for (cp = 0; cp < stop; cp++)
  {
    if (!tst_cp(cps, len, cp) && (inc_surr || NON_SURROGATE(cp)))
    {
      set_cp(new_data, cp);
    }
  }

  return new_cs;
}

#ext_section(from, upto) ⇒ Object



752
753
754
755
756
# File 'ext/character_set/character_set.c', line 752

// CharacterSet#ext_section: converts both bounds with FIX2ULONG (no type
// check, Fixnums expected) and returns the result of cs_from_section.
static VALUE
cs_method_ext_section(VALUE self, VALUE from, VALUE upto)
{
  VALUE section;

  section = cs_from_section(self, FIX2ULONG(from), FIX2ULONG(upto));
  return section;
}

#ext_section?(from, upto) ⇒ Boolean

Returns:

  • (Boolean)


796
797
798
799
800
801
802
803
# File 'ext/character_set/character_set.c', line 796

// CharacterSet#ext_section?: converts both bounds with FIX2ULONG (no type
// check, Fixnums expected) and returns the result of cs_has_cp_in_section for
// this set's codepoint storage.
static VALUE
cs_method_ext_section_p(VALUE self, VALUE from, VALUE upto)
{
  cs_cp allocated;
  cs_ar *codepoints = cs_fetch_cps(self, &allocated);

  return cs_has_cp_in_section(codepoints, allocated, FIX2ULONG(from), FIX2ULONG(upto));
}

#ext_section_ratio(from, upto) ⇒ Object



814
815
816
817
818
# File 'ext/character_set/character_set.c', line 814

// CharacterSet#ext_section_ratio: converts both bounds with FIX2ULONG (no
// type check, Fixnums expected) and returns the result of cs_ratio_of_section.
static VALUE
cs_method_ext_section_ratio(VALUE self, VALUE from, VALUE upto)
{
  VALUE ratio;

  ratio = cs_ratio_of_section(self, FIX2ULONG(from), FIX2ULONG(upto));
  return ratio;
}

#hash ⇒ Object



213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
# File 'ext/character_set/character_set.c', line 213

// CharacterSet#hash: derives a Fixnum hash from the set's contents.
//
// The codepoint range 0...len is processed in chunks of 32. For each chunk
// the number of contained codepoints is counted in `four_byte_value` (despite
// its name it is a count, not the raw 4 bytes). At the start of each new
// chunk the previous chunk's count is folded in as `hash = hash * 23 + count`,
// starting from the seed 17.
//
// NOTE(review): folding only happens when the NEXT chunk starts, so the count
// of the final chunk is never mixed into the hash. The result also depends on
// `len`, not only on the contained codepoints.
static VALUE
cs_method_hash(VALUE self)
{
  cs_cp cp, len, hash, four_byte_value;
  cs_ar *cps;
  cps = cs_fetch_cps(self, &len);
  four_byte_value = 0;

  hash = 17;
  for (cp = 0; cp < len; cp++)
  {
    // chunk boundary: fold the finished chunk (if any) and reset the counter
    if (cp % 32 == 0)
    {
      if (cp != 0)
      {
        hash = hash * 23 + four_byte_value;
      }
      four_byte_value = 0;
    }
    if (tst_cp(cps, len, cp))
    {
      four_byte_value++;
    }
  }

  return LONG2FIX(hash);
}

#include?(num) ⇒ Boolean

Returns:

  • (Boolean)


355
356
357
358
359
360
361
362
# File 'ext/character_set/character_set.c', line 355

// CharacterSet#=== / #include? / #member?: tests a single codepoint.
// `num` is converted with FIX2ULONG without a type check, so callers are
// expected to pass a Fixnum.
static VALUE
cs_method_include_p(VALUE self, VALUE num)
{
  cs_cp allocated;
  cs_ar *codepoints = cs_fetch_cps(self, &allocated);

  if (tst_cp(codepoints, allocated, FIX2ULONG(num)))
  {
    return Qtrue;
  }
  return Qfalse;
}

#initialize_clone(orig) ⇒ Object



581
582
583
584
585
586
# File 'ext/character_set/character_set.c', line 581

// CharacterSet#initialize_clone / #initialize_dup: fills the fresh copy
// (`self`) by merging the original set into it, then returns the copy.
static VALUE
cs_method_initialize_copy(VALUE self, VALUE orig)
{
  // return value of the merge is not needed; the copy itself is returned
  (void)cs_merge_cs(self, orig);

  return self;
}

#initialize_dup(orig) ⇒ Object



581
582
583
584
585
586
# File 'ext/character_set/character_set.c', line 581

// CharacterSet#initialize_clone / #initialize_dup: fills the fresh copy
// (`self`) by merging the original set into it, then returns the copy.
static VALUE
cs_method_initialize_copy(VALUE self, VALUE orig)
{
  // return value of the merge is not needed; the copy itself is returned
  (void)cs_merge_cs(self, orig);

  return self;
}

#intersect?(other) ⇒ Boolean

Returns:

  • (Boolean)


417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
# File 'ext/character_set/character_set.c', line 417

// CharacterSet#intersect?: Qtrue as soon as one codepoint is found in both
// sets, Qfalse after the whole Unicode range has been scanned without a hit.
// `other` is passed to cs_fetch_cps without a type check.
static VALUE
cs_method_intersect_p(VALUE self, VALUE other)
{
  cs_cp cp, own_len, other_len;
  cs_ar *own_cps, *other_cps;

  own_cps = cs_fetch_cps(self, &own_len);
  other_cps = cs_fetch_cps(other, &other_len);

  for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
  {
    if (!tst_cp(own_cps, own_len, cp))
    {
      continue;
    }
    if (tst_cp(other_cps, other_len, cp))
    {
      return Qtrue;
    }
  }
  return Qfalse;
}

#intersection(other) ⇒ Object



331
332
333
334
335
# File 'ext/character_set/character_set.c', line 331

// C implementation listed for CharacterSet#& / #intersection.
// All work is delegated to the RETURN_COMBINED_CS macro (defined elsewhere in
// the file), which receives both operands and the C operator `&&` and, as its
// name says, returns from this function.
// NOTE(review): presumably the operator is applied per codepoint to the
// membership of `self` and `other` — confirm against the macro definition.
static VALUE
cs_method_intersection(VALUE self, VALUE other)
{
  RETURN_COMBINED_CS(self, other, &&);
}

#keep_if ⇒ Object



260
261
262
263
264
265
# File 'ext/character_set/character_set.c', line 260

// CharacterSet#keep_if.
// RETURN_SIZED_ENUMERATOR is the Ruby C API macro that returns an Enumerator
// (sized through cs_enumerator_length) when the method is called without a
// block; with a block, execution continues to the call below.
// The actual work is done by cs_delete_if_block_result with flag 0
// (#delete_if passes 1).
static VALUE
cs_method_keep_if(VALUE self)
{
  RETURN_SIZED_ENUMERATOR(self, 0, 0, cs_enumerator_length);
  return cs_delete_if_block_result(self, 0);
}

#keep_in(str) ⇒ Object



1263
1264
1265
1266
1267
# File 'ext/character_set/character_set.c', line 1263

// CharacterSet#keep_in: forwards to cs_apply_to_str with flags (0, 0).
// NOTE(review): across the four *_in wrappers the first flag is 1 for delete
// and 0 for keep, the second is 1 only for the `!` variants — confirm against
// cs_apply_to_str.
static VALUE
cs_method_keep_in(VALUE self, VALUE str)
{
  VALUE result;

  result = cs_apply_to_str(self, str, 0, 0);
  return result;
}

#keep_in!(str) ⇒ Object



1269
1270
1271
1272
1273
# File 'ext/character_set/character_set.c', line 1269

// CharacterSet#keep_in!: forwards to cs_apply_to_str with flags (0, 1).
// NOTE(review): across the four *_in wrappers the first flag is 1 for delete
// and 0 for keep, the second is 1 only for the `!` variants — confirm against
// cs_apply_to_str.
static VALUE
cs_method_keep_in_bang(VALUE self, VALUE str)
{
  VALUE result;

  result = cs_apply_to_str(self, str, 0, 1);
  return result;
}

#length ⇒ Object



163
164
165
166
167
# File 'ext/character_set/character_set.c', line 163

// CharacterSet#length / #size: returns the result of cs_active_cp_count as a
// Fixnum.
static VALUE
cs_method_length(VALUE self)
{
  VALUE count;

  count = LONG2FIX(cs_active_cp_count(self));
  return count;
}

#max ⇒ Object



284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
# File 'ext/character_set/character_set.c', line 284

// CharacterSet#max: returns the highest codepoint in the set as a Fixnum, or
// nil when the set is empty.
//
// Scans downward from the last valid index. Valid indexes are 0..len-1 (the
// forward scans in this file use `cp < len`), so the scan starts at len - 1
// rather than at len. For len == 0 the start index is -1 and the loop body
// never runs.
static VALUE
cs_method_max(VALUE self)
{
  cs_cp len;
  long reverse_idx;
  cs_ar *cps;
  cps = cs_fetch_cps(self, &len);
  for (reverse_idx = (long)len - 1; reverse_idx >= 0; reverse_idx--)
  {
    if (tst_cp(cps, len, reverse_idx))
    {
      return LONG2FIX(reverse_idx);
    }
  }
  return Qnil;
}

#member?(num) ⇒ Boolean

Returns:

  • (Boolean)


355
356
357
358
359
360
361
362
# File 'ext/character_set/character_set.c', line 355

// CharacterSet#=== / #include? / #member?: tests a single codepoint.
// `num` is converted with FIX2ULONG without a type check, so callers are
// expected to pass a Fixnum.
static VALUE
cs_method_include_p(VALUE self, VALUE num)
{
  cs_cp allocated;
  cs_ar *codepoints = cs_fetch_cps(self, &allocated);

  if (tst_cp(codepoints, allocated, FIX2ULONG(num)))
  {
    return Qtrue;
  }
  return Qfalse;
}

#member_in_plane?(plane_num) ⇒ Boolean

Returns:

  • (Boolean)


876
877
878
879
880
881
882
883
884
885
# File 'ext/character_set/character_set.c', line 876

// CharacterSet#member_in_plane?: validates/converts `plane_num` through
// cs_valid_plane_num first, then returns the result of cs_has_cp_in_plane for
// this set's codepoint storage.
static VALUE
cs_method_member_in_plane_p(VALUE self, VALUE plane_num)
{
  cs_cp allocated;
  cs_ar *codepoints;
  unsigned int plane_idx = cs_valid_plane_num(plane_num);

  codepoints = cs_fetch_cps(self, &allocated);
  return cs_has_cp_in_plane(codepoints, allocated, plane_idx);
}

#merge(other) ⇒ Object



566
567
568
569
570
571
572
573
574
575
576
577
578
579
# File 'ext/character_set/character_set.c', line 566

// CharacterSet#merge: adds the contents of `other` to the receiver in place.
// Raises if the receiver is frozen. Dispatches on the argument:
//   - passes cs_check_type -> cs_merge_cs
//   - Ruby Array           -> cs_merge_rb_array
//   - anything else        -> cs_merge_rb_range
static VALUE
cs_method_merge(VALUE self, VALUE other)
{
  rb_check_frozen(self);

  if (cs_check_type(other))
  {
    return cs_merge_cs(self, other);
  }

  return (TYPE(other) == T_ARRAY)
             ? cs_merge_rb_array(self, other)
             : cs_merge_rb_range(self, other);
}

#min ⇒ Object



277
278
279
280
281
282
# File 'ext/character_set/character_set.c', line 277

// CharacterSet#min.
// The statement handed to FOR_EACH_ACTIVE_CODEPOINT returns the current `cp`
// as a Fixnum the first time the macro runs it; if it never runs, the
// function falls through and returns nil.
// NOTE(review): this yields the minimum only if the macro visits contained
// codepoints in ascending order — confirm against its definition.
static VALUE
cs_method_min(VALUE self)
{
  FOR_EACH_ACTIVE_CODEPOINT(return LONG2FIX(cp));
  return Qnil;
}

#minmax ⇒ Object



301
302
303
304
305
306
307
308
309
# File 'ext/character_set/character_set.c', line 301

// CharacterSet#minmax: returns a two-element Array [min, max], built from the
// results of cs_method_min and cs_method_max (in that order).
static VALUE
cs_method_minmax(VALUE self)
{
  VALUE pair = rb_ary_new2(2);
  VALUE lowest = cs_method_min(self);
  VALUE highest = cs_method_max(self);

  rb_ary_push(pair, lowest);
  rb_ary_push(pair, highest);
  return pair;
}

#plane(plane_num) ⇒ Object



866
867
868
869
870
871
872
873
874
# File 'ext/character_set/character_set.c', line 866

// CharacterSet#plane: returns a new set holding the receiver's codepoints
// that lie in the given Unicode plane.
//
// plane_num - validated/converted by cs_valid_plane_num.
//
// The section handed to cs_from_section spans exactly one plane:
// [plane * UNICODE_PLANE_SIZE, plane * UNICODE_PLANE_SIZE + UNICODE_PLANE_SIZE - 1].
static VALUE
cs_method_plane(VALUE self, VALUE plane_num)
{
  cs_cp plane, plane_beg, plane_end;
  plane = cs_valid_plane_num(plane_num);
  plane_beg = plane * UNICODE_PLANE_SIZE;
  // Last codepoint of the plane, derived from the plane's own start. Scaling
  // MAX_BMP_CP by (plane + 1) only gives this value for plane 0 and falls
  // `plane` codepoints short for every higher plane.
  // NOTE(review): assumes MAX_BMP_CP == UNICODE_PLANE_SIZE - 1 and that
  // cs_from_section treats its upper bound as inclusive — confirm.
  plane_end = plane_beg + UNICODE_PLANE_SIZE - 1;
  return cs_from_section(self, plane_beg, plane_end);
}

#planes ⇒ Object



834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
# File 'ext/character_set/character_set.c', line 834

// CharacterSet#planes: returns an Array with the index (as Fixnum) of every
// plane 0...UNICODE_PLANE_COUNT for which cs_has_cp_in_plane is truthy, in
// ascending order.
static VALUE
cs_method_planes(VALUE self)
{
  cs_cp allocated;
  cs_ar *codepoints;
  unsigned int plane_idx;
  VALUE occupied;

  codepoints = cs_fetch_cps(self, &allocated);
  occupied = rb_ary_new();

  for (plane_idx = 0; plane_idx < UNICODE_PLANE_COUNT; plane_idx++)
  {
    if (!cs_has_cp_in_plane(codepoints, allocated, plane_idx))
    {
      continue;
    }
    rb_ary_push(occupied, INT2FIX(plane_idx));
  }

  return occupied;
}

#proper_subset?(other) ⇒ Boolean

Returns:

  • (Boolean)


654
655
656
657
658
659
660
# File 'ext/character_set/character_set.c', line 654

// CharacterSet#< / #proper_subset?: Qtrue only when cs_a_subset_of_b reports
// that `self` is a subset of `other` AND sets its "proper" out-flag.
static VALUE
cs_method_proper_subset_p(VALUE self, VALUE other)
{
  int proper;

  if (!cs_a_subset_of_b(self, other, &proper))
  {
    return Qfalse;
  }
  return proper ? Qtrue : Qfalse;
}

#proper_superset?(other) ⇒ Boolean

Returns:

  • (Boolean)


668
669
670
671
672
673
674
# File 'ext/character_set/character_set.c', line 668

// CharacterSet#> / #proper_superset?: the subset test with operands swapped.
// Qtrue only when `other` is a subset of `self` AND cs_a_subset_of_b sets its
// "proper" out-flag.
static VALUE
cs_method_proper_superset_p(VALUE self, VALUE other)
{
  int proper;

  if (!cs_a_subset_of_b(other, self, &proper))
  {
    return Qfalse;
  }
  return proper ? Qtrue : Qfalse;
}

#ranges ⇒ Object



693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
# File 'ext/character_set/character_set.c', line 693

// CharacterSet#ranges: returns an Array of inclusive Ruby Ranges (third
// argument of rb_range_new is 0), one per run of consecutive codepoints
// visited by FOR_EACH_ACTIVE_CODEPOINT.
//
// All bookkeeping variables hold Fixnum VALUEs, not C integers, and the raw
// value 0 is used as the "not set yet" marker.
// NOTE(review): this relies on CRuby's Fixnum tagging — LONG2FIX(n) is never
// 0, and the VALUEs of n and n + 1 differ by 2, hence the `+ 2` adjacency
// check below. Confirm before porting to another VALUE representation.
// NOTE(review): presumably the macro visits contained codepoints in ascending
// order; the run detection depends on it — confirm against its definition.
static VALUE
cs_method_ranges(VALUE self)
{
  VALUE ranges, cp_num, previous_cp_num, current_start, current_end;

  ranges = rb_ary_new();
  previous_cp_num = 0;
  current_start = 0;
  current_end = 0;

  FOR_EACH_ACTIVE_CODEPOINT(
      cp_num = LONG2FIX(cp);

      if (!previous_cp_num) {
        current_start = cp_num;
      } else if (previous_cp_num + 2 != cp_num) {
        // gap found, finalize previous range
        rb_ary_push(ranges, rb_range_new(current_start, current_end, 0));
        current_start = cp_num;
      } current_end = cp_num;
      previous_cp_num = cp_num;);

  // add final range
  if (current_start)
  {
    rb_ary_push(ranges, rb_range_new(current_start, current_end, 0));
  }

  return ranges;
}

#sample(*args) ⇒ Object



724
725
726
727
728
729
730
731
# File 'ext/character_set/character_set.c', line 724

// CharacterSet#sample([n]): converts the set to an Array of Strings (to_a
// called with `true`) and delegates to Array#sample, passing the optional
// argument through when one was given.
// Raises ArgumentError (rb_check_arity) for more than one argument.
static VALUE
cs_method_sample(int argc, VALUE *argv, VALUE self)
{
  VALUE want_strings[1] = {Qtrue};
  VALUE chars;

  rb_check_arity(argc, 0, 1);
  chars = cs_method_to_a(1, want_strings, self);

  if (argc)
  {
    return rb_funcall(chars, rb_intern("sample"), 1, argv[0]);
  }
  return rb_funcall(chars, rb_intern("sample"), 0);
}

#scan(str) ⇒ Object



1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
# File 'ext/character_set/character_set.c', line 1106

// CharacterSet#scan: walks `str` with each_cp and the add_str_cp_to_str_arr
// callback, which receives a two-slot memo: [0] the result Array, [1] the
// string's encoding pointer smuggled through as a VALUE.
// Returns the result Array. Raises (via raise_arg_err_unless_string) for
// non-String input.
static VALUE
cs_method_scan(VALUE self, VALUE str)
{
  struct cs_data *set_data;
  VALUE matches;
  VALUE memo[2];

  raise_arg_err_unless_string(str);
  set_data = cs_fetch_data(self);

  matches = rb_ary_new();
  memo[0] = matches;
  memo[1] = (VALUE)rb_enc_get(str);
  each_cp(str, add_str_cp_to_str_arr, set_data->cps, set_data->len, set_data, memo);

  return matches;
}

#size ⇒ Object



163
164
165
166
167
# File 'ext/character_set/character_set.c', line 163

// CharacterSet#length / #size: returns the result of cs_active_cp_count as a
// Fixnum.
static VALUE
cs_method_length(VALUE self)
{
  VALUE count;

  count = LONG2FIX(cs_active_cp_count(self));
  return count;
}

#subset?(other) ⇒ Boolean

Returns:

  • (Boolean)


648
649
650
651
652
# File 'ext/character_set/character_set.c', line 648

// CharacterSet#<= / #subset?: Qtrue when cs_a_subset_of_b(self, other) is
// non-zero. No "proper" out-flag is requested (NULL).
static VALUE
cs_method_subset_p(VALUE self, VALUE other)
{
  if (cs_a_subset_of_b(self, other, NULL))
  {
    return Qtrue;
  }
  return Qfalse;
}

#subtract(other) ⇒ Object



588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
# File 'ext/character_set/character_set.c', line 588

// CharacterSet#subtract: removes from the receiver, in place, every codepoint
// that is set in `other`; returns `self`. Raises if the receiver is frozen.
// `other` is passed to cs_fetch_cps without a type check.
static VALUE
cs_method_subtract(VALUE self, VALUE other)
{
  cs_cp cp, own_len, removed_len;
  cs_ar *own_cps, *removed_cps;

  rb_check_frozen(self);
  own_cps = cs_fetch_cps(self, &own_len);
  removed_cps = cs_fetch_cps(other, &removed_len);

  for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
  {
    if (!tst_cp(removed_cps, removed_len, cp))
    {
      continue;
    }
    clr_cp(own_cps, own_len, cp);
  }

  return self;
}

#superset?(other) ⇒ Boolean

Returns:

  • (Boolean)


662
663
664
665
666
# File 'ext/character_set/character_set.c', line 662

// CharacterSet#>= / #superset?: Qtrue when cs_a_subset_of_b(other, self) is
// non-zero, i.e. the subset test with operands swapped.
static VALUE
cs_method_superset_p(VALUE self, VALUE other)
{
  if (cs_a_subset_of_b(other, self, NULL))
  {
    return Qtrue;
  }
  return Qfalse;
}

#to_a(*args) ⇒ Object

Returns an Array of Strings of length 1 if passed `true`.



185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# File 'ext/character_set/character_set.c', line 185

// CharacterSet#to_a([stringify]): returns the set's contents as an Array.
//
// Without an argument, or with nil/false, the elements are Fixnum codepoints.
// With any other argument each codepoint is converted to a UTF-8 String via
// rb_enc_uint_chr.
// Raises ArgumentError (rb_check_arity) for more than one argument.
static VALUE
cs_method_to_a(int argc, VALUE *argv, VALUE self)
{
  VALUE arr;
  rb_encoding *enc;
  int stringify;

  rb_check_arity(argc, 0, 1);
  arr = rb_ary_new();
  stringify = argc && !NIL_P(argv[0]) && argv[0] != Qfalse;

  if (stringify)
  {
    enc = rb_utf8_encoding();
    FOR_EACH_ACTIVE_CODEPOINT(rb_ary_push(arr, rb_enc_uint_chr((int)cp, enc)));
  }
  else
  {
    FOR_EACH_ACTIVE_CODEPOINT(rb_ary_push(arr, LONG2FIX(cp)));
  }

  return arr;
}

#union(other) ⇒ Object



343
344
345
346
347
# File 'ext/character_set/character_set.c', line 343

// C implementation listed for CharacterSet#+ / #| / #union.
// All work is delegated to the RETURN_COMBINED_CS macro (defined elsewhere in
// the file), which receives both operands and the C operator `||` and, as its
// name says, returns from this function.
// NOTE(review): presumably the operator is applied per codepoint to the
// membership of `self` and `other` — confirm against the macro definition.
static VALUE
cs_method_union(VALUE self, VALUE other)
{
  RETURN_COMBINED_CS(self, other, ||);
}

#used_by?(str) ⇒ Boolean

Returns:

  • (Boolean)


1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
# File 'ext/character_set/character_set.c', line 1125

// CharacterSet#used_by?: runs each_cp over `str` with the str_cp_not_in_arr
// callback and inverts the outcome — Qtrue exactly when each_cp returned
// Qfalse, Qfalse otherwise.
// Raises (via raise_arg_err_unless_string) for non-String input.
static VALUE
cs_method_used_by_p(VALUE self, VALUE str)
{
  struct cs_data *set_data;
  VALUE only_foreign_cps;

  raise_arg_err_unless_string(str);
  set_data = cs_fetch_data(self);
  only_foreign_cps = each_cp(str, str_cp_not_in_arr, set_data->cps, set_data->len, set_data, 0);

  if (only_foreign_cps == Qfalse)
  {
    return Qtrue;
  }
  return Qfalse;
}

#|(other) ⇒ Object



343
344
345
346
347
# File 'ext/character_set/character_set.c', line 343

// C implementation listed for CharacterSet#+ / #| / #union.
// All work is delegated to the RETURN_COMBINED_CS macro (defined elsewhere in
// the file), which receives both operands and the C operator `||` and, as its
// name says, returns from this function.
// NOTE(review): presumably the operator is applied per codepoint to the
// membership of `self` and `other` — confirm against the macro definition.
static VALUE
cs_method_union(VALUE self, VALUE other)
{
  RETURN_COMBINED_CS(self, other, ||);
}