Method: WordTree::Text.common_trigrams

Defined in:
ext/wordtree.cc

.common_trigrams(text) ⇒ Object



79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'ext/wordtree.cc', line 79

/*
 * WordTree::Text.common_trigrams(text)
 *
 * Counts the byte positions in `text` at which one of a fixed list of common
 * English trigrams begins. A position is counted at most once (the table scan
 * stops at the first hit), and overlapping occurrences at different positions
 * are each counted. Matching is a raw 3-byte memcmp, so it is case-sensitive.
 *
 * self - unused.
 * text - accessed through RSTRING_PTR / RSTRING_LEN. No type check is done
 *        here, so the caller must pass a String.
 *
 * Returns the number of matching positions, converted with INT2NUM
 * (0 when text is shorter than three bytes).
 */
static VALUE text_common_trigrams(VALUE self, VALUE text) {
  /* 28 most common English trigrams, all squished together */
  static const char common_trigrams[] =
    "allandedtentereforhashatherhisingionithmenncendeoftsthterthathethitiotisverwaswityou";
  /* sizeof counts the trailing NUL, which is not part of any trigram;
     including it would make the scan compare a 29th, out-of-bounds entry. */
  const long trigrams_len = (long)sizeof(common_trigrams) - 1;

  const char *ptext = RSTRING_PTR(text);
  const long len = RSTRING_LEN(text);

  if (len < 3) return INT2NUM(0);

  /* Last start position at which a full 3-byte window still lies inside
     text; going further would make memcmp read past the string's end. */
  const char *last = ptext + (len - 3);
  int common_matched = 0;

  for (const char *ptr = ptext; ptr <= last; ptr++) {
    for (long i = 0; i < trigrams_len; i += 3) {
      if (memcmp(ptr, common_trigrams + i, 3) == 0) {
        common_matched++;
        break;
      }
    }
  }

  return INT2NUM(common_matched);
}