Class: TwitterCldr::Segmentation::Dictionary

Inherits:
Object
  • Object
show all
Defined in:
lib/twitter_cldr/segmentation/dictionary.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(trie) ⇒ Dictionary

Returns a new instance of Dictionary.



50
51
52
# File 'lib/twitter_cldr/segmentation/dictionary.rb', line 50

# Creates a dictionary that wraps the given trie.
#
# @param trie [Object] stored in @trie as-is (no copy, no validation)
def initialize(trie)
  @trie = trie
end

Instance Attribute Details

#trie ⇒ Object (readonly)

Returns the value of attribute trie.



48
49
50
# File 'lib/twitter_cldr/segmentation/dictionary.rb', line 48

# Reader for the trie this dictionary was constructed with.
#
# @return [Object] the value held in @trie
def trie
  @trie
end

Class Method Details

.burmese ⇒ Object



11
12
13
# File 'lib/twitter_cldr/segmentation/dictionary.rb', line 11

# Convenience shortcut for get('burmese').
#
# @return [Object] whatever get returns for the name 'burmese'
def burmese
  get('burmese')
end

.cj ⇒ Object



15
16
17
# File 'lib/twitter_cldr/segmentation/dictionary.rb', line 15

# Convenience shortcut for get('cj').
#
# @return [Object] whatever get returns for the name 'cj'
def cj
  get('cj')
end

.get(name) ⇒ Object



31
32
33
34
35
36
37
38
39
# File 'lib/twitter_cldr/segmentation/dictionary.rb', line 31

# Looks up the dictionary registered under +name+, building it on first use.
#
# A truthy entry already present in dictionary_cache is returned untouched.
# Otherwise the resource "<name>dict.dump" is fetched via
# TwitterCldr.get_resource, wrapped with +new+, stored in the cache under
# +name+, and returned.
#
# @param name [String] dictionary name, interpolated into the dump file name
# @return [Object] the cached or freshly built instance
def get(name)
  cache = dictionary_cache
  cached = cache[name]
  return cached if cached

  dump_file = "#{name}dict.dump"
  cache[name] = new(
    TwitterCldr.get_resource('shared', 'segments', 'dictionaries', dump_file)
  )
end

.khmer ⇒ Object



19
20
21
# File 'lib/twitter_cldr/segmentation/dictionary.rb', line 19

# Convenience shortcut for get('khmer').
#
# @return [Object] whatever get returns for the name 'khmer'
def khmer
  get('khmer')
end

.lao ⇒ Object



23
24
25
# File 'lib/twitter_cldr/segmentation/dictionary.rb', line 23

# Convenience shortcut for get('lao').
#
# @return [Object] whatever get returns for the name 'lao'
def lao
  get('lao')
end

.thai ⇒ Object



27
28
29
# File 'lib/twitter_cldr/segmentation/dictionary.rb', line 27

# Convenience shortcut for get('thai').
#
# @return [Object] whatever get returns for the name 'thai'
def thai
  get('thai')
end

Instance Method Details

#matches(cursor, max_search_length, limit) ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/twitter_cldr/segmentation/dictionary.rb', line 54

# Walks the trie one codepoint at a time, starting at the cursor's current
# codepoint, and collects every node along that path that carries a value.
#
# @param cursor [#length, #position, #codepoint] source of codepoints;
#   codepoint with no argument supplies the first one, codepoint(index)
#   supplies the following ones at cursor.position + offset
# @param max_search_length [Integer] maximum number of codepoints to consume
# @param limit [Integer] maximum number of values to record; the walk itself
#   continues past this limit, only the recording stops
# @return [Array, Integer] [count, values, lengths, num_chars], where
#   lengths[i] is the number of codepoints consumed when values[i] was found
#   and num_chars is the walk counter when the loop ended
#
# NOTE(review): an empty cursor yields the bare Integer 0 rather than a
# 4-element array; kept as-is because callers are not visible here.
def matches(cursor, max_search_length, limit)
  return 0 if cursor.length == 0

  node = trie.root.child(cursor.codepoint)
  found_values = []
  found_lengths = []
  consumed = 1

  until node.nil?
    # The number of recorded values doubles as the match count.
    if node.has_value? && found_values.size < limit
      found_values.push(node.value)
      found_lengths.push(consumed)
    end

    # Stop before descending so the counter is not bumped past the cap.
    break if consumed >= max_search_length

    next_codepoint = cursor.codepoint(cursor.position + consumed)
    node = node.child(next_codepoint)
    consumed += 1
  end

  [found_values.size, found_values, found_lengths, consumed]
end