Class: TwitterCldr::Segmentation::BrahmicBreakEngine

Inherits:
DictionaryBreakEngine show all
Defined in:
lib/twitter_cldr/segmentation/brahmic_break_engine.rb

Overview

Base class for break engines that handle languages written in Brahmic-derived scripts, i.e. Lao, Thai, Khmer, and Burmese.

This class is based on duplicated code found in ICU’s BurmeseBreakEngine and friends, which all make use of the same break logic.

Defined Under Namespace

Classes: EngineState

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from DictionaryBreakEngine

#each_boundary

Constructor Details

#initialize(options = {}) ⇒ BrahmicBreakEngine

Returns a new instance of BrahmicBreakEngine.



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/twitter_cldr/segmentation/brahmic_break_engine.rb', line 37

# Builds the engine from a hash of settings.
#
# Every key listed in the body is mandatory: each one is read with
# +options.fetch+, so a missing key raises KeyError. Keys are read in the
# order listed, so the first missing key is the one reported. Each value is
# stored in the instance variable of the same name.
#
# @param options [Hash] settings keyed by symbol
# @raise [KeyError] if any required key is absent from +options+
def initialize(options = {})
  required_keys = %i[
    lookahead
    root_combine_threshold
    prefix_combine_threshold
    min_word
    min_word_span
    word_set
    mark_set
    end_word_set
    begin_word_set
    dictionary
    advance_past_suffix
  ]

  required_keys.each do |key|
    instance_variable_set(:"@#{key}", options.fetch(key))
  end
end

Instance Attribute Details

#advance_past_suffix ⇒ Object (readonly)

Returns the value of attribute advance_past_suffix.



35
36
37
# File 'lib/twitter_cldr/segmentation/brahmic_break_engine.rb', line 35

# Read-only accessor: returns @advance_past_suffix, which the constructor
# takes from the required :advance_past_suffix option.
def advance_past_suffix
  @advance_past_suffix
end

#begin_word_set ⇒ Object (readonly)

Returns the value of attribute begin_word_set.



34
35
36
# File 'lib/twitter_cldr/segmentation/brahmic_break_engine.rb', line 34

# Read-only accessor: returns @begin_word_set, which the constructor takes
# from the required :begin_word_set option.
def begin_word_set
  @begin_word_set
end

#dictionary ⇒ Object (readonly)

Returns the value of attribute dictionary.



35
36
37
# File 'lib/twitter_cldr/segmentation/brahmic_break_engine.rb', line 35

# Read-only accessor: returns @dictionary, which the constructor takes from
# the required :dictionary option.
def dictionary
  @dictionary
end

#end_word_set ⇒ Object (readonly)

Returns the value of attribute end_word_set.



34
35
36
# File 'lib/twitter_cldr/segmentation/brahmic_break_engine.rb', line 34

# Read-only accessor: returns @end_word_set, which the constructor takes
# from the required :end_word_set option.
def end_word_set
  @end_word_set
end

#lookahead ⇒ Object (readonly)

Returns the value of attribute lookahead.



32
33
34
# File 'lib/twitter_cldr/segmentation/brahmic_break_engine.rb', line 32

# Read-only accessor: returns @lookahead, which the constructor takes from
# the required :lookahead option.
def lookahead
  @lookahead
end

#mark_set ⇒ Object (readonly)

Returns the value of attribute mark_set.



34
35
36
# File 'lib/twitter_cldr/segmentation/brahmic_break_engine.rb', line 34

# Read-only accessor: returns @mark_set, which the constructor takes from
# the required :mark_set option.
def mark_set
  @mark_set
end

#min_word ⇒ Object (readonly)

Returns the value of attribute min_word.



33
34
35
# File 'lib/twitter_cldr/segmentation/brahmic_break_engine.rb', line 33

# Read-only accessor: returns @min_word, which the constructor takes from
# the required :min_word option.
def min_word
  @min_word
end

#min_word_span ⇒ Object (readonly)

Returns the value of attribute min_word_span.



33
34
35
# File 'lib/twitter_cldr/segmentation/brahmic_break_engine.rb', line 33

# Read-only accessor: returns @min_word_span, which the constructor takes
# from the required :min_word_span option.
def min_word_span
  @min_word_span
end

#prefix_combine_threshold ⇒ Object (readonly)

Returns the value of attribute prefix_combine_threshold.



33
34
35
# File 'lib/twitter_cldr/segmentation/brahmic_break_engine.rb', line 33

# Read-only accessor: returns @prefix_combine_threshold, which the
# constructor takes from the required :prefix_combine_threshold option.
def prefix_combine_threshold
  @prefix_combine_threshold
end

#root_combine_threshold ⇒ Object (readonly)

Returns the value of attribute root_combine_threshold.



32
33
34
# File 'lib/twitter_cldr/segmentation/brahmic_break_engine.rb', line 32

# Read-only accessor: returns @root_combine_threshold, which the
# constructor takes from the required :root_combine_threshold option.
def root_combine_threshold
  @root_combine_threshold
end

#word_set ⇒ Object (readonly)

Returns the value of attribute word_set.



34
35
36
# File 'lib/twitter_cldr/segmentation/brahmic_break_engine.rb', line 34

# Read-only accessor: returns @word_set, which the constructor takes from
# the required :word_set option.
def word_set
  @word_set
end