Class: ChainPunk::Corpus

Inherits:
Object
  • Object
show all
Defined in:
lib/chain_punk/corpus.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text, options = {}) ⇒ Corpus

Returns a new instance of Corpus.



7
8
9
# File 'lib/chain_punk/corpus.rb', line 7

# Builds a corpus by handing +text+ and +options+ straight to #train.
#
# @param text the source material; forwarded unchanged to #train
# @param options [Hash] forwarded unchanged to #train, which reads the
#   :exclusions, :closures, :boundaries and :index_size keys from it
def initialize(text, options = {})
  # All construction work lives in #train, so it can be invoked again later.
  train(text, options)
end

Instance Attribute Details

#frequency_table ⇒ Object (readonly)

Returns the value of attribute frequency_table.



5
6
7
# File 'lib/chain_punk/corpus.rb', line 5

# Read-only accessor for the @frequency_table instance variable.
#
# @return [Object] whatever #train last stored in @frequency_table
def frequency_table
  @frequency_table
end

#seeds ⇒ Object (readonly)

Returns the value of attribute seeds.



5
6
7
# File 'lib/chain_punk/corpus.rb', line 5

# Read-only accessor for the @seeds instance variable.
#
# @return [Object] whatever #train last stored in @seeds
def seeds
  @seeds
end

Instance Method Details

#train(text, options = {}) ⇒ Object



11
12
13
14
15
16
# File 'lib/chain_punk/corpus.rb', line 11

# Runs +text+ through the processing pipeline and stores the outcome.
#
# The stages run in a fixed order, each consuming the previous result plus
# one entry from +options+:
#
# 1. remove_exclusions  - uses options[:exclusions]
# 2. process_sets       - uses options[:closures]
# 3. process_phrases    - uses options[:boundaries]
# 4. process_graphemes  - uses options[:index_size]
#
# The value returned by the last stage is destructured into
# @frequency_table and @seeds, replacing any previous values.
#
# @param text the source material handed to the first stage
# @param options [Hash] per-stage settings; absent keys are passed on as nil
# @return [Object] the value returned by process_graphemes
def train(text, options = {})
  cleaned = remove_exclusions(text, options[:exclusions])
  phrases = process_sets(cleaned, options[:closures])
  graphemes = process_phrases(phrases, options[:boundaries])

  table_and_seeds = process_graphemes(graphemes, options[:index_size])
  @frequency_table, @seeds = table_and_seeds
end