Class: TeRex::Corpus::Body
- Inherits:
-
Object
- Object
- TeRex::Corpus::Body
- Defined in:
- lib/te_rex/corpus.rb
Instance Attribute Summary collapse
-
#category_klass ⇒ Object
readonly
Returns the value of attribute category_klass.
-
#format_klass ⇒ Object
readonly
Returns the value of attribute format_klass.
-
#sample_size ⇒ Object
readonly
Returns the value of attribute sample_size.
-
#set ⇒ Object
readonly
Returns the value of attribute set.
-
#testing ⇒ Object
readonly
Returns the value of attribute testing.
-
#total_sentences ⇒ Object
readonly
Returns the value of attribute total_sentences.
-
#training ⇒ Object
readonly
Returns the value of attribute training.
Instance Method Summary collapse
- #build ⇒ Object
- #build_superset ⇒ Object
- #file_partition ⇒ Object
-
#initialize(glob: "", partition: :file, format_klass: NilClass, category_klass: NilClass) ⇒ Body
constructor
A new instance of Body.
- #sentence_partition ⇒ Object
Constructor Details
#initialize(glob: "", partition: :file, format_klass: NilClass, category_klass: NilClass) ⇒ Body
7 8 9 10 11 12 |
# File 'lib/te_rex/corpus.rb', line 7
# Records the construction options on the instance. No other work is done
# here; the keyword values are only stored in instance variables.
#
# @param glob stored in @glob (default: "")
# @param partition stored in @partition (default: :file)
# @param format_klass stored in @format_klass (default: NilClass)
# @param category_klass stored in @category_klass (default: NilClass)
def initialize(glob: "", partition: :file, format_klass: NilClass, category_klass: NilClass)
  @glob, @format_klass, @category_klass = glob, format_klass, category_klass
  @partition = partition
end
Instance Attribute Details
#category_klass ⇒ Object (readonly)
Returns the value of attribute category_klass.
5 6 7 |
# File 'lib/te_rex/corpus.rb', line 5
# Read-only accessor: returns whatever is currently held in @category_klass.
def category_klass
  @category_klass
end
#format_klass ⇒ Object (readonly)
Returns the value of attribute format_klass.
5 6 7 |
# File 'lib/te_rex/corpus.rb', line 5
# Read-only accessor: returns whatever is currently held in @format_klass.
def format_klass
  @format_klass
end
#sample_size ⇒ Object (readonly)
Returns the value of attribute sample_size.
5 6 7 |
# File 'lib/te_rex/corpus.rb', line 5
# Read-only accessor: returns whatever is currently held in @sample_size.
def sample_size
  @sample_size
end
#set ⇒ Object (readonly)
Returns the value of attribute set.
5 6 7 |
# File 'lib/te_rex/corpus.rb', line 5
# Read-only accessor: returns whatever is currently held in @set.
def set
  @set
end
#testing ⇒ Object (readonly)
Returns the value of attribute testing.
5 6 7 |
# File 'lib/te_rex/corpus.rb', line 5
# Read-only accessor: returns whatever is currently held in @testing.
def testing
  @testing
end
#total_sentences ⇒ Object (readonly)
Returns the value of attribute total_sentences.
5 6 7 |
# File 'lib/te_rex/corpus.rb', line 5
# Read-only accessor: returns whatever is currently held in @total_sentences.
def total_sentences
  @total_sentences
end
#training ⇒ Object (readonly)
Returns the value of attribute training.
5 6 7 |
# File 'lib/te_rex/corpus.rb', line 5
# Read-only accessor: returns whatever is currently held in @training.
def training
  @training
end
Instance Method Details
#build ⇒ Object
15 16 17 18 19 20 21 22 23 |
# File 'lib/te_rex/corpus.rb', line 15
# Calls define_set, then dispatches on @partition: when /file/ matches it,
# file_partition runs; every other value falls through to
# sentence_partition.
#
# @return the result of whichever partition method was invoked
def build
  define_set

  # Regexp#=== is kept deliberately: it is the same test `case/when` applies,
  # and it simply yields false for values that are neither String nor Symbol.
  return file_partition if /file/ === @partition

  sentence_partition
end
#build_superset ⇒ Object
41 42 43 44 45 |
# File 'lib/te_rex/corpus.rb', line 41
# Collects `sentences` from every member of @set and returns them as one
# fully flattened array.
#
# @return [Array] all sentences across @set, with nesting removed
def build_superset
  per_member = @set.map(&:sentences)
  per_member.flatten
end
#file_partition ⇒ Object
25 26 27 28 29 30 |
# File 'lib/te_rex/corpus.rb', line 25
# Partitions at file granularity. Sets @sample_size to 75% of the number of
# entries in @set (rounded), fills @training and @testing from the
# corresponding *_by_file helpers, and finishes with count_all.
#
# @return whatever count_all returns
def file_partition
  three_quarters = @set.count.to_f * 0.75
  @sample_size = three_quarters.round
  @training = partition_training_by_file
  @testing = partition_test_by_file
  count_all
end
#sentence_partition ⇒ Object
32 33 34 35 36 37 38 39 |
# File 'lib/te_rex/corpus.rb', line 32
# Partitions at sentence granularity. Builds the corpus set via
# partition_files_for_sentences, derives @training and @testing from it,
# then sets @sample_size to 75% of the value returned by count_all.
#
# The sample size is rounded so it has the same Integer form that
# file_partition stores in the same attribute.
#
# @return whatever count_all returns (presumably the total sentence count;
#   confirm against count_all)
def sentence_partition
  corpus_set = partition_files_for_sentences
  @training = partition_training_by_sentence(corpus_set)
  @testing = partition_test_by_sentence(corpus_set)

  total = count_all
  @sample_size = (total.to_f * 0.75).round
  total
end