Class: TeRex::Corpus::Body

Inherits:
Object
  • Object
show all
Defined in:
lib/te_rex/corpus.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(glob: "", partition: :file, format_klass: NilClass, category_klass: NilClass) ⇒ Body



7
8
9
10
11
12
# File 'lib/te_rex/corpus.rb', line 7

# Records the corpus configuration on the instance. Nothing else happens
# here; the four keywords are only copied into instance variables.
#
# @param glob [String] stored as @glob (presumably a file glob pattern -- TODO confirm)
# @param partition [Symbol] stored as @partition; #build tests it against /file/
# @param format_klass [Class] stored as @format_klass
# @param category_klass [Class] stored as @category_klass
def initialize(glob: "", partition: :file, format_klass: NilClass, category_klass: NilClass)
  # Assignment order is kept as glob, format, category, partition so that
  # #instance_variables / #inspect list the fields in the same order.
  @glob, @format_klass = glob, format_klass
  @category_klass, @partition = category_klass, partition
end

Instance Attribute Details

#category_klass ⇒ Object (readonly)

Returns the value of attribute category_klass.



5
6
7
# File 'lib/te_rex/corpus.rb', line 5

# Reader for @category_klass, the value passed to #initialize as the
# category_klass: keyword (NilClass when the keyword is omitted).
def category_klass
  @category_klass
end

#format_klass ⇒ Object (readonly)

Returns the value of attribute format_klass.



5
6
7
# File 'lib/te_rex/corpus.rb', line 5

# Reader for @format_klass, the value passed to #initialize as the
# format_klass: keyword (NilClass when the keyword is omitted).
def format_klass
  @format_klass
end

#sample_size ⇒ Object (readonly)

Returns the value of attribute sample_size.



5
6
7
# File 'lib/te_rex/corpus.rb', line 5

# Reader for @sample_size. #initialize does not assign it; it is set by
# #file_partition (75% of @set.count) or #sentence_partition (75% of the
# count_all result), so it is nil until one of those has run.
def sample_size
  @sample_size
end

#set ⇒ Object (readonly)

Returns the value of attribute set.



5
6
7
# File 'lib/te_rex/corpus.rb', line 5

# Reader for @set, the collection that #build_superset iterates and
# #file_partition counts. Its assignment is not visible in this section;
# presumably define_set (called from #build) populates it -- TODO confirm.
def set
  @set
end

#testing ⇒ Object (readonly)

Returns the value of attribute testing.



5
6
7
# File 'lib/te_rex/corpus.rb', line 5

# Reader for @testing, which #file_partition assigns from
# partition_test_by_file and #sentence_partition assigns from
# partition_test_by_sentence. #initialize does not set it.
def testing
  @testing
end

#total_sentences ⇒ Object (readonly)

Returns the value of attribute total_sentences.



5
6
7
# File 'lib/te_rex/corpus.rb', line 5

# Reader for @total_sentences. No assignment to this variable is visible in
# this section; presumably count_all sets it -- TODO confirm.
def total_sentences
  @total_sentences
end

#training ⇒ Object (readonly)

Returns the value of attribute training.



5
6
7
# File 'lib/te_rex/corpus.rb', line 5

# Reader for @training, which #file_partition assigns from
# partition_training_by_file and #sentence_partition assigns from
# partition_training_by_sentence. #initialize does not set it.
def training
  @training
end

Instance Method Details

#build ⇒ Object



15
16
17
18
19
20
21
22
23
# File 'lib/te_rex/corpus.rb', line 15

# Runs define_set, then partitions according to @partition.
#
# @partition is tested with Regexp#=== against /file/: a String or Symbol
# containing "file" (including the default :file) selects #file_partition;
# any other value, nil included, selects #sentence_partition.
#
# @return [Object] the result of whichever partition method ran
def build
  define_set
  return file_partition if /file/ === @partition

  sentence_partition
end

#build_superset ⇒ Object



41
42
43
44
45
# File 'lib/te_rex/corpus.rb', line 41

# Gathers the #sentences of every element of @set into a single array.
# The result is flattened recursively, so any nesting inside an individual
# sentences value is removed as well.
#
# @return [Array] every formatter's sentences, in @set order
def build_superset
  @set.map(&:sentences).flatten
end

#file_partition ⇒ Object



25
26
27
28
29
30
# File 'lib/te_rex/corpus.rb', line 25

# Partitions the corpus by file.
#
# @sample_size is set first, to 75% of the number of entries in @set
# rounded to the nearest integer, so it is already available when the two
# partition helpers run. @training and @testing are then filled.
#
# @return [Object] the result of count_all (defined elsewhere in the class)
def file_partition
  file_total = @set.count
  @sample_size = (0.75 * file_total.to_f).round
  @training = partition_training_by_file
  @testing = partition_test_by_file
  count_all
end

#sentence_partition ⇒ Object



32
33
34
35
36
37
38
39
# File 'lib/te_rex/corpus.rb', line 32

# Partitions the corpus by sentence.
#
# The set from partition_files_for_sentences is handed to the training and
# test partition helpers, then count_all is called and its result is both
# returned and used to derive @sample_size.
#
# @sample_size is 75% of the count_all result, rounded to the nearest
# integer. Rounding matches #file_partition, so #sample_size is an Integer
# in both partition modes rather than a fractional Float here.
#
# @return [Object] the result of count_all (assumed to be a numeric total --
#   TODO confirm against count_all)
def sentence_partition
  corpus_set = partition_files_for_sentences
  @training = partition_training_by_sentence(corpus_set)
  @testing = partition_test_by_sentence(corpus_set)
  total = count_all
  @sample_size = (total.to_f * 0.75).round
  total
end