Class: EncodingEstimator::ParallelModelBuilder

Inherits:
Object
  • Object
show all
Defined in:
lib/encoding_estimator/builder/parallel_model_builder.rb

Overview

Class used to build language models from multiple files

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(directory, min_char_threshold = 0.00001) ⇒ ParallelModelBuilder

Create a new builder object from all files of a given directory.



17
18
19
20
21
# File 'lib/encoding_estimator/builder/parallel_model_builder.rb', line 17

# Create a new builder object from all files of a given directory.
#
# Every directory entry is turned into a "<directory>/<entry>" path and kept
# only if File.file? is true for it, so sub-directories (including "." and
# "..") are dropped. Nothing is read or analyzed here; that happens in
# execute!.
#
# @param directory [String] directory whose files should be collected
# @param min_char_threshold [Numeric] stored as @threshold and handed to
#   ModelBuilder.join_and_postprocess by execute!
# @raise [SystemCallError] if the directory cannot be opened (raised by
#   Dir.entries)
def initialize( directory, min_char_threshold = 0.00001 )
  # Dir.entries opens, reads and closes the directory in a single call.
  # The previous Dir.new( directory ).entries kept the directory handle
  # open until the Dir object happened to be garbage collected.
  @files     = Dir.entries( directory )
                  .map { |entry| "#{directory}/#{entry}" }
                  .select { |path| File.file?( path ) }
  @results   = nil
  @threshold = min_char_threshold
end

Instance Attribute Details

#files ⇒ Object (readonly)

Returns the value of attribute files.



9
10
11
# File 'lib/encoding_estimator/builder/parallel_model_builder.rb', line 9

# Reader for @files.
#
# @return [Array<String>] the "<directory>/<entry>" paths the constructor
#   kept because File.file? was true for them
def files
  @files
end

#results ⇒ Object (readonly)

Returns the value of attribute results.



10
11
12
# File 'lib/encoding_estimator/builder/parallel_model_builder.rb', line 10

# Reader for @results.
#
# @return [Object, nil] nil after construction; once execute! has run, the
#   value returned by ModelBuilder.join_and_postprocess (presumably a Hash,
#   going by the documented return type of execute! -- confirm against
#   ModelBuilder)
def results
  @results
end

Instance Method Details

#execute!(max_processes = 4, show_progress = true) ⇒ Hash

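Load and process all files from the directory. If the parallel gem is installed and max_processes is not nil, the files are processed in max_processes separate worker processes and therefore truly in parallel; otherwise they are processed sequentially in the current process. If the ruby-progressbar gem is also installed and show_progress is true, a progress bar is shown during parallel processing.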



30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/encoding_estimator/builder/parallel_model_builder.rb', line 30

# Build a model for every collected file and merge them into one result.
#
# Each path in #files is passed to a fresh ModelBuilder whose #execute result
# is collected. When ParallelSupport reports support and max_processes is not
# nil, the work is dispatched through Parallel.map; otherwise a plain
# sequential map is used. The per-file results are then combined by
# ModelBuilder.join_and_postprocess together with the threshold given to the
# constructor, and the outcome is stored in @results.
#
# @param max_processes [Integer, nil] value for Parallel's :in_processes
#   option; nil forces the sequential path
# @param show_progress [Boolean] when true and ParallelSupport.progress? is
#   true, Parallel is given the progress title 'Analyzing'
# @return [Hash] the joined result, also available through #results
def execute!( max_processes = 4, show_progress = true )
  run_parallel = EncodingEstimator::ParallelSupport.supported? && !max_processes.nil?

  per_file_models =
    if run_parallel
      # The progress title stays nil unless both the caller and the
      # environment allow a progress bar.
      title = nil
      title = 'Analyzing' if show_progress && EncodingEstimator::ParallelSupport.progress?
      parallel_options = { in_processes: max_processes, progress: title }

      Parallel.map( files, parallel_options ) do |path|
        EncodingEstimator::ModelBuilder.new( path ).execute
      end
    else
      files.map do |path|
        EncodingEstimator::ModelBuilder.new( path ).execute
      end
    end

  @results = EncodingEstimator::ModelBuilder.join_and_postprocess( per_file_models, @threshold )
end