Module: Spark

Includes:
Helper::System
Defined in:
lib/spark/sampler.rb,
lib/spark.rb,
lib/spark/cli.rb,
lib/spark/rdd.rb,
lib/spark/sort.rb,
lib/spark/sort.rb,
lib/spark/build.rb,
lib/spark/error.rb,
lib/spark/mllib.rb,
lib/spark/config.rb,
lib/spark/ext/io.rb,
lib/spark/helper.rb,
lib/spark/logger.rb,
lib/spark/command.rb,
lib/spark/context.rb,
lib/spark/sampler.rb,
lib/spark/version.rb,
lib/spark/constant.rb,
lib/spark/ext/hash.rb,
lib/spark/broadcast.rb,
lib/spark/ext/module.rb,
lib/spark/ext/object.rb,
lib/spark/ext/string.rb,
lib/spark/serializer.rb,
lib/spark/accumulator.rb,
lib/spark/accumulator.rb,
lib/spark/ext/integer.rb,
lib/spark/java_bridge.rb,
lib/spark/mllib/matrix.rb,
lib/spark/mllib/matrix.rb,
lib/spark/mllib/matrix.rb,
lib/spark/mllib/matrix.rb,
lib/spark/mllib/vector.rb,
lib/spark/mllib/vector.rb,
lib/spark/mllib/vector.rb,
lib/spark/mllib/vector.rb,
lib/spark/stat_counter.rb,
lib/spark/ext/ip_socket.rb,
lib/spark/helper/logger.rb,
lib/spark/helper/parser.rb,
lib/spark/helper/system.rb,
lib/spark/serializer/oj.rb,
lib/spark/storage_level.rb,
lib/spark/command_builder.rb,
lib/spark/java_bridge/rjb.rb,
lib/spark/serializer/base.rb,
lib/spark/serializer/pair.rb,
lib/spark/serializer/text.rb,
lib/spark/helper/serialize.rb,
lib/spark/helper/statistic.rb,
lib/spark/java_bridge/base.rb,
lib/spark/command_validator.rb,
lib/spark/java_bridge/jruby.rb,
lib/spark/serializer/batched.rb,
lib/spark/serializer/marshal.rb,
lib/spark/serializer/cartesian.rb,
lib/spark/serializer/compressed.rb,
lib/spark/mllib/regression/lasso.rb,
lib/spark/mllib/regression/ridge.rb,
lib/spark/mllib/clustering/kmeans.rb,
lib/spark/mllib/clustering/kmeans.rb,
lib/spark/mllib/regression/common.rb,
lib/spark/mllib/regression/common.rb,
lib/spark/mllib/regression/linear.rb,
lib/spark/serializer/auto_batched.rb,
lib/spark/serializer/message_pack.rb,
lib/spark/mllib/classification/svm.rb,
lib/spark/mllib/classification/svm.rb,
lib/spark/mllib/classification/common.rb,
lib/spark/mllib/classification/common.rb,
lib/spark/mllib/regression/labeled_point.rb,
lib/spark/mllib/classification/naive_bayes.rb,
lib/spark/mllib/classification/naive_bayes.rb,
lib/spark/mllib/ruby_matrix/matrix_adapter.rb,
lib/spark/mllib/ruby_matrix/vector_adapter.rb,
lib/spark/mllib/clustering/gaussian_mixture.rb,
lib/spark/mllib/clustering/gaussian_mixture.rb,
lib/spark/mllib/classification/logistic_regression.rb,
lib/spark/mllib/classification/logistic_regression.rb,
lib/spark/mllib/classification/logistic_regression.rb,
ext/ruby_c/ruby-spark.c

Overview

Spark::JavaBridge::Base

Parent class for all Ruby–Java bridge adapters.

Defined Under Namespace

Modules: Build, CommandValidator, Constant, CoreExtension, Digest, Helper, InternalSorter, JavaBridge, Mllib, RandomGenerator, Sampler, Serializer Classes: Accumulator, AccumulatorError, Broadcast, BroadcastError, BuildError, CLI, Command, CommandBuilder, CommandError, Config, ConfigurationError, Context, ContextError, ExternalSorter, JavaBridgeError, Logger, MllibError, NotImplemented, ParseError, PipelinedRDD, RDD, RDDError, SerializeError, StatCounter, StorageLevel

Constant Summary collapse

DEFAULT_CONFIG_FILE =
File.join(Dir.home, '.ruby-spark.conf')
VERSION =
'1.2.1'

Class Method Summary collapse

Methods included from Helper::System

included

Class Method Details

.clear_configObject

Destroys the current configuration. This can be useful for resetting the configuration in order to set new values. It has no effect if a context is already started.



75
76
77
# File 'lib/spark.rb', line 75

# Throw away the current configuration so a fresh one can be built.
# Has no effect on an already-running context.
def self.clear_config
  @config = nil
end

.config(&block) ⇒ Object

Returns the current configuration. Settings can be changed until a context is initialized; after that, the configuration is locked and read-only.

Configuration can be changed:

Spark.config.set('spark.app.name', 'RubySpark')

Spark.config['spark.app.name'] = 'RubySpark'

Spark.config do
  set 'spark.app.name', 'RubySpark'
end


63
64
65
66
67
68
69
70
71
# File 'lib/spark.rb', line 63

# Returns the global configuration object, creating it on first access.
# When a block is given it is evaluated in the config's own context
# (so bare `set` calls apply directly); otherwise the config object
# itself is returned.
def self.config(&block)
  @config = Spark::Config.new if @config.nil?

  return @config unless block_given?

  @config.instance_eval(&block)
end

.contextObject Also known as: sc

Returns the current active context, or nil if none has been started.

TODO: Run start if context is nil?



83
84
85
# File 'lib/spark.rb', line 83

# Reader for the currently active Spark context (nil when not started).
def self.context
  @context
end

.java_bridgeObject Also known as: jb



218
219
220
# File 'lib/spark.rb', line 218

# Reader for the active Ruby->Java bridge (set by .load_lib), or nil.
def self.java_bridge
  @java_bridge
end

.load_defaultsObject

Loads the default configuration for Spark and RubySpark. By default the values are stored at ~/.ruby-spark.conf; the file is created automatically if it does not exist.



120
121
122
123
124
125
126
# File 'lib/spark.rb', line 120

# Load the default configuration for Spark and RubySpark.
# Values live in DEFAULT_CONFIG_FILE (~/.ruby-spark.conf); the file is
# created automatically on first use via save_defaults_to.
def self.load_defaults
  # File.exists? was deprecated and removed in Ruby 3.2 — use File.exist?.
  unless File.exist?(DEFAULT_CONFIG_FILE)
    save_defaults_to(DEFAULT_CONFIG_FILE)
  end

  load_defaults_from(DEFAULT_CONFIG_FILE)
end

.load_defaults_from(file_path) ⇒ Object

Clears the previous settings and loads new ones from the given file.



129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/spark.rb', line 129

# Re-read gem settings from the given defaults file.
# Only lines beginning with "gem." are considered; each line is split
# into a key and the remainder of the line.
def self.load_defaults_from(file_path)
  pairs = File.readlines(file_path)
              .map(&:strip)
              .select { |line| line.start_with?('gem.') }
              .map { |line| line.split(nil, 2) }
  pairs = Hash[pairs]

  # Reset previously loaded values before applying the new ones
  @target_dir     = nil
  @ruby_spark_jar = nil
  @spark_home     = nil

  @target_dir = pairs['gem.target']
end

.load_lib(target = nil) ⇒ Object

Loads dependent libraries; may be used only once. Cannot be called before CLI::install.

Parameters:

target

path to a directory containing Spark's .jar files, or to a single Spark jar



208
209
210
211
212
213
214
215
216
# File 'lib/spark.rb', line 208

# Initialize the Java bridge and import the Spark classes.
# Safe to call repeatedly: only the first call has an effect.
#
# target - directory with Spark's .jar files or a single Spark jar;
#          defaults to Spark.target_dir
def self.load_lib(target = nil)
  return if @java_bridge

  @java_bridge = JavaBridge.init(target || Spark.target_dir)
  @java_bridge.import_all
  nil
end

.loggerObject

Global settings and variables



170
171
172
# File 'lib/spark.rb', line 170

# Global logger instance, built lazily on first access.
def self.logger
  @logger = Spark::Logger.new if @logger.nil?
  @logger
end


35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/spark.rb', line 35

# Print the RubySpark ASCII-art logo (plus an optional extra message)
# to standard output.
#
# NOTE(review): the method name was lost in extraction (`def self.` is
# invalid Ruby); restored as +print_logo+ per the ruby-spark gem's
# public API — confirm against upstream source.
def self.print_logo(message=nil)
  puts <<-STRING

  Welcome to
                __           ____              __
      ______ __/ /  __ __   / __/__  ___ _____/ /__
     / __/ // / _ \\/ // /  _\\ \\/ _ \\/ _ `/ __/  '_/
    /_/  \\_,_/_.__/\\_, /  /___/ .__/\\_,_/_/ /_/\\_\\   version #{Spark::VERSION}
                  /___/      /_/

  #{message}

  STRING
end

.rootObject Also known as: home

Root of the gem



175
176
177
# File 'lib/spark.rb', line 175

# Absolute path to the root of the gem (one level above this file's
# directory), memoized after the first call.
def self.root
  @root || (@root = File.expand_path('..', File.dirname(__FILE__)))
end

.ruby_spark_jarObject



189
190
191
# File 'lib/spark.rb', line 189

# Full path of the bundled ruby-spark.jar inside the target directory.
def self.ruby_spark_jar
  @ruby_spark_jar || (@ruby_spark_jar = File.join(target_dir, 'ruby-spark.jar'))
end

.save_defaults_to(file_path) ⇒ Object

Creates the target directory and a new config file.



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/spark.rb', line 147

# Create a fresh, private target directory and write a new defaults
# file pointing at it.
#
# Retries with a new UUID on the (astronomically unlikely) collision
# where the generated directory already exists.
def self.save_defaults_to(file_path)
  dir = File.join(Dir.home, ".ruby-spark.#{SecureRandom.uuid}")

  if Dir.exist?(dir)
    save_defaults_to(file_path)
  else
    Dir.mkdir(dir, 0700)
    # Block form guarantees the handle is closed even if a write raises
    # (the original leaked the handle on exception).
    File.open(file_path, 'w') do |file|
      file.puts "# Directory where will be Spark saved"
      file.puts "gem.target   #{dir}"
      file.puts ""
      file.puts "# You can also defined spark properties"
      file.puts "# spark.master                       spark://master:7077"
      file.puts "# spark.ruby.serializer              marshal"
      file.puts "# spark.ruby.serializer.batch_size   2048"
    end
  end
end

.spark_ext_dirObject



193
194
195
# File 'lib/spark.rb', line 193

# Directory holding the gem's Java extension sources (<root>/ext/spark).
def self.spark_ext_dir
  @spark_ext_dir || (@spark_ext_dir = File.join(root, 'ext', 'spark'))
end

.startObject

Initializes the Spark context if it has not been initialized already. The config is loaded automatically in the constructor; from that point the config uses the configuration of the running Spark and is locked read-only.



90
91
92
93
94
95
96
# File 'lib/spark.rb', line 90

# Create the global Spark context unless one is already running.
# A second call is a no-op.
def self.start
  @context ||= Spark::Context.new unless started?
end

.started?Boolean

Returns:

  • (Boolean)


109
110
111
# File 'lib/spark.rb', line 109

# True once a context has been created via .start, false otherwise.
def self.started?
  @context ? true : false
end

.stopObject



98
99
100
101
102
103
104
105
106
107
# File 'lib/spark.rb', line 98

# Stop the active context and the RubyWorker server.
# Any error raised during shutdown is deliberately swallowed
# (best-effort cleanup); the ensure block always drops the context
# reference and resets the configuration, so a later .start begins clean.
def self.stop
  @context.stop
  RubyWorker.stopServer
  logger.info('Workers were stopped')
rescue
  nil
ensure
  @context = nil
  clear_config
end

.target_dirObject

Default directory for java extensions



180
181
182
# File 'lib/spark.rb', line 180

# Directory for the Java extensions. Defaults to <root>/target; may
# have been pre-set from the "gem.target" entry of the defaults file.
def self.target_dir
  @target_dir || (@target_dir = File.join(root, 'target'))
end

.worker_dirObject

Directory where worker.rb is located.



185
186
187
# File 'lib/spark.rb', line 185

# Directory containing worker.rb (<root>/lib/spark/worker).
def self.worker_dir
  @worker_dir || (@worker_dir = File.join(root, 'lib', 'spark', 'worker'))
end