Module: Spark
- Includes:
- Helper::System
- Defined in:
- lib/spark/sampler.rb,
lib/spark.rb,
lib/spark/cli.rb,
lib/spark/rdd.rb,
lib/spark/sort.rb,
lib/spark/build.rb,
lib/spark/error.rb,
lib/spark/mllib.rb,
lib/spark/config.rb,
lib/spark/ext/io.rb,
lib/spark/helper.rb,
lib/spark/logger.rb,
lib/spark/command.rb,
lib/spark/context.rb,
lib/spark/version.rb,
lib/spark/constant.rb,
lib/spark/ext/hash.rb,
lib/spark/broadcast.rb,
lib/spark/ext/module.rb,
lib/spark/ext/object.rb,
lib/spark/ext/string.rb,
lib/spark/serializer.rb,
lib/spark/accumulator.rb,
lib/spark/ext/integer.rb,
lib/spark/java_bridge.rb,
lib/spark/mllib/matrix.rb,
lib/spark/mllib/vector.rb,
lib/spark/stat_counter.rb,
lib/spark/ext/ip_socket.rb,
lib/spark/helper/logger.rb,
lib/spark/helper/parser.rb,
lib/spark/helper/system.rb,
lib/spark/serializer/oj.rb,
lib/spark/storage_level.rb,
lib/spark/command_builder.rb,
lib/spark/java_bridge/rjb.rb,
lib/spark/serializer/base.rb,
lib/spark/serializer/pair.rb,
lib/spark/serializer/text.rb,
lib/spark/helper/serialize.rb,
lib/spark/helper/statistic.rb,
lib/spark/java_bridge/base.rb,
lib/spark/command_validator.rb,
lib/spark/java_bridge/jruby.rb,
lib/spark/serializer/batched.rb,
lib/spark/serializer/marshal.rb,
lib/spark/serializer/cartesian.rb,
lib/spark/serializer/compressed.rb,
lib/spark/mllib/regression/lasso.rb,
lib/spark/mllib/regression/ridge.rb,
lib/spark/mllib/clustering/kmeans.rb,
lib/spark/mllib/regression/common.rb,
lib/spark/mllib/regression/linear.rb,
lib/spark/serializer/auto_batched.rb,
lib/spark/serializer/message_pack.rb,
lib/spark/mllib/classification/svm.rb,
lib/spark/mllib/classification/common.rb,
lib/spark/mllib/regression/labeled_point.rb,
lib/spark/mllib/classification/naive_bayes.rb,
lib/spark/mllib/ruby_matrix/matrix_adapter.rb,
lib/spark/mllib/ruby_matrix/vector_adapter.rb,
lib/spark/mllib/clustering/gaussian_mixture.rb,
lib/spark/mllib/classification/logistic_regression.rb,
ext/ruby_c/ruby-spark.c
Overview
Spark::JavaBridge::Base
Parent class for all adapters (Ruby - Java).
Defined Under Namespace
Modules: Build, CommandValidator, Constant, CoreExtension, Digest, Helper, InternalSorter, JavaBridge, Mllib, RandomGenerator, Sampler, Serializer
Classes: Accumulator, AccumulatorError, Broadcast, BroadcastError, BuildError, CLI, Command, CommandBuilder, CommandError, Config, ConfigurationError, Context, ContextError, ExternalSorter, JavaBridgeError, Logger, MllibError, NotImplemented, ParseError, PipelinedRDD, RDD, RDDError, SerializeError, StatCounter, StorageLevel
Constant Summary
- DEFAULT_CONFIG_FILE = File.join(Dir.home, '.ruby-spark.conf')
- VERSION = '1.2.1'
Class Method Summary
- .clear_config ⇒ Object
  Destroy current configuration.
- .config(&block) ⇒ Object
  Returns current configuration.
- .context ⇒ Object (also: sc)
  Return the current active context, or nil.
- .java_bridge ⇒ Object (also: jb)
- .load_defaults ⇒ Object
  Load the default configuration for Spark and RubySpark. By default, values are stored at ~/.ruby-spark.conf; the file is created automatically.
- .load_defaults_from(file_path) ⇒ Object
  Clear the previous settings and load new ones from a file.
- .load_lib(target = nil) ⇒ Object
  Load dependent libraries; can be used only once and cannot be called before CLI::install.
- .logger ⇒ Object
  Global settings and variables.
- .print_logo(message = nil) ⇒ Object
- .root ⇒ Object (also: home)
  Root of the gem.
- .ruby_spark_jar ⇒ Object
- .save_defaults_to(file_path) ⇒ Object
  Create the target directory and a new config file.
- .spark_ext_dir ⇒ Object
- .start ⇒ Object
  Initialize the Spark context if not already started.
- .started? ⇒ Boolean
- .stop ⇒ Object
- .target_dir ⇒ Object
  Default directory for Java extensions.
- .worker_dir ⇒ Object
  Directory containing worker.rb.
Methods included from Helper::System
Class Method Details
.clear_config ⇒ Object
Destroy the current configuration. This is useful when you want to reset the configuration and set new values. It has no effect if the context is already started.
# File 'lib/spark.rb', line 75

def self.clear_config
  @config = nil
end
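Example (a minimal sketch; the app names are hypothetical, and clearing only has an effect while no context is running):

Spark.config.set('spark.app.name', 'FirstApp')
Spark.clear_config
Spark.config.set('spark.app.name', 'SecondApp')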
.config(&block) ⇒ Object
Returns the current configuration. The configuration can be changed until the context is initialized; after that it is locked and read-only.
The configuration can be changed in any of these ways:
Spark.config.set('spark.app.name', 'RubySpark')
Spark.config['spark.app.name'] = 'RubySpark'
Spark.config do
set 'spark.app.name', 'RubySpark'
end
# File 'lib/spark.rb', line 63

def self.config(&block)
  @config ||= Spark::Config.new

  if block_given?
    @config.instance_eval(&block)
  else
    @config
  end
end
.context ⇒ Object Also known as: sc
Return the current active context, or nil.
TODO: Run start if context is nil?
# File 'lib/spark.rb', line 83

def self.context
  @context
end
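A minimal usage sketch (assumes Spark.start succeeds on this machine and uses the gem's lambda-style commands; the numbers are just an example):

Spark.start
rdd = Spark.sc.parallelize(0..5)
rdd.map(lambda{|x| x * 2}).collect  # => [0, 2, 4, 6, 8, 10]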
.java_bridge ⇒ Object Also known as: jb
# File 'lib/spark.rb', line 218

def self.java_bridge
  @java_bridge
end
.load_defaults ⇒ Object
Load the default configuration for Spark and RubySpark. By default, values are stored at ~/.ruby-spark.conf. The file is created automatically.
# File 'lib/spark.rb', line 120

def self.load_defaults
  unless File.exists?(DEFAULT_CONFIG_FILE)
    save_defaults_to(DEFAULT_CONFIG_FILE)
  end

  load_defaults_from(DEFAULT_CONFIG_FILE)
end
.load_defaults_from(file_path) ⇒ Object
Clear the previous settings and load new ones from the given file.
# File 'lib/spark.rb', line 129

def self.load_defaults_from(file_path)
  # Parse values
  values = File.readlines(file_path)
  values.map!(&:strip)
  values.select!{|value| value.start_with?('gem.')}
  values.map!{|value| value.split(nil, 2)}
  values = Hash[values]

  # Clear prev values
  @target_dir = nil
  @ruby_spark_jar = nil
  @spark_home = nil

  # Load new
  @target_dir = values['gem.target']
end
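Example (a sketch; the path and value are hypothetical). Only lines beginning with 'gem.' are parsed, each split into a key and a value:

Spark.load_defaults_from('/home/user/.ruby-spark.conf')
# where the file contains lines such as:
#   gem.target /home/user/.ruby-spark/target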
.load_lib(target = nil) ⇒ Object
Load dependent libraries; can be used only once. Cannot be called before CLI::install.
Parameters:
- target
  Path to a directory containing Spark's .jar files, or to a single Spark jar.
# File 'lib/spark.rb', line 208

def self.load_lib(target=nil)
  return if @java_bridge

  target ||= Spark.target_dir

  @java_bridge = JavaBridge.init(target)
  @java_bridge.import_all
  nil
end
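Example (a sketch; the explicit path is hypothetical):

Spark.load_lib                        # uses the default Spark.target_dir
# Spark.load_lib('/opt/spark/jars')   # or point it at your own Spark jars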
.logger ⇒ Object
Global settings and variables
# File 'lib/spark.rb', line 170

def self.logger
  @logger ||= Spark::Logger.new
end
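Example (assumes the logger responds to info, as Spark.stop below relies on; the message is made up):

Spark.logger.info('Context is ready')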
.print_logo(message = nil) ⇒ Object
# File 'lib/spark.rb', line 35

def self.print_logo(message=nil)
  puts <<-STRING

    Welcome to
                  __            ____                  __
      ______ __/ /  __ __    / __/__  ___ _____/ /__
     / __/ // / _ \\/ // /  _\\ \\/ _ \\/ _ `/ __/ '_/
    /_/   \\_,_/_.__/\\_, /  /___/ .__/\\_,_/_/ /_/\\_\\   version #{Spark::VERSION}
                    /___/      /_/

    #{message}

  STRING
end
.root ⇒ Object Also known as: home
Root of the gem
# File 'lib/spark.rb', line 175

def self.root
  @root ||= File.expand_path('..', File.dirname(__FILE__))
end
.ruby_spark_jar ⇒ Object
# File 'lib/spark.rb', line 189

def self.ruby_spark_jar
  @ruby_spark_jar ||= File.join(target_dir, 'ruby-spark.jar')
end
.save_defaults_to(file_path) ⇒ Object
Create the target directory and a new config file.
# File 'lib/spark.rb', line 147

def self.save_defaults_to(file_path)
  dir = File.join(Dir.home, ".ruby-spark.#{SecureRandom.uuid}")

  if Dir.exist?(dir)
    save_defaults_to(file_path)
  else
    Dir.mkdir(dir, 0700)

    file = File.open(file_path, 'w')
    file.puts "# Directory where will be Spark saved"
    file.puts "gem.target #{dir}"
    file.puts ""
    file.puts "# You can also defined spark properties"
    file.puts "# spark.master spark://master:7077"
    file.puts "# spark.ruby.serializer marshal"
    file.puts "# spark.ruby.serializer.batch_size 2048"
    file.close
  end
end
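Example (writes a fresh config file at the default location):

Spark.save_defaults_to(Spark::DEFAULT_CONFIG_FILE)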
.spark_ext_dir ⇒ Object
# File 'lib/spark.rb', line 193

def self.spark_ext_dir
  @spark_ext_dir ||= File.join(root, 'ext', 'spark')
end
.start ⇒ Object
Initialize the Spark context if it is not already running. The configuration is loaded automatically in the constructor. From that point on, the config reflects the running Spark instance and becomes read-only.
# File 'lib/spark.rb', line 90

def self.start
  if started?
    # Already started
  else
    @context ||= Spark::Context.new
  end
end
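A typical lifecycle sketch (assumes the gem and Spark are installed; return values are illustrative):

Spark.started?  # => false
Spark.start
Spark.started?  # => true
Spark.sc        # => the active Spark::Context
Spark.stop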
.started? ⇒ Boolean
# File 'lib/spark.rb', line 109

def self.started?
  !!@context
end
.stop ⇒ Object
# File 'lib/spark.rb', line 98

def self.stop
  @context.stop
  RubyWorker.stopServer
  logger.info('Workers were stopped')
rescue
  nil
ensure
  @context = nil
  clear_config
end
.target_dir ⇒ Object
Default directory for Java extensions.
# File 'lib/spark.rb', line 180

def self.target_dir
  @target_dir ||= File.join(root, 'target')
end
.worker_dir ⇒ Object
Directory containing worker.rb.
# File 'lib/spark.rb', line 185

def self.worker_dir
  @worker_dir ||= File.join(root, 'lib', 'spark', 'worker')
end
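For orientation, the path helpers relate like this (values are illustrative and depend on where the gem is installed and on any gem.target override):

Spark.root            # gem root, e.g. "/path/to/ruby-spark"
Spark.target_dir      # "#{Spark.root}/target" unless set via gem.target
Spark.ruby_spark_jar  # "#{Spark.target_dir}/ruby-spark.jar"
Spark.worker_dir      # "#{Spark.root}/lib/spark/worker"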