Class: Hjc::HadoopStreaming
- Inherits: Object
- Inheritance chain: Object → Hjc::HadoopStreaming
- Defined in:
- lib/hjc/hadoop_streaming.rb
Instance Attribute Summary collapse
-
#debug ⇒ Object
Returns the value of attribute debug.
-
#input_path ⇒ Object
Returns the value of attribute input_path.
-
#jobconf ⇒ Object
Returns the value of attribute jobconf.
-
#local ⇒ Object
Returns the value of attribute local.
-
#mapper_path ⇒ Object
Returns the value of attribute mapper_path.
-
#output_path ⇒ Object
Returns the value of attribute output_path.
-
#reducer_path ⇒ Object
Returns the value of attribute reducer_path.
Instance Method Summary collapse
- #add_file(f) ⇒ Object
- #args ⇒ Object
-
- #initialize ⇒ HadoopStreaming (constructor)
Returns a new instance of HadoopStreaming.
- #input=(input) ⇒ Object
- #mapper=(mapper) ⇒ Object
- #reducer=(reducer) ⇒ Object
- #run ⇒ Object
- #success? ⇒ Boolean
Constructor Details
#initialize ⇒ HadoopStreaming
Returns a new instance of HadoopStreaming.
6 7 8 9 10 11 |
# File 'lib/hjc/hadoop_streaming.rb', line 6
# Sets up an empty job description: no attached files, no jobconf
# entries, and both the local and debug switches turned off.
def initialize
  # Two separate hashes; they are filled independently later on.
  @files, @jobconf = {}, {}
  @local = false
  @debug = false
end
Instance Attribute Details
#debug ⇒ Object
Returns the value of attribute debug.
4 5 6 |
# File 'lib/hjc/hadoop_streaming.rb', line 4
# Returns the value of attribute debug.
def debug
  @debug
end
#input_path ⇒ Object
Returns the value of attribute input_path.
3 4 5 |
# File 'lib/hjc/hadoop_streaming.rb', line 3
# Returns the value of attribute input_path.
def input_path
  @input_path
end
#jobconf ⇒ Object
Returns the value of attribute jobconf.
3 4 5 |
# File 'lib/hjc/hadoop_streaming.rb', line 3
# Returns the value of attribute jobconf.
# The same object is handed back (no copy), so callers can add entries to it.
def jobconf
  @jobconf
end
#local ⇒ Object
Returns the value of attribute local.
4 5 6 |
# File 'lib/hjc/hadoop_streaming.rb', line 4
# Returns the value of attribute local.
def local
  @local
end
#mapper_path ⇒ Object
Returns the value of attribute mapper_path.
3 4 5 |
# File 'lib/hjc/hadoop_streaming.rb', line 3
# Returns the value of attribute mapper_path.
def mapper_path
  @mapper_path
end
#output_path ⇒ Object
Returns the value of attribute output_path.
3 4 5 |
# File 'lib/hjc/hadoop_streaming.rb', line 3
# Returns the value of attribute output_path.
def output_path
  @output_path
end
#reducer_path ⇒ Object
Returns the value of attribute reducer_path.
3 4 5 |
# File 'lib/hjc/hadoop_streaming.rb', line 3
# Returns the value of attribute reducer_path.
def reducer_path
  @reducer_path
end
Instance Method Details
#add_file(f) ⇒ Object
42 43 44 |
# File 'lib/hjc/hadoop_streaming.rb', line 42
# Registers +f+ as an attached file, keyed by the basename of its path.
# A later file with the same basename replaces the earlier entry.
#
# f - any object responding to #path.
#
# Returns f.
def add_file(f)
  key = File.basename(f.path)
  @files[key] = f
end
#args ⇒ Object
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/hjc/hadoop_streaming.rb', line 46
# Builds the flat argument list that #run passes to the streaming job.
#
# Options are emitted in a fixed order, each only when its value is set:
# -input, -output, -mapper, -reducer, then the local-mode pair
# (-dfs file:/// and -jt local), then -debug, then one
# "-jobconf key=value" pair per @jobconf entry and one "-file path"
# pair per attached file.
#
# When @debug is set the joined argument string is also printed to stdout.
#
# Returns a new Array on every call.
def args
  argv = []
  argv.concat ['-input', @input_path] if @input_path
  argv.concat ['-output', @output_path] if @output_path
  argv.concat ['-mapper', @mapper_path] if @mapper_path
  argv.concat ['-reducer', @reducer_path] if @reducer_path
  # NOTE(review): the original author marked '-jt local' with "no use?";
  # it is kept so the emitted arguments stay unchanged.
  argv.concat ['-dfs', 'file:///', '-jt', 'local'] if @local
  argv << '-debug' if @debug

  # Append in place; the previous `+=` rebuilt the whole array per entry.
  @jobconf.each { |key, value| argv.concat ['-jobconf', "#{key}=#{value}"] }
  @files.each_value { |file| argv.concat ['-file', file.path] }

  puts "args: #{argv.join(' ')}" if @debug
  argv
end
#input=(input) ⇒ Object
22 23 24 25 26 27 28 29 30 |
# File 'lib/hjc/hadoop_streaming.rb', line 22
# Stages +input+ in a temp file (via Util.to_temp_file) and records the
# path returned by Util.rel_path as @input_path. Unless the job is local,
# the staged file is also uploaded with FsShell#put under that same path.
#
# NOTE(review): original author's remarks, unverified here - the streaming
# -input parameter "seems to" need the exact path, and for non-local jobs
# that path "seems" to be on HDFS.
def input=(input)
  staged = Util.to_temp_file('input', input)
  @input_path = Util.rel_path(staged)
  return if @local

  FsShell.new.put(staged.path, Util.rel_path(staged))
end
#mapper=(mapper) ⇒ Object
32 33 34 35 |
# File 'lib/hjc/hadoop_streaming.rb', line 32
# Stages +mapper+ through Util.to_temp_file (passing :mod => 0700),
# registers the result under the 'mapper' key of the attached files, and
# stores the temp file's basename as @mapper_path.
def mapper=(mapper)
  script = Util.to_temp_file('mapper', mapper, :mod => 0700)
  @files['mapper'] = script
  @mapper_path = File.basename(script.path)
end
#reducer=(reducer) ⇒ Object
37 38 39 40 |
# File 'lib/hjc/hadoop_streaming.rb', line 37
# Stages +reducer+ through Util.to_temp_file (passing :mod => 0700),
# registers the result under the 'reducer' key of the attached files, and
# stores the temp file's basename as @reducer_path.
def reducer=(reducer)
  script = Util.to_temp_file('reducer', reducer, :mod => 0700)
  @files['reducer'] = script
  @reducer_path = File.basename(script.path)
end
#run ⇒ Object
13 14 15 16 |
# File 'lib/hjc/hadoop_streaming.rb', line 13
# Runs a new StreamJob with the arguments produced by #args and stores
# its result in @ret, which #success? inspects afterwards.
#
# Returns whatever StreamJob#run returned.
def run
  @ret = StreamJob.new.run(args)
end
#success? ⇒ Boolean
18 19 20 |
# File 'lib/hjc/hadoop_streaming.rb', line 18
# Reports whether the last #run succeeded: true only when the stored job
# result equals 0. Returns false when no result has been stored yet.
def success?
  @ret == 0
end