Class: Swineherd::Script::HadoopScript

Inherits:
Object
  • Object
show all
Includes:
Common
Defined in:
lib/swineherd/script/hadoop_script.rb

Overview

Script wrapper for native Java map-reduce jobs, launched through `hadoop jar`.

Instance Attribute Summary collapse

Attributes included from Common

#attributes, #input, #options, #output

Instance Method Summary collapse

Methods included from Common

#env, #local_cmd, #refresh!, #run, #script

Constructor Details

#initialize(*args) ⇒ HadoopScript

Returns a new instance of HadoopScript.



10
11
12
13
# File 'lib/swineherd/script/hadoop_script.rb', line 10

# Runs the inherited constructor with the given arguments, then replaces
# @options with a hash whose missing keys are auto-filled with an empty hash,
# so grouped options can be assigned as options[:group][:key] = value.
def initialize(*args)
  super(*args)
  # Only the first level is auto-created; deeper levels are plain hashes.
  @options = Hash.new { |opts, group| opts[group] = {} }
end

Instance Attribute Details

#hadoop_classpath ⇒ Object

Returns the value of attribute hadoop_classpath.



8
9
10
# File 'lib/swineherd/script/hadoop_script.rb', line 8

# Reader for @hadoop_classpath (nil until assigned).
def hadoop_classpath; @hadoop_classpath; end

#java_options ⇒ Object

Returns the value of attribute java_options.



8
9
10
# File 'lib/swineherd/script/hadoop_script.rb', line 8

# Reader for @java_options (nil until assigned).
def java_options; @java_options; end

#libjars ⇒ Object

Returns the value of attribute libjars.



8
9
10
# File 'lib/swineherd/script/hadoop_script.rb', line 8

# Reader for @libjars (nil until assigned).
def libjars; @libjars; end

#main_class ⇒ Object

Returns the value of attribute main_class.



8
9
10
# File 'lib/swineherd/script/hadoop_script.rb', line 8

# Reader for @main_class (nil until assigned).
def main_class; @main_class; end

#run_jar ⇒ Object

Returns the value of attribute run_jar.



8
9
10
# File 'lib/swineherd/script/hadoop_script.rb', line 8

# Reader for @run_jar (nil until assigned).
def run_jar; @run_jar; end

Instance Method Details

#cmd ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
# File 'lib/swineherd/script/hadoop_script.rb', line 46

# Assembles the shell command line that launches the job with `hadoop jar`.
#
# The pieces, in order, are: the HADOOP_CLASSPATH assignment, the hadoop
# binary with the jar to run, the main class, one -D argument per entry of
# +options+ (see java_args), the -libjars flag, the comma-joined input paths
# and the comma-joined output paths. nil pieces are dropped and the rest are
# joined with a backslash-newline continuation so the result reads as one
# multi-line shell command.
#
# The -libjars flag is emitted only when +libjars+ is set to something
# non-blank. A bare "-libjars" would leave the flag without a value, and the
# next token on the command line (the input paths) would follow it instead.
#
# @return [String] the full command line
def cmd
  libjars_arg = "-libjars #{libjars}" unless libjars.to_s.strip.empty?

  [
    "HADOOP_CLASSPATH=#{hadoop_classpath}",
    "#{hadoop_home}/bin/hadoop jar #{run_jar}",
    main_class,
    java_args(options),
    libjars_arg,
    input.join(','),
    output.join(',')
  ].flatten.compact.join(" \t\\\n  ")
end

#java_args(args) ⇒ Object

Converts an arbitrarily nested hash to flattened arguments for passing to java program. For example:

=> {:mapred => {:reduce => {:tasks => 0}}}

will transform to:

‘-Dmapred.reduce.tasks=0’



25
26
27
# File 'lib/swineherd/script/hadoop_script.rb', line 25

# Turns a (possibly nested) options hash into Java system-property arguments.
# Each dotted "key.path=value" string produced by to_dotted_args is prefixed
# with "-D"; no other prefix is added, so
#   {:mapred => {:reduce => {:tasks => 0}}}
# yields
#   ["-Dmapred.reduce.tasks=0"]
#
# @param args [Hash] option names mapped to values or nested hashes
# @return [Array<String>] one "-Dkey.path=value" string per leaf value
def java_args(args)
  dotted_settings = to_dotted_args(args)
  dotted_settings.map { |setting| "-D#{setting}" }
end

#to_dotted_args(args) ⇒ Object

Uses recursion to take an arbitrarily nested hash and flatten it into dotted args. See ‘java_args’. Can you do it any better?



34
35
36
37
38
39
40
41
42
43
44
# File 'lib/swineherd/script/hadoop_script.rb', line 34

# Flattens an arbitrarily nested hash into "dotted.key.path=value" strings.
# A Hash value is flattened recursively and each of its strings is prefixed
# with the current key and a dot; any other value ends the path as
# "key=value".
#
# @param args [Hash] keys mapped to values or nested hashes
# @return [Array<String>] one string per leaf value, in iteration order
def to_dotted_args(args)
  args.each_with_object([]) do |(key, value), dotted|
    case value
    when Hash
      to_dotted_args(value).each do |suffix|
        dotted << [key, suffix].join(".")
      end
    else
      dotted << "#{key}=#{value}"
    end
  end
end