Class: Wukong::CassandraScript

Inherits:
Script show all
Defined in:
lib/wukong/script/cassandra_loader_script.rb

Instance Attribute Summary

Attributes inherited from Script

#input_paths, #mapper, #options, #output_path, #reducer

Instance Method Summary collapse

Methods inherited from Script

#initialize, #job_name, #mapper_commandline, #reducer_commandline, #run, #run_mode, #safely

Methods included from LocalCommand

#execute_local_workflow, #local_commandline, #local_mode_sort_commandline

Methods included from HadoopCommand

#execute_hadoop_workflow, #hadoop_jobconf_options, #hadoop_recycle_env, #hadoop_runner, included, #jobconf

Constructor Details

This class inherits a constructor from Wukong::Script

Instance Method Details

#avro_schema ⇒ Object



35
36
37
# File 'lib/wukong/script/cassandra_loader_script.rb', line 35

# Path of the Cassandra Avro protocol file, resolved against
# Settings.cassandra_home.
#
# @return [String] the joined path
def avro_schema
  schema_relpath = "interface/avro/cassandra.avpr"
  File.join(Settings.cassandra_home, schema_relpath)
end

#cassandra_jars ⇒ Object

Returns the paths to the Cassandra jars as a single comma-separated string.



27
28
29
30
31
32
33
# File 'lib/wukong/script/cassandra_loader_script.rb', line 27

# Collects the Cassandra jar files found under Settings.cassandra_home and
# returns them as one comma-separated string.
#
# Three glob patterns are searched: the apache-cassandra jars in build/, every
# jar in build/lib/jars/, and every jar in lib/.
#
# @return [String] the matched paths joined with ","; empty when nothing matches
def cassandra_jars
  home = Settings.cassandra_home
  # Dir[] already returns an Array of matches, so it can be joined directly
  # without copying the entries into a second array first.
  Dir[
    "#{home}/build/apache-cassandra*.jar",
    "#{home}/build/lib/jars/*.jar",
    "#{home}/lib/*.jar"
  ].join(',')
end

#hadoop_other_args(*args) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/wukong/script/cassandra_loader_script.rb', line 7

# Extends the inherited hadoop argument list with the options used to write
# streaming output to Cassandra.
#
# @param args forwarded unchanged to the inherited implementation
# @return [Object] the value returned by +super+, with each Cassandra option
#   appended to it via +<<+
def hadoop_other_args(*args)
  hadoop_args = super(*args)

  # Each pair is rendered below as: -D key='value'
  jobconf = [
    ['stream.map.output', 'cassandra_avro_output'],
    ['stream.io.identifier.resolver.class', 'org.apache.cassandra.hadoop.streaming.AvroResolver'],
    ['cassandra.output.keyspace', Settings.cassandra_keyspace],
    ['cassandra.output.columnfamily', Settings.cassandra_col_family],
    ['cassandra.partitioner.class', 'org.apache.cassandra.dht.RandomPartitioner'],
    # Accepts a single host or a list; everything from the first ':' onward
    # is stripped from each entry before the hosts are comma-joined.
    ['cassandra.thrift.address',
     [Settings.cassandra_hosts].flatten.map { |host| host.gsub(/:.*/, '') }.join(",")],
    ['cassandra.thrift.port', '9160']
    # ['mapreduce.output.columnfamilyoutputformat.batch.threshold', '1024']
  ]
  jobconf.each { |key, value| hadoop_args << "-D #{key}='#{value}'" }

  # ORDER MATTERS
  hadoop_args << "-libjars '#{cassandra_jars}'"
  hadoop_args << "-file    '#{avro_schema}'"
  hadoop_args << "-outputformat 'org.apache.cassandra.hadoop.ColumnFamilyOutputFormat'"
  hadoop_args
end