Class: Wukong::CassandraScript
- Defined in:
- lib/wukong/script/cassandra_loader_script.rb
Instance Attribute Summary
Attributes inherited from Script
#input_paths, #mapper, #options, #output_path, #reducer
Instance Method Summary
- #avro_schema ⇒ Object
- #cassandra_jars ⇒ Object
Return paths to cassandra jars as a string.
- #hadoop_other_args(*args) ⇒ Object
Methods inherited from Script
#initialize, #job_name, #mapper_commandline, #reducer_commandline, #run, #run_mode, #safely
Methods included from LocalCommand
#execute_local_workflow, #local_commandline, #local_mode_sort_commandline
Methods included from HadoopCommand
#execute_hadoop_workflow, #hadoop_jobconf_options, #hadoop_recycle_env, #hadoop_runner, included, #jobconf
Constructor Details
This class inherits a constructor from Wukong::Script
Instance Method Details
#avro_schema ⇒ Object
35 36 37 |
# File 'lib/wukong/script/cassandra_loader_script.rb', line 35
#
# Builds the path to the +cassandra.avpr+ file that lives under
# +interface/avro+ inside the directory named by +Settings.cassandra_home+.
# The file is not checked for existence.
#
# @return [String] +Settings.cassandra_home+ joined with
#   "interface/avro/cassandra.avpr"
def avro_schema
  File.join(Settings.cassandra_home, 'interface/avro/cassandra.avpr')
end
#cassandra_jars ⇒ Object
Return paths to cassandra jars as a string.
27 28 29 30 31 32 33 |
# File 'lib/wukong/script/cassandra_loader_script.rb', line 27
#
# Return paths to cassandra jars as a string.
#
# Globs three locations under +Settings.cassandra_home+:
#
# * build/apache-cassandra*.jar
# * build/lib/jars/*.jar
# * lib/*.jar
#
# and joins every match with a comma. Nothing is raised when a location is
# missing or empty; it simply contributes no entries.
#
# @return [String] comma-separated jar paths, or "" when nothing matches
def cassandra_jars
  home = Settings.cassandra_home

  # Dir[] already returns an Array of matches, so it can be joined directly
  # without copying the entries into a second array first.
  Dir[
    "#{home}/build/apache-cassandra*.jar",
    "#{home}/build/lib/jars/*.jar",
    "#{home}/lib/*.jar"
  ].join(',')
end
#hadoop_other_args(*args) ⇒ Object
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
# File 'lib/wukong/script/cassandra_loader_script.rb', line 7
#
# Appends the Cassandra output options to the option list built by the
# parent implementation.
#
# Reads +Settings.cassandra_keyspace+, +Settings.cassandra_col_family+ and
# +Settings.cassandra_hosts+. The hosts setting may be a single string or an
# (arbitrarily nested) array of strings.
#
# @param args forwarded unchanged to +super+
# @return [Object] whatever +super+ returned, with the options below appended
#   through +<<+ (presumably an Array of Strings; confirm against
#   Wukong::Script)
def hadoop_other_args(*args)
  opts = super(*args)

  # Only the host names are passed on: everything from the first ':' onwards
  # is stripped from each entry, and the port is given separately below as a
  # fixed value.
  thrift_hosts = [Settings.cassandra_hosts].flatten.map { |host| host.gsub(/:.*/, '') }.join(',')

  opts << "-D stream.map.output='cassandra_avro_output'"
  opts << "-D stream.io.identifier.resolver.class='org.apache.cassandra.hadoop.streaming.AvroResolver'"
  opts << "-D cassandra.output.keyspace='#{Settings.cassandra_keyspace}'"
  opts << "-D cassandra.output.columnfamily='#{Settings.cassandra_col_family}'"
  opts << "-D cassandra.partitioner.class='org.apache.cassandra.dht.RandomPartitioner'"
  opts << "-D cassandra.thrift.address='#{thrift_hosts}'"
  opts << "-D cassandra.thrift.port='9160'"
  # opts << "-D mapreduce.output.columnfamilyoutputformat.batch.threshold='1024'"

  # ORDER MATTERS
  # NOTE(review): presumably because Hadoop expects its generic options
  # (-D, -libjars) ahead of the streaming command options; confirm before
  # reordering anything below.
  opts << "-libjars '#{cassandra_jars}'"
  opts << "-file '#{avro_schema}'"
  opts << "-outputformat 'org.apache.cassandra.hadoop.ColumnFamilyOutputFormat'"
  opts
end