Class: RBPig::Pig

Inherits:
Object
  • Object
show all
Defined in:
lib/rbpig.rb

Instance Method Summary collapse

Constructor Details

#initialize(configs) ⇒ Pig

Returns a new instance of Pig.



46
47
48
49
# File 'lib/rbpig.rb', line 46

# Builds a Pig wrapper that starts with an empty list of script lines.
#
# @param configs [Object] stored unchanged in @configs; #fetch later hands it
#   to RBPig.executable (its expected shape is not visible here)
def initialize(configs)
  @configs = configs
  # Accumulated script lines: appended to by #datasets and #grunt,
  # written out one per line by #fetch.
  @oink_oink = []
end

Instance Method Details

#datasets(*datasets) ⇒ Object



51
52
53
# File 'lib/rbpig.rb', line 51

# Queues each given dataset for the script, in the order given.
#
# Every argument is converted with #to_s before being appended to the
# accumulated script lines.
#
# @param datasets [Array<#to_s>] items whose string form is appended
# @return [Array] the arguments exactly as they were passed in
def datasets(*datasets)
  datasets.each do |dataset|
    @oink_oink.push(dataset.to_s)
  end
end

#fetch(*aliases) ⇒ Object



59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/rbpig.rb', line 59

# Writes the accumulated script lines plus one STORE statement per alias to a
# script file under /tmp/pigscript, runs it, and returns the stored rows.
#
# Both the dump directory and the script file are named
# "<pid>_<epoch seconds>".
#
# @param aliases [Array<#to_s>] alias names to STORE; each is dumped to
#   "/tmp/pigdump/<pid>_<epoch>/<alias>"
# @return [Array] the splatted result of splitting every line returned by
#   fetch_files_in_hdfs on tabs, one entry per element it returns
#   (presumably one per alias -- fetch_files_in_hdfs is defined elsewhere,
#   TODO confirm)
def fetch(*aliases)
  dump_root = "/tmp/pigdump/#{Process.pid}_#{Time.now.to_i}"
  dump_paths = aliases.map { |name| "#{dump_root}/#{name}" }

  script_path = "/tmp/pigscript/#{Process.pid}_#{Time.now.to_i}"
  FileUtils.mkdir_p(File.dirname(script_path))
  File.open(script_path, "w") do |script|
    # Queued statements first, then one STORE per requested alias.
    @oink_oink.each { |statement| script << "#{statement}\n" }
    dump_paths.each do |dump_path|
      # The alias name is recovered as the last path component of its dump path.
      relation = File.basename(dump_path)
      script << "STORE #{relation} INTO '#{dump_path}' USING PigStorage ('\\t');\n"
    end
  end

  # stderr is folded into stdout for whatever `execute` does with the output.
  execute("#{RBPig.executable(@configs)} -f #{script_path} 2>&1")

  rows_per_dump = fetch_files_in_hdfs(dump_paths).map do |lines|
    # Limit -1 keeps trailing empty fields instead of dropping them.
    lines.map { |line| line.chomp("\n").split("\t", -1) }
  end
  return *rows_per_dump
end

#grunt(oink) ⇒ Object



55
56
57
# File 'lib/rbpig.rb', line 55

# Queues one script line as-is (no string conversion is applied here).
#
# @param oink [Object] the statement to append to the accumulated script lines
# @return [Array] the accumulated script lines, including the new entry
def grunt(oink)
  @oink_oink.push(oink)
end