Class: Mortar::Local::Pig

Inherits:
Object
  • Object
show all
Includes:
InstallUtil
Defined in:
lib/mortar/local/pig.rb

Constant Summary collapse

PIG_LOG_FORMAT =
"humanreadable"
LIB_TGZ_NAME =
"lib-common.tar.gz"
PIG_COMMON_LIB_URL_PATH =
"resource/lib_common"
DEFAULT_PIGOPTS_FILES =

This needs to be defined for watchtower.

%w(
    /lib-common/conf/pig-hawk-global.properties
    /lib-common/conf/pig-cli-local-dev.properties
)

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from InstallUtil

#download_file, #ensure_mortar_local_directory, #extract_tgz, #get_resource, #gitignore_template_path, #head_resource, #http_date_to_epoch, #install_date, #install_file_for, #is_newer_version, #jython_cache_directory, #jython_directory, #local_install_directory, #local_install_directory_name, #local_log_dir, #local_project_gitignore, #local_udf_log_dir, #make_call, #make_call_sleep_seconds, #note_install, #osx?, #project_root, #render_script_template, #reset_local_logs, #run_templated_script, #unset_hadoop_env_vars, #url_date

Methods included from Helpers

#action, #ask, #confirm, #copy_if_not_present_at_dest, #default_host, #deprecate, #display, #display_header, #display_object, #display_row, #display_table, #display_with_indent, #download_to_file, #ensure_dir_exists, #error, error_with_failure, error_with_failure=, extended, extended_into, #format_bytes, #format_date, #format_with_bang, #get_terminal_environment, #home_directory, #host, #hprint, #hputs, included, included_into, #installed_with_omnibus?, #json_decode, #json_encode, #line_formatter, #longest, #output_with_bang, #quantify, #redisplay, #retry_on_exception, #running_on_a_mac?, #running_on_windows?, #set_buffer, #shell, #spinner, #status, #string_distance, #styled_array, #styled_error, #styled_hash, #styled_header, #suggestion, #test_name, #ticking, #time_ago, #truncate, #warning, #with_tty, #write_to_file

Constructor Details

#initializePig

Returns a new instance of Pig.



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/mortar/local/pig.rb', line 53

# Builds a new Pig helper.
#
# Sets up three pieces of state:
# * @temp_file_objects     - starts empty; other methods push temp file
#                            objects onto it.
# * @resource_locations    - absolute source paths of the illustrate report
#                            template and its static assets, resolved
#                            relative to this file.
# * @resource_destinations - relative paths the report and its assets are
#                            written to.
def initialize
  @temp_file_objects = []

  # Every asset except the report template is resolved from the same
  # "../../../../" prefix relative to this file.
  asset_path = lambda { |relative| File.expand_path("../../../../#{relative}", __FILE__) }

  @resource_locations = {
    "illustrate_template" => File.expand_path("../../templates/report/illustrate-report.html", __FILE__),
    "illustrate_css"      => asset_path.call("css/illustrate.css"),
    "jquery"              => asset_path.call("js/jquery-1.7.1.min.js"),
    "jquery_transit"      => asset_path.call("js/jquery.transit.js"),
    "jquery_stylestack"   => asset_path.call("js/jquery.stylestack.js"),
    "mortar_table"        => asset_path.call("js/mortar-table.js"),
    "zeroclipboard"       => asset_path.call("js/zero_clipboard.js"),
    "zeroclipboard_swf"   => asset_path.call("flash/zeroclipboard.swf")
  }

  output_dir    = "illustrate-output"
  resources_dir = "#{output_dir}/resources"

  @resource_destinations = {
    "illustrate_html"   => "#{output_dir}/illustrate-output.html",
    "illustrate_css"    => "#{resources_dir}/css/illustrate-output.css",
    "jquery"            => "#{resources_dir}/js/jquery-1.7.1.min.js",
    "jquery_transit"    => "#{resources_dir}/js/jquery.transit.js",
    "jquery_stylestack" => "#{resources_dir}/js/jquery.stylestack.js",
    "mortar_table"      => "#{resources_dir}/js/mortar-table.js",
    "zeroclipboard"     => "#{resources_dir}/js/zero_clipboard.js",
    "zeroclipboard_swf" => "#{resources_dir}/flash/zeroclipboard.swf"
  }
end

Instance Attribute Details

#resource_destinationsObject

Returns the value of attribute resource_destinations.



51
52
53
# File 'lib/mortar/local/pig.rb', line 51

# Reader for @resource_destinations.
def resource_destinations
  return @resource_destinations
end

#resource_locationsObject

Returns the value of attribute resource_locations.



50
51
52
# File 'lib/mortar/local/pig.rb', line 50

# Reader for @resource_locations.
def resource_locations
  return @resource_locations
end

Instance Method Details

#automatic_pig_parametersObject

Pig parameters that are supplied directly by Mortar when running on the server side. We duplicate them here.



404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
# File 'lib/mortar/local/pig.rb', line 404

# Builds the pig parameters that are always supplied, in addition to any
# the user passes.
#
# The MORTAR_EMAIL_S3_ESCAPED environment variable wins when set; otherwise
# the value comes from Mortar::Auth.user_s3_safe(true).
#
# Returns an Array of {"name" => ..., "value" => ...} hashes, the same shape
# used for parameters given on the command line or in a parameter file.
def automatic_pig_parameters
  escaped_email = ENV['MORTAR_EMAIL_S3_ESCAPED'] || Mortar::Auth.user_s3_safe(true)
  automatic = { 'MORTAR_EMAIL_S3_ESCAPED' => escaped_email }

  automatic.map { |name, value| { "name" => name, "value" => value } }
end

#command(pig_version) ⇒ Object



79
80
81
# File 'lib/mortar/local/pig.rb', line 79

# Path to the bin/pig launcher inside the install directory of the given
# pig version.
def command(pig_version)
  pig_home = pig_directory(pig_version)
  File.join(pig_home, "bin", "pig")
end

#create_illustrate_output_pathObject

Create a temp file to be used for writing the illustrate JSON output, and return its path. This data file will later be used to create the resulting HTML output. Tempfile will take care of cleaning up the file when we exit.



208
209
210
211
212
213
214
# File 'lib/mortar/local/pig.rb', line 208

# Creates an empty temp file for the illustrate output and returns its path.
#
# Tempfile is used both to generate a unique path and so that the file is
# removed when the process exits.
#
# Returns the String path of the (closed, not yet unlinked) temp file.
def create_illustrate_output_path
  outfile = Tempfile.new("mortar-illustrate-output")
  # Close the handle but keep the file on disk (false = do not unlink).
  outfile.close(false)

  # Hold a reference to the Tempfile object. Without one the garbage
  # collector may finalize it and delete the file out from under us while
  # the path is still in use.
  (@temp_file_objects ||= []).push(outfile)

  outfile.path
end

#create_illustrate_template_parameters(illustrate_data) ⇒ Object



264
265
266
267
268
269
# File 'lib/mortar/local/pig.rb', line 264

# Picks the values the illustrate report template needs out of the decoded
# illustrate data.
#
# illustrate_data - object indexed by the String keys 'tables' and
#                   'udf_output'.
#
# Returns a Hash with exactly those two keys.
def create_illustrate_template_parameters(illustrate_data)
  {
    'tables'     => illustrate_data['tables'],
    'udf_output' => illustrate_data['udf_output']
  }
end

#decode_illustrate_input_file(illustrate_outpath) ⇒ Object

Given a file path, read the file and return its text converted to UTF-8.



217
218
219
220
221
222
223
224
225
226
# File 'lib/mortar/local/pig.rb', line 217

# Reads the file at illustrate_outpath and returns its contents as UTF-8.
#
# The bytes are transcoded from 'binary' with invalid and undefined
# sequences replaced by the empty string. When String#encode is missing
# (NoMethodError) the iconv library is used instead.
def decode_illustrate_input_file(illustrate_outpath)
  raw_text = File.read(illustrate_outpath)

  begin
    raw_text.encode('UTF-8', 'binary', :invalid => :replace, :undef => :replace, :replace => '')
  rescue NoMethodError
    require 'iconv'
    converter = Iconv.new('UTF-8//IGNORE', 'UTF-8')
    converter.iconv(raw_text)
  end
end

#illustrate_alias(pig_script, pig_alias, skip_pruning, no_browser, pig_version, pig_parameters) ⇒ Object



271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
# File 'lib/mortar/local/pig.rb', line 271

# Runs pig's illustrate command for a script (optionally a single alias)
# and shows the result.
#
# pig_script     - object responding to #path.
# pig_alias      - alias to illustrate, or nil/false to omit it.
# skip_pruning   - when truthy, adds -skipPruning.
# no_browser     - when truthy, asks for -str output and prints it with
#                  display; otherwise asks for -json output and renders the
#                  browser report.
# pig_version    - passed through to run_pig_command.
# pig_parameters - user parameters, written to a parameter file.
#
# Returns nil when the pig command result is falsy, otherwise whatever the
# display / browser step returns.
def illustrate_alias(pig_script, pig_alias, skip_pruning, no_browser, pig_version, pig_parameters)
  # Parameters have to be entered with the illustrate command (as opposed
  # to as a command line argument) or it will result in an
  # 'Undefined parameter' error.
  param_file = make_pig_param_file(pig_parameters)
  illustrate_outpath = create_illustrate_output_path()

  # The single quote opened after -e is closed by the -str/-json segment;
  # the alias, when present, is appended after that closing quote.
  segments = [
    "-e 'illustrate ",
    "-param_file #{param_file} ",
    "-script #{pig_script.path} -out #{illustrate_outpath} "
  ]
  segments << " -skipPruning " if skip_pruning
  segments << (no_browser ? " -str '" : " -json '")
  segments << " #{pig_alias} " if pig_alias
  cmd = segments.join

  result = run_pig_command(cmd, pig_version, [], false)
  return nil unless result

  if no_browser
    display decode_illustrate_input_file(illustrate_outpath)
  else
    show_illustrate_output_browser(illustrate_outpath)
  end
end

#install_libObject



164
165
166
167
168
169
170
171
172
173
174
175
176
177
# File 'lib/mortar/local/pig.rb', line 164

# Downloads and unpacks the lib-common archive into the local install
# directory, replacing any existing lib directory, then records the install.
#
# Returns whatever note_install returns.
def install_lib
  # Delete the directory if it already exists to ensure cruft isn't left around.
  FileUtils.rm_rf(lib_directory) if File.directory?(lib_directory)

  install_root = local_install_directory
  FileUtils.mkdir_p(install_root)

  archive_path = File.join(install_root, LIB_TGZ_NAME)
  download_file(lib_archive_url, archive_path)
  extract_tgz(archive_path, install_root)

  # The archive is no longer needed once extracted.
  File.delete(archive_path)
  note_install("lib-common")
end

#install_or_update(pig_version, command = nil) ⇒ Object



122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# File 'lib/mortar/local/pig.rb', line 122

# Makes sure the requested pig version and the shared pig libraries are
# installed locally and up to date.
#
# pig_version - object responding to #name; identifies the pig release.
# command     - optional value forwarded to the pig update check and to
#               install_pig (default: nil).
#
# Pig: installs when no install exists, otherwise reinstalls when the update
# check says so. Libraries: the same two-step decision using the lib checks.
def install_or_update(pig_version, command=nil)
  if should_do_pig_install?(pig_version)
    action "Installing #{pig_version.name} to #{local_install_directory_name}" do
      install_pig(pig_version, command)
    end
  elsif should_do_pig_update?(pig_version, command)
    action "Updating to latest #{pig_version.name} in #{local_install_directory_name}" do
      # Forward `command` here too, so an update is handled the same way as
      # a fresh install and as the update check that triggered it.
      install_pig(pig_version, command)
    end
  end

  if should_do_lib_install?
    action "Installing pig dependencies to #{local_install_directory_name}" do
      install_lib()
    end
  elsif should_do_lib_update?
    action "Updating to latest pig dependencies in #{local_install_directory_name}" do
      install_lib()
    end
  end
end

#install_pig(pig_version, command = nil) ⇒ Object

Installs pig for this project, first removing any existing install directory for the same version.



145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/mortar/local/pig.rb', line 145

# Downloads and unpacks the given pig version into the local install
# directory, replacing any existing directory for that version, then
# records the install.
#
# pig_version - object responding to #name and #tgz_name.
# command     - optional value forwarded to download_file (default: nil).
#
# Returns whatever note_install returns.
def install_pig(pig_version, command=nil)
  # Delete the directory if it already exists to ensure cruft isn't left around.
  pig_home = pig_directory(pig_version)
  FileUtils.rm_rf(pig_home) if File.directory?(pig_home)

  install_root = local_install_directory
  FileUtils.mkdir_p(install_root)

  archive_path = File.join(install_root, pig_version.tgz_name)
  download_file(pig_archive_url(pig_version), archive_path, command)
  extract_tgz(archive_path, install_root)

  # The launcher has been seen coming out of the tgz without +x, so set the
  # mode explicitly. `command(...)` with an argument list is the method call,
  # not the `command` parameter.
  FileUtils.chmod(0755, command(pig_version))

  File.delete(archive_path)
  note_install(pig_version.name)
end

#launch_repl(pig_version, pig_parameters) ⇒ Object



183
184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/mortar/local/pig.rb', line 183

# Starts an interactive pig session (the REPL).
#
# pig_version    - passed through to run_pig_command.
# pig_parameters - passed through to run_pig_command.
#
# Before launching, writes a copy of the log4j configuration whose
# LogFileAppender file is the non-relative "local-pig.log".
#
# Returns whatever run_pig_command returns.
def launch_repl(pig_version, pig_parameters)
  # The REPL is very likely to be run outside a mortar project and almost equally as likely
  # to be run in the user's home directory.  The default log4j config file references the pig log
  # file as ../logs/local-pig.log, which is a path relative to the 'pigscripts' directory.
  # Since we very likely aren't going to be run from a mortar project we won't have a pigscripts
  # directory to cd into, so log4j spits out an ugly error message when it doesn't have permissions
  # to create /home/logs/local-pig.log. To work around this we copy the log4j configuration and
  # overwrite the log file setting so it is no longer relative.
  #
  # The source is read with File.read (no handle left open) and before the
  # destination is opened, so a failed read cannot leave a truncated copy.
  no_project_conf = File.read(log4j_conf).gsub(/log4j\.appender\.LogFileAppender\.File=.*\n/,
                                               "log4j.appender.LogFileAppender.File=local-pig.log\n")
  File.open(log4j_conf_no_project, 'w') do |out|
    out << no_project_conf
  end

  run_pig_command(" ", pig_version, pig_parameters)
end

#lib_archive_urlObject



97
98
99
100
101
# File 'lib/mortar/local/pig.rb', line 97

# URL of the lib-common archive.
#
# The COMMON_LIB_DISTRO_URL environment variable wins when set. Otherwise
# the URL is built from host: used as-is when it matches /^http/, else
# prefixed with "https://api.".
def lib_archive_url
  if host =~ /^http/
    base_url = host
  else
    base_url = "https://api.#{host}"
  end

  fallback_url = base_url + "/" + PIG_COMMON_LIB_URL_PATH
  ENV.fetch('COMMON_LIB_DISTRO_URL', fallback_url)
end

#lib_directoryObject



87
88
89
# File 'lib/mortar/local/pig.rb', line 87

# Path of the "lib-common" directory inside the local install directory.
def lib_directory
  install_root = local_install_directory
  File.join(install_root, "lib-common")
end

#log4j_confObject



352
353
354
# File 'lib/mortar/local/pig.rb', line 352

# Path of the log4j configuration file under the lib directory's conf/
# folder.
def log4j_conf
  conf_root = lib_directory
  "#{conf_root}/conf/log4j-cli-local-dev.properties"
end

#log4j_conf_no_projectObject



356
357
358
# File 'lib/mortar/local/pig.rb', line 356

# Path of the "no project" log4j configuration file under the lib
# directory's conf/ folder.
def log4j_conf_no_project
  conf_root = lib_directory
  "#{conf_root}/conf/log4j-cli-local-no-project.properties"
end

#make_pig_param_file(pig_parameters) ⇒ Object

Given a set of user-specified pig parameters, combine them with the automatic Mortar parameters and write them out to a tempfile, returning its path so it may be referenced later in the process.



425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
# File 'lib/mortar/local/pig.rb', line 425

# Writes the automatic Mortar parameters followed by the user supplied
# parameters to a temp file, one "name=value" line each.
#
# pig_parameters - Array of {"name" => ..., "value" => ...} hashes, or nil
#                  for "no user parameters" (run_pig_command defaults its
#                  parameters argument to nil).
#
# Returns the String path of the parameter file.
def make_pig_param_file(pig_parameters)
  # Treat nil as an empty list instead of raising a TypeError.
  all_parameters = automatic_pig_parameters + (pig_parameters || [])

  param_file = Tempfile.new("mortar-pig-parameters")
  all_parameters.each do |param|
    param_file.write("#{param['name']}=#{param['value']}\n")
  end
  # Close the handle but keep the file on disk (false = do not unlink).
  param_file.close(false)

  # Keep a reference to the tempfile object so that the garbage collector
  # does not automatically delete the file out from under us.
  (@temp_file_objects ||= []).push(param_file)

  param_file.path
end

#pig_archive_url(pig_version) ⇒ Object



91
92
93
94
95
# File 'lib/mortar/local/pig.rb', line 91

# URL of the pig archive for the given version.
#
# The PIG_DISTRO_URL environment variable wins when set. Otherwise the URL
# is built from host (used as-is when it matches /^http/, else prefixed with
# "https://api.") plus the version's default URL path.
#
# pig_version - object responding to #tgz_default_url_path.
def pig_archive_url(pig_version)
  if host =~ /^http/
    base_url = host
  else
    base_url = "https://api.#{host}"
  end

  fallback_url = base_url + "/" + pig_version.tgz_default_url_path
  ENV.fetch('PIG_DISTRO_URL', fallback_url)
end

#pig_classpath(pig_version) ⇒ Object



340
341
342
343
344
345
346
347
348
349
350
# File 'lib/mortar/local/pig.rb', line 340

# Builds the ":"-separated pig classpath for the given pig version from the
# pig install directory, the shared lib directory, the jython jar and the
# project's lib directory.
def pig_classpath(pig_version)
  pig_home   = pig_directory(pig_version)
  shared_lib = lib_directory

  entries = [
    "#{pig_home}/lib-local/*",
    "#{shared_lib}/lib-local/*",
    "#{pig_home}/lib-pig/*",
    "#{pig_home}/lib-cluster/*",
    "#{shared_lib}/lib-pig/*",
    "#{shared_lib}/lib-cluster/*",
    "#{jython_directory}/jython.jar",
    "#{project_root}/lib/*"
  ]
  entries.join(":")
end

#pig_command_script_template_parameters(cmd, pig_version, pig_parameters) ⇒ Object

Parameters necessary for rendering the bash script template



361
362
363
364
365
366
367
368
369
370
371
372
373
# File 'lib/mortar/local/pig.rb', line 361

# Collects the parameters needed to render the bash script template that
# runs pig.
#
# cmd            - pig sub-command string, stored under 'pig_sub_command'.
# pig_version    - object responding to #name.
# pig_parameters - user parameters, written to a parameter file.
#
# Returns a Hash keyed by template variable name.
def pig_command_script_template_parameters(cmd, pig_version, pig_parameters)
  {
    'pig_params_file'       => make_pig_param_file(pig_parameters),
    'pig_dir'               => pig_version.name,
    'pig_home'              => pig_directory(pig_version),
    'pig_classpath'         => pig_classpath(pig_version),
    # NOTE(review): called without pig_version, so template_params_classpath
    # falls back to its own default version - confirm this is intended.
    'classpath'             => template_params_classpath,
    'log4j_conf'            => log4j_conf,
    'no_project_log4j_conf' => log4j_conf_no_project,
    'pig_sub_command'       => cmd,
    'pig_opts'              => pig_options
  }
end

#pig_command_script_template_pathObject

Path to the template which generates the bash script for running pig



318
319
320
# File 'lib/mortar/local/pig.rb', line 318

# Absolute path of the runpig.sh script template, resolved relative to this
# file.
def pig_command_script_template_path
  template = "../../templates/script/runpig.sh"
  File.expand_path(template, __FILE__)
end

#pig_directory(pig_version) ⇒ Object



83
84
85
# File 'lib/mortar/local/pig.rb', line 83

# Path of the directory, inside the local install directory, named after
# the given pig version.
#
# pig_version - object responding to #name.
def pig_directory(pig_version)
  install_root = local_install_directory
  File.join(install_root, pig_version.name)
end

#pig_optionsObject

Returns a hash of settings that need to be passed in via pig options



377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
# File 'lib/mortar/local/pig.rb', line 377

# Builds the hash of settings that need to be passed in via pig options:
# AWS credentials from the environment, log locations, and jython/python
# settings, followed by Kerberos settings that depend on the platform.
#
# Returns a Hash of option name => value.
def pig_options
  opts = {
    'fs.s3n.awsAccessKeyId'     => ENV['AWS_ACCESS_KEY'],
    'fs.s3n.awsSecretAccessKey' => ENV['AWS_SECRET_KEY'],
    'fs.s3.awsAccessKeyId'      => ENV['AWS_ACCESS_KEY'],
    'fs.s3.awsSecretAccessKey'  => ENV['AWS_SECRET_KEY'],
    'pig.events.logformat'      => PIG_LOG_FORMAT,
    'pig.logfile'               => local_log_dir + "/local-pig.log",
    'pig.udf.scripting.log.dir' => local_udf_log_dir,
    'python.verbose'            => 'error',
    'jython.output'             => true,
    'python.home'               => jython_directory,
    'python.path'               => "#{local_install_directory}/../controlscripts/lib:#{local_install_directory}/../vendor/controlscripts/lib",
    'python.cachedir'           => jython_cache_directory
  }

  # NOTE(review): the OS X values look like placeholders meant to stop the
  # JVM from looking up a Kerberos realm - confirm before changing them.
  kerberos =
    if osx?
      {
        'java.security.krb5.realm' => 'OX.AC.UK',
        'java.security.krb5.kdc'   => 'kdc0.ox.ac.uk:kdc1.ox.ac.uk',
        'java.security.krb5.conf'  => '/dev/null'
      }
    else
      {
        'java.security.krb5.realm' => '',
        'java.security.krb5.kdc'   => ''
      }
    end

  opts.merge!(kerberos)
end

#run_pig_command(cmd, pig_version, parameters = nil, jython_output = true) ⇒ Object

Run pig with the specified command. (‘command’ is anything that can be appended to the command-line invocation of Pig that will get it to do something interesting, such as ‘-f some-file.pig’.)



311
312
313
314
315
# File 'lib/mortar/local/pig.rb', line 311

# Runs pig with the given sub-command by rendering and executing the
# runpig script template.
#
# cmd           - string appended to the pig invocation.
# pig_version   - passed to the template parameter builder.
# parameters    - user pig parameters (default: nil).
# jython_output - value stored under the 'jython.output' pig option
#                 (default: true).
#
# Returns whatever run_templated_script returns.
def run_pig_command(cmd, pig_version, parameters = nil, jython_output = true)
  script_params = pig_command_script_template_parameters(cmd, pig_version, parameters)
  script_params['pig_opts']['jython.output'] = jython_output

  run_templated_script(pig_command_script_template_path, script_params)
end

#run_script(pig_script, pig_version, pig_parameters) ⇒ Object

run the pig script with user supplied pig parameters



200
201
202
# File 'lib/mortar/local/pig.rb', line 200

# Runs the pig script with the user supplied pig parameters.
#
# pig_script - object responding to #path.
def run_script(pig_script, pig_version, pig_parameters)
  file_command = " -f #{pig_script.path}"
  run_pig_command(file_command, pig_version, pig_parameters, true)
end

#should_do_lib_install?Boolean

Returns:

  • (Boolean)


108
109
110
# File 'lib/mortar/local/pig.rb', line 108

# Determines if the pig libraries need to be installed.
#
# Uses File.exist?; the File.exists? alias was removed in Ruby 3.2.
#
# Returns true when nothing exists at lib_directory, false otherwise.
def should_do_lib_install?
  !File.exist?(lib_directory)
end

#should_do_lib_update?Boolean

Returns:

  • (Boolean)


118
119
120
# File 'lib/mortar/local/pig.rb', line 118

# Asks is_newer_version whether the 'lib-common' archive at lib_archive_url
# is newer than what is installed, and returns its answer.
def should_do_lib_update?
  archive_url = lib_archive_url
  is_newer_version('lib-common', archive_url)
end

#should_do_pig_install?(pig_version) ⇒ Boolean

Determines if a pig install needs to occur, true if no pig install present

Returns:

  • (Boolean)


104
105
106
# File 'lib/mortar/local/pig.rb', line 104

# Determines if a pig install needs to occur.
#
# Uses File.exist?; the File.exists? alias was removed in Ruby 3.2.
#
# Returns true when nothing exists at the pig directory for pig_version,
# false otherwise.
def should_do_pig_install?(pig_version)
  !File.exist?(pig_directory(pig_version))
end

#should_do_pig_update?(pig_version, command = nil) ⇒ Boolean

Determines if a pig update needs to occur; true if the server-side pig tgz is newer than the date of the existing install.

Returns:

  • (Boolean)


114
115
116
# File 'lib/mortar/local/pig.rb', line 114

# Asks is_newer_version whether the archive for the given pig version is
# newer than what is installed, and returns its answer.
#
# pig_version - object responding to #name.
# command     - optional value forwarded to is_newer_version (default: nil).
def should_do_pig_update?(pig_version, command=nil)
  archive_url = pig_archive_url(pig_version)
  is_newer_version(pig_version.name, archive_url, command)
end

#show_illustrate_output_browser(illustrate_outpath) ⇒ Object



228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
# File 'lib/mortar/local/pig.rb', line 228

# Renders the illustrate results as an HTML report and opens it in a
# browser.
#
# illustrate_outpath - path of the JSON file written by the illustrate
#                      command.
#
# Steps: create the illustrate-output directory tree, copy the static
# assets that are not already present, decode the JSON, render the report
# template with ERB, write the HTML file, and open it with Launchy.
#
# Returns whatever the final action call returns.
def show_illustrate_output_browser(illustrate_outpath)
  ensure_dir_exists("illustrate-output")
  ensure_dir_exists("illustrate-output/resources")
  ensure_dir_exists("illustrate-output/resources/css")
  ensure_dir_exists("illustrate-output/resources/js")
  ensure_dir_exists("illustrate-output/resources/flash")

  ["illustrate_css",
   "jquery", "jquery_transit", "jquery_stylestack",
   "mortar_table", "zeroclipboard", "zeroclipboard_swf"].each { |resource|
    copy_if_not_present_at_dest(@resource_locations[resource], @resource_destinations[resource])
  }

  # Pull in the dumped json file
  illustrate_data_json_text = decode_illustrate_input_file(illustrate_outpath)
  illustrate_data = json_decode(illustrate_data_json_text)

  # Render a template using its values
  template_params = create_illustrate_template_parameters(illustrate_data)
  template_source = File.read(@resource_locations["illustrate_template"])

  # Passing safe_level/trim_mode positionally has been deprecated since
  # Ruby 2.6, so use the trim_mode keyword whenever this ERB accepts it and
  # keep the positional form only for older ERB versions.
  erb =
    if ERB.instance_method(:initialize).parameters.include?([:key, :trim_mode])
      ERB.new(template_source, :trim_mode => "%<>")
    else
      ERB.new(template_source, 0, "%<>")
    end
  html = erb.result(BindingClazz.new(template_params).get_binding)

  # Write the rendered template out to a file
  File.open(@resource_destinations["illustrate_html"], 'w') { |f|
    f.write(html)
  }

  # Open a browser pointing to the rendered template output file
  action("Opening illustrate results from #{@resource_destinations["illustrate_html"]} ") do
    require "launchy"
    Launchy.open(File.expand_path(@resource_destinations["illustrate_html"]))
  end
end

#template_params_classpath(pig_version = nil) ⇒ Object



322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
# File 'lib/mortar/local/pig.rb', line 322

# Builds the ":"-separated classpath handed to the run script template.
#
# pig_version - pig version object (default: nil, in which case a new
#               Mortar::PigVersion::Pig09 is used).
def template_params_classpath(pig_version=nil)
  # Need to support old watchtower plugins that don't set pig_version
  pig_version = Mortar::PigVersion::Pig09.new if pig_version.nil?

  pig_home   = pig_directory(pig_version)
  shared_lib = lib_directory

  entries = [
    "#{pig_home}/*",
    "#{pig_home}/lib-local/*",
    "#{shared_lib}/lib-local/*",
    "#{pig_home}/lib-pig/*",
    "#{pig_home}/lib-cluster/*",
    "#{shared_lib}/lib-pig/*",
    "#{shared_lib}/lib-cluster/*",
    "#{jython_directory}/jython.jar",
    "#{shared_lib}/conf/jets3t.properties",
    "#{project_root}/lib/*"
  ]
  entries.join(":")
end

#validate_script(pig_script, pig_version, pig_parameters) ⇒ Object



179
180
181
# File 'lib/mortar/local/pig.rb', line 179

# Runs pig with the -check option on the given script.
#
# pig_script - object responding to #path.
def validate_script(pig_script, pig_version, pig_parameters)
  check_command = " -check #{pig_script.path}"
  run_pig_command(check_command, pig_version, pig_parameters)
end