Class: Mortar::Local::Pig

Inherits:
Object
  • Object
show all
Includes:
InstallUtil, Params
Defined in:
lib/mortar/local/pig.rb

Constant Summary collapse

PIG_LOG_FORMAT =
"humanreadable"
LIB_TGZ_NAME =
"lib-common.tar.gz"
PIG_COMMON_LIB_URL_PATH =
"resource/lib_common"
DEFAULT_PIGOPTS_FILES =

This needs to be defined for watchtower.

%w(
    /lib-common/conf/pig-hawk-global.properties
    /lib-common/conf/pig-cli-local-dev.properties
)

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Params

#automatic_parameters, #merge_parameters

Methods included from InstallUtil

#download_file, #ensure_mortar_local_directory, #extract_tgz, #get_resource, #gitignore_template_path, #head_resource, #http_date_to_epoch, #install_date, #install_file_for, #is_newer_version, #jython_cache_directory, #jython_directory, #local_install_directory, #local_install_directory_name, #local_log_dir, #local_project_gitignore, #local_udf_log_dir, #make_call, #make_call_sleep_seconds, #note_install, #osx?, #project_root, #render_script_template, #reset_local_logs, #run_templated_script, #unset_hadoop_env_vars, #url_date

Methods included from Helpers

#action, #ask, #confirm, #copy_if_not_present_at_dest, #default_host, #deprecate, #display, #display_header, #display_object, #display_row, #display_table, #display_with_indent, #download_to_file, #ensure_dir_exists, #error, error_with_failure, error_with_failure=, extended, extended_into, #format_bytes, #format_date, #format_with_bang, #get_terminal_environment, #home_directory, #host, #hprint, #hputs, included, included_into, #installed_with_omnibus?, #json_decode, #json_encode, #line_formatter, #longest, #output_with_bang, #pending_github_team_state_message, #quantify, #redisplay, #retry_on_exception, #running_on_a_mac?, #running_on_windows?, #set_buffer, #shell, #spinner, #status, #string_distance, #styled_array, #styled_error, #styled_hash, #styled_header, #suggestion, #test_name, #ticking, #time_ago, #truncate, #warning, #with_tty, #write_to_file

Constructor Details

#initializePig

Returns a new instance of Pig.



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/mortar/local/pig.rb', line 55

# Prepares a new instance:
#   * @temp_file_objects     - starts out as an empty list
#   * @resource_locations    - absolute source paths of the illustrate report
#                              assets, resolved relative to this source file
#   * @resource_destinations - relative paths (under "illustrate-output")
#                              for the rendered report and its assets
def initialize
  @temp_file_objects = []

  from_here = lambda { |relative_path| File.expand_path(relative_path, __FILE__) }

  @resource_locations = {
    "illustrate_template" => from_here.call("../../templates/report/illustrate-report.html"),
    "illustrate_css"      => from_here.call("../../../../css/illustrate.css"),
    "jquery"              => from_here.call("../../../../js/jquery-1.7.1.min.js"),
    "jquery_transit"      => from_here.call("../../../../js/jquery.transit.js"),
    "jquery_stylestack"   => from_here.call("../../../../js/jquery.stylestack.js"),
    "mortar_table"        => from_here.call("../../../../js/mortar-table.js"),
    "zeroclipboard"       => from_here.call("../../../../js/zero_clipboard.js"),
    "zeroclipboard_swf"   => from_here.call("../../../../flash/zeroclipboard.swf")
  }

  output_dir    = "illustrate-output"
  resources_dir = "#{output_dir}/resources"

  @resource_destinations = {
    "illustrate_html"   => "#{output_dir}/illustrate-output.html",
    "illustrate_css"    => "#{resources_dir}/css/illustrate-output.css",
    "jquery"            => "#{resources_dir}/js/jquery-1.7.1.min.js",
    "jquery_transit"    => "#{resources_dir}/js/jquery.transit.js",
    "jquery_stylestack" => "#{resources_dir}/js/jquery.stylestack.js",
    "mortar_table"      => "#{resources_dir}/js/mortar-table.js",
    "zeroclipboard"     => "#{resources_dir}/js/zero_clipboard.js",
    "zeroclipboard_swf" => "#{resources_dir}/flash/zeroclipboard.swf"
  }
end

Instance Attribute Details

#resource_destinationsObject

Returns the value of attribute resource_destinations.



53
54
55
# File 'lib/mortar/local/pig.rb', line 53

# Reader for @resource_destinations, the hash assigned in #initialize that
# maps resource names to their output paths under "illustrate-output".
def resource_destinations
  @resource_destinations
end

#resource_locationsObject

Returns the value of attribute resource_locations.



52
53
54
# File 'lib/mortar/local/pig.rb', line 52

# Reader for @resource_locations, the hash assigned in #initialize that
# maps resource names to the absolute paths of their source files.
def resource_locations
  @resource_locations
end

Instance Method Details

#automatic_pig_parametersObject



424
425
426
427
# File 'lib/mortar/local/pig.rb', line 424

# Deprecated alias: emits a deprecation notice through `warn` and then
# returns whatever #automatic_parameters returns.
def automatic_pig_parameters
  deprecation_notice = "[DEPRECATION] Please call automatic_parameters instead"
  warn(deprecation_notice)
  automatic_parameters
end

#command(pig_version) ⇒ Object



81
82
83
# File 'lib/mortar/local/pig.rb', line 81

# Path of the "bin/pig" file inside the install directory of the given
# pig version.
def command(pig_version)
  pig_home = pig_directory(pig_version)
  File.join(pig_home, "bin", "pig")
end

#create_illustrate_output_pathObject

Create a temp file to be used for writing the illustrate JSON output, and return its path. This data file will later be used to create the resulting HTML output. Tempfile will take care of cleaning up the file when we exit.



210
211
212
213
214
215
216
# File 'lib/mortar/local/pig.rb', line 210

# Creates an empty temp file for the illustrate output and returns its path.
#
# Tempfile is used for path generation and so that the file is removed when
# the process exits. The Tempfile object is also stored in
# @temp_file_objects: a Tempfile that is no longer referenced may be garbage
# collected, which deletes the underlying file while the path is still in
# use (the same precaution #make_pig_param_file takes).
#
# Returns the path (String) of the temp file.
def create_illustrate_output_path
  outfile = Tempfile.new("mortar-illustrate-output")
  # close(false): close the handle but keep the file on disk.
  outfile.close(false)

  # Hold a reference so the garbage collector cannot unlink the file early.
  (@temp_file_objects ||= []).push(outfile)

  outfile.path
end

#create_illustrate_template_parameters(illustrate_data) ⇒ Object



266
267
268
269
270
271
# File 'lib/mortar/local/pig.rb', line 266

# Picks the values the illustrate report template is given out of the
# decoded illustrate data: its 'tables' and 'udf_output' entries.
#
# Returns a new Hash with exactly those two keys.
def create_illustrate_template_parameters(illustrate_data)
  {
    'tables'     => illustrate_data['tables'],
    'udf_output' => illustrate_data['udf_output']
  }
end

#decode_illustrate_input_file(illustrate_outpath) ⇒ Object

Given a file path, open the file and decode the text it contains.



219
220
221
222
223
224
225
226
227
228
# File 'lib/mortar/local/pig.rb', line 219

# Reads the file at +illustrate_outpath+ and returns its content as UTF-8
# text with invalid byte sequences removed.
#
# Strategy, newest facility first:
#   * String#scrub, when available: treats the bytes as UTF-8 and drops
#     only the invalid sequences, so valid multi-byte characters survive.
#   * String#encode from 'binary': older fallback; note that this drops
#     every non-ASCII byte, not just the invalid ones.
#   * Iconv: for Rubies whose String has no #encode.
#
# The capability is detected with respond_to? rather than by rescuing
# NoMethodError, so an unrelated NoMethodError is no longer swallowed.
#
# Returns the cleaned String.
def decode_illustrate_input_file(illustrate_outpath)
  data_raw = File.read(illustrate_outpath)

  if data_raw.respond_to?(:scrub)
    data_raw.force_encoding('UTF-8').scrub('')
  elsif data_raw.respond_to?(:encode)
    data_raw.encode('UTF-8', 'binary', :invalid => :replace, :undef => :replace, :replace => '')
  else
    require 'iconv'
    Iconv.new('UTF-8//IGNORE', 'UTF-8').iconv(data_raw)
  end
end

#illustrate_alias(pig_script, pig_alias, skip_pruning, no_browser, pig_version, pig_parameters) ⇒ Object



273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
# File 'lib/mortar/local/pig.rb', line 273

# Runs pig's `illustrate` command for +pig_script+ and presents the result.
#
# pig_script     - object responding to #path
# pig_alias      - alias to illustrate; appended to the command when truthy
# skip_pruning   - when truthy, adds " -skipPruning "
# no_browser     - when truthy, requests " -str" output and displays the
#                  decoded output file; otherwise requests " -json" output
#                  and hands the file to #show_illustrate_output_browser
# pig_version    - passed through to #run_pig_command
# pig_parameters - written to a param file via #make_pig_param_file
#
# Returns nil when the pig run reports a falsy result; otherwise the value
# of the display / browser call.
def illustrate_alias(pig_script, pig_alias, skip_pruning, no_browser, pig_version, pig_parameters)
  # Parameters have to be supplied as part of the illustrate command itself
  # (as opposed to as a command line argument), otherwise pig reports an
  # 'Undefined parameter' error.
  param_file = make_pig_param_file(pig_parameters)
  illustrate_outpath = create_illustrate_output_path

  pieces = ["-e 'illustrate "]
  pieces << "-param_file #{param_file} "
  # Point at the script to illustrate and at the file to write to.
  pieces << "-script #{pig_script.path} -out #{illustrate_outpath} "
  pieces << " -skipPruning " if skip_pruning
  # The output-format flag also carries the closing quote of the -e string.
  pieces << (no_browser ? " -str '" : " -json '")
  pieces << " #{pig_alias} " if pig_alias
  cmd = pieces.join

  succeeded = run_pig_command(cmd, pig_version, [], false)
  return unless succeeded

  if no_browser
    display decode_illustrate_input_file(illustrate_outpath)
  else
    show_illustrate_output_browser(illustrate_outpath)
  end
end

#install_libObject



166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/mortar/local/pig.rb', line 166

# (Re)installs the "lib-common" archive into the local install directory:
# removes an existing lib directory, downloads the archive named
# LIB_TGZ_NAME from #lib_archive_url, extracts it, deletes the downloaded
# archive and records the install via note_install("lib-common").
def install_lib
  # Remove a previous install first so no stale files are left around.
  FileUtils.rm_rf(lib_directory) if File.directory?(lib_directory)

  FileUtils.mkdir_p(local_install_directory)

  archive_path = File.join(local_install_directory, LIB_TGZ_NAME)
  download_file(lib_archive_url, archive_path)
  extract_tgz(archive_path, local_install_directory)

  # The archive is only needed for the extraction step.
  File.delete(archive_path)
  note_install("lib-common")
end

#install_or_update(pig_version, command = nil) ⇒ Object



124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/mortar/local/pig.rb', line 124

# Makes sure both pig (for +pig_version+) and the shared pig dependencies
# are installed and current.
#
# pig_version - object responding to #name; identifies the pig install
# command     - optional value forwarded to the update check and to
#               #install_pig (its meaning is defined by those callees)
#
# For each of the two components: install when it is missing, otherwise
# reinstall when the corresponding should_do_*_update? check says so.
#
# +command+ is forwarded to #install_pig on the update path as well as on
# the fresh-install path, so both kinds of download are issued identically.
def install_or_update(pig_version, command=nil)
  if should_do_pig_install?(pig_version)
    action "Installing #{pig_version.name} to #{local_install_directory_name}" do
      install_pig(pig_version, command)
    end
  elsif should_do_pig_update?(pig_version, command)
    action "Updating to latest #{pig_version.name} in #{local_install_directory_name}" do
      install_pig(pig_version, command)
    end
  end

  if should_do_lib_install?
    action "Installing pig dependencies to #{local_install_directory_name}" do
      install_lib()
    end
  elsif should_do_lib_update?
    action "Updating to latest pig dependencies in #{local_install_directory_name}" do
      install_lib()
    end
  end
end

#install_pig(pig_version, command = nil) ⇒ Object

Installs pig for this project, first removing any existing install of the same pig version.



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/mortar/local/pig.rb', line 147

# (Re)installs pig for +pig_version+ into the local install directory.
#
# pig_version - object responding to #name and #tgz_name
# command     - optional value handed on to download_file
#
# Steps: remove an existing directory for this version, download the
# archive from #pig_archive_url, extract it, make the pig launcher
# executable, delete the archive and record the install via note_install.
def install_pig(pig_version, command=nil)
  # Remove a previous install first so no stale files are left around.
  existing_install = pig_directory(pig_version)
  FileUtils.rm_rf(existing_install) if File.directory?(existing_install)

  FileUtils.mkdir_p(local_install_directory)

  archive_path = File.join(local_install_directory, pig_version.tgz_name)
  download_file(pig_archive_url(pig_version), archive_path, command)
  extract_tgz(archive_path, local_install_directory)

  # The launcher has been seen coming out of the tgz without +x, so set
  # the permissions explicitly. Note: `command(pig_version)` is a call to
  # the #command method, not a use of the `command` parameter.
  launcher = command(pig_version)
  FileUtils.chmod(0755, launcher)

  File.delete(archive_path)
  note_install(pig_version.name)
end

#launch_repl(pig_version, pig_parameters) ⇒ Object



185
186
187
188
189
190
191
192
193
194
195
196
197
198
# File 'lib/mortar/local/pig.rb', line 185

# Starts an interactive pig session (pig is run with a blank sub-command).
#
# The REPL is very likely to be run outside a mortar project, and almost as
# likely from the user's home directory. The default log4j config refers to
# the pig log file as ../logs/local-pig.log, a path relative to the
# 'pigscripts' directory. Outside a project there is no pigscripts
# directory to cd into, so log4j prints an ugly error when it cannot create
# /home/logs/local-pig.log. To work around this, a copy of the log4j
# configuration is written in which the log file path is no longer relative
# to a parent directory.
#
# The source configuration is read with File.read, which closes the file
# itself, and it is read before the destination is opened for writing, so
# an unreadable source no longer leaves a truncated copy behind.
#
# Returns the result of #run_pig_command.
def launch_repl(pig_version, pig_parameters)
  no_project_conf = File.read(log4j_conf).gsub(/log4j.appender.LogFileAppender.File=.*\n/,
                                               "log4j.appender.LogFileAppender.File=local-pig.log\n")
  File.open(log4j_conf_no_project, 'w') do |out|
    out << no_project_conf
  end
  run_pig_command(" ", pig_version, pig_parameters)
end

#lib_archive_urlObject



99
100
101
102
103
# File 'lib/mortar/local/pig.rb', line 99

# URL the lib-common archive is downloaded from.
#
# The COMMON_LIB_DISTRO_URL environment variable wins when it is set.
# Otherwise the URL is PIG_COMMON_LIB_URL_PATH on `host`, where a host that
# does not already begin with "http" is prefixed with "https://api.".
def lib_archive_url
  scheme_prefix = (host =~ /^http/) ? "" : "https://api."
  fallback_url = "#{scheme_prefix}#{host}/#{PIG_COMMON_LIB_URL_PATH}"
  ENV.fetch('COMMON_LIB_DISTRO_URL', fallback_url)
end

#lib_directoryObject



89
90
91
# File 'lib/mortar/local/pig.rb', line 89

# Path of the "lib-common" directory inside the local install directory.
def lib_directory
  install_root = local_install_directory
  File.join(install_root, "lib-common")
end

#log4j_confObject



354
355
356
# File 'lib/mortar/local/pig.rb', line 354

# Path of the log4j properties file for local development runs, located
# under the lib-common "conf" directory.
def log4j_conf
  [lib_directory, "conf", "log4j-cli-local-dev.properties"].join("/")
end

#log4j_conf_no_projectObject



358
359
360
# File 'lib/mortar/local/pig.rb', line 358

# Path of the log4j properties file used when running outside a project,
# located under the lib-common "conf" directory.
def log4j_conf_no_project
  [lib_directory, "conf", "log4j-cli-local-no-project.properties"].join("/")
end

#make_pig_param_file(pig_parameters) ⇒ Object

Given a set of user-specified pig parameters, combine them with the automatic mortar parameters and write them out to a tempfile, returning its path so it may be referenced later in the process.



407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
# File 'lib/mortar/local/pig.rb', line 407

# Writes the automatic mortar parameters followed by the user supplied
# +pig_parameters+ to a temp file, one "name=value" line each, and returns
# the file's path so it can be referenced later in the process.
#
# pig_parameters - Array of hashes with 'name' and 'value' keys, or nil
#                  (treated as "no user parameters"; #run_pig_command
#                  defaults its parameters argument to nil)
#
# The combined list is built with `+`, so the array returned by
# #automatic_parameters is never modified.
#
# Returns the path (String) of the parameter file.
def make_pig_param_file(pig_parameters)
  all_parameters = automatic_parameters() + (pig_parameters || [])

  param_file = Tempfile.new("mortar-pig-parameters")
  all_parameters.each do |param|
    param_file.write("#{param['name']}=#{param['value']}\n")
  end
  # close(false): flush and close the handle but keep the file on disk.
  param_file.close(false)

  # Keep a reference to the tempfile object so that the garbage collector
  # does not delete the file out from under us.
  (@temp_file_objects ||= []).push(param_file)

  param_file.path
end

#pig_archive_url(pig_version) ⇒ Object



93
94
95
96
97
# File 'lib/mortar/local/pig.rb', line 93

# URL the pig archive for +pig_version+ is downloaded from.
#
# The PIG_DISTRO_URL environment variable wins when it is set. Otherwise
# the URL is pig_version.tgz_default_url_path on `host`, where a host that
# does not already begin with "http" is prefixed with "https://api.".
def pig_archive_url(pig_version)
  scheme_prefix = (host =~ /^http/) ? "" : "https://api."
  fallback_url = "#{scheme_prefix}#{host}/#{pig_version.tgz_default_url_path}"
  ENV.fetch('PIG_DISTRO_URL', fallback_url)
end

#pig_classpath(pig_version) ⇒ Object



342
343
344
345
346
347
348
349
350
351
352
# File 'lib/mortar/local/pig.rb', line 342

# Builds the ":"-separated classpath string for +pig_version+ from the
# pig install directory, the lib-common directory, the jython jar and the
# project's own lib directory. The order of the entries is significant and
# is kept exactly as listed.
def pig_classpath(pig_version)
  pig_home   = pig_directory(pig_version)
  common_lib = lib_directory

  entries = []
  entries << "#{pig_home}/lib-local/*"
  entries << "#{common_lib}/lib-local/*"
  entries << "#{pig_home}/lib-pig/*"
  entries << "#{pig_home}/lib-cluster/*"
  entries << "#{common_lib}/lib-pig/*"
  entries << "#{common_lib}/lib-cluster/*"
  entries << "#{jython_directory}/jython.jar"
  entries << "#{project_root}/lib/*"
  entries.join(":")
end

#pig_command_script_template_parameters(cmd, pig_version, pig_parameters) ⇒ Object

Parameters necessary for rendering the bash script template



363
364
365
366
367
368
369
370
371
372
373
374
375
# File 'lib/mortar/local/pig.rb', line 363

# Collects the values needed to render the bash script template that
# runs pig.
#
# cmd            - pig sub-command string, stored under 'pig_sub_command'
# pig_version    - object responding to #name; selects the pig install
# pig_parameters - user parameters, written out via #make_pig_param_file
#
# 'classpath' is built for the same +pig_version+ as 'pig_home' and
# 'pig_classpath', so all three refer to one pig install rather than
# 'classpath' falling back to #template_params_classpath's default version.
#
# Returns a Hash of template parameters.
def pig_command_script_template_parameters(cmd, pig_version, pig_parameters)
  template_params = {}
  template_params['pig_params_file'] = make_pig_param_file(pig_parameters)
  template_params['pig_dir'] = pig_version.name
  template_params['pig_home'] = pig_directory(pig_version)
  template_params['pig_classpath'] = pig_classpath(pig_version)
  template_params['classpath'] = template_params_classpath(pig_version)
  template_params['log4j_conf'] = log4j_conf
  template_params['no_project_log4j_conf'] = log4j_conf_no_project
  template_params['pig_sub_command'] = cmd
  template_params['pig_opts'] = pig_options
  template_params
end

#pig_command_script_template_pathObject

Path to the template which generates the bash script for running pig



320
321
322
# File 'lib/mortar/local/pig.rb', line 320

# Absolute path of the "runpig.sh" template, found under
# templates/script two directory levels up from this source file.
def pig_command_script_template_path
  base_dir = File.expand_path("../..", __FILE__)
  File.join(base_dir, "templates", "script", "runpig.sh")
end

#pig_directory(pig_version) ⇒ Object



85
86
87
# File 'lib/mortar/local/pig.rb', line 85

# Directory of the given pig version inside the local install directory;
# the directory is named after pig_version.name.
def pig_directory(pig_version)
  install_root = local_install_directory
  File.join(install_root, pig_version.name)
end

#pig_optionsObject

Returns a hash of settings that need to be passed in via pig options



379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
# File 'lib/mortar/local/pig.rb', line 379

# Builds the hash of settings passed to pig as options: AWS credentials
# from the AWS_ACCESS_KEY / AWS_SECRET_KEY environment variables (nil when
# unset), logging locations, jython/python settings and Kerberos
# properties that differ between OS X and other platforms.
#
# Returns a new Hash on every call.
def pig_options
  access_key = ENV['AWS_ACCESS_KEY']
  secret_key = ENV['AWS_SECRET_KEY']

  opts = {
    'fs.s3n.awsAccessKeyId'     => access_key,
    'fs.s3n.awsSecretAccessKey' => secret_key,
    'fs.s3.awsAccessKeyId'      => access_key,
    'fs.s3.awsSecretAccessKey'  => secret_key,
    'pig.events.logformat'      => PIG_LOG_FORMAT,
    'pig.logfile'               => "#{local_log_dir}/local-pig.log",
    'pig.udf.scripting.log.dir' => local_udf_log_dir,
    'python.verbose'            => 'error',
    'jython.output'             => true,
    'python.home'               => jython_directory,
    # Both lib directories are addressed relative to the install directory.
    'python.path'               => ["controlscripts/lib", "vendor/controlscripts/lib"].map { |lib|
                                     "#{local_install_directory}/../#{lib}"
                                   }.join(":"),
    'python.cachedir'           => jython_cache_directory
  }

  # NOTE(review): the fixed realm/kdc values on OS X presumably silence a
  # JVM Kerberos configuration lookup problem - confirm before changing.
  if osx?
    opts.update(
      'java.security.krb5.realm' => 'OX.AC.UK',
      'java.security.krb5.kdc'   => 'kdc0.ox.ac.uk:kdc1.ox.ac.uk',
      'java.security.krb5.conf'  => '/dev/null'
    )
  else
    opts.update(
      'java.security.krb5.realm' => '',
      'java.security.krb5.kdc'   => ''
    )
  end

  opts
end

#run_pig_command(cmd, pig_version, parameters = nil, jython_output = true) ⇒ Object

Run pig with the specified command (‘command’ is anything that can be appended to the command line invocation of Pig that will get it to do something interesting, such as ‘-f some-file.pig’).



313
314
315
316
317
# File 'lib/mortar/local/pig.rb', line 313

# Runs pig with the given command string by rendering and executing the
# pig bash script template.
#
# cmd           - anything that can be appended to pig's command line
# pig_version   - selects the pig install
# parameters    - user supplied pig parameters (defaults to nil)
# jython_output - value stored as the 'jython.output' pig option
#                 (defaults to true)
#
# Returns whatever run_templated_script returns.
def run_pig_command(cmd, pig_version, parameters = nil, jython_output = true)
  script_params = pig_command_script_template_parameters(cmd, pig_version, parameters)
  pig_opts = script_params['pig_opts']
  pig_opts['jython.output'] = jython_output
  run_templated_script(pig_command_script_template_path, script_params)
end

#run_script(pig_script, pig_version, pig_parameters) ⇒ Object

run the pig script with user supplied pig parameters



202
203
204
# File 'lib/mortar/local/pig.rb', line 202

# Runs +pig_script+ (an object responding to #path) through pig's "-f"
# option with the user supplied parameters; jython output is requested
# explicitly.
def run_script(pig_script, pig_version, pig_parameters)
  run_file_command = " -f #{pig_script.path}"
  run_pig_command(run_file_command, pig_version, pig_parameters, true)
end

#should_do_lib_install?Boolean

Returns:

  • (Boolean)


110
111
112
# File 'lib/mortar/local/pig.rb', line 110

# Whether the pig dependencies still have to be installed: true when
# nothing exists at #lib_directory.
#
# Uses File.exist?; the File.exists? alias is deprecated and is no longer
# available on newer Ruby versions.
def should_do_lib_install?
  !File.exist?(lib_directory)
end

#should_do_lib_update?Boolean

Returns:

  • (Boolean)


120
121
122
# File 'lib/mortar/local/pig.rb', line 120

# Whether the installed "lib-common" should be refreshed, as decided by
# is_newer_version for the archive at #lib_archive_url.
def should_do_lib_update?
  archive_url = lib_archive_url
  is_newer_version('lib-common', archive_url)
end

#should_do_pig_install?(pig_version) ⇒ Boolean

Determines if a pig install needs to occur, true if no pig install present

Returns:

  • (Boolean)


106
107
108
# File 'lib/mortar/local/pig.rb', line 106

# Whether pig still has to be installed for +pig_version+: true when
# nothing exists at that version's #pig_directory.
#
# Uses File.exist?; the File.exists? alias is deprecated and is no longer
# available on newer Ruby versions.
def should_do_pig_install?(pig_version)
  !File.exist?(pig_directory(pig_version))
end

#should_do_pig_update?(pig_version, command = nil) ⇒ Boolean

Determines if a pig update needs to occur; true if the server-side pig tgz is newer than the date of the existing install.

Returns:

  • (Boolean)


116
117
118
# File 'lib/mortar/local/pig.rb', line 116

# Whether the installed pig for +pig_version+ should be refreshed, as
# decided by is_newer_version for the archive at #pig_archive_url.
# +command+ is passed through to is_newer_version unchanged.
def should_do_pig_update?(pig_version, command=nil)
  install_name = pig_version.name
  archive_url = pig_archive_url(pig_version)
  is_newer_version(install_name, archive_url, command)
end

#show_illustrate_output_browser(illustrate_outpath) ⇒ Object



230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
# File 'lib/mortar/local/pig.rb', line 230

# Renders the illustrate results as an HTML report and opens it in a
# browser.
#
# illustrate_outpath - path of the JSON file written by the illustrate run
#
# Steps:
#   1. make sure the "illustrate-output" directory tree exists
#   2. copy the static report assets to their destinations (only when not
#      already present there)
#   3. decode the JSON file and render the report template with its
#      'tables' / 'udf_output' values
#   4. write the HTML to the "illustrate_html" destination and open it
#
# The ERB template is created with the "%<>" trim mode. The trim mode is
# passed as a keyword argument when this Ruby's ERB accepts one, because
# newer ERB versions reject the old positional form; the positional form is
# kept only as a fallback for older Rubies.
def show_illustrate_output_browser(illustrate_outpath)
  ensure_dir_exists("illustrate-output")
  ensure_dir_exists("illustrate-output/resources")
  ensure_dir_exists("illustrate-output/resources/css")
  ensure_dir_exists("illustrate-output/resources/js")
  ensure_dir_exists("illustrate-output/resources/flash")

  ["illustrate_css",
   "jquery", "jquery_transit", "jquery_stylestack",
   "mortar_table", "zeroclipboard", "zeroclipboard_swf"].each { |resource|
    copy_if_not_present_at_dest(@resource_locations[resource], @resource_destinations[resource])
  }

  # Pull in the dumped json file
  illustrate_data_json_text = decode_illustrate_input_file(illustrate_outpath)
  illustrate_data = json_decode(illustrate_data_json_text)

  # Render a template using its values
  template_params = create_illustrate_template_parameters(illustrate_data)

  template_text = File.read(@resource_locations["illustrate_template"])
  erb_initializer = ERB.instance_method(:initialize)
  accepts_trim_mode_keyword = erb_initializer.respond_to?(:parameters) &&
    erb_initializer.parameters.any? { |_kind, name| name == :trim_mode }
  erb = if accepts_trim_mode_keyword
    ERB.new(template_text, :trim_mode => "%<>")
  else
    ERB.new(template_text, 0, "%<>")
  end
  html = erb.result(BindingClazz.new(template_params).get_binding)

  # Write the rendered template out to a file
  File.open(@resource_destinations["illustrate_html"], 'w') { |f|
    f.write(html)
  }

  # Open a browser pointing to the rendered template output file
  action("Opening illustrate results from #{@resource_destinations["illustrate_html"]} ") do
    # Loaded lazily: only needed when a browser is actually opened.
    require "launchy"
    Launchy.open(File.expand_path(@resource_destinations["illustrate_html"]))
  end
end

#template_params_classpath(pig_version = nil) ⇒ Object



324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
# File 'lib/mortar/local/pig.rb', line 324

# Builds the ":"-separated classpath string handed to the script template
# as 'classpath'.
#
# pig_version - the pig version to build the classpath for; when nil a
#               Mortar::PigVersion::Pig09 instance is used, because old
#               watchtower plugins call this without a version.
#
# The order of the entries is significant and is kept exactly as listed.
def template_params_classpath(pig_version=nil)
  # Need to support old watchtower plugins that don't set pig_version
  pig_version = Mortar::PigVersion::Pig09.new if pig_version.nil?

  pig_home   = pig_directory(pig_version)
  common_lib = lib_directory

  entries = []
  entries << "#{pig_home}/*"
  entries << "#{pig_home}/lib-local/*"
  entries << "#{common_lib}/lib-local/*"
  entries << "#{pig_home}/lib-pig/*"
  entries << "#{pig_home}/lib-cluster/*"
  entries << "#{common_lib}/lib-pig/*"
  entries << "#{common_lib}/lib-cluster/*"
  entries << "#{jython_directory}/jython.jar"
  entries << "#{common_lib}/conf/jets3t.properties"
  entries << "#{project_root}/lib/*"
  entries.join(":")
end

#validate_script(pig_script, pig_version, pig_parameters) ⇒ Object



181
182
183
# File 'lib/mortar/local/pig.rb', line 181

# Checks +pig_script+ (an object responding to #path) by running pig with
# its "-check" option and the user supplied parameters.
def validate_script(pig_script, pig_version, pig_parameters)
  check_command = " -check #{pig_script.path}"
  run_pig_command(check_command, pig_version, pig_parameters)
end