Class: Humboldt::EmrFlow

Inherits:
Object
  • Object
show all
Defined in:
lib/humboldt/emr_flow.rb

Defined Under Namespace

Modules: InstanceGroupConfiguration

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(*args) ⇒ EmrFlow

Returns a new instance of EmrFlow.



7
8
9
10
# File 'lib/humboldt/emr_flow.rb', line 7

# Builds a flow from positional arguments, in this order:
# job_name, input_glob, package, emr, data_bucket, job_bucket, output_path.
#
# Missing trailing arguments are left as nil and extra arguments are ignored.
# When output_path is omitted (or falsy) it defaults to
# "<package.project_name>/<job_name>/output", so +package+ must respond to
# +project_name+ in that case.
#
# @param args [Array] the positional values listed above
#   (NOTE(review): concrete types of emr / buckets are not visible here — confirm)
def initialize(*args)
  @job_name    = args[0]
  @input_glob  = args[1]
  @package     = args[2]
  @emr         = args[3]
  @data_bucket = args[4]
  @job_bucket  = args[5]
  # Fall back to the per-job default location when no explicit path was given.
  @output_path = args[6] || "#{@package.project_name}/#{@job_name}/output"
end

Instance Attribute Details

#output_path ⇒ Object (readonly)

Returns the value of attribute output_path.



5
6
7
# File 'lib/humboldt/emr_flow.rb', line 5

# Reader for the output path stored in @output_path.
#
# @return [Object] the value held in @output_path; the constructor sets it to
#   either the caller-supplied path or "<project_name>/<job_name>/output"
def output_path
  @output_path
end

Instance Method Details

#cleanup! ⇒ Object



17
18
19
# File 'lib/humboldt/emr_flow.rb', line 17

# Cleans up after a flow by delegating to delete_output_dir!.
#
# NOTE(review): delete_output_dir! is not visible here; presumably it removes
# whatever lives at output_path — confirm before relying on that.
#
# @return [Object] whatever delete_output_dir! returns
def cleanup!
  delete_output_dir!
end

#jar_path ⇒ Object



27
28
29
# File 'lib/humboldt/emr_flow.rb', line 27

# Relative path of the job JAR: the package's project name, a slash, and the
# file name (directory part stripped) of the package's own jar_path.
#
# @return [String] "<project_name>/<jar file name>"
def jar_path
  project_dir = @package.project_name
  jar_file = File.basename(@package.jar_path)
  "#{project_dir}/#{jar_file}"
end

#jar_uri ⇒ Object



31
32
33
# File 'lib/humboldt/emr_flow.rb', line 31

# URI form of #jar_path, produced by passing it to s3_uri.
#
# NOTE(review): s3_uri is not visible here; which bucket it targets should be
# confirmed against its definition.
#
# @return [Object] whatever s3_uri returns for the JAR's relative path
def jar_uri
  relative_jar_path = jar_path
  s3_uri(relative_jar_path)
end

#log_path ⇒ Object



39
40
41
# File 'lib/humboldt/emr_flow.rb', line 39

# Relative path under which this job's logs are kept, built from the package's
# project name and the job name.
#
# @return [String] "<project_name>/<job_name>/logs"
def log_path
  project_dir = @package.project_name
  job_dir = @job_name
  "#{project_dir}/#{job_dir}/logs"
end

#output_uri ⇒ Object



35
36
37
# File 'lib/humboldt/emr_flow.rb', line 35

# URI form of #output_path, produced by passing it to s3_uri.
#
# NOTE(review): s3_uri is not visible here; which bucket it targets should be
# confirmed against its definition.
#
# @return [Object] whatever s3_uri returns for the output path
def output_uri
  relative_output_path = output_path
  s3_uri(relative_output_path)
end

#prepare! ⇒ Object



12
13
14
15
# File 'lib/humboldt/emr_flow.rb', line 12

# Prepares the flow by running two upload steps in order:
# upload_bootstrap_task_files! first, then upload_jar!.
#
# NOTE(review): both helpers are defined outside this view; presumably they
# push files to S3 — confirm destinations against their definitions.
#
# @return [Object] whatever upload_jar! returns (it is the last expression)
def prepare!
  upload_bootstrap_task_files!
  upload_jar!
end

#run!(launch_options = {}) ⇒ Object



21
22
23
24
25
# File 'lib/humboldt/emr_flow.rb', line 21

# Launches the flow: runs check_jar! and check_output_dir! first, then calls
# create_flow! with the given options.
#
# NOTE(review): the check_* helpers are not visible here; presumably they
# raise or abort when the JAR is missing or the output location is unusable —
# confirm against their definitions.
#
# @param launch_options [Hash] passed through unchanged to create_flow!;
#   defaults to an empty hash
# @return [Object] whatever create_flow! returns
def run!(launch_options={})
  check_jar!
  check_output_dir!
  create_flow!(launch_options)
end