6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
|
# File 'lib/simple_map_reduce/worker/run_map_task_worker.rb', line 6
# Runs the map phase of a job on this worker and hands the result to shuffle.
#
# The method:
# 1. evaluates +job.map_script+ inside a throw-away wrapper class and
#    instantiates +job.map_class_name+ from it,
# 2. downloads the job input from S3 into a local temp file,
# 3. feeds every input line to the task's +map+ method, which writes into a
#    local output temp file,
# 4. asks the job tracker to reserve workers and passes them, together with
#    the map output, to +shuffle+,
# 5. reports this worker as 'ready' unless it is one of the workers handed
#    to +shuffle+.
#
# Any StandardError is logged and swallowed; temp files and the wrapper
# class are always cleaned up.
#
# @param job [Object] job description; this method reads +id+, +map_script+,
#   +map_class_name+, +job_input_bucket_name+, +job_input_directory_path+
#   and +map_worker_url+ from it
# @param map_worker_id [Object] identifier of the worker running this task
# @return [void]
def perform(job, map_worker_id)
  task_wrapper_class_name = "TaskWrapper#{job.id.delete('-')}"

  # NOTE(security): job.map_script is evaluated as Ruby code in this process.
  # The wrapper class only namespaces the constants it defines; it is not a
  # sandbox. Only run scripts from trusted job submitters.
  self.class.class_eval("class #{task_wrapper_class_name}; end", 'Task Wrapper Class')
  task_wrapper_class = self.class.const_get(task_wrapper_class_name)
  task_wrapper_class.class_eval(job.map_script, 'Map task script')
  map_task = task_wrapper_class.const_get(job.map_class_name, false).new
  unless map_task.respond_to?(:map)
    logger.error('no map method')
    return
  end

  logger.info('map task start')
  local_input_cache = Tempfile.new
  s3_client.get_object(
    response_target: local_input_cache.path,
    bucket: job.job_input_bucket_name,
    key: job.job_input_directory_path
  )
  local_input_cache.rewind

  local_output_cache = Tempfile.new
  # The line separator is positional in IO#each_line; only chomp is a keyword.
  local_input_cache.each_line("\n", chomp: true) do |line|
    map_task.map(line, local_output_cache)
  end
  local_output_cache.rewind

  logger.debug("output data size: #{local_output_cache.size}")
  logger.debug('---map output digest---')
  local_output_cache.take(5).each { |line| logger.debug(line) }
  logger.debug('---map output digest---')
  # Reading the digest advanced the read position; reset it so shuffle
  # receives the output positioned at its first line.
  local_output_cache.rewind

  response = http_client(SimpleMapReduce.job_tracker_url).post do |request|
    request.url('/workers/reserve')
    request.body = { worker_size: 2 }.to_json
  end
  logger.debug(response.body)

  reserved_workers = JSON.parse(response.body, symbolize_names: true)[:reserved_workers]
  # No worker could be reserved: fall back to this worker itself.
  if reserved_workers.empty?
    reserved_workers << { id: map_worker_id, url: job.map_worker_url, state: 'working' }
  end

  shuffle(job, reserved_workers, local_output_cache)

  # When this worker is not among the workers passed to shuffle, tell the job
  # tracker it is ready again.
  if reserved_workers.none? { |worker| worker[:id] == map_worker_id }
    response = http_client(SimpleMapReduce.job_tracker_url).put do |request|
      request.url("/workers/#{map_worker_id}")
      request.body = { event: 'ready' }.to_json
    end
    logger.debug(response.body)
  end
rescue => e
  logger.error(e.inspect)
  logger.error(e.backtrace.take(50))
ensure
  # close! closes the descriptor and unlinks the file; delete only unlinks.
  local_input_cache&.close!
  local_output_cache&.close!
  # :shuffled_local_output is not set in this method; presumably shuffle
  # attaches a temp file per worker - verify against shuffle.
  reserved_workers&.each do |worker|
    worker[:shuffled_local_output]&.delete
  end
  # The name is nil when building it failed (e.g. job.id raised). Only look at
  # constants owned by this class, since remove_const cannot remove inherited ones.
  if task_wrapper_class_name && self.class.const_defined?(task_wrapper_class_name, false)
    self.class.send(:remove_const, task_wrapper_class_name)
  end
  logger.info('map task end')
end
|