Class: Hodor::Hdfs

Inherits: Object
Includes: Singleton
Defined in: lib/hodor/api/hdfs.rb
Overview

A singleton that wraps common HDFS operations (put, get, ls and rm), running 'hadoop fs' commands on the configured ssh host as the configured HDFS user (via HADOOP_USER_NAME).
Defined Under Namespace
Classes: FailedToGetFile, FailedToPutDir, FailedToPutFile, FailedToRemovePath
Instance Method Summary
Instance Method Details
#env ⇒ Object
Returns the Hodor::Environment singleton.

# File 'lib/hodor/api/hdfs.rb', line 11

def env
  Hodor::Environment.instance
end
#get_file(file, options = {}) ⇒ Object
Gets a file from HDFS and copies it to a local file. The file is first fetched onto the remote ssh host, then scp'ed to the local working directory as '<file>.hdfs_copy'.
# File 'lib/hodor/api/hdfs.rb', line 197

def get_file(file, options = {})
  disc_path = env.path_on_disc(file)
  hdfs_path = path_on_hdfs(file)
  git_path = env.path_on_github(file)
  dest_path = "#{file}.hdfs_copy"
  logger.info "\tgetting '#{git_path}'. Writing to '#{dest_path}'."
  get_script = %Q["rm -f #{dest_path}; HADOOP_USER_NAME=#{user} hadoop fs -get #{hdfs_path} #{dest_path}"]
  env.ssh get_script, echo: true, echo_cmd: true
  if options[:clobber]
    FileUtils.rm_f dest_path
  end
  env.run_local %Q[scp #{env.ssh_user}@#{env[:ssh_host]}:#{dest_path} .],
    echo: true, echo_cmd: true
rescue StandardError => ex
  raise FailedToGetFile.new ex,
    msg: "Unable to get file from HDFS.",
    ssh_user: env.ssh_user,
    path_on_disc: disc_path,
    path_on_github: git_path,
    path_on_hdfs: hdfs_path,
    dest_file: dest_path
end
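A minimal usage sketch (the project path is hypothetical; assumes a configured environment with a reachable ssh host):

require 'hodor'

hdfs = Hodor::Hdfs.instance
# Removes any stale local copy, then fetches the cluster's version to
# './drivers/init.hql.hdfs_copy' via the ssh host.
hdfs.get_file('drivers/init.hql', clobber: true)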
#hdfs_root ⇒ Object
Returns the :hdfs_root setting for the current environment.

# File 'lib/hodor/api/hdfs.rb', line 19

def hdfs_root
  env.settings[:hdfs_root]
end
#logger ⇒ Object
Returns the environment's logger.

# File 'lib/hodor/api/hdfs.rb', line 15

def logger
  env.logger
end
#ls ⇒ Object
Lists the contents of the HDFS directory corresponding to the current local directory.

# File 'lib/hodor/api/hdfs.rb', line 75

def ls
  dest_path = path_on_hdfs(".")
  ls_script = %Q[HADOOP_USER_NAME=#{user} hadoop fs -ls #{dest_path}]
  env.ssh ls_script, echo: true
rescue StandardError => ex
  raise FailedToRemovePath.new ex,
    msg: "Unable to list HDFS path.",
    ssh_user: env.ssh_user,
    path_to_list: dest_path
end
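For example, from inside a deployed project directory (assuming a configured environment):

Hodor::Hdfs.instance.ls   # echoes 'hadoop fs -ls' output for the mirrored path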
#path_on_hdfs(file) ⇒ Object
Maps a project file to its location on HDFS by prefixing hdfs_root to the file's path on GitHub, then normalizing redundant slashes and '.' segments.

# File 'lib/hodor/api/hdfs.rb', line 27

def path_on_hdfs(file)
  git_path = env.path_on_github(file)
  "#{hdfs_root}/#{git_path}".sub(/\/\/\//, '/').sub(/\/\//, '/').sub(/\/\.\//, '/').sub(/\/\.$/, '')
end
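For illustration, if hdfs_root were '/shared/pipeline' and the file resolved to 'workflows/load/workflow.xml' on GitHub (both values hypothetical):

Hodor::Hdfs.instance.path_on_hdfs('workflow.xml')
# => "/shared/pipeline/workflows/load/workflow.xml"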
#put_dir(path, options) ⇒ Object
Recursively deploys a local directory to HDFS. A per-target '.hdfs-<target>.sync' marker file records the last deploy, so subsequent runs push only files modified since then; properties files and editor/build artifacts are skipped. Honors the environment's dryrun, verbose, silent and clean flags.

# File 'lib/hodor/api/hdfs.rb', line 132

def put_dir(path, options)
  if env.dryrun? and env.verbose?
    logger.info ""
    logger.info " ********************* Dry Run *********************"
    logger.info ""
  end
  disc_path = env.path_on_disc(path)
  git_path = env.path_on_github(path)
  hdfs_path = path_on_hdfs(path)
  sync_file = "#{disc_path}/.hdfs-#{target}.sync"
  logger.info "Deploying: #{git_path}" unless env.silent?
  fail "Path '#{disc_path}' not found." unless File.exists?(disc_path)
  fail "Path '#{disc_path}' exists but is not a directory." unless File.directory?(disc_path)
  if env.clean?
    logger.info " cleaning: #{git_path}"
    FileUtils.rm_f sync_file unless env.dryrun?
    rm_rf(git_path)
    clean_done = true
  end
  # Only push files newer than the sync marker, unless this is a clean deploy
  fargs = if sync_file && File.exists?(sync_file) && !env.clean?
    "-newer '#{sync_file}'"
  else
    ""
  end
  fargs << " -maxdepth #{options[:maxdepth]}" unless options[:maxdepth].nil?
  mod_files = env.run_local %Q[find #{disc_path} #{fargs} -type f]
  mod_files.split("\n").each { |file|
    basename = File.basename(file)
    # Skip properties files, editor/build artifacts and migration scripts
    next if basename.start_with?('job.properties') ||
            basename.eql?("run.properties") ||
            basename.eql?(".DS_Store") ||
            basename.eql?(".bak") ||
            basename.eql?(".tmp") ||
            basename.eql?(".hdfs") ||
            basename.eql?("Rakefile") ||
            basename.end_with?(".sync") ||
            file.include?("migrations/") ||
            file.include?(".bak/") ||
            file.include?(".tmp/")
    put_file(file, already_cleaned: clean_done)
  }
rescue StandardError => ex
  raise FailedToPutDir.new ex,
    msg: "Unable to write directory to HDFS.",
    ssh_user: env.ssh_user,
    path_on_disc: disc_path,
    path_on_github: git_path,
    path_on_hdfs: hdfs_path,
    sync_file: sync_file,
    max_depth: options[:maxdepth],
    clean: env.clean? ? "true" : "false"
else
  env.run_local %Q[touch '#{sync_file}'] unless env.dryrun?
end
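A sketch of a typical deploy (the directory path is hypothetical). On a second run, the sync marker limits the push to files modified since the previous deploy:

hdfs = Hodor::Hdfs.instance
# Deploy only the top level of the directory; the skip rules and sync
# marker are applied as shown in the source above.
hdfs.put_dir('workflows/load', maxdepth: 1)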
#put_file(file, options = {}) ⇒ Object
Puts a local file on HDFS, preserving its repository-relative path and replacing any existing copy. Files with .erb extensions are ERB-expanded before deployment.
# File 'lib/hodor/api/hdfs.rb', line 91

def put_file(file, options = {})
  disc_path = env.path_on_disc(file)
  hdfs_path = path_on_hdfs(file)
  git_path = env.path_on_github(file)
  raise "File '#{disc_path}' not found." if !File.exists?(disc_path)
  logger.info "\tdeploying '#{git_path}'"
  src_file = file
  if disc_path.end_with?('.erb')
    # Expand the ERB template to /tmp and deploy without the .erb suffix
    erb_expanded = env.erb_load(disc_path)
    src_file = "/tmp/#{File.basename(disc_path.sub(/\.erb$/, ''))}"
    hdfs_path.sub!(/\.erb$/, '')
    File.open(src_file, 'w') { |f| f.write(erb_expanded) }
  end
  raise "File '#{src_file}' not found." if !File.exists?(src_file)
  put_script = "HADOOP_USER_NAME=#{user} hadoop fs -put - #{hdfs_path}"
  unless options[:already_cleaned]
    rm_script = "HADOOP_USER_NAME=#{user} hadoop fs -rm -f #{hdfs_path}; "
    put_script = rm_script + put_script
  end
  env.run_local %Q[cat #{src_file} | ssh #{env.ssh_addr} "#{put_script}"],
    echo: true, echo_cmd: true
rescue StandardError => ex
  raise FailedToPutFile.new ex,
    msg: "Unable to write file to HDFS.",
    ssh_user: env.ssh_user,
    path_on_disc: disc_path,
    path_on_github: git_path,
    path_on_hdfs: hdfs_path,
    src_file: src_file
end
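For example (hypothetical paths; assumes the environment can resolve them on disc and GitHub):

hdfs = Hodor::Hdfs.instance
# A plain file is rm'ed and re-put at its mirrored HDFS path.
hdfs.put_file('workflows/load/workflow.xml')
# An ERB template is expanded under /tmp first, then deployed without
# its .erb suffix.
hdfs.put_file('workflows/load/hive-site.xml.erb')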
#pwd ⇒ Object
Returns the HDFS path corresponding to the current local working directory.

# File 'lib/hodor/api/hdfs.rb', line 23

def pwd
  "#{hdfs_root}#{env.pwd}"
end
#rm(path) ⇒ Object
Removes a file from HDFS, skipping the trash.

# File 'lib/hodor/api/hdfs.rb', line 42

def rm(path)
  dest_path = path_on_hdfs(path || ".")
  rm_path_script = %Q[HADOOP_USER_NAME=#{user} hadoop fs -rm -skipTrash #{dest_path}]
  env.ssh rm_path_script
rescue StandardError => ex
  raise FailedToRemovePath.new ex,
    msg: "Unable to remove HDFS path.",
    ssh_user: env.ssh_user,
    path_to_remove: dest_path
end
#rm_f(path) ⇒ Object
Forcibly removes a file from HDFS (no error if it does not exist), skipping the trash.

# File 'lib/hodor/api/hdfs.rb', line 53

def rm_f(path)
  dest_path = path_on_hdfs(path || ".")
  rm_path_script = %Q[HADOOP_USER_NAME=#{user} hadoop fs -rm -f -skipTrash #{dest_path}]
  env.ssh rm_path_script
rescue StandardError => ex
  raise FailedToRemovePath.new ex,
    msg: "Unable to remove HDFS path.",
    ssh_user: env.ssh_user,
    path_to_remove: dest_path
end
#rm_rf(path) ⇒ Object
Recursively and forcibly removes a path from HDFS, skipping the trash.

# File 'lib/hodor/api/hdfs.rb', line 64

def rm_rf(path)
  hdfs_path = path_on_hdfs(path || ".")
  rm_path_script = %Q[HADOOP_USER_NAME=#{user} hadoop fs -rm -f -R -skipTrash #{hdfs_path}]
  env.ssh rm_path_script
rescue StandardError => ex
  raise FailedToRemovePath.new ex,
    msg: "Unable to remove HDFS path.",
    ssh_user: env.ssh_user,
    path_to_remove: hdfs_path
end
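The three removal methods differ only in the flags passed to 'hadoop fs -rm'; all skip the trash, so deletions are immediate and unrecoverable. For example (hypothetical paths):

hdfs = Hodor::Hdfs.instance
hdfs.rm('workflows/load/workflow.xml')     # -rm -skipTrash; errors if absent
hdfs.rm_f('workflows/load/workflow.xml')   # -rm -f -skipTrash
hdfs.rm_rf('workflows/load')               # -rm -f -R -skipTrash (recursive)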
#target ⇒ Object
Returns the :target setting for the current environment.

# File 'lib/hodor/api/hdfs.rb', line 36

def target
  env.settings[:target]
end
#user ⇒ Object
Returns the :hdfs_user setting, used as HADOOP_USER_NAME when running hadoop commands.

# File 'lib/hodor/api/hdfs.rb', line 32

def user
  env.settings[:hdfs_user]
end