Class: Mortar::Local::Sqoop
Instance Method Summary
collapse
Methods included from Helpers
#action, #ask, #confirm, #copy_if_not_present_at_dest, #default_host, #deprecate, #display, #display_header, #display_object, #display_row, #display_table, #display_with_indent, #download_to_file, #ensure_dir_exists, #error, error_with_failure, error_with_failure=, extended, extended_into, #format_bytes, #format_date, #format_with_bang, #full_host, #get_terminal_environment, #home_directory, #host, #hprint, #hputs, included, included_into, #installed_with_omnibus?, #json_decode, #json_encode, #line_formatter, #longest, #output_with_bang, #pending_github_team_state_message, #quantify, #redisplay, #retry_on_exception, #running_on_a_mac?, #running_on_windows?, #set_buffer, #shell, #spinner, #status, #string_distance, #styled_array, #styled_error, #styled_hash, #styled_header, #suggestion, #test_name, #ticking, #time_ago, #truncate, #warning, #with_tty, #write_to_file
#download_file, #ensure_mortar_local_directory, #extract_tgz, #get_resource, #gitignore_template_path, #head_resource, #http_date_to_epoch, #install_date, #install_file_for, #is_newer_version, #jython_cache_directory, #jython_directory, #local_install_directory, #local_install_directory_name, #local_log_dir, #local_project_gitignore, #local_udf_log_dir, #make_call, #make_call_sleep_seconds, #note_install, #osx?, #project_root, #render_script_template, #reset_local_logs, #run_templated_script, #unset_hadoop_env_vars, #url_date
Instance Method Details
#do_install ⇒ Object
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
# File 'lib/mortar/local/sqoop.rb', line 61
# Downloads the sqoop tarball from sqoop_url into the local install
# directory, unpacks it, moves the unpacked tree to sqoop_directory, makes the
# sqoop and bundled hadoop launchers executable, deletes the tarball and
# records the install with note_install("sqoop").
#
# Any tarball or sqoop directory left over from an earlier run is removed
# first, so the method can be used both for a fresh install and for an update.
def do_install
  local_tgz = File.join(local_install_directory, "sqoop-1.4.4-mortar.tar.gz")

  # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
  FileUtils.rm(local_tgz) if File.exist?(local_tgz)
  download_file(sqoop_url, local_tgz)

  FileUtils.rm_rf(sqoop_directory) if File.exist?(sqoop_directory)

  # extract_tgz comes from the included install helpers; the archive unpacks
  # into a directory named by sqoop_dir_in_tgz, which is then renamed.
  extract_tgz(local_tgz, local_install_directory)
  FileUtils.mv(File.join(local_install_directory, sqoop_dir_in_tgz), sqoop_directory)

  FileUtils.chmod(0755, "#{sqoop_directory}/bin/sqoop")
  FileUtils.chmod(0755, "#{sqoop_directory}/hadoop/bin/hadoop")

  File.delete(local_tgz)
  note_install("sqoop")
end
|
#export(connstr, s3dest, options) ⇒ Object
93
94
95
96
|
# File 'lib/mortar/local/sqoop.rb', line 93
# Runs the sqoop command script template for an export.
#
# Builds the template parameters from the connection string, the S3
# destination and the options hash, then hands them to run_templated_script
# together with the sqoop script template path. Returns whatever
# run_templated_script returns.
def export(connstr, s3dest, options)
  params = sqoop_export_template_parameters(connstr, s3dest, options)
  script_template = sqoop_command_script_template_path
  run_templated_script(script_template, params)
end
|
#hadoop_home ⇒ Object
89
90
91
|
# File 'lib/mortar/local/sqoop.rb', line 89
# Path of the "hadoop" directory located directly under sqoop_directory.
def hadoop_home
  [sqoop_directory, "hadoop"].join("/")
end
|
#install_or_update ⇒ Object
26
27
28
29
30
31
32
33
34
35
36
37
38
|
# File 'lib/mortar/local/sqoop.rb', line 26
# Installs sqoop when it is missing, or reinstalls it when a newer version is
# available; otherwise does nothing. The work is wrapped in `action` so that a
# progress message is displayed. Always returns true.
def install_or_update
  # NOTE(review): this points at the python binary, not at sqoop; kept
  # unchanged because readers of @command are not visible from here.
  @command = "#{local_install_directory}/python/bin/python"

  progress_message =
    if should_do_install?
      "Installing sqoop to #{local_install_directory_name}"
    elsif should_do_update?
      "Updating to latest sqoop in #{local_install_directory_name}"
    end

  action(progress_message) { do_install } if progress_message
  true
end
|
#prep_query(original_query) ⇒ Object
So this part kind of sucks. In order to partition a query across multiple map-reduce tasks, sqoop runs a query to find the range of identifying values, divides this range across the number of tasks to be executed, and then modifies the query for each m/r task. To do this, sqoop needs to know where in the query it should place its portion of the WHERE clause. This is done via the $CONDITIONS marker. That’s well and good when you’re running sqoop on a cluster, but our users will be running on their own machine and don’t know or care about this parallel-query stuff. So, to make their lives easier, we make a best effort to add the clause for them in a safe way.
149
150
151
152
153
154
155
156
157
158
159
160
|
# File 'lib/mortar/local/sqoop.rb', line 149
# Makes a best-effort attempt to add sqoop's $CONDITIONS marker to a query.
#
# * A query that already contains $CONDITIONS is returned untouched.
# * A query with a WHERE keyword has everything after the first WHERE wrapped
#   in parentheses and AND-ed with $CONDITIONS.
# * Any other query gets " WHERE $CONDITIONS" appended.
#
# WHERE is matched case-insensitively and only as a whole word, so an
# identifier that merely contains "where" (e.g. a table called "nowhere") is
# not mistaken for the keyword. Text after the first WHERE is wrapped as-is;
# trailing clauses such as ORDER BY are not detected.
#
# @param original_query [String] the SQL query supplied by the user
# @return [String] the query with a $CONDITIONS marker
def prep_query(original_query)
  return original_query if original_query.include?("$CONDITIONS")

  where_match = /\bwhere\b/i.match(original_query)
  return "#{original_query} WHERE \$CONDITIONS" unless where_match

  select_where = "#{where_match.pre_match}#{where_match[0]}"
  # Drop only the single whitespace separator after WHERE (if there is one),
  # so "WHERE(x = 1)" keeps its opening parenthesis.
  clause = where_match.post_match.sub(/\A\s/, "")
  "#{select_where} (#{clause}) AND \$CONDITIONS"
end
|
#should_do_install? ⇒ Boolean
45
46
47
|
# File 'lib/mortar/local/sqoop.rb', line 45
# True when nothing exists at sqoop_directory yet, i.e. sqoop still has to be
# installed.
#
# Uses File.exist?; the File.exists? alias was removed in Ruby 3.2.
#
# @return [Boolean]
def should_do_install?
  !File.exist?(sqoop_directory)
end
|
#should_do_update? ⇒ Boolean
49
50
51
|
# File 'lib/mortar/local/sqoop.rb', line 49
# Asks is_newer_version whether the 'sqoop' resource at sqoop_url is newer
# than what is installed, and returns its answer.
def should_do_update?
  distro_url = sqoop_url
  is_newer_version('sqoop', distro_url)
end
|
#sqoop_command_script_template_path ⇒ Object
85
86
87
|
# File 'lib/mortar/local/sqoop.rb', line 85
# Absolute path of the sqoop.sh script template, resolved relative to this
# source file.
def sqoop_command_script_template_path
  relative_location = "../../templates/script/sqoop.sh"
  File.expand_path(relative_location, __FILE__)
end
|
#sqoop_dir_in_tgz ⇒ Object
57
58
59
|
# File 'lib/mortar/local/sqoop.rb', line 57
# Name of the top-level directory that do_install moves out of the local
# install directory after the sqoop tarball has been unpacked.
def sqoop_dir_in_tgz
  'sqoop-1.4.4-mortar'
end
|
#sqoop_directory ⇒ Object
53
54
55
|
# File 'lib/mortar/local/sqoop.rb', line 53
# Location of the sqoop installation: the "sqoop" entry directly under the
# local install directory.
def sqoop_directory
  [local_install_directory, "sqoop"].join("/")
end
|
#sqoop_export_template_parameters(connstr, s3dest, options) ⇒ Object
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
# File 'lib/mortar/local/sqoop.rb', line 98
# Assembles the parameter hash passed to the sqoop script template.
#
# The hash always carries the sqoop and hadoop locations, the connection
# string and S3 destination (each under two keys), the Pig classpath, the
# postgres jar path under Pig's lib directory, the project root and the
# sqoop Java options. Optional entries are added only when the matching key
# in +options+ is truthy.
#
# @param connstr [String] JDBC connection string
# @param s3dest [String] S3 destination
# @param options [Hash] may contain :dbtable, :sqlquery, :inc_column,
#   :inc_value, :username, :password, :jdbcdriver, :driverjar, :direct
# @return [Hash] template parameters
def sqoop_export_template_parameters(connstr, s3dest, options)
  pig = Mortar::Local::Pig.new
  parameters = {
    "sqoop_dir" => sqoop_directory,
    "jdb_conn_string" => connstr,
    "destination" => s3dest,
    "hadoop_home" => hadoop_home,
    "classpath" => pig.template_params_classpath,
    "postgres_jar" => "#{pig.lib_directory}/lib-cluster/postgresql.jar",
    "jdbc_conn" => connstr,
    "s3dest" => s3dest,
    "project_root" => project_root,
    "sqoop_opts" => sqoop_java_options
  }

  # Template parameter name => option key; copied only when the option is set.
  [
    ["dbtable", :dbtable],
    ["sqlquery", :sqlquery],
    ["inc_column", :inc_column],
    ["inc_value", :inc_value]
  ].each do |param_name, option_key|
    parameters[param_name] = options[option_key] if options[option_key]
  end

  # An incremental value whose to_i is zero selects "lastmodified"; any other
  # value selects "append".
  # NOTE(review): to_i only parses a leading run of digits, so "0" selects
  # "lastmodified" and "2014-01-01" selects "append" - confirm this is intended.
  # NOTE(review): :inc_mode is a Symbol key while every other key is a String;
  # kept as-is - confirm the template renderer handles both.
  incremental_value = options[:inc_value]
  if incremental_value
    parameters[:inc_mode] = incremental_value.to_i.zero? ? "lastmodified" : "append"
  end

  [
    ["dbuser", :username],
    ["dbpass", :password],
    ["jdbcdriver", :jdbcdriver],
    ["driverjar", :driverjar]
  ].each do |param_name, option_key|
    parameters[param_name] = options[option_key] if options[option_key]
  end

  parameters["direct_import"] = true if options[:direct]
  parameters
end
|
#sqoop_java_options ⇒ Object
129
130
131
132
133
134
135
136
137
138
|
# File 'lib/mortar/local/sqoop.rb', line 129
# Builds the hash of Hadoop filesystem properties for sqoop: the AWS access
# key and secret key (read from the AWS_ACCESS_KEY and AWS_SECRET_KEY
# environment variables) for both the s3n and s3 schemes, and
# NativeS3FileSystem as the implementation class for both schemes.
#
# @return [Hash{String => String, nil}] credential values are nil when the
#   corresponding environment variable is unset
def sqoop_java_options
  access_key = ENV['AWS_ACCESS_KEY']
  secret_key = ENV['AWS_SECRET_KEY']
  native_s3_impl = 'org.apache.hadoop.fs.s3native.NativeS3FileSystem'

  {
    'fs.s3n.awsAccessKeyId' => access_key,
    'fs.s3n.awsSecretAccessKey' => secret_key,
    'fs.s3.awsAccessKeyId' => access_key,
    'fs.s3.awsSecretAccessKey' => secret_key,
    'fs.s3.impl' => native_s3_impl,
    'fs.s3n.impl' => 'org.apache.hadoop.fs.s3native.NativeS3FileSystem'
  }
end
|
#sqoop_url ⇒ Object
40
41
42
43
|
# File 'lib/mortar/local/sqoop.rb', line 40
# URL the sqoop distribution is downloaded from: the SQOOP_DISTRO_URL
# environment variable when set, otherwise "<full_host>/resource/sqoop".
def sqoop_url
  fallback_url = full_host + "/resource/sqoop"
  ENV.fetch('SQOOP_DISTRO_URL', fallback_url)
end
|