Class: Mortar::Local::Sqoop

Inherits:
Object
  • Object
show all
Includes:
Helpers, InstallUtil
Defined in:
lib/mortar/local/sqoop.rb

Instance Method Summary collapse

Methods included from Helpers

#action, #ask, #confirm, #copy_if_not_present_at_dest, #default_host, #deprecate, #display, #display_header, #display_object, #display_row, #display_table, #display_with_indent, #download_to_file, #ensure_dir_exists, #error, error_with_failure, error_with_failure=, extended, extended_into, #format_bytes, #format_date, #format_with_bang, #full_host, #get_terminal_environment, #home_directory, #host, #hprint, #hputs, included, included_into, #installed_with_omnibus?, #json_decode, #json_encode, #line_formatter, #longest, #output_with_bang, #pending_github_team_state_message, #quantify, #redisplay, #retry_on_exception, #running_on_a_mac?, #running_on_windows?, #set_buffer, #shell, #spinner, #status, #string_distance, #styled_array, #styled_error, #styled_hash, #styled_header, #suggestion, #test_name, #ticking, #time_ago, #truncate, #warning, #with_tty, #write_to_file

Methods included from InstallUtil

#download_file, #ensure_mortar_local_directory, #extract_tgz, #get_resource, #gitignore_template_path, #head_resource, #http_date_to_epoch, #install_date, #install_file_for, #is_newer_version, #jython_cache_directory, #jython_directory, #local_install_directory, #local_install_directory_name, #local_log_dir, #local_project_gitignore, #local_udf_log_dir, #make_call, #make_call_sleep_seconds, #note_install, #osx?, #project_root, #render_script_template, #reset_local_logs, #run_templated_script, #unset_hadoop_env_vars, #url_date

Instance Method Details

#do_installObject



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/mortar/local/sqoop.rb', line 61

# Downloads the sqoop distribution and installs it under the local install
# directory, replacing any previous installation.
#
# Steps, in order: remove a stale tarball, download a fresh one from
# +sqoop_url+, remove an existing +sqoop_directory+, extract the tarball,
# move the extracted +sqoop_dir_in_tgz+ directory to +sqoop_directory+,
# make the launcher scripts executable, delete the tarball, and record the
# install with +note_install+.
#
# @return [Object] whatever +note_install+ returns
def do_install
  local_tgz = File.join(local_install_directory, "sqoop-1.4.4-mortar.tar.gz")
  # File.exist? is used instead of File.exists?, which was deprecated and
  # then removed in Ruby 3.2.
  FileUtils.rm(local_tgz) if File.exist?(local_tgz)
  download_file(sqoop_url, local_tgz)

  FileUtils.rm_rf(sqoop_directory) if File.exist?(sqoop_directory)

  extract_tgz(local_tgz, local_install_directory)

  FileUtils.mv(File.join(local_install_directory, sqoop_dir_in_tgz), sqoop_directory)

  # These scripts have been seen coming out of the tgz without +x, so set
  # the mode explicitly to be sure they have the necessary permissions.
  FileUtils.chmod(0755, "#{sqoop_directory}/bin/sqoop")
  FileUtils.chmod(0755, "#{sqoop_directory}/hadoop/bin/hadoop")

  File.delete(local_tgz)
  note_install("sqoop")
end

#export(connstr, s3dest, options) ⇒ Object



93
94
95
96
# File 'lib/mortar/local/sqoop.rb', line 93

# Runs the sqoop shell-script template for an export.
#
# Builds the template parameters with +sqoop_export_template_parameters+
# and hands them, together with the script template path, to
# +run_templated_script+.
#
# @param connstr [Object] connection string forwarded to the parameter builder
# @param s3dest [Object] destination forwarded to the parameter builder
# @param options [Hash] options forwarded to the parameter builder
# @return [Object] whatever +run_templated_script+ returns
def export(connstr, s3dest, options)
  params = sqoop_export_template_parameters(connstr, s3dest, options)
  run_templated_script(sqoop_command_script_template_path, params)
end

#hadoop_homeObject



89
90
91
# File 'lib/mortar/local/sqoop.rb', line 89

# Path of the +hadoop+ directory inside the sqoop install directory.
#
# @return [String] "<sqoop_directory>/hadoop"
def hadoop_home
  [sqoop_directory, "hadoop"].join("/")
end

#install_or_updateObject



26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/mortar/local/sqoop.rb', line 26

# Installs sqoop when it is missing, or re-installs it when a newer version
# is reported; otherwise does nothing.
#
# +should_do_update?+ is only consulted when +should_do_install?+ is false.
# The actual work is done by +do_install+ inside an +action+ block.
#
# @return [true] always
def install_or_update
  # NOTE(review): this stores a python path in a sqoop class; nothing in the
  # visible code reads @command -- confirm whether it is still needed.
  @command = "#{local_install_directory}/python/bin/python"

  message =
    if should_do_install?
      "Installing sqoop to #{local_install_directory_name}"
    elsif should_do_update?
      "Updating to latest sqoop in #{local_install_directory_name}"
    end

  action(message) { do_install } if message
  true
end

#prep_query(original_query) ⇒ Object

So this part kind of sucks. In order to partition a query across multiple map reduce tasks, sqoop does a query to find the range of identifying values, divides this range across the number of tasks to be executed and then modifies the query for each m/r task. To do this Sqoop needs to know at what point in the query it should place its portion of the where clause. This is done via the $CONDITIONS marker. So that’s well and good when you’re running sqoop on a cluster, but our users will be running on their own machine and don’t know or care for this parallel queries stuff. So to make their lives easier we make a best effort to add the clause for them in a safe way.



149
150
151
152
153
154
155
156
157
158
159
160
# File 'lib/mortar/local/sqoop.rb', line 149

# Makes a best-effort attempt to add sqoop's $CONDITIONS marker to a query.
#
# * A query that already contains "$CONDITIONS" is returned unchanged.
# * A query with a WHERE keyword has everything after the first WHERE
#   wrapped in parentheses and "AND $CONDITIONS" appended.
# * Any other query gets " WHERE $CONDITIONS" appended.
#
# WHERE is matched case-insensitively and only as a whole word, so
# identifiers such as "somewhere" or "where_id" are not mistaken for the
# keyword, and "WHERE(a = 1)" keeps its opening parenthesis.
#
# Limitations (this is text matching, not SQL parsing): a WHERE inside a
# string literal or sub-query is still treated as the main clause, and
# trailing GROUP BY / ORDER BY / LIMIT clauses end up inside the
# parentheses. Such queries should carry an explicit $CONDITIONS marker.
#
# @param original_query [String] the SQL query as supplied by the user
# @return [String] the query containing a $CONDITIONS marker
def prep_query(original_query)
  return original_query if original_query.include?("$CONDITIONS")

  where = /\bwhere\b/i.match(original_query)
  return "#{original_query} WHERE \$CONDITIONS" unless where

  # Preserve the user's spelling of WHERE; drop only the whitespace that
  # separated it from the clause.
  clause = where.post_match.lstrip
  "#{where.pre_match}#{where[0]} (#{clause}) AND \$CONDITIONS"
end

#should_do_install?Boolean

Returns:

  • (Boolean)


45
46
47
# File 'lib/mortar/local/sqoop.rb', line 45

# Whether sqoop needs to be installed, i.e. +sqoop_directory+ does not exist.
#
# Uses File.exist?; the File.exists? alias was deprecated and then removed
# in Ruby 3.2.
#
# @return [Boolean] true when +sqoop_directory+ is absent
def should_do_install?
  !File.exist?(sqoop_directory)
end

#should_do_update?Boolean

Returns:

  • (Boolean)


49
50
51
# File 'lib/mortar/local/sqoop.rb', line 49

# Whether the installed sqoop should be refreshed.
#
# Delegates to +is_newer_version+ with the resource name 'sqoop' and the
# current +sqoop_url+.
#
# @return [Object] the result of +is_newer_version+
def should_do_update?
  distro_url = sqoop_url
  is_newer_version('sqoop', distro_url)
end

#sqoop_command_script_template_pathObject



85
86
87
# File 'lib/mortar/local/sqoop.rb', line 85

# Absolute path of the sqoop shell-script template, resolved relative to
# this source file.
#
# @return [String] expanded path ending in "templates/script/sqoop.sh"
def sqoop_command_script_template_path
  relative = File.join("..", "..", "templates", "script", "sqoop.sh")
  File.expand_path(relative, __FILE__)
end

#sqoop_dir_in_tgzObject



57
58
59
# File 'lib/mortar/local/sqoop.rb', line 57

# Name of the top-level directory inside the sqoop tarball; +do_install+
# moves this directory to +sqoop_directory+ after extraction.
#
# @return [String]
def sqoop_dir_in_tgz
  'sqoop-1.4.4-mortar'
end

#sqoop_directoryObject



53
54
55
# File 'lib/mortar/local/sqoop.rb', line 53

# Directory sqoop is installed into: the "sqoop" entry under
# +local_install_directory+.
#
# @return [String] "<local_install_directory>/sqoop"
def sqoop_directory
  format("%s/sqoop", local_install_directory)
end

#sqoop_export_template_parameters(connstr, s3dest, options) ⇒ Object



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/mortar/local/sqoop.rb', line 98

# Builds the parameter hash handed to the sqoop script template.
#
# The base entries are always present; the optional entries are added only
# when the matching key in +options+ is truthy.
#
# Incremental mode: when +options[:inc_value]+ is given, a plain number
# (e.g. "0", "42", "1.5") selects "append" and anything else (e.g. a
# timestamp such as "2013-01-01 00:00:00") selects "lastmodified". The
# check is a strict numeric match rather than +to_i+, because +to_i+ reads
# "0" as non-numeric and a digit-leading timestamp as a number.
#
# NOTE(review): the incremental mode is stored under the Symbol key
# :inc_mode while every other entry uses a String key. Kept as-is so
# existing readers of the hash keep working -- confirm before normalising.
#
# @param connstr [String] JDBC connection string
# @param s3dest [String] destination location
# @param options [Hash] optional settings read by Symbol key: :dbtable,
#   :sqlquery, :inc_column, :inc_value, :username, :password, :jdbcdriver,
#   :driverjar, :direct
# @return [Hash] template parameters
def sqoop_export_template_parameters(connstr, s3dest, options)
  pig = Mortar::Local::Pig.new
  parameters = {
    "sqoop_dir" => sqoop_directory,
    "jdb_conn_string" => connstr,
    "destination" => s3dest,
    "hadoop_home" => hadoop_home,
    "classpath" => pig.template_params_classpath,
    "postgres_jar" => "#{pig.lib_directory}/lib-cluster/postgresql.jar",
    "jdbc_conn" => connstr,
    "s3dest" => s3dest,
    "project_root" => project_root,
    "sqoop_opts" => sqoop_java_options
  }

  [
    ["dbtable", :dbtable],
    ["sqlquery", :sqlquery],
    ["inc_column", :inc_column],
    ["inc_value", :inc_value]
  ].each do |param, opt|
    parameters[param] = options[opt] if options[opt]
  end

  inc_value = options[:inc_value]
  if inc_value
    numeric = inc_value.to_s.strip =~ /\A[+-]?\d+(\.\d+)?\z/
    parameters[:inc_mode] = numeric ? "append" : "lastmodified"
  end

  [
    ["dbuser", :username],
    ["dbpass", :password],
    ["jdbcdriver", :jdbcdriver],
    ["driverjar", :driverjar]
  ].each do |param, opt|
    parameters[param] = options[opt] if options[opt]
  end

  parameters["direct_import"] = true if options[:direct]
  parameters
end

#sqoop_java_optionsObject



129
130
131
132
133
134
135
136
137
138
# File 'lib/mortar/local/sqoop.rb', line 129

# Options passed to sqoop for S3 access.
#
# The access key and secret are read from the AWS_ACCESS_KEY and
# AWS_SECRET_KEY environment variables (nil when unset) and set for both
# the "s3" and "s3n" schemes; both schemes use NativeS3FileSystem.
#
# @return [Hash{String => String, nil}]
def sqoop_java_options
  access_key = ENV['AWS_ACCESS_KEY']
  secret_key = ENV['AWS_SECRET_KEY']
  native_s3 = 'org.apache.hadoop.fs.s3native.NativeS3FileSystem'

  {
    'fs.s3n.awsAccessKeyId' => access_key,
    'fs.s3n.awsSecretAccessKey' => secret_key,
    'fs.s3.awsAccessKeyId' => access_key,
    'fs.s3.awsSecretAccessKey' => secret_key,
    'fs.s3.impl' => native_s3,
    'fs.s3n.impl' => 'org.apache.hadoop.fs.s3native.NativeS3FileSystem'
  }
end

#sqoop_urlObject



40
41
42
43
# File 'lib/mortar/local/sqoop.rb', line 40

# URL the sqoop distribution is downloaded from.
#
# The SQOOP_DISTRO_URL environment variable wins when set; otherwise the
# URL is "<full_host>/resource/sqoop". +full_host+ is evaluated in both
# cases.
#
# @return [String]
def sqoop_url
  fallback = full_host + "/resource/sqoop"
  ENV.fetch('SQOOP_DISTRO_URL') { fallback }
end