Module: Embulk
- Defined in:
- lib/embulk.rb,
lib/embulk/page.rb,
lib/embulk/error.rb,
lib/embulk/buffer.rb,
lib/embulk/column.rb,
lib/embulk/plugin.rb,
lib/embulk/schema.rb,
lib/embulk/version.rb,
lib/embulk/guess/csv.rb,
lib/embulk/file_input.rb,
lib/embulk/guess/gzip.rb,
lib/embulk/data_source.rb,
lib/embulk/file_output.rb,
lib/embulk/java_plugin.rb,
lib/embulk/guess_plugin.rb,
lib/embulk/input_plugin.rb,
lib/embulk/page_builder.rb,
lib/embulk/filter_plugin.rb,
lib/embulk/guess/charset.rb,
lib/embulk/guess/newline.rb,
lib/embulk/output_plugin.rb,
lib/embulk/parser_plugin.rb,
lib/embulk/decoder_plugin.rb,
lib/embulk/encoder_plugin.rb,
lib/embulk/java/bootstrap.rb,
lib/embulk/executor_plugin.rb,
lib/embulk/plugin_registry.rb,
lib/embulk/formatter_plugin.rb,
lib/embulk/java/time_helper.rb,
lib/embulk/data/package_data.rb,
lib/embulk/file_input_plugin.rb,
lib/embulk/command/embulk_run.rb,
lib/embulk/file_output_plugin.rb,
lib/embulk/command/embulk_example.rb,
lib/embulk/command/embulk_new_plugin.rb,
lib/embulk/command/embulk_generate_bin.rb,
lib/embulk/data/bundle/embulk/input/example.rb,
lib/embulk/data/bundle/embulk/filter/example.rb,
lib/embulk/data/bundle/embulk/output/example.rb
Defined Under Namespace
Modules: Filter, Guess, Input, Java, Output, Plugin, Type
Classes: Buffer, Column, ConfigError, DataSource, DecoderPlugin, EncoderPlugin, ExecutorPlugin, FileInput, FileInputPlugin, FileOutput, FileOutputPlugin, FilterPlugin, FormatterPlugin, GuessPlugin, InputPlugin, JavaPlugin, LineGuessPlugin, OutputPlugin, PackageData, Page, PageBuilder, ParserPlugin, PluginLoadError, PluginManager, PluginRegistry, Schema, TextGuessPlugin
Constant Summary collapse
- VERSION =
'0.6.1'
Class Method Summary collapse
- .b(s) ⇒ Object
- .create_example(path) ⇒ Object
- .default_gem_home ⇒ Object
- .generate_bin(options = {}) ⇒ Object
- .generate_bin_data(jruby_jar_path, ruby_script_path, options = {}) ⇒ Object
- .home(dir) ⇒ Object
- .java? ⇒ Boolean
- .new_plugin(name, language, category) ⇒ Object
- .run(argv) ⇒ Object
Class Method Details
.b(s) ⇒ Object
59 60 61 |
# File 'lib/embulk/command/embulk_generate_bin.rb', line 59

# Returns +s+ tagged with the ASCII-8BIT (binary) encoding.
#
# An unfrozen string is re-tagged in place and the very same object is
# returned. A frozen string cannot be re-tagged, so a binary-tagged copy
# is returned instead and the argument is left untouched.
#
# @param s [String] the string to tag as binary
# @return [String] +s+ itself, or a copy when +s+ is frozen
def self.b(s)
  # force_encoding mutates its receiver and would raise on a frozen string.
  return s.dup.force_encoding('ASCII-8BIT') if s.frozen?

  s.force_encoding('ASCII-8BIT')
end
.create_example(path) ⇒ Object
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
# File 'lib/embulk/command/embulk_example.rb', line 2

# Creates a small example data set under +path+:
#
# * <path>/csv/sample_01.csv.gz -- a gzip-compressed CSV file with a header
#   row and four records
# * <path>/example.yml          -- a configuration whose input is the "file"
#   type pointing at the CSV files above and whose output is "stdout"
#
# Progress messages are printed to standard output.
#
# @param path [String] directory to create the example in
def self.create_example(path)
  require 'fileutils'
  require 'zlib'

  puts " Creating #{path}/"
  FileUtils.mkdir_p File.join(path, 'csv')
  puts " Creating #{path}/csv/"

  puts " Creating #{path}/csv/sample_01.csv.gz"
  Zlib::GzipWriter.open(File.join(path, 'csv', 'sample_01.csv.gz')) do |f|
    f.write <<EOF
id,account,time,purchase,comment
1,32864,2015-01-27 19:23:49,20150127,embulk
2,14824,2015-01-27 19:01:23,20150127,embulk jruby
3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin"
4,11270,2015-01-29 11:54:36,20150129,NULL
EOF
  end

  puts " Creating #{path}/example.yml"
  File.open(File.join(path, 'example.yml'), 'w') do |f|
    # path_prefix is written as an absolute path so the generated config
    # does not depend on the directory it is later used from.
    f.write <<EOF
in:
  type: file
  path_prefix: "#{File.expand_path(File.join(path, 'csv', 'sample_'))}"
out:
  type: stdout
EOF
  end
end
.default_gem_home ⇒ Object
314 315 316 317 318 319 320 321 322 323 |
# File 'lib/embulk/command/embulk_run.rb', line 314

# Builds the default gem home directory:
# <user home>/.embulk/<ruby engine>/<ruby version>, as an absolute path.
#
# On a Java platform the home directory is read from the "user.home" system
# property; otherwise, or when that property is unset, the HOME environment
# variable is used.
#
# @return [String] absolute path of the default gem home
# @raise [RuntimeError] when no home directory can be determined
def self.default_gem_home
  user_home = java.lang.System.properties["user.home"] if RUBY_PLATFORM =~ /java/i
  user_home ||= ENV['HOME']
  raise "HOME environment variable is not set." unless user_home

  File.expand_path(File.join(user_home, '.embulk', Gem.ruby_engine, RbConfig::CONFIG['ruby_version']))
end
.generate_bin(options = {}) ⇒ Object
2 3 4 5 6 7 8 9 10 11 12 13 |
# File 'lib/embulk/command/embulk_generate_bin.rb', line 2

# Locates the JRuby jar and the embulk.rb launcher script, then delegates to
# generate_bin_data to build the executable's contents.
#
# The jar location is taken from the code source of org.jruby.Main. The
# launcher script is looked up as a classpath resource when this file itself
# was loaded from a classpath/jar location (its path starts with "classpath:"
# or contains "!/"), and next to this file otherwise.
#
# @param options [Hash] passed through to generate_bin_data; :java_home is
#   always overwritten with the running JVM's "java.home" property
# @return [Object] whatever generate_bin_data returns
def self.generate_bin(options = {})
  jruby_jar_path = org.jruby.Main.java_class.protection_domain.code_source.location.to_s

  if __FILE__ =~ /^classpath:/ || __FILE__.include?('!/')
    resource_class = org.embulk.command.Runner.java_class
    ruby_script_path = resource_class.resource("/embulk/command/embulk.rb").to_s
  else
    ruby_script_path = File.join(File.dirname(__FILE__), 'embulk.rb')
  end

  java_home = java.lang.System.properties['java.home']
  generate_bin_data(jruby_jar_path, ruby_script_path, options.merge(java_home: java_home))
end
.generate_bin_data(jruby_jar_path, ruby_script_path, options = {}) ⇒ Object
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
# File 'lib/embulk/command/embulk_generate_bin.rb', line 15

# Assembles the bytes of a self-contained launcher. The result is, in order:
#
# 1. a /bin/sh header that execs java with "$0" (the file itself) as the
#    classpath and as the script for org.jruby.Main; the header is wrapped in
#    =begin/=end so Ruby skips it as a block comment,
# 2. a Ruby prelude that optionally sets EMBULK_BUNDLE_PATH and removes
#    GEM_HOME and GEM_PATH from the environment,
# 3. the launcher script read from +ruby_script_path+, cut at its first
#    "\n__END__\n" marker if it has one,
# 4. a "\n__END__\n" marker followed by the contents of +jruby_jar_path+.
#
# @param jruby_jar_path [String] path of the JRuby jar to embed
# @param ruby_script_path [String] path of the Ruby launcher script to embed
# @param options [Hash]
#   :java_home   -- when set, the header exports JAVA_HOME and runs
#                   "$JAVA_HOME"/bin/java instead of plain java
#   :bundle_path -- :here resolves the bundle path relative to the generated
#                   file at run time; any other truthy value is embedded as a
#                   literal path
# @return [String] the assembled, binary-tagged data
def self.generate_bin_data(jruby_jar_path, ruby_script_path, options = {})
  if java_home = options[:java_home]
    # NOTE(review): the value is embedded unescaped inside single quotes; a
    # path containing "'" would break the generated shell line.
    java_home_script = %{export JAVA_HOME='#{java_home}'}
    java_path = %{"$JAVA_HOME"/bin/java}
  else
    java_home_script = %{}
    java_path = %{java}
  end

  # TODO parse -D options to set them to java
  # Tagged as binary up front: appending the jar bytes to a non-binary string
  # holding non-ASCII characters (e.g. from JAVA_HOME) would raise
  # Encoding::CompatibilityError.
  shell_script = b(<<EOF)
#!/bin/sh
=begin 2>/dev/null
#{java_home_script}
exec #{java_path} -classpath "$0" org.jruby.Main "$0" "$@"
exit 127
=end
EOF

  if options[:bundle_path] == :here
    bundle_path_script = %{ENV['EMBULK_BUNDLE_PATH'] = File.expand_path('..', File.dirname(__FILE__))}
  elsif path = options[:bundle_path]
    bundle_path_script = %{ENV['EMBULK_BUNDLE_PATH'] = '#{path}'}
  else
    bundle_path_script = b('')
  end

  ruby_init_script = b(<<EOF)
#{bundle_path_script}
ENV.delete 'GEM_HOME'
ENV.delete 'GEM_PATH'
EOF

  ruby_script = b(File.read(ruby_script_path))
  if i = ruby_script.index(b("\n__END__\n"))
    # delete contents after __END__
    ruby_script = ruby_script[0, i]
  end

  jruby_jar = b(File.read(jruby_jar_path))

  # "+" binds tighter than "<<"; the parentheses only make that explicit.
  shell_script << (ruby_init_script + ruby_script) << b("\n__END__\n") << jruby_jar
end
.home(dir) ⇒ Object
309 310 311 312 |
# File 'lib/embulk/command/embulk_run.rb', line 309

# Joins +dir+ onto the directory three levels above the directory that
# contains this source file.
#
# @param dir [String] sub-directory name, e.g. 'lib' or 'classpath'
# @return [String] absolute path of that sub-directory
def self.home(dir)
  home = File.expand_path('../../..', File.dirname(__FILE__))
  File.join(home, dir)
end
.java? ⇒ Boolean
3 4 5 |
# File 'lib/embulk.rb', line 3

# Always answers true.
#
# @return [Boolean] true
def self.java?
  true
end
.new_plugin(name, language, category) ⇒ Object
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
# File 'lib/embulk/command/embulk_new_plugin.rb', line 2

# Generates the skeleton of a new plugin project in ./embulk-<category>-<name>.
#
# The :file_input and :file_output categories are placed under the "input" and
# "output" project/category names respectively. Files are produced by
# Embulk::PackageData from the "new" template set; the ERB templates receive
# this method's binding, so every local assigned below may be read by a
# template even when it is not used again in this method.
#
# If anything fails after the project directory was created, the directory is
# removed again.
#
# @param name [String] dash-separated plugin name, e.g. "int-to-string"
# @param language [Symbol] :ruby or :java
# @param category [Symbol] :input, :file_input, :parser, :decoder, :output,
#   :file_output, :formatter, :encoder or :filter
# @raise [RuntimeError] when the project directory already exists
def self.new_plugin(name, language, category)
  require 'embulk/data/package_data'
  require 'embulk/version'
  require 'fileutils'

  embulk_category = category
  embulk_category = :input if category == :file_input
  embulk_category = :output if category == :file_output

  project_name = "embulk-#{embulk_category}-#{name}"
  plugin_dir = "lib/embulk"
  plugin_path = "#{plugin_dir}/#{embulk_category}/#{name}.rb"

  if File.exist?(project_name)
    raise "./#{project_name} already exists. Please delete it first."
  end
  FileUtils.mkdir_p(project_name)

  puts "Creating #{project_name}/"

  success = false
  begin
    # NOTE(review): the listing lost this local's name; "author" is
    # reconstructed -- confirm it matches what the ERB templates read.
    author = `git config user.name`.strip
    author = "YOUR_NAME" if author.empty?
    email = `git config user.email`.strip
    # NOTE(review): the e-mail placeholder is "YOUR_NAME" as well -- confirm
    # that is intended.
    email = "YOUR_NAME" if email.empty?

    # "int-to-string" + :file_input => "IntToStringFileInputPlugin"
    ruby_class_name = name.split('-').map {|a| a.capitalize }.join + category.to_s.split('_').map {|a| a.capitalize }.join + "Plugin"
    java_iface = category.to_s.split('_').map {|a| a.capitalize }.join
    java_class_name = name.split('-').map {|a| a.capitalize }.join + java_iface + "Plugin"
    display_name = name.split('-').map {|a| a.capitalize }.join(' ')
    display_category = category.to_s.gsub('_', ' ')

    # Parser and decoder plugins additionally get a guess plugin skeleton.
    extra_guess_erb = {}

    case category
    when :input
      description = %[Loads records from #{display_name}.]
    when :file_input
      description = %[Reads files stored on #{display_name}.]
    when :parser
      description = %[Parses #{display_name} files read by other file input plugins.]
      extra_guess_erb["ruby/parser_guess.rb.erb"] = "#{plugin_dir}/guess/#{name}.rb"
    when :decoder
      description = %[Decodes #{display_name}-encoded files read by other file input plugins.]
      extra_guess_erb["ruby/decoder_guess.rb.erb"] = "#{plugin_dir}/guess/#{name}.rb"
    when :output
      description = %[Dumps records to #{display_name}.]
    when :file_output
      description = %[Stores files on #{display_name}.]
    when :formatter
      description = %[Formats #{display_name} files for other file output plugins.]
    when :encoder
      description = %[Encodes files using #{display_name} for other file output plugins.]
    when :filter
      description = %[#{display_name}]
    end

    pkg = Embulk::PackageData.new("new", project_name, binding())

    pkg.cp_erb("README.md.erb", "README.md")
    pkg.cp("LICENSE.txt", "LICENSE.txt")
    pkg.cp_erb("gitignore.erb", ".gitignore")

    case language
    when :ruby
      pkg.cp("ruby/Rakefile", "Rakefile")
      pkg.cp("ruby/Gemfile", "Gemfile")
      pkg.cp_erb("ruby/gemspec.erb", "#{project_name}.gemspec")
      pkg.cp_erb("ruby/#{category}.rb.erb", plugin_path)

    when :java
      pkg.cp("java/gradle/wrapper/gradle-wrapper.jar", "gradle/wrapper/gradle-wrapper.jar")
      pkg.cp("java/gradle/wrapper/gradle-wrapper.properties", "gradle/wrapper/gradle-wrapper.properties")
      pkg.cp("java/gradlew.bat", "gradlew.bat")
      pkg.cp("java/gradlew", "gradlew")
      pkg.set_executable("gradlew")
      pkg.cp_erb("java/build.gradle.erb", "build.gradle")
      pkg.cp_erb("java/plugin_loader.rb.erb", plugin_path)
      pkg.cp_erb("java/#{category}.java.erb", "src/main/java/org/embulk/#{embulk_category}/#{java_class_name}.java")
      pkg.cp_erb("java/test.java.erb", "src/test/java/org/embulk/#{embulk_category}/Test#{java_class_name}.java")
    end

    extra_guess_erb.each_pair do |erb, dest|
      pkg.cp_erb(erb, dest)
    end

    success = true
    puts ""
  ensure
    # Do not leave a half-generated project behind.
    FileUtils.rm_rf project_name unless success
  end
end
.run(argv) ⇒ Object
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 |
# File 'lib/embulk/command/embulk_run.rb', line 2

# Command-line entry point: picks the subcommand out of +argv+, parses that
# subcommand's options and dispatches.
#
# Subcommands handled here: bundle, run, cleanup, preview, guess, new, gem,
# example and exec. "bundle", "example" and "new" are implemented in Ruby
# below; "gem" and "exec" hand over immediately (to RubyGems' gem runner and
# to Kernel#exec); everything else that passes option parsing is forwarded to
# org.embulk.command.Runner together with the collected options as JSON.
#
# +argv+ is modified in place: the subcommand and all parsed options are
# removed from it.
#
# @param argv [Array<String>] command-line arguments
# @raise [SystemExit] with status 1 when the Java runner raises
def self.run(argv)
  # default_bundle_path
  default_bundle_path = nil
  gemfile_path = ENV['BUNDLE_GEMFILE'].to_s
  gemfile_path = nil if gemfile_path.empty?
  default_bundle_path = File.dirname(gemfile_path) if gemfile_path

  # default GEM_HOME is ~/.embulk/jruby/1.9/. If -b option is set,
  # GEM_HOME is already set by embulk/command/embulk.rb
  gem_home = ENV['GEM_HOME'].to_s
  if gem_home.empty?
    ENV['GEM_HOME'] = default_gem_home
    Gem.clear_paths  # force rubygems to reload GEM_HOME
  end

  # to make sure org.embulk.jruby.JRubyScriptingModule can require 'embulk/java/bootstrap'
  $LOAD_PATH << Embulk.home('lib')

  require 'embulk/version'

  # The first argument that does not start with "-" is the subcommand.
  i = argv.find_index {|arg| arg !~ /^\-/ }
  unless i
    if argv.include?('--version')
      puts "embulk #{Embulk::VERSION}"
      exit 0
    end
    # NOTE(review): usage is defined elsewhere; presumably it does not return.
    usage nil
  end
  subcmd = argv.slice!(i)

  require 'java'
  require 'optparse'
  op = OptionParser.new
  op.version = Embulk::VERSION

  puts "#{Time.now.strftime("%Y-%m-%d %H:%M:%S.%3N %z")}: Embulk v#{Embulk::VERSION}"

  load_paths = []
  classpaths = []
  classpath_separator = java.io.File.pathSeparator

  # Options forwarded to the Java runner as JSON.
  options = {}

  op.on('-b', '--bundle BUNDLE_DIR', 'Path to a Gemfile directory') do |path|
    # only for help message. implemented at lib/embulk/command/embulk.rb
  end

  case subcmd.to_sym
  when :bundle
    op.remove  # remove --bundle
    if default_bundle_path
      op.banner = "Usage: bundle [directory=#{default_bundle_path}]"
      args = 0..1
    else
      op.banner = "Usage: bundle <directory>"
      args = 1..1
    end

  when :run
    op.banner = "Usage: run <config.yml>"
    op.on('-l', '--log-level LEVEL', 'Log level (fatal, error, warn, info, debug or trace)') do |level|
      options[:logLevel] = level
    end
    op.on('-I', '--load-path PATH', 'Add ruby script directory path ($LOAD_PATH)') do |load_path|
      load_paths << load_path
    end
    op.on('-C', '--classpath PATH', "Add java classpath separated by #{classpath_separator} (CLASSPATH)") do |classpath|
      classpaths.concat classpath.split(classpath_separator)
    end
    op.on('-o', '--output PATH', 'Path to a file to write the next configuration') do |path|
      options[:nextConfigOutputPath] = path
    end
    op.on('-r', '--resume-state PATH', 'Path to a file to write or read resume state') do |path|
      options[:resumeStatePath] = path
    end
    args = 1..1

  when :cleanup
    op.banner = "Usage: cleanup <config.yml>"
    op.on('-l', '--log-level LEVEL', 'Log level (fatal, error, warn, info, debug or trace)') do |level|
      options[:logLevel] = level
    end
    op.on('-I', '--load-path PATH', 'Add ruby script directory path ($LOAD_PATH)') do |load_path|
      load_paths << load_path
    end
    op.on('-C', '--classpath PATH', "Add java classpath separated by #{classpath_separator} (CLASSPATH)") do |classpath|
      classpaths.concat classpath.split(classpath_separator)
    end
    op.on('-r', '--resume-state PATH', 'Path to a file to write or read resume state') do |path|
      options[:resumeStatePath] = path
    end
    args = 1..1

  when :preview
    op.banner = "Usage: preview <config.yml>"
    op.on('-l', '--log-level LEVEL', 'Log level (fatal, error, warn, info, debug or trace)') do |level|
      options[:logLevel] = level
    end
    op.on('-I', '--load-path PATH', 'Add ruby script directory path ($LOAD_PATH)') do |load_path|
      load_paths << load_path
    end
    op.on('-C', '--classpath PATH', "Add java classpath separated by #{classpath_separator} (CLASSPATH)") do |classpath|
      classpaths.concat classpath.split(classpath_separator)
    end
    op.on('-G', '--vertical', "Use vertical output format", TrueClass) do |b|
      options[:previewOutputFormat] = "vertical"
    end
    args = 1..1

  when :guess
    op.banner = "Usage: guess <partial-config.yml>"
    op.on('-l', '--log-level LEVEL', 'Log level (fatal, error, warn, info, debug or trace)') do |level|
      options[:logLevel] = level
    end
    op.on('-o', '--output PATH', 'Path to a file to write the guessed configuration') do |path|
      options[:nextConfigOutputPath] = path
    end
    op.on('-I', '--load-path PATH', 'Add ruby script directory path ($LOAD_PATH)') do |load_path|
      load_paths << load_path
    end
    op.on('-C', '--classpath PATH', "Add java classpath separated by #{classpath_separator} (CLASSPATH)") do |classpath|
      classpaths.concat classpath.split(classpath_separator)
    end
    op.on('-g', '--guess NAMES', "Comma-separated list of guess plugin names") do |names|
      (options[:guessPlugins] ||= []).concat names.split(",")
    end
    args = 1..1

  when :new
    op.remove  # remove --bundle
    op.banner = "Usage: new <category> <name>" + %[
categories:
    ruby-input                 Ruby record input plugin    (like "mysql")
    ruby-output                Ruby record output plugin   (like "mysql")
    ruby-filter                Ruby record filter plugin   (like "add-hostname")
    #ruby-file-input           Ruby file input plugin      (like "ftp")          # not implemented yet [#21]
    #ruby-file-output          Ruby file output plugin     (like "ftp")          # not implemented yet [#22]
    ruby-parser                Ruby file parser plugin     (like "csv")
    ruby-formatter             Ruby file formatter plugin  (like "csv")
    #ruby-decoder              Ruby file decoder plugin    (like "gzip")         # not implemented yet [#31]
    #ruby-encoder              Ruby file encoder plugin    (like "gzip")         # not implemented yet [#32]
    java-input                 Java record input plugin    (like "mysql")
    java-output                Java record output plugin   (like "mysql")
    java-filter                Java record filter plugin   (like "add-hostname")
    java-file-input            Java file input plugin      (like "ftp")
    java-file-output           Java file output plugin     (like "ftp")
    java-parser                Java file parser plugin     (like "csv")
    java-formatter             Java file formatter plugin  (like "csv")
    java-decoder               Java file decoder plugin    (like "gzip")
    java-encoder               Java file encoder plugin    (like "gzip")

examples:
    new ruby-output hbase
    new ruby-filter int-to-string
]
    args = 2..2

  when :gem
    require 'rubygems/gem_runner'
    Gem::GemRunner.new.run argv
    exit 0

  when :example
    args = 0..1

  when :exec
    exec(*argv)
    exit 127

  else
    usage "Unknown subcommand #{subcmd.dump}."
  end

  # Parse the options registered above and check the number of positional
  # arguments that remain against the range the subcommand accepts.
  begin
    op.parse!(argv)
    unless args.include?(argv.length)
      usage_op op, nil
    end
  rescue => e
    usage_op op, e.to_s
  end

  case subcmd.to_sym
  when :bundle
    path = argv[0] || default_bundle_path

    require 'fileutils'
    require 'rubygems/gem_runner'
    setup_load_paths(load_paths)
    setup_classpaths(classpaths)

    if File.exist?(path)
      setup_gem_paths(path)
    else
      puts "Initializing #{path}..."
      FileUtils.mkdir_p File.dirname(path)
      begin
        success = false

        # copy embulk/data/bundle/ directory
        require 'embulk/data/package_data'
        pkg = PackageData.new("bundle", path)
        %w[.bundle/config embulk/input/example.rb embulk/output/example.rb embulk/filter/example.rb Gemfile].each do |file|
          pkg.cp(file, file)
        end

        ## TODO this is disabled for now. enable this if you want to use
        ## create bin/embulk
        #bin_embulk_path = File.join(path, 'bin', 'embulk')
        #FileUtils.mkdir_p File.dirname(bin_embulk_path)
        #require 'embulk/command/embulk_generate_bin'  # defines Embulk.generate_bin
        #File.open(bin_embulk_path, 'wb', 0755) {|f| f.write Embulk.generate_bin(bundle_path: :here) }

        # install bundler
        setup_gem_paths(path)
        Gem::GemRunner.new.run %w[install bundler]

        success = true
      rescue Gem::SystemExitException => e
        # The gem runner signals completion by raising; exit code 0 is success.
        raise e if e.exit_code != 0
        success = true
      ensure
        # Do not leave a half-initialized bundle directory behind.
        FileUtils.rm_rf path unless success
      end
    end

    ENV['BUNDLE_GEMFILE'] = File.expand_path File.join(path, "Gemfile")
    Dir.chdir(path) do
      require 'bundler'
      require 'bundler/friendly_errors'
      require 'bundler/cli'
      Bundler.with_friendly_errors do
        # run > bundle install
        Bundler::CLI.start(%w[install], debug: true)
      end
    end

  when :example
    require_relative 'embulk_example'
    # Read from the parsed argv parameter, not the process-global ARGV, so
    # the method also works when called with an array other than ARGV.
    path = argv[0] || "embulk-example"
    puts "Creating #{path} directory..."
    Embulk.create_example(path)
    puts ""
    puts "Run following subcommands to try embulk:"
    puts ""
    puts " 1. guess #{File.join(path, 'example.yml')} -o config.yml"
    puts " 2. preview config.yml"
    puts " 3. run config.yml"
    puts ""

  when :new
    lang_cate = argv[0]
    name = argv[1]

    language, category = case lang_cate
      when "java-input"       then [:java, :input]
      when "java-output"      then [:java, :output]
      when "java-filter"      then [:java, :filter]
      when "java-file-input"  then [:java, :file_input]
      when "java-file-output" then [:java, :file_output]
      when "java-parser"      then [:java, :parser]
      when "java-formatter"   then [:java, :formatter]
      when "java-decoder"     then [:java, :decoder]
      when "java-encoder"     then [:java, :encoder]
      when "ruby-input"       then [:ruby, :input]
      when "ruby-output"      then [:ruby, :output]
      when "ruby-filter"      then [:ruby, :filter]
      when "ruby-file-input"  then raise "ruby-file-input is not implemented yet. See #21 on github." #[:ruby, :file_input]
      when "ruby-file-output" then raise "ruby-file-output is not implemented yet. See #22 on github." #[:ruby, :file_output]
      when "ruby-parser"      then [:ruby, :parser]
      when "ruby-formatter"   then [:ruby, :formatter]
      when "ruby-decoder"     then raise "ruby-decoder is not implemented yet. See #31 on github." #[:ruby, :decoder]
      when "ruby-encoder"     then raise "ruby-encoder is not implemented yet. See #32 on github." #[:ruby, :encoder]
      else
        usage_op op, "Unknown category '#{lang_cate}'"
      end

    require 'embulk/command/embulk_new_plugin'
    Embulk.new_plugin(name, language, category)

  else
    require 'json'

    begin
      java.lang.Class.forName('org.embulk.command.Runner')
    rescue java.lang.ClassNotFoundException
      # load classpath
      classpath_dir = Embulk.home('classpath')
      jars = Dir.entries(classpath_dir).select {|f| f =~ /\.jar$/ }.sort
      jars.each do |jar|
        require File.join(classpath_dir, jar)
      end
    end

    setup_load_paths(load_paths)
    setup_classpaths(classpaths)

    begin
      org.embulk.command.Runner.new(options.to_json).main(subcmd, argv.to_java(:string))
    rescue => ex
      print_exception(ex)
      puts ""
      puts "Error: #{ex}"
      raise SystemExit.new(1, ex.to_s)
    end
  end
end