Class: MARC2Solr::Conf
- Inherits:
-
Object
- Object
- MARC2Solr::Conf
- Includes:
- JLogger::Simple
- Defined in:
- lib/marc2solr.rb
Constant Summary collapse
- SUB_COMMANDS =
%w(index delete commit help ping)
- OPTIONSCONFIG =
[:custom, {:desc => "Any custom value you want. In a config file, use two String arguments (custom key value); on the command line use (--custom key=value) or (--custom key=\"three word value\")", :type=>String, :multi => true, :only => [:index], :short => '-C' }], [:config, {:desc => "Configuation file specifying options. Repeatable. Command-line arguments always override the config file(s)", :type => :io, :multi => true}], [:benchmark, {:desc=> "Benchmark production of each solr field", :only=> [:index], :short => '-B' }], [:NObenchmark, {:desc=> "Benchmark production of each solr field", :only=> [:index], }], [:dryrun, {:desc => "Don't send anything to solr", }], [:NOdryrun, {:desc => "Disable a previous 'dryrun' directive", }], [:printmarc, {:desc =>"Print MARC Record (as text) to --debugfile", :only => [:index], :short => '-r' }], [:NOprintmarc, {:desc =>"Turn off printing MARC Record (as text) to --debugfile", :only => [:index], }], [:printdoc, {:desc => "Print each completed document to --debugfile", :only => [:index], :short => '-d'} ], [:NOprintdoc, {:desc => "Turn off printing each completed document to --debugfile", :only => [:index], }], [:debugfile, {:desc => "Where to send output from --printmarc and --printdoc (takes filename, 'STDERR', 'STDOUT', or 'NONE') (repeatable)", \ :default => "STDOUT", :isOutfile => true, :takesNone => true, :type => String, :only => [:delete, :index], }], [:clearsolr, {:desc => "Clean out Solr by deleting everything in it (DANGEROUS)", :only => [:index] }], [:NOclearsolr, {:desc => "Disable a previous --clearsolr command", :only => [:index] }], [:skipcommit, {:desc => "DON'T send solr a 'commit' afterwards", :short => '-S', :only => [:delete, :index], }], [:threads, {:desc => "Number of threads to use to process MARC records (>1 => use 'threach')", :type => :int, :default => 1, :only => [:index] }], [:sussthreads, {:desc => "Number of threads to send completed docs to Solr", :type => :int, :default => 1}], [:susssize, 
{:desc => "Size of the documente queue for sending to Solr", :default => 128}], [:machine, {:desc => "Name of solr machine (e.g., solr.myplace.org)", :short => '-m', # :required => [:index, :commit, :delete], :type => String}], [:port, {:desc => "Port of solr machine (e.g., '8088')", :short => '-p', :type => :int}], [:solrpath, {:desc => "URL path to solr", :short => '-P', :type => String, }], [:javabin, {:desc => "Use javabin (presumes /update/bin is configured in schema.xml)", }], [:NOjavabin, {:desc => "Don't use javabin", }], [:logfile, {:desc => "Name of the logfile (filename, 'STDERR', 'DEFAULT', or 'NONE'). 'DEFAULT' is a file based on input file name", :default => "DEFAULT", :takesNone => true, :type => String}], [:loglevel, {:desc=>"Level at which to log (DEBUG, INFO, WARN, ERROR, OFF)", :short => '-L', :takesNone => true, :valid => %w{OFF DEBUG INFO WARN ERROR }, :default => 'INFO'}], [:logbatchsize, {:desc => "Write progress information to logfile after every N records", :default => 25000, :only => [:delete, :index], :short => '-b'}], [:indexfile, {:desc => "The index file describing your specset (usually index.dsl)", :type => String, :only => [:index], }], [:tmapdir, {:desc => "Directory that contains any translation maps", :type => String, :only => [:index] }], [:customdir, {:desc=>"The directory containging custom routine libraries (usually the 'lib' next to index.rb). Repeatable", :only => [:index], :multi => true, :takesNone => true, :type => String }], [:marctype, {:desc => "Type of marc file ('bestguess', 'strictmarc'. 'marcxml', 'alephsequential', 'permissivemarc')", :only => [:index], :short => '-t', :valid => %w{bestguess strictmarc permissivemarc marcxml alephsequential }, :default => 'bestguess' }], [:encoding, {:desc => "Encoding of the MARC file ('bestguess', 'utf8', 'marc8', 'iso')", :valid => %w{bestguess utf8 marc8 iso}, :only => [:index], :default => 'bestguess'}], [:gzipped, {:desc=>"Is the input gzipped? 
An extenstion of .gz will always force this to true", :default => false, :only => [:index, :delete], }]
- VALIDOPTIONS =
{}
- HELPTEXT =
{ 'help' => "Get help on a command\nmarc2solr help <cmd> where <cmd> is index, delete, or commit", 'index' => "Index the given MARC file\nmarc2solr index --config <file> --override <marcfile> <marcfile2...>", 'delete' => "Delete based on ID\nmarc2solr delete --config <file> --override <file_of_ids_to_delete> <another_file...>", 'commit' => "Send a commit to the specified Solr\nmarc2solr commit --config <file> --override", }
Instance Attribute Summary collapse
-
#cmdline ⇒ Object
Returns the value of attribute cmdline.
-
#command ⇒ Object
Returns the value of attribute command.
-
#config ⇒ Object
Returns the value of attribute config.
-
#rest ⇒ Object
Returns the value of attribute rest.
Instance Method Summary collapse
- #[](arg) ⇒ Object
- #command_line_opts ⇒ Object
-
#custom(*args) ⇒ Object
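Store a custom key/value setting: two arguments (key, value) when called from a config file, or a list of "key=value" strings when given on the command line.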
-
#initialize ⇒ Conf
constructor
A new instance of Conf.
- #masterLogger ⇒ Object
- #method_missing(methodSymbol, arg = :notgiven, fromCmdline = false) ⇒ Object
- #pretty_print(pp) ⇒ Object
- #print_basic_help ⇒ Object
- #print_command_help(cmd) ⇒ Object
- #reader(filename) ⇒ Object
- #suss ⇒ Object
-
#sussURL ⇒ Object
Build the Solr URL (http://machine:port/solrpath) that the SUSS connects to, from the machine, port and solrpath settings.
Constructor Details
#initialize ⇒ Conf
Returns a new instance of Conf.
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
# File 'lib/marc2solr.rb', line 152
# Build a configuration from the command line and any --config files.
#
# Config files are read first and evaluated as Ruby in the context of this
# object, so each bare `option value` line ends up in method_missing (or in
# #custom). Values that were explicitly given on the command line are applied
# afterwards and therefore override the config files; command-line defaults
# are only applied for options no config file has set.
#
# Sets @config, @cmdline, @cmdline_given and @rest (the remaining ARGV).
def initialize
  @config = {}
  @cmdline = command_line_opts

  # Load the config files
  if @cmdline[:config]
    @cmdline[:config].each do |f|
      log.info "Reading config-file '{}'", f.path
      # Pass the path along so an error raised inside a config file reports
      # that file's name and line instead of "(eval)".
      # NOTE(review): this evaluates arbitrary Ruby from the config file.
      instance_eval(f.read, f.path)
    end
  end

  # Remove the config
  # Now override with the command line
  @cmdline.delete :config
  @cmdline.delete :config_given

  # Remove any "help" stuff
  @cmdline.delete_if { |k, _v| k.to_s =~ /^help/ }

  # Keep track of what was passed on cmdline: the option parser reports an
  # explicitly supplied option as an extra <name>_given key.
  @cmdline_given = {}
  @cmdline.keys.each do |k|
    if k.to_s =~ /^(.+?)_given$/
      @cmdline_given[$1.to_sym] = true
      @cmdline.delete(k)
    end
  end

  @cmdline.each_pair do |k, v|
    if @cmdline_given[k]
      # Explicit command-line value: always wins
      puts "Send override #{k} = #{v}"
      send(k, v)
    elsif !@config.has_key?(k)
      # Command-line default: only when no config file set the option
      send(k, v)
    end
  end

  @rest = ARGV
end
Dynamic Method Handling
This class handles dynamic methods through the method_missing method
#method_missing(methodSymbol, arg = :notgiven, fromCmdline = false) ⇒ Object
297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 |
# File 'lib/marc2solr.rb', line 297
def method_missing(methodSymbol, arg=:notgiven, fromCmdline = false)
return @config[methodSymbol] if arg == :notgiven
methodSymbol = methodSymbol.to_s.gsub(/=$/, '').to_sym
# Deal with negatives. We only want them if the argument is true
if methodSymbol.to_s =~ /^NO(.*)/
if arg == true
methodSymbol = $1.to_sym
arg = false
else
# puts "Ignoring false-valued #{methodSymbol}"
return # do nothing
end
end
# puts " Setting #{methodSymbol} to #{arg}"
if VALIDOPTIONS.has_key? methodSymbol
conf = VALIDOPTIONS[methodSymbol]
# Zero it out?
if conf[:takesNone] and arg.downcase == 'none'
@config[methodSymbol] = nil
return nil
end
# Check for a valid value
if conf[:valid]
unless conf[:valid].include? arg
raise ArgumentError "'#{arg}' is not a valid value for #{methodSymbol}"
end
end
# Make it a file?
if conf[:isOutfile]
# If it's an IO object, just take it
break if arg.is_a? IO or arg.is_a? StringIO
# Otherwise...
case arg.downcase
when "stdin"
arg = STDIN
when "stdout"
arg = STDOUT
when "stderr"
arg = STDERR
else
arg = File.new(arg, 'w')
Trollop.die "Can't open '#{arg}' for writing in argument #{methodSymbol}" unless arg
end
end
if conf[:multi]
@config[methodSymbol] ||= []
@config[methodSymbol] << arg
@config[methodSymbol].flatten!
else
@config[methodSymbol] = arg
end
# puts "Set #{methodSymbol} to #{arg}"
return @config[methodSymbol]
else
raise NoMethodError, "'#{methodSymbol} is not a valid MARC2Solr configuration option for #{@cmd}"
end
end
|
Instance Attribute Details
#cmdline ⇒ Object
Returns the value of attribute cmdline.
151 152 153 |
# File 'lib/marc2solr.rb', line 151
# Reader for the command-line options (@cmdline), as returned by
# #command_line_opts and then pruned by #initialize.
#
# @return [Object] the value of @cmdline
def cmdline
  @cmdline
end
#command ⇒ Object
Returns the value of attribute command.
151 152 153 |
# File 'lib/marc2solr.rb', line 151
# Reader for the subcommand (@command), which #command_line_opts takes from
# the front of ARGV.
#
# @return [Object] the value of @command
def command
  @command
end
#config ⇒ Object
Returns the value of attribute config.
151 152 153 |
# File 'lib/marc2solr.rb', line 151
# Reader for the settings hash (@config) that the option setters fill in.
#
# @return [Object] the value of @config
def config
  @config
end
#rest ⇒ Object
Returns the value of attribute rest.
151 152 153 |
# File 'lib/marc2solr.rb', line 151
# Reader for @rest, which #initialize sets to ARGV once the subcommand and
# options have been processed.
#
# @return [Object] the value of @rest
def rest
  @rest
end
Instance Method Details
#[](arg) ⇒ Object
197 198 199 |
# File 'lib/marc2solr.rb', line 197
# Hash-style read access to a stored setting.
#
# @param arg [Object] the key to look up in @config
# @return [Object, nil] the stored value, or nil when the key is absent
def [](arg)
  @config[arg]
end
#command_line_opts ⇒ Object
201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 |
# File 'lib/marc2solr.rb', line 201
# Take the subcommand off the front of ARGV, handle the help cases, and parse
# the remaining arguments with Trollop.
#
# Unknown or missing subcommands, an empty argument list, and
# `help <unknown>` all go to #print_basic_help; `help <cmd>` goes to
# #print_command_help for that command.
#
# @return [Object] whatever Trollop::options returns for the options that
#   apply to the chosen subcommand
def command_line_opts
  @command = ARGV.shift # get the subcommand

  # First, deal with the help situations
  unless SUB_COMMANDS.include? @command
    puts "Unknown command '#{@command}'" if @command
    print_basic_help
  end

  print_basic_help if ARGV.size == 0

  if @command == 'help'
    # The command to explain is the next argument. (The original tested an
    # instance variable that was never assigned, so this branch could only
    # ever print the basic help.)
    @command = ARGV.shift
    if SUB_COMMANDS.include? @command
      print_command_help @command
    else
      print_basic_help
    end
  end

  # OK. Now let's actually get and return the args
  #
  # Trollop is a DSL and doesn't see our instance variables, so alias
  # @command to a local that the block can close over.
  cmd = @command
  Trollop::options do
    OPTIONSCONFIG.each do |name, settings|
      next if settings[:only] && !settings[:only].include?(cmd.to_sym)
      # Work on a copy so the shared OPTIONSCONFIG entries keep their :desc
      settings = settings.dup
      desc = settings.delete(:desc)
      opt name, desc, settings
    end
  end
end
#custom(*args) ⇒ Object
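Store a custom key/value setting: two arguments (key, value) when called from a config file, or a list of "key=value" strings when given on the command line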
284 285 286 287 288 289 290 291 292 293 294 |
# File 'lib/marc2solr.rb', line 284
# Store user-defined key/value settings in @config.
#
# Two calling conventions are supported:
#   custom key, value             (two arguments, as written in a config file)
#   custom ["k=v", "k2='a b c'"]  (one list of key=value strings, as collected
#                                  from repeated --custom options)
#
# In the second form only the first '=' separates key from value, so values
# may themselves contain '='. Surrounding quotes on the value are removed.
#
# @param args [Array] either (key, value) or a single list of "key=value" strings
# @raise [ArgumentError] if a command-line style entry contains no '='
def custom(*args)
  if args.size == 2 # called in a config file
    @config[args[0]] = args[1]
  else # parse it out
    Array(args[0]).each do |str|
      key, val = str.split(/\s*=\s*/, 2)
      if val.nil?
        raise ArgumentError, "custom value '#{str}' must be given as key=value"
      end
      @config[key] = val.gsub(/^["']*(.*?)['"]$/, '\1')
    end
  end
end
#masterLogger ⇒ Object
409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 |
# File 'lib/marc2solr.rb', line 409
# Create and configure the root logger from the :loglevel and :logfile
# settings.
#
# :logfile selects the destination: "STDERR" logs to the console, "DEFAULT"
# logs to a timestamped file named after the first remaining argument (or the
# subcommand when there is none), 'NONE'/nil turns logging off, and anything
# else is used as the log file name.
#
# @return [JLogger::RootLogger] the configured logger
def masterLogger
  mlog = JLogger::RootLogger.new
  # :loglevel can be reset to nil with 'none'; treat that as OFF rather than
  # calling downcase on nil.
  level = @config[:loglevel] || 'OFF'
  mlog.loglevel = level.downcase.to_sym

  case @config[:logfile]
  when "STDERR"
    mlog.startConsole
  when "DEFAULT"
    firstfile = self.rest[0] || self.command
    # Strip everything from the first dot onwards ("recs.mrc.gz" -> "recs")
    logfilename = File.basename(firstfile).gsub(/\..*$/, '')
    logfilename += '-' + Time.new.strftime('%Y%m%d-%H%M%S') + '.log'
    mlog.startFile(logfilename)
  when 'NONE', nil
    mlog.stopConsole
    mlog.loglevel = :off
  else
    mlog.startFile(@config[:logfile])
  end
  mlog
end
#pretty_print(pp) ⇒ Object
279 280 281 |
# File 'lib/marc2solr.rb', line 279
# Pretty-printing hook: show this object as its settings hash.
#
# @param pp [#pp] the pretty-printer; its #pp is called with @config
# @return [Object] whatever pp.pp returns
def pretty_print(pp)
  pp.pp @config
end
#print_basic_help ⇒ Object
241 242 243 244 245 246 247 248 249 250 251 252 253 254 |
# File 'lib/marc2solr.rb', line 241
# Print the top-level usage summary to standard output and exit the process.
#
# NOTE(review): the line breaks and indentation inside the help text were
# reconstructed; confirm the exact layout against the original source file.
def print_basic_help
  puts %Q{
  marc2solr: get MARC data into Solr

  USAGE
    marc2solr index (index MARC records into Solr)
    marc2solr delete (delete by ID from Solr)
    marc2solr commit (send a 'commit' to a solr install)

  Use "marc2solr <cmd> --help" for more help

  }
  Process.exit
end
#print_command_help(cmd) ⇒ Object
256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 |
# File 'lib/marc2solr.rb', line 256
# Print the help for one subcommand and exit.
#
# '--help' is pushed onto the front of ARGV and the options that apply to
# the subcommand are declared to Trollop, after the command's HELPTEXT entry
# and a short note on config-file precedence have been printed.
#
# @param cmd [String] the subcommand to describe
def print_command_help(cmd)
  ARGV.unshift '--help'
  Trollop::options do
    # to_s: not every subcommand has a HELPTEXT entry ('ping' has none)
    puts "\n\n" + HELPTEXT[cmd].to_s + "\n\n"
    puts "You may specify multiple configuration files and they will be loaded in"
    puts "the order given."
    puts ""
    puts "Command line arguments always override configuration file settings\n\n"

    OPTIONSCONFIG.each do |name, settings|
      next if settings[:only] && !settings[:only].include?(cmd.to_sym)
      # Work on a copy so the shared OPTIONSCONFIG entries keep their :desc
      settings = settings.dup
      desc = settings.delete(:desc)
      opt name, desc, settings
    end
  end
  print "\n\n"
  Process.exit
end
#reader(filename) ⇒ Object
433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 |
# File 'lib/marc2solr.rb', line 433
# Build a MARC4J4R reader for the given input.
#
# The record format comes from the :marctype setting; with 'bestguess' it is
# sniffed from the file extension (looking underneath a trailing .gz):
# anything containing "xml" is MARC-XML, "seq" or "aleph" is Aleph
# sequential, everything else is read as permissive MARC. An :encoding of
# 'bestguess' is passed on as nil.
#
# @param filename [String] path of the input file, or "STDIN"
# @return [MARC4J4R::Reader]
def reader(filename)
  configuredType = @config[:marctype].downcase.to_sym
  encoding = @config[:encoding].downcase.to_sym
  encoding = nil if encoding == :bestguess

  # A .gz extension forces gzip handling whatever --marctype says, as the
  # --gzipped option description promises (the check used to run only while
  # sniffing the type).
  gzipped = !(File.basename(filename) =~ /\.gz\z/i).nil?

  if configuredType == :bestguess
    if filename =~ /\.(.+)$/ # if there's an extension
      parts = File.basename(filename).split(/\./)
      ext = parts[-1].to_s.downcase
      # For foo.xml.gz the interesting extension is the one before .gz
      ext = parts[-2].to_s.downcase if ext == 'gz'
      log.info "Sniffed marc file type as {}", ext
      type = case ext
             when /xml/
               :marcxml
             when /seq/, /aleph/
               :alephsequential
             else
               :permissivemarc
             end
    else
      type = :permissivemarc
    end
  else
    type = configuredType
  end

  source = filename
  source = STDIN if source == "STDIN"

  if gzipped || @config[:gzipped]
    source = Java::java.util.zip.GZIPInputStream.new(IOConvert.byteinstream(source))
  end

  MARC4J4R::Reader.new(source, type, encoding)
end
#suss ⇒ Object
392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 |
# File 'lib/marc2solr.rb', line 392
# Create the StreamingUpdateSolrServer used to send documents to Solr.
#
# The server is pointed at #sussURL and sized from the :susssize and
# :sussthreads settings; when :javabin is set it is given a
# BinaryRequestWriter.
#
# @return [StreamingUpdateSolrServer]
def suss
  solr_url = self.sussURL
  log.info "Set suss url to {}", solr_url

  if @config[:sussthreads] > 1
    log.info "Using {} threads for the suss", @config[:sussthreads]
  else
    log.info "Using a single thread for the suss"
  end

  server = StreamingUpdateSolrServer.new(solr_url, @config[:susssize], @config[:sussthreads])
  if self[:javabin]
    server.setRequestWriter Java::org.apache.solr.client.solrj.impl.BinaryRequestWriter.new
    log.debug "Using javabin"
  end
  server
end
#sussURL ⇒ Object
Build the Solr URL (http://machine:port/solrpath) that the SUSS connects to, from the machine, port and solrpath settings
368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 |
# File 'lib/marc2solr.rb', line 368
# Build the Solr URL from the :machine, :port and :solrpath settings.
#
# Leading and trailing slashes are removed from the path and repeated
# slashes are collapsed. The stored :solrpath string is not modified.
#
# @return [String] "http://<machine>:<port>/<path>"
# @raise [ArgumentError] if machine, port or solrpath is not set (the same
#   message is logged as an error first)
def sussURL
  required = [
    [:machine,  "Need solr machine name (--machine)"],
    [:port,     "Need solr port (--port)"],
    [:solrpath, "Need solr path (--solrpath)"],
  ]

  machine, port, path = required.map do |key, complaint|
    value = self[key]
    unless value
      log.error complaint
      raise ArgumentError, complaint
    end
    value
  end

  # Non-mutating cleanup: the configured string may be frozen or shared
  path = path.sub(%r{\A/+}, '').sub(%r{/+\z}, '') # remove any leading/trailing slashes
  path = path.squeeze('/')                        # make sure there are no double-slashes

  'http://' + machine + ':' + port.to_s + '/' + path
end