Class: MARC2Solr::Conf

Inherits:
Object
  • Object
show all
Includes:
JLogger::Simple
Defined in:
lib/marc2solr.rb

Constant Summary collapse

# Subcommands accepted as the first command-line argument; anything else
# makes command_line_opts print the basic help.
SUB_COMMANDS =
%w[index delete commit help ping]
# Declarative table of every option understood on the command line or in a
# config file. Each entry is a [name, spec] pair.
#
# Keys found in a spec:
#   :desc, :type, :multi, :short, :default -- handed to Trollop's `opt` by
#       command_line_opts / print_command_help
#   :only      -- subcommands for which the option is registered; options
#                 without :only are registered for every subcommand
#   :takesNone -- a value of 'none' (any case) clears the setting
#   :valid     -- list of the only accepted values
#   :isOutfile -- the value names an output stream/file to be opened
# The last three (and :multi) are read by method_missing from the option's
# VALIDOPTIONS entry -- presumably built from this table; TODO confirm.
#
# "NOxxx" options are the negations of the boolean option "xxx".
OPTIONSCONFIG =
[
  [:custom, {
    :desc => "Any custom value you want. In a config file, use two String arguments (custom key value); on the command line use (--custom key=value) or (--custom key=\"three word value\")",
    :type => String,
    :multi => true,
    :only => [:index],
    :short => '-C'
  }],
  [:config, {
    :desc => "Configuration file specifying options. Repeatable. Command-line arguments always override the config file(s)",
    :type => :io,
    :multi => true
  }],
  [:benchmark, {
    :desc => "Benchmark production of each solr field",
    :only => [:index],
    :short => '-B'
  }],
  [:NObenchmark, {
    :desc => "Disable a previous 'benchmark' directive",
    :only => [:index]
  }],
  [:dryrun, {
    :desc => "Don't send anything to solr"
  }],
  [:NOdryrun, {
    :desc => "Disable a previous 'dryrun' directive"
  }],
  [:printmarc, {
    :desc => "Print MARC Record (as text) to --debugfile",
    :only => [:index],
    :short => '-r'
  }],
  [:NOprintmarc, {
    :desc => "Turn off printing MARC Record (as text) to --debugfile",
    :only => [:index]
  }],
  [:printdoc, {
    :desc => "Print each completed document to --debugfile",
    :only => [:index],
    :short => '-d'
  }],
  [:NOprintdoc, {
    :desc => "Turn off printing each completed document to --debugfile",
    :only => [:index]
  }],
  [:debugfile, {
    :desc => "Where to send output from --printmarc and --printdoc (takes filename, 'STDERR', 'STDOUT', or 'NONE') (repeatable)",
    :default => "STDOUT",
    :isOutfile => true,
    :takesNone => true,
    :type => String,
    :only => [:delete, :index]
  }],
  [:clearsolr, {
    :desc => "Clean out Solr by deleting everything in it (DANGEROUS)",
    :only => [:index]
  }],
  [:NOclearsolr, {
    :desc => "Disable a previous --clearsolr command",
    :only => [:index]
  }],
  [:skipcommit, {
    :desc => "DON'T send solr a 'commit' afterwards",
    :short => '-S',
    :only => [:delete, :index]
  }],
  [:threads, {
    :desc => "Number of threads to use to process MARC records (>1 => use 'threach')",
    :type => :int,
    :default => 1,
    :only => [:index]
  }],
  [:sussthreads, {
    :desc => "Number of threads to send completed docs to Solr",
    :type => :int,
    :default => 1
  }],
  [:susssize, {
    :desc => "Size of the document queue for sending to Solr",
    :default => 128
  }],
  [:machine, {
    :desc => "Name of solr machine (e.g., solr.myplace.org)",
    :short => '-m',
    # :required => [:index, :commit, :delete],
    :type => String
  }],
  [:port, {
    :desc => "Port of solr machine (e.g., '8088')",
    :short => '-p',
    :type => :int
  }],
  [:solrpath, {
    :desc => "URL path to solr",
    :short => '-P',
    :type => String
  }],
  [:javabin, {
    :desc => "Use javabin (presumes /update/bin is configured in schema.xml)"
  }],
  [:NOjavabin, {
    :desc => "Don't use javabin"
  }],
  [:logfile, {
    :desc => "Name of the logfile (filename, 'STDERR', 'DEFAULT', or 'NONE'). 'DEFAULT' is a file based on input file name",
    :default => "DEFAULT",
    :takesNone => true,
    :type => String
  }],
  [:loglevel, {
    :desc => "Level at which to log (DEBUG, INFO, WARN, ERROR, OFF)",
    :short => '-L',
    :takesNone => true,
    :valid => %w{OFF DEBUG INFO WARN ERROR},
    :default => 'INFO'
  }],
  [:logbatchsize, {
    :desc => "Write progress information to logfile after every N records",
    :default => 25000,
    :only => [:delete, :index],
    :short => '-b'
  }],
  [:indexfile, {
    :desc => "The index file describing your specset (usually index.dsl)",
    :type => String,
    :only => [:index]
  }],
  [:tmapdir, {
    :desc => "Directory that contains any translation maps",
    :type => String,
    :only => [:index]
  }],
  [:customdir, {
    :desc => "The directory containing custom routine libraries (usually the 'lib' next to index.rb). Repeatable",
    :only => [:index],
    :multi => true,
    :takesNone => true,
    :type => String
  }],
  [:marctype, {
    :desc => "Type of marc file ('bestguess', 'strictmarc', 'marcxml', 'alephsequential', 'permissivemarc')",
    :only => [:index],
    :short => '-t',
    :valid => %w{bestguess strictmarc permissivemarc marcxml alephsequential},
    :default => 'bestguess'
  }],
  [:encoding, {
    :desc => "Encoding of the MARC file ('bestguess', 'utf8', 'marc8', 'iso')",
    :valid => %w{bestguess utf8 marc8 iso},
    :only => [:index],
    :default => 'bestguess'
  }],
  [:gzipped, {
    :desc => "Is the input gzipped? An extension of .gz will always force this to true",
    :default => false,
    :only => [:index, :delete]
  }]
]
# Option specs keyed by option name; method_missing looks options up here to
# validate and post-process a value before storing it. It starts out empty --
# presumably it is filled from OPTIONSCONFIG elsewhere in the file; TODO confirm.
VALIDOPTIONS =
{}
# Short help for each subcommand, keyed by subcommand name. Each text is two
# lines: what the command does, then a sample invocation. print_command_help
# prints the entry for the requested command.
HELPTEXT =
{
  'help' =>
    "Get help on a command\n" \
    "marc2solr help <cmd> where <cmd> is index, delete, or commit",
  'index' =>
    "Index the given MARC file\n" \
    "marc2solr index --config <file> --override <marcfile> <marcfile2...>",
  'delete' =>
    "Delete based on ID\n" \
    "marc2solr delete --config <file> --override <file_of_ids_to_delete> <another_file...>",
  'commit' =>
    "Send a commit to the specified Solr\n" \
    "marc2solr commit --config <file> --override"
}

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initializeConf

Returns a new instance of Conf.



151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
# File 'lib/marc2solr.rb', line 151

# Build the configuration in three layers: parse the command line, evaluate
# any config files, then apply the command-line values on top.
#
# A value explicitly given on the command line always wins; a command-line
# default is applied only when no config file has already set that option.
# Whatever is left in ARGV afterwards is kept in @rest.
def initialize
  @config = {}
  @cmdline = command_line_opts

  # Config files are Ruby source evaluated in the context of this object.
  config_files = @cmdline[:config]
  if config_files
    config_files.each do |file|
      log.info "Reading config-file '{}'", file.path
      instance_eval(file.read)
    end
  end

  # The config-file entries and any help flags are not settings themselves.
  [:config, :config_given].each { |key| @cmdline.delete(key) }
  @cmdline.delete_if { |key, _value| key.to_s =~ /^help/ }

  # Move the "<option>_given" markers out of @cmdline, remembering which
  # options were actually passed on the command line.
  @cmdline_given = {}
  @cmdline.keys.each do |key|
    next unless key.to_s =~ /^(.+?)_given$/
    @cmdline_given[$1.to_sym] = true
    @cmdline.delete(key)
  end

  @cmdline.each_pair do |key, value|
    if @cmdline_given[key]
      puts "Send override #{key} = #{value}"
    elsif @config.has_key?(key)
      next # a config file already set it; don't clobber it with a default
    end
    send(key, value)
  end

  @rest = ARGV
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(methodSymbol, arg = :notgiven, fromCmdline = false) ⇒ Object



296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
# File 'lib/marc2solr.rb', line 296

def method_missing(methodSymbol, arg=:notgiven, fromCmdline = false)
  return @config[methodSymbol] if arg == :notgiven
  methodSymbol = methodSymbol.to_s.gsub(/=$/, '').to_sym
  
  # Deal with negatives. We only want them if the argument is true
  if methodSymbol.to_s =~ /^NO(.*)/
    if arg == true
      methodSymbol = $1.to_sym
      arg = false
    else
      # puts "Ignoring false-valued #{methodSymbol}"
      return # do nothing
    end
  end
  
  # puts "   Setting #{methodSymbol} to #{arg}"
  if VALIDOPTIONS.has_key? methodSymbol
    conf = VALIDOPTIONS[methodSymbol]
    # Zero it out?
    if conf[:takesNone] and arg.to_a.map{|a| a.downcase}.include? 'none'
      @config[methodSymbol] = nil 
      return nil
    end
    
    
    # Check for a valid value
    if conf[:valid]
      unless conf[:valid].include? arg
        raise ArgumentError "'#{arg}' is not a valid value for #{methodSymbol}"
      end
    end
    
    # Make it a file?
    
    if conf[:isOutfile]
      # If it's an IO object, just take it
      break if arg.is_a? IO or arg.is_a? StringIO
      
      # Otherwise...
      case arg.downcase
      when "stdin"
        arg = STDIN
      when "stdout"
        arg = STDOUT
      when "stderr"
        arg = STDERR
      else
        arg = File.new(arg, 'w')
        Trollop.die "Can't open '#{arg}' for writing in argument #{methodSymbol}" unless arg
      end
    end
        
    
    if conf[:multi]
      @config[methodSymbol] ||= []
      @config[methodSymbol] << arg
      @config[methodSymbol].flatten!
    else
      @config[methodSymbol] = arg 
    end
    # puts "Set #{methodSymbol} to #{arg}"
    return @config[methodSymbol]
  else
    raise NoMethodError, "'#{methodSymbol} is not a valid MARC2Solr configuration option for #{@cmd}"
  end
end

Instance Attribute Details

#cmdlineObject

Returns the value of attribute cmdline.



150
151
152
# File 'lib/marc2solr.rb', line 150

# The command-line options hash obtained from command_line_opts; initialize
# removes the :config, help and "*_given" entries from it after using them.
def cmdline
  @cmdline
end

#commandObject

Returns the value of attribute command.



150
151
152
# File 'lib/marc2solr.rb', line 150

# The subcommand taken from the front of ARGV by command_line_opts (after
# `help <cmd>`, the command that help was requested for).
def command
  @command
end

#configObject

Returns the value of attribute config.



150
151
152
# File 'lib/marc2solr.rb', line 150

# The hash of current settings, keyed by option name; written by
# method_missing and custom.
def config
  @config
end

#restObject

Returns the value of attribute rest.



150
151
152
# File 'lib/marc2solr.rb', line 150

# ARGV as it stands at the end of initialize, i.e. after the subcommand and
# options were consumed -- presumably the input file names; verify with callers.
def rest
  @rest
end

Instance Method Details

#[](arg) ⇒ Object



196
197
198
# File 'lib/marc2solr.rb', line 196

# Hash-style read access to a single setting.
#
# @param arg [Object] the key to look up in the settings hash
# @return [Object, nil] the stored value, or nil when the key is absent
def [](arg)
  @config[arg]
end

#command_line_optsObject



200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
# File 'lib/marc2solr.rb', line 200

# Take the subcommand off the front of ARGV, handle the help cases, and parse
# the remaining arguments with Trollop.
#
# Basic help is printed when the subcommand is missing or unknown, or when
# nothing follows it. For `help <cmd>` the help for <cmd> is printed when
# <cmd> is a known subcommand, otherwise the basic help.
#
# Only options whose :only list includes the subcommand (or that have no
# :only list) are registered with Trollop.
#
# @return [Hash] the options hash returned by Trollop::options
def command_line_opts
  @command = ARGV.shift # get the subcommand

  # First, deal with the help situations
  unless SUB_COMMANDS.include? @command
    puts "Unknown command '#{@command}'" if @command
    print_basic_help
  end

  print_basic_help if ARGV.size == 0

  if @command == 'help'
    @command = ARGV.shift
    if SUB_COMMANDS.include? @command
      print_command_help @command
    else
      print_basic_help
    end
  end

  # OK. Now let's actually get and return the args
  #
  # Trollop is a DSL and doesn't see our instance variables, so the command
  # is aliased to a local that the block can close over.
  cmd = @command
  Trollop::options do
    OPTIONSCONFIG.each do |name, spec|
      next if spec[:only] and not spec[:only].include? cmd.to_sym
      # Pass a copy without :desc so the shared option table is not modified.
      settings = spec.reject { |key, _value| key == :desc }
      opt name, spec[:desc], settings
    end
  end
end

#custom(*args) ⇒ Object

Handle the custom option specially: store arbitrary key/value settings directly in the configuration



283
284
285
286
287
288
289
290
291
292
293
# File 'lib/marc2solr.rb', line 283

# Store arbitrary user-defined key/value settings in the configuration.
#
# Two calling conventions are supported:
# * two arguments (as written in a config file): `custom key, value`
# * one argument holding "key=value" strings (as collected from repeated
#   --custom options); quotes around the value are removed
#
# Only the first "=" separates key from value, so values may themselves
# contain "=". A nil argument is treated as an empty list.
#
# @param args [Array] either (key, value) or a single list of "key=value" strings
# @raise [ArgumentError] if a command-line style entry contains no "="
def custom(*args)
  if args.size == 2 # called in a config file
    @config[args[0]] = args[1]
  else # parse it out
    Array(args[0]).each do |str|
      key, val = str.split(/\s*=\s*/, 2)
      if val.nil?
        raise ArgumentError, "Custom value '#{str}' must be of the form key=value"
      end
      @config[key] = val.gsub(/^["']*(.*?)['"]$/, '\1')
    end
  end
end

#masterLoggerObject



407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
# File 'lib/marc2solr.rb', line 407

# Create and configure the root logger from the :loglevel and :logfile
# settings.
#
# :logfile selects the destination:
# * "STDERR"       -- log to the console
# * "DEFAULT"      -- log to "<name>-<YYYYmmdd-HHMMSS>.log", where <name> is
#                     the base name of the first remaining argument (or the
#                     subcommand) with everything from its first dot removed
# * 'NONE' or nil  -- stop console logging and turn the level off
# * anything else  -- treated as the name of the log file
#
# A nil :loglevel (the result of setting it to 'none') is treated as OFF.
#
# @return [JLogger::RootLogger] the configured logger
def masterLogger
  mlog = JLogger::RootLogger.new
  mlog.loglevel = (@config[:loglevel] || 'OFF').downcase.to_sym

  case @config[:logfile]
  when "STDERR"
    mlog.startConsole
  when "DEFAULT"
    # Only build the default name when it is actually needed.
    firstfile = self.rest[0] || self.command
    logfilename = File.basename(firstfile).gsub(/\..*$/, '') # drop everything from the first dot on
    logfilename += '-' + Time.new.strftime('%Y%m%d-%H%M%S') + '.log'
    mlog.startFile(logfilename)
  when 'NONE', nil
    mlog.stopConsole
    mlog.loglevel = :off
  else
    mlog.startFile(@config[:logfile])
  end
  mlog
end

#pretty_print(pp) ⇒ Object



278
279
280
# File 'lib/marc2solr.rb', line 278

# Pretty-printing hook: show this object as its settings hash by handing
# @config to the given printer.
#
# @param pp [#pp] the pretty-printer to write to
def pretty_print(pp)
  pp.pp(@config)
end


240
241
242
243
244
245
246
247
248
249
250
251
252
253
# File 'lib/marc2solr.rb', line 240

# Print the top-level usage summary to standard output and exit the process.
# This method does not return.
def print_basic_help
  puts %Q{
  marc2solr: get MARC data into Solr
  
  USAGE
marc2solr index (index MARC records into Solr)
marc2solr delete (delete by ID from Solr)
marc2solr commit (send a 'commit' to a solr install)

  Use "marc2solr <cmd> --help" for more help

}
  # Help was requested (or the invocation was unusable), so stop here.
  Process.exit
end


255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
# File 'lib/marc2solr.rb', line 255

# Print the help for a single subcommand and exit the process.
#
# The subcommand's HELPTEXT entry and a note about config files are printed
# first; then '--help' is pushed onto ARGV and the options that apply to the
# subcommand are registered with Trollop, which is relied upon to print the
# option list for '--help'. Subcommands without a HELPTEXT entry get a
# generic line instead of failing.
#
# @param cmd [String] the subcommand to describe
def print_command_help cmd
  ARGV.unshift '--help'
  helptext = HELPTEXT.fetch(cmd, "No additional help is available for '#{cmd}'")
  Trollop::options do
    puts "\n\n" + helptext + "\n\n"
    puts "You may specify multiple configuration files and they will be loaded in"
    puts "the order given."
    puts ""
    puts "Command line arguments always override configuration file settings\n\n"

    OPTIONSCONFIG.each do |name, spec|
      next if spec[:only] and not spec[:only].include? cmd.to_sym
      # Pass a copy without :desc so the shared option table is not modified.
      opt name, spec[:desc], spec.reject { |key, _value| key == :desc }
    end
  end
  print "\n\n"
  Process.exit
end

#reader(filename) ⇒ Object



431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
# File 'lib/marc2solr.rb', line 431

# Build a MARC reader for the given input.
#
# The record format comes from the :marctype setting; with 'bestguess' it is
# sniffed from the file extension (an extension containing "xml" means
# MARC-XML, one containing "seq" or "aleph" means Aleph sequential, anything
# else permissive MARC). An :encoding of 'bestguess' is passed on as nil.
#
# The input is wrapped in a GZIP stream when the file name ends in ".gz" or
# the :gzipped setting is on. The ".gz" check applies whatever the
# configured :marctype is.
#
# @param filename [String] path of the input, or "STDIN" for standard input
# @return [MARC4J4R::Reader]
def reader filename
  configuredType = @config[:marctype].downcase.to_sym
  encoding = @config[:encoding].downcase.to_sym
  encoding = nil if encoding == :bestguess

  # Everything after the first dot of the base name, e.g. ["xml", "gz"].
  extensions = File.basename(filename).split('.').drop(1).map { |e| e.downcase }
  gzipped = (extensions.last == 'gz')
  extensions.pop if gzipped # sniff the format from what precedes ".gz"
  ext = extensions.last

  if configuredType == :bestguess
    log.info "Sniffed marc file type as {}", ext if ext
    type = case ext
           when /xml/          then :marcxml
           when /seq/, /aleph/ then :alephsequential
           else                     :permissivemarc
           end
  else
    type = configuredType
  end

  source = filename
  source = STDIN if source == "STDIN"

  if gzipped or @config[:gzipped]
    source = Java::java.util.zip.GZIPInputStream.new(IOConvert.byteinstream(source))
  end

  MARC4J4R::Reader.new(source, type, encoding)
end

#sussObject



390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
# File 'lib/marc2solr.rb', line 390

# Build the StreamingUpdateSolrServer used to send documents to Solr.
#
# The server is pointed at the URL from sussURL and sized by the :susssize
# and :sussthreads settings. When :javabin is set, a binary request writer
# is installed on it.
#
# @return [StreamingUpdateSolrServer]
def suss
  solr_url = sussURL
  log.info "Set suss url to {}", solr_url

  thread_count = @config[:sussthreads]
  if thread_count > 1
    log.info "Using {} threads for the suss", thread_count
  else
    log.info "Using a single thread for the suss"
  end

  server = StreamingUpdateSolrServer.new(solr_url, @config[:susssize], thread_count)
  if self[:javabin]
    server.setRequestWriter(Java::org.apache.solr.client.solrj.impl.BinaryRequestWriter.new)
    log.debug "Using javabin"
  end
  server
end

#sussURLObject

Build the Solr URL from the configured machine, port, and solrpath settings



366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
# File 'lib/marc2solr.rb', line 366

# Build the Solr URL from the :machine, :port and :solrpath settings.
#
# Leading and trailing slashes are removed from the path and repeated
# slashes are collapsed. The stored :solrpath value itself is left untouched.
#
# @return [String] a URL of the form "http://<machine>:<port>/<path>"
# @raise [ArgumentError] if any of the three settings is missing; the
#   problem is also logged as an error first
def sussURL
  required = [
    [:machine,  "Need solr machine name (--machine)"],
    [:port,     "Need solr port (--port)"],
    [:solrpath, "Need solr path (--solrpath)"]
  ]
  machine, port, path = required.map do |key, message|
    value = self[key]
    unless value
      log.error message
      raise ArgumentError, message
    end
    value
  end

  # Non-destructive calls: the configured string may be frozen or shared.
  path = path.gsub(/^\/*(.*?)\/*$/, '\1') # remove any leading/trailing slashes
  path = path.squeeze('/')                # make sure there are no double slashes

  "http://#{machine}:#{port}/#{path}"
end