Class: TSV::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/scout/tsv/parser.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(file, fix: true, header_hash: "#", sep: "\t", type: :double) ⇒ Parser

Returns a new instance of Parser.



343
344
345
346
347
348
349
350
351
352
353
354
355
# File 'lib/scout/tsv/parser.rb', line 343

# Builds a parser over +file+ and reads its header immediately.
#
# @param file [IO, Object] an IO is used as the stream directly; anything
#   else is handed to +Open.open+ to obtain the stream.
# @param fix [Object] stored in +@fix+ and forwarded to +TSV.parse_header+.
# @param header_hash [String] forwarded to +TSV.parse_header+.
# @param sep [String] forwarded to +TSV.parse_header+; also recorded in the
#   source options when the header did not provide a +:sep+ of its own.
# @param type [Symbol] fallback type used when the source options carry no
#   +:type+.
def initialize(file, fix: true, header_hash: "#", sep: "\t", type: :double)
  @stream = IO === file ? file : Open.open(file)
  @fix = fix

  @source_options, @key_field, @fields, @first_line, @preamble =
    TSV.parse_header(@stream, fix: fix, header_hash: header_hash, sep: sep)

  # Complete the header options with what is known at construction time.
  @source_options[:filename] = file if Path.is_filename?(file)
  @source_options[:sep] = sep if @source_options[:sep].nil?
  @source_options.merge!(key_field: @key_field, fields: @fields)

  # A type declared in the header wins over the +type+ argument.
  @type = @source_options[:type] || type
end

Instance Attribute Details

#fields ⇒ Object

Returns the value of attribute fields.



342
343
344
# File 'lib/scout/tsv/parser.rb', line 342

# Reader for +@fields+, which the constructor fills from the second
# header element returned by +TSV.parse_header+.
#
# @return [Object] the current value of +@fields+ (may be nil).
def fields = @fields

#first_line ⇒ Object

Returns the value of attribute first_line.



342
343
344
# File 'lib/scout/tsv/parser.rb', line 342

# Reader for +@first_line+, which the constructor takes from
# +TSV.parse_header+.
#
# @return [Object] the current value of +@first_line+ (may be nil).
def first_line = @first_line

#key_field ⇒ Object

Returns the value of attribute key_field.



342
343
344
# File 'lib/scout/tsv/parser.rb', line 342

# Reader for +@key_field+, which the constructor takes from
# +TSV.parse_header+.
#
# @return [Object] the current value of +@key_field+ (may be nil).
def key_field = @key_field

#preamble ⇒ Object

Returns the value of attribute preamble.



342
343
344
# File 'lib/scout/tsv/parser.rb', line 342

# Reader for +@preamble+, which the constructor takes from
# +TSV.parse_header+.
#
# @return [Object] the current value of +@preamble+ (may be nil).
def preamble = @preamble

#source_options ⇒ Object

Returns the value of attribute source_options.



342
343
344
# File 'lib/scout/tsv/parser.rb', line 342

# Reader for +@source_options+: the options returned by
# +TSV.parse_header+, completed by the constructor with +:filename+,
# +:sep+, +:key_field+ and +:fields+.
#
# @return [Object] the live options object, not a copy.
def source_options = @source_options

#stream ⇒ Object

Returns the value of attribute stream.



342
343
344
# File 'lib/scout/tsv/parser.rb', line 342

# Reader for +@stream+: the IO given to the constructor, or whatever
# +Open.open+ returned for a non-IO argument.
#
# @return [Object] the current value of +@stream+.
def stream = @stream

#type ⇒ Object

Returns the value of attribute type.



342
343
344
# File 'lib/scout/tsv/parser.rb', line 342

# Reader for +@type+: the +:type+ found in the source options, or the
# constructor's +type+ argument when the options had none.
#
# @return [Object] the current value of +@type+.
def type = @type

Instance Method Details

#all_fields ⇒ Object



361
362
363
364
# File 'lib/scout/tsv/parser.rb', line 361

# Key field followed by the value fields, as one list.
#
# @return [Array, nil] +[@key_field] + @fields+, or nil when +@fields+
#   is nil.
def all_fields
  [@key_field] + @fields unless @fields.nil?
end

#digest_str ⇒ Object



453
454
455
# File 'lib/scout/tsv/parser.rb', line 453

# String used when digesting this parser; it is exactly the value of
# #fingerprint.
#
# @return [Object] whatever #fingerprint returns.
def digest_str = fingerprint

#fingerprint ⇒ Object



449
450
451
# File 'lib/scout/tsv/parser.rb', line 449

# Short textual identity of the parser, built from its field names.
#
# @return [String] +"Parser:{"+, then +Log.fingerprint+ of #all_fields
#   (an empty array when there are none), then +"}"+.
def fingerprint
  names = all_fields || []
  "Parser:{" + Log.fingerprint(names) + "}"
end

#identify_field(name) ⇒ Object



374
375
376
# File 'lib/scout/tsv/parser.rb', line 374

# Resolves +name+ against this parser's key field and fields by
# delegating to +TSV.identify_field+.
#
# @param name [Object] the field reference to resolve.
# @return [Object] whatever +TSV.identify_field+ returns.
def identify_field(name) = TSV.identify_field(@key_field, @fields, name)

#inspect ⇒ Object



457
458
459
# File 'lib/scout/tsv/parser.rb', line 457

# Inspection output is the compact #fingerprint rather than the default
# dump of instance variables.
#
# @return [Object] whatever #fingerprint returns.
def inspect = fingerprint

#options ⇒ Object



357
358
359
# File 'lib/scout/tsv/parser.rb', line 357

# Copy of the source options with this parser's type, key field and
# fields supplied as defaults through +IndiferentHash.add_defaults+.
#
# @return [Object] the result of +IndiferentHash.add_defaults+; the
#   argument passed is a +dup+ of +@source_options+, not the original.
def options
  copy = @source_options.dup
  IndiferentHash.add_defaults(copy, type: type, key_field: key_field, fields: fields)
end

#traverse(key_field: nil, fields: nil, filename: nil, namespace: nil, **kwargs, &block) ⇒ Object



378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
# File 'lib/scout/tsv/parser.rb', line 378

# Parses the rows of the underlying stream with +TSV.parse_stream+,
# optionally re-keying on +key_field+ and restricting values to +fields+.
#
# @param key_field [Object, nil] field to use as key. When header field
#   names are known it is resolved with +NamedArray.identify_name+;
#   otherwise it is passed through as the key (+:key+ becomes 0).
# @param fields [Object, nil] fields to extract. +:all+ selects the key
#   field plus every field when header names are known. Without header
#   names every entry must be Numeric.
# @param filename [Object, nil] accepted but not referenced in this body.
# @param namespace [Object, nil] accepted but not referenced in this body.
# @param kwargs [Hash] forwarded to +TSV.parse_stream+ after being
#   completed with +:type+, +:positions+, +:key+, +:source_type+, +:data+
#   and any source option listed in +KEY_PARAMETERS+.
# @yield forwarded to +TSV.parse_stream+.
# @return [Object] when +TSV.parse_stream+ returns a truthy value, the
#   result of +TSV.setup+ on it; otherwise a two-element array with the
#   key field and fields (the arguments, falling back to the parser's own).
# @raise [RuntimeError] when some named field cannot be identified, or when
#   non-numeric fields are requested and no field names are available.
def traverse(key_field: nil, fields: nil, filename: nil, namespace: nil,  **kwargs, &block)
  # Default the type from the source options; when those carry no :type
  # either, @type is written into them as a side effect.
  kwargs[:type] ||=  self.source_options[:type] ||= @type
  kwargs[:type] = kwargs[:type].to_sym if kwargs[:type]

  if fields
    if @fields
      # Header names are available: translate the requested fields into
      # positions over [key_field, *fields].
      all_field_names ||= [@key_field] + @fields
      fields = all_field_names if fields == :all
      positions = NamedArray.identify_name(all_field_names, fields)

      raise "Not all fields (#{Log.fingerprint fields}) identified in #{Log.fingerprint all_field_names}" if positions.include?(nil)
      kwargs[:positions] = positions
      field_names = all_field_names.values_at *positions
    elsif fields.reject{|f| Numeric === f}.empty?
      # No header names: purely numeric fields are used as positions as-is,
      # and field_names stays unset.
      positions = fields
      kwargs[:positions] = positions
    else
      raise "Non-numeric fields specified, but no field names available"
    end
  else
    field_names = @fields
  end

  # NOTE(review): this checks the parser's @type, not kwargs[:type] -
  # confirm that a per-call type override is not meant to apply here.
  kwargs[:positions] = nil if @type == :flat

  if key_field
    if @fields
      all_field_names ||= [@key_field] + @fields
      key = NamedArray.identify_name(all_field_names, key_field)
      kwargs[:key] = key == :key ? 0 : key
      key_field_name = (key.nil? || key == :key) ? @key_field : all_field_names[key]
      # With a new key and no explicit fields, the values are every other
      # column.
      if fields.nil?
        field_names = all_field_names - [key_field_name]
      end
    else
      kwargs[:key] = key_field == :key ? 0 : key_field
      # NOTE(review): `key` is not read again below, and key_field_name
      # stays nil on this path.
      key = key_field
    end
  else
    key_field_name = @key_field
  end

  # Only the first field name is kept for :single and :flat.
  if field_names && (kwargs[:type] == :single || kwargs[:type] == :flat)
    field_names = field_names.slice(0,1)
  end

  # Source options listed in KEY_PARAMETERS act as defaults; values the
  # caller passed explicitly are kept.
  @source_options.each do |option,value|
    option = option.to_sym
    next unless KEY_PARAMETERS.include? option
    kwargs[option] = value unless kwargs.include?(option)
  end

  kwargs[:source_type] = @source_options[:type]
  kwargs[:data] = false if kwargs[:data].nil?

  if kwargs[:tsv_grep]
    # with_stream writes @first_line back in front of the remaining stream,
    # so the grep sees it too and parse_stream gets first_line: nil.
    # The grep options are deleted so they are not forwarded to parse_stream.
    data = with_stream do |stream|
      grep_stream = Open.grep(stream, kwargs.delete(:tsv_grep), kwargs.delete(:tsv_invert_grep))
      TSV.parse_stream(grep_stream, first_line: nil, fix: @fix, field_names: @fields, **kwargs, &block)
    end
  else
    data = TSV.parse_stream(@stream, first_line: @first_line, fix: @fix, field_names: @fields, **kwargs, &block)
  end

  if data
    # NOTE(review): the result is set up with @type rather than
    # kwargs[:type] - confirm this is intended when a type is passed in.
    TSV.setup(data, @source_options.merge(:key_field => key_field_name, :fields => field_names, :type => @type))
  else
    [key_field || self.key_field, fields || self.fields]
  end
end

#with_stream {|sout| ... } ⇒ Object

Yields:

  • (sout)


461
462
463
464
465
466
467
# File 'lib/scout/tsv/parser.rb', line 461

# Yields a pipe that replays the stored first line followed by the rest
# of +@stream+.
#
# The pipe comes from +Open.open_pipe+; inside its block +@first_line+ is
# written with +puts+ and the remainder of +@stream+ is copied over by
# +Open.consume_stream+.
#
# @yield [sout] the object returned by +Open.open_pipe+.
# @return [Object] the value of the block.
def with_stream
  pipe = Open.open_pipe do |writer|
    writer.puts @first_line
    Open.consume_stream(@stream, false, writer)
  end
  yield pipe
end