Class: DataForge::Transformation::Deduplication

Inherits:
TransformationBase show all
Defined in:
lib/data_forge/transformation/deduplication.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(reader, writer, unique_fields) ⇒ Deduplication

Returns a new instance of Deduplication.



17
18
19
20
# File 'lib/data_forge/transformation/deduplication.rb', line 17

# Stores the collaborators and starts with an empty set of seen fingerprints.
#
# @param reader [Object] record source, kept as-is in @reader
# @param writer [Object] record destination, kept as-is in @writer
# @param unique_fields [Object] kept as-is in @unique_fields
def initialize(reader, writer, unique_fields)
  @reader = reader
  @writer = writer
  @unique_fields = unique_fields
  @fingerprints = Set.new
end

Class Method Details

.from_input(source_name, options = {}) ⇒ Object



6
7
8
9
10
11
12
# File 'lib/data_forge/transformation/deduplication.rb', line 6

# Builds an instance from a source name.
#
# @param source_name [Object] passed to File.reader_for; also the default
#   target name when +:into+ is absent
# @param options [Hash] optional settings
# @option options [Object] :into name passed to File.writer_for
#   (defaults to +source_name+)
# @option options [Object] :using field name or list of field names, wrapped
#   with Array() (defaults to the reader's +fields+)
# @return [Object] whatever +new+ returns for the reader, writer and fields
def from_input(source_name, options = {})
  source_reader = File.reader_for(source_name)
  target_name = options.fetch(:into, source_name)
  target_writer = File.writer_for(target_name)
  # The default is computed eagerly, so the reader's fields are always read.
  key_fields = options.fetch(:using, source_reader.fields)

  new(source_reader, target_writer, Array(key_fields))
end

Instance Method Details

#execute ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
# File 'lib/data_forge/transformation/deduplication.rb', line 24

# Streams records from @reader to @writer, writing each record only the
# first time its fingerprint (the values of @unique_fields) is seen.
#
# Fingerprints accumulate in @fingerprints, which is not reset here.
#
# @return [Object] whatever +with_writer+ returns
def execute
  with_writer(@writer) do |out|
    @reader.each_record do |row|
      key = @unique_fields.map { |name| row[name] }
      # Set#add? returns nil when the key is already present.
      out.write(row) if @fingerprints.add?(key)
    end
  end
end