Class: GoodData::Data::Guesser

Inherits:
Object
  • Object
show all
Defined in:
lib/gooddata/data/guesser.rb

Overview

Utility class that guesses the data type of each column in a data stream by looking at the first few rows.

Constant Summary collapse

TYPES_PRIORITY =
[:connection_point, :fact, :date, :attribute]

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(reader) ⇒ Guesser

Returns a new instance of Guesser.



29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/gooddata/data/guesser.rb', line 29

# Builds a guesser on top of a row reader.
#
# The first row shifted off the reader is taken as the header row; each
# header is converted to a String and gets its own empty bucket in the
# @pros, @cons and @seen hashes.
#
# @param reader [#shift] row source; each call to +shift+ is expected to
#   return the next row (an Array) or nil when exhausted
# @raise [RuntimeError] 'Empty data set' when the reader yields no header row
def initialize(reader)
  @reader = reader
  header_row = reader.shift
  # Check before mapping: calling a method on nil would raise NoMethodError
  # and the intended error message would never be reached.
  fail 'Empty data set' unless header_row
  @headers = header_row.map(&:to_s)
  @pros = {}
  @cons = {}
  @seen = {}

  @headers.each do |header|
    @cons[header] = {}
    @pros[header] = {}
    @seen[header] = {}
  end
end

Instance Attribute Details

#headers ⇒ Object (readonly)

Returns the value of attribute headers.



19
20
21
# File 'lib/gooddata/data/guesser.rb', line 19

# Read-only accessor for the column headers captured by the constructor
# (the first row of the reader, with every entry converted to a String).
#
# @return [Array<String>] the header names
def headers
  @headers
end

Class Method Details

.sort_types(types) ⇒ Object



22
23
24
25
26
# File 'lib/gooddata/data/guesser.rb', line 22

# Orders the given types by their position in TYPES_PRIORITY, earliest
# (highest priority) first. The input collection is not modified.
#
# @param types [Array<Symbol>] types to order; each one is expected to be
#   listed in TYPES_PRIORITY
# @return [Array<Symbol>] a new array sorted by priority
def sort_types(types)
  types.sort_by { |type| TYPES_PRIORITY.index(type) }
end

Instance Method Details

#guess(limit) ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/gooddata/data/guesser.rb', line 43

# Samples up to +limit+ data rows from the reader and records type guesses
# for every column.
#
# For each value, check_number and check_date are consulted; when neither
# returns a truthy result the column gets an :attribute guess. Every value
# is also tallied in @seen, and after sampling any column whose number of
# distinct values equals the number of sampled rows gets a
# :connection_point guess.
#
# Sampling stops at the first empty row, when the reader is exhausted, or
# once +limit+ rows have been processed.
#
# @param limit [Integer] maximum number of data rows to sample
# @return [Object] whatever guess_result returns
# @raise [RuntimeError] when a row's field count differs from the header count
def guess(limit)
  count = 0
  # Test the limit before shifting so that no row beyond the sample is
  # pulled from the reader and silently dropped.
  while count < limit && (row = @reader.shift)
    break if row.empty?
    if row.size != @headers.size
      fail format('%i fields in row %i, %i expected', row.size, count + 1, @headers.size)
    end
    row.each_with_index do |value, j|
      header = @headers[j]
      number = check_number(header, value)
      date = check_date(header, value)
      store_guess header, @pros => :attribute unless number || date
      hash_increment @seen[header], value
    end
    count += 1
  end
  # fields with unique values are connection point candidates
  @seen.each do |header, values|
    store_guess header, @pros => :connection_point if values.size == count
  end
  guess_result
end