Class: DataKit::CSV::SchemaAnalysis

Inherits:
Object
  • Object
show all
Defined in:
lib/data_kit/csv/schema_analysis.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(fields) ⇒ SchemaAnalysis

Returns a new instance of SchemaAnalysis.



9
10
11
12
13
14
15
16
17
18
19
# File 'lib/data_kit/csv/schema_analysis.rb', line 9

# Builds an empty analysis for the given fields.
#
# Both counters start at zero, and every field receives its own tally hash
# holding a zero for each entry yielded by Dataset::Field::Types.
#
# @param fields [#each] field names to track
def initialize(fields)
  @fields = fields
  @types = {}
  @row_count = 0
  @sample_count = 0

  fields.each do |name|
    tally = {}
    Dataset::Field::Types.each { |type| tally[type] = 0 }
    @types[name] = tally
  end
end

Instance Attribute Details

#fields ⇒ Object (readonly)

Returns the value of attribute fields.



4
5
6
# File 'lib/data_kit/csv/schema_analysis.rb', line 4

# Reader for the field names handed to the constructor.
#
# @return [Object] the +fields+ argument exactly as it was passed to #initialize
def fields
  @fields
end

#row_count ⇒ Object (readonly)

Returns the value of attribute row_count.



6
7
8
# File 'lib/data_kit/csv/schema_analysis.rb', line 6

# Reader for the row counter.
#
# The counter starts at 0 in the constructor and grows by one on every call
# to #increment_total.
#
# @return [Integer] current value of the row counter
def row_count
  @row_count
end

#sample_count ⇒ Object (readonly)

Returns the value of attribute sample_count.



7
8
9
# File 'lib/data_kit/csv/schema_analysis.rb', line 7

# Reader for the sample counter.
#
# The counter starts at 0 in the constructor and grows by one on every call
# to #increment_sample.
#
# @return [Integer] current value of the sample counter
def sample_count
  @sample_count
end

#types ⇒ Object (readonly)

Returns the value of attribute types.



5
6
7
# File 'lib/data_kit/csv/schema_analysis.rb', line 5

# Reader for the per-field type tallies.
#
# The constructor creates one inner hash per field, keyed by each entry of
# Dataset::Field::Types with a starting count of 0; #insert increments them.
#
# @return [Hash] field name => { type => count }
def types
  @types
end

Instance Method Details

#field_types ⇒ Object



33
34
35
36
37
38
# File 'lib/data_kit/csv/schema_analysis.rb', line 33

# Resolves one type per tracked field.
#
# Walks #fields in order and stores the result of #type? for each of them.
#
# @return [Hash] field name => value returned by #type? for that field
def field_types
  # each_with_object carries the accumulator for us, so the block does not
  # need to return it explicitly the way an inject-based fold does.
  fields.each_with_object({}) do |field_name, resolved|
    resolved[field_name] = type?(field_name)
  end
end

#has_only_numeric_types?(field) ⇒ Boolean

Returns:

  • (Boolean)


64
65
66
# File 'lib/data_kit/csv/schema_analysis.rb', line 64

# Tells whether every type observed for +field+ is :integer, :number or :null.
#
# A field whose #type_list is empty also yields true, because nothing is left
# after removing the allowed types.
#
# @param field [Object] field name, forwarded to #type_list
# @return [Boolean]
def has_only_numeric_types?(field)
  (type_list(field) - %i[integer number null]).empty?
end

#has_single_type?(field) ⇒ Boolean

Returns:

  • (Boolean)


60
61
62
# File 'lib/data_kit/csv/schema_analysis.rb', line 60

# Tells whether exactly one type has been observed for +field+.
#
# @param field [Object] field name, forwarded to #type_list
# @return [Boolean] true when #type_list holds exactly one entry
def has_single_type?(field)
  observed = type_list(field)
  observed.size == 1
end

#increment_sample ⇒ Object



25
26
27
# File 'lib/data_kit/csv/schema_analysis.rb', line 25

# Bumps the sample counter by one.
#
# @return [Integer] the updated sample count
def increment_sample
  @sample_count = @sample_count.succ
end

#increment_total ⇒ Object



21
22
23
# File 'lib/data_kit/csv/schema_analysis.rb', line 21

# Bumps the row counter by one.
#
# @return [Integer] the updated row count
def increment_total
  @row_count = @row_count.succ
end

#insert(field_name, value) ⇒ Object



29
30
31
# File 'lib/data_kit/csv/schema_analysis.rb', line 29

# Records one observed value for a field.
#
# The value is classified with Dataset::Field.type? and the matching counter
# in the field's tally is increased by one.
#
# @param field_name [Object] key of the field in the tallies hash
# @param value [Object] raw value to classify
# @return [Integer] the updated count for the detected type
def insert(field_name, value)
  # Look the tally up before classifying, matching the original evaluation order.
  tally = @types[field_name]
  detected = Dataset::Field.type?(value)
  tally[detected] += 1
end

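#type?(field) ⇒ Symbol

Returns:

  • (Symbol) — the field's only observed type when exactly one was observed; otherwise :number when every observed type is :integer, :number or :null; otherwise :string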


40
41
42
43
44
45
46
47
48
# File 'lib/data_kit/csv/schema_analysis.rb', line 40

# Resolves the type to report for +field+.
#
# Despite the trailing "?", this returns a type identifier, not a boolean:
# * the only observed type when #has_single_type? holds,
# * :number when #has_only_numeric_types? holds,
# * :string in every other case.
#
# @param field [Object] field name
# @return [Symbol] resolved type
def type?(field)
  return type_list(field).first if has_single_type?(field)

  has_only_numeric_types?(field) ? :number : :string
end

#type_count(field, type) ⇒ Object



50
51
52
# File 'lib/data_kit/csv/schema_analysis.rb', line 50

# Looks up how many times +type+ was recorded for +field+.
#
# @param field [Object] field name, a key of #types
# @param type [Object] type identifier, a key of the field's tally
# @return [Integer] the stored count, or 0 when the tally holds no value for +type+
def type_count(field, type)
  recorded = types[field][type]
  recorded || 0
end

#type_list(field) ⇒ Object



54
55
56
57
58
# File 'lib/data_kit/csv/schema_analysis.rb', line 54

# Lists the types that were recorded at least once for +field+.
#
# @param field [Object] field name, a key of #types
# @return [Array] keys of the field's tally whose #type_count is above zero,
#   in the tally's key order
def type_list(field)
  candidates = types[field].keys
  candidates.select { |type| type_count(field, type).positive? }
end