Class: DataKit::CSV::SchemaAnalysis
- Inherits:
-
Object
- Object
- DataKit::CSV::SchemaAnalysis
- Defined in:
- lib/data_kit/csv/schema_analysis.rb
Instance Attribute Summary collapse
-
#fields ⇒ Object
readonly
Returns the value of attribute fields.
-
#row_count ⇒ Object
readonly
Returns the value of attribute row_count.
-
#sample_count ⇒ Object
readonly
Returns the value of attribute sample_count.
-
#type_hints ⇒ Object
readonly
Returns the value of attribute type_hints.
-
#types ⇒ Object
readonly
Returns the value of attribute types.
-
#use_type_hints ⇒ Object
readonly
Returns the value of attribute use_type_hints.
Instance Method Summary collapse
- #field_types ⇒ Object
- #has_only_numeric_types?(field) ⇒ Boolean
- #has_single_type?(field) ⇒ Boolean
- #increment_sample ⇒ Object
- #increment_total ⇒ Object
-
#initialize(fields, options = {}) ⇒ SchemaAnalysis
constructor
A new instance of SchemaAnalysis.
- #insert(field_name, value) ⇒ Object
- #type?(field) ⇒ Symbol
- #type_count(field, type) ⇒ Object
- #type_list(field) ⇒ Object
Constructor Details
#initialize(fields, options = {}) ⇒ SchemaAnalysis
Returns a new instance of SchemaAnalysis.
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 12
#
# Builds an empty analysis for the given fields: every field starts with a
# zero count for each entry of Dataset::Field::Types and a :string type hint.
# Row and sample counters start at 0.
#
# @param fields [#each] field names to track; each one is used as a hash key
# @param options [Hash] optional settings
# @option options [Boolean, nil] :use_type_hints type hints are enabled when
#   the key is absent, nil, or exactly true; any other value disables them
def initialize(fields, options = {})
  @fields = fields
  @types = {}
  @type_hints = {}
  @row_count = 0
  @sample_count = 0

  # Only nil (or a missing key) and literal true switch hints on; a merely
  # truthy value such as a String still disables them.
  hint_option = options[:use_type_hints]
  @use_type_hints = hint_option.nil? || hint_option == true

  fields.each do |field_name|
    @type_hints[field_name] = :string
    # A fresh counter hash per field so counts are never shared.
    @types[field_name] = Dataset::Field::Types.each_with_object({}) do |type, counts|
      counts[type] = 0
    end
  end
end
Instance Attribute Details
#fields ⇒ Object (readonly)
Returns the value of attribute fields.
4 5 6 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 4 def fields @fields end |
#row_count ⇒ Object (readonly)
Returns the value of attribute row_count.
6 7 8 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 6 def row_count @row_count end |
#sample_count ⇒ Object (readonly)
Returns the value of attribute sample_count.
7 8 9 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 7 def sample_count @sample_count end |
#type_hints ⇒ Object (readonly)
Returns the value of attribute type_hints.
9 10 11 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 9 def type_hints @type_hints end |
#types ⇒ Object (readonly)
Returns the value of attribute types.
5 6 7 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 5 def types @types end |
#use_type_hints ⇒ Object (readonly)
Returns the value of attribute use_type_hints.
10 11 12 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 10 def use_type_hints @use_type_hints end |
Instance Method Details
#field_types ⇒ Object
52 53 54 55 56 57 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 52
#
# Maps every tracked field to the type #type? infers for it.
#
# @return [Hash] field name => inferred type symbol, in the order of #fields
def field_types
  # each_with_object hands back the hash itself, so the block does not need
  # to return the accumulator on every iteration.
  fields.each_with_object({}) do |field_name, result|
    result[field_name] = type?(field_name)
  end
end
#has_only_numeric_types?(field) ⇒ Boolean
83 84 85 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 83
#
# Tells whether every type observed for the field is :integer, :number or
# :null. A field with no observed types also counts as numeric-only, because
# nothing is left over after the subtraction.
#
# @param field [Object] field name, as used for the keys of #types
# @return [Boolean]
def has_only_numeric_types?(field)
  non_numeric = type_list(field) - [:integer, :number, :null]
  non_numeric.empty?
end
#has_single_type?(field) ⇒ Boolean
79 80 81 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 79
#
# Tells whether exactly one type other than :null has been observed for the
# field.
#
# @param field [Object] field name, as used for the keys of #types
# @return [Boolean]
def has_single_type?(field)
  non_null_types = type_list(field) - [:null]
  non_null_types.size == 1
end
#increment_sample ⇒ Object
37 38 39 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 37
#
# Adds one to the sample counter.
#
# @return [Integer] the updated sample count
def increment_sample
  @sample_count = @sample_count + 1
end
#increment_total ⇒ Object
33 34 35 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 33
#
# Adds one to the row counter.
#
# @return [Integer] the updated row count
def increment_total
  @row_count = @row_count + 1
end
#insert(field_name, value) ⇒ Object
41 42 43 44 45 46 47 48 49 50 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 41
#
# Records one observed value for a field: asks Dataset::Field.type? for the
# value's type and bumps that type's counter for the field.
#
# @param field_name [Object] a field name that was passed to #initialize
# @param value [Object] the raw value to classify
# @return [Integer] the updated count for the detected type
def insert(field_name, value)
  detected =
    if use_type_hints
      # The field's cached hint is passed along as the second argument, and
      # the result becomes the hint for the next value of this field.
      @type_hints[field_name] = Dataset::Field.type?(value, type_hints[field_name])
    else
      Dataset::Field.type?(value)
    end

  @types[field_name][detected] += 1
end
#type?(field) ⇒ Symbol
59 60 61 62 63 64 65 66 67 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 59
#
# Infers a single type for the field from the types observed so far.
#
# - exactly one non-null type observed: that type
# - only :integer / :number / :null observed: :number
# - anything else: :string
#
# Despite the trailing "?", this returns a type symbol, not a boolean.
# As #has_only_numeric_types? is written, a field with no non-null
# observations lands in the numeric branch and is reported as :number.
#
# @param field [Object] field name, as used for the keys of #types
# @return [Symbol] the inferred type
def type?(field)
  if has_single_type?(field)
    # Drop :null before picking, so the answer does not depend on whether
    # :null happens to be enumerated ahead of the real type.
    (type_list(field) - [:null]).first
  elsif has_only_numeric_types?(field)
    :number
  else
    :string
  end
end
#type_count(field, type) ⇒ Object
69 70 71 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 69
#
# Looks up how many times a type has been recorded for a field.
#
# @param field [Object] field name, as used for the keys of #types
# @param type [Object] type key to look up
# @return [Integer] the recorded count, or 0 when the field has no entry
#   for that type
def type_count(field, type)
  recorded = types[field][type]
  recorded || 0
end
#type_list(field) ⇒ Object
73 74 75 76 77 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 73
#
# Lists the types that have been recorded at least once for a field, in the
# key order of the field's counter hash.
#
# @param field [Object] field name, as used for the keys of #types
# @return [Array] type keys whose count is greater than zero
def type_list(field)
  types[field].each_key.select { |type| type_count(field, type) > 0 }
end