Class: DataKit::CSV::SchemaAnalysis
- Inherits:
-
Object
- Object
- DataKit::CSV::SchemaAnalysis
- Defined in:
- lib/data_kit/csv/schema_analysis.rb
Instance Attribute Summary collapse
-
#fields ⇒ Object
readonly
Returns the value of attribute fields.
-
#row_count ⇒ Object
readonly
Returns the value of attribute row_count.
-
#sample_count ⇒ Object
readonly
Returns the value of attribute sample_count.
-
#types ⇒ Object
readonly
Returns the value of attribute types.
Instance Method Summary collapse
- #field_types ⇒ Object
- #has_only_numeric_types?(field) ⇒ Boolean
- #has_single_type?(field) ⇒ Boolean
- #increment_sample ⇒ Object
- #increment_total ⇒ Object
-
#initialize(fields) ⇒ SchemaAnalysis
constructor
A new instance of SchemaAnalysis.
- #insert(field_name, value) ⇒ Object
- #type?(field) ⇒ Boolean
- #type_count(field, type) ⇒ Object
- #type_list(field) ⇒ Object
Constructor Details
#initialize(fields) ⇒ SchemaAnalysis
Returns a new instance of SchemaAnalysis.
9 10 11 12 13 14 15 16 17 18 19 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 9
# Prepares an empty analysis for the given fields.
#
# Stores +fields+, zeroes the row and sample counters, and gives every field
# name its own counter hash holding a 0 for each entry yielded by
# Dataset::Field::Types.
#
# @param fields [#each] the field names to track
def initialize(fields)
  @fields = fields
  @types = {}
  @row_count = 0
  @sample_count = 0

  fields.each do |name|
    counters = {}
    Dataset::Field::Types.each { |kind| counters[kind] = 0 }
    @types[name] = counters
  end
end
Instance Attribute Details
#fields ⇒ Object (readonly)
Returns the value of attribute fields.
4 5 6 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 4
# Reader for the +@fields+ instance variable (the collection handed to the
# constructor).
#
# @return [Object] the stored fields, unchanged
def fields
  @fields
end
#row_count ⇒ Object (readonly)
Returns the value of attribute row_count.
6 7 8 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 6
# Reader for the +@row_count+ instance variable, the counter that
# #increment_total advances.
#
# @return [Object] the current value of +@row_count+
def row_count
  @row_count
end
#sample_count ⇒ Object (readonly)
Returns the value of attribute sample_count.
7 8 9 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 7
# Reader for the +@sample_count+ instance variable, the counter that
# #increment_sample advances.
#
# @return [Object] the current value of +@sample_count+
def sample_count
  @sample_count
end
#types ⇒ Object (readonly)
Returns the value of attribute types.
5 6 7 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 5
# Reader for the +@types+ instance variable: the per-field table of
# type => count entries built by the constructor.
#
# @return [Object] the stored table, unchanged
def types
  @types
end
Instance Method Details
#field_types ⇒ Object
33 34 35 36 37 38 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 33
# Builds a hash that maps every entry of +fields+ to the type #type?
# resolves for it.
#
# @return [Hash] field name => resolved type
def field_types
  # each_with_object threads the accumulator itself, so the block does not
  # need to end by handing the hash back the way an inject-based fold must.
  fields.each_with_object({}) do |field_name, resolved|
    resolved[field_name] = type?(field_name)
  end
end
#has_only_numeric_types?(field) ⇒ Boolean
64 65 66 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 64
# Tells whether every type observed for +field+ is one of :integer, :number
# or :null.
#
# Note that a field with no observed types at all also answers true, because
# nothing remains after removing the numeric-compatible types.
#
# @param field [Object] key of the field to inspect
# @return [Boolean] true when no other type has been observed
def has_only_numeric_types?(field)
  (type_list(field) - [:integer, :number, :null]).empty?
end
#has_single_type?(field) ⇒ Boolean
60 61 62 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 60
# Tells whether exactly one type has been observed for +field+, judged by
# the length of #type_list.
#
# @param field [Object] key of the field to inspect
# @return [Boolean] true when #type_list holds exactly one entry
def has_single_type?(field)
  observed = type_list(field)
  observed.size == 1
end
#increment_sample ⇒ Object
25 26 27 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 25
# Advances the sample counter by one.
#
# @return [Integer] the counter value after the increment
def increment_sample
  @sample_count = @sample_count + 1
end
#increment_total ⇒ Object
21 22 23 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 21
# Advances the row counter by one.
#
# @return [Integer] the counter value after the increment
def increment_total
  @row_count = @row_count + 1
end
#insert(field_name, value) ⇒ Object
29 30 31 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 29
# Records one observation of +value+ for +field_name+: the value is
# classified by Dataset::Field.type? and the matching counter in the
# field's table is bumped by one.
#
# @param field_name [Object] key of the field the value belongs to
# @param value [Object] the raw value to classify
# @return [Integer] the counter value after the increment
def insert(field_name, value)
  counters = @types[field_name]
  detected = Dataset::Field.type?(value)
  counters[detected] += 1
end
#type?(field) ⇒ Boolean
40 41 42 43 44 45 46 47 48 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 40
# Resolves one type for +field+ from the types observed so far:
#
# * the sole observed type when #has_single_type? holds;
# * otherwise :number when #has_only_numeric_types? holds;
# * otherwise :string.
#
# @param field [Object] key of the field to resolve
# @return [Object] the resolved type
def type?(field)
  return type_list(field).first if has_single_type?(field)
  return :number if has_only_numeric_types?(field)

  :string
end
#type_count(field, type) ⇒ Object
50 51 52 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 50
# Looks up how many times +type+ has been counted for +field+.
#
# A type with no entry for the field yields 0. A field with no entry in
# +types+ now yields 0 as well, instead of raising NoMethodError on nil, so
# both kinds of "never seen" are reported the same way.
#
# @param field [Object] key of the field to inspect
# @param type [Object] the type whose counter is wanted
# @return [Integer] the recorded count, or 0 when nothing is recorded
def type_count(field, type)
  counts = types[field]
  return 0 unless counts

  counts[type] || 0
end
#type_list(field) ⇒ Object
54 55 56 57 58 |
# File 'lib/data_kit/csv/schema_analysis.rb', line 54
# Lists the types that have been counted at least once for +field+, in the
# key order of the field's counter table.
#
# @param field [Object] key of the field to inspect
# @return [Array] the types whose #type_count is greater than zero
def type_list(field)
  types[field].each_key.select { |kind| type_count(field, kind) > 0 }
end