Class: FlatKit::FieldStats
- Inherits:
-
Object
- Object
- FlatKit::FieldStats
- Defined in:
- lib/flat_kit/field_stats.rb
Overview
Collect stats on a single field. We may not know what the field data type is to start with, so collect a bunch of values until we reach the guess threshold, and then calculate stats based upon the data type determined by the guess.
Constant Summary collapse
- DEFAULT_GUESS_THRESHOLD =
1000
- CORE_STATS =
:core
- CARDINALITY_STATS =
:cardinality
- ALL_STATS =
[ CORE_STATS, CARDINALITY_STATS ]
- EXPORT_FIELDS =
%w[ name type count max mean min stddev sum mode unique_count max_length mean_length min_length stddev_length mode_length unique_count_lengths null_count unknown_count out_of_type_count total_count null_percent unknown_percent ]
Instance Attribute Summary collapse
-
#field_type ⇒ Object
readonly
Returns the value of attribute field_type.
-
#name ⇒ Object
readonly
Returns the value of attribute name.
-
#type_counts ⇒ Object
readonly
Returns the value of attribute type_counts.
Instance Method Summary collapse
- #collecting_frequencies? ⇒ Boolean
- #count ⇒ Object
- #field_type_determined? ⇒ Boolean
- #frequencies ⇒ Object
-
#initialize(name:, stats_to_collect: CORE_STATS, type: ::FlatKit::FieldType::GuessType, guess_threshold: DEFAULT_GUESS_THRESHOLD) ⇒ FieldStats
constructor
A new instance of FieldStats.
- #length_frequencies ⇒ Object
- #max ⇒ Object
- #max_length ⇒ Object
- #mean ⇒ Object
- #mean_length ⇒ Object
- #min ⇒ Object
- #min_length ⇒ Object
- #mode ⇒ Object
- #mode_length ⇒ Object
- #null_count ⇒ Object
- #null_percent ⇒ Object
- #out_of_type_count ⇒ Object
- #stddev ⇒ Object
- #stddev_length ⇒ Object
- #sum ⇒ Object
- #to_hash ⇒ Object
- #total_count ⇒ Object
- #type ⇒ Object
- #unique_count ⇒ Object
- #unique_count_lengths ⇒ Object
- #unique_values ⇒ Object
- #unique_values_lengths ⇒ Object
- #unknown_count ⇒ Object
- #unknown_percent ⇒ Object
- #update(value) ⇒ Object
Constructor Details
#initialize(name:, stats_to_collect: CORE_STATS, type: ::FlatKit::FieldType::GuessType, guess_threshold: DEFAULT_GUESS_THRESHOLD) ⇒ FieldStats
Returns a new instance of FieldStats.
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
# File 'lib/flat_kit/field_stats.rb', line 46 def initialize(name:, stats_to_collect: CORE_STATS, type: ::FlatKit::FieldType::GuessType, guess_threshold: DEFAULT_GUESS_THRESHOLD) @name = name @field_type = type @guess_threshold = guess_threshold @type_counts = Hash.new(0) @out_of_type_count = 0 @values = [] @stats = nil @length_stats = nil @stats_to_collect = [stats_to_collect].flatten @stats_to_collect.each do |collection_set| next if ALL_STATS.include?(collection_set) raise ArgumentError, "#{collection_set} is not a valid stats collection set, must be one of #{ALL_STATS.map { |s| s.to_s }.join(", ") }" end raise ArgumentError, "type: must be FieldType subclasses - not #{type}" unless type.kind_of?(Class) && (type.superclass == ::FlatKit::FieldType) end |
Instance Attribute Details
#field_type ⇒ Object (readonly)
Returns the value of attribute field_type.
43 44 45 |
# File 'lib/flat_kit/field_stats.rb', line 43 def field_type @field_type end |
#name ⇒ Object (readonly)
Returns the value of attribute name.
44 45 46 |
# File 'lib/flat_kit/field_stats.rb', line 44 def name @name end |
#type_counts ⇒ Object (readonly)
Returns the value of attribute type_counts.
42 43 44 |
# File 'lib/flat_kit/field_stats.rb', line 42 def type_counts @type_counts end |
Instance Method Details
#collecting_frequencies? ⇒ Boolean
82 83 84 |
# File 'lib/flat_kit/field_stats.rb', line 82 def collecting_frequencies? @stats_to_collect.include?(CARDINALITY_STATS) end |
#count ⇒ Object
90 91 92 |
# File 'lib/flat_kit/field_stats.rb', line 90 def count stats.count end |
#field_type_determined? ⇒ Boolean
64 65 66 |
# File 'lib/flat_kit/field_stats.rb', line 64 def field_type_determined? @field_type != ::FlatKit::FieldType::GuessType end |
#frequencies ⇒ Object
126 127 128 |
# File 'lib/flat_kit/field_stats.rb', line 126 def frequencies stats.frequencies if collecting_frequencies? end |
#length_frequencies ⇒ Object
158 159 160 |
# File 'lib/flat_kit/field_stats.rb', line 158 def length_frequencies length_stats.frequencies if @length_stats && collecting_frequencies? end |
#max ⇒ Object
94 95 96 |
# File 'lib/flat_kit/field_stats.rb', line 94 def max stats.max if stats.respond_to?(:max) end |
#max_length ⇒ Object
134 135 136 |
# File 'lib/flat_kit/field_stats.rb', line 134 def max_length length_stats.max if @length_stats end |
#mean ⇒ Object
98 99 100 |
# File 'lib/flat_kit/field_stats.rb', line 98 def mean stats.mean if stats.respond_to?(:mean) end |
#mean_length ⇒ Object
138 139 140 |
# File 'lib/flat_kit/field_stats.rb', line 138 def mean_length length_stats.mean if @length_stats end |
#min ⇒ Object
102 103 104 |
# File 'lib/flat_kit/field_stats.rb', line 102 def min stats.min if stats.respond_to?(:min) end |
#min_length ⇒ Object
130 131 132 |
# File 'lib/flat_kit/field_stats.rb', line 130 def min_length length_stats.min if @length_stats end |
#mode ⇒ Object
114 115 116 |
# File 'lib/flat_kit/field_stats.rb', line 114 def mode stats.mode if collecting_frequencies? end |
#mode_length ⇒ Object
146 147 148 |
# File 'lib/flat_kit/field_stats.rb', line 146 def mode_length length_stats.mode if @length_stats && collecting_frequencies? end |
#null_count ⇒ Object
162 163 164 |
# File 'lib/flat_kit/field_stats.rb', line 162 def null_count type_counts[FieldType::NullType] end |
#null_percent ⇒ Object
174 175 176 177 |
# File 'lib/flat_kit/field_stats.rb', line 174 def null_percent return 0 if total_count.zero? ((null_count.to_f / total_count) * 100.0).truncate(2) end |
#out_of_type_count ⇒ Object
170 171 172 |
# File 'lib/flat_kit/field_stats.rb', line 170 def out_of_type_count @out_of_type_count end |
#stddev ⇒ Object
106 107 108 |
# File 'lib/flat_kit/field_stats.rb', line 106 def stddev stats.stddev if stats.respond_to?(:stddev) end |
#stddev_length ⇒ Object
142 143 144 |
# File 'lib/flat_kit/field_stats.rb', line 142 def stddev_length length_stats.stddev if @length_stats end |
#sum ⇒ Object
110 111 112 |
# File 'lib/flat_kit/field_stats.rb', line 110 def sum stats.sum if stats.respond_to?(:sum) end |
#to_hash ⇒ Object
188 189 190 191 192 193 194 195 196 |
# File 'lib/flat_kit/field_stats.rb', line 188 def to_hash resolve_guess Hash.new.tap do |h| EXPORT_FIELDS.each do |n| h[n] = self.send(n) end end end |
#total_count ⇒ Object
166 167 168 |
# File 'lib/flat_kit/field_stats.rb', line 166 def total_count stats.count + @out_of_type_count end |
#type ⇒ Object
86 87 88 |
# File 'lib/flat_kit/field_stats.rb', line 86 def type @field_type.type_name end |
#unique_count ⇒ Object
118 119 120 |
# File 'lib/flat_kit/field_stats.rb', line 118 def unique_count stats.unique_count if collecting_frequencies? end |
#unique_count_lengths ⇒ Object
150 151 152 |
# File 'lib/flat_kit/field_stats.rb', line 150 def unique_count_lengths length_stats.unique_count if @length_stats && collecting_frequencies? end |
#unique_values ⇒ Object
122 123 124 |
# File 'lib/flat_kit/field_stats.rb', line 122 def unique_values stats.unique_values if collecting_frequencies? end |
#unique_values_lengths ⇒ Object
154 155 156 |
# File 'lib/flat_kit/field_stats.rb', line 154 def unique_values_lengths length_stats.unique_values if @length_stats && collecting_frequencies? end |
#unknown_count ⇒ Object
179 180 181 |
# File 'lib/flat_kit/field_stats.rb', line 179 def unknown_count type_counts[FieldType::UnknownType] end |
#unknown_percent ⇒ Object
183 184 185 186 |
# File 'lib/flat_kit/field_stats.rb', line 183 def unknown_percent return 0 if total_count.zero? ((unknown_count.to_f / total_count) * 100.0).truncate(2) end |
#update(value) ⇒ Object
68 69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/flat_kit/field_stats.rb', line 68 def update(value) update_type_count(value) if field_type_determined? then update_stats(value) else @values << value if @values.size >= @guess_threshold then resolve_guess end end end |