Class: DataKit::CSV::FieldAnalyzer
- Inherits:
-
Object
- Object
- DataKit::CSV::FieldAnalyzer
- Defined in:
- lib/data_kit/csv/field_analyzer.rb
Instance Attribute Summary collapse
-
#csv ⇒ Object
Returns the value of attribute csv.
-
#field_pos ⇒ Object
Returns the value of attribute field_pos.
-
#match_type ⇒ Object
Returns the value of attribute match_type.
-
#sampling_rate ⇒ Object
Returns the value of attribute sampling_rate.
Class Method Summary collapse
- .analyze(csv, field_pos, options = {}) ⇒ Object
Instance Method Summary collapse
- #execute ⇒ Object
-
#initialize(csv, field_pos, options = {}) ⇒ FieldAnalyzer
constructor
A new instance of FieldAnalyzer.
Constructor Details
#initialize(csv, field_pos, options = {}) ⇒ FieldAnalyzer
Returns a new instance of FieldAnalyzer.
9 10 11 12 13 14 |
# File 'lib/data_kit/csv/field_analyzer.rb', line 9 def initialize(csv, field_pos, options = {}) @csv = csv @field_pos = field_pos @match_type = options[:match_type] || :any @sampling_rate = options[:sampling_rate] || 0.1 end |
Instance Attribute Details
#csv ⇒ Object
Returns the value of attribute csv.
4 5 6 |
# File 'lib/data_kit/csv/field_analyzer.rb', line 4 def csv @csv end |
#field_pos ⇒ Object
Returns the value of attribute field_pos.
5 6 7 |
# File 'lib/data_kit/csv/field_analyzer.rb', line 5 def field_pos @field_pos end |
#match_type ⇒ Object
Returns the value of attribute match_type.
6 7 8 |
# File 'lib/data_kit/csv/field_analyzer.rb', line 6 def match_type @match_type end |
#sampling_rate ⇒ Object
Returns the value of attribute sampling_rate.
7 8 9 |
# File 'lib/data_kit/csv/field_analyzer.rb', line 7 def sampling_rate @sampling_rate end |
Class Method Details
.analyze(csv, field_pos, options = {}) ⇒ Object
39 40 41 |
# File 'lib/data_kit/csv/field_analyzer.rb', line 39 def analyze(csv, field_pos, options = {}) new(csv, field_pos, options).execute end |
Instance Method Details
#execute ⇒ Object
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/data_kit/csv/field_analyzer.rb', line 16 def execute first = true analysis = nil random = Random.new csv.each_row do |row| if first first = false field_name = csv.headers[field_pos] analysis = FieldAnalysis.new(field_name, { :match_type => match_type }) end analysis.increment_total if random.rand <= sampling_rate analysis.increment_sample analysis.insert(row[field_pos]) end end analysis end |