Class: DataKit::CSV::FieldAnalyzer

Inherits:
Object
  • Object
show all
Defined in:
lib/data_kit/csv/field_analyzer.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(csv, field_pos, options = {}) ⇒ FieldAnalyzer

Returns a new instance of FieldAnalyzer.



9
10
11
12
13
14
# File 'lib/data_kit/csv/field_analyzer.rb', line 9

# Stores the CSV source, the position of the field to analyze, and the
# analysis options on the new instance.
#
# @param csv the CSV source to analyze
# @param field_pos position of the field, used to index headers and rows
# @param options [Hash] optional settings
# @option options :match_type falls back to :any when absent or falsy
# @option options :sampling_rate falls back to 0.1 when absent or falsy
def initialize(csv, field_pos, options = {})
  requested_match = options[:match_type]
  requested_rate = options[:sampling_rate]

  @csv, @field_pos = csv, field_pos
  @match_type = requested_match || :any
  @sampling_rate = requested_rate || 0.1
end

Instance Attribute Details

#csv ⇒ Object

Returns the value of attribute csv.



4
5
6
# File 'lib/data_kit/csv/field_analyzer.rb', line 4

# Reader for the @csv instance variable.
#
# @return [Object] the current value of @csv
def csv; @csv; end

#field_pos ⇒ Object

Returns the value of attribute field_pos.



5
6
7
# File 'lib/data_kit/csv/field_analyzer.rb', line 5

# Reader for the @field_pos instance variable.
#
# @return [Object] the current value of @field_pos
def field_pos; @field_pos; end

#match_type ⇒ Object

Returns the value of attribute match_type.



6
7
8
# File 'lib/data_kit/csv/field_analyzer.rb', line 6

# Reader for the @match_type instance variable.
#
# @return [Object] the current value of @match_type
def match_type; @match_type; end

#sampling_rate ⇒ Object

Returns the value of attribute sampling_rate.



7
8
9
# File 'lib/data_kit/csv/field_analyzer.rb', line 7

# Reader for the @sampling_rate instance variable.
#
# @return [Object] the current value of @sampling_rate
def sampling_rate; @sampling_rate; end

Class Method Details

.analyze(csv, field_pos, options = {}) ⇒ Object



39
40
41
# File 'lib/data_kit/csv/field_analyzer.rb', line 39

# Convenience entry point: builds an analyzer from the given arguments and
# runs it immediately.
#
# @param csv passed through to the constructor
# @param field_pos passed through to the constructor
# @param options [Hash] passed through to the constructor
# @return [Object] whatever #execute returns on the new instance
def analyze(csv, field_pos, options = {})
  analyzer = new(csv, field_pos, options)
  analyzer.execute
end

Instance Method Details

#execute ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/data_kit/csv/field_analyzer.rb', line 16

# Walks every row yielded by csv.each_row and accumulates a FieldAnalysis for
# the field at field_pos.
#
# The analysis object is created when the first row is yielded and is named
# after csv.headers[field_pos]. Every row bumps the total count. A row is
# additionally counted as sampled, and its field value inserted, when a fresh
# random draw is <= sampling_rate.
#
# @return [FieldAnalysis, nil] the populated analysis, or nil when each_row
#   yields no rows
def execute
  rng = Random.new
  result = nil

  csv.each_row do |row|
    # Created lazily, on the first yielded row only.
    result ||= FieldAnalysis.new(csv.headers[field_pos], { :match_type => match_type })

    result.increment_total
    next unless rng.rand <= sampling_rate

    result.increment_sample
    result.insert(row[field_pos])
  end

  result
end