Class: JsonTableSchema::Infer

Inherits:
Object
  • Object
show all
Includes:
Helpers
Defined in:
lib/jsontableschema/infer.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Helpers

#convert_to_boolean, #false_values, #get_class_for_type, #true_values, #type_class_lookup

Constructor Details

#initialize(headers, rows, opts = {}) ⇒ Infer

Returns a new instance of Infer.



8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/jsontableschema/infer.rb', line 8

# Stores the inputs, builds the initial schema hash and runs inference.
#
# @param headers [Array] column names; `fields` builds one descriptor per entry
# @param rows [Enumerable] rows that `infer!` iterates over
# @param opts [Hash] optional settings, read by key:
#   :explicit    - kept in @explicit (consulted by `fields`)
#   :primary_key - kept in @primary_key; when truthy it is also stored under
#                  the schema's 'primaryKey' key
#   :row_limit   - kept in @row_limit (consulted by `infer!`)
def initialize(headers, rows, opts = {})
  @headers, @rows = headers, rows
  @explicit    = opts[:explicit]
  @primary_key = opts[:primary_key]
  @row_limit   = opts[:row_limit]

  # `fields` reads the instance variables assigned above, so it must run last.
  @schema = { 'fields' => fields }
  @schema['primaryKey'] = @primary_key if @primary_key

  infer!
end

Instance Attribute Details

#schema ⇒ Object (readonly)

Returns the value of attribute schema.



6
7
8
# File 'lib/jsontableschema/infer.rb', line 6

# Reader for the @schema instance variable.
#
# `initialize` first assigns a plain Hash to @schema; `infer!` later replaces
# it with a JsonTableSchema::Schema built from that hash.
def schema
  @schema
end

Instance Method Details

#available_types ⇒ Object



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# File 'lib/jsontableschema/infer.rb', line 126

# Type names considered during inference.
#
# `guess_type` walks this list in reverse, so entries nearer the end are
# tried first.
#
# @return [Array<String>] a new array on every call
def available_types
  %w[
    any string boolean number integer null
    date time datetime
    array object
    geopoint geojson
  ]
end

#fields ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/jsontableschema/infer.rb', line 22

# Builds one field descriptor per header.
#
# Every descriptor carries 'name', 'title' and 'description'. A 'constraints'
# hash is attached only when it ends up non-empty:
# * when @explicit is exactly true, both 'required' and 'unique' are always
#   written (including false values);
# * otherwise false-valued constraints are dropped, so only the primary-key
#   column keeps 'unique' => true.
#
# @return [Array<Hash>] descriptors in header order
def fields
  @headers.map do |header|
    strict = (@explicit === true)

    constraints = {
      'required' => strict,
      'unique' => (header == @primary_key)
    }
    # Outside explicit mode, false constraints are noise and are removed.
    constraints.reject! { |_name, flag| flag == false } unless strict

    descriptor = { 'name' => header, 'title' => '', 'description' => '' }
    descriptor['constraints'] = constraints unless constraints.empty?
    descriptor
  end
end

#guess_format(converter, col) ⇒ Object



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/jsontableschema/infer.rb', line 88

# Finds a non-default format whose cast method accepts the value.
#
# Each instance method of the converter's class whose name matches /cast_/
# is tried in turn (skipping the 'default' one). The first call that does not
# raise JsonTableSchema::Exception decides the format; its name, minus the
# 'cast_' part, is returned.
#
# @param converter [Object] object exposing cast_* methods
# @param col [Object] the cell value passed to each cast method
# @return [String] the matching format name, or 'default' when none accepts
def guess_format(converter, col)
  cast_methods = converter.class.instance_methods.grep(/cast_/)

  cast_methods.each do |cast_method|
    candidate = cast_method.to_s.sub('cast_', '')
    next if candidate == 'default'

    begin
      converter.send(cast_method, col)
      return candidate
    rescue JsonTableSchema::Exception
      # This format rejected the value; move on to the next cast method.
    end
  end

  'default'
end

#guess_type(col, index) ⇒ Object



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/jsontableschema/infer.rb', line 66

# Guesses the type and format of a single cell.
#
# nil and "" are reported as string/default without consulting any converter.
# Otherwise the names from `available_types` are tried last-to-first: for each
# one a converter class is resolved through `get_class_for_type`, instantiated
# with the field descriptor at `index`, and asked to `test` the value. The
# first converter whose test returns exactly true wins, and `guess_format`
# then picks the format.
#
# @param col [Object] the cell value
# @param index [Integer] position of the cell's field in @schema['fields']
# @return [Hash] { 'type' => String, 'format' => String }
def guess_type(col, index)
  inferred = { 'type' => 'string', 'format' => 'default' }
  return inferred if col.nil? || col == ""

  available_types.reverse_each do |candidate|
    converter_class = Kernel.const_get(get_class_for_type(candidate))
    converter = converter_class.new(@schema['fields'][index])
    next unless converter.test(col) === true

    inferred['type'] = candidate
    inferred['format'] = guess_format(converter, col)
    break
  end

  inferred
end

#infer! ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/jsontableschema/infer.rb', line 39

# Guesses a type/format for every cell, folds the guesses into the field
# descriptors via `resolve_types`, then replaces @schema with a
# JsonTableSchema::Schema built from the resulting hash.
#
# Each row is normalised to the header count before guessing: surplus cells
# are dropped and missing cells are padded with '' (which `guess_type` reports
# as string/default). When @row_limit is set, at most that many rows are read.
#
# The rows supplied by the caller are never modified.
#
# @return [JsonTableSchema::Schema] the value assigned to @schema
def infer!
  type_matches = []
  column_count = @headers.count

  @rows.each_with_index do |row, row_index|
    # Indices are zero-based, so `>=` stops after exactly @row_limit rows
    # (`>` would let one extra row through).
    break if @row_limit && row_index >= @row_limit

    row = row.fields if row.is_a?(CSV::Row)
    cell_count = row.count

    if cell_count > column_count
      # Exclusive range: an inclusive one keeps a cell with no matching field,
      # which later fails when its guesses are merged into a nil descriptor.
      row = row[0...column_count]
    elsif cell_count < column_count
      # Build a padded copy instead of pushing onto the caller's array.
      row = row + [''] * (column_count - cell_count)
    end

    row.each_with_index do |col, col_index|
      (type_matches[col_index] ||= []) << guess_type(col, col_index)
    end
  end

  resolve_types(type_matches)
  @schema = JsonTableSchema::Schema.new(@schema)
end

#resolve_types(results) ⇒ Object



104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/jsontableschema/infer.rb', line 104

# Chooses one type/format guess per column and merges it into the matching
# field descriptor in @schema['fields'].
#
# The most frequent guess in each column wins. Counting is done on the
# guesses as given (no de-duplication first, which would flatten every count
# to 1), and the caller's arrays are left unmodified.
#
# NOTE(review): on a tie the winner is whatever `max_by` returns; in practice
# that is the guess that appeared first in the column — confirm if the
# tie-break matters to callers.
#
# @param results [Array<Array<Hash>>] per-column lists of
#   { 'type' => ..., 'format' => ... } guesses, indexed like @schema['fields']
# @return [Array<Array<Hash>>] the `results` argument
def resolve_types(results)
  results.each_with_index do |result, field_index|
    # A column with no guesses has nothing to merge.
    next if result.nil? || result.empty?

    tallies = result.each_with_object(Hash.new(0)) { |guess, acc| acc[guess] += 1 }
    winner, _count = tallies.max_by { |_guess, count| count }

    @schema['fields'][field_index].merge!(winner)
  end
end