Class: DataPackage::SimpleDataFormatValidator

Inherits:
DataPackageValidator show all
Defined in:
lib/datapackage/validator.rb

Overview

Validator that checks whether a package conforms to the Simple Data Format profile

Instance Attribute Summary

Attributes inherited from Validator

#messages

Instance Method Summary collapse

Methods inherited from DataPackageValidator

#validate_metadata

Methods inherited from Validator

create, #valid?, #validate

Constructor Details

#initialize(schema_name = :datapackage, opts = {}) ⇒ SimpleDataFormatValidator

Returns a new instance of SimpleDataFormatValidator.



125
126
127
128
129
# File 'lib/datapackage/validator.rb', line 125

# Builds a validator for the Simple Data Format profile.
#
# The parent constructor is always invoked with the :datapackage schema;
# the schema_name argument is accepted but not forwarded. Two further
# schemas (JSON Table and the CSVDDF dialect) are loaded and kept on the
# instance.
#
# @param schema_name [Symbol] accepted but unused by this constructor
# @param opts [Hash] passed through unchanged to the parent constructor
def initialize(schema_name = :datapackage, opts = {})
    super(:datapackage, opts)

    @jsontable_schema = load_schema(:jsontable)
    @csvddf_schema    = load_schema("csvddf-dialect")
end

Instance Method Details

#csv?(resource) ⇒ Boolean

Returns:

  • (Boolean)


175
176
177
178
# File 'lib/datapackage/validator.rb', line 175

# Reports whether a resource's metadata describes a CSV file.
#
# A resource counts as CSV when its "mediatype" is exactly "text/csv" or
# its "format" is exactly "csv" (both comparisons are case-sensitive).
#
# @param resource [#[]] resource metadata, looked up by string key
# @return [Boolean]
def csv?(resource)
    return true if resource["mediatype"] == "text/csv"

    resource["format"] == "csv"
end

#dialect_to_csv_options(dialect) ⇒ Object



195
196
197
198
199
200
201
202
203
204
205
# File 'lib/datapackage/validator.rb', line 195

# Translates a CSVDDF dialect description into options for Ruby's CSV parser.
#
# Defaults are supplied for every key in case the dialect is invalid or
# incomplete: "," as delimiter, automatic row separator detection and '"'
# as the quote character.
#
# @param dialect [Hash, nil] dialect description with string keys
#   ("delimiter", "skipinitialspace", "lineterminator", "quotechar")
# @return [Hash] {} when no dialect is given, otherwise a hash with the
#   :col_sep, :row_sep and :quote_char keys
def dialect_to_csv_options(dialect)
    return {} unless dialect

    delimiter = dialect["delimiter"] || ","
    # In CSVDDF, skipinitialspace == true means whitespace directly after a
    # delimiter is NOT part of the following field. The parser options built
    # here have no equivalent switch, so this is approximated by folding a
    # single space into the column separator. When the flag is false or
    # absent the delimiter must be used as-is, otherwise ordinary
    # "a,b,c" data would be read as one column.
    delimiter += " " if dialect["skipinitialspace"]

    {
        :col_sep => delimiter,
        :row_sep => (dialect["lineterminator"] || :auto),
        :quote_char => (dialect["quotechar"] || '"')
    }
end

#headers(package, resource, path) ⇒ Object



180
181
182
183
184
185
186
187
188
189
190
191
192
193
# File 'lib/datapackage/validator.rb', line 180

# Reads the header row (first row) of a resource's CSV file.
#
# The built-in CSV parser is used because it is more permissive than
# fastercsv and accepts options, derived from the resource's dialect, to
# tweak the parsing.
#
# @param package [#resolve_resource] used to turn the resource into a
#   location that CSV.open can read
# @param resource [Hash] resource metadata; its "dialect" entry (if any)
#   is converted into parser options
# @param path [String] passed to add_error when reading fails
# @return [Array] the first row of the file; [] when the file has no rows
#   or could not be read (in the latter case an :integrity error is added)
def headers(package, resource, path)
    location = package.resolve_resource(resource)
    opts = dialect_to_csv_options(resource["dialect"])
    header_row = []
    begin
        # Options must be splatted: since Ruby 3.0 CSV.open only accepts them
        # as keywords, and a positional hash raises ArgumentError (which the
        # rescue below would misreport as a parse error for every file).
        CSV.open(location, "r", **opts) do |csv|
            # shift returns nil for a file without rows; callers do set
            # arithmetic on the result, so always hand back an Array.
            header_row = csv.shift || []
        end
    rescue => e
        add_error(:integrity, "Parse error for #{location}: #{e}", path)
    end
    header_row
end

#validate_resource(package, resource, path) ⇒ Object



131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# File 'lib/datapackage/validator.rb', line 131

# Validates one resource against the Simple Data Format profile.
#
# After the inherited checks have run, this method:
#   1. adds an :integrity error and stops if the resource is not a CSV file;
#   2. adds a :metadata error if there is no schema, otherwise validates the
#      schema against the JSON Table schema;
#   3. validates the dialect, when present, against the CSVDDF schema;
#   4. if the resource exists and the schema lists fields, compares the
#      declared field names with the CSV header row and adds :integrity
#      errors for names found on only one side.
#
# @param package [#resolve_resource, #resource_exists?] the owning package
# @param resource [Hash] resource metadata with string keys
# @param path [String] location prefix used when recording errors
# @return [Object] not meaningful; whatever the last executed step returned
def validate_resource(package, resource, path)
    super(package, resource, path)

    unless csv?(resource)
        return add_error(:integrity, "#{resource["name"]} is not a CSV file", path)
    end

    schema = resource["schema"]
    if schema
        schema_errors = JSON::Validator.fully_validate(@jsontable_schema, schema, :errors_as_objects => true)
        @messages[:errors] += adjust_messages(schema_errors, :metadata, path + "/schema/")
    else
        add_error(:metadata, "#{resource["name"]} does not have a schema", path)
    end

    dialect = resource["dialect"]
    if dialect
        dialect_errors = JSON::Validator.fully_validate(@csvddf_schema, dialect, :errors_as_objects => true)
        @messages[:errors] += adjust_messages(dialect_errors, :metadata, path + "/dialect")
    end

    return unless package.resource_exists?(package.resolve_resource(resource))

    # A malformed schema or field list has already been reported by the
    # schema validation above; skip the comparison instead of crashing on it.
    fields = schema["fields"] if schema.is_a?(Hash)
    return unless fields.is_a?(Array)

    declared_fields = fields.select { |field| field.is_a?(Hash) }.map { |field| field["name"] }
    # Array() guards against a nil header row (e.g. an empty CSV file).
    csv_headers = Array(headers(package, resource, path))

    # Set algebra to find fields missing from the schema and/or the CSV file.
    missing_fields = declared_fields - csv_headers
    unless missing_fields.empty?
        add_error(:integrity,
            "Declared schema has fields not present in CSV file (#{missing_fields.join(",")})",
            path + "/schema/fields")
    end

    undeclared_fields = csv_headers - declared_fields
    unless undeclared_fields.empty?
        add_error(:integrity,
            "CSV file has fields missing from schema (#{undeclared_fields.join(",")})",
            path + "/schema/fields")
    end
end