Class: PubliSci::Readers::ARFF

Inherits:
Object
  • Object
show all
Includes:
Dataset::DataCube
Defined in:
lib/publisci/readers/arff.rb

Instance Method Summary

Methods included from Dataset::DataCube

#abbreviate_known, #code_lists, #component_gen, #component_specifications, #concept_codes, #data_structure_definition, #dataset, #defaults, #dimension_properties, #encode_data, #generate, #generate_resources, #measure_properties, #observations, #prefixes, #vocabulary

Methods included from PubliSci::RDFParser

#add_node, #bnode_value, #encode_value, #get_ary, #get_hashes, #is_complex?, #is_uri?, #load_string, #observation_hash, #sanitize, #sanitize_hash, #strip_prefixes, #strip_uri, #to_literal, #to_resource, #turtle_indent

Instance Method Details

#components(arff) ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/publisci/readers/arff.rb', line 19

# Parses the @ATTRIBUTE declarations of an ARFF document.
#
# Lines that start with "@ATTRIBUTE" (case-insensitive) are read in file
# order. A declaration containing a brace list ("{a, b, c}") is treated as a
# nominal attribute; anything else is recorded with the raw type token that
# follows the attribute name.
#
# Limitation: quoted attribute names containing whitespace are still not
# supported.
#
# @param arff [String] the text of an ARFF document
# @return [Hash{String => Hash}] attribute name => specification, in
#   declaration order. Nominal attributes map to
#   `{type: :coded, codes: [String, ...]}`; all others map to
#   `{type: String}` (the type token exactly as written, or nil if absent).
def components(arff)
  arff.split("\n").each_with_object({}) do |line, specs|
    next unless line =~ /^@ATTRIBUTE/i

    nominal = line.match(/\{(.*)\}/)
    if nominal
      # Take the name from the text before the brace so a declaration written
      # without a space ("outlook{a,b}") does not drag the list into the name.
      name = line[0...nominal.begin(0)].split[1]
      # Strip each code: "{sunny, overcast}" must yield "overcast", not
      # " overcast", otherwise codes never match the values in the data rows.
      codes = nominal[1].split(',').map(&:strip)
      specs[name] = {type: :coded, codes: codes}
    else
      _keyword, name, type = line.split
      specs[name] = {type: type}
    end
  end
end

#data(arff, attributes) ⇒ Object



37
38
39
40
41
42
43
44
45
46
# File 'lib/publisci/readers/arff.rb', line 37

# Reads the rows that follow the @DATA marker of an ARFF document and
# returns them column-wise.
#
# Blank lines and "%" comment lines inside the data section are skipped, and
# surrounding whitespace (including the "\r" left by CRLF line endings) is
# removed from every value. Rows are split on bare commas, so quoted values
# that contain a comma are not supported.
#
# @param arff [String] the text of an ARFF document
# @param attributes [Array] column names, in the order the values appear in
#   each data row
# @return [Hash{Object => Array<String, nil>}] attribute => values, one entry
#   per data row; nil where a row has fewer fields than attributes
# @raise [ArgumentError] if the document has no @DATA line
def data(arff, attributes)
  lines = arff.split("\n")
  marker = lines.index { |line| line =~ /^@DATA/i }
  raise ArgumentError, 'ARFF input has no @DATA section' unless marker

  columns = attributes.each_with_object({}) { |attrib, acc| acc[attrib] = [] }

  lines[(marker + 1)..-1].each do |line|
    row = line.strip
    # Comments and blank lines are legal in ARFF but carry no observation.
    next if row.empty? || row.start_with?('%')

    fields = row.split(',').map(&:strip)
    attributes.each_with_index { |attrib, i| columns[attrib] << fields[i] }
  end

  columns
end

#generate_n3(arff, options = {}) ⇒ Object



6
7
8
9
10
11
12
13
# File 'lib/publisci/readers/arff.rb', line 6

# Converts an ARFF document into output produced by `generate`.
#
# Attributes without a code list are passed as the first argument to
# `generate`; attributes with a code list are passed as both the second and
# third arguments. NOTE(review): `generate` is not defined in this block —
# presumably it comes from the Dataset::DataCube mixin and these positions
# are measures / dimensions / coded dimensions; confirm against that module.
#
# @param arff [String] either a path to an existing file or the ARFF text
#   itself
# @param options [Hash] options forwarded to `generate`; `:no_labels` is
#   always forced to true. The caller's hash is not modified.
# @return [Object] whatever `generate` returns
def generate_n3(arff, options={})
  # File.read rather than IO.read: IO.read treats a leading "|" in the path
  # as a command to execute.
  arff = File.read(arff) if File.exist?(arff)

  # Merge into a copy so the caller's hash is left untouched.
  options = options.merge(no_labels: true)
  @options = options

  comps = components(arff)
  obs = data(arff, comps.keys)

  coded, uncoded = comps.keys.partition { |name| comps[name][:codes] }
  # One 1-based label per observation row, sized from the first column.
  row_labels = (1..obs.first[1].size).to_a

  # coded.dup keeps the second and third arguments as distinct arrays, as the
  # original two separate selections were.
  generate(uncoded, coded, coded.dup, obs, row_labels, relation(arff), options)
end

#relation(arff) ⇒ Object



15
16
17
# File 'lib/publisci/readers/arff.rb', line 15

# Extracts the relation name from the @relation declaration of an ARFF
# document.
#
# The first @relation (case-insensitive) that is not preceded by "%" on its
# line is used, so a mention inside a comment is ignored. A name wrapped in
# single or double quotes is returned without the quotes; for an unquoted
# declaration the last whitespace-separated token on the line is returned.
#
# @param arff [String] the text of an ARFF document
# @return [String] the relation name
# @raise [ArgumentError] if no @relation declaration with a name is found
def relation(arff)
  declaration = arff.match(/^[^%\n]*?@relation[ \t]+(.+)/i)
  # strip also drops the "\r" that CRLF files leave at the end of the match.
  name = declaration ? declaration[1].strip : ''
  raise ArgumentError, 'ARFF input has no @relation declaration' if name.empty?

  quoted = name.match(/\A(['"])(.*?)\1/)
  quoted ? quoted[2] : name.split.last
end