Class: PubliSci::Readers::ARFF

Inherits:
Object
  • Object
show all
Includes:
Dataset::DataCube
Defined in:
lib/bio-publisci/readers/arff.rb

Instance Method Summary collapse

Methods included from Dataset::DataCube

#abbreviate_known, #code_lists, #component_gen, #component_specifications, #concept_codes, #data_structure_definition, #dataset, #defaults, #dimension_properties, #encode_data, #generate, #generate_resources, #measure_properties, #observations, #prefixes, #vocabulary

Methods included from Parser

#add_node, #bnode_value, #encode_value, #get_ary, #get_hashes, #is_complex?, #is_uri?, #load_string, #observation_hash, #sanitize, #sanitize_hash, #strip_prefixes, #strip_uri, #to_literal, #to_resource, #turtle_indent

Instance Method Details

#components(arff) ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/bio-publisci/readers/arff.rb', line 19

# Parses the @ATTRIBUTE declarations of an ARFF document.
#
# Lines are recognised by a leading "@ATTRIBUTE" (case-insensitive). An
# attribute whose declaration contains a "{...}" value list is treated as
# coded; any other attribute records the token following its name as its type.
#
# @param arff [String] text of the ARFF document
# @return [Hash{String => Hash}] attribute name => description, in declaration
#   order. Coded attributes map to `{type: :coded, codes: [String, ...]}`,
#   all others to `{type: String}` (e.g. `{type: "NUMERIC"}`).
def components(arff)
	# TODO: still needs support for quoted strings with whitespace
	attributes = {}
	arff.split("\n").each do |line|
		next unless line =~ /^@ATTRIBUTE/i

		if line =~ /\{.*}/
			# The name is the first token after the @ATTRIBUTE keyword.
			name = line.match(/\s.*/).to_a.first.strip.split.first
			# Drop the braces, then trim every code so "{a, b}" yields
			# ["a", "b"] instead of ["a", " b"].
			codes = line.match(/\{.*}/).to_a.first[1..-2].split(',').map(&:strip)
			attributes[name] = {type: :coded, codes: codes}
		else
			_keyword, name, type = line.split
			attributes[name] = {type: type}
		end
	end
	attributes
end

#data(arff, attributes) ⇒ Object



37
38
39
40
41
42
43
44
45
46
# File 'lib/bio-publisci/readers/arff.rb', line 37

# Reads the rows following the @DATA marker of an ARFF document into columns.
#
# Each row is split on commas and its fields are assigned to `attributes` by
# position. Blank lines and "%" comment lines are skipped, and surrounding
# whitespace (including the "\r" of CRLF files) is trimmed from every value.
# A row with fewer fields than attributes contributes nil for the missing ones.
# Quoted values containing commas are not supported.
#
# @param arff [String] text of the ARFF document
# @param attributes [Array] attribute names, in column order
# @return [Hash] attribute name => Array of that column's values (Strings or nil)
# @raise [ArgumentError] if the document has no line starting with "@DATA"
def data(arff, attributes)
	lines = arff.split("\n")
	header_index = lines.index { |line| line =~ /^@DATA/i }
	raise ArgumentError, 'ARFF input has no @DATA section' if header_index.nil?

	columns = attributes.each_with_object({}) { |attribute, acc| acc[attribute] = [] }
	lines[(header_index + 1)..-1].each do |line|
		row = line.strip
		# Blank and comment lines are not observations.
		next if row.empty? || row.start_with?('%')

		fields = row.split(',').map(&:strip)
		attributes.each_with_index { |attribute, i| columns[attribute] << fields[i] }
	end
	columns
end

#generate_n3(arff, options = {}) ⇒ Object



6
7
8
9
10
11
12
13
# File 'lib/bio-publisci/readers/arff.rb', line 6

# Converts an ARFF document into output produced by #generate.
#
# The attributes are parsed with #components and the rows with #data; the
# relation name comes from #relation. Attributes without a code list are
# passed as the first argument to #generate, attributes with a code list as
# the second and third, and the observations are labelled 1..row count.
# NOTE(review): the meaning #generate attaches to each positional argument is
# defined outside this file view; confirm against Dataset::DataCube.
#
# @param arff [String] ARFF text, or the path of an existing file to read it from
# @param options [Hash] options forwarded to #generate; :no_labels is always
#   forced to true. The caller's hash is left unmodified.
# @return [Object] whatever #generate returns
def generate_n3(arff, options={})
	arff = IO.read(arff) if File.exist? arff
	# Merge into a copy rather than writing :no_labels into the caller's hash.
	options = options.merge(no_labels: true)
	@options = options
	comps = components(arff)
	obs = data(arff, comps.keys)
	coded, uncoded = comps.keys.partition { |name| comps[name][:codes] }
	# The coded names are passed twice; hand over separate arrays as before.
	generate(uncoded, coded, coded.dup, obs, (1..obs.first[1].size).to_a, relation(arff), options)
end

#relation(arff) ⇒ Object



15
16
17
# File 'lib/bio-publisci/readers/arff.rb', line 15

# Extracts the relation name from an ARFF document.
#
# Only a line that begins (after optional indentation) with "@relation",
# case-insensitive, is considered, so a mention of "@relation" inside a
# "%" comment is ignored. The name is the last whitespace-separated token on
# that line; quoted names containing whitespace are therefore not supported.
#
# @param arff [String] text of the ARFF document
# @return [String] the relation name
# @raise [ArgumentError] if no @relation declaration is present
def relation(arff)
	declaration = arff[/^[ \t]*@relation.+/i]
	raise ArgumentError, 'ARFF input has no @RELATION declaration' if declaration.nil?

	declaration.split.last
end