Module: PubliSci::Parser

Included in:
Dataset::DataCube, Metadata::Generator, ORM::DataCube, Writer::ARFF
Defined in:
lib/bio-publisci/parser.rb

Instance Method Summary

Instance Method Details

#get_ary(query_results, method = 'to_s') ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
# File 'lib/bio-publisci/parser.rb', line 50

# Converts query results into an array of arrays of converted values.
#
# Every solution is expanded with #to_a and only the last element of each
# resulting entry is kept (for Hash-like solutions that is the bound value).
#
# @param query_results [#map] collection of solutions, each responding to #to_a
# @param method [String, Symbol] conversion to call on each value; a value
#   that does not respond to it is converted with #to_s instead
# @return [Array<Array>] one inner array per solution
def get_ary(query_results, method = 'to_s')
  query_results.map do |solution|
    solution.to_a.map do |pair|
      value = pair.last
      value.respond_to?(method) ? value.send(method) : value.to_s
    end
  end
end

#get_hashes(query_results, method = nil) ⇒ Object



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/bio-publisci/parser.rb', line 62

# Converts query results into an array of hashes, one hash per solution.
#
# Each element yielded by a solution is read as a pair: index 0 is used as
# the key and index 1 as the value.
#
# @param query_results [#map] collection of solutions
# @param method [String, Symbol, nil] optional conversion applied to every
#   value that responds to it; other values (and all values when nil) are
#   stored untouched
# @return [Array<Hash>] one hash per solution, in iteration order
def get_hashes(query_results, method = nil)
  query_results.map do |solution|
    solution.each_with_object({}) do |element, row|
      key = element[0]
      value = element[1]
      convert = method && value.respond_to?(method)
      row[key] = convert ? value.send(method) : value
    end
  end
end

#is_uri?(string) ⇒ Boolean

Returns:

  • (Boolean)


4
5
6
# File 'lib/bio-publisci/parser.rb', line 4

# Reports whether +string+ is valid once wrapped as an RDF resource.
#
# The decision is delegated entirely to the RDF library: the value is handed
# to RDF::Resource(...) and that object's #valid? answer is returned.
# NOTE(review): what exactly counts as "valid" is defined by the RDF library
# and is not visible from this file -- confirm before relying on edge cases.
#
# @param string [Object] value to check (presumably a String; verify callers)
# @return [Boolean]
def is_uri?(string)
  resource = RDF::Resource(string)
  resource.valid?
end

#load_string(string, repo = RDF::Repository.new) ⇒ Object



41
42
43
44
45
46
47
48
# File 'lib/bio-publisci/parser.rb', line 41

# Loads serialized RDF held in a string into a repository.
#
# The string is written to a temporary file, which is then passed to
# +repo.load+ with <tt>:format => :ttl</tt>. The temporary file is always
# closed and removed afterwards, including when writing or loading raises,
# so failed loads do not leave stray files behind.
#
# @param string [String] serialized data to load
# @param repo [#load] target repository; a new RDF::Repository by default
# @return [Object] the same +repo+ that was passed in (or created)
# @raise whatever +repo.load+ or the file write raises; nothing is swallowed
def load_string(string, repo = RDF::Repository.new)
  f = Tempfile.new('repo')
  begin
    f.write(string)
    # Close first so the content is flushed to disk before the repo reads it.
    f.close
    repo.load(f.path, :format => :ttl)
  ensure
    # close! closes (a no-op if already closed) and unlinks the file.
    f.close!
  end
  repo
end

#observation_hash(query_results, shorten_uris = false, method = 'to_s') ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/bio-publisci/parser.rb', line 78

# Groups query results into a nested hash:
#   { observation => { property => value } }
#
# Every solution is read through its :observation, :property and :value
# entries, each converted with #to_s. A later solution for the same
# observation/property pair overwrites the earlier value.
#
# @param query_results [Enumerable] solutions indexable by :observation,
#   :property and :value
# @param shorten_uris [Boolean] when truthy, every key and value is passed
#   through strip_uri; observations whose shortened keys coincide are merged
# @param method [String] accepted for interface compatibility; this
#   implementation does not use it (values are always converted with #to_s)
# @return [Hash{String => Hash}]
def observation_hash(query_results, shorten_uris = false, method = 'to_s')
  observations = {}
  query_results.each do |sol|
    properties = (observations[sol[:observation].to_s] ||= {})
    properties[sol[:property].to_s] = sol[:value].to_s
  end

  return observations unless shorten_uris

  observations.each_with_object({}) do |(observation, properties), shortened|
    bucket = (shortened[strip_uri(observation)] ||= {})
    properties.each do |property, value|
      bucket[strip_uri(property)] = strip_uri(value)
    end
  end
end

#sanitize(array) ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/bio-publisci/parser.rb', line 8

# Replaces whitespace (and, for non-URI strings, dots) with underscores.
#
# The argument is coerced with Array(), so a single value or nil is accepted.
# Strings for which is_uri? is true only have whitespace replaced, leaving
# their dots alone; all other strings have whitespace and '.' replaced.
# Non-String entries are passed through unchanged.
#
# @param array [Array, Object, nil] value(s) to clean
# @return [Array] new array with one entry per input entry
def sanitize(array)
  Array(array).map do |entry|
    next entry unless entry.is_a? String

    pattern = is_uri?(entry) ? /[\s]/ : /[\s\.]/
    entry.gsub(pattern, '_')
  end
end

#sanitize_hash(h) ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/bio-publisci/parser.rb', line 26

# Replaces spaces with underscores in the String keys of +h+, in place.
#
# Every String key is deleted and re-inserted under its cleaned name (even
# when the name is unchanged), so String keys end up after non-String keys
# in iteration order. If two keys clean to the same name, the value written
# last replaces the earlier one.
#
# @param h [Hash] hash to modify; it is mutated, not copied
# @return [Hash] the same hash object that was passed in
def sanitize_hash(h)
  string_keys = h.keys.select { |key| key.is_a? String }

  string_keys.each do |key|
    h[key.gsub(' ', '_')] = h.delete(key)
  end

  h
end

#strip_prefixes(string) ⇒ Object



143
144
145
# File 'lib/bio-publisci/parser.rb', line 143

# Returns the text after the last ':' in the string form of +string+.
#
# Input without a ':' is returned whole (as a String); input whose string
# form is empty yields nil because there is no segment to return.
#
# @param string [#to_s] prefixed name such as "prefix:local"
# @return [String, nil]
def strip_prefixes(string)
  segments = string.to_s.split(':')
  segments.last
end

#strip_uri(uri) ⇒ Object



137
138
139
140
141
# File 'lib/bio-publisci/parser.rb', line 137

# Returns the final component of a URI: the text after the last '/' and,
# within that, after the last '#'. One trailing '>' is dropped first so
# bracketed forms such as "<http://example.org/a>" are handled.
#
# Input with no path segment at all (e.g. "", "/", ">" or nil) returns nil
# instead of raising NoMethodError; this matches the nil already returned
# when the last segment consists only of '#'.
#
# @param uri [#to_s] URI or other value to shorten; never modified
# @return [String, nil] the last component, or nil when there is none
def strip_uri(uri)
  # chomp returns a copy with at most one trailing '>' removed.
  trimmed = uri.to_s.chomp('>')
  last_segment = trimmed.split('/').last
  # split yields [] for "" and "/", so there may be nothing to inspect.
  return nil if last_segment.nil?

  last_segment.split('#').last
end

#to_literal(obj, options) ⇒ Object



118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/bio-publisci/parser.rb', line 118

# Converts a value into the form used for a literal.
#
# Strings that Kernel#Integer or Kernel#Float can parse are returned as
# numbers; any other string is wrapped in double quotes. nil becomes the
# quoted string "NA" when options[:encode_nulls] is set. Everything else is
# returned unchanged.
#
# NOTE(review): Kernel#Integer honours radix prefixes, so e.g. "010" is read
# as octal 8 and "0x1A" as 26. The quoted form does not escape any '"'
# already present in the string. Confirm both are acceptable for callers.
#
# @param obj [Object] value to convert
# @param options [Hash] only :encode_nulls is consulted, and only for nil
# @return [Integer, Float, String, Object]
def to_literal(obj, options)
  unless obj.is_a? String
    # TODO decide the right way to handle missing values, since RDF has no null
    return '"NA"' if obj == nil && options[:encode_nulls]

    return obj
  end

  # There is no lighter built-in check for "is this string a number", so
  # attempt each conversion and treat a failure as "not that kind of number".
  as_integer = (Integer(obj) rescue nil)
  return as_integer if as_integer

  as_float = (Float(obj) rescue nil)
  return as_float if as_float

  '"' + obj + '"'
end

#to_resource(obj, options) ⇒ Object



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/bio-publisci/parser.rb', line 98

# Converts a value into the form used for a resource reference.
#
# * String: wrapped in <...> when is_uri? accepts it; an empty string becomes
#   "NA"; then spaces are replaced by '_' and every '?' is removed.
# * nil with options[:encode_nulls] set: the quoted string "NA".
# * Numeric: prefixed with "n", since a bare number is presumably not usable
#   as a resource name.
# * Anything else: returned unchanged.
#
# @param obj [Object] value to convert
# @param options [Hash] only :encode_nulls is consulted, and only for nil
# @return [String, Object]
def to_resource(obj, options)
  if obj.is_a? String
    resource = is_uri?(obj) ? "<#{obj}>" : obj

    # TODO decide the right way to handle missing values, since RDF has no null
    # probably throw an error here since a missing resource is a bigger problem
    resource = "NA" if resource.empty?

    # TODO remove special characters (faster) as well (eg '?')
    return resource.gsub(' ', '_').gsub('?', '')
  end

  return '"NA"' if obj == nil && options[:encode_nulls]
  return "n" + obj.to_s if obj.is_a? Numeric

  obj
end