Module: PubliSci::Parser
- Included in:
- Dataset::DataCube, Metadata::Generator, ORM::DataCube, ORM::DataCube, Writer::ARFF
- Defined in:
- lib/bio-publisci/parser.rb
Instance Method Summary
- #get_ary(query_results, method = 'to_s') ⇒ Object
- #get_hashes(query_results, method = nil) ⇒ Object
- #is_uri?(string) ⇒ Boolean
- #load_string(string, repo = RDF::Repository.new) ⇒ Object
- #observation_hash(query_results, shorten_uris = false, method = 'to_s') ⇒ Object
- #sanitize(array) ⇒ Object
- #sanitize_hash(h) ⇒ Object
- #strip_prefixes(string) ⇒ Object
- #strip_uri(uri) ⇒ Object
- #to_literal(obj, options) ⇒ Object
- #to_resource(obj, options) ⇒ Object
Instance Method Details
#get_ary(query_results, method = 'to_s') ⇒ Object
50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/bio-publisci/parser.rb', line 50
#
# Flattens query results into an array of arrays of converted values.
#
# Every solution is turned into a list of entries with `to_a`; the last
# element of each entry is converted by calling `method` on it when the
# value responds to it, and with `to_s` otherwise.
#
# @param query_results [#map] collection of solutions, each responding to
#   `to_a` (presumably SPARQL query solutions -- confirm against callers)
# @param method [String, Symbol] name of the conversion method to try
# @return [Array<Array>] one inner array of converted values per solution
def get_ary(query_results, method = 'to_s')
  query_results.map do |solution|
    solution.to_a.map do |entry|
      value = entry.last
      value.respond_to?(method) ? value.send(method) : value.to_s
    end
  end
end
#get_hashes(query_results, method = nil) ⇒ Object
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
# File 'lib/bio-publisci/parser.rb', line 62
#
# Converts query results into an array of hashes, one hash per solution.
#
# Each element yielded by a solution is treated as a pair: index 0 becomes
# the hash key and index 1 the value. When `method` is given and the value
# responds to it, the value is replaced by the result of calling it;
# otherwise the value is stored untouched.
#
# @param query_results [#map] collection of solutions, each yielding
#   two-element entries
# @param method [String, Symbol, nil] optional conversion applied to values
# @return [Array<Hash>] one hash per solution
def get_hashes(query_results, method = nil)
  query_results.map do |solution|
    row = {}
    solution.each do |element|
      name = element[0]
      value = element[1]
      convert = method && value.respond_to?(method)
      row[name] = convert ? value.send(method) : value
    end
    row
  end
end
#is_uri?(string) ⇒ Boolean
4 5 6 |
# File 'lib/bio-publisci/parser.rb', line 4
#
# Reports whether a string is a valid RDF resource identifier.
#
# The decision is delegated entirely to RDF.rb: the string is wrapped with
# `RDF::Resource(...)` and the resulting object's `valid?` is returned.
#
# @param string [String] candidate identifier
# @return [Boolean] result of `valid?` on the constructed resource
def is_uri?(string)
  resource = RDF::Resource(string)
  resource.valid?
end
#load_string(string, repo = RDF::Repository.new) ⇒ Object
41 42 43 44 45 46 47 48 |
# File 'lib/bio-publisci/parser.rb', line 41
#
# Loads a string of Turtle into a repository by way of a temporary file.
#
# The string is written to a Tempfile, which is handed to `repo.load` with
# `:format => :ttl`. The temporary file is always closed and removed, even
# when writing or loading raises.
#
# @param string [String] serialized graph, loaded as Turtle
# @param repo [#load] target repository; a fresh RDF::Repository by default
# @return [Object] the same `repo` that was passed in (or the new default)
# @raise whatever `repo.load` raises; the temp file is still cleaned up
def load_string(string, repo = RDF::Repository.new)
  file = Tempfile.new('repo')
  begin
    file.write(string)
    # Close first so the content is flushed to disk before the loader reads it.
    file.close
    repo.load(file.path, :format => :ttl)
  ensure
    # close! closes (a no-op if already closed) and unlinks the file, so a
    # failing load can no longer leave the temp file behind.
    file.close!
  end
  repo
end
#observation_hash(query_results, shorten_uris = false, method = 'to_s') ⇒ Object
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
# File 'lib/bio-publisci/parser.rb', line 78
#
# Groups query results into a nested hash keyed by observation, then by
# property: `{ observation => { property => value } }`.
#
# Each solution is read through `[:observation]`, `[:property]` and
# `[:value]`, and all three are converted with `to_s`. When `shorten_uris`
# is truthy every key and value is additionally passed through `strip_uri`.
#
# @param query_results [#each] solutions indexable by :observation,
#   :property and :value
# @param shorten_uris [Boolean] run keys and values through `strip_uri`
# @param method [String] accepted for signature compatibility; the body
#   does not use it
# @return [Hash{String => Hash}] nested observation/property/value mapping
def observation_hash(query_results, shorten_uris = false, method = 'to_s')
  observations = {}
  query_results.each do |sol|
    subject = sol[:observation].to_s
    observations[subject] ||= {}
    observations[subject][sol[:property].to_s] = sol[:value].to_s
  end
  return observations unless shorten_uris

  observations.each_with_object({}) do |(subject, properties), shortened|
    short_subject = strip_uri(subject)
    shortened[short_subject] ||= {}
    properties.each do |property, value|
      shortened[short_subject][strip_uri(property)] = strip_uri(value)
    end
  end
end
#sanitize(array) ⇒ Object
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
# File 'lib/bio-publisci/parser.rb', line 8
#
# Removes spaces and other special characters from the strings in a list.
#
# The argument is coerced with `Array()`. String entries that `is_uri?`
# accepts have whitespace replaced by underscores; all other strings have
# both whitespace and dots replaced. Non-string entries pass through as-is.
#
# @param array [Array, Object] value or list of values to clean
# @return [Array] new array with cleaned strings, same order as the input
def sanitize(array)
  Array(array).map do |entry|
    next entry unless entry.is_a?(String)

    # URIs keep their dots; everything else loses them too.
    pattern = is_uri?(entry) ? /[\s]/ : /[\s\.]/
    entry.gsub(pattern, '_')
  end
end
#sanitize_hash(h) ⇒ Object
26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
# File 'lib/bio-publisci/parser.rb', line 26
#
# Replaces spaces with underscores in the String keys of a hash, in place.
#
# Every String key is deleted and re-inserted under its rewritten name
# (even when the name is unchanged), so String keys end up after the
# non-String keys in iteration order. Non-String keys are left alone.
#
# @param h [Hash] hash to rewrite; it is mutated
# @return [Hash] the same hash object `h`
def sanitize_hash(h)
  # Iterate over a snapshot of the keys because the hash is modified below.
  h.keys.each do |key|
    next unless key.is_a?(String)

    h[key.gsub(' ', '_')] = h.delete(key)
  end
  h
end
#strip_prefixes(string) ⇒ Object
143 144 145 |
# File 'lib/bio-publisci/parser.rb', line 143
#
# Returns the part of a string after its last colon.
#
# The argument is converted with `to_s` and split on ':'; the final piece
# is returned. An empty string yields nil because there are no pieces.
#
# @param string [#to_s] value such as a prefixed name ("prefix:local")
# @return [String, nil] text after the last ':', or nil for empty input
def strip_prefixes(string)
  segments = string.to_s.split(':')
  segments[-1]
end
#strip_uri(uri) ⇒ Object
137 138 139 140 141 |
# File 'lib/bio-publisci/parser.rb', line 137
#
# Reduces a URI to its last path segment or fragment.
#
# The argument is converted with `to_s`, one trailing '>' is dropped, and
# the text after the last '/' -- and within that, after the last '#' -- is
# returned. Values with no '/' or '#' come back unchanged.
#
# Inputs that leave no segment at all (nil, '', '/', '>') return an empty
# string instead of raising NoMethodError on nil.
#
# @param uri [#to_s] URI, optionally with a closing angle bracket
# @return [String] final segment or fragment; '' when there is none
def strip_uri(uri)
  # chomp returns a new string, so frozen inputs are never mutated.
  text = uri.to_s.chomp('>')
  # `last` is nil when split produces no pieces; to_s keeps the chain safe.
  last_segment = text.split('/').last.to_s
  last_segment.split('#').last.to_s
end
#to_literal(obj, options) ⇒ Object
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
# File 'lib/bio-publisci/parser.rb', line 118
#
# Converts a Ruby value into the form used for an RDF literal.
#
# Strings that parse as an Integer, or failing that as a Float, are
# returned as that number; any other string is wrapped in double quotes.
# nil becomes the quoted string "NA" when `options[:encode_nulls]` is set.
# Everything else is returned unchanged.
#
# Parsing uses Kernel#Integer and Kernel#Float, so radix prefixes are
# honoured (for example "0x1A" => 26 and "010" => 8).
#
# @param obj [Object] value to convert
# @param options [Hash] only :encode_nulls is read, and only when obj is nil
# @return [Integer, Float, String, Object] converted literal
def to_literal(obj, options)
  if obj.is_a? String
    begin
      Integer(obj)
    rescue ArgumentError
      begin
        Float(obj)
      rescue ArgumentError
        # Not numeric: emit it as a quoted string literal.
        '"' + obj + '"'
      end
    end
  elsif obj.nil? && options[:encode_nulls]
    # TODO: decide the right way to handle missing values, since RDF has no null
    '"NA"'
  else
    obj
  end
end
#to_resource(obj, options) ⇒ Object
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
# File 'lib/bio-publisci/parser.rb', line 98
#
# Converts a Ruby value into the form used for an RDF resource reference.
#
# Strings accepted by `is_uri?` are wrapped in angle brackets. An empty
# string becomes "NA". In every string result spaces are replaced by
# underscores and question marks are removed. nil becomes the quoted
# string "NA" when `options[:encode_nulls]` is set. Numerics are prefixed
# with "n". Anything else is returned unchanged.
#
# @param obj [Object] value to convert
# @param options [Hash] only :encode_nulls is read, and only when obj is nil
# @return [String, Object] resource text, or `obj` itself when no rule applies
def to_resource(obj, options)
  if obj.is_a? String
    resource = is_uri?(obj) ? "<#{obj}>" : obj
    # TODO: decide the right way to handle missing values, since RDF has no null;
    # probably throw an error here since a missing resource is a bigger problem
    resource = 'NA' if resource.empty?
    # TODO: remove special characters (faster) as well (eg '?')
    resource.tr(' ', '_').delete('?')
  elsif obj.nil? && options[:encode_nulls]
    '"NA"'
  elsif obj.is_a? Numeric
    # resources cannot be referred to purely by integer (?)
    "n#{obj}"
  else
    obj
  end
end