Class: Dbtools::Rdf_reader
- Inherits: Object
- Inheritance hierarchy: Object → Dbtools::Rdf_reader
- Defined in:
- lib/dbtools/rdf/rdf_reader.rb
Instance Method Summary
-
#get_available_databases ⇒ Object
Executes a query that returns all csv/postgres/mysql datasets from the rdf graph.
- #get_metadata(dataset_id) ⇒ Object
-
#has_next_page ⇒ Object
Check if there’s a hydra next page.
-
#initialize ⇒ Rdf_reader
constructor
A new instance of Rdf_reader.
-
#load_from_file(file) ⇒ Object
Loads a file into the graph.
-
#next_page ⇒ Object
Adds the next page to the graph.
Constructor Details
#initialize ⇒ Rdf_reader
Returns a new instance of Rdf_reader.
9 10 11 |
# File 'lib/dbtools/rdf/rdf_reader.rb', line 9
# Creates a reader whose state is a single, newly constructed RDF::Graph
# stored in @graph. The other methods of this class query and extend it.
#
# @return [Rdf_reader] a new instance of Rdf_reader
def initialize
  @graph = RDF::Graph.new
end
Instance Method Details
#get_available_databases ⇒ Object
Executes a query that returns all csv/postgres/mysql datasets from the rdf graph.
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
# File 'lib/dbtools/rdf/rdf_reader.rb', line 22
# Lists the datasets described in the loaded graph.
#
# Selects every dcat:Dataset that carries both a dct:title and a gp:database
# value, ordered by the database title. Note that the query itself applies no
# filter on the distribution format.
#
# @return [Hash{Integer => Hash{String => String}}] the zero-based position of
#   each solution mapped to a hash with the keys 'dataset', 'dataset_title'
#   and 'database_title' (all values converted with #to_s).
def get_available_databases
  sparql = SPARQL.parse(%(
    PREFIX dcat: <http://www.w3.org/ns/dcat#>
    PREFIX dct: <http://purl.org/dc/terms/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX gp: <http://www.geophy.com/rdf/terms#>
    SELECT ?dataset ?database_title ?dataset_title
    WHERE {
      ?dataset rdf:type dcat:Dataset ;
               dct:title ?dataset_title ;
               gp:database ?database_title .
    }
    ORDER BY ?database_title
  ))

  @graph.query(sparql).each_with_index.each_with_object({}) do |(solution, position), databases|
    databases[position] = {
      'dataset' => solution.dataset.to_s,
      'dataset_title' => solution.dataset_title.to_s,
      'database_title' => solution.database_title.to_s
    }
  end
end
#get_metadata(dataset_id) ⇒ Object
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
# File 'lib/dbtools/rdf/rdf_reader.rb', line 46
# Looks up the distributions (resources) of the dataset(s) whose IRI matches
# +dataset_id+.
#
# Only distributions whose dct:format matches one of
# postgres/mysql/csv/tsv/xlsx/xls/txt (case-insensitive) are returned.
#
# @param dataset_id [#to_s] text interpolated into a SPARQL regex() that is
#   applied to the dataset IRI.
# @return [Hash{Integer => Hash}] the zero-based position of each solution
#   mapped to a hash with the keys 'dataset' and 'resource' (left as the
#   values the query returned), 'database_title', 'resource_title', 'format'
#   and 'access_url' (strings), plus 'description' (string) when the solution
#   exposes one.
def get_metadata(dataset_id)
  # NOTE(review): dataset_id is interpolated unescaped, so it acts as a regex
  # pattern and matches anywhere in the dataset IRI (an id of "1" would also
  # match ".../10"). Confirm callers only pass trusted, specific ids.
  # NOTE(review): ORDER BY ?title refers to a variable that is never bound in
  # the WHERE clause; presumably ?resource_title or ?database_title was
  # intended -- confirm before changing the sort key.
  sparql = SPARQL.parse(%(
    PREFIX dcat: <http://www.w3.org/ns/dcat#>
    PREFIX dct: <http://purl.org/dc/terms/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX gp: <http://www.geophy.com/rdf/terms#>
    SELECT ?database_title ?dataset ?distribution ?format ?description ?access_url ?resource_title
    WHERE {
      ?dataset rdf:type dcat:Dataset ;
               gp:database ?database_title ;
               dcat:distribution ?distribution .
      OPTIONAL { ?dataset dct:description ?description . }
      ?distribution dct:format ?format ;
                    dct:title ?resource_title ;
                    dcat:accessURL ?access_url .
      FILTER( regex(?format, "postgres|mysql|csv|tsv|xlsx|xls|txt", 'i') && regex(str(?dataset), "#{dataset_id}") )
    }
    ORDER BY ?title
  ))

  @graph.query(sparql).each_with_index.each_with_object({}) do |(solution, position), metadata|
    entry = {
      'dataset' => solution.dataset,
      'resource' => solution.distribution,
      'database_title' => solution.database_title.to_s,
      'resource_title' => solution.resource_title.to_s,
      'format' => solution.format.to_s
    }
    # The description comes from an OPTIONAL pattern, so only copy it when the
    # solution actually exposes it.
    entry['description'] = solution.description.to_s if defined?(solution.description)
    entry['access_url'] = solution.access_url.to_s
    metadata[position] = entry
  end
end
#has_next_page ⇒ Object
Check if there’s a hydra next page.
85 86 87 88 89 90 91 92 93 94 95 |
# File 'lib/dbtools/rdf/rdf_reader.rb', line 85
# Tells whether the graph currently holds a hydra:PagedCollection that has a
# hydra:nextPage link.
#
# @return [Boolean] the result of calling #true? on the ASK query's answer
def has_next_page
  ask = SPARQL.parse(%[
    PREFIX hydra: <http://www.w3.org/ns/hydra/core#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    ASK {
      ?catalog rdf:type hydra:PagedCollection ;
               hydra:nextPage ?next_catalog .
    }
  ])

  answer = @graph.query(ask)
  answer.true?
end
#load_from_file(file) ⇒ Object
Loads a file into the graph
14 15 16 17 18 19 |
# File 'lib/dbtools/rdf/rdf_reader.rb', line 14
# Loads +file+ into the graph, then keeps pulling in follow-up pages for as
# long as #has_next_page reports one.
#
# @param file the source handed unchanged to @graph.load
# @return [nil]
def load_from_file(file)
  @graph.load(file)
  next_page while has_next_page
end
#next_page ⇒ Object
Adds the next page to the graph
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
# File 'lib/dbtools/rdf/rdf_reader.rb', line 98
# Adds the next hydra page to the graph.
#
# Does nothing (and returns nil) when #has_next_page is false. Otherwise it
# reads the hydra:nextPage link of the first matching paged collection,
# removes every triple whose subject is typed hydra:PagedCollection, and then
# loads the linked page into the graph.
#
# @return the result of @graph.load, or nil when there is no next page
def next_page
  return unless has_next_page

  # Find the link to the following page.
  link_query = SPARQL.parse(%(
    PREFIX hydra: <http://www.w3.org/ns/hydra/core#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    SELECT ?catalog
    WHERE {
      ?current_catalog rdf:type hydra:PagedCollection ;
                       hydra:nextPage ?catalog .
    }))
  upcoming = @graph.query(link_query).first

  # Drop the existing hydra paging nodes -- presumably so the link that was
  # just read is not followed again on the next check.
  purge_query = SPARQL.parse(%(
    PREFIX hydra: <http://www.w3.org/ns/hydra/core#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    SELECT ?catalog ?p ?o
    WHERE {
      ?catalog rdf:type hydra:PagedCollection ;
               ?p ?o .
    }
  ))
  @graph.query(purge_query) do |row|
    @graph.delete(RDF::Statement(row.catalog, row.p, row.o))
  end

  # Pull the linked page into the graph.
  @graph.load(upcoming[:catalog])
end