Class: Dbtools::Rdf_reader

Inherits:

Object

Object
Dbtools::Rdf_reader

show all

Defined in: lib/dbtools/rdf/rdf_reader.rb

Instance Method Summary collapse

#get_available_databases ⇒ Object

Executes a query that returns all csv/postgres/mysql datasets from the rdf graph.
#get_metadata(dataset_id) ⇒ Object
#has_next_page ⇒ Object

Check if there’s a hydra next page.
#initialize ⇒ Rdf_reader constructor

A new instance of Rdf_reader.
#load_from_file(file) ⇒ Object

Loads a file into the graph.
#next_page ⇒ Object

Adds the next page to the graph.

Constructor Details

#initialize ⇒ `Rdf_reader`

Returns a new instance of Rdf_reader.



9
10
11

# File 'lib/dbtools/rdf/rdf_reader.rb', line 9

# Builds a reader whose state is a single, newly constructed RDF::Graph
# stored in @graph; the other methods of this class query and extend it.
def initialize
  @graph = RDF::Graph.new
end

Instance Method Details

#get_available_databases ⇒ `Object`

Executes a query that returns all csv/postgres/mysql datasets from the rdf graph.

# File 'lib/dbtools/rdf/rdf_reader.rb', line 22

# Lists every dcat:Dataset in the graph that carries both a dct:title and a
# gp:database value, ordered by the database title.
#
# @return [Hash{Integer => Hash{String => String}}] zero-based result position
#   mapped to a hash with the keys 'dataset', 'dataset_title' and
#   'database_title'; every value is converted with #to_s.
def get_available_databases
  sparql_text = %(
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX gp: <http://www.geophy.com/rdf/terms#>
SELECT ?dataset ?database_title ?dataset_title
WHERE {
?dataset rdf:type dcat:Dataset ;
     dct:title ?dataset_title ;
     gp:database ?database_title .
}
ORDER BY ?database_title
)
  datasets = {}
  # Key each solution by its position in the ordered result set.
  @graph.query(SPARQL.parse(sparql_text)).each_with_index do |solution, position|
    datasets[position] = {
      'dataset' => solution.dataset.to_s,
      'dataset_title' => solution.dataset_title.to_s,
      'database_title' => solution.database_title.to_s
    }
  end
  datasets
end

#get_metadata(dataset_id) ⇒ `Object`

# File 'lib/dbtools/rdf/rdf_reader.rb', line 46

# Collects distribution metadata for the datasets whose IRI matches
# +dataset_id+.
#
# Only distributions whose dct:format matches (case-insensitively) one of
# postgres, mysql, csv, tsv, xlsx, xls or txt are returned. The dataset's
# dct:description is optional in the query.
#
# NOTE: +dataset_id+ is used as a SPARQL regex pattern against the dataset
# IRI, so it matches substrings and regex metacharacters keep their meaning.
# Double quotes and backslashes are escaped so the value cannot terminate or
# corrupt the surrounding SPARQL string literal.
#
# @param dataset_id [#to_s] pattern matched against the dataset IRI
# @return [Hash{Integer => Hash{String => Object}}] zero-based result position
#   mapped to a hash with the keys 'dataset', 'resource' (both left as
#   returned by the query), 'database_title', 'resource_title', 'format',
#   'access_url' (stringified) and, when the solution exposes it,
#   'description'.
def get_metadata(dataset_id)
  # Escape for a double-quoted SPARQL string literal.
  escaped_id = dataset_id.to_s.gsub(/[\\"]/) { |char| "\\#{char}" }
  query = SPARQL.parse(%(
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX gp: <http://www.geophy.com/rdf/terms#>
SELECT ?database_title ?dataset ?distribution ?format ?description ?access_url ?resource_title
WHERE {
?dataset rdf:type dcat:Dataset ;
     gp:database ?database_title ;
     dcat:distribution ?distribution .
OPTIONAL {
?dataset dct:description ?description .
}
?distribution dct:format ?format ;
          dct:title ?resource_title ;
          dcat:accessURL ?access_url .
FILTER( regex(?format, "postgres|mysql|csv|tsv|xlsx|xls|txt", 'i') &&
    regex(str(?dataset), "#{escaped_id}") )
}
ORDER BY ?resource_title
))
  # The query previously ordered by ?title, which is never bound; ordering by
  # ?resource_title makes the integer keys below deterministic.
  @graph.query(query).map.with_index do |result, index|
    metadata = {
      'dataset' => result.dataset,
      'resource' => result.distribution,
      'database_title' => result.database_title.to_s,
      'resource_title' => result.resource_title.to_s,
      'format' => result.format.to_s
    }
    # ?description comes from an OPTIONAL clause; only add it when the
    # solution actually responds to it.
    metadata['description'] = result.description.to_s if defined?(result.description)
    metadata['access_url'] = result.access_url.to_s
    [index, metadata]
  end.to_h
end

#has_next_page ⇒ `Object`

Check if there’s a hydra next page.

# File 'lib/dbtools/rdf/rdf_reader.rb', line 85

# Asks the graph whether any hydra:PagedCollection still has a
# hydra:nextPage link.
#
# @return [Object] the result of calling #true? on the ASK query's answer
def has_next_page
  ask_query = SPARQL.parse(%[
PREFIX hydra: <http://www.w3.org/ns/hydra/core#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

ASK { ?catalog rdf:type hydra:PagedCollection ; 
         hydra:nextPage ?next_catalog .  }
                       ])
  answer = @graph.query(ask_query)
  answer.true?
end

#load_from_file(file) ⇒ `Object`

Loads a file into the graph.

# File 'lib/dbtools/rdf/rdf_reader.rb', line 14

# Loads +file+ into the graph, then keeps pulling in further pages for as
# long as #has_next_page reports one.
#
# @param file [Object] passed unchanged to @graph.load
#   (presumably a path or URL; confirm against callers)
# @return [nil]
def load_from_file(file)
  @graph.load(file)
  next_page while has_next_page
end

#next_page ⇒ `Object`

Adds the next page to the graph.

# File 'lib/dbtools/rdf/rdf_reader.rb', line 98

# Follows the hydra:nextPage link of a paged collection and loads that page
# into the graph.
#
# The link is read first, then every statement whose subject is a
# hydra:PagedCollection is removed from the graph, and finally the linked
# page is loaded.
#
# @return [nil, Object] nil when #has_next_page is falsy, otherwise whatever
#   @graph.load returns
def next_page
  return unless has_next_page

  # Resolve the link before the paging statements are removed below.
  link_query = SPARQL.parse(%(
PREFIX hydra: <http://www.w3.org/ns/hydra/core#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT ?catalog 
WHERE { ?current_catalog rdf:type hydra:PagedCollection ; 
            hydra:nextPage ?catalog . }))
  upcoming = @graph.query(link_query).first

  # Select every statement about the current paged collection(s)...
  paging_query = SPARQL.parse(%(
PREFIX hydra: <http://www.w3.org/ns/hydra/core#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT ?catalog ?p ?o
WHERE { ?catalog rdf:type hydra:PagedCollection ; 
         ?p ?o .  }
                       ))
  # ...and remove each one from the graph.
  @graph.query(paging_query) do |hit|
    @graph.delete(RDF::Statement(hit.catalog, hit.p, hit.o))
  end

  @graph.load(upcoming[:catalog])
end

Class: Dbtools::Rdf_reader

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Rdf_reader

Instance Method Details

#get_available_databases ⇒ Object

#get_metadata(dataset_id) ⇒ Object

#has_next_page ⇒ Object