Class: Dbtools::Rdf_reader

Inherits:
Object
  • Object
show all
Defined in:
lib/dbtools/rdf/rdf_reader.rb

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Rdf_reader

Returns a new instance of Rdf_reader.



9
10
11
# File 'lib/dbtools/rdf/rdf_reader.rb', line 9

# Sets up the reader with a fresh RDF::Graph instance kept in @graph; the
# other methods of this class query and load data into that graph.
def initialize
  @graph = RDF::Graph.new
end

Instance Method Details

#get_available_databases ⇒ Object

Executes a query that returns all csv/postgres/mysql datasets from the rdf graph.



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/dbtools/rdf/rdf_reader.rb', line 22

# Lists every resource in the graph that is typed dcat:Dataset and has both a
# dct:title and a gp:database value, ordered by the database title.
#
# @return [Hash{Integer => Hash{String => String}}] zero-based result position
#   mapped to a hash with the keys 'dataset', 'dataset_title' and
#   'database_title' (all values converted with #to_s).
def get_available_databases
  sparql_text = %(
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX gp: <http://www.geophy.com/rdf/terms#>
SELECT ?dataset ?database_title ?dataset_title
WHERE {
?dataset rdf:type dcat:Dataset ;
     dct:title ?dataset_title ;
     gp:database ?database_title .
}
ORDER BY ?database_title
)
  solutions = @graph.query(SPARQL.parse(sparql_text))

  # Key each row by its position in the (ordered) result set.
  solutions.each_with_index.each_with_object({}) do |(solution, position), listing|
    listing[position] = {
      'dataset' => solution.dataset.to_s,
      'dataset_title' => solution.dataset_title.to_s,
      'database_title' => solution.database_title.to_s
    }
  end
end

#get_metadata(dataset_id) ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/dbtools/rdf/rdf_reader.rb', line 46

# Collects the distributions of every dcat:Dataset whose IRI matches
# +dataset_id+ and whose dct:format matches one of postgres, mysql, csv, tsv,
# xlsx, xls or txt (case-insensitive).
#
# @param dataset_id [#to_s] text interpolated into the SPARQL regex that is
#   matched against the dataset IRI.
# @return [Hash{Integer => Hash{String => Object}}] zero-based result position
#   mapped to a hash with the keys 'dataset', 'resource', 'database_title',
#   'resource_title', 'format', 'access_url' and, only when the dataset has a
#   dct:description, 'description'. 'dataset' and 'resource' hold the raw
#   query values; every other value is converted with #to_s.
def get_metadata(dataset_id)
  # NOTE(review): dataset_id is interpolated verbatim into the query text, so
  # it is interpreted as a regular expression and a double quote inside it
  # breaks the query. Escape or validate it if it can come from untrusted input.
  query = SPARQL.parse(%(
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX gp: <http://www.geophy.com/rdf/terms#>
SELECT ?database_title ?dataset ?distribution ?format ?description ?access_url ?resource_title
WHERE {
?dataset rdf:type dcat:Dataset ;
     gp:database ?database_title ;
     dcat:distribution ?distribution .
OPTIONAL {
?dataset dct:description ?description .
}
?distribution dct:format ?format ;
          dct:title ?resource_title ;
          dcat:accessURL ?access_url .
FILTER( regex(?format, "postgres|mysql|csv|tsv|xlsx|xls|txt", 'i') &&
    regex(str(?dataset), "#{dataset_id}") )
}
ORDER BY ?resource_title
))
  @graph.query(query).map.with_index do |result, index|
    metadata = {
      'dataset' => result.dataset,
      'resource' => result.distribution,
      'database_title' => result.database_title.to_s,
      'resource_title' => result.resource_title.to_s,
      'format' => result.format.to_s
    }
    # ?description comes from an OPTIONAL clause and may be unbound. Looking the
    # binding up by name yields nil in that case, which avoids depending on how
    # the solution object answers `defined?` for dynamically dispatched readers.
    description = result[:description]
    metadata['description'] = description.to_s if description
    metadata['access_url'] = result.access_url.to_s
    [index, metadata]
  end.to_h
end

#has_next_page ⇒ Object

Check if there’s a hydra next page.



85
86
87
88
89
90
91
92
93
94
95
# File 'lib/dbtools/rdf/rdf_reader.rb', line 85

# Asks the graph whether any hydra:PagedCollection resource still carries a
# hydra:nextPage link.
#
# @return [Object] the result of calling #true? on the ASK query's answer.
def has_next_page
  ask_text = %[
PREFIX hydra: <http://www.w3.org/ns/hydra/core#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

ASK { ?catalog rdf:type hydra:PagedCollection ; 
         hydra:nextPage ?next_catalog .  }
                       ]
  verdict = @graph.query(SPARQL.parse(ask_text))
  verdict.true?
end

#load_from_file(file) ⇒ Object

Loads a file into the graph



14
15
16
17
18
19
# File 'lib/dbtools/rdf/rdf_reader.rb', line 14

# Loads +file+ into the graph, then keeps calling #next_page for as long as
# #has_next_page reports another page.
#
# @param file [Object] passed unchanged to @graph.load.
# @return [nil]
def load_from_file(file)
  @graph.load(file)
  next_page while has_next_page
end

#next_page ⇒ Object

Adds the next page to the graph



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# File 'lib/dbtools/rdf/rdf_reader.rb', line 98

# Replaces the hydra paging information in the graph with the contents of the
# following page: looks up the hydra:nextPage target, deletes every statement
# whose subject is a hydra:PagedCollection, then loads the target.
#
# @return [nil, Object] nil when #has_next_page is falsy, otherwise whatever
#   @graph.load returns.
def next_page
  return unless has_next_page

  # Find the link to the following page (only the first match is used).
  link_lookup = SPARQL.parse(%(
PREFIX hydra: <http://www.w3.org/ns/hydra/core#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT ?catalog 
WHERE { ?current_catalog rdf:type hydra:PagedCollection ; 
            hydra:nextPage ?catalog . }))
  upcoming = @graph.query(link_lookup).first

  # Drop the current paging statements so the old nextPage link is not
  # matched again once the new page has been loaded.
  hydra_triples = SPARQL.parse(%(
PREFIX hydra: <http://www.w3.org/ns/hydra/core#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT ?catalog ?p ?o
WHERE { ?catalog rdf:type hydra:PagedCollection ; 
         ?p ?o .  }
                       ))
  @graph.query(hydra_triples) do |triple|
    @graph.delete(RDF::Statement(triple.catalog, triple.p, triple.o))
  end

  # Pull in the page the link pointed at.
  @graph.load(upcoming[:catalog])
end