Class: RdfContext::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/rdf_context/parser.rb,
lib/rdf_context/n3parser.rb

Overview

Generic RdfContext Parser class

Direct Known Subclasses

N3Parser, RdfXmlParser, RdfaParser

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Parser

Creates a new parser

Parameters:

  • options (Hash) (defaults to: {})

    a customizable set of options

Options Hash (options):

  • :processor_graph (Graph) — default: nil

    Graph to record information, warnings and errors.

  • :type (:rdfxml, :html, :n3) — default: nil
  • :strict (Boolean) — default: false

    Raise an Error if true; otherwise continue with lax parsing.



28
29
30
31
32
33
34
# File 'lib/rdf_context/parser.rb', line 28

# Build a parser from an options hash.
#
# Reads :processor_graph, :debug and :strict from +options+ and starts with
# an empty table of named blank nodes.
def initialize(options = {})
  # Only create @processor_graph when one was actually supplied.
  recorder = options[:processor_graph]
  @processor_graph = recorder if recorder

  @debug  = options[:debug] # XXX deprecated
  @strict = options[:strict]
  @named_bnodes = {}
end

Instance Attribute Details

#debug ⇒ Array<String> (readonly)

Returns:



108
109
110
# File 'lib/rdf_context/parser.rb', line 108

# Returns @debug, the value taken from options[:debug].
# When it is an Array, add_processor_message appends "path: message"
# strings to it.
def debug
  @debug
end

#doc ⇒ Nokogiri::XML::Document, #read

Source of parsed document

Returns:

  • (Nokogiri::XML::Document, #read)


12
13
14
# File 'lib/rdf_context/parser.rb', line 12

# Returns @doc, described by the API docs as the source of the parsed
# document. NOTE(review): @doc is not assigned in the code shown here;
# presumably subclasses set it — confirm.
def doc
  @doc
end

#graph ⇒ Graph

Returns:



16
17
18
# File 'lib/rdf_context/parser.rb', line 16

# Returns @graph, which #parse sets to options[:graph] or a new Graph.
def graph
  @graph
end

#processor_graph ⇒ Graph

Returns:



20
21
22
# File 'lib/rdf_context/parser.rb', line 20

# Returns @processor_graph, taken from options[:processor_graph] by the
# constructor. When present, add_processor_message records its messages
# into it as triples.
def processor_graph
  @processor_graph
end

#uri ⇒ RdfContext::URIRef (readonly)

URI of parsed document

Returns:



8
9
10
# File 'lib/rdf_context/parser.rb', line 8

# Returns @uri, which #parse assigns as URIRef.new(uri.to_s) when a
# non-nil uri is passed to a Parser subclass.
def uri
  @uri
end

Class Method Details

.n3_parser(options = {}) ⇒ N3Parser

Return N3 Parser instance

Returns:



112
# File 'lib/rdf_context/parser.rb', line 112

# Factory: build an N3Parser, handing +options+ straight to its constructor.
def self.n3_parser(options = {})
  N3Parser.new(options)
end

.parse(stream, uri = nil, options = {}) {|triple| ... } ⇒ Graph

Instantiate Parser and parse document

Parameters:

  • stream (#read, #to_s)

    the HTML+RDFa IO stream, string, Nokogiri::HTML::Document or Nokogiri::XML::Document

  • uri (String) (defaults to: nil)

    (nil) the URI of the document

  • options (Hash) (defaults to: {})

    a customizable set of options

Options Hash (options):

  • :graph (Graph) — default: Graph.new

    Graph to parse into, otherwise a new Graph instance is created

  • :processor_graph (Graph) — default: nil

    Graph to record information, warnings and errors.

  • :type (:rdfxml, :html, :n3) — default: nil
  • :strict (Boolean) — default: false

    Raise an Error if true; otherwise continue with lax parsing.

Yields:

  • (triple)

Yield Parameters:

Returns:

  • (Graph)

    Returns the graph containing parsed triples

  • (Graph)

    Returns the graph containing parsed triples

Raises:

  • (Error)

    Raises RdfError if strict

  • (Error)

    Raises RdfError if strict



50
51
52
53
# File 'lib/rdf_context/parser.rb', line 50

# Convenience entry point: construct a parser with +options+ and immediately
# parse +stream+ with it, forwarding +uri+, +options+ and the optional block.
# Returns whatever the instance-level #parse returns.
def self.parse(stream, uri = nil, options = {}, &block) # :yields: triple
  new(options).parse(stream, uri, options, &block)
end

.rdfa_parser(options = {}) ⇒ RdfaParser

Return Rdfa Parser instance

Returns:



118
# File 'lib/rdf_context/parser.rb', line 118

# Factory: build an RdfaParser, handing +options+ straight to its constructor.
def self.rdfa_parser(options = {})
  RdfaParser.new(options)
end

.rdfxml_parser(options = {}) ⇒ RdfXmlParser

Return RDF/XML Parser instance

Returns:



115
# File 'lib/rdf_context/parser.rb', line 115

# Factory: build an RdfXmlParser, handing +options+ straight to its constructor.
def self.rdfxml_parser(options = {})
  RdfXmlParser.new(options)
end

Instance Method Details

#add_debug(node, message) ⇒ Object (protected)

Add debug event to debug array, if specified

Parameters:

  • node (XML Node, any)

    XML Node or string for showing context

  • message (String)


162
163
164
# File 'lib/rdf_context/parser.rb', line 162

# Record +message+ for +node+ as an informational processor message.
# Debug events are filed under the RDFA_NS.Info class.
def add_debug(node, message)
  info_class = RDFA_NS.Info
  add_processor_message(node, message, info_class)
end

#add_error(node, message, process_class = RDFA_NS.Error) ⇒ Object (protected)

Raises:



174
175
176
177
# File 'lib/rdf_context/parser.rb', line 174

# Record +message+ for +node+ as an error (class defaults to RDFA_NS.Error).
# The message is always recorded first; in strict mode a ParserException
# carrying the same message is then raised.
def add_error(node, message, process_class = RDFA_NS.Error)
  add_processor_message(node, message, process_class)
  return unless @strict

  raise ParserException, message
end

#add_info(node, message, process_class = RDFA_NS.Info) ⇒ Object (protected)



166
167
168
# File 'lib/rdf_context/parser.rb', line 166

# Record +message+ for +node+ as a processor message.
# +process_class+ defaults to RDFA_NS.Info; everything is forwarded
# unchanged to add_processor_message.
def add_info(node, message, process_class = RDFA_NS.Info)
  add_processor_message(node, message, process_class)
end

#add_processor_message(node, message, process_class) ⇒ Object (protected)



179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/rdf_context/parser.rb', line 179

# Report a processor message through every configured channel.
#
# * Printed to stdout when ::RdfContext::debug? is true.
# * Appended to @debug as "path: message" when @debug is an Array.
# * Recorded in @processor_graph (when set) as a blank node carrying the
#   message class, description, timestamp, a running sequence number and
#   the document uri as context.
#
# node::          XML node, or any other object (e.g. a String) giving context
# message::       text of the message
# process_class:: class used as the rdf:type of the recorded message
#
# An XPath pointer context is only recorded when +node+ responds to #path.
# Callers may pass a plain string as context (node_path supports that via
# to_s), and calling node.path on it unconditionally raised NoMethodError.
def add_processor_message(node, message, process_class)
  puts "#{node_path(node)}: #{message}" if ::RdfContext::debug?
  @debug << "#{node_path(node)}: #{message}" if @debug.is_a?(Array)
  if @processor_graph
    @processor_sequence ||= 0
    n = BNode.new
    @processor_graph << Triple.new(n, RDF_TYPE, process_class)
    @processor_graph << Triple.new(n, DC_NS.description, message)
    @processor_graph << Triple.new(n, DC_NS.date, Literal.build_from(DateTime.now))
    @processor_graph << Triple.new(n, RDFA_NS.sequence, Literal.build_from(@processor_sequence += 1))
    @processor_graph << Triple.new(n, RDFA_NS.context, uri)
    if node.respond_to?(:path)
      # Second context: a pointer to the node's location in the document.
      nc = BNode.new
      @processor_graph << Triple.new(nc, RDF_TYPE, PTR_NS.XPathPointer)
      @processor_graph << Triple.new(nc, PTR_NS.expression, node.path)
      @processor_graph << Triple.new(n, RDFA_NS.context, nc)
    end
  end
end

#add_triple(node, subject, predicate, object) ⇒ Array (protected)

add a triple, object can be literal or URI or bnode

If the parser is called with a block, triples are passed to the block rather than added to the graph.

Parameters:

Returns:

  • (Array)

    An array of the triples (leaky abstraction? consider returning the graph instead)

Raises:

  • (Error)

    Checks parameter types and raises if they are incorrect if parsing mode is strict.



208
209
210
211
212
213
214
215
216
217
218
219
220
221
# File 'lib/rdf_context/parser.rb', line 208

# Build a Triple from +subject+, +predicate+ and +object+ and deliver it.
#
# The triple is handed to the saved block (@callback) when there is one,
# otherwise it is appended to @graph. Returns the triple.
#
# If building or delivering the triple raises an RdfException, the failure
# is logged through add_debug; the exception is re-raised only in strict
# mode, otherwise nil is returned.
def add_triple(node, subject, predicate, object)
  begin
    statement = Triple.new(subject, predicate, object)
    add_debug(node, "triple: #{statement}")
    # Perform yield to saved block when present, else store in the graph
    @callback ? @callback.call(statement) : (@graph << statement)
    statement
  rescue RdfException => error
    add_debug(node, "add_triple raised #{error.class}: #{error.message}")
    puts error.backtrace if ::RdfContext::debug?
    raise if @strict
  end
end

#add_warning(node, message, process_class = RDFA_NS.Warning) ⇒ Object (protected)



170
171
172
# File 'lib/rdf_context/parser.rb', line 170

# Record +message+ for +node+ as a processor message.
# +process_class+ defaults to RDFA_NS.Warning; everything is forwarded
# unchanged to add_processor_message.
def add_warning(node, message, process_class = RDFA_NS.Warning)
  add_processor_message(node, message, process_class)
end

#detect_format(stream, uri = nil) ⇒ :rdfxml, ...

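Heuristically detect the format of the document: first from the file extension of the uri (or of the stream's path), and otherwise by inspecting the beginning of the content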

Parameters:

  • stream (#read, #to_s)
  • uri (#to_s) (defaults to: nil)

    (nil)

Returns:

  • (:rdfxml, :rdfa, :n3)


124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/rdf_context/parser.rb', line 124

# Guess the serialization of a document.
#
# The extension of +uri+ (or of stream.path when no uri is given) is tried
# first. If it is not recognised, the first 1000 characters of a readable
# stream (or the whole string form of anything else) are inspected.
#
# stream:: object responding to #read (and #rewind), or anything with #to_s
# uri::    optional location of the document
#
# Returns :rdfxml, :rdfa or :n3 (:n3 is the fallback).
def detect_format(stream, uri = nil)
  uri ||= stream.path if stream.respond_to?(:path)
  location = uri.to_s

  # First choice: trust the extension.
  return :rdfxml if /\.(rdf|xml)$/ === location
  return :rdfa   if /\.(html|xhtml)$/ === location
  return :n3     if /\.(nt|n3|txt)$/ === location

  # Got to look into the file to see
  sample =
    if stream.respond_to?(:read)
      stream.rewind
      head = stream.read(1000)
      stream.rewind # leave the stream positioned at the start for the parser
      head
    else
      stream.to_s
    end

  if /<\w+:RDF/ === sample || /<RDF/ === sample
    :rdfxml
  elsif /<html/i === sample
    :rdfa
  else
    :n3
  end
end

#node_path(node) ⇒ Object (protected)

Figure out the document path, if it is a Nokogiri::XML::Element or Attribute



151
152
153
154
155
156
# File 'lib/rdf_context/parser.rb', line 151

# Describe where +node+ sits for use in messages: a Nokogiri::XML::Node
# reports its display_path, anything else its string form.
def node_path(node)
  if Nokogiri::XML::Node === node
    node.display_path
  else
    node.to_s
  end
end

#parse(stream, uri = nil, options = {}) {|triple| ... } ⇒ Graph

Parse RDF document from a string or input stream to closure or graph.

If the parser is called with a block, triples are passed to the block rather than added to the graph.

Virtual Class, prototype for Parser subclass.

Parameters:

  • stream (#read, #to_s)

    the HTML+RDFa IO stream, string, Nokogiri::HTML::Document or Nokogiri::XML::Document

  • uri (String) (defaults to: nil)

    (nil) the URI of the document

  • options (Hash) (defaults to: {})

    a customizable set of options

Options Hash (options):

  • :graph (Graph) — default: Graph.new

    Graph to parse into, otherwise a new Graph instance is created

  • :processor_graph (Graph) — default: nil

    Graph to record information, warnings and errors.

  • :type (:rdfxml, :html, :n3) — default: nil
  • :strict (Boolean) — default: false

    Raise an Error if true; otherwise continue with lax parsing.

Yields:

  • (triple)

Yield Parameters:

Returns:

  • (Graph)

    Returns the graph containing parsed triples

  • (Graph)

    Returns the graph containing parsed triples

Raises:

  • (Error)

    Raises RdfError if strict

  • (Error)

    Raises RdfError if strict



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/rdf_context/parser.rb', line 74

# Parse an RDF document from a string or input stream.
#
# Called on Parser itself, this picks a concrete parser (N3Parser,
# RdfaParser or RdfXmlParser) from options[:type], or from detect_format
# when no type is given, and delegates the parse to it. Called on a
# subclass, it only performs the common setup of @uri, @strict and @debug.
#
# stream::  object responding to #read, or anything with #to_s
# uri::     optional URI of the document
# options:: :graph, :strict, :debug (deprecated), :type; passed on to the delegate
#
# Returns the delegate's result when called on Parser itself.
#
# Changes from the previous implementation:
# * The caller's +options+ hash is no longer modified. The defaults are
#   filled into a copy, so a hash reused for a second document does not
#   silently carry over the first call's :graph and :type.
# * The cached delegate is replaced when the requested type needs a
#   different parser class. It used to be kept forever, so a second
#   document of another format went to the wrong parser.
# * The duplicated "n3" entry in the type list is removed.
def parse(stream, uri = nil, options = {}, &block) # :yields: triple
  # NOTE(review): @uri is only assigned further down (subclass branch), so a
  # freshly created Graph is identified by whatever @uri held before this call.
  @graph = options[:graph] || Graph.new(:identifier => @uri)
  if self.class == Parser
    delegate_options = options.dup
    delegate_options[:strict] ||= @strict if @strict
    delegate_options[:graph] ||= @graph
    delegate_options[:debug] ||= @debug if @debug  # XXX deprecated
    # Intuit type, if not provided
    delegate_options[:type] ||= detect_format(stream, uri)

    # Choose the specific parser class; unknown types fall back to RDF/XML
    parser_class = case delegate_options[:type].to_s
    when "n3", "ntriples", "turtle", "ttl", "notation3" then N3Parser
    when "rdfa", "html", "xhtml"                        then RdfaParser
    when "xml", "rdf", "rdfxml"                         then RdfXmlParser
    else                                                     RdfXmlParser
    end

    # Reuse the existing delegate only if it is exactly the class we need
    @delegate = parser_class.new(delegate_options) unless @delegate.instance_of?(parser_class)
    @delegate.parse(stream, uri, delegate_options, &block)
  else
    # Common parser operations
    @uri = URIRef.new(uri.to_s) unless uri.nil?
    @strict = options[:strict] if options.has_key?(:strict)
    @debug = options[:debug] if options.has_key?(:debug)
  end
end