Class: TaliaCore::ActiveSourceParts::Xml::GenericReader

Inherits:
Object
  • Object
show all
Extended by:
TaliaUtil::IoHelper
Includes:
TaliaUtil::IoHelper, TaliaUtil::Progressable, TaliaUtil::UriHelper
Defined in:
lib/talia_core/active_source_parts/xml/generic_reader.rb

Overview

Superclass for importers/readers of generic xml files. This is as close as possible to the SourceReader class, and will (obviously) only work if a subclass fleshes out the mappings.

See the SourceReader class for a simple example.

When adding new sources, the reader will always check if the element is already present. If attributes for one source are imported in more than one place, all subsequent calls will merge the newly imported attributes with the existing ones.

Direct Known Subclasses

SourceReader

Defined Under Namespace

Classes: State

Class Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from TaliaUtil::IoHelper

base_for, file_url, open_from_url, open_generic

Methods included from TaliaUtil::UriHelper

#irify, #sanitize_sparql

Methods included from TaliaUtil::Progressable

#progressor, #progressor=, #run_with_progress

Constructor Details

#initialize(source) ⇒ GenericReader

Creates a new reader that parses the given XML source.



87
88
89
# File 'lib/talia_core/active_source_parts/xml/generic_reader.rb', line 87

# Builds a reader for one XML document.
#
# source - the XML input; it is handed unchanged to Hpricot.XML.
#
# The parsed document is kept in @doc, which is what #sources reads from.
def initialize(source)
  @doc = Hpricot.XML(source)
end

Class Attribute Details

.create_handlers ⇒ Object (readonly)

Returns the registered handlers



70
71
72
# File 'lib/talia_core/active_source_parts/xml/generic_reader.rb', line 70

# Returns the registered handlers, i.e. whatever is currently stored in the
# receiver's @create_handlers instance variable (nil if nothing was stored).
def create_handlers
  @create_handlers
end

Class Method Details

.can_use_root ⇒ Object

Set the reader to allow the use of root elements for import



59
60
61
# File 'lib/talia_core/active_source_parts/xml/generic_reader.rb', line 59

# Set the reader to allow the use of root elements for import.
# The switch is stored in @use_root, which is the value .use_root reports.
# Returns true.
def can_use_root
  @use_root = true
end

.element(element_name, &handler_block) ⇒ Object

Create a handler for an element from which a source will be created



48
49
50
# File 'lib/talia_core/active_source_parts/xml/generic_reader.rb', line 48

# Registers a handler for elements called +element_name+ from which a
# source will be created.
#
# element_name  - name of the XML element the handler is responsible for.
# handler_block - the handler body; forwarded untouched to element_handler.
#
# Returns whatever element_handler returns.
def element(element_name, &handler_block)
  creates_source = true
  element_handler(element_name, creates_source, &handler_block)
end

.plain_element(element_name, &handler_block) ⇒ Object

Create a handler for an element which will be processed but from which no source will be created



54
55
56
# File 'lib/talia_core/active_source_parts/xml/generic_reader.rb', line 54

# Registers a handler for elements called +element_name+ which are
# processed, but from which no source will be created.
#
# element_name  - name of the XML element the handler is responsible for.
# handler_block - the handler body; forwarded untouched to element_handler.
#
# Returns whatever element_handler returns.
def plain_element(element_name, &handler_block)
  creates_source = false
  element_handler(element_name, creates_source, &handler_block)
end

.sources_from(source, progressor = nil, base_url = nil) ⇒ Object

Read the sources from the given IO stream. You may specify a base URL to help the reader decide from where files should be opened.



40
41
42
43
44
45
# File 'lib/talia_core/active_source_parts/xml/generic_reader.rb', line 40

# Reads the sources from the given input by building a fresh reader around
# it and asking that reader for its sources.
#
# source     - the XML input, passed to the constructor.
# progressor - assigned to the reader as its progressor (may be nil).
# base_url   - when truthy, assigned to the reader as base_file_url so that
#              file URLs can be resolved against it.
#
# Returns the result of the reader's #sources call.
def sources_from(source, progressor = nil, base_url = nil)
  importer = new(source)
  importer.base_file_url = base_url if base_url
  importer.progressor = progressor
  importer.sources
end

.sources_from_url(url, options = nil, progressor = nil) ⇒ Object

See the IoHelper class for help on the options. A progressor may be supplied on which the importer will report its progress.



34
35
36
# File 'lib/talia_core/active_source_parts/xml/generic_reader.rb', line 34

# Opens +url+ through open_generic and reads the sources from the stream
# that open_generic yields. The URL itself is passed on as the base URL.
#
# url        - location of the XML data.
# options    - handed to open_generic unchanged (see the IoHelper class).
# progressor - optional object on which the import reports its progress.
#
# Returns whatever open_generic returns for the block.
def sources_from_url(url, options = nil, progressor = nil)
  open_generic(url, options) do |stream|
    sources_from(stream, progressor, url)
  end
end

.use_root ⇒ Object

True if the reader should also check the root element, instead of only checking the children



65
66
67
# File 'lib/talia_core/active_source_parts/xml/generic_reader.rb', line 65

# True if the reader should also check the root element, instead of only
# checking the children. Reports the value stored in @use_root, or false
# when that value is nil/false.
def use_root
  return false unless @use_root
  @use_root
end

Instance Method Details

#add_source_with_check(source_attribs) ⇒ Object



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/talia_core/active_source_parts/xml/generic_reader.rb', line 113

# Adds the attributes of one source to @sources, merging them with the
# attributes already recorded for the same URI.
#
# source_attribs - Hash of attributes; must contain a non-blank 'uri'.
#                  NOTE: this hash is modified in place. Its 'uri' is
#                  replaced by the irified URI, and every key that is
#                  already recorded for the source is removed from it.
#
# Merge rules for a key that is already recorded:
# * a nil/false new value leaves the recorded value as it is
# * two Arrays are concatenated and duplicates are removed
# * anything else replaces the recorded value
#
# Returns the Hash recorded for the source.
# Raises RuntimeError if the 'uri' attribute is blank.
def add_source_with_check(source_attribs)
  assit_kind_of(Hash, source_attribs)

  raw_uri = source_attribs['uri']
  if raw_uri.blank?
    raise(RuntimeError, "Problem reading from XML: Source without URI (#{source_attribs.inspect})")
  end

  uri = irify(raw_uri)
  source_attribs['uri'] = uri
  recorded = (@sources[uri] ||= {})

  # Only existing keys are reassigned inside the loop, so changing the
  # hash while iterating over it is safe here.
  recorded.each do |key, old_value|
    incoming = source_attribs.delete(key)
    next unless incoming

    type_changed = (key.to_sym == :type) && (old_value != 'TaliaCore::SourceTypes::DummySource') && (old_value != incoming)
    assit(!type_changed, "Type should not change during import, may be a format problem. (From #{old_value} to #{incoming})")

    recorded[key] = if incoming.is_a?(Array) && old_value.is_a?(Array)
      # Both sides are lists: append the new entries, drop duplicates
      (old_value + incoming).uniq
    else
      # Otherwise the new value simply wins
      incoming
    end
  end

  # Whatever is left in source_attribs was not recorded before
  recorded.merge!(source_attribs)
end

#base_file_url ⇒ Object

This is the “base” for resolving file URLs. If a file URL is found to be relative, it will be relative to this URL



104
105
106
# File 'lib/talia_core/active_source_parts/xml/generic_reader.rb', line 104

# The "base" for resolving file URLs: a relative file URL is taken to be
# relative to this value. Falls back to TALIA_ROOT (and remembers it)
# when no base has been assigned yet.
def base_file_url
  @base_file_url = TALIA_ROOT unless @base_file_url
  @base_file_url
end

#base_file_url=(new_base_url) ⇒ Object

Assign a new base url



109
110
111
# File 'lib/talia_core/active_source_parts/xml/generic_reader.rb', line 109

# Assigns a new base URL. The given value is first run through base_for,
# and the result of that call is what gets stored.
def base_file_url=(new_base_url)
  resolved_base = base_for(new_base_url)
  @base_file_url = resolved_base
end

#create_handlers ⇒ Object



139
140
141
# File 'lib/talia_core/active_source_parts/xml/generic_reader.rb', line 139

# The handlers used by this reader: those registered on the reader's class,
# or an empty Hash if the class has none. The result is remembered in
# @handlers, so the class is only asked once.
def create_handlers
  return @handlers if @handlers
  @handlers = self.class.create_handlers || {}
end

#read_children_of(element, progress = nil, &block) ⇒ Object



154
155
156
157
158
159
160
# File 'lib/talia_core/active_source_parts/xml/generic_reader.rb', line 154

# Reads a source from every child element of +element+.
#
# element  - the parent node whose children are visited.
# progress - optional progress object; #inc is called once per child node,
#            including the nodes that are skipped.
# block    - forwarded to read_source for each child element.
#
# Only children that are Hpricot::Elem instances are read; other nodes
# are skipped.
#
# Returns the list of child nodes that was iterated.
def read_children_of(element, progress = nil, &block)
  # NOTE(review): Hpricot appears to hand back nil instead of an empty list
  # for some childless elements - confirm against the Hpricot version in
  # use. A missing list is treated as "no children" instead of raising.
  child_nodes = element.children || []
  # The block variable deliberately differs from the method parameter: with
  # the same name it shadows the parameter (Ruby 1.8 even overwrites it).
  child_nodes.each do |child|
    progress.inc if progress
    next unless child.is_a?(Hpricot::Elem)
    read_source(child, &block)
  end
end

#read_children_with_progress(element, &block) ⇒ Object



148
149
150
151
152
# File 'lib/talia_core/active_source_parts/xml/generic_reader.rb', line 148

# Reads the children of +element+ (see read_children_of) inside a
# run_with_progress call labelled 'Xml Read'. The number of child nodes is
# the progress total.
#
# element - the parent node whose children are read.
# block   - forwarded to read_children_of.
#
# Returns whatever run_with_progress returns.
def read_children_with_progress(element, &block)
  # NOTE(review): Hpricot appears to hand back nil instead of an empty list
  # for some childless elements - confirm against the Hpricot version in
  # use. A missing list counts as zero children instead of raising.
  child_nodes = element.children
  child_count = child_nodes ? child_nodes.size : 0
  run_with_progress('Xml Read', child_count) do |prog|
    # Nothing to read when there is no child list at all
    read_children_of(element, prog, &block) if child_nodes
  end
end

#read_source(element, &block) ⇒ Object



143
144
145
146
# File 'lib/talia_core/active_source_parts/xml/generic_reader.rb', line 143

# Runs the handler for +element+ (via call_handler, which also receives the
# block). If the handler produces attributes, they are added to the known
# sources through add_source_with_check.
#
# Returns the result of add_source_with_check, or nil when the handler
# produced nothing.
def read_source(element, &block)
  handler_result = call_handler(element, &block)
  return unless handler_result
  add_source_with_check(handler_result)
end

#sources ⇒ Object



91
92
93
94
95
96
97
98
99
100
# File 'lib/talia_core/active_source_parts/xml/generic_reader.rb', line 91

# Reads the document (only on the first call) and returns the attribute
# hashes of all sources found, one Hash per source URI.
#
# If use_root is set and the reader responds to "<root name>_handler", the
# root element itself is read as a source. Otherwise each child of the root
# element is read.
#
# Returns an Array of Hashes. Later calls reuse the data collected by the
# first call.
def sources
  unless @sources
    @sources = {}
    root = @doc.root
    if use_root && respond_to?("#{root.name}_handler".to_sym)
      run_with_progress('XmlRead', 1) { read_source(root) }
    else
      read_children_with_progress(root)
    end
  end
  # Hand out the values on the memoized path too, so every call returns an
  # Array rather than the internal uri => attributes Hash.
  @sources.values
end

#use_root ⇒ Object



162
163
164
# File 'lib/talia_core/active_source_parts/xml/generic_reader.rb', line 162

# Instance-level shortcut: reports the use_root setting of the reader's
# class.
def use_root
  reader_class = self.class
  reader_class.use_root
end