Module: MARC::NokogiriReader

Defined in:
lib/marc/xml_parsers.rb

Overview

NokogiriReader uses the Nokogiri SAX Parser to quickly read a MARCXML document. Because dynamically subclassing MARC::XMLReader is a little ugly, we need to recreate all of the SAX event methods from Nokogiri::XML::SAX::Document here rather than subclassing.

Class Method Summary collapse

Instance Method Summary collapse

Dynamic Method Handling

This module handles dynamic methods through the method_missing method.

#method_missing(methName, *args) ⇒ Object



108
109
110
111
112
113
114
# File 'lib/marc/xml_parsers.rb', line 108

# Absorbs the Nokogiri SAX callbacks that this reader does not implement.
#
# A call to any SAX event listed below is silently ignored; every other
# unknown message raises NoMethodError, as it would without this override.
#
# @param methName [Symbol] name of the method that was called
# @param args [Array] arguments given to the missing method (only reported
#   on the raised exception)
# @return [nil] when methName is one of the ignored SAX callbacks
# @raise [NoMethodError] for any other method name
def method_missing(methName, *args)
  sax_methods = [:xmldecl, :start_document, :end_document, :start_element,
    :end_element, :comment, :warning, :error, :cdata_block]
  return if sax_methods.include?(methName)

  # Attach the real method name and arguments to the exception (instead of a
  # placeholder) and close the quote around the method name in the message.
  raise NoMethodError.new("undefined method '#{methName}' for #{self}", methName, args)
end

Class Method Details

.extended(receiver) ⇒ Object



40
41
42
43
# File 'lib/marc/xml_parsers.rb', line 40

# Hook run when an object is extended with this module.
#
# Nokogiri is loaded lazily here, so it is only required once this parser
# is actually selected; the receiver is then prepared through its #init.
#
# @param receiver [Object] the object that was just extended
# @return [Object] whatever receiver.init returns
def self.extended(receiver)
  require('nokogiri')
  receiver.init
end

Instance Method Details

#characters(text) ⇒ Object



83
84
85
86
87
88
89
# File 'lib/marc/xml_parsers.rb', line 83

# SAX callback for character data.
#
# Routes the text to whichever element is currently open: leader text
# replaces the record's leader, while field and subfield text is appended
# to that object's value. Text seen outside those elements is dropped.
#
# @param text [String] character data reported by the parser
def characters(text)
  target = @current_element
  return @record[:record].leader = text if target == :leader
  return unless target == :field || target == :subfield

  @record[target].value << text
end

#each(&block) ⇒ Object

Loop through the MARC records in the XML document



54
55
56
57
# File 'lib/marc/xml_parsers.rb', line 54

# Loop through the MARC records in the XML document.
#
# The block is stored in @block so that #yield_record can call it with each
# finished record, then the parser is run over @handle (set outside this
# method; presumably the XML input of the reader being extended).
#
# @yield [record] each record handed to the stored block by #yield_record
# @return [Enumerator] when called without a block
# @return [Object] otherwise, the result of @parser.parse
def each(&block)
  # Without a block there is nothing for #yield_record to call, so return
  # an Enumerator instead of failing part-way through the parse.
  return enum_for(:each) unless block

  @block = block
  @parser.parse(@handle)
end

#end_element_namespace(name, prefix = nil, uri = nil) ⇒ Object



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/marc/xml_parsers.rb', line 91

# SAX callback for a closing tag.
#
# Always clears @current_element so that later character data is no longer
# attributed to the element that just ended. For elements in the MARC
# namespace (@ns, the same value #start_element_namespace checks) it then
# finishes the object that was being built:
# * record                 - passes the record on through #yield_record
# * controlfield/datafield - adds the field to the record
# * subfield               - appends the subfield to the current field
#
# @param name [String] local element name (compared case-insensitively)
# @param prefix [String, nil] namespace prefix (unused)
# @param uri [String, nil] namespace URI of the element
def end_element_namespace(name, prefix = nil, uri = nil)
  @current_element = nil
  return unless uri == @ns

  case name.downcase
  when 'record' then yield_record
  # Exact names only, mirroring #start_element_namespace: an unrelated
  # element that merely contains "datafield" must not close a field.
  when 'controlfield', 'datafield'
    @record[:record] << @record[:field]
    @record[:field] = nil
  when 'subfield'
    @record[:field].append(@record[:subfield])
    @record[:subfield] = nil
  end
end

#init ⇒ Object

Sets up the instance variables used for SAX parsing and creates the Nokogiri SAX parser.



46
47
48
49
50
51
# File 'lib/marc/xml_parsers.rb', line 46

# Prepares the parsing state and builds the Nokogiri SAX parser.
#
# @ns is the MARC21 "slim" namespace URI, @record holds the record, field
# and subfield currently being assembled, and @current_element names the
# element whose character data is being collected. The parser is created
# with this object as its SAX document handler.
#
# @return [Nokogiri::XML::SAX::Parser] the newly created parser
def init
  @ns = "http://www.loc.gov/MARC21/slim"
  @current_element = nil
  @record = { record: nil, field: nil, subfield: nil }
  @parser = Nokogiri::XML::SAX::Parser.new(self)
end

#start_element_namespace(name, attributes = [], prefix = nil, uri = nil, ns = {}) ⇒ Object



65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/marc/xml_parsers.rb', line 65

# SAX callback for an opening tag.
#
# Elements outside the MARC namespace (@ns) are ignored. For MARC elements
# the matching object is created and stored in @record, and @current_element
# is set for those elements whose text #characters should collect (leader,
# controlfield, subfield). A datafield leaves @current_element untouched.
#
# @param name [String] local element name (compared case-insensitively)
# @param attributes [Array] attributes as supplied by the parser; converted
#   with attributes_to_hash and then read by the keys "tag", "ind1",
#   "ind2" and "code"
# @param prefix [String, nil] namespace prefix (unused)
# @param uri [String, nil] namespace URI of the element
# @param ns [Hash] namespace declarations (unused)
def start_element_namespace(name, attributes = [], prefix = nil, uri = nil, ns = {})
  attrs = attributes_to_hash(attributes)
  return unless uri == @ns

  element = name.downcase
  if element == 'record'
    @record[:record] = MARC::Record.new
  elsif element == 'leader'
    @current_element = :leader
  elsif element == 'controlfield'
    @current_element = :field
    @record[:field] = MARC::ControlField.new(attrs["tag"])
  elsif element == 'datafield'
    @record[:field] = MARC::DataField.new(attrs["tag"], attrs['ind1'], attrs['ind2'])
  elsif element == 'subfield'
    @current_element = :subfield
    @record[:subfield] = MARC::Subfield.new(attrs['code'])
  end
end

#yield_record ⇒ Object

Returns our MARC::Record object to the #each block.



60
61
62
63
# File 'lib/marc/xml_parsers.rb', line 60

# Hands the record that was just completed to the block stored by #each,
# then clears the slot so the next record starts from a clean state.
#
# @return [nil]
def yield_record
  finished = @record[:record]
  @block.call(finished)
  @record[:record] = nil
end