Class: ContentDm::Harvester

Inherits:
Object
  • Object
show all
Extended by:
URI
Defined in:
lib/contentdm/harvester.rb

Constant Summary collapse

OAI_PAGE_SIZE =
1000

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from URI

normalize

Constructor Details

#initialize(base_uri) ⇒ Harvester

The constructor must be passed the URL of a CONTENTdm installation. This will usually be the root of the server on which CONTENTdm is installed.



19
20
21
22
# File 'lib/contentdm/harvester.rb', line 19

# Creates a harvester bound to one CONTENTdm installation.
#
# @param base_uri the URL of the CONTENTdm installation (usually the root of
#   the server it runs on); it is passed through the class-level +normalize+
#   helper before being stored in @base_uri.
def initialize(base_uri)
  @base_uri = self.class.normalize(base_uri)
  # Take the default from the class constant instead of repeating the
  # literal 1000, so the value is defined in exactly one place.
  @page_size = OAI_PAGE_SIZE
end

Instance Attribute Details

#base_uri ⇒ Object (readonly)

Returns the value of attribute base_uri.



14
15
16
# File 'lib/contentdm/harvester.rb', line 14

# Reader for the @base_uri instance variable, which the constructor sets to
# the normalized form of the URL it was given.
def base_uri
  @base_uri
end

#page_size ⇒ Object

Returns the value of attribute page_size.



15
16
17
# File 'lib/contentdm/harvester.rb', line 15

# Reader for the @page_size instance variable (the constructor initialises it
# to 1000).
# NOTE(review): presumably the number of records requested per OAI page, but
# none of the code visible here reads it -- confirm against its callers.
def page_size
  @page_size
end

Class Method Details

.get_record(url) ⇒ Object

Convenience method which returns a single Record when passed a URL in one of two forms:

  • A CONTENTdm URL containing CISOROOT/CISOPTR values for the desired item

  • A CONTENTdm canonical URL in the form

    http://path/to/contentdm/u?/[collection],[ptr]
    

    where [collection] is the CONTENTdm collection name, and [ptr] is the sequential item ID within the collection.



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/contentdm/harvester.rb', line 31

# Convenience method which returns a single Record for an item URL.
#
# Two URL forms are understood:
#
# * a canonical URL, <tt>http://path/to/contentdm/u?/[collection],[ptr]</tt>
#   (the slash after the question mark may be omitted);
# * any other URL whose query string carries CISOROOT (the collection, with
#   or without a leading slash) and CISOPTR (the item number).
#
# @param url [String] the item URL
# @return whatever Harvester#get_record returns for the extracted
#   collection and item number
# @raise [ArgumentError] if the URL is not canonical and lacks CISOROOT or
#   CISOPTR
def self.get_record(url)
  base_uri = self.normalize(url)
  params = {}
  canonical = url.match(/^(.+\/)u\/?\?\/?(.+),(\d+)$/)
  if canonical
    params[:base_url] = canonical[1]
    params[:collection] = canonical[2]
    params[:id] = canonical[3]
  else
    # URI.decode was removed in Ruby 3.0; decode_www_form parses and decodes
    # the query string in one step. to_s covers URLs without a query.
    args = {}
    ::URI.decode_www_form(base_uri.query.to_s).each { |key, value| args[key] = value }
    root = args['CISOROOT']
    ptr = args['CISOPTR']
    if root.nil? || ptr.nil?
      raise ArgumentError, "cannot find CISOROOT/CISOPTR or a canonical u?/collection,ptr reference in #{url}"
    end
    params[:base_url] = base_uri.merge('..')
    # Drop the leading slash only when it is actually there, rather than
    # blindly removing the first character.
    params[:collection] = root.sub(/\A\//, '')
    params[:id] = ptr
  end
  harvester = Harvester.new(params[:base_url])
  harvester.get_record(params[:collection], params[:id])
end

Instance Method Details

#collections ⇒ Object

Return a hash of collection IDs and collection names



53
54
55
56
57
58
59
60
61
62
63
# File 'lib/contentdm/harvester.rb', line 53

# Lists the collections published by this CONTENTdm installation.
#
# Sends an OAI +ListSets+ request to <tt>cgi-bin/oai.exe</tt> (resolved
# against the base URI) and reads every +set+ element of the reply.
#
# @return [Hash] collection ID (the set's +setSpec+ text) => collection name
#   (the set's +setName+ text)
def collections
  list_sets_uri = @base_uri.merge('cgi-bin/oai.exe?verb=ListSets')
  # Open through the URI object (open-uri's OpenURI::OpenRead#open) rather
  # than Kernel#open, which stopped handling URLs in Ruby 3.0. The block form
  # also closes the handle once the document has been parsed.
  response = list_sets_uri.open { |io| Nokogiri::XML(io) }
  sets = response.search('//xmlns:set', response.namespaces)
  sets.each_with_object({}) do |set, names|
    names[(set / 'setSpec').text] = (set / 'setName').text
  end
end

#get_record(collection, id) ⇒ Object

Return a single Record given its collection ID and ordinal position within the collection



67
68
69
70
71
72
# File 'lib/contentdm/harvester.rb', line 67

# Fetches one item with an OAI GetRecord request (qdc metadata) and wraps the
# first parsed record in a Record.
#
# collection:: the collection ID
# id::         the item's number within the collection (formatted with %d)
def get_record(collection, id)
  identifier = format('oai:%s:%s/%d', @base_uri.host, collection, id)
  query = { :verb => 'GetRecord', :identifier => identifier, :metadataPrefix => 'qdc' }
  first_record = parse_records(get_response(query)).first
  context = { :base_uri => @base_uri, :collection => collection }
  Record.new(first_record, context)
end

#get_records(collection, opts = {}) ⇒ Object

Return an array of the Records in a given collection. The optional opts hash may contain :max, :first, :from and :until.



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/contentdm/harvester.rb', line 75

# Fetches the records of a collection with OAI ListRecords requests,
# following the resumption token of each reply until none is returned or
# enough records have been collected.
#
# @param collection the collection ID
# @param opts [Hash] optional settings
#   * :max   -- return at most this many records; missing, zero or negative
#     means no limit
#   * :first -- number placed in the last field of the initial resumption
#     token (0 when missing)
#   * :from, :until -- copied verbatim into the initial resumption token
#     (presumably date bounds for the harvest -- confirm against the server)
# @return [Array<Record>] one Record per parsed record
def get_records(collection, opts = {})
  max = opts[:max].to_i
  limited = max > 0
  # to_i already turns a missing :first into 0, so no fallback is needed.
  token = "#{collection}:#{opts[:from]}:#{opts[:until]}:qdc:#{opts[:first].to_i}"
  result = []
  until token.nil? || (limited && result.length >= max)
    response = get_response({ :verb => 'ListRecords', :resumptionToken => token.to_s })
    token = response.search('/xmlns:OAI-PMH/xmlns:ListRecords/xmlns:resumptionToken/text()', response.namespaces).first
    result.concat(parse_records(response))
  end
  # Trim the overshoot of the last page only when a positive limit was
  # requested; slicing unconditionally dropped records for a negative :max.
  result = result.first(max) if limited
  result.map { |record| Record.new(record, { :base_uri => @base_uri, :collection => collection }) }
end