Class: RDF::Util::File::RemoteDocument

Inherits:
StringIO
  • Object
show all
Defined in:
lib/rdf/util/file.rb

Overview

A RemoteDocument contains the body and headers of a remote resource.

Link headers are parsed using the `LinkHeader` gem.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(body, options = {}) ⇒ RemoteDocument

Set content

Since:

  • 0.2.4



397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
# File 'lib/rdf/util/file.rb', line 397

def initialize(body, options = {})
  options.each do |key, value|
    # de-quote charset
    matchdata = value.match(/^["'](.*)["']$/.freeze) if key == "charset"
    value = matchdata[1] if matchdata
    value = value.downcase if value.is_a?(String)
    instance_variable_set(:"@#{key}", value)
  end
  @headers = options.fetch(:headers, {})
  @charset = options[:charset].to_s.downcase if options[:charset]

  # Find Content-Type
  if headers[:content_type]
    ct, *params = headers[:content_type].split(';').map(&:strip)
    @content_type ||= ct

    # Find charset
    params.each do |param|
      p, v = param.split('=')
      next unless p.downcase == 'charset'
      @charset ||= v.sub(/^["']?(.*)["']?$/, '\1').downcase
    end
  end

  @etag = headers[:etag]
  @last_modified = DateTime.parse(headers[:last_modified]) if headers[:last_modified]
  encoding = @charset ||= "utf-8"

  unless encoding.start_with?("utf")
    body.force_encoding(Encoding::UTF_8)
    encoding = "utf-8"

    # Make sure Unicode is in NFC
    begin
      body.unicode_normalize! unless !body.unicode_normalized?
    rescue Encoding::CompatibilityError
      # Oh, well ...
    end if body.respond_to?(:unicode_normalized?)
  end

  super(body, "r:#{encoding}")
end

Instance Attribute Details

#base_uri ⇒ String (readonly)

Base URI based on resource location or returned Location header.

Since:

  • 0.2.4



363
364
365
# File 'lib/rdf/util/file.rb', line 363

# Base URI based on resource location or returned Location header.
#
# @return [String] the value held in `@base_uri`
def base_uri
  @base_uri
end

#charset ⇒ String (readonly)

Encoding of resource (from Content-Type), downcased. Also applied to content if it is UTF

Since:

  • 0.2.4



371
372
373
# File 'lib/rdf/util/file.rb', line 371

# Encoding of the resource (from Content-Type), downcased.
#
# @return [String] the value held in `@charset`
def charset
  @charset
end

#code ⇒ Integer (readonly)

Response code

Since:

  • 0.2.4



375
376
377
# File 'lib/rdf/util/file.rb', line 375

# Response code.
#
# @return [Integer] the value held in `@code`
def code
  @code
end

#content_type ⇒ String (readonly)

Content-Type of the returned resource

Since:

  • 0.2.4



367
368
369
# File 'lib/rdf/util/file.rb', line 367

# Content-Type of the returned resource.
#
# @return [String] the value held in `@content_type`
def content_type
  @content_type
end

#etag ⇒ String (readonly)

ETag from headers

Since:

  • 0.2.4



380
381
382
# File 'lib/rdf/util/file.rb', line 380

# ETag from headers.
#
# @return [String] the value held in `@etag`
def etag
  @etag
end

#headers ⇒ Hash{Symbol => Object} (readonly)

Raw headers from response

Since:

  • 0.2.4



388
389
390
# File 'lib/rdf/util/file.rb', line 388

# Raw headers from the response.
#
# @return [Hash{Symbol => Object}] the value held in `@headers`
def headers
  @headers
end

#last_modified ⇒ DateTime (readonly)

Last-Modified time from headers

Since:

  • 0.2.4



384
385
386
# File 'lib/rdf/util/file.rb', line 384

# Last-Modified time from headers.
#
# @return [DateTime] the value held in `@last_modified`
def last_modified
  @last_modified
end

#requested_url ⇒ String (readonly)

Originally requested URL

Since:

  • 0.2.4



392
393
394
# File 'lib/rdf/util/file.rb', line 392

# Originally requested URL.
#
# @return [String] the value held in `@requested_url`
def requested_url
  @requested_url
end

Instance Method Details

#content_encoding ⇒ Array<String>

Returns a list of encodings in Content-Encoding field as an array of strings.

The encodings are downcased for canonicalization.

Since:

  • 0.2.4



445
446
447
# File 'lib/rdf/util/file.rb', line 445

# Lists the encodings named in the Content-Encoding header.
#
# The header value is split on commas; each entry is stripped of surrounding
# whitespace and downcased. A missing header yields an empty list.
#
# @return [Array<String>]
def content_encoding
  raw = headers.fetch(:content_encoding, "")
  raw.split(',').map { |token| token.strip.downcase }
end

#links ⇒ LinkHeader

Return links from the Link header.

Links can be returned in array form, or searched.

Examples:


d = RemoteDocument.new(...)
describedby = d.links.find_link(['rel', 'describedby']).href

Since:

  • 0.2.4



460
461
462
# File 'lib/rdf/util/file.rb', line 460

# Return links from the Link header.
#
# The `:link` entry of the stored headers is handed to `LinkHeader.parse` on
# first use; the result is cached in `@links` and reused afterwards.
#
# @return [Object] whatever `LinkHeader.parse` returns
def links
  return @links if @links

  @links = LinkHeader.parse(@headers[:link])
end