Module: URI

Extended by:
Escape
Includes:
REGEXP
Included in:
Generic
Defined in:
lib/uri.rb,
lib/uri/ftp.rb,
lib/uri/http.rb,
lib/uri/ldap.rb,
lib/uri/https.rb,
lib/uri/mailto.rb,
lib/uri/common.rb,
lib/uri/generic.rb

Overview

uri/common.rb

Author

Akira Yamada <akira@ruby-lang.org>

Revision

$Id: common.rb 16982 2008-06-07 20:11:00Z shyouhei $

License

You can redistribute it and/or modify it under the same terms as Ruby.

Defined Under Namespace

Modules: Escape, REGEXP, Util Classes: BadURIError, Error, FTP, Generic, HTTP, HTTPS, InvalidComponentError, InvalidURIError, LDAP, MailTo

Constant Summary collapse

VERSION_CODE =

:stopdoc:

'000911'.freeze
VERSION =
VERSION_CODE.scan(/../).collect{|n| n.to_i}.join('.').freeze
@@schemes =
{}

Constants included from REGEXP

REGEXP::ABS_PATH, REGEXP::ABS_URI, REGEXP::ABS_URI_REF, REGEXP::ESCAPED, REGEXP::FRAGMENT, REGEXP::HOST, REGEXP::OPAQUE, REGEXP::PORT, REGEXP::QUERY, REGEXP::REGISTRY, REGEXP::REL_PATH, REGEXP::REL_URI, REGEXP::REL_URI_REF, REGEXP::SCHEME, REGEXP::UNSAFE, REGEXP::URI_REF, REGEXP::USERINFO

Class Method Summary collapse

Methods included from Escape

escape, unescape

Class Method Details

.extract(str, schemes = nil, &block) ⇒ Object

Synopsis

URI::extract(str[, schemes][,&blk])

Args

str

String to extract URIs from.

schemes

Limit URI matching to specific schemes.

Description

Extracts URIs from a string. If a block is given, yields each matched URI to it and returns nil; otherwise returns an array of the matches.

Usage

require "uri"

URI.extract("text here http://foo.example.org/bla and here mailto:test@example.com and here also.")
# => ["http://foo.example.org/bla", "mailto:test@example.com"]


551
552
553
554
555
556
557
558
559
560
# File 'lib/uri/common.rb', line 551

# Scans +str+ for URI-like substrings using the pattern returned by
# regexp(schemes).
#
# With a block, every whole match is handed to the block and nil is returned.
# Without a block, the whole matches are returned in an Array.
def self.extract(str, schemes = nil, &block)
  pattern = regexp(schemes)

  unless block
    matches = []
    # $& (the whole match) is collected rather than the value scan passes to
    # its block.
    str.scan(pattern) { matches << $& }
    return matches
  end

  str.scan(pattern) { block.call($&) }
  nil
end

.join(*str) ⇒ Object

Synopsis

URI::join(str[, str, ...])

Args

str

String(s) to work with

Description

Joins URIs.

Usage

require 'uri'

p URI.join("http://localhost/","main.rbx")
# => #<URI::HTTP:0x2022ac02 URL:http://localhost/main.rbx>


519
520
521
522
523
524
525
# File 'lib/uri/common.rb', line 519

# Resolves a chain of URI references into a single URI object.
#
# The first argument is parsed with parse; each following argument is then
# merged onto the running result, in order. The final object is returned.
def self.join(*str)
  base, *references = str
  references.inject(self.parse(base)) { |resolved, ref| resolved.merge(ref) }
end

.parse(uri) ⇒ Object

Synopsis

URI::parse(uri_str)

Args

uri_str

String with URI.

Description

Creates an instance of the appropriate URI subclass from the string.

Raises

URI::InvalidURIError

Raised if the given URI is not valid.

Usage

require 'uri'

uri = URI.parse("http://www.ruby-lang.org/")
p uri
# => #<URI::HTTP:0x202281be URL:http://www.ruby-lang.org/>
p uri.scheme 
# => "http" 
p uri.host 
# => "www.ruby-lang.org"


483
484
485
486
487
488
489
490
491
492
493
494
495
496
# File 'lib/uri/common.rb', line 483

# Builds a URI object from the string +uri+.
#
# The string is decomposed by split. When the upcased scheme is a key of
# @@schemes, the class stored under that key is instantiated; otherwise
# Generic is used. The constructor receives the components in the order
# scheme, userinfo, host, port, registry, path, opaque, query, fragment.
# Errors raised by split propagate to the caller.
def self.parse(uri)
  components = self.split(uri)
  scheme = components.first

  klass =
    if scheme && @@schemes.include?(scheme.upcase)
      @@schemes[scheme.upcase]
    else
      Generic
    end

  klass.new(*components)
end

.regexp(schemes = nil) ⇒ Object

Synopsis

URI::regexp([match_schemes])

Args

match_schemes

Array of schemes. If given, the resulting regexp matches only URIs whose scheme is one of match_schemes.

Description

Returns a Regexp object which matches URI-like strings. The Regexp object returned by this method includes an arbitrary number of capture groups (parentheses). Never rely on their number.

Usage

require 'uri'

# extract first URI from html_string
html_string.slice(URI.regexp)

# remove ftp URIs
html_string.sub(URI.regexp(['ftp']), '')

# You should not rely on the number of parentheses
html_string.scan(URI.regexp) do |*matches|
  p $&
end


593
594
595
596
597
598
599
# File 'lib/uri/common.rb', line 593

# Returns a Regexp for locating URIs.
#
# When +schemes+ is nil or false, the ABS_URI_REF constant is returned as is.
# Otherwise a new pattern is built: a lookahead requiring one of the given
# schemes followed by ":", and then PATTERN::X_ABS_URI.
def self.regexp(schemes = nil)
  return ABS_URI_REF unless schemes

  /(?=#{Regexp.union(*schemes)}:)#{PATTERN::X_ABS_URI}/xn
end

.split(uri) ⇒ Object

Synopsis

URI::split(uri)

Args

uri

String with URI.

Description

Splits the string into the following parts and returns them as an array:

* Scheme
* Userinfo
* Host
* Port
* Registry
* Path
* Opaque
* Query
* Fragment

Usage

require 'uri'

p URI.split("http://www.ruby-lang.org/")
# => ["http", nil, "www.ruby-lang.org", nil, nil, "/", nil, nil, nil]


380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
# File 'lib/uri/common.rb', line 380

# Breaks +uri+ into nine components and returns them as an Array ordered
# [scheme, userinfo, host, port, registry, path, opaque, query, fragment].
#
# An empty string yields all-nil components with an empty path. Input that
# matches ABS_URI or REL_URI is decomposed from that pattern's capture groups.
#
# Raises InvalidURIError when +uri+ matches neither pattern, or when an
# ABS_URI match has no scheme, or has none of opaque, path, host and registry.
def self.split(uri)
  scheme = userinfo = host = port = registry = nil
  path = opaque = query = fragment = nil

  case uri
  when ''
    # Null URI: nothing to extract; path is defaulted further down.

  when ABS_URI
    # Grammar matched by this branch:
    #
    #   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
    #   absoluteURI   = scheme ":" ( hier_part | opaque_part )
    #   hier_part     = ( net_path | abs_path ) [ "?" query ]
    #   opaque_part   = uric_no_slash *uric
    #   abs_path      = "/"  path_segments
    #   net_path      = "//" authority [ abs_path ]
    #   authority     = server | reg_name
    #   server        = [ [ userinfo "@" ] hostport ]
    scheme, opaque, userinfo, host, port,
      registry, path, query, fragment = Regexp.last_match.captures

    unless scheme
      raise InvalidURIError,
        "bad URI(absolute but no scheme): #{uri}"
    end
    unless opaque || path || host || registry
      raise InvalidURIError,
        "bad URI(absolute but no path): #{uri}"
    end

  when REL_URI
    # Grammar matched by this branch:
    #
    #   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
    #   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
    #   net_path      = "//" authority [ abs_path ]
    #   abs_path      = "/"  path_segments
    #   rel_path      = rel_segment [ abs_path ]
    #   authority     = server | reg_name
    #   server        = [ [ userinfo "@" ] hostport ]
    userinfo, host, port, registry,
      rel_segment, abs_path, query, fragment = Regexp.last_match.captures

    # The path is whichever of the two pieces is present, or both joined.
    path = (rel_segment && abs_path) ? rel_segment + abs_path : (rel_segment || abs_path)

  else
    raise InvalidURIError, "bad URI(is not URI?): #{uri}"
  end

  # A non-opaque URI always carries a path, possibly empty
  # (see RFC2396 Section 5.2).
  path ||= '' unless opaque

  [scheme, userinfo, host, port, registry, path, opaque, query, fragment]
end