Module: URI::REGEXP::PATTERN

Defined in:
lib/uri/common.rb

Overview

Patterns used to parse URIs.

Constant Summary

ALPHA =

alpha = lowalpha | upalpha

"a-zA-Z"
ALNUM =

alphanum = alpha | digit

"#{ALPHA}\\d"
HEX =

hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |

"a" | "b" | "c" | "d" | "e" | "f"
"a-fA-F\\d"
ESCAPED =

escaped = "%" hex hex

"%[#{HEX}]{2}"
UNRESERVED =

mark = "-" | "_" | "." | "!" | "~" | "*" | "'" |

"(" | ")"

unreserved = alphanum | mark

"-_.!~*'()#{ALNUM}"
RESERVED =

reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |

"$" | "," (RFC 2396)

reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |

"$" | "," | "[" | "]" (RFC 2732)
";/?:@&=+$,\\[\\]"
URIC =

uric = reserved | unreserved | escaped

"(?:[#{UNRESERVED}#{RESERVED}]|#{ESCAPED})"
URIC_NO_SLASH =

uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |

"&" | "=" | "+" | "$" | ","
"(?:[#{UNRESERVED};?:@&=+$,]|#{ESCAPED})"
QUERY =

query = *uric

"#{URIC}*"
FRAGMENT =

fragment = *uric

"#{URIC}*"
DOMLABEL =

domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum

"(?:[#{ALNUM}](?:[-#{ALNUM}]*[#{ALNUM}])?)"
TOPLABEL =

toplabel = alpha | alpha *( alphanum | "-" ) alphanum

"(?:[#{ALPHA}](?:[-#{ALNUM}]*[#{ALNUM}])?)"
HOSTNAME =

hostname = *( domainlabel "." ) toplabel [ "." ]

"(?:#{DOMLABEL}\\.)*#{TOPLABEL}\\.?"
IPV4ADDR =

RFC 2373, APPENDIX B:

IPv6address = hexpart [ ":" IPv4address ]
IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
hexpart = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]
hexseq = hex4 *( ":" hex4)
hex4 = 1*4HEXDIG

XXX: This definition has a flaw. "::" + IPv4address must be allowed too. Here is a replacement.

IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT

"\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}"
HEX4 =

hex4 = 1*4HEXDIG

"[#{HEX}]{1,4}"
LASTPART =

lastpart = hex4 | IPv4address

"(?:#{HEX4}|#{IPV4ADDR})"
HEXSEQ1 =

hexseq1 = *( hex4 ":" ) hex4

"(?:#{HEX4}:)*#{HEX4}"
HEXSEQ2 =

hexseq2 = *( hex4 ":" ) lastpart

"(?:#{HEX4}:)*#{LASTPART}"
IPV6ADDR =

IPv6address = hexseq2 | [ hexseq1 ] "::" [ hexseq2 ]

"(?:#{HEXSEQ2}|(?:#{HEXSEQ1})?::(?:#{HEXSEQ2})?)"
IPV6REF =

ipv6reference = "[" IPv6address "]" (RFC 2732)

"\\[#{IPV6ADDR}\\]"
HOST =

host = hostname | IPv4address

host = hostname | IPv4address | IPv6reference (RFC 2732)

"(?:#{HOSTNAME}|#{IPV4ADDR}|#{IPV6REF})"
PORT =

port = *digit

'\d*'
HOSTPORT =

hostport = host [ ":" port ]

"#{HOST}(?::#{PORT})?"
USERINFO =

userinfo = *( unreserved | escaped |

";" | ":" | "&" | "=" | "+" | "$" | "," )
"(?:[#{UNRESERVED};:&=+$,]|#{ESCAPED})*"
PCHAR =

pchar = unreserved | escaped |

":" | "@" | "&" | "=" | "+" | "$" | ","
"(?:[#{UNRESERVED}:@&=+$,]|#{ESCAPED})"
PARAM =

param = *pchar

"#{PCHAR}*"
SEGMENT =

segment = *pchar *( ";" param )

"#{PCHAR}*(?:;#{PARAM})*"
PATH_SEGMENTS =

path_segments = segment *( "/" segment )

"#{SEGMENT}(?:/#{SEGMENT})*"
SERVER =

server = [ [ userinfo "@" ] hostport ]

"(?:#{USERINFO}@)?#{HOSTPORT}"
REG_NAME =

reg_name = 1*( unreserved | escaped | "$" | "," |

";" | ":" | "@" | "&" | "=" | "+" )
"(?:[#{UNRESERVED}$,;:@&=+]|#{ESCAPED})+"
AUTHORITY =

authority = server | reg_name

"(?:#{SERVER}|#{REG_NAME})"
REL_SEGMENT =

rel_segment = 1*( unreserved | escaped |

";" | "@" | "&" | "=" | "+" | "$" | "," )
"(?:[#{UNRESERVED};@&=+$,]|#{ESCAPED})+"
SCHEME =

scheme = alpha *( alpha | digit | "+" | "-" | "." )

"[#{ALPHA}][-+.#{ALPHA}\\d]*"
ABS_PATH =

abs_path = "/" path_segments

"/#{PATH_SEGMENTS}"
REL_PATH =

rel_path = rel_segment [ abs_path ]

"#{REL_SEGMENT}(?:#{ABS_PATH})?"
NET_PATH =

net_path = "//" authority [ abs_path ]

"//#{AUTHORITY}(?:#{ABS_PATH})?"
HIER_PART =

hier_part = ( net_path | abs_path ) [ "?" query ]

"(?:#{NET_PATH}|#{ABS_PATH})(?:\\?(?:#{QUERY}))?"
OPAQUE_PART =

opaque_part = uric_no_slash *uric

"#{URIC_NO_SLASH}#{URIC}*"
ABS_URI =

absoluteURI = scheme ":" ( hier_part | opaque_part )

"#{SCHEME}:(?:#{HIER_PART}|#{OPAQUE_PART})"
REL_URI =

relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]

"(?:#{NET_PATH}|#{ABS_PATH}|#{REL_PATH})(?:\\?#{QUERY})?"
URI_REF =

URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]

"(?:#{ABS_URI}|#{REL_URI})?(?:##{FRAGMENT})?"
X_ABS_URI =

XXX:

"
  (#{PATTERN::SCHEME}):                     (?# 1: scheme)
  (?:
     (#{PATTERN::OPAQUE_PART})              (?# 2: opaque)
  |
     (?:(?:
       //(?:
           (?:(?:(#{PATTERN::USERINFO})@)?  (?# 3: userinfo)
             (?:(#{PATTERN::HOST})(?::(\\d*))?))?(?# 4: host, 5: port)
         |
           (#{PATTERN::REG_NAME})           (?# 6: registry)
         )
       |
       (?!//))                              (?# XXX: '//' is the mark for hostport)
       (#{PATTERN::ABS_PATH})?              (?# 7: path)
     )(?:\\?(#{PATTERN::QUERY}))?           (?# 8: query)
  )
  (?:\\#(#{PATTERN::FRAGMENT}))?            (?# 9: fragment)
"
X_REL_URI =
"
  (?:
    (?:
      //
      (?:
        (?:(#{PATTERN::USERINFO})@)?       (?# 1: userinfo)
          (#{PATTERN::HOST})?(?::(\\d*))?  (?# 2: host, 3: port)
      |
        (#{PATTERN::REG_NAME})             (?# 4: registry)
      )
    )
  |
    (#{PATTERN::REL_SEGMENT})              (?# 5: rel_segment)
  )?
  (#{PATTERN::ABS_PATH})?                  (?# 6: abs_path)
  (?:\\?(#{PATTERN::QUERY}))?              (?# 7: query)
  (?:\\#(#{PATTERN::FRAGMENT}))?           (?# 8: fragment)
"