Method: Addressable::URI.normalize_component
- Defined in:
- lib/addressable/uri.rb
.normalize_component(component, character_class = CharacterClassesRegexps::RESERVED_AND_UNRESERVED, leave_encoded = '') ⇒ String
Normalizes the encoding of a URI component.
552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 |
# File 'lib/addressable/uri.rb', line 552

##
# Normalizes the encoding of a URI component.
#
# The component is passed through +unencode_component+, NFC-normalized
# and then handed to +encode_component+ together with the resolved
# character class. The result is tagged as UTF-8 before it is returned.
#
# @param [String, #to_str] component
#   The URI component to normalize. Anything that is not a String is
#   converted with +to_str+.
# @param [String, Regexp] character_class
#   Either a Regexp that is forwarded to +encode_component+ unchanged, or
#   a String (or String subclass) holding the body of a bracket
#   expression. A String is compiled into the negated class
#   <code>/[^...]/</code>, i.e. a pattern matching every character that is
#   NOT listed. Presumably the matched characters are the ones
#   +encode_component+ percent-encodes -- confirm against that method.
# @param [String] leave_encoded
#   Characters whose percent-encoded sequences are excluded from the
#   compiled pattern: a <code>%</code> that starts the sequence of one of
#   these bytes (lower- or upper-case hex) is not matched. Only consulted
#   for building the pattern when +character_class+ is a String; always
#   forwarded to +unencode_component+ and +encode_component+.
#
# @return [String, nil]
#   The normalized component with its encoding set to UTF-8, or +nil+
#   when +component+ is +nil+.
#
# @raise [TypeError]
#   If +component+ is not a String and cannot be converted via +to_str+,
#   or if +character_class+ is neither a String nor a Regexp.
def self.normalize_component(component,
    character_class = CharacterClassesRegexps::RESERVED_AND_UNRESERVED,
    leave_encoded = '')
  return nil if component.nil?

  unless component.is_a?(String)
    begin
      component = component.to_str
    rescue NoMethodError, TypeError
      raise TypeError, "Can't convert #{component.class} into String."
    end
  end

  # `case` matches with `===`, so subclasses of String and Regexp are
  # accepted here, in line with the String handling below.
  case character_class
  when String
    if leave_encoded.length > 0
      # '%' is added to the listed characters so the negated bracket never
      # matches it; the lookahead alternative then matches only those '%'
      # that do not introduce one of the protected sequences.
      character_class = "#{character_class}%" unless character_class.include?('%')
      protected_sequences =
        leave_encoded.bytes.map { |byte| SEQUENCE_ENCODING_TABLE[byte] }.join('|')
      character_class =
        /[^#{character_class}]|%(?!#{protected_sequences}|#{protected_sequences.upcase})/
    else
      character_class = /[^#{character_class}]/
    end
  when Regexp
    # Already a pattern; forwarded as-is.
  else
    raise TypeError,
      "Expected String or Regexp, got #{character_class.inspect}"
  end

  # We can't perform regexps on invalid UTF sequences, but
  # here we need to, so switch to ASCII.
  component = component.dup # never mutate the caller's string
  component.force_encoding(Encoding::ASCII_8BIT)
  unencoded = unencode_component(component, String, leave_encoded)

  encoded =
    begin
      encode_component(
        unencoded.unicode_normalize(:nfc),
        character_class,
        leave_encoded
      )
    rescue ArgumentError
      # NOTE(review): this fallback relies on encode_component's default
      # arguments and so ignores character_class and leave_encoded.
      # Presumably it exists for input that cannot be NFC-normalized
      # (e.g. invalid byte sequences) -- confirm that dropping the custom
      # arguments here is intended.
      encode_component(unencoded)
    end

  encoded.force_encoding(Encoding::UTF_8)
end