Module: Gammo::Tokenizer::Escape

Included in:
Gammo::Tokenizer, EscapedToken
Defined in:
lib/gammo/tokenizer/escape.rb

Constant Summary

# Length limit used for entities written without a trailing ";".
# NOTE(review): presumably the longest entity name (in characters) that is
# still recognised when the semicolon is omitted; confirm against the
# entity-matching code, which is not visible in this listing.
LONGEST_ENTITY_WITHOUT_SEMICOLON = 6
# Maps each character that #escape rewrites to the HTML character reference
# that replaces it. The keys are exactly the characters matched by the
# regexp in #escape: &, ', <, >, " and carriage return.
ESCAPE_REPLACEMENT_TABLE = {
  ?&  => '&amp;',
  ?'  => '&#39;',
  ?<  => '&lt;',
  ?>  => '&gt;',
  ?"  => '&#34;',
  ?\r => '&#13;',
}.freeze
# Thirty-two single-character replacement strings.
#
# The entries line up with the Windows-1252 characters for bytes 0x80..0x9F
# (positions with no Windows-1252 assignment keep the matching C1 control
# code point). NOTE(review): presumably indexed by (code point - 0x80) when
# resolving numeric character references; the lookup code is not visible in
# this listing, so the per-row index ranges below are an assumption to
# confirm.
REPLACEMENT_TABLE = [
  "\u20AC", "\u0081", "\u201A", "\u0192", "\u201E", "\u2026", "\u2020", "\u2021", # 0x80-0x87
  "\u02C6", "\u2030", "\u0160", "\u2039", "\u0152", "\u008D", "\u017D", "\u008F", # 0x88-0x8F
  "\u0090", "\u2018", "\u2019", "\u201C", "\u201D", "\u2022", "\u2013", "\u2014", # 0x90-0x97
  "\u02DC", "\u2122", "\u0161", "\u203A", "\u0153", "\u009D", "\u017E", "\u0178", # 0x98-0x9F
].freeze

Instance Method Summary

Instance Method Details

#escape(s) ⇒ Object

Escapes the given string in place, replacing each character listed in ESCAPE_REPLACEMENT_TABLE with its mapped entity.



52
53
54
# File 'lib/gammo/tokenizer/escape.rb', line 52

# Escapes HTML-significant characters in +s+, modifying the string in place.
#
# Every occurrence of &, ', <, >, " or carriage return is replaced with the
# value stored for that character in ESCAPE_REPLACEMENT_TABLE.
#
# @param s [String] the string to escape; it is mutated by this call.
# @return [String] +s+ itself, whether or not any character was replaced.
def escape(s)
  # String#gsub! returns nil when nothing matched; return the receiver
  # explicitly so callers always get the (possibly unchanged) string back.
  s.gsub!(/[&'<>"\r]/, ESCAPE_REPLACEMENT_TABLE)
  s
end

#unescape(data, **options) ⇒ String?

Unescapes entities in the given data. Returns nil when data is nil.

Parameters:

  • data (String)

Returns:

  • (String, nil)


59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/gammo/tokenizer/escape.rb', line 59

# Unescapes entities in +data+, compacting the result into the same buffer.
#
# The string is scanned for its first "&" byte. If there is none, +data+ is
# returned untouched. Otherwise every "&" from that point on is handed to
# unescape_entity, all other bytes are copied down to the current write
# offset, and the written prefix is returned.
#
# +data+ is modified in place (via setbyte, and by unescape_entity).
#
# @param data [String, nil] the text to unescape.
# @param options [Hash] forwarded unchanged to unescape_entity.
# @return [String, nil] nil when +data+ is nil; +data+ itself when it
#   contains no "&"; otherwise a new string holding the first +dst+ bytes
#   of the rewritten buffer.
def unescape(data, **options)
  return unless data

  ampersand = ?&.ord
  data.each_byte.with_index do |byte, i|
    next unless byte == ampersand

    # Nothing before the first "&" has to move, so the write offset (dst)
    # and read offset (src) both start at that position.
    dst, src = unescape_entity(data, i, i, **options)

    # bytesize is O(1). It is re-read on every pass because unescape_entity
    # receives the buffer and may alter it (its body is not visible here).
    while src < data.bytesize
      current = data.getbyte(src)
      if current == ampersand
        dst, src = unescape_entity(data, dst, src, **options)
      else
        data.setbyte(dst, current)
        dst += 1
        src += 1
      end
    end
    return data.byteslice(0, dst)
  end
end