Class: Owasp::Esapi::Codec::XmlCodec

Inherits:
BaseCodec
  • Object
show all
Defined in:
lib/codec/xml_codec.rb

Constant Summary collapse

ENTITY_MAP =

Entity maps

# Maps each supported entity name (without the leading "&" and the
# trailing ";") to the character it stands for.
{
  "lt" => "<",     # less-than sign
  "gt" => ">",     # greater-than sign
  "amp" => "&",    # ampersand
  "apos" => "\'",  # apostrophe (single quote)
  "quot" => "\""   # double quotation mark
}

Constants inherited from BaseCodec

BaseCodec::END_CODE_POINT, BaseCodec::START_CODE_POINT

Instance Method Summary collapse

Methods inherited from BaseCodec

#decode, #encode, #hex, #min

Constructor Details

#initialize ⇒ XmlCodec

Returns a new instance of XmlCodec.



27
28
29
30
31
32
33
34
35
36
# File 'lib/codec/xml_codec.rb', line 27

# Builds the tables used when decoding named entities.
#
# After construction:
# * @longest_key holds the length of the longest entity name in ENTITY_MAP
#   (0 when the map is empty).
# * @lookup_map maps each lower-cased entity name to the name exactly as it
#   is spelled in ENTITY_MAP, which allows case-insensitive lookups.
def initialize
  @longest_key = 0
  @lookup_map = {}
  ENTITY_MAP.each_key do |name|
    # Record the actual length. Incrementing by one per longer key is only
    # correct when key lengths happen to grow in steps of one.
    @longest_key = name.size if name.size > @longest_key
    @lookup_map[name.downcase] = name
  end
end

Instance Method Details

#decode_char(input) ⇒ Object

Returns the decoded version of the character starting at index, or nil if no decoding is possible.



47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/codec/xml_codec.rb', line 47

# Decodes the character or entity at the current position of +input+.
#
# input - a markable stream responding to mark, reset, next and push.
#
# Returns:
# * nil when the stream is exhausted,
# * the next character unchanged when it is not "&",
# * the decoded character when "&" starts a numeric ("&#...") or named
#   ("&name;") entity that can be decoded,
# * nil otherwise, in which case the stream is rewound to where it was on
#   entry so the caller still sees the "&".
def decode_char(input)
  input.mark
  first = input.next
  return nil if first.nil?
  return first unless first == "&"

  second = input.next
  result =
    if second == "#"
      numeric_entity(input)
    elsif second =~ /[a-zA-Z]/
      # named_entity expects to read the first letter of the name itself
      input.push(second)
      named_entity(input)
    end

  # Every failed decode rewinds to the mark, including the case where "&"
  # is followed by something that cannot start an entity; otherwise the
  # already-consumed "&" would be lost.
  input.reset if result.nil?
  result
end

#encode_char(immune, input) ⇒ Object

Encodes a Character using XML entities as necessary.



39
40
41
42
43
# File 'lib/codec/xml_codec.rb', line 39

# Encodes a single character as an XML hexadecimal character reference
# unless it may be emitted verbatim.
#
# immune - collection of characters that must never be encoded
# input  - the character to encode
#
# Returns +input+ itself when it is immune, an ASCII letter or digit, a tab
# or a space; otherwise "&#x<hex>;" where <hex> is produced by #hex.
def encode_char(immune, input)
  return input if immune.include?(input)
  # \t must be a real tab escape: writing \\t inside the character class
  # would whitelist the backslash (and a redundant "t") instead of the tab.
  return input if input =~ /[a-zA-Z0-9\t ]/
  "&#x#{hex(input)};"
end

#named_entity(input) ⇒ Object

Extracts the named entity from the input and converts it to the real character, e.g. &amp;amp; becomes &amp;.



134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/codec/xml_codec.rb', line 134

# Decodes a named entity such as "amp;" from +input+.
#
# The method rewinds with input.reset and then skips the matched name plus
# one extra leading character, so it relies on the caller having marked the
# stream just before the "&" that introduced the entity (as decode_char
# does).
#
# Entity names are matched case-insensitively through @lookup_map.
#
# Returns the replacement character from ENTITY_MAP, or nil when no known
# entity name is found or the name is not immediately followed by ";".
def named_entity(input) #:nodoc:
  lookahead = ''
  limit = min(input.remainder.size, @longest_key + 1)
  matched_name = nil
  (0..limit).each do
    lookahead << input.next if input.next?
    # remember the longest name seen so far, so that a name which is a
    # prefix of a longer one does not win
    name = @lookup_map[lookahead.downcase]
    matched_name = name if name
  end
  return nil if matched_name.nil?

  # rewind to the mark, then skip the leading "&" and the matched name
  input.reset
  (matched_name.size + 1).times { input.next if input.next? }
  # The name must be terminated right here. Consuming whatever follows as
  # if it were the ";" would silently drop a character (e.g. "&ltx;").
  return nil unless input.peek == ";"
  input.next # consume the ;
  ENTITY_MAP[matched_name]
end

#numeric_entity(input) ⇒ Object

:nodoc:



72
73
74
75
76
77
78
79
80
# File 'lib/codec/xml_codec.rb', line 72

# Dispatches a numeric character reference to the right parser.
#
# Looks at the next character of +input+ without consuming it: an "x" or
# "X" is consumed and the rest is handed to parse_hex, anything else is
# handed to parse_number untouched. Returns nil when nothing is left to
# read, otherwise whatever the chosen parser returns.
def numeric_entity(input) #:nodoc:
  marker = input.peek
  return nil if marker.nil?
  return parse_number(input) unless marker.downcase.eql?("x")

  input.next # drop the hex marker
  parse_hex(input)
end

#parse_hex(input) ⇒ Object

Parses the hexadecimal digits of a numeric entity and returns the character for that code point, or nil if it cannot be decoded.



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/codec/xml_codec.rb', line 83

# Reads hexadecimal digits from +input+ and converts them to a character.
#
# Digits are consumed until a ";" (which is consumed as well) or the end of
# the input is reached.
#
# Returns the UTF-8 character for the parsed code point, or nil when
# * a character that is neither a hex digit nor ";" is encountered,
# * no digits were read at all,
# * the value lies outside START_CODE_POINT..END_CODE_POINT, or
# * the code point cannot be represented in UTF-8.
def parse_hex(input) #:nodoc:
  digits = ''
  while input.next?
    c = input.peek
    if "0123456789ABCDEFabcdef".include?(c)
      digits << c
      input.next
    elsif c == ";"
      input.next
      break
    else
      return nil
    end
  end
  # "".hex is 0, so without this guard an empty reference such as "&#x;"
  # would be treated as code point 0.
  return nil if digits.empty?

  code_point = digits.hex
  return nil unless code_point.between?(START_CODE_POINT, END_CODE_POINT)
  begin
    code_point.chr(Encoding::UTF_8)
  rescue RangeError
    # Integer#chr rejects code points that are invalid in UTF-8
    nil
  end
end

#parse_number(input) ⇒ Object

parse a number out of the encoded value



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/codec/xml_codec.rb', line 106

# Reads decimal digits from +input+ and converts them to a character.
#
# Digits are consumed until a ";" (which is consumed as well) or the end of
# the input is reached.
#
# Returns the UTF-8 character for the parsed code point, or nil when
# * a character that is neither a digit nor ";" is encountered,
# * no digits were read at all,
# * the value lies outside START_CODE_POINT..END_CODE_POINT, or
# * the code point cannot be represented in UTF-8.
def parse_number(input) #:nodoc:
  digits = ''
  while input.next?
    c = input.peek
    if c =~ /\d/
      digits << c
      input.next
    elsif c == ';'
      input.next
      break
    else
      return nil
    end
  end
  # "".to_i is 0, so without this guard an empty reference such as "&#;"
  # would be treated as code point 0.
  return nil if digits.empty?

  code_point = digits.to_i
  return nil unless code_point.between?(START_CODE_POINT, END_CODE_POINT)
  begin
    code_point.chr(Encoding::UTF_8)
  rescue RangeError
    # Integer#chr rejects code points that are invalid in UTF-8
    nil
  end
end