Class: AntiSamy::Scanner

Inherits:
Object
  • Object
show all
Defined in:
lib/antisamy/html/scanner.rb

Constant Summary collapse

DEFAULT_ENCODE =
"UTF-8"
ALLOW_EMPTY =
%w[br hr a img link iframe script object applet frame base param meta input textarea embed basefont col]

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(policy) ⇒ Scanner

Create a scanner with a given policy



19
20
21
22
# File 'lib/antisamy/html/scanner.rb', line 19

# Build a scanner bound to the given policy.
#
# Starts with an empty error list and keeps a reference to +policy+.
#
# @param policy [Object] policy object stored in @policy
def initialize(policy)
  @errors = []
  @policy = policy
end

Instance Attribute Details

#errors ⇒ Object

Returns the value of attribute errors.



3
4
5
# File 'lib/antisamy/html/scanner.rb', line 3

# Reader for the @errors instance variable.
#
# @return [Object, nil] whatever is currently held in @errors; the
#   constructor listed for this class assigns it an empty Array
def errors
  @errors
end

#nofollow ⇒ Object

Returns the value of attribute nofollow.



3
4
5
# File 'lib/antisamy/html/scanner.rb', line 3

# Reader for the @nofollow instance variable.
#
# NOTE(review): nothing in this listing assigns @nofollow, so this may
# return nil; presumably a link-handling flag — confirm against the setter.
#
# @return [Object, nil] whatever is currently held in @nofollow
def nofollow
  @nofollow
end

#pae ⇒ Object

Returns the value of attribute pae.



3
4
5
# File 'lib/antisamy/html/scanner.rb', line 3

# Reader for the @pae instance variable.
#
# NOTE(review): nothing in this listing assigns @pae or explains what
# "pae" stands for — confirm its meaning with the code that sets it.
#
# @return [Object, nil] whatever is currently held in @pae
def pae
  @pae
end

#policy ⇒ Object

Returns the value of attribute policy.



3
4
5
# File 'lib/antisamy/html/scanner.rb', line 3

# Reader for the @policy instance variable.
#
# @return [Object] the policy object handed to the constructor listed
#   for this class
def policy
  @policy
end

Instance Method Details

#scan(input, input_encode, output_encoder) ⇒ Object

Scans the input using the provided input encoding and output encoder. Raises an error if the input is nil or if it exceeds the policy's maximum input size.

Raises:

  • (ArgumentError)
  • (ScanError)


26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/antisamy/html/scanner.rb', line 26

# Scan +input+ against the scanner's policy and return the results.
#
# The input is parsed with Nokogiri's HTML SAX parser; a SaxFilter
# forwards events to a Handler, whose document and errors populate
# the returned ScanResults.
#
# @param input [String] markup to scan; must respond to +size+ and +=~+
# @param input_encode [String] encoding passed to the SAX parser
# @param output_encoder [Object] passed through to Handler unchanged
# @return [ScanResults] with +clean_html+ set from +handler.document+ and
#   +messages+ set from +handler.errors+
# @raise [ArgumentError] if +input+ is nil
# @raise [ScanError] if +input.size+ is greater than +@policy.max_input+
def scan(input, input_encode, output_encoder)
  raise ArgumentError, "input must not be nil" if input.nil?
  raise ScanError, "Max input Exceeded #{input.size} > #{@policy.max_input}" if input.size > @policy.max_input

  # Input is handled as a fragment unless it contains an <html ...> tag
  # or the word DOCTYPE (matched case-insensitively, anywhere in the text).
  fragment = (input =~ /\<\s?html\s?.*?\>|DOCTYPE/im).nil?

  handler = Handler.new(@policy, output_encoder, fragment)
  scanner = SaxFilter.new(@policy, handler, @@basic_param_tag_rule, fragment)
  parser = Nokogiri::HTML::SAX::Parser.new(scanner, input_encode)
  parser.parse(input) do |ctx|
    # Have the parser substitute entities before events reach the filter.
    ctx.replace_entities = true
  end

  # NOTE(review): the timestamp is taken after parsing has finished; if
  # ScanResults expects the scan's start time, capture it earlier — confirm.
  results = ScanResults.new(Time.now)
  results.clean_html = handler.document
  results.messages = handler.errors
  results
end