Class: HtmlFilter

Inherits:
Object
  • Object
show all
Defined in:
lib/raw/util/html_filter.rb

Constant Summary collapse

DEFAULT =

default settings

{
  # tag name => attribute names permitted on that tag
  'allowed' => {
    'a'      => %w[href target],
    'b'      => [],
    'i'      => [],
    'ul'     => [],
    'ol'     => [],
    'li'     => [],
    'img'    => %w[src width height alt],
    'object' => %w[width height],
    'param'  => %w[name value],
    'embed'  => %w[src type wmode name value]
  },
  # tags which should always be self-closing
  'no_close' => %w[img br hr],
  # tags which must always have separate opening and closing tags
  'always_close' => %w[a b],
  # attributes which should be checked for valid protocols
  'protocol_attributes' => %w[src href],
  # protocols which are allowed in those attributes
  'allowed_protocols' => %w[http ftp mailto],
  # tags which should be removed if they contain no content
  'remove_blanks' => %w[a b],
  # should comments be removed?
  'strip_comments' => true,
  # should a tag be made out of fragments such as "b>"?
  'always_make_tags' => true,
  # entity control options
  'allow_numbered_entities' => true,
  'allowed_entities' => %w[amp gt lt quot]
}

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = nil) ⇒ HtmlFilter

new html filter



99
100
101
102
103
# File 'lib/raw/util/html_filter.rb', line 99

# Builds a new filter and configures it through the attribute writers.
#
# Every entry of DEFAULT is applied; entries in +options+ override the
# default of the same name. A partial options hash therefore only changes
# the settings it mentions instead of leaving all the others unset.
#
# options - nil, or setting name (String or Symbol) => value pairs. Each
#           pair is delivered by calling the "<name>=" writer, so an unknown
#           name raises NoMethodError. Values are handed over as-is (not
#           copied).
def initialize(options = nil)
  @tag_counts = {}

  # Work on a copy so DEFAULT itself never picks up caller overrides.
  settings = DEFAULT.dup
  # Normalise keys to strings so :allowed and 'allowed' address one setting.
  (options || {}).each { |name, value| settings[name.to_s] = value }

  settings.each { |name, value| send("#{name}=", value) }
end

Instance Attribute Details

#allow_numbered_entities ⇒ Object

entity control option (true, false)



65
66
67
# File 'lib/raw/util/html_filter.rb', line 65

# Reader for the numbered-entity control option.
#
# Returns the current value of @allow_numbered_entities unchanged.
# DEFAULT supplies +true+ for the matching 'allow_numbered_entities' key.
def allow_numbered_entities
  @allow_numbered_entities
end

#allowed ⇒ Object

tags and attributes that are allowed

Eg.

{
  'a' => ['href', 'target'],
  'b' => [],
  'img' => ['src', 'width', 'height', 'alt']
}


38
39
40
# File 'lib/raw/util/html_filter.rb', line 38

# Reader for the allowed tags and attributes.
#
# Returns the object held in @allowed itself (no copy is made). In DEFAULT
# the matching 'allowed' entry is a Hash of tag name => Array of attribute
# names, e.g. 'a' => ['href', 'target'] or 'b' => [].
def allowed
  @allowed
end

#allowed_entities ⇒ Object

entity control option (amp, gt, lt, quot, etc.)



68
69
70
# File 'lib/raw/util/html_filter.rb', line 68

# Reader for the list of allowed entity names.
#
# Returns the object held in @allowed_entities itself (no copy is made).
# DEFAULT supplies ['amp', 'gt', 'lt', 'quot'] for the matching key.
def allowed_entities
  @allowed_entities
end

#allowed_protocols ⇒ Object

protocols which are allowed (http, ftp, mailto)



52
53
54
# File 'lib/raw/util/html_filter.rb', line 52

# Reader for the protocols which are allowed.
#
# Returns the object held in @allowed_protocols itself (no copy is made).
# DEFAULT supplies ['http', 'ftp', 'mailto'] for the matching key.
def allowed_protocols
  @allowed_protocols
end

#always_close ⇒ Object

tags which must always have separate opening and closing tags (e.g. “<b></b>”)



45
46
47
# File 'lib/raw/util/html_filter.rb', line 45

# Reader for the tags which must always have separate opening and closing
# tags.
#
# Returns the object held in @always_close itself (no copy is made).
# DEFAULT supplies ['a', 'b'] for the matching key.
def always_close
  @always_close
end

#always_make_tags ⇒ Object

should we try and make a b tag out of “b>” (true, false)



62
63
64
# File 'lib/raw/util/html_filter.rb', line 62

# Reader for the option that controls whether a tag is made out of a
# fragment such as "b>".
#
# Returns the current value of @always_make_tags unchanged.
# DEFAULT supplies +true+ for the matching key.
def always_make_tags
  @always_make_tags
end

#no_close ⇒ Object

tags which should always be self-closing (e.g. “<img />”)



41
42
43
# File 'lib/raw/util/html_filter.rb', line 41

# Reader for the tags which should always be self-closing.
#
# Returns the object held in @no_close itself (no copy is made).
# DEFAULT supplies ['img', 'br', 'hr'] for the matching key.
def no_close
  @no_close
end

#protocol_attributes ⇒ Object

attributes which should be checked for valid protocols (src,href)



49
50
51
# File 'lib/raw/util/html_filter.rb', line 49

# Reader for the attributes which should be checked for valid protocols.
#
# Returns the object held in @protocol_attributes itself (no copy is made).
# DEFAULT supplies ['src', 'href'] for the matching key.
def protocol_attributes
  @protocol_attributes
end

#remove_blanks ⇒ Object

tags which should be removed if they contain no content (e.g. “<b></b>” or “<b />”)



56
57
58
# File 'lib/raw/util/html_filter.rb', line 56

# Reader for the tags which should be removed if they contain no content.
#
# Returns the object held in @remove_blanks itself (no copy is made).
# DEFAULT supplies ['a', 'b'] for the matching key.
def remove_blanks
  @remove_blanks
end

#strip_comments ⇒ Object

should we remove comments? (true, false)



59
60
61
# File 'lib/raw/util/html_filter.rb', line 59

# Reader for the option that controls whether comments are removed.
#
# Returns the current value of @strip_comments unchanged.
# DEFAULT supplies +true+ for the matching key.
def strip_comments
  @strip_comments
end

Instance Method Details

#filter(data) ⇒ Object



109
110
111
112
113
114
115
116
117
118
119
# File 'lib/raw/util/html_filter.rb', line 109

# Filters +data+ by passing it through each processing step in turn.
#
# Steps, in order: escape_comments, balance_html, check_tags,
# process_remove_blanks, validate_entities. Each step receives the output
# of the previous one.
#
# data - the markup to filter (presumably a String; it is handed straight
#        to escape_comments).
#
# Returns whatever validate_entities returns for the processed data.
def filter(data)
  # Reset the instance-level tally to the same empty Hash that initialize
  # creates, rather than assigning a throwaway local, so counts from an
  # earlier call on this instance are not carried into this one.
  # NOTE(review): the helpers that read @tag_counts are outside this view.
  @tag_counts = {}

  data = escape_comments(data)
  data = balance_html(data)
  data = check_tags(data)
  data = process_remove_blanks(data)
  validate_entities(data)
end