Class: XlsHtmlCleaner

Inherits:
Object
  • Object
show all
Defined in:
lib/xls_html_cleaner.rb

Constant Summary collapse

# Version string of this library.
VERSION = '0.0.2'

# Tag names that clean! keeps in the document. clean! compares the
# downcased element name against this list, so entries are lower-case.
ALLOW_TAGS = [
  # document skeleton
  'html', 'head', 'title', 'body',
  # table structure
  'table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td', 'col', 'colgroup'
]

Instance Method Summary collapse

Constructor Details

#initialize ⇒ XlsHtmlCleaner

Returns a new instance of XlsHtmlCleaner.



10
11
12
# File 'lib/xls_html_cleaner.rb', line 10

# Creates a cleaner that has not parsed anything yet: @parser starts out
# as nil and is only assigned a document when clean! is called.
def initialize
  @parser = nil
end

Instance Method Details

#clean!(str) ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/xls_html_cleaner.rb', line 14

# Parses +str+ with Hpricot, strips everything except the tags listed in
# ALLOW_TAGS, and returns the traversal result converted with #to_s.
#
# For each node visited by traverse_all_element, in this order:
# * an element whose downcased name is not in ALLOW_TAGS is swapped for
#   its inner HTML, or for a newline when the inner HTML is empty;
# * a comment node is swapped for a newline;
# * an element has every one of its attributes removed.
#
# The parsed document is stored in @parser.
#
# @param str the HTML source handed to Hpricot()
# @return [String] +to_s+ of whatever traverse_all_element returns
def clean!(str)
  @parser = Hpricot(str)

  traversed = @parser.traverse_all_element do |node|
    # Unwrap disallowed elements, keeping only what they contained.
    if node.elem? && !ALLOW_TAGS.include?(node.name.downcase)
      markup = node.inner_html
      node.swap(markup.size > 0 ? markup : "\n")
    end

    node.swap("\n") if node.comment?

    # Runs for every element, including one that was just swapped out above.
    if node.elem?
      node.attributes.to_hash.each_key do |attr_name|
        node.remove_attribute(attr_name)
      end
    end
  end

  traversed.to_s
end