Class: Deface::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/deface/parser.rb

Class Method Summary collapse

Class Method Details

.convert(source) ⇒ Object



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/deface/parser.rb', line 76

# Parses ERB/HTML +source+ into a Nokogiri structure.
#
# When the string is encoding aware it is first tagged with the encoding named
# by an encoding tag found in the source, or with the default external encoding
# when there is none. The ERB is then rewritten in place by erb_markup! before
# the result is handed to Nokogiri.
#
# source - String of template source; it is mutated (re-encoded and rewritten).
#
# Returns a Nokogiri HTML document when the source has an <html> tag, the
# document's first <body> node when it only has a <body> tag, and a document
# fragment otherwise.
# Raises ActionView::WrongEncodingError when the source is not valid in the
# chosen encoding.
def self.convert(source)
  if source.encoding_aware?
    # An encoding tag in the source wins; otherwise fall back to the
    # default external encoding.
    encoding_tag = /#{ActionView::Template::Handlers::ERB.const_get(:ENCODING_TAG)}/
    declared = source.scan(encoding_tag).first.try(:last)
    encoding = declared || Encoding.default_external

    # Tag the source with whichever encoding was selected, then verify it.
    source.force_encoding(encoding)
    raise ActionView::WrongEncodingError.new(source, encoding) unless source.valid_encoding?
  end

  erb_markup!(source)

  # Pick the parser entry point from the outermost element present.
  case source
  when /<html.*?(?:(?!>)[\s\S])*>/
    Nokogiri::HTML::Document.parse(source)
  when /<body.*?(?:(?!>)[\s\S])*>/
    Nokogiri::HTML::Document.parse(source).css('body').first
  else
    Nokogiri::HTML::DocumentFragment.parse(source)
  end
end

.erb_markup!(source) ⇒ Object

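Converts the ERB tags in the source into HTML markup: ERB inside opening tags becomes data-erb-* attributes, and all other ERB becomes <code erb-loud> / <code erb-silent> elements.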



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/deface/parser.rb', line 8

# Rewrites the ERB in +source+ as plain HTML markup, in place.
#
# ERB found inside an opening HTML tag is turned into attributes:
# * an attribute whose value contains ERB (name="<% ... %>") becomes
#   data-erb-name="..." with the value HTML-escaped;
# * ERB sitting directly in the tag (<p <%= x %>>) becomes
#   data-erb-0="...", data-erb-1="...", numbered per tag.
# Every remaining <%= / <% / %> becomes <code erb-loud> / <code erb-silent> /
# </code>, and the text between those code tags is HTML-escaped.
#
# source - String to rewrite; it is modified in place.
#
# Returns the same (mutated) String.
def self.erb_markup!(source)
  # Every opening html tag that contains at least one <% %> block.
  source.scan(/<\w+[^<>]+(?:<%.*?%>[^<>]*)+/m).each do |tag|
    # ERB used as an attribute value: double-quoted, single-quoted, unquoted.
    attr_patterns = [/([\w-]+)(\s?=\s?)(")([^"]*<%.*?%>[^"]*)/m,
      /([\w-]+)(\s?=\s?)(')([^']*<%.*?%>[^']*)'/m,
      /([\w-]+)(\s?=\s?)()(<%.*?%>)(?:\s|>|\z)/m]

    # Matches are always looked up in the untouched tag; the substitutions
    # accumulate in a working copy.
    rewritten = tag.clone
    attr_patterns.each do |pattern|
      tag.scan(pattern).each do |name, equals, quote, value|
        attribute = "#{name}#{equals}#{quote}#{value}#{quote}"
        rewritten = rewritten.sub(attribute) { " data-erb-#{name}=\"#{CGI.escapeHTML(value)}\"" }
      end
    end

    # Whatever ERB is left sits directly inside the tag, not in an attribute.
    counter = -1
    rewritten.scan(/(<%.*?%>)/m).each do |(erb)|
      rewritten.sub!(erb) { " data-erb-#{counter += 1}=\"#{CGI.escapeHTML(erb)}\"" }
    end

    source.sub!(tag) { rewritten }
  end

  # ERB outside opening tags: "<%=" must be replaced before the shorter "<%".
  [["<%=", "<code erb-loud>"],
   ["<%",  "<code erb-silent>"],
   ["%>",  "</code>"]].each do |erb_token, markup|
    source.gsub! erb_token, markup
  end

  # Escape the text between each pair of code tags.
  source.scan(/(<code.*?>)((?:(?!<\/code>)[\s\S])*)(<\/code>)/).each do |open_tag, body, close_tag|
    source.sub!("#{open_tag}#{body}#{close_tag}") { "#{open_tag}#{CGI.escapeHTML(body)}#{close_tag}" }
  end

  source
end

.undo_erb_markup!(source) ⇒ Object

Undoes the ERB markup generated by Deface::Parser.erb_markup!, restoring the original <% %> tags.



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/deface/parser.rb', line 53

# Reverses the markup produced by erb_markup!, turning it back into ERB.
#
# The steps run in this order:
# * <code erb-silent>, <code erb-loud> and </code> become <%, <%= and %>
#   (every "</code>" in the string is replaced);
# * data-erb-N="..." attributes (numeric suffix) are replaced by their
#   HTML-unescaped value, i.e. the bare ERB tag;
# * data-erb-NAME="..." attributes become NAME="..." with the value
#   HTML-unescaped;
# * the body of every <% ... %> tag is HTML-unescaped.
#
# source - String to rewrite; it is modified in place.
#
# Returns the same (mutated) String.
def self.undo_erb_markup!(source)
  replacements = [ ["<code erb-silent>", '<%'],
                   ["<code erb-loud>",   '<%='],
                   ["</code>",           '%>'] ]

  replacements.each { |markup, erb| source.gsub! markup, erb }

  # Each pass below is a single gsub! over the string, so every occurrence is
  # rewritten exactly once. Re-searching the string for each match's original
  # text would let the output of one replacement be picked up and unescaped
  # again by a later one.

  # ERB that sat directly inside a tag: drop the attribute wrapper entirely.
  source.gsub!(/data-erb-(\d+)=(['"])(.*?)\2/m) do
    CGI.unescapeHTML(Regexp.last_match(3))
  end

  # ERB that was an attribute value: restore the original attribute name.
  source.gsub!(/data-erb-([\w-]+)=(["'])(.*?)\2/m) do
    name, quote, value = Regexp.last_match.captures
    "#{name}=#{quote}#{CGI.unescapeHTML(value)}#{quote}"
  end

  # Un-escape changes from Nokogiri and erb_markup! inside each ERB tag.
  source.gsub!(/(<%.*?)((?:(?!%>)[\s\S])*)(%>)/) do
    opening, body, closing = Regexp.last_match.captures
    "#{opening}#{CGI.unescapeHTML(body)}#{closing}"
  end

  source
end