Class: Deface::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/deface/parser.rb

Class Method Summary collapse

Class Method Details

.convert(source) ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/deface/parser.rb', line 85

# Parses an ERB template string into a Nokogiri node.
#
# The template's encoding is taken from an `# encoding: *` tag when the
# ERB handler's ENCODING_TAG pattern finds one, otherwise from
# Encoding.default_external. ERB tags are then rewritten via erb_markup!
# before the result is handed to Nokogiri.
#
# @param source [String] the template source; tagged with the resolved
#   encoding and rewritten in place unless it is frozen (a copy is used then)
# @return [Object] a full HTML document when an <html> tag is present, the
#   first <body> node when only a <body> tag is present, otherwise a
#   document fragment
# @raise [ActionView::WrongEncodingError] when the source is not valid in
#   the resolved encoding
def self.convert(source)
  encoding_tag = /#{ActionView::Template::Handlers::ERB.const_get(:ENCODING_TAG)}/
  encoding = source.scan(encoding_tag).first.try(:last) || Encoding.default_external

  # A frozen string cannot be re-tagged, so work on a copy in that case.
  source = source.dup if source.frozen?
  source.force_encoding(encoding)

  raise ActionView::WrongEncodingError.new(source, encoding) unless source.valid_encoding?

  erb_markup!(source)

  # Pick the parser by the outermost structural tag found in the source.
  case source
  when /<html.*?(?:(?!>)[\s\S])*>/
    Nokogiri::HTML::Document.parse(source)
  when /<body.*?(?:(?!>)[\s\S])*>/
    Nokogiri::HTML::Document.parse(source).css('body').first
  else
    Nokogiri::HTML::DocumentFragment.parse(source)
  end
end

.erb_markup!(source) ⇒ Object

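Converts the ERB tags in `source` into parseable markup (`<erb>` elements and `data-erb-*` attributes), modifying `source` in place.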



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/deface/parser.rb', line 8

# Rewrites ERB tags in +source+ (in place) into markup an HTML parser can
# digest:
#
# * ERB inside an attribute value (id="<%= x %>") becomes
#   data-erb-id="<escaped erb>"
# * ERB sitting directly inside an opening tag (<p <%= x %>>) becomes
#   data-erb-0="<escaped erb>", data-erb-1=..., numbered per tag
# * every other <%= ... %> / <% ... %> becomes <erb loud>...</erb> /
#   <erb silent>...</erb> with its content HTML-escaped
#
# @param source [String] template source; must be mutable
# @return [String] the same +source+ object, rewritten
def self.erb_markup!(source)
  # Regexes catching <% %> inside attributes, id="<% something %>", with
  # double, single or no quotes.
  # Captures: attribute name, the "=" with its spacing, opening quote, value.
  erb_attr_patterns = [
    /([\w-]+)(\s?=\s?)(")([^"]*<%.*?%>[^"]*)/m,
    /([\w-]+)(\s?=\s?)(')([^']*<%.*?%>[^']*)'/m,
    /([\w-]+)(\s?=\s?)()(<%.*?%>)(?:\s|>|\z)/m
  ]

  # all opening html tags that contain <% %> blocks
  source.scan(/<\w+[^<>]+(?:<%.*?%>[^<>]*)+/m).each do |tag|
    # Attributes are located in the untouched tag text, but replaced in the
    # progressively rewritten copy.
    marked_tag = erb_attr_patterns.inject(tag.clone) do |outer, pattern|
      tag.scan(pattern).inject(outer) do |inner, (name, equals, quote, value)|
        inner.sub("#{name}#{equals}#{quote}#{value}#{quote}") do
          " data-erb-#{name}=\"#{CGI.escapeHTML(value)}\""
        end
      end
    end

    # catch all <% %> inside tags, e.g. <p <%= test %>>, not inside attrs
    # (attribute ERB was escaped above, so it no longer matches here)
    index = -1
    marked_tag.gsub!(/<%.*?%>/m) do |erb|
      " data-erb-#{index += 1}=\"#{CGI.escapeHTML(erb)}\""
    end

    source.sub!(tag) { marked_tag }
  end

  # replaces all <% %> not inside opening html tags; "<%=" must be handled
  # before "<%" so loud tags are not mistaken for silent ones
  [['<%=', '<erb loud>'], ['<%', '<erb silent>'], ['%>', '</erb>']].each do |erb_tag, placeholder|
    source.gsub!(erb_tag, placeholder)
  end

  # Escape the body of every <erb> element in a single pass. Replacing each
  # match positionally (instead of searching for its text again) guarantees
  # a block is escaped exactly once, even when one block's escaped form is
  # identical to another block's raw text.
  source.gsub!(%r{(<erb.*?>)((?:(?!</erb>)[\s\S])*)(</erb>)}) do
    block = Regexp.last_match
    "#{block[1]}#{CGI.escapeHTML(block[2])}#{block[3]}"
  end

  source
end

.undo_erb_markup!(source) ⇒ Object

Undoes the ERB markup generated by Deface::Parser.erb_markup!, restoring the original ERB tags in `source` in place.



52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/deface/parser.rb', line 52

# Reverses the rewriting done by erb_markup!, in place:
#
# * <erb silent> / <erb loud> / </erb> placeholders (including the
#   silent="" / loud="" attribute forms) become <% / <%= / %>
# * data-erb-<number>="..." attributes become the bare, unescaped ERB tag
# * data-erb-<name>="..." attributes become <name>="<unescaped value>"
# * the body of every <% ... %> block is HTML-unescaped
#
# @param source [String] marked-up source; must be mutable
# @return [String] the same +source+ object, with ERB restored
def self.undo_erb_markup!(source)
  [
    ['<erb silent>',    '<%'],
    ['<erb silent="">', '<%'],
    ['<erb loud>',      '<%='],
    ['<erb loud="">',   '<%='],
    ['</erb>',          '%>']
  ].each { |placeholder, erb_tag| source.gsub!(placeholder, erb_tag) }

  # Numbered attributes held ERB that sat directly inside an opening tag.
  # The capture is a plain (\d+): a nested quantifier such as (\d+)+ matches
  # the same text but can backtrack exponentially on a failed match.
  source.scan(/data-erb-(\d+)=(['"])(.*?)\2/m).each do |index, quote, value|
    source.gsub!("data-erb-#{index}=#{quote}#{value}#{quote}") { CGI.unescapeHTML(value) }
  end

  # Named attributes held ERB inside an attribute value; restore the
  # original attribute name around the unescaped value.
  source.scan(/data-erb-([\w-]+)=(["'])(.*?)\2/m).each do |name, quote, value|
    source.gsub!("data-erb-#{name}=#{quote}#{value}#{quote}") do
      "#{name}=#{quote}#{CGI.unescapeHTML(value)}#{quote}"
    end
  end

  erb_block = /(<%.*?)((?:(?!%>)[\s\S])*)(%>)/

  # un-escape changes from Nokogiri and erb_markup!. Done in a single pass so
  # every block is unescaped exactly once, even when one block's unescaped
  # text is identical to another block's still-escaped text.
  source.gsub!(erb_block) do
    block = Regexp.last_match
    "#{block[1]}#{CGI.unescapeHTML(block[2])}#{block[3]}"
  end

  if RUBY_PLATFORM == 'java'
    # un-escapes changes from Nokogiri under Java, where " are converted to
    # %22 when in an attribute of an element
    source.gsub!(erb_block) do
      block = Regexp.last_match
      "#{block[1]}#{block[2].gsub('%22', '"')}#{block[3]}"
    end
  end

  source
end