Class: CML::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/cml/parser.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(content, opts = {}) ⇒ Parser

Returns a new instance of Parser.



5
6
7
8
9
10
11
12
13
14
15
# File 'lib/cml/parser.rb', line 5

# Parses +content+ and builds one tag object per top-level cml:* element.
#
# @param content [String] the markup to parse
# @param opts [Hash] options; kept in @opts and handed to every tag object.
#   A truthy :normalize entry runs #normalize before the tag objects are built.
def initialize(content, opts = {})
  @opts = opts
  # Nokogiri munges the CDATA of script/style elements, so their bodies are
  # captured from the raw string before parsing.
  @cdata = content.scan(/(<(script|style)[^>]*?>)(.*?)(<\/\2>)/m)
  @doc = Parser.parse(content)
  # Outermost cml:* elements only: anything nested in another cml:* is excluded.
  @cftags = @doc.xpath("//cml:*[not(ancestor::cml:*)]")
  if opts[:normalize]
    normalize
  end
  @tags = @cftags.map { |node| Parser.tag_class(node.name).new(node, @opts) }
end

Instance Attribute Details

#cftags ⇒ Object (readonly)

Returns the value of attribute cftags.



3
4
5
# File 'lib/cml/parser.rb', line 3

# Returns the result of the "//cml:*[not(ancestor::cml:*)]" XPath query run
# in #initialize, i.e. the cml:* elements that are not nested inside another
# cml:* element.
def cftags
  @cftags
end

#doc ⇒ Object (readonly)

Returns the value of attribute doc.



3
4
5
# File 'lib/cml/parser.rb', line 3

# Returns the document that Parser.parse produced from the content given to
# #initialize.
def doc
  @doc
end

#errors ⇒ Object (readonly)

Returns the value of attribute errors.



3
4
5
# File 'lib/cml/parser.rb', line 3

# Returns the error messages collected by the most recent #valid? call.
# #initialize does not assign @errors, so this is presumably nil until
# #valid? has been called at least once.
def errors
  @errors
end

#tags ⇒ Object (readonly)

Returns the value of attribute tags.



3
4
5
# File 'lib/cml/parser.rb', line 3

# Returns the tag objects built in #initialize: one per entry of #cftags, in
# the same order, each an instance of the class chosen by Parser.tag_class.
def tags
  @tags
end

Class Method Details

.escape(string) ⇒ Object



132
133
134
135
136
137
# File 'lib/cml/parser.rb', line 132

# Escapes the four markup-significant characters (& < > ") as entities.
#
# @param string [#to_s] value to escape; converted with to_s first
# @return [String] the escaped text
def self.escape( string )
  entities = { "&" => "&amp;", "<" => "&lt;", ">" => "&gt;", "\"" => "&quot;" }
  # One pass is enough: none of the replacements is scanned again.
  string.to_s.gsub( /[&<>"]/ ) { |char| entities[char] }
end

.parse(content) ⇒ Object



17
18
19
20
21
22
# File 'lib/cml/parser.rb', line 17

# Pre-processes +content+ so it can be read as XML and parses it inside a
# <root> element that declares the cml namespace.
#
# @param content [String] raw markup
# @return [Object] whatever Nokogiri::XML returns for the wrapped markup
def self.parse(content)
  # Empty out script/style elements (only the open and close tags remain)...
  # We could potentially wrap their bodies in CDATA, but this is "easier".
  without_bodies = content.gsub(/(<(script|style)[^>]*?>)(.*?)(<\/\2>)/m, "\\1\\4")
  # ...and self-close the tags that are usually left unclosed.
  xhtml = without_bodies.gsub(/(<(input|link|img|br|hr).*?)\/?>/,'\1/>') #base, basefont, area, meta
  wrapped = "<root xmlns:cml=\"http://crowdflower.com\">#{xhtml}</root>"
  Nokogiri::XML(wrapped)
end

.tag_class(name) ⇒ Object

Takes the name of a tag and returns the matching class in CML::Tags, or CML::Tags::Unknown when there is none.



140
141
142
143
144
# File 'lib/cml/parser.rb', line 140

# Maps a tag name to the class of the same (camel-cased) name in CML::Tags.
#
# The name is camel-cased by upcasing the first character and every character
# that follows an underscore ("multiple_choice" -> "MultipleChoice").
#
# The lookup uses const_get instead of evaluating the name as Ruby code: the
# name comes from the parsed document, and evaluating it would run arbitrary
# expressions (e.g. an element called "kernel.exit"). Passing +false+ limits
# the lookup to CML::Tags itself, so names such as "string" no longer resolve
# to unrelated top-level constants.
#
# @param name [String] element name, e.g. "radios"
# @return [Object] the matching constant of CML::Tags, or CML::Tags::Unknown
#   when the lookup fails for any reason
def self.tag_class(name)
  class_name = name.gsub(/(^|_)(.)/) { Regexp.last_match(2).upcase }
  CML::Tags.const_get(class_name, false)
rescue StandardError
  CML::Tags::Unknown
end

Instance Method Details

#convert(opts = nil) ⇒ Object



24
25
26
27
28
29
30
31
# File 'lib/cml/parser.rb', line 24

# Returns a copy of the document in which every top-level cml:* element has
# been replaced by the output of the corresponding tag object's #convert.
# The original document (@doc) is left untouched.
#
# @param opts [Hash, nil] extra options; merged into @opts (in place) when
#   given, and passed on unchanged to each tag's #convert
# @return [Object] the converted copy of @doc
def convert(opts = nil)
  if opts
    @opts.merge!(opts)
  end
  @doc.dup.tap do |copy|
    # Same query as in #initialize, so node i lines up with @tags[i].
    top_level = copy.xpath("//cml:*[not(ancestor::cml:*)]")
    top_level.each_with_index do |node, index|
      node.replace(@tags[index].convert(opts))
    end
  end
end

#fields ⇒ Object



45
46
47
48
49
50
51
52
# File 'lib/cml/parser.rb', line 45

# Collects the "aggregation" attribute of every tag that has one.
# The hash is rebuilt (and stored in @fields) on every call.
#
# @return [Hash] tag name => aggregation attribute converted with to_s
def fields
  @fields = @tags.each_with_object({}) do |tag, by_name|
    aggregation = tag.attrs["aggregation"]
    next unless aggregation
    by_name[tag.name] = aggregation.to_s
  end
end

#golds ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/cml/parser.rb', line 54

# Builds (once) and returns the gold settings of all tags.
#
# For every tag whose #gold? returns an object, the result holds
#   tag name          => gold column name ("gold" attribute, else "src"; when
#                        that is "true" or blank, "<tag name>_gold")
#   "_<column>_strict" => the "strict" attribute as a string, if present
#   "_<column>_regex"  => [regex attribute, flags attribute] as strings, if a
#                        "regex" attribute is present
#
# @return [Hash] memoized in @golds after the first call
def golds
  return @golds if @golds
  @golds = {}
  @tags.each do |tag|
    gold_node = tag.gold?
    next unless gold_node
    column = (gold_node.attributes["gold"] || gold_node.attributes["src"]).to_s
    column = "#{tag.name}_gold" if column =~ /^(true|\s*)$/
    @golds[tag.name] = column
    ["strict","regex"].each do |option|
      setting = gold_node.attributes[option]
      next unless setting
      @golds["_#{column}_#{option}"] =
        if option == "regex"
          # A regex is stored together with its flags.
          [setting.to_s, gold_node.attributes["flags"].to_s]
        else
          setting.to_s
        end
    end
  end
  @golds
end

#normalize ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
# File 'lib/cml/parser.rb', line 33

# Renames the top-level elements in place so that similar tags share one
# name: "radios" and "select" become "checkboxes" (their cml:radio /
# cml:option children become "checkbox"), and "meta" becomes "text".
#
# @return [Object] @cftags
def normalize
  child_names = { "radios" => "radio", "select" => "option" }
  @cftags.each do |node|
    case node.name
    when "radios", "select"
      # Pick the child name before the parent is renamed.
      child_name = child_names[node.name]
      node.name = "checkboxes"
      node.xpath("cml:#{child_name}").each do |item|
        item.name = "checkbox"
      end
    when "meta"
      node.name = "text"
    end
  end
end

#to_cml ⇒ Object



108
109
110
111
112
113
114
115
116
117
# File 'lib/cml/parser.rb', line 108

# Serializes the parsed (unconverted) document back to markup.
#
# The <root> wrapper added by Parser.parse and empty "<>" / "</>" tags are
# stripped, input/link/img/br/hr tags are self-closed, and the script/style
# bodies captured in #initialize are put back into the emptied elements.
#
# @return [String] the markup
def to_cml
  cml = @doc.to_xhtml.gsub(/<\/?root[^>]*?>|<\/?>/,'').gsub(/(<(input|link|img|br|hr).*?)\/?>/,'\1/>')
  @cdata.each do |matches|
    # An empty body is replaced by a single space so the element no longer
    # looks empty and the next iteration fills the following one instead.
    body = matches[2].empty? ? " " : matches[2]
    # Fixed: this used to call sub! on an undefined local (`html`), raising
    # NameError whenever the document contained a script or style element.
    cml.sub!(/(<(script|style)[^>]*>)<\/\2>/m) do
      "#{Regexp.last_match(1)}#{body}</#{Regexp.last_match(2)}>"
    end
  end
  cml
end

#to_html(opts = nil) ⇒ Object



119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/cml/parser.rb', line 119

# Renders the document as HTML: converts every tag, cleans up the serialized
# markup, restores script/style bodies and wraps the result (see #wrap).
#
# @param opts [Hash, nil] options forwarded to #convert
# @return [Object] the return value of #wrap for the rendered markup
def to_html(opts = nil)
  # Strip the root wrapper and the stray empty tags that libxml 2.6.32 emits.
  markup = convert(opts).to_xhtml.gsub(/<\/?root[^>]*?>|<\/?>/,'')
  # Self-close the tags that are usually left unclosed.
  markup = markup.gsub(/(<(input|link|img|br|hr).*?)\/?>/,'\1/>')
  # Re-insert the captured bodies through a block, so that whatever the
  # scripts contain is inserted literally. An empty body becomes a single
  # space so the element stops matching on the next iteration.
  @cdata.each do |_open_tag, _tag_name, body, _close_tag|
    filler = body.empty? ? " " : body
    markup.sub!(/(<(script|style)[^>]*>)<\/\2>/m) { "#{$1}#{filler}</#{$2}>" }
  end
  wrap(markup)
end

#to_s ⇒ Object



104
105
106
# File 'lib/cml/parser.rb', line 104

# String form of the parser: the result of #to_html called without options.
def to_s
  to_html
end

#valid? ⇒ Boolean

Returns:

  • (Boolean)


72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/cml/parser.rb', line 72

# Validates the parsed document and its tags. @errors is reset and refilled
# on every call.
#
# Checks, in this order:
# * the last fatal parse error reported by the document, if any;
# * validating tags whose name is blank;
# * validating tags that share their name with another tag;
# * children of a tag that share their value with a sibling.
#
# @return [Boolean] true when no error message was collected
def valid?
  @errors = []
  # Fixed: the block used to call `s.fatal?` on an undefined name, raising
  # NameError as soon as the document had any parse error.
  if (fatal = @doc.errors.select { |err| err.fatal? }.last)
    @errors << "Malformed CML (#{fatal.level}).  #{fatal.message.chomp} on line #{fatal.line} column #{fatal.column}."
  end
  @tags.select {|t| t.validate? && t.name =~ /^\s*$/ }.each do |t|
    # Only the first line of the tag's markup is quoted in the message.
    @errors << "#{t.to_s.split("\n")[0]} does not have a label or name specified."
  end
  dupes = @tags.select do |tag|
    tag.validate? && @tags.select {|t| t.name == tag.name}.length > 1
  end
  # The first entry of the list is not reported; every later one is.
  # [][1..-1] is nil, hence the fallback to an empty list.
  (dupes[1..-1] || []).each do |t|
    @errors << "#{t.to_s.split("\n")[0]} has a duplicated name, please specify a unique name attribute."
  end
  @tags.each do |t|
    next unless t.children
    dupes = t.children.select do |child|
      t.children.select {|c| c.value == child.value}.length > 1
    end
    (dupes[1..-1] || []).each do |c|
      @errors << "#{c} a child of #{t.to_s.split("\n")[0]} has a duplicated value, please specify a unique value attribute."
    end
  end
  @errors.length == 0
end

#wrap(content) ⇒ Object



98
99
100
101
102
# File 'lib/cml/parser.rb', line 98

# Finishes rendered markup: turns URL-encoded "{{" / "}}" back into literal
# braces and, unless @opts[:no_wrap] is set, wraps the result in a
# <div class="cml ..."> whose id is @opts[:prefix].
#
# @param content [String] rendered markup
# @return [String] the finished markup
def wrap(content)
  # Variables end up URL-encoded when they sit inside of href's.
  restored = content.gsub(/%7B%7B/,'{{').gsub(/%7D%7D/,'}}')
  return restored if @opts[:no_wrap]

  # Optional extra CSS class, appended after "cml".
  class_suffix = (" "+@opts[:class] if @opts[:class])
  "<div class=\"cml#{class_suffix}\" id=\"#{@opts[:prefix]}\">#{restored}</div>"
end