Class: CML::Parser
- Inherits:
-
Object
- Object
- CML::Parser
- Defined in:
- lib/cml/parser.rb
Instance Attribute Summary collapse
-
#cftags ⇒ Object
readonly
Returns the value of attribute cftags.
-
#doc ⇒ Object
readonly
Returns the value of attribute doc.
-
#errors ⇒ Object
readonly
Returns the value of attribute errors.
-
#tags ⇒ Object
readonly
Returns the value of attribute tags.
Class Method Summary collapse
- .escape(string) ⇒ Object
- .parse(content) ⇒ Object
-
.tag_class(name) ⇒ Object
This takes the name of the tag and converts it to the appropriate tag class.
Instance Method Summary collapse
- #convert(opts = nil) ⇒ Object
- #fields ⇒ Object
- #golds ⇒ Object
-
#initialize(content, opts = {}) ⇒ Parser
constructor
A new instance of Parser.
- #normalize ⇒ Object
- #to_cml ⇒ Object
- #to_html(opts = nil) ⇒ Object
- #to_s ⇒ Object
- #valid? ⇒ Boolean
- #wrap(content) ⇒ Object
Constructor Details
#initialize(content, opts = {}) ⇒ Parser
Returns a new instance of Parser.
5 6 7 8 9 10 11 12 13 14 15 |
# File 'lib/cml/parser.rb', line 5
# Builds a parser around a CML document.
#
# @param content [String] raw CML/HTML markup
# @param opts [Hash] options stored in @opts and passed to every tag object;
#   a truthy :normalize runs #normalize before the tag objects are created
def initialize(content, opts = {})
  @opts = opts
  # Nokogiri munges CDATA sections, so the script/style bodies are captured
  # ahead of time (the serializers put them back later).
  @cdata = content.scan(/(<(script|style)[^>]*?>)(.*?)(<\/\2>)/m)
  @doc = Parser.parse(content)
  # Outermost cml:* elements only; anything nested in another cml:* is skipped.
  @cftags = @doc.xpath("//cml:*[not(ancestor::cml:*)]")
  normalize if opts[:normalize]
  @tags = @cftags.map { |node| Parser.tag_class(node.name).new(node, @opts) }
end
Instance Attribute Details
#cftags ⇒ Object (readonly)
Returns the value of attribute cftags.
3 4 5 |
# File 'lib/cml/parser.rb', line 3
# Read-only accessor for the top-level cml:* nodes selected in #initialize.
#
# @return [Object] the value of @cftags
def cftags
  @cftags
end
#doc ⇒ Object (readonly)
Returns the value of attribute doc.
3 4 5 |
# File 'lib/cml/parser.rb', line 3
# Read-only accessor for the parsed document assigned in #initialize.
#
# @return [Object] the value of @doc
def doc
  @doc
end
#errors ⇒ Object (readonly)
Returns the value of attribute errors.
3 4 5 |
# File 'lib/cml/parser.rb', line 3
# Read-only accessor for the validation messages. The list is (re)built by
# #valid?; the constructor does not assign it.
#
# @return [Object] the value of @errors
def errors
  @errors
end
#tags ⇒ Object (readonly)
Returns the value of attribute tags.
3 4 5 |
# File 'lib/cml/parser.rb', line 3
# Read-only accessor for the tag objects built in #initialize, one per
# top-level cml:* node.
#
# @return [Object] the value of @tags
def tags
  @tags
end
Class Method Details
.escape(string) ⇒ Object
132 133 134 135 136 137 |
# File 'lib/cml/parser.rb', line 132
# Escapes the four HTML/XML-significant characters & < > " as entities.
#
# @param string [#to_s] value to escape (nil becomes the empty string)
# @return [String] a new, escaped string
def self.escape( string )
  # The ampersand is replaced first so the entities produced by the later
  # substitutions are not escaped a second time.
  string.to_s.gsub( /&/, "&amp;" ).
              gsub( /</, "&lt;" ).
              gsub( />/, "&gt;" ).
              gsub( /"/, "&quot;" )
end
.parse(content) ⇒ Object
17 18 19 20 21 22 |
# File 'lib/cml/parser.rb', line 17
# Turns CML markup into a Nokogiri XML document wrapped in a <root> element
# that declares the cml namespace.
#
# @param content [String] raw CML/HTML markup
# @return [Object] the result of Nokogiri::XML
def self.parse(content)
  # Script and style bodies are emptied rather than wrapped in CDATA; that
  # was judged "easier" than adding CDATA clauses.
  without_bodies = content.gsub(/(<(script|style)[^>]*?>)(.*?)(<\/\2>)/m, "\\1\\4")
  # Self-close void tags so the markup parses as XML.
  # Not handled yet: base, basefont, area, meta
  xhtml = without_bodies.gsub(/(<(input|link|img|br|hr).*?)\/?>/,'\1/>')
  Nokogiri::XML("<root xmlns:cml=\"http://crowdflower.com\">#{xhtml}</root>")
end
Instance Method Details
#convert(opts = nil) ⇒ Object
24 25 26 27 28 29 30 31 |
# File 'lib/cml/parser.rb', line 24
# Returns a copy of the document in which each top-level cml:* node has been
# replaced by the output of the matching tag object's #convert.
#
# @param opts [Hash, nil] extra options; merged into @opts in place when given
# @return [Object] the duplicated, converted document
def convert(opts = nil)
  @opts.merge!(opts) if opts
  @doc.dup.tap do |copy|
    # Same XPath as the constructor, so node i lines up with @tags[i].
    copy.xpath("//cml:*[not(ancestor::cml:*)]").each_with_index do |node, index|
      node.replace(@tags[index].convert(opts))
    end
  end
end
#fields ⇒ Object
45 46 47 48 49 50 51 52 |
# File 'lib/cml/parser.rb', line 45
# Maps tag name to its "aggregation" attribute (as a String) for every tag
# that has one. The hash is rebuilt on each call.
#
# @return [Hash] tag name => aggregation value
def fields
  @fields = {}
  @tags.each do |tag|
    aggregation = tag.attrs["aggregation"]
    next unless aggregation

    @fields[tag.name] = aggregation.to_s
  end
  @fields
end
#golds ⇒ Object
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/cml/parser.rb', line 54
# Collects gold settings for every tag whose #gold? returns something truthy.
# The result is memoized in @golds.
#
# For each such tag the hash gets:
#   tag name             => gold field name
#   "_<field>_strict"    => strict attribute, when present
#   "_<field>_regex"     => [regex, flags], when a regex attribute is present
#
# @return [Hash] the gold configuration
def golds
  return @golds if @golds

  @golds = {}
  @tags.each do |tag|
    gold = tag.gold?
    next unless gold

    attributes = gold.attributes
    field = (attributes["gold"] || attributes["src"]).to_s
    # A bare gold="true" (or blank value) falls back to "<name>_gold".
    field = "#{tag.name}_gold" if field =~ /^(true|\s*)$/
    @golds[tag.name] = field

    strict = attributes["strict"]
    @golds["_#{field}_strict"] = strict.to_s if strict

    regex = attributes["regex"]
    @golds["_#{field}_regex"] = [regex.to_s, attributes["flags"].to_s] if regex
  end
  @golds
end
#normalize ⇒ Object
33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/cml/parser.rb', line 33
# Rewrites the top-level nodes in place:
#   radios / select  -> checkboxes (their radio / option children -> checkbox)
#   meta             -> text
# Other nodes are left alone.
#
# @return [Object] @cftags, as returned by #each
def normalize
  @cftags.each do |node|
    case node.name
    when "radios", "select"
      # Pick the child element name before the parent is renamed.
      child_name = node.name == "radios" ? "radio" : "option"
      node.name = "checkboxes"
      node.xpath("cml:#{child_name}").each { |child| child.name = "checkbox" }
    when "meta"
      node.name = "text"
    end
  end
end
#to_cml ⇒ Object
108 109 110 111 112 113 114 115 116 117 |
# File 'lib/cml/parser.rb', line 108
# Serializes the (unconverted) document back to CML markup.
#
# The synthetic <root> wrapper and stray empty tags are stripped, void tags
# are self-closed, and the script/style bodies captured by the constructor
# are put back into the emptied elements in document order.
#
# @return [String] the CML markup
def to_cml
  cml = @doc.to_xhtml.gsub(/<\/?root[^>]*?>|<\/?>/,'').gsub(/(<(input|link|img|br|hr).*?)\/?>/,'\1/>')
  @cdata.each do |matches|
    # Fixed: this used to call sub! on an undefined local `html`, raising
    # NameError as soon as one script/style block had been captured.
    cml.sub!(/(<(script|style)[^>]*>)<\/\2>/m) do |m|
      # Hack: an empty body becomes a single space so the next sub! does not
      # match this same (still empty) element again.
      "#{$1}#{matches[2].empty? ? " " : matches[2]}</#{$2}>"
    end
  end
  cml
end
#to_html(opts = nil) ⇒ Object
119 120 121 122 123 124 125 126 127 128 129 130 |
# File 'lib/cml/parser.rb', line 119
# Renders the converted document as HTML and passes it through #wrap.
#
# @param opts [Hash, nil] options forwarded to #convert
# @return [String] the value returned by #wrap
def to_html(opts = nil)
  xhtml = convert(opts).to_xhtml
  # Strip the root tags and the random empty tags libxml 2.6.32 emits.
  stripped = xhtml.gsub(/<\/?root[^>]*?>|<\/?>/,'')
  # Self-close void tags.
  html = stripped.gsub(/(<(input|link|img|br|hr).*?)\/?>/,'\1/>')
  # Re-insert the captured script/style bodies. The block form of sub! is
  # used because scripts will sometimes contain single quotes.
  @cdata.each do |matches|
    body = matches[2].empty? ? " " : matches[2]
    html.sub!(/(<(script|style)[^>]*>)<\/\2>/m) { "#{$1}#{body}</#{$2}>" }
  end
  wrap(html)
end
#to_s ⇒ Object
104 105 106 |
# File 'lib/cml/parser.rb', line 104
# String form of the parser: the HTML rendering with default options.
#
# @return [String] same as #to_html called without arguments
def to_s
  to_html
end
#valid? ⇒ Boolean
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
# File 'lib/cml/parser.rb', line 72
# Validates the document and rebuilds @errors.
#
# Checks, in order:
#   1. the last fatal parse error reported by the document, if any
#   2. validated tags whose name is blank
#   3. validated tags whose name is used by more than one tag
#   4. children of a tag that share the same value
#
# @return [Boolean] true when no errors were recorded
def valid?
  @errors = []

  # Fixed: the block referenced an undefined `s`, and the message call had
  # lost its method name.
  if e = @doc.errors.select { |err| err.fatal? }.last
    @errors << "Malformed CML (#{e.level}). #{e.message.chomp} on line #{e.line} column #{e.column}."
  end

  @tags.select { |t| t.validate? && t.name =~ /^\s*$/ }.each do |t|
    @errors << "#{t.to_s.split("\n")[0]} does not have a label or name specified."
  end

  dupes = @tags.select do |tag|
    tag.validate? && @tags.count { |t| t.name == tag.name } > 1
  end
  # NOTE(review): only the first duplicate overall is skipped, not the first
  # of each name group; kept as-is so the reported messages do not change.
  (dupes[1..-1] || []).each do |t|
    @errors << "#{t.to_s.split("\n")[0]} has a duplicated name, please specify a unique name attribute."
  end

  @tags.each do |t|
    next unless t.children
    dupes = t.children.select do |child|
      t.children.count { |c| c.value == child.value } > 1
    end
    (dupes[1..-1] || []).each do |c|
      @errors << "#{c} a child of #{t.to_s.split("\n")[0]} has a duplicated value, please specify a unique value attribute."
    end
  end

  @errors.length == 0
end
#wrap(content) ⇒ Object
98 99 100 101 102 |
# File 'lib/cml/parser.rb', line 98
# Restores URL-encoded {{ }} delimiters and, unless @opts[:no_wrap] is set,
# wraps the markup in a <div class="cml ..." id="...">.
#
# @param content [String] rendered markup
# @return [String] the markup, wrapped unless :no_wrap is truthy
def wrap(content)
  # Variables placed inside href attributes come back percent-encoded.
  restored = content.gsub(/%7B%7B/,'{{').gsub(/%7D%7D/,'}}')
  return restored if @opts[:no_wrap]

  extra_class = @opts[:class] ? " "+@opts[:class] : ""
  "<div class=\"cml#{extra_class}\" id=\"#{@opts[:prefix]}\">#{restored}</div>"
end