Module: MightyString::HTML

Defined in:
lib/mightystring/strip_html.rb

Class Method Summary collapse

Class Method Details

.default_options ⇒ Object



102
103
104
105
106
107
108
109
110
111
112
113
114
115
# File 'lib/mightystring/strip_html.rb', line 102

# Builds the default settings Hash used by the HTML helpers.
#
# A new Hash (with new nested Array/Hash objects) is created on every call.
#
# Keys:
#   :tag_markers  - pairs of [opening marker, closing marker]
#   :mappings     - replacement text keyed by a pattern for the removed sequence
#   :math_by_space - boolean flag, off by default
# The remaining keys are placeholders for features that are not implemented yet.
#
# @return [Hash] the default option set
def self.default_options
  marker_pairs = [["<",">"],["&",";"]]

  replacements = {
    "&quot;" => "'",
    "br" => "\n",
    "&#39;" => "'",
    "&nbsp;" => " ",
    "&trade;" => "(TM)",
    "&copy;" => "(c)"
  }

  defaults = {}
  defaults[:tag_markers] = marker_pairs
  defaults[:mappings] = replacements
  defaults[:math_by_space] = false
  defaults[:drop_styles] = true # TODO Add this feature
  defaults[:drop_scripts] = true # TODO Add this feature
  defaults[:drop_iframes] = false # TODO Add this feature
  defaults[:permitted_blank_line_rows] = 1 # TODO Add this feature
  defaults[:images_to_alt_text] = false # TODO Add this feature
  defaults
end

.html_math_exceptions(in_str = "") ⇒ Object



69
70
71
72
73
74
75
76
77
78
# File 'lib/mightystring/strip_html.rb', line 69

# Classifies a candidate tag/entity sequence that may really be plain text
# (for example a math expression such as "a < b").
#
# @param in_str [String] the candidate sequence
# @return [Integer]
#   1 - the sequence contains "< " or "& " (an opening marker followed by a space);
#   2 - the sequence contains both "&" and ";" and either contains a space or
#       is longer than 7 characters;
#   0 - neither of the above.
def self.html_math_exceptions(in_str = "")
  # An opening marker directly followed by a space.
  return 1 if in_str.include?("< ") || in_str.include?("& ")

  # HTML &code;s shouldn't contain spaces or be longer than 7 characters.
  has_both_entity_markers = in_str.include?("&") && in_str.include?(";")
  too_loose_for_entity = in_str.include?(" ") || in_str.length > 7
  return 2 if has_both_entity_markers && too_loose_for_entity

  0
end

.strip_first_seq(mstr = "", mseq = "", cmpchar = default_options[:mappings]) ⇒ Object

Strips a sequence out of a string. Arguments: the master string, the sequence to remove, and a mapping of replacement text to swap in place of the removed sequence (this for that).



81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/mightystring/strip_html.rb', line 81

# Removes the first occurrence of +mseq+ from +mstr+, optionally leaving a
# replacement in its place.
#
# The replacement is the value of the first key in +cmpchar+ for which
# +mseq.match_pci(key)+ is truthy; when no key matches, the sequence is
# simply removed.
#
# The position of +mseq+ is looked up once, and only one key is ever applied.
# A missing sequence, or several matching keys, therefore can no longer raise
# on a nil index or rewrite later occurrences of the sequence.
#
# @param mstr [String] master string to strip from
# @param mseq [String] sequence to remove
# @param cmpchar [Hash{String=>String}] replacement text keyed by a pattern
#   that is compared to +mseq+ with +match_pci+
# @return [String] +mstr+ itself when +mseq+ does not occur in it, otherwise
#   a new String with the first occurrence removed or replaced
def self.strip_first_seq( mstr = "", mseq = "", cmpchar = default_options[:mappings] )
  seq_at = mstr.index(mseq)
  return mstr if seq_at.nil? # nothing to strip

  matched_key = cmpchar.keys.find { |mkey| mseq.match_pci(mkey) }
  replacement = matched_key.nil? ? "" : cmpchar[matched_key]

  mstr[0, seq_at] + replacement + mstr[(seq_at + mseq.length)..-1]
end

.strip_html(htmlstr = "", xarg = default_options[:tag_markers]) ⇒ Object

Pick tags/blocks of string to remove (e.g. the markers “&” and “;”, as in “&quot;”, which can become “” or “'” if mapping rules are set).



97
98
99
100
# File 'lib/mightystring/strip_html.rb', line 97

# Deprecated entry point: prints a deprecation warning that names the
# caller's location, then delegates to +text+.
#
# @deprecated Use +MightyString::HTML.text+ instead.
# @param htmlstr [String] the string to strip
# @param xarg [Array<Array<String>>] pairs of [start marker, end marker];
#   forwarded to +text+ as the +:tag_markers+ option
# @return [Object] whatever +text+ returns
def self.strip_html( htmlstr = "", xarg = default_options[:tag_markers] ) # xarg start, end
    warn "#{Kernel.caller.first} [DEPRECATED] `MightyString::HTML.strip_html` is deprecated. Please use MightyString::HTML.text instead."
    text(htmlstr, :tag_markers => xarg)
end

.text(htmlstr, options = {}) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/mightystring/strip_html.rb', line 36

# Removes marker-delimited sequences (with the default options: "<...>" tags
# and "&...;" entities) from +htmlstr+, working from the end of the string
# towards the beginning.
#
# Each removed sequence is handed to +strip_first_seq+ together with the
# +:mappings+ option, so a sequence may be replaced instead of simply dropped.
#
# @param htmlstr [String] text to clean
# @param options [Hash] overrides for +default_options+. A +:mappings+ entry
#   is merged on top of the default mappings instead of replacing them.
#   The Hash passed in is not modified.
# @option options [Array<Array<String>>] :tag_markers pairs of
#   [opening marker, closing marker]; the loop below counts closing markers
#   with String#count, so it presumably expects single-character markers —
#   TODO confirm
# @option options [Hash{String=>String}] :mappings replacement text
# @option options [Boolean] :math_by_space when truthy, a sequence for which
#   +html_math_exceptions+ returns non-zero is left in place
# @return [String] the cleaned text
def self.text(htmlstr, options = {})
  # Work on a copy so the caller's Hash keeps its original :mappings entry.
  options = options.dup
  if options.has_key?(:mappings)
    options[:mappings] = default_options[:mappings].merge(options[:mappings])
  end
  options = default_options.merge(options)

  options[:tag_markers].each { |g|
    sh_end = htmlstr.rindex(g[1])
    # No closing marker for this pair: skip only this pair so that the
    # remaining pairs (e.g. entities in tag-free text) are still processed.
    next if sh_end.nil?
    sh_start = htmlstr.rindex(g[0])

    while sh_end && sh_start
      if sh_end > sh_start
        sh_seq = htmlstr[sh_start, sh_end - sh_start + 1]
        until sh_seq.count(g[1]) == 1 # until we've selected only the inner block
          # Search everything before the current closing marker, including
          # the character right before it, so adjacent markers are found.
          sh_end = htmlstr[0, sh_end].rindex(g[1])
          sh_seq = htmlstr[sh_start, sh_end - sh_start + 1]
        end
        if options[:math_by_space] && html_math_exceptions(sh_seq) != 0
          # Looks like plain text rather than markup: keep it and move the
          # search window to before this closing marker.
          sh_end -= 1
        else
          htmlstr = strip_first_seq(htmlstr, sh_seq, options[:mappings])
        end
      else
        # Closing marker sits before the opening one: look for an earlier
        # opening marker.
        sh_start -= 1
      end
      # Re-scan backwards from the current positions in the (possibly
      # shortened) string.
      sh_end = htmlstr[0..sh_end].rindex(g[1])
      sh_start = htmlstr[0..sh_start].rindex(g[0])
    end
  }
  htmlstr
end