Module: Butter

Included in:
String
Defined in:
lib/butter.rb,
lib/butter/version.rb

Constant Summary collapse

VERSION =
"1.0.1"

Instance Method Summary collapse

Instance Method Details

#truncate_html(num_words = 30, opts = {}) ⇒ Object

Usage ==========================================================

"<p>An HTML string</p>".truncate_html 2, :tail => "…"         # => "<p>An HTML…</p>"

"<p>An HTML string</p>".truncate_html 2, :tail => " &rarr;"   # => "<p>An HTML &rarr;</p>"

"<p>An HTML string</p>".truncate_html 2, :strip_html => true  # => "An HTML…"



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/butter.rb', line 18

# Truncates the HTML held in this string to at most +num_words+ words of
# text, appending a tail marker at the cut and removing every node that
# follows it in document order.
#
# The string is parsed with Nokogiri::HTML and its text nodes are visited in
# document order; a "word" is a whitespace-separated token. When the document
# holds no more than +num_words+ words, nothing is cut and no tail is added.
#
# num_words - maximum number of words to keep (default 30). Values below
#             zero are treated as zero.
# opts      - Hash of options:
#             :tail       - text appended at the cut; it is run through
#                           HTMLEntities#decode first (default "&hellip;").
#             :strip_html - when exactly +true+, the text content is
#                           returned instead of markup (default false).
#             :word_cut   - accepted and defaulted to true, but not read
#                           anywhere in this method.
#
# Returns the truncated markup (or text). If the receiver responds to
# +html_safe?+ and it is true, the result is passed through +html_safe+.
# An input that parses to a document without a root element yields "".
def truncate_html(num_words = 30, opts = {})
	opts = { :word_cut => true, :tail => "&hellip;", :strip_html => false }.merge(opts)
	tail = HTMLEntities.new.decode opts[:tail]
	# A negative limit can never be satisfied; make it behave like zero.
	limit = [num_words, 0].max

	doc = Nokogiri::HTML(self)

	current = doc.children.first
	# Last text node visited before the limit was exceeded. NOTE: this may be
	# a whitespace-only node, in which case the tail is attached to it.
	previous = nil
	count = 0
	overflowed = false

	# Walk the tree in document order, counting words in text nodes, until
	# the limit is exceeded or the document is exhausted.
	while current
		if current.is_a?(Nokogiri::XML::Text)
			count += current.text.split.length
			if count > limit
				overflowed = true
				break
			end
			previous = current
		end

		if current.children.length > 0
			# Not a leaf: descend and look for text nodes.
			current = current.children.first
		elsif current.next
			# Leaf with a following sibling: move sideways.
			current = current.next
		else
			# Last child: climb until an ancestor has a following sibling,
			# or stop once the document itself is reached.
			n = current
			n = n.parent until n.is_a?(Nokogiri::HTML::Document) || n.parent.next
			break if n.is_a?(Nokogiri::HTML::Document)
			current = n.parent.next
		end
	end

	if overflowed
		# +current+ is the text node that pushed the count past the limit.
		# Without it we had (count - words.length) words, so only the
		# difference up to the limit may be taken from this node.
		# Example: "<p>Testing this HTML truncater.</p><p>To see if its working.</p>"
		# with a limit of 6 has 4 words before the second paragraph, so 2
		# words are kept from it.
		words = current.text.split
		keep = limit - (count - words.length)

		if keep > 0
			current.content = words.first(keep).join(' ') + tail
		elsif previous
			# Nothing may be taken from this node; mark the cut on the
			# previous text node instead.
			current = previous
			current.content = current.content + tail
		else
			# No earlier text node exists (e.g. a limit of zero): leave
			# only the tail rather than dereferencing nil.
			current.content = tail
		end

		# Remove everything after the cut: the following siblings of the
		# node and of each of its ancestors.
		until current.is_a?(Nokogiri::HTML::Document)
			current.next.remove while current.next
			current = current.parent
		end
	end

	# Nokogiri wraps the input in <html>/<body>, which we do not want in the
	# result, so only the inner content is serialized. The first child of
	# the root is assumed to be that <body> wrapper.
	root = doc.root && doc.root.children.first

	if root.nil?
		# Nothing was parsed into the document (e.g. empty input).
		truncated = ""
	else
		# Drop a lone attribute-less <p> wrapper, since an extra <p> can
		# throw off styling. This does not check whether the original input
		# was itself wrapped in a plain <p>.
		only_child = root.children.first
		lone_plain_p = root.children.size == 1 && only_child.name == "p" && only_child.attributes.empty?
		container = lone_plain_p ? only_child : root

		truncated = opts[:strip_html] == true ? container.inner_text : container.inner_html
	end

	if respond_to?(:html_safe?) && html_safe?
		truncated.html_safe
	else
		truncated
	end
end