Module: Jkl::Text

Defined in:
lib/jkl/text_client.rb

Class Method Summary

Class Method Details

.plain_text(document, words_on_line = 5) ⇒ Object Also known as: sanitize



5
6
7
8
9
10
# File 'lib/jkl/text_client.rb', line 5

# Reduces an HTML document to plain text.
#
# The document is passed through the helpers in this order:
# remove_script_tags, strip_all_tags, remove_short_lines, and finally
# CGI.unescapeHTML to decode HTML entities in what is left.
#
# @param document [String] the HTML source to convert
# @param words_on_line [Integer] threshold forwarded unchanged to
#   remove_short_lines
# @return [Object] whatever CGI.unescapeHTML returns for the filtered text
def plain_text(document, words_on_line = 5)
  without_scripts = remove_script_tags(document)
  without_markup = strip_all_tags(without_scripts)
  long_lines_only = remove_short_lines(without_markup, words_on_line)
  CGI.unescapeHTML(long_lines_only)
end

.remove_blank_lines(text) ⇒ Object



17
18
19
# File 'lib/jkl/text_client.rb', line 17

# Deletes every line-break sequence from +text+.
#
# Despite the name, this does not only drop empty lines: each "\n\r", "\r\n",
# "\n" or "\r" is replaced with the empty string, so all lines end up joined
# into a single one.
#
# @param text [String] the text to process
# @return [String] a copy of +text+ with all line breaks removed
def remove_blank_lines(text)
  line_break = /\n\r|\r\n|\n|\r/
  text.gsub(line_break, "")
end

.remove_html_comments(text) ⇒ Object



21
22
23
# File 'lib/jkl/text_client.rb', line 21

# Removes every HTML comment (<!-- ... -->) from +text+.
#
# Comment bodies are matched lazily, so two separate comments are removed
# independently and the text between them is kept. A comment may span several
# lines. An opening "<!--" that is never closed is left untouched.
#
# @param text [String] markup that may contain HTML comments
# @return [String] a copy of +text+ without comments
def remove_html_comments(text)
  # `.` under /m matches any character, newlines included, which is exactly
  # what the former `(.|\s)` alternation accepted. A single alternative avoids
  # the exponential backtracking that the two overlapping branches caused on
  # an unterminated comment followed by a long run of whitespace.
  text.gsub(/<!--.*?-->/m, "")
end

.remove_script_tags(text) ⇒ Object



25
26
27
28
# File 'lib/jkl/text_client.rb', line 25

# Removes <script>...</script> elements, including their contents, from +text+.
#
# HTML comments are stripped first via remove_html_comments. Each script
# element is then removed from its opening tag up to the nearest closing
# </script> tag. Matching is case-insensitive and may span several lines.
#
# The script body may contain any characters, including ">" (for example
# "a > b" or "=>" in JavaScript). The previous pattern only accepted bodies
# without ">" and therefore left such scripts in place.
#
# A closing tag (</script>) is not accepted as the start of an element, so a
# stray closing tag cannot swallow the text that follows it. A script element
# without a closing tag is left untouched.
#
# @param text [String] markup that may contain script elements
# @return [String] a copy of +text+ without comments and script elements
def remove_script_tags(text)
  without_comments = remove_html_comments(text)
  # Lazy `.*?` under /m stops at the first closing tag, so text between two
  # separate script elements is preserved.
  without_comments.gsub(/<\s*script\b[^>]*>.*?<\/script\s*>/im, "")
end

.remove_short_lines(text, words_on_line = 5) ⇒ Object



30
31
32
33
34
35
36
37
# File 'lib/jkl/text_client.rb', line 30

# Drops lines that contain too few space characters.
#
# Every pair of adjacent whitespace characters is first turned into a line
# break, then the text is split on "\n". A line is kept only when it contains
# at least +words_on_line+ space characters; kept lines are emitted in their
# original order, each terminated by "\n".
#
# Note that the threshold is compared against the number of spaces in the
# line, not against an actual word count.
#
# @param text [String] the text to filter
# @param words_on_line [Integer] minimum number of spaces a line must contain
# @return [String] the kept lines, or an empty string when none qualify
def remove_short_lines(text, words_on_line = 5)
  lines = text.gsub(/\s\s/, "\n").split("\n")
  kept = lines.select { |line| line.count(" ") >= words_on_line }
  kept.map { |line| "#{line}\n" }.join
end

.strip_all_tags(text) ⇒ Object



13
14
15
# File 'lib/jkl/text_client.rb', line 13

# Removes everything that looks like a markup tag from +text+.
#
# Any span starting at "<" and running up to the next ">" is deleted, whether
# it is an opening tag, a closing tag, or arbitrary text between angle
# brackets. A "<" with no later ">" is left in place.
#
# @param text [String] markup to strip
# @return [String] a copy of +text+ with all such spans removed
def strip_all_tags(text)
  tag_pattern = /<\/?[^>]*>/
  text.gsub(tag_pattern, "")
end