Module: JekyllImport::Util

Defined in:
lib/jekyll-import/util.rb

Class Method Summary collapse

Class Method Details

.wpautop(pee, br = true) ⇒ Object

Ruby translation of WordPress's wpautop function (see core.trac.wordpress.org/browser/trunk/src/wp-includes/formatting.php).

A group of regex replacements used to identify text formatted with newlines and replace double line-breaks with HTML paragraph tags. The remaining line-breaks after conversion become <br /> tags, unless br is set to false.

Parameters:

  • string

    pee The text which has to be formatted.

  • bool

    br Optional. If set, this will convert all remaining line-breaks after paragraphing. Default true.

Returns:

  • string Text which has been converted into correct paragraph tags.



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/jekyll-import/util.rb', line 15

# Converts newline-formatted text into HTML paragraphs.
#
# Double line-breaks become <p>...</p> paragraphs. When +br+ is truthy, the
# remaining single line-breaks become "<br />" tags, except inside
# <script>/<style> elements, whose newlines are left untouched.
# Content of <pre>...</pre> elements is set aside before processing and
# restored verbatim at the end.
#
# @param pee [String] the text to format.
# @param br [Boolean] whether to convert remaining line-breaks to <br /> tags.
# @return [String] the text with paragraph tags applied ("" for blank input).
def self.wpautop(pee, br = true)
  return "" if pee.strip == ""

  allblocks = "(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|option|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|noscript|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)"
  pre_tags = {}
  # Pad the end so the text after the last "</pre>" is never an empty string
  # (String#split drops trailing empty fields).
  pee += "\n"

  # Swap every <pre>...</pre> element for a placeholder so that none of the
  # replacements below touch preformatted content.
  if pee.include?("<pre")
    pee_parts = pee.split("</pre>")
    last_pee = pee_parts.pop
    pee = ""
    pee_parts.each_with_index do |pee_part, i|
      start = pee_part.index("<pre")

      # A stray "</pre>" with no opening tag: keep the text as it is.
      unless start
        pee += pee_part
        next
      end

      name = "<pre wp-pre-tag-#{i}></pre>"
      # Backslashes are doubled because the value is later used as a gsub!
      # replacement string, where a backslash acts as an escape character.
      pre_tags[name] = (pee_part[start..-1] + "</pre>").gsub('\\', '\\\\\\\\')

      pee += pee_part[0, start] + name
    end
    pee += last_pee
  end

  # Normalise: doubled <br /> become blank lines, block-level tags get
  # surrounding newlines, and all line endings become "\n".
  pee = pee.gsub(Regexp.new('<br />\s*<br />'), "\n\n")
  pee = pee.gsub(Regexp.new("(<" + allblocks + "[^>]*>)"), "\n\\1")
  pee = pee.gsub(Regexp.new("(</" + allblocks + ">)"), "\\1\n\n")
  pee = pee.gsub("\r\n", "\n").tr("\r", "\n")
  if pee.include? "<object"
    # Strip whitespace around <param> and </embed> tags.
    pee = pee.gsub(Regexp.new('\s*<param([^>]*)>\s*'), "<param\\1>")
    pee = pee.gsub(Regexp.new('\s*</embed>\s*'), "</embed>")
  end

  # Wrap every chunk separated by a blank line in <p>...</p>.
  pees = pee.split(%r!\n\s*\n!).compact
  pee = ""
  pees.each { |tinkle| pee += "<p>" + tinkle.chomp("\n") + "</p>\n" }

  # Clean up paragraphs that are empty or that wrap block-level elements.
  pee = pee.gsub(Regexp.new('<p>\s*</p>'), "")
  pee = pee.gsub(Regexp.new("<p>([^<]+)</(div|address|form)>"), "<p>\\1</p></\\2>")
  pee = pee.gsub(Regexp.new('<p>\s*(</?' + allblocks + '[^>]*>)\s*</p>'), "\\1")
  pee = pee.gsub(Regexp.new("<p>(<li.+?)</p>"), "\\1")
  pee = pee.gsub(Regexp.new("<p><blockquote([^>]*)>", "i"), "<blockquote\\1><p>")
  pee = pee.gsub("</blockquote></p>", "</p></blockquote>")
  pee = pee.gsub(Regexp.new('<p>\s*(</?' + allblocks + "[^>]*>)"), "\\1")
  pee = pee.gsub(Regexp.new("(</?" + allblocks + '[^>]*>)\s*</p>'), "\\1")
  if br
    # Shield newlines inside <script>/<style> elements from the <br />
    # conversion. MULTILINE lets "." match "\n"; without it the pattern can
    # never span a multi-line element and nothing would be shielded.
    pee = pee.gsub(Regexp.new('<(script|style).*?</\1>', Regexp::MULTILINE)) { |match| match.gsub("\n", "<WPPreserveNewline />") }
    pee = pee.gsub(Regexp.new('(?<!<br />)\s*\n'), "<br />\n")
    pee = pee.gsub("<WPPreserveNewline />", "\n")
  end
  # Drop <br /> tags that ended up directly next to block-level tags.
  pee = pee.gsub(Regexp.new("(</?" + allblocks + '[^>]*>)\s*<br />'), "\\1")
  pee = pee.gsub(Regexp.new('<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)'), "\\1")
  pee = pee.gsub(Regexp.new('\n</p>$'), "</p>")

  # Put the original <pre> elements back in place of their placeholders.
  pre_tags.each do |name, value|
    pee.gsub!(name, value)
  end
  pee
end