Class: ReverseAsciidoctor::Cleaner

Inherits:
Object
  • Object
show all
Defined in:
lib/reverse_asciidoctor/cleaner.rb

Instance Method Summary collapse

Instance Method Details

#clean_headings(string) ⇒ Object

following added by me



83
84
85
86
87
88
# File 'lib/reverse_asciidoctor/cleaner.rb', line 83

# Repairs heading markup seen in LibreOffice HTML output.
#
# The string is modified in place and the same object is returned.
#
# @param string [String] HTML to clean (must be mutable)
# @return [String] +string+ itself, after both substitutions
def clean_headings(string)
  # Headings with no content at all: LibreOffice inserts them for no known
  # reason, so each one is replaced by a single space.
  empty_heading = %r{<h([1-9])[^>]*></h\1>}
  # Headings carrying a superscript style: LibreOffice renders superscripts
  # as headings, so the content is re-wrapped in <sup>.
  superscript_heading = %r{<h([1-9])[^>]* style="vertical-align: super;[^>]*>(.+?)</h\1>}

  string.gsub!(empty_heading, " ")
  string.gsub!(superscript_heading, "<sup>\\2</sup>")
  string
end

#clean_punctuation_characters(string) ⇒ Object



63
64
65
# File 'lib/reverse_asciidoctor/cleaner.rb', line 63

# Removes the single whitespace character that separates a closing
# formatting marker (**, ~~ or __) from a following punctuation character
# (. ! ? ' ").
#
# @param string [String] text to clean; not modified
# @return [String] a new string with marker and punctuation joined
def clean_punctuation_characters(string)
  marker_then_punctuation = /(\*\*|~~|__)\s([\.!\?'"])/
  # Keep the marker (group 1) and the punctuation (group 2), drop the gap.
  string.gsub(marker_then_punctuation, '\1\2')
end

#clean_tag_borders(string) ⇒ Object

Finds non-asterisk content that is enclosed by two or more asterisks and ensures that only one whitespace character occurs in the border area. The same is done for underscores, tildes and brackets.



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/reverse_asciidoctor/cleaner.rb', line 37

# Tidies whitespace just inside paired markers.
#
# Runs four passes in order: content between runs of two or more asterisks,
# then underscores, then tildes, and finally content in square brackets.
# In each pass the match is stripped and one space directly inside the
# opening and closing marker is removed; the result is handed back through
# preserve_border_whitespaces (defined elsewhere), which decides what
# surrounds it.
#
# @param string [String] text to clean; not modified
# @return [String] the cleaned text
def clean_tag_borders(string)
  # [two-character marker, pattern matching content enclosed by that marker]
  emphasis_passes = [
    ['**', /\s?\*{2,}.*?\*{2,}\s?/],
    ['__', /\s?\_{2,}.*?\_{2,}\s?/],
    ['~~', /\s?~{2,}.*?~{2,}\s?/]
  ]

  emphasis_cleaned = emphasis_passes.reduce(string) do |text, (marker, pattern)|
    text.gsub(pattern) do |enclosed|
      preserve_border_whitespaces(enclosed, default_border: ReverseAsciidoctor.config.tag_border) do
        enclosed.strip.sub("#{marker} ", marker).sub(" #{marker}", marker)
      end
    end
  end

  # Brackets get the same treatment, but without a default border.
  emphasis_cleaned.gsub(/\s?\[.*?\]\s?/) do |enclosed|
    preserve_border_whitespaces(enclosed) do
      enclosed.strip.sub('[ ', '[').sub(' ]', ']')
    end
  end
end

#preprocess_word_html(string) ⇒ Object

preprocesses HTML, rather than postprocessing it



68
69
70
# File 'lib/reverse_asciidoctor/cleaner.rb', line 68

# Cleans HTML before conversion (preprocessing rather than postprocessing).
#
# Works on a duplicate, so the caller's string is left untouched: the copy
# is passed through scrub_whitespace and then clean_headings.
#
# @param string [String] raw HTML
# @return [Object] whatever clean_headings returns for the scrubbed copy
def preprocess_word_html(string)
  working_copy = string.dup
  scrubbed = scrub_whitespace(working_copy)
  clean_headings(scrubbed)
end

#remove_inner_whitespaces(string) ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/reverse_asciidoctor/cleaner.rb', line 20

# Normalises whitespace around stem:[...] macros and inside every line.
#
# First, three stem fix-ups are applied to the whole text:
# 1. "\n stem:[" loses the space before "stem:[".
# 2. A stem:[...] immediately followed by a newline and a non-space
#    character has that newline replaced by a space.
# 3. Whitespace between a stem:[...] and a following "^" or "-" is removed.
#
# Then each line is stripped and runs of two or more spaces/tabs are
# collapsed to one space; the result is passed through
# preserve_border_whitespaces (defined elsewhere).
#
# The input is never modified, so frozen strings are accepted.
#
# @param string [String, nil] text to clean
# @return [String] the cleaned text; empty when +string+ is nil or empty
def remove_inner_whitespaces(string)
  # The original guarded only the stem fix-ups against nil and then crashed
  # on each_line; nil is now treated like empty input.
  return '' if string.nil?

  # Non-bang gsub keeps the caller's string intact (and avoids FrozenError).
  text = string.gsub(/\n stem:\[/, "\nstem:[")
  text = text.gsub(/(stem:\[([^\]]|\\\])*\])\n(?=\S)/, "\\1 ")
  text = text.gsub(/(stem:\[([^\]]|\\\])*\])\s+(?=[\^-])/, "\\1")

  # Append into one buffer instead of allocating a new string per line.
  text.each_line.each_with_object(+'') do |line, cleaned|
    cleaned << preserve_border_whitespaces(line) { line.strip.gsub(/[ \t]{2,}/, ' ') }
  end
end

#remove_leading_newlines(string) ⇒ Object



16
17
18
# File 'lib/reverse_asciidoctor/cleaner.rb', line 16

# Drops every newline at the very start of the text.
#
# @param string [String] text to trim; not modified
# @return [String] a new string that does not begin with "\n"
def remove_leading_newlines(string)
  leading_newlines = /\A\n+/
  # The pattern is anchored to the start, so it can match at most once.
  string.sub(leading_newlines, '')
end

#remove_newlines(string) ⇒ Object



12
13
14
# File 'lib/reverse_asciidoctor/cleaner.rb', line 12

# Collapses every run of three or more newlines to exactly two.
#
# @param string [String] text to clean; not modified
# @return [String] a new string with no run of more than two newlines
def remove_newlines(string)
  excess_newlines = /\n{3,}/
  string.gsub(excess_newlines, "\n\n")
end

#scrub_whitespace(string) ⇒ Object



72
73
74
75
76
77
78
79
80
# File 'lib/reverse_asciidoctor/cleaner.rb', line 72

# Normalises whitespace in an HTML document.
#
# The string is modified in place and the same object is returned. Steps,
# in order:
# 1. Every spelling of a non-breaking space (&nbsp;, &#xA0; in any case, or
#    the raw U+00A0 character) becomes the entity "&#xA0;".
# 2. Whitespace at the start of the document is removed.
# 3. Whitespace at the end of the document is removed.
# 4. A run of spaces at the end of a line is reduced to a single space.
# 5. Each sequence of four newlines is replaced by two.
#
# @param string [String] HTML to clean (must be mutable)
# @return [String] +string+ itself
def scrub_whitespace(string)
  nbsp_spellings      = /&nbsp;|\&#xA0;|\u00a0/i
  document_leading    = /^\A[[:space:]]+/m
  document_trailing   = /[[:space:]]+\z$/m
  line_trailing       = /([ ]+)$/
  quadruple_linebreak = /\n\n\n\n/

  string.tap do |html|
    html.gsub!(nbsp_spellings, '&#xA0;')
    html.sub!(document_leading, '')
    html.sub!(document_trailing, '')
    html.gsub!(line_trailing, ' ')
    html.gsub!(quadruple_linebreak, "\n\n")
    # Disabled in the original source:
    #string.delete!('?| ')               # Unicode non-breaking spaces, injected as tabs
  end
end

#tidy(string) ⇒ Object



4
5
6
7
8
9
10
# File 'lib/reverse_asciidoctor/cleaner.rb', line 4

# Runs the full clean-up pipeline over converted text.
#
# Each stage receives the previous stage's result, in this order:
# remove_inner_whitespaces, remove_newlines, remove_leading_newlines,
# clean_tag_borders, clean_punctuation_characters.
#
# @param string [String] text to tidy
# @return [Object] the value returned by clean_punctuation_characters
def tidy(string)
  inner_cleaned     = remove_inner_whitespaces(string)
  newlines_squeezed = remove_newlines(inner_cleaned)
  leading_trimmed   = remove_leading_newlines(newlines_squeezed)
  borders_cleaned   = clean_tag_borders(leading_trimmed)
  clean_punctuation_characters(borders_cleaned)
end