Class: ReverseAsciidoctor::Cleaner
- Inherits:
-
Object
- Object
- ReverseAsciidoctor::Cleaner
- Defined in:
- lib/reverse_asciidoctor/cleaner.rb
Instance Method Summary collapse
-
#clean_headings(string) ⇒ Object
Added by the author: removes empty headings and rewrites superscript-styled headings as <sup> elements.
- #clean_punctuation_characters(string) ⇒ Object
-
#clean_tag_borders(string) ⇒ Object
Find non-asterisk content that is enclosed by two or more asterisks.
-
#preprocess_word_html(string) ⇒ Object
Preprocesses the HTML input, rather than postprocessing the converted output.
- #remove_inner_whitespaces(string) ⇒ Object
- #remove_leading_newlines(string) ⇒ Object
- #remove_newlines(string) ⇒ Object
- #scrub_whitespace(string) ⇒ Object
- #tidy(string) ⇒ Object
Instance Method Details
#clean_headings(string) ⇒ Object
Added by the author: removes empty headings and rewrites superscript-styled headings as <sup> elements.
83 84 85 86 87 88 |
# File 'lib/reverse_asciidoctor/cleaner.rb', line 83

# Repairs odd heading markup in an HTML string, modifying +string+ in place.
#
# @param string [String] mutable HTML text
# @return [String] the same string object, after the substitutions
def clean_headings(string)
  heading_fixes = [
    # Empty headings are replaced by a single space. The original author
    # notes that Libre Office inserts them for unknown reasons.
    [%r{<h([1-9])[^>]*></h\1>}, " "],
    # Headings styled "vertical-align: super" become <sup> elements. The
    # original author notes that Libre Office renders superscripts as h1.
    [%r{<h([1-9])[^>]* style="vertical-align: super;[^>]*>(.+?)</h\1>}, "<sup>\\2</sup>"],
  ]

  # Order matters: empty headings are removed before the superscript pass.
  heading_fixes.each { |pattern, replacement| string.gsub!(pattern, replacement) }
  string
end
#clean_punctuation_characters(string) ⇒ Object
63 64 65 |
# File 'lib/reverse_asciidoctor/cleaner.rb', line 63

# Drops the single whitespace character that separates a formatting marker
# (**, ~~ or __) from a directly following punctuation character
# (. ! ? ' or ").
#
# @param string [String] text to clean
# @return [String] a new string with the separating whitespace removed
def clean_punctuation_characters(string)
  string.gsub(/(\*\*|~~|__)\s([\.!\?'"])/) do
    marker = Regexp.last_match(1)
    punctuation = Regexp.last_match(2)
    "#{marker}#{punctuation}"
  end
end
#clean_tag_borders(string) ⇒ Object
Find non-asterisk content that is enclosed by two or more asterisks. Ensure that only one whitespace occurs in the border area. Same for underscores and brackets.
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
# File 'lib/reverse_asciidoctor/cleaner.rb', line 37

# Tightens the borders of inline markup. For every run enclosed by two or
# more asterisks, underscores or tildes, and for every bracketed run, the
# first space just inside the opening and closing delimiter is removed.
# Each match is passed through preserve_border_whitespaces (defined
# elsewhere), which presumably controls the whitespace kept around it.
# TODO confirm against that helper.
#
# @param string [String] converted text
# @return [String] a new string with tightened tag borders
def clean_tag_borders(string)
  # Applied strictly in this order: asterisks, underscores, tildes.
  paired_markers = [
    [/\s?\*{2,}.*?\*{2,}\s?/, '**'],
    [/\s?\_{2,}.*?\_{2,}\s?/, '__'],
    [/\s?~{2,}.*?~{2,}\s?/, '~~'],
  ]

  tightened = paired_markers.inject(string) do |text, (pattern, marker)|
    text.gsub(pattern) do |match|
      # The configured border is looked up per match, as before.
      preserve_border_whitespaces(match, default_border: ReverseAsciidoctor.config.tag_border) do
        match.strip.sub("#{marker} ", marker).sub(" #{marker}", marker)
      end
    end
  end

  # Brackets have distinct opening and closing delimiters and use the
  # helper's own default border.
  tightened.gsub(/\s?\[.*?\]\s?/) do |match|
    preserve_border_whitespaces(match) do
      match.strip.sub('[ ', '[').sub(' ]', ']')
    end
  end
end
#preprocess_word_html(string) ⇒ Object
Preprocesses the HTML input, rather than postprocessing the converted output.
68 69 70 |
# File 'lib/reverse_asciidoctor/cleaner.rb', line 68

# Preprocesses HTML before conversion, rather than postprocessing the
# result. Works on a copy, so the caller's string is not modified.
#
# @param string [String] raw HTML
# @return [String] the scrubbed copy with heading fixes applied
def preprocess_word_html(string)
  working_copy = string.dup
  scrubbed = scrub_whitespace(working_copy)
  clean_headings(scrubbed)
end
#remove_inner_whitespaces(string) ⇒ Object
20 21 22 23 24 25 26 27 28 29 30 31 |
# File 'lib/reverse_asciidoctor/cleaner.rb', line 20

# Normalises whitespace around stem:[...] macros, then collapses runs of
# two or more spaces/tabs inside each line to a single space.
#
# The input is not modified, so frozen strings are accepted. A nil input
# yields an empty string; previously the nil guard only covered the
# substitutions and the method still raised on the following each_line.
#
# @param string [String, nil] converted text
# @return [String] a new, cleaned string ("" for nil input)
def remove_inner_whitespaces(string)
  return "" if string.nil?

  # Remove the space between a line break and a stem macro.
  text = string.gsub(/\n stem:\[/, "\nstem:[")
  # Join a stem macro with following non-blank text on the next line.
  text = text.gsub(/(stem:\[([^\]]|\\\])*\])\n(?=\S)/, "\\1 ")
  # Drop whitespace between a stem macro and a following ^ or -.
  text = text.gsub(/(stem:\[([^\]]|\\\])*\])\s+(?=[\^-])/, "\\1")

  # Append to one buffer instead of allocating a new string per line.
  text.each_line.each_with_object("".dup) do |line, cleaned|
    cleaned << preserve_border_whitespaces(line) do
      line.strip.gsub(/[ \t]{2,}/, ' ')
    end
  end
end
#remove_leading_newlines(string) ⇒ Object
16 17 18 |
# File 'lib/reverse_asciidoctor/cleaner.rb', line 16

# Strips every newline at the very start of the text.
#
# @param string [String] text to clean
# @return [String] a new string without leading newlines
def remove_leading_newlines(string)
  # The pattern is anchored at the start, so it can match at most once.
  string.sub(/\A\n+/, '')
end
#remove_newlines(string) ⇒ Object
12 13 14 |
# File 'lib/reverse_asciidoctor/cleaner.rb', line 12

# Collapses any run of three or more newlines into exactly two.
#
# @param string [String] text to clean
# @return [String] a new string with at most two consecutive newlines
def remove_newlines(string)
  paragraph_break = "\n\n"
  string.gsub(/\n{3,}/) { paragraph_break }
end
#scrub_whitespace(string) ⇒ Object
72 73 74 75 76 77 78 79 80 |
# File 'lib/reverse_asciidoctor/cleaner.rb', line 72

# Normalises whitespace in an HTML document, modifying +string+ in place.
#
# @param string [String] mutable HTML text
# @return [String] the same string object, after scrubbing
def scrub_whitespace(string)
  # HTML encoded spaces: the named and numeric non-breaking-space entities
  # (matched case-insensitively) and the raw U+00A0 character all become a
  # plain space.
  string.gsub!(/&nbsp;|&#xA0;|&#160;|\u00a0/i, ' ')
  string.sub!(/\A[[:space:]]+/, '') # document leading whitespace
  string.sub!(/[[:space:]]+\z/, '') # document trailing whitespace
  # Line trailing whitespace: a run of spaces at the end of a line is
  # collapsed to one space, not removed.
  string.gsub!(/ +$/, ' ')
  string.gsub!(/\n\n\n\n/, "\n\n") # Quadruple line breaks
  # Left disabled, as in the original:
  #string.delete!('?| ') # Unicode non-breaking spaces, injected as tabs
  string
end
#tidy(string) ⇒ Object
4 5 6 7 8 9 10 |
# File 'lib/reverse_asciidoctor/cleaner.rb', line 4

# Runs the full cleanup pipeline over converted text. Each step receives
# the result of the previous one.
#
# @param string [String] converted text
# @return [String] the result of the final cleanup step
def tidy(string)
  # The steps are order-dependent; keep this sequence.
  cleanup_steps = %i[
    remove_inner_whitespaces
    remove_newlines
    remove_leading_newlines
    clean_tag_borders
    clean_punctuation_characters
  ]

  cleanup_steps.reduce(string) { |text, step| send(step, text) }
end