Class: Govspeak::Document
- Inherits:
-
Object
- Object
- Govspeak::Document
- Defined in:
- lib/govspeak.rb
Constant Summary collapse
- Parser =
Kramdown::Parser::Govuk
- PARSER_CLASS_NAME =
Parser.name.split("::").last
- UUID_REGEX =
/^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i.freeze
- NEW_PARAGRAPH_LOOKBEHIND =
%q{(?<=\A|\n\n|\r\n\r\n)}.freeze
Class Attribute Summary collapse
-
.extensions ⇒ Object
readonly
Returns the value of attribute extensions.
Instance Attribute Summary collapse
-
#attachments ⇒ Object
readonly
Returns the value of attribute attachments.
-
#contacts ⇒ Object
readonly
Returns the value of attribute contacts.
-
#images ⇒ Object
Returns the value of attribute images.
-
#links ⇒ Object
readonly
Returns the value of attribute links.
-
#locale ⇒ Object
readonly
Returns the value of attribute locale.
Class Method Summary collapse
- .devolved_options ⇒ Object
- .extension(title, regexp = nil, &block) ⇒ Object
- .surrounded_by(open, close = nil) ⇒ Object
- .to_html(source, options = {}) ⇒ Object
- .wrap_with_div(class_name, character, parser = Kramdown::Document) ⇒ Object
Instance Method Summary collapse
- #extract_contact_content_ids ⇒ Object
- #extracted_links(website_root: nil) ⇒ Object
- #footnote_definitions(source) ⇒ Object
- #headers ⇒ Object
-
#initialize(source, options = {}) ⇒ Document
constructor
A new instance of Document.
- #insert_strong_inside_p(body, parser = Govspeak::Document) ⇒ Object
- #preprocess(source) ⇒ Object
- #remove_forbidden_characters(source) ⇒ Object
-
#render_image(image) ⇒ Object
As of version 1.12.0 of Kramdown, the block elements (div & figcaption) inside this HTML block will have their < > characters converted into HTML entities whenever this code is used inside block-level elements.
- #structured_headers ⇒ Object
- #to_html ⇒ Object
- #to_liquid ⇒ Object
- #to_text ⇒ Object
- #valid?(validation_options = {}) ⇒ Boolean
Constructor Details
#initialize(source, options = {}) ⇒ Document
Returns a new instance of Document.
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/govspeak.rb', line 50
# Builds a new Document from raw source text.
#
# source  - the text to render; it is duplicated, and a nil source
#           becomes the empty string.
# options - a Hash whose keys are deep-symbolized on a copy, so the
#           caller's hash is not mutated. The keys :images,
#           :allowed_elements, :attachments, :links and :contacts are
#           removed from the copy and stored in instance variables;
#           :locale is read (default "en") but left in place. Whatever
#           remains is merged over the default parser options.
def initialize(source, options = {})
  options = options.dup.deep_symbolize_keys
  @source = source ? source.dup : ""
  @images = options.delete(:images) || []
  @allowed_elements = options.delete(:allowed_elements) || []
  @attachments = Array.wrap(options.delete(:attachments))
  @links = Array.wrap(options.delete(:links))
  @contacts = Array.wrap(options.delete(:contacts))
  @locale = options.fetch(:locale, "en")
  @options = { input: PARSER_CLASS_NAME, sanitize: true, syntax_highlighter: nil }.merge(options)
  # Assigned after the merge, so a caller-supplied :entity_output is overridden.
  @options[:entity_output] = :symbolic
  @footnote_definition_html = nil
  @acronyms = []
end
Class Attribute Details
.extensions ⇒ Object (readonly)
Returns the value of attribute extensions.
47 48 49 |
# File 'lib/govspeak.rb', line 47
# Class-level reader: returns whatever is currently held in @extensions.
def extensions
  @extensions
end
Instance Attribute Details
#attachments ⇒ Object (readonly)
Returns the value of attribute attachments.
40 41 42 |
# File 'lib/govspeak.rb', line 40
# Reader for the attachments attribute: returns the value of @attachments.
def attachments
  @attachments
end
#contacts ⇒ Object (readonly)
Returns the value of attribute contacts.
40 41 42 |
# File 'lib/govspeak.rb', line 40
# Reader for the contacts attribute: returns the value of @contacts.
def contacts
  @contacts
end
#images ⇒ Object
Returns the value of attribute images.
39 40 41 |
# File 'lib/govspeak.rb', line 39
# Reader for the images attribute: returns the value of @images.
def images
  @images
end
#links ⇒ Object (readonly)
Returns the value of attribute links.
40 41 42 |
# File 'lib/govspeak.rb', line 40
# Reader for the links attribute: returns the value of @links.
def links
  @links
end
#locale ⇒ Object (readonly)
Returns the value of attribute locale.
40 41 42 |
# File 'lib/govspeak.rb', line 40
# Reader for the locale attribute: returns the value of @locale.
def locale
  @locale
end
Class Method Details
.devolved_options ⇒ Object
385 386 387 388 389 390 391 392 |
# File 'lib/govspeak.rb', line 385
# Returns a Hash mapping each devolved-content key to its display label.
# A new Hash is built on every call.
def self.devolved_options
  {
    "scotland" => "Scotland",
    "england" => "England",
    "england-wales" => "England and Wales",
    "northern-ireland" => "Northern Ireland",
    "wales" => "Wales",
    "london" => "London",
  }
end
.extension(title, regexp = nil, &block) ⇒ Object
175 176 177 178 |
# File 'lib/govspeak.rb', line 175
# Registers an extension by appending a [title, regexp, block] triple to
# @extensions, and returns @extensions.
#
# When no regexp is supplied, the default matches
# "{::title}...{:/title}" and captures the text in between; the pattern
# carries the `m` flag, so the captured text may span several lines.
def self.extension(title, regexp = nil, &block)
  pattern = regexp || %r${::#{title}}(.*?){:/#{title}}$m
  @extensions.push([title, pattern, block])
end
.surrounded_by(open, close = nil) ⇒ Object
180 181 182 183 184 185 186 187 188 |
# File 'lib/govspeak.rb', line 180
# Builds a multi-line Regexp that captures the text following the `open`
# marker, where the marker is preceded by CR, LF or a line start.
#
# Both markers are regexp-escaped. With a `close` marker the match ends at
# that marker, optionally followed by spaces and a line ending. Without
# one, the pattern must end at a line ending; note that the trailing `?`
# after the repeated opener binds only to the final character of the
# escaped opener.
def self.surrounded_by(open, close = nil)
  opener = Regexp.escape(open)
  return %r{(?:\r|\n|^)#{opener}(.*?)#{opener}? *(\r|\n|$)}m unless close

  closer = Regexp.escape(close)
  %r{(?:\r|\n|^)#{opener}(.*?)#{closer} *(\r|\n|$)?}m
end
.to_html(source, options = {}) ⇒ Object
42 43 44 |
# File 'lib/govspeak.rb', line 42
# Convenience class method: builds a new instance from `source` and
# `options`, then returns the result of calling #to_html on it.
def self.to_html(source, options = {})
  new(source, options).to_html
end
.wrap_with_div(class_name, character, parser = Kramdown::Document) ⇒ Object
190 191 192 193 194 195 |
# File 'lib/govspeak.rb', line 190
# Registers an extension named `class_name`, matched by
# surrounded_by(character), that wraps the matched body in
# <div class="class_name">.
#
# The stripped body (plus a trailing newline) is rendered through
# `parser` when one is given; passing a falsy parser inserts the stripped
# body unrendered.
def self.wrap_with_div(class_name, character, parser = Kramdown::Document)
  extension(class_name, surrounded_by(character)) do |body|
    stripped = body.strip
    inner = parser ? parser.new("#{stripped}\n").to_html : stripped
    %(\n<div class="#{class_name}">\n#{inner}</div>\n)
  end
end
Instance Method Details
#extract_contact_content_ids ⇒ Object
114 115 116 117 118 119 |
# File 'lib/govspeak.rb', line 114
# Returns the unique first-capture values that the "Contact" extension's
# regexp finds in @source and that also match UUID_REGEX.
# Returns [] when no "Contact" extension (or no regexp for it) is
# registered on the class.
def extract_contact_content_ids
  contact_extension = self.class.extensions.find { |entry| entry.first == "Contact" }
  contact_regex = contact_extension && contact_extension[1]
  return [] unless contact_regex

  candidate_ids = @source.scan(contact_regex).map(&:first).uniq
  candidate_ids.select { |candidate| candidate.match(UUID_REGEX) }
end
#extracted_links(website_root: nil) ⇒ Object
110 111 112 |
# File 'lib/govspeak.rb', line 110
# Returns the result of running Govspeak::LinkExtractor over this
# document, forwarding `website_root` unchanged.
def extracted_links(website_root: nil)
  extractor = Govspeak::LinkExtractor.new(self, website_root: website_root)
  extractor.call
end
#footnote_definitions(source) ⇒ Object
135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
# Scans `source` for footnote definitions ("[^N]: text") and for
# bracketed definitions preceded by "*" ("*[X]: text"); the latter are
# stored in @acronyms.
#
# Only when the source contains a $LegislativeList...$EndLegislativeList
# or $CTA...$CTA span AND at least one footnote definition was found, it
# builds @footnote_definition_html: a <div class="footnotes"> holding an
# <ol> with one <li> per footnote. Each footnote's text is rendered with
# Govspeak::Document.new(text).to_html and the content between "<p>" and
# "</p>" is used.
#
# NOTE(review): in the two span-detection regexes, `\[\^\d\]*` matches
# "[^", one digit and then zero or more "]" characters; `\[\^\d+\]` was
# presumably intended -- confirm.
# NOTE(review): the final `unless a.nil? && b.positive?` calls
# add_acronym_alt_text whenever the HTML is non-nil OR @acronyms is empty,
# which includes passing nil when there are no acronyms. Presumably the
# intent is "HTML present and acronyms present" -- confirm against
# add_acronym_alt_text (not visible here).
# NOTE(review): this rendering flattens the method onto one line, so the
# line breaks and indentation inside the two heredocs are not recoverable
# from here; consult lib/govspeak.rb for the exact strings.
# File 'lib/govspeak.rb', line 135 def footnote_definitions(source) is_legislative_list = source.scan(/\$LegislativeList.*?\[\^\d\]*.*?\$EndLegislativeList/m).size.positive? is_cta = source.scan(/\$CTA.*?\[\^\d\]*.*?\$CTA/m).size.positive? footnotes = source.scan(/\[\^(\d+)\]:(.*)/) @acronyms = source.scan(/(?<=\*)\[(.*)\]:(.*)/) if (is_legislative_list || is_cta) && footnotes.size.positive? list_items = footnotes.map do |footnote| number = footnote[0] text = footnote[1].strip footnote_definition = Govspeak::Document.new(text).to_html[/(?<=<p>).*(?=<\/p>)/] <<~HTML_SNIPPET <li id="fn:#{number}" role="doc-endnote"> <p> #{footnote_definition}<a href="#fnref:#{number}" class="reversefootnote" role="doc-backlink" aria-label="go to where this is referenced">↩</a> </p> </li> HTML_SNIPPET end @footnote_definition_html = <<~HTML_CONTAINER <div class="footnotes" role="doc-endnotes"> <ol> #{list_items.join.strip} </ol> </div> HTML_CONTAINER end unless @footnote_definition_html.nil? && @acronyms.size.positive? add_acronym_alt_text(@footnote_definition_html) end end |
#headers ⇒ Object
102 103 104 |
# File 'lib/govspeak.rb', line 102
# Returns the first element of what Govspeak::HeaderExtractor.convert
# produces for this document's kramdown_doc.
def headers
  extracted = Govspeak::HeaderExtractor.convert(kramdown_doc)
  extracted.first
end
#insert_strong_inside_p(body, parser = Govspeak::Document) ⇒ Object
197 198 199 |
# File 'lib/govspeak.rb', line 197
# Renders the stripped `body` with `parser` and wraps the content of the
# first line of the form "<p>...</p>" in <strong>. Output with no such
# line is returned unchanged.
def insert_strong_inside_p(body, parser = Govspeak::Document)
  rendered = parser.new(body.strip).to_html
  rendered.sub(/^<p>(.*)<\/p>$/, "<p><strong>\\1</strong></p>")
end
#preprocess(source) ⇒ Object
121 122 123 124 125 126 127 128 129 130 131 132 133 |
# File 'lib/govspeak.rb', line 121
# Prepares raw source before parsing and returns the processed text.
#
# Steps, in order: BlockquoteExtraQuoteRemover, removal of forbidden
# characters, footnote/acronym collection (footnote_definitions), then
# every registered extension is applied in registration order.
def preprocess(source)
  text = Govspeak::BlockquoteExtraQuoteRemover.remove(source)
  text = remove_forbidden_characters(text)
  footnote_definitions(text)

  self.class.extensions.each do |(_title, pattern, handler)|
    # Each extension block runs in the context of this document and
    # receives the regexp's captures as its arguments.
    text.gsub!(pattern) do
      instance_exec(*Regexp.last_match.captures, &handler)
    end
  end

  text
end
#remove_forbidden_characters(source) ⇒ Object
169 170 171 172 173 |
# File 'lib/govspeak.rb', line 169
# Returns a copy of `source` with every match of
# Sanitize::REGEX_UNSUITABLE_CHARS removed.
def remove_forbidden_characters(source)
  # These are characters that are deemed not suitable for
  # markup: https://www.w3.org/TR/unicode-xml/#Charlist
  unsuitable = Sanitize::REGEX_UNSUITABLE_CHARS
  source.gsub(unsuitable, "")
end
#render_image(image) ⇒ Object
As of version 1.12.0 of Kramdown, the block elements (div & figcaption) inside this HTML block will have their < > characters converted into HTML entities whenever this code is used inside block-level elements.
To resolve this we have a post-processing task that will convert this back into HTML (I know - it’s ugly). The way we could resolve this without ugliness would be to output only inline elements, which rules out div and figcaption.
This issue is not considered a bug by kramdown: github.com/gettalong/kramdown/issues/191
314 315 316 317 318 319 320 321 322 |
# Builds the <figure class="image embedded"> markup for `image` and
# returns it as a single string (the pieces are joined with no separator).
# An id="attachment_<id>" attribute is added only when image.id is truthy;
# image.url and image.alt_text are passed through `encode` (defined
# elsewhere) before being placed in the <img> attributes.
#
# NOTE(review): the statement `lines << image. if image.` below has lost
# two method names in this rendering and is not valid Ruby as shown;
# restore them from lib/govspeak.rb before relying on this snippet.
# File 'lib/govspeak.rb', line 314 def render_image(image) id_attr = image.id ? %( id="attachment_#{image.id}") : "" lines = [] lines << %(<figure#{id_attr} class="image embedded">) lines << %(<div class="img"><img src="#{encode(image.url)}" alt="#{encode(image.alt_text)}"></div>) lines << image. if image. lines << "</figure>" lines.join end |
#structured_headers ⇒ Object
106 107 108 |
# File 'lib/govspeak.rb', line 106
# Returns the result of running Govspeak::StructuredHeaderExtractor over
# this document.
def structured_headers
  extractor = Govspeak::StructuredHeaderExtractor.new(self)
  extractor.call
end
#to_html ⇒ Object
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
# File 'lib/govspeak.rb', line 68
# Renders the document to HTML, memoizing the result in @to_html.
#
# The Kramdown output is passed through HtmlSanitizer (with
# @allowed_elements) unless the :sanitize option is falsy. When
# @footnote_definition_html has been set, it replaces the rendered
# footnotes <div>, or is appended when the output contains none. The
# result is finally handed to Govspeak::PostProcessor.process.
def to_html
  @to_html ||= begin
    html = if @options[:sanitize]
             HtmlSanitizer.new(kramdown_doc.to_html).sanitize(allowed_elements: @allowed_elements)
           else
             kramdown_doc.to_html
           end

    unless @footnote_definition_html.nil?
      # Match through the literal closing tag. The previous pattern ended
      # in the character class [<\/div>], which matches any ONE of those
      # characters, so the greedy match ran to the last such character in
      # the document and could swallow content after the footnotes.
      # Still greedy: the match extends to the last </div> in the output.
      footnotes_div = /<div class="footnotes".*<\/div>/m

      if html.match?(footnotes_div)
        # Block form inserts the footnote HTML literally; a replacement
        # string would interpret backslash sequences such as \1 in it.
        html.gsub!(footnotes_div) { @footnote_definition_html }
      else
        html << @footnote_definition_html
      end
    end

    Govspeak::PostProcessor.process(html, self)
  end
end
#to_liquid ⇒ Object
90 91 92 |
# File 'lib/govspeak.rb', line 90
# Returns the same value as #to_html.
def to_liquid
  to_html
end
#to_text ⇒ Object
94 95 96 |
# File 'lib/govspeak.rb', line 94
# Returns a plain-text version of the rendered HTML: every run of tags
# and/or whitespace is collapsed to a single space, the result is
# stripped, and HTML entities are decoded.
def to_text
  decoder = HTMLEntities.new
  without_markup = to_html.gsub(/(?:<[^>]+>|\s)+/, " ").strip
  decoder.decode(without_markup)
end
#valid?(validation_options = {}) ⇒ Boolean
98 99 100 |
# File 'lib/govspeak.rb', line 98
# Returns the result of Govspeak::HtmlValidator#valid? for the original
# @source, forwarding `validation_options` to the validator unchanged.
def valid?(validation_options = {})
  Govspeak::HtmlValidator.new(@source, validation_options).valid?
end