Class: Govspeak::Document
- Inherits:
-
Object
- Object
- Govspeak::Document
- Defined in:
- lib/govspeak.rb
Constant Summary collapse
- Parser =
Kramdown::Parser::Govuk
- PARSER_CLASS_NAME =
Parser.name.split("::").last
- UUID_REGEX =
/^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i.freeze
- NEW_PARAGRAPH_LOOKBEHIND =
%q{(?<=\A|\n\n|\r\n\r\n)}.freeze
Class Attribute Summary collapse
-
.extensions ⇒ Object
readonly
Returns the value of attribute extensions.
Instance Attribute Summary collapse
-
#attachments ⇒ Object
readonly
Returns the value of attribute attachments.
-
#contacts ⇒ Object
readonly
Returns the value of attribute contacts.
-
#images ⇒ Object
Returns the value of attribute images.
-
#links ⇒ Object
readonly
Returns the value of attribute links.
-
#locale ⇒ Object
readonly
Returns the value of attribute locale.
Class Method Summary collapse
- .devolved_options ⇒ Object
- .extension(title, regexp = nil, &block) ⇒ Object
- .surrounded_by(open, close = nil) ⇒ Object
- .to_html(source, options = {}) ⇒ Object
- .wrap_with_div(class_name, character, parser = Kramdown::Document) ⇒ Object
Instance Method Summary collapse
- #extract_contact_content_ids ⇒ Object
- #extracted_links(website_root: nil) ⇒ Object
- #footnote_definitions(source) ⇒ Object
- #headers ⇒ Object
-
#initialize(source, options = {}) ⇒ Document
constructor
A new instance of Document.
- #insert_strong_inside_p(body, parser = Govspeak::Document) ⇒ Object
- #preprocess(source) ⇒ Object
- #remove_forbidden_characters(source) ⇒ Object
-
#render_image(image) ⇒ Object
As of version 1.12.0 of Kramdown, the block elements (div & figcaption) inside this HTML block will have their < > converted into HTML entities whenever this code is used inside block-level elements.
- #structured_headers ⇒ Object
- #to_html ⇒ Object
- #to_liquid ⇒ Object
- #to_text ⇒ Object
- #valid?(validation_options = {}) ⇒ Boolean
Constructor Details
#initialize(source, options = {}) ⇒ Document
Returns a new instance of Document.
51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
# File 'lib/govspeak.rb', line 51
#
# Builds a document from source text and a hash of options.
#
# @param source source text; it is duplicated, and a falsy value becomes ""
# @param options [Hash] string- or symbol-keyed options. :images,
#   :allowed_elements, :allowed_image_hosts, :attachments, :links and
#   :contacts are removed from the hash and stored on the instance; :locale
#   is read (default "en") but left in the hash; whatever remains is merged
#   over the default parser options.
def initialize(source, options = {})
  # Work on a symbol-keyed copy so the caller's hash is not mutated by the
  # deletes below.
  options = options.dup.deep_symbolize_keys
  @source = source ? source.dup : ""
  @images = options.delete(:images) || []
  @allowed_elements = options.delete(:allowed_elements) || []
  @allowed_image_hosts = options.delete(:allowed_image_hosts) || []
  @attachments = Array.wrap(options.delete(:attachments))
  @links = Array.wrap(options.delete(:links))
  @contacts = Array.wrap(options.delete(:contacts))
  @locale = options.fetch(:locale, "en")
  @options = { input: PARSER_CLASS_NAME, sanitize: true, syntax_highlighter: nil }.merge(options)
  # Assigned after the merge, so a caller-supplied :entity_output is overridden.
  @options[:entity_output] = :symbolic
  @footnote_definition_html = nil
  @acronyms = []
end
Class Attribute Details
.extensions ⇒ Object (readonly)
Returns the value of attribute extensions.
48 49 50 |
# File 'lib/govspeak.rb', line 48
#
# Class-level reader for the registered extensions.
#
# @return the current value of @extensions
def extensions
  @extensions
end
Instance Attribute Details
#attachments ⇒ Object (readonly)
Returns the value of attribute attachments.
41 42 43 |
# File 'lib/govspeak.rb', line 41
#
# Reader for the attachments given to the constructor.
#
# @return the current value of @attachments
def attachments
  @attachments
end
#contacts ⇒ Object (readonly)
Returns the value of attribute contacts.
41 42 43 |
# File 'lib/govspeak.rb', line 41
#
# Reader for the contacts given to the constructor.
#
# @return the current value of @contacts
def contacts
  @contacts
end
#images ⇒ Object
Returns the value of attribute images.
40 41 42 |
# File 'lib/govspeak.rb', line 40
#
# Reader for the document's images.
#
# @return the current value of @images
def images
  @images
end
#links ⇒ Object (readonly)
Returns the value of attribute links.
41 42 43 |
# File 'lib/govspeak.rb', line 41
#
# Reader for the links given to the constructor.
#
# @return the current value of @links
def links
  @links
end
#locale ⇒ Object (readonly)
Returns the value of attribute locale.
41 42 43 |
# File 'lib/govspeak.rb', line 41
#
# Reader for the document's locale.
#
# @return the current value of @locale
def locale
  @locale
end
Class Method Details
.devolved_options ⇒ Object
388 389 390 391 392 393 394 395 |
# File 'lib/govspeak.rb', line 388
#
# Maps each devolved-area key to its display name.
#
# @return [Hash{String => String}] a new hash on every call
def self.devolved_options
  {
    "scotland" => "Scotland",
    "england" => "England",
    "england-wales" => "England and Wales",
    "northern-ireland" => "Northern Ireland",
    "wales" => "Wales",
    "london" => "London",
  }
end
.extension(title, regexp = nil, &block) ⇒ Object
178 179 180 181 |
# File 'lib/govspeak.rb', line 178
#
# Registers an extension by appending a [title, regexp, block] triple to
# @extensions.
#
# @param title the extension's title; also interpolated into the default pattern
# @param regexp [Regexp, nil] pattern to register. When omitted (or falsy), a
#   multiline pattern matching "{::title}...{:/title}" and capturing the text
#   between the two tags is used.
# @param block handler stored alongside the pattern
# @return the value of `@extensions << triple`
def self.extension(title, regexp = nil, &block)
  pattern = regexp || %r${::#{title}}(.*?){:/#{title}}$m
  @extensions << [title, pattern, block]
end
.surrounded_by(open, close = nil) ⇒ Object
183 184 185 186 187 188 189 190 191 |
# File 'lib/govspeak.rb', line 183
#
# Builds a multiline regexp that captures the text between two delimiters.
# Both delimiters are escaped, so they are matched literally.
#
# With only +open+ given, the same delimiter may optionally close the span
# (the trailing `?` applies to the delimiter's last character only) and a
# line break or end of line is required after it. With +close+ given, the
# closing delimiter is required and the trailing line break is optional.
#
# @param open [String] opening delimiter
# @param close [String, nil] closing delimiter, if different from +open+
# @return [Regexp] capture group 1 is the enclosed text
def self.surrounded_by(open, close = nil)
  opener = Regexp.escape(open)
  return %r{(?:\r|\n|^)#{opener}(.*?)#{opener}? *(\r|\n|$)}m unless close

  closer = Regexp.escape(close)
  %r{(?:\r|\n|^)#{opener}(.*?)#{closer} *(\r|\n|$)?}m
end
.to_html(source, options = {}) ⇒ Object
43 44 45 |
# File 'lib/govspeak.rb', line 43
#
# Convenience wrapper: builds a document and renders it in one call.
#
# @param source source text passed to the constructor
# @param options [Hash] options passed to the constructor
# @return whatever the new instance's #to_html returns
def self.to_html(source, options = {})
  new(source, options).to_html
end
.wrap_with_div(class_name, character, parser = Kramdown::Document) ⇒ Object
193 194 195 196 197 198 |
# File 'lib/govspeak.rb', line 193
#
# Registers an extension that wraps text delimited by +character+ in a
# <div> carrying +class_name+.
#
# @param class_name used as both the extension title and the div's class
# @param character delimiter passed to .surrounded_by
# @param parser class used to render the stripped body (body plus a trailing
#   newline) to HTML; pass a falsy value to insert the stripped body as-is
# @return the result of the .extension call
def self.wrap_with_div(class_name, character, parser = Kramdown::Document)
  extension(class_name, surrounded_by(character)) do |body|
    trimmed = body.strip
    inner = parser ? parser.new("#{trimmed}\n").to_html : trimmed
    %(\n<div class="#{class_name}">\n#{inner}</div>\n)
  end
end
Instance Method Details
#extract_contact_content_ids ⇒ Object
117 118 119 120 121 122 |
# File 'lib/govspeak.rb', line 117
#
# Collects the distinct UUIDs referenced by "Contact" extension tags in the
# source.
#
# @return [Array] first captures of the "Contact" extension's pattern that
#   match UUID_REGEX, de-duplicated; empty when no "Contact" extension is
#   registered
def extract_contact_content_ids
  contact_extension = self.class.extensions.find { |(name, _pattern, _block)| name == "Contact" }
  _, pattern = contact_extension
  return [] unless pattern

  candidate_ids = @source.scan(pattern).map(&:first).uniq
  candidate_ids.select { |id| id.match(UUID_REGEX) }
end
#extracted_links(website_root: nil) ⇒ Object
113 114 115 |
# File 'lib/govspeak.rb', line 113
#
# Delegates link extraction to Govspeak::LinkExtractor.
#
# @param website_root forwarded unchanged to the extractor (default nil)
# @return the result of the extractor's #call
def extracted_links(website_root: nil)
  extractor = Govspeak::LinkExtractor.new(self, website_root: website_root)
  extractor.call
end
#footnote_definitions(source) ⇒ Object
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
# Scans the source for footnote and acronym definitions.
#
# - @acronyms is reassigned on every call to the captures of
#   /(?<=\*)\[(.*)\]:(.*)/.
# - When the source contains a $LegislativeList...$EndLegislativeList or
#   $CTA...$CTA span containing a "[^" footnote marker, and at least one
#   "[^n]: text" definition line exists, each definition is rendered through
#   Govspeak::Document and the resulting <li> items are wrapped in a
#   footnotes <div>/<ol> stored in @footnote_definition_html.
#
# NOTE(review): the final guard is `unless html.nil? && acronyms.positive?`,
# so add_acronym_alt_text is also called when @footnote_definition_html is
# nil and there are no acronyms. `if !html.nil? && acronyms.positive?` may
# have been intended; confirm against add_acronym_alt_text, which is not
# shown here.
# NOTE(review): line breaks and indentation inside the two heredocs are not
# recoverable from this single-line listing.
# File 'lib/govspeak.rb', line 138 def footnote_definitions(source) is_legislative_list = source.scan(/\$LegislativeList.*?\[\^\d\]*.*?\$EndLegislativeList/m).size.positive? is_cta = source.scan(/\$CTA.*?\[\^\d\]*.*?\$CTA/m).size.positive? footnotes = source.scan(/^\s*\[\^(\d+)\]:(.*)/) @acronyms = source.scan(/(?<=\*)\[(.*)\]:(.*)/) if (is_legislative_list || is_cta) && footnotes.size.positive? list_items = footnotes.map do |footnote| number = footnote[0] text = footnote[1].strip footnote_definition = Govspeak::Document.new(text).to_html[/(?<=<p>).*(?=<\/p>)/] <<~HTML_SNIPPET <li id="fn:#{number}" role="doc-endnote"> <p> #{footnote_definition}<a href="#fnref:#{number}" class="reversefootnote" role="doc-backlink" aria-label="go to where this is referenced">↩</a> </p> </li> HTML_SNIPPET end @footnote_definition_html = <<~HTML_CONTAINER <div class="footnotes" role="doc-endnotes"> <ol> #{list_items.join.strip} </ol> </div> HTML_CONTAINER end unless @footnote_definition_html.nil? && @acronyms.size.positive? add_acronym_alt_text(@footnote_definition_html) end end |
#headers ⇒ Object
105 106 107 |
# File 'lib/govspeak.rb', line 105
#
# Extracts headers from the parsed document.
#
# @return the first element of what Govspeak::HeaderExtractor.convert
#   returns for kramdown_doc
def headers
  converted = Govspeak::HeaderExtractor.convert(kramdown_doc)
  converted.first
end
#insert_strong_inside_p(body, parser = Govspeak::Document) ⇒ Object
200 201 202 |
# File 'lib/govspeak.rb', line 200
#
# Renders +body+ and wraps the content of the first single-line <p>...</p>
# in <strong>.
#
# The pattern is line-anchored and `.` does not cross line breaks, so only a
# paragraph that occupies one whole line matches, and only the first such
# paragraph is rewritten (String#sub).
#
# @param body [String] markup to render; surrounding whitespace is stripped
# @param parser class whose instances respond to #to_html
# @return [String] the rendered HTML with the first matching paragraph emphasised
def insert_strong_inside_p(body, parser = Govspeak::Document)
  rendered = parser.new(body.strip).to_html
  rendered.sub(%r{^<p>(.*)</p>$}, "<p><strong>\\1</strong></p>")
end
#preprocess(source) ⇒ Object
124 125 126 127 128 129 130 131 132 133 134 135 136 |
# File 'lib/govspeak.rb', line 124
#
# Prepares source text for parsing.
#
# Steps, in order: run Govspeak::BlockquoteExtraQuoteRemover, strip
# forbidden characters, record footnote definitions, then apply every
# registered extension by replacing each match of its pattern with the
# result of its block. The block is evaluated in the context of this
# instance and receives the match's captures as arguments.
#
# @param source [String] raw source text
# @return [String] the processed text
def preprocess(source)
  text = Govspeak::BlockquoteExtraQuoteRemover.remove(source)
  text = remove_forbidden_characters(text)
  footnote_definitions(text)

  self.class.extensions.each do |_title, pattern, handler|
    text.gsub!(pattern) { instance_exec(*Regexp.last_match.captures, &handler) }
  end

  text
end
#remove_forbidden_characters(source) ⇒ Object
172 173 174 175 176 |
# File 'lib/govspeak.rb', line 172
#
# Removes every match of Sanitize::REGEX_UNSUITABLE_CHARS from +source+.
#
# @param source [String] text to clean; not modified
# @return [String] a copy with the matched characters removed
def remove_forbidden_characters(source)
  # These are characters that are deemed not suitable for
  # markup: https://www.w3.org/TR/unicode-xml/#Charlist
  unsuitable = Sanitize::REGEX_UNSUITABLE_CHARS
  source.gsub(unsuitable, "")
end
#render_image(image) ⇒ Object
As of version 1.12.0 of Kramdown, the block elements (div & figcaption) inside this HTML block will have their < > converted into HTML entities whenever this code is used inside block-level elements.
To resolve this we have a post-processing task that will convert this back into HTML (I know - it’s ugly). The way we could resolve this without ugliness would be to output only inline elements, which rules out div and figcaption.
This issue is not considered a bug by kramdown: github.com/gettalong/kramdown/issues/191
317 318 319 320 321 322 323 324 325 |
# Builds the <figure> markup for an embedded image and returns it as one
# string (the collected pieces are joined with no separator).
#
# The figure gets an id of "attachment_<id>" only when image.id is truthy.
# The image URL and alt text are passed through #encode before being placed
# in the src and alt attributes.
#
# NOTE(review): in the `lines << image. if image.` statement the two method
# names after `image.` are missing from this listing. Presumably they are
# the caption markup and a caption-presence check; confirm against
# lib/govspeak.rb.
# File 'lib/govspeak.rb', line 317 def render_image(image) id_attr = image.id ? %( id="attachment_#{image.id}") : "" lines = [] lines << %(<figure#{id_attr} class="image embedded">) lines << %(<div class="img"><img src="#{encode(image.url)}" alt="#{encode(image.alt_text)}"></div>) lines << image. if image. lines << "</figure>" lines.join end |
#structured_headers ⇒ Object
109 110 111 |
# File 'lib/govspeak.rb', line 109
#
# Delegates to Govspeak::StructuredHeaderExtractor.
#
# @return the result of the extractor's #call for this document
def structured_headers
  extractor = Govspeak::StructuredHeaderExtractor.new(self)
  extractor.call
end
#to_html ⇒ Object
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
# File 'lib/govspeak.rb', line 70
#
# Renders the document to HTML, memoising the result in @to_html.
#
# When @options[:sanitize] is truthy the Kramdown output is passed through
# HtmlSanitizer with the configured allowed image hosts and elements;
# otherwise the Kramdown output is used as-is. If @footnote_definition_html
# has been set, it replaces the rendered footnotes <div>, or is appended
# when the HTML has none. The result is then handed to
# Govspeak::PostProcessor.
#
# @return the value returned by Govspeak::PostProcessor.process
def to_html
  @to_html ||= begin
    html = if @options[:sanitize]
             HtmlSanitizer.new(kramdown_doc.to_html, allowed_image_hosts: @allowed_image_hosts)
                          .sanitize(allowed_elements: @allowed_elements)
           else
             kramdown_doc.to_html
           end

    unless @footnote_definition_html.nil?
      # Match from the opening footnotes div through the last closing
      # </div>. Writing the tail as `[<\/div>]` would make it a character
      # class (any one of < / d i v >) rather than the closing tag, letting
      # the match run on to the last such character in the document.
      regex = /<div class="footnotes".*<\/div>/m

      if html.scan(regex).empty?
        html << @footnote_definition_html
      else
        # Block form inserts the markup literally; a string replacement
        # would interpret backslash sequences (e.g. \1) in footnote text.
        html.gsub!(regex) { @footnote_definition_html }
      end
    end

    Govspeak::PostProcessor.process(html, self)
  end
end
#to_liquid ⇒ Object
93 94 95 |
# File 'lib/govspeak.rb', line 93
#
# Alias-style wrapper around #to_html.
#
# @return exactly what #to_html returns
def to_liquid
  to_html
end
#to_text ⇒ Object
97 98 99 |
# File 'lib/govspeak.rb', line 97
#
# Produces a plain-text rendering of the document.
#
# @return the rendered HTML with every run of tags and/or whitespace
#   collapsed to one space, outer whitespace stripped, and HTML entities
#   decoded by HTMLEntities
def to_text
  decoder = HTMLEntities.new
  # One or more consecutive tags or whitespace characters become one space.
  flattened = to_html.gsub(/(?:<[^>]+>|\s)+/, " ").strip
  decoder.decode(flattened)
end
#valid?(validation_options = {}) ⇒ Boolean
101 102 103 |
# File 'lib/govspeak.rb', line 101
#
# Validates the stored source with Govspeak::HtmlValidator. The validator
# receives @source (the text as given to the constructor), not the
# rendered HTML.
#
# @param validation_options [Hash] forwarded unchanged to the validator
# @return [Boolean] the validator's #valid? result
def valid?(validation_options = {})
  Govspeak::HtmlValidator.new(@source, validation_options).valid?
end