Class: Govspeak::Document

Inherits:
Object
  • Object
show all
Defined in:
lib/govspeak.rb

Constant Summary collapse

# Alias for the Kramdown parser class whose name is supplied to Kramdown
# through the :input option (see PARSER_CLASS_NAME).
Parser =
Kramdown::Parser::KramdownWithAutomaticExternalLinks
# Unqualified name of Parser, i.e. the last "::"-separated segment of its
# full name. #initialize uses it as the default value of the :input option.
PARSER_CLASS_NAME =
Parser.name.split("::").last
# Case-insensitive pattern for a canonical hyphenated UUID whose version digit
# is 1-5 and whose variant digit is 8, 9, a or b.
# Anchored with \A and \z so the WHOLE string must be a UUID; the per-line
# anchors ^ and $ would accept a multi-line string that merely contains a
# UUID on one of its lines.
UUID_REGEX =
/\A[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}\z/i.freeze
NEW_PARAGRAPH_LOOKBEHIND =
%q{(?<=\A|\n\n|\r\n\r\n)}.freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(source, options = {}) ⇒ Document

Returns a new instance of Document.



49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/govspeak.rb', line 49

# Builds a document from Govspeak source text.
#
# @param source [String, nil] raw text; it is duplicated before being stored,
#   and a falsy value is stored as "".
# @param options [Hash] settings, read from a deep-symbolized copy so the
#   caller's hash is left untouched. :images, :attachments, :links and
#   :contacts are removed and stored on the instance; :locale is read
#   (default "en") but stays in the hash. Whatever remains is merged over the
#   defaults { input: PARSER_CLASS_NAME, sanitize: true }, and :entity_output
#   is always forced to :symbolic.
def initialize(source, options = {})
  settings = options.dup.deep_symbolize_keys

  @source = source ? source.dup : ""
  @images = settings.delete(:images) || []
  # The three collection options accept a single item or a list.
  @attachments, @links, @contacts =
    %i[attachments links contacts].map { |key| Array.wrap(settings.delete(key)) }
  @locale = settings.fetch(:locale, "en")

  # Must run after the deletes above so the extracted keys are not forwarded.
  @options = { input: PARSER_CLASS_NAME, sanitize: true }
             .merge(settings)
             .merge(entity_output: :symbolic)
end

Instance Attribute Details

#attachmentsObject (readonly)

Returns the value of attribute attachments.



39
40
41
# File 'lib/govspeak.rb', line 39

# Reader for @attachments, which #initialize fills from the :attachments
# option wrapped into an Array.
def attachments
  @attachments
end

#contactsObject (readonly)

Returns the value of attribute contacts.



39
40
41
# File 'lib/govspeak.rb', line 39

# Reader for @contacts, which #initialize fills from the :contacts option
# wrapped into an Array.
def contacts
  @contacts
end

#imagesObject

Returns the value of attribute images.



38
39
40
# File 'lib/govspeak.rb', line 38

# Reader for @images, which #initialize sets from the :images option,
# falling back to an empty Array when that option is nil or false.
def images
  @images
end

#linksObject (readonly)

Returns the value of attribute links.



39
40
41
# File 'lib/govspeak.rb', line 39

# Reader for @links, which #initialize fills from the :links option wrapped
# into an Array.
def links
  @links
end

#localeObject (readonly)

Returns the value of attribute locale.



39
40
41
# File 'lib/govspeak.rb', line 39

# Reader for @locale, which #initialize takes from the :locale option and
# defaults to "en".
def locale
  @locale
end

Class Method Details

.devolved_optionsObject



293
294
295
296
297
298
299
300
# File 'lib/govspeak.rb', line 293

# Maps each devolved-area slug to its display name.
#
# @return [Hash{String => String}] a freshly built hash on every call, in the
#   insertion order shown below
def self.devolved_options
  {
    "scotland" => "Scotland",
    "england" => "England",
    "england-wales" => "England and Wales",
    "northern-ireland" => "Northern Ireland",
    "wales" => "Wales",
    "london" => "London",
  }
end

.extension(title, regexp = nil, &block) ⇒ Object



115
116
117
118
# File 'lib/govspeak.rb', line 115

# Registers an extension by appending [title, regexp, block] to @extensions.
#
# @param title [String] extension name; also used to build the default
#   pattern when no regexp is given.
# @param regexp [Regexp, nil] pattern that triggers the extension. When nil,
#   a multiline pattern matching `{::title}body{:/title}` and capturing the
#   body is used.
# @yield block stored with the extension; #preprocess runs it through
#   instance_exec with the pattern's captures.
# @return [Array] the updated extensions list
def self.extension(title, regexp = nil, &block)
  regexp ||= begin
    # Escape the title so regexp metacharacters in it (".", "+", "(" ...)
    # match literally instead of widening or breaking the pattern.
    name = Regexp.escape(title.to_s)
    %r${::#{name}}(.*?){:/#{name}}$m
  end
  @extensions << [title, regexp, block]
end

.extensionsObject



45
46
47
# File 'lib/govspeak.rb', line 45

# Returns the class-level list that .extension appends
# [title, regexp, block] entries to.
def self.extensions
  @extensions
end

.surrounded_by(open, close = nil) ⇒ Object



120
121
122
123
124
125
126
127
128
# File 'lib/govspeak.rb', line 120

# Builds a multiline Regexp that captures the text enclosed by markers.
#
# Both markers are regexp-escaped, so they always match literally. The match
# must begin at a CR, an LF or a line start.
#
# @param open [String] opening marker
# @param close [String, nil] closing marker. When omitted the opening marker
#   is reused as the terminator; note that the trailing "?" then applies only
#   to the LAST character of the escaped marker, and a line terminator (or
#   line end) is required after it.
# @return [Regexp] group 1 is the enclosed text, group 2 the trailing line
#   terminator (optional when +close+ is given)
def self.surrounded_by(open, close = nil)
  opener = Regexp.escape(open)
  return %r{(?:\r|\n|^)#{opener}(.*?)#{opener}? *(\r|\n|$)}m unless close

  closer = Regexp.escape(close)
  %r{(?:\r|\n|^)#{opener}(.*?)#{closer} *(\r|\n|$)?}m
end

.to_html(source, options = {}) ⇒ Object



41
42
43
# File 'lib/govspeak.rb', line 41

# Convenience wrapper: builds a document from +source+ and +options+ (same
# arguments as #initialize) and returns the result of its #to_html.
def self.to_html(source, options = {})
  new(source, options).to_html
end

.wrap_with_div(class_name, character, parser = Kramdown::Document) ⇒ Object



130
131
132
133
134
135
# File 'lib/govspeak.rb', line 130

# Registers an extension that wraps marker-delimited text in a <div>.
#
# @param class_name [String] extension title, also used as the div's class
# @param character [String] marker handed to .surrounded_by to build the
#   trigger pattern
# @param parser [Class, nil] class whose instances respond to #to_html; it
#   receives the stripped body plus a trailing newline. Pass nil (or false)
#   to insert the stripped body without any conversion.
# @return [Object] whatever .extension returns
def self.wrap_with_div(class_name, character, parser = Kramdown::Document)
  pattern = surrounded_by(character)
  extension(class_name, pattern) do |body|
    trimmed = body.strip
    inner_html = parser ? parser.new("#{trimmed}\n").to_html : trimmed
    "\n<div class=\"#{class_name}\">\n#{inner_html}</div>\n"
  end
end

Instance Method Details

#extract_contact_content_idsObject



98
99
100
101
102
103
# File 'lib/govspeak.rb', line 98

# Collects the distinct contact ids embedded in the source.
#
# Looks up the pattern of the extension titled "Contact", scans @source with
# it, takes the first capture of every match, removes duplicates and keeps
# only the values that match UUID_REGEX.
#
# @return [Array<String>] ids in order of first appearance; [] when no
#   "Contact" extension (or no pattern for it) is registered
def extract_contact_content_ids
  contact_entry = self.class.extensions.find { |name, *| name == "Contact" }
  return [] if contact_entry.nil?

  pattern = contact_entry[1]
  return [] unless pattern

  candidates = @source.scan(pattern).map(&:first).uniq
  candidates.select { |id| id.match(UUID_REGEX) }
end


94
95
96
# File 'lib/govspeak.rb', line 94

# Delegates to Govspeak::LinkExtractor, passing this document and the given
# +website_root+ (nil by default), and returns the result of its #call.
def extracted_links(website_root: nil)
  Govspeak::LinkExtractor.new(self, website_root: website_root).call
end

#headersObject



86
87
88
# File 'lib/govspeak.rb', line 86

# Passes the Kramdown document to Govspeak::HeaderExtractor.convert and
# returns the first element of what it gives back.
def headers
  Govspeak::HeaderExtractor.convert(kramdown_doc).first
end

#insert_strong_inside_p(body, parser = Govspeak::Document) ⇒ Object



137
138
139
# File 'lib/govspeak.rb', line 137

# Renders +body+ and wraps the content of the first single-line paragraph in
# <strong>.
#
# @param body [String] source text; surrounding whitespace is stripped first
# @param parser [Class] class whose instances respond to #to_html
# @return [String] the rendered HTML, with the first line of the exact form
#   `<p>...</p>` rewritten to `<p><strong>...</strong></p>`; unchanged when
#   no line has that form
def insert_strong_inside_p(body, parser = Govspeak::Document)
  rendered = parser.new(body.strip).to_html
  rendered.sub(%r{^<p>(.*)</p>$}, '<p><strong>\1</strong></p>')
end

#preprocess(source) ⇒ Object



105
106
107
108
109
110
111
112
113
# File 'lib/govspeak.rb', line 105

# Applies every registered extension to the source text.
#
# The text first goes through Govspeak::BlockquoteExtraQuoteRemover.remove.
# Then, for each extension in registration order, every match of its pattern
# is replaced by the result of running its block in the context of this
# instance with the match's captures as arguments.
#
# @param source [String] text to transform
# @return [String] the transformed text. The string returned by the remover
#   is modified in place.
def preprocess(source)
  text = Govspeak::BlockquoteExtraQuoteRemover.remove(source)

  self.class.extensions.each do |(_title, pattern, handler)|
    text.gsub!(pattern) do
      captures = Regexp.last_match.captures
      instance_exec(*captures, &handler)
    end
  end

  text
end

#render_image(image) ⇒ Object

As of version 1.12.0 of Kramdown, the block elements (div & figcaption) inside this HTML block will have their < > converted into HTML entities whenever this code is used inside block-level elements.

To resolve this, we have a post-processing task that converts this back into HTML (I know - it’s ugly). The way we could resolve this without ugliness would be to output only inline elements, which rules out div and figcaption.

This issue is not considered a bug by kramdown: github.com/gettalong/kramdown/issues/191



253
254
255
256
257
258
259
260
261
# File 'lib/govspeak.rb', line 253

# Builds the <figure> markup for an embedded image as one string with no
# separators between its parts.
#
# @param image [#id, #url, #alt_text, #figcaption?, #figcaption_html]
#   image to render. An id attribute is emitted only when image.id is truthy;
#   url and alt_text are passed through #encode; figcaption_html is appended
#   only when figcaption? is truthy.
# @return [String] the figure HTML
def render_image(image)
  figure_attrs = image.id ? " id=\"attachment_#{image.id}\"" : ""
  opening_tag = "<figure#{figure_attrs} class=\"image embedded\">"
  img_wrapper = "<div class=\"img\"><img src=\"#{encode(image.url)}\" alt=\"#{encode(image.alt_text)}\"></div>"

  parts = [opening_tag, img_wrapper]
  parts << image.figcaption_html if image.figcaption?
  parts << "</figure>"
  parts.join
end

#structured_headersObject



90
91
92
# File 'lib/govspeak.rb', line 90

# Delegates to Govspeak::StructuredHeaderExtractor, passing this document,
# and returns the result of its #call.
def structured_headers
  Govspeak::StructuredHeaderExtractor.new(self).call
end

#to_htmlObject



62
63
64
65
66
67
68
69
70
71
72
# File 'lib/govspeak.rb', line 62

# Renders the document to HTML and memoizes the result in @to_html.
#
# The Kramdown output is run through HtmlSanitizer when the :sanitize option
# is truthy, and is always handed to Govspeak::PostProcessor.process together
# with this document.
#
# @return [Object] whatever Govspeak::PostProcessor.process returns; the same
#   value is reused on later calls while it is truthy
def to_html
  @to_html ||= begin
    sanitize = @options[:sanitize]
    rendered = kramdown_doc.to_html
    rendered = HtmlSanitizer.new(rendered).sanitize if sanitize
    Govspeak::PostProcessor.process(rendered, self)
  end
end

#to_liquidObject



74
75
76
# File 'lib/govspeak.rb', line 74

# Returns the same value as #to_html.
# NOTE(review): the name suggests this is the hook Liquid templates call on
# an object - confirm against the templating setup.
def to_liquid
  to_html
end

#to_textObject



78
79
80
# File 'lib/govspeak.rb', line 78

# Produces a plain-text version of the rendered document.
#
# Every run of HTML tags and/or whitespace in #to_html is collapsed to one
# space, the ends are stripped, and the result is decoded with HTMLEntities.
#
# @return [String] the text with markup removed and entities decoded
def to_text
  decoder = HTMLEntities.new
  flattened = to_html.gsub(/(?:<[^>]+>|\s)+/, " ")
  decoder.decode(flattened.strip)
end

#valid?(validation_options = {}) ⇒ Boolean

Returns:

  • (Boolean)


82
83
84
# File 'lib/govspeak.rb', line 82

# Reports whether Govspeak::HtmlValidator accepts the raw source.
#
# @param validation_options [Hash] passed through to the validator unchanged
# @return [Boolean] the validator's verdict for @source
def valid?(validation_options = {})
  Govspeak::HtmlValidator.new(@source, validation_options).valid?
end