Module: TextUtils::HypertextHelper

Defined in:: lib/textutils/helper/hypertext_helper.rb

Instance Method Summary collapse

#content_tag(tag, content, opts = {}) ⇒ Object

content tag w/ opening and closing tag (e.g. <p>hello</p>).
#image_tag(src, opts = {}) ⇒ Object
#link_to(content, href, opts = {}) ⇒ Object
#sanitize(ht, opts = {}) ⇒ Object

todo: rename to simple_hypertext or hypertext_simple, or keep sanitize?
#strip_tags(ht) ⇒ Object
#stylesheet_link_tag(href, opts = {}) ⇒ Object
#tag(tag, opts = {}) ⇒ Object

rails-style asset, url tag helpers and friends.
#textify(ht, opts = {}) ⇒ Object

ht -> hypertext.
#whitelist(ht, tags, opts = {}) ⇒ Object

Instance Method Details

#content_tag(tag, content, opts = {}) ⇒ `Object`

content tag (e.g. <p>hello</p> - w/ opening and closing tag)

# File 'lib/textutils/helper/hypertext_helper.rb', line 150

# Builds a content tag, i.e. an element with an opening and a closing tag
# wrapped around +content+ (e.g. <p>hello</p>).
#
# tag     - element name (string or symbol); interpolated as-is
# content - inner text/markup; interpolated as-is (NOT escaped)
# opts    - attributes; each pair is rendered as key='value'
#
# Returns the markup as a String.
def content_tag( tag, content, opts={} )
  # attribute values are wrapped in single quotes, thus, a literal single quote
  # inside a value must be escaped or it would end the attribute early
  attribs = opts.map { |key, value| "#{key}='#{value.to_s.gsub( "'", '&#39;' )}'" }

  return "<#{tag}>#{content}</#{tag}>"  if attribs.empty?

  "<#{tag} #{attribs.join(' ')}>#{content}</#{tag}>"
end

#image_tag(src, opts = {}) ⇒ `Object`

# File 'lib/textutils/helper/hypertext_helper.rb', line 173

# Builds an <img> tag for +src+.
#
# Entries in +opts+ are rendered as additional attributes; on a key collision
# the value from +opts+ replaces the default (e.g. a caller-supplied :src wins).
def image_tag( src, opts={} )
  defaults = { src: src }
  tag( :img, defaults.merge( opts ) )
end

#link_to(content, href, opts = {}) ⇒ `Object`

# File 'lib/textutils/helper/hypertext_helper.rb', line 179

# Builds an <a> element around +content+ pointing to +href+.
#
# Entries in +opts+ are rendered as additional attributes; on a key collision
# the value from +opts+ replaces the default (e.g. a caller-supplied :href wins).
def link_to( content, href, opts={} )
  defaults = { href: href }
  content_tag( :a, content, defaults.merge( opts ) )
end

#sanitize(ht, opts = {}) ⇒ `Object`

todo: rename to simple_hypertext or hypertext_simple, or keep sanitize?

# File 'lib/textutils/helper/hypertext_helper.rb', line 92

# Reduces hypertext (+ht+) to a small set of structural tags and prettifies
# literal urls.
#
# All tags except the ones listed below get removed by whitelist; +opts+ is
# passed through to whitelist unchanged.
#
# todo: add options for keeping links, images, lists (?too), code, codeblocks
def sanitize( ht, opts={} )
  allowed = [:br, :p, :ul, :ol, :li, :pre, :code, :blockquote, :q, :cite]

  # clean (prettify) literal urls, that is, strip the http:// or https:// protocol prefix
  whitelist( ht, allowed, opts ).gsub( /(http|https):\/\//, '' )
end

#strip_tags(ht) ⇒ `Object`

# File 'lib/textutils/helper/hypertext_helper.rb', line 7

# Strips markup tags from hypertext (+ht+) and returns the remaining text
# (brute force approach using regular expressions).
#
# Tag names follow the official xml spec:
#  - first char:     (Letter | '_' | ':')
#  - followup chars: (Letter | Digit | '_' | ':' | '.' | '-')
# thus, tags with a number such as h1/h2/h3/h4/h5/h6 are covered too.
#
# note: comments e.g. <!-- xxxx --> do NOT start with a tag name and are kept
# todo: add strip comments ??? or use new strip_comments( ht )
def strip_tags( ht )
  name = "[a-z_:][a-z0-9_:.\\-]*"

  # note: order matters - the passes run one after the other on the result of the previous pass
  passes = [
    "<#{name}\\s*/>",           # xml-style empty tags e.g. <br /> or <br/>
    "<#{name}(\\s+[^>]*)?>",    # opening tags e.g. <p> or <p class='x'>
    "</#{name}\\s*>"            # closing tags e.g. </p>
  ]

  passes.reduce( ht ) do |text, pattern|
    text.gsub( Regexp.new( pattern, Regexp::IGNORECASE ), '' )
  end
end

#stylesheet_link_tag(href, opts = {}) ⇒ `Object`

# File 'lib/textutils/helper/hypertext_helper.rb', line 164

# Builds a <link rel='stylesheet'> tag for +href+.
#
# The .css extension gets auto-added if +href+ does not already end with it.
# Entries in +opts+ are rendered as additional attributes; on a key collision
# the value from +opts+ replaces the default (rel, type, href).
def stylesheet_link_tag( href, opts={} )
  css_href = href.end_with?( '.css' ) ? href : "#{href}.css"
  defaults = { rel: 'stylesheet', type: 'text/css', href: css_href }
  tag( :link, defaults.merge( opts ) )
end

#tag(tag, opts = {}) ⇒ `Object`

rails-style asset, url tag helpers and friends

todo: move these tag helpers into a separate helper module (or modules)? why? why not?

# File 'lib/textutils/helper/hypertext_helper.rb', line 137

# Builds an empty tag, i.e. an opening tag only with no content and no
# closing tag (e.g. <br>, <img src=''> etc.).
#
# tag  - element name (string or symbol); interpolated as-is
# opts - attributes; each pair is rendered as key='value'
#
# Returns the markup as a String.
def tag( tag, opts={} )
  # attribute values are wrapped in single quotes, thus, a literal single quote
  # inside a value must be escaped or it would end the attribute early
  attribs = opts.map { |key, value| "#{key}='#{value.to_s.gsub( "'", '&#39;' )}'" }

  attribs.empty? ? "<#{tag}>" : "<#{tag} #{attribs.join(' ')}>"
end

#textify(ht, opts = {}) ⇒ `Object`

ht -> hypertext

# File 'lib/textutils/helper/hypertext_helper.rb', line 104

# Turns hypertext (+ht+) into plain (or markdown/wiki-style) text.
#
# note: work in progress - for now only step 1 (sanitize) is done and
#   its result gets returned as is; +opts+ is passed through to sanitize.
def textify( ht, opts={} )
  text = sanitize( ht, opts )   # step 1 - sanitize html

  ## to be done - ideas for the next steps (kept here as a sketch):
  #
  # strip bold
  #    text = text.gsub( /<b[^>]*>/, '**' )  # fix: will also swallow bxxx tags - add b space
  #    text = text.gsub( /<\/b>/, '**' )
  #
  # strip em
  #    text = text.gsub( /<em[^>]*>/, '__' )
  #    text = text.gsub( /<\/em>/, '__' )
  #
  #    text = text.gsub( /&nbsp;/, ' ' )
  #
  # try to cleanup whitespaces
  #    # -- keep no more than two spaces
  #    text = text.gsub( /[ \t]{3,}/, '  ' )
  #    # -- keep no more than two new lines
  #    text = text.gsub( /\n{2,}/m, "\n\n" )
  #    # -- remove all trailing spaces
  #    text = text.gsub( /[ \t\n]+$/m, '' )
  #    # -- remove all leading spaces
  #    text = text.gsub( /^[ \t\n]+/m, '' )

  text
end

#whitelist(ht, tags, opts = {}) ⇒ `Object`

# File 'lib/textutils/helper/hypertext_helper.rb', line 37

# Removes all markup from hypertext (+ht+) except for the tags listed in +tags+.
#
# Works in three steps:
#   1) park every whitelisted tag as a ‹tag› / ‹/tag› placeholder
#      (attributes get dropped, tag names get downcased)
#   2) replace images with a marker and strip all remaining tags
#   3) turn the placeholders back into real tags
#
# ht   - hypertext; note: assumes properly escaped <> in the text
# tags - tag names to keep (get interpolated into regular expressions)
# opts - :skip_restore => truthy returns the result after step 2 (for debugging)
#
# Returns the cleaned up hypertext as a String.
def whitelist( ht, tags, opts={} )

  ###############################################
  # step one - save whitelisted tags using ‹tag› placeholders
  #
  #  note: all attributes get stripped
  #  note: tags match case insensitive e.g. allow a,A or br,BR,bR etc.
  #          and get downcased in the placeholder
  #
  #  1st pass: xml-style empty tags e.g. <br/> or <br /> become ‹br›
  #  2nd pass: opening tags e.g. <p> become ‹p›
  #              note: \s+ before [^>] makes sure <br> will not get swallowed for <b>
  #  3rd pass: closing tags e.g. </p> become ‹/p›
  tags.each do |name|
    ht = ht
           .gsub( /<(#{name})\s*\/>/i )       { "‹#{Regexp.last_match( 1 ).downcase}›" }
           .gsub( /<(#{name})(\s+[^>]*)?>/i ) { "‹#{Regexp.last_match( 1 ).downcase}›" }
           .gsub( /<\/(#{name})\s*>/i )       { "‹/#{Regexp.last_match( 1 ).downcase}›" }
  end

  ############################
  # step two - clean tags

  # images get special treatment for debugging - use a black diamond marker e.g. ♦
  ht = ht.gsub( /<img[^>]*>/i, '♦' )
  ht = ht.gsub( /<\/img>/i, '' )    # closing image tags should not exist - drop anyway

  # strip all remaining tags
  #  -- note: will NOT strip comments for now e.g. <!-- -->
  ht = strip_tags( ht )

  ############################################
  # step three - restore whitelisted tags

  return ht   if opts[:skip_restore]   # skip step 3 for debugging

  tags.each do |name|
    ht = ht
           .gsub( /‹(#{name})›/ )   { "<#{Regexp.last_match( 1 )}>" }    # opening tag e.g. <p>
           .gsub( /‹\/(#{name})›/ ) { "</#{Regexp.last_match( 1 )}>" }   # closing tag e.g. </p>
  end

  ht
end

Module: TextUtils::HypertextHelper

Instance Method Summary collapse

Instance Method Details

#content_tag(tag, content, opts = {}) ⇒ Object

#image_tag(src, opts = {}) ⇒ Object

#link_to(content, href, opts = {}) ⇒ Object

#sanitize(ht, opts = {}) ⇒ Object

#strip_tags(ht) ⇒ Object

#stylesheet_link_tag(href, opts = {}) ⇒ Object

#tag(tag, opts = {}) ⇒ Object

#textify(ht, opts = {}) ⇒ Object

#whitelist(ht, tags, opts = {}) ⇒ Object

#content_tag(tag, content, opts = {}) ⇒ `Object`

#image_tag(src, opts = {}) ⇒ `Object`

#link_to(content, href, opts = {}) ⇒ `Object`

#sanitize(ht, opts = {}) ⇒ `Object`

#strip_tags(ht) ⇒ `Object`

#stylesheet_link_tag(href, opts = {}) ⇒ `Object`

#tag(tag, opts = {}) ⇒ `Object`

#textify(ht, opts = {}) ⇒ `Object`

#whitelist(ht, tags, opts = {}) ⇒ `Object`