Class: ArticleJSON::Export::AppleNews::Elements::Text

Inherits:
Base
  • Object
show all
Includes:
Common::HTML::Elements::Base, Common::HTML::Elements::Text
Defined in:
lib/article_json/export/apple_news/elements/text.rb

Constant Summary collapse

# Names of the HTML elements that #sanitize_text strips out of a text before
# it is exported.
UNSUPPORTED_HTML_TAGS = %w[
  title meta script noscript style link applet object iframe noframes
  form select option optgroup
].freeze

Instance Method Summary collapse

Methods included from Common::HTML::Elements::Base

included

Methods inherited from Base

namespace

Methods included from Common::Elements::Base

included, #initialize

Instance Method Details

#create_text_nodes(text) ⇒ Object

Parameters:

  • text (String)


23
24
25
# File 'lib/article_json/export/apple_news/elements/text.rb', line 23

# Builds the HTML nodes for a piece of text: the text is first passed through
# #sanitize_text, every newline is replaced by a `<br>` tag, and the result is
# parsed as an HTML fragment.
#
# @param text [String]
# @return [Object] the children of the parsed Nokogiri fragment
def create_text_nodes(text)
  markup = sanitize_text(text).gsub(/\n/, '<br>')
  Nokogiri::HTML.fragment(markup).children
end

#export ⇒ String

A Nokogiri object is returned by `super`, which is then returned either as a string or as HTML (when not plain text), both of which are compatible with the Apple News format. Takes into account bold, italic and href.

Returns:

  • (String)


18
19
20
# File 'lib/article_json/export/apple_news/elements/text.rb', line 18

# Calls the inherited #export and converts its result to a String via #to_s.
#
# @return [String]
def export
  rendered = super
  rendered.to_s
end

#sanitize_text(text) ⇒ String

Removes UNSUPPORTED_HTML_TAGS from text

Parameters:

  • text (String)

Returns:

  • (String)


31
32
33
34
35
36
37
# File 'lib/article_json/export/apple_news/elements/text.rb', line 31

# Parses the text as an HTML fragment, removes every element whose tag name is
# listed in UNSUPPORTED_HTML_TAGS, and returns the remaining markup.
#
# @param text [String]
# @return [String] inner HTML of the fragment after the removals
def sanitize_text(text)
  fragment = Nokogiri::HTML.fragment(text)
  UNSUPPORTED_HTML_TAGS.each { |tag_name| fragment.search(tag_name).each(&:remove) }
  fragment.inner_html
end