Class: Gitlab::Email::HTMLParser

Inherits:
Object
  • Object
show all
Defined in:
lib/gitlab/email/html_parser.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(raw_body) ⇒ HTMLParser

Returns a new instance of HTMLParser.



12
13
14
# File 'lib/gitlab/email/html_parser.rb', line 12

# Wraps the given body in a new parser instance. No parsing happens
# here; the value is only stored so later calls can read it.
#
# raw_body - the unparsed body to hold on to (presumably an HTML
#            string -- confirm against callers).
def initialize(raw_body)
  @raw_body = raw_body
end

Instance Attribute Details

#raw_body ⇒ Object (readonly)

Returns the value of attribute raw_body.



10
11
12
# File 'lib/gitlab/email/html_parser.rb', line 10

# Read-only accessor for the body this parser was constructed with.
#
# Returns whatever is currently held in @raw_body, unchanged.
def raw_body
  @raw_body
end

Class Method Details

.parse_reply(raw_body) ⇒ Object



6
7
8
# File 'lib/gitlab/email/html_parser.rb', line 6

# Convenience entry point: builds a parser for +raw_body+ and returns
# that parser's filtered text.
#
# raw_body - the body handed straight to the constructor.
#
# Returns the result of #filtered_text on the new instance.
def self.parse_reply(raw_body)
  parser = new(raw_body)
  parser.filtered_text
end

Instance Method Details

#document ⇒ Object



16
17
18
# File 'lib/gitlab/email/html_parser.rb', line 16

# Lazily parses the raw body with Nokogiri's HTML parser and caches the
# result in @document, so repeated calls hand back the same object.
#
# Returns the value produced by Nokogiri::HTML.parse(raw_body).
def document
  return @document if @document

  @document = Nokogiri::HTML.parse(raw_body)
end

#filter_replies! ⇒ Object



20
21
22
23
24
25
26
27
# File 'lib/gitlab/email/html_parser.rb', line 20

# Unwraps every <a> element that has no href attribute, mutating the
# parsed document in place: each such anchor is replaced by its own
# children.
#
# Outlook sometimes inserts these href-less links, and Html2Text can
# then emit stray square brackets in the text output.
#
# Returns the collection of anchors matched by the XPath query.
def filter_replies!
  anchors_without_href = document.xpath('//a[not(@href)]')
  anchors_without_href.each { |anchor| anchor.replace(anchor.children) }
end

#filtered_html ⇒ Object



29
30
31
32
33
34
# File 'lib/gitlab/email/html_parser.rb', line 29

# Runs the reply filter over the document and returns the document's
# inner HTML. The result is cached in @filtered_html, so the filter and
# serialization run only until a truthy value has been stored.
#
# Returns the value of document.inner_html taken after filtering.
def filtered_html
  return @filtered_html if @filtered_html

  # The filter must run first: it mutates the document being serialized.
  filter_replies!
  @filtered_html = document.inner_html
end

#filtered_text ⇒ Object



36
37
38
# File 'lib/gitlab/email/html_parser.rb', line 36

# Passes the filtered HTML to Gitlab::Email::HtmlToMarkdownParser.convert
# and caches the outcome in @filtered_text for subsequent calls.
#
# Returns whatever the converter produced for #filtered_html.
def filtered_text
  return @filtered_text if @filtered_text

  @filtered_text = ::Gitlab::Email::HtmlToMarkdownParser.convert(filtered_html)
end