Class: Gitlab::Email::HTMLParser

Inherits:
Object
  • Object
show all
Defined in:
lib/gitlab/email/html_parser.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(raw_body) ⇒ HTMLParser

Returns a new instance of HTMLParser.


11
12
13
# File 'lib/gitlab/email/html_parser.rb', line 11

# Wraps the raw body of an email so it can be parsed and filtered on demand.
#
# No parsing happens here; the value is only stored.
#
# @param raw_body [Object] the markup later handed to Nokogiri::HTML.parse
#   (presumably an HTML String — confirm against callers)
def initialize(raw_body)
  @raw_body = raw_body
end

Instance Attribute Details

#raw_body ⇒ Object (readonly)

Returns the value of attribute raw_body


10
11
12
# File 'lib/gitlab/email/html_parser.rb', line 10

# Read-only accessor for the body given to the constructor.
#
# @return [Object] the value stored in @raw_body, unchanged
def raw_body
  @raw_body
end

Class Method Details

.parse_reply(raw_body) ⇒ Object


6
7
8
# File 'lib/gitlab/email/html_parser.rb', line 6

# Convenience entry point: builds a parser for +raw_body+ and returns its
# filtered plain-text rendering.
#
# @param raw_body [Object] forwarded untouched to the constructor
# @return [Object] whatever #filtered_text yields for that body
def self.parse_reply(raw_body)
  parser = new(raw_body)
  parser.filtered_text
end

Instance Method Details

#document ⇒ Object


15
16
17
# File 'lib/gitlab/email/html_parser.rb', line 15

# Parsed representation of #raw_body.
#
# The body is parsed on first access and the result is cached in @document,
# so later calls (and in-place mutations of the document) share one object.
#
# @return [Object] the value returned by Nokogiri::HTML.parse
def document
  @document = Nokogiri::HTML.parse(raw_body) unless @document
  @document
end

#filter_replies! ⇒ Object


19
20
21
22
23
24
25
26
27
28
29
# File 'lib/gitlab/email/html_parser.rb', line 19

# Mutates #document in place:
#
# 1. every +blockquote+ element is removed, then every +table+ element;
# 2. every +a+ element lacking an +href+ attribute is replaced by its own
#    children, i.e. the anchor tag is dropped but its content is kept.
#
# @return [Object] the result of iterating the href-less anchors
def filter_replies!
  %w[//blockquote //table].each do |unwanted_path|
    document.xpath(unwanted_path).each(&:remove)
  end

  # Outlook sometimes emits bogus anchors without an href. Html2Text can
  # render those with extra square brackets, so the anchors are unwrapped
  # before conversion.
  document.xpath('//a[not(@href)]').each { |anchor| anchor.replace(anchor.children) }
end

#filtered_html ⇒ Object


31
32
33
34
35
36
# File 'lib/gitlab/email/html_parser.rb', line 31

# Inner HTML of #document after #filter_replies! has been applied.
#
# The filtering pass and serialization run on the first call only; the
# resulting value is cached in @filtered_html and returned thereafter.
#
# @return [Object] the value of document.inner_html after filtering
def filtered_html
  return @filtered_html if @filtered_html

  filter_replies!
  @filtered_html = document.inner_html
end

#filtered_text ⇒ Object


38
39
40
# File 'lib/gitlab/email/html_parser.rb', line 38

# Text rendering of #filtered_html, produced by Html2Text.convert.
#
# The conversion result is cached in @filtered_text after the first call.
#
# @return [Object] the value returned by Html2Text.convert
def filtered_text
  @filtered_text || (@filtered_text = Html2Text.convert(filtered_html))
end