Class: Gitlab::Email::HTMLParser

Inherits:
Object
  • Object
show all
Defined in:
lib/gitlab/email/html_parser.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(raw_body) ⇒ HTMLParser

Returns a new instance of HTMLParser.


12
13
14
# File 'lib/gitlab/email/html_parser.rb', line 12

# Builds a parser around a raw message body.
#
# The value is stored as given; nothing is parsed at construction time.
#
# @param raw_body [Object] the body to parse. It is later handed to
#   Nokogiri::HTML.parse by #document, so it is presumably an HTML string
#   (TODO confirm against callers).
def initialize(raw_body)
  @raw_body = raw_body
end

Instance Attribute Details

#raw_body ⇒ Object (readonly)

Returns the value of attribute raw_body.


10
11
12
# File 'lib/gitlab/email/html_parser.rb', line 10

# Reader for the body that was passed to the constructor.
#
# @return [Object] the value held in @raw_body, unmodified
def raw_body
  @raw_body
end

Class Method Details

.parse_reply(raw_body) ⇒ Object


6
7
8
# File 'lib/gitlab/email/html_parser.rb', line 6

# Convenience entry point: builds a parser for +raw_body+ and returns the
# result of calling #filtered_text on it.
#
# @param raw_body [Object] body forwarded unchanged to the constructor
# @return [Object] whatever #filtered_text returns for the new parser
def self.parse_reply(raw_body)
  parser = new(raw_body)
  parser.filtered_text
end

Instance Method Details

#document ⇒ Object


16
17
18
# File 'lib/gitlab/email/html_parser.rb', line 16

# Parsed form of #raw_body.
#
# The body is parsed with Nokogiri::HTML.parse on first use and the result is
# cached in @document, so later calls (and in-place mutations made by
# #filter_replies!) all operate on the same object.
#
# @return [Object] the value returned by Nokogiri::HTML.parse
def document
  return @document if @document

  @document = Nokogiri::HTML.parse(raw_body)
end

#filter_replies! ⇒ Object


20
21
22
23
24
25
26
27
28
29
30
# File 'lib/gitlab/email/html_parser.rb', line 20

# Strips unwanted markup from #document, mutating it in place.
#
# Every <blockquote> and every <table> element is removed (presumably these
# carry the quoted original message in a reply -- confirm with callers), and
# each <a> element without an href attribute is replaced by its own children.
#
# @return [Object] the node set of href-less anchors that was iterated last;
#   #filtered_html ignores this value
def filter_replies!
  # Blockquotes are removed before tables are looked up, one XPath at a time.
  ['//blockquote', '//table'].each do |unwanted_xpath|
    document.xpath(unwanted_xpath).each(&:remove)
  end

  # Outlook sometimes adds bogus links that have no href. Html2Text can
  # render those with extra square brackets in the text, so each such link
  # is unwrapped here, keeping only its children.
  hrefless_anchors = document.xpath('//a[not(@href)]')
  hrefless_anchors.each { |anchor| anchor.replace(anchor.children) }
end

#filtered_html ⇒ Object


32
33
34
35
36
37
# File 'lib/gitlab/email/html_parser.rb', line 32

# HTML of the document after #filter_replies! has been applied.
#
# On the first call the document is filtered in place and its inner HTML is
# cached in @filtered_html; subsequent calls return the cached value without
# filtering again.
#
# @return [Object] the value returned by document.inner_html
def filtered_html
  return @filtered_html if @filtered_html

  filter_replies!
  @filtered_html = document.inner_html
end

#filtered_text ⇒ Object


39
40
41
# File 'lib/gitlab/email/html_parser.rb', line 39

# Result of passing #filtered_html through Html2Text.convert.
#
# The conversion runs on first use and its result is cached in
# @filtered_text for later calls.
#
# @return [Object] the value returned by Html2Text.convert
def filtered_text
  @filtered_text ||= begin
    html = filtered_html
    Html2Text.convert(html)
  end
end