Class: JekyllSearch::HtmlProcessor

Inherits:
Object
  • Object
show all
Defined in:
lib/jekyll_search/html_processor.rb

Class Method Summary collapse

Class Method Details

.detect_sections(input) ⇒ Object



21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/jekyll_search/html_processor.rb', line 21

# Splits an HTML fragment into sections delimited by headline elements.
#
# Only the top-level children of the parsed fragment are inspected. A node
# whose name is "h" followed by a single digit starts a new section; the HTML
# of every other node is appended to the content of the section currently
# being built.
#
# @param input HTML markup handed to Loofah.fragment (presumably a String)
# @return [Array<Hash>] sections in document order, each with the keys
#   :id      - result of extract_headline_id for the headline, nil for
#              content that precedes the first headline
#   :title   - text of the headline, nil before the first headline
#   :content - concatenated HTML of the non-headline nodes of the section
def self.detect_sections(input)
  sections = []
  # String.new('') guarantees an unfrozen buffer that can be appended to in place.
  current = { id: nil, title: nil, content: String.new('') }

  Loofah.fragment(input).children.each do |node|
    if node.name =~ /\Ah\d\z/
      # The section collected so far is emitted unconditionally, so a fragment
      # that begins with a headline yields a leading section without title
      # and with empty content.
      sections << current
      current = { id: extract_headline_id(node), title: node.text, content: String.new('') }
    else
      # Append in place: `+=` would copy the accumulated string for every node.
      current[:content] << node.to_html
    end
  end

  # The last section is dropped only when nothing at all was collected
  # (no headline seen and no content), e.g. for empty input.
  sections << current unless current[:title].nil? && current[:content].empty?

  sections
end

.extract_headline_id(node) ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/jekyll_search/html_processor.rb', line 42

# Determines the id to associate with a headline node.
#
# The node's own "id" attribute wins when present. Otherwise every node
# yielded by node.traverse is checked for an "id" value, and that value is
# used only when exactly one such node exists.
#
# @param node a node responding to has_attribute?, attribute, traverse and []
#   (presumably a Nokogiri element coming from Loofah)
# @return the id value, or nil when no id or more than one id was found
def self.extract_headline_id(node)
  return node.attribute('id').value if node.has_attribute?('id')

  found_ids = []
  node.traverse do |visited|
    id_value = visited['id']
    found_ids << id_value if id_value
  end

  # Several ids are ambiguous, none means there is nothing to return.
  found_ids.size == 1 ? found_ids.first : nil
end

.strip_html(input) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/jekyll_search/html_processor.rb', line 6

# Converts HTML markup into a single line of plain text.
#
# The fragment is first scrubbed with Loofah's built-in :prune scrubber, then
# with a custom scrubber that removes every <pre> element. The remaining text
# has each run of whitespace collapsed into one space and is stripped at both
# ends.
#
# @param input HTML markup handed to Loofah.fragment (presumably a String)
# @return [String] whitespace-normalized text
def self.strip_html(input)
  # Yields nil for ordinary nodes; for <pre> the node is detached and STOP is
  # returned (per Loofah's scrubber contract this ends traversal of that subtree).
  pre_remover = Loofah::Scrubber.new do |element|
    next unless element.name == 'pre'

    element.remove
    Loofah::Scrubber::STOP
  end

  pruned = Loofah.fragment(input).scrub!(:prune)
  without_pre = pruned.scrub!(pre_remover)
  plain_text = without_pre.to_text

  plain_text.gsub(/([\r\n\t\s]+)/, ' ').strip
end