Class: Jekyll::LunrJsSearch::Indexer

Inherits:
Generator
  • Object
show all
Defined in:
lib/jekyll_lunr_js_search/indexer.rb

Instance Method Summary collapse

Constructor Details

#initialize(config = {}) ⇒ Indexer

Returns a new instance of Indexer.



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/jekyll_lunr_js_search/indexer.rb', line 10

# Sets up the indexer from the site configuration.
#
# Options under the 'lunr_search' key of +config+ are laid over the built-in
# defaults (top-level keys only). The lunr.js build shipped next to this
# library is preferred; otherwise a copy inside the configured JS directory
# is used. lunr.js is then loaded into a V8 context to create the index.
#
# config - Hash of site configuration (defaults to an empty Hash).
#
# Raises RuntimeError if no lunr.min.js can be found at either location.
def initialize(config = {})
  super(config)

  defaults = {
    'excludes' => [],
    'strip_index_html' => false,
    'min_length' => 3,
    'stopwords' => 'stopwords.txt',
    'fields' => {
      'title' => 10,
      'tags' => 20,
      'body' => 1
    },
    'js_dir' => 'js'
  }
  settings = defaults.merge(config['lunr_search'] || {})

  @js_dir = settings['js_dir']

  # Prefer the bundled build; fall back to the same file name under js_dir.
  bundled_lunr = File.join(File.dirname(__FILE__), "../../build/lunr.min.js")
  @lunr_path =
    if File.exist?(bundled_lunr)
      bundled_lunr
    else
      File.join(@js_dir, File.basename(bundled_lunr))
    end
  raise "Could not find #{@lunr_path}" unless File.exist?(@lunr_path)

  js = V8::Context.new
  js.load(@lunr_path)
  # Callback handed to lunr(): declares the ref field and every boosted field.
  js['indexer'] = proc do |this|
    this.ref('id')
    settings['fields'].each do |field_name, weight|
      this.field(field_name, { 'boost' => weight })
    end
  end
  @index = js.eval('lunr(indexer)')
  @lunr_version = js.eval('lunr.version')
  @docs = {}
  @excludes = settings['excludes']

  # When enabled, the index.html suffix is stripped from entry URLs
  # (useful when the web host serves index.html as the default document).
  @strip_index_html = settings['strip_index_html']

  # Stop word filtering: minimum word length and the stop word list file.
  @min_length = settings['min_length']
  @stopwords_file = settings['stopwords']
end

Instance Method Details

#generate(site) ⇒ Object

Indexes all pages except those matching any value in the configured excludes (or otherwise excluded by their own page data). The main content from each page is extracted and saved to disk as JSON.



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/jekyll_lunr_js_search/indexer.rb', line 54

# Builds the lunr.js search index for the site and writes it to disk.
#
# Every item returned by pages_to_index is turned into a SearchEntry, added
# to the lunr index (including its body) and recorded in @docs (without its
# body). The docs and the serialized index are written together as
# <site.dest>/<js_dir>/index.json. The minified JavaScript files found next
# to @lunr_path are copied into the site's JS directory, and every written
# file is registered as a static file so that Jekyll does not clean it up.
#
# site - The Jekyll site being generated.
#
# Returns nothing meaningful.
def generate(site)
  Jekyll.logger.info "Lunr:", 'Creating search index...'

  @site = site
  # gather pages and posts
  items = pages_to_index(site)
  content_renderer = PageRenderer.new(site)

  # File.exists? was removed in Ruby 3.2; File.exist? is the supported name.
  # The answer does not change per item, so it is checked once up front.
  filter_stopwords = File.exist?(@stopwords_file)

  items.each_with_index do |item, i|
    entry = SearchEntry.create(item, content_renderer)

    entry.strip_index_suffix_from_url! if @strip_index_html
    entry.strip_stopwords!(stopwords, @min_length) if filter_stopwords

    doc = {
      "id" => i,
      "title" => entry.title,
      "url" => entry.url,
      "date" => entry.date,
      "categories" => entry.categories,
      "body" => entry.body
    }

    @index.add(doc)
    # The body is only needed by the index itself; keep stored docs small.
    doc.delete("body")
    @docs[i] = doc

    Jekyll.logger.debug "Lunr:", (entry.title ? "#{entry.title} (#{entry.url})" : entry.url)
  end

  site_js = File.join(site.dest, @js_dir)
  FileUtils.mkdir_p(site_js)
  filename = File.join(@js_dir, 'index.json')

  total = {
    "docs" => @docs,
    "index" => @index.to_hash
  }

  filepath = File.join(site.dest, filename)
  File.open(filepath, "w") { |f| f.write(JSON.dump(total)) }
  Jekyll.logger.info "Lunr:", "Index ready (lunr.js v#{@lunr_version})"
  added_files = [filename]

  # If we're using the gem, add the lunr and search JS files to the _site
  # NOTE(review): the right-hand side is not expanded, so this comparison
  # looks like it is almost always true — confirm that is intended.
  if File.expand_path(site_js) != File.dirname(@lunr_path)
    extras = Dir.glob(File.join(File.dirname(@lunr_path), "*.min.js"))
    FileUtils.cp(extras, site_js)
    extras.map! { |min| File.join(@js_dir, File.basename(min)) }
    Jekyll.logger.debug "Lunr:", "Added JavaScript to #{@js_dir}"
    added_files.push(*extras)
  end

  # Keep the written files from being cleaned by Jekyll
  added_files.each do |added_file|
    site.static_files << SearchIndexFile.new(site, site.dest, "/", added_file)
  end
end