Class: SearchFilter

Inherits:
Nanoc::Filter
  • Object
show all
Defined in:
lib/nanoc-search.rb

Instance Method Summary collapse

Constructor Details

#initialize(hash = {}) ⇒ SearchFilter

Returns a new instance of SearchFilter.



9
10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/nanoc-search.rb', line 9

# Builds the filter, validates the IndexTank settings and connects to the index.
#
# Reads +@config[:indextank]+ and requires both +:api_url+ and +:index+ to be
# set. Also restores the timestamp of the previous indexing run from the
# '.nanoc_indextank' file via +load_last_timestamp+.
#
# @param hash [Hash] passed through unchanged to the superclass constructor
# @raise [ArgumentError] if the indextank section, its api_url or its index is missing
def initialize(hash = {})
  super

  # Fall back to an empty hash so that a completely missing indextank section
  # produces the descriptive ArgumentError below rather than a NoMethodError on nil.
  settings = @config[:indextank] || {}
  raise ArgumentError, 'Missing indextank:api_url.' unless settings[:api_url]
  raise ArgumentError, 'Missing indextank:index.' unless settings[:index]

  @last_indexed_file = '.nanoc_indextank'
  load_last_timestamp

  client = IndexTank::Client.new(settings[:api_url])
  @index = client.indexes(settings[:index])
end

Instance Method Details

#extract_text(content) ⇒ Object



53
54
55
56
# File 'lib/nanoc-search.rb', line 53

# Reduces an HTML document to its text content on a single line.
#
# @param content [String] markup handed to Nokogiri::HTML
# @return [String] the text nodes matched by '//*/text()', joined with single
#   spaces, with every carriage return and line feed replaced by a space
def extract_text(content)
  text_nodes = Nokogiri::HTML(content).xpath('//*/text()').to_a
  # One tr pass replaces both "\r" and "\n" with a space each.
  text_nodes.join(' ').tr("\r\n", ' ')
end

#load_last_timestamp ⇒ Object



66
67
68
69
70
71
72
# File 'lib/nanoc-search.rb', line 66

# Restores +@last_indexed+ from the marshalled value stored in +@last_indexed_file+.
#
# Best effort: if the file cannot be opened or unmarshalled (any StandardError),
# +@last_indexed+ is reset to nil.
#
# @return [Object, nil] the value assigned to +@last_indexed+
def load_last_timestamp
  @last_indexed = File.open(@last_indexed_file, 'rb') { |io| Marshal.load(io) }
rescue StandardError
  @last_indexed = nil
end

#run(content, params = {}) ⇒ Object

Index all pages except pages matching any value in config[:excludes]. The main content from each page is extracted and indexed at indextank.com. The doc_id of each indextank document will be the absolute URL to the resource, without the domain name.



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/nanoc-search.rb', line 26

# Sends the text of the current item to the IndexTank index and returns the
# content unchanged, so the filter has no effect on the compiled output.
#
# The item is skipped when a previous indexing timestamp exists and is newer
# than the item's mtime. Otherwise the method waits until the index reports
# that it is running, adds a document keyed by the item identifier (fields
# :text and :title), then records the current time via +write_last_indexed+.
#
# NOTE(review): the stored timestamp is a single value shared by all items and
# is rewritten after each indexed item. If a fresh filter instance reloads it
# for every item, later unchanged-but-unindexed items may be skipped - confirm.
#
# @param content [String] rendered page content
# @param params [Hash] accepted for filter API compatibility; not used here
# @return [String] +content+, unmodified
def run(content, params = {})
  # Only process items that changed since the last recorded indexing run.
  return content if @last_indexed && @last_indexed > @item.mtime

  identifier = @item.identifier
  puts "Indexing page #{identifier}"

  # Wait for the indextank index to get ready.
  sleep 0.5 until @index.running?

  @index.document(identifier).add(
    :text => extract_text(content),
    :title => @item[:title] || identifier
  )
  # Interpolation works for any identifier type and does not mutate a literal.
  puts "Indexed #{identifier}"

  @last_indexed = Time.now
  write_last_indexed

  content
end

#write_last_indexed ⇒ Object



58
59
60
61
62
63
64
# File 'lib/nanoc-search.rb', line 58

# Persists +@last_indexed+ to +@last_indexed_file+ using Marshal.
#
# Best effort: if the file cannot be written (any StandardError), a warning is
# printed and no exception is raised.
def write_last_indexed
  # Marshal output is binary, so open in binary mode to match the "rb" reader
  # and avoid newline translation on platforms that perform it in text mode.
  File.open(@last_indexed_file, 'wb') { |f| Marshal.dump(@last_indexed, f) }
rescue StandardError
  puts 'WARNING: cannot write indexed timestamps file.'
end