Class: Indexer

Inherits:
Object
  • Object
show all
Defined in:
lib/picolena/templates/app/models/indexer.rb

Constant Summary collapse

@@exclude =

This regexp defines which files should not be indexed.

/(Thumbs\.db)/
@@threads_number =

Number of threads that will be used during the indexing process.

8

Class Method Summary collapse

Class Method Details

.add_or_update_file(complete_path) ⇒ Object



51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/picolena/templates/app/models/indexer.rb', line 51

# Pushes one document for +complete_path+ onto the index.
#
# The document is the extracted content/language merged with the default
# fields for the path. If extraction raises, or the extracted content is
# blank, only the default fields are indexed and the reason is logged.
#
# Returns the result of <tt>index << document</tt>.
def add_or_update_file(complete_path)
  fallback_fields = Document.default_fields_for(complete_path)
  entry = begin
    extracted = PlainTextExtractor.extract_content_and_language_from(complete_path)
    if extracted[:content].strip.empty?
      raise "empty document #{complete_path}"
    end
    extracted.merge!(fallback_fields)
    # The language is only appended to the log line when one is present.
    language_suffix = extracted[:language] ? " (#{extracted[:language]})" : ""
    log :debug => "Added : #{complete_path}" + language_suffix
    extracted
  rescue => error
    log :debug => "\tindexing without content: #{error.message}"
    fallback_fields
  end
  index << entry
end

.clear!(all = false) ⇒ Object

Ensures index is closed, and removes every index file for RAILS_ENV.



66
67
68
69
70
# File 'lib/picolena/templates/app/models/indexer.rb', line 66

# Closes the index, then deletes every regular file found (recursively)
# under the index save path. Directories themselves are left in place.
#
# all:: when truthy, Picolena::IndexesSavePath is emptied instead of
#       Picolena::IndexSavePath.
#
# Returns the list of paths matched by the glob.
def clear!(all=false)
  close
  base_dir = if all
    Picolena::IndexesSavePath
  else
    Picolena::IndexSavePath
  end
  pattern = File.join(base_dir, '**/*')
  Dir.glob(pattern).each do |entry|
    next unless File.file?(entry)
    FileUtils.rm(entry)
  end
end

.close ⇒ Object

Closes the index and ensures that a new Index is instantiated next time index is called.



74
75
76
77
78
# File 'lib/picolena/templates/app/models/indexer.rb', line 74

# Closes the current index, ignoring any StandardError raised while doing
# so, then forgets it so that the next call to index builds a new one.
def close
  begin
    @@index.close
  rescue StandardError
    nil
  end
  # Ferret will SEGFAULT otherwise.
  @@index = nil
end

.ensure_index_existence ⇒ Object



100
101
102
# File 'lib/picolena/templates/app/models/indexer.rb', line 100

# Triggers a full re-index (index_every_directory(:remove_first)) when no
# index exists yet. Does nothing if the index already exists or when
# RAILS_ENV is "production".
def ensure_index_existence
  return if index_exists? || RAILS_ENV == "production"

  index_every_directory(:remove_first)
end

.index ⇒ Object

Only one IndexWriter should be instantiated. If one index already exists, returns it. Creates it otherwise.



96
97
98
# File 'lib/picolena/templates/app/models/indexer.rb', line 96

# Returns the memoized Ferret index, building it from default_index_params
# on first use (or after it has been reset to nil).
def index
  # defined? keeps the first call safe while @@index is still unset.
  return @@index if defined?(@@index) && @@index

  @@index = Ferret::Index::Index.new(default_index_params)
end

.index_directory_with_multithreads(dir) ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/picolena/templates/app/models/indexer.rb', line 27

# Indexes the regular files found recursively under +dir+, skipping any
# path that matches @@exclude. The file list is split into
# @@threads_number slices which are handed to each_with_thread.
#
# A file is (re)indexed when @from_scratch is set, when it has no recorded
# index time, or when its mtime is later than the recorded index time.
# The recorded index time is refreshed for every visited file either way.
def index_directory_with_multithreads(dir)
  log :debug => "Indexing #{dir}, #{@@threads_number} threads"

  all_entries = Dir[File.join(dir, "**/*")]
  files_to_visit = all_entries.reject do |path|
    !File.file?(path) || path =~ @@exclude
  end
  slices = files_to_visit.in_transposed_slices(@@threads_number)

  prepare_multi_threads_environment

  slices.each_with_thread do |slice|
    slice.each do |complete_path|
      recorded_itime = index_time_dbm_file[complete_path]
      # Short-circuits in the same order as the reindex conditions above,
      # so File.mtime is only consulted when a recorded time exists.
      unchanged = !@from_scratch && recorded_itime &&
                  !(File.mtime(complete_path) > Time._load(recorded_itime))
      if unchanged
        log :debug => "Identical : #{complete_path}"
      else
        add_or_update_file(complete_path)
      end
      index_time_dbm_file[complete_path] = Time.now._dump
    end
  end
end

.index_every_directory(remove_first = false) ⇒ Object



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/picolena/templates/app/models/indexer.rb', line 10

# Indexes every directory listed in Picolena::IndexedDirectories, then
# optimizes the index and logs the elapsed time.
#
# remove_first:: when truthy, the index files are removed first (clear!)
#                and @from_scratch is set, which makes
#                index_directory_with_multithreads re-index every file
#                regardless of its recorded index time.
#
# @@do_not_disturb_while_indexing is true for the duration of the run and is
# always reset to false afterwards, including when indexing raises.
#
# Returns the result of the final log call.
def index_every_directory(remove_first=false)
  @@do_not_disturb_while_indexing=true
  begin
    clear! if remove_first
    @from_scratch = remove_first
    # Forces Finder.searcher and Finder.index to be reloaded, by removing them from the cache.
    Finder.reload!
    log :debug => "Indexing every directory"
    start=Time.now
    Picolena::IndexedDirectories.each{|dir, _alias_dir|
      index_directory_with_multithreads(dir)
    }
    log :debug => "Now optimizing index"
    index.optimize
  ensure
    # Reset the flag even on failure; otherwise an exception anywhere above
    # would leave it stuck at true for the rest of the process.
    @@do_not_disturb_while_indexing=false
  end
  log :debug => "Indexing done in #{Time.now-start} s."
end

.prune_index ⇒ Object

Checks for indexed files that are missing from the filesystem and removes them from the index and the dbm file.



83
84
85
86
87
88
89
90
91
# File 'lib/picolena/templates/app/models/indexer.rb', line 83

# Removes stale entries from the index and from the index-time dbm file.
#
# An entry is stale when its file no longer exists on disk, or when its
# path does not start with any directory in Picolena::IndexedDirectories.
# Each removal is logged, and the index is optimized afterwards.
#
# Returns the result of index.optimize.
def prune_index
  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  # NOTE(review): the directory check is a plain string-prefix match, so
  # "/data/foo2/x" matches an indexed dir "/data/foo" — confirm that the
  # configured directories make this harmless.
  stale_entries = index_time_dbm_file.reject do |filename, _itime|
    File.exist?(filename) &&
      Picolena::IndexedDirectories.any? { |dir, _alias_path| filename.start_with?(dir) }
  end
  stale_entries.each do |filename, _itime|
    index.writer.delete(:complete_path, filename)
    index_time_dbm_file.delete(filename)
    log :debug => "Removed : #{filename}"
  end
  index.optimize
end

.size ⇒ Object

Returns how many files are indexed.



105
106
107
# File 'lib/picolena/templates/app/models/indexer.rb', line 105

# Returns how many files are indexed, as reported by the index itself.
def size
  current_index = index
  current_index.size
end