Class: ClWiki::Indexer

Inherits:
Object
  • Object
show all
Defined in:
lib/cl_wiki/index.rb

Constant Summary collapse

WAIT =
true

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(wiki_conf = $wiki_conf, fn = nil) ⇒ Indexer

Returns a new instance of Indexer.



23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/cl_wiki/index.rb', line 23

# Creates an indexer for the wiki described by +wiki_conf+ and immediately
# loads any previously saved index data through #load.
#
# @param wiki_conf configuration object; this method reads its
#   +access_log_index+ and +wiki_path+ (defaults to the global $wiki_conf)
# @param fn optional path of a status log file; when nil, do_puts writes
#   status text to standard output instead
def initialize(wiki_conf=$wiki_conf, fn=nil)
  @wiki_conf = wiki_conf
  @fn = fn
  @record_hits = true

  @index, @recent, @pages = ClIndex.new, ClIndex.new, ClIndex.new
  # The hit index is only created when access logging is switched on.
  @hits = ClIndex.new if @wiki_conf.access_log_index
  @rootDir = @wiki_conf.wiki_path
  load
end

Instance Attribute Details

#index ⇒ Object (readonly)

Returns the value of attribute index.



15
16
17
# File 'lib/cl_wiki/index.rb', line 15

# Read-only accessor: returns the value of the @index instance variable
# (assigned a ClIndex in the constructor).
def index
  @index
end

Class Method Details

.defaultPort ⇒ Object



19
20
21
# File 'lib/cl_wiki/index.rb', line 19

# Returns the default port as the String '9111' (a string, not an Integer).
# NOTE(review): which service uses this port is not visible here — confirm
# against the callers.
def self.defaultPort
  '9111'
end

Instance Method Details

#add_hit(fullPageName) ⇒ Object



276
277
278
279
280
281
282
283
284
285
# File 'lib/cl_wiki/index.rb', line 276

# Records one access to +fullPageName+ in the hit index and saves the hit
# index on a separate thread.
#
# Does nothing (and returns nil) unless hit recording is currently enabled
# (@record_hits) and the configuration has +access_log_index+ set.
#
# @param fullPageName [String] page name; concatenated into the status line
# @return whatever @wiki_conf.wait_on_thread returns, or nil when skipped
def add_hit(fullPageName)
  return unless @record_hits && @wiki_conf.access_log_index

  put_status('Hit on ' + fullPageName)
  @hits.add(fullPageName, Time.now, WAIT)
  # The save runs on its own thread; the configuration object decides
  # whether/how to wait for it.
  saver = Thread.new { @hits.save(hits_filename, WAIT) }
  @wiki_conf.wait_on_thread(saver)
end

#add_to_index(term, fullPageName) ⇒ Object



93
94
95
# File 'lib/cl_wiki/index.rb', line 93

# Adds +fullPageName+ to the main index (@index) under +term+.
# Delegates directly to @index.add, passing the WAIT constant as the third
# argument.
def add_to_index(term, fullPageName)
  @index.add(term, fullPageName, WAIT)
end

#add_to_pages(fullPageName) ⇒ Object



103
104
105
# File 'lib/cl_wiki/index.rb', line 103

# Registers +fullPageName+ in the pages index (@pages). The page name is the
# term and the stored value is nil; page_exists? later checks for the term.
def add_to_pages(fullPageName)
  @pages.add(fullPageName, nil, WAIT)
end

#add_to_recent(modTime, fullPageName) ⇒ Object



97
98
99
100
101
# File 'lib/cl_wiki/index.rb', line 97

# Records +fullPageName+ in the recent-changes index under its modification
# time, formatted as "YYYY-MM-DDTHH:MM:SS".
#
# @param modTime object responding to #strftime (a Time in practice)
# @param fullPageName page name to file under that timestamp
# @return the result of @recent.add
def add_to_recent(modTime, fullPageName)
  # Only the latest modification time matters, so drop any earlier entry
  # for this page before adding the new one.
  @recent.remove(fullPageName, WAIT)
  stamp = modTime.strftime("%Y-%m-%dT%H:%M:%S")
  @recent.add(stamp, fullPageName, WAIT)
end

#build(limit = -1, purge = false) ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/cl_wiki/index.rb', line 46

# Walks every file under @rootDir whose name ends in $wikiPageExt and indexes
# it with index_page, reporting progress through do_puts.
#
# Hit recording (@record_hits) is switched off for the duration of the build
# and always switched back on afterwards, even if indexing raises.
#
# @param limit [Integer] maximum number of files to index; -1 means no limit
# @param purge [Boolean] passed through to index_page
# @raise [RuntimeError] when $wikiPageExt is nil or empty
def build(limit=-1, purge=false)
  @record_hits = false
  begin
    fileCount = 0
    # for debugging only, really; to_s makes a nil extension raise the same
    # message instead of a NoMethodError
    raise '$wikiPageExt not set' if $wikiPageExt.to_s.empty?
    files = Dir[::File.join(@rootDir, '**/*' + $wikiPageExt)]
    progress = Progress.new(limit == -1 ? files.length : limit)
    progress.start
    # Match the extension literally and only at the end of the path. An
    # unescaped, unanchored pattern lets the '.' match any character, so
    # '/mytxtpage.txt' would be mangled into '/mpage.txt'.
    ext_at_end = /#{Regexp.escape($wikiPageExt)}\z/
    files.each do |fn|
      next if !::File.file?(fn)
      break if (limit > -1) && (fileCount >= limit)
      fileCount += 1
      # Page name = path relative to the wiki root, without the extension.
      fullName = fn.sub(@rootDir, '').sub(ext_at_end, '')
      index_page(fullName, purge)
      do_puts progress.progress(true)
    end
  ensure
    @record_hits = true
  end
end

#do_puts(text) ⇒ Object



36
37
38
39
40
41
42
43
44
# File 'lib/cl_wiki/index.rb', line 36

# Emits one line of status text: appended to the file named by @fn when a
# status file was given, otherwise printed to standard output.
#
# @param text the line to write
def do_puts(text)
  return puts(text) unless @fn

  # 'a+' appends, creating the file if needed; it is reopened on every call.
  ::File.open(@fn, 'a+') { |log| log.puts text }
end

#dump ⇒ Object



208
209
210
211
212
213
# File 'lib/cl_wiki/index.rb', line 208

# Writes text dumps of every index through dump_clindex, using the prefixes
# 'index', 'recent' and 'pages', plus 'hits' when the configuration has
# +access_log_index+ set.
def dump
  [[@index, 'index'], [@recent, 'recent'], [@pages, 'pages']].each do |clindex, prefix|
    dump_clindex(clindex, prefix)
  end
  dump_clindex(@hits, 'hits') if @wiki_conf.access_log_index
end

#dump_clindex(aindex, fn_prefix) ⇒ Object



189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# File 'lib/cl_wiki/index.rb', line 189

# Writes two plain-text dumps of one index into the current directory
# (or wherever +fn_prefix+ points):
#
# * "<fn_prefix>.keys.dump.txt" — every key, sorted, one per line
# * "<fn_prefix>.full.dump.txt" — every entry, sorted, as
#   "<key.inspect> => <value.inspect>"
#
# @param aindex object whose #index returns the Hash to dump
# @param fn_prefix [String] file name prefix for both dump files
def dump_clindex(aindex, fn_prefix)
  put_status "Dumping #{fn_prefix}..." do
    hash = aindex.index
    ::File.open(fn_prefix + '.keys.dump.txt', 'w+') do |f|
      hash.keys.sort.each { |key| f.puts key }
    end
    # Use the top-level ::File here as well, like every other file access in
    # this class; a bare File could resolve to a constant in the enclosing
    # namespace instead of Ruby's File class.
    ::File.open(fn_prefix + '.full.dump.txt', 'w+') do |f|
      hash.sort.each do |key, value|
        f.puts key.inspect + " => " + value.inspect
      end
    end
  end
end

#hit_summary(start_index = 0, end_index = -1) ⇒ Object



287
288
289
290
291
292
293
294
295
# File 'lib/cl_wiki/index.rb', line 287

# Returns hit-index entries ordered by number of recorded hits, most-hit
# first, sliced to the inclusive range start_index..end_index.
#
# Each element is a [key, values] pair taken from a copy of the hit index
# made inside @hits.do_read. Returns nil when the configuration does not have
# +access_log_index+ set.
#
# @param start_index [Integer] first position to return (default 0)
# @param end_index [Integer] last position to return, inclusive (-1 = to end)
def hit_summary(start_index=0, end_index=-1)
  return unless @wiki_conf.access_log_index

  snapshot = nil
  @hits.do_read(WAIT) { snapshot = @hits.index.dup }
  ranked = snapshot.sort { |(_, a_hits), (_, b_hits)| b_hits.length <=> a_hits.length }
  ranked[start_index..end_index]
end

#hits_filename ⇒ Object



107
108
109
# File 'lib/cl_wiki/index.rb', line 107

# Absolute path of the hit index file, 'hits.dat', inside the directory named
# by the global $wiki_path.
# NOTE(review): this reads the global $wiki_path rather than the wiki_path of
# the configuration passed to the constructor — confirm the two always agree.
def hits_filename
  wiki_dir = ::File.expand_path($wiki_path)
  ::File.join(wiki_dir, 'hits.dat')
end

#index_filename ⇒ Object



111
112
113
# File 'lib/cl_wiki/index.rb', line 111

# Absolute path of the main index file, 'index.dat', inside the directory
# named by the global $wiki_path.
# NOTE(review): this reads the global $wiki_path rather than the wiki_path of
# the configuration passed to the constructor — confirm the two always agree.
def index_filename
  wiki_dir = ::File.expand_path($wiki_path)
  ::File.join(wiki_dir, 'index.dat')
end

#index_page(fullName, purge = false) ⇒ Object



72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/cl_wiki/index.rb', line 72

# Indexes a single page, or purges it.
#
# The page is read from @rootDir. When +purge+ is true and the page reports
# content_never_edited?, the page is deleted and removed from the index.
# Otherwise every word yielded by the page formatter's formatLinks is indexed
# (downcased), the page name itself is indexed, and the page is added to the
# pages and recent indexes.
#
# @param fullName [String] page name as used throughout the index
# @param purge [Boolean] delete never-edited pages instead of indexing them
def index_page(fullName, purge=false)
  put_status "indexing #{fullName}" do
    page = ClWiki::Page.new(fullName, @rootDir)
    page.read_raw_content
    # content_never_edited? is only consulted when a purge was requested.
    if !purge || !page.content_never_edited?
      link_scanner = ClWiki::PageFormatter.new(page.raw_content, fullName)
      link_scanner.formatLinks { |word| add_to_index(word.downcase, fullName) }
      # The page name is itself a searchable term.
      add_to_index(fullName, fullName)
      add_to_pages(fullName)

      add_to_recent(page.mtime, fullName)
    else
      put_status("purging #{fullName}")
      page.delete
      remove_page_from_index(fullName)
    end
  end
end

#load ⇒ Object



170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/cl_wiki/index.rb', line 170

# Loads each index from its data file, skipping any file that does not exist.
# The hit index is only considered when the configuration has
# +access_log_index+ set. Progress is reported through put_status.
def load
  put_status('Loading') do
    put_status('Loading Main') do
      main_file = index_filename
      @index.load(main_file, WAIT) if ::File.exist?(main_file)
    end
    put_status('Loading Recent') do
      recent_file = recent_filename
      @recent.load(recent_file, WAIT) if ::File.exist?(recent_file)
    end
    put_status('Loading Pages') do
      pages_file = pages_filename
      @pages.load(pages_file, WAIT) if ::File.exist?(pages_file)
    end
    if @wiki_conf.access_log_index
      put_status('Loading Hits') do
        hits_file = hits_filename
        @hits.load(hits_file, WAIT) if ::File.exist?(hits_file)
      end
    end
  end
end

#page_exists?(fullPageName) ⇒ Boolean

Returns:

  • (Boolean)


270
271
272
273
274
# File 'lib/cl_wiki/index.rb', line 270

# Reports whether +fullPageName+ is a term in the pages index.
#
# @param fullPageName page name to look up
# @return the result of @pages.term_exists?
def page_exists?(fullPageName)
  @pages.term_exists?(fullPageName, WAIT)
end

#pages_filename ⇒ Object



119
120
121
# File 'lib/cl_wiki/index.rb', line 119

# Absolute path of the pages index file, 'pages.dat', inside the directory
# named by the global $wiki_path.
# NOTE(review): this reads the global $wiki_path rather than the wiki_path of
# the configuration passed to the constructor — confirm the two always agree.
def pages_filename
  wiki_dir = ::File.expand_path($wiki_path)
  ::File.join(wiki_dir, 'pages.dat')
end

#pages_out(rootPage) ⇒ Object



256
257
258
259
260
261
262
263
264
265
266
267
268
# File 'lib/cl_wiki/index.rb', line 256

# Returns the terms that @index.all_terms yields for +rootPage+, reduced to
# those that look like page names and still exist as pages.
#
# A term is kept only if it starts with '/', is neither '/' nor '//', and
# ClWikiPage.page_exists? answers true for a copy of it. The existence check
# is only made for terms that pass the shape test.
# NOTE(review): the rest of this class refers to ClWiki::Page; confirm that
# ClWikiPage is still defined.
#
# @param rootPage passed straight to @index.all_terms
# @return the filtered list (the same object all_terms returned, modified in
#   place)
def pages_out(rootPage)
  candidates = @index.all_terms(rootPage, WAIT)
  candidates.delete_if do |term|
    not_a_page_name = !term.start_with?('/') || term == '/' || term == '//'
    not_a_page_name || !ClWikiPage.page_exists?(term.dup)
  end
  candidates
end

#put_status(status) ⇒ Object



128
129
130
131
132
133
134
135
136
# File 'lib/cl_wiki/index.rb', line 128

# Writes a time-stamped status line through do_puts.
#
# Without a block a single "<HH:MM:SS> <status>" line is written. With a
# block, "<status>... " is written first, the block is run, and then
# "<status> done " is written. The timestamp uses the 12-hour clock (%I).
#
# @param status [String] text to report
# @yield optional work to wrap between the start and done lines
def put_status(status)
  # Each line takes its own timestamp at the moment it is written.
  emit = lambda do |suffix|
    do_puts(Time.now.strftime("%I:%M:%S") + ' ' + status + suffix)
  end
  return emit.call('') unless block_given?

  emit.call('... ')
  yield
  emit.call(' done ')
end

#recent(top = -1) ⇒ Object



249
250
251
252
253
254
# File 'lib/cl_wiki/index.rb', line 249

# Returns entries of the recent-changes index as [key, value] pairs, sorted
# by key in descending order, read inside @recent.do_read.
#
# +top+ is used as an inclusive end index (entries 0..top), so the default
# of -1 returns everything and top = N returns up to N + 1 entries.
#
# @param top [Integer] index of the last entry to include
# @return whatever @recent.do_read returns for the block's value
def recent(top=-1)
  @recent.do_read(WAIT) do
    newest_first = @recent.index.sort { |(a_key, _), (b_key, _)| b_key <=> a_key }
    newest_first[0..top]
  end
end

#recent_filename ⇒ Object



115
116
117
# File 'lib/cl_wiki/index.rb', line 115

# Absolute path of the recent-changes index file, 'recent.dat', inside the
# directory named by the global $wiki_path.
# NOTE(review): this reads the global $wiki_path rather than the wiki_path of
# the configuration passed to the constructor — confirm the two always agree.
def recent_filename
  wiki_dir = ::File.expand_path($wiki_path)
  ::File.join(wiki_dir, 'recent.dat')
end

#reindex_and_save_async(fullPageName) ⇒ Object



138
139
140
141
142
143
144
# File 'lib/cl_wiki/index.rb', line 138

# Re-indexes +fullPageName+ and then saves all indexes, both on a new thread.
# The thread is handed to @wiki_conf.wait_on_thread, which decides whether
# the caller blocks until it finishes.
#
# @param fullPageName page to re-index
# @return whatever @wiki_conf.wait_on_thread returns
def reindex_and_save_async(fullPageName)
  worker = Thread.new(fullPageName) do |page_name|
    reindex_page(page_name)
    save
  end
  @wiki_conf.wait_on_thread(worker)
end

#reindex_page(fullPageName) ⇒ Object



146
147
148
149
150
151
# File 'lib/cl_wiki/index.rb', line 146

# Rebuilds the index entries of one page: its existing entries are removed
# with remove_page_from_index and the page is indexed again with index_page
# (without purging). The work is wrapped in a put_status report.
#
# @param fullPageName [String] page to re-index
def reindex_page(fullPageName)
  label = 'Reindexing ' + fullPageName
  put_status(label) do
    remove_page_from_index(fullPageName)
    index_page(fullPageName)
  end
end

#remove_page_from_index(fullPageName) ⇒ Object



123
124
125
126
# File 'lib/cl_wiki/index.rb', line 123

# Removes +fullPageName+ from the main index and from the recent-changes
# index by calling #remove on each.
# NOTE(review): the pages index (@pages) is not touched here, so a page purged
# through index_page presumably still satisfies page_exists? — confirm whether
# that is intended.
def remove_page_from_index(fullPageName)
  @index.remove(fullPageName, WAIT)
  @recent.remove(fullPageName, WAIT)
end

#save ⇒ Object



153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/cl_wiki/index.rb', line 153

# Saves every index to its data file, reporting progress through put_status.
# The 'Saving Hits' step is always reported, but the hit index is only
# written when the configuration has +access_log_index+ set.
def save
  put_status('Saving') do
    put_status('Saving Main') { @index.save(index_filename, WAIT) }
    put_status('Saving Recent') { @recent.save(recent_filename, WAIT) }
    put_status('Saving Pages') { @pages.save(pages_filename, WAIT) }
    put_status('Saving Hits') do
      next unless @wiki_conf.access_log_index

      @hits.save(hits_filename, WAIT)
    end
  end
end

#search(text) ⇒ Object



215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
# File 'lib/cl_wiki/index.rb', line 215

# Finds the pages that match every whitespace-separated term in +text+.
#
# Each term is looked up with @index.search; the per-term results are
# flattened and intersected, then de-duplicated and sorted. When the global
# $debug is truthy the result is printed before and after that final
# clean-up.
#
# @param text [String] search terms separated by spaces
# @return [Array] sorted, unique matches; empty when +text+ has no terms
def search(text)
  per_term = text.split(' ').map do |term|
    found = []
    # @index.search fills the array it is given.
    @index.search(term, found, WAIT)
    found.flatten
  end
  # A page must match every term, so intersect the result sets. With no
  # terms at all, reduce yields nil and the result is empty.
  matches = per_term.reduce { |common, found| common & found } || []
  p matches if $debug
  matches = matches.flatten.uniq.sort
  p matches if $debug
  matches
end

#sort_hits_by_recent(hits, top = -1) ⇒ Object



237
238
239
240
241
242
243
244
245
246
247
# File 'lib/cl_wiki/index.rb', line 237

# Groups +hits+ by modification date, newest date first.
#
# For every [date, page names] entry returned by #recent, the page names that
# also appear in +hits+ are kept; dates with no matching page are dropped.
# The result is an array of [date, matching page names] pairs sorted by date
# descending and cut to the inclusive range 0..top (so -1 keeps everything
# and top = N keeps up to N + 1 dates).
#
# @param hits [Array] page names to keep, e.g. a search result
# @param top [Integer] index of the last date group to include
def sort_hits_by_recent(hits, top=-1)
  # Ask #recent for everything: the cut-off has to be applied after the
  # list has been narrowed down to the matching pages, not before.
  matches_by_date = recent.each_with_object({}) do |(date, page_name_array), grouped|
    matching = page_name_array & hits
    grouped[date] = matching unless matching.empty?
  end
  matches_by_date.sort { |a, b| b[0] <=> a[0] }[0..top]
end