Class: Milkode::DocumentTable

Inherits:
Object
  • Object
show all
Defined in:
lib/milkode/database/document_table.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(table) ⇒ DocumentTable

Returns a new instance of DocumentTable.



108
109
110
# File 'lib/milkode/database/document_table.rb', line 108

# Wraps a table object; it is kept in @table and every other method of this
# class goes through it.
#
# @param table the backing table (presumably the Groonga "documents" table
#   created by define_schema -- TODO confirm against the caller)
def initialize(table)
  @table = table
end

Instance Attribute Details

#table ⇒ Object (readonly)

Returns the value of attribute table.



13
14
15
# File 'lib/milkode/database/document_table.rb', line 13

# Returns the backing table object that was handed to the constructor.
def table
  @table
end

Class Method Details

.define_schema ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/milkode/database/document_table.rb', line 15

# Defines the Groonga schema: a "documents" hash table (path, package,
# restpath, content, timestamp, suffix) and a "terms" patricia-trie table
# that indexes path, package, restpath, content and suffix with positions.
#
# The schema is first defined with "suffix" as a string column. If Groonga
# raises Groonga::Schema::ColumnCreationWithDifferentOptions, a warning that
# asks the user to run `milk rebuild --all` is printed to stdout and the
# schema is defined again with "suffix" as a text column instead.
#
# Both layouts are otherwise identical, so they share a single builder that
# is parameterised by the column type of "suffix".
#
# @return whatever the last Groonga::Schema.define call returns
def self.define_schema
  # Builds the complete schema; suffix_type is :string or :text.
  define_with = lambda do |suffix_type|
    Groonga::Schema.define do |schema|
      schema.create_table("documents", :type => :hash) do |table|
        table.string("path")
        table.string("package")
        table.string("restpath")
        table.text("content")
        table.time("timestamp")
        table.send(suffix_type, "suffix")
      end

      schema.create_table("terms",
                          :type => :patricia_trie,
                          :key_normalize => true,
                          :default_tokenizer => "TokenBigramSplitSymbolAlphaDigit") do |table|
        table.index("documents.path", :with_position => true)
        table.index("documents.package", :with_position => true)
        table.index("documents.restpath", :with_position => true)
        table.index("documents.content", :with_position => true)
        table.index("documents.suffix", :with_position => true)
      end
    end
  end

  begin
    define_with.call(:string)
  rescue Groonga::Schema::ColumnCreationWithDifferentOptions
    # The existing database does not accept the string "suffix" column:
    # warn, then fall back to the text "suffix" layout.
    puts <<EOF
WARNING: Milkode database is old. (Renewal at 1.4.0)
Can't get the new features. Please execute rebuild command.

  $ milk rebuild --all

EOF

    define_with.call(:text)
  end
end

.drilldown(result, column, num = nil) ⇒ Object



346
347
348
349
# File 'lib/milkode/database/document_table.rb', line 346

# Groups +result+ by +column+ and returns [count, key] pairs ordered from the
# largest group to the smallest.
#
# @param result an object responding to #group(column); each group must
#   respond to #n_sub_records and #key
# @param column the column to group by
# @param num    when given, only the first +num+ pairs are returned
# @return [Array<Array>] pairs of [n_sub_records, key]
def self.drilldown(result, column, num = nil)
  pairs  = result.group(column).map { |group| [group.n_sub_records, group.key] }
  ranked = pairs.sort_by { |pair| pair[0] }.reverse
  return ranked unless num

  ranked[0, num]
end

Instance Method Details

#add(package_dir, restpath, package_name = nil) ⇒ Object

指定ファイルをテーブルに追加

Parameters:

  • package_dir

    パッケージディレクトリ -> ‘/path/to/Package’

  • restpath

    パッケージディレクトリ以下のパス名 -> ‘src/Foo.hpp’

  • package_name (defaults to: nil)

    パッケージ名(未指定の場合は File.basename(package_dir) )



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/milkode/database/document_table.rb', line 126

# Adds one file to the table, or refreshes its record when the file on disk
# is newer than the stored timestamp.
#
# @param package_dir  package directory, e.g. '/path/to/Package'
# @param restpath     path below the package directory, e.g. 'src/Foo.hpp'
# @param package_name package name; File.basename(package_dir) when nil
# @return [Symbol, nil] :newfile when a record was created, :update when an
#   existing record was refreshed, nil when the stored timestamp is not older
def add(package_dir, restpath, package_name = nil)
  # Absolute path as the OS sees it; used for all file-system access.
  fullpath = File.expand_path(File.join(package_dir, restpath))

  # Everything stored in the database goes through Util.filename_to_utf8.
  key      = Util.filename_to_utf8(fullpath)
  package  = Util.filename_to_utf8(package_name || File.basename(package_dir))
  restpath = Util.filename_to_utf8(restpath)
  suffix   = File.extname(key).sub('.', "")
  # The mtime lookup uses the unconverted file name.
  mtime    = Util.truncate_nsec(File.mtime(fullpath))

  existing = @table[key]

  unless existing
    # No record yet: create one.
    @table.add(key,
               :path => key,
               :package => package,
               :restpath => restpath,
               :content => load_content(fullpath),
               :timestamp => mtime,
               :suffix => suffix)
    return :newfile
  end

  # Stored timestamp is not older than the file: nothing to do.
  return nil unless existing.timestamp < mtime

  # File changed since it was indexed: refresh the record in place.
  existing.package   = package
  existing.restpath  = restpath
  existing.content   = load_content(fullpath)
  existing.timestamp = mtime
  existing.suffix    = suffix
  :update
end

#cleanup ⇒ Object

実体の存在しないデータを削除



199
200
201
202
203
204
205
206
207
# File 'lib/milkode/database/document_table.rb', line 199

# Removes every record whose file no longer exists on disk.
#
# When a block is given, each such record is yielded to it just before the
# record is removed.
def cleanup
  each do |record|
    next if File.exist?(record.path)

    yield record if block_given?
    remove(record.path)
  end
end

#cleanup_package_name(package_name, ignore_checker = nil) ⇒ Object

指定されたパッケージのクリーンアップ



374
375
376
377
378
379
380
381
382
383
384
385
386
# File 'lib/milkode/database/document_table.rb', line 374

# Cleans up the records of a single package.
#
# A record is removed when its file no longer exists, or when an
# +ignore_checker+ is supplied and its #ignore? returns true for
# "/<restpath>". The checker is only consulted for files that still exist.
# When a block is given, each record is yielded just before it is removed.
#
# @param package_name   package whose records are examined
# @param ignore_checker optional object responding to #ignore?(path)
def cleanup_package_name(package_name, ignore_checker = nil)
  # Candidates: every record that belongs to the package.
  candidates = @table.select { |doc| doc.package == package_name }

  candidates.each do |entry|
    missing = !File.exist?(entry.path)
    next unless missing || (ignore_checker && ignore_checker.ignore?("/#{entry.restpath}"))

    yield entry if block_given?
    remove(entry.path)
  end
end

#dump ⇒ Object



394
395
396
397
398
# File 'lib/milkode/database/document_table.rb', line 394

# Prints every record to stdout with Kernel#p, one array per record:
# [path, package, restpath, content, timestamp, suffix].
def dump
  each do |record|
    fields = [record.path, record.package, record.restpath,
              record.content, record.timestamp, record.suffix]
    p fields
  end
end

#each ⇒ Object



388
389
390
391
392
# File 'lib/milkode/database/document_table.rb', line 388

# Yields every record obtained from an unfiltered @table.select, in the
# order that result enumerates them.
def each
  everything = @table.select
  everything.each { |record| yield record }
end

#find_shortpath(shortpath) ⇒ Object

shortpathの一致するレコードを取得



177
178
179
180
181
# File 'lib/milkode/database/document_table.rb', line 177

# Looks up the record whose package and restpath both equal the two parts
# that Util.divide_shortpath extracts from +shortpath+.
#
# @param shortpath the short path to look up (split by Util.divide_shortpath)
# @return the first matching record, or whatever records[0] yields when
#   nothing matched (nil for an Array)
def find_shortpath(shortpath)
  package, restpath = Util.divide_shortpath(shortpath)
  matches = @table.select do |doc|
    (doc.package == package) & (doc.restpath == restpath)
  end
  matches.records[0]
end

#find_shortpath_below(shortpath) ⇒ Object

指定パス以下のファイルを全て取得



184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/milkode/database/document_table.rb', line 184

# Returns every record located at or below +shortpath+.
#
# * nil or empty shortpath: the records of an unfiltered select.
# * package only: every record of that package.
# * package and restpath: records of that package whose restpath matches
#   (=~) the given restpath.
#
# @param shortpath [String, nil] split with Util.divide_shortpath
# @return the matching records (select.records in the first case, select.to_a
#   otherwise)
def find_shortpath_below(shortpath)
  return @table.select.records if shortpath.nil? || shortpath.empty?

  package, restpath = Util.divide_shortpath(shortpath)

  matched =
    if restpath.nil? || restpath.empty?
      @table.select { |doc| doc.package == package }
    else
      @table.select { |doc| (doc.package == package) & (doc.restpath =~ restpath) }
    end
  matched.to_a
end

#package_records(name) ⇒ Object



404
405
406
# File 'lib/milkode/database/document_table.rb', line 404

# Returns the records of the package called +name+, by running #search with
# :strict_packages set to that single name.
def package_records(name)
  strict_names = [name]
  search(:strict_packages => strict_names)
end

#remove(name) ⇒ Object



164
165
166
# File 'lib/milkode/database/document_table.rb', line 164

# Deletes the record stored under the key +name+.
#
# The record is looked up with @table[name] and #delete is called on it, so
# a key that yields nil raises NoMethodError.
def remove(name)
  record = @table[name]
  record.delete
end

#remove_all(&block) ⇒ Object



172
173
174
# File 'lib/milkode/database/document_table.rb', line 172

# Removes every record: passes an unfiltered @table.select, together with
# the optional block, to #remove_records.
def remove_all(&block)
  everything = @table.select
  remove_records(everything, &block)
end

#remove_match_path(path, &block) ⇒ Object



168
169
170
# File 'lib/milkode/database/document_table.rb', line 168

# Removes the records returned by #search for :paths => [path], forwarding
# the optional block to #remove_records.
def remove_match_path(path, &block)
  matched = search(:paths => [path])
  remove_records(matched, &block)
end

#remove_records(records, &block) ⇒ Object

レコードをまとめて削除する

過去の方法
  検索結果にマッチしたレコード等をまとめて削除
  削除前にインデックスを削除し、削除後にインデックスを再度追加してい
  大量のレコードを削除する場合に高速に動作する

現在の方法
  上記の方法がかえって遅くなったので元に戻す
  普通に速くなった気がする


81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/milkode/database/document_table.rb', line 81

# Deletes a batch of records.
#
# For each entry of +records+ the optional block is called with the entry,
# then entry.key.delete is invoked.
#
# An earlier version removed the five "terms" indexes (path, package,
# restpath, content, suffix) before deleting and re-created them afterwards
# to speed up bulk deletion; per the original notes that turned out to be
# slower, so records are now simply deleted one by one.
#
# @param records an enumerable of entries responding to #key
def remove_records(records, &block)
  records.each do |hit|
    yield hit if block
    hit.key.delete
  end
end

#search(options) ⇒ Object

マッチしたレコードのみを返す



341
342
343
344
# File 'lib/milkode/database/document_table.rb', line 341

# Runs #search_with_match and returns only its first element, the matching
# records; the remaining elements are discarded.
#
# @param options [Hash] passed unchanged to #search_with_match
def search(options)
  search_with_match(options).first
end

#search_with_match(options) ⇒ Object

詳細検索

Parameters:

  • options

    検索オプション、ハッシュで指定 :patterns => マッチする行 :keywords => 検索キーワード :paths => ファイルパス(AND) :packages => パッケージ名(OR) :strict_packages => 厳密なパッケージ名(OR) :restpaths => 短縮パス(AND) :suffixs => 拡張子 :fpath_or_packages => ファイル名かパッケージ名 :offset => オフセット(default = 0) :limit => 表示リミット(default = -1)



221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
# File 'lib/milkode/database/document_table.rb', line 221

# Detailed search.
#
# Builds one select expression by ANDing together a sub-expression for
# every condition supplied in +options+, then sorts the hits by package and
# restpath (ascending) through Util.groonga_table_sort.
#
# @param options [Hash] search options:
#   :patterns          => words that must match the content
#   :keywords          => words that must match content, restpath or package
#   :packages          => package names (OR), via package_expression
#   :strict_packages   => strict package names (OR), via strict_packages_expression
#   :paths             => words that must match the full path (AND)
#   :restpaths         => words that must match the short path (AND)
#   :suffixs           => file extensions (OR), via suffix_expression
#   :fpath_or_packages => words that must match restpath or package
#   :offset            => offset into the sorted result (default 0)
#   :limit             => maximum number of records (default -1)
# @return [Array] three values: the sorted records (affected by offset and
#   limit), the total number of matches (not affected by limit), and the raw
#   select result
def search_with_match(options)
  patterns          = options[:patterns]          || []
  keywords          = options[:keywords]          || []
  packages          = options[:packages]          || []
  strict_packages   = options[:strict_packages]   || []
  paths             = options[:paths]             || []
  restpaths         = options[:restpaths]         || []
  suffixs           = options[:suffixs]           || []
  fpath_or_packages = options[:fpath_or_packages] || []
  offset            = options[:offset]            || 0
  limit             = options[:limit]             || -1

  result = @table.select do |record|
    expression = nil

    # ANDs one more condition into the expression built so far; the first
    # condition simply becomes the expression.
    narrow = lambda do |condition|
      expression = expression.nil? ? condition : expression & condition
    end

    # Lines to match
    patterns.each { |word| narrow.call(record.content =~ word) }

    # Keywords (hints used to narrow the result)
    keywords.each do |word|
      narrow.call((record.content =~ word) | (record.restpath =~ word) | (record.package =~ word))
    end

    # Packages (OR)
    by_package = package_expression(record, packages)
    narrow.call(by_package) if by_package

    # Strict packages (OR)
    by_strict_package = strict_packages_expression(record, strict_packages)
    narrow.call(by_strict_package) if by_strict_package

    # File paths
    paths.each { |word| narrow.call(record.path =~ word) }

    # Short paths
    restpaths.each { |word| narrow.call(record.restpath =~ word) }

    # Extensions (OR)
    by_suffix = suffix_expression(record, suffixs)
    narrow.call(by_suffix) if by_suffix

    # File name or package name
    fpath_or_packages.each do |word|
      narrow.call((record.restpath =~ word) | (record.package =~ word))
    end

    # The finished search expression (nil when no condition was given)
    expression
  end

  # Sort by file name. (Sorting by "_score" and "timestamp", both
  # descending, was used previously and is disabled.)
  sort_keys = [{:key => "package", :order => "ascending"},
               {:key => "restpath", :order => "ascending"}]
  records = Util.groonga_table_sort(result, sort_keys, :offset => offset, :limit => limit)

  [records, result.size, result]
end

#select_all_sort_by_shortpath(offset = 0, limit = -1) ⇒ Object



351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
# File 'lib/milkode/database/document_table.rb', line 351

# Selects every record and returns a page of them ordered by the
# lower-cased DocumentRecord.shortpath.
#
# Sorting is done in Ruby: the original notes (2010/10/29) say that sorting
# by "shortpath" through the groonga API did not work.
#
# @param offset index of the first record to return
# @param limit  number of records to return; -1 means "through the last one"
# @return [Array] two values: the requested slice of sorted records, and the
#   size of the unfiltered select result
def select_all_sort_by_shortpath(offset = 0, limit = -1)
  everything = @table.select

  ordered = everything.records.sort_by do |doc|
    DocumentRecord::shortpath(doc).downcase
  end

  # limit == -1 slices with a range ending at -1; any other limit is a count.
  page = limit == -1 ? ordered[offset..limit] : ordered[offset, limit]

  [page, everything.size]
end

#size ⇒ Object



112
113
114
# File 'lib/milkode/database/document_table.rb', line 112

# Returns @table.size (presumably the number of records in the backing
# table -- TODO confirm against the table implementation).
def size
  @table.size
end

#to_a ⇒ Object



400
401
402
# File 'lib/milkode/database/document_table.rb', line 400

# Returns @table.to_a, i.e. the backing table converted to an array by its
# own #to_a.
def to_a
  @table.to_a
end