Module: DocumentRepo

Defined in:
lib/rbbt/corpus/document_repo.rb

Defined Under Namespace

Classes: KeyFormatError, OpenError

Constant Summary

TC_CONNECTIONS =
{}

Class Method Summary

Instance Method Summary

Class Method Details

.open_tokyocabinet(path, write) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/rbbt/corpus/document_repo.rb', line 9

# Opens (or re-opens) the TokyoCabinet B+ tree database stored at +path+ and
# returns the connection extended with DocumentRepo.
#
# One connection object per path is kept in TC_CONNECTIONS and reused on later
# calls; the cached object is closed and opened again with the requested mode.
# The connection also receives a few singleton helpers (+read+, +write+,
# +write?+, +collect+, +delete+, +values_at+, +merge!+).
#
# @param path [String] location of the database file; its parent directory is
#   created when missing
# @param write [Boolean] open in writer mode; forced to true when the file
#   does not exist yet, since only writer mode passes the OCREAT flag
# @return [TokyoCabinet::BDB] the opened connection
# @raise [RuntimeError] when the database cannot be opened
def self.open_tokyocabinet(path, write)
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  write = true unless File.exist?(path)
  flags = write ? TokyoCabinet::BDB::OWRITER | TokyoCabinet::BDB::OCREAT : TokyoCabinet::BDB::OREADER

  parent = File.dirname(path)
  FileUtils.mkdir_p parent unless File.exist?(parent)

  database = TC_CONNECTIONS[path] ||= TokyoCabinet::BDB.new
  # A cached connection may still be open in a different mode, so always
  # close it before opening with the flags requested now.
  database.close

  unless database.open(path, flags)
    ecode = database.ecode
    raise "Open error: #{database.errmsg(ecode)}. Trying to open file #{path}"
  end

  class << database
    attr_accessor :writable, :persistence_path

    # Re-opens the database in reader mode. Does nothing (and returns nil)
    # when it is not currently flagged as writable; returns self otherwise.
    def read
      return unless @writable
      close
      unless open(@persistence_path, TokyoCabinet::BDB::OREADER)
        ecode = self.ecode
        raise "Open error: #{errmsg(ecode)}. Trying to open file #{@persistence_path}"
      end
      @writable = false
      self
    end

    # Re-opens the database in writer mode. Does nothing (and returns nil)
    # when it is already flagged as writable; returns self otherwise.
    def write
      return if @writable
      close
      unless open(@persistence_path, TokyoCabinet::BDB::OWRITER | TokyoCabinet::BDB::OCREAT)
        ecode = self.ecode
        raise "Open error: #{errmsg(ecode)}. Trying to open file #{@persistence_path}"
      end
      @writable = true
      self
    end

    # Whether the connection is currently flagged as opened for writing.
    def write?
      @writable
    end

    # Iterates with +each+ and returns an Array with the block result for
    # every key/value pair, or the [key, value] pairs when no block is given.
    def collect
      res = []
      each do |key, value|
        res << (block_given? ? yield(key, value) : [key, value])
      end
      res
    end

    # Hash-style alias for the connection's +out+ method.
    def delete(key)
      out(key)
    end

    # Returns the values stored under each of +keys+, in the same order.
    def values_at(*keys)
      keys.collect { |key| self[key] }
    end

    # Stores every key/value pair of +hash+ in the database.
    def merge!(hash)
      hash.each do |key, values|
        self[key] = values
      end
    end
  end

  database.persistence_path ||= path
  # Keep the flag in sync with the mode actually used above. Without this a
  # fresh connection reports write? as nil, and a cached one keeps the flag of
  # its previous mode, so read/write would skip the re-open they are meant to do.
  database.writable = write ? true : false

  database.extend DocumentRepo

  database
end

Instance Method Details

#add(text, namespace, id, type, hash) ⇒ Object



101
102
103
104
105
106
107
108
# File 'lib/rbbt/corpus/document_repo.rb', line 101

# Stores +text+ under the document id built from the four fields, unless an
# entry with that id is already present, and returns the id.
#
# The repository is switched to reader mode first, then to writer mode for
# the insertion, and back to reader mode before returning.
#
# @return [String] the document id produced by fields2docid
def add(text, namespace, id, type, hash)
  read
  write unless write?

  fields2docid(namespace, id, type, hash).tap do |key|
    # Existing entries are never overwritten.
    self[key] = text unless include?(key)
    read
  end
end

#docid(docid) ⇒ Object



97
98
99
# File 'lib/rbbt/corpus/document_repo.rb', line 97

# Looks up +docid+ through the receiver's +get+ method and returns its result
# (presumably the text stored under that document id).
def docid(docid)
  self.get(docid)
end

#docid2fields(docid) ⇒ Object



89
90
91
# File 'lib/rbbt/corpus/document_repo.rb', line 89

# Splits a colon-separated document id into its first four fields.
#
# Empty fields are kept as empty strings (the split keeps trailing empties);
# fields missing from a short id come back as nil, and anything after the
# fourth field is dropped.
#
# @param docid [String]
# @return [Array] always four elements
def docid2fields(docid)
  parts = docid.split(":", -1)
  Array.new(4) { |position| parts[position] }
end

#fields2docid(namespace = nil, id = nil, type = nil, hash = nil) ⇒ Object



93
94
95
# File 'lib/rbbt/corpus/document_repo.rb', line 93

# Builds a document id by joining the four fields with ":".
# Fields left as nil contribute an empty string, so the id always contains
# three separators.
#
# @return [String]
def fields2docid(namespace = nil, id = nil, type = nil, hash = nil)
  [namespace, id, type, hash].join(":")
end

#find(namespace = nil, id = nil, type = nil, hash = nil) ⇒ Object



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/rbbt/corpus/document_repo.rb', line 110

# Lists the document ids matching the given fields.
#
# * no namespace: every key in the repository
# * id given together with a hash: the single id built from the fields, with
#   an empty type when none is given (returned without consulting the store)
# * otherwise: a +range+ query over the keys starting with the known fields
#   followed by ":". A nil id makes the query namespace-wide, ignoring type
#   and hash.
#
# @return [Array, Object] the keys, the one-element id list, or whatever
#   +range+ returns
def find(namespace = nil, id = nil, type = nil, hash = nil)
  return keys if namespace.nil?
  return [[namespace, id, type || "", hash].join(":")] if hash && !id.nil?

  known = [namespace]
  unless id.nil?
    known << id
    known << type if type
  end

  prefix = known.join(":")
  # ";" is the character right after ":" in ASCII, so the half-open interval
  # ["prefix:", "prefix;") spans exactly the keys beginning with "prefix:".
  range("#{prefix}:", true, "#{prefix};", false)
end

#find_docid(docid) ⇒ Object



133
134
135
# File 'lib/rbbt/corpus/document_repo.rb', line 133

# Runs +find+ with the four fields obtained by splitting +docid+ with
# docid2fields, and returns its result.
def find_docid(docid)
  namespace, id, type, hash = docid2fields(docid)
  find(namespace, id, type, hash)
end