Class: KnowledgeBase

Inherits:
Object
  • Object
show all
Defined in:
lib/rbbt/knowledge_base.rb,
lib/rbbt/knowledge_base/query.rb,
lib/rbbt/knowledge_base/entity.rb,
lib/rbbt/knowledge_base/registry.rb,
lib/rbbt/knowledge_base/traverse.rb,
lib/rbbt/knowledge_base/syndicate.rb,
lib/rbbt/knowledge_base/enrichment.rb

Defined Under Namespace

Classes: Traverser

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(dir, namespace = nil) ⇒ KnowledgeBase

Returns a new instance of KnowledgeBase.



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/rbbt/knowledge_base.rb', line 11

# Builds a knowledge base rooted at +dir+.
#
# @param dir [Object] location; a copy of it is handed to Path.setup
# @param namespace [Object, nil] stored as-is and exposed through #namespace
def initialize(dir, namespace = nil)
  @dir = Path.setup(dir.dup)

  @namespace = namespace
  @format = IndiferentHash.setup({})

  # ||= keeps a registry that was already assigned before the constructor ran.
  @registry ||= IndiferentHash.setup({})
  @entity_options = IndiferentHash.setup({})

  @indices = IndiferentHash.setup({})
  @identifiers = IndiferentHash.setup({})
  @descriptions = {}
  # Plain Hash (the earlier IndiferentHash assignment was immediately
  # overwritten, so it has been dropped); entries are keyed by
  # [name, options] pairs.
  @databases = {}
end

Instance Attribute Details

#databases ⇒ Object

Returns the value of attribute databases.



10
11
12
# File 'lib/rbbt/knowledge_base.rb', line 10

# Reader for the @databases instance variable.
def databases
  @databases
end

#dir ⇒ Object

Returns the value of attribute dir.



10
11
12
# File 'lib/rbbt/knowledge_base.rb', line 10

# Reader for the @dir instance variable.
def dir
  @dir
end

#entity_options ⇒ Object

Returns the value of attribute entity_options.



10
11
12
# File 'lib/rbbt/knowledge_base.rb', line 10

# Reader for the @entity_options instance variable.
def entity_options
  @entity_options
end

#format ⇒ Object

Returns the value of attribute format.



10
11
12
# File 'lib/rbbt/knowledge_base.rb', line 10

# Reader for the @format instance variable.
def format
  @format
end

#indices ⇒ Object

Returns the value of attribute indices.



10
11
12
# File 'lib/rbbt/knowledge_base.rb', line 10

# Reader for the @indices instance variable.
def indices
  @indices
end

#namespace ⇒ Object

Returns the value of attribute namespace.



10
11
12
# File 'lib/rbbt/knowledge_base.rb', line 10

# Reader for the @namespace instance variable.
def namespace
  @namespace
end

#registry ⇒ Object

Returns the value of attribute registry.



10
11
12
# File 'lib/rbbt/knowledge_base.rb', line 10

# Reader for the @registry instance variable.
def registry
  @registry
end

Instance Method Details

#_children(name, entity) ⇒ Object



46
47
48
49
# File 'lib/rbbt/knowledge_base/query.rb', line 46

# Fetches the index for +name+ and returns the result of calling +match+
# on it with +entity+. No identifier translation or annotation is done here.
def _children(name, entity)
  get_index(name).match(entity)
end

#_neighbours(name, entity) ⇒ Object



66
67
68
69
70
71
72
# File 'lib/rbbt/knowledge_base/query.rb', line 66

# Collects the raw matches around +entity+ in database +name+.
#
# When the database is undirected and its source and target are equal,
# only :children is returned; otherwise both :parents and :children are.
def _neighbours(name, entity)
  symmetric = undirected(name) && source(name) == target(name)
  return { :children => _children(name, entity) } if symmetric

  { :parents => _parents(name, entity), :children => _children(name, entity) }
end

#_parents(name, entity) ⇒ Object



56
57
58
59
# File 'lib/rbbt/knowledge_base/query.rb', line 56

# Fetches the index for +name+, takes its +reverse+, and returns the result
# of calling +match+ on that with +entity+.
def _parents(name, entity)
  reversed = get_index(name).reverse
  reversed.match(entity)
end

#_subset(name, source = :all, target = :all, options = {}) ⇒ Object



5
6
7
8
9
# File 'lib/rbbt/knowledge_base/query.rb', line 5

# Fetches the index for +name+ (with +options+) and returns the result of
# calling +subset+ on it with +source+ and +target+.
def _subset(name, source = :all, target = :all, options = {})
  get_index(name, options).subset(source, target)
end

#all(name, options = {}) ⇒ Object



41
42
43
44
# File 'lib/rbbt/knowledge_base/query.rb', line 41

# Passes every key of the index for +name+ through #setup and returns
# whatever #setup returns.
def all(name, options={})
  index_keys = get_index(name, options).keys
  setup(name, index_keys)
end

#all_databases ⇒ Object



17
18
19
# File 'lib/rbbt/knowledge_base/registry.rb', line 17

# Returns the keys of @registry, i.e. the names passed to #register.
def all_databases
  @registry.keys 
end

#annotate(entities, type, database = nil) ⇒ Object



36
37
38
39
40
# File 'lib/rbbt/knowledge_base/entity.rb', line 36

# Hands +entities+ to Misc.prepare_entity.
#
# The format passed along is the entry of @format for +type+, falling back
# to +type+ itself; the options come from #entity_options_for.
def annotate(entities, type, database = nil)
  target_format = @format[type] || type
  prepared_options = entity_options_for(type, database)
  Misc.prepare_entity(entities, target_format, prepared_options)
end

#children(name, entity) ⇒ Object



51
52
53
54
# File 'lib/rbbt/knowledge_base/query.rb', line 51

# Translates +entity+ with #identify_source, looks up its children in
# database +name+, and passes the matches through #setup.
def children(name, entity)
  resolved = identify_source(name, entity)
  matches = _children(name, resolved)
  setup(name, matches)
end

#description(name) ⇒ Object



21
22
23
# File 'lib/rbbt/knowledge_base/registry.rb', line 21

# Returns the "~"-separated parts of the key_field of the index for +name+.
# The result is memoized per name in @descriptions.
def description(name)
  cached = @descriptions[name]
  return cached if cached

  @descriptions[name] = get_index(name).key_field.split("~")
end

#enrichment(name, entities, options = {}) ⇒ Object



4
5
6
7
8
# File 'lib/rbbt/knowledge_base/enrichment.rb', line 4

# Runs the +enrichment+ method of database +name+.
#
# +entities+ are first translated with #identify_source; the call uses the
# first field of the database and passes :persist => false.
def enrichment(name, entities, options = {})
  db = get_database(name, options)
  identified = identify_source(name, entities)
  field = db.fields.first
  db.enrichment(identified, field, :persist => false)
end

#entities ⇒ Object



58
59
60
# File 'lib/rbbt/knowledge_base/entity.rb', line 58

# Lists the distinct source and target values of every registered database,
# in registration order (source before target for each database).
def entities
  all_databases.flat_map { |db_name| [source(db_name), target(db_name)] }.uniq
end

#entity_options_for(type, database_name = nil) ⇒ Object



21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/rbbt/knowledge_base/entity.rb', line 21

# Assembles the entity options to use for +type+.
#
# Starts from the knowledge-base level entry in #entity_options (looked up
# by +type+ or by the name Entity.formats gives for it), adds :format when
# @format has an entry for +type+, defaults :organism to #namespace, and
# finally overlays the options of the database +database_name+, if given.
#
# @param type [Object] entity type or format name
# @param database_name [Object, nil] database whose entity options take precedence
# @return [Hash] a new hash; the stored option hashes are not modified
def entity_options_for(type, database_name = nil)
  entity_options = self.entity_options
  IndiferentHash.setup entity_options if entity_options and not IndiferentHash === entity_options
  options = entity_options[type.to_s] || entity_options[Entity.formats[type.to_s].to_s] || {}
  # Look up the +type+ argument (previously the literal symbol :type was
  # tested, so the format was never applied). Merge rather than assign so
  # the hash stored in entity_options is left untouched.
  options = options.merge(:format => @format[type]) if @format.include? type
  options = {:organism => namespace}.merge(options)
  if database_name
    database = get_database(database_name)
    db_entity_options = database.entity_options
    if db_entity_options and (db_entity_options[type] or db_entity_options[Entity.formats[type.to_s].to_s])
      options = options.merge(db_entity_options[type] || db_entity_options[Entity.formats[type.to_s].to_s])
    end
  end
  options
end

#entity_types ⇒ Object



62
63
64
# File 'lib/rbbt/knowledge_base/entity.rb', line 62

# Maps every value from #entities through Entity.formats and returns the
# distinct results.
def entity_types
  entities.map { |field| Entity.formats[field] }.uniq
end

#get_database(name, options = {}) ⇒ Object



81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/rbbt/knowledge_base/registry.rb', line 81

# Returns the association database registered under +name+, opening it on
# first use. Results are memoized in @databases under [name, options].
#
# @param name [Object] database name; converted with to_s
# @param options [Hash] open options; NOTE: :namespace and :organism are
#   filled into this hash in place when missing
# @raise [RuntimeError] when there is no reusable persistence file and the
#   registry has no file for +name+
def get_database(name, options = {})
  name = name.to_s
  # Default :namespace, then :organism, to the knowledge base namespace.
  options[:organism] ||= options[:namespace] ||= self.namespace
  @databases[[name, options]] ||= 
    begin 
      # Persistence key: name plus a digest of the fingerprint of name and options.
      fp = Misc.fingerprint([name,options])
      key = name.to_s + "_" + Misc.digest(fp) + '.database'
      # NOTE(review): Persist.memory presumably caches the block result under
      # this string for the life of the process; confirm against Persist.
      Persist.memory("Database:" << [key, dir] * "@") do
        # Work on a copy from here on so the caller's hash is not changed further.
        options = options.dup
        persist_dir = dir
        persist_file = persist_dir[key].find
        # Registry entries are [file_or_block, options] pairs (see #register).
        file, registered_options = registry[name]

        # Registered options, then knowledge-base level values, act as defaults.
        options = Misc.add_defaults options, registered_options if registered_options and registered_options.any?
        options = Misc.add_defaults options, :persist_file => persist_file, :namespace => namespace, :format => format, :persist => true

        # Knowledge-base level entity options are defaults for each type.
        if entity_options
          options[:entity_options] ||= {}
          entity_options.each do |type, info|
            options[:entity_options][type] ||= {}
            options[:entity_options][type] = Misc.add_defaults options[:entity_options][type], info
          end
        end

        # NOTE(review): presumably splits the persist-related keys out of
        # options; confirm against Misc.pull_keys.
        persist_options = Misc.pull_keys options, :persist

        # Reuse the persisted copy when it exists, persistence is enabled and
        # no update was requested; otherwise open from the registered file.
        database = if persist_file.exists? and persist_options[:persist] and not persist_options[:update]
                  Log.low "Re-opening database #{ name } from #{ Misc.fingerprint persist_file }. #{options}"
                  Association.open(nil, options, persist_options)
                else
                  # Registered options are applied as defaults again here
                  # (this time also when the registered hash is empty).
                  options = Misc.add_defaults options, registered_options if registered_options
                  raise "Repo #{ name } not found and not registered" if file.nil?
                  Log.medium "Opening database #{ name } from #{ Misc.fingerprint file }. #{options}"
                  Association.open(file, options, persist_options)
                end

        database.namespace = self.namespace

        database
      end
    end
end

#get_index(name, options = {}) ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/rbbt/knowledge_base/registry.rb', line 37

# Returns the association index registered under +name+, building it on
# first use. Results are memoized in @indices under [name, options].
#
# @param name [Object] database name; converted with to_s
# @param options [Hash] index options; NOTE: :namespace and :organism are
#   filled into this hash in place when missing
# @raise [RuntimeError] when there is no reusable persistence file and the
#   registry has no file for +name+
def get_index(name, options = {})
  name = name.to_s
  # Default :namespace, then :organism, to the knowledge base namespace.
  options[:organism] ||= options[:namespace] ||= self.namespace
  @indices[[name, options]] ||= 
    begin 
      # Persistence key: name plus a digest of the fingerprint of name and options.
      fp = Misc.fingerprint([name,options])
      key = name.to_s + "_" + Misc.digest(fp)

      # NOTE(review): Persist.memory presumably caches the block result under
      # this string for the life of the process; confirm against Persist.
      Persist.memory("Index:" << [key, dir] * "@") do
        # Work on a copy from here on so the caller's hash is not changed further.
        options = options.dup
        persist_dir = dir
        persist_file = persist_dir[key].find
        # Registry entries are [file_or_block, options] pairs (see #register).
        file, registered_options = registry[name]

        # Registered options, then knowledge-base level values, act as defaults.
        options = Misc.add_defaults options, registered_options if registered_options and registered_options.any?
        options = Misc.add_defaults options, :persist_file => persist_file, :persist_dir => persist_dir, :namespace => namespace, :format => format, :persist => true

        # Knowledge-base level entity options are defaults for each type.
        if entity_options
          options[:entity_options] ||= {}
          entity_options.each do |type, info|
            options[:entity_options][type] ||= {}
            options[:entity_options][type] = Misc.add_defaults options[:entity_options][type], info
          end
        end

        # NOTE(review): presumably splits the persist-related keys out of
        # options; confirm against Misc.pull_keys.
        persist_options = Misc.pull_keys options, :persist

        # Reuse the persisted copy when it exists, persistence is enabled and
        # no update was requested; otherwise build from the registered file.
        # Association.index receives a copy of persist_options in both cases.
        index = if persist_file.exists? and persist_options[:persist] and not persist_options[:update]
                  Log.low "Re-opening index #{ name } from #{ Misc.fingerprint persist_file }. #{options}"
                  Association.index(nil, options, persist_options.dup)
                else
                  # Registered options are applied as defaults again here
                  # (this time also when the registered hash is empty).
                  options = Misc.add_defaults options, registered_options if registered_options
                  raise "Repo #{ name } not found and not registered" if file.nil?
                  Log.medium "Opening index #{ name } from #{ Misc.fingerprint file }. #{options}"
                  Association.index(file, options, persist_options.dup)
                end

        index.namespace = self.namespace

        index
      end
    end
end

#identifier_files(name) ⇒ Object



66
67
68
# File 'lib/rbbt/knowledge_base/entity.rb', line 66

# Returns a copy (dup) of the identifier_files of database +name+, so that
# callers can modify the list without touching the database's own.
def identifier_files(name)
  files = get_database(name).identifier_files
  files.dup
end

#identify(name, entity) ⇒ Object



107
108
109
# File 'lib/rbbt/knowledge_base/entity.rb', line 107

# Translates +entity+ for database +name+, trying #identify_source first
# and falling back to #identify_target when that yields nil or false.
def identify(name, entity)
  as_source = identify_source(name, entity)
  return as_source if as_source

  identify_target(name, entity)
end

#identify_source(name, entity) ⇒ Object



92
93
94
95
96
97
# File 'lib/rbbt/knowledge_base/entity.rb', line 92

# Translates +entity+ through the source index of database +name+.
#
# :all passes through unchanged, as does any entity when there is no
# source index. Arrays are translated element-wise with values_at.
def identify_source(name, entity)
  return :all if entity == :all

  translation = source_index(name)
  return entity if translation.nil?

  if Array === entity
    translation.values_at(*entity)
  else
    translation[entity]
  end
end

#identify_target(name, entity) ⇒ Object



100
101
102
103
104
105
# File 'lib/rbbt/knowledge_base/entity.rb', line 100

# Translates +entity+ through the target index of database +name+.
#
# :all passes through unchanged, as does any entity when there is no
# target index. Arrays are translated element-wise with values_at.
def identify_target(name, entity)
  return :all if entity == :all

  translation = target_index(name)
  return entity if translation.nil?

  if Array === entity
    translation.values_at(*entity)
  else
    translation[entity]
  end
end

#index_fields(name) ⇒ Object



124
125
126
# File 'lib/rbbt/knowledge_base/registry.rb', line 124

# Returns the +fields+ of the index for +name+.
def index_fields(name)
  index = get_index(name)
  index.fields
end

#neighbours(name, entity) ⇒ Object



74
75
76
77
78
79
80
# File 'lib/rbbt/knowledge_base/query.rb', line 74

# Returns the hash built by #_neighbours after passing it through
# IndiferentHash.setup and running #setup on its :children and :parents
# entries (parents with the reverse flag set), when present.
def neighbours(name, entity)
  related = _neighbours(name, entity)
  IndiferentHash.setup(related)

  children = related[:children]
  setup(name, children) if children

  parents = related[:parents]
  setup(name, parents, true) if parents

  related
end

#parents(name, entity) ⇒ Object



61
62
63
64
# File 'lib/rbbt/knowledge_base/query.rb', line 61

# Translates +entity+ with #identify_target, looks up its parents in
# database +name+, and passes the matches through #setup with the reverse
# flag set.
def parents(name, entity)
  resolved = identify_target(name, entity)
  matches = _parents(name, resolved)
  setup(name, matches, true)
end

#register(name, file = nil, options = {}, &block) ⇒ Object



6
7
8
9
10
11
12
13
14
15
# File 'lib/rbbt/knowledge_base/registry.rb', line 6

# Stores a database definition in @registry under +name+.
#
# With a block, the block itself is stored as the source and is given a
# +filename+ singleton method that returns the name as a string; otherwise
# +file+ is stored. Either way the entry is the pair [source, options].
def register(name, file = nil, options = {}, &block)
  origin =
    if block_given?
      block.define_singleton_method(:filename) { name.to_s }
      Log.debug("Registering #{ name } from code block")
      block
    else
      Log.debug("Registering #{ name }: #{ Misc.fingerprint file }")
      file
    end

  @registry[name] = [origin, options]
end

#select_entities(name, entities, options = {}) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
# File 'lib/rbbt/knowledge_base/entity.rb', line 6

# Picks the source and target entity lists for database +name+ out of the
# +entities+ hash.
#
# For each side the first present entry wins: the generic :source/:target
# key, then the index's field name, then the name Entity.formats gives for
# that field (as a string).
#
# @param name [Object] database name
# @param entities [Hash] entity lists keyed as described above
# @param options [Hash] passed on to #get_index
# @return [Array] two-element array [source_entities, target_entities];
#   either element is nil when no key matched
def select_entities(name, entities, options = {})
  index = get_index(name, options)
  source_field = index.source_field
  target_field = index.target_field

  # The previously computed source_type/target_type locals were never read
  # and have been removed.
  source_entities = entities[:source] || entities[source_field] || entities[Entity.formats[source_field].to_s]
  target_entities = entities[:target] || entities[target_field] || entities[Entity.formats[target_field].to_s]

  [source_entities, target_entities]
end

#setup(name, matches, reverse = false) ⇒ Object



27
28
29
# File 'lib/rbbt/knowledge_base.rb', line 27

# Passes +matches+ to AssociationItem.setup together with this knowledge
# base, the database +name+ and the +reverse+ flag, and returns its result.
def setup(name, matches, reverse = false)
  AssociationItem.setup matches, self, name, reverse
end

#source(name) ⇒ Object



25
26
27
# File 'lib/rbbt/knowledge_base/registry.rb', line 25

# Returns the first element of #description for +name+.
def source(name)
  description(name).first
end

#source_index(name) ⇒ Object



70
71
72
73
74
75
76
77
78
79
# File 'lib/rbbt/knowledge_base/entity.rb', line 70

# Builds, inside Persist.memory, a translation index for the source field
# of database +name+ via TSV.translation_index.
#
# The identifier files come from the database plus, when Entity is defined,
# Entity.identifier_files for the source. NAMESPACE placeholders are
# replaced by the namespace when one is set; files that still contain the
# placeholder are dropped.
def source_index(name)
  Persist.memory("Source index #{name}: KB directory #{dir}") do
    files = identifier_files(name)
    files.concat(Entity.identifier_files(source(name))) if defined? Entity
    files.uniq!
    if namespace
      files.map! { |file| file.annotate(file.gsub(/\bNAMESPACE\b/, namespace)) }
    end
    files.reject! { |file| file.match(/\bNAMESPACE\b/) }
    TSV.translation_index(files, source(name), nil, :persist => true)
  end
end

#source_type(name) ⇒ Object



50
51
52
# File 'lib/rbbt/knowledge_base/entity.rb', line 50

# Looks up the source of database +name+ in Entity.formats.
def source_type(name)
  formats = Entity.formats
  formats[source(name)]
end

#subset(name, entities, options = {}, &block) ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/rbbt/knowledge_base/query.rb', line 11

# Returns the associations of database +name+ restricted to +entities+.
#
# @param name [Object] database name
# @param entities [Symbol, Hash, AnnotatedArray, nil] :all, a hash of
#   entity lists (see #select_entities), or an annotated array, which is
#   keyed by its format or, failing that, its base_entity. When nil and
#   +options+ is a Hash, the two arguments are swapped.
# @param options [Hash] passed on to #select_entities and #_subset
# @yield [match] optional filter applied to the matches with +select+
# @return [Array] the matches after #setup, or [] when either side is
#   missing or is an empty Array
# @raise [RuntimeError] when +entities+ is of none of the accepted kinds
def subset(name, entities, options = {}, &block)
  entities, options = options, entities if entities.nil? and Hash === options
  # After the swap above options holds the former nil entities; fall back
  # to an empty hash so the lookups below can index into it.
  options ||= {}

  entities = case entities
             when :all
               {:target => :all, :source => :all}
             when AnnotatedArray
               format = entities.format if entities.respond_to? :format
               format ||= entities.base_entity.to_s
               {format => entities.clean_annotations}
             when Hash
               entities
             else
               raise "Entities are not a Hash or an AnnotatedArray: #{Misc.fingerprint entities}"
             end

  source, target = select_entities(name, entities, options)

  return [] if source.nil? or target.nil?
  return [] if Array === target and target.empty?
  return [] if Array === source and source.empty?

  matches = _subset name, source, target, options

  setup(name, matches)

  matches = matches.select(&block) if block_given?

  matches
end

#syndicate(name, kb) ⇒ Object



2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/rbbt/knowledge_base/syndicate.rb', line 2

# Registers every database of another knowledge base +kb+ in this one.
#
# Each database is registered with a block that fetches it from +kb+ on
# demand. The local name is the original name, suffixed with "@name" when
# +name+ is given. :entity_options and :undirected are carried over from
# the registry entry of +kb+, and the entity options of +kb+ itself act as
# defaults below the per-database ones.
#
# @param name [Object, nil] suffix for the registered names
# @param kb [Object] knowledge base to syndicate from
def syndicate(name, kb)
  kb.all_databases.each do |database|
    if name.nil?
      db_name = database
    else
      db_name = [database, name] * "@"
    end
    _file, kb_options = kb.registry[database]
    options = {}
    # Guard both reads: a registry entry may carry no options hash.
    if kb_options
      options[:entity_options] = kb_options[:entity_options]
      options[:undirected] = kb_options[:undirected]
    end
    if kb.entity_options
      options[:entity_options] = kb.entity_options.merge(options[:entity_options] || {})
    end

    register(db_name, nil, options) do
      kb.get_database(database)
    end
  end
end

#target(name) ⇒ Object



29
30
31
# File 'lib/rbbt/knowledge_base/registry.rb', line 29

# Returns the second element of #description for +name+.
def target(name)
  _source_field, target_field = description(name)
  target_field
end

#target_index(name) ⇒ Object



81
82
83
84
85
86
87
88
89
90
# File 'lib/rbbt/knowledge_base/entity.rb', line 81

# Builds, inside Persist.memory, a translation index for the target field
# of database +name+ via TSV.translation_index.
#
# The identifier files come from the database plus, when Entity is defined,
# Entity.identifier_files for the target. NAMESPACE placeholders are
# replaced by the namespace when one is set; files that still contain the
# placeholder are dropped.
def target_index(name)
  Persist.memory("Target index #{name}: KB directory #{dir}") do
    files = identifier_files(name)
    files.concat(Entity.identifier_files(target(name))) if defined? Entity
    files.uniq!
    if namespace
      files.map! { |file| file.annotate(file.gsub(/\bNAMESPACE\b/, namespace)) }
    end
    files.reject! { |file| file.match(/\bNAMESPACE\b/) }
    TSV.translation_index(files, target(name), nil, :persist => true)
  end
end

#target_type(name) ⇒ Object



54
55
56
# File 'lib/rbbt/knowledge_base/entity.rb', line 54

# Looks up the target of database +name+ in Entity.formats.
def target_type(name)
  formats = Entity.formats
  formats[target(name)]
end

#translate(entities, type) ⇒ Object



42
43
44
45
46
47
48
# File 'lib/rbbt/knowledge_base/entity.rb', line 42

# Converts +entities+ to the format configured in @format for +type+.
#
# The conversion (entities.to) only happens when a format is configured,
# +entities+ responds to +format+, and its current format differs;
# otherwise +entities+ is returned untouched.
def translate(entities, type)
  wanted = @format[type]
  needs_conversion = wanted && entities.respond_to?(:format) && wanted != entities.format
  needs_conversion ? entities.to(wanted) : entities
end

#traverse(rules) ⇒ Object



170
171
172
173
# File 'lib/rbbt/knowledge_base/traverse.rb', line 170

# Creates a KnowledgeBase::Traverser for this knowledge base and +rules+
# and returns the result of its +traverse+ method.
def traverse(rules)
  KnowledgeBase::Traverser.new(self, rules).traverse
end

#undirected(name) ⇒ Object



33
34
35
# File 'lib/rbbt/knowledge_base/registry.rb', line 33

# Returns the third element of #description for +name+ (nil when the
# description has fewer than three parts).
def undirected(name)
  _source_field, _target_field, flag = description(name)
  flag
end