Class: KnowledgeBase

Inherits:
Object
  • Object
show all
Defined in:
lib/rbbt/knowledge_base.rb,
lib/rbbt/knowledge_base/query.rb,
lib/rbbt/knowledge_base/entity.rb,
lib/rbbt/knowledge_base/registry.rb,
lib/rbbt/knowledge_base/syndicate.rb,
lib/rbbt/knowledge_base/enrichment.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(dir, namespace = nil) ⇒ KnowledgeBase

Returns a new instance of KnowledgeBase.



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/rbbt/knowledge_base.rb', line 11

# Builds an empty knowledge base rooted at +dir+.
#
# @param dir [String, Path] location of the knowledge base; it is duplicated,
#   wrapped with Path.setup and resolved with +find+ before being stored
# @param namespace [Object, nil] stored as-is and later handed to databases,
#   indices and entity options
def initialize(dir, namespace = nil)
  @dir = Path.setup(dir.dup).find

  @namespace = namespace
  @format = IndiferentHash.setup({})

  # `||=` keeps a registry that was already populated before this
  # constructor ran.
  @registry ||= IndiferentHash.setup({})
  @entity_options = IndiferentHash.setup({})

  @indices = IndiferentHash.setup({})
  @identifiers = IndiferentHash.setup({})
  @descriptions = {}
  # NOTE(review): this used to be assigned twice (first an IndiferentHash,
  # then overwritten by a plain Hash). Only the effective, final value is
  # kept; confirm a plain Hash is what is wanted here.
  @databases = {}
end

Instance Attribute Details

#databases ⇒ Object

Returns the value of attribute databases.



10
11
12
# File 'lib/rbbt/knowledge_base.rb', line 10

# Reader for +@databases+, which #initialize sets to an empty Hash.
#
# @return [Hash] the value of the +databases+ attribute
def databases
  @databases
end

#dir ⇒ Object

Returns the value of attribute dir.



10
11
12
# File 'lib/rbbt/knowledge_base.rb', line 10

# Reader for +@dir+, the value #initialize derives from its +dir+ argument
# via Path.setup(...).find.
#
# @return [Object] the value of the +dir+ attribute
def dir
  @dir
end

#entity_options ⇒ Object

Returns the value of attribute entity_options.



10
11
12
# File 'lib/rbbt/knowledge_base.rb', line 10

# Reader for +@entity_options+, initialised in #initialize to an empty hash
# passed through IndiferentHash.setup.
#
# @return [Hash] the value of the +entity_options+ attribute
def entity_options
  @entity_options
end

#format ⇒ Object

Returns the value of attribute format.



10
11
12
# File 'lib/rbbt/knowledge_base.rb', line 10

# Reader for +@format+, initialised in #initialize to an empty hash passed
# through IndiferentHash.setup. Other methods look entity types up in it
# (+@format[type]+).
#
# @return [Hash] the value of the +format+ attribute
def format
  @format
end

#indices ⇒ Object

Returns the value of attribute indices.



10
11
12
# File 'lib/rbbt/knowledge_base.rb', line 10

# Reader for +@indices+, the hash in which both #get_index and #get_database
# memoize the objects they open.
#
# @return [Hash] the value of the +indices+ attribute
def indices
  @indices
end

#namespace ⇒ Object

Returns the value of attribute namespace.



10
11
12
# File 'lib/rbbt/knowledge_base.rb', line 10

# Reader for +@namespace+, the second argument given to #initialize.
#
# @return [Object, nil] the value of the +namespace+ attribute
def namespace
  @namespace
end

#registry ⇒ Object

Returns the value of attribute registry.



10
11
12
# File 'lib/rbbt/knowledge_base.rb', line 10

# Reader for +@registry+. #register stores one +[file_or_block, options]+
# pair per database name in it.
#
# @return [Hash] the value of the +registry+ attribute
def registry
  @registry
end

Instance Method Details

#_children(name, entity) ⇒ Object



47
48
49
50
# File 'lib/rbbt/knowledge_base/query.rb', line 47

# Raw child lookup: asks the index of database +name+ (see #get_index) to
# +match+ the given entity and returns that result untouched.
#
# @param name [String, Symbol] database name
# @param entity [Object] entity passed straight to the index
# @return [Object] whatever the index's +match+ returns
def _children(name, entity)
  get_index(name).match(entity)
end

#_neighbours(name, entity) ⇒ Object



67
68
69
70
71
72
73
# File 'lib/rbbt/knowledge_base/query.rb', line 67

# Raw neighbour lookup for +entity+ in database +name+.
#
# When the database is flagged as undirected and its source and target
# fields coincide, only +:children+ is reported; otherwise both +:parents+
# and +:children+ are.
#
# @return [Hash] +{:children => ...}+ or +{:parents => ..., :children => ...}+
def _neighbours(name, entity)
  symmetric = undirected(name) && source(name) == target(name)
  return {:children => _children(name, entity)} if symmetric

  {:parents => _parents(name, entity), :children => _children(name, entity)}
end

#_parents(name, entity) ⇒ Object



57
58
59
60
# File 'lib/rbbt/knowledge_base/query.rb', line 57

# Raw parent lookup: takes the +reverse+ of the index for database +name+
# and asks it to +match+ the entity.
#
# @param name [String, Symbol] database name
# @param entity [Object] entity passed straight to the reversed index
# @return [Object] whatever the reversed index's +match+ returns
def _parents(name, entity)
  reversed = get_index(name).reverse
  reversed.match(entity)
end

#_subset(name, source = :all, target = :all, options = {}) ⇒ Object



6
7
8
9
10
# File 'lib/rbbt/knowledge_base/query.rb', line 6

# Raw subset query: opens the index for +name+ with +options+ and delegates
# to its +subset(source, target)+.
#
# @param source [Object] source selector, +:all+ by default
# @param target [Object] target selector, +:all+ by default
# @param options [Hash] forwarded to #get_index
# @return [Object] whatever the index's +subset+ returns
def _subset(name, source = :all, target = :all, options = {})
  get_index(name, options).subset(source, target)
end

#all(name, options = {}) ⇒ Object



42
43
44
45
# File 'lib/rbbt/knowledge_base/query.rb', line 42

# Returns every key of the index for database +name+, passed through #setup.
#
# @param name [String, Symbol] database name
# @param options [Hash] forwarded to #get_index
# @return [Object] the result of +setup(name, keys)+
def all(name, options={})
  every_key = get_index(name, options).keys
  setup(name, every_key)
end

#all_databases ⇒ Object



17
18
19
# File 'lib/rbbt/knowledge_base/registry.rb', line 17

# Lists the names under which databases have been registered, i.e. the keys
# of +@registry+ (filled by #register).
#
# @return [Array] registered database names
def all_databases
  @registry.keys 
end

#annotate(entities, type, database = nil) ⇒ Object



36
37
38
39
40
# File 'lib/rbbt/knowledge_base/entity.rb', line 36

# Prepares +entities+ as entities of the given +type+.
#
# The format is the one configured for +type+ in +@format+, falling back to
# +type+ itself; the options come from #entity_options_for. Both are handed
# to Misc.prepare_entity together with the entities.
#
# @param database [String, Symbol, nil] optional database whose entity
#   options should also be taken into account
# @return [Object] the result of Misc.prepare_entity
def annotate(entities, type, database = nil)
  target_format = @format[type] || type
  Misc.prepare_entity(entities, target_format, entity_options_for(type, database))
end

#children(name, entity) ⇒ Object



52
53
54
55
# File 'lib/rbbt/knowledge_base/query.rb', line 52

# Children of +entity+ in database +name+.
#
# The entity is first translated with #identify_source, then matched with
# #_children, and the matches are passed through #setup.
#
# @return [Object] the result of +setup(name, matches)+
def children(name, entity)
  resolved = identify_source(name, entity)
  matches = _children(name, resolved)
  setup(name, matches)
end

#description(name) ⇒ Object



21
22
23
# File 'lib/rbbt/knowledge_base/registry.rb', line 21

# Memoized description of database +name+: the index's +key_field+ split on
# "~". #source, #target and #undirected read positions 0, 1 and 2 of it.
#
# @return [Array<String>] the parts of the key field
def description(name)
  cached = @descriptions[name]
  return cached if cached

  @descriptions[name] = get_index(name).key_field.split("~")
end

#enrichment(name, entities, options = {}) ⇒ Object



4
5
6
7
8
# File 'lib/rbbt/knowledge_base/enrichment.rb', line 4

# Runs the database's own +enrichment+ for +entities+ against its first
# field, with persistence disabled.
#
# The entities are translated with #identify_source before the call.
#
# @param options [Hash] forwarded to #get_database
# @return [Object] whatever the database's +enrichment+ returns
def enrichment(name, entities, options = {})
  db = get_database(name, options)
  translated = identify_source(name, entities)
  db.enrichment(translated, db.fields.first, :persist => false)
end

#entities ⇒ Object



58
59
60
# File 'lib/rbbt/knowledge_base/entity.rb', line 58

# Distinct source and target fields over all registered databases, in
# registration order (source before target for each database).
#
# @return [Array] unique field names
def entities
  all_databases.flat_map { |db_name| [source(db_name), target(db_name)] }.uniq
end

#entity_options_for(type, database_name = nil) ⇒ Object



21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/rbbt/knowledge_base/entity.rb', line 21

# Collects the entity options that apply to +type+.
#
# Options are layered, later layers winning:
# 1. +{:organism => namespace}+
# 2. the knowledge-base-wide options registered for +type+ (looked up by
#    +type.to_s+, then by the name Entity.formats maps it to), plus
#    +:format+ when a format is configured for +type+
# 3. if +database_name+ is given, that database's own options for +type+
#
# Note that +:format+ is written into the hash found in #entity_options, so
# that stored hash is modified in place.
#
# @param type [String, Symbol] entity type or format name
# @param database_name [String, Symbol, nil] database to take overrides from
# @return [Hash] merged options
def entity_options_for(type, database_name = nil)
  kb_options = entity_options
  IndiferentHash.setup kb_options if kb_options and not IndiferentHash === kb_options

  options = kb_options[type.to_s] || kb_options[Entity.formats[type.to_s].to_s] || {}
  # Fixed: this previously tested for the literal symbol +:type+ rather than
  # the +type+ argument, so a configured format was never applied.
  options[:format] = @format[type] if @format.include? type
  options = {:organism => namespace}.merge(options)

  if database_name
    db_options = get_database(database_name).entity_options
    specific = db_options && (db_options[type] || db_options[Entity.formats[type.to_s].to_s])
    options = options.merge(specific) if specific
  end

  options
end

#entity_types ⇒ Object



62
63
64
# File 'lib/rbbt/knowledge_base/entity.rb', line 62

# Distinct values that Entity.formats associates with the fields returned by
# #entities. Fields without an entry contribute +nil+.
#
# @return [Array] unique Entity.formats values
def entity_types
  entities.map { |field| Entity.formats[field] }.uniq
end

#get_database(name, options = {}) ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/rbbt/knowledge_base/registry.rb', line 80

# Opens (or re-opens) the association database registered under +name+ and
# memoizes it.
#
# The memoization key is "Index:<name>_<digest of the name/options
# fingerprint>" and the result is stored in +@indices+ (the same hash
# #get_index uses), in addition to going through Persist.memory.
#
# @param name [String, Symbol] registered database name (converted with +to_s+)
# @param options [Hash] caller options; a duplicate is completed with the
#   registered options and with persistence defaults
# @return [Object] the object returned by Association.open, with its
#   +namespace+ set to this knowledge base's namespace
# @raise [RuntimeError] when the database has to be built but no file/block
#   is registered under +name+
def get_database(name, options = {})
  name = name.to_s
  key = "Index:" + name.to_s + "_" + Misc.digest(Misc.fingerprint([name,options.dup]))
  @indices[key] ||= 
    begin 
      Persist.memory("Database:" << [key, dir] * "@") do
        # Work on a copy so the caller's hash is not reassigned below.
        options = options.dup
        persist_file = dir.indices[key]
        file, registered_options = registry[name]


        # NOTE(review): Misc.add_defaults presumably only fills keys that are
        # not already set, so caller options win over registered ones -- confirm.
        options = Misc.add_defaults options, registered_options if registered_options and registered_options.any?
        options = Misc.add_defaults options, :persist_file => persist_file, :namespace => namespace, :format => format, :persist => true

        # Complete the per-type entity options with the knowledge-base-wide ones.
        if entity_options
          options[:entity_options] ||= {}
          entity_options.each do |type, info|
            options[:entity_options][type] ||= {}
            options[:entity_options][type] = Misc.add_defaults options[:entity_options][type], info
          end
        end

        # NOTE(review): presumably extracts the :persist* keys out of options
        # into their own hash -- confirm against Misc.pull_keys.
        persist_options = Misc.pull_keys options, :persist

        # Re-open from the persisted file when it exists, persistence is on and
        # no update was requested; otherwise build from the registered source.
        database = if persist_file.exists? and persist_options[:persist] and not persist_options[:update]
                  Log.low "Re-opening database #{ name } from #{ Misc.fingerprint persist_file }. #{options}"
                  Association.open(nil, options, persist_options)
                else
                  options = Misc.add_defaults options, registered_options if registered_options
                  raise "Repo #{ name } not found and not registered" if file.nil?
                  Log.medium "Opening database #{ name } from #{ Misc.fingerprint file }. #{options}"
                  Association.open(file, options, persist_options)
                end

        database.namespace = self.namespace

        database
      end
    end
end

#get_index(name, options = {}) ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/rbbt/knowledge_base/registry.rb', line 37

# Opens (or re-opens) the association index for the database registered
# under +name+ and memoizes it.
#
# Results are memoized in +@indices+ under the +[name, options]+ pair and,
# additionally, through Persist.memory under a key built from the name and a
# digest of the name/options fingerprint.
#
# @param name [String, Symbol] registered database name (converted with +to_s+)
# @param options [Hash] caller options; a duplicate is completed with the
#   registered options and with persistence defaults
# @return [Object] the object returned by Association.index, with its
#   +namespace+ set to this knowledge base's namespace
# @raise [RuntimeError] when the index has to be built but no file/block is
#   registered under +name+
def get_index(name, options = {})
  name = name.to_s
  @indices[[name, options]] ||= 
    begin 
      fp = Misc.fingerprint([name,options])
      key = name.to_s + "_" + Misc.digest(fp)

      Persist.memory("Index:" << [key, dir] * "@") do
        # Work on a copy so the caller's hash is not reassigned below.
        options = options.dup
        persist_dir = dir
        persist_file = persist_dir[key]
        file, registered_options = registry[name]

        # NOTE(review): Misc.add_defaults presumably only fills keys that are
        # not already set, so caller options win over registered ones -- confirm.
        options = Misc.add_defaults options, registered_options if registered_options and registered_options.any?
        options = Misc.add_defaults options, :persist_file => persist_file, :persist_dir => persist_dir, :namespace => namespace, :format => format, :persist => true

        # Complete the per-type entity options with the knowledge-base-wide ones.
        if entity_options
          options[:entity_options] ||= {}
          entity_options.each do |type, info|
            options[:entity_options][type] ||= {}
            options[:entity_options][type] = Misc.add_defaults options[:entity_options][type], info
          end
        end

        # NOTE(review): presumably extracts the :persist* keys out of options
        # into their own hash -- confirm against Misc.pull_keys.
        persist_options = Misc.pull_keys options, :persist

        # Re-open from the persisted file when it exists, persistence is on and
        # no update was requested; otherwise build from the registered source.
        index = if persist_file.exists? and persist_options[:persist] and not persist_options[:update]
                  Log.low "Re-opening index #{ name } from #{ Misc.fingerprint persist_file }. #{options}"
                  Association.index(nil, options, persist_options.dup)
                else
                  options = Misc.add_defaults options, registered_options if registered_options
                  raise "Repo #{ name } not found and not registered" if file.nil?
                  Log.medium "Opening index #{ name } from #{ Misc.fingerprint file }. #{options}"
                  Association.index(file, options, persist_options.dup)
                end

        index.namespace = self.namespace

        index
      end
    end
end

#identifier_files(name) ⇒ Object



66
67
68
# File 'lib/rbbt/knowledge_base/entity.rb', line 66

# Returns a shallow copy of the identifier files declared by database
# +name+, so callers can modify the list without touching the database's own.
#
# @return [Array] duplicated list of identifier files
def identifier_files(name)
  database = get_database(name)
  database.identifier_files.dup
end

#identify(name, entity) ⇒ Object



107
108
109
# File 'lib/rbbt/knowledge_base/entity.rb', line 107

# Translates +entity+ for database +name+, trying the source side first and
# falling back to the target side when the source translation is nil/false.
#
# @return [Object, nil] the first truthy translation, else the target result
def identify(name, entity)
  as_source = identify_source(name, entity)
  return as_source if as_source

  identify_target(name, entity)
end

#identify_source(name, entity) ⇒ Object



92
93
94
95
96
97
# File 'lib/rbbt/knowledge_base/entity.rb', line 92

# Translates +entity+ into the source format of database +name+ using
# #source_index.
#
# * +:all+ is passed through unchanged.
# * When no source index is available the entity is returned as given.
# * Arrays are translated element-wise (+values_at+); anything else is a
#   single lookup.
#
# @return [Object] translated entity or entities
def identify_source(name, entity)
  return :all if entity == :all

  translation = source_index(name)
  return entity if translation.nil?

  if Array === entity
    translation.values_at(*entity)
  else
    translation[entity]
  end
end

#identify_target(name, entity) ⇒ Object



100
101
102
103
104
105
# File 'lib/rbbt/knowledge_base/entity.rb', line 100

# Translates +entity+ into the target format of database +name+ using
# #target_index.
#
# * +:all+ is passed through unchanged.
# * When no target index is available the result is +nil+ (unlike
#   #identify_source, which returns the entity itself in that case).
# * Arrays are translated element-wise (+values_at+); anything else is a
#   single lookup.
#
# @return [Object, nil] translated entity or entities
def identify_target(name, entity)
  return :all if entity == :all

  translation = target_index(name)
  return nil if translation.nil?

  if Array === entity
    translation.values_at(*entity)
  else
    translation[entity]
  end
end

#index_fields(name) ⇒ Object



121
122
123
# File 'lib/rbbt/knowledge_base/registry.rb', line 121

# Fields of the index built for database +name+.
#
# @return [Object] whatever the index's +fields+ returns
def index_fields(name)
  index = get_index(name)
  index.fields
end

#neighbours(name, entity) ⇒ Object



75
76
77
78
79
80
81
# File 'lib/rbbt/knowledge_base/query.rb', line 75

# Neighbours of +entity+ in database +name+, as a hash with +:children+ and,
# when #_neighbours reports them, +:parents+.
#
# The hash is passed through IndiferentHash.setup and each list present is
# passed through #setup (parents with +reverse = true+).
#
# @return [Hash] the hash produced by #_neighbours
def neighbours(name, entity)
  related = _neighbours(name, entity)
  IndiferentHash.setup(related)

  kids = related[:children]
  setup(name, kids) if kids

  elders = related[:parents]
  setup(name, elders, true) if elders

  related
end

#parents(name, entity) ⇒ Object



62
63
64
65
# File 'lib/rbbt/knowledge_base/query.rb', line 62

# Parents of +entity+ in database +name+.
#
# The entity is first translated with #identify_target, then matched with
# #_parents, and the matches are passed through #setup with +reverse = true+.
#
# @return [Object] the result of +setup(name, matches, true)+
def parents(name, entity)
  resolved = identify_target(name, entity)
  matches = _parents(name, resolved)
  setup(name, matches, true)
end

#register(name, file = nil, options = {}, &block) ⇒ Object



6
7
8
9
10
11
12
13
14
15
# File 'lib/rbbt/knowledge_base/registry.rb', line 6

# Registers a database under +name+.
#
# The source is either +file+ or, when a block is given, the block itself;
# in that case the block gets a singleton +filename+ method returning
# +name.to_s+. The registry entry is the pair +[source, options]+.
#
# @param name [String, Symbol] name to register under
# @param file [Object, nil] source of the database (ignored with a block)
# @param options [Hash] options stored alongside the source
# @return [Array] the stored +[source, options]+ pair
def register(name, file = nil, options = {}, &block)
  entry =
    if block_given?
      block.define_singleton_method(:filename) { name.to_s }
      Log.debug("Registering #{ name } from code block")
      [block, options]
    else
      Log.debug("Registering #{ name }: #{ Misc.fingerprint file }")
      [file, options]
    end

  @registry[name] = entry
end

#select_entities(name, entities, options = {}) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
# File 'lib/rbbt/knowledge_base/entity.rb', line 6

# Picks, out of the +entities+ hash, the entries that apply to the source
# and to the target side of database +name+.
#
# For each side the first truthy entry among these keys is used:
# +:source+ / +:target+, the index's field name, and the string form of the
# value Entity.formats associates with that field.
#
# @param entities [Hash] candidate entities keyed as described above
# @param options [Hash] forwarded to #get_index
# @return [Array] two elements: +[source_entities, target_entities]+,
#   either of which may be nil
def select_entities(name, entities, options = {})
  index = get_index(name, options)
  source_field = index.source_field
  target_field = index.target_field

  # These were previously computed but left unused while the same lookup was
  # repeated inline below.
  source_type = Entity.formats[source_field]
  target_type = Entity.formats[target_field]

  source_entities = entities[:source] || entities[source_field] || entities[source_type.to_s]
  target_entities = entities[:target] || entities[target_field] || entities[target_type.to_s]

  [source_entities, target_entities]
end

#setup(name, matches, reverse = false) ⇒ Object



27
28
29
# File 'lib/rbbt/knowledge_base.rb', line 27

# Hands +matches+ to AssociationItem.setup together with this knowledge
# base, the database +name+ and the +reverse+ flag.
#
# @param reverse [Boolean] forwarded as the last argument (false by default)
# @return [Object] whatever AssociationItem.setup returns
def setup(name, matches, reverse = false)
  knowledge_base = self
  AssociationItem.setup(matches, knowledge_base, name, reverse)
end

#source(name) ⇒ Object



25
26
27
# File 'lib/rbbt/knowledge_base/registry.rb', line 25

# Source field of database +name+: the first element of its #description.
#
# @return [String, nil]
def source(name)
  description(name).first
end

#source_index(name) ⇒ Object



70
71
72
73
74
75
76
77
78
79
# File 'lib/rbbt/knowledge_base/entity.rb', line 70

# Translation index towards the source field of database +name+, memoized
# through Persist.memory.
#
# The identifier files are those of the database plus, when Entity is
# defined, the ones Entity declares for the source field. Duplicates are
# dropped, the NAMESPACE placeholder is replaced by the knowledge base's
# namespace (when there is one), and files still containing the placeholder
# are discarded.
#
# @return [Object] the result of TSV.translation_index
def source_index(name)
  Persist.memory("Source index #{name}: KB directory #{dir}") do
    files = identifier_files(name)
    files.concat(Entity.identifier_files(source(name))) if defined? Entity
    files.uniq!
    if namespace
      files.collect! { |path| path.annotate(path.gsub(/\bNAMESPACE\b/, namespace)) }
    end
    files.reject! { |path| path.match(/\bNAMESPACE\b/) }
    TSV.translation_index(files, source(name), nil, :persist => true)
  end
end

#source_type(name) ⇒ Object



50
51
52
# File 'lib/rbbt/knowledge_base/entity.rb', line 50

# Value Entity.formats associates with the source field of database +name+.
#
# @return [Object, nil]
def source_type(name)
  field = source(name)
  Entity.formats[field]
end

#subset(name, entities, options = {}, &block) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/rbbt/knowledge_base/query.rb', line 12

# Associations of database +name+ restricted to the given entities.
#
# +entities+ may be:
# * +:all+ -- every source and every target;
# * an AnnotatedArray -- used as the entities for its +format+ (or, failing
#   that, its +base_entity+);
# * a Hash -- used as is (see #select_entities for the accepted keys).
#
# When +entities+ is nil and +options+ is a Hash, the two are swapped.
# NOTE(review): after that swap +options+ is nil and is forwarded as such to
# #select_entities and #_subset -- confirm this is intended.
#
# An empty list is returned when either side is missing or is an empty
# Array. With a block, only the matches for which it is truthy are returned.
#
# @raise [RuntimeError] when +entities+ is none of the accepted kinds
# @return [Object] the matches, after being passed through #setup
def subset(name, entities, options = {}, &block)
  entities, options = options, entities if entities.nil? && Hash === options

  entities =
    case entities
    when :all
      {:target => :all, :source => :all}
    when AnnotatedArray
      format = entities.respond_to?(:format) ? entities.format : nil
      format ||= entities.base_entity.to_s
      {format => entities.clean_annotations}
    when Hash
      entities
    else
      raise "Entities are not a Hash or an AnnotatedArray: #{Misc.fingerprint entities}"
    end

  source, target = select_entities(name, entities, options)

  unusable = [source, target].any? { |side| side.nil? || (Array === side && side.empty?) }
  return [] if unusable

  found = _subset(name, source, target, options)
  setup(name, found)

  block_given? ? found.select(&block) : found
end

#syndicate(name, kb) ⇒ Object



2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# File 'lib/rbbt/knowledge_base/syndicate.rb', line 2

# Registers every database of another knowledge base +kb+ in this one, under
# the name "<database>@<name>".
#
# Each syndicated entry is registered with a block that fetches the database
# from +kb+ on demand. Its options carry over +:undirected+ and the entity
# options: +kb+'s knowledge-base-wide entity options, overridden by the ones
# registered for that particular database.
#
# @param name [String, Symbol] suffix identifying the syndicated knowledge base
# @param kb [KnowledgeBase] knowledge base to import databases from
# @return [Object] the result of iterating +kb.all_databases+
def syndicate(name, kb)
  kb.all_databases.each do |database|
    db_name = [database, name] * "@"
    _file, kb_options = kb.registry[database]

    options = {}
    # A registry entry may carry no options; guard it here as the
    # :undirected check already did, instead of failing on nil.
    options[:entity_options] = kb_options ? kb_options[:entity_options] : nil
    options[:undirected] = true if kb_options && kb_options[:undirected]
    if kb.entity_options
      options[:entity_options] = kb.entity_options.merge(options[:entity_options] || {})
    end

    register(db_name, nil, options) do
      kb.get_database(database)
    end
  end
end

#target(name) ⇒ Object



29
30
31
# File 'lib/rbbt/knowledge_base/registry.rb', line 29

# Target field of database +name+: the second element of its #description.
#
# @return [String, nil]
def target(name)
  description(name).at(1)
end

#target_index(name) ⇒ Object



81
82
83
84
85
86
87
88
89
90
# File 'lib/rbbt/knowledge_base/entity.rb', line 81

# Translation index towards the target field of database +name+, memoized
# through Persist.memory.
#
# The identifier files are those of the database plus, when Entity is
# defined, the ones Entity declares for the target field. Duplicates are
# dropped, the NAMESPACE placeholder is replaced by the knowledge base's
# namespace (when there is one), and files still containing the placeholder
# are discarded.
#
# @return [Object] the result of TSV.translation_index
def target_index(name)
  Persist.memory("Target index #{name}: KB directory #{dir}") do
    identifier_files = identifier_files(name)
    # Fixed: this used to ask Entity for the identifier files of the *source*
    # field (copied from #source_index) although the index is built for the
    # target field.
    identifier_files.concat Entity.identifier_files(target(name)) if defined? Entity
    identifier_files.uniq!
    identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace))} if namespace
    identifier_files.reject!{|f| f.match(/\bNAMESPACE\b/)}
    TSV.translation_index identifier_files, target(name), nil, :persist => true
  end
end

#target_type(name) ⇒ Object



54
55
56
# File 'lib/rbbt/knowledge_base/entity.rb', line 54

# Value Entity.formats associates with the target field of database +name+.
#
# @return [Object, nil]
def target_type(name)
  field = target(name)
  Entity.formats[field]
end

#translate(entities, type) ⇒ Object



42
43
44
45
46
47
48
# File 'lib/rbbt/knowledge_base/entity.rb', line 42

# Converts +entities+ to the format configured for +type+, when needed.
#
# A conversion (+entities.to(format)+) only happens when a format is
# configured for +type+, the entities expose a +format+, and that format
# differs from the configured one. Otherwise the entities are returned
# unchanged.
#
# @return [Object] converted or original entities
def translate(entities, type)
  wanted = @format[type]
  needs_conversion = wanted && entities.respond_to?(:format) && wanted != entities.format
  needs_conversion ? entities.to(wanted) : entities
end

#undirected(name) ⇒ Object



33
34
35
# File 'lib/rbbt/knowledge_base/registry.rb', line 33

# Third element of the #description of database +name+; #_neighbours treats
# a truthy value as "this database is undirected".
#
# @return [String, nil]
def undirected(name)
  description(name).at(2)
end