Module: Association

Defined in:
lib/rbbt/association.rb,
lib/rbbt/association/open.rb,
lib/rbbt/association/util.rb,
lib/rbbt/association/index.rb,
lib/rbbt/association/database.rb

Defined Under Namespace

Modules: Index

Class Method Summary

Class Method Details

.add_reciprocal(tsv) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/rbbt/association/database.rb', line 6

# Returns a copy of +tsv+ that also holds the reverse of every association.
#
# The copy is created by re-opening the dump stream of +tsv+. When +tsv+ is of
# type :double, every (source, target, info...) entry found in it is added to
# the copy as (target, source, info...); entries whose target equals their
# source are skipped. For any other type nothing is added.
#
# tsv - the TSV to complete with reciprocal entries.
#
# Returns the new TSV, annotated by +tsv+.
def self.add_reciprocal(tsv)
  reciprocal = TSV.open(tsv.dumper_stream)

  tsv.with_unnamed do
    if :double === tsv.type
      tsv.through do |source, values|
        Misc.zip_fields(values).each do |target, *info|
          # Self associations are already their own reciprocal
          next if target == source
          reciprocal.zip_new target, [source] + info
        end
      end
    end
  end

  tsv.annotate(reciprocal)

  reciprocal
end

.database(file, options = {}) ⇒ Object



141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/rbbt/association/database.rb', line 141

# Builds an association database from several kinds of input.
#
# file    - what to load:
#           * a Step: cleaned if it is in error, aborted or dirty; run unless
#             done or started; joined unless done; then its stream is parsed.
#           * a TSV: converted to :double if needed and reordered.
#           * an IO: parsed directly as a stream.
#           * anything else: turned into a stream with TSV.get_stream.
# options - Hash handed (as a copy) to open_stream or reorder_tsv. Its
#           :entity_options entry, when present, is assigned to the result.
#
# Returns the object produced by open_stream or reorder_tsv.
def self.database(file, options = {})
  result =
    if Step === file
      needs_reset = file.error? || file.aborted? || file.dirty?
      file.clean if needs_reset
      file.run(true) unless file.done? || file.started?
      file.join unless file.done?
      open_stream(TSV.get_stream(file), options.dup)
    elsif TSV === file
      double = (file.type == :double) ? file : file.to_double
      reorder_tsv(double, options.dup)
    elsif IO === file
      open_stream(file, options.dup)
    else
      open_stream(TSV.get_stream(file), options.dup)
    end

  entity_options = options[:entity_options]
  result.entity_options = entity_options if entity_options

  result
end

.extract_specs(all_fields = nil, options = {}) ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/rbbt/association/util.rb', line 44

# Works out which fields act as source and target of the associations.
#
# all_fields - Array with every field name, key field first. May be nil, in
#              which case no field lookup or entity-format matching is done.
# options    - Hash from which :source, :source_format, :target and
#              :target_format are extracted with Misc.process_options.
#
# Each of source and target is described by a three element Array
# [field, header, format] obtained from normalize_specs; :source_format and
# :target_format, when given, override the third element.
#
# When a requested field is not one of +all_fields+ but Entity knows it as a
# format, the first of +all_fields+ that Entity maps to the same type is used
# in its place.
#
# Fields that remain unspecified are filled with defaults: the source defaults
# to the key field and the target to the first non-key field, swapping them
# when the other side has already claimed that default.
#
# Returns a Hash {:source => [field, header, format], :target => [...]}.
# Raises RuntimeError when an entity format is requested but no field of
# +all_fields+ matches it.
def self.extract_specs(all_fields=nil, options = {})
  source, source_format, target, target_format = Misc.process_options options, :source, :source_format, :target, :target_format

  key_field, *fields = all_fields.nil? ? [nil] : all_fields

  source_specs = normalize_specs(source, all_fields) || [nil, nil, nil]
  target_specs = normalize_specs(target, all_fields) || [nil, nil, nil]

  source_specs[2] = source_format if source_format
  target_specs[2] = target_format if target_format

  # The entity based lookup needs the list of fields; without it the requested
  # names are kept untouched instead of failing on nil.
  if all_fields and source_specs.first and not all_fields.include?(source_specs.first) and defined?(Entity) and (_format = Entity.formats[source_specs.first.to_s])
    _source = all_fields.find{|f| Entity.formats[f.to_s] == _format }
    # This used to test _target, which is not defined at this point
    raise "Source not found #{source_specs}. Options: #{Misc.fingerprint all_fields}" if _source.nil?
    source_specs[0] = _source
  end

  if all_fields and target_specs.first and not all_fields.include?(target_specs.first) and defined?(Entity) and (_format = Entity.formats[target_specs.first.to_s])
    _target = all_fields.find{|f| Entity.formats[f.to_s].to_s == _format.to_s }
    raise "Target not found #{target_specs}. Options: #{Misc.fingerprint all_fields}" if _target.nil?
    target_specs[0] = _target
  end

  if source_specs[0].nil? and target_specs[0].nil?
    source_specs[0] = key_field
    target_specs[0] = fields[0]
  elsif source_specs[0].nil?
    # The target took the key, so the source falls back to the first field
    if target_specs[0] == :key or target_specs[0] == key_field
      source_specs[0] = fields[0]
    else
      source_specs[0] = key_field
    end
  elsif target_specs[0].nil?
    # The source took the first field, so the target falls back to the key
    if source_specs[0] == fields.first
      target_specs[0] = key_field
    else
      target_specs[0] = fields.first
    end
  end

  {:source => source_specs, :target => target_specs}
end

.headers(all_fields, info_fields = nil, options = {}) ⇒ Object



99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/rbbt/association/util.rb', line 99

# Computes the column positions and header names needed to lay out a file as
# source => [target, info...].
#
# all_fields  - Array with every field name, key field first.
# info_fields - fields to keep besides the target; each one may be a String
#               name, an Integer position in +all_fields+, or :key for the
#               first field. Defaults to a copy of +all_fields+.
#               NOTE: when given, this Array is modified in place (source and
#               target are removed and the target is placed first);
#               reorder_tsv reads it again after calling this method.
# options     - Hash passed on to extract_specs; its :format entry, when
#               present, is used with process_formats to choose formats that
#               the specs did not set.
#
# Returns [source_pos, field_pos, source_header, field_headers,
# source_format, target_format].
# Raises RuntimeError when a requested field cannot be found.
def self.headers(all_fields, info_fields = nil, options = {})
  specs = extract_specs all_fields, options

  source_field = specs[:source][0]
  target_field = specs[:target][0]

  source_pos = all_fields.index source_field

  source_header = specs[:source][1] || specs[:source][0]
  target_header = specs[:target][1] || specs[:target][0]

  info_fields = all_fields.dup if info_fields.nil?
  info_fields.delete source_field
  info_fields.delete target_field
  info_fields.unshift target_field

  field_headers = [target_header]
  info_fields[1..-1].each do |field|
    header = case field
             when String
               field
             when Integer
               # Integer instead of Fixnum: Fixnum is gone since Ruby 3.2
               all_fields[field]
             when :key
               all_fields.first
             end

    field_headers << header
  end

  # :key and Integer positions are resolved before the name lookup; checking
  # them with include? first made them fail even though they are supported
  # when building the headers above.
  field_pos = info_fields.collect do |f|
    case f
    when :key
      0
    when Integer
      raise "Field #{f} not found. Options: #{all_fields * ", "}" if all_fields[f].nil?
      f
    else
      raise "Field #{f} not found. Options: #{all_fields * ", "}" unless all_fields.include?(f)
      all_fields.index(f)
    end
  end
  field_pos.delete source_pos

  source_format = specs[:source][2]
  target_format = specs[:target][2]

  if format = options[:format]
    source_format = process_formats(specs[:source][1] || specs[:source][0], format) || source_format unless source_format
    target_format = process_formats(specs[:target][1] || specs[:target][0], format) || target_format unless target_format
  end

  Log.low "Headers -- #{[source_pos, field_pos, source_header, field_headers, source_format, target_format]}"
  [source_pos, field_pos, source_header, field_headers, source_format, target_format]
end

.identify_entity_format(format, fields) ⇒ Object



4
5
6
7
8
9
10
# File 'lib/rbbt/association/util.rb', line 4

# Finds the field that holds the same kind of entity as +format+.
#
# format - name of an entity format registered in Entity.formats.
# fields - candidate field names.
#
# Returns [field, nil, format], where field is the first of +fields+ that
# Entity.formats maps to the same entity type as +format+.
# Raises RuntimeError if +format+ is unknown to Entity or if none of the
# fields has its entity type.
def self.identify_entity_format(format, fields)
  wanted_type = Entity.formats[format]
  if wanted_type.nil?
    raise "Field #{ format } could not be resolved: #{fields}"
  end

  matching_field = fields.find { |candidate| Entity.formats[candidate] == wanted_type }
  if matching_field.nil?
    raise "Field #{ format } not present, options: #{Misc.fingerprint fields}"
  end

  [matching_field, nil, format]
end

.index(file, options = nil, persist_options = nil) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/rbbt/association/index.rb', line 6

# Builds an index of associations whose keys are "source~target" strings and
# whose values are the remaining fields of each association.
#
# file            - association file; when it is a String and
#                   options[:namespace] is set it goes through version_file.
# options         - Hash (copied, never modified in the caller). Entries read
#                   here: :namespace, :monitor, :recycle, :undirected and
#                   :entity_options. It is also handed to +open+.
# persist_options - Hash (copied). When nil it is obtained from +options+
#                   with Misc.pull_keys(options, :persist).
#
# The work is wrapped in Persist.persist_tsv; the block below fills +data+.
# Returns the resulting object after Association::Index.setup.
def self.index(file, options = nil, persist_options = nil)
  options = options.nil? ? {} : options.dup
  persist_options = persist_options.nil? ?  Misc.pull_keys(options, :persist)  : persist_options.dup 

  persist_options = Misc.add_defaults persist_options.dup, :persist => true
  # NOTE(review): this local is not read anywhere else in the method
  persist = persist_options[:persist]

  file = version_file(file, options[:namespace]) if options[:namespace] and String === file
  Persist.persist_tsv(file, "Association Index", options, persist_options.merge(:engine => "BDB")) do |data|
    options = Misc.add_defaults options.dup, :monitor => "Building index for #{Misc.fingerprint file}"
    recycle = options[:recycle]
    undirected = options[:undirected]

    # The underlying database gets its own persistence file, next to the index
    persist_options[:file] = persist_options[:file] + '.database' if persist_options[:file]

    database = open(file, options, persist_options.dup.merge(:engine => "HDB"))

    source_field = database.key_field

    fields = database.fields
    # Only the part after the last ":" of the first field names the target
    target_field = fields.first.split(":").last

    # Same field on both ends: treat as undirected unless the caller decided
    undirected = true if undirected.nil? and source_field == target_field

    key_field = [source_field, target_field, undirected ? "undirected" : nil].compact * "~"

    TSV.setup(data, :key_field => key_field, :fields => fields[1..-1], :type => :list, :serializer => :list)

    # NOTE(review): these repeat the values already given to TSV.setup above
    data.key_field = key_field
    data.fields = fields[1..-1]
    data.type = :list
    data.serializer = :list 

    database.with_unnamed do
      database.with_monitor(options[:monitor]) do
        database.through do |source, values|
          # Bring every database type to the :double layout (list of lists)
          case database.type
          when :single
            values = [[values]]
          when :list
            values = values.collect{|v| [v] }
          when :flat
            values = [values]
          end
          next if values.empty?
          next if source.nil? or source.empty?
          # NOTE(review): duplicate of the check two lines above
          next if values.empty?

          targets, *rest = values

          size = targets ? targets.length : 0

          # With :recycle, single valued info fields are repeated for each target
          rest.each_with_index do |list,i|
            list.replace [list.first] * size if list.length == 1
          end if recycle and size > 1


          rest = Misc.zip_fields rest


          # Pair each target with its info row; when the first row does not
          # hold more than one value, the rows are repeated targets.length
          # times before pairing
          annotations = (Array === rest.first and rest.first.length > 1) ?
            targets.zip(rest) :
            targets.zip(rest * targets.length) 

          annotations.each do |target, info|
            next if target.nil? or target.empty?
            key = [source, target] * "~"

            if data[key].nil? or info.nil?
              data[key] = info
            else
              # Pair seen before: join old and new values field by field with ";;"
              old_info = data[key]
              info = old_info.zip(info).collect{|p| p * ";;" }
              data[key] = info
            end
          end
        end

        if undirected
          # Collect the reversed pairs first and add them once the traversal ends
          new_data = {}

          data.through do |key,values|
            reverse_key = key.split("~").reverse * "~"
            new_data[reverse_key] = values
          end 

          new_data.each do |key,values|
            data[key] = values
          end
        end

      end
    end
  end.tap do |data|
    data.read if not Hash === data and data.respond_to? :read
    Association::Index.setup data
    data.entity_options = options[:entity_options] if options[:entity_options]
    data
  end
end

.normalize_specs(spec, all_fields = nil) ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/rbbt/association/util.rb', line 24

# Turns a field specification into a [field, header, format] triplet.
#
# spec       - specification understood by parse_field_specification, or nil.
# all_fields - accepted for interface compatibility; it does not influence
#              the result.
#
# The previous implementation contained a branch calling
# identify_entity_format that could never run (it required +all_fields+ to be
# nil inside the alternative taken only when it is not nil), so every path
# returned the parsed triplet. That is made explicit here.
#
# Returns nil when +spec+ is nil, otherwise [field, header, format].
def self.normalize_specs(spec, all_fields = nil)
  return nil if spec.nil?

  field, header, format = parse_field_specification spec
  [field, header, format]
end

.open(file, options = nil, persist_options = nil) ⇒ Object



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/rbbt/association/open.rb', line 10

# Opens an association database, going through Persist.persist_tsv.
#
# file            - database source; a String is passed through version_file
#                   when options[:namespace] is set, and a Proc is called to
#                   obtain the actual source.
# options         - Hash (copied); defaults added here are :zipped => true
#                   and a :monitor description.
# persist_options - Hash (copied); when nil it is obtained from +options+
#                   with Misc.pull_keys(options, :persist). Defaults added
#                   here are :persist => true and :dir => Rbbt.var.associations.
#
# Returns the object given back by Persist.persist_tsv, with entity_options
# assigned when options[:entity_options] is present.
def self.open(file, options = nil, persist_options = nil)
  options = (options.nil? ? {} : options.dup)
  persist_options = if persist_options.nil?
                      Misc.pull_keys(options, :persist)
                    else
                      persist_options.dup
                    end

  monitor_defaults = {:desc => "Opening database #{Misc.fingerprint file}"}
  options = Misc.add_defaults options, :zipped => true, :monitor => monitor_defaults
  persist_options = Misc.add_defaults persist_options, :persist => true, :dir => Rbbt.var.associations
  persist = persist_options[:persist]

  if options[:namespace] and String === file
    file = version_file(file, options[:namespace])
  end
  file = file.call if Proc === file

  database = Persist.persist_tsv(file, "Association Database", options, persist_options) do |store|
    options = options.dup
    source = Association.database(file, options.merge(:persist => persist, :unnamed => true))
    source = source.to_double unless source.type == :double

    source.annotate store
    store.serializer = :double if store.respond_to? :serializer

    # Copy every entry of the parsed database into the store
    source.with_unnamed do
      source.with_monitor(options[:monitor]) do
        source.through { |key, values| store[key] = values }
      end
    end

    store
  end

  entity_options = options[:entity_options]
  database.entity_options = entity_options if entity_options
  database
end

.open_stream(stream, options = {}) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/rbbt/association/database.rb', line 86

# Parses +stream+ into a :double TSV keyed by the association source, with the
# target as first field, translating identifiers when formats are requested.
#
# stream  - the stream handed to TSV::Parser.new.
# options - Hash; :fields and :persist are taken from it with
#           Misc.process_options (NOTE(review): presumably removing them from
#           the Hash - confirm), the rest goes to the parser and to +headers+.
#
# Returns the resulting TSV.
def self.open_stream(stream, options = {})
  fields, persist = Misc.process_options options, :fields, :persist

  # The parser is created without field selection; positions are set below
  parser = TSV::Parser.new stream, options.merge(:fields => nil, :key_field => nil)

  # NOTE(review): none of these three locals is used in the rest of the method
  key_field, *_fields = all_fields = parser.all_fields

  source_pos, field_pos, source_header, field_headers, source_format, target_format = headers parser.all_fields, fields, options

  parser.key_field = source_pos
  parser.fields = field_pos

  # Replace get_values on this parser instance only, according to its type
  case parser.type
  when :single
    class << parser
      # Key plus the first of the selected values
      def get_values(parts)
        [parts[@key_field], parts.values_at(*@fields).first]
      end
    end
  when :list
    class << parser
      # Key plus the selected values, unsplit
      def get_values(parts)
        [parts[@key_field], parts.values_at(*@fields)]
      end
    end
  # NOTE(review): :list and :single are already handled above, so only
  # :double can reach this branch
  when :double, :list, :single
    class << parser
      # Key and values are split on @sep2; the -1 limit keeps trailing empty
      # entries, and missing values become empty lists
      def get_values(parts)
        [parts[@key_field].split(@sep2,-1), parts.values_at(*@fields).collect{|v| v.nil? ? [] : v.split(@sep2,-1) }]
      end
    end
  when :flat
    class << parser
      # Every column except the key is split on @sep2 and flattened into a
      # single list of values
      def get_values(parts)
        fields = (0..parts.length-1).to_a - [@key_field]
        values = parts.values_at(*fields).compact.collect{|v| v.split(@sep2,-1) }.flatten
        [parts[@key_field].split(@sep2,-1), values]
      end
    end
  end

  open_options = options.merge(parser.options).merge(:parser => parser)
  open_options = Misc.add_defaults open_options, :monitor => {:desc => "Parsing #{ Misc.fingerprint stream }"}

  tsv = TSV.parse parser.stream, {}, open_options
  # Use the headers computed above instead of the ones found by the parser
  tsv.key_field = source_header
  tsv.fields = field_headers

  tsv = tsv.to_double unless tsv.type == :double

  tsv = translate tsv, source_format, target_format, :persist => persist if source_format or target_format

  tsv
end

.parse_field_specification(spec) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
# File 'lib/rbbt/association/util.rb', line 12

# Splits a field specification into its three components.
#
# spec - one of:
#        * a String shaped like "field=~header=>final_format", where the
#          "=~header" and "=>final_format" parts are optional;
#        * an Array [field_part, final_format];
#        * an Integer, which always produces [2, nil, nil].
#
# Returns [field, header, final_format]; field is nil when the field part is
# empty (for instance "=~Header").
def self.parse_field_specification(spec)
  # Integer instead of Fixnum: Fixnum is deprecated since Ruby 2.4 and was
  # removed in Ruby 3.2, where referencing it raises NameError on every call.
  return [2,nil,nil] if Integer === spec
  spec = spec.split "=>" unless Array === spec
  field_part, final_format = spec

  # The -1 limit keeps an empty header when the specification ends in "=~"
  field, format = field_part.split "=~", -1

  field = nil if field.nil? or field.empty?

  [field, format, final_format]
end

.process_formats(field, default_format = {}) ⇒ Object



90
91
92
93
94
95
96
97
# File 'lib/rbbt/association/util.rb', line 90

# Picks the format to use for +field+ out of a table of defaults.
#
# field          - field name, looked up in Entity.formats.
# default_format - pairs of (type, format); may be nil or empty.
#
# For each pair, the entity type of +field+ (or the pair's own format when
# Entity does not know the field) is compared, as a String, with the type.
#
# Returns the format of the first matching pair, or nil when there is none.
def self.process_formats(field, default_format = {})
  return nil if default_format.nil? || default_format.empty?

  known_type = Entity.formats[field]

  default_format.each do |type, format|
    candidate = (known_type || format).to_s
    return format if candidate == type
  end

  nil
end

.reorder_tsv(tsv, options = {}) ⇒ Object



64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/rbbt/association/database.rb', line 64

# Reorders an in-memory TSV so that the association source becomes the key
# and the target the first field, translating identifiers when requested.
#
# tsv     - the TSV to reorder.
# options - Hash; :fields and :persist are taken from it with
#           Misc.process_options, the rest is handed to +headers+. Its
#           :monitor entry is used while reordering.
#
# Returns the reordered (and possibly translated) TSV.
def self.reorder_tsv(tsv, options = {})
  fields, persist = Misc.process_options(options, :fields, :persist)
  available = tsv.all_fields

  key_pos, value_positions, key_header, value_headers, key_format, value_format = headers(available, fields, options)

  key_name = (key_pos == :key) ? :key : available[key_pos]
  value_names = value_positions.map { |pos| pos == :key ? :key : available[pos] }
  options = options.merge({:key_field => key_name, :fields => value_names})

  # The reorder is always performed
  tsv.with_monitor(options[:monitor]) do
    tsv = tsv.reorder(key_name, fields)
  end

  tsv.key_field = key_header
  tsv.fields = value_headers

  if key_format or value_format
    translate(tsv, key_format, value_format, :persist => persist)
  else
    tsv
  end
end

.translate(tsv, source_final_format, target_final_format, options = {}) ⇒ Object



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/rbbt/association/database.rb', line 28

# Translates the key and/or the first field of +tsv+ to other formats.
#
# tsv                 - TSV whose key is the source and whose first field is
#                       the target.
# source_final_format - format wanted for the key; skipped when nil or when
#                       it already is the key field.
# target_final_format - format wanted for the first field; skipped when nil
#                       or when it already is that field.
# options             - Hash passed to TSV.translate together with the
#                       identifier files gathered here.
#
# Returns the translated TSV (or +tsv+ itself when nothing had to change).
def self.translate(tsv, source_final_format, target_final_format, options = {})
  source_field = tsv.key_field
  target_field = tsv.fields.first
  namespace = tsv.namespace

  # Identifier files of the current tsv plus those Entity knows for the
  # format; NAMESPACE placeholders are filled in when the tsv has a namespace
  # and files still carrying the placeholder are dropped.
  gather_identifier_files = lambda do |final_format|
    files = tsv.identifier_files.dup
    files.concat Entity.identifier_files(final_format) if defined? Entity
    files.uniq!
    files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace))} if namespace
    files.reject!{|f| f.match(/\bNAMESPACE\b/)}
    files
  end

  if source_final_format and source_field != source_final_format
    Log.debug("Changing source format from #{tsv.key_field} to #{source_final_format}")

    source_files = gather_identifier_files.call(source_final_format)
    tsv = TSV.translate(tsv, source_field, source_final_format, options.merge(:identifier_files => source_files))
  end

  if target_final_format and target_field != target_final_format
    Log.debug("Changing target format from #{target_field} to #{target_final_format}")

    # The key field is renamed while the target is translated and restored after
    saved_key_field = tsv.key_field
    tsv.key_field = "MASK"

    target_files = gather_identifier_files.call(target_final_format)
    tsv = TSV.translate(tsv, target_field, target_final_format, options.merge(:identifier_files => target_files))
    tsv.key_field = saved_key_field
  end

  tsv
end

.version_file(file, namespace) ⇒ Object



4
5
6
7
8
# File 'lib/rbbt/association/open.rb', line 4

# Replaces the first 'NAMESPACE' placeholder in a file name.
#
# file      - the file; only String values are changed.
# namespace - replacement text; nothing is done when it is nil or false.
#
# When the original String is a Path, it is asked to annotate the new name.
#
# Returns the new name, or +file+ untouched when nothing was replaced.
def self.version_file(file, namespace)
  template = nil

  if namespace and String === file
    template = file
    file = template.sub('NAMESPACE', namespace)
  end

  template.annotate(file) if Path === template

  file
end