Class: Graphmatcher

Inherits:
Object
  • Object
show all
Defined in:
lib/graphmatcher.rb

Overview

array such as g=[[,[3],,[]],[‘x’,‘y’,‘y’,‘z’]]. g is adjacency array which points children of given vertex of array index, -e.g. vertex 0 has 1 and 2 as children- and g is labels for vertices. -e.g. vertex 0 has label ‘x’.

@limit: Upper limit for number of matches. Procedure terminates after reaching to an upper limit. @max_allowed_time: Maximum allowed time for procedure to run. @self_loops: Boolean value for representing if query is cyclic or not.

Constant Summary collapse

@@logger =
Logger.new(STDOUT)

Instance Method Summary collapse

Constructor Details

#initialize(args) ⇒ Graphmatcher

Returns a new instance of Graphmatcher.



18
19
20
21
22
23
24
25
26
# File 'lib/graphmatcher.rb', line 18

def initialize(args)
  @query_graph = args[:query_graph].to_a
  @data_graph = args[:data_graph].to_a
  @limit = (args[:limit] || 1).to_i
  @max_allowed_time = (args[:max_allowed_time] || 4.000).to_f
  @cost_matrix = args[:cost_matrix] || nil
  @self_loops = args[:self_loops] || false
  validate!
end

Instance Method Details

#assess_cost(matches, costs) ⇒ Object



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/graphmatcher.rb', line 106

def assess_cost(matches, costs)
  # resource_graph =
  #   [
  #       [[],[],[],[],[],[],[]], #adj.
  #       ['SPO2','NRF52','SPO2','NRF52','SPO2','NRF52','SPO2'], #types
  #       ['img_x','img_y','img_z','img_t','img_z','img_q','img_z'], #images
  #       ['12','52','25','61','74','95','11']            #resource_id
  #   ]

  # request_graph =
  #   [
  #       [[],[]],
  #       ['NRF52','NRF52'],
  #       ['img_y','img_z'],
  #       ['NODE_A','HUB_A']
  #   ]

  # costs = {
  #   52 => {0 => 0, 1 => 50} , #y
  #   61 => {0 => 30, 1 => 70} , #t
  #   95 => {0 => 40, 1 => 55} , #q
  # }

  # matches = [
  #   [[1],[2]],[[1],[4]],[[1],[6]],
  #   [[3],[2]],[[3],[4]],[[3],[6]],
  #   [[5],[2]],[[5],[4]],[[5],[6]]
  # ]

  matches.map do |match_set| # [ [1],[2] ]
    match_set.flatten.map.with_index do |match, query_index| # 1
      [match, get_resource_cost(costs, match, query_index).to_f]
    end
  end
end

#contains(phi, depth, vertex_j) ⇒ Object

INFO: Checks if vertex J is contained in any of previous matches. TODO: Change with find method.



238
239
240
241
242
243
244
245
# File 'lib/graphmatcher.rb', line 238

def contains(phi, depth, vertex_j)
  false if depth <= 0
  (0..depth - 1).each do |i|
    # @@logger.info("phi[#{i}]=#{phi[i]},depth=#{depth},vertex_j=#{vertex_j}")
    return true if phi[i].include?(vertex_j)
  end
  false
end

#dot_graph(data, subgraph = nil, prefix = '') ⇒ Object

EXPERIMENTAL INFO: Produce a GraphViz-compliant directed graph syntax. INFO: Needs dot/graphviz tools installed as a dependency. TODO: Unable to handle multiple results, color each result different. Indices are IDs, labels are labels adjencency array is outgoing edges.



252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
# File 'lib/graphmatcher.rb', line 252

def dot_graph(data, subgraph = nil, prefix = '')
  output = ['digraph {']
  data.transpose.each_with_index do |node, id|
    output <<
      ["#{id} [label=\"#{node[1]}##{id}\"]",
       "#{id}->{#{node[0].join(' ')}}"].join("\n")
  end
  if subgraph
    subgraph.each_with_index do |node, _id|
      output << "#{node} [fontcolor=\"Red\"]"
    end
  end
  output << '}'
  tstamp = Time.new.to_i.to_s
  File.write("#{prefix}#{tstamp}.dot", output.join("\n"))
  dot_produce = ['dot', '-Tpng', "#{prefix}#{tstamp}.dot",
                 '-o', "#{prefix}#{tstamp}.png"].join(' ')
  `#{dot_produce}`
end

#dual_iso(phi, depth) ⇒ Object

INFO: Function call to collect matches from phi object. phi = … depth = … matches = …



209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
# File 'lib/graphmatcher.rb', line 209

def dual_iso(phi, depth)
  if depth == @query_graph[0].length
    unless phi.nil? || phi.empty?
      @matches <<
        if phi.include?([]) # Unable to match this vertex in graph.
          [nil]
        else
          phi
        end
    end
  elsif !(phi.nil? || phi.empty?)
    phi[depth].sort_by { |value| @cost_matrix ? (@cost_matrix[value][depth] || Float::INFINITY) : next }.each do |value|
      next if contains(phi, depth, value)

      # keys are indices 0...n, values are possible values for that index
      phicopy = phi.map(&:clone)
      # @@logger.info("phicopy=#{phicopy},depth=#{depth},value=#{value}")
      phicopy[depth] = [value]
      if @matches.length >= @limit
        @@logger.info("FINISHED matches=#{@matches}")
        return @matches
      end
      dual_iso(dual_simulation(phicopy), depth + 1)
    end
  end
end

#dual_simulation(phi) ⇒ Object

INFO: Function that uses parameter phi -which is generated by label_match- to determine which matches of data have expected relations in query graph. phi = …



145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# File 'lib/graphmatcher.rb', line 145

def dual_simulation(phi)
  # One directional adjacency array for data graph and query graphs.
  data_children = @data_graph[0]
  query_children = @query_graph[0]
  # @@logger.info("Data children: #{data_children.to_s}")
  # @@logger.info("Query children: #{query_children.to_s}")
  changed = true
  while changed
    changed = false
    return nil if (Time.now.to_f - @t0) > @max_allowed_time

    # children = query_edges
    # q_index = query_vertex_index
    query_children.each_with_index do |children, q_index|
      # query_child = query_edge_target
      children.each do |query_child|
        # Create a temporary phi object.
        temp_phi = []
        # Loop over candidates of each vertex in data graph.
        to_delete = []

        phi[q_index].map do |child| # loop 3
          # @@logger.debug("u=#{q_index}, u_c=#{query_child}, child=#{child}")

          # Find intersection of children of 'child' in data graph and
          # candidates of 'query child' in data graph.
          phi_intersection = data_children[child] & phi[query_child]
          # @@logger.debug("datachildren[child]=#{data_children[child]}")
          # @@logger.debug("phi[query_child]=#{phi[query_child]}")
          # @@logger.debug("Intersection=#{phi_intersection}")
          if phi_intersection.nil? || phi_intersection.empty?
            to_delete.push(child)
            return phi if phi[q_index].empty?

            changed = true
          end
          temp_phi |= phi_intersection
        end

        unless to_delete.empty?
          to_delete.each do |td|
            phi[q_index].delete(td)
          end
        end
        return phi if temp_phi.flatten.empty?

        changed = true if temp_phi.size < phi[query_child].size
        if @self_loops && query_child == q_index
          phi[query_child] &= temp_phi
        else
          # @@logger.debug("phi=#{phi} and phi[#{query_child}]=#{temp_phi}")
          phi[query_child] = temp_phi
        end
      end
    end
  end
  @@logger.info("Returning phi=#{phi}")
  phi
end

#find_matchesObject

Public interface for Graphmatcher class.

@matches: Array of matching indices of query graph in data graph.



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/graphmatcher.rb', line 60

def find_matches
  @matches = []
  @t0 = Time.now.to_f
  phi = label_match

  dual_iso(dual_simulation(phi), 0)
  # @@logger.info("FINISHED matches=#{@matches}")
  if @cost_matrix
    # if cost matrix is available, get costs of found matches.

    @matches = assess_cost(@matches, @cost_matrix)

    # sort matches by sum of costs of matched resources.
    #               MATCHES
    # [ [[1,100],[2,10]],[[3,500],[4,800]] ]
    #              MATCH COSTS
    #         110              1300

    # The behaviour here is important !
    # Sum of costs vs. max of costs, depends which one is relevant.

    @matches.reject! { |match_set| match_set.map { |e| e[1] }.include?(nil) }

    @matches = @matches.sort_by do |match_set|
      match_set.reduce(0) { |sum, e| sum + e[1] }
    end

  end
  @matches
end

#get_resource_cost(costs, resource_position, query_index) ⇒ Object



96
97
98
99
100
101
102
103
104
# File 'lib/graphmatcher.rb', line 96

def get_resource_cost(costs, resource_position, query_index)
  # costs = { resource_id => { query_index => cost } }
  # e.g.
  # costs = { 40 => { 0 => 5, 1 => 10 } }
  if costs[resource_position][query_index]
    cost = (costs[resource_position][query_index])
    cost
  end
end

#get_resource_property(_match, property) ⇒ Object



91
92
93
94
# File 'lib/graphmatcher.rb', line 91

def get_resource_property(_match, property)
  truncated_data_graph = @data_graph.truncate
  @matches.map { |match_set| match_set.map { |match| truncated_data_graph[match[0]][2][property] } }
end

#label_matchObject

Function for generating feasible matches for query graph based on labels of vertices of data graph.



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/graphmatcher.rb', line 30

def label_match
  data_labels = @data_graph[1]
  query_labels = @query_graph[1]

  feasible = query_labels.map.with_index do |ql, _index|
    data_labels.each_index.select { |i| data_labels[i] == ql }
  end

  if @cost_matrix

    feasible = assess_cost(feasible, @cost_matrix)

    feasible = feasible.select { |f| f[1] }.map do |feasible_set|
      feasible_set.sort_by { |f| f[1] }
    end

    feasible = feasible.map do |f_set|
      f_set.map do |f|
        f[0]
      end
    end
  end

  @@logger.info('Label matches(phi) are: ' + feasible.to_s)
  feasible
end

#validate!Object



272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
# File 'lib/graphmatcher.rb', line 272

def validate!
  unless @query_graph.is_a?(Array) && @data_graph.is_a?(Array)
    raise ArgumentError,
          'Type mismatch for graphs in initialization !'
  end
  unless @limit.is_a?(Numeric) && @max_allowed_time.is_a?(Numeric)
    raise ArgumentError,
          'Type mismatch for limit or timeout value in initialization !'
  end
  unless @query_graph.length >= 2 && @data_graph.length >= 2
    raise ArgumentError,
          'Input graphs must have at least two dimensions !'
  end
  unless @query_graph.map(&:length).uniq.size == 1 &&
         @data_graph.map(&:length).uniq.size == 1
    raise ArgumentError,
          'Input graphs\' adjencency and label arrays must be sized equal !'
  end
end