Class: PatLattice

Inherits:
Object
  • Object
show all
Defined in:
lib/rubyplb.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts) ⇒ PatLattice

Returns a new instance of PatLattice.



56
57
58
59
60
61
62
63
64
65
# File 'lib/rubyplb.rb', line 56

# Builds an empty lattice.
#
# @param opts [Hash] rendering options; other methods in this class read the
#   keys :coloring, :simple, :vertical, :nodesep and :ranksep from it
def initialize(opts)
  @opts       = opts

  # Lookup structures filled in by #insert.
  @nodes      = {}
  @levels     = []
  @leaves     = []

  # Root bookkeeping; recomputed at the end of every #insert.
  @root       = nil
  @root_level = 0

  # Per-level statistics (#setup_data) and per-color pattern lists
  # (#create_patterns).
  @level_data = {}
  @coloring   = {}
end

Instance Attribute Details

#leaves ⇒ Object

Returns the value of attribute leaves.



54
55
56
# File 'lib/rubyplb.rb', line 54

# Reader for @leaves: starts as an empty array in #initialize, and #insert
# appends each newly added node whose +leaf+ is truthy.
def leaves
  @leaves
end

#levels ⇒ Object

Returns the value of attribute levels.



54
55
56
# File 'lib/rubyplb.rb', line 54

# Reader for @levels: an array indexed by Node#level; #insert stores an array
# of the nodes at that level in each slot.
def levels
  @levels
end

#nodes ⇒ Object

Returns the value of attribute nodes.



54
55
56
# File 'lib/rubyplb.rb', line 54

# Reader for @nodes: a hash of every node, keyed by the node's pattern joined
# with "+" and converted to a symbol (see #insert and #search).
def nodes
  @nodes
end

#root ⇒ Object

Returns the value of attribute root.



54
55
56
# File 'lib/rubyplb.rb', line 54

# Reader for @root: nil until the first #insert, afterwards the first node
# stored in the highest-indexed level.
def root
  @root
end

#root_level ⇒ Object

Returns the value of attribute root_level.



54
55
56
# File 'lib/rubyplb.rb', line 54

# Reader for @root_level: 0 initially; #insert sets it to levels.size - 1.
def root_level
  @root_level
end

Instance Method Details

#ary_compact(ary, target = nil) ⇒ Object



67
68
69
70
71
72
73
74
75
76
# File 'lib/rubyplb.rb', line 67

# Collapses every run of consecutive +target+ elements in +ary+ into a single
# occurrence; all other elements are kept as they are.
#
# @param ary [Array] sequence to scan (not modified)
# @param target [Object] value whose consecutive repeats are dropped
# @return [Array] a new array
def ary_compact(ary, target = nil)
  ary.each_with_object([]) do |item, kept|
    # Compare with the last element actually kept rather than with a
    # nil-initialised "previous" marker; otherwise a leading nil would be
    # treated as a repeat (and dropped) when target is nil.
    next if !kept.empty? && kept.last == item && item == target

    kept << item
  end
end

#create_node(graph, node_id, node_label) ⇒ Object



281
282
283
284
285
286
287
288
289
290
291
292
293
294
# File 'lib/rubyplb.rb', line 281

# Adds one borderless "plaintext" node to +graph+.
#
# The Graphviz color scheme is chosen from @opts[:coloring]:
# 1 selects "rdylbu11", 2 selects "greys9", anything else an empty scheme.
#
# @param graph [#node] graph object receiving the node
# @param node_id [Object] node identifier, also used as the node's URL
# @param node_label [String] label text for the node
# @return whatever graph.node returns
def create_node(graph, node_id, node_label)
  colorscheme =
    case @opts[:coloring]
    when 1 then "rdylbu11"
    when 2 then "greys9"
    else ""
    end

  graph.node(node_id,
             :label => node_label,
             :shape => "plaintext",
             :height => "0.0",
             :width => "0.0",
             :margin => "0.0",
             :colorscheme => colorscheme,
             :URL => node_id)
end

#create_nodelabel(node) ⇒ Object



225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
# File 'lib/rubyplb.rb', line 225

# Builds the Graphviz HTML-like label (a one- or two-row table) for +node+.
#
# The first row holds one cell per element of node.data. Unless
# @opts[:simple] is set, a second row shows the node's children_instances
# count and its z-score within its level (based on @level_data). When
# @opts[:coloring] is non-zero the table background is picked from the
# z-score; otherwise it is white.
#
# @param node [#level, #children_instances, #data]
# @return [String] label text wrapped in "<" ... ">"
def create_nodelabel(node)
  coloring = @opts[:coloring]
  detailed = !@opts[:simple]

  # The z-score is only needed for coloring and for the detail row.
  zscore = nil
  if coloring != 0 || detailed
    zscore = 0.0
    if node.level != 0 && node.children_instances > 0
      stats = @level_data[node.level]
      ratio = (node.children_instances - stats[:avg_num_children]) / stats[:std_dev_num_children]
      # 0.0 / 0.0 (every node on the level has the same count) counts as 0.
      zscore = ratio unless ratio.nan?
    end
  end

  color =
    if coloring == 0 || zscore.nan? || zscore == 0.0
      "#ffffff"
    else
      # [lower bound, color index when coloring == 1, color index otherwise].
      # Exactly 0.0 was handled above, so ">= 0.0" here means "> 0.0".
      bands = [
        [3.0,  "2",  "6"],
        [1.5,  "3",  "5"],
        [1.0,  "4",  "4"],
        [0.5,  "5",  "3"],
        [0.0,  "6",  "2"],
        [-0.5, "7",  "1"],
        [-1.0, "8",  "1"],
        [-1.5, "9",  "1"],
        [-3.5, "10", "1"]
      ]
      band = bands.find { |entry| zscore >= entry[0] } || [nil, "11", "1"]
      coloring == 1 ? band[1] : band[2]
    end

  border = "0"
  cells = node.data.map { |td| "<td color='black'>#{td}</td>" }.join
  cells = "&nbsp;" * 5 if cells == ""

  parts = [
    "<<table bgcolor='#{color}' border='#{border}' cellborder='1' cellspacing='0' cellpadding='5'>",
    "<tr>#{cells}</tr>"
  ]

  if detailed
    rounded = ((zscore * 100).round / 100.0).to_s
    parts << "<tr><td color='black' colspan='#{node.data.size.to_s}'> "
    if node.level != 0 && node.children_instances > 0
      parts << node.children_instances.to_s + " (" + rounded + ")"
    end
    parts << "</td></tr>"
  end

  parts << "</table>>"
  parts.join
end

#create_patterns(sentence, compact) ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/rubyplb.rb', line 78

# Expands one input line into all of its wildcard patterns.
#
# The line is a whitespace-separated list of words, optionally followed by a
# "[color]" token and then a "(count)" token. Every subset of word positions
# (as produced by Array#subset, defined elsewhere in the project) yields one
# pattern in which the unselected positions are replaced by "_".
#
# Side effect: the generated patterns are appended to @coloring under the
# line's color ("gray60" when none is given).
#
# @param sentence [String] one line of input data
# @param compact [Boolean] when true, runs of "_" are collapsed via #ary_compact
# @return [Array<Array<String>>] the pattern list repeated +count+ times
# @raise [RuntimeError] if a remaining word contains a bracket or parenthesis
def create_patterns(sentence, compact)
  # Strip first: a leading blank would otherwise produce an empty first word.
  words = sentence.strip.split(/\s+/)

  times = 1
  if /\((\d+)\)/ =~ words[-1]
    words.pop
    times = $1.to_i
  end

  color = nil
  if /\[(.+)\]/ =~ words[-1]
    words.pop
    color = $1
  end

  # Brackets and parentheses are reserved for the trailing color/count tokens.
  # A character class is required here; the former /\[\]\(\)/ only matched the
  # literal text "[]()" and therefore never rejected anything.
  if words.any? { |w| w =~ /[\[\]()]/ }
    raise "Data contains an invalid string."
  end

  # Tag each word with its position so repeated words stay distinguishable
  # inside the subsets.
  words_with_idx = words.each_with_index.map { |w, i| "#{w}-#{i}" }

  ptns = words_with_idx.subset.map do |mask|
    ptn = words_with_idx.each_with_index.map do |tagged, i|
      mask.index(tagged) ? words[i] : "_"
    end
    compact ? ary_compact(ptn, "_") : ptn
  end

  color ||= "gray60"
  @coloring[color] = (@coloring[color] || []) + ptns

  ptns * times
end

#generate_dot ⇒ Object



296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
# File 'lib/rubyplb.rb', line 296

# Renders the lattice as Graphviz DOT source.
#
# Every node in every level is emitted once (via #create_nodelabel and
# #create_node), followed by one edge per parent/child pair. With
# @opts[:coloring] != 0 an edge is colored with every key of @coloring whose
# pattern list contains both end points; otherwise edges are "gray60".
#
# @return [String] DOT text, with the quotes RubyGraphviz puts around
#   HTML-like labels removed
def generate_dot
  setup_data if @opts[:coloring] != 0 || !@opts[:simple]

  rankdir = @opts[:vertical] ? "" : "LR"
  nodesep = @opts[:nodesep] ? @opts[:nodesep].to_s : "0.8"
  ranksep = @opts[:ranksep] ? @opts[:ranksep].to_s : "0.8"
  plb = RubyGraphviz.new("plb", :rankdir => rankdir, :nodesep => nodesep, :ranksep => ranksep)

  # object_id => true for every node already emitted. Children are recorded
  # under their OWN id (the previous code stored the parent's id, so a child
  # was re-created for each parent and again at its own level).
  drawn = {}
  draw_once = lambda do |n|
    id = n.object_id
    unless drawn[id]
      create_node(plb, id, create_nodelabel(n))
      drawn[id] = true
    end
    id
  end

  levels.each do |level|
    level.each do |node|
      node_id = draw_once.call(node)
      node.children.each do |cnode|
        cnode_id = draw_once.call(cnode)
        if @opts[:coloring] != 0
          colors = []
          @coloring.each do |color, val|
            colors << color if val.index(node.data) && val.index(cnode.data)
          end
        else
          colors = ["gray60"]
        end
        plb.edge(node_id, cnode_id, :color => colors.join(":"))
      end
    end
  end

  # Turn "<...>" (quoted) into <...> so Graphviz parses the labels as HTML.
  plb.to_dot.gsub(/\"\</m, "<").gsub(/\>\"/m, ">")
end

#generate_img(outfile, image_type, straight_line = false) ⇒ Object



336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
# File 'lib/rubyplb.rb', line 336

# Renders the lattice to an image file by piping the DOT source produced by
# #generate_dot into Graphviz; stderr of the pipeline goes to "rubyplb.log".
#
# @param outfile [String] path passed to Graphviz's -o option
# @param image_type [String] format passed to Graphviz's -T option
# @param straight_line [Boolean] when true the layout is post-processed with
#   "neato -n" instead of being drawn by dot directly
# @return the result of #showerror when dot is unavailable, otherwise the
#   value of the IO.popen block
def generate_img(outfile, image_type, straight_line = false)
  dot_source = generate_dot

  # Probe for the dot executable before trying to pipe anything into it.
  version_banner = `dot -V 2>&1`
  unless version_banner =~ /dot.*version/i
    return showerror("Graphviz's dot program cannot be found.", 1)
  end

  pipeline =
    if straight_line
      "dot | neato -n -T#{image_type} -o#{outfile} 2>rubyplb.log"
    else
      "dot -T#{image_type} -o#{outfile} 2>rubyplb.log"
    end

  IO.popen(pipeline, 'r+') { |pipe| pipe.puts dot_source }
end

#insert(sentence, compact) ⇒ Object



136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# File 'lib/rubyplb.rb', line 136

# Adds all patterns of one input line to the lattice.
#
# Patterns already present only get their num_instances incremented. Each new
# pattern becomes a Node, is filed under its level and is linked to matching
# nodes one level up (as their child) and one level down (as their parent).
# Finally @root_level and @root are refreshed.
#
# @param sentence [String] one line of input data (see #create_patterns)
# @param compact [Boolean] forwarded to #create_patterns
# @return the new value of @root
def insert(sentence, compact)
  ptns = create_patterns(sentence, compact)

  new_nodes = []
  ptns.each do |ptn|
    if (existing = search(ptn))
      existing.num_instances += 1
    else
      node = Node.new(ptn)
      nodes[node.data.join("+").intern] = node
      new_nodes << node
    end
  end

  new_nodes.each do |node|
    level = node.level
    (levels[level] ||= []) << node

    # Link to parents: a node one level up matches when its pattern, with
    # "_" acting as a wildcard, covers this node's pattern.
    uplevel = levels[level - 1]
    if level != 0 && uplevel
      node_text = node.data.join(" ")
      uplevel.each do |sup_node|
        # Escape the words so that tokens such as "." or "?" are matched
        # literally instead of being interpreted as regex operators.
        sup_text = sup_node.data.map { |w| Regexp.escape(w) }.join(" ")
        rgx = Regexp.new("\\A" + sup_text.gsub(/(\b_)+/, ".+?") + "\\z")
        if rgx.match(node_text)
          sup_node.children << node
          node.parents << sup_node
        end
      end
    end

    # Link to children: the same test in the other direction. The regex only
    # depends on +node+, so it is built once per node.
    downlevel = levels[level + 1]
    if downlevel
      own_text = node.data.map { |w| Regexp.escape(w) }.join(" ")
      rgx = Regexp.new("\\A" + own_text.gsub(/\_/, ".*") + "\\z")
      downlevel.each do |sub_node|
        if rgx.match(sub_node.data.join(" "))
          node.children << sub_node
          sub_node.parents << node
        end
      end
    end

    @leaves << node if node.leaf
  end

  @root_level = levels.size - 1
  @root = levels[root_level].first
end

#search(pattern) ⇒ Object



132
133
134
# File 'lib/rubyplb.rb', line 132

# Looks up the node stored for +pattern+.
#
# @param pattern [Array<String>] pattern elements; they are joined with "+"
#   and converted to a symbol, the same key format #insert uses
# @return the matching entry of #nodes, or nil when the pattern is unknown
def search(pattern)
  nodes[pattern.join("+").intern]
end

#setup_data ⇒ Object



196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
# File 'lib/rubyplb.rb', line 196

# Fills @level_data with per-level statistics over children_instances.
#
# Only nodes with a non-zero children_instances take part. For each level
# index the stored hash holds the number of such nodes, their mean
# children_instances and the population standard deviation; a level without
# any such node gets 0, 0 and 0.0.
#
# @return the value of #levels
def setup_data
  levels.each_with_index do |level, l_index|
    branching = level.reject { |node| node.children_instances == 0 }

    if branching.empty?
      @level_data[l_index] = {
        :num_nodes_non_terminal => 0,
        :avg_num_children => 0,
        :std_dev_num_children => 0.0
      }
      next
    end

    count = branching.size
    total = branching.inject(0) { |acc, node| acc + node.children_instances }
    mean = total.to_f / count
    # Sum of squared deviations, accumulated in list order.
    squares = branching.inject(0) { |acc, node| (node.children_instances - mean) ** 2 + acc }

    @level_data[l_index] = {
      :num_nodes_non_terminal => count,
      :avg_num_children => mean,
      :std_dev_num_children => Math.sqrt(squares / count)
    }
  end
end

#traverse(&block) ⇒ Object



188
189
190
191
192
193
194
# File 'lib/rubyplb.rb', line 188

# Yields every node of the lattice to the given block, level by level in
# ascending level index and in stored order within a level.
#
# @yieldparam node a node taken from #levels
# @return the value of #levels
def traverse(&block)
  levels.each do |tier|
    tier.each { |member| yield member }
  end
end