Class: MemDump::MemoryDump
- Inherits:
-
Object
- Object
- MemDump::MemoryDump
- Defined in:
- lib/memdump/memory_dump.rb
Constant Summary collapse
- COMMON_COLLAPSE_TYPES =
%w{IMEMO HASH ARRAY}
- COMMON_COLLAPSE_CLASSES =
%w{Set RubyVM::Env}
Instance Attribute Summary collapse
-
#address_to_record ⇒ Object
readonly
Returns the value of attribute address_to_record.
Instance Method Summary collapse
- #add_children(roots, with_keepalive_count: false) ⇒ Object
- #addresses ⇒ Object
-
#clear_graph ⇒ Object
private
Force recomputation of the graph representation of the dump the next time it is needed.
-
#collapse(entries) ⇒ Object
Remove entries from this dump, keeping the transitivity in the remaining graph.
-
#common_cleanup ⇒ MemoryDump
Perform common initial cleanup.
-
#compute_graphs ⇒ Object
private
Create two RGL::DirectedAdjacencyGraph, for the forward and backward edges of the graph.
- #depth_first_visit(root, &block) ⇒ Object
-
#diff(to) ⇒ MemoryDump
Compute the set of records that are not in self but are in to.
- #dup ⇒ Object
- #each_record(&block) ⇒ Object
-
#ensure_graphs_computed ⇒ Object
private
Ensure that @forward_graph and @backward_graph are computed.
-
#find_all {|record| ... } ⇒ MemoryDump
Filter the records.
-
#find_and_map {|record| ... } ⇒ MemoryDump
Filter the entries, removing those for which the block returns falsy.
- #find_by_address(address) ⇒ Object
-
#group(name, dump, attributes = Hash.new) ⇒ Object
Replace all objects in dump by a single “group” object.
- #include?(address) ⇒ Boolean
-
#initialize(address_to_record) ⇒ MemoryDump
constructor
A new instance of MemoryDump.
- #inspect ⇒ Object
-
#interface_with(dump) ⇒ Object
Compute the interface between self and the other dump, that is the elements of self that have a child in dump, and the elements of dump that have a parent in self.
-
#map {|record| ... } ⇒ MemoryDump
Map the records.
- #minimum_spanning_tree(root_dump) ⇒ Object
-
#objects_of_class(name) ⇒ MemoryDump
Return the records of a given class.
-
#objects_of_type(name) ⇒ MemoryDump
Return the records of a given type.
-
#parents_of(dump, min: 0, exclude_dump: false) ⇒ (MemoryDump,Hash)
Return the entries that refer to the entries in the dump.
-
#remove(objects) ⇒ Object
Simply remove the given objects.
-
#remove_invalid_references ⇒ (MemoryDump,Set)
Remove entries in the reference for which we can’t find an object with the matching address.
-
#remove_small_components(max_size: 1) ⇒ Object
Remove all components that contain at most the given number of nodes.
- #replace_class_id_by_class_name(add_reference_to_class: false) ⇒ Object
-
#root_addresses ⇒ Set<String>
private
Return the set of record addresses that are the addresses of roots in the live graph.
-
#roots(with_keepalive_count: false) ⇒ Object
Returns the set of roots.
-
#roots_of(dump, root_dump: nil) ⇒ MemoryDump
Return the graph of objects that keep the objects in dump alive.
-
#sample(ratio) ⇒ Object
Get a random sample of the records.
-
#save(io_or_path) ⇒ Object
Save the dump.
- #size ⇒ Object
- #stats ⇒ Object
-
#to_gml(io_or_path) ⇒ Object
Write the dump to a GML file that can be loaded by Gephi.
- #to_s ⇒ Object
- #update_keepalive_count(dump) ⇒ Object
-
#validate_references ⇒ Object
Validate that all reference entries have a matching dump entry.
-
#without(entries) ⇒ MemoryDump
Remove entries from the dump, and all references to them.
Constructor Details
#initialize(address_to_record) ⇒ MemoryDump
Returns a new instance of MemoryDump.
5 6 7 8 9 |
# File 'lib/memdump/memory_dump.rb', line 5
# Wraps an address => record mapping in a new dump.
#
# Both cached graph representations start out unset; they are only built
# when a graph-based query needs them.
#
# @param address_to_record [Hash] the records of the dump, keyed by address
def initialize(address_to_record)
  @forward_graph = @backward_graph = nil
  @address_to_record = address_to_record
end
Instance Attribute Details
#address_to_record ⇒ Object (readonly)
Returns the value of attribute address_to_record.
3 4 5 |
# File 'lib/memdump/memory_dump.rb', line 3
# Read-only accessor for the mapping handed to the constructor.
#
# @return [Hash] the records of this dump, keyed by address
def address_to_record
  @address_to_record
end
Instance Method Details
#add_children(roots, with_keepalive_count: false) ⇒ Object
475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 |
# File 'lib/memdump/memory_dump.rb', line 475
# Builds a dump made of the records in +roots+ plus the records of self
# that they directly reference.
#
# References that have no matching record in self are skipped.
#
# @param roots [MemoryDump] the records whose children should be added
# @param with_keepalive_count [Boolean] when true, each child is copied and
#   annotated with a 'keepalive_count' entry holding the number of vertices
#   visited by a depth-first traversal started at that child
# @return [MemoryDump]
def add_children(roots, with_keepalive_count: false)
  collected = {}
  roots.each_record do |root|
    collected[root['address']] = root
    root['references'].each do |child_address|
      child = find_by_address(child_address)
      next unless child

      if with_keepalive_count
        reachable = 0
        depth_first_visit(child_address) { reachable += 1 }
        # Annotate a copy so the record stored in self stays untouched
        child = child.dup
        child['keepalive_count'] = reachable
      end
      collected[child_address] = child
    end
  end
  MemoryDump.new(collected)
end
#addresses ⇒ Object
19 20 21 |
# File 'lib/memdump/memory_dump.rb', line 19
# Lists the addresses of every record in this dump.
#
# @return [Array] the keys of {#address_to_record}
def addresses
  address_to_record.keys
end
#clear_graph ⇒ Object
This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.
Force recomputation of the graph representation of the dump the next time it is needed
379 380 381 382 |
# File 'lib/memdump/memory_dump.rb', line 379
# Drops the cached forward and backward graphs so that they get rebuilt
# the next time they are needed.
#
# @api private
# @return [nil]
def clear_graph
  @forward_graph = @backward_graph = nil
end
#collapse(entries) ⇒ Object
Remove entries from this dump, keeping the transitivity in the remaining graph
153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
# File 'lib/memdump/memory_dump.rb', line 153
# Remove entries from this dump, keeping the transitivity in the remaining
# graph.
#
# Every remaining record that referenced a collapsed entry ends up
# referencing what that entry (transitively) referenced instead.
#
# A progress line ("N changed entries") is printed on standard output after
# each resolution pass.
#
# @param entries [MemoryDump] the records to collapse; their 'references'
#   must respond to #classify (e.g. a Set)
# @return [MemoryDump] a new dump without the collapsed entries
def collapse(entries)
  collapsed_entries = {}
  entries.each_record do |r|
    collapsed_entries[r['address']] = r['references'].dup
  end

  # Remove references in-between the entries to collapse.
  #
  # already_expanded[address] is the set of collapsed entries whose
  # references were already merged into +address+; it is what lets the loop
  # terminate when collapsed entries reference each other in a cycle.
  already_expanded = Hash.new { |h, k| h[k] = Set[k] }
  loop do
    changed_entries = {}
    collapsed_entries.each do |address, references|
      sets = references.classify { |ref_address| collapsed_entries.has_key?(ref_address) }
      to_collapse = sets[true]
      next unless to_collapse

      updated_references = sets[false] || Set.new
      to_collapse.each do |ref_address|
        next if already_expanded[address].include?(ref_address)

        updated_references.merge(collapsed_entries[ref_address])
      end
      already_expanded[address].merge(to_collapse)
      changed_entries[address] = updated_references
    end
    puts "#{changed_entries.size} changed entries"
    collapsed_entries.merge!(changed_entries)
    break if changed_entries.empty?
  end

  # At this point collapsed entries only reference non-collapsed ones, so a
  # single substitution pass over the remaining records is enough.
  find_and_map do |record|
    next if collapsed_entries.has_key?(record['address'])

    sets = record['references'].classify do |ref_address|
      collapsed_entries.has_key?(ref_address)
    end
    if (to_collapse = sets[true])
      updated_references = sets[false] || Set.new
      to_collapse.each do |ref_address|
        updated_references.merge(collapsed_entries[ref_address])
      end
      record = record.dup
      record['references'] = updated_references
    end
    record
  end
end
#common_cleanup ⇒ MemoryDump
Perform common initial cleanup
It basically removes common classes that usually make a dump analysis more complicated without providing more information
Namely, it removes WeakRef objects, then collapses the record types listed in COMMON_COLLAPSE_TYPES (IMEMO, HASH and ARRAY), the classes listed in COMMON_COLLAPSE_CLASSES, and the records whose 'method' is 'dump_all'.
One usually analyses a cleaned-up dump before getting into the full dump
268 269 270 271 272 273 274 275 276 |
# File 'lib/memdump/memory_dump.rb', line 268
# Perform common initial cleanup.
#
# Objects of class 'WeakRef' are removed first. Then every record whose
# class is in COMMON_COLLAPSE_CLASSES, whose type is in
# COMMON_COLLAPSE_TYPES, or whose 'method' is 'dump_all' is collapsed.
#
# @return [MemoryDump] the cleaned-up dump
def common_cleanup
  pruned = remove(objects_of_class('WeakRef'))
  noise = pruned.find_all do |record|
    COMMON_COLLAPSE_CLASSES.include?(record['class']) ||
      COMMON_COLLAPSE_TYPES.include?(record['type']) ||
      record['method'] == 'dump_all'
  end
  pruned.collapse(noise)
end
#compute_graphs ⇒ Object
This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.
Create two RGL::DirectedAdjacencyGraph, for the forward and backward edges of the graph
387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 |
# File 'lib/memdump/memory_dump.rb', line 387
# Create two RGL::DirectedAdjacencyGraph, for the forward and backward
# edges of the graph.
#
# Vertices are record addresses. The forward graph additionally holds a
# synthetic 'ALL_ROOTS' vertex with an edge to every record of type 'ROOT'.
# The backward graph is the forward graph with every edge reversed.
#
# @api private
# @return [Array(RGL::DirectedAdjacencyGraph, RGL::DirectedAdjacencyGraph)]
#   the forward and backward graphs, in that order
def compute_graphs
  forward = RGL::DirectedAdjacencyGraph.new
  forward.add_vertex 'ALL_ROOTS'
  address_to_record.each do |address, record|
    forward.add_vertex(address)
    forward.add_edge('ALL_ROOTS', address) if record['type'] == 'ROOT'
    record['references'].each { |target| forward.add_edge(address, target) }
  end

  backward = RGL::DirectedAdjacencyGraph.new
  forward.each_edge { |from, to| backward.add_edge(to, from) }

  [forward, backward]
end
#depth_first_visit(root, &block) ⇒ Object
408 409 410 411 |
# File 'lib/memdump/memory_dump.rb', line 408
# Runs a depth-first traversal of the forward graph from +root+, building
# the graphs first if they are not available yet.
#
# @param root the vertex (record address) to start from
# @param block forwarded as-is to the graph's depth_first_visit
def depth_first_visit(root, &block)
  ensure_graphs_computed
  @forward_graph.depth_first_visit(root, &block)
end
#diff(to) ⇒ MemoryDump
Compute the set of records that are not in self but are in to
569 570 571 572 573 574 575 576 577 578 |
# File 'lib/memdump/memory_dump.rb', line 569
# Compute the set of records that are not in self but are in +to+.
#
# Records are compared by address only.
#
# @param to [MemoryDump] the dump to compare against
# @return [MemoryDump] the records of +to+ whose address is unknown to self
def diff(to)
  added = {}
  to.each_record do |record|
    key = record['address']
    added[key] = record unless @address_to_record.include?(key)
  end
  MemoryDump.new(added)
end
#dup ⇒ Object
496 497 498 |
# File 'lib/memdump/memory_dump.rb', line 496
# Creates a new dump holding every record of this one.
#
# The copy is built by {#find_all}, so the record objects themselves are
# shared with self.
#
# @return [MemoryDump]
def dup
  find_all { |_record| true }
end
#each_record(&block) ⇒ Object
15 16 17 |
# File 'lib/memdump/memory_dump.rb', line 15
# Enumerates the records of this dump.
#
# @yieldparam record the values of {#address_to_record}, one at a time
def each_record(&block)
  address_to_record.each_value(&block)
end
#ensure_graphs_computed ⇒ Object
This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.
Ensure that @forward_graph and @backward_graph are computed
369 370 371 372 373 |
# File 'lib/memdump/memory_dump.rb', line 369
# Ensure that @forward_graph and @backward_graph are computed.
#
# Does nothing when a forward graph is already cached.
#
# @api private
def ensure_graphs_computed
  return if @forward_graph

  @forward_graph, @backward_graph = compute_graphs
end
#find_all {|record| ... } ⇒ MemoryDump
Filter the records
53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/memdump/memory_dump.rb', line 53
# Filter the records.
#
# Without a block, an enumerator for this method is returned instead.
#
# @yieldparam record a record of this dump (the stored object, not a copy)
# @yieldreturn [Object] truthy to keep the record
# @return [MemoryDump] a dump with the records the block accepted
def find_all
  return enum_for(__method__) unless block_given?

  kept = {}
  each_record { |record| kept[record['address']] = record if yield(record) }
  MemoryDump.new(kept)
end
#find_and_map {|record| ... } ⇒ MemoryDump
Filter the entries, removing those for which the block returns falsy
87 88 89 90 91 92 93 94 95 96 97 |
# File 'lib/memdump/memory_dump.rb', line 87
# Filter the entries, removing those for which the block returns falsy.
#
# The block receives a copy of each record. A truthy return value is
# converted with #to_hash and stored under the address of the original
# record. Without a block, an enumerator for this method is returned.
#
# @yieldparam record a shallow copy of a record of this dump
# @yieldreturn [#to_hash, nil, false] the replacement record, or a falsy
#   value to drop the record
# @return [MemoryDump]
def find_and_map
  return enum_for(__method__) unless block_given?

  mapped = {}
  each_record do |record|
    outcome = yield(record.dup)
    mapped[record['address']] = outcome.to_hash if outcome
  end
  MemoryDump.new(mapped)
end
#find_by_address(address) ⇒ Object
27 28 29 |
# File 'lib/memdump/memory_dump.rb', line 27
# Looks up a record by its address.
#
# @param address the address to look for
# @return the value stored in {#address_to_record} for this address
def find_by_address(address)
  address_to_record[address]
end
#group(name, dump, attributes = Hash.new) ⇒ Object
Replace all objects in dump by a single “group” object
609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 |
# File 'lib/memdump/memory_dump.rb', line 609
# Replace all objects in +dump+ by a single "group" object.
#
# The group record is a copy of +attributes+ whose 'address' is +name+ and
# whose 'references' are everything the grouped records reference outside
# of the group. In every other record, references to grouped records are
# replaced by one reference to +name+, and a 'class_address' or 'class'
# value that matches a grouped address is replaced by +name+.
#
# @param name the address given to the group record
# @param dump [MemoryDump] the records to merge into the group
# @param attributes [Hash] extra entries for the group record
# @return [MemoryDump]
def group(name, dump, attributes = Hash.new)
  member_addresses = Set.new
  member_references = Set.new
  dump.each_record do |member|
    member_addresses << member['address']
    member_references.merge(member['references'])
  end

  group_record = attributes.dup
  group_record['address'] = name
  group_record['references'] = member_references - member_addresses

  updated = { name => group_record }
  each_record do |record|
    next if member_addresses.include?(record['address'])

    rewritten = record.dup
    # `-=` builds a new collection, so the original record is left alone
    rewritten['references'] -= member_addresses
    # A size change means at least one reference pointed into the group
    if rewritten['references'].size != record['references'].size
      rewritten['references'] << name
    end
    rewritten['class_address'] = name if member_addresses.include?(rewritten['class_address'])
    rewritten['class'] = name if member_addresses.include?(rewritten['class'])
    updated[rewritten['address']] = rewritten
  end
  MemoryDump.new(updated)
end
#include?(address) ⇒ Boolean
11 12 13 |
# File 'lib/memdump/memory_dump.rb', line 11
# Tells whether this dump holds a record for +address+.
#
# @param address the address to test
# @return [Boolean]
def include?(address)
  address_to_record.has_key?(address)
end
#inspect ⇒ Object
31 32 33 |
# File 'lib/memdump/memory_dump.rb', line 31
# Same short description as {#to_s}.
#
# @return [String]
def inspect
  to_s
end
#interface_with(dump) ⇒ Object
Compute the interface between self and the other dump, that is the elements of self that have a child in dump, and the elements of dump that have a parent in self
583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 |
# File 'lib/memdump/memory_dump.rb', line 583
# Compute the interface between self and the other dump, that is the
# elements of self that have a child in +dump+, and the elements of +dump+
# that have a parent in self.
#
# Records of self whose address also exists in +dump+ are ignored. The
# records placed in the second dump are copies, on which
# +dump.update_keepalive_count+ is then run.
#
# @param dump [MemoryDump] the other dump
# @return [Array(MemoryDump, MemoryDump)] the border in self and the border
#   in +dump+, in that order
def interface_with(dump)
  own_side = {}
  other_side = {}
  each_record do |record|
    next if dump.find_by_address(record['address'])

    children = record['references'].map { |addr| dump.find_by_address(addr) }.compact
    next if children.empty?

    own_side[record['address']] = record
    children.each { |child| other_side[child['address']] = child.dup }
  end

  self_border = MemoryDump.new(own_side)
  dump_border = MemoryDump.new(other_side)
  dump.update_keepalive_count(dump_border)
  [self_border, dump_border]
end
#map {|record| ... } ⇒ MemoryDump
Map the records
71 72 73 74 75 76 77 78 79 |
# File 'lib/memdump/memory_dump.rb', line 71
# Map the records.
#
# The block receives a copy of each record; its return value is converted
# with #to_hash and stored under the address of the original record.
# Without a block, an enumerator for this method is returned.
#
# @yieldparam record a shallow copy of a record of this dump
# @yieldreturn [#to_hash] the replacement record
# @return [MemoryDump]
def map
  return enum_for(__method__) unless block_given?

  mapped = {}
  each_record { |record| mapped[record['address']] = yield(record.dup).to_hash }
  MemoryDump.new(mapped)
end
#minimum_spanning_tree(root_dump) ⇒ Object
345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 |
# File 'lib/memdump/memory_dump.rb', line 345
# Builds the dump made of the records that are part of the minimum spanning
# tree of the forward graph, rooted at the single record of +root_dump+.
#
# Records are copied, and their references are restricted to the addresses
# that are vertices of the tree.
#
# @param root_dump [MemoryDump] a dump holding exactly one record, which
#   must also exist in self
# @return [MemoryDump]
# @raise [ArgumentError] if +root_dump+ does not hold exactly one record, or
#   if self has no record at that address
def minimum_spanning_tree(root_dump)
  if root_dump.size != 1
    raise ArgumentError, "there should be exactly one root"
  end

  root_address, _ = root_dump.address_to_record.first
  unless address_to_record[root_address]
    raise ArgumentError, "no record with address #{root_address} in self"
  end

  ensure_graphs_computed
  # The graph's vertices are addresses (see compute_graphs), so the root is
  # given as an address, not as a record.
  # NOTE(review): assumes the forward graph responds to
  # #minimum_spanning_tree(root) -- confirm against the RGL version in use.
  mst = @forward_graph.minimum_spanning_tree(root_address)

  result = {}
  mst.each_vertex do |address|
    record = address_to_record[address]
    # Skip vertices that are not records (e.g. the synthetic 'ALL_ROOTS'
    # vertex, or the target of a dangling reference)
    next unless record

    record = record.dup
    record['references'] = record['references'].dup
    record['references'].delete_if { |ref_address| !mst.has_vertex?(ref_address) }
    result[address] = record
  end
  MemoryDump.new(result)
end
#objects_of_class(name) ⇒ MemoryDump
Return the records of a given class
117 118 119 |
# File 'lib/memdump/memory_dump.rb', line 117
# Return the records of a given class.
#
# @param name [#===] matched against each record's 'class' entry with
#   `===`, so anything that implements it can be given
# @return [MemoryDump]
def objects_of_class(name)
  find_all { |record| name === record['class'] }
end
#objects_of_type(name) ⇒ MemoryDump
Return the records of a given type
106 107 108 |
# File 'lib/memdump/memory_dump.rb', line 106
# Return the records of a given type.
#
# @param name [#===] matched against each record's 'type' entry with
#   `===`, so anything that implements it can be given
# @return [MemoryDump]
def objects_of_type(name)
  find_all { |record| name === record['type'] }
end
#parents_of(dump, min: 0, exclude_dump: false) ⇒ (MemoryDump,Hash)
Return the entries that refer to the entries in the dump
131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
# File 'lib/memdump/memory_dump.rb', line 131
# Return the entries that refer to the entries in the dump.
#
# @param dump [MemoryDump] the records whose parents are looked for
# @param min [Integer] a record is kept only if it references strictly
#   more than this many records of +dump+
# @param exclude_dump [Boolean] when true, records that are themselves in
#   +dump+ are never returned
# @return [(MemoryDump, Hash)] the matching records, and a hash from each
#   matching record to the number of entries of +dump+ it references
def parents_of(dump, min: 0, exclude_dump: false)
  child_addresses = dump.addresses.to_set
  counts = {}
  parents = find_all do |record|
    next if exclude_dump && child_addresses.include?(record['address'])

    hits = record['references'].count { |ref| child_addresses.include?(ref) }
    next unless hits > min

    counts[record] = hits
    true
  end
  [parents, counts]
end
#remove(objects) ⇒ Object
Simply remove the given objects
501 502 503 504 505 506 507 508 509 510 511 512 513 |
# File 'lib/memdump/memory_dump.rb', line 501
# Simply remove the given objects.
#
# The removed records disappear from the dump and from the references of
# the remaining records. When +objects+ is empty, a plain {#dup} of self
# is returned.
#
# @param objects [MemoryDump] the records to remove
# @return [MemoryDump]
def remove(objects)
  doomed = objects.addresses.to_set
  return dup if doomed.empty?

  find_and_map do |record|
    next if doomed.include?(record['address'])

    # Filter a copy: the record yielded here is only a shallow copy, so its
    # references collection is still the one stored in self
    record['references'] = record['references'].dup.delete_if { |addr| doomed.include?(addr) }
    record
  end
end
#remove_invalid_references ⇒ (MemoryDump,Set)
Remove entries in the reference for which we can’t find an object with the matching address
282 283 284 285 286 287 288 289 290 291 292 293 294 295 |
# File 'lib/memdump/memory_dump.rb', line 282
# Remove entries in the reference for which we can't find an object with
# the matching address.
#
# @return [(MemoryDump, Set)] the dump with only valid references, and the
#   set of referenced addresses that had no matching record
def remove_invalid_references
  known = addresses.to_set
  missing = Set.new
  cleaned = map do |record|
    valid = known & record['references']
    missing.merge(record['references'] - valid) if valid.size != record['references'].size
    copy = record.dup
    copy['references'] = valid
    copy
  end
  [cleaned, missing]
end
#remove_small_components(max_size: 1) ⇒ Object
Remove all components that contain at most the given number of nodes
It really looks only at the number of nodes reachable from a root (i.e. won’t notice if two smaller-than-threshold roots have nodes in common)
520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 |
# File 'lib/memdump/memory_dump.rb', line 520
# Remove all components that contain at most +max_size+ nodes.
#
# A component is the set of records reachable from a root, a root being a
# record that no record of the dump references. The exploration of a
# component stops as soon as it is known to be bigger than +max_size+.
#
# It really looks only at the number of nodes reachable from a root (i.e.
# it won't notice if two smaller-than-threshold roots have nodes in common).
#
# References to addresses that have no record in the dump are ignored.
#
# @param max_size [Integer] biggest component size that gets removed
# @return [MemoryDump]
def remove_small_components(max_size: 1)
  # Roots are the addresses nobody references
  roots = addresses.to_set
  each_record { |r| roots.subtract(r['references']) }

  to_remove = Set.new
  roots.each do |root_address|
    component = Set[]
    queue = Set[root_address]
    while !queue.empty? && (component.size <= max_size)
      address = queue.first
      queue.delete(address)
      next if component.include?(address)

      record = address_to_record[address]
      # Dangling reference: there is no node to count or to expand
      next unless record

      component << address
      queue.merge(record['references'])
    end

    to_remove.merge(component) if component.size <= max_size
  end

  without(find_all { |r| to_remove.include?(r['address']) })
end
#replace_class_id_by_class_name(add_reference_to_class: false) ⇒ Object
653 654 655 |
# File 'lib/memdump/memory_dump.rb', line 653
# Convenience wrapper that runs MemDump.replace_class_address_by_name on
# this dump.
#
# @param add_reference_to_class [Boolean] forwarded unchanged
# @return whatever MemDump.replace_class_address_by_name returns
def replace_class_id_by_class_name(add_reference_to_class: false)
  MemDump.replace_class_address_by_name(
    self,
    add_reference_to_class: add_reference_to_class
  )
end
#root_addresses ⇒ Set<String>
This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.
Return the set of record addresses that are the addresses of roots in the live graph
451 452 453 454 455 456 457 |
# File 'lib/memdump/memory_dump.rb', line 451
# Return the set of record addresses that are the addresses of roots in
# the live graph, i.e. the addresses that no record of this dump lists in
# its references.
#
# @api private
# @return [Set<String>]
def root_addresses
  candidates = addresses.to_set
  each_record { |record| candidates.subtract(record['references']) }
  candidates
end
#roots(with_keepalive_count: false) ⇒ Object
Returns the set of roots
460 461 462 463 464 465 466 467 468 469 470 471 472 473 |
# File 'lib/memdump/memory_dump.rb', line 460
# Returns the set of roots, i.e. the records found at {#root_addresses}.
#
# @param with_keepalive_count [Boolean] when true, each root is copied and
#   annotated with a 'keepalive_count' entry holding the number of vertices
#   visited by a depth-first traversal started at that root
# @return [MemoryDump]
def roots(with_keepalive_count: false)
  collected = {}
  root_addresses.each do |address|
    root = find_by_address(address)
    if with_keepalive_count
      reachable = 0
      depth_first_visit(address) { reachable += 1 }
      # Annotate a copy so the record stored in self stays untouched
      root = root.dup
      root['keepalive_count'] = reachable
    end
    collected[address] = root
  end
  MemoryDump.new(collected)
end
#roots_of(dump, root_dump: nil) ⇒ MemoryDump
Return the graph of objects that keep the objects in dump alive
It contains only the shortest paths from the roots to the objects in dump
304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 |
# File 'lib/memdump/memory_dump.rb', line 304
# Return the graph of objects that keep the objects in +dump+ alive.
#
# It contains only the shortest paths from the roots to the objects in
# +dump+, every edge of the forward graph having a weight of 1.
#
# @param dump [MemoryDump] the records whose roots are looked for
# @param root_dump [MemoryDump, nil] the records to start the search from;
#   when nil, the search starts at the synthetic 'ALL_ROOTS' vertex
# @return [MemoryDump] the records that lie on the retained paths, with
#   references restricted to those paths
# @raise [ArgumentError] if +root_dump+ is given but holds no record
def roots_of(dump, root_dump: nil)
  # MemoryDump has no #empty?; go through #size instead
  if root_dump && root_dump.size.zero?
    raise ArgumentError, "no roots provided"
  end

  root_addresses = root_dump ? root_dump.addresses : ['ALL_ROOTS']

  ensure_graphs_computed

  result_nodes = Set.new
  dump_addresses = dump.addresses
  root_addresses.each do |root_address|
    visitor = RGL::DijkstraVisitor.new(@forward_graph)
    dijkstra = RGL::DijkstraAlgorithm.new(@forward_graph, Hash.new(1), visitor)
    dijkstra.find_shortest_paths(root_address)
    path_builder = RGL::PathBuilder.new(root_address, visitor.parents_map)

    dump_addresses.each do |record_address|
      path = path_builder.path(record_address)
      # A falsy path is simply skipped
      result_nodes.merge(path) if path
    end
  end

  find_and_map do |record|
    address = record['address']
    next unless result_nodes.include?(address)

    # Prefer records in 'dump' to allow for annotations in the
    # source
    record = dump.find_by_address(address) || record
    record = record.dup
    record['references'] = result_nodes & record['references']
    record
  end
end
#sample(ratio) ⇒ Object
Get a random sample of the records
The sampling is random, so the returned set might be bigger or smaller than expected. Do not use on small sets.
435 436 437 438 439 440 441 442 443 |
# File 'lib/memdump/memory_dump.rb', line 435
# Get a random sample of the records.
#
# Each record is kept independently when a fresh `rand` draw is lower than
# or equal to +ratio+, so the size of the result varies from call to call.
# Do not use on small sets.
#
# @param ratio [Numeric] the threshold compared against `rand`
# @return [MemoryDump]
def sample(ratio)
  picked = {}
  each_record { |record| picked[record['address']] = record if rand <= ratio }
  MemoryDump.new(picked)
end
#save(io_or_path) ⇒ Object
Save the dump
232 233 234 235 236 237 238 239 240 241 242 |
# File 'lib/memdump/memory_dump.rb', line 232
# Save the dump, one JSON document per line.
#
# @param io_or_path [#open, #puts, String] where to write:
#   - an object that responds to #open (e.g. a Pathname) is opened for
#     writing and the dump is written to the yielded IO
#   - an object that responds to #puts (e.g. an IO or a StringIO) is
#     written to directly
#   - anything else (e.g. a String) is converted to a Pathname, like
#     {#to_gml} does, and handled as a path
def save(io_or_path)
  if io_or_path.respond_to?(:open)
    io_or_path.open('w') { |io| save(io) }
  elsif io_or_path.respond_to?(:puts)
    each_record { |r| io_or_path.puts JSON.dump(r) }
  else
    # Plain strings have neither a public #open nor a public #puts
    Pathname(io_or_path).open('w') { |io| save(io) }
  end
end
#size ⇒ Object
23 24 25 |
# File 'lib/memdump/memory_dump.rb', line 23 def size address_to_record.size end |
#stats ⇒ Object
552 553 554 555 556 557 558 559 560 561 562 563 |
# File 'lib/memdump/memory_dump.rb', line 552
# Counts the records per kind.
#
# The kind of a record is the first truthy value among its 'class', 'type'
# and 'root' entries. Records that have none of them are tallied apart.
#
# @return [(Integer, Hash)] the number of records without a kind, and a
#   hash from each kind to its number of records (defaulting to 0)
def stats
  unknown_class = 0
  by_class = Hash.new(0)
  each_record do |record|
    label = record['class'] || record['type'] || record['root']
    if label
      by_class[label] += 1
    else
      unknown_class += 1
    end
  end
  [unknown_class, by_class]
end
#to_gml(io_or_path) ⇒ Object
Write the dump to a GML file that can be loaded by Gephi
220 221 222 223 224 225 226 227 228 229 |
# File 'lib/memdump/memory_dump.rb', line 220
# Write the dump to a GML file that can be loaded by Gephi.
#
# @param io_or_path [IO, Object] an IO is handed to
#   MemDump.convert_to_gml as-is; anything else is converted with
#   Pathname() and opened for writing first
# @return [nil]
def to_gml(io_or_path)
  unless io_or_path.kind_of?(IO)
    Pathname(io_or_path).open('w') { |io| to_gml(io) }
    return nil
  end

  MemDump.convert_to_gml(self, io_or_path)
  nil
end
#to_s ⇒ Object
657 658 659 |
# File 'lib/memdump/memory_dump.rb', line 657
# Short description of the dump that only mentions its size.
#
# @return [String]
def to_s
  "#<MemoryDump size=#{size}>"
end
#update_keepalive_count(dump) ⇒ Object
643 644 645 646 647 648 649 650 651 |
# File 'lib/memdump/memory_dump.rb', line 643
# Sets the 'keepalive_count' entry of every record of +dump+ to the number
# of vertices visited by a depth-first traversal of the graph of self,
# started at the record's address.
#
# The records of +dump+ are modified in place. The traversal goes through
# {#depth_first_visit}, which builds the graph of self when needed.
#
# @param dump [MemoryDump] the records to annotate; their addresses are
#   expected to be part of self
# @return the return value of +dump.each_record+
def update_keepalive_count(dump)
  dump.each_record do |record|
    count = 0
    # Traverse the graph of self, not the one of +dump+: what matters is
    # what the record keeps alive within the receiver
    depth_first_visit(record['address']) { count += 1 }
    record['keepalive_count'] = count
  end
end
#validate_references ⇒ Object
Validate that all reference entries have a matching dump entry
416 417 418 419 420 421 422 423 424 425 426 |
# File 'lib/memdump/memory_dump.rb', line 416
# Validate that all reference entries have a matching dump entry.
#
# @return [nil]
# @raise [RuntimeError] on the first record that references an address
#   without a record; the message lists the missing addresses
def validate_references
  known = addresses.to_set
  each_record do |record|
    refs = record['references']
    found = known & refs
    next if found.size == refs.size

    missing = refs - found
    raise "#{record} references #{missing.to_a.sort.join(", ")} which do not exist"
  end
  nil
end
#without(entries) ⇒ MemoryDump
Remove entries from the dump, and all references to them
203 204 205 206 207 208 209 210 211 212 213 214 |
# File 'lib/memdump/memory_dump.rb', line 203
# Remove entries from the dump, and all references to them.
#
# Records that reference none of the removed entries keep their references
# collection unchanged; the others get a new Set without those references.
#
# @param entries [#include?] tested with record addresses, e.g. a
#   MemoryDump
# @return [MemoryDump]
def without(entries)
  find_and_map do |record|
    next if entries.include?(record['address'])

    original_refs = record['references']
    kept = original_refs.find_all { |ref| !entries.include?(ref) }
    if kept.size != original_refs.size
      record = record.dup
      record['references'] = kept.to_set
    end
    record
  end
end