Class: MemDump::MemoryDump

Inherits:
Object
  • Object
show all
Defined in:
lib/memdump/memory_dump.rb

Constant Summary collapse

# Values of a record's 'type' field that #common_cleanup collapses
COMMON_COLLAPSE_TYPES =
%w[IMEMO HASH ARRAY]
# Values of a record's 'class' field that #common_cleanup collapses
COMMON_COLLAPSE_CLASSES =
%w[Set RubyVM::Env]

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(address_to_record) ⇒ MemoryDump

Returns a new instance of MemoryDump.



5
6
7
8
9
# File 'lib/memdump/memory_dump.rb', line 5

# Wrap a set of records
#
# @param address_to_record [Hash] the records, keyed by their address
def initialize(address_to_record)
    @address_to_record = address_to_record
    # Both graph caches start unset; they are filled on demand by
    # ensure_graphs_computed
    @forward_graph = @backward_graph = nil
end

Instance Attribute Details

#address_to_recordObject (readonly)

Returns the value of attribute address_to_record.



3
4
5
# File 'lib/memdump/memory_dump.rb', line 3

# @return [Hash] the mapping given to the constructor (address => record)
def address_to_record
  @address_to_record
end

Instance Method Details

#add_children(roots, with_keepalive_count: false) ⇒ Object



475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
# File 'lib/memdump/memory_dump.rb', line 475

# Build a dump made of the given records and of the records they directly
# reference
#
# @param roots [MemoryDump] the records whose children should be added
# @param with_keepalive_count [Boolean] if true, each child is copied and
#   the copy gets a 'keepalive_count' field holding the number of objects
#   yielded by a depth-first traversal of self started at that child
# @return [MemoryDump]
def add_children(roots, with_keepalive_count: false)
    selection = {}
    roots.each_record do |parent|
        selection[parent['address']] = parent

        parent['references'].each do |child_address|
            child = find_by_address(child_address)
            # References that have no matching record in self are ignored
            next unless child

            if with_keepalive_count
                # Work on a copy so that the record stored in self is not
                # annotated
                child = child.dup
                reachable = 0
                depth_first_visit(child_address) { reachable += 1 }
                child['keepalive_count'] = reachable
            end
            selection[child_address] = child
        end
    end
    MemoryDump.new(selection)
end

#addressesObject



19
20
21
# File 'lib/memdump/memory_dump.rb', line 19

# @return [Array] the keys of address_to_record, i.e. the addresses of all
#   the records in this dump
def addresses
    address_to_record.keys
end

#clear_graphObject

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.

Force recomputation of the graph representation of the dump the next time it is needed



379
380
381
382
# File 'lib/memdump/memory_dump.rb', line 379

# @api private
#
# Drop the cached forward and backward graphs, so that
# ensure_graphs_computed rebuilds them the next time it is called
def clear_graph
    @forward_graph = @backward_graph = nil
end

#collapse(entries) ⇒ Object

Remove entries from this dump, keeping the transitivity in the remaining graph

Examples:

remove all entries that are of type HASH

collapse(objects_of_type('HASH'))

Parameters:



153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/memdump/memory_dump.rb', line 153

# Remove entries from this dump, keeping the transitivity in the remaining
# graph: a record that referenced a removed entry ends up referencing what
# that entry (transitively) referenced instead
#
# NOTE(review): the records' 'references' must respond to #classify (as Set
# does), since it is called on them below
#
# @param entries [MemoryDump] the entries to remove
# @return [MemoryDump] the result of find_and_map on self
def collapse(entries)
    # Maps the address of each entry to collapse to a copy of its references
    collapsed_entries = Hash.new
    entries.each_record do |r|
        collapsed_entries[r['address']] = r['references'].dup
    end


    # Remove references in-between the entries to collapse
    #
    # already_expanded[address] is the set of collapsed addresses whose
    # references have already been merged into 'address'. It is seeded with
    # the address itself, so an entry never expands a reference to itself.
    already_expanded = Hash.new { |h, k| h[k] = Set[k] }
    # Repeat until a full pass does not change any entry
    begin
        changed_entries  = Hash.new
        collapsed_entries.each do |address, references|
            # sets[true]: references to other collapsed entries
            # sets[false]: references to entries that are kept
            sets = references.classify { |ref_address| collapsed_entries.has_key?(ref_address) }
            updated_references = sets[false] || Set.new
            if to_collapse = sets[true]
                to_collapse.each do |ref_address|
                    next if already_expanded[address].include?(ref_address)
                    updated_references.merge(collapsed_entries[ref_address])
                end
                already_expanded[address].merge(to_collapse)
                changed_entries[address] = updated_references
            end
        end
        # Progress report, written to standard output on every pass
        puts "#{changed_entries.size} changed entries"
        # Updates are applied only after the pass, so every entry of a pass
        # is computed against the same snapshot
        collapsed_entries.merge!(changed_entries)
    end while !changed_entries.empty?

    find_and_map do |record|
        # Returning nil from the block drops the collapsed entries
        next if collapsed_entries.has_key?(record['address'])

        sets = record['references'].classify do |ref_address|
            collapsed_entries.has_key?(ref_address)
        end
        updated_references = sets[false] || Set.new
        if to_collapse = sets[true]
            # Replace each reference to a collapsed entry by the references
            # computed for that entry above
            to_collapse.each do |ref_address|
                updated_references.merge(collapsed_entries[ref_address])
            end
            record = record.dup
            record['references'] = updated_references
        end
        record
    end
end

#common_cleanupMemDump

Perform common initial cleanup

It basically removes common classes that usually make a dump analysis more complicated without providing more information

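Namely, it removes WeakRef objects, then collapses the internal Ruby node types listed in COMMON_COLLAPSE_TYPES (IMEMO, HASH and ARRAY), the common collection classes listed in COMMON_COLLAPSE_CLASSES, and the records whose 'method' field is 'dump_all'.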

One usually analyses a cleaned-up dump before getting into the full dump

Returns:



268
269
270
271
272
273
274
275
276
# File 'lib/memdump/memory_dump.rb', line 268

# Perform common initial cleanup
#
# Objects of class WeakRef are removed first. Then every record whose
# class is in COMMON_COLLAPSE_CLASSES, whose type is in
# COMMON_COLLAPSE_TYPES or whose 'method' field is 'dump_all' is collapsed.
#
# @return [MemoryDump] the cleaned-up dump
def common_cleanup
    cleaned = remove(objects_of_class('WeakRef'))
    collapsible = cleaned.find_all do |record|
        next true if COMMON_COLLAPSE_CLASSES.include?(record['class'])
        next true if COMMON_COLLAPSE_TYPES.include?(record['type'])

        record['method'] == 'dump_all'
    end
    cleaned.collapse(collapsible)
end

#compute_graphsObject

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.

Create two RGL::DirectedAdjacencyGraph, for the forward and backward edges of the graph



387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
# File 'lib/memdump/memory_dump.rb', line 387

# @api private
#
# Build the forward and backward graphs of the dump
#
# The forward graph has one vertex per address plus an artificial
# 'ALL_ROOTS' vertex that has an edge to every record of type 'ROOT', and one
# edge from each record to each of its references. The backward graph holds
# the same edges, reversed.
#
# @return [(RGL::DirectedAdjacencyGraph, RGL::DirectedAdjacencyGraph)]
#   the forward and the backward graph
def compute_graphs
    forward = RGL::DirectedAdjacencyGraph.new
    forward.add_vertex 'ALL_ROOTS'
    address_to_record.each do |address, record|
        forward.add_vertex(address)
        forward.add_edge('ALL_ROOTS', address) if record['type'] == 'ROOT'
        record['references'].each { |target| forward.add_edge(address, target) }
    end

    backward = RGL::DirectedAdjacencyGraph.new
    forward.each_edge { |from, to| backward.add_edge(to, from) }
    [forward, backward]
end

#depth_first_visit(root, &block) ⇒ Object



408
409
410
411
# File 'lib/memdump/memory_dump.rb', line 408

# Run a depth-first traversal of the forward graph
#
# The graphs are computed first if needed; the traversal itself is
# delegated to the forward graph's own depth_first_visit, with the block
# forwarded as-is.
#
# @param root the vertex at which the traversal starts (other methods of
#   this class pass a record address)
def depth_first_visit(root, &block)
    ensure_graphs_computed
    @forward_graph.depth_first_visit(root, &block)
end

#diff(to) ⇒ MemoryDump

Compute the set of records that are not in self but are in to

Parameters:

Returns:



569
570
571
572
573
574
575
576
577
578
# File 'lib/memdump/memory_dump.rb', line 569

# Compute the set of records that are in `to` but not in self
#
# Records are compared by address only.
#
# @param to [MemoryDump] the dump to compare against
# @return [MemoryDump] the records of `to` whose address is unknown to self
def diff(to)
    added = {}
    to.each_record do |record|
        key = record['address']
        next if @address_to_record.key?(key)

        added[key] = record
    end
    MemoryDump.new(added)
end

#dupObject



496
497
498
# File 'lib/memdump/memory_dump.rb', line 496

# Create a new dump that holds every record of self
#
# This is find_all with an always-true block, so the record objects
# themselves are shared with self, not copied.
def dup
    find_all { true }
end

#each_record(&block) ⇒ Object



15
16
17
# File 'lib/memdump/memory_dump.rb', line 15

# Enumerate the records, i.e. the values of address_to_record
#
# The block is forwarded to each_value on the underlying mapping.
def each_record(&block)
    address_to_record.each_value(&block)
end

#ensure_graphs_computedObject

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.

Ensure that @forward_graph and @backward_graph are computed



369
370
371
372
373
# File 'lib/memdump/memory_dump.rb', line 369

# @api private
#
# Ensure that @forward_graph and @backward_graph are computed
#
# Nothing is done when @forward_graph is already set; otherwise both graphs
# are taken from compute_graphs.
def ensure_graphs_computed
    return if @forward_graph

    @forward_graph, @backward_graph = compute_graphs
end

#find_all {|record| ... } ⇒ MemoryDump

Filter the records

Yield Parameters:

  • record

    a record

Yield Returns:

  • (Object)

    truthy if the record should be included in the returned dump, falsy otherwise

Returns:



53
54
55
56
57
58
59
60
61
62
63
# File 'lib/memdump/memory_dump.rb', line 53

# Filter the records
#
# @yieldparam record a record
# @yieldreturn [Object] truthy to keep the record in the returned dump
# @return [MemoryDump, Enumerator] the dump made of the kept records (the
#   record objects are shared with self), or an enumerator if no block is
#   given
def find_all
    return enum_for(__method__) unless block_given?

    kept = {}
    each_record do |record|
        kept[record['address']] = record if yield(record)
    end
    MemoryDump.new(kept)
end

#find_and_map {|record| ... } ⇒ MemoryDump

Filter the entries, removing those for which the block returns falsy

Yield Parameters:

  • record

    a record

Yield Returns:

  • (nil, Object)

    either a record object, or falsy to remove this record in the returned dump

Returns:



87
88
89
90
91
92
93
94
95
96
97
# File 'lib/memdump/memory_dump.rb', line 87

# Filter and transform the records in a single pass
#
# The block receives a (shallow) copy of each record. A falsy result drops
# the record; otherwise the result, converted with to_hash, is stored under
# the address of the original record.
#
# @yieldparam record a copy of a record
# @yieldreturn [nil, Object] the replacement record, or falsy to remove it
# @return [MemoryDump, Enumerator] the new dump, or an enumerator if no
#   block is given
def find_and_map
    return enum_for(__method__) unless block_given?

    mapped = {}
    each_record do |record|
        replacement = yield(record.dup)
        next unless replacement

        mapped[record['address']] = replacement.to_hash
    end
    MemoryDump.new(mapped)
end

#find_by_address(address) ⇒ Object



27
28
29
# File 'lib/memdump/memory_dump.rb', line 27

# Look a record up by its address
#
# @param address the address to look for
# @return the value stored under that address in address_to_record (nil for
#   an unknown address when the mapping is a plain Hash)
def find_by_address(address)
    address_to_record[address]
end

#group(name, dump, attributes = Hash.new) ⇒ Object

Replace all objects in dump by a single “group” object



609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
# File 'lib/memdump/memory_dump.rb', line 609

# Replace all objects in dump by a single "group" object
#
# The group record is a copy of `attributes` whose address is `name` and
# whose references are everything the grouped records reference outside of
# the group. In the other records, references to grouped records are
# replaced by a single reference to `name`, and 'class_address' / 'class'
# fields that designate a grouped record are set to `name`.
#
# NOTE(review): 'references' must support `- Set` (as Set does); an Array
# would not accept a Set operand.
#
# @param name the address given to the group record
# @param dump [MemoryDump] the records to group
# @param attributes [Hash] extra fields for the group record
# @return [MemoryDump]
def group(name, dump, attributes = Hash.new)
    member_addresses = Set.new
    outgoing = Set.new
    dump.each_record do |member|
        member_addresses << member['address']
        outgoing.merge(member['references'])
    end

    merged = attributes.dup
    merged['address'] = name
    merged['references'] = outgoing - member_addresses

    result = { name => merged }
    each_record do |record|
        address = record['address']
        next if member_addresses.include?(address)

        copy = record.dup
        trimmed = copy['references'] - member_addresses
        # A size change means that at least one reference pointed into the
        # group
        trimmed << name if trimmed.size != record['references'].size
        copy['references'] = trimmed

        %w[class_address class].each do |field|
            copy[field] = name if member_addresses.include?(copy[field])
        end

        result[address] = copy
    end

    MemoryDump.new(result)
end

#include?(address) ⇒ Boolean

Returns:

  • (Boolean)


11
12
13
# File 'lib/memdump/memory_dump.rb', line 11

# Test whether a record is stored under the given address
#
# @param address the address to look for
# @return [Boolean]
def include?(address)
    address_to_record.key?(address)
end

#inspectObject



31
32
33
# File 'lib/memdump/memory_dump.rb', line 31

# Same short summary as to_s, so that inspecting a dump does not print every
# record
def inspect
    to_s
end

#interface_with(dump) ⇒ Object

Compute the interface between self and the other dump, that is the elements of self that have a child in dump, and the elements of dump that have a parent in self



583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
# File 'lib/memdump/memory_dump.rb', line 583

# Compute the interface between self and the other dump
#
# The interface is made of the records of self that are not in `dump` but
# reference at least one record of `dump`, and of (copies of) the records
# of `dump` they reference. Once both sides are built,
# dump.update_keepalive_count is called with the dump-side border.
#
# @param dump [MemoryDump]
# @return [(MemoryDump, MemoryDump)] the border in self and the border in
#   dump
def interface_with(dump)
    parents = {}
    children = {}
    each_record do |record|
        address = record['address']
        next if dump.find_by_address(address)

        targets = []
        record['references'].each do |ref_address|
            target = dump.find_by_address(ref_address)
            targets << target unless target.nil?
        end
        next if targets.empty?

        parents[address] = record
        targets.each { |target| children[target['address']] = target.dup }
    end

    self_border = MemoryDump.new(parents)
    dump_border = MemoryDump.new(children)

    dump.update_keepalive_count(dump_border)
    [self_border, dump_border]
end

#map {|record| ... } ⇒ MemoryDump

Map the records

Yield Parameters:

  • record

    a record

Yield Returns:

  • (Object)

    the record object that should be included in the returned dump

Returns:



71
72
73
74
75
76
77
78
79
# File 'lib/memdump/memory_dump.rb', line 71

# Map the records
#
# The block receives a (shallow) copy of each record; its result, converted
# with to_hash, is stored under the address of the original record.
#
# @yieldparam record a copy of a record
# @yieldreturn [#to_hash] the record to store in the returned dump
# @return [MemoryDump, Enumerator] the new dump, or an enumerator if no
#   block is given
def map
    return enum_for(__method__) unless block_given?

    mapped = {}
    each_record do |record|
        transformed = yield(record.dup).to_hash
        mapped[record['address']] = transformed
    end
    MemoryDump.new(mapped)
end

#minimum_spanning_tree(root_dump) ⇒ Object



345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
# File 'lib/memdump/memory_dump.rb', line 345

# Extract the records of a spanning tree of the forward graph
#
# Only the records whose address is a vertex of the tree are returned, and
# their references are restricted to addresses that are also vertices of
# the tree.
#
# NOTE(review): this relies on the forward graph responding to
# #minimum_spanning_tree, which is not defined in this file - confirm that
# the graph library in use provides it.
#
# @param root_dump [MemoryDump] a dump containing exactly one record, the
#   root of the tree
# @return [MemoryDump]
# @raise [ArgumentError] if root_dump does not contain exactly one record,
#   or if that record's address is not known to self
def minimum_spanning_tree(root_dump)
    if root_dump.size != 1
        raise ArgumentError, "there should be exactly one root"
    end
    root_address, _ = root_dump.address_to_record.first
    if !address_to_record[root_address]
        raise ArgumentError, "no record with address #{root_address} in self"
    end

    ensure_graphs_computed

    # The vertices of the graph are addresses (see compute_graphs), so the
    # start vertex must be the root's address, not its record
    mst = @forward_graph.minimum_spanning_tree(root_address)
    result = Hash.new
    mst.each_vertex do |address|
        # Vertices without a record (e.g. the artificial 'ALL_ROOTS' vertex
        # or dangling references) have nothing to contribute
        record = address_to_record[address]
        next if !record

        record = record.dup
        references = record['references'].dup
        references.delete_if { |ref_address| !mst.has_vertex?(ref_address) }
        record['references'] = references
        result[address] = record
    end
    MemoryDump.new(result)
end

#objects_of_class(name) ⇒ MemoryDump

Return the records of a given class

Examples:

return all string records

objects_of_class("String")

Parameters:

  • name (String)

    the class

Returns:



117
118
119
# File 'lib/memdump/memory_dump.rb', line 117

# Return the records of a given class
#
# The record's 'class' field is tested with `name === ...`, so `name` can
# be anything that implements `===` (a String matches by equality).
#
# @param name [String] the class
# @return [MemoryDump]
def objects_of_class(name)
    find_all do |record|
        name === record['class']
    end
end

#objects_of_type(name) ⇒ MemoryDump

Return the records of a given type

Examples:

return all ICLASS (singleton) records

objects_of_type("ICLASS")

Parameters:

  • name (String)

    the type

Returns:



106
107
108
# File 'lib/memdump/memory_dump.rb', line 106

# Return the records of a given type
#
# The record's 'type' field is tested with `name === ...`, so `name` can be
# anything that implements `===` (a String matches by equality).
#
# @param name [String] the type
# @return [MemoryDump]
def objects_of_type(name)
    find_all do |record|
        name === record['type']
    end
end

#parents_of(dump, min: 0, exclude_dump: false) ⇒ (MemoryDump,Hash)

Return the entries that refer to the entries in the dump

Parameters:

  • the (MemoryDump)

    set of entries whose parents we’re looking for

  • min (Integer) (defaults to: 0)

    only return the entries in self that refer to more than this many entries in ‘dump’

  • exclude_dump (Boolean) (defaults to: false)

    exclude the entries that are already in ‘dump’

Returns:

  • ((MemoryDump,Hash))

    the parent entries, and a mapping from records in the parent entries to the count of entries in ‘dump’ they refer to



131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/memdump/memory_dump.rb', line 131

# Return the entries that refer to the entries in the dump
#
# @param dump [MemoryDump] the set of entries whose parents we're looking
#   for
# @param min [Integer] only return the entries in self that refer to more
#   than this many entries in 'dump'
# @param exclude_dump [Boolean] exclude the entries that are already in
#   'dump'
# @return [(MemoryDump,Hash)] the parent entries, and a mapping from the
#   parent records to the count of entries in 'dump' they refer to
def parents_of(dump, min: 0, exclude_dump: false)
    targets = dump.addresses.to_set
    counts = {}
    parents = find_all do |record|
        next if exclude_dump && targets.include?(record['address'])

        hits = record['references'].count { |ref_address| targets.include?(ref_address) }
        next if hits <= min

        counts[record] = hits
        true
    end
    [parents, counts]
end

#remove(objects) ⇒ Object

Simply remove the given objects



501
502
503
504
505
506
507
508
509
510
511
512
513
# File 'lib/memdump/memory_dump.rb', line 501

# Simply remove the given objects
#
# The records whose address is in `objects` are dropped, and so are the
# references to them in the remaining records. Nothing is done to preserve
# transitivity (see collapse for that).
#
# @param objects [MemoryDump] the records to remove
# @return [MemoryDump] a copy of self (dup) if there is nothing to remove,
#   the filtered dump otherwise
def remove(objects)
    doomed = objects.addresses.to_set
    return dup if doomed.empty?

    find_and_map do |record|
        next if doomed.include?(record['address'])

        # Filter a copy of the references so that the collection shared with
        # the source record is left untouched
        kept = record['references'].dup
        kept.reject! { |address| doomed.include?(address) }
        record['references'] = kept
        record
    end
end

#remove_invalid_references(MemoryDump,Set)

Remove entries in the reference for which we can’t find an object with the matching address

Returns:

  • ((MemoryDump,Set))

    the filtered dump and the set of missing addresses found



282
283
284
285
286
287
288
289
290
291
292
293
294
295
# File 'lib/memdump/memory_dump.rb', line 282

# Remove the references for which there is no record with a matching address
#
# @return [(MemoryDump,Set)] the filtered dump and the set of missing
#   addresses found
def remove_invalid_references
    known = addresses.to_set
    missing = Set.new
    filtered = map do |record|
        references = record['references']
        valid = known & references
        # A size mismatch means that some references are unknown to self
        missing.merge(references - valid) if valid.size != references.size

        copy = record.dup
        copy['references'] = valid
        copy
    end
    [filtered, missing]
end

#remove_small_components(max_size: 1) ⇒ Object

Remove all components that have at most max_size nodes

It really looks only at the number of nodes reachable from a root (i.e. won’t notice if two smaller-than-threshold roots have nodes in common)



520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
# File 'lib/memdump/memory_dump.rb', line 520

# Remove all components that have at most max_size nodes
#
# A component is the set of records reachable from a root, a root being a
# record that no other record references. Each root is considered in
# isolation, so two small components that share nodes are not noticed.
# References to addresses that have no record in self are ignored.
#
# @param max_size [Integer] the biggest component size that gets removed
# @return [MemoryDump] self without the small components
def remove_small_components(max_size: 1)
    # Roots are the addresses that nobody references
    roots = addresses.to_set
    each_record do |r|
        roots.subtract(r['references'])
    end

    to_remove = Set.new
    roots.each do |root_address|
        component = Set[]
        queue = Set[root_address]
        # Stop exploring as soon as the component is known to be too big
        while !queue.empty? && (component.size <= max_size)
            address = queue.first
            queue.delete(address)
            next if component.include?(address)
            # Dangling reference: there is no record to account for or to
            # follow
            next if !(record = address_to_record[address])

            component << address
            queue.merge(record['references'])
        end

        if component.size <= max_size
            to_remove.merge(component)
        end
    end

    without(find_all { |r| to_remove.include?(r['address']) })
end

#replace_class_id_by_class_name(add_reference_to_class: false) ⇒ Object



653
654
655
# File 'lib/memdump/memory_dump.rb', line 653

# Convenience wrapper: calls MemDump.replace_class_address_by_name on self,
# forwarding add_reference_to_class unchanged, and returns its result
def replace_class_id_by_class_name(add_reference_to_class: false)
    MemDump.replace_class_address_by_name(self, add_reference_to_class: add_reference_to_class)
end

#root_addressesSet<String>

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.

Return the set of record addresses that are the addresses of roots in the live graph

Returns:

  • (Set<String>)


451
452
453
454
455
456
457
# File 'lib/memdump/memory_dump.rb', line 451

# @api private
#
# Return the set of record addresses that are the addresses of roots in the
# live graph, i.e. the addresses that no record of this dump references
#
# @return [Set<String>]
def root_addresses
    unreferenced = Set.new(addresses)
    each_record { |record| unreferenced.subtract(record['references']) }
    unreferenced
end

#roots(with_keepalive_count: false) ⇒ Object

Returns the set of roots



460
461
462
463
464
465
466
467
468
469
470
471
472
473
# File 'lib/memdump/memory_dump.rb', line 460

# Returns the set of roots, i.e. the records listed by root_addresses
#
# @param with_keepalive_count [Boolean] if true, each root is copied and the
#   copy gets a 'keepalive_count' field holding the number of objects
#   yielded by a depth-first traversal started at that root
# @return [MemoryDump]
def roots(with_keepalive_count: false)
    records = root_addresses.each_with_object({}) do |address, selected|
        record = find_by_address(address)
        if with_keepalive_count
            # Annotate a copy, not the record stored in self
            record = record.dup
            reachable = 0
            depth_first_visit(address) { reachable += 1 }
            record['keepalive_count'] = reachable
        end
        selected[address] = record
    end
    MemoryDump.new(records)
end

#roots_of(dump, root_dump: nil) ⇒ MemoryDump

Return the graph of objects that keep the objects in dump alive

It contains only the shortest paths from the roots to the objects in dump

Parameters:

Returns:



304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
# File 'lib/memdump/memory_dump.rb', line 304

# Return the graph of objects that keep the objects in dump alive
#
# It contains only the records that are on a shortest path from one of the
# roots to one of the objects in dump. Every edge has the same weight (1),
# so "shortest" means "fewest references to follow".
#
# @param dump [MemoryDump] the objects whose roots we are looking for
# @param root_dump [MemoryDump, nil] the records to use as roots. If nil,
#   paths start at the artificial 'ALL_ROOTS' vertex of the forward graph
# @return [MemoryDump]
# @raise [ArgumentError] if root_dump is given but has no records
def roots_of(dump, root_dump: nil)
    # MemoryDump has no #empty?, so the emptiness test goes through the
    # address list
    if root_dump && root_dump.addresses.empty?
        raise ArgumentError, "no roots provided"
    end

    start_addresses =
        if root_dump then root_dump.addresses
        else
            ['ALL_ROOTS']
        end

    ensure_graphs_computed

    result_nodes = Set.new
    dump_addresses = dump.addresses
    start_addresses.each do |root_address|
        visitor = RGL::DijkstraVisitor.new(@forward_graph)
        # Hash.new(1): every edge lookup yields a weight of 1
        dijkstra = RGL::DijkstraAlgorithm.new(@forward_graph, Hash.new(1), visitor)
        dijkstra.find_shortest_paths(root_address)
        path_builder = RGL::PathBuilder.new(root_address, visitor.parents_map)

        dump_addresses.each do |record_address|
            # A falsy path is taken to mean that the record cannot be
            # reached from this root
            if (path = path_builder.path(record_address))
                result_nodes.merge(path)
            end
        end
    end

    find_and_map do |record|
        address = record['address']
        next if !result_nodes.include?(address)

        # Prefer records in 'dump' to allow for annotations in the
        # source
        record = dump.find_by_address(address) || record
        record = record.dup
        # Only keep the references that stay within the selected nodes
        record['references'] = result_nodes & record['references']
        record
    end
end

#sample(ratio) ⇒ Object

Get a random sample of the records

The sampling is random, so the returned set might be bigger or smaller than expected. Do not use on small sets.

Parameters:

  • the (Float)

    ratio of selected samples vs. total samples (0.1 will select approximately 10% of the samples)



435
436
437
438
439
440
441
442
443
# File 'lib/memdump/memory_dump.rb', line 435

# Get a random sample of the records
#
# Each record is selected independently, so the returned set might be
# bigger or smaller than expected. Do not use on small sets.
#
# @param ratio [Float] the ratio of selected samples vs. total samples (0.1
#   will select approximately 10% of the samples). A ratio of 0 never
#   selects anything, a ratio of 1 selects everything.
# @return [MemoryDump]
def sample(ratio)
    result = Hash.new
    each_record do |record|
        # rand returns a value in [0, 1): a strict comparison gives each
        # record a probability of exactly `ratio`, and keeps a ratio of 0
        # from selecting a record when rand returns 0.0
        if rand < ratio
            result[record['address']] = record
        end
    end
    MemoryDump.new(result)
end

#save(io_or_path) ⇒ Object

Save the dump



232
233
234
235
236
237
238
239
240
241
242
# File 'lib/memdump/memory_dump.rb', line 232

# Save the dump, one JSON document per record and per line
#
# @param io_or_path [#open, #puts, String] where to save. An object that
#   responds to #open (e.g. a Pathname) is opened for writing; an object
#   that responds to #puts (e.g. an IO) is written to directly; anything
#   else (e.g. a String) is interpreted as a path, like to_gml does
def save(io_or_path)
    if io_or_path.respond_to?(:open)
        io_or_path.open 'w' do |io|
            save(io)
        end
    elsif io_or_path.respond_to?(:puts)
        each_record do |r|
            io_or_path.puts JSON.dump(r)
        end
    else
        # Plain path: respond_to? ignores the private Kernel#open and
        # Kernel#puts, so a String ends up here
        Pathname(io_or_path).open 'w' do |io|
            save(io)
        end
    end
end

#sizeObject



23
24
25
# File 'lib/memdump/memory_dump.rb', line 23

# @return [Integer] the number of records in this dump
def size
    address_to_record.size
end

#statsObject



552
553
554
555
556
557
558
559
560
561
562
563
# File 'lib/memdump/memory_dump.rb', line 552

# Count the records per kind
#
# The kind of a record is the first truthy value among its 'class', 'type'
# and 'root' fields.
#
# @return [(Integer,Hash)] the number of records that have none of these
#   fields, and a mapping from each kind to its number of records
def stats
    unknown = 0
    histogram = Hash.new(0)
    each_record do |record|
        label = record['class'] || record['type'] || record['root']
        if label
            histogram[label] += 1
        else
            unknown += 1
        end
    end
    [unknown, histogram]
end

#to_gml(io_or_path) ⇒ Object

Write the dump to a GML file that can be loaded by Gephi

Parameters:

  • the (Pathname, String, IO)

    path or the IO stream into which we should dump



220
221
222
223
224
225
226
227
228
229
# File 'lib/memdump/memory_dump.rb', line 220

# Write the dump to a GML file that can be loaded by Gephi
#
# The conversion itself is done by MemDump.convert_to_gml.
#
# @param io_or_path [Pathname, String, IO] the path or the IO stream into
#   which we should dump. Anything that is not an IO is converted with
#   Pathname() and opened for writing
# @return [nil]
def to_gml(io_or_path)
    unless io_or_path.kind_of?(IO)
        Pathname(io_or_path).open('w') { |io| to_gml(io) }
        return nil
    end

    MemDump.convert_to_gml(self, io_or_path)
    nil
end

#to_sObject



657
658
659
# File 'lib/memdump/memory_dump.rb', line 657

# @return [String] a short summary that only gives the number of records
def to_s
    "#<MemoryDump size=#{size}>"
end

#update_keepalive_count(dump) ⇒ Object



643
644
645
646
647
648
649
650
651
# File 'lib/memdump/memory_dump.rb', line 643

# Set the 'keepalive_count' field of the records of the given dump
#
# The count is the number of objects yielded by a depth-first traversal of
# self's forward graph, started at the record's address. The traversal is
# done on self, not on `dump`: `dump` is typically a subset of self (see
# interface_with), whose own graph would miss most of the objects a record
# keeps alive.
#
# The records of `dump` are modified in place.
#
# @param dump [MemoryDump] the records to update; their addresses must be
#   known to self
# @return the value returned by dump.each_record
def update_keepalive_count(dump)
    dump.each_record do |record|
        count = 0
        # depth_first_visit computes self's graphs on first use
        depth_first_visit(record['address']) { |_obj| count += 1 }
        record['keepalive_count'] = count
    end
end

#validate_referencesObject

Validate that all reference entries have a matching dump entry

Raises:

  • (RuntimeError)

    if references without a matching dump entry have been found



416
417
418
419
420
421
422
423
424
425
426
# File 'lib/memdump/memory_dump.rb', line 416

# Validate that all reference entries have a matching dump entry
#
# The check stops at the first record that has at least one reference to an
# address unknown to this dump.
#
# @return [nil]
# @raise [RuntimeError] if a record references addresses that do not exist
def validate_references
    known = addresses.to_set
    each_record do |record|
        references = record['references']
        present = known & references
        next if present.size == references.size

        missing = references - present
        raise "#{record} references #{missing.to_a.sort.join(", ")} which do not exist"
    end
    nil
end

#without(entries) ⇒ MemoryDump

Remove entries from the dump, and all references to them

Parameters:

Returns:



203
204
205
206
207
208
209
210
211
212
213
214
# File 'lib/memdump/memory_dump.rb', line 203

# Remove entries from the dump, and all references to them
#
# @param entries [#include?] the addresses to remove; only include? is
#   called on it, with addresses as argument
# @return [MemoryDump] the filtered dump. Records that had references
#   removed get a new 'references' Set, the other records are left as
#   find_and_map yields them
def without(entries)
    find_and_map do |record|
        next if entries.include?(record['address'])

        original_refs = record['references']
        kept = original_refs.reject { |address| entries.include?(address) }
        next record if kept.size == original_refs.size

        pruned = record.dup
        pruned['references'] = kept.to_set
        pruned
    end
end