Class: BleakHouse::Analyzer

Inherits:
Object
  • Object
show all
Defined in:
lib/bleak_house/analyzer/analyzer.rb

Constant Summary collapse

MAGIC_KEYS =
{
  -1 => 'timestamp',
  -2 => 'mem usage/swap',
  -3 => 'mem usage/real',
  -4 => 'tag',
  -5 => 'heap/filled',
  -6 => 'heap/free'
}
INITIAL_SKIP =

XXX Might be better as a per-tag skip but that gets kinda complicated

15
CLASS_KEYS =

Skip 0 so that the output of String#to_s is useful

eval('[nil, ' + # Skip 0 so that the output of String#to_s is useful
open(
  File.dirname(__FILE__) + '/../../../ext/bleak_house/logger/snapshot.h'
).read[/\{(.*?)\}/m, 1] + ']')

Class Method Summary collapse

Class Method Details

.backwards_detect(array) ⇒ Object



27
28
29
30
31
32
33
34
# File 'lib/bleak_house/analyzer/analyzer.rb', line 27

def self.backwards_detect(array)
  i = array.size - 1
  while i >= 0
    item = array[i]
    return item if yield(item)
    i -= 1
  end
end

.calculate!(frame, index, total, obj_count = nil) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/bleak_house/analyzer/analyzer.rb', line 36

def self.calculate!(frame, index, total, obj_count = nil)
  bsize = frame['births'].size
  dsize = frame['deaths'].size
  
  # Avoid divide by zero errors
  frame['meta']['ratio'] = ratio = (bsize - dsize) / (bsize + dsize + 1).to_f
  frame['meta']['impact'] = begin
    result = Math.log10((bsize - dsize).abs.to_i / 10.0)
    raise Errno::ERANGE if result.nan? or result.infinite?
    result
  rescue Errno::ERANGE
    0
  end
  
  puts "  F#{index}:#{total} (#{index * 100 / total}%): #{frame['meta']['tag']} (#{obj_count.to_s + ' population, ' if obj_count}#{bsize} births, #{dsize} deaths, ratio #{format('%.2f', frame['meta']['ratio'])}, impact #{format('%.2f', frame['meta']['impact'])})"
end

.run(logfile) ⇒ Object

Parses and correlates a BleakHouse::Logger output file.



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
# File 'lib/bleak_house/analyzer/analyzer.rb', line 54

def self.run(logfile)
  logfile.chomp!(".cache")
  cachefile = logfile + ".cache"

  unless File.exists? logfile or File.exists? cachefile
    puts "No data file found: #{logfile}"
    exit 
  end
  
  puts "Working..."
  
  frames = []
  last_population = []
  frame = nil
  ix = nil
  
  if File.exist?(cachefile) and (!File.exists? logfile or File.stat(cachefile).mtime > File.stat(logfile).mtime)
    # Cache is fresh
    puts "Using cache"
    frames = Marshal.load(File.open(cachefile).read)
    puts "#{frames.size - 1} frames"        
    frames[0..-2].each_with_index do |frame, index|
      calculate!(frame, index + 1, frames.size - 1)
    end
    
  else                        
    # Rebuild frames
    total_frames = `grep '^-1' #{logfile} | wc`.to_i - 2
            
    puts "#{total_frames} frames"     

    if total_frames < INITIAL_SKIP * 3
      puts "Not enough frames for accurate results. Please record at least #{INITIAL_SKIP * 3} frames."
      exit!
    end
    
    Ccsv.foreach(logfile) do |row|                
    
      # Stupid is fast
      i = row[0].to_i
      row[0] = i if i != 0
      i = row[1].to_i
      row[1] = i if i != 0
      
      if row[0].to_i < 0          
        # Get frame meta-information
        if MAGIC_KEYS[row[0]] == 'timestamp'
  
          # The frame has ended; process the last one            
          if frame
            population = frame['objects'].keys
            births = population - last_population
            deaths = last_population - population
            last_population = population

            # assign births
            frame['births'] = frame['objects'].slice(births).to_a # Work around a Marshal bug
            
            # assign deaths to previous frame
            if final = frames[-2]
              final['deaths'] = final['objects'].slice(deaths).to_a # Work around a Marshal bug
              obj_count = final['objects'].size
              final.delete 'objects'
              calculate!(final, frames.size - 1, total_frames, obj_count)
            end
          end
  
          # Set up a new frame
          frame = {}
          frames << frame
          frame['objects'] ||= {}
          frame['meta'] ||= {}
          
          #puts "  Frame #{frames.size} opened"
        end
        
        frame['meta'][MAGIC_KEYS[row[0]]] = row[1]
      else
        # Assign live objects
        frame['objects'][row[1]] = row[0]
      end
    end
    
    frames = frames[0..-2]       
    frames.last['objects'] = frames.last['objects'].to_a # Work around a Marshal bug x86-64
    
    # Cache the result
    File.open(cachefile, 'w') do |f|
      f.write Marshal.dump(frames)
    end
    
  end
  
  puts "\nRehashing."
  
  # Convert births back to hashes, necessary due to the Marshal workaround    
  frames.each do |frame|
    frame['births_hash'] = {}
    frame['births'].each do |key, value|
      frame['births_hash'][key] = value
    end
    frame.delete('births')
  end

#      require 'ruby-debug'; Debugger.start
#      
#      debugger
                         
  # See what objects are still laying around
  population = frames.last['objects'].reject do |key, value|
    frames.first['births_hash'][key] == value
  end

  puts "\n#{frames.size - 1} full frames. Removing #{INITIAL_SKIP} frames from each end of the run to account for\nstartup overhead and GC lag."

  # Remove border frames
  frames = frames[INITIAL_SKIP..-INITIAL_SKIP]
  
  total_births = frames.inject(0) do |births, frame|
    births + frame['births_hash'].size
  end
  total_deaths = frames.inject(0) do |deaths, frame|
    deaths + frame['deaths'].size
  end
  
  puts "\n#{total_births} total births, #{total_deaths} total deaths, #{population.size} uncollected objects."
  
  leakers = {}
  
#      debugger
  
  # Find the sources of the leftover objects in the final population
  population.each do |id, klass|
    leaker = backwards_detect(frames) do |frame|
      frame['births_hash'][id] == klass
    end
    if leaker
#          debugger
      tag = leaker['meta']['tag']
      klass = CLASS_KEYS[klass] if klass.is_a? Fixnum
      leakers[tag] ||= Hash.new(0)
      leakers[tag][klass] += 1
    end
  end      
  
  # Sort
  leakers = leakers.map do |tag, value| 
    [tag, value.sort_by do |klass, count| 
      -count
    end]
  end.sort_by do |tag, value|
    Hash[*value.flatten].values.inject(0) {|i, v| i - v}
  end
  
  if leakers.any?
    puts "\nTags sorted by persistent uncollected objects. These objects did not exist at\nstartup, were instantiated by the associated tags, and were never garbage\ncollected:"
    leakers.each do |tag, value|
      requests = frames.select do |frame|
        frame['meta']['tag'] == tag
      end.size
      puts "  #{tag} leaked (over #{requests} requests):"
      value.each do |klass, count|
        puts "    #{count} #{klass}"
      end
    end
  else
    puts "\nNo persistent uncollected objects found for any tags."
  end
        
  impacts = {}
  
  frames.each do |frame|
    impacts[frame['meta']['tag']] ||= []
    impacts[frame['meta']['tag']] << frame['meta']['impact'] * frame['meta']['ratio']
  end      
  impacts = impacts.map do |tag, values|
    [tag, values.inject(0) {|acc, i| acc + i} / values.size.to_f]
  end.sort_by do |tag, impact|
    impact.nan? ? 0 : -impact
  end
  
  puts "\nTags sorted by average impact * ratio. Impact is the log10 of the size of the"
  puts "change in object count for a frame:"
  
  impacts.each do |tag, total|
    puts "  #{format('%.4f', total).rjust(7)}: #{tag}"
  end

  puts "\nDone"

end