22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
# File 'lib/stats.rb', line 22
# Folds one fetched response into the crawl statistics kept in the Redis
# "statistics" hash and returns the refreshed statistics.
#
# Updates the running average/minimum/maximum of response time and length,
# page vs. asset counts and sizes, the redirect and length totals, the
# per-MIME-type and per-status-code tallies (stored as JSON strings), and the
# time-bucketed counters maintained by +increment_time_stat+.
#
# @param content [Hash] crawl result; reads :response_time, :length,
#   :mime_type, :status_code and the optional :redirect_through
# @return [Object] whatever +get_statistics+ returns
def update_statistics(content)
  crawl_counter = @redis.scard("crawled").to_i
  queue_counter = @redis.scard("queued").to_i

  response_time = content[:response_time].to_f
  length = content[:length].to_i
  mime_type = content[:mime_type]
  # Normalised once so the tally key and the range checks below agree even
  # when the status code arrives as a String.
  status_code = content[:status_code].to_i

  # Response time: running average, maximum and minimum. crawl_counter is
  # used as the number of samples already folded into the stored average.
  if @redis.hexists("statistics", "average_response_time")
    previous_average = @redis.hget("statistics", "average_response_time").to_f
    @redis.hset("statistics", "average_response_time", ((previous_average * crawl_counter) + response_time) / (crawl_counter + 1))
  else
    @redis.hset("statistics", "average_response_time", response_time)
  end
  maximum_response_time = @redis.hget("statistics", "maximum_response_time")
  @redis.hset("statistics", "maximum_response_time", response_time) if maximum_response_time.nil? || response_time > maximum_response_time.to_f
  minimum_response_time = @redis.hget("statistics", "minimum_response_time")
  @redis.hset("statistics", "minimum_response_time", response_time) if minimum_response_time.nil? || response_time < minimum_response_time.to_f

  # Length: same scheme, but kept as integers (the average uses integer
  # division).
  if @redis.hexists("statistics", "average_length")
    previous_average = @redis.hget("statistics", "average_length").to_i
    @redis.hset("statistics", "average_length", ((previous_average * crawl_counter) + length) / (crawl_counter + 1))
  else
    @redis.hset("statistics", "average_length", length)
  end
  maximum_length = @redis.hget("statistics", "maximum_length")
  @redis.hset("statistics", "maximum_length", length) if maximum_length.nil? || length > maximum_length.to_i
  minimum_length = @redis.hget("statistics", "minimum_length")
  @redis.hset("statistics", "minimum_length", length) if minimum_length.nil? || length < minimum_length.to_i

  # HTML/XHTML responses are counted as pages, everything else as assets.
  if mime_type.include?("text/html") || mime_type.include?("application/xhtml+xml")
    @redis.hset("statistics", "page_count", @redis.hget("statistics", "page_count").to_i + 1)
    @redis.hset("statistics", "page_size", @redis.hget("statistics", "page_size").to_i + length)
    increment_time_stat("pages_count")
  else
    @redis.hset("statistics", "asset_count", @redis.hget("statistics", "asset_count").to_i + 1)
    @redis.hset("statistics", "asset_size", @redis.hget("statistics", "asset_size").to_i + length)
    increment_time_stat("assets_count")
  end

  # The redirect total is only touched when the response reports a chain.
  redirect_through = content[:redirect_through]
  unless redirect_through.nil?
    total_redirects = @redis.hget("statistics", "total_redirects").to_i
    @redis.hset("statistics", "total_redirects", total_redirects + redirect_through.count)
  end

  @redis.hset("statistics", "crawl_counter", crawl_counter)
  @redis.hset("statistics", "queue_counter", queue_counter)

  total_length = @redis.hget("statistics", "total_length").to_i
  @redis.hset("statistics", "total_length", total_length + length)

  # Per-MIME-type tally, stored as a JSON object keyed by the full MIME type.
  mime_counts = @redis.hexists("statistics", "mime_counts") ? JSON.parse(@redis.hget("statistics", "mime_counts")) : {}
  mime_counts[mime_type] = mime_counts.fetch(mime_type, 0) + 1
  @redis.hset("statistics", "mime_counts", mime_counts.to_json)

  # Time-bucketed counter for the matching top-level MIME group, if any.
  # Uses core String#start_with? so ActiveSupport is not required.
  mime_group = %w[text application audio image message model multipart video].find { |prefix| mime_type.start_with?(prefix) }
  increment_time_stat("mime_#{mime_group}_count") if mime_group

  # Per-status tally, stored as a JSON object keyed by the status code as a
  # string. The key is computed before looking at existing data so the very
  # first response is recorded under its own code rather than under nil.
  status_key = status_code.to_s
  status_counts = @redis.hexists("statistics", "status_counts") ? JSON.parse(@redis.hget("statistics", "status_counts")) : {}
  status_counts[status_key] = status_counts.fetch(status_key, 0) + 1

  # NOTE(review): the 4xx/5xx keys were previously "status|_400_count" and
  # "status|_500_count"; the "|" looks like a typo next to "status_200_count"
  # and was dropped. Confirm nothing reads the old names.
  case status_code
  when 200...300 then increment_time_stat("status_200_count")
  when 400...500 then increment_time_stat("status_400_count")
  when 500...600 then increment_time_stat("status_500_count")
  end
  @redis.hset("statistics", "status_counts", status_counts.to_json)

  increment_time_stat("minute_totals", "minute", 60)
  get_statistics
end
|