Class: WCC::Prog

Inherits:
Object
  • Object
show all
Defined in:
lib/wcc.rb

Class Method Summary collapse

Class Method Details

.checkForUpdate(site) ⇒ Object



267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
# File 'lib/wcc.rb', line 267

# Fetches +site+, compares an MD5 digest of its (UTF-8 converted, optionally
# HTML-stripped) body with the digest stored on the site, and sends a mail
# containing a unified diff when the content changed and the site's filters
# accept that diff.
#
# @param site the site to check; this method calls +uri+, +fetch+,
#   +strip_html?+, +new?+, +id+, +hash+/+hash=+, +content+/+content=+,
#   +filters+ and +emails+ on it
# @return [Boolean] true if a change was detected and a notification was
#   handed to the mailer; false if the fetch or conversion failed, the
#   response was not a Net::HTTPOK, the digest is unchanged, or the filters
#   rejected the diff
def self.checkForUpdate(site)
	WCC.logger.info "Requesting '#{site.uri.to_s}'"
	begin
		res = site.fetch
	rescue Timeout::Error
		# don't claim on this
		return false
	rescue => ex
		WCC.logger.error "Cannot connect to #{site.uri.to_s} : #{ex.to_s}"
		return false
	end
	unless res.kind_of?(Net::HTTPOK)
		WCC.logger.error "Site #{site.uri.to_s} returned #{res.code} code, skipping it."
		return false
	end
	
	# to_s guards against a response without a body
	new_content = res.body.to_s
	
	# detect encoding from http header, then meta element, default utf-8
	# do not use utf-8 regex because it will fail on non utf-8 pages
	# '+' (not '*') so an empty "charset=" does not mask the next source;
	# an optional quote is skipped so <meta charset="..."> is recognized;
	# '_' is allowed because names such as Shift_JIS contain it.
	header_charset = res['content-type'].to_s[/;\s*charset=["']?([A-Za-z0-9_-]+)/i, 1]
	meta_charset = new_content[/<meta[^>]*charset=["']?([A-Za-z0-9_-]+)/i, 1]
	encoding = (header_charset || meta_charset || 'utf-8').downcase
	
	WCC.logger.info "Encoding is '#{encoding}'"
	
	# convert to utf-8
	begin
		new_content = Iconv.conv('utf-8', encoding, new_content)
	rescue => ex
		WCC.logger.error "Cannot convert site from '#{encoding}': #{ex.to_s}"
		return false
	end
	
	# strip html
	new_content = new_content.strip_html if site.strip_html?
	new_hash = Digest::MD5.hexdigest(new_content)
	
	WCC.logger.debug "Compare hashes\n  old: #{site.hash.to_s}\n  new: #{new_hash.to_s}"
	return false if new_hash == site.hash
	
	# do not try diff or anything if site was never checked before
	if site.new?
		site.hash, site.content = new_hash, new_content
		
		# set custom diff message
		diff = "Site was first checked so no diff was possible."
	else
		# save old site to tmp file
		old_site_file = Tempfile.new("wcc-#{site.id}-")
		begin
			old_site_file.write(site.content)
			old_site_file.close
			
			# calculate labels before updating
			old_label = "OLD (%s)" % File.mtime(Conf.file(site.id + ".md5")).strftime(DIFF_TIME_FMT)
			new_label = "NEW (%s)" % Time.now.strftime(DIFF_TIME_FMT)
			
			site.hash, site.content = new_hash, new_content
			
			# diff between OLD and NEW
			# NOTE(review): the command line is built by interpolation and run
			# through the shell; paths containing spaces would break it.
			diff = %x[diff -U 1 --label "#{old_label}" --label "#{new_label}" #{old_site_file.path} #{Conf.file(site.id + '.site')}]
		ensure
			# remove the temp file right away instead of waiting for finalization
			old_site_file.unlink
		end
	end
	
	# HACK: there *was* an update but no notification is required
	return false unless Filter.accept(diff, site.filters)
	
	data = OpenStruct.new
	data.title = "[#{Conf[:tag]}] #{site.uri.host} changed"
	data.message = "Change at #{site.uri.to_s} - diff follows:\n\n#{diff}"
	
	Conf.mailer.send(data, @@mail_plain, MailAddress.new(Conf[:from_mail]), site.emails)
	
	# multi-argument form: arguments go straight to logger(1) without a shell,
	# so quotes in the URI or tag cannot break or alter the command
	if Conf[:syslog]
		system('logger', '-t', Conf[:tag].to_s, "Change at #{site.uri.to_s} (tag #{site.id}) detected")
	end
	
	true
end

.run! ⇒ Object

main



344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
# File 'lib/wcc.rb', line 344

# Program entry point: sets up the logger, loads the plain-text mail
# template, then checks every configured site and logs the outcome.
def self.run!
	# Conf initializes itself the first time it is used
	WCC.logger = Logger.new(STDOUT)
	
	# compile the plain-text mail template once; checkForUpdate reads it
	template_path = File.join(Conf[:template_dir], 'mail-plain.erb')
	@@mail_plain = ERB.new(File.read(template_path))
	
	Conf.sites.each do |site|
		unless checkForUpdate(site)
			WCC.logger.info "#{site.uri.host.to_s} is unchanged"
			next
		end
		WCC.logger.warn "#{site.uri.host.to_s} has an update!"
	end
end