Class: Rspider::HDBContentStorage

Inherits:
Object
  • Object
show all
Defined in:
lib/rspider/ContentStorage.rb

Overview

This class stores page content in a Tokyo Cabinet hash database (HDB), so it performs well and uses little memory.

Instance Method Summary collapse

Constructor Details

#initialize(path) ⇒ HDBContentStorage

the file path to hold the HDB file



37
38
39
40
41
42
43
# File 'lib/rspider/ContentStorage.rb', line 37

# Creates the storage backed by a Tokyo Cabinet hash database file.
#
# The database is opened with the OWRITER and OCREAT flags combined.
# If opening fails, the error message is written to STDERR and the
# object is still constructed; no exception is raised here.
#
# @param path [String] file path of the HDB file
def initialize(path)
	@hdb = TokyoCabinet::HDB.new
	open_mode = TokyoCabinet::HDB::OWRITER | TokyoCabinet::HDB::OCREAT
	return if @hdb.open(path, open_mode)

	# Opening failed: report the database's own error message and carry on.
	STDERR.printf("open error: %s\n", @hdb.errmsg(@hdb.ecode))
end

Instance Method Details

#add(url, content) ⇒ Object

Stores a URL together with the content fetched from that URL.



45
46
47
# File 'lib/rspider/ContentStorage.rb', line 45

# Stores +content+ in the database under the key +url+.
#
# @param url the key to store under
# @param content the value to store for that key
# @return whatever the underlying database's +put+ returns
def add(url, content)
	outcome = @hdb.put(url, content)
	outcome
end

#close ⇒ Object

close the db



49
50
51
# File 'lib/rspider/ContentStorage.rb', line 49

# Closes the underlying database handle.
#
# @return whatever the underlying database's +close+ returns
def close
	outcome = @hdb.close
	outcome
end

#get(url) ⇒ Object

Fetches the content stored for the specified URL.



62
63
64
# File 'lib/rspider/ContentStorage.rb', line 62

# Looks up the content stored under the key +url+.
#
# @param url the key to look up
# @return whatever the underlying database's +get+ returns for that key
def get(url)
	@hdb.get(url)
end

#urls ⇒ Object

list all the urls



53
54
55
56
57
58
59
60
# File 'lib/rspider/ContentStorage.rb', line 53

# Collects every key in the database by walking its iterator.
#
# The iterator is reset with +iterinit+, then +iternext+ is called
# repeatedly until it returns a falsy value.
#
# @return [Array] the keys in the order the iterator yields them
def urls
	@hdb.iterinit
	collected = []
	current = @hdb.iternext
	while current
		collected.push(current)
		current = @hdb.iternext
	end
	collected
end