Class: Rspider::HDBContentStorage

Inherits:
Object
  • Object
show all
Defined in:
lib/rspider/ContentStorage.rb

Overview

This class stores content in a Tokyo Cabinet database, so it performs very well and uses little memory.

Instance Method Summary collapse

Constructor Details

#initialize(path) ⇒ HDBContentStorage

the file path to hold the HDB file



37
38
39
40
41
42
43
# File 'lib/rspider/ContentStorage.rb', line 37

# Builds the storage around a TokyoCabinet hash-database handle.
#
# The handle is opened at +path+ with the OWRITER and OCREAT mode flags
# combined. A failed open is only reported on STDERR: nothing is raised
# here, and the handle stays assigned to @hdb either way.
#
# @param path [String] file path that holds the HDB file
def initialize(path)
  @hdb = TokyoCabinet::HDB.new
  mode = TokyoCabinet::HDB::OWRITER | TokyoCabinet::HDB::OCREAT
  return if @hdb.open(path, mode)

  # Translate the handle's last error code into a readable message.
  STDERR.printf("open error: %s\n", @hdb.errmsg(@hdb.ecode))
end

Instance Method Details

#add(url, content) ⇒ Object

Stores a URL together with the content of that URL.



45
46
47
# File 'lib/rspider/ContentStorage.rb', line 45

# Saves +content+ under the key +url+ in the underlying HDB handle.
#
# @param url the key to store the record under
# @param content the value stored for that key
# @return whatever the handle's +put+ call returns
def add(url, content)
  @hdb.put(url, content)
end

#close ⇒ Object

close the db



49
50
51
# File 'lib/rspider/ContentStorage.rb', line 49

# Closes the underlying HDB handle.
#
# @return whatever the handle's +close+ call returns
def close
  @hdb.close
end

#get(url) ⇒ Object

Fetches the content of the specified URL.



62
63
64
# File 'lib/rspider/ContentStorage.rb', line 62

# Looks up the record stored under +url+ in the underlying HDB handle.
#
# @param url the key to look up
# @return whatever the handle's +get+ call returns for that key
def get(url)
  @hdb.get(url)
end

#urls ⇒ Object

list all the urls



53
54
55
56
57
58
59
60
# File 'lib/rspider/ContentStorage.rb', line 53

# Collects every key held by the underlying HDB handle.
#
# The handle's iterator is reset with +iterinit+, then +iternext+ is
# called repeatedly until it yields a falsy value; each key seen on the
# way is appended to the result in iteration order.
#
# @return [Array] the keys in the order the iterator produced them
def urls
  @hdb.iterinit
  collected = []
  current = @hdb.iternext
  while current
    collected.push(current)
    current = @hdb.iternext
  end
  collected
end