Class: Rspider::MysqlContentStorage

Inherits:
Object
  • Object
show all
Defined in:
lib/rspider/ContentStorage.rb

Overview

Class MysqlContentStorage stores the content of URLs in a MySQL database.

Instance Method Summary collapse

Constructor Details

#initialize(hash, source = "default") ⇒ MysqlContentStorage

Initialize the object. `hash` must be a Hash containing the MySQL connection information, read from the keys "host", "user", "pass" and "db". `source` specifies the task name.

Raises:



82
83
84
85
86
# File 'lib/rspider/ContentStorage.rb', line 82

# Open the MySQL connection used by this storage and remember the task name.
#
# @param hash connection settings; the "host", "user", "pass" and "db"
#   entries are passed, in that order, to Mysql.new
# @param source task name kept in @source (defaults to "default")
# @raise [MysqlException] if the connection object comes back nil
def initialize(hash,source="default")
	host, user, pass, db = hash["host"], hash["user"], hash["pass"], hash["db"]
	@my = Mysql.new(host, user, pass, db)
	raise MysqlException if @my.nil?
	@source = source
end

Instance Method Details

#add(url, content) ⇒ Object

Store a URL together with the content of that URL.



88
89
90
91
92
93
94
95
96
97
# File 'lib/rspider/ContentStorage.rb', line 88

# Insert one row into the `htmls` table for the given url and its content.
#
# Every value except the literal '0' placeholders and the timestamp goes
# through @my.quote before being embedded in the statement. The `ukey`
# column is md5(url) followed by the task name held in @source.
#
# @param url the url being stored
# @param content the content fetched from that url
# @return [true, nil] true when the insert ran, nil when the query raised
#   Mysql::Error
def add(url,content)
	# Values are listed in column order; evaluation order matches that order too.
	values = [
		@my.quote(@source),
		@my.quote(url),
		'0',
		@my.quote(content),
		'0',
		Time.now.to_i.to_s,
		@my.quote(md5(url) + @source)
	]
	statement = "INSERT INTO `htmls` (`source`,`url`,`url_crc32`,`html`,`html_crc32`,`created`,`ukey`)\nVALUES ('#{values.join("','")}')"
	begin
		@my.query(statement)
		true
	rescue Mysql::Error
		nil
	end
end

#close ⇒ Object

close the database connection



126
127
128
# File 'lib/rspider/ContentStorage.rb', line 126

# Close the database connection held in @my.
#
# @return whatever the connection's own #close returns
def close
	@my.close
end

#get(url) ⇒ Object

Get the stored content for the given URL.



99
100
101
102
103
104
105
106
107
108
109
# File 'lib/rspider/ContentStorage.rb', line 99

# Look up the stored content for a url.
#
# The row is located through the `ukey` column, which holds md5(url)
# followed by the task name in @source.
#
# @param url the url whose content is wanted
# @return the `html` column of the first matching row, or nil when no row
#   matches or the query raises Mysql::Error
def get(url)
	sql = "select html from htmls where ukey='" + @my.quote(md5(url) + @source) + "'"
	begin
		@my.query(sql).each do |row|
			return row[0]
		end
		# No row was yielded. Without this explicit nil the method would return
		# the value of #each (the result-set object), which callers would
		# mistake for stored content.
		nil
	rescue Mysql::Error
		nil
	end
end

#md5(string) ⇒ Object

get md5 hash of string



74
75
76
77
78
# File 'lib/rspider/ContentStorage.rb', line 74

# Compute the MD5 digest of a string.
#
# @param string [String] text to hash
# @return [String] the digest as lowercase hexadecimal
def md5(string)
	Digest::MD5.hexdigest(string)
end

#urls ⇒ Object

list the urls

Returns:

  • Array



112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/rspider/ContentStorage.rb', line 112

# List every url stored for the current task (rows whose `source` column
# equals @source).
#
# @return [Array] the `url` column of each matching row, in the order the
#   result set yields them; an empty array when the query raises Mysql::Error
def urls
	statement = "select url from htmls where source='#{@my.quote(@source)}'"
	begin
		found = []
		@my.query(statement).each { |row| found << row[0] }
		found
	rescue Mysql::Error
		[]
	end
end