Class: Rspider::UrlScorer

Inherits:
Object
  • Object
show all
Defined in:
lib/rspider/UrlScorer.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ UrlScorer

Returns a new instance of UrlScorer.



9
10
# File 'lib/rspider/UrlScorer.rb', line 9

# Builds a UrlScorer. The constructor takes no arguments and sets up no
# state; the scoring methods shown for this class are all class-level.
def initialize; end

Class Method Details

.score(url) ⇒ Object

Calculates the overall score of a URL.



12
13
14
# File 'lib/rspider/UrlScorer.rb', line 12

# Computes the overall score of a URL as the sum of its three component
# scores: domain, file extension and path depth.
#
# @param url [String] the URL to score
# @return [Integer] scoreDomain(url) + scoreExt(url) + scoreDepth(url)
def UrlScorer.score(url)
	domain_points = UrlScorer.scoreDomain(url)
	ext_points = UrlScorer.scoreExt(url)
	depth_points = UrlScorer.scoreDepth(url)
	domain_points + ext_points + depth_points
end

.scoreDepth(url) ⇒ Object

Calculates the score for the URL’s path depth.



37
38
39
40
41
42
# File 'lib/rspider/UrlScorer.rb', line 37

# Scores a URL by how deep its path is: shallower URLs score higher.
#
# The depth is the number of "/"-separated segments once the scheme has
# been removed, so the host itself counts as one segment. A host-only URL
# scores 4, each additional path level costs one point, and the score never
# drops below 1.
#
# @param url [String] the URL to score
# @return [Integer] a score between 1 and 5
def UrlScorer.scoreDepth(url)
	# Strip any leading scheme (http, https, ftp, ...), not only "http://";
	# otherwise "https://host/x" would split into "https:", "", "host", "x".
	without_scheme = url.sub(%r{\A[a-z][a-z0-9+.\-]*://}i, '')
	# Slashes inside the query string or fragment are not path levels.
	path = without_scheme.sub(/[?#].*\z/m, '')
	depth = path.split('/').length
	[5 - depth, 1].max
end

.scoreDomain(url) ⇒ Object

Calculates the score for the URL’s domain.



16
17
18
19
20
21
22
# File 'lib/rspider/UrlScorer.rb', line 16

# Scores a URL by the suffix of its host name.
#
#   host ends with ".org.cn" => 4
#   host ends with ".org"    => 5
#   host ends with ".cn"     => 2
#   anything else            => 3
#
# The host is isolated first (scheme, userinfo, port, path, query and
# fragment are dropped) so that a path such as "/page.cn" or a host such as
# "cyborg" is not mistaken for a matching domain suffix.
#
# @param url [String] the URL to score
# @return [Integer] a score between 2 and 5
def UrlScorer.scoreDomain(url)
	# Drop any leading scheme, then keep everything up to the first "/", "?" or "#".
	authority = url.sub(%r{\A[a-z][a-z0-9+.\-]*://}i, '')[%r{\A[^/?#]*}]
	# Remove "user:pass@" and a trailing ":port"; host names are case-insensitive.
	host = authority.sub(/\A.*@/, '').sub(/:\d*\z/, '').downcase

	# ".org.cn" is tested before ".cn" because it is the more specific suffix.
	return 4 if host.end_with?('.org.cn')
	return 5 if host.end_with?('.org')
	return 2 if host.end_with?('.cn')
	3
end

.scoreExt(url) ⇒ Object

Calculates the score for the URL’s file extension.



24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/rspider/UrlScorer.rb', line 24

# Scores a URL by the file extension of its path.
#
#   .html, .htm, .shtml  => 3
#   .doc, .ppt, .zip     => 4
#   anything else        => 2  (includes .jsp, .asp, .php, .aspx, .cgi, .pl
#                               and URLs with no extension)
#
# @param url [String] the URL to score
# @return [Integer] 2, 3 or 4
def UrlScorer.scoreExt(url)
	# Remove the query string and fragment BEFORE asking for the extension;
	# otherwise a "." or "/" inside the query (e.g. "x.php?f=a.b") would be
	# picked up by File.extname instead of the real file extension.
	path = url.sub(/[?#].*\z/m, '')
	# Extensions are compared case-insensitively (".HTML" scores like ".html").
	ext = File.extname(path).downcase

	case ext
	when '.html', '.htm', '.shtml' then 3
	when '.doc', '.ppt', '.zip' then 4
	else 2
	end
end