4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
# File 'lib/scrubyt/core/scraping/filters/detail_page_filter.rb', line 4
# Evaluates this filter for +source+: resolves a URL, fetches it, obtains the
# results for the fetched page, and returns them as +root_results+.
#
# NOTE(review): this listing appears to have lost several identifiers during
# extraction (the receiver between "@parent_pattern." and ".store_page", the
# receiver of ".nil?", the target of "= .new", the method after
# "root_results = ." and others). It is not valid Ruby as shown; restore the
# missing names from the original file before using it. The comments below
# describe only what is still visible.
#
# source - either a String, which is used directly as the URL, or another
#          object from which an 'href' attribute is looked up via XPathUtils
#          (presumably a parsed document node - TODO confirm).
#
# Returns root_results, which is assigned in one of the two branches near the
# end of the method.
def evaluate(source)
# A String source is taken to be the URL itself.
if source.is_a?(String)
url = source
else
# Otherwise the URL is read from the 'href' attribute of the node returned by
# the XPathUtils helper. NOTE(review): no nil check here - if the helper
# can return nil this line raises NoMethodError; confirm against the helper.
url = XPathUtils.find_nearest_node_with_attribute(source, 'href').attributes['href']
end
# Page / host-name bookkeeping before the fetch. The object these methods are
# called on is missing from the listing. The host name read here is kept in
# original_host_name and handed back via store_host_name at the end.
@parent_pattern..store_page
original_host_name = @parent_pattern..get_host_name
@parent_pattern..restore_host_name
begin
FetchAction.fetch url, :resolve => @parent_pattern.resolve
rescue
# Any StandardError raised by the fetch is logged and swallowed; execution
# continues with the code below even though the fetch failed.
Scrubyt.log :ERROR, "Couldn't get page, probably returned 404 or 500 status code"
end
# The receiver of .nil? is missing. Presumably this checks whether an object
# built on a previous call already exists - TODO confirm.
if .nil?
# First-time path: construct a new object (class name and assignment target
# are missing) and read its #result.
= .new @parent_pattern..mode, @parent_pattern.
root_results = .result
else
# Otherwise the results come from a method on the existing object; both the
# receiver and the method name are missing from the listing.
root_results = .
end
# Counterparts of the calls made before the fetch: restore_page pairs with
# store_page, and the host name captured earlier is stored back.
@parent_pattern..restore_page
@parent_pattern..store_host_name original_host_name
root_results
end
|