Class: DomainsScanner::Crawlers::Baidu
- Inherits:
-
Base
- Object
- Base
- DomainsScanner::Crawlers::Baidu
show all
- Defined in:
- lib/domains_scanner/crawlers/baidu.rb
Instance Method Summary
collapse
Methods inherited from Base
#agent, #have_next_page?, #parse_next_page_link, #search_by_form, #search_by_link, #search_keyword
Instance Method Details
#host ⇒ Object
4
5
6
|
# File 'lib/domains_scanner/crawlers/baidu.rb', line 4
# Returns the root URL of the Baidu site, without a trailing slash.
#
# @return [String] always "https://www.baidu.com"
def host
  'https://www.baidu.com'
end
|
#keyword_field_name ⇒ Object
8
9
10
|
# File 'lib/domains_scanner/crawlers/baidu.rb', line 8
# Returns the name of the field that carries the search keyword.
# NOTE(review): presumably consumed by the inherited form/link search
# helpers -- confirm against Base.
#
# @return [String] always "wd"
def keyword_field_name
  'wd'
end
|
#next_page_link_selector ⇒ Object
32
33
34
|
# File 'lib/domains_scanner/crawlers/baidu.rb', line 32
# Returns the CSS selector used to locate the "next page" link: the <a>
# that immediately follows a <strong> inside the element with id "page".
# NOTE(review): presumably the <strong> marks the current page number --
# confirm against the live markup.
#
# @return [String] always "#page strong+a"
def next_page_link_selector
  '#page strong+a'
end
|
#parse_results(doc) ⇒ Object
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
# File 'lib/domains_scanner/crawlers/baidu.rb', line 13
# Extracts a title and URL from every search hit on a results page.
#
# A hit is any element matching ".result". Its title is the text of
# "h3.t > a"; its URL is the text of "div:last-child > a.c-showurl",
# stripped of surrounding whitespace, prefixed with "http://" when it does
# not already start with "http", and then percent-escaped.
#
# Hits with no display-URL text are skipped, because no usable address can
# be built for them.
#
# @param doc [#search] parsed results page
#   (presumably a Nokogiri document -- confirm against the caller)
# @return [Array<Hash>] one `{ title: String, url: String }` per hit that
#   has a display URL, in page order
def parse_results(doc)
  results = doc.search(".result").map do |item|
    # Test the extracted text rather than the object returned by `search`:
    # that object is a collection and is truthy even when nothing matched.
    shown = item.search("div:last-child > a.c-showurl").text.strip
    next if shown.empty?

    url = shown.start_with?("http") ? shown : "http://#{shown}"
    title = item.search("h3.t > a").text

    # URI.encode was removed in Ruby 3.0; the default parser's #escape is
    # its documented replacement and leaves URL delimiters intact.
    { title: title, url: URI::DEFAULT_PARSER.escape(url) }
  end

  # `next` above leaves nil placeholders for the skipped hits.
  results.compact
end
|