Class: DomainsScanner::Crawlers::Google

Inherits:
Base
  • Object
show all
Defined in:
lib/domains_scanner/crawlers/google.rb

Instance Method Summary collapse

Methods inherited from Base

#agent, #have_next_page?, #parse_next_page_link, #search_by_form, #search_by_link, #search_keyword

Instance Method Details

#host ⇒ Object



4
5
6
# File 'lib/domains_scanner/crawlers/google.rb', line 4

# Base URL of the Google site handled by this crawler.
# NOTE(review): presumably read by the inherited Base search helpers — confirm.
#
# @return [String] always "https://google.com"
def host
  'https://google.com'
end

#keyword_field_name ⇒ Object



8
9
10
# File 'lib/domains_scanner/crawlers/google.rb', line 8

# Name of the field that carries the search keyword.
# NOTE(review): presumably the name of the query input on Google's search
# form, used by the inherited form-search helper — confirm against Base.
#
# @return [String] always "q"
def keyword_field_name
  'q'
end


#next_page_link_selector ⇒ Object

27
28
29
# File 'lib/domains_scanner/crawlers/google.rb', line 27

# CSS selector for the "next page" link of a results page: the anchor inside
# the table cell immediately following the `.cur` element within `div#foot`.
# NOTE(review): presumably consumed by the inherited pagination helpers — confirm.
#
# @return [String] always "div#foot .cur+td>a"
def next_page_link_selector
  'div#foot .cur+td>a'
end

#parse_results(doc) ⇒ Object

Returns an array of hashes: [{ title: “xxx”, url: “xxx” }, …]


13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/domains_scanner/crawlers/google.rb', line 13

# Extracts the result links from a parsed Google results page.
#
# Every element matching ".g h3.r a" becomes one hash. When the link's href is
# a Google redirect ("/url?q=<target>&sa=...&ved=...&usg=..."), only <target>
# is kept; any other href is returned untouched.
#
# @param doc [#search] parsed page; must respond to `search(css_selector)` and
#   return nodes that expose `text` and `attributes`
#   (presumably a Nokogiri document — confirm against the caller)
# @return [Array<Hash>] hashes shaped like `{ title: "xxx", url: "xxx" }`;
#   `url` is nil when the link has no href attribute
def parse_results(doc)
  redirect_prefix = "/url?q="

  doc.search(".g h3.r a").map do |link|
    href_attr = link.attributes["href"]
    href = href_attr && href_attr.value

    url =
      if href && href.start_with?(redirect_prefix)
        # A redirect href looks like
        #   /url?q=https://bbs.abc.net/thread-144889-1-1.html&sa=U&ved=...&usg=...
        # Keep only the value of `q`: everything after the prefix up to the
        # first "&". The trailing parameters are not part of the target URL.
        # NOTE(review): the value is not percent-decoded here — confirm whether
        # callers need decoding.
        href[redirect_prefix.length..-1][/\A[^&]*/]
      else
        # Direct link (or no href at all): pass through unchanged so that a
        # "/url?q=" occurring in the middle of a real URL is not mangled.
        href
      end

    { title: link.text, url: url }
  end
end