Class: DomainsScanner::Crawlers::Base

Inherits:
Object
  • Object
show all
Defined in:
lib/domains_scanner/crawlers/base.rb

Direct Known Subclasses

Baidu, Google

Instance Method Summary collapse

Instance Method Details

#agent ⇒ Object



6
7
8
9
10
# File 'lib/domains_scanner/crawlers/base.rb', line 6

# Lazily builds and caches the Mechanize client used for every request.
#
# The client is created on first call with the "Mac Safari" user-agent alias
# and stored in @agent; later calls return that same instance.
#
# @return [Mechanize] the cached client
def agent
  return @agent if @agent

  @agent = Mechanize.new { |client| client.user_agent_alias = "Mac Safari" }
end

#have_next_page?(doc) ⇒ Boolean

Returns:

  • (Boolean)

Raises:

  • (NotImplementedError)


54
55
56
# File 'lib/domains_scanner/crawlers/base.rb', line 54

# Abstract hook: the base implementation always raises.
#
# The error message names the receiver's class and this method so that a
# missing override can be identified directly from the exception.
# Note: NotImplementedError descends from ScriptError, so a bare `rescue`
# (which only catches StandardError) will not intercept it.
#
# @param doc [Object] ignored by the base implementation
# @return [Boolean] never returns here; overriding classes supply the value
# @raise [NotImplementedError] always
def have_next_page?(doc)
  raise NotImplementedError, "#{self.class}##{__method__} must be implemented by a subclass"
end

#keyword_field_name ⇒ Object

Raises:

  • (NotImplementedError)


46
47
48
# File 'lib/domains_scanner/crawlers/base.rb', line 46

# Abstract hook: the base implementation always raises.
#
# search_by_form uses the return value as the key of the form field that
# receives the search query, so overriding classes must return that key.
# The error message names the receiver's class and this method so that a
# missing override can be identified directly from the exception.
#
# @return [Object] never returns here; overriding classes supply the field key
# @raise [NotImplementedError] always
def keyword_field_name
  raise NotImplementedError, "#{self.class}##{__method__} must be implemented by a subclass"
end


34
35
36
37
38
39
40
# File 'lib/domains_scanner/crawlers/base.rb', line 34

# Builds the URL of the next results page from the first node in +doc+ that
# matches +next_page_link_selector+.
#
# @param doc [#search] parsed page; must respond to +search(selector)+
# @return [String, nil] the next-page URL, or nil when no node matches or the
#   matched node carries no href
def parse_next_page_link(doc)
  next_page_tag = doc.search(next_page_link_selector).first
  return unless next_page_tag

  # to_s covers both a missing attribute (nil) and an attribute object.
  href = next_page_tag.attributes["href"].to_s
  # Without an href, plain concatenation would return the bare host as the
  # "next page", sending the caller back to the start page.
  return if href.empty?
  # An already-absolute link must not get the host prepended a second time.
  return href if href.match?(%r{\Ahttps?://}i)

  "#{host}#{href}"
end

#parse_results(doc) ⇒ Object

Raises:

  • (NotImplementedError)


50
51
52
# File 'lib/domains_scanner/crawlers/base.rb', line 50

# Abstract hook: the base implementation always raises.
#
# search_by_form and search_by_link pass the fetched page to this method and
# hand its return value to DomainsScanner::Results.new as the first argument.
# The error message names the receiver's class and this method so that a
# missing override can be identified directly from the exception.
#
# @param doc [Object] ignored by the base implementation
# @return [Object] never returns here; overriding classes supply the results
# @raise [NotImplementedError] always
def parse_results(doc)
  raise NotImplementedError, "#{self.class}##{__method__} must be implemented by a subclass"
end

#search_by_form(domain_name, top_level_domain) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/domains_scanner/crawlers/base.rb', line 12

# Runs a search by filling in and submitting the first form on the host page.
#
# Fetches +host+, writes the keyword from +search_keyword+ into the form field
# keyed by +keyword_field_name+, submits the form, and wraps the parsed
# results together with the next-page link.
#
# @param domain_name [Object] forwarded to +search_keyword+
# @param top_level_domain [Object] forwarded to +search_keyword+
# @return [DomainsScanner::Results] built from +parse_results+ and
#   +parse_next_page_link+ applied to the submitted page
def search_by_form(domain_name, top_level_domain)
  search_form = agent.get(host).forms.first
  keyword = search_keyword(domain_name, top_level_domain)
  search_form[keyword_field_name] = keyword
  results_page = search_form.submit

  DomainsScanner::Results.new(
    parse_results(results_page),
    parse_next_page_link(results_page)
  )
end


26
27
28
29
30
31
32
# File 'lib/domains_scanner/crawlers/base.rb', line 26

# Fetches an already-known results URL and parses it.
#
# @param link [Object] passed unchanged to +agent.get+
# @return [DomainsScanner::Results] built from +parse_results+ and
#   +parse_next_page_link+ applied to the fetched page
def search_by_link(link)
  page = agent.get(link)

  DomainsScanner::Results.new(parse_results(page), parse_next_page_link(page))
end

#search_keyword(domain_name, top_level_domain) ⇒ Object



42
43
44
# File 'lib/domains_scanner/crawlers/base.rb', line 42

# Builds the search query string for a domain.
#
# @param domain_name [#to_s] middle label of the query, e.g. "example"
# @param top_level_domain [#to_s] trailing label of the query, e.g. "com"
# @return [String] "site:*.<domain_name>.<top_level_domain>"
def search_keyword(domain_name, top_level_domain)
  format("site:*.%s.%s", domain_name, top_level_domain)
end