Class: DomainsScanner::Crawlers::Base
- Inherits: Object
- Inheritance chain: Object → DomainsScanner::Crawlers::Base
- Defined in:
- lib/domains_scanner/crawlers/base.rb
Instance Method Summary
- #agent ⇒ Object
- #have_next_page?(doc) ⇒ Boolean
- #keyword_field_name ⇒ Object
- #parse_next_page_link(doc) ⇒ Object
- #parse_results(doc) ⇒ Object
- #search_by_form(domain_name, top_level_domain) ⇒ Object
- #search_by_link(link) ⇒ Object
- #search_keyword(domain_name, top_level_domain) ⇒ Object
Instance Method Details
#agent ⇒ Object
6 7 8 9 10 |
# File 'lib/domains_scanner/crawlers/base.rb', line 6
#
# Returns the Mechanize agent for this crawler, building it on first use
# and reusing the same instance on later calls.
#
# @return [Mechanize] agent whose user-agent alias is set to "Mac Safari"
def agent
  return @agent if @agent

  @agent = Mechanize.new { |browser| browser.user_agent_alias = "Mac Safari" }
end
#have_next_page?(doc) ⇒ Boolean
54 55 56 |
# File 'lib/domains_scanner/crawlers/base.rb', line 54
#
# Abstract hook: this base implementation always raises.
# NOTE(review): presumably a concrete crawler reports whether +doc+ links to
# a further page of results -- confirm against the subclasses.
#
# @param doc [Object] page to inspect; unused by the base implementation
# @return [Boolean]
# @raise [NotImplementedError] always, naming the class and the method to override
def have_next_page?(doc)
  # The message identifies the missing override; the exception class is unchanged.
  raise NotImplementedError, "#{self.class}##{__method__} must be implemented by a subclass"
end
#keyword_field_name ⇒ Object
46 47 48 |
# File 'lib/domains_scanner/crawlers/base.rb', line 46
#
# Abstract hook: this base implementation always raises. search_by_form uses
# the returned value as the key of the form field that receives the search
# query.
#
# @return [Object] form field key (supplied by subclasses)
# @raise [NotImplementedError] always, naming the class and the method to override
def keyword_field_name
  # The message identifies the missing override; the exception class is unchanged.
  raise NotImplementedError, "#{self.class}##{__method__} must be implemented by a subclass"
end
#parse_next_page_link(doc) ⇒ Object
34 35 36 37 38 39 40 |
# File 'lib/domains_scanner/crawlers/base.rb', line 34
#
# Builds the URL of the next results page from the first element of +doc+
# that matches +next_page_link_selector+.
#
# @param doc [#search] page object; +search+ must return a collection
#   responding to +first+
# @return [String, nil] +host+ followed by the link's href; the href itself
#   when it already starts with "http://" or "https://"; nil when there is
#   no matching element or the element carries no href
def parse_next_page_link(doc)
  next_page_tag = doc.search(next_page_link_selector).first
  return unless next_page_tag

  # to_s turns a missing attribute (nil) into "" and an attribute object
  # into the text it would have interpolated as.
  href = next_page_tag.attributes["href"].to_s
  # Without a target, returning the bare host would look like a real next page.
  return if href.empty?
  # An already-absolute link must not get the host prepended a second time.
  return href if href.start_with?("http://", "https://")

  "#{host}#{href}"
end
#parse_results(doc) ⇒ Object
50 51 52 |
# File 'lib/domains_scanner/crawlers/base.rb', line 50
#
# Abstract hook: this base implementation always raises. search_by_form and
# search_by_link pass the returned value as the first argument to
# DomainsScanner::Results.new.
#
# @param doc [Object] fetched page; unused by the base implementation
# @return [Object] parsed results (supplied by subclasses)
# @raise [NotImplementedError] always, naming the class and the method to override
def parse_results(doc)
  # The message identifies the missing override; the exception class is unchanged.
  raise NotImplementedError, "#{self.class}##{__method__} must be implemented by a subclass"
end
#search_by_form(domain_name, top_level_domain) ⇒ Object
12 13 14 15 16 17 18 19 20 21 22 23 24 |
# File 'lib/domains_scanner/crawlers/base.rb', line 12
#
# Fetches +host+, fills the first form on that page with the query built by
# search_keyword, submits it, and wraps the parsed response.
#
# @param domain_name [Object] forwarded to search_keyword
# @param top_level_domain [Object] forwarded to search_keyword
# @return [DomainsScanner::Results] built from parse_results and
#   parse_next_page_link applied to the submitted form's response
def search_by_form(domain_name, top_level_domain)
  search_form = agent.get(host).forms.first
  keyword = search_keyword(domain_name, top_level_domain)
  search_form[keyword_field_name] = keyword
  result_page = search_form.submit

  DomainsScanner::Results.new(
    parse_results(result_page),
    parse_next_page_link(result_page)
  )
end
#search_by_link(link) ⇒ Object
26 27 28 29 30 31 32 |
# File 'lib/domains_scanner/crawlers/base.rb', line 26
#
# Fetches +link+ with the crawler's agent and wraps the parsed response.
#
# @param link [Object] target passed unchanged to agent.get
# @return [DomainsScanner::Results] built from parse_results and
#   parse_next_page_link applied to the fetched page
def search_by_link(link)
  page = agent.get(link)

  DomainsScanner::Results.new(parse_results(page), parse_next_page_link(page))
end
#search_keyword(domain_name, top_level_domain) ⇒ Object
42 43 44 |
# File 'lib/domains_scanner/crawlers/base.rb', line 42
#
# Builds the search query string for a domain.
#
# @param domain_name [#to_s] label placed after the "*." wildcard
# @param top_level_domain [#to_s] final label of the query
# @return [String] "site:*.<domain_name>.<top_level_domain>"
def search_keyword(domain_name, top_level_domain)
  wildcard_host = "*.#{domain_name}.#{top_level_domain}"
  "site:#{wildcard_host}"
end