Method: Wgit::DSL#crawl_site
- Defined in: lib/wgit/dsl.rb
#crawl_site(*urls, follow: @dsl_follow, allow_paths: nil, disallow_paths: nil) {|doc| ... } ⇒ Array<Wgit::Url>?

Also known as: crawl_r
Crawls an entire site using Wgit::Crawler#crawl_site underneath. If no URLs are provided, the start URL(s) previously set via the DSL's start method are used instead.
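For illustration, here is a minimal usage sketch. The target URL is a placeholder, and the example assumes the DSL is mixed in via include Wgit::DSL (as in the gem's README); crawling will make real HTTP requests:

```ruby
require 'wgit'

include Wgit::DSL

# Register a start URL so crawl_site can be called without arguments.
# 'https://example.com/' is a placeholder, not a real crawl target.
start 'https://example.com/'

# Crawl every internal page of the site; each crawled page is yielded
# to the block as a Wgit::Document.
external_links = crawl_site do |doc|
  puts doc.url
end

# crawl_site returns the external URLs encountered during the crawl.
puts external_links.size
```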
```ruby
# File 'lib/wgit/dsl.rb', line 130

def crawl_site(
  *urls, follow: @dsl_follow,
  allow_paths: nil, disallow_paths: nil, &block
)
  urls = (@dsl_start || []) if urls.empty?
  raise DSL_ERROR__NO_START_URL if urls.empty?

  xpath = follow || :default
  opts = { follow: xpath, allow_paths:, disallow_paths: }

  urls.reduce([]) do |externals, url|
    externals + get_crawler.crawl_site(Wgit::Url.parse(url), **opts, &block)
  end
end
```
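As a further sketch of the keyword arguments, URLs can also be passed explicitly and the crawl restricted by path. The URL and path patterns below are placeholders, and the glob-style matching shown is an assumption about how allow_paths/disallow_paths are interpreted:

```ruby
require 'wgit'

include Wgit::DSL

# Placeholder site; only follow links under /blog, skipping /blog/archive.
crawl_site(
  'https://example.com/',
  allow_paths:    'blog/*',
  disallow_paths: 'blog/archive/*'
) do |doc|
  puts doc.title
end
```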