Method: Wgit::Crawler#browser_get

Defined in:
lib/wgit/crawler.rb

#browser_get(url) ⇒ Ferrum::Browser (protected)

Performs an HTTP GET request in a web browser, allowing the response's JS to execute before returning the browser instance, whose body is then the HTML of the fully rendered webpage. This allows JavaScript (SPA apps etc.) to generate HTML dynamically. See https://github.com/rubycdp/ferrum for more info.



403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
# File 'lib/wgit/crawler.rb', line 403

# Performs an HTTP GET request in a web browser and waits for the page's JS
# to stop changing the DOM before returning the browser instance.
#
# The browser is created lazily from @ferrum_opts on first use and reused
# on subsequent calls. After navigating, the page body is polled every
# @parse_javascript_delay seconds; the page is treated as fully rendered
# once two consecutive polls report the same body size.
#
# NOTE: the polling loop has no upper bound, so it keeps running for as long
# as the body size keeps changing between polls.
#
# @param url [Object] The URL to load; passed straight to the browser's
#   #goto.
# @return [Ferrum::Browser] The browser, left on the requested page.
def browser_get(url)
  @browser ||= Ferrum::Browser.new(**@ferrum_opts)

  # Navigate to the url and start parsing the JS on the page.
  @browser.goto(url)

  # Wait for the page's JS to finish dynamically manipulating the DOM.
  # The body is fetched exactly once per poll and that same snapshot becomes
  # the baseline for the next comparison, avoiding a second (redundant)
  # round trip to the browser on every iteration.
  html = @browser.body
  loop do
    sleep @parse_javascript_delay

    current = @browser.body
    break if current.size == html.size

    html = current
  end

  @browser
end