Class: VideoGrabber::Scraper
- Inherits:
-
Object
- Object
- VideoGrabber::Scraper
- Defined in:
- lib/video_grabber/scraper.rb
Instance Attribute Summary
-
#browser ⇒ Object
readonly
Returns the value of attribute browser.
-
#browser_parameters ⇒ Object
readonly
Returns the value of attribute browser_parameters.
-
#browser_type ⇒ Object
readonly
Returns the value of attribute browser_type.
-
#firefox_extension_path ⇒ Object
readonly
Returns the value of attribute firefox_extension_path.
-
#headless_enabled ⇒ Object
readonly
Returns the value of attribute headless_enabled.
-
#html_attributes ⇒ Object
readonly
Returns the value of attribute html_attributes.
-
#keep_browser_open ⇒ Object
readonly
Returns the value of attribute keep_browser_open.
-
#timeout ⇒ Object
readonly
Returns the value of attribute timeout.
-
#url ⇒ Object
readonly
Returns the value of attribute url.
Instance Method Summary
- #fetch_videos ⇒ Object
-
#initialize(config) ⇒ Scraper
constructor
A new instance of Scraper.
- #start ⇒ Object
- #stop ⇒ Object
Constructor Details
#initialize(config) ⇒ Scraper
Returns a new instance of Scraper.
7 8 9 10 11 12 13 14 15 16 |
# File 'lib/video_grabber/scraper.rb', line 7

# Builds a scraper from a configuration object.
#
# Six settings are copied from the reader of the same name on config;
# @browser_type comes from config.browser, and @browser_parameters is a hash
# holding the HTTP client returned by browser_http_client.
#
# @param config [Object] must respond to keep_browser_open, url, timeout,
#   headless_enabled, firefox_extension_path, html_attributes and browser
# @return [Scraper] a new instance of Scraper
def initialize(config)
  %i[
    keep_browser_open
    url
    timeout
    headless_enabled
    firefox_extension_path
    html_attributes
  ].each do |setting|
    instance_variable_set("@#{setting}", config.public_send(setting))
  end

  @browser_type = config.browser
  # Built last, after the copied settings are in place
  # (presumably browser_http_client reads some of them - verify).
  @browser_parameters = { http_client: browser_http_client }
end
Instance Attribute Details
#browser ⇒ Object (readonly)
Returns the value of attribute browser.
4 5 6 |
# File 'lib/video_grabber/scraper.rb', line 4

# Read-only accessor for the browser attribute.
#
# @return [Object] the current value of @browser (nil until it is assigned;
#   presumably set when the browser is opened - verify)
def browser
  @browser
end
#browser_parameters ⇒ Object (readonly)
Returns the value of attribute browser_parameters.
4 5 6 |
# File 'lib/video_grabber/scraper.rb', line 4

# Read-only accessor for the browser_parameters attribute.
#
# @return [Object] the current value of @browser_parameters (the constructor
#   stores a hash with an :http_client key here)
def browser_parameters
  @browser_parameters
end
#browser_type ⇒ Object (readonly)
Returns the value of attribute browser_type.
4 5 6 |
# File 'lib/video_grabber/scraper.rb', line 4

# Read-only accessor for the browser_type attribute.
#
# @return [Object] the current value of @browser_type (the constructor
#   stores config.browser here)
def browser_type
  @browser_type
end
#firefox_extension_path ⇒ Object (readonly)
Returns the value of attribute firefox_extension_path.
4 5 6 |
# File 'lib/video_grabber/scraper.rb', line 4

# Read-only accessor for the firefox_extension_path attribute.
#
# @return [Object] the current value of @firefox_extension_path (the
#   constructor stores config.firefox_extension_path here)
def firefox_extension_path
  @firefox_extension_path
end
#headless_enabled ⇒ Object (readonly)
Returns the value of attribute headless_enabled.
4 5 6 |
# File 'lib/video_grabber/scraper.rb', line 4

# Read-only accessor for the headless_enabled attribute.
#
# @return [Object] the current value of @headless_enabled (the constructor
#   stores config.headless_enabled here)
def headless_enabled
  @headless_enabled
end
#html_attributes ⇒ Object (readonly)
Returns the value of attribute html_attributes.
4 5 6 |
# File 'lib/video_grabber/scraper.rb', line 4

# Read-only accessor for the html_attributes attribute.
#
# @return [Object] the current value of @html_attributes (the constructor
#   stores config.html_attributes here)
def html_attributes
  @html_attributes
end
#keep_browser_open ⇒ Object (readonly)
Returns the value of attribute keep_browser_open.
4 5 6 |
# File 'lib/video_grabber/scraper.rb', line 4

# Read-only accessor for the keep_browser_open attribute.
#
# @return [Object] the current value of @keep_browser_open (the constructor
#   stores config.keep_browser_open here)
def keep_browser_open
  @keep_browser_open
end
#timeout ⇒ Object (readonly)
Returns the value of attribute timeout.
4 5 6 |
# File 'lib/video_grabber/scraper.rb', line 4

# Read-only accessor for the timeout attribute.
#
# @return [Object] the current value of @timeout (the constructor stores
#   config.timeout here)
def timeout
  @timeout
end
#url ⇒ Object (readonly)
Returns the value of attribute url.
4 5 6 |
# File 'lib/video_grabber/scraper.rb', line 4

# Read-only accessor for the url attribute.
#
# @return [Object] the current value of @url (the constructor stores
#   config.url here)
def url
  @url
end
Instance Method Details
#fetch_videos ⇒ Object
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
# File 'lib/video_grabber/scraper.rb', line 30

# Collects the markup of the <video> elements found on the current page.
#
# Three sources are merged, in this order:
#   1. the html of every element in browser.videos,
#   2. <video> nodes parsed out of the HTML-unescaped markup of each <iframe>,
#   3. a plain-text scan of the HTML-unescaped page source: every stretch that
#      follows a "<video" and contains "</video>" is kept up to and including
#      its last "</video>".
#
# Afterwards the browser is closed (unless keep_browser_open is set), runs of
# whitespace in every snippet are collapsed to a single space, and empty or
# duplicate snippets are removed.
#
# @return the value of add_attributes(links_list) when it is truthy,
#   otherwise the Array<String> of video snippets
# @raise [::VideoGrabber::BrowserIsClosed] when any step raises
#   ::Watir::Exception::Error
def fetch_videos
  closing_tag = '</video>'
  links_list = []

  links_list += browser.videos.map(&:html)

  # One snapshot of the page source serves both scans below.
  page_html = browser.html

  iframe_videos = ::Nokogiri::HTML(page_html).xpath('//iframe').flat_map do |iframe_node|
    # Iframe content may be embedded in escaped form, so unescape it before
    # parsing it as a document of its own.
    embedded = ::Nokogiri::HTML(::CGI.unescapeHTML(iframe_node.to_s))
    embedded.xpath('.//video').map(&:to_s)
  end
  links_list += iframe_videos

  # drop(1): the text before the first "<video" cannot be a video element.
  candidates = ::CGI.unescapeHTML(page_html).split('<video').drop(1)
  raw_videos = candidates.select { |chunk| chunk.include?(closing_tag) }.map do |chunk|
    # The inserted space is squeezed by the whitespace normalisation below
    # whenever the chunk itself starts with whitespace.
    tag = "<video #{chunk}"
    # Cut right after the LAST closing tag. Slicing by index (instead of
    # split/join) keeps the element intact when the chunk ends exactly on
    # "</video>", e.g. for back-to-back <video> elements; split would drop
    # the trailing empty field and lose the element body.
    tag[0, tag.rindex(closing_tag) + closing_tag.length]
  end
  links_list += raw_videos

  stop unless keep_browser_open

  links_list = links_list.map { |element| element.split.join(' ') }.reject(&:empty?).uniq
  add_attributes(links_list) || links_list
rescue ::Watir::Exception::Error
  raise ::VideoGrabber::BrowserIsClosed,
        'Please restart the scraper (scraper_instance.start), or keep the browser open'
end
#start ⇒ Object
18 19 20 21 22 23 24 |
# File 'lib/video_grabber/scraper.rb', line 18

# Opens the browser and navigates it to url.
#
# When either step raises ::Net::ReadTimeout the browser is stopped and the
# failure is reported as ::VideoGrabber::TimeOut instead.
#
# @return [Scraper] self, so calls can be chained
# @raise [::VideoGrabber::TimeOut] when opening or navigating times out
def start
  begin
    open_browser
    browser.goto(url)
  rescue ::Net::ReadTimeout
    stop
    raise ::VideoGrabber::TimeOut
  end

  self
end
#stop ⇒ Object
26 27 28 |
# File 'lib/video_grabber/scraper.rb', line 26

# Closes the browser, if there is one.
#
# The nil guard matters because start calls stop from its
# ::Net::ReadTimeout handler: without it, a missing browser would raise
# NoMethodError there and hide the intended ::VideoGrabber::TimeOut
# (presumably possible when opening the browser itself times out - verify).
#
# @return the result of browser.close, or nil when no browser is set
def stop
  browser.close if browser
end