Class: ContentCrawler::Crawler

Inherits:
Object
Includes:
CrawlerProcess
Defined in:
lib/content_crawler.rb

Instance Method Summary

Methods included from CrawlerProcess

#audio_video_collection, #check_local_dir, #collection_attr, #collection_links, #datalist_collection, #iframe_embed_collection, #mechanize_parser, #object_collection, #select_collection, #store_remote_image, #watir_web_browser

Constructor Details

#initialize(crawler, base_url, options = {:timeout=>300, :user_agent=>nil}) ⇒ Crawler

Returns a new instance of Crawler.



13
14
15
# File 'lib/content_crawler.rb', line 13

# Builds a Crawler by handing every argument, unchanged, to the next
# +initialize+ in the ancestor chain (presumably the one supplied by the
# included CrawlerProcess module -- confirm there for what each argument does).
#
# @param crawler  forwarded as-is to the parent initializer
# @param base_url forwarded as-is to the parent initializer
# @param options [Hash] forwarded as-is; defaults to
#   <tt>{:timeout=>300, :user_agent=>nil}</tt>
def initialize(crawler, base_url, options = {:timeout=>300, :user_agent=>nil})
    super(crawler, base_url, options)
end

Instance Method Details

#close_browser ⇒ Object



60
61
62
# File 'lib/content_crawler.rb', line 60

# Delegates straight to the +close_browser+ implementation found further up
# the ancestor chain and returns whatever that implementation returns.
def close_browser
    super()
end

#get_audio_video_elements(xpath = nil, options = {}) ⇒ Object



48
49
50
# File 'lib/content_crawler.rb', line 48

# Evaluates +xpath+ against the currently loaded @page and passes the matched
# nodes, together with +options+, to +audio_video_collection+.
#
# @param xpath [String, nil] XPath expression; when nil nothing is evaluated
# @param options [Hash] passed through untouched to +audio_video_collection+
# @return [Object, nil] the result of +audio_video_collection+, or nil when
#   +xpath+ is nil
def get_audio_video_elements(xpath = nil, options = {})
    return if xpath.nil?

    matched_nodes = @page.xpath(xpath)
    audio_video_collection(matched_nodes, options)
end

#get_datalist_elements(xpath = nil, options = {}) ⇒ Object



56
57
58
# File 'lib/content_crawler.rb', line 56

# Evaluates +xpath+ against the currently loaded @page and passes the matched
# nodes, together with +options+, to +datalist_collection+.
#
# @param xpath [String, nil] XPath expression; when nil nothing is evaluated
# @param options [Hash] passed through untouched to +datalist_collection+
# @return [Object, nil] the result of +datalist_collection+, or nil when
#   +xpath+ is nil
def get_datalist_elements(xpath = nil, options = {})
    return if xpath.nil?

    matched_nodes = @page.xpath(xpath)
    datalist_collection(matched_nodes, options)
end

#get_iframe_embed_elements(xpath = nil, options = {}) ⇒ Object



44
45
46
# File 'lib/content_crawler.rb', line 44

# Evaluates +xpath+ against the currently loaded @page and passes the matched
# nodes, together with +options+, to +iframe_embed_collection+.
#
# @param xpath [String, nil] XPath expression; when nil nothing is evaluated
# @param options [Hash] passed through untouched to +iframe_embed_collection+
# @return [Object, nil] the result of +iframe_embed_collection+, or nil when
#   +xpath+ is nil
def get_iframe_embed_elements(xpath = nil, options = {})
    return if xpath.nil?

    matched_nodes = @page.xpath(xpath)
    iframe_embed_collection(matched_nodes, options)
end


#get_link_elements(xpath = nil, options = {}) ⇒ Object



32
33
34
# File 'lib/content_crawler.rb', line 32

# Evaluates +xpath+ against the currently loaded @page and passes the matched
# nodes, together with +options+, to +collection_links+.
#
# @param xpath [String, nil] XPath expression; when nil nothing is evaluated
# @param options [Hash] passed through untouched to +collection_links+
# @return [Object, nil] the result of +collection_links+, or nil when
#   +xpath+ is nil
def get_link_elements(xpath = nil, options = {})
    return if xpath.nil?

    matched_nodes = @page.xpath(xpath)
    collection_links(matched_nodes, options)
end

#get_object_elements(xpath = nil, options = {}) ⇒ Object



52
53
54
# File 'lib/content_crawler.rb', line 52

# Evaluates +xpath+ against the currently loaded @page and passes the matched
# nodes, together with +options+, to +object_collection+.
#
# @param xpath [String, nil] XPath expression; when nil nothing is evaluated
# @param options [Hash] passed through untouched to +object_collection+
# @return [Object, nil] the result of +object_collection+, or nil when
#   +xpath+ is nil
def get_object_elements(xpath = nil, options = {})
    return if xpath.nil?

    matched_nodes = @page.xpath(xpath)
    object_collection(matched_nodes, options)
end

#get_parser_page(crawl_url = nil) ⇒ Object



17
18
19
20
21
22
23
24
25
26
# File 'lib/content_crawler.rb', line 17

# Loads +crawl_url+ and stores the parsed document in @page.
#
# When @browser is set it takes priority: the browser navigates to the URL and
# its HTML is parsed with Nokogiri::HTML. Otherwise, when @agent is set, the
# URL is fetched through the agent and the response's +parser+ is stored.
#
# If +crawl_url+ is nil, or neither @browser nor @agent is set, @page is left
# untouched and a usage message String is returned (nothing is raised).
#
# @param crawl_url [String, nil] address of the page to load
# @return [Object, String] the newly stored @page on success, otherwise the
#   usage message
def get_parser_page(crawl_url = nil)
    usage_hint = "Please select any one of the parser(selenium_webdriver_with_headless, selenium_webdriver_without_headless, mechanize_parser) and pass the crawl_url to crawl content"
    return usage_hint if crawl_url.nil?

    if !@browser.nil?
        @browser.goto(crawl_url)
        @page = Nokogiri::HTML(@browser.html)
    elsif !@agent.nil?
        @page = @agent.get(crawl_url).parser
    else
        usage_hint
    end
end

#get_remote_image(xpath = nil, image_store_dir = nil) ⇒ Object



36
37
38
# File 'lib/content_crawler.rb', line 36

# Evaluates +xpath+ against the currently loaded @page and passes the matched
# nodes, together with +image_store_dir+, to +store_remote_image+.
#
# @param xpath [String, nil] XPath expression; when nil nothing is evaluated
# @param image_store_dir [Object, nil] passed through untouched to
#   +store_remote_image+
# @return [Object, nil] the result of +store_remote_image+, or nil when
#   +xpath+ is nil
def get_remote_image(xpath = nil, image_store_dir = nil)
    return if xpath.nil?

    matched_nodes = @page.xpath(xpath)
    store_remote_image(matched_nodes, image_store_dir)
end

#get_select_elements(xpath = nil, options = {}) ⇒ Object



40
41
42
# File 'lib/content_crawler.rb', line 40

# Evaluates +xpath+ against the currently loaded @page and passes the matched
# nodes, together with +options+, to +select_collection+.
#
# @param xpath [String, nil] XPath expression; when nil nothing is evaluated
# @param options [Hash] passed through untouched to +select_collection+
# @return [Object, nil] the result of +select_collection+, or nil when
#   +xpath+ is nil
def get_select_elements(xpath = nil, options = {})
    return if xpath.nil?

    matched_nodes = @page.xpath(xpath)
    select_collection(matched_nodes, options)
end

#get_simple_text(xpath = nil) ⇒ Object



28
29
30
# File 'lib/content_crawler.rb', line 28

# Returns the text of whatever +xpath+ matches in the currently loaded @page,
# with leading and trailing whitespace stripped.
#
# @param xpath [String, nil] XPath expression; when nil nothing is evaluated
# @return [String, nil] the stripped text, or nil when +xpath+ is nil
def get_simple_text(xpath = nil)
    return if xpath.nil?

    matched_nodes = @page.xpath(xpath)
    matched_nodes.text.strip
end