Module: Basilisk::Core
Instance Method Summary collapse
- #close_processors(instances) ⇒ Object
-
#create(search_name, url) ⇒ Object
Creates a folder for the processor results and a default YAML config file in the current directory.
- #create_config_file(search_name, url, filename) ⇒ Object
- #create_results_folder(search_name) ⇒ Object
-
#run(search_opts) ⇒ Object
Takes search options and runs the crawler with any processors.
Instance Method Details
#close_processors(instances) ⇒ Object
35 36 37 38 39 |
# File 'lib/basilisk/core.rb', line 35
#
# Calls +close_file+ on every processor in +instances+.
#
# Every processor gets its +close_file+ call even when an earlier one raises,
# so one failing processor cannot leave the remaining ones unclosed. The first
# error raised is re-raised after all processors have been attempted.
#
# @param instances [#each] processors, each responding to +close_file+
# @return [Object] whatever +instances.each+ returns (the array itself for an Array)
# @raise [StandardError] the first error raised by any +close_file+ call
def close_processors(instances)
  first_error = nil

  result = instances.each do |processor|
    begin
      processor.close_file
    rescue StandardError => e
      # Remember only the first failure and keep closing the others.
      first_error ||= e
    end
  end

  raise first_error if first_error

  result
end
#create(search_name, url) ⇒ Object
Creates a folder for the processor results and a default YAML config file in the current directory.
43 44 45 46 47 48 49 50 51 52 |
# File 'lib/basilisk/core.rb', line 43
#
# Creates a folder for the processor results and a default YAML config file
# in the current directory, then prints usage instructions through
# Basilisk::Template.output_instructions.
#
# Any StandardError raised along the way is rescued and reported on standard
# output instead of propagating to the caller.
#
# @param search_name [String] name used for both the folder and the config file
# @param url [String] forwarded to create_config_file
# @return [Object] the result of output_instructions, or of the last +puts+
#   when an error was rescued
def create(search_name, url)
  # Make the folder first. Dir.mkdir fails when the folder already exists,
  # and doing it before the config file is written means that failure is
  # detected before an existing "<search_name>.yml" can be overwritten.
  foldername = create_results_folder(search_name)

  # create_config_file derives the path itself and overwrites its third
  # argument, so nil is passed explicitly rather than a not-yet-assigned local.
  filename = create_config_file(search_name, url, nil)

  Basilisk::Template.output_instructions(search_name, filename, foldername)
rescue => e
  puts "Error: Could not create config file or folder."
  puts "Please make sure that a folder of the same name doesn't already exist.\n"
  puts "(#{e})"
end
#create_config_file(search_name, url, filename) ⇒ Object
54 55 56 57 58 59 60 |
# File 'lib/basilisk/core.rb', line 54
#
# Writes the default config produced by Basilisk::Template.default to
# "<search_name>.yml" in the current working directory, replacing any file
# already at that path.
#
# @param search_name [String] base name of the config file; also passed to the
#   template as +:name+
# @param url [String] passed to the template as +:url+
# @param filename [Object] ignored; the path is always derived from
#   +search_name+. Kept, and now optional, so existing three-argument callers
#   keep working.
# @return [String] full path of the written config file
def create_config_file(search_name, url, filename = nil)
  path = File.join(Dir.pwd, search_name + ".yml")

  # Render before opening so a template failure does not leave an empty file.
  contents = Basilisk::Template.default(:name => search_name, :url => url)

  # The block form closes the handle even when the write raises.
  File.open(path, "w") { |file| file.write(contents) }

  path
end
#create_results_folder(search_name) ⇒ Object
62 63 64 65 66 |
# File 'lib/basilisk/core.rb', line 62
#
# Makes a directory named +search_name+ inside the current working directory.
# Dir.mkdir raises if the directory cannot be created (for example when it
# already exists); that error is not handled here.
#
# @param search_name [String] name of the directory to create
# @return [String] full path of the new directory
def create_results_folder(search_name)
  File.join(Dir.pwd, search_name).tap { |results_dir| Dir.mkdir(results_dir) }
end
#run(search_opts) ⇒ Object
Takes search options and runs the crawler with any processors.
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
# File 'lib/basilisk/core.rb', line 8
#
# Takes search options and runs the crawler with any processors.
#
# Reads +url+, +user_agent+, +skip_patterns+ and +processor_instances+ from
# +search_opts+. Every crawled page is handed to each processor's
# +process_page+, and the processors are closed once the crawl finishes.
#
# @param search_opts [Object] options object exposing the readers listed above
def run(search_opts)
  # Ctrl-C must still close the processors before the process exits.
  trap("INT") do
    puts "\n**Interrupt received**\n***Closing processors...\n"
    close_processors(search_opts.processor_instances)
    Process.exit
  end

  Anemone.crawl(search_opts.url,
                :user_agent => search_opts.user_agent,
                :verbose => true) do |crawler|
    # A missing skip list is treated as "skip nothing".
    skip = search_opts.skip_patterns || []
    crawler.skip_links_like(skip)

    # At least one search processor must be specified.
    crawler.on_every_page do |crawled_page|
      search_opts.processor_instances.each do |handler|
        handler.process_page(crawled_page, crawler.pages)
      end
    end

    # Close callback on all processors.
    crawler.after_crawl { |_pages| close_processors(search_opts.processor_instances) }
  end
end