Module: Basilisk::Core

Extended by:
Core
Included in:
Core
Defined in:
lib/basilisk/core.rb

Instance Method Summary collapse

Instance Method Details

#close_processors(instances) ⇒ Object



35
36
37
38
39
# File 'lib/basilisk/core.rb', line 35

# Sends +close_file+ to every processor in the given collection.
#
# @param instances [#each] processors, each responding to +close_file+
# @return [Object] whatever +instances.each+ returns (the collection itself
#   for an Array)
def close_processors(instances)
  instances.each(&:close_file)
end

#create(search_name, url) ⇒ Object

Creates a folder for the processor results and a default YAML config file in the current directory.



43
44
45
46
47
48
49
50
51
52
# File 'lib/basilisk/core.rb', line 43

# Creates the default config file and the results folder for a search, then
# prints the follow-up instructions via Basilisk::Template.
#
# Any StandardError raised while doing so is rescued and reported on stdout
# instead of being propagated to the caller.
#
# @param search_name [String] base name used for both the config file and
#   the results folder
# @param url [Object] forwarded to create_config_file
# @return [Object, nil] the result of
#   Basilisk::Template.output_instructions, or nil when an error was rescued
def create(search_name, url)
  # create_config_file derives the path itself and ignores its third
  # argument, so pass nil explicitly instead of the not-yet-assigned local.
  filename   = create_config_file(search_name, url, nil)
  foldername = create_results_folder(search_name)

  Basilisk::Template.output_instructions(search_name, filename, foldername)
rescue => e
  puts "Error: Could not create config file or folder."
  puts "Please make sure that a folder of the same name doesn't already exist.\n"
  puts "(#{e})"
end

#create_config_file(search_name, url, filename) ⇒ Object



54
55
56
57
58
59
60
# File 'lib/basilisk/core.rb', line 54

# Writes the default config template for a search to "<search_name>.yml" in
# the current working directory.
#
# @param search_name [String] search name; also the base name of the file
# @param url [Object] passed to the template as +:url+
# @param filename [Object] ignored: the path is always derived from
#   +search_name+. Kept (and now optional) so existing three-argument
#   callers keep working.
# @return [String] path of the file that was written
def create_config_file(search_name, url, filename = nil)
  path = File.join(Dir.pwd, search_name + ".yml")

  # Render before opening so that a template failure does not leave an
  # empty or truncated file behind.
  contents = Basilisk::Template.default(:name => search_name, :url => url)

  # Block form guarantees the handle is closed even if the write raises.
  File.open(path, "w") { |file| file.write(contents) }
  path
end

#create_results_folder(search_name) ⇒ Object



62
63
64
65
66
# File 'lib/basilisk/core.rb', line 62

# Makes a directory named after the search inside the current working
# directory.
#
# @param search_name [String] name of the directory to create
# @return [String] path of the newly created directory
# @raise [SystemCallError] whatever Dir.mkdir raises, e.g. when the
#   directory already exists
def create_results_folder(search_name)
  File.join(Dir.pwd, search_name).tap { |path| Dir.mkdir(path) }
end

#run(search_opts) ⇒ Object

Takes search options and runs the crawler with any processors.



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/basilisk/core.rb', line 8

# Crawls +search_opts.url+ with Anemone and hands every crawled page to each
# configured processor.
#
# Processors are closed through close_processors once the crawl finishes,
# and also from an INT signal handler installed here, which then exits the
# process.
#
# @param search_opts [Object] must respond to +url+, +user_agent+,
#   +skip_patterns+ and +processor_instances+
# @return [Object] whatever Anemone.crawl returns
def run(search_opts)
  # Ctrl-C would otherwise leave the processors open, so close them before
  # exiting.
  trap("INT") do
    puts "\n**Interrupt received**\n***Closing processors...\n"
    close_processors(search_opts.processor_instances)
    Process.exit
  end

  Anemone.crawl(search_opts.url, :user_agent => search_opts.user_agent, :verbose => true) do |crawler|
    # A missing skip list means "skip nothing".
    crawler.skip_links_like(search_opts.skip_patterns || [])

    # Every page goes to every processor, together with the crawler's pages.
    crawler.on_every_page do |current_page|
      search_opts.processor_instances.each { |processor| processor.process_page(current_page, crawler.pages) }
    end

    # Give the processors a chance to close once crawling is done.
    crawler.after_crawl { |_pages| close_processors(search_opts.processor_instances) }
  end
end