Method: Scrubyt::Extractor#initialize
- Defined in:
- lib/scrubyt/core/shared/extractor.rb
#initialize(mode, extractor_definition) ⇒ Extractor
Returns a new instance of Extractor.
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
# File 'lib/scrubyt/core/shared/extractor.rb', line 40

# Builds an extractor from a DSL definition and runs the extraction at once.
#
# +extractor_definition+ is instance_eval'ed against a throwaway context
# object extended with NavigationActions. Inside that block:
# * +next_page+ sets this extractor's next-page pattern;
# * any other method the context does not already respond to creates a
#   Scrubyt::Pattern named after the method and appends it to the root
#   patterns.
#
# When at least one root pattern was declared, +evaluate_extractor+ is called
# and its results are stored in @result (a ScrubytResult named 'root'),
# together with the root patterns, the caller's source file and the
# definition proc itself.
#
# NOTE: if the definition declares no root pattern, an error is logged and the
# process is terminated via +exit+ (SystemExit) with status 1.
#
# @param mode [Symbol] stored in @mode; +:production+ is logged as
#   'Production', any other value as 'Learning'
# @param extractor_definition [Proc] the DSL block describing the patterns
# @return [Extractor] a new instance of Extractor
def initialize(mode, extractor_definition)
  @mode = mode
  @root_patterns = []
  @next_page_pattern = nil
  # @hpricot_doc = nil
  # @hpricot_doc_url = nil
  @evaluating_extractor_definition = false
  @next_page_list = []
  @processed_pages = []

  # Second backtrace entry; assumed to be the caller's "file:line[:in ...]"
  # frame -- TODO confirm against SharedUtils.get_backtrace.
  caller_entry = SharedUtils.get_backtrace[1]
  # Cut at the ":<line>" marker instead of the first colon so that paths which
  # themselves contain a colon (e.g. "C:/proj/x.rb:12:in ...") stay intact.
  # Entries that do not match fall back to the plain first-colon split.
  source_file = caller_entry[/\A(.+?):\d+(?::|\z)/, 1] || caller_entry.split(':')[0]

  Scrubyt.log :MODE, mode == :production ? 'Production' : 'Learning'

  @evaluating_extractor_definition = true
  begin
    context = Object.new
    context.extend NavigationActions
    # These +def+s become singleton methods of +context+ only.
    context.instance_eval do
      def extractor=(value)
        @extractor = value
      end

      def next_page(*args)
        @extractor.next_page_pattern = Scrubyt::Pattern.new('next_page', args, @extractor)
      end

      # Every unknown DSL call declares a new root pattern.
      def method_missing(method_name, *args, &block)
        root_pattern = Scrubyt::Pattern.new(method_name.to_s, args, @extractor, nil, &block)
        @extractor.root_patterns << root_pattern
        root_pattern
      end
    end
    context.extractor = self
    context.instance_eval(&extractor_definition)
  ensure
    # Reset even when the definition block raises, so the flag never stays set.
    @evaluating_extractor_definition = false
  end

  if @root_patterns.empty?
    # TODO: this should be an exception
    Scrubyt.log :ERROR, 'No extractor defined, exiting...'
    # Error path: exit with a failure status (bare +exit+ would report 0).
    exit(1)
  end

  # Once all is set up, evaluate the extractor from the root pattern!
  root_results = evaluate_extractor
  @result = ScrubytResult.new('root')
  @result.push(*root_results)
  @result.root_patterns = @root_patterns
  @result.source_file = source_file
  @result.source_proc = extractor_definition

  # Extraction is complete; the outcome is available in @result.
  Scrubyt.log :INFO, 'Extraction finished succesfully!'
end