Method: Scrubyt::Extractor#initialize

Defined in:
lib/scrubyt/core/shared/extractor.rb

#initialize(mode, extractor_definition) ⇒ Extractor

Returns a new instance of Extractor.



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/scrubyt/core/shared/extractor.rb', line 40

# Builds an extractor from a DSL block and runs the extraction right away.
#
# The block is instance_eval'ed against a throwaway context object extended
# with NavigationActions. On that context, +next_page+ registers the
# next-page pattern, and every other unknown method call is recorded as a
# root pattern named after the method. After the definition is evaluated,
# +evaluate_extractor+ is run and its results are stored in <tt>@result</tt>
# together with the root patterns, the defining source file and the
# original proc.
#
# If the definition produces no root patterns, an error is logged and the
# process is terminated via +exit+.
#
# @param mode [Symbol] +:production+ is logged as 'Production'; any other
#   value is logged as 'Learning'
# @param extractor_definition [Proc] the DSL block describing the extractor
def initialize(mode, extractor_definition)
  @mode = mode
  @root_patterns = []
  @next_page_pattern = nil
  @evaluating_extractor_definition = false
  @next_page_list = []
  @processed_pages = []

  # NOTE(review): presumably backtrace[1] is the frame of the code that
  # defined the extractor; this depends on how SharedUtils.get_backtrace
  # trims frames, so keep this call directly in this method body -- confirm.
  backtrace = SharedUtils.get_backtrace
  # A frame looks like "path:LINE" or "path:LINE:in `meth'". Strip only that
  # trailing part instead of cutting at the first ':' so that paths which
  # themselves contain a colon (e.g. Windows "C:/dir/file.rb") stay intact.
  source_file = backtrace[1].sub(/:\d+(?::in\s.*)?\z/, '')

  Scrubyt.log :MODE, mode == :production ? 'Production' : 'Learning'

  @evaluating_extractor_definition = true
  begin
    context = Object.new
    context.extend NavigationActions
    # Singleton methods on the context form the DSL seen by the definition.
    context.instance_eval do
      # Gives the DSL methods below access to the extractor being built.
      def extractor=(value)
        @extractor = value
      end

      # Registers the pattern used to find the link to the next page.
      def next_page(*args)
        @extractor.next_page_pattern = Scrubyt::Pattern.new('next_page', args, @extractor)
      end

      # Any other call in the definition declares a root pattern named after
      # the called method; the new pattern is returned to the caller.
      def method_missing(method_name, *args, &block)
        root_pattern = Scrubyt::Pattern.new(method_name.to_s, args, @extractor, nil, &block)
        @extractor.root_patterns << root_pattern
        root_pattern
      end
    end
    context.extractor = self
    context.instance_eval(&extractor_definition)
  ensure
    # Reset the flag even when the definition raises, so it never stays
    # stuck at true on an object whose definition failed half-way.
    @evaluating_extractor_definition = false
  end

  if @root_patterns.empty?
    # TODO: this should be an exception
    Scrubyt.log :ERROR, 'No extractor defined, exiting...'
    exit
  end

  # Once all is set up, evaluate the extractor from the root pattern!
  root_results = evaluate_extractor

  @result = ScrubytResult.new('root')
  @result.push(*root_results)
  @result.root_patterns = @root_patterns
  @result.source_file = source_file
  @result.source_proc = extractor_definition

  Scrubyt.log :INFO, 'Extraction finished succesfully!'
end