Module: Scrappy::Optimizer

Included in:
Agent
Defined in:
lib/scrappy/learning/optimizer.rb

Instance Method Summary

Instance Method Details

#optimize_patterns(kb, sample) ⇒ Object

Iterates through a knowledge base and tries to merge and generalize selectors, keeping a change whenever the resulting knowledge base still produces the same output.



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/scrappy/learning/optimizer.rb', line 5

# Tries to merge and generalize the selectors of the root fragments found in a
# knowledge base, delegating the actual work to +optimize+.
#
# @param kb knowledge base to read fragments from; +find+ and +node+ are called on it
# @param sample [Hash] sample page; only the +:uri+ and +:html+ entries are read
# @return [RDF::Graph] a new graph to which every fragment returned by
#   +optimize+ has been appended
def optimize_patterns(kb, sample)
  # Root fragments are the nodes typed sc:Fragment minus whatever the
  # sc:subfragment lookup returns (presumably fragments nested inside another
  # fragment -- confirm against kb.find semantics).
  typed_as_fragment = kb.find(nil, Node('rdf:type'), Node('sc:Fragment'))
  subfragment_hits  = kb.find([], Node('sc:subfragment'), nil)

  # Re-create each root as a node backed by a graph of its own triples.
  candidates = (typed_as_fragment - subfragment_hits).map do |root|
    kb.node(Node(root.id, RDF::Graph.new(root.all_triples)))
  end

  # Parse the sample HTML (as UTF-8) into the document hash handed to +optimize+.
  page = {
    :uri     => sample[:uri],
    :content => Nokogiri::HTML(sample[:html], nil, 'utf-8')
  }

  # The sample page is the only document passed to the optimizer.
  optimized = optimize(candidates, :docs => [page])

  # Collect the optimized fragments into a fresh graph and return it.
  RDF::Graph.new.tap do |result|
    optimized.each { |fragment| result << fragment }
  end
end