Class: Sc::Selector

Inherits:
Object
  • Object
show all
Includes:
RDF::NodeProxy, Scrappy::Formats
Defined in:
lib/scrappy/extractor/selector.rb

Instance Method Summary collapse

Methods included from Scrappy::Formats

#format

Instance Method Details

#select(doc) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/scrappy/extractor/selector.rb', line 6

# Runs this selector over a fragment, then pipes every match through the
# nested selectors, if there are any.
#
# When the selector's own debug flag is the string "true" and
# Scrappy::Agent::Options.debug is set, a trace of the selector, the input
# fragment and every match is written with puts.
#
# @param doc [#[]] fragment to select on; the trace reads its :uri, :content
#   and :value entries
# @return [Object] whatever +filter+ returned when there are no nested
#   selectors; otherwise the flattened array of the nested +select+ results
def select(doc)
  # Kept as a callable so the flags are read again after filtering,
  # exactly once before and once after.
  tracing = lambda { sc.debug.first == "true" && Scrappy::Agent::Options.debug }

  if tracing.call
    puts '== DEBUG'
    puts '== Selector:'
    puts node.serialize(:yarf, false)
    puts '== On fragment:'
    puts "URI: #{doc[:uri]}"
    puts "Content: #{doc[:content]}"
    puts "Value: #{doc[:value]}"
  end

  # The actual matching is delegated to +filter+, which each subclass defines.
  matches = filter(doc)

  if tracing.call
    puts "== No results" if matches.empty?
    matches.each_with_index do |match, idx|
      puts "== Result ##{idx}:"
      puts "URI: #{match[:uri]}"
      puts "Content: #{match[:content]}"
      puts "Value: #{match[:value].inspect}"
    end
    puts
  end

  if sc.selector.empty?
    # Leaf selector: hand the matches back untouched.
    matches
  else
    # Every match is handed to every nested selector node; the per-match,
    # per-selector arrays are then collapsed into a single flat list.
    matches.map { |match|
      sc.selector.map { |nested| graph.node(nested).select(match) }
    }.flatten
  end
end