Class: Sc::XPathSelector

Inherits:
Selector
Defined in:
lib/scrappy/extractor/selectors/xpath.rb

Direct Known Subclasses

CssSelector

Instance Method Summary

Methods inherited from Selector

#select

Methods included from Scrappy::Formats

#format

Instance Method Details

#filter(doc) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/scrappy/extractor/selectors/xpath.rb', line 3

# Applies every pattern in rdf::value to the document content and returns a
# flat array of match hashes.
#
# For each pattern:
# * the matched nodes are narrowed to the single position given by the first
#   sc::index value (converted with to_i), or left complete when no index
#   is set;
# * when sc::keyword is non-empty, only nodes whose downcased, stripped text
#   is one of the keywords are kept;
# * when sc::attribute has values, one hash per (node, attribute) pair is
#   produced, carrying the attribute value; otherwise one hash per node is
#   produced whose value comes from #format.
#
# @param doc [Hash] must provide :uri and :content; :content has to respond
#   to #search(pattern) and return an indexable collection of nodes
# @return [Array<Hash>] hashes with :uri, :content and :value keys, plus
#   :attribute when attributes were requested
def filter(doc)
  matches = rdf::value.map do |expression|
    position = sc::index.first
    # A single-element range keeps the "collection or nil" result shape of
    # range indexing instead of returning a bare node.
    range = position ? (position.to_i..position.to_i) : (0..-1)
    keywords = sc::keyword

    # Range indexing yields nil when the position is past the end.
    candidates = doc[:content].search(expression)[range] || []
    accepted = candidates.select do |node|
      !keywords.any? || keywords.include?(node.text.downcase.strip)
    end

    accepted.map do |node|
      if sc::attribute.first
        # One entry per requested attribute of the node
        sc::attribute.map do |name|
          { :uri => doc[:uri], :content => node, :value => node[name], :attribute => name }
        end
      else
        # One entry for the node itself, with its formatted value
        [{ :uri => doc[:uri], :content => node, :value => format(node, sc::format, doc[:uri]) }]
      end
    end
  end

  matches.flatten
end