Module: Traject::Macros::NokogiriMacros

Included in:
Indexer::NokogiriIndexer
Defined in:
lib/traject/macros/nokogiri_macros.rb

Instance Method Summary

Instance Method Details

#default_namespaces ⇒ Object



5
6
7
8
9
10
11
# File 'lib/traject/macros/nokogiri_macros.rb', line 5

# Namespace prefix => URI mappings taken from the "nokogiri.namespaces"
# setting, falling back to an empty hash when the setting is absent.
# The result is memoized in @default_namespaces after the first
# successful lookup; an invalid setting is never cached.
#
# @return [Hash] the configured namespaces
# @raise [ArgumentError] if the setting is present but is not a Hash
def default_namespaces
  @default_namespaces ||= begin
    configured = settings["nokogiri.namespaces"] || {}
    unless configured.kind_of?(Hash)
      raise ArgumentError, "nokogiri.namespaces must be a hash, not: #{configured.inspect}"
    end
    configured
  end
end

#extract_xpath(xpath, ns: {}, to_text: true) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/traject/macros/nokogiri_macros.rb', line 13

# Builds a lambda that runs an XPath query against a record and appends the
# matches to the accumulator.
#
# @param xpath [String] XPath expression handed to `record.xpath`
# @param ns [Hash, nil] extra prefix => URI mappings merged over
#   `default_namespaces` (entries given here win on conflict); nil or empty
#   means only the defaults are used
# @param to_text [Boolean] when true (the default) each match is reduced to a
#   String; when false the matched nodes themselves are accumulated
# @return [Proc] lambda taking `(record, accumulator)`
def extract_xpath(xpath, ns: {}, to_text: true)
  # Namespaces are resolved once, when the macro is built, not per record.
  namespaces = (ns && ns.length > 0) ? default_namespaces.merge(ns) : default_namespaces

  lambda do |record, accumulator|
    matches = record.xpath(xpath, namespaces)

    values =
      if to_text
        matches.collect do |node|
          if node.kind_of?(Nokogiri::XML::Attr)
            # An attribute node (e.g. matched by `//foo/@bar`) has no text()
            # children for the query below to select, so it would come out as
            # an empty string; use the attribute's value instead.
            node.value
          else
            # Take all text content under the match and join it with spaces,
            # skipping whitespace-only text nodes, which are just the
            # "between the children" formatting whitespace.
            node.xpath('.//text()').collect(&:text).reject { |s| s =~ /\A\s+\z/ }.join(" ")
          end
        end
      else
        # just put all matches in accumulator as Nokogiri::XML::Node's
        matches.to_a
      end

    accumulator.concat values
  end
end