Module: Traject::Macros::NokogiriMacros
- Included in:
- Indexer::NokogiriIndexer
- Defined in:
- lib/traject/macros/nokogiri_macros.rb
Instance Method Summary
Instance Method Details
#default_namespaces ⇒ Object
5 6 7 8 9 10 11 |
# File 'lib/traject/macros/nokogiri_macros.rb', line 5

# Namespace mappings applied to every XPath evaluated by these macros.
#
# Read from the "nokogiri.namespaces" setting, falling back to an empty hash
# when the setting is absent. The validated value is memoized in
# @default_namespaces, so the setting is only consulted until a value has
# been stored.
#
# @return [Hash] the configured namespaces (presumably prefix => URI)
# @raise [ArgumentError] if the setting is present but is not a Hash
def default_namespaces
  @default_namespaces ||= begin
    configured = settings["nokogiri.namespaces"] || {}

    unless configured.is_a?(Hash)
      raise ArgumentError, "nokogiri.namespaces must be a hash, not: #{configured.inspect}"
    end

    configured
  end
end
#extract_xpath(xpath, ns: {}, to_text: true) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/traject/macros/nokogiri_macros.rb', line 13

# Builds an indexing step that evaluates +xpath+ against a record and appends
# the matches to the accumulator.
#
# @param xpath [String] XPath expression passed to +record.xpath+
# @param ns [Hash, nil] extra namespace mappings; when non-empty they are
#   merged over #default_namespaces (entries in +ns+ win on conflict)
# @param to_text [Boolean] when true (the default) each match is reduced to a
#   String; when false the matched nodes themselves are accumulated
# @return [Proc] a lambda taking +(record, accumulator)+; it returns the
#   accumulator after concatenating the extracted values onto it
def extract_xpath(xpath, ns: {}, to_text: true)
  namespaces = default_namespaces
  namespaces = namespaces.merge(ns) if ns && !ns.empty?

  lambda do |record, accumulator|
    matches = record.xpath(xpath, namespaces)

    values =
      if to_text
        matches.map do |node|
          if defined?(::Nokogiri::XML::Attr) && node.is_a?(::Nokogiri::XML::Attr)
            # An attribute node has no descendant text nodes, so the text()
            # query below would always produce "" for it; use its value.
            node.value
          else
            # Gather every descendant text node, drop the ones that are pure
            # whitespace (the "between the children" indentation), and join
            # what is left with single spaces.
            node.xpath('.//text()').map(&:text).reject { |s| s =~ /\A\s+\z/ }.join(" ")
          end
        end
      else
        # Hand the matched nodes over untouched (Nokogiri::XML::Node objects).
        matches.to_a
      end

    accumulator.concat(values)
  end
end