Class: Mushy::ParseHtml

Inherits:
Flux
  • Object
show all
Defined in:
lib/mushy/fluxs/parse_html.rb

Instance Attribute Summary

Attributes inherited from Flux

#config, #id, #masher, #parent_fluxs, #subscribed_to, #type

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Flux

#convert_this_to_an_array, #convert_to_symbolized_hash, #execute, #execute_single_event, #group_these_results, #guard, inherited, #initialize, #join_these_results, #limit_these_results, #merge_these_results, #model_these_results, #outgoing_split_these_results, #shape_these, #sort_these_results, #standardize_these

Constructor Details

This class inherits a constructor from Mushy::Flux

Class Method Details

.details ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/mushy/fluxs/parse_html.rb', line 7

# Describes this flux: its name, a short description, and the configuration
# entries it accepts.
#
# Two config entries are declared:
# * +path+    - a 'text' entry whose value is 'body'.
# * +extract+ - a 'keyvalue' entry whose value is <tt>{ url: 'a|@href' }</tt>.
#
# NOTE(review): the +value+ fields look like defaults shown to the user;
# nothing in this method applies them - confirm against the caller.
#
# @return [Hash] the descriptor hash with +:name+, +:description+ and +:config+
def self.details
  path_setting = {
    description: 'The path to the HTML in the incoming event.',
    type: 'text',
    value: 'body'
  }

  extract_setting = {
    description: 'The form of the event that is meant to be pulled from this event.',
    type: 'keyvalue',
    value: { url: 'a|@href' }
  }

  {
    name: 'ParseHtml',
    description: 'Parses HTML.',
    config: { path: path_setting, extract: extract_setting }
  }
end

Instance Method Details

#process(event, config) ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/mushy/fluxs/parse_html.rb', line 26

def process event, config

  doc = Nokogiri::HTML event[config[:path]]

  matches = config[:extract].keys.reduce( { } ) do |matches, key|
    css, value = config[:extract][key].split('|')
    value = value || './node()'

    matches[key] = doc.css(css).map { |x| x.xpath(value).to_s }
    matches
  end

  matches[matches.keys.first]
     .each_with_index
     .map { |_, i| i }
     .map do |i|
            matches.keys.reduce(SymbolizedHash.new( { } )) do |record, key|
              record[key] = matches[key][i]
              record[key] = record[key].strip if record[key]
              record
            end
          end
end