Class: Mushy::Browser

Inherits:
Flux
  • Object
show all
Defined in:
lib/mushy/fluxs/browser.rb

Direct Known Subclasses

Pdf, Screenshot

Instance Attribute Summary

Attributes inherited from Flux

#config, #flow, #id, #masher, #parent_fluxs, #subscribed_to, #type

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Flux

#convert_this_to_an_array, #convert_to_symbolized_hash, #execute, #execute_single_event, #group_these_results, #guard, #ignore_these_results, inherited, #initialize, #join_these_results, #limit_these_results, #merge_these_results, #model_these_results, #outgoing_split_these_results, #shape_these, #sort_these_results, #standardize_these

Constructor Details

This class inherits a constructor from Mushy::Flux

Class Method Details

.details ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/mushy/fluxs/browser.rb', line 7

# Describes this flux: its display name, a short description, and the
# configuration fields it accepts (each with a description, a type, and a
# default value).
#
# @return [Hash] the keys :name, :description and :config
def self.details
  # One editor per cookie attribute shown in the cookies edit grid.
  cookie_editors = [
    { id: 'name', target: 'name', field: { type: 'text', value: '', default: '' } },
    { id: 'value', target: 'value', field: { type: 'text', value: '', default: '' } },
    { id: 'domain', target: 'domain', field: { type: 'text', value: '', default: '' } },
    { id: 'path', target: 'path', field: { type: 'text', value: '', default: '' } },
    { id: 'expires', target: 'expires', field: { type: 'text', value: '', default: '' } },
    { id: 'size', target: 'size', field: { type: 'integer', value: 0, default: 0 } },
    { id: 'httpOnly', target: 'httpOnly', field: { type: 'boolean', value: false, default: false } },
    { id: 'secure', target: 'secure', field: { type: 'boolean', value: true, default: true } },
    { id: 'sameSite', target: 'sameSite', field: { type: 'text', value: 'None', default: 'None' } },
    { id: 'priority', target: 'priority', field: { type: 'text', value: 'Medium', default: 'Medium' } }
  ]

  # The insertion order below is the order in which the fields are listed.
  settings = {
    url: {
      description: 'The URL to visit.',
      type: 'text',
      value: 'https://www.google.com'
    },
    headless: {
      description: 'Run this browser headless.',
      type: 'boolean',
      shrink: true,
      value: ''
    },
    timeout: {
      description: 'The default timeout (in seconds) before closing the browser. Default is 5 seconds.',
      type: 'integer',
      shrink: true,
      value: ''
    },
    execute: {
      description: 'Javascript to run after the page is loaded.',
      type: 'textarea',
      shrink: true,
      value: ''
    },
    cookies: {
      description: 'Cookies for the web request. These can be received from a previous browser event with {{cookies}}, or can be typed manually.',
      type: 'editgrid',
      shrink: true,
      value: [],
      editors: cookie_editors
    },
    carry_cookies_from: {
      description: 'Carry the cookies from this path in the event. Defaults to "cookies".',
      type: 'text',
      shrink: true,
      value: ''
    },
    headers: {
      description: 'Headers for the web request. These can be received from a previous browser event with {{headers}}, or can be typed manually.',
      type: 'keyvalue',
      shrink: true,
      value: {}
    },
    carry_headers_from: {
      description: 'Carry the headers from this path in the event. Defaults to "headers".',
      type: 'text',
      shrink: true,
      value: ''
    },
    wait_before_closing: {
      description: 'Wait this many seconds before closing the browser.',
      type: 'integer',
      shrink: true,
      value: ''
    }
  }

  { name: 'Browser', description: 'Visit a page in a browser.', config: settings }
end

Instance Method Details

#adjust(input) ⇒ Object



118
119
120
# File 'lib/mushy/fluxs/browser.rb', line 118

# Picks the value that #process returns out of the data gathered during a
# page visit. This implementation hands back the :result entry untouched.
# NOTE(review): presumably the known subclasses override this hook to return
# something else -- confirm against their sources.
#
# @param input [Hash] #process passes the keys :browser, :result and :config
# @return [Object] the value stored under :result
def adjust(input)
  input[:result]
end

#get_the_cookies_from(event, config) ⇒ Object



122
123
124
125
126
127
128
129
# File 'lib/mushy/fluxs/browser.rb', line 122

# Builds the list of cookies to use for a page visit: the cookies carried in
# the event, followed by the cookies entered in the config.
#
# A new array is returned. Neither the event's cookie array nor the config is
# modified, so a reused (or frozen) event does not accumulate the configured
# cookies.
#
# @param event [Hash] the incoming event; the carried cookies are read from
#   the key named by config[:carry_cookies_from] ('cookies' when blank)
# @param config [Hash] reads :carry_cookies_from and :cookies
# @return [Array] carried cookies followed by configured cookies; a source
#   that is not an Array contributes nothing
def get_the_cookies_from(event, config)
  source_key = config[:carry_cookies_from].to_s
  source_key = 'cookies' if source_key.empty?

  carried = event[source_key.to_sym]
  carried = [] unless carried.is_a?(Array)

  configured = config[:cookies].is_a?(Array) ? config[:cookies] : []

  # Array#+ allocates a new array, leaving both inputs untouched.
  carried + configured
end

#get_the_headers_from(event, config) ⇒ Object



131
132
133
134
135
136
137
138
# File 'lib/mushy/fluxs/browser.rb', line 131

# Builds the headers to use for a page visit: the headers carried in the
# event, overlaid with the headers entered in the config (config wins when
# both define the same key).
#
# A new hash is returned. Neither the event's headers hash nor the config is
# modified, so a reused (or frozen) event does not pick up configured headers.
#
# @param event [Hash] the incoming event; the carried headers are read from
#   the key named by config[:carry_headers_from] ('headers' when blank)
# @param config [Hash] reads :carry_headers_from and :headers
# @return [Hash] the combined headers; a source that is not a Hash
#   contributes nothing
def get_the_headers_from(event, config)
  source_key = config[:carry_headers_from].to_s
  source_key = 'headers' if source_key.empty?

  carried = event[source_key.to_sym]
  carried = {} unless carried.is_a?(Hash)

  configured = config[:headers].is_a?(Hash) ? config[:headers] : {}

  # Hash#merge returns a new hash, leaving both inputs untouched.
  carried.merge(configured)
end

#process(event, config) ⇒ Object



81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/mushy/fluxs/browser.rb', line 81

# Visits config[:url] in a Ferrum browser and reports what was loaded.
#
# Steps: start the browser, apply the cookies and headers gathered from the
# event and config, navigate, optionally run config[:execute] as Javascript,
# optionally pause for config[:wait_before_closing] seconds, collect the page
# details, pass them through #adjust, and quit the browser.
#
# @param event [Hash] the incoming event (source of carried cookies/headers)
# @param config [Hash] reads :url, :headless, :timeout, :execute,
#   :wait_before_closing, plus whatever the cookie/header helpers read
# @return [Object] whatever #adjust returns for the collected page details
#   (:url, :status, :title, :cookies, :headers, :time, :body)
def process(event, config)
  # A blank, missing, or non-positive timeout falls back to the documented
  # default of 5 seconds ('' is truthy in Ruby, and ''.to_i is 0).
  requested_timeout = config[:timeout].to_i
  timeout = requested_timeout.positive? ? requested_timeout : 5

  browser = Ferrum::Browser.new(
    headless: config[:headless].to_s != 'false', # anything but 'false' runs headless
    timeout: timeout
  )

  begin
    get_the_cookies_from(event, config).each { |cookie| browser.cookies.set(cookie) }
    browser.headers.add(get_the_headers_from(event, config))

    # Monotonic clock: the measured duration is unaffected by wall-clock changes.
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    browser.goto(config[:url])
    time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

    # Skip blank scripts; the field defaults to an empty string.
    browser.execute(config[:execute]) unless config[:execute].to_s.strip.empty?

    wait = config[:wait_before_closing].to_i
    sleep(wait) if wait.positive?

    result = {
      url: browser.url,
      status: browser.network.status,
      title: browser.frames[0].title,
      cookies: browser.cookies.all.map { |_name, cookie| cookie.instance_variable_get('@attributes') },
      headers: browser.headers.get,
      time: time,
      body: browser.body
    }

    adjust({ browser: browser, result: result, config: config })
  ensure
    # Always shut the browser down, even when a step above raised.
    browser.quit
  end
end