Class: Mushy::Browser

Inherits:
Flux
  • Object
show all
Defined in:
lib/mushy/fluxs/browser.rb

Direct Known Subclasses

Pdf, Screenshot

Instance Attribute Summary

Attributes inherited from Flux

#config, #id, #masher, #parent_fluxs, #subscribed_to, #type

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Flux

#convert_this_to_an_array, #convert_to_symbolized_hash, #execute, #execute_single_event, #group_these_results, #guard, inherited, #initialize, #join_these_results, #limit_these_results, #merge_these_results, #model_these_results, #outgoing_split_these_results, #shape_these, #sort_these_results, #standardize_these

Constructor Details

This class inherits a constructor from Mushy::Flux

Class Method Details

.details ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/mushy/fluxs/browser.rb', line 7

# Describes this flux: its display name, a one-line description, and the
# definition of every config field it accepts.
#
# @return [Hash] a hash with :name, :description and :config keys
def self.details
  # Most fields share one layout: description, type, shrink: true and a
  # starting value. Only the url field (no :shrink) is written out by hand.
  shrinkable = lambda do |description, type, value|
    { description: description, type: type, shrink: true, value: value }
  end

  # Column editors for the cookies grid, one entry per cookie attribute.
  cookie_editors = [
    { id: 'name',     target: 'name',     field: { type: 'text',    value: '',       default: '' } },
    { id: 'value',    target: 'value',    field: { type: 'text',    value: '',       default: '' } },
    { id: 'domain',   target: 'domain',   field: { type: 'text',    value: '',       default: '' } },
    { id: 'path',     target: 'path',     field: { type: 'text',    value: '',       default: '' } },
    { id: 'expires',  target: 'expires',  field: { type: 'text',    value: '',       default: '' } },
    { id: 'size',     target: 'size',     field: { type: 'integer', value: 0,        default: 0 } },
    { id: 'httpOnly', target: 'httpOnly', field: { type: 'boolean', value: false,    default: false } },
    { id: 'secure',   target: 'secure',   field: { type: 'boolean', value: true,     default: true } },
    { id: 'sameSite', target: 'sameSite', field: { type: 'text',    value: 'None',   default: 'None' } },
    { id: 'priority', target: 'priority', field: { type: 'text',    value: 'Medium', default: 'Medium' } },
  ]

  # Fields are added in the order they should be listed.
  fields = {}
  fields[:url] = { description: 'The URL to visit.', type: 'text', value: 'https://www.google.com' }
  fields[:headless] = shrinkable.call('Run this browser headless.', 'boolean', '')
  fields[:timeout] = shrinkable.call(
    'The default timeout (in seconds) before closing the browser. Default is 5 seconds.',
    'integer',
    ''
  )
  fields[:execute] = shrinkable.call('Javascript to run after the page is loaded.', 'textarea', '')
  fields[:cookies] = shrinkable.call(
    'Cookies for the web request. These can be received from a previous browser event with {{cookies}}, or can be typed manually.',
    'editgrid',
    []
  ).merge(editors: cookie_editors)
  fields[:carry_cookies_from] = shrinkable.call(
    'Carry the cookies from this path in the event. Defaults to "cookies".',
    'text',
    ''
  )
  fields[:headers] = shrinkable.call(
    'Headers for the web request. These can be received from a previous browser event with {{headers}}, or can be typed manually.',
    'keyvalue',
    {}
  )
  fields[:carry_headers_from] = shrinkable.call(
    'Carry the headers from this path in the event. Defaults to "headers".',
    'text',
    ''
  )
  fields[:wait_before_closing] = shrinkable.call(
    'Wait this many seconds before closing the browser.',
    'integer',
    ''
  )

  { name: 'Browser', description: 'Visit a page in a browser.', config: fields }
end

Instance Method Details

#adjust(input) ⇒ Object



115
116
117
# File 'lib/mushy/fluxs/browser.rb', line 115

# Picks what #process hands back out of the data gathered during a visit.
# This implementation returns the :result entry unchanged.
#
# @param input [Hash] the hash built by #process (:browser, :result, :config)
# @return [Object] the value stored under :result
def adjust(input)
  page_result = input[:result]
  page_result
end

#get_the_cookies_from(event, config) ⇒ Object



119
120
121
122
123
124
125
126
# File 'lib/mushy/fluxs/browser.rb', line 119

# Builds the list of cookies to load into the browser: the cookies carried
# on the event followed by the cookies listed in the config.
#
# The event is read at the key named by config[:carry_cookies_from], or at
# :cookies when that setting is blank. Anything that is not an Array (on
# either side) is treated as an empty list.
#
# A new array is returned; neither the event's array nor the config is
# modified, so handling the same event again does not pile up duplicates.
#
# @param event [Hash] the incoming event
# @param config [Hash] the flux config (:carry_cookies_from, :cookies)
# @return [Array] carried cookies followed by configured cookies
def get_the_cookies_from(event, config)
  source_key = config[:carry_cookies_from].to_s
  source_key = 'cookies' if source_key.empty?

  carried = event[source_key.to_sym]
  carried = [] unless carried.is_a?(Array)

  configured = config[:cookies].is_a?(Array) ? config[:cookies] : []

  # Array#+ allocates a fresh array instead of appending to the event's own.
  carried + configured
end

#get_the_headers_from(event, config) ⇒ Object



128
129
130
131
132
133
134
135
# File 'lib/mushy/fluxs/browser.rb', line 128

# Builds the request headers for the browser: the headers carried on the
# event, overlaid with the headers listed in the config (config wins when
# both define the same key).
#
# The event is read at the key named by config[:carry_headers_from], or at
# :headers when that setting is blank. Anything that is not a Hash (on
# either side) is treated as an empty hash.
#
# A new hash is returned; neither the event's hash nor the config is
# modified.
#
# @param event [Hash] the incoming event
# @param config [Hash] the flux config (:carry_headers_from, :headers)
# @return [Hash] the merged headers
def get_the_headers_from(event, config)
  source_key = config[:carry_headers_from].to_s
  source_key = 'headers' if source_key.empty?

  carried = event[source_key.to_sym]
  carried = {} unless carried.is_a?(Hash)

  configured = config[:headers].is_a?(Hash) ? config[:headers] : {}

  # Hash#merge returns a new hash, leaving the event's headers untouched.
  carried.merge(configured)
end

#process(event, config) ⇒ Object



81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/mushy/fluxs/browser.rb', line 81

# Opens a Ferrum browser, loads config[:url], and returns a snapshot of the
# page after passing it through #adjust.
#
# Order of work: apply the cookies and headers gathered from the event and
# config, navigate, optionally run config[:execute] as a script, optionally
# sleep for config[:wait_before_closing] seconds, then collect url, status,
# title, cookies, headers and body.
#
# The browser is always quit, including when any step raises.
#
# @param event [Hash] the incoming event (source of carried cookies/headers)
# @param config [Hash] the flux config (:url, :headless, :timeout, :execute,
#   :wait_before_closing, plus the cookie/header settings)
# @return [Object] whatever #adjust returns for the collected result
def process(event, config)
  # A blank timeout is truthy in Ruby and ''.to_i is 0, so test for blank
  # explicitly to get the 5 second default.
  timeout = config[:timeout].to_s == '' ? 5 : config[:timeout].to_i

  browser = Ferrum::Browser.new(
    # Headless unless the setting is explicitly the string/boolean false.
    headless: (config[:headless].to_s != 'false'),
    timeout: timeout
  )

  begin
    get_the_cookies_from(event, config).each { |cookie| browser.cookies.set(cookie) }

    browser.headers.add get_the_headers_from(event, config)

    browser.goto config[:url]

    # Skip the call entirely when no script was supplied (nil or '').
    browser.execute(config[:execute]) unless config[:execute].to_s.empty?

    wait = config[:wait_before_closing].to_i
    sleep(wait) if wait.positive?

    result = {
      url: browser.url,
      status: browser.network.status,
      title: browser.frames[0].title,
      cookies: browser.cookies.all.map { |_name, cookie| cookie.instance_variable_get('@attributes') },
      headers: browser.headers.get,
      body: browser.body
    }

    # adjust still receives the live browser, so it runs before the quit.
    adjust({ browser: browser, result: result, config: config })
  ensure
    browser.quit
  end
end