Class: Glib::JsonCrawler::Router

Inherits:
Object
  • Object
show all
Defined in:
lib/glib/json_crawler/router.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Router

Returns a new instance of Router.



54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/glib/json_crawler/router.rb', line 54

# Builds a router with empty crawl state.
def initialize
  # Logging state. Depth starts at -1; follow_v2, process_action and step
  # each bump it before doing any work.
  @depth = -1
  @logger = ''

  # Page stack: specs and URLs are pushed and popped together by
  # begin_page / end_page.
  @page_specs = []
  @page_urls = []

  # Action bookkeeping.
  @http_actions = Set.new
  @read_only_actions = Set.new
  @skip_similar_page = false

  # Fall back to the default Rails development host when none is set yet.
  @host ||= 'localhost:3000'

  @visitor = Glib::Json::Traversal::Visitor.new(crawler_test: true)
end

Instance Attribute Details

#deferred_actions ⇒ Object (readonly)

Returns the value of attribute deferred_actions.



7
8
9
# File 'lib/glib/json_crawler/router.rb', line 7

# Returns the current value of @deferred_actions.
# NOTE(review): none of the methods shown for this class assign this variable,
# so it may be nil unless it is set elsewhere — confirm.
def deferred_actions
  @deferred_actions
end

#host ⇒ Object

Returns the value of attribute host.



9
10
11
# File 'lib/glib/json_crawler/router.rb', line 9

# Returns the host that `allowed?` interpolates into its URL filter.
# `initialize` defaults it to 'localhost:3000' when nothing else has set it.
def host
  @host
end

#http_actions ⇒ Object (readonly)

Returns the value of attribute http_actions.



8
9
10
# File 'lib/glib/json_crawler/router.rb', line 8

# Returns the Set created in `initialize`. `process_action` adds an
# [action, url] pair to it for the actions it records.
def http_actions
  @http_actions
end

#last_log ⇒ Object (readonly)

Returns the value of attribute last_log.



7
8
9
# File 'lib/glib/json_crawler/router.rb', line 7

# Returns the most recent entry built by `log` (the ' :: '-joined line,
# without indentation or trailing newline). Nil until `log` has been called.
def last_log
  @last_log
end

#logger ⇒ Object (readonly)

Returns the value of attribute logger.



7
8
9
# File 'lib/glib/json_crawler/router.rb', line 7

# Returns the accumulated log text. Despite the name this is a String:
# `initialize` starts it empty and `log` appends one indented line per call.
def logger
  @logger
end

#read_only_actions ⇒ Object (readonly)

deprecated



6
7
8
# File 'lib/glib/json_crawler/router.rb', line 6

# Deprecated. Returns the Set created in `initialize`.
# NOTE(review): no active code shown for this class adds to this set
# (`http_actions` is populated instead), so it is expected to stay empty — confirm.
def read_only_actions
  @read_only_actions
end

#skip_similar_page ⇒ Object

Returns the value of attribute skip_similar_page.



9
10
11
# File 'lib/glib/json_crawler/router.rb', line 9

# Returns the skip_similar_page flag, which `initialize` sets to false.
# NOTE(review): presumably consulted by `similar_page?`, which is not shown
# here — confirm against that method.
def skip_similar_page
  @skip_similar_page
end

Instance Method Details

#_puts(text) ⇒ Object



29
30
31
# File 'lib/glib/json_crawler/router.rb', line 29

# Prints `text` to stdout, indented two spaces per current crawl depth.
#
# @param text [#to_s] the message to print
# @return [nil]
def _puts(text)
  # @depth starts at -1 (see initialize) and `String#*` raises ArgumentError
  # for a negative count, so clamp the indent at zero instead of crashing.
  indent = '  ' * [@depth, 0].max
  puts "#{indent}#{text}"
end

#allowed?(url) ⇒ Boolean

Returns:

  • (Boolean)


224
225
226
227
# File 'lib/glib/json_crawler/router.rb', line 224

# Tells whether `url` should be crawled: it must contain the configured host
# followed by at least one more character, and must not end in ".pdf".
#
# @param url [String, nil] the URL to check
# @return [Boolean] true when the URL may be followed; false otherwise
#   (including when `url` is nil)
def allowed?(url)
  # The host is escaped so that its dots match literally, and the pattern is a
  # regexp literal so that `\.` stays an escaped dot. Inside a double-quoted
  # string "\.pdf" collapses to ".pdf", which would also reject URLs that merely
  # end in "<any char>pdf".
  pattern = /#{Regexp.escape(host.to_s)}.+(?<!\.pdf)$/
  pattern.match?(url)
end

#assert_target_ids_exist(args) ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
# File 'lib/glib/json_crawler/router.rb', line 33

# Registers every target id named in `args` with the visitor (with no action
# attached) so that a later check can confirm the id really exists in the page.
#
# 'targetIds' takes precedence: when it is present, 'targetId' is not consulted.
def assert_target_ids_exist(args)
  many = args['targetIds']
  return many.each { |id| @visitor.defer_action(nil, id) } if many

  single = args['targetId']
  @visitor.defer_action(nil, single) if single
end

#begin_page(spec, url) ⇒ Object



204
205
206
207
208
# File 'lib/glib/json_crawler/router.rb', line 204

# Enters a page: pushes its spec and URL onto the page stacks, then notifies
# the visitor. Returns whatever the visitor's begin_page returns.
def begin_page(spec, url)
  @page_specs.push(spec)
  @page_urls.push(url)
  @visitor.begin_page(spec)
end

#crawl_multiple(views, block) ⇒ Object



200
201
202
# File 'lib/glib/json_crawler/router.rb', line 200

# Hands `views` and `block` straight to the visitor's traverse_multiple and
# returns its result. Note that `block` is an ordinary positional argument.
def crawl_multiple(views, block)
  @visitor.traverse_multiple(views, block)
end

#end_page(spec) ⇒ Object



210
211
212
213
214
# File 'lib/glib/json_crawler/router.rb', line 210

# Leaves the current page: drops the top entry from both page stacks, then
# notifies the visitor. Returns whatever the visitor's end_page returns.
def end_page(spec)
  [@page_specs, @page_urls].each(&:pop)
  @visitor.end_page(spec)
end

#follow_v2(http, crawler_actions) ⇒ Object

@depth += 1

target_actions.each do |crawler_action|
  action, url = crawler_action
  http.get(url, action, {})
end

end



170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# File 'lib/glib/json_crawler/router.rb', line 170

# Executes each [action, url, params] entry of `crawler_actions` in order.
#
# Two modes, selected per entry by ENV['GLIB_DISABLE_PERMISSION_TEST_SKIP']:
#
# * anything other than 'true' (skip mode): a non-blank URL gets the
#   `__glib_permission_test` parameter added before the action is executed.
# * 'true' (full mode): the action is executed inside a database transaction
#   that is always rolled back, so state is reset between URL checks.
#
# Returns `crawler_actions`.
def follow_v2(http, crawler_actions)
  @depth += 1

  crawler_actions.each do |action, url, params|
    if ENV['GLIB_DISABLE_PERMISSION_TEST_SKIP'] != 'true'
      # Skip mode: tag the request with the permission test parameter.
      url = add_params(url, __glib_permission_test: true) if url.present?
      execute_crawler_action(http, action, url, params)
      next
    end

    # Full mode. The rollback matters most for permission tests: they hit every
    # available URL purely to check its permission, so a single wrong result
    # (e.g. 403 instead of 200 caused by a side effect of an earlier request)
    # cannot be tolerated.
    #
    # Crawler tests cover a single scenario anyway, so a scenario that drifts
    # because of side effects is acceptable there; they do not pay the
    # performance cost of this rollback.
    ActiveRecord::Base.transaction do
      execute_crawler_action(http, action, url, params)
      raise ActiveRecord::Rollback
    end
  end
end

#last_form ⇒ Object



148
149
150
# File 'lib/glib/json_crawler/router.rb', line 148

# Returns the last entry of the visitor's `forms` collection.
def last_form
  forms = @visitor.forms
  forms.last
end

#log(action, key_data, response = nil) ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/glib/json_crawler/router.rb', line 11

# Records one log entry of the form "action :: status :: key_data" (nil parts
# are omitted), stores it as the last log and appends it, indented by the
# current depth, to the accumulated log text.
#
# @param action [String] the action name
# @param key_data [String, nil] usually a URL, but not always: for
#   dialogs_alert, for example, it is the alert message
# @param response [#code, nil] optional response whose code is logged
# @return [String] the accumulated log text after appending
def log(action, key_data, response = nil)
  # Only real URLs carry the permission test parameter that needs stripping.
  if key_data&.start_with?('http://', 'https://')
    key_data = remove_params(key_data, [:__glib_permission_test])
  end

  status = response.present? ? response.code : nil
  @last_log = [action, status, key_data].compact.join(' :: ')

  # @depth starts at -1 (see initialize) and `String#*` raises ArgumentError
  # for a negative count, so clamp the indent at zero instead of crashing.
  indent = '  ' * [@depth, 0].max
  @logger += "#{indent}#{@last_log}\n"
end

#page_spec ⇒ Object



216
217
218
# File 'lib/glib/json_crawler/router.rb', line 216

# Returns the spec of the innermost page: the top of the stack that
# begin_page pushes to and end_page pops from (nil when the stack is empty).
def page_spec
  @page_specs.last
end

#page_url ⇒ Object



220
221
222
# File 'lib/glib/json_crawler/router.rb', line 220

# Returns the URL of the innermost page: the top of the stack that
# begin_page pushes to and end_page pops from (nil when the stack is empty).
def page_url
  @page_urls.last
end

#process_action(http, spec) ⇒ Object



90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# File 'lib/glib/json_crawler/router.rb', line 90

# Dispatches one action spec (e.g. the value of an `onClick`) to the crawler
# class that handles its 'action', recording crawlable [action, url] pairs in
# `http_actions` along the way.
#
# @param http [Object] the HTTP client handed through to the action crawlers
# @param spec [Hash, nil] the action spec; must contain an 'action' key
# @return [Integer, nil] the restored depth after a dispatch; nil when there
#   was nothing to dispatch (nil spec, blank action, or a similar page)
# @raise [KeyError] when `spec` has no 'action' key
def process_action(http, spec)
  # A nil spec has nothing to dispatch. Without this guard the target id
  # lookup below would call `[]` on nil and raise NoMethodError.
  return if spec.nil?

  action = spec.fetch('action')
  params = spec

  assert_target_ids_exist(params)

  return unless action.present?
  return if similar_page?(params)

  @depth += 1
  begin
    case action
    when 'initiate_navigation'
      # @read_only_actions.add([action, params['url']])
      http_actions.add([action, params['url']])
      JsonCrawler::NavInitiate.new(http, params, action)
    when 'runMultiple-v1', 'runMultiple'
      JsonCrawler::RunMultiple.new(http, params, action)
    when 'windows/open-v1', 'dialogs/open-v1', 'windows/reload-v1', 'windows/open',
      'dialogs/open', 'windows/reload', 'windows/openWeb', 'windows/openWeb-v1'
      if allowed?(params['url'])
        # @read_only_actions.add([action, params['url']])
        http_actions.add([action, params['url']])
        JsonCrawler::WindowsOpen.new(http, params, action)
      else
        # URLs rejected by `allowed?` are logged but not followed.
        log(action, params['url'])
      end
    when 'dialogs/show-v1', 'dialogs/show', 'popovers/show-v1', 'popovers/show'
      JsonCrawler::DialogsShow.new(http, params, action)
    when 'sheets/select-v1', 'sheets/select'
      JsonCrawler::Menu.new(http, params, action)
    when 'http/post-v1', 'http/post'
      JsonCrawler::ActionHttp.new(:post, http, params, action)
    when 'forms/submit-v1', 'forms/submit'
      # forms = @visitor.forms
      # JsonCrawler::FormsSubmit.new(http, params, forms.last)
      JsonCrawler::FormsSubmit.new(http, params)
    when 'dialogs/alert-v1', 'dialogs/alert'
      JsonCrawler::DialogsAlert.new(http, params, action)
    when 'dialogs/close-v1', 'dialogs/close', 'popovers/close', 'popovers/close-v1'
      JsonCrawler::DialogsClose.new(http, params, action)
    else
      # Any other action is only logged. It is also recorded in http_actions
      # unless it is a delete or oauth action.
      unless %w[http/delete-v1 dialogs/oauth-v1 http/delete dialogs/oauth].include?(action)
        # @read_only_actions.add([action, params['url']])
        http_actions.add([action, params['url']])
      end
      log(action, params['url'])
    end
  ensure
    # Restore the depth even when an action crawler raises, so later log
    # output is not left permanently over-indented.
    @depth -= 1
  end

  # Keep the historical return value: the depth after it has been restored.
  @depth
end

#should_defer_crawl?(action_crawler, args) ⇒ Boolean

Returns:

  • (Boolean)


45
46
47
48
49
50
51
52
# File 'lib/glib/json_crawler/router.rb', line 45

# When `args` names a 'targetId', hands `action_crawler` to the visitor as a
# deferred action for that id and reports true; otherwise reports false
# without touching the visitor.
def should_defer_crawl?(action_crawler, args)
  target_id = args['targetId']
  return false unless target_id

  @visitor.defer_action(action_crawler, target_id)
  true
end

#step(http, args) ⇒ Object



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/glib/json_crawler/router.rb', line 67

# Processes one visited view spec.
#
# * A 'fields/submit' view submits its form via JsonCrawler::FormsSubmit.
# * Any other view has its 'onClick' action dispatched through
#   `process_action`, unless the view is marked rel="nofollow", is disabled,
#   or has no onClick.
#
# @param http [Object] the HTTP client handed through to the action crawlers
# @param args [Hash, Object] the view spec; anything that is not a Hash is ignored
# @return [Object, nil] the result of `process_action` when an onClick was
#   dispatched, nil otherwise
def step(http, args)
  # Check the type first: looking up 'view' on nil or an Array would raise
  # before the Hash check further down could ever reject them.
  return unless args.is_a?(Hash)

  # TODO: Refactor
  case args['view']
  when 'fields/submit-v1', 'fields/submit'
    @depth += 1
    begin
      # forms = @visitor.forms
      # JsonCrawler::FormsSubmit.new(http, args, forms.last)
      JsonCrawler::FormsSubmit.new(http, args)
    ensure
      # Restore the depth even if the submission raises.
      @depth -= 1
    end
    return
  end

  return if args['rel'] == 'nofollow'

  on_click = args.fetch('onClick', nil)
  process_action(http, on_click) if on_click && !args['disabled']

  # @read_only_actions.replace(@read_only_actions.sort_by { |e| e[1].to_s })
end