Class: Glib::JsonCrawler::Router

Inherits:
Object
  • Object
show all
Defined in:
lib/glib/json_crawler/router.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Router

Returns a new instance of Router.



48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/glib/json_crawler/router.rb', line 48

# Builds a router in its starting state: nesting depth -1, an empty log buffer,
# empty action sets, empty page stacks and similar-page skipping turned off.
def initialize
  # An already-assigned @host wins; otherwise fall back to
  # default rails's development host
  @host ||= 'localhost:3000'

  # Traversal visitor, constructed with crawler_test: true.
  @visitor = Glib::Json::Traversal::Visitor.new(crawler_test: true)

  # Action collections (both start empty).
  @http_actions = Set.new
  @read_only_actions = Set.new

  # Parallel stacks that begin_page pushes to and end_page pops from.
  @page_urls = []
  @page_specs = []

  # Log text and the nesting level used to indent each logged line.
  @logger = ''
  @depth = -1

  @skip_similar_page = false
end

Instance Attribute Details

#deferred_actions ⇒ Object (readonly)

Returns the value of attribute deferred_actions.



7
8
9
# File 'lib/glib/json_crawler/router.rb', line 7

# Read-only accessor for @deferred_actions.
# NOTE(review): the visible constructor never assigns @deferred_actions, so this
# returns nil unless it is set elsewhere — confirm whether it is still used.
def deferred_actions
  @deferred_actions
end

#host ⇒ Object

Returns the value of attribute host.



9
10
11
# File 'lib/glib/json_crawler/router.rb', line 9

# Returns the host stored in @host. The constructor defaults it to
# 'localhost:3000' when nothing was assigned; allowed? interpolates it into the
# pattern that URLs must match.
def host
  @host
end

#http_actions ⇒ Object (readonly)

Returns the value of attribute http_actions.



8
9
10
# File 'lib/glib/json_crawler/router.rb', line 8

# Read-only accessor for @http_actions, the Set created empty by the constructor.
# process_action adds [action, url] pairs to it.
def http_actions
  @http_actions
end

#last_log ⇒ Object (readonly)

Returns the value of attribute last_log.



7
8
9
# File 'lib/glib/json_crawler/router.rb', line 7

# Read-only accessor for @last_log, the un-indented line built by the most recent
# call to log (nil until log has been called).
def last_log
  @last_log
end

#logger ⇒ Object (readonly)

Returns the value of attribute logger.



7
8
9
# File 'lib/glib/json_crawler/router.rb', line 7

# Read-only accessor for @logger. Despite the name this is a String: it starts
# empty and log appends one indented, newline-terminated line per call.
def logger
  @logger
end

#read_only_actions ⇒ Object (readonly)

deprecated



6
7
8
# File 'lib/glib/json_crawler/router.rb', line 6

# Deprecated read-only accessor for @read_only_actions, the Set created empty by
# the constructor. The lines in process_action that used to add to it are
# commented out in favour of http_actions.
def read_only_actions
  @read_only_actions
end

#skip_similar_page ⇒ Object

Returns the value of attribute skip_similar_page.



9
10
11
# File 'lib/glib/json_crawler/router.rb', line 9

# Returns the @skip_similar_page flag, which the constructor initialises to false.
def skip_similar_page
  @skip_similar_page
end

Instance Method Details

#_puts(text) ⇒ Object



23
24
25
# File 'lib/glib/json_crawler/router.rb', line 23

# Prints +text+ to standard output, indented two spaces per nesting level.
#
# A negative @depth (the constructor starts it at -1) is treated as zero, because
# String#* raises ArgumentError when given a negative count.
#
# @param text [#to_s] the line to print
# @return [nil]
def _puts(text)
  indent = '  ' * [@depth, 0].max
  puts "#{indent}#{text}"
end

#allowed?(url) ⇒ Boolean

Returns:

  • (Boolean)


209
210
211
212
# File 'lib/glib/json_crawler/router.rb', line 209

# Tells whether +url+ may be followed: it must contain the configured host
# followed by at least one more character, and must not end in ".pdf".
#
# The host is escaped so that characters such as "." are matched literally, and
# the dot of ".pdf" is escaped inside the regex (in the previous double-quoted
# string "\." collapsed to ".", which rejected any URL ending in "<char>pdf").
#
# @param url [String, nil] the URL to check
# @return [Boolean] true when the URL is on the host and is not a PDF
def allowed?(url)
  pattern = /#{Regexp.escape(host.to_s)}.+(?<!\.pdf)$/
  pattern.match?(url)
end

#assert_target_ids_exist(args) ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
# File 'lib/glib/json_crawler/router.rb', line 27

# Registers every target id named in +args+ with the visitor (with no crawler
# attached) so that its presence within the page can be checked later.
#
# 'targetIds' takes precedence; 'targetId' is only consulted when 'targetIds'
# is absent.
def assert_target_ids_exist(args)
  many = args['targetIds']
  return many.each { |id| @visitor.defer_action(nil, id) } if many

  single = args['targetId']
  @visitor.defer_action(nil, single) if single
end

#begin_page(spec, url) ⇒ Object



189
190
191
192
193
# File 'lib/glib/json_crawler/router.rb', line 189

# Marks the start of a page: pushes +spec+ and +url+ onto their stacks (so that
# page_spec / page_url report them) and then notifies the visitor.
#
# Returns whatever the visitor's begin_page returns.
def begin_page(spec, url)
  @page_specs.push(spec)
  @page_urls.push(url)
  @visitor.begin_page(spec)
end

#crawl_multiple(views, block) ⇒ Object



185
186
187
# File 'lib/glib/json_crawler/router.rb', line 185

# Hands +views+ and +block+ straight to the visitor's traverse_multiple and
# returns its result. Note that +block+ is an ordinary positional argument.
def crawl_multiple(views, block)
  @visitor.traverse_multiple(views, block)
end

#end_page(spec) ⇒ Object



195
196
197
198
199
# File 'lib/glib/json_crawler/router.rb', line 195

# Marks the end of a page: drops the top entry of both page stacks and then
# notifies the visitor with +spec+.
#
# Returns whatever the visitor's end_page returns.
def end_page(spec)
  [@page_specs, @page_urls].each(&:pop)
  @visitor.end_page(spec)
end

#follow_v2(http, crawler_actions) ⇒ Object

@depth += 1

target_actions.each do |crawler_action|
  action, url = crawler_action
  http.get(url, action, {})
end

end



162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'lib/glib/json_crawler/router.rb', line 162

# Replays each collected crawler action through +http+.
#
# Every entry is an [action, url, params] triple. String params are parsed as
# JSON and missing params become an empty Hash. The action name
# (case-insensitive) selects the HTTP verb; anything unrecognised is sent as GET.
#
# The nesting depth is raised by one on entry and is not lowered again here.
#
# Returns +crawler_actions+ (the result of #each).
def follow_v2(http, crawler_actions)
  @depth += 1
  crawler_actions.each do |name, url, payload|
    payload = JSON.parse(payload) if payload.is_a?(String)
    payload ||= {}

    verb =
      case name.to_s.downcase
      when 'http/post-v1', 'forms/post' then :post
      when 'http/patch-v1', 'forms/patch' then :patch
      when 'http/put-v1', 'forms/put' then :put
      when 'http/delete-v1' then :delete
      else :get
      end

    http.public_send(verb, url, name, payload)
  end
end

#last_form ⇒ Object



140
141
142
# File 'lib/glib/json_crawler/router.rb', line 140

# Returns the last element of the visitor's forms collection.
# NOTE(review): presumably the form most recently encountered during traversal —
# confirm against the visitor implementation.
def last_form
  @visitor.forms.last
end

#log(action, url, response = nil) ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
# File 'lib/glib/json_crawler/router.rb', line 11

# Records one crawl step.
#
# Builds "action :: code :: url" (nil parts are dropped, and the response code
# is only included when the response is present), stores it in @last_log, and
# appends it to the @logger text indented two spaces per nesting level.
#
# A negative @depth (the constructor starts it at -1) is treated as zero, because
# String#* raises ArgumentError when given a negative count.
#
# @param action [Object] the action name
# @param url [Object] the URL the action points to
# @param response [Object, nil] optional response; must respond to #code when present
# @return [String] the accumulated log text
def log(action, url, response = nil)
  status = response.present? ? response.code : nil
  @last_log = [action, status, url].compact.join(' :: ')

  # puts @last_log

  indent = '  ' * [@depth, 0].max
  @logger += "#{indent}#{@last_log}\n"
end

#page_spec ⇒ Object



201
202
203
# File 'lib/glib/json_crawler/router.rb', line 201

# Returns the spec on top of the page stack, i.e. the one passed to the most
# recent begin_page that has not yet been closed by end_page (nil when empty).
def page_spec
  @page_specs.last
end

#page_url ⇒ Object



205
206
207
# File 'lib/glib/json_crawler/router.rb', line 205

# Returns the URL on top of the page stack, i.e. the one passed to the most
# recent begin_page that has not yet been closed by end_page (nil when empty).
def page_url
  @page_urls.last
end

#process_action(http, spec) ⇒ Object



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/glib/json_crawler/router.rb', line 84

# Dispatches one action spec (for example an onClick hash) to the crawler that
# handles its 'action' type.
#
# Flow:
# 1. A nil spec is ignored. Previously the nil slipped past the safe-navigation
#    fetch and crashed when its target ids were looked up.
# 2. Target ids named in the spec are registered for a later existence check.
# 3. Blank actions and pages reported as similar are skipped.
# 4. The nesting depth is raised while the matching crawler runs. Unhandled
#    actions are only logged and, unless they are delete/oauth actions,
#    recorded in http_actions.
#
# @param http [Object] HTTP driver handed to the action crawlers
# @param spec [Hash, nil] action spec; a non-nil spec must contain an 'action'
#   key (the lookup uses fetch, which raises KeyError on a Hash without it)
# @return [Integer, nil] the restored depth after a dispatch, nil when skipped
def process_action(http, spec)
  return if spec.nil?

  action = spec.fetch('action')
  params = spec

  assert_target_ids_exist(params)

  return unless action.present?
  return if similar_page?(params)

  @depth += 1
  case action
  when 'initiate_navigation'
    # @read_only_actions.add([action, params['url']])
    http_actions.add([action, params['url']])
    JsonCrawler::NavInitiate.new(http, params, action)
  when 'runMultiple-v1', 'runMultiple'
    JsonCrawler::RunMultiple.new(http, params, action)
  when 'windows/open-v1', 'dialogs/open-v1', 'windows/reload-v1', 'windows/open',
    'dialogs/open', 'windows/reload', 'windows/openWeb', 'windows/openWeb-v1'
    if allowed?(params['url'])
      # @read_only_actions.add([action, params['url']])
      http_actions.add([action, params['url']])
      JsonCrawler::WindowsOpen.new(http, params, action)
    else
      # URLs rejected by allowed? are logged but never followed.
      log(action, params['url'])
    end
  when 'dialogs/show-v1', 'dialogs/show', 'popovers/show-v1', 'popovers/show'
    JsonCrawler::DialogsShow.new(http, params, action)
  when 'sheets/select-v1', 'sheets/select'
    JsonCrawler::Menu.new(http, params, action)
  when 'http/post-v1', 'http/post'
    JsonCrawler::ActionHttp.new(:post, http, params, action)
  when 'forms/submit-v1', 'forms/submit'
    # forms = @visitor.forms
    # JsonCrawler::FormsSubmit.new(http, params, forms.last)
    JsonCrawler::FormsSubmit.new(http, params)
  when 'dialogs/alert-v1', 'dialogs/alert'
    JsonCrawler::DialogsAlert.new(http, params, action)
  when 'dialogs/close-v1', 'dialogs/close', 'popovers/close', 'popovers/close-v1'
    JsonCrawler::DialogsClose.new(http, params, action)
  else
    # Delete and oauth actions are logged only; every other unhandled action
    # is also recorded in http_actions.
    unless %w[http/delete-v1 dialogs/oauth-v1 http/delete dialogs/oauth].include?(action)
      # @read_only_actions.add([action, params['url']])
      http_actions.add([action, params['url']])
    end
    log(action, params['url'])
  end
  @depth -= 1
end

#should_defer_crawl?(action_crawler, args) ⇒ Boolean

Returns:

  • (Boolean)


39
40
41
42
43
44
45
46
# File 'lib/glib/json_crawler/router.rb', line 39

# Defers +action_crawler+ when +args+ names a 'targetId'.
#
# With a target id present the crawler is handed to the visitor together with
# that id and true is returned; otherwise nothing happens and false is returned.
def should_defer_crawl?(action_crawler, args)
  target = args['targetId']
  return false unless target

  @visitor.defer_action(action_crawler, target)
  true
end

#step(http, args) ⇒ Object



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/glib/json_crawler/router.rb', line 61

# Processes a single view node during the crawl.
#
# - Non-Hash nodes are ignored. The Hash check used to come after the
#   args['view'] lookup, so nil or Array nodes raised before reaching it.
# - 'fields/submit' views trigger a form submission one nesting level deeper.
# - Any other node has its 'onClick' action processed, unless the node is
#   marked rel="nofollow", is disabled, or has no onClick.
#
# @param http [Object] HTTP driver handed to the crawlers
# @param args [Hash, Object] the view spec
# @return [Object, nil] the result of process_action, or nil when nothing ran
def step(http, args)
  return unless args.is_a?(Hash)

  # TODO: Refactor
  case args['view']
  when 'fields/submit-v1', 'fields/submit'
    @depth += 1
    # forms = @visitor.forms
    # JsonCrawler::FormsSubmit.new(http, args, forms.last)
    JsonCrawler::FormsSubmit.new(http, args)
    @depth -= 1
    return
  end

  return if args['rel'] == 'nofollow'

  on_click = args.fetch('onClick', nil)
  process_action(http, on_click) if on_click && !args['disabled']

  # @read_only_actions.replace(@read_only_actions.sort_by { |e| e[1].to_s })
end