Module: Spidr::Filters

Included in:
Agent
Defined in:
lib/spidr_epg/filters.rb

Overview

The Filters module adds methods to Agent for controlling which URLs the agent will visit.

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#schemes ⇒ Object

List of acceptable URL schemes to follow



10
11
12
# File 'lib/spidr_epg/filters.rb', line 10

# Reader for the list of acceptable URL schemes to follow.
#
# @return [Object]
#   Whatever is currently stored in `@schemes`.
def schemes
  @schemes
end

Instance Method Details

#ignore_exts ⇒ Array<String, Regexp, Proc>

Specifies the patterns that match URI path extensions to not visit.

Returns:

  • (Array<String, Regexp, Proc>)

    The URI path extension patterns to not visit.



331
332
333
# File 'lib/spidr_epg/filters.rb', line 331

# The patterns that match URI path extensions to not visit.
#
# Hands back the `reject` list held by `@ext_rules`.
#
# @return [Array<String, Regexp, Proc>]
#   The URI path extension patterns to not visit.
def ignore_exts
  ext_rules = @ext_rules
  ext_rules.reject
end

#ignore_exts_like(pattern = nil) {|ext| ... } ⇒ Object

Adds a given pattern to the #ignore_exts.

Parameters:

  • pattern (String, Regexp) (defaults to: nil)

    The pattern to match URI path extensions with.

Yields:

  • (ext)

    If a block is given, it will be used to filter URI path extensions.

Yield Parameters:

  • ext (String)

    A URI path extension to reject or accept.



347
348
349
350
351
352
353
354
355
# File 'lib/spidr_epg/filters.rb', line 347

# Adds a given pattern to the {#ignore_exts}.
#
# An explicit pattern wins over a block; the block is only stored when no
# pattern is supplied. With neither, the rules are left untouched.
#
# @param [String, Regexp] pattern
#   The pattern to match URI path extensions with.
#
# @yield [ext]
#   If a block is given, it will be used to filter URI path extensions.
#
# @yieldparam [String] ext
#   A URI path extension to reject or accept.
#
# @return [self]
#   The receiver.
def ignore_exts_like(pattern = nil, &block)
  rule = pattern || block
  ignore_exts << rule if rule

  self
end

#ignore_hosts ⇒ Array<String, Regexp, Proc>

Specifies the patterns that match host-names to not visit.

Returns:

  • (Array<String, Regexp, Proc>)

    The host-name patterns to not visit.



63
64
65
# File 'lib/spidr_epg/filters.rb', line 63

# The patterns that match host-names to not visit.
#
# Hands back the `reject` list held by `@host_rules`.
#
# @return [Array<String, Regexp, Proc>]
#   The host-name patterns to not visit.
def ignore_hosts
  host_rules = @host_rules
  host_rules.reject
end

#ignore_hosts_like(pattern = nil) {|host| ... } ⇒ Object

Adds a given pattern to the #ignore_hosts.

Parameters:

  • pattern (String, Regexp) (defaults to: nil)

    The pattern to match host-names with.

Yields:

  • (host)

    If a block is given, it will be used to filter host-names.

Yield Parameters:

  • host (String)

    A host-name to reject or accept.



79
80
81
82
83
84
85
86
87
# File 'lib/spidr_epg/filters.rb', line 79

# Adds a given pattern to the {#ignore_hosts}.
#
# An explicit pattern wins over a block; the block is only stored when no
# pattern is supplied. With neither, the rules are left untouched.
#
# @param [String, Regexp] pattern
#   The pattern to match host-names with.
#
# @yield [host]
#   If a block is given, it will be used to filter host-names.
#
# @yieldparam [String] host
#   A host-name to reject or accept.
#
# @return [self]
#   The receiver.
def ignore_hosts_like(pattern = nil, &block)
  rule = pattern || block
  ignore_hosts << rule if rule

  self
end

#ignore_links ⇒ Array<String, Regexp, Proc>

Specifies the patterns that match links to not visit.

Returns:

  • (Array<String, Regexp, Proc>)

    The link patterns to not visit.



195
196
197
# File 'lib/spidr_epg/filters.rb', line 195

# The patterns that match links to not visit.
#
# Hands back the `reject` list held by `@link_rules`.
#
# @return [Array<String, Regexp, Proc>]
#   The link patterns to not visit.
def ignore_links
  link_rules = @link_rules
  link_rules.reject
end

#ignore_links_like(pattern = nil) {|link| ... } ⇒ Object

Adds a given pattern to the #ignore_links.

Parameters:

  • pattern (String, Regexp) (defaults to: nil)

    The pattern to match links with.

Yields:

  • (link)

    If a block is given, it will be used to filter links.

Yield Parameters:

  • link (String)

    A link to reject or accept.



211
212
213
214
215
216
217
218
219
# File 'lib/spidr_epg/filters.rb', line 211

# Adds a given pattern to the {#ignore_links}.
#
# An explicit pattern wins over a block; the block is only stored when no
# pattern is supplied. With neither, the rules are left untouched.
#
# @param [String, Regexp] pattern
#   The pattern to match links with.
#
# @yield [link]
#   If a block is given, it will be used to filter links.
#
# @yieldparam [String] link
#   A link to reject or accept.
#
# @return [self]
#   The receiver.
def ignore_links_like(pattern = nil, &block)
  rule = pattern || block
  ignore_links << rule if rule

  self
end

#ignore_ports ⇒ Array<Integer, Regexp, Proc>

Specifies the patterns that match ports to not visit.

Returns:

  • (Array<Integer, Regexp, Proc>)

    The port patterns to not visit.



127
128
129
# File 'lib/spidr_epg/filters.rb', line 127

# The patterns that match ports to not visit.
#
# Hands back the `reject` list held by `@port_rules`.
#
# @return [Array<Integer, Regexp, Proc>]
#   The port patterns to not visit.
def ignore_ports
  port_rules = @port_rules
  port_rules.reject
end

#ignore_ports_like(pattern = nil) {|port| ... } ⇒ Object

Adds a given pattern to the #ignore_ports.

Parameters:

  • pattern (Integer, Regexp) (defaults to: nil)

    The pattern to match ports with.

Yields:

  • (port)

    If a block is given, it will be used to filter ports.

Yield Parameters:

  • port (Integer)

    A port to reject or accept.



143
144
145
146
147
148
149
150
151
# File 'lib/spidr_epg/filters.rb', line 143

# Adds a given pattern to the {#ignore_ports}.
#
# An explicit pattern wins over a block; the block is only stored when no
# pattern is supplied. With neither, the rules are left untouched.
#
# @param [Integer, Regexp] pattern
#   The pattern to match ports with.
#
# @yield [port]
#   If a block is given, it will be used to filter ports.
#
# @yieldparam [Integer] port
#   A port to reject or accept.
#
# @return [self]
#   The receiver.
def ignore_ports_like(pattern = nil, &block)
  rule = pattern || block
  ignore_ports << rule if rule

  self
end

#ignore_urls ⇒ Array<String, Regexp, Proc>

Specifies the patterns that match URLs to not visit.

Returns:

  • (Array<String, Regexp, Proc>)

    The URL patterns to not visit.

Since:

  • 0.2.4



265
266
267
# File 'lib/spidr_epg/filters.rb', line 265

# The patterns that match URLs to not visit.
#
# Hands back the `reject` list held by `@url_rules`.
#
# @return [Array<String, Regexp, Proc>]
#   The URL patterns to not visit.
#
# @since 0.2.4
def ignore_urls
  url_rules = @url_rules
  url_rules.reject
end

#ignore_urls_like(pattern = nil) {|url| ... } ⇒ Object

Adds a given pattern to the #ignore_urls.

Parameters:

  • pattern (String, Regexp) (defaults to: nil)

    The pattern to match URLs with.

Yields:

  • (url)

    If a block is given, it will be used to filter URLs.

Yield Parameters:

  • url (URI::HTTP, URI::HTTPS)

    A URL to reject or accept.

Since:

  • 0.2.4



283
284
285
286
287
288
289
290
291
# File 'lib/spidr_epg/filters.rb', line 283

# Adds a given pattern to the {#ignore_urls}.
#
# An explicit pattern wins over a block; the block is only stored when no
# pattern is supplied. With neither, the rules are left untouched.
#
# @param [String, Regexp] pattern
#   The pattern to match URLs with.
#
# @yield [url]
#   If a block is given, it will be used to filter URLs.
#
# @yieldparam [URI::HTTP, URI::HTTPS] url
#   A URL to reject or accept.
#
# @return [self]
#   The receiver.
#
# @since 0.2.4
def ignore_urls_like(pattern = nil, &block)
  rule = pattern || block
  ignore_urls << rule if rule

  self
end

#visit_exts ⇒ Array<String, Regexp, Proc>

Specifies the patterns that match the URI path extensions to visit.

Returns:

  • (Array<String, Regexp, Proc>)

    The URI path extensions patterns to visit.



299
300
301
# File 'lib/spidr_epg/filters.rb', line 299

# The patterns that match the URI path extensions to visit.
#
# Hands back the `accept` list held by `@ext_rules`.
#
# @return [Array<String, Regexp, Proc>]
#   The URI path extension patterns to visit.
def visit_exts
  ext_rules = @ext_rules
  ext_rules.accept
end

#visit_exts_like(pattern = nil) {|ext| ... } ⇒ Object

Adds a given pattern to the #visit_exts.

Parameters:

  • pattern (String, Regexp) (defaults to: nil)

    The pattern to match URI path extensions with.

Yields:

  • (ext)

    If a block is given, it will be used to filter URI path extensions.

Yield Parameters:

  • ext (String)

    A URI path extension to accept or reject.



315
316
317
318
319
320
321
322
323
# File 'lib/spidr_epg/filters.rb', line 315

# Adds a given pattern to the {#visit_exts}.
#
# An explicit pattern wins over a block; the block is only stored when no
# pattern is supplied. With neither, the rules are left untouched.
#
# @param [String, Regexp] pattern
#   The pattern to match URI path extensions with.
#
# @yield [ext]
#   If a block is given, it will be used to filter URI path extensions.
#
# @yieldparam [String] ext
#   A URI path extension to accept or reject.
#
# @return [self]
#   The receiver.
def visit_exts_like(pattern = nil, &block)
  rule = pattern || block
  visit_exts << rule if rule

  self
end

#visit_hosts ⇒ Array<String, Regexp, Proc>

Specifies the patterns that match host-names to visit.

Returns:

  • (Array<String, Regexp, Proc>)

    The host-name patterns to visit.



31
32
33
# File 'lib/spidr_epg/filters.rb', line 31

# The patterns that match host-names to visit.
#
# Hands back the `accept` list held by `@host_rules`.
#
# @return [Array<String, Regexp, Proc>]
#   The host-name patterns to visit.
def visit_hosts
  host_rules = @host_rules
  host_rules.accept
end

#visit_hosts_like(pattern = nil) {|host| ... } ⇒ Object

Adds a given pattern to the #visit_hosts.

Parameters:

  • pattern (String, Regexp) (defaults to: nil)

    The pattern to match host-names with.

Yields:

  • (host)

    If a block is given, it will be used to filter host-names.

Yield Parameters:

  • host (String)

    A host-name to accept or reject.



47
48
49
50
51
52
53
54
55
# File 'lib/spidr_epg/filters.rb', line 47

# Adds a given pattern to the {#visit_hosts}.
#
# An explicit pattern wins over a block; the block is only stored when no
# pattern is supplied. With neither, the rules are left untouched.
#
# @param [String, Regexp] pattern
#   The pattern to match host-names with.
#
# @yield [host]
#   If a block is given, it will be used to filter host-names.
#
# @yieldparam [String] host
#   A host-name to accept or reject.
#
# @return [self]
#   The receiver.
def visit_hosts_like(pattern = nil, &block)
  rule = pattern || block
  visit_hosts << rule if rule

  self
end

#visit_links ⇒ Array<String, Regexp, Proc>

Specifies the patterns that match the links to visit.

Returns:

  • (Array<String, Regexp, Proc>)

    The link patterns to visit.

Since:

  • 0.2.4



161
162
163
# File 'lib/spidr_epg/filters.rb', line 161

# The patterns that match the links to visit.
#
# Hands back the `accept` list held by `@link_rules`.
#
# @return [Array<String, Regexp, Proc>]
#   The link patterns to visit.
#
# @since 0.2.4
def visit_links
  link_rules = @link_rules
  link_rules.accept
end

#visit_links_like(pattern = nil) {|link| ... } ⇒ Object

Adds a given pattern to the #visit_links.

Parameters:

  • pattern (String, Regexp) (defaults to: nil)

    The pattern to match links with.

Yields:

  • (link)

    If a block is given, it will be used to filter links.

Yield Parameters:

  • link (String)

    A link to accept or reject.

Since:

  • 0.2.4



179
180
181
182
183
184
185
186
187
# File 'lib/spidr_epg/filters.rb', line 179

# Adds a given pattern to the {#visit_links}.
#
# An explicit pattern wins over a block; the block is only stored when no
# pattern is supplied. With neither, the rules are left untouched.
#
# @param [String, Regexp] pattern
#   The pattern to match links with.
#
# @yield [link]
#   If a block is given, it will be used to filter links.
#
# @yieldparam [String] link
#   A link to accept or reject.
#
# @return [self]
#   The receiver.
#
# @since 0.2.4
def visit_links_like(pattern = nil, &block)
  rule = pattern || block
  visit_links << rule if rule

  self
end

#visit_ports ⇒ Array<Integer, Regexp, Proc>

Specifies the patterns that match the ports to visit.

Returns:

  • (Array<Integer, Regexp, Proc>)

    The port patterns to visit.



95
96
97
# File 'lib/spidr_epg/filters.rb', line 95

# The patterns that match the ports to visit.
#
# Hands back the `accept` list held by `@port_rules`.
#
# @return [Array<Integer, Regexp, Proc>]
#   The port patterns to visit.
def visit_ports
  port_rules = @port_rules
  port_rules.accept
end

#visit_ports_like(pattern = nil) {|port| ... } ⇒ Object

Adds a given pattern to the #visit_ports.

Parameters:

  • pattern (Integer, Regexp) (defaults to: nil)

    The pattern to match ports with.

Yields:

  • (port)

    If a block is given, it will be used to filter ports.

Yield Parameters:

  • port (Integer)

    A port to accept or reject.



111
112
113
114
115
116
117
118
119
# File 'lib/spidr_epg/filters.rb', line 111

# Adds a given pattern to the {#visit_ports}.
#
# An explicit pattern wins over a block; the block is only stored when no
# pattern is supplied. With neither, the rules are left untouched.
#
# @param [Integer, Regexp] pattern
#   The pattern to match ports with.
#
# @yield [port]
#   If a block is given, it will be used to filter ports.
#
# @yieldparam [Integer] port
#   A port to accept or reject.
#
# @return [self]
#   The receiver.
def visit_ports_like(pattern = nil, &block)
  rule = pattern || block
  visit_ports << rule if rule

  self
end

#visit_urls ⇒ Array<String, Regexp, Proc>

Specifies the patterns that match the URLs to visit.

Returns:

  • (Array<String, Regexp, Proc>)

    The URL patterns to visit.

Since:

  • 0.2.4



229
230
231
# File 'lib/spidr_epg/filters.rb', line 229

# The patterns that match the URLs to visit.
#
# Hands back the `accept` list held by `@url_rules`.
#
# @return [Array<String, Regexp, Proc>]
#   The URL patterns to visit.
#
# @since 0.2.4
def visit_urls
  url_rules = @url_rules
  url_rules.accept
end

#visit_urls_like(pattern = nil) {|url| ... } ⇒ Object

Adds a given pattern to the #visit_urls.

Parameters:

  • pattern (String, Regexp) (defaults to: nil)

    The pattern to match URLs with.

Yields:

  • (url)

    If a block is given, it will be used to filter URLs.

Yield Parameters:

  • url (URI::HTTP, URI::HTTPS)

    A URL to accept or reject.

Since:

  • 0.2.4



247
248
249
250
251
252
253
254
255
# File 'lib/spidr_epg/filters.rb', line 247

# Adds a given pattern to the {#visit_urls}.
#
# An explicit pattern wins over a block; the block is only stored when no
# pattern is supplied. With neither, the rules are left untouched.
#
# @param [String, Regexp] pattern
#   The pattern to match URLs with.
#
# @yield [url]
#   If a block is given, it will be used to filter URLs.
#
# @yieldparam [URI::HTTP, URI::HTTPS] url
#   A URL to accept or reject.
#
# @return [self]
#   The receiver.
#
# @since 0.2.4
def visit_urls_like(pattern = nil, &block)
  rule = pattern || block
  visit_urls << rule if rule

  self
end