Module: Spidr::Filters
- Included in:
- Agent
- Defined in:
- lib/spidr_epg/filters.rb
Overview
Instance Attribute Summary collapse
-
#schemes ⇒ Object
List of acceptable URL schemes to follow.
Instance Method Summary collapse
-
#ignore_exts ⇒ Array<String, Regexp, Proc>
Specifies the patterns that match URI path extensions to not visit.
-
#ignore_exts_like(pattern = nil) {|ext| ... } ⇒ Object
Adds a given pattern to the #ignore_exts.
-
#ignore_hosts ⇒ Array<String, Regexp, Proc>
Specifies the patterns that match host-names to not visit.
-
#ignore_hosts_like(pattern = nil) {|host| ... } ⇒ Object
Adds a given pattern to the #ignore_hosts.
-
#ignore_links ⇒ Array<String, Regexp, Proc>
Specifies the patterns that match links to not visit.
-
#ignore_links_like(pattern = nil) {|link| ... } ⇒ Object
Adds a given pattern to the #ignore_links.
-
#ignore_ports ⇒ Array<Integer, Regexp, Proc>
Specifies the patterns that match ports to not visit.
-
#ignore_ports_like(pattern = nil) {|port| ... } ⇒ Object
Adds a given pattern to the #ignore_ports.
-
#ignore_urls ⇒ Array<String, Regexp, Proc>
Specifies the patterns that match URLs to not visit.
-
#ignore_urls_like(pattern = nil) {|url| ... } ⇒ Object
Adds a given pattern to the #ignore_urls.
-
#visit_exts ⇒ Array<String, Regexp, Proc>
Specifies the patterns that match the URI path extensions to visit.
-
#visit_exts_like(pattern = nil) {|ext| ... } ⇒ Object
Adds a given pattern to the #visit_exts.
-
#visit_hosts ⇒ Array<String, Regexp, Proc>
Specifies the patterns that match host-names to visit.
-
#visit_hosts_like(pattern = nil) {|host| ... } ⇒ Object
Adds a given pattern to the #visit_hosts.
-
#visit_links ⇒ Array<String, Regexp, Proc>
Specifies the patterns that match the links to visit.
-
#visit_links_like(pattern = nil) {|link| ... } ⇒ Object
Adds a given pattern to the #visit_links.
-
#visit_ports ⇒ Array<Integer, Regexp, Proc>
Specifies the patterns that match the ports to visit.
-
#visit_ports_like(pattern = nil) {|port| ... } ⇒ Object
Adds a given pattern to the #visit_ports.
-
#visit_urls ⇒ Array<String, Regexp, Proc>
Specifies the patterns that match the URLs to visit.
-
#visit_urls_like(pattern = nil) {|url| ... } ⇒ Object
Adds a given pattern to the #visit_urls.
Instance Attribute Details
#schemes ⇒ Object
List of acceptable URL schemes to follow.
10 11 12 |
# File 'lib/spidr_epg/filters.rb', line 10 def schemes @schemes end |
Instance Method Details
#ignore_exts ⇒ Array<String, Regexp, Proc>
Specifies the patterns that match URI path extensions to not visit.
331 332 333 |
# File 'lib/spidr_epg/filters.rb', line 331 def ignore_exts @ext_rules.reject end |
#ignore_exts_like(pattern = nil) {|ext| ... } ⇒ Object
Adds a given pattern to the #ignore_exts.
347 348 349 350 351 352 353 354 355 |
# File 'lib/spidr_epg/filters.rb', line 347 def ignore_exts_like(pattern=nil,&block) if pattern ignore_exts << pattern elsif block ignore_exts << block end return self end |
#ignore_hosts ⇒ Array<String, Regexp, Proc>
Specifies the patterns that match host-names to not visit.
63 64 65 |
# File 'lib/spidr_epg/filters.rb', line 63 def ignore_hosts @host_rules.reject end |
#ignore_hosts_like(pattern = nil) {|host| ... } ⇒ Object
Adds a given pattern to the #ignore_hosts.
79 80 81 82 83 84 85 86 87 |
# File 'lib/spidr_epg/filters.rb', line 79 def ignore_hosts_like(pattern=nil,&block) if pattern ignore_hosts << pattern elsif block ignore_hosts << block end return self end |
#ignore_links ⇒ Array<String, Regexp, Proc>
Specifies the patterns that match links to not visit.
195 196 197 |
# File 'lib/spidr_epg/filters.rb', line 195 def ignore_links @link_rules.reject end |
#ignore_links_like(pattern = nil) {|link| ... } ⇒ Object
Adds a given pattern to the #ignore_links.
211 212 213 214 215 216 217 218 219 |
# File 'lib/spidr_epg/filters.rb', line 211 def ignore_links_like(pattern=nil,&block) if pattern ignore_links << pattern elsif block ignore_links << block end return self end |
#ignore_ports ⇒ Array<Integer, Regexp, Proc>
Specifies the patterns that match ports to not visit.
127 128 129 |
# File 'lib/spidr_epg/filters.rb', line 127 def ignore_ports @port_rules.reject end |
#ignore_ports_like(pattern = nil) {|port| ... } ⇒ Object
Adds a given pattern to the #ignore_ports.
143 144 145 146 147 148 149 150 151 |
# File 'lib/spidr_epg/filters.rb', line 143 def ignore_ports_like(pattern=nil,&block) if pattern ignore_ports << pattern elsif block ignore_ports << block end return self end |
#ignore_urls ⇒ Array<String, Regexp, Proc>
Specifies the patterns that match URLs to not visit.
265 266 267 |
# File 'lib/spidr_epg/filters.rb', line 265 def ignore_urls @url_rules.reject end |
#ignore_urls_like(pattern = nil) {|url| ... } ⇒ Object
Adds a given pattern to the #ignore_urls.
283 284 285 286 287 288 289 290 291 |
# File 'lib/spidr_epg/filters.rb', line 283 def ignore_urls_like(pattern=nil,&block) if pattern ignore_urls << pattern elsif block ignore_urls << block end return self end |
#visit_exts ⇒ Array<String, Regexp, Proc>
Specifies the patterns that match the URI path extensions to visit.
299 300 301 |
# File 'lib/spidr_epg/filters.rb', line 299 def visit_exts @ext_rules.accept end |
#visit_exts_like(pattern = nil) {|ext| ... } ⇒ Object
Adds a given pattern to the #visit_exts.
315 316 317 318 319 320 321 322 323 |
# File 'lib/spidr_epg/filters.rb', line 315 def visit_exts_like(pattern=nil,&block) if pattern visit_exts << pattern elsif block visit_exts << block end return self end |
#visit_hosts ⇒ Array<String, Regexp, Proc>
Specifies the patterns that match host-names to visit.
31 32 33 |
# File 'lib/spidr_epg/filters.rb', line 31 def visit_hosts @host_rules.accept end |
#visit_hosts_like(pattern = nil) {|host| ... } ⇒ Object
Adds a given pattern to the #visit_hosts.
47 48 49 50 51 52 53 54 55 |
# File 'lib/spidr_epg/filters.rb', line 47 def visit_hosts_like(pattern=nil,&block) if pattern visit_hosts << pattern elsif block visit_hosts << block end return self end |
#visit_links ⇒ Array<String, Regexp, Proc>
Specifies the patterns that match the links to visit.
161 162 163 |
# File 'lib/spidr_epg/filters.rb', line 161 def visit_links @link_rules.accept end |
#visit_links_like(pattern = nil) {|link| ... } ⇒ Object
Adds a given pattern to the #visit_links.
179 180 181 182 183 184 185 186 187 |
# File 'lib/spidr_epg/filters.rb', line 179 def visit_links_like(pattern=nil,&block) if pattern visit_links << pattern elsif block visit_links << block end return self end |
#visit_ports ⇒ Array<Integer, Regexp, Proc>
Specifies the patterns that match the ports to visit.
95 96 97 |
# File 'lib/spidr_epg/filters.rb', line 95 def visit_ports @port_rules.accept end |
#visit_ports_like(pattern = nil) {|port| ... } ⇒ Object
Adds a given pattern to the #visit_ports.
111 112 113 114 115 116 117 118 119 |
# File 'lib/spidr_epg/filters.rb', line 111 def visit_ports_like(pattern=nil,&block) if pattern visit_ports << pattern elsif block visit_ports << block end return self end |
#visit_urls ⇒ Array<String, Regexp, Proc>
Specifies the patterns that match the URLs to visit.
229 230 231 |
# File 'lib/spidr_epg/filters.rb', line 229 def visit_urls @url_rules.accept end |
#visit_urls_like(pattern = nil) {|url| ... } ⇒ Object
Adds a given pattern to the #visit_urls.
247 248 249 250 251 252 253 254 255 |
# File 'lib/spidr_epg/filters.rb', line 247 def visit_urls_like(pattern=nil,&block) if pattern visit_urls << pattern elsif block visit_urls << block end return self end |