Top Level Namespace
Defined Under Namespace
Modules: Socialinvestigator Classes: PageKnowledge
Instance Method Summary collapse
- #check_regex(mashed_regex, value) ⇒ Object
-
#find_domain(hostname) ⇒ Object
Look up the domain.
- #find_id_path(links, regex) ⇒ Object
- #hrefs(links, filter_shared = false) ⇒ Object
-
#matching_links(parsed, regex) ⇒ Object
Look inside the body for links whose href matches the regex.
Instance Method Details
#check_regex(mashed_regex, value) ⇒ Object
426 427 428 429 430 431 432 433 434 435 436 437 438 |
# File 'lib/socialinvestigator/client/standalone_net.rb', line 426

# Tests +value+ against a pattern that may carry a result template.
#
# +mashed_regex+ is split on the literal two-character sequence "\;". The
# first piece is compiled as the regular expression. The second piece, when
# present, is a template in which "\1" and "\2" are replaced by the matching
# capture groups (an empty string when the group is absent or unmatched).
#
# @param mashed_regex [String] pattern, optionally followed by "\;" and a template
# @param value [String] text to test against the pattern
# @return [String, Boolean] the filled-in template when a template is given
#   and the pattern matches; true when it matches and no template is given;
#   false when the pattern does not match
def check_regex( mashed_regex, value )
  pattern, template = mashed_regex.split( /\\;/ )

  # "".split returns [], which leaves pattern nil and would make Regexp.new
  # raise TypeError; treat that case as an explicitly empty pattern.
  md = Regexp.new( pattern.to_s ).match( value )
  return false unless md
  return true unless template

  # Block form: the captured text is inserted verbatim, so backslash
  # sequences inside it (e.g. "\0") are not re-read as back-references.
  # Single pass: a "\2" that arrives via capture 1 is not substituted again.
  template.gsub( /\\([12])/ ) { md[Regexp.last_match( 1 ).to_i] || "" }
end
#find_domain(hostname) ⇒ Object
Look up the domain
113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
# File 'lib/socialinvestigator/client/standalone_net.rb', line 113

# Look up the domain.
#
# Issues an SOA query for +hostname+ through a new Dnsruby resolver. If the
# answer section holds an SOA record, +hostname+ itself is returned.
# Otherwise the leftmost label is dropped and the lookup is retried on the
# remainder. The search stops with nil once a name with two or fewer labels
# has been tried without success.
#
# @param hostname [String] dotted host name to start from
# @return [String, nil] the first name with an SOA answer, or nil
def find_domain( hostname )
  resolver = Dnsruby::Resolver.new
  answers = resolver.query( hostname, "SOA" ).answer
  return hostname if answers.any? { |record| record.is_a?( Dnsruby::RR::IN::SOA ) }

  labels = hostname.split( /\./ )
  return nil if labels.length <= 2

  # Drop the leftmost label (keeping at most 100 of the rest) and retry.
  find_domain( labels[1, 100].join( "." ) )
end
#find_id_path(links, regex) ⇒ Object
307 308 309 310 311 312 313 314 315 316 317 318 319 320 |
# File 'lib/socialinvestigator/client/standalone_net.rb', line 307

# Extracts identifiers from the links that match +regex+.
#
# For each matching link the first capture group is used, or the whole link
# when the match has no first capture. Results containing "share" are
# discarded, as are links that do not match. Duplicates are removed.
#
# @param links [Enumerable<String>] candidate links
# @param regex [Regexp] pattern, optionally with a capture group for the id
# @return [Array<String>] unique extracted values in first-seen order
def find_id_path( links, regex )
  extracted = links.map do |link|
    md = regex.match( link )
    next unless md

    candidate = md[1] || link
    candidate unless candidate =~ /share/
  end

  extracted.compact.uniq
end
#hrefs(links, filter_shared = false) ⇒ Object
295 296 297 298 299 300 301 302 303 304 305 |
# File 'lib/socialinvestigator/client/standalone_net.rb', line 295

# Collects the 'href' value of every link, without duplicates.
#
# @param links [Enumerable] objects that respond to ['href']
# @param filter_shared [Boolean] when truthy, drop hrefs that contain "share"
# @return [Array] unique href values in first-seen order
def hrefs( links, filter_shared = false )
  urls = links.map { |anchor| anchor['href'] }
  urls = urls.reject { |url| url =~ /share/ } if filter_shared
  urls.uniq
end
#matching_links(parsed, regex) ⇒ Object
Look inside the body for links whose href matches the regex.
283 284 285 286 287 288 289 290 291 292 293 |
# File 'lib/socialinvestigator/client/standalone_net.rb', line 283

# Look inside the body: returns the "a" elements whose 'href' matches +regex+.
#
# @param parsed [#css] document; css("a") must yield objects responding to ['href']
# @param regex [Regexp] pattern tested against each element's href
# @return [Array] the matching elements, in document order
def matching_links( parsed, regex )
  parsed.css( "a" ).select { |anchor| regex.match( anchor['href'] ) }
end