Top Level Namespace

Defined Under Namespace

Modules: Socialinvestigator Classes: PageKnowledge

Instance Method Summary collapse

Instance Method Details

#check_regex(mashed_regex, value) ⇒ Object



426
427
428
429
430
431
432
433
434
435
436
437
438
# File 'lib/socialinvestigator/client/standalone_net.rb', line 426

# Tests +value+ against a "mashed" pattern: a regular-expression source,
# optionally followed by a literal <tt>\;</tt> and a result template.
#
# The template may contain the placeholders <tt>\1</tt> and <tt>\2</tt>,
# which are replaced by the first and second capture groups of the match
# (or by an empty string when that group is absent or did not participate).
# Only the first template after the pattern is used; any further
# <tt>\;</tt>-separated sections are ignored.
#
# @param mashed_regex [String] pattern source, optionally "pattern\;template"
# @param value [String, nil] text to test; +nil+ never matches
# @return [String] the filled-in template when the pattern matches and a
#   template was given
# @return [true] when the pattern matches and no template was given
# @return [false] when the pattern does not match
def check_regex( mashed_regex, value )
  regex, result = mashed_regex.split( /\\;/ )

  # split returns [] for an empty string (or one holding only separators),
  # leaving regex nil. Treat that as the empty pattern -- exactly what
  # "\;template" already produces -- rather than raising a TypeError.
  md = Regexp.new( regex.to_s ).match( value )
  return false unless md
  return true unless result

  # Block form on purpose: a String replacement would have its own backslash
  # sequences (\0, \1, \\ ...) interpreted by gsub, mangling captured text
  # that happens to contain a backslash. Doing both placeholders in a single
  # pass also keeps text inserted for \1 from being re-scanned for \2.
  result.gsub( /\\([12])/ ) { md[Regexp.last_match( 1 ).to_i] || "" }
end

#find_domain(hostname) ⇒ Object

Look up the domain



113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/socialinvestigator/client/standalone_net.rb', line 113

# Finds the closest enclosing name of +hostname+ that answers an SOA query.
#
# Starting with +hostname+ itself, a fresh Dnsruby resolver is asked for the
# SOA record of the current candidate. If the answer section holds at least
# one SOA record the candidate is returned. Otherwise the left-most label is
# removed and the lookup is repeated, giving up once the candidate has two
# labels or fewer.
#
# @param hostname [String] dotted host name to start from
# @return [String, nil] the first candidate with an SOA answer, or +nil+
def find_domain( hostname )
  candidate = hostname

  loop do
    answers = Dnsruby::Resolver.new.query( candidate, "SOA" ).answer
    return candidate if answers.any? { |record| record.is_a?( Dnsruby::RR::IN::SOA ) }

    labels = candidate.split( /\./ )
    return nil if labels.length <= 2

    # Drop the left-most label (keeping at most the next 100) and try again.
    candidate = labels[1, 100].join( "." )
  end
end

#find_id_path(links, regex) ⇒ Object



307
308
309
310
311
312
313
314
315
316
317
318
319
320
# File 'lib/socialinvestigator/client/standalone_net.rb', line 307

# Extracts identifiers from the links that match +regex+.
#
# For every matching link the first capture group is taken, or the whole
# link when the pattern has no first capture. Candidates containing "share"
# are discarded, as are links that do not match at all. Duplicates are
# removed from the result.
#
# @param links [Array<String>] links to inspect
# @param regex [Regexp] pattern, optionally with one capture group
# @return [Array<String>] unique identifiers in first-seen order
def find_id_path( links, regex )
  candidates = links.map do |link|
    found = regex.match( link )
    next unless found

    id = found[1] || link
    id unless id =~ /share/
  end

  candidates.compact.uniq
end

#hrefs(links, filter_shared = false) ⇒ Object



295
296
297
298
299
300
301
302
303
304
305
# File 'lib/socialinvestigator/client/standalone_net.rb', line 295

# Collects the unique 'href' values of +links+.
#
# @param links [Enumerable] elements that respond to <tt>['href']</tt>
# @param filter_shared [Boolean] when truthy, drop every URL matching /share/
# @return [Array] unique href values in first-seen order
def hrefs( links, filter_shared = false )
  urls = links.map { |link| link['href'] }
  urls = urls.reject { |url| url =~ /share/ } if filter_shared
  urls.uniq
end

#matching_links(parsed, regex) ⇒ Object

Look inside the body:



283
284
285
286
287
288
289
290
291
292
293
# File 'lib/socialinvestigator/client/standalone_net.rb', line 283

# Returns the <a> elements of +parsed+ whose 'href' matches +regex+.
#
# @param parsed [#css] document queried with the CSS selector "a"
#   (presumably a Nokogiri document -- confirm against the caller)
# @param regex [Regexp] pattern applied to each element's 'href'
# @return [Array] the matching elements, in document order
def matching_links( parsed, regex )
  parsed.css( "a" ).select { |anchor| regex.match( anchor['href'] ) }
end