Module: BlackStack::Netting

Defined in:
lib/functions.rb

Overview


Network


Defined Under Namespace

Classes: ApiCallException

Constant Summary collapse

CALL_METHOD_GET =
'get'
CALL_METHOD_POST =
'post'
DEFAULT_SSL_VERIFY_MODE =
OpenSSL::SSL::VERIFY_NONE
SUCCESS =
'success'
@@lockfiles =
[]
@@max_api_call_channels =

0 means infinite

0

Class Method Summary collapse

Class Method Details

.add_param(url, param_name, param_value) ⇒ Object

Add a parameter to the url. It doesn’t validate if the param already exists.



704
705
706
707
708
709
710
711
712
713
714
715
716
# File 'lib/functions.rb', line 704

# Append a query parameter to a URL.
#
# The parameter is always appended; a parameter with the same name that is
# already present in the URL is left untouched (no de-duplication).
#
# @param url [String] URL to extend
# @param param_name [#to_s] name of the parameter to add
# @param param_value [#to_s] value of the parameter to add
# @return [String] the URL with the extra parameter in its query string
def self.add_param(url, param_name, param_value)
  uri = URI(url)
  pairs = URI.decode_www_form(uri.query || '')

  # Always go through encode_www_form so the new name/value are
  # percent-encoded, non-String values are accepted, and the parameter is
  # placed in the query rather than after a "#fragment".
  pairs << [param_name, param_value]
  uri.query = URI.encode_www_form(pairs)
  uri.to_s
end

.api_call(url, params = {}, method = BlackStack::Netting::CALL_METHOD_POST, ssl_verify_mode = BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, max_retries = 5) ⇒ Object



583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
# File 'lib/functions.rb', line 583

# Call a JSON API endpoint, retrying until the response reports success.
#
# Each attempt sends the request with call_post or call_get (depending on
# +method+), parses the body as JSON and checks that its 'status' field
# equals BlackStack::Netting::SUCCESS. Any StandardError raised during an
# attempt is recorded and the attempt is retried.
#
# @param url [String] endpoint address
# @param params [Hash] parameters sent with the request
# @param method [String] CALL_METHOD_POST or CALL_METHOD_GET
# @param ssl_verify_mode [Integer] OpenSSL verify mode passed to the HTTP call
# @param max_retries [Integer] maximum number of attempts
# @return [Hash] the parsed JSON response of the successful attempt
# @raise [RuntimeError] with the last recorded error when no attempt succeeded
def self.api_call(url, params={}, method=BlackStack::Netting::CALL_METHOD_POST, ssl_verify_mode=BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, max_retries=5)
  last_error = ""
  attempts = 0

  while attempts < max_retries
    attempts += 1
    begin
      uri = URI(url)
      res = BlackStack::Netting::call_post(uri, params, ssl_verify_mode) if method == BlackStack::Netting::CALL_METHOD_POST
      res = BlackStack::Netting::call_get(uri, params, ssl_verify_mode) if method == BlackStack::Netting::CALL_METHOD_GET
      parsed = JSON.parse(res.body)

      # Hand the parsed payload back to the caller instead of discarding it.
      return parsed if parsed['status'] == BlackStack::Netting::SUCCESS

      last_error = "Status: #{parsed['status'].to_s}. Description: #{parsed['value'].to_s}."
    rescue Errno::ECONNREFUSED => e
      # NOTE(review): to_console is not a core Exception method; presumably a
      # project extension - confirm it is loaded wherever this runs.
      last_error = "Errno::ECONNREFUSED:" + e.to_console
    rescue => e
      last_error = "Exception:" + e.to_console
    end
  end

  # Every attempt failed (or max_retries allowed none): report the last error.
  raise last_error
end

.call_get(url, params = {}, ssl_verify_mode = BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE) ⇒ Object

New call_get



515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
# File 'lib/functions.rb', line 515

# Perform an HTTP GET request, following redirects.
#
# The query string of +url+ is replaced by the encoded +params+.
#
# @param url [String, URI] address to request
# @param params [Hash] parameters encoded into the query string
# @param ssl_verify_mode [Integer] OpenSSL verify mode used for https URLs
# @return [Net::HTTPResponse] the successful response
# @raise [StandardError] whatever Net::HTTPResponse#error! raises for a
#   response that is neither a success nor a redirection
def self.call_get(url, params = {}, ssl_verify_mode=BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE) 
  uri = URI(url)
  uri.query = URI.encode_www_form(params)
  Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == 'https', :verify_mode => ssl_verify_mode) do |http|
    res = http.request(Net::HTTP::Get.new(uri))
    case res
    when Net::HTTPSuccess then res
    when Net::HTTPRedirection
      # Keep the caller's verify mode across the redirect instead of silently
      # falling back to the module default.
      BlackStack::Netting::call_get(URI(res['location']), params, ssl_verify_mode)
    else
      res.error!
    end
  end
end

.call_post(url, params = {}, ssl_verify_mode = BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, use_lockfile = true) ⇒ Object

Call the API and return the result. url: valid internet address. params: hash of params to attach in the call. ssl_verify_mode: you can disable SSL verification here. max_channels: this method uses lockfiles to prevent an excessive number of API calls from each datacenter. No more than max_channels simultaneous calls are allowed. TODO: set up max_simultaneous_calls in the configuration file.



537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
# File 'lib/functions.rb', line 537

# Perform an HTTP POST request with form-encoded params, following redirects.
#
# When BlackStack::Netting.max_api_call_channels is greater than zero, one of
# the configured lock files is picked at random and locked exclusively for the
# duration of the call, which limits how many calls run at the same time.
#
# @param url [String, URI] address to request
# @param params [Hash] form fields sent in the request body
# @param ssl_verify_mode [Integer] OpenSSL verify mode used for https URLs
# @param use_lockfile [Boolean] set to false to skip the channel lock
#   (used internally when following a redirect while already holding a lock)
# @return [Net::HTTPResponse] the successful response
# @raise [RuntimeError] if more channels are configured than lock files exist
# @raise [StandardError] whatever Net::HTTPResponse#error! raises for a
#   response that is neither a success nor a redirection
def self.call_post(url, params = {}, ssl_verify_mode=BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, use_lockfile=true)
  channels = BlackStack::Netting.max_api_call_channels.to_i
  lockfile = nil

  if channels > 0
    raise "Max Channels cannot be higher than #{BlackStack::Netting.lockfiles.size.to_s}" if channels > BlackStack::Netting.lockfiles.size
    # Remember the exact file we lock so that the very same file is unlocked
    # later, even if the configuration changes while the request is running.
    lockfile = BlackStack::Netting.lockfiles[rand(channels)] if use_lockfile
  end

  lockfile.flock(File::LOCK_EX) if lockfile

  begin
    uri = URI(url)
    Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == 'https', :verify_mode => ssl_verify_mode) do |http|
      req = Net::HTTP::Post.new(uri)
      # set_form_data sets Content-Type to application/x-www-form-urlencoded,
      # so no other Content-Type header is assigned here.
      req.set_form_data(params)
      res = http.request(req)
      case res
      when Net::HTTPSuccess then res
      when Net::HTTPRedirection
        # Follow the redirect with the caller's verify mode and without taking
        # a second lock.
        BlackStack::Netting::call_post(URI(res['location']), params, ssl_verify_mode, false)
      else
        res.error!
      end
    end
  ensure
    # Single release point: runs on success and on failure alike.
    lockfile.flock(File::LOCK_UN) if lockfile && !lockfile.closed?
  end
end

.change_param(url, param_name, param_value) ⇒ Object

Changes the value of a parameter in the url. It doesn’t validate if the param already exists.



719
720
721
722
723
724
725
726
# File 'lib/functions.rb', line 719

# Set the value of a query parameter in a URL.
#
# Every existing occurrence of +param_name+ is replaced by a single
# +param_value+; if the parameter is not present it is appended.
#
# @param url [String] URL to modify
# @param param_name [#to_s] name of the parameter to change
# @param param_value [#to_s] new value for the parameter
# @return [String] the URL with the updated query string
def self.change_param(url, param_name, param_value)
  uri = URI(url)

  # Group the decoded pairs by name (keeping their order); a URL without a
  # query string yields an empty hash.
  grouped = URI.decode_www_form(uri.query || '').each_with_object({}) do |(key, value), acc|
    (acc[key] ||= []) << value
  end

  # Use the requested parameter name rather than a fixed "start" key.
  grouped[param_name.to_s] = param_value
  uri.query = URI.encode_www_form(grouped)
  uri.to_s
end

.download(url, to) ⇒ Object

Download a file from an url to a local folder. url: must be somedomain.net instead of somedomain.net/, otherwise, it will throw exception. to: must be a valid path to a folder.



615
616
617
618
619
620
621
622
623
624
625
626
# File 'lib/functions.rb', line 615

# Download the resource at +url+ and write its body to the file +to+.
#
# @param url [String] absolute URL of the resource (must include a host)
# @param to [String] path of the local file to write (opened in binary mode)
# @return [Integer] number of bytes written
def self.download(url, to)
  uri = URI(url)
  # A leading "www." is stripped from the host before connecting.
  # NOTE(review): presumably deliberate - confirm the bare domain always
  # serves the same content (and, for https, the same certificate).
  domain = uri.host.start_with?('www.') ? uri.host[4..-1] : uri.host
  # Request the root when the URL carries no path at all.
  path = uri.path.empty? ? '/' : uri.path

  # Honor the URL's own port and scheme instead of always using plain HTTP
  # on the default port.
  Net::HTTP.start(domain, uri.port, :use_ssl => uri.scheme == 'https') do |http|
    resp = http.get(path)
    File.open(to, "wb") { |file| file.write(resp.body) }
  end
end

.file_age(filename) ⇒ Object

returns the age in days of the given file



651
652
653
# File 'lib/functions.rb', line 651

# Age of a file, measured from its ctime (File.ctime) to now.
#
# @param filename [String] path of the file
# @return [Float] age in days
def self.file_age(filename)
  seconds_per_day = 24*3600
  elapsed_seconds = Time.now - File.ctime(filename)
  elapsed_seconds / seconds_per_day
end

.get_host_without_www(url) ⇒ Object

Removes the ‘www.’ from an URL.



635
636
637
638
639
# File 'lib/functions.rb', line 635

# Extract the lower-cased host of a URL, without a leading "www.".
#
# A URL given without a scheme is treated as an http URL.
#
# @param url [String] URL or bare host/path
# @return [String] the host name
def self.get_host_without_www(url)
  parsed = URI.parse(url)
  parsed = URI.parse("http://#{url}") if parsed.scheme.nil?
  parsed.host.downcase.sub(/\Awww\./, '')
end

.get_redirect(url) ⇒ Object

Get the final URL if a web page is redirecting.



642
643
644
645
646
647
648
# File 'lib/functions.rb', line 642

# Resolve where a URL redirects to (one hop only).
#
# Issues a GET request and reads the Location header of the response.
#
# @param url [String] URL to probe
# @return [String] the redirect target as an absolute URL, or +url+ itself
#   when the response carries no Location header
def self.get_redirect(url)
  uri = URI.parse(url)
  location = Net::HTTP.get_response(uri)['location']
  return url if location.nil? || location.empty?

  # Location may be absolute or relative; merging against the requested URI
  # handles both instead of blindly prefixing "scheme://host".
  uri.merge(location).to_s
end

.get_url_extension(url) ⇒ Object

Return the extension of the last path into an URL. Example: get_url_extension(“connect.data.com/sitemap_index.xml?foo_param=foo_value”) => “.xml”



630
631
632
# File 'lib/functions.rb', line 630

# Extension of the last path segment of a URL; the query string is ignored.
#
# @param url [String] URL to inspect
# @return [String] the extension including the leading dot, or "" if none
def self.get_url_extension(url)
  url_path = URI.parse(url).path.to_s
  File.extname(url_path)
end

.getDomainFromEmail(email) ⇒ Object



756
757
758
759
760
761
762
# File 'lib/functions.rb', line 756

# Return the part of an email address after the last "@".
#
# Validation relies on String#email?, which is not a core Ruby method and is
# presumably provided elsewhere in the project.
#
# @param email [String] email address
# @return [String] the domain of the address
# @raise [RuntimeError] if +email+ does not pass the email? check
def self.getDomainFromEmail(email)
  raise "getDomainFromEmail: Wrong email format." unless email.email?

  email.split("@").last
end

.getDomainFromUrl(url) ⇒ Object

get the domain from any url



740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
# File 'lib/functions.rb', line 740

# Get the domain (host without a leading "www.") from any URL.
#
# A URL that does not start with http:// or https:// is treated as an http URL.
#
# @param url [String] URL or bare host/path
# @return [String] the host with a leading "www." removed
# @raise [RuntimeError] if no host can be extracted from the URL
def self.getDomainFromUrl(url)
  url = "http://#{url}" if url !~ %r{\Ahttps?://}i

  # Parse once and reuse the host; after the prefix step above the URL is
  # never empty, so no separate empty-string branch is needed.
  host = URI.parse(url).host
  raise "Cannot get domain for #{url}" if host.nil?

  host.sub(/^www\./, '')
end

.getWhoisDomains(domain, allow_heuristic_to_avoid_hosting_companies = false) ⇒ Object



764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
# File 'lib/functions.rb', line 764

# Collect the email domains of the registrant, admin and tech contacts found
# in the WHOIS record of a domain.
#
# @param domain [String] domain to look up
# @param allow_heuristic_to_avoid_hosting_companies [Boolean] currently has no
#   effect (TODO: develop this feature)
# @return [Array<String>] lower-cased contact email domains, without duplicates
def self.getWhoisDomains(domain, allow_heuristic_to_avoid_hosting_companies=false)
  record = Whois::Client.new.lookup(domain)

  contact_patterns = [
    /Registrant Email: (#{BlackStack::Strings::MATCH_EMAIL})/,
    /Admin Email: (#{BlackStack::Strings::MATCH_EMAIL})/,
    /Tech Email: (#{BlackStack::Strings::MATCH_EMAIL})/
  ]

  # Only the first match of each contact type is considered.
  found = contact_patterns.each_with_object([]) do |pattern, domains|
    first_match = record.to_s.scan(pattern).first
    next if first_match.nil?

    domains << BlackStack::Netting::getDomainFromEmail(first_match[0].downcase)
  end

  # remove duplicates
  found.uniq
end

.lockfilesObject



486
487
488
# File 'lib/functions.rb', line 486

# Accessor for the module-level list of lock files (@@lockfiles).
#
# @return [Array] the current contents of @@lockfiles
def self.lockfiles
  @@lockfiles
end

.max_api_call_channelsObject



482
483
484
# File 'lib/functions.rb', line 482

# Accessor for the module-level channel limit (@@max_api_call_channels).
#
# @return [Object] the current value of @@max_api_call_channels
def self.max_api_call_channels
  @@max_api_call_channels
end

.params(url) ⇒ Object

Returns a hash with the parameters in the url.



691
692
693
694
695
696
697
698
699
700
701
# File 'lib/functions.rb', line 691

def self.params(url)
  # TODO: Corregir este parche:
  # => El codigo de abajo usa la URL de una busqueda en google. Esta url generara una excepcion cuando se intenta parsear sus parametros.
  # => Ejecutar las 2 lineas de abajo para verificar.
  # => url = "https://www.google.com/webhp#q=[lead+generation]+%22John%22+%22Greater+New+York+City+Area+*+Financial+Services%22+site:linkedin.com%2Fpub+-site:linkedin.com%2Fpub%2Fdir"
  # => p = CGI::parse(URI.parse(url).query)
  # => La linea de abajo hace un gsbub que hace que esta url siga funcionando como busqueda de google, y ademas se posible parsearla.
  url = url.gsub("webhp#q=", "webhp?q=")

  return CGI::parse(URI.parse(url).query)
end

.set(h) ⇒ Object



490
491
492
493
494
495
496
497
498
499
# File 'lib/functions.rb', line 490

# Configure the number of API call channels and (re)create their lock files.
#
# One lock file named "./apicall.channel_<i>.lock" is opened for writing per
# channel. Lock files opened by a previous call are closed.
#
# @param h [Hash] configuration; reads the :max_api_call_channels key
#   (a missing or nil value opens no lock files)
# @return [nil]
def self.set(h)
  channels = h[:max_api_call_channels]

  # Open the new files first, so a failure here leaves the current
  # configuration untouched.
  fresh = channels.to_i.times.map do |i|
    File.open("./apicall.channel_#{i.to_s}.lock", "w")
  end

  stale = @@lockfiles
  @@max_api_call_channels = channels
  @@lockfiles = fresh

  # Release the handles of the previous configuration instead of leaking them.
  stale.each { |file| file.close unless file.closed? }
  nil
end

.set_param(url, param_name, param_value) ⇒ Object

Change or add the value of a parameter in the url, depending if the parameter already exists or not.



729
730
731
732
733
734
735
736
737
# File 'lib/functions.rb', line 729

# Set a query parameter on a URL: delegates to change_param when the
# parameter is already present, otherwise to add_param.
#
# @param url [String] URL to modify
# @param param_name [String] name of the parameter
# @param param_value [Object] value handed on to change_param / add_param
# @return [String] the resulting URL
def self.set_param(url, param_name, param_value)
  existing = BlackStack::Netting::params(url)
  return BlackStack::Netting::change_param(url, param_name, param_value) if existing.key?(param_name)

  BlackStack::Netting::add_param(url, param_name, param_value)
end