Module: BlackStack::Netting

Defined in:
lib/functions.rb

Overview


Network


Defined Under Namespace

Classes: ApiCallException

Constant Summary collapse

CALL_METHOD_GET =
'get'
CALL_METHOD_POST =
'post'
DEFAULT_SSL_VERIFY_MODE =
OpenSSL::SSL::VERIFY_NONE
SUCCESS =
'success'
@@lockfiles =
[]
@@max_api_call_channels =

0 means infinite

0

Class Method Summary collapse

Class Method Details

.add_param(url, param_name, param_value) ⇒ Object

Add a parameter to the url. It doesn’t validate if the param already exists.



736
737
738
739
740
741
742
743
744
745
746
747
748
# File 'lib/functions.rb', line 736

# Append a query-string parameter to +url+ and return the resulting URL.
#
# The pair is always appended: an existing parameter with the same name is
# left untouched, so duplicated names are possible.
#
# url:: the URL to extend (anything accepted by URI()).
# param_name:: name of the parameter to append.
# param_value:: value of the parameter to append.
#
# Returns the new URL as a String.
def self.add_param(url, param_name, param_value)
  uri = URI(url)
  params = URI.decode_www_form(uri.query || '')

  # Always build the query through encode_www_form, so the name and value are
  # escaped, non-String values are accepted, and the new pair is placed in the
  # query string rather than after a trailing "#fragment".
  params << [param_name, param_value]
  uri.query = URI.encode_www_form(params)
  uri.to_s
end

.api_call(url, params = {}, method = BlackStack::Netting::CALL_METHOD_POST, ssl_verify_mode = BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, max_retries = 5) ⇒ Object



615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
# File 'lib/functions.rb', line 615

# Call a JSON API endpoint, repeating the request until the response reports
# success or +max_retries+ attempts have been made.
#
# url:: address of the endpoint.
# params:: hash of parameters sent with the request.
# method:: CALL_METHOD_POST or CALL_METHOD_GET; selects call_post or call_get.
# ssl_verify_mode:: forwarded to call_post / call_get.
# max_retries:: maximum number of attempts.
#
# An attempt succeeds when the response body parses as JSON and its 'status'
# entry equals SUCCESS. Returns nil on success; raises a RuntimeError carrying
# the description of the last failure otherwise.
def self.api_call(url, params={}, method=BlackStack::Netting::CALL_METHOD_POST, ssl_verify_mode=BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, max_retries=5)
  attempts = 0
  succeeded = false
  last_error = ""

  until attempts >= max_retries || succeeded
    begin
      attempts += 1
      uri = URI(url)

      # Any other method value leaves the response nil; the resulting
      # failure is caught below and counted as a failed attempt.
      res = if method == BlackStack::Netting::CALL_METHOD_POST
        BlackStack::Netting::call_post(uri, params, ssl_verify_mode)
      elsif method == BlackStack::Netting::CALL_METHOD_GET
        BlackStack::Netting::call_get(uri, params, ssl_verify_mode)
      end

      payload = JSON.parse(res.body)
      if payload['status'] == BlackStack::Netting::SUCCESS
        succeeded = true
      else
        last_error = "Status: #{payload['status'].to_s}. Description: #{payload['value'].to_s}." 
      end
    rescue Errno::ECONNREFUSED => refused
      # NOTE(review): to_console is not a core Exception method; presumably
      # it is added elsewhere in the project - confirm.
      last_error = "Errno::ECONNREFUSED:" + refused.to_console
    rescue => failure
      last_error = "Exception:" + failure.to_console
    end
  end

  raise "#{last_error}" unless succeeded
end

.call_get(url, params = {}, ssl_verify_mode = BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, support_redirections = true) ⇒ Object

New call_get



545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
# File 'lib/functions.rb', line 545

# Perform an HTTP GET request and return the response.
#
# url:: address to request (String or URI). Its query string is replaced by
#       the encoded +params+.
# params:: hash of parameters sent as the query string.
# ssl_verify_mode:: OpenSSL verify mode used for https requests.
# support_redirections:: when true, a redirection response is followed once.
#
# Returns the Net::HTTPResponse for a successful request, or nil when the
# server answers with a redirection that is not followed. Any other response
# is raised through Net::HTTPResponse#error!.
def self.call_get(url, params = {}, ssl_verify_mode=BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, support_redirections=true) 
  uri = URI(url)
  uri.query = URI.encode_www_form(params)
  Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == 'https', :verify_mode => ssl_verify_mode) do |http|
    req = Net::HTTP::Get.new uri

    res = http.request req
    case res
    when Net::HTTPSuccess then res
    when Net::HTTPRedirection
      # Follow the redirection a single time, keeping the caller's SSL verify
      # mode. The `false` must land in the support_redirections position
      # (4th argument); passing it 3rd would disable nothing and would
      # overwrite the verify mode instead.
      BlackStack::Netting::call_get(URI(res['location']), params, ssl_verify_mode, false) if support_redirections
    else
      res.error!
    end
  end
end

.call_post(url, params = {}, ssl_verify_mode = BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, support_redirections = true) ⇒ Object

Call the API and return the result. url: valid internet address. params: hash of params to attach to the call. ssl_verify_mode: you can disable SSL verification here. max_channels: this method uses lockfiles to prevent an excessive number of API calls from each datacenter. No more than max_channels simultaneous calls are allowed. TODO: set up max_simultaneus_calls in the configuration file.



567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
# File 'lib/functions.rb', line 567

# Perform an HTTP POST request with form-encoded parameters and return the
# response.
#
# url:: address to request (String or URI).
# params:: hash of parameters sent as the form-encoded request body.
# ssl_verify_mode:: OpenSSL verify mode used for https requests.
# support_redirections:: when true, a redirection response is followed once.
#
# Returns the Net::HTTPResponse for a successful request, or nil when the
# server answers with a redirection that is not followed. Any other response
# is raised through Net::HTTPResponse#error!; exceptions propagate to the
# caller unchanged.
#
# NOTE: throttling of simultaneous calls through the lock files
# (BlackStack::Netting.lockfiles / max_api_call_channels) is currently
# disabled; no lock is taken or released here.
def self.call_post(url, params = {}, ssl_verify_mode=BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, support_redirections=true)
  uri = URI(url)
  Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == 'https', :verify_mode => ssl_verify_mode) do |http|
    req = Net::HTTP::Post.new(uri)
    # set_form_data fills the body and sets the Content-Type header to
    # application/x-www-form-urlencoded, so no header is set by hand.
    req.set_form_data(params)

    res = http.request req
    case res
    when Net::HTTPSuccess then res
    when Net::HTTPRedirection
      # Follow the redirection a single time, keeping the SSL verify mode
      # requested by the caller instead of falling back to the default.
      BlackStack::Netting::call_post(URI(res['location']), params, ssl_verify_mode, false) if support_redirections
    else
      res.error!
    end
  end
end

.change_param(url, param_name, param_value) ⇒ Object

Changes the value of a parameter in the url. It doesn’t validate if the param already exists.



751
752
753
754
755
756
757
758
# File 'lib/functions.rb', line 751

# Set the value of the query-string parameter +param_name+ in +url+.
#
# Every existing value of that parameter is replaced by +param_value+; when
# the parameter is not present (or the URL has no query string at all) it is
# appended. Other parameters are kept, with repeated names grouped together.
#
# url:: the URL to modify (anything accepted by URI()).
# param_name:: name of the parameter to set.
# param_value:: new value of the parameter.
#
# Returns the new URL as a String.
def self.change_param(url, param_name, param_value)
  uri = URI(url)

  # Group the decoded pairs by name (name => [values]), tolerating a URL
  # without a query string.
  params = URI.decode_www_form(uri.query || '').each_with_object({}) do |(name, value), grouped|
    (grouped[name] ||= []) << value
  end

  # Replace the requested parameter, not a hard-coded one.
  params[param_name] = param_value
  uri.query = URI.encode_www_form(params)
  uri.to_s
end

.download(url, to) ⇒ Object

Download a file from a URL to a local path. url: must be somedomain.net instead of somedomain.net/, otherwise it will throw an exception. to: must be a valid path to a folder.



647
648
649
650
651
652
653
654
655
656
657
658
# File 'lib/functions.rb', line 647

# Download the resource at +url+ and write the response body to +to+.
#
# url:: address of the resource; it must include a host.
# to:: local path that is opened for binary writing and receives the body.
#
# Returns the value of the File.open block (the number of bytes written).
def self.download(url, to)
  uri = URI(url)
  # NOTE(review): the request is sent to the host with a leading 'www.'
  # removed, as before - confirm this is intended for every site.
  domain = uri.host.start_with?('www.') ? uri.host[4..-1] : uri.host
  # An empty path (e.g. "http://somedomain.net") is not a valid request
  # target, so ask for the root document instead.
  path = uri.path.empty? ? '/' : uri.path
  # Honor the port and scheme of the URL instead of always using plain HTTP
  # on the default port.
  Net::HTTP.start(domain, uri.port, :use_ssl => uri.scheme == 'https') do |http|
    resp = http.get(path)
    # File.open, not Kernel#open: a path starting with "|" must never be
    # interpreted as a command to run.
    File.open(to, "wb") do |file|
      file.write(resp.body)
    end
  end
end

.file_age(filename) ⇒ Object

returns the age in days of the given file



683
684
685
# File 'lib/functions.rb', line 683

# Age of +filename+ in days, measured from the file's ctime to now.
#
# filename:: path of the file to inspect.
#
# Returns a Float (fractions of a day included).
def self.file_age(filename)
  seconds_per_day = 24*3600
  elapsed_seconds = Time.now - File.ctime(filename)
  elapsed_seconds / seconds_per_day
end

.get_host_without_www(url) ⇒ Object

Removes the ‘www.’ from an URL.



667
668
669
670
671
# File 'lib/functions.rb', line 667

# Lower-cased host of +url+ with a leading 'www.' removed.
#
# url:: the URL to inspect; 'http://' is prepended when it has no scheme.
#
# Returns the host as a String.
def self.get_host_without_www(url)
  address = URI.parse(url).scheme.nil? ? "http://#{url}" : url
  hostname = URI.parse(address).host.downcase
  return hostname unless hostname.start_with?('www.')

  hostname[4..-1]
end

.get_redirect(url) ⇒ Object

Get the final URL if a web page is redirecting.



674
675
676
677
678
679
680
# File 'lib/functions.rb', line 674

# Request +url+ and return the address its response redirects to.
#
# url:: the URL to request; it must include a scheme and a host.
#
# When the 'Location' header of the response is an absolute URL it is
# returned as is. Otherwise the header value (empty when the header is
# missing) is appended to the scheme and lower-cased host of +url+.
def self.get_redirect(url)
  uri = URI.parse(url)
  protocol = uri.scheme
  host = uri.host.downcase
  res = Net::HTTP.get_response(uri)
  location = res['location'].to_s

  # An absolute Location already is the complete target; prefixing it with
  # scheme and host would produce an invalid address.
  return location if location =~ /\A[a-z][a-z0-9+.\-]*:\/\//i

  "#{protocol}://#{host}#{location}"
end

.get_url_extension(url) ⇒ Object

Return the extension of the last path into an URL. Example: get_url_extension(“connect.data.com/sitemap_index.xml?foo_param=foo_value”) => “.xml”



662
663
664
# File 'lib/functions.rb', line 662

# Extension (leading dot included) of the last path segment of +url+.
#
# url:: the URL to inspect; its query string is ignored.
#
# Returns the extension as a String, or "" when the path has none.
def self.get_url_extension(url)
  url_path = URI.parse(url).path.to_s
  File.extname(url_path)
end

.getDomainFromEmail(email) ⇒ Object



788
789
790
791
792
793
794
# File 'lib/functions.rb', line 788

# Domain part (the text after the last "@") of an email address.
#
# email:: the address to inspect; it is validated with String#email?.
#         NOTE(review): email? is not a core String method; presumably it is
#         added elsewhere in the project - confirm.
#
# Returns the domain as a String; raises a RuntimeError when email? is false.
def self.getDomainFromEmail(email)
  raise "getDomainFromEmail: Wrong email format." unless email.email?

  email.split("@").last
end

.getDomainFromUrl(url) ⇒ Object

get the domain from any url



772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
# File 'lib/functions.rb', line 772

# Host of +url+ with a leading 'www.' removed.
#
# url:: the URL to inspect; 'http://' is prepended when it does not start
#       with http:// or https://.
#
# Returns the domain as a String; raises a RuntimeError when no host can be
# extracted from the URL.
def self.getDomainFromUrl(url)
  missing_scheme = url !~ /^http:\/\//i && url !~ /^https:\/\//i
  url = "http://#{url}" if missing_scheme

  host = URI.parse(url).host
  raise "Cannot get domain for #{url}" if host.nil?

  # From here on the URL always carries a scheme, so it is never empty and a
  # host is always available.
  host.sub(/^www\./, '')
end

.getWhoisDomains(domain, allow_heuristic_to_avoid_hosting_companies = false) ⇒ Object



796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
# File 'lib/functions.rb', line 796

# Collect the domains of the contact emails published in the whois record of
# +domain+.
#
# domain:: the domain to look up through Whois::Client.
# allow_heuristic_to_avoid_hosting_companies:: reserved; currently it has no
#                                              effect.
#
# The first "Registrant Email", "Admin Email" and "Tech Email" entries of the
# record are inspected, in that order. Returns an Array with the distinct,
# lower-cased domains of the emails found.
def self.getWhoisDomains(domain, allow_heuristic_to_avoid_hosting_companies=false)
  record = Whois::Client.new.lookup(domain)
  domains = []

  ['Registrant', 'Admin', 'Tech'].each do |contact|
    match = record.to_s.scan(/#{contact} Email: (#{BlackStack::Strings::MATCH_EMAIL})/).first
    next if match.nil?

    domains << BlackStack::Netting::getDomainFromEmail(match[0].downcase)
  end

  # remove duplicates
  domains = domains.uniq

  if allow_heuristic_to_avoid_hosting_companies == true
    # TODO: develop this feature
  end

  domains
end

.lockfilesObject



516
517
518
# File 'lib/functions.rb', line 516

# Reader for the module-level list of lock files (@@lockfiles), which is
# filled by BlackStack::Netting.set.
def self.lockfiles
  return @@lockfiles
end

.max_api_call_channelsObject



512
513
514
# File 'lib/functions.rb', line 512

# Reader for the module-level limit of API call channels
# (@@max_api_call_channels); 0 means infinite.
def self.max_api_call_channels
  return @@max_api_call_channels
end

.params(url) ⇒ Object

Returns a hash with the parameters in the URL.



723
724
725
726
727
728
729
730
731
732
733
# File 'lib/functions.rb', line 723

def self.params(url)
  # TODO: Corregir este parche:

  # => El codigo de abajo usa la URL de una busqueda en google. Esta url generara una excepcion cuando se intenta parsear sus parametros.

  # => Ejecutar las 2 lineas de abajo para verificar.

  # => url = "https://www.google.com/webhp#q=[lead+generation]+%22John%22+%22Greater+New+York+City+Area+*+Financial+Services%22+site:linkedin.com%2Fpub+-site:linkedin.com%2Fpub%2Fdir"

  # => p = CGI::parse(URI.parse(url).query)

  # => La linea de abajo hace un gsbub que hace que esta url siga funcionando como busqueda de google, y ademas se posible parsearla.

  url = url.gsub("webhp#q=", "webhp?q=")

  return CGI::parse(URI.parse(url).query)
end

.set(h) ⇒ Object



520
521
522
523
524
525
526
527
528
529
# File 'lib/functions.rb', line 520

# Configure the module.
#
# h:: hash of settings. :max_api_call_channels is the number of API call
#     channels; a missing value is treated as 0.
#
# One lock file "./apicall.channel_<i>.lock" is opened for writing per channel
# and kept in the list returned by BlackStack::Netting.lockfiles. Returns nil.
def self.set(h)
  # Release the handles opened by a previous configuration so that calling
  # this method again does not leak file descriptors.
  @@lockfiles.each { |lockfile| lockfile.close unless lockfile.closed? }

  @@max_api_call_channels = h[:max_api_call_channels].to_i
  @@lockfiles = (0...@@max_api_call_channels).map do |i|
    File.open("./apicall.channel_#{i.to_s}.lock", "w")
  end

  nil
end

.set_param(url, param_name, param_value) ⇒ Object

Change or add the value of a parameter in the url, depending if the parameter already exists or not.



761
762
763
764
765
766
767
768
769
# File 'lib/functions.rb', line 761

# Set a query-string parameter of +url+: its value is changed through
# change_param when the parameter is already present, and it is appended
# through add_param otherwise.
#
# url:: the URL to modify.
# param_name:: name of the parameter.
# param_value:: value to assign.
#
# Returns the new URL.
def self.set_param(url, param_name, param_value)
  existing = BlackStack::Netting::params(url)
  return BlackStack::Netting::change_param(url, param_name, param_value) if existing.has_key?(param_name)

  BlackStack::Netting::add_param(url, param_name, param_value)
end