Module: BlackStack::Netting

Defined in:
lib/functions.rb

Overview


Network


Defined Under Namespace

Classes: ApiCallException

Constant Summary collapse

CALL_METHOD_GET =
'get'
CALL_METHOD_POST =
'post'
DEFAULT_SSL_VERIFY_MODE =
OpenSSL::SSL::VERIFY_NONE
SUCCESS =
'success'
@@lockfiles =
[]
@@max_api_call_channels =

0 means infinite

0

Class Method Summary collapse

Class Method Details

.add_param(url, param_name, param_value) ⇒ Object

Add a parameter to the url. It doesn’t validate if the param already exists.



706
707
708
709
710
711
712
713
714
715
716
717
718
# File 'lib/functions.rb', line 706

# Appends a query parameter to +url+ and returns the resulting URL as a String.
#
# The pair is always appended: no check is made for an existing parameter with
# the same name, so the result may contain duplicates.
#
# url         - the URL to extend (anything accepted by URI()).
# param_name  - name of the parameter to append.
# param_value - value of the parameter to append.
#
# Both name and value are form-encoded, and the new pair is placed inside the
# query component (i.e. before any "#fragment").
def self.add_param(url, param_name, param_value)
  uri = URI(url)
  pairs = URI.decode_www_form(uri.query || '')
  # Let encode_www_form build the whole query instead of concatenating
  # "&name=value" onto the finished string: that escapes reserved characters,
  # accepts non-String values and keeps the pair ahead of the fragment.
  pairs << [param_name, param_value]
  uri.query = URI.encode_www_form(pairs)
  uri.to_s
end

.api_call(url, params = {}, method = BlackStack::Netting::CALL_METHOD_POST, ssl_verify_mode = BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, max_retries = 5) ⇒ Object



585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
# File 'lib/functions.rb', line 585

# Calls a JSON API endpoint, retrying until the response reports success.
#
# url             - address of the endpoint.
# params          - hash of parameters forwarded to call_post / call_get.
# method          - BlackStack::Netting::CALL_METHOD_POST or CALL_METHOD_GET.
# ssl_verify_mode - forwarded unchanged to call_post / call_get.
# max_retries     - maximum number of attempts.
#
# An attempt succeeds when the response body parses as JSON and its 'status'
# field equals BlackStack::Netting::SUCCESS. Any other status, and any
# StandardError raised during the attempt, counts as a failure and is retried.
#
# Returns the parsed JSON response of the successful attempt.
# Raises RuntimeError carrying the last recorded error message when every
# attempt failed.
def self.api_call(url, params={}, method=BlackStack::Netting::CALL_METHOD_POST, ssl_verify_mode=BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, max_retries=5)
  attempts = 0
  last_error = ""
  while attempts < max_retries
    attempts += 1
    begin
      uri = URI(url)
      res = BlackStack::Netting::call_post(uri, params, ssl_verify_mode) if method == BlackStack::Netting::CALL_METHOD_POST
      res = BlackStack::Netting::call_get(uri, params, ssl_verify_mode) if method == BlackStack::Netting::CALL_METHOD_GET
      parsed = JSON.parse(res.body)
      # Hand the payload back to the caller instead of discarding it.
      return parsed if parsed['status'] == BlackStack::Netting::SUCCESS
      last_error = "Status: #{parsed['status'].to_s}. Description: #{parsed['value'].to_s}."
    rescue Errno::ECONNREFUSED => e
      last_error = "Errno::ECONNREFUSED:" + e.to_console
    rescue => e
      last_error = "Exception:" + e.to_console
    end
  end

  # Every attempt failed (or max_retries allowed none).
  raise "#{last_error}"
end

.call_get(url, params = {}, ssl_verify_mode = BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE) ⇒ Object

New call_get



515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
# File 'lib/functions.rb', line 515

# Performs an HTTP GET request and returns the successful response.
#
# url             - address to request (anything accepted by URI()).
# params          - hash encoded as the query string; it REPLACES any query
#                   already present in +url+.
# ssl_verify_mode - OpenSSL verify mode used when the scheme is https.
#
# Redirects are followed recursively with the same params and the same
# ssl_verify_mode. There is no limit on the number of hops.
#
# Returns the Net::HTTPSuccess response object.
# Raises whatever Net::HTTPResponse#error! raises for any other response.
def self.call_get(url, params = {}, ssl_verify_mode=BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE)
  uri = URI(url)
  uri.query = URI.encode_www_form(params)
  Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == 'https', :verify_mode => ssl_verify_mode) do |http|
    res = http.request(Net::HTTP::Get.new(uri))
    case res
    when Net::HTTPSuccess
      res
    when Net::HTTPRedirection
      # merge resolves a relative Location against the current URL and leaves
      # an absolute one untouched. Keep the caller's verify mode on the next
      # hop instead of silently falling back to the default.
      BlackStack::Netting::call_get(uri.merge(res['location']), params, ssl_verify_mode)
    else
      res.error!
    end
  end
end

.call_post(url, params = {}, ssl_verify_mode = BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, use_lockfile = true) ⇒ Object

Call the API and return the result. url: a valid internet address. params: hash of params to attach to the call. ssl_verify_mode: you can disable SSL verification here. max_channels: this method uses lockfiles to prevent an excessive number of API calls from each datacenter; no more than max_channels simultaneous calls are allowed. TODO: set up max_simultaneous_calls in the configuration file.



537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
# File 'lib/functions.rb', line 537

# Performs an HTTP POST with +params+ sent as form data and returns the
# successful response.
#
# url             - address to post to (anything accepted by URI()).
# params          - hash sent through Net::HTTP::Post#set_form_data.
# ssl_verify_mode - OpenSSL verify mode used when the scheme is https.
# use_lockfile    - the lock-file based channel throttling this flag was meant
#                   for is currently disabled in this method. Today the flag
#                   only decides whether a redirect is followed: the nested
#                   call passes false, so a single redirect hop is followed
#                   and a second redirect makes the method return nil.
#
# Returns the Net::HTTPSuccess response (or nil, see use_lockfile).
# Raises whatever Net::HTTPResponse#error! raises for non-success,
# non-redirect responses; connection errors propagate unchanged.
def self.call_post(url, params = {}, ssl_verify_mode=BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, use_lockfile=true)
  # Trace output kept as-is for compatibility with existing logs.
  puts
  puts "call_post:#{url}:."

  uri = URI(url)
  Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == 'https', :verify_mode => ssl_verify_mode) do |http|
    req = Net::HTTP::Post.new(uri)
    # set_form_data sets Content-Type to application/x-www-form-urlencoded,
    # so no Content-Type header is assigned by hand before it.
    req.set_form_data(params)
    res = http.request(req)
    case res
    when Net::HTTPSuccess
      res
    when Net::HTTPRedirection
      # merge resolves a relative Location against the current URL. Keep the
      # caller's verify mode on the redirect hop instead of downgrading to the
      # module default.
      BlackStack::Netting::call_post(uri.merge(res['location']), params, ssl_verify_mode, false) if use_lockfile
    else
      res.error!
    end
  end
end

.change_param(url, param_name, param_value) ⇒ Object

Changes the value of a parameter in the url. It doesn’t validate if the param already exists.



721
722
723
724
725
726
727
728
# File 'lib/functions.rb', line 721

def self.change_param(url, param_name, param_value)
  uri = URI(url)
#  params = URI.decode_www_form(uri.query || [])
  params = CGI.parse(uri.query)
  params["start"] = param_value
  uri.query = URI.encode_www_form(params)
  uri.to_s
end

.download(url, to) ⇒ Object

Download a file from a URL to a local folder. url: must be somedomain.net instead of somedomain.net/; otherwise, it will throw an exception. to: must be a valid path to a folder.



617
618
619
620
621
622
623
624
625
626
627
628
# File 'lib/functions.rb', line 617

# Downloads the resource at +url+ and writes the response body to +to+.
#
# url - absolute http/https URL of the resource.
# to  - path that is opened for binary writing and receives the body.
#
# A leading "www." is stripped from the host before connecting (existing
# behaviour, preserved). The connection honours the URL's port and uses TLS
# for https URLs; the request includes the URL's query string.
#
# Returns the value of File#write, i.e. the number of bytes written.
def self.download(url, to)
  uri = URI(url)
  domain = uri.host.start_with?('www.') ? uri.host[4..-1] : uri.host
  Net::HTTP.start(domain, uri.port, :use_ssl => uri.scheme == 'https') do |http|
    # request_uri is the path plus query, and "/" when the path is empty.
    resp = http.get(uri.request_uri)
    # File.open instead of Kernel#open: a destination starting with "|" must
    # never be interpreted as a command to run.
    File.open(to, "wb") do |file|
      file.write(resp.body)
    end
  end
end

.file_age(filename) ⇒ Object

returns the age in days of the given file



653
654
655
# File 'lib/functions.rb', line 653

# Returns, as a Float number of days, the time elapsed between the file's
# File.ctime timestamp and now.
#
# filename - path of the file to inspect.
def self.file_age(filename)
  seconds_per_day = 24 * 3600
  elapsed_seconds = Time.now - File.ctime(filename)
  elapsed_seconds / seconds_per_day
end

.get_host_without_www(url) ⇒ Object

Removes the ‘www.’ from an URL.



637
638
639
640
641
# File 'lib/functions.rb', line 637

# Returns the lower-cased host of +url+ with a leading "www." removed.
#
# url - a URL; when it has no scheme, "http://" is prepended before parsing.
def self.get_host_without_www(url)
  absolute_url = URI.parse(url).scheme.nil? ? "http://#{url}" : url
  lowercase_host = URI.parse(absolute_url).host.downcase
  lowercase_host.sub(/\Awww\./, '')
end

.get_redirect(url) ⇒ Object

Get the final URL if a web page is redirecting.



644
645
646
647
648
649
650
# File 'lib/functions.rb', line 644

# Requests +url+ once and returns the URL its Location header points to.
#
# url - absolute URL to request.
#
# When the Location header is an absolute URL it is returned as-is. When it
# is relative (or absent) it is appended to "scheme://host" of the requested
# URL, with the host lower-cased. Only one hop is inspected; the redirect
# target itself is not requested.
def self.get_redirect(url)
  uri = URI.parse(url)
  protocol = uri.scheme
  host = uri.host.downcase
  res = Net::HTTP.get_response(uri)
  location = res['location'].to_s
  # An absolute Location already is the target; prefixing it with the
  # original scheme and host would produce a malformed URL.
  return location if location =~ %r{\A[a-z][a-z0-9+.\-]*://}i
  "#{protocol}://#{host}#{location}"
end

.get_url_extension(url) ⇒ Object

Return the extension of the last path into an URL. Example: get_url_extension(“connect.data.com/sitemap_index.xml?foo_param=foo_value”) => “.xml”



632
633
634
# File 'lib/functions.rb', line 632

# Returns the file extension (including the leading dot) of the path
# component of +url+, or "" when the path has none. The query string is
# not part of the path and is therefore ignored.
def self.get_url_extension(url)
  url_path = URI.parse(url).path.to_s
  File.extname(url_path)
end

.getDomainFromEmail(email) ⇒ Object



758
759
760
761
762
763
764
# File 'lib/functions.rb', line 758

# Returns the part of +email+ after the last "@".
#
# email - a String; it is validated through its email? predicate.
#
# Raises RuntimeError when email.email? is false.
def self.getDomainFromEmail(email)
  raise "getDomainFromEmail: Wrong email format." unless email.email?

  email.split("@").last
end

.getDomainFromUrl(url) ⇒ Object

get the domain from any url



742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
# File 'lib/functions.rb', line 742

# Returns the host of +url+ with a leading "www." removed.
#
# url - a URL; "http://" is prepended when it does not already start with
#       http:// or https:// (case-insensitive).
#
# Raises RuntimeError when the parsed URL has no host.
def self.getDomainFromUrl(url)
  has_http_scheme = url =~ /^http:\/\//i || url =~ /^https:\/\//i
  url = "http://#{url}" unless has_http_scheme

  host = URI.parse(url).host
  raise "Cannot get domain for #{url}" if host.nil?

  # From here on the URL always starts with a scheme, so it is never empty.
  host.sub(/^www\./, '')
end

.getWhoisDomains(domain, allow_heuristic_to_avoid_hosting_companies = false) ⇒ Object



766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
# File 'lib/functions.rb', line 766

# Looks up the whois record of +domain+ and returns the distinct domains of
# the e-mail addresses listed as "Registrant Email", "Admin Email" and
# "Tech Email" (first occurrence of each, in that order).
#
# domain - domain name handed to Whois::Client#lookup.
# allow_heuristic_to_avoid_hosting_companies - accepted but currently has no
#   effect (TODO: develop this feature).
#
# Returns an Array of lower-cased domain Strings without duplicates.
def self.getWhoisDomains(domain, allow_heuristic_to_avoid_hosting_companies=false)
  record_text = Whois::Client.new.lookup(domain).to_s

  found = []
  %w[Registrant Admin Tech].each do |role|
    match = record_text.scan(/#{role} Email: (#{BlackStack::Strings::MATCH_EMAIL})/).first
    next if match.nil?
    found << BlackStack::Netting::getDomainFromEmail(match[0].downcase)
  end

  # remove duplicates
  found.uniq
end

.lockfilesObject



486
487
488
# File 'lib/functions.rb', line 486

# Returns the value of the @@lockfiles class variable.
def self.lockfiles
  return @@lockfiles
end

.max_api_call_channelsObject



482
483
484
# File 'lib/functions.rb', line 482

# Returns the value of the @@max_api_call_channels class variable.
def self.max_api_call_channels
  return @@max_api_call_channels
end

.params(url) ⇒ Object

Returns a hash with the parameters in the URL.



693
694
695
696
697
698
699
700
701
702
703
# File 'lib/functions.rb', line 693

def self.params(url)
  # TODO: Corregir este parche:
  # => El codigo de abajo usa la URL de una busqueda en google. Esta url generara una excepcion cuando se intenta parsear sus parametros.
  # => Ejecutar las 2 lineas de abajo para verificar.
  # => url = "https://www.google.com/webhp#q=[lead+generation]+%22John%22+%22Greater+New+York+City+Area+*+Financial+Services%22+site:linkedin.com%2Fpub+-site:linkedin.com%2Fpub%2Fdir"
  # => p = CGI::parse(URI.parse(url).query)
  # => La linea de abajo hace un gsbub que hace que esta url siga funcionando como busqueda de google, y ademas se posible parsearla.
  url = url.gsub("webhp#q=", "webhp?q=")

  return CGI::parse(URI.parse(url).query)
end

.set(h) ⇒ Object



490
491
492
493
494
495
496
497
498
499
# File 'lib/functions.rb', line 490

# Configures the module from the hash +h+.
#
# h - Hash; :max_api_call_channels is the number of API call channels.
#     A missing or nil value is treated as 0.
#
# Stores the channel count in @@max_api_call_channels and opens one lock file
# per channel ("./apicall.channel_<i>.lock", write mode), keeping the handles
# in @@lockfiles. Handles opened by a previous call are closed first so that
# reconfiguring does not leak file descriptors.
#
# Returns nil.
def self.set(h)
  @@lockfiles.each { |file| file.close unless file.closed? }

  @@max_api_call_channels = h[:max_api_call_channels].to_i
  @@lockfiles = (0...@@max_api_call_channels).map do |i|
    File.open("./apicall.channel_#{i.to_s}.lock", "w")
  end
  nil
end

.set_param(url, param_name, param_value) ⇒ Object

Change or add the value of a parameter in the url, depending if the parameter already exists or not.



731
732
733
734
735
736
737
738
739
# File 'lib/functions.rb', line 731

# Sets a query parameter on +url+ and returns the new URL.
#
# Delegates to BlackStack::Netting::change_param when
# BlackStack::Netting::params(url) already contains +param_name+, and to
# BlackStack::Netting::add_param otherwise.
#
# url         - the URL to modify.
# param_name  - name of the parameter.
# param_value - value to assign.
def self.set_param(url, param_name, param_value)
  current_params = BlackStack::Netting::params(url)
  if current_params.has_key?(param_name)
    BlackStack::Netting::change_param(url, param_name, param_value)
  else
    BlackStack::Netting::add_param(url, param_name, param_value)
  end
end