Class: BOAST::CKernel

Inherits:
Object
  • Object
show all
Includes:
Inspectable, PrivateStateAccessor, TypeTransition, Rake::DSL
Defined in:
lib/BOAST/CKernel.rb

Constant Summary collapse

@@extensions =
{
  C => ".c",
  CUDA => ".cu",
  FORTRAN => ".f90"
}

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from TypeTransition

#get_transition, #set_transition, #transition

Methods included from PrivateStateAccessor

private_boolean_state_accessor, private_state_accessor

Methods included from Inspectable

#inspect

Constructor Details

#initialize(options = {}) ⇒ CKernel

Returns a new instance of CKernel.



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/BOAST/CKernel.rb', line 105

def initialize(options={})
  if options[:code] then
    @code = options[:code]
  elsif get_chain_code
    @code = get_output
    @code.seek(0,SEEK_END)
  else
    @code = StringIO::new
  end
  set_output(@code)
  if options[:kernels] then
    @kernels = options[:kernels]
  else
    @kernels  = []
  end
  if options[:lang] then
    @lang = options[:lang]
  else
    @lang = get_lang
  end
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(meth, *args, &block) ⇒ Object



831
832
833
834
835
836
837
838
# File 'lib/BOAST/CKernel.rb', line 831

def method_missing(meth, *args, &block)
 if meth.to_s == "run" then
   build
   run(*args,&block)
 else
   super
 end
end

Instance Attribute Details

#binaryObject

Returns the value of attribute binary.



101
102
103
# File 'lib/BOAST/CKernel.rb', line 101

def binary
  @binary
end

#codeObject

Returns the value of attribute code.



98
99
100
# File 'lib/BOAST/CKernel.rb', line 98

def code
  @code
end

#cost_functionObject

Returns the value of attribute cost_function.



103
104
105
# File 'lib/BOAST/CKernel.rb', line 103

def cost_function
  @cost_function
end

#kernelsObject

Returns the value of attribute kernels.



102
103
104
# File 'lib/BOAST/CKernel.rb', line 102

def kernels
  @kernels
end

#langObject

Returns the value of attribute lang.



100
101
102
# File 'lib/BOAST/CKernel.rb', line 100

def lang
  @lang
end

#procedureObject

Returns the value of attribute procedure.



99
100
101
# File 'lib/BOAST/CKernel.rb', line 99

def procedure
  @procedure
end

Instance Method Details

#build(options = {}) ⇒ Object



495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
# File 'lib/BOAST/CKernel.rb', line 495

def build(options = {})
  compiler_options = BOAST::get_compiler_options
  compiler_options.update(options)
  return build_opencl(compiler_options) if @lang == CL

  linker, ldflags = setup_compilers(compiler_options)

  extension = @@extensions[@lang]

  source_file, path, target = create_source

  module_file_name, module_name = create_module_source(path)

  module_target = module_file_name.chomp(File::extname(module_file_name))+".o"
  module_final = module_file_name.chomp(File::extname(module_file_name))+".so"


  kernel_files = get_sub_kernels

  file module_final => [module_target, target] do
    #puts "#{linker} -shared -o #{module_final} #{module_target} #{target} #{kernel_files.join(" ")} -Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic #{ldflags}"
    sh "#{linker} -shared -o #{module_final} #{module_target} #{target} #{(kernel_files.collect {|f| f.path}).join(" ")} -Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic #{ldflags}"
  end
  Rake::Task[module_final].invoke

  require(module_final)
  eval "self.extend(#{module_name})"

  save_binary(target)

  [target, module_target, module_file_name, module_final].each { |fn|
    File::unlink(fn)
  }
  kernel_files.each { |f|
    f.unlink
  }
  return self
end

#build_opencl(options) ⇒ Object



412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
# File 'lib/BOAST/CKernel.rb', line 412

def build_opencl(options)
  init_opencl(options)

  run_method = <<EOF
def self.run(*args)
  raise "Wrong number of arguments \#{args.length} for #{@procedure.parameters.length}" if args.length > #{@procedure.parameters.length+1} or args.length < #{@procedure.parameters.length}
  params = []
  opts = {}
  opts = args.pop if args.length == #{@procedure.parameters.length+1}
  @procedure.parameters.each_index { |i|
params[i] = create_opencl_param( args[i], @procedure.parameters[i] )
  }
  params.each_index{ |i|
@kernel.set_arg(i, params[i])
  }
  event = @queue.enqueue_NDrange_kernel(@kernel, opts[:global_work_size], :local_work_size => opts[:local_work_size])
  @procedure.parameters.each_index { |i|
if @procedure.parameters[i].dimension and (@procedure.parameters[i].direction == :inout or @procedure.parameters[i].direction == :out) then
  read_opencl_param( params[i], args[i], @procedure.parameters[i] )
end
  }
  result = {}
  result[:start] = event.profiling_command_start
  result[:end] = event.profiling_command_end
  result[:duration] = (result[:end] - result[:start])/1000000000.0
  return result
end
EOF
eval run_method
return self
end

#check_args(module_file) ⇒ Object



609
610
611
612
613
614
615
616
617
618
619
620
621
# File 'lib/BOAST/CKernel.rb', line 609

def check_args(module_file)
  if @lang == CUDA then
    module_file.print <<EOF
  if( argc < #{@procedure.parameters.length} || argc > #{@procedure.parameters.length + 1} )
rb_raise(rb_eArgError, "wrong number of arguments for #{@procedure.name} (%d for #{@procedure.parameters.length})", argc);
EOF
  else
    module_file.print <<EOF
  if( argc != #{@procedure.parameters.length} )
rb_raise(rb_eArgError, "wrong number of arguments for #{@procedure.name} (%d for #{@procedure.parameters.length})", argc);
EOF
  end
end

#compare_ref(ref_outputs, outputs, epsilon = nil) ⇒ Object



848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
# File 'lib/BOAST/CKernel.rb', line 848

def compare_ref(ref_outputs, outputs, epsilon = nil)
  res = {}
  @procedure.parameters.each_with_index { |param, indx|
    if param.direction == :in or param.constant then
      next
    end
    if param.dimension then
      diff = (outputs[indx] - ref_outputs[indx]).abs
      if epsilon then
        diff.each { |elem|
          raise "Error: #{param.name} different from ref by: #{elem}!" if elem > epsilon
        }
      end
      res[param.name] = diff.max
    else
      raise "Error: #{param.name} different from ref: #{outputs[indx]} != #{ref_outputs[indx]} !" if epsilon and (outputs[indx] - ref_outputs[indx]).abs > epsilon
      res[param.name] = (outputs[indx] - ref_outputs[indx]).abs
    end
  }
  return res
end

#cost(*args) ⇒ Object



990
991
992
# File 'lib/BOAST/CKernel.rb', line 990

def cost(*args)
  @cost_function.call(*args)
end

#create_module_source(path) ⇒ Object



461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
# File 'lib/BOAST/CKernel.rb', line 461

def create_module_source(path)
  previous_lang = get_lang
  previous_output = get_output
  set_lang( C )
  module_file_name = File::split(path.chomp(File::extname(path)))[0] + "/Mod_" + File::split(path.chomp(File::extname(path)))[1].gsub("-","_") + ".c"
  module_name = File::split(module_file_name.chomp(File::extname(module_file_name)))[1]
  module_file = File::open(module_file_name,"w+")
  set_output( module_file )
  fill_module(module_file, module_name)
  module_file.rewind
 #puts module_file.read
  module_file.close
  set_lang( previous_lang )
  set_output( previous_output )
  return [module_file_name, module_name]
end

#create_opencl_array(arg, parameter) ⇒ Object



368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
# File 'lib/BOAST/CKernel.rb', line 368

def create_opencl_array(arg, parameter)
  if parameter.direction == :in then
    flags = OpenCL::Mem::Flags::READ_ONLY
  elsif parameter.direction == :out then
    flags = OpenCL::Mem::Flags::WRITE_ONLY
  else
    flags = OpenCL::Mem::Flags::READ_WRITE
  end
  if parameter.texture then
    param = @context.create_image_2D( OpenCL::ImageFormat::new( OpenCL::ChannelOrder::R, OpenCL::ChannelType::UNORM_INT8 ), arg.size * arg.element_size, 1, :flags => flags )
    @queue.enqueue_write_image( param, arg, :blocking => true )
  else
    param = @context.create_buffer( arg.size * arg.element_size, :flags => flags )
    @queue.enqueue_write_buffer( param, arg, :blocking => true )
  end
  return param
end

#create_opencl_param(arg, parameter) ⇒ Object



396
397
398
399
400
401
402
# File 'lib/BOAST/CKernel.rb', line 396

def create_opencl_param(arg, parameter)
  if parameter.dimension then
    return create_opencl_array(arg, parameter)
  else
    return create_opencl_scalar(arg, parameter)
  end
end

#create_opencl_scalar(arg, parameter) ⇒ Object



386
387
388
389
390
391
392
393
394
# File 'lib/BOAST/CKernel.rb', line 386

def create_opencl_scalar(arg, parameter)
  if parameter.type.is_a?(Real) then
    return @@opencl_real_types[parameter.type.size]::new(arg)
  elsif parameter.type.is_a?(Int) then
    return @@opencl_int_types[parameter.type.signed][parameter.type.size]::new(arg)
  else
    return arg
  end
end

#create_procedure_call(module_file) ⇒ Object



714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
# File 'lib/BOAST/CKernel.rb', line 714

def create_procedure_call(module_file)
  if @lang == CUDA then
    module_file.print "  duration = "
  elsif @procedure.properties[:return] then
    module_file.print "  ret = "
  end
  module_file.print "  #{@procedure.name}"
  module_file.print "_" if @lang == FORTRAN
  module_file.print "_wrapper" if @lang == CUDA
  module_file.print "("
  params = []
  if(@lang == FORTRAN) then
    @procedure.parameters.each { |param|
      if param.dimension then
        params.push( param.name )
      else
        params.push( "&"+param.name )
      end
    }
  else
    @procedure.parameters.each { |param|
      if param.dimension then
        params.push( param.name )
      elsif param.direction == :out or param.direction == :inout then
        params.push( "&"+param.name )
      else
        params.push( param.name )
      end
    }
  end
  if @lang == CUDA then
    params.push( "block_number", "block_size" )
  end
  module_file.print params.join(", ")
  module_file.print "  );\n"
end

#create_sourceObject



485
486
487
488
489
490
491
492
493
# File 'lib/BOAST/CKernel.rb', line 485

def create_source
  extension = @@extensions[@lang]
  source_file = Tempfile::new([@procedure.name,extension])
  path = source_file.path
  target = path.chomp(File::extname(path))+".o"
  fill_code(source_file)
  source_file.close
  return [source_file, path, target]
end

#decl_module_params(module_file) ⇒ Object



664
665
666
667
668
669
670
671
672
673
674
675
# File 'lib/BOAST/CKernel.rb', line 664

def decl_module_params(module_file)
  @procedure.parameters.each { |param| 
    param_copy = param.copy
    param_copy.constant = nil
    param_copy.direction = nil
    param_copy.decl
  }
  module_file.print "  #{@procedure.properties[:return].type.decl} ret;\n" if @procedure.properties[:return]
  module_file.print "  VALUE stats = rb_hash_new();\n"
  module_file.print "  struct timespec start, stop;\n"
  module_file.print "  unsigned long long int duration;\n"
end

#fill_code(source_file) ⇒ Object



534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
# File 'lib/BOAST/CKernel.rb', line 534

def fill_code(source_file)
  @code.rewind
  source_file.puts "#include <inttypes.h>" if @lang == C or @lang == CUDA
  source_file.puts "#include <cuda.h>" if @lang == CUDA
  # check for too long FORTRAN lines
  if @lang == FORTRAN then
    @code.each_line { |line|
      # check for omp pragmas
      if line.match(/^\s*!\$/) then
        if line.match(/^\s*!\$(omp|OMP)/) then
          chunks = line.scan(/.{1,#{FORTRAN_LINE_LENGTH-7}}/)
          source_file.puts chunks.join("&\n!$omp&")
        else
          chunks = line.scan(/.{1,#{FORTRAN_LINE_LENGTH-4}}/)
          source_file.puts chunks.join("&\n!$&")
        end
      elsif line.match(/^\w*!/) then
        source_file.write line
      else
        chunks = line.scan(/.{1,#{FORTRAN_LINE_LENGTH-2}}/)
        source_file.puts chunks.join("&\n&")
      end
    }
  else
    source_file.write @code.read
  end
  if @lang == CUDA then
    source_file.write <<EOF
extern "C" {
  #{@procedure.boast_header_s(CUDA)}{
dim3 dimBlock(block_size[0], block_size[1], block_size[2]);
dim3 dimGrid(block_number[0], block_number[1], block_number[2]);
cudaEvent_t __start, __stop;
float __time;
cudaEventCreate(&__start);
cudaEventCreate(&__stop);
cudaEventRecord(__start, 0);
#{@procedure.name}<<<dimGrid,dimBlock>>>(#{@procedure.parameters.join(", ")});
cudaEventRecord(__stop, 0);
cudaEventSynchronize(__stop);
cudaEventElapsedTime(&__time, __start, __stop);
return (unsigned long long int)((double)__time*(double)1e6);
  }
}
EOF
  end
  @code.rewind
end

#fill_module(module_file, module_name) ⇒ Object



794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
# File 'lib/BOAST/CKernel.rb', line 794

def fill_module(module_file, module_name)
  module_header(module_file)
  @procedure.boast_header(@lang)
  module_preamble(module_file, module_name)

  module_file.puts "VALUE method_run(int argc, VALUE *argv, VALUE self) {"

  check_args(module_file)

  argc = @procedure.parameters.length
  argv = Variable::new("argv", CustomType, :type_name => "VALUE", :dimension => [ Dimension::new(0,argc-1) ] )
  rb_ptr = Variable::new("rb_ptr", CustomType, :type_name => "VALUE")
  set_transition("VALUE", "VALUE", :default,  CustomType::new(:type_name => "VALUE"))
  rb_ptr.decl

  decl_module_params(module_file)

  get_params_value(module_file, argv, rb_ptr)

  if @lang == CUDA then
    module_file.print get_cuda_launch_bounds(module_file)
  end

  module_file.print "  clock_gettime(CLOCK_REALTIME, &start);\n"

  create_procedure_call(module_file)

  module_file.print "  clock_gettime(CLOCK_REALTIME, &stop);\n"

  get_results(module_file, argv, rb_ptr)

  store_result(module_file)

  module_file.print "  return stats;\n"
  module_file.print "}"
end

#get_array_type(param) ⇒ Object



870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
# File 'lib/BOAST/CKernel.rb', line 870

def get_array_type(param)
  if param.type.class == Real then
    case param.type.size
    when 4
      type = NArray::SFLOAT
    when 8
      type = NArray::FLOAT
    else
      STDERR::puts "Unsupported Float size for NArray: #{param.type.size}, defaulting to byte" if debug?
      type = NArray::BYTE
    end
  elsif param.type.class == Int then
    case param.type.size
    when 1
      type = NArray::BYTE
    when 2
      type = NArray::SINT
    when 4
      type = NArray::SINT
    else
      STDERR::puts "Unsupported Int size for NArray: #{param.type.size}, defaulting to byte" if debug?
      type = NArray::BYTE
    end
  else
    STDERR::puts "Unkown array type for NArray: #{param.type}, defaulting to byte" if debug?
    type = NArray::BYTE
  end
  return type
end

#get_cuda_launch_bounds(module_file) ⇒ Object



677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
# File 'lib/BOAST/CKernel.rb', line 677

def get_cuda_launch_bounds(module_file)
  module_file.print <<EOF
  VALUE rb_opts;
  size_t block_size[3] = {1,1,1};
  size_t block_number[3] = {1,1,1};
  if( argc == #{@procedure.parameters.length + 1} ) {
rb_opts = argv[argc -1];
if ( rb_opts != Qnil ) {
  VALUE rb_array_data = Qnil;
  int i;
  if (TYPE(rb_opts) != T_HASH)
    rb_raise(rb_eArgError, "Cuda options should be passed as a hash");
  rb_ptr = rb_hash_aref(rb_opts, ID2SYM(rb_intern("block_size")));
  if( rb_ptr != Qnil ) {
    if (TYPE(rb_ptr) != T_ARRAY)
      rb_raise(rb_eArgError, "Cuda option block_size should be an array");
    for(i=0; i<3; i++) {
      rb_array_data = rb_ary_entry(rb_ptr, i);
      if( rb_array_data != Qnil )
        block_size[i] = (size_t) NUM2LONG( rb_array_data );
    }
  }
  rb_ptr = rb_hash_aref(rb_opts, ID2SYM(rb_intern("block_number")));
  if( rb_ptr != Qnil ) {
    if (TYPE(rb_ptr) != T_ARRAY)
      rb_raise(rb_eArgError, "Cuda option block_number should be an array");
    for(i=0; i<3; i++) {
      rb_array_data = rb_ary_entry(rb_ptr, i);
      if( rb_array_data != Qnil )
        block_number[i] = (size_t) NUM2LONG( rb_array_data );
    }
  }
}
  }
EOF
end

#get_gpu_dim(directory) ⇒ Object



950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
# File 'lib/BOAST/CKernel.rb', line 950

def get_gpu_dim(directory)
  f = File::new( directory + "/problem_size", "r")
  s = f.read
  local_dim, global_dim = s.scan(/<(.*?)>/)
  local_dim  = local_dim.pop.split(",").collect!{ |e| e.to_i }
  global_dim = global_dim.pop.split(",").collect!{ |e| e.to_i }
  (local_dim.length..2).each{ |i| local_dim[i] = 1 }
  (global_dim.length..2).each{ |i| global_dim[i] = 1 }
  if @lang == CL then
    local_dim.each_index { |indx| global_dim[indx] *= local_dim[indx] }
    res = { :global_work_size => global_dim, :local_work_size => local_dim }
  else
    res = { :block_number => global_dim, :block_size => local_dim }
  end
  f.close
  return res
end

#get_includes(narray_path) ⇒ Object



149
150
151
152
153
154
155
# File 'lib/BOAST/CKernel.rb', line 149

def get_includes(narray_path)
  includes = "-I#{RbConfig::CONFIG["archdir"]}"
  includes += " -I#{RbConfig::CONFIG["rubyhdrdir"]} -I#{RbConfig::CONFIG["rubyhdrdir"]}/#{RbConfig::CONFIG["arch"]}"
  includes += " -I#{RbConfig::CONFIG["rubyarchhdrdir"]}" if RbConfig::CONFIG["rubyarchhdrdir"]
  includes += " -I#{narray_path}" if narray_path
  return includes
end

#get_narray_pathObject



157
158
159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/BOAST/CKernel.rb', line 157

def get_narray_path
  narray_path = nil
  begin
    spec = Gem::Specification::find_by_name('narray')
    narray_path = spec.full_gem_path
  rescue Gem::LoadError => e
  rescue NoMethodError => e
    spec = Gem::available?('narray')
    if spec then
      require 'narray' 
      narray_path = Gem.loaded_specs['narray'].full_gem_path
    end
  end
end

#get_openmp_flags(compiler) ⇒ Object



138
139
140
141
142
143
144
145
146
147
# File 'lib/BOAST/CKernel.rb', line 138

def get_openmp_flags(compiler)
  openmp_flags = BOAST::get_openmp_flags[compiler]
  if not openmp_flags then
    keys = BOAST::get_openmp_flags.keys
    keys.each { |k|
      openmp_flags = BOAST::get_openmp_flags[k] if compiler.match(k)
    }
  end
  return openmp_flags
end

#get_params_value(module_file, argv, rb_ptr) ⇒ Object



623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
# File 'lib/BOAST/CKernel.rb', line 623

def get_params_value(module_file, argv, rb_ptr)
  @procedure.parameters.each_index do |i|
    param = @procedure.parameters[i]
    if not param.dimension then
      case param.type
      when Int 
        (param === FuncCall::new("NUM2INT", argv[i])).pr if param.type.size == 4
        (param === FuncCall::new("NUM2LONG", argv[i])).pr if param.type.size == 8
      when Real
        (param === FuncCall::new("NUM2DBL", argv[i])).pr
      end
    else
      (rb_ptr === argv[i]).pr
      if @lang == CUDA then
        module_file.print <<EOF
  if ( IsNArray(rb_ptr) ) {
struct NARRAY *n_ary;
size_t array_size;
Data_Get_Struct(rb_ptr, struct NARRAY, n_ary);
array_size = n_ary->total * na_sizeof[n_ary->type];
cudaMalloc( (void **) &#{param.name}, array_size);
cudaMemcpy(#{param.name}, (void *) n_ary->ptr, array_size, cudaMemcpyHostToDevice);
  } else
rb_raise(rb_eArgError, "wrong type of argument %d", #{i});
EOF
      else
        module_file.print <<EOF
  if (TYPE(rb_ptr) == T_STRING) {
#{param.name} = (void *) RSTRING_PTR(rb_ptr);
  } else if ( IsNArray(rb_ptr) ) {
struct NARRAY *n_ary;
Data_Get_Struct(rb_ptr, struct NARRAY, n_ary);
#{param.name} = (void *) n_ary->ptr;
  } else
rb_raise(rb_eArgError, "wrong type of argument %d", #{i});
EOF
      end
    end
  end
end

#get_results(module_file, argv, rb_ptr) ⇒ Object



751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
# File 'lib/BOAST/CKernel.rb', line 751

def get_results(module_file, argv, rb_ptr)
  if @lang == CUDA then
    @procedure.parameters.each_index do |i|
      param = @procedure.parameters[i]
      if param.dimension then
        (rb_ptr === argv[i]).pr
        module_file.print <<EOF
  if ( IsNArray(rb_ptr) ) {
EOF
        if param.direction == :out or param.direction == :inout then
        module_file.print <<EOF
struct NARRAY *n_ary;
size_t array_size;
Data_Get_Struct(rb_ptr, struct NARRAY, n_ary);
array_size = n_ary->total * na_sizeof[n_ary->type];
cudaMemcpy((void *) n_ary->ptr, #{param.name}, array_size, cudaMemcpyDeviceToHost);
EOF
        end
        module_file.print <<EOF
cudaFree( (void *) #{param.name});
  } else
rb_raise(rb_eArgError, "wrong type of argument %d", #{i});
  
EOF
      end
    end
  end
end

#get_scalar_type(param) ⇒ Object



900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
# File 'lib/BOAST/CKernel.rb', line 900

def get_scalar_type(param)
  if param.type.class == Real then
    case param.type.size
    when 4
      type = "f"
    when 8
      type = "d"
    else
      raise "Unsupported Real scalar size: #{param.type.size}!"
    end
  elsif param.type.class == Int then
    case param.type.size
    when 1
      type = "C"
    when 2
      type = "S"
    when 4
      type = "L"
    when 8
      type = "Q"
    else
      raise "Unsupported Int scalar size: #{param.type.size}!"
    end
    if param.type.signed? then
      type.downcase!
    end
  end
  return type
end

#get_sub_kernelsObject



450
451
452
453
454
455
456
457
458
459
# File 'lib/BOAST/CKernel.rb', line 450

def get_sub_kernels
  kernel_files = []
  @kernels.each { |kernel|
    kernel_file = Tempfile::new([kernel.procedure.name,".o"])
    kernel.binary.rewind
    kernel_file.write( kernel.binary.read )
    kernel_file.close
    kernel_files.push(kernel_file)
  }
end

#init_opencl(options) ⇒ Object



340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
# File 'lib/BOAST/CKernel.rb', line 340

def init_opencl(options)
  require 'opencl_ruby_ffi'
  init_opencl_types
  device = select_cl_device(options)
  @context = OpenCL::create_context([device])
  program = @context.create_program_with_source([@code.string])
  opts = options[:CLFLAGS]
  begin
    program.build(:options => options[:CLFLAGS])
  rescue OpenCL::Error => e
    puts e.to_s
    puts program.build_status
    puts program.build_log
    if options[:verbose] or get_verbose then
      puts @code.string
    end
    raise "OpenCL Failed to build #{@procedure.name}"
  end
  if options[:verbose] or get_verbose then
    program.build_log.each {|dev,log|
      puts "#{device.name}: #{log}"
    }
  end
  @queue = @context.create_command_queue(device, :properties => OpenCL::CommandQueue::PROFILING_ENABLE)
  @kernel = program.create_kernel(@procedure.name)
  return self
end

#init_opencl_typesObject



317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
# File 'lib/BOAST/CKernel.rb', line 317

def init_opencl_types
  @@opencl_real_types = {
    2 => OpenCL::Half,
    4 => OpenCL::Float,
    8 => OpenCL::Double
  }

  @@opencl_int_types = {
    true => {
      1 => OpenCL::Char,
      2 => OpenCL::Short,
      4 => OpenCL::Int,
      8 => OpenCL::Long
    },
    false => {
      1 => OpenCL::UChar,
      2 => OpenCL::UShort,
      4 => OpenCL::UInt,
      8 => OpenCL::ULong
    }
  }
end

#load_ref_files(path = "", suffix = "", intent) ⇒ Object



968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
# File 'lib/BOAST/CKernel.rb', line 968

def load_ref_files(  path = "", suffix = "", intent )
  proc_path = path + "/#{@procedure.name}/"
  res_h = {}
  begin
    dirs = Pathname.new(proc_path).children.select { |c| c.directory? }
  rescue
    return res_h
  end
  dirs.collect! { |d| d.to_s }
  dirs.each { |d|
    res = [] 
    @procedure.parameters.collect { |param|
      res.push read_param(param, d, suffix, intent)
    }
    if @lang == CUDA or @lang == CL then
      res.push get_gpu_dim(d)
    end
    res_h[d] =  res
  }
  return res_h
end

#load_ref_inputs(path = "", suffix = ".in") ⇒ Object



840
841
842
# File 'lib/BOAST/CKernel.rb', line 840

def load_ref_inputs(path = "", suffix = ".in" )
  return load_ref_files( path, suffix, :in )
end

#load_ref_outputs(path = "", suffix = ".out") ⇒ Object



844
845
846
# File 'lib/BOAST/CKernel.rb', line 844

def load_ref_outputs(path = "", suffix = ".out" )
  return load_ref_files( path, suffix, :out )
end

#module_header(module_file) ⇒ Object



583
584
585
586
587
588
589
590
591
592
593
594
595
# File 'lib/BOAST/CKernel.rb', line 583

def module_header(module_file)
  module_file.print <<EOF
#include "ruby.h"
#include <inttypes.h>
#include <time.h>
#ifdef HAVE_NARRAY_H
#include "narray.h"
#endif
EOF
  if( @lang == CUDA ) then
    module_file.print "#include <cuda_runtime.h>\n"
  end
end

#module_preamble(module_file, module_name) ⇒ Object



597
598
599
600
601
602
603
604
605
606
607
# File 'lib/BOAST/CKernel.rb', line 597

def module_preamble(module_file, module_name)
  module_file.print <<EOF
VALUE #{module_name} = Qnil;
void Init_#{module_name}();
VALUE method_run(int argc, VALUE *argv, VALUE self);
void Init_#{module_name}() {
  #{module_name} = rb_define_module("#{module_name}");
  rb_define_method(#{module_name}, "run", method_run, -1);
}
EOF
end


127
128
129
130
# File 'lib/BOAST/CKernel.rb', line 127

def print
  @code.rewind
  puts @code.read
end

#read_opencl_param(param, arg, parameter) ⇒ Object



404
405
406
407
408
409
410
# File 'lib/BOAST/CKernel.rb', line 404

def read_opencl_param(param, arg, parameter)
  if parameter.texture then
    @queue.enqueue_read_image( param, arg, :blocking => true )
  else
    @queue.enqueue_read_buffer( param, arg, :blocking => true )
  end
end

#read_param(param, directory, suffix, intent) ⇒ Object



930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
# File 'lib/BOAST/CKernel.rb', line 930

def read_param(param, directory, suffix, intent)
  if intent == :out and ( param.direction == :in or param.constant ) then
    return nil
  end
  f = File::new( directory + "/" + param.name+suffix, "rb" )
  if param.dimension then
    type = get_array_type(param)
    if f.size == 0 then
      res = NArray::new(type, 1)
    else
      res = NArray.to_na(f.read, type)
    end
  else
    type = get_scalar_type(param)
    res = f.read.unpack(type).first
  end
  f.close
  return res
end

#save_binary(target) ⇒ Object



478
479
480
481
482
483
# File 'lib/BOAST/CKernel.rb', line 478

def save_binary(target)
  f = File::open(target,"rb")
  @binary = StringIO::new
  @binary.write( f.read )
  f.close
end

#select_cl_device(options) ⇒ Object



301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
# File 'lib/BOAST/CKernel.rb', line 301

def select_cl_device(options)
  platform = select_cl_platform(options)
  type = options[:device_type] ? OpenCL::Device::Type.const_get(options[:device_type]) : options[:CLDEVICETYPE] ? OpenCL::Device::Type.const_get(options[:CLDEVICETYPE]) : OpenCL::Device::Type::ALL
  devices = platform.devices(type)
  if options[:device_name] then
    devices.select!{ |d|
      d.name.match(options[:device_name])
    }
  elsif options[:CLDEVICE] then
    devices.select!{ |d|
      d.name.match(options[:CLDEVICE])
    }
  end
  return devices.first
end

#select_cl_platform(options) ⇒ Object



282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
# File 'lib/BOAST/CKernel.rb', line 282

def select_cl_platform(options)
  platforms = OpenCL::get_platforms
  if options[:platform_vendor] then
    platforms.select!{ |p|
      p.vendor.match(options[:platform_vendor])
    }
  elsif options[:CLVENDOR] then
    platforms.select!{ |p|
      p.vendor.match(options[:CLVENDOR])
    }
  end
  if options[:CLPLATFORM] then
    platforms.select!{ |p|
      p.name.match(options[:CLPLATFORM])
    }
  end
  return platforms.first
end

#setup_c_compiler(options, includes, narray_path, runner) ⇒ Object



172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/BOAST/CKernel.rb', line 172

def setup_c_compiler(options, includes, narray_path, runner)
  c_compiler = options[:CC]
  cflags = options[:CFLAGS]
  cflags += " -fPIC #{includes}"
  cflags += " -DHAVE_NARRAY_H" if narray_path
  if options[:openmp] and @lang == C then
      openmp_cflags = get_openmp_flags(c_compiler)
      raise "unkwown openmp flags for: #{c_compiler}" if not openmp_cflags
      cflags += " #{openmp_cflags}"
  end

  rule '.o' => '.c' do |t|
    c_call_string = "#{c_compiler} #{cflags} -c -o #{t.name} #{t.source}"
    runner.call(t, c_call_string)
  end
end

#setup_compilers(options = {}) ⇒ Object



250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
# File 'lib/BOAST/CKernel.rb', line 250

def setup_compilers(options = {})
  Rake::Task::clear
  verbose = options[:verbose]
  verbose = get_verbose if not verbose
  Rake::verbose(verbose)
  Rake::FileUtilsExt.verbose_flag=verbose

  narray_path = get_narray_path
  includes = get_includes(narray_path)

  runner = lambda { |t, call_string|
    if verbose then
      sh call_string
    else
      status, stdout, stderr = systemu call_string
      if not status.success? then
        puts stderr
        fail "#{t.source}: compilation failed"
      end
      status.success?
    end
  }

  setup_c_compiler(options, includes, narray_path, runner)
  setup_cxx_compiler(options, includes, runner)
  setup_fortran_compiler(options, runner)
  setup_cuda_compiler(options, runner)

  return setup_linker(options)

end

#setup_cuda_compiler(options, runner) ⇒ Object



222
223
224
225
226
227
228
229
230
231
# File 'lib/BOAST/CKernel.rb', line 222

def setup_cuda_compiler(options, runner)
  cuda_compiler = options[:NVCC]
  cudaflags = options[:NVCCFLAGS]
  cudaflags += " --compiler-options '-fPIC'"

  rule '.o' => '.cu' do |t|
    cuda_call_string = "#{cuda_compiler} #{cudaflags} -c -o #{t.name} #{t.source}"
    runner.call(t, cuda_call_string)
  end
end

#setup_cxx_compiler(options, includes, runner) ⇒ Object



189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# File 'lib/BOAST/CKernel.rb', line 189

def setup_cxx_compiler(options, includes, runner)
  cxx_compiler = options[:CXX]
  cxxflags = options[:CXXFLAGS]
  cxxflags += " -fPIC #{includes}"
  if options[:openmp] and @lang == C then
      openmp_cxxflags = get_openmp_flags(cxx_compiler)
      raise "unkwown openmp flags for: #{cxx_compiler}" if not openmp_cxxflags
      cxxflags += " #{openmp_cxxflags}"
  end

  rule '.o' => '.cpp' do |t|
    cxx_call_string = "#{cxx_compiler} #{cxxflags} -c -o #{t.name} #{t.source}"
    runner.call(t, cxx_call_string)
  end
end

#setup_fortran_compiler(options, runner) ⇒ Object



205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
# File 'lib/BOAST/CKernel.rb', line 205

def setup_fortran_compiler(options, runner)
  f_compiler = options[:FC]
  fcflags = options[:FCFLAGS]
  fcflags += " -fPIC"
  fcflags += " -fno-second-underscore" if f_compiler == 'g95'
  if options[:openmp] and @lang == FORTRAN then
      openmp_fcflags = get_openmp_flags(f_compiler)
      raise "unkwown openmp flags for: #{f_compiler}" if not openmp_fcflags
      fcflags += " #{openmp_fcflags}"
  end

  rule '.o' => '.f90' do |t|
    f_call_string = "#{f_compiler} #{fcflags} -c -o #{t.name} #{t.source}"
    runner.call(t, f_call_string)
  end
end

#setup_linker(options) ⇒ Object



233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
# File 'lib/BOAST/CKernel.rb', line 233

def setup_linker(options)
  ldflags = options[:LDFLAGS]
  ldflags += " -L#{RbConfig::CONFIG["libdir"]} #{RbConfig::CONFIG["LIBRUBYARG"]} -lrt"
  ldflags += " -lcudart" if @lang == CUDA
  c_compiler = options[:CC]
  c_compiler = "cc" if not c_compiler
  linker = options[:LD]
  linker = c_compiler if not linker
  if options[:openmp] then
    openmp_ldflags = get_openmp_flags(linker)
    raise "unkwown openmp flags for: #{linker}" if not openmp_ldflags
    ldflags += " #{openmp_ldflags}"
  end

  return [linker, ldflags]
end

#store_result(module_file) ⇒ Object



780
781
782
783
784
785
786
787
788
789
790
791
792
# File 'lib/BOAST/CKernel.rb', line 780

def store_result(module_file)
  if @lang != CUDA then
    module_file.print "  duration = (unsigned long long int)stop.tv_sec * (unsigned long long int)1000000000 + stop.tv_nsec;\n"
    module_file.print "  duration -= (unsigned long long int)start.tv_sec * (unsigned long long int)1000000000 + start.tv_nsec;\n"
  end
  module_file.print "  rb_hash_aset(stats,ID2SYM(rb_intern(\"duration\")),rb_float_new((double)duration*(double)1e-9));\n"
  if @procedure.properties[:return] then
    type_ret = @procedure.properties[:return].type
    module_file.print "  rb_hash_aset(stats,ID2SYM(rb_intern(\"return\")),rb_int_new((long long)ret));\n" if type_ret.kind_of?(Int) and type_ret.signed
    module_file.print "  rb_hash_aset(stats,ID2SYM(rb_intern(\"return\")),rb_int_new((unsigned long long)ret));\n" if type_ret.kind_of?(Int) and not type_ret.signed
    module_file.print "  rb_hash_aset(stats,ID2SYM(rb_intern(\"return\")),rb_float_new((double)ret));\n" if type_ret.kind_of?(Real)
  end
end

#to_sObject



132
133
134
135
# File 'lib/BOAST/CKernel.rb', line 132

def to_s
  @code.rewind
  return code.read
end