#
#     Copyright (c) 2017-2019, NVIDIA CORPORATION.  All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto.  Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
#

# Include information about the supported compute capabilities for
# each architecture (cinclude will conditionally include; each of these
# files exists only in its proper architecture)
cinclude ../rcfiles/acclin8664rc;
cinclude ../rcfiles/acclinuxarm64rc;
cinclude ../rcfiles/acclinuxpower64rc;
cinclude ../rcfiles/accwin64rc;
cinclude ../rcfiles/nvvmversionlinx8664rc;
cinclude ../rcfiles/nvvmversionaarch64rc;
cinclude ../rcfiles/nvvmversionwinx8664rc;

# nvdd was moved to bin/tools directory, so we need the "../" to get to bin/rcfiles.
# Unlike the cinclude lines above, this one is a plain include.
include ../rcfiles/cudaselectrc;

# NVDD cross-platform configuration
#
# PATH is captured from the environment. NEWWINPATH is the NVVM bin directory
# ($NVVMWINBINDIR) followed by PATH, joined with ';'; NEWPATH is PATH unchanged.
variable PATH is environment(PATH);
variable NEWWINPATH is default($NVVMWINBINDIR;$PATH);
variable NEWPATH is default($PATH);

# LIBRARY path settings
# LD_LIBRARY_PATH / DYLD_LIBRARY_PATH are read from the environment and
# mirrored unchanged into NEWLDLIB / NEWDYLDLIB.
variable LDLIB is environment(LD_LIBRARY_PATH);
variable NEWLDLIB is default($LDLIB);

variable DYLDLIB is environment(DYLD_LIBRARY_PATH);
variable NEWDYLDLIB is default($DYLDLIB);

# NOPTXCU: when non-zero, the ptx command below does not pass -m$CUWIDTH.
variable GENMODID is default(1);
variable NOPTXCU is default(0);
variable MSC_VER is default(1400);

# TODO: can we detect this instead of hard coding it? It is the oldest CC
# we support, which is described in acc<target>rc SYSCAP.
variable COMPUTECAP is default(35);
# CCOMPUTECAP follows COMPUTECAP unless a switch (-rcomputecap, -cc) sets it
# explicitly; it feeds COMPCAP (compute_NN) and CUDAARCH further down.
variable CCOMPUTECAP is default($COMPUTECAP);

# PTXOPT is the level passed to ptxas as -O$PTXOPT (skipped when DEBUG2 is set).
variable DEFPTXOPT is default(3);
variable PTXOPT is default($DEFPTXOPT);

variable MAJOR is default(2025) help(Major release version);

variable CACHEL12 is default() help(Set when caching in both Level 1 and 2);
variable CACHEL2 is default() help(Set when caching in Level 2 only);

# NVVM flavour selection. USENVVMDEV / USENVVMNEXT are raised by the
# -nvvm-dev / -nvvm-next switches; ISNVVMSOLID is declared further down in
# this file and is raised by -computecap for compute capability >= 100.
variable USENVVMDEV is default(0);
variable USENVVMNEXT is default(0);
variable NVVMCGVER is default($if($ISNVVMSOLID,nvvm-next,nvvm70));
# NVVMVERSION: the NVVM_VERSION environment variable wins; otherwise
# $NVVMLATESTVERSION is used (not defined in this file; presumably it comes
# from one of the nvvmversion rc files included above -- TODO confirm).
variable NVVMVERSIONENV is environment(NVVM_VERSION);
variable NVVMVERSION is default($if($NVVMVERSIONENV,$NVVMVERSIONENV,$NVVMLATESTVERSION));
# Directory name for the bitcode files under `compilers/lib/`, can be `nvvm70`, `nvvm-next`
variable NVVM is default($if($ISNVVMSOLID,nvvm-next,nvvm70));
# NVVMBC is nvvm-next when any of USENVVMNEXT / USENVVMDEV / ISNVVMSOLID is
# set, and falls back to $NVVM otherwise.
variable NVVMBC is default($if($lor($USENVVMNEXT,$USENVVMDEV,$ISNVVMSOLID),nvvm-next,$NVVM));
# CUDA version pieces derived from $CUDAXXYY (defined outside this file).
# CUDA_MAJOR is forced to 11 for anything below 12000; CUDA_MAJOR_BC, the
# directory component used for runtime bitcode, is capped at 13.
variable CUDA_MAJOR_VAL is default($substr($CUDAXXYY,0,1));
variable CUDA_MINOR_VAL is default($substr($CUDAXXYY,3,3));
variable CUDA_MAJOR is default($if($expr($CUDAXXYY<12000),11,$(CUDA_MAJOR_VAL)));
variable CUDA_MAJOR_BC is default($if($expr($CUDA_MAJOR < 13),$CUDA_MAJOR,13));

# Environment variables to select custom tools from the CUDA toolkit
# (the tool/path definitions below only consult them when USENODEFAULTCUDA
# is set, i.e. with -nodefaultcuda).
variable NVVM_PATH_ENV is environment(NVCOMPILER_NVVM_PATH);
variable PTXAS_PATH_ENV is environment(NVCOMPILER_PTXAS_PATH);
variable FATBINARY_PATH_ENV is environment(NVCOMPILER_FATBINARY_PATH);
variable NVLINK_PATH_ENV is environment(NVCOMPILER_NVLINK_PATH);
# USEF18CUDART gates adding the F18 runtime bitcode to DEVICE_BITCODE_FILES.
variable USEF18CUDART is default(0);

# Set to 1 by -computecap=n when the compute capability is >= 100.
variable ISNVVMSOLID is default(0);

# Hidden switches that pick the NVVM flavour.
# -nvvm70 only turns on the .gpu -> .bc conversion (USEGPUBC).
switch -nvvm70 is hide
    help(Use nvvm70 and convert .gpu to .bc)
    set(USEGPUBC=1);

# -nvvm-dev takes effect only for CUDAXXYY >= 12000; below that USENVVMDEV
# stays 0 and NVVM stays nvvm70. NVVMVEROPT is set to nvvm-latest regardless.
switch -nvvm-dev is hide
    help(Use nvvm-dev (from nvvm SOLID kitpick))
    set(USENVVMDEV=$if($expr($CUDAXXYY>=12000),1,0))
    set(NVVM=$if($expr($CUDAXXYY>=12000),nvvm-next,nvvm70))
    set(NVVMVEROPT=nvvm-latest);

# -nvvm-next applies the same CUDAXXYY >= 12000 guard to USENVVMNEXT and NVVM,
# but sets NVVMBC and NVVMCGVER to nvvm-next unconditionally.
switch -nvvm-next is hide
    help(Use nvvm-next (from CUDA toolkit))
    set(USENVVMNEXT=$if($expr($CUDAXXYY>=12000),1,0))
    set(NVVM=$if($expr($CUDAXXYY>=12000),nvvm-next,nvvm70))
    set(NVVMBC=nvvm-next)
    set(NVVMCGVER=nvvm-next)
    set(NVVMVEROPT=nvvm-latest);

# Appends -vasp-fix to NVVMFLAGS, which cpgnvvm forwards to the nvvm tool.
switch -vasp-fix is hide
    help(Enable the -vasp-fix flag in nvvm)
    append(NVVMFLAGS=-vasp-fix);

# NVVM installation layout.
# NVVMBASEDIR resolves to the development tree ($NVVMDEVBASEDIR) only when
# USENVVMDEV is set and CUDAXXYY >= 12000; otherwise it is a subdirectory of
# $USECUDAROOT (both the nvvm70 and nvvm-next base names default to "nvvm").
variable NVVMNEXTISDEFAULT is default(0); # TBD
variable NVVM70BASEDIR is default(nvvm);
variable NVVMNEXTBASEDIR is default(nvvm);
variable NVVMDEVBASE is default(/proj/cuda/nvvm);
variable NVVMPKGDIR is default($NVVMVERSION);
variable NVVMDEVBASEDIRTOOLS is default($NVVMDEVBASE/$TARGETSTR/$NVVMPKGDIR);
variable NVVMDEVBASEDIR is default($NVVMDEVBASE/$TARGETSTR/$NVVMPKGDIR/nvvm);
variable NVVMBASEDIR is default($if($land($USENVVMDEV,$expr($CUDAXXYY>=12000)),$NVVMDEVBASEDIR,$USECUDAROOT/$if($USENVVMNEXT,$NVVMNEXTBASEDIR,$NVVM70BASEDIR)));
variable NVVMOSXLIBDIR is default($NVVMBASEDIR/lib);
variable NVVMLINUXLIBDIRSUFFIX is default(64);
# With -nodefaultcuda and NVCOMPILER_NVVM_PATH set, the library directory is
# taken from that path; in every other case it is under NVVMBASEDIR.
variable NVVMLINUXLIBDIR is default($if($USENODEFAULTCUDA,$if($NVVM_PATH_ENV,$NVVM_PATH_ENV/lib$NVVMLINUXLIBDIRSUFFIX,$NVVMBASEDIR/lib$NVVMLINUXLIBDIRSUFFIX),$NVVMBASEDIR/lib$NVVMLINUXLIBDIRSUFFIX));
variable NVVMWINBINDIR is default($NVVMBASEDIR/bin);

# NVVMVERIFY makes cpgnvvm pass -nvvmverify (only when DEBUGLIBNVVM is off).
variable NVVMVERIFY is default(0);
switch -nvvmverify is hide
    help(verify nvvm code before compilation)
    set(NVVMVERIFY=1);

# Knobs consumed by the cpgnvvm command: NVVMNOFLAGS suppresses the whole
# -opt/-arch/-ftz/... group, NVVMOPTLEVEL is the -opt= value, NVVMVEROPT the
# -nvvm-version= value, NVVMFLAGS extra flags forwarded verbatim.
variable NVVMNOFLAGS is default(0);
variable NVVMOPTLEVEL is default(3); #only opt levels 0 and 3 are supported, at least as of cuda 9.2
variable NVVMVEROPT is default() help(flag to set nvvm version (default nvvm70 with no option));
variable NVVMDEBUG is default();
variable NVVMFLAGS is help(flags to nvvm);
variable NVVMLINEINFO is default();
# Tool and include/library roots. $USECUDAROOT, $USECUDAFEROOT, $CDRIVERDIR,
# $COMPBASE/$COMPSYS/$COMPVER and $CUDAVERSION are defined outside this file.
variable NVVMDIR is default($NVVMBASEDIR/bin);
variable NVVMCCDIR is default($USECUDAROOT/libnvvm);
variable NVOPEN64DIR is default($USECUDAROOT/open64/lib);
variable PGICUDAROOT is default($dirname($CDRIVERDIR));
variable F18CUDAROOT is default($COMPBASE/$COMPSYS/$COMPVER/$(COMPLIBPREFIX)lib);
variable PGICUDAINCDIR is default($PGICUDAROOT/include_acc);
# PGICUDAINC_OT is only set by the -ot12 / -ot14 switches.
variable PGICUDAINC is default($PGICUDAINCDIR $PGICUDAINC_OT);
variable CUDAFEDIR is default($USECUDAFEROOT/bin);
variable CUDALIB is default($USECUDAROOT/lib64);
variable CUDAINC is default($USECUDAROOT/include);
variable CUDAMATHDIR is default($dirname($dirname($dirname($COMPBIN)))/math_libs/$CUDAVERSION);
variable CUDAMATHINCDIR is default($CUDAMATHDIR/include);
# Debug toggles read by the ptx and fatbin commands: DEBUG1 ->
# --dont-merge-basicblocks, DEBUG2 -> -g (and no -O), DEBUG3 ->
# --return-at-end, LINEINFO -> --generate-line-info. No switch in this file
# sets them.
variable DEBUG is default(0);
variable DEVDEBUG is default(0);
variable DEBUG1 is default(0);
variable DEBUG2 is default(0);
variable DEBUG3 is default(0);
variable LINEINFO is default(0);
# When set, cnvlink adds -allow-undefined-globals.
variable ALLOWUNDEFGBLS is default(0);

# Set to 1 by cpgnvvmdbg once the runtime bitcode has been linked into the
# input; cpgnvvm then no longer appends the runtime bitcode list.
variable LLFILEPRELINKED is default(0);

# User include directories (set by -I).
variable INC is default();
# Standard include search list: CUDA math headers, CUDA headers, cudart
# headers, then the compiler's own CUDA headers.
variable STDINC is default($CUDAMATHINCDIR $CUDAINC $CUDAINC/cudart $PGICUDAINC);

# User macro definitions (set by -D).
variable DEF is default();

# Fast-math macro chain: CUDEF23 -> FASTMATHMACRO -> DEFFASTMATHMACRO.
variable DEFFASTMATHMACRO is default(-D__USE_FAST_MATH__=0);
variable FASTMATHMACRO is default($DEFFASTMATHMACRO);
variable CUDEF23 is default($FASTMATHMACRO);

variable CUPRECISE is default(__CUDA_PREC_DIV __CUDA_PREC_SQRT);
variable PGIEXT is default();

# Predefined macro sets. CUSTDDEF1 is the one the prepro90 command turns into
# -D options; CUSTDDEF2 / CUSTDDEF3 are not referenced elsewhere in this file.
variable CUSTDDEF1 is default(__CUDACC__ __NVCC__ $if($FTZ,__CUDA_FTZ) $CUPRECISE $if($index($TARGET,win64,win64-llvm,win64edg-llvm),_USE_DECLSPECS_FOR_SAL) $PGIEXT __CUDACC_VER_MAJOR__=$CUDA_MAJOR_VAL __CUDACC_VER_MINOR__=$CUDA_MINOR_VAL);
variable CUSTDDEF2 is default(__CUDACC__ __NVCC__ $if($FTZ,__CUDA_FTZ) $CUPRECISE __CUDANVVM__ $PGIEXT __CUDACC_VER_MAJOR__=$CUDA_MAJOR_VAL __CUDACC_VER_MINOR__=$CUDA_MINOR_VAL);
variable CUSTDDEF3 is default(__CUDABE__ $if($FTZ,__CUDA_FTZ) $CUPRECISE $if($index($TARGET,win64,win64-llvm,win64edg-llvm),__GNUC__) __CUDANVVM__ $PGIEXT);

# Macros to undefine (-U). As of CUDA 10.2, cicc has no built-in __float128 or
# __ieee128 type, and our GPU files never use __float128, so the preprocessor
# must not believe the type is available or it would emit code cicc rejects.
variable CUSTUDEF1 is default(__FLOAT128__);
variable STDDEF is default();

# Echo program: pgecho on the win64 targets, echo everywhere else.
variable ECHOBIN is default($if($index($TARGET,win64,win64-llvm,win64edg-llvm),pgecho,echo));

# Set by -dryrun (which also sets KEEPTEMP).
variable DRYRUN is default(0);

# When set, commands name intermediates after the input instead of using
# temporary files.
variable KEEPTEMP is default(0);

# Build phase: run prepro90 before nvvm when set (-prepro / -noprepro).
variable CUPREPRO is default(1);

# Headers passed as "-include" by prepro90 (filled by -preinclude).
variable PREINC is default();

# DOPTXINFO adds -v to ptxas; PTXINFO is where the ptx command sends stderr.
variable DOPTXINFO is default(0);
variable PTXINFO is default();

# CUOPT is the -O level handed to cicc by the nvvm command.
variable CUOPT is default(3);
variable GOPT is default();

# Flush-to-zero setting, forwarded as -ftz=$FTZ.
variable FTZ is default(0);

# Front-end path selectors consulted by the Build phase (USELLC is checked
# first, then USENVVM, then USELL, then CUPREPRO).
variable USELL is default(0);
variable USENVVM is default(0);
# OpenMP target-offload options; they select extra runtime bitcode in
# DEVICE_BITCODE_FILES.
variable USEOMPGPUCG is default(0);
variable USEOMPGPUCGDBG is default();
variable USEOMPGPUCGNVCC is default();
variable OMPTARGETOFFLOAD is default(0);

# Device-link / registration state used by cnvlink, compreg and import.
variable REGFILE is default();
variable REGOBJ is default();
variable DOREG is default(0);
variable CUDALINK is default(0);
variable UNIFIEDMEM is default(0);
variable NORDC is default(0);
variable DEVICEPARTIALLINKING is default(0);

# libdevice selection. DEVICE_BITCODE is the first of these files that exists:
#   1. libdevice.compute_$COMPUTECAP.<vsn>.bc
#   2. libdevice.compute_$DEVICE_BACKUP_CAP.<vsn>.bc
#   3. libdevice.<vsn>.bc (looked up under NVCOMPILER_NVVM_PATH when
#      -nodefaultcuda is given and that variable is set)
# It is empty when none of the three exists.
variable DEVICE_BITCODEVSN is default(10);
variable DEVICE_BACKUP_CAP is default(30);
variable DEVICE_BITCODE_BACKUP2_FILE is default($if($USENODEFAULTCUDA,$if($NVVM_PATH_ENV,$NVVM_PATH_ENV,$NVVMBASEDIR),$NVVMBASEDIR)/libdevice/libdevice.$DEVICE_BITCODEVSN.bc);
variable DEVICE_BITCODE_BACKUP_FILE is default($NVVMBASEDIR/libdevice/libdevice.compute_$DEVICE_BACKUP_CAP.$DEVICE_BITCODEVSN.bc);
variable DEVICE_BITCODE_FILE is default($NVVMBASEDIR/libdevice/libdevice.compute_$COMPUTECAP.$DEVICE_BITCODEVSN.bc);
variable DEVICE_BITCODE is default($if($isfile($DEVICE_BITCODE_FILE),$DEVICE_BITCODE_FILE,$if($isfile($DEVICE_BITCODE_BACKUP_FILE),$DEVICE_BITCODE_BACKUP_FILE,$if($isfile($DEVICE_BITCODE_BACKUP2_FILE),$DEVICE_BITCODE_BACKUP2_FILE))));

# List of runtime bitcode files, add in this variable a new bitcode
# file when needed.
# The list is consumed by cllvmlink (as link inputs) and by cpgnvvm (as extra
# bitcode arguments). Order is significant; see the notes inside the list.
# Most entries live under $PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR_BC and several
# pick a _ccNN variant based on $COMPUTECAP.
variable DEVICE_BITCODE_FILES is default(
      $if($DEVICE_MATHUNIFORM,$PGICUDAROOT/lib/libnvgpumath.bc)
      $if($NEEDCURAND,$PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR_BC/$(PFY)_curand_runtime.bc)
      $if($NEEDNVSHMEM,$PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR_BC/$(PFY)_nvshmem_runtime.bc)
      $if($expr($COMPUTECAP>=60),$PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR_BC/$(PFY)_cuda_real2_runtime.bc)
      $if($expr($COMPUTECAP>=70),$PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR_BC/$(PFY)_cuda_wmma_runtime$if($expr($COMPUTECAP>=80),_cc80).bc)
      $PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR_BC/$(PFY)_cuda_builtin_intrinsics_runtime$if($expr($COMPUTECAP>=80),_cc80).bc
      # NVOMP should be after `nvhpc_omp_runtime.bc` to make sure it
      # can resolve any references to the OpenMP RT
      $if($OMPTARGETOFFLOAD,$PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR_BC/$(PFY)_omp_runtime.bc)
      $if($USEOMPGPUCG,$PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR_BC/nvomp_dev$(USEOMPGPUCGNVCC)$(USEOMPGPUCGDBG).bc)
      $PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR_BC/$(PFY)_utils_runtime$if($expr($COMPUTECAP>=90),_cc90).bc
      $ifn($index($TARGET,win64,win64-llvm,win64edg-llvm),$PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR_BC/$NVHPCCUDACPPBUILTINS)
      $PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR_BC/$NVHPCRUNTIME
      $PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR_BC/$NVHPCRUNTIMEBUILTINS
      # If there is no available LLVM BC part of the F18 runtime,
      # the PTX/CUBIN F18 runtime will be linked at the link stage
      # to resolve all the references.
      $if($land($USEF18CUDART,$isdir($F18CUDAROOT),$equal($NVVMBC,nvvm70)),$F18CUDAROOT/$NVVMBC/$CUDA_MAJOR_BC/$F18CUDABCRUNTIME)
      # Now that we are using the `nvvmLazyAddModuleToProgram` to add
      # our bitcode files, the `libdevice.10.bc` needs to be added
      # last, because when using the "lazy approach", nvvm need to
      # have seen all the requirements of functions in current module.
      $DEVICE_BITCODE
      $USERBCFILE
);

# GNU version reported to the front end: 0 on the Windows targets, otherwise
# $GCCVERSION (defined outside this file). prepro90 and nvvm compare it with
# 60000 / 100000 / 110000 to pick the C++ standard.
variable NVGNUVERSION is default($if($index($TARGET,win32,win64,win64-llvm,win64edg-llvm),0,$GCCVERSION));

# When set, libnvgpumath.bc is added to DEVICE_BITCODE_FILES.
variable DEVICE_MATHUNIFORM is default(0);

# Device Link Time Optimization
# NEEDCUDALTO / LINKCUDALTO are raised by -dlto / -llto; LTOFLAGS is
# forwarded to the nvvm tool by cpgnvvm when NEEDCUDALTO is set.
variable NEEDCUDALTO is default(0);
variable LINKCUDALTO is default(0);
variable LTOFLAGS is default();

# Pass maxrregcount to nvlink
# (cnvlink only emits it on the LTO path, and only when non-zero).
variable MAXRREGCOUNT is default(0);

# Set for VC 17; uses PGI '14' libraries
variable PGC_OT_REL is
  hide default(14);

# FPIFP
# NEEDFPIFP enables the FPIFP-specific arguments in import, cnvlink and -sm;
# the two file variables name the prelink object and the index file.
variable NEEDFPIFP is default(0);
variable FPIFPFPRELINKOBJFILE is default();
variable FPIFPFINDEXFILE is default();

# USEFMA -> -fmad / -fma, NOFASTMATH -> -prec_div / -prec_sqrt (and
# -fast-math when 0) in the nvvm, cpgnvvm and ptx commands.
variable USEFMA is default(1);
variable NOFASTMATH is default(1);
# USELLC routes the Build phase through cstartllc (the llvm-as/llvm-link/opt/llc
# chain) instead of the NVVM or cicc paths.
variable USELLC is default(0);
switch -usellc is
   help(Using alternative PTX generator)
   set(USELLC=1);

# -dryrun also keeps temporaries so the displayed file names are stable.
switch -dryrun is
    help(Display commands that would be executed)
    set(DRYRUN=1) set(KEEPTEMP=1);

# WINNUL is "nul" on the win64 targets and empty elsewhere; prepro90 uses it
# as its stderr target and compreg as its stdout target. -showerr clears it.
variable WINNUL is default($if($index($TARGET,win64,win64-llvm,win64edg-llvm),nul));
switch -showerr is hide
    set(WINNUL=);

switch -keeptemp is
    help(Keep intermediate temporary files)
    set(KEEPTEMP=1);

# -usegpubc: assemble the textual .gpu input to bitcode first. With USEGPUBC
# set, the Build phase starts at cnvvmas (llvm-as) instead of going straight
# to cpgnvvm.
switch -usegpubc is
    help(Enable conversion from .gpu to .bc using llvm-as before passing it to libNVVM)
    set(USEGPUBC=1);

# USEOLDNVVMD makes cpgnvvm wrap its extra arguments in the old driver's
# -gpubc / -flags ... -endflags markers.
switch -useoldnvvmd is
    help(Switch back to using old nvvmd. It is still the default libNVVM driver for Windows)
    set(USEOLDNVVMD=1);

# DEBUGLIBNVVM switches cpgnvvm to the cicc tool and makes the Build phase
# start at cllvmlink so the runtime bitcode is linked into one file first.
switch -debuglibnvvm is
    help(Show GPU toolchain commands using cicc instead of using nvnvvmd and libNVVM)
    set(DEBUGLIBNVVM=1);

# KEEPGPUBC keeps the .bc files produced by cnvvmas / cllvmlink under fixed
# names instead of temporary files.
switch -keepgpubc is
    help(Keep GPU file in bitcode format (.bc))
    set(KEEPGPUBC=1);

# USENODEFAULTCUDA lets the NVCOMPILER_*_PATH environment variables and PATH
# take part in locating ptxas, fatbinary, nvlink and the NVVM libraries.
switch -nodefaultcuda is
    help(Use component from non-standard CUDA Toolkit)
    set(USENODEFAULTCUDA=1);

# Windows "Open Tools" selection: each switch sets the Platform SDK (PSDK) and
# Windows Kit (WSDK) locations, MSC_VER, the PGI_OT_VER macro name and the
# matching include_acc/OT_<n> directory (picked up through PGICUDAINC).
# NOTE(review): both switches set MSC_VER=1700 -- confirm that is intended
# for -ot14 as well.
switch -ot12 is  hide
    helpgroup(target)
    help(Open Tools version 12 )
    set(PSDK="C:/Program Files/PGI/Microsoft Open Tools 12")
    set(WSDK="C:/Program Files (x86)/Windows Kits/8.1")
    set(MSC_VER=1700)
    set(WSDK_VERSION=)
    set(PGI_OT_VER=__PGI_TOOLS12)
    set(PGICUDAINC_OT=$PGICUDAROOT/include_acc/OT_12);
# -ot14 reports an error when WSDK_VERSION is empty: it is not guessed here.
switch -ot14 is  hide
    helpgroup(target)
    help(Open Tools version 14)
    set(PSDK="C:/Program Files/PGI/Microsoft Open Tools 14")
    set(WSDK="C:/Program Files (x86)/Windows Kits/10")
    set(MSC_VER=1700)
    #WSDK_VERSION must be set by makelocalrc . This is just a guess
    #set(WSDK_VERSION=10.0.10586.0)
    error($ifn($WSDK_VERSION," ot14 switch: unknown Windows Kit version: run makelocalrc"))
    set(PGI_OT_VER=__PGI_TOOLS14)
    set(PGICUDAINC_OT=$PGICUDAROOT/include_acc/OT_14);

# -preinclude makes prepro90 force-include the CUDA runtime header and the
# compiler's own $(PFY)_cuda_runtime.h.
switch -preinclude is
    help(Preinclude CUDA header files)
    set(PREINC=cuda_runtime.h $(PFY)_cuda_runtime.h);

# -prepro / -noprepro toggle CUPREPRO, i.e. whether the Build phase starts
# with prepro90 or goes directly to nvvm on the cicc path.
switch -prepro is
    help(Run preprocessor)
    set(CUPREPRO=1);
switch -noprepro is
    help(Do not run preprocessor)
    set(CUPREPRO=0);
# INFILE: affects the fatbin output name and prevents fatbin from chaining
# into import.
variable INFILE is default(0);
switch -infile is
    help(Run from a file)
    set(INFILE=1);

# Architecture names: SMCAP (sm_NN) is built from COMPUTECAP, COMPCAP
# (compute_NN) from CCOMPUTECAP.
variable SMCAP is default(sm_$COMPUTECAP);
variable COMPCAP is default(compute_$CCOMPUTECAP);

# Explicit output names; when empty the commands derive a name from the input
# or use a temporary file.
variable CUBINFILE is default();
variable CUPTXFILE is default();

# Extra arguments inserted into the cicc command line by the nvvm command.
variable NVVMARGS is default();
# NVVM driver ($TNVVM is defined outside this file) and the cicc compiler.
tool tpgnvvm is
  program($TNVVM)
  directory($DRIVERDIR);
tool tnvvm is
  program(cicc)
  directory($NVVMDIR);

# ptxas: with -nodefaultcuda the NVCOMPILER_PTXAS_PATH directory and PATH are
# searched first, followed by the NVVM dev tools or $USECUDAROOT/bin.
tool tptx is
  program(ptxas)
  directory($if($USENODEFAULTCUDA,$if($PTXAS_PATH_ENV,$PTXAS_PATH_ENV) $PATH) $if($USENVVMDEV,$NVVMDEVBASEDIRTOOLS,$USECUDAROOT/bin));

# LLVM tools: the LLVMAS environment variable overrides the directory that
# llvm-as is taken from; everything else comes from $LLCDIR.
variable LLVMAS is environment(LLVMAS);
variable LLVMVER is default(3.1);
variable LLCDIR is default($PGICUDAROOT/share/llvm/bin);
tool llvmasllc is program(llvm-as) directory($if($LLVMAS,$LLVMAS,$LLCDIR));
tool llvmas is program(llvm-as) directory($if($LLVMAS,$LLVMAS,$LLCDIR));
tool nvvmcc is program(nvvmcc) directory($NVVMCCDIR);
tool tllc is program(llc) directory($CDRIVERDIR/llvmptx);

# llvm-link / llvm-as that ship next to the driver for the selected NVVM.
tool tllvmlink is program(llvm-link) directory($CDRIVERDIR/tools/$NVVM);
tool tllvmas is program(llvm-as) directory($CDRIVERDIR/tools/$NVVM);

# llc / llvm-link / opt from $LLCDIR, used by the -usellc pipeline.
tool tllc38 is program(llc) directory($LLCDIR);
tool tllvmlink38 is program(llvm-link) directory($LLCDIR);
tool tllvmopt38 is program(opt) directory($LLCDIR);

# fatbinary and nvlink follow the same lookup scheme as ptxas, each with its
# own NVCOMPILER_*_PATH override.
tool tfatbin is
  program(fatbinary)
  directory($if($USENODEFAULTCUDA,$if($FATBINARY_PATH_ENV,$FATBINARY_PATH_ENV) $PATH) $if($USENVVMDEV,$NVVMDEVBASEDIRTOOLS,$USECUDAROOT/bin));
tool tnvlink is
  program(nvlink)
  directory($if($USENODEFAULTCUDA,$if($NVLINK_PATH_ENV,$NVLINK_PATH_ENV) $PATH) $if($USENVVMDEV,$NVVMDEVBASEDIRTOOLS,$USECUDAROOT/bin));

# Fat-binary import tool ($TIMPORT is defined outside this file).
tool timport is
  program($TIMPORT)
  directory($DRIVERDIR);

# Assembler: looked up in PATH, then $LLCDIR, then the driver tools directory.
tool as is
  program($AS)
  directory($PATH $LLCDIR $CDRIVERDIR/tools);
variable AS is default(as);
variable EXTRAASARGS is default();

# Tool to generate FPIFP index file
tool tgenfpifpidx is
  program(genfpifpidx)
  directory($DRIVERDIR);

# File suffix "ptx"; the ptx command is bound to it.
suffix ptx is
  string(ptx);

# Tools for converting `.gpu` file (text LLVM IR) to `.bc` (binary LLVM IR)
# variable USEGPUBC is default($if($expr($CUDAXXYY>=12080),1,0));
variable USEGPUBC is default(0);
variable KEEPGPUBC is default(0);
variable NVVMLINK is default(llvm-link);
# The old nvvmd driver is the default on the win64 targets only.
variable USEOLDNVVMD is default($if($index($TARGET,win64,win64-llvm,win64edg-llvm),1,0));
variable DEBUGLIBNVVM is default(0);
# Set by cllvmlink to the linked runtime bitcode file; read by cpgnvvm.
variable OUTRUNTIMEBC is default();
tool tnvvmlink is program($NVVMLINK) directory($DRIVERDIR/$NVVMCGVER);
variable USENODEFAULTCUDA is default(0);

# Default suffix, used by the commands that take arbitrary input files.
suffix other is
  default;

# MAXRCOUNT=0 and MAXNVVMTHREADS=-1 are the "not set" values: the ptx command
# emits -maxrregcount only when MAXRCOUNT differs from 0, and cpgnvvm emits
# -split-compile only when MAXNVVMTHREADS differs from -1.
variable INLINE is default(1);
variable MAXRCOUNT is default(0);
variable MAXNVVMTHREADS is default(-1);

switch -regs=n is help(Set maximum register count to use) set(MAXRCOUNT=$n);

switch -split-compile=n is help(Set maximum thread count to use during libNVVM compilation) set(MAXNVVMTHREADS=$n);

# -inline / -noinline flip INLINE; -dp is accepted but has no action.
switch -inline is
  set(INLINE=1);
switch -noinline is
  set(INLINE=0);
switch -dp is hide ;

# Runtime bitcode file names, chosen by compute capability.
# HIGHESTSUPPORTEDSYSCAP is the last entry of $SYSCAP (defined outside this
# file, in the acc<target>rc files per the comment near COMPUTECAP).
variable HIGHESTSUPPORTEDSYSCAP is default($last($SYSCAP));
# _cc60 for 60 <= COMPUTECAP < 70, _cc70 for >= 70, no suffix below 60.
variable NVHPCRUNTIME is default($(PFY)_cuda_runtime$if($expr(($COMPUTECAP>=60)&&($COMPUTECAP<70)),_cc60, $if($expr($COMPUTECAP>=70), _cc70)).bc);
variable NVHPCCUDACPPBUILTINS is default($(PFY)_cuda_cpp_builtins$if($expr($COMPUTECAP>=70),_cc70).bc);
# The builtins file is per compute capability, clamped to the highest
# supported one.
variable NVHPCRUNTIMEBUILTINS is default($(PFY)_cuda_runtime_builtins_cc$if($expr($COMPUTECAP>$HIGHESTSUPPORTEDSYSCAP),$HIGHESTSUPPORTEDSYSCAP,$COMPUTECAP).bc);
# F18 runtime: _cc50 below 60, then _cc60 / _cc70 / _cc80, and _cc90 for 90 and up.
variable F18CUDABCRUNTIME is default(libFortranRuntime$if($expr($COMPUTECAP<60), _cc50, $if($expr($COMPUTECAP<70), _cc60, $if($expr($COMPUTECAP<80), _cc70, $if($expr($COMPUTECAP<90), _cc80, _cc90)))).bc);

# RELOC adds --device-c (cicc, fatbinary) and --compile-only (ptxas).
variable RELOC is default(0);
variable DOLINK is default(0);

switch -reloc is
  help(Generate relocatable code)
  set(RELOC=1);

# -rcomputecap and -cc set COMPUTECAP to n but pin CCOMPUTECAP to 60 for any
# n >= 70 (values below 70 are used as given).
switch -rcomputecap=n is
  set(COMPUTECAP=$n)
  set(CCOMPUTECAP=$if($expr($n<70),$n,60));

switch -noflags is
  help("Don't pass -opt -arch -ftz -prec-sqrt -prec-div -fma to nvvm")
  set(NVVMNOFLAGS=1);

# -computecap leaves CCOMPUTECAP at its default, which tracks COMPUTECAP, and
# flags compute capability >= 100 as ISNVVMSOLID.
switch -computecap=n is
  set(COMPUTECAP=$n)  # sets CCOMPUTECAP as well
  set(ISNVVMSOLID=$if($expr($COMPUTECAP>=100),1,0));

switch -cc$n is
  set(COMPUTECAP=$n)
  set(CCOMPUTECAP=$if($expr($n<70),$n,60));

# -dlto: ask the nvvm tool to also write LTO output to $file.
switch -dlto $file is
    set(LTOFLAGS=-gen-lto-and-llc -olto $file)
    set(NEEDCUDALTO=1);

# -llto: link-time LTO (affects the fatbin and cnvlink arguments).
switch -llto is
    set(NEEDCUDALTO=1)
    set(LINKCUDALTO=1);

# -nordc also appends "-x 68 0x200" to F901ARGS, which is not used elsewhere
# in this file.
switch -nordc is
    append(F901ARGS=-x 68 0x200)
    set(NORDC=1);

switch -maxrregcount n is
    set(MAXRREGCOUNT=$n);

# Value of __CUDA_ARCH__ passed by prepro90: CCOMPUTECAP with a trailing 0.
variable CUDAARCH is default($(CCOMPUTECAP)0);

# Extra debug arguments; FATDEBUG is appended to the fatbinary command line.
variable CFEDEBUG is default();
variable FATDEBUG is default();
# Add user bc lib if any was set through env var.
# (It is the last entry of DEVICE_BITCODE_FILES.)
variable USERBCFILE is environment(NVCOMPILER_USERBCFILE);

# prepro90: preprocess the CUDA source, then hand the result to the nvvm
# (cicc) command.
#  - Uses the cuprepro tool, which is not defined in this file.
#  - Output goes to <basename>.ii with KEEPTEMP, otherwise to a temporary
#    file. On the win64 targets it is captured from stdout; elsewhere it is
#    written with "-o".
#  - Include order: user -I ($INC), then $STDINC and the Windows SDK
#    directories derived from PSDK / WSDK / PGC_OT_REL.
#  - The C++ standard follows NVGNUVERSION: >= 110000 -> c++17,
#    60000..109999 -> c++14, < 60000 -> c++11.
command prepro90 is
  help(Include preprocessor for CUDA >= 9.0)
  tool(cuprepro)
  suffix(other)
  set(outp1=$if($KEEPTEMP,$basename($input).ii,$tempfile(ii)))
  stdout($if($index($TARGET,win64,win64-llvm,win64edg-llvm),$outp1))
  stderr($WINNUL)
  arguments($CUPREPRO1ARGS $input
    -D__CUDA_ARCH__=$CUDAARCH -D__CUDA_VER__=$CUDAXXYY -D$PGIM
    $foreach(i,$INC, -I$i)
    $foreach(i,$STDINC $if($PSDK,$PSDK/include) $if($PGC_OT_REL,$WSDK/Include/$WSDKMORE/shared $WSDK/Include/$WSDKMORE/ucrt $WSDK/Include/$WSDKMORE/um, $if($WSDK,$WSDK/Include/shared $WSDK/Include/um,$if($PSDK,$PSDK/PlatformSDK/include))), -I$i)
    $foreach(d,$STDDEF, -D$d)
    $foreach(d,$HOSTDEF, -D$d)
    $foreach(d,$CUSTDDEF1, -D$d)
    $foreach(u,$CUSTUDEF1, -U$u)
    $foreach(i,$PREINC, -include $i)
    -DV$CUDAVER
    $DEF
    $if($expr($NVGNUVERSION>=110000),--std=c++17,
    $if($land($expr($NVGNUVERSION >= 60000),$expr($NVGNUVERSION < 110000)),--std=c++14,
    $if($expr($NVGNUVERSION < 60000),--std=c++11)))
    $ifn($index($TARGET,win64,win64-llvm,win64edg-llvm),-o $outp1)
    )
  execute(
      input($outp1)
      command(nvvm)
  );

# cstartllc: entry point of the -usellc pipeline. Assembles the input with
# llvm-as into $bcfile (<basename>.bc with KEEPTEMP, otherwise a temporary
# file), sets USEPGILL and continues with cpreplibs.
command cstartllc is
  help(LLVM assembler)
  tool(llvmasllc)
  suffix(other)
  set(USEPGILL=1)
  set(bcfile=$if($KEEPTEMP,$basename($input).bc,$tempfile(bc)))
  arguments(-o=$bcfile $input)
  execute(
      input($bcfile)
      command(cpreplibs) #$if($USEPGILL,cpreplibs,cllvmlink38))
  );

# cpreplibs: first library-preparation step of the -usellc pipeline.
# NOTE(review): no arguments() are given here, so this step appears to do
# nothing beyond forwarding $bcfile to cpreplibs2 -- confirm whether the
# llvm-as invocation is intentional.
command cpreplibs is
  help(Prepping PGI LLVM libs)
  tool(llvmas)
  execute(
    input($bcfile)
    command(cpreplibs2)
  );

# DISABLE_INTERNALIZE (environment) switches the -usellc pipeline to the
# alternative route cprelink -> cpreinternalize -> cllvmlink38.
variable DISABLE_INTERNALIZE is environment(DISABLE_INTERNALIZE);

# cpreplibs2: assemble the textual CUDA runtime
# ($PGICUDAROOT/lib/llc/$(PFY)_cuda_runtime.ll) into the temporary bitcode
# file $nvhpc_cuda, then continue with cprelink when DISABLE_INTERNALIZE is
# set, or directly with cllvmlink38 otherwise.
command cpreplibs2 is
  help(Prepping PGI LLVM libs 2)
  tool(llvmas)
  set(pgill=$PGICUDAROOT/lib/llc/$(PFY)_cuda_runtime.ll)
  set(nvhpc_cuda = $tempfile(bc))
  arguments($pgill -o $nvhpc_cuda)
  execute(
    input($bcfile)
    command($if($DISABLE_INTERNALIZE,cprelink,cllvmlink38))
  );

# cprelink (DISABLE_INTERNALIZE route): link libdevice ($DEVICE_BITCODE) with
# the assembled CUDA runtime into the fixed file name prelinked.bc, then run
# cpreinternalize. The user bitcode ($bcfile) is only passed along.
command cprelink is
  help(Linking before internalize)
  tool(tllvmlink38)
  arguments($DEVICE_BITCODE $nvhpc_cuda -o prelinked.bc)
  execute(
    input($bcfile)
    command(cpreinternalize)
  );

# cpreinternalize (DISABLE_INTERNALIZE route): run opt's internalize and
# always-inline passes on prelinked.bc, writing internalized.bc, then
# continue with cllvmlink38.
command cpreinternalize is
  help(Internalizing CUDA runtime)
  tool(tllvmopt38)
  arguments(prelinked.bc -passes=internalize,always-inline -o internalized.bc)
  execute(
    input($bcfile)
    command(cllvmlink38)
  );

# cllvmlink38: link the user bitcode with the runtime into
# $bcfile.linked.bc. With DISABLE_INTERNALIZE the runtime side is the
# already internalized internalized.bc; otherwise it is libdevice plus the
# assembled CUDA runtime. Continues with cllvmopt38.
command cllvmlink38 is
  help(LLVM linker)
  tool(tllvmlink38)
  set(linkedbcfile=$bcfile.linked.bc)
  arguments($bcfile $if($DISABLE_INTERNALIZE,internalized.bc,$DEVICE_BITCODE $nvhpc_cuda) -o $linkedbcfile)
  execute(
      input($bcfile)
      command(cllvmopt38)
  );

# Text appended verbatim after "kernel" in -internalize-public-api-list.
variable INTERNALIZE_EXCLUDES is environment(INTERNALIZE_EXCLUDES);

# cllvmopt38: run opt on $bcfile.linked.bc, producing $bcfile.opt.bc.
# Unless DISABLE_INTERNALIZE is set, the internalize, always-inline and
# nvvm-reflect passes are requested with "kernel" (plus INTERNALIZE_EXCLUDES)
# on the public API list; with DISABLE_INTERNALIZE no pass options are given.
# Continues with cllc38.
command cllvmopt38 is
  help(LLVM opt)
  tool(tllvmopt38)
  set(linkedbcfile=$bcfile.linked.bc)
  set(optbcfile=$bcfile.opt.bc)
  arguments($if($DISABLE_INTERNALIZE,, "-passes=internalize,always-inline,function(nvvm-reflect)" -internalize-public-api-list=kernel$INTERNALIZE_EXCLUDES) $linkedbcfile -o $optbcfile)
  execute(
      input($bcfile)
      command(cllc38)
  );

# cllc38: last LLVM step of the -usellc pipeline. Runs llc (nvptx64, +ptx42,
# -O3, -mcpu=sm_$CCOMPUTECAP) on $bcfile.opt.bc and hands the PTX to the ptx
# command. The PTX name is $CUPTXFILE if set, else <basename>.ptx with
# KEEPTEMP, else a temporary file.
command cllc38 is
  help(LLC LLVM to PTX)
  tool(tllc38)
  set(optbcfile=$bcfile.opt.bc)
  set(out3=$if($CUPTXFILE,$CUPTXFILE,$if($KEEPTEMP,$basename($input).ptx,$tempfile(ptx))))
  arguments(-mcpu=sm_$CCOMPUTECAP $optbcfile -O3 -o $out3 -march=nvptx64 -mattr=+ptx42)
  execute(
      input($out3)
      command(ptx)
  );

# nvvm: compile (preprocessed) CUDA source to PTX with cicc, then run ptx.
#  - NOFASTMATH drives both -prec_div and -prec_sqrt; when it is 0,
#    -fast-math is added as well.
#  - -w is added unless $CUDAVERSION is 7.5 or 8.0.
#  - libdevice is passed with -nvvmir-library when one was found.
#  - --gnu_version is only passed for NVGNUVERSION >= 100000.
#  - C++ dialect: --c++14 on the win64 targets; otherwise c++17 / c++14 /
#    c++11 chosen by the same NVGNUVERSION thresholds as prepro90.
command nvvm is
  help(NVidia GPU compiler)
  tool(tnvvm)
  suffix(other)
  set(out3=$if($CUPTXFILE,$CUPTXFILE,$if($KEEPTEMP,$basename($input).ptx,$tempfile(ptx))))
  arguments(
  -arch $COMPCAP -m$CUWIDTH -ftz=$FTZ -prec_div=$NOFASTMATH -prec_sqrt=$NOFASTMATH -fmad=$USEFMA
  $ifn($NOFASTMATH,-fast-math)
  $if($RELOC,--device-c)
  $NVVMARGS -O$CUOPT $input -o $out3
  $ifn($index($CUDAVERSION,7.5,8.0),-w)
  $if($DEVICE_BITCODE,-nvvmir-library $DEVICE_BITCODE)
  $if($expr($NVGNUVERSION>=100000),--gnu_version=$NVGNUVERSION)
  $if($index($TARGET,win64,win64-llvm,win64edg-llvm),--c++14,
  $if($expr($NVGNUVERSION>=110000),--c++17,
  $if($land($expr($NVGNUVERSION >= 60000),$expr($NVGNUVERSION < 110000)),--c++14,
  $if($expr($NVGNUVERSION < 60000),--c++11))))
    )
  execute(
      input($out3)
      command(ptx)
  );

# cllvmas: entry point of the USELL path. Assembles the input with llvm-as
# into $bcfile and continues with cnvvmcc.
command cllvmas is
  help(LLVM assembler)
  tool(llvmas)
  suffix(other)
  set(bcfile=$if($KEEPTEMP,$basename($input).bc,$tempfile(bc)))
  arguments(-o=$bcfile $input)
  execute(
      input($bcfile)
      command(cnvvmcc)
  );

# cnvvmcc: compile bitcode to PTX with nvvmcc (from $NVVMCCDIR), then run ptx.
command cnvvmcc is
  help(NVidia LLVM GPU compiler)
  tool(nvvmcc)
  set(out3=$if($CUPTXFILE,$CUPTXFILE,$if($KEEPTEMP,$basename($input).ptx,$tempfile(ptx))))
  arguments(-m$CUWIDTH $input -o $out3)
  execute(
      input($out3)
      command(ptx)
  );

# cllc: translate LLVM input to PTX with the llc under $CDRIVERDIR/llvmptx
# (nvptx64, ptx30, -mcpu=$SMCAP), then run ptx. No phase or command in this
# file refers to it.
command cllc is
  help(LLVM-PTX compiler)
  tool(tllc)
  set(out3=$if($CUPTXFILE,$CUPTXFILE,$if($KEEPTEMP,$basename($input).ptx,$tempfile(ptx))))
  arguments(-march=nvptx64 -mattr=ptx30 -mcpu=$SMCAP -o=$out3 $input)
  execute(
      input($out3)
      command(ptx)
  );

# cpgnvvmdbg: pre-link step for a debug compile. Uses llvm-link to merge
# libdevice, the CUDA runtime bitcode, the runtime builtins and the nvshmem
# runtime with the input into one textual .ll file (-S), then runs cpgnvvm.
# It forces NVVMOPTLEVEL=0 and sets LLFILEPRELINKED so that cpgnvvm does not
# add the runtime bitcode list again.
# NOTE(review): the help text matches cllc's although the tool here is
# llvm-link -- looks like a copy/paste; confirm before changing.
command cpgnvvmdbg is
  help(LLVM-PTX compiler)
  tool(tllvmlink)
  set(out3=$if($KEEPTEMP,$basename($input).ll,$tempfile(ll)))
  set(LLFILEPRELINKED=1)
  set(NVVMOPTLEVEL=0)
  arguments($DEVICE_BITCODE $PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR_BC/$NVHPCRUNTIME $PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR_BC/$NVHPCRUNTIMEBUILTINS $PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR_BC/$(PFY)_nvshmem_runtime.bc $input -S -o $out3)
  execute(
      input($out3)
      command(cpgnvvm)
  );

# cnvvmas: assemble the textual .gpu input into bitcode with the llvm-as that
# matches the selected NVVM, then run cpgnvvm. The .bc is kept as
# <basename of first input>.bc with KEEPGPUBC, otherwise it is temporary.
# With DEBUGLIBNVVM it also prints the "Start" banner that the ptx command
# closes with its "End" banner.
command cnvvmas is
  help(Convert text based (.gpu) to binary (.bc) LLVM IR )
  tool(tllvmas)
  set(out3=$if($KEEPGPUBC,$basename($first($input)).bc,$tempfile(bc)))
  echo($if($DEBUGLIBNVVM,"########## Debug commands for libNVVM - Start ##########"))
  arguments($input -o $out3)
  execute(
      input($out3)
      command(cpgnvvm)
  );

# cllvmlink: DEBUGLIBNVVM entry point. Links every file in
# DEVICE_BITCODE_FILES into a single runtime bitcode file ($OUTRUNTIMEBC:
# nvhpc_runtime.bc with KEEPGPUBC, otherwise temporary) and then passes the
# original, untouched input on to cnvvmas.
command cllvmlink is
    help(LLVM Linker)
    tool(tnvvmlink)
    set(OUTRUNTIMEBC=$if($KEEPGPUBC,nvhpc_runtime.bc,$tempfile(bc)))
    arguments(
        -o $OUTRUNTIMEBC
        $DEVICE_BITCODE_FILES
    )
    execute(
        input($input)
        command(cnvvmas)
    );

# cpgnvvm: compile LLVM IR to PTX, then run ptx.
# Two modes, selected by DEBUGLIBNVVM:
#  - normal: the $TNVVM driver is used; output is given with "-ptx", the
#    runtime bitcode list is appended (unless LLFILEPRELINKED), and
#    NVVMFLAGS are forwarded (wrapped in -flags/-endflags for the old nvvmd).
#  - debug: cicc is used with -libnvvm; output is given with "-o" and the
#    pre-linked runtime from cllvmlink is passed via -nvvmir-library.
# Unless NVVMNOFLAGS is set, both modes receive the -opt/-arch/-ftz/
# -prec-div/-prec-sqrt/-fma group plus the optional -split-compile,
# -nvvm-version and LTO flags.
command cpgnvvm is
  help(NVVM LLVM-PTX compiler)
  suffix(other)
  tool($ifn($DEBUGLIBNVVM,tpgnvvm,tnvvm))
  set(out3=$if($CUPTXFILE,$CUPTXFILE,$if($KEEPTEMP,$basename($input).ptx,$tempfile(ptx))))
  set(extra_llfiles=$DEVICE_BITCODE_FILES)
  arguments(
      $if($DEBUGLIBNVVM,-libnvvm)
      $if($VERBOSE,-v)
      $if($NVVMVERIFY,$ifn($DEBUGLIBNVVM,-nvvmverify,))
      $ifn($DEBUGLIBNVVM,-ptx $out3,-o $out3)
      $input
      $if($DEBUGLIBNVVM,-nvvmir-library $OUTRUNTIMEBC)
      # The `-gpubc` flag separates (used only with old nvvmd tool)
      # the bitcode files from the user code so that we can process
      # those with the `nvvmLazyAddModuleToProgram` function from
      # libNVVM.
      $ifn($DEBUGLIBNVVM,$ifn($LLFILEPRELINKED,$if($USEOLDNVVMD,-gpubc) $extra_llfiles))
      $ifn($NVVMNOFLAGS,-opt=$NVVMOPTLEVEL $NVVMDEBUG -arch=$COMPCAP -ftz=$FTZ -prec-div=$NOFASTMATH -prec-sqrt=$NOFASTMATH -fma=$USEFMA $NVVMLINEINFO $if($notequal($MAXNVVMTHREADS,-1),-split-compile=$MAXNVVMTHREADS) $if($land($lor($USENVVMNEXT,$USENVVMDEV),$notequal($NVVMVEROPT,)),-nvvm-version=$NVVMVEROPT) $if($NEEDCUDALTO, $LTOFLAGS))
      $ifn($DEBUGLIBNVVM,$ifn($equal($NVVMFLAGS,),$if($USEOLDNVVMD,-flags) $NVVMFLAGS $if($USEOLDNVVMD,-endflags)))
  )
  execute(
      input($out3)
      command(ptx)
  );

# ptx: assemble PTX to a cubin with ptxas. This is the final step of every
# Build path.
#  - Output is $CUBINFILE if set, otherwise <basename>.cubin.
#  - stderr is redirected to $PTXINFO.
#  - Cache mode: -dlcm=ca when CACHEL12 is set, else -dlcm=cg when CACHEL2 is.
#  - DEBUG2 replaces the -O$PTXOPT option with -g.
#  - -maxrregcount is only passed when MAXRCOUNT is not 0.
#  - With DEBUGLIBNVVM it prints the "End" banner matching cnvvmas.
command ptx is
  help(PTX Assembler)
  tool(tptx)
  suffix(ptx)
  set(out4=$if($CUBINFILE,$CUBINFILE,$basename($file).cubin))
  stderr($PTXINFO)
  arguments(-arch=$SMCAP
    -fmad $if($USEFMA,true,false)
    $if($CACHEL12,-dlcm=ca,$if($CACHEL2,-dlcm=cg))
    $ifn($NOPTXCU,-m$CUWIDTH)
    $if($DEBUG1,--dont-merge-basicblocks)
    $if($DEBUG2,-g)
    $if($DEBUG3,--return-at-end)
    $ifn($DEBUG2,-O$PTXOPT)
    $if($LINEINFO,--generate-line-info)
    $if($notequal($MAXRCOUNT,0),-maxrregcount=$MAXRCOUNT) -o $out4
    $if($RELOC,--compile-only)
    $if($DOPTXINFO,-v) $input
    $PTXASARGS
    )
    echo($if($DEBUGLIBNVVM,"########## Debug commands for libNVVM - End ##########"));

# Inputs of the fatbin command. FATNAME and FATOBJ are set by -fat / -fatobj,
# FATARGS is accumulated by -sm / -compute, FATKEY is set by -key.
variable FATNAME is help(Identifier for the fat binary) default();

variable FATARGS is help(Fat binary arguments) default();

variable FATKEY is help(Fat binary key) default();

variable FATOBJ is help(Build object file containing fat binary) default();

# fatbin: bundle the images collected in $FATARGS into a fat binary.
# Output name:
#  - with FATOBJ: if KEEPTEMP, INFILE or GPUFILE is set, a kept file
#    (<basename of CUBINFILE>.fatbin, or nvfat.fatbin when CUBINFILE is
#    empty); otherwise a temporary file;
#  - without FATOBJ: $CUBINFILE, or a temporary file when it is empty.
# The --cmdline options record the ptxas flags that match the DEBUG /
# LINEINFO / LTO state. The import command runs afterwards only when FATOBJ
# is set and INFILE is not.
command fatbin is
  help(Create fat binary)
  tool(tfatbin)
  set(out=$if($FATOBJ,$if($or($KEEPTEMP,$INFILE,$GPUFILE),$if($CUBINFILE,$basename($CUBINFILE).fatbin,nvfat.fatbin),$tempfile(fatbin)),$if($CUBINFILE,$CUBINFILE,$tempfile(fatbin))))
  arguments(
    $if($RELOC,$if($expr($CUDAXY<101),--cuda) --device-c)
    -$CUWIDTH
    --create=$out
    $if($LINKCUDALTO,--ident=\"lto\",--ident=$FATNAME)
    $if($or($and($DEBUG,$LINEINFO),$NEEDCUDALTO),--cmdline=\"--compile-only\" $if($LINKCUDALTO,-link))
    $FATARGS
    $if($and($DEBUG,$LINEINFO),
        --cmdline=" -g --dont-merge-basicblocks --return-at-end --generate-line-info " -g,
        $if($DEBUG,--cmdline=" -g --dont-merge-basicblocks --return-at-end" -g,$if($LINEINFO,--cmdline=" --generate-line-info "))
    )
    $FATDEBUG
  )
  execute(
      condition($and($FATOBJ,$not($INFILE)))
      input($out)
      command(import)
  );

# Options forwarded to the import command.
# IMPARGS collects one -cc<sm> per -sm switch.
variable IMPARGS is default();

# -dyninit: import gets -dyninit, and compreg adds $FPICFLAG.
variable DYNINIT is default(0);
switch -dyninit is
  set(DYNINIT=1);

# -gpufile: import gets -gpufile; fatbin also keeps a named output file.
variable GPUFILE is default(0);
switch -gpufile is
  set(GPUFILE=1);

# Routine-name suffixes collected by -init=routine. import prefixes each with
# __pgi_uacc_set_ and passes it as "-init <name>", so that the routine is
# called at .init time.
variable ACCINITS is default();
switch -init=routine is append(ACCINITS=$routine);

# -cudalink / -unifiedmem: forwarded to import as flags of the same name.
switch -cudalink is
  set(CUDALINK=1);
switch -unifiedmem is
  set(UNIFIEDMEM=1);

# import: turn the fat binary into an assembly file, then assemble it with
# fatas.
#  - Output is <input>.s with KEEPTEMP ("import.s" when there is no input),
#    otherwise a temporary .s file.
#  - The third argument is __nv_relfatbin for device partial linking and
#    .nv_fatbin otherwise.
#  - Without partial linking: a runtime build (-rtbuildvar) passes
#    "-var $RTBUILD"; a normal build passes the __NV_CUDA_LOC /
#    __NV_CUDA_CAP names, $IMPARGS and -linkinit, but only if at least one
#    -sm image was given.
#  - The slot 4/5 fatbin markers are skipped on the win64 targets and with
#    -nordc; slots 6-9 are added for FPIFP.
command import is
  help(Import fat binary to assembly file)
  tool(timport)
  set(in=$ifn($equal($input,),$input,import))
  set(out=$if($KEEPTEMP,$in.s,$tempfile(s)))
  arguments($out $input $if($DEVICEPARTIALLINKING,__nv_relfatbin,.nv_fatbin)
    $IMPORTARGS
    -cudaver $CUDAXXYY
    $ifn($DEVICEPARTIALLINKING,
      $if($equal($RTBUILD,),
        $if($smfiles,-var __NV_CUDA_LOC -ccname __NV_CUDA_CAP $IMPARGS -linkinit),
        -var $RTBUILD)
      $ifn($index($TARGET,win64,win64-llvm,win64edg-llvm),$if($not($NORDC),-slot 4 __pgi_fatbin_start -slot 5 __pgi_fatbin_end))
      $if($NEEDFPIFP,-slot 6 __nvhpc_nvuft_start -slot 7 __nvhpc_nvuft_end -slot 8 __nvhpc_nvudt_start -slot 9 __nvhpc_nvudt_end))
    $if($DYNINIT,-dyninit) $if($GPUFILE,-gpufile)
    $foreach(init,$ACCINITS, -init __pgi_uacc_set_$init) $if($CUDALINK,-cudalink) $if($UNIFIEDMEM,-unifiedmem))
  execute(
      input($out)
      command(fatas)
  );

# Compiler binary directory; CCOMPDIR can be overridden with -Y0.
variable COMPBIN is default($DRIVERDIR);
variable CCOMPDIR is    # set by -Y0
  help(Directory containing the C compiler)
  default($COMPBIN);

# winconv: the program name is only filled in on the win64 targets; it is
# empty on every other target.
variable WINCONV is default(nvdd);  # dummy name
tool winconv is
  program($if($index($TARGET,win64,win64-llvm,win64edg-llvm),$WINCONV))
  directory($CCOMPDIR);

# conv: run the winconv tool on an object, writing $CUBINFILE or a temporary
# .o file. Its only reference in this file is the commented-out execute
# clause in fatas.
command conv is
  help(Convert fat object to PECOFF)
  tool(winconv)
  set(out=$if($CUBINFILE,$CUBINFILE,$tempfile(o)))
  arguments($input $out -dcuda $USECUDAROOT -cuda$CUDAXXYY $if($VERBOSE,-v))
  ;

# fatas: assemble the file produced by import into an object ($CUBINFILE or a
# temporary .o). On the win64-llvm / win64edg-llvm targets -filetype=obj is
# appended to EXTRAASARGS first.
command fatas is
  help(Assemble fat binary to object file)
  tool(as)
  set(out=$if($CUBINFILE,$CUBINFILE,$tempfile(o)))
  append(EXTRAASARGS=$if($index($TARGET,win64-llvm,win64edg-llvm),-filetype=obj))
  # Order matters here on windows: $input MUST come before the flags.
  arguments(
      $input $EXTRAASARGS -o $out
  )
  # TODO: do we need to convert to PECOFF on windows with the new llvm-mc?
# execute(
#     condition($index($TARGET,win64,win64-llvm,win64edg-llvm))
#     input($out)
#     command(conv)
# )
  ;

# -L options collected for nvlink (see the -L switch further down).
variable LDLIBS is default();

# cgenfpifpidx: run genfpifpidx on the FPIFP prelink object to produce the
# index file; it is the only command of the Fpifp phase.
command cgenfpifpidx is
    help(Generate FPIFP index file expected by nvlink)
    tool(tgenfpifpidx)
    # Add `-debug` for verbose output and to generate a human-readable index file
    arguments(-i $FPIFPFPRELINKOBJFILE -o $FPIFPFINDEXFILE);

# cnvlink: device-link the input objects with nvlink.
#  - Output is $CUBINFILE, or <basename of first file>.cubin.
#  - With DOREG a registration source is requested
#    (--register-link-binaries); its name is $REGFILE, or <out>.reg.c next to
#    the output.
#  - --arch is the sm_NN name, or the compute_NN name when -linkptx was given.
#  - On the LTO path -lto, -nvvmpath, the optional --maxrregcount and
#    -lcudadevrt are added.
#  - The FPIFP index is passed with -uidx only if the file exists.
# compreg runs afterwards when DOREG is set and this is not a partial link.
command cnvlink is
  help(Link device objects)
  tool(tnvlink)
  set(out=$if($CUBINFILE,$CUBINFILE,$basename($first($files)).cubin))
  set(reg=$if($DOREG,$if($REGFILE,$REGFILE,$dirname($out)/$basename($out).reg.c)))
  # The NVOMP RT is built with `--maxrregcount 128`, force the same at link time
  arguments(
    $NVLINKARGS
    $if($DEVICEPARTIALLINKING,-r)
    --arch=$ifn($PTXLINK,$SMCAP,$COMPCAP)
    -m$CUWIDTH
    $if($NEEDCUDALTO,-lto -nvvmpath=$NVVMBASEDIR $if($MAXRREGCOUNT,--maxrregcount=$MAXRREGCOUNT) -L$CUDALIB -lcudadevrt)
    $if($ALLOWUNDEFGBLS,-allow-undefined-globals)
    $LDLIBS
    $input
    $if($DOREG,--register-link-binaries=$reg)
    $if($and($NEEDFPIFP,$isfile($FPIFPFINDEXFILE)),-uidx $FPIFPFINDEXFILE)
    -o $out
  )
  execute(
      condition($and($DOREG, $not($DEVICEPARTIALLINKING)))
      input($reg)
      command(compreg)
  );

# Option used by compreg to name its output object.
variable OUTFLAG is default(-o);
# compreg: compile the registration file written by nvlink.
#  - Uses the cuprepro tool, which is not defined in this file.
#  - Output is $REGOBJ, or <basename of first file>.$(OBJSUFFIX).
#  - The generated registration file is listed as a tempfile unless the user
#    named it with -regfile.
#  - The registration file is not compiled directly: its quoted name is
#    passed as -DREGFILE, and linkstub.c is the source unless PARTIALLINK is
#    set.
command compreg is
  help(Compile the registration routines)
  tool(cuprepro)
  set(out=$if($REGOBJ,$REGOBJ,$basename($first($files)).$(OBJSUFFIX)))
  tempfile($ifn($REGFILE,$input))
  stdout($WINNUL)
  arguments(
    $COMPARGS
    $COMPREGARGS
    -c -I.
    $OUTFLAG $out
    -DREGFILE=$quote(\")$quote($input)$quote(\")
    -D__CUDA_VER__=$CUDAXXYY 
    $ifn($PARTIALLINK,$PGICUDAINCDIR/linkstub.c)
    $if($USECLANG,-stdlib=libstdc++)
    $if($DYNINIT,$FPICFLAG));

# Show phase does -show
# It is hidden and only runs when the -show switch enables it; -show also
# stops processing after this phase.
phase Show is hide
  action(helpvariable helpphase)
  forall
  help(Show variables);

switch -show is #not
  enable(Show)
  helpgroup(overall)
  stopafter(Show)
  help(Show compiler configuration);

# Build is the default phase. It feeds the input files to a front-end command
# chosen by the nested conditionals below, followed by `ptx`:
#   USELLC set            -> cstartllc
#   else USENVVM set      -> cllvmlink when DEBUGLIBNVVM is set, otherwise
#                            cnvvmas (USEGPUBC set) or cpgnvvm
#                            (presumably - relies on $ifn meaning "if not",
#                            TODO confirm)
#   else USELL set        -> cllvmas
#   else CUPREPRO set     -> prepro90
#   otherwise             -> nvvm
# The -fat and -dolink switches disable this phase.
phase Build is
  help(Build binary)
  default
  execute(
      input($files)
      command($if($USELLC,cstartllc,$if($USENVVM,$ifn($DEBUGLIBNVVM,$if($USEGPUBC,cnvvmas,cpgnvvm),cllvmlink),$if($USELL,cllvmas,$if($CUPREPRO,prepro90,nvvm)))) ptx)
  );

# Fat phase: runs the fatbin command without passing it an input file
# (noinput). Enabled by the -fat and -fatobj switches. The commented-out
# fragments are an older variant that chose between fatbin and import based
# on the smfiles count.
phase Fat is
  help(Build fat binary)
  forall
  execute(
      noinput(1)
      #input($if($expr($smfiles=1),$SMFILE))
      command(fatbin) #$if($expr($smfiles>1),fatbin,import)
  );

# Link phase: passes the input files to the cnvlink command.
# Enabled by the -dolink switch.
phase Link is
  help(Link relocatable objects)
  forall
  execute(
      input($files)
      command(cnvlink)
  );

# Fpifp phase: runs cgenfpifpidx without passing it an input file (noinput).
# Enabled by the -fpifpprelinkobj switch.
phase Fpifp is
  help(Generate FPIFP index file expected by nvlink)
  forall
  execute(
      noinput(1)
      command(cgenfpifpidx)
  );

# -fat=<name>: record <name> in FATNAME and run the Fat phase instead of Build.
switch -fat=name is
  set(FATNAME=$name)
  disable(Build)
  enable(Fat);

# -fatobj=<name>: same as -fat=<name>, and additionally sets FATOBJ=1.
switch -fatobj=name is
  set(FATNAME=$name)
  set(FATOBJ=1)
  disable(Build)
  enable(Fat);

# -dolink: set DOLINK=1 and run the Link phase instead of Build.
switch -dolink is
  set(DOLINK=1)
  disable(Build)
  enable(Link);

# PTXLINK is empty unless -linkptx is given. cnvlink consults it when choosing
# between $SMCAP and $COMPCAP for --arch.
variable PTXLINK is default();
switch -linkptx is
  set(PTXLINK=YES);

# -regfile=<file>: set DOREG=1 and name the registration source file
# (REGFILE) that cnvlink passes to --register-link-binaries.
switch -regfile=file is
  set(DOREG=1)
  set(REGFILE=$file);
# -regobj=<file>: set DOREG=1 and name the object file (REGOBJ) that compreg
# writes.
switch -regobj=file is
  set(DOREG=1)
  set(REGOBJ=$file);

# -L<dir>: append -L<dir> (after $path() processing) to LDLIBS, which cnvlink
# puts on the link command line.
switch -L$libdir is
  help(Passed to linker; Add directory to library search path)
  append(LDLIBS=-L$path($libdir));

# -l<lib>: declared positional for cnvlink.
# NOTE(review): presumably this keeps the option at its command-line position
# among the cnvlink inputs - confirm positional() semantics.
switch -l$lib is
  help(Passed to linker; Add library name to library search list)
  positional(cnvlink);

# -key=<key>: record <key> in FATKEY.
switch -key=key is
  set(FATKEY=$key);

# Special handling when building runtime components:
# -rtbuildvar=<name> records <name> in RTBUILD, which is empty otherwise.
variable RTBUILD is default();
switch -rtbuildvar=name is
  set(RTBUILD=$name);

# SMFILE holds the file named by the most recent -sm switch.
# smfiles starts at 0 and has 1 added for each -sm switch.
variable SMFILE is default();
variable smfiles is default(0);

# -sm <sm> <file>: register <file> as an ELF image for <sm> in the fatbinary
# arguments (FATARGS) and append -cc<sm> to IMPARGS.
# The prelinked FPIFP object and index file are also added as kind=reloc and
# kind=index images, but only when all of these hold: NEEDFPIFP is set,
# NORDC is not set, CUDAXXYY >= 12050, and both FPIFP files exist.
switch -sm sm file is
  add(smfiles=1)
  set(SMFILE=$file)
  append(FATARGS=--image3=kind=elf,sm=$sm,file=$file $if($and($NEEDFPIFP,$not($NORDC),$expr($CUDAXXYY>=12050),$isfile($FPIFPFPRELINKOBJFILE),$isfile($FPIFPFINDEXFILE)),--image2=kind=reloc\,file=$FPIFPFPRELINKOBJFILE --image2=kind=index\,file=$FPIFPFINDEXFILE))
  append(IMPARGS=-cc$sm);

# -compute <cc> <file>: register <file> as a PTX image for <cc> in FATARGS.
switch -compute cc file is
  append(FATARGS=--image3=kind=ptx,sm=$cc,file=$file);

# -lto <sm> <file>: set NEEDCUDALTO=1 (cnvlink then adds its -lto options) and
# register <file> as a kind=nvvm image for <sm> in FATARGS.
# CICCARGS is built from FTZ, NOFASTMATH and USEFMA and passed along as the
# quoted --cicc-cmdline value.
switch -lto sm file is
    set(NEEDCUDALTO=1)
    set(CICCARGS=-ftz=$FTZ -prec_div=$NOFASTMATH -prec_sqrt=$NOFASTMATH -fmad=$USEFMA)
    append(FATARGS=--image3=kind=nvvm,sm=$sm,file=$file --cicc-cmdline=\"$quote($CICCARGS)\");

# -v: set VERBOSE=2.
switch -v is
  help(Display each command as it is run)
  set(VERBOSE=2);

# VERBOSE is empty by default.
variable VERBOSE is   # set by -#, -v
  help(Display each command as it is executed)
  default();

# -bin <file>: hidden switch that sets CUBINFILE, exactly like -o.
switch -bin $file is hide
  help(Name output file)
  set(CUBINFILE=$file);

# -o <file>: set CUBINFILE, which cnvlink uses as its output name when set.
switch -o $file is
  help(Name output file)
  set(CUBINFILE=$file);

# -ptx <file>: set CUPTXFILE.
switch -ptx $file is
  help(Name PTX file)
  set(CUPTXFILE=$file);

# -cuda<ver>: set CUDAVERSION to two $substr slices of <ver> joined by a dot.
# NOTE(review): this looks like it turns a packed version number into
# <major>.<minor>, but the $substr index semantics are not visible here -
# confirm before relying on the exact split.
switch -cuda$ver is
  help(Set cuda version)
  set(CUDAVERSION=$substr($ver,0,-4).$substr($ver,-2,-2));

# -I<dir>: append the bare directory (without the -I prefix) to INC.
switch -I$incdir is
  help(Add directory to include file search path)
  append(INC=$incdir);

# -D<macro>: append -D<macro> to DEF.
switch -D$macro is
  help(Define a preprocessor macro)
  append(DEF=-D$macro);

# -fastmath: set NOFASTMATH=0 and define __USE_FAST_MATH__=1 through
# FASTMATHMACRO. The -lto switch uses NOFASTMATH as the value of -prec_div and
# -prec_sqrt.
switch -fastmath is
  help(Use faster, less accurate math intrinsic functions)
  set(NOFASTMATH=0)
  set(FASTMATHMACRO=-D__USE_FAST_MATH__=1);

# -fpifp: set NEEDFPIFP=1. cnvlink and the -sm switch test NEEDFPIFP before
# adding their FPIFP-related arguments.
switch -fpifp is
  help(Enable FPIFP support)
  set(NEEDFPIFP=1);

# -fpifpprelinkobj <file>: record the prelinked object in FPIFPFPRELINKOBJFILE
# and enable the Fpifp phase, which runs cgenfpifpidx on it.
# Unlike -fpifpindexfile, this switch does not set NEEDFPIFP.
switch -fpifpprelinkobj file is
  help(Pass FPIFP prelinked object file to generate index file)
  enable(Fpifp)
  set(FPIFPFPRELINKOBJFILE=$file);

# -fpifpindexfile <file>: record the FPIFP index file name in FPIFPFINDEXFILE
# and set NEEDFPIFP=1. cgenfpifpidx writes this file (-o) and cnvlink passes
# it to the linker with -uidx when it exists.
# The help text previously repeated the -fpifp help and did not mention the
# file argument.
switch -fpifpindexfile file is
  help(Set FPIFP index file and enable FPIFP support)
  set(FPIFPFINDEXFILE=$file)
  set(NEEDFPIFP=1);

# -debug: request debug information and turn optimization off
# (NVVMOPTLEVEL=0, CUOPT=0). DEBUG is set to 1 first, and DEBUG1, DEBUG2 and
# DEBUG3 are then set from $DEBUG, so the order of these set() attributes is
# kept as is.
switch -debug is
  help(Generate debug information)
  set(GOPT=-g2)
  set(NVVMOPTLEVEL=0)
  set(CUOPT=0)
  set(DEBUG=1)
  set(NVVMDEBUG=-g)
  set(DEBUG1=$DEBUG)
  set(DEBUG2=$DEBUG)
  set(DEBUG3=$DEBUG);

# Pass-through switches. Each appends its argument to a per-tool variable
# after running it through $replace($arg,",", ).
# NOTE(review): the $replace call presumably turns commas into spaces so one
# switch can carry several tool options - confirm $replace semantics.

# -Wnvvm <arg>: appends to NVVMFLAGS.
switch -Wnvvm arg is
  help(Pass argument to nvvm)
  append(NVVMFLAGS=$replace($arg,",", ))
  ;

# -Wptxas <arg>: appends to PTXASARGS, which is empty by default.
variable PTXASARGS is default();
switch -Wptxas arg is
  help(Pass argument to ptxas)
  append(PTXASARGS=$replace($arg,",", ))
  ;

# -Wnvlink,<arg>: appends to NVLINKARGS, which is empty by default and is the
# first item on the cnvlink command line. Note the comma-joined spelling,
# unlike -Wnvvm and -Wptxas.
variable NVLINKARGS is default();
switch -Wnvlink,arg is
  help(Pass argument to nvlink)
  append(NVLINKARGS=$replace($arg,",", ))
  ;

# -Wfatbinary <arg>: appends the argument to FATARGS, the same variable the
# -sm, -compute and -lto switches add their image entries to.
# NOTE(review): $replace($arg,",", ) presumably turns commas into spaces -
# confirm $replace semantics.
switch -Wfatbinary $arg is
  help(Pass argument to fatbinary)
  append(FATARGS=$replace($arg,",", ))
  ;

# -Wimport,<arg>: hidden switch that appends the argument to IMPORTARGS, which
# is empty by default.
variable IMPORTARGS is default();
switch -Wimport,arg is
  hide
  help(Pass argument to import)
  append(IMPORTARGS=$replace($arg,",", ))
  ;

# -cudaallowundefgbls: hidden switch that sets ALLOWUNDEFGBLS=1, which makes
# cnvlink add -allow-undefined-globals.
switch -cudaallowundefgbls is hide
  set(ALLOWUNDEFGBLS=1);

# --devdebug: hidden switch for device-side debugging. It sets DEVDEBUG,
# CFEDEBUG and FATDEBUG, appends -g to NVVMARGS, turns optimization off
# (NVVMOPTLEVEL=0, CUOPT=0) and sets DEBUG1, DEBUG2 and DEBUG3 to 1.
# Unlike -debug, it does not set DEBUG itself.
switch --devdebug is hide
  set(DEVDEBUG=1)
  set(CFEDEBUG=--debug_mode)
  append(NVVMARGS=-g)
  set(NVVMOPTLEVEL=0)
  set(CUOPT=0)
  set(DEBUG1=1)
  set(DEBUG2=1)
  set(DEBUG3=1)
  set(FATDEBUG=-g);

# -lineinfo: set LINEINFO=1 and set NVVMLINEINFO to -generate-line-info.
switch -lineinfo is
  help(Generate line info information)
  set(LINEINFO=1)
  set(NVVMLINEINFO=-generate-line-info);

# Optimization level switches. Each one sets the NVVM level (NVVMOPTLEVEL) and
# CUOPT. All except -O1 also set DEFPTXOPT.
# -O1 leaves DEFPTXOPT untouched and keeps NVVMOPTLEVEL at 0.

# -O0: no optimization.
switch -O0 is
  set(NVVMOPTLEVEL=0)
  set(CUOPT=0)
  set(DEFPTXOPT=0);

# -O1: CUOPT=1 with the NVVM level still at 0.
switch -O1 is
  set(NVVMOPTLEVEL=0)
  set(CUOPT=1);

# -O2: NVVM level 3, CUOPT=2.
switch -O2 is
  set(NVVMOPTLEVEL=3)
  set(CUOPT=2)
  set(DEFPTXOPT=3);

# -O3: NVVM level 3, CUOPT=3.
switch -O3 is
  set(NVVMOPTLEVEL=3)
  set(CUOPT=3)
  set(DEFPTXOPT=3);

# -noopt: same settings as -O0.
switch -noopt is
  set(NVVMOPTLEVEL=0)
  set(CUOPT=0)
  set(DEFPTXOPT=0);

# -ptxinfo=<file>: set DOPTXINFO=1 and record <file> in PTXINFO.
switch -ptxinfo=file is
  help(Save ptxas information)
  set(DOPTXINFO=1)
  set(PTXINFO=$file);

# -info: set DOPTXINFO=1 without naming a file.
switch -info is
  help(Print ptxas information)
  set(DOPTXINFO=1);

# -m64: set CUWIDTH=64. cnvlink passes it on as -m$CUWIDTH.
switch -m64 is
  help(Use 64-bit pointers)
  set(CUWIDTH=64);

# -dcuda=<dir>: record <dir> in CUDAROOT.
switch -dcuda=dir is
  help(Set CUDA root directory)
  set(CUDAROOT=$dir);

# -cache with keyword l1 or l2. The two settings are mutually exclusive, so
# each keyword sets one variable to YES and clears the other:
#   l1 -> CACHEL12 (declared as "caching in both Level 1 and 2")
#   l2 -> CACHEL2  (declared as "caching in Level 2 only")
switch -cache is
  keyword(
  l1(set(CACHEL12=YES) set(CACHEL2=))
  l2(set(CACHEL2=YES) set(CACHEL12=))
  );

# -nofma: set USEFMA=0. The -lto switch passes USEFMA on as -fmad.
switch -nofma is
  set(USEFMA=0);

# -ftz: set FTZ=1. The -lto switch passes FTZ on as -ftz.
switch -ftz is
  set(FTZ=1);

# -restrict: append a kernel-parameter restrict flag to NVVMARGS.
# NOTE(review): the flag is spelled "-kernel-params-are-retrict", which looks
# like a typo for "-kernel-params-are-restrict". Confirm the option name the
# NVVM tool accepts before changing it.
switch -restrict is
  append(NVVMARGS=-kernel-params-are-retrict);

# -usell: set USELL=1, which makes the Build phase pick cllvmas when neither
# USELLC nor USENVVM is set.
switch -usell is
  set(USELL=1);

# -math_uniform: set DEVICE_MATHUNIFORM=1.
switch  -math_uniform is
  set(DEVICE_MATHUNIFORM=1);

# NEEDNVSHMEM is 0 unless -nvshmem is given.
variable NEEDNVSHMEM is default(0);
switch -nvshmem is
  set(NEEDNVSHMEM=1);

# NEEDCURAND is 0 unless -curand is given.
variable NEEDCURAND is default(0);
switch -curand is
  set(NEEDCURAND=1);

# -usenvvm: set USENVVM=1, which makes the Build phase take its NVVM branch
# unless USELLC is set.
switch -usenvvm is
  set(USENVVM=1);

# -useompgpucg: set USEOMPGPUCG=1. Optional keywords add a suffix variable:
#   nvcc -> USEOMPGPUCGNVCC=_nvcc
#   dbg  -> USEOMPGPUCGDBG=_debug
# NOTE(review): nokeyword() presumably allows the switch to be given with no
# keyword at all - confirm.
switch -useompgpucg is
    set(USEOMPGPUCG=1)
    keyword(
        nvcc(set(USEOMPGPUCGNVCC=_nvcc))
        dbg(set(USEOMPGPUCGDBG=_debug))
    )
    nokeyword();

# -omptarget: set OMPTARGETOFFLOAD=1.
switch -omptarget is
  set(OMPTARGETOFFLOAD=1);

# -ptxopt <level>: set PTXOPT directly, replacing its default of $DEFPTXOPT.
switch -ptxopt $level is
  set(PTXOPT=$level);

# Environment exported to the tools. The NVVM library directory is put in
# front of the inherited LD_LIBRARY_PATH and DYLD_LIBRARY_PATH values.
export LD_LIBRARY_PATH=$NVVMLINUXLIBDIR:$NEWLDLIB;
export DYLD_LIBRARY_PATH=$NVVMOSXLIBDIR:$NEWDYLDLIB;
# PATH is NEWWINPATH (NVVM bin directory prepended) for the win64 targets
# listed here, and the unchanged NEWPATH otherwise.
export PATH=$if($index($TARGET,win64,win64-llvm,win64edg-llvm),$NEWWINPATH,$NEWPATH);

# Report an error when USECUDAROOT is non-empty but is not a directory.
# The check is skipped when DRYRUN is set.
error($if($and($notequal($USECUDAROOT,),$not($isdir($USECUDAROOT)),$not($DRYRUN)),USECUDAROOT value is not a directory: $USECUDAROOT));

# -f18rt: hidden switch that sets USEF18CUDART=1.
switch -f18rt is hide
  help(Use F18 CUDA runtime)
  set(USEF18CUDART=1);

# -devicepartiallink: set DEVICEPARTIALLINKING=1. cnvlink then adds -r and
# skips the compreg step.
switch -devicepartiallink is
  help(Enable device partial linking)
  set(DEVICEPARTIALLINKING=1);
