#
#     Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto.  Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
#

include rcfiles/cudaselectrc;

# Set when any of $ANYCUF, $ANYCU or $CUDALIBNEEDED is set, or when ACCELS contains tesla.
variable DETECTCUDA is default($or($ANYCUF
         ,$ANYCU,$contains($ACCELS,tesla),$CUDALIBNEEDED));
# Expands to CUDA_VERSION=$CUDAXXYY when the CUDA runtime or a CUDA library is needed,
# or when ACCELS contains tesla; empty otherwise.
variable CUDAVERSIONDEFINE is default($if($or($CUDARTNEEDED,$CUDALIBNEEDED,$contains($ACCELS,tesla)),CUDA_VERSION=$CUDAXXYY));
# Macro definitions used when $TA or $LNGACC is set (turned into -D/-def options by the -acc switch).
variable ACCDEFINES is default($if($or($TA,$LNGACC),_ACCEL=201003 _OPENACC=201711));
# NEEDCUDAnnn: empty by default. When set, the matching error() check in this file
# rejects GPU compilations whose $CUDAXXYY is older than the named CUDA version.
variable NEEDCUDA80 is default();
variable NEEDCUDA90 is default();
variable NEEDCUDA100 is default();
variable NEEDCUDA110 is default();
variable NEEDCUDA111 is default();
variable NEEDCUDA114 is default();
variable NEEDCUDA118 is default();
variable NEEDCUDA127 is default();
variable NEEDCUDA128 is default();
# 1 when SYSACCELS contains tesla, 0 otherwise.
variable DEFTESLAONLY is default($if($contains($SYSACCELS,tesla),1,0));
variable ACCTESLAONLY is default();
# True when ACCELS is exactly "multicore".
variable DEFMULTIONLY is default($equal($ACCELS,multicore));
variable ACCMULTIONLY is default();
# True when ACCELS contains multicore.
variable ACCMULTI is default($contains($ACCELS,multicore));
# Indicates compilation for OpenACC multicore.
variable ACCMC is default($and($or($TA,$LNGACC),$ACCMULTI));
# Indicates compilation for OpenACC GPU.
variable ACCGPU is default($and($or($TA,$LNGACC),$contains($ACCELS,tesla)));
# Indicates compilation for OpenACC host.
variable ACCHOST is default($and($or($TA,$LNGACC),$contains($ACCELS,host)));
variable ACCRPATH is default();
# Toggled by the hidden trace/notrace sub-options of -acc=multicore.
variable MULTICORETRACE is default(1);
# Appended to ACCCGFLAGS and ACCFEFLAGS by the -acc switch.
# NOTE(review): the meaning of "-x 186 0x80" is not visible in this file.
variable PADDFLAG is default(-x 186 0x80);
# When set, NVILINKARGS passes -debug to the accelerator linker.
variable DEVDEBUG is default(0);
# When set, ACCLINK passes -vv to the accelerator linker.
variable VERYVERBOSE is default();

# This variable is set to one when compilation for GPU is requested, which means -acc, -acc=gpu, -mp=gpu, -stdpar, -stdpar=gpu, -cuda, .cu, .cuf
# Terms of the $or, in order: $TA, OpenACC targeting GPU, OpenMP targeting GPU,
# stdpar targeting GPU, -cuda, .cu sources, .cuf sources.
# The stdpar term uses $LNGSPA/$TGTSPA; it must not be a second copy of the
# OpenACC term, otherwise -stdpar=gpu alone is not recognised as a GPU compilation
# and the CUDA minimum-version checks guarded by this variable are skipped.
variable ISCOMPFORGPU is default($or($TA,$land($LNGACC,$expr($TGTACC & $TGTGPU)),$land($LNGOMP,$expr($TGTOMP & $TGTGPU)),$land($LNGSPA,$expr($TGTSPA & $TGTGPU)),$CUDA,$ANYCU,$ANYCUF));

# Minimum CUDA version checks. Each line raises an error when all three hold:
# compilation targets the GPU ($ISCOMPFORGPU), the selected toolkit version
# $CUDAXXYY is below the threshold, and the matching NEEDCUDAnnn variable is set.
# Thresholds are encoded like $CUDAXXYY, e.g. 11010 for CUDA 11.1 and 12080 for CUDA 12.8.
error($if($and($ISCOMPFORGPU,$expr($CUDAXXYY<8000),$NEEDCUDA80),CUDA 8.0 or later required));
error($if($and($ISCOMPFORGPU,$expr($CUDAXXYY<9000),$NEEDCUDA90),CUDA 9.0 or later required));
error($if($and($ISCOMPFORGPU,$expr($CUDAXXYY<10000),$NEEDCUDA100),CUDA 10.0 or later required));
error($if($and($ISCOMPFORGPU,$expr($CUDAXXYY<11000),$NEEDCUDA110),CUDA 11.0 or later required));
error($if($and($ISCOMPFORGPU,$expr($CUDAXXYY<11010),$NEEDCUDA111),CUDA 11.1 or later required));
error($if($and($ISCOMPFORGPU,$expr($CUDAXXYY<11040),$NEEDCUDA114),CUDA 11.4 or later required));
error($if($and($ISCOMPFORGPU,$expr($CUDAXXYY<11080),$NEEDCUDA118),CUDA 11.8 or later required));
error($if($and($ISCOMPFORGPU,$expr($CUDAXXYY<12070),$NEEDCUDA127),CUDA 12.7 or later required));
error($if($and($ISCOMPFORGPU,$expr($CUDAXXYY<12080),$NEEDCUDA128),CUDA 12.8 or later required));

# Result of the cudacc() action, evaluated only when DETECTCUDA is set and
# SETCAPS is not; empty otherwise. An empty value is treated elsewhere in this
# file as "no GPU detected".
variable DETECTEDCAPS is default($if($DETECTCUDA,$ifn($SETCAPS,$action(cudacc()))));
# NOTE(review): presumably set to 1 when compute capabilities are given explicitly
# with -gpu=ccXY; the code that sets it is not visible here.
variable SETCAPS is default(0);

# Default compute capabilities per programming languages: OpenMP cc70, STDPAR cc60, OpenACC cc50 (like nvcc)
# Selection order:
#   1. OpenMP targeting GPU                                         -> 70
#   2. C++ stdpar targeting GPU, or C++ with $TGTCUDA, and OpenACC
#      is not also targeting GPU                                    -> 60
#   3. anything else                                                -> 50
# The inner $if must carry 50 as its else-branch; closing it right after 60
# would leave 50 as trailing text and produce "60 50" for case 2.
variable PLDEFAULTCOMPUTECAPS is default($if($land($LNGOMP,$expr($TGTOMP & $TGTGPU)),70,
                                       $if($land($or($land($LNGSPA,$expr($TGTSPA & $TGTGPU),$equal($DRIVERLANG,CPP)),$land($TGTCUDA,$equal($DRIVERLANG,CPP))),$not($land($LNGACC,$expr($TGTACC & $TGTGPU)))),60,
                                       50)));

# When compiling for GPU, emit warning when using `-g` and compiling
# multiple compute capabilities that include cc90 or older (NVVM70)
# and cc100+ (NVVM SOLID). Mixing NVVM70 and NVVM SOLID is not
# supported with `-g` because they have incompatible metadata.  This
# can happen only if we can actually enable compilation for cc100,
# which is possible only with CUDA 12.7 or newer.
# Both warnings below apply only when $DRIVERLANG is Fortran.
warning($if($land($equal($DRIVERLANG,Fortran),$DETECTCUDA,$DEBUGFLAG,$expr($CUDAXXYY>=12070),
            $lor(
                $land($SETCAPS,$ISPREBLACKWELL,$ISBLACKWELLPLUS), # multiple cc are being requested with `-gpu=ccX.Y` and "cc100 or cc101 or cc120" is there, e.g. -gpu=cc90,cc100
                $CCALL, # `-g -gpu=ccall`
                $CCALLMAJOR # `-g -gpu=ccall-major`
            )),Support for '-g' is available for cc100 and newer compute capabilities but not when combined with older compute capabilities (cc90 and older). Blackwell support will be disabled.));

# Companion warning for the case where no GPU was detected (DETECTEDCAPS is
# empty) and no compute capability was set explicitly (SETCAPS is not set).
warning($if($land($equal($DRIVERLANG,Fortran),$DETECTCUDA,$DEBUGFLAG,$expr($CUDAXXYY>=12070),$equal($DETECTEDCAPS,),$not($SETCAPS)), # compiling with -g on a machine with no GPU (compilation for all supported CC)
        No gpu detected\, compiling for all compute capabilities lower than cc100. Support for '-g' is available for cc100 and newer compute capabilities but not when combined with older compute capabilities (cc90 and older).));

# Default list of compute capabilities to compile for.
#   1. If a GPU was detected (DETECTEDCAPS non-empty), use the detected capabilities.
#   2. Otherwise, if $ISCCNATIVE is set, use the per-language default
#      (PLDEFAULTCOMPUTECAPS).
#   3. Otherwise, use every capability that is both listed in $SYSCAP and
#      supported by the selected CUDA toolkit ($CUDAXXYY).
# Each entry in case 3 must test $SYSCAP for its own capability number.
variable DEFCOMPUTECAP is default(
    $if($notequal($DETECTEDCAPS,),$DETECTEDCAPS,
    $if($ISCCNATIVE,$PLDEFAULTCOMPUTECAPS,
    $if($and($contains($SYSCAP,35),$expr($CUDAXXYY<12000)),35)
    $if($contains($SYSCAP,50),50)
    $if($and($contains($SYSCAP,60),$expr($CUDAXXYY>=8000)),60)
    $if($and($contains($SYSCAP,61),$expr($CUDAXXYY>=8000)),61)
    $if($and($contains($SYSCAP,62),$expr($CUDAXXYY>=10000)),62)
    $if($and($contains($SYSCAP,70),$expr($CUDAXXYY>=9000)),70)
    $if($and($contains($SYSCAP,72),$expr($CUDAXXYY>=10000)),72)
    $if($and($contains($SYSCAP,75),$expr($CUDAXXYY>=10000)),75)
    $if($and($contains($SYSCAP,80),$expr($CUDAXXYY>=11000)),80)
    $if($and($contains($SYSCAP,86),$expr($CUDAXXYY>=11010)),86)
    $if($and($contains($SYSCAP,87),$expr($CUDAXXYY>=11040)),87)
    $if($and($contains($SYSCAP,89),$expr($CUDAXXYY>=11080)),89)
    $if($and($contains($SYSCAP,90),$expr($CUDAXXYY>=11080)),90)
    # NVVM70 and NVVM SOLID are incompatible at the debug metadata
    # level. When compiling with `-g` for all CC (e.g. in a machine
    # without GPU or with `-gpu=ccall`) disable all archs that require
    # NVVM SOLID.
    $ifn($land($equal($DRIVERLANG,Fortran),$DEBUGFLAG),$if($land($contains($SYSCAP,100),$expr($CUDAXXYY>=12070)),100))
    $ifn($land($equal($DRIVERLANG,Fortran),$DEBUGFLAG),$if($land($contains($SYSCAP,101),$expr($CUDAXXYY>=12080)),101))
    $ifn($land($equal($DRIVERLANG,Fortran),$DEBUGFLAG),$if($land($contains($SYSCAP,120),$expr($CUDAXXYY>=12080)),120))
    )));
# Indirection chain: DEFCOMPUTECAP -> DEFCOMPUTECAPS -> COMPUTECAP, each overridable.
variable DEFCOMPUTECAPS is default($DEFCOMPUTECAP);
variable COMPUTECAP is default($DEFCOMPUTECAPS);    # default may be set by the user
# Effective compute capability list:
#   - $OMPCOMPUTECAPS when OpenMP targets the GPU and that variable is non-empty;
#   - $SPACOMPUTECAPS for C++ stdpar on GPU (or C++ with $TGTCUDA) when OpenACC
#     is not also targeting the GPU;
#   - $COMPUTECAP otherwise.
variable COMPUTECAPS is default($if($land($LNGOMP,$expr($TGTOMP & $TGTGPU),$notequal($OMPCOMPUTECAPS,)),$OMPCOMPUTECAPS,$if($land($or($land($LNGSPA,$expr($TGTSPA & $TGTGPU),$equal($DRIVERLANG,CPP)),$land($TGTCUDA,$equal($DRIVERLANG,CPP))),$not($land($LNGACC,$expr($TGTACC & $TGTGPU)))),$SPACOMPUTECAPS,$COMPUTECAP)));
# NOTE(review): presumably counts "next" entries in the capability list; its use is not visible here.
variable CCNEXTCOUNT is default(0);

# default value is empty; this is set by -acc=[no]required
# CGARGS maps the value 1 to "-x 180 6" and 0 to "-x 180 2"; empty adds nothing.
variable ACCREQUIRED is default();
# When set, CGARGS receives "-x 192 0x2000000".
variable ACCBUILDLIB is default();

# Whether or not OpenACC routine parallelism should be inferred; this is set by -acc=[no]routinepar
# When set, CPP1ARGS receives --accel_routinepar.
variable ACCROUTINEPAR is default();

# Suppress GPU driver warnings
# Value is taken from the NVCOMPILER_SUPPRESS_GPU_FLAGS_WARNINGS environment variable; defaults to 0.
variable NV_SUPPRESS_GPU_FLAGS_WARNINGS is default(0) environment(NVCOMPILER_SUPPRESS_GPU_FLAGS_WARNINGS);

# OpenACC Stub Library
# NEEDACCSTUBLIB is set by the hidden -acc=stub sub-option.
variable NEEDACCSTUBLIB is default(0);
# -laccstub, or -laccstub_static when $ISSTATIC is set; empty when the stub library is not needed.
variable ACCSTUBLIB is default($if($NEEDACCSTUBLIB,-laccstub$if($ISSTATIC,_static)));

# "host" unless $ANYCUF or $ANYCU is set, in which case it is empty.
variable DEFHOST is default($ifn($or($ANYCUF,$ANYCU),host));
# To change the -acc default from -acc=gpu,host to
# -acc=gpu,multicore change the DEFHOST default value from 'host' to
# 'multicore' above
variable DEFACCEL is default($if($contains($SYSACCELS,tesla),tesla) $DEFHOST);
# Set by the -acc switch to $DEFACCEL when OpenACC targets the GPU.
variable DDEFACCEL is default();
# Accelerator target list: tesla when any cu/cup/cuf/CUF source suffix is used, else $DDEFACCEL.
variable ACCELS is default($if($suffixused(cu,cup,cuf,CUF),tesla,$DDEFACCEL));
# When non-empty, ACCELFLAG adds "-aopt $AOPT".
variable AOPT is default();
# When non-empty, ACCELFLAG adds "-x 63 <ACOPT + 1>".
variable ACOPT is default();

# Managed / pinned memory configuration and CUDA static library naming.
variable MANAGED is default(0);
variable AUTOCOMPARE is default(0);
variable REDUNDANT is default(0);
# Per-tool managed/pinned settings, each defaulting to an overridable DEF* variable.
variable F901MANPIN is default($DEFF901MANPIN);
variable DEFF901MANPIN is default();
variable CPP1MANPIN is default($DEFCPP1MANPIN);
variable DEFCPP1MANPIN is default();
variable CGMANPIN is default($DEFCGMANPIN);
variable DEFCGMANPIN is default();
variable MAYNEEDMANAGEDMEMORY is default(0);
variable NEEDMANAGEDMEMORY is default(0);
variable DEFNEEDMANPINMEMORYINTERCEPTION is default(0);
variable NEEDMANPINMEMORYINTERCEPTION is default($DEFNEEDMANPINMEMORYINTERCEPTION);
# Link arguments for the memory interception library: nvhpcmanaux_nvmalloc when
# $NVMALLOCNEEDED is set and $PGISTATICX is non-empty, nvhpcmanaux otherwise.
# When $PGISTATICX is non-empty the library is wrapped in
# --whole-archive / --no-whole-archive.
variable MANPINMEMORYINTERCEPTION is default(
    $PGISTATICX $if($notequal($PGISTATICX,),--whole-archive)
    $if($and($NVMALLOCNEEDED, $notequal($PGISTATICX,)),
        $(LIBSW)nvhpcmanaux_nvmalloc,
        $(LIBSW)nvhpcmanaux)
    $if($notequal($PGISTATICX,),--no-whole-archive));
# Link arguments for nvhpcman: linked unconditionally when NEEDMANAGEDMEMORY is
# set; linked between $ASNEEDED / $NOASNEEDED when only MAYNEEDMANAGEDMEMORY is set.
variable MANCUDALIB is default(
    $if($NEEDMANAGEDMEMORY,
        $PGISTATICX $(LIBSW)nvhpcman $PGIUNSTATICX,
        $if($MAYNEEDMANAGEDMEMORY, $PGISTATICX $ASNEEDED $(LIBSW)nvhpcman $NOASNEEDED $PGIUNSTATICX)));
# Set to 1 by the -acc switch.
variable NEEDACCLIB is default(0);
# The *INIT variables below are turned into -init=... accelerator-link options
# (see INITARGS and NVILINKARGS).
variable TIMEINIT is default(0);
variable ZEROINIT is default(0);
variable PININIT is default(0);
variable MANINIT is default(0);
variable EXTRAINIT is default();
variable MANPREFER is default();
variable CUDA_NOATTACH is default(0);
# When set, NVILINKARGS passes -nordc.
variable NORDC is default(0);
# "_static" library-name suffix when $PGISTATICX is non-empty.
variable CUDARTSTATIC is default($if($notequal($PGISTATICX,),_static));
# Same suffix, but only on the linux86-64, linuxpower and linuxarm64 targets.
variable CUDALIBSTATIC is default($if($index($TARGET,linux86-64,linuxpower,linuxarm64),$if($notequal($PGISTATICX,),_static)));
# Static link list used when $PGISTATICX is non-empty: culibos, cublas, curand and
# cudaforwrapblas; cublasLt is added when $CUDAXXYY > 10000 and cudaforwrapblas117
# when $CUDAXXYY > 11060.
variable CULIBOSSTATIC is default(
    $if($notequal($PGISTATICX,),$(LIBSW)culibos $(LIBSW)cublas$(CUDARTSTATIC) $if($expr($CUDAXXYY>10000),$(LIBSW)cublasLt$(CUDARTSTATIC)) $(LIBSW)curand$(CUDARTSTATIC) $(LIBSW)cudaforwrapblas $if($expr($CUDAXXYY>11060),$(LIBSW)cudaforwrapblas117)));
# "stdc++" on the listed targets when $CUDAXXYY >= 9020, "c" otherwise.
variable CUBLASSTDLIB is default($if($and($expr($index($TARGET,linux86-64,linuxpower,linuxarm64,win64,win64-llvm) > 0),$expr($CUDAXXYY>=9020)),stdc++,c));

# The CFI acc lib should only be linked when using nvflang; checking USEFLANG1 is
# a means of checking this. Note that this only sets the library name - it is
# still only added to the link line (via ACCLIB) when -acc is used.
variable CFIACCLIB is default($if($USEFLANG1,-lacchostcfi));

# Pull in the platform-specific accelerator rcfile, named from $PGSYS and $PGLEN.
include rcfiles/acc$PGSYS$(PGLEN)rc;
# Prefix inserted before "acc" in the accelerator library names built by ACCLIB.
variable ACCPREFIX is default();

# "s" suffix for accelerator library names on linux86-64 when $STATICLIB is set.
variable ACCLIBSUFF is default($if($land($index($TARGET,linux86-64),$STATICLIB),s));

# what accelerators we support on this target, now only Tesla
variable SYSACCELS is default( tesla );

# if -acc with gpu target (-acc=gpu or -acc -target=gpu)
variable ACCGPULINK is default($land($LNGACC,$expr($TGTACC & $TGTGPU))) help(Set if compiling OpenACC for GPU);
# if -mp and gpu target (-mp=gpu or -mp -target=gpu)
variable MPGPULINK is default($land($LNGOMP,$expr($TGTOMP & $TGTGPU))) help(Set if compiling OpenMP for GPU);

# Set to one when -no-default-cuda is used (dev-only)
variable USENODEFAULTCUDA is default(0);

# now the same targets on all platforms
# Each name listed here selects an acc_init_link_<name> object in ACCLIB.
variable SYSACCOBJS is default(
	$if($contains($ACCELS,tesla),cuda)
	$if($contains($ACCELS,multicore),multicore)
	$if($contains($ACCELS,host),host)
	$if($MPGPULINK,mp)
	$if($ACCGPULINK,acc)
	);
# Suffix for the devaux library name, by CUDA version: 110 below 11.3,
# 113 below 11.8, 118 below 12.7, empty for 12.7 and newer.
variable ACCCUDAVSNSUFF is default($if($expr($CUDAXXYY<11030),110,
                                   $if($expr($CUDAXXYY<11080),113,
                                   $if($expr($CUDAXXYY<12070),118))));
# The else case is to allow using `-gpu=managed` with `nvfortran -cuda`.
variable SYSACCLIBS is default($if($or($contains($ACCELS,tesla),$ISCUDALIB),devaux$ACCCUDAVSNSUFF device$ACCLIBSUFF,$if($land($CUDARTNEEDED,$equal($DRIVERLANG,Fortran),$NEEDMANAGEDMEMORY),devaux$ACCCUDAVSNSUFF device$ACCLIBSUFF)));

# Accelerator runtime link arguments; empty unless NEEDACCLIB is set. Contents, in order:
#   - one acc_init_link_<name> object per entry in SYSACCOBJS;
#   - acc_init_set_cuda when ACCELS contains tesla and both CUDARTNEEDED and NORDC are set;
#   - acc_init_set_unified when both NEEDUNIFIED and NORDC are set;
#   - the acc<name> libraries: host plus SYSACCLIBS (SYSACCLIBS is listed a second
#     time, first, when $PGISTATICX is non-empty);
#   - $CFIACCLIB.
# $OMPINTEROPSTART / $OMPINTEROPEND bracket the libraries for static links
# unless USEOTHEROMPLIB is set.
variable ACCLIB is default($if($NEEDACCLIB,
    $foreach(oo,$SYSACCOBJS, $lookup($COMPLIBOBJ,acc_init_link_$oo.$OBJSUFFIX))
    $if($land($contains($ACCELS,tesla),$CUDARTNEEDED,$NORDC),$lookup($COMPLIBOBJ,acc_init_set_cuda.$OBJSUFFIX))
    $if($and($NEEDUNIFIED,$NORDC),$lookup($COMPLIBOBJ,acc_init_set_unified.$OBJSUFFIX))
    $PGISTATIC
    $ifn($USEOTHEROMPLIB,$if($ISSTATIC,$OMPINTEROPSTART))
    $foreach(ll,
      $if($notequal($PGISTATICX,), $SYSACCLIBS) host$ACCLIBSUFF $SYSACCLIBS, 
      $(LIBSW)$(ACCPREFIX)acc$(ll)
    )
    $CFIACCLIB
    $ifn($USEOTHEROMPLIB,$if($ISSTATIC,$OMPINTEROPEND))
    $PGIUNSTATIC));

# Additional accelerator link arguments. The first $if (guarded by NEEDACCLIB)
# adds $DLLIB for non-static links, the cuda library when NVSHMEM or cuSPARSE is
# needed in a non-static link, and ws2_32.lib on Windows targets. The two
# trailing $if terms are outside that guard: the unified-memory object when
# NEEDUNIFIED is set, and cudadevice when the CUDA runtime is needed or when
# ACCELS contains tesla/multicore and NEEDACCLIB is set.
variable ACCLIB2 is default($if($NEEDACCLIB,
    $ifn($STATICLIB,$PGIUNSTATICX $DLLIB)
    $if($and($not($STATICLIB),$or($NEEDCUDANVSHMEM,$NEEDCUDACUSPARSE)),$(LIBSW_NOLIB)cuda)
    $if($index($TARGET,win64,win64-llvm),-defaultlib:ws2_32.lib))
    $if($NEEDUNIFIED, $lookup($COMPLIBOBJ,__gpu_unified_compiled.$OBJSUFFIX))
    $if($or($CUDARTNEEDED,$land($contains($ACCELS,tesla,multicore),$NEEDACCLIB)),$PGISTATICX $(LIBSW)cudadevice $PGIUNSTATICX));

# CUDA include path list. Empty when $NVCCHOST is set or when neither the CUDA
# runtime, a CUDA library nor the tesla target is in use. Otherwise:
#   - with $TGTCUDA or $ISCUDALIB: the math library include directory (plus its
#     cufftmp subdirectory when NEEDCUFFTMP is set, and the CUPTI include
#     directory when defined) and every entry of $COMMLIBSINCDIR;
#   - $USECUDAROOT/include when $USECUDAROOT is non-empty.
variable CUDAINC is default($if($land($not($NVCCHOST),$lor($CUDARTNEEDED,$CUDALIBNEEDED,$contains($ACCELS,tesla))),
                                $if($or($TGTCUDA,$ISCUDALIB),
                                    $if($notequal($CUDAMATHINCDIR,),
                                        $if($NEEDCUFFTMP,$path($CUDAMATHINCDIR/cufftmp))
                                        $if($notequal($CUDACUPTIINCDIR,),$path($CUDACUPTIINCDIR))
                                        $path($CUDAMATHINCDIR))
                                    $if($notequal($COMMLIBSINCDIR,),$foreach(i,$COMMLIBSINCDIR,$path($i) )))
                                $if($notequal($USECUDAROOT,),$path($USECUDAROOT/include))));

# Bounds checking is disabled when ACCELS contains tesla or $FNEEDCUDA is set;
# warn if -Mbounds was requested anyway.
set NOBOUNDSCHECK=$if($or($contains($ACCELS,tesla),$FNEEDCUDA),1,0);
warning($if($and($NOBOUNDSCHECK,$BOUNDSCHECK),CUDA Fortran or OpenACC GPU targets disables -Mbounds));

# append USRINC=$CUDAINC;

# One "-accel <target>" pair per entry in ACCELS.
variable ACCELFLAG1 is default($foreach(a,$ACCELS,-accel $a ));
# ACCELFLAG1 plus "-aopt $AOPT" when AOPT is non-empty and "-x 63 <ACOPT + 1>" when ACOPT is non-empty.
variable ACCELFLAG is default($ACCELFLAG1 $if($notequal($AOPT,),-aopt $AOPT) $if($notequal($ACOPT,),-x 63 $expr($ACOPT + 1)));
# Selects "-y 189 0x4000000" (set) or "-x 189 0x4000000" (unset) in CGARGS.
variable ACCAUTOPAR is default(1);

# Accelerator link command, built only when DOACCLINK is set: the acclink tool,
# -static-nvidia for static links, then NVILINKARGS, INITARGS and -vv when
# VERYVERBOSE is set.
variable ACCLINK is default($if($DOACCLINK,$tool(acclink) $if($ISSTATIC,-static-nvidia) $NVILINKARGS $INITARGS $if($VERYVERBOSE,-vv,)));

# Partial Link Support (when we decide to make partial link the
# default behavior we should remove the line below and uncomment the
# next line)
variable NEEDPARTIALLINK is default(0);
# If partial link is enabled by default it still should be disabled
# when creating a shared object
# variable NEEDPARTIALLINK is default($if($DYNAMICLINK,0,1));

# FPIFP Link support
# When set, NVILINKARGS passes -fpifp.
variable NEEDFPIFP is default(0);

# When set, NVILINKARGS passes -keep.
variable NKEEP is default(0);

# `tesla` should be added to ACCELS when compiling CUF/CU programs
# (skipped when ACCELS already contains tesla)
append ACCELS=$ifn($contains($ACCELS,tesla),$if($lor($ANYCUF,$ANYCU),tesla));

# Support for exposing specific symbols to dlsym in statically linked applications
# NVHPCLDSYMSPATH is the nvhpc.syms file looked up in the compiler lib directory;
# NVHPCLDSYMS becomes the --dynamic-list option when that file is found.
variable NVHPCLDSYMSPATH is default($lookup($COMPBASE/$COMPSYS/$COMPVER/$(COMPLIBPREFIX)lib,nvhpc.syms));
variable NVHPCLDSYMS is default($if($NVHPCLDSYMSPATH,--dynamic-list=$NVHPCLDSYMSPATH));
# Set to 1 by the -acc switch.
variable NEEDNVHPCLDSYMS is default(0);

# This variable is set to 1 when `-cudaforlibs` is used, it helps
# enabling fortran wrappers when used with `-cudalib` and we are not
# linking with `nvfortran`
variable ISCUDAFORLIBS is default();

# Device-code compiler tool (pgnvd).
# It compiles the gpu code generated by the accelerator compiler backend; the
# backend functions 'compile_cuda_kernel'/'acc_compile' invoke it with options
# prepared by 'cuda_build_compile_options'. pgacclnk also runs it during the
# accelerator link step. The executable name can be overridden through NVDD.
variable NVDD is default(pgnvd);
tool pgnvd is program($NVDD) directory($CCOMPDIR);

# Accelerator link tool (pgacclnk); the executable name can be overridden
# through ACCLNK.
variable ACCLNK is default(pgacclnk);
tool acclink is
    program($ACCLNK) directory($CCOMPDIR);

# Pass-through switches. The first three append to ACCCGFLAGS (backend options);
# the last three append to EXTRAACCLNK, which NVILINKARGS places on the
# accelerator link command line.

# -Wnvvm,<arg>: forwards <arg> as "-wnvvm ,<arg>".
switch -Wnvvm,arg is
	help(Pass argument to nvvm)
	helpgroup(target)
	# pass to the backend, which forwards to nvdd, which forwards to nvvm
	append(ACCCGFLAGS=-wnvvm ,$arg)
	;

# -Wptxas,<arg>: forwards <arg> as "-wptxas ,<arg>".
switch -Wptxas,arg is
	help(Specify options directly to ptxas, the PTX optimizing assembler.)
	helpgroup(target)
	# pass to the backend, which forwards to nvdd, which forwards to ptxas
	append(ACCCGFLAGS=-wptxas ,$arg)
	;

# -Wfatbinary,<arg>: forwards <arg> as "-wfatbinary ,<arg>".
switch -Wfatbinary,arg is
	help(Pass argument to fatbinary)
	helpgroup(target)
	# pass to the backend, which forwards to nvdd, which forwards to fatbinary
	append(ACCCGFLAGS=-wfatbinary ,$arg)
	;

# Extra arguments for the accelerator linker, collected from the switches below.
variable EXTRAACCLNK is default();
# -Wacclnk,<arg> (hidden): commas in <arg> are replaced by spaces, so each
# comma-separated item becomes its own acclnk argument.
switch -Wacclnk,arg is
	hide
	help(Pass argument to acclnk)
	helpgroup(target)
	append(EXTRAACCLNK=$replace($arg,",", ))
	;

# -Wnvlink,<arg>: re-emitted unchanged as "-Wnvlink,<arg>".
switch -Wnvlink,arg is
	help(Specify options directly to nvlink, the device linker.)
	helpgroup(linker)
	# pass to acclnk, which forwards on to nvlink
	append(EXTRAACCLNK=-Wnvlink,$arg)
	;

# -Wimport,<arg> (hidden): re-emitted unchanged as "-Wimport,<arg>".
switch -Wimport,arg is
	hide
	help(Pass argument to import)
	helpgroup(target)
	# pass to acclnk, which forwards on to import
	append(EXTRAACCLNK=-Wimport,$arg)
	;

# Arguments for the accelerator linker (used by ACCLINK). Empty unless ACCELS
# contains tesla or $FNEEDCUDA / $CNEEDCUDA is set. Each line below adds one
# option, guarded by the variable named in its $if. The -computecap list is
# emitted only when ACCRELOC is set and skips the pseudo-capability "next".
# The -init=... options are emitted only when NEEDACCLIB is set.
variable NVILINKARGS is default(
    $if($or($contains($ACCELS,tesla),$FNEEDCUDA,$CNEEDCUDA),
        $if($DEVDEBUG, -debug)
        -nvidia $tool(pgnvd) -cuda$CUDAXXYY
        $if($notequal($USECUDAROOT,),-cudaroot $USECUDAROOT)
        $if($or($FNEEDCUDA,$CNEEDCUDA),-cudalink)
        $if($NEEDUNIFIED,-unifiedmem)
        $if($ACCRELOC,$foreach(cc,$COMPUTECAPS, $ifn($equal($cc,next),-computecap=$cc)))
        $if($DYNAMICLINK,-dyninit)
        $if($NORDC,-nordc)
        $if($GGPUFILE,-gpufile)
        $if($NKEEP,-keep)
        $NVVMFLAGS
        $if($NEEDPARTIALLINK,-partiallink)
        $if($NEEDCUDALTO,-lto $if($expr($TGTOMP & $TGTGPU),-maxrregcount 128))
        $if($NEEDFPIFP,-fpifp)
        $if($USENODEFAULTCUDA,-nodefaultcuda)
        $EXTRAACCLNK
        $if($NEEDACCLIB,
            $if($contains($ACCELS,tesla),-init=ctxrel)
            $if($CUDA_NOATTACH,-init=noattach)
            $if($ZEROINIT,-init=zeroinit)
            $if($PININIT,-init=pinned)
            $if($MANINIT,-init=managed $if($notequal($MANPREFER,),-init=managed_prefer$MANPREFER))
            $if($and($CUDARTNEEDED,$contains($ACCELS,tesla)),-init=cuda)
            $EXTRAINIT
        )
    )
);

# -init=tatime when TIMEINIT is set (used by ACCLINK).
variable INITARGS is default($if($TIMEINIT,-init=tatime));
# The accelerator link step runs when relocatable device code is enabled or the
# accelerator library is needed.
variable DOACCLINK is default($if($or($ACCRELOC,$NEEDACCLIB),1,0));
# True when ACCELS contains tesla or $FNEEDCUDA / $CNEEDCUDA is set.
variable ADDCUDA is default($or($contains($ACCELS,tesla),$FNEEDCUDA,$CNEEDCUDA));

set IPANOARG=$contains($ACCELS,tesla);

# Code generation tuning values; they are passed to the backend through the
# "-x <n> <value>" options appended to ACCCGFLAGS and CGARGS in this file.
variable MINBLKSPERSM is default(0);
variable MAXREGCOUNT is default(0);
variable MAXNVVMTHREADS is default(0xFFFF);
variable DEFVECTLEN is default(0);
variable DEFWORKERS is default(0);
variable STACKLIMIT is default(512);

append CPP1ARGS=$if($ACCROUTINEPAR, --accel_routinepar);

# Backend (CG) arguments: accelerator flags, auto-parallelisation toggle,
# -acc=[no]required setting, library-build flag, CUDA root and stack limit
# (an empty STACKLIMIT selects "-x 205 0x1000" instead of "-x 60 <limit>").
append CGARGS=$ACCELFLAG $ACCCGFLAGS $ACCCGDEF $ACCDEBUG $LLVMFLAGS
              $if($ACCAUTOPAR,-y 189 0x4000000,-x 189 0x4000000)
              $select($ACCREQUIRED,1,-x 180 6,0,-x 180 2)
              $if($ACCBUILDLIB,-x 192 0x2000000)
              $if($ADDCUDA,$if($notequal($USECUDAROOT,),-cudaroot $USECUDAROOT))
              $NVVMXBITS
              $if($equal($STACKLIMIT,),-x 205 0x1000,-x 60 $STACKLIMIT);

append F901ARGS=$ACCFEFLAGS $ACCCGDEF $ACCDEBUG $LLVMFLAGS $ACCELFLAG1;
# Add the OpenACC macros and, when defined, CUDA_VERSION to the user definitions.
append USRDDEF=$ACCDEF $MPDEFCPP $if($CUDAVERSIONDEFINE, -D$CUDAVERSIONDEFINE);
append USRDEFDEF=$ACCDEFDEF $if($CUDAVERSIONDEFINE, -def $CUDAVERSIONDEFINE);

# Filled by the -acc switch from ACCDEFINES as -D<macro> and -def <macro> lists.
variable ACCDEF is default();
variable ACCDEFDEF is default();

# "-x 176 0x100" followed by one "-cudacap <cc>" per entry of COMPUTECAPS;
# empty unless ADDCUDA is set and COMPUTECAPS is non-empty.
variable DEFAULTCAPFLAG is default($if($ADDCUDA,$if($notequal($COMPUTECAPS,),-x 176 0x100
            $foreach(c,$COMPUTECAPS,-cudacap $c ))));

variable CUDALIBNEEDED is default(0);

# -cudalib
# NEEDCUDAALL: feeds the per-library NEEDCUDA* variables, so setting it selects
# the CUDA libraries collectively.
variable NEEDCUDAALL is default(0);
# Library search directories added when the CUDA runtime or a CUDA library is
# needed: math and comm library directories (only with $TGTCUDA or $ISCUDALIB),
# the CUPTI directory when NEEDCUPTI is set, and $CUDALIBDIR.
set LDLIBARGS=$if($lor($CUDARTNEEDED,$CUDALIBNEEDED),
                  $if($or($TGTCUDA,$ISCUDALIB),
                      $if($notequal($CUDAMATHLIBDIR,),$(LDIRSW)$CUDAMATHLIBDIR)
                      $if($notequal($COMMLIBSLPATHDIR,),$COMMLIBSLPATHDIR))
                  $if($NEEDCUPTI,$if($notequal($CUDACUPTILIBDIR,),$(LDIRSW)$CUDACUPTILIBDIR))
                  $if($notequal($CUDALIBDIR,),$(LDIRSW)$CUDALIBDIR));
# Math Libs

# If we are using NVHPC_CUDA_HOME, we need to use the CUDA default directory
# structure, otherwise we need to use the shipped directory structure.
variable DEFAULT_CUDAMATHDIR is default($if($equal($USER_SET_CUDA_HOME,1), $USECUDAROOT, $COMPBASE/$COMPSYS/$COMPVER/math_libs/$CUDAVERSION));
# Overridable through the NVCOMPILER_MATH_LIBS_HOME environment variable.
variable NVCOMPILER_MATH_LIBS_HOME is default($DEFAULT_CUDAMATHDIR) environment(NVCOMPILER_MATH_LIBS_HOME);
# Defined only when $PFX is "nv" and $CUDAVERSION is set.
variable CUDAMATHDIR is default($if($equal($PFX,nv),$if($CUDAVERSION, $NVCOMPILER_MATH_LIBS_HOME)));
variable CUDAMATHINCDIR is default($if($CUDAMATHDIR,$CUDAMATHDIR/include));
variable CUDAMATHBINDIR is default($if($CUDAMATHDIR,$CUDAMATHDIR/bin));
# lib on Windows targets, lib64 elsewhere.
variable CUDAMATHLIBDIR is default($if($CUDAMATHDIR,$if($index($TARGET,win64,win64-llvm),$CUDAMATHDIR/lib,$CUDAMATHDIR/lib64)));

# CUPTI
variable DEFAULT_CUDACUPTIDIR is default($if($equal($USER_SET_CUDA_HOME,1), $USECUDAROOT, $COMPBASE/$COMPSYS/$COMPVER/cuda/$CUDAVERSION/extras/CUPTI));
# Overridable through the NVCOMPILER_CUPTI_LIBS_HOME environment variable.
variable NVCOMPILER_CUPTI_LIBS_HOME is default($DEFAULT_CUDACUPTIDIR) environment(NVCOMPILER_CUPTI_LIBS_HOME);
# Defined only when $PFX is "nv" and $CUDAVERSION is set.
variable CUDACUPTIDIR is default($if($equal($PFX,nv),$if($CUDAVERSION, $NVCOMPILER_CUPTI_LIBS_HOME)));
variable CUDACUPTIINCDIR is default($if($CUDACUPTIDIR,$CUDACUPTIDIR/include));
variable CUDACUPTILIBDIR is default($if($CUDACUPTIDIR,$CUDACUPTIDIR/lib64));

# Per-library selection flags. For each library, NEED<LIB> is the individual
# request (default 0) and NEEDCUDA<LIB> is the effective flag, which in most
# cases is also true when NEEDCUDAALL is set.

# CUBLAS
variable NEEDCUBLAS is default(0);
variable NEEDCUDACUBLAS is default($or($NEEDCUDAALL,$NEEDCUBLAS));

# CUBLASMP
# NEEDCUDAALL does not select cuBLASMp on linuxpower targets.
variable NEEDCUBLASMP is default(0);
variable NEEDCUDACUBLASMP is default($or($ifn($index($TARGET,linuxpower),$NEEDCUDAALL,0),$NEEDCUBLASMP));

# CUFFT
variable NEEDCUFFT is default(0);
variable NEEDCUFFTCALLBACK is default(0);
variable NEEDCUDACUFFT is default($or($NEEDCUDAALL,$NEEDCUFFT));

# CUFFTW
variable NEEDCUFFTW is default(0);
variable NEEDCUDACUFFTW is default($or($NEEDCUDAALL,$NEEDCUFFTW));

# CUFFTMP
variable NEEDCUFFTMP is default(0);
# cuFFT and cuFFTMP are not compatible together, for now when `-cudalib` is used only link cufft
# variable NEEDCUDACUFFTMP is default($or($NEEDCUDAALL,$NEEDCUFFTMP));
variable NEEDCUDACUFFTMP is default($NEEDCUFFTMP);

# CURAND
variable NEEDCURAND is default(0);
variable NEEDCUDACURAND is default($or($NEEDCUDAALL,$NEEDCURAND));

# CUSOLVER
variable NEEDCUSOLVER is default(0);
variable NEEDCUDACUSOLVER is default($or($NEEDCUDAALL,$NEEDCUSOLVER));

# CUSOLVERMP
variable NEEDCUSOLVERMP is default(0);
variable NEEDCUDACUSOLVERMP is default($or($NEEDCUDAALL,$NEEDCUSOLVERMP));

# CUSPARSE
variable NEEDCUSPARSE is default(0);
variable NEEDCUDACUSPARSE is default($or($NEEDCUDAALL,$NEEDCUSPARSE));

# CUPTI
variable NEEDCUPTI is default(0);

# CUTENSOR
variable NEEDCUTENSOR is default(0);
variable NEEDCUDACUTENSOR is default($or($NEEDCUDAALL,$NEEDCUTENSOR));

# NVBLAS
variable NEEDNVBLAS is default(0);
variable NEEDCUDANVBLAS is default($or($NEEDCUDAALL,$NEEDNVBLAS));

# NVLAMATH
variable NEEDNVLAMATH is default(0);
variable NEEDCUDANVLAMATH is default($or($NEEDCUDAALL,$NEEDNVLAMATH));
variable NVLAMATH_SFX is default("");

# NVTX3
variable NEEDNVTX3 is default(0);
variable NEEDCUDANVTX3 is default($or($NEEDCUDAALL,$NEEDNVTX3));

# True when any math library is selected. cuFFTMp is tested through NEEDCUFFTMP,
# which is what NEEDCUDACUFFTMP defaults to.
variable NEEDCUDAMATHPATHS is default($or($NEEDCUDACUBLAS,$NEEDCUDACUBLASMP,$NEEDCUDACUFFT,$NEEDCUDACUFFTW,$NEEDCUFFTMP,$NEEDCUDACURAND,$NEEDCUDACUSOLVER,$NEEDCUDACUSOLVERMP,$NEEDCUDACUSPARSE,$NEEDCUDACUTENSOR,$NEEDCUDANVBLAS,$NEEDCUDANVLAMATH));

# Math Libs

# NCCL Path
# Defaults to $COMMLIBSDIR/nccl; overridable through the NVCOMPILER_NCCL_HOME
# environment variable.
variable DEFAULT_CUDANCCLDIR is default($if($COMMLIBSDIR,$COMMLIBSDIR/nccl));
variable NVCOMPILER_NCCL_HOME is default($DEFAULT_CUDANCCLDIR) environment(NVCOMPILER_NCCL_HOME);
variable CUDANCCLDIR is default($NVCOMPILER_NCCL_HOME);
variable CUDANCCLINCDIR is default($if($CUDANCCLDIR,$CUDANCCLDIR/include));
variable CUDANCCLLIBDIR is default($if($CUDANCCLDIR,$CUDANCCLDIR/lib));
variable NEEDNCCL is default(0);
variable NEEDCUDANCCL is default($or($NEEDCUDAALL,$NEEDNCCL));

# NVSHMEM Path
# Defaults to $COMMLIBSDIR/nvshmem; overridable through the NVCOMPILER_SHMEM_HOME
# environment variable.
variable DEFAULT_CUDANVSHMEMDIR is default($if($COMMLIBSDIR,$COMMLIBSDIR/nvshmem));
variable NVCOMPILER_SHMEM_HOME is default($DEFAULT_CUDANVSHMEMDIR) environment(NVCOMPILER_SHMEM_HOME);
variable CUDANVSHMEMDIR is default($NVCOMPILER_SHMEM_HOME);
# NOTE(review): unlike the NCCL directories, these two are not guarded against
# an empty CUDANVSHMEMDIR and then expand to "/include" and "/lib".
variable CUDANVSHMEMINCDIR is default($CUDANVSHMEMDIR/include);
variable CUDANVSHMEMLIBDIR is default($CUDANVSHMEMDIR/lib);
variable NEEDNVSHMEM is default(0);
# NVSHMEM is needed when requested directly, when cuFFTMp is requested, when
# cuBLASMp is requested with CUDA 12.0 or newer, or through NEEDCUDAALL
# (ignored on linuxarm64 with CUDA older than 12.2).
variable NEEDCUDANVSHMEM is default($or($if($land($index($TARGET,linuxarm64),$expr($CUDAXXYY<12020)),0,$NEEDCUDAALL),$NEEDNVSHMEM,$NEEDCUFFTMP,$if($expr($CUDAXXYY>=12000),$NEEDCUBLASMP)));

# Comm Libs Path
# Include, rpath, library-path and loader-path option lists for the selected
# communication libraries. The rpath list also adds the HPC-X UCC/UCX
# directories when cuBLASMp or cuSOLVERMp is selected.
variable COMMLIBSINCDIR is default($if($NEEDCUDANCCL,$path($CUDANCCLINCDIR)) $if($NEEDCUDANVSHMEM,$path($CUDANVSHMEMINCDIR)));
variable COMMLIBSRPATHDIR is default($if($NEEDCUDANCCL,-rpath $CUDANCCLLIBDIR) $if($NEEDCUDANVSHMEM,-rpath $CUDANVSHMEMLIBDIR) $if($lor($NEEDCUDACUBLASMP,$NEEDCUDACUSOLVERMP),-rpath $HPCXUCCLIBDIR -rpath $HPCXUCXLIBDIR));
variable COMMLIBSLPATHDIR is default($if($NEEDCUDANCCL,$(LDIRSW)$CUDANCCLLIBDIR) $if($NEEDCUDANVSHMEM,$(LDIRSW)$CUDANVSHMEMLIBDIR));
variable COMMLIBSLDDIR is default($if($NEEDCUDANCCL,$(LPRE)$CUDANCCLLIBDIR) $if($NEEDCUDANVSHMEM,$(LPRE)$CUDANVSHMEMLIBDIR));
variable NEEDCUDACOMMPATHS is default($or($NEEDCUDANCCL,$NEEDCUDANVSHMEM));
# Comm Libs

# If at least one cuda-optimized library is needed
# The disjunction must be written as the macro call $or(...); without the
# leading '$' the text "or(...)" is not evaluated and the variable no longer
# reflects the NEEDCUDA* flags.
variable NEEDCUDALIB is default($or($NEEDCUDACUBLAS,$NEEDCUDACUFFT,$NEEDCUDACUFFTW,$NEEDCUDACURAND,$NEEDCUDACUSOLVER,$NEEDCUDACUSPARSE,$NEEDCUDACUTENSOR,$NEEDCUDANCCL,$NEEDCUDANVSHMEM,$NEEDNVLAMATH));
variable LDLIBSLIST is default();
variable MATHCUDALIBLIST is default();
# Linker options used to bracket libraries that should be linked only if referenced.
variable ASNEEDED is default(--as-needed);
variable NOASNEEDED is default(--no-as-needed);
# Use linker option `--as-needed/--no-as-needed` when `-cudalib` is used with no sub-option
variable CUDAASNEEDED is default($if($NEEDCUDAALL,$ASNEEDED));
variable CUDAASNONEEDED is default($if($NEEDCUDAALL,$NOASNEEDED));
# culibos is added only for static links.
variable LIBCULIBOS is default($if($ISSTATIC,-lculibos));
# NOTE(review): presumably set when -cudalib is used; the setter is not visible here.
variable ISCUDALIB is default();
# -cudalib

# "-cudaver <CUDAXXYY>" when ADDCUDA is set.
variable TOOLKITFLAG is default($if($ADDCUDA,-cudaver $CUDAXXYY));

# 1 when $ANYCUF or $ANYCU is set or ACCELS contains tesla; the -acc switch
# overrides it based on $ISCUDARTNEEDED.
variable CHECKCUDALIB is default($if($or($ANYCUF,$ANYCU,$contains($ACCELS,tesla)),1,0));
variable IGNORECUDALIB is default();

# Flag accumulators filled by the -acc switch and related options.
variable ACCFLAGS is default();
variable DEF901ACC is default($ACC901FLAGS);
variable ACC901FLAGS is default();
variable ACCFEFLAGS is default();
variable ACCCGFLAGS is default();
# Always pass the tuning values (blocks per SM, register count, vector length,
# workers, NVVM threads) to the backend.
append ACCCGFLAGS= -x 197 $MINBLKSPERSM -x 175 $MAXREGCOUNT -x 203 $DEFVECTLEN -x 204 $DEFWORKERS -x 227 $MAXNVVMTHREADS;
variable IMPLICITSECTIONS is default(0);
# Flags shared by CGARGS and F901ARGS: RDC, host wait policy, implicit sections.
variable ACCCGDEF is default(
		$if($ACCRELOC,-x 189 0x8000)	# generate CUDA RDC (relocatable device code)
		$select($ACCWAIT,0,-x 163 0x40000000 -y 163 0x80000000,1,-y 163 0xc0000000,2,-x 163 0x80000000)
			# 163 0 = default, host waits for kernel completion if not async
			# 163 0x40000000, host never waits for kernel completion, only waits for data
			# 163 0x80000000, host always waits for kernel completion, even if async
		$if($IMPLICITSECTIONS,-x 201 0xf0000000));	# enable implicit array element=>array section translation in OpenMP/OpenACC data clauses (old PGI behavior)
# Selects the $select branch in ACCCGDEF (0, 1 or 2).
variable ACCWAIT is default(1);
variable ACCCACHE is default();
# Cleared by the -acc switch, which re-adds it for GPU targets.
variable DEFACC is default($ACCFLAGS $DEFAULTCAPFLAG);
variable FORCEDBGLLVM is default(0);
variable FORCELILLVM is default(0);
# 1 when $TARGETARCH is 64.
variable DEFAULTLLVM is default($if($equal($TARGETARCH,64),1,0));
variable FORCELLVM is default($if($or($DEFAULTLLVM,$or($FORCELILLVM, $FORCEDBGLLVM)), 1, 0));
# Debug flags: "-x 163 0x800000" when -g is used with an optimisation level
# below 1, "-x 192 0x40000000" for -g otherwise; DEFACCDEBUG is empty when
# the $expr($CUDAXXYY=8000) test holds. FORCEDBGLLVM=1 forces "-x 163 0x800000".
variable ACCMINDEBUG is default($if($DEBUGFLAG,-x 192 0x40000000));
variable DEFACCDEBUG is default($ifn($expr($CUDAXXYY=8000),$if($and($expr($OPTLEVELUSE < 1),$DEBUGFLAG),-x 163 0x800000,$ACCMINDEBUG)));
variable ACCDEBUG is default($if($equal($FORCEDBGLLVM,1),-x 163 0x800000,$DEFACCDEBUG));
variable LLVMFLAGS is default($if($equal($FORCELLVM,1),-x 189 0x10));
# Relocatable device code default: 1 when any cuf/CUF/cu/cup suffix is used or
# ACCELS contains tesla. The chain of DEF* variables lets other switches
# override the default at different levels (the -acc switch sets DEFDEFDEFACCRELOC).
variable DEFDEFDEF4ACCRELOC is default($if($or($suffixused(cuf,CUF),$suffixused(cu,cup),$contains($ACCELS,tesla)),1,0));
variable DEFDEFDEFACCRELOC is default($DEFDEFDEF4ACCRELOC);
variable DEFDEFACCRELOC is default($DEFDEFDEFACCRELOC);
variable DEFACCRELOC is default($DEFDEFACCRELOC);
variable ACCRELOC is default($DEFACCRELOC);    # default is now always link
variable ACCLLVM is default();
# When set, NVILINKARGS passes -gpufile.
variable GGPUFILE is default(0);
# True for -g on the linux86-64 target.
variable DEBUGFLAG_64 is default($and($DEBUGFLAG,$equal($TARGET,linux86-64)));

# OpenACC Flags
# -acc={gpu|gpu,multicore|multicore}
#         gpu                     OpenACC directives compiled for GPU execution only
#         gpu,host                (default) OpenACC directives compiled for GPU (default) or multicore CPU execution (UNTIL WE IMPLEMENT GPU,MULTICORE)
#         gpu,multicore           (default) OpenACC directives compiled for GPU (default) or multicore CPU execution (NOT READY YET)
#         multicore               OpenACC directives compiled for multicore CPU execution only

switch -acc is
    help(Enable OpenACC directives)
    helpname(-acc)
    helpgroup(target)
    set(LNGACC=1)
    set(NEEDLOCSCRIPT=1)

    # Common settings
    set(nkey=0)
    set(DEFACC=)
    set(DEF901ACC=)
    append(ACCCGFLAGS=-x 180 0x4000400 -x 121 0xc00)
    # Support for exposing `acc_get_device_type` in statically linked application, needed by LIBCUPTI
    set(NEEDNVHPCLDSYMS=1)

    # ACCDEPRECATE PGI Accelerator Directives
    append(ACCCGFLAGS=$PADDFLAG)
    append(ACCFEFLAGS=$PADDFLAG)

    set(ACCDEF=$foreach(f,$ACCDEFINES, -D$f))
    set(ACCDEFDEF=$foreach(f,$ACCDEFINES, -def $f))
    set(NEEDACCLIB=1)
    append(CPP1ARGS=--accel --preinclude openacc_predef.h)
    fatal($ifn($contains($SYSACCELS,tesla),Target accelerator -acc=gpu is not supported for $PGSYS-$PGLEN systems))
    add(nkey=1)
    max(OPTLEVELINITDEF1=2)
    # Common settings

    # GPU
    set(ACCTESLAONLY=$DEFTESLAONLY)
    set(DEFDEFDEFACCRELOC=$if($expr($TGTACC & $TGTGPU),1,$DEFDEFDEF4ACCRELOC))

    append(OPTLEVELMINLIMIT=$if($expr($TGTACC & $TGTGPU)," -acc"))
    append(OPTLEVELMINLIMIT=$if($expr($TGTACC & $TGTGPU)," -ta=acc"))

    append(ACCFEFLAGS=$if($expr($TGTACC & $TGTGPU), -x 180 0x400 -x 163 0x1 $TOOLKITFLAG $ACCCACHE))
    append(ACCFEFLAGS=$if($expr($TGTACC & $TGTGPU), -x 186 0x80000 $DEF901ACC))

    append(ACCCGFLAGS=$if($expr($TGTACC & $TGTGPU),-x 180 0x4000400 $DEFACC $ACCCACHE -x 121 0xc00 -x 194 0x40000))
    append(ACCCGFLAGS=$if($expr($TGTACC & $TGTGPU),-x 163 0x1 -x 186 0x80000 $TOOLKITFLAG))

    append(ACCELS=$if($expr($TGTACC & $TGTGPU),tesla))
    set(CUDARPATHNEEDED=$if($ISCUDARTNEEDED,1,0))
    set(CHECKCUDALIB=$if($ISCUDARTNEEDED,1,0))

    set(DDEFACCEL=$if($expr($TGTACC & $TGTGPU),$DEFACCEL))
    set(ACCFLAGS=$if($expr($TGTACC & $TGTGPU),-x 163 0x1 $TOOLKITFLAG))
    set(ACC901FLAGS=$if($expr($TGTACC & $TGTGPU),-x 163 0x1 $TOOLKITFLAG))

    # Host
    append(ACCELS=$if($expr($TGTACC & $TGTSEQ),$if($and($not($or($ANYCUF,$ANYCU,$TGTCUDA)),$expr($TGTACC & $TGTGPU)),host)))
    # Host
    # GPU

    # Multicore
    set(ACCMULTI=$if($or($TACPU,$expr($TGTACC & $TGTCPU)),1))
    set(ACCMULTIONLY=$DEFMULTIONLY)
    set(DEFTESLAONLY=$if($or($TACPU,$expr($TGTACC & $TGTCPU)),0,1)) # Keep value at one if Multicore was not selected
    append(OPTLEVELMINLIMIT=$if($or($TACPU,$expr($TGTACC & $TGTCPU))," -ta=multicore"))
    append(ACCFEFLAGS=$if($or($TACPU,$expr($TGTACC & $TGTCPU)),-x 163 1 -x 186 0x80000 -x 180 0x400))
    append(ACCCGFLAGS=$if($or($TACPU,$expr($TGTACC & $TGTCPU)),-x 163 1 -x 186 0x80000 -x 180 0x400 -x 121 0xc00))
    # Multicore

    keyword(
        gpu(
            help(OpenACC directives are compiled for GPU execution only; please refer to -gpu for target specific options)
            set(TGLACC=1)
            set(TGLACCGPU=$TGTGPU)
            set(TGTACC=$expr($TGLACCCPU | $TGLACCGPU | $TGLACCSEQ))
        )
        host(
            set(TGLACC=1)
            set(TGLACCSEQ=$TGTSEQ)
            set(TGTACC=$expr($TGLACCCPU | $TGLACCGPU | $TGLACCSEQ))
            set(THISACCTARGET=host)
            set(NEEDACCLIB=1)
            add(nkey=1)
            help(Compile for serial execution on the host CPU)
            append(ACCELS=host)
            keyword(
                time(hide
                    help(Collect simple timing information for the host version)
                    set(TIMEINIT=1)
                    set(DOACCLINK=1)
                )
            )
        )
        multicore(
            help(Compile for parallel execution on the host CPU)
            set(TGLACC=1)
            set(TGLACCCPU=$TGTCPU)
            set(TGTACC=$expr($TGLACCCPU | $TGLACCGPU | $TGLACCSEQ))
            keyword(
                trace(hide
                    set(MULTICORETRACE=$if($expr($TGTACC & $TGTCPU),1))
                )
                notrace(hide
                    set(MULTICORETRACE=$if($expr($TGTACC & $TGTCPU),0))
                )
                guided(hide
                    help(Use guided loop scheduling)
                    error($if($expr($TGTACC & $TGTCPU),$ifn($equal($PGLLVMTARGET,yes), guided suboption for -acc=multicore may only be used with the PGI LLVM compilers)))
                    append(ACCCGFLAGS=$if($expr($TGTACC & $TGTCPU),-x 210 0x20))
                )
                dbg(hide
                    help(Use debug version of OpenMP RT library)
                    set(OMPLIBDEBUG=_debug)
                )
                libomp(hide
                    help(Link with LLVM OpenMP library)
                    set(OMPPRESTDINC=$COMPBASE/$COMPSYS/$COMPVER/$quote($COMPINCPREFIX)include/libomp)
                    set(COMPLIBMP=$ifn($USEOTHEROMPLIB,$foreach(dd,$COMPLIBSUBDIR,$if($isdir($COMPBASE/$COMPSYS/$COMPVER/$dd/mp),$COMPBASE/$COMPSYS/$COMPVER/$dd/mp )),$LIBOMPPATH))
                    set(STDRPATHMP=$ifn($USEOTHEROMPLIB,$foreach(dd,$COMPLIBSUBDIR,$if($isdir($COMPBASE/$COMPSYS/$COMPVER/$dd/mp),-rpath $COMPBASE/$COMPSYS/$COMPVER/$dd/mp )),$LIBOMPRPATH))
                    set(USEOTHEROMPLIB=1)
                )
            )
        )
        stub(hide
            help(Link in the OpenACC stub library)
            set(NEEDACCSTUBLIB=1)
        )
        autopar(
            helpname([no]autopar)
            help(Enable (default) or disable loop autoparallelization within acc parallel)
            set(ACCAUTOPAR=$if($expr($TGTACC & $TGTALL),1))
        )
        noautopar(hide
            help(Disable loop autoparallelization within acc parallel)
            set(ACCAUTOPAR=$if($expr($TGTACC & $TGTALL),0))
        )
        defnone(hide
            help(Implicit default(none) on all compute construct)
            append(ACCCGFLAGS=$if($expr($TGTACC & $TGTALL),-x 194 0x8000))
        )
        defpresent(hide
            help(Implicit default(present) on all compute construct)
            append(ACCCGFLAGS=$if($expr($TGTACC & $TGTALL),-x 194 0x10000000))
        )
        required(hide
            help(Issue compiler error if the compute regions fail to accelerate)
            helpname([no]required)
            set(ACCREQUIRED=$if($expr($TGTACC & $TGTALL),1))
        )
        norequired(hide
            help(Generate host code if the compute regions fail to accelerate)
        )
        routinepar(
            if($not($ISFTN))
            help(Infer parallelism level in implicit routines for the device)
            helpname([no]routinepar)
            set(ACCROUTINEPAR=$if($expr($TGTACC & $TGTALL),1))
        )
        noroutinepar(hide
            if($not($ISFTN))
            help(Do not infer parallelism level in implicit routines for the device)
            set(ACCROUTINEPAR=$if($expr($TGTACC & $TGTALL),0))
        )

        routineseq(
            help(Compile every routine for the device)
            helpname([no]routineseq)
            set(ACCBUILDLIB=$if($expr($TGTACC & $TGTALL),1))
        )
        noroutineseq(hide
            help(Do not compile every routine for the device)
        )
        scalar(hide
            help(Generate scalar code for all OpenACC compute constructs)
            append(ACCCGFLAGS=$if($expr($TGTACC & $TGTALL),-x 194 0x1000))
        )
        legacy(
            help(Suppress warnings about deprecated PGI accelerator directives)
            set(PADDFLAG=$if($expr($TGTACC & $TGTALL),))
        )
        strict(
            help(Issue warnings for non-OpenACC accelerator directives)
            append(ACCCGFLAGS=$if($expr($TGTACC & $TGTALL),-x 186 0x100000))
            append(ACCFEFLAGS=$if($expr($TGTACC & $TGTALL),-x 186 0x100000))
        )
        verystrict(
            help(Fail with an error for any non-OpenACC accelerator directive)
            append(ACCCGFLAGS=$if($expr($TGTACC & $TGTALL),-x 186 0x300000))
            append(ACCFEFLAGS=$if($expr($TGTACC & $TGTALL),-x 186 0x300000))
        )
        task(hide
            help(Enable async clause on multicore)
            append(ACCCGFLAGS=$if($expr($TGTACC & $TGTALL),-x 210 0x10))
        )
        sync(
            help(Ignore async clauses)
            set(ACCWAIT=$if($expr($TGTACC & $TGTALL),2))
        )
        wait(
            helpname([no]wait)
            help(Wait for each device kernel to finish)
            set(ACCWAIT=$if($expr($TGTACC & $TGTALL),1))
        )
        nowait(hide
            help(Execute device kernels asynchronously)
            append(ACCWAIT=$if($expr($TGTACC & $TGTALL),0))
        )
        cache(hide
            set(ACCCACHE=$if($expr($TGTACC & $TGTALL),-x 163 0x100))
        )
        nocache(hide
            set(ACCCACHE=$if($expr($TGTACC & $TGTALL),-y 163 0x100))
        )
        autooffload(hide
            help(Automatically offload do concurrent and parallelized loops)
            append(ACCCGFLAGS=$if($expr($TGTACC & $TGTALL),-x 215 0x4000))
        )
        hpsums(hide
            help(Compute sum reductions in high precision)
            append(ACCCGFLAGS=$if($expr($TGTACC & $TGTALL),-x 215 0x80000))
        )
        noldscript(hide
            help(Disable OpenACC linker script)
            set(LOCSCRIPT=0)
        )
    )
    # GPU
    append(ACCCGFLAGS=$if($expr($TGTACC & $TGTGPU),$DEFAULTCAPFLAG))
    append(ACCCGFLAGS=$if($expr($TGTACC & $TGTGPU),$if($notequal($USECUDAROOT,),-cudaroot $USECUDAROOT)))
    # GPU
    # Multicore
    append(ACCELS=$if($or($TACPU,$expr($TGTACC & $TGTCPU)),multicore))
    append(ACCCGFLAGS=$if($or($TACPU,$expr($TGTACC & $TGTCPU)),$if($MULTICORETRACE, -x 210 8, -y 210 8)))
    set(ACCMULTI=$if($or($TACPU,$expr($TGTACC & $TGTCPU)),1))
    # Multicore
    # OpenMP libraries are always needed
    set(OMPLIB=$if($equal($PGLLVMTARGET,yes), $if($USEOTHEROMPLIB,$LIBOMP,$NVOMPLIBS)))
    nokeyword();

# -noacc: turn OpenACC off. Clears the OpenACC language flag and the three
# per-target toggles, then recomputes TGTACC from the cleared toggles.
switch -noacc is
    help(Disable OpenACC directives and do not link with OpenACC libraries.)
    helpname(-noacc)
    helpgroup(target)

    # Disable both OpenACC host and device
    set(LNGACC=0)
    set(TGLACC=0)
    set(TGLACCGPU=0)
    set(TGLACCCPU=0)
    set(TGLACCSEQ=0)
    set(TGTACC=$expr($TGLACCCPU | $TGLACCGPU | $TGLACCSEQ));

# CUDA driver detection. The three variables below stay empty unless DETECTCUDA is set.
# CUDA_DRIVER_VERSION holds the result of the cudadriver action.
variable CUDA_DRIVER_VERSION is default($if($DETECTCUDA,$action(cudadriver())));
# Driver version with the dots removed so it can be compared as a number below.
variable CUDA_DRIVER_VERSION_STR is default($if($DETECTCUDA,$remove($CUDA_DRIVER_VERSION,.)));
# Text used by the missing-toolkit error below. 11.0 is offered as an extra
# choice only when the dot-stripped driver version is greater than 110.
variable CUDA_DRIVERS_SUPPORTED is default($if($DETECTCUDA,$if($expr($CUDA_DRIVER_VERSION_STR>110),11.0 or $CUDAVERSION,$CUDAVERSION)));

# BLANKCUDALIB: CUDALIBDIR is empty while CHECKCUDALIB is 1 and IGNORECUDALIB is empty.
variable BLANKCUDALIB is default($and($equal($CUDALIBDIR,),$equal($CHECKCUDALIB,1),$equal($IGNORECUDALIB,)));
# BADCUDALIB: same gating, but CUDALIBDIR is set and is not an existing directory.
variable BADCUDALIB is default($and($notequal($CUDALIBDIR,),$not($isdir($CUDALIBDIR)),$equal($CHECKCUDALIB,1),$equal($IGNORECUDALIB,)));
# Configuration errors: missing toolkit, unavailable toolkit version,
# managed combined with autocompare, and Tesla targets on a KNL host.
error($if($BLANKCUDALIB,A CUDA toolkit matching the current driver version ($CUDA_DRIVER_VERSION) or a supported older version ($CUDA_DRIVERS_SUPPORTED) was not installed with this HPC SDK.));
error($if($BADCUDALIB,CUDA version $CUDAXDY is not available in this installation.));
error($if($and($MANAGED,$AUTOCOMPARE),The -acc=gpu suboptions managed and autocompare are not compatible with each other));
error($if($and($KNL,$contains($ACCELS,tesla)),OpenACC for Tesla GPU targets is not supported on Knights Landing host systems));

# -Mqqa,qflag: hidden switch. Records the option unchanged in IPAADD and
# appends -qqa plus the value, after a comma substitution, to CGARGS.
switch -Mqqa,qflag is hide #not
    help(Pass qqa flag to compiler)
    helpgroup(overall)
    append(IPAADD=-Mqqa,$qflag)
    append(CGARGS=-qqa $replace($qflag,",", ));

# Predefined macros for the selected accelerator targets. USRDDEF receives the
# -D spelling and USRDEFDEF the -def spelling of the same macro names.
# PGI_TESLA_TARGET and PGI_MULTICORE_TARGET follow ACCTESLAONLY and ACCMULTIONLY.
append USRDDEF=$if($ACCTESLAONLY,-DPGI_TESLA_TARGET) $if($ACCMULTIONLY,-DPGI_MULTICORE_TARGET);
append USRDEFDEF=$if($ACCTESLAONLY,-def PGI_TESLA_TARGET) $if($ACCMULTIONLY,-def PGI_MULTICORE_TARGET);

# The __NVCOMPILER_OPENACC_* macros follow ACCGPU, ACCMC and ACCHOST.
append USRDDEF=$if($ACCGPU, -D__NVCOMPILER_OPENACC_GPU) $if($ACCMC, -D__NVCOMPILER_OPENACC_MULTICORE) $if($ACCHOST, -D__NVCOMPILER_OPENACC_HOST);
append USRDEFDEF=$if($ACCGPU, -def __NVCOMPILER_OPENACC_GPU) $if($ACCMC, -def __NVCOMPILER_OPENACC_MULTICORE) $if($ACCHOST, -def __NVCOMPILER_OPENACC_HOST);

# Suffix of the versioned cudafor library, chosen from CUDAXXYY:
#   below 11030 -> _110
#   below 11080 -> _113
#   below 12000 -> _118
#   below 12070 -> _120
#   otherwise   -> _128
variable CUDAFORLIBSUF is default($if($expr($CUDAXXYY<11030),_110,
                                  $if($expr($CUDAXXYY<11080),_113,
                                  $if($expr($CUDAXXYY<12000),_118,
                                  $if($expr($CUDAXXYY<12070),_120,_128)))));
# Versioned cudafor library, selected only for the Fortran driver or when ISCUDAFORLIBS is set.
variable CUDAFORLIB_SELECTION is default($if($lor($equal($DRIVERLANG,Fortran),$ISCUDAFORLIBS), $(LIBSW)cudafor$CUDAFORLIBSUF));
# -cudalib: link the CUDA-optimized libraries.
# For each library the block appends to two lists, gated on a NEEDCUDA* flag:
#   LDLIBSLIST      - entries built with the LPRE prefix
#   MATHCUDALIBLIST - link switches, plus Fortran wrapper libraries when the
#                     driver is Fortran or ISCUDAFORLIBS is set
# The keyword section sets the NEED* flags for the suboptions, and giving no
# suboption sets NEEDCUDAALL. The mapping from NEED* to the NEEDCUDA* flags
# read here is not visible in this block - presumably defined elsewhere.
# At the end both lists are added to LDLIBS and MATHCUDALIB when NEEDCUDALIB is set.
switch -cudalib is
    help(Add appropriate versions of the CUDA-optimized libraries)
    helpgroup(linker)
    set(CUDALIBNEEDED=1)
    set(ISCUDALIB=1)

    # On Windows, the acc runtime isn't built, so do not require it.
    set(NEEDACCLIB=$if($or($and($notequal($TARGET,win64),$notequal($TARGET,win64-llvm)),$equal($NEEDACCLIB,1)),1,0))

    # Math and Comm Libraries Paths (double check)
    append(LDLIBSLIST=$if($NEEDCUDACOMMPATHS,$if($notequal($COMMLIBSLDDIR,),$COMMLIBSLDDIR)) $if($NEEDCUDAMATHPATHS,$if($notequal($CUDAMATHLIBDIR,),$CUDAMATHLIBDIR)) $if($NEEDCUPTI,$if($notequal($CUDACUPTILIBDIR,),$CUDACUPTILIBDIR)) $if($notequal($CUDALIBDIR,),$(LPRE)$CUDALIBDIR))

    # CUBLASMP
    # NOTE(review): the two appends below use different conditions. The first
    # requires a linux86-64 target and CUDAXXYY above 11040, the second only
    # CUDAXXYY of at least 11020. The library name casing also differs. Confirm.
    append(LDLIBSLIST=$if($land($NEEDCUDACUBLASMP,$index($TARGET,linux86-64),$expr($CUDAXXYY>11040)),$(LPRE)cublasMp$CUDALIBSTATIC))
    append(MATHCUDALIBLIST=$if($land($NEEDCUDACUBLASMP,$expr($CUDAXXYY>=11020)),$(LIBSW_NOLIB)cublasmp$CUDALIBSTATIC
            $if($lor($ISFTN,$ISCUDAFORLIBS),$(LIBSW_NOLIB)nvhpcwrapcublasmp$CUDALIBSTATIC)
            $if($lor($ISFTN,$ISCUDAFORLIBS),$(LIBSW_NOLIB)nvhpcwrapcal$CUDALIBSTATIC)
            $PGIUNSTATICX $(LIBSW_NOLIB)cal $PGISTATICX))

    # CUFFT
    # With static linking the callback suboption picks cufft_static, otherwise cufft_static_nocallback.
    append(LDLIBSLIST=$if($NEEDCUDACUFFT,$(LPRE)cufft$CUDALIBSTATIC))
    append(MATHCUDALIBLIST=$if($NEEDCUDACUFFT,
            $if($lor($ISFTN,$ISCUDAFORLIBS),$(LIBSW_NOLIB)nvhpcwrapcufft)
            $if($notequal($PGISTATICX,),$if($NEEDCUDACUFFTCALLBACK,$(LIBSW_NOLIB)cufft_static,$(LIBSW_NOLIB)cufft_static_nocallback),$(LIBSW_NOLIB)cufft)))

    # CUFFTW
    append(LDLIBSLIST=$if($NEEDCUDACUFFTW,$(LPRE)cufftw$CUDALIBSTATIC))
    append(MATHCUDALIBLIST=$if($NEEDCUDACUFFTW,$if($notequal($PGISTATICX,),$(LIBSW_NOLIB)cufftw_static $(LIBSW_NOLIB)cufft_static_nocallback,$(LIBSW_NOLIB)cufftw)))

    # CUFFTMP
    # NOTE(review): the second target alternative is linuxpower with 11040 in
    # the first append but linuxarm64 with 12000 in the second. Confirm.
    append(LDLIBSLIST=$if($land($NEEDCUDACUFFTMP,$lor($land($index($TARGET,linux86-64),$expr($CUDAXXYY>=11020)),$land($index($TARGET,linuxpower),$expr($CUDAXXYY>=11040)))),$(LPRE)cuFFTMp$CUDALIBSTATIC))
    append(MATHCUDALIBLIST=$if($land($NEEDCUDACUFFTMP,$lor($land($index($TARGET,linux86-64),$expr($CUDAXXYY>=11020)),$land($index($TARGET,linuxarm64),$expr($CUDAXXYY>=12000)))),
            $if($lor($ISFTN,$ISCUDAFORLIBS),$(LIBSW_NOLIB)nvhpcwrapcufftmp)
            $(LIBSW_NOLIB)cufftMp$CUDALIBSTATIC))

    # CURAND
    append(LDLIBSLIST=$if($NEEDCUDACURAND,$(LPRE)curand$CUDALIBSTATIC))
    append(MATHCUDALIBLIST=$if($NEEDCUDACURAND,$if($lor($ISFTN,$ISCUDAFORLIBS),$(LIBSW)cudaforwraprand) $(LIBSW_NOLIB)curand$CUDALIBSTATIC))

    # CUSOLVER
    # Static links also add cusparse and a lapack library. nvJitLink is added from CUDAXXYY 12000 on.
    append(LDLIBSLIST=$if($NEEDCUDACUSOLVER,$(LPRE)cusolver$CUDALIBSTATIC))
    append(MATHCUDALIBLIST=$if($NEEDCUDACUSOLVER,$(LIBSW_NOLIB)cusolver$CUDALIBSTATIC $if($notequal($PGISTATICX,),$(LIBSW_NOLIB)cusparse$CUDALIBSTATIC $if($expr($CUDAXXYY>=11070),$(LIBSW_NOLIB)cusolver_lapack$CUDALIBSTATIC,$(LIBSW_NOLIB)lapack$CUDALIBSTATIC)) $if($expr($CUDAXXYY>=12000),$(LIBSW_NOLIB)nvJitLink$CUDALIBSTATIC)))

    # CUSOLVERMP
    append(LDLIBSLIST=$if($land($NEEDCUDACUSOLVERMP,$expr($CUDAXXYY>11040)),$(LPRE)cusolverMp$CUDALIBSTATIC))
    append(MATHCUDALIBLIST=$if($land($NEEDCUDACUSOLVERMP,$expr($CUDAXXYY>11040)),$(LIBSW_NOLIB)cusolverMp$CUDALIBSTATIC 
            $if($lor($ISFTN,$ISCUDAFORLIBS),$(LIBSW_NOLIB)nvhpcwrapcusolvermp$CUDALIBSTATIC)
            $if($lor($ISFTN,$ISCUDAFORLIBS),$(LIBSW_NOLIB)nvhpcwrapcal$CUDALIBSTATIC)
            $if($expr($CUDAXXYY>=12000),$(LIBSW_NOLIB)nvJitLink$CUDALIBSTATIC)
            $PGIUNSTATICX $(LIBSW_NOLIB)cal $PGISTATICX))

    # CUSPARSE
    # The Fortran wrapper is cudaforwrapsparse11 for CUDAXXYY 11000 up to 12000 and cudaforwrapsparse12 from 12000 on.
    append(LDLIBSLIST=$if($NEEDCUDACUSPARSE,$(LPRE)cusparse$CUDALIBSTATIC))
    append(MATHCUDALIBLIST=$if($NEEDCUDACUSPARSE,
            $if($lor($ISFTN,$ISCUDAFORLIBS),
                $if($and($expr($CUDAXXYY>=11000),$expr($CUDAXXYY<12000)),$(LIBSW)cudaforwrapsparse11)
                $if($expr($CUDAXXYY>=12000),$(LIBSW)cudaforwrapsparse12))
            $(LIBSW_NOLIB)cusparse$CUDALIBSTATIC
            $if($expr($CUDAXXYY>=12000),$(LIBSW_NOLIB)nvJitLink$CUDALIBSTATIC)))

    # CUPTI
    append(LDLIBSLIST=$if($NEEDCUPTI,$(LPRE)cupti$CUDALIBSTATIC))
    append(MATHCUDALIBLIST=$if($NEEDCUPTI,$(LIBSW_NOLIB)cupti$CUDALIBSTATIC))

    # CUTENSOR
    # The versioned Fortran wrapper is _113 below CUDAXXYY 11080 and _118 from 11080 on.
    append(LDLIBSLIST=$if($NEEDCUDACUTENSOR,$(LPRE)cutensor$CUDALIBSTATIC $(LPRE)cutensorMg$CUDALIBSTATIC))
    append(MATHCUDALIBLIST=$if($NEEDCUDACUTENSOR,
            $if($lor($ISFTN,$ISCUDAFORLIBS),
            $(LIBSW)cudaforwraptensor
                $if($expr($CUDAXXYY<11080),$(LIBSW)cudaforwraptensor_113)
                $if($expr($CUDAXXYY>=11080),$(LIBSW)cudaforwraptensor_118))
            $(LIBSW_NOLIB)cutensor$CUDALIBSTATIC $(LIBSW_NOLIB)cutensorMg$CUDALIBSTATIC))

    # NVBLAS
    # There is no static version so force dynamic linking
    append(LDLIBSLIST=$if($NEEDCUDANVBLAS,$(LPRE)nvblas))
    append(MATHCUDALIBLIST=$if($NEEDCUDANVBLAS,$PGIUNSTATICX $(LIBSW_NOLIB)nvblas $PGISTATICX))

    # NCCL
    append(LDLIBSLIST=$if($NEEDCUDANCCL,$(LPRE)nccl$CUDALIBSTATIC))
    append(MATHCUDALIBLIST=$if($NEEDCUDANCCL,$if($lor($ISFTN,$ISCUDAFORLIBS),$(LIBSW_NOLIB)cudaforwrapnccl) $(LIBSW_NOLIB)nccl$CUDALIBSTATIC))

    # NVSHMEM
    # NOTE(review): both lines end with PGISTATIC while the rest of this block
    # pairs PGIUNSTATICX with PGISTATICX. Confirm which variable is intended.
    append(LDLIBSLIST=$if($NEEDCUDANVSHMEM,$(LPRE)nvshmem_device $PGIUNSTATICX $(LPRE)nvshmem_host -L$(CUDALIBDIR)/stubs $(LPRE)nvidia-ml $PGISTATIC))
    append(MATHCUDALIBLIST=$if($NEEDCUDANVSHMEM,$(LIBSW_NOLIB)nvhpcwrapshmem $(LIBSW_NOLIB)nvshmem_device $PGIUNSTATICX $(LIBSW_NOLIB)nvshmem_host -L$(CUDALIBDIR)/stubs $(LIBSW_NOLIB)nvidia-ml $PGISTATIC))

    # NVLAMATH
    # NVLAMATH_SFX is empty unless the nvlamath_ilp64 suboption set it.
    append(LDLIBSLIST=$if($NEEDCUDANVLAMATH,$(LPRE)nvlamath $(LPRE)blas$(NVLAMATH_SFX) $(LPRE)lapack$(NVLAMATH_SFX)))
    append(MATHCUDALIBLIST=$if($NEEDCUDANVLAMATH,$(LIBSW_NOLIB)nvlamath $(LIBSW_NOLIB)blas$(NVLAMATH_SFX) $(LIBSW_NOLIB)lapack$(NVLAMATH_SFX)))

    # NVTX3
    # Only the Fortran wrapper is added here
    append(MATHCUDALIBLIST=$if($NEEDCUDANVTX3,$if($lor($ISFTN,$ISCUDAFORLIBS),$(LIBSW_NOLIB)nvhpcwrapnvtx)))

    # CUBLAS
    append(LDLIBSLIST=$if($NEEDCUDACUBLAS,$(LPRE)cublas$CUDALIBSTATIC))
    # cublas is also linked when `cusolver` or `cutensor` is requested and we are statically linking.
    # cublasLt is skipped when COMPUTECAPS contains 30 and requires CUDAXXYY above 10000.
    append(MATHCUDALIBLIST=$if($or($NEEDCUDACUBLAS,$and($or($NEEDCUDACUSOLVER,$NEEDCUDACUTENSOR),$notequal($PGISTATICX,))),$(LIBSW_NOLIB)cublas$CUDALIBSTATIC
            $ifn($contains($COMPUTECAPS,30),$if($expr($CUDAXXYY>10000),$(LIBSW_NOLIB)cublasLt$CUDALIBSTATIC))
            $if($lor($ISFTN,$ISCUDAFORLIBS),$(LIBSW)cudaforwrapblas $if($expr($CUDAXXYY>11060),$(LIBSW)cudaforwrapblas117))))

    append(MATHCUDALIBLIST=$if($or($NEEDCUDACUBLAS,$NEEDCUDACUFFT,$NEEDCUDACUFFTW),$CULIBOSSTATIC))

    # CUDA Interoperability
    set(cudaforlib=)
    # when using -static-nvidia we need to group libcudaforXY.a and libcudafor.a to preserve the ordering and resolve symbols.
    append(cudaforlib=$CUDAFORLIB_SELECTION)
    append(cudaforlib=$if($equal($DRIVERLANG,Fortran),$(LIBSW)cudafor))
    append(cudaforlib=$if($expr($CUDAXXYY>=10010),$lookup($COMPLIBOBJ,cuda_init_register_end.$OBJSUFFIX)))
    set(cudafor2lib=$if($equal($DRIVERLANG,Fortran), $(LIBSW)cudafor2, $(LIBSW)cudanvhpc))
    # CUDA Interoperability

    keyword(
        cublas(
            set(NEEDCUBLAS=1)
        )
        cublasmp(
            error($if($index($TARGET,linuxpower),The option '-cudalib=cublasmp' is supported on x86_64 and aarch64 architectures.))
            set(NEEDCUBLAS=1)
            set(NEEDCUBLASMP=1)
        )
        cufft(
            set(NEEDCUFFT=1)
            keyword(
                callback(
                    set(NEEDCUFFTCALLBACK=1)
                )
            )
        )
        cufftw(
            set(NEEDCUFFTW=1)
        )
        cufftmp(
            set(NEEDCUFFTMP=1)
        )
        curand(
            set(NEEDCURAND=1)
            append(ACCCGFLAGS=-x 186 0x200)
        )
        cusolver(
            set(NEEDCUSOLVER=1)
        )
        cusolvermp(
            error($if($index($TARGET,linuxarm64,linuxpower),The option '-cudalib=cusolvermp' is currently supported only on x86_64 architectures.))
            set(NEEDCUSOLVERMP=1)
            set(NEEDNCCL=1)
        )
        cusparse(
            set(NEEDCUSPARSE=1)
        )
        cupti(
            set(NEEDCUPTI=1)
        )
        cutensor(
            set(NEEDCUTENSOR=1)
        )
        nvblas(
            set(NEEDNVBLAS=1)
        )
        nccl(
            set(NEEDNCCL=1)
        )
        nvshmem(
            set(NEEDNVSHMEM=1)
            append(ACCCGFLAGS=-x 186 0x100)
        )
        nvlamath(
            set(NEEDCUSOLVER=1)
            set(NEEDCUBLAS=1)
            set(NEEDCUTENSOR=1)
            set(NEEDNVLAMATH=1)
        )
        nvlamath_ilp64(
            set(NEEDCUSOLVER=1)
            set(NEEDCUBLAS=1)
            set(NEEDCUTENSOR=1)
            set(NEEDNVLAMATH=1)
            set(NVLAMATH_SFX="_ilp64")
        )
        nvtx3(
            set(NEEDNVTX3=1)
        )
    )
    nokeyword(
        set(NEEDCUDAALL=1)
    )

    # Pass an explicit CUDA root to the code generator and wrap the cudafor libraries in the static and unstatic switches
    append(CGARGS=$if($notequal($CUDAROOT,),-cudaroot $CUDAROOT))
    set(LCUDAFORLIB=$PGISTATICX $cudaforlib $PGIUNSTATICX)
    set(LCUDAFOR2LIB=$PGISTATICX $cudafor2lib $PGIUNSTATICX)
    set(CUDANEEDED=1)
    set(LRTLIB=$LRTLIBNAME)
    append(CGARGS=$if($equal($DRIVERLANG,Fortran),$DEFAULTCAPFLAG $TOOLKITFLAG))

    # Some math libraries need libstdc++ when statically linked, the nvshmem library is only static and it is always needed.
    append(DEFSTDLIBS=$if($or($notequal($PGISTATICX,),$NEEDCUDANVSHMEM),$(LIBSW)$CUBLASSTDLIB))
    # The cuda library itself is added for cusparse, and for nvshmem when the acc library is not needed
    append(CUDALIB=$if($or($and($NEEDCUDANVSHMEM,$not($NEEDACCLIB)),$NEEDCUDACUSPARSE),$(LIBSW_NOLIB)cuda))

    # Publish the collected lists. cudart always follows the math libraries.
    append(LDLIBS=$if($NEEDCUDALIB,$LDLIBSLIST))
    append(MATHCUDALIB=$if($NEEDCUDALIB,$PGISTATICX $CUDAASNEEDED $MATHCUDALIBLIST $LIBCULIBOS $(LIBSW_NOLIB)cudart$CUDALIBSTATIC $CUDAASNONEEDED $PGIUNSTATICX));

# -cudaforlibs: link the CUDA Fortran libraries regardless of driver language.
# Rebuilds cudaforlib and cudafor2lib without the Fortran-driver conditions that
# -cudalib applies, marks CUDA as needed, and ends by acting as -fortranlibs.
switch -cudaforlibs is
    help(Link in CUDA Fortran libraries, implies '-fortranlibs'.)
    set(ISCUDAFORLIBS=1)
    # CUDA Interoperability
    set(cudaforlib=)
    # when using -static-nvidia we need to group libcudaforXY.a and libcudafor.a to preserve the ordering and resolve symbols.
    append(cudaforlib=$CUDAFORLIB_SELECTION)
    append(cudaforlib=$(LIBSW)cudafor)
    append(cudaforlib=$if($expr($CUDAXXYY>=10010),$lookup($COMPLIBOBJ,cuda_init_register_end.$OBJSUFFIX)))
    set(cudafor2lib=$(LIBSW)cudafor2)
    # CUDA Interoperability
    append(CGARGS=$if($notequal($CUDAROOT,),-cudaroot $CUDAROOT))
    set(LCUDAFORLIB=$PGISTATICX $cudaforlib $PGIUNSTATICX)
    set(LCUDAFOR2LIB=$PGISTATICX $cudafor2lib $PGIUNSTATICX)
    set(CUDANEEDED=1)
    set(LRTLIB=$LRTLIBNAME)
    append(CGARGS=$DEFAULTCAPFLAG $TOOLKITFLAG)
    shorthand(-fortranlibs);

# -cudalibs: hidden alternate spelling that expands to -cudalib.
switch -cudalibs is hide shorthand(-cudalib);

# -Mnoautoprivatize: hidden switch. Passes -x 198 0x40000 to the code generator.
# The same flag value is passed with -y by -Mautoprivatize.
switch -Mnoautoprivatize is hide
    help(Disable automatic privatization of arrays in nested scopes)
    append(CGARGS=-x 198 0x40000);

# -Mautoprivatize: hidden switch. Passes -y 198 0x40000 to the code generator.
# The same flag value is passed with -x by -Mnoautoprivatize.
switch -Mautoprivatize is hide
    help(Enable automatic privatization of arrays in nested scopes)
    append(CGARGS=-y 198 0x40000);

# Local rcfile for enabling OpenACC stale data detection tool
cinclude rcfiles/acctoolsrc;

# Phase printcudaversion: runs before NoCompile and echoes the detected driver,
# the selected toolkit version, CUDAXXYY and the CUDA root path. The default
# toolkit version is echoed only when it differs from the selected one.
phase printcudaversion is forall
  before(NoCompile)
  set(detected=$action(cudadriver()))
  set(default=$action(cudatoolkit($COMPBASE/$CUDAMAJOR,,,$OLDESTCUDASHIPPED)))
  set(selected=$CUDAXDY)
  echo(CUDA Driver=$detected)
  echo($ifn($equal($default,$selected),Default CUDA Version=$default))
  echo(Selected CUDA Version=$selected)
  echo(CUDAXXYY=$CUDAXXYY)
  echo(CUDA Path=$USECUDAROOT);

# -printcudaversion: hidden switch. Enables the printcudaversion phase, forces
# CUDA detection on, disables NoFiles and stops after the NoCompile phase.
switch -printcudaversion is hide
  help(Show CUDA driver and selected CUDA toolkit version)
  enable(printcudaversion)
  disable(NoFiles)
  enable(NoCompile)
  set(DETECTCUDA=1)
  stopafter(NoCompile);

# Phase printcudadir: runs before NoCompile and echoes the bin directory under
# the CUDA root. The detected and default values are computed but not echoed.
# NOTE(review): presumably the two actions are kept for their side effects - confirm.
phase printcudadir is forall
  before(NoCompile)
  set(detected=$action(cudadriver()))
  set(default=$action(cudatoolkit($COMPBASE/$CUDAMAJOR,,,$OLDESTCUDASHIPPED)))
  echo($USECUDAROOT/bin);

# -printcudadir: hidden switch. Enables the printcudadir phase, forces CUDA
# detection on, disables NoFiles and stops after the NoCompile phase.
switch -printcudadir is hide
  help(Show selected CUDA toolkit bin directory)
  enable(printcudadir)
  disable(NoFiles)
  enable(NoCompile)
  set(DETECTCUDA=1)
  stopafter(NoCompile);

# Phase printmathlibsdir: runs before NoCompile and echoes CUDAMATHBINDIR.
# The detected and default values are computed but not echoed.
# NOTE(review): presumably the two actions are kept for their side effects - confirm.
phase printmathlibsdir is forall
  before(NoCompile)
  set(detected=$action(cudadriver()))
  set(default=$action(cudatoolkit($COMPBASE/$CUDAMAJOR,,,$OLDESTCUDASHIPPED)))
  echo($CUDAMATHBINDIR);

# -printmathlibsdir: hidden switch. Enables the printmathlibsdir phase, forces
# CUDA detection on, disables NoFiles and stops after the NoCompile phase.
switch -printmathlibsdir is hide
  help(Show selected CUDA toolkit math_libs bin directory)
  enable(printmathlibsdir)
  disable(NoFiles)
  enable(NoCompile)
  set(DETECTCUDA=1)
  stopafter(NoCompile);

# -nvflangcudabclibs: hidden switch. Passes -x 192 0x400000 to the code generator.
# The same flag value is passed with -y by -nonvflangcudabclibs.
switch -nvflangcudabclibs is hide
  help(Link F18 CUDA BC runtime libraries)
  append(CGARGS=-x 192 0x400000);

# -nonvflangcudabclibs: hidden switch. Passes -y 192 0x400000 to the code generator.
# The same flag value is passed with -x by -nvflangcudabclibs.
switch -nonvflangcudabclibs is hide
  help(Do not link F18 CUDA BC runtime libraries)
  append(CGARGS=-y 192 0x400000);
