/*
 *     Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
 *
 * NVIDIA CORPORATION and its licensors retain all intellectual property
 * and proprietary rights in and to this software, related documentation
 * and any modifications thereto.  Any use, reproduction, disclosure or
 * distribution of this software and related documentation without an express
 * license agreement from NVIDIA CORPORATION is strictly prohibited.
 *
 */

// CUDA C++ built-in functions support

/*
 * Expands to a complete braced function body that calls `intrinsic` with
 * (mask, var, laneArg, width) and returns the result. Meant to be placed
 * directly after a function signature. `laneArg` is whatever the chosen
 * intrinsic takes as its third argument.
 */
#define SHUFFLE_FUNCTION_BODY(intrinsic, mask, var, laneArg, width) \
    {                                                               \
        return intrinsic(mask, var, laneArg, width);                \
    }

/* __shfl_sync */
/* Symbol mangles __shfl_sync(unsigned, int, int, int). Hands all four
 * arguments to the __shfl_sync intrinsic unchanged and returns its result. */
__DEVICE int _Z11__shfl_syncjiii(unsigned mask, int var, int srcLane, int width)
{
    return __shfl_sync(mask, var, srcLane, width);
}

/* Symbol mangles __shfl_sync(unsigned, unsigned, int, int). Hands all four
 * arguments to the __shfl_sync intrinsic unchanged and returns its result. */
__DEVICE unsigned int _Z11__shfl_syncjjii(unsigned mask, unsigned int var, int srcLane, int width)
{
    return __shfl_sync(mask, var, srcLane, width);
}

/* Symbol mangles __shfl_sync(unsigned, long, int, int). Hands all four
 * arguments to the __shfl_sync intrinsic unchanged and returns its result. */
__DEVICE long _Z11__shfl_syncjlii(unsigned mask, long var, int srcLane, int width)
{
    return __shfl_sync(mask, var, srcLane, width);
}

/* Symbol mangles __shfl_sync(unsigned, unsigned long, int, int). Hands all
 * four arguments to the __shfl_sync intrinsic unchanged and returns its result. */
__DEVICE unsigned long _Z11__shfl_syncjmii(unsigned mask, unsigned long var, int srcLane, int width)
{
    return __shfl_sync(mask, var, srcLane, width);
}

/* Symbol mangles __shfl_sync(unsigned, long long, int, int). Hands all four
 * arguments to the __shfl_sync intrinsic unchanged and returns its result. */
__DEVICE long long _Z11__shfl_syncjxii(unsigned mask, long long var, int srcLane, int width)
{
    return __shfl_sync(mask, var, srcLane, width);
}

/* Symbol mangles __shfl_sync(unsigned, unsigned long long, int, int). Hands
 * all four arguments to the __shfl_sync intrinsic unchanged and returns its
 * result. */
__DEVICE unsigned long long _Z11__shfl_syncjyii(unsigned mask, unsigned long long var, int srcLane, int width)
{
    return __shfl_sync(mask, var, srcLane, width);
}

/* Symbol mangles __shfl_sync(unsigned, float, int, int). Hands all four
 * arguments to the __shfl_sync intrinsic unchanged and returns its result. */
__DEVICE float _Z11__shfl_syncjfii(unsigned mask, float var, int srcLane, int width)
{
    return __shfl_sync(mask, var, srcLane, width);
}

/* Symbol mangles __shfl_sync(unsigned, double, int, int). Hands all four
 * arguments to the __shfl_sync intrinsic unchanged and returns its result. */
__DEVICE double _Z11__shfl_syncjdii(unsigned mask, double var, int srcLane, int width)
{
    return __shfl_sync(mask, var, srcLane, width);
}

/* __shfl_up_sync */
/* Symbol mangles __shfl_up_sync(unsigned, int, unsigned, int). Forwards
 * (mask, var, delta, width) to the __shfl_up_sync intrinsic and returns
 * what it produces. */
__DEVICE int _Z14__shfl_up_syncjiji(unsigned mask, int var, unsigned int delta, int width)
{
    return __shfl_up_sync(mask, var, delta, width);
}

/* Symbol mangles __shfl_up_sync(unsigned, unsigned, unsigned, int). Forwards
 * (mask, var, delta, width) to the __shfl_up_sync intrinsic and returns
 * what it produces. */
__DEVICE unsigned int _Z14__shfl_up_syncjjji(unsigned mask, unsigned int var, unsigned int delta, int width)
{
    return __shfl_up_sync(mask, var, delta, width);
}

/* Symbol mangles __shfl_up_sync(unsigned, long, unsigned, int). Forwards
 * (mask, var, delta, width) to the __shfl_up_sync intrinsic and returns
 * what it produces. */
__DEVICE long _Z14__shfl_up_syncjlji(unsigned mask, long var, unsigned int delta, int width)
{
    return __shfl_up_sync(mask, var, delta, width);
}

/* Symbol mangles __shfl_up_sync(unsigned, unsigned long, unsigned, int).
 * Forwards (mask, var, delta, width) to the __shfl_up_sync intrinsic and
 * returns what it produces. */
__DEVICE unsigned long _Z14__shfl_up_syncjmji(unsigned mask, unsigned long var, unsigned int delta, int width)
{
    return __shfl_up_sync(mask, var, delta, width);
}

/* Symbol mangles __shfl_up_sync(unsigned, long long, unsigned, int).
 * Forwards (mask, var, delta, width) to the __shfl_up_sync intrinsic and
 * returns what it produces. */
__DEVICE long long _Z14__shfl_up_syncjxji(unsigned mask, long long var, unsigned int delta, int width)
{
    return __shfl_up_sync(mask, var, delta, width);
}

/* Symbol mangles __shfl_up_sync(unsigned, unsigned long long, unsigned, int).
 * Forwards (mask, var, delta, width) to the __shfl_up_sync intrinsic and
 * returns what it produces. */
__DEVICE unsigned long long _Z14__shfl_up_syncjyji(unsigned mask, unsigned long long var, unsigned int delta, int width)
{
    return __shfl_up_sync(mask, var, delta, width);
}

/* Symbol mangles __shfl_up_sync(unsigned, float, unsigned, int). Forwards
 * (mask, var, delta, width) to the __shfl_up_sync intrinsic and returns
 * what it produces. */
__DEVICE float _Z14__shfl_up_syncjfji(unsigned mask, float var, unsigned int delta, int width)
{
    return __shfl_up_sync(mask, var, delta, width);
}

/* Symbol mangles __shfl_up_sync(unsigned, double, unsigned, int). Forwards
 * (mask, var, delta, width) to the __shfl_up_sync intrinsic and returns
 * what it produces. */
__DEVICE double _Z14__shfl_up_syncjdji(unsigned mask, double var, unsigned int delta, int width)
{
    return __shfl_up_sync(mask, var, delta, width);
}

/* __shfl_down_sync */
/* Symbol mangles __shfl_down_sync(unsigned, int, unsigned, int). Relays
 * (mask, var, delta, width) to the __shfl_down_sync intrinsic and returns
 * its value. */
__DEVICE int _Z16__shfl_down_syncjiji(unsigned mask, int var, unsigned int delta, int width)
{
    return __shfl_down_sync(mask, var, delta, width);
}

/* Symbol mangles __shfl_down_sync(unsigned, unsigned, unsigned, int). Relays
 * (mask, var, delta, width) to the __shfl_down_sync intrinsic and returns
 * its value. */
__DEVICE unsigned int _Z16__shfl_down_syncjjji(unsigned mask, unsigned int var, unsigned int delta, int width)
{
    return __shfl_down_sync(mask, var, delta, width);
}

/* Symbol mangles __shfl_down_sync(unsigned, long, unsigned, int). Relays
 * (mask, var, delta, width) to the __shfl_down_sync intrinsic and returns
 * its value. */
__DEVICE long _Z16__shfl_down_syncjlji(unsigned mask, long var, unsigned int delta, int width)
{
    return __shfl_down_sync(mask, var, delta, width);
}

/* Symbol mangles __shfl_down_sync(unsigned, unsigned long, unsigned, int).
 * Relays (mask, var, delta, width) to the __shfl_down_sync intrinsic and
 * returns its value. */
__DEVICE unsigned long _Z16__shfl_down_syncjmji(unsigned mask, unsigned long var, unsigned int delta, int width)
{
    return __shfl_down_sync(mask, var, delta, width);
}

/* Symbol mangles __shfl_down_sync(unsigned, long long, unsigned, int).
 * Relays (mask, var, delta, width) to the __shfl_down_sync intrinsic and
 * returns its value. */
__DEVICE long long _Z16__shfl_down_syncjxji(unsigned mask, long long var, unsigned int delta, int width)
{
    return __shfl_down_sync(mask, var, delta, width);
}

/* Symbol mangles __shfl_down_sync(unsigned, unsigned long long, unsigned, int).
 * Relays (mask, var, delta, width) to the __shfl_down_sync intrinsic and
 * returns its value. */
__DEVICE unsigned long long _Z16__shfl_down_syncjyji(unsigned mask, unsigned long long var, unsigned int delta, int width)
{
    return __shfl_down_sync(mask, var, delta, width);
}

/* Symbol mangles __shfl_down_sync(unsigned, float, unsigned, int). Relays
 * (mask, var, delta, width) to the __shfl_down_sync intrinsic and returns
 * its value. */
__DEVICE float _Z16__shfl_down_syncjfji(unsigned mask, float var, unsigned int delta, int width)
{
    return __shfl_down_sync(mask, var, delta, width);
}

/* Symbol mangles __shfl_down_sync(unsigned, double, unsigned, int). Relays
 * (mask, var, delta, width) to the __shfl_down_sync intrinsic and returns
 * its value. */
__DEVICE double _Z16__shfl_down_syncjdji(unsigned mask, double var, unsigned int delta, int width)
{
    return __shfl_down_sync(mask, var, delta, width);
}

/* __shfl_xor_sync */
/* Symbol mangles __shfl_xor_sync(unsigned, int, int, int). Passes
 * (mask, var, laneMask, width) through to the __shfl_xor_sync intrinsic
 * and returns the outcome. */
__DEVICE int _Z15__shfl_xor_syncjiii(unsigned mask, int var, int laneMask, int width)
{
    return __shfl_xor_sync(mask, var, laneMask, width);
}

/* Symbol mangles __shfl_xor_sync(unsigned, unsigned, int, int). Passes
 * (mask, var, laneMask, width) through to the __shfl_xor_sync intrinsic
 * and returns the outcome. */
__DEVICE unsigned int _Z15__shfl_xor_syncjjii(unsigned mask, unsigned int var, int laneMask, int width)
{
    return __shfl_xor_sync(mask, var, laneMask, width);
}

/* Symbol mangles __shfl_xor_sync(unsigned, long, int, int). Passes
 * (mask, var, laneMask, width) through to the __shfl_xor_sync intrinsic
 * and returns the outcome. */
__DEVICE long _Z15__shfl_xor_syncjlii(unsigned mask, long var, int laneMask, int width)
{
    return __shfl_xor_sync(mask, var, laneMask, width);
}

/* Symbol mangles __shfl_xor_sync(unsigned, unsigned long, int, int). Passes
 * (mask, var, laneMask, width) through to the __shfl_xor_sync intrinsic
 * and returns the outcome. */
__DEVICE unsigned long _Z15__shfl_xor_syncjmii(unsigned mask, unsigned long var, int laneMask, int width)
{
    return __shfl_xor_sync(mask, var, laneMask, width);
}

/* Symbol mangles __shfl_xor_sync(unsigned, long long, int, int). Passes
 * (mask, var, laneMask, width) through to the __shfl_xor_sync intrinsic
 * and returns the outcome. */
__DEVICE long long _Z15__shfl_xor_syncjxii(unsigned mask, long long var, int laneMask, int width)
{
    return __shfl_xor_sync(mask, var, laneMask, width);
}

/* Symbol mangles __shfl_xor_sync(unsigned, unsigned long long, int, int).
 * Passes (mask, var, laneMask, width) through to the __shfl_xor_sync
 * intrinsic and returns the outcome. */
__DEVICE unsigned long long _Z15__shfl_xor_syncjyii(unsigned mask, unsigned long long var, int laneMask, int width)
{
    return __shfl_xor_sync(mask, var, laneMask, width);
}

/* Symbol mangles __shfl_xor_sync(unsigned, float, int, int). Passes
 * (mask, var, laneMask, width) through to the __shfl_xor_sync intrinsic
 * and returns the outcome. */
__DEVICE float _Z15__shfl_xor_syncjfii(unsigned mask, float var, int laneMask, int width)
{
    return __shfl_xor_sync(mask, var, laneMask, width);
}

/* Symbol mangles __shfl_xor_sync(unsigned, double, int, int). Passes
 * (mask, var, laneMask, width) through to the __shfl_xor_sync intrinsic
 * and returns the outcome. */
__DEVICE double _Z15__shfl_xor_syncjdii(unsigned mask, double var, int laneMask, int width)
{
    return __shfl_xor_sync(mask, var, laneMask, width);
}

/* barriers, warp synchronization and warp vote */
/* Symbol mangles __barrier_sync(unsigned). Invokes the __barrier_sync
 * intrinsic with `id`; nothing is returned. */
__DEVICE void
_Z14__barrier_syncj(unsigned id)
{
    __barrier_sync(id);
}

/* Symbol mangles __barrier_sync_count(unsigned, unsigned). Invokes the
 * __barrier_sync_count intrinsic with (id, cnt); nothing is returned. */
__DEVICE void
_Z20__barrier_sync_countjj(unsigned id, unsigned cnt)
{
    __barrier_sync_count(id, cnt);
}

/* Symbol mangles __syncwarp(unsigned). Invokes the __syncwarp intrinsic
 * with `mask`; nothing is returned. */
__DEVICE void
_Z10__syncwarpj(unsigned mask)
{
    __syncwarp(mask);
}

/* Symbol mangles __all_sync(unsigned, int). Evaluates the __all_sync
 * intrinsic on (mask, predicate) and returns its value. */
__DEVICE int
_Z10__all_syncji(unsigned mask, int predicate)
{
    const int outcome = __all_sync(mask, predicate);
    return outcome;
}

/* Symbol mangles __any_sync(unsigned, int). Evaluates the __any_sync
 * intrinsic on (mask, predicate) and returns its value. */
__DEVICE int
_Z10__any_syncji(unsigned mask, int predicate)
{
    const int outcome = __any_sync(mask, predicate);
    return outcome;
}

/* Symbol mangles __uni_sync(unsigned, int). Evaluates the __uni_sync
 * intrinsic on (mask, predicate) and returns its value. */
__DEVICE int
_Z10__uni_syncji(unsigned mask, int predicate)
{
    const int outcome = __uni_sync(mask, predicate);
    return outcome;
}

/* Symbol mangles __ballot_sync(unsigned, int). Evaluates the __ballot_sync
 * intrinsic on (mask, predicate) and returns the unsigned value it yields. */
__DEVICE unsigned
_Z13__ballot_syncji(unsigned mask, int predicate)
{
    const unsigned votes = __ballot_sync(mask, predicate);
    return votes;
}

/* Symbol mangles __activemask(). Takes no arguments and returns the value
 * of the __activemask intrinsic. */
__DEVICE unsigned
_Z12__activemaskv()
{
    const unsigned lanes = __activemask();
    return lanes;
}

#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
/* Symbol mangles __match_any_sync(unsigned, int). Compiled only when
 * __CUDA_ARCH__ >= 700 (enclosing #if). Returns whatever the
 * __match_any_sync intrinsic yields for (mask, value). */
__DEVICE unsigned int
_Z16__match_any_syncji(unsigned int mask, int value)
{
    return __match_any_sync(mask, value);
}

/* Symbol mangles __match_any_sync(unsigned, unsigned). Compiled only when
 * __CUDA_ARCH__ >= 700 (enclosing #if). Returns whatever the
 * __match_any_sync intrinsic yields for (mask, value). */
__DEVICE unsigned int
_Z16__match_any_syncjj(unsigned int mask, unsigned int value)
{
    return __match_any_sync(mask, value);
}

/* Symbol mangles __match_any_sync(unsigned, long). Compiled only when
 * __CUDA_ARCH__ >= 700 (enclosing #if). Returns whatever the
 * __match_any_sync intrinsic yields for (mask, value). */
__DEVICE unsigned int
_Z16__match_any_syncjl(unsigned int mask, long value)
{
    return __match_any_sync(mask, value);
}

/* Symbol mangles __match_any_sync(unsigned, long long). Compiled only when
 * __CUDA_ARCH__ >= 700 (enclosing #if). Returns whatever the
 * __match_any_sync intrinsic yields for (mask, value). */
__DEVICE unsigned int
_Z16__match_any_syncjx(unsigned int mask, long long value)
{
    return __match_any_sync(mask, value);
}

/* Symbol mangles __match_any_sync(unsigned, unsigned long). Compiled only
 * when __CUDA_ARCH__ >= 700 (enclosing #if). Returns whatever the
 * __match_any_sync intrinsic yields for (mask, value). */
__DEVICE unsigned int
_Z16__match_any_syncjm(unsigned int mask, unsigned long value)
{
    return __match_any_sync(mask, value);
}

/* Symbol mangles __match_any_sync(unsigned, unsigned long long). Compiled
 * only when __CUDA_ARCH__ >= 700 (enclosing #if). Returns whatever the
 * __match_any_sync intrinsic yields for (mask, value). */
__DEVICE unsigned int
_Z16__match_any_syncjy(unsigned int mask, unsigned long long value)
{
    return __match_any_sync(mask, value);
}

/* Symbol mangles __match_any_sync(unsigned, float). Compiled only when
 * __CUDA_ARCH__ >= 700 (enclosing #if). Returns whatever the
 * __match_any_sync intrinsic yields for (mask, value). */
__DEVICE unsigned int
_Z16__match_any_syncjf(unsigned int mask, float value)
{
    return __match_any_sync(mask, value);
}

/* Symbol mangles __match_any_sync(unsigned, double). Compiled only when
 * __CUDA_ARCH__ >= 700 (enclosing #if). Returns whatever the
 * __match_any_sync intrinsic yields for (mask, value). */
__DEVICE unsigned int
_Z16__match_any_syncjd(unsigned int mask, double value)
{
    return __match_any_sync(mask, value);
}

/*
 * Symbol mangles __match_all_sync(unsigned, unsigned, int *). Compiled only
 * when __CUDA_ARCH__ >= 700 (enclosing #if). Forwards (mask, value, pred)
 * to the __match_all_sync intrinsic and returns its result.
 *
 * `value` is declared unsigned so the signature agrees with the `j`
 * (unsigned int) encoded in the symbol name, matching the corresponding
 * __match_any_sync wrapper.
 *
 * NOTE(review): the symbol encodes `int *` (Pi) for the last parameter, but
 * it is declared `signed char *` and cast below. Presumably intentional;
 * confirm against the callers.
 */
__DEVICE unsigned int _Z16__match_all_syncjjPi(unsigned mask, unsigned value, signed char *pred) {
    return __match_all_sync(mask, value, (int *) pred);
}

/*
 * Symbol mangles __match_all_sync(unsigned, int, int *). Compiled only when
 * __CUDA_ARCH__ >= 700 (enclosing #if). Forwards (mask, value, pred) to the
 * __match_all_sync intrinsic and returns its result.
 *
 * `value` is declared int so the signature agrees with the `i` (int)
 * encoded in the symbol name, matching the corresponding __match_any_sync
 * wrapper.
 *
 * NOTE(review): the symbol encodes `int *` (Pi) for the last parameter, but
 * it is declared `signed char *` and cast below. Presumably intentional;
 * confirm against the callers.
 */
__DEVICE unsigned int _Z16__match_all_syncjiPi(unsigned mask, int value, signed char *pred) {
    return __match_all_sync(mask, value, (int *) pred);
}

/* Symbol mangles __match_all_sync(unsigned, long, int *). Only built when
 * __CUDA_ARCH__ >= 700 (enclosing #if). pred is received as signed char *
 * and cast to int * before being handed to the __match_all_sync intrinsic,
 * whose result is returned. */
__DEVICE unsigned int
_Z16__match_all_syncjlPi(unsigned mask, long value, signed char *pred)
{
    int *predOut = (int *) pred;
    return __match_all_sync(mask, value, predOut);
}

/* Symbol mangles __match_all_sync(unsigned, unsigned long, int *). Only
 * built when __CUDA_ARCH__ >= 700 (enclosing #if). pred is received as
 * signed char * and cast to int * before being handed to the
 * __match_all_sync intrinsic, whose result is returned. */
__DEVICE unsigned int
_Z16__match_all_syncjmPi(unsigned mask, unsigned long value, signed char *pred)
{
    int *predOut = (int *) pred;
    return __match_all_sync(mask, value, predOut);
}

/* Symbol mangles __match_all_sync(unsigned, long long, int *). Only built
 * when __CUDA_ARCH__ >= 700 (enclosing #if). pred is received as
 * signed char * and cast to int * before being handed to the
 * __match_all_sync intrinsic, whose result is returned. */
__DEVICE unsigned int
_Z16__match_all_syncjxPi(unsigned mask, long long value, signed char *pred)
{
    int *predOut = (int *) pred;
    return __match_all_sync(mask, value, predOut);
}

/* Symbol mangles __match_all_sync(unsigned, unsigned long long, int *).
 * Only built when __CUDA_ARCH__ >= 700 (enclosing #if). pred is received as
 * signed char * and cast to int * before being handed to the
 * __match_all_sync intrinsic, whose result is returned. */
__DEVICE unsigned int
_Z16__match_all_syncjyPi(unsigned mask, unsigned long long value, signed char *pred)
{
    int *predOut = (int *) pred;
    return __match_all_sync(mask, value, predOut);
}

/* Symbol mangles __match_all_sync(unsigned, float, int *). Only built when
 * __CUDA_ARCH__ >= 700 (enclosing #if). pred is received as signed char *
 * and cast to int * before being handed to the __match_all_sync intrinsic,
 * whose result is returned. */
__DEVICE unsigned int
_Z16__match_all_syncjfPi(unsigned mask, float value, signed char *pred)
{
    int *predOut = (int *) pred;
    return __match_all_sync(mask, value, predOut);
}

/* Symbol mangles __match_all_sync(unsigned, double, int *). Only built when
 * __CUDA_ARCH__ >= 700 (enclosing #if). pred is received as signed char *
 * and cast to int * before being handed to the __match_all_sync intrinsic,
 * whose result is returned. */
__DEVICE unsigned int
_Z16__match_all_syncjdPi(unsigned mask, double value, signed char *pred)
{
    int *predOut = (int *) pred;
    return __match_all_sync(mask, value, predOut);
}
#endif
