cocos-engine-external/sources/enoki/fwd.h

331 lines
9.8 KiB
C++

/*
enoki/fwd.h -- Preprocessor definitions and forward declarations
Enoki is a C++ template library that enables transparent vectorization
of numerical kernels using SIMD instruction sets available on current
processor architectures.
Copyright (c) 2019 Wenzel Jakob <wenzel.jakob@epfl.ch>
All rights reserved. Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file.
*/
#pragma once
// When building with MSVC, make sure _USE_MATH_DEFINES is defined (unless the
// build already set it) before the standard headers below are included.
#if defined(_MSC_VER) && !defined(_USE_MATH_DEFINES)
#  define _USE_MATH_DEFINES
#endif
#include <cstddef>
#include <cstring>
#include <type_traits>
// Compiler-specific attributes, optimizer hints and pragmas. All sixteen
// macros are defined on every path below; where a compiler offers no
// equivalent, the macro expands to nothing (or to its parenthesized argument).
#if defined(_MSC_VER)
#  define ENOKI_NOINLINE               __declspec(noinline)
#  define ENOKI_INLINE                 __forceinline
#  define ENOKI_INLINE_LAMBDA
#  define ENOKI_PURE
#  define ENOKI_MALLOC                 __declspec(restrict)
#  define ENOKI_MAY_ALIAS
// No alignment hint on this path: hand the pointer back unchanged. The
// argument is parenthesized so that the expansion is always a single operand.
#  define ENOKI_ASSUME_ALIGNED(x, s)   (x)
#  define ENOKI_UNROLL
#  define ENOKI_NOUNROLL
#  define ENOKI_IVDEP                  __pragma(loop(ivdep))
#  define ENOKI_PACK
// No branch hints on this path. Parenthesized so that expressions such as
// `!ENOKI_LIKELY(a == b)` group the same way as in the __builtin_expect
// versions further down (which wrap their argument in `!!(x)`).
#  define ENOKI_LIKELY(x)              (x)
#  define ENOKI_UNLIKELY(x)            (x)
#  define ENOKI_REGCALL
#  define ENOKI_IMPORT                 __declspec(dllimport)
#  define ENOKI_EXPORT                 __declspec(dllexport)
#else
// GCC-compatible compilers: attributes shared by Clang, ICC and GCC
#  define ENOKI_NOINLINE               __attribute__ ((noinline))
#  define ENOKI_INLINE                 __attribute__ ((always_inline)) inline
#  define ENOKI_INLINE_LAMBDA          __attribute__ ((always_inline))
#  define ENOKI_PURE                   __attribute__ ((const,nothrow))
#  define ENOKI_MALLOC                 __attribute__ ((malloc))
#  define ENOKI_ASSUME_ALIGNED(x, s)   __builtin_assume_aligned(x, s)
#  define ENOKI_LIKELY(x)              __builtin_expect(!!(x), 1)
#  define ENOKI_UNLIKELY(x)            __builtin_expect(!!(x), 0)
#  define ENOKI_PACK                   __attribute__ ((packed))
// Loop pragmas, may_alias and regcall differ between the three compilers
#  if defined(__clang__)
#    define ENOKI_UNROLL               _Pragma("unroll")
#    define ENOKI_NOUNROLL             _Pragma("nounroll")
#    define ENOKI_IVDEP
#    define ENOKI_MAY_ALIAS            __attribute__ ((may_alias))
#    define ENOKI_REGCALL              __attribute__ ((regcall))
#  elif defined(__INTEL_COMPILER)
#    define ENOKI_MAY_ALIAS
#    define ENOKI_UNROLL               _Pragma("unroll")
#    define ENOKI_NOUNROLL             _Pragma("nounroll")
#    define ENOKI_IVDEP                _Pragma("ivdep")
#    define ENOKI_REGCALL              __attribute__ ((regcall))
#  else
#    define ENOKI_MAY_ALIAS            __attribute__ ((may_alias))
#    define ENOKI_UNROLL
#    define ENOKI_NOUNROLL
// "GCC ivdep" is only emitted for GCC 4.9 and newer
#    if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9))
#      define ENOKI_IVDEP              _Pragma("GCC ivdep")
#    else
#      define ENOKI_IVDEP
#    endif
#    define ENOKI_REGCALL
#  endif
#  define ENOKI_IMPORT
#  define ENOKI_EXPORT                 __attribute__ ((visibility("default")))
#endif
/// Evaluate `x` and discard the result by casting it to void, which marks the
/// expression as used. Both the argument and the whole expansion are
/// parenthesized so that compound arguments (e.g. `a + b`) expand correctly.
#define ENOKI_MARK_USED(x) ((void) (x))
// Macros that open and close a namespace. Each one is only defined here when
// the including code has not already provided its own definition.
#ifndef NAMESPACE_BEGIN
#  define NAMESPACE_BEGIN(name) namespace name {
#endif

#ifndef NAMESPACE_END
#  define NAMESPACE_END(name) }
#endif
// Two-level stringification: ENOKI_TOSTRING expands its argument first and
// only then hands it to ENOKI_STRINGIFY to be turned into a string literal.
#define ENOKI_STRINGIFY(x) #x
#define ENOKI_TOSTRING(x)  ENOKI_STRINGIFY(x)

// Library version, as separate numeric components ...
#define ENOKI_VERSION_MAJOR 0
#define ENOKI_VERSION_MINOR 1
#define ENOKI_VERSION_PATCH 0

// ... and as a single "major.minor.patch" string literal (built from
// adjacent string literals, which the compiler concatenates).
#define ENOKI_VERSION                                                          \
    (ENOKI_TOSTRING(ENOKI_VERSION_MAJOR) "."                                   \
     ENOKI_TOSTRING(ENOKI_VERSION_MINOR) "."                                   \
     ENOKI_TOSTRING(ENOKI_VERSION_PATCH))
// Refuse to build with compilers older than the minimum supported versions:
// AppleClang 10, Clang 7 and GCC 8.2. Compilers that define neither
// __clang__ nor __GNUC__ are not checked here.
//
// AppleClang is matched first (it defines __clang__ as well) so that it is
// compared against its own version threshold rather than the Clang/LLVM one.
#if defined(__clang__) && defined(__apple_build_version__)
# if __clang_major__ < 10
# error Enoki requires a very recent version of AppleClang (XCode >= 10.0)
# endif
#elif defined(__clang__)
// The Clang version check is skipped when EMSCRIPTEN is defined
# if __clang_major__ < 7 && !defined(EMSCRIPTEN)
# error Enoki requires a very recent version of Clang/LLVM (>= 7.0)
# endif
#elif defined(__GNUC__)
// Rejects GCC 7.x and older as well as GCC 8.0 / 8.1
# if (__GNUC__ < 8) || (__GNUC__ == 8 && __GNUC_MINOR__ < 2)
# error Enoki requires a very recent version of GCC (>= 8.2)
# endif
#endif
// Target architecture detection (x86: 64-bit takes precedence over 32-bit)
#if defined(__x86_64__) || defined(_M_X64)
#  define ENOKI_X86_64 1
#endif

#if !defined(ENOKI_X86_64) && (defined(__i386__) || defined(_M_IX86))
#  define ENOKI_X86_32 1
#endif

// ARM: at most one of the two flags is set
#if defined(__aarch64__)
#  define ENOKI_ARM_64 1
#elif defined(__arm__)
#  define ENOKI_ARM_32 1
#endif

// Enoki does not support vectorization on 32-bit Windows due to various
// platform limitations (unaligned stack, calling conventions don't allow
// passing vector registers, etc.), so it is switched off there unless the
// build has already defined ENOKI_DISABLE_VECTORIZATION itself.
#if !defined(ENOKI_DISABLE_VECTORIZATION) && defined(_MSC_VER) && defined(ENOKI_X86_32)
#  define ENOKI_DISABLE_VECTORIZATION 1
#endif
// Translate the compiler's instruction-set feature macros into ENOKI_* flags.
// The whole section is skipped when ENOKI_DISABLE_VECTORIZATION is defined.
// Every flag is defined with the value 1 so that `#if defined(FLAG)` and
// `#if FLAG` both work, for x86 and ARM flags alike.
#if !defined(ENOKI_DISABLE_VECTORIZATION)
// x86: AVX-512 subsets
#  if defined(__AVX512F__)
#    define ENOKI_X86_AVX512F 1
#  endif
#  if defined(__AVX512CD__)
#    define ENOKI_X86_AVX512CD 1
#  endif
#  if defined(__AVX512DQ__)
#    define ENOKI_X86_AVX512DQ 1
#  endif
#  if defined(__AVX512VL__)
#    define ENOKI_X86_AVX512VL 1
#  endif
#  if defined(__AVX512BW__)
#    define ENOKI_X86_AVX512BW 1
#  endif
#  if defined(__AVX512PF__)
#    define ENOKI_X86_AVX512PF 1
#  endif
#  if defined(__AVX512ER__)
#    define ENOKI_X86_AVX512ER 1
#  endif
#  if defined(__AVX512VBMI__)
#    define ENOKI_X86_AVX512VBMI 1
#  endif
#  if defined(__AVX512VPOPCNTDQ__)
#    define ENOKI_X86_AVX512VPOPCNTDQ 1
#  endif
// x86: AVX2 and older extensions
#  if defined(__AVX2__)
#    define ENOKI_X86_AVX2 1
#  endif
#  if defined(__FMA__)
#    define ENOKI_X86_FMA 1
#  endif
#  if defined(__F16C__)
#    define ENOKI_X86_F16C 1
#  endif
#  if defined(__AVX__)
#    define ENOKI_X86_AVX 1
#  endif
#  if defined(__SSE4_2__)
#    define ENOKI_X86_SSE42 1
#  endif
// ARM
#  if defined(__ARM_NEON)
#    define ENOKI_ARM_NEON 1
#  endif
#  if defined(__ARM_FEATURE_FMA)
#    define ENOKI_ARM_FMA 1
#  endif
#endif
/* Fix missing/inconsistent preprocessor flags: a newer instruction set turns
   on the flags of the older ones it is paired with here, in case they were
   not set already. The checks run top to bottom and each one sees the flags
   defined above it, giving the chain
   AVX512F -> AVX2 -> {F16C, FMA, AVX} and AVX -> SSE4.2.
   Implied flags get the value 1 so that they are indistinguishable from flags
   that were set directly from a compiler feature macro. */
#if defined(ENOKI_X86_AVX512F) && !defined(ENOKI_X86_AVX2)
#  define ENOKI_X86_AVX2 1
#endif

#if defined(ENOKI_X86_AVX2) && !defined(ENOKI_X86_F16C)
#  define ENOKI_X86_F16C 1
#endif

#if defined(ENOKI_X86_AVX2) && !defined(ENOKI_X86_FMA)
#  define ENOKI_X86_FMA 1
#endif

#if defined(ENOKI_X86_AVX2) && !defined(ENOKI_X86_AVX)
#  define ENOKI_X86_AVX 1
#endif

#if defined(ENOKI_X86_AVX) && !defined(ENOKI_X86_SSE42)
#  define ENOKI_X86_SSE42 1
#endif
/* Hook used by the test suite to detect unimplemented methods in vectorized
   backends. Unless a definition was supplied before this point, the macro
   expands to nothing (its argument is not evaluated). */
#ifndef ENOKI_TRACK_SCALAR
#  define ENOKI_TRACK_SCALAR(reason)
#endif
/* Allocation tracing. When ENOKI_ALLOC_VERBOSE is defined, ENOKI_TRACK_ALLOC
   and ENOKI_TRACK_DEALLOC print the pointer and the size to standard output;
   otherwise (and unless they were defined beforehand) they expand to nothing.
   The verbose expansions end in a semicolon, so they form a complete
   statement with or without a trailing `;` at the point of use. */
#if defined(ENOKI_ALLOC_VERBOSE)
// std::printf is declared here so that the macros do not depend on the
// including file having pulled in <cstdio> on its own.
#  include <cstdio>
// %p expects a pointer to void, and %llu an unsigned long long: cast both
#  define ENOKI_TRACK_ALLOC(ptr, size)                                         \
       std::printf("Enoki: %p: alloc(%llu)\n", (const void *) (ptr),          \
                   (unsigned long long) (size));
#  define ENOKI_TRACK_DEALLOC(ptr, size)                                       \
       std::printf("Enoki: %p: dealloc(%llu)\n", (const void *) (ptr),        \
                   (unsigned long long) (size));
#endif

#if !defined(ENOKI_TRACK_ALLOC)
#  define ENOKI_TRACK_ALLOC(ptr, size)
#endif

#if !defined(ENOKI_TRACK_DEALLOC)
#  define ENOKI_TRACK_DEALLOC(ptr, size)
#endif
/// For use in a scope where `Value` names a type: passes `reason` on to
/// ENOKI_TRACK_SCALAR when the decayed `Value` is an arithmetic type.
/// Expands to a complete `if` statement with a braced body.
#define ENOKI_CHKSCALAR(reason)                                                \
    if (std::is_arithmetic<std::decay_t<Value>>::value) {                      \
        ENOKI_TRACK_SCALAR(reason)                                             \
    }
// Defaults to 1 unless the build already supplied a value.
// NOTE(review): presumably selects approximate math by default -- confirm
// against the places where this macro is consumed.
#ifndef ENOKI_APPROX_DEFAULT
#  define ENOKI_APPROX_DEFAULT 1
#endif
NAMESPACE_BEGIN(enoki)
/// Signed counterpart of size_t
using ssize_t = std::make_signed_t<size_t>;

/// Maximum hardware-supported packet size in bytes, chosen from the ENOKI_*
/// instruction set flags (4 when none of them is set)
static constexpr size_t max_packet_size =
#if defined(ENOKI_X86_AVX512F)
    64;
#elif defined(ENOKI_X86_AVX)
    32;
#elif defined(ENOKI_X86_SSE42) || defined(ENOKI_ARM_NEON)
    16;
#else
    4;
#endif

/// One quarter of max_packet_size
constexpr size_t array_default_size = max_packet_size / 4;
// ---------------------------------------------------------------------------
//  Array base classes
// ---------------------------------------------------------------------------

/// Base class shared by all arrays
template <typename Value_, typename Derived_>
struct ArrayBase;

/// Base class shared by all statically sized arrays
template <typename Value_, size_t Size_, bool IsMask_, typename Derived_>
struct StaticArrayBase;

// ---------------------------------------------------------------------------
//  Statically sized arrays and masks (Size_ defaults to array_default_size)
// ---------------------------------------------------------------------------

/// Generic array class; broadcasts from the outer to the inner dimensions
template <typename Value_, size_t Size_ = array_default_size> struct Array;

/// Generic array class; broadcasts from the inner to the outer dimensions
template <typename Value_, size_t Size_ = array_default_size> struct Packet;

/// Generic mask class; broadcasts from the outer to the inner dimensions
template <typename Value_, size_t Size_ = array_default_size> struct Mask;

/// Generic mask class; broadcasts from the inner to the outer dimensions
template <typename Value_, size_t Size_ = array_default_size> struct PacketMask;

// ---------------------------------------------------------------------------
//  Dynamic and autodiff arrays
// ---------------------------------------------------------------------------

/// Dynamically sized array, and the matching mask type
template <typename Packet_> struct DynamicArray;
template <typename Packet_> struct DynamicMask;

/// Reverse-mode autodiff array
template <typename Value> struct DiffArray;

// ---------------------------------------------------------------------------
//  Further templates that are only declared here
// ---------------------------------------------------------------------------

template <typename Value_, size_t Size_> struct Matrix;
template <typename Value_> struct Complex;
template <typename Value_> struct Quaternion;

/// Helper class for custom data structures
template <typename T, typename = int> struct struct_support;

// ---------------------------------------------------------------------------
//  CUDA-related declarations
// ---------------------------------------------------------------------------

template <typename Value> struct CUDAArray;

template <typename T> class cuda_host_allocator;
template <typename T> class cuda_managed_allocator;

// Declared with ENOKI_IMPORT; the definitions are not part of this header
extern ENOKI_IMPORT void* cuda_host_malloc(size_t);
extern ENOKI_IMPORT void cuda_host_free(void *);

// ---------------------------------------------------------------------------
//  Miscellaneous
// ---------------------------------------------------------------------------

/// Half-precision floating point value
struct half;

template <typename T> struct MaskBit;

namespace detail {
    /// Empty tag type
    struct reinterpret_flag { };
}

template <typename T, bool UseIntrinsic = false, typename = int> struct divisor;
template <typename T> struct divisor_ext;
/// Reinterpret the binary representation of a value as another type of the
/// same size. The bytes of `val` are copied with std::memcpy into a
/// default-initialized `T`, which is then returned; a size mismatch between
/// `T` and `U` is rejected at compile time.
template <typename T, typename U>
ENOKI_INLINE T memcpy_cast(const U &val) {
    static_assert(sizeof(T) == sizeof(U), "memcpy_cast: sizes did not match!");

    T converted;
    std::memcpy(&converted, &val, sizeof(converted));
    return converted;
}
NAMESPACE_END(enoki)