/*
    enoki/fwd.h -- Preprocessor definitions and forward declarations

    Enoki is a C++ template library that enables transparent vectorization
    of numerical kernels using SIMD instruction sets available on current
    processor architectures.

    Copyright (c) 2019 Wenzel Jakob

    All rights reserved. Use of this source code is governed by a BSD-style
    license that can be found in the LICENSE file.
*/

#pragma once

// Request the non-standard math constants from MSVC's math header.
#if defined(_MSC_VER)
#  if !defined(_USE_MATH_DEFINES)
#    define _USE_MATH_DEFINES
#  endif
#endif

#include <cstddef>
#include <cstring>
#include <type_traits>

// -----------------------------------------------------------------------
//  Compiler-specific annotations. Every macro is defined on every
//  compiler; where no equivalent is used it expands to nothing (or,
//  for the function-like macros, to its parenthesized argument).
// -----------------------------------------------------------------------
#if defined(_MSC_VER)
#  define ENOKI_NOINLINE               __declspec(noinline)
#  define ENOKI_INLINE                 __forceinline
#  define ENOKI_INLINE_LAMBDA
#  define ENOKI_PURE
#  define ENOKI_MALLOC                 __declspec(restrict)
#  define ENOKI_MAY_ALIAS
   // Arguments are parenthesized so that expressions such as 'p + 1' or
   // 'a || b' keep their meaning at the expansion site.
#  define ENOKI_ASSUME_ALIGNED(x, s)   (x)
#  define ENOKI_UNROLL
#  define ENOKI_NOUNROLL
#  define ENOKI_IVDEP                  __pragma(loop(ivdep))
#  define ENOKI_PACK
#  define ENOKI_LIKELY(x)              (x)
#  define ENOKI_UNLIKELY(x)            (x)
#  define ENOKI_REGCALL
#  define ENOKI_IMPORT                 __declspec(dllimport)
#  define ENOKI_EXPORT                 __declspec(dllexport)
#else
#  define ENOKI_NOINLINE               __attribute__ ((noinline))
#  define ENOKI_INLINE                 __attribute__ ((always_inline)) inline
#  define ENOKI_INLINE_LAMBDA          __attribute__ ((always_inline))
#  define ENOKI_PURE                   __attribute__ ((const,nothrow))
#  define ENOKI_MALLOC                 __attribute__ ((malloc))
#  define ENOKI_ASSUME_ALIGNED(x, s)   __builtin_assume_aligned(x, s)
#  define ENOKI_LIKELY(x)              __builtin_expect(!!(x), 1)
#  define ENOKI_UNLIKELY(x)            __builtin_expect(!!(x), 0)
#  define ENOKI_PACK                   __attribute__ ((packed))
#  if defined(__clang__)
#    define ENOKI_UNROLL               _Pragma("unroll")
#    define ENOKI_NOUNROLL             _Pragma("nounroll")
#    define ENOKI_IVDEP
#    define ENOKI_MAY_ALIAS            __attribute__ ((may_alias))
#    define ENOKI_REGCALL              __attribute__ ((regcall))
#  elif defined(__INTEL_COMPILER)
#    define ENOKI_MAY_ALIAS
#    define ENOKI_UNROLL               _Pragma("unroll")
#    define ENOKI_NOUNROLL             _Pragma("nounroll")
#    define ENOKI_IVDEP                _Pragma("ivdep")
#    define ENOKI_REGCALL              __attribute__ ((regcall))
#  else
#    define ENOKI_MAY_ALIAS            __attribute__ ((may_alias))
#    define ENOKI_UNROLL
#    define ENOKI_NOUNROLL
     // The 'GCC ivdep' pragma is only emitted for GCC 4.9 and later.
#    if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9))
#      define ENOKI_IVDEP              _Pragma("GCC ivdep")
#    else
#      define ENOKI_IVDEP
#    endif
#    define ENOKI_REGCALL
#  endif
#  define ENOKI_IMPORT
#  define ENOKI_EXPORT                 __attribute__ ((visibility("default")))
#endif

/// Evaluate 'x' and discard the result (silences unused-variable warnings)
#define ENOKI_MARK_USED(x) ((void) (x))

// Namespace helpers; an existing definition (e.g. from another library
// that provides the same macros) takes precedence.
#if !defined(NAMESPACE_BEGIN)
#  define NAMESPACE_BEGIN(name) namespace name {
#endif

#if !defined(NAMESPACE_END)
#  define NAMESPACE_END(name) }
#endif

#define ENOKI_VERSION_MAJOR 0
#define ENOKI_VERSION_MINOR 1
#define ENOKI_VERSION_PATCH 0

// Two-level stringification so that the argument is macro-expanded first.
#define ENOKI_STRINGIFY(x) #x
#define ENOKI_TOSTRING(x)  ENOKI_STRINGIFY(x)

/// Version as a string literal of the form "MAJOR.MINOR.PATCH"
#define ENOKI_VERSION                                                          \
    (ENOKI_TOSTRING(ENOKI_VERSION_MAJOR) "."                                   \
     ENOKI_TOSTRING(ENOKI_VERSION_MINOR) "."                                   \
     ENOKI_TOSTRING(ENOKI_VERSION_PATCH))

// Reject compilers that are too old. AppleClang is tested first because it
// also defines __clang__; Clang is tested before GCC because the GCC branch
// is keyed on __GNUC__.
#if defined(__clang__) && defined(__apple_build_version__)
#  if __clang_major__ < 10
#    error Enoki requires a very recent version of AppleClang (XCode >= 10.0)
#  endif
#elif defined(__clang__)
#  if __clang_major__ < 7 && !defined(EMSCRIPTEN)
#    error Enoki requires a very recent version of Clang/LLVM (>= 7.0)
#  endif
#elif defined(__GNUC__)
#  if (__GNUC__ < 8) || (__GNUC__ == 8 && __GNUC_MINOR__ < 2)
#    error Enoki requires a very recent version of GCC (>= 8.2)
#  endif
#endif

// -----------------------------------------------------------------------
//  Target architecture detection
// -----------------------------------------------------------------------
#if defined(__x86_64__) || defined(_M_X64)
#  define ENOKI_X86_64 1
#endif

#if (defined(__i386__) || defined(_M_IX86)) && !defined(ENOKI_X86_64)
#  define ENOKI_X86_32 1
#endif

#if defined(__aarch64__)
#  define ENOKI_ARM_64 1
#elif defined(__arm__)
#  define ENOKI_ARM_32 1
#endif

#if (defined(_MSC_VER) && defined(ENOKI_X86_32)) && !defined(ENOKI_DISABLE_VECTORIZATION)
// Enoki does not support vectorization on 32-bit Windows due to various
// platform limitations (unaligned stack, calling conventions don't allow
// passing vector registers, etc.).
#  define ENOKI_DISABLE_VECTORIZATION 1
#endif

// -----------------------------------------------------------------------
//  Instruction set detection: mirror the compiler's feature macros into
//  ENOKI_* flags unless vectorization was disabled.
// -----------------------------------------------------------------------
#if !defined(ENOKI_DISABLE_VECTORIZATION)
#  if defined(__AVX512F__)
#    define ENOKI_X86_AVX512F 1
#  endif
#  if defined(__AVX512CD__)
#    define ENOKI_X86_AVX512CD 1
#  endif
#  if defined(__AVX512DQ__)
#    define ENOKI_X86_AVX512DQ 1
#  endif
#  if defined(__AVX512VL__)
#    define ENOKI_X86_AVX512VL 1
#  endif
#  if defined(__AVX512BW__)
#    define ENOKI_X86_AVX512BW 1
#  endif
#  if defined(__AVX512PF__)
#    define ENOKI_X86_AVX512PF 1
#  endif
#  if defined(__AVX512ER__)
#    define ENOKI_X86_AVX512ER 1
#  endif
#  if defined(__AVX512VBMI__)
#    define ENOKI_X86_AVX512VBMI 1
#  endif
#  if defined(__AVX512VPOPCNTDQ__)
#    define ENOKI_X86_AVX512VPOPCNTDQ 1
#  endif
#  if defined(__AVX2__)
#    define ENOKI_X86_AVX2 1
#  endif
#  if defined(__FMA__)
#    define ENOKI_X86_FMA 1
#  endif
#  if defined(__F16C__)
#    define ENOKI_X86_F16C 1
#  endif
#  if defined(__AVX__)
#    define ENOKI_X86_AVX 1
#  endif
#  if defined(__SSE4_2__)
#    define ENOKI_X86_SSE42 1
#  endif
#  if defined(__ARM_NEON)
#    define ENOKI_ARM_NEON 1
#  endif
#  if defined(__ARM_FEATURE_FMA)
#    define ENOKI_ARM_FMA 1
#  endif
#endif

/* Fix missing/inconsistent preprocessor flags: a newer instruction set
   implies the older ones below it. The order matters, since each step may
   rely on a flag defined by the previous one. All flags are defined as 1
   so that they agree with the detection block above. */
#if defined(ENOKI_X86_AVX512F) && !defined(ENOKI_X86_AVX2)
#  define ENOKI_X86_AVX2 1
#endif

#if defined(ENOKI_X86_AVX2) && !defined(ENOKI_X86_F16C)
#  define ENOKI_X86_F16C 1
#endif

#if defined(ENOKI_X86_AVX2) && !defined(ENOKI_X86_FMA)
#  define ENOKI_X86_FMA 1
#endif

#if defined(ENOKI_X86_AVX2) && !defined(ENOKI_X86_AVX)
#  define ENOKI_X86_AVX 1
#endif

#if defined(ENOKI_X86_AVX) && !defined(ENOKI_X86_SSE42)
#  define ENOKI_X86_SSE42 1
#endif

/* The following macro is used by the test suite to detect
   unimplemented methods in vectorized backends */
#if !defined(ENOKI_TRACK_SCALAR)
#  define ENOKI_TRACK_SCALAR(reason)
#endif

// Optional allocation tracing. The macros below expand to a complete
// statement (including the trailing semicolon).
#if defined(ENOKI_ALLOC_VERBOSE)
#  include <cstdio> // printf() used by the tracing macros below
#  define ENOKI_TRACK_ALLOC(ptr, size)                                         \
      printf("Enoki: %p: alloc(%llu)\n", (ptr), (unsigned long long) (size));
#  define ENOKI_TRACK_DEALLOC(ptr, size)                                       \
      printf("Enoki: %p: dealloc(%llu)\n", (ptr), (unsigned long long) (size));
#endif

#if !defined(ENOKI_TRACK_ALLOC)
#  define ENOKI_TRACK_ALLOC(ptr, size)
#endif

#if !defined(ENOKI_TRACK_DEALLOC)
#  define ENOKI_TRACK_DEALLOC(ptr, size)
#endif

// Invokes ENOKI_TRACK_SCALAR when the tested type is arithmetic.
// NOTE(review): the template argument was reconstructed as
// 'std::decay_t<Value>', which requires a type named 'Value' at the
// expansion site -- confirm against the users of this macro.
#define ENOKI_CHKSCALAR(reason)                                                \
    if (std::is_arithmetic_v<std::decay_t<Value>>) {                           \
        ENOKI_TRACK_SCALAR(reason)                                             \
    }

#if !defined(ENOKI_APPROX_DEFAULT)
#  define ENOKI_APPROX_DEFAULT 1
#endif

NAMESPACE_BEGIN(enoki)

/// Signed counterpart of size_t
using ssize_t = std::make_signed_t<size_t>;

/// Maximum hardware-supported packet size in bytes
#if defined(ENOKI_X86_AVX512F)
    static constexpr size_t max_packet_size = 64;
#elif defined(ENOKI_X86_AVX)
    static constexpr size_t max_packet_size = 32;
#elif defined(ENOKI_X86_SSE42) || defined(ENOKI_ARM_NEON)
    static constexpr size_t max_packet_size = 16;
#else
    static constexpr size_t max_packet_size = 4;
#endif

/// Default array size: the maximum packet size divided by four
constexpr size_t array_default_size = max_packet_size / 4;

// NOTE(review): the template parameter lists of the forward declarations
// below were reconstructed; confirm parameter kinds, counts and default
// arguments against the corresponding definitions.

/// Base class of all arrays
template <typename Value_, typename Derived_> struct ArrayBase;

/// Base class of all statically sized arrays
template <typename Value_, size_t Size_, bool IsMask_, typename Derived_>
struct StaticArrayBase;

/// Generic array class, which broadcasts from the outer to inner dimensions
template <typename Value_, size_t Size_ = array_default_size>
struct Array;

/// Generic array class, which broadcasts from the inner to outer dimensions
template <typename Value_, size_t Size_ = array_default_size>
struct Packet;

/// Generic mask class, which broadcasts from the outer to inner dimensions
template <typename Value_, size_t Size_ = array_default_size>
struct Mask;

/// Generic mask class, which broadcasts from the inner to outer dimensions
template <typename Value_, size_t Size_ = array_default_size>
struct PacketMask;

/// Dynamically sized array
template <typename Packet_> struct DynamicArray;
template <typename Packet_> struct DynamicMask;

/// Reverse-mode autodiff array
template <typename Value> struct DiffArray;

template <typename Value_, size_t Size_> struct Matrix;
template <typename Value_> struct Complex;
template <typename Value_> struct Quaternion;

/// Helper class for custom data structures
template <typename T, typename = int>
struct struct_support;

template <typename Value> struct CUDAArray;

template <typename T> class cuda_host_allocator;
template <typename T> class cuda_managed_allocator;

extern ENOKI_IMPORT void* cuda_host_malloc(size_t);
extern ENOKI_IMPORT void cuda_host_free(void *);

/// Half-precision floating point value
struct half;

template <typename T> struct MaskBit;

namespace detail {
    struct reinterpret_flag { };
}

template <typename T, typename = int> struct divisor;
template <typename T> struct divisor_ext;

/**
 * Reinterpret the binary representation of a data type
 *
 * Copies the object representation of \c val into a new object of type
 * \c T using std::memcpy.
 *
 * \tparam T   Destination type; must have the same size as \c U
 * \tparam U   Source type (deduced)
 * \param val  Value whose bytes are copied
 * \return     An instance of \c T holding the bytes of \c val
 */
template <typename T, typename U> ENOKI_INLINE T memcpy_cast(const U &val) {
    static_assert(sizeof(T) == sizeof(U), "memcpy_cast: sizes did not match!");
    T result;
    std::memcpy(&result, &val, sizeof(T));
    return result;
}

NAMESPACE_END(enoki)