1146 lines
44 KiB
C++
1146 lines
44 KiB
C++
/*
|
||
enoki/dynamic.h -- Dynamic heap-allocated array
|
||
|
||
Enoki is a C++ template library that enables transparent vectorization
|
||
of numerical kernels using SIMD instruction sets available on current
|
||
processor architectures.
|
||
|
||
Copyright (c) 2019 Wenzel Jakob <wenzel.jakob@epfl.ch>
|
||
|
||
All rights reserved. Use of this source code is governed by a BSD-style
|
||
license that can be found in the LICENSE file.
|
||
*/
|
||
|
||
#pragma once
|
||
|
||
#include <enoki/array.h>
|
||
|
||
#if defined(__GNUC__) && !defined(__clang__)
|
||
# pragma GCC diagnostic push
|
||
# pragma GCC diagnostic ignored "-Wclass-memaccess"
|
||
#endif
|
||
|
||
#define ENOKI_DYNAMIC_H 1
|
||
|
||
NAMESPACE_BEGIN(enoki)
|
||
|
||
template <typename Packet_>
|
||
struct DynamicArrayReference : ArrayBase<value_t<Packet_>, DynamicArrayReference<Packet_>> {
|
||
using Base = ArrayBase<value_t<Packet_>, DynamicArrayReference<Packet_>>;
|
||
using Packet = Packet_;
|
||
using ArrayType = DynamicArrayReference<array_t<Packet>>;
|
||
using MaskType = DynamicArrayReference<mask_t<Packet>>;
|
||
|
||
static constexpr size_t PacketSize = Packet::Size;
|
||
static constexpr bool IsMask = Packet::IsMask;
|
||
|
||
DynamicArrayReference(Packet *packets = nullptr) : m_packets(packets) { }
|
||
|
||
ENOKI_INLINE Packet &packet(size_t i) {
|
||
return ((Packet *) ENOKI_ASSUME_ALIGNED(m_packets, alignof(Packet)))[i];
|
||
}
|
||
|
||
ENOKI_INLINE const Packet &packet(size_t i) const {
|
||
return ((const Packet *) ENOKI_ASSUME_ALIGNED(m_packets, alignof(Packet)))[i];
|
||
}
|
||
|
||
template <typename T>
|
||
using ReplaceValue = DynamicArrayReference<replace_scalar_t<Packet, T>>;
|
||
|
||
private:
|
||
Packet *m_packets;
|
||
};
|
||
|
||
template <typename Packet_, typename Derived_>
|
||
struct DynamicArrayImpl : ArrayBase<value_t<Packet_>, Derived_> {
|
||
// -----------------------------------------------------------------------
|
||
//! @{ \name Aliases and constants
|
||
// -----------------------------------------------------------------------
|
||
|
||
using Size = uint32_t;
|
||
using Base = ArrayBase<value_t<Packet_>, Derived_>;
|
||
using Packet = Packet_;
|
||
using IndexPacket = uint_array_t<array_t<Packet_>, false>;
|
||
using IndexScalar = scalar_t<IndexPacket>;
|
||
using PacketHolder = std::unique_ptr<Packet[]>;
|
||
|
||
static constexpr size_t PacketSize = Packet::Size;
|
||
static constexpr bool IsMask = Packet::IsMask;
|
||
|
||
using typename Base::Derived;
|
||
using typename Base::Value;
|
||
using typename Base::Scalar;
|
||
using Base::derived;
|
||
|
||
//! @}
|
||
// -----------------------------------------------------------------------
|
||
|
||
// -----------------------------------------------------------------------
|
||
//! @{ \name Constructors
|
||
// -----------------------------------------------------------------------
|
||
|
||
DynamicArrayImpl() = default;
|
||
|
||
ENOKI_INLINE ~DynamicArrayImpl() {
|
||
reset();
|
||
}
|
||
|
||
/// Initialize from a list of component values
|
||
template <typename... Ts, enable_if_t<sizeof...(Ts) >= 2 &&
|
||
std::conjunction_v<detail::is_constructible<Value, Ts>...>> = 0>
|
||
ENOKI_INLINE DynamicArrayImpl(Ts... args) {
|
||
Value storage[] = { (Value) args... };
|
||
resize(sizeof...(Ts));
|
||
memcpy(m_packets.get(), storage, sizeof(Value) * sizeof...(Ts));
|
||
}
|
||
|
||
DynamicArrayImpl(const DynamicArrayImpl &value) {
|
||
operator=(value);
|
||
}
|
||
|
||
ENOKI_INLINE DynamicArrayImpl(DynamicArrayImpl &&value) {
|
||
operator=(std::move(value));
|
||
}
|
||
|
||
template <typename Packet2, typename Derived2>
|
||
DynamicArrayImpl(const DynamicArrayImpl<Packet2, Derived2> &value) {
|
||
operator=(value);
|
||
}
|
||
|
||
template <typename Value2, typename Derived2>
|
||
DynamicArrayImpl(const ArrayBase<Value2, Derived2> &value) {
|
||
operator=(value);
|
||
}
|
||
|
||
template <typename Packet2, typename Derived2>
|
||
DynamicArrayImpl(const DynamicArrayImpl<Packet2, Derived2> &other,
|
||
detail::reinterpret_flag) {
|
||
static_assert(Packet2::Size == Packet::Size, "Packet sizes must match!");
|
||
resize(other.size());
|
||
for (size_t i = 0; i < other.packets(); ++i)
|
||
packet(i) = reinterpret_array<Packet>(other.packet(i));
|
||
}
|
||
|
||
#if defined(__GNUC__)
|
||
// Don't be so noisy about sign conversion in constructor
|
||
# pragma GCC diagnostic push
|
||
# pragma GCC diagnostic ignored "-Wsign-conversion"
|
||
#endif
|
||
|
||
template <typename T = Packet_, enable_if_mask_t<T> = 0>
|
||
DynamicArrayImpl(bool value, detail::reinterpret_flag) {
|
||
resize(1);
|
||
packet(0) = Packet(value);
|
||
}
|
||
|
||
template <typename T, enable_if_t<is_scalar_v<T>> = 0>
|
||
DynamicArrayImpl(const T &value) {
|
||
using S = std::conditional_t<IsMask, bool, Scalar>;
|
||
resize(1);
|
||
packet(0) = Packet((S) value);
|
||
}
|
||
|
||
#if defined(__GNUC__)
|
||
# pragma GCC diagnostic pop
|
||
#endif
|
||
|
||
//! @}
|
||
// -----------------------------------------------------------------------
|
||
|
||
// -----------------------------------------------------------------------
|
||
//! @{ \name Assignment operators
|
||
// -----------------------------------------------------------------------
|
||
|
||
template <typename T, enable_if_t<is_scalar_v<T>> = 0>
|
||
ENOKI_NOINLINE DynamicArrayImpl &operator=(const T &value) {
|
||
resize(1);
|
||
packet(0) = Packet(value);
|
||
return derived();
|
||
}
|
||
|
||
ENOKI_NOINLINE DynamicArrayImpl &operator=(const DynamicArrayImpl &other) {
|
||
resize(other.size());
|
||
memcpy(m_packets.get(), other.m_packets.get(),
|
||
packets() * sizeof(Packet));
|
||
return derived();
|
||
}
|
||
|
||
template <typename Packet2, typename Derived2>
|
||
ENOKI_NOINLINE DynamicArrayImpl &operator=(const DynamicArrayImpl<Packet2, Derived2> &other) {
|
||
static_assert(Packet2::Size == Packet::Size, "Packet sizes must match!");
|
||
resize(other.size());
|
||
for (size_t i = 0; i < other.packets(); ++i)
|
||
packet(i) = Packet(other.packet(i));
|
||
return derived();
|
||
}
|
||
|
||
template <typename Value2, typename Derived2>
|
||
ENOKI_NOINLINE DynamicArrayImpl &operator=(const ArrayBase<Value2, Derived2> &other) {
|
||
resize(other.derived().size());
|
||
for (size_t i = 0; i < other.derived().size(); ++i)
|
||
coeff(i) = other.derived().coeff(i);
|
||
return derived();
|
||
}
|
||
|
||
ENOKI_INLINE DynamicArrayImpl &operator=(DynamicArrayImpl &&other) {
|
||
m_packets.swap(other.m_packets);
|
||
std::swap(m_packets_allocated, other.m_packets_allocated);
|
||
std::swap(m_size, other.m_size);
|
||
return derived();
|
||
}
|
||
|
||
void reset() {
|
||
if (is_mapped()) {
|
||
m_packets.release();
|
||
} else if (m_packets.get()) {
|
||
ENOKI_TRACK_DEALLOC(m_packets.get(), packets_allocated() * sizeof(Packet));
|
||
m_packets.reset();
|
||
}
|
||
|
||
m_size = m_packets_allocated = 0;
|
||
}
|
||
|
||
//! @}
|
||
// -----------------------------------------------------------------------
|
||
|
||
// -----------------------------------------------------------------------
|
||
//! @{ \name Functions to access the array contents
|
||
// -----------------------------------------------------------------------
|
||
|
||
bool is_mapped() const { return (m_packets_allocated & 0x80000000u) != 0; }
|
||
size_t size() const { return (size_t) m_size; }
|
||
size_t packets() const { return ((size_t) m_size + PacketSize - 1) / PacketSize; }
|
||
size_t packets_allocated() const { return (size_t) (m_packets_allocated & 0x7fffffffu); }
|
||
size_t capacity() const { return packets_allocated() * Packet::Size; }
|
||
|
||
bool empty() const { return m_size == 0; }
|
||
|
||
size_t nbytes() const {
|
||
return packets_allocated() * sizeof(Packet) + sizeof(Derived);
|
||
}
|
||
|
||
ENOKI_INLINE const Value *data() const {
|
||
return (const Value *) ENOKI_ASSUME_ALIGNED(m_packets.get(), alignof(Packet));
|
||
}
|
||
|
||
ENOKI_INLINE Value *data() {
|
||
return (Value *) ENOKI_ASSUME_ALIGNED(m_packets.get(), alignof(Packet));
|
||
}
|
||
|
||
ENOKI_INLINE const Packet *packet_ptr() const {
|
||
return (const Packet *) ENOKI_ASSUME_ALIGNED(m_packets.get(), alignof(Packet));
|
||
}
|
||
|
||
ENOKI_INLINE Packet *packet_ptr() {
|
||
return (Packet *) ENOKI_ASSUME_ALIGNED(m_packets.get(), alignof(Packet));
|
||
}
|
||
|
||
ENOKI_INLINE decltype(auto) coeff(size_t i) {
|
||
return m_packets[i / PacketSize].coeff(i % PacketSize);
|
||
}
|
||
|
||
ENOKI_INLINE decltype(auto) coeff(size_t i) const {
|
||
return m_packets[i / PacketSize].coeff(i % PacketSize);
|
||
}
|
||
|
||
ENOKI_INLINE Packet &packet(size_t i) {
|
||
#if !defined(NDEBUG) && !defined(ENOKI_DISABLE_RANGE_CHECK)
|
||
if (i >= packets())
|
||
throw std::out_of_range(
|
||
"DynamicArray: out of range access (tried to access packet " +
|
||
std::to_string(i) + " in an array of size " +
|
||
std::to_string(packets()) + ")");
|
||
#endif
|
||
return ((Packet *) ENOKI_ASSUME_ALIGNED(m_packets.get(), alignof(Packet)))[i];
|
||
}
|
||
|
||
ENOKI_INLINE const Packet &packet(size_t i) const {
|
||
#if !defined(NDEBUG) && !defined(ENOKI_DISABLE_RANGE_CHECK)
|
||
if (i >= packets())
|
||
throw std::out_of_range(
|
||
"DynamicArray: out of range access (tried to access packet " +
|
||
std::to_string(i) + " in an array of size " +
|
||
std::to_string(packets()) + ")");
|
||
#endif
|
||
return ((const Packet *) ENOKI_ASSUME_ALIGNED(m_packets.get(), alignof(Packet)))[i];
|
||
}
|
||
|
||
//! @}
|
||
// -----------------------------------------------------------------------
|
||
|
||
// -----------------------------------------------------------------------
|
||
//! @{ \name Vertical array operations
|
||
// -----------------------------------------------------------------------
|
||
|
||
#define ENOKI_FWD_UNARY_OPERATION(name, Return, op) \
|
||
auto name##_() const { \
|
||
Return result; \
|
||
result.resize(size()); \
|
||
auto p1 = packet_ptr(); \
|
||
auto pr = result.packet_ptr(); \
|
||
for (size_t i = 0, n = result.packets(); \
|
||
i < n; ++i, ++p1, ++pr) { \
|
||
Packet a = *p1; \
|
||
*pr = op; \
|
||
} \
|
||
return result; \
|
||
}
|
||
|
||
#define ENOKI_FWD_UNARY_OPERATION_IMM(name, Return, op) \
|
||
template <size_t Imm> auto name##_() const { \
|
||
Return result; \
|
||
result.resize(size()); \
|
||
auto p1 = packet_ptr(); \
|
||
auto pr = result.packet_ptr(); \
|
||
for (size_t i = 0, n = result.packets(); \
|
||
i < n; ++i, ++p1, ++pr) { \
|
||
Packet a = *p1; \
|
||
*pr = op; \
|
||
} \
|
||
return result; \
|
||
}
|
||
|
||
#define ENOKI_FWD_BINARY_OPERATION(name, Return, op) \
|
||
template <typename T> \
|
||
auto name##_(const T &d) const { \
|
||
Return result; \
|
||
result.resize_like(*this, d); \
|
||
auto p1 = packet_ptr(); \
|
||
auto p2 = d.packet_ptr(); \
|
||
auto pr = result.packet_ptr(); \
|
||
size_t s1 = size() == 1 ? 0 : 1, \
|
||
s2 = d.size() == 1 ? 0 : 1; \
|
||
for (size_t i = 0, n = result.packets(); i < n; \
|
||
++i, ++pr, p1 += s1, p2 += s2) { \
|
||
auto a1 = *p1; \
|
||
auto a2 = *p2; \
|
||
*pr = op; \
|
||
} \
|
||
return result; \
|
||
}
|
||
|
||
#define ENOKI_FWD_BINARY_OPERATION_SIZE(name, Return, op) \
|
||
auto name##_(size_t a2) const { \
|
||
Return result; \
|
||
result.resize_like(*this); \
|
||
auto p1 = packet_ptr(); \
|
||
auto pr = result.packet_ptr(); \
|
||
for (size_t i = 0, n = result.packets(); i < n; \
|
||
++i, ++pr, p1++) { \
|
||
auto a1 = *p1; \
|
||
*pr = op; \
|
||
} \
|
||
return result; \
|
||
}
|
||
|
||
#define ENOKI_FWD_TERNARY_OPERATION(name, Return, op) \
|
||
template <typename T1, typename T2> \
|
||
auto name##_(const T1 &d1, const T2 &d2) const { \
|
||
Return result; \
|
||
result.resize_like(*this, d1, d2); \
|
||
auto p1 = packet_ptr(); \
|
||
auto p2 = d1.packet_ptr(); \
|
||
auto p3 = d2.packet_ptr(); \
|
||
auto pr = result.packet_ptr(); \
|
||
size_t s1 = size() == 1 ? 0 : 1, \
|
||
s2 = d1.size() == 1 ? 0 : 1, \
|
||
s3 = d2.size() == 1 ? 0 : 1; \
|
||
for (size_t i = 0, n = result.packets(); i < n; \
|
||
++i, ++pr, p1 += s1, p2 += s2, p3 += s3) { \
|
||
auto a1 = *p1; \
|
||
auto a2 = *p2; \
|
||
auto a3 = *p3; \
|
||
*pr = op; \
|
||
} \
|
||
return result; \
|
||
}
|
||
|
||
#define ENOKI_FWD_MASKED_OPERATION(name, expr) \
|
||
template <typename Mask> \
|
||
void m##name##_(const Derived &e, const Mask &m) { \
|
||
resize_like(*this, e, m); \
|
||
auto pr = packet_ptr(); \
|
||
auto p1 = e.packet_ptr(); \
|
||
auto p2 = m.packet_ptr(); \
|
||
size_t s1 = e.size() == 1 ? 0 : 1, \
|
||
s2 = m.size() == 1 ? 0 : 1; \
|
||
for (size_t i = 0, n = packets(); i < n; \
|
||
++i, ++pr, p1 += s1, p2 += s2) \
|
||
(*pr).m##name##_(*p1, *p2); \
|
||
}
|
||
|
||
ENOKI_FWD_BINARY_OPERATION(add, Derived, a1 + a2)
|
||
ENOKI_FWD_BINARY_OPERATION(sub, Derived, a1 - a2)
|
||
ENOKI_FWD_BINARY_OPERATION(mul, Derived, a1 * a2)
|
||
ENOKI_FWD_BINARY_OPERATION(div, Derived, a1 / a2)
|
||
ENOKI_FWD_BINARY_OPERATION(mod, Derived, a1 % a2)
|
||
ENOKI_FWD_BINARY_OPERATION(sl, Derived, a1 << a2)
|
||
ENOKI_FWD_BINARY_OPERATION(sr, Derived, a1 >> a2)
|
||
ENOKI_FWD_BINARY_OPERATION(rol, Derived, rol(a1, a2))
|
||
ENOKI_FWD_BINARY_OPERATION(ror, Derived, ror(a1, a2))
|
||
ENOKI_FWD_BINARY_OPERATION(mulhi, Derived, mulhi(a1, a2))
|
||
|
||
ENOKI_FWD_BINARY_OPERATION_SIZE(sl, Derived, a1 << a2)
|
||
ENOKI_FWD_BINARY_OPERATION_SIZE(sr, Derived, a1 >> a2)
|
||
|
||
ENOKI_FWD_UNARY_OPERATION_IMM(sl, Derived, sl<Imm>(a))
|
||
ENOKI_FWD_UNARY_OPERATION_IMM(sr, Derived, sr<Imm>(a))
|
||
ENOKI_FWD_UNARY_OPERATION_IMM(rol, Derived, rol<Imm>(a))
|
||
ENOKI_FWD_UNARY_OPERATION_IMM(ror, Derived, ror<Imm>(a))
|
||
|
||
ENOKI_FWD_UNARY_OPERATION(lzcnt, Derived, lzcnt(a))
|
||
ENOKI_FWD_UNARY_OPERATION(tzcnt, Derived, tzcnt(a))
|
||
ENOKI_FWD_UNARY_OPERATION(popcnt, Derived, popcnt(a))
|
||
|
||
ENOKI_FWD_BINARY_OPERATION(or, Derived, a1 | a2)
|
||
ENOKI_FWD_BINARY_OPERATION(and, Derived, a1 & a2)
|
||
ENOKI_FWD_BINARY_OPERATION(andnot, Derived, andnot(a1, a2))
|
||
ENOKI_FWD_BINARY_OPERATION(xor, Derived, a1 ^ a2)
|
||
|
||
ENOKI_FWD_UNARY_OPERATION(not, Derived, ~a);
|
||
ENOKI_FWD_UNARY_OPERATION(neg, Derived, -a);
|
||
|
||
ENOKI_FWD_BINARY_OPERATION(eq, mask_t<Derived>, eq (a1, a2))
|
||
ENOKI_FWD_BINARY_OPERATION(neq, mask_t<Derived>, neq(a1, a2))
|
||
ENOKI_FWD_BINARY_OPERATION(gt, mask_t<Derived>, a1 > a2)
|
||
ENOKI_FWD_BINARY_OPERATION(ge, mask_t<Derived>, a1 >= a2)
|
||
ENOKI_FWD_BINARY_OPERATION(lt, mask_t<Derived>, a1 < a2)
|
||
ENOKI_FWD_BINARY_OPERATION(le, mask_t<Derived>, a1 <= a2)
|
||
|
||
ENOKI_FWD_TERNARY_OPERATION(fmadd, Derived, fmadd(a1, a2, a3))
|
||
ENOKI_FWD_TERNARY_OPERATION(fmsub, Derived, fmsub(a1, a2, a3))
|
||
ENOKI_FWD_TERNARY_OPERATION(fnmadd, Derived, fnmadd(a1, a2, a3))
|
||
ENOKI_FWD_TERNARY_OPERATION(fnmsub, Derived, fnmsub(a1, a2, a3))
|
||
ENOKI_FWD_TERNARY_OPERATION(fmsubadd, Derived, fmsubadd(a1, a2, a3))
|
||
ENOKI_FWD_TERNARY_OPERATION(fmaddsub, Derived, fmaddsub(a1, a2, a3))
|
||
|
||
ENOKI_FWD_BINARY_OPERATION(min, Derived, min(a1, a2))
|
||
ENOKI_FWD_BINARY_OPERATION(max, Derived, max(a1, a2))
|
||
|
||
ENOKI_FWD_UNARY_OPERATION(abs, Derived, abs(a));
|
||
ENOKI_FWD_UNARY_OPERATION(ceil, Derived, ceil(a));
|
||
ENOKI_FWD_UNARY_OPERATION(floor, Derived, floor(a));
|
||
ENOKI_FWD_UNARY_OPERATION(sqrt, Derived, sqrt(a));
|
||
ENOKI_FWD_UNARY_OPERATION(round, Derived, round(a));
|
||
ENOKI_FWD_UNARY_OPERATION(trunc, Derived, trunc(a));
|
||
|
||
ENOKI_FWD_UNARY_OPERATION(rsqrt, Derived, rsqrt(a));
|
||
ENOKI_FWD_UNARY_OPERATION(rcp, Derived, rcp(a));
|
||
|
||
ENOKI_FWD_MASKED_OPERATION(assign, b)
|
||
ENOKI_FWD_MASKED_OPERATION(add, a + b)
|
||
ENOKI_FWD_MASKED_OPERATION(sub, a - b)
|
||
ENOKI_FWD_MASKED_OPERATION(mul, a * b)
|
||
ENOKI_FWD_MASKED_OPERATION(div, a / b)
|
||
ENOKI_FWD_MASKED_OPERATION(or, a | b)
|
||
ENOKI_FWD_MASKED_OPERATION(and, a & b)
|
||
ENOKI_FWD_MASKED_OPERATION(xor, a ^ b)
|
||
|
||
#undef ENOKI_FWD_UNARY_OPERATION
|
||
#undef ENOKI_FWD_UNARY_OPERATION_IMM
|
||
#undef ENOKI_FWD_BINARY_OPERATION
|
||
#undef ENOKI_FWD_TERNARY_OPERATION
|
||
#undef ENOKI_FWD_MASKED_OPERATION
|
||
|
||
template <typename Mask>
|
||
static Derived select_(const Mask &mask, const Derived &t, const Derived &f) {
|
||
if (ENOKI_UNLIKELY(f.empty())) {
|
||
if (all(mask))
|
||
return t;
|
||
else
|
||
throw std::runtime_error(
|
||
"DynamicArray::select(): array for false branch is empty, "
|
||
"and some entries were referenced.");
|
||
}
|
||
|
||
if (ENOKI_UNLIKELY(t.empty())) {
|
||
if (none(mask))
|
||
return f;
|
||
else
|
||
throw std::runtime_error(
|
||
"DynamicArray::select(): array for true branch is empty, "
|
||
"and some entries were referenced.");
|
||
}
|
||
|
||
Derived result;
|
||
result.resize_like(mask, t, f);
|
||
size_t i1 = 0, i1i = mask.size() == 1 ? 0 : 1,
|
||
i2 = 0, i2i = t.size() == 1 ? 0 : 1,
|
||
i3 = 0, i3i = f.size() == 1 ? 0 : 1;
|
||
|
||
for (size_t i = 0; i < result.packets();
|
||
++i, i1 += i1i, i2 += i2i, i3 += i3i) {
|
||
result.packet(i) = select(mask.packet(i1), t.packet(i2), f.packet(i3));
|
||
}
|
||
return result;
|
||
}
|
||
|
||
template <size_t Stride, typename Index, typename Mask>
|
||
static Derived gather_(const void *mem, const Index &index, const Mask &mask) {
|
||
Derived result;
|
||
result.resize_like(index, mask);
|
||
size_t i1 = 0, i1i = index.size() == 1 ? 0 : 1,
|
||
i2 = 0, i2i = mask.size() == 1 ? 0 : 1,
|
||
i = 0;
|
||
if (!result.empty()) {
|
||
for (; i < result.packets() - (PacketSize > 1 ? 1 : 0); ++i, i1 += i1i, i2 += i2i)
|
||
result.packet(i) = gather<Packet, Stride>(mem, index.packet(i1), mask.packet(i2));
|
||
if constexpr (PacketSize > 1) {
|
||
auto mask2 = arange<IndexPacket>() <= IndexScalar((result.size() - 1) % PacketSize);
|
||
result.packet(i) = gather<Packet, Stride>(mem, index.packet(i1), mask.packet(i2) & mask2);
|
||
if (result.size() == 1)
|
||
result.packet(0) = result.coeff(0);
|
||
}
|
||
}
|
||
return result;
|
||
}
|
||
|
||
template <size_t Stride, typename Index, typename Mask>
|
||
void scatter_(void *mem, const Index &index, const Mask &mask) const {
|
||
size_t i1 = 0, i1i = this->size() == 1 ? 0 : 1,
|
||
i2 = 0, i2i = index.size() == 1 ? 0 : 1,
|
||
i3 = 0, i3i = mask.size() == 1 ? 0 : 1,
|
||
size = check_size(*this, index, mask),
|
||
n_packets = (size + PacketSize - 1) / PacketSize,
|
||
i = 0;
|
||
|
||
if (n_packets > 0) {
|
||
for (; i < n_packets - (PacketSize > 1 ? 1 : 0); ++i, i1 += i1i, i2 += i2i, i3 += i3i)
|
||
scatter<Stride>(mem, packet(i1), index.packet(i2), mask.packet(i3));
|
||
if constexpr (PacketSize > 1) {
|
||
auto mask2 = arange<IndexPacket>() <= IndexScalar((size - 1) % PacketSize);
|
||
scatter<Stride>(mem, packet(i1), index.packet(i2), mask.packet(i3) & mask2);
|
||
}
|
||
}
|
||
}
|
||
|
||
template <size_t Stride, typename Index, typename Mask>
|
||
void scatter_add_(void *mem, const Index &index, const Mask &mask) const {
|
||
size_t i1 = 0, i1i = this->size() == 1 ? 0 : 1,
|
||
i2 = 0, i2i = index.size() == 1 ? 0 : 1,
|
||
i3 = 0, i3i = mask.size() == 1 ? 0 : 1,
|
||
size = check_size(*this, index, mask),
|
||
n_packets = (size + PacketSize - 1) / PacketSize,
|
||
i = 0;
|
||
|
||
if (n_packets > 0) {
|
||
for (; i < n_packets - (PacketSize > 1 ? 1 : 0); ++i, i1 += i1i, i2 += i2i, i3 += i3i)
|
||
scatter_add<Stride>(mem, packet(i1), index.packet(i2), mask.packet(i3));
|
||
if constexpr (PacketSize > 1) {
|
||
auto mask2 = arange<IndexPacket>() <= IndexScalar((size - 1) % PacketSize);
|
||
scatter_add<Stride>(mem, packet(i1), index.packet(i2), mask.packet(i3) & mask2);
|
||
}
|
||
}
|
||
}
|
||
|
||
template <size_t Stride, typename Index, typename Func, typename... Args, typename Mask>
|
||
static ENOKI_INLINE void transform_(void *ptr, const Index &index, const Mask &mask,
|
||
const Func &func, const Args &... args) {
|
||
size_t size = check_size(index, mask, args...),
|
||
n_packets = (size + PacketSize - 1) / PacketSize;
|
||
|
||
if (n_packets > 0) {
|
||
size_t i = 0;
|
||
for (; i < n_packets - (PacketSize > 1 ? 1 : 0); ++i)
|
||
transform<Packet, Stride>(
|
||
ptr,
|
||
enoki::packet(index, enoki::slices(index) <= 1 ? 0 : i),
|
||
func,
|
||
enoki::packet(args, enoki::slices(args) <= 1 ? 0 : i)...);
|
||
|
||
if constexpr (PacketSize > 1) {
|
||
auto mask2 = arange<IndexPacket>() <= IndexScalar((size - 1) % PacketSize);
|
||
transform<Packet, Stride>(
|
||
ptr,
|
||
enoki::packet(index, enoki::slices(index) <= 1 ? 0 : i),
|
||
func,
|
||
enoki::packet(args, enoki::slices(args) <= 1 ? 0 : i) & mask2...);
|
||
}
|
||
}
|
||
}
|
||
|
||
template <typename Mask> Derived compress_(const Mask &mask) const {
|
||
assert(mask.size() == size());
|
||
size_t count = 0;
|
||
Derived result;
|
||
set_slices(result, size());
|
||
Value *ptr = result.data();
|
||
|
||
for (size_t i = 0; i < packets(); ++i)
|
||
count += compress(ptr, packet(i), mask.packet(i));
|
||
set_slices(result, count);
|
||
return result;
|
||
}
|
||
|
||
template <typename T> T ceil2int_() const {
|
||
T result;
|
||
result.resize(size());
|
||
auto p1 = packet_ptr();
|
||
auto pr = result.packet_ptr();
|
||
for (size_t i = 0, n = result.packets();
|
||
i < n; ++i, ++p1, ++pr)
|
||
*pr = ceil2int<typename T::Packet>(*p1);
|
||
return result;
|
||
}
|
||
|
||
template <typename T> T floor2int_() const {
|
||
T result;
|
||
result.resize(size());
|
||
auto p1 = packet_ptr();
|
||
auto pr = result.packet_ptr();
|
||
for (size_t i = 0, n = result.packets();
|
||
i < n; ++i, ++p1, ++pr)
|
||
*pr = floor2int<typename T::Packet>(*p1);
|
||
return result;
|
||
}
|
||
|
||
//! @}
|
||
// -----------------------------------------------------------------------
|
||
|
||
// -----------------------------------------------------------------------
|
||
//! @{ \name Horizontal array operations
|
||
// -----------------------------------------------------------------------
|
||
|
||
Derived reverse_() const {
|
||
using CoeffValue = std::conditional_t<IsMask, bool, Value>;
|
||
|
||
size_t n = size();
|
||
|
||
Derived result;
|
||
set_slices(result, n);
|
||
|
||
for (size_t i = 0; i < n; ++i)
|
||
result.coeff(i) = (CoeffValue) coeff(n - 1 - i);
|
||
|
||
return result;
|
||
}
|
||
|
||
Derived psum_() const {
|
||
Derived result;
|
||
set_slices(result, size());
|
||
|
||
if (!empty()) {
|
||
// Difficult to vectorize this..
|
||
result.coeff(0) = coeff(0);
|
||
for (size_t i = 1; i < size(); ++i)
|
||
result.coeff(i) = result.coeff(i - 1) + coeff(i);
|
||
}
|
||
|
||
return result;
|
||
}
|
||
|
||
Value hsum_() const {
|
||
if (size() == 0) {
|
||
return Value(Scalar(0));
|
||
} else if (size() == 1) {
|
||
return coeff(0);
|
||
} else {
|
||
Packet result = zero<Packet>();
|
||
for (size_t i = 0, count = packets() - (PacketSize > 1 ? 1 : 0); i < count; ++i)
|
||
result += packet(i);
|
||
|
||
if constexpr (PacketSize > 1) {
|
||
result[arange<IndexPacket>() <= IndexScalar((size() - 1) % PacketSize)] +=
|
||
packet(packets() - 1);
|
||
}
|
||
return hsum(result);
|
||
}
|
||
}
|
||
|
||
Value hprod_() const {
|
||
if (size() == 0) {
|
||
return Value(Scalar(1));
|
||
} else if (size() == 1) {
|
||
return coeff(0);
|
||
} else {
|
||
Packet result = Scalar(1);
|
||
for (size_t i = 0, count = packets() - (PacketSize > 1 ? 1 : 0); i < count; ++i)
|
||
result *= packet(i);
|
||
|
||
if constexpr (PacketSize > 1) {
|
||
result[arange<IndexPacket>() <= IndexScalar((size() - 1) % PacketSize)] *=
|
||
packet(packets() - 1);
|
||
}
|
||
return hprod(result);
|
||
}
|
||
}
|
||
|
||
Value hmin_() const {
|
||
if (size() == 0) {
|
||
return Value(std::numeric_limits<Scalar>::max());
|
||
} else if (size() == 1) {
|
||
return coeff(0);
|
||
} else {
|
||
Packet result = coeff(0);
|
||
for (size_t i = 0, count = packets() - (PacketSize > 1 ? 1 : 0); i < count; ++i)
|
||
result = min(result, packet(i));
|
||
|
||
if constexpr (PacketSize > 1) {
|
||
result[arange<IndexPacket>() <= IndexScalar((size() - 1) % PacketSize)] =
|
||
min(result, packet(packets() - 1));
|
||
}
|
||
return hmin(result);
|
||
}
|
||
}
|
||
|
||
Value hmax_() const {
|
||
if (size() == 0) {
|
||
return Value(std::numeric_limits<Scalar>::min());
|
||
} else if (size() == 1) {
|
||
return coeff(0);
|
||
} else {
|
||
Packet result = coeff(0);
|
||
for (size_t i = 0, count = packets() - (PacketSize > 1 ? 1 : 0); i < count; ++i)
|
||
result = max(result, packet(i));
|
||
|
||
if constexpr (PacketSize > 1) {
|
||
result[arange<IndexPacket>() <= IndexScalar((size() - 1) % PacketSize)] =
|
||
max(result, packet(packets() - 1));
|
||
}
|
||
return hmax(result);
|
||
}
|
||
}
|
||
|
||
bool any_() const {
|
||
if (size() == 0) {
|
||
return false;
|
||
} else if (size() == 1) {
|
||
return coeff(0);
|
||
} else {
|
||
Packet result(false);
|
||
for (size_t i = 0, count = packets() - (PacketSize > 1 ? 1 : 0); i < count; ++i)
|
||
result |= packet(i);
|
||
|
||
if constexpr (PacketSize > 1) {
|
||
result[arange<IndexPacket>() <= IndexScalar((size() - 1) % PacketSize)] |=
|
||
packet(packets() - 1);
|
||
}
|
||
return any(result);
|
||
}
|
||
}
|
||
|
||
bool all_() const {
|
||
if (size() == 0) {
|
||
return true;
|
||
} else if (size() == 1) {
|
||
return coeff(0);
|
||
} else {
|
||
Packet result(true);
|
||
for (size_t i = 0, count = packets() - (PacketSize > 1 ? 1 : 0); i < count; ++i)
|
||
result &= packet(i);
|
||
|
||
if constexpr (PacketSize > 1) {
|
||
result[arange<IndexPacket>() <= IndexScalar((size() - 1) % PacketSize)] &=
|
||
packet(packets() - 1);
|
||
}
|
||
return all(result);
|
||
}
|
||
}
|
||
|
||
size_t count_() const {
|
||
size_t result = 0;
|
||
if (!empty()) {
|
||
for (size_t i = 0, count = packets() - (PacketSize > 1 ? 1 : 0); i < count; ++i)
|
||
result += enoki::count(packet(i));
|
||
|
||
if constexpr (PacketSize > 1) {
|
||
auto mask = arange<IndexPacket>() <= IndexScalar((size() - 1) % PacketSize);
|
||
result += enoki::count(packet(packets() - 1) & mask);
|
||
}
|
||
}
|
||
return result;
|
||
}
|
||
|
||
//! @}
|
||
// -----------------------------------------------------------------------
|
||
|
||
// -----------------------------------------------------------------------
|
||
//! @{ \name Initialization helper functions
|
||
// -----------------------------------------------------------------------
|
||
|
||
/**
|
||
* \brief Resize the buffer to the desired size
|
||
*
|
||
* When the capacity is insufficient, the implementation destroys the
|
||
* current contents and allocates a new (uninitialized) buffer
|
||
*
|
||
* When compiled in debug mode, newly allocated floating point arrays will
|
||
* be initialized with NaNs.
|
||
*/
|
||
ENOKI_NOINLINE void resize(size_t size) {
|
||
if (size == (size_t) m_size)
|
||
return;
|
||
|
||
if (is_mapped())
|
||
throw std::runtime_error("Can't resize a mapped dynamic array!");
|
||
|
||
using CoeffValue = std::conditional_t<IsMask, bool, Value>;
|
||
|
||
CoeffValue scalar = (m_size == 1) ? coeff(0) : zero<CoeffValue>();
|
||
size_t n_packets = (size + PacketSize - 1) / PacketSize;
|
||
|
||
if (n_packets > packets_allocated()) {
|
||
if (!empty()) {
|
||
ENOKI_TRACK_DEALLOC(m_packets.get(), packets_allocated() * sizeof(Packet));
|
||
}
|
||
m_packets = PacketHolder(new Packet[n_packets]);
|
||
m_packets_allocated = (Size) n_packets;
|
||
ENOKI_TRACK_ALLOC(m_packets.get(),
|
||
n_packets * sizeof(Packet));
|
||
}
|
||
|
||
if (m_size == 1) {
|
||
/* Resizing a scalar array -- broadcast. */
|
||
Packet p(scalar);
|
||
for (size_t i = 0; i < n_packets; ++i)
|
||
m_packets[i] = p;
|
||
} else if (m_size == 0) {
|
||
/* Potentially initialize array contents with NaNs */
|
||
#if !defined(NDEBUG)
|
||
for (size_t i = 0; i < n_packets; ++i)
|
||
new (&m_packets[i]) Packet();
|
||
#endif
|
||
}
|
||
|
||
m_size = (Size) size;
|
||
clean_trailing_();
|
||
}
|
||
|
||
// Clear the unused portion of a potential trailing partial packet
|
||
void clean_trailing_() {
|
||
IndexScalar remainder = (IndexScalar) (m_size % PacketSize);
|
||
if (remainder > 0 && m_size != 1) {
|
||
void *addr = m_packets.get() + packets_allocated() - 1;
|
||
auto mask = arange<IndexPacket>() < IndexScalar(remainder);
|
||
store(addr, load<Packet>(addr) & mask);
|
||
}
|
||
}
|
||
|
||
static Derived map(void *ptr, size_t size, bool dealloc = false) {
|
||
assert((uintptr_t) ptr % alignof(Packet) == 0);
|
||
|
||
Derived r;
|
||
r.m_packets = PacketHolder((Packet *) ptr);
|
||
r.m_size = (Size) size;
|
||
r.m_packets_allocated =
|
||
(Size) ((size + PacketSize - 1) / PacketSize);
|
||
|
||
if (!dealloc)
|
||
r.m_packets_allocated |= 0x80000000u;
|
||
|
||
return r;
|
||
}
|
||
|
||
static Derived copy(const void *ptr, size_t size) {
|
||
Derived r;
|
||
r.m_size = (Size) size;
|
||
r.m_packets_allocated =
|
||
(Size) ((size + PacketSize - 1) / PacketSize);
|
||
r.m_packets = PacketHolder(new Packet[r.m_packets_allocated]);
|
||
memcpy(r.m_packets.get(), ptr, size * sizeof(Value));
|
||
return r;
|
||
}
|
||
|
||
Derived &managed() { return derived(); }
|
||
Derived &eval() { return derived(); }
|
||
Derived &managed() const { return derived(); }
|
||
Derived &eval() const { return derived(); }
|
||
|
||
template <typename... Args> void resize_like(const Args&... args) {
|
||
resize(check_size(args...));
|
||
}
|
||
|
||
private:
|
||
|
||
#if defined(__GNUC__)
|
||
// GCC 8.2: quench nonsensical warning in parameter pack expansion
|
||
# pragma GCC diagnostic push
|
||
# pragma GCC diagnostic ignored "-Wparentheses" // warning: suggest parentheses around ‘&&’ within ‘||’ [-Wparentheses]
|
||
#endif
|
||
|
||
template <typename... Args> static size_t check_size(const Args&... args) {
|
||
size_t max_size = std::max({ slices(args)... });
|
||
if ((... || (slices(args) != max_size && slices(args) != 1))) {
|
||
#if defined(NDEBUG)
|
||
throw std::runtime_error(
|
||
"Incompatible sizes in dynamic array operation");
|
||
#else
|
||
std::string msg = "[";
|
||
bool result[] = { ((msg += (std::to_string(slices(args)) + ", ")), false)... };
|
||
(void) result;
|
||
if (msg.size() > 2)
|
||
msg = msg.substr(0, msg.size() - 2);
|
||
msg += "]";
|
||
throw std::runtime_error(
|
||
"Incompatible sizes in dynamic array operation: " + msg);
|
||
#endif
|
||
}
|
||
return max_size;
|
||
}
|
||
|
||
#if defined(__GNUC__)
|
||
# pragma GCC diagnostic pop
|
||
#endif
|
||
|
||
public:
|
||
static Derived empty_(size_t size) {
|
||
Derived result;
|
||
result.resize(size);
|
||
return result;
|
||
}
|
||
|
||
static Derived zero_(size_t size) {
|
||
Derived result;
|
||
result.resize(size);
|
||
Packet value_p = zero<Packet>();
|
||
for (size_t i = 0; i < result.packets(); ++i)
|
||
result.packet(i) = value_p;
|
||
return result;
|
||
}
|
||
|
||
static Derived full_(const Value &value, size_t size) {
|
||
Derived result;
|
||
result.resize(size);
|
||
Packet value_p(value);
|
||
for (size_t i = 0; i < result.packets(); ++i)
|
||
result.packet(i) = value_p;
|
||
return result;
|
||
}
|
||
|
||
/// Construct an evenly spaced integer sequence
|
||
static Derived arange_(ssize_t start, ssize_t stop, ssize_t step) {
|
||
Derived result;
|
||
result.resize(size_t((stop - start + step - (step > 0 ? 1 : -1)) / step));
|
||
Packet value_p = arange<Packet>(start, start + (ssize_t) Packet::Size * step, step),
|
||
shift = Value((ssize_t) PacketSize * step);
|
||
for (size_t i = 0; i < result.packets(); ++i) {
|
||
result.packet(i) = value_p;
|
||
value_p += shift;
|
||
}
|
||
return result;
|
||
}
|
||
|
||
static Derived linspace_(Value min, Value max, size_t size) {
|
||
Derived result;
|
||
result.resize(size);
|
||
|
||
Value step = (max - min) / Value(size - 1);
|
||
|
||
Packet value_p = linspace<Packet>(min, min + step * (PacketSize - 1)),
|
||
shift = Value(step * PacketSize);
|
||
|
||
for (size_t i = 0; i < result.packets(); ++i) {
|
||
result.packet(i) = value_p;
|
||
value_p += shift;
|
||
}
|
||
|
||
return result;
|
||
}
|
||
|
||
//! @}
|
||
// -----------------------------------------------------------------------
|
||
|
||
auto operator->() const {
|
||
using BaseType = std::decay_t<std::remove_pointer_t<scalar_t<Derived_>>>;
|
||
return call_support<BaseType, Derived_>(derived());
|
||
}
|
||
|
||
template <typename Mask>
|
||
ENOKI_INLINE Value extract_(const Mask &mask) const {
|
||
check_size(derived(), mask);
|
||
for (size_t i = 0; i < mask.size(); ++i)
|
||
if (mask.coeff(i))
|
||
return coeff(i);
|
||
return zero<Value>();
|
||
}
|
||
|
||
DynamicArrayReference<Packet> ref_wrap_() const {
|
||
return m_packets.get();
|
||
}
|
||
private:
|
||
PacketHolder m_packets;
|
||
Size m_size = 0;
|
||
Size m_packets_allocated = 0;
|
||
};
|
||
|
||
template <typename Packet_>
|
||
struct DynamicArray : DynamicArrayImpl<Packet_, DynamicArray<Packet_>> {
|
||
using Base = DynamicArrayImpl<Packet_, DynamicArray<Packet_>>;
|
||
using Base::Base;
|
||
using Base::operator=;
|
||
|
||
using ArrayType = DynamicArray;
|
||
using MaskType = DynamicMask<mask_t<Packet_>>;
|
||
|
||
template <typename T> using ReplaceValue =
|
||
DynamicArray<typename Packet_::template ReplaceValue<T>>;
|
||
|
||
DynamicArray(const DynamicArray &) = default;
|
||
DynamicArray(DynamicArray &&) = default;
|
||
DynamicArray &operator=(const DynamicArray &) = default;
|
||
DynamicArray &operator=(DynamicArray &&) = default;
|
||
};
|
||
|
||
template <typename Packet_>
|
||
struct DynamicMask : DynamicArrayImpl<Packet_, DynamicMask<Packet_>> {
|
||
using Base = DynamicArrayImpl<Packet_, DynamicMask<Packet_>>;
|
||
|
||
using ArrayType = DynamicArray<array_t<Packet_>>;
|
||
using MaskType = DynamicMask;
|
||
|
||
template <typename T> using ReplaceValue =
|
||
DynamicMask<typename Packet_::template ReplaceValue<T>>;
|
||
|
||
DynamicMask() = default;
|
||
|
||
template <typename T> DynamicMask(T &&value)
|
||
: Base(std::forward<T>(value), detail::reinterpret_flag()) { }
|
||
|
||
template <typename T> DynamicMask(T &&value, detail::reinterpret_flag)
|
||
: Base(std::forward<T>(value), detail::reinterpret_flag()) { }
|
||
};
|
||
|
||
namespace detail {
|
||
template <typename T> struct mutable_ref { using type = std::add_lvalue_reference_t<T>; };
|
||
template <typename T> struct mutable_ref<const T &> { using type = T &; };
|
||
template <typename T> using mutable_ref_t = typename mutable_ref<T>::type;
|
||
|
||
/// Vectorized inner loop (void return value)
|
||
template <typename Func, typename... Args, size_t... Index>
|
||
ENOKI_INLINE void vectorize_inner_1(std::index_sequence<Index...>, Func &&f,
|
||
size_t packet_count, Args &&... args) {
|
||
ENOKI_NOUNROLL ENOKI_IVDEP for (size_t i = 0; i < packet_count; ++i)
|
||
f(packet(args, i)...);
|
||
}
|
||
|
||
/// Vectorized inner loop (non-void return value)
|
||
template <typename Func, typename Out, typename... Args, size_t... Index>
|
||
ENOKI_INLINE void vectorize_inner_2(std::index_sequence<Index...>, Func &&f,
|
||
size_t packet_count, Out &&out, Args &&... args) {
|
||
ENOKI_NOUNROLL ENOKI_IVDEP for (size_t i = 0; i < packet_count; ++i)
|
||
packet(out, i) = f(packet(args, i)...);
|
||
}
|
||
}
|
||
|
||
template <bool Resize = false, typename Func, typename... Args>
|
||
auto vectorize(Func &&f, Args &&... args)
|
||
-> make_dynamic_t<decltype(f(packet(args, 0)...))> /* LLVM bug #39326 */ {
|
||
#if defined(NDEBUG)
|
||
constexpr bool Check = false;
|
||
#else
|
||
constexpr bool Check = true;
|
||
#endif
|
||
|
||
/** Determine the number of slices and packets of the input arrays,
|
||
and broadcast scalar input arrays if requested */
|
||
size_t packet_count = 0, slice_count = 0;
|
||
|
||
bool unused1[] = { ((packet_count = !is_dynamic_v<Args> ? packet_count
|
||
: (Resize ? std::max(packet_count, packets(args)) : packets(args))), false)... };
|
||
|
||
bool unused2[] = { ((slice_count = !is_dynamic_v<Args> ? slice_count
|
||
: (Resize ? std::max(slice_count, slices(args)) : slices(args))), false)... };
|
||
|
||
(void) unused1; (void) unused2;
|
||
|
||
if constexpr (Check || Resize) {
|
||
size_t status[] = { (
|
||
(!is_dynamic_v<Args> || array_size_v<Args> == 0) ||
|
||
((slice_count != 1 && slices(args) == 1 && Resize)
|
||
? (set_slices((detail::mutable_ref_t<decltype(args)>) args, slice_count), true)
|
||
: (slices(args) == slice_count)))... };
|
||
|
||
bool status_combined = true;
|
||
for (bool s : status)
|
||
status_combined &= s;
|
||
|
||
if (!status_combined)
|
||
throw std::runtime_error("vectorize(): vector arguments have incompatible lengths");
|
||
}
|
||
|
||
using Result = make_dynamic_t<decltype(f(packet(args, 0)...))>;
|
||
if constexpr (std::is_void_v<Result>) {
|
||
detail::vectorize_inner_1(std::make_index_sequence<sizeof...(Args)>(),
|
||
f, packet_count, ref_wrap(args)...);
|
||
} else {
|
||
Result result;
|
||
set_slices(result, slice_count);
|
||
|
||
detail::vectorize_inner_2(std::make_index_sequence<sizeof...(Args)>(),
|
||
f, packet_count, ref_wrap(result),
|
||
ref_wrap(args)...);
|
||
return result;
|
||
}
|
||
}
|
||
|
||
template <typename Func, typename... Args>
|
||
auto vectorize_safe(Func &&f, Args &&... args)
|
||
-> decltype(vectorize<true>(f, args...)) /* LLVM bug #39326 */ {
|
||
return vectorize<true>(f, args...);
|
||
}
|
||
|
||
namespace detail {
|
||
template <typename T>
|
||
using reference_dynamic_t = std::conditional_t<
|
||
is_dynamic_v<T>,
|
||
std::add_lvalue_reference_t<T>,
|
||
T
|
||
>;
|
||
|
||
/// Strip the class from a method type
|
||
template <typename T> struct remove_class { };
|
||
template <typename C, typename R, typename... A> struct remove_class<R (C::*)(A...)> { typedef R type(A...); };
|
||
template <typename C, typename R, typename... A> struct remove_class<R (C::*)(A...) const> { typedef R type(A...); };
|
||
}
|
||
|
||
template <typename Func, typename Return, typename... Args>
|
||
auto vectorize_wrapper_detail(Func &&f_, Return (*)(Args...)) {
|
||
return [f = std::forward<Func>(f_)](detail::reference_dynamic_t<enoki::make_dynamic_t<Args>>... args) {
|
||
return vectorize_safe(f, args...);
|
||
};
|
||
}
|
||
|
||
/// Vectorize a vanilla function pointer
|
||
template <typename Return, typename... Args>
|
||
auto vectorize_wrapper(Return (*f)(Args...)) {
|
||
return vectorize_wrapper_detail(f, f);
|
||
}
|
||
|
||
/// Vectorize a lambda function method (possibly with internal state)
|
||
template <typename Func,
|
||
typename FuncType = typename detail::remove_class<
|
||
decltype(&std::remove_reference<Func>::type::operator())>::type>
|
||
auto vectorize_wrapper(Func &&f) {
|
||
return vectorize_wrapper_detail(std::forward<Func>(f), (FuncType *) nullptr);
|
||
}
|
||
|
||
/// Vectorize a class method (non-const)
|
||
template <typename Return, typename Class, typename... Arg>
|
||
auto vectorize_wrapper(Return (Class::*f)(Arg...)) {
|
||
return vectorize_wrapper_detail(
|
||
[f](Class *c, Arg... args) -> Return { return (c->*f)(args...); },
|
||
(Return(*)(Class *, Arg...)) nullptr);
|
||
}
|
||
|
||
/// Vectorize a class method (const)
|
||
template <typename Return, typename Class, typename... Arg>
|
||
auto vectorize_wrapper(Return (Class::*f)(Arg...) const) {
|
||
return vectorize_wrapper_detail(
|
||
[f](const Class *c, Arg... args) -> Return { return (c->*f)(args...); },
|
||
(Return(*)(const Class *, Arg...)) nullptr);
|
||
}
|
||
|
||
#if defined(ENOKI_AUTODIFF_H) && !defined(ENOKI_BUILD)
|
||
ENOKI_AUTODIFF_EXTERN template struct ENOKI_AUTODIFF_EXPORT Tape<DynamicArray<Packet<float>>>;
|
||
ENOKI_AUTODIFF_EXTERN template struct ENOKI_AUTODIFF_EXPORT DiffArray<DynamicArray<Packet<float>>>;
|
||
|
||
ENOKI_AUTODIFF_EXTERN template struct ENOKI_AUTODIFF_EXPORT Tape<DynamicArray<Packet<double>>>;
|
||
ENOKI_AUTODIFF_EXTERN template struct ENOKI_AUTODIFF_EXPORT DiffArray<DynamicArray<Packet<double>>>;
|
||
#endif
|
||
|
||
NAMESPACE_END(enoki)
|
||
|
||
#if defined(__GNUC__) && !defined(__clang__)
|
||
# pragma GCC diagnostic pop
|
||
#endif
|