/* enoki/dynamic.h -- Dynamic heap-allocated array Enoki is a C++ template library that enables transparent vectorization of numerical kernels using SIMD instruction sets available on current processor architectures. Copyright (c) 2019 Wenzel Jakob All rights reserved. Use of this source code is governed by a BSD-style license that can be found in the LICENSE file. */ #pragma once #include #if defined(__GNUC__) && !defined(__clang__) # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wclass-memaccess" #endif #define ENOKI_DYNAMIC_H 1 NAMESPACE_BEGIN(enoki) template struct DynamicArrayReference : ArrayBase, DynamicArrayReference> { using Base = ArrayBase, DynamicArrayReference>; using Packet = Packet_; using ArrayType = DynamicArrayReference>; using MaskType = DynamicArrayReference>; static constexpr size_t PacketSize = Packet::Size; static constexpr bool IsMask = Packet::IsMask; DynamicArrayReference(Packet *packets = nullptr) : m_packets(packets) { } ENOKI_INLINE Packet &packet(size_t i) { return ((Packet *) ENOKI_ASSUME_ALIGNED(m_packets, alignof(Packet)))[i]; } ENOKI_INLINE const Packet &packet(size_t i) const { return ((const Packet *) ENOKI_ASSUME_ALIGNED(m_packets, alignof(Packet)))[i]; } template using ReplaceValue = DynamicArrayReference>; private: Packet *m_packets; }; template struct DynamicArrayImpl : ArrayBase, Derived_> { // ----------------------------------------------------------------------- //! @{ \name Aliases and constants // ----------------------------------------------------------------------- using Size = uint32_t; using Base = ArrayBase, Derived_>; using Packet = Packet_; using IndexPacket = uint_array_t, false>; using IndexScalar = scalar_t; using PacketHolder = std::unique_ptr; static constexpr size_t PacketSize = Packet::Size; static constexpr bool IsMask = Packet::IsMask; using typename Base::Derived; using typename Base::Value; using typename Base::Scalar; using Base::derived; //! 
@}
    // -----------------------------------------------------------------------

    // -----------------------------------------------------------------------
    //! @{ \name Constructors
    // -----------------------------------------------------------------------

    /// Create an empty array (size 0, no storage)
    DynamicArrayImpl() = default;

    /// Release the storage via reset()
    ENOKI_INLINE ~DynamicArrayImpl() { reset(); }

    /// Initialize from a list of component values
    template = 2 && std::conjunction_v...>> = 0>
    ENOKI_INLINE DynamicArrayImpl(Ts... args) {
        // Convert the arguments to a contiguous array of values, then copy
        // them verbatim into the freshly resized packet buffer
        Value storage[] = { (Value) args... };
        resize(sizeof...(Ts));
        memcpy(m_packets.get(), storage, sizeof(Value) * sizeof...(Ts));
    }

    /// Copy constructor (forwards to the copy assignment operator)
    DynamicArrayImpl(const DynamicArrayImpl &value) { operator=(value); }

    /// Move constructor (forwards to the move assignment operator, which
    /// swaps state with the default-initialized members of this object)
    ENOKI_INLINE DynamicArrayImpl(DynamicArrayImpl &&value) {
        operator=(std::move(value));
    }

    /// Converting constructor from another dynamic array (forwards to the
    /// matching assignment operator)
    template DynamicArrayImpl(const DynamicArrayImpl &value) {
        operator=(value);
    }

    /// Converting constructor from a generic array (forwards to the
    /// matching assignment operator)
    template DynamicArrayImpl(const ArrayBase &value) {
        operator=(value);
    }

    /// Reinterpreting constructor: every packet of \c other is passed
    /// through reinterpret_array(). Both arrays must use the same packet size.
    template DynamicArrayImpl(const DynamicArrayImpl &other, detail::reinterpret_flag) {
        static_assert(Packet2::Size == Packet::Size, "Packet sizes must match!");
        resize(other.size());
        for (size_t i = 0; i < other.packets(); ++i)
            packet(i) = reinterpret_array(other.packet(i));
    }

#if defined(__GNUC__)
    // Don't be so noisy about sign conversion in constructor
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wsign-conversion"
#endif

    /// Create a size-1 array whose single packet is constructed from a boolean
    template = 0> DynamicArrayImpl(bool value, detail::reinterpret_flag) {
        resize(1);
        packet(0) = Packet(value);
    }

    /// Create a size-1 array from a single value; the value is cast to
    /// \c S before the packet is constructed from it
    template > = 0> DynamicArrayImpl(const T &value) {
        using S = std::conditional_t;
        resize(1);
        packet(0) = Packet((S) value);
    }

#if defined(__GNUC__)
# pragma GCC diagnostic pop
#endif

    //! @}
    // -----------------------------------------------------------------------

    // -----------------------------------------------------------------------
    //! 
@{ \name Assignment operators
    // -----------------------------------------------------------------------

    /// Assign a single value: the array is resized to one entry and its
    /// only packet is constructed from \c value.
    template > = 0>
    ENOKI_NOINLINE DynamicArrayImpl &operator=(const T &value) {
        resize(1);
        packet(0) = Packet(value);
        return derived();
    }

    /**
     * \brief Copy assignment
     *
     * Resizes this array to the size of \c other and copies its packets
     * bit-for-bit. resize() throws \c std::runtime_error when this array is
     * mapped and the sizes differ.
     *
     * Self-assignment is a no-op and nothing is copied when the array is
     * empty: memcpy() must not be called with overlapping ranges or with
     * null pointers (which is what m_packets holds for a never-allocated
     * array), even when the byte count is zero.
     */
    ENOKI_NOINLINE DynamicArrayImpl &operator=(const DynamicArrayImpl &other) {
        if (this != &other) {
            resize(other.size());
            if (packets() > 0)
                memcpy(m_packets.get(), other.m_packets.get(),
                       packets() * sizeof(Packet));
        }
        return derived();
    }

    /// Converting assignment from a dynamic array with a different packet
    /// type of identical size; packets are converted one at a time.
    template ENOKI_NOINLINE DynamicArrayImpl &operator=(const DynamicArrayImpl &other) {
        static_assert(Packet2::Size == Packet::Size, "Packet sizes must match!");
        resize(other.size());
        for (size_t i = 0; i < other.packets(); ++i)
            packet(i) = Packet(other.packet(i));
        return derived();
    }

    /// Converting assignment from a generic array; entries are copied one
    /// coefficient at a time.
    template ENOKI_NOINLINE DynamicArrayImpl &operator=(const ArrayBase &other) {
        resize(other.derived().size());
        for (size_t i = 0; i < other.derived().size(); ++i)
            coeff(i) = other.derived().coeff(i);
        return derived();
    }

    /// Move assignment: exchanges buffer, size and allocation count with
    /// \c other, which therefore ends up holding the previous contents of
    /// this array.
    ENOKI_INLINE DynamicArrayImpl &operator=(DynamicArrayImpl &&other) {
        m_packets.swap(other.m_packets);
        std::swap(m_packets_allocated, other.m_packets_allocated);
        std::swap(m_size, other.m_size);
        return derived();
    }

    /// Return to the empty state. A mapped buffer is only released from the
    /// smart pointer (not freed); an owned buffer is reported to the
    /// allocation tracker and freed.
    void reset() {
        if (is_mapped()) {
            m_packets.release();
        } else if (m_packets.get()) {
            ENOKI_TRACK_DEALLOC(m_packets.get(), packets_allocated() * sizeof(Packet));
            m_packets.reset();
        }
        m_size = m_packets_allocated = 0;
    }

    //! @}
    // -----------------------------------------------------------------------

    // -----------------------------------------------------------------------
    //! 
@{ \name Functions to access the array contents // ----------------------------------------------------------------------- bool is_mapped() const { return (m_packets_allocated & 0x80000000u) != 0; } size_t size() const { return (size_t) m_size; } size_t packets() const { return ((size_t) m_size + PacketSize - 1) / PacketSize; } size_t packets_allocated() const { return (size_t) (m_packets_allocated & 0x7fffffffu); } size_t capacity() const { return packets_allocated() * Packet::Size; } bool empty() const { return m_size == 0; } size_t nbytes() const { return packets_allocated() * sizeof(Packet) + sizeof(Derived); } ENOKI_INLINE const Value *data() const { return (const Value *) ENOKI_ASSUME_ALIGNED(m_packets.get(), alignof(Packet)); } ENOKI_INLINE Value *data() { return (Value *) ENOKI_ASSUME_ALIGNED(m_packets.get(), alignof(Packet)); } ENOKI_INLINE const Packet *packet_ptr() const { return (const Packet *) ENOKI_ASSUME_ALIGNED(m_packets.get(), alignof(Packet)); } ENOKI_INLINE Packet *packet_ptr() { return (Packet *) ENOKI_ASSUME_ALIGNED(m_packets.get(), alignof(Packet)); } ENOKI_INLINE decltype(auto) coeff(size_t i) { return m_packets[i / PacketSize].coeff(i % PacketSize); } ENOKI_INLINE decltype(auto) coeff(size_t i) const { return m_packets[i / PacketSize].coeff(i % PacketSize); } ENOKI_INLINE Packet &packet(size_t i) { #if !defined(NDEBUG) && !defined(ENOKI_DISABLE_RANGE_CHECK) if (i >= packets()) throw std::out_of_range( "DynamicArray: out of range access (tried to access packet " + std::to_string(i) + " in an array of size " + std::to_string(packets()) + ")"); #endif return ((Packet *) ENOKI_ASSUME_ALIGNED(m_packets.get(), alignof(Packet)))[i]; } ENOKI_INLINE const Packet &packet(size_t i) const { #if !defined(NDEBUG) && !defined(ENOKI_DISABLE_RANGE_CHECK) if (i >= packets()) throw std::out_of_range( "DynamicArray: out of range access (tried to access packet " + std::to_string(i) + " in an array of size " + std::to_string(packets()) + ")"); #endif return 
((const Packet *) ENOKI_ASSUME_ALIGNED(m_packets.get(), alignof(Packet)))[i]; } //! @} // ----------------------------------------------------------------------- // ----------------------------------------------------------------------- //! @{ \name Vertical array operations // ----------------------------------------------------------------------- #define ENOKI_FWD_UNARY_OPERATION(name, Return, op) \ auto name##_() const { \ Return result; \ result.resize(size()); \ auto p1 = packet_ptr(); \ auto pr = result.packet_ptr(); \ for (size_t i = 0, n = result.packets(); \ i < n; ++i, ++p1, ++pr) { \ Packet a = *p1; \ *pr = op; \ } \ return result; \ } #define ENOKI_FWD_UNARY_OPERATION_IMM(name, Return, op) \ template auto name##_() const { \ Return result; \ result.resize(size()); \ auto p1 = packet_ptr(); \ auto pr = result.packet_ptr(); \ for (size_t i = 0, n = result.packets(); \ i < n; ++i, ++p1, ++pr) { \ Packet a = *p1; \ *pr = op; \ } \ return result; \ } #define ENOKI_FWD_BINARY_OPERATION(name, Return, op) \ template \ auto name##_(const T &d) const { \ Return result; \ result.resize_like(*this, d); \ auto p1 = packet_ptr(); \ auto p2 = d.packet_ptr(); \ auto pr = result.packet_ptr(); \ size_t s1 = size() == 1 ? 0 : 1, \ s2 = d.size() == 1 ? 
0 : 1; \ for (size_t i = 0, n = result.packets(); i < n; \ ++i, ++pr, p1 += s1, p2 += s2) { \ auto a1 = *p1; \ auto a2 = *p2; \ *pr = op; \ } \ return result; \ } #define ENOKI_FWD_BINARY_OPERATION_SIZE(name, Return, op) \ auto name##_(size_t a2) const { \ Return result; \ result.resize_like(*this); \ auto p1 = packet_ptr(); \ auto pr = result.packet_ptr(); \ for (size_t i = 0, n = result.packets(); i < n; \ ++i, ++pr, p1++) { \ auto a1 = *p1; \ *pr = op; \ } \ return result; \ } #define ENOKI_FWD_TERNARY_OPERATION(name, Return, op) \ template \ auto name##_(const T1 &d1, const T2 &d2) const { \ Return result; \ result.resize_like(*this, d1, d2); \ auto p1 = packet_ptr(); \ auto p2 = d1.packet_ptr(); \ auto p3 = d2.packet_ptr(); \ auto pr = result.packet_ptr(); \ size_t s1 = size() == 1 ? 0 : 1, \ s2 = d1.size() == 1 ? 0 : 1, \ s3 = d2.size() == 1 ? 0 : 1; \ for (size_t i = 0, n = result.packets(); i < n; \ ++i, ++pr, p1 += s1, p2 += s2, p3 += s3) { \ auto a1 = *p1; \ auto a2 = *p2; \ auto a3 = *p3; \ *pr = op; \ } \ return result; \ } #define ENOKI_FWD_MASKED_OPERATION(name, expr) \ template \ void m##name##_(const Derived &e, const Mask &m) { \ resize_like(*this, e, m); \ auto pr = packet_ptr(); \ auto p1 = e.packet_ptr(); \ auto p2 = m.packet_ptr(); \ size_t s1 = e.size() == 1 ? 0 : 1, \ s2 = m.size() == 1 ? 
0 : 1; \ for (size_t i = 0, n = packets(); i < n; \ ++i, ++pr, p1 += s1, p2 += s2) \ (*pr).m##name##_(*p1, *p2); \ } ENOKI_FWD_BINARY_OPERATION(add, Derived, a1 + a2) ENOKI_FWD_BINARY_OPERATION(sub, Derived, a1 - a2) ENOKI_FWD_BINARY_OPERATION(mul, Derived, a1 * a2) ENOKI_FWD_BINARY_OPERATION(div, Derived, a1 / a2) ENOKI_FWD_BINARY_OPERATION(mod, Derived, a1 % a2) ENOKI_FWD_BINARY_OPERATION(sl, Derived, a1 << a2) ENOKI_FWD_BINARY_OPERATION(sr, Derived, a1 >> a2) ENOKI_FWD_BINARY_OPERATION(rol, Derived, rol(a1, a2)) ENOKI_FWD_BINARY_OPERATION(ror, Derived, ror(a1, a2)) ENOKI_FWD_BINARY_OPERATION(mulhi, Derived, mulhi(a1, a2)) ENOKI_FWD_BINARY_OPERATION_SIZE(sl, Derived, a1 << a2) ENOKI_FWD_BINARY_OPERATION_SIZE(sr, Derived, a1 >> a2) ENOKI_FWD_UNARY_OPERATION_IMM(sl, Derived, sl(a)) ENOKI_FWD_UNARY_OPERATION_IMM(sr, Derived, sr(a)) ENOKI_FWD_UNARY_OPERATION_IMM(rol, Derived, rol(a)) ENOKI_FWD_UNARY_OPERATION_IMM(ror, Derived, ror(a)) ENOKI_FWD_UNARY_OPERATION(lzcnt, Derived, lzcnt(a)) ENOKI_FWD_UNARY_OPERATION(tzcnt, Derived, tzcnt(a)) ENOKI_FWD_UNARY_OPERATION(popcnt, Derived, popcnt(a)) ENOKI_FWD_BINARY_OPERATION(or, Derived, a1 | a2) ENOKI_FWD_BINARY_OPERATION(and, Derived, a1 & a2) ENOKI_FWD_BINARY_OPERATION(andnot, Derived, andnot(a1, a2)) ENOKI_FWD_BINARY_OPERATION(xor, Derived, a1 ^ a2) ENOKI_FWD_UNARY_OPERATION(not, Derived, ~a); ENOKI_FWD_UNARY_OPERATION(neg, Derived, -a); ENOKI_FWD_BINARY_OPERATION(eq, mask_t, eq (a1, a2)) ENOKI_FWD_BINARY_OPERATION(neq, mask_t, neq(a1, a2)) ENOKI_FWD_BINARY_OPERATION(gt, mask_t, a1 > a2) ENOKI_FWD_BINARY_OPERATION(ge, mask_t, a1 >= a2) ENOKI_FWD_BINARY_OPERATION(lt, mask_t, a1 < a2) ENOKI_FWD_BINARY_OPERATION(le, mask_t, a1 <= a2) ENOKI_FWD_TERNARY_OPERATION(fmadd, Derived, fmadd(a1, a2, a3)) ENOKI_FWD_TERNARY_OPERATION(fmsub, Derived, fmsub(a1, a2, a3)) ENOKI_FWD_TERNARY_OPERATION(fnmadd, Derived, fnmadd(a1, a2, a3)) ENOKI_FWD_TERNARY_OPERATION(fnmsub, Derived, fnmsub(a1, a2, a3)) ENOKI_FWD_TERNARY_OPERATION(fmsubadd, 
Derived, fmsubadd(a1, a2, a3)) ENOKI_FWD_TERNARY_OPERATION(fmaddsub, Derived, fmaddsub(a1, a2, a3)) ENOKI_FWD_BINARY_OPERATION(min, Derived, min(a1, a2)) ENOKI_FWD_BINARY_OPERATION(max, Derived, max(a1, a2)) ENOKI_FWD_UNARY_OPERATION(abs, Derived, abs(a)); ENOKI_FWD_UNARY_OPERATION(ceil, Derived, ceil(a)); ENOKI_FWD_UNARY_OPERATION(floor, Derived, floor(a)); ENOKI_FWD_UNARY_OPERATION(sqrt, Derived, sqrt(a)); ENOKI_FWD_UNARY_OPERATION(round, Derived, round(a)); ENOKI_FWD_UNARY_OPERATION(trunc, Derived, trunc(a)); ENOKI_FWD_UNARY_OPERATION(rsqrt, Derived, rsqrt(a)); ENOKI_FWD_UNARY_OPERATION(rcp, Derived, rcp(a)); ENOKI_FWD_MASKED_OPERATION(assign, b) ENOKI_FWD_MASKED_OPERATION(add, a + b) ENOKI_FWD_MASKED_OPERATION(sub, a - b) ENOKI_FWD_MASKED_OPERATION(mul, a * b) ENOKI_FWD_MASKED_OPERATION(div, a / b) ENOKI_FWD_MASKED_OPERATION(or, a | b) ENOKI_FWD_MASKED_OPERATION(and, a & b) ENOKI_FWD_MASKED_OPERATION(xor, a ^ b) #undef ENOKI_FWD_UNARY_OPERATION #undef ENOKI_FWD_UNARY_OPERATION_IMM #undef ENOKI_FWD_BINARY_OPERATION #undef ENOKI_FWD_TERNARY_OPERATION #undef ENOKI_FWD_MASKED_OPERATION template static Derived select_(const Mask &mask, const Derived &t, const Derived &f) { if (ENOKI_UNLIKELY(f.empty())) { if (all(mask)) return t; else throw std::runtime_error( "DynamicArray::select(): array for false branch is empty, " "and some entries were referenced."); } if (ENOKI_UNLIKELY(t.empty())) { if (none(mask)) return f; else throw std::runtime_error( "DynamicArray::select(): array for true branch is empty, " "and some entries were referenced."); } Derived result; result.resize_like(mask, t, f); size_t i1 = 0, i1i = mask.size() == 1 ? 0 : 1, i2 = 0, i2i = t.size() == 1 ? 0 : 1, i3 = 0, i3i = f.size() == 1 ? 
0 : 1; for (size_t i = 0; i < result.packets(); ++i, i1 += i1i, i2 += i2i, i3 += i3i) { result.packet(i) = select(mask.packet(i1), t.packet(i2), f.packet(i3)); } return result; } template static Derived gather_(const void *mem, const Index &index, const Mask &mask) { Derived result; result.resize_like(index, mask); size_t i1 = 0, i1i = index.size() == 1 ? 0 : 1, i2 = 0, i2i = mask.size() == 1 ? 0 : 1, i = 0; if (!result.empty()) { for (; i < result.packets() - (PacketSize > 1 ? 1 : 0); ++i, i1 += i1i, i2 += i2i) result.packet(i) = gather(mem, index.packet(i1), mask.packet(i2)); if constexpr (PacketSize > 1) { auto mask2 = arange() <= IndexScalar((result.size() - 1) % PacketSize); result.packet(i) = gather(mem, index.packet(i1), mask.packet(i2) & mask2); if (result.size() == 1) result.packet(0) = result.coeff(0); } } return result; } template void scatter_(void *mem, const Index &index, const Mask &mask) const { size_t i1 = 0, i1i = this->size() == 1 ? 0 : 1, i2 = 0, i2i = index.size() == 1 ? 0 : 1, i3 = 0, i3i = mask.size() == 1 ? 0 : 1, size = check_size(*this, index, mask), n_packets = (size + PacketSize - 1) / PacketSize, i = 0; if (n_packets > 0) { for (; i < n_packets - (PacketSize > 1 ? 1 : 0); ++i, i1 += i1i, i2 += i2i, i3 += i3i) scatter(mem, packet(i1), index.packet(i2), mask.packet(i3)); if constexpr (PacketSize > 1) { auto mask2 = arange() <= IndexScalar((size - 1) % PacketSize); scatter(mem, packet(i1), index.packet(i2), mask.packet(i3) & mask2); } } } template void scatter_add_(void *mem, const Index &index, const Mask &mask) const { size_t i1 = 0, i1i = this->size() == 1 ? 0 : 1, i2 = 0, i2i = index.size() == 1 ? 0 : 1, i3 = 0, i3i = mask.size() == 1 ? 0 : 1, size = check_size(*this, index, mask), n_packets = (size + PacketSize - 1) / PacketSize, i = 0; if (n_packets > 0) { for (; i < n_packets - (PacketSize > 1 ? 
1 : 0); ++i, i1 += i1i, i2 += i2i, i3 += i3i) scatter_add(mem, packet(i1), index.packet(i2), mask.packet(i3)); if constexpr (PacketSize > 1) { auto mask2 = arange() <= IndexScalar((size - 1) % PacketSize); scatter_add(mem, packet(i1), index.packet(i2), mask.packet(i3) & mask2); } } } template static ENOKI_INLINE void transform_(void *ptr, const Index &index, const Mask &mask, const Func &func, const Args &... args) { size_t size = check_size(index, mask, args...), n_packets = (size + PacketSize - 1) / PacketSize; if (n_packets > 0) { size_t i = 0; for (; i < n_packets - (PacketSize > 1 ? 1 : 0); ++i) transform( ptr, enoki::packet(index, enoki::slices(index) <= 1 ? 0 : i), func, enoki::packet(args, enoki::slices(args) <= 1 ? 0 : i)...); if constexpr (PacketSize > 1) { auto mask2 = arange() <= IndexScalar((size - 1) % PacketSize); transform( ptr, enoki::packet(index, enoki::slices(index) <= 1 ? 0 : i), func, enoki::packet(args, enoki::slices(args) <= 1 ? 0 : i) & mask2...); } } } template Derived compress_(const Mask &mask) const { assert(mask.size() == size()); size_t count = 0; Derived result; set_slices(result, size()); Value *ptr = result.data(); for (size_t i = 0; i < packets(); ++i) count += compress(ptr, packet(i), mask.packet(i)); set_slices(result, count); return result; } template T ceil2int_() const { T result; result.resize(size()); auto p1 = packet_ptr(); auto pr = result.packet_ptr(); for (size_t i = 0, n = result.packets(); i < n; ++i, ++p1, ++pr) *pr = ceil2int(*p1); return result; } template T floor2int_() const { T result; result.resize(size()); auto p1 = packet_ptr(); auto pr = result.packet_ptr(); for (size_t i = 0, n = result.packets(); i < n; ++i, ++p1, ++pr) *pr = floor2int(*p1); return result; } //! @} // ----------------------------------------------------------------------- // ----------------------------------------------------------------------- //! 
@{ \name Horizontal array operations
    // -----------------------------------------------------------------------

    // The reductions below process all packets except the last one in full.
    // When PacketSize > 1, the last packet is merged through a mask that
    // selects lanes 0 .. (size() - 1) % PacketSize, i.e. only valid entries.

    /// Return a copy of the array with the entries in reverse order
    Derived reverse_() const {
        using CoeffValue = std::conditional_t;
        size_t n = size();
        Derived result;
        set_slices(result, n);
        for (size_t i = 0; i < n; ++i)
            result.coeff(i) = (CoeffValue) coeff(n - 1 - i);
        return result;
    }

    /// Inclusive prefix sum, computed one entry at a time
    Derived psum_() const {
        Derived result;
        set_slices(result, size());
        if (!empty()) {
            // Difficult to vectorize this..
            result.coeff(0) = coeff(0);
            for (size_t i = 1; i < size(); ++i)
                result.coeff(i) = result.coeff(i - 1) + coeff(i);
        }
        return result;
    }

    /// Sum of all entries (0 for an empty array)
    Value hsum_() const {
        if (size() == 0) {
            return Value(Scalar(0));
        } else if (size() == 1) {
            return coeff(0);
        } else {
            Packet result = zero();
            for (size_t i = 0, count = packets() - (PacketSize > 1 ? 1 : 0); i < count; ++i)
                result += packet(i);
            if constexpr (PacketSize > 1) {
                result[arange() <= IndexScalar((size() - 1) % PacketSize)] += packet(packets() - 1);
            }
            return hsum(result);
        }
    }

    /// Product of all entries (1 for an empty array)
    Value hprod_() const {
        if (size() == 0) {
            return Value(Scalar(1));
        } else if (size() == 1) {
            return coeff(0);
        } else {
            Packet result = Scalar(1);
            for (size_t i = 0, count = packets() - (PacketSize > 1 ? 1 : 0); i < count; ++i)
                result *= packet(i);
            if constexpr (PacketSize > 1) {
                result[arange() <= IndexScalar((size() - 1) % PacketSize)] *= packet(packets() - 1);
            }
            return hprod(result);
        }
    }

    /// Minimum of all entries (largest representable value for an empty array)
    Value hmin_() const {
        if (size() == 0) {
            return Value(std::numeric_limits::max());
        } else if (size() == 1) {
            return coeff(0);
        } else {
            // Seed every lane with the first entry so that unused lanes
            // cannot influence the result
            Packet result = coeff(0);
            for (size_t i = 0, count = packets() - (PacketSize > 1 ? 1 : 0); i < count; ++i)
                result = min(result, packet(i));
            if constexpr (PacketSize > 1) {
                result[arange() <= IndexScalar((size() - 1) % PacketSize)] = min(result, packet(packets() - 1));
            }
            return hmin(result);
        }
    }

    /// Maximum of all entries (smallest representable value for an empty
    /// array). lowest() is used rather than min(): for floating point types,
    /// min() is the smallest *positive* normal value, which is not a valid
    /// identity element of the maximum. Both agree for integer types.
    Value hmax_() const {
        if (size() == 0) {
            return Value(std::numeric_limits::lowest());
        } else if (size() == 1) {
            return coeff(0);
        } else {
            // Seed every lane with the first entry (see hmin_())
            Packet result = coeff(0);
            for (size_t i = 0, count = packets() - (PacketSize > 1 ? 1 : 0); i < count; ++i)
                result = max(result, packet(i));
            if constexpr (PacketSize > 1) {
                result[arange() <= IndexScalar((size() - 1) % PacketSize)] = max(result, packet(packets() - 1));
            }
            return hmax(result);
        }
    }

    /// Is any entry true? (false for an empty array)
    bool any_() const {
        if (size() == 0) {
            return false;
        } else if (size() == 1) {
            return coeff(0);
        } else {
            Packet result(false);
            for (size_t i = 0, count = packets() - (PacketSize > 1 ? 1 : 0); i < count; ++i)
                result |= packet(i);
            if constexpr (PacketSize > 1) {
                result[arange() <= IndexScalar((size() - 1) % PacketSize)] |= packet(packets() - 1);
            }
            return any(result);
        }
    }

    /// Are all entries true? (true for an empty array)
    bool all_() const {
        if (size() == 0) {
            return true;
        } else if (size() == 1) {
            return coeff(0);
        } else {
            Packet result(true);
            for (size_t i = 0, count = packets() - (PacketSize > 1 ? 1 : 0); i < count; ++i)
                result &= packet(i);
            if constexpr (PacketSize > 1) {
                result[arange() <= IndexScalar((size() - 1) % PacketSize)] &= packet(packets() - 1);
            }
            return all(result);
        }
    }

    /// Number of true entries; lanes past the end of the last packet are
    /// masked out before counting
    size_t count_() const {
        size_t result = 0;
        if (!empty()) {
            for (size_t i = 0, count = packets() - (PacketSize > 1 ? 1 : 0); i < count; ++i)
                result += enoki::count(packet(i));
            if constexpr (PacketSize > 1) {
                auto mask = arange() <= IndexScalar((size() - 1) % PacketSize);
                result += enoki::count(packet(packets() - 1) & mask);
            }
        }
        return result;
    }

    //! @}
    // -----------------------------------------------------------------------

    // -----------------------------------------------------------------------
    //! 
@{ \name Initialization helper functions // ----------------------------------------------------------------------- /** * \brief Resize the buffer to the desired size * * When the capacity is insufficient, the implementation destroys the * current contents and allocates a new (uninitialized) buffer * * When compiled in debug mode, newly allocated floating point arrays will * be initialized with NaNs. */ ENOKI_NOINLINE void resize(size_t size) { if (size == (size_t) m_size) return; if (is_mapped()) throw std::runtime_error("Can't resize a mapped dynamic array!"); using CoeffValue = std::conditional_t; CoeffValue scalar = (m_size == 1) ? coeff(0) : zero(); size_t n_packets = (size + PacketSize - 1) / PacketSize; if (n_packets > packets_allocated()) { if (!empty()) { ENOKI_TRACK_DEALLOC(m_packets.get(), packets_allocated() * sizeof(Packet)); } m_packets = PacketHolder(new Packet[n_packets]); m_packets_allocated = (Size) n_packets; ENOKI_TRACK_ALLOC(m_packets.get(), n_packets * sizeof(Packet)); } if (m_size == 1) { /* Resizing a scalar array -- broadcast. 
*/ Packet p(scalar); for (size_t i = 0; i < n_packets; ++i) m_packets[i] = p; } else if (m_size == 0) { /* Potentially initialize array contents with NaNs */ #if !defined(NDEBUG) for (size_t i = 0; i < n_packets; ++i) new (&m_packets[i]) Packet(); #endif } m_size = (Size) size; clean_trailing_(); } // Clear the unused portion of a potential trailing partial packet void clean_trailing_() { IndexScalar remainder = (IndexScalar) (m_size % PacketSize); if (remainder > 0 && m_size != 1) { void *addr = m_packets.get() + packets_allocated() - 1; auto mask = arange() < IndexScalar(remainder); store(addr, load(addr) & mask); } } static Derived map(void *ptr, size_t size, bool dealloc = false) { assert((uintptr_t) ptr % alignof(Packet) == 0); Derived r; r.m_packets = PacketHolder((Packet *) ptr); r.m_size = (Size) size; r.m_packets_allocated = (Size) ((size + PacketSize - 1) / PacketSize); if (!dealloc) r.m_packets_allocated |= 0x80000000u; return r; } static Derived copy(const void *ptr, size_t size) { Derived r; r.m_size = (Size) size; r.m_packets_allocated = (Size) ((size + PacketSize - 1) / PacketSize); r.m_packets = PacketHolder(new Packet[r.m_packets_allocated]); memcpy(r.m_packets.get(), ptr, size * sizeof(Value)); return r; } Derived &managed() { return derived(); } Derived &eval() { return derived(); } Derived &managed() const { return derived(); } Derived &eval() const { return derived(); } template void resize_like(const Args&... args) { resize(check_size(args...)); } private: #if defined(__GNUC__) // GCC 8.2: quench nonsensical warning in parameter pack expansion # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wparentheses" // warning: suggest parentheses around ‘&&’ within ‘||’ [-Wparentheses] #endif template static size_t check_size(const Args&... args) { size_t max_size = std::max({ slices(args)... }); if ((... 
|| (slices(args) != max_size && slices(args) != 1))) { #if defined(NDEBUG) throw std::runtime_error( "Incompatible sizes in dynamic array operation"); #else std::string msg = "["; bool result[] = { ((msg += (std::to_string(slices(args)) + ", ")), false)... }; (void) result; if (msg.size() > 2) msg = msg.substr(0, msg.size() - 2); msg += "]"; throw std::runtime_error( "Incompatible sizes in dynamic array operation: " + msg); #endif } return max_size; } #if defined(__GNUC__) # pragma GCC diagnostic pop #endif public: static Derived empty_(size_t size) { Derived result; result.resize(size); return result; } static Derived zero_(size_t size) { Derived result; result.resize(size); Packet value_p = zero(); for (size_t i = 0; i < result.packets(); ++i) result.packet(i) = value_p; return result; } static Derived full_(const Value &value, size_t size) { Derived result; result.resize(size); Packet value_p(value); for (size_t i = 0; i < result.packets(); ++i) result.packet(i) = value_p; return result; } /// Construct an evenly spaced integer sequence static Derived arange_(ssize_t start, ssize_t stop, ssize_t step) { Derived result; result.resize(size_t((stop - start + step - (step > 0 ? 1 : -1)) / step)); Packet value_p = arange(start, start + (ssize_t) Packet::Size * step, step), shift = Value((ssize_t) PacketSize * step); for (size_t i = 0; i < result.packets(); ++i) { result.packet(i) = value_p; value_p += shift; } return result; } static Derived linspace_(Value min, Value max, size_t size) { Derived result; result.resize(size); Value step = (max - min) / Value(size - 1); Packet value_p = linspace(min, min + step * (PacketSize - 1)), shift = Value(step * PacketSize); for (size_t i = 0; i < result.packets(); ++i) { result.packet(i) = value_p; value_p += shift; } return result; } //! 
@}
    // -----------------------------------------------------------------------

    /// Forward method calls on the array to call_support()
    auto operator->() const {
        using BaseType = std::decay_t>>;
        return call_support(derived());
    }

    /// Return the first entry whose mask bit is set, or zero() when no mask
    /// bit is set. The sizes of array and mask are validated by check_size().
    template ENOKI_INLINE Value extract_(const Mask &mask) const {
        check_size(derived(), mask);

        for (size_t i = 0; i < mask.size(); ++i)
            if (mask.coeff(i))
                return coeff(i);

        return zero();
    }

    /// Create a reference object from the raw packet pointer of this array
    DynamicArrayReference ref_wrap_() const {
        return m_packets.get();
    }

private:
    /// Packet buffer
    PacketHolder m_packets;
    /// Number of entries
    Size m_size = 0;
    /// Number of packets in the buffer; the top bit flags a mapped array
    Size m_packets_allocated = 0;
};

/// Dynamic array type built on top of DynamicArrayImpl; inherits its
/// constructors and assignment operators
template struct DynamicArray : DynamicArrayImpl> {
    using Base = DynamicArrayImpl>;

    using Base::Base;
    using Base::operator=;

    using ArrayType = DynamicArray;
    using MaskType = DynamicMask>;
    template using ReplaceValue = DynamicArray>;

    DynamicArray(const DynamicArray &) = default;
    DynamicArray(DynamicArray &&) = default;
    DynamicArray &operator=(const DynamicArray &) = default;
    DynamicArray &operator=(DynamicArray &&) = default;
};

/// Dynamic mask type built on top of DynamicArrayImpl; its constructors
/// always forward to the base constructors tagged with detail::reinterpret_flag
template struct DynamicMask : DynamicArrayImpl> {
    using Base = DynamicArrayImpl>;

    using ArrayType = DynamicArray>;
    using MaskType = DynamicMask;
    template using ReplaceValue = DynamicMask>;

    DynamicMask() = default;

    template DynamicMask(T &&value)
        : Base(std::forward(value), detail::reinterpret_flag()) { }

    template DynamicMask(T &&value, detail::reinterpret_flag)
        : Base(std::forward(value), detail::reinterpret_flag()) { }
};

namespace detail {
    // Helper used by vectorize() to cast an argument before it is resized
    template struct mutable_ref { using type = std::add_lvalue_reference_t; };
    template struct mutable_ref { using type = T &; };

    template using mutable_ref_t = typename mutable_ref::type;

    /// Vectorized inner loop (void return value)
    template ENOKI_INLINE void vectorize_inner_1(std::index_sequence, Func &&f, size_t packet_count, Args &&... args) {
        ENOKI_NOUNROLL ENOKI_IVDEP for (size_t i = 0; i < packet_count; ++i)
            f(packet(args, i)...);
    }

    /// Vectorized inner loop (non-void return value)
    template ENOKI_INLINE void vectorize_inner_2(std::index_sequence, Func &&f, size_t packet_count, Out &&out, Args &&... args) {
        ENOKI_NOUNROLL ENOKI_IVDEP for (size_t i = 0; i < packet_count; ++i)
            packet(out, i) = f(packet(args, i)...);
    }
}

/**
 * \brief Invoke \c f once per packet of the dynamic arguments
 *
 * The function first determines the packet and slice counts from the dynamic
 * arguments. When \c Resize is set, arguments with a single slice are resized
 * to the common slice count. When checking is active (builds without NDEBUG,
 * or \c Resize), mismatching slice counts cause \c std::runtime_error.
 *
 * If \c f returns a value, the per-packet results are collected in a
 * dynamic array that is returned to the caller.
 */
template auto vectorize(Func &&f, Args &&... args) -> make_dynamic_t /* LLVM bug #39326 */ {
#if defined(NDEBUG)
    constexpr bool Check = false;
#else
    constexpr bool Check = true;
#endif

    /** Determine the number of slices and packets of the input arrays,
        and broadcast scalar input arrays if requested */
    size_t packet_count = 0, slice_count = 0;

    // The arrays below only serve to expand the parameter pack in order
    bool unused1[] = { ((packet_count = !is_dynamic_v ? packet_count : (Resize ? std::max(packet_count, packets(args)) : packets(args))), false)... };

    bool unused2[] = { ((slice_count = !is_dynamic_v ? slice_count : (Resize ? std::max(slice_count, slices(args)) : slices(args))), false)... };

    (void) unused1;
    (void) unused2;

    if constexpr (Check || Resize) {
        // One status flag per argument: non-dynamic arguments always pass,
        // single-slice arguments are resized when permitted, and all others
        // must match the common slice count
        size_t status[] = { ( (!is_dynamic_v || array_size_v == 0) || ((slice_count != 1 && slices(args) == 1 && Resize) ? (set_slices((detail::mutable_ref_t) args, slice_count), true) : (slices(args) == slice_count)))... };

        bool status_combined = true;
        for (bool s : status)
            status_combined &= s;

        if (!status_combined)
            throw std::runtime_error("vectorize(): vector arguments have incompatible lengths");
    }

    using Result = make_dynamic_t;

    if constexpr (std::is_void_v) {
        detail::vectorize_inner_1(std::make_index_sequence(), f, packet_count, ref_wrap(args)...);
    } else {
        Result result;
        set_slices(result, slice_count);

        detail::vectorize_inner_2(std::make_index_sequence(), f, packet_count, ref_wrap(result), ref_wrap(args)...);

        return result;
    }
}

/// Forwarding wrapper around vectorize()
template auto vectorize_safe(Func &&f, Args &&... args) -> decltype(vectorize(f, args...)) /* LLVM bug #39326 */ {
    return vectorize(f, args...);
}

namespace detail {
    // Parameter type used by the wrappers below: an lvalue reference for
    // dynamic types, the type itself otherwise
    template using reference_dynamic_t = std::conditional_t< is_dynamic_v, std::add_lvalue_reference_t, T >;

    /// Strip the class from a method type
    template struct remove_class { };
    template struct remove_class { typedef R type(A...); };
    template struct remove_class { typedef R type(A...); };
}

/// Shared implementation of the wrappers below: returns a lambda that owns
/// \c f_ and calls vectorize_safe() with it. The second (unnamed) argument
/// only conveys the function signature.
template auto vectorize_wrapper_detail(Func &&f_, Return (*)(Args...)) {
    return [f = std::forward(f_)](detail::reference_dynamic_t>... args) {
        return vectorize_safe(f, args...);
    };
}

/// Vectorize a vanilla function pointer
template auto vectorize_wrapper(Return (*f)(Args...)) {
    return vectorize_wrapper_detail(f, f);
}

/// Vectorize a lambda function method (possibly with internal state)
template ::type::operator())>::type> auto vectorize_wrapper(Func &&f) {
    return vectorize_wrapper_detail(std::forward(f), (FuncType *) nullptr);
}

/// Vectorize a class method (non-const)
template auto vectorize_wrapper(Return (Class::*f)(Arg...)) {
    return vectorize_wrapper_detail(
        [f](Class *c, Arg... args) -> Return { return (c->*f)(args...); },
        (Return(*)(Class *, Arg...)) nullptr);
}

/// Vectorize a class method (const)
template auto vectorize_wrapper(Return (Class::*f)(Arg...) const) {
    return vectorize_wrapper_detail(
        [f](const Class *c, Arg... args) -> Return { return (c->*f)(args...); },
        (Return(*)(const Class *, Arg...)) nullptr);
}

// Explicit template instantiation declarations; only active when the
// autodiff header was included and ENOKI_BUILD is not defined
#if defined(ENOKI_AUTODIFF_H) && !defined(ENOKI_BUILD)
    ENOKI_AUTODIFF_EXTERN template struct ENOKI_AUTODIFF_EXPORT Tape>>;
    ENOKI_AUTODIFF_EXTERN template struct ENOKI_AUTODIFF_EXPORT DiffArray>>;

    ENOKI_AUTODIFF_EXTERN template struct ENOKI_AUTODIFF_EXPORT Tape>>;
    ENOKI_AUTODIFF_EXTERN template struct ENOKI_AUTODIFF_EXPORT DiffArray>>;
#endif

NAMESPACE_END(enoki)

// Matches the diagnostic "push" at the top of this header
#if defined(__GNUC__) && !defined(__clang__)
# pragma GCC diagnostic pop
#endif