cocos-engine-external/sources/enoki/array_utils.h

201 lines
6.1 KiB
C++

/*
enoki/array_utils.h -- Miscellaneous vectorization utilities (meshgrid,
N-dimensional range iteration, binary search, aligned stack allocation)
Enoki is a C++ template library that enables transparent vectorization
of numerical kernels using SIMD instruction sets available on current
processor architectures.
Copyright (c) 2019 Wenzel Jakob <wenzel.jakob@epfl.ch>
All rights reserved. Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file.
*/
#pragma once
#include <enoki/array_generic.h>
#include <enoki/array_idiv.h>
NAMESPACE_BEGIN(enoki)
/**
 * \brief Analogous to meshgrid() in NumPy or MATLAB; for dynamic arrays
 *
 * Produces two arrays with <tt>x.size() * y.size()</tt> entries each. The
 * entries of \c x vary fastest: the first component cycles through \c x once
 * per entry of \c y, while the second component repeats every entry of \c y
 * <tt>x.size()</tt> times in a row.
 *
 * \param x Coordinates along the first axis
 * \param y Coordinates along the second axis
 * \return  The pair <tt>(X, Y)</tt> of expanded coordinate arrays
 */
template <typename T, enable_if_dynamic_array_t<T> = 0>
Array<T, 2> meshgrid(const T &x, const T &y) {
    if constexpr (is_cuda_array_v<T> || is_diff_array_v<T>) {
        x.eval();
        y.eval();

        // A single-entry 'x' is merely resized to the slice count of 'y'
        if (x.size() == 1) {
            T x_resized(x);
            set_slices(x_resized, slices(y));
            return Array<T, 2>(std::move(x_resized), y);
        }

        using UInt32 = uint32_array_t<T>;

        const uint32_t width = (uint32_t) x.size();
        const uint32_t count = width * (uint32_t) y.size();
        divisor<uint32_t> by_width(width);

        // Split each linear index into (column, row) = (index % width, index / width)
        UInt32 linear = arange<UInt32>(count);
        UInt32 row    = by_width(linear);
        UInt32 column = linear - row * width;

        return Array<T, 2>(gather<T>(x, column), gather<T>(y, row));
    } else {
        const size_t x_count = x.size(),
                     y_count = y.size();

        T X, Y;
        set_slices(X, x_count * y_count);
        set_slices(Y, x_count * y_count);

        size_t cursor = 0;
        const bool whole_packets = (x_count % T::PacketSize) == 0;

        if (!whole_packets) {
            /* General case: fill the output one coefficient at a time */
            for (size_t row = 0; row < y_count; ++row) {
                for (size_t column = 0; column < x_count; ++column) {
                    X.coeff(cursor) = x.coeff(column);
                    Y.coeff(cursor) = y.coeff(row);
                    ++cursor;
                }
            }
        } else {
            /* Fast path: 'x' consists of full packets, copy packet by packet */
            const size_t packets_per_row = packets(x);
            for (size_t row = 0; row < y_count; ++row) {
                for (size_t p = 0; p < packets_per_row; ++p) {
                    packet(X, cursor) = packet(x, p);
                    packet(Y, cursor) = y.coeff(row);
                    ++cursor;
                }
            }
        }

        return Array<T, 2>(std::move(X), std::move(Y));
    }
}
/**
 * \brief Vectorized N-dimensional 'range' iterable with automatic mask
 * computation
 *
 * Iterating over a \c range yields pairs <tt>(value, mask)</tt>. Every
 * iteration step covers \c Packet::Size consecutive linear indices, starting
 * at zero.
 *
 * - If \c Value is a depth-1 array, \c value is the packet of linear indices
 *   and \c mask is <tt>value < size[0]</tt>.
 * - If \c Value is a depth-2 array, the linear index is decomposed into
 *   \c Dimension coordinates, with dimension 0 varying fastest. \c mask
 *   compares the last coordinate against the last extent.
 *
 * The constructor arguments are passed on to the constructor of \c Size
 * (the per-dimension extents).
 */
template <typename Value> struct range {
    /// Number of dimensions: size of \c Value when it is nested (depth 2), 1 otherwise
    static constexpr size_t Dimension = array_depth_v<Value> == 2 ?
        array_size_v<Value> : 1;

    /// Number of indices processed per iteration step
    static constexpr size_t PacketSize = array_depth_v<Value> == 2 ?
        array_size_v<value_t<Value>> : array_size_v<Value>;

    using Scalar = scalar_t<Value>;
    using Packet = Array<Scalar, PacketSize>;
    using Size   = Array<Scalar, Dimension>;

    struct iterator {
        /// Sentinel constructor: only the step counter is set (this is what end() returns)
        iterator(size_t index) : index(index) { }

        /// Full constructor: starts at linear indices <tt>0 .. Packet::Size - 1</tt>
        iterator(size_t index, Size size)
            : index(index), index_p(arange<Packet>()), size(size) {
            // One divisor per extent except the last one
            for (size_t i = 0; i < Dimension - 1; ++i)
                div[i] = size[i];
        }

        /// Iterators compare equal when their step counters match
        bool operator==(const iterator &it) const { return it.index == index; }
        bool operator!=(const iterator &it) const { return it.index != index; }

        /// Advance by one packet of linear indices
        iterator &operator++() {
            index += 1;
            index_p += Scalar(Packet::Size);
            return *this;
        }

        /// Return the current index value(s) together with the validity mask
        std::pair<Value, mask_t<Packet>> operator*() const {
            if constexpr (array_depth_v<Value> == 1) {
                return { index_p, index_p < size[0] };
            } else {
                Value value;
                value[0] = index_p;

                /* With a single dimension there is nothing to decompose.
                   The guard also prevents 'Dimension - 2' from wrapping
                   around in the reverse loop below. */
                if constexpr (Dimension > 1) {
                    // value[i + 1] = linear index / (size[0] * ... * size[i])
                    ENOKI_UNROLL for (size_t i = 0; i < Dimension - 1; ++i)
                        value[i + 1] = div[i](value[i]);

                    /* Walk backwards and subtract the contribution of the
                       higher dimensions, which leaves the remainder with
                       respect to size[i] in value[i] */
                    Packet offset = zero<Packet>();
                    ENOKI_UNROLL for (size_t i = Dimension - 2; ; --i) {
                        offset = size[i] * (value[i + 1] + offset);
                        value[i] -= offset;
                        if (i == 0)
                            break;
                    }
                }

                return { value, value[Dimension - 1] < size[Dimension - 1] };
            }
        }

    private:
        size_t index;      ///< Number of packets stepped over so far
        Packet index_p;    ///< Linear indices covered by the current step
        Size size;         ///< Extent of each dimension
        divisor<Scalar> div[Dimension > 1 ? (Dimension - 1) : 1];
    };

    template <typename... Args>
    range(Args&&... args) : size(args...) { }

    /// Iterator positioned at the first packet of indices
    iterator begin() const {
        return iterator(0, size);
    }

    /// Sentinel located after ceil(hprod(size) / Packet::Size) steps
    iterator end() const {
        return iterator((hprod(size) + Packet::Size - 1) / Packet::Size);
    }

private:
    Size size;
};
/**
 * \brief Vectorized binary search over the index interval
 * <tt>[start_, end_]</tt>
 *
 * Every lane keeps its own <tt>[start, end]</tt> interval. In each iteration
 * the predicate is evaluated at the interval midpoint: lanes for which it
 * holds continue with <tt>start = min(middle + 1, end)</tt>, all other lanes
 * continue with <tt>end = middle</tt>. For a predicate that holds on a prefix
 * of the interval and fails afterwards, the result is the first index for
 * which the predicate fails (or \c end_ if it never fails).
 *
 * \tparam Index  Index array type; deduced from the first argument of \c pred
 * \param start_  First index of the search interval
 * \param end_    Upper bound of the search interval
 * \param pred    Callable that maps an \c Index to a <tt>mask_t<Index></tt>
 * \return        The per-lane \c start value after
 *                <tt>log2i(end_ - start_) + 1</tt> iterations; \c start_ if
 *                <tt>start_ >= end_</tt> (the predicate is then never called)
 */
template <typename Predicate,
          typename Args  = typename function_traits<Predicate>::Args,
          typename Index = std::decay_t<std::tuple_element_t<0, Args>>>
Index binary_search(scalar_t<Index> start_,
                    scalar_t<Index> end_,
                    const Predicate &pred) {
    Index start(start_), end(end_);

    scalar_t<Index> iterations = (start_ < end_) ?
        (log2i(end_ - start_) + 1) : 0;

    for (size_t i = 0; i < iterations; ++i) {
        /* 'start <= end' holds in every lane, so the midpoint can be formed
           from the interval length. Unlike 'sr<1>(start + end)', this cannot
           overflow the index type for large bounds. */
        Index middle = start + sr<1>(end - start);

        mask_t<Index> cond = pred(middle);

        masked(start, cond) = min(middle + 1, end);
        masked(end,  !cond) = middle;
    }

    return start;
}
// -----------------------------------------------------------------------
//! @{ \name Stack memory allocation
// -----------------------------------------------------------------------
/**
* \brief Wrapper around alloca(), which returns aligned (and, optionally,
* zero-initialized) memory
*
* Expands to an expression of type <tt>Array *</tt>. The macro requests
* <tt>sizeof(Array) * (Count) + enoki::max_packet_size - 4</tt> bytes from
* alloca() and passes the pointer to enoki::detail::alloca_helper(), which
* advances it to a multiple of \c enoki::max_packet_size and, when \c Clear
* is true, zero-fills the first <tt>sizeof(Array) * (Count)</tt> bytes.
*
* \param Array Element type of the returned buffer
* \param Count Number of elements. The argument is expanded twice, so it
*              should not have side effects.
* \param Clear Compile-time boolean (it is used as a template argument)
*
* \remark Since this is a macro, alloca() is invoked directly within the
* function that uses it.
*
* NOTE(review): the "- 4" in the size computation presumably relies on
* alloca() returning pointers that are at least 4-byte aligned -- confirm for
* the targeted platforms.
*/
#define ENOKI_ALIGNED_ALLOCA(Array, Count, Clear) \
enoki::detail::alloca_helper<Array, Clear>((uint8_t *) alloca( \
sizeof(Array) * (Count) + enoki::max_packet_size - 4), \
sizeof(Array) * (Count))
namespace detail {
    /**
     * \brief Helper of ENOKI_ALIGNED_ALLOCA: aligns a raw buffer and
     * optionally clears it
     *
     * \param ptr  Start of the raw buffer
     * \param size Number of bytes to zero-fill when \c Clear is true
     * \return     \c ptr advanced to the next address that is a multiple of
     *             \c max_packet_size (unchanged if it already is one),
     *             converted to <tt>Array *</tt>
     *
     * NOTE(review): the modulo arithmetic below assumes that
     * \c max_packet_size is an unsigned power of two -- confirm at its
     * definition.
     */
    template <typename Array, bool Clear>
    ENOKI_INLINE Array *alloca_helper(uint8_t *ptr, size_t size) {
        // Number of bytes separating 'ptr' from the next aligned address
        const uintptr_t address = (uintptr_t) ptr;
        ptr += (max_packet_size - address) % max_packet_size;

        if constexpr (Clear)
            memset(ptr, 0, size);

        return (Array *) ptr;
    }
}
//! @}
// -----------------------------------------------------------------------
NAMESPACE_END(enoki)