cocos-engine-external/sources/enoki/special.h

676 lines
23 KiB
C++

/*
enoki/special.h -- Special functions: Bessel functions, Elliptic
and exponential integrals, etc. (still incomplete)
Enoki is a C++ template library that enables transparent vectorization
of numerical kernels using SIMD instruction sets available on current
processor architectures.
Copyright (c) 2019 Wenzel Jakob <wenzel.jakob@epfl.ch>
All rights reserved. Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file.
*/
#include <enoki/array.h>
#pragma once
NAMESPACE_BEGIN(enoki)
/// Evaluates a series of Chebyshev polynomials at argument x/2.
template <typename T, typename T2, size_t Size,
typename Expr = expr_t<T>> Expr chbevl(const T &x, T2 (&coeffs)[Size]) {
using Scalar = scalar_t<Expr>;
Expr b0 = Scalar(coeffs[0]);
Expr b1 = Scalar(0);
Expr b2;
ENOKI_UNROLL for (size_t i = 0; i < Size; ++i) {
b2 = b1;
b1 = b0;
b0 = fmsub(x, b1, b2 - Scalar(coeffs[i]));
}
return (b0 - b2) * Scalar(0.5f);
}
template <typename T, enable_if_not_array_t<T> = 0> T erf(const T &x) {
return std::erf(x);
}
template <typename T, enable_if_not_array_t<T> = 0> T erfc(const T &x) {
return std::erfc(x);
}
template <typename T, bool Recurse = true, typename Expr = expr_t<T>,
enable_if_array_t<T> = 0>
Expr erfc(const T &x);
template <typename T, bool Recurse = true, typename Expr = expr_t<T>,
enable_if_array_t<T> = 0>
Expr erf(const T &x);
template <typename T, bool Recurse, typename Expr, enable_if_array_t<T>>
Expr erfc(const T &x) {
constexpr bool Single = std::is_same_v<scalar_t<T>, float>;
using Scalar = scalar_t<T>;
Expr r;
Expr xa = abs(x),
z = exp(-x*x);
auto erf_mask = xa < Scalar(1),
large_mask = xa > Scalar(Single ? 2 : 8);
ENOKI_MARK_USED(erf_mask);
if constexpr (Single) {
Expr q = rcp(xa),
y = q*q, p_small, p_large;
if (is_cuda_array_v<Expr> || !all_nested(large_mask))
p_small = poly8(y, 5.638259427386472e-1, -2.741127028184656e-1,
3.404879937665872e-1, -4.944515323274145e-1,
6.210004621745983e-1, -5.824733027278666e-1,
3.687424674597105e-1, -1.387039388740657e-1,
2.326819970068386e-2);
if (is_cuda_array_v<Expr> || any_nested(large_mask))
p_large = poly7(y, 5.641895067754075e-1, -2.820767439740514e-1,
4.218463358204948e-1, -1.015265279202700e+0,
2.921019019210786e+0, -7.495518717768503e+0,
1.297719955372516e+1, -1.047766399936249e+1);
r = z * q * select(large_mask, p_large, p_small);
} else {
Expr p_small, p_large, q_small, q_large;
if (is_cuda_array_v<Expr> || !all_nested(large_mask)) {
p_small = poly8(xa, 5.57535335369399327526e2, 1.02755188689515710272e3,
9.34528527171957607540e2, 5.26445194995477358631e2,
1.96520832956077098242e2, 4.86371970985681366614e1,
7.46321056442269912687e0, 5.64189564831068821977e-1,
2.46196981473530512524e-10);
q_small = poly8(xa, 5.57535340817727675546e2, 1.65666309194161350182e3,
2.24633760818710981792e3, 1.82390916687909736289e3,
9.75708501743205489753e2, 3.54937778887819891062e2,
8.67072140885989742329e1, 1.32281951154744992508e1,
1.00000000000000000000e0);
}
if (is_cuda_array_v<Expr> || any_nested(large_mask)) {
p_large = poly5(xa, 2.97886665372100240670e0, 7.40974269950448939160e0,
6.16021097993053585195e0, 5.01905042251180477414e0,
1.27536670759978104416e0, 5.64189583547755073984e-1);
q_large = poly6(xa, 3.36907645100081516050e0, 9.60896809063285878198e0,
1.70814450747565897222e1, 1.20489539808096656605e1,
9.39603524938001434673e0, 2.26052863220117276590e0,
1.00000000000000000000e0);
}
r = (z * select(large_mask, p_large, p_small)) /
select(large_mask, q_large, q_small);
r &= neq(z, zero<Expr>());
}
r[x < Scalar(0)] = Scalar(2) - r;
if constexpr (Recurse) {
if (ENOKI_UNLIKELY(is_cuda_array_v<Expr> || any_nested(erf_mask)))
r[erf_mask] = Scalar(1) - erf<T, false>(x);
}
return r;
}
template <typename T, bool Recurse, typename Expr, enable_if_array_t<T>>
Expr erf(const T &x) {
using Scalar = scalar_t<T>;
Expr r;
auto erfc_mask = abs(x) > Scalar(1);
ENOKI_MARK_USED(erfc_mask);
Expr z = x * x;
constexpr bool Single = std::is_same_v<scalar_t<T>, float>;
if constexpr (Single) {
r = poly6(z, 1.128379165726710e+0, -3.761262582423300e-1,
1.128358514861418e-1, -2.685381193529856e-2,
5.188327685732524e-3, -8.010193625184903e-4,
7.853861353153693e-5);
} else {
r = poly4(z, 5.55923013010394962768e4, 7.00332514112805075473e3,
2.23200534594684319226e3, 9.00260197203842689217e1,
9.60497373987051638749e0) /
poly5(z, 4.92673942608635921086e4, 2.26290000613890934246e4,
4.59432382970980127987e3, 5.21357949780152679795e2,
3.35617141647503099647e1, 1.00000000000000000000e0);
}
r *= x;
if constexpr (Recurse) {
if (ENOKI_UNLIKELY(is_cuda_array_v<Expr> || any_nested(erfc_mask)))
r[erfc_mask] = Scalar(1) - erfc<T, false>(x);
}
return r;
}
/// Modified Bessel function of the first kind, order zero (exponentially scaled)
template <typename T, typename Expr = expr_t<T>> Expr i0e(const T &x_) {
using Scalar = scalar_t<T>;
/* Chebyshev coefficients for exp(-x) I0(x)
* in the interval [0,8].
*
* lim(x->0) { exp(-x) I0(x) } = 1.
*/
static Scalar A[] = {
Scalar(-1.30002500998624804212E-8), Scalar(6.04699502254191894932E-8),
Scalar(-2.67079385394061173391E-7), Scalar(1.11738753912010371815E-6),
Scalar(-4.41673835845875056359E-6), Scalar(1.64484480707288970893E-5),
Scalar(-5.75419501008210370398E-5), Scalar(1.88502885095841655729E-4),
Scalar(-5.76375574538582365885E-4), Scalar(1.63947561694133579842E-3),
Scalar(-4.32430999505057594430E-3), Scalar(1.05464603945949983183E-2),
Scalar(-2.37374148058994688156E-2), Scalar(4.93052842396707084878E-2),
Scalar(-9.49010970480476444210E-2), Scalar(1.71620901522208775349E-1),
Scalar(-3.04682672343198398683E-1), Scalar(6.76795274409476084995E-1)
};
/* Chebyshev coefficients for exp(-x) sqrt(x) I0(x)
* in the inverted interval [8,infinity].
*
* lim(x->inf) { exp(-x) sqrt(x) I0(x) } = 1/sqrt(2pi).
*/
static Scalar B[] = {
Scalar(3.39623202570838634515E-9), Scalar(2.26666899049817806459E-8),
Scalar(2.04891858946906374183E-7), Scalar(2.89137052083475648297E-6),
Scalar(6.88975834691682398426E-5), Scalar(3.36911647825569408990E-3),
Scalar(8.04490411014108831608E-1)
};
Expr x = abs(x_);
auto mask_big = x > Scalar(8);
Expr r_big, r_small;
if (!all_nested(mask_big))
r_small = chbevl(fmsub(x, Expr(Scalar(0.5)), Expr(Scalar(2))), A);
if (any_nested(mask_big))
r_big = chbevl(fmsub(Expr(Scalar(32)), rcp(x), Expr(Scalar(2))), B) *
rsqrt(x);
return select(mask_big, r_big, r_small);
}
// Inverse real error function approximation based on on "Approximating the
// erfinv function" by Mark Giles
template <typename T, typename Expr = expr_t<T>> Expr erfinv(const T &x_) {
using Scalar = scalar_t<T>;
Expr x(x_);
Expr w = -log((Expr(Scalar(1)) - x) * (Expr(Scalar(1)) + x));
Expr w1 = w - Scalar(2.5);
Expr w2 = sqrt(w) - Scalar(3);
Expr p1 = poly8(w1,
1.50140941, 0.246640727,
-0.00417768164, -0.00125372503,
0.00021858087, -4.39150654e-06,
-3.5233877e-06, 3.43273939e-07,
2.81022636e-08);
Expr p2 = poly8(w2,
2.83297682, 1.00167406,
0.00943887047, -0.0076224613,
0.00573950773, -0.00367342844,
0.00134934322, 0.000100950558,
-0.000200214257);
return select(w < Scalar(5), p1, p2) * x;
}
/// Evaluates Dawson's integral (e^(-x^2) \int_0^x e^(y^2) dy)
template <typename T, typename Expr = expr_t<T>> Expr dawson(const T &x) {
// Rational minimax approximation to Dawson's integral with relative
// error < 1e-6 on the real number line. July 2017, Wenzel Jakob
Expr x2 = x*x;
Expr num = poly6(x2, 1.00000080272429,9.18170212243285e-2,
4.25835373536124e-2, 6.0536496345054e-3,
9.88555033724111e-4, 3.64943550840577e-5,
1.55942290996993e-5);
Expr denom = poly7(x2, 1.0, 7.58517175815194e-1,
2.81364355593059e-1, 6.81783097841267e-2,
1.13586116798019e-2, 1.92020805811771e-3,
5.74217664074868e-5, 3.11884331363595e-5);
return num / denom * x;
}
/// Imaginary component of the error function
template <typename T, typename Expr = expr_t<T>> Expr erfi(const T &x) {
using Scalar = scalar_t<T>;
return Scalar(M_2_SQRTPI) * dawson(x) * exp(x * x);
}
/// Natural logarithm of the Gamma function
template <typename Value> Value lgamma(Value x_) {
using Mask = mask_t<Value>;
using Scalar = scalar_t<Value>;
// 'g' and 'n' parameters of the Lanczos approximation
// See mrob.com/pub/ries/lanczos-gamma.html
const int n = 6;
const Scalar g = 5.0f;
const Scalar log_sqrt2pi = Scalar(0.91893853320467274178);
const Scalar coeff[n + 1] = { (Scalar) 1.000000000190015, (Scalar) 76.18009172947146,
(Scalar) -86.50532032941677, (Scalar) 24.01409824083091,
(Scalar) -1.231739572450155, (Scalar) 0.1208650973866179e-2,
(Scalar) -0.5395239384953e-5 };
// potentially reflect using gamma(x) = pi / (sin(pi*x) * gamma(1-x))
Mask reflect = x_ < .5f;
Value x = select(reflect, -x_, x_ - 1.f),
b = x + g + .5f; // base
Value sum = 0;
for (int i = n; i >= 1; --i)
sum += coeff[i] / (x + Scalar(i));
sum += coeff[0];
// gamma(x) = sqrt(2*pi) * sum * b^(x + .5) / exp(b)
Value result = ((log_sqrt2pi + log(sum)) - b) + log(b) * (x + .5f);
if (is_cuda_array_v<Value> || any_nested(reflect)) {
masked(result, reflect) = log(abs(Scalar(M_PI) / sin(Scalar(M_PI) * x_))) - result;
masked(result, reflect && eq(x_, round(x_))) = std::numeric_limits<Scalar>::infinity();
}
return result;
}
/// Gamma function
template <typename Value> Value tgamma(Value x) { return exp(lgamma(x)); }
/**
* Computes a Carlson integral of the form
*
* R_F(X, Y, Z) = 1/2 * \int_{0}^\infty ((t + x) (t + y) (t + z))^(-1/2) dt
*
* Based on
*
* Computing elliptic integrals by duplication
* B. C. Carlson
* Numerische Mathematik, March 1979, Volume 33, Issue 1
*/
template <typename Vector3,
typename Value = value_t<Vector3>,
typename Scalar = scalar_t<Vector3>>
Value carlson_rf(Vector3 xyz) {
static_assert(
Vector3::Size == 3,
"carlson_rf(): Expected a three-dimensional input vector (x, y, z)");
assert(all_nested(xyz.x() >= Scalar(0) && xyz.y() > Scalar(0) && xyz.z() > Scalar(0)));
Vector3 XYZ;
Value mu_inv;
mask_t<Value> active = true;
int iterations = 0;
while (true) {
Vector3 sqrt_xyz = sqrt(xyz);
Value lambda = dot(shuffle<1, 2, 0>(sqrt_xyz), sqrt_xyz);
Value mu = hsum(xyz) * Scalar(1.0 / 3.0);
mu_inv = rcp(mu);
XYZ = fnmadd(xyz, mu_inv, Scalar(1));
Value eps = hmax(abs(XYZ));
active &= eps > Scalar(std::is_same_v<Scalar, double>
? 0.0024608
: 0.070154); // eps ^ (1/6)
if (none(active) || ++iterations == 10)
break;
xyz[mask_t<Vector3>(active)] = (xyz + lambda) * Scalar(0.25);
}
/* Use recurrences for cheaper polynomial evaluation. Based
on Numerical Recipes (3rd ed) by Press, Teukolsky,
Vetterling, and Flannery */
Value e2 = XYZ.x() * XYZ.y() - XYZ.z() * XYZ.z(),
e3 = hprod(XYZ),
er = (Scalar(1.0 / 24.0) * e2 - Scalar(1.0 / 10.0) -
Scalar(3.0 / 44.0) * e3) * e2 + Scalar(1.0 / 14.0) * e3;
return sqrt(mu_inv) * (Scalar(1) + er);
}
/**
* Computes a Carlson integral of the form
*
* R_D(x, y, z) = 3/2 * \int_{0}^\infty (t + x)^(-1/2) (t + y)^(-1/2) (t + z)^(-3/2) dt
*
* Based on
*
* Computing elliptic integrals by duplication
* B. C. Carlson
* Numerische Mathematik, March 1979, Volume 33, Issue 1
*/
template <typename Vector3,
typename Value = value_t<Vector3>,
typename Scalar = scalar_t<Vector3>>
Value carlson_rd(Vector3 xyz) {
static_assert(
Vector3::Size == 3,
"carlson_rd(): Expected a three-dimensional input vector (x, y, z)");
assert(all_nested(xyz.x() >= Scalar(0) && xyz.y() > Scalar(0) && xyz.z() > Scalar(0)));
Vector3 XYZ;
Value mu_inv;
mask_t<Value> active = true;
int iterations = 0;
Value sum = 0;
Value num = 1;
const Vector3 W(Scalar(1.0 / 5.0), Scalar(1.0 / 5.0), Scalar(3.0 / 5.0));
while (true) {
Vector3 sqrt_xyz = sqrt(xyz);
Value lambda = dot(shuffle<1, 2, 0>(sqrt_xyz), sqrt_xyz);
Value mu = hsum(xyz * W);
mu_inv = rcp(mu);
XYZ = fnmadd(xyz, mu_inv, Scalar(1));
Value eps = hmax(abs(XYZ));
active &= eps > Scalar(std::is_same_v<Scalar, double>
? (0.0024608 * 0.6)
: (0.070154 * 0.6)); // eps ^ (1/6) * 0.6
if (none(active) || ++iterations == 10)
break;
masked(sum, active) += num / (sqrt(xyz.z()) * (xyz.z() + lambda));
masked(num, active) *= Scalar(0.25f);
masked(xyz, mask_t<Vector3>(active)) = (xyz + lambda) * Scalar(0.25f);
}
/* Use recurrences for cheaper polynomial evaluation. Based
on Numerical Recipes (3rd ed) by Press, Teukolsky,
Vetterling, and Flannery */
Value z = XYZ.z(),
ea = XYZ.x() * XYZ.y(),
eb = z * z,
ec = ea - eb,
ed = fnmadd(Scalar(6), eb, ea),
ee = fmadd(ec, Scalar(2), ed);
Value p = ed * (-Scalar(3.0 / 14.0) + Scalar(9.0 / 88.0) * ed -
Scalar(1.0 / 4.0) * z * ee) +
z * (Scalar(1.0 / 6.0) * ee + z *
(-Scalar(9.0 / 22.0) * ec + z * Scalar(3.0 / 26.0) * ea));
return Scalar(3) * sum + num * mu_inv * sqrt(mu_inv) * (Scalar(1.0) + p);
}
/**
* Computes a Carlson integral of the form
*
* R_C(x, y) = 1/2 * \int_{0}^\infty (t + x)^(-1/2) (t + y)^-1 dt
*
* Based on
*
* Computing elliptic integrals by duplication
* B. C. Carlson
* Numerische Mathematik, March 1979, Volume 33, Issue 1
*/
template <typename Vector2,
typename Value = value_t<Vector2>,
typename Scalar = scalar_t<Vector2>>
Value carlson_rc(Vector2 xy) {
static_assert(
Vector2::Size == 2,
"carlson_rc(): Expected a two-dimensional input vector (x, y)");
assert(all(xy.x() >= Scalar(0) && xy.y() > Scalar(0)));
mask_t<Value> active = true;
Value inv_mu, s;
int iterations = 0;
while (true) {
Value lambda = hprod(sqrt(xy));
lambda += lambda + xy.y();
Value mu = fmadd(xy.x(), Scalar(1.0 / 3.0), xy.y() * Scalar(2.0 / 3.0));
inv_mu = rcp(mu);
s = (xy.y() - mu) * inv_mu;
active &= abs(s) > Scalar(std::is_same_v<Scalar, double>
? (0.0024608 * 0.48)
: (0.070154 * 0.48)); // eps ^ (1/6) * 0.48
if (none(active) || ++iterations == 10)
break;
masked(xy, mask_t<Vector2>(active)) = (xy + lambda) * Scalar(0.25f);
}
/* Use recurrences for cheaper polynomial evaluation. Based
on Numerical Recipes (3rd ed) by Press, Teukolsky,
Vetterling, and Flannery */
return sqrt(inv_mu) * (Scalar(1) + s * s *
(Scalar(0.3) + s * (Scalar(1.0 / 7.0) +
s * (Scalar(0.375) + s * Scalar(9.0 / 22.0)))));
}
/**
* Computes a Carlson integral of the form
*
* R_J(x, y, z, rho) = 3/2 * \int_{0}^\infty ((t + x) (t + y) (t + z))^(-1/2) (t+rho)^(-1) dt
*
* Based on
*
* Computing elliptic integrals by duplication
* B. C. Carlson
* Numerische Mathematik, March 1979, Volume 33, Issue 1
*/
template <typename Vector4,
typename Value = value_t<Vector4>,
typename Vector2 = Array<Value, 2>,
typename Scalar = scalar_t<Vector4>>
Value carlson_rj(Vector4 xyzr) {
static_assert(
Vector4::Size == 4,
"carlson_rj(): Expected a four-dimensional input vector (x, y, z, rho)");
assert(all(xyzr.x() >= Scalar(0) && xyzr.y() > Scalar(0) && xyzr.z() > Scalar(0) && xyzr.w() > Scalar(0)));
Vector4 XYZR;
Value mu_inv;
mask_t<Value> active = true;
int iterations = 0;
Value sum = 0;
Value num = 1;
while (true) {
auto xyz = head<3>(xyzr);
auto rho = xyzr.w();
auto sqrt_xyz = sqrt(xyz);
Value lambda = dot(shuffle<1, 2, 0>(sqrt_xyz), sqrt_xyz);
Value mu = (hsum(xyzr) + rho) * Scalar(1.0 / 5.0);
mu_inv = rcp(mu);
XYZR = fnmadd(xyzr, mu_inv, Scalar(1));
Value eps = hmax(abs(XYZR));
active &= eps > Scalar(std::is_same_v<Scalar, double>
? (0.0024608 * 0.6)
: (0.070154 * 0.6)); // eps ^ (1/6) * 0.6
Value alpha = rho * hsum(sqrt(xyz)) + sqrt(hprod(xyz));
alpha *= alpha;
Value beta = rho * (rho + lambda) * (rho + lambda);
if (none(active) || ++iterations == 10)
break;
masked(sum, active) += num * carlson_rc(Vector2(alpha, beta));
masked(num, active) *= Scalar(0.25f);
masked(xyzr, mask_t<Vector4>(active)) = (xyzr + lambda) * Scalar(0.25f);
}
/* Use recurrences for cheaper polynomial evaluation. Based
on Numerical Recipes (3rd ed) by Press, Teukolsky,
Vetterling, and Flannery */
Value ea = XYZR.x() * (XYZR.y() + XYZR.z()) + XYZR.y() * XYZR.z(),
eb = XYZR.x() * XYZR.y() * XYZR.z(),
R = XYZR.w(),
ec = R * R,
ed = ea - Scalar(3) * ec,
ee = eb + Scalar(2) * R * (ea - ec);
return Scalar(3) * sum +
num * mu_inv * sqrt(mu_inv) *
(Scalar(1) +
ed * (-Scalar(3.0 / 14.0) + Scalar(9.0 / 88.0) * ed -
Scalar(9.0 / 52.0) * ee) +
eb * (Scalar(1.0 / 6.0) +
R * (-Scalar(3.0 / 11.0) + R * Scalar(3.0 / 26.0))) +
R * ea * (Scalar(1.0 / 3.0) - R * Scalar(3.0 / 22.0)) -
Scalar(1.0 / 3.0) * R * ec);
}
// -----------------------------------------------------------------------
//! @{ \name Complete and incomplete elliptic integrals
//! Caution: the 'k' factor is squared in the elliptic integral, which
//! differs from the convention of Mathematica's EllipticK etc.
// -----------------------------------------------------------------------
/// Complete elliptic integral of the first kind
template <typename K, typename Value = expr_t<K>,
typename Scalar = scalar_t<Value>,
typename Vector3 = Array<Value, 3>>
Value comp_ellint_1(K k) {
return carlson_rf(Vector3(Scalar(0), Scalar(1) - k * k, Scalar(1)));
}
/// Incomplete elliptic integral of the first kind
template <typename Phi, typename K,
typename Value = expr_t<Phi, K>,
typename Scalar = scalar_t<Value>,
typename Vector3 = Array<Value, 3>>
Value ellint_1(Phi phi_, K k) {
Value phi = phi_,
n = floor(fmadd(phi, Scalar(1.0 / M_PI), Scalar(.5f))),
result = 0;
if (ENOKI_UNLIKELY(any(neq(n, Scalar(0))))) {
result = comp_ellint_1(k) * n * Scalar(2);
phi = fnmadd(n, Scalar(M_PI), phi);
}
auto [sin_phi, cos_phi] = sincos(phi);
Vector3 xyz(cos_phi * cos_phi, Scalar(1) - k * k * sin_phi * sin_phi,
Scalar(1));
result += sin_phi * carlson_rf(xyz);
return result;
}
/// Complete elliptic integral of the second kind
template <typename K, typename Value = expr_t<K>,
typename Scalar = scalar_t<Value>,
typename Vector3 = Array<Value, 3>>
Value comp_ellint_2(K k) {
auto k2 = k*k;
Vector3 xyz(Scalar(0), Scalar(1) - k2, Scalar(1));
return carlson_rf(xyz) - Scalar(1.0 / 3.0) * k2 * carlson_rd(xyz);
}
/// Incomplete elliptic integral of the second kind
template <typename Phi, typename K,
typename Value = expr_t<Phi, K>,
typename Scalar = scalar_t<Value>,
typename Vector3 = Array<Value, 3>>
Value ellint_2(Phi phi_, K k) {
Value phi = phi_,
k2 = k*k,
n = floor(fmadd(phi, Scalar(1.0 / M_PI), Scalar(.5f))),
result = 0;
if (ENOKI_UNLIKELY(any(neq(n, Scalar(0))))) {
result = comp_ellint_2(k) * n * Scalar(2);
phi = fnmadd(n, Scalar(M_PI), phi);
}
auto [sin_phi, cos_phi] = sincos(phi);
auto sin_phi_k_2 = sin_phi * sin_phi * k2;
Vector3 xyz(cos_phi * cos_phi, Scalar(1) - sin_phi_k_2, Scalar(1));
result += sin_phi * (carlson_rf(xyz) -
Scalar(1.0 / 3.0) * sin_phi_k_2 * carlson_rd(xyz));
return result;
}
/// Complete elliptic integral of the third kind
template <typename K, typename Nu,
typename Value = expr_t<K, Nu>,
typename Scalar = scalar_t<Value>,
typename Vector4 = Array<Value, 4>>
Value comp_ellint_3(K k, Nu nu) {
auto k2 = k*k;
Vector4 xyzr(Scalar(0), Scalar(1) - k2, Scalar(1), Scalar(1) + nu);
return carlson_rf(head<3>(xyzr)) -
Scalar(1.0 / 3.0) * nu * carlson_rj(xyzr);
}
/// Incomplete elliptic integral of the third kind
template <typename Phi, typename K, typename Nu,
typename Value = expr_t<Phi, K, Nu>,
typename Scalar = scalar_t<Value>,
typename Vector4 = Array<Value, 4>>
Value ellint_3(Phi phi_, K k, Nu nu) {
Value phi = phi_,
k2 = k*k,
n = floor(fmadd(phi, Scalar(1.0 / M_PI), Scalar(.5f))),
result = 0;
if (ENOKI_UNLIKELY(any(neq(n, Scalar(0))))) {
result = comp_ellint_3(k, nu) * n * Scalar(2);
phi = fnmadd(n, Scalar(M_PI), phi);
}
auto [sin_phi, cos_phi] = sincos(phi);
auto sin_phi_2 = sin_phi * sin_phi;
Vector4 xyzr(cos_phi * cos_phi, Scalar(1) - k2 * sin_phi_2, Scalar(1),
Scalar(1) + nu * sin_phi_2);
result += sin_phi * (carlson_rf(head<3>(xyzr)) -
Scalar(1.0 / 3.0) * nu * sin_phi_2 * carlson_rj(xyzr));
return result;
}
//! @}
// -----------------------------------------------------------------------
NAMESPACE_END(enoki)