/* enoki/quaternion.h -- Matrix data structure Enoki is a C++ template library that enables transparent vectorization of numerical kernels using SIMD instruction sets available on current processor architectures. Copyright (c) 2019 Wenzel Jakob All rights reserved. Use of this source code is governed by a BSD-style license that can be found in the LICENSE file. */ #pragma once #include NAMESPACE_BEGIN(enoki) /// Value trait to access the column type of a matrix template using column_t = typename std::decay_t::Column; /// Value trait to access the entry type of a matrix template using entry_t = value_t>; /// SFINAE helper for matrixs template using is_matrix_helper = enable_if_t::IsMatrix>; template constexpr bool is_matrix_v = is_detected_v; template using enable_if_matrix_t = enable_if_t>; template using enable_if_not_matrix_t = enable_if_t>; template struct Matrix : StaticArrayImpl, Size_, false, Matrix> { using Entry = Value_; using Column = Array; using Base = StaticArrayImpl>; using Base::coeff; ENOKI_ARRAY_IMPORT_BASIC(Base, Matrix); using Base::operator=; static constexpr bool IsMatrix = true; static constexpr bool IsVector = false; using ArrayType = Matrix; using MaskType = Mask, Size_>; template using ReplaceValue = Matrix, Size_>; Matrix() = default; /// Initialize from a incompatible matrix template = 0> ENOKI_INLINE Matrix(const Matrix &m) : Base(m) { } /// Initialize from an incompatible matrix template = 0> ENOKI_INLINE Matrix(const Matrix &m) { if constexpr (Size2 > Size) { /// Other matrix is bigger -- retain the top left part for (size_t i = 0; i < Size; ++i) coeff(i) = head(m.coeff(i)); } else { /// Other matrix is smaller -- copy the top left part and set remainder to identity using Remainder = Array; for (size_t i = 0; i < Size2; ++i) coeff(i) = concat(m.coeff(i), zero()); for (size_t i = Size2; i < Size; ++i) { auto col = zero(); col.coeff(i) = 1; coeff(i) = col; } } } template <= Base::Depth - 2)> = 0, enable_if_not_matrix_t = 0> ENOKI_INLINE Matrix(T&& v) { for (size_t i = 0; i < Size; ++i) { coeff(i) = zero(); coeff(i, i) = v; } } template == Base::Depth)> = 0, enable_if_not_matrix_t = 0> ENOKI_INLINE Matrix(T&& v) : Base(std::forward(v)) { } /// Initialize the matrix from a list of columns template ...>> = 0> ENOKI_INLINE Matrix(const Args&... args) : Base(args...) { } /// Initialize the matrix from a list of entries in row-major order template ...>> = 0> ENOKI_INLINE Matrix(const Args&... args) { alignas(alignof(Column)) Entry values[sizeof...(Args)] = { Entry(args)... }; for (size_t j = 0; j < Size; ++j) for (size_t i = 0; i < Size; ++i) coeff(j, i) = values[i * Size + j]; } template ENOKI_INLINE static Matrix from_cols(const Column&... cols) { return Matrix(cols...); } template ENOKI_INLINE static Matrix from_rows(const Row&... rows) { return transpose(Matrix(rows...)); } ENOKI_INLINE Column& col(size_t index) { return coeff(index); } ENOKI_INLINE const Column& col(size_t index) const { return coeff(index); } ENOKI_INLINE Column row(size_t index) const { using Index = Array; return gather(coeff(0).data() + index, arange() * uint32_t(Size)); } /// Return a reference to the (i, j) element ENOKI_INLINE decltype(auto) operator()(size_t i, size_t j) { return coeff(j, i); } /// Return a reference to the (i, j) element (const) ENOKI_INLINE decltype(auto) operator()(size_t i, size_t j) const { return coeff(j, i); } static ENOKI_INLINE Derived zero_(size_t size) { Derived result; for (size_t i = 0; i < Size; ++i) result.coeff(i) = zero(size); return result; } static ENOKI_INLINE Derived empty_(size_t size) { Derived result; for (size_t i = 0; i < Size; ++i) result.coeff(i) = empty(size); return result; } template ENOKI_INLINE static Matrix full_(const T &value, size_t size) { return Array::full_(value, size); } }; template , Size>, typename Column = column_t> ENOKI_INLINE Result operator*(const Matrix &m0, const Matrix &m1) { Result result; /* 4x4 case reduced to 4 multiplications, 12 fused multiply-adds, and 16 broadcasts (also fused on AVX512VL) */ for (size_t j = 0; j < Size; ++j) { Column sum = m0.coeff(0) * Column::full_(m1(0, j), 1); for (size_t i = 1; i < Size; ++i) sum = fmadd(m0.coeff(i), Column::full_(m1(i, j), 1), sum); result.coeff(j) = sum; } return result; } template = 0> ENOKI_INLINE auto operator*(const Matrix &m, const T1 &s) { if constexpr (array_size_v == Size && T1::IsVector) { using EValue = expr_t>; using EVector = Array; EVector sum = m.coeff(0) * EVector::full_(s.coeff(0), 1); for (size_t i = 1; i < Size; ++i) sum = fmadd(m.coeff(i), EVector::full_(s.coeff(i), 1), sum); return sum; } else { using EValue = expr_t; using EArray = Array, Size>; using EMatrix = Matrix; return EMatrix(EArray(m) * EArray::full_(EValue(s), 1)); } } template = 0> ENOKI_INLINE auto operator*(const T0 &s, const Matrix &m) { using EValue = expr_t; using EArray = Array, Size>; using EMatrix = Matrix; return EMatrix(EArray::full_(EValue(s), 1) * EArray(m)); } template = 0> ENOKI_INLINE auto operator/(const Matrix &m, const T1 &s) { using EValue = expr_t; using EArray = Array, Size>; using EMatrix = Matrix; return EMatrix(EArray(m) * EArray::full_(rcp(EValue(s)), 1)); } template ENOKI_INLINE expr_t trace(const Matrix &m) { expr_t result = m.coeff(0, 0); for (size_t i = 1; i < Size; ++i) result += m(i, i); return result; } template ENOKI_INLINE expr_t frob(const Matrix &matrix) { expr_t>> result = sqr(matrix.coeff(0)); for (size_t i = 1; i < Size; ++i) result = fmadd(matrix.coeff(i), matrix.coeff(i), result); return hsum(result); } template = 0> ENOKI_INLINE T identity(size_t size = 1) { T result = zero(size); for (size_t i = 0; i < T::Size; ++i) result(i, i) = full(scalar_t(1.f), size); return result; } template = 0> ENOKI_INLINE Matrix diag(const column_t &value) { Matrix result = zero(); for (size_t i = 0; i < Matrix::Size; ++i) result(i, i) = value.coeff(i); return result; } template = 0> ENOKI_INLINE column_t> diag(const Matrix &value) { column_t> result; for (size_t i = 0; i < Matrix::Size; ++i) result.coeff(i) = value(i, i); return result; } template > ENOKI_INLINE Matrix inverse(const Matrix &m) { return rcp(m(0, 0)); } template > ENOKI_INLINE Matrix inverse_transpose(const Matrix &m) { return rcp(m(0, 0)); } template > ENOKI_INLINE E det(const Matrix &m) { return m(0, 0); } template > ENOKI_INLINE Matrix inverse(const Matrix &m) { E inv_det = rcp(fmsub(m(0, 0), m(1, 1), m(0, 1) * m(1, 0))); return Matrix( m(1, 1) * inv_det, -m(0, 1) * inv_det, -m(1, 0) * inv_det, m(0, 0) * inv_det ); } template > ENOKI_INLINE E det(const Matrix &m) { return fmsub(m(0, 0), m(1, 1), m(0, 1) * m(1, 0)); } template > ENOKI_INLINE Matrix inverse_transpose(const Matrix &m) { E inv_det = rcp(fmsub(m(0, 0), m(1, 1), m(0, 1) * m(1, 0))); return Matrix( m(1, 1) * inv_det, -m(1, 0) * inv_det, -m(0, 1) * inv_det, m(0, 0) * inv_det ); } template > ENOKI_INLINE Matrix inverse_transpose(const Matrix &m) { using Vector = Array; Vector col0 = m.coeff(0), col1 = m.coeff(1), col2 = m.coeff(2); Vector row0 = cross(col1, col2), row1 = cross(col2, col0), row2 = cross(col0, col1); Vector inv_det = Vector(rcp(dot(col0, row0))); return Matrix( row0 * inv_det, row1 * inv_det, row2 * inv_det ); } template > ENOKI_INLINE Matrix inverse(const Matrix &m) { return transpose(inverse_transpose(m)); } template > ENOKI_INLINE E det(const Matrix &m) { return dot(m.coeff(0), cross(m.coeff(1), m.coeff(2))); } template > ENOKI_INLINE Matrix inverse_transpose(const Matrix &m) { using Vector = Array; Vector col0 = m.coeff(0), col1 = m.coeff(1), col2 = m.coeff(2), col3 = m.coeff(3); col1 = shuffle<2, 3, 0, 1>(col1); col3 = shuffle<2, 3, 0, 1>(col3); Vector tmp, row0, row1, row2, row3; tmp = shuffle<1, 0, 3, 2>(col2 * col3); row0 = col1 * tmp; row1 = col0 * tmp; tmp = shuffle<2, 3, 0, 1>(tmp); row0 = fmsub(col1, tmp, row0); row1 = shuffle<2, 3, 0, 1>(fmsub(col0, tmp, row1)); tmp = shuffle<1, 0, 3, 2>(col1 * col2); row0 = fmadd(col3, tmp, row0); row3 = col0 * tmp; tmp = shuffle<2, 3, 0, 1>(tmp); row0 = fnmadd(col3, tmp, row0); row3 = shuffle<2, 3, 0, 1>(fmsub(col0, tmp, row3)); tmp = shuffle<1, 0, 3, 2>(shuffle<2, 3, 0, 1>(col1) * col3); col2 = shuffle<2, 3, 0, 1>(col2); row0 = fmadd(col2, tmp, row0); row2 = col0 * tmp; tmp = shuffle<2, 3, 0, 1>(tmp); row0 = fnmadd(col2, tmp, row0); row2 = shuffle<2, 3, 0, 1>(fmsub(col0, tmp, row2)); tmp = shuffle<1, 0, 3, 2>(col0 * col1); row2 = fmadd(col3, tmp, row2); row3 = fmsub(col2, tmp, row3); tmp = shuffle<2, 3, 0, 1>(tmp); row2 = fmsub(col3, tmp, row2); row3 = fnmadd(col2, tmp, row3); tmp = shuffle<1, 0, 3, 2>(col0 * col3); row1 = fnmadd(col2, tmp, row1); row2 = fmadd(col1, tmp, row2); tmp = shuffle<2, 3, 0, 1>(tmp); row1 = fmadd(col2, tmp, row1); row2 = fnmadd(col1, tmp, row2); tmp = shuffle<1, 0, 3, 2>(col0 * col2); row1 = fmadd(col3, tmp, row1); row3 = fnmadd(col1, tmp, row3); tmp = shuffle<2, 3, 0, 1>(tmp); row1 = fnmadd(col3, tmp, row1); row3 = fmadd(col1, tmp, row3); Vector inv_det = Vector(rcp(dot(col0, row0))); return Matrix( row0 * inv_det, row1 * inv_det, row2 * inv_det, row3 * inv_det ); } template > ENOKI_INLINE Matrix inverse(const Matrix &m) { return transpose(inverse_transpose(m)); } template > ENOKI_INLINE E det(const Matrix &m) { using Vector = Array; Vector col0 = m.coeff(0), col1 = m.coeff(1), col2 = m.coeff(2), col3 = m.coeff(3); col1 = shuffle<2, 3, 0, 1>(col1); col3 = shuffle<2, 3, 0, 1>(col3); Vector tmp, row0; tmp = shuffle<1, 0, 3, 2>(col2 * col3); row0 = col1 * tmp; tmp = shuffle<2, 3, 0, 1>(tmp); row0 = fmsub(col1, tmp, row0); tmp = shuffle<1, 0, 3, 2>(col1 * col2); row0 = fmadd(col3, tmp, row0); tmp = shuffle<2, 3, 0, 1>(tmp); row0 = fnmadd(col3, tmp, row0); col1 = shuffle<2, 3, 0, 1>(col1); col2 = shuffle<2, 3, 0, 1>(col2); tmp = shuffle<1, 0, 3, 2>(col1 * col3); row0 = fmadd(col2, tmp, row0); tmp = shuffle<2, 3, 0, 1>(tmp); row0 = fnmadd(col2, tmp, row0); return dot(col0, row0); } template ENOKI_INLINE auto transpose(const StaticArrayBase &a) { static_assert(Value::Size == Size && array_depth::value >= 2, "Array must be a square matrix!"); using Column = value_t; if constexpr (Column::IsNative) { #if defined(ENOKI_X86_SSE42) if constexpr (std::is_same_v, float> && Size == 3) { __m128 c0 = a.derived().coeff(0).m, c1 = a.derived().coeff(1).m, c2 = a.derived().coeff(2).m; __m128 t0 = _mm_unpacklo_ps(c0, c1); __m128 t1 = _mm_unpacklo_ps(c2, c2); __m128 t2 = _mm_unpackhi_ps(c0, c1); __m128 t3 = _mm_unpackhi_ps(c2, c2); return Derived( _mm_movelh_ps(t0, t1), _mm_movehl_ps(t1, t0), _mm_movelh_ps(t2, t3) ); } else if constexpr (std::is_same_v, float> && Size == 4) { __m128 c0 = a.derived().coeff(0).m, c1 = a.derived().coeff(1).m, c2 = a.derived().coeff(2).m, c3 = a.derived().coeff(3).m; __m128 t0 = _mm_unpacklo_ps(c0, c1); __m128 t1 = _mm_unpacklo_ps(c2, c3); __m128 t2 = _mm_unpackhi_ps(c0, c1); __m128 t3 = _mm_unpackhi_ps(c2, c3); return Derived( _mm_movelh_ps(t0, t1), _mm_movehl_ps(t1, t0), _mm_movelh_ps(t2, t3), _mm_movehl_ps(t3, t2) ); } #endif #if defined(ENOKI_X86_AVX) if constexpr (std::is_same_v, double> && Size == 3) { __m256d c0 = a.derived().coeff(0).m, c1 = a.derived().coeff(1).m, c2 = a.derived().coeff(2).m; __m256d t3 = _mm256_shuffle_pd(c2, c2, 0b0000), t2 = _mm256_shuffle_pd(c2, c2, 0b1111), t1 = _mm256_shuffle_pd(c0, c1, 0b0000), t0 = _mm256_shuffle_pd(c0, c1, 0b1111); return Derived( _mm256_permute2f128_pd(t1, t3, 0b0010'0000), _mm256_permute2f128_pd(t0, t2, 0b0010'0000), _mm256_permute2f128_pd(t1, t3, 0b0011'0001) ); } else if constexpr (std::is_same_v, double> && Size == 4) { __m256d c0 = a.derived().coeff(0).m, c1 = a.derived().coeff(1).m, c2 = a.derived().coeff(2).m, c3 = a.derived().coeff(3).m; __m256d t3 = _mm256_shuffle_pd(c2, c3, 0b0000), t2 = _mm256_shuffle_pd(c2, c3, 0b1111), t1 = _mm256_shuffle_pd(c0, c1, 0b0000), t0 = _mm256_shuffle_pd(c0, c1, 0b1111); return Derived( _mm256_permute2f128_pd(t1, t3, 0b0010'0000), _mm256_permute2f128_pd(t0, t2, 0b0010'0000), _mm256_permute2f128_pd(t1, t3, 0b0011'0001), _mm256_permute2f128_pd(t0, t2, 0b0011'0001) ); } #endif #if defined(ENOKI_ARM_NEON) if constexpr (std::is_same_v, float> && Size == 3) { float32x4x2_t v01 = vtrnq_f32(a.derived().coeff(0).m, a.derived().coeff(1).m); float32x4x2_t v23 = vtrnq_f32(a.derived().coeff(2).m, a.derived().coeff(2).m); return Derived( vcombine_f32(vget_low_f32 (v01.val[0]), vget_low_f32 (v23.val[0])), vcombine_f32(vget_low_f32 (v01.val[1]), vget_low_f32 (v23.val[1])), vcombine_f32(vget_high_f32(v01.val[0]), vget_high_f32(v23.val[0])) ); } else if constexpr (std::is_same_v, float> && Size == 4) { float32x4x2_t v01 = vtrnq_f32(a.derived().coeff(0).m, a.derived().coeff(1).m); float32x4x2_t v23 = vtrnq_f32(a.derived().coeff(2).m, a.derived().coeff(3).m); return Derived( vcombine_f32(vget_low_f32 (v01.val[0]), vget_low_f32 (v23.val[0])), vcombine_f32(vget_low_f32 (v01.val[1]), vget_low_f32 (v23.val[1])), vcombine_f32(vget_high_f32(v01.val[0]), vget_high_f32(v23.val[0])), vcombine_f32(vget_high_f32(v01.val[1]), vget_high_f32(v23.val[1])) ); } #endif } ENOKI_CHKSCALAR("transpose"); Derived result; for (size_t i = 0; i < Size; ++i) for (size_t j = 0; j < Size; ++j) result.coeff(i, j) = a.derived().coeff(j, i); return result; } template , typename Matrix = Matrix> std::pair ENOKI_INLINE polar_decomp(const enoki::Matrix &A, size_t it = 10) { using Arr = Array, Size>; Matrix Q = A; for (size_t i = 0; i < it; ++i) { Matrix Qi = inverse_transpose(Q); Expr gamma = sqrt(frob(Qi) / frob(Q)); Q = fmadd(Arr(Q), gamma * .5f, Arr(Qi) * (rcp(gamma) * 0.5f)); } return std::make_pair(Q, transpose(Q) * A); } // ======================================================================= //! @{ \name Enoki accessors for static & dynamic vectorization // ======================================================================= template struct struct_support, enable_if_static_array_t>> { static constexpr bool IsDynamic = enoki::is_dynamic_v; using Dynamic = Matrix, Size>; using Value = Matrix; using Column = column_t; static ENOKI_INLINE size_t slices(const Value &value) { return enoki::slices(value.coeff(0, 0)); } static ENOKI_INLINE size_t packets(const Value &value) { return enoki::packets(value.coeff(0, 0)); } static ENOKI_INLINE void set_slices(Value &value, size_t size) { for (size_t i = 0; i < Size; ++i) enoki::set_slices(value.coeff(i), size); } template static ENOKI_INLINE auto packet(T2&& value, size_t i) { return packet(value, i, std::make_index_sequence()); } template static ENOKI_INLINE auto slice(T2&& value, size_t i) { return slice(value, i, std::make_index_sequence()); } template static ENOKI_INLINE auto slice_ptr(T2&& value, size_t i) { return slice_ptr(value, i, std::make_index_sequence()); } template static ENOKI_INLINE auto ref_wrap(T2&& value) { return ref_wrap(value, std::make_index_sequence()); } template static ENOKI_INLINE auto detach(T2&& value) { return detach(value, std::make_index_sequence()); } template static ENOKI_INLINE auto gradient(T2&& value) { return gradient(value, std::make_index_sequence()); } static ENOKI_INLINE Value zero(size_t size) { return Value::zero_(size); } static ENOKI_INLINE Value empty(size_t size) { return Value::empty_(size); } template ::value == array_size::value> = 0> static ENOKI_INLINE auto masked(T2 &value, const Mask &mask) { return detail::MaskedArray{ value, mask_t(mask) }; } template ::value != array_size::value> = 0> static ENOKI_INLINE auto masked(T2 &value, const Mask &mask) { using Arr = Array, Size>; return enoki::masked((Arr&) value, mask_t(mask)); } private: template static ENOKI_INLINE auto packet(T2&& value, size_t i, std::index_sequence) { return Matrix( enoki::packet(value.coeff(Index), i)...); } template static ENOKI_INLINE auto slice(T2&& value, size_t i, std::index_sequence) { return Matrix( enoki::slice(value.coeff(Index), i)...); } template static ENOKI_INLINE auto slice_ptr(T2&& value, size_t i, std::index_sequence) { return Matrix( enoki::slice_ptr(value.coeff(Index), i)...); } template static ENOKI_INLINE auto ref_wrap(T2&& value, std::index_sequence) { return Matrix( enoki::ref_wrap(value.coeff(Index))...); } template static ENOKI_INLINE auto detach(T2&& value, std::index_sequence) { return Matrix( enoki::detach(value.coeff(Index))...); } template static ENOKI_INLINE auto gradient(T2&& value, std::index_sequence) { return Matrix( enoki::gradient(value.coeff(Index))...); } }; //! @} // ======================================================================= NAMESPACE_END(enoki)