#pragma once //////////////////////////////////////////////////////////////////////////////// // The MIT License (MIT) // // Copyright (c) 2019 Nicholas Frechette & Realtime Math contributors // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. //////////////////////////////////////////////////////////////////////////////// #include "rtm/math.h" #include "rtm/quatf.h" #include "rtm/vector4f.h" #include "rtm/impl/compiler_utils.h" RTM_IMPL_FILE_PRAGMA_PUSH namespace rtm { ////////////////////////////////////////////////////////////////////////// // Returns the quaternion on the hypersphere with a positive [w] component // that represents the same 3D rotation as the input. ////////////////////////////////////////////////////////////////////////// RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE quatf RTM_SIMD_CALL quat_ensure_positive_w(quatf_arg0 input) RTM_NO_EXCEPT { #if defined(RTM_SSE2_INTRINSICS) constexpr __m128 sign_bit = { -0.0F, -0.0F, -0.0F, -0.0F }; const __m128 input_sign = _mm_and_ps(input, sign_bit); const __m128 bias = _mm_shuffle_ps(input_sign, input_sign, _MM_SHUFFLE(3, 3, 3, 3)); return _mm_xor_ps(input, bias); #elif defined(RTM_NEON_INTRINSICS) alignas(16) constexpr uint32_t sign_bit_i[4] = { 0x80000000U, 0x80000000U, 0x80000000U, 0x80000000U }; const uint32x4_t sign_bit = *reinterpret_cast(&sign_bit_i[0]); const uint32x4_t input_u32 = vreinterpretq_u32_f32(input); const uint32x4_t input_sign = vandq_u32(input_u32, sign_bit); const uint32x4_t bias = vmovq_n_f32(vgetq_lane_f32(input_sign, 3)); return vreinterpretq_f32_u32(veorq_u32(input_u32, bias)); #else return quat_get_w(input) >= 0.f ? input : quat_neg(input); #endif } ////////////////////////////////////////////////////////////////////////// // Returns a quaternion constructed from a vector3 representing the [xyz] // components while reconstructing the [w] component by assuming it is positive. ////////////////////////////////////////////////////////////////////////// RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE quatf RTM_SIMD_CALL quat_from_positive_w(vector4f_arg0 input) RTM_NO_EXCEPT { #if defined(RTM_SSE2_INTRINSICS) const __m128i abs_mask = _mm_set_epi32(0x7FFFFFFFULL, 0x7FFFFFFFULL, 0x7FFFFFFFULL, 0x7FFFFFFFULL); __m128 x2y2z2 = _mm_mul_ps(input, input); __m128 one = _mm_set_ss(1.0F); __m128 w_squared = _mm_sub_ss(_mm_sub_ss(_mm_sub_ss(one, x2y2z2), _mm_shuffle_ps(x2y2z2, x2y2z2, _MM_SHUFFLE(1, 1, 1, 1))), _mm_shuffle_ps(x2y2z2, x2y2z2, _MM_SHUFFLE(2, 2, 2, 2))); w_squared = _mm_and_ps(w_squared, _mm_castsi128_ps(abs_mask)); __m128 w = _mm_sqrt_ss(w_squared); #if defined(RTM_SSE4_INTRINSICS) return _mm_insert_ps(input, w, 0x30); #else __m128 input_wyzx = _mm_shuffle_ps(input, input, _MM_SHUFFLE(0, 2, 1, 3)); __m128 result_wyzx = _mm_move_ss(input_wyzx, w); return _mm_shuffle_ps(result_wyzx, result_wyzx, _MM_SHUFFLE(0, 2, 1, 3)); #endif #elif defined(RTM_NEON_INTRINSICS) && 0 // TODO: This is slower on ARMv7-A, measure again on ARM64, fewer instructions but the first // sub is dependent on the result of the mul where the C impl below pipelines a bit better it seems float32x4_t x2y2z2 = vmulq_f32(input, input); float w_squared = ((1.0F - vgetq_lane_f32(x2y2z2, 0)) - vgetq_lane_f32(x2y2z2, 1)) - vgetq_lane_f32(x2y2z2, 2); float w = rtm::scalar_sqrt(rtm::scalar_abs(w_squared)); return vsetq_lane_f32(w, input, 3); #else // Operation order is important here, due to rounding, ((1.0 - (X*X)) - Y*Y) - Z*Z is more accurate than 1.0 - dot3(xyz, xyz) float w_squared = ((1.0F - vector_get_x(input) * vector_get_x(input)) - vector_get_y(input) * vector_get_y(input)) - vector_get_z(input) * vector_get_z(input); // w_squared can be negative either due to rounding or due to quantization imprecision, we take the absolute value // to ensure the resulting quaternion is always normalized with a positive W component float w = scalar_sqrt(scalar_abs(w_squared)); return quat_set_w(vector_to_quat(input), w); #endif } } RTM_IMPL_FILE_PRAGMA_POP