189 lines
8.7 KiB
C++
189 lines
8.7 KiB
C++
#pragma once
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// The MIT License (MIT)
|
|
//
|
|
// Copyright (c) 2020 Nicholas Frechette & Realtime Math contributors
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
// in the Software without restriction, including without limitation the rights
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions:
|
|
//
|
|
// The above copyright notice and this permission notice shall be included in all
|
|
// copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
// SOFTWARE.
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#include "rtm/math.h"
|
|
#include "rtm/impl/compiler_utils.h"
|
|
#include "rtm/impl/mask_common.h"
|
|
|
|
RTM_IMPL_FILE_PRAGMA_PUSH
|
|
|
|
namespace rtm
|
|
{
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Returns the mask4f [x] component.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE uint32_t RTM_SIMD_CALL mask_get_x(mask4f_arg0 input) RTM_NO_EXCEPT
|
|
{
|
|
#if defined(RTM_SSE2_INTRINSICS)
|
|
return _mm_cvtsi128_si32(_mm_castps_si128(input));
|
|
#elif defined(RTM_NEON_INTRINSICS)
|
|
return vgetq_lane_u32(vreinterpretq_u32_f32(input), 0);
|
|
#else
|
|
return input.x;
|
|
#endif
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Returns the mask4f [y] component.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE uint32_t RTM_SIMD_CALL mask_get_y(mask4f_arg0 input) RTM_NO_EXCEPT
|
|
{
|
|
#if defined(RTM_SSE2_INTRINSICS)
|
|
return _mm_cvtsi128_si32(_mm_castps_si128(_mm_shuffle_ps(input, input, _MM_SHUFFLE(1, 1, 1, 1))));
|
|
#elif defined(RTM_NEON_INTRINSICS)
|
|
return vgetq_lane_u32(vreinterpretq_u32_f32(input), 1);
|
|
#else
|
|
return input.y;
|
|
#endif
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Returns the mask4f [z] component.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE uint32_t RTM_SIMD_CALL mask_get_z(mask4f_arg0 input) RTM_NO_EXCEPT
|
|
{
|
|
#if defined(RTM_SSE2_INTRINSICS)
|
|
return _mm_cvtsi128_si32(_mm_castps_si128(_mm_shuffle_ps(input, input, _MM_SHUFFLE(2, 2, 2, 2))));
|
|
#elif defined(RTM_NEON_INTRINSICS)
|
|
return vgetq_lane_u32(vreinterpretq_u32_f32(input), 2);
|
|
#else
|
|
return input.z;
|
|
#endif
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Returns the mask4f [w] component.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE uint32_t RTM_SIMD_CALL mask_get_w(mask4f_arg0 input) RTM_NO_EXCEPT
|
|
{
|
|
#if defined(RTM_SSE2_INTRINSICS)
|
|
return _mm_cvtsi128_si32(_mm_castps_si128(_mm_shuffle_ps(input, input, _MM_SHUFFLE(3, 3, 3, 3))));
|
|
#elif defined(RTM_NEON_INTRINSICS)
|
|
return vgetq_lane_u32(vreinterpretq_u32_f32(input), 3);
|
|
#else
|
|
return input.w;
|
|
#endif
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Returns true if all 4 components are true, otherwise false: all(input != 0)
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE bool RTM_SIMD_CALL mask_all_true(mask4f_arg0 input) RTM_NO_EXCEPT
|
|
{
|
|
#if defined(RTM_SSE2_INTRINSICS)
|
|
return _mm_movemask_ps(input) == 0xF;
|
|
#elif defined(RTM_NEON_INTRINSICS)
|
|
uint32x4_t mask = vreinterpretq_u32_f32(input);
|
|
uint8x8x2_t mask_0_8_1_9_2_10_3_11_4_12_5_13_6_14_7_15 = vzip_u8(vget_low_u8(mask), vget_high_u8(mask));
|
|
uint16x4x2_t mask_0_8_4_12_1_9_5_13_2_10_6_14_3_11_7_15 = vzip_u16(mask_0_8_1_9_2_10_3_11_4_12_5_13_6_14_7_15.val[0], mask_0_8_1_9_2_10_3_11_4_12_5_13_6_14_7_15.val[1]);
|
|
return vget_lane_u32(mask_0_8_4_12_1_9_5_13_2_10_6_14_3_11_7_15.val[0], 0) == 0xFFFFFFFFU;
|
|
#else
|
|
return input.x != 0 && input.y != 0 && input.z != 0 && input.w != 0;
|
|
#endif
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Returns true if all [xy] components are true, otherwise false: all(input != 0)
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE bool RTM_SIMD_CALL mask_all_true2(mask4f_arg0 input) RTM_NO_EXCEPT
|
|
{
|
|
#if defined(RTM_SSE2_INTRINSICS)
|
|
return (_mm_movemask_ps(input) & 0x3) == 0x3;
|
|
#elif defined(RTM_NEON_INTRINSICS)
|
|
return vget_lane_u64(vget_low_u32(vreinterpretq_u32_f32(input)), 0) == 0xFFFFFFFFFFFFFFFFULL;
|
|
#else
|
|
return input.x != 0 && input.y != 0;
|
|
#endif
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Returns true if all [xyz] components are true, otherwise false: all(input != 0)
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE bool RTM_SIMD_CALL mask_all_true3(mask4f_arg0 input) RTM_NO_EXCEPT
|
|
{
|
|
#if defined(RTM_SSE2_INTRINSICS)
|
|
return (_mm_movemask_ps(input) & 0x7) == 0x7;
|
|
#elif defined(RTM_NEON_INTRINSICS)
|
|
uint32x4_t mask = vreinterpretq_u32_f32(input);
|
|
uint8x8x2_t mask_0_8_1_9_2_10_3_11_4_12_5_13_6_14_7_15 = vzip_u8(vget_low_u8(mask), vget_high_u8(mask));
|
|
uint16x4x2_t mask_0_8_4_12_1_9_5_13_2_10_6_14_3_11_7_15 = vzip_u16(mask_0_8_1_9_2_10_3_11_4_12_5_13_6_14_7_15.val[0], mask_0_8_1_9_2_10_3_11_4_12_5_13_6_14_7_15.val[1]);
|
|
return (vget_lane_u32(mask_0_8_4_12_1_9_5_13_2_10_6_14_3_11_7_15.val[0], 0) & 0x00FFFFFFU) == 0x00FFFFFFU;
|
|
#else
|
|
return input.x != 0 && input.y != 0 && input.z != 0;
|
|
#endif
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Returns true if any 4 components are true, otherwise false: any(input != 0)
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE bool RTM_SIMD_CALL mask_any_true(mask4f_arg0 input) RTM_NO_EXCEPT
|
|
{
|
|
#if defined(RTM_SSE2_INTRINSICS)
|
|
return _mm_movemask_ps(input) != 0;
|
|
#elif defined(RTM_NEON_INTRINSICS)
|
|
uint32x4_t mask = vreinterpretq_u32_f32(input);
|
|
uint8x8x2_t mask_0_8_1_9_2_10_3_11_4_12_5_13_6_14_7_15 = vzip_u8(vget_low_u8(mask), vget_high_u8(mask));
|
|
uint16x4x2_t mask_0_8_4_12_1_9_5_13_2_10_6_14_3_11_7_15 = vzip_u16(mask_0_8_1_9_2_10_3_11_4_12_5_13_6_14_7_15.val[0], mask_0_8_1_9_2_10_3_11_4_12_5_13_6_14_7_15.val[1]);
|
|
return vget_lane_u32(mask_0_8_4_12_1_9_5_13_2_10_6_14_3_11_7_15.val[0], 0) != 0;
|
|
#else
|
|
return input.x != 0 || input.y != 0 || input.z != 0 || input.w != 0;
|
|
#endif
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Returns true if any [xy] components are true, otherwise false: any(input != 0)
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE bool RTM_SIMD_CALL mask_any_true2(mask4f_arg0 input) RTM_NO_EXCEPT
|
|
{
|
|
#if defined(RTM_SSE2_INTRINSICS)
|
|
return (_mm_movemask_ps(input) & 0x3) != 0;
|
|
#elif defined(RTM_NEON_INTRINSICS)
|
|
return vget_lane_u64(vget_low_u32(vreinterpretq_u32_f32(input)), 0) != 0;
|
|
#else
|
|
return input.x != 0 || input.y != 0;
|
|
#endif
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Returns true if any [xyz] components are true, otherwise false: any(input != 0)
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE bool RTM_SIMD_CALL mask_any_true3(mask4f_arg0 input) RTM_NO_EXCEPT
|
|
{
|
|
#if defined(RTM_SSE2_INTRINSICS)
|
|
return (_mm_movemask_ps(input) & 0x7) != 0;
|
|
#elif defined(RTM_NEON_INTRINSICS)
|
|
uint32x4_t mask = vreinterpretq_u32_f32(input);
|
|
uint8x8x2_t mask_0_8_1_9_2_10_3_11_4_12_5_13_6_14_7_15 = vzip_u8(vget_low_u8(mask), vget_high_u8(mask));
|
|
uint16x4x2_t mask_0_8_4_12_1_9_5_13_2_10_6_14_3_11_7_15 = vzip_u16(mask_0_8_1_9_2_10_3_11_4_12_5_13_6_14_7_15.val[0], mask_0_8_1_9_2_10_3_11_4_12_5_13_6_14_7_15.val[1]);
|
|
return (vget_lane_u32(mask_0_8_4_12_1_9_5_13_2_10_6_14_3_11_7_15.val[0], 0) & 0x00FFFFFFU) != 0;
|
|
#else
|
|
return input.x != 0 || input.y != 0 || input.z != 0;
|
|
#endif
|
|
}
|
|
}
|
|
|
|
RTM_IMPL_FILE_PRAGMA_POP
|