613 lines
24 KiB
C++
613 lines
24 KiB
C++
#pragma once
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// The MIT License (MIT)
|
|
//
|
|
// Copyright (c) 2018 Nicholas Frechette & Realtime Math contributors
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
// in the Software without restriction, including without limitation the rights
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions:
|
|
//
|
|
// The above copyright notice and this permission notice shall be included in all
|
|
// copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
// SOFTWARE.
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#include "rtm/types.h"
|
|
#include "rtm/impl/compiler_utils.h"
|
|
#include "rtm/scalarf.h"
|
|
#include "rtm/scalard.h"
|
|
|
|
#include <cstdint>
|
|
#include <cstring>
|
|
|
|
RTM_IMPL_FILE_PRAGMA_PUSH
|
|
|
|
namespace rtm
|
|
{
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from all 4 components.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4f RTM_SIMD_CALL vector_set(float x, float y, float z, float w) RTM_NO_EXCEPT
{
#if defined(RTM_SSE2_INTRINSICS)
	// _mm_set_ps takes its arguments from high lane to low lane.
	return _mm_set_ps(w, z, y, x);
#elif defined(RTM_NEON_INTRINSICS)
	// Copy the float bit patterns through std::memcpy instead of dereferencing
	// type-punned pointers: the previous `*(const uint32_t*)&x` form violated
	// strict aliasing (undefined behavior). memcpy compiles to the same code.
	uint32_t x_u32;
	uint32_t y_u32;
	uint32_t z_u32;
	uint32_t w_u32;
	std::memcpy(&x_u32, &x, sizeof(float));
	std::memcpy(&y_u32, &y, sizeof(float));
	std::memcpy(&z_u32, &z, sizeof(float));
	std::memcpy(&w_u32, &w, sizeof(float));
	float32x2_t V0 = vcreate_f32((uint64_t)x_u32 | ((uint64_t)y_u32 << 32));
	float32x2_t V1 = vcreate_f32((uint64_t)z_u32 | ((uint64_t)w_u32 << 32));
	return vcombine_f32(V0, V1);
#else
	return vector4f{ x, y, z, w };
#endif
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from the [xyz] components and sets [w] to 0.0.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4f RTM_SIMD_CALL vector_set(float x, float y, float z) RTM_NO_EXCEPT
{
#if defined(RTM_SSE2_INTRINSICS)
	// _mm_set_ps takes its arguments from high lane to low lane.
	return _mm_set_ps(0.0F, z, y, x);
#elif defined(RTM_NEON_INTRINSICS)
	// Copy the float bit patterns through std::memcpy instead of dereferencing
	// type-punned pointers: the previous `*(const uint32_t*)&x` form violated
	// strict aliasing (undefined behavior). memcpy compiles to the same code.
	uint32_t x_u32;
	uint32_t y_u32;
	uint32_t z_u32;
	std::memcpy(&x_u32, &x, sizeof(float));
	std::memcpy(&y_u32, &y, sizeof(float));
	std::memcpy(&z_u32, &z, sizeof(float));
	float32x2_t V0 = vcreate_f32((uint64_t)x_u32 | ((uint64_t)y_u32 << 32));
	// The high half of V1 stays zero, which also zeroes [w].
	float32x2_t V1 = vcreate_f32((uint64_t)z_u32);
	return vcombine_f32(V0, V1);
#else
	return vector4f{ x, y, z, 0.0f };
#endif
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from a single value for all 4 components.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4f RTM_SIMD_CALL vector_set(float xyzw) RTM_NO_EXCEPT
{
	// Broadcasts the single input value into all four lanes.
#if defined(RTM_SSE2_INTRINSICS)
	return _mm_set1_ps(xyzw);
#elif defined(RTM_NEON_INTRINSICS)
	return vdupq_n_f32(xyzw);
#else
	return vector4f{ xyzw, xyzw, xyzw, xyzw };
#endif
}
|
|
|
|
#if defined(RTM_SSE2_INTRINSICS)
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from all 4 components.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4f RTM_SIMD_CALL vector_set(scalarf_arg0 x, scalarf_arg1 y, scalarf_arg2 z, scalarf_arg3 w) RTM_NO_EXCEPT
{
	// Interleave the low lanes pairwise: [x y ? ?] and [z w ? ?].
	const __m128 x0y0 = _mm_unpacklo_ps(x.value, y.value);
	const __m128 z0w0 = _mm_unpacklo_ps(z.value, w.value);
	// _mm_movelh_ps(a, b) yields [a0 a1 b0 b1], identical to the
	// _mm_shuffle_ps(a, b, _MM_SHUFFLE(1, 0, 1, 0)) idiom.
	return _mm_movelh_ps(x0y0, z0w0);
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from the [xyz] components and sets [w] to 0.0.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4f RTM_SIMD_CALL vector_set(scalarf_arg0 x, scalarf_arg1 y, scalarf_arg2 z) RTM_NO_EXCEPT
{
	// Interleave the low lanes pairwise: [x y ? ?] and [z 0 ? ?].
	const __m128 x0y0 = _mm_unpacklo_ps(x.value, y.value);
	const __m128 z0w0 = _mm_unpacklo_ps(z.value, _mm_setzero_ps());
	// _mm_movelh_ps(a, b) yields [a0 a1 b0 b1], identical to the
	// _mm_shuffle_ps(a, b, _MM_SHUFFLE(1, 0, 1, 0)) idiom; [w] ends up 0.0.
	return _mm_movelh_ps(x0y0, z0w0);
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from a single value for all 4 components.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4f RTM_SIMD_CALL vector_set(scalarf_arg0 xyzw) RTM_NO_EXCEPT
{
	// Replicates lane 0 of the scalar across all four lanes.
	const __m128 input = xyzw.value;
	return _mm_shuffle_ps(input, input, _MM_SHUFFLE(0, 0, 0, 0));
}
|
|
#endif
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from all 4 components.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4d vector_set(double x, double y, double z, double w) RTM_NO_EXCEPT
{
#if defined(RTM_SSE2_INTRINSICS)
	// _mm_set_pd takes its arguments from high lane to low lane.
	const __m128d xy = _mm_set_pd(y, x);
	const __m128d zw = _mm_set_pd(w, z);
	return vector4d{ xy, zw };
#else
	return vector4d{ x, y, z, w };
#endif
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from the [xyz] components and sets [w] to 0.0.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4d vector_set(double x, double y, double z) RTM_NO_EXCEPT
{
#if defined(RTM_SSE2_INTRINSICS)
	// _mm_set_pd takes its arguments from high lane to low lane; [w] is zeroed.
	const __m128d xy = _mm_set_pd(y, x);
	const __m128d zw = _mm_set_pd(0.0, z);
	return vector4d{ xy, zw };
#else
	return vector4d{ x, y, z, 0.0 };
#endif
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from a single value for all 4 components.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4d vector_set(double xyzw) RTM_NO_EXCEPT
{
	// Broadcasts the single input value into all four lanes.
#if defined(RTM_SSE2_INTRINSICS)
	const __m128d splat = _mm_set1_pd(xyzw);
	return vector4d{ splat, splat };
#else
	return vector4d{ xyzw, xyzw, xyzw, xyzw };
#endif
}
|
|
|
|
#if defined(RTM_SSE2_INTRINSICS)
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from all 4 components.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4d RTM_SIMD_CALL vector_set(scalard x, scalard y, scalard z, scalard w) RTM_NO_EXCEPT
{
	// Pack the low lane of each scalar pairwise: [x y] and [z w].
	const __m128d x0y0 = _mm_unpacklo_pd(x.value, y.value);
	const __m128d z0w0 = _mm_unpacklo_pd(z.value, w.value);
	return vector4d{ x0y0, z0w0 };
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from the [xyz] components and sets [w] to 0.0.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4d RTM_SIMD_CALL vector_set(scalard x, scalard y, scalard z) RTM_NO_EXCEPT
{
	// Pack the low lane of each scalar pairwise: [x y] and [z 0].
	const __m128d x0y0 = _mm_unpacklo_pd(x.value, y.value);
	const __m128d z0w0 = _mm_unpacklo_pd(z.value, _mm_setzero_pd());
	return vector4d{ x0y0, z0w0 };
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from a single value for all 4 components.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4d RTM_SIMD_CALL vector_set(scalard xyzw) RTM_NO_EXCEPT
{
	// Duplicate the low lane into both lanes of each half;
	// _mm_unpacklo_pd(v, v) is identical to _mm_shuffle_pd(v, v, 0).
	const __m128d splat = _mm_unpacklo_pd(xyzw.value, xyzw.value);
	return vector4d{ splat, splat };
}
|
|
#endif
|
|
|
|
namespace rtm_impl
|
|
{
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Returns true if mix4 component is one of [xyzw]
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr bool is_mix_xyzw(mix4 arg) RTM_NO_EXCEPT { return static_cast<uint32_t>(arg) <= static_cast<uint32_t>(mix4::w); }
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Returns true if mix4 component is one of [abcd]
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr bool is_mix_abcd(mix4 arg) RTM_NO_EXCEPT { return static_cast<uint32_t>(arg) >= static_cast<uint32_t>(mix4::a); }
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// This is a helper struct to help manipulate SIMD masks.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
union mask_converter
{
	// Full 64 bit view of the mask.
	uint64_t u64;
	// The same mask viewed as two 32 bit halves.
	uint32_t u32[2];

	// Constructs the converter from a 64 bit mask value.
	RTM_DISABLE_SECURITY_COOKIE_CHECK explicit RTM_FORCE_INLINE constexpr mask_converter(uint64_t value) RTM_NO_EXCEPT : u64(value) {}

	// Returns a 32 bit view of the mask.
	// NOTE(review): u32[0] is the low half of u64 only on little-endian
	// targets; masks produced here are all-ones or all-zeros (see
	// get_mask_value) so both halves agree in practice — confirm if other
	// mask values are ever stored.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr operator uint32_t() const RTM_NO_EXCEPT { return u32[0]; }
	// Returns the mask as its full 64 bit value.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr operator uint64_t() const RTM_NO_EXCEPT { return u64; }
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Returns a SIMD mask value from a boolean.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr mask_converter get_mask_value(bool is_true) RTM_NO_EXCEPT
{
	// All bits set when true, all bits cleared when false.
	return mask_converter(is_true ? ~uint64_t(0) : uint64_t(0));
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Selects if_false if the SIMD mask value is 0, otherwise if_true.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr double select(uint64_t mask, double if_true, double if_false) RTM_NO_EXCEPT
{
	// A non-zero mask picks if_true; a zero mask picks if_false.
	return mask != 0 ? if_true : if_false;
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Selects if_false if the SIMD mask value is 0, otherwise if_true.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr float select(uint32_t mask, float if_true, float if_false) RTM_NO_EXCEPT
{
	// A non-zero mask picks if_true; a zero mask picks if_false.
	return mask != 0 ? if_true : if_false;
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// This is a helper struct to allow a single consistent API between
|
|
// various vector types when the semantics are identical but the return
|
|
// type differs. Implicit coercion is used to return the desired value
|
|
// at the call site.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
struct vector_zero_impl
|
|
{
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator vector4d() const RTM_NO_EXCEPT
|
|
{
|
|
#if defined(RTM_SSE2_INTRINSICS)
|
|
const __m128d zero_pd = _mm_setzero_pd();
|
|
return vector4d{ zero_pd, zero_pd };
|
|
#else
|
|
return vector_set(0.0);
|
|
#endif
|
|
}
|
|
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator vector4f() const RTM_NO_EXCEPT
|
|
{
|
|
#if defined(RTM_SSE2_INTRINSICS)
|
|
return _mm_setzero_ps();
|
|
#else
|
|
return vector_set(0.0F);
|
|
#endif
|
|
}
|
|
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Various vector widths we can load
|
|
//////////////////////////////////////////////////////////////////////////
|
|
enum class vector_unaligned_loader_width
{
	vec1,	// Load one component; [yzw] are zeroed
	vec2,	// Load two components; [zw] are zeroed
	vec3,	// Load three components; [w] is zeroed
	vec4,	// Load all four components
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// This is a helper struct to allow a single consistent API between
|
|
// various vector types when the semantics are identical but the return
|
|
// type differs. Implicit coercion is used to return the desired value
|
|
// at the call site.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
template<vector_unaligned_loader_width width>
struct vector_unaligned_loader
{
	// Coerces the bytes at 'ptr' into a double precision vector.
	// Lanes beyond 'width' are set to 0.0.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator vector4d() const RTM_NO_EXCEPT
	{
		// 'width' is a compile time template constant; only one case survives.
		switch (width)
		{
		case vector_unaligned_loader_width::vec1:
		{
			// std::memcpy performs the unaligned read without aliasing issues.
			double data[1];
			std::memcpy(&data[0], ptr, sizeof(double) * 1);
			return vector_set(data[0], 0.0, 0.0, 0.0);
		}
		case vector_unaligned_loader_width::vec2:
		{
			double data[2];
			std::memcpy(&data[0], ptr, sizeof(double) * 2);
			return vector_set(data[0], data[1], 0.0, 0.0);
		}
		case vector_unaligned_loader_width::vec3:
		{
			double data[3];
			std::memcpy(&data[0], ptr, sizeof(double) * 3);
			return vector_set(data[0], data[1], data[2], 0.0);
		}
		case vector_unaligned_loader_width::vec4:
		default:
		{
			vector4d result;
			std::memcpy(&result, ptr, sizeof(vector4d));
			return result;
		}
		}
	}

	// Coerces the bytes at 'ptr' into a single precision vector.
	// Lanes beyond 'width' are set to 0.0F.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator vector4f() const RTM_NO_EXCEPT
	{
		switch (width)
		{
		case vector_unaligned_loader_width::vec1:
		{
			float data[1];
			std::memcpy(&data[0], ptr, sizeof(float) * 1);
			return vector_set(data[0], 0.0F, 0.0F, 0.0F);
		}
		case vector_unaligned_loader_width::vec2:
		{
			float data[2];
			std::memcpy(&data[0], ptr, sizeof(float) * 2);
			return vector_set(data[0], data[1], 0.0F, 0.0F);
		}
		case vector_unaligned_loader_width::vec3:
		{
			float data[3];
			std::memcpy(&data[0], ptr, sizeof(float) * 3);
			return vector_set(data[0], data[1], data[2], 0.0F);
		}
		case vector_unaligned_loader_width::vec4:
		default:
		{
#if defined(RTM_SSE2_INTRINSICS)
			// Unaligned SIMD load for the full width case.
			return _mm_loadu_ps((const float*)ptr);
#elif defined(RTM_NEON_INTRINSICS)
			return vreinterpretq_f32_u8(vld1q_u8(ptr));
#else
			vector4f result;
			std::memcpy(&result, ptr, sizeof(vector4f));
			return result;
#endif
		}
		}
	}

	// Source bytes to load from; not owned, must outlive the loader.
	const uint8_t* ptr;
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// This is a helper struct to allow a single consistent API between
|
|
// various vector types when the semantics are identical but the return
|
|
// type differs. Implicit coercion is used to return the desired value
|
|
// at the call site.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
struct vector4f_to_scalarf
{
	// Returns the value of the first SIMD lane as a float.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator float() const RTM_NO_EXCEPT
	{
#if defined(RTM_SSE2_INTRINSICS)
		return _mm_cvtss_f32(value);
#elif defined(RTM_NEON_INTRINSICS)
		return vgetq_lane_f32(value, 0);
#else
		return value.x;
#endif
	}

#if defined(RTM_SSE2_INTRINSICS)
	// Wraps the register as a scalarf, keeping the value in a SIMD register.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator scalarf() const RTM_NO_EXCEPT
	{
		return scalarf{ value };
	}
#endif

	// The vector being coerced.
	vector4f value;
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// This is a helper struct to allow a single consistent API between
|
|
// various vector types when the semantics are identical but the return
|
|
// type differs. Implicit coercion is used to return the desired value
|
|
// at the call site.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
struct vector4d_to_scalard
{
	// Returns the value of the first SIMD lane as a double.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator double() const RTM_NO_EXCEPT
	{
#if defined(RTM_SSE2_INTRINSICS)
		return _mm_cvtsd_f64(value.xy);
#else
		return value.x;
#endif
	}

#if defined(RTM_SSE2_INTRINSICS)
	// Wraps the low register as a scalard, keeping the value in a SIMD register.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator scalard() const RTM_NO_EXCEPT
	{
		return scalard{ value.xy };
	}
#endif

	// The vector being coerced.
	vector4d value;
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// This is a helper struct to allow a single consistent API between
|
|
// various vector types when the semantics are identical but the return
|
|
// type differs. Implicit coercion is used to return the desired value
|
|
// at the call site.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
struct vector4f_get_min_component
{
	// Returns the smallest of the four components as a float.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator float() const RTM_NO_EXCEPT
	{
#if defined(RTM_SSE2_INTRINSICS)
		// Two-step reduction: first min(x,z)/min(y,w), then min of those two.
		__m128 zwzw = _mm_movehl_ps(value, value);
		__m128 xz_yw_zz_ww = _mm_min_ps(value, zwzw);
		__m128 yw_yw_yw_yw = _mm_shuffle_ps(xz_yw_zz_ww, xz_yw_zz_ww, _MM_SHUFFLE(1, 1, 1, 1));
		return _mm_cvtss_f32(_mm_min_ps(xz_yw_zz_ww, yw_yw_yw_yw));
#elif defined(RTM_NEON_INTRINSICS)
		// vpmin folds adjacent pairs: [min(x,y), min(z,w)], then once more.
		float32x2_t xy_zw = vpmin_f32(vget_low_f32(value), vget_high_f32(value));
		return vget_lane_f32(vpmin_f32(xy_zw, xy_zw), 0);
#else
		return scalar_min(scalar_min(value.x, value.y), scalar_min(value.z, value.w));
#endif
	}

#if defined(RTM_SSE2_INTRINSICS)
	// Same reduction but returns the result as a scalarf (stays in a register).
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator scalarf() const RTM_NO_EXCEPT
	{
		__m128 zwzw = _mm_movehl_ps(value, value);
		__m128 xz_yw_zz_ww = _mm_min_ps(value, zwzw);
		__m128 yw_yw_yw_yw = _mm_shuffle_ps(xz_yw_zz_ww, xz_yw_zz_ww, _MM_SHUFFLE(1, 1, 1, 1));
		return scalarf{ _mm_min_ps(xz_yw_zz_ww, yw_yw_yw_yw) };
	}
#endif

	// The vector being reduced.
	vector4f value;
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// This is a helper struct to allow a single consistent API between
|
|
// various vector types when the semantics are identical but the return
|
|
// type differs. Implicit coercion is used to return the desired value
|
|
// at the call site.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
struct vector4f_get_max_component
{
	// Returns the largest of the four components as a float.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator float() const RTM_NO_EXCEPT
	{
#if defined(RTM_SSE2_INTRINSICS)
		// Two-step reduction: first max(x,z)/max(y,w), then max of those two.
		__m128 zwzw = _mm_movehl_ps(value, value);
		__m128 xz_yw_zz_ww = _mm_max_ps(value, zwzw);
		__m128 yw_yw_yw_yw = _mm_shuffle_ps(xz_yw_zz_ww, xz_yw_zz_ww, _MM_SHUFFLE(1, 1, 1, 1));
		return _mm_cvtss_f32(_mm_max_ps(xz_yw_zz_ww, yw_yw_yw_yw));
#elif defined(RTM_NEON_INTRINSICS)
		// vpmax folds adjacent pairs: [max(x,y), max(z,w)], then once more.
		float32x2_t xy_zw = vpmax_f32(vget_low_f32(value), vget_high_f32(value));
		return vget_lane_f32(vpmax_f32(xy_zw, xy_zw), 0);
#else
		return scalar_max(scalar_max(value.x, value.y), scalar_max(value.z, value.w));
#endif
	}

#if defined(RTM_SSE2_INTRINSICS)
	// Same reduction but returns the result as a scalarf (stays in a register).
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator scalarf() const RTM_NO_EXCEPT
	{
		__m128 zwzw = _mm_movehl_ps(value, value);
		__m128 xz_yw_zz_ww = _mm_max_ps(value, zwzw);
		__m128 yw_yw_yw_yw = _mm_shuffle_ps(xz_yw_zz_ww, xz_yw_zz_ww, _MM_SHUFFLE(1, 1, 1, 1));
		return scalarf{ _mm_max_ps(xz_yw_zz_ww, yw_yw_yw_yw) };
	}
#endif

	// The vector being reduced.
	vector4f value;
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// This is a helper struct to allow a single consistent API between
|
|
// various vector types when the semantics are identical but the return
|
|
// type differs. Implicit coercion is used to return the desired value
|
|
// at the call site.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
struct vector4d_get_min_component
{
	// Returns the smallest of the four components as a double.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator double() const RTM_NO_EXCEPT
	{
#if defined(RTM_SSE2_INTRINSICS)
		// min(x,z) and min(y,w) in one op, then min of the two lanes.
		__m128d xz_yw = _mm_min_pd(value.xy, value.zw);
		__m128d yw_yw = _mm_shuffle_pd(xz_yw, xz_yw, 1);
		return _mm_cvtsd_f64(_mm_min_pd(xz_yw, yw_yw));
#else
		return scalar_min(scalar_min(value.x, value.y), scalar_min(value.z, value.w));
#endif
	}

#if defined(RTM_SSE2_INTRINSICS)
	// Same reduction but returns the result as a scalard (stays in a register).
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator scalard() const RTM_NO_EXCEPT
	{
		__m128d xz_yw = _mm_min_pd(value.xy, value.zw);
		__m128d yw_yw = _mm_shuffle_pd(xz_yw, xz_yw, 1);
		return scalard{ _mm_min_pd(xz_yw, yw_yw) };
	}
#endif

	// The vector being reduced.
	vector4d value;
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// This is a helper struct to allow a single consistent API between
|
|
// various vector types when the semantics are identical but the return
|
|
// type differs. Implicit coercion is used to return the desired value
|
|
// at the call site.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
struct vector4d_get_max_component
{
	// Returns the largest of the four components as a double.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator double() const RTM_NO_EXCEPT
	{
#if defined(RTM_SSE2_INTRINSICS)
		// max(x,z) and max(y,w) in one op, then max of the two lanes.
		__m128d xz_yw = _mm_max_pd(value.xy, value.zw);
		__m128d yw_yw = _mm_shuffle_pd(xz_yw, xz_yw, 1);
		return _mm_cvtsd_f64(_mm_max_pd(xz_yw, yw_yw));
#else
		return scalar_max(scalar_max(value.x, value.y), scalar_max(value.z, value.w));
#endif
	}

#if defined(RTM_SSE2_INTRINSICS)
	// Same reduction but returns the result as a scalard (stays in a register).
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator scalard() const RTM_NO_EXCEPT
	{
		__m128d xz_yw = _mm_max_pd(value.xy, value.zw);
		__m128d yw_yw = _mm_shuffle_pd(xz_yw, xz_yw, 1);
		return scalard{ _mm_max_pd(xz_yw, yw_yw) };
	}
#endif

	// The vector being reduced.
	vector4d value;
};
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Returns a vector consisting of all zeros.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr rtm_impl::vector_zero_impl RTM_SIMD_CALL vector_zero() RTM_NO_EXCEPT
{
	// The helper coerces to either vector4f or vector4d at the call site.
	return rtm_impl::vector_zero_impl{};
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Loads an unaligned vector4 from memory.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr rtm_impl::vector_unaligned_loader<rtm_impl::vector_unaligned_loader_width::vec4> RTM_SIMD_CALL vector_load(const uint8_t* input) RTM_NO_EXCEPT
{
	// The loader coerces to either vector4f or vector4d at the call site.
	typedef rtm_impl::vector_unaligned_loader<rtm_impl::vector_unaligned_loader_width::vec4> loader_t;
	return loader_t{ input };
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Loads an unaligned vector1 from memory and sets the [yzw] components to zero.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr rtm_impl::vector_unaligned_loader<rtm_impl::vector_unaligned_loader_width::vec1> RTM_SIMD_CALL vector_load1(const uint8_t* input) RTM_NO_EXCEPT
{
	// The loader coerces to either vector4f or vector4d at the call site.
	typedef rtm_impl::vector_unaligned_loader<rtm_impl::vector_unaligned_loader_width::vec1> loader_t;
	return loader_t{ input };
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Loads an unaligned vector2 from memory and sets the [zw] components to zero.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr rtm_impl::vector_unaligned_loader<rtm_impl::vector_unaligned_loader_width::vec2> RTM_SIMD_CALL vector_load2(const uint8_t* input) RTM_NO_EXCEPT
{
	// The loader coerces to either vector4f or vector4d at the call site.
	typedef rtm_impl::vector_unaligned_loader<rtm_impl::vector_unaligned_loader_width::vec2> loader_t;
	return loader_t{ input };
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Loads an unaligned vector3 from memory and sets the [w] component to zero.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr rtm_impl::vector_unaligned_loader<rtm_impl::vector_unaligned_loader_width::vec3> RTM_SIMD_CALL vector_load3(const uint8_t* input) RTM_NO_EXCEPT
{
	// The loader coerces to either vector4f or vector4d at the call site.
	typedef rtm_impl::vector_unaligned_loader<rtm_impl::vector_unaligned_loader_width::vec3> loader_t;
	return loader_t{ input };
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
// Coerces a vector4 input into a scalar by grabbing the first SIMD lane.
//////////////////////////////////////////////////////////////////////////
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr rtm_impl::vector4f_to_scalarf RTM_SIMD_CALL vector_as_scalar(vector4f_arg0 input) RTM_NO_EXCEPT
{
	// The helper coerces to float or, with SSE2, to scalarf at the call site.
	return rtm_impl::vector4f_to_scalarf{ input };
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
// Coerces a vector4 input into a scalar by grabbing the first SIMD lane.
//////////////////////////////////////////////////////////////////////////
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr rtm_impl::vector4d_to_scalard RTM_SIMD_CALL vector_as_scalar(const vector4d& input) RTM_NO_EXCEPT
{
	// The helper coerces to double or, with SSE2, to scalard at the call site.
	return rtm_impl::vector4d_to_scalard{ input };
}
|
|
}
|
|
|
|
RTM_IMPL_FILE_PRAGMA_POP
|