613 lines
24 KiB
C++
613 lines
24 KiB
C++
#pragma once
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// The MIT License (MIT)
|
|
//
|
|
// Copyright (c) 2018 Nicholas Frechette & Realtime Math contributors
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
// in the Software without restriction, including without limitation the rights
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions:
|
|
//
|
|
// The above copyright notice and this permission notice shall be included in all
|
|
// copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
// SOFTWARE.
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#include "rtm/types.h"
|
|
#include "rtm/impl/compiler_utils.h"
|
|
#include "rtm/scalarf.h"
|
|
#include "rtm/scalard.h"
|
|
|
|
#include <cstdint>
|
|
#include <cstring>
|
|
|
|
RTM_IMPL_FILE_PRAGMA_PUSH
|
|
|
|
namespace rtm
|
|
{
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from all 4 components.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4f RTM_SIMD_CALL vector_set(float x, float y, float z, float w) RTM_NO_EXCEPT
{
#if defined(RTM_SSE2_INTRINSICS)
	// _mm_set_ps takes its arguments from high lane to low lane.
	return _mm_set_ps(w, z, y, x);
#elif defined(RTM_NEON_INTRINSICS)
	// Copy the float bit patterns through std::memcpy instead of dereferencing
	// type-punned pointers: the previous `*(const uint32_t*)&x` form violated
	// strict aliasing (undefined behavior). memcpy compiles to the same code.
	uint32_t x_u32;
	uint32_t y_u32;
	uint32_t z_u32;
	uint32_t w_u32;
	std::memcpy(&x_u32, &x, sizeof(float));
	std::memcpy(&y_u32, &y, sizeof(float));
	std::memcpy(&z_u32, &z, sizeof(float));
	std::memcpy(&w_u32, &w, sizeof(float));
	float32x2_t V0 = vcreate_f32((uint64_t)x_u32 | ((uint64_t)y_u32 << 32));
	float32x2_t V1 = vcreate_f32((uint64_t)z_u32 | ((uint64_t)w_u32 << 32));
	return vcombine_f32(V0, V1);
#else
	return vector4f{ x, y, z, w };
#endif
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from the [xyz] components and sets [w] to 0.0.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4f RTM_SIMD_CALL vector_set(float x, float y, float z) RTM_NO_EXCEPT
{
#if defined(RTM_SSE2_INTRINSICS)
	// _mm_set_ps takes its arguments from high lane to low lane.
	return _mm_set_ps(0.0F, z, y, x);
#elif defined(RTM_NEON_INTRINSICS)
	// Copy the float bit patterns through std::memcpy instead of dereferencing
	// type-punned pointers: the previous `*(const uint32_t*)&x` form violated
	// strict aliasing (undefined behavior). memcpy compiles to the same code.
	uint32_t x_u32;
	uint32_t y_u32;
	uint32_t z_u32;
	std::memcpy(&x_u32, &x, sizeof(float));
	std::memcpy(&y_u32, &y, sizeof(float));
	std::memcpy(&z_u32, &z, sizeof(float));
	float32x2_t V0 = vcreate_f32((uint64_t)x_u32 | ((uint64_t)y_u32 << 32));
	// The high half of V1 stays zero, which also zeroes [w].
	float32x2_t V1 = vcreate_f32((uint64_t)z_u32);
	return vcombine_f32(V0, V1);
#else
	return vector4f{ x, y, z, 0.0f };
#endif
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from a single value for all 4 components.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4f RTM_SIMD_CALL vector_set(float xyzw) RTM_NO_EXCEPT
{
	// Broadcasts the single input value into all four lanes.
#if defined(RTM_SSE2_INTRINSICS)
	return _mm_set1_ps(xyzw);
#elif defined(RTM_NEON_INTRINSICS)
	return vdupq_n_f32(xyzw);
#else
	return vector4f{ xyzw, xyzw, xyzw, xyzw };
#endif
}
|
|
|
|
#if defined(RTM_SSE2_INTRINSICS)
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from all 4 components.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4f RTM_SIMD_CALL vector_set(scalarf_arg0 x, scalarf_arg1 y, scalarf_arg2 z, scalarf_arg3 w) RTM_NO_EXCEPT
{
	// Interleave the low lanes pairwise: [x y ? ?] and [z w ? ?].
	const __m128 x0y0 = _mm_unpacklo_ps(x.value, y.value);
	const __m128 z0w0 = _mm_unpacklo_ps(z.value, w.value);
	// _mm_movelh_ps(a, b) yields [a0 a1 b0 b1], identical to the
	// _mm_shuffle_ps(a, b, _MM_SHUFFLE(1, 0, 1, 0)) idiom.
	return _mm_movelh_ps(x0y0, z0w0);
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from the [xyz] components and sets [w] to 0.0.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4f RTM_SIMD_CALL vector_set(scalarf_arg0 x, scalarf_arg1 y, scalarf_arg2 z) RTM_NO_EXCEPT
{
	// Interleave the low lanes pairwise: [x y ? ?] and [z 0 ? ?].
	const __m128 x0y0 = _mm_unpacklo_ps(x.value, y.value);
	const __m128 z0w0 = _mm_unpacklo_ps(z.value, _mm_setzero_ps());
	// _mm_movelh_ps(a, b) yields [a0 a1 b0 b1], identical to the
	// _mm_shuffle_ps(a, b, _MM_SHUFFLE(1, 0, 1, 0)) idiom; [w] ends up 0.0.
	return _mm_movelh_ps(x0y0, z0w0);
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from a single value for all 4 components.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4f RTM_SIMD_CALL vector_set(scalarf_arg0 xyzw) RTM_NO_EXCEPT
{
	// Replicates lane 0 of the scalar across all four lanes.
	const __m128 input = xyzw.value;
	return _mm_shuffle_ps(input, input, _MM_SHUFFLE(0, 0, 0, 0));
}
|
|
#endif
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from all 4 components.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4d vector_set(double x, double y, double z, double w) RTM_NO_EXCEPT
{
#if defined(RTM_SSE2_INTRINSICS)
	// _mm_set_pd takes its arguments from high lane to low lane.
	const __m128d xy = _mm_set_pd(y, x);
	const __m128d zw = _mm_set_pd(w, z);
	return vector4d{ xy, zw };
#else
	return vector4d{ x, y, z, w };
#endif
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from the [xyz] components and sets [w] to 0.0.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4d vector_set(double x, double y, double z) RTM_NO_EXCEPT
{
#if defined(RTM_SSE2_INTRINSICS)
	// _mm_set_pd takes its arguments from high lane to low lane; [w] is zeroed.
	const __m128d xy = _mm_set_pd(y, x);
	const __m128d zw = _mm_set_pd(0.0, z);
	return vector4d{ xy, zw };
#else
	return vector4d{ x, y, z, 0.0 };
#endif
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from a single value for all 4 components.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4d vector_set(double xyzw) RTM_NO_EXCEPT
{
	// Broadcasts the single input value into all four lanes.
#if defined(RTM_SSE2_INTRINSICS)
	const __m128d splat = _mm_set1_pd(xyzw);
	return vector4d{ splat, splat };
#else
	return vector4d{ xyzw, xyzw, xyzw, xyzw };
#endif
}
|
|
|
|
#if defined(RTM_SSE2_INTRINSICS)
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from all 4 components.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4d RTM_SIMD_CALL vector_set(scalard x, scalard y, scalard z, scalard w) RTM_NO_EXCEPT
{
	// Pack the low lane of each scalar pairwise: [x y] and [z w].
	const __m128d x0y0 = _mm_unpacklo_pd(x.value, y.value);
	const __m128d z0w0 = _mm_unpacklo_pd(z.value, w.value);
	return vector4d{ x0y0, z0w0 };
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from the [xyz] components and sets [w] to 0.0.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4d RTM_SIMD_CALL vector_set(scalard x, scalard y, scalard z) RTM_NO_EXCEPT
{
	// Pack the low lane of each scalar pairwise: [x y] and [z 0].
	const __m128d x0y0 = _mm_unpacklo_pd(x.value, y.value);
	const __m128d z0w0 = _mm_unpacklo_pd(z.value, _mm_setzero_pd());
	return vector4d{ x0y0, z0w0 };
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Creates a vector4 from a single value for all 4 components.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4d RTM_SIMD_CALL vector_set(scalard xyzw) RTM_NO_EXCEPT
{
	// Duplicate the low lane into both lanes of each half;
	// _mm_unpacklo_pd(v, v) is identical to _mm_shuffle_pd(v, v, 0).
	const __m128d splat = _mm_unpacklo_pd(xyzw.value, xyzw.value);
	return vector4d{ splat, splat };
}
|
|
#endif
|
|
|
|
namespace rtm_impl
|
|
{
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Returns true if mix4 component is one of [xyzw]
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr bool is_mix_xyzw(mix4 arg) RTM_NO_EXCEPT { return static_cast<uint32_t>(arg) <= static_cast<uint32_t>(mix4::w); }
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Returns true if mix4 component is one of [abcd]
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr bool is_mix_abcd(mix4 arg) RTM_NO_EXCEPT { return static_cast<uint32_t>(arg) >= static_cast<uint32_t>(mix4::a); }
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// This is a helper struct to help manipulate SIMD masks.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
union mask_converter
{
	// Full 64 bit view of the mask.
	uint64_t u64;
	// The same mask viewed as two 32 bit halves.
	uint32_t u32[2];

	// Constructs the converter from a 64 bit mask value.
	RTM_DISABLE_SECURITY_COOKIE_CHECK explicit RTM_FORCE_INLINE constexpr mask_converter(uint64_t value) RTM_NO_EXCEPT : u64(value) {}

	// Returns a 32 bit view of the mask.
	// NOTE(review): u32[0] is the low half of u64 only on little-endian
	// targets; masks produced here are all-ones or all-zeros (see
	// get_mask_value) so both halves agree in practice — confirm if other
	// mask values are ever stored.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr operator uint32_t() const RTM_NO_EXCEPT { return u32[0]; }
	// Returns the mask as its full 64 bit value.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr operator uint64_t() const RTM_NO_EXCEPT { return u64; }
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Returns a SIMD mask value from a boolean.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr mask_converter get_mask_value(bool is_true) RTM_NO_EXCEPT
{
	// All bits set when true, all bits cleared when false.
	return mask_converter(is_true ? ~uint64_t(0) : uint64_t(0));
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Selects if_false if the SIMD mask value is 0, otherwise if_true.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr double select(uint64_t mask, double if_true, double if_false) RTM_NO_EXCEPT
{
	// A non-zero mask picks if_true; a zero mask picks if_false.
	return mask != 0 ? if_true : if_false;
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Selects if_false if the SIMD mask value is 0, otherwise if_true.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr float select(uint32_t mask, float if_true, float if_false) RTM_NO_EXCEPT
{
	// A non-zero mask picks if_true; a zero mask picks if_false.
	return mask != 0 ? if_true : if_false;
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// This is a helper struct to allow a single consistent API between
|
|
// various vector types when the semantics are identical but the return
|
|
// type differs. Implicit coercion is used to return the desired value
|
|
// at the call site.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
struct vector_zero_impl
|
|
{
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator vector4d() const RTM_NO_EXCEPT
|
|
{
|
|
#if defined(RTM_SSE2_INTRINSICS)
|
|
const __m128d zero_pd = _mm_setzero_pd();
|
|
return vector4d{ zero_pd, zero_pd };
|
|
#else
|
|
return vector_set(0.0);
|
|
#endif
|
|
}
|
|
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator vector4f() const RTM_NO_EXCEPT
|
|
{
|
|
#if defined(RTM_SSE2_INTRINSICS)
|
|
return _mm_setzero_ps();
|
|
#else
|
|
return vector_set(0.0F);
|
|
#endif
|
|
}
|
|
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Various vector widths we can load
|
|
//////////////////////////////////////////////////////////////////////////
|
|
enum class vector_unaligned_loader_width
{
	vec1,	// Load one component; [yzw] are zeroed
	vec2,	// Load two components; [zw] are zeroed
	vec3,	// Load three components; [w] is zeroed
	vec4,	// Load all four components
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// This is a helper struct to allow a single consistent API between
|
|
// various vector types when the semantics are identical but the return
|
|
// type differs. Implicit coercion is used to return the desired value
|
|
// at the call site.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
template<vector_unaligned_loader_width width>
struct vector_unaligned_loader
{
	// Coerces the bytes at 'ptr' into a double precision vector.
	// Lanes beyond 'width' are set to 0.0.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator vector4d() const RTM_NO_EXCEPT
	{
		// 'width' is a compile time template constant; only one case survives.
		switch (width)
		{
		case vector_unaligned_loader_width::vec1:
		{
			// std::memcpy performs the unaligned read without aliasing issues.
			double data[1];
			std::memcpy(&data[0], ptr, sizeof(double) * 1);
			return vector_set(data[0], 0.0, 0.0, 0.0);
		}
		case vector_unaligned_loader_width::vec2:
		{
			double data[2];
			std::memcpy(&data[0], ptr, sizeof(double) * 2);
			return vector_set(data[0], data[1], 0.0, 0.0);
		}
		case vector_unaligned_loader_width::vec3:
		{
			double data[3];
			std::memcpy(&data[0], ptr, sizeof(double) * 3);
			return vector_set(data[0], data[1], data[2], 0.0);
		}
		case vector_unaligned_loader_width::vec4:
		default:
		{
			vector4d result;
			std::memcpy(&result, ptr, sizeof(vector4d));
			return result;
		}
		}
	}

	// Coerces the bytes at 'ptr' into a single precision vector.
	// Lanes beyond 'width' are set to 0.0F.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator vector4f() const RTM_NO_EXCEPT
	{
		switch (width)
		{
		case vector_unaligned_loader_width::vec1:
		{
			float data[1];
			std::memcpy(&data[0], ptr, sizeof(float) * 1);
			return vector_set(data[0], 0.0F, 0.0F, 0.0F);
		}
		case vector_unaligned_loader_width::vec2:
		{
			float data[2];
			std::memcpy(&data[0], ptr, sizeof(float) * 2);
			return vector_set(data[0], data[1], 0.0F, 0.0F);
		}
		case vector_unaligned_loader_width::vec3:
		{
			float data[3];
			std::memcpy(&data[0], ptr, sizeof(float) * 3);
			return vector_set(data[0], data[1], data[2], 0.0F);
		}
		case vector_unaligned_loader_width::vec4:
		default:
		{
#if defined(RTM_SSE2_INTRINSICS)
			// Unaligned SIMD load for the full width case.
			return _mm_loadu_ps((const float*)ptr);
#elif defined(RTM_NEON_INTRINSICS)
			return vreinterpretq_f32_u8(vld1q_u8(ptr));
#else
			vector4f result;
			std::memcpy(&result, ptr, sizeof(vector4f));
			return result;
#endif
		}
		}
	}

	// Source bytes to load from; not owned, must outlive the loader.
	const uint8_t* ptr;
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// This is a helper struct to allow a single consistent API between
|
|
// various vector types when the semantics are identical but the return
|
|
// type differs. Implicit coercion is used to return the desired value
|
|
// at the call site.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
struct vector4f_to_scalarf
{
	// Returns the value of the first SIMD lane as a float.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator float() const RTM_NO_EXCEPT
	{
#if defined(RTM_SSE2_INTRINSICS)
		return _mm_cvtss_f32(value);
#elif defined(RTM_NEON_INTRINSICS)
		return vgetq_lane_f32(value, 0);
#else
		return value.x;
#endif
	}

#if defined(RTM_SSE2_INTRINSICS)
	// Wraps the register as a scalarf, keeping the value in a SIMD register.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator scalarf() const RTM_NO_EXCEPT
	{
		return scalarf{ value };
	}
#endif

	// The vector being coerced.
	vector4f value;
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// This is a helper struct to allow a single consistent API between
|
|
// various vector types when the semantics are identical but the return
|
|
// type differs. Implicit coercion is used to return the desired value
|
|
// at the call site.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
struct vector4d_to_scalard
{
	// Returns the value of the first SIMD lane as a double.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator double() const RTM_NO_EXCEPT
	{
#if defined(RTM_SSE2_INTRINSICS)
		return _mm_cvtsd_f64(value.xy);
#else
		return value.x;
#endif
	}

#if defined(RTM_SSE2_INTRINSICS)
	// Wraps the low register as a scalard, keeping the value in a SIMD register.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator scalard() const RTM_NO_EXCEPT
	{
		return scalard{ value.xy };
	}
#endif

	// The vector being coerced.
	vector4d value;
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// This is a helper struct to allow a single consistent API between
|
|
// various vector types when the semantics are identical but the return
|
|
// type differs. Implicit coercion is used to return the desired value
|
|
// at the call site.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
struct vector4f_get_min_component
{
	// Returns the smallest of the four components as a float.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator float() const RTM_NO_EXCEPT
	{
#if defined(RTM_SSE2_INTRINSICS)
		// Two-step reduction: first min(x,z)/min(y,w), then min of those two.
		__m128 zwzw = _mm_movehl_ps(value, value);
		__m128 xz_yw_zz_ww = _mm_min_ps(value, zwzw);
		__m128 yw_yw_yw_yw = _mm_shuffle_ps(xz_yw_zz_ww, xz_yw_zz_ww, _MM_SHUFFLE(1, 1, 1, 1));
		return _mm_cvtss_f32(_mm_min_ps(xz_yw_zz_ww, yw_yw_yw_yw));
#elif defined(RTM_NEON_INTRINSICS)
		// vpmin folds adjacent pairs: [min(x,y), min(z,w)], then once more.
		float32x2_t xy_zw = vpmin_f32(vget_low_f32(value), vget_high_f32(value));
		return vget_lane_f32(vpmin_f32(xy_zw, xy_zw), 0);
#else
		return scalar_min(scalar_min(value.x, value.y), scalar_min(value.z, value.w));
#endif
	}

#if defined(RTM_SSE2_INTRINSICS)
	// Same reduction but returns the result as a scalarf (stays in a register).
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator scalarf() const RTM_NO_EXCEPT
	{
		__m128 zwzw = _mm_movehl_ps(value, value);
		__m128 xz_yw_zz_ww = _mm_min_ps(value, zwzw);
		__m128 yw_yw_yw_yw = _mm_shuffle_ps(xz_yw_zz_ww, xz_yw_zz_ww, _MM_SHUFFLE(1, 1, 1, 1));
		return scalarf{ _mm_min_ps(xz_yw_zz_ww, yw_yw_yw_yw) };
	}
#endif

	// The vector being reduced.
	vector4f value;
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// This is a helper struct to allow a single consistent API between
|
|
// various vector types when the semantics are identical but the return
|
|
// type differs. Implicit coercion is used to return the desired value
|
|
// at the call site.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
struct vector4f_get_max_component
{
	// Returns the largest of the four components as a float.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator float() const RTM_NO_EXCEPT
	{
#if defined(RTM_SSE2_INTRINSICS)
		// Two-step reduction: first max(x,z)/max(y,w), then max of those two.
		__m128 zwzw = _mm_movehl_ps(value, value);
		__m128 xz_yw_zz_ww = _mm_max_ps(value, zwzw);
		__m128 yw_yw_yw_yw = _mm_shuffle_ps(xz_yw_zz_ww, xz_yw_zz_ww, _MM_SHUFFLE(1, 1, 1, 1));
		return _mm_cvtss_f32(_mm_max_ps(xz_yw_zz_ww, yw_yw_yw_yw));
#elif defined(RTM_NEON_INTRINSICS)
		// vpmax folds adjacent pairs: [max(x,y), max(z,w)], then once more.
		float32x2_t xy_zw = vpmax_f32(vget_low_f32(value), vget_high_f32(value));
		return vget_lane_f32(vpmax_f32(xy_zw, xy_zw), 0);
#else
		return scalar_max(scalar_max(value.x, value.y), scalar_max(value.z, value.w));
#endif
	}

#if defined(RTM_SSE2_INTRINSICS)
	// Same reduction but returns the result as a scalarf (stays in a register).
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator scalarf() const RTM_NO_EXCEPT
	{
		__m128 zwzw = _mm_movehl_ps(value, value);
		__m128 xz_yw_zz_ww = _mm_max_ps(value, zwzw);
		__m128 yw_yw_yw_yw = _mm_shuffle_ps(xz_yw_zz_ww, xz_yw_zz_ww, _MM_SHUFFLE(1, 1, 1, 1));
		return scalarf{ _mm_max_ps(xz_yw_zz_ww, yw_yw_yw_yw) };
	}
#endif

	// The vector being reduced.
	vector4f value;
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// This is a helper struct to allow a single consistent API between
|
|
// various vector types when the semantics are identical but the return
|
|
// type differs. Implicit coercion is used to return the desired value
|
|
// at the call site.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
struct vector4d_get_min_component
{
	// Returns the smallest of the four components as a double.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator double() const RTM_NO_EXCEPT
	{
#if defined(RTM_SSE2_INTRINSICS)
		// min(x,z) and min(y,w) in one op, then min of the two lanes.
		__m128d xz_yw = _mm_min_pd(value.xy, value.zw);
		__m128d yw_yw = _mm_shuffle_pd(xz_yw, xz_yw, 1);
		return _mm_cvtsd_f64(_mm_min_pd(xz_yw, yw_yw));
#else
		return scalar_min(scalar_min(value.x, value.y), scalar_min(value.z, value.w));
#endif
	}

#if defined(RTM_SSE2_INTRINSICS)
	// Same reduction but returns the result as a scalard (stays in a register).
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator scalard() const RTM_NO_EXCEPT
	{
		__m128d xz_yw = _mm_min_pd(value.xy, value.zw);
		__m128d yw_yw = _mm_shuffle_pd(xz_yw, xz_yw, 1);
		return scalard{ _mm_min_pd(xz_yw, yw_yw) };
	}
#endif

	// The vector being reduced.
	vector4d value;
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// This is a helper struct to allow a single consistent API between
|
|
// various vector types when the semantics are identical but the return
|
|
// type differs. Implicit coercion is used to return the desired value
|
|
// at the call site.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
struct vector4d_get_max_component
{
	// Returns the largest of the four components as a double.
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator double() const RTM_NO_EXCEPT
	{
#if defined(RTM_SSE2_INTRINSICS)
		// max(x,z) and max(y,w) in one op, then max of the two lanes.
		__m128d xz_yw = _mm_max_pd(value.xy, value.zw);
		__m128d yw_yw = _mm_shuffle_pd(xz_yw, xz_yw, 1);
		return _mm_cvtsd_f64(_mm_max_pd(xz_yw, yw_yw));
#else
		return scalar_max(scalar_max(value.x, value.y), scalar_max(value.z, value.w));
#endif
	}

#if defined(RTM_SSE2_INTRINSICS)
	// Same reduction but returns the result as a scalard (stays in a register).
	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE RTM_SIMD_CALL operator scalard() const RTM_NO_EXCEPT
	{
		__m128d xz_yw = _mm_max_pd(value.xy, value.zw);
		__m128d yw_yw = _mm_shuffle_pd(xz_yw, xz_yw, 1);
		return scalard{ _mm_max_pd(xz_yw, yw_yw) };
	}
#endif

	// The vector being reduced.
	vector4d value;
};
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Returns a vector consisting of all zeros.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr rtm_impl::vector_zero_impl RTM_SIMD_CALL vector_zero() RTM_NO_EXCEPT
{
	// The helper coerces to either vector4f or vector4d at the call site.
	return rtm_impl::vector_zero_impl{};
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Loads an unaligned vector4 from memory.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr rtm_impl::vector_unaligned_loader<rtm_impl::vector_unaligned_loader_width::vec4> RTM_SIMD_CALL vector_load(const uint8_t* input) RTM_NO_EXCEPT
{
	// The loader coerces to either vector4f or vector4d at the call site.
	typedef rtm_impl::vector_unaligned_loader<rtm_impl::vector_unaligned_loader_width::vec4> loader_t;
	return loader_t{ input };
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Loads an unaligned vector1 from memory and sets the [yzw] components to zero.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr rtm_impl::vector_unaligned_loader<rtm_impl::vector_unaligned_loader_width::vec1> RTM_SIMD_CALL vector_load1(const uint8_t* input) RTM_NO_EXCEPT
{
	// The loader coerces to either vector4f or vector4d at the call site.
	typedef rtm_impl::vector_unaligned_loader<rtm_impl::vector_unaligned_loader_width::vec1> loader_t;
	return loader_t{ input };
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Loads an unaligned vector2 from memory and sets the [zw] components to zero.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr rtm_impl::vector_unaligned_loader<rtm_impl::vector_unaligned_loader_width::vec2> RTM_SIMD_CALL vector_load2(const uint8_t* input) RTM_NO_EXCEPT
{
	// The loader coerces to either vector4f or vector4d at the call site.
	typedef rtm_impl::vector_unaligned_loader<rtm_impl::vector_unaligned_loader_width::vec2> loader_t;
	return loader_t{ input };
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Loads an unaligned vector3 from memory and sets the [w] component to zero.
|
|
//////////////////////////////////////////////////////////////////////////
|
|
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr rtm_impl::vector_unaligned_loader<rtm_impl::vector_unaligned_loader_width::vec3> RTM_SIMD_CALL vector_load3(const uint8_t* input) RTM_NO_EXCEPT
{
	// The loader coerces to either vector4f or vector4d at the call site.
	typedef rtm_impl::vector_unaligned_loader<rtm_impl::vector_unaligned_loader_width::vec3> loader_t;
	return loader_t{ input };
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
// Coerces a vector4 input into a scalar by grabbing the first SIMD lane.
//////////////////////////////////////////////////////////////////////////
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr rtm_impl::vector4f_to_scalarf RTM_SIMD_CALL vector_as_scalar(vector4f_arg0 input) RTM_NO_EXCEPT
{
	// The helper coerces to float or, with SSE2, to scalarf at the call site.
	return rtm_impl::vector4f_to_scalarf{ input };
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
// Coerces a vector4 input into a scalar by grabbing the first SIMD lane.
//////////////////////////////////////////////////////////////////////////
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE constexpr rtm_impl::vector4d_to_scalard RTM_SIMD_CALL vector_as_scalar(const vector4d& input) RTM_NO_EXCEPT
{
	// The helper coerces to double or, with SSE2, to scalard at the call site.
	return rtm_impl::vector4d_to_scalard{ input };
}
|
|
}
|
|
|
|
RTM_IMPL_FILE_PRAGMA_POP
|