364 lines
12 KiB
C++
364 lines
12 KiB
C++
#pragma once
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
// The MIT License (MIT)
//
// Copyright (c) 2017 Nicholas Frechette & Animation Compression Library contributors
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#include "acl/core/impl/compiler_utils.h"
|
|
#include "acl/core/error.h"
|
|
|
|
#include <rtm/math.h>
|
|
|
|
#include <cstdint>
|
|
#include <cstring>
|
|
#include <type_traits>
|
|
#include <limits>
|
|
#include <memory>
|
|
#include <algorithm>
|
|
|
|
// For byte swapping intrinsics
|
|
#if defined(_MSC_VER)
|
|
#include <cstdlib>
|
|
#elif defined(__APPLE__)
|
|
#include <libkern/OSByteOrder.h>
|
|
#endif
|
|
|
|
// For __prefetch
|
|
#if defined(RTM_NEON64_INTRINSICS) && defined(ACL_COMPILER_MSVC)
|
|
#include <intrin.h>
|
|
#endif
|
|
|
|
ACL_IMPL_FILE_PRAGMA_PUSH
|
|
|
|
namespace acl
|
|
{
|
|
//////////////////////////////////////////////////////////////////////////
// Allows static branching without any warnings
|
|
|
|
// Wraps a compile-time boolean behind a function call so that it can be used as
// the condition of a regular 'if' statement.
// The primary template is selected when 'expression_result' is true.
template<bool expression_result>
struct static_condition
{
	// Returns true: the 'false' case is handled by the specialization below
	static constexpr bool test() { return true; }
};

// Specialization selected when the expression evaluates to false.
template<>
struct static_condition<false>
{
	static constexpr bool test() { return false; }
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
// Various miscellaneous utilities related to alignment
|
|
|
|
// Returns true when 'input' is a power of two, that is when it is non-zero
// and has exactly one bit set. Zero is not considered a power of two.
constexpr bool is_power_of_two(size_t input)
{
	// Subtracting one clears the lowest set bit and sets every bit below it:
	// the AND is zero only when a single bit was set to begin with
	return input != 0 && (input & (input - 1)) == 0;
}
|
|
|
|
template<typename Type>
|
|
constexpr bool is_alignment_valid(size_t alignment)
|
|
{
|
|
return is_power_of_two(alignment) && alignment >= alignof(Type);
|
|
}
|
|
|
|
template<typename PtrType>
|
|
inline bool is_aligned_to(PtrType* value, size_t alignment)
|
|
{
|
|
ACL_ASSERT(is_power_of_two(alignment), "Alignment value must be a power of two");
|
|
return (reinterpret_cast<intptr_t>(value) & (alignment - 1)) == 0;
|
|
}
|
|
|
|
template<typename IntegralType>
|
|
inline bool is_aligned_to(IntegralType value, size_t alignment)
|
|
{
|
|
ACL_ASSERT(is_power_of_two(alignment), "Alignment value must be a power of two");
|
|
return (static_cast<size_t>(value) & (alignment - 1)) == 0;
|
|
}
|
|
|
|
// Returns true when 'value' satisfies the alignment requirement of the type it points to.
// Not marked constexpr: it forwards to is_aligned_to(..) which is not constexpr and
// converts the pointer to an integer, so it can never be evaluated at compile time.
template<typename PtrType>
inline bool is_aligned(PtrType* value)
{
	return is_aligned_to(value, alignof(PtrType));
}
|
|
|
|
template<typename PtrType>
|
|
inline PtrType* align_to(PtrType* value, size_t alignment)
|
|
{
|
|
ACL_ASSERT(is_power_of_two(alignment), "Alignment value must be a power of two");
|
|
return reinterpret_cast<PtrType*>((reinterpret_cast<intptr_t>(value) + (alignment - 1)) & ~(alignment - 1));
|
|
}
|
|
|
|
template<typename IntegralType>
|
|
inline IntegralType align_to(IntegralType value, size_t alignment)
|
|
{
|
|
ACL_ASSERT(is_power_of_two(alignment), "Alignment value must be a power of two");
|
|
return static_cast<IntegralType>((static_cast<size_t>(value) + (alignment - 1)) & ~(alignment - 1));
|
|
}
|
|
|
|
// Returns the number of padding bytes required after a value of type 'PreviousMemberType'
// so that what follows satisfies the alignment of 'NextMemberType'.
// The result is zero when sizeof(PreviousMemberType) is already a multiple of alignof(NextMemberType).
template<typename PreviousMemberType, typename NextMemberType>
constexpr size_t get_required_padding()
{
	// Equivalent to: align_to(sizeof(PreviousMemberType), alignof(NextMemberType)) - sizeof(PreviousMemberType)
	// Written as a single expression to remain usable in a C++11 constexpr function
	return ((sizeof(PreviousMemberType) + (alignof(NextMemberType) - 1)) & ~(alignof(NextMemberType) - 1)) - sizeof(PreviousMemberType);
}
|
|
|
|
// Returns the number of elements of a statically sized array.
// The count is deduced from the array type: the array itself is never read.
template<typename ElementType, size_t num_elements>
constexpr size_t get_array_size(ElementType const (&)[num_elements]) { return num_elements; }
|
|
|
|
//////////////////////////////////////////////////////////////////////////
// Type safe casting
|
|
|
|
namespace memory_impl
{
	// Pointer to pointer cast.
	// Asserts that 'input' is aligned to alignof(DestPtrType) before reinterpreting it.
	template<typename DestPtrType, typename SrcType>
	struct safe_ptr_to_ptr_cast_impl
	{
		inline static DestPtrType* cast(SrcType* input)
		{
			ACL_ASSERT(is_aligned_to(input, alignof(DestPtrType)), "reinterpret_cast would result in an unaligned pointer");
			return reinterpret_cast<DestPtrType*>(input);
		}
	};

	// Pointer to 'void*' cast.
	// No alignment check is performed: alignof(void) is not a valid expression.
	template<typename SrcType>
	struct safe_ptr_to_ptr_cast_impl<void, SrcType>
	{
		static constexpr void* cast(SrcType* input) { return input; }
	};

	// Integral (or other non 'SrcType*' value) to pointer cast.
	// Asserts that 'input' is aligned to alignof(DestPtrType) before reinterpreting it.
	template<typename DestPtrType, typename SrcType>
	struct safe_int_to_ptr_cast_impl
	{
		inline static DestPtrType* cast(SrcType input)
		{
			ACL_ASSERT(is_aligned_to(input, alignof(DestPtrType)), "reinterpret_cast would result in an unaligned pointer");
			return reinterpret_cast<DestPtrType*>(input);
		}
	};

	// Integral to 'void*' cast, no alignment check is performed.
	// Not constexpr: a reinterpret_cast can never appear in a constant expression.
	template<typename SrcType>
	struct safe_int_to_ptr_cast_impl<void, SrcType>
	{
		inline static void* cast(SrcType input) { return reinterpret_cast<void*>(input); }
	};
}
|
|
|
|
template<typename DestPtrType, typename SrcType>
|
|
inline DestPtrType* safe_ptr_cast(SrcType* input)
|
|
{
|
|
return memory_impl::safe_ptr_to_ptr_cast_impl<DestPtrType, SrcType>::cast(input);
|
|
}
|
|
|
|
template<typename DestPtrType, typename SrcType>
|
|
inline DestPtrType* safe_ptr_cast(SrcType input)
|
|
{
|
|
return memory_impl::safe_int_to_ptr_cast_impl<DestPtrType, SrcType>::cast(input);
|
|
}
|
|
|
|
#if defined(ACL_COMPILER_GCC)
|
|
// GCC sometimes complains about comparisons being always true due to partial template
|
|
// evaluation. Disable that warning since we know it is safe.
|
|
#pragma GCC diagnostic push
|
|
#pragma GCC diagnostic ignored "-Wtype-limits"
|
|
#endif
|
|
|
|
namespace memory_impl
|
|
{
|
|
template<typename Type, bool is_enum = true>
|
|
struct safe_underlying_type { using type = typename std::underlying_type<Type>::type; };
|
|
|
|
template<typename Type>
|
|
struct safe_underlying_type<Type, false> { using type = Type; };
|
|
|
|
template<typename DstType, typename SrcType, bool is_floating_point = false>
|
|
struct is_static_cast_safe_s
|
|
{
|
|
static bool test(SrcType input)
|
|
{
|
|
using SrcRealType = typename safe_underlying_type<SrcType, std::is_enum<SrcType>::value>::type;
|
|
|
|
if (static_condition<(std::is_signed<DstType>::value == std::is_signed<SrcRealType>::value)>::test())
|
|
return SrcType(DstType(input)) == input;
|
|
else if (static_condition<(std::is_signed<SrcRealType>::value)>::test())
|
|
return int64_t(input) >= 0 && SrcType(DstType(input)) == input;
|
|
else
|
|
return uint64_t(input) <= uint64_t(std::numeric_limits<DstType>::max());
|
|
};
|
|
};
|
|
|
|
template<typename DstType, typename SrcType>
|
|
struct is_static_cast_safe_s<DstType, SrcType, true>
|
|
{
|
|
static bool test(SrcType input)
|
|
{
|
|
return SrcType(DstType(input)) == input;
|
|
}
|
|
};
|
|
|
|
template<typename DstType, typename SrcType>
|
|
inline bool is_static_cast_safe(SrcType input)
|
|
{
|
|
// TODO: In C++17 this should be folded to constexpr if
|
|
return is_static_cast_safe_s<DstType, SrcType, static_condition<(std::is_floating_point<SrcType>::value || std::is_floating_point<DstType>::value)>::test()>::test(input);
|
|
}
|
|
}
|
|
|
|
// Converts 'input' to 'DstType' with static_cast.
// When assert checks are enabled (ACL_HAS_ASSERT_CHECKS), asserts that the
// conversion does not lose information.
template<typename DstType, typename SrcType>
inline DstType safe_static_cast(SrcType input)
{
#if defined(ACL_HAS_ASSERT_CHECKS)
	// Only computed when asserts are enabled to avoid an unused variable otherwise
	const bool is_safe = memory_impl::is_static_cast_safe<DstType, SrcType>(input);
	ACL_ASSERT(is_safe, "Unsafe static cast resulted in data loss");
#endif

	return static_cast<DstType>(input);
}
|
|
|
|
#if defined(ACL_COMPILER_GCC)
|
|
#pragma GCC diagnostic pop
|
|
#endif
|
|
|
|
//////////////////////////////////////////////////////////////////////////
// Endian and raw memory support
|
|
|
|
template<typename OutputPtrType, typename InputPtrType, typename offset_type>
|
|
inline OutputPtrType* add_offset_to_ptr(InputPtrType* ptr, offset_type offset)
|
|
{
|
|
return safe_ptr_cast<OutputPtrType>(reinterpret_cast<uintptr_t>(ptr) + offset);
|
|
}
|
|
|
|
// Returns 'value' with its two bytes swapped.
// Uses a compiler/platform intrinsic when one is available.
inline uint16_t byte_swap(uint16_t value)
{
#if defined(_MSC_VER)
	return _byteswap_ushort(value);
#elif defined(__APPLE__)
	return OSSwapInt16(value);
#elif defined(__GNUC__) || defined(__clang__)
	return __builtin_bswap16(value);
#else
	// The operands are promoted to int, convert back explicitly
	return static_cast<uint16_t>(((value & 0x00FF) << 8) | ((value & 0xFF00) >> 8));
#endif
}
|
|
|
|
// Returns 'value' with the order of its four bytes reversed.
// Uses a compiler/platform intrinsic when one is available.
inline uint32_t byte_swap(uint32_t value)
{
#if defined(_MSC_VER)
	return _byteswap_ulong(value);
#elif defined(__APPLE__)
	return OSSwapInt32(value);
#elif defined(__GNUC__) || defined(__clang__)
	return __builtin_bswap32(value);
#else
	// Swap the two 16 bit halves, then the bytes within each half
	value = ((value & 0x0000FFFFU) << 16) | ((value & 0xFFFF0000U) >> 16);
	value = ((value & 0x00FF00FFU) << 8) | ((value & 0xFF00FF00U) >> 8);
	return value;
#endif
}
|
|
|
|
// Returns 'value' with the order of its eight bytes reversed.
// Uses a compiler/platform intrinsic when one is available.
inline uint64_t byte_swap(uint64_t value)
{
#if defined(_MSC_VER)
	return _byteswap_uint64(value);
#elif defined(__APPLE__)
	return OSSwapInt64(value);
#elif defined(__GNUC__) || defined(__clang__)
	return __builtin_bswap64(value);
#else
	// Swap the two 32 bit halves, then the 16 bit pairs within them, then the bytes within those
	value = ((value & 0x00000000FFFFFFFFULL) << 32) | ((value & 0xFFFFFFFF00000000ULL) >> 32);
	value = ((value & 0x0000FFFF0000FFFFULL) << 16) | ((value & 0xFFFF0000FFFF0000ULL) >> 16);
	value = ((value & 0x00FF00FF00FF00FFULL) << 8) | ((value & 0xFF00FF00FF00FF00ULL) >> 8);
	return value;
#endif
}
|
|
|
|
// We copy bits assuming big-endian ordering for 'dest' and 'src'
|
|
inline void memcpy_bits(void* dest, uint64_t dest_bit_offset, const void* src, uint64_t src_bit_offset, uint64_t num_bits_to_copy)
|
|
{
|
|
while (true)
|
|
{
|
|
uint64_t src_byte_offset = src_bit_offset / 8;
|
|
uint8_t src_byte_bit_offset = safe_static_cast<uint8_t>(src_bit_offset % 8);
|
|
uint64_t dest_byte_offset = dest_bit_offset / 8;
|
|
uint8_t dest_byte_bit_offset = safe_static_cast<uint8_t>(dest_bit_offset % 8);
|
|
|
|
const uint8_t* src_bytes = add_offset_to_ptr<const uint8_t>(src, src_byte_offset);
|
|
uint8_t* dest_byte = add_offset_to_ptr<uint8_t>(dest, dest_byte_offset);
|
|
|
|
// We'll copy only as many bits as there fits within 'dest' or as there are left
|
|
uint8_t num_bits_dest_remain_in_byte = 8 - dest_byte_bit_offset;
|
|
uint8_t num_bits_src_remain_in_byte = 8 - src_byte_bit_offset;
|
|
uint64_t num_bits_copied = std::min<uint64_t>(std::min<uint8_t>(num_bits_dest_remain_in_byte, num_bits_src_remain_in_byte), num_bits_to_copy);
|
|
uint8_t num_bits_copied_u8 = safe_static_cast<uint8_t>(num_bits_copied);
|
|
|
|
// We'll shift and mask to retain the 'dest' bits prior to our offset and whatever remains after the copy
|
|
uint8_t dest_shift_offset = dest_byte_bit_offset;
|
|
uint8_t dest_byte_mask = ~(0xFF >> dest_shift_offset) | ~(0xFF << (8 - num_bits_copied_u8 - dest_byte_bit_offset));
|
|
|
|
uint8_t src_shift_offset = 8 - src_byte_bit_offset - num_bits_copied_u8;
|
|
uint8_t src_byte_mask = 0xFF >> (8 - num_bits_copied_u8);
|
|
uint8_t src_insert_shift_offset = 8 - num_bits_copied_u8 - dest_byte_bit_offset;
|
|
|
|
uint8_t partial_dest_value = *dest_byte & dest_byte_mask;
|
|
uint8_t partial_src_value = (*src_bytes >> src_shift_offset) & src_byte_mask;
|
|
*dest_byte = partial_dest_value | (partial_src_value << src_insert_shift_offset);
|
|
|
|
if (num_bits_to_copy <= num_bits_copied)
|
|
break; // Done
|
|
|
|
num_bits_to_copy -= num_bits_copied;
|
|
dest_bit_offset += num_bits_copied;
|
|
src_bit_offset += num_bits_copied;
|
|
}
|
|
}
|
|
|
|
// Reads a value of type 'data_type' from 'input' which does not need to be aligned.
// The bytes are copied with memcpy into a local instance which is returned by value.
template<typename data_type>
inline data_type unaligned_load(const void* input)
{
	data_type result;
	std::memcpy(&result, input, sizeof(data_type));
	return result;
}
|
|
|
|
template<typename data_type>
|
|
inline data_type aligned_load(const void* input)
|
|
{
|
|
return *safe_ptr_cast<const data_type, const void*>(input);
|
|
}
|
|
|
|
// Writes the bytes of 'input' to 'output' which does not need to be aligned.
// Exactly sizeof(data_type) bytes are copied with memcpy.
template<typename data_type>
inline void unaligned_write(data_type input, void* output)
{
	std::memcpy(output, &input, sizeof(data_type));
}
|
|
|
|
// Issues a prefetch for the memory at 'ptr' using whichever intrinsic the
// platform/compiler provides. Does nothing when none is available.
// TODO: Add support for streaming prefetch (ptr, 0, 0) for arm
inline void memory_prefetch(const void* ptr)
{
#if defined(RTM_SSE2_INTRINSICS)
	_mm_prefetch(reinterpret_cast<const char*>(ptr), _MM_HINT_T0);
#elif defined(ACL_COMPILER_GCC) || defined(ACL_COMPILER_CLANG)
	// Arguments: address, 0 = prefetch for a read, 3 = highest degree of temporal locality
	__builtin_prefetch(ptr, 0, 3);
#elif defined(RTM_NEON64_INTRINSICS) && defined(ACL_COMPILER_MSVC)
	__prefetch(ptr);
#else
	// No prefetch support, silence the unused argument warning
	(void)ptr;
#endif
}
|
|
}
|
|
|
|
ACL_IMPL_FILE_PRAGMA_POP
|