#pragma once

////////////////////////////////////////////////////////////////////////////////
// The MIT License (MIT)
//
// Copyright (c) 2017 Nicholas Frechette & Animation Compression Library contributors
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
////////////////////////////////////////////////////////////////////////////////

#include "acl/core/impl/compiler_utils.h"
#include "acl/core/error.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
#include <type_traits>

// For byte swapping intrinsics
#if defined(_MSC_VER)
	#include <stdlib.h>
#elif defined(__APPLE__)
	#include <libkern/OSByteOrder.h>
#endif

// For __prefetch
#if defined(RTM_NEON64_INTRINSICS) && defined(ACL_COMPILER_MSVC)
	#include <intrin.h>
#endif

ACL_IMPL_FILE_PRAGMA_PUSH

namespace acl
{
	//////////////////////////////////////////////////////////////////////////
	// Allows static branching without any warnings

	template<bool expression_result>
	struct static_condition { static constexpr bool test() { return true; } };

	template<>
	struct static_condition<false> { static constexpr bool test() { return false; } };

	//////////////////////////////////////////////////////////////////////////
	// Various miscellaneous utilities related to alignment

	constexpr bool is_power_of_two(size_t input) { return input != 0 && (input & (input - 1)) == 0; }

	template<typename Type>
	constexpr bool is_alignment_valid(size_t alignment) { return is_power_of_two(alignment) && alignment >= alignof(Type); }

	template<typename PtrType>
	inline bool is_aligned_to(PtrType* value, size_t alignment)
	{
		ACL_ASSERT(is_power_of_two(alignment), "Alignment value must be a power of two");
		return (reinterpret_cast<uintptr_t>(value) & (alignment - 1)) == 0;
	}

	template<typename IntegralType>
	inline bool is_aligned_to(IntegralType value, size_t alignment)
	{
		ACL_ASSERT(is_power_of_two(alignment), "Alignment value must be a power of two");
		return (static_cast<size_t>(value) & (alignment - 1)) == 0;
	}

	template<typename PtrType>
	constexpr bool is_aligned(PtrType* value) { return is_aligned_to(value, alignof(PtrType)); }

	template<typename PtrType>
	inline PtrType* align_to(PtrType* value, size_t alignment)
	{
		ACL_ASSERT(is_power_of_two(alignment), "Alignment value must be a power of two");
		return reinterpret_cast<PtrType*>((reinterpret_cast<uintptr_t>(value) + (alignment - 1)) & ~(alignment - 1));
	}

	template<typename IntegralType>
	inline IntegralType align_to(IntegralType value, size_t alignment)
	{
		ACL_ASSERT(is_power_of_two(alignment), "Alignment value must be a power of two");
		return static_cast<IntegralType>((static_cast<size_t>(value) + (alignment - 1)) & ~(alignment - 1));
	}

	template<typename PreviousMemberType, typename NextMemberType>
	constexpr size_t get_required_padding()
	{
		// align_to(sizeof(PreviousMemberType), alignof(NextMemberType)) - sizeof(PreviousMemberType)
		return ((sizeof(PreviousMemberType) + (alignof(NextMemberType) - 1)) & ~(alignof(NextMemberType) - 1)) - sizeof(PreviousMemberType);
	}

	template<typename ElementType, size_t num_elements>
	constexpr size_t get_array_size(ElementType const (&)[num_elements]) { return num_elements; }
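
	//////////////////////////////////////////////////////////////////////////
	// Illustrative sketch (not part of the original header): a minimal example
	// of the alignment helpers above. The types, values, and the
	// 'memory_utils_examples' namespace are assumptions added for illustration.
	namespace memory_utils_examples
	{
		// 'is_power_of_two' and 'get_required_padding' are constexpr and can be
		// exercised entirely at compile time.
		static_assert(is_power_of_two(64), "64 is a power of two");
		static_assert(!is_power_of_two(48), "48 is not a power of two");

		// A uint8_t member followed by a float member needs 3 bytes of padding
		// on platforms where alignof(float) == 4: sizeof(uint8_t) rounds up
		// from 1 to 4, leaving 3 bytes in between.
		static_assert(get_required_padding<uint8_t, float>() == 3, "1 byte rounds up to 4, leaving 3 bytes of padding");

		// 'align_to' rounds an integer (or pointer) up to the next multiple of
		// a power of two alignment; e.g. 13 aligned to 8 yields 16.
		inline size_t example_align_to() { return align_to(size_t(13), 8); }
	}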
	//////////////////////////////////////////////////////////////////////////
	// Type safe casting

	namespace memory_impl
	{
		template<typename DestPtrType, typename SrcType>
		struct safe_ptr_to_ptr_cast_impl
		{
			inline static DestPtrType* cast(SrcType* input)
			{
				ACL_ASSERT(is_aligned_to(input, alignof(DestPtrType)), "reinterpret_cast would result in an unaligned pointer");
				return reinterpret_cast<DestPtrType*>(input);
			}
		};

		template<typename SrcType>
		struct safe_ptr_to_ptr_cast_impl<void, SrcType>
		{
			static constexpr void* cast(SrcType* input) { return input; }
		};

		template<typename DestPtrType, typename SrcType>
		struct safe_int_to_ptr_cast_impl
		{
			inline static DestPtrType* cast(SrcType input)
			{
				ACL_ASSERT(is_aligned_to(input, alignof(DestPtrType)), "reinterpret_cast would result in an unaligned pointer");
				return reinterpret_cast<DestPtrType*>(input);
			}
		};

		template<typename SrcType>
		struct safe_int_to_ptr_cast_impl<void, SrcType>
		{
			static constexpr void* cast(SrcType input) { return reinterpret_cast<void*>(input); }
		};
	}

	template<typename DestPtrType, typename SrcType>
	inline DestPtrType* safe_ptr_cast(SrcType* input)
	{
		return memory_impl::safe_ptr_to_ptr_cast_impl<DestPtrType, SrcType>::cast(input);
	}

	template<typename DestPtrType, typename SrcType>
	inline DestPtrType* safe_ptr_cast(SrcType input)
	{
		return memory_impl::safe_int_to_ptr_cast_impl<DestPtrType, SrcType>::cast(input);
	}

#if defined(ACL_COMPILER_GCC)
	// GCC sometimes complains about comparisons being always true due to partial
	// template evaluation. Disable that warning since we know it is safe.
	#pragma GCC diagnostic push
	#pragma GCC diagnostic ignored "-Wtype-limits"
#endif

	namespace memory_impl
	{
		template<typename Type, bool is_enum>
		struct safe_underlying_type { using type = typename std::underlying_type<Type>::type; };

		template<typename Type>
		struct safe_underlying_type<Type, false> { using type = Type; };

		template<typename DstType, typename SrcType, bool is_floating_point>
		struct is_static_cast_safe_s
		{
			static bool test(SrcType input)
			{
				using SrcRealType = typename safe_underlying_type<SrcType, std::is_enum<SrcType>::value>::type;

				if (static_condition<(std::is_signed<DstType>::value == std::is_signed<SrcRealType>::value)>::test())
					return SrcType(DstType(input)) == input;
				else if (static_condition<(std::is_signed<SrcRealType>::value)>::test())
					return int64_t(input) >= 0 && SrcType(DstType(input)) == input;
				else
					return uint64_t(input) <= uint64_t(std::numeric_limits<DstType>::max());
			}
		};

		template<typename DstType, typename SrcType>
		struct is_static_cast_safe_s<DstType, SrcType, true>
		{
			static bool test(SrcType input) { return SrcType(DstType(input)) == input; }
		};

		template<typename DstType, typename SrcType>
		inline bool is_static_cast_safe(SrcType input)
		{
			// TODO: In C++17 this should be folded to constexpr if
			return is_static_cast_safe_s<DstType, SrcType, static_condition<(std::is_floating_point<SrcType>::value || std::is_floating_point<DstType>::value)>::test()>::test(input);
		}
	}

	template<typename DstType, typename SrcType>
	inline DstType safe_static_cast(SrcType input)
	{
#if defined(ACL_HAS_ASSERT_CHECKS)
		const bool is_safe = memory_impl::is_static_cast_safe<DstType, SrcType>(input);
		ACL_ASSERT(is_safe, "Unsafe static cast resulted in data loss");
#endif

		return static_cast<DstType>(input);
	}

#if defined(ACL_COMPILER_GCC)
	#pragma GCC diagnostic pop
#endif
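
	//////////////////////////////////////////////////////////////////////////
	// Illustrative sketch (not part of the original header): a minimal example
	// of the checked casts above. The values and the never-called function are
	// assumptions added for illustration.
	namespace memory_utils_examples
	{
		inline void example_safe_casts()
		{
			// The value fits in the destination type, so when assert checks are
			// enabled, safe_static_cast validates the round-trip and succeeds.
			const uint32_t small_value = 1000;
			const uint16_t narrowed = safe_static_cast<uint16_t>(small_value);
			(void)narrowed;

			// safe_ptr_cast asserts that the raw pointer honors the destination
			// type's alignment before performing the reinterpret_cast.
			alignas(uint32_t) uint8_t buffer[sizeof(uint32_t)] = { 0 };
			uint32_t* typed = safe_ptr_cast<uint32_t>(&buffer[0]);
			(void)typed;
		}
	}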
	//////////////////////////////////////////////////////////////////////////
	// Endian and raw memory support

	template<typename OutputPtrType, typename InputPtrType, typename offset_type>
	inline OutputPtrType* add_offset_to_ptr(InputPtrType* ptr, offset_type offset)
	{
		return safe_ptr_cast<OutputPtrType>(reinterpret_cast<uintptr_t>(ptr) + offset);
	}

	inline uint16_t byte_swap(uint16_t value)
	{
#if defined(_MSC_VER)
		return _byteswap_ushort(value);
#elif defined(__APPLE__)
		return OSSwapInt16(value);
#elif defined(__GNUC__) || defined(__clang__)
		return __builtin_bswap16(value);
#else
		return (value & 0x00FF) << 8 | (value & 0xFF00) >> 8;
#endif
	}

	inline uint32_t byte_swap(uint32_t value)
	{
#if defined(_MSC_VER)
		return _byteswap_ulong(value);
#elif defined(__APPLE__)
		return OSSwapInt32(value);
#elif defined(__GNUC__) || defined(__clang__)
		return __builtin_bswap32(value);
#else
		value = (value & 0x0000FFFF) << 16 | (value & 0xFFFF0000) >> 16;
		value = (value & 0x00FF00FF) << 8 | (value & 0xFF00FF00) >> 8;
		return value;
#endif
	}

	inline uint64_t byte_swap(uint64_t value)
	{
#if defined(_MSC_VER)
		return _byteswap_uint64(value);
#elif defined(__APPLE__)
		return OSSwapInt64(value);
#elif defined(__GNUC__) || defined(__clang__)
		return __builtin_bswap64(value);
#else
		value = (value & 0x00000000FFFFFFFFull) << 32 | (value & 0xFFFFFFFF00000000ull) >> 32;
		value = (value & 0x0000FFFF0000FFFFull) << 16 | (value & 0xFFFF0000FFFF0000ull) >> 16;
		value = (value & 0x00FF00FF00FF00FFull) << 8 | (value & 0xFF00FF00FF00FF00ull) >> 8;
		return value;
#endif
	}

	// We copy bits assuming big-endian ordering for 'dest' and 'src'
	inline void memcpy_bits(void* dest, uint64_t dest_bit_offset, const void* src, uint64_t src_bit_offset, uint64_t num_bits_to_copy)
	{
		while (true)
		{
			uint64_t src_byte_offset = src_bit_offset / 8;
			uint8_t src_byte_bit_offset = safe_static_cast<uint8_t>(src_bit_offset % 8);

			uint64_t dest_byte_offset = dest_bit_offset / 8;
			uint8_t dest_byte_bit_offset = safe_static_cast<uint8_t>(dest_bit_offset % 8);

			const uint8_t* src_bytes = add_offset_to_ptr<const uint8_t>(src, src_byte_offset);
			uint8_t* dest_byte = add_offset_to_ptr<uint8_t>(dest, dest_byte_offset);

			// We'll copy only as many bits as there fits within 'dest' or as there are left
			uint8_t num_bits_dest_remain_in_byte = 8 - dest_byte_bit_offset;
			uint8_t num_bits_src_remain_in_byte = 8 - src_byte_bit_offset;
			uint64_t num_bits_copied = std::min<uint64_t>(std::min(num_bits_dest_remain_in_byte, num_bits_src_remain_in_byte), num_bits_to_copy);
			uint8_t num_bits_copied_u8 = safe_static_cast<uint8_t>(num_bits_copied);

			// We'll shift and mask to retain the 'dest' bits prior to our offset and whatever remains after the copy
			uint8_t dest_shift_offset = dest_byte_bit_offset;
			uint8_t dest_byte_mask = ~(0xFF >> dest_shift_offset) | ~(0xFF << (8 - num_bits_copied_u8 - dest_byte_bit_offset));

			uint8_t src_shift_offset = 8 - src_byte_bit_offset - num_bits_copied_u8;
			uint8_t src_byte_mask = 0xFF >> (8 - num_bits_copied_u8);

			uint8_t src_insert_shift_offset = 8 - num_bits_copied_u8 - dest_byte_bit_offset;

			uint8_t partial_dest_value = *dest_byte & dest_byte_mask;
			uint8_t partial_src_value = (*src_bytes >> src_shift_offset) & src_byte_mask;

			*dest_byte = partial_dest_value | (partial_src_value << src_insert_shift_offset);

			if (num_bits_to_copy <= num_bits_copied)
				break;	// Done

			num_bits_to_copy -= num_bits_copied;
			dest_bit_offset += num_bits_copied;
			src_bit_offset += num_bits_copied;
		}
	}

	template<typename data_type>
	inline data_type unaligned_load(const void* input)
	{
		data_type result;
		std::memcpy(&result, input, sizeof(data_type));
		return result;
	}

	template<typename data_type>
	inline data_type aligned_load(const void* input)
	{
		return *safe_ptr_cast<const data_type>(input);
	}

	template<typename data_type>
	inline void unaligned_write(data_type input, void* output)
	{
		std::memcpy(output, &input, sizeof(data_type));
	}

	// TODO: Add support for streaming prefetch (ptr, 0, 0) for ARM
	inline void memory_prefetch(const void* ptr)
	{
#if defined(RTM_SSE2_INTRINSICS)
		_mm_prefetch(reinterpret_cast<const char*>(ptr), _MM_HINT_T0);
#elif defined(ACL_COMPILER_GCC) || defined(ACL_COMPILER_CLANG)
		__builtin_prefetch(ptr, 0, 3);
#elif defined(RTM_NEON64_INTRINSICS) && defined(ACL_COMPILER_MSVC)
		__prefetch(ptr);
#else
		(void)ptr;
#endif
	}
}

ACL_IMPL_FILE_PRAGMA_POP
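
// Illustrative sketch (not part of the original header): a minimal, never-called
// example of the raw memory helpers above. The buffer, the values, and the
// 'acl_memory_utils_example' namespace are assumptions added for illustration.
namespace acl_memory_utils_example
{
	inline void example_raw_memory()
	{
		// byte_swap reverses the byte order of an integer, e.g. to convert
		// between little-endian and big-endian representations.
		const uint32_t swapped = acl::byte_swap(uint32_t(0x12345678));	// 0x78563412
		(void)swapped;

		// unaligned_write/unaligned_load round-trip a value through a buffer
		// at an offset an aligned load could not legally read from.
		uint8_t buffer[sizeof(uint64_t) + 1] = { 0 };
		acl::unaligned_write(uint64_t(42), &buffer[1]);
		const uint64_t value = acl::unaligned_load<uint64_t>(&buffer[1]);
		(void)value;

		// memcpy_bits copies an arbitrary bit range using big-endian bit
		// ordering: the low nibble of 'src' (bits 4..7) lands in the high
		// nibble of 'dest' (bits 0..3), so 'dest' becomes 0xF0.
		uint8_t src = 0x0F;
		uint8_t dest = 0x00;
		acl::memcpy_bits(&dest, 0, &src, 4, 4);
	}
}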