#pragma once //////////////////////////////////////////////////////////////////////////////// // The MIT License (MIT) // // Copyright (c) 2020 Nicholas Frechette & Animation Compression Library contributors // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. //////////////////////////////////////////////////////////////////////////////// #include "acl/core/compressed_tracks.h" #include "acl/core/compressed_tracks_version.h" #include "acl/core/interpolation_utils.h" #include "acl/core/track_writer.h" #include "acl/core/variable_bit_rates.h" #include "acl/core/impl/compiler_utils.h" #include "acl/math/scalar_packing.h" #include "acl/math/vector4_packing.h" #include #include #include #include ACL_IMPL_FILE_PRAGMA_PUSH namespace acl { namespace acl_impl { struct alignas(64) persistent_scalar_decompression_context_v0 { // Clip related data // offsets // Only member used to detect if we are initialized, must be first const compressed_tracks* tracks; // 0 | 0 uint32_t tracks_hash; // 4 | 8 float duration; // 8 | 12 // Seeking related data float interpolation_alpha; // 12 | 16 float sample_time; // 16 | 20 uint32_t key_frame_bit_offsets[2]; // 20 | 24 // Variable quantization uint8_t padding_tail[sizeof(void*) == 4 ? 36 : 32]; ////////////////////////////////////////////////////////////////////////// const compressed_tracks* get_compressed_tracks() const { return tracks; } compressed_tracks_version16 get_version() const { return tracks->get_version(); } bool is_initialized() const { return tracks != nullptr; } void reset() { tracks = nullptr; } }; static_assert(sizeof(persistent_scalar_decompression_context_v0) == 64, "Unexpected size"); template inline bool initialize_v0(persistent_scalar_decompression_context_v0& context, const compressed_tracks& tracks) { ACL_ASSERT(tracks.get_algorithm_type() == algorithm_type8::uniformly_sampled, "Invalid algorithm type [%s], expected [%s]", get_algorithm_name(tracks.get_algorithm_type()), get_algorithm_name(algorithm_type8::uniformly_sampled)); context.tracks = &tracks; context.tracks_hash = tracks.get_hash(); context.duration = tracks.get_duration(); context.sample_time = -1.0F; context.interpolation_alpha = 0.0; return true; } inline bool is_dirty_v0(const persistent_scalar_decompression_context_v0& context, const compressed_tracks& tracks) { if (context.tracks != &tracks) return true; if (context.tracks_hash != tracks.get_hash()) return true; return false; } template inline void seek_v0(persistent_scalar_decompression_context_v0& context, float sample_time, sample_rounding_policy rounding_policy) { // Clamp for safety, the caller should normally handle this but in practice, it often isn't the case if (decompression_settings_type::clamp_sample_time()) sample_time = rtm::scalar_clamp(sample_time, 0.0F, context.duration); if (context.sample_time == sample_time) return; context.sample_time = sample_time; const acl_impl::tracks_header& header = acl_impl::get_tracks_header(*context.tracks); uint32_t key_frame0; uint32_t key_frame1; find_linear_interpolation_samples_with_sample_rate(header.num_samples, header.sample_rate, sample_time, rounding_policy, key_frame0, key_frame1, context.interpolation_alpha); const acl_impl::scalar_tracks_header& scalars_header = acl_impl::get_scalar_tracks_header(*context.tracks); context.key_frame_bit_offsets[0] = key_frame0 * scalars_header.num_bits_per_frame; context.key_frame_bit_offsets[1] = key_frame1 * scalars_header.num_bits_per_frame; } template inline void decompress_tracks_v0(const persistent_scalar_decompression_context_v0& context, track_writer_type& writer) { ACL_ASSERT(context.sample_time >= 0.0f, "Context not set to a valid sample time"); if (context.sample_time < 0.0F) return; // Invalid sample time, we didn't seek yet // Due to the SIMD operations, we sometimes overflow in the SIMD lanes not used. // Disable floating point exceptions to avoid issues. fp_environment fp_env; if (decompression_settings_type::disable_fp_exeptions()) disable_fp_exceptions(fp_env); const acl_impl::tracks_header& header = acl_impl::get_tracks_header(*context.tracks); const acl_impl::scalar_tracks_header& scalars_header = acl_impl::get_scalar_tracks_header(*context.tracks); const rtm::scalarf interpolation_alpha = rtm::scalar_set(context.interpolation_alpha); const acl_impl::track_metadata* per_track_metadata = scalars_header.get_track_metadata(); const float* constant_values = scalars_header.get_track_constant_values(); const float* range_values = scalars_header.get_track_range_values(); const uint8_t* animated_values = scalars_header.get_track_animated_values(); uint32_t track_bit_offset0 = context.key_frame_bit_offsets[0]; uint32_t track_bit_offset1 = context.key_frame_bit_offsets[1]; const track_type8 track_type = header.track_type; const uint32_t num_tracks = header.num_tracks; for (uint32_t track_index = 0; track_index < num_tracks; ++track_index) { const acl_impl::track_metadata& metadata = per_track_metadata[track_index]; const uint8_t bit_rate = metadata.bit_rate; const uint32_t num_bits_per_component = get_num_bits_at_bit_rate(bit_rate); if (track_type == track_type8::float1f && decompression_settings_type::is_track_type_supported(track_type8::float1f)) { rtm::scalarf value; if (is_constant_bit_rate(bit_rate)) { value = rtm::scalar_load(constant_values); constant_values += 1; } else { rtm::scalarf value0; rtm::scalarf value1; if (is_raw_bit_rate(bit_rate)) { value0 = unpack_scalarf_32_unsafe(animated_values, track_bit_offset0); value1 = unpack_scalarf_32_unsafe(animated_values, track_bit_offset1); } else { value0 = unpack_scalarf_uXX_unsafe(num_bits_per_component, animated_values, track_bit_offset0); value1 = unpack_scalarf_uXX_unsafe(num_bits_per_component, animated_values, track_bit_offset1); const rtm::scalarf range_min = rtm::scalar_load(range_values); const rtm::scalarf range_extent = rtm::scalar_load(range_values + 1); value0 = rtm::scalar_mul_add(value0, range_extent, range_min); value1 = rtm::scalar_mul_add(value1, range_extent, range_min); range_values += 2; } value = rtm::scalar_lerp(value0, value1, interpolation_alpha); const uint32_t num_sample_bits = num_bits_per_component; track_bit_offset0 += num_sample_bits; track_bit_offset1 += num_sample_bits; } writer.write_float1(track_index, value); } else if (track_type == track_type8::float2f && decompression_settings_type::is_track_type_supported(track_type8::float2f)) { rtm::vector4f value; if (is_constant_bit_rate(bit_rate)) { value = rtm::vector_load(constant_values); constant_values += 2; } else { rtm::vector4f value0; rtm::vector4f value1; if (is_raw_bit_rate(bit_rate)) { value0 = unpack_vector2_64_unsafe(animated_values, track_bit_offset0); value1 = unpack_vector2_64_unsafe(animated_values, track_bit_offset1); } else { value0 = unpack_vector2_uXX_unsafe(num_bits_per_component, animated_values, track_bit_offset0); value1 = unpack_vector2_uXX_unsafe(num_bits_per_component, animated_values, track_bit_offset1); const rtm::vector4f range_min = rtm::vector_load(range_values); const rtm::vector4f range_extent = rtm::vector_load(range_values + 2); value0 = rtm::vector_mul_add(value0, range_extent, range_min); value1 = rtm::vector_mul_add(value1, range_extent, range_min); range_values += 4; } value = rtm::vector_lerp(value0, value1, interpolation_alpha); const uint32_t num_sample_bits = num_bits_per_component * 2; track_bit_offset0 += num_sample_bits; track_bit_offset1 += num_sample_bits; } writer.write_float2(track_index, value); } else if (track_type == track_type8::float3f && decompression_settings_type::is_track_type_supported(track_type8::float3f)) { rtm::vector4f value; if (is_constant_bit_rate(bit_rate)) { value = rtm::vector_load(constant_values); constant_values += 3; } else { rtm::vector4f value0; rtm::vector4f value1; if (is_raw_bit_rate(bit_rate)) { value0 = unpack_vector3_96_unsafe(animated_values, track_bit_offset0); value1 = unpack_vector3_96_unsafe(animated_values, track_bit_offset1); } else { value0 = unpack_vector3_uXX_unsafe(num_bits_per_component, animated_values, track_bit_offset0); value1 = unpack_vector3_uXX_unsafe(num_bits_per_component, animated_values, track_bit_offset1); const rtm::vector4f range_min = rtm::vector_load(range_values); const rtm::vector4f range_extent = rtm::vector_load(range_values + 3); value0 = rtm::vector_mul_add(value0, range_extent, range_min); value1 = rtm::vector_mul_add(value1, range_extent, range_min); range_values += 6; } value = rtm::vector_lerp(value0, value1, interpolation_alpha); const uint32_t num_sample_bits = num_bits_per_component * 3; track_bit_offset0 += num_sample_bits; track_bit_offset1 += num_sample_bits; } writer.write_float3(track_index, value); } else if (track_type == track_type8::float4f && decompression_settings_type::is_track_type_supported(track_type8::float4f)) { rtm::vector4f value; if (is_constant_bit_rate(bit_rate)) { value = rtm::vector_load(constant_values); constant_values += 4; } else { rtm::vector4f value0; rtm::vector4f value1; if (is_raw_bit_rate(bit_rate)) { value0 = unpack_vector4_128_unsafe(animated_values, track_bit_offset0); value1 = unpack_vector4_128_unsafe(animated_values, track_bit_offset1); } else { value0 = unpack_vector4_uXX_unsafe(num_bits_per_component, animated_values, track_bit_offset0); value1 = unpack_vector4_uXX_unsafe(num_bits_per_component, animated_values, track_bit_offset1); const rtm::vector4f range_min = rtm::vector_load(range_values); const rtm::vector4f range_extent = rtm::vector_load(range_values + 4); value0 = rtm::vector_mul_add(value0, range_extent, range_min); value1 = rtm::vector_mul_add(value1, range_extent, range_min); range_values += 8; } value = rtm::vector_lerp(value0, value1, interpolation_alpha); const uint32_t num_sample_bits = num_bits_per_component * 4; track_bit_offset0 += num_sample_bits; track_bit_offset1 += num_sample_bits; } writer.write_float4(track_index, value); } else if (track_type == track_type8::vector4f && decompression_settings_type::is_track_type_supported(track_type8::vector4f)) { rtm::vector4f value; if (is_constant_bit_rate(bit_rate)) { value = rtm::vector_load(constant_values); constant_values += 4; } else { rtm::vector4f value0; rtm::vector4f value1; if (is_raw_bit_rate(bit_rate)) { value0 = unpack_vector4_128_unsafe(animated_values, track_bit_offset0); value1 = unpack_vector4_128_unsafe(animated_values, track_bit_offset1); } else { value0 = unpack_vector4_uXX_unsafe(num_bits_per_component, animated_values, track_bit_offset0); value1 = unpack_vector4_uXX_unsafe(num_bits_per_component, animated_values, track_bit_offset1); const rtm::vector4f range_min = rtm::vector_load(range_values); const rtm::vector4f range_extent = rtm::vector_load(range_values + 4); value0 = rtm::vector_mul_add(value0, range_extent, range_min); value1 = rtm::vector_mul_add(value1, range_extent, range_min); range_values += 8; } value = rtm::vector_lerp(value0, value1, interpolation_alpha); const uint32_t num_sample_bits = num_bits_per_component * 4; track_bit_offset0 += num_sample_bits; track_bit_offset1 += num_sample_bits; } writer.write_vector4(track_index, value); } } if (decompression_settings_type::disable_fp_exeptions()) restore_fp_exceptions(fp_env); } template inline void decompress_track_v0(const persistent_scalar_decompression_context_v0& context, uint32_t track_index, track_writer_type& writer) { ACL_ASSERT(context.sample_time >= 0.0f, "Context not set to a valid sample time"); if (context.sample_time < 0.0F) return; // Invalid sample time, we didn't seek yet const tracks_header& header = get_tracks_header(*context.tracks); ACL_ASSERT(track_index < header.num_tracks, "Invalid track index"); if (track_index >= header.num_tracks) return; // Invalid track index // Due to the SIMD operations, we sometimes overflow in the SIMD lanes not used. // Disable floating point exceptions to avoid issues. fp_environment fp_env; if (decompression_settings_type::disable_fp_exeptions()) disable_fp_exceptions(fp_env); const scalar_tracks_header& scalars_header = get_scalar_tracks_header(*context.tracks); const rtm::scalarf interpolation_alpha = rtm::scalar_set(context.interpolation_alpha); const float* constant_values = scalars_header.get_track_constant_values(); const float* range_values = scalars_header.get_track_range_values(); const track_type8 track_type = header.track_type; const uint32_t num_element_components = get_track_num_sample_elements(track_type); uint32_t track_bit_offset = 0; const acl_impl::track_metadata* per_track_metadata = scalars_header.get_track_metadata(); for (uint32_t scan_track_index = 0; scan_track_index < track_index; ++scan_track_index) { const acl_impl::track_metadata& metadata = per_track_metadata[scan_track_index]; const uint8_t bit_rate = metadata.bit_rate; const uint32_t num_bits_per_component = get_num_bits_at_bit_rate(bit_rate); track_bit_offset += num_bits_per_component * num_element_components; if (is_constant_bit_rate(bit_rate)) constant_values += num_element_components; else if (!is_raw_bit_rate(bit_rate)) range_values += num_element_components * 2; } const acl_impl::track_metadata& metadata = per_track_metadata[track_index]; const uint8_t bit_rate = metadata.bit_rate; const uint32_t num_bits_per_component = get_num_bits_at_bit_rate(bit_rate); const uint8_t* animated_values = scalars_header.get_track_animated_values(); if (track_type == track_type8::float1f && decompression_settings_type::is_track_type_supported(track_type8::float1f)) { rtm::scalarf value; if (is_constant_bit_rate(bit_rate)) value = rtm::scalar_load(constant_values); else { rtm::scalarf value0; rtm::scalarf value1; if (is_raw_bit_rate(bit_rate)) { value0 = unpack_scalarf_32_unsafe(animated_values, context.key_frame_bit_offsets[0] + track_bit_offset); value1 = unpack_scalarf_32_unsafe(animated_values, context.key_frame_bit_offsets[1] + track_bit_offset); } else { value0 = unpack_scalarf_uXX_unsafe(num_bits_per_component, animated_values, context.key_frame_bit_offsets[0] + track_bit_offset); value1 = unpack_scalarf_uXX_unsafe(num_bits_per_component, animated_values, context.key_frame_bit_offsets[1] + track_bit_offset); const rtm::scalarf range_min = rtm::scalar_load(range_values); const rtm::scalarf range_extent = rtm::scalar_load(range_values + num_element_components); value0 = rtm::scalar_mul_add(value0, range_extent, range_min); value1 = rtm::scalar_mul_add(value1, range_extent, range_min); } value = rtm::scalar_lerp(value0, value1, interpolation_alpha); } writer.write_float1(track_index, value); } else if (track_type == track_type8::float2f && decompression_settings_type::is_track_type_supported(track_type8::float2f)) { rtm::vector4f value; if (is_constant_bit_rate(bit_rate)) value = rtm::vector_load(constant_values); else { rtm::vector4f value0; rtm::vector4f value1; if (is_raw_bit_rate(bit_rate)) { value0 = unpack_vector2_64_unsafe(animated_values, context.key_frame_bit_offsets[0] + track_bit_offset); value1 = unpack_vector2_64_unsafe(animated_values, context.key_frame_bit_offsets[1] + track_bit_offset); } else { value0 = unpack_vector2_uXX_unsafe(num_bits_per_component, animated_values, context.key_frame_bit_offsets[0] + track_bit_offset); value1 = unpack_vector2_uXX_unsafe(num_bits_per_component, animated_values, context.key_frame_bit_offsets[1] + track_bit_offset); const rtm::vector4f range_min = rtm::vector_load(range_values); const rtm::vector4f range_extent = rtm::vector_load(range_values + num_element_components); value0 = rtm::vector_mul_add(value0, range_extent, range_min); value1 = rtm::vector_mul_add(value1, range_extent, range_min); } value = rtm::vector_lerp(value0, value1, interpolation_alpha); } writer.write_float2(track_index, value); } else if (track_type == track_type8::float3f && decompression_settings_type::is_track_type_supported(track_type8::float3f)) { rtm::vector4f value; if (is_constant_bit_rate(bit_rate)) value = rtm::vector_load(constant_values); else { rtm::vector4f value0; rtm::vector4f value1; if (is_raw_bit_rate(bit_rate)) { value0 = unpack_vector3_96_unsafe(animated_values, context.key_frame_bit_offsets[0] + track_bit_offset); value1 = unpack_vector3_96_unsafe(animated_values, context.key_frame_bit_offsets[1] + track_bit_offset); } else { value0 = unpack_vector3_uXX_unsafe(num_bits_per_component, animated_values, context.key_frame_bit_offsets[0] + track_bit_offset); value1 = unpack_vector3_uXX_unsafe(num_bits_per_component, animated_values, context.key_frame_bit_offsets[1] + track_bit_offset); const rtm::vector4f range_min = rtm::vector_load(range_values); const rtm::vector4f range_extent = rtm::vector_load(range_values + num_element_components); value0 = rtm::vector_mul_add(value0, range_extent, range_min); value1 = rtm::vector_mul_add(value1, range_extent, range_min); } value = rtm::vector_lerp(value0, value1, interpolation_alpha); } writer.write_float3(track_index, value); } else if (track_type == track_type8::float4f && decompression_settings_type::is_track_type_supported(track_type8::float4f)) { rtm::vector4f value; if (is_constant_bit_rate(bit_rate)) value = rtm::vector_load(constant_values); else { rtm::vector4f value0; rtm::vector4f value1; if (is_raw_bit_rate(bit_rate)) { value0 = unpack_vector4_128_unsafe(animated_values, context.key_frame_bit_offsets[0] + track_bit_offset); value1 = unpack_vector4_128_unsafe(animated_values, context.key_frame_bit_offsets[1] + track_bit_offset); } else { value0 = unpack_vector4_uXX_unsafe(num_bits_per_component, animated_values, context.key_frame_bit_offsets[0] + track_bit_offset); value1 = unpack_vector4_uXX_unsafe(num_bits_per_component, animated_values, context.key_frame_bit_offsets[1] + track_bit_offset); const rtm::vector4f range_min = rtm::vector_load(range_values); const rtm::vector4f range_extent = rtm::vector_load(range_values + num_element_components); value0 = rtm::vector_mul_add(value0, range_extent, range_min); value1 = rtm::vector_mul_add(value1, range_extent, range_min); } value = rtm::vector_lerp(value0, value1, interpolation_alpha); } writer.write_float4(track_index, value); } else if (track_type == track_type8::vector4f && decompression_settings_type::is_track_type_supported(track_type8::vector4f)) { rtm::vector4f value; if (is_constant_bit_rate(bit_rate)) value = rtm::vector_load(constant_values); else { rtm::vector4f value0; rtm::vector4f value1; if (is_raw_bit_rate(bit_rate)) { value0 = unpack_vector4_128_unsafe(animated_values, context.key_frame_bit_offsets[0] + track_bit_offset); value1 = unpack_vector4_128_unsafe(animated_values, context.key_frame_bit_offsets[1] + track_bit_offset); } else { value0 = unpack_vector4_uXX_unsafe(num_bits_per_component, animated_values, context.key_frame_bit_offsets[0] + track_bit_offset); value1 = unpack_vector4_uXX_unsafe(num_bits_per_component, animated_values, context.key_frame_bit_offsets[1] + track_bit_offset); const rtm::vector4f range_min = rtm::vector_load(range_values); const rtm::vector4f range_extent = rtm::vector_load(range_values + num_element_components); value0 = rtm::vector_mul_add(value0, range_extent, range_min); value1 = rtm::vector_mul_add(value1, range_extent, range_min); } value = rtm::vector_lerp(value0, value1, interpolation_alpha); } writer.write_vector4(track_index, value); } if (decompression_settings_type::disable_fp_exeptions()) restore_fp_exceptions(fp_env); } } } ACL_IMPL_FILE_PRAGMA_POP