#pragma once //////////////////////////////////////////////////////////////////////////////// // The MIT License (MIT) // // Copyright (c) 2019 Nicholas Frechette & Animation Compression Library contributors // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. //////////////////////////////////////////////////////////////////////////////// #include "acl/core/impl/compiler_utils.h" #include "acl/core/track_types.h" #include "acl/core/variable_bit_rates.h" #include "acl/compression/impl/track_list_context.h" #include #include #include ACL_IMPL_FILE_PRAGMA_PUSH namespace acl { namespace acl_impl { struct quantization_scales { rtm::vector4f max_value; rtm::vector4f inv_max_value; explicit quantization_scales(uint32_t num_bits) { ACL_ASSERT(num_bits > 0, "Cannot decay with 0 bits"); ACL_ASSERT(num_bits < 31, "Attempting to decay on too many bits"); const float max_value_ = rtm::scalar_safe_to_float((1 << num_bits) - 1); max_value = rtm::vector_set(max_value_); inv_max_value = rtm::vector_set(1.0F / max_value_); } }; // Decays the input value through quantization by packing and unpacking a normalized input value inline rtm::vector4f RTM_SIMD_CALL decay_vector4_uXX(rtm::vector4f_arg0 value, const quantization_scales& scales) { using namespace rtm; ACL_ASSERT(vector_all_greater_equal(value, vector_zero()) && vector_all_less_equal(value, rtm::vector_set(1.0F)), "Expected normalized unsigned input value: %f, %f, %f, %f", (float)vector_get_x(value), (float)vector_get_y(value), (float)vector_get_z(value), (float)vector_get_w(value)); const vector4f packed_value = vector_round_symmetric(vector_mul(value, scales.max_value)); const vector4f decayed_value = vector_mul(packed_value, scales.inv_max_value); return decayed_value; } // Packs a normalized input value through quantization inline rtm::vector4f RTM_SIMD_CALL pack_vector4_uXX(rtm::vector4f_arg0 value, const quantization_scales& scales) { using namespace rtm; ACL_ASSERT(vector_all_greater_equal(value, vector_zero()) && vector_all_less_equal(value, rtm::vector_set(1.0F)), "Expected normalized unsigned input value: %f, %f, %f, %f", (float)vector_get_x(value), (float)vector_get_y(value), (float)vector_get_z(value), (float)vector_get_w(value)); return vector_round_symmetric(vector_mul(value, scales.max_value)); } inline void quantize_scalarf_track(track_list_context& context, uint32_t track_index) { using namespace rtm; const track& ref_track = (*context.reference_list)[track_index]; track_vector4f& mut_track = track_cast(context.track_list[track_index]); const vector4f precision = vector_load1(&mut_track.get_description().precision); const uint32_t ref_element_size = ref_track.get_sample_size(); const uint32_t num_samples = mut_track.get_num_samples(); const scalarf_range& range = context.range_list[track_index].range.scalarf; const vector4f range_min = range.get_min(); const vector4f range_extent = range.get_extent(); const vector4f zero = vector_zero(); const mask4f all_true_mask = mask_set(true, true, true, true); mask4f sample_mask = mask_set(false, false, false, false); std::memcpy(&sample_mask, &all_true_mask, ref_element_size); vector4f raw_sample = zero; uint8_t best_bit_rate = k_highest_bit_rate; // Default to raw if we fail to find something better // First we look for the best bit rate possible that keeps us within our precision target for (uint8_t bit_rate = k_highest_bit_rate - 1; bit_rate != 0; --bit_rate) // Skip the raw bit rate and the constant bit rate { const uint32_t num_bits_at_bit_rate = get_num_bits_at_bit_rate(bit_rate); const quantization_scales scales(num_bits_at_bit_rate); bool is_error_to_high = false; for (uint32_t sample_index = 0; sample_index < num_samples; ++sample_index) { std::memcpy(&raw_sample, ref_track[sample_index], ref_element_size); const vector4f normalized_sample = mut_track[sample_index]; // Decay our value through quantization const vector4f decayed_normalized_sample = decay_vector4_uXX(normalized_sample, scales); // Undo normalization const vector4f decayed_sample = vector_mul_add(decayed_normalized_sample, range_extent, range_min); const vector4f delta = vector_abs(vector_sub(raw_sample, decayed_sample)); const vector4f masked_delta = vector_select(sample_mask, delta, zero); if (!vector_all_less_equal(masked_delta, precision)) { is_error_to_high = true; break; } } if (is_error_to_high) break; // Our error is too high, use the previous bit rate // We were accurate enough, this is the best bit rate so far best_bit_rate = bit_rate; } context.bit_rate_list[track_index].scalar.value = best_bit_rate; // Done, update our track with the final result if (best_bit_rate == k_highest_bit_rate) { // We can't quantize this track, keep it raw for (uint32_t sample_index = 0; sample_index < num_samples; ++sample_index) std::memcpy(&mut_track[sample_index], ref_track[sample_index], ref_element_size); } else { // Use the selected bit rate to quantize our track const uint32_t num_bits_at_bit_rate = get_num_bits_at_bit_rate(best_bit_rate); const quantization_scales scales(num_bits_at_bit_rate); for (uint32_t sample_index = 0; sample_index < num_samples; ++sample_index) mut_track[sample_index] = pack_vector4_uXX(mut_track[sample_index], scales); } } inline void quantize_tracks(track_list_context& context) { ACL_ASSERT(context.is_valid(), "Invalid context"); context.bit_rate_list = allocate_type_array(*context.allocator, context.num_tracks); for (uint32_t track_index = 0; track_index < context.num_tracks; ++track_index) { const bool is_track_constant = context.is_constant(track_index); if (is_track_constant) continue; // Constant tracks don't need to be modified const track_range& range = context.range_list[track_index]; switch (range.category) { case track_category8::scalarf: quantize_scalarf_track(context, track_index); break; default: ACL_ASSERT(false, "Invalid track category"); break; } } } } } ACL_IMPL_FILE_PRAGMA_POP