cocos-engine-external/sources/acl/compression/impl/quantize_track_impl.h

188 lines
7.2 KiB
C++

#pragma once
////////////////////////////////////////////////////////////////////////////////
// The MIT License (MIT)
//
// Copyright (c) 2019 Nicholas Frechette & Animation Compression Library contributors
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
////////////////////////////////////////////////////////////////////////////////
#include "acl/core/impl/compiler_utils.h"
#include "acl/core/track_types.h"
#include "acl/core/variable_bit_rates.h"
#include "acl/compression/impl/track_list_context.h"
#include <rtm/mask4i.h>
#include <rtm/vector4f.h>
#include <cstdint>
ACL_IMPL_FILE_PRAGMA_PUSH
namespace acl
{
namespace acl_impl
{
struct quantization_scales
{
rtm::vector4f max_value;
rtm::vector4f inv_max_value;
explicit quantization_scales(uint32_t num_bits)
{
ACL_ASSERT(num_bits > 0, "Cannot decay with 0 bits");
ACL_ASSERT(num_bits < 31, "Attempting to decay on too many bits");
const float max_value_ = rtm::scalar_safe_to_float((1 << num_bits) - 1);
max_value = rtm::vector_set(max_value_);
inv_max_value = rtm::vector_set(1.0F / max_value_);
}
};
// Decays the input value through quantization by packing and unpacking a normalized input value
inline rtm::vector4f RTM_SIMD_CALL decay_vector4_uXX(rtm::vector4f_arg0 value, const quantization_scales& scales)
{
using namespace rtm;
ACL_ASSERT(vector_all_greater_equal(value, vector_zero()) && vector_all_less_equal(value, rtm::vector_set(1.0F)), "Expected normalized unsigned input value: %f, %f, %f, %f", (float)vector_get_x(value), (float)vector_get_y(value), (float)vector_get_z(value), (float)vector_get_w(value));
const vector4f packed_value = vector_round_symmetric(vector_mul(value, scales.max_value));
const vector4f decayed_value = vector_mul(packed_value, scales.inv_max_value);
return decayed_value;
}
// Packs a normalized input value through quantization
inline rtm::vector4f RTM_SIMD_CALL pack_vector4_uXX(rtm::vector4f_arg0 value, const quantization_scales& scales)
{
using namespace rtm;
ACL_ASSERT(vector_all_greater_equal(value, vector_zero()) && vector_all_less_equal(value, rtm::vector_set(1.0F)), "Expected normalized unsigned input value: %f, %f, %f, %f", (float)vector_get_x(value), (float)vector_get_y(value), (float)vector_get_z(value), (float)vector_get_w(value));
return vector_round_symmetric(vector_mul(value, scales.max_value));
}
inline void quantize_scalarf_track(track_list_context& context, uint32_t track_index)
{
using namespace rtm;
const track& ref_track = (*context.reference_list)[track_index];
track_vector4f& mut_track = track_cast<track_vector4f>(context.track_list[track_index]);
const vector4f precision = vector_load1(&mut_track.get_description().precision);
const uint32_t ref_element_size = ref_track.get_sample_size();
const uint32_t num_samples = mut_track.get_num_samples();
const scalarf_range& range = context.range_list[track_index].range.scalarf;
const vector4f range_min = range.get_min();
const vector4f range_extent = range.get_extent();
const vector4f zero = vector_zero();
const mask4f all_true_mask = mask_set(true, true, true, true);
mask4f sample_mask = mask_set(false, false, false, false);
std::memcpy(&sample_mask, &all_true_mask, ref_element_size);
vector4f raw_sample = zero;
uint8_t best_bit_rate = k_highest_bit_rate; // Default to raw if we fail to find something better
// First we look for the best bit rate possible that keeps us within our precision target
for (uint8_t bit_rate = k_highest_bit_rate - 1; bit_rate != 0; --bit_rate) // Skip the raw bit rate and the constant bit rate
{
const uint32_t num_bits_at_bit_rate = get_num_bits_at_bit_rate(bit_rate);
const quantization_scales scales(num_bits_at_bit_rate);
bool is_error_to_high = false;
for (uint32_t sample_index = 0; sample_index < num_samples; ++sample_index)
{
std::memcpy(&raw_sample, ref_track[sample_index], ref_element_size);
const vector4f normalized_sample = mut_track[sample_index];
// Decay our value through quantization
const vector4f decayed_normalized_sample = decay_vector4_uXX(normalized_sample, scales);
// Undo normalization
const vector4f decayed_sample = vector_mul_add(decayed_normalized_sample, range_extent, range_min);
const vector4f delta = vector_abs(vector_sub(raw_sample, decayed_sample));
const vector4f masked_delta = vector_select(sample_mask, delta, zero);
if (!vector_all_less_equal(masked_delta, precision))
{
is_error_to_high = true;
break;
}
}
if (is_error_to_high)
break; // Our error is too high, use the previous bit rate
// We were accurate enough, this is the best bit rate so far
best_bit_rate = bit_rate;
}
context.bit_rate_list[track_index].scalar.value = best_bit_rate;
// Done, update our track with the final result
if (best_bit_rate == k_highest_bit_rate)
{
// We can't quantize this track, keep it raw
for (uint32_t sample_index = 0; sample_index < num_samples; ++sample_index)
std::memcpy(&mut_track[sample_index], ref_track[sample_index], ref_element_size);
}
else
{
// Use the selected bit rate to quantize our track
const uint32_t num_bits_at_bit_rate = get_num_bits_at_bit_rate(best_bit_rate);
const quantization_scales scales(num_bits_at_bit_rate);
for (uint32_t sample_index = 0; sample_index < num_samples; ++sample_index)
mut_track[sample_index] = pack_vector4_uXX(mut_track[sample_index], scales);
}
}
inline void quantize_tracks(track_list_context& context)
{
ACL_ASSERT(context.is_valid(), "Invalid context");
context.bit_rate_list = allocate_type_array<track_bit_rate>(*context.allocator, context.num_tracks);
for (uint32_t track_index = 0; track_index < context.num_tracks; ++track_index)
{
const bool is_track_constant = context.is_constant(track_index);
if (is_track_constant)
continue; // Constant tracks don't need to be modified
const track_range& range = context.range_list[track_index];
switch (range.category)
{
case track_category8::scalarf:
quantize_scalarf_track(context, track_index);
break;
default:
ACL_ASSERT(false, "Invalid track category");
break;
}
}
}
}
}
ACL_IMPL_FILE_PRAGMA_POP