cocos-engine-external/sources/acl/compression/impl/normalize_streams.h

402 lines
19 KiB
C++

#pragma once
////////////////////////////////////////////////////////////////////////////////
// The MIT License (MIT)
//
// Copyright (c) 2017 Nicholas Frechette & Animation Compression Library contributors
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
////////////////////////////////////////////////////////////////////////////////
#include "acl/core/iallocator.h"
#include "acl/core/impl/compiler_utils.h"
#include "acl/core/error.h"
#include "acl/core/enum_utils.h"
#include "acl/core/track_formats.h"
#include "acl/core/track_types.h"
#include "acl/core/range_reduction_types.h"
#include "acl/compression/impl/clip_context.h"
#include <rtm/vector4f.h>
#include <cstdint>
ACL_IMPL_FILE_PRAGMA_PUSH
namespace acl
{
namespace acl_impl
{
inline TrackStreamRange calculate_track_range(const TrackStream& stream, bool is_vector4)
{
rtm::vector4f min = rtm::vector_set(1e10F);
rtm::vector4f max = rtm::vector_set(-1e10F);
const uint32_t num_samples = stream.get_num_samples();
for (uint32_t sample_index = 0; sample_index < num_samples; ++sample_index)
{
const rtm::vector4f sample = stream.get_raw_sample<rtm::vector4f>(sample_index);
min = rtm::vector_min(min, sample);
max = rtm::vector_max(max, sample);
}
// Set the 4th component to zero if we don't need it
if (!is_vector4)
{
min = rtm::vector_set_w(min, 0.0F);
max = rtm::vector_set_w(max, 0.0F);
}
return TrackStreamRange::from_min_max(min, max);
}
inline void extract_bone_ranges_impl(const SegmentContext& segment, BoneRanges* bone_ranges)
{
const bool has_scale = segment_context_has_scale(segment);
for (uint32_t bone_index = 0; bone_index < segment.num_bones; ++bone_index)
{
const BoneStreams& bone_stream = segment.bone_streams[bone_index];
BoneRanges& bone_range = bone_ranges[bone_index];
bone_range.rotation = calculate_track_range(bone_stream.rotations, true);
bone_range.translation = calculate_track_range(bone_stream.translations, false);
if (has_scale)
bone_range.scale = calculate_track_range(bone_stream.scales, false);
else
bone_range.scale = TrackStreamRange();
}
}
inline void extract_clip_bone_ranges(iallocator& allocator, clip_context& context)
{
context.ranges = allocate_type_array<BoneRanges>(allocator, context.num_bones);
ACL_ASSERT(context.num_segments == 1, "context must contain a single segment!");
const SegmentContext& segment = context.segments[0];
acl_impl::extract_bone_ranges_impl(segment, context.ranges);
}
inline void extract_segment_bone_ranges(iallocator& allocator, clip_context& context)
{
const rtm::vector4f one = rtm::vector_set(1.0F);
const rtm::vector4f zero = rtm::vector_zero();
const float max_range_value_flt = float((1 << k_segment_range_reduction_num_bits_per_component) - 1);
const rtm::vector4f max_range_value = rtm::vector_set(max_range_value_flt);
const rtm::vector4f inv_max_range_value = rtm::vector_set(1.0F / max_range_value_flt);
// Segment ranges are always normalized and live between [0.0 ... 1.0]
auto fixup_range = [&](const TrackStreamRange& range)
{
// In our compressed format, we store the minimum value of the track range quantized on 8 bits.
// To get the best accuracy, we pick the value closest to the true minimum that is slightly lower.
// This is to ensure that we encompass the lowest value even after quantization.
const rtm::vector4f range_min = range.get_min();
const rtm::vector4f scaled_min = rtm::vector_mul(range_min, max_range_value);
const rtm::vector4f quantized_min0 = rtm::vector_clamp(rtm::vector_floor(scaled_min), zero, max_range_value);
const rtm::vector4f quantized_min1 = rtm::vector_max(rtm::vector_sub(quantized_min0, one), zero);
const rtm::vector4f padded_range_min0 = rtm::vector_mul(quantized_min0, inv_max_range_value);
const rtm::vector4f padded_range_min1 = rtm::vector_mul(quantized_min1, inv_max_range_value);
// Check if min0 is below or equal to our original range minimum value, if it is, it is good
// enough to use otherwise min1 is guaranteed to be lower.
const rtm::mask4f is_min0_lower_mask = rtm::vector_less_equal(padded_range_min0, range_min);
const rtm::vector4f padded_range_min = rtm::vector_select(is_min0_lower_mask, padded_range_min0, padded_range_min1);
// The story is different for the extent. We do not store the max, instead we use the extent
// for performance reasons: a single mul/add is required to reconstruct the original value.
// Now that our minimum value changed, our extent also changed.
// We want to pick the extent value that brings us closest to our original max value while
// being slightly larger to encompass it.
const rtm::vector4f range_max = range.get_max();
const rtm::vector4f range_extent = rtm::vector_sub(range_max, padded_range_min);
const rtm::vector4f scaled_extent = rtm::vector_mul(range_extent, max_range_value);
const rtm::vector4f quantized_extent0 = rtm::vector_clamp(rtm::vector_ceil(scaled_extent), zero, max_range_value);
const rtm::vector4f quantized_extent1 = rtm::vector_min(rtm::vector_add(quantized_extent0, one), max_range_value);
const rtm::vector4f padded_range_extent0 = rtm::vector_mul(quantized_extent0, inv_max_range_value);
const rtm::vector4f padded_range_extent1 = rtm::vector_mul(quantized_extent1, inv_max_range_value);
// Check if extent0 is above or equal to our original range maximum value, if it is, it is good
// enough to use otherwise extent1 is guaranteed to be higher.
const rtm::mask4f is_extent0_higher_mask = rtm::vector_greater_equal(padded_range_extent0, range_max);
const rtm::vector4f padded_range_extent = rtm::vector_select(is_extent0_higher_mask, padded_range_extent0, padded_range_extent1);
return TrackStreamRange::from_min_extent(padded_range_min, padded_range_extent);
};
for (SegmentContext& segment : context.segment_iterator())
{
segment.ranges = allocate_type_array<BoneRanges>(allocator, segment.num_bones);
acl_impl::extract_bone_ranges_impl(segment, segment.ranges);
for (uint32_t bone_index = 0; bone_index < segment.num_bones; ++bone_index)
{
const BoneStreams& bone_stream = segment.bone_streams[bone_index];
BoneRanges& bone_range = segment.ranges[bone_index];
if (!bone_stream.is_rotation_constant && context.are_rotations_normalized)
bone_range.rotation = fixup_range(bone_range.rotation);
if (!bone_stream.is_translation_constant && context.are_translations_normalized)
bone_range.translation = fixup_range(bone_range.translation);
if (!bone_stream.is_scale_constant && context.are_scales_normalized)
bone_range.scale = fixup_range(bone_range.scale);
}
}
}
inline rtm::vector4f RTM_SIMD_CALL normalize_sample(rtm::vector4f_arg0 sample, const TrackStreamRange& range)
{
const rtm::vector4f range_min = range.get_min();
const rtm::vector4f range_extent = range.get_extent();
const rtm::mask4f is_range_zero_mask = rtm::vector_less_than(range_extent, rtm::vector_set(0.000000001F));
rtm::vector4f normalized_sample = rtm::vector_div(rtm::vector_sub(sample, range_min), range_extent);
// Clamp because the division might be imprecise
normalized_sample = rtm::vector_min(normalized_sample, rtm::vector_set(1.0F));
return rtm::vector_select(is_range_zero_mask, rtm::vector_zero(), normalized_sample);
}
inline void normalize_rotation_streams(BoneStreams* bone_streams, const BoneRanges* bone_ranges, uint32_t num_bones)
{
const rtm::vector4f one = rtm::vector_set(1.0F);
const rtm::vector4f zero = rtm::vector_zero();
for (uint32_t bone_index = 0; bone_index < num_bones; ++bone_index)
{
BoneStreams& bone_stream = bone_streams[bone_index];
const BoneRanges& bone_range = bone_ranges[bone_index];
// We expect all our samples to have the same width of sizeof(rtm::vector4f)
ACL_ASSERT(bone_stream.rotations.get_sample_size() == sizeof(rtm::vector4f), "Unexpected rotation sample size. %u != %zu", bone_stream.rotations.get_sample_size(), sizeof(rtm::vector4f));
// Constant or default tracks are not normalized
if (bone_stream.is_rotation_constant)
continue;
const uint32_t num_samples = bone_stream.rotations.get_num_samples();
const rtm::vector4f range_min = bone_range.rotation.get_min();
const rtm::vector4f range_extent = bone_range.rotation.get_extent();
const rtm::mask4f is_range_zero_mask = rtm::vector_less_than(range_extent, rtm::vector_set(0.000000001F));
for (uint32_t sample_index = 0; sample_index < num_samples; ++sample_index)
{
// normalized value is between [0.0 .. 1.0]
// value = (normalized value * range extent) + range min
// normalized value = (value - range min) / range extent
const rtm::vector4f rotation = bone_stream.rotations.get_raw_sample<rtm::vector4f>(sample_index);
rtm::vector4f normalized_rotation = rtm::vector_div(rtm::vector_sub(rotation, range_min), range_extent);
// Clamp because the division might be imprecise
normalized_rotation = rtm::vector_min(normalized_rotation, one);
normalized_rotation = rtm::vector_select(is_range_zero_mask, zero, normalized_rotation);
#if defined(ACL_HAS_ASSERT_CHECKS)
switch (bone_stream.rotations.get_rotation_format())
{
case rotation_format8::quatf_full:
ACL_ASSERT(rtm::vector_all_greater_equal(normalized_rotation, zero) && rtm::vector_all_less_equal(normalized_rotation, one), "Invalid normalized rotation. 0.0 <= [%f, %f, %f, %f] <= 1.0", (float)rtm::vector_get_x(normalized_rotation), (float)rtm::vector_get_y(normalized_rotation), (float)rtm::vector_get_z(normalized_rotation), (float)rtm::vector_get_w(normalized_rotation));
break;
case rotation_format8::quatf_drop_w_full:
case rotation_format8::quatf_drop_w_variable:
ACL_ASSERT(rtm::vector_all_greater_equal3(normalized_rotation, zero) && rtm::vector_all_less_equal3(normalized_rotation, one), "Invalid normalized rotation. 0.0 <= [%f, %f, %f] <= 1.0", (float)rtm::vector_get_x(normalized_rotation), (float)rtm::vector_get_y(normalized_rotation), (float)rtm::vector_get_z(normalized_rotation));
break;
}
#endif
bone_stream.rotations.set_raw_sample(sample_index, normalized_rotation);
}
}
}
inline void normalize_translation_streams(BoneStreams* bone_streams, const BoneRanges* bone_ranges, uint32_t num_bones)
{
const rtm::vector4f one = rtm::vector_set(1.0F);
const rtm::vector4f zero = rtm::vector_zero();
for (uint32_t bone_index = 0; bone_index < num_bones; ++bone_index)
{
BoneStreams& bone_stream = bone_streams[bone_index];
const BoneRanges& bone_range = bone_ranges[bone_index];
// We expect all our samples to have the same width of sizeof(rtm::vector4f)
ACL_ASSERT(bone_stream.translations.get_sample_size() == sizeof(rtm::vector4f), "Unexpected translation sample size. %u != %zu", bone_stream.translations.get_sample_size(), sizeof(rtm::vector4f));
// Constant or default tracks are not normalized
if (bone_stream.is_translation_constant)
continue;
const uint32_t num_samples = bone_stream.translations.get_num_samples();
const rtm::vector4f range_min = bone_range.translation.get_min();
const rtm::vector4f range_extent = bone_range.translation.get_extent();
const rtm::mask4f is_range_zero_mask = rtm::vector_less_than(range_extent, rtm::vector_set(0.000000001F));
for (uint32_t sample_index = 0; sample_index < num_samples; ++sample_index)
{
// normalized value is between [0.0 .. 1.0]
// value = (normalized value * range extent) + range min
// normalized value = (value - range min) / range extent
const rtm::vector4f translation = bone_stream.translations.get_raw_sample<rtm::vector4f>(sample_index);
rtm::vector4f normalized_translation = rtm::vector_div(rtm::vector_sub(translation, range_min), range_extent);
// Clamp because the division might be imprecise
normalized_translation = rtm::vector_min(normalized_translation, one);
normalized_translation = rtm::vector_select(is_range_zero_mask, zero, normalized_translation);
ACL_ASSERT(rtm::vector_all_greater_equal3(normalized_translation, zero) && rtm::vector_all_less_equal3(normalized_translation, one), "Invalid normalized translation. 0.0 <= [%f, %f, %f] <= 1.0", (float)rtm::vector_get_x(normalized_translation), (float)rtm::vector_get_y(normalized_translation), (float)rtm::vector_get_z(normalized_translation));
bone_stream.translations.set_raw_sample(sample_index, normalized_translation);
}
}
}
inline void normalize_scale_streams(BoneStreams* bone_streams, const BoneRanges* bone_ranges, uint32_t num_bones)
{
const rtm::vector4f one = rtm::vector_set(1.0F);
const rtm::vector4f zero = rtm::vector_zero();
for (uint32_t bone_index = 0; bone_index < num_bones; ++bone_index)
{
BoneStreams& bone_stream = bone_streams[bone_index];
const BoneRanges& bone_range = bone_ranges[bone_index];
// We expect all our samples to have the same width of sizeof(rtm::vector4f)
ACL_ASSERT(bone_stream.scales.get_sample_size() == sizeof(rtm::vector4f), "Unexpected scale sample size. %u != %zu", bone_stream.scales.get_sample_size(), sizeof(rtm::vector4f));
// Constant or default tracks are not normalized
if (bone_stream.is_scale_constant)
continue;
const uint32_t num_samples = bone_stream.scales.get_num_samples();
const rtm::vector4f range_min = bone_range.scale.get_min();
const rtm::vector4f range_extent = bone_range.scale.get_extent();
const rtm::mask4f is_range_zero_mask = rtm::vector_less_than(range_extent, rtm::vector_set(0.000000001F));
for (uint32_t sample_index = 0; sample_index < num_samples; ++sample_index)
{
// normalized value is between [0.0 .. 1.0]
// value = (normalized value * range extent) + range min
// normalized value = (value - range min) / range extent
const rtm::vector4f scale = bone_stream.scales.get_raw_sample<rtm::vector4f>(sample_index);
rtm::vector4f normalized_scale = rtm::vector_div(rtm::vector_sub(scale, range_min), range_extent);
// Clamp because the division might be imprecise
normalized_scale = rtm::vector_min(normalized_scale, one);
normalized_scale = rtm::vector_select(is_range_zero_mask, zero, normalized_scale);
ACL_ASSERT(rtm::vector_all_greater_equal3(normalized_scale, zero) && rtm::vector_all_less_equal3(normalized_scale, one), "Invalid normalized scale. 0.0 <= [%f, %f, %f] <= 1.0", (float)rtm::vector_get_x(normalized_scale), (float)rtm::vector_get_y(normalized_scale), (float)rtm::vector_get_z(normalized_scale));
bone_stream.scales.set_raw_sample(sample_index, normalized_scale);
}
}
}
inline void normalize_clip_streams(clip_context& context, range_reduction_flags8 range_reduction)
{
ACL_ASSERT(context.num_segments == 1, "context must contain a single segment!");
SegmentContext& segment = context.segments[0];
const bool has_scale = segment_context_has_scale(segment);
if (are_any_enum_flags_set(range_reduction, range_reduction_flags8::rotations))
{
normalize_rotation_streams(segment.bone_streams, context.ranges, segment.num_bones);
context.are_rotations_normalized = true;
}
if (are_any_enum_flags_set(range_reduction, range_reduction_flags8::translations))
{
normalize_translation_streams(segment.bone_streams, context.ranges, segment.num_bones);
context.are_translations_normalized = true;
}
if (has_scale && are_any_enum_flags_set(range_reduction, range_reduction_flags8::scales))
{
normalize_scale_streams(segment.bone_streams, context.ranges, segment.num_bones);
context.are_scales_normalized = true;
}
}
inline void normalize_segment_streams(clip_context& context, range_reduction_flags8 range_reduction)
{
for (SegmentContext& segment : context.segment_iterator())
{
if (are_any_enum_flags_set(range_reduction, range_reduction_flags8::rotations))
{
normalize_rotation_streams(segment.bone_streams, segment.ranges, segment.num_bones);
segment.are_rotations_normalized = true;
}
if (are_any_enum_flags_set(range_reduction, range_reduction_flags8::translations))
{
normalize_translation_streams(segment.bone_streams, segment.ranges, segment.num_bones);
segment.are_translations_normalized = true;
}
const bool has_scale = segment_context_has_scale(segment);
if (has_scale && are_any_enum_flags_set(range_reduction, range_reduction_flags8::scales))
{
normalize_scale_streams(segment.bone_streams, segment.ranges, segment.num_bones);
segment.are_scales_normalized = true;
}
uint32_t range_data_size = 0;
uint32_t range_data_rotation_num = 0;
for (uint32_t bone_index = 0; bone_index < segment.num_bones; ++bone_index)
{
const BoneStreams& bone_stream = segment.bone_streams[bone_index];
if (bone_stream.is_stripped_from_output())
continue;
if (are_any_enum_flags_set(range_reduction, range_reduction_flags8::rotations) && !bone_stream.is_rotation_constant)
{
ACL_ASSERT(bone_stream.rotations.get_rotation_format() != rotation_format8::quatf_full, "Normalization only supported on drop W variants");
range_data_size += k_segment_range_reduction_num_bytes_per_component * 6;
range_data_rotation_num++;
}
if (are_any_enum_flags_set(range_reduction, range_reduction_flags8::translations) && !bone_stream.is_translation_constant)
range_data_size += k_segment_range_reduction_num_bytes_per_component * 6;
if (are_any_enum_flags_set(range_reduction, range_reduction_flags8::scales) && !bone_stream.is_scale_constant)
range_data_size += k_segment_range_reduction_num_bytes_per_component * 6;
}
// The last partial rotation group is padded to 4 elements to keep decompression fast
const uint32_t partial_group_size_rotation = range_data_rotation_num % 4;
if (partial_group_size_rotation != 0)
range_data_size += (4 - partial_group_size_rotation) * k_segment_range_reduction_num_bytes_per_component * 6;
segment.range_data_size = range_data_size;
}
}
}
}
ACL_IMPL_FILE_PRAGMA_POP