// cocos-engine-external/sources/acl/compression/impl/quantize_streams.h

#pragma once
////////////////////////////////////////////////////////////////////////////////
// The MIT License (MIT)
//
// Copyright (c) 2017 Nicholas Frechette & Animation Compression Library contributors
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
////////////////////////////////////////////////////////////////////////////////
#include "acl/core/iallocator.h"
#include "acl/core/impl/compiler_utils.h"
#include "acl/core/error.h"
#include "acl/core/track_formats.h"
#include "acl/core/utils.h"
#include "acl/core/variable_bit_rates.h"
#include "acl/math/quat_packing.h"
#include "acl/math/vector4_packing.h"
#include "acl/compression/impl/track_bit_rate_database.h"
#include "acl/compression/impl/transform_bit_rate_permutations.h"
#include "acl/compression/impl/clip_context.h"
#include "acl/compression/impl/sample_streams.h"
#include "acl/compression/impl/normalize_streams.h"
#include "acl/compression/impl/convert_rotation_streams.h"
#include "acl/compression/transform_error_metrics.h"
#include "acl/compression/compression_settings.h"
#include <rtm/quatf.h>
#include <rtm/vector4f.h>
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <utility>
// 0 = no debug info, 1 = basic info, 2 = verbose
#define ACL_IMPL_DEBUG_VARIABLE_QUANTIZATION 0
// 0 = no profiling, 1 = we perform quantization 10 times in a row for every segment
#define ACL_IMPL_PROFILE_MATH 0
#if ACL_IMPL_PROFILE_MATH && defined(__ANDROID__)
#include <android/log.h>
#endif
ACL_IMPL_FILE_PRAGMA_PUSH
namespace acl
{
namespace acl_impl
{
struct quantization_context
{
iallocator& allocator;
clip_context& clip;
const clip_context& raw_clip;
const clip_context& additive_base_clip;
SegmentContext* segment;
BoneStreams* bone_streams;
const transform_metadata* metadata;
uint32_t num_bones;
const itransform_error_metric* error_metric;
track_bit_rate_database bit_rate_database;
single_track_query local_query;
hierarchical_track_query object_query;
uint32_t num_samples;
uint32_t segment_sample_start_index;
float sample_rate;
float clip_duration;
float error_threshold; // Error threshold of the current bone being optimized
bool has_scale;
bool has_additive_base;
bool needs_conversion;
rotation_format8 rotation_format;
vector_format8 translation_format;
vector_format8 scale_format;
compression_level8 compression_level;
const BoneStreams* raw_bone_streams;
rtm::qvvf* additive_local_pose; // 1 per transform
rtm::qvvf* raw_local_pose; // 1 per transform
rtm::qvvf* lossy_local_pose; // 1 per transform
uint8_t* raw_local_transforms; // 1 per transform per sample in segment
uint8_t* base_local_transforms; // 1 per transform per sample in segment
uint8_t* raw_object_transforms; // 1 per transform per sample in segment
uint8_t* base_object_transforms; // 1 per transform per sample in segment
uint8_t* local_transforms_converted; // 1 per transform
uint8_t* lossy_object_pose; // 1 per transform
size_t metric_transform_size;
BoneBitRate* bit_rate_per_bone; // 1 per transform
uint32_t* parent_transform_indices; // 1 per transform
uint32_t* self_transform_indices; // 1 per transform
uint32_t* chain_bone_indices; // 1 per transform
uint32_t num_bones_in_chain;
uint32_t padding1; // unused
quantization_context(iallocator& allocator_, clip_context& clip_, const clip_context& raw_clip_, const clip_context& additive_base_clip_, const compression_settings& settings_)
: allocator(allocator_)
, clip(clip_)
, raw_clip(raw_clip_)
, additive_base_clip(additive_base_clip_)
, segment(nullptr)
, bone_streams(nullptr)
, metadata(clip_.metadata)
, num_bones(clip_.num_bones)
, error_metric(settings_.error_metric)
, bit_rate_database(allocator_, settings_.rotation_format, settings_.translation_format, settings_.scale_format, clip_.segments->bone_streams, raw_clip_.segments->bone_streams, clip_.num_bones, clip_.segments->num_samples)
, local_query()
, object_query(allocator_)
, num_samples(~0U)
, segment_sample_start_index(~0U)
, sample_rate(clip_.sample_rate)
, clip_duration(clip_.duration)
, error_threshold(0.0F)
, has_scale(clip_.has_scale)
, has_additive_base(clip_.has_additive_base)
, rotation_format(settings_.rotation_format)
, translation_format(settings_.translation_format)
, scale_format(settings_.scale_format)
, compression_level(settings_.level)
, raw_bone_streams(raw_clip_.segments[0].bone_streams)
, num_bones_in_chain(0)
{
local_query.bind(bit_rate_database);
object_query.bind(bit_rate_database);
needs_conversion = settings_.error_metric->needs_conversion(clip_.has_scale);
const size_t metric_transform_size_ = settings_.error_metric->get_transform_size(clip_.has_scale);
metric_transform_size = metric_transform_size_;
additive_local_pose = clip_.has_additive_base ? allocate_type_array<rtm::qvvf>(allocator, num_bones) : nullptr;
raw_local_pose = allocate_type_array<rtm::qvvf>(allocator, num_bones);
lossy_local_pose = allocate_type_array<rtm::qvvf>(allocator, num_bones);
raw_local_transforms = allocate_type_array_aligned<uint8_t>(allocator, metric_transform_size_ * num_bones * clip_.segments->num_samples, 64);
base_local_transforms = clip_.has_additive_base ? allocate_type_array_aligned<uint8_t>(allocator, metric_transform_size_ * num_bones * clip_.segments->num_samples, 64) : nullptr;
raw_object_transforms = allocate_type_array_aligned<uint8_t>(allocator, metric_transform_size_ * num_bones * clip_.segments->num_samples, 64);
base_object_transforms = clip_.has_additive_base ? allocate_type_array_aligned<uint8_t>(allocator, metric_transform_size_ * num_bones * clip_.segments->num_samples, 64) : nullptr;
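// The four per-sample buffers above are laid out sample-major: the transforms for
// sample i start at byte offset i * metric_transform_size * num_bones (see set_segment below)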
local_transforms_converted = needs_conversion ? allocate_type_array_aligned<uint8_t>(allocator, metric_transform_size_ * num_bones, 64) : nullptr;
lossy_object_pose = allocate_type_array_aligned<uint8_t>(allocator, metric_transform_size_ * num_bones, 64);
bit_rate_per_bone = allocate_type_array<BoneBitRate>(allocator, num_bones);
parent_transform_indices = allocate_type_array<uint32_t>(allocator, num_bones);
self_transform_indices = allocate_type_array<uint32_t>(allocator, num_bones);
chain_bone_indices = allocate_type_array<uint32_t>(allocator, num_bones);
for (uint32_t transform_index = 0; transform_index < num_bones; ++transform_index)
{
const transform_metadata& metadata_ = clip_.metadata[transform_index];
parent_transform_indices[transform_index] = metadata_.parent_index;
self_transform_indices[transform_index] = transform_index;
}
}
~quantization_context()
{
deallocate_type_array(allocator, additive_local_pose, num_bones);
deallocate_type_array(allocator, raw_local_pose, num_bones);
deallocate_type_array(allocator, lossy_local_pose, num_bones);
deallocate_type_array(allocator, raw_local_transforms, metric_transform_size * num_bones * clip.segments->num_samples);
deallocate_type_array(allocator, base_local_transforms, metric_transform_size * num_bones * clip.segments->num_samples);
deallocate_type_array(allocator, raw_object_transforms, metric_transform_size * num_bones * clip.segments->num_samples);
deallocate_type_array(allocator, base_object_transforms, metric_transform_size * num_bones * clip.segments->num_samples);
deallocate_type_array(allocator, local_transforms_converted, metric_transform_size * num_bones);
deallocate_type_array(allocator, lossy_object_pose, metric_transform_size * num_bones);
deallocate_type_array(allocator, bit_rate_per_bone, num_bones);
deallocate_type_array(allocator, parent_transform_indices, num_bones);
deallocate_type_array(allocator, self_transform_indices, num_bones);
deallocate_type_array(allocator, chain_bone_indices, num_bones);
}
void set_segment(SegmentContext& segment_)
{
segment = &segment_;
bone_streams = segment_.bone_streams;
num_samples = segment_.num_samples;
segment_sample_start_index = segment_.clip_sample_offset;
bit_rate_database.set_segment(segment_.bone_streams, segment_.num_bones, segment_.num_samples);
// Cache all the raw local/object transforms and the base local transforms since they never change
const itransform_error_metric* error_metric_ = error_metric;
const size_t sample_transform_size = metric_transform_size * num_bones;
const auto convert_transforms_impl = std::mem_fn(has_scale ? &itransform_error_metric::convert_transforms : &itransform_error_metric::convert_transforms_no_scale);
const auto apply_additive_to_base_impl = std::mem_fn(has_scale ? &itransform_error_metric::apply_additive_to_base : &itransform_error_metric::apply_additive_to_base_no_scale);
const auto local_to_object_space_impl = std::mem_fn(has_scale ? &itransform_error_metric::local_to_object_space : &itransform_error_metric::local_to_object_space_no_scale);
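// The scale/no-scale metric variants are bound once through std::mem_fn so the
// per-sample loop below doesn't re-branch on has_scale at every sample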
itransform_error_metric::convert_transforms_args convert_transforms_args_raw;
convert_transforms_args_raw.dirty_transform_indices = self_transform_indices;
convert_transforms_args_raw.num_dirty_transforms = num_bones;
convert_transforms_args_raw.transforms = raw_local_pose;
convert_transforms_args_raw.num_transforms = num_bones;
itransform_error_metric::convert_transforms_args convert_transforms_args_base = convert_transforms_args_raw;
convert_transforms_args_base.transforms = additive_local_pose;
itransform_error_metric::apply_additive_to_base_args apply_additive_to_base_args_raw;
apply_additive_to_base_args_raw.dirty_transform_indices = self_transform_indices;
apply_additive_to_base_args_raw.num_dirty_transforms = num_bones;
apply_additive_to_base_args_raw.local_transforms = nullptr;
apply_additive_to_base_args_raw.base_transforms = nullptr;
apply_additive_to_base_args_raw.num_transforms = num_bones;
itransform_error_metric::local_to_object_space_args local_to_object_space_args_raw;
local_to_object_space_args_raw.dirty_transform_indices = self_transform_indices;
local_to_object_space_args_raw.num_dirty_transforms = num_bones;
local_to_object_space_args_raw.parent_transform_indices = parent_transform_indices;
local_to_object_space_args_raw.local_transforms = nullptr;
local_to_object_space_args_raw.num_transforms = num_bones;
for (uint32_t sample_index = 0; sample_index < segment_.num_samples; ++sample_index)
{
// Sample our streams and calculate the error
// The sample time is calculated from the full clip duration to be consistent with decompression
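// e.g. for a segment with clip_sample_offset 32 in a 30 FPS clip, sample_index 4 maps
// to sample_time = min((32 + 4) / 30, clip_duration)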
const float sample_time = rtm::scalar_min(float(segment_.clip_sample_offset + sample_index) / sample_rate, clip_duration);
sample_streams(raw_bone_streams, num_bones, sample_time, raw_local_pose);
uint8_t* sample_raw_local_transforms = raw_local_transforms + (sample_index * sample_transform_size);
if (needs_conversion)
convert_transforms_impl(error_metric_, convert_transforms_args_raw, sample_raw_local_transforms);
else
std::memcpy(sample_raw_local_transforms, raw_local_pose, sample_transform_size);
if (has_additive_base)
{
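// The additive base clip may have a different duration and sample count; sampling it
// at the same normalized time keeps the base pose in sync with the additive pose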
const float normalized_sample_time = additive_base_clip.num_samples > 1 ? (sample_time / clip_duration) : 0.0F;
const float additive_sample_time = additive_base_clip.num_samples > 1 ? (normalized_sample_time * additive_base_clip.duration) : 0.0F;
sample_streams(additive_base_clip.segments[0].bone_streams, num_bones, additive_sample_time, additive_local_pose);
uint8_t* sample_base_local_transforms = base_local_transforms + (sample_index * sample_transform_size);
if (needs_conversion)
convert_transforms_impl(error_metric_, convert_transforms_args_base, sample_base_local_transforms);
else
std::memcpy(sample_base_local_transforms, additive_local_pose, sample_transform_size);
apply_additive_to_base_args_raw.local_transforms = sample_raw_local_transforms;
apply_additive_to_base_args_raw.base_transforms = sample_base_local_transforms;
apply_additive_to_base_impl(error_metric_, apply_additive_to_base_args_raw, sample_raw_local_transforms);
}
local_to_object_space_args_raw.local_transforms = sample_raw_local_transforms;
uint8_t* sample_raw_object_transforms = raw_object_transforms + (sample_index * sample_transform_size);
local_to_object_space_impl(error_metric_, local_to_object_space_args_raw, sample_raw_object_transforms);
}
}
bool is_valid() const { return segment != nullptr; }
quantization_context(const quantization_context&) = delete;
quantization_context(quantization_context&&) = delete;
quantization_context& operator=(const quantization_context&) = delete;
quantization_context& operator=(quantization_context&&) = delete;
};
inline void quantize_fixed_rotation_stream(iallocator& allocator, const RotationTrackStream& raw_stream, rotation_format8 rotation_format, RotationTrackStream& out_quantized_stream)
{
// We expect all our samples to have the same width of sizeof(rtm::vector4f)
ACL_ASSERT(raw_stream.get_sample_size() == sizeof(rtm::vector4f), "Unexpected rotation sample size. %u != %zu", raw_stream.get_sample_size(), sizeof(rtm::vector4f));
const uint32_t num_samples = raw_stream.get_num_samples();
const uint32_t rotation_sample_size = get_packed_rotation_size(rotation_format);
const float sample_rate = raw_stream.get_sample_rate();
RotationTrackStream quantized_stream(allocator, num_samples, rotation_sample_size, sample_rate, rotation_format);
for (uint32_t sample_index = 0; sample_index < num_samples; ++sample_index)
{
const rtm::quatf rotation = raw_stream.get_raw_sample<rtm::quatf>(sample_index);
uint8_t* quantized_ptr = quantized_stream.get_raw_sample_ptr(sample_index);
switch (rotation_format)
{
case rotation_format8::quatf_full:
pack_vector4_128(rtm::quat_to_vector(rotation), quantized_ptr);
break;
case rotation_format8::quatf_drop_w_full:
pack_vector3_96(rtm::quat_to_vector(rotation), quantized_ptr);
break;
case rotation_format8::quatf_drop_w_variable:
default:
ACL_ASSERT(false, "Invalid or unsupported rotation format: %s", get_rotation_format_name(rotation_format));
break;
}
}
out_quantized_stream = std::move(quantized_stream);
}
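// [Illustration, editor's note] The quatf_drop_w_* formats exploit the unit quaternion
// invariant x^2 + y^2 + z^2 + w^2 == 1: only xyz are stored, and decompression rebuilds
// the dropped component, assuming rotations were previously converted to the w >= 0
// hemisphere (see convert_rotation_streams.h). A minimal sketch of the reconstruction:
//
//     const float w_sq = 1.0F - (x * x + y * y + z * z);
//     const float w = w_sq > 0.0F ? rtm::scalar_sqrt(w_sq) : 0.0F;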
inline void quantize_fixed_rotation_stream(quantization_context& context, uint32_t bone_index, rotation_format8 rotation_format)
{
ACL_ASSERT(bone_index < context.num_bones, "Invalid bone index: %u", bone_index);
BoneStreams& bone_stream = context.bone_streams[bone_index];
// Default tracks aren't quantized
if (bone_stream.is_rotation_default)
return;
quantize_fixed_rotation_stream(context.allocator, bone_stream.rotations, rotation_format, bone_stream.rotations);
}
inline void quantize_variable_rotation_stream(quantization_context& context, const RotationTrackStream& raw_clip_stream, const RotationTrackStream& raw_segment_stream, const TrackStreamRange& clip_range, uint8_t bit_rate, RotationTrackStream& out_quantized_stream)
{
// We expect all our samples to have the same width of sizeof(rtm::vector4f)
ACL_ASSERT(raw_segment_stream.get_sample_size() == sizeof(rtm::vector4f), "Unexpected rotation sample size. %u != %zu", raw_segment_stream.get_sample_size(), sizeof(rtm::vector4f));
const uint32_t num_samples = is_constant_bit_rate(bit_rate) ? 1 : raw_segment_stream.get_num_samples();
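// 16 bytes per sample is the worst case footprint: the raw bit rate packs 3 components
// at 32 bits each (12 bytes), and the extra slack presumably keeps the *_unsafe packers
// from writing past the end of the buffer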
const uint32_t sample_size = sizeof(uint64_t) * 2;
const float sample_rate = raw_segment_stream.get_sample_rate();
RotationTrackStream quantized_stream(context.allocator, num_samples, sample_size, sample_rate, rotation_format8::quatf_drop_w_variable, bit_rate);
if (is_constant_bit_rate(bit_rate))
{
rtm::vector4f rotation = raw_clip_stream.get_raw_sample<rtm::vector4f>(context.segment_sample_start_index);
rotation = convert_rotation(rotation, rotation_format8::quatf_full, rotation_format8::quatf_drop_w_variable);
const rtm::vector4f normalized_rotation = normalize_sample(rotation, clip_range);
uint8_t* quantized_ptr = quantized_stream.get_raw_sample_ptr(0);
pack_vector3_u48_unsafe(normalized_rotation, quantized_ptr);
}
else
{
const uint32_t num_bits_at_bit_rate = get_num_bits_at_bit_rate(bit_rate);
for (uint32_t sample_index = 0; sample_index < num_samples; ++sample_index)
{
uint8_t* quantized_ptr = quantized_stream.get_raw_sample_ptr(sample_index);
if (is_raw_bit_rate(bit_rate))
{
rtm::vector4f rotation = raw_clip_stream.get_raw_sample<rtm::vector4f>(context.segment_sample_start_index + sample_index);
rotation = convert_rotation(rotation, rotation_format8::quatf_full, rotation_format8::quatf_drop_w_variable);
pack_vector3_96(rotation, quantized_ptr);
}
else
{
const rtm::quatf rotation = raw_segment_stream.get_raw_sample<rtm::quatf>(sample_index);
pack_vector3_uXX_unsafe(rtm::quat_to_vector(rotation), num_bits_at_bit_rate, quantized_ptr);
}
}
}
out_quantized_stream = std::move(quantized_stream);
}
inline void quantize_variable_rotation_stream(quantization_context& context, uint32_t bone_index, uint8_t bit_rate)
{
ACL_ASSERT(bone_index < context.num_bones, "Invalid bone index: %u", bone_index);
BoneStreams& bone_stream = context.bone_streams[bone_index];
// Default tracks aren't quantized
if (bone_stream.is_rotation_default)
return;
const BoneStreams& raw_bone_stream = context.raw_bone_streams[bone_index];
const rotation_format8 highest_bit_rate = get_highest_variant_precision(rotation_variant8::quat_drop_w);
const TrackStreamRange& bone_range = context.clip.ranges[bone_index].rotation;
// If our format is variable, constant tracks are kept fixed at the highest bit rate of the variant
if (bone_stream.is_rotation_constant)
quantize_fixed_rotation_stream(context.allocator, bone_stream.rotations, highest_bit_rate, bone_stream.rotations);
else
quantize_variable_rotation_stream(context, raw_bone_stream.rotations, bone_stream.rotations, bone_range, bit_rate, bone_stream.rotations);
}
inline void quantize_fixed_translation_stream(iallocator& allocator, const TranslationTrackStream& raw_stream, vector_format8 translation_format, TranslationTrackStream& out_quantized_stream)
{
// We expect all our samples to have the same width of sizeof(rtm::vector4f)
ACL_ASSERT(raw_stream.get_sample_size() == sizeof(rtm::vector4f), "Unexpected translation sample size. %u != %zu", raw_stream.get_sample_size(), sizeof(rtm::vector4f));
ACL_ASSERT(raw_stream.get_vector_format() == vector_format8::vector3f_full, "Expected a vector3f_full vector format, found: %s", get_vector_format_name(raw_stream.get_vector_format()));
const uint32_t num_samples = raw_stream.get_num_samples();
const uint32_t sample_size = get_packed_vector_size(translation_format);
const float sample_rate = raw_stream.get_sample_rate();
TranslationTrackStream quantized_stream(allocator, num_samples, sample_size, sample_rate, translation_format);
for (uint32_t sample_index = 0; sample_index < num_samples; ++sample_index)
{
const rtm::vector4f translation = raw_stream.get_raw_sample<rtm::vector4f>(sample_index);
uint8_t* quantized_ptr = quantized_stream.get_raw_sample_ptr(sample_index);
switch (translation_format)
{
case vector_format8::vector3f_full:
pack_vector3_96(translation, quantized_ptr);
break;
case vector_format8::vector3f_variable:
default:
ACL_ASSERT(false, "Invalid or unsupported vector format: %s", get_vector_format_name(translation_format));
break;
}
}
out_quantized_stream = std::move(quantized_stream);
}
inline void quantize_fixed_translation_stream(quantization_context& context, uint32_t bone_index, vector_format8 translation_format)
{
ACL_ASSERT(bone_index < context.num_bones, "Invalid bone index: %u", bone_index);
BoneStreams& bone_stream = context.bone_streams[bone_index];
// Default tracks aren't quantized
if (bone_stream.is_translation_default)
return;
// Constant translation tracks store the remaining sample with full precision
const vector_format8 format = bone_stream.is_translation_constant ? vector_format8::vector3f_full : translation_format;
quantize_fixed_translation_stream(context.allocator, bone_stream.translations, format, bone_stream.translations);
}
inline void quantize_variable_translation_stream(quantization_context& context, const TranslationTrackStream& raw_clip_stream, const TranslationTrackStream& raw_segment_stream, const TrackStreamRange& clip_range, uint8_t bit_rate, TranslationTrackStream& out_quantized_stream)
{
// We expect all our samples to have the same width of sizeof(rtm::vector4f)
ACL_ASSERT(raw_segment_stream.get_sample_size() == sizeof(rtm::vector4f), "Unexpected translation sample size. %u != %zu", raw_segment_stream.get_sample_size(), sizeof(rtm::vector4f));
ACL_ASSERT(raw_segment_stream.get_vector_format() == vector_format8::vector3f_full, "Expected a vector3f_full vector format, found: %s", get_vector_format_name(raw_segment_stream.get_vector_format()));
const uint32_t num_samples = is_constant_bit_rate(bit_rate) ? 1 : raw_segment_stream.get_num_samples();
const uint32_t sample_size = sizeof(uint64_t) * 2;
const float sample_rate = raw_segment_stream.get_sample_rate();
TranslationTrackStream quantized_stream(context.allocator, num_samples, sample_size, sample_rate, vector_format8::vector3f_variable, bit_rate);
if (is_constant_bit_rate(bit_rate))
{
const rtm::vector4f translation = raw_clip_stream.get_raw_sample<rtm::vector4f>(context.segment_sample_start_index);
const rtm::vector4f normalized_translation = normalize_sample(translation, clip_range);
uint8_t* quantized_ptr = quantized_stream.get_raw_sample_ptr(0);
pack_vector3_u48_unsafe(normalized_translation, quantized_ptr);
}
else
{
const uint32_t num_bits_at_bit_rate = get_num_bits_at_bit_rate(bit_rate);
for (uint32_t sample_index = 0; sample_index < num_samples; ++sample_index)
{
uint8_t* quantized_ptr = quantized_stream.get_raw_sample_ptr(sample_index);
if (is_raw_bit_rate(bit_rate))
{
const rtm::vector4f translation = raw_clip_stream.get_raw_sample<rtm::vector4f>(context.segment_sample_start_index + sample_index);
pack_vector3_96(translation, quantized_ptr);
}
else
{
const rtm::vector4f translation = raw_segment_stream.get_raw_sample<rtm::vector4f>(sample_index);
pack_vector3_uXX_unsafe(translation, num_bits_at_bit_rate, quantized_ptr);
}
}
}
out_quantized_stream = std::move(quantized_stream);
}
inline void quantize_variable_translation_stream(quantization_context& context, uint32_t bone_index, uint8_t bit_rate)
{
ACL_ASSERT(bone_index < context.num_bones, "Invalid bone index: %u", bone_index);
BoneStreams& bone_stream = context.bone_streams[bone_index];
// Default tracks aren't quantized
if (bone_stream.is_translation_default)
return;
const TrackStreamRange& bone_range = context.clip.ranges[bone_index].translation;
const BoneStreams& raw_bone_stream = context.raw_bone_streams[bone_index];
// Constant translation tracks store the remaining sample with full precision
if (bone_stream.is_translation_constant)
quantize_fixed_translation_stream(context.allocator, bone_stream.translations, vector_format8::vector3f_full, bone_stream.translations);
else
quantize_variable_translation_stream(context, raw_bone_stream.translations, bone_stream.translations, bone_range, bit_rate, bone_stream.translations);
}
inline void quantize_fixed_scale_stream(iallocator& allocator, const ScaleTrackStream& raw_stream, vector_format8 scale_format, ScaleTrackStream& out_quantized_stream)
{
// We expect all our samples to have the same width of sizeof(rtm::vector4f)
ACL_ASSERT(raw_stream.get_sample_size() == sizeof(rtm::vector4f), "Unexpected scale sample size. %u != %zu", raw_stream.get_sample_size(), sizeof(rtm::vector4f));
ACL_ASSERT(raw_stream.get_vector_format() == vector_format8::vector3f_full, "Expected a vector3f_full vector format, found: %s", get_vector_format_name(raw_stream.get_vector_format()));
const uint32_t num_samples = raw_stream.get_num_samples();
const uint32_t sample_size = get_packed_vector_size(scale_format);
const float sample_rate = raw_stream.get_sample_rate();
ScaleTrackStream quantized_stream(allocator, num_samples, sample_size, sample_rate, scale_format);
for (uint32_t sample_index = 0; sample_index < num_samples; ++sample_index)
{
const rtm::vector4f scale = raw_stream.get_raw_sample<rtm::vector4f>(sample_index);
uint8_t* quantized_ptr = quantized_stream.get_raw_sample_ptr(sample_index);
switch (scale_format)
{
case vector_format8::vector3f_full:
pack_vector3_96(scale, quantized_ptr);
break;
case vector_format8::vector3f_variable:
default:
ACL_ASSERT(false, "Invalid or unsupported vector format: %s", get_vector_format_name(scale_format));
break;
}
}
out_quantized_stream = std::move(quantized_stream);
}
inline void quantize_fixed_scale_stream(quantization_context& context, uint32_t bone_index, vector_format8 scale_format)
{
ACL_ASSERT(bone_index < context.num_bones, "Invalid bone index: %u", bone_index);
BoneStreams& bone_stream = context.bone_streams[bone_index];
// Default tracks aren't quantized
if (bone_stream.is_scale_default)
return;
// Constant scale tracks store the remaining sample with full precision
const vector_format8 format = bone_stream.is_scale_constant ? vector_format8::vector3f_full : scale_format;
quantize_fixed_scale_stream(context.allocator, bone_stream.scales, format, bone_stream.scales);
}
inline void quantize_variable_scale_stream(quantization_context& context, const ScaleTrackStream& raw_clip_stream, const ScaleTrackStream& raw_segment_stream, const TrackStreamRange& clip_range, uint8_t bit_rate, ScaleTrackStream& out_quantized_stream)
{
// We expect all our samples to have the same width of sizeof(rtm::vector4f)
ACL_ASSERT(raw_segment_stream.get_sample_size() == sizeof(rtm::vector4f), "Unexpected scale sample size. %u != %zu", raw_segment_stream.get_sample_size(), sizeof(rtm::vector4f));
ACL_ASSERT(raw_segment_stream.get_vector_format() == vector_format8::vector3f_full, "Expected a vector3f_full vector format, found: %s", get_vector_format_name(raw_segment_stream.get_vector_format()));
const uint32_t num_samples = is_constant_bit_rate(bit_rate) ? 1 : raw_segment_stream.get_num_samples();
const uint32_t sample_size = sizeof(uint64_t) * 2;
const float sample_rate = raw_segment_stream.get_sample_rate();
ScaleTrackStream quantized_stream(context.allocator, num_samples, sample_size, sample_rate, vector_format8::vector3f_variable, bit_rate);
if (is_constant_bit_rate(bit_rate))
{
const rtm::vector4f scale = raw_clip_stream.get_raw_sample<rtm::vector4f>(context.segment_sample_start_index);
const rtm::vector4f normalized_scale = normalize_sample(scale, clip_range);
uint8_t* quantized_ptr = quantized_stream.get_raw_sample_ptr(0);
pack_vector3_u48_unsafe(normalized_scale, quantized_ptr);
}
else
{
const uint32_t num_bits_at_bit_rate = get_num_bits_at_bit_rate(bit_rate);
for (uint32_t sample_index = 0; sample_index < num_samples; ++sample_index)
{
uint8_t* quantized_ptr = quantized_stream.get_raw_sample_ptr(sample_index);
if (is_raw_bit_rate(bit_rate))
{
const rtm::vector4f scale = raw_clip_stream.get_raw_sample<rtm::vector4f>(context.segment_sample_start_index + sample_index);
pack_vector3_96(scale, quantized_ptr);
}
else
{
const rtm::vector4f scale = raw_segment_stream.get_raw_sample<rtm::vector4f>(sample_index);
pack_vector3_uXX_unsafe(scale, num_bits_at_bit_rate, quantized_ptr);
}
}
}
out_quantized_stream = std::move(quantized_stream);
}
inline void quantize_variable_scale_stream(quantization_context& context, uint32_t bone_index, uint8_t bit_rate)
{
ACL_ASSERT(bone_index < context.num_bones, "Invalid bone index: %u", bone_index);
BoneStreams& bone_stream = context.bone_streams[bone_index];
// Default tracks aren't quantized
if (bone_stream.is_scale_default)
return;
const TrackStreamRange& bone_range = context.clip.ranges[bone_index].scale;
const BoneStreams& raw_bone_stream = context.raw_bone_streams[bone_index];
// Constant scale tracks store the remaining sample with full precision
if (bone_stream.is_scale_constant)
quantize_fixed_scale_stream(context.allocator, bone_stream.scales, vector_format8::vector3f_full, bone_stream.scales);
else
quantize_variable_scale_stream(context, raw_bone_stream.scales, bone_stream.scales, bone_range, bit_rate, bone_stream.scales);
}
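// until_error_too_high stops scanning at the first sample whose error reaches the
// current threshold (cheap reject); until_end_of_segment measures every sample and
// returns the true maximum error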
enum class error_scan_stop_condition { until_error_too_high, until_end_of_segment };
inline float calculate_max_error_at_bit_rate_local(quantization_context& context, uint32_t target_bone_index, error_scan_stop_condition stop_condition)
{
const itransform_error_metric* error_metric = context.error_metric;
const bool needs_conversion = context.needs_conversion;
const bool has_additive_base = context.has_additive_base;
const transform_metadata& target_bone = context.metadata[target_bone_index];
const uint32_t num_transforms = context.num_bones;
const size_t sample_transform_size = context.metric_transform_size * context.num_bones;
const float sample_rate = context.sample_rate;
const float clip_duration = context.clip_duration;
const rtm::scalarf error_threshold = rtm::scalar_set(context.error_threshold);
const auto convert_transforms_impl = std::mem_fn(context.has_scale ? &itransform_error_metric::convert_transforms : &itransform_error_metric::convert_transforms_no_scale);
const auto apply_additive_to_base_impl = std::mem_fn(context.has_scale ? &itransform_error_metric::apply_additive_to_base : &itransform_error_metric::apply_additive_to_base_no_scale);
const auto calculate_error_impl = std::mem_fn(context.has_scale ? &itransform_error_metric::calculate_error : &itransform_error_metric::calculate_error_no_scale);
itransform_error_metric::convert_transforms_args convert_transforms_args_lossy;
convert_transforms_args_lossy.dirty_transform_indices = &target_bone_index;
convert_transforms_args_lossy.num_dirty_transforms = 1;
convert_transforms_args_lossy.transforms = context.lossy_local_pose;
convert_transforms_args_lossy.num_transforms = num_transforms;
itransform_error_metric::apply_additive_to_base_args apply_additive_to_base_args_lossy;
apply_additive_to_base_args_lossy.dirty_transform_indices = &target_bone_index;
apply_additive_to_base_args_lossy.num_dirty_transforms = 1;
apply_additive_to_base_args_lossy.local_transforms = needs_conversion ? (const void*)context.local_transforms_converted : (const void*)context.lossy_local_pose;
apply_additive_to_base_args_lossy.base_transforms = nullptr;
apply_additive_to_base_args_lossy.num_transforms = num_transforms;
itransform_error_metric::calculate_error_args calculate_error_args;
calculate_error_args.transform0 = nullptr;
calculate_error_args.transform1 = needs_conversion ? (const void*)(context.local_transforms_converted + (context.metric_transform_size * target_bone_index)) : (const void*)(context.lossy_local_pose + target_bone_index);
calculate_error_args.construct_sphere_shell(target_bone.shell_distance);
const uint8_t* raw_transform = context.raw_local_transforms + (target_bone_index * context.metric_transform_size);
const uint8_t* base_transforms = context.base_local_transforms;
context.local_query.build(target_bone_index, context.bit_rate_per_bone[target_bone_index]);
float sample_indexf = float(context.segment_sample_start_index);
rtm::scalarf max_error = rtm::scalar_set(0.0F);
for (uint32_t sample_index = 0; sample_index < context.num_samples; ++sample_index)
{
// Sample our streams and calculate the error
// The sample time is calculated from the full clip duration to be consistent with decompression
const float sample_time = rtm::scalar_min(sample_indexf / sample_rate, clip_duration);
context.bit_rate_database.sample(context.local_query, sample_time, context.lossy_local_pose, num_transforms);
if (needs_conversion)
convert_transforms_impl(error_metric, convert_transforms_args_lossy, context.local_transforms_converted);
if (has_additive_base)
{
apply_additive_to_base_args_lossy.base_transforms = base_transforms;
base_transforms += sample_transform_size;
apply_additive_to_base_impl(error_metric, apply_additive_to_base_args_lossy, context.lossy_local_pose);
}
calculate_error_args.transform0 = raw_transform;
raw_transform += sample_transform_size;
const rtm::scalarf error = calculate_error_impl(error_metric, calculate_error_args);
max_error = rtm::scalar_max(max_error, error);
if (stop_condition == error_scan_stop_condition::until_error_too_high && rtm::scalar_greater_equal(error, error_threshold))
break;
sample_indexf += 1.0F;
}
return rtm::scalar_cast(max_error);
}
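// Same scan as above, but in object space: every bone in the chain leading to the
// target is sampled, converted, and accumulated through local_to_object_space before
// the error is measured at the target bone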
inline float calculate_max_error_at_bit_rate_object(quantization_context& context, uint32_t target_bone_index, error_scan_stop_condition stop_condition)
{
const itransform_error_metric* error_metric = context.error_metric;
const bool needs_conversion = context.needs_conversion;
const bool has_additive_base = context.has_additive_base;
const transform_metadata& target_bone = context.metadata[target_bone_index];
const size_t sample_transform_size = context.metric_transform_size * context.num_bones;
const float sample_rate = context.sample_rate;
const float clip_duration = context.clip_duration;
const rtm::scalarf error_threshold = rtm::scalar_set(context.error_threshold);
const auto convert_transforms_impl = std::mem_fn(context.has_scale ? &itransform_error_metric::convert_transforms : &itransform_error_metric::convert_transforms_no_scale);
const auto apply_additive_to_base_impl = std::mem_fn(context.has_scale ? &itransform_error_metric::apply_additive_to_base : &itransform_error_metric::apply_additive_to_base_no_scale);
const auto local_to_object_space_impl = std::mem_fn(context.has_scale ? &itransform_error_metric::local_to_object_space : &itransform_error_metric::local_to_object_space_no_scale);
const auto calculate_error_impl = std::mem_fn(context.has_scale ? &itransform_error_metric::calculate_error : &itransform_error_metric::calculate_error_no_scale);
itransform_error_metric::convert_transforms_args convert_transforms_args_lossy;
convert_transforms_args_lossy.dirty_transform_indices = context.chain_bone_indices;
convert_transforms_args_lossy.num_dirty_transforms = context.num_bones_in_chain;
convert_transforms_args_lossy.transforms = context.lossy_local_pose;
convert_transforms_args_lossy.num_transforms = context.num_bones;
itransform_error_metric::apply_additive_to_base_args apply_additive_to_base_args_lossy;
apply_additive_to_base_args_lossy.dirty_transform_indices = context.chain_bone_indices;
apply_additive_to_base_args_lossy.num_dirty_transforms = context.num_bones_in_chain;
apply_additive_to_base_args_lossy.local_transforms = needs_conversion ? (const void*)(context.local_transforms_converted) : (const void*)context.lossy_local_pose;
apply_additive_to_base_args_lossy.base_transforms = nullptr;
apply_additive_to_base_args_lossy.num_transforms = context.num_bones;
itransform_error_metric::local_to_object_space_args local_to_object_space_args_lossy;
local_to_object_space_args_lossy.dirty_transform_indices = context.chain_bone_indices;
local_to_object_space_args_lossy.num_dirty_transforms = context.num_bones_in_chain;
local_to_object_space_args_lossy.parent_transform_indices = context.parent_transform_indices;
local_to_object_space_args_lossy.local_transforms = needs_conversion ? (const void*)(context.local_transforms_converted) : (const void*)context.lossy_local_pose;
local_to_object_space_args_lossy.num_transforms = context.num_bones;
itransform_error_metric::calculate_error_args calculate_error_args;
calculate_error_args.transform0 = nullptr;
calculate_error_args.transform1 = context.lossy_object_pose + (target_bone_index * context.metric_transform_size);
calculate_error_args.construct_sphere_shell(target_bone.shell_distance);
const uint8_t* raw_transform = context.raw_object_transforms + (target_bone_index * context.metric_transform_size);
const uint8_t* base_transforms = context.base_local_transforms;
context.object_query.build(target_bone_index, context.bit_rate_per_bone, context.bone_streams);
float sample_indexf = float(context.segment_sample_start_index);
rtm::scalarf max_error = rtm::scalar_set(0.0F);
for (uint32_t sample_index = 0; sample_index < context.num_samples; ++sample_index)
{
// Sample our streams and calculate the error
// The sample time is calculated from the full clip duration to be consistent with decompression
const float sample_time = rtm::scalar_min(sample_indexf / sample_rate, clip_duration);
context.bit_rate_database.sample(context.object_query, sample_time, context.lossy_local_pose, context.num_bones);
if (needs_conversion)
convert_transforms_impl(error_metric, convert_transforms_args_lossy, context.local_transforms_converted);
if (has_additive_base)
{
apply_additive_to_base_args_lossy.base_transforms = base_transforms;
base_transforms += sample_transform_size;
apply_additive_to_base_impl(error_metric, apply_additive_to_base_args_lossy, context.lossy_local_pose);
}
local_to_object_space_impl(error_metric, local_to_object_space_args_lossy, context.lossy_object_pose);
calculate_error_args.transform0 = raw_transform;
raw_transform += sample_transform_size;
const rtm::scalarf error = calculate_error_impl(error_metric, calculate_error_args);
max_error = rtm::scalar_max(max_error, error);
if (stop_condition == error_scan_stop_condition::until_error_too_high && rtm::scalar_greater_equal(error, error_threshold))
break;
sample_indexf += 1.0F;
}
return rtm::scalar_cast(max_error);
}
inline void calculate_local_space_bit_rates(quantization_context& context)
{
// To minimize the bit rate, we first start by trying every permutation in local space
// until our error is acceptable.
// We try permutations from the lowest memory footprint to the highest.
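// Because the permutation tables are ordered by total transform size, the early-out
// below (transform_size != prev_transform_size && is_error_good_enough) is safe: once
// an acceptable error is found, every remaining permutation of the same size is still
// tried, and anything larger is skipped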
const uint32_t num_bones = context.num_bones;
for (uint32_t bone_index = 0; bone_index < num_bones; ++bone_index)
{
// Update our error threshold
const float error_threshold = context.metadata[bone_index].precision;
context.error_threshold = error_threshold;
// Bit rates at this point are one of three values:
// 0: if the segment track is normalized, it can be constant within the segment
// 1: if the segment track isn't normalized, it starts at the lowest bit rate
// 255: if the track is constant/default for the whole clip
const BoneBitRate bone_bit_rates = context.bit_rate_per_bone[bone_index];
if (bone_bit_rates.rotation == k_invalid_bit_rate && bone_bit_rates.translation == k_invalid_bit_rate && bone_bit_rates.scale == k_invalid_bit_rate)
{
#if ACL_IMPL_DEBUG_VARIABLE_QUANTIZATION
printf("%u: Best bit rates: %u | %u | %u\n", bone_index, bone_bit_rates.rotation, bone_bit_rates.translation, bone_bit_rates.scale);
#endif
continue; // Every track bit rate is constant/default, nothing else to do
}
BoneBitRate best_bit_rates = bone_bit_rates;
float best_error = 1.0E10F;
uint32_t prev_transform_size = ~0U;
bool is_error_good_enough = false;
if (context.has_scale)
{
const size_t num_permutations = get_array_size(acl_impl::k_local_bit_rate_permutations);
for (size_t permutation_index = 0; permutation_index < num_permutations; ++permutation_index)
{
const uint8_t rotation_bit_rate = acl_impl::k_local_bit_rate_permutations[permutation_index][0];
if (bone_bit_rates.rotation == 1)
{
if (rotation_bit_rate == 0)
continue; // Skip permutations we aren't interested in
}
else if (bone_bit_rates.rotation == k_invalid_bit_rate)
{
if (rotation_bit_rate != 0)
continue; // Skip permutations we aren't interested in
}
const uint8_t translation_bit_rate = acl_impl::k_local_bit_rate_permutations[permutation_index][1];
if (bone_bit_rates.translation == 1)
{
if (translation_bit_rate == 0)
continue; // Skip permutations we aren't interested in
}
else if (bone_bit_rates.translation == k_invalid_bit_rate)
{
if (translation_bit_rate != 0)
continue; // Skip permutations we aren't interested in
}
const uint8_t scale_bit_rate = acl_impl::k_local_bit_rate_permutations[permutation_index][2];
if (bone_bit_rates.scale == 1)
{
if (scale_bit_rate == 0)
continue; // Skip permutations we aren't interested in
}
else if (bone_bit_rates.scale == k_invalid_bit_rate)
{
if (scale_bit_rate != 0)
continue; // Skip permutations we aren't interested in
}
const uint32_t rotation_size = get_num_bits_at_bit_rate(rotation_bit_rate);
const uint32_t translation_size = get_num_bits_at_bit_rate(translation_bit_rate);
const uint32_t scale_size = get_num_bits_at_bit_rate(scale_bit_rate);
const uint32_t transform_size = rotation_size + translation_size + scale_size;
if (transform_size != prev_transform_size && is_error_good_enough)
{
// We already found the lowest transform size and we tried every permutation with that same size
break;
}
prev_transform_size = transform_size;
context.bit_rate_per_bone[bone_index].rotation = bone_bit_rates.rotation != k_invalid_bit_rate ? rotation_bit_rate : k_invalid_bit_rate;
context.bit_rate_per_bone[bone_index].translation = bone_bit_rates.translation != k_invalid_bit_rate ? translation_bit_rate : k_invalid_bit_rate;
context.bit_rate_per_bone[bone_index].scale = bone_bit_rates.scale != k_invalid_bit_rate ? scale_bit_rate : k_invalid_bit_rate;
const float error = calculate_max_error_at_bit_rate_local(context, bone_index, error_scan_stop_condition::until_error_too_high);
#if ACL_IMPL_DEBUG_VARIABLE_QUANTIZATION > 1
printf("%u: %u | %u | %u (%u) = %f\n", bone_index, rotation_bit_rate, translation_bit_rate, scale_bit_rate, transform_size, error);
#endif
if (error < best_error)
{
best_error = error;
best_bit_rates = context.bit_rate_per_bone[bone_index];
is_error_good_enough = error < error_threshold;
}
}
}
else
{
const size_t num_permutations = get_array_size(acl_impl::k_local_bit_rate_permutations_no_scale);
for (size_t permutation_index = 0; permutation_index < num_permutations; ++permutation_index)
{
const uint8_t rotation_bit_rate = acl_impl::k_local_bit_rate_permutations_no_scale[permutation_index][0];
if (bone_bit_rates.rotation == 1)
{
if (rotation_bit_rate == 0)
continue; // Skip permutations we aren't interested in
}
else if (bone_bit_rates.rotation == k_invalid_bit_rate)
{
if (rotation_bit_rate != 0)
continue; // Skip permutations we aren't interested in
}
const uint8_t translation_bit_rate = acl_impl::k_local_bit_rate_permutations_no_scale[permutation_index][1];
if (bone_bit_rates.translation == 1)
{
if (translation_bit_rate == 0)
continue; // Skip permutations we aren't interested in
}
else if (bone_bit_rates.translation == k_invalid_bit_rate)
{
if (translation_bit_rate != 0)
continue; // Skip permutations we aren't interested in
}
const uint32_t rotation_size = get_num_bits_at_bit_rate(rotation_bit_rate);
const uint32_t translation_size = get_num_bits_at_bit_rate(translation_bit_rate);
const uint32_t transform_size = rotation_size + translation_size;
if (transform_size != prev_transform_size && is_error_good_enough)
{
// We already found the lowest transform size and we tried every permutation with that same size
break;
}
prev_transform_size = transform_size;
context.bit_rate_per_bone[bone_index].rotation = bone_bit_rates.rotation != k_invalid_bit_rate ? rotation_bit_rate : k_invalid_bit_rate;
context.bit_rate_per_bone[bone_index].translation = bone_bit_rates.translation != k_invalid_bit_rate ? translation_bit_rate : k_invalid_bit_rate;
const float error = calculate_max_error_at_bit_rate_local(context, bone_index, error_scan_stop_condition::until_error_too_high);
#if ACL_IMPL_DEBUG_VARIABLE_QUANTIZATION > 1
printf("%u: %u | %u | %u (%u) = %f\n", bone_index, rotation_bit_rate, translation_bit_rate, k_invalid_bit_rate, transform_size, error);
#endif
if (error < best_error)
{
best_error = error;
best_bit_rates = context.bit_rate_per_bone[bone_index];
is_error_good_enough = error < error_threshold;
}
}
}
#if ACL_IMPL_DEBUG_VARIABLE_QUANTIZATION
printf("%u: Best bit rates: %u | %u | %u\n", bone_index, best_bit_rates.rotation, best_bit_rates.translation, best_bit_rates.scale);
#endif
context.bit_rate_per_bone[bone_index] = best_bit_rates;
}
}
constexpr uint32_t increment_and_clamp_bit_rate(uint32_t bit_rate, uint32_t increment)
{
return bit_rate >= k_highest_bit_rate ? bit_rate : std::min<uint32_t>(bit_rate + increment, k_highest_bit_rate);
}
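// e.g. assuming k_highest_bit_rate is 19 as defined in variable_bit_rates.h,
// increment_and_clamp_bit_rate(18, 3) clamps to 19, while an input already at
// k_highest_bit_rate or above is returned unchanged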
inline float increase_bone_bit_rate(quantization_context& context, uint32_t bone_index, uint32_t num_increments, float old_error, BoneBitRate& out_best_bit_rates)
{
const BoneBitRate bone_bit_rates = context.bit_rate_per_bone[bone_index];
const uint32_t num_scale_increments = context.has_scale ? num_increments : 0;
BoneBitRate best_bit_rates = bone_bit_rates;
float best_error = old_error;
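// The nested loops below enumerate every way to distribute exactly num_increments
// across rotation/translation/scale; combinations whose increments do not sum to
// num_increments are rejected by the check inside the innermost loop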
for (uint32_t rotation_increment = 0; rotation_increment <= num_increments; ++rotation_increment)
{
const uint32_t rotation_bit_rate = increment_and_clamp_bit_rate(bone_bit_rates.rotation, rotation_increment);
for (uint32_t translation_increment = 0; translation_increment <= num_increments; ++translation_increment)
{
const uint32_t translation_bit_rate = increment_and_clamp_bit_rate(bone_bit_rates.translation, translation_increment);
for (uint32_t scale_increment = 0; scale_increment <= num_scale_increments; ++scale_increment)
{
const uint32_t scale_bit_rate = increment_and_clamp_bit_rate(bone_bit_rates.scale, scale_increment);
if (rotation_increment + translation_increment + scale_increment != num_increments)
{
if (scale_bit_rate >= k_highest_bit_rate)
break;
else
continue;
}
context.bit_rate_per_bone[bone_index] = BoneBitRate{ (uint8_t)rotation_bit_rate, (uint8_t)translation_bit_rate, (uint8_t)scale_bit_rate };
const float error = calculate_max_error_at_bit_rate_object(context, bone_index, error_scan_stop_condition::until_error_too_high);
if (error < best_error)
{
best_error = error;
best_bit_rates = context.bit_rate_per_bone[bone_index];
}
context.bit_rate_per_bone[bone_index] = bone_bit_rates;
if (scale_bit_rate >= k_highest_bit_rate)
break;
}
if (translation_bit_rate >= k_highest_bit_rate)
break;
}
if (rotation_bit_rate >= k_highest_bit_rate)
break;
}
out_best_bit_rates = best_bit_rates;
return best_error;
}
inline float calculate_bone_permutation_error(quantization_context& context, BoneBitRate* permutation_bit_rates, uint8_t* bone_chain_permutation, uint32_t bone_index, BoneBitRate* best_bit_rates, float old_error)
{
const float error_threshold = context.error_threshold;
float best_error = old_error;
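// Callers seed bone_chain_permutation in ascending order (zeros first, increments at
// the tail), so std::next_permutation below visits every distinct arrangement exactly
// once before returning false and terminating the loop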
do
{
// Copy our current bit rates to the permutation rates
std::memcpy(permutation_bit_rates, context.bit_rate_per_bone, sizeof(BoneBitRate) * context.num_bones);
bool is_permutation_valid = false;
const uint32_t num_bones_in_chain = context.num_bones_in_chain;
for (uint32_t chain_link_index = 0; chain_link_index < num_bones_in_chain; ++chain_link_index)
{
if (bone_chain_permutation[chain_link_index] != 0)
{
// Increase bit rate
const uint32_t chain_bone_index = context.chain_bone_indices[chain_link_index];
BoneBitRate chain_bone_best_bit_rates;
increase_bone_bit_rate(context, chain_bone_index, bone_chain_permutation[chain_link_index], old_error, chain_bone_best_bit_rates);
is_permutation_valid |= chain_bone_best_bit_rates.rotation != permutation_bit_rates[chain_bone_index].rotation;
is_permutation_valid |= chain_bone_best_bit_rates.translation != permutation_bit_rates[chain_bone_index].translation;
is_permutation_valid |= chain_bone_best_bit_rates.scale != permutation_bit_rates[chain_bone_index].scale;
permutation_bit_rates[chain_bone_index] = chain_bone_best_bit_rates;
}
}
if (!is_permutation_valid)
continue; // Couldn't increase any bit rate, skip this permutation
// Measure error
std::swap(context.bit_rate_per_bone, permutation_bit_rates);
const float permutation_error = calculate_max_error_at_bit_rate_object(context, bone_index, error_scan_stop_condition::until_error_too_high);
std::swap(context.bit_rate_per_bone, permutation_bit_rates);
if (permutation_error < best_error)
{
best_error = permutation_error;
std::memcpy(best_bit_rates, permutation_bit_rates, sizeof(BoneBitRate) * context.num_bones);
if (permutation_error < error_threshold)
break;
}
} while (std::next_permutation(bone_chain_permutation, bone_chain_permutation + context.num_bones_in_chain));
return best_error;
}
inline uint32_t calculate_bone_chain_indices(const clip_context& clip, uint32_t bone_index, uint32_t* out_chain_bone_indices)
{
const BoneChain bone_chain = clip.get_bone_chain(bone_index);
uint32_t num_bones_in_chain = 0;
for (uint32_t chain_bone_index : bone_chain)
out_chain_bone_indices[num_bones_in_chain++] = chain_bone_index;
return num_bones_in_chain;
}
inline void initialize_bone_bit_rates(const SegmentContext& segment, rotation_format8 rotation_format, vector_format8 translation_format, vector_format8 scale_format, BoneBitRate* out_bit_rate_per_bone)
{
const bool is_rotation_variable = is_rotation_format_variable(rotation_format);
const bool is_translation_variable = is_vector_format_variable(translation_format);
const bool is_scale_variable = segment_context_has_scale(segment) && is_vector_format_variable(scale_format);
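// Each track starts at one of three values: 0 when the segment range is normalized
// (the track can become constant within the segment), k_lowest_bit_rate when it isn't,
// and k_invalid_bit_rate when the track is constant/default for the whole clip
// (see the matching comment in calculate_local_space_bit_rates)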
const uint32_t num_bones = segment.num_bones;
for (uint32_t bone_index = 0; bone_index < num_bones; ++bone_index)
{
BoneBitRate& bone_bit_rate = out_bit_rate_per_bone[bone_index];
const bool rotation_supports_constant_tracks = segment.are_rotations_normalized;
if (is_rotation_variable && !segment.bone_streams[bone_index].is_rotation_constant)
bone_bit_rate.rotation = rotation_supports_constant_tracks ? 0 : k_lowest_bit_rate;
else
bone_bit_rate.rotation = k_invalid_bit_rate;
const bool translation_supports_constant_tracks = segment.are_translations_normalized;
if (is_translation_variable && !segment.bone_streams[bone_index].is_translation_constant)
bone_bit_rate.translation = translation_supports_constant_tracks ? 0 : k_lowest_bit_rate;
else
bone_bit_rate.translation = k_invalid_bit_rate;
const bool scale_supports_constant_tracks = segment.are_scales_normalized;
if (is_scale_variable && !segment.bone_streams[bone_index].is_scale_constant)
bone_bit_rate.scale = scale_supports_constant_tracks ? 0 : k_lowest_bit_rate;
else
bone_bit_rate.scale = k_invalid_bit_rate;
}
}
inline void quantize_all_streams(quantization_context& context)
{
ACL_ASSERT(context.is_valid(), "quantization_context isn't valid");
const bool is_rotation_variable = is_rotation_format_variable(context.rotation_format);
const bool is_translation_variable = is_vector_format_variable(context.translation_format);
const bool is_scale_variable = is_vector_format_variable(context.scale_format);
for (uint32_t bone_index = 0; bone_index < context.num_bones; ++bone_index)
{
const BoneBitRate& bone_bit_rate = context.bit_rate_per_bone[bone_index];
if (is_rotation_variable)
quantize_variable_rotation_stream(context, bone_index, bone_bit_rate.rotation);
else
quantize_fixed_rotation_stream(context, bone_index, context.rotation_format);
if (is_translation_variable)
quantize_variable_translation_stream(context, bone_index, bone_bit_rate.translation);
else
quantize_fixed_translation_stream(context, bone_index, context.translation_format);
if (context.has_scale)
{
if (is_scale_variable)
quantize_variable_scale_stream(context, bone_index, bone_bit_rate.scale);
else
quantize_fixed_scale_stream(context, bone_index, context.scale_format);
}
}
}
inline void find_optimal_bit_rates(quantization_context& context)
{
ACL_ASSERT(context.is_valid(), "quantization_context isn't valid");
initialize_bone_bit_rates(*context.segment, context.rotation_format, context.translation_format, context.scale_format, context.bit_rate_per_bone);
// First iterate over all bones and find the optimal bit rate for each track using the local space error.
// We use the local space error to prime the algorithm: if every parent bone had infinite precision,
// the local space error would equal the object space error. Since parents are lossy, it is a good
// approximation and a lower bound: whatever bit rate we find for a bone cannot go any lower and
// still reach our error threshold, since a lossy parent forces the child to be equally or more
// accurate to maintain it.
//
// In practice, the error from a child can compensate for the error introduced by the parent, but
// this is unlikely to hold true for a whole track at every key. We thus assume that increasing
// the precision is always beneficial regardless of the hierarchy level.
calculate_local_space_bit_rates(context);
// Now that we found an approximate lower bound for the bit rates, we start at the root and perform a brute force search.
// For each bone, we do the following:
// - If object space error meets our error threshold, do nothing
// - Iterate over each bone in the chain and increment the bit rate by 1 (rotation or translation, pick lowest error)
// - Pick the bone that improved the error the most and increment the bit rate by 1
// - Repeat until we meet our error threshold
//
// The root is already optimal from the previous step since the local space error is equal to the object space error.
// Next we'll add one bone to the chain under the root. Performing the above steps, we perform an exhaustive search
// to find the smallest memory footprint that will meet our error threshold. No combination with a lower memory footprint
// could yield a smaller error.
// Next we'll add another bone to the chain. By performing these steps recursively, we can ensure that the accuracy always
// increases and the memory footprint is always as low as possible.
// 3 bone chain expansion:
// 3: [bone 0] + 1 [bone 1] + 0 [bone 2] + 0 (3)
// [bone 0] + 0 [bone 1] + 1 [bone 2] + 0 (3)
// [bone 0] + 0 [bone 1] + 0 [bone 2] + 1 (3)
// 6: [bone 0] + 2 [bone 1] + 0 [bone 2] + 0 (6)
// [bone 0] + 1 [bone 1] + 1 [bone 2] + 0 (6)
// [bone 0] + 1 [bone 1] + 0 [bone 2] + 1 (6)
// [bone 0] + 0 [bone 1] + 1 [bone 2] + 1 (6)
// [bone 0] + 0 [bone 1] + 2 [bone 2] + 0 (6)
// [bone 0] + 0 [bone 1] + 0 [bone 2] + 2 (6)
//10: [bone 0] + 3 [bone 1] + 0 [bone 2] + 0 (9)
// [bone 0] + 2 [bone 1] + 1 [bone 2] + 0 (9)
// [bone 0] + 2 [bone 1] + 0 [bone 2] + 1 (9)
// [bone 0] + 1 [bone 1] + 2 [bone 2] + 0 (9)
// [bone 0] + 1 [bone 1] + 1 [bone 2] + 1 (9)
// [bone 0] + 1 [bone 1] + 0 [bone 2] + 2 (9)
// [bone 0] + 0 [bone 1] + 3 [bone 2] + 0 (9)
// [bone 0] + 0 [bone 1] + 2 [bone 2] + 1 (9)
// [bone 0] + 0 [bone 1] + 1 [bone 2] + 2 (9)
// [bone 0] + 0 [bone 1] + 0 [bone 2] + 3 (9)
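// [Illustration] Each group above is exactly what the std::next_permutation loop in
// calculate_bone_permutation_error visits once the tail of bone_chain_permutation is
// seeded below: e.g. the seeds { 0, ..., 0, 2 } and { 0, ..., 1, 1 } together cover
// the two-increment group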
uint8_t* bone_chain_permutation = allocate_type_array<uint8_t>(context.allocator, context.num_bones);
BoneBitRate* permutation_bit_rates = allocate_type_array<BoneBitRate>(context.allocator, context.num_bones);
BoneBitRate* best_permutation_bit_rates = allocate_type_array<BoneBitRate>(context.allocator, context.num_bones);
BoneBitRate* best_bit_rates = allocate_type_array<BoneBitRate>(context.allocator, context.num_bones);
std::memcpy(best_bit_rates, context.bit_rate_per_bone, sizeof(BoneBitRate) * context.num_bones);
const uint32_t num_bones = context.num_bones;
for (uint32_t bone_index = 0; bone_index < num_bones; ++bone_index)
{
// Update our context with the new bone data
const float error_threshold = context.metadata[bone_index].precision;
context.error_threshold = error_threshold;
const uint32_t num_bones_in_chain = calculate_bone_chain_indices(context.clip, bone_index, context.chain_bone_indices);
context.num_bones_in_chain = num_bones_in_chain;
float error = calculate_max_error_at_bit_rate_object(context, bone_index, error_scan_stop_condition::until_error_too_high);
if (error < error_threshold)
continue;
const float initial_error = error;
while (error >= error_threshold)
{
// Generate permutations for up to 3 bit rate increments
// Perform an exhaustive search of the permutations and pick the best result
// If our best error is under the threshold, we are done, otherwise we will try again from there
const float original_error = error;
float best_error = error;
// The first permutation increases the bit rate of a single track/bone
std::fill(bone_chain_permutation, bone_chain_permutation + num_bones, uint8_t(0));
bone_chain_permutation[num_bones_in_chain - 1] = 1;
error = calculate_bone_permutation_error(context, permutation_bit_rates, bone_chain_permutation, bone_index, best_permutation_bit_rates, original_error);
if (error < best_error)
{
best_error = error;
std::memcpy(best_bit_rates, best_permutation_bit_rates, sizeof(BoneBitRate) * num_bones);
if (error < error_threshold)
break;
}
if (context.compression_level >= compression_level8::high)
{
// The second set of permutations spends 2 bit rate increments across the tracks/bones
std::fill(bone_chain_permutation, bone_chain_permutation + num_bones, uint8_t(0));
bone_chain_permutation[num_bones_in_chain - 1] = 2;
error = calculate_bone_permutation_error(context, permutation_bit_rates, bone_chain_permutation, bone_index, best_permutation_bit_rates, original_error);
if (error < best_error)
{
best_error = error;
std::memcpy(best_bit_rates, best_permutation_bit_rates, sizeof(BoneBitRate) * num_bones);
if (error < error_threshold)
break;
}
if (num_bones_in_chain > 1)
{
std::fill(bone_chain_permutation, bone_chain_permutation + num_bones, uint8_t(0));
bone_chain_permutation[num_bones_in_chain - 2] = 1;
bone_chain_permutation[num_bones_in_chain - 1] = 1;
error = calculate_bone_permutation_error(context, permutation_bit_rates, bone_chain_permutation, bone_index, best_permutation_bit_rates, original_error);
if (error < best_error)
{
best_error = error;
std::memcpy(best_bit_rates, best_permutation_bit_rates, sizeof(BoneBitRate) * num_bones);
if (error < error_threshold)
break;
}
}
}
if (context.compression_level >= compression_level8::highest)
{
// The third batch of permutations distributes three bit rate increments
std::fill(bone_chain_permutation, bone_chain_permutation + num_bones, uint8_t(0));
bone_chain_permutation[num_bones_in_chain - 1] = 3;
error = calculate_bone_permutation_error(context, permutation_bit_rates, bone_chain_permutation, bone_index, best_permutation_bit_rates, original_error);
if (error < best_error)
{
best_error = error;
std::memcpy(best_bit_rates, best_permutation_bit_rates, sizeof(BoneBitRate) * num_bones);
if (error < error_threshold)
break;
}
if (num_bones_in_chain > 1)
{
std::fill(bone_chain_permutation, bone_chain_permutation + num_bones, uint8_t(0));
bone_chain_permutation[num_bones_in_chain - 2] = 2;
bone_chain_permutation[num_bones_in_chain - 1] = 1;
error = calculate_bone_permutation_error(context, permutation_bit_rates, bone_chain_permutation, bone_index, best_permutation_bit_rates, original_error);
if (error < best_error)
{
best_error = error;
std::memcpy(best_bit_rates, best_permutation_bit_rates, sizeof(BoneBitRate) * num_bones);
if (error < error_threshold)
break;
}
if (num_bones_in_chain > 2)
{
std::fill(bone_chain_permutation, bone_chain_permutation + num_bones, uint8_t(0));
bone_chain_permutation[num_bones_in_chain - 3] = 1;
bone_chain_permutation[num_bones_in_chain - 2] = 1;
bone_chain_permutation[num_bones_in_chain - 1] = 1;
error = calculate_bone_permutation_error(context, permutation_bit_rates, bone_chain_permutation, bone_index, best_permutation_bit_rates, original_error);
if (error < best_error)
{
best_error = error;
std::memcpy(best_bit_rates, best_permutation_bit_rates, sizeof(BoneBitRate) * num_bones);
if (error < error_threshold)
break;
}
}
}
}
if (best_error >= original_error)
break; // No progress made
error = best_error;
if (error < original_error)
{
#if ACL_IMPL_DEBUG_VARIABLE_QUANTIZATION
std::swap(context.bit_rate_per_bone, best_bit_rates);
float new_error = calculate_max_error_at_bit_rate_object(context, bone_index, error_scan_stop_condition::until_end_of_segment);
std::swap(context.bit_rate_per_bone, best_bit_rates);
for (uint32_t i = 0; i < context.num_bones; ++i)
{
const BoneBitRate& bone_bit_rate = context.bit_rate_per_bone[i];
const BoneBitRate& best_bone_bit_rate = best_bit_rates[i];
bool rotation_differs = bone_bit_rate.rotation != best_bone_bit_rate.rotation;
bool translation_differs = bone_bit_rate.translation != best_bone_bit_rate.translation;
bool scale_differs = bone_bit_rate.scale != best_bone_bit_rate.scale;
if (rotation_differs || translation_differs || scale_differs)
printf("%u: %u | %u | %u => %u %u %u (%f)\n", i, bone_bit_rate.rotation, bone_bit_rate.translation, bone_bit_rate.scale, best_bone_bit_rate.rotation, best_bone_bit_rate.translation, best_bone_bit_rate.scale, new_error);
}
#endif
std::memcpy(context.bit_rate_per_bone, best_bit_rates, sizeof(BoneBitRate) * num_bones);
}
}
if (error < initial_error)
{
#if ACL_IMPL_DEBUG_VARIABLE_QUANTIZATION
std::swap(context.bit_rate_per_bone, best_bit_rates);
float new_error = calculate_max_error_at_bit_rate_object(context, bone_index, error_scan_stop_condition::until_end_of_segment);
std::swap(context.bit_rate_per_bone, best_bit_rates);
for (uint32_t i = 0; i < context.num_bones; ++i)
{
const BoneBitRate& bone_bit_rate = context.bit_rate_per_bone[i];
const BoneBitRate& best_bone_bit_rate = best_bit_rates[i];
bool rotation_differs = bone_bit_rate.rotation != best_bone_bit_rate.rotation;
bool translation_differs = bone_bit_rate.translation != best_bone_bit_rate.translation;
bool scale_differs = bone_bit_rate.scale != best_bone_bit_rate.scale;
if (rotation_differs || translation_differs || scale_differs)
printf("%u: %u | %u | %u => %u %u %u (%f)\n", i, bone_bit_rate.rotation, bone_bit_rate.translation, bone_bit_rate.scale, best_bone_bit_rate.rotation, best_bone_bit_rate.translation, best_bone_bit_rate.scale, new_error);
}
#endif
std::memcpy(context.bit_rate_per_bone, best_bit_rates, sizeof(BoneBitRate) * num_bones);
}
// Our error remains too high; this should be rare.
// Attempt to increase the bit rates as much as we can, backtracking whenever it doesn't help.
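// Note that we now scan until the end of the segment: the earlier scans could stop as soon
// as the error was known to be too high, but here we need the actual maximum error.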
error = calculate_max_error_at_bit_rate_object(context, bone_index, error_scan_stop_condition::until_end_of_segment);
while (error >= error_threshold)
{
// From child to parent, increase the bit rate indiscriminately
uint32_t num_maxed_out = 0;
for (int32_t chain_link_index = num_bones_in_chain - 1; chain_link_index >= 0; --chain_link_index)
{
const uint32_t chain_bone_index = context.chain_bone_indices[chain_link_index];
// Work with a copy. We'll increase the bit rate as much as we can and retain the values
// that yield the smallest error, BUT increasing the bit rate does NOT always mean that
// the error will decrease. It can get worse, in which case we do nothing.
BoneBitRate& bone_bit_rate = context.bit_rate_per_bone[chain_bone_index];
// Copy original values
BoneBitRate best_bone_bit_rate = bone_bit_rate;
float best_bit_rate_error = error;
while (error >= error_threshold)
{
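// BoneBitRate packs its three rates as contiguous uint8_t fields; the static_assert below
// guards that layout so the struct can be scanned like a small array. Conceptually:
//   uint8_t* rates = &bone_bit_rate.rotation; // rates[0..2] = rotation, translation, scale
//   uint8_t& smallest = *std::min_element(rates, rates + 3);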
static_assert(offsetof(BoneBitRate, rotation) == 0 && offsetof(BoneBitRate, scale) == sizeof(BoneBitRate) - 1, "Invalid BoneBitRate offsets");
uint8_t& smallest_bit_rate = *std::min_element<uint8_t*>(&bone_bit_rate.rotation, &bone_bit_rate.scale + 1);
if (smallest_bit_rate >= k_highest_bit_rate)
{
num_maxed_out++;
break;
}
// If rotation == translation and translation has room, bias translation.
// This seems to yield a tiny overall win, but not always.
// TODO: Brute force this?
if (bone_bit_rate.rotation == bone_bit_rate.translation && bone_bit_rate.translation < k_highest_bit_rate && bone_bit_rate.scale >= k_highest_bit_rate)
bone_bit_rate.translation++;
else
smallest_bit_rate++;
ACL_ASSERT((bone_bit_rate.rotation <= k_highest_bit_rate || bone_bit_rate.rotation == k_invalid_bit_rate) && (bone_bit_rate.translation <= k_highest_bit_rate || bone_bit_rate.translation == k_invalid_bit_rate) && (bone_bit_rate.scale <= k_highest_bit_rate || bone_bit_rate.scale == k_invalid_bit_rate), "Invalid bit rate! [%u, %u, %u]", bone_bit_rate.rotation, bone_bit_rate.translation, bone_bit_rate.scale);
error = calculate_max_error_at_bit_rate_object(context, bone_index, error_scan_stop_condition::until_end_of_segment);
if (error < best_bit_rate_error)
{
best_bone_bit_rate = bone_bit_rate;
best_bit_rate_error = error;
#if ACL_IMPL_DEBUG_VARIABLE_QUANTIZATION
printf("%u: => %u %u %u (%f)\n", chain_bone_index, bone_bit_rate.rotation, bone_bit_rate.translation, bone_bit_rate.scale, error);
for (uint32_t i = chain_link_index + 1; i < num_bones_in_chain; ++i)
{
const uint32_t chain_bone_index2 = context.chain_bone_indices[i];
float error2 = calculate_max_error_at_bit_rate_object(context, chain_bone_index2, error_scan_stop_condition::until_end_of_segment);
printf(" %u: => (%f)\n", i, error2);
}
#endif
}
}
// Only retain the lowest error bit rates
bone_bit_rate = best_bone_bit_rate;
error = best_bit_rate_error;
if (error < error_threshold)
break;
}
if (num_maxed_out == num_bones_in_chain)
break;
// TODO: Try to lower the bit rate again in the reverse direction?
}
// Despite our best efforts, we failed to meet the threshold with our heuristics.
// Stop trying to optimize for size and max out the bit rates until we meet the threshold.
// We only do this when the rotation format uses full precision quaternions. This last step is
// not guaranteed to reach the error threshold, but it will very likely increase the memory
// footprint. Even if we do reach the error threshold for the given bone, a sibling bone
// already processed might now end up with a higher error than it used to have if our
// quantization error was compensating for its own. More often than not, sibling bones will
// remain fairly close in their error. Some packed rotation formats, namely those that drop
// the W component, can have a high error even with raw values; if such a format is used,
// we assume a best effort attempt at reaching the error threshold is acceptable.
if (error >= error_threshold && context.rotation_format == rotation_format8::quatf_full)
{
// From child to parent, max out the bit rate
for (int32_t chain_link_index = num_bones_in_chain - 1; chain_link_index >= 0; --chain_link_index)
{
const uint32_t chain_bone_index = context.chain_bone_indices[chain_link_index];
BoneBitRate& bone_bit_rate = context.bit_rate_per_bone[chain_bone_index];
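// std::max raises every valid track to the highest bit rate while leaving tracks flagged
// with k_invalid_bit_rate untouched since that sentinel compares above k_highest_bit_rate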
bone_bit_rate.rotation = std::max<uint8_t>(bone_bit_rate.rotation, k_highest_bit_rate);
bone_bit_rate.translation = std::max<uint8_t>(bone_bit_rate.translation, k_highest_bit_rate);
bone_bit_rate.scale = std::max<uint8_t>(bone_bit_rate.scale, k_highest_bit_rate);
error = calculate_max_error_at_bit_rate_object(context, bone_index, error_scan_stop_condition::until_end_of_segment);
if (error < error_threshold)
break;
}
}
}
#if ACL_IMPL_DEBUG_VARIABLE_QUANTIZATION
printf("Variable quantization optimization results:\n");
for (uint32_t bone_index = 0; bone_index < num_bones; ++bone_index)
{
// Update our context with the new bone data
const float error_threshold = context.metadata[bone_index].precision;
context.error_threshold = error_threshold;
const uint32_t num_bones_in_chain = calculate_bone_chain_indices(context.clip, bone_index, context.chain_bone_indices);
context.num_bones_in_chain = num_bones_in_chain;
float error = calculate_max_error_at_bit_rate_object(context, bone_index, error_scan_stop_condition::until_end_of_segment);
const BoneBitRate& bone_bit_rate = context.bit_rate_per_bone[bone_index];
printf("%u: %u | %u | %u => %f %s\n", bone_index, bone_bit_rate.rotation, bone_bit_rate.translation, bone_bit_rate.scale, error, error >= error_threshold ? "!" : "");
}
#endif
deallocate_type_array(context.allocator, bone_chain_permutation, num_bones);
deallocate_type_array(context.allocator, permutation_bit_rates, num_bones);
deallocate_type_array(context.allocator, best_permutation_bit_rates, num_bones);
deallocate_type_array(context.allocator, best_bit_rates, num_bones);
}
inline void quantize_streams(iallocator& allocator, clip_context& clip, const compression_settings& settings, const clip_context& raw_clip_context, const clip_context& additive_base_clip_context, output_stats& out_stats)
{
(void)out_stats;
const bool is_rotation_variable = is_rotation_format_variable(settings.rotation_format);
const bool is_translation_variable = is_vector_format_variable(settings.translation_format);
const bool is_scale_variable = is_vector_format_variable(settings.scale_format);
const bool is_any_variable = is_rotation_variable || is_translation_variable || is_scale_variable;
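// Only variable formats carry per track bit rates to optimize; when every format is fixed
// (e.g. rotation_format8::quatf_full with vector_format8::vector3f_full), the bit rate search
// is skipped below and the streams are quantized directly.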
quantization_context context(allocator, clip, raw_clip_context, additive_base_clip_context, settings);
for (SegmentContext& segment : clip.segment_iterator())
{
#if ACL_IMPL_DEBUG_VARIABLE_QUANTIZATION
printf("Quantizing segment %u...\n", segment.segment_index);
#endif
#if ACL_IMPL_PROFILE_MATH
{
scope_profiler timer;
for (int32_t i = 0; i < 10; ++i)
{
context.set_segment(segment);
if (is_any_variable)
find_optimal_bit_rates(context);
}
timer.stop();
#if defined(__ANDROID__)
__android_log_print(ANDROID_LOG_INFO, "acl", "Quantization optimization for segment %u took: %.4f ms", segment.segment_index, timer.get_elapsed_milliseconds());
#else
printf("Quantization optimization for segment %u took: %.4f ms\n", segment.segment_index, timer.get_elapsed_milliseconds());
#endif
}
#endif
context.set_segment(segment);
if (is_any_variable)
find_optimal_bit_rates(context);
// Quantize our streams now that we found the optimal bit rates
quantize_all_streams(context);
}
#if defined(SJSON_CPP_WRITER)
if (are_all_enum_flags_set(out_stats.logging, stat_logging::detailed))
{
sjson::ObjectWriter& writer = *out_stats.writer;
writer["track_bit_rate_database_size"] = static_cast<uint32_t>(context.bit_rate_database.get_allocated_size());
size_t transform_cache_size = 0;
transform_cache_size += sizeof(rtm::qvvf) * context.num_bones; // raw_local_pose
transform_cache_size += sizeof(rtm::qvvf) * context.num_bones; // lossy_local_pose
transform_cache_size += context.metric_transform_size * context.num_bones; // lossy_object_pose
transform_cache_size += context.metric_transform_size * context.num_bones * context.clip.segments->num_samples; // raw_local_transforms
transform_cache_size += context.metric_transform_size * context.num_bones * context.clip.segments->num_samples; // raw_object_transforms
if (context.needs_conversion)
transform_cache_size += context.metric_transform_size * context.num_bones; // local_transforms_converted
if (context.has_additive_base)
{
transform_cache_size += sizeof(rtm::qvvf) * context.num_bones; // additive_local_pose
transform_cache_size += context.metric_transform_size * context.num_bones * context.clip.segments->num_samples; // base_local_transforms
transform_cache_size += context.metric_transform_size * context.num_bones * context.clip.segments->num_samples; // base_object_transforms
}
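// As a rough sense of magnitude (hypothetical numbers: 64 bones, 31 samples per segment and
// a 48 byte metric transform), each per-sample cache above costs 48 * 64 * 31, roughly 95 KB.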
writer["transform_cache_size"] = static_cast<uint32_t>(transform_cache_size);
}
#endif
}
}
}
ACL_IMPL_FILE_PRAGMA_POP