diff --git a/mac/include/spirv_cross/spirv.hpp b/mac/include/spirv_cross/spirv.hpp index 9f98683d..dae36cf2 100644 --- a/mac/include/spirv_cross/spirv.hpp +++ b/mac/include/spirv_cross/spirv.hpp @@ -50,11 +50,11 @@ namespace spv { typedef unsigned int Id; #define SPV_VERSION 0x10500 -#define SPV_REVISION 1 +#define SPV_REVISION 3 static const unsigned int MagicNumber = 0x07230203; static const unsigned int Version = 0x00010500; -static const unsigned int Revision = 1; +static const unsigned int Revision = 3; static const unsigned int OpCodeMask = 0xffff; static const unsigned int WordCountShift = 16; @@ -1895,6 +1895,13 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break; case OpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break; case OpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break; + case OpTypeRayQueryProvisionalKHR: *hasResult = true; *hasResultType = false; break; + case OpRayQueryInitializeKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryTerminateKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryGenerateIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryConfirmIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryProceedKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionTypeKHR: *hasResult = true; *hasResultType = true; break; case OpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; case OpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; case OpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; @@ -1914,30 +1921,6 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpTerminateRayNV: *hasResult = false; *hasResultType = false; break; case OpTraceNV: *hasResult = false; *hasResultType = false; break; case OpTypeAccelerationStructureNV: *hasResult = true; *hasResultType = false; break; - case OpTypeRayQueryProvisionalKHR: *hasResult = true; *hasResultType = false; break; - case OpRayQueryInitializeKHR: *hasResult = false; *hasResultType = false; break; - case OpRayQueryTerminateKHR: *hasResult = false; *hasResultType = false; break; - case OpRayQueryGenerateIntersectionKHR: *hasResult = false; *hasResultType = false; break; - case OpRayQueryConfirmIntersectionKHR: *hasResult = false; *hasResultType = false; break; - case OpRayQueryProceedKHR: *hasResult = true; *hasResultType = true; break; - case OpRayQueryGetIntersectionTypeKHR: *hasResult = true; *hasResultType = true; break; - case OpRayQueryGetRayTMinKHR: *hasResult = true; *hasResultType = true; break; - case OpRayQueryGetRayFlagsKHR: *hasResult = true; *hasResultType = true; break; - case OpRayQueryGetIntersectionTKHR: *hasResult = true; *hasResultType = true; break; - case OpRayQueryGetIntersectionInstanceCustomIndexKHR: *hasResult = true; *hasResultType = true; break; - case OpRayQueryGetIntersectionInstanceIdKHR: *hasResult = true; *hasResultType = true; break; - case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: *hasResult = true; *hasResultType = true; break; - case OpRayQueryGetIntersectionGeometryIndexKHR: *hasResult = true; *hasResultType = true; break; - case OpRayQueryGetIntersectionPrimitiveIndexKHR: *hasResult = true; *hasResultType = true; break; - case OpRayQueryGetIntersectionBarycentricsKHR: *hasResult = true; *hasResultType = true; break; - case OpRayQueryGetIntersectionFrontFaceKHR: *hasResult = true; *hasResultType = true; break; - case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: *hasResult = true; *hasResultType = true; break; - case OpRayQueryGetIntersectionObjectRayDirectionKHR: *hasResult = true; *hasResultType = true; break; - case OpRayQueryGetIntersectionObjectRayOriginKHR: *hasResult = true; *hasResultType = true; break; - case OpRayQueryGetWorldRayDirectionKHR: *hasResult = true; *hasResultType = true; break; - case OpRayQueryGetWorldRayOriginKHR: *hasResult = true; *hasResultType = true; break; - case OpRayQueryGetIntersectionObjectToWorldKHR: *hasResult = true; *hasResultType = true; break; - case OpRayQueryGetIntersectionWorldToObjectKHR: *hasResult = true; *hasResultType = true; break; case OpExecuteCallableNV: *hasResult = false; *hasResultType = false; break; case OpTypeCooperativeMatrixNV: *hasResult = true; *hasResultType = false; break; case OpCooperativeMatrixLoadNV: *hasResult = true; *hasResultType = true; break; @@ -2092,6 +2075,23 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: *hasResult = true; *hasResultType = true; break; case OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: *hasResult = true; *hasResultType = true; break; case OpSubgroupAvcSicGetInterRawSadsINTEL: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetRayTMinKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetRayFlagsKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionTKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceCustomIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceIdKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionGeometryIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionPrimitiveIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionBarycentricsKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionFrontFaceKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetWorldRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetWorldRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectToWorldKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionWorldToObjectKHR: *hasResult = true; *hasResultType = true; break; } } #endif /* SPV_ENABLE_UTILITY_CODE */ diff --git a/mac/include/spirv_cross/spirv_cfg.hpp b/mac/include/spirv_cross/spirv_cfg.hpp index 9f3e62a1..b5459933 100644 --- a/mac/include/spirv_cross/spirv_cfg.hpp +++ b/mac/include/spirv_cross/spirv_cfg.hpp @@ -14,6 +14,13 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. + */ + #ifndef SPIRV_CROSS_CFG_HPP #define SPIRV_CROSS_CFG_HPP diff --git a/mac/include/spirv_cross/spirv_common.hpp b/mac/include/spirv_cross/spirv_common.hpp index 252d0329..ecb84077 100644 --- a/mac/include/spirv_cross/spirv_common.hpp +++ b/mac/include/spirv_cross/spirv_common.hpp @@ -14,6 +14,13 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. + */ + #ifndef SPIRV_CROSS_COMMON_HPP #define SPIRV_CROSS_COMMON_HPP @@ -262,6 +269,29 @@ inline std::string convert_to_string(double t, char locale_radix_point) return buf; } +template +struct ValueSaver +{ + explicit ValueSaver(T ¤t_) + : current(current_) + , saved(current_) + { + } + + void release() + { + current = saved; + } + + ~ValueSaver() + { + release(); + } + + T ¤t; + T saved; +}; + #if defined(__clang__) || defined(__GNUC__) #pragma GCC diagnostic pop #elif defined(_MSC_VER) @@ -334,28 +364,6 @@ public: return TypedID(*this); } - bool operator==(const TypedID &other) const - { - return id == other.id; - } - - bool operator!=(const TypedID &other) const - { - return id != other.id; - } - - template - bool operator==(const TypedID &other) const - { - return id == uint32_t(other); - } - - template - bool operator!=(const TypedID &other) const - { - return id != uint32_t(other); - } - private: uint32_t id = 0; }; @@ -380,26 +388,6 @@ public: return id; } - bool operator==(const TypedID &other) const - { - return id == other.id; - } - - bool operator!=(const TypedID &other) const - { - return id != other.id; - } - - bool operator==(const TypedID &other) const - { - return id == uint32_t(other); - } - - bool operator!=(const TypedID &other) const - { - return id != uint32_t(other); - } - private: uint32_t id = 0; }; @@ -535,6 +523,7 @@ struct SPIRType : IVariant // Keep internal types at the end. ControlPointArray, + Interpolant, Char }; @@ -558,11 +547,16 @@ struct SPIRType : IVariant // Keep track of how many pointer layers we have. uint32_t pointer_depth = 0; bool pointer = false; + bool forward_pointer = false; spv::StorageClass storage = spv::StorageClassGeneric; SmallVector member_types; + // If member order has been rewritten to handle certain scenarios with Offset, + // allow codegen to rewrite the index. + SmallVector member_type_index_redirection; + struct ImageType { TypeID type; @@ -636,7 +630,7 @@ struct SPIREntryPoint SmallVector interface_variables; Bitset flags; - struct + struct WorkgroupSize { uint32_t x = 0, y = 0, z = 0; uint32_t constant = 0; // Workgroup size can be expressed as a constant/spec-constant instead. @@ -694,6 +688,9 @@ struct SPIRExpression : IVariant // Used by access chain Store and Load since we read multiple expressions in this case. SmallVector implied_read_expressions; + // The expression was emitted at a certain scope. Lets us track when an expression read means multiple reads. + uint32_t emitted_loop_level = 0; + SPIRV_CROSS_DECLARE_CLONE(SPIRExpression) }; @@ -776,7 +773,7 @@ struct SPIRBlock : IVariant ComplexLoop }; - enum + enum : uint32_t { NoDominator = 0xffffffffu }; @@ -1064,7 +1061,8 @@ struct SPIRConstant : IVariant type = TypeConstant }; - union Constant { + union Constant + { uint32_t u32; int32_t i32; float f32; @@ -1102,7 +1100,8 @@ struct SPIRConstant : IVariant int e = (u16_value >> 10) & 0x1f; int m = (u16_value >> 0) & 0x3ff; - union { + union + { float f32; uint32_t u32; } u; @@ -1521,6 +1520,7 @@ struct AccessChainMeta bool need_transpose = false; bool storage_is_packed = false; bool storage_is_invariant = false; + bool flattened_struct = false; }; enum ExtendedDecorations @@ -1555,8 +1555,10 @@ enum ExtendedDecorations // Marks a buffer block for using explicit offsets (GLSL/HLSL). SPIRVCrossDecorationExplicitOffset, - // Apply to a variable in the Input storage class; marks it as holding the base group passed to vkCmdDispatchBase(). - // In MSL, this is used to adjust the WorkgroupId and GlobalInvocationId variables. + // Apply to a variable in the Input storage class; marks it as holding the base group passed to vkCmdDispatchBase(), + // or the base vertex and instance indices passed to vkCmdDrawIndexed(). + // In MSL, this is used to adjust the WorkgroupId and GlobalInvocationId variables in compute shaders, + // and to hold the BaseVertex and BaseInstance variables in vertex shaders. SPIRVCrossDecorationBuiltInDispatchBase, // Apply to a variable that is a function parameter; marks it as being a "dynamic" @@ -1565,6 +1567,27 @@ enum ExtendedDecorations // Y'CbCr conversion. SPIRVCrossDecorationDynamicImageSampler, + // Apply to a variable in the Input storage class; marks it as holding the size of the stage + // input grid. + // In MSL, this is used to hold the vertex and instance counts in a tessellation pipeline + // vertex shader. + SPIRVCrossDecorationBuiltInStageInputSize, + + // Apply to any access chain of a tessellation I/O variable; stores the type of the sub-object + // that was chained to, as recorded in the input variable itself. This is used in case the pointer + // is itself used as the base of an access chain, to calculate the original type of the sub-object + // chained to, in case a swizzle needs to be applied. This should not happen normally with valid + // SPIR-V, but the MSL backend can change the type of input variables, necessitating the + // addition of swizzles to keep the generated code compiling. + SPIRVCrossDecorationTessIOOriginalInputTypeID, + + // Apply to any access chain of an interface variable used with pull-model interpolation, where the variable is a + // vector but the resulting pointer is a scalar; stores the component index that is to be accessed by the chain. + // This is used when emitting calls to interpolation functions on the chain in MSL: in this case, the component + // must be applied to the result, since pull-model interpolants in MSL cannot be swizzled directly, but the + // results of interpolation can. + SPIRVCrossDecorationInterpolantComponentExpr, + SPIRVCrossDecorationCount }; @@ -1584,6 +1607,7 @@ struct Meta uint32_t offset = 0; uint32_t xfb_buffer = 0; uint32_t xfb_stride = 0; + uint32_t stream = 0; uint32_t array_stride = 0; uint32_t matrix_stride = 0; uint32_t input_attachment = 0; diff --git a/mac/include/spirv_cross/spirv_cross.hpp b/mac/include/spirv_cross/spirv_cross.hpp index 373f9976..47f1d794 100644 --- a/mac/include/spirv_cross/spirv_cross.hpp +++ b/mac/include/spirv_cross/spirv_cross.hpp @@ -14,6 +14,13 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. + */ + #ifndef SPIRV_CROSS_HPP #define SPIRV_CROSS_HPP @@ -513,9 +520,22 @@ protected: SPIRFunction *current_function = nullptr; SPIRBlock *current_block = nullptr; + uint32_t current_loop_level = 0; std::unordered_set active_interface_variables; bool check_active_interface_variables = false; + void add_loop_level(); + + void set_initializers(SPIRExpression &e) + { + e.emitted_loop_level = current_loop_level; + } + + template + void set_initializers(const T &) + { + } + // If our IDs are out of range here as part of opcodes, throw instead of // undefined behavior. template @@ -524,6 +544,7 @@ protected: ir.add_typed_id(static_cast(T::type), id); auto &var = variant_set(ir.ids[id], std::forward

(args)...); var.self = id; + set_initializers(var); return var; } @@ -611,7 +632,7 @@ protected: bool expression_is_lvalue(uint32_t id) const; bool variable_storage_is_aliased(const SPIRVariable &var); SPIRVariable *maybe_get_backing_variable(uint32_t chain); - spv::StorageClass get_backing_variable_storage(uint32_t ptr); + spv::StorageClass get_expression_effective_storage_class(uint32_t ptr); void register_read(uint32_t expr, uint32_t chain, bool forwarded); void register_write(uint32_t chain); @@ -1037,6 +1058,7 @@ protected: void unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration); bool type_is_array_of_pointers(const SPIRType &type) const; + bool type_is_top_level_physical_pointer(const SPIRType &type) const; bool type_is_block_like(const SPIRType &type) const; bool type_is_opaque_value(const SPIRType &type) const; @@ -1045,6 +1067,11 @@ protected: bool flush_phi_required(BlockID from, BlockID to) const; + uint32_t evaluate_spec_constant_u32(const SPIRConstantOp &spec) const; + uint32_t evaluate_constant_u32(uint32_t id) const; + + bool is_vertex_like_shader() const; + private: // Used only to implement the old deprecated get_entry_point() interface. const SPIREntryPoint &get_first_entry_point(const std::string &name) const; diff --git a/mac/include/spirv_cross/spirv_cross_containers.hpp b/mac/include/spirv_cross/spirv_cross_containers.hpp index f5760a0e..c68f670b 100644 --- a/mac/include/spirv_cross/spirv_cross_containers.hpp +++ b/mac/include/spirv_cross/spirv_cross_containers.hpp @@ -14,6 +14,13 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. + */ + #ifndef SPIRV_CROSS_CONTAINERS_HPP #define SPIRV_CROSS_CONTAINERS_HPP @@ -63,7 +70,8 @@ public: private: #if defined(_MSC_VER) && _MSC_VER < 1900 // MSVC 2013 workarounds, sigh ... - union { + union + { char aligned_char[sizeof(T) * N]; double dummy_aligner; } u; @@ -211,6 +219,10 @@ public: this->buffer_size = count; } + SmallVector(std::initializer_list init) SPIRV_CROSS_NOEXCEPT : SmallVector(init.begin(), init.end()) + { + } + SmallVector(SmallVector &&other) SPIRV_CROSS_NOEXCEPT : SmallVector() { *this = std::move(other); @@ -327,8 +339,9 @@ public: size_t target_capacity = buffer_capacity; if (target_capacity == 0) target_capacity = 1; - if (target_capacity < N) - target_capacity = N; + + // Weird parens works around macro issues on Windows if NOMINMAX is not used. + target_capacity = (std::max)(target_capacity, N); // Need to ensure there is a POT value of target capacity which is larger than count, // otherwise this will overflow. diff --git a/mac/include/spirv_cross/spirv_cross_error_handling.hpp b/mac/include/spirv_cross/spirv_cross_error_handling.hpp index fa90610b..c0927e45 100644 --- a/mac/include/spirv_cross/spirv_cross_error_handling.hpp +++ b/mac/include/spirv_cross/spirv_cross_error_handling.hpp @@ -14,6 +14,13 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. + */ + #ifndef SPIRV_CROSS_ERROR_HANDLING #define SPIRV_CROSS_ERROR_HANDLING diff --git a/mac/include/spirv_cross/spirv_cross_parsed_ir.hpp b/mac/include/spirv_cross/spirv_cross_parsed_ir.hpp index 4880c841..ee202d2f 100644 --- a/mac/include/spirv_cross/spirv_cross_parsed_ir.hpp +++ b/mac/include/spirv_cross/spirv_cross_parsed_ir.hpp @@ -14,6 +14,13 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. + */ + #ifndef SPIRV_CROSS_PARSED_IR_HPP #define SPIRV_CROSS_PARSED_IR_HPP @@ -139,6 +146,7 @@ public: void mark_used_as_array_length(ID id); uint32_t increase_bound_by(uint32_t count); Bitset get_buffer_block_flags(const SPIRVariable &var) const; + Bitset get_buffer_block_type_flags(const SPIRType &type) const; void add_typed_id(Types type, ID id); void remove_typed_id(Types type, ID id); @@ -208,6 +216,12 @@ public: void make_constant_null(uint32_t id, uint32_t type, bool add_to_typed_id_set); + void fixup_reserved_names(); + + static void sanitize_underscores(std::string &str); + static void sanitize_identifier(std::string &str, bool member, bool allow_reserved_prefixes); + static bool is_globally_reserved_identifier(std::string &str, bool allow_reserved_prefixes); + private: template T &get(uint32_t id) @@ -225,6 +239,8 @@ private: mutable uint32_t loop_iteration_depth_soft = 0; std::string empty_string; Bitset cleared_bitset; + + std::unordered_set meta_needing_name_fixup; }; } // namespace SPIRV_CROSS_NAMESPACE diff --git a/mac/include/spirv_cross/spirv_glsl.hpp b/mac/include/spirv_cross/spirv_glsl.hpp index 1eafc2ce..c382db67 100644 --- a/mac/include/spirv_cross/spirv_glsl.hpp +++ b/mac/include/spirv_cross/spirv_glsl.hpp @@ -14,6 +14,13 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. + */ + #ifndef SPIRV_CROSS_GLSL_HPP #define SPIRV_CROSS_GLSL_HPP @@ -57,7 +64,8 @@ enum AccessChainFlagBits ACCESS_CHAIN_CHAIN_ONLY_BIT = 1 << 1, ACCESS_CHAIN_PTR_CHAIN_BIT = 1 << 2, ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT = 1 << 3, - ACCESS_CHAIN_LITERAL_MSB_FORCE_ID = 1 << 4 + ACCESS_CHAIN_LITERAL_MSB_FORCE_ID = 1 << 4, + ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT = 1 << 5 }; typedef uint32_t AccessChainFlags; @@ -120,6 +128,10 @@ public: // which would otherwise be uninitialized will now be initialized to 0 instead. bool force_zero_initialized_variables = false; + // In GLSL, force use of I/O block flattening, similar to + // what happens on legacy GLSL targets for blocks and structs. + bool force_flattened_io_blocks = false; + enum Precision { DontCare, @@ -128,14 +140,16 @@ public: Highp }; - struct + struct VertexOptions { - // GLSL: In vertex shaders, rewrite [0, w] depth (Vulkan/D3D style) to [-w, w] depth (GL style). - // MSL: In vertex shaders, rewrite [-w, w] depth (GL style) to [0, w] depth. - // HLSL: In vertex shaders, rewrite [-w, w] depth (GL style) to [0, w] depth. + // "Vertex-like shader" here is any shader stage that can write BuiltInPosition. + + // GLSL: In vertex-like shaders, rewrite [0, w] depth (Vulkan/D3D style) to [-w, w] depth (GL style). + // MSL: In vertex-like shaders, rewrite [-w, w] depth (GL style) to [0, w] depth. + // HLSL: In vertex-like shaders, rewrite [-w, w] depth (GL style) to [0, w] depth. bool fixup_clipspace = false; - // Inverts gl_Position.y or equivalent. + // In vertex-like shaders, inverts gl_Position.y or equivalent. bool flip_vert_y = false; // GLSL only, for HLSL version of this option, see CompilerHLSL. @@ -145,7 +159,7 @@ public: bool support_nonzero_base_instance = true; } vertex; - struct + struct FragmentOptions { // Add precision mediump float in ES targets when emitting GLES source. // Add precision highp int in ES targets when emitting GLES source. @@ -237,6 +251,82 @@ public: bool variable_is_depth_or_compare(VariableID id) const; protected: + struct ShaderSubgroupSupportHelper + { + // lower enum value = greater priority + enum Candidate + { + KHR_shader_subgroup_ballot, + KHR_shader_subgroup_basic, + KHR_shader_subgroup_vote, + NV_gpu_shader_5, + NV_shader_thread_group, + NV_shader_thread_shuffle, + ARB_shader_ballot, + ARB_shader_group_vote, + AMD_gcn_shader, + + CandidateCount + }; + + static const char *get_extension_name(Candidate c); + static SmallVector get_extra_required_extension_names(Candidate c); + static const char *get_extra_required_extension_predicate(Candidate c); + + enum Feature + { + SubgroupMask = 0, + SubgroupSize = 1, + SubgroupInvocationID = 2, + SubgroupID = 3, + NumSubgroups = 4, + SubgroupBroadcast_First = 5, + SubgroupBallotFindLSB_MSB = 6, + SubgroupAll_Any_AllEqualBool = 7, + SubgroupAllEqualT = 8, + SubgroupElect = 9, + SubgroupBarrier = 10, + SubgroupMemBarrier = 11, + SubgroupBallot = 12, + SubgroupInverseBallot_InclBitCount_ExclBitCout = 13, + SubgroupBallotBitExtract = 14, + SubgroupBallotBitCount = 15, + + FeatureCount + }; + + using FeatureMask = uint32_t; + static_assert(sizeof(FeatureMask) * 8u >= FeatureCount, "Mask type needs more bits."); + + using CandidateVector = SmallVector; + using FeatureVector = SmallVector; + + static FeatureVector get_feature_dependencies(Feature feature); + static FeatureMask get_feature_dependency_mask(Feature feature); + static bool can_feature_be_implemented_without_extensions(Feature feature); + static Candidate get_KHR_extension_for_feature(Feature feature); + + struct Result + { + Result(); + uint32_t weights[CandidateCount]; + }; + + void request_feature(Feature feature); + bool is_feature_requested(Feature feature) const; + Result resolve() const; + + static CandidateVector get_candidates_for_feature(Feature ft, const Result &r); + + private: + static CandidateVector get_candidates_for_feature(Feature ft); + static FeatureMask build_mask(const SmallVector &features); + FeatureMask feature_mask = 0; + }; + + // TODO remove this function when all subgroup ops are supported (or make it always return true) + static bool is_supported_subgroup_op_in_opengl(spv::Op op); + void reset(); void emit_function(SPIRFunction &func, const Bitset &return_flags); @@ -267,9 +357,11 @@ protected: void build_workgroup_size(SmallVector &arguments, const SpecializationConstant &x, const SpecializationConstant &y, const SpecializationConstant &z); + void request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature); + virtual void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id); - virtual void emit_texture_op(const Instruction &i); - virtual std::string to_texture_op(const Instruction &i, bool *forward, + virtual void emit_texture_op(const Instruction &i, bool sparse); + virtual std::string to_texture_op(const Instruction &i, bool sparse, bool *forward, SmallVector &inherited_expressions); virtual void emit_subgroup_op(const Instruction &i); virtual std::string type_to_glsl(const SPIRType &type, uint32_t id = 0); @@ -284,14 +376,42 @@ protected: virtual void emit_fixup(); virtual std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0); virtual std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id); - virtual std::string to_function_name(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, - bool is_proj, bool has_array_offsets, bool has_offset, bool has_grad, - bool has_dref, uint32_t lod, uint32_t minlod); - virtual std::string to_function_args(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, - bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref, - uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, - uint32_t offset, uint32_t bias, uint32_t comp, uint32_t sample, - uint32_t minlod, bool *p_forward); + + struct TextureFunctionBaseArguments + { + // GCC 4.8 workarounds, it doesn't understand '{}' constructor here, use explicit default constructor. + TextureFunctionBaseArguments() = default; + VariableID img = 0; + const SPIRType *imgtype = nullptr; + bool is_fetch = false, is_gather = false, is_proj = false; + }; + + struct TextureFunctionNameArguments + { + // GCC 4.8 workarounds, it doesn't understand '{}' constructor here, use explicit default constructor. + TextureFunctionNameArguments() = default; + TextureFunctionBaseArguments base; + bool has_array_offsets = false, has_offset = false, has_grad = false; + bool has_dref = false, is_sparse_feedback = false, has_min_lod = false; + uint32_t lod = 0; + }; + virtual std::string to_function_name(const TextureFunctionNameArguments &args); + + struct TextureFunctionArguments + { + // GCC 4.8 workarounds, it doesn't understand '{}' constructor here, use explicit default constructor. + TextureFunctionArguments() = default; + TextureFunctionBaseArguments base; + uint32_t coord = 0, coord_components = 0, dref = 0; + uint32_t grad_x = 0, grad_y = 0, lod = 0, coffset = 0, offset = 0; + uint32_t bias = 0, component = 0, sample = 0, sparse_texel = 0, min_lod = 0; + bool nonuniform_expression = false; + }; + virtual std::string to_function_args(const TextureFunctionArguments &args, bool *p_forward); + + void emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id, + uint32_t &texel_id); + uint32_t get_sparse_feedback_texel_id(uint32_t id) const; virtual void emit_buffer_block(const SPIRVariable &type); virtual void emit_push_constant_block(const SPIRVariable &var); virtual void emit_uniform(const SPIRVariable &var); @@ -441,25 +561,33 @@ protected: bool supports_extensions = false; bool supports_empty_struct = false; bool array_is_value_type = true; + bool buffer_offset_array_is_value_type = true; bool comparison_image_samples_scalar = false; bool native_pointers = false; bool support_small_type_sampling_result = false; bool support_case_fallthrough = true; bool use_array_constructor = false; + bool needs_row_major_load_workaround = false; } backend; void emit_struct(SPIRType &type); void emit_resources(); + void emit_extension_workarounds(spv::ExecutionModel model); void emit_buffer_block_native(const SPIRVariable &var); void emit_buffer_reference_block(SPIRType &type, bool forward_declaration); void emit_buffer_block_legacy(const SPIRVariable &var); void emit_buffer_block_flattened(const SPIRVariable &type); + void fixup_implicit_builtin_block_names(); void emit_declared_builtin_block(spv::StorageClass storage, spv::ExecutionModel model); bool should_force_emit_builtin_block(spv::StorageClass storage); void emit_push_constant_block_vulkan(const SPIRVariable &var); void emit_push_constant_block_glsl(const SPIRVariable &var); void emit_interface_block(const SPIRVariable &type); void emit_flattened_io_block(const SPIRVariable &var, const char *qual); + void emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual, + const SmallVector &indices); + void emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual, + const SmallVector &indices); void emit_block_chain(SPIRBlock &block); void emit_hoisted_temporaries(SmallVector> &temporaries); std::string constant_value_macro_name(uint32_t id); @@ -522,6 +650,7 @@ protected: void emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op); bool expression_is_forwarded(uint32_t id) const; bool expression_suppresses_usage_tracking(uint32_t id) const; + bool expression_read_implies_multiple_reads(uint32_t id) const; SPIRExpression &emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs, bool suppress_usage_tracking = false); @@ -531,6 +660,9 @@ protected: std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags, AccessChainMeta *meta); + virtual void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, + spv::StorageClass storage, bool &is_packed); + std::string access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, AccessChainMeta *meta = nullptr, bool ptr_chain = false); @@ -559,7 +691,7 @@ protected: SPIRExpression &emit_uninitialized_temporary_expression(uint32_t type, uint32_t id); void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector &arglist); std::string to_expression(uint32_t id, bool register_expression_read = true); - std::string to_composite_constructor_expression(uint32_t id); + std::string to_composite_constructor_expression(uint32_t id, bool uses_buffer_offset); std::string to_rerolled_array_expression(const std::string &expr, const SPIRType &type); std::string to_enclosed_expression(uint32_t id, bool register_expression_read = true); std::string to_unpacked_expression(uint32_t id, bool register_expression_read = true); @@ -575,6 +707,7 @@ protected: void strip_enclosed_expression(std::string &expr); std::string to_member_name(const SPIRType &type, uint32_t index); virtual std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain); + std::string to_multi_member_reference(const SPIRType &type, const SmallVector &indices); std::string type_to_glsl_constructor(const SPIRType &type); std::string argument_decl(const SPIRFunction::Parameter &arg); virtual std::string to_qualifiers_glsl(uint32_t id); @@ -624,8 +757,7 @@ protected: void replace_fragment_output(SPIRVariable &var); void replace_fragment_outputs(); - bool check_explicit_lod_allowed(uint32_t lod); - std::string legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t lod, uint32_t id); + std::string legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t id); uint32_t indent = 0; @@ -635,11 +767,16 @@ protected: std::unordered_set flushed_phi_variables; std::unordered_set flattened_buffer_blocks; - std::unordered_set flattened_structs; + std::unordered_map flattened_structs; - std::string load_flattened_struct(SPIRVariable &var); - std::string to_flattened_struct_member(const SPIRVariable &var, uint32_t index); - void store_flattened_struct(SPIRVariable &var, uint32_t value); + ShaderSubgroupSupportHelper shader_subgroup_supporter; + + std::string load_flattened_struct(const std::string &basename, const SPIRType &type); + std::string to_flattened_struct_member(const std::string &basename, const SPIRType &type, uint32_t index); + void store_flattened_struct(uint32_t lhs_id, uint32_t value); + void store_flattened_struct(const std::string &basename, uint32_t rhs, const SPIRType &type, + const SmallVector &indices); + std::string to_flattened_access_chain_expression(uint32_t id); // Usage tracking. If a temporary is used more than once, use the temporary instead to // avoid AST explosion when SPIRV is generated with pure SSA and doesn't write stuff to variables. @@ -654,6 +791,10 @@ protected: // Currently used by NMin/Max/Clamp implementations. std::unordered_map extra_sub_expressions; + SmallVector workaround_ubo_load_overload_types; + void request_workaround_wrapper_overload(TypeID id); + void rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr); + uint32_t statement_count = 0; inline bool is_legacy() const @@ -671,6 +812,10 @@ protected: return !options.es && options.version < 130; } + bool requires_transpose_2x2 = false; + bool requires_transpose_3x3 = false; + bool requires_transpose_4x4 = false; + bool args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure); void register_call_out_argument(uint32_t id); void register_impure_function_call(); @@ -712,8 +857,6 @@ protected: virtual void declare_undefined_values(); - static std::string sanitize_underscores(const std::string &str); - bool can_use_io_location(spv::StorageClass storage, bool block); const Instruction *get_next_instruction_in_block(const Instruction &instr); static uint32_t mask_relevant_memory_semantics(uint32_t semantics); @@ -726,9 +869,9 @@ protected: // Builtins in GLSL are always specific signedness, but the SPIR-V can declare them // as either unsigned or signed. - // Sometimes we will need to automatically perform bitcasts on load and store to make this work. - virtual void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type); - virtual void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type); + // Sometimes we will need to automatically perform casts on load and store to make this work. + virtual void cast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type); + virtual void cast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type); void unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr); void convert_non_uniform_expression(const SPIRType &type, std::string &expr); @@ -736,6 +879,7 @@ protected: void disallow_forwarding_in_expression_chain(const SPIRExpression &expr); bool expression_is_constant_null(uint32_t id) const; + bool expression_is_non_value_type_array(uint32_t ptr); virtual void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression); uint32_t get_integer_width_for_instruction(const Instruction &instr) const; diff --git a/mac/include/spirv_cross/spirv_msl.hpp b/mac/include/spirv_cross/spirv_msl.hpp index e56d27b9..003fb83a 100644 --- a/mac/include/spirv_cross/spirv_msl.hpp +++ b/mac/include/spirv_cross/spirv_msl.hpp @@ -14,6 +14,13 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. + */ + #ifndef SPIRV_CROSS_MSL_HPP #define SPIRV_CROSS_MSL_HPP @@ -27,33 +34,44 @@ namespace SPIRV_CROSS_NAMESPACE { -// Indicates the format of the vertex attribute. Currently limited to specifying -// if the attribute is an 8-bit unsigned integer, 16-bit unsigned integer, or +// Indicates the format of a shader input. Currently limited to specifying +// if the input is an 8-bit unsigned integer, 16-bit unsigned integer, or // some other format. -enum MSLVertexFormat +enum MSLShaderInputFormat { - MSL_VERTEX_FORMAT_OTHER = 0, - MSL_VERTEX_FORMAT_UINT8 = 1, - MSL_VERTEX_FORMAT_UINT16 = 2, - MSL_VERTEX_FORMAT_INT_MAX = 0x7fffffff + MSL_SHADER_INPUT_FORMAT_OTHER = 0, + MSL_SHADER_INPUT_FORMAT_UINT8 = 1, + MSL_SHADER_INPUT_FORMAT_UINT16 = 2, + MSL_SHADER_INPUT_FORMAT_ANY16 = 3, + MSL_SHADER_INPUT_FORMAT_ANY32 = 4, + + // Deprecated aliases. + MSL_VERTEX_FORMAT_OTHER = MSL_SHADER_INPUT_FORMAT_OTHER, + MSL_VERTEX_FORMAT_UINT8 = MSL_SHADER_INPUT_FORMAT_UINT8, + MSL_VERTEX_FORMAT_UINT16 = MSL_SHADER_INPUT_FORMAT_UINT16, + + MSL_SHADER_INPUT_FORMAT_INT_MAX = 0x7fffffff }; -// Defines MSL characteristics of a vertex attribute at a particular location. +// Defines MSL characteristics of an input variable at a particular location. // After compilation, it is possible to query whether or not this location was used. -struct MSLVertexAttr +// If vecsize is nonzero, it must be greater than or equal to the vecsize declared in the shader, +// or behavior is undefined. +struct MSLShaderInput { uint32_t location = 0; - uint32_t msl_buffer = 0; - uint32_t msl_offset = 0; - uint32_t msl_stride = 0; - bool per_instance = false; - MSLVertexFormat format = MSL_VERTEX_FORMAT_OTHER; + MSLShaderInputFormat format = MSL_SHADER_INPUT_FORMAT_OTHER; spv::BuiltIn builtin = spv::BuiltInMax; + uint32_t vecsize = 0; }; // Matches the binding index of a MSL resource for a binding within a descriptor set. // Taken together, the stage, desc_set and binding combine to form a reference to a resource -// descriptor used in a particular shading stage. +// descriptor used in a particular shading stage. The count field indicates the number of +// resources consumed by this binding, if the binding represents an array of resources. +// If the resource array is a run-time-sized array, which are legal in GLSL or SPIR-V, this value +// will be used to declare the array size in MSL, which does not support run-time-sized arrays. +// For resources that are not held in a run-time-sized array, the count field does not need to be populated. // If using MSL 2.0 argument buffers, the descriptor set is not marked as a discrete descriptor set, // and (for iOS only) the resource is not a storage image (sampled != 2), the binding reference we // remap to will become an [[id(N)]] attribute within the "descriptor set" argument buffer structure. @@ -64,6 +82,7 @@ struct MSLResourceBinding spv::ExecutionModel stage = spv::ExecutionModelMax; uint32_t desc_set = 0; uint32_t binding = 0; + uint32_t count = 0; uint32_t msl_buffer = 0; uint32_t msl_texture = 0; uint32_t msl_sampler = 0; @@ -242,6 +261,9 @@ static const uint32_t kArgumentBufferBinding = ~(3u); static const uint32_t kMaxArgumentBuffers = 8; +// The arbitrary maximum for the nesting of array of array copies. +static const uint32_t kArrayCopyMultidimMax = 6; + // Decompiles SPIR-V to Metal Shading Language class CompilerMSL : public CompilerGLSL { @@ -258,6 +280,8 @@ public: Platform platform = macOS; uint32_t msl_version = make_msl_version(1, 2); uint32_t texel_buffer_texture_width = 4096; // Width of 2D Metal textures used as 1D texel buffers + uint32_t r32ui_linear_texture_alignment = 4; + uint32_t r32ui_alignment_constant_id = 65535; uint32_t swizzle_buffer_index = 30; uint32_t indirect_params_buffer_index = 29; uint32_t shader_output_buffer_index = 28; @@ -266,9 +290,15 @@ public: uint32_t buffer_size_buffer_index = 25; uint32_t view_mask_buffer_index = 24; uint32_t dynamic_offsets_buffer_index = 23; + uint32_t shader_input_buffer_index = 22; + uint32_t shader_index_buffer_index = 21; uint32_t shader_input_wg_index = 0; uint32_t device_index = 0; uint32_t enable_frag_output_mask = 0xffffffff; + // Metal doesn't allow setting a fixed sample mask directly in the pipeline. + // We can evade this restriction by ANDing the internal sample_mask output + // of the shader with the additional fixed sample mask. + uint32_t additional_fixed_sample_mask = 0xffffffff; bool enable_point_size_builtin = true; bool enable_frag_depth_builtin = true; bool enable_frag_stencil_ref_builtin = true; @@ -277,6 +307,7 @@ public: bool swizzle_texture_samples = false; bool tess_domain_origin_lower_left = false; bool multiview = false; + bool multiview_layered_rendering = true; bool view_index_from_device_index = false; bool dispatch_base = false; bool texture_1D_as_2D = false; @@ -296,7 +327,7 @@ public: bool ios_support_base_vertex_instance = false; // Use Metal's native frame-buffer fetch API for subpass inputs. - bool ios_use_framebuffer_fetch_subpasses = false; + bool use_framebuffer_fetch_subpasses = false; // Enables use of "fma" intrinsic for invariant float math bool invariant_float_math = false; @@ -324,6 +355,64 @@ public: // can be read in subsequent stages. bool enable_clip_distance_user_varying = true; + // In a tessellation control shader, assume that more than one patch can be processed in a + // single workgroup. This requires changes to the way the InvocationId and PrimitiveId + // builtins are processed, but should result in more efficient usage of the GPU. + bool multi_patch_workgroup = false; + + // If set, a vertex shader will be compiled as part of a tessellation pipeline. + // It will be translated as a compute kernel, so it can use the global invocation ID + // to index the output buffer. + bool vertex_for_tessellation = false; + + // Assume that SubpassData images have multiple layers. Layered input attachments + // are addressed relative to the Layer output from the vertex pipeline. This option + // has no effect with multiview, since all input attachments are assumed to be layered + // and will be addressed using the current ViewIndex. + bool arrayed_subpass_input = false; + + // Whether to use SIMD-group or quadgroup functions to implement group nnon-uniform + // operations. Some GPUs on iOS do not support the SIMD-group functions, only the + // quadgroup functions. + bool ios_use_simdgroup_functions = false; + + // If set, the subgroup size will be assumed to be one, and subgroup-related + // builtins and operations will be emitted accordingly. This mode is intended to + // be used by MoltenVK on hardware/software configurations which do not provide + // sufficient support for subgroups. + bool emulate_subgroups = false; + + // If nonzero, a fixed subgroup size to assume. Metal, similarly to VK_EXT_subgroup_size_control, + // allows the SIMD-group size (aka thread execution width) to vary depending on + // register usage and requirements. In certain circumstances--for example, a pipeline + // in MoltenVK without VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT-- + // this is undesirable. This fixes the value of the SubgroupSize builtin, instead of + // mapping it to the Metal builtin [[thread_execution_width]]. If the thread + // execution width is reduced, the extra invocations will appear to be inactive. + // If zero, the SubgroupSize will be allowed to vary, and the builtin will be mapped + // to the Metal [[thread_execution_width]] builtin. + uint32_t fixed_subgroup_size = 0; + + enum class IndexType + { + None = 0, + UInt16 = 1, + UInt32 = 2 + }; + + // The type of index in the index buffer, if present. For a compute shader, Metal + // requires specifying the indexing at pipeline creation, rather than at draw time + // as with graphics pipelines. This means we must create three different pipelines, + // for no indexing, 16-bit indices, and 32-bit indices. Each requires different + // handling for the gl_VertexIndex builtin. We may as well, then, create three + // different shaders for these three scenarios. + IndexType vertex_index_type = IndexType::None; + + // If set, a dummy [[sample_id]] input is added to a fragment shader if none is present. + // This will force the shader to run at sample rate, assuming Metal does not optimize + // the extra threads away. + bool force_sample_rate_shading = false; + bool is_ios() const { return platform == iOS; @@ -423,11 +512,10 @@ public: explicit CompilerMSL(const ParsedIR &ir); explicit CompilerMSL(ParsedIR &&ir); - // attr is a vertex attribute binding used to match - // vertex content locations to MSL attributes. If vertex attributes are provided, - // is_msl_vertex_attribute_used() will return true after calling ::compile() if - // the location was used by the MSL code. - void add_msl_vertex_attribute(const MSLVertexAttr &attr); + // input is a shader input description used to fix up shader input variables. + // If shader inputs are provided, is_msl_shader_input_used() will return true after + // calling ::compile() if the location was used by the MSL code. + void add_msl_shader_input(const MSLShaderInput &input); // resource is a resource binding to indicate the MSL buffer, // texture or sampler index to use for a particular SPIR-V description set @@ -459,8 +547,8 @@ public: // constant. Opt-in to this behavior here on a per set basis. void set_argument_buffer_device_address_space(uint32_t desc_set, bool device_storage); - // Query after compilation is done. This allows you to check if a location or set/binding combination was used by the shader. - bool is_msl_vertex_attribute_used(uint32_t location); + // Query after compilation is done. This allows you to check if an input location was used by the shader. + bool is_msl_shader_input_used(uint32_t location); // NOTE: Only resources which are remapped using add_msl_resource_binding will be reported here. // Constexpr samplers are always assumed to be emitted. @@ -509,6 +597,9 @@ public: // to use for a particular location. The default is 4 if number of components is not overridden. void set_fragment_output_components(uint32_t location, uint32_t components); + void set_combined_sampler_suffix(const char *suffix); + const char *get_combined_sampler_suffix() const; + protected: // An enum of SPIR-V functions that are implemented in additional // source code that is added to the shader if necessary. @@ -548,12 +639,20 @@ protected: SPVFuncImplTextureSwizzle, SPVFuncImplGatherSwizzle, SPVFuncImplGatherCompareSwizzle, + SPVFuncImplSubgroupBroadcast, + SPVFuncImplSubgroupBroadcastFirst, SPVFuncImplSubgroupBallot, SPVFuncImplSubgroupBallotBitExtract, SPVFuncImplSubgroupBallotFindLSB, SPVFuncImplSubgroupBallotFindMSB, SPVFuncImplSubgroupBallotBitCount, SPVFuncImplSubgroupAllEqual, + SPVFuncImplSubgroupShuffle, + SPVFuncImplSubgroupShuffleXor, + SPVFuncImplSubgroupShuffleUp, + SPVFuncImplSubgroupShuffleDown, + SPVFuncImplQuadBroadcast, + SPVFuncImplQuadSwap, SPVFuncImplReflectScalar, SPVFuncImplRefractScalar, SPVFuncImplFaceForwardScalar, @@ -577,13 +676,11 @@ protected: SPVFuncImplConvertYCbCrBT601, SPVFuncImplConvertYCbCrBT2020, SPVFuncImplDynamicImageSampler, - - SPVFuncImplArrayCopyMultidimMax = 6 }; // If the underlying resource has been used for comparison then duplicate loads of that resource must be too // Use Metal's native frame-buffer fetch API for subpass inputs. - void emit_texture_op(const Instruction &i) override; + void emit_texture_op(const Instruction &i, bool sparse) override; void emit_binary_unord_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); void emit_instruction(const Instruction &instr) override; void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, @@ -594,7 +691,7 @@ protected: void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override; void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override; void emit_subgroup_op(const Instruction &i) override; - std::string to_texture_op(const Instruction &i, bool *forward, + std::string to_texture_op(const Instruction &i, bool sparse, bool *forward, SmallVector &inherited_expressions) override; void emit_fixup() override; std::string to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, @@ -614,17 +711,12 @@ protected: std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0) override; std::string image_type_glsl(const SPIRType &type, uint32_t id = 0) override; - std::string sampler_type(const SPIRType &type); + std::string sampler_type(const SPIRType &type, uint32_t id); std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override; std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) override; std::string to_name(uint32_t id, bool allow_alias = true) const override; - std::string to_function_name(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, - bool has_array_offsets, bool has_offset, bool has_grad, bool has_dref, uint32_t lod, - uint32_t minlod) override; - std::string to_function_args(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, - uint32_t coord, uint32_t coord_components, uint32_t dref, uint32_t grad_x, - uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, - uint32_t comp, uint32_t sample, uint32_t minlod, bool *p_forward) override; + std::string to_function_name(const TextureFunctionNameArguments &args) override; + std::string to_function_args(const TextureFunctionArguments &args, bool *p_forward) override; std::string to_initializer_expression(const SPIRVariable &var) override; std::string to_zero_initialized_expression(uint32_t type_id) override; @@ -693,9 +785,9 @@ protected: void fix_up_interface_member_indices(spv::StorageClass storage, uint32_t ib_type_id); - void mark_location_as_used_by_shader(uint32_t location, spv::StorageClass storage); + void mark_location_as_used_by_shader(uint32_t location, const SPIRType &type, spv::StorageClass storage); uint32_t ensure_correct_builtin_type(uint32_t type_id, spv::BuiltIn builtin); - uint32_t ensure_correct_attribute_type(uint32_t type_id, uint32_t location, uint32_t num_components = 0); + uint32_t ensure_correct_input_type(uint32_t type_id, uint32_t location, uint32_t num_components = 0); void emit_custom_templates(); void emit_custom_functions(); @@ -703,6 +795,7 @@ protected: void emit_specialization_constants_and_structs(); void emit_interface_block(uint32_t ib_var_id); bool maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs); + uint32_t get_resource_array_size(uint32_t id) const; void fix_up_shader_inputs_outputs(); @@ -717,6 +810,8 @@ protected: std::string to_sampler_expression(uint32_t id); std::string to_swizzle_expression(uint32_t id); std::string to_buffer_size_expression(uint32_t id); + bool is_sample_rate() const; + bool is_direct_input_builtin(spv::BuiltIn builtin); std::string builtin_qualifier(spv::BuiltIn builtin); std::string builtin_type_decl(spv::BuiltIn builtin, uint32_t id = 0); std::string built_in_func_arg(spv::BuiltIn builtin, bool prefix_comma); @@ -739,7 +834,13 @@ protected: uint32_t get_declared_struct_member_matrix_stride_msl(const SPIRType &struct_type, uint32_t index) const; uint32_t get_declared_struct_member_alignment_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_input_size_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_input_array_stride_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_input_matrix_stride_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_input_alignment_msl(const SPIRType &struct_type, uint32_t index) const; + const SPIRType &get_physical_member_type(const SPIRType &struct_type, uint32_t index) const; + SPIRType get_presumed_input_type(const SPIRType &struct_type, uint32_t index) const; uint32_t get_declared_struct_size_msl(const SPIRType &struct_type, bool ignore_alignment = false, bool ignore_padding = false) const; @@ -758,6 +859,8 @@ protected: SPIRType &get_patch_stage_in_struct_type(); SPIRType &get_patch_stage_out_struct_type(); std::string get_tess_factor_struct_name(); + SPIRType &get_uint_type(); + uint32_t get_uint_type_id(); void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, uint32_t mem_order_1, uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0, bool op1_is_pointer = false, bool op1_is_literal = false, uint32_t op2 = 0); @@ -772,6 +875,7 @@ protected: void emit_entry_point_declarations() override; uint32_t builtin_frag_coord_id = 0; uint32_t builtin_sample_id_id = 0; + uint32_t builtin_sample_mask_id = 0; uint32_t builtin_vertex_idx_id = 0; uint32_t builtin_base_vertex_id = 0; uint32_t builtin_instance_idx_id = 0; @@ -783,17 +887,26 @@ protected: uint32_t builtin_subgroup_invocation_id_id = 0; uint32_t builtin_subgroup_size_id = 0; uint32_t builtin_dispatch_base_id = 0; + uint32_t builtin_stage_input_size_id = 0; + uint32_t builtin_local_invocation_index_id = 0; + uint32_t builtin_workgroup_size_id = 0; uint32_t swizzle_buffer_id = 0; uint32_t buffer_size_buffer_id = 0; uint32_t view_mask_buffer_id = 0; uint32_t dynamic_offsets_buffer_id = 0; + uint32_t uint_type_id = 0; - void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override; - void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override; + bool does_shader_write_sample_mask = false; + + void cast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override; + void cast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override; void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) override; void analyze_sampled_image_usage(); + void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage, + bool &is_packed) override; + void fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length); bool emit_tessellation_access_chain(const uint32_t *ops, uint32_t length); bool emit_tessellation_io_load(uint32_t result_type, uint32_t id, uint32_t ptr); bool is_out_of_bounds_tessellation_level(uint32_t id_lhs); @@ -806,9 +919,10 @@ protected: Options msl_options; std::set spv_function_implementations; - std::unordered_map vtx_attrs_by_location; - std::unordered_map vtx_attrs_by_builtin; - std::unordered_set vtx_attrs_in_use; + // Must be ordered to ensure declarations are in a specific order. + std::map inputs_by_location; + std::unordered_map inputs_by_builtin; + std::unordered_set inputs_in_use; std::unordered_map fragment_output_components; std::set pragma_lines; std::set typedef_lines; @@ -851,6 +965,8 @@ protected: bool used_swizzle_buffer = false; bool added_builtin_tess_level = false; bool needs_subgroup_invocation_id = false; + bool needs_subgroup_size = false; + bool needs_sample_id = false; std::string qual_pos_var_name; std::string stage_in_var_name = "in"; std::string stage_out_var_name = "out"; @@ -861,9 +977,11 @@ protected: std::string buffer_size_name_suffix = "BufferSize"; std::string plane_name_suffix = "Plane"; std::string input_wg_var_name = "gl_in"; + std::string input_buffer_var_name = "spvIn"; std::string output_buffer_var_name = "spvOut"; std::string patch_output_buffer_var_name = "spvPatchOut"; std::string tess_factor_buffer_var_name = "spvTessLevel"; + std::string index_buffer_var_name = "spvIndices"; spv::Op previous_instruction_opcode = spv::OpNop; // Must be ordered since declaration is in a specific order. @@ -874,6 +992,7 @@ protected: std::unordered_set buffers_requiring_array_length; SmallVector buffer_arrays; std::unordered_set atomic_image_vars; // Emulate texture2D atomic operations + std::unordered_set pull_model_inputs; // Must be ordered since array is in a specific order. std::map> buffers_requiring_dynamic_offset; @@ -890,7 +1009,9 @@ protected: bool descriptor_set_is_argument_buffer(uint32_t desc_set) const; uint32_t get_target_components_for_fragment_location(uint32_t location) const; - uint32_t build_extended_vector_type(uint32_t type_id, uint32_t components); + uint32_t build_extended_vector_type(uint32_t type_id, uint32_t components, + SPIRType::BaseType basetype = SPIRType::Unknown); + uint32_t build_msl_interpolant_type(uint32_t type_id, bool is_noperspective); bool suppress_missing_prototypes = false; @@ -919,6 +1040,8 @@ protected: bool uses_atomics = false; bool uses_resource_write = false; bool needs_subgroup_invocation_id = false; + bool needs_subgroup_size = false; + bool needs_sample_id = false; }; // OpcodeHandler that scans for uses of sampled images diff --git a/mac/include/spirv_cross/spirv_parser.hpp b/mac/include/spirv_cross/spirv_parser.hpp index dafa3e86..190266a4 100644 --- a/mac/include/spirv_cross/spirv_parser.hpp +++ b/mac/include/spirv_cross/spirv_parser.hpp @@ -14,6 +14,13 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. + */ + #ifndef SPIRV_CROSS_PARSER_HPP #define SPIRV_CROSS_PARSER_HPP @@ -84,6 +91,7 @@ private: // This must be an ordered data structure so we always pick the same type aliases. SmallVector global_struct_cache; + SmallVector> forward_pointer_fixups; bool types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const; bool variable_storage_is_aliased(const SPIRVariable &v) const; diff --git a/mac/libs/glslang/libOGLCompiler.a b/mac/libs/glslang/libOGLCompiler.a index d6de367e..8fb7b746 100644 Binary files a/mac/libs/glslang/libOGLCompiler.a and b/mac/libs/glslang/libOGLCompiler.a differ diff --git a/mac/libs/glslang/libOSDependent.a b/mac/libs/glslang/libOSDependent.a index 2ab1f52b..b8a14ca5 100644 Binary files a/mac/libs/glslang/libOSDependent.a and b/mac/libs/glslang/libOSDependent.a differ diff --git a/mac/libs/glslang/libSPIRV.a b/mac/libs/glslang/libSPIRV.a index c4b07f2b..bc9cd084 100644 Binary files a/mac/libs/glslang/libSPIRV.a and b/mac/libs/glslang/libSPIRV.a differ diff --git a/mac/libs/glslang/libglslang-default-resource-limits.a b/mac/libs/glslang/libglslang-default-resource-limits.a index f0ed5dab..792b66e8 100644 Binary files a/mac/libs/glslang/libglslang-default-resource-limits.a and b/mac/libs/glslang/libglslang-default-resource-limits.a differ diff --git a/mac/libs/glslang/libglslang.a b/mac/libs/glslang/libglslang.a index 8afc0b26..95e55661 100644 Binary files a/mac/libs/glslang/libglslang.a and b/mac/libs/glslang/libglslang.a differ diff --git a/mac/libs/libcrypto.a b/mac/libs/libcrypto.a index 1078caac..8bb3819f 100644 Binary files a/mac/libs/libcrypto.a and b/mac/libs/libcrypto.a differ diff --git a/mac/libs/libcurl.a b/mac/libs/libcurl.a index 0166c854..475e5d85 100644 Binary files a/mac/libs/libcurl.a and b/mac/libs/libcurl.a differ diff --git a/mac/libs/libfreetype.a b/mac/libs/libfreetype.a index 30363218..4d74de31 100644 Binary files a/mac/libs/libfreetype.a and b/mac/libs/libfreetype.a differ diff --git a/mac/libs/libinspector.a b/mac/libs/libinspector.a index 27a2d7c1..3d79a201 100644 Binary files a/mac/libs/libinspector.a and b/mac/libs/libinspector.a differ diff --git a/mac/libs/libjpeg.a b/mac/libs/libjpeg.a index d8eeb3fe..5ce71aca 100644 Binary files a/mac/libs/libjpeg.a and b/mac/libs/libjpeg.a differ diff --git a/mac/libs/libpng.a b/mac/libs/libpng.a index 1c3a326e..51665a99 100644 Binary files a/mac/libs/libpng.a and b/mac/libs/libpng.a differ diff --git a/mac/libs/libspirv-cross-core.a b/mac/libs/libspirv-cross-core.a index 3bf2df6c..3ff4abeb 100644 Binary files a/mac/libs/libspirv-cross-core.a and b/mac/libs/libspirv-cross-core.a differ diff --git a/mac/libs/libspirv-cross-glsl.a b/mac/libs/libspirv-cross-glsl.a index a64bf6d2..4009c829 100644 Binary files a/mac/libs/libspirv-cross-glsl.a and b/mac/libs/libspirv-cross-glsl.a differ diff --git a/mac/libs/libspirv-cross-msl.a b/mac/libs/libspirv-cross-msl.a index c9fc1471..be74e81d 100644 Binary files a/mac/libs/libspirv-cross-msl.a and b/mac/libs/libspirv-cross-msl.a differ diff --git a/mac/libs/libssl.a b/mac/libs/libssl.a index feabaabe..5bfab03f 100644 Binary files a/mac/libs/libssl.a and b/mac/libs/libssl.a differ diff --git a/mac/libs/libuv_a.a b/mac/libs/libuv_a.a index 188290c1..c8cbea12 100644 Binary files a/mac/libs/libuv_a.a and b/mac/libs/libuv_a.a differ diff --git a/mac/libs/libv8_monolith.a b/mac/libs/libv8_monolith.a index 5d222f3f..a8f6a2d1 100644 Binary files a/mac/libs/libv8_monolith.a and b/mac/libs/libv8_monolith.a differ diff --git a/mac/libs/libwebp.a b/mac/libs/libwebp.a index 3acc3299..b4a16761 100644 Binary files a/mac/libs/libwebp.a and b/mac/libs/libwebp.a differ diff --git a/mac/libs/libwebsockets.a b/mac/libs/libwebsockets.a index 8a2174e3..f32680ba 100644 Binary files a/mac/libs/libwebsockets.a and b/mac/libs/libwebsockets.a differ diff --git a/mac/libs/libz.a b/mac/libs/libz.a index 0c6142b9..eeb62c6a 100644 Binary files a/mac/libs/libz.a and b/mac/libs/libz.a differ