From 640fc1418b08449f2ba729588aef5e06bc5df636 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Fri, 24 Dec 2021 17:53:36 -0500
Subject: [PATCH 1/5] emit_glsl_floating_point: Fix FPNeg on newer Nvidia
 drivers

---
 .../backend/glsl/emit_glsl_floating_point.cpp                 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp
index b765a251b..474189d87 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp
@@ -125,11 +125,11 @@ void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& i
 }
 
 void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
-    ctx.AddF32("{}=-({});", inst, value);
+    ctx.AddF32("{}=0.f-({});", inst, value); // NVIDIA fix; NOTE(review): -(+0) may give +0 here
 }
 
 void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
-    ctx.AddF64("{}=-({});", inst, value);
+    ctx.AddF64("{}=double(0.)-({});", inst, value); // NVIDIA fix; NOTE(review): -(+0) may give +0
 }
 
 void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value) {

From 14ac0c2923c41df9c6fc4833d2a8e46a6efe5b59 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Fri, 24 Dec 2021 20:00:28 -0500
Subject: [PATCH 2/5] shader: Add integer attribute get optimization pass

Works around an NVIDIA driver bug where casting integer attributes to float and back to an integer always returned 0.
---
 .../glasm/emit_glasm_context_get_set.cpp      | 16 ++++++++++++
 .../backend/glasm/emit_glasm_instructions.h   |  1 +
 .../glsl/emit_glsl_context_get_set.cpp        | 16 ++++++++++++
 .../backend/glsl/emit_glsl_instructions.h     |  2 ++
 .../spirv/emit_spirv_context_get_set.cpp      | 25 +++++++++++++++++++
 .../backend/spirv/emit_spirv_instructions.h   |  1 +
 src/shader_recompiler/frontend/ir/opcodes.inc |  1 +
 .../ir_opt/collect_shader_info_pass.cpp       |  1 +
 .../ir_opt/constant_propagation_pass.cpp      | 23 +++++++++++++++++
 9 files changed, 86 insertions(+)

diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
index 081b2c8e0..c0f5fc402 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
@@ -126,6 +126,22 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal
     }
 }
 
+void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32) {
+    switch (attr) { // Only the three integer built-ins below are handled.
+    case IR::Attribute::PrimitiveId:
+        ctx.Add("MOV.S {}.x,primitive.id;", inst);
+        break;
+    case IR::Attribute::InstanceId:
+        ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name);
+        break;
+    case IR::Attribute::VertexId:
+        ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name);
+        break;
+    default: // Any other attribute is rejected with an exception.
+        throw NotImplementedException("Get U32 attribute {}", attr);
+    }
+}
+
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value,
                       [[maybe_unused]] ScalarU32 vertex) {
     const u32 element{static_cast<u32>(attr) % 4};
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
index 1f343bff5..b48007856 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
@@ -50,6 +50,7 @@ void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
 void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
 void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
 void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex);
+void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex);
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex);
 void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex);
 void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex);
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
index 6477bd192..5ef46d634 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -221,6 +221,22 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
     }
 }
 
+void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, std::string_view) {
+    switch (attr) { // Built-ins are read as integers and cast straight to uint.
+    case IR::Attribute::PrimitiveId:
+        ctx.AddU32("{}=uint(gl_PrimitiveID);", inst);
+        break;
+    case IR::Attribute::InstanceId:
+        ctx.AddU32("{}=uint(gl_InstanceID);", inst);
+        break;
+    case IR::Attribute::VertexId:
+        ctx.AddU32("{}=uint(gl_VertexID);", inst);
+        break;
+    default: // Any other attribute is rejected with an exception.
+        throw NotImplementedException("Get U32 attribute {}", attr);
+    }
+}
+
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
                       [[maybe_unused]] std::string_view vertex) {
     if (IR::IsGeneric(attr)) {
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
index f86502e4c..6cabbc717 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
@@ -60,6 +60,8 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
                       const IR::Value& offset);
 void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
                       std::string_view vertex);
+void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
+                         std::string_view vertex);
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
                       std::string_view vertex);
 void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset,
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 14f470812..8ea730c80 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -355,6 +355,31 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
     }
 }
 
+Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) { // Third argument is unused.
+    switch (attr) {
+    case IR::Attribute::PrimitiveId:
+        return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
+    case IR::Attribute::InstanceId:
+        if (ctx.profile.support_vertex_instance_id) {
+            return ctx.OpLoad(ctx.U32[1], ctx.instance_id);
+        } else { // Without vertex/instance id support, use instance_index - base_instance.
+            const Id index{ctx.OpLoad(ctx.U32[1], ctx.instance_index)};
+            const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_instance)};
+            return ctx.OpISub(ctx.U32[1], index, base);
+        }
+    case IR::Attribute::VertexId:
+        if (ctx.profile.support_vertex_instance_id) {
+            return ctx.OpLoad(ctx.U32[1], ctx.vertex_id);
+        } else { // Without vertex/instance id support, use vertex_index - base_vertex.
+            const Id index{ctx.OpLoad(ctx.U32[1], ctx.vertex_index)};
+            const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
+            return ctx.OpISub(ctx.U32[1], index, base);
+        }
+    default: // Any other attribute is rejected with an exception.
+        throw NotImplementedException("Read U32 attribute {}", attr);
+    }
+}
+
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) {
     const std::optional<OutAttr> output{OutputAttrPointer(ctx, attr)};
     if (!output) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 6cd22dd3e..887112deb 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -53,6 +53,7 @@ Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
 Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
 Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
 Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex);
+Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id vertex);
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex);
 Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex);
 void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex);
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 6929919df..b94ce7406 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -40,6 +40,7 @@ OPCODE(GetCbufU32,                                          U32,            U32,
 OPCODE(GetCbufF32,                                          F32,            U32,            U32,                                                            )
 OPCODE(GetCbufU32x2,                                        U32x2,          U32,            U32,                                                            )
 OPCODE(GetAttribute,                                        F32,            Attribute,      U32,                                                            )
+OPCODE(GetAttributeU32,                                     U32,            Attribute,      U32,                                                            )
 OPCODE(SetAttribute,                                        Void,           Attribute,      F32,            U32,                                            )
 OPCODE(GetAttributeIndexed,                                 F32,            U32,            U32,                                                            )
 OPCODE(SetAttributeIndexed,                                 Void,           U32,            F32,            U32,                                            )
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 1e476d83d..a78c469be 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -389,6 +389,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
         info.uses_demote_to_helper_invocation = true;
         break;
     case IR::Opcode::GetAttribute:
+    case IR::Opcode::GetAttributeU32:
         info.loads.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true;
         break;
     case IR::Opcode::SetAttribute:
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index d089fdd12..c134a12bc 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -505,6 +505,29 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
             return;
         }
     }
+    if constexpr (op == IR::Opcode::BitCastU32F32) {
+        // Workaround for new NVIDIA driver bug, where:
+        // uint attr = ftou(itof(gl_InstanceID));
+        // always returned 0.
+        // We can instead manually optimize this and work around the driver bug:
+        // uint attr = uint(gl_InstanceID);
+        if (arg_inst->GetOpcode() == IR::Opcode::GetAttribute) {
+            const IR::Attribute attr{arg_inst->Arg(0).Attribute()};
+            switch (attr) {
+            case IR::Attribute::PrimitiveId:
+            case IR::Attribute::InstanceId:
+            case IR::Attribute::VertexId:
+                break;
+            default:
+                return;
+            }
+            // Replace the bitcasts with an integer attribute get
+            inst.ReplaceOpcode(IR::Opcode::GetAttributeU32);
+            inst.SetArg(0, arg_inst->Arg(0));
+            inst.SetArg(1, arg_inst->Arg(1));
+            return;
+        }
+    }
 }
 
 void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {

From 9f34be5a6176674c9f4be0e2636cf1c01c067e69 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Sat, 25 Dec 2021 16:59:55 -0500
Subject: [PATCH 3/5] emit_glsl_integer: Use negation work around

---
 src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp
index 44060df33..b0d85be99 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp
@@ -87,11 +87,11 @@ void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin
 }
 
 void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
-    ctx.AddU32("{}=uint(-({}));", inst, value);
+    ctx.AddU32("{}=uint(int(0)-int({}));", inst, value); // 0-x, not unary minus
 }
 
 void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
-    ctx.AddU64("{}=-({});", inst, value);
+    ctx.AddU64("{}=uint64_t(int64_t(0)-int64_t({}));", inst, value); // 0-x, not unary minus
 }
 
 void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {

From b84d429c2ec59e54a89d9d4e34b0df9f22172e8f Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Mon, 27 Dec 2021 23:59:32 -0500
Subject: [PATCH 4/5] glsl_context_get_set: Add alternative cbuf type for
 broken drivers

Some drivers have a bug when bitwise-converting floating-point cbuf values to uint variables. This adds a workaround for those drivers: all cbufs are declared as uint and converted to floating point as needed.
---
 .../glsl/emit_glsl_context_get_set.cpp        | 35 +++++++++++--------
 .../backend/glsl/glsl_emit_context.cpp        |  7 ++--
 src/shader_recompiler/profile.h               |  2 ++
 src/video_core/renderer_opengl/gl_device.cpp  |  9 ++---
 src/video_core/renderer_opengl/gl_device.h    |  5 +++
 .../renderer_opengl/gl_shader_cache.cpp       |  1 +
 6 files changed, 35 insertions(+), 24 deletions(-)

diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
index 5ef46d634..0c1fbc7b1 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -102,39 +102,46 @@ void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const
 
 void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                    const IR::Value& offset) {
-    GetCbuf8(ctx, inst, binding, offset, "ftou");
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"}; // uvec4 cbuf needs no ftou
+    GetCbuf8(ctx, inst, binding, offset, cast);
 }
 
 void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                    const IR::Value& offset) {
-    GetCbuf8(ctx, inst, binding, offset, "ftoi");
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "ftoi"}; // uvec4 cbuf: plain cast
+    GetCbuf8(ctx, inst, binding, offset, cast);
 }
 
 void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                     const IR::Value& offset) {
-    GetCbuf16(ctx, inst, binding, offset, "ftou");
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"}; // uvec4 cbuf needs no ftou
+    GetCbuf16(ctx, inst, binding, offset, cast);
 }
 
 void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                     const IR::Value& offset) {
-    GetCbuf16(ctx, inst, binding, offset, "ftoi");
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "ftoi"}; // uvec4 cbuf: plain cast
+    GetCbuf16(ctx, inst, binding, offset, cast);
 }
 
 void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                     const IR::Value& offset) {
     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
-    GetCbuf(ctx, ret, binding, offset, 32, "ftou");
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"}; // uvec4 cbuf needs no ftou
+    GetCbuf(ctx, ret, binding, offset, 32, cast);
 }
 
 void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                     const IR::Value& offset) {
     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)};
-    GetCbuf(ctx, ret, binding, offset, 32);
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "utof" : ""}; // uvec4 cbuf read as F32
+    GetCbuf(ctx, ret, binding, offset, 32, cast);
 }
 
 void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                       const IR::Value& offset) {
     const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
     if (offset.IsImmediate()) {
         static constexpr u32 cbuf_size{0x10000};
         const u32 u32_offset{offset.U32()};
@@ -145,26 +152,26 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
             return;
         }
         if (u32_offset % 2 == 0) {
-            ctx.AddU32x2("{}=ftou({}[{}].{}{});", inst, cbuf, u32_offset / 16,
+            ctx.AddU32x2("{}={}({}[{}].{}{});", inst, cast, cbuf, u32_offset / 16,
                          OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4));
         } else {
-            ctx.AddU32x2("{}=uvec2(ftou({}[{}].{}),ftou({}[{}].{}));", inst, cbuf, u32_offset / 16,
-                         OffsetSwizzle(u32_offset), cbuf, (u32_offset + 4) / 16,
-                         OffsetSwizzle(u32_offset + 4));
+            ctx.AddU32x2("{}=uvec2({}({}[{}].{}),{}({}[{}].{}));", inst, cast, cbuf,
+                         u32_offset / 16, OffsetSwizzle(u32_offset), cast, cbuf,
+                         (u32_offset + 4) / 16, OffsetSwizzle(u32_offset + 4));
         }
         return;
     }
     const auto offset_var{ctx.var_alloc.Consume(offset)};
     if (!ctx.profile.has_gl_component_indexing_bug) {
-        ctx.AddU32x2("{}=uvec2(ftou({}[{}>>4][({}>>2)%4]),ftou({}[({}+4)>>4][(({}+4)>>2)%4]));",
-                     inst, cbuf, offset_var, offset_var, cbuf, offset_var, offset_var);
+        ctx.AddU32x2("{}=uvec2({}({}[{}>>4][({}>>2)%4]),{}({}[({}+4)>>4][(({}+4)>>2)%4]));", inst,
+                     cast, cbuf, offset_var, offset_var, cast, cbuf, offset_var, offset_var);
         return;
     }
     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)};
     const auto cbuf_offset{fmt::format("{}>>2", offset_var)};
     for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
-        ctx.Add("if(({}&3)=={}){}=uvec2(ftou({}[{}>>4].{}),ftou({}[({}+4)>>4].{}));", cbuf_offset,
-                swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], cbuf, offset_var,
+        ctx.Add("if(({}&3)=={}){}=uvec2({}({}[{}>>4].{}),{}({}[({}+4)>>4].{}));", cbuf_offset,
+                swizzle, ret, cast, cbuf, offset_var, "xyzw"[swizzle], cast, cbuf, offset_var,
                 "xyzw"[(swizzle + 1) % 4]);
     }
 }
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
index bc9d2a904..bb7f1a0fd 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
@@ -428,9 +428,10 @@ void EmitContext::DefineConstantBuffers(Bindings& bindings) {
         return;
     }
     for (const auto& desc : info.constant_buffer_descriptors) {
-        header += fmt::format(
-            "layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};",
-            bindings.uniform_buffer, stage_name, desc.index, stage_name, desc.index, 4 * 1024);
+        const auto cbuf_type{profile.has_gl_cbuf_ftou_bug ? "uvec4" : "vec4"};
+        header += fmt::format("layout(std140,binding={}) uniform {}_cbuf_{}{{{} {}_cbuf{}[{}];}};",
+                              bindings.uniform_buffer, stage_name, desc.index, cbuf_type,
+                              stage_name, desc.index, 4 * 1024);
         bindings.uniform_buffer += desc.count;
     }
 }
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index f0c3b3b17..9deb3f4bb 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -65,6 +65,8 @@ struct Profile {
     bool has_gl_component_indexing_bug{};
     /// The precise type qualifier is broken in the fragment stage of some drivers
     bool has_gl_precise_bug{};
+    /// Some drivers do not properly support floatBitsToUint when used on cbufs
+    bool has_gl_cbuf_ftou_bug{};
     /// Ignores SPIR-V ordered vs unordered using GLSL semantics
     bool ignore_nan_fp_comparisons{};
 
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 0764ea6e0..32736126f 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -182,17 +182,12 @@ Device::Device() {
         shader_backend = Settings::ShaderBackend::GLSL;
     }
 
-    if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia &&
-        !Settings::values.renderer_debug) {
+    if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia) {
         const std::string_view driver_version = version.substr(13);
         const int version_major =
             std::atoi(driver_version.substr(0, driver_version.find(".")).data());
-
         if (version_major >= 495) {
-            LOG_WARNING(Render_OpenGL, "NVIDIA drivers 495 and later causes significant problems "
-                                       "with yuzu. Forcing GLASM as a mitigation.");
-            shader_backend = Settings::ShaderBackend::GLASM;
-            use_assembly_shaders = true;
+            has_cbuf_ftou_bug = true;
         }
     }
 
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index de9e41659..fe53ef991 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -152,6 +152,10 @@ public:
         return need_fastmath_off;
     }
 
+    bool HasCbufFtouBug() const { // Set for NVIDIA + GLSL backend, driver major >= 495.
+        return has_cbuf_ftou_bug;
+    }
+
     Settings::ShaderBackend GetShaderBackend() const {
         return shader_backend;
     }
@@ -200,6 +204,7 @@ private:
     bool has_sparse_texture_2{};
     bool warp_size_potentially_larger_than_guest{};
     bool need_fastmath_off{};
+    bool has_cbuf_ftou_bug{};
 
     std::string vendor_name;
 };
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 29c6e1a5f..1efcc3562 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -214,6 +214,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
           .has_broken_fp16_float_controls = false,
           .has_gl_component_indexing_bug = device.HasComponentIndexingBug(),
           .has_gl_precise_bug = device.HasPreciseBug(),
+          .has_gl_cbuf_ftou_bug = device.HasCbufFtouBug(),
           .ignore_nan_fp_comparisons = true,
           .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
       },

From 8c907c620d830bdaef30ff4316489443775b3ea4 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Wed, 29 Dec 2021 18:55:42 -0500
Subject: [PATCH 5/5] glsl: Add boolean reference workaround

---
 .../backend/glsl/emit_glsl_bitwise_conversion.cpp            | 4 +++-
 src/shader_recompiler/backend/glsl/emit_glsl_special.cpp     | 4 +++-
 src/shader_recompiler/profile.h                              | 2 ++
 src/video_core/renderer_opengl/gl_device.cpp                 | 1 +
 src/video_core/renderer_opengl/gl_device.h                   | 5 +++++
 src/video_core/renderer_opengl/gl_shader_cache.cpp           | 1 +
 6 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
index 0f2668d9e..e0ead7a53 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
@@ -7,6 +7,7 @@
 #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
 #include "shader_recompiler/backend/glsl/glsl_emit_context.h"
 #include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
 
 namespace Shader::Backend::GLSL {
 namespace {
@@ -30,8 +31,9 @@ void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value)
     inst.DestructiveAddUsage(1);
     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U1)};
     const auto input{ctx.var_alloc.Consume(value)};
+    const auto suffix{ctx.profile.has_gl_bool_ref_bug ? "?true:false" : ""};
     if (ret != input) {
-        ctx.Add("{}={};", ret, input);
+        ctx.Add("{}={}{};", ret, input, suffix);
     }
 }
 
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
index b8ddafe48..fcf620b79 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
@@ -90,7 +90,9 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value&
     if (phi_reg == val_reg) {
         return;
     }
-    ctx.Add("{}={};", phi_reg, val_reg);
+    const bool needs_workaround{ctx.profile.has_gl_bool_ref_bug && phi_type == IR::Type::U1};
+    const auto suffix{needs_workaround ? "?true:false" : ""};
+    ctx.Add("{}={}{};", phi_reg, val_reg, suffix);
 }
 
 void EmitPrologue(EmitContext& ctx) {
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index 9deb3f4bb..dc4c806ff 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -67,6 +67,8 @@ struct Profile {
     bool has_gl_precise_bug{};
     /// Some drivers do not properly support floatBitsToUint when used on cbufs
     bool has_gl_cbuf_ftou_bug{};
+    /// Some drivers poorly optimize boolean variable references
+    bool has_gl_bool_ref_bug{};
     /// Ignores SPIR-V ordered vs unordered using GLSL semantics
     bool ignore_nan_fp_comparisons{};
 
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 32736126f..e62912a22 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -188,6 +188,7 @@ Device::Device() {
             std::atoi(driver_version.substr(0, driver_version.find(".")).data());
         if (version_major >= 495) {
             has_cbuf_ftou_bug = true;
+            has_bool_ref_bug = true;
         }
     }
 
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index fe53ef991..95c2e8d38 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -156,6 +156,10 @@ public:
         return has_cbuf_ftou_bug;
     }
 
+    bool HasBoolRefBug() const { // Set for NVIDIA + GLSL backend, driver major >= 495.
+        return has_bool_ref_bug;
+    }
+
     Settings::ShaderBackend GetShaderBackend() const {
         return shader_backend;
     }
@@ -205,6 +209,7 @@ private:
     bool warp_size_potentially_larger_than_guest{};
     bool need_fastmath_off{};
     bool has_cbuf_ftou_bug{};
+    bool has_bool_ref_bug{};
 
     std::string vendor_name;
 };
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 1efcc3562..ec558a9af 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -215,6 +215,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
           .has_gl_component_indexing_bug = device.HasComponentIndexingBug(),
           .has_gl_precise_bug = device.HasPreciseBug(),
           .has_gl_cbuf_ftou_bug = device.HasCbufFtouBug(),
+          .has_gl_bool_ref_bug = device.HasBoolRefBug(),
           .ignore_nan_fp_comparisons = true,
           .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
       },