From daf2504d310558e0a74e6eb321ab0811ba6e5e55 Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Tue, 24 Jul 2018 20:41:40 -0500
Subject: [PATCH] GPU: Implemented the Z32_S8_X24_FLOAT depth buffer format.

---
 src/video_core/gpu.h                                   | 1 +
 src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 5 ++++-
 src/video_core/renderer_opengl/gl_rasterizer_cache.h   | 8 ++++++++
 src/video_core/textures/decoders.cpp                   | 3 +++
 4 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 58501ca8b..a0e5e3365 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -33,6 +33,7 @@ enum class DepthFormat : u32 {
     Z24_X8_UNORM = 0x15,
     Z24_S8_UNORM = 0x16,
     Z24_C8_UNORM = 0x18,
+    Z32_S8_X24_FLOAT = 0x19,
 };
 
 /// Returns the number of bytes per pixel of each rendertarget format.
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 0f5006383..91ce0357b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -122,6 +122,8 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
     {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
     {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm,
      false}, // Z16
+    {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV,
+     ComponentType::Float, false}, // Z32FS8
 }};
 
 static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
@@ -209,7 +211,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
         MortonCopy<true, PixelFormat::R32F>,         MortonCopy<true, PixelFormat::R16F>,
         MortonCopy<true, PixelFormat::R16UNORM>,     MortonCopy<true, PixelFormat::Z24S8>,
         MortonCopy<true, PixelFormat::S8Z24>,        MortonCopy<true, PixelFormat::Z32F>,
-        MortonCopy<true, PixelFormat::Z16>,
+        MortonCopy<true, PixelFormat::Z16>,          MortonCopy<true, PixelFormat::Z32FS8>,
 };
 
 static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
@@ -241,6 +243,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
         MortonCopy<false, PixelFormat::S8Z24>,
         MortonCopy<false, PixelFormat::Z32F>,
         MortonCopy<false, PixelFormat::Z16>,
+        MortonCopy<false, PixelFormat::Z32FS8>,
 };
 
 // Allocate an uninitialized texture of appropriate size and format for the surface
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index e1d3670d9..7785b7df4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -51,6 +51,7 @@ struct SurfaceParams {
         S8Z24 = 22,
         Z32F = 23,
         Z16 = 24,
+        Z32FS8 = 25,
 
         MaxDepthStencilFormat,
 
@@ -113,6 +114,7 @@ struct SurfaceParams {
             1, // S8Z24
             1, // Z32F
             1, // Z16
+            1, // Z32FS8
         }};
 
         ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
@@ -149,6 +151,7 @@ struct SurfaceParams {
             32,  // S8Z24
             32,  // Z32F
             16,  // Z16
+            64,  // Z32FS8
         }};
 
         ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -169,6 +172,8 @@ struct SurfaceParams {
             return PixelFormat::Z32F;
         case Tegra::DepthFormat::Z16_UNORM:
             return PixelFormat::Z16;
+        case Tegra::DepthFormat::Z32_S8_X24_FLOAT:
+            return PixelFormat::Z32FS8;
         default:
             LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
             UNREACHABLE();
@@ -325,6 +330,8 @@ struct SurfaceParams {
             return Tegra::DepthFormat::Z32_FLOAT;
         case PixelFormat::Z16:
             return Tegra::DepthFormat::Z16_UNORM;
+        case PixelFormat::Z32FS8:
+            return Tegra::DepthFormat::Z32_S8_X24_FLOAT;
         default:
             LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
             UNREACHABLE();
@@ -382,6 +389,7 @@ struct SurfaceParams {
         case Tegra::DepthFormat::Z24_S8_UNORM:
             return ComponentType::UNorm;
         case Tegra::DepthFormat::Z32_FLOAT:
+        case Tegra::DepthFormat::Z32_S8_X24_FLOAT:
             return ComponentType::Float;
         default:
             LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 970c06e71..50c5a56f6 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -90,6 +90,8 @@ static u32 DepthBytesPerPixel(DepthFormat format) {
     case DepthFormat::Z24_S8_UNORM:
     case DepthFormat::Z32_FLOAT:
         return 4;
+    case DepthFormat::Z32_S8_X24_FLOAT:
+        return 8;
     default:
         UNIMPLEMENTED_MSG("Format not implemented");
         break;
@@ -150,6 +152,7 @@ std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 wid
     case DepthFormat::S8_Z24_UNORM:
     case DepthFormat::Z24_S8_UNORM:
     case DepthFormat::Z32_FLOAT:
+    case DepthFormat::Z32_S8_X24_FLOAT:
         CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
                          unswizzled_data.data(), true, block_height);
         break;