From 2036504a8279014cd54557c35907f8c0808ae189 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 27 Sep 2019 17:54:58 -0400
Subject: [PATCH 1/2] TextureCache: Add the ability to deduce if two textures
 are depth on blit.

---
 src/video_core/texture_cache/texture_cache.h | 144 ++++++++++++++++++-
 1 file changed, 142 insertions(+), 2 deletions(-)

diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 877c6635d..1a2b90e0c 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -224,8 +224,13 @@ public:
                      const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
                      const Tegra::Engines::Fermi2D::Config& copy_config) {
         std::lock_guard lock{mutex};
-        std::pair<TSurface, TView> dst_surface = GetFermiSurface(dst_config);
-        std::pair<TSurface, TView> src_surface = GetFermiSurface(src_config);
+        SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
+        SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
+        const GPUVAddr src_gpu_addr = src_config.Address();
+        const GPUVAddr dst_gpu_addr = dst_config.Address();
+        DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
+        std::pair<TSurface, TView> dst_surface = GetSurface(dst_gpu_addr, dst_params, true, false);
+        std::pair<TSurface, TView> src_surface = GetSurface(src_gpu_addr, src_params, true, false);
         ImageBlit(src_surface.second, dst_surface.second, copy_config);
         dst_surface.first->MarkAsModified(true, Tick());
     }
@@ -357,6 +362,29 @@ private:
         BufferCopy = 3,
     };
 
+    enum class DeductionType : u32 {
+        DeductionComplete,   // A cached surface was found for the candidate.
+        DeductionIncomplete, // No cached surface overlaps the candidate's region.
+        DeductionFailed,     // The candidate could not be resolved to a single cached surface.
+    };
+
+    struct Deduction {
+        DeductionType type{DeductionType::DeductionFailed}; // Outcome; failed unless set otherwise.
+        TSurface surface{}; // Deduced cached surface; only assigned on a complete deduction.
+
+        bool Failed() const { // True when the deduction failed.
+            return type == DeductionType::DeductionFailed;
+        }
+
+        bool Incomplete() const { // True when nothing was found to deduce from.
+            return type == DeductionType::DeductionIncomplete;
+        }
+
+        bool IsDepth() const { // Dereferences `surface`; only call when a surface was assigned.
+            return surface->GetSurfaceParams().IsPixelFormatZeta();
+        }
+    };
+
     /**
      * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle.
      * @param overlaps, the overlapping surfaces registered in the cache.
@@ -691,6 +719,118 @@ private:
                               MatchTopologyResult::FullMatch);
     }
 
+    /**
+     * `DeduceSurface` gets the starting address and parameters of a candidate surface and tries
+     * to find a matching surface within the cache that's similar to it. It fails when the address
+     * has no cache address, the surface cached at it differs in topology, or several textures
+     * overlap the region. If no texture overlaps the region, the deduction is left incomplete.
+     * @param gpu_addr, the starting address of the candidate surface.
+     * @param params, the parameters on the candidate surface.
+     **/
+    Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
+        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
+        const auto cache_addr{ToCacheAddr(host_ptr)};
+
+        if (!cache_addr) { // No cache address to look up: nothing can be deduced.
+            Deduction result{};
+            result.type = DeductionType::DeductionFailed;
+            return result;
+        }
+
+        if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
+            TSurface& current_surface = iter->second;
+            const auto topological_result = current_surface->MatchesTopology(params);
+            if (topological_result != MatchTopologyResult::FullMatch) {
+                Deduction result{};
+                result.type = DeductionType::DeductionFailed;
+                return result;
+            }
+            const auto struct_result = current_surface->MatchesStructure(params);
+            if (struct_result != MatchStructureResult::None &&
+                current_surface->MatchTarget(params.target)) {
+                Deduction result{};
+                result.type = DeductionType::DeductionComplete;
+                result.surface = current_surface;
+                return result;
+            }
+        }
+
+        const std::size_t candidate_size = params.GetGuestSizeInBytes();
+        auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
+
+        if (overlaps.empty()) { // Nothing cached in the candidate's region.
+            Deduction result{};
+            result.type = DeductionType::DeductionIncomplete;
+            return result;
+        }
+
+        if (overlaps.size() > 1) { // Ambiguous: more than one overlapping surface.
+            Deduction result{};
+            result.type = DeductionType::DeductionFailed;
+            return result;
+        } else {
+            Deduction result{};
+            result.type = DeductionType::DeductionComplete;
+            result.surface = overlaps[0];
+            return result;
+        }
+    }
+
+    /**
+     * `DeduceBestBlit` gets a source and destination starting address and parameters,
+     * and tries to deduce if they are supposed to be depth textures. If so, their
+     * parameters are modified to inherit the format of the deduced surfaces.
+     * @param src_params, dst_params, the parameters of the blit surfaces; may be modified.
+     * @param src_gpu_addr, dst_gpu_addr, the starting addresses of the blit surfaces.
+     **/
+    void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
+                        const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
+        auto deduc_src = DeduceSurface(src_gpu_addr, src_params);
+        auto deduc_dst = DeduceSurface(src_gpu_addr, src_params);
+        if (deduc_src.Failed() || deduc_dst.Failed()) {
+            return;
+        }
+
+        const bool incomplete_src = deduc_src.Incomplete();
+        const bool incomplete_dst = deduc_dst.Incomplete();
+
+        if (incomplete_src && incomplete_dst) {
+            return;
+        }
+
+        const bool any_incomplete = incomplete_src || incomplete_dst;
+
+        if (!any_incomplete && !(deduc_src.IsDepth() && deduc_dst.IsDepth())) {
+            return;
+        }
+
+        if (incomplete_src && !(deduc_dst.IsDepth())) {
+            return;
+        }
+
+        if (incomplete_dst && !(deduc_src.IsDepth())) {
+            return;
+        }
+
+        const auto inherit_format = ([](SurfaceParams& to, TSurface from) {
+            const SurfaceParams& params = from->GetSurfaceParams();
+            to.pixel_format = params.pixel_format;
+            to.component_type = params.component_type;
+            to.type = params.type;
+        });
+        // Now we got the cases where one or both is Depth and the other is not known
+        if (!incomplete_src) {
+            inherit_format(src_params, deduc_src.surface);
+        } else {
+            inherit_format(src_params, deduc_dst.surface);
+        }
+        if (!incomplete_dst) {
+            inherit_format(dst_params, deduc_dst.surface);
+        } else {
+            inherit_format(dst_params, deduc_src.surface);
+        }
+    }
+
     std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
                                                  bool preserve_contents) {
         auto new_surface{GetUncachedSurface(gpu_addr, params)};

From ab47a660c8abfbfe3f4f4934a76202efd95acf95 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 4 Oct 2019 18:07:14 -0400
Subject: [PATCH 2/2] Texture_Cache: Blit Deduction corrections and
 simplifications.

---
 src/video_core/texture_cache/texture_cache.h | 38 ++++++++++----------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 1a2b90e0c..ca2da8f97 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -785,14 +785,14 @@ private:
      **/
     void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
                         const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
-        auto deduc_src = DeduceSurface(src_gpu_addr, src_params);
-        auto deduc_dst = DeduceSurface(src_gpu_addr, src_params);
-        if (deduc_src.Failed() || deduc_dst.Failed()) {
+        auto deduced_src = DeduceSurface(src_gpu_addr, src_params);
+        auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params);
+        if (deduced_src.Failed() || deduced_dst.Failed()) {
             return;
         }
 
-        const bool incomplete_src = deduc_src.Incomplete();
-        const bool incomplete_dst = deduc_dst.Incomplete();
+        const bool incomplete_src = deduced_src.Incomplete();
+        const bool incomplete_dst = deduced_dst.Incomplete();
 
         if (incomplete_src && incomplete_dst) {
             return;
@@ -800,16 +800,18 @@ private:
 
         const bool any_incomplete = incomplete_src || incomplete_dst;
 
-        if (!any_incomplete && !(deduc_src.IsDepth() && deduc_dst.IsDepth())) {
-            return;
-        }
+        if (!any_incomplete) { // Both surfaces are known: both must be depth.
+            if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) {
+                return;
+            }
+        } else { // One surface is unknown: the known one must be depth.
+            if (incomplete_src && !(deduced_dst.IsDepth())) {
+                return;
+            }
 
-        if (incomplete_src && !(deduc_dst.IsDepth())) {
-            return;
-        }
-
-        if (incomplete_dst && !(deduc_src.IsDepth())) {
-            return;
+            if (incomplete_dst && !(deduced_src.IsDepth())) {
+                return;
+            }
         }
 
         const auto inherit_format = ([](SurfaceParams& to, TSurface from) {
@@ -820,14 +822,14 @@ private:
         });
         // Now we got the cases where one or both is Depth and the other is not known
         if (!incomplete_src) {
-            inherit_format(src_params, deduc_src.surface);
+            inherit_format(src_params, deduced_src.surface);
         } else {
-            inherit_format(src_params, deduc_dst.surface);
+            inherit_format(src_params, deduced_dst.surface);
         }
         if (!incomplete_dst) {
-            inherit_format(dst_params, deduc_dst.surface);
+            inherit_format(dst_params, deduced_dst.surface);
         } else {
-            inherit_format(dst_params, deduc_src.surface);
+            inherit_format(dst_params, deduced_src.surface);
         }
     }