diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 9add2bc94..842cdcbcf 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -4,6 +4,7 @@
 
 #include <cinttypes>
 #include <cstring>
+#include <optional>
 #include "common/assert.h"
 #include "core/core.h"
 #include "core/core_timing.h"
@@ -16,6 +17,8 @@
 
 namespace Tegra::Engines {
 
+using VideoCore::QueryType;
+
 /// First register id that is actually a Macro call.
 constexpr u32 MacroRegistersStart = 0xE00;
 
@@ -614,10 +617,11 @@ void Maxwell3D::ProcessQueryCondition() {
 void Maxwell3D::ProcessCounterReset() {
     switch (regs.counter_reset) {
     case Regs::CounterReset::SampleCnt:
-        rasterizer.ResetCounter(VideoCore::QueryType::SamplesPassed);
+        rasterizer.ResetCounter(QueryType::SamplesPassed);
         break;
     default:
-        UNIMPLEMENTED_MSG("counter_reset={}", static_cast<u32>(regs.counter_reset));
+        LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}",
+                    static_cast<int>(regs.counter_reset));
         break;
     }
 }
@@ -670,7 +674,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
         return 0;
     case Regs::QuerySelect::SamplesPassed:
         // Deferred.
-        rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed);
+        rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed,
+                         system.GPU().GetTicks());
         return {};
     default:
         UNIMPLEMENTED_MSG("Unimplemented query select type {}",
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index a394f2d3e..e9f1436f0 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -6,6 +6,7 @@
 
 #include <atomic>
 #include <functional>
+#include <optional>
 #include "common/common_types.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/gpu.h"
@@ -50,7 +51,7 @@ public:
     virtual void ResetCounter(QueryType type) = 0;
 
     /// Records a GPU query and caches it
-    virtual void Query(GPUVAddr gpu_addr, QueryType type) = 0;
+    virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
 
     /// Notify rasterizer that all caches should be flushed to Switch memory
     virtual void FlushAll() = 0;
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index 8f0e8241d..74cb73209 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -2,8 +2,10 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
 #include <cstring>
 #include <memory>
+#include <unordered_map>
 #include <utility>
 #include <vector>
 
@@ -22,6 +24,13 @@ using VideoCore::QueryType;
 
 namespace {
 
+constexpr std::uintptr_t PAGE_SIZE = 4096;
+constexpr int PAGE_SHIFT = 12;
+
+constexpr std::size_t SMALL_QUERY_SIZE = 8;  // Query size without timestamp
+constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp
+constexpr std::ptrdiff_t TIMESTAMP_OFFSET = 8;
+
 constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
 
 constexpr GLenum GetTarget(QueryType type) {
@@ -37,23 +46,19 @@ CounterStream::~CounterStream() = default;
 
 void CounterStream::Update(bool enabled, bool any_command_queued) {
     if (enabled) {
-        if (!current) {
-            current = cache.GetHostCounter(last, type);
-        }
-        return;
+        Enable();
+    } else {
+        Disable(any_command_queued);
     }
-
-    if (current) {
-        EndQuery(any_command_queued);
-    }
-    last = std::exchange(current, nullptr);
 }
 
 void CounterStream::Reset(bool any_command_queued) {
     if (current) {
         EndQuery(any_command_queued);
+
+        // Immediately start a new query to avoid disabling its state.
+        current = cache.GetHostCounter(nullptr, type);
     }
-    current = nullptr;
     last = nullptr;
 }
 
@@ -67,6 +72,20 @@ std::shared_ptr<HostCounter> CounterStream::GetCurrent(bool any_command_queued)
     return last;
 }
 
+void CounterStream::Enable() {
+    if (current) {
+        return;
+    }
+    current = cache.GetHostCounter(last, type);
+}
+
+void CounterStream::Disable(bool any_command_queued) {
+    if (current) {
+        EndQuery(any_command_queued);
+    }
+    last = std::exchange(current, nullptr);
+}
+
 void CounterStream::EndQuery(bool any_command_queued) {
     if (!any_command_queued) {
         // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
@@ -78,26 +97,57 @@ void CounterStream::EndQuery(bool any_command_queued) {
 }
 
 QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& rasterizer)
-    : RasterizerCache{rasterizer}, system{system},
-      rasterizer{rasterizer}, streams{{CounterStream{*this, QueryType::SamplesPassed}}} {}
+    : system{system}, rasterizer{rasterizer}, streams{{CounterStream{*this,
+                                                                     QueryType::SamplesPassed}}} {}
 
 QueryCache::~QueryCache() = default;
 
-void QueryCache::Query(GPUVAddr gpu_addr, QueryType type) {
+void QueryCache::InvalidateRegion(CacheAddr addr, std::size_t size) {
+    const u64 addr_begin = static_cast<u64>(addr);
+    const u64 addr_end = addr_begin + static_cast<u64>(size);
+    const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
+        const u64 cache_begin = query.GetCacheAddr();
+        const u64 cache_end = cache_begin + query.GetSizeInBytes();
+        return cache_begin < addr_end && addr_begin < cache_end;
+    };
+
+    const u64 page_end = addr_end >> PAGE_SHIFT;
+    for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
+        const auto& it = cached_queries.find(page);
+        if (it == std::end(cached_queries)) {
+            continue;
+        }
+        auto& contents = it->second;
+        for (auto& query : contents) {
+            if (!in_range(query)) {
+                continue;
+            }
+            rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.GetSizeInBytes(), -1);
+            Flush(query);
+        }
+        contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
+                       std::end(contents));
+    }
+}
+
+void QueryCache::FlushRegion(CacheAddr addr, std::size_t size) {
+    // We can handle flushes in the same way as invalidations.
+    InvalidateRegion(addr, size);
+}
+
+void QueryCache::Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) {
     auto& memory_manager = system.GPU().MemoryManager();
     const auto host_ptr = memory_manager.GetPointer(gpu_addr);
 
-    auto query = TryGet(host_ptr);
+    CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
     if (!query) {
         const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
         ASSERT_OR_EXECUTE(cpu_addr, return;);
 
-        query = std::make_shared<CachedQuery>(type, *cpu_addr, host_ptr);
-        Register(query);
+        query = &Register(CachedQuery(type, *cpu_addr, host_ptr));
     }
 
-    query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued()));
-    query->MarkAsModified(true, *this);
+    query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued()), timestamp);
 }
 
 void QueryCache::UpdateCounters() {
@@ -117,34 +167,54 @@ void QueryCache::Reserve(QueryType type, OGLQuery&& query) {
 
 std::shared_ptr<HostCounter> QueryCache::GetHostCounter(std::shared_ptr<HostCounter> dependency,
                                                         QueryType type) {
-    const auto type_index = static_cast<std::size_t>(type);
-    auto& reserve = reserved_queries[type_index];
-
+    auto& reserve = reserved_queries[static_cast<std::size_t>(type)];
+    OGLQuery query;
     if (reserve.empty()) {
-        return std::make_shared<HostCounter>(*this, std::move(dependency), type);
+        query.Create(GetTarget(type));
+    } else {
+        query = std::move(reserve.back());
+        reserve.pop_back();
     }
 
-    auto counter = std::make_shared<HostCounter>(*this, std::move(dependency), type,
-                                                 std::move(reserve.back()));
-    reserve.pop_back();
-    return counter;
+    return std::make_shared<HostCounter>(*this, std::move(dependency), type, std::move(query));
 }
 
-void QueryCache::FlushObjectInner(const std::shared_ptr<CachedQuery>& counter_) {
-    auto& counter = *counter_;
-    auto& stream = GetStream(counter.GetType());
+CachedQuery& QueryCache::Register(CachedQuery&& cached_query) {
+    const u64 page = static_cast<u64>(cached_query.GetCacheAddr()) >> PAGE_SHIFT;
+    auto& stored_ref = cached_queries[page].emplace_back(std::move(cached_query));
+    rasterizer.UpdatePagesCachedCount(stored_ref.GetCpuAddr(), stored_ref.GetSizeInBytes(), 1);
+    return stored_ref;
+}
+
+CachedQuery* QueryCache::TryGet(CacheAddr addr) {
+    const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
+    const auto it = cached_queries.find(page);
+    if (it == std::end(cached_queries)) {
+        return nullptr;
+    }
+    auto& contents = it->second;
+    const auto found =
+        std::find_if(std::begin(contents), std::end(contents),
+                     [addr](const auto& query) { return query.GetCacheAddr() == addr; });
+    return found != std::end(contents) ? &*found : nullptr;
+}
+
+void QueryCache::Flush(CachedQuery& cached_query) {
+    auto& stream = GetStream(cached_query.GetType());
 
     // Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
     // To avoid this disable and re-enable keeping the dependency stream.
-    const bool is_enabled = stream.IsEnabled();
-    if (is_enabled) {
-        stream.Update(false, false);
+    // But we only have to do this if we have pending waits to be done.
+    const bool slice_counter = stream.IsEnabled() && cached_query.WaitPending();
+    const bool any_command_queued = rasterizer.AnyCommandQueued();
+    if (slice_counter) {
+        stream.Update(false, any_command_queued);
     }
 
-    counter.Flush();
+    cached_query.Flush();
 
-    if (is_enabled) {
-        stream.Update(true, false);
+    if (slice_counter) {
+        stream.Update(true, any_command_queued);
     }
 }
 
@@ -152,13 +222,6 @@ CounterStream& QueryCache::GetStream(QueryType type) {
     return streams[static_cast<std::size_t>(type)];
 }
 
-HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type)
-    : cache{cache}, type{type}, dependency{std::move(dependency)} {
-    const GLenum target = GetTarget(type);
-    query.Create(target);
-    glBeginQuery(target, query.handle);
-}
-
 HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type,
                          OGLQuery&& query_)
     : cache{cache}, type{type}, dependency{std::move(dependency)}, query{std::move(query_)} {
@@ -170,35 +233,80 @@ HostCounter::~HostCounter() {
 }
 
 u64 HostCounter::Query() {
-    if (query.handle == 0) {
-        return result;
+    if (result) {
+        return *result;
     }
 
-    glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &result);
-
+    u64 value;
+    glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value);
     if (dependency) {
-        result += dependency->Query();
+        value += dependency->Query();
     }
 
-    return result;
+    return *(result = value);
+}
+
+bool HostCounter::WaitPending() const noexcept {
+    return !result.has_value(); // No cached result yet: Query() still has to read it from OpenGL.
+}
 
 CachedQuery::CachedQuery(QueryType type, VAddr cpu_addr, u8* host_ptr)
-    : RasterizerCacheObject{host_ptr}, type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
+    : type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
+
+CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
+    : type{rhs.type}, cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr},
+      counter{std::move(rhs.counter)}, timestamp{rhs.timestamp} {}
 
 CachedQuery::~CachedQuery() = default;
 
+CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
+    type = rhs.type;
+    cpu_addr = rhs.cpu_addr;
+    host_ptr = rhs.host_ptr;
+    counter = std::move(rhs.counter);
+    timestamp = rhs.timestamp;
+    return *this;
+}
+
 void CachedQuery::Flush() {
-    const u64 value = counter->Query();
-    std::memcpy(host_ptr, &value, sizeof(value));
+    // When counter is nullptr it means that it has just been reset. We are supposed to write a
+    // zero in these cases.
+    const u64 value = counter ? counter->Query() : 0;
+    std::memcpy(host_ptr, &value, sizeof(u64));
+
+    if (timestamp) {
+        std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
+    }
 }
 
-void CachedQuery::SetCounter(std::shared_ptr<HostCounter> counter_) {
+void CachedQuery::SetCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
+    if (counter) {
+        // If there's an old counter set it means the query is being rewritten by the game.
+        // To avoid losing the data forever, flush here.
+        Flush();
+    }
     counter = std::move(counter_);
+    timestamp = timestamp_;
 }
 
-QueryType CachedQuery::GetType() const {
+bool CachedQuery::WaitPending() const noexcept {
+    return counter && counter->WaitPending();
+}
+
+QueryType CachedQuery::GetType() const noexcept {
     return type;
 }
 
+VAddr CachedQuery::GetCpuAddr() const noexcept {
+    return cpu_addr;
+}
+
+CacheAddr CachedQuery::GetCacheAddr() const noexcept {
+    return ToCacheAddr(host_ptr);
+}
+
+u64 CachedQuery::GetSizeInBytes() const noexcept {
+    return timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
+}
+
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index 91594b120..d9f22b44d 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -7,12 +7,12 @@
 #include <array>
 #include <memory>
 #include <optional>
+#include <unordered_map>
 #include <vector>
 
 #include <glad/glad.h>
 
 #include "common/common_types.h"
-#include "video_core/rasterizer_cache.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 
@@ -43,6 +43,10 @@ public:
     }
 
 private:
+    void Enable();
+
+    void Disable(bool any_command_queued);
+
     void EndQuery(bool any_command_queued);
 
     QueryCache& cache;
@@ -53,12 +57,16 @@ private:
     GLenum target;
 };
 
-class QueryCache final : public RasterizerCache<std::shared_ptr<CachedQuery>> {
+class QueryCache final {
 public:
     explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
     ~QueryCache();
 
-    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type);
+    void InvalidateRegion(CacheAddr addr, std::size_t size);
+
+    void FlushRegion(CacheAddr addr, std::size_t size);
+
+    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp);
 
     void UpdateCounters();
 
@@ -69,23 +77,26 @@ public:
     std::shared_ptr<HostCounter> GetHostCounter(std::shared_ptr<HostCounter> dependency,
                                                 VideoCore::QueryType type);
 
-protected:
-    void FlushObjectInner(const std::shared_ptr<CachedQuery>& counter) override;
-
 private:
+    CachedQuery& Register(CachedQuery&& cached_query);
+
+    CachedQuery* TryGet(CacheAddr addr);
+
+    void Flush(CachedQuery& cached_query);
+
     CounterStream& GetStream(VideoCore::QueryType type);
 
     Core::System& system;
     RasterizerOpenGL& rasterizer;
 
+    std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
+
     std::array<CounterStream, VideoCore::NumQueryTypes> streams;
     std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> reserved_queries;
 };
 
 class HostCounter final {
 public:
-    explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
-                         VideoCore::QueryType type);
     explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
                          VideoCore::QueryType type, OGLQuery&& query);
     ~HostCounter();
@@ -93,42 +104,54 @@ public:
     /// Returns the current value of the query.
     u64 Query();
 
+    /// Returns true when querying this counter will potentially wait for OpenGL.
+    bool WaitPending() const noexcept;
+
 private:
     QueryCache& cache;
     VideoCore::QueryType type;
 
     std::shared_ptr<HostCounter> dependency; ///< Counter queued before this one.
     OGLQuery query;                          ///< OpenGL query.
-    u64 result;                              ///< Added values of the counter.
+    std::optional<u64> result;               ///< Added values of the counter.
 };
 
-class CachedQuery final : public RasterizerCacheObject {
+class CachedQuery final {
 public:
     explicit CachedQuery(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr);
+    CachedQuery(CachedQuery&&) noexcept;
+    CachedQuery(const CachedQuery&) = delete;
     ~CachedQuery();
 
+    CachedQuery& operator=(CachedQuery&&) noexcept;
+
     /// Writes the counter value to host memory.
     void Flush();
 
     /// Updates the counter this cached query registered in guest memory will write when requested.
-    void SetCounter(std::shared_ptr<HostCounter> counter);
+    void SetCounter(std::shared_ptr<HostCounter> counter, std::optional<u64> timestamp);
+
+    /// Returns true when flushing this query will potentially wait for OpenGL.
+    bool WaitPending() const noexcept;
 
     /// Returns the query type.
-    VideoCore::QueryType GetType() const;
+    VideoCore::QueryType GetType() const noexcept;
 
-    VAddr GetCpuAddr() const override {
-        return cpu_addr;
-    }
+    /// Returns the guest CPU address for this query.
+    VAddr GetCpuAddr() const noexcept;
 
-    std::size_t GetSizeInBytes() const override {
-        return sizeof(u64);
-    }
+    /// Returns the cache address for this query.
+    CacheAddr GetCacheAddr() const noexcept;
+
+    /// Returns the number of cached bytes.
+    u64 GetSizeInBytes() const noexcept;
 
 private:
-    VideoCore::QueryType type;
+    VideoCore::QueryType type;            ///< Abstracted query type (e.g. samples passed).
     VAddr cpu_addr;                       ///< Guest CPU address.
     u8* host_ptr;                         ///< Writable host pointer.
     std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
+    std::optional<u64> timestamp;         ///< Timestamp to flush to guest memory.
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 827f85884..4bdc8db85 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -722,8 +722,9 @@ void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
     query_cache.ResetCounter(type);
 }
 
-void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type) {
-    query_cache.Query(gpu_addr, type);
+void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
+                             std::optional<u64> timestamp) {
+    query_cache.Query(gpu_addr, type, timestamp);
 }
 
 void RasterizerOpenGL::FlushAll() {}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 4fb6811a7..c772fd4ba 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -63,7 +63,7 @@ public:
     void Clear() override;
     void DispatchCompute(GPUVAddr code_addr) override;
     void ResetCounter(VideoCore::QueryType type) override;
-    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type) override;
+    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
     void FlushAll() override;
     void FlushRegion(CacheAddr addr, u64 size) override;
     void InvalidateRegion(CacheAddr addr, u64 size) override;