From b167d1f56a70d806960e2473a0d73989df1c5e1e Mon Sep 17 00:00:00 2001 From: Xaver Hugl Date: Mon, 6 May 2024 21:59:18 +0200 Subject: [PATCH] core/renderbackend: improve handling of multiple render time queries With multiple OpenGL render time queries, you can't just add the render times together, because GPUs execute commands asynchronously and the sum may be far too large. Instead, this converts all timestamps to steady_clock and correctly calculates the actual total time spent rendering from the earliest start and the latest end timestamp. --- src/core/renderbackend.cpp | 40 +++++++++++++++++++++++++++----- src/core/renderbackend.h | 14 +++++++++-- src/core/renderloop.cpp | 6 +++-- src/core/renderloop_p.h | 2 +- src/opengl/glrendertimequery.cpp | 27 ++++++++++++--------- src/opengl/glrendertimequery.h | 10 ++++---- 6 files changed, 72 insertions(+), 27 deletions(-) diff --git a/src/core/renderbackend.cpp b/src/core/renderbackend.cpp index a34f9be11c..f4c53ce391 100644 --- a/src/core/renderbackend.cpp +++ b/src/core/renderbackend.cpp @@ -15,6 +15,14 @@ namespace KWin { +RenderTimeSpan RenderTimeSpan::operator|(const RenderTimeSpan &other) const +{ + return RenderTimeSpan{ + .start = std::min(start, other.start), + .end = std::max(end, other.end), + }; +} + CpuRenderTimeQuery::CpuRenderTimeQuery() : m_start(std::chrono::steady_clock::now()) { @@ -25,10 +33,13 @@ void CpuRenderTimeQuery::end() m_end = std::chrono::steady_clock::now(); } -std::chrono::nanoseconds CpuRenderTimeQuery::query() +std::optional CpuRenderTimeQuery::query() { Q_ASSERT(m_end); - return *m_end - m_start; + return RenderTimeSpan{ + .start = m_start, + .end = *m_end, + }; } OutputFrame::OutputFrame(RenderLoop *loop) @@ -43,12 +54,29 @@ void OutputFrame::addFeedback(std::unique_ptr &&feedback) m_feedbacks.push_back(std::move(feedback)); } +std::optional OutputFrame::queryRenderTime() const +{ + if (m_renderTimeQueries.empty()) { + return std::chrono::nanoseconds::zero(); + } + const auto first = 
m_renderTimeQueries.front()->query(); + if (!first) { + return std::nullopt; + } + RenderTimeSpan ret = *first; + for (const auto &query : m_renderTimeQueries | std::views::drop(1)) { + const auto opt = query->query(); + if (!opt) { + return std::nullopt; + } + ret = ret | *opt; + } + return ret.end - ret.start; +} + void OutputFrame::presented(std::chrono::nanoseconds refreshDuration, std::chrono::nanoseconds timestamp, PresentationMode mode) { - const auto view = m_renderTimeQueries | std::views::transform([](const auto &query) { - return query->query(); - }); - const auto renderTime = std::accumulate(view.begin(), view.end(), std::chrono::nanoseconds::zero()); + std::optional renderTime = queryRenderTime(); RenderLoopPrivate::get(m_loop)->notifyFrameCompleted(timestamp, renderTime, mode); for (const auto &feedback : m_feedbacks) { feedback->presented(refreshDuration, timestamp, mode); diff --git a/src/core/renderbackend.h b/src/core/renderbackend.h index 700f516fb8..4bd17d5cc2 100644 --- a/src/core/renderbackend.h +++ b/src/core/renderbackend.h @@ -40,11 +40,19 @@ public: virtual void presented(std::chrono::nanoseconds refreshCycleDuration, std::chrono::nanoseconds timestamp, PresentationMode mode) = 0; }; +struct RenderTimeSpan +{ + std::chrono::steady_clock::time_point start; + std::chrono::steady_clock::time_point end; + + RenderTimeSpan operator|(const RenderTimeSpan &other) const; +}; + class KWIN_EXPORT RenderTimeQuery { public: virtual ~RenderTimeQuery() = default; - virtual std::chrono::nanoseconds query() = 0; + virtual std::optional query() = 0; }; class KWIN_EXPORT CpuRenderTimeQuery : public RenderTimeQuery @@ -57,7 +65,7 @@ public: void end(); - std::chrono::nanoseconds query() override; + std::optional query() override; private: const std::chrono::steady_clock::time_point m_start; @@ -86,6 +94,8 @@ public: void addRenderTimeQuery(std::unique_ptr &&query); private: + std::optional queryRenderTime() const; + RenderLoop *const m_loop; std::vector> 
m_feedbacks; std::optional m_contentType; diff --git a/src/core/renderloop.cpp b/src/core/renderloop.cpp index 5e3a74c7fc..a94807b77f 100644 --- a/src/core/renderloop.cpp +++ b/src/core/renderloop.cpp @@ -91,14 +91,16 @@ void RenderLoopPrivate::notifyFrameFailed() } } -void RenderLoopPrivate::notifyFrameCompleted(std::chrono::nanoseconds timestamp, std::chrono::nanoseconds renderTime, PresentationMode mode) +void RenderLoopPrivate::notifyFrameCompleted(std::chrono::nanoseconds timestamp, std::optional renderTime, PresentationMode mode) { Q_ASSERT(pendingFrameCount > 0); pendingFrameCount--; notifyVblank(timestamp); - renderJournal.add(renderTime, timestamp); + if (renderTime) { + renderJournal.add(*renderTime, timestamp); + } if (!inhibitCount) { maybeScheduleRepaint(); } diff --git a/src/core/renderloop_p.h b/src/core/renderloop_p.h index 62df087f11..c4f619e273 100644 --- a/src/core/renderloop_p.h +++ b/src/core/renderloop_p.h @@ -32,7 +32,7 @@ public: void maybeScheduleRepaint(); void notifyFrameFailed(); - void notifyFrameCompleted(std::chrono::nanoseconds timestamp, std::chrono::nanoseconds renderTime, PresentationMode mode = PresentationMode::VSync); + void notifyFrameCompleted(std::chrono::nanoseconds timestamp, std::optional renderTime, PresentationMode mode = PresentationMode::VSync); void notifyVblank(std::chrono::nanoseconds timestamp); RenderLoop *const q; diff --git a/src/opengl/glrendertimequery.cpp b/src/opengl/glrendertimequery.cpp index 8bd59e7b1d..d666f3956a 100644 --- a/src/opengl/glrendertimequery.cpp +++ b/src/opengl/glrendertimequery.cpp @@ -36,9 +36,11 @@ GLRenderTimeQuery::~GLRenderTimeQuery() void GLRenderTimeQuery::begin() { if (m_gpuProbe.query) { - glGetInteger64v(GL_TIMESTAMP, &m_gpuProbe.start); + GLint64 start = 0; + glGetInteger64v(GL_TIMESTAMP, &start); + m_gpuProbe.start = std::chrono::nanoseconds(start); } - m_cpuProbe.start = std::chrono::steady_clock::now().time_since_epoch(); + m_cpuProbe.start = 
std::chrono::steady_clock::now(); } void GLRenderTimeQuery::end() @@ -48,32 +50,35 @@ void GLRenderTimeQuery::end() if (m_gpuProbe.query) { glQueryCounter(m_gpuProbe.query, GL_TIMESTAMP); } - m_cpuProbe.end = std::chrono::steady_clock::now().time_since_epoch(); + m_cpuProbe.end = std::chrono::steady_clock::now(); } -std::chrono::nanoseconds GLRenderTimeQuery::query() +std::optional GLRenderTimeQuery::query() { if (!m_hasResult) { - return std::chrono::nanoseconds::zero(); + return std::nullopt; } m_hasResult = false; if (m_gpuProbe.query) { const auto context = m_context.lock(); if (!context) { - return std::chrono::nanoseconds::zero(); + return std::nullopt; } context->makeCurrent(); - glGetQueryObjecti64v(m_gpuProbe.query, GL_QUERY_RESULT, &m_gpuProbe.end); + GLint64 end = 0; + glGetQueryObjecti64v(m_gpuProbe.query, GL_QUERY_RESULT, &end); + m_gpuProbe.end = std::chrono::nanoseconds(end); } - const std::chrono::nanoseconds gpuTime(m_gpuProbe.end - m_gpuProbe.start); - const std::chrono::nanoseconds cpuTime = m_cpuProbe.end - m_cpuProbe.start; // timings are pretty unpredictable in the sub-millisecond range; this minimum // ensures that when CPU or GPU power states change, we don't drop any frames const std::chrono::nanoseconds minimumTime = std::chrono::milliseconds(2); - - return std::max({gpuTime, cpuTime, minimumTime}); + const auto end = std::max({m_cpuProbe.start + (m_gpuProbe.end - m_gpuProbe.start), m_cpuProbe.end, m_cpuProbe.start + minimumTime}); + return RenderTimeSpan{ + .start = m_cpuProbe.start, + .end = end, + }; } } diff --git a/src/opengl/glrendertimequery.h b/src/opengl/glrendertimequery.h index ba83b0d077..f77e848e45 100644 --- a/src/opengl/glrendertimequery.h +++ b/src/opengl/glrendertimequery.h @@ -31,7 +31,7 @@ public: /** * fetches the result of the query. If rendering is not done yet, this will block! 
*/ - std::chrono::nanoseconds query() override; + std::optional query() override; private: const std::weak_ptr m_context; @@ -39,15 +39,15 @@ private: struct { - std::chrono::nanoseconds start = std::chrono::nanoseconds::zero(); - std::chrono::nanoseconds end = std::chrono::nanoseconds::zero(); + std::chrono::steady_clock::time_point start; + std::chrono::steady_clock::time_point end; } m_cpuProbe; struct { GLuint query = 0; - GLint64 start = 0; - GLint64 end = 0; + std::chrono::nanoseconds start{0}; + std::chrono::nanoseconds end{0}; } m_gpuProbe; };