~mir-team/mir/development-branch

Viewing changes to tests/acceptance-tests/test_latency.cpp

Committer: Tarmac
Author(s): Daniel van Vugt
Date: 2015-09-07 07:49:17 UTC
mfrom: (2902.1.5 fix-1447947)
Revision ID: tarmac-20150907074917-1ooflurzzu5vurd4

Fix the ClientLatency acceptance test (LP: #1491876) (LP: #1447947)

There were multiple problems:
  * Samples where latency was actually nbuffers were incorrectly recorded
    as a latency of zero because the old code did not account for the
    fact that we might have a record of the same buffer_id twice before
    the compositor has cleared out the old instance. All due to the
    "early release optimization".
  * The simulated compositor was not sleeping at all, so did not
    realistically allow the client to get ahead of it to fill the
    buffer queue, hence creating spuriously low latency measurements,
    which finally explains LP: #1447947.
  * The final average measurement was calculated prematurely, possibly
    missing some samples which made the result unpredictable and racy.
  * Slow devices (like mako) will after a while start to use the
    "early release" optimization that was introduced recently. However
    since we don't have dynamic queue scaling enabled to keep latency
    limited, the peak buffer latency is actually now nbuffers in some
    cases instead of nbuffers-1 frames (LP: #1491876). This extra
    latency is actually a good sign that the new feature is helping
    the slow device to maintain a better frame rate. The higher latency
    however will go away (and even reach all time lows) after we soon
    re-enable dynamic queue scaling.
  * Unresolved: The test still measures time in integer frame numbers,
    which is a feature. However this means any slight change in
    scheduling on the host where the client or server gets more time
    than the other, results in an exaggerated whole number change in
    frame latency averages. Fixes: https://bugs.launchpad.net/bugs/1447947, https://bugs.launchpad.net/bugs/1491876.

Approved by PS Jenkins bot, Alexandros Frantzis.

files modified:
tests/acceptance-tests/test_latency.cpp

Show diffs side-by-side

added added

removed removed

tests/acceptance-tests/test_latency.cpp

#include <gtest/gtest.h>

#include <gmock/gmock.h>

#include <unordered_map>

#include <deque>

namespace mtf = mir_test_framework;

namespace mtd = mir::test::doubles;

// Records that `submission_id` was submitted at the current `post_count`.
void record_submission(uint32_t submission_id)

{

// Hold `mutex` for the rest of the function while the bookkeeping is updated.
std::lock_guard<std::mutex> lock{mutex};

// NOTE(review): this text looks like a rendered diff, so this map write and
// the deque push below may be the old and new versions of the same
// bookkeeping -- confirm which one the real file keeps.
timestamps[submission_id] = post_count;

// Append an {id, post_count} entry to `submissions`.
submissions.push_back({submission_id, post_count});

}

auto latency_for(uint32_t submission_id)

std::lock_guard<std::mutex> lock{mutex};

mir::optional_value<uint32_t> latency;

auto const it = timestamps.find(submission_id);

if (it != timestamps.end())

latency = post_count - it->second;

for (auto i = submissions.begin(); i != submissions.end(); i++)

{

if (i->buffer_id == submission_id)

{

latency = post_count - i->time;

submissions.erase(i);

break;

}

return latency;

}

private:

std::mutex mutex;

unsigned int post_count{0};

std::unordered_map<uint32_t, uint32_t> timestamps;

// Note that a buffer_id may appear twice in the list as the client is

// faster than the compositor and can produce a new frame before the

// compositor has measured the previous submission of the same buffer id.

// One recorded submission, as stored in the `submissions` deque.
struct Submission

{

// The id given to record_submission(); latency_for() compares it against
// its submission_id argument.
uint32_t buffer_id;

// Value of post_count at the moment the submission was recorded;
// latency_for() subtracts it from the current post_count.
uint32_t time;

};

std::deque<Submission> submissions;

};

* Note: we're not aiming to check performance in terms of CPU or GPU time processing

class IdCollectingDB : public mtd::NullDisplayBuffer

{

public:

IdCollectingDB(Stats& stats) : stats{stats} {}

mir::geometry::Rectangle view_area() const override

{

return {{0,0}, {1920, 1080}};

100

101

bool post_renderables_if_optimizable(mg::RenderableList const& renderables) override

102

{

* Clients are blocked only until the below buffer() goes out of

* scope. Thereafter we'll be racing the client thread. So we need

* to increment the post_count (represents universal time) here

* where the client thread is predictably blocked in its call to

* mir_buffer_stream_swap_buffers_sync().

stats.post();

103

//the surface will be the frontmost of the renderables

100

104

if (!renderables.empty())

101

105

last = renderables.front()->buffer()->id();

107

111

return last;

108

112

}

109

113

private:

110

Stats& stats;

111

114

mg::BufferID last{0};

112

115

};

113

116

114

117

class TimeTrackingGroup : public mtd::NullDisplaySyncGroup

115

118

{

116

119

public:

117

TimeTrackingGroup(Stats& stats) : stats{stats}, db{stats} {}

120

TimeTrackingGroup(Stats& stats) : stats{stats} {}

118

121

119

122

void for_each_display_buffer(std::function<void(mg::DisplayBuffer&)> const& f) override

120

123

{

129

132

std::lock_guard<std::mutex> lock{mutex};

130

133

latency_list.push_back(latency.value());

131

134

}

135

136

stats.post();

137

138

139

* Sleep a little to make the test more realistic. This way the

140

* client will actually fill the buffer queue. If we don't do this,

141

* then it's like having an infinite refresh rate and the measured

142

* latency would never exceed 1.0. (LP: #1447947)

143

144

std::this_thread::sleep_for(std::chrono::milliseconds(16));

132

145

}

133

146

134

147

float average_latency()

188

201

mir_buffer_stream_swap_buffers_sync(stream);

189

202

}

190

203

204

// Wait for the compositor to finish rendering all those frames,

205

// or else we'll be missing some samples and get a spurious average.

206

std::this_thread::sleep_for(std::chrono::milliseconds(500));

207

191

208

unsigned int const expected_client_buffers = 3;

192

unsigned int const expected_latency = expected_client_buffers - 1;

193

209

210

// Note: Using the "early release" optimization without dynamic queue

211

// scaling enabled makes the expected latency possibly up to

212

// nbuffers instead of nbuffers-1. After dynamic queue scaling is

213

// enabled, the average will be lower than this.

214

float const expected_max_latency = expected_client_buffers;

215

float const expected_min_latency = expected_client_buffers - 1;

216

217

auto observed_latency = display.group.average_latency();

218

219

// We still have a margin for error here. The client and server will

220

// be scheduled somewhat unpredictably which affects results. Also

221

// affecting results will be the first few frames before the buffer

222

// queue is full (during which there will be no buffer latency).

194

223

float const error_margin = 0.1f;

195

auto observed_latency = display.group.average_latency();

196

197

// FIXME: LP: #1447947: This actually doesn't work as intended. Raising

198

// the queue length isn't affecting the measured latency for some

199

// reason. But latency too low is better than too high.

200

//EXPECT_THAT(observed_latency, AllOf(Gt(expected_latency-error_margin),

201

// Lt(expected_latency+error_margin)));

202

203

EXPECT_THAT(observed_latency, Lt(expected_latency+error_margin));

224

225

EXPECT_THAT(observed_latency, Gt(expected_min_latency-error_margin));

226

EXPECT_THAT(observed_latency, Lt(expected_max_latency+error_margin));

204

227

}

Older »