Setup
- Get FlameGraph repo
% git clone https://github.com/brendangregg/FlameGraph fg
- Create Docker image
Follow the instructions here: C++ develop container (Docker)
- Run Docker container
% docker run -it --rm -v $(pwd):/workspace --privileged cpp-dev
- Setup container
# apt update && apt install -y g++ linux-perf
# echo -1 > /proc/sys/kernel/perf_event_paranoid
Initial version
#include <iostream>
#include <numeric>
#include <random>
#include <vector>
// Returns the integer (truncated) arithmetic mean of `data`.
// An empty input yields 0. The sum is accumulated in 64 bits so that adding
// many 32-bit values does not wrap around.
uint32_t calcAverage(const std::vector<uint32_t>& data) {
  const auto count = data.size();
  if (count == 0) {
    return 0;
  }
  const uint64_t total =
      std::accumulate(data.cbegin(), data.cend(), uint64_t{0});
  return static_cast<uint32_t>(total / count);
}
// Builds a vector of `num` values, each drawn from a uniform integer
// distribution over [0, RAND_MAX]. The generator is a Mersenne Twister
// (std::mt19937) seeded from std::random_device, so the contents differ
// from run to run.
std::vector<uint32_t> createRndData(uint64_t num) {
  std::random_device seedSource;
  std::mt19937 engine(seedSource());
  std::uniform_int_distribution<uint32_t> pick(0, RAND_MAX);

  std::vector<uint32_t> samples(num);
  for (auto& sample : samples) {
    sample = pick(engine);
  }
  return samples;
}
int main() {
uint64_t num = 100'000'000;
std::cout << "Average: " << calcAverage(createRndData(num)) << std::endl;
return 0;
}
# g++ main.cpp -g -o calc.out
# perf record --freq 10000 --call-graph fp ./calc.out
# perf script | /workspace/fg/stackcollapse-perf.pl | /workspace/fg/flamegraph.pl > flamegraph_initial.svg
The function createRndData takes 90% of the calculation time. Let’s try to reduce it.
Updated version
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <numeric>
#include <vector>
// Returns the integer (truncated) arithmetic mean of `data`.
// An empty input yields 0. The sum is accumulated in 64 bits so that adding
// many 32-bit values does not wrap around.
uint32_t calcAverage(const std::vector<uint32_t>& data) {
  const auto count = data.size();
  if (count == 0) {
    return 0;
  }
  const uint64_t total =
      std::accumulate(data.cbegin(), data.cend(), uint64_t{0});
  return static_cast<uint32_t>(total / count);
}
// Builds a vector of `num` pseudo-random values produced by std::rand(),
// i.e. each element lies in [0, RAND_MAX].
//
// The C random number generator is seeded from the wall clock exactly once
// per process (on the first call). Re-seeding on every call would make two
// calls that happen within the same second return identical data, because
// std::time() only has one-second resolution.
std::vector<uint32_t> createRndData(uint64_t num) {
  // Function-local static: the initializer (and therefore std::srand) runs
  // only the first time control passes through this line.
  static const bool seeded = [] {
    std::srand(static_cast<unsigned>(std::time(nullptr)));
    return true;
  }();
  (void)seeded;  // Only the one-time side effect matters.

  std::vector<uint32_t> data(num);
  for (uint64_t i = 0; i < num; ++i) {
    data[i] = static_cast<uint32_t>(std::rand());
  }
  return data;
}
int main() {
uint64_t num = 100'000'000;
std::cout << "Average: " << calcAverage(createRndData(num)) << std::endl;
return 0;
}
# g++ main.cpp -g -o calc.out
# perf record --freq 10000 --call-graph fp ./calc.out
# perf script | /workspace/fg/stackcollapse-perf.pl | /workspace/fg/flamegraph.pl > flamegraph_updated.svg
Now, the function createRndData takes only 65% of the calculation time. Yeah. 🎉
Disclaimer
Both versions are built without any compiler optimization (no -O flag), so the percentages may differ in an optimized build!