FlameGraph (Docker)

Setup

  1. Get FlameGraph repo
    % git clone https://github.com/brendangregg/FlameGraph fg
  2. Create Docker image
    Follow the instructions here: C++ develop container (Docker)
  3. Run Docker container
    % docker run -it --rm -v $(pwd):/workspace --privileged cpp-dev
  4. Set up container
    # apt update && apt install -y g++ linux-perf
    # echo -1 > /proc/sys/kernel/perf_event_paranoid
    

Initial version

#include <iostream>
#include <numeric>
#include <random>
#include <vector>

// Computes the truncated integer mean of the values in `data`.
// Returns 0 for an empty vector, which also avoids dividing by zero.
uint32_t calcAverage(const std::vector<uint32_t>& data) {
    const auto count = data.size();
    if (count == 0) {
        return 0;
    }
    // Sum in 64 bits so adding 32-bit samples does not wrap at 2^32.
    const uint64_t total = std::accumulate(data.cbegin(), data.cend(), uint64_t{0});
    return static_cast<uint32_t>(total / count);
}

// Builds a vector of `num` pseudo-random values drawn uniformly from
// [0, RAND_MAX], using a std::mt19937 engine seeded from std::random_device.
std::vector<uint32_t> createRndData(uint64_t num) {
    std::vector<uint32_t> samples(num);

    std::mt19937 engine(std::random_device{}());
    // Upper bound is RAND_MAX so the value range matches what rand() produces.
    std::uniform_int_distribution<uint32_t> pick(0, RAND_MAX);

    for (auto& sample : samples) {
        sample = pick(engine);
    }
    return samples;
}

// Generates 100 million random samples and prints their integer average.
int main() {
    constexpr uint64_t kSampleCount = 100'000'000;
    // The label is streamed before the data is generated, matching the
    // left-to-right evaluation of a single chained << expression.
    std::cout << "Average: ";
    std::cout << calcAverage(createRndData(kSampleCount));
    std::cout << std::endl;
    return 0;
}
# g++ main.cpp -g -o calc.out
# perf record --freq 10000 --call-graph fp ./calc.out
# perf script | /workspace/fg/stackcollapse-perf.pl | /workspace/fg/flamegraph.pl > flamegraph_initial.svg

The function createRndData takes 90% of the calculation time. Let’s try to reduce it.


Updated version

#include <cstdlib>
#include <ctime>
#include <iostream>
#include <numeric>
#include <vector>

// Computes the truncated integer mean of the values in `data`.
// Returns 0 for an empty vector, which also avoids dividing by zero.
uint32_t calcAverage(const std::vector<uint32_t>& data) {
    const auto count = data.size();
    if (count == 0) {
        return 0;
    }
    // Sum in 64 bits so adding 32-bit samples does not wrap at 2^32.
    const uint64_t total = std::accumulate(data.cbegin(), data.cend(), uint64_t{0});
    return static_cast<uint32_t>(total / count);
}

// Builds a vector of `num` values produced by rand(). The C generator is
// re-seeded with the current time on every call.
std::vector<uint32_t> createRndData(uint64_t num) {
    std::srand(static_cast<unsigned int>(std::time(nullptr)));

    std::vector<uint32_t> samples(num);
    for (auto& sample : samples) {
        // rand() returns a non-negative int, so the cast to unsigned is lossless.
        sample = static_cast<uint32_t>(std::rand());
    }
    return samples;
}

// Generates 100 million random samples and prints their integer average.
int main() {
    constexpr uint64_t kSampleCount = 100'000'000;
    // The label is streamed before the data is generated, matching the
    // left-to-right evaluation of a single chained << expression.
    std::cout << "Average: ";
    std::cout << calcAverage(createRndData(kSampleCount));
    std::cout << std::endl;
    return 0;
}
# g++ main.cpp -g -o calc.out
# perf record --freq 10000 --call-graph fp ./calc.out
# perf script | /workspace/fg/stackcollapse-perf.pl | /workspace/fg/flamegraph.pl > flamegraph_updated.svg

Now, the function createRndData takes only 65% of the calculation time. Yeah. 🎉


Disclaimer

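It’s built without any compiler optimization (no -O flag is passed to g++), so the percentages above only describe the unoptimized binaries!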