Rickard Lindberg | Blog | Projects

rlprofiler

Usage

Download rlprofiler.c

wget https://projects.rickardlindberg.me/rlprofiler/rlprofiler.c

Instrument code

#include <stdio.h>

#include "rlprofiler.c"

void sleep_milliseconds(uint64_t milliseconds);

int sum(int n) {
    int result;
    RLPROFILER_ZONE_BEGIN("sum");
    result = 0;
    while (n > 0) {
        sleep_milliseconds(10);
        RLPROFILER_ZONE_BEGIN("delay");
        sleep_milliseconds(5);
        RLPROFILER_ZONE_END("delay");
        result += n--;
    }
    RLPROFILER_ZONE_END("sum");
    return result;
}

int fac(int n) {
    int result;
    RLPROFILER_ZONE_BEGIN("fac");
    sleep_milliseconds(25);
    if (n == 0) {
        result = 1;
    } else {
        RLPROFILER_ZONE_BEGIN("fac recurse");
        sleep_milliseconds(50);
        result = n * fac(n-1);
        RLPROFILER_ZONE_END("fac recurse");
    }
    RLPROFILER_ZONE_END("fac");
    return result;
}

void read_file(void) {
    int number_of_chunks;
    number_of_chunks = 2;
    while (number_of_chunks--) {
        RLPROFILER_ZONE_BEGIN("read file");
        /* simulate read */
        sleep_milliseconds(50);
        RLPROFILER_ZONE_END_BYTE_COUNT("read file", 1024*1024*1024/10/2);
    }
}

void example(FILE *f) {
    rlprofiler_begin();
    fprintf(f, "fac(4) = %d\n", fac(4));
    fprintf(f, "sum(4) = %d\n", sum(4));
    read_file();
    rlprofiler_end_report_file(f);
}

View report

fac(4) = 24
sum(4) = 10
========================================================================
Profiler
========================================================================
system clock frequency        = 1000000Hz
estimated cpu clock frequency = 2299959081Hz
elapsed time                  = 485.08ms
------------------------------------------------------------------------
label          | hit count | exclusive            | total
---------------+-----------+----------------------+---------------------
fac            |         5 |  25.77% (  125.00ms) |  67.00% (  325.00ms)
fac recurse    |         4 |  41.23% (  200.01ms) |  61.85% (  300.00ms)
sum            |         1 |   8.25% (   40.00ms) |  12.37% (   60.00ms)
delay          |         4 |   4.12% (   20.00ms) |   4.12% (   20.00ms)
read file      |         2 |  20.61% (  100.00ms) |  20.61% (  100.00ms)
-----------------------------------------------------
label          | size       | time       | throughput
---------------+------------+------------+-----------
read file      |   102.40mb |   100.00ms |   1.00gb/s
========================================================================

Full source code for rlprofiler.c

#include <inttypes.h>
#include <string.h>
#include <sys/time.h>
#include <x86intrin.h>

#ifndef RLPROFILER_MAX_ZONES
#define RLPROFILER_MAX_ZONES 100
#endif

#ifdef RLPROFILER_DISABLE_ZONES
#define RLPROFILER_ZONE_BEGIN(label)
#define RLPROFILER_ZONE_END_BYTE_COUNT(label, byte_count)
#else
#define RLPROFILER_ZONE_BEGIN(label)                         \
    {                                                        \
        RlprofilerZoneActivation activation;                 \
        rlprofiler_zone_begin(&activation, label);
#define RLPROFILER_ZONE_END_BYTE_COUNT(label, byte_count)    \
        rlprofiler_zone_end(&activation, label, byte_count); \
    }
#endif
#define RLPROFILER_ZONE_END(label)                           \
    RLPROFILER_ZONE_END_BYTE_COUNT(label, 0)

typedef struct {
    char *label;
    uint64_t hit_count;
    uint64_t elapsed_cpu_time_exclusive;
    uint64_t elapsed_cpu_time_total;
    uint64_t processed_byte_count;
} RlprofilerZone;

typedef struct {
    RlprofilerZone *parent;
    RlprofilerZone *zone;
    uint64_t cpu_clock_start;
    uint64_t elapsed_cpu_time_total_on_entry;
} RlprofilerZoneActivation;

typedef struct {
    RlprofilerZone zones[RLPROFILER_MAX_ZONES];
    RlprofilerZone *current_zone;
    uint64_t system_time_start;
    uint64_t system_time_end;
    uint64_t cpu_clock_start;
    uint64_t cpu_clock_end;
} Rlprofiler;

static Rlprofiler rlprofiler;

static uint64_t rlprofiler_system_clock_frequencey(void) {
    return 1000000;
}

static uint64_t rlprofiler_read_system_clock(void) {
    struct timeval value;
    gettimeofday(&value, 0);
    return rlprofiler_system_clock_frequencey() * (uint64_t)value.tv_sec + (uint64_t)value.tv_usec;
}

static uint64_t rlprofiler_read_cpu_clock(void) {
    return __rdtsc();
}

#ifndef RLPROFILER_DISABLE_ZONES
static RlprofilerZone *rlprofiler_find_zone(char *label) {
    int zone_index;
    RlprofilerZone *zone;
    for (zone_index=1; zone_index<RLPROFILER_MAX_ZONES; ++zone_index) {
        zone = &rlprofiler.zones[zone_index];
        if (!zone->label) {
            zone->label = label;
            return zone;
        }
        if (strcmp(label, rlprofiler.zones[zone_index].label) == 0) {
            return zone;
        }
    }
    fprintf(stderr, "ERROR: Ran out of zones\n");
    exit(1);
}

static void rlprofiler_zone_begin(RlprofilerZoneActivation *activation, char *label) {
    activation->parent = rlprofiler.current_zone;
    activation->zone = rlprofiler_find_zone(label);
    activation->elapsed_cpu_time_total_on_entry = activation->zone->elapsed_cpu_time_total;
    rlprofiler.current_zone = activation->zone;
    activation->cpu_clock_start = rlprofiler_read_cpu_clock();
}

static void rlprofiler_zone_end(RlprofilerZoneActivation *activation, char *label, uint64_t byte_count) {
    uint64_t cpu_clock_end, elapsed_cpu_time;
    cpu_clock_end = rlprofiler_read_cpu_clock();
    if (strcmp(label, activation->zone->label) != 0) {
        fprintf(stderr, "ERROR: Closing zone %s but expected %s\n", activation->zone->label, label);
        exit(1);
    }
    elapsed_cpu_time = cpu_clock_end - activation->cpu_clock_start;
    activation->parent->elapsed_cpu_time_exclusive -= elapsed_cpu_time;
    activation->zone->elapsed_cpu_time_exclusive += elapsed_cpu_time;
    activation->zone->elapsed_cpu_time_total = activation->elapsed_cpu_time_total_on_entry + elapsed_cpu_time;
    activation->zone->processed_byte_count += byte_count;
    ++activation->zone->hit_count;
    rlprofiler.current_zone = activation->parent;
}
#endif

static uint64_t rlprofiler_elapsed_system_time(void) {
    return rlprofiler.system_time_end - rlprofiler.system_time_start;
}

static uint64_t rlprofiler_elapsed_cpu_time(void) {
    return rlprofiler.cpu_clock_end - rlprofiler.cpu_clock_start;
}

static uint64_t rlprofiler_cpu_clock_frequency_estimate() {
    return (rlprofiler_elapsed_cpu_time() * rlprofiler_system_clock_frequencey()) / rlprofiler_elapsed_system_time();
}

static double rlprofiler_cpu_duration_to_s(uint64_t elapsed_cpu_time) {
    return (double)elapsed_cpu_time / (double)rlprofiler_cpu_clock_frequency_estimate();
}

static double rlprofiler_cpu_duration_to_percent(uint64_t elapsed_cpu_time) {
    return 100.0 * (double)elapsed_cpu_time / (double)rlprofiler_elapsed_cpu_time();
}

static double rlprofiler_zone_exclusive_percent(RlprofilerZone *zone) {
    return rlprofiler_cpu_duration_to_percent(zone->elapsed_cpu_time_exclusive);
}

static double rlprofiler_zone_total_percent(RlprofilerZone *zone) {
    return rlprofiler_cpu_duration_to_percent(zone->elapsed_cpu_time_total);
}

static double rlprofiler_cpu_duration_to_ms(uint64_t elapsed_cpu_time) {
    return 1000.0 * rlprofiler_cpu_duration_to_s(elapsed_cpu_time);
}

static double rlprofiler_zone_exclusive_ms(RlprofilerZone *zone) {
    return rlprofiler_cpu_duration_to_ms(zone->elapsed_cpu_time_exclusive);
}

static double rlprofiler_zone_total_ms(RlprofilerZone *zone) {
    return rlprofiler_cpu_duration_to_ms(zone->elapsed_cpu_time_total);
}

static double rlprofiler_elapsed_time_ms() {
    return rlprofiler_cpu_duration_to_ms(rlprofiler_elapsed_cpu_time());
}

static double rlprofiler_zone_processed_mb(RlprofilerZone *zone) {
    double kb = 1024;
    double mb = 1024 * kb;
    return (double)zone->processed_byte_count / mb;
}

static double rlprofiler_zone_throughput_gb_s(RlprofilerZone *zone) {
    double kb = 1024;
    double mb = 1024 * kb;
    double gb = 1024 * mb;
    return (double)zone->processed_byte_count / gb / rlprofiler_cpu_duration_to_s(zone->elapsed_cpu_time_total);
}

static void rlprofiler_print_report(FILE *f) {
    int zone_index;
    RlprofilerZone *zone;
    int write_throughput_table;
    fprintf(f, "========================================================================\n");
    fprintf(f, "Profiler\n");
    fprintf(f, "========================================================================\n");
    fprintf(f, "system clock frequency        = %"PRId64"Hz\n", rlprofiler_system_clock_frequencey());
    fprintf(f, "estimated cpu clock frequency = %"PRId64"Hz\n", rlprofiler_cpu_clock_frequency_estimate());
    fprintf(f, "elapsed time                  = %.2fms\n", rlprofiler_elapsed_time_ms());
    fprintf(f, "------------------------------------------------------------------------\n");
    fprintf(f, "label          | hit count | exclusive            | total\n");
    fprintf(f, "---------------+-----------+----------------------+---------------------\n");
    write_throughput_table = 0;
    for (zone_index=1; zone_index<RLPROFILER_MAX_ZONES; ++zone_index) {
        zone = &rlprofiler.zones[zone_index];
        if (zone->label) {
            fprintf(
                f,
                "%-14s | %9"PRId64" | %6.2f%% (%8.2fms) | %6.2f%% (%8.2fms)\n",
                zone->label,
                zone->hit_count,
                rlprofiler_zone_exclusive_percent(zone),
                rlprofiler_zone_exclusive_ms(zone),
                rlprofiler_zone_total_percent(zone),
                rlprofiler_zone_total_ms(zone)
            );
            if (zone->processed_byte_count) {
                write_throughput_table = 1;
            }
        }
    }
    if (write_throughput_table) {
        fprintf(f, "-----------------------------------------------------\n");
        fprintf(f, "label          | size       | time       | throughput\n");
        fprintf(f, "---------------+------------+------------+-----------\n");
        for (zone_index=1; zone_index<RLPROFILER_MAX_ZONES; ++zone_index) {
            zone = &rlprofiler.zones[zone_index];
            if (zone->label && zone->processed_byte_count) {
                fprintf(
                    f,
                    "%-14s | %8.2fmb | %8.2fms | %6.2fgb/s\n",
                    zone->label,
                    rlprofiler_zone_processed_mb(zone),
                    rlprofiler_zone_total_ms(zone),
                    rlprofiler_zone_throughput_gb_s(zone)
                );
            }
        }
    }
    fprintf(f, "========================================================================\n");
}

void rlprofiler_begin(void) {
    int zone_index;
    RlprofilerZone *zone;
    for (zone_index=0; zone_index<RLPROFILER_MAX_ZONES; ++zone_index) {
        zone = &rlprofiler.zones[zone_index];
        zone->label = NULL;
        zone->hit_count = 0;
        zone->elapsed_cpu_time_exclusive = 0;
        zone->elapsed_cpu_time_total = 0;
        zone->processed_byte_count = 0;
    }
    rlprofiler.current_zone = &rlprofiler.zones[0];
    rlprofiler.system_time_start = rlprofiler_read_system_clock();
    rlprofiler.system_time_end = rlprofiler.system_time_start;
    rlprofiler.cpu_clock_start = rlprofiler_read_cpu_clock();
    rlprofiler.cpu_clock_end = rlprofiler.cpu_clock_start;
}

void rlprofiler_end(void) {
    rlprofiler.cpu_clock_end = rlprofiler_read_cpu_clock();
    rlprofiler.system_time_end = rlprofiler_read_system_clock();
}

void rlprofiler_end_report_file(FILE *f) {
    rlprofiler_end();
    rlprofiler_print_report(f);
}

void rlprofiler_end_report(void) {
    rlprofiler_end_report_file(stderr);
}

What is Rickard working on and thinking about right now?

Every month I write a newsletter about just that. You will get updates about my current projects and thoughts about programming, and also get a chance to hit reply and interact with me. Subscribe to it below.

Powered by Buttondown (My Newsletter)

Profile picture of Rickard.

I'm Rickard Lindberg from Sweden. This is my home on the web. I like programming. I like both the craft of it and also to write software that solves problems. I also like running.

Me elsewhere: GitHub, Mastodon.