Rickard Lindberg | Blog | Projects

rlprofiler

Usage

Download rlprofiler.c

wget https://projects.rickardlindberg.me/rlprofiler/rlprofiler.c

Instrument code

#include <stdio.h>
#include "rlprofiler.c"

void sleep_milliseconds(unsigned long milliseconds) {
    unsigned long start;
    unsigned long duration = rlprofiler_system_clock_frequencey() * milliseconds / 1000;
    start = rlprofiler_read_system_clock();
    while (rlprofiler_read_system_clock() - start < duration) {
    }
}

unsigned long sum(unsigned long n) {
    unsigned long result;
    result = 0;
    while (n > 0) {
        result += n--;
    }
    return result;
}

int fac(int n) {
    int result;
    RLPROFILER_ZONE_BEGIN("fac");
    sleep_milliseconds(25);
    if (n == 0) {
        result = 1;
    } else {
        RLPROFILER_ZONE_BEGIN("fac recurse");
        sleep_milliseconds(50);
        result = n * fac(n-1);
        RLPROFILER_ZONE_END("fac recurse");
    }
    RLPROFILER_ZONE_END("fac");
    return result;
}

int main(void) {
    rlprofiler_begin();
    fprintf(stderr, "fac(3)     = %d\n", fac(3));
    RLPROFILER_ZONE_BEGIN("sum big");
    fprintf(stderr, "sum(big)   = %ld\n", sum(100000000));
    RLPROFILER_ZONE_END("sum big");
    RLPROFILER_ZONE_BEGIN("sum twice");
    fprintf(stderr, "sum(twice) = %ld\n", sum(200000000));
    RLPROFILER_ZONE_END("sum twice");
    rlprofiler_end();
    return 0;
}

View report

fac(3)     = 6
sum(big)   = 5000000050000000
sum(twice) = 20000000100000000
==========================================================================
Profiler
==========================================================================
system_clock_frequency        = 1000000
system_time_measured          = 840340
estimated_cpu_clock_frequency = 2299965021
elapsed_cpu_time              = 840.34ms
--------------------------------------------------------------------------
label           | hit count  | exclusive            | total
----------------+------------+----------------------+---------------------
fac             |          4 |  11.90% (  100.00ms) |  29.75% (  250.00ms)
fac recurse     |          3 |  17.85% (  150.00ms) |  26.77% (  225.00ms)
sum big         |          1 |  18.47% (  155.23ms) |  18.47% (  155.23ms)
sum twice       |          1 |  51.76% (  435.00ms) |  51.76% (  435.00ms)
==========================================================================

Full source code for rlprofiler.c

#include <string.h>
#include <sys/time.h>
#include <x86intrin.h>

#ifndef RLPROFILER_MAX_ZONES
#define RLPROFILER_MAX_ZONES 100
#endif

#ifdef RLPROFILER_DISABLE_ZONES
#define RLPROFILER_ZONE_BEGIN(label)
#define RLPROFILER_ZONE_END(label)
#else
#define RLPROFILER_ZONE_BEGIN(label)               \
    {                                              \
        ProfilerZoneActivation activation;         \
        rlprofiler_zone_begin(&activation, label);
#define RLPROFILER_ZONE_END(label)                 \
        rlprofiler_zone_end(&activation, label);   \
    }
#endif

typedef struct {
    char* label;
    unsigned long hit_count;
    unsigned long elapsed_cpu_time_exclusive;
    unsigned long elapsed_cpu_time_total;
} ProfilerZone;

typedef struct {
    ProfilerZone* parent;
    ProfilerZone* zone;
    unsigned long cpu_clock_start;
    unsigned long elapsed_cpu_time_total_on_entry;
} ProfilerZoneActivation;

typedef struct {
    ProfilerZone zones[RLPROFILER_MAX_ZONES];
    ProfilerZone* current_zone;
    unsigned long system_time_start;
    unsigned long system_time_end;
    unsigned long cpu_clock_start;
    unsigned long cpu_clock_end;
} Profiler;

static Profiler rlprofiler;

static unsigned long rlprofiler_system_clock_frequencey(void) {
    return 1000000;
}

static unsigned long rlprofiler_read_system_clock(void) {
    struct timeval value;
    gettimeofday(&value, 0);
    return rlprofiler_system_clock_frequencey() * (unsigned long)value.tv_sec + (unsigned long)value.tv_usec;
}

static unsigned long rlprofiler_read_cpu_clock(void) {
    return __rdtsc();
}

#ifndef RLPROFILER_DISABLE_ZONES
static ProfilerZone* rlprofiler_find_zone(char* label) {
    int zone_index;
    ProfilerZone* zone;
    for (zone_index=1; zone_index<RLPROFILER_MAX_ZONES; ++zone_index) {
        zone = &rlprofiler.zones[zone_index];
        if (!zone->label) {
            zone->label = label;
            return zone;
        }
        if (strcmp(label, rlprofiler.zones[zone_index].label) == 0) {
            return zone;
        }
    }
    fprintf(stderr, "ERROR: Ran out of zones\n");
    exit(1);
}

static void rlprofiler_zone_begin(ProfilerZoneActivation* activation, char* label) {
    activation->parent = rlprofiler.current_zone;
    activation->zone = rlprofiler_find_zone(label);
    activation->elapsed_cpu_time_total_on_entry = activation->zone->elapsed_cpu_time_total;
    rlprofiler.current_zone = activation->zone;
    activation->cpu_clock_start = rlprofiler_read_cpu_clock();
}

static void rlprofiler_zone_end(ProfilerZoneActivation* activation, char* label) {
    unsigned long cpu_clock_end, elapsed_cpu_time;
    cpu_clock_end = rlprofiler_read_cpu_clock();
    if (strcmp(label, activation->zone->label) != 0) {
        fprintf(stderr, "ERROR: Closing zone %s but expected %s\n", activation->zone->label, label);
        exit(1);
    }
    elapsed_cpu_time = cpu_clock_end - activation->cpu_clock_start;
    activation->parent->elapsed_cpu_time_exclusive -= elapsed_cpu_time;
    activation->zone->elapsed_cpu_time_exclusive += elapsed_cpu_time;
    activation->zone->elapsed_cpu_time_total = activation->elapsed_cpu_time_total_on_entry + elapsed_cpu_time;
    ++activation->zone->hit_count;
    rlprofiler.current_zone = activation->parent;
}
#endif

static unsigned long rlprofiler_system_time_measured(void) {
    return rlprofiler.system_time_end - rlprofiler.system_time_start;
}

static unsigned long rlprofiler_elapsed_cpu_time(void) {
    return rlprofiler.cpu_clock_end - rlprofiler.cpu_clock_start;
}

static double rlprofiler_percent_cpu_time(unsigned long cpu_time) {
    return 100.0 * (double)cpu_time / (double)rlprofiler_elapsed_cpu_time();
}

static unsigned long rlprofiler_estimated_cpu_clock_frequency() {
    return (rlprofiler_elapsed_cpu_time() * rlprofiler_system_clock_frequencey()) / rlprofiler_system_time_measured();
}

static double rlprofiler_elapsed_cpu_time_in_milliseconds(unsigned long elapsed_cpu_time) {
    return 1000.0 * (double)elapsed_cpu_time / (double)rlprofiler_estimated_cpu_clock_frequency();
}

static void rlprofiler_print_report(void) {
    int zone_index;
    ProfilerZone* zone;
    fprintf(stderr, "==========================================================================\n");
    fprintf(stderr, "Profiler\n");
    fprintf(stderr, "==========================================================================\n");
    fprintf(stderr, "system_clock_frequency        = %ld\n", rlprofiler_system_clock_frequencey());
    fprintf(stderr, "system_time_measured          = %ld\n", rlprofiler_system_time_measured());
    fprintf(stderr, "estimated_cpu_clock_frequency = %ld\n", rlprofiler_estimated_cpu_clock_frequency());
    fprintf(stderr, "elapsed_cpu_time              = %.2fms\n", rlprofiler_elapsed_cpu_time_in_milliseconds(rlprofiler_elapsed_cpu_time()));
    fprintf(stderr, "--------------------------------------------------------------------------\n");
    fprintf(stderr, "label           | hit count  | exclusive            | total\n");
    fprintf(stderr, "----------------+------------+----------------------+---------------------\n");
    for (zone_index=1; zone_index<RLPROFILER_MAX_ZONES; ++zone_index) {
        zone = &rlprofiler.zones[zone_index];
        if (zone->label) {
            fprintf(
                stderr,
                "%-15s | %10ld | %6.2f%% (%8.2fms) | %6.2f%% (%8.2fms)\n",
                zone->label,
                zone->hit_count,
                rlprofiler_percent_cpu_time(zone->elapsed_cpu_time_exclusive),
                rlprofiler_elapsed_cpu_time_in_milliseconds(zone->elapsed_cpu_time_exclusive),
                rlprofiler_percent_cpu_time(zone->elapsed_cpu_time_total),
                rlprofiler_elapsed_cpu_time_in_milliseconds(zone->elapsed_cpu_time_total)
            );
        }
    }
    fprintf(stderr, "==========================================================================\n");
}

static void rlprofiler_begin(void) {
    rlprofiler.current_zone = &rlprofiler.zones[0];
    rlprofiler.system_time_start = rlprofiler_read_system_clock();
    rlprofiler.cpu_clock_start = rlprofiler_read_cpu_clock();
}

static void rlprofiler_end(void) {
    rlprofiler.cpu_clock_end = rlprofiler_read_cpu_clock();
    rlprofiler.system_time_end = rlprofiler_read_system_clock();
    rlprofiler_print_report();
}

What is Rickard working on and thinking about right now?

Every month I write a newsletter about just that. You will get updates about my current projects and thoughts about programming, and also get a chance to hit reply and interact with me. Subscribe to it below.

Powered by Buttondown (My Newsletter)

Profile picture of Rickard.

I'm Rickard Lindberg from Sweden. This is my home on the web. I like programming. I like both the craft of it and also to write software that solves problems. I also like running.

Me elsewhere: GitHub, Mastodon.