wget https://projects.rickardlindberg.me/rlprofiler/rlprofiler.c
#include <stdio.h> #include "rlprofiler.c" void sleep_milliseconds(uint64_t milliseconds); int sum(int n) { int result; RLPROFILER_ZONE_BEGIN("sum"); result = 0; while (n > 0) { sleep_milliseconds(10); RLPROFILER_ZONE_BEGIN("delay"); sleep_milliseconds(5); RLPROFILER_ZONE_END("delay"); result += n--; } RLPROFILER_ZONE_END("sum"); return result; } int fac(int n) { int result; RLPROFILER_ZONE_BEGIN("fac"); sleep_milliseconds(25); if (n == 0) { result = 1; } else { RLPROFILER_ZONE_BEGIN("fac recurse"); sleep_milliseconds(50); result = n * fac(n-1); RLPROFILER_ZONE_END("fac recurse"); } RLPROFILER_ZONE_END("fac"); return result; } void read_file(void) { int number_of_chunks; number_of_chunks = 2; while (number_of_chunks--) { RLPROFILER_ZONE_BEGIN("read file"); /* simulate read */ sleep_milliseconds(50); RLPROFILER_ZONE_END_BYTE_COUNT("read file", 1024*1024*1024/10/2); } } void example(FILE *f) { rlprofiler_begin(); fprintf(f, "fac(4) = %d\n", fac(4)); fprintf(f, "sum(4) = %d\n", sum(4)); read_file(); rlprofiler_end_report_file(f); }
fac(4) = 24 sum(4) = 10 ======================================================================== Profiler ======================================================================== system clock frequency = 1000000Hz estimated cpu clock frequency = 2299959081Hz elapsed time = 485.08ms ------------------------------------------------------------------------ label | hit count | exclusive | total ---------------+-----------+----------------------+--------------------- fac | 5 | 25.77% ( 125.00ms) | 67.00% ( 325.00ms) fac recurse | 4 | 41.23% ( 200.01ms) | 61.85% ( 300.00ms) sum | 1 | 8.25% ( 40.00ms) | 12.37% ( 60.00ms) delay | 4 | 4.12% ( 20.00ms) | 4.12% ( 20.00ms) read file | 2 | 20.61% ( 100.00ms) | 20.61% ( 100.00ms) ----------------------------------------------------- label | size | time | throughput ---------------+------------+------------+----------- read file | 102.40mb | 100.00ms | 1.00gb/s ========================================================================
#include <inttypes.h> #include <string.h> #include <sys/time.h> #include <x86intrin.h> #ifndef RLPROFILER_MAX_ZONES #define RLPROFILER_MAX_ZONES 100 #endif #ifdef RLPROFILER_DISABLE_ZONES #define RLPROFILER_ZONE_BEGIN(label) #define RLPROFILER_ZONE_END_BYTE_COUNT(label, byte_count) #else #define RLPROFILER_ZONE_BEGIN(label) \ { \ RlprofilerZoneActivation activation; \ rlprofiler_zone_begin(&activation, label); #define RLPROFILER_ZONE_END_BYTE_COUNT(label, byte_count) \ rlprofiler_zone_end(&activation, label, byte_count); \ } #endif #define RLPROFILER_ZONE_END(label) \ RLPROFILER_ZONE_END_BYTE_COUNT(label, 0) typedef struct { char *label; uint64_t hit_count; uint64_t elapsed_cpu_time_exclusive; uint64_t elapsed_cpu_time_total; uint64_t processed_byte_count; } RlprofilerZone; typedef struct { RlprofilerZone *parent; RlprofilerZone *zone; uint64_t cpu_clock_start; uint64_t elapsed_cpu_time_total_on_entry; } RlprofilerZoneActivation; typedef struct { RlprofilerZone zones[RLPROFILER_MAX_ZONES]; RlprofilerZone *current_zone; uint64_t system_time_start; uint64_t system_time_end; uint64_t cpu_clock_start; uint64_t cpu_clock_end; } Rlprofiler; static Rlprofiler rlprofiler; static uint64_t rlprofiler_system_clock_frequencey(void) { return 1000000; } static uint64_t rlprofiler_read_system_clock(void) { struct timeval value; gettimeofday(&value, 0); return rlprofiler_system_clock_frequencey() * (uint64_t)value.tv_sec + (uint64_t)value.tv_usec; } static uint64_t rlprofiler_read_cpu_clock(void) { return __rdtsc(); } #ifndef RLPROFILER_DISABLE_ZONES static RlprofilerZone *rlprofiler_find_zone(char *label) { int zone_index; RlprofilerZone *zone; for (zone_index=1; zone_index<RLPROFILER_MAX_ZONES; ++zone_index) { zone = &rlprofiler.zones[zone_index]; if (!zone->label) { zone->label = label; return zone; } if (strcmp(label, rlprofiler.zones[zone_index].label) == 0) { return zone; } } fprintf(stderr, "ERROR: Ran out of zones\n"); exit(1); } static void rlprofiler_zone_begin(RlprofilerZoneActivation *activation, char *label) { activation->parent = rlprofiler.current_zone; activation->zone = rlprofiler_find_zone(label); activation->elapsed_cpu_time_total_on_entry = activation->zone->elapsed_cpu_time_total; rlprofiler.current_zone = activation->zone; activation->cpu_clock_start = rlprofiler_read_cpu_clock(); } static void rlprofiler_zone_end(RlprofilerZoneActivation *activation, char *label, uint64_t byte_count) { uint64_t cpu_clock_end, elapsed_cpu_time; cpu_clock_end = rlprofiler_read_cpu_clock(); if (strcmp(label, activation->zone->label) != 0) { fprintf(stderr, "ERROR: Closing zone %s but expected %s\n", activation->zone->label, label); exit(1); } elapsed_cpu_time = cpu_clock_end - activation->cpu_clock_start; activation->parent->elapsed_cpu_time_exclusive -= elapsed_cpu_time; activation->zone->elapsed_cpu_time_exclusive += elapsed_cpu_time; activation->zone->elapsed_cpu_time_total = activation->elapsed_cpu_time_total_on_entry + elapsed_cpu_time; activation->zone->processed_byte_count += byte_count; ++activation->zone->hit_count; rlprofiler.current_zone = activation->parent; } #endif static uint64_t rlprofiler_elapsed_system_time(void) { return rlprofiler.system_time_end - rlprofiler.system_time_start; } static uint64_t rlprofiler_elapsed_cpu_time(void) { return rlprofiler.cpu_clock_end - rlprofiler.cpu_clock_start; } static uint64_t rlprofiler_cpu_clock_frequency_estimate() { return (rlprofiler_elapsed_cpu_time() * rlprofiler_system_clock_frequencey()) / rlprofiler_elapsed_system_time(); } static double rlprofiler_cpu_duration_to_s(uint64_t elapsed_cpu_time) { return (double)elapsed_cpu_time / (double)rlprofiler_cpu_clock_frequency_estimate(); } static double rlprofiler_cpu_duration_to_percent(uint64_t elapsed_cpu_time) { return 100.0 * (double)elapsed_cpu_time / (double)rlprofiler_elapsed_cpu_time(); } static double rlprofiler_zone_exclusive_percent(RlprofilerZone *zone) { return rlprofiler_cpu_duration_to_percent(zone->elapsed_cpu_time_exclusive); } static double rlprofiler_zone_total_percent(RlprofilerZone *zone) { return rlprofiler_cpu_duration_to_percent(zone->elapsed_cpu_time_total); } static double rlprofiler_cpu_duration_to_ms(uint64_t elapsed_cpu_time) { return 1000.0 * rlprofiler_cpu_duration_to_s(elapsed_cpu_time); } static double rlprofiler_zone_exclusive_ms(RlprofilerZone *zone) { return rlprofiler_cpu_duration_to_ms(zone->elapsed_cpu_time_exclusive); } static double rlprofiler_zone_total_ms(RlprofilerZone *zone) { return rlprofiler_cpu_duration_to_ms(zone->elapsed_cpu_time_total); } static double rlprofiler_elapsed_time_ms() { return rlprofiler_cpu_duration_to_ms(rlprofiler_elapsed_cpu_time()); } static double rlprofiler_zone_processed_mb(RlprofilerZone *zone) { double kb = 1024; double mb = 1024 * kb; return (double)zone->processed_byte_count / mb; } static double rlprofiler_zone_throughput_gb_s(RlprofilerZone *zone) { double kb = 1024; double mb = 1024 * kb; double gb = 1024 * mb; return (double)zone->processed_byte_count / gb / rlprofiler_cpu_duration_to_s(zone->elapsed_cpu_time_total); } static void rlprofiler_print_report(FILE *f) { int zone_index; RlprofilerZone *zone; int write_throughput_table; fprintf(f, "========================================================================\n"); fprintf(f, "Profiler\n"); fprintf(f, "========================================================================\n"); fprintf(f, "system clock frequency = %"PRId64"Hz\n", rlprofiler_system_clock_frequencey()); fprintf(f, "estimated cpu clock frequency = %"PRId64"Hz\n", rlprofiler_cpu_clock_frequency_estimate()); fprintf(f, "elapsed time = %.2fms\n", rlprofiler_elapsed_time_ms()); fprintf(f, "------------------------------------------------------------------------\n"); fprintf(f, "label | hit count | exclusive | total\n"); fprintf(f, "---------------+-----------+----------------------+---------------------\n"); write_throughput_table = 0; for (zone_index=1; zone_index<RLPROFILER_MAX_ZONES; ++zone_index) { zone = &rlprofiler.zones[zone_index]; if (zone->label) { fprintf( f, "%-14s | %9"PRId64" | %6.2f%% (%8.2fms) | %6.2f%% (%8.2fms)\n", zone->label, zone->hit_count, rlprofiler_zone_exclusive_percent(zone), rlprofiler_zone_exclusive_ms(zone), rlprofiler_zone_total_percent(zone), rlprofiler_zone_total_ms(zone) ); if (zone->processed_byte_count) { write_throughput_table = 1; } } } if (write_throughput_table) { fprintf(f, "-----------------------------------------------------\n"); fprintf(f, "label | size | time | throughput\n"); fprintf(f, "---------------+------------+------------+-----------\n"); for (zone_index=1; zone_index<RLPROFILER_MAX_ZONES; ++zone_index) { zone = &rlprofiler.zones[zone_index]; if (zone->label && zone->processed_byte_count) { fprintf( f, "%-14s | %8.2fmb | %8.2fms | %6.2fgb/s\n", zone->label, rlprofiler_zone_processed_mb(zone), rlprofiler_zone_total_ms(zone), rlprofiler_zone_throughput_gb_s(zone) ); } } } fprintf(f, "========================================================================\n"); } void rlprofiler_begin(void) { int zone_index; RlprofilerZone *zone; for (zone_index=0; zone_index<RLPROFILER_MAX_ZONES; ++zone_index) { zone = &rlprofiler.zones[zone_index]; zone->label = NULL; zone->hit_count = 0; zone->elapsed_cpu_time_exclusive = 0; zone->elapsed_cpu_time_total = 0; zone->processed_byte_count = 0; } rlprofiler.current_zone = &rlprofiler.zones[0]; rlprofiler.system_time_start = rlprofiler_read_system_clock(); rlprofiler.system_time_end = rlprofiler.system_time_start; rlprofiler.cpu_clock_start = rlprofiler_read_cpu_clock(); rlprofiler.cpu_clock_end = rlprofiler.cpu_clock_start; } void rlprofiler_end(void) { rlprofiler.cpu_clock_end = rlprofiler_read_cpu_clock(); rlprofiler.system_time_end = rlprofiler_read_system_clock(); } void rlprofiler_end_report_file(FILE *f) { rlprofiler_end(); rlprofiler_print_report(f); } void rlprofiler_end_report(void) { rlprofiler_end_report_file(stderr); }
What is Rickard working on and thinking about right now?
Every month I write a newsletter about just that. You will get updates about my current projects and thoughts about programming, and also get a chance to hit reply and interact with me. Subscribe to it below.