=== modified file 'configure.ac'
--- configure.ac	2012-05-29 12:20:02 +0000
+++ configure.ac	2012-06-09 00:28:37 +0000
@@ -378,7 +378,8 @@
 dnl
 dnl Check for atomic operations support in the compiler
 dnl
-AC_MSG_CHECKING([for atomic operations support])
+AC_CHECK_HEADERS(atomic)
+AC_MSG_CHECKING([for GNU atomic operations support])
 AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
 int n = 0;
 ]],[[
=== modified file 'lib/profiler/Profiler.cc'
--- lib/profiler/Profiler.cc	2012-01-20 18:55:04 +0000
+++ lib/profiler/Profiler.cc	2012-06-07 00:52:43 +0000
@@ -118,10 +118,6 @@
 #if USE_XPROF_STATS

-
-#if HAVE_ASSERT_H
-#include <assert.h>
-#endif
 #if HAVE_GNUMALLLOC_H
 #include <gnumalloc.h>
 #elif HAVE_MALLOC_H
@@ -131,39 +127,10 @@
 #include <unistd.h>
 #endif

-/* Exported Data */
 TimersArray *xprof_Timers = NULL;
-
-/* Private stuff */
-
-
-/* new stuff */
-#define MAXSTACKDEPTH 512
-
-struct _callstack_entry {
-    int timer;     /* index into timers array */
-    const char *name;
-    hrtime_t start, stop, accum;
-};
-
-struct _callstack_entry cstack[MAXSTACKDEPTH];
-int cstack_head = 0;
-
-#if defined(_MSC_VER)     /* Microsoft C Compiler ONLY */
-static __inline void
-#else
-static inline void
-#endif
-xprof_update(xprof_stats_data * head)
-{
-    if (head->delta < head->best)
-        head->best = head->delta;
-    if (head->worst < head->delta)
-        head->worst = head->delta;
-    head->summ += head->delta;
-    head->count++;
-}
-
+uint64_t *xprof_ActiveTimers = NULL;
+uint64_t *xprof_ActiveTimerCount = NULL;
+std::atomic_flag xprof_globalLock = ATOMIC_FLAG_INIT;
 static xprof_stats_data *xp_UNACCOUNTED;
 static int xprof_inited = 0;
@@ -173,71 +140,72 @@
     if (xprof_inited)
         return;

-    xprof_Timers = static_cast<TimersArray *>(calloc(XPROF_LAST + 2, sizeof(xprof_stats_node)));
-
-    xprof_Timers[XPROF_PROF_UNACCOUNTED]->name = "PROF_UNACCOUNTED";
-    xprof_Timers[XPROF_PROF_UNACCOUNTED]->accu.start = get_tick();
-    xp_UNACCOUNTED = &xprof_Timers[XPROF_PROF_UNACCOUNTED]->accu;
-    cstack_head = 0;
+    while (std::atomic_flag_test_and_set_explicit(&xprof_globalLock, std::memory_order_acquire))
+        ; // spin until the global lock is acquired
+    xprof_Timers = static_cast<TimersArray *>(calloc(XPROF_LAST + 2, sizeof(xprof_stats_node)));
+    xprof_ActiveTimers = static_cast<uint64_t *>(calloc(XPROF_LAST + 2, sizeof(uint64_t)));
+    xprof_ActiveTimerCount = static_cast<uint64_t *>(calloc(XPROF_LAST + 2, sizeof(uint64_t)));
     xprof_inited = 1;
-}
-
-void
-xprof_start(xprof_type type, const char *timer)
-{
-    hrtime_t tt = get_tick();
-    if (!xprof_inited)
-        xprof_InitLib();
-
-    /* If nested, stop current stack frame */
-    if (cstack_head > 0) {
-        cstack[cstack_head - 1].accum += get_tick() - cstack[cstack_head - 1].start;
-        cstack[cstack_head - 1].start = -1;
-    }
-
-    /* Are we at the first level? If so; stop the unaccounted timer */
-    if (cstack_head == 0) {
-        assert(xp_UNACCOUNTED->start != -1);
-        xp_UNACCOUNTED->delta = tt - xp_UNACCOUNTED->start;
-        xp_UNACCOUNTED->start = -1;
-        xprof_update(xp_UNACCOUNTED);
-    }
-
-    /* Allocate new stack frame */
-    cstack[cstack_head].start = tt;
-    cstack[cstack_head].stop = -1;
-    cstack[cstack_head].accum = 0;
-    cstack[cstack_head].timer = type;
-    cstack[cstack_head].name = timer;
-    cstack_head++;
-    assert(cstack_head < MAXSTACKDEPTH);
-
-}
-
-void
-xprof_stop(xprof_type type, const char *timer)
-{
-    hrtime_t tt = get_tick();
-    assert(cstack_head > 0);
-    cstack_head--;
-    assert(cstack[cstack_head].timer == type);
-
-    /* Record timer details */
-    cstack[cstack_head].accum += tt - cstack[cstack_head].start;
-    xprof_Timers[type]->accu.delta = cstack[cstack_head].accum;
-    xprof_Timers[type]->name = timer;
-
-    /* Update */
-    xprof_update(&xprof_Timers[type]->accu);
-
-    /* Restart previous timer if we're not at the top level */
-    if (cstack_head > 0) {
-        cstack[cstack_head - 1].start = tt;
-        cstack[cstack_head - 1].stop = 0;
-        return;
-    }
-    /* Get here? We're at the top level; unaccounted */
-    xp_UNACCOUNTED->start = tt;
+    std::atomic_flag_clear_explicit(&xprof_globalLock, std::memory_order_release);
+}
+
+void
+XProfilerNode::start()
+{
+    if (started_ > 0)
+        return;
+
+    started_ = get_tick();
+
+    // kick initialization of timers statistics (just in case)
+    xprof_InitLib();
+
+    while (std::atomic_flag_test_and_set_explicit(&xprof_globalLock, std::memory_order_acquire))
+        ; // spin until the global lock is acquired
+    xprof_ActiveTimers[type_] += started_;
+    xprof_ActiveTimerCount[type_]++;
+    std::atomic_flag_clear_explicit(&xprof_globalLock, std::memory_order_release);
+
+    xprof_stats_data * head = &(xprof_Timers[type_]->accu);
+    while (std::atomic_flag_test_and_set_explicit(&head->lock, std::memory_order_acquire))
+        ; // spin until the list head lock is acquired
+    head->overheads += get_tick() - started_;
+    std::atomic_flag_clear_explicit(&head->lock, std::memory_order_release);
+}
+
+void
+XProfilerNode::stop()
+{
+    if (started_ < 0)
+        return;
+
+    hrtime_t stopped_ = get_tick();
+
+    // calculate how long this timer was running.
+    hrtime_t delta = stopped_ - started_;
+
+    // kick initialization of timers statistics (just in case)
+    xprof_InitLib();
+
+    // Record timer details into global counters
+
+    while (std::atomic_flag_test_and_set_explicit(&xprof_globalLock, std::memory_order_acquire))
+        ; // spin until the global lock is acquired
+    xprof_ActiveTimers[type_] -= started_;
+    xprof_ActiveTimerCount[type_]--;
+    std::atomic_flag_clear_explicit(&xprof_globalLock, std::memory_order_release);
+
+    xprof_stats_data * head = &(xprof_Timers[type_]->accu);
+    while (std::atomic_flag_test_and_set_explicit(&head->lock, std::memory_order_acquire))
+        ; // spin until the list head lock is acquired
+    if (delta < head->best)
+        head->best = delta;
+    if (delta > head->worst)
+        head->worst = delta;
+    head->summ += delta;
+    head->count++;
+    head->overheads += get_tick() - stopped_;
+    std::atomic_flag_clear_explicit(&head->lock, std::memory_order_release);
 }

 #endif /* USE_XPROF_STATS */
=== modified file 'lib/profiler/Profiler.h'
--- lib/profiler/Profiler.h	2011-01-11 07:31:04 +0000
+++ lib/profiler/Profiler.h	2012-06-07 00:33:42 +0000
@@ -18,6 +18,10 @@

 #else /* USE_XPROF_STATS */

+#if HAVE_ATOMIC
+#include <atomic>
+#endif
+
 #define XP_NOBEST (hrtime_t)-1

 typedef struct _xprof_stats_node xprof_stats_node;
@@ -25,13 +29,15 @@
 typedef struct _xprof_stats_data xprof_stats_data;

 struct _xprof_stats_data {
+    std::atomic_flag lock;
     hrtime_t start;
     hrtime_t stop;
-    hrtime_t delta;
+    hrtime_t delta; // unused?
     hrtime_t best;
     hrtime_t worst;
     hrtime_t count;
     hrtime_t accum;
+    int64_t overheads;
     int64_t summ;
 };
@@ -44,15 +50,42 @@
 typedef xprof_stats_node TimersArray[1];

 /* public Data */
+    /// timer statistics histograms
     extern TimersArray *xprof_Timers;
+    /// count of active timers
+    extern uint64_t *xprof_ActiveTimerCount;
+    /// currently running timer accumulations
+    extern uint64_t *xprof_ActiveTimers;
+    /// lock protecting the active timer counters
+    extern std::atomic_flag xprof_globalLock;
+
 /* Exported functions */
-    extern void xprof_start(xprof_type type, const char *timer);
-    extern void xprof_stop(xprof_type type, const char *timer);
     extern void xprof_event(void *data);

-#define PROF_start(probename) xprof_start(XPROF_##probename, #probename)
-#define PROF_stop(probename) xprof_stop(XPROF_##probename, #probename)
+/// This class provides a thread-safe timer for use in code profiling.
+/// The timer starts when the object is created and stops on destruction,
+/// or when the stop() method is called explicitly.
+///
+/// Use the PROF_start(name) and PROF_stop(name) macros.
+class XProfilerNode
+{
+public:
+    XProfilerNode(xprof_type aType, const char *aName) : type_(aType), name_(aName), started_(0) { start(); }
+    ~XProfilerNode() { stop(); }
+
+    void start();
+    void stop();
+
+private:
+    xprof_type type_;    ///< which profile type to update when stopped
+    const char *name_;   ///< printable version of the timers type name
+    hrtime_t started_;   ///< when this timer was started
+};
+
+#define PROF_start(probename) XProfilerNode XProfTimer_##probename(XPROF_##probename, #probename)
+#define PROF_restart(probename) XProfTimer_##probename.start()
+#define PROF_stop(probename) XProfTimer_##probename.stop()

 #endif /* USE_XPROF_STATS */
=== modified file 'src/ProfStats.cc'
--- src/ProfStats.cc	2012-01-20 18:55:04 +0000
+++ src/ProfStats.cc	2012-06-07 01:24:08 +0000
@@ -46,12 +46,12 @@

 #define MAX_SORTLIST 200

+static hrtime_t now;
 static hrtime_t xprof_delta = 0;
 static hrtime_t xprof_start_t = 0;
 static hrtime_t xprof_verystart = 0;
 static hrtime_t xprof_average_delta = 0;
 static int xprof_events = 0;
-static int xprof_inited = 0;
 static xprof_stats_data Totals;

 static TimersArray *xprof_stats_avg1sec = NULL;
@@ -113,13 +113,14 @@
 xprof_show_item(StoreEntry * sentry, const char *name, xprof_stats_data * hist)
 {
     storeAppendPrintf(sentry,
-                      "%s\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t %.2f\t %6.3f\t\n",
+                      "%s\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t %.2f\t %6.3f\t\n",
                       name,
                       hist->count,
                       hist->summ,
                       (hist->best != XP_NOBEST ? hist->best : 0),
                       hist->count ? hist->summ / hist->count : 0,
                       hist->worst,
+                      hist->overheads,
                       hist->count / time_frame,
                       Math::doublePercent((double) hist->summ, (double) hist->delta));
 }
@@ -149,7 +150,7 @@
                       );

     storeAppendPrintf(sentry,
-                      "Probe Name\t Events\t cumulated time \t best case \t average \t worst case\t Rate / sec \t %% in int\n");
+                      "Probe Name\t Events\t cumulated time \t best case \t average \t worst case\toverheads\t Rate / sec \t %% in int\n");

     for (i = 0; i < XPROF_LAST; i++) {
         if (!hist[i]->name)
@@ -166,6 +167,8 @@

         Totals.summ += show->summ;

+        Totals.overheads += show->overheads;
+
         Totals.best += (show->best != XP_NOBEST ? show->best : 0);

         Totals.worst += show->worst;
@@ -194,7 +197,11 @@
     now = get_tick();

     for (i = 0; i < XPROF_LAST; i++) {
+        while (std::atomic_flag_test_and_set_explicit(&(head[i]->accu.lock), std::memory_order_acquire))
+            ; // spin until the list record lock is acquired
+
         hist[i]->name = head[i]->name;
+        hist[i]->accu.overheads += head[i]->accu.overheads;
         hist[i]->accu.summ += head[i]->accu.summ;
         hist[i]->accu.count += head[i]->accu.count;     /* accumulate multisec */
@@ -225,6 +232,8 @@
             hist[i]->accu.delta = 0;
             head[i]->accu.start = keep;
         }
+
+        std::atomic_flag_clear_explicit(&(head[i]->accu.lock), std::memory_order_release);
     }
 }
@@ -259,41 +268,37 @@
 static inline void
 xprof_chk_overhead(int samples)
 {
+    // NOTE: these profiler checks require both the global and per-record xprof_* locks
+    PROF_start(PROF_OVERHEAD);
     while (samples--) {
-        PROF_start(PROF_OVERHEAD);
+        PROF_restart(PROF_OVERHEAD);
         PROF_stop(PROF_OVERHEAD);
     }
 }

 static void
-xprofRegisterWithCacheManager(void)
-{
-    Mgr::RegisterAction("cpu_profile", "CPU Profiling Stats", xprof_summary, 0, 1);
-}
-
-// FIXME:
-// this gets colled once per event. This doesn't seem to make much sense,
-// does it?
-static hrtime_t now;
-static void
 xprof_Init(void)
 {
+    static int xprof_inited = 0;
     if (xprof_inited)
         return;
-    xprof_delta = xprof_verystart = xprof_start_t = now;
-
+    xprof_delta = xprof_verystart = xprof_start_t = get_tick();
     xprof_inited = 1;
-
-    xprofRegisterWithCacheManager(); //moved here so it's not double-init'ed
+    Mgr::RegisterAction("cpu_profile", "CPU Profiling Stats", xprof_summary, 0, 1);
 }

 void
 xprof_event(void *data)
 {
-    now = get_tick();
+    hrtime_t now = get_tick();
     xprof_Init();
     xprof_delta = now - xprof_start_t;

+    xprof_chk_overhead(2);
+
+    while (std::atomic_flag_test_and_set_explicit(&xprof_globalLock, std::memory_order_acquire))
+        ; // spin until the global lock is acquired
+
     xprof_start_t = now;

     xprof_events++;
@@ -303,8 +308,6 @@
     if (xprof_average_delta > (xprof_delta >> 1))
         xprof_average_delta = xprof_average_delta - (xprof_average_delta >> 8) + (xprof_delta >> 8);

-    xprof_chk_overhead(2);
-
     xprof_average(&xprof_stats_avg24hour, 24 * 3600);

     xprof_average(&xprof_stats_avg5hour, 5 * 3600);
@@ -323,6 +326,8 @@

     xprof_average(&xprof_stats_avg1sec, 1);

+    std::atomic_flag_clear_explicit(&xprof_globalLock, std::memory_order_release);
+
     xprof_chk_overhead(30);

     eventAdd("cpuProfiling", xprof_event, NULL, 1.0, 1);