=== modified file 'configure.ac' --- configure.ac 2012-06-12 08:50:53 +0000 +++ configure.ac 2012-06-13 03:32:10 +0000 @@ -378,7 +378,8 @@ dnl dnl Check for atomic operations support in the compiler dnl -AC_MSG_CHECKING([for atomic operations support]) +AC_CHECK_HEADERS(atomic) +AC_MSG_CHECKING([for GNU atomic operations support]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ int n = 0; ]],[[ === modified file 'lib/profiler/Profiler.cc' --- lib/profiler/Profiler.cc 2012-07-02 12:28:10 +0000 +++ lib/profiler/Profiler.cc 2012-07-04 02:31:14 +0000 @@ -114,130 +114,78 @@ */ #include "squid.h" + +#if USE_XPROF_STATS + #include "profiler/Profiler.h" -#if USE_XPROF_STATS - - -#if HAVE_ASSERT_H -#include -#endif -#if HAVE_GNUMALLLOC_H -#include -#elif HAVE_MALLOC_H -#include -#endif -#if HAVE_UNISTD_H -#include -#endif - -/* Exported Data */ -TimersArray *xprof_Timers = NULL; - -/* Private stuff */ - - -/* new stuff */ -#define MAXSTACKDEPTH 512 - -struct _callstack_entry { - int timer; /* index into timers array */ - const char *name; - hrtime_t start, stop, accum; -}; - -struct _callstack_entry cstack[MAXSTACKDEPTH]; -int cstack_head = 0; - -#if defined(_MSC_VER) /* Microsoft C Compiler ONLY */ -static __inline void -#else -static inline void -#endif -xprof_update(xprof_stats_data * head) -{ - if (head->delta < head->best) - head->best = head->delta; - if (head->worst < head->delta) - head->worst = head->delta; - head->summ += head->delta; - ++head->count; -} - +std::atomic *xprof_ActiveTimers = NULL; +std::atomic *xprof_ActiveTimerCount = NULL; static xprof_stats_data *xp_UNACCOUNTED; -static int xprof_inited = 0; - -static void -xprof_InitLib(void) -{ - if (xprof_inited) +static std::atomic xprof_inited = false; + +void +XProfilerNode::Initialize() +{ + // init may only be done once. potential callers need + static std::atomic_flag xprof_GlobalLock = ATOMIC_FLAG_INIT; + while(std::atomic_flag_test_and_set_explicit(&xprof_globalLock, std::memory_order_acquire)) + ; // spin until the global lock is acquired + // if still not set we now have a lock on it and can do the expensive allocations + // NP: xprof_inited is only set to true while locked. + if (!xprof_inited.load()) { + xprof_TimerStats = new TimersArray(XPROF_LAST + 2); + xprof_ActiveTimers = new std::atomic(XPROF_LAST + 2); + xprof_ActiveTimerCount = new std::atomic(XPROF_LAST + 2); + xprof_inited.store(true); + } + std::atomic_flag_clear_explicit(&xprof_globalLock, std::memory_order_release); +} + +void +XProfilerNode::start() +{ + if (started_ > 0) return; - xprof_Timers = static_cast(calloc(XPROF_LAST + 2, sizeof(xprof_stats_node))); - - xprof_Timers[XPROF_PROF_UNACCOUNTED]->name = "PROF_UNACCOUNTED"; - xprof_Timers[XPROF_PROF_UNACCOUNTED]->accu.start = get_tick(); - xp_UNACCOUNTED = &xprof_Timers[XPROF_PROF_UNACCOUNTED]->accu; - cstack_head = 0; - xprof_inited = 1; -} - -void -xprof_start(xprof_type type, const char *timer) -{ - hrtime_t tt = get_tick(); - if (!xprof_inited) + started_ = get_tick(); + + // kick initialization of timers statistics (if needed) + if (!xprof_inited.load()) { xprof_InitLib(); - - /* If nested, stop current stack frame */ - if (cstack_head > 0) { - cstack[cstack_head - 1].accum += get_tick() - cstack[cstack_head - 1].start; - cstack[cstack_head - 1].start = -1; - } - - /* Are we at the first level? If so; stop the unaccounted timer */ - if (cstack_head == 0) { - assert(xp_UNACCOUNTED->start != -1); - xp_UNACCOUNTED->delta = tt - xp_UNACCOUNTED->start; - xp_UNACCOUNTED->start = -1; - xprof_update(xp_UNACCOUNTED); - } - - /* Allocate new stack frame */ - cstack[cstack_head].start = tt; - cstack[cstack_head].stop = -1; - cstack[cstack_head].accum = 0; - cstack[cstack_head].timer = type; - cstack[cstack_head].name = timer; - ++cstack_head; - assert(cstack_head < MAXSTACKDEPTH); - + } + + xprof_ActiveTimers[type_] += started_; + ++xprof_ActiveTimerCount[type_]; + + xprof_stats_data * head = &(xprof_Timers[type_]->accu); + head->overheads += get_tick() - started_; } void -xprof_stop(xprof_type type, const char *timer) +XProfilerNode::stop() { - hrtime_t tt = get_tick(); - assert(cstack_head > 0); - --cstack_head; - assert(cstack[cstack_head].timer == type); - - /* Record timer details */ - cstack[cstack_head].accum += tt - cstack[cstack_head].start; - xprof_Timers[type]->accu.delta = cstack[cstack_head].accum; - xprof_Timers[type]->name = timer; - - /* Update */ - xprof_update(&xprof_Timers[type]->accu); - - /* Restart previous timer if we're not at the top level */ - if (cstack_head > 0) { - cstack[cstack_head - 1].start = tt; - cstack[cstack_head - 1].stop = 0; + if (started_ < 0) return; - } - /* Get here? We're at the top level; unaccounted */ - xp_UNACCOUNTED->start = tt; + + hrtime_t stopped_ = get_tick(); + + // calculate how long this timer was running. + hrtime_t delta = stopped_ - started_; + + // Record timer details into global counters + + xprof_ActiveTimers[type_] -= started_; + --xprof_ActiveTimerCount[type_]; + + xprof_stats_data * head = &(xprof_Timers[type_]->accu); + if (delta < head->best.load()) + head->best = delta; + if (delta > head->worst.load()) + head->worst = delta; + head->summ += delta; + ++head->count; + head->overheads += get_tick() - stopped_; } #endif /* USE_XPROF_STATS */ === modified file 'lib/profiler/Profiler.h' --- lib/profiler/Profiler.h 2011-01-11 07:31:04 +0000 +++ lib/profiler/Profiler.h 2012-06-12 02:25:24 +0000 @@ -18,6 +18,10 @@ #else /* USE_XPROF_STATS */ +#if HAVE_ATOMIC +#include +#endif + #define XP_NOBEST (hrtime_t)-1 typedef struct _xprof_stats_node xprof_stats_node; @@ -25,14 +29,16 @@ typedef struct _xprof_stats_data xprof_stats_data; struct _xprof_stats_data { - hrtime_t start; - hrtime_t stop; - hrtime_t delta; - hrtime_t best; - hrtime_t worst; - hrtime_t count; - hrtime_t accum; - int64_t summ; + std::atomic_flag lock; + std::atomic start; + std::atomic stop; +// std::atomic delta; // unused? + std::atomic best; + std::atomic worst; + std::atomic count; + std::atomic accum; + std::atomic overheads; + std::atomic summ; }; struct _xprof_stats_node { @@ -44,15 +50,48 @@ typedef xprof_stats_node TimersArray[1]; /* public Data */ + /// timer statistics histograms extern TimersArray *xprof_Timers; + /// count of active timers + extern std::atomic *xprof_ActiveTimerCount; + /// currently running timer accumulations + extern std::atomic *xprof_ActiveTimers; + /* Exported functions */ - extern void xprof_start(xprof_type type, const char *timer); - extern void xprof_stop(xprof_type type, const char *timer); extern void xprof_event(void *data); -#define PROF_start(probename) xprof_start(XPROF_##probename, #probename) -#define PROF_stop(probename) xprof_stop(XPROF_##probename, #probename) +/* +/// This class provides a thread-safe timer for use in code profiling +/// Timer kicks off when the class is creaated and terminates on destruct +/// or if stop() method is called explicitly. +/// +/// Use PROF_start(name) and PROF_stop(name) macros +*/ +class XProfilerNode +{ +public: + XProfilerNode(xprof_type aType, const char *aName) : type_(aType), name_(aName) { start(); } + ~XProfilerNode() { stop(); } + + // Initialize the xprof data arrays + // This function protects itself against re-entrant behaviour + // but in an expensive locking way. Potential callers should + // check with xprof_inited.load() to see if its needed first. + void initialize(); + + void start(); + void stop(); + +private: + xprof_type type_; ///< which profile type to update when stopped + const char *name_; ///< printable version of the timers type name + hrtime_t started_; ///< when this timer was started +}; + +#define PROF_start(probename) XProfilerNode XProfTimer_##probename(XPROF_##probename, #probename) +#define PROF_restart(probename) XProfTimer_##probename.start() +#define PROF_stop(probename) XProfTimer_##probename.stop() #endif /* USE_XPROF_STATS */ === modified file 'src/ProfStats.cc' --- src/ProfStats.cc 2012-01-20 18:55:04 +0000 +++ src/ProfStats.cc 2012-06-12 02:34:06 +0000 @@ -39,6 +39,7 @@ #include "event.h" #include "mgr/Registration.h" +#include "profiler/Profiler.h" #include "SquidMath.h" #include "Store.h" @@ -51,7 +52,6 @@ static hrtime_t xprof_verystart = 0; static hrtime_t xprof_average_delta = 0; static int xprof_events = 0; -static int xprof_inited = 0; static xprof_stats_data Totals; static TimersArray *xprof_stats_avg1sec = NULL; @@ -113,13 +113,14 @@ xprof_show_item(StoreEntry * sentry, const char *name, xprof_stats_data * hist) { storeAppendPrintf(sentry, - "%s\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t %.2f\t %6.3f\t\n", + "%s\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t" PRIu64 "\t %.2f\t %6.3f\t\n", name, hist->count, hist->summ, (hist->best != XP_NOBEST ? hist->best : 0), hist->count ? hist->summ / hist->count : 0, hist->worst, + hist->overheads, hist->count / time_frame, Math::doublePercent((double) hist->summ, (double) hist->delta)); } @@ -149,7 +150,7 @@ ); storeAppendPrintf(sentry, - "Probe Name\t Events\t cumulated time \t best case \t average \t worst case\t Rate / sec \t %% in int\n"); + "Probe Name\t Events\t cumulated time \t best case \t average \t worst case\toverheads\t Rate / sec \t %% in int\n"); for (i = 0; i < XPROF_LAST; i++) { if (!hist[i]->name) @@ -166,6 +167,8 @@ Totals.summ += show->summ; + Totals.overheads += show->overheads; + Totals.best += (show->best != XP_NOBEST ? show->best : 0); Totals.worst += show->worst; @@ -187,7 +190,7 @@ int doavg = (xprof_events % secs); if (!*list) - *list = (TimersArray *)xcalloc(XPROF_LAST, sizeof(xprof_stats_node)); + *list = new TimersArray(XPROF_LAST); hist = *list; @@ -195,6 +198,7 @@ for (i = 0; i < XPROF_LAST; i++) { hist[i]->name = head[i]->name; + hist[i]->accu.overheads += head[i]->accu.overheads; hist[i]->accu.summ += head[i]->accu.summ; hist[i]->accu.count += head[i]->accu.count; /* accumulate multisec */ @@ -259,40 +263,31 @@ static inline void xprof_chk_overhead(int samples) { + PROF_start(PROF_OVERHEAD); while (samples--) { - PROF_start(PROF_OVERHEAD); + PROF_restart(PROF_OVERHEAD); PROF_stop(PROF_OVERHEAD); } } static void -xprofRegisterWithCacheManager(void) +xprof_Init(void) { + xprof_delta = xprof_verystart = xprof_start_t = get_tick(); Mgr::RegisterAction("cpu_profile", "CPU Profiling Stats", xprof_summary, 0, 1); } -// FIXME: -// this gets colled once per event. This doesn't seem to make much sense, -// does it? -static hrtime_t now; -static void -xprof_Init(void) -{ - if (xprof_inited) - return; - - xprof_delta = xprof_verystart = xprof_start_t = now; - - xprof_inited = 1; - - xprofRegisterWithCacheManager(); //moved here so it's not double-init'ed -} - void xprof_event(void *data) { - now = get_tick(); - xprof_Init(); + static bool inited = false; + if (inited) { + xprof_Init(); + inited = true; + } + + hrtime_t now = get_tick(); + xprof_delta = now - xprof_start_t; xprof_start_t = now; xprof_events++;