Profiling, debugging, and some optimization/fixes. === modified file 'include/MemPoolMalloc.h' --- include/MemPoolMalloc.h 2010-05-29 00:59:35 +0000 +++ include/MemPoolMalloc.h 2010-10-23 04:04:00 +0000 @@ -21,6 +21,7 @@ */ #include "MemPool.h" +#include /// \ingroup MemPoolsAPI class MemPoolMalloc : public MemImplementingAllocator @@ -42,7 +43,7 @@ virtual void *allocate(); virtual void deallocate(void *, bool aggressive); private: - Stack freelist; + std::list freelist; }; === modified file 'include/profiling.h' --- include/profiling.h 2010-07-07 15:48:48 +0000 +++ include/profiling.h 2010-10-25 02:15:23 +0000 @@ -216,8 +216,10 @@ #else /* USE_XPROF_STATS */ +#ifndef PROF_start #define PROF_start(ARGS) ((void)0) #define PROF_stop(ARGS) ((void)0) +#endif #endif /* USE_XPROF_STATS */ === modified file 'lib/MemPool.cc' --- lib/MemPool.cc 2010-05-30 09:13:54 +0000 +++ lib/MemPool.cc 2010-10-23 00:46:45 +0000 @@ -247,6 +247,7 @@ { assert(obj != NULL); (void) VALGRIND_CHECK_MEM_IS_ADDRESSABLE(obj, obj_size); +assert(MemPools::GetInstance().mem_idle_limit != 0); deallocate(obj, MemPools::GetInstance().mem_idle_limit == 0); ++free_calls; } === modified file 'lib/MemPoolMalloc.cc' --- lib/MemPoolMalloc.cc 2010-05-30 00:15:16 +0000 +++ lib/MemPoolMalloc.cc 2010-10-23 01:05:56 +0000 @@ -54,8 +54,11 @@ void * MemPoolMalloc::allocate() { - void *obj = freelist.pop(); - if (obj) { + void *obj = NULL; + if (!freelist.empty()) { + obj = freelist.back(); + assert(obj); + freelist.pop_back(); memMeterDec(meter.idle); saved_calls++; } else { @@ -127,13 +130,15 @@ bool MemPoolMalloc::idleTrigger(int shift) const { - return freelist.count >> (shift ? 8 : 0); + return freelist.size() >> (shift ? 8 : 0); } void MemPoolMalloc::clean(time_t maxage) { - while (void *obj = freelist.pop()) { + while (!freelist.empty()) { + void *obj = freelist.back(); + freelist.pop_back(); memMeterDec(meter.idle); memMeterDec(meter.alloc); xfree(obj); === modified file 'src/Debug.h' --- src/Debug.h 2010-05-26 03:06:02 +0000 +++ src/Debug.h 2010-10-25 02:12:31 +0000 @@ -143,4 +143,16 @@ /* Legacy debug function definitions */ SQUIDCEXTERN void _db_print(const char *,...) PRINTF_FORMAT_ARG1; + +/* Live call chain progress profiling */ +extern bool ForceLiveProfiling; ///< turns all live profilers on +extern int LiveProfilingDepth; ///< number of profilers in the call chain +extern timeval LiveProfilingStart; ///< the start time of the call chain +extern void ProfileProgress(const char * const aStartName, const int aStartLine); +extern void LiveProfilingReport(); +extern unsigned int LivePorfilingEventCount; +#define Here __FILE__, __LINE__ + +#include "base/Profiling.h" + #endif /* SQUID_DEBUG_H */ === modified file 'src/EventLoop.cc' --- src/EventLoop.cc 2010-05-26 03:06:02 +0000 +++ src/EventLoop.cc 2010-10-25 04:04:14 +0000 @@ -34,6 +34,7 @@ #include "EventLoop.h" #include "base/AsyncCallQueue.h" +#include "base/Profiling.h" EventLoop::EventLoop() : errcount(0), last_loop(false), timeService(NULL), primaryEngine(NULL) @@ -42,13 +43,23 @@ void EventLoop::checkEngine(AsyncEngine * engine, bool const primary) { + static u_int64_t visits = 0; + ForceLiveProfiling = !(++visits % 50000); + getCurrentTime(); + LiveProfilingStart = current_time; + LiveProfilingDepth = 0; + LivePorfilingEventCount = 0; + int requested_delay; - if (!primary) + if (!primary) { + ProfileProgress(Here); requested_delay = engine->checkEvents(0); - else + } else requested_delay = engine->checkEvents(loop_delay); + ProfileProgress(Here); + if (requested_delay < 0) switch (requested_delay) { @@ -71,6 +82,25 @@ if (requested_delay < loop_delay) loop_delay = requested_delay; } + + + getCurrentTime(); + const int spent = tvSubUsec(LiveProfilingStart, current_time); + if (ForceLiveProfiling) { + debugs(1, 1, "timed checkEvents(" << primary << "): " << spent); + //LiveProfilingReport(); + } + + static double totalTime = 0; + static u_int64_t totalCount = 0; + totalTime += spent; + totalCount += LivePorfilingEventCount; + if (totalCount > 5000000) { + debugs(1, 1, "main loop events: " << totalTime << " / " << totalCount << " = " << totalTime/totalCount); + totalTime = 0; + totalCount = 0; + ScopeInfo::ReportAll(); + } } void === modified file 'src/HttpHeader.cc' --- src/HttpHeader.cc 2010-10-14 04:16:15 +0000 +++ src/HttpHeader.cc 2010-10-24 22:20:26 +0000 @@ -394,6 +394,7 @@ void HttpHeader::clean() { + ProfileProgress(Here); HttpHeaderPos pos = HttpHeaderInitPos; HttpHeaderEntry *e; @@ -431,7 +432,9 @@ delete e; } } + ProfileProgress(Here); entries.clean(); + ProfileProgress(Here); httpHeaderMaskInit(&mask, 0); PROF_stop(HttpHeaderClean); } @@ -826,7 +829,7 @@ void HttpHeader::compact() { - entries.prune(NULL); + entries.prune(); } /* @@ -1838,3 +1841,48 @@ refreshMask(); } } + + + +HttpHeaderEntries::HttpHeaderEntries(): count(0) +{ +} + +HttpHeaderEntries::~HttpHeaderEntries() +{ + assert(!count); // or we would leak entries + // TODO: we should own the entries and delete them when we are destructed +} + +void +HttpHeaderEntries::push_back(HttpHeaderEntry *e) +{ + assert(count < 1024); // XXX: hardcoded limit + items[count++] = e; +} + +void +HttpHeaderEntries::insert(HttpHeaderEntry *e) +{ + push_back(e); // XXX: old code inserted at position one (expensive!) +} + +void +HttpHeaderEntries::prune() +{ + size_t newCount = 0; + for (size_t i = 0; i < count; ++i) { + if (items[i]) { + if (newCount < i) + items[newCount] = items[i]; + ++newCount; + } + } + count = newCount; +} + +void +HttpHeaderEntries::clean() +{ + count = 0; +} === modified file 'src/HttpHeader.h' --- src/HttpHeader.h 2010-08-25 00:14:25 +0000 +++ src/HttpHeader.h 2010-10-24 21:57:27 +0000 @@ -179,6 +179,22 @@ /// \todo CLEANUP: Kill this. typedef struct _ETag ETag; +class HttpHeaderEntry; + +class HttpHeaderEntries { +public: + HttpHeaderEntries(); + ~HttpHeaderEntries(); + + void push_back(HttpHeaderEntry *); + void insert(HttpHeaderEntry *); + void prune(); + void clean(); + + size_t count; + HttpHeaderEntry *items[1024]; +}; + class HttpHeaderEntry { @@ -257,7 +273,7 @@ inline bool chunked() const; ///< whether message uses chunked Transfer-Encoding /* protected, do not use these, use interface functions instead */ - Vector entries; /**< parsed fields in raw format */ + HttpHeaderEntries entries; /**< parsed fields in raw format */ HttpHeaderMask mask; /**< bit set <=> entry present */ http_hdr_owner_type owner; /**< request or reply */ int len; /**< length when packed, not counting terminating null-byte */ === modified file 'src/MemObject.cc' --- src/MemObject.cc 2009-08-04 02:31:27 +0000 +++ src/MemObject.cc 2010-10-24 16:33:01 +0000 @@ -137,6 +137,7 @@ void MemObject::write ( StoreIOBuffer writeBuffer, STMCB *callback, void *callbackData) { + ProfileProgress(Here); PROF_start(MemObject_write); debugs(19, 6, "memWrite: offset " << writeBuffer.offset << " len " << writeBuffer.length); @@ -149,7 +150,9 @@ assert (data_hdr.endOffset() || writeBuffer.offset == 0); assert (data_hdr.write (writeBuffer)); + ProfileProgress(Here); callback (callbackData, writeBuffer); + ProfileProgress(Here); PROF_stop(MemObject_write); } === modified file 'src/Server.cc' --- src/Server.cc 2010-09-13 02:25:09 +0000 +++ src/Server.cc 2010-10-24 20:57:26 +0000 @@ -60,6 +60,7 @@ #endif receivedWholeRequestBody(false) { + ProfileProgress(Here); fwd = theFwdState; entry = fwd->entry; === modified file 'src/base/AsyncCall.cc' --- src/base/AsyncCall.cc 2009-04-09 22:31:13 +0000 +++ src/base/AsyncCall.cc 2010-10-24 01:52:02 +0000 @@ -28,10 +28,14 @@ void AsyncCall::make() { + ProfileProgress(Here); + debugs(debugSection, debugLevel, HERE << "make call " << name << " [call"<< id << ']'); if (canFire()) { + ProfileProgress(Here); fire(); + ProfileProgress(Here); return; } === modified file 'src/base/AsyncCallQueue.cc' --- src/base/AsyncCallQueue.cc 2009-04-09 22:31:13 +0000 +++ src/base/AsyncCallQueue.cc 2010-10-25 01:25:12 +0000 @@ -8,11 +8,12 @@ #include "base/AsyncCallQueue.h" #include "base/AsyncCall.h" +#include "SquidTime.h" AsyncCallQueue *AsyncCallQueue::TheInstance = 0; -AsyncCallQueue::AsyncCallQueue(): theHead(NULL), theTail(NULL) +AsyncCallQueue::AsyncCallQueue(): theHead(NULL), theTail(NULL), count(0) { } @@ -27,6 +28,7 @@ } else { // create queue from cratch theHead = theTail = call; } + ++count; } // Fire all scheduled calls; returns true if at least one call was fired. @@ -48,10 +50,42 @@ call->theNext = NULL; if (theTail == call) theTail = NULL; + --count; + + static u_int64_t visits = 0; + ForceLiveProfiling = !(++visits % 50000); + getCurrentTime(); + LiveProfilingStart = current_time; + LiveProfilingDepth = 0; + LivePorfilingEventCount = 0; debugs(call->debugSection, call->debugLevel, "entering " << *call); + ProfileProgress(Here); call->make(); + ProfileProgress(Here); debugs(call->debugSection, call->debugLevel, "leaving " << *call); + + getCurrentTime(); + const int spent = tvSubUsec(LiveProfilingStart, current_time); + static double sum = 0; + sum += spent; + if (spent >= 500 || LiveProfilingDepth >= 100 || ForceLiveProfiling) { + debugs(call->debugSection, 1, "timed " << call->name << ": " << spent << ' ' << sum/visits << " depth: " << count); + LiveProfilingReport(); + } + + static double totalTime = 0; + static double totalCount = 0; + totalTime += spent; + totalCount += LivePorfilingEventCount; + if (totalCount > 10000000.0) { + debugs(1, 1, "async call events: " << totalTime << " / " << totalCount << " = " << totalTime/totalCount); + totalTime = 0; + totalCount = 0; + } + + ForceLiveProfiling = false; + LiveProfilingDepth = 0; } AsyncCallQueue & === modified file 'src/base/AsyncCallQueue.h' --- src/base/AsyncCallQueue.h 2009-04-09 22:31:13 +0000 +++ src/base/AsyncCallQueue.h 2010-10-23 22:39:36 +0000 @@ -33,6 +33,7 @@ AsyncCall::Pointer theHead; AsyncCall::Pointer theTail; + int count; static AsyncCallQueue *TheInstance; }; === added file 'src/base/Profiling.cc' --- src/base/Profiling.cc 1970-01-01 00:00:00 +0000 +++ src/base/Profiling.cc 2010-10-25 04:06:24 +0000 @@ -0,0 +1,74 @@ +#include "base/Profiling.h" +#include "Debug.h" +#include +#include + +u_int64_t ScopeInfo::LastId = 0; + +static std::set Scopes; + +static u_int64_t MinReportVolume = 0; + + +ScopeInfo::ScopeInfo(const char * const aName, const int aLine): + totalTicks(0), totalVisits(0), + fileName(aName), fileLine(aLine), id(++LastId) +{ + doRegister(); +} + +ScopeInfo::~ScopeInfo() +{ + unRegister(); +} + +void +ScopeInfo::doRegister() +{ + Scopes.insert(this); +} + +void +ScopeInfo::unRegister() +{ + Scopes.erase(this); +} + +void +ScopeInfo::report() +{ + if (MinReportVolume && totalVisits >= MinReportVolume) { + debugs(1,1, fileName << ':' << fileLine << ": scope" << id << ": " << + totalTicks << " / " << totalVisits << " = " << + (static_cast(totalTicks)/(totalVisits ? totalVisits : 1))); + totalVisits = 0; + totalTicks = 0; + } +} + +static double TotalVisits = 0; +static double TotalTicks = 0; + +static +void ReportOne(ScopeInfo *scope) +{ + TotalTicks += scope->totalTicks; + TotalVisits += scope->totalVisits; + scope->report(); +} + +void +ScopeInfo::ReportAll() +{ + TotalTicks = 0; + TotalVisits = 0; + + debugs(1,1, "Total profiling scopes: " << Scopes.size()); + MinReportVolume = 1; + std::for_each(Scopes.begin(), Scopes.end(), &ReportOne); + MinReportVolume = INT_MAX; + + debugs(1,1, "All scopes: " << + TotalTicks << " / " << TotalVisits << " = " << + (static_cast(TotalTicks)/(TotalVisits ? TotalVisits : 1))); +} === added file 'src/base/Profiling.h' --- src/base/Profiling.h 1970-01-01 00:00:00 +0000 +++ src/base/Profiling.h 2010-10-25 02:25:35 +0000 @@ -0,0 +1,112 @@ +#ifndef SQUID_BASE_PROFILING_H +#define SQUID_BASE_PROFILING_H + +#include "config.h" + +/* start future ticking.h; XXX: move */ + +#if !_SQUID_SOLARIS_ +typedef int64_t hrtime_t; +#endif + +#if defined(__GNUC__) && ( defined(__i386) || defined(__i386__) ) +static inline hrtime_t +get_tick(void) +{ + hrtime_t regs; + +asm volatile ("rdtsc":"=A" (regs)); + return regs; + /* We need return value, we rely on CC to optimise out needless subf calls */ + /* Note that "rdtsc" is relatively slow OP and stalls the CPU pipes, so use it wisely */ +} + +#elif defined(__GNUC__) && ( defined(__x86_64) || defined(__x86_64__) ) +static inline hrtime_t +get_tick(void) +{ + uint32_t lo, hi; + // Based on an example in Wikipedia + /* We cannot use "=A", since this would use %rax on x86_64 */ +asm volatile ("rdtsc" : "=a" (lo), "=d" (hi)); + return (hrtime_t)hi << 32 | lo; +} + +#elif defined(__GNUC__) && defined(__alpha) +static inline hrtime_t +get_tick(void) +{ + hrtime_t regs; + +asm volatile ("rpcc %0" : "=r" (regs)); + return regs; +} + +#elif defined(_M_IX86) && defined(_MSC_VER) /* x86 platform on Microsoft C Compiler ONLY */ +static __inline hrtime_t +get_tick(void) +{ + hrtime_t regs; + + __asm { + cpuid + rdtsc + mov eax,DWORD PTR regs[0] + mov edx,DWORD PTR regs[4] + } + return regs; +} + +#else +/* This CPU is unsupported. Short-circuit, no profiling here */ +#define get_tick() 0 +#undef USE_XPROF_STATS +#define USE_XPROF_STATS 0 +#endif + +/* end future ticking.h */ + +class ScopeInfo { +public: + static void ReportAll(); + + ScopeInfo(const char * const aName, const int aLine); + ~ScopeInfo(); + + void doRegister(); + void unRegister(); + void report(); + + u_int64_t totalTicks; + u_int64_t totalVisits; + + const char * const fileName; + const int fileLine; + + const u_int64_t id; + static u_int64_t LastId; +}; + +class ScopeProfiler { +public: + ScopeProfiler(ScopeInfo &aScope): scope(aScope), startTicks(get_tick()) {} + ~ScopeProfiler() { + scope.totalTicks += get_tick() - startTicks; + if (++scope.totalVisits > 100000) + scope.report(); + } + + ScopeInfo &scope; + u_int64_t startTicks; +}; + +#define ProfileScope(name, line, uid) \ + static ScopeInfo ScopeInfoName ## uid(name, line); \ + ScopeProfiler profiler ## uid(ScopeInfoName ## uid); + +#undef PROF_start +#undef PROF_stop +#define PROF_start(type) ProfileScope(__FILE__, __LINE__, type) +#define PROF_stop(type) + +#endif /* SQUID_BASE_PROFILING_H */ === modified file 'src/client_side.cc' --- src/client_side.cc 2010-09-22 23:04:40 +0000 +++ src/client_side.cc 2010-10-24 17:16:05 +0000 @@ -2040,6 +2040,8 @@ StoreIOBuffer tempBuffer; int r; + ProfileProgress(Here); + /* pre-set these values to make aborting simpler */ *method_p = METHOD_NONE; @@ -2136,7 +2138,9 @@ HttpParserReqSz(hp)); /* Ok, all headers are received */ + ProfileProgress(Here); http = new ClientHttpRequest(conn); + ProfileProgress(Here); http->req_sz = HttpParserRequestLen(hp); result = ClientSocketContextNew(http); @@ -2168,6 +2172,7 @@ *t = '\0'; #endif + ProfileProgress(Here); /* Rewrite the URL in transparent or accelerator mode */ /* NP: there are several cases to traverse here: @@ -2209,10 +2214,12 @@ strcpy(http->uri, url); } + ProfileProgress(Here); setLogUri(http, http->uri); debugs(33, 5, "parseHttpRequest: Complete request received"); result->flags.parsed_ok = 1; xfree(url); + ProfileProgress(Here); return result; } @@ -2357,6 +2364,7 @@ static void clientProcessRequest(ConnStateData *conn, HttpParser *hp, ClientSocketContext *context, const HttpRequestMethod& method, HttpVersion http_ver) { + ProfileProgress(Here); ClientHttpRequest *http = context->http; HttpRequest *request = NULL; bool notedUseOfBuffer = false; @@ -2434,6 +2442,8 @@ goto finish; } + ProfileProgress(Here); + request->flags.accelerated = http->flags.accel; request->flags.ignore_cc = conn->port->ignore_cc; request->flags.no_direct = request->flags.accelerated ? !conn->port->allow_direct : 0; @@ -2478,6 +2488,8 @@ request->my_addr = conn->me; request->http_ver = http_ver; + ProfileProgress(Here); + if (request->header.chunked()) { chunked = true; } else if (request->header.has(HDR_TRANSFER_ENCODING)) { @@ -2538,6 +2550,8 @@ if (http->request->method == METHOD_CONNECT) context->mayUseConnection(true); + ProfileProgress(Here); + /* Do we expect a request-body? */ expectBody = chunked || request->content_length > 0; if (!context->mayUseConnection() && expectBody) { @@ -2573,11 +2587,13 @@ context->mayUseConnection(!request->body_pipe->productionEnded()); } + ProfileProgress(Here); http->calloutContext = new ClientRequestContext(http); http->doCallouts(); finish: + ProfileProgress(Here); if (!notedUseOfBuffer) connNoteUseOfBuffer(conn, http->req_sz); @@ -2594,6 +2610,7 @@ comm_reset_close(conn->fd); return; } + ProfileProgress(Here); } static void @@ -2636,6 +2653,7 @@ HttpVersion http_ver; HttpParser hp; + ProfileProgress(Here); debugs(33, 5, "clientParseRequest: FD " << conn->fd << ": attempting to parse"); // Loop while we have read bytes that are not needed for producing the body @@ -2666,7 +2684,7 @@ PROF_start(parseHttpRequest); context = parseHttpRequest(conn, &hp, &method, &http_ver); - + ProfileProgress(Here); PROF_stop(parseHttpRequest); /* partial or incomplete request */ @@ -2683,7 +2701,9 @@ commSetTimeout(conn->fd, Config.Timeout.lifetime, clientLifetimeTimeout, context->http); + ProfileProgress(Here); clientProcessRequest(conn, &hp, context, method, http_ver); + ProfileProgress(Here); parsed_req = true; @@ -2697,12 +2717,16 @@ /* XXX where to 'finish' the parsing pass? */ + ProfileProgress(Here); + return parsed_req; } void ConnStateData::clientReadRequest(const CommIoCbParams &io) { + ProfileProgress(Here); + debugs(33,5,HERE << "clientReadRequest FD " << io.fd << " size " << io.size); Must(reading()); reader = NULL; @@ -2778,9 +2802,11 @@ * be if we have an incomplete request. * XXX: This duplicates ClientSocketContext::keepaliveNextRequest */ + ProfileProgress(Here); if (getConcurrentRequestCount() == 0 && commIsHalfClosed(fd)) { debugs(33, 5, "clientReadRequest: FD " << fd << ": half-closed connection, no completed request parsed, connection closing."); comm_close(fd); + ProfileProgress(Here); return; } } @@ -2788,7 +2814,9 @@ if (!isOpen()) return; + ProfileProgress(Here); clientAfterReadingRequests(do_next_read); + ProfileProgress(Here); } /** @@ -2800,6 +2828,7 @@ bool ConnStateData::handleReadData(char *buf, size_t size) { + ProfileProgress(Here); char *current_buf = in.addressToReadInto(); if (buf != current_buf) @@ -2809,9 +2838,12 @@ in.buf[in.notYetUsed] = '\0'; /* Terminate the string */ + ProfileProgress(Here); + // if we are reading a body, stuff data into the body pipe if (bodyPipe != NULL) return handleRequestBodyData(); + ProfileProgress(Here); return true; } @@ -2825,6 +2857,7 @@ bool ConnStateData::handleRequestBodyData() { + ProfileProgress(Here); assert(bodyPipe != NULL); size_t putSize = 0; @@ -2846,6 +2879,7 @@ if (putSize > 0) connNoteUseOfBuffer(this, putSize); + ProfileProgress(Here); if (!bodyPipe) { debugs(33,5, HERE << "produced entire request body for FD " << fd); @@ -2853,11 +2887,14 @@ /* we've finished reading like good clients, * now do the close that initiateClose initiated. */ + ProfileProgress(Here); comm_close(fd); + ProfileProgress(Here); return false; } } + ProfileProgress(Here); return true; } === modified file 'src/client_side_reply.cc' --- src/client_side_reply.cc 2010-09-28 17:43:42 +0000 +++ src/client_side_reply.cc 2010-10-24 20:39:21 +0000 @@ -616,6 +616,7 @@ void clientReplyContext::processMiss() { + ProfileProgress(Here); char *url = http->uri; HttpRequest *r = http->request; ErrorState *err = NULL; @@ -768,6 +769,7 @@ void clientReplyContext::created(StoreEntry *newEntry) { + ProfileProgress(Here); if (lookingforstore == 1) purgeFoundGet(newEntry); else if (lookingforstore == 2) @@ -1488,6 +1490,7 @@ void clientReplyContext::identifyFoundObject(StoreEntry *newEntry) { + ProfileProgress(Here); StoreEntry *e = newEntry; HttpRequest *r = http->request; @@ -1541,6 +1544,7 @@ #endif + ProfileProgress(Here); if (NULL == http->storeEntry()) { /** \li If no StoreEntry object is current assume this object isn't in the cache set MISS*/ debugs(85, 3, "clientProcessRequest2: StoreEntry is NULL - MISS"); @@ -1607,6 +1611,7 @@ void clientGetMoreData(clientStreamNode * aNode, ClientHttpRequest * http) { + ProfileProgress(Here); /* Test preconditions */ assert(aNode != NULL); assert(cbdataReferenceValid(aNode)); @@ -1629,8 +1634,10 @@ tempBuffer.length = next->readBuffer.length; tempBuffer.data = next->readBuffer.data; + ProfileProgress(Here); storeClientCopy(context->sc, http->storeEntry(), tempBuffer, clientReplyContext::SendMoreData, context); + ProfileProgress(Here); return; } @@ -1639,6 +1646,7 @@ return; } + ProfileProgress(Here); // OPTIONS with Max-Forwards:0 handled in clientProcessRequest() if (context->http->request->method == METHOD_TRACE) { @@ -1651,13 +1659,17 @@ http->logType = LOG_TCP_MISS; context->doGetMoreData(); - } else + } else { + ProfileProgress(Here); context->identifyStoreObject(); + } + ProfileProgress(Here); } void clientReplyContext::doGetMoreData() { + ProfileProgress(Here); /* We still have to do store logic processing - vary, cache hit etc */ if (http->storeEntry() != NULL) { /* someone found the object in the cache for us */ @@ -1702,6 +1714,7 @@ /* MISS CASE, http->logType is already set! */ processMiss(); } + ProfileProgress(Here); } /** The next node has removed itself from the stream. */ === modified file 'src/client_side_request.cc' --- src/client_side_request.cc 2010-09-28 17:43:42 +0000 +++ src/client_side_request.cc 2010-10-24 17:27:51 +0000 @@ -1135,6 +1135,7 @@ void ClientHttpRequest::httpStart() { + ProfileProgress(Here); PROF_start(httpStart); logType = LOG_TAG_NONE; debugs(85, 4, "ClientHttpRequest::httpStart: " << log_tags[logType] << " for '" << uri << "'"); @@ -1277,6 +1278,7 @@ void ClientHttpRequest::doCallouts() { + ProfileProgress(Here); assert(calloutContext); /*Save the original request for logging purposes*/ @@ -1287,6 +1289,7 @@ debugs(83, 3, HERE << "Doing calloutContext->clientAccessCheck()"); calloutContext->http_access_done = true; calloutContext->clientAccessCheck(); + ProfileProgress(Here); return; } @@ -1296,7 +1299,10 @@ if (Adaptation::AccessCheck::Start( Adaptation::methodReqmod, Adaptation::pointPreCache, request, NULL, adaptationAclCheckDoneWrapper, calloutContext)) +{ + ProfileProgress(Here); return; // will call callback +} } #endif @@ -1308,6 +1314,7 @@ debugs(83, 3, HERE << "Doing calloutContext->clientRedirectStart()"); calloutContext->redirect_state = REDIRECT_PENDING; calloutContext->clientRedirectStart(); + ProfileProgress(Here); return; } } @@ -1316,6 +1323,7 @@ debugs(83, 3, HERE << "Doing calloutContext->clientAccessCheck2()"); calloutContext->adapted_http_access_done = true; calloutContext->clientAccessCheck2(); + ProfileProgress(Here); return; } @@ -1323,6 +1331,7 @@ debugs(83, 3, HERE << "Doing clientInterpretRequestHeaders()"); calloutContext->interpreted_req_hdrs = 1; clientInterpretRequestHeaders(this); + ProfileProgress(Here); } if (!calloutContext->no_cache_done) { @@ -1331,6 +1340,7 @@ if (Config.accessList.noCache && request->flags.cachable) { debugs(83, 3, HERE << "Doing calloutContext->checkNoCache()"); calloutContext->checkNoCache(); + ProfileProgress(Here); return; } } @@ -1344,9 +1354,11 @@ int tos = aclMapTOS(Config.accessList.clientside_tos, &ch); if (tos) comm_set_tos(getConn()->fd, tos); + ProfileProgress(Here); } } + ProfileProgress(Here); cbdataReferenceDone(calloutContext->http); delete calloutContext; calloutContext = NULL; @@ -1356,13 +1368,16 @@ #endif debugs(83, 3, HERE << "calling processRequest()"); + ProfileProgress(Here); processRequest(); + ProfileProgress(Here); #if ICAP_CLIENT Adaptation::Icap::History::Pointer ih = request->icapHistory(); if (ih != NULL) ih->logType = logType; #endif + ProfileProgress(Here); } #if !_USE_INLINE_ === modified file 'src/comm_epoll.cc' --- src/comm_epoll.cc 2010-10-14 04:16:15 +0000 +++ src/comm_epoll.cc 2010-10-24 22:56:01 +0000 @@ -280,6 +280,7 @@ PROF_stop(comm_check_incoming); getCurrentTime(); + LiveProfilingStart = current_time; statHistCount(&statCounter.select_fds_hist, num); @@ -287,6 +288,7 @@ return COMM_TIMEOUT; /* No error.. */ PROF_start(comm_handle_ready_fd); + ProfileProgress(Here); for (i = 0, cevents = pevents; i < num; i++, cevents++) { fd = cevents->data.fd; @@ -297,6 +299,8 @@ // TODO: add EPOLLPRI?? + ProfileProgress(Here); + if (cevents->events & (EPOLLIN|EPOLLHUP|EPOLLERR) || F->flags.read_pending) { if ((hdl = F->read_handler) != NULL) { debugs(5, DEBUG_EPOLL ? 0 : 8, "comm_select(): Calling read handler on FD " << fd); @@ -313,6 +317,8 @@ } } + ProfileProgress(Here); + if (cevents->events & (EPOLLOUT|EPOLLHUP|EPOLLERR)) { if ((hdl = F->write_handler) != NULL) { debugs(5, DEBUG_EPOLL ? 0 : 8, "comm_select(): Calling write handler on FD " << fd); @@ -329,6 +335,7 @@ } } + ProfileProgress(Here); PROF_stop(comm_handle_ready_fd); return COMM_OK; === modified file 'src/debug.cc' --- src/debug.cc 2010-07-06 23:09:44 +0000 +++ src/debug.cc 2010-10-25 02:12:46 +0000 @@ -805,3 +805,85 @@ return path+BuildPrefixLength; } + +bool ForceLiveProfiling = false; +int LiveProfilingDepth = 0; +timeval LiveProfilingStart = {0, 0}; +static const int LiveProfilingDepthMax = 10240; +unsigned int LivePorfilingEventCount = 0; + +class LiveProfilingHistoryStep { +public: + const char *fileName; + int lineNo; + timeval stamp; +}; +static LiveProfilingHistoryStep LiveProfilingHistory[LiveProfilingDepthMax]; +void ProfileProgress(const char * const fileName, const int lineNo) +{ + ++LivePorfilingEventCount; +return; + getCurrentTime(); + + LiveProfilingHistoryStep &step = + LiveProfilingHistory[min(LiveProfilingDepth++, LiveProfilingDepthMax)]; + step.fileName = fileName; + step.lineNo = lineNo; + step.stamp = current_time; +} + +void LiveProfilingReport() +{ + timeval last = LiveProfilingStart; + int aggr = 0; + for (int i = 0; i < LiveProfilingDepth; ++i) { + LiveProfilingHistoryStep &step = LiveProfilingHistory[i]; + const int spent = tvSubUsec(last, step.stamp); + aggr += spent; + debugs(1,1, step.fileName << ':' << step.lineNo << ": step" << i << + ": " << spent << " / " << aggr); + last = step.stamp; + } +} + +#if 0 /* XXX: remove old */ +void ProfileProgress(const char * const fileName, const int lineNo) +{ + static const char *LastFileName = "?"; + static int LastLineNo = 0; + static int LastSpent = 0; + + getCurrentTime(); + static timeval Last = { 0,0 }; + static int Aggr = 0; + if (!LiveProfilingDepth++) { + Last = LiveProfilingStart; + Aggr = 0; + LastFileName = 0; + LastLineNo = LastSpent = 0; + } + const int spent = tvSubUsec(Last, current_time); + Aggr += spent; + Last = current_time; + + if (!ForceLiveProfiling && spent > 200 && LastFileName) { + debugs(1,1, LastFileName << ':' << LastLineNo << ": " << + (LiveProfilingDepth-1) << "-spent: " << LastSpent << " (old)"); + LastFileName = 0; + } + + if (ForceLiveProfiling || spent > 200) { + debugs(1,1, fileName << ':' << lineNo << ": " << LiveProfilingDepth << + "-spent: " << spent << " / " << Aggr << " or " + << tvSubUsec(LiveProfilingStart, current_time)); + LastFileName = 0; + } else { + LastFileName = fileName; + LastLineNo = lineNo; + LastSpent = spent; + } +} +#endif + + +#include "base/Profiling.cc" === modified file 'src/forward.cc' --- src/forward.cc 2010-10-14 04:16:15 +0000 +++ src/forward.cc 2010-10-24 20:56:11 +0000 @@ -105,6 +105,7 @@ // Called once, right after object creation, when it is safe to set self void FwdState::start(Pointer aSelf) { + ProfileProgress(Here); // Protect ourselves from being destroyed when the only Server pointing // to us is gone (while we expect to talk to more Servers later). // Once we set self, we are responsible for clearing it when we do not @@ -116,6 +117,7 @@ entry->registerAbort(FwdState::abort, this); peerSelect(request, entry, fwdStartCompleteWrapper, this); + ProfileProgress(Here); // TODO: set self _after_ the peer is selected because we do not need // self until we start talking to some Server. @@ -199,6 +201,7 @@ void FwdState::fwdStart(int client_fd, StoreEntry *entry, HttpRequest *request) { + ProfileProgress(Here); /** \note * client_addr == no_addr indicates this is an "internal" request * from peer_digest.c, asn.c, netdb.c, etc and should always @@ -789,6 +792,7 @@ void FwdState::connectStart() { + ProfileProgress(Here); const char *url = entry->url(); int fd = -1; FwdServer *fs = servers; @@ -960,6 +964,7 @@ updateHierarchyInfo(); commConnectStart(fd, host, port, fwdConnectDoneWrapper, this); + ProfileProgress(Here); } void @@ -988,6 +993,7 @@ void FwdState::dispatch() { + ProfileProgress(Here); peer *p = NULL; debugs(17, 3, "fwdDispatch: FD " << client_fd << ": Fetching '" << RequestMethodStr(request->method) << " " << entry->url() << "'" ); /* @@ -1051,6 +1057,7 @@ } } #endif + ProfileProgress(Here); if (servers && (p = servers->_peer)) { p->stats.fetches++; === modified file 'src/http.cc' --- src/http.cc 2010-09-28 17:43:42 +0000 +++ src/http.cc 2010-10-24 21:21:59 +0000 @@ -63,6 +63,43 @@ #include "SquidTime.h" #include "Store.h" +#if 0 // XXX: remove +//#include "CodeTimer.h" +timeval CodeTimerStart = { 0, 0}; +class CodeTimer { +public: + CodeTimer(const char * const aStartName, const int aStartLine): + startName(aStartName), startLine(aStartLine) + { + if (!Depth++) + LastTime = CodeTimerStart; + tick(startName, startLine, "@beg "); + } + ~CodeTimer() { tick(startName, startLine, "@end "); --Depth; } + + void tick(const char * const nowName, const int nowLine, const char * const prefix = "") + { + getCurrentTime(); + int spent = -1; + if (LastTime.tv_sec > 0) + spent = tvSubUsec(LastTime, current_time); + else + if (CodeTimerStart.tv_sec > 0) + spent = tvSubUsec(CodeTimerStart, current_time); + + if (CodeTimerStart.tv_sec > 0 || spent > 200) + debugs(1,1, nowName << ':' << nowLine << ": " << prefix << "spent: +" << spent); + LastTime = current_time; + } + + const char * const startName; + const int startLine; + static timeval LastTime; + static int Depth; +}; +timeval CodeTimer::LastTime = { 0, 0}; +int CodeTimer::Depth = 0; +#endif #define SQUID_ENTER_THROWING_CODE() try { #define SQUID_EXIT_THROWING_CODE(status) \ @@ -85,6 +122,7 @@ lastChunk(0), header_bytes_read(0), reply_bytes_read(0), body_bytes_truncated(0), httpChunkDecoder(NULL) { + ProfileProgress(Here); debugs(11,5,HERE << "HttpStateData " << this << " created"); ignoreCacheControl = false; surrogateNoStore = false; @@ -1077,6 +1115,8 @@ int clen; int len = io.size; + ProfileProgress(Here); + assert(fd == io.fd); flags.do_next_read = 0; @@ -1115,6 +1155,7 @@ // update I/O stats if (len > 0) { readBuf->appended(len); + ProfileProgress(Here); reply_bytes_read += len; #if DELAY_POOLS @@ -1137,6 +1178,8 @@ sent.tv_sec ? tvSubMsec(sent, current_time) : -1; } + ProfileProgress(Here); + /** \par * Here the RFC says we should ignore whitespace between replies, but we can't as * doing so breaks HTTP/0.9 replies beginning with witespace, and in addition @@ -1181,6 +1224,7 @@ } } + ProfileProgress(Here); processReply(); } @@ -1190,6 +1234,8 @@ HttpStateData::processReply() { + ProfileProgress(Here); + if (flags.handling1xx) { // we came back after handling a 1xx response debugs(11, 5, HERE << "done with 1xx handling"); flags.handling1xx = false; @@ -1199,11 +1245,13 @@ if (!flags.headers_parsed) { // have not parsed headers yet? PROF_start(HttpStateData_processReplyHeader); processReplyHeader(); + ProfileProgress(Here); PROF_stop(HttpStateData_processReplyHeader); if (!continueAfterParsingHeader()) // parsing error or need more data return; // TODO: send errors to ICAP + ProfileProgress(Here); adaptOrFinalizeReply(); } @@ -1312,10 +1360,13 @@ void HttpStateData::writeReplyBody() { + ProfileProgress(Here); truncateVirginBody(); // if needed const char *data = readBuf->content(); int len = readBuf->contentSize(); + ProfileProgress(Here); addVirginReplyBody(data, len); + ProfileProgress(Here); readBuf->consume(len); } @@ -1352,6 +1403,8 @@ void HttpStateData::processReplyBody() { + ProfileProgress(Here); + AsyncCall::Pointer call; Ip::Address client_addr; bool ispinned = false; @@ -1376,6 +1429,7 @@ */ if (flags.chunked) { if (!decodeAndWriteReplyBody()) { + ProfileProgress(Here); flags.do_next_read = 0; serverComplete(); return; @@ -1383,6 +1437,7 @@ } else writeReplyBody(); + ProfileProgress(Here); if (EBIT_TEST(entry->flags, ENTRY_ABORTED)) { /* * The above writeReplyBody() call could ABORT this entry, @@ -1413,6 +1468,8 @@ commSetTimeout(fd, -1, call); flags.do_next_read = 0; + ProfileProgress(Here); + comm_remove_close_handler(fd, closeHandler); closeHandler = NULL; fwd->unregister(fd); @@ -1436,6 +1493,7 @@ fd = -1; + ProfileProgress(Here); serverComplete(); return; @@ -1445,6 +1503,7 @@ return; } + ProfileProgress(Here); maybeReadVirginBody(); } @@ -2026,6 +2085,7 @@ StoreEntry * sentry, MemBuf * mb) { + ProfileProgress(Here); const int offset = mb->size; HttpVersion httpver(1,1); mb->Printf("%s %s HTTP/%d.%d\r\n", @@ -2037,6 +2097,7 @@ HttpHeader hdr(hoRequest); Packer p; httpBuildRequestHeader(aRequest, original_request, sentry, &hdr, flags); + ProfileProgress(Here); if (aRequest->flags.pinned && aRequest->flags.connection_auth) aRequest->flags.auth_sent = 1; @@ -2045,11 +2106,14 @@ packerToMemInit(&p, mb); hdr.packInto(&p); + ProfileProgress(Here); hdr.clean(); packerClean(&p); + ProfileProgress(Here); } /* append header terminator */ mb->append(crlf, 2); + ProfileProgress(Here); return mb->size - offset; } @@ -2057,6 +2121,7 @@ bool HttpStateData::sendRequest() { + ProfileProgress(Here); MemBuf mb; debugs(11, 5, "httpSendRequest: FD " << fd << ", request " << request << ", this " << this << "."); @@ -2074,6 +2139,7 @@ flags.do_next_read = 1; maybeReadVirginBody(); + ProfileProgress(Here); if (orig_request->body_pipe != NULL) { if (!startRequestBodyFlow()) // register to receive body data return false; @@ -2128,12 +2194,16 @@ flags.front_end_https = _peer->front_end_https; } + ProfileProgress(Here); + mb.init(); request->peer_host=_peer?_peer->host:NULL; buildRequestPrefix(request, orig_request, entry, &mb); + ProfileProgress(Here); debugs(11, 6, "httpSendRequest: FD " << fd << ":\n" << mb.buf); comm_write_mbuf(fd, &mb, requestSender); + ProfileProgress(Here); return true; } === modified file 'src/mem.cc' --- src/mem.cc 2010-10-14 04:16:15 +0000 +++ src/mem.cc 2010-10-23 00:49:16 +0000 @@ -488,7 +488,11 @@ memClean(void) { MemPoolGlobalStats stats; - MemPools::GetInstance().setIdleLimit(0); + +// XXX: restore +// MemPools::GetInstance().setIdleLimit(0); +// XXX + MemPools::GetInstance().clean(0); memPoolGetGlobalStats(&stats); === modified file 'src/peer_select.cc' --- src/peer_select.cc 2010-01-30 00:30:56 +0000 +++ src/peer_select.cc 2010-10-24 20:48:36 +0000 @@ -126,6 +126,7 @@ PSC * callback, void *callback_data) { + ProfileProgress(Here); ps_state *psstate; if (entry) @@ -180,6 +181,7 @@ static void peerSelectCallback(ps_state * psstate) { + ProfileProgress(Here); StoreEntry *entry = psstate->entry; FwdServer *fs = psstate->servers; PSC *callback; @@ -261,6 +263,7 @@ static void peerSelectFoo(ps_state * ps) { + ProfileProgress(Here); StoreEntry *entry = ps->entry; HttpRequest *request = ps->request; debugs(44, 3, "peerSelectFoo: '" << RequestMethodStr(request->method) << " " << request->GetHost() << "'"); @@ -305,6 +308,7 @@ debugs(44, 3, "peerSelectFoo: direct = " << DirectStr[ps->direct]); } + ProfileProgress(Here); if (!entry || entry->ping_status == PING_NONE) peerSelectPinned(ps); if (entry == NULL) { @@ -345,6 +349,7 @@ } peerSelectCallback(ps); + ProfileProgress(Here); } /* @@ -762,6 +767,7 @@ static void peerAddFwdServer(FwdServer ** FSVR, peer * p, hier_code code) { + ProfileProgress(Here); FwdServer *fs = (FwdServer *)memAllocate(MEM_FWD_SERVER); debugs(44, 5, "peerAddFwdServer: adding " << (p ? p->host : "DIRECT") << " " << @@ -773,6 +779,7 @@ FSVR = &(*FSVR)->next; *FSVR = fs; + ProfileProgress(Here); } void * === modified file 'src/stmem.cc' --- src/stmem.cc 2009-03-06 13:26:57 +0000 +++ src/stmem.cc 2010-10-24 16:24:19 +0000 @@ -133,6 +133,7 @@ size_t mem_hdr::writeAvailable(mem_node *aNode, int64_t location, size_t amount, char const *source) { + ProfileProgress(Here); /* if we attempt to overwrite existing data or leave a gap within a node */ assert (location == aNode->nodeBuffer.offset + (int64_t)aNode->nodeBuffer.length); /* And we are not at the end of the node */ @@ -142,7 +143,9 @@ assert (location - aNode->nodeBuffer.offset == (int64_t)aNode->nodeBuffer.length); size_t copyLen = min(amount, aNode->space()); + ProfileProgress(Here); xmemcpy(aNode->nodeBuffer.data + aNode->nodeBuffer.length, source, copyLen); + ProfileProgress(Here); if (inmem_hi <= location) inmem_hi = location + copyLen; @@ -157,6 +160,7 @@ mem_hdr::appendNode (mem_node *aNode) { nodes.insert (aNode, NodeCompare); + ProfileProgress(Here); } void @@ -325,10 +329,13 @@ mem_node * mem_hdr::nodeToRecieve(int64_t offset) { + ProfileProgress(Here); + /* case 1: Nothing in memory */ if (!nodes.size()) { appendNode (new mem_node(offset)); + ProfileProgress(Here); return nodes.start()->data; } @@ -344,6 +351,7 @@ candidate = *leadup; } + ProfileProgress(Here); if (candidate && candidate->canAccept(offset)) return candidate; @@ -351,6 +359,7 @@ candidate = new mem_node(offset); appendNode (candidate); + ProfileProgress(Here); /* simpler to write than a indented if */ return candidate; @@ -360,6 +369,7 @@ bool mem_hdr::write (StoreIOBuffer const &writeBuffer) { + ProfileProgress(Here); PROF_start(mem_hdr_write); debugs(19, 6, "mem_hdr::write: " << writeBuffer.range() << " object end " << endOffset()); @@ -371,6 +381,7 @@ return false; } + ProfileProgress(Here); assert (writeBuffer.offset >= 0); mem_node *target; @@ -386,6 +397,7 @@ currentSource += wrote; } + ProfileProgress(Here); PROF_stop(mem_hdr_write); return true; } === modified file 'src/store.cc' --- src/store.cc 2010-10-14 04:16:15 +0000 +++ src/store.cc 2010-10-25 04:03:29 +0000 @@ -379,6 +379,7 @@ void StoreEntry::destroyMemObject() { + ProfileScope(__FILE__, __LINE__, 0); debugs(20, 3, HERE << "destroyMemObject " << mem_obj); setMemStatus(NOT_IN_MEMORY); MemObject *mem = mem_obj; @@ -389,6 +390,7 @@ void destroyStoreEntry(void *data) { + ProfileScope(__FILE__, __LINE__, 0); debugs(20, 3, HERE << "destroyStoreEntry: destroying " << data); StoreEntry *e = static_cast(static_cast(data)); assert(e != NULL); @@ -410,6 +412,7 @@ void StoreEntry::hashInsert(const cache_key * someKey) { + ProfileScope(__FILE__, __LINE__, 0); debugs(20, 3, "StoreEntry::hashInsert: Inserting Entry " << this << " key '" << storeKeyText(someKey) << "'"); key = storeKeyDup(someKey); hash_join(store_table, this); @@ -418,6 +421,7 @@ void StoreEntry::hashDelete() { + ProfileScope(__FILE__, __LINE__, 0); hash_remove_link(store_table, this); storeKeyFree((const cache_key *)key); key = NULL; @@ -813,6 +817,8 @@ void StoreEntry::write (StoreIOBuffer writeBuffer) { + ProfileProgress(Here); + assert(mem_obj != NULL); assert(writeBuffer.length >= 0); /* This assert will change when we teach the store to update */ @@ -835,8 +841,11 @@ debugs(20, 5, "storeWrite: writing " << writeBuffer.length << " bytes for '" << getMD5Text() << "'"); PROF_stop(StoreEntry_write); + ProfileProgress(Here); storeGetMemSpace(writeBuffer.length); + ProfileProgress(Here); mem_obj->write (writeBuffer, storeWriteComplete, this); + ProfileProgress(Here); } /* Append incoming data from a primary server to an entry. */ @@ -1791,7 +1800,21 @@ assert (isEmpty()); - getReply()->packHeadersInto(&p); + // Pack header into a temporary buffer and then write the whole buffer + // at once to avoid hundreds of calls to mem_hdr::write() per header. + // While each call is relatively cheap, it is death by thousands cuts. + { + MemBuf hdrBuf; + hdrBuf.init(); + + Packer hdrPacker; + packerToMemInit(&hdrPacker, &hdrBuf); + getReply()->packHeadersInto(&hdrPacker); + packerClean(&hdrPacker); + + if (hdrBuf.contentSize()) + packerAppend(&p, hdrBuf.content(), hdrBuf.contentSize()); + } rep->hdr_sz = mem_obj->endOffset(); === modified file 'src/store_client.cc' --- src/store_client.cc 2010-04-07 09:49:57 +0000 +++ src/store_client.cc 2010-10-25 16:32:34 +0000 @@ -444,8 +444,11 @@ /* What the client wants is in memory */ /* Old style */ debugs(90, 3, "store_client::doCopy: Copying normal from memory"); + ProfileProgress(Here); size_t sz = entry->mem_obj->data_hdr.copy(copyInto); + ProfileProgress(Here); callback(sz); + ProfileProgress(Here); flags.store_copying = 0; } === modified file 'src/tools.cc' --- src/tools.cc 2010-09-23 17:58:15 +0000 +++ src/tools.cc 2010-10-20 16:35:09 +0000 @@ -424,7 +424,7 @@ if (state == 0) { #if !MEM_GEN_TRACE - Debug::parseOptions("ALL,7"); + Debug::parseOptions("ALL,9"); #else log_trace_done();