diff --git src/DiskIO/IpcIo/IpcIoFile.cc src/DiskIO/IpcIo/IpcIoFile.cc
index 337fdb9..c2845fe 100644
--- src/DiskIO/IpcIo/IpcIoFile.cc
+++ src/DiskIO/IpcIo/IpcIoFile.cc
@@ -654,64 +654,67 @@ diskerWrite(IpcIoMsg &ipcIo)
 void
 IpcIoFile::DiskerHandleMoreRequests(void *source)
 {
     debugs(47, 7, HERE << "resuming handling requests after " << static_cast<const char *>(source));
     DiskerHandleMoreRequestsScheduled = false;
     IpcIoFile::DiskerHandleRequests();
 }

 bool
 IpcIoFile::WaitBeforePop()
 {
     const Ipc::QueueReader::Rate::Value ioRate = queue->localRateLimit();
     const double maxRate = ioRate/1e3; // req/ms

     // do we need to enforce configured I/O rate?
     if (maxRate <= 0)
         return false;

     // is there an I/O request we could potentially delay?
-    if (!queue->popReady()) {
-        // unlike pop(), popReady() is not reliable and does not block reader
+    int processId;
+    IpcIoMsg ipcIo;
+    if (!queue->peek(processId, ipcIo)) {
+        // unlike pop(), peek() is not reliable and does not block reader
         // so we must proceed with pop() even if it is likely to fail
         return false;
     }

     static timeval LastIo = current_time;
     const double ioDuration = 1.0 / maxRate; // ideal distance between two I/Os
     // do not accumulate more than 100ms or 100 I/Os, whichever is smaller
     const int64_t maxImbalance = min(static_cast<int64_t>(100), static_cast<int64_t>(100 * ioDuration));

     const double credit = ioDuration; // what the last I/O should have cost us
     const double debit = tvSubMsec(LastIo, current_time); // actual distance from the last I/O
     LastIo = current_time;

     Ipc::QueueReader::Balance &balance = queue->localBalance();
     balance += static_cast<int64_t>(credit - debit);

     debugs(47, 7, HERE << "rate limiting balance: " << balance << " after +" << credit << " -" << debit);

-    if (balance > maxImbalance) {
-        // if we accumulated too much time for future slow I/Os,
-        // then shed accumulated time to keep just half of the excess
+    if (ipcIo.command == IpcIo::cmdWrite && balance > maxImbalance) {
+        // if the next request is (likely) write and we accumulated
+        // too much time for future slow I/Os, then shed accumulated
+        // time to keep just half of the excess
         const int64_t toSpend = balance - maxImbalance/2;

         if (toSpend/1e3 > Timeout)
             debugs(47, DBG_IMPORTANT, "WARNING: Rock disker delays I/O " <<
                    "requests for " << (toSpend/1e3) << " seconds to obey " <<
                    ioRate << "/sec rate limit");

         debugs(47, 3, HERE << "rate limiting by " << toSpend <<
                " ms to get " << (1e3*maxRate) << "/sec rate");
         eventAdd("IpcIoFile::DiskerHandleMoreRequests",
                  &IpcIoFile::DiskerHandleMoreRequests,
                  const_cast<char*>("rate limiting"),
                  toSpend/1e3, 0, false);
         DiskerHandleMoreRequestsScheduled = true;
         return true;
     } else if (balance < -maxImbalance) {
         // do not owe "too much" to avoid "too large" bursts of I/O
         balance = -maxImbalance;
     }
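Review note: WaitBeforePop() above implements a credit/debit variant of a leaky bucket. Each I/O earns ioDuration milliseconds of credit (the ideal spacing at the configured rate), actual elapsed time is debited, and once the accumulated balance exceeds maxImbalance the disker schedules a delay long enough to shed half of the excess. The sketch below isolates that bookkeeping as a self-contained program; the RateLimiter class and all of its names are hypothetical stand-ins, whereas Squid keeps the real balance in shared memory as Ipc::QueueReader::Balance.

// Hypothetical, self-contained model of the balance arithmetic above.
#include <algorithm>
#include <cstdint>
#include <iostream>

class RateLimiter {
public:
    explicit RateLimiter(const double ioRate): // max requests/sec
        ioDuration(1e3 / ioRate), // ideal ms between two I/Os
        // accumulate at most 100 ms or 100 I/Os worth, whichever is smaller
        maxImbalance(std::min<int64_t>(100,
                     static_cast<int64_t>(100 * ioDuration))),
        balance(0) {}

    // called before each I/O with the ms elapsed since the previous call;
    // returns how long to delay the I/O, or zero to proceed immediately
    int64_t delayFor(const double elapsedMs) {
        const double credit = ioDuration; // what the last I/O should have cost
        const double debit = elapsedMs;   // what it actually cost
        balance += static_cast<int64_t>(credit - debit);
        if (balance > maxImbalance) {
            // too far ahead of the allowed rate: shed half of the excess;
            // the imposed delay shows up as a large debit on the next call
            return balance - maxImbalance / 2;
        }
        if (balance < -maxImbalance)
            balance = -maxImbalance; // do not owe "too much"; avoid bursts
        return 0;
    }

private:
    const double ioDuration;    // ms
    const int64_t maxImbalance; // ms
    int64_t balance;            // ms ahead (+) or behind (-) the allowed rate
};

int main() {
    RateLimiter limiter(200); // like max-swap-rate=200; ideal gap is 5 ms
    for (int i = 0; i < 25; ++i) // back-to-back requests, 0 ms apart
        std::cout << "request " << i << ": delay "
                  << limiter.delayFor(0) << " ms\n";
}

With a 200/sec limit the ideal gap is 5 ms, so about twenty back-to-back requests overdraw the 100 ms balance cap and the sketch starts reporting delays, mirroring the eventAdd() rescheduling above.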
diff --git src/cf.data.pre src/cf.data.pre
index 11d333e..a97bafb 100644
--- src/cf.data.pre
+++ src/cf.data.pre
@@ -2761,43 +2761,47 @@ DOC_START
 	The rock store type:

 	    cache_dir rock Directory-Name Mbytes [options]

 	The Rock Store type is a database-style storage. All cached
 	entries are stored in a "database" file, using fixed-size slots,
 	one entry per slot. The database size is specified in MB. The
 	slot size is specified in bytes using the max-size option. See
 	below for more info on the max-size option.

 	swap-timeout=msec: Squid will not start writing a miss to or
 	reading a hit from disk if it estimates that the swap operation
 	will take more than the specified number of milliseconds. By
 	default and when set to zero, disables the disk I/O time limit
 	enforcement. Ignored when using blocking I/O module because
 	blocking synchronous I/O does not allow Squid to estimate the
 	expected swap wait time.

 	max-swap-rate=swaps/sec: Artificially limits disk access using
-	the specified I/O rate limit. Swap in and swap out requests that
+	the specified I/O rate limit. Swap out requests that
 	would cause the average I/O rate to exceed the limit are
-	delayed. This is necessary on file systems that buffer "too
+	delayed. Individual swap in requests (i.e., hits or reads) are
+	not delayed, but they do contribute to measured swap rate and
+	since they are placed in the same FIFO queue as swap out
+	requests, they may wait longer if max-swap-rate is smaller.
+	This is necessary on file systems that buffer "too
 	many" writes and then start blocking Squid and other processes
 	while committing those writes to disk. Usually used together
 	with swap-timeout to avoid excessive delays and queue overflows
 	when disk demand exceeds available disk "bandwidth". By default
 	and when set to zero, disables the disk I/O rate limit
 	enforcement. Currently supported by IpcIo module only.

 	The coss store type:

 	NP: COSS filesystem in Squid-3 has been deemed too unstable for
 	production use and has thus been removed from this release.
 	We hope that it can be made usable again soon.

 	block-size=n defines the "block size" for COSS cache_dir's.
 	Squid uses file numbers as block numbers.  Since file numbers
 	are limited to 24 bits, the block size determines the maximum
 	size of the COSS partition.  The default is 512 bytes, which
 	leads to a maximum cache_dir size of 512<<24, or 8 GB.  Note
 	you should not change the coss block size after Squid
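Review note: the documented rock options combine on a single squid.conf line. The path and numbers below are illustrative only, not recommendations:

# a 4096 MB rock cache_dir with 32 KB slots, limited to roughly 200 disk
# I/Os per second; swaps expected to wait over 300 ms are not attempted
cache_dir rock /var/cache/squid 4096 max-size=32768 swap-timeout=300 max-swap-rate=200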
diff --git src/ipc/Queue.cc src/ipc/Queue.cc
index 24e6706..c794a7d 100644
--- src/ipc/Queue.cc
+++ src/ipc/Queue.cc
@@ -216,55 +216,40 @@
 const Ipc::QueueReader &
 Ipc::FewToFewBiQueue::reader(const Group group, const int processId) const
 {
     return readers->theReaders[readerIndex(group, processId)];
 }

 void
 Ipc::FewToFewBiQueue::clearReaderSignal(const int remoteProcessId)
 {
     QueueReader &localReader = reader(theLocalGroup, theLocalProcessId);
     debugs(54, 7, HERE << "reader: " << localReader.id);

     Must(validProcessId(remoteGroup(), remoteProcessId));
     localReader.clearSignal();

     // we got a hint; we could reposition iteration to try popping from the
     // remoteProcessId queue first; but it does not seem to help much and might
     // introduce some bias so we do not do that for now:
     // theLastPopProcessId = remoteProcessId;
 }

-bool
-Ipc::FewToFewBiQueue::popReady() const
-{
-    // mimic FewToFewBiQueue::pop() but quit just before popping
-    int popProcessId = theLastPopProcessId; // preserve for future pop()
-    for (int i = 0; i < remoteGroupSize(); ++i) {
-        if (++popProcessId >= remoteGroupIdOffset() + remoteGroupSize())
-            popProcessId = remoteGroupIdOffset();
-        const OneToOneUniQueue &queue = oneToOneQueue(remoteGroup(), popProcessId, theLocalGroup, theLocalProcessId);
-        if (!queue.empty())
-            return true;
-    }
-    return false; // most likely, no process had anything to pop
-}
-
 Ipc::QueueReader::Balance &
 Ipc::FewToFewBiQueue::localBalance()
 {
     QueueReader &r = reader(theLocalGroup, theLocalProcessId);
     return r.balance;
 }

 Ipc::QueueReader::Rate &
 Ipc::FewToFewBiQueue::localRateLimit()
 {
     QueueReader &r = reader(theLocalGroup, theLocalProcessId);
     return r.rateLimit;
 }

 Ipc::FewToFewBiQueue::Metadata::Metadata(const int aGroupASize, const int aGroupAIdOffset, const int aGroupBSize, const int aGroupBIdOffset):
         theGroupASize(aGroupASize), theGroupAIdOffset(aGroupAIdOffset),
         theGroupBSize(aGroupBSize), theGroupBIdOffset(aGroupBIdOffset)
 {
     Must(theGroupASize > 0);
     Must(theGroupBSize > 0);
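Review note: popReady() could only report that some queue was non-empty. Since the patch delays writes only, the disker now has to see the queued message's command field before deciding, so the emptiness check is replaced by the templated peek() added to Queue.h below; copying a typed Value is why the code moves out of this .cc file. Copying the head item without locking is safe because each OneToOneUniQueue has exactly one reader and only that reader removes items. The single-producer/single-consumer ring buffer below is a hypothetical illustration of that property, not Squid's OneToOneUniQueue implementation.

// Hypothetical SPSC ring buffer: one writer calls push(), one reader
// calls peek()/pop(); peek() may copy the head item without locks.
#include <atomic>
#include <cstddef>

template <class Value, std::size_t Capacity>
class SpscQueue {
public:
    SpscQueue(): head(0), tail(0) {}

    // writer side
    bool push(const Value &v) {
        const std::size_t t = tail.load(std::memory_order_relaxed);
        if (t - head.load(std::memory_order_acquire) == Capacity)
            return false; // full
        items[t % Capacity] = v;
        tail.store(t + 1, std::memory_order_release);
        return true;
    }

    // reader side: copies the head item without removing it; safe because
    // only this one reader ever advances head
    bool peek(Value &v) const {
        const std::size_t h = head.load(std::memory_order_relaxed);
        if (h == tail.load(std::memory_order_acquire))
            return false; // empty (though it may gain an item right after)
        v = items[h % Capacity];
        return true;
    }

    // reader side: peek, then advance head to consume the item
    bool pop(Value &v) {
        if (!peek(v))
            return false;
        head.store(head.load(std::memory_order_relaxed) + 1,
                   std::memory_order_release);
        return true;
    }

private:
    Value items[Capacity];
    std::atomic<std::size_t> head; // index of the next item to read
    std::atomic<std::size_t> tail; // index of the next free slot
};

As with Squid's queues, peek() here is only a hint: a queue that looks empty may gain an item immediately afterwards, which is why WaitBeforePop() still falls through to pop() when peek() fails.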
diff --git src/ipc/Queue.h src/ipc/Queue.h
index 72fe3e5..56642ac 100644
--- src/ipc/Queue.h
+++ src/ipc/Queue.h
@@ -185,42 +185,42 @@ public:
     enum Group { groupA = 0, groupB = 1 };

     FewToFewBiQueue(const String &id, const Group aLocalGroup, const int aLocalProcessId);

     Group localGroup() const { return theLocalGroup; }
     Group remoteGroup() const { return theLocalGroup == groupA ? groupB : groupA; }

     /// clears the reader notification received by the local process from the remote process
     void clearReaderSignal(const int remoteProcessId);

     /// picks a process and calls OneToOneUniQueue::pop() using its queue
     template <class Value> bool pop(int &remoteProcessId, Value &value);

     /// calls OneToOneUniQueue::push() using the given process queue
     template <class Value> bool push(const int remoteProcessId, const Value &value);

     /// finds the oldest item in incoming and outgoing queues between
     /// us and the given remote process
     template <class Value> bool findOldest(const int remoteProcessId, Value &value) const;

-    /// returns true if pop() would have probably succeeded but does not pop()
-    bool popReady() const;
+    /// peeks at the item likely to be pop()ed next
+    template <class Value> bool peek(int &remoteProcessId, Value &value) const;

     /// returns local reader's balance
     QueueReader::Balance &localBalance();

     /// returns local reader's rate limit
     QueueReader::Rate &localRateLimit();

 private:
     bool validProcessId(const Group group, const int processId) const;
     int oneToOneQueueIndex(const Group fromGroup, const int fromProcessId, const Group toGroup, const int toProcessId) const;
     const OneToOneUniQueue &oneToOneQueue(const Group fromGroup, const int fromProcessId, const Group toGroup, const int toProcessId) const;
     OneToOneUniQueue &oneToOneQueue(const Group fromGroup, const int fromProcessId, const Group toGroup, const int toProcessId);
     QueueReader &reader(const Group group, const int processId);
     const QueueReader &reader(const Group group, const int processId) const;
     int readerIndex(const Group group, const int processId) const;
     int remoteGroupSize() const { return theLocalGroup == groupA ?
                                   metadata->theGroupBSize : metadata->theGroupASize; }
     int remoteGroupIdOffset() const { return theLocalGroup == groupA ?
                                       metadata->theGroupBIdOffset : metadata->theGroupAIdOffset; }

 private:
     const Mem::Pointer<Metadata> metadata; ///< shared metadata
@@ -351,23 +351,43 @@ FewToFewBiQueue::push(const int remoteProcessId, const Value &value)
 template <class Value>
 bool
 FewToFewBiQueue::findOldest(const int remoteProcessId, Value &value) const
 {
     // we may be called before remote process configured its queue end
     if (!validProcessId(remoteGroup(), remoteProcessId))
         return false;

     // we need the oldest value, so start with the incoming, them-to-us queue:
     const OneToOneUniQueue &inQueue = oneToOneQueue(remoteGroup(), remoteProcessId, theLocalGroup, theLocalProcessId);
     debugs(54, 2, HERE << "peeking from " << remoteProcessId << " to " << theLocalProcessId << " at " << inQueue.size());
     if (inQueue.peek(value))
         return true;

     // if the incoming queue is empty, check the outgoing, us-to-them queue:
     const OneToOneUniQueue &outQueue = oneToOneQueue(theLocalGroup, theLocalProcessId, remoteGroup(), remoteProcessId);
     debugs(54, 2, HERE << "peeking from " << theLocalProcessId << " to " << remoteProcessId << " at " << outQueue.size());
     return outQueue.peek(value);
 }

+template <class Value>
+bool
+FewToFewBiQueue::peek(int &remoteProcessId, Value &value) const
+{
+    // mimic FewToFewBiQueue::pop() but quit just before popping
+    int popProcessId = theLastPopProcessId; // preserve for future pop()
+    for (int i = 0; i < remoteGroupSize(); ++i) {
+        if (++popProcessId >= remoteGroupIdOffset() + remoteGroupSize())
+            popProcessId = remoteGroupIdOffset();
+        const OneToOneUniQueue &queue =
+            oneToOneQueue(remoteGroup(), popProcessId,
+                          theLocalGroup, theLocalProcessId);
+        if (queue.peek(value)) {
+            remoteProcessId = popProcessId;
+            return true;
+        }
+    }
+    return false; // most likely, no process had anything to pop
+}
+
 } // namespace Ipc

 #endif // SQUID_IPC_QUEUE_H
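Review note: the new FewToFewBiQueue::peek() mirrors pop()'s round-robin scan over the per-process queues but leaves both the item and the scan position (theLastPopProcessId) untouched, so the pop() that follows most likely returns the very item that was inspected. The self-contained sketch below models that peek/pop pairing; all names are hypothetical.

// Hypothetical model of the round-robin peek()/pop() pairing above.
#include <cstddef>
#include <iostream>
#include <queue>
#include <vector>

struct Message { int command; }; // stands in for IpcIoMsg

class BiQueueSketch {
public:
    explicit BiQueueSketch(const int nRemotes):
        queues(nRemotes), lastPopIndex(nRemotes - 1) {}

    // scans remote queues starting after the last popped one, like pop(),
    // but works on a local copy of the scan position and leaves the item,
    // so a subsequent pop() most likely returns the same item
    bool peek(int &remoteId, Message &msg) const {
        int i = lastPopIndex;
        for (std::size_t n = 0; n < queues.size(); ++n) {
            i = (i + 1) % static_cast<int>(queues.size());
            if (!queues[i].empty()) {
                remoteId = i;
                msg = queues[i].front();
                return true;
            }
        }
        return false; // nothing to pop right now
    }

    bool pop(int &remoteId, Message &msg) {
        if (!peek(remoteId, msg))
            return false;
        queues[remoteId].pop();
        lastPopIndex = remoteId; // advance the fairness scan position
        return true;
    }

    void push(const int remoteId, const Message &msg) {
        queues[remoteId].push(msg);
    }

private:
    std::vector<std::queue<Message> > queues; // one per remote process
    int lastPopIndex;
};

int main() {
    BiQueueSketch q(3);
    q.push(1, Message{1});
    q.push(2, Message{2});
    int id; Message m;
    q.peek(id, m); std::cout << "peek: queue " << id << "\n"; // queue 1
    q.pop(id, m);  std::cout << "pop:  queue " << id << "\n"; // queue 1
    q.peek(id, m); std::cout << "peek: queue " << id << "\n"; // queue 2
}

Because peek() works on a local copy of the scan position while pop() advances it, a peek-then-pop sequence inspects and consumes the same queue, yet repeated pop()s still rotate fairly across processes.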