Added reply_from_cache and reply_to_cache squid.conf directives to control caching of responses using response info. The existing "cache" directive is checked before Squid has access to the response and, hence, could not use response-based ACLs such as http_status. Response-based ACLs may be essential when fine-tuning caching. Squid Bug 3937 (StoreID can lead to 302 infinite loop) is a good use case. Updated old "cache" directive documentation to provide more information, to help folks distinguish the three related directives, and to polish for clarity. === modified file 'src/Server.cc' --- src/Server.cc 2013-07-25 07:21:54 +0000 +++ src/Server.cc 2013-10-11 22:01:11 +0000 @@ -14,40 +14,41 @@ * incorporates software developed and/or copyrighted by other * sources; see the CREDITS file for full details. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. 
* */ #include "squid.h" +#include "acl/FilledChecklist.h" #include "acl/Gadgets.h" #include "base/TextException.h" #include "comm/Connection.h" #include "comm/forward.h" #include "comm/Write.h" #include "fd.h" #include "err_detail_type.h" #include "errorpage.h" #include "HttpReply.h" #include "HttpRequest.h" #include "Server.h" #include "SquidTime.h" #include "StatCounters.h" #include "Store.h" #include "tools.h" #include "URL.h" #if USE_ADAPTATION #include "adaptation/AccessCheck.h" #include "adaptation/Answer.h" @@ -156,40 +157,42 @@ ServerStateData::setVirginReply(HttpRepl HttpReply * ServerStateData::finalReply() { assert(theFinalReply); return theFinalReply; } HttpReply * ServerStateData::setFinalReply(HttpReply *rep) { debugs(11,5, HERE << this << " setting final reply to " << rep); assert(!theFinalReply); assert(rep); theFinalReply = rep; HTTPMSGLOCK(theFinalReply); // give entry the reply because haveParsedReplyHeaders() expects it there entry->replaceHttpReply(theFinalReply, false); // but do not write yet haveParsedReplyHeaders(); // update the entry/reply (e.g., set timestamps) + if (EBIT_TEST(entry->flags, ENTRY_CACHABLE) && blockCaching()) + entry->release(); entry->startWriting(); // write the updated entry to store return theFinalReply; } // called when no more server communication is expected; may quit void ServerStateData::serverComplete() { debugs(11,5,HERE << "serverComplete " << this); if (!doneWithServer()) { closeServer(); assert(doneWithServer()); } completed = true; HttpRequest *r = originalRequest(); r->hier.total_response_time = r->hier.first_conn_start.tv_sec ? @@ -510,40 +513,58 @@ ServerStateData::maybePurgeOthers() // and probably only if the response was successful if (theFinalReply->sline.status() >= 400) return; // XXX: should we use originalRequest() here? 
const char *reqUrl = urlCanonical(request); debugs(88, 5, "maybe purging due to " << RequestMethodStr(request->method) << ' ' << reqUrl); purgeEntriesByUrl(request, reqUrl); purgeEntriesByHeader(request, reqUrl, theFinalReply, HDR_LOCATION); purgeEntriesByHeader(request, reqUrl, theFinalReply, HDR_CONTENT_LOCATION); } /// called when we have final (possibly adapted) reply headers; kids extend void ServerStateData::haveParsedReplyHeaders() { Must(theFinalReply); maybePurgeOthers(); } +/// whether to prevent caching of an otherwise cachable response +bool +ServerStateData::blockCaching() +{ + if (const Acl::Tree *acl = Config.accessList.replyToCache) { + // This relatively expensive check is not in StoreEntry::checkCachable: + // That method lacks HttpRequest and may be called too many times. + ACLFilledChecklist ch(acl, originalRequest(), NULL); + ch.reply = const_cast<HttpReply*>(entry->getReply()); // ACLFilledChecklist API bug + HTTPMSGLOCK(ch.reply); + if (ch.fastCheck() != ACCESS_ALLOWED) { // when in doubt, block + debugs(20, 3, "reply_to_cache prohibits caching"); + return true; + } + } + return false; +} + HttpRequest * ServerStateData::originalRequest() { return request; } #if USE_ADAPTATION /// Initiate an asynchronous adaptation transaction which will call us back. 
void ServerStateData::startAdaptation(const Adaptation::ServiceGroupPointer &group, HttpRequest *cause) { debugs(11, 5, "ServerStateData::startAdaptation() called"); // check whether we should be sending a body as well // start body pipe to feed ICAP transaction if needed assert(!virginBodyDestination); HttpReply *vrep = virginReply(); assert(!vrep->body_pipe); int64_t size = 0; if (vrep->expectingBody(cause->method, size) && size) { virginBodyDestination = new BodyPipe(this); === modified file 'src/Server.h' --- src/Server.h 2013-06-07 04:35:25 +0000 +++ src/Server.h 2013-10-11 19:56:46 +0000 @@ -114,40 +114,42 @@ protected: virtual void completeForwarding(); /**< default calls fwd->complete() */ // BodyConsumer for HTTP: consume request body. bool startRequestBodyFlow(); void handleMoreRequestBodyAvailable(); void handleRequestBodyProductionEnded(); virtual void handleRequestBodyProducerAborted() = 0; // sending of the request body to the server void sendMoreRequestBody(); // has body; kids overwrite to increment I/O stats counters virtual void sentRequestBody(const CommIoCbParams &io) = 0; virtual void doneSendingRequestBody() = 0; virtual void closeServer() = 0; /**< end communication with the server */ virtual bool doneWithServer() const = 0; /**< did we end communication? */ /// Entry-dependent callbacks use this check to quit if the entry went bad bool abortOnBadEntry(const char *abortReason); + bool blockCaching(); + #if USE_ADAPTATION void startAdaptation(const Adaptation::ServiceGroupPointer &group, HttpRequest *cause); void adaptVirginReplyBody(const char *buf, ssize_t len); void cleanAdaptation(); virtual bool doneWithAdaptation() const; /**< did we end ICAP communication? */ // BodyConsumer for ICAP: consume adapted response body. 
void handleMoreAdaptedBodyAvailable(); void handleAdaptedBodyProductionEnded(); void handleAdaptedBodyProducerAborted(); void handleAdaptedHeader(HttpMsg *msg); void handleAdaptationCompleted(); void handleAdaptationBlocked(const Adaptation::Answer &answer); void handleAdaptationAborted(bool bypassable = false); /// called by StoreEntry when it has more buffer space available void resumeBodyStorage(); /// called when the entire adapted response body is consumed void endAdaptedBodyConsumption(); === modified file 'src/SquidConfig.h' --- src/SquidConfig.h 2013-08-01 20:45:56 +0000 +++ src/SquidConfig.h 2013-10-11 17:38:50 +0000 @@ -357,40 +357,42 @@ public: int client_dst_passthru; int dns_mdns; } onoff; int pipeline_max_prefetch; int forward_max_tries; int connect_retries; class ACL *aclList; struct { acl_access *http; acl_access *adapted_http; acl_access *icp; acl_access *miss; acl_access *NeverDirect; acl_access *AlwaysDirect; acl_access *ASlists; acl_access *noCache; + acl_access *replyFromCache; + acl_access *replyToCache; acl_access *stats_collection; #if SQUID_SNMP acl_access *snmp; #endif #if USE_HTTP_VIOLATIONS acl_access *brokenPosts; #endif acl_access *redirector; acl_access *store_id; acl_access *reply; AclAddress *outgoing_address; #if USE_HTCP acl_access *htcp; acl_access *htcp_clr; #endif #if USE_SSL acl_access *ssl_bump; === modified file 'src/cf.data.pre' --- src/cf.data.pre 2013-08-29 09:21:53 +0000 +++ src/cf.data.pre 2013-10-11 21:55:43 +0000 @@ -4793,54 +4793,124 @@ LOC: Config.onoff.store_id_bypass DEFAULT: on DOC_START When this is 'on', a request will not go through the helper if all helpers are busy. If this is 'off' and the helper queue grows too large, Squid will exit with a FATAL error and ask you to increase the number of helpers. You should only enable this if the helperss are not critical to your caching system. If you use helpers for critical caching components, and you enable this option, users may not get objects from cache. 
DOC_END COMMENT_START OPTIONS FOR TUNING THE CACHE ----------------------------------------------------------------------------- COMMENT_END NAME: cache no_cache TYPE: acl_access DEFAULT: none -DEFAULT_DOC: Allow caching, unless rules exist in squid.conf. +DEFAULT_DOC: By default, this directive is unused and has no effect. LOC: Config.accessList.noCache DOC_START - A list of ACL elements which, if matched and denied, cause the request to - not be satisfied from the cache and the reply to not be cached. - In other words, use this to force certain objects to never be cached. - - You must use the words 'allow' or 'deny' to indicate whether items - matching the ACL should be allowed or denied into the cache. + Requests denied by this directive will not be served from the cache + and their responses will not be stored in the cache. This directive + has no effect on other transactions and on already cached responses. + + This and the two other similar caching directives listed below are + checked at different transaction processing stages, have different + access to response information, and affect different cache operations: + + * cache: Checked before Squid makes a hit/miss determination. + No access to reply information! + Denies both serving a hit and storing a miss. + * reply_from_cache: Checked after a hit was detected. + Has access to reply (hit) information. + Denies serving a hit only. + * reply_to_cache: Checked before storing a cachable miss. + Has access to reply (miss) information. + Denies storing a miss only. + + If you are not sure which of the three directives to use, apply the + following rule of thumb: + + * If your directive ACLs need no response info, use "cache". Otherwise: + * If you do not want the response cached, use reply_to_cache; and/or + * if you do not want a hit on a cached response, use reply_from_cache. This clause supports both fast and slow acl types. See http://wiki.squid-cache.org/SquidFaq/SquidAcl for details. 
DOC_END +NAME: reply_from_cache +TYPE: acl_access +DEFAULT: none +DEFAULT_DOC: By default, this directive is unused and has no effect. +LOC: Config.accessList.replyFromCache +DOC_START + Responses denied by this directive will not be served from the cache + (but may still be cached, see reply_to_cache). This directive has no + effect on the responses it allows and on the cached objects. + + Please see the "cache" directive for a summary of differences among + reply_to_cache, reply_from_cache, and cache directives. + + Unlike the "cache" directive, reply_from_cache only supports fast acl + types. See http://wiki.squid-cache.org/SquidFaq/SquidAcl for details. + + For example: + + # apply custom Store ID mapping to some URLs + acl MapMe dstdomain .c.example.com + store_id_program ... + store_id_access allow MapMe + + # but prevent caching of special responses + # such as 302 redirects that cause StoreID loops + acl Ordinary http_status 200-299 + reply_to_cache deny MapMe !Ordinary + + # and do not serve any previously stored special responses + # from the cache (in case they were already cached before + # the above reply_to_cache rule was in effect). + reply_from_cache deny MapMe !Ordinary +DOC_END + +NAME: reply_to_cache +TYPE: acl_access +DEFAULT: none +DEFAULT_DOC: By default, this directive is unused and has no effect. +LOC: Config.accessList.replyToCache +DOC_START + Responses denied by this directive will not be cached (but may still + be served from the cache, see reply_from_cache). This directive has no + effect on the responses it allows and on the already cached responses. + + Please see the "cache" directive for a summary of differences among + reply_to_cache, reply_from_cache, and cache directives. See the + reply_from_cache directive for a usage example. + + Unlike the "cache" directive, reply_to_cache only supports fast acl + types. See http://wiki.squid-cache.org/SquidFaq/SquidAcl for details. 
+DOC_END + NAME: max_stale COMMENT: time-units TYPE: time_t LOC: Config.maxStale DEFAULT: 1 week DOC_START This option puts an upper limit on how stale content Squid will serve from the cache if cache validation fails. Can be overriden by the refresh_pattern max-stale option. DOC_END NAME: refresh_pattern TYPE: refreshpattern LOC: Config.Refresh DEFAULT: none DOC_START usage: refresh_pattern [-i] regex min percent max [options] By default, regular expressions are CASE-SENSITIVE. To make them case-insensitive, use the -i option. === modified file 'src/client_side_reply.cc' --- src/client_side_reply.cc 2013-07-15 07:49:43 +0000 +++ src/client_side_reply.cc 2013-10-11 22:01:45 +0000 @@ -517,40 +517,45 @@ clientReplyContext::cacheHit(StoreIOBuff case VARY_CANCEL: /* varyEvaluateMatch found a object loop. Process as miss */ debugs(88, DBG_IMPORTANT, "clientProcessHit: Vary object loop!"); processMiss(); return; } if (r->method == Http::METHOD_PURGE) { removeClientStoreReference(&sc, http); e = NULL; purgeRequest(); return; } if (e->checkNegativeHit() && !r->flags.noCacheHack() ) { http->logType = LOG_TCP_NEGATIVE_HIT; sendMoreData(result); + } else if (blockedHit()) { + debugs(88, 5, "reply_from_cache forces a MISS"); + http->logType = LOG_TCP_MISS; + processMiss(); + return; } else if (!http->flags.internal && refreshCheckHTTP(e, r)) { debugs(88, 5, "clientCacheHit: in refreshCheck() block"); /* * We hold a stale copy; it needs to be validated */ /* * The 'needValidation' flag is used to prevent forwarding * loops between siblings. If our copy of the object is stale, * then we should probably only use parents for the validation * request. Otherwise two siblings could generate a loop if * both have a stale version of the object. */ r->flags.needValidation = true; if (e->lastmod < 0) { debugs(88, 3, "validate HIT object? NO. Missing Last-Modified header. Do MISS."); /* * Previous reply didn't have a Last-Modified header, * we cannot revalidate it. 
*/ @@ -745,40 +750,64 @@ clientReplyContext::processConditional(S if (r.flags.ims) { // handle If-Modified-Since requests from the client if (e->modifiedSince(&r)) { http->logType = LOG_TCP_IMS_HIT; sendMoreData(result); return; } if (matchedIfNoneMatch) { // If-None-Match matched, reply with 304 Not Modified or // 412 Precondition Failed sendNotModifiedOrPreconditionFailedError(); return; } // otherwise reply with 304 Not Modified sendNotModified(); } } +/// whether squid.conf reply_from_cache prevents us from serving this hit +bool +clientReplyContext::blockedHit() const +{ + if (!Config.accessList.replyFromCache) + return false; // no hits are blocked by default + + if (http->flags.internal) + return false; // internal content "hits" cannot be blocked + + if (const HttpReply *rep = http->storeEntry()->getReply()) { + std::auto_ptr<ACLFilledChecklist> chl(clientAclChecklistCreate(Config.accessList.replyFromCache, http)); + chl->reply = const_cast<HttpReply*>(rep); // ACLChecklist API bug + HTTPMSGLOCK(chl->reply); + return chl->fastCheck() != ACCESS_ALLOWED; // when in doubt, block + } + + // This does not happen, I hope, because we are called from CacheHit, which + // is called via a storeClientCopy() callback, and store should initialize + // the reply before calling that callback. + debugs(88, 3, "Missing reply!"); + return false; +} + void clientReplyContext::purgeRequestFindObjectToPurge() { /* Try to find a base entry */ http->flags.purging = true; lookingforstore = 1; // TODO: can we use purgeAllCached() here instead of doing the // getPublicByRequestMethod() dance? StoreEntry::getPublicByRequestMethod(this, http->request, Http::METHOD_GET); } // Purges all entries with a given url // TODO: move to SideAgent parent, when we have one /* * We probably cannot purge Vary-affected responses because their MD5 * keys depend on vary headers. 
*/ void purgeEntriesByUrl(HttpRequest * req, const char *url) === modified file 'src/client_side_reply.h' --- src/client_side_reply.h 2013-06-27 15:58:46 +0000 +++ src/client_side_reply.h 2013-10-11 16:31:21 +0000 @@ -123,34 +123,35 @@ private: void sendStreamError(StoreIOBuffer const &result); void pushStreamData(StoreIOBuffer const &result, char *source); clientStreamNode * next() const; StoreIOBuffer holdingBuffer; HttpReply *reply; void processReplyAccess(); static ACLCB ProcessReplyAccessResult; void processReplyAccessResult(const allow_t &accessAllowed); void cloneReply(); void buildReplyHeader (); bool alwaysAllowResponse(Http::StatusCode sline) const; int checkTransferDone(); void processOnlyIfCachedMiss(); void processConditional(StoreIOBuffer &result); void cacheHit(StoreIOBuffer result); void handleIMSReply(StoreIOBuffer result); void sendMoreData(StoreIOBuffer result); void triggerInitialStoreRead(); void sendClientOldEntry(); void purgeAllCached(); + bool blockedHit() const; void sendBodyTooLargeError(); void sendPreconditionFailedError(); void sendNotModified(); void sendNotModifiedOrPreconditionFailedError(); StoreEntry *old_entry; store_client *old_sc; /* ... for entry to be validated */ bool deleting; CBDATA_CLASS2(clientReplyContext); }; #endif /* SQUID_CLIENTSIDEREPLY_H */