=== modified file 'src/http.cc'
--- src/http.cc	2012-10-12 04:14:44 +0000
+++ src/http.cc	2012-10-16 06:19:41 +0000
@@ -354,67 +354,99 @@
 #define REFRESH_OVERRIDE(flag) 0
 #endif
 
-    if (surrogateNoStore)
-        return 0;
-
-    // RFC 2616: do not cache replies to responses with no-store CC directive
-    if (request && request->cache_control &&
-            request->cache_control->noStore() &&
-            !REFRESH_OVERRIDE(ignore_no_store))
-        return 0;
-
-    if (!ignoreCacheControl && request->cache_control != NULL) {
-        const HttpHdrCc* cc=request->cache_control;
-        if (cc->Private()) {
-            if (!REFRESH_OVERRIDE(ignore_private))
-                return 0;
-        }
-
-        if (cc->noCache()) {
-            if (!REFRESH_OVERRIDE(ignore_no_cache))
-                return 0;
-        }
-
-        if (cc->noStore()) {
-            if (!REFRESH_OVERRIDE(ignore_no_store))
-                return 0;
-        }
-    }
-
-    if (request->flags.auth || request->flags.authSent) {
-        /*
-         * Responses to requests with authorization may be cached
-         * only if a Cache-Control: public reply header is present.
-         * RFC 2068, sec 14.9.4
-         */
-
-        if (!request->cache_control || !request->cache_control->Public()) {
-            if (!REFRESH_OVERRIDE(ignore_auth))
-                return 0;
-        }
-    }
-
-    /* Pragma: no-cache in _replies_ is not documented in HTTP,
-     * but servers like "Active Imaging Webcast/2.0" sure do use it */
-    if (hdr->has(HDR_PRAGMA)) {
-        String s = hdr->getList(HDR_PRAGMA);
-        const int no_cache = strListIsMember(&s, "no-cache", ',');
-        s.clean();
-
-        if (no_cache) {
-            if (!REFRESH_OVERRIDE(ignore_no_cache))
-                return 0;
-        }
-    }
-
-    /*
-     * The "multipart/x-mixed-replace" content type is used for
+    // Check for Surrogate/1.0 protocol conditions
+    // NP: reverse-proxy traffic our parent server has instructed us never to cache
+    if (surrogateNoStore) {
+        debugs(22, 3, HERE << "NO because Surrogate-Control:no-store");
+        return 0;
+    }
+
+    // RFC 2616: HTTP/1.1 Cache-Control conditions
+    if (!ignoreCacheControl) {
+        // XXX: check to see if the request headers alone were enough to prevent caching earlier
+        // (ie no-store request header) no need to check those all again here if so.
+        // for now we are not reliably doing that so we waste CPU re-checking request CC
+
+        // RFC 2616 section 14.9.2 - MUST NOT cache any response with request CC:no-store
+        if (request && request->cache_control && request->cache_control->noStore() &&
+                !REFRESH_OVERRIDE(ignore_no_store)) {
+            debugs(22, 3, HERE << "NO because client request Cache-Control:no-store");
+            return 0;
+        }
+
+        // NP: request CC:no-cache only means cache READ is forbidden. STORE is permitted.
+        // NP: request CC:private is undefined. We ignore.
+        // NP: other request CC flags are limiters on HIT/MISS. We don't care about those here.
+
+        // RFC 2616 section 14.9.2 - MUST NOT cache any response with CC:no-store
+        if (rep->cache_control && rep->cache_control->noStore() &&
+                !REFRESH_OVERRIDE(ignore_no_store)) {
+            debugs(22, 3, HERE << "NO because server reply Cache-Control:no-store");
+            return 0;
+        }
+
+        // RFC 2616 section 14.9.1 - MUST NOT cache any response with CC:private in a shared cache like Squid.
+        // TODO: add a shared/private cache configuration possibility.
+        if (rep->cache_control &&
+                rep->cache_control->Private() &&
+                !REFRESH_OVERRIDE(ignore_private)) {
+            debugs(22, 3, HERE << "NO because server reply Cache-Control:private");
+            return 0;
+        }
+        // NP: being conservative; CC:private overrides CC:public when both are present in a response.
+
+    }
+    // RFC 2068, sec 14.9.4 - MUST NOT cache any response with Authentication UNLESS certain CC controls are present
+    // allow HTTP violations to IGNORE those controls (ie re-block caching Auth)
+    if (request && (request->flags.auth || request->flags.authSent) && !REFRESH_OVERRIDE(ignore_auth)) {
+        if (!rep->cache_control) {
+            debugs(22, 3, HERE << "NO because Authenticated and server reply missing Cache-Control");
+            return 0;
+        }
+
+        if (ignoreCacheControl) {
+            debugs(22, 3, HERE << "NO because Authenticated and ignoring Cache-Control");
+            return 0;
+        }
+
+        // HTTPbis pt7 section 4.1 clause 3: a response CC:public is present
+        bool mayStore = false;
+        if ((mayStore |= rep->cache_control->Public())) {
+            debugs(22, 3, HERE << "YES because Authenticated and server reply Cache-Control:public");
+
+            // HTTPbis pt7 section 4.1 clause 2: a response CC:must-revalidate is present
+        } else if ((mayStore |= (rep->cache_control->mustRevalidate() && !REFRESH_OVERRIDE(ignore_must_revalidate)) )) {
+            debugs(22, 3, HERE << "YES because Authenticated and server reply Cache-Control:must-revalidate");
+
+#if 0 // waiting on HTTPbis WG agreement before we do this
+            // NP: given the must-revalidate exception we should also be able to exempt no-cache.
+        } else if ((mayStore |= (rep->cache_control->noCache() && !REFRESH_OVERRIDE(ignore_no_cache)) )) {
+            debugs(22, 3, HERE << "YES because Authenticated and server reply Cache-Control:no-cache");
+#endif
+
+            // HTTPbis pt7 section 4.1 clause 1: a response CC:s-maxage is present (test presence, not the numeric value)
+        } else if ((mayStore |= rep->cache_control->hasSMaxAge())) {
+            debugs(22, 3, HERE << "YES because Authenticated and server reply Cache-Control:s-maxage");
+        }
+
+        if (!mayStore) {
+            debugs(22, 3, HERE << "NO because Authenticated transaction");
+            return 0;
+        }
+
+        // NP: response CC:no-cache is equivalent to CC:must-revalidate,max-age=0. We MAY cache, and do so.
+        // NP: other request CC flags are limiters on HIT/MISS/REFRESH. We don't care about those here.
+    }
+
+    /* HACK: The "multipart/x-mixed-replace" content type is used for
      * continuous push replies. These are generally dynamic and
      * probably should not be cachable
      */
     if ((v = hdr->getStr(HDR_CONTENT_TYPE)))
-        if (!strncasecmp(v, "multipart/x-mixed-replace", 25))
+        if (!strncasecmp(v, "multipart/x-mixed-replace", 25)) {
+            debugs(22, 3, HERE << "NO because Content-Type:multipart/x-mixed-replace");
             return 0;
+        }
 
     switch (rep->sline.status) {
         /* Responses that are cacheable */
@@ -435,11 +467,12 @@
          */
 
         if (!refreshIsCachable(entry) && !REFRESH_OVERRIDE(store_stale)) {
-            debugs(22, 3, "refreshIsCachable() returned non-cacheable..");
+            debugs(22, 3, HERE << "NO because refreshIsCachable() returned non-cacheable..");
             return 0;
-        } else
+        } else {
+            debugs(22, 3, HERE << "YES because HTTP status " << rep->sline.status);
             return 1;
-
+        }
         /* NOTREACHED */
         break;
 
@@ -447,11 +480,17 @@
 
     case HTTP_MOVED_TEMPORARILY:
     case HTTP_TEMPORARY_REDIRECT:
-        if (rep->expires > rep->date && rep->date > 0)
+        if (rep->date <= 0) {
+            debugs(22, 3, HERE << "NO because HTTP status " << rep->sline.status << " and Date missing/invalid");
+            return 0;
+        }
+        if (rep->expires > rep->date) {
+            debugs(22, 3, HERE << "YES because HTTP status " << rep->sline.status << " and Expires > Date");
             return 1;
-        else
+        } else {
+            debugs(22, 3, HERE << "NO because HTTP status " << rep->sline.status << " and Expires <= Date");
             return 0;
-
+        }
         /* NOTREACHED */
         break;
 
@@ -480,6 +519,7 @@
     case HTTP_SERVICE_UNAVAILABLE:
 
     case HTTP_GATEWAY_TIMEOUT:
+        debugs(22, 3, HERE << "MAYBE because HTTP status " << rep->sline.status);
         return -1;
 
         /* NOTREACHED */
@@ -516,11 +556,12 @@
     case HTTP_REQUESTED_RANGE_NOT_SATISFIABLE:
 
     case HTTP_EXPECTATION_FAILED:
+        debugs(22, 3, HERE << "NO because HTTP status " << rep->sline.status);
         return 0;
 
     default:
         /* RFC 2616 section 6.1.1: an unrecognized response MUST NOT be cached. */
-        debugs (11, 3, HERE << "Unknown HTTP status code " << rep->sline.status << ". Not cacheable.");
+        debugs (11, 3, HERE << "NO because Unknown HTTP status code " << rep->sline.status << ". Not cacheable.");
 
         return 0;
 
@@ -927,12 +968,25 @@
 
 no_cache:
 
-    if (!ignoreCacheControl && rep->cache_control) {
-        if (rep->cache_control->proxyRevalidate() ||
-                rep->cache_control->mustRevalidate() ||
-                rep->cache_control->hasSMaxAge()
-           )
-            EBIT_SET(entry->flags, ENTRY_REVALIDATE);
+    if (!ignoreCacheControl) {
+        if (rep->cache_control) {
+            if (rep->cache_control->proxyRevalidate() ||
+                    rep->cache_control->mustRevalidate() ||
+                    rep->cache_control->noCache() ||
+                    rep->cache_control->hasSMaxAge())
+                EBIT_SET(entry->flags, ENTRY_REVALIDATE);
+        }
+#if USE_HTTP_VIOLATIONS // response header Pragma is undefined in HTTP
+        else {
+            // Expensive calculation. So only do it IF the CC: header is not present.
+
+            /* HACK: Pragma: no-cache in _replies_ is not documented in HTTP,
+             * but servers like "Active Imaging Webcast/2.0" sure do use it */
+            if (rep->header.has(HDR_PRAGMA) &&
+                    rep->header.hasListMember(HDR_PRAGMA,"no-cache",','))
+                EBIT_SET(entry->flags, ENTRY_REVALIDATE);
+        }
+#endif
     }
 
 #if HEADERS_LOG

=== modified file 'src/http.h'
--- src/http.h	2012-09-22 20:07:31 +0000
+++ src/http.h	2012-10-16 06:19:41 +0000
@@ -60,6 +60,8 @@
     void processReplyBody();
     void readReply(const CommIoCbParams &io);
     virtual void maybeReadVirginBody(); // read response data from the network
+
+    // Determine whether the response is a cacheable representation
     int cacheableReply();
 
     CachePeer *_peer;       /* CachePeer request made to */