=== modified file 'src/http.cc' --- src/http.cc 2012-10-12 04:14:44 +0000 +++ src/http.cc 2012-10-14 05:28:15 +0000 @@ -336,6 +336,11 @@ HttpStateData::cacheableReply() { HttpReply const *rep = finalReply(); + + // non-existent replies are not cacheable. + if (!rep) + return 0; + HttpHeader const *hdr = &rep->header; const char *v; #if USE_HTTP_VIOLATIONS @@ -354,61 +359,66 @@ #define REFRESH_OVERRIDE(flag) 0 #endif + // Check for Surrogate/1.0 protocol conditions + // NP: reverse-proxy traffic our parent server has instructed us never to cache if (surrogateNoStore) return 0; - // RFC 2616: do not cache replies to responses with no-store CC directive - if (request && request->cache_control && - request->cache_control->noStore() && - !REFRESH_OVERRIDE(ignore_no_store)) - return 0; - - if (!ignoreCacheControl && request->cache_control != NULL) { - const HttpHdrCc* cc=request->cache_control; - if (cc->Private()) { - if (!REFRESH_OVERRIDE(ignore_private)) - return 0; - } - - if (cc->noCache()) { - if (!REFRESH_OVERRIDE(ignore_no_cache)) - return 0; - } - - if (cc->noStore()) { - if (!REFRESH_OVERRIDE(ignore_no_store)) - return 0; - } - } - - if (request->flags.auth || request->flags.authSent) { - /* - * Responses to requests with authorization may be cached - * only if a Cache-Control: public reply header is present. 
- * RFC 2068, sec 14.9.4 - */ - - if (!request->cache_control || !request->cache_control->Public()) { - if (!REFRESH_OVERRIDE(ignore_auth)) - return 0; - } - } - - /* Pragma: no-cache in _replies_ is not documented in HTTP, - * but servers like "Active Imaging Webcast/2.0" sure do use it */ - if (hdr->has(HDR_PRAGMA)) { - String s = hdr->getList(HDR_PRAGMA); - const int no_cache = strListIsMember(&s, "no-cache", ','); - s.clean(); - - if (no_cache) { - if (!REFRESH_OVERRIDE(ignore_no_cache)) - return 0; - } - } - - /* - * The "multipart/x-mixed-replace" content type is used for + // RFC 2616: HTTP/1.1 Cache-Control conditions + if (!ignoreCacheControl) { + // XXX: check to see if the request headers alone were enough to prevent caching earlier + // (ie no-store request header) no need to check those all again here if so. + // for now we are not reliably doing that so we waste CPU re-checking request CC + + // RFC 2616 section 14.9.2 - MUST NOT cache any response with request CC:no-store + if (request && request->cache_control && request->cache_control->noStore() && + !REFRESH_OVERRIDE(ignore_no_store)) + return 0; + + // NP: request CC:no-cache only means cache READ is forbidden. STORE is permitted. + // NP: request CC:private is undefined. We ignore. + // NP: other request CC flags are limiters on HIT/MISS. We don't care about here. + + // RFC 2616 section 14.9.2 - MUST NOT cache any response with CC:no-store + if (rep->cache_control && rep->cache_control->noStore() && + !REFRESH_OVERRIDE(ignore_no_store)) + return 0; + + // RFC 2616 section 14.9.1 - MUST NOT cache any response with CC:private in a shared cache like Squid. + // TODO: add a shared/private cache configuration possibility. + if (rep->cache_control && + rep->cache_control->Private() && + !REFRESH_OVERRIDE(ignore_private)) + return 0; + // NP: being conservative; CC:private overrides CC:public when both are present in a response. 
+ + // RFC 2068, sec 14.9.4 - MUST NOT cache any response with Authentication UNLESS certain CC controls are present + // allow HTTP violations to IGNORE those controls (ie re-block caching Auth) + if (request && (request->flags.auth || request->flags.authSent) && !REFRESH_OVERRIDE(ignore_auth)) { + if (!rep->cache_control) + return 0; + + // HTTPbis pt7 section 4.1 clause 3: a response CC:public is present + const bool mayStore = (rep->cache_control->Public()) || + // HTTPbis pt7 section 4.1 clause 2: a response CC:must-revalidate is present + (rep->cache_control->mustRevalidate() && !REFRESH_OVERRIDE(ignore_must_revalidate)) || +#if 0 + // NP: given the must-revalidate exception we should also be able to exempt no-cache. + (rep->cache_control->noCache() && !REFRESH_OVERRIDE(ignore_no_cache)) || +#endif + + // HTTPbis pt7 section 4.1 clause 1: a response CC:s-maxage is present (test presence, not the value; same test as used for ENTRY_REVALIDATE) + (rep->cache_control->hasSMaxAge()); + + if (!mayStore) + return 0; + } + + // NP: response CC:no-cache is equivalent to CC:must-revalidate,max-age=0. We MAY cache, and do so. + // NP: other request CC flags are limiters on HIT/MISS/REFRESH. We don't care about here. + } + + /* HACK: The "multipart/x-mixed-replace" content type is used for * continuous push replies. These are generally dynamic and * probably should not be cachable */ @@ -930,9 +940,24 @@ - if (!ignoreCacheControl && rep->cache_control) { - if (rep->cache_control->proxyRevalidate() || + if (!ignoreCacheControl) { + if (rep->cache_control && (rep->cache_control->proxyRevalidate() || rep->cache_control->mustRevalidate() || + rep->cache_control->noCache() || rep->cache_control->hasSMaxAge() - ) + )) { EBIT_SET(entry->flags, ENTRY_REVALIDATE); + } else { + // Expensive calculation. 
So only do it IF the reply has no CC: header at all; a present CC: header takes precedence over Pragma + + /* HACK: Pragma: no-cache in _replies_ is not documented in HTTP, + * but servers like "Active Imaging Webcast/2.0" sure do use it */ + if (!rep->cache_control && rep->header.has(HDR_PRAGMA)) { + String s = rep->header.getList(HDR_PRAGMA); + const int no_cache = strListIsMember(&s, "no-cache", ','); + s.clean(); + + if (no_cache) + EBIT_SET(entry->flags, ENTRY_REVALIDATE); + } + } } #if HEADERS_LOG === modified file 'src/http.h' --- src/http.h 2012-09-22 20:07:31 +0000 +++ src/http.h 2012-10-14 04:08:25 +0000 @@ -60,6 +60,8 @@ void processReplyBody(); void readReply(const CommIoCbParams &io); virtual void maybeReadVirginBody(); // read response data from the network + + // Determine whether the response is a cacheable representation int cacheableReply(); CachePeer *_peer; /* CachePeer request made to */