Truncate too-long HTTP response bodies to match their Content-Length header. Sometimes a broken server sends more than Content-Length bytes in the response. For example, a 302 redirect message with "Content-Length: 0" header may include an HTML body. Squid used to send "everything" it read to the client, even if it read more than the Content-Length bytes. That may have helped in some cases, but I think we should be more conservative when dealing with broken servers to combat message smuggling attacks and other bad side-effects for clients. We now do not forward more than the advertised content length and declare the connection with a broken server non-persistent. Chunked responses (that Squid should not receive and that must not have a Content-Length header) are not truncated because RFC 2616 says we MUST ignore their Content-Length header. TODO: simply truncating read content would not work for pipelined responses. We should preserve extra content for the next transaction on a pconn. === modified file 'src/MemBuf.cc' --- src/MemBuf.cc 2006-09-20 14:13:38 +0000 +++ src/MemBuf.cc 2009-05-26 16:20:20 +0000 @@ -220,6 +220,15 @@ PROF_stop(MemBuf_consume); } +// removes last tailSize bytes +void MemBuf::truncate(mb_size_t tailSize) +{ + const mb_size_t cSize = contentSize(); + assert(0 <= tailSize && tailSize <= cSize); + assert(!stolen); /* not frozen */ + size -= tailSize; +} + // calls memcpy, appends exactly size bytes, extends buffer if needed void MemBuf::append(const char *newContent, mb_size_t sz) { === modified file 'src/MemBuf.h' --- src/MemBuf.h 2008-04-06 00:35:11 +0000 +++ src/MemBuf.h 2009-05-26 15:45:58 +0000 @@ -73,6 +73,7 @@ void consume(mb_size_t sz); // removes sz bytes, moving content left void append(const char *c, mb_size_t sz); // grows if needed and possible void appended(mb_size_t sz); // updates content size after external append + void truncate(mb_size_t sz); // removes sz last bytes // XXX: convert global memBuf*() functions into methods === modified file 'src/http.cc' --- src/http.cc 2009-05-01 15:50:41 +0000 +++ src/http.cc 2009-06-10 04:56:17 +0000 @@ -68,7 +68,7 @@ HttpHeader * hdr_out, int we_do_ranges, http_state_flags); HttpStateData::HttpStateData(FwdState *theFwdState) : ServerStateData(theFwdState), - header_bytes_read(0), reply_bytes_read(0) + header_bytes_read(0), reply_bytes_read(0), body_bytes_truncated(0) { debugs(11,5,HERE << "HttpStateData " << this << " created"); ignoreCacheControl = false; @@ -923,6 +923,9 @@ if (body_bytes_read < vrep->content_length) return INCOMPLETE_MSG; + + if (body_bytes_truncated > 0) // already read more than needed + return COMPLETE_NONPERSISTENT_MSG; // disable pconns } /* If there is no message body or we got it all, we can be persistent */ @@ -1099,6 +1102,33 @@ return false; // quit on error } +/** truncate what we read if we read too much so that writeReplyBody() + writes no more than what we should have read */ +void +HttpStateData::truncateVirginBody() +{ + assert(flags.headers_parsed); + + HttpReply *vrep = virginReply(); + int64_t clen = -1; + if (!vrep->expectingBody(request->method, clen) || clen < 0) + return; // no body or a body of unknown size, including chunked + + const int64_t body_bytes_read = reply_bytes_read - header_bytes_read; + if (body_bytes_read - body_bytes_truncated <= clen) + return; // we did not read too much or already took care of the extras + + if (const int64_t extras = body_bytes_read - body_bytes_truncated - clen) { + // server sent more that the advertised content length + debugs(11,5, HERE << "body_bytes_read=" << body_bytes_read << + " clen=" << clen << '/' << vrep->content_length << + " body_bytes_truncated=" << body_bytes_truncated << '+' << extras); + + readBuf->truncate(extras); + body_bytes_truncated += extras; + } +} + /* * Call this when there is data from the origin server * which should be sent to either StoreEntry, or to ICAP... @@ -1106,6 +1136,7 @@ void HttpStateData::writeReplyBody() { + truncateVirginBody(); // if needed const char *data = readBuf->content(); int len = readBuf->contentSize(); === modified file 'src/http.h' --- src/http.h 2007-08-10 05:30:52 +0000 +++ src/http.h 2009-05-26 16:02:52 +0000 @@ -71,6 +71,7 @@ size_t read_sz; int header_bytes_read; // to find end of response, int reply_bytes_read; // without relying on StoreEntry + int body_bytes_truncated; // positive when we read more than we wanted MemBuf *readBuf; bool ignoreCacheControl; bool surrogateNoStore; @@ -92,6 +93,7 @@ void checkDateSkew(HttpReply *); bool continueAfterParsingHeader(); + void truncateVirginBody(); virtual void haveParsedReplyHeaders(); virtual void closeServer(); // end communication with the server