=== modified file 'src/HttpMsg.cc'
--- src/HttpMsg.cc	2009-02-02 12:50:23 +0000
+++ src/HttpMsg.cc	2009-07-06 20:28:38 +0000
@@ -309,6 +309,14 @@
     return -1;
 }
 
+void
+HttpMsg::setContentLength(int64_t clen)
+{
+    header.delById(HDR_CONTENT_LENGTH); // if any
+    header.putInt64(HDR_CONTENT_LENGTH, clen);
+    content_length = clen;
+}
+
 /* returns true if connection should be "persistent"
  * after processing this message */
 int

=== modified file 'src/HttpMsg.h'
--- src/HttpMsg.h	2009-01-21 03:47:47 +0000
+++ src/HttpMsg.h	2009-07-06 20:28:38 +0000
@@ -58,6 +58,9 @@
     ///< produce a message copy, except for a few connection-specific settings
     virtual HttpMsg *clone() const = 0; ///< \todo rename: not a true copy?
 
+    /// [re]sets Content-Length header and cached value
+    void setContentLength(int64_t clen);
+
 public:
     HttpVersion http_ver;
 
@@ -109,7 +112,8 @@
 };
 
 /* Temporary parsing state; might turn into the replacement parser later on */
-struct _HttpParser {
+class HttpParser {
+public:
     char state;
     const char *buf;
     int bufsiz;
@@ -120,7 +124,6 @@
     int v_start, v_end;
     int v_maj, v_min;
 };
-typedef struct _HttpParser HttpParser;
 
 extern void HttpParserInit(HttpParser *, const char *buf, int len);
 extern int HttpParserParseReqLine(HttpParser *hp);

=== modified file 'src/cf.data.pre'
--- src/cf.data.pre	2009-06-28 11:47:27 +0000
+++ src/cf.data.pre	2009-07-06 20:19:22 +0000
@@ -3265,6 +3265,33 @@
 	be no limit imposed.
 DOC_END
 
+NAME: chunked_request_body_max_size
+COMMENT: (bytes)
+TYPE: b_int64_t
+DEFAULT: 64 KB
+LOC: Config.maxChunkedRequestBodySize
+DOC_START
+	A broken or confused HTTP/1.1 client may send a chunked HTTP
+	request to Squid. Squid does not have full support for that
+	feature yet. To cope with such requests, Squid buffers the
+	entire request and then dechunks the request body to create a
+	plain HTTP/1.0 request with a known content length. The plain
+	request is then used by the rest of Squid code as usual.
+
+	The option value specifies the maximum size of the buffer used
+	to hold the request before the conversion. If the chunked
+	request size exceeds the specified limit, the conversion
+	fails, and the client receives an "unsupported request" error,
+	as if dechunking were disabled.
+
+	Dechunking is enabled by default. To disable conversion of
+	chunked requests, set the maximum to zero.
+
+	The request dechunking feature, and this option in particular,
+	are a temporary hack. When chunked requests and responses are
+	fully supported, there will be no need to buffer a chunked
+	request.
+DOC_END
+
 NAME: broken_posts
 IFDEF: HTTP_VIOLATIONS
 TYPE: acl_access
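Illustration (not part of the patch): tuning the new directive in squid.conf. The 256 KB value below is an arbitrary example; only the 64 KB default and the zero special case come from the patch itself.

	# buffer and dechunk chunked request bodies up to 256 KB
	chunked_request_body_max_size 256 KB

	# or disable dechunking: chunked requests then get an "unsupported request" error
	#chunked_request_body_max_size 0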
=== modified file 'src/client_side.cc'
--- src/client_side.cc	2009-06-14 12:18:00 +0000
+++ src/client_side.cc	2009-07-07 20:39:04 +0000
@@ -103,6 +103,7 @@
 #include "ClientRequestContext.h"
 #include "MemBuf.h"
 #include "SquidTime.h"
+#include "ChunkedCodingParser.h"
 
 #if LINGERING_CLOSE
 #define comm_close comm_lingering_close
@@ -1873,6 +1874,18 @@
     }
 }
 
+// Temporary hack helper: determine whether the request is chunked, expensive
+static bool
+isChunkedRequest(const HttpParser *hp) {
+    HttpRequest request;
+    if (!request.parseHeader(HttpParserHdrBuf(hp), HttpParserHdrSz(hp)))
+        return false;
+
+    return request.header.has(HDR_TRANSFER_ENCODING) &&
+           request.header.hasListMember(HDR_TRANSFER_ENCODING, "chunked", ',');
+}
+
+
 /**
  * parseHttpRequest()
 *
@@ -1885,7 +1898,6 @@
 static ClientSocketContext *
 parseHttpRequest(ConnStateData *conn, HttpParser *hp, HttpRequestMethod * method_p, HttpVersion *http_ver)
 {
-    char *url = NULL;
     char *req_hdr = NULL;
     char *end;
     size_t req_sz;
@@ -1952,17 +1964,6 @@
         return parseHttpRequestAbort(conn, "error:unsupported-request-method");
     }
 
-    /* set url */
-    /*
-     * XXX this should eventually not use a malloc'ed buffer; the transformation code
-     * below needs to be modified to not expect a mutable nul-terminated string.
-     */
-    url = (char *)xmalloc(hp->u_end - hp->u_start + 16);
-
-    memcpy(url, hp->buf + hp->u_start, hp->u_end - hp->u_start + 1);
-
-    url[hp->u_end - hp->u_start + 1] = '\0';
-
     /*
      * Process headers after request line
      * TODO: Use httpRequestParse here.
@@ -1982,7 +1983,6 @@
      */
     if ( squid_strnstr(req_hdr, "\r\r\n", req_sz) ) {
         debugs(33, 1, "WARNING: suspicious HTTP request contains double CR");
-        xfree(url);
         return parseHttpRequestAbort(conn, "error:double-CR");
     }
 
@@ -1990,6 +1990,35 @@
            (int) HttpParserRequestLen(hp) << ", req_line_sz = " <<
            HttpParserReqSz(hp));
 
+    // Temporary hack: We might receive a chunked body from a broken HTTP/1.1
+    // client that sends chunked requests to HTTP/1.0 Squid. If the request
+    // might have a chunked body, parse the headers early to look for the
+    // "Transfer-Encoding: chunked" header. If we find it, wait until the
+    // entire body is available so that we can set the content length and
+    // forward the request without chunks. The primary reason for this is
+    // to avoid forwarding a chunked request because the server side lacks
+    // logic to determine when it is valid to do so.
+    // FUTURE_CODE_TO_SUPPORT_CHUNKED_REQUESTS below will replace this hack.
+    if (hp->v_min == 1 && hp->v_maj == 1 && // broken client, may send chunks
+            Config.maxChunkedRequestBodySize > 0 && // configured to dechunk
+            (*method_p == METHOD_PUT || *method_p == METHOD_POST)) {
+
+        // check only once per request because isChunkedRequest is expensive
+        if (conn->in.dechunkingState == ConnStateData::chunkUnknown) {
+            if (isChunkedRequest(hp))
+                conn->startDechunkingRequest(hp);
+            else
+                conn->in.dechunkingState = ConnStateData::chunkNone;
+        }
+
+        if (conn->in.dechunkingState == ConnStateData::chunkParsing) {
+            if (conn->parseRequestChunks(hp)) // parses newly read chunks
+                return NULL; // wait for more data
+            debugs(33, 5, HERE << "Got complete chunked request or err.");
+            assert(conn->in.dechunkingState != ConnStateData::chunkParsing);
+        }
+    }
+
     /* Ok, all headers are received */
     http = new ClientHttpRequest(conn);
 
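Illustration (not part of the patch): the conversion described by the comment above, on the wire. Squid buffers the chunked request on the left whole, then forwards the equivalent plain request on the right; the Transfer-Encoding header is dropped and Content-Length is set to the dechunked body size (see clientProcessRequest below). The URL and Host header are made up for the example.

	As received (chunked):            As forwarded (dechunked):

	POST /upload HTTP/1.1             POST /upload HTTP/1.1
	Host: example.com                 Host: example.com
	Transfer-Encoding: chunked        Content-Length: 9

	4                                 Wikipedia
	Wiki
	5
	pedia
	0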
@@ -2006,6 +2035,17 @@
     debugs(33, 5, "parseHttpRequest: Request Header is\n" <<
            (hp->buf) + hp->hdr_start);
 
+    /* set url */
+    /*
+     * XXX this should eventually not use a malloc'ed buffer; the transformation code
+     * below needs to be modified to not expect a mutable nul-terminated string.
+     */
+    char *url = (char *)xmalloc(hp->u_end - hp->u_start + 16);
+
+    memcpy(url, hp->buf + hp->u_start, hp->u_end - hp->u_start + 1);
+
+    url[hp->u_end - hp->u_start + 1] = '\0';
+
 #if THIS_VIOLATES_HTTP_SPECS_ON_URL_TRANSFORMATION
 
     if ((t = strchr(url, '#')))	/* remove HTML anchors */
@@ -2157,6 +2197,11 @@
 int
 connKeepReadingIncompleteRequest(ConnStateData * conn)
 {
+    // when we read chunked requests, the entire body is buffered
+    // XXX: this check ignores header size and its limits.
+    if (conn->in.dechunkingState == ConnStateData::chunkParsing)
+        return conn->in.notYetUsed < Config.maxChunkedRequestBodySize;
+
     return conn->in.notYetUsed >= Config.maxRequestHeaderSize ?
            0 : 1;
 }
 
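For clarity (a summary, not patch content), the in.dechunkingState transitions implemented by the code above and by the ConnStateData methods near the end of this file:

	chunkUnknown --isChunkedRequest() false-------------> chunkNone  (leave body alone)
	chunkUnknown --startDechunkingRequest()-------------> chunkParsing
	chunkParsing --parseRequestChunks(), body complete--> chunkReady
	chunkParsing --parse error or body too large--------> chunkError
	chunkReady ----finishDechunkingRequest()------------> chunkUnknown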
@@ -2166,8 +2211,13 @@
     ClientSocketContext *context = parseHttpRequestAbort(conn, "error:request-too-large");
     clientStreamNode *node = context->getClientReplyContext();
     assert(!connKeepReadingIncompleteRequest(conn));
-    debugs(33, 1, "Request header is too large (" << conn->in.notYetUsed << " bytes)");
-    debugs(33, 1, "Config 'request_header_max_size'= " << Config.maxRequestHeaderSize << " bytes.");
+    if (conn->in.dechunkingState == ConnStateData::chunkParsing) {
+        debugs(33, 1, "Chunked request is too large (" << conn->in.notYetUsed << " bytes)");
+        debugs(33, 1, "Config 'chunked_request_body_max_size'= " << Config.maxChunkedRequestBodySize << " bytes.");
+    } else {
+        debugs(33, 1, "Request header is too large (" << conn->in.notYetUsed << " bytes)");
+        debugs(33, 1, "Config 'request_header_max_size'= " << Config.maxRequestHeaderSize << " bytes.");
+    }
     clientReplyContext *repContext = dynamic_cast<clientReplyContext *>(node->data.getRaw());
     assert (repContext);
     repContext->setReplyToError(ERR_TOO_BIG,
@@ -2213,6 +2263,9 @@
     ClientHttpRequest *http = context->http;
     HttpRequest *request = NULL;
     bool notedUseOfBuffer = false;
+    bool tePresent = false;
+    bool deChunked = false;
+    bool unsupportedTe = false;
 
     /* We have an initial client stream in place should it be needed */
     /* setup our private context */
@@ -2312,8 +2365,17 @@
     request->my_addr = conn->me;
     request->http_ver = http_ver;
 
-    if (!urlCheckRequest(request) ||
-            request->header.has(HDR_TRANSFER_ENCODING)) {
+    tePresent = request->header.has(HDR_TRANSFER_ENCODING);
+    deChunked = conn->in.dechunkingState == ConnStateData::chunkReady;
+    if (deChunked) {
+        assert(tePresent);
+        request->setContentLength(conn->in.dechunked.contentSize());
+        request->header.delById(HDR_TRANSFER_ENCODING);
+        conn->finishDechunkingRequest(hp);
+    }
+
+    unsupportedTe = tePresent && !deChunked;
+    if (!urlCheckRequest(request) || unsupportedTe) {
         clientStreamNode *node = context->getClientReplyContext();
         clientReplyContext *repContext = dynamic_cast<clientReplyContext *>(node->data.getRaw());
         assert (repContext);
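For clarity (derived from the code above, not new behavior), the three flags combine as follows:

	tePresent  deChunked  unsupportedTe  outcome
	false      false      false          body handled as before
	true       true       false          dechunked: Transfer-Encoding dropped, Content-Length set
	true       false      true           "unsupported request" error returned to the client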
@@ -2647,13 +2709,73 @@
 {
     assert(bodyPipe != NULL);
 
-    if (const size_t putSize = bodyPipe->putMoreData(in.buf, in.notYetUsed))
+    size_t putSize = 0;
+
+#if FUTURE_CODE_TO_SUPPORT_CHUNKED_REQUESTS
+    // The code below works, in principle, but we cannot do dechunking
+    // on-the-fly because that would mean sending chunked requests to
+    // the next hop. Squid lacks logic to determine which servers can
+    // receive chunked requests.
+    // The error generation code probably needs more work.
+    if (in.bodyParser) { // chunked body
+        debugs(33,5, HERE << "handling chunked request body for FD " << fd);
+        bool malformedChunks = false;
+
+        MemBuf raw; // ChunkedCodingParser only works with MemBufs
+        raw.init(in.notYetUsed, in.notYetUsed);
+        raw.append(in.buf, in.notYetUsed);
+        try { // the parser will throw on errors
+            const mb_size_t wasContentSize = raw.contentSize();
+            BodyPipeCheckout bpc(*bodyPipe);
+            const bool parsed = in.bodyParser->parse(&raw, &bpc.buf);
+            bpc.checkIn();
+            putSize = wasContentSize - raw.contentSize();
+
+            if (parsed) {
+                stopProducingFor(bodyPipe, true); // this makes bodySize known
+            } else {
+                // parser needy state must imply body pipe needy state
+                if (in.bodyParser->needsMoreData() &&
+                        !bodyPipe->mayNeedMoreData())
+                    malformedChunks = true;
+                // XXX: if bodyParser->needsMoreSpace, how can we guarantee it?
+            }
+        } catch (...)
+        { // XXX: be more specific
+            malformedChunks = true;
+        }
+
+        if (malformedChunks) {
+            if (bodyPipe != NULL)
+                stopProducingFor(bodyPipe, false);
+
+            ClientSocketContext::Pointer context = getCurrentContext();
+            if (!context->http->out.offset) {
+                clientStreamNode *node = context->getClientReplyContext();
+                clientReplyContext *repContext = dynamic_cast<clientReplyContext *>(node->data.getRaw());
+                assert (repContext);
+                repContext->setReplyToError(ERR_INVALID_REQ, HTTP_BAD_REQUEST,
+                                            METHOD_NONE, NULL, &peer.sin_addr,
+                                            NULL, NULL, NULL);
+                context->pullData();
+            }
+            flags.readMoreRequests = false;
+            return; // XXX: is that sufficient to generate an error?
+        }
+    } else // identity encoding
+#endif
+    {
+        debugs(33,5, HERE << "handling plain request body for FD " << fd);
+        putSize = bodyPipe->putMoreData(in.buf, in.notYetUsed);
+        if (!bodyPipe->mayNeedMoreData()) {
+            // BodyPipe will clear us automagically when we produced everything
+            bodyPipe = NULL;
+        }
+    }
+
+    if (putSize > 0)
         connNoteUseOfBuffer(this, putSize);
 
-    if (!bodyPipe->mayNeedMoreData()) {
-        // BodyPipe will clear us automagically when we produced everything
-        bodyPipe = NULL;
-
+    if (!bodyPipe) {
         debugs(33,5, HERE << "produced entire request body for FD " << fd);
 
         if (closing()) {
@@ -3434,19 +3556,121 @@
     bodyPipe->enableAutoConsumption();
 }
 
+// initialize dechunking state
+void
+ConnStateData::startDechunkingRequest(HttpParser *hp)
+{
+    debugs(33, 5, HERE << "start dechunking at " << HttpParserRequestLen(hp));
+    assert(in.dechunkingState == chunkUnknown);
+    assert(!in.bodyParser);
+    in.bodyParser = new ChunkedCodingParser;
+    in.chunkedSeen = HttpParserRequestLen(hp); // skip headers when dechunking
+    in.chunked.init(); // TODO: should we have a smaller-than-default limit?
+    in.dechunked.init();
+    in.dechunkingState = chunkParsing;
+}
+
+// put parsed content into input buffer and clean up
+void
+ConnStateData::finishDechunkingRequest(HttpParser *hp)
+{
+    debugs(33, 5, HERE << "finish dechunking; content: " << in.dechunked.contentSize());
+
+    assert(in.dechunkingState == chunkReady);
+    assert(in.bodyParser);
+    delete in.bodyParser;
+    in.bodyParser = NULL;
+
+    const mb_size_t headerSize = HttpParserRequestLen(hp);
+
+    // dechunking cannot make data bigger
+    assert(headerSize + in.dechunked.contentSize() + in.chunked.contentSize()
+           <= static_cast<mb_size_t>(in.notYetUsed));
+    assert(in.notYetUsed <= in.allocatedSize);
+
+    // copy dechunked content
+    char *end = in.buf + headerSize;
+    xmemmove(end, in.dechunked.content(), in.dechunked.contentSize());
+    end += in.dechunked.contentSize();
+
+    // copy post-chunks leftovers, if any, caused by request pipelining?
+    if (in.chunked.contentSize()) {
+        xmemmove(end, in.chunked.content(), in.chunked.contentSize());
+        end += in.chunked.contentSize();
+    }
+
+    in.notYetUsed = end - in.buf;
+
+    in.chunked.clean();
+    in.dechunked.clean();
+    in.dechunkingState = chunkUnknown;
+}
+
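Sketch (not patch content) of what finishDechunkingRequest() leaves in the connection buffer, per the asserts and copies above:

	in.buf: [ request headers | dechunked body | post-chunk leftovers ]
	        ^0                ^headerSize      ^headerSize +          ^in.notYetUsed
	                                            in.dechunked.contentSize()

The leftovers are any bytes that followed the last-chunk in the read buffer, e.g. a pipelined next request.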
+// parse newly read request chunks and buffer them for finishDechunkingRequest
+// returns true iff more data is needed
+bool
+ConnStateData::parseRequestChunks(HttpParser *)
+{
+    debugs(33,5, HERE << "parsing chunked request body at " <<
+           in.chunkedSeen << " < " << in.notYetUsed);
+    assert(in.bodyParser);
+    assert(in.dechunkingState == chunkParsing);
+
+    assert(in.chunkedSeen <= in.notYetUsed);
+    const mb_size_t fresh = in.notYetUsed - in.chunkedSeen;
+
+    // be safe: count some chunked coding metadata towards the total body size
+    if (fresh + in.dechunked.contentSize() > Config.maxChunkedRequestBodySize) {
+        debugs(33,3, HERE << "chunked body (" << fresh << " + " <<
+               in.dechunked.contentSize() << ") may exceed " <<
+               "chunked_request_body_max_size=" <<
+               Config.maxChunkedRequestBodySize);
+        in.dechunkingState = chunkError;
+        return false;
+    }
+
+    if (fresh > in.chunked.potentialSpaceSize()) {
+        // should not happen if Config.maxChunkedRequestBodySize is reasonable
+        debugs(33,1, HERE << "chunked_request_body_max_size exceeds chunked " <<
+               "buffer size: " << fresh << " + " << in.chunked.contentSize() <<
+               " > " << in.chunked.potentialSpaceSize() << " with " <<
+               "chunked_request_body_max_size=" <<
+               Config.maxChunkedRequestBodySize);
+        in.dechunkingState = chunkError;
+        return false;
+    }
+    in.chunked.append(in.buf + in.chunkedSeen, fresh);
+    in.chunkedSeen += fresh;
+
+    try { // the parser will throw on errors
+        if (in.bodyParser->parse(&in.chunked, &in.dechunked))
+            in.dechunkingState = chunkReady; // successfully parsed all chunks
+        else
+            return true; // need more, keep the same state
+    } catch (...)
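Worked example (illustrative only) of why the limit check above is conservative: fresh counts raw chunked bytes, so chunk-size lines and CRLF framing are charged against chunked_request_body_max_size along with the payload. For a body sent as 16-byte chunks:

	raw bytes per chunk = strlen("10\r\n") + 16 + strlen("\r\n") = 22
	chunks within 64 KB = 65536 / 22 = 2979 (rounded down)
	max payload         = 2979 * 16 = 47664 bytes, about 46.5 KB of the nominal 64 KB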
+    {
+        debugs(33,3, HERE << "chunk parsing error");
+        in.dechunkingState = chunkError;
+    }
+    return false; // error, unsupported, or done
+}
+
 char *
 ConnStateData::In::addressToReadInto() const
 {
     return buf + notYetUsed;
 }
 
-ConnStateData::In::In() : buf (NULL), notYetUsed (0), allocatedSize (0)
+ConnStateData::In::In() : bodyParser(NULL),
+        buf (NULL), notYetUsed (0), allocatedSize (0),
+        dechunkingState(ConnStateData::chunkUnknown)
 {}
 
 ConnStateData::In::~In()
 {
     if (allocatedSize)
         memFreeBuf(allocatedSize, buf);
+    if (bodyParser)
+        delete bodyParser; // TODO: pool
 }
 
 /* This is a comm call normally scheduled by comm_close() */

=== modified file 'src/client_side.h'
--- src/client_side.h	2009-02-19 07:17:31 +0000
+++ src/client_side.h	2009-07-07 21:16:30 +0000
@@ -48,6 +48,9 @@
 
 class AuthUserRequest;
 
+class ChunkedCodingParser;
+class HttpParser;
+
 template <class T>
 class Range;
 
@@ -144,13 +147,22 @@
 
     int fd;
 
+    /// chunk buffering and parsing algorithm state
+    typedef enum { chunkUnknown, chunkNone, chunkParsing, chunkReady, chunkError } DechunkingState;
+
     struct In {
         In();
         ~In();
         char *addressToReadInto() const;
+
+        ChunkedCodingParser *bodyParser; ///< parses chunked request body
+        MemBuf chunked; ///< contains unparsed raw (chunked) body data
+        MemBuf dechunked; ///< accumulates parsed (dechunked) content
         char *buf;
         size_t notYetUsed;
         size_t allocatedSize;
+        size_t chunkedSeen; ///< size of processed or ignored raw read data
+        DechunkingState dechunkingState; ///< request dechunking state
     } in;
 
     int64_t bodySizeLeft();
@@ -253,6 +265,10 @@
     bool switchedToHttps() const { return false; }
 #endif
 
+    void startDechunkingRequest(HttpParser *hp);
+    bool parseRequestChunks(HttpParser *hp);
+    void finishDechunkingRequest(HttpParser *hp);
+
 private:
     int connReadWasError(comm_err_t flag, int size, int xerrno);
     int connFinishedWithConn(int size);

=== modified file 'src/structs.h'
--- src/structs.h	2009-06-28 08:03:28 +0000
+++ src/structs.h	2009-07-06 20:19:22 +0000
@@ -183,6 +183,7 @@
     } Timeout;
     size_t maxRequestHeaderSize;
     int64_t maxRequestBodySize;
+    int64_t maxChunkedRequestBodySize;
     size_t maxReplyHeaderSize;
     acl_size_t *ReplyBodySize;
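Appendix (illustration, not part of the patch): a minimal, self-contained sketch of the dechunking algorithm that ChunkedCodingParser implements incrementally. This toy version assumes the whole chunked body is already in memory, which is the same assumption the buffering hack above makes. It returns false instead of throwing on malformed input, skips chunk extensions, and ignores trailer headers.

	// Toy chunked-body decoder (not Squid code). A hex chunk-size line that
	// fails to parse yields size 0 and is treated as the last-chunk.
	#include <cstdlib>
	#include <iostream>
	#include <string>

	static bool dechunk(const std::string &raw, std::string &body)
	{
	    std::string::size_type pos = 0;
	    for (;;) {
	        // chunk-size line: hex size [; extensions] CRLF
	        const std::string::size_type eol = raw.find("\r\n", pos);
	        if (eol == std::string::npos)
	            return false; // truncated size line
	        const unsigned long size =
	            strtoul(raw.substr(pos, eol - pos).c_str(), NULL, 16);
	        pos = eol + 2;
	        if (size == 0)
	            return true; // last-chunk; optional trailers ignored
	        if (pos + size + 2 > raw.size())
	            return false; // truncated chunk data
	        body.append(raw, pos, size);
	        pos += size + 2; // skip chunk data and its trailing CRLF
	    }
	}

	int main()
	{
	    std::string body;
	    if (dechunk("4\r\nWiki\r\n5\r\npedia\r\n0\r\n\r\n", body))
	        std::cout << body << " (" << body.size() << " bytes)\n";
	    return 0;
	}

Feeding it the body from the earlier wire example yields "Wikipedia (9 bytes)": the dechunked content and the value the patch would place in Content-Length.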