Compliance: Ignore unused chunk-extensions to correctly handle large ones. Chunk parser did not advance until it got a complete chunk-extension. HttpStateData::maybeReadVirginBody() does not grow the buffer if there is no space available for the [chunked] body so the transaction with a large chunk-extension would stall. The default HttpStateData input buffer size is just 2KB so it does not take a "very large" extension to stall the transaction. Somewhat ironically, we are not even interested in the HTTP chunk-extension itself. After the change, Squid skips the chunk-extension data as soon as it gets it (except for the last-chunk, see below). Incrementally ignoring data requires handling quoted strings correctly, to avoid mis-detecting a quoted CRLF. Thus, we now preserve the quoted string parsing state in ChunkedCodingParser. Last-chunk chunk-extension is useful for ICAP. We parse it instead of ignoring. This parsing is done as before and may still lead to connection hanging, but a proper fix is outside this patch scope. Similarly, other stalling reasons are outside this patch scope. Co-Advisor test case: test_case/rfc2616/chunked-1p0-longValExt-16385-toClt === modified file 'src/ChunkedCodingParser.cc' --- src/ChunkedCodingParser.cc 2010-06-15 07:21:57 +0000 +++ src/ChunkedCodingParser.cc 2010-08-29 23:55:59 +0000 @@ -1,150 +1,161 @@ #include "squid.h" #include "base/TextException.h" #include "Parsing.h" #include "ChunkedCodingParser.h" #include "MemBuf.h" -ChunkedCodingParser::Step ChunkedCodingParser::psChunkBeg = &ChunkedCodingParser::parseChunkBeg; +ChunkedCodingParser::Step ChunkedCodingParser::psChunkSize = &ChunkedCodingParser::parseChunkSize; +ChunkedCodingParser::Step ChunkedCodingParser::psUnusedChunkExtension = &ChunkedCodingParser::parseUnusedChunkExtension; +ChunkedCodingParser::Step ChunkedCodingParser::psLastChunkExtension = &ChunkedCodingParser::parseLastChunkExtension; ChunkedCodingParser::Step ChunkedCodingParser::psChunkBody = &ChunkedCodingParser::parseChunkBody; ChunkedCodingParser::Step ChunkedCodingParser::psChunkEnd = &ChunkedCodingParser::parseChunkEnd; ChunkedCodingParser::Step ChunkedCodingParser::psTrailer = &ChunkedCodingParser::parseTrailer; ChunkedCodingParser::Step ChunkedCodingParser::psMessageEnd = &ChunkedCodingParser::parseMessageEnd; ChunkedCodingParser::ChunkedCodingParser() { reset(); } void ChunkedCodingParser::reset() { - theStep = psChunkBeg; + theStep = psChunkSize; theChunkSize = theLeftBodySize = 0; doNeedMoreData = false; theIn = theOut = NULL; useOriginBody = -1; + inQuoted = inSlashed = false; } bool ChunkedCodingParser::parse(MemBuf *rawData, MemBuf *parsedContent) { Must(rawData && parsedContent); theIn = rawData; theOut = parsedContent; // we must reset this all the time so that mayContinue() lets us // output more content if we stopped due to needsMoreSpace() before doNeedMoreData = !theIn->hasContent(); while (mayContinue()) { (this->*theStep)(); } return theStep == psMessageEnd; } bool ChunkedCodingParser::needsMoreData() const { return doNeedMoreData; } bool ChunkedCodingParser::needsMoreSpace() const { assert(theOut); return theStep == psChunkBody && !theOut->hasPotentialSpace(); } bool ChunkedCodingParser::mayContinue() const { return !needsMoreData() && !needsMoreSpace() && theStep != psMessageEnd; } -void ChunkedCodingParser::parseChunkBeg() +void ChunkedCodingParser::parseChunkSize() { Must(theChunkSize <= 0); // Should(), really - size_t crlfBeg = 0; - size_t crlfEnd = 0; - - if (findCrlf(crlfBeg, crlfEnd)) { - debugs(94,7, "found chunk-size end: " << crlfBeg << "-" << crlfEnd); - int64_t size = -1; - const char *p = 0; - - if (StringToInt64(theIn->content(), size, &p, 16)) { - if (size < 0) { - throw TexcHere("negative chunk size"); - return; - } + const char *p = theIn->content(); + while (p < theIn->space() && xisxdigit(*p)) ++p; + if (p >= theIn->space()) { + doNeedMoreData = true; + return; + } - // to allow chunk extensions in any chunk, remove this size check - if (size == 0) // is this the last-chunk? - parseChunkExtension(p, theIn->content() + crlfBeg); - - theIn->consume(crlfEnd); - theChunkSize = theLeftBodySize = size; - debugs(94,7, "found chunk: " << theChunkSize); - theStep = theChunkSize == 0 ? psTrailer : psChunkBody; - return; + int64_t size = -1; + if (StringToInt64(theIn->content(), size, &p, 16)) { + if (size < 0) + throw TexcHere("negative chunk size"); + + theChunkSize = theLeftBodySize = size; + debugs(94,7, "found chunk: " << theChunkSize); + // parse chunk extensions only in the last-chunk + if (theChunkSize) + theStep = psUnusedChunkExtension; + else { + theIn->consume(p - theIn->content()); + theStep = psLastChunkExtension; } - + } else throw TexcHere("corrupted chunk size"); - } +} - doNeedMoreData = true; +void ChunkedCodingParser::parseUnusedChunkExtension() +{ + size_t crlfBeg = 0; + size_t crlfEnd = 0; + if (findCrlf(crlfBeg, crlfEnd, inQuoted, inSlashed)) { + inQuoted = inSlashed = false; + theIn->consume(crlfEnd); + theStep = theChunkSize ? psChunkBody : psTrailer; + } else { + theIn->consume(theIn->contentSize()); + doNeedMoreData = true; + } } void ChunkedCodingParser::parseChunkBody() { Must(theLeftBodySize > 0); // Should, really const size_t availSize = min(theLeftBodySize, (uint64_t)theIn->contentSize()); const size_t safeSize = min(availSize, (size_t)theOut->potentialSpaceSize()); doNeedMoreData = availSize < theLeftBodySize; // and we may also need more space theOut->append(theIn->content(), safeSize); theIn->consume(safeSize); theLeftBodySize -= safeSize; if (theLeftBodySize == 0) theStep = psChunkEnd; else Must(needsMoreData() || needsMoreSpace()); } void ChunkedCodingParser::parseChunkEnd() { Must(theLeftBodySize == 0); // Should(), really size_t crlfBeg = 0; size_t crlfEnd = 0; if (findCrlf(crlfBeg, crlfEnd)) { if (crlfBeg != 0) { throw TexcHere("found data bewteen chunk end and CRLF"); return; } theIn->consume(crlfEnd); theChunkSize = 0; // done with the current chunk - theStep = psChunkBeg; + theStep = psChunkSize; return; } doNeedMoreData = true; } void ChunkedCodingParser::parseTrailer() { Must(theChunkSize == 0); // Should(), really while (mayContinue()) parseTrailerHeader(); } void ChunkedCodingParser::parseTrailerHeader() { size_t crlfBeg = 0; size_t crlfEnd = 0; if (findCrlf(crlfBeg, crlfEnd)) { @@ -152,55 +163,62 @@ void ChunkedCodingParser::parseTrailerHe ; //theTrailer.append(theIn->content(), crlfEnd); theIn->consume(crlfEnd); if (crlfBeg == 0) theStep = psMessageEnd; return; } doNeedMoreData = true; } void ChunkedCodingParser::parseMessageEnd() { // termination step, should not be called Must(false); // Should(), really } -// finds next CRLF +/// Finds next CRLF. Does not store parsing state. bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd) { + bool quoted = false; + bool slashed = false; + return findCrlf(crlfBeg, crlfEnd, quoted, slashed); +} + +/// Finds next CRLF. Parsing state stored in quoted and slashed +/// parameters. Incremental: can resume when more data is available. +bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd, bool "ed, bool &slashed) +{ // XXX: This code was copied, with permission, from another software. // There is a similar and probably better code inside httpHeaderParse // but it seems difficult to isolate due to parsing-unrelated bloat. // Such isolation should probably be done before this class is used // for handling of traffic "more external" than ICAP. const char *buf = theIn->content(); size_t size = theIn->contentSize(); ssize_t crOff = -1; - bool quoted = false; - bool slashed = false; for (size_t i = 0; i < size; ++i) { if (slashed) { slashed = false; continue; } const char c = buf[i]; // handle quoted strings if (quoted) { if (c == '\\') slashed = true; else if (c == '"') quoted = false; continue; } else if (c == '"') { quoted = true; crOff = -1; @@ -217,51 +235,65 @@ bool ChunkedCodingParser::findCrlf(size_ if (c == '\r') crOff = i; } else { // skipping CRs, looking for the first LF if (c == '\n') { crlfBeg = crOff; crlfEnd = ++i; return true; } if (c != '\r') crOff = -1; } } return false; } // chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) -void ChunkedCodingParser::parseChunkExtension(const char *startExt, const char *endExt) +void ChunkedCodingParser::parseLastChunkExtension() { + size_t crlfBeg = 0; + size_t crlfEnd = 0; + + if (!findCrlf(crlfBeg, crlfEnd)) { + doNeedMoreData = true; + return; + } + + const char *const startExt = theIn->content(); + const char *const endExt = theIn->content() + crlfBeg; + // chunk-extension starts at startExt and ends with LF at endEx for (const char *p = startExt; p < endExt;) { while (*p == ' ' || *p == '\t') ++p; // skip spaces before ';' if (*p++ != ';') // each ext name=value pair is preceded with ';' - return; + break; while (*p == ' ' || *p == '\t') ++p; // skip spaces before name if (p >= endExt) - return; // malformed extension: ';' without ext name=value pair + break; // malformed extension: ';' without ext name=value pair const int extSize = endExt - p; // TODO: we need debugData() stream manipulator to dump data debugs(94,7, "Found chunk extension; size=" << extSize); // TODO: support implied *LWS around '=' if (extSize > 18 && strncmp(p, "use-original-body=", 18) == 0) { (void)StringToInt64(p+18, useOriginBody, &p, 10); debugs(94, 3, HERE << "use-original-body=" << useOriginBody); - return; // remove to support more than just use-original-body + break; // remove to support more than just use-original-body } else { debugs(94, 5, HERE << "skipping unknown chunk extension"); // TODO: support quoted-string chunk-ext-val while (p < endExt && *p != ';') ++p; // skip until the next ';' } } + + theIn->consume(crlfEnd); + theStep = theChunkSize ? psChunkBody : psTrailer; } === modified file 'src/ChunkedCodingParser.h' --- src/ChunkedCodingParser.h 2010-06-14 20:01:59 +0000 +++ src/ChunkedCodingParser.h 2010-08-29 23:56:17 +0000 @@ -56,50 +56,57 @@ public: ChunkedCodingParser(); void reset(); /** \retval true complete success \retval false needs more data \throws ?? error. */ bool parse(MemBuf *rawData, MemBuf *parsedContent); bool needsMoreData() const; bool needsMoreSpace() const; private: typedef void (ChunkedCodingParser::*Step)(); private: bool mayContinue() const; + void parseChunkSize(); + void parseUnusedChunkExtension(); + void parseLastChunkExtension(); void parseChunkBeg(); void parseChunkBody(); void parseChunkEnd(); void parseTrailer(); void parseTrailerHeader(); void parseMessageEnd(); - void parseChunkExtension(const char *, const char * ); bool findCrlf(size_t &crlfBeg, size_t &crlfEnd); + bool findCrlf(size_t &crlfBeg, size_t &crlfEnd, bool "ed, bool &slashed); private: - static Step psChunkBeg; + static Step psChunkSize; + static Step psUnusedChunkExtension; + static Step psLastChunkExtension; static Step psChunkBody; static Step psChunkEnd; static Step psTrailer; static Step psMessageEnd; MemBuf *theIn; MemBuf *theOut; Step theStep; uint64_t theChunkSize; uint64_t theLeftBodySize; bool doNeedMoreData; + bool inQuoted; ///< stores parsing state for incremental findCrlf + bool inSlashed; ///< stores parsing state for incremental findCrlf public: int64_t useOriginBody; }; #endif /* SQUID_CHUNKEDCODINGPARSER_H */