Compliance: Ignore unused chunk-extensions to correctly handle large ones.

Chunk parser did not advance until it got a complete chunk-extension.
HttpStateData::maybeReadVirginBody() does not grow the buffer if there is no
space available for the [chunked] body so the transaction with a large
chunk-extension would stall. The default HttpStateData input buffer size is
just 2KB so it does not take a "very large" extension to stall the
transaction.

Somewhat ironically, we are not even interested in the HTTP chunk-extension
itself. After the change, Squid skips the chunk-extension data as soon as it
gets it (except for the last-chunk, see below). Incrementally ignoring data
requires handling quoted strings correctly, to avoid mis-detecting a quoted
CRLF.  Thus, we now preserve the quoted string parsing state in
ChunkedCodingParser.

Last-chunk chunk-extension is useful for ICAP. We parse it instead of
ignoring. This parsing is done as before and may still lead to connection
hanging, but a proper fix is outside this patch scope. Similarly, other
stalling reasons are outside this patch scope.


Co-Advisor test case: test_case/rfc2616/chunked-1p0-longValExt-16385-toClt

=== modified file 'src/ChunkedCodingParser.cc'
--- src/ChunkedCodingParser.cc	2010-06-15 07:21:57 +0000
+++ src/ChunkedCodingParser.cc	2010-08-29 23:55:59 +0000
@@ -1,150 +1,161 @@
 #include "squid.h"
 #include "base/TextException.h"
 #include "Parsing.h"
 #include "ChunkedCodingParser.h"
 #include "MemBuf.h"
 
-ChunkedCodingParser::Step ChunkedCodingParser::psChunkBeg = &ChunkedCodingParser::parseChunkBeg;
+ChunkedCodingParser::Step ChunkedCodingParser::psChunkSize = &ChunkedCodingParser::parseChunkSize;
+ChunkedCodingParser::Step ChunkedCodingParser::psUnusedChunkExtension = &ChunkedCodingParser::parseUnusedChunkExtension;
+ChunkedCodingParser::Step ChunkedCodingParser::psLastChunkExtension = &ChunkedCodingParser::parseLastChunkExtension;
 ChunkedCodingParser::Step ChunkedCodingParser::psChunkBody = &ChunkedCodingParser::parseChunkBody;
 ChunkedCodingParser::Step ChunkedCodingParser::psChunkEnd = &ChunkedCodingParser::parseChunkEnd;
 ChunkedCodingParser::Step ChunkedCodingParser::psTrailer = &ChunkedCodingParser::parseTrailer;
 ChunkedCodingParser::Step ChunkedCodingParser::psMessageEnd = &ChunkedCodingParser::parseMessageEnd;
 
 ChunkedCodingParser::ChunkedCodingParser()
 {
     reset();
 }
 
 void ChunkedCodingParser::reset()
 {
-    theStep = psChunkBeg;
+    theStep = psChunkSize;
     theChunkSize = theLeftBodySize = 0;
     doNeedMoreData = false;
     theIn = theOut = NULL;
     useOriginBody = -1;
+    inQuoted = inSlashed = false;
 }
 
 bool ChunkedCodingParser::parse(MemBuf *rawData, MemBuf *parsedContent)
 {
     Must(rawData && parsedContent);
     theIn = rawData;
     theOut = parsedContent;
 
     // we must reset this all the time so that mayContinue() lets us
     // output more content if we stopped due to needsMoreSpace() before
     doNeedMoreData = !theIn->hasContent();
 
     while (mayContinue()) {
         (this->*theStep)();
     }
 
     return theStep == psMessageEnd;
 }
 
 bool ChunkedCodingParser::needsMoreData() const
 {
     return doNeedMoreData;
 }
 
 bool ChunkedCodingParser::needsMoreSpace() const
 {
     assert(theOut);
     return theStep == psChunkBody && !theOut->hasPotentialSpace();
 }
 
 bool ChunkedCodingParser::mayContinue() const
 {
     return !needsMoreData() && !needsMoreSpace() && theStep != psMessageEnd;
 }
 
-void ChunkedCodingParser::parseChunkBeg()
+void ChunkedCodingParser::parseChunkSize()
 {
     Must(theChunkSize <= 0); // Should(), really
 
-    size_t crlfBeg = 0;
-    size_t crlfEnd = 0;
-
-    if (findCrlf(crlfBeg, crlfEnd)) {
-        debugs(94,7, "found chunk-size end: " << crlfBeg << "-" << crlfEnd);
-        int64_t size = -1;
-        const char *p = 0;
-
-        if (StringToInt64(theIn->content(), size, &p, 16)) {
-            if (size < 0) {
-                throw TexcHere("negative chunk size");
-                return;
-            }
+    const char *p = theIn->content();
+    while (p < theIn->space() && xisxdigit(*p)) ++p;
+    if (p >= theIn->space()) {
+        doNeedMoreData = true;
+        return;
+    }
 
-            // to allow chunk extensions in any chunk, remove this size check
-            if (size == 0) // is this the last-chunk?
-                parseChunkExtension(p, theIn->content() + crlfBeg);
-
-            theIn->consume(crlfEnd);
-            theChunkSize = theLeftBodySize = size;
-            debugs(94,7, "found chunk: " << theChunkSize);
-            theStep = theChunkSize == 0 ? psTrailer : psChunkBody;
-            return;
+    int64_t size = -1;
+    if (StringToInt64(theIn->content(), size, &p, 16)) {
+        if (size < 0)
+            throw TexcHere("negative chunk size");
+
+        theChunkSize = theLeftBodySize = size;
+        debugs(94,7, "found chunk: " << theChunkSize);
+        // parse chunk extensions only in the last-chunk
+        if (theChunkSize)
+            theStep = psUnusedChunkExtension;
+        else {
+            theIn->consume(p - theIn->content());
+            theStep = psLastChunkExtension;
         }
-
+    } else
         throw TexcHere("corrupted chunk size");
-    }
+}
 
-    doNeedMoreData = true;
+void ChunkedCodingParser::parseUnusedChunkExtension()
+{
+    size_t crlfBeg = 0;
+    size_t crlfEnd = 0;
+    if (findCrlf(crlfBeg, crlfEnd, inQuoted, inSlashed)) {
+        inQuoted = inSlashed = false;
+        theIn->consume(crlfEnd);
+        theStep = theChunkSize ? psChunkBody : psTrailer;
+    } else {
+        theIn->consume(theIn->contentSize());
+        doNeedMoreData = true;
+    }
 }
 
 void ChunkedCodingParser::parseChunkBody()
 {
     Must(theLeftBodySize > 0); // Should, really
 
     const size_t availSize = min(theLeftBodySize, (uint64_t)theIn->contentSize());
     const size_t safeSize = min(availSize, (size_t)theOut->potentialSpaceSize());
 
     doNeedMoreData = availSize < theLeftBodySize;
     // and we may also need more space
 
     theOut->append(theIn->content(), safeSize);
     theIn->consume(safeSize);
     theLeftBodySize -= safeSize;
 
     if (theLeftBodySize == 0)
         theStep = psChunkEnd;
     else
         Must(needsMoreData() || needsMoreSpace());
 }
 
 void ChunkedCodingParser::parseChunkEnd()
 {
     Must(theLeftBodySize == 0); // Should(), really
 
     size_t crlfBeg = 0;
     size_t crlfEnd = 0;
 
     if (findCrlf(crlfBeg, crlfEnd)) {
         if (crlfBeg != 0) {
             throw TexcHere("found data bewteen chunk end and CRLF");
             return;
         }
 
         theIn->consume(crlfEnd);
         theChunkSize = 0; // done with the current chunk
-        theStep = psChunkBeg;
+        theStep = psChunkSize;
         return;
     }
 
     doNeedMoreData = true;
 }
 
 void ChunkedCodingParser::parseTrailer()
 {
     Must(theChunkSize == 0); // Should(), really
 
     while (mayContinue())
         parseTrailerHeader();
 }
 
 void ChunkedCodingParser::parseTrailerHeader()
 {
     size_t crlfBeg = 0;
     size_t crlfEnd = 0;
 
     if (findCrlf(crlfBeg, crlfEnd)) {
@@ -152,55 +163,62 @@ void ChunkedCodingParser::parseTrailerHe
 
             ; //theTrailer.append(theIn->content(), crlfEnd);
 
         theIn->consume(crlfEnd);
 
         if (crlfBeg == 0)
             theStep = psMessageEnd;
 
         return;
     }
 
     doNeedMoreData = true;
 }
 
 void ChunkedCodingParser::parseMessageEnd()
 {
     // termination step, should not be called
     Must(false); // Should(), really
 }
 
-// finds next CRLF
+/// Finds next CRLF. Does not store parsing state.
 bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd)
 {
+    bool quoted = false;
+    bool slashed = false;
+    return findCrlf(crlfBeg, crlfEnd, quoted, slashed);
+}
+
+/// Finds next CRLF. Parsing state stored in quoted and slashed
+/// parameters. Incremental: can resume when more data is available.
+bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd, bool &quoted, bool &slashed)
+{
     // XXX: This code was copied, with permission, from another software.
     // There is a similar and probably better code inside httpHeaderParse
     // but it seems difficult to isolate due to parsing-unrelated bloat.
     // Such isolation should probably be done before this class is used
     // for handling of traffic "more external" than ICAP.
 
     const char *buf = theIn->content();
     size_t size = theIn->contentSize();
 
     ssize_t crOff = -1;
-    bool quoted = false;
-    bool slashed = false;
 
     for (size_t i = 0; i < size; ++i) {
         if (slashed) {
             slashed = false;
             continue;
         }
 
         const char c = buf[i];
 
         // handle quoted strings
         if (quoted) {
             if (c == '\\')
                 slashed = true;
             else if (c == '"')
                 quoted = false;
 
             continue;
         } else if (c == '"') {
             quoted = true;
             crOff = -1;
@@ -217,51 +235,65 @@ bool ChunkedCodingParser::findCrlf(size_
 
             if (c == '\r')
                 crOff = i;
         } else { // skipping CRs, looking for the first LF
 
             if (c == '\n') {
                 crlfBeg = crOff;
                 crlfEnd = ++i;
                 return true;
             }
 
             if (c != '\r')
                 crOff = -1;
         }
     }
 
     return false;
 }
 
 // chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
-void ChunkedCodingParser::parseChunkExtension(const char *startExt, const char *endExt)
+void ChunkedCodingParser::parseLastChunkExtension()
 {
+    size_t crlfBeg = 0;
+    size_t crlfEnd = 0;
+
+    if (!findCrlf(crlfBeg, crlfEnd)) {
+        doNeedMoreData = true;
+        return;
+    }
+
+    const char *const startExt = theIn->content();
+    const char *const endExt = theIn->content() + crlfBeg;
+
     // chunk-extension starts at startExt and ends with LF at endEx
     for (const char *p = startExt; p < endExt;) {
 
         while (*p == ' ' || *p == '\t') ++p; // skip spaces before ';'
 
         if (*p++ != ';') // each ext name=value pair is preceded with ';'
-            return;
+            break;
 
         while (*p == ' ' || *p == '\t') ++p; // skip spaces before name
 
         if (p >= endExt)
-            return; // malformed extension: ';' without ext name=value pair
+            break; // malformed extension: ';' without ext name=value pair
 
         const int extSize = endExt - p;
         // TODO: we need debugData() stream manipulator to dump data
         debugs(94,7, "Found chunk extension; size=" << extSize);
 
         // TODO: support implied *LWS around '='
         if (extSize > 18 && strncmp(p, "use-original-body=", 18) == 0) {
             (void)StringToInt64(p+18, useOriginBody, &p, 10);
             debugs(94, 3, HERE << "use-original-body=" << useOriginBody);
-            return; // remove to support more than just use-original-body
+            break; // remove to support more than just use-original-body
         } else {
             debugs(94, 5, HERE << "skipping unknown chunk extension");
             // TODO: support quoted-string chunk-ext-val
             while (p < endExt && *p != ';') ++p; // skip until the next ';'
         }
     }
+
+    theIn->consume(crlfEnd);
+    theStep = theChunkSize ? psChunkBody : psTrailer;
 }

=== modified file 'src/ChunkedCodingParser.h'
--- src/ChunkedCodingParser.h	2010-06-14 20:01:59 +0000
+++ src/ChunkedCodingParser.h	2010-08-29 23:56:17 +0000
@@ -56,50 +56,57 @@ public:
     ChunkedCodingParser();
 
     void reset();
 
     /**
      \retval true    complete success
      \retval false   needs more data
      \throws ??      error.
      */
     bool parse(MemBuf *rawData, MemBuf *parsedContent);
 
     bool needsMoreData() const;
     bool needsMoreSpace() const;
 
 private:
     typedef void (ChunkedCodingParser::*Step)();
 
 private:
     bool mayContinue() const;
 
+    void parseChunkSize();
+    void parseUnusedChunkExtension();
+    void parseLastChunkExtension();
     void parseChunkBeg();
     void parseChunkBody();
     void parseChunkEnd();
     void parseTrailer();
     void parseTrailerHeader();
     void parseMessageEnd();
 
-    void parseChunkExtension(const char *, const char * );
     bool findCrlf(size_t &crlfBeg, size_t &crlfEnd);
+    bool findCrlf(size_t &crlfBeg, size_t &crlfEnd, bool &quoted, bool &slashed);
 
 private:
-    static Step psChunkBeg;
+    static Step psChunkSize;
+    static Step psUnusedChunkExtension;
+    static Step psLastChunkExtension;
     static Step psChunkBody;
     static Step psChunkEnd;
     static Step psTrailer;
     static Step psMessageEnd;
 
     MemBuf *theIn;
     MemBuf *theOut;
 
     Step theStep;
     uint64_t theChunkSize;
     uint64_t theLeftBodySize;
     bool doNeedMoreData;
+    bool inQuoted; ///< stores parsing state for incremental findCrlf
+    bool inSlashed; ///< stores parsing state for incremental findCrlf
 
 public:
     int64_t useOriginBody;
 };
 
 #endif /* SQUID_CHUNKEDCODINGPARSER_H */