squid : Optimising Web Delivery

Go to the documentation of this file.

 /*
  * Copyright (C) 1996-2025 The Squid Software Foundation and contributors
  *
  * Squid software is distributed under GPLv2+ license and includes
  * contributions from numerous individuals and organizations.
  * Please see the COPYING and CONTRIBUTORS files for details.
  */
  
 #include "squid.h"
 #include "debug/Stream.h"
 #include "http/one/RequestParser.h"
 #include "http/ProtocolVersion.h"
 #include "parser/Tokenizer.h"
 #include "SquidConfig.h"
  
 Http1::Parser::size_type
 Http::One::RequestParser::firstLineSize() const
 {
     // RFC 7230 section 2.6
     /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
     return method_.image().length() + uri_.length() + 12;
 }
  
 void
 Http::One::RequestParser::skipGarbageLines()
 {
     if (Config.onoff.relaxed_header_parser) {
         if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
             debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
                    "CRLF bytes received ahead of request-line. " <<
                    "Ignored due to relaxed_header_parser.");
         // Be tolerant of prefix empty lines
         // ie any series of either \n or \r\n with no other characters and no repeated \r
         while (!buf_.isEmpty() && (buf_[0] == '\n' ||
                                    (buf_[0] == '\r' && buf_.length() > 1 && buf_[1] == '\n'))) {
             buf_.consume(1);
         }
     }
 }
  
 bool
 Http::One::RequestParser::parseMethodField(Tokenizer &tok)
 {
     // method field is a sequence of TCHAR.
     // Limit to 32 characters to prevent overly long sequences of non-HTTP
     // being sucked in before mismatch is detected. 32 is itself annoyingly
     // big but there are methods registered by IANA that reach 17 bytes:
     //  http://www.iana.org/assignments/http-methods
     static const size_t maxMethodLength = 32; // TODO: make this configurable?
  
     SBuf methodFound;
     if (!tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength)) {
         debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing or malformed method");
         parseStatusCode = Http::scBadRequest;
         return false;
     }
     method_ = HttpRequestMethod(methodFound);
  
     if (!skipDelimiter(tok.skipAll(DelimiterCharacters()), "after method"))
         return false;
  
     return true;
 }
  
 static const CharacterSet &
 UriValidCharacters()
 {
     /* RFC 3986 section 2:
      * "
      *   A URI is composed from a limited set of characters consisting of
      *   digits, letters, and a few graphic symbols.
      * "
      */
     static const CharacterSet UriChars =
         CharacterSet("URI-Chars","") +
         // RFC 3986 section 2.2 - reserved characters
         CharacterSet("gen-delims", ":/?#[]@") +
         CharacterSet("sub-delims", "!$&'()*+,;=") +
         // RFC 3986 section 2.3 - unreserved characters
         CharacterSet::RFC3986_UNRESERVED() +
         // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
         CharacterSet("pct-encoded", "%") +
         CharacterSet::HEXDIG;
  
     return UriChars;
 }
  
 const CharacterSet &
 Http::One::RequestParser::RequestTargetCharacters()
 {
     if (Config.onoff.relaxed_header_parser) {
 #if USE_HTTP_VIOLATIONS
         static const CharacterSet RelaxedExtended =
             UriValidCharacters() +
             // accept whitespace (extended), it will be dealt with later
             DelimiterCharacters() +
             // RFC 2396 unwise character set which must never be transmitted
             // in un-escaped form. But many web services do anyway.
             CharacterSet("RFC2396-unwise","\"\\|^<>`{}") +
             // UTF-8 because we want to be future-proof
             CharacterSet("UTF-8", 128, 255);
  
         return RelaxedExtended;
 #else
         static const CharacterSet RelaxedCompliant =
             UriValidCharacters() +
             // accept whitespace (extended), it will be dealt with later.
             DelimiterCharacters();
  
         return RelaxedCompliant;
 #endif
     }
  
     // strict parse only accepts what the RFC say we can
     return UriValidCharacters();
 }
  
 bool
 Http::One::RequestParser::parseUriField(Tokenizer &tok)
 {
     /* Arbitrary 64KB URI upper length limit.
      *
      * Not quite as arbitrary as it seems though. Old SquidString objects
      * cannot store strings larger than 64KB, so we must limit until they
      * have all been replaced with SBuf.
      *
      * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
      * at least 8000 octets for the whole line, including method and version.
      */
     const size_t maxUriLength = static_cast<size_t>((64*1024)-1);
  
     SBuf uriFound;
     if (!tok.prefix(uriFound, RequestTargetCharacters())) {
         parseStatusCode = Http::scBadRequest;
         debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing or malformed URI");
         return false;
     }
  
     if (uriFound.length() > maxUriLength) {
         // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
         parseStatusCode = Http::scUriTooLong;
         debugs(33, ErrorLevel(), "ERROR: invalid request-line: " << uriFound.length() <<
                "-byte URI exceeds " << maxUriLength << "-byte limit");
         return false;
     }
  
     uri_ = uriFound;
     return true;
 }
  
 bool
 Http::One::RequestParser::parseHttpVersionField(Tokenizer &tok)
 {
     static const SBuf http1p0("HTTP/1.0");
     static const SBuf http1p1("HTTP/1.1");
     const auto savedTok = tok;
  
     // Optimization: Expect (and quickly parse) HTTP/1.1 or HTTP/1.0 in
     // the vast majority of cases.
     if (tok.skipSuffix(http1p1)) {
         msgProtocol_ = Http::ProtocolVersion(1, 1);
         return true;
     } else if (tok.skipSuffix(http1p0)) {
         msgProtocol_ = Http::ProtocolVersion(1, 0);
         return true;
     } else {
         // RFC 7230 section 2.6:
         // HTTP-version  = HTTP-name "/" DIGIT "." DIGIT
         static const CharacterSet period("Decimal point", ".");
         static const SBuf proto("HTTP/");
         SBuf majorDigit;
         SBuf minorDigit;
         if (tok.suffix(minorDigit, CharacterSet::DIGIT) &&
                 tok.skipOneTrailing(period) &&
                 tok.suffix(majorDigit, CharacterSet::DIGIT) &&
                 tok.skipSuffix(proto)) {
             const bool multiDigits = majorDigit.length() > 1 || minorDigit.length() > 1;
             // use '0.0' for unsupported multiple digit version numbers
             const unsigned int major = multiDigits ? 0 : (*majorDigit.rawContent() - '0');
             const unsigned int minor = multiDigits ? 0 : (*minorDigit.rawContent() - '0');
             msgProtocol_ = Http::ProtocolVersion(major, minor);
             return true;
         }
     }
  
     // A GET request might use HTTP/0.9 syntax
     if (method_ == Http::METHOD_GET) {
         // RFC 1945 - no HTTP version field at all
         tok = savedTok; // in case the URI ends with a digit
         // report this assumption as an error if configured to triage parsing
         debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line");
         msgProtocol_ = Http::ProtocolVersion(0,9);
         return true;
     }
  
     debugs(33, ErrorLevel(), "ERROR: invalid request-line: not HTTP");
     parseStatusCode = Http::scBadRequest;
     return false;
 }
  
 bool
 Http::One::RequestParser::skipDelimiter(const size_t count, const char *where)
 {
     if (count <= 0) {
         debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing delimiter " << where);
         parseStatusCode = Http::scBadRequest;
         return false;
     }
  
     // tolerant parser allows multiple whitespace characters between request-line fields
     if (count > 1 && !Config.onoff.relaxed_header_parser) {
         debugs(33, ErrorLevel(), "ERROR: invalid request-line: too many delimiters " << where);
         parseStatusCode = Http::scBadRequest;
         return false;
     }
  
     return true;
 }
  
 bool
 Http::One::RequestParser::skipTrailingCrs(Tokenizer &tok)
 {
     if (Config.onoff.relaxed_header_parser) {
         (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
     } else {
         if (!tok.skipOneTrailing(CharacterSet::CR)) {
             debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing CR before LF");
             parseStatusCode = Http::scBadRequest;
             return false;
         }
     }
     return true;
 }
  
 int
 Http::One::RequestParser::parseRequestFirstLine()
 {
     debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
     debugs(74, DBG_DATA, buf_);
  
     SBuf line;
  
     // Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
     // Now, the request line has to end at the first LF.
     static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
     Tokenizer lineTok(buf_);
     if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) {
         if (buf_.length() >= Config.maxRequestHeaderSize) {
             /* who should we blame for our failure to parse this line? */
  
             Tokenizer methodTok(buf_);
             if (!parseMethodField(methodTok))
                 return -1; // blame a bad method (or its delimiter)
  
             // assume it is the URI
             debugs(74, ErrorLevel(), "ERROR: invalid request-line: URI exceeds " <<
                    Config.maxRequestHeaderSize << "-byte limit");
             parseStatusCode = Http::scUriTooLong;
             return -1;
         }
         debugs(74, 5, "Parser needs more data");
         return 0;
     }
  
     Tokenizer tok(line);
  
     if (!parseMethodField(tok))
         return -1;
  
     /* now parse backwards, to leave just the URI */
     if (!skipTrailingCrs(tok))
         return -1;
  
     if (!parseHttpVersionField(tok))
         return -1;
  
     if (!http0() && !skipDelimiter(tok.skipAllTrailing(DelimiterCharacters()), "before protocol version"))
         return -1;
  
     /* parsed everything before and after the URI */
  
     if (!parseUriField(tok))
         return -1;
  
     if (!tok.atEnd()) {
         debugs(33, ErrorLevel(), "ERROR: invalid request-line: garbage after URI");
         parseStatusCode = Http::scBadRequest;
         return -1;
     }
  
     parseStatusCode = Http::scOkay;
     buf_ = lineTok.remaining(); // incremental parse checkpoint
     return 1;
 }
  
 bool
 Http::One::RequestParser::parse(const SBuf &aBuf)
 {
     const bool result = doParse(aBuf);
     if (preserveParsed_) {
         assert(aBuf.length() >= remaining().length());
         parsed_.append(aBuf.substr(0, aBuf.length() - remaining().length())); // newly parsed bytes
     }
  
     return result;
 }
  
 // raw is not a reference because a reference might point back to our own buf_ or parsed_
 bool
 Http::One::RequestParser::doParse(const SBuf &aBuf)
 {
     buf_ = aBuf;
     debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
  
     // stage 1: locate the request-line
     if (parsingStage_ == HTTP_PARSE_NONE) {
         skipGarbageLines();
  
         // if we hit something before EOS treat it as a message
         if (!buf_.isEmpty())
             parsingStage_ = HTTP_PARSE_FIRST;
         else
             return false;
     }
  
     // stage 2: parse the request-line
     if (parsingStage_ == HTTP_PARSE_FIRST) {
         const int retcode = parseRequestFirstLine();
  
         // first-line (or a look-alike) found successfully.
         if (retcode > 0) {
             parsingStage_ = HTTP_PARSE_MIME;
         }
  
         debugs(74, 5, "request-line: retval " << retcode << ": line={" << aBuf.length() << ", data='" << aBuf << "'}");
         debugs(74, 5, "request-line: method: " << method_);
         debugs(74, 5, "request-line: url: " << uri_);
         debugs(74, 5, "request-line: proto: " << msgProtocol_);
         debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
  
         // syntax errors already
         if (retcode < 0) {
             parsingStage_ = HTTP_PARSE_DONE;
             return false;
         }
     }
  
     // stage 3: locate the mime header block
     if (parsingStage_ == HTTP_PARSE_MIME) {
         // HTTP/1.x request-line is valid and parsing completed.
         if (!grabMimeBlock("Request", Config.maxRequestHeaderSize)) {
             if (parseStatusCode == Http::scHeaderTooLarge)
                 parseStatusCode = Http::scRequestHeaderFieldsTooLarge;
             return false;
         }
     }
  
     return !needsMoreData();
 }
  

Http::One::RequestParser::RequestTargetCharacters

static const CharacterSet & RequestTargetCharacters()

characters which Squid will accept in the HTTP request-target (URI)

Definition: RequestParser.cc:111

SquidConfig::relaxed_header_parser

int relaxed_header_parser

Definition: SquidConfig.h:315

Http::scBadRequest

@ scBadRequest

Definition: StatusCode.h:45

Http::One::RequestParser::skipDelimiter

bool skipDelimiter(const size_t count, const char *where)

Definition: RequestParser.cc:229

Http::One::RequestParser::parseMethodField

bool parseMethodField(Tokenizer &)

Definition: RequestParser.cc:62

Http::One::HTTP_PARSE_MIME

@ HTTP_PARSE_MIME

HTTP/1 mime-header block.

Definition: Parser.h:28

SBuf

Definition: SBuf.h:93

CharacterSet::complement

CharacterSet complement(const char *complementLabel=nullptr) const

Definition: CharacterSet.cc:74

Http::One::RequestParser::parseUriField

bool parseUriField(Tokenizer &)

Definition: RequestParser.cc:141

Http::One::RequestParser::parseRequestFirstLine

int parseRequestFirstLine()

Definition: RequestParser.cc:275

Http::One::RequestParser::uri_

SBuf uri_

raw copy of the original client request-line URI field

Definition: RequestParser.h:75

HttpRequestMethod::image

const SBuf & image() const

Definition: RequestMethod.cc:99

CharacterSet::LF

static const CharacterSet LF

Definition: CharacterSet.h:92

SBuf::substr

SBuf substr(size_type pos, size_type n=npos) const

Definition: SBuf.cc:576

Http::One::RequestParser::firstLineSize

Http1::Parser::size_type firstLineSize() const override

size in bytes of the first line including CRLF terminator

Definition: RequestParser.cc:17

DBG_DATA

#define DBG_DATA

Definition: Stream.h:40

Http::One::Parser::size_type

SBuf::size_type size_type

Definition: Parser.h:43

CharacterSet::CR

static const CharacterSet CR

Definition: CharacterSet.h:80

SBuf::rawContent

const char * rawContent() const

Definition: SBuf.cc:509

Tokenizer.h

Http::One::RequestParser::skipGarbageLines

void skipGarbageLines()

Definition: RequestParser.cc:38

Http::One::RequestParser::method_

HttpRequestMethod method_

what request method has been found on the first line

Definition: RequestParser.h:72

Http::One::RequestParser::skipTrailingCrs

bool skipTrailingCrs(Tokenizer &tok)

Parse CRs at the end of request-line, just before the terminating LF.

Definition: RequestParser.cc:249

CharacterSet::TCHAR

static const CharacterSet TCHAR

Definition: CharacterSet.h:105

CharacterSet::HEXDIG

static const CharacterSet HEXDIG

Definition: CharacterSet.h:88

HttpRequestMethod

Definition: RequestMethod.h:26

Http::One::RequestParser::doParse

bool doParse(const SBuf &aBuf)

called from parse() to do the parsing

Definition: RequestParser.cc:349

Http::One::HTTP_PARSE_NONE

@ HTTP_PARSE_NONE

initialized, but nothing usefully parsed yet

Definition: Parser.h:23

Http::scRequestHeaderFieldsTooLarge

@ scRequestHeaderFieldsTooLarge

Definition: StatusCode.h:71

assert

#define assert(EX)

Definition: assert.h:17

Http::scUriTooLong

@ scUriTooLong

Definition: StatusCode.h:59

UriValidCharacters

static const CharacterSet & UriValidCharacters()

the characters which truly are valid within URI

Definition: RequestParser.cc:87

CharacterSet::DIGIT

static const CharacterSet DIGIT

Definition: CharacterSet.h:84

SBuf::length

size_type length() const

Returns the number of bytes stored in SBuf.

Definition: SBuf.h:419

SquidConfig::maxRequestHeaderSize

size_t maxRequestHeaderSize

Definition: SquidConfig.h:134

Http::One::HTTP_PARSE_FIRST

@ HTTP_PARSE_FIRST

HTTP/1 message first-line.

Definition: Parser.h:24

Http::One::RequestParser::parse

bool parse(const SBuf &aBuf) override

Definition: RequestParser.cc:336

Http::One::HTTP_PARSE_DONE

@ HTTP_PARSE_DONE

parsed a message header, or reached a terminal syntax error

Definition: Parser.h:29

ProtocolVersion.h

tok

Definition: parse.c:160

Stream.h

Http::One::Parser::Tokenizer

::Parser::Tokenizer Tokenizer

Definition: Parser.h:44

CharacterSet::RFC3986_UNRESERVED

static const CharacterSet & RFC3986_UNRESERVED()

allowed URI characters that do not have a reserved purpose, RFC 3986

Definition: CharacterSet.cc:164

DBG_IMPORTANT

#define DBG_IMPORTANT

Definition: Stream.h:38

SquidConfig.h

CharacterSet

optimized set of C chars, with quick membership test and merge support

Definition: CharacterSet.h:17

Http::One::ErrorLevel

int ErrorLevel()

the right debugs() level for logging HTTP violation messages

Definition: Parser.cc:269

Http::scOkay

@ scOkay

Definition: StatusCode.h:27

Http::One::RequestParser::parseHttpVersionField

bool parseHttpVersionField(Tokenizer &)

Definition: RequestParser.cc:174

RequestParser.h

Http::scHeaderTooLarge

@ scHeaderTooLarge

Header too large to process.

Definition: StatusCode.h:89

squid.h

Http::METHOD_GET

@ METHOD_GET

Definition: MethodType.h:25

SquidConfig::onoff

struct SquidConfig::@90 onoff

debugs

#define debugs(SECTION, LEVEL, CONTENT)

Definition: Stream.h:192

Config

class SquidConfig Config

Definition: SquidConfig.cc:12

Http::ProtocolVersion

AnyP::ProtocolVersion ProtocolVersion(unsigned int aMajor, unsigned int aMinor)

HTTP version label information.

Definition: ProtocolVersion.h:19

squid-cache.org

Optimising Web Delivery

Introduction

Documentation

Support

Miscellaneous