RequestParser.cc
Go to the documentation of this file.
1/*
2 * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9#include "squid.h"
10#include "debug/Stream.h"
13#include "parser/Tokenizer.h"
14#include "SquidConfig.h"
15
18{
19 // RFC 7230 section 2.6
20 /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
21 return method_.image().length() + uri_.length() + 12;
22}
23
// Consumes empty lines (bare LF or CRLF) found at the start of buf_, ahead of
// the request-line. doParse() calls this as skipGarbageLines().
//
// NOTE(review): this listing is incomplete. The function-name line (listing
// line 38) and listing line 40 are missing. The closing brace on listing line
// 51 has no visible opening partner, so line 40 presumably opened an enclosing
// conditional -- restore both lines from the original source before building.
37void
39{
// NOTE(review): buf_[0] is read below before any visible isEmpty() check, and
// doParse() calls this function without checking for an empty buffer first;
// confirm that indexing an empty SBuf is safe here.
41 if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
42 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
43 "CRLF bytes received ahead of request-line. " <<
44 "Ignored due to relaxed_header_parser.");
45 // Be tolerant of prefix empty lines
46 // ie any series of either \n or \r\n with no other characters and no repeated \r
// One byte is consumed per iteration: a CR is only dropped when an LF follows
// it, and that LF is then dropped by the next iteration.
47 while (!buf_.isEmpty() && (buf_[0] == '\n' ||
48 (buf_[0] == '\r' && buf_.length() > 1 && buf_[1] == '\n'))) {
49 buf_.consume(1);
50 }
51 }
52}
53
61bool
63{
64 // method field is a sequence of TCHAR.
65 // Limit to 32 characters to prevent overly long sequences of non-HTTP
66 // being sucked in before mismatch is detected. 32 is itself annoyingly
67 // big but there are methods registered by IANA that reach 17 bytes:
68 // http://www.iana.org/assignments/http-methods
69 static const size_t maxMethodLength = 32; // TODO: make this configurable?
70
71 SBuf methodFound;
72 if (!tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength)) {
73 debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing or malformed method");
74 parseStatusCode = Http::scBadRequest;
75 return false;
76 }
77 method_ = HttpRequestMethod(methodFound);
78
79 if (!skipDelimiter(tok.skipAll(DelimiterCharacters()), "after method"))
80 return false;
81
82 return true;
83}
84
// Builds (once, as a function-local static) and returns the set of characters
// treated as valid inside a URI. The file's symbol index names this function
// UriValidCharacters().
//
// NOTE(review): this listing is incomplete. The function-name line (listing
// line 87) and listing lines 101, 102 and 106 are missing, so the initializer
// below ends in a dangling '+'. The file's symbol index references
// CharacterSet::ALPHA, CharacterSet::DIGIT and CharacterSet::HEXDIG, which are
// presumably the missing operands -- restore them from the original source.
86static const CharacterSet &
88{
89 /* RFC 3986 section 2:
90 * "
91 * A URI is composed from a limited set of characters consisting of
92 * digits, letters, and a few graphic symbols.
93 * "
94 */
95 static const CharacterSet UriChars =
96 CharacterSet("URI-Chars","") +
97 // RFC 3986 section 2.2 - reserved characters
98 CharacterSet("gen-delims", ":/?#[]@") +
99 CharacterSet("sub-delims", "!$&'()*+,;=") +
100 // RFC 3986 section 2.3 - unreserved characters
// NOTE(review): listing lines 101-102 are missing here.
103 CharacterSet("unreserved", "-._~") +
104 // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
105 CharacterSet("pct-encoded", "%") +
// NOTE(review): listing line 106 (the final operand and the ';') is missing here.
107
108 return UriChars;
109}
110
// Returns the set of characters accepted in the HTTP request-target (URI).
// The strict path at the bottom returns UriValidCharacters() unchanged; the
// relaxed paths add the delimiter characters and, when USE_HTTP_VIOLATIONS is
// set, the RFC 2396 "unwise" characters and octets 128-255.
//
// NOTE(review): this listing is incomplete. The function-name line (listing
// line 113) and listing lines 115, 118 and 130 are missing. The closing brace
// on listing line 136 has no visible opening partner, so line 115 presumably
// opened the conditional that selects the relaxed sets, and lines 118/130
// presumably supplied the first operand of each initializer. Restore them from
// the original source before building.
112const CharacterSet &
114{
116#if USE_HTTP_VIOLATIONS
117 static const CharacterSet RelaxedExtended =
119 // accept whitespace (extended), it will be dealt with later
120 DelimiterCharacters() +
121 // RFC 2396 unwise character set which must never be transmitted
122 // in un-escaped form. But many web services do anyway.
123 CharacterSet("RFC2396-unwise","\"\\|^<>`{}") +
124 // UTF-8 because we want to be future-proof
125 CharacterSet("UTF-8", 128, 255);
126
127 return RelaxedExtended;
128#else
129 static const CharacterSet RelaxedCompliant =
131 // accept whitespace (extended), it will be dealt with later.
132 DelimiterCharacters();
133
134 return RelaxedCompliant;
135#endif
136 }
137
138 // strict parse only accepts what the RFC say we can
139 return UriValidCharacters();
140}
141
142bool
144{
145 /* Arbitrary 64KB URI upper length limit.
146 *
147 * Not quite as arbitrary as it seems though. Old SquidString objects
148 * cannot store strings larger than 64KB, so we must limit until they
149 * have all been replaced with SBuf.
150 *
151 * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
152 * at least 8000 octets for the whole line, including method and version.
153 */
154 const size_t maxUriLength = static_cast<size_t>((64*1024)-1);
155
156 SBuf uriFound;
157 if (!tok.prefix(uriFound, RequestTargetCharacters())) {
158 parseStatusCode = Http::scBadRequest;
159 debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing or malformed URI");
160 return false;
161 }
162
163 if (uriFound.length() > maxUriLength) {
164 // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
165 parseStatusCode = Http::scUriTooLong;
166 debugs(33, ErrorLevel(), "ERROR: invalid request-line: " << uriFound.length() <<
167 "-byte URI exceeds " << maxUriLength << "-byte limit");
168 return false;
169 }
170
171 uri_ = uriFound;
172 return true;
173}
174
175bool
177{
178 static const SBuf http1p0("HTTP/1.0");
179 static const SBuf http1p1("HTTP/1.1");
180 const auto savedTok = tok;
181
182 // Optimization: Expect (and quickly parse) HTTP/1.1 or HTTP/1.0 in
183 // the vast majority of cases.
184 if (tok.skipSuffix(http1p1)) {
185 msgProtocol_ = Http::ProtocolVersion(1, 1);
186 return true;
187 } else if (tok.skipSuffix(http1p0)) {
188 msgProtocol_ = Http::ProtocolVersion(1, 0);
189 return true;
190 } else {
191 // RFC 7230 section 2.6:
192 // HTTP-version = HTTP-name "/" DIGIT "." DIGIT
193 static const CharacterSet period("Decimal point", ".");
194 static const SBuf proto("HTTP/");
195 SBuf majorDigit;
196 SBuf minorDigit;
197 if (tok.suffix(minorDigit, CharacterSet::DIGIT) &&
198 tok.skipOneTrailing(period) &&
199 tok.suffix(majorDigit, CharacterSet::DIGIT) &&
200 tok.skipSuffix(proto)) {
201 const bool multiDigits = majorDigit.length() > 1 || minorDigit.length() > 1;
202 // use '0.0' for unsupported multiple digit version numbers
203 const unsigned int major = multiDigits ? 0 : (*majorDigit.rawContent() - '0');
204 const unsigned int minor = multiDigits ? 0 : (*minorDigit.rawContent() - '0');
205 msgProtocol_ = Http::ProtocolVersion(major, minor);
206 return true;
207 }
208 }
209
210 // A GET request might use HTTP/0.9 syntax
211 if (method_ == Http::METHOD_GET) {
212 // RFC 1945 - no HTTP version field at all
213 tok = savedTok; // in case the URI ends with a digit
214 // report this assumption as an error if configured to triage parsing
215 debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line");
216 msgProtocol_ = Http::ProtocolVersion(0,9);
217 return true;
218 }
219
220 debugs(33, ErrorLevel(), "ERROR: invalid request-line: not HTTP");
221 parseStatusCode = Http::scBadRequest;
222 return false;
223}
224
230bool
231Http::One::RequestParser::skipDelimiter(const size_t count, const char *where)
232{
233 if (count <= 0) {
234 debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing delimiter " << where);
235 parseStatusCode = Http::scBadRequest;
236 return false;
237 }
238
239 // tolerant parser allows multiple whitespace characters between request-line fields
240 if (count > 1 && !Config.onoff.relaxed_header_parser) {
241 debugs(33, ErrorLevel(), "ERROR: invalid request-line: too many delimiters " << where);
242 parseStatusCode = Http::scBadRequest;
243 return false;
244 }
245
246 return true;
247}
248
// Removes CR characters from the end of the request-line, just before the
// terminating LF. One branch accepts any number of trailing CRs (including
// none); the other requires a CR, and on failure sets parseStatusCode to
// Http::scBadRequest and returns false.
//
// NOTE(review): this listing is incomplete. The function-name line (listing
// line 251) and listing line 253 are missing; line 253 held the condition
// that opens the if/else below. By analogy with skipDelimiter() it is
// presumably a Config.onoff.relaxed_header_parser test -- confirm against the
// original source and restore it before building.
250bool
252{
254 (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
255 } else {
256 if (!tok.skipOneTrailing(CharacterSet::CR)) {
257 debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing CR before LF");
258 parseStatusCode = Http::scBadRequest;
259 return false;
260 }
261 }
262 return true;
263}
264
276int
278{
279 debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
280 debugs(74, DBG_DATA, buf_);
281
282 SBuf line;
283
284 // Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
285 // Now, the request line has to end at the first LF.
286 static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
287 Tokenizer lineTok(buf_);
288 if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) {
289 if (buf_.length() >= Config.maxRequestHeaderSize) {
290 /* who should we blame for our failure to parse this line? */
291
292 Tokenizer methodTok(buf_);
293 if (!parseMethodField(methodTok))
294 return -1; // blame a bad method (or its delimiter)
295
296 // assume it is the URI
297 debugs(74, ErrorLevel(), "ERROR: invalid request-line: URI exceeds " <<
298 Config.maxRequestHeaderSize << "-byte limit");
299 parseStatusCode = Http::scUriTooLong;
300 return -1;
301 }
302 debugs(74, 5, "Parser needs more data");
303 return 0;
304 }
305
306 Tokenizer tok(line);
307
308 if (!parseMethodField(tok))
309 return -1;
310
311 /* now parse backwards, to leave just the URI */
312 if (!skipTrailingCrs(tok))
313 return -1;
314
315 if (!parseHttpVersionField(tok))
316 return -1;
317
318 if (!http0() && !skipDelimiter(tok.skipAllTrailing(DelimiterCharacters()), "before protocol version"))
319 return -1;
320
321 /* parsed everything before and after the URI */
322
323 if (!parseUriField(tok))
324 return -1;
325
326 if (!tok.atEnd()) {
327 debugs(33, ErrorLevel(), "ERROR: invalid request-line: garbage after URI");
328 parseStatusCode = Http::scBadRequest;
329 return -1;
330 }
331
332 parseStatusCode = Http::scOkay;
333 buf_ = lineTok.remaining(); // incremental parse checkpoint
334 return 1;
335}
336
337bool
339{
340 const bool result = doParse(aBuf);
341 if (preserveParsed_) {
342 assert(aBuf.length() >= remaining().length());
343 parsed_.append(aBuf.substr(0, aBuf.length() - remaining().length())); // newly parsed bytes
344 }
345
346 return result;
347}
348
349// raw is not a reference because a reference might point back to our own buf_ or parsed_
350bool
352{
353 buf_ = aBuf;
354 debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
355
356 // stage 1: locate the request-line
357 if (parsingStage_ == HTTP_PARSE_NONE) {
358 skipGarbageLines();
359
360 // if we hit something before EOS treat it as a message
361 if (!buf_.isEmpty())
362 parsingStage_ = HTTP_PARSE_FIRST;
363 else
364 return false;
365 }
366
367 // stage 2: parse the request-line
368 if (parsingStage_ == HTTP_PARSE_FIRST) {
369 const int retcode = parseRequestFirstLine();
370
371 // first-line (or a look-alike) found successfully.
372 if (retcode > 0) {
373 parsingStage_ = HTTP_PARSE_MIME;
374 }
375
376 debugs(74, 5, "request-line: retval " << retcode << ": line={" << aBuf.length() << ", data='" << aBuf << "'}");
377 debugs(74, 5, "request-line: method: " << method_);
378 debugs(74, 5, "request-line: url: " << uri_);
379 debugs(74, 5, "request-line: proto: " << msgProtocol_);
380 debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
381
382 // syntax errors already
383 if (retcode < 0) {
384 parsingStage_ = HTTP_PARSE_DONE;
385 return false;
386 }
387 }
388
389 // stage 3: locate the mime header block
390 if (parsingStage_ == HTTP_PARSE_MIME) {
391 // HTTP/1.x request-line is valid and parsing completed.
392 if (!grabMimeBlock("Request", Config.maxRequestHeaderSize)) {
393 if (parseStatusCode == Http::scHeaderTooLarge)
394 parseStatusCode = Http::scRequestHeaderFieldsTooLarge;
395 return false;
396 }
397 }
398
399 return !needsMoreData();
400}
401
static const CharacterSet & UriValidCharacters()
the characters which truly are valid within URI
class SquidConfig Config
Definition: SquidConfig.cc:12
#define assert(EX)
Definition: assert.h:17
optimized set of C chars, with quick membership test and merge support
Definition: CharacterSet.h:18
CharacterSet complement(const char *complementLabel=nullptr) const
Definition: CharacterSet.cc:74
static const CharacterSet TCHAR
Definition: CharacterSet.h:105
static const CharacterSet DIGIT
Definition: CharacterSet.h:84
static const CharacterSet ALPHA
Definition: CharacterSet.h:76
static const CharacterSet HEXDIG
Definition: CharacterSet.h:88
static const CharacterSet LF
Definition: CharacterSet.h:92
static const CharacterSet CR
Definition: CharacterSet.h:80
const SBuf & image() const
SBuf::size_type size_type
Definition: Parser.h:43
::Parser::Tokenizer Tokenizer
Definition: Parser.h:44
bool parseMethodField(Tokenizer &)
bool doParse(const SBuf &aBuf)
called from parse() to do the parsing
Http1::Parser::size_type firstLineSize() const override
size in bytes of the first line including CRLF terminator
static const CharacterSet & RequestTargetCharacters()
characters which Squid will accept in the HTTP request-target (URI)
bool parse(const SBuf &aBuf) override
bool skipDelimiter(const size_t count, const char *where)
bool parseHttpVersionField(Tokenizer &)
HttpRequestMethod method_
what request method has been found on the first line
Definition: RequestParser.h:72
bool parseUriField(Tokenizer &)
SBuf uri_
raw copy of the original client request-line URI field
Definition: RequestParser.h:75
bool skipTrailingCrs(Tokenizer &tok)
Parse CRs at the end of request-line, just before the terminating LF.
Definition: SBuf.h:94
const char * rawContent() const
Definition: SBuf.cc:509
size_type length() const
Returns the number of bytes stored in SBuf.
Definition: SBuf.h:415
SBuf substr(size_type pos, size_type n=npos) const
Definition: SBuf.cc:576
struct SquidConfig::@106 onoff
size_t maxRequestHeaderSize
Definition: SquidConfig.h:134
int relaxed_header_parser
Definition: SquidConfig.h:315
#define DBG_DATA
Definition: Stream.h:40
#define DBG_IMPORTANT
Definition: Stream.h:38
#define debugs(SECTION, LEVEL, CONTENT)
Definition: Stream.h:194
@ HTTP_PARSE_FIRST
HTTP/1 message first-line.
Definition: Parser.h:24
@ HTTP_PARSE_DONE
parsed a message header, or reached a terminal syntax error
Definition: Parser.h:29
@ HTTP_PARSE_MIME
HTTP/1 mime-header block.
Definition: Parser.h:28
@ HTTP_PARSE_NONE
initialized, but nothing usefully parsed yet
Definition: Parser.h:23
int ErrorLevel()
the right debugs() level for logging HTTP violation messages
Definition: Parser.cc:269
@ scUriTooLong
Definition: StatusCode.h:58
@ scHeaderTooLarge
Definition: StatusCode.h:87
@ scBadRequest
Definition: StatusCode.h:44
@ scOkay
Definition: StatusCode.h:26
@ scRequestHeaderFieldsTooLarge
Definition: StatusCode.h:69
@ METHOD_GET
Definition: MethodType.h:25
AnyP::ProtocolVersion ProtocolVersion(unsigned int aMajor, unsigned int aMinor)
HTTP version label information.
Definition: parse.c:160

 

Introduction

Documentation

Support

Miscellaneous

Web Site Translations

Mirrors