RequestParser.cc
Go to the documentation of this file.
1/*
2 * Copyright (C) 1996-2022 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9#include "squid.h"
10#include "debug/Stream.h"
13#include "parser/Tokenizer.h"
14#include "SquidConfig.h"
15
// NOTE(review): presumably the body of RequestParser::firstLineSize(); the
// signature line is not visible in this listing -- confirm.
// Computes a first-line size from the stored method image and URI lengths
// plus a fixed overhead for the remaining request-line syntax.
18{
19 // RFC 7230 section 2.6
20 /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
 // 12 = 2 (two SP) + 5 ("HTTP/") + 3 (DIGIT "." DIGIT) + 2 (CRLF)
21 return method_.image().length() + uri_.length() + 12;
22}
23
// NOTE(review): presumably RequestParser::skipGarbageLines() (it is called by
// that name from doParse()); the name line is not visible here -- confirm.
// Drops empty lines (bare LF or CRLF) from the front of buf_ so that the
// request-line starts at buf_[0].
37void
39{
 // NOTE(review): the brace count below implies an enclosing conditional that
 // opens before the next statement but is not visible in this listing.
 // A negative relaxed_header_parser value additionally logs a warning when
 // the buffer starts with CR or LF.
 // NOTE(review): buf_[0] is read here before any emptiness check -- confirm
 // that SBuf::operator[] tolerates an empty buffer.
41 if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
42 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
43 "CRLF bytes received ahead of request-line. " <<
44 "Ignored due to relaxed_header_parser.");
45 // Be tolerant of prefix empty lines
46 // ie any series of either \n or \r\n with no other characters and no repeated \r
 // One byte is consumed per iteration: a CR is dropped only when an LF
 // follows it, and that LF is then dropped by the next iteration.
 // NOTE(review): buf_[1] is read when buf_ may hold a single CR byte --
 // confirm that an out-of-range index is safe for SBuf.
47 while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) {
48 buf_.consume(1);
49 }
50 }
51}
52
// NOTE(review): presumably RequestParser::parseMethodField(Tokenizer &tok);
// the name line is not visible in this listing -- confirm.
// Extracts the request method from the front of tok and stores it in method_.
// Returns true when a method and its trailing delimiter were accepted.
// Returns false otherwise; a missing or malformed method sets parseStatusCode
// to Http::scBadRequest here, and delimiter failures are reported by
// skipDelimiter().
60bool
62{
63 // method field is a sequence of TCHAR.
64 // Limit to 32 characters to prevent overly long sequences of non-HTTP
65 // being sucked in before mismatch is detected. 32 is itself annoyingly
66 // big but there are methods registered by IANA that reach 17 bytes:
67 // http://www.iana.org/assignments/http-methods
68 static const size_t maxMethodLength = 32; // TODO: make this configurable?
69
70 SBuf methodFound;
71 if (!tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength)) {
72 debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing or malformed method");
73 parseStatusCode = Http::scBadRequest;
74 return false;
75 }
76 method_ = HttpRequestMethod(methodFound);
77
 // the number of delimiter characters skipped is validated by skipDelimiter()
78 if (!skipDelimiter(tok.skipAll(DelimiterCharacters()), "after method"))
79 return false;
80
81 return true;
82}
83
// NOTE(review): presumably UriValidCharacters(); the name line is not visible
// in this listing -- confirm.
// Returns a reference to a function-local static CharacterSet assembled from
// the RFC 3986 character classes listed below.
85static const CharacterSet &
87{
88 /* RFC 3986 section 2:
89 * "
90 * A URI is composed from a limited set of characters consisting of
91 * digits, letters, and a few graphic symbols.
92 * "
93 */
94 static const CharacterSet UriChars =
95 CharacterSet("URI-Chars","") +
96 // RFC 3986 section 2.2 - reserved characters
97 CharacterSet("gen-delims", ":/?#[]@") +
98 CharacterSet("sub-delims", "!$&'()*+,;=") +
99 // RFC 3986 section 2.3 - unreserved characters
 // NOTE(review): listing lines 100-101 are not visible; presumably they add
 // the letter and digit sets that the "unreserved" class also needs -- confirm.
102 CharacterSet("unreserved", "-._~") +
103 // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
104 CharacterSet("pct-encoded", "%") +
 // NOTE(review): the final operand of this sum (listing line 105) is not
 // visible; presumably the HEXDIG set -- confirm.
106
107 return UriChars;
108}
109
// NOTE(review): presumably RequestParser::RequestTargetCharacters(); the name
// line is not visible in this listing -- confirm.
// Returns the set of characters accepted inside the request-target (URI).
// The strict set is UriValidCharacters(); the relaxed sets built below add
// further characters on top of it.
111const CharacterSet &
113{
 // NOTE(review): the condition selecting the relaxed sets (listing line 114)
 // is not visible; the closing brace before the strict return implies an
 // enclosing conditional -- confirm which setting controls it.
115#if USE_HTTP_VIOLATIONS
116 static const CharacterSet RelaxedExtended =
 // NOTE(review): the first operand(s) of this sum (listing line 117) are not visible.
118 // accept whitespace (extended), it will be dealt with later
119 DelimiterCharacters() +
120 // RFC 2396 unwise character set which must never be transmitted
121 // in un-escaped form. But many web services do anyway.
122 CharacterSet("RFC2396-unwise","\"\\|^<>`{}") +
123 // UTF-8 because we want to be future-proof
 // presumably this constructor covers the byte range 128..255 -- confirm
124 CharacterSet("UTF-8", 128, 255);
125
126 return RelaxedExtended;
127#else
128 static const CharacterSet RelaxedCompliant =
 // NOTE(review): the first operand(s) of this sum (listing line 129) are not visible.
130 // accept whitespace (extended), it will be dealt with later.
131 DelimiterCharacters();
132
133 return RelaxedCompliant;
134#endif
135 }
136
137 // strict parse only accepts what the RFC says we can
138 return UriValidCharacters();
139}
140
// NOTE(review): presumably RequestParser::parseUriField(Tokenizer &tok); the
// name line is not visible in this listing -- confirm.
// Extracts the request-target from tok and stores it in uri_.
// Returns true on success. Returns false with parseStatusCode set to
// Http::scBadRequest when no acceptable URI characters are found, or to
// Http::scUriTooLong when the URI is longer than 65535 bytes.
141bool
143{
144 /* Arbitrary 64KB URI upper length limit.
145 *
146 * Not quite as arbitrary as it seems though. Old SquidString objects
147 * cannot store strings larger than 64KB, so we must limit until they
148 * have all been replaced with SBuf.
149 *
150 * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
151 * at least 8000 octets for the whole line, including method and version.
152 */
153 const size_t maxUriLength = static_cast<size_t>((64*1024)-1);
154
155 SBuf uriFound;
 // no length limit is passed to prefix(); the size check happens afterwards
 // so that an over-long URI yields 414 rather than 400
156 if (!tok.prefix(uriFound, RequestTargetCharacters())) {
157 parseStatusCode = Http::scBadRequest;
158 debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing or malformed URI");
159 return false;
160 }
161
162 if (uriFound.length() > maxUriLength) {
163 // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
164 parseStatusCode = Http::scUriTooLong;
165 debugs(33, ErrorLevel(), "ERROR: invalid request-line: " << uriFound.length() <<
166 "-byte URI exceeds " << maxUriLength << "-byte limit");
167 return false;
168 }
169
170 uri_ = uriFound;
171 return true;
172}
173
// NOTE(review): presumably RequestParser::parseHttpVersionField(Tokenizer &tok);
// the name line is not visible in this listing -- confirm.
// Determines the protocol version from the tail of the request-line and
// stores it in msgProtocol_.
// Returns true when a version was recognized, or when HTTP/0.9 is assumed for
// a GET request without a version field.
// Returns false with parseStatusCode set to Http::scBadRequest otherwise.
174bool
176{
177 static const SBuf http1p0("HTTP/1.0");
178 static const SBuf http1p1("HTTP/1.1");
 // snapshot of the tokenizer, restored if we fall back to HTTP/0.9 below
179 const auto savedTok = tok;
180
181 // Optimization: Expect (and quickly parse) HTTP/1.1 or HTTP/1.0 in
182 // the vast majority of cases.
183 if (tok.skipSuffix(http1p1)) {
184 msgProtocol_ = Http::ProtocolVersion(1, 1);
185 return true;
186 } else if (tok.skipSuffix(http1p0)) {
187 msgProtocol_ = Http::ProtocolVersion(1, 0);
188 return true;
189 } else {
190 // RFC 7230 section 2.6:
191 // HTTP-version = HTTP-name "/" DIGIT "." DIGIT
192 static const CharacterSet period("Decimal point", ".");
193 static const SBuf proto("HTTP/");
194 SBuf majorDigit;
195 SBuf minorDigit;
 // the pieces are matched in reverse order: minor digits, ".", major
 // digits, then the "HTTP/" label
196 if (tok.suffix(minorDigit, CharacterSet::DIGIT) &&
197 tok.skipOneTrailing(period) &&
198 tok.suffix(majorDigit, CharacterSet::DIGIT) &&
199 tok.skipSuffix(proto)) {
200 const bool multiDigits = majorDigit.length() > 1 || minorDigit.length() > 1;
201 // use '0.0' for unsupported multiple digit version numbers
202 const unsigned int major = multiDigits ? 0 : (*majorDigit.rawContent() - '0');
203 const unsigned int minor = multiDigits ? 0 : (*minorDigit.rawContent() - '0');
204 msgProtocol_ = Http::ProtocolVersion(major, minor);
205 return true;
206 }
207 }
208
209 // A GET request might use HTTP/0.9 syntax
210 if (method_ == Http::METHOD_GET) {
211 // RFC 1945 - no HTTP version field at all
 // the failed attempt above may have partially consumed the tail of tok
212 tok = savedTok; // in case the URI ends with a digit
213 // report this assumption as an error if configured to triage parsing
214 debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line");
215 msgProtocol_ = Http::ProtocolVersion(0,9);
216 return true;
217 }
218
219 debugs(33, ErrorLevel(), "ERROR: invalid request-line: not HTTP");
220 parseStatusCode = Http::scBadRequest;
221 return false;
222}
223
229bool
230Http::One::RequestParser::skipDelimiter(const size_t count, const char *where)
231{
232 if (count <= 0) {
233 debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing delimiter " << where);
234 parseStatusCode = Http::scBadRequest;
235 return false;
236 }
237
238 // tolerant parser allows multiple whitespace characters between request-line fields
239 if (count > 1 && !Config.onoff.relaxed_header_parser) {
240 debugs(33, ErrorLevel(), "ERROR: invalid request-line: too many delimiters " << where);
241 parseStatusCode = Http::scBadRequest;
242 return false;
243 }
244
245 return true;
246}
247
// NOTE(review): presumably RequestParser::skipTrailingCrs(Tokenizer &tok); the
// name line is not visible in this listing -- confirm.
// Removes the CR(s) found at the end of the request-line token.
// Returns false with parseStatusCode set to Http::scBadRequest when the
// mandatory single CR is absent on the second branch.
249bool
251{
 // NOTE(review): the condition opening the first branch (listing line 252) is
 // not visible; the "else" below implies one -- confirm which setting it tests.
 // first branch: any number of trailing CRs, including none, is accepted
253 (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
254 } else {
 // second branch: a CR must be present at the end of the token
255 if (!tok.skipOneTrailing(CharacterSet::CR)) {
256 debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing CR before LF");
257 parseStatusCode = Http::scBadRequest;
258 return false;
259 }
260 }
261 return true;
262}
263
// NOTE(review): presumably RequestParser::parseRequestFirstLine() (it is called
// by that name from doParse()); the name line is not visible here -- confirm.
// Parses one request-line from the front of buf_.
// Returns 1 when the line was parsed: method_, uri_ and msgProtocol_ are set,
// parseStatusCode is Http::scOkay and buf_ is advanced past the line.
// Returns 0 when no LF has arrived yet and the buffer is still below
// Config.maxRequestHeaderSize.
// Returns -1 on a syntax or size error, with parseStatusCode set to the
// matching error status.
275int
277{
278 debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
279 debugs(74, DBG_DATA, buf_);
280
281 SBuf line;
282
283 // Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
284 // Now, the request line has to end at the first LF.
285 static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
286 Tokenizer lineTok(buf_);
287 if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) {
 // no complete line yet: either fail because the buffer is already at
 // the size limit, or ask for more data
288 if (buf_.length() >= Config.maxRequestHeaderSize) {
289 /* who should we blame for our failure to parse this line? */
290
291 Tokenizer methodTok(buf_);
292 if (!parseMethodField(methodTok))
293 return -1; // blame a bad method (or its delimiter)
294
295 // assume it is the URI
296 debugs(74, ErrorLevel(), "ERROR: invalid request-line: URI exceeds " <<
297 Config.maxRequestHeaderSize << "-byte limit");
298 parseStatusCode = Http::scUriTooLong;
299 return -1;
300 }
301 debugs(74, 5, "Parser needs more data");
302 return 0;
303 }
304
 // from here on only the extracted line (without its LF) is examined
305 Tokenizer tok(line);
306
307 if (!parseMethodField(tok))
308 return -1;
309
310 /* now parse backwards, to leave just the URI */
311 if (!skipTrailingCrs(tok))
312 return -1;
313
314 if (!parseHttpVersionField(tok))
315 return -1;
316
 // when http0() is true no version field was consumed, so no delimiter
 // is required in front of it
317 if (!http0() && !skipDelimiter(tok.skipAllTrailing(DelimiterCharacters()), "before protocol version"))
318 return -1;
319
320 /* parsed everything before and after the URI */
321
322 if (!parseUriField(tok))
323 return -1;
324
 // anything left between the URI and the version field is an error
325 if (!tok.atEnd()) {
326 debugs(33, ErrorLevel(), "ERROR: invalid request-line: garbage after URI");
327 parseStatusCode = Http::scBadRequest;
328 return -1;
329 }
330
331 parseStatusCode = Http::scOkay;
332 buf_ = lineTok.remaining(); // incremental parse checkpoint
333 return 1;
334}
335
// NOTE(review): presumably RequestParser::parse(const SBuf &aBuf); the name
// line is not visible in this listing -- confirm.
// Runs doParse() on aBuf and returns its result. When preserveParsed_ is set,
// the leading part of aBuf that is no longer in remaining() is appended to
// parsed_.
336bool
338{
339 const bool result = doParse(aBuf);
340 if (preserveParsed_) {
 // guards the length subtraction below against underflow
341 assert(aBuf.length() >= remaining().length());
342 parsed_.append(aBuf.substr(0, aBuf.length() - remaining().length())); // newly parsed bytes
343 }
344
345 return result;
346}
347
348// raw is not a reference because a reference might point back to our own buf_ or parsed_
// NOTE(review): the comment above looks stale -- the parameter used below is
// named aBuf, and the declaration shown for doParse() takes "const SBuf &aBuf",
// i.e. a reference. Confirm and update or remove it.
// NOTE(review): presumably RequestParser::doParse(const SBuf &aBuf); the name
// line is not visible in this listing -- confirm.
// Copies aBuf into buf_ and advances the parser through its stages: skipping
// leading empty lines, parsing the request-line, then locating the mime
// header block.
// Returns false when the buffer is empty after skipping leading lines, on a
// request-line syntax error, or when grabMimeBlock() fails; otherwise returns
// !needsMoreData().
349bool
351{
352 buf_ = aBuf;
353 debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
354
355 // stage 1: locate the request-line
356 if (parsingStage_ == HTTP_PARSE_NONE) {
357 skipGarbageLines();
358
359 // if we hit something before EOS treat it as a message
360 if (!buf_.isEmpty())
361 parsingStage_ = HTTP_PARSE_FIRST;
362 else
363 return false;
364 }
365
366 // stage 2: parse the request-line
367 if (parsingStage_ == HTTP_PARSE_FIRST) {
 // retcode: 1 = parsed, 0 = more data needed, -1 = error
368 const int retcode = parseRequestFirstLine();
369
370 // first-line (or a look-alike) found successfully.
371 if (retcode > 0) {
372 parsingStage_ = HTTP_PARSE_MIME;
373 }
374
375 debugs(74, 5, "request-line: retval " << retcode << ": line={" << aBuf.length() << ", data='" << aBuf << "'}");
376 debugs(74, 5, "request-line: method: " << method_);
377 debugs(74, 5, "request-line: url: " << uri_);
378 debugs(74, 5, "request-line: proto: " << msgProtocol_);
379 debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
380
381 // syntax errors already
382 if (retcode < 0) {
383 parsingStage_ = HTTP_PARSE_DONE;
384 return false;
385 }
 // retcode == 0 leaves the stage unchanged, so stage 3 is skipped
386 }
387
388 // stage 3: locate the mime header block
389 if (parsingStage_ == HTTP_PARSE_MIME) {
390 // HTTP/1.x request-line is valid and parsing completed.
391 if (!grabMimeBlock("Request", Config.maxRequestHeaderSize)) {
 // replace the generic status with the request-specific one
392 if (parseStatusCode == Http::scHeaderTooLarge)
393 parseStatusCode = Http::scRequestHeaderFieldsTooLarge;
394 return false;
395 }
396 }
397
398 return !needsMoreData();
399}
400
static const CharacterSet & UriValidCharacters()
the characters which truly are valid within URI
class SquidConfig Config
Definition: SquidConfig.cc:12
#define assert(EX)
Definition: assert.h:19
optimized set of C chars, with quick membership test and merge support
Definition: CharacterSet.h:18
CharacterSet complement(const char *complementLabel=nullptr) const
Definition: CharacterSet.cc:74
static const CharacterSet TCHAR
Definition: CharacterSet.h:105
static const CharacterSet DIGIT
Definition: CharacterSet.h:84
static const CharacterSet ALPHA
Definition: CharacterSet.h:76
static const CharacterSet HEXDIG
Definition: CharacterSet.h:88
static const CharacterSet LF
Definition: CharacterSet.h:92
static const CharacterSet CR
Definition: CharacterSet.h:80
const SBuf & image() const
SBuf::size_type size_type
Definition: Parser.h:43
::Parser::Tokenizer Tokenizer
Definition: Parser.h:44
bool parseMethodField(Tokenizer &)
bool doParse(const SBuf &aBuf)
called from parse() to do the parsing
static const CharacterSet & RequestTargetCharacters()
characters which Squid will accept in the HTTP request-target (URI)
bool skipDelimiter(const size_t count, const char *where)
bool parseHttpVersionField(Tokenizer &)
HttpRequestMethod method_
what request method has been found on the first line
Definition: RequestParser.h:72
bool parseUriField(Tokenizer &)
SBuf uri_
raw copy of the original client request-line URI field
Definition: RequestParser.h:75
virtual Http1::Parser::size_type firstLineSize() const
size in bytes of the first line including CRLF terminator
virtual bool parse(const SBuf &aBuf)
bool skipTrailingCrs(Tokenizer &tok)
Parse CRs at the end of request-line, just before the terminating LF.
Definition: SBuf.h:94
const char * rawContent() const
Definition: SBuf.cc:509
size_type length() const
Returns the number of bytes stored in SBuf.
Definition: SBuf.h:415
SBuf substr(size_type pos, size_type n=npos) const
Definition: SBuf.cc:576
struct SquidConfig::@111 onoff
size_t maxRequestHeaderSize
Definition: SquidConfig.h:132
int relaxed_header_parser
Definition: SquidConfig.h:313
#define DBG_DATA
Definition: Stream.h:43
#define DBG_IMPORTANT
Definition: Stream.h:41
#define debugs(SECTION, LEVEL, CONTENT)
Definition: Stream.h:196
@ HTTP_PARSE_FIRST
HTTP/1 message first-line.
Definition: Parser.h:24
@ HTTP_PARSE_DONE
parsed a message header, or reached a terminal syntax error
Definition: Parser.h:29
@ HTTP_PARSE_MIME
HTTP/1 mime-header block.
Definition: Parser.h:28
@ HTTP_PARSE_NONE
initialized, but nothing usefully parsed yet
Definition: Parser.h:23
int ErrorLevel()
the right debugs() level for logging HTTP violation messages
Definition: Parser.cc:275
@ scUriTooLong
Definition: StatusCode.h:58
@ scHeaderTooLarge
Definition: StatusCode.h:87
@ scBadRequest
Definition: StatusCode.h:44
@ scOkay
Definition: StatusCode.h:26
@ scRequestHeaderFieldsTooLarge
Definition: StatusCode.h:69
@ METHOD_GET
Definition: MethodType.h:25
AnyP::ProtocolVersion ProtocolVersion(unsigned int aMajor, unsigned int aMinor)
HTTP version label information.
Definition: parse.c:160

 

Introduction

Documentation

Support

Miscellaneous

Web Site Translations

Mirrors