RequestParser.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 1996-2022 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 #include "squid.h"
10 #include "debug/Stream.h"
11 #include "http/one/RequestParser.h"
12 #include "http/ProtocolVersion.h"
13 #include "parser/Tokenizer.h"
14 #include "SquidConfig.h"
15 
// NOTE(review): the signature lines of this definition are absent from this
// extract; presumably this is the body of firstLineSize() -- confirm.
// Computes the request-line length from the already parsed method and URI
// lengths plus a fixed 12-byte overhead.
18 {
19  // RFC 7230 section 2.6
20  /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
 // 12 = two SP delimiters (2) + "HTTP/" (5) + DIGIT "." DIGIT (3) + CRLF (2)
21  return method_.image().length() + uri_.length() + 12;
22 }
23 
// Drops empty lines that precede the request-line from the front of buf_.
// NOTE(review): the line naming this function is absent from this extract
// (presumably skipGarbageLines(), which doParse() calls) and so is one
// scope-opening line between the function's "{" and the first "if": the
// braces below close one more scope than they open.
37 void
39 {
 // warn only when relaxed_header_parser is negative and the buffer starts with CR or LF
 // NOTE(review): buf_[0] is read here without an emptiness check -- confirm
 // that callers never reach this point with an empty buf_.
41  if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
42  debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
43  "CRLF bytes received ahead of request-line. " <<
44  "Ignored due to relaxed_header_parser.");
45  // Be tolerant of prefix empty lines
46  // ie any series of either \n or \r\n with no other characters and no repeated \r
 // One byte is consumed per iteration: the CR of a CRLF pair goes first and
 // its LF is matched by the next iteration.
 // NOTE(review): buf_[1] is read whenever buf_[0] is CR, even if buf_ holds
 // only that one byte -- confirm that SBuf::operator[] tolerates this.
47  while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) {
48  buf_.consume(1);
49  }
50  }
51 }
52 
// Parses the request method found at the front of the request-line.
// NOTE(review): the line naming this function is absent from this extract;
// presumably parseMethodField(Tokenizer &tok) -- confirm.
//
// On success, stores the method in method_, skips the delimiter characters
// that follow it, and returns true. On failure, returns false with
// parseStatusCode set to Http::scBadRequest (either here or by skipDelimiter()).
60 bool
62 {
63  // method field is a sequence of TCHAR.
64  // Limit to 32 characters to prevent overly long sequences of non-HTTP
65  // being sucked in before mismatch is detected. 32 is itself annoyingly
66  // big but there are methods registered by IANA that reach 17 bytes:
67  // http://www.iana.org/assignments/http-methods
68  static const size_t maxMethodLength = 32; // TODO: make this configurable?
69 
70  SBuf methodFound;
 // no leading TCHAR characters at all means there is no method to parse
71  if (!tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength)) {
72  debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing or malformed method");
73  parseStatusCode = Http::scBadRequest;
74  return false;
75  }
76  method_ = HttpRequestMethod(methodFound);
77 
 // skipDelimiter() judges how many delimiter characters were skipped and
 // records the error status itself when that count is unacceptable
78  if (!skipDelimiter(tok.skipAll(DelimiterCharacters()), "after method"))
79  return false;
80 
81  return true;
82 }
83 
// Builds (once) and returns the set of characters treated as valid in a URI.
// NOTE(review): this extract lacks the line naming the function (presumably
// UriValidCharacters()) and some operands of the expression below: the
// embedded listing numbers jump from 99 to 102 and from 104 to 106, and the
// last visible operand ends with a dangling "+". The missing operands
// presumably add ALPHA, DIGIT and HEXDIG -- confirm against the real file.
85 static const CharacterSet &
87 {
88  /* RFC 3986 section 2:
89  * "
90  * A URI is composed from a limited set of characters consisting of
91  * digits, letters, and a few graphic symbols.
92  * "
93  */
 // function-local static: the set is assembled on first use and then reused
94  static const CharacterSet UriChars =
95  CharacterSet("URI-Chars","") +
96  // RFC 3986 section 2.2 - reserved characters
97  CharacterSet("gen-delims", ":/?#[]@") +
98  CharacterSet("sub-delims", "!$&'()*+,;=") +
99  // RFC 3986 section 2.3 - unreserved characters
102  CharacterSet("unreserved", "-._~") +
103  // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
104  CharacterSet("pct-encoded", "%") +
106 
107  return UriChars;
108 }
109 
// Returns the set of characters accepted inside the request-target (URI).
// NOTE(review): this extract lacks the line naming the function (presumably
// RequestTargetCharacters()), the line opening the scope that is closed
// before the final return (presumably a relaxed-parser check), and the first
// operand of each set expression below -- confirm against the real file.
111 const CharacterSet &
113 {
 // builds with USE_HTTP_VIOLATIONS accept a wider character set than compliant builds
115 #if USE_HTTP_VIOLATIONS
116  static const CharacterSet RelaxedExtended =
118  // accept whitespace (extended), it will be dealt with later
119  DelimiterCharacters() +
120  // RFC 2396 unwise character set which must never be transmitted
121  // in un-escaped form. But many web services do anyway.
122  CharacterSet("RFC2396-unwise","\"\\|^<>`{}") +
123  // UTF-8 because we want to be future-proof
124  CharacterSet("UTF-8", 128, 255);
125 
126  return RelaxedExtended;
127 #else
128  static const CharacterSet RelaxedCompliant =
130  // accept whitespace (extended), it will be dealt with later.
131  DelimiterCharacters();
132 
133  return RelaxedCompliant;
134 #endif
135  }
136 
137  // the strict parser only accepts what the RFC says we can
138  return UriValidCharacters();
139 }
140 
// Parses the request-target (URI) from what is left in tok.
// NOTE(review): the line naming this function is absent from this extract;
// presumably parseUriField(Tokenizer &tok) -- confirm.
//
// On success, stores the URI in uri_ and returns true. On failure, returns
// false with parseStatusCode set to Http::scBadRequest (no acceptable URI
// characters found) or Http::scUriTooLong (URI longer than maxUriLength).
141 bool
143 {
144  /* Arbitrary 64KB URI upper length limit.
145  *
146  * Not quite as arbitrary as it seems though. Old SquidString objects
147  * cannot store strings larger than 64KB, so we must limit until they
148  * have all been replaced with SBuf.
149  *
150  * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
151  * at least 8000 octets for the whole line, including method and version.
152  */
153  const size_t maxUriLength = static_cast<size_t>((64*1024)-1);
154 
155  SBuf uriFound;
 // no length limit is passed to prefix(); the size check happens after extraction
156  if (!tok.prefix(uriFound, RequestTargetCharacters())) {
157  parseStatusCode = Http::scBadRequest;
158  debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing or malformed URI");
159  return false;
160  }
161 
162  if (uriFound.length() > maxUriLength) {
163  // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
164  parseStatusCode = Http::scUriTooLong;
165  debugs(33, ErrorLevel(), "ERROR: invalid request-line: " << uriFound.length() <<
166  "-byte URI exceeds " << maxUriLength << "-byte limit");
167  return false;
168  }
169 
170  uri_ = uriFound;
171  return true;
172 }
173 
// Parses the HTTP-version field, working backwards from the end of tok.
// NOTE(review): the line naming this function is absent from this extract;
// presumably parseHttpVersionField(Tokenizer &tok) -- confirm.
//
// On success, sets msgProtocol_ and returns true. A GET request without a
// recognizable version is accepted as HTTP/0.9. Any other request without a
// recognizable version fails: parseStatusCode becomes Http::scBadRequest and
// false is returned.
174 bool
176 {
177  static const SBuf http1p0("HTTP/1.0");
178  static const SBuf http1p1("HTTP/1.1");
 // copy taken so that the HTTP/0.9 fallback below can undo partial suffix parsing
179  const auto savedTok = tok;
180 
181  // Optimization: Expect (and quickly parse) HTTP/1.1 or HTTP/1.0 in
182  // the vast majority of cases.
183  if (tok.skipSuffix(http1p1)) {
184  msgProtocol_ = Http::ProtocolVersion(1, 1);
185  return true;
186  } else if (tok.skipSuffix(http1p0)) {
187  msgProtocol_ = Http::ProtocolVersion(1, 0);
188  return true;
189  } else {
190  // RFC 7230 section 2.6:
191  // HTTP-version = HTTP-name "/" DIGIT "." DIGIT
192  static const CharacterSet period("Decimal point", ".");
193  static const SBuf proto("HTTP/");
194  SBuf majorDigit;
195  SBuf minorDigit;
 // matched right to left: minor digits, ".", major digits, then "HTTP/"
196  if (tok.suffix(minorDigit, CharacterSet::DIGIT) &&
197  tok.skipOneTrailing(period) &&
198  tok.suffix(majorDigit, CharacterSet::DIGIT) &&
199  tok.skipSuffix(proto)) {
200  const bool multiDigits = majorDigit.length() > 1 || minorDigit.length() > 1;
201  // use '0.0' for unsupported multiple digit version numbers
202  const unsigned int major = multiDigits ? 0 : (*majorDigit.rawContent() - '0');
203  const unsigned int minor = multiDigits ? 0 : (*minorDigit.rawContent() - '0');
204  msgProtocol_ = Http::ProtocolVersion(major, minor);
205  return true;
206  }
207  }
208 
209  // A GET request might use HTTP/0.9 syntax
210  if (method_ == Http::METHOD_GET) {
211  // RFC 1945 - no HTTP version field at all
212  tok = savedTok; // in case the URI ends with a digit
213  // report this assumption as an error if configured to triage parsing
214  debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line");
215  msgProtocol_ = Http::ProtocolVersion(0,9);
216  return true;
217  }
218 
 // tok is not restored on this path; the caller gives up on the request-line anyway
219  debugs(33, ErrorLevel(), "ERROR: invalid request-line: not HTTP");
220  parseStatusCode = Http::scBadRequest;
221  return false;
222 }
223 
229 bool
230 Http::One::RequestParser::skipDelimiter(const size_t count, const char *where)
231 {
232  if (count <= 0) {
233  debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing delimiter " << where);
234  parseStatusCode = Http::scBadRequest;
235  return false;
236  }
237 
238  // tolerant parser allows multiple whitespace characters between request-line fields
239  if (count > 1 && !Config.onoff.relaxed_header_parser) {
240  debugs(33, ErrorLevel(), "ERROR: invalid request-line: too many delimiters " << where);
241  parseStatusCode = Http::scBadRequest;
242  return false;
243  }
244 
245  return true;
246 }
247 
// Handles the CR character(s) at the end of the request-line, just before
// the terminating LF.
// NOTE(review): this extract lacks the line naming the function (presumably
// skipTrailingCrs(Tokenizer &tok)) and the line opening the "if" whose
// "else" branch is visible below; that condition presumably tests
// relaxed_header_parser -- confirm against the real file.
//
// Returns false, with parseStatusCode set to Http::scBadRequest, only when
// the else branch finds no CR to skip.
249 bool
251 {
253  (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
254  } else {
 // this branch demands a CR before the LF
255  if (!tok.skipOneTrailing(CharacterSet::CR)) {
256  debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing CR before LF");
257  parseStatusCode = Http::scBadRequest;
258  return false;
259  }
260  }
261  return true;
262 }
263 
// Attempts to parse a complete request-line from the front of buf_.
// NOTE(review): the line naming this function is absent from this extract;
// presumably parseRequestFirstLine(), which doParse() calls -- confirm.
//
// Returns:
//   1  the request-line was parsed; buf_ now starts after its LF
//   0  no LF-terminated line is available yet; more data is needed
//  -1  the request-line is invalid or too long; parseStatusCode holds the
//      status set by the failing step
275 int
277 {
278  debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
279  debugs(74, DBG_DATA, buf_);
280 
281  SBuf line;
282 
283  // Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
284  // Now, the request line has to end at the first LF.
285  static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
286  Tokenizer lineTok(buf_);
287  if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) {
 // no complete line yet: fail only once the buffer has reached the configured limit
288  if (buf_.length() >= Config.maxRequestHeaderSize) {
289  /* who should we blame for our failure to parse this line? */
290 
291  Tokenizer methodTok(buf_);
292  if (!parseMethodField(methodTok))
293  return -1; // blame a bad method (or its delimiter)
294 
295  // assume it is the URI
296  debugs(74, ErrorLevel(), "ERROR: invalid request-line: URI exceeds " <<
297  Config.maxRequestHeaderSize << "-byte limit");
298  parseStatusCode = Http::scUriTooLong;
299  return -1;
300  }
301  debugs(74, 5, "Parser needs more data");
302  return 0;
303  }
304 
 // from here on, only the extracted line (without its LF) is examined
305  Tokenizer tok(line);
306 
307  if (!parseMethodField(tok))
308  return -1;
309 
310  /* now parse backwards, to leave just the URI */
311  if (!skipTrailingCrs(tok))
312  return -1;
313 
314  if (!parseHttpVersionField(tok))
315  return -1;
316 
 // the delimiter before the version is not checked when http0() is true
 // NOTE(review): http0() presumably reports an HTTP/0.9 request-line -- confirm
317  if (!http0() && !skipDelimiter(tok.skipAllTrailing(DelimiterCharacters()), "before protocol version"))
318  return -1;
319 
320  /* parsed everything before and after the URI */
321 
322  if (!parseUriField(tok))
323  return -1;
324 
 // anything parseUriField() left behind is not an acceptable URI character
325  if (!tok.atEnd()) {
326  debugs(33, ErrorLevel(), "ERROR: invalid request-line: garbage after URI");
327  parseStatusCode = Http::scBadRequest;
328  return -1;
329  }
330 
331  parseStatusCode = Http::scOkay;
332  buf_ = lineTok.remaining(); // incremental parse checkpoint
333  return 1;
334 }
335 
// Runs doParse() on aBuf and, when preserveParsed_ is set, appends the bytes
// consumed by that call to parsed_. Returns the doParse() result unchanged.
// NOTE(review): the line naming this function is absent from this extract;
// presumably parse(const SBuf &aBuf) -- confirm.
336 bool
338 {
339  const bool result = doParse(aBuf);
340  if (preserveParsed_) {
 // the unparsed remainder can never be longer than the input
 // NOTE(review): assumes remaining() returns the not-yet-parsed tail of aBuf -- confirm
341  assert(aBuf.length() >= remaining().length());
342  parsed_.append(aBuf.substr(0, aBuf.length() - remaining().length())); // newly parsed bytes
343  }
344 
345  return result;
346 }
347 
348 // raw is not a reference because a reference might point back to our own buf_ or parsed_
// NOTE(review): the comment above mentions "raw", but the body below only
// uses "aBuf"; the line naming this function is absent from this extract
// (presumably doParse(const SBuf &aBuf)) -- confirm whether that comment is stale.
//
// Advances the parser through its stages over aBuf: skip leading empty
// lines, parse the request-line, then locate the mime header block.
// Returns false when the input is empty after garbage skipping, when the
// request-line is invalid, or when grabMimeBlock() fails; otherwise returns
// !needsMoreData().
349 bool
351 {
 // every call restarts from the caller's buffer
352  buf_ = aBuf;
353  debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
354 
355  // stage 1: locate the request-line
356  if (parsingStage_ == HTTP_PARSE_NONE) {
357  skipGarbageLines();
358 
359  // if we hit something before EOS treat it as a message
360  if (!buf_.isEmpty())
361  parsingStage_ = HTTP_PARSE_FIRST;
362  else
363  return false;
364  }
365 
366  // stage 2: parse the request-line
367  if (parsingStage_ == HTTP_PARSE_FIRST) {
 // retcode: 1 = parsed, 0 = need more data, -1 = syntax error
368  const int retcode = parseRequestFirstLine();
369 
370  // first-line (or a look-alike) found successfully.
371  if (retcode > 0) {
372  parsingStage_ = HTTP_PARSE_MIME;
373  }
374 
375  debugs(74, 5, "request-line: retval " << retcode << ": line={" << aBuf.length() << ", data='" << aBuf << "'}");
376  debugs(74, 5, "request-line: method: " << method_);
377  debugs(74, 5, "request-line: url: " << uri_);
378  debugs(74, 5, "request-line: proto: " << msgProtocol_);
379  debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
380 
381  // syntax errors already
382  if (retcode < 0) {
383  parsingStage_ = HTTP_PARSE_DONE;
384  return false;
385  }
386  }
387 
388  // stage 3: locate the mime header block
389  if (parsingStage_ == HTTP_PARSE_MIME) {
390  // HTTP/1.x request-line is valid and parsing completed.
391  if (!grabMimeBlock("Request", Config.maxRequestHeaderSize)) {
 // replace the generic header-too-large status with the request-specific one
392  if (parseStatusCode == Http::scHeaderTooLarge)
393  parseStatusCode = Http::scRequestHeaderFieldsTooLarge;
394  return false;
395  }
396  }
397 
398  return !needsMoreData();
399 }
400 
static const CharacterSet & RequestTargetCharacters()
characters which Squid will accept in the HTTP request-target (URI)
int relaxed_header_parser
Definition: SquidConfig.h:321
@ scBadRequest
Definition: StatusCode.h:44
bool skipDelimiter(const size_t count, const char *where)
bool parseMethodField(Tokenizer &)
@ HTTP_PARSE_MIME
HTTP/1 mime-header block.
Definition: Parser.h:28
Definition: SBuf.h:94
CharacterSet complement(const char *complementLabel=nullptr) const
Definition: CharacterSet.cc:74
bool parseUriField(Tokenizer &)
SBuf uri_
raw copy of the original client request-line URI field
Definition: RequestParser.h:75
const SBuf & image() const
static const CharacterSet LF
Definition: CharacterSet.h:92
SBuf substr(size_type pos, size_type n=npos) const
Definition: SBuf.cc:576
static const CharacterSet ALPHA
Definition: CharacterSet.h:76
virtual Http1::Parser::size_type firstLineSize() const
size in bytes of the first line including CRLF terminator
#define DBG_DATA
Definition: Stream.h:43
SBuf::size_type size_type
Definition: Parser.h:43
static const CharacterSet CR
Definition: CharacterSet.h:80
const char * rawContent() const
Definition: SBuf.cc:509
HttpRequestMethod method_
what request method has been found on the first line
Definition: RequestParser.h:72
bool skipTrailingCrs(Tokenizer &tok)
Parse CRs at the end of request-line, just before the terminating LF.
static const CharacterSet TCHAR
Definition: CharacterSet.h:105
static const CharacterSet HEXDIG
Definition: CharacterSet.h:88
bool doParse(const SBuf &aBuf)
called from parse() to do the parsing
@ HTTP_PARSE_NONE
initialized, but nothing usefully parsed yet
Definition: Parser.h:23
@ scRequestHeaderFieldsTooLarge
Definition: StatusCode.h:69
#define assert(EX)
Definition: assert.h:19
@ scUriTooLong
Definition: StatusCode.h:58
virtual bool parse(const SBuf &aBuf)
static const CharacterSet & UriValidCharacters()
the characters which truly are valid within URI
static const CharacterSet DIGIT
Definition: CharacterSet.h:84
size_type length() const
Returns the number of bytes stored in SBuf.
Definition: SBuf.h:415
size_t maxRequestHeaderSize
Definition: SquidConfig.h:132
@ HTTP_PARSE_FIRST
HTTP/1 message first-line.
Definition: Parser.h:24
@ HTTP_PARSE_DONE
parsed a message header, or reached a terminal syntax error
Definition: Parser.h:29
Definition: parse.c:160
struct SquidConfig::@110 onoff
::Parser::Tokenizer Tokenizer
Definition: Parser.h:44
#define DBG_IMPORTANT
Definition: Stream.h:41
optimized set of C chars, with quick membership test and merge support
Definition: CharacterSet.h:18
int ErrorLevel()
the right debugs() level for logging HTTP violation messages
Definition: Parser.cc:275
@ scOkay
Definition: StatusCode.h:26
bool parseHttpVersionField(Tokenizer &)
@ scHeaderTooLarge
Definition: StatusCode.h:87
@ METHOD_GET
Definition: MethodType.h:25
#define debugs(SECTION, LEVEL, CONTENT)
Definition: Stream.h:196
class SquidConfig Config
Definition: SquidConfig.cc:12
AnyP::ProtocolVersion ProtocolVersion(unsigned int aMajor, unsigned int aMinor)
HTTP version label information.

 

Introduction

Documentation

Support

Miscellaneous

Web Site Translations

Mirrors