Parser.cc
Go to the documentation of this file.
1/*
2 * Copyright (C) 1996-2022 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9#include "squid.h"
10#include "base/CharacterSet.h"
11#include "debug/Stream.h"
12#include "http/one/Parser.h"
13#include "mime_header.h"
14#include "parser/Tokenizer.h"
15#include "SquidConfig.h"
16
18const SBuf Http::One::Parser::Http1magic("HTTP/1.");
19
21{
22 static const SBuf crlf("\r\n");
23 return crlf;
24}
25
26void
28{
30 buf_ = nullptr;
33}
34
36static const CharacterSet &
38{
39 // RFC 7230 section 3.5
40 // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C),
41 // or bare CR as whitespace between request-line fields
42 static const CharacterSet RelaxedDels =
45 CharacterSet("VT,FF","\x0B\x0C") +
46 CharacterSet::CR).rename("relaxed-WSP");
47
48 return RelaxedDels;
49}
50
51const CharacterSet &
53{
56}
57
58const CharacterSet &
60{
63}
64
65void
67{
68 if (tok.skip(Http1::CrLf()))
69 return;
70
72 return;
73
74 if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r'))
75 throw InsufficientInput();
76
77 throw TexcHere("garbage instead of CRLF line terminator");
78}
79
81static const CharacterSet &
83{
84 static const CharacterSet line = CharacterSet::LF.complement("non-LF");
85 return line;
86}
87
102void
104{
105 Tokenizer tok(mimeHeaderBlock_);
106 while (tok.skipOne(RelaxedDelimiterCharacters())) {
107 (void)tok.skipAll(LineCharacters()); // optional line content
108 // LF terminator is required.
109 // trust headersEnd() to ensure that we have at least one LF
110 (void)tok.skipOne(CharacterSet::LF);
111 }
112
113 // If mimeHeaderBlock_ had just whitespace line(s) followed by CRLF,
114 // then we skipped everything, including that terminating LF.
115 // Restore the terminating CRLF if needed.
116 if (tok.atEnd())
117 mimeHeaderBlock_ = Http1::CrLf();
118 else
119 mimeHeaderBlock_ = tok.remaining();
120 // now mimeHeaderBlock_ has 0+ fields followed by the LF terminator
121}
122
137void
139{
140 Tokenizer tok(mimeHeaderBlock_);
141 const auto szLimit = mimeHeaderBlock_.length();
142 mimeHeaderBlock_.clear();
143 // prevent the mime sender being able to make append() realloc/grow multiple times.
144 mimeHeaderBlock_.reserveSpace(szLimit);
145
146 static const CharacterSet nonCRLF = (CharacterSet::CR + CharacterSet::LF).complement().rename("non-CRLF");
147
148 while (!tok.atEnd()) {
149 const SBuf all(tok.remaining());
150 const auto blobLen = tok.skipAll(nonCRLF); // may not be there
151 const auto crLen = tok.skipAll(CharacterSet::CR); // may not be there
152 const auto lfLen = tok.skipOne(CharacterSet::LF); // may not be there
153
154 if (lfLen && tok.skipAll(CharacterSet::WSP)) { // obs-fold!
155 mimeHeaderBlock_.append(all.substr(0, blobLen));
156 mimeHeaderBlock_.append(' '); // replace one obs-fold with one SP
157 } else
158 mimeHeaderBlock_.append(all.substr(0, blobLen + crLen + lfLen));
159 }
160}
161
162bool
163Http::One::Parser::grabMimeBlock(const char *which, const size_t limit)
164{
165 // MIME headers block exist in (only) HTTP/1.x and ICY
166 const bool expectMime = (msgProtocol_.protocol == AnyP::PROTO_HTTP && msgProtocol_.major == 1) ||
167 msgProtocol_.protocol == AnyP::PROTO_ICY ||
168 hackExpectsMime_;
169
170 if (expectMime) {
171 /* NOTE: HTTP/0.9 messages do not have a mime header block.
172 * So the rest of the code will need to deal with '0'-byte headers
173 * (ie, none, so don't try parsing em)
174 */
175 bool containsObsFold;
176 if (SBuf::size_type mimeHeaderBytes = headersEnd(buf_, containsObsFold)) {
177
178 // Squid could handle these headers, but admin does not want to
179 if (firstLineSize() + mimeHeaderBytes >= limit) {
180 debugs(33, 5, "Too large " << which);
181 parseStatusCode = Http::scHeaderTooLarge;
182 buf_.consume(mimeHeaderBytes);
183 parsingStage_ = HTTP_PARSE_DONE;
184 return false;
185 }
186
187 mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
188 cleanMimePrefix();
189 if (containsObsFold)
190 unfoldMime();
191
192 debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
193
194 } else { // headersEnd() == 0
195 if (buf_.length()+firstLineSize() >= limit) {
196 debugs(33, 5, "Too large " << which);
197 parseStatusCode = Http::scHeaderTooLarge;
198 parsingStage_ = HTTP_PARSE_DONE;
199 } else
200 debugs(33, 5, "Incomplete " << which << ", waiting for end of headers");
201 return false;
202 }
203
204 } else
205 debugs(33, 3, "Missing HTTP/1.x identifier");
206
207 // NP: we do not do any further stages here yet so go straight to DONE
208 parsingStage_ = HTTP_PARSE_DONE;
209
210 return true;
211}
212
213// arbitrary maximum-length for headers which can be found by Http1Parser::getHostHeaderField()
214#define GET_HDR_SZ 1024
215
216// BUG: returns only the first header line with given name,
217// ignores multi-line headers and obs-fold headers
218char *
220{
221 if (!headerBlockSize())
222 return nullptr;
223
224 LOCAL_ARRAY(char, header, GET_HDR_SZ);
225 const char *name = "Host";
226 const int namelen = strlen(name);
227
228 debugs(25, 5, "looking for " << name);
229
230 // while we can find more LF in the SBuf
231 Tokenizer tok(mimeHeaderBlock_);
232 SBuf p;
233
234 while (tok.prefix(p, LineCharacters())) {
235 if (!tok.skipOne(CharacterSet::LF)) // move tokenizer past the LF
236 break; // error. reached invalid octet or end of buffer instead of an LF ??
237
238 // header lines must start with the name (case insensitive)
239 if (p.substr(0, namelen).caseCmp(name, namelen))
240 continue;
241
242 // then a COLON
243 if (p[namelen] != ':')
244 continue;
245
246 // drop any trailing *CR sequence
247 p.trim(Http1::CrLf(), false, true);
248
249 debugs(25, 5, "checking " << p);
250 p.consume(namelen + 1);
251
252 // TODO: optimize SBuf::trim to take CharacterSet directly
253 Tokenizer t(p);
254 t.skipAll(CharacterSet::WSP);
255 p = t.remaining();
256
257 // prevent buffer overrun on char header[];
258 p.chop(0, sizeof(header)-1);
259
260 // currently only used for pre-parse Host header, ensure valid domain[:port] or ip[:port]
261 static const auto hostChars = CharacterSet("host",":[].-_") + CharacterSet::ALPHA + CharacterSet::DIGIT;
262 if (p.findFirstNotOf(hostChars) != SBuf::npos)
263 break; // error. line contains character not accepted in Host header
264
265 // return the header field-value
266 SBufToCstring(header, p);
267 debugs(25, 5, "returning " << header);
268 return header;
269 }
270
271 return nullptr;
272}
273
274int
276{
278}
279
280// BWS = *( SP / HTAB ) ; WhitespaceCharacters() may relax this RFC 7230 rule
281void
283{
284 const auto count = tok.skipAll(Parser::WhitespaceCharacters());
285
286 if (tok.atEnd())
287 throw InsufficientInput(); // even if count is positive
288
289 if (count) {
290 // Generating BWS is a MUST-level violation so warn about it as needed.
291 debugs(33, ErrorLevel(), "found " << count << " BWS octets");
292 // RFC 7230 says we MUST parse BWS, so we fall through even if
293 // Config.onoff.relaxed_header_parser is off.
294 }
295 // else we successfully "parsed" an empty BWS sequence
296
297 // success: no more BWS characters expected
298}
299
void SBufToCstring(char *d, const SBuf &s)
Definition: SBuf.h:752
class SquidConfig Config
Definition: SquidConfig.cc:12
#define TexcHere(msg)
legacy convenience macro; it is not difficult to type Here() now
Definition: TextException.h:59
optimized set of C chars, with quick membership test and merge support
Definition: CharacterSet.h:18
CharacterSet complement(const char *complementLabel=nullptr) const
Definition: CharacterSet.cc:74
static const CharacterSet SP
Definition: CharacterSet.h:94
CharacterSet & rename(const char *label)
change name; handy in const declarations that use operators
Definition: CharacterSet.h:61
static const CharacterSet WSP
Definition: CharacterSet.h:98
static const CharacterSet DIGIT
Definition: CharacterSet.h:84
static const CharacterSet ALPHA
Definition: CharacterSet.h:76
static const CharacterSet LF
Definition: CharacterSet.h:92
static const CharacterSet CR
Definition: CharacterSet.h:80
static const CharacterSet HTAB
Definition: CharacterSet.h:90
char * getHostHeaderField()
Definition: Parser.cc:219
AnyP::ProtocolVersion msgProtocol_
what protocol label has been found in the first line (if any)
Definition: Parser.h:154
static const CharacterSet & WhitespaceCharacters()
Definition: Parser.cc:52
SBuf buf_
bytes remaining to be parsed
Definition: Parser.h:148
void cleanMimePrefix()
Definition: Parser.cc:103
void skipLineTerminator(Tokenizer &) const
Definition: Parser.cc:66
bool grabMimeBlock(const char *which, const size_t limit)
Definition: Parser.cc:163
ParseState parsingStage_
what stage the parser is currently up to
Definition: Parser.h:151
static const CharacterSet & DelimiterCharacters()
Definition: Parser.cc:59
virtual void clear()=0
Definition: Parser.cc:27
void unfoldMime()
Definition: Parser.cc:138
SBuf mimeHeaderBlock_
buffer holding the mime headers (if any)
Definition: Parser.h:157
::Parser::Tokenizer Tokenizer
Definition: Parser.h:44
static const SBuf Http1magic
RFC 7230 section 2.6 - 7 magic octets.
Definition: Parser.h:145
Definition: SBuf.h:94
int caseCmp(const SBuf &S, const size_type n) const
shorthand version for case-insensitive compare()
Definition: SBuf.h:283
static const size_type npos
Definition: SBuf.h:99
SBuf consume(size_type n=npos)
Definition: SBuf.cc:481
SBuf & chop(size_type pos, size_type n=npos)
Definition: SBuf.cc:530
size_type findFirstNotOf(const CharacterSet &set, size_type startPos=0) const
Definition: SBuf.cc:746
SBuf & trim(const SBuf &toRemove, bool atBeginning=true, bool atEnd=true)
Definition: SBuf.cc:551
void clear()
Definition: SBuf.cc:175
SBuf substr(size_type pos, size_type n=npos) const
Definition: SBuf.cc:576
MemBlob::size_type size_type
Definition: SBuf.h:96
struct SquidConfig::@111 onoff
int relaxed_header_parser
Definition: SquidConfig.h:313
#define DBG_IMPORTANT
Definition: Stream.h:41
#define debugs(SECTION, LEVEL, CONTENT)
Definition: Stream.h:196
static const CharacterSet & RelaxedDelimiterCharacters()
characters HTTP permits tolerant parsers to accept as delimiters
Definition: Parser.cc:37
static const CharacterSet & LineCharacters()
all characters except the LF line terminator
Definition: Parser.cc:82
#define GET_HDR_SZ
Definition: Parser.cc:214
size_t headersEnd(const char *mime, size_t l, bool &containsObsFold)
Definition: mime_header.cc:17
@ PROTO_ICY
Definition: ProtocolType.h:38
@ PROTO_HTTP
Definition: ProtocolType.h:25
AnyP::ProtocolVersion ProtocolVersion()
Protocol version to use in Http::Message structures wrapping FTP messages.
Definition: Elements.cc:24
void ParseBws(Parser::Tokenizer &)
Definition: Parser.cc:282
@ HTTP_PARSE_DONE
parsed a message header, or reached a terminal syntax error
Definition: Parser.h:29
@ HTTP_PARSE_NONE
initialized, but nothing usefully parsed yet
Definition: Parser.h:23
const SBuf & CrLf()
CRLF textual representation.
Definition: Parser.cc:20
int ErrorLevel()
the right debugs() level for logging HTTP violation messages
Definition: Parser.cc:275
@ scHeaderTooLarge
Definition: StatusCode.h:87
#define LOCAL_ARRAY(type, name, size)
Definition: squid.h:68
Definition: parse.c:160
const CharacterSet crlf("crlf","\r\n")

 

Introduction

Documentation

Support

Miscellaneous

Web Site Translations

Mirrors