Parser.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 1996-2019 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 #include "squid.h"
10 #include "Debug.h"
11 #include "http/one/Parser.h"
12 #include "http/one/Tokenizer.h"
13 #include "mime_header.h"
14 #include "SquidConfig.h"
15 
// RFC 7230 section 2.6: the 7 magic octets that begin an HTTP/1.x version label
17 const SBuf Http::One::Parser::Http1magic("HTTP/1.");
18 
// Body of Http1::CrLf(); its signature line (19) is absent from this listing.
// Returns the CRLF textual representation, held in a function-local static SBuf.
20 {
21  static const SBuf crlf("\r\n");
22  return crlf;
23 }
24 
// Parser::clear(): the signature line (26) and the other statements of the
// body (lines 28, 30, 31) are absent from this listing.
25 void
27 {
29  buf_ = NULL; // reset buf_ (the bytes remaining to be parsed)
32 }
33 
// RelaxedDelimiterCharacters(): characters HTTP permits tolerant parsers to
// accept as delimiters. The signature line (36) and the first operands of the
// initializer (lines 42-43) are absent from this listing.
35 static const CharacterSet &
37 {
38  // RFC 7230 section 3.5
39  // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C),
40  // or bare CR as whitespace between request-line fields
41  static const CharacterSet RelaxedDels =
44  CharacterSet("VT,FF","\x0B\x0C") +
45  CharacterSet::CR).rename("relaxed-WSP");
46 
47  // the set is built once (function-local static) and shared by all callers
47  return RelaxedDels;
48 }
49 
// Parser::WhitespaceCharacters(): the signature line (51) and the body
// (lines 53-54) are absent from this listing. ParseBws() skips the characters
// of this set as BWS.
50 const CharacterSet &
52 {
55 }
56 
// Parser::DelimiterCharacters(): the signature line (58) and the body
// (lines 60-61) are absent from this listing.
57 const CharacterSet &
59 {
62 }
63 
// Parser::skipLineTerminator(): consumes a line terminator at the current
// tokenizer position. The signature line (65) is absent from this listing.
// Returns true when a terminator was skipped and false when more input is
// needed to decide; throws (TexcHere) when something other than a line
// terminator is found.
64 bool
66 {
67  if (tok.skip(Http1::CrLf()))
68  return true;
69 
// NOTE(review): the condition guarding the next return (line 70) is absent
// from this listing, so the second accepted terminator form cannot be
// confirmed from here.
71  return true;
72 
// nothing left, or only a lone CR that may still be followed by LF
73  if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r'))
74  return false; // need more data
75 
76  throw TexcHere("garbage instead of CRLF line terminator");
77  return false; // unreachable, but make naive compilers happy
78 }
79 
// LineCharacters(): all characters except the LF line terminator.
// The signature line (82) is absent from this listing.
81 static const CharacterSet &
83 {
// complement of the LF set, labelled "non-LF"; built once as a function-local static
84  static const CharacterSet line = CharacterSet::LF.complement("non-LF");
85  return line;
86 }
87 
// Parser::cleanMimePrefix(): drops leading lines that start with a relaxed
// delimiter character and stores what is left in mimeHeaderBlock_.
// The signature line (103), the declaration of tok (line 105) and the
// statement of the atEnd() branch (line 117) are absent from this listing;
// tok presumably tokenizes mimeHeaderBlock_ — TODO confirm.
102 void
104 {
// each iteration removes one whole line whose first octet is a relaxed delimiter
106  while (tok.skipOne(RelaxedDelimiterCharacters())) {
107  (void)tok.skipAll(LineCharacters()); // optional line content
108  // LF terminator is required.
109  // trust headersEnd() to ensure that we have at least one LF
110  (void)tok.skipOne(CharacterSet::LF);
111  }
112 
113  // If mimeHeaderBlock_ had just whitespace line(s) followed by CRLF,
114  // then we skipped everything, including that terminating LF.
115  // Restore the terminating CRLF if needed.
116  if (tok.atEnd())
118  else
119  mimeHeaderBlock_ = tok.remaining();
120  // now mimeHeaderBlock_ has 0+ fields followed by the LF terminator
121 }
122 
// Parser::unfoldMime(): rebuilds mimeHeaderBlock_ with every obs-fold
// (a line ending followed by whitespace) replaced by a single SP.
// The signature line (138), the declaration of tok (line 140) and the
// statements on lines 142 and 144 are absent from this listing; they
// presumably tokenize the old block, reset mimeHeaderBlock_ and reserve
// szLimit bytes — TODO confirm.
137 void
139 {
141  const auto szLimit = mimeHeaderBlock_.length();
143  // prevent the mime sender being able to make append() realloc/grow multiple times.
145 
146  static const CharacterSet nonCRLF = (CharacterSet::CR + CharacterSet::LF).complement().rename("non-CRLF");
147 
148  while (!tok.atEnd()) {
// snapshot of the unparsed input, taken before skipping, so that the skipped
// octets can be copied out by length below
149  const SBuf all(tok.remaining());
150  const auto blobLen = tok.skipAll(nonCRLF); // may not be there
151  const auto crLen = tok.skipAll(CharacterSet::CR); // may not be there
152  const auto lfLen = tok.skipOne(CharacterSet::LF); // may not be there
153 
// an LF followed by at least one WSP octet is a fold: keep only the text
// before the line ending; the CR*, LF and folding whitespace are dropped
154  if (lfLen && tok.skipAll(CharacterSet::WSP)) { // obs-fold!
155  mimeHeaderBlock_.append(all.substr(0, blobLen));
156  mimeHeaderBlock_.append(' '); // replace one obs-fold with one SP
157  } else
// not a fold: copy the text and its line ending unchanged
158  mimeHeaderBlock_.append(all.substr(0, blobLen + crLen + lfLen));
159  }
160 }
161 
// Moves the mime header block (if one is expected and complete) from buf_
// into mimeHeaderBlock_, then cleans its prefix and unfolds it when needed.
//   which: label used in the "Too large"/"Incomplete" debug messages
//   limit: size bound checked against firstLineSize() plus the header bytes
// Returns false when the headers are too large or not yet complete, true
// otherwise (including when no mime block is expected).
// NOTE(review): listing lines 167-168, 181, 183, 197-198 and 208 are absent
// here, so any state changes they make are not described.
162 bool
163 Http::One::Parser::grabMimeBlock(const char *which, const size_t limit)
164 {
165  // MIME headers block exists in (only) HTTP/1.x and ICY
166  const bool expectMime = (msgProtocol_.protocol == AnyP::PROTO_HTTP && msgProtocol_.major == 1) ||
169 
170  if (expectMime) {
171  /* NOTE: HTTP/0.9 messages do not have a mime header block.
172  * So the rest of the code will need to deal with '0'-byte headers
173  * (ie, none, so don't try parsing em)
174  */
175  bool containsObsFold;
// a non-zero result is the size of the complete header block found in buf_
176  if (SBuf::size_type mimeHeaderBytes = headersEnd(buf_, containsObsFold)) {
177 
178  // Squid could handle these headers, but admin does not want to
179  if (firstLineSize() + mimeHeaderBytes >= limit) {
180  debugs(33, 5, "Too large " << which);
// the oversized header bytes are still removed from buf_
182  buf_.consume(mimeHeaderBytes);
184  return false;
185  }
186 
187  mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
188  cleanMimePrefix();
189  if (containsObsFold)
190  unfoldMime();
191 
192  debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
193 
194  } else { // headersEnd() == 0
// no end of headers yet: too large if what is buffered already reaches the limit
195  if (buf_.length()+firstLineSize() >= limit) {
196  debugs(33, 5, "Too large " << which);
199  } else
200  debugs(33, 5, "Incomplete " << which << ", waiting for end of headers");
201  return false;
202  }
203 
204  } else
205  debugs(33, 3, "Missing HTTP/1.x identifier");
206 
207  // NP: we do not do any further stages here yet so go straight to DONE
209 
210  return true;
211 }
212 
213 // arbitrary maximum-length for headers which can be found by Http1Parser::getHeaderField()
214 #define GET_HDR_SZ 1024
215 
216 // BUG: returns only the first header line with given name,
217 // ignores multi-line headers and obs-fold headers
// Parser::getHeaderField(const char *name): looks for a header line that
// starts with name (case-insensitive) followed by ':' and returns the rest
// of that line, copied as a C string into the local `header` array.
// Returns NULL when there is no header block, name is null, or no line
// matches. The signature line (219), the declaration of tok (line 230) and
// the statement on line 253 are absent from this listing.
218 char *
220 {
221  if (!headerBlockSize() || !name)
222  return NULL;
223 
224  LOCAL_ARRAY(char, header, GET_HDR_SZ);
225  const int namelen = strlen(name);
226 
227  debugs(25, 5, "looking for " << name);
228 
229  // while we can find more LF in the SBuf
231  SBuf p;
232 
233  while (tok.prefix(p, LineCharacters())) {
234  if (!tok.skipOne(CharacterSet::LF)) // move tokenizer past the LF
235  break; // error. reached invalid octet or end of buffer instead of an LF ??
236 
237  // header lines must start with the name (case insensitive)
238  if (p.substr(0, namelen).caseCmp(name, namelen))
239  continue;
240 
241  // then a COLON
// NOTE(review): no visible check that p is longer than namelen before
// indexing — confirm SBuf::operator[] behaviour at that position
242  if (p[namelen] != ':')
243  continue;
244 
245  // drop any trailing *CR sequence
246  p.trim(Http1::CrLf(), false, true);
247 
248  debugs(25, 5, "checking " << p);
// discard the field name and the colon, leaving the field value
249  p.consume(namelen + 1);
250 
251  // TODO: optimize SBuf::trim to take CharacterSet directly
// the tokenizer step on line 253 is absent from this listing; presumably it
// skips leading whitespace of the value — TODO confirm
252  Http1::Tokenizer t(p);
254  p = t.remaining();
255 
256  // prevent buffer overrun on char header[];
257  p.chop(0, sizeof(header)-1);
258 
259  // return the header field-value
260  SBufToCstring(header, p);
261  debugs(25, 5, "returning " << header);
262  return header;
263  }
264 
265  return NULL;
266 }
267 
// ErrorLevel(): the right debugs() level for logging HTTP violation messages.
// The signature line (269) and the body (line 271) are absent from this listing.
268 int
270 {
272 }
273 
274 // BWS = *( SP / HTAB ) ; WhitespaceCharacters() may relax this RFC 7230 rule
// ParseBws(Tokenizer &tok): skips any run of whitespace at the current
// position, logging its length when non-empty. Always returns true.
// The signature line (276) is absent from this listing.
275 bool
277 {
278  if (const auto count = tok.skipAll(Parser::WhitespaceCharacters())) {
279  // Generating BWS is a MUST-level violation so warn about it as needed.
280  debugs(33, ErrorLevel(), "found " << count << " BWS octets");
281  // RFC 7230 says we MUST parse BWS, so we fall through even if
282  // Config.onoff.relaxed_header_parser is off.
283  }
284  // else we successfully "parsed" an empty BWS sequence
285 
286  return true;
287 }
288 
size_type length() const
Returns the number of bytes stored in SBuf.
Definition: SBuf.h:404
AnyP::ProtocolVersion msgProtocol_
what protocol label has been found in the first line (if any)
Definition: Parser.h:152
void reserveSpace(size_type minSpace)
Definition: SBuf.h:429
static const CharacterSet & DelimiterCharacters()
Definition: Parser.cc:58
bool ParseBws(Tokenizer &tok)
Definition: Parser.cc:276
CharacterSet complement(const char *complementLabel=nullptr) const
Definition: CharacterSet.cc:67
int caseCmp(const SBuf &S, const size_type n) const
shorthand version for case-insensitive compare()
Definition: SBuf.h:272
AnyP::ProtocolVersion ProtocolVersion()
Protocol version to use in Http::Message structures wrapping FTP messages.
Definition: Elements.cc:24
static const CharacterSet LF
Definition: CharacterSet.h:89
size_type headerBlockSize() const
Definition: Parser.h:71
Definition: SBuf.h:86
void cleanMimePrefix()
Definition: Parser.cc:103
SBuf & append(const SBuf &S)
Definition: SBuf.cc:195
SBuf & chop(size_type pos, size_type n=npos)
Definition: SBuf.cc:540
void clear()
Definition: SBuf.cc:178
int ErrorLevel()
the right debugs() level for logging HTTP violation messages
Definition: Parser.cc:269
Http::StatusCode parseStatusCode
Definition: Parser.h:106
char * p
Definition: membanger.c:43
char * getHeaderField(const char *name)
Definition: Parser.cc:219
void SBufToCstring(char *d, const SBuf &s)
Definition: SBuf.h:741
#define GET_HDR_SZ
Definition: Parser.cc:214
static const CharacterSet WSP
Definition: CharacterSet.h:95
static const CharacterSet & LineCharacters()
all characters except the LF line terminator
Definition: Parser.cc:82
virtual size_type firstLineSize() const =0
size in bytes of the first line including CRLF terminator
unsigned int major
major version number
int relaxed_header_parser
Definition: SquidConfig.h:318
#define debugs(SECTION, LEVEL, CONTENT)
Definition: Debug.h:124
#define DBG_IMPORTANT
Definition: Debug.h:46
char at(size_type pos) const
Definition: SBuf.h:238
initialized, but nothing usefully parsed yet
Definition: Parser.h:22
static const CharacterSet HTAB
Definition: CharacterSet.h:87
static const CharacterSet CR
Definition: CharacterSet.h:77
const SBuf & remaining() const
the remaining unprocessed section of buffer
Definition: Tokenizer.h:44
optimized set of C chars, with quick membership test and merge support
Definition: CharacterSet.h:17
bool skipLineTerminator(Http1::Tokenizer &tok) const
Definition: Parser.cc:65
Definition: parse.c:160
bool skipOne(const CharacterSet &discardables)
Definition: Tokenizer.cc:132
static const CharacterSet & WhitespaceCharacters()
Definition: Parser.cc:51
SBuf consume(size_type n=npos)
Definition: SBuf.cc:491
parsed a message header, or reached a terminal syntax error
Definition: Parser.h:28
#define LOCAL_ARRAY(type, name, size)
Definition: leakcheck.h:18
struct SquidConfig::@112 onoff
bool skip(const SBuf &tokenToSkip)
Definition: Tokenizer.cc:160
bool prefix(SBuf &returnedToken, const CharacterSet &tokenChars, SBuf::size_type limit=SBuf::npos)
Definition: Tokenizer.cc:79
const CharacterSet crlf("crlf","\r\n")
Definition: Elements.cc:12
SBuf mimeHeaderBlock_
buffer holding the mime headers (if any)
Definition: Parser.h:155
bool atEnd() const
whether the end of the buffer has been reached
Definition: Tokenizer.h:41
CharacterSet & rename(const char *label)
change name; handy in const declarations that use operators
Definition: CharacterSet.h:58
static const CharacterSet & RelaxedDelimiterCharacters()
characters HTTP permits tolerant parsers to accept as delimiters
Definition: Parser.cc:36
virtual void clear()=0
Definition: Parser.cc:26
SBuf substr(size_type pos, size_type n=npos) const
Definition: SBuf.cc:586
SBuf & trim(const SBuf &toRemove, bool atBeginning=true, bool atEnd=true)
Definition: SBuf.cc:561
SBuf buf_
bytes remaining to be parsed
Definition: Parser.h:146
bool grabMimeBlock(const char *which, const size_t limit)
Definition: Parser.cc:163
static const CharacterSet SP
Definition: CharacterSet.h:91
#define TexcHere(msg)
legacy convenience macro; it is not difficult to type Here() now
Definition: TextException.h:55
SBuf::size_type skipAll(const CharacterSet &discardables)
Definition: Tokenizer.cc:120
bool hackExpectsMime_
Whether the invalid HTTP as HTTP/0.9 hack expects a mime header block.
Definition: Parser.h:158
void unfoldMime()
Definition: Parser.cc:138
ProtocolType protocol
which protocol this version is for
size_t headersEnd(const char *mime, size_t l, bool &containsObsFold)
Definition: mime_header.cc:16
const SBuf & CrLf()
CRLF textual representation.
Definition: Parser.cc:19
MemBlob::size_type size_type
Definition: SBuf.h:89
static const SBuf Http1magic
RFC 7230 section 2.6 - 7 magic octets.
Definition: Parser.h:143
class SquidConfig Config
Definition: SquidConfig.cc:12
#define NULL
Definition: types.h:166
ParseState parsingStage_
what stage the parser is currently up to
Definition: Parser.h:149

 

Introduction

Documentation

Support

Miscellaneous

Web Site Translations

Mirrors