ResponseParser.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 1996-2020 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 #include "squid.h"
10 #include "Debug.h"
12 #include "http/ProtocolVersion.h"
13 #include "parser/Tokenizer.h"
14 #include "profiler/Profiler.h"
15 #include "sbuf/Stream.h"
16 #include "SquidConfig.h"
17 
19 
// Computes the size in bytes of the response first line from the parts that
// were already parsed: protocol magic + minor version digit(s) + status area
// + stored reason phrase + line terminator.
// Returns 0 when msgProtocol_.protocol is neither PROTO_HTTP nor PROTO_ICY.
// NOTE(review): the signature line is missing from this listing; the member
// index of this page suggests this body is firstLineSize() const -- confirm.
22 {
23  Http1::Parser::size_type result = 0;
24 
    // start from the length of the magic prefix that identified the protocol
25  switch (msgProtocol_.protocol)
26  {
27  case AnyP::PROTO_HTTP:
28  result += Http1magic.length();
29  break;
30  case AnyP::PROTO_ICY:
31  result += IcyMagic.length();
32  break;
33  default: // no other protocols supported
34  return result;
35  }
    // NOTE(review): everything below is added for PROTO_ICY as well, although
    // the ICY branch of the first-line parser reads no version digits --
    // confirm that the ICY total is the intended one.
36  // NP: the parser does not accept >2 DIGIT for version numbers
37  if (msgProtocol_.minor > 9)
38  result += 2;
39  else
40  result += 1;
41 
42  result += 5; /* 5 octets in: SP status SP */
43  result += reasonPhrase_.length();
44  result += 2; /* CRLF terminator */
45  return result;
46 }
47 
48 // NP: we found the protocol version and consumed it already.
49 // just need the status code and reason phrase
// Returns 1 once the status-code, the optional reason-phrase and the line
// terminator have all been consumed from tok; 0 when InsufficientInput was
// thrown (more bytes are needed); -1 when any other std::exception reported
// an invalid status-line.
// buf_ is moved to tok.remaining() at each resume checkpoint, and
// completedStatus_ keeps a resumed call from parsing the status-code again.
// NOTE(review): the signature line is missing from this listing.
50 int
52 {
53  try {
    // the status-code is parsed only once; later calls skip straight to the phrase
54  if (!completedStatus_) {
55  debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "...");
56  ParseResponseStatus(tok, statusCode_);
57  buf_ = tok.remaining(); // resume checkpoint
58  completedStatus_ = true;
59  }
60  // NOTE: any whitespace after the single SP is part of the reason phrase.
61 
62  /* RFC 7230 says we SHOULD ignore the reason phrase content
63  * but it has a definite valid vs invalid character set.
64  * We interpret the SHOULD as ignoring absence and syntax, but
65  * producing an error if it contains an invalid octet.
66  */
67 
68  debugs(74, 9, "seek reason-phrase in: " << tok.remaining().substr(0,50) << "...");
69  // if we got here we are still looking for reason-phrase bytes
    // NOTE(review): the declaration of phraseChars is not visible in this listing
71  (void)tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing
72  skipLineTerminator(tok);
73  buf_ = tok.remaining(); // resume checkpoint
74  debugs(74, DBG_DATA, Raw("leftovers", buf_.rawContent(), buf_.length()));
75  return 1;
76  } catch (const InsufficientInput &) {
    // discard whatever phrase bytes were stored above; buf_ still holds the
    // last checkpoint, so the phrase is collected again on the next call
77  reasonPhrase_.clear();
78  return 0; // need more to be sure we have it all
79  } catch (const std::exception &ex) {
80  debugs(74, 6, "invalid status-line: " << ex.what());
    // no return here: control falls through to the -1 below
81  }
82  return -1;
83 }
84 
// Reads the status-code and the delimiter that follows it from tok and stores
// the converted value in code.
// Throws InsufficientInput when the status area did not match and the
// tokenizer is at the end of its input; throws TextException when the area is
// syntactically invalid, when the value is <= 99 or when the value is >= 600.
// NOTE(review): the signature line is missing from this listing.
85 void
87 {
88  int64_t statusValue;
    // presumably: base 10, no sign, at most 3 digits -- confirm against Tokenizer::int64()
89  if (tok.int64(statusValue, 10, false, 3) && tok.skipOne(Parser::DelimiterCharacters())) {
90  debugs(74, 6, "raw status-code=" << statusValue);
91  code = static_cast<StatusCode>(statusValue); // may be invalid
92 
93  // RFC 7230 Section 3.1.2 says status-code is exactly three DIGITs
94  if (code <= 99)
95  throw TextException(ToSBuf("status-code too short: ", code), Here());
96 
97  // Codes with a non-standard first digit (a.k.a. response class) are
98  // considered semantically invalid per the following HTTP WG discussion:
99  // https://lists.w3.org/Archives/Public/ietf-http-wg/2010AprJun/0354.html
100  if (code >= 600)
101  throw TextException(ToSBuf("status-code from an invalid response class: ", code), Here());
102  } else if (tok.atEnd()) {
    // nothing left to inspect: the caller has to supply more bytes
103  throw InsufficientInput();
104  } else {
105  throw TextException("syntactically invalid status-code area", Here());
106  }
107 }
108 
// Parses the start of buf_ as a response first line.
// Returns 1 when the first line was accepted (including the fabricated
// HTTP/0.9 case), 0 when more bytes are needed, and -1 when the version or
// its delimiter is invalid; the status/reason helper's result is passed
// through unchanged.
// NOTE(review): the signature line is missing from this listing; parse()
// calls parseResponseFirstLine() and tests an int result -- confirm the name.
124 int
126 {
127  Tokenizer tok(buf_);
128 
    // case 1: an earlier call already recognised the protocol magic
129  if (msgProtocol_.protocol != AnyP::PROTO_NONE) {
130  debugs(74, 6, "continue incremental parse for " << msgProtocol_);
131  debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
132  // we already found the magic, but not the full line. keep going.
133  return parseResponseStatusAndReason(tok);
134 
    // case 2: the buffer starts with the HTTP/1 magic
135  } else if (tok.skip(Http1magic)) {
136  debugs(74, 6, "found prefix magic " << Http1magic);
137  // HTTP Response status-line parse
138 
139  // magic contains major version, still need to find minor DIGIT
140  int64_t verMinor;
141  const auto &WspDelim = DelimiterCharacters();
142  if (tok.int64(verMinor, 10, false, 1) && tok.skipOne(WspDelim)) {
143  msgProtocol_.protocol = AnyP::PROTO_HTTP;
144  msgProtocol_.major = 1;
145  msgProtocol_.minor = static_cast<unsigned int>(verMinor);
146 
147  debugs(74, 6, "found version=" << msgProtocol_);
148 
149  debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
150  buf_ = tok.remaining(); // resume checkpoint
151  return parseResponseStatusAndReason(tok);
152 
153  } else if (tok.atEnd())
154  return 0; // need more to be sure we have it all
155  else
156  return -1; // invalid version or delimiter, a single SP terminator required
157 
    // case 3: the buffer starts with the ICY magic
158  } else if (tok.skip(IcyMagic)) {
159  debugs(74, 6, "found prefix magic " << IcyMagic);
160  // ICY Response status-line parse (same as HTTP/1 after the magic version)
161  msgProtocol_.protocol = AnyP::PROTO_ICY;
162  // NP: ICY has no /major.minor details
163  debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
164  buf_ = tok.remaining(); // resume checkpoint
165  return parseResponseStatusAndReason(tok);
    // cases 4 and 5: the buffer is shorter than a magic and matches its start, so wait for more
166  } else if (buf_.length() < Http1magic.length() && Http1magic.startsWith(buf_)) {
167  debugs(74, 7, Raw("valid HTTP/1 prefix", buf_.rawContent(), buf_.length()));
168  return 0;
169  } else if (buf_.length() < IcyMagic.length() && IcyMagic.startsWith(buf_)) {
170  debugs(74, 7, Raw("valid ICY prefix", buf_.rawContent(), buf_.length()));
171  return 0;
    // case 6: no known magic; fabricate a first line and header block.
    // buf_ is not advanced in this branch.
172  } else {
173  debugs(74, 2, "unknown/missing prefix magic. Interpreting as HTTP/0.9");
174  // found something that looks like an HTTP/0.9 response
175  // Gateway/Transform it into HTTP/1.1
176  msgProtocol_ = Http::ProtocolVersion(1,1);
177  // XXX: probably should use version 0.9 here and upgrade on output,
178  // but the old code did 1.1 transformation now.
179  statusCode_ = Http::scOkay;
180  static const SBuf gatewayPhrase("Gatewaying");
181  reasonPhrase_ = gatewayPhrase;
182  static const SBuf fakeHttpMimeBlock("X-Transformed-From: HTTP/0.9\r\n"
183  /* Server: visible_appname_string */
184  "Mime-Version: 1.0\r\n"
185  /* Date: squid_curtime */
186  "Expires: -1\r\n\r\n");
187  mimeHeaderBlock_ = fakeHttpMimeBlock;
    // the stage is set to DONE here, so the caller's FIRST -> MIME step is skipped
188  parsingStage_ = HTTP_PARSE_DONE;
189  return 1; // no more parsing
190  }
191 
192  // unreachable
193  assert(false);
194  return -1;
195 }
196 
// Runs the staged response parse over aBuf, moving parsingStage_ forward as
// each stage completes.
// Returns false when aBuf is empty before anything was parsed, when the first
// line is invalid (parsingStage_ becomes HTTP_PARSE_DONE and parseStatusCode
// becomes Http::scInvalidHeader), or when grabMimeBlock() returns false.
// Otherwise returns !needsMoreData().
// NOTE(review): the signature line is missing from this listing; the member
// index of this page lists parse(const SBuf &aBuf) -- confirm.
197 bool
199 {
    // every call restarts from the buffer handed in by the caller
200  buf_ = aBuf;
201  debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
202 
203  // stage 1: locate the status-line
204  if (parsingStage_ == HTTP_PARSE_NONE) {
205  // RFC 7230 explicitly states whether garbage whitespace is to be handled
206  // at each point of the message framing boundaries.
207  // It omits mentioning garbage prior to HTTP Responses.
208  // Therefore, if we receive anything at all treat it as Response message.
209  if (!buf_.isEmpty())
210  parsingStage_ = HTTP_PARSE_FIRST;
211  else
212  return false;
213  }
214 
215  // stage 2: parse the status-line
216  if (parsingStage_ == HTTP_PARSE_FIRST) {
217  PROF_start(HttpParserParseReplyLine);
218 
219  const int retcode = parseResponseFirstLine();
220 
221  // first-line (or a look-alike) found successfully.
    // the stage test keeps a stage already changed by the first-line parser
222  if (retcode > 0 && parsingStage_ == HTTP_PARSE_FIRST)
223  parsingStage_ = HTTP_PARSE_MIME;
224  debugs(74, 5, "status-line: retval " << retcode);
225  debugs(74, 5, "status-line: proto " << msgProtocol_);
226  debugs(74, 5, "status-line: status-code " << statusCode_);
227  debugs(74, 5, "status-line: reason-phrase " << reasonPhrase_);
228  debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
229  PROF_stop(HttpParserParseReplyLine);
230 
231  // syntax errors already
232  if (retcode < 0) {
233  parsingStage_ = HTTP_PARSE_DONE;
234  parseStatusCode = Http::scInvalidHeader;
235  return false;
236  }
    // retcode == 0 leaves the stage at HTTP_PARSE_FIRST, so stage 3 is skipped
237  }
238 
239  // stage 3: locate the mime header block
240  if (parsingStage_ == HTTP_PARSE_MIME) {
241  if (!grabMimeBlock("Response", Config.maxReplyHeaderSize))
242  return false;
243  }
244 
245  return !needsMoreData();
246 }
247 
static const CharacterSet & DelimiterCharacters()
Definition: Parser.cc:59
#define Here()
source code location of the caller
Definition: Here.h:15
@ PROTO_NONE
Definition: ProtocolType.h:24
unsigned int minor
minor version number
@ HTTP_PARSE_MIME
HTTP/1 mime-header block.
Definition: Parser.h:28
Definition: SBuf.h:87
size_t maxReplyHeaderSize
Definition: SquidConfig.h:134
StatusCode
Definition: StatusCode.h:20
#define PROF_stop(probename)
Definition: Profiler.h:63
ProtocolType protocol
which protocol this version is for
static const CharacterSet VCHAR
Definition: CharacterSet.h:96
SBuf::size_type size_type
Definition: Parser.h:43
Definition: Debug.h:189
virtual Http1::Parser::size_type firstLineSize() const
size in bytes of the first line including CRLF terminator
static const CharacterSet WSP
Definition: CharacterSet.h:98
static void ParseResponseStatus(Tokenizer &, StatusCode &code)
#define debugs(SECTION, LEVEL, CONTENT)
Definition: Debug.h:128
unsigned char code
Definition: html_quote.c:20
SBuf reasonPhrase_
HTTP/1 status-line reason phrase.
@ HTTP_PARSE_NONE
initialized, but nothing usefully parsed yet
Definition: Parser.h:23
#define assert(EX)
Definition: assert.h:19
size_type length() const
Returns the number of bytes stored in SBuf.
Definition: SBuf.h:404
@ HTTP_PARSE_FIRST
HTTP/1 message first-line.
Definition: Parser.h:24
static const SBuf IcyMagic
magic prefix for identifying ICY response messages
static const SBuf Http1magic
RFC 7230 section 2.6 - 7 magic octets.
Definition: Parser.h:145
@ HTTP_PARSE_DONE
parsed a message header, or reached a terminal syntax error
Definition: Parser.h:29
@ PROTO_HTTP
Definition: ProtocolType.h:25
Definition: parse.c:160
an std::runtime_error with thrower location info
Definition: TextException.h:20
::Parser::Tokenizer Tokenizer
Definition: Parser.h:44
AnyP::ProtocolVersion msgProtocol_
what protocol label has been found in the first line (if any)
Definition: Parser.h:154
SBuf ToSBuf(Args &&... args)
slowly stream-prints all arguments into a freshly allocated SBuf
Definition: Stream.h:124
@ scInvalidHeader
Definition: StatusCode.h:86
static const CharacterSet OBSTEXT
Definition: CharacterSet.h:111
optimized set of C chars, with quick membership test and merge support
Definition: CharacterSet.h:18
@ PROTO_ICY
Definition: ProtocolType.h:39
@ scOkay
Definition: StatusCode.h:26
virtual bool parse(const SBuf &aBuf)
int parseResponseStatusAndReason(Tokenizer &)
#define PROF_start(probename)
Definition: Profiler.h:62
class SquidConfig Config
Definition: SquidConfig.cc:12
AnyP::ProtocolVersion ProtocolVersion(unsigned int aMajor, unsigned int aMinor)
HTTP version label information.
#define DBG_DATA
Definition: Debug.h:48

 

Introduction

Documentation

Support

Miscellaneous

Web Site Translations

Mirrors