Tokenizer.cc
Go to the documentation of this file.
/*
 * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */
8
9#include "squid.h"
10#include "debug/Stream.h"
11#include "http/one/Parser.h"
12#include "http/one/Tokenizer.h"
13#include "parser/Tokenizer.h"
14#include "sbuf/Stream.h"
15
20static SBuf
22{
23 /*
24 * RFC 1945 - defines qdtext:
25 * inclusive of LWS (which includes CR and LF)
26 * exclusive of 0x80-0xFF
27 * includes 0x5C ('\') as just a regular character
28 */
29 static const CharacterSet qdtext1p0 = CharacterSet("qdtext (HTTP/1.0)", 0x23, 0x7E) +
30 CharacterSet("", "!") +
32 /*
33 * RFC 7230 - defines qdtext:
34 * exclusive of CR and LF
35 * inclusive of 0x80-0xFF
36 * includes 0x5C ('\') but only when part of quoted-pair
37 */
38 static const CharacterSet qdtext1p1 = CharacterSet("qdtext (HTTP/1.1)", 0x23, 0x5B) +
39 CharacterSet("", "!") +
40 CharacterSet("", 0x5D, 0x7E) +
43
44 // best we can do is a conditional reference since http1p0 value may change per-client
45 const CharacterSet &tokenChars = (http1p0 ? qdtext1p0 : qdtext1p1);
46
47 SBuf parsedToken;
48
49 while (!tok.atEnd()) {
50 SBuf qdText;
51 if (tok.prefix(qdText, tokenChars))
52 parsedToken.append(qdText);
53
54 if (!http1p0 && tok.skip('\\')) { // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not
55 if (tok.atEnd())
56 break;
57
58 /* RFC 7230 section 3.2.6
59 *
60 * The backslash octet ("\") can be used as a single-octet quoting
61 * mechanism within quoted-string and comment constructs. Recipients
62 * that process the value of a quoted-string MUST handle a quoted-pair
63 * as if it were replaced by the octet following the backslash.
64 *
65 * quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
66 */
68 SBuf escaped;
69 if (!tok.prefix(escaped, qPairChars, 1))
70 throw TexcHere("invalid escaped character in quoted-pair");
71
72 parsedToken.append(escaped);
73 continue;
74 }
75
76 if (tok.skip('"'))
77 return parsedToken; // may be empty
78
79 if (tok.atEnd())
80 break;
81
82 throw TexcHere(ToSBuf("invalid bytes for set ", tokenChars.name));
83 }
84
85 throw Http::One::InsufficientInput();
86}
87
88SBuf
90{
91 if (tok.skip('"'))
92 return parseQuotedStringSuffix(tok, http1p0);
93
94 if (tok.atEnd())
95 throw InsufficientInput();
96
97 SBuf parsedToken;
98 if (!tok.prefix(parsedToken, CharacterSet::TCHAR))
99 throw TexcHere("invalid input while expecting an HTTP token");
100
101 if (tok.atEnd())
102 throw InsufficientInput();
103
104 // got the complete token
105 return parsedToken;
106}
107
#define TexcHere(msg)
legacy convenience macro; it is not difficult to type Here() now
Definition: TextException.h:63
optimized set of C chars, with quick membership test and merge support
Definition: CharacterSet.h:18
static const CharacterSet TCHAR
Definition: CharacterSet.h:105
static const CharacterSet SP
Definition: CharacterSet.h:94
static const CharacterSet VCHAR
Definition: CharacterSet.h:96
static const CharacterSet LF
Definition: CharacterSet.h:92
const char * name
optional set label for debugging (default: "anonymous")
Definition: CharacterSet.h:72
static const CharacterSet CR
Definition: CharacterSet.h:80
static const CharacterSet OBSTEXT
Definition: CharacterSet.h:111
static const CharacterSet HTAB
Definition: CharacterSet.h:90
Definition: SBuf.h:94
SBuf & append(const SBuf &S)
Definition: SBuf.cc:185
static SBuf parseQuotedStringSuffix(Parser::Tokenizer &tok, const bool http1p0)
Definition: Tokenizer.cc:21
SBuf tokenOrQuotedString(Parser::Tokenizer &tok, const bool http1p0=false)
Definition: Tokenizer.cc:89
SBuf ToSBuf(Args &&... args)
slowly stream-prints all arguments into a freshly allocated SBuf
Definition: Stream.h:63
Definition: parse.c:160

 

Introduction

Documentation

Support

Miscellaneous

Web Site Translations

Mirrors