testTokenizer.cc
Go to the documentation of this file.
/*
 * Copyright (C) 1996-2022 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */
8
9#include "squid.h"
10#include "base/CharacterSet.h"
11#include "parser/Tokenizer.h"
12#include "tests/testTokenizer.h"
13#include "unitTestMain.h"
14
16
17SBuf text("GET http://resource.com/path HTTP/1.1\r\n"
18 "Host: resource.com\r\n"
19 "Cookie: laijkpk3422r j1noin \r\n"
20 "\r\n");
21const CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
22const CharacterSet whitespace("whitespace"," \r\n");
23const CharacterSet crlf("crlf","\r\n");
24const CharacterSet tab("tab","\t");
25const CharacterSet numbers("numbers","0123456789");
26
27void
29{
30 const SBuf canary("This text should not be changed.");
31
33 SBuf s;
34
36 all += alpha;
37 all += crlf;
38 all += numbers;
39 all.add(':').add('.').add('/');
40
41 // an empty prefix should return false (the full output buffer case)
42 s = canary;
43 const SBuf before = t.remaining();
44 CPPUNIT_ASSERT(!t.prefix(s, all, 0));
45 // ... and a false return value means no parameter changes
46 CPPUNIT_ASSERT_EQUAL(canary, s);
47 // ... and a false return value means no input buffer changes
48 CPPUNIT_ASSERT_EQUAL(before, t.remaining());
49
50 // successful prefix tokenization
51 CPPUNIT_ASSERT(t.prefix(s,alpha));
52 CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
53 CPPUNIT_ASSERT(t.prefix(s,whitespace));
54 CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);
55
56 //no match (first char is not in the prefix set)
57 CPPUNIT_ASSERT(!t.prefix(s,whitespace));
58 CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);
59
60 // one more match to set S to something meaningful
61 CPPUNIT_ASSERT(t.prefix(s,alpha));
62 CPPUNIT_ASSERT_EQUAL(SBuf("http"),s);
63
64 //no match (no characters from the character set in the prefix)
65 CPPUNIT_ASSERT(!t.prefix(s,tab));
66 CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); //output SBuf left untouched
67
68 // match until the end of the sample
69 CPPUNIT_ASSERT(t.prefix(s,all));
70 CPPUNIT_ASSERT_EQUAL(SBuf(),t.remaining());
71
72 // empty prefix should return false (the empty input buffer case)
73 s = canary;
74 CPPUNIT_ASSERT(!t.prefix(s, all));
75 // ... and a false return value means no parameter changes
76 CPPUNIT_ASSERT_EQUAL(canary, s);
77}
78
79void
81{
83 SBuf s;
84
85 // first scenario: patterns match
86 // prep for test
87 CPPUNIT_ASSERT(t.prefix(s,alpha));
88 CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
89
90 // test skipping one character from a character set
91 CPPUNIT_ASSERT(t.skipOne(whitespace));
92 // check that skip was right
93 CPPUNIT_ASSERT(t.prefix(s,alpha));
94 CPPUNIT_ASSERT_EQUAL(SBuf("http"),s);
95
96 //check skip prefix
97 CPPUNIT_ASSERT(t.skip(SBuf("://")));
98 // verify
99 CPPUNIT_ASSERT(t.prefix(s,alpha));
100 CPPUNIT_ASSERT_EQUAL(SBuf("resource"),s);
101
102 // no skip
103 CPPUNIT_ASSERT(!t.skipOne(alpha));
104 CPPUNIT_ASSERT(!t.skip(SBuf("://")));
105 CPPUNIT_ASSERT(!t.skip('a'));
106
107 // test skipping all characters from a character set while looking at .com
108 CPPUNIT_ASSERT(t.skip('.'));
109 CPPUNIT_ASSERT_EQUAL(static_cast<SBuf::size_type>(3), t.skipAll(alpha));
110 CPPUNIT_ASSERT(t.remaining().startsWith(SBuf("/path")));
111}
112
113void
115{
117 SBuf s;
118
119 // first scenario: patterns match
120 CPPUNIT_ASSERT(t.token(s,whitespace));
121 CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
122 CPPUNIT_ASSERT(t.token(s,whitespace));
123 CPPUNIT_ASSERT_EQUAL(SBuf("http://resource.com/path"),s);
124 CPPUNIT_ASSERT(t.token(s,whitespace));
125 CPPUNIT_ASSERT_EQUAL(SBuf("HTTP/1.1"),s);
126 CPPUNIT_ASSERT(t.token(s,whitespace));
127 CPPUNIT_ASSERT_EQUAL(SBuf("Host:"),s);
128
129}
130
131void
133{
134 const SBuf canary("This text should not be changed.");
135
137 SBuf s;
138
140 all += alpha;
141 all += crlf;
142 all += numbers;
143 all.add(':').add('.').add('/');
144
145 // an empty suffix should return false (the full output buffer case)
146 s = canary;
147 const SBuf before = t.remaining();
148 CPPUNIT_ASSERT(!t.suffix(s, all, 0));
149 // ... and a false return value means no parameter changes
150 CPPUNIT_ASSERT_EQUAL(canary, s);
151 // ... and a false return value means no input buffer changes
152 CPPUNIT_ASSERT_EQUAL(before, t.remaining());
153
154 // consume suffix until the last CRLF, including that last CRLF
155 SBuf::size_type remaining = t.remaining().length();
156 while (t.remaining().findLastOf(crlf) != SBuf::npos) {
157 CPPUNIT_ASSERT(t.remaining().length() > 0);
158 CPPUNIT_ASSERT(t.skipOneTrailing(all));
159 // ensure steady progress
160 CPPUNIT_ASSERT_EQUAL(remaining, t.remaining().length() + 1);
161 --remaining;
162 }
163
164 // no match (last char is not in the suffix set)
165 CPPUNIT_ASSERT(!t.suffix(s, crlf));
166 CPPUNIT_ASSERT(!t.suffix(s, whitespace));
167
168 // successful suffix tokenization
169 CPPUNIT_ASSERT(t.suffix(s, numbers));
170 CPPUNIT_ASSERT_EQUAL(SBuf("1"), s);
171 CPPUNIT_ASSERT(t.skipSuffix(SBuf("1.")));
172 CPPUNIT_ASSERT(t.skipSuffix(SBuf("/")));
173 CPPUNIT_ASSERT(t.suffix(s, alpha));
174 CPPUNIT_ASSERT_EQUAL(SBuf("HTTP"), s);
175 CPPUNIT_ASSERT(t.suffix(s, whitespace));
176 CPPUNIT_ASSERT_EQUAL(SBuf(" "), s);
177
178 // match until the end of the sample
179 CPPUNIT_ASSERT(t.suffix(s, all));
180 CPPUNIT_ASSERT_EQUAL(SBuf(), t.remaining());
181
182 // an empty buffer does not end with a token
183 s = canary;
184 CPPUNIT_ASSERT(!t.suffix(s, all));
185 CPPUNIT_ASSERT_EQUAL(canary, s); // no parameter changes
186
187 // we cannot skip an empty suffix, even in an empty buffer
188 CPPUNIT_ASSERT(!t.skipSuffix(SBuf()));
189}
190
191void
193{
194
195}
196
197void
199{
200 // successful parse in base 10
201 {
202 int64_t rv;
203 Parser::Tokenizer t(SBuf("1234"));
204 const int64_t benchmark = 1234;
205 CPPUNIT_ASSERT(t.int64(rv, 10));
206 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
207 CPPUNIT_ASSERT(t.buf().isEmpty());
208 }
209
210 // successful parse, autodetect base
211 {
212 int64_t rv;
213 Parser::Tokenizer t(SBuf("1234"));
214 const int64_t benchmark = 1234;
215 CPPUNIT_ASSERT(t.int64(rv));
216 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
217 CPPUNIT_ASSERT(t.buf().isEmpty());
218 }
219
220 // successful parse, autodetect base
221 {
222 int64_t rv;
223 Parser::Tokenizer t(SBuf("01234"));
224 const int64_t benchmark = 01234;
225 CPPUNIT_ASSERT(t.int64(rv));
226 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
227 CPPUNIT_ASSERT(t.buf().isEmpty());
228 }
229
230 // successful parse, autodetect base
231 {
232 int64_t rv;
233 Parser::Tokenizer t(SBuf("0x12f4"));
234 const int64_t benchmark = 0x12f4;
235 CPPUNIT_ASSERT(t.int64(rv));
236 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
237 CPPUNIT_ASSERT(t.buf().isEmpty());
238 }
239
240 // API mismatch: don't eat leading space
241 {
242 int64_t rv;
243 Parser::Tokenizer t(SBuf(" 1234"));
244 CPPUNIT_ASSERT(!t.int64(rv));
245 CPPUNIT_ASSERT_EQUAL(SBuf(" 1234"), t.buf());
246 }
247
248 // API mismatch: don't eat multiple leading spaces
249 {
250 int64_t rv;
251 Parser::Tokenizer t(SBuf(" 1234"));
252 CPPUNIT_ASSERT(!t.int64(rv));
253 CPPUNIT_ASSERT_EQUAL(SBuf(" 1234"), t.buf());
254 }
255
256 // trailing spaces
257 {
258 int64_t rv;
259 Parser::Tokenizer t(SBuf("1234 foo"));
260 const int64_t benchmark = 1234;
261 CPPUNIT_ASSERT(t.int64(rv));
262 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
263 CPPUNIT_ASSERT_EQUAL(SBuf(" foo"), t.buf());
264 }
265
266 // trailing nonspaces
267 {
268 int64_t rv;
269 Parser::Tokenizer t(SBuf("1234foo"));
270 const int64_t benchmark = 1234;
271 CPPUNIT_ASSERT(t.int64(rv));
272 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
273 CPPUNIT_ASSERT_EQUAL(SBuf("foo"), t.buf());
274 }
275
276 // trailing nonspaces
277 {
278 int64_t rv;
279 Parser::Tokenizer t(SBuf("0x1234foo"));
280 const int64_t benchmark = 0x1234f;
281 CPPUNIT_ASSERT(t.int64(rv));
282 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
283 CPPUNIT_ASSERT_EQUAL(SBuf("oo"), t.buf());
284 }
285
286 // overflow
287 {
288 int64_t rv;
289 Parser::Tokenizer t(SBuf("1029397752385698678762234"));
290 CPPUNIT_ASSERT(!t.int64(rv));
291 CPPUNIT_ASSERT_EQUAL(SBuf("1029397752385698678762234"), t.buf());
292 }
293
294 // buffered sub-string parsing
295 {
296 int64_t rv;
297 SBuf base("1029397752385698678762234");
298 const int64_t benchmark = 22;
299 Parser::Tokenizer t(base.substr(base.length()-4,2));
300 CPPUNIT_ASSERT_EQUAL(SBuf("22"),t.buf());
301 CPPUNIT_ASSERT(t.int64(rv));
302 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
303 CPPUNIT_ASSERT(t.buf().isEmpty());
304 }
305
306 // base-16, prefix
307 {
308 int64_t rv;
309 SBuf base("deadbeefrow");
310 const int64_t benchmark=0xdeadbeef;
311 Parser::Tokenizer t(base);
312 CPPUNIT_ASSERT(t.int64(rv,16));
313 CPPUNIT_ASSERT_EQUAL(benchmark,rv);
314 CPPUNIT_ASSERT_EQUAL(SBuf("row"),t.buf());
315
316 }
317}
318
optimized set of C chars, with quick membership test and merge support
Definition: CharacterSet.h:18
CharacterSet & add(const unsigned char c)
add a given character to the character set
Definition: CharacterSet.cc:47
bool prefix(SBuf &returnedToken, const CharacterSet &tokenChars, SBuf::size_type limit=SBuf::npos)
Definition: Tokenizer.cc:81
bool suffix(SBuf &returnedToken, const CharacterSet &tokenChars, SBuf::size_type limit=SBuf::npos)
Definition: Tokenizer.cc:119
bool skipOne(const CharacterSet &discardables)
Definition: Tokenizer.cc:151
bool token(SBuf &returnedToken, const CharacterSet &delimiters)
Definition: Tokenizer.cc:63
bool skipSuffix(const SBuf &tokenToSkip)
Definition: Tokenizer.cc:162
SBuf::size_type skipAll(const CharacterSet &discardables)
Definition: Tokenizer.cc:139
bool int64(int64_t &result, int base=0, bool allowSign=true, SBuf::size_type limit=SBuf::npos)
Definition: Tokenizer.cc:228
SBuf buf() const
yet unparsed data
Definition: Tokenizer.h:35
const SBuf & remaining() const
the remaining unprocessed section of buffer
Definition: Tokenizer.h:44
bool skipOneTrailing(const CharacterSet &discardables)
Definition: Tokenizer.cc:201
bool skip(const SBuf &tokenToSkip)
Definition: Tokenizer.cc:179
Definition: SBuf.h:94
static const size_type npos
Definition: SBuf.h:99
size_type length() const
Returns the number of bytes stored in SBuf.
Definition: SBuf.h:415
bool isEmpty() const
Definition: SBuf.h:431
size_type findLastOf(const CharacterSet &set, size_type endPos=npos) const
Definition: SBuf.cc:769
bool startsWith(const SBuf &S, const SBufCaseSensitive isCaseSensitive=caseSensitive) const
Definition: SBuf.cc:442
SBuf substr(size_type pos, size_type n=npos) const
Definition: SBuf.cc:576
MemBlob::size_type size_type
Definition: SBuf.h:96
void testTokenizerSkip()
void testTokenizerToken()
void testTokenizerInt64()
void testCharacterSet()
void testTokenizerSuffix()
void testTokenizerPrefix()
const CharacterSet tab("tab","\t")
SBuf text("GET http://resource.com/path HTTP/1.1\r\n" "Host: resource.com\r\n" "Cookie: laijkpk3422r j1noin \r\n" "\r\n")
const CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
const CharacterSet crlf("crlf","\r\n")
CPPUNIT_TEST_SUITE_REGISTRATION(testTokenizer)
const CharacterSet whitespace("whitespace"," \r\n")
const CharacterSet numbers("numbers","0123456789")

 

Introduction

Documentation

Support

Miscellaneous

Web Site Translations

Mirrors