testTokenizer.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 1996-2017 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 #include "squid.h"
10 #include "base/CharacterSet.h"
11 #include "parser/Tokenizer.h"
12 #include "tests/testTokenizer.h"
13 #include "unitTestMain.h"
14 
16 
17 SBuf text("GET http://resource.com/path HTTP/1.1\r\n"
18  "Host: resource.com\r\n"
19  "Cookie: laijkpk3422r j1noin \r\n"
20  "\r\n");
21 const CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
22 const CharacterSet whitespace("whitespace"," \r\n");
23 const CharacterSet crlf("crlf","\r\n");
24 const CharacterSet tab("tab","\t");
25 const CharacterSet numbers("numbers","0123456789");
26 
27 void
29 {
30  const SBuf canary("This text should not be changed.");
31 
33  SBuf s;
34 
36  all += alpha;
37  all += crlf;
38  all += numbers;
39  all.add(':').add('.').add('/');
40 
41  // an empty prefix should return false (the full output buffer case)
42  s = canary;
43  const SBuf before = t.remaining();
44  CPPUNIT_ASSERT(!t.prefix(s, all, 0));
45  // ... and a false return value means no parameter changes
46  CPPUNIT_ASSERT_EQUAL(canary, s);
47  // ... and a false return value means no input buffer changes
48  CPPUNIT_ASSERT_EQUAL(before, t.remaining());
49 
50  // successful prefix tokenization
51  CPPUNIT_ASSERT(t.prefix(s,alpha));
52  CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
53  CPPUNIT_ASSERT(t.prefix(s,whitespace));
54  CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);
55 
56  //no match (first char is not in the prefix set)
57  CPPUNIT_ASSERT(!t.prefix(s,whitespace));
58  CPPUNIT_ASSERT_EQUAL(SBuf(" "),s);
59 
60  // one more match to set S to something meaningful
61  CPPUNIT_ASSERT(t.prefix(s,alpha));
62  CPPUNIT_ASSERT_EQUAL(SBuf("http"),s);
63 
64  //no match (no characters from the character set in the prefix)
65  CPPUNIT_ASSERT(!t.prefix(s,tab));
66  CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); //output SBuf left untouched
67 
68  // match until the end of the sample
69  CPPUNIT_ASSERT(t.prefix(s,all));
70  CPPUNIT_ASSERT_EQUAL(SBuf(),t.remaining());
71 
72  // empty prefix should return false (the empty input buffer case)
73  s = canary;
74  CPPUNIT_ASSERT(!t.prefix(s, all));
75  // ... and a false return value means no parameter changes
76  CPPUNIT_ASSERT_EQUAL(canary, s);
77 }
78 
79 void
81 {
83  SBuf s;
84 
85  // first scenario: patterns match
86  // prep for test
87  CPPUNIT_ASSERT(t.prefix(s,alpha));
88  CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
89 
90  // test skipping one character from a character set
91  CPPUNIT_ASSERT(t.skipOne(whitespace));
92  // check that skip was right
93  CPPUNIT_ASSERT(t.prefix(s,alpha));
94  CPPUNIT_ASSERT_EQUAL(SBuf("http"),s);
95 
96  //check skip prefix
97  CPPUNIT_ASSERT(t.skip(SBuf("://")));
98  // verify
99  CPPUNIT_ASSERT(t.prefix(s,alpha));
100  CPPUNIT_ASSERT_EQUAL(SBuf("resource"),s);
101 
102  // no skip
103  CPPUNIT_ASSERT(!t.skipOne(alpha));
104  CPPUNIT_ASSERT(!t.skip(SBuf("://")));
105  CPPUNIT_ASSERT(!t.skip('a'));
106 
107  // test skipping all characters from a character set while looking at .com
108  CPPUNIT_ASSERT(t.skip('.'));
109  CPPUNIT_ASSERT_EQUAL(static_cast<SBuf::size_type>(3), t.skipAll(alpha));
110  CPPUNIT_ASSERT(t.remaining().startsWith(SBuf("/path")));
111 }
112 
113 void
115 {
117  SBuf s;
118 
119  // first scenario: patterns match
120  CPPUNIT_ASSERT(t.token(s,whitespace));
121  CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s);
122  CPPUNIT_ASSERT(t.token(s,whitespace));
123  CPPUNIT_ASSERT_EQUAL(SBuf("http://resource.com/path"),s);
124  CPPUNIT_ASSERT(t.token(s,whitespace));
125  CPPUNIT_ASSERT_EQUAL(SBuf("HTTP/1.1"),s);
126  CPPUNIT_ASSERT(t.token(s,whitespace));
127  CPPUNIT_ASSERT_EQUAL(SBuf("Host:"),s);
128 
129 }
130 
131 void
133 {
134  const SBuf canary("This text should not be changed.");
135 
137  SBuf s;
138 
140  all += alpha;
141  all += crlf;
142  all += numbers;
143  all.add(':').add('.').add('/');
144 
145  // an empty suffix should return false (the full output buffer case)
146  s = canary;
147  const SBuf before = t.remaining();
148  CPPUNIT_ASSERT(!t.suffix(s, all, 0));
149  // ... and a false return value means no parameter changes
150  CPPUNIT_ASSERT_EQUAL(canary, s);
151  // ... and a false return value means no input buffer changes
152  CPPUNIT_ASSERT_EQUAL(before, t.remaining());
153 
154  // consume suffix until the last CRLF, including that last CRLF
155  SBuf::size_type remaining = t.remaining().length();
156  while (t.remaining().findLastOf(crlf) != SBuf::npos) {
157  CPPUNIT_ASSERT(t.remaining().length() > 0);
158  CPPUNIT_ASSERT(t.skipOneTrailing(all));
159  // ensure steady progress
160  CPPUNIT_ASSERT_EQUAL(remaining, t.remaining().length() + 1);
161  --remaining;
162  }
163 
164  // no match (last char is not in the suffix set)
165  CPPUNIT_ASSERT(!t.suffix(s, crlf));
166  CPPUNIT_ASSERT(!t.suffix(s, whitespace));
167 
168  // successful suffix tokenization
169  CPPUNIT_ASSERT(t.suffix(s, numbers));
170  CPPUNIT_ASSERT_EQUAL(SBuf("1"), s);
171  CPPUNIT_ASSERT(t.skipSuffix(SBuf("1.")));
172  CPPUNIT_ASSERT(t.skipSuffix(SBuf("/")));
173  CPPUNIT_ASSERT(t.suffix(s, alpha));
174  CPPUNIT_ASSERT_EQUAL(SBuf("HTTP"), s);
175  CPPUNIT_ASSERT(t.suffix(s, whitespace));
176  CPPUNIT_ASSERT_EQUAL(SBuf(" "), s);
177 
178  // match until the end of the sample
179  CPPUNIT_ASSERT(t.suffix(s, all));
180  CPPUNIT_ASSERT_EQUAL(SBuf(), t.remaining());
181 
182  // an empty buffer does not end with a token
183  s = canary;
184  CPPUNIT_ASSERT(!t.suffix(s, all));
185  CPPUNIT_ASSERT_EQUAL(canary, s); // no parameter changes
186 
187  // we cannot skip an empty suffix, even in an empty buffer
188  CPPUNIT_ASSERT(!t.skipSuffix(SBuf()));
189 }
190 
191 void
193 {
194 
195 }
196 
197 void
199 {
200  // successful parse in base 10
201  {
202  int64_t rv;
203  Parser::Tokenizer t(SBuf("1234"));
204  const int64_t benchmark = 1234;
205  CPPUNIT_ASSERT(t.int64(rv, 10));
206  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
207  CPPUNIT_ASSERT(t.buf().isEmpty());
208  }
209 
210  // successful parse, autodetect base
211  {
212  int64_t rv;
213  Parser::Tokenizer t(SBuf("1234"));
214  const int64_t benchmark = 1234;
215  CPPUNIT_ASSERT(t.int64(rv));
216  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
217  CPPUNIT_ASSERT(t.buf().isEmpty());
218  }
219 
220  // successful parse, autodetect base
221  {
222  int64_t rv;
223  Parser::Tokenizer t(SBuf("01234"));
224  const int64_t benchmark = 01234;
225  CPPUNIT_ASSERT(t.int64(rv));
226  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
227  CPPUNIT_ASSERT(t.buf().isEmpty());
228  }
229 
230  // successful parse, autodetect base
231  {
232  int64_t rv;
233  Parser::Tokenizer t(SBuf("0x12f4"));
234  const int64_t benchmark = 0x12f4;
235  CPPUNIT_ASSERT(t.int64(rv));
236  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
237  CPPUNIT_ASSERT(t.buf().isEmpty());
238  }
239 
240  // API mismatch: don't eat leading space
241  {
242  int64_t rv;
243  Parser::Tokenizer t(SBuf(" 1234"));
244  CPPUNIT_ASSERT(!t.int64(rv));
245  CPPUNIT_ASSERT_EQUAL(SBuf(" 1234"), t.buf());
246  }
247 
248  // API mismatch: don't eat multiple leading spaces
249  {
250  int64_t rv;
251  Parser::Tokenizer t(SBuf(" 1234"));
252  CPPUNIT_ASSERT(!t.int64(rv));
253  CPPUNIT_ASSERT_EQUAL(SBuf(" 1234"), t.buf());
254  }
255 
256  // trailing spaces
257  {
258  int64_t rv;
259  Parser::Tokenizer t(SBuf("1234 foo"));
260  const int64_t benchmark = 1234;
261  CPPUNIT_ASSERT(t.int64(rv));
262  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
263  CPPUNIT_ASSERT_EQUAL(SBuf(" foo"), t.buf());
264  }
265 
266  // trailing nonspaces
267  {
268  int64_t rv;
269  Parser::Tokenizer t(SBuf("1234foo"));
270  const int64_t benchmark = 1234;
271  CPPUNIT_ASSERT(t.int64(rv));
272  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
273  CPPUNIT_ASSERT_EQUAL(SBuf("foo"), t.buf());
274  }
275 
276  // trailing nonspaces
277  {
278  int64_t rv;
279  Parser::Tokenizer t(SBuf("0x1234foo"));
280  const int64_t benchmark = 0x1234f;
281  CPPUNIT_ASSERT(t.int64(rv));
282  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
283  CPPUNIT_ASSERT_EQUAL(SBuf("oo"), t.buf());
284  }
285 
286  // overflow
287  {
288  int64_t rv;
289  Parser::Tokenizer t(SBuf("1029397752385698678762234"));
290  CPPUNIT_ASSERT(!t.int64(rv));
291  CPPUNIT_ASSERT_EQUAL(SBuf("1029397752385698678762234"), t.buf());
292  }
293 
294  // buffered sub-string parsing
295  {
296  int64_t rv;
297  SBuf base("1029397752385698678762234");
298  const int64_t benchmark = 22;
299  Parser::Tokenizer t(base.substr(base.length()-4,2));
300  CPPUNIT_ASSERT_EQUAL(SBuf("22"),t.buf());
301  CPPUNIT_ASSERT(t.int64(rv));
302  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
303  CPPUNIT_ASSERT(t.buf().isEmpty());
304  }
305 
306  // base-16, prefix
307  {
308  int64_t rv;
309  SBuf base("deadbeefrow");
310  const int64_t benchmark=0xdeadbeef;
311  Parser::Tokenizer t(base);
312  CPPUNIT_ASSERT(t.int64(rv,16));
313  CPPUNIT_ASSERT_EQUAL(benchmark,rv);
314  CPPUNIT_ASSERT_EQUAL(SBuf("row"),t.buf());
315 
316  }
317 }
318 
Definition: SBuf.h:87
SBuf text("GET http://resource.com/path HTTP/1.1\r\n""Host: resource.com\r\n""Cookie: laijkpk3422r j1noin \r\n""\r\n")
CPPUNIT_TEST_SUITE_REGISTRATION(testTokenizer)
bool isEmpty() const
Definition: SBuf.h:422
bool skipSuffix(const SBuf &tokenToSkip)
Definition: Tokenizer.cc:143
CharacterSet & add(const unsigned char c)
add a given character to the character set
Definition: CharacterSet.cc:47
size_type length() const
Returns the number of bytes stored in SBuf.
Definition: SBuf.h:405
void testTokenizerSuffix()
void testTokenizerPrefix()
const CharacterSet whitespace("whitespace"," \r\n")
bool skipOneTrailing(const CharacterSet &discardables)
Definition: Tokenizer.cc:182
optimized set of C chars, with quick membership test and merge support
Definition: CharacterSet.h:17
const CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
bool skipOne(const CharacterSet &discardables)
Definition: Tokenizer.cc:132
const CharacterSet numbers("numbers","0123456789")
bool skip(const SBuf &tokenToSkip)
Definition: Tokenizer.cc:160
bool prefix(SBuf &returnedToken, const CharacterSet &tokenChars, SBuf::size_type limit=SBuf::npos)
Definition: Tokenizer.cc:79
bool suffix(SBuf &returnedToken, const CharacterSet &tokenChars, SBuf::size_type limit=SBuf::npos)
Definition: Tokenizer.cc:100
void testTokenizerToken()
void testCharacterSet()
const CharacterSet tab("tab","\t")
bool startsWith(const SBuf &S, const SBufCaseSensitive isCaseSensitive=caseSensitive) const
Definition: SBuf.cc:472
const SBuf & remaining() const
the remaining unprocessed section of buffer
Definition: Tokenizer.h:44
static const size_type npos
Definition: SBuf.h:93
SBuf::size_type skipAll(const CharacterSet &discardables)
Definition: Tokenizer.cc:120
void testTokenizerSkip()
size_type findLastOf(const CharacterSet &set, size_type endPos=npos) const
Definition: SBuf.cc:799
MemBlob::size_type size_type
Definition: SBuf.h:90
const CharacterSet crlf("crlf","\r\n")
Definition: Elements.cc:12
SBuf substr(size_type pos, size_type n=npos) const
Definition: SBuf.cc:606
SBuf buf() const
yet unparsed data
Definition: Tokenizer.h:35
void testTokenizerInt64()
bool token(SBuf &returnedToken, const CharacterSet &delimiters)
Definition: Tokenizer.cc:61
bool int64(int64_t &result, int base=0, bool allowSign=true, SBuf::size_type limit=SBuf::npos)
Definition: Tokenizer.cc:209

 

Introduction

Documentation

Support

Miscellaneous

Web Site Translations

Mirrors