squid : Optimising Web Delivery

Go to the documentation of this file.

 /*
  * Copyright (C) 1996-2025 The Squid Software Foundation and contributors
  *
  * Squid software is distributed under GPLv2+ license and includes
  * contributions from numerous individuals and organizations.
  * Please see the COPYING and CONTRIBUTORS files for details.
  */
  
 /* DEBUG: section 24    SBuf */
  
 #include "squid.h"
 #include "debug/Stream.h"
 #include "parser/forward.h"
 #include "parser/Tokenizer.h"
 #include "sbuf/Stream.h"
  
 #include <cctype>
 #include <cerrno>
  
 SBuf
 Parser::Tokenizer::consume(const SBuf::size_type n)
 {
     // careful: n may be npos!
     debugs(24, 5, "consuming " << n << " bytes");
     const SBuf result = buf_.consume(n);
     parsed_ += result.length();
     return result;
 }
  
 SBuf::size_type
 Parser::Tokenizer::success(const SBuf::size_type n)
 {
     return consume(n).length();
 }
  
 SBuf
 Parser::Tokenizer::consumeTrailing(const SBuf::size_type n)
 {
     debugs(24, 5, "consuming " << n << " bytes");
  
     // If n is npos, we consume everything from buf_ (and nothing from result).
     const SBuf::size_type parsed = (n == SBuf::npos) ? buf_.length() : n;
  
     SBuf result = buf_;
     buf_ = result.consume(buf_.length() - parsed);
     parsed_ += parsed;
     return result;
 }
  
 SBuf::size_type
 Parser::Tokenizer::successTrailing(const SBuf::size_type n)
 {
     return consumeTrailing(n).length();
 }
  
 bool
 Parser::Tokenizer::token(SBuf &returnedToken, const CharacterSet &delimiters)
 {
     const Tokenizer saved(*this);
     skipAll(delimiters);
     const SBuf::size_type tokenLen = buf_.findFirstOf(delimiters); // not found = npos => consume to end
     if (tokenLen == SBuf::npos) {
         debugs(24, 8, "no token found for delimiters " << delimiters.name);
         *this = saved;
         return false;
     }
     returnedToken = consume(tokenLen); // cannot be empty
     skipAll(delimiters);
     debugs(24, DBG_DATA, "token found for delimiters " << delimiters.name << ": '" <<
            returnedToken << '\'');
     return true;
 }
  
 bool
 Parser::Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
 {
     SBuf::size_type prefixLen = buf_.substr(0,limit).findFirstNotOf(tokenChars);
     if (prefixLen == 0) {
         debugs(24, 8, "no prefix for set " << tokenChars.name);
         return false;
     }
     if (prefixLen == SBuf::npos && (atEnd() || limit == 0)) {
         debugs(24, 8, "no char in set " << tokenChars.name << " while looking for prefix");
         return false;
     }
     if (prefixLen == SBuf::npos && limit > 0) {
         debugs(24, 8, "whole haystack matched");
         prefixLen = limit;
     }
     debugs(24, 8, "found with length " << prefixLen);
     returnedToken = consume(prefixLen); // cannot be empty after the npos check
     return true;
 }
  
 SBuf
 Parser::Tokenizer::prefix(const char *description, const CharacterSet &tokenChars, const SBuf::size_type limit)
 {
     if (atEnd())
         throw InsufficientInput();
  
     SBuf result;
  
     if (!prefix(result, tokenChars, limit))
         throw TexcHere(ToSBuf("cannot parse ", description));
  
     if (atEnd())
         throw InsufficientInput();
  
     return result;
 }
  
 bool
 Parser::Tokenizer::suffix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
 {
     SBuf span = buf_;
  
     if (limit < buf_.length())
         span.consume(buf_.length() - limit); // ignore the N prefix characters
  
     auto i = span.rbegin();
     SBuf::size_type found = 0;
     while (i != span.rend() && tokenChars[*i]) {
         ++i;
         ++found;
     }
     if (!found)
         return false;
     returnedToken = consumeTrailing(found);
     return true;
 }
  
 SBuf::size_type
 Parser::Tokenizer::skipAll(const CharacterSet &tokenChars)
 {
     const SBuf::size_type prefixLen = buf_.findFirstNotOf(tokenChars);
     if (prefixLen == 0) {
         debugs(24, 8, "no match when trying to skipAll " << tokenChars.name);
         return 0;
     }
     debugs(24, 8, "skipping all in " << tokenChars.name << " len " << prefixLen);
     return success(prefixLen);
 }
  
 void
 Parser::Tokenizer::skipRequired(const char *description, const SBuf &tokenToSkip)
 {
     if (skip(tokenToSkip) || tokenToSkip.isEmpty())
         return;
  
     if (tokenToSkip.startsWith(buf_))
         throw InsufficientInput();
  
     throw TextException(ToSBuf("cannot skip ", description), Here());
 }
  
 bool
 Parser::Tokenizer::skipOne(const CharacterSet &chars)
 {
     if (!buf_.isEmpty() && chars[buf_[0]]) {
         debugs(24, 8, "skipping one-of " << chars.name);
         return success(1);
     }
     debugs(24, 8, "no match while skipping one-of " << chars.name);
     return false;
 }
  
 bool
 Parser::Tokenizer::skipSuffix(const SBuf &tokenToSkip)
 {
     if (buf_.length() < tokenToSkip.length())
         return false;
  
     SBuf::size_type offset = 0;
     if (tokenToSkip.length() < buf_.length())
         offset = buf_.length() - tokenToSkip.length();
  
     if (buf_.substr(offset, SBuf::npos).cmp(tokenToSkip) == 0) {
         debugs(24, 8, "skipping " << tokenToSkip.length());
         return successTrailing(tokenToSkip.length());
     }
     return false;
 }
  
 bool
 Parser::Tokenizer::skip(const SBuf &tokenToSkip)
 {
     if (buf_.startsWith(tokenToSkip)) {
         debugs(24, 8, "skipping " << tokenToSkip.length());
         return success(tokenToSkip.length());
     }
     debugs(24, 8, "no match, not skipping '" << tokenToSkip << '\'');
     return false;
 }
  
 bool
 Parser::Tokenizer::skip(const char tokenChar)
 {
     if (!buf_.isEmpty() && buf_[0] == tokenChar) {
         debugs(24, 8, "skipping char '" << tokenChar << '\'');
         return success(1);
     }
     debugs(24, 8, "no match, not skipping char '" << tokenChar << '\'');
     return false;
 }
  
 bool
 Parser::Tokenizer::skipOneTrailing(const CharacterSet &skippable)
 {
     if (!buf_.isEmpty() && skippable[buf_[buf_.length()-1]]) {
         debugs(24, 8, "skipping one-of " << skippable.name);
         return successTrailing(1);
     }
     debugs(24, 8, "no match while skipping one-of " << skippable.name);
     return false;
 }
  
 SBuf::size_type
 Parser::Tokenizer::skipAllTrailing(const CharacterSet &skippable)
 {
     const SBuf::size_type prefixEnd = buf_.findLastNotOf(skippable);
     const SBuf::size_type prefixLen = prefixEnd == SBuf::npos ?
                                       0 : (prefixEnd + 1);
     const SBuf::size_type suffixLen = buf_.length() - prefixLen;
     if (suffixLen == 0) {
         debugs(24, 8, "no match when trying to skip " << skippable.name);
         return 0;
     }
     debugs(24, 8, "skipping in " << skippable.name << " len " << suffixLen);
     return successTrailing(suffixLen);
 }
  
 /* reworked from compat/strtoll.c */
 bool
 Parser::Tokenizer::int64(int64_t & result, int base, bool allowSign, const SBuf::size_type limit)
 {
     if (atEnd() || limit == 0)
         return false;
  
     const SBuf range(buf_.substr(0,limit));
  
     // XXX: account for buf_.size()
     bool neg = false;
     const char *s = range.rawContent();
     const char *end = range.rawContent() + range.length();
  
     if (allowSign) {
         if (*s == '-') {
             neg = true;
             ++s;
         } else if (*s == '+') {
             ++s;
         }
         if (s >= end) return false;
     }
     if (( base == 0 || base == 16) && *s == '0' && (s+1 < end ) &&
             tolower(*(s+1)) == 'x') {
         s += 2;
         base = 16;
     }
     if (base == 0) {
         if ( *s == '0') {
             base = 8;
         } else {
             base = 10;
         }
     }
     if (s >= end) return false;
  
     uint64_t cutoff;
  
     cutoff = neg ? -static_cast<uint64_t>(INT64_MIN) : INT64_MAX;
     const int cutlim = cutoff % static_cast<int64_t>(base);
     cutoff /= static_cast<uint64_t>(base);
  
     int any = 0, c;
     int64_t acc = 0;
     do {
         c = *s;
         if (xisdigit(c)) {
             c -= '0';
         } else if (xisalpha(c)) {
             c -= xisupper(c) ? 'A' - 10 : 'a' - 10;
         } else {
             break;
         }
         if (c >= base)
             break;
         if (any < 0 || static_cast<uint64_t>(acc) > cutoff || (static_cast<uint64_t>(acc) == cutoff && c > cutlim))
             any = -1;
         else {
             any = 1;
             acc *= base;
             acc += c;
         }
     } while (++s < end);
  
     if (any == 0) // nothing was parsed
         return false;
     if (any < 0) {
         acc = neg ? INT64_MIN : INT64_MAX;
         errno = ERANGE;
         return false;
     } else if (neg)
         acc = -acc;
  
     result = acc;
     return success(s - range.rawContent());
 }
  
 int64_t
 Parser::Tokenizer::udec64(const char *description, const SBuf::size_type limit)
 {
     if (atEnd())
         throw InsufficientInput();
  
     int64_t result = 0;
  
     // Since we only support unsigned decimals, a parsing failure with a
     // non-empty input always implies invalid/malformed input (or a buggy
     // limit=0 caller). TODO: Support signed and non-decimal integers by
     // refactoring int64() to detect insufficient input.
     if (!int64(result, 10, false, limit))
         throw TexcHere(ToSBuf("cannot parse ", description));
  
     if (atEnd())
         throw InsufficientInput(); // more digits may be coming
  
     return result;
 }
  

Parser::Tokenizer::buf_

SBuf buf_

yet unparsed input

Definition: Tokenizer.h:176

Parser::Tokenizer::prefix

bool prefix(SBuf &returnedToken, const CharacterSet &tokenChars, SBuf::size_type limit=SBuf::npos)

Definition: Tokenizer.cc:79

Here

#define Here()

source code location of the caller

Definition: Here.h:15

Parser::Tokenizer::skipAll

SBuf::size_type skipAll(const CharacterSet &discardables)

Definition: Tokenizer.cc:137

CharacterSet::name

const char * name

optional set label for debugging (default: "anonymous")

Definition: CharacterSet.h:72

SBuf::rbegin

const_reverse_iterator rbegin() const

Definition: SBuf.h:595

Parser::Tokenizer::skipOneTrailing

bool skipOneTrailing(const CharacterSet &discardables)

Definition: Tokenizer.cc:211

Parser::Tokenizer::success

SBuf::size_type success(const SBuf::size_type n)

convenience method: consume()s up to n bytes and returns their count

Definition: Tokenizer.cc:33

SBuf::isEmpty

bool isEmpty() const

Definition: SBuf.h:435

Parser::Tokenizer::token

bool token(SBuf &returnedToken, const CharacterSet &delimiters)

Definition: Tokenizer.cc:61

INT64_MIN

#define INT64_MIN

Definition: types.h:79

SBuf

Definition: SBuf.h:93

Parser::Tokenizer::skip

bool skip(const SBuf &tokenToSkip)

Definition: Tokenizer.cc:189

SBuf::startsWith

bool startsWith(const SBuf &S, const SBufCaseSensitive isCaseSensitive=caseSensitive) const

Definition: SBuf.cc:442

Parser::Tokenizer::skipAllTrailing

SBuf::size_type skipAllTrailing(const CharacterSet &discardables)

Definition: Tokenizer.cc:222

TexcHere

#define TexcHere(msg)

legacy convenience macro; it is not difficult to type Here() now

Definition: TextException.h:63

DBG_DATA

#define DBG_DATA

Definition: Stream.h:40

xisupper

#define xisupper(x)

Definition: xis.h:26

SBuf::rend

const_reverse_iterator rend() const

Definition: SBuf.h:599

Stream.h

xisalpha

#define xisalpha(x)

Definition: xis.h:21

SBuf::rawContent

const char * rawContent() const

Definition: SBuf.cc:509

SBuf::consume

SBuf consume(size_type n=npos)

Definition: SBuf.cc:481

Parser::Tokenizer::consumeTrailing

SBuf consumeTrailing(const SBuf::size_type n)

convenience method: consumes up to n last bytes and returns them

Definition: Tokenizer.cc:40

Tokenizer.h

SBuf::size_type

MemBlob::size_type size_type

Definition: SBuf.h:96

Parser::Tokenizer::parsed_

SBuf::size_type parsed_

bytes successfully parsed, including skipped

Definition: Tokenizer.h:177

xisdigit

#define xisdigit(x)

Definition: xis.h:18

Parser::Tokenizer

Definition: Tokenizer.h:29

SBuf::length

size_type length() const

Returns the number of bytes stored in SBuf.

Definition: SBuf.h:419

SBuf::npos

static const size_type npos

Definition: SBuf.h:100

Parser::Tokenizer::consume

SBuf consume(const SBuf::size_type n)

convenience method: consumes up to n bytes, counts, and returns them

Definition: Tokenizer.cc:22

INT64_MAX

#define INT64_MAX

Definition: types.h:89

Parser::Tokenizer::int64

bool int64(int64_t &result, int base=0, bool allowSign=true, SBuf::size_type limit=SBuf::npos)

Definition: Tokenizer.cc:238

Stream.h

TextException

an std::runtime_error with thrower location info

Definition: TextException.h:20

Parser::Tokenizer::skipRequired

void skipRequired(const char *description, const SBuf &tokenToSkip)

Definition: Tokenizer.cc:149

Parser::Tokenizer::successTrailing

SBuf::size_type successTrailing(const SBuf::size_type n)

convenience method: consumes up to n last bytes and returns their count

Definition: Tokenizer.cc:55

ToSBuf

SBuf ToSBuf(Args &&... args)

slowly stream-prints all arguments into a freshly allocated SBuf

Definition: Stream.h:63

Parser::Tokenizer::skipOne

bool skipOne(const CharacterSet &discardables)

Definition: Tokenizer.cc:161

forward.h

Parser::Tokenizer::skipSuffix

bool skipSuffix(const SBuf &tokenToSkip)

Definition: Tokenizer.cc:172

CharacterSet

optimized set of C chars, with quick membership test and merge support

Definition: CharacterSet.h:17

Parser::Tokenizer::udec64

int64_t udec64(const char *description, SBuf::size_type limit=SBuf::npos)

int64() wrapper but limited to unsigned decimal integers (for now)

Definition: Tokenizer.cc:315

squid.h

Parser::Tokenizer::suffix

bool suffix(SBuf &returnedToken, const CharacterSet &tokenChars, SBuf::size_type limit=SBuf::npos)

Definition: Tokenizer.cc:117

debugs

#define debugs(SECTION, LEVEL, CONTENT)

Definition: Stream.h:192

Parser::InsufficientInput

thrown by modern "incremental" parsers when they need more data

Definition: forward.h:18

squid-cache.org

Optimising Web Delivery

Introduction

Documentation

Support

Miscellaneous