RegexData.cc
Go to the documentation of this file.
1/*
2 * Copyright (C) 1996-2022 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9/*
10 * Portions of this code are copyrighted and released under GPLv2+ by:
11 * Copyright (c) 2011, Marcus Kool
12 * Please add new claims to the CONTRIBUTORS file instead.
13 */
14
15/* DEBUG: section 28 Access Control */
16
17#include "squid.h"
18#include "acl/Acl.h"
19#include "acl/Checklist.h"
20#include "acl/RegexData.h"
21#include "base/RegexPattern.h"
22#include "cache_cf.h"
23#include "ConfigParser.h"
24#include "debug/Stream.h"
25#include "sbuf/Algorithms.h"
26#include "sbuf/List.h"
27#include "sbuf/Stream.h"
28
30
32{
33}
34
35const Acl::Options &
37{
38 static auto MyCaseSensitivityOption = Acl::CaseSensitivityOption();
39 static const Acl::Options MyOptions = { &MyCaseSensitivityOption };
40 MyCaseSensitivityOption.linkWith(&CaseInsensitive_);
41 return MyOptions;
42}
43
44bool
45ACLRegexData::match(char const *word)
46{
47 if (!word)
48 return 0;
49
50 debugs(28, 3, "checking '" << word << "'");
51
52 // walk the list of patterns to see if one matches
53 for (auto &i : data) {
54 if (i.match(word)) {
55 debugs(28, 2, '\'' << i << "' found in '" << word << '\'');
56 // TODO: old code also popped the pattern to second place of the list
57 // in order to reduce patterns search times.
58 return 1;
59 }
60 }
61
62 return 0;
63}
64
67{
68 SBufStream os;
69
70 const RegexPattern *previous = nullptr;
71 for (const auto &i: data) {
72 i.print(os, previous); // skip flags implied by the previous entry
73 previous = &i;
74 }
75
76 return SBufList(1, os.buf());
77}
78
79static const char *
81{
82 if (strcmp(t, ".*") == 0) // we cannot simplify that further
83 return t; // avoid "WARNING: ... Using '.*' instead" below
84
85 char * orig = t;
86
87 if (strncmp(t, "^.*", 3) == 0)
88 t += 3;
89
90 /* NOTE: an initial '.' might seem unnessary but is not;
91 * it can be a valid requirement that cannot be optimised
92 */
93 while (*t == '.' && *(t+1) == '*') {
94 t += 2;
95 }
96
97 if (*t == '\0') {
98 debugs(28, DBG_IMPORTANT, cfg_filename << " line " << config_lineno << ": " << config_input_line);
99 debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has only wildcards and matches all strings. Using '.*' instead.");
100 return ".*";
101 }
102 if (t != orig) {
103 debugs(28, DBG_IMPORTANT, cfg_filename << " line " << config_lineno << ": " << config_input_line);
104 debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has unnecessary wildcard(s). Using '" << t << "' instead.");
105 }
106
107 return t;
108}
109
110static void
111compileRE(std::list<RegexPattern> &curlist, const SBuf &RE, int flags)
112{
113 curlist.emplace_back(RE, flags);
114}
115
116static void
117compileREs(std::list<RegexPattern> &curlist, const SBufList &RE, int flags)
118{
119 assert(!RE.empty());
120 SBuf regexp;
121 static const SBuf openparen("("), closeparen(")"), separator(")|(");
122 JoinContainerIntoSBuf(regexp, RE.begin(), RE.end(), separator, openparen,
123 closeparen);
124 compileRE(curlist, regexp, flags);
125}
126
131static void
132compileOptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl, const int flagsAtLineStart)
133{
134 std::list<RegexPattern> newlist;
135 SBufList accumulatedRE;
136 int numREs = 0, reSize = 0;
137 auto flags = flagsAtLineStart;
138
139 for (const SBuf & configurationLineWord : sl) {
140 static const SBuf minus_i("-i");
141 static const SBuf plus_i("+i");
142 if (configurationLineWord == minus_i) {
143 if (flags & REG_ICASE) {
144 /* optimisation of -i ... -i */
145 debugs(28, 2, "optimisation of -i ... -i" );
146 } else {
147 debugs(28, 2, "-i" );
148 if (!accumulatedRE.empty()) {
149 compileREs(newlist, accumulatedRE, flags);
150 accumulatedRE.clear();
151 reSize = 0;
152 }
153 flags |= REG_ICASE;
154 }
155 continue;
156 } else if (configurationLineWord == plus_i) {
157 if ((flags & REG_ICASE) == 0) {
158 /* optimisation of +i ... +i */
159 debugs(28, 2, "optimisation of +i ... +i");
160 } else {
161 debugs(28, 2, "+i");
162 if (!accumulatedRE.empty()) {
163 compileREs(newlist, accumulatedRE, flags);
164 accumulatedRE.clear();
165 reSize = 0;
166 }
167 flags &= ~REG_ICASE;
168 }
169 continue;
170 }
171
172 debugs(28, 2, "adding RE '" << configurationLineWord << "'");
173 accumulatedRE.push_back(configurationLineWord);
174 ++numREs;
175 reSize += configurationLineWord.length();
176
177 if (reSize > 1024) { // must be < BUFSIZ everything included
178 debugs(28, 2, "buffer full, generating new optimised RE..." );
179 compileREs(newlist, accumulatedRE, flags);
180 accumulatedRE.clear();
181 reSize = 0;
182 continue; /* do the loop again to add the RE to largeRE */
183 }
184 }
185
186 if (!accumulatedRE.empty()) {
187 compileREs(newlist, accumulatedRE, flags);
188 accumulatedRE.clear();
189 reSize = 0;
190 }
191
192 /* all was successful, so put the new list at the tail */
193 curlist.splice(curlist.end(), newlist);
194
195 debugs(28, 2, numREs << " REs are optimised into one RE.");
196 if (numREs > 100) {
198 debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "WARNING: there are more than 100 regular expressions. " <<
199 "Consider using less REs or use rules without expressions like 'dstdomain'.");
200 }
201}
202
203static void
204compileUnoptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl, const int flagsAtLineStart)
205{
206 auto flags = flagsAtLineStart;
207
208 static const SBuf minus_i("-i"), plus_i("+i");
209 for (const auto &configurationLineWord: sl) {
210 if (configurationLineWord == minus_i) {
211 flags |= REG_ICASE;
212 } else if (configurationLineWord == plus_i) {
213 flags &= ~REG_ICASE;
214 } else {
215 compileRE(curlist, configurationLineWord, flags);
216 }
217 }
218}
219
220void
222{
223 debugs(28, 2, "new Regex line or file");
224
225 int flagsAtLineStart = REG_EXTENDED | REG_NOSUB;
227 flagsAtLineStart |= REG_ICASE;
228
229 SBufList sl;
230 while (char *t = ConfigParser::RegexStrtokFile()) {
231 const char *clean = removeUnnecessaryWildcards(t);
232 debugs(28, 3, "buffering RE '" << clean << "'");
233 sl.emplace_back(clean);
234 }
235
236 try {
237 // ignore the danger of merging invalid REs into a valid "optimized" RE
238 compileOptimisedREs(data, sl, flagsAtLineStart);
239 } catch (...) {
240 compileUnoptimisedREs(data, sl, flagsAtLineStart);
241 // Delay compileOptimisedREs() failure reporting until we know that
242 // compileUnoptimisedREs() above have succeeded. If
243 // compileUnoptimisedREs() also fails, then the compileOptimisedREs()
244 // exception caught earlier was probably not related to _optimization_
245 // (and we do not want to report the same RE compilation problem twice).
246 debugs(28, DBG_IMPORTANT, "WARNING: Failed to optimize a set of regular expressions; will use them as-is instead;" <<
247 Debug::Extra << "configuration: " << cfg_filename << " line " << config_lineno << ": " << config_input_line <<
248 Debug::Extra << "optimization error: " << CurrentException);
249 }
250}
251
252bool
254{
255 return data.empty();
256}
257
SBuf & JoinContainerIntoSBuf(SBuf &dest, const ContainerIterator &begin, const ContainerIterator &end, const SBuf &separator, const SBuf &prefix=SBuf(), const SBuf &suffix=SBuf())
Definition: Algorithms.h:68
#define REG_ICASE
Definition: GnuRegex.h:230
#define REG_EXTENDED
Definition: GnuRegex.h:226
#define REG_NOSUB
Definition: GnuRegex.h:239
static void compileOptimisedREs(std::list< RegexPattern > &curlist, const SBufList &sl, const int flagsAtLineStart)
Definition: RegexData.cc:132
static void compileUnoptimisedREs(std::list< RegexPattern > &curlist, const SBufList &sl, const int flagsAtLineStart)
Definition: RegexData.cc:204
static void compileREs(std::list< RegexPattern > &curlist, const SBufList &RE, int flags)
Definition: RegexData.cc:117
static void compileRE(std::list< RegexPattern > &curlist, const SBuf &RE, int flags)
Definition: RegexData.cc:111
static const char * removeUnnecessaryWildcards(char *t)
Definition: RegexData.cc:80
std::ostream & CurrentException(std::ostream &os)
prints active (i.e., thrown but not yet handled) exception
#define assert(EX)
Definition: assert.h:19
char config_input_line[BUFSIZ]
Definition: cache_cf.cc:273
const char * cfg_filename
Definition: cache_cf.cc:271
int config_lineno
Definition: cache_cf.cc:272
std::list< RegexPattern > data
Definition: RegexData.h:36
virtual ~ACLRegexData()
Definition: RegexData.cc:31
static Acl::BooleanOptionValue CaseInsensitive_
whether parse() is called in a case insensitive context
Definition: RegexData.h:31
virtual const Acl::Options & lineOptions()
supported ACL "line" options (e.g., "-i")
Definition: RegexData.cc:36
virtual void parse()
Definition: RegexData.cc:221
virtual SBufList dump() const
Definition: RegexData.cc:66
virtual bool match(char const *user)
Definition: RegexData.cc:45
virtual bool empty() const
Definition: RegexData.cc:253
static char * RegexStrtokFile()
static std::ostream & Extra(std::ostream &os)
prefixes each grouped debugs() line after the first one in the group
Definition: Stream.h:117
void print(std::ostream &os, const RegexPattern *previous=nullptr) const
Definition: RegexPattern.cc:42
SBuf buf()
bytes written so far
Definition: Stream.h:41
Definition: SBuf.h:94
const_iterator begin() const
Definition: SBuf.h:583
#define DBG_IMPORTANT
Definition: Stream.h:41
#define debugs(SECTION, LEVEL, CONTENT)
Definition: Stream.h:196
int opt_parse_cfg_only
const BooleanOption & CaseSensitivityOption()
Definition: Options.cc:241
std::vector< const Option * > Options
Definition: Options.h:214
OptionValue< bool > BooleanOptionValue
Definition: Options.h:200
std::list< SBuf > SBufList
Definition: forward.h:23

 

Introduction

Documentation

Support

Miscellaneous

Web Site Translations

Mirrors