RegexData.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 1996-2017 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 /*
10  * Portions of this code are copyrighted and released under GPLv2+ by:
11  * Copyright (c) 2011, Marcus Kool
12  * Please add new claims to the CONTRIBUTORS file instead.
13  */
14 
15 /* DEBUG: section 28 Access Control */
16 
17 #include "squid.h"
18 #include "acl/Acl.h"
19 #include "acl/Checklist.h"
20 #include "acl/RegexData.h"
21 #include "base/RegexPattern.h"
22 #include "ConfigParser.h"
23 #include "Debug.h"
24 #include "sbuf/Algorithms.h"
25 #include "sbuf/List.h"
26 
28 {
29 }
30 
31 const Acl::ParameterFlags &
33 {
34  static const Acl::ParameterFlags flags = { "-i", "+i" };
35  return flags;
36 }
37 
38 bool
39 ACLRegexData::match(char const *word)
40 {
41  if (!word)
42  return 0;
43 
44  debugs(28, 3, "checking '" << word << "'");
45 
46  // walk the list of patterns to see if one matches
47  for (auto &i : data) {
48  if (i.match(word)) {
49  debugs(28, 2, '\'' << i.c_str() << "' found in '" << word << '\'');
50  // TODO: old code also popped the pattern to second place of the list
51  // in order to reduce patterns search times.
52  return 1;
53  }
54  }
55 
56  return 0;
57 }
58 
61 {
62  SBufList sl;
63  int flags = REG_EXTENDED | REG_NOSUB;
64 
65  // walk and dump the list
66  // keeping the flags values consistent
67  for (auto &i : data) {
68  if (i.flags != flags) {
69  if ((i.flags&REG_ICASE) != 0) {
70  sl.emplace_back("-i");
71  } else {
72  sl.emplace_back("+i");
73  }
74  flags = i.flags;
75  }
76 
77  sl.emplace_back(i.c_str());
78  }
79 
80  return sl;
81 }
82 
83 static const char *
85 {
86  char * orig = t;
87 
88  if (strncmp(t, "^.*", 3) == 0)
89  t += 3;
90 
91  /* NOTE: an initial '.' might seem unnessary but is not;
92  * it can be a valid requirement that cannot be optimised
93  */
94  while (*t == '.' && *(t+1) == '*') {
95  t += 2;
96  }
97 
98  if (*t == '\0') {
99  debugs(28, DBG_IMPORTANT, cfg_filename << " line " << config_lineno << ": " << config_input_line);
100  debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has only wildcards and matches all strings. Using '.*' instead.");
101  return ".*";
102  }
103  if (t != orig) {
104  debugs(28, DBG_IMPORTANT, cfg_filename << " line " << config_lineno << ": " << config_input_line);
105  debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has unnecessary wildcard(s). Using '" << t << "' instead.");
106  }
107 
108  return t;
109 }
110 
111 static bool
112 compileRE(std::list<RegexPattern> &curlist, const char * RE, int flags)
113 {
114  if (RE == NULL || *RE == '\0')
115  return curlist.empty(); // XXX: old code did this. It looks wrong.
116 
117  regex_t comp;
118  if (int errcode = regcomp(&comp, RE, flags)) {
119  char errbuf[256];
120  regerror(errcode, &comp, errbuf, sizeof errbuf);
121  debugs(28, DBG_CRITICAL, cfg_filename << " line " << config_lineno << ": " << config_input_line);
122  debugs(28, DBG_CRITICAL, "ERROR: invalid regular expression: '" << RE << "': " << errbuf);
123  return false;
124  }
125  debugs(28, 2, "compiled '" << RE << "' with flags " << flags);
126 
127  curlist.emplace_back(flags, RE);
128  curlist.back().regex = comp;
129 
130  return true;
131 }
132 
133 static bool
134 compileRE(std::list<RegexPattern> &curlist, const SBufList &RE, int flags)
135 {
136  if (RE.empty())
137  return curlist.empty(); // XXX: old code did this. It looks wrong.
138  SBuf regexp;
139  static const SBuf openparen("("), closeparen(")"), separator(")|(");
140  JoinContainerIntoSBuf(regexp, RE.begin(), RE.end(), separator, openparen,
141  closeparen);
142  return compileRE(curlist, regexp.c_str(), flags);
143 }
144 
149 static int
150 compileOptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl)
151 {
152  std::list<RegexPattern> newlist;
153  SBufList accumulatedRE;
154  int numREs = 0, reSize = 0;
155  int flags = REG_EXTENDED | REG_NOSUB;
156 
157  for (const SBuf & configurationLineWord : sl) {
158  static const SBuf minus_i("-i");
159  static const SBuf plus_i("+i");
160  if (configurationLineWord == minus_i) {
161  if (flags & REG_ICASE) {
162  /* optimisation of -i ... -i */
163  debugs(28, 2, "optimisation of -i ... -i" );
164  } else {
165  debugs(28, 2, "-i" );
166  if (!compileRE(newlist, accumulatedRE, flags))
167  return 0;
168  flags |= REG_ICASE;
169  accumulatedRE.clear();
170  reSize = 0;
171  }
172  continue;
173  } else if (configurationLineWord == plus_i) {
174  if ((flags & REG_ICASE) == 0) {
175  /* optimisation of +i ... +i */
176  debugs(28, 2, "optimisation of +i ... +i");
177  } else {
178  debugs(28, 2, "+i");
179  if (!compileRE(newlist, accumulatedRE, flags))
180  return 0;
181  flags &= ~REG_ICASE;
182  accumulatedRE.clear();
183  reSize = 0;
184  }
185  continue;
186  }
187 
188  debugs(28, 2, "adding RE '" << configurationLineWord << "'");
189  accumulatedRE.push_back(configurationLineWord);
190  ++numREs;
191  reSize += configurationLineWord.length();
192 
193  if (reSize > 1024) { // must be < BUFSIZ everything included
194  debugs(28, 2, "buffer full, generating new optimised RE..." );
195  if (!compileRE(newlist, accumulatedRE, flags))
196  return 0;
197  accumulatedRE.clear();
198  reSize = 0;
199  continue; /* do the loop again to add the RE to largeRE */
200  }
201  }
202 
203  if (!compileRE(newlist, accumulatedRE, flags))
204  return 0;
205 
206  accumulatedRE.clear();
207  reSize = 0;
208 
209  /* all was successful, so put the new list at the tail */
210  curlist.splice(curlist.end(), newlist);
211 
212  debugs(28, 2, numREs << " REs are optimised into one RE.");
213  if (numREs > 100) {
215  debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "WARNING: there are more than 100 regular expressions. " <<
216  "Consider using less REs or use rules without expressions like 'dstdomain'.");
217  }
218 
219  return 1;
220 }
221 
222 static void
223 compileUnoptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl)
224 {
225  int flags = REG_EXTENDED | REG_NOSUB;
226 
227  static const SBuf minus_i("-i"), plus_i("+i");
228  for (auto configurationLineWord : sl) {
229  if (configurationLineWord == minus_i) {
230  flags |= REG_ICASE;
231  } else if (configurationLineWord == plus_i) {
232  flags &= ~REG_ICASE;
233  } else {
234  if (!compileRE(curlist, configurationLineWord.c_str(), flags))
235  debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. "
236  "Compile failed: '" << configurationLineWord << "'");
237  }
238  }
239 }
240 
241 void
243 {
244  debugs(28, 2, "new Regex line or file");
245 
246  SBufList sl;
247  while (char *t = ConfigParser::RegexStrtokFile()) {
248  const char *clean = removeUnnecessaryWildcards(t);
249  if (strlen(clean) > BUFSIZ-1) {
250  debugs(28, DBG_CRITICAL, cfg_filename << " line " << config_lineno << ": " << config_input_line);
251  debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. Larger than " << BUFSIZ-1 << " characters: '" << clean << "'");
252  } else {
253  debugs(28, 3, "buffering RE '" << clean << "'");
254  sl.emplace_back(clean);
255  }
256  }
257 
258  if (!compileOptimisedREs(data, sl)) {
259  debugs(28, DBG_IMPORTANT, "WARNING: optimisation of regular expressions failed; using fallback method without optimisation");
261  }
262 }
263 
264 bool
266 {
267  return data.empty();
268 }
269 
272 {
273  /* Regex's don't clone yet. */
274  assert(data.empty());
275  return new ACLRegexData;
276 }
277 
virtual bool match(char const *user)
Definition: RegexData.cc:39
static int compileOptimisedREs(std::list< RegexPattern > &curlist, const SBufList &sl)
Definition: RegexData.cc:150
int regcomp(regex_t *preg, const char *pattern, int cflags)
Definition: GnuRegex.c:4120
static bool compileRE(std::list< RegexPattern > &curlist, const char *RE, int flags)
Definition: RegexData.cc:112
#define assert(EX)
Definition: assert.h:17
Definition: SBuf.h:87
int i
Definition: membanger.c:49
static void compileUnoptimisedREs(std::list< RegexPattern > &curlist, const SBufList &sl)
Definition: RegexData.cc:223
virtual SBufList dump() const
Definition: RegexData.cc:60
int opt_parse_cfg_only
static const char * removeUnnecessaryWildcards(char *t)
Definition: RegexData.cc:84
static char * RegexStrtokFile()
#define DBG_CRITICAL
Definition: Debug.h:44
#define REG_EXTENDED
Definition: GnuRegex.h:226
#define REG_ICASE
Definition: GnuRegex.h:230
#define debugs(SECTION, LEVEL, CONTENT)
Definition: Debug.h:123
std::list< RegexPattern > data
Definition: RegexData.h:32
int config_lineno
#define DBG_IMPORTANT
Definition: Debug.h:45
const char * cfg_filename
virtual void parse()
Definition: RegexData.cc:242
const char * c_str()
Definition: SBuf.cc:546
virtual bool empty() const
Definition: RegexData.cc:265
virtual ~ACLRegexData()
Definition: RegexData.cc:27
#define BUFSIZ
Definition: defines.h:20
std::list< SBuf > SBufList
Definition: forward.h:26
std::set< OptionName, OptionNameCmp > ParameterFlags
a set of parameter flag names
Definition: Options.h:162
char config_input_line[BUFSIZ]
const_iterator begin() const
Definition: SBuf.h:574
SBuf & JoinContainerIntoSBuf(SBuf &dest, const ContainerIterator &begin, const ContainerIterator &end, const SBuf &separator, const SBuf &prefix=SBuf(), const SBuf &suffix=SBuf())
Definition: Algorithms.h:68
size_t regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
Definition: GnuRegex.c:4248
virtual const Acl::ParameterFlags & supportedFlags() const
Definition: RegexData.cc:32
#define REG_NOSUB
Definition: GnuRegex.h:239
virtual ACLData< char const * > * clone() const
Definition: RegexData.cc:271
#define NULL
Definition: types.h:166

 

Introduction

Documentation

Support

Miscellaneous

Web Site Translations

Mirrors