Uri.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 1996-2019 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 /* DEBUG: section 23 URL Parsing */
10 
11 #include "squid.h"
12 #include "anyp/Uri.h"
13 #include "globals.h"
14 #include "HttpRequest.h"
15 #include "rfc1738.h"
16 #include "SquidConfig.h"
17 #include "SquidString.h"
18 
19 static const char valid_hostname_chars_u[] =
20  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
21  "abcdefghijklmnopqrstuvwxyz"
22  "0123456789-._"
23  "[:]"
24  ;
25 static const char valid_hostname_chars[] =
26  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
27  "abcdefghijklmnopqrstuvwxyz"
28  "0123456789-."
29  "[:]"
30  ;
31 
32 const SBuf &
34 {
35  static SBuf star("*");
36  return star;
37 }
38 
39 const SBuf &
41 {
42  static SBuf slash("/");
43  return slash;
44 }
45 
46 void
47 AnyP::Uri::host(const char *src)
48 {
50  hostAddr_ = src;
51  if (hostAddr_.isAnyAddr()) {
52  xstrncpy(host_, src, sizeof(host_));
53  hostIsNumeric_ = false;
54  } else {
55  hostAddr_.toHostStr(host_, sizeof(host_));
56  debugs(23, 3, "given IP: " << hostAddr_);
57  hostIsNumeric_ = 1;
58  }
59  touch();
60 }
61 
62 SBuf
64 {
65  static char ip[MAX_IPSTRLEN];
66  if (hostIsNumeric())
67  return SBuf(hostIP().toStr(ip, sizeof(ip)));
68  else
69  return SBuf(host());
70 }
71 
72 const SBuf &
74 {
75  // RFC 3986 section 3.3 says path can be empty (path-abempty).
76  // RFC 7230 sections 2.7.3, 5.3.1, 5.7.2 - says path cannot be empty, default to "/"
77  // at least when sending and using. We must still accept path-abempty as input.
79  return SlashPath();
80 
81  return path_;
82 }
83 
84 void
86 {
87  debugs(23, 5, "urlInitialize: Initializing...");
88  /* this ensures that the number of protocol strings is the same as
89  * the enum slots allocated because the last enum is always 'MAX'.
90  */
91  assert(strcmp(AnyP::ProtocolType_str[AnyP::PROTO_MAX], "MAX") == 0);
92  /*
93  * These test that our matchDomainName() function works the
94  * way we expect it to.
95  */
96  assert(0 == matchDomainName("foo.com", "foo.com"));
97  assert(0 == matchDomainName(".foo.com", "foo.com"));
98  assert(0 == matchDomainName("foo.com", ".foo.com"));
99  assert(0 == matchDomainName(".foo.com", ".foo.com"));
100  assert(0 == matchDomainName("x.foo.com", ".foo.com"));
101  assert(0 == matchDomainName("y.x.foo.com", ".foo.com"));
102  assert(0 != matchDomainName("x.foo.com", "foo.com"));
103  assert(0 != matchDomainName("foo.com", "x.foo.com"));
104  assert(0 != matchDomainName("bar.com", "foo.com"));
105  assert(0 != matchDomainName(".bar.com", "foo.com"));
106  assert(0 != matchDomainName(".bar.com", ".foo.com"));
107  assert(0 != matchDomainName("bar.com", ".foo.com"));
108  assert(0 < matchDomainName("zzz.com", "foo.com"));
109  assert(0 > matchDomainName("aaa.com", "foo.com"));
110  assert(0 == matchDomainName("FOO.com", "foo.COM"));
111  assert(0 < matchDomainName("bfoo.com", "afoo.com"));
112  assert(0 > matchDomainName("afoo.com", "bfoo.com"));
113  assert(0 < matchDomainName("x-foo.com", ".foo.com"));
114 
115  assert(0 == matchDomainName(".foo.com", ".foo.com", mdnRejectSubsubDomains));
116  assert(0 == matchDomainName("x.foo.com", ".foo.com", mdnRejectSubsubDomains));
117  assert(0 != matchDomainName("y.x.foo.com", ".foo.com", mdnRejectSubsubDomains));
118  assert(0 != matchDomainName(".x.foo.com", ".foo.com", mdnRejectSubsubDomains));
119 
120  assert(0 == matchDomainName("*.foo.com", "x.foo.com", mdnHonorWildcards));
121  assert(0 == matchDomainName("*.foo.com", ".x.foo.com", mdnHonorWildcards));
122  assert(0 == matchDomainName("*.foo.com", ".foo.com", mdnHonorWildcards));
123  assert(0 != matchDomainName("*.foo.com", "foo.com", mdnHonorWildcards));
124 
125  /* more cases? */
126 }
127 
133 urlParseProtocol(const char *b)
134 {
135  // make e point to the ':' character
136  const char *e = b + strcspn(b, ":");
137  int len = e - b;
138 
139  /* test common stuff first */
140 
141  if (strncasecmp(b, "http", len) == 0)
142  return AnyP::PROTO_HTTP;
143 
144  if (strncasecmp(b, "ftp", len) == 0)
145  return AnyP::PROTO_FTP;
146 
147  if (strncasecmp(b, "https", len) == 0)
148  return AnyP::PROTO_HTTPS;
149 
150  if (strncasecmp(b, "file", len) == 0)
151  return AnyP::PROTO_FTP;
152 
153  if (strncasecmp(b, "coap", len) == 0)
154  return AnyP::PROTO_COAP;
155 
156  if (strncasecmp(b, "coaps", len) == 0)
157  return AnyP::PROTO_COAPS;
158 
159  if (strncasecmp(b, "gopher", len) == 0)
160  return AnyP::PROTO_GOPHER;
161 
162  if (strncasecmp(b, "wais", len) == 0)
163  return AnyP::PROTO_WAIS;
164 
165  if (strncasecmp(b, "cache_object", len) == 0)
167 
168  if (strncasecmp(b, "urn", len) == 0)
169  return AnyP::PROTO_URN;
170 
171  if (strncasecmp(b, "whois", len) == 0)
172  return AnyP::PROTO_WHOIS;
173 
174  if (len > 0)
175  return AnyP::PROTO_UNKNOWN;
176 
177  return AnyP::PROTO_NONE;
178 }
179 
187 bool
189 {
190  /* For IPv4 addresses check for a dot */
191  /* For IPv6 addresses also check for a colon */
192  if (Config.appendDomain && !strchr(host, '.') && !strchr(host, ':')) {
193  const uint64_t dlen = strlen(host);
194  const uint64_t want = dlen + Config.appendDomainLen;
195  if (want > SQUIDHOSTNAMELEN - 1) {
196  debugs(23, 2, "URL domain too large (" << dlen << " bytes)");
197  return false;
198  }
199  strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - dlen - 1);
200  }
201  return true;
202 }
203 
204 /*
205  * Parse a URI/URL.
206  *
207  * Stores parsed values in the `request` argument.
208  *
209  * This abuses HttpRequest as a way of representing the parsed url
210  * and its components.
211  * method is used to switch parsers and to init the HttpRequest.
212  * If method is Http::METHOD_CONNECT, then rather than a URL a hostname:port is
213  * looked for.
214  * The url is non-const so that if it is too long we can NUL-terminate it in place.
215  */
216 
217 /*
218  * This routine parses a URL. It is assumed that the URL is complete -
219  * i.e., the end of the string is the end of the URL. Don't pass a partial
220  * URL here as this routine doesn't have any way of knowing whether
221  * it is partial or not (i.e., it handles the case of no trailing slash as
222  * being "end of host with implied path of /").
223  */
224 bool
225 AnyP::Uri::parse(const HttpRequestMethod& method, const char *url)
226 {
227  LOCAL_ARRAY(char, proto, MAX_URL);
228  LOCAL_ARRAY(char, login, MAX_URL);
229  LOCAL_ARRAY(char, foundHost, MAX_URL);
230  LOCAL_ARRAY(char, urlpath, MAX_URL);
231  char *t = NULL;
232  char *q = NULL;
233  int foundPort;
235  int l;
236  int i;
237  const char *src;
238  char *dst;
239  proto[0] = foundHost[0] = urlpath[0] = login[0] = '\0';
240 
241  if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) {
242  debugs(23, DBG_IMPORTANT, MYNAME << "URL too large (" << l << " bytes)");
243  return false;
244  }
245  if (method == Http::METHOD_CONNECT) {
246  /*
247  * RFC 7230 section 5.3.3: authority-form = authority
248  * "excluding any userinfo and its "@" delimiter"
249  *
250  * RFC 3986 section 3.2: authority = [ userinfo "@" ] host [ ":" port ]
251  *
252  * As an HTTP(S) proxy we assume HTTPS (443) if no port provided.
253  */
254  foundPort = 443;
255 
256  if (sscanf(url, "[%[^]]]:%d", foundHost, &foundPort) < 1)
257  if (sscanf(url, "%[^:]:%d", foundHost, &foundPort) < 1)
258  return false;
259 
260  } else if ((method == Http::METHOD_OPTIONS || method == Http::METHOD_TRACE) &&
261  AnyP::Uri::Asterisk().cmp(url) == 0) {
262  parseFinish(AnyP::PROTO_HTTP, nullptr, url, foundHost, SBuf(), 80 /* HTTP default port */);
263  return true;
264  } else if (strncmp(url, "urn:", 4) == 0) {
265  debugs(23, 3, "Split URI '" << url << "' into proto='urn', path='" << (url+4) << "'");
266  debugs(50, 5, "urn=" << (url+4));
267  setScheme(AnyP::PROTO_URN, nullptr);
268  path(url + 4);
269  return true;
270  } else {
271  /* Parse the URL: */
272  src = url;
273  i = 0;
274  /* Find first : - everything before is protocol */
275  for (i = 0, dst = proto; i < l && *src != ':'; ++i, ++src, ++dst) {
276  *dst = *src;
277  }
278  if (i >= l)
279  return false;
280  *dst = '\0';
281 
282  /* Then its :// */
283  if ((i+3) > l || *src != ':' || *(src + 1) != '/' || *(src + 2) != '/')
284  return false;
285  i += 3;
286  src += 3;
287 
288  /* Then everything until first /; thats host (and port; which we'll look for here later) */
289  // bug 1881: If we don't get a "/" then we imply it was there
290  // bug 3074: We could just be given a "?" or "#". These also imply "/"
291  // bug 3233: whitespace is also a hostname delimiter.
292  for (dst = foundHost; i < l && *src != '/' && *src != '?' && *src != '#' && *src != '\0' && !xisspace(*src); ++i, ++src, ++dst) {
293  *dst = *src;
294  }
295 
296  /*
297  * We can't check for "i >= l" here because we could be at the end of the line
298  * and have a perfectly valid URL w/ no trailing '/'. In this case we assume we've
299  * been -given- a valid URL and the path is just '/'.
300  */
301  if (i > l)
302  return false;
303  *dst = '\0';
304 
305  // bug 3074: received 'path' starting with '?', '#', or '\0' implies '/'
306  if (*src == '?' || *src == '#' || *src == '\0') {
307  urlpath[0] = '/';
308  dst = &urlpath[1];
309  } else {
310  dst = urlpath;
311  }
312  /* Then everything from / (inclusive) until \r\n or \0 - thats urlpath */
313  for (; i < l && *src != '\r' && *src != '\n' && *src != '\0'; ++i, ++src, ++dst) {
314  *dst = *src;
315  }
316 
317  /* We -could- be at the end of the buffer here */
318  if (i > l)
319  return false;
320  /* If the URL path is empty we set it to be "/" */
321  if (dst == urlpath) {
322  *dst = '/';
323  ++dst;
324  }
325  *dst = '\0';
326 
327  protocol = urlParseProtocol(proto);
328  foundPort = AnyP::UriScheme(protocol).defaultPort();
329 
330  /* Is there any login information? (we should eventually parse it above) */
331  t = strrchr(foundHost, '@');
332  if (t != NULL) {
333  strncpy((char *) login, (char *) foundHost, sizeof(login)-1);
334  login[sizeof(login)-1] = '\0';
335  t = strrchr(login, '@');
336  *t = 0;
337  strncpy((char *) foundHost, t + 1, sizeof(foundHost)-1);
338  foundHost[sizeof(foundHost)-1] = '\0';
339  // Bug 4498: URL-unescape the login info after extraction
340  rfc1738_unescape(login);
341  }
342 
343  /* Is there any host information? (we should eventually parse it above) */
344  if (*foundHost == '[') {
345  /* strip any IPA brackets. valid under IPv6. */
346  dst = foundHost;
347  /* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
348  src = foundHost;
349  ++src;
350  l = strlen(foundHost);
351  i = 1;
352  for (; i < l && *src != ']' && *src != '\0'; ++i, ++src, ++dst) {
353  *dst = *src;
354  }
355 
356  /* we moved in-place, so truncate the actual hostname found */
357  *dst = '\0';
358  ++dst;
359 
360  /* skip ahead to either start of port, or original EOS */
361  while (*dst != '\0' && *dst != ':')
362  ++dst;
363  t = dst;
364  } else {
365  t = strrchr(foundHost, ':');
366 
367  if (t != strchr(foundHost,':') ) {
368  /* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when its not. */
369  /* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
370  /* therefore we MUST accept the case where they are not bracketed at all. */
371  t = NULL;
372  }
373  }
374 
375  // Bug 3183 sanity check: If scheme is present, host must be too.
376  if (protocol != AnyP::PROTO_NONE && foundHost[0] == '\0') {
377  debugs(23, DBG_IMPORTANT, "SECURITY ALERT: Missing hostname in URL '" << url << "'. see access.log for details.");
378  return false;
379  }
380 
381  if (t && *t == ':') {
382  *t = '\0';
383  ++t;
384  foundPort = atoi(t);
385  }
386  }
387 
388  for (t = foundHost; *t; ++t)
389  *t = xtolower(*t);
390 
391  if (stringHasWhitespace(foundHost)) {
393  t = q = foundHost;
394  while (*t) {
395  if (!xisspace(*t)) {
396  *q = *t;
397  ++q;
398  }
399  ++t;
400  }
401  *q = '\0';
402  }
403  }
404 
405  debugs(23, 3, "Split URL '" << url << "' into proto='" << proto << "', host='" << foundHost << "', port='" << foundPort << "', path='" << urlpath << "'");
406 
408  strspn(foundHost, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(foundHost)) {
409  debugs(23, DBG_IMPORTANT, MYNAME << "Illegal character in hostname '" << foundHost << "'");
410  return false;
411  }
412 
413  if (!urlAppendDomain(foundHost))
414  return false;
415 
416  /* remove trailing dots from hostnames */
417  while ((l = strlen(foundHost)) > 0 && foundHost[--l] == '.')
418  foundHost[l] = '\0';
419 
420  /* reject duplicate or leading dots */
421  if (strstr(foundHost, "..") || *foundHost == '.') {
422  debugs(23, DBG_IMPORTANT, MYNAME << "Illegal hostname '" << foundHost << "'");
423  return false;
424  }
425 
426  if (foundPort < 1 || foundPort > 65535) {
427  debugs(23, 3, "Invalid port '" << foundPort << "'");
428  return false;
429  }
430 
431 #if HARDCODE_DENY_PORTS
432  /* These ports are filtered in the default squid.conf, but
433  * maybe someone wants them hardcoded... */
434  if (foundPort == 7 || foundPort == 9 || foundPort == 19) {
435  debugs(23, DBG_CRITICAL, MYNAME << "Deny access to port " << foundPort);
436  return false;
437  }
438 #endif
439 
440  if (stringHasWhitespace(urlpath)) {
441  debugs(23, 2, "URI has whitespace: {" << url << "}");
442 
443  switch (Config.uri_whitespace) {
444 
445  case URI_WHITESPACE_DENY:
446  return false;
447 
449  break;
450 
452  t = rfc1738_escape_unescaped(urlpath);
453  xstrncpy(urlpath, t, MAX_URL);
454  break;
455 
456  case URI_WHITESPACE_CHOP:
457  *(urlpath + strcspn(urlpath, w_space)) = '\0';
458  break;
459 
461  default:
462  t = q = urlpath;
463  while (*t) {
464  if (!xisspace(*t)) {
465  *q = *t;
466  ++q;
467  }
468  ++t;
469  }
470  *q = '\0';
471  }
472  }
473 
474  parseFinish(protocol, proto, urlpath, foundHost, SBuf(login), foundPort);
475  return true;
476 }
477 
479 void
481  const char *const protoStr, // for unknown protocols
482  const char *const aUrlPath,
483  const char *const aHost,
484  const SBuf &aLogin,
485  const int aPort)
486 {
487  setScheme(protocol, protoStr);
488  path(aUrlPath);
489  host(aHost);
490  userInfo(aLogin);
491  port(aPort);
492 }
493 
494 void
496 {
497  absolute_.clear();
500 }
501 
502 SBuf &
503 AnyP::Uri::authority(bool requirePort) const
504 {
505  if (authorityHttp_.isEmpty()) {
506 
507  // both formats contain Host/IP
510 
511  // authorityForm_ only has :port if it is non-default
513  if (port() != getScheme().defaultPort())
515  }
516 
517  return requirePort ? authorityWithPort_ : authorityHttp_;
518 }
519 
520 SBuf &
522 {
523  if (absolute_.isEmpty()) {
524  // TODO: most URL will be much shorter, avoid allocating this much
526 
527  absolute_.append(getScheme().image());
528  absolute_.append(":",1);
529  if (getScheme() != AnyP::PROTO_URN) {
530  absolute_.append("//", 2);
531  const bool allowUserInfo = getScheme() == AnyP::PROTO_FTP ||
533 
534  if (allowUserInfo && !userInfo().isEmpty()) {
536  absolute_.append("@", 1);
537  }
539  }
540  absolute_.append(path());
541  }
542 
543  return absolute_;
544 }
545 
550 char *
552 {
553  LOCAL_ARRAY(char, buf, MAX_URL);
554 
555  snprintf(buf, sizeof(buf), SQUIDSBUFPH, SQUIDSBUFPRINT(url));
556  buf[sizeof(buf)-1] = '\0';
557 
558  // URN, CONNECT method, and non-stripped URIs can go straight out
559  if (Config.onoff.strip_query_terms && !(method == Http::METHOD_CONNECT || scheme == AnyP::PROTO_URN)) {
560  // strip anything AFTER a question-mark
561  // leaving the '?' in place
562  if (auto t = strchr(buf, '?')) {
563  *(++t) = '\0';
564  }
565  }
566 
567  if (stringHasCntl(buf))
569 
570  return buf;
571 }
572 
579 const char *
581 {
582  LOCAL_ARRAY(char, buf, MAX_URL);
583 
584  // method CONNECT and port HTTPS
585  if (request->method == Http::METHOD_CONNECT && request->url.port() == 443) {
586  snprintf(buf, MAX_URL, "https://%s/*", request->url.host());
587  return buf;
588  }
589 
590  // else do the normal complete canonical thing.
591  return request->canonicalCleanUrl();
592 }
593 
594 /*
595  * Test if a URL is relative.
596  *
597  * RFC 2396, Section 5 (Page 17) implies that in a relative URL, a '/' will
598  * appear before a ':'.
599  */
/*
 * Report whether url is a relative reference, i.e. has no "scheme:" prefix.
 *
 * Returns false for a null pointer and for an empty string. Otherwise the
 * first segment of the URL is scanned for a ':'. The segment ends at the
 * first '/', '?' or '#': a colon that only appears after one of those
 * belongs to the path, query or fragment and cannot delimit a scheme, so
 * strings such as "a/b:c" or "?next=a:b" are relative.
 */
bool
urlIsRelative(const char *url)
{
    // no URL, or an empty one, is never reported as relative
    if (!url || *url == '\0')
        return false;

    for (const char *p = url; *p != '\0' && *p != '/' && *p != '?' && *p != '#'; ++p) {
        if (*p == ':')
            return false; // colon inside the first segment: "scheme:" prefix
    }

    return true;
}
619 
620 /*
621  * Convert a relative URL to an absolute URL using the context of a given
622  * request.
623  *
624  * It is assumed that you have already ensured that the URL is relative.
625  *
626  * If NULL is returned it is an indication that the method in use in the
627  * request does not distinguish between relative and absolute and you should
628  * use the url unchanged.
629  *
630  * If non-NULL is returned, it is up to the caller to free the resulting
631  * memory using safe_free().
632  */
633 char *
634 urlMakeAbsolute(const HttpRequest * req, const char *relUrl)
635 {
636 
637  if (req->method.id() == Http::METHOD_CONNECT) {
638  return (NULL);
639  }
640 
641  char *urlbuf = (char *)xmalloc(MAX_URL * sizeof(char));
642 
643  if (req->url.getScheme() == AnyP::PROTO_URN) {
644  // XXX: this is what the original code did, but it seems to break the
645  // intended behaviour of this function. It returns the stored URN path,
646  // not converting the given one into a URN...
647  snprintf(urlbuf, MAX_URL, SQUIDSBUFPH, SQUIDSBUFPRINT(req->url.absolute()));
648  return (urlbuf);
649  }
650 
651  SBuf authorityForm = req->url.authority(); // host[:port]
652  const SBuf &scheme = req->url.getScheme().image();
653  size_t urllen = snprintf(urlbuf, MAX_URL, SQUIDSBUFPH "://" SQUIDSBUFPH "%s" SQUIDSBUFPH,
654  SQUIDSBUFPRINT(scheme),
655  SQUIDSBUFPRINT(req->url.userInfo()),
656  !req->url.userInfo().isEmpty() ? "@" : "",
657  SQUIDSBUFPRINT(authorityForm));
658 
659  // if the first char is '/' assume its a relative path
660  // XXX: this breaks on scheme-relative URLs,
661  // but we should not see those outside ESI, and rarely there.
662  // XXX: also breaks on any URL containing a '/' in the query-string portion
663  if (relUrl[0] == '/') {
664  xstrncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
665  } else {
666  SBuf path = req->url.path();
667  SBuf::size_type lastSlashPos = path.rfind('/');
668 
669  if (lastSlashPos == SBuf::npos) {
670  // replace the whole path with the given bit(s)
671  urlbuf[urllen] = '/';
672  ++urllen;
673  xstrncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
674  } else {
675  // replace only the last (file?) segment with the given bit(s)
676  ++lastSlashPos;
677  if (lastSlashPos > MAX_URL - urllen - 1) {
678  // XXX: crops bits in the middle of the combined URL.
679  lastSlashPos = MAX_URL - urllen - 1;
680  }
681  SBufToCstring(&urlbuf[urllen], path.substr(0,lastSlashPos));
682  urllen += lastSlashPos;
683  if (urllen + 1 < MAX_URL) {
684  xstrncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
685  }
686  }
687  }
688 
689  return (urlbuf);
690 }
691 
692 int
693 matchDomainName(const char *h, const char *d, uint8_t flags)
694 {
695  int dl;
696  int hl;
697 
698  const bool hostIncludesSubdomains = (*h == '.');
699  while ('.' == *h)
700  ++h;
701 
702  hl = strlen(h);
703 
704  if (hl == 0)
705  return -1;
706 
707  dl = strlen(d);
708 
709  /*
710  * Start at the ends of the two strings and work towards the
711  * beginning.
712  */
713  while (xtolower(h[--hl]) == xtolower(d[--dl])) {
714  if (hl == 0 && dl == 0) {
715  /*
716  * We made it all the way to the beginning of both
717  * strings without finding any difference.
718  */
719  return 0;
720  }
721 
722  if (0 == hl) {
723  /*
724  * The host string is shorter than the domain string.
725  * There is only one case when this can be a match.
726  * If the domain is just one character longer, and if
727  * that character is a leading '.' then we call it a
728  * match.
729  */
730 
731  if (1 == dl && '.' == d[0])
732  return 0;
733  else
734  return -1;
735  }
736 
737  if (0 == dl) {
738  /*
739  * The domain string is shorter than the host string.
740  * This is a match only if the first domain character
741  * is a leading '.'.
742  */
743 
744  if ('.' == d[0]) {
745  if (flags & mdnRejectSubsubDomains) {
746  // Check for sub-sub domain and reject
747  while(--hl >= 0 && h[hl] != '.');
748  if (hl < 0) {
749  // No sub-sub domain found, but reject if there is a
750  // leading dot in given host string (which is removed
751  // before the check is started).
752  return hostIncludesSubdomains ? 1 : 0;
753  } else
754  return 1; // sub-sub domain, reject
755  } else
756  return 0;
757  } else
758  return 1;
759  }
760  }
761 
762  /*
763  * We found different characters in the same position (from the end).
764  */
765 
766  // If the h has a form of "*.foo.com" and d has a form of "x.foo.com"
767  // then the h[hl] points to '*', h[hl+1] to '.' and d[dl] to 'x'
768  // The following checks are safe, the "h[hl + 1]" in the worst case is '\0'.
769  if ((flags & mdnHonorWildcards) && h[hl] == '*' && h[hl + 1] == '.')
770  return 0;
771 
772  /*
773  * If one of those character is '.' then its special. In order
774  * for splay tree sorting to work properly, "x-foo.com" must
775  * be greater than ".foo.com" even though '-' is less than '.'.
776  */
777  if ('.' == d[dl])
778  return 1;
779 
780  if ('.' == h[hl])
781  return -1;
782 
783  return (xtolower(h[hl]) - xtolower(d[dl]));
784 }
785 
786 /*
787  * return true if we can serve requests for this method.
788  */
789 int
791 {
792  int rc = 0;
793  /* protocol "independent" methods
794  *
795  * actually these methods are specific to HTTP:
796  * they are methods we recieve on our HTTP port,
797  * and if we had a FTP listener would not be relevant
798  * there.
799  *
800  * So, we should delegate them to HTTP. The problem is that we
801  * do not have a default protocol from the client side of HTTP.
802  */
803 
804  if (r->method == Http::METHOD_CONNECT)
805  return 1;
806 
807  // we support OPTIONS and TRACE directed at us (with a 501 reply, for now)
808  // we also support forwarding OPTIONS and TRACE, except for the *-URI ones
811 
812  if (r->method == Http::METHOD_PURGE)
813  return 1;
814 
815  /* does method match the protocol? */
816  switch (r->url.getScheme()) {
817 
818  case AnyP::PROTO_URN:
819 
820  case AnyP::PROTO_HTTP:
821 
823  rc = 1;
824  break;
825 
826  case AnyP::PROTO_FTP:
827 
828  if (r->method == Http::METHOD_PUT)
829  rc = 1;
830 
831  case AnyP::PROTO_GOPHER:
832 
833  case AnyP::PROTO_WAIS:
834 
835  case AnyP::PROTO_WHOIS:
836  if (r->method == Http::METHOD_GET)
837  rc = 1;
838  else if (r->method == Http::METHOD_HEAD)
839  rc = 1;
840 
841  break;
842 
843  case AnyP::PROTO_HTTPS:
844 #if USE_OPENSSL
845  rc = 1;
846 #elif USE_GNUTLS
847  rc = 1;
848 #else
849  /*
850  * Squid can't originate an SSL connection, so it should
851  * never receive an "https:" URL. It should always be
852  * CONNECT instead.
853  */
854  rc = 0;
855 #endif
856  break;
857 
858  default:
859  break;
860  }
861 
862  return rc;
863 }
864 
865 /*
866  * Quick-n-dirty host extraction from a URL. Steps:
867  * Look for a colon
868  * Skip any '/' after the colon
869  * Copy the next SQUIDHOSTNAMELEN bytes to host[]
870  * Look for an ending '/' or ':' and terminate
871  * Look for login info preceded by '@'
872  */
873 
875 {
876 
877 public:
878  char * extract(char const *url);
879 
880 private:
881  static char Host [SQUIDHOSTNAMELEN];
882  void init(char const *);
883  void findHostStart();
884  void trimTrailingChars();
885  void trimAuth();
886  char const *hostStart;
887  char const *url;
888 };
889 
890 char *
891 urlHostname(const char *url)
892 {
893  return URLHostName().extract(url);
894 }
895 
897 
898 void
899 URLHostName::init(char const *aUrl)
900 {
901  Host[0] = '\0';
902  url = aUrl;
903 }
904 
905 void
907 {
908  if (NULL == (hostStart = strchr(url, ':')))
909  return;
910 
911  ++hostStart;
912 
913  while (*hostStart != '\0' && *hostStart == '/')
914  ++hostStart;
915 
916  if (*hostStart == ']')
917  ++hostStart;
918 }
919 
920 void
922 {
923  char *t;
924 
925  if ((t = strchr(Host, '/')))
926  *t = '\0';
927 
928  if ((t = strrchr(Host, ':')))
929  *t = '\0';
930 
931  if ((t = strchr(Host, ']')))
932  *t = '\0';
933 }
934 
935 void
937 {
938  char *t;
939 
940  if ((t = strrchr(Host, '@'))) {
941  ++t;
942  memmove(Host, t, strlen(t) + 1);
943  }
944 }
945 
946 char *
947 URLHostName::extract(char const *aUrl)
948 {
949  init(aUrl);
950  findHostStart();
951 
952  if (hostStart == NULL)
953  return NULL;
954 
955  xstrncpy(Host, hostStart, SQUIDHOSTNAMELEN);
956 
957  trimTrailingChars();
958 
959  trimAuth();
960 
961  return Host;
962 }
963 
965  scheme_(aScheme),
967  port_(0)
968 {
969  *host_=0;
970 }
971 
972 // TODO: fix code duplication with AnyP::Uri::parse()
973 char *
974 AnyP::Uri::cleanup(const char *uri)
975 {
976  int flags = 0;
977  char *cleanedUri = nullptr;
978  switch (Config.uri_whitespace) {
980  flags |= RFC1738_ESCAPE_NOSPACE;
981  // fall through to next case
983  flags |= RFC1738_ESCAPE_UNESCAPED;
984  cleanedUri = xstrndup(rfc1738_do_escape(uri, flags), MAX_URL);
985  break;
986 
987  case URI_WHITESPACE_CHOP: {
988  flags |= RFC1738_ESCAPE_UNESCAPED;
989  const auto pos = strcspn(uri, w_space);
990  char *choppedUri = nullptr;
991  if (pos < strlen(uri))
992  choppedUri = xstrndup(uri, pos + 1);
993  cleanedUri = xstrndup(rfc1738_do_escape(choppedUri ? choppedUri : uri, flags), MAX_URL);
994  cleanedUri[pos] = '\0';
995  xfree(choppedUri);
996  }
997  break;
998 
999  case URI_WHITESPACE_DENY:
1000  case URI_WHITESPACE_STRIP:
1001  default: {
1002  // TODO: avoid duplication with urlParse()
1003  const char *t;
1004  char *tmp_uri = static_cast<char*>(xmalloc(strlen(uri) + 1));
1005  char *q = tmp_uri;
1006  t = uri;
1007  while (*t) {
1008  if (!xisspace(*t)) {
1009  *q = *t;
1010  ++q;
1011  }
1012  ++t;
1013  }
1014  *q = '\0';
1015  cleanedUri = xstrndup(rfc1738_escape_unescaped(tmp_uri), MAX_URL);
1016  xfree(tmp_uri);
1017  }
1018  break;
1019  }
1020 
1021  assert(cleanedUri);
1022  return cleanedUri;
1023 }
1024 
char * urlHostname(const char *url)
Definition: Uri.cc:891
#define rfc1738_escape_unescaped(x)
Definition: rfc1738.h:59
#define assert(EX)
Definition: assert.h:17
SBuf & authority(bool requirePort=false) const
Definition: Uri.cc:503
SBuf & appendf(const char *fmt,...)
Definition: SBuf.cc:239
void trimAuth()
Definition: Uri.cc:936
char * urlMakeAbsolute(const HttpRequest *req, const char *relUrl)
Definition: Uri.cc:634
SBuf image() const
Definition: UriScheme.h:50
bool urlIsRelative(const char *url)
Definition: Uri.cc:601
void path(const char *p)
Definition: Uri.h:93
Definition: SBuf.h:86
#define RFC1738_ESCAPE_UNESCAPED
Definition: rfc1738.h:25
const char * urlCanonicalFakeHttps(const HttpRequest *request)
Definition: Uri.cc:580
unsigned short port() const
Definition: Uri.h:89
HttpRequestMethod method
Definition: HttpRequest.h:114
int stringHasWhitespace(const char *)
Definition: String.cc:380
struct _request * request(char *urlin)
Definition: tcp-banger2.c:291
int i
Definition: membanger.c:49
SBuf & append(const SBuf &S)
Definition: SBuf.cc:195
static const char valid_hostname_chars_u[]
Definition: Uri.cc:19
char * xstrndup(const char *s, size_t n)
Definition: xstring.cc:56
unsigned short defaultPort() const
Definition: UriScheme.cc:52
int allow_underscore
Definition: SquidConfig.h:320
SBuf path_
URI path segment.
Definition: Uri.h:160
static const SBuf & SlashPath()
the static &#39;/&#39; default URL-path
Definition: Uri.cc:40
SBuf authorityHttp_
RFC 7230 section 5.3.3 authority, maybe without default-port.
Definition: Uri.h:163
void clear()
Definition: SBuf.cc:178
bool isEmpty() const
Definition: SBuf.h:420
char * appendDomain
Definition: SquidConfig.h:215
#define xisspace(x)
Definition: xis.h:17
#define DBG_CRITICAL
Definition: Debug.h:45
char * p
Definition: membanger.c:43
void SBufToCstring(char *d, const SBuf &s)
Definition: SBuf.h:741
static const SBuf & Asterisk()
the static &#39;*&#39; pseudo-URI
Definition: Uri.cc:33
#define w_space
char * urlCanonicalCleanWithoutRequest(const SBuf &url, const HttpRequestMethod &method, const AnyP::UriScheme &scheme)
Definition: Uri.cc:551
static const char valid_hostname_chars[]
Definition: Uri.cc:25
SBuf & absolute() const
Definition: Uri.cc:521
const char * host(void) const
Definition: Uri.h:79
size_t appendDomainLen
Definition: SquidConfig.h:216
bool urlAppendDomain(char *host)
apply append_domain config to the given hostname
Definition: Uri.cc:188
char * rfc1738_do_escape(const char *url, int flags)
Definition: rfc1738.c:56
void urlInitialize(void)
Definition: Uri.cc:85
char const * hostStart
Definition: Uri.cc:886
int urlCheckRequest(const HttpRequest *r)
Definition: Uri.cc:790
void port(unsigned short p)
Definition: Uri.h:88
#define debugs(SECTION, LEVEL, CONTENT)
Definition: Debug.h:124
#define DBG_IMPORTANT
Definition: Debug.h:46
#define URI_WHITESPACE_ALLOW
Definition: defines.h:195
Uri()
Definition: Uri.h:35
bool parse(const HttpRequestMethod &, const char *url)
Definition: Uri.cc:225
void setScheme(const AnyP::ProtocolType &p, const char *str)
convert the URL scheme to that given
Definition: Uri.h:70
void touch()
clear the cached URI display forms
Definition: Uri.cc:495
void trimTrailingChars()
Definition: Uri.cc:921
int uri_whitespace
Definition: SquidConfig.h:456
int hostIsNumeric(void) const
Definition: Uri.h:80
AnyP::Uri url
the request URI
Definition: HttpRequest.h:115
#define RFC1738_ESCAPE_NOSPACE
Definition: rfc1738.h:22
char * xstrncpy(char *dst, const char *src, size_t n)
Definition: xstring.cc:37
void userInfo(const SBuf &s)
Definition: Uri.h:75
const SBuf & path() const
Definition: Uri.cc:73
#define URI_WHITESPACE_STRIP
Definition: defines.h:194
char * canonicalCleanUrl() const
Definition: HttpRequest.cc:777
unsigned short port_
URL port.
Definition: Uri.h:157
Ip::Address hostAddr_
binary representation of the URI authority if it is a raw-IP
Definition: Uri.h:155
void init(char const *)
Definition: Uri.cc:899
int check_hostnames
Definition: SquidConfig.h:319
void setEmpty()
Fast reset of the stored content to what would be after default constructor.
Definition: Address.cc:184
void host(const char *src)
Definition: Uri.cc:47
#define URI_WHITESPACE_ENCODE
Definition: defines.h:196
Http::MethodType id() const
Definition: RequestMethod.h:73
#define LOCAL_ARRAY(type, name, size)
Definition: leakcheck.h:18
bool isAnyAddr() const
Definition: Address.cc:170
int unsigned int const char *desc STUB void int len
Definition: stub_fd.cc:20
void parseFinish(const AnyP::ProtocolType, const char *const, const char *const, const char *const, const SBuf &, const int)
Update the URL object with parsed URI data.
Definition: Uri.cc:480
char const * url
Definition: Uri.cc:887
void const char * buf
Definition: stub_helper.cc:16
struct SquidConfig::@112 onoff
static char * cleanup(const char *uri)
Definition: Uri.cc:974
HttpHeader header
Definition: Message.h:75
#define URI_WHITESPACE_CHOP
Definition: defines.h:197
void findHostStart()
Definition: Uri.cc:906
bool hostIsNumeric_
whether the authority &#39;host&#39; is a raw-IP
Definition: Uri.h:154
char * extract(char const *url)
Definition: Uri.cc:947
#define MYNAME
Definition: Debug.h:166
SBuf authorityWithPort_
RFC 7230 section 5.3.3 authority with explicit port.
Definition: Uri.h:164
#define xmalloc
AnyP::ProtocolType urlParseProtocol(const char *b)
Definition: Uri.cc:133
SBuf substr(size_type pos, size_type n=npos) const
Definition: SBuf.cc:586
#define SQUIDHOSTNAMELEN
Definition: rfc2181.h:30
char host_[SQUIDHOSTNAMELEN]
string representation of the URI authority name or IP
Definition: Uri.h:153
size_t HttpReply *STUB StoreEntry const KeyScope scope const HttpRequestMethod & method
Definition: stub_store.cc:112
static const size_type npos
Definition: SBuf.h:92
ProtocolType
Definition: ProtocolType.h:22
void rfc1738_unescape(char *url)
Definition: rfc1738.c:146
const char * ProtocolType_str[]
SBuf hostOrIp() const
Definition: Uri.cc:63
const SBuf & userInfo() const
Definition: Uri.h:76
#define MAX_IPSTRLEN
Length of buffer that needs to be allocated to old a null-terminated IP-string.
Definition: forward.h:23
#define MAX_URL
Definition: defines.h:118
AnyP::UriScheme scheme_
Definition: Uri.h:147
void defaultPort()
reset the port to the default port number for the current scheme
Definition: Uri.h:91
#define URI_WHITESPACE_DENY
Definition: defines.h:198
Ip::Address const & hostIP(void) const
Definition: Uri.h:81
void reserveCapacity(size_type minCapacity)
Definition: SBuf.cc:105
AnyP::UriScheme const & getScheme() const
Definition: Uri.h:67
#define SQUIDSBUFPH
Definition: SBuf.h:31
unsigned int toHostStr(char *buf, const unsigned int len) const
Definition: Address.cc:852
int matchDomainName(const char *h, const char *d, uint8_t flags)
Definition: Uri.cc:693
#define xfree
MemBlob::size_type size_type
Definition: SBuf.h:89
#define SQUIDSBUFPRINT(s)
Definition: SBuf.h:32
#define xtolower(x)
Definition: xis.h:19
SBuf absolute_
RFC 7230 section 5.3.2 absolute-URI.
Definition: Uri.h:165
class SquidConfig Config
Definition: SquidConfig.cc:12
static char Host[SQUIDHOSTNAMELEN]
Definition: Uri.cc:881
#define NULL
Definition: types.h:166
int strip_query_terms
Definition: SquidConfig.h:302
int stringHasCntl(const char *)
Definition: String.cc:387
#define false
Definition: GnuRegex.c:233
int64_t getInt64(Http::HdrType id) const
Definition: HttpHeader.cc:1148
size_type rfind(char c, size_type endPos=npos) const
Definition: SBuf.cc:702

 

Introduction

Documentation

Support

Miscellaneous

Web Site Translations

Mirrors