Uri.cc
Go to the documentation of this file.
1/*
2 * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9/* DEBUG: section 23 URL Parsing */
10
11#include "squid.h"
12#include "anyp/Uri.h"
13#include "base/Raw.h"
14#include "globals.h"
15#include "HttpRequest.h"
16#include "parser/Tokenizer.h"
17#include "rfc1738.h"
18#include "SquidConfig.h"
19#include "SquidString.h"
20
21static const char valid_hostname_chars_u[] =
22 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
23 "abcdefghijklmnopqrstuvwxyz"
24 "0123456789-._"
25 "[:]"
26 ;
27static const char valid_hostname_chars[] =
28 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
29 "abcdefghijklmnopqrstuvwxyz"
30 "0123456789-."
31 "[:]"
32 ;
33
// Characters which are valid within a URI userinfo section.
// Returns a reference to a function-local static CharacterSet that is built
// on first use from the punctuation listed in the RFC 3986 grammar below.
// NOTE(review): this listing is missing source lines 36 and 47-48. Per the
// cross-reference index, line 36 is the UserInfoChars() declarator; lines
// 47-48 hold the rest of the userInfoValid initializer, which ends in a
// dangling '+' as shown here. Restore them from upstream before building.
35static const CharacterSet &
37{
38 /*
39 * RFC 3986 section 3.2.1
40 *
41 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
42 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
43 * pct-encoded = "%" HEXDIG HEXDIG
44 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
45 */
46 static const auto userInfoValid = CharacterSet("userinfo", ":-._~%!$&'()*+,;=") +
49 return userInfoValid;
50}
51
55SBuf
56AnyP::Uri::Encode(const SBuf &buf, const CharacterSet &ignore)
57{
58 if (buf.isEmpty())
59 return buf;
60
61 Parser::Tokenizer tk(buf);
62 SBuf goodSection;
63 // optimization for the arguably common "no encoding necessary" case
64 if (tk.prefix(goodSection, ignore) && tk.atEnd())
65 return buf;
66
67 SBuf output;
68 output.reserveSpace(buf.length() * 3); // worst case: encode all chars
69 output.append(goodSection); // may be empty
70
71 while (!tk.atEnd()) {
72 // TODO: Add Tokenizer::parseOne(void).
73 const auto ch = tk.remaining()[0];
74 output.appendf("%%%02X", static_cast<unsigned int>(ch)); // TODO: Optimize using a table
75 (void)tk.skip(ch);
76
77 if (tk.prefix(goodSection, ignore))
78 output.append(goodSection);
79 }
80
81 return output;
82}
83
84const SBuf &
86{
87 static SBuf star("*");
88 return star;
89}
90
91const SBuf &
93{
94 static SBuf slash("/");
95 return slash;
96}
97
98void
99AnyP::Uri::host(const char *src)
100{
101 hostAddr_.fromHost(src);
102 if (hostAddr_.isAnyAddr()) {
103 xstrncpy(host_, src, sizeof(host_));
104 hostIsNumeric_ = false;
105 } else {
106 hostAddr_.toHostStr(host_, sizeof(host_));
107 debugs(23, 3, "given IP: " << hostAddr_);
108 hostIsNumeric_ = 1;
109 }
110 touch();
111}
112
113SBuf
115{
116 if (hostIsNumeric()) {
117 static char ip[MAX_IPSTRLEN];
118 const auto hostStrLen = hostIP().toHostStr(ip, sizeof(ip));
119 return SBuf(ip, hostStrLen);
120 } else
121 return SBuf(host());
122}
123
124const SBuf &
126{
127 // RFC 3986 section 3.3 says path can be empty (path-abempty).
128 // RFC 7230 sections 2.7.3, 5.3.1, 5.7.2 - says path cannot be empty, default to "/"
129 // at least when sending and using. We must still accept path-abempty as input.
130 if (path_.isEmpty() && (scheme_ == AnyP::PROTO_HTTP || scheme_ == AnyP::PROTO_HTTPS))
131 return SlashPath();
132
133 return path_;
134}
135
136void
138{
139 debugs(23, 5, "urlInitialize: Initializing...");
140 /* this ensures that the number of protocol strings is the same as
141 * the enum slots allocated because the last enum is always 'MAX'.
142 */
143 assert(strcmp(AnyP::ProtocolType_str[AnyP::PROTO_MAX], "MAX") == 0);
144 /*
145 * These test that our matchDomainName() function works the
146 * way we expect it to.
147 */
148 assert(0 == matchDomainName("foo.com", "foo.com"));
149 assert(0 == matchDomainName(".foo.com", "foo.com"));
150 assert(0 == matchDomainName("foo.com", ".foo.com"));
151 assert(0 == matchDomainName(".foo.com", ".foo.com"));
152 assert(0 == matchDomainName("x.foo.com", ".foo.com"));
153 assert(0 == matchDomainName("y.x.foo.com", ".foo.com"));
154 assert(0 != matchDomainName("x.foo.com", "foo.com"));
155 assert(0 != matchDomainName("foo.com", "x.foo.com"));
156 assert(0 != matchDomainName("bar.com", "foo.com"));
157 assert(0 != matchDomainName(".bar.com", "foo.com"));
158 assert(0 != matchDomainName(".bar.com", ".foo.com"));
159 assert(0 != matchDomainName("bar.com", ".foo.com"));
160 assert(0 < matchDomainName("zzz.com", "foo.com"));
161 assert(0 > matchDomainName("aaa.com", "foo.com"));
162 assert(0 == matchDomainName("FOO.com", "foo.COM"));
163 assert(0 < matchDomainName("bfoo.com", "afoo.com"));
164 assert(0 > matchDomainName("afoo.com", "bfoo.com"));
165 assert(0 < matchDomainName("x-foo.com", ".foo.com"));
166
167 assert(0 == matchDomainName(".foo.com", ".foo.com", mdnRejectSubsubDomains));
168 assert(0 == matchDomainName("x.foo.com", ".foo.com", mdnRejectSubsubDomains));
169 assert(0 != matchDomainName("y.x.foo.com", ".foo.com", mdnRejectSubsubDomains));
170 assert(0 != matchDomainName(".x.foo.com", ".foo.com", mdnRejectSubsubDomains));
171
172 assert(0 == matchDomainName("*.foo.com", "x.foo.com", mdnHonorWildcards));
173 assert(0 == matchDomainName("*.foo.com", ".x.foo.com", mdnHonorWildcards));
174 assert(0 == matchDomainName("*.foo.com", ".foo.com", mdnHonorWildcards));
175 assert(0 != matchDomainName("*.foo.com", "foo.com", mdnHonorWildcards));
176
177 /* more cases? */
178}
179
// Extracts the URI scheme from the start of tok.
// Succeeds when tok begins with at most 16 schemeChars characters followed
// by ':' and the first of those characters is ALPHA. The result carries the
// protocol type found by FindProtocolType(); for PROTO_UNKNOWN the parsed
// scheme text is kept as well, otherwise a null image is passed.
// Throws TextException("invalid URI scheme") in every other case.
// NOTE(review): this listing is missing source lines 188 and 204. Per the
// cross-reference index, line 188 is the uriParseScheme(Parser::Tokenizer &tok)
// declarator; line 204 holds the rest of the schemeChars initializer, which
// is incomplete as shown here.
187static AnyP::UriScheme
189{
190 /*
191 * RFC 3986 section 3.1 paragraph 2:
192 *
193 * Scheme names consist of a sequence of characters beginning with a
194 * letter and followed by any combination of letters, digits, plus
195 * ("+"), period ("."), or hyphen ("-").
196 *
197 * The underscore ("_") required to match "cache_object://" squid
198 * special URI scheme.
199 */
200 static const auto schemeChars =
201#if USE_HTTP_VIOLATIONS
202 CharacterSet("special", "_") +
203#endif
205
206 SBuf str;
// The ALPHA test on str.at(0) runs only after prefix() succeeded, so str is
// non-empty at that point.
207 if (tok.prefix(str, schemeChars, 16) && tok.skip(':') && CharacterSet::ALPHA[str.at(0)]) {
208 const auto protocol = AnyP::UriScheme::FindProtocolType(str);
209 if (protocol == AnyP::PROTO_UNKNOWN)
210 return AnyP::UriScheme(protocol, str.c_str());
211 return AnyP::UriScheme(protocol, nullptr);
212 }
213
214 throw TextException("invalid URI scheme", Here());
215}
216
224bool
226{
227 /* For IPv4 addresses check for a dot */
228 /* For IPv6 addresses also check for a colon */
229 if (Config.appendDomain && !strchr(host, '.') && !strchr(host, ':')) {
230 const uint64_t dlen = strlen(host);
231 const uint64_t want = dlen + Config.appendDomainLen;
232 if (want > SQUIDHOSTNAMELEN - 1) {
233 debugs(23, 2, "URL domain too large (" << dlen << " bytes)");
234 return false;
235 }
236 strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - dlen - 1);
237 }
238 return true;
239}
240
241/*
242 * Parse a URI/URL.
243 *
244 * It is assumed that the URL is complete -
245 * ie, the end of the string is the end of the URL. Don't pass a partial
246 * URL here as this routine doesn't have any way of knowing whether
247 * it is partial or not (ie, it handles the case of no trailing slash as
248 * being "end of host with implied path of /".
249 *
250 * method is used to switch parsers. If method is Http::METHOD_CONNECT,
251 * then rather than a URL a hostname:port is looked for.
252 */
// Parses rawUrl into this Uri's scheme, userinfo, host, port and path.
// Returns true after storing the components through setScheme(), path(),
// host(), userInfo() and port() (or, for URNs, through parseUrn()). Returns
// false for any rejected input. Exceptions thrown while parsing are caught by
// the catch-all at the bottom and are also reported as false.
// NOTE(review): several source lines are absent from this listing: 433, 448,
// and the case labels at 477, 480, 483, 488 and 492. The host-whitespace
// block, the hostname character check and the uri_whitespace switch are
// therefore incomplete as shown; restore them from upstream before building.
253bool
254AnyP::Uri::parse(const HttpRequestMethod& method, const SBuf &rawUrl)
255{
256 try {
257
258 LOCAL_ARRAY(char, login, MAX_URL);
259 LOCAL_ARRAY(char, foundHost, MAX_URL);
260 LOCAL_ARRAY(char, urlpath, MAX_URL);
261 char *t = nullptr;
262 char *q = nullptr;
263 int foundPort;
264 int l;
265 int i;
266 const char *src;
267 char *dst;
268 foundHost[0] = urlpath[0] = login[0] = '\0';
269
// Reject input that, together with the append_domain suffix, would not fit
// into the MAX_URL-sized scratch buffers declared above.
270 if ((l = rawUrl.length()) + Config.appendDomainLen > (MAX_URL - 1)) {
271 debugs(23, DBG_IMPORTANT, MYNAME << "URL too large (" << l << " bytes)");
272 return false;
273 }
274
// OPTIONS and TRACE with a request-target of exactly "*": store the asterisk
// pseudo-URI as the path, with the http scheme and its default port.
275 if ((method == Http::METHOD_OPTIONS || method == Http::METHOD_TRACE) &&
276 Asterisk().cmp(rawUrl) == 0) {
277 // XXX: these methods might also occur in HTTPS traffic. Handle this better.
278 setScheme(AnyP::PROTO_HTTP, nullptr);
279 port(getScheme().defaultPort());
280 path(Asterisk());
281 return true;
282 }
283
284 Parser::Tokenizer tok(rawUrl);
285 AnyP::UriScheme scheme;
286
287 if (method == Http::METHOD_CONNECT) {
288 /*
289 * RFC 7230 section 5.3.3: authority-form = authority
290 * "excluding any userinfo and its "@" delimiter"
291 *
292 * RFC 3986 section 3.2: authority = [ userinfo "@" ] host [ ":" port ]
293 *
294 * As an HTTP(S) proxy we assume HTTPS (443) if no port provided.
295 */
296 foundPort = 443;
297
298 // XXX: use tokenizer
299 auto B = tok.buf();
300 const char *url = B.c_str();
301
// Try the bracketed "[host]:port" form first, then plain "host:port".
// sscanf() leaves foundPort at 443 when only the host part matches.
302 if (sscanf(url, "[%[^]]]:%d", foundHost, &foundPort) < 1)
303 if (sscanf(url, "%[^:]:%d", foundHost, &foundPort) < 1)
304 return false;
305
306 } else {
307
308 scheme = uriParseScheme(tok);
309
310 if (scheme == AnyP::PROTO_NONE)
311 return false; // invalid scheme
312
313 if (scheme == AnyP::PROTO_URN) {
314 parseUrn(tok); // throws on any error
315 return true;
316 }
317
318 // URLs then have "//"
319 static const SBuf doubleSlash("//");
320 if (!tok.skip(doubleSlash))
321 return false;
322
323 auto B = tok.remaining();
324 const char *url = B.c_str();
325
326 /* Parse the URL: */
327 src = url;
328 i = 0;
329
330 /* Then everything until first /; that's host (and port; which we'll look for here later) */
331 // bug 1881: If we don't get a "/" then we imply it was there
332 // bug 3074: We could just be given a "?" or "#". These also imply "/"
333 // bug 3233: whitespace is also a hostname delimiter.
334 for (dst = foundHost; i < l && *src != '/' && *src != '?' && *src != '#' && *src != '\0' && !xisspace(*src); ++i, ++src, ++dst) {
335 *dst = *src;
336 }
337
338 /*
339 * We can't check for "i >= l" here because we could be at the end of the line
340 * and have a perfectly valid URL w/ no trailing '/'. In this case we assume we've
341 * been -given- a valid URL and the path is just '/'.
342 */
343 if (i > l)
344 return false;
345 *dst = '\0';
346
347 // We are looking at path-abempty.
348 if (*src != '/') {
349 // path-empty, including the end of the `src` c-string cases
350 urlpath[0] = '/';
351 dst = &urlpath[1];
352 } else {
353 dst = urlpath;
354 }
355 /* Then everything from / (inclusive) until \r\n or \0 - that's urlpath */
356 for (; i < l && *src != '\r' && *src != '\n' && *src != '\0'; ++i, ++src, ++dst) {
357 *dst = *src;
358 }
359
360 /* We -could- be at the end of the buffer here */
361 if (i > l)
362 return false;
363 *dst = '\0';
364
365 // If the parsed scheme has no (known) default port, and there is no
366 // explicit port, then we will reject the zero port during foundPort
367 // validation, often resulting in a misleading 400/ERR_INVALID_URL.
368 // TODO: Remove this hack when switching to Tokenizer-based parsing.
369 foundPort = scheme.defaultPort().value_or(0); // may be reset later
370
371 /* Is there any login information? (we should eventually parse it above) */
// The last '@' splits userinfo from host: the text before it goes to login
// (then URL-unescaped) and the text after it becomes the new foundHost.
372 t = strrchr(foundHost, '@');
373 if (t != nullptr) {
374 strncpy((char *) login, (char *) foundHost, sizeof(login)-1);
375 login[sizeof(login)-1] = '\0';
376 t = strrchr(login, '@');
377 *t = 0;
378 strncpy((char *) foundHost, t + 1, sizeof(foundHost)-1);
379 foundHost[sizeof(foundHost)-1] = '\0';
380 // Bug 4498: URL-unescape the login info after extraction
381 rfc1738_unescape(login);
382 }
383
384 /* Is there any host information? (we should eventually parse it above) */
385 if (*foundHost == '[') {
386 /* strip any IPA brackets. valid under IPv6. */
387 dst = foundHost;
388 /* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
389 src = foundHost;
390 ++src;
391 l = strlen(foundHost);
392 i = 1;
393 for (; i < l && *src != ']' && *src != '\0'; ++i, ++src, ++dst) {
394 *dst = *src;
395 }
396
397 /* we moved in-place, so truncate the actual hostname found */
398 *dst = '\0';
399 ++dst;
400
401 /* skip ahead to either start of port, or original EOS */
402 while (*dst != '\0' && *dst != ':')
403 ++dst;
404 t = dst;
405 } else {
406 t = strrchr(foundHost, ':');
407
// More than one ':' without brackets: do not treat any of them as a port
// delimiter.
408 if (t != strchr(foundHost,':') ) {
409 /* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when its not. */
410 /* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
411 /* therefore we MUST accept the case where they are not bracketed at all. */
412 t = nullptr;
413 }
414 }
415
416 // Bug 3183 sanity check: If scheme is present, host must be too.
417 if (scheme != AnyP::PROTO_NONE && foundHost[0] == '\0') {
418 debugs(23, DBG_IMPORTANT, "SECURITY ALERT: Missing hostname in URL '" << url << "'. see access.log for details.");
419 return false;
420 }
421
422 if (t && *t == ':') {
423 *t = '\0';
424 ++t;
425 foundPort = atoi(t);
426 }
427 }
428
429 for (t = foundHost; *t; ++t)
430 *t = xtolower(*t);
431
432 if (stringHasWhitespace(foundHost)) {
// NOTE(review): listing line 433 is missing here; the extra closing brace at
// listing line 444 suggests it opened a nested block. Confirm against upstream.
434 t = q = foundHost;
435 while (*t) {
436 if (!xisspace(*t)) {
437 *q = *t;
438 ++q;
439 }
440 ++t;
441 }
442 *q = '\0';
443 }
444 }
445
446 debugs(23, 3, "Split URL '" << rawUrl << "' into proto='" << scheme.image() << "', host='" << foundHost << "', port='" << foundPort << "', path='" << urlpath << "'");
447
// NOTE(review): listing line 448, the start of this if-condition, is missing.
// The visible part rejects hosts containing any character outside the
// allowed hostname set; underscores are allowed only with allow_underscore.
449 strspn(foundHost, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(foundHost)) {
450 debugs(23, DBG_IMPORTANT, MYNAME << "Illegal character in hostname '" << foundHost << "'");
451 return false;
452 }
453
454 if (!urlAppendDomain(foundHost))
455 return false;
456
457 /* remove trailing dots from hostnames */
458 while ((l = strlen(foundHost)) > 0 && foundHost[--l] == '.')
459 foundHost[l] = '\0';
460
461 /* reject duplicate or leading dots */
462 if (strstr(foundHost, "..") || *foundHost == '.') {
463 debugs(23, DBG_IMPORTANT, MYNAME << "Illegal hostname '" << foundHost << "'");
464 return false;
465 }
466
467 if (foundPort < 1 || foundPort > 65535) {
468 debugs(23, 3, "Invalid port '" << foundPort << "'");
469 return false;
470 }
471
472 if (stringHasWhitespace(urlpath)) {
473 debugs(23, 2, "URI has whitespace: {" << rawUrl << "}");
474
// NOTE(review): every case label of this switch except default (listing lines
// 477, 480, 483, 488 and 492) is missing, so which Config.uri_whitespace
// value selects which action cannot be read from this listing.
475 switch (Config.uri_whitespace) {
476
478 return false;
479
481 break;
482
484 t = rfc1738_escape_unescaped(urlpath);
485 xstrncpy(urlpath, t, MAX_URL);
486 break;
487
489 *(urlpath + strcspn(urlpath, w_space)) = '\0';
490 break;
491
493 default:
494 t = q = urlpath;
495 while (*t) {
496 if (!xisspace(*t)) {
497 *q = *t;
498 ++q;
499 }
500 ++t;
501 }
502 *q = '\0';
503 }
504 }
505
// All checks passed: commit the parsed components to this object.
506 setScheme(scheme);
507 path(urlpath);
508 host(foundHost);
509 userInfo(SBuf(login));
510 port(foundPort);
511 return true;
512
513 } catch (...) {
514 debugs(23, 2, "error: " << CurrentException << " " << Raw("rawUrl", rawUrl.rawContent(), rawUrl.length()));
515 return false;
516 }
517}
518
533void
535{
536 static const auto nidChars = CharacterSet("NID","-") + CharacterSet::ALPHA + CharacterSet::DIGIT;
537 static const auto alphanum = (CharacterSet::ALPHA + CharacterSet::DIGIT).rename("alphanum");
538 SBuf nid;
539 if (!tok.prefix(nid, nidChars, 32))
540 throw TextException("NID not found", Here());
541
542 if (!tok.skip(':'))
543 throw TextException("NID too long or missing ':' delimiter", Here());
544
545 if (nid.length() < 2)
546 throw TextException("NID too short", Here());
547
548 if (!alphanum[*nid.begin()])
549 throw TextException("NID prefix is not alphanumeric", Here());
550
551 if (!alphanum[*nid.rbegin()])
552 throw TextException("NID suffix is not alphanumeric", Here());
553
554 setScheme(AnyP::PROTO_URN, nullptr);
555 host(nid.c_str());
556 // TODO validate path characters
557 path(tok.remaining());
558 debugs(23, 3, "Split URI into proto=urn, nid=" << nid << ", " << Raw("path",path().rawContent(),path().length()));
559}
560
561void
563{
564 absolute_.clear();
565 authorityHttp_.clear();
566 authorityWithPort_.clear();
567}
568
569SBuf &
570AnyP::Uri::authority(bool requirePort) const
571{
572 if (authorityHttp_.isEmpty()) {
573
574 // both formats contain Host/IP
575 authorityWithPort_.append(host());
576 authorityHttp_ = authorityWithPort_;
577
578 if (port().has_value()) {
579 authorityWithPort_.appendf(":%hu", *port());
580 // authorityHttp_ only has :port for known non-default ports
581 if (port() != getScheme().defaultPort())
582 authorityHttp_ = authorityWithPort_;
583 }
584 // else XXX: We made authorityWithPort_ that does not have a port.
585 // TODO: Audit callers and refuse to give out broken authorityWithPort_.
586 }
587
588 return requirePort ? authorityWithPort_ : authorityHttp_;
589}
590
591SBuf &
593{
594 if (absolute_.isEmpty()) {
595 // TODO: most URL will be much shorter, avoid allocating this much
596 absolute_.reserveCapacity(MAX_URL);
597
598 absolute_.append(getScheme().image());
599 absolute_.append(":",1);
600 if (getScheme() != AnyP::PROTO_URN) {
601 absolute_.append("//", 2);
602 const bool allowUserInfo = getScheme() == AnyP::PROTO_FTP ||
603 getScheme() == AnyP::PROTO_UNKNOWN;
604
605 if (allowUserInfo && !userInfo().isEmpty()) {
606 static const CharacterSet uiChars = CharacterSet(UserInfoChars())
607 .remove('%')
608 .rename("userinfo-reserved");
609 absolute_.append(Encode(userInfo(), uiChars));
610 absolute_.append("@", 1);
611 }
612 absolute_.append(authority());
613 } else {
614 absolute_.append(host());
615 absolute_.append(":", 1);
616 }
617 absolute_.append(path()); // TODO: Encode each URI subcomponent in path_ as needed.
618 }
619
620 return absolute_;
621}
622
623/* XXX: Performance: This is an *almost* duplicate of HttpRequest::effectiveRequestUri(). But elides the query-string.
624 * After copying it on in the first place! Would be less code to merge the two with a flag parameter.
625 * and never copy the query-string part in the first place
626 */
// Formats url into a static MAX_URL-sized buffer (LOCAL_ARRAY) and returns a
// pointer to that buffer. When Config.onoff.strip_query_terms is set and the
// request is neither CONNECT nor a URN, everything after the first '?' is
// cut off while the '?' itself is kept.
// NOTE(review): this listing is missing source lines 628 and 645. Per the
// cross-reference index, line 628 is the declarator
//   urlCanonicalCleanWithoutRequest(const SBuf &url,
//       const HttpRequestMethod &method, const AnyP::UriScheme &scheme)
// Line 645 is the statement controlled by the stringHasCntl() test; without
// it the `if` below wrongly appears to guard the return statement.
627char *
629{
630 LOCAL_ARRAY(char, buf, MAX_URL);
631
632 snprintf(buf, sizeof(buf), SQUIDSBUFPH, SQUIDSBUFPRINT(url));
633 buf[sizeof(buf)-1] = '\0';
634
635 // URN, CONNECT method, and non-stripped URIs can go straight out
636 if (Config.onoff.strip_query_terms && !(method == Http::METHOD_CONNECT || scheme == AnyP::PROTO_URN)) {
637 // strip anything AFTER a question-mark
638 // leaving the '?' in place
639 if (auto t = strchr(buf, '?')) {
640 *(++t) = '\0';
641 }
642 }
643
644 if (stringHasCntl(buf))
646
647 return buf;
648}
649
656const char *
658{
659 LOCAL_ARRAY(char, buf, MAX_URL);
660
661 // method CONNECT and port HTTPS
662 if (request->method == Http::METHOD_CONNECT && request->url.port() == 443) {
663 snprintf(buf, MAX_URL, "https://%s/*", request->url.host());
664 return buf;
665 }
666
667 // else do the normal complete canonical thing.
668 return request->canonicalCleanUrl();
669}
670
/**
 * Tells whether url is a relative reference rather than an absolute URI.
 *
 * \param url  NUL-terminated reference text; may be nullptr
 * \retval false  url is nullptr, or its first path segment (the text before
 *                the first '/', '?' or '#') contains a ':'
 * \retval true   otherwise: empty text, text starting with '/' (including
 *                "//"), or text whose first segment has no ':'
 */
bool
urlIsRelative(const char *url)
{
    if (!url)
        return false; // no URL

    /*
     * RFC 3986 section 3.3
     *
     *  path          = path-abempty    ; begins with "/" or is empty
     *                / path-absolute   ; begins with "/" but not "//"
     *                / path-noscheme   ; begins with a non-colon segment
     *                / path-rootless   ; begins with a segment
     *                / path-empty      ; zero characters
     */

    if (*url == '\0')
        return true; // path-empty

    // RFC 3986 section 4.2: a leading "//" is a network-path reference (aka
    // 'scheme-relative URI') and a single leading "/" is an absolute-path
    // reference. Both are relative references.
    if (*url == '/')
        return true;

    for (const auto *p = url; *p != '\0' && *p != '/' && *p != '?' && *p != '#'; ++p) {
        if (*p == ':')
            return false; // colon is forbidden in first segment
    }

    return true; // path-noscheme, path-abempty, path-rootless
}
718
719void
720AnyP::Uri::addRelativePath(const char *relUrl)
721{
722 // URN cannot be merged
723 if (getScheme() == AnyP::PROTO_URN)
724 return;
725
726 // TODO: Handle . and .. segment normalization
727
728 const auto lastSlashPos = path_.rfind('/');
729 // TODO: To optimize and simplify, add and use SBuf::replace().
730 const auto relUrlLength = strlen(relUrl);
731 if (lastSlashPos == SBuf::npos) {
732 // start replacing the whole path
733 path_.reserveCapacity(1 + relUrlLength);
734 path_.assign("/", 1);
735 } else {
736 // start replacing just the last segment
737 path_.reserveCapacity(lastSlashPos + 1 + relUrlLength);
738 path_.chop(0, lastSlashPos+1);
739 }
740 path_.append(relUrl, relUrlLength);
741}
742
743int
744matchDomainName(const char *h, const char *d, MatchDomainNameFlags flags)
745{
746 int dl;
747 int hl;
748
749 const bool hostIncludesSubdomains = (*h == '.');
750 while ('.' == *h)
751 ++h;
752
753 hl = strlen(h);
754
755 if (hl == 0)
756 return -1;
757
758 dl = strlen(d);
759
760 /*
761 * Start at the ends of the two strings and work towards the
762 * beginning.
763 */
764 while (xtolower(h[--hl]) == xtolower(d[--dl])) {
765 if (hl == 0 && dl == 0) {
766 /*
767 * We made it all the way to the beginning of both
768 * strings without finding any difference.
769 */
770 return 0;
771 }
772
773 if (0 == hl) {
774 /*
775 * The host string is shorter than the domain string.
776 * There is only one case when this can be a match.
777 * If the domain is just one character longer, and if
778 * that character is a leading '.' then we call it a
779 * match.
780 */
781
782 if (1 == dl && '.' == d[0])
783 return 0;
784 else
785 return -1;
786 }
787
788 if (0 == dl) {
789 /*
790 * The domain string is shorter than the host string.
791 * This is a match only if the first domain character
792 * is a leading '.'.
793 */
794
795 if ('.' == d[0]) {
796 if (flags & mdnRejectSubsubDomains) {
797 // Check for sub-sub domain and reject
798 while(--hl >= 0 && h[hl] != '.');
799 if (hl < 0) {
800 // No sub-sub domain found, but reject if there is a
801 // leading dot in given host string (which is removed
802 // before the check is started).
803 return hostIncludesSubdomains ? 1 : 0;
804 } else
805 return 1; // sub-sub domain, reject
806 } else
807 return 0;
808 } else
809 return 1;
810 }
811 }
812
813 /*
814 * We found different characters in the same position (from the end).
815 */
816
817 // If the h has a form of "*.foo.com" and d has a form of "x.foo.com"
818 // then the h[hl] points to '*', h[hl+1] to '.' and d[dl] to 'x'
819 // The following checks are safe, the "h[hl + 1]" in the worst case is '\0'.
820 if ((flags & mdnHonorWildcards) && h[hl] == '*' && h[hl + 1] == '.')
821 return 0;
822
823 /*
824 * If one of those character is '.' then its special. In order
825 * for splay tree sorting to work properly, "x-foo.com" must
826 * be greater than ".foo.com" even though '-' is less than '.'.
827 */
828 if ('.' == d[dl])
829 return 1;
830
831 if ('.' == h[hl])
832 return -1;
833
834 return (xtolower(h[hl]) - xtolower(d[dl]));
835}
836
837/*
838 * return true if we can serve requests for this method.
839 */
// Decides whether the request's method can be served for its URI scheme.
// NOTE(review): many source lines are absent from this listing: 841 (per the
// cross-reference index, the urlCheckRequest(const HttpRequest *r)
// declarator), 854, 859-860, 870, 876, 881, 883 and 887. The conditions
// guarding several of the returns below, and some case labels, are therefore
// not visible; restore them from upstream before building.
840bool
842{
843 /* protocol "independent" methods
844 *
845 * actually these methods are specific to HTTP:
846 * they are methods we receive on our HTTP port,
847 * and if we had a FTP listener would not be relevant
848 * there.
849 *
850 * So, we should delegate them to HTTP. The problem is that we
851 * do not have a default protocol from the client side of HTTP.
852 */
853
// NOTE(review): the condition for this return (listing line 854) is missing.
855 return true;
856
857 // we support OPTIONS and TRACE directed at us (with a 501 reply, for now)
858 // we also support forwarding OPTIONS and TRACE, except for the *-URI ones
// NOTE(review): the code these two comments describe (listing lines 859-860)
// is missing.
861
862 if (r->method == Http::METHOD_PURGE)
863 return true;
864
865 /* does method match the protocol? */
866 switch (r->url.getScheme()) {
867
868 case AnyP::PROTO_URN:
869 case AnyP::PROTO_HTTP:
871 return true;
872
873 case AnyP::PROTO_FTP:
// NOTE(review): the last operand of this condition (listing line 876) is
// missing.
874 if (r->method == Http::METHOD_PUT ||
875 r->method == Http::METHOD_GET ||
877 return true;
878 return false;
879
880 case AnyP::PROTO_WAIS:
// NOTE(review): listing lines 881 and 883 are missing around this condition.
882 if (r->method == Http::METHOD_GET ||
884 return true;
885 return false;
886
// NOTE(review): the case label for this preprocessor-selected branch (listing
// line 887) is missing; the comment in the #else arm refers to "https:" URLs.
888#if USE_OPENSSL || USE_GNUTLS
889 return true;
890#else
891 /*
892 * Squid can't originate an SSL connection, so it should
893 * never receive an "https:" URL. It should always be
894 * CONNECT instead.
895 */
896 return false;
897#endif
898
899 default:
900 return false;
901 }
902
903 /* notreached */
904 return false;
905}
906
// Constructor tail: copies aScheme into scheme_, marks the host as
// non-numeric, and makes host_ an empty C string.
// NOTE(review): listing line 907, the constructor declarator that introduces
// the aScheme parameter, is missing from this extract.
908 scheme_(aScheme),
909 hostIsNumeric_(false)
910{
911 *host_=0;
912}
913
914// TODO: fix code duplication with AnyP::Uri::parse()
// Produces a cleaned copy of uri, with whitespace handled according to
// Config.uri_whitespace. The assert at the end guarantees a non-null result.
// Every visible branch builds the result with xstrndup(), so the caller
// presumably owns the returned string and must xfree() it (TODO confirm).
// NOTE(review): source lines 920-921, 926-927, 936 and 942-943 are absent
// from this listing. They include case labels, the declaration of `flags`
// and the tail of the xstrndup() call in the CHOP branch.
915char *
916AnyP::Uri::cleanup(const char *uri)
917{
918 char *cleanedUri = nullptr;
919 switch (Config.uri_whitespace) {
922 cleanedUri = xstrndup(rfc1738_do_escape(uri, flags), MAX_URL);
923 break;
924 }
925
928 break;
929
930 case URI_WHITESPACE_CHOP: {
// pos is the offset of the first whitespace character, or the length of uri
// when it has none.
931 const auto pos = strcspn(uri, w_space);
932 char *choppedUri = nullptr;
933 if (pos < strlen(uri))
934 choppedUri = xstrndup(uri, pos + 1);
935 cleanedUri = xstrndup(rfc1738_do_escape(choppedUri ? choppedUri : uri,
937 cleanedUri[pos] = '\0';
938 xfree(choppedUri);
939 break;
940 }
941
944 default: {
945 // TODO: avoid duplication with urlParse()
946 const char *t;
// Copy uri into a temporary buffer while dropping every whitespace character,
// then escape what is left.
947 char *tmp_uri = static_cast<char*>(xmalloc(strlen(uri) + 1));
948 char *q = tmp_uri;
949 t = uri;
950 while (*t) {
951 if (!xisspace(*t)) {
952 *q = *t;
953 ++q;
954 }
955 ++t;
956 }
957 *q = '\0';
958 cleanedUri = xstrndup(rfc1738_escape_unescaped(tmp_uri), MAX_URL);
959 xfree(tmp_uri);
960 break;
961 }
962 }
963
964 assert(cleanedUri);
965 return cleanedUri;
966}
967
#define Here()
source code location of the caller
Definition: Here.h:15
#define SQUIDSBUFPH
Definition: SBuf.h:31
#define SQUIDSBUFPRINT(s)
Definition: SBuf.h:32
class SquidConfig Config
Definition: SquidConfig.cc:12
int stringHasWhitespace(const char *)
Definition: String.cc:366
int stringHasCntl(const char *)
Definition: String.cc:373
std::ostream & CurrentException(std::ostream &os)
prints active (i.e., thrown but not yet handled) exception
bool urlCheckRequest(const HttpRequest *r)
Definition: Uri.cc:841
static const char valid_hostname_chars[]
Definition: Uri.cc:27
static const char valid_hostname_chars_u[]
Definition: Uri.cc:21
bool urlIsRelative(const char *url)
Definition: Uri.cc:684
void urlInitialize(void)
Definition: Uri.cc:137
int matchDomainName(const char *h, const char *d, MatchDomainNameFlags flags)
Definition: Uri.cc:744
char * urlCanonicalCleanWithoutRequest(const SBuf &url, const HttpRequestMethod &method, const AnyP::UriScheme &scheme)
Definition: Uri.cc:628
static AnyP::UriScheme uriParseScheme(Parser::Tokenizer &tok)
Definition: Uri.cc:188
static const CharacterSet & UserInfoChars()
Characters which are valid within a URI userinfo section.
Definition: Uri.cc:36
const char * urlCanonicalFakeHttps(const HttpRequest *request)
Definition: Uri.cc:657
bool urlAppendDomain(char *host)
apply append_domain config to the given hostname
Definition: Uri.cc:225
MatchDomainNameFlags
Definition: Uri.h:225
@ mdnRejectSubsubDomains
Definition: Uri.h:228
@ mdnHonorWildcards
Definition: Uri.h:227
#define assert(EX)
Definition: assert.h:17
static AnyP::ProtocolType FindProtocolType(const SBuf &)
Definition: UriScheme.cc:52
Port defaultPort() const
Definition: UriScheme.cc:71
SBuf image() const
Definition: UriScheme.h:57
static const SBuf & SlashPath()
the static '/' default URL-path
Definition: Uri.cc:92
void parseUrn(Parser::Tokenizer &)
Definition: Uri.cc:534
AnyP::UriScheme const & getScheme() const
Definition: Uri.h:67
void touch()
clear the cached URI display forms
Definition: Uri.cc:562
SBuf & authority(bool requirePort=false) const
Definition: Uri.cc:570
void path(const char *p)
Definition: Uri.h:101
char host_[SQUIDHOSTNAMELEN]
string representation of the URI authority name or IP
Definition: Uri.h:176
const char * host(void) const
Definition: Uri.h:85
Uri()
Definition: Uri.h:35
static char * cleanup(const char *uri)
Definition: Uri.cc:916
void addRelativePath(const char *relUrl)
Definition: Uri.cc:720
SBuf & absolute() const
Definition: Uri.cc:592
static const SBuf & Asterisk()
the static '*' pseudo-URI
Definition: Uri.cc:85
void port(const Port p)
reset authority port subcomponent
Definition: Uri.h:95
const SBuf & path() const
Definition: Uri.cc:125
void host(const char *src)
Definition: Uri.cc:99
bool parse(const HttpRequestMethod &, const SBuf &url)
Definition: Uri.cc:254
SBuf hostOrIp() const
Definition: Uri.cc:114
static SBuf Encode(const SBuf &, const CharacterSet &expected)
Definition: Uri.cc:56
optimized set of C chars, with quick membership test and merge support
Definition: CharacterSet.h:18
CharacterSet & rename(const char *label)
change name; handy in const declarations that use operators
Definition: CharacterSet.h:61
static const CharacterSet DIGIT
Definition: CharacterSet.h:84
static const CharacterSet ALPHA
Definition: CharacterSet.h:76
CharacterSet & remove(const unsigned char c)
remove a given character from the character set
Definition: CharacterSet.cc:54
int64_t getInt64(Http::HdrType id) const
Definition: HttpHeader.cc:1166
HttpRequestMethod method
Definition: HttpRequest.h:114
char * canonicalCleanUrl() const
Definition: HttpRequest.cc:817
AnyP::Uri url
the request URI
Definition: HttpRequest.h:115
HttpHeader header
Definition: Message.h:74
bool prefix(SBuf &returnedToken, const CharacterSet &tokenChars, SBuf::size_type limit=SBuf::npos)
Definition: Tokenizer.cc:79
const SBuf & remaining() const
the remaining unprocessed section of buffer
Definition: Tokenizer.h:44
bool atEnd() const
whether the end of the buffer has been reached
Definition: Tokenizer.h:41
bool skip(const SBuf &tokenToSkip)
Definition: Tokenizer.cc:177
Definition: Raw.h:21
Definition: SBuf.h:94
const char * rawContent() const
Definition: SBuf.cc:509
static const size_type npos
Definition: SBuf.h:99
char at(size_type pos) const
Definition: SBuf.h:249
const char * c_str()
Definition: SBuf.cc:516
void reserveCapacity(size_type minCapacity)
Definition: SBuf.cc:105
size_type length() const
Returns the number of bytes stored in SBuf.
Definition: SBuf.h:415
SBuf & appendf(const char *fmt,...) PRINTF_FORMAT_ARG2
Definition: SBuf.cc:229
bool isEmpty() const
Definition: SBuf.h:431
const_iterator begin() const
Definition: SBuf.h:583
SBuf & append(const SBuf &S)
Definition: SBuf.cc:185
const_reverse_iterator rbegin() const
Definition: SBuf.h:591
void reserveSpace(size_type minSpace)
Definition: SBuf.h:440
size_t appendDomainLen
Definition: SquidConfig.h:221
int strip_query_terms
Definition: SquidConfig.h:298
struct SquidConfig::@106 onoff
char * appendDomain
Definition: SquidConfig.h:220
int uri_whitespace
Definition: SquidConfig.h:455
int check_hostnames
Definition: SquidConfig.h:314
int allow_underscore
Definition: SquidConfig.h:315
an std::runtime_error with thrower location info
Definition: TextException.h:21
#define w_space
#define MYNAME
Definition: Stream.h:236
#define DBG_IMPORTANT
Definition: Stream.h:38
#define debugs(SECTION, LEVEL, CONTENT)
Definition: Stream.h:194
#define URI_WHITESPACE_CHOP
Definition: defines.h:129
#define URI_WHITESPACE_STRIP
Definition: defines.h:126
#define URI_WHITESPACE_DENY
Definition: defines.h:130
#define URI_WHITESPACE_ALLOW
Definition: defines.h:127
#define URI_WHITESPACE_ENCODE
Definition: defines.h:128
#define MAX_URL
Definition: defines.h:78
static int port
Definition: ldap_backend.cc:70
#define MAX_IPSTRLEN
Length of buffer that needs to be allocated to old a null-terminated IP-string.
Definition: forward.h:25
static uint32 B
Definition: md4.c:43
const char * ProtocolType_str[]
@ PROTO_NONE
Definition: ProtocolType.h:24
@ PROTO_HTTPS
Definition: ProtocolType.h:27
@ PROTO_UNKNOWN
Definition: ProtocolType.h:42
@ PROTO_HTTP
Definition: ProtocolType.h:25
@ PROTO_FTP
Definition: ProtocolType.h:26
@ PROTO_WHOIS
Definition: ProtocolType.h:37
@ PROTO_MAX
Definition: ProtocolType.h:43
@ PROTO_CACHE_OBJECT
Definition: ProtocolType.h:31
@ PROTO_URN
Definition: ProtocolType.h:36
@ PROTO_WAIS
Definition: ProtocolType.h:30
@ METHOD_TRACE
Definition: MethodType.h:30
@ METHOD_PUT
Definition: MethodType.h:27
@ METHOD_OPTIONS
Definition: MethodType.h:31
@ METHOD_CONNECT
Definition: MethodType.h:29
@ METHOD_GET
Definition: MethodType.h:25
@ METHOD_PURGE
Definition: MethodType.h:92
@ METHOD_HEAD
Definition: MethodType.h:28
#define xfree
#define xmalloc
#define RFC1738_ESCAPE_NOSPACE
Definition: rfc1738.h:22
char * rfc1738_do_escape(const char *url, int flags)
Definition: rfc1738.c:56
#define RFC1738_ESCAPE_UNESCAPED
Definition: rfc1738.h:25
#define rfc1738_escape_unescaped(x)
Definition: rfc1738.h:59
void rfc1738_unescape(char *url)
Definition: rfc1738.c:146
#define SQUIDHOSTNAMELEN
Definition: rfc2181.h:30
#define LOCAL_ARRAY(type, name, size)
Definition: squid.h:68
Definition: parse.c:160
#define xisspace(x)
Definition: xis.h:15
#define xtolower(x)
Definition: xis.h:17
char * xstrncpy(char *dst, const char *src, size_t n)
Definition: xstring.cc:37
char * xstrndup(const char *s, size_t n)
Definition: xstring.cc:56

 

Introduction

Documentation

Support

Miscellaneous

Web Site Translations

Mirrors