This version of connection-retry patch for Squid 1.1.20 was modify from Mike Pelletier's patch for Squid 1.1.14. I don't know if there has any error on it.But it seems run smoothly on my site.Follow are the message in original patch. Modify by Erick C. Chang This version of the connection-retry patch for Squid 1.1.14 has only very minor differences from the "+cruft" version. In trying to get a patch to fix the Linux and SunOS problems out quickly, I left a spurious variable in the code, and this patch simply removes it and changes the way the values are transferred, shrinking the executable at my site by a hundred bytes or so. It also changes the debug() items in my parts of the code to display the "FD" value in a more standard way, as "function: FD %d" rather than "function(%d):". This makes it easier to sort through logfile messages, as I discovered. Questions & Problems: Mike Pelletier --- ./include/version.h.orig Mon Jul 14 13:40:10 1997 +++ ./include/version.h Thu Jul 31 10:45:25 1997 @@ -3,7 +3,7 @@ * SQUID_VERSION - String for version id of this distribution */ #ifndef SQUID_VERSION -#define SQUID_VERSION "1.1.20" +#define SQUID_VERSION "1.1.20+retry" #endif #ifndef SQUID_RELEASE_TIME --- ./src/comm.h.orig Sat May 31 19:14:01 1997 +++ ./src/comm.h Thu Jul 31 12:30:53 1997 @@ -143,9 +143,9 @@ struct sockaddr_in S; CCH callback; void *data; -#if RETRY_CONNECT - int tries; -#endif + unsigned char tries; + unsigned char addrcount; + int connstart; } ConnectStateData; typedef struct fde { @@ -172,6 +172,7 @@ unsigned int comm_type; time_t stall_until; /* don't select for read until this time reached */ RWStateData *rwstate; /* State data for comm_write */ + time_t conntimeout; /* Multi-addr retry connect timeout */ } FD_ENTRY; extern FD_ENTRY *fd_table; --- ./src/ipcache.c.orig Mon Jun 23 15:02:35 1997 +++ ./src/ipcache.c Thu Jul 31 10:45:25 1997 @@ -246,6 +246,7 @@ } if (i->status == IP_CACHED) { safe_free(i->addrs.in_addrs); + safe_free(i->addrs.bad_addrs); debug(14, 5, "ipcache_release: Released IP cached record for '%s'.\n", i->name); } @@ -415,14 +416,19 @@ int addr_count = 0; int k; safe_free(i->addrs.in_addrs); + safe_free(i->addrs.bad_addrs); while ((addr_count < 255) && *(hp->h_addr_list + addr_count)) ++addr_count; i->addrs.count = (unsigned char) addr_count; i->addrs.in_addrs = xcalloc(addr_count, sizeof(struct in_addr)); - for (k = 0; k < addr_count; k++) + i->addrs.bad_addrs = xcalloc(addr_count, sizeof(unsigned char)); + i->addrs.badcount = 0; + for (k = 0; k < addr_count; k++) { xmemcpy(&i->addrs.in_addrs[k].s_addr, *(hp->h_addr_list + k), hp->h_length); + i->addrs.bad_addrs[k] = FALSE; + } } static ipcache_entry * @@ -514,8 +520,10 @@ i.addrs.count = (unsigned char) ipcount; if (ipcount == 0) { i.addrs.in_addrs = NULL; + i.addrs.bad_addrs = NULL; } else { i.addrs.in_addrs = xcalloc(ipcount, sizeof(struct in_addr)); + i.addrs.bad_addrs = xcalloc(ipcount, sizeof(unsigned char)); } for (k = 0; k < ipcount; k++) { if ((token = strtok(NULL, w_space)) == NULL) @@ -771,6 +779,7 @@ ip_table = hash_create(urlcmp, 229, hash4); /* small hash table */ memset(&static_addrs, '\0', sizeof(ipcache_addrs)); static_addrs.in_addrs = xcalloc(1, sizeof(struct in_addr)); + static_addrs.bad_addrs = xcalloc(1, sizeof(unsigned char)); ipcache_high = (long) (((float) Config.ipcache.size * (float) Config.ipcache.high) / (float) 100); @@ -852,6 +861,8 @@ /* only dnsHandleRead() can change from DISPATCHED to CACHED */ static_addrs.count = 1; static_addrs.cur = 0; + static_addrs.badcount = 0; + static_addrs.bad_addrs[0] = FALSE; xmemcpy(&static_addrs.in_addrs[0].s_addr, *(hp->h_addr_list), hp->h_length); @@ -883,15 +894,17 @@ ipcacheStatPrint(ipcache_entry * i, StoreEntry * sentry) { int k; - storeAppendPrintf(sentry, " {%-32.32s %c%c %6d %6d %d", + storeAppendPrintf(sentry, " {%-32.32s %c%c %6d %6d %2d(%2d)", i->name, ipcache_status_char[i->status], i->locks ? 'L' : ' ', (int) (squid_curtime - i->lastref), (int) (i->expires - squid_curtime), - (int) i->addrs.count); + (int) i->addrs.count, + (int) i->addrs.badcount); for (k = 0; k < (int) i->addrs.count; k++) - storeAppendPrintf(sentry, " %15s", inet_ntoa(i->addrs.in_addrs[k])); + storeAppendPrintf(sentry, " %15s-%3s", inet_ntoa(i->addrs.in_addrs[k]), + i->addrs.bad_addrs[k] ? "BAD" : "OK "); storeAppendPrintf(sentry, close_bracket); } @@ -1002,6 +1015,8 @@ static_addrs.count = 1; static_addrs.cur = 0; static_addrs.in_addrs[0].s_addr = ip.s_addr; + static_addrs.bad_addrs[0] = FALSE; + static_addrs.badcount = 0; return &static_addrs; } @@ -1032,20 +1047,65 @@ ipcache_release(i); } +/* Cycles to next not-marked-bad address, or next address if all are bad */ void ipcacheCycleAddr(const char *name) { ipcache_entry *i; + ipcache_addrs *ia; + unsigned char fullcircle; if ((i = ipcache_get(name)) == NULL) return; if (i->status != IP_CACHED) return; - if (++i->addrs.cur == i->addrs.count) - i->addrs.cur = 0; + ia = &i->addrs; + fullcircle = ia->cur; + while (ia->bad_addrs[ia->cur]) { + if (++ia->cur == ia->count) ia->cur = 0; + if (ia->cur == fullcircle) { /* All bad, just use next one */ + if (++ia->cur == ia->count) ia->cur = 0; + break; + } + } +} + +/* "MarkBad" function must leave the "cur" pointer at the next + * available good address, or the next bad address, in the list. + * This simulates the functionality of RemoveBadAddr() which it + * replaces. Marking, instead of removing, allows bad addresses + * to be retried as a last resort before returning an error to + * the user. + */ +void +ipcacheMarkBadAddr(const char *name, struct in_addr addr) +{ + ipcache_entry *i; + ipcache_addrs *ia; + int k; + if ((i = ipcache_get(name)) == NULL) + return; + ia = &i->addrs; + for (k = 0; k < (int) ia->count; k++) { + if (ia->in_addrs[k].s_addr == addr.s_addr) + break; + } + if (k == (int) ia->count) + return; + if (!ia->bad_addrs[k]) { + ia->bad_addrs[k] = TRUE; ia->badcount++; + debug(14, 1, "%s(%s) marked bad\n", name, inet_ntoa(ia->in_addrs[k])); + if (ia->badcount != ia->count) { /* at least one good address left */ + i->expires = squid_curtime + Config.positiveDnsTtl; + while (ia->bad_addrs[ia->cur]) + if (++ia->cur == ia->count) ia->cur = 0; + return; + } + } + if (++ia->cur == ia->count) ia->cur = 0; } void -ipcacheRemoveBadAddr(const char *name, struct in_addr addr) +ipcacheMarkGoodAddr(const char *name, struct in_addr addr) { ipcache_entry *i; ipcache_addrs *ia; @@ -1059,11 +1119,12 @@ } if (k == (int) ia->count) return; - ia->in_addrs[k] = ia->in_addrs[--ia->count]; - if (ia->count == 0) - i->expires = squid_curtime; - if (ia->cur >= ia->count) - ia->cur = 0; + i->expires = squid_curtime + Config.positiveDnsTtl; + if (ia->bad_addrs[k]) { + ia->bad_addrs[k] = FALSE; ia->badcount--; + i->expires = squid_curtime + Config.positiveDnsTtl; + debug(14, 1, "%s(%s) marked good\n", name, inet_ntoa(ia->in_addrs[k])); + } } void @@ -1083,6 +1144,7 @@ for (j = 0; j < k; j++) { i = *(list + j); safe_free(i->addrs.in_addrs); + safe_free(i->addrs.bad_addrs); safe_free(i->name); safe_free(i->error_message); safe_free(i); --- ./src/ipcache.h.orig Mon Apr 21 17:44:41 1997 +++ ./src/ipcache.h Thu Jul 31 10:45:25 1997 @@ -121,6 +121,8 @@ typedef struct { unsigned char count; unsigned char cur; + unsigned char badcount; + unsigned char *bad_addrs; struct in_addr *in_addrs; } ipcache_addrs; @@ -155,7 +157,8 @@ extern int ipcacheQueueDrain _PARAMS((void)); extern void ipcacheOpenServers _PARAMS((void)); extern void ipcacheCycleAddr _PARAMS((const char *name)); -extern void ipcacheRemoveBadAddr _PARAMS((const char *name, struct in_addr)); +extern void ipcacheMarkBadAddr _PARAMS((const char *name, struct in_addr)); +extern void ipcacheMarkGoodAddr _PARAMS((const char *name, struct in_addr)); extern void ipcacheFreeMemory _PARAMS((void)); extern ipcache_addrs *ipcacheCheckNumeric _PARAMS((const char *name)); extern void ipcache_restart _PARAMS((void)); --- ./src/errorpage.c.orig Thu Jul 10 21:09:39 1997 +++ ./src/errorpage.c Thu Jul 31 10:45:26 1997 @@ -91,7 +91,7 @@ "Client(s) dropped connection before transmission completed.\nObject fetching is aborted.",}, {"ERR_CONNECT_FAIL", "Connection Failed", - "The remote site or server may be down. Please try again soon."}, + "The remote site or server may be busy or down. Please try again later."}, {"ERR_INVALID_REQUEST", "Invalid HTTP request", "Please double check it, or ask for assistance."}, @@ -106,7 +106,7 @@ "The cache is currently very busy. Please try again."}, {"ERR_DNS_FAIL", "DNS name lookup failure", - "The named host probably does not exist."}, + "The named host does not exist or its address can't be found."}, {"ERR_NOT_IMPLEMENTED", "Protocol Not Supported", "The cache does not know about the requested protocol."}, --- ./src/squid.conf.pre.in.orig Wed Jul 9 12:47:59 1997 +++ ./src/squid.conf.pre.in Thu Jul 31 10:45:26 1997 @@ -1223,3 +1223,34 @@ # it is probably okay to set this to 'on'. # #icp_hit_stale off + +# TAG: minimum_retry_timeout +# This specifies the minimum connect timeout, for when the +# connect timeout is reduced to compensate for the availability of +# multiple IP addresses. +# +# When a connection to a host is initiated, and that host has +# several IP addresses, the default connection timeout is +# reduced by dividing it by the number of addresses. So, +# a site with 15 addresses would then have a timeout of 8 +# seconds for each address attempted. To avoid having the +# timeout reduced to the point where even a working host +# would not have a chance to respond, this setting is +# provided. The default, and the minimum value, is five +# seconds, and the maximum value is sixty seconds, or half of +# connect_timeout, whichever is greater and less than +# connect_timeout. +# +#minimum_retry_timeout 5 + +# TAG: maximum_single_addr_tries +# This sets the maximum number of connection attempts for a +# host that only has one address (for multiple-address hosts, +# each address is tried once). +# +# The default value is three tries, the (not recommended) +# maximum is 255 tries. A warning message will be generated if +# it is set to a value greater than ten. +# +#maximum_single_addr_tries 3 + --- ./src/cache_cf.h.orig Wed Jul 9 12:47:57 1997 +++ ./src/cache_cf.h Thu Jul 31 10:45:26 1997 @@ -279,6 +279,10 @@ int query_icmp; int icp_hit_stale; } Options; + struct { + int min_timeout; + int max_single_addr; + } Retry; char *fake_ua; }; --- ./src/cache_cf.c.orig Wed Jul 9 12:47:56 1997 +++ ./src/cache_cf.c Thu Jul 31 10:45:27 1997 @@ -136,6 +136,9 @@ #define DefaultRedirectChildren 5 /* 5 processes */ #define DefaultMaxRequestSize (100 << 10) /* 100Kb */ +#define DefaultSingleAddrTries 3 +#define DefaultMinRetryTimeout 5 + #define DefaultHttpPortNum CACHE_HTTP_PORT #define DefaultIcpPortNum CACHE_ICP_PORT @@ -1390,6 +1393,10 @@ parseIntegerValue(&Config.Netdb.low); else if (!strcmp(token, "netdb_ping_period")) parseTimeLine(&Config.Netdb.period, "seconds"); + else if (!strcmp(token, "minimum_retry_timeout")) + parseTimeLine(&Config.Retry.min_timeout, "seconds"); + else if (!strcmp(token, "maximum_single_addr_tries")) + parseIntegerValue(&Config.Retry.max_single_addr); /* If unknown, treat as a comment line */ else { @@ -1400,6 +1407,42 @@ } /* Sanity checks */ +/* If connect_timeout is shorter than the default, don't bug the admin + * with this message unless retry timeout is greater than connect timeout. */ + if (Config.Retry.min_timeout > Config.connectTimeout / 2 + && (Config.Retry.min_timeout > 60 + || Config.Retry.min_timeout >= Config.connectTimeout)) { + printf("WARNING: minimum_retry_timeout is more than half of connect_timeout\n"); + printf(" This can cause very long waits for multi-address host retries.\n"); + printf(" Resetting half of connect_timeout (%d seconds).\n", + Config.connectTimeout / 2); + Config.Retry.min_timeout = Config.connectTimeout / 2; + fflush(stdout); + } +/* Make sure we, or they, didn't reset it to below five seconds */ + if (Config.Retry.min_timeout < 5) { + printf("WARNING: minimum_retry_timeout is less than five seconds.\n"); + printf(" This can cause spurious timeouts on multi-address hosts.\n"); + printf(" Resetting to 5 seconds.\n"); + Config.Retry.min_timeout = 5; + fflush(stdout); + } + if (Config.Retry.max_single_addr > 255) { /* value is used in uchar vars */ + printf("WARNING: maximum_single_addr_tries set to a bad value: %d\n", + Config.Retry.max_single_addr); + printf(" Setting it to the maximum (255).\n"); + Config.Retry.max_single_addr = 255; + fflush(stdout); + } + if (Config.Retry.max_single_addr > 10) { + printf("WARNING: maximum_single_addr_tries is larger than 10.\n"); + printf(" This can cause increased network traffic and very\n"); + printf(" long delays to display an error for a nonexistant,\n"); + printf(" busy, or unavailable site.\n"); + fflush(stdout); + } else if (Config.Retry.max_single_addr < 1) + Config.Retry.max_single_addr = 1; + if (Config.lifetimeDefault < Config.readTimeout) { printf("WARNING: client_lifetime (%d seconds) is less than read_timeout (%d seconds).\n", Config.lifetimeDefault, Config.readTimeout); @@ -1656,3 +1656,5 @@ Config.Options.enable_purge = DefaultOptionsEnablePurge; Config.Options.client_db = DefaultOptionsClientDb; Config.Options.query_icmp = DefaultOptionsQueryIcmp; + Config.Retry.min_timeout = DefaultMinRetryTimeout; + Config.Retry.max_single_addr = DefaultSingleAddrTries; --- ./src/comm.c.orig Mon Dec 1 15:09:41 1997 +++ ./src/comm.c Thu Jan 15 17:38:01 1998 @@ -149,7 +149,10 @@ static void commSetTcpNoDelay _PARAMS((int)); #endif static void commSetTcpRcvbuf _PARAMS((int, int)); +static void commSetConntimeo _PARAMS((int, long)); static void commConnectFree _PARAMS((int fd, void *data)); +static int commResetFd _PARAMS((int, ConnectStateData *)); +static long commBackoffTimeout _PARAMS((unsigned char)); static void commConnectHandle _PARAMS((int fd, void *data)); static void commHandleWrite _PARAMS((int fd, RWStateData * state)); @@ -339,30 +342,41 @@ xfree(cs); } +/* Resets an FD so that it can be re-connect()ed */ static int -commRetryConnect(int fd, ConnectStateData * connectState) +commResetFd(int fd, ConnectStateData *cs) { -#if RETRY_CONNECT - int fd2; - if (++connectState->tries == 4) - return 0; - fd2 = socket(AF_INET, SOCK_STREAM, 0); - if (fd2 < 0) { - debug(5, 0, "commRetryConnect: socket: %s\n", xstrerror()); - return 0; - } - if (dup2(fd2, fd) < 0) { - debug(5, 0, "commRetryConnect: dup2: %s\n", xstrerror()); - return 0; + int fd2; + fd2 = socket(AF_INET, SOCK_STREAM, 0); + if (fd2 < 0) { + debug(50,0,"commResetFd: socket: %s\n", xstrerror()); + return FALSE; + } + if (dup2(fd2, fd) < 0) { + debug(50,0,"commResetFd: dup2: %s\n", xstrerror()); + return FALSE; + } + close(fd2); + commSetConntimeo(fd, commBackoffTimeout(cs->addrcount)); + commSetNonBlocking(fd); + debug(50,10,"commResetFd: fd %d reset sucessfully\n", fd); + return TRUE; +} + +/* Back off the socket timeout if there's several addresses available */ +static time_t +commBackoffTimeout(unsigned char numaddrs) +{ + time_t timeout; + timeout = (time_t)Config.connectTimeout; + if (numaddrs > 2) { + timeout = (time_t)(Config.connectTimeout / numaddrs); + if (timeout < Config.Retry.min_timeout) + timeout = (time_t)Config.Retry.min_timeout; } - commSetNonBlocking(fd); - close(fd2); - return 1; -#else - debug(5, 2, "commRetryConnect not supported\n"); - return 0; -#endif + return timeout; } + /* Connect SOCK to specified DEST_PORT at DEST_HOST. */ static void @@ -382,7 +396,11 @@ } connectState->S.sin_family = AF_INET; connectState->S.sin_addr = ia->in_addrs[ia->cur]; + ipcacheCycleAddr(connectState->host); connectState->S.sin_port = htons(connectState->port); + connectState->addrcount = ia->count; + connectState->connstart = getCurrentTime(); + commSetConntimeo(fd, commBackoffTimeout(ia->count)); if (Config.Log.log_fqdn) fqdncache_gethostbyaddr(connectState->S.sin_addr, FQDN_LOOKUP_IF_MISS); } @@ -397,22 +415,58 @@ case COMM_OK: if (vizSock > -1) vizHackSendPkt(&connectState->S, 2); - ipcacheCycleAddr(connectState->host); + ipcacheMarkGoodAddr(connectState->host, connectState->S.sin_addr); + if (connectState->tries > 0) + if (safe_inet_addr(connectState->host, NULL)) /* numeric? */ + debug(5,1, + "commConnectHandle: FD %d %s conn succeeded (try %d)\n", + fd, + connectState->host, + connectState->tries + 1); + else + debug(5,1, + "commConnectHandle: FD %d %s[%s] conn suceeded (try %d)\n", + fd, + connectState->host, + inet_ntoa(connectState->S.sin_addr), + connectState->tries + 1); connectState->callback(fd, COMM_OK, connectState->data); break; default: - if (commRetryConnect(fd, connectState)) { - debug(5, 1, "Retrying connection to %s\n", connectState->host); - connectState->S.sin_addr.s_addr = 0; - ipcacheCycleAddr(connectState->host); + connectState->tries++; + ipcacheMarkBadAddr(connectState->host, connectState->S.sin_addr); + getCurrentTime(); + /* Retry single addr hosts three times, but only if the connect time + * was smaller than half of the default connect timeout. Timeouts + * on multi-addr hosts *are* retried, however */ + if ((connectState->addrcount == 1 + && connectState->tries < Config.Retry.max_single_addr + && (squid_curtime - connectState->connstart) < /* conn time */ + (int)(Config.connectTimeout / 2)) + || (connectState->addrcount > 1 + && connectState->tries <= connectState->addrcount)) { + commResetFd(fd, connectState); + if (connectState->addrcount == 1) /* set min timeout for retry */ + commSetConntimeo(fd, commBackoffTimeout((unsigned char)100)); + if (safe_inet_addr(connectState->host, NULL)) /* numeric? */ + debug(5,1,"commConnectHandle: FD %d %s conn fail, retrying\n", + fd, + connectState->host); + else + debug(5,1, + "commConnectHandle: FD %d %s[%s] conn fail, retrying\n", + fd, + connectState->host, + inet_ntoa(connectState->S.sin_addr)); + connectState->S.sin_addr.s_addr = (long)0; commConnectHandle(fd, connectState); + break; } else { - ipcacheRemoveBadAddr(connectState->host, connectState->S.sin_addr); if (Config.Options.test_reachability) netdbDeleteAddrNetwork(connectState->S.sin_addr); connectState->callback(fd, COMM_ERROR, connectState->data); + break; } - break; } } @@ -455,14 +509,12 @@ if (connect(sock, (struct sockaddr *) address, sizeof(struct sockaddr_in)) < 0) { cerrno = errno; switch (cerrno) { - case EALREADY: - return COMM_ERROR; - /* NOTREACHED */ #if EAGAIN != EWOULDBLOCK case EAGAIN: #endif case EWOULDBLOCK: case EINPROGRESS: + case EALREADY: status = COMM_INPROGRESS; break; case EISCONN: @@ -486,11 +538,12 @@ /* set the lifetime for this client */ if (status == COMM_OK) { lft = comm_set_fd_lifetime(sock, Config.lifetimeDefault); - debug(5, 10, "comm_connect_addr: FD %d connected to %s:%d, lifetime %d.\n", + debug(5,10,"comm_connect_addr: FD %d connected to %s:%d, lifetime %d.\n", sock, conn->ipaddr, conn->remote_port, lft); } else if (status == COMM_INPROGRESS) { - lft = comm_set_fd_lifetime(sock, Config.connectTimeout); - debug(5, 10, "comm_connect_addr: FD %d connection pending, lifetime %d\n", + /* For multi-addr hosts, the timeout is reduced in commConnectHandle */ + lft = comm_set_fd_lifetime(sock, (int)conn->conntimeout); + debug(5,10,"comm_connect_addr: FD %d connection pending, lifetime %d\n", sock, lft); } /* Add new socket to list of open sockets. */ @@ -1263,6 +1316,14 @@ fd, size, xstrerror()); } +static void +commSetConntimeo(int fd, time_t size) +{ + FD_ENTRY *fde = &fd_table[fd]; + fde->conntimeout = size; +} + + int commSetNonBlocking(int fd) { @@ -1481,7 +1542,6 @@ } else { debug(5, 5, "checkLifetimes: FD %d: No handlers, calling comm_close()\n", fd); comm_close(fd); - comm_cleanup_fd_entry(fd); } if (fde->openned) { /* still opened */