Re: new patch & new ideas (squid+epoll)

From: David Nicklay <dnicklay@dont-contact.us>
Date: Fri, 07 Nov 2003 11:36:44 -0500

Hi,

oops. Forgot to post the attachment. Here goes.

Henrik Nordstrom wrote:
> Can we please have the patch clean again.. (preferably as a MIME
> attachment). I don't seem to have the original message left in my
> archives, and trying to recover the patch from the inline discussion is a
> bit cumbersome due to linewraps and other destructive email formatting
> effects..
>
> Regards
> Henrik
>
>
> On Thu, 6 Nov 2003, David Nicklay wrote:
>
>
>>Hi,
>>
>>I finally took some time to test this. Gonzalo's patch seems to address
>> a lot of outstanding issues with comm_epoll including:
>>
>>- CPU usage problem w/ idle clients (both local and remote)
>>- epoll kernel table being out of sync with fde table
>>- missed epoll table updates (happens on my boxes but not Gonzalo's)
>>
>>Could someone apply this to the squid-3.0 HEAD?
>>
>>Kudos to Gonzalo.
>>
>>
>>>Gonzalo Arana wrote:
>>>
>>>
>>>>Hi,
>>>>
>>>>(yes, it's me again :-( ).
>>>>
>>>>I have this new patch which
>>>>
>>>>1) issues epoll_ctl(EPOLL_CTL_(ADD|MOD|DEL)) according to handler ==
>>>>NULL or not AND based also on past epoll monitoring state (in
>>>>commSetSelect).
>>>>
>>>>2) does some profiling in case it was defined on compile time
>>>>(PROF_(start|stop) ), and statistics counters are updated (haven't been
>>>>checked).
>>>>
>>>>3) removes interest when no handler is defined for event (but
>>>>read_pending is set if applies)
>>>>
>>>>4) returns COMM_TIMEOUT when no event has been detected
>>>>
>>>>Last 2 'features' were already in my original patch.
>>>>
>>>>BTW, I got 100% CPU problem when web client (wget) is run on the same
>>>>machine where squid does. Running wget in other machine, gives me less
>>>>than 40% of CPU usage.
>>>>
>>>>Hope this solves/explains the behaviour you had been having (english is
>>>>not my native language, so sorry if 'had been having' is not correct)
>>>>about CPU usage.
>>>>
>>>>I'm posting this to you since you are the original author of
>>>>comm_epoll.cc, and I don't want to add noise to squid devel-list.
>>>>
>>>>Thank you very much in advance,
>>>>
>>>>
>>>>
>>>>------------------------------------------------------------------------
>>>>
>>>>diff -bur squid-3.0-PRE3-20031008-CVS/src/comm_epoll.cc
>>>>squid-3.0-PRE3-20031008-EPOLL-FIXED/src/comm_epoll.cc
>>>>--- squid-3.0-PRE3-20031008-CVS/src/comm_epoll.cc Sun Aug 3
>>>>18:38:15 2003
>>>>+++ squid-3.0-PRE3-20031008-EPOLL-FIXED/src/comm_epoll.cc Fri Oct
>>>>17 13:01:05 2003
>>>>@@ -94,6 +94,15 @@
>>>> }
>>>> }
>>>>
>>>>+static const char* epolltype_atoi(int x) {
>>>>+ switch(x) {
>>>>+ case EPOLL_CTL_ADD: return "EPOLL_CTL_ADD";
>>>>+ case EPOLL_CTL_DEL: return "EPOLL_CTL_DEL";
>>>>+ case EPOLL_CTL_MOD: return "EPOLL_CTL_MOD";
>>>>+ default: return "UNKNOWN_EPOLLCTL_OP";
>>>>+ }
>>>>+}
>>>>+
>>>> /*
>>>> * comm_setselect
>>>> *
>>>>@@ -106,81 +115,37 @@
>>>> void *client_data, time_t timeout)
>>>> {
>>>> fde *F = &fd_table[fd];
>>>>- int change = 0;
>>>>- int events = 0;
>>>>- int pollin = 0;
>>>>- int pollout = 0;
>>>>-
>>>>+ int epoll_ctl_type = 0;
>>>> struct epoll_event ev;
>>>> assert(fd >= 0);
>>>> assert(F->flags.open);
>>>> debug(5, DEBUG_EPOLL ? 0 : 8)
>>>>("commSetSelect(fd=%d,type=%u,handler=%p,client_data=%p,timeout=%ld)\n",
>>>> fd,type,handler,client_data,timeout);
>>>>
>>>>- if(F->read_handler != NULL)
>>>>- pollin = 1;
>>>>-
>>>>- if(F->write_handler != NULL)
>>>>- pollout = 1;
>>>>+ ev.events = 0;
>>>>+ ev.data.fd = fd;
>>>>
>>>> if (type & COMM_SELECT_READ) {
>>>>- if(F->read_handler != handler)
>>>>- change = 1;
>>>>-
>>>>- if(handler == NULL)
>>>>- pollin = 0;
>>>>- else
>>>>- pollin = 1;
>>>>-
>>>>+ if (handler) ev.events |= EPOLLIN | EPOLLHUP | EPOLLERR;
>>>> F->read_handler = handler;
>>>>-
>>>> F->read_data = client_data;
>>>> }
>>>>
>>>> if (type & COMM_SELECT_WRITE) {
>>>>- if(F->write_handler != handler)
>>>>- change = 1;
>>>>-
>>>>- if(handler == NULL)
>>>>- pollout = 0;
>>>>- else
>>>>- pollout = 1;
>>>>-
>>>>+ if (handler) ev.events |= EPOLLOUT | EPOLLHUP | EPOLLERR;
>>>> F->write_handler = handler;
>>>>-
>>>> F->write_data = client_data;
>>>> }
>>>>
>>>>- if(pollin)
>>>>- events |= EPOLLIN;
>>>>-
>>>>- if(pollout)
>>>>- events |= EPOLLOUT;
>>>>-
>>>>- if(events)
>>>>- events |= EPOLLHUP | EPOLLERR;
>>>>-
>>>>- ev.data.fd = fd;
>>>>-
>>>>- ev.events = events;
>>>>-
>>>>- if(events) {
>>>>- if (epoll_ctl(kdpfd, EPOLL_CTL_MOD, fd, &ev) < 0) {
>>>>- if(errno == ENOENT) {
>>>>- debug(5,4) ("commSetSelect:
>>>>epoll_ctl(,EPOLL_CTL_MOD,,) failed on fd=%d: entry does not exist\n",fd);
>>>>-
>>>>- if (epoll_ctl(kdpfd, EPOLL_CTL_ADD, fd, &ev) < 0)
>>>>- debug(5,1) ("commSetSelect:
>>>>cpoll_ctl(,EPOLL_CTL_ADD,,) failed on fd=%d!: %s\n",fd,xstrerror());
>>>>- } else {
>>>>- debug(5,1) ("commSetSelect:
>>>>cpoll_ctl(,EPOLL_CTL_MOD,,) failed on fd=%d!: %s\n",fd,xstrerror());
>>>>- }
>>>>- }
>>>>- } else if(change) {
>>>>- if(epoll_ctl(kdpfd,EPOLL_CTL_DEL,fd,&ev) < 0) {
>>>>- if(errno != ENOENT)
>>>>- debug(5,1) ("commSetSelect:
>>>>cpoll_ctl(,EPOLL_CTL_DEL,,) failed on fd=%d!: %s\n",fd,xstrerror());
>>>>- else
>>>>- debug(5,4) ("commSetSelect:
>>>>epoll_ctl(,EPOLL_CTL_DEL,,) failed on fd=%d: entry does not exist\n",fd);
>>>>+ if (ev.events != F->epoll_state) {
>>>>+ if (F->epoll_state) // already monitoring something.
>>>>+ epoll_ctl_type = ev.events ? EPOLL_CTL_MOD : EPOLL_CTL_DEL;
>>>>+ else epoll_ctl_type = EPOLL_CTL_ADD;
>>>>+ F->epoll_state = ev.events;
>>>>+
>>>>+ if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) {
>>>>+ debug(5, DEBUG_EPOLL ? 0 : 8) ("commSetSelect:
>>>>epoll_ctl(,%s,,): failed on fd=%d: %s\n",
>>>>+ epolltype_atoi(epoll_ctl_type),
>>>>fd, xstrerror());
>>>> }
>>>> }
>>>>
>>>>@@ -209,7 +174,6 @@
>>>> int num, i,fd;
>>>> fde *F;
>>>> PF *hdl;
>>>>-
>>>> struct epoll_event *cevents;
>>>> static time_t last_timeout = 0;
>>>>
>>>>@@ -238,31 +202,48 @@
>>>>
>>>> getCurrentTime();
>>>>
>>>>+ statHistCount(&statCounter.select_fds_hist, num);
>>>>+
>>>> if (num == 0)
>>>>- return COMM_OK; /* No error.. */
>>>>+ return COMM_TIMEOUT; /* No error.. */
>>>>
>>>>+ PROF_start(comm_handle_ready_fd);
>>>> for (i = 0, cevents = pevents; i < num; i++, cevents++) {
>>>> fd = cevents->data.fd;
>>>> F = &fd_table[fd];
>>>>- debug(5, DEBUG_EPOLL ? 0 : 8) ("comm_select(): got fd=%d
>>>>events=%d F->read_handler=%p F->write_handler=%p\n",
>>>>-
>>>>fd,cevents->events,F->read_handler,F->write_handler);
>>>>+ debug(5, DEBUG_EPOLL ? 0 : 8) ("comm_select(): got fd=%d
>>>>events=%x monitoring=%x F->read_handler=%p F->write_handler=%p\n",
>>>>+
>>>>fd,cevents->events,F->epoll_state,F->read_handler,F->write_handler);
>>>>
>>>>- if(cevents->events & (EPOLLIN|EPOLLHUP|EPOLLERR)) {
>>>>+ //TODO: add EPOLLPRI??
>>>>+ if (cevents->events & (EPOLLIN|EPOLLHUP|EPOLLERR)) {
>>>> if((hdl = F->read_handler) != NULL) {
>>>> debug(5, DEBUG_EPOLL ? 0 : 8) ("comm_select():
>>>>Calling read handler on fd=%d\n",fd);
>>>>+ PROF_start(comm_write_handler);
>>>> F->read_handler = NULL;
>>>> hdl(fd, F->read_data);
>>>>+ PROF_stop(comm_write_handler);
>>>>+ statCounter.select_fds++;
>>>>+ } else {
>>>>+ fd_table[fd].flags.read_pending = 1;
>>>>+ commSetSelect(fd, COMM_SELECT_READ, NULL, NULL, 0);
>>>> }
>>>> }
>>>>
>>>>- if(cevents->events & (EPOLLOUT|EPOLLHUP|EPOLLERR)) {
>>>>+ if (cevents->events & (EPOLLOUT|EPOLLHUP|EPOLLERR)) {
>>>> if((hdl = F->write_handler) != NULL) {
>>>> debug(5, DEBUG_EPOLL ? 0 : 8) ("comm_select():
>>>>Calling write handler on fd=%d\n",fd);
>>>>+ PROF_start(comm_read_handler);
>>>> F->write_handler = NULL;
>>>> hdl(fd, F->write_data);
>>>>+ PROF_stop(comm_read_handler);
>>>>+ statCounter.select_fds++;
>>>>+ } else {
>>>>+ // remove interest since no handler exist for this event.
>>>>+ commSetSelect(fd, COMM_SELECT_WRITE, NULL, NULL, 0);
>>>> }
>>>> }
>>>> }
>>>>+ PROF_stop(comm_handle_ready_fd);
>>>>
>>>> return COMM_OK;
>>>> }
>>>>diff -bur squid-3.0-PRE3-20031008-CVS/src/fd.cc
>>>>squid-3.0-PRE3-20031008-EPOLL-FIXED/src/fd.cc
>>>>--- squid-3.0-PRE3-20031008-CVS/src/fd.cc Fri Feb 21 19:50:08 2003
>>>>+++ squid-3.0-PRE3-20031008-EPOLL-FIXED/src/fd.cc Thu Oct 16
>>>>13:08:44 2003
>>>>@@ -163,6 +163,7 @@
>>>> debug(51, 3) ("fd_open FD %d %s\n", fd, desc);
>>>> F->type = type;
>>>> F->flags.open = 1;
>>>>+ F->epoll_state = 0;
>>>> #ifdef _SQUID_MSWIN_
>>>>
>>>> switch (type) {
>>>>diff -bur squid-3.0-PRE3-20031008-CVS/src/fde.h
>>>>squid-3.0-PRE3-20031008-EPOLL-FIXED/src/fde.h
>>>>--- squid-3.0-PRE3-20031008-CVS/src/fde.h Tue Jul 15 03:50:42 2003
>>>>+++ squid-3.0-PRE3-20031008-EPOLL-FIXED/src/fde.h Fri Oct 17
>>>>12:36:20 2003
>>>>@@ -99,6 +99,7 @@
>>>> int bytes_read;
>>>> int bytes_written;
>>>> int uses; /* ie # req's over persistent conn */
>>>>+ unsigned epoll_state;
>>>>
>>>> struct _fde_disk disk;
>>>> PF *read_handler;
>>>
>>>
>>
>

-- 
David Nicklay
Location: CNN Center - SE0811A
Office: 404-827-2698	Cell: 404-545-6218

diff -bur squid-3.0-PRE3-20031008-CVS/src/comm_epoll.cc squid-3.0-PRE3-20031008-EPOLL-FIXED/src/comm_epoll.cc
--- squid-3.0-PRE3-20031008-CVS/src/comm_epoll.cc Sun Aug 3 18:38:15 2003
+++ squid-3.0-PRE3-20031008-EPOLL-FIXED/src/comm_epoll.cc Fri Oct 17 13:01:05 2003
@@ -94,6 +94,15 @@
     }
 }
 
+static const char* epolltype_atoi(int x) {
+ switch(x) {
+ case EPOLL_CTL_ADD: return "EPOLL_CTL_ADD";
+ case EPOLL_CTL_DEL: return "EPOLL_CTL_DEL";
+ case EPOLL_CTL_MOD: return "EPOLL_CTL_MOD";
+ default: return "UNKNOWN_EPOLLCTL_OP";
+ }
+}
+
 /*
  * comm_setselect
  *
@@ -106,81 +115,37 @@
               void *client_data, time_t timeout)
 {
     fde *F = &fd_table[fd];
- int change = 0;
- int events = 0;
- int pollin = 0;
- int pollout = 0;
-
+ int epoll_ctl_type = 0;
     struct epoll_event ev;
     assert(fd >= 0);
     assert(F->flags.open);
     debug(5, DEBUG_EPOLL ? 0 : 8) ("commSetSelect(fd=%d,type=%u,handler=%p,client_data=%p,timeout=%ld)\n",
                                    fd,type,handler,client_data,timeout);
 
- if(F->read_handler != NULL)
- pollin = 1;
-
- if(F->write_handler != NULL)
- pollout = 1;
+ ev.events = 0;
+ ev.data.fd = fd;
 
     if (type & COMM_SELECT_READ) {
- if(F->read_handler != handler)
- change = 1;
-
- if(handler == NULL)
- pollin = 0;
- else
- pollin = 1;
-
+ if (handler) ev.events |= EPOLLIN | EPOLLHUP | EPOLLERR;
         F->read_handler = handler;
-
         F->read_data = client_data;
     }
 
     if (type & COMM_SELECT_WRITE) {
- if(F->write_handler != handler)
- change = 1;
-
- if(handler == NULL)
- pollout = 0;
- else
- pollout = 1;
-
+ if (handler) ev.events |= EPOLLOUT | EPOLLHUP | EPOLLERR;
         F->write_handler = handler;
-
         F->write_data = client_data;
     }
 
- if(pollin)
- events |= EPOLLIN;
-
- if(pollout)
- events |= EPOLLOUT;
-
- if(events)
- events |= EPOLLHUP | EPOLLERR;
-
- ev.data.fd = fd;
-
- ev.events = events;
-
- if(events) {
- if (epoll_ctl(kdpfd, EPOLL_CTL_MOD, fd, &ev) < 0) {
- if(errno == ENOENT) {
- debug(5,4) ("commSetSelect: epoll_ctl(,EPOLL_CTL_MOD,,) failed on fd=%d: entry does not exist\n",fd);
-
- if (epoll_ctl(kdpfd, EPOLL_CTL_ADD, fd, &ev) < 0)
- debug(5,1) ("commSetSelect: cpoll_ctl(,EPOLL_CTL_ADD,,) failed on fd=%d!: %s\n",fd,xstrerror());
- } else {
- debug(5,1) ("commSetSelect: cpoll_ctl(,EPOLL_CTL_MOD,,) failed on fd=%d!: %s\n",fd,xstrerror());
- }
- }
- } else if(change) {
- if(epoll_ctl(kdpfd,EPOLL_CTL_DEL,fd,&ev) < 0) {
- if(errno != ENOENT)
- debug(5,1) ("commSetSelect: cpoll_ctl(,EPOLL_CTL_DEL,,) failed on fd=%d!: %s\n",fd,xstrerror());
- else
- debug(5,4) ("commSetSelect: epoll_ctl(,EPOLL_CTL_DEL,,) failed on fd=%d: entry does not exist\n",fd);
+ if (ev.events != F->epoll_state) {
+ if (F->epoll_state) // already monitoring something.
+ epoll_ctl_type = ev.events ? EPOLL_CTL_MOD : EPOLL_CTL_DEL;
+ else epoll_ctl_type = EPOLL_CTL_ADD;
+ F->epoll_state = ev.events;
+
+ if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) {
+ debug(5, DEBUG_EPOLL ? 0 : 8) ("commSetSelect: epoll_ctl(,%s,,): failed on fd=%d: %s\n",
+ epolltype_atoi(epoll_ctl_type), fd, xstrerror());
         }
     }
 
@@ -209,7 +174,6 @@
     int num, i,fd;
     fde *F;
     PF *hdl;
-
     struct epoll_event *cevents;
     static time_t last_timeout = 0;
 
@@ -238,31 +202,48 @@
 
     getCurrentTime();
 
+ statHistCount(&statCounter.select_fds_hist, num);
+
     if (num == 0)
- return COMM_OK; /* No error.. */
+ return COMM_TIMEOUT; /* No error.. */
 
+ PROF_start(comm_handle_ready_fd);
     for (i = 0, cevents = pevents; i < num; i++, cevents++) {
         fd = cevents->data.fd;
         F = &fd_table[fd];
- debug(5, DEBUG_EPOLL ? 0 : 8) ("comm_select(): got fd=%d events=%d F->read_handler=%p F->write_handler=%p\n",
- fd,cevents->events,F->read_handler,F->write_handler);
+ debug(5, DEBUG_EPOLL ? 0 : 8) ("comm_select(): got fd=%d events=%x monitoring=%x F->read_handler=%p F->write_handler=%p\n",
+ fd,cevents->events,F->epoll_state,F->read_handler,F->write_handler);
 
- if(cevents->events & (EPOLLIN|EPOLLHUP|EPOLLERR)) {
+ //TODO: add EPOLLPRI??
+ if (cevents->events & (EPOLLIN|EPOLLHUP|EPOLLERR)) {
             if((hdl = F->read_handler) != NULL) {
                 debug(5, DEBUG_EPOLL ? 0 : 8) ("comm_select(): Calling read handler on fd=%d\n",fd);
+ PROF_start(comm_write_handler);
                 F->read_handler = NULL;
                 hdl(fd, F->read_data);
+ PROF_stop(comm_write_handler);
+ statCounter.select_fds++;
+ } else {
+ fd_table[fd].flags.read_pending = 1;
+ commSetSelect(fd, COMM_SELECT_READ, NULL, NULL, 0);
             }
         }
 
- if(cevents->events & (EPOLLOUT|EPOLLHUP|EPOLLERR)) {
+ if (cevents->events & (EPOLLOUT|EPOLLHUP|EPOLLERR)) {
             if((hdl = F->write_handler) != NULL) {
                 debug(5, DEBUG_EPOLL ? 0 : 8) ("comm_select(): Calling write handler on fd=%d\n",fd);
+ PROF_start(comm_read_handler);
                 F->write_handler = NULL;
                 hdl(fd, F->write_data);
+ PROF_stop(comm_read_handler);
+ statCounter.select_fds++;
+ } else {
+ // remove interest since no handler exist for this event.
+ commSetSelect(fd, COMM_SELECT_WRITE, NULL, NULL, 0);
             }
         }
     }
+ PROF_stop(comm_handle_ready_fd);
 
     return COMM_OK;
 }
diff -bur squid-3.0-PRE3-20031008-CVS/src/fd.cc squid-3.0-PRE3-20031008-EPOLL-FIXED/src/fd.cc
--- squid-3.0-PRE3-20031008-CVS/src/fd.cc Fri Feb 21 19:50:08 2003
+++ squid-3.0-PRE3-20031008-EPOLL-FIXED/src/fd.cc Thu Oct 16 13:08:44 2003
@@ -163,6 +163,7 @@
     debug(51, 3) ("fd_open FD %d %s\n", fd, desc);
     F->type = type;
     F->flags.open = 1;
+ F->epoll_state = 0;
 #ifdef _SQUID_MSWIN_
 
     switch (type) {
diff -bur squid-3.0-PRE3-20031008-CVS/src/fde.h squid-3.0-PRE3-20031008-EPOLL-FIXED/src/fde.h
--- squid-3.0-PRE3-20031008-CVS/src/fde.h Tue Jul 15 03:50:42 2003
+++ squid-3.0-PRE3-20031008-EPOLL-FIXED/src/fde.h Fri Oct 17 12:36:20 2003
@@ -99,6 +99,7 @@
     int bytes_read;
     int bytes_written;
     int uses; /* ie # req's over persistent conn */
+ unsigned epoll_state;
 
     struct _fde_disk disk;
     PF *read_handler;
Received on Fri Nov 07 2003 - 09:36:48 MST

This archive was generated by hypermail pre-2.1.9 : Tue Dec 09 2003 - 16:20:46 MST