Index: src/comm_poll.c =================================================================== RCS file: src/comm_poll.c diff -N src/comm_poll.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ src/comm_poll.c 24 May 2006 14:31:51 -0000 @@ -0,0 +1,689 @@ + +/* + * $Id: comm_select.c,v 1.66 2006/05/22 19:01:32 serassio Exp $ + * + * DEBUG: section 5 Socket Functions + * + * SQUID Web Proxy Cache http://www.squid-cache.org/ + * ---------------------------------------------------------- + * + * Squid is the result of efforts by numerous individuals from + * the Internet community; see the CONTRIBUTORS file for full + * details. Many organizations have provided support for Squid's + * development; see the SPONSORS file for full details. Squid is + * Copyrighted (C) 2001 by the Regents of the University of + * California; see the COPYRIGHT file for full details. Squid + * incorporates software developed and/or copyrighted by other + * sources; see the CREDITS file for full details. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. + * + */ + +#include "squid.h" + +#if USE_POLL + +static int MAX_POLL_TIME = 1000; /* see also comm_quick_poll_required() */ + + +#ifndef howmany +#define howmany(x, y) (((x)+((y)-1))/(y)) +#endif +#ifndef NBBY +#define NBBY 8 +#endif +#define FD_MASK_BYTES sizeof(fd_mask) +#define FD_MASK_BITS (FD_MASK_BYTES*NBBY) + +/* STATIC */ +static int fdIsHttp(int fd); +static int fdIsIcp(int fd); +static int fdIsDns(int fd); +static int commDeferRead(int fd); +static void checkTimeouts(void); +static OBJH commIncomingStats; +static int comm_check_incoming_poll_handlers(int nfds, int *fds); +static void comm_poll_dns_incoming(void); + +static fd_set global_readfds; +static fd_set global_writefds; +static int nreadfds; +static int nwritefds; + +/* + * Automatic tuning for incoming requests: + * + * INCOMING sockets are the ICP and HTTP ports. We need to check these + * fairly regularly, but how often? When the load increases, we + * want to check the incoming sockets more often. If we have a lot + * of incoming ICP, then we need to check these sockets more than + * if we just have HTTP. + * + * The variables 'incoming_icp_interval' and 'incoming_http_interval' + * determine how many normal I/O events to process before checking + * incoming sockets again. Note we store the incoming_interval + * multipled by a factor of (2^INCOMING_FACTOR) to have some + * pseudo-floating point precision. + * + * The variable 'icp_io_events' and 'http_io_events' counts how many normal + * I/O events have been processed since the last check on the incoming + * sockets. When io_events > incoming_interval, its time to check incoming + * sockets. + * + * Every time we check incoming sockets, we count how many new messages + * or connections were processed. This is used to adjust the + * incoming_interval for the next iteration. The new incoming_interval + * is calculated as the current incoming_interval plus what we would + * like to see as an average number of events minus the number of + * events just processed. + * + * incoming_interval = incoming_interval + target_average - number_of_events_processed + * + * There are separate incoming_interval counters for both HTTP and ICP events + * + * You can see the current values of the incoming_interval's, as well as + * a histogram of 'incoming_events' by asking the cache manager + * for 'comm_incoming', e.g.: + * + * % ./client mgr:comm_incoming + * + * Caveats: + * + * - We have MAX_INCOMING_INTEGER as a magic upper limit on + * incoming_interval for both types of sockets. At the + * largest value the cache will effectively be idling. + * + * - The higher the INCOMING_FACTOR, the slower the algorithm will + * respond to load spikes/increases/decreases in demand. A value + * between 3 and 8 is recommended. + */ + +#define MAX_INCOMING_INTEGER 256 +#define INCOMING_FACTOR 5 +#define MAX_INCOMING_INTERVAL (MAX_INCOMING_INTEGER << INCOMING_FACTOR) +static int icp_io_events = 0; +static int dns_io_events = 0; +static int http_io_events = 0; +static int incoming_icp_interval = 16 << INCOMING_FACTOR; +static int incoming_dns_interval = 16 << INCOMING_FACTOR; +static int incoming_http_interval = 16 << INCOMING_FACTOR; +#define commCheckICPIncoming (++icp_io_events > (incoming_icp_interval>> INCOMING_FACTOR)) +#define commCheckDNSIncoming (++dns_io_events > (incoming_dns_interval>> INCOMING_FACTOR)) +#define commCheckHTTPIncoming (++http_io_events > (incoming_http_interval>> INCOMING_FACTOR)) + +static int +fdIsIcp(int fd) +{ + if (fd == theInIcpConnection) + return 1; + if (fd == theOutIcpConnection) + return 1; + return 0; +} + +static int +fdIsDns(int fd) +{ + if (fd == DnsSocket) + return 1; + return 0; +} + +static int +fdIsHttp(int fd) +{ + int j; + for (j = 0; j < NHttpSockets; j++) { + if (fd == HttpSockets[j]) + return 1; + } + return 0; +} + +#if DELAY_POOLS +static int slowfdcnt = 0; +static int slowfdarr[SQUID_MAXFD]; + +static void +commAddSlowFd(int fd) +{ + assert(slowfdcnt < SQUID_MAXFD); + slowfdarr[slowfdcnt++] = fd; +} + +static int +commGetSlowFd(void) +{ + int whichfd, retfd; + + if (!slowfdcnt) + return -1; + whichfd = squid_random() % slowfdcnt; + retfd = slowfdarr[whichfd]; + slowfdarr[whichfd] = slowfdarr[--slowfdcnt]; + return retfd; +} +#endif + +static int +comm_check_incoming_poll_handlers(int nfds, int *fds) +{ + int i; + int fd; + PF *hdl = NULL; + int npfds; + struct pollfd pfds[3 + MAXHTTPPORTS]; + incoming_sockets_accepted = 0; + for (i = npfds = 0; i < nfds; i++) { + int events; + fd = fds[i]; + events = 0; + if (fd_table[fd].read_handler) + events |= POLLRDNORM; + if (fd_table[fd].write_handler) + events |= POLLWRNORM; + if (events) { + pfds[npfds].fd = fd; + pfds[npfds].events = events; + pfds[npfds].revents = 0; + npfds++; + } + } + if (!nfds) + return -1; +#if !ALARM_UPDATES_TIME + getCurrentTime(); +#endif + statCounter.syscalls.polls++; + if (poll(pfds, npfds, 0) < 1) + return incoming_sockets_accepted; + for (i = 0; i < npfds; i++) { + int revents; + if (((revents = pfds[i].revents) == 0) || ((fd = pfds[i].fd) == -1)) + continue; + if (revents & (POLLRDNORM | POLLIN | POLLHUP | POLLERR)) { + if ((hdl = fd_table[fd].read_handler)) { + fd_table[fd].read_handler = NULL; + hdl(fd, fd_table[fd].read_data); + } else if (pfds[i].events & POLLRDNORM) + debug(5, 1) ("comm_poll_incoming: FD %d NULL read handler\n", + fd); + } + if (revents & (POLLWRNORM | POLLOUT | POLLHUP | POLLERR)) { + if ((hdl = fd_table[fd].write_handler)) { + fd_table[fd].write_handler = NULL; + hdl(fd, fd_table[fd].write_data); + } else if (pfds[i].events & POLLWRNORM) + debug(5, 1) ("comm_poll_incoming: FD %d NULL write_handler\n", + fd); + } + } + return incoming_sockets_accepted; +} + +static void +comm_poll_icp_incoming(void) +{ + int nfds = 0; + int fds[2]; + int nevents; + icp_io_events = 0; + if (theInIcpConnection >= 0) + fds[nfds++] = theInIcpConnection; + if (theInIcpConnection != theOutIcpConnection) + if (theOutIcpConnection >= 0) + fds[nfds++] = theOutIcpConnection; + if (nfds == 0) + return; + nevents = comm_check_incoming_poll_handlers(nfds, fds); + incoming_icp_interval += Config.comm_incoming.icp_average - nevents; + if (incoming_icp_interval < Config.comm_incoming.icp_min_poll) + incoming_icp_interval = Config.comm_incoming.icp_min_poll; + if (incoming_icp_interval > MAX_INCOMING_INTERVAL) + incoming_icp_interval = MAX_INCOMING_INTERVAL; + if (nevents > INCOMING_ICP_MAX) + nevents = INCOMING_ICP_MAX; + statHistCount(&statCounter.comm_icp_incoming, nevents); +} + +static void +comm_poll_http_incoming(void) +{ + int nfds = 0; + int fds[MAXHTTPPORTS]; + int j; + int nevents; + http_io_events = 0; + for (j = 0; j < NHttpSockets; j++) { + if (HttpSockets[j] < 0) + continue; + if (commDeferRead(HttpSockets[j])) + continue; + fds[nfds++] = HttpSockets[j]; + } + nevents = comm_check_incoming_poll_handlers(nfds, fds); + incoming_http_interval = incoming_http_interval + + Config.comm_incoming.http_average - nevents; + if (incoming_http_interval < Config.comm_incoming.http_min_poll) + incoming_http_interval = Config.comm_incoming.http_min_poll; + if (incoming_http_interval > MAX_INCOMING_INTERVAL) + incoming_http_interval = MAX_INCOMING_INTERVAL; + if (nevents > INCOMING_HTTP_MAX) + nevents = INCOMING_HTTP_MAX; + statHistCount(&statCounter.comm_http_incoming, nevents); +} + +/* poll all sockets; call handlers for those that are ready. */ +int +comm_poll(int msec) +{ + struct pollfd pfds[SQUID_MAXFD]; +#if DELAY_POOLS + fd_set slowfds; +#endif + int fd; + unsigned int i; + unsigned int maxfd; + unsigned int nfds; + unsigned int npending; + int num; + int callicp = 0, callhttp = 0; + int calldns = 0; + static time_t last_timeout = 0; + double timeout = current_dtime + (msec / 1000.0); + do { +#if !ALARM_UPDATES_TIME + double start; + getCurrentTime(); + start = current_dtime; +#endif + /* Handle any fs callbacks that need doing */ + storeDirCallback(); +#if DELAY_POOLS + FD_ZERO(&slowfds); +#endif + if (commCheckICPIncoming) + comm_poll_icp_incoming(); + if (commCheckDNSIncoming) + comm_poll_dns_incoming(); + if (commCheckHTTPIncoming) + comm_poll_http_incoming(); + callicp = calldns = callhttp = 0; + nfds = 0; + npending = 0; + maxfd = Biggest_FD + 1; + for (i = 0; i < maxfd; i++) { + int events; + events = 0; + /* Check each open socket for a handler. */ + if (fd_table[i].read_handler) { + int dopoll = 1; + switch (commDeferRead(i)) { + case 0: + break; + case 1: + dopoll = 0; + break; +#if DELAY_POOLS + case -1: + FD_SET(i, &slowfds); + break; +#endif + default: + fatalf("bad return value from commDeferRead(FD %d)\n", i); + /* NOTREACHED */ + } + if (dopoll) { + switch (fd_table[i].read_pending) { + case COMM_PENDING_NORMAL: + events |= POLLRDNORM; + break; + case COMM_PENDING_WANTS_WRITE: + events |= POLLWRNORM; + break; + case COMM_PENDING_WANTS_READ: + events |= POLLRDNORM; + break; + case COMM_PENDING_NOW: + events |= POLLRDNORM; + npending++; + break; + } + } + } + if (fd_table[i].write_handler) { + switch (fd_table[i].write_pending) { + case COMM_PENDING_NORMAL: + events |= POLLWRNORM; + break; + case COMM_PENDING_WANTS_WRITE: + events |= POLLWRNORM; + break; + case COMM_PENDING_WANTS_READ: + events |= POLLRDNORM; + break; + case COMM_PENDING_NOW: + events |= POLLWRNORM; + npending++; + break; + } + } + if (events) { + pfds[nfds].fd = i; + pfds[nfds].events = events; + pfds[nfds].revents = 0; + nfds++; + } + } + if (nfds == 0) { + assert(shutting_down); + return COMM_SHUTDOWN; + } + if (npending) + msec = 0; + if (msec > MAX_POLL_TIME) + msec = MAX_POLL_TIME; + statCounter.syscalls.polls++; + num = poll(pfds, nfds, msec); + statCounter.select_loops++; + if (num < 0 && !ignoreErrno(errno)) { + debug(5, 0) ("comm_poll: poll failure: %s\n", xstrerror()); + assert(errno != EINVAL); + return COMM_ERROR; + /* NOTREACHED */ + } + debug(5, num ? 5 : 8) ("comm_poll: %d+%u FDs ready\n", num, npending); + statHistCount(&statCounter.select_fds_hist, num); + /* Check timeout handlers ONCE each second. */ + if (squid_curtime > last_timeout) { + last_timeout = squid_curtime; + checkTimeouts(); + } + if (num <= 0 && npending == 0) + continue; + /* scan each socket but the accept socket. Poll this + * more frequently to minimize losses due to the 5 connect + * limit in SunOS */ + for (i = 0; i < nfds; i++) { + fde *F; + int revents = pfds[i].revents; + fd = pfds[i].fd; + if (fd == -1) + continue; + switch (fd_table[fd].read_pending) { + case COMM_PENDING_NORMAL: + case COMM_PENDING_WANTS_READ: + break; + case COMM_PENDING_WANTS_WRITE: + if (pfds[i].revents & (POLLOUT | POLLWRNORM)) + revents |= POLLIN; + break; + case COMM_PENDING_NOW: + revents |= POLLIN; + break; + } + switch (fd_table[fd].write_pending) { + case COMM_PENDING_NORMAL: + case COMM_PENDING_WANTS_WRITE: + break; + case COMM_PENDING_WANTS_READ: + if (pfds[i].revents & (POLLIN | POLLRDNORM)) + revents |= POLLOUT; + break; + case COMM_PENDING_NOW: + revents |= POLLOUT; + break; + } + if (revents == 0) + continue; + if (fdIsIcp(fd)) { + callicp = 1; + continue; + } + if (fdIsDns(fd)) { + calldns = 1; + continue; + } + if (fdIsHttp(fd)) { + callhttp = 1; + continue; + } + F = &fd_table[fd]; + if (revents & (POLLRDNORM | POLLIN | POLLHUP | POLLERR)) { + PF *hdl = F->read_handler; + debug(5, 6) ("comm_poll: FD %d ready for reading\n", fd); + if (hdl == NULL) + (void) 0; /* Nothing to do */ +#if DELAY_POOLS + else if (FD_ISSET(fd, &slowfds)) + commAddSlowFd(fd); +#endif + else { + F->read_handler = NULL; + F->read_pending = COMM_PENDING_NORMAL; + hdl(fd, F->read_data); + statCounter.select_fds++; + if (commCheckICPIncoming) + comm_poll_icp_incoming(); + if (commCheckDNSIncoming) + comm_poll_dns_incoming(); + if (commCheckHTTPIncoming) + comm_poll_http_incoming(); + } + } + if (revents & (POLLWRNORM | POLLOUT | POLLHUP | POLLERR)) { + PF *hdl = F->write_handler; + debug(5, 5) ("comm_poll: FD %d ready for writing\n", fd); + if (hdl != NULL) { + F->write_handler = NULL; + F->write_pending = COMM_PENDING_NORMAL; + hdl(fd, F->write_data); + statCounter.select_fds++; + if (commCheckICPIncoming) + comm_poll_icp_incoming(); + if (commCheckDNSIncoming) + comm_poll_dns_incoming(); + if (commCheckHTTPIncoming) + comm_poll_http_incoming(); + } + } + if (revents & POLLNVAL) { + close_handler *ch; + debug(5, 0) ("WARNING: FD %d has handlers, but it's invalid.\n", fd); + debug(5, 0) ("FD %d is a %s\n", fd, fdTypeStr[F->type]); + debug(5, 0) ("--> %s\n", F->desc); + debug(5, 0) ("tmout:%p read:%p write:%p\n", + F->timeout_handler, + F->read_handler, + F->write_handler); + for (ch = F->close_handler; ch; ch = ch->next) + debug(5, 0) (" close handler: %p\n", ch->handler); + if (F->close_handler) { + commCallCloseHandlers(fd); + } else if (F->timeout_handler) { + debug(5, 0) ("comm_poll: Calling Timeout Handler\n"); + F->timeout_handler(fd, F->timeout_data); + } + F->close_handler = NULL; + F->timeout_handler = NULL; + F->read_handler = NULL; + F->write_handler = NULL; + if (F->flags.open) + fd_close(fd); + } + } + if (callicp) + comm_poll_icp_incoming(); + if (calldns) + comm_poll_dns_incoming(); + if (callhttp) + comm_poll_http_incoming(); +#if DELAY_POOLS + while ((fd = commGetSlowFd()) != -1) { + fde *F = &fd_table[fd]; + PF *hdl = F->read_handler; + debug(5, 6) ("comm_select: slow FD %d selected for reading\n", fd); + if (hdl != NULL) { + F->read_handler = NULL; + F->read_pending = COMM_PENDING_NORMAL; + hdl(fd, F->read_data); + statCounter.select_fds++; + if (commCheckICPIncoming) + comm_poll_icp_incoming(); + if (commCheckDNSIncoming) + comm_poll_dns_incoming(); + if (commCheckHTTPIncoming) + comm_poll_http_incoming(); + } + } +#endif +#if !ALARM_UPDATES_TIME + getCurrentTime(); + statCounter.select_time += (current_dtime - start); +#endif + return COMM_OK; + } + while (timeout > current_dtime); + debug(5, 8) ("comm_poll: time out: %ld.\n", (long int) squid_curtime); + return COMM_TIMEOUT; +} + + +static void +comm_poll_dns_incoming(void) +{ + int nfds = 0; + int fds[2]; + int nevents; + dns_io_events = 0; + if (DnsSocket < 0) + return; + fds[nfds++] = DnsSocket; + nevents = comm_check_incoming_poll_handlers(nfds, fds); + if (nevents < 0) + return; + incoming_dns_interval += Config.comm_incoming.dns_average - nevents; + if (incoming_dns_interval < Config.comm_incoming.dns_min_poll) + incoming_dns_interval = Config.comm_incoming.dns_min_poll; + if (incoming_dns_interval > MAX_INCOMING_INTERVAL) + incoming_dns_interval = MAX_INCOMING_INTERVAL; + if (nevents > INCOMING_DNS_MAX) + nevents = INCOMING_DNS_MAX; + statHistCount(&statCounter.comm_dns_incoming, nevents); +} + +void +comm_select_init(void) +{ + cachemgrRegister("comm_incoming", + "comm_incoming() stats", + commIncomingStats, 0, 1); + FD_ZERO(&global_readfds); + FD_ZERO(&global_writefds); + nreadfds = nwritefds = 0; +} + + +static void +commIncomingStats(StoreEntry * sentry) +{ + StatCounters *f = &statCounter; + storeAppendPrintf(sentry, "Current incoming_icp_interval: %d\n", + incoming_icp_interval >> INCOMING_FACTOR); + storeAppendPrintf(sentry, "Current incoming_dns_interval: %d\n", + incoming_dns_interval >> INCOMING_FACTOR); + storeAppendPrintf(sentry, "Current incoming_http_interval: %d\n", + incoming_http_interval >> INCOMING_FACTOR); + storeAppendPrintf(sentry, "\n"); + storeAppendPrintf(sentry, "Histogram of events per incoming socket type\n"); + storeAppendPrintf(sentry, "ICP Messages handled per comm_poll_icp_incoming() call:\n"); + statHistDump(&f->comm_icp_incoming, sentry, statHistIntDumper); + storeAppendPrintf(sentry, "DNS Messages handled per comm_poll_dns_incoming() call:\n"); + statHistDump(&f->comm_dns_incoming, sentry, statHistIntDumper); + storeAppendPrintf(sentry, "HTTP Messages handled per comm_poll_http_incoming() call:\n"); + statHistDump(&f->comm_http_incoming, sentry, statHistIntDumper); +} + +void +commUpdateReadBits(int fd, PF * handler) +{ + if (handler && !FD_ISSET(fd, &global_readfds)) { + FD_SET(fd, &global_readfds); + nreadfds++; + } else if (!handler && FD_ISSET(fd, &global_readfds)) { + FD_CLR(fd, &global_readfds); + nreadfds--; + } +} + +void +commUpdateWriteBits(int fd, PF * handler) +{ + if (handler && !FD_ISSET(fd, &global_writefds)) { + FD_SET(fd, &global_writefds); + nwritefds++; + } else if (!handler && FD_ISSET(fd, &global_writefds)) { + FD_CLR(fd, &global_writefds); + nwritefds--; + } +} + + +static int +commDeferRead(int fd) +{ + fde *F = &fd_table[fd]; + if (F->defer_check == NULL) + return 0; + return F->defer_check(fd, F->defer_data); +} + +static void +checkTimeouts(void) +{ + int fd; + fde *F = NULL; + PF *callback; + for (fd = 0; fd <= Biggest_FD; fd++) { + F = &fd_table[fd]; + if (!F->flags.open) + continue; + if (F->timeout == 0) + continue; + if (F->timeout > squid_curtime) + continue; + debug(5, 5) ("checkTimeouts: FD %d Expired\n", fd); + if (F->timeout_handler) { + debug(5, 5) ("checkTimeouts: FD %d: Call timeout handler\n", fd); + callback = F->timeout_handler; + F->timeout_handler = NULL; + callback(fd, F->timeout_data); + } else { + debug(5, 5) ("checkTimeouts: FD %d: Forcing comm_close()\n", fd); + comm_close(fd); + } + } +} + + +/* Called by async-io or diskd to speed up the polling */ +void +comm_quick_poll_required(void) +{ + MAX_POLL_TIME = 10; +} + +#endif Index: src/comm_epoll.c =================================================================== RCS file: src/comm_epoll.c diff -N src/comm_epoll.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ src/comm_epoll.c 24 May 2006 14:31:51 -0000 @@ -0,0 +1,459 @@ + +/* + * $Id: comm_select.c,v 1.66 2006/05/22 19:01:32 serassio Exp $ + * + * DEBUG: section 5 Socket Functions + * + * SQUID Web Proxy Cache http://www.squid-cache.org/ + * ---------------------------------------------------------- + * + * Squid is the result of efforts by numerous individuals from + * the Internet community; see the CONTRIBUTORS file for full + * details. Many organizations have provided support for Squid's + * development; see the SPONSORS file for full details. Squid is + * Copyrighted (C) 2001 by the Regents of the University of + * California; see the COPYRIGHT file for full details. Squid + * incorporates software developed and/or copyrighted by other + * sources; see the CREDITS file for full details. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. + * + */ + +#include "squid.h" + +#if USE_EPOLL + +static int MAX_POLL_TIME = 1000; /* see also comm_quick_poll_required() */ + +/* epoll structs */ +static int kdpfd; +static struct epoll_event *pevents; + +/* Array to keep track of backed off filedescriptors */ +static int backoff_fds[FD_SETSIZE]; + +static void checkTimeouts(void); +static int commDeferRead(int fd); + +static const char * +epolltype_atoi(int x) +{ + switch (x) { + + case EPOLL_CTL_ADD: + return "EPOLL_CTL_ADD"; + + case EPOLL_CTL_DEL: + return "EPOLL_CTL_DEL"; + + case EPOLL_CTL_MOD: + return "EPOLL_CTL_MOD"; + + default: + return "UNKNOWN_EPOLLCTL_OP"; + } +} + +/* Bring all fds back online */ +void +commEpollBackon() +{ + fde *F; + int i; + int fd; + int j = 0; + + for (i = 0; i < FD_SETSIZE; i++) { + if (backoff_fds[i]) { + /* record the fd and zero the descriptor */ + fd = backoff_fds[i]; + F = &fd_table[fd]; + backoff_fds[i] = 0; + + /* If the fd is no longer backed off, ignore */ + if (!(F->epoll_backoff)) { + continue; + } + /* If the fd is still meant to be backed off, add it to the start of + * the list and continue */ + if (commDeferRead(fd) == 1) { + backoff_fds[j++] = fd; + continue; + } + debug(5, 4) ("commEpollBackon: fd=%d\n", fd); + + /* Resume operations for this fd */ + commResumeFD(fd); + } else { + /* Once we hit a non-backed off FD, we can break */ + break; + } + } +} + + +/* Back off on the next epoll for the given fd */ +void +commEpollBackoff(int fd) +{ + commDeferFD(fd); +} + +/* Defer reads from this fd */ +void +commDeferFD(int fd) +{ + fde *F = &fd_table[fd]; + struct epoll_event ev; + int epoll_ctl_type = 0; + int i; + + /* Return if the fd is already backed off */ + if (F->epoll_backoff) { + return; + } + for (i = 0; i < FD_SETSIZE; i++) { + if (!(backoff_fds[i]) || (backoff_fds[i] == fd)) { + break; + } + } + + /* die if we have no fd (very unlikely), if the fd has no existing epoll + * state, if we are given a bad fd, or if the fd is not open. */ + assert(i < FD_SETSIZE); + assert(F->epoll_state); + assert(fd >= 0); + assert(F->flags.open); + + /* set up ev struct */ + ev.events = 0; + ev.data.fd = fd; + + /* If we were only waiting for reads, delete the fd, otherwise remove the + * read event */ + if (F->epoll_state == (EPOLLIN | EPOLLHUP | EPOLLERR)) { + epoll_ctl_type = EPOLL_CTL_DEL; + } else { + epoll_ctl_type = EPOLL_CTL_MOD; + ev.events = (F->epoll_state - EPOLLIN); + } + debug(5, 5) ("commDeferFD: epoll_ctl_type=%s, fd=%d epoll_state=%d\n", epolltype_atoi(epoll_ctl_type), fd, F->epoll_state); + + if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) { + /* If an error occurs, log it */ + debug(5, 1) ("commDeferFD: epoll_ctl(,%s,,): failed on fd=%d: %s\n", epolltype_atoi(epoll_ctl_type), fd, xstrerror()); + } else { + backoff_fds[i] = fd; + F->epoll_backoff = 1; + F->epoll_state = ev.events; + } +} + +/* Resume reading from the given fd */ +void +commResumeFD(int fd) +{ + struct epoll_event ev; + int epoll_ctl_type = 0; + fde *F; + + F = &fd_table[fd]; + + /* If the fd has been modified, do nothing and remove the flag */ + if (!(F->read_handler) || !(F->epoll_backoff)) { + debug(5, 2) ("commResumeFD: fd=%d ignoring read_handler=%p, epoll_backoff=%d\n", fd, F->read_handler, F->epoll_backoff); + F->epoll_backoff = 0; + return; + } + /* we need to re-add the fd to the epoll list with EPOLLIN set */ + ev.events = F->epoll_state | EPOLLIN | EPOLLHUP | EPOLLERR; + ev.data.fd = fd; + + /* If epoll_state is not set, then this fd is only waiting for + * reads, and needs adding, otherwise we mod it to add EPOLLIN */ + if (!(F->epoll_state)) { + epoll_ctl_type = EPOLL_CTL_ADD; + } else { + epoll_ctl_type = EPOLL_CTL_MOD; + } + debug(5, 5) ("commResumeFD: epoll_ctl_type=%s, fd=%d\n", epolltype_atoi(epoll_ctl_type), fd); + + /* Try and add the fd back into the epoll struct */ + if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) { + /* If an error occurs, log */ + debug(5, 1) ("commResumeFD: epoll_ctl(,%s,,): failed on fd=%d: %s\n", epolltype_atoi(epoll_ctl_type), fd, xstrerror()); + } else { + F->epoll_backoff = 0; + F->epoll_state = ev.events; + } +} +void +comm_select_init() +{ + int i; + pevents = (struct epoll_event *) xmalloc(SQUID_MAXFD * sizeof(struct epoll_event)); + if (!pevents) { + fatalf("comm_select_init: xmalloc() failed: %s\n", xstrerror()); + } + kdpfd = epoll_create(SQUID_MAXFD); + + if (kdpfd < 0) { + fatalf("comm_select_init: epoll_create(): %s\n", xstrerror()); + } + for (i = 0; i < FD_SETSIZE; i++) { + backoff_fds[i] = 0; + } +} + +void +commUpdateReadBits(int fd, PF * handler) +{ + /* Not imlpemented */ +} + +void +commUpdateWriteBits(int fd, PF * handler) +{ + /* Not imlpemented */ +} + +void +commSetSelect(int fd, unsigned int type, PF * handler, void *client_data, time_t timeout) +{ + fde *F = &fd_table[fd]; + int epoll_ctl_type = 0; + struct epoll_event ev; + + assert(fd >= 0); + assert(F->flags.open); + debug(5, 8) ("commSetSelect(fd=%d,type=%u,handler=%p,client_data=%p,timeout=%ld)\n", fd, type, handler, client_data, timeout); + + ev.events = 0; + ev.data.fd = fd; + + if (type & COMM_SELECT_READ) { + /* Only add the epoll event if the fd is not backed off */ + if (handler && !(F->epoll_backoff)) { + ev.events |= EPOLLIN; + } + F->read_handler = handler; + F->read_data = client_data; + + // Otherwise, use previously stored value if the fd is not backed off + } else if ((F->epoll_state & EPOLLIN) && (F->read_handler) && !(F->epoll_backoff)) { + ev.events |= EPOLLIN; + } + if (type & COMM_SELECT_WRITE) { + if (handler) { + ev.events |= EPOLLOUT; + } + F->write_handler = handler; + F->write_data = client_data; + + // Otherwise, use previously stored value + } else if ((F->epoll_state & EPOLLOUT) && (F->write_handler)) { + ev.events |= EPOLLOUT; + } + if (ev.events) { + ev.events |= EPOLLHUP | EPOLLERR; + } + /* If the type is 0, force adding the fd to the epoll set */ + if (!(type)) { + F->epoll_state = 0; + } + if (ev.events != F->epoll_state) { + // If the struct is already in epoll MOD or DEL, else ADD + if (F->epoll_state) { + epoll_ctl_type = ev.events ? EPOLL_CTL_MOD : EPOLL_CTL_DEL; + } else { + epoll_ctl_type = EPOLL_CTL_ADD; + } + + /* Update the state */ + F->epoll_state = ev.events; + + if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) { + debug(5, 1) ("commSetSelect: epoll_ctl(%s): failed on fd=%d: %s\n", + epolltype_atoi(epoll_ctl_type), fd, xstrerror()); + } + } + if (timeout) + F->timeout = squid_curtime + timeout; +} + +int +comm_epoll(int msec) +{ + struct timespec ts; + static time_t last_timeout = 0; + int i; + int num; + int fd; + fde *F; + PF *hdl; + struct epoll_event *cevents; + double timeout = current_dtime + (msec / 1000.0); + + if (msec > MAX_POLL_TIME) + msec = MAX_POLL_TIME; + + debug(50, 3) ("comm_epoll: timeout %d\n", msec); + + do { +#if !ALARM_UPDATES_TIME + double start; + getCurrentTime(); + start = current_dtime; +#endif + ts.tv_sec = msec / 1000; + ts.tv_nsec = (msec % 1000) * 1000; + + /* Check timeouts once per second */ + if (last_timeout < squid_curtime) { + last_timeout = squid_curtime; + checkTimeouts(); + + /* bring backed off connections back online */ + commEpollBackon(); + } + /* Check for disk io callbacks */ + storeDirCallback(); + + for (;;) { + statCounter.syscalls.polls++; + num = epoll_wait(kdpfd, pevents, SQUID_MAXFD, msec); + statCounter.select_loops++; + + if (num >= 0) + break; + + if (ignoreErrno(errno)) + break; + + debug(5, 0) ("comm_epoll: epoll failure: %s\n", xstrerror()); + + return COMM_ERROR; + } + + statHistCount(&statCounter.select_fds_hist, num); + + if (num <= 0) + continue; + + for (i = 0, cevents = pevents; i < num; i++, cevents++) { + fd = cevents->data.fd; + F = &fd_table[fd]; + debug(5, 8) ("comm_epoll(): got fd=%d events=%x monitoring=%x F->read_handler=%p F->write_handler=%p\n" + ,fd, cevents->events, F->epoll_state, F->read_handler, F->write_handler); + if (cevents->events & (EPOLLIN | EPOLLHUP | EPOLLERR)) { + if ((hdl = F->read_handler) != NULL) { + // If the descriptor is meant to be deferred, don't handle + if (commDeferRead(fd) == 1) { + if (!(F->epoll_backoff)) { + debug(5, 1) ("comm_epoll(): WARNING defer handler for fd=%d (desc=%s) does not call commDeferFD() - backing off manually\n", fd, F->desc); + commEpollBackoff(fd); + } + goto WRITE_EVENT; + } + debug(5, 8) ("comm_epoll(): Calling read handler on fd=%d\n", fd); + F->read_handler = NULL; + hdl(fd, F->read_data); + statCounter.select_fds++; + if ((F->read_handler == NULL) && (F->flags.open)) { + commSetSelect(fd, COMM_SELECT_READ, NULL, NULL, 0); + } + } else if (cevents->events & EPOLLIN) { + debug(5, 2) ("comm_epoll(): no read handler for fd=%d", fd); + if (F->flags.open) { + commSetSelect(fd, COMM_SELECT_READ, NULL, NULL, 0); + } + } + } + WRITE_EVENT: + if (cevents->events & (EPOLLOUT | EPOLLHUP | EPOLLERR)) { + if ((hdl = F->write_handler) != NULL) { + debug(5, 8) ("comm_epoll(): Calling write handler on fd=%d\n", fd); + F->write_handler = NULL; + hdl(fd, F->write_data); + statCounter.select_fds++; + if ((F->write_handler == NULL) && (F->flags.open)) { + commSetSelect(fd, COMM_SELECT_WRITE, NULL, NULL, 0); + } + } else if (cevents->events & EPOLLOUT) { + debug(5, 2) ("comm_epoll(): no write handler for fd=%d\n", fd); + if (F->flags.open) { + commSetSelect(fd, COMM_SELECT_WRITE, NULL, NULL, 0); + } + } + } + } +#if !ALARM_UPDATES_TIME + getCurrentTime(); + statCounter.select_time += (current_dtime - start); +#endif + return COMM_OK; + } + while (timeout > current_dtime); + + debug(5, 8) ("comm_epoll: time out: %ld.\n", (long int) squid_curtime); + return COMM_TIMEOUT; +} + +static int +commDeferRead(int fd) +{ + fde *F = &fd_table[fd]; + if (F->defer_check == NULL) + return 0; + return F->defer_check(fd, F->defer_data); +} + +static void +checkTimeouts(void) +{ + int fd; + fde *F = NULL; + PF *callback; + for (fd = 0; fd <= Biggest_FD; fd++) { + F = &fd_table[fd]; + if (!F->flags.open) + continue; + if (F->timeout == 0) + continue; + if (F->timeout > squid_curtime) + continue; + debug(5, 5) ("checkTimeouts: FD %d Expired\n", fd); + if (F->timeout_handler) { + debug(5, 5) ("checkTimeouts: FD %d: Call timeout handler\n", fd); + callback = F->timeout_handler; + F->timeout_handler = NULL; + callback(fd, F->timeout_data); + } else { + debug(5, 5) ("checkTimeouts: FD %d: Forcing comm_close()\n", fd); + comm_close(fd); + } + } +} + + +/* Called by async-io or diskd to speed up the polling */ +void +comm_quick_poll_required(void) +{ + MAX_POLL_TIME = 10; +} + +#endif Index: src/Makefile.am =================================================================== RCS file: /server/cvs-server/squid/squid/src/Makefile.am,v retrieving revision 1.44 diff -u -r1.44 Makefile.am --- src/Makefile.am 23 May 2006 21:52:27 -0000 1.44 +++ src/Makefile.am 24 May 2006 14:31:51 -0000 @@ -115,6 +115,8 @@ client_side.c \ comm.c \ comm_select.c \ + comm_poll.c \ + comm_epoll.c \ debug.c \ defines.h \ $(DELAY_POOL_SOURCE) \ Index: src/client_side.c =================================================================== RCS file: /server/cvs-server/squid/squid/src/client_side.c,v retrieving revision 1.621 diff -u -r1.621 client_side.c --- src/client_side.c 22 May 2006 22:16:37 -0000 1.621 +++ src/client_side.c 24 May 2006 14:31:52 -0000 @@ -3290,7 +3290,7 @@ ConnStateData *conn = data; if (conn->body.size_left && !F->flags.socket_eof) { if (conn->in.offset >= conn->in.size - 1) { -#if HAVE_EPOLL +#if USE_EPOLL /* The commResumeFD function is called in this file */ conn->in.clientfd = fd; commDeferFD(fd); @@ -3301,7 +3301,7 @@ } } else { if (conn->defer.until > squid_curtime) { -#if HAVE_EPOLL +#if USE_EPOLL /* This is a second resolution timer, so commEpollBackon will * handle the resume for this defer call */ commDeferFD(fd); @@ -3709,7 +3709,7 @@ conn->body.size_left -= size; /* Move any remaining data */ conn->in.offset -= size; -#if HAVE_EPOLL +#if USE_EPOLL /* Resume the fd if necessary */ if (conn->in.clientfd) { if (conn->in.offset < conn->in.size - 1) { Index: src/comm.c =================================================================== RCS file: /server/cvs-server/squid/squid/src/comm.c,v retrieving revision 1.342 diff -u -r1.342 comm.c --- src/comm.c 22 May 2006 18:55:23 -0000 1.342 +++ src/comm.c 24 May 2006 14:31:52 -0000 @@ -375,7 +375,7 @@ commSetTcpNoDelay(cs->fd); #endif -#if HAVE_EPOLL +#if USE_EPOLL // If we are using epoll(), we need to make sure that this fd will be polled commSetSelect(cs->fd, 0, NULL, NULL, 0); #endif @@ -779,7 +779,7 @@ } /* Epoll redefines this function in comm_select.c */ -#if !HAVE_EPOLL +#if !USE_EPOLL void commSetSelect(int fd, unsigned int type, PF * handler, void *client_data, time_t timeout) { Index: src/comm_select.c =================================================================== RCS file: /server/cvs-server/squid/squid/src/comm_select.c,v retrieving revision 1.66 diff -u -r1.66 comm_select.c --- src/comm_select.c 22 May 2006 19:01:32 -0000 1.66 +++ src/comm_select.c 24 May 2006 14:31:52 -0000 @@ -34,9 +34,10 @@ #include "squid.h" +#if USE_SELECT + static int MAX_POLL_TIME = 1000; /* see also comm_quick_poll_required() */ -#if !HAVE_EPOLL #ifndef howmany #define howmany(x, y) (((x)+((y)-1))/(y)) @@ -765,7 +766,7 @@ default: fatalf("bad return value from commDeferRead(FD %d)\n", fd); } - if (FD_ISSET(fd, &readfds) && fd_table[fd].flags.read_pending) { + if (FD_ISSET(fd, &readfds) && fd_table[fd].read_pending & COMM_PENDING_NORMAL) { FD_SET(fd, &pendingfds); pending++; } @@ -1126,381 +1127,6 @@ } } -#else /* HAVE_EPOLL */ -/* epoll structs */ -static int kdpfd; -static struct epoll_event *pevents; - -/* Array to keep track of backed off filedescriptors */ -static int backoff_fds[FD_SETSIZE]; - -static void checkTimeouts(void); -static int commDeferRead(int fd); - -static const char * -epolltype_atoi(int x) -{ - switch (x) { - - case EPOLL_CTL_ADD: - return "EPOLL_CTL_ADD"; - - case EPOLL_CTL_DEL: - return "EPOLL_CTL_DEL"; - - case EPOLL_CTL_MOD: - return "EPOLL_CTL_MOD"; - - default: - return "UNKNOWN_EPOLLCTL_OP"; - } -} - -/* Bring all fds back online */ -void -commEpollBackon() -{ - fde *F; - int i; - int fd; - int j = 0; - - for (i = 0; i < FD_SETSIZE; i++) { - if (backoff_fds[i]) { - /* record the fd and zero the descriptor */ - fd = backoff_fds[i]; - F = &fd_table[fd]; - backoff_fds[i] = 0; - - /* If the fd is no longer backed off, ignore */ - if (!(F->epoll_backoff)) { - continue; - } - /* If the fd is still meant to be backed off, add it to the start of - * the list and continue */ - if (commDeferRead(fd) == 1) { - backoff_fds[j++] = fd; - continue; - } - debug(5, 4) ("commEpollBackon: fd=%d\n", fd); - - /* Resume operations for this fd */ - commResumeFD(fd); - } else { - /* Once we hit a non-backed off FD, we can break */ - break; - } - } -} - - -/* Back off on the next epoll for the given fd */ -void -commEpollBackoff(int fd) -{ - commDeferFD(fd); -} - -/* Defer reads from this fd */ -void -commDeferFD(int fd) -{ - fde *F = &fd_table[fd]; - struct epoll_event ev; - int epoll_ctl_type = 0; - int i; - - /* Return if the fd is already backed off */ - if (F->epoll_backoff) { - return; - } - for (i = 0; i < FD_SETSIZE; i++) { - if (!(backoff_fds[i]) || (backoff_fds[i] == fd)) { - break; - } - } - - /* die if we have no fd (very unlikely), if the fd has no existing epoll - * state, if we are given a bad fd, or if the fd is not open. */ - assert(i < FD_SETSIZE); - assert(F->epoll_state); - assert(fd >= 0); - assert(F->flags.open); - - /* set up ev struct */ - ev.events = 0; - ev.data.fd = fd; - - /* If we were only waiting for reads, delete the fd, otherwise remove the - * read event */ - if (F->epoll_state == (EPOLLIN | EPOLLHUP | EPOLLERR)) { - epoll_ctl_type = EPOLL_CTL_DEL; - } else { - epoll_ctl_type = EPOLL_CTL_MOD; - ev.events = (F->epoll_state - EPOLLIN); - } - debug(5, 5) ("commDeferFD: epoll_ctl_type=%s, fd=%d epoll_state=%d\n", epolltype_atoi(epoll_ctl_type), fd, F->epoll_state); - - if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) { - /* If an error occurs, log it */ - debug(5, 1) ("commDeferFD: epoll_ctl(,%s,,): failed on fd=%d: %s\n", epolltype_atoi(epoll_ctl_type), fd, xstrerror()); - } else { - backoff_fds[i] = fd; - F->epoll_backoff = 1; - F->epoll_state = ev.events; - } -} - -/* Resume reading from the given fd */ -void -commResumeFD(int fd) -{ - struct epoll_event ev; - int epoll_ctl_type = 0; - fde *F; - - F = &fd_table[fd]; - - /* If the fd has been modified, do nothing and remove the flag */ - if (!(F->read_handler) || !(F->epoll_backoff)) { - debug(5, 2) ("commResumeFD: fd=%d ignoring read_handler=%p, epoll_backoff=%d\n", fd, F->read_handler, F->epoll_backoff); - F->epoll_backoff = 0; - return; - } - /* we need to re-add the fd to the epoll list with EPOLLIN set */ - ev.events = F->epoll_state | EPOLLIN | EPOLLHUP | EPOLLERR; - ev.data.fd = fd; - - /* If epoll_state is not set, then this fd is only waiting for - * reads, and needs adding, otherwise we mod it to add EPOLLIN */ - if (!(F->epoll_state)) { - epoll_ctl_type = EPOLL_CTL_ADD; - } else { - epoll_ctl_type = EPOLL_CTL_MOD; - } - debug(5, 5) ("commResumeFD: epoll_ctl_type=%s, fd=%d\n", epolltype_atoi(epoll_ctl_type), fd); - - /* Try and add the fd back into the epoll struct */ - if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) { - /* If an error occurs, log */ - debug(5, 1) ("commResumeFD: epoll_ctl(,%s,,): failed on fd=%d: %s\n", epolltype_atoi(epoll_ctl_type), fd, xstrerror()); - } else { - F->epoll_backoff = 0; - F->epoll_state = ev.events; - } -} -void -comm_select_init() -{ - int i; - pevents = (struct epoll_event *) xmalloc(SQUID_MAXFD * sizeof(struct epoll_event)); - if (!pevents) { - fatalf("comm_select_init: xmalloc() failed: %s\n", xstrerror()); - } - kdpfd = epoll_create(SQUID_MAXFD); - - if (kdpfd < 0) { - fatalf("comm_select_init: epoll_create(): %s\n", xstrerror()); - } - for (i = 0; i < FD_SETSIZE; i++) { - backoff_fds[i] = 0; - } -} - -void -commUpdateReadBits(int fd, PF * handler) -{ - /* Not imlpemented */ -} - -void -commUpdateWriteBits(int fd, PF * handler) -{ - /* Not imlpemented */ -} - -void -commSetSelect(int fd, unsigned int type, PF * handler, void *client_data, time_t timeout) -{ - fde *F = &fd_table[fd]; - int epoll_ctl_type = 0; - struct epoll_event ev; - - assert(fd >= 0); - assert(F->flags.open); - debug(5, 8) ("commSetSelect(fd=%d,type=%u,handler=%p,client_data=%p,timeout=%ld)\n", fd, type, handler, client_data, timeout); - - ev.events = 0; - ev.data.fd = fd; - - if (type & COMM_SELECT_READ) { - /* Only add the epoll event if the fd is not backed off */ - if (handler && !(F->epoll_backoff)) { - ev.events |= EPOLLIN; - } - F->read_handler = handler; - F->read_data = client_data; - - // Otherwise, use previously stored value if the fd is not backed off - } else if ((F->epoll_state & EPOLLIN) && (F->read_handler) && !(F->epoll_backoff)) { - ev.events |= EPOLLIN; - } - if (type & COMM_SELECT_WRITE) { - if (handler) { - ev.events |= EPOLLOUT; - } - F->write_handler = handler; - F->write_data = client_data; - - // Otherwise, use previously stored value - } else if ((F->epoll_state & EPOLLOUT) && (F->write_handler)) { - ev.events |= EPOLLOUT; - } - if (ev.events) { - ev.events |= EPOLLHUP | EPOLLERR; - } - /* If the type is 0, force adding the fd to the epoll set */ - if (!(type)) { - F->epoll_state = 0; - } - if (ev.events != F->epoll_state) { - // If the struct is already in epoll MOD or DEL, else ADD - if (F->epoll_state) { - epoll_ctl_type = ev.events ? EPOLL_CTL_MOD : EPOLL_CTL_DEL; - } else { - epoll_ctl_type = EPOLL_CTL_ADD; - } - - /* Update the state */ - F->epoll_state = ev.events; - - if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) { - debug(5, 1) ("commSetSelect: epoll_ctl(%s): failed on fd=%d: %s\n", - epolltype_atoi(epoll_ctl_type), fd, xstrerror()); - } - } - if (timeout) - F->timeout = squid_curtime + timeout; -} - -int -comm_epoll(int msec) -{ - struct timespec ts; - static time_t last_timeout = 0; - int i; - int num; - int fd; - fde *F; - PF *hdl; - struct epoll_event *cevents; - double timeout = current_dtime + (msec / 1000.0); - - if (msec > MAX_POLL_TIME) - msec = MAX_POLL_TIME; - - debug(50, 3) ("comm_epoll: timeout %d\n", msec); - - do { -#if !ALARM_UPDATES_TIME - double start; - getCurrentTime(); - start = current_dtime; -#endif - ts.tv_sec = msec / 1000; - ts.tv_nsec = (msec % 1000) * 1000; - - /* Check timeouts once per second */ - if (last_timeout < squid_curtime) { - last_timeout = squid_curtime; - checkTimeouts(); - - /* bring backed off connections back online */ - commEpollBackon(); - } - /* Check for disk io callbacks */ - storeDirCallback(); - - for (;;) { - statCounter.syscalls.polls++; - num = epoll_wait(kdpfd, pevents, SQUID_MAXFD, msec); - statCounter.select_loops++; - - if (num >= 0) - break; - - if (ignoreErrno(errno)) - break; - - debug(5, 0) ("comm_epoll: epoll failure: %s\n", xstrerror()); - - return COMM_ERROR; - } - - statHistCount(&statCounter.select_fds_hist, num); - - if (num <= 0) - continue; - - for (i = 0, cevents = pevents; i < num; i++, cevents++) { - fd = cevents->data.fd; - F = &fd_table[fd]; - debug(5, 8) ("comm_epoll(): got fd=%d events=%x monitoring=%x F->read_handler=%p F->write_handler=%p\n" - ,fd, cevents->events, F->epoll_state, F->read_handler, F->write_handler); - if (cevents->events & (EPOLLIN | EPOLLHUP | EPOLLERR)) { - if ((hdl = F->read_handler) != NULL) { - // If the descriptor is meant to be deferred, don't handle - if (commDeferRead(fd) == 1) { - if (!(F->epoll_backoff)) { - debug(5, 1) ("comm_epoll(): WARNING defer handler for fd=%d (desc=%s) does not call commDeferFD() - backing off manually\n", fd, F->desc); - commEpollBackoff(fd); - } - goto WRITE_EVENT; - } - debug(5, 8) ("comm_epoll(): Calling read handler on fd=%d\n", fd); - F->read_handler = NULL; - hdl(fd, F->read_data); - statCounter.select_fds++; - if ((F->read_handler == NULL) && (F->flags.open)) { - commSetSelect(fd, COMM_SELECT_READ, NULL, NULL, 0); - } - } else if (cevents->events & EPOLLIN) { - debug(5, 2) ("comm_epoll(): no read handler for fd=%d", fd); - if (F->flags.open) { - commSetSelect(fd, COMM_SELECT_READ, NULL, NULL, 0); - } - } - } - WRITE_EVENT: - if (cevents->events & (EPOLLOUT | EPOLLHUP | EPOLLERR)) { - if ((hdl = F->write_handler) != NULL) { - debug(5, 8) ("comm_epoll(): Calling write handler on fd=%d\n", fd); - F->write_handler = NULL; - hdl(fd, F->write_data); - statCounter.select_fds++; - if ((F->write_handler == NULL) && (F->flags.open)) { - commSetSelect(fd, COMM_SELECT_WRITE, NULL, NULL, 0); - } - } else if (cevents->events & EPOLLOUT) { - debug(5, 2) ("comm_epoll(): no write handler for fd=%d\n", fd); - if (F->flags.open) { - commSetSelect(fd, COMM_SELECT_WRITE, NULL, NULL, 0); - } - } - } - } -#if !ALARM_UPDATES_TIME - getCurrentTime(); - statCounter.select_time += (current_dtime - start); -#endif - return COMM_OK; - } - while (timeout > current_dtime); - - debug(5, 8) ("comm_epoll: time out: %ld.\n", (long int) squid_curtime); - return COMM_TIMEOUT; -} -#endif /* HAVE_EPOLL */ static int commDeferRead(int fd) @@ -1545,3 +1171,5 @@ { MAX_POLL_TIME = 10; } + +#endif Index: src/fd.c =================================================================== RCS file: /server/cvs-server/squid/squid/src/fd.c,v retrieving revision 1.46 diff -u -r1.46 fd.c --- src/fd.c 18 May 2006 04:03:23 -0000 1.46 +++ src/fd.c 24 May 2006 14:31:52 -0000 @@ -84,7 +84,7 @@ assert(F->write_handler == NULL); } debug(51, 3) ("fd_close FD %d %s\n", fd, F->desc); -#if HAVE_EPOLL +#if USE_EPOLL /* the epoll code needs to update the descriptor before flags.ope is 0 */ commSetSelect(fd, COMM_SELECT_READ, NULL, NULL, 0); commSetSelect(fd, COMM_SELECT_WRITE, NULL, NULL, 0); Index: src/forward.c =================================================================== RCS file: /server/cvs-server/squid/squid/src/forward.c,v retrieving revision 1.99 diff -u -r1.99 forward.c --- src/forward.c 24 May 2006 11:37:30 -0000 1.99 +++ src/forward.c 24 May 2006 14:31:52 -0000 @@ -873,7 +873,7 @@ else { int i = delayMostBytesWanted(mem, INT_MAX); if (0 == i) { -#if HAVE_EPOLL +#if USE_EPOLL if (fd >= 0) { mem->serverfd = fd; commDeferFD(fd); @@ -899,7 +899,7 @@ * few other corner cases. */ if (fd >= 0 && mem->inmem_hi - mem->inmem_lo > SM_PAGE_SIZE + Config.Store.maxInMemObjSize + READ_AHEAD_GAP) { -#if HAVE_EPOLL +#if USE_EPOLL EBIT_SET(e->flags, ENTRY_DEFER_READ); mem->serverfd = fd; commDeferFD(fd); @@ -909,7 +909,7 @@ } if (fd >= 0 && mem->inmem_hi - storeLowestMemReaderOffset(e) > READ_AHEAD_GAP) { EBIT_SET(e->flags, ENTRY_DEFER_READ); -#if HAVE_EPOLL +#if USE_EPOLL mem->serverfd = fd; commDeferFD(fd); #endif Index: src/main.c =================================================================== RCS file: /server/cvs-server/squid/squid/src/main.c,v retrieving revision 1.366 diff -u -r1.366 main.c --- src/main.c 22 May 2006 21:54:56 -0000 1.366 +++ src/main.c 24 May 2006 14:31:52 -0000 @@ -776,12 +776,14 @@ eventRun(); if ((loop_delay = eventNextTime()) < 0) loop_delay = 0; -#if HAVE_EPOLL +#if USE_EPOLL switch (comm_epoll(loop_delay)) { -#elif HAVE_POLL +#elif USE_POLL switch (comm_poll(loop_delay)) { -#else +#elif USE_SELECT switch (comm_select(loop_delay)) { +#elif +#error Need poll, epoll or select defined! #endif case COMM_OK: errcount = 0; /* reset if successful */ Index: src/protos.h =================================================================== RCS file: /server/cvs-server/squid/squid/src/protos.h,v retrieving revision 1.469 diff -u -r1.469 protos.h --- src/protos.h 22 May 2006 23:05:27 -0000 1.469 +++ src/protos.h 24 May 2006 14:31:52 -0000 @@ -188,12 +188,14 @@ * comm_select.c */ extern void comm_select_init(void); -#if HAVE_EPOLL +#if USE_EPOLL extern int comm_epoll(int); -#elif HAVE_POLL +#elif USE_POLL extern int comm_poll(int); -#else +#elif USE_SELECT extern int comm_select(int); +#else +#error USE_POLL, USE_EPOLL or USE_SELECT need to be defined! #endif extern void commUpdateReadBits(int, PF *); extern void commUpdateWriteBits(int, PF *); Index: src/squid.h =================================================================== RCS file: /server/cvs-server/squid/squid/src/squid.h,v retrieving revision 1.234 diff -u -r1.234 squid.h --- src/squid.h 22 May 2006 18:55:23 -0000 1.234 +++ src/squid.h 24 May 2006 14:31:52 -0000 @@ -250,15 +250,15 @@ * -- Oskar Pearson * -- Stewart Forster */ -#if HAVE_POLL +#if USE_POLL #if HAVE_POLL_H #include #else /* HAVE_POLL_H */ #undef HAVE_POLL #endif /* HAVE_POLL_H */ -#endif /* HAVE_POLL */ +#endif /* USE_POLL */ -#if HAVE_EPOLL +#if USE_EPOLL #include #endif @@ -393,7 +393,7 @@ #include "Stack.h" /* Needed for poll() on Linux at least */ -#if HAVE_POLL +#if USE_POLL #ifndef POLLRDNORM #define POLLRDNORM POLLIN #endif Index: src/ssl.c =================================================================== RCS file: /server/cvs-server/squid/squid/src/ssl.c,v retrieving revision 1.130 diff -u -r1.130 ssl.c --- src/ssl.c 18 May 2006 04:03:24 -0000 1.130 +++ src/ssl.c 24 May 2006 14:31:52 -0000 @@ -139,7 +139,7 @@ if (i == INT_MAX) return 0; if (i == 0) { -#if HAVE_EPOLL +#if USE_EPOLL commDeferFD(fd); #endif return 1; Index: src/stat.c =================================================================== RCS file: /server/cvs-server/squid/squid/src/stat.c,v retrieving revision 1.366 diff -u -r1.366 stat.c --- src/stat.c 18 May 2006 04:03:24 -0000 1.366 +++ src/stat.c 24 May 2006 14:31:52 -0000 @@ -835,7 +835,7 @@ storeAppendPrintf(sentry, "aborted_requests = %f/sec\n", XAVG(aborted_requests)); -#if HAVE_POLL || HAVE_EPOLL +#if USE_POLL || USE_EPOLL storeAppendPrintf(sentry, "syscalls.polls = %f/sec\n", XAVG(syscalls.polls)); #else storeAppendPrintf(sentry, "syscalls.selects = %f/sec\n", XAVG(syscalls.selects)); Index: src/store_client.c =================================================================== RCS file: /server/cvs-server/squid/squid/src/store_client.c,v retrieving revision 1.118 diff -u -r1.118 store_client.c --- src/store_client.c 20 May 2006 12:58:51 -0000 1.118 +++ src/store_client.c 24 May 2006 14:31:52 -0000 @@ -285,7 +285,7 @@ debug(20, 3) ("storeClientCopy3: %s - clearing ENTRY_DEFER_READ\n", e->mem_obj->url); /* Clear the flag and re-poll the fd */ EBIT_CLR(e->flags, ENTRY_DEFER_READ); -#if HAVE_EPOLL +#if USE_EPOLL if (mem->serverfd != 0) { commResumeFD(mem->serverfd); mem->serverfd = 0; Index: src/store_swapout.c =================================================================== RCS file: /server/cvs-server/squid/squid/src/store_swapout.c,v retrieving revision 1.91 diff -u -r1.91 store_swapout.c --- src/store_swapout.c 18 May 2006 12:48:51 -0000 1.91 +++ src/store_swapout.c 24 May 2006 14:31:52 -0000 @@ -152,7 +152,7 @@ if (mem->inmem_hi - mem->inmem_lo <= READ_AHEAD_GAP) { EBIT_CLR(e->flags, ENTRY_DEFER_READ); -#if HAVE_EPOLL +#if USE_EPOLL if (mem->serverfd != 0) { commResumeFD(mem->serverfd); mem->serverfd = 0; Index: src/structs.h =================================================================== RCS file: /server/cvs-server/squid/squid/src/structs.h,v retrieving revision 1.457 diff -u -r1.457 structs.h --- src/structs.h 22 May 2006 22:24:09 -0000 1.457 +++ src/structs.h 24 May 2006 14:31:53 -0000 @@ -2011,7 +2011,7 @@ int recvfroms; int sendtos; } sock; -#if HAVE_POLL || HAVE_EPOLL +#if USE_POLL || USE_EPOLL int polls; #else int selects; Index: configure.in =================================================================== RCS file: /server/cvs-server/squid/squid/configure.in,v retrieving revision 1.334 diff -u -r1.334 configure.in --- configure.in 24 May 2006 11:20:11 -0000 1.334 +++ configure.in 24 May 2006 14:31:53 -0000 @@ -783,6 +783,24 @@ esac ]) +dnl Enable select() +AC_ARG_ENABLE(select, +[ --enable-select Enable select() support. + --disable-select Disable select() support. ], + +[ + case "$enableval" in + yes) + echo "Forcing select() to be enabled" + ac_cv_func_select='yes' + ;; + no) + echo "Forcing select() to be disabled" + ac_cv_func_select='no' + ;; + esac +]) + dnl Enable epoll() AC_ARG_ENABLE(epoll, [ --enable-epoll Enable epoll() instead of poll() or select(). @@ -2082,6 +2100,31 @@ bswap_32 \ ) +dnl Magic which checks whether we are forcing a type of comm loop we +dnl are actually going to (ab)use + +dnl Actually do the define magic now +dnl mostly ripped from squid-commloops, thanks to adrian and benno + +if test "$ac_cv_func_epoll" = "yes" ; then + SELECT_TYPE="epoll" + AC_DEFINE(USE_EPOLL,1,[Use epoll() for the IO loop]) + AC_CHECK_LIB(epoll, epoll_create, [EPOLL_LIBS="-lepoll"]) + AC_SUBST(EPOLL_LIBS) +elif test "$ac_cv_func_poll" = "yes" ; then + SELECT_TYPE="poll" + AC_DEFINE(USE_POLL,1,[Use poll() for the IO loop]) +elif test "$ac_cv_func_select" = "yes" ; then + SELECT_TYPE="select" + AC_DEFINE(USE_SELECT,1,[Use select() for the IO loop]) +else + echo "Eep! Can't find poll, epoll, or select!" + echo "I'll try select and hope for the best." + SELECT_TYPE="select" + AC_DEFINE(USE_SELECT,1) +fi +echo "Using ${SELECT_TYPE} for select loop." + dnl Yay! Another Linux brokenness. Its not good enough dnl to know that setresuid() exists, because RedHat 5.0 declares dnl setresuid() but doesn't implement it. Index: include/autoconf.h.in =================================================================== RCS file: /server/cvs-server/squid/squid/include/autoconf.h.in,v retrieving revision 1.140 diff -u -r1.140 autoconf.h.in --- include/autoconf.h.in 23 May 2006 14:54:36 -0000 1.140 +++ include/autoconf.h.in 24 May 2006 14:31:53 -0000 @@ -753,6 +753,9 @@ dnsserver processes instead. */ #undef USE_DNSSERVERS +/* Use epoll() for the IO loop */ +#undef USE_EPOLL + /* Define if we should use GNU regex */ #undef USE_GNUREGEX @@ -778,10 +781,16 @@ (USE_SSL) */ #undef USE_OPENSSL +/* Use poll() for the IO loop */ +#undef USE_POLL + /* If you want to log Referer request header values, define this. By default, they are written to referer.log in the Squid log directory. */ #undef USE_REFERER_LOG +/* Use select() for the IO loop */ +#undef USE_SELECT + /* Define this to include code for SSL encryption. */ #undef USE_SSL