1 /* fdwatch.c - fd watcher routines, either select() or poll()
3 ** Copyright © 1999,2000 by Jef Poskanzer <jef@mail.acme.com>.
4 ** All rights reserved.
6 ** Redistribution and use in source and binary forms, with or without
7 ** modification, are permitted provided that the following conditions
** are met:
9 ** 1. Redistributions of source code must retain the above copyright
10 ** notice, this list of conditions and the following disclaimer.
11 ** 2. Redistributions in binary form must reproduce the above copyright
12 ** notice, this list of conditions and the following disclaimer in the
13 ** documentation and/or other materials provided with the distribution.
15 ** THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 ** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 ** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ** ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 ** FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 ** DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 ** OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 ** HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 ** LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 ** OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
** SUCH DAMAGE.
28 #include <sys/types.h>
33 #include <sys/resource.h>
38 #define MIN(a,b) ((a) < (b) ? (a) : (b))
43 #else /* HAVE_POLL_H */
44 #ifdef HAVE_SYS_POLL_H
46 #endif /* HAVE_SYS_POLL_H */
47 #endif /* HAVE_POLL_H */
49 #ifdef HAVE_SYS_DEVPOLL_H
50 #include <sys/devpoll.h>
53 #endif /* !HAVE_DEVPOLL */
54 #endif /* HAVE_SYS_DEVPOLL_H */
56 #ifdef HAVE_SYS_EVENT_H
57 #include <sys/event.h>
58 #endif /* HAVE_SYS_EVENT_H */
/* Fallback fd_set bit macros for systems whose headers do not provide them.
** The word index is (n)/NFDBITS and the bit index (n) % NFDBITS.
**
** The shifted constant must be a long: fds_bits elements are typically
** long, and "1 << x" is undefined behavior once x reaches the width of
** int (e.g. fd 40 with NFDBITS == 64), so use 1L instead of 1.
** FD_ZERO uses memset rather than the legacy bzero, matching the rest of
** this file.
*/
#define FD_SET(n, p) ((p)->fds_bits[(n)/NFDBITS] |= (1L << ((n) % NFDBITS)))
#define FD_CLR(n, p) ((p)->fds_bits[(n)/NFDBITS] &= ~(1L << ((n) % NFDBITS)))
#define FD_ISSET(n, p) ((p)->fds_bits[(n)/NFDBITS] & (1L << ((n) % NFDBITS)))
#define FD_ZERO(p) (void) memset( (char*)(p), 0, sizeof(*(p)) )
71 #endif /* HAVE_SELECT */
/* Per-fd opaque client pointers, indexed by fd and set by fdwatch_add_fd();
** parallel to the fd_rw[] array allocated in fdwatch_get_nfiles() (that
** array's declaration falls on a line not visible in this excerpt).
*/
76 static void** fd_data;
/* nreturned: count of ready fds reported by the last WATCH() call.
** next_ridx: cursor into that result set, advanced by
** fdwatch_get_next_client_data().
*/
77 static int nreturned, next_ridx;
/* Backend selection.  Exactly one poller implementation is compiled in,
** chosen by the HAVE_KQUEUE / HAVE_DEVPOLL / HAVE_POLL / HAVE_SELECT
** configure macros (the #if lines themselves are on lines not visible in
** this excerpt).  The generic fdwatch_*() entry points talk to the chosen
** backend only through the WHICH / INIT / ADD_FD / DEL_FD / WATCH /
** CHECK_FD / GET_FD macros defined here.
*/
/* First choice: BSD kqueue/kevent. */
81 #define WHICH "kevent"
82 #define INIT( nf ) kqueue_init( nf )
83 #define ADD_FD( fd, rw ) kqueue_add_fd( fd, rw )
84 #define DEL_FD( fd ) kqueue_del_fd( fd )
85 #define WATCH( timeout_msecs ) kqueue_watch( timeout_msecs )
86 #define CHECK_FD( fd ) kqueue_check_fd( fd )
87 #define GET_FD( ridx ) kqueue_get_fd( ridx )
89 static int kqueue_init( int nf );
90 static void kqueue_add_fd( int fd, int rw );
91 static void kqueue_del_fd( int fd );
92 static int kqueue_watch( long timeout_msecs );
93 static int kqueue_check_fd( int fd );
94 static int kqueue_get_fd( int ridx );
96 #else /* HAVE_KQUEUE */
/* Second choice: Solaris-style /dev/poll. */
99 #define WHICH "devpoll"
100 #define INIT( nf ) devpoll_init( nf )
101 #define ADD_FD( fd, rw ) devpoll_add_fd( fd, rw )
102 #define DEL_FD( fd ) devpoll_del_fd( fd )
103 #define WATCH( timeout_msecs ) devpoll_watch( timeout_msecs )
104 #define CHECK_FD( fd ) devpoll_check_fd( fd )
105 #define GET_FD( ridx ) devpoll_get_fd( ridx )
107 static int devpoll_init( int nf );
108 static void devpoll_add_fd( int fd, int rw );
109 static void devpoll_del_fd( int fd );
110 static int devpoll_watch( long timeout_msecs );
111 static int devpoll_check_fd( int fd );
112 static int devpoll_get_fd( int ridx );
114 # else /* HAVE_DEVPOLL */
/* Third choice: poll().  (Its #define WHICH line is elided here.) */
118 #define INIT( nf ) poll_init( nf )
119 #define ADD_FD( fd, rw ) poll_add_fd( fd, rw )
120 #define DEL_FD( fd ) poll_del_fd( fd )
121 #define WATCH( timeout_msecs ) poll_watch( timeout_msecs )
122 #define CHECK_FD( fd ) poll_check_fd( fd )
123 #define GET_FD( ridx ) poll_get_fd( ridx )
125 static int poll_init( int nf );
126 static void poll_add_fd( int fd, int rw );
127 static void poll_del_fd( int fd );
128 static int poll_watch( long timeout_msecs );
129 static int poll_check_fd( int fd );
130 static int poll_get_fd( int ridx );
132 # else /* HAVE_POLL */
/* Last resort: select(). */
135 #define WHICH "select"
136 #define INIT( nf ) select_init( nf )
137 #define ADD_FD( fd, rw ) select_add_fd( fd, rw )
138 #define DEL_FD( fd ) select_del_fd( fd )
139 #define WATCH( timeout_msecs ) select_watch( timeout_msecs )
140 #define CHECK_FD( fd ) select_check_fd( fd )
141 #define GET_FD( ridx ) select_get_fd( ridx )
143 static int select_init( int nf );
144 static void select_add_fd( int fd, int rw );
145 static void select_del_fd( int fd );
146 static int select_watch( long timeout_msecs );
147 static int select_check_fd( int fd );
148 static int select_get_fd( int ridx );
150 # endif /* HAVE_SELECT */
151 # endif /* HAVE_POLL */
152 # endif /* HAVE_DEVPOLL */
153 #endif /* HAVE_KQUEUE */
158 /* Figure out how many file descriptors the system allows, and
159 ** initialize the fdwatch data structures. Returns -1 on failure.
/* NOTE(review): the return-type line, the local declarations (rl, i) and
** the opening #ifdef RLIMIT_NOFILE guard are on lines elided from this
** excerpt.
*/
162 fdwatch_get_nfiles( void )
167 #endif /* RLIMIT_NOFILE */
169 /* Figure out how many fd's we can have. */
170 nfiles = getdtablesize();
172 /* If we have getrlimit(), use that, and attempt to raise the limit. */
173 if ( getrlimit( RLIMIT_NOFILE, &rl ) == 0 )
175 nfiles = rl.rlim_cur;
/* Raise the soft limit: to an arbitrary 8192 when the hard limit is
** unlimited, otherwise up to the hard limit itself; only a successful
** setrlimit() updates nfiles.
*/
176 if ( rl.rlim_max == RLIM_INFINITY )
177 rl.rlim_cur = 8192; /* arbitrary */
178 else if ( rl.rlim_max > rl.rlim_cur )
179 rl.rlim_cur = rl.rlim_max;
180 if ( setrlimit( RLIMIT_NOFILE, &rl ) == 0 )
181 nfiles = rl.rlim_cur;
183 #endif /* RLIMIT_NOFILE */
185 #if defined(HAVE_SELECT) && ! ( defined(HAVE_POLL) || defined(HAVE_DEVPOLL) || defined(HAVE_KQUEUE) )
186 /* If we use select(), then we must limit ourselves to FD_SETSIZE. */
187 nfiles = MIN( nfiles, FD_SETSIZE );
188 #endif /* HAVE_SELECT && ! ( HAVE_POLL || HAVE_DEVPOLL || HAVE_KQUEUE ) */
190 /* Initialize the fdwatch data structures. */
/* fd_rw[] / fd_data[] are indexed by fd.  The loop below presumably resets
** each fd_rw entry to -1 -- the "not watched" marker that fdwatch_add_fd()
** tests -- but its body is on an elided line; TODO confirm.  The failure
** (-1) and success returns are likewise elided.
*/
192 fd_rw = (int*) malloc( sizeof(int) * nfiles );
193 fd_data = (void**) malloc( sizeof(void*) * nfiles );
194 if ( fd_rw == (int*) 0 || fd_data == (void**) 0 )
196 for ( i = 0; i < nfiles; ++i )
198 if ( INIT( nfiles ) == -1 )
205 /* Add a descriptor to the watch list. rw is either FDW_READ or FDW_WRITE. */
207 fdwatch_add_fd( int fd, void* client_data, int rw )
/* Reject out-of-range fds and fds already being watched (fd_rw[fd] != -1).
** The early return after the syslog, the assignment recording rw, and the
** backend ADD_FD() call are on elided lines.
*/
209 if ( fd < 0 || fd >= nfiles || fd_rw[fd] != -1 )
211 syslog( LOG_ERR, "bad fd (%d) passed to fdwatch_add_fd!", fd );
/* Remember the caller's opaque pointer; handed back by
** fdwatch_get_next_client_data().
*/
216 fd_data[fd] = client_data;
220 /* Remove a descriptor from the watch list. */
222 fdwatch_del_fd( int fd )
/* Reject out-of-range fds and fds not currently watched (fd_rw[fd] == -1).
** The backend DEL_FD() call and the fd_rw[fd] = -1 reset are on elided
** lines.
*/
224 if ( fd < 0 || fd >= nfiles || fd_rw[fd] == -1 )
226 syslog( LOG_ERR, "bad fd (%d) passed to fdwatch_del_fd!", fd );
231 fd_data[fd] = (void*) 0;
234 /* Do the watch. Return value is the number of descriptors that are ready,
235 ** or 0 if the timeout expired, or -1 on errors. A timeout of INFTIM means
236 ** wait indefinitely.
239 fdwatch( long timeout_msecs )
/* Delegates to the compiled-in backend; the result is cached in nreturned
** so fdwatch_get_next_client_data() can iterate it.  The next_ridx reset
** and the return statement are on elided lines.
*/
242 nreturned = WATCH( timeout_msecs );
248 /* Check if a descriptor was ready. */
250 fdwatch_check_fd( int fd )
/* Only meaningful for fds currently on the watch list; otherwise log and
** (on an elided line) return false.
*/
252 if ( fd < 0 || fd >= nfiles || fd_rw[fd] == -1 )
254 syslog( LOG_ERR, "bad fd (%d) passed to fdwatch_check_fd!", fd );
257 return CHECK_FD( fd );
/* Iterate the ready set from the last fdwatch() call, returning each fd's
** registered client_data pointer in turn.  The sentinel returned when the
** set is exhausted, and the fd_data[fd] return, are on elided lines.
*/
262 fdwatch_get_next_client_data( void )
266 if ( next_ridx >= nreturned )
268 fd = GET_FD( next_ridx++ );
269 if ( fd < 0 || fd >= nfiles )
275 /* Generate debugging statistics syslog message. */
277 fdwatch_logstats( long secs )
/* Reports how many WATCH() calls were made (nwatches, declared on an
** elided line) and the per-second rate over the interval; WHICH names the
** active backend ("kevent", "devpoll", "poll" or "select").
** NOTE(review): secs == 0 would divide by zero here -- the guard, if any,
** is on an elided line; confirm against the full source.
*/
281 LOG_NOTICE, " fdwatch - %ld %ss (%g/sec)",
282 nwatches, WHICH, (float) nwatches / secs );
/* kqueue backend state.  kqevents[] batches pending EV_ADD/EV_DELETE
** changes (nkqevents used, maxkqevents capacity); kqrevents[] receives
** triggered events from kevent(); kqrfdidx[] maps an fd to its index in
** kqrevents[] for O(1) lookup in kqueue_check_fd().
*/
289 static int maxkqevents;
290 static struct kevent* kqevents;
291 static int nkqevents;
292 static struct kevent* kqrevents;
293 static int* kqrfdidx;
/* Allocate the change/result arrays sized from nf fds; two change slots
** per fd so every fd can have both a pending add and a pending delete.
** The kqueue() creation, the -1 failure return, and the success return are
** on elided lines.
*/
298 kqueue_init( int nf )
303 maxkqevents = nf * 2;
304 kqevents = (struct kevent*) malloc( sizeof(struct kevent) * maxkqevents );
305 kqrevents = (struct kevent*) malloc( sizeof(struct kevent) * nf );
306 kqrfdidx = (int*) malloc( sizeof(int) * nf );
307 if ( kqevents == (struct kevent*) 0 || kqrevents == (struct kevent*) 0 ||
308 kqrfdidx == (int*) 0 )
310 (void) memset( kqevents, 0, sizeof(struct kevent) * maxkqevents )
311 (void) memset( kqrfdidx, 0, sizeof(int) * nf );
/* Queue an EV_ADD change for fd; the change list is flushed by the next
** kqueue_watch().  Filter direction follows the requested rw mode.
** The nkqevents increment is on an elided line.
*/
317 kqueue_add_fd( int fd, int rw )
319 if ( nkqevents >= maxkqevents )
321 syslog( LOG_ERR, "too many kqevents in kqueue_add_fd!" );
324 kqevents[nkqevents].ident = fd;
325 kqevents[nkqevents].flags = EV_ADD;
328 case FDW_READ: kqevents[nkqevents].filter = EVFILT_READ; break;
329 case FDW_WRITE: kqevents[nkqevents].filter = EVFILT_WRITE; break;
/* Queue an EV_DELETE change for fd, mirroring kqueue_add_fd().  The filter
** is chosen from fd_rw[fd] (the switch head is elided), so this must run
** before the caller clears that entry -- fdwatch_del_fd() does call DEL_FD
** first.
*/
337 kqueue_del_fd( int fd )
339 if ( nkqevents >= maxkqevents )
341 syslog( LOG_ERR, "too many kqevents in kqueue_del_fd!" );
344 kqevents[nkqevents].ident = fd;
345 kqevents[nkqevents].flags = EV_DELETE;
348 case FDW_READ: kqevents[nkqevents].filter = EVFILT_READ; break;
349 case FDW_WRITE: kqevents[nkqevents].filter = EVFILT_WRITE; break;
/* Flush the batched change list and collect up to nfiles triggered events
** into kqrevents[].  INFTIM means block indefinitely (null timespec);
** otherwise the millisecond timeout is converted to a timespec.
** Presumably nkqevents is reset to 0 after the kevent() call, on an elided
** line -- TODO confirm.  Returns r (the ready count) on an elided line.
*/
356 kqueue_watch( long timeout_msecs )
360 if ( timeout_msecs == INFTIM )
362 kq, kqevents, nkqevents, kqrevents, nfiles, (struct timespec*) 0 );
366 ts.tv_sec = timeout_msecs / 1000L;
367 ts.tv_nsec = ( timeout_msecs % 1000L ) * 1000000L;
368 r = kevent( kq, kqevents, nkqevents, kqrevents, nfiles, &ts );
/* Build the fd -> result-index map used by kqueue_check_fd(). */
374 for ( i = 0; i < r; ++i )
375 kqrfdidx[kqrevents[i].ident] = i;
/* Did fd trigger in the last kqueue_watch()?  kqrfdidx[] entries are stale
** between watches, so the result is cross-checked: the index must fall
** inside the current result set and the slot must actually name this fd.
** EV_ERROR slots count as not-ready; the returns for those guard branches
** are on elided lines.
*/
382 kqueue_check_fd( int fd )
384 int ridx = kqrfdidx[fd];
386 if ( ridx < 0 || ridx >= nfiles )
388 syslog( LOG_ERR, "bad ridx (%d) in kqueue_check_fd!", ridx );
391 if ( ridx >= nreturned )
393 if ( kqrevents[ridx].ident != fd )
395 if ( kqrevents[ridx].flags & EV_ERROR )
/* Ready only for the direction this fd was registered for. */
399 case FDW_READ: return kqrevents[ridx].filter == EVFILT_READ;
400 case FDW_WRITE: return kqrevents[ridx].filter == EVFILT_WRITE;
/* Return the fd stored at result-set index ridx, or (on an elided line)
** -1 after logging when ridx is out of range.
*/
407 kqueue_get_fd( int ridx )
409 if ( ridx < 0 || ridx >= nfiles )
411 syslog( LOG_ERR, "bad ridx (%d) in kqueue_get_fd!", ridx );
414 return kqrevents[ridx].ident;
417 #else /* HAVE_KQUEUE */
/* /dev/poll backend state, structured like the kqueue backend: dpevents[]
** batches pending pollfd registrations written to the dp descriptor,
** dprevents[] receives ready fds from the DP_POLL ioctl, and dp_rfdidx[]
** maps an fd to its slot in dprevents[].
*/
422 static int maxdpevents;
423 static struct pollfd* dpevents;
424 static int ndpevents;
425 static struct pollfd* dprevents;
426 static int* dp_rfdidx;
/* Open /dev/poll, mark it close-on-exec (F_SETFD, 1 == FD_CLOEXEC), and
** allocate the change/result arrays -- two change slots per fd.  The open
** failure check and the return statements are on elided lines.
*/
431 devpoll_init( int nf )
433 dp = open( "/dev/poll", O_RDWR );
436 (void) fcntl( dp, F_SETFD, 1 );
437 maxdpevents = nf * 2;
438 dpevents = (struct pollfd*) malloc( sizeof(struct pollfd) * maxdpevents );
439 dprevents = (struct pollfd*) malloc( sizeof(struct pollfd) * nf );
440 dp_rfdidx = (int*) malloc( sizeof(int) * nf );
441 if ( dpevents == (struct pollfd*) 0 || dprevents == (struct pollfd*) 0 ||
442 dp_rfdidx == (int*) 0 )
444 (void) memset( dp_rfdidx, 0, sizeof(int) * nf );
/* Queue a POLLIN or POLLOUT registration for fd; flushed to the /dev/poll
** driver by devpoll_watch().  The ndpevents increment is on an elided
** line.
*/
450 devpoll_add_fd( int fd, int rw )
452 if ( ndpevents >= maxdpevents )
454 syslog( LOG_ERR, "too many fds in devpoll_add_fd!" );
457 dpevents[ndpevents].fd = fd;
460 case FDW_READ: dpevents[ndpevents].events = POLLIN; break;
461 case FDW_WRITE: dpevents[ndpevents].events = POLLOUT; break;
/* Queue a POLLREMOVE (the /dev/poll deregistration event) for fd,
** mirroring devpoll_add_fd().
*/
469 devpoll_del_fd( int fd )
471 if ( ndpevents >= maxdpevents )
473 syslog( LOG_ERR, "too many fds in devpoll_del_fd!" );
476 dpevents[ndpevents].fd = fd;
477 dpevents[ndpevents].events = POLLREMOVE;
/* Flush batched registrations to the driver with a single write(), then
** collect ready fds via the DP_POLL ioctl.  A short or failed write is an
** error (its return path is on an elided line); presumably ndpevents is
** reset to 0 after the write -- TODO confirm.  DP_POLL takes the timeout
** in milliseconds directly, so no conversion is needed.
*/
483 devpoll_watch( long timeout_msecs )
488 r = sizeof(struct pollfd) * ndpevents;
489 if ( r > 0 && write( dp, dpevents, r ) != r )
493 dvp.dp_fds = dprevents;
494 dvp.dp_nfds = nfiles;
495 dvp.dp_timeout = (int) timeout_msecs;
497 r = ioctl( dp, DP_POLL, &dvp );
/* Build the fd -> result-index map used by devpoll_check_fd(). */
501 for ( i = 0; i < r; ++i )
502 dp_rfdidx[dprevents[i].fd] = i;
/* Did fd trigger in the last devpoll_watch()?  As with the kqueue variant,
** the cached index is validated against the current result set and the
** slot's fd before being trusted; POLLERR counts as not-ready.  The
** returns for those guard branches are on elided lines.
*/
509 devpoll_check_fd( int fd )
511 int ridx = dp_rfdidx[fd];
513 if ( ridx < 0 || ridx >= nfiles )
515 syslog( LOG_ERR, "bad ridx (%d) in devpoll_check_fd!", ridx );
518 if ( ridx >= nreturned )
520 if ( dprevents[ridx].fd != fd )
522 if ( dprevents[ridx].revents & POLLERR )
/* POLLHUP/POLLNVAL also report as ready so callers notice dead fds. */
526 case FDW_READ: return dprevents[ridx].revents & ( POLLIN | POLLHUP | POLLNVAL );
527 case FDW_WRITE: return dprevents[ridx].revents & ( POLLOUT | POLLHUP | POLLNVAL );
/* Return the fd stored at result-set index ridx, or (on an elided line)
** -1 after logging when ridx is out of range.
*/
534 devpoll_get_fd( int ridx )
536 if ( ridx < 0 || ridx >= nfiles )
538 syslog( LOG_ERR, "bad ridx (%d) in devpoll_get_fd!", ridx );
541 return dprevents[ridx].fd;
545 # else /* HAVE_DEVPOLL */
/* poll() backend state.  pollfds[0..npoll_fds) is the dense array handed
** to poll(); poll_fdidx[] maps an fd back to its pollfds[] slot, and
** poll_rfdidx[] is the ready-fd list built by poll_watch().
*/
550 static struct pollfd* pollfds;
551 static int npoll_fds;
552 static int* poll_fdidx;
553 static int* poll_rfdidx;
/* Allocate the three arrays and mark every slot unused (-1).
** NOTE(review): the function header line (poll_init) and the return
** statements are elided from this excerpt.
*/
561 pollfds = (struct pollfd*) malloc( sizeof(struct pollfd) * nf );
562 poll_fdidx = (int*) malloc( sizeof(int) * nf );
563 poll_rfdidx = (int*) malloc( sizeof(int) * nf );
564 if ( pollfds == (struct pollfd*) 0 || poll_fdidx == (int*) 0 ||
565 poll_rfdidx == (int*) 0 )
567 for ( i = 0; i < nf; ++i )
568 pollfds[i].fd = poll_fdidx[i] = -1;
/* Append fd to the dense pollfds[] array with the requested event mask,
** and record its slot in poll_fdidx[] so poll_del_fd() can find it.
** The npoll_fds increment is on an elided line.
*/
574 poll_add_fd( int fd, int rw )
576 if ( npoll_fds >= nfiles )
578 syslog( LOG_ERR, "too many fds in poll_add_fd!" );
581 pollfds[npoll_fds].fd = fd;
584 case FDW_READ: pollfds[npoll_fds].events = POLLIN; break;
585 case FDW_WRITE: pollfds[npoll_fds].events = POLLOUT; break;
588 poll_fdidx[fd] = npoll_fds;
/* Remove fd by the swap-with-last trick: the final pollfds[] entry is
** moved into the vacated slot so the array stays dense, and its index
** mapping is patched.  Presumably npoll_fds is decremented just before
** the swap, on an elided line -- TODO confirm; the poll_fdidx[fd] = -1
** reset is likewise elided.
*/
594 poll_del_fd( int fd )
596 int idx = poll_fdidx[fd];
598 if ( idx < 0 || idx >= nfiles )
600 syslog( LOG_ERR, "bad idx (%d) in poll_del_fd!", idx );
604 pollfds[idx] = pollfds[npoll_fds];
605 poll_fdidx[pollfds[idx].fd] = idx;
606 pollfds[npoll_fds].fd = -1;
/* Run poll() over the dense fd array, then compact the fds that reported
** any event (including error/hangup conditions) into poll_rfdidx[] for
** iteration by GET_FD.  The early return when r <= 0 is on an elided
** line.
*/
612 poll_watch( long timeout_msecs )
616 r = poll( pollfds, npoll_fds, (int) timeout_msecs );
621 for ( i = 0; i < npoll_fds; ++i )
622 if ( pollfds[i].revents &
623 ( POLLIN | POLLOUT | POLLERR | POLLHUP | POLLNVAL ) )
625 poll_rfdidx[ridx++] = pollfds[i].fd;
630 return ridx; /* should be equal to r */
/* Did fd report ready in the last poll()?  Looks the fd up in its
** pollfds[] slot via poll_fdidx[]; POLLERR counts as not-ready, while
** POLLHUP/POLLNVAL report as ready so callers notice dead fds.  The
** returns for the guard branches are on elided lines.
*/
635 poll_check_fd( int fd )
637 int fdidx = poll_fdidx[fd];
639 if ( fdidx < 0 || fdidx >= nfiles )
641 syslog( LOG_ERR, "bad fdidx (%d) in poll_check_fd!", fdidx );
644 if ( pollfds[fdidx].revents & POLLERR )
648 case FDW_READ: return pollfds[fdidx].revents & ( POLLIN | POLLHUP | POLLNVAL );
649 case FDW_WRITE: return pollfds[fdidx].revents & ( POLLOUT | POLLHUP | POLLNVAL );
/* Return the ready fd at index ridx of the list built by poll_watch(),
** or (on an elided line) -1 after logging when ridx is out of range.
*/
656 poll_get_fd( int ridx )
658 if ( ridx < 0 || ridx >= nfiles )
660 syslog( LOG_ERR, "bad ridx (%d) in poll_get_fd!", ridx );
663 return poll_rfdidx[ridx];
666 # else /* HAVE_POLL */
/* select() backend state.  The master fd_sets hold the registered fds;
** they are copied to the working sets for each select() call, since
** select() overwrites its arguments.  select_fds[] is the dense list of
** watched fds, select_fdidx[] maps fd -> slot, select_rfdidx[] is the
** ready list built by select_watch(), and maxfd_changed tells
** select_get_maxfd() to recompute its cached maximum.
*/
671 static fd_set master_rfdset;
672 static fd_set master_wfdset;
673 static fd_set working_rfdset;
674 static fd_set working_wfdset;
675 static int* select_fds;
676 static int* select_fdidx;
677 static int* select_rfdidx;
678 static int nselect_fds;
680 static int maxfd_changed;
/* Clear the master sets, allocate the index arrays, and mark every slot
** unused (-1).  The return-type line and return statements are elided.
*/
684 select_init( int nf )
688 FD_ZERO( &master_rfdset );
689 FD_ZERO( &master_wfdset );
690 select_fds = (int*) malloc( sizeof(int) * nf );
691 select_fdidx = (int*) malloc( sizeof(int) * nf );
692 select_rfdidx = (int*) malloc( sizeof(int) * nf );
693 if ( select_fds == (int*) 0 || select_fdidx == (int*) 0 ||
694 select_rfdidx == (int*) 0 )
699 for ( i = 0; i < nf; ++i )
700 select_fds[i] = select_fdidx[i] = -1;
/* Register fd in the dense list and the master fd_set for its direction.
** Presumably the maxfd bookkeeping update and the nselect_fds increment
** are on elided lines -- TODO confirm.
*/
706 select_add_fd( int fd, int rw )
708 if ( nselect_fds >= nfiles )
710 syslog( LOG_ERR, "too many fds in select_add_fd!" );
713 select_fds[nselect_fds] = fd;
716 case FDW_READ: FD_SET( fd, &master_rfdset ); break;
717 case FDW_WRITE: FD_SET( fd, &master_wfdset ); break;
722 select_fdidx[fd] = nselect_fds;
/* Deregister fd: swap-with-last removal from the dense list (presumably
** nselect_fds is decremented just before the swap, on an elided line --
** TODO confirm), then clear fd from both master sets.  The maxfd_changed
** update is likewise elided.
*/
728 select_del_fd( int fd )
730 int idx = select_fdidx[fd];
732 if ( idx < 0 || idx >= nfiles )
734 syslog( LOG_ERR, "bad idx (%d) in select_del_fd!", idx );
739 select_fds[idx] = select_fds[nselect_fds];
740 select_fdidx[select_fds[idx]] = idx;
741 select_fds[nselect_fds] = -1;
742 select_fdidx[fd] = -1;
744 FD_CLR( fd, &master_rfdset );
745 FD_CLR( fd, &master_wfdset );
/* Return the highest watched fd (select() needs maxfd + 1).  The visible
** loop scans the dense list; the surrounding maxfd cache logic keyed on
** maxfd_changed, and the return, are on elided lines.
*/
753 select_get_maxfd( void )
759 for ( i = 0; i < nselect_fds; ++i )
760 if ( select_fds[i] > maxfd )
761 maxfd = select_fds[i];
/* Run select() on fresh copies of the master sets (select() clobbers its
** fd_set arguments, so the masters are preserved).  INFTIM means a null
** timeval (block indefinitely); otherwise the millisecond timeout is
** converted.  The "select(" call text itself falls on elided lines just
** before the visible argument lines.  Afterwards the ready fds are
** compacted into select_rfdidx[]; the early return when r <= 0 is elided.
*/
769 select_watch( long timeout_msecs )
774 working_rfdset = master_rfdset;
775 working_wfdset = master_wfdset;
776 mfd = select_get_maxfd();
777 if ( timeout_msecs == INFTIM )
779 mfd + 1, &working_rfdset, &working_wfdset, (fd_set*) 0,
780 (struct timeval*) 0 );
783 struct timeval timeout;
784 timeout.tv_sec = timeout_msecs / 1000L;
785 timeout.tv_usec = ( timeout_msecs % 1000L ) * 1000L;
787 mfd + 1, &working_rfdset, &working_wfdset, (fd_set*) 0, &timeout );
793 for ( idx = 0; idx < nselect_fds; ++idx )
794 if ( select_check_fd( select_fds[idx] ) )
796 select_rfdidx[ridx++] = select_fds[idx];
801 return ridx; /* should be equal to r */
/* Did fd report ready in the last select()?  Tests the working set for
** the direction the fd was registered for; the switch head (presumably
** on fd_rw[fd]) and default case are on elided lines.
*/
806 select_check_fd( int fd )
810 case FDW_READ: return FD_ISSET( fd, &working_rfdset );
811 case FDW_WRITE: return FD_ISSET( fd, &working_wfdset );
/* Return the ready fd at index ridx of the list built by select_watch(),
** or (on an elided line) -1 after logging when ridx is out of range.
*/
818 select_get_fd( int ridx )
820 if ( ridx < 0 || ridx >= nfiles )
822 syslog( LOG_ERR, "bad ridx (%d) in select_get_fd!", ridx );
825 return select_rfdidx[ridx];
828 # endif /* HAVE_SELECT */
830 # endif /* HAVE_POLL */
832 # endif /* HAVE_DEVPOLL */
834 #endif /* HAVE_KQUEUE */