diff -purN v2.6.12-rc3/fs/aio.c test-rc3/fs/aio.c --- v2.6.12-rc3/fs/aio.c 2005-04-28 11:01:57.000000000 -0400 +++ test-rc3/fs/aio.c 2005-05-03 15:59:06.000000000 -0400 @@ -1016,11 +1016,14 @@ put_rq: /* everything turned out well, dispose of the aiocb. */ ret = __aio_put_req(ctx, iocb); - spin_unlock_irqrestore(&ctx->ctx_lock, flags); + if (ctx->nr_wait) + ctx->nr_wait --; - if (waitqueue_active(&ctx->wait)) + if (!ctx->nr_wait && waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); + spin_unlock_irqrestore(&ctx->ctx_lock, flags); + if (ret) put_ioctx(ctx); @@ -1069,6 +1072,23 @@ out: return ret; } +static inline long ring_events_avail(struct kioctx *ctx) +{ + struct aio_ring *ring; + long head, tail; + /* Snapshot head and tail of the ring while holding ring_lock. */ + ring = kmap_atomic(ctx->ring_info.ring_pages[0], KM_USER0); + spin_lock(&ctx->ring_info.ring_lock); + head = ring->head; + tail = ring->tail; + spin_unlock(&ctx->ring_info.ring_lock); + kunmap_atomic(ring, KM_USER0); + /* Count is tail - head, plus ring_info.nr when tail < head. */ + if (head <= tail) + return tail - head; + return ctx->ring_info.nr + tail - head; +} + struct aio_timeout { struct timer_list timer; int timed_out; @@ -1185,7 +1205,29 @@ retry: ret = 0; if (to.timed_out) /* Only check after read evt */ break; + + /* If we're waiting for 2 or more events, publish the + * shortfall in ctx->nr_wait so that aio_complete() can + * skip wakeups. NOTE(review): nr may be <= 0 -- confirm. + */ + if ((min_nr - i) >= 2) { + long nr; + + spin_lock_irq(&ctx->ctx_lock); + nr = min_nr - i; + nr -= ring_events_avail(ctx); + ctx->nr_wait = nr; + spin_unlock_irq(&ctx->ctx_lock); + } schedule(); + + /* Make sure ctx->nr_wait is cleared for the next waiter. */ + if ((min_nr - i) >= 2) { + spin_lock_irq(&ctx->ctx_lock); + ctx->nr_wait = 0; + spin_unlock_irq(&ctx->ctx_lock); + } + event_loop++; if (signal_pending(tsk)) { ret = -EINTR; @@ -1213,6 +1255,14 @@ retry: if (timeout) clear_timeout(&to); + + /* It is possible that we exited early after setting ctx->nr_wait, + * causing a wakeup to be suppressed. Fix up this potential race by + * waking the next waiter if there are any events available. 
+ */ + if (min_nr >= 2 && waitqueue_active(&ctx->wait) && + ring_events_avail(ctx) > 0) + wake_up(&ctx->wait); out: pr_debug("event loop executed %d times\n", event_loop); pr_debug("aio_run %ld\n", aio_run); diff -purN v2.6.12-rc3/include/linux/aio.h test-rc3/include/linux/aio.h --- v2.6.12-rc3/include/linux/aio.h 2004-12-24 16:35:50.000000000 -0500 +++ test-rc3/include/linux/aio.h 2005-05-03 11:56:02.000000000 -0400 @@ -145,6 +145,7 @@ struct kioctx { struct list_head run_list; /* used for kicked reqs */ unsigned max_reqs; + int nr_wait; /* events a sleeping waiter still needs */ struct aio_ring_info ring_info;