diff -u --recursive --new-file linux-2.4.19-bkl2/include/linux/sunrpc/xprt.h linux-2.4.19-rpc_cong/include/linux/sunrpc/xprt.h
--- linux-2.4.19-bkl2/include/linux/sunrpc/xprt.h	Fri May  3 12:11:18 2002
+++ linux-2.4.19-rpc_cong/include/linux/sunrpc/xprt.h	Fri May  3 12:19:52 2002
@@ -44,7 +44,7 @@
 #define RPC_MAXCWND		(RPC_MAXCONG * RPC_CWNDSCALE)
 #define RPC_INITCWND		RPC_CWNDSCALE
 #define RPCXPRT_CONGESTED(xprt) \
-	((xprt)->cong >= (xprt)->cwnd)
+	((xprt)->cong > (xprt)->cwnd)
 
 /* Default timeout values */
 #define RPC_MAX_UDP_TIMEOUT	(60*HZ)
@@ -96,6 +96,7 @@
 	struct rpc_task *	rq_task;	/* RPC task data */
 	__u32			rq_xid;		/* request XID */
 	struct rpc_rqst *	rq_next;	/* free list */
+	unsigned char		rq_cong     : 1;/* has incremented xprt->cong */
 	volatile unsigned char	rq_received : 1;/* receive completed */
 
 	/*
@@ -135,7 +136,6 @@
 
 	unsigned long		cong;		/* current congestion */
 	unsigned long		cwnd;		/* congestion window */
-	unsigned long		congtime;	/* hold cwnd until then */
 
 	struct rpc_wait_queue	sending;	/* requests waiting to send */
 	struct rpc_wait_queue	pending;	/* requests in flight */
diff -u --recursive --new-file linux-2.4.19-bkl2/net/sunrpc/xprt.c linux-2.4.19-rpc_cong/net/sunrpc/xprt.c
--- linux-2.4.19-bkl2/net/sunrpc/xprt.c	Fri May  3 11:56:27 2002
+++ linux-2.4.19-rpc_cong/net/sunrpc/xprt.c	Fri May  3 12:19:52 2002
@@ -269,48 +269,81 @@
 }
 
 /*
+ * Van Jacobson congestion avoidance: charge one request against the
+ * congestion window. Returns 0 if the request may proceed, or 1 if the
+ * window is full and the task has been put to sleep on xprt->sending.
+ */
+static int
+xprt_cwnd_limited(struct rpc_xprt *xprt, struct rpc_rqst *req)
+{
+	struct rpc_task *task = req->rq_task;
+
+	if (req->rq_cong || xprt->nocong)
+		return 0;
+	dprintk("RPC: %4d xprt_cwnd_limited cong = %ld cwnd = %ld\n",
+		task->tk_pid, xprt->cong, xprt->cwnd);
+	spin_lock_bh(&xprt->sock_lock);
+	xprt->cong += RPC_CWNDSCALE;
+	if (!RPCXPRT_CONGESTED(xprt)) {
+		/* Set under sock_lock so cong and rq_cong change together */
+		req->rq_cong = 1;
+		spin_unlock_bh(&xprt->sock_lock);
+		return 0;
+	}
+	xprt->cong -= RPC_CWNDSCALE;	/* window full: undo the charge */
+	task->tk_timeout = 0;
+	task->tk_status = -EAGAIN;
+	rpc_sleep_on(&xprt->sending, task, NULL, NULL);
+	spin_unlock_bh(&xprt->sock_lock);
+	return 1;
+}
+
+/*
  * Adjust RPC congestion window
  * We use a time-smoothed congestion estimator to avoid heavy oscillation.
  */
-static void
-xprt_adjust_cwnd(struct rpc_xprt *xprt, int result)
+static inline void
+__xprt_adjust_cwnd(struct rpc_xprt *xprt, int result)
 {
 	unsigned long	cwnd;
 
-	if (xprt->nocong)
-		return;
-	/*
-	 * Note: we're in a BH context
-	 */
-	spin_lock(&xprt->xprt_lock);
 	cwnd = xprt->cwnd;
 	if (result >= 0) {
-		if (xprt->cong < cwnd || time_before(jiffies, xprt->congtime))
-			goto out;
 		/* The (cwnd >> 1) term makes sure
 		 * the result gets rounded properly. */
 		cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd;
 		if (cwnd > RPC_MAXCWND)
 			cwnd = RPC_MAXCWND;
-		else
-			pprintk("RPC: %lu %ld cwnd\n", jiffies, cwnd);
-		xprt->congtime = jiffies + ((cwnd * HZ) << 2) / RPC_CWNDSCALE;
-		dprintk("RPC:      cong %08lx, cwnd was %08lx, now %08lx, "
-			"time %ld ms\n", xprt->cong, xprt->cwnd, cwnd,
-			(xprt->congtime-jiffies)*1000/HZ);
 	} else if (result == -ETIMEDOUT) {
 		if ((cwnd >>= 1) < RPC_CWNDSCALE)
 			cwnd = RPC_CWNDSCALE;
-		xprt->congtime = jiffies + ((cwnd * HZ) << 3) / RPC_CWNDSCALE;
-		dprintk("RPC:      cong %ld, cwnd was %ld, now %ld, "
-			"time %ld ms\n", xprt->cong, xprt->cwnd, cwnd,
-			(xprt->congtime-jiffies)*1000/HZ);
-		pprintk("RPC: %lu %ld cwnd\n", jiffies, cwnd);
 	}
 
+	dprintk("RPC:      cong %ld, cwnd was %ld, now %ld\n",
+		xprt->cong, xprt->cwnd, cwnd);
 	xprt->cwnd = cwnd;
- out:
-	spin_unlock(&xprt->xprt_lock);
+}
+
+/*
+ * Drop the congestion slot held by req, if any (adjust != 0 also updates
+ * cwnd), and wake xprt->snd_task if its request holds no slot yet.
+ */
+static void
+xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_rqst *req, int adjust)
+{
+	if (!req->rq_cong)
+		return;
+	spin_lock_bh(&xprt->sock_lock);
+	req->rq_cong = 0;	/* cleared under sock_lock, together with cong */
+	if (adjust)
+		__xprt_adjust_cwnd(xprt, adjust);
+	if (!RPCXPRT_CONGESTED(xprt)) {	/* fits once our slot is freed */
+		struct rpc_task *next = xprt->snd_task;
+		if (next && !next->tk_rqstp->rq_cong)
+			rpc_wake_up_task(next);
+	}
+	xprt->cong -= RPC_CWNDSCALE;
+	spin_unlock_bh(&xprt->sock_lock);
 }
 
 /*
@@ -529,13 +562,13 @@
  * Complete reply received.
  * The TCP code relies on us to remove the request from xprt->pending.
  */
-static inline void
+static void
 xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied)
 {
 	struct rpc_task	*task = req->rq_task;
 
 	/* Adjust congestion window */
-	xprt_adjust_cwnd(xprt, copied);
+	xprt_adjust_cwnd(xprt, req, copied);
 
 #ifdef RPC_PROFILE
 	/* Profile only reads for now */
@@ -990,8 +1023,9 @@
 {
 	struct rpc_rqst	*req = task->tk_rqstp;
 
+	/* Timed out: shrink cwnd and release the congestion slot. */
 	if (req)
-		xprt_adjust_cwnd(task->tk_xprt, -ETIMEDOUT);
+		xprt_adjust_cwnd(task->tk_xprt, req, -ETIMEDOUT);
 
 	dprintk("RPC: %4d xprt_timer (%s request)\n",
 		task->tk_pid, req ? "pending" : "backlogged");
@@ -1036,6 +1070,8 @@
 
 	if (!xprt_lock_write(xprt, task))
 		return;
+	if (xprt_cwnd_limited(xprt, req))
+		return;
 
 #ifdef RPC_PROFILE
 	req->rq_xtime = jiffies;
@@ -1091,6 +1127,7 @@
 			break;
 	}
 	rpc_unlock_task(task);
+	xprt_adjust_cwnd(xprt, req, 0);
 
 	/* Note: at this point, task->tk_sleeping has not yet been set,
 	 *	 hence there is no danger of the waking up task being put on
@@ -1169,8 +1206,6 @@
 	if (task->tk_rqstp)
 		return 0;
 
-	dprintk("RPC: %4d xprt_reserve cong = %ld cwnd = %ld\n",
-				task->tk_pid, xprt->cong, xprt->cwnd);
 	spin_lock_bh(&xprt->xprt_lock);
 	xprt_reserve_status(task);
 	if (task->tk_rqstp) {
@@ -1204,18 +1239,14 @@
 	} else if (task->tk_rqstp) {
 		/* We've already been given a request slot: NOP */
 	} else {
-		if (RPCXPRT_CONGESTED(xprt) || !(req = xprt->free))
+		if (!(req = xprt->free))
 			goto out_nofree;
 		/* OK: There's room for us. Grab a free slot and bump
 		 * congestion value */
 		xprt->free     = req->rq_next;
 		req->rq_next   = NULL;
-		xprt->cong    += RPC_CWNDSCALE;
 		task->tk_rqstp = req;
 		xprt_request_init(task, xprt);
-
-		if (xprt->free)
-			xprt_clear_backlog(xprt);
 	}
 
 	return;
@@ -1271,9 +1302,6 @@
 	req->rq_next = xprt->free;
 	xprt->free   = req;
 
-	/* Decrease congestion value. */
-	xprt->cong -= RPC_CWNDSCALE;
-
 	xprt_clear_backlog(xprt);
 	spin_unlock_bh(&xprt->xprt_lock);
 }
@@ -1331,7 +1359,6 @@
 		xprt->nocong = 1;
 	} else
 		xprt->cwnd = RPC_INITCWND;
-	xprt->congtime = jiffies;
 	spin_lock_init(&xprt->sock_lock);
 	spin_lock_init(&xprt->xprt_lock);
 	init_waitqueue_head(&xprt->cong_wait);
@@ -1488,8 +1515,6 @@
  */
 int
 xprt_clear_backlog(struct rpc_xprt *xprt) {
-	if (RPCXPRT_CONGESTED(xprt))
-		return 0;
 	rpc_wake_up_next(&xprt->backlog);
 	if (waitqueue_active(&xprt->cong_wait))
 		wake_up(&xprt->cong_wait);
