/*
 * T.C.F.S. 2.0 Alpha 1 
 *
 *      	   This  program  handles  RPC  "NFS"  data  requests
 *              adopting a secure transfer protocol.
 *                 This   is  an  unsecure   and  unchecked  version,
 *              use at your own risk.
 *
 *              Please, report Bugs to: <tcfs@edu-gw.dia.unisa.it>
 *
 * Authors:	Giuseppe Cattaneo, <cattaneo@udsab.dia.unisa.it>
 *		Giuseppe Persiano, <giuper@udsab.dia.unisa.it>
 *		Andrea Cozzolino, <andcoz@edu-gw.dia.unisa.it>
 *		Angelo Celentano, <angcel@edu-gw.dia.unisa.it>
 *		Aniello Del Sorbo, <anidel@edu-gw.dia.unisa.it>
 *		Ermelindo Mauriello, <ermmau@edu-gw.dia.unisa.it>
 *		Raffaele Pisapia, <rafpis@edu-gw.dia.unisa.it>
 *
 *   Permission to  use, copy, and modify  this software  without fee
 * is hereby granted, provided that this entire notice is included in
 * all copies  of  any  software  which  is  or  includes a  copy  or
 * modification of this software and in all copies  of the supporting
 * documentation for such software.
 *
 *   This  software is  distributed  under  the  GNU General  Public
 * License  (version  2, June  1991). Check  the  file  'COPYING'  for
 * more  infos. Some  parts of  this  software  derive  from the  NFS
 * implementation in the Linux kernel 2.0.x.
 *
 * This software  may be used  for any  purpose  provided  the above
 * copyright  notice  is retained.  It  is  supplied  as is,  with no
 * warranty expressed or implied.
 *
 */

/* -+-_== */

/*
 *  linux/fs/tcfs/rpcsock.c
 *
 *  This is a generic RPC call interface for datagram sockets that is able
 *  to place several concurrent RPC requests at the same time. It works like
 *  this:
 *
 *  -	When a process places a call, it allocates a request slot if
 *	one is available. Otherwise, it sleeps on the backlog queue
 *	(tcrpc_reserve).
 *  -	Then, the message is transmitted via tcrpc_send (exported by name of
 *	tcrpc_transmit).
 *  -	Finally, the process waits for the call to complete (tcrpc_doio):
 *	The first process on the receive queue waits for the next RPC packet,
 *	and peeks at the XID. If it finds a matching request, it receives
 *	the datagram on behalf of that process and wakes it up. Otherwise,
 *	the datagram is discarded.
 *  -	If the process having received the datagram was the first one on
 *	the receive queue, it wakes up the next one to listen for replies.
 *  -	It then removes itself from the request queue (tcrpc_release).
 *	If there are more callers waiting on the backlog queue, they are
 *	woken up, too.
 *
 * Mar 1996:
 *  -	Split up large functions into smaller chunks as per Linus' coding
 *	style. Found an interesting bug this way, too.
 *  -	Added entry points for tcfsiod.
 *
 *  Copyright (C) 1995, 1996, Olaf Kirch <okir@monad.swb.de>
 */

#include <linux/types.h>
#include <linux/malloc.h>
#include <linux/sched.h>
#include <linux/tcfs_fs.h>
#include <linux/errno.h>
#include <linux/socket.h>
#include <linux/fcntl.h>
#include <linux/in.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/tcrpcsock.h>

#include <linux/udp.h>
#include <net/sock.h>

#include <asm/segment.h>
#include <linux/wait.h>

/*
 * Put the current task into an interruptible sleep for roughly `sec'
 * milliseconds.  Despite its name, the argument is scaled by HZ / 1000,
 * i.e. it is interpreted as a millisecond count.
 *
 * The wake-up time is stored as an absolute jiffies value, matching the
 * way tcrpc_doio() sets current->timeout (jiffies + timeout).  The
 * argument is parenthesized and the body is wrapped in do/while(0) so
 * the macro behaves like a single statement.
 */
#define msleep(sec)	do { \
			  current->timeout = jiffies + (sec) * HZ / 1000; \
			  current->state = TASK_INTERRUPTIBLE; \
			  schedule(); \
			} while (0)

/*
 * Debug tracing.  DEBUG_RPC is explicitly undefined on the next line,
 * so dprintk() expands to nothing in this build; replace the #undef
 * with a #define to forward the messages to printk().
 */
#undef DEBUG_RPC
#ifdef DEBUG_RPC			
#define dprintk(args...)	printk(## args)
#else
#define	dprintk(args...)
#endif


/*
 * Link a request slot in at the head of the socket's pending list and
 * mark it as queued.  The list is doubly linked through w_next/w_prev;
 * no ordering is applied here, the newest request simply becomes the
 * new head.
 */
static inline void
tcrpc_insque(struct tcrpc_sock *rsock, struct tcrpc_wait *slot)
{
	struct tcrpc_wait	*head = rsock->pending;

	slot->w_prev = NULL;
	slot->w_next = head;
	if (head != NULL)
		head->w_prev = slot;

	slot->w_queued = 1;
	rsock->pending = slot;

	dprintk("RPC: inserted %p into queue\n", slot);
}

/*
 * Unlink a request slot from the socket's pending list and clear its
 * queued flag.  If the slot was the list head, the head moves on to
 * the slot's successor.
 */
static inline void
tcrpc_remque(struct tcrpc_sock *rsock, struct tcrpc_wait *slot)
{
	struct tcrpc_wait	*before = slot->w_prev;
	struct tcrpc_wait	*after = slot->w_next;

	/* Detach from the predecessor, or advance the head. */
	if (before == NULL)
		rsock->pending = after;
	else
		before->w_next = after;

	/* Detach from the successor, if there is one. */
	if (after)
		after->w_prev = before;

	slot->w_queued = 0;
	dprintk("RPC: removed %p from queue, head now %p.\n",
			slot, rsock->pending);
}

/*
 * Write data to socket.
 *
 *   rsock  RPC socket whose underlying struct socket is used
 *   iov    I/O vector describing the data to send
 *   nr     number of entries in iov
 *   len    total number of bytes to send
 *   sap    destination address, passed through as msg_name
 *   salen  length of the destination address
 *
 * Returns whatever the socket's sendmsg operation returns.
 */
static inline int
tcrpc_sendmsg(struct tcrpc_sock *rsock, struct iovec *iov, int nr, int len,
				struct sockaddr *sap, int salen)
{
	struct socket	*sock = rsock->sock;
	struct msghdr	msg;
	unsigned long	oldfs;
	int		result;

	/* Zero the whole header first so that no member we do not set
	 * explicitly reaches the socket layer as stack garbage. */
	memset(&msg, 0, sizeof(msg));
	msg.msg_iov	= iov;
	msg.msg_iovlen	= nr;
	msg.msg_name	= sap;
	msg.msg_namelen = salen;
	msg.msg_control = NULL;

	/* The buffers live in kernel memory (tcrpc_send passes an
	 * on-stack iovec), so widen the address limit around the call
	 * and restore it afterwards. */
	oldfs = get_fs();
	set_fs(get_ds());
	result = sock->ops->sendmsg(sock, &msg, len, 0, 0);
	set_fs(oldfs);

	dprintk("RPC: tcrpc_sendmsg(iov %p, len %d) = %d\n", iov, len, result);
	return result;
}
/*
 * Read data from socket.
 *
 *   rsock  RPC socket whose underlying struct socket is used
 *   iov    I/O vector describing where the data goes
 *   nr     number of entries in iov
 *   len    maximum number of bytes to receive
 *   flags  receive flags (tcrpc_grok passes MSG_PEEK or 0)
 *
 * The sender's address is collected into a local buffer and dropped.
 * Returns whatever the socket's recvmsg operation returns.
 */
static inline int
tcrpc_recvmsg(struct tcrpc_sock *rsock, struct iovec *iov,
			int nr, int len, int flags)
{
	struct socket	*sock = rsock->sock;
	struct sockaddr	sa;
	struct msghdr	msg;
	unsigned long	oldfs;
	int		result, alen;

	/* Zero the whole header first so that no member we do not set
	 * explicitly reaches the socket layer as stack garbage. */
	memset(&msg, 0, sizeof(msg));
	msg.msg_iov	= iov;
	msg.msg_iovlen	= nr;
	msg.msg_name	= &sa;
	msg.msg_namelen = sizeof(sa);
	msg.msg_control = NULL;

	/* Widen the address limit around the call and restore it
	 * afterwards; the header and address buffer are kernel memory. */
	oldfs = get_fs();
	set_fs(get_ds());
	/* NOTE(review): the literal 1 is presumably the "nonblock"
	 * argument -- tcrpc_grok treats -EAGAIN as "nothing there". */
	result = sock->ops->recvmsg(sock, &msg, len, 1, flags, &alen);
	set_fs(oldfs);

	dprintk("RPC: tcrpc_recvmsg(iov %p, len %d) = %d\n", iov, len, result);
	return result;
}

/*
 * Wait until the RPC socket reports pending input.
 *
 * This code is slightly complicated. Since the networking code does not
 * honor the current->timeout value, we have to select on the socket.
 *
 * Returns 0 when one of the two select calls reports input, or when the
 * task was woken with no unblocked signal pending and current->timeout
 * still non-zero.  Returns -ERESTARTSYS if an unblocked signal is
 * pending after the sleep, and -ETIMEDOUT if current->timeout is zero
 * after the sleep (presumably cleared by the scheduler on expiry).
 */
static inline int
tcrpc_select(struct tcrpc_sock *rsock)
{
	struct select_table_entry entry;
	struct file	*file = rsock->file;
	select_table	wait_table;

	dprintk("RPC: selecting on socket...\n");
	/* One-entry select table backed by the on-stack `entry'. */
	wait_table.nr = 0;
	wait_table.entry = &entry;
	/* Go to sleeping state before polling; presumably so that a
	 * wake-up arriving between the poll and schedule() is not lost. */
	current->state = TASK_INTERRUPTIBLE;
	/* First call hands in the wait table, second call (NULL table)
	 * just polls again.  Only sleep if both report no input. */
	if (!file->f_op->select(file->f_inode, file, SEL_IN, &wait_table)
	 && !file->f_op->select(file->f_inode, file, SEL_IN, NULL)) {
		schedule();
		remove_wait_queue(entry.wait_address, &entry.wait);
		current->state = TASK_RUNNING;
		/* Signals take precedence over an expired timeout. */
		if (current->signal & ~current->blocked)
			return -ERESTARTSYS;
		if (current->timeout == 0)
			return -ETIMEDOUT;
	} else if (wait_table.nr)
		/* Input was already there; undo the wait queue
		 * registration if the first select made one. */
		remove_wait_queue(entry.wait_address, &entry.wait);
	current->state = TASK_RUNNING;
	dprintk("RPC: ...Okay, there appears to be some data.\n");
	return 0;
}

/*
 * Reserve an RPC call slot for `req'.
 *
 * A slot is handed out only when the free list is non-empty and the
 * congestion counter is below the congestion window.  Otherwise the
 * caller either fails at once (nocwait != 0) or sleeps on the backlog
 * queue until woken.
 *
 * Returns 0 with req->rq_slot set on success.  On failure rq_slot stays
 * NULL and the result is -ENOBUFS (nocwait set; current->timeout is
 * cleared as well), -ETIMEDOUT (current->timeout is zero after the
 * sleep), -ERESTARTSYS (unblocked signal pending) or -EIO (socket is
 * being shut down).
 */
int
tcrpc_reserve(struct tcrpc_sock *rsock, struct tcrpc_ioreq *req, int nocwait)
{
	struct tcrpc_wait	*slot;

	req->rq_slot = NULL;

	for (;;) {
		slot = rsock->free;
		if (slot != NULL && rsock->cong < rsock->cwnd)
			break;

		if (nocwait) {
			current->timeout = 0;
			return -ENOBUFS;
		}

		dprintk("RPC: tcrpc_reserve waiting on backlog\n");
		interruptible_sleep_on(&rsock->backlog);

		/* Work out why we were woken up. */
		if (current->timeout == 0)
			return -ETIMEDOUT;
		if (current->signal & ~current->blocked)
			return -ERESTARTSYS;
		if (rsock->shutdown)
			return -EIO;
	}

	/* Take the slot off the free list and account for it. */
	rsock->free = slot->w_next;
	rsock->cong += TCRPC_CWNDSCALE;	/* bump congestion value */

	slot->w_req = req;
	slot->w_gotit = 0;
	slot->w_queued = 0;

	dprintk("RPC: reserved slot %p\n", slot);
	req->rq_slot = slot;
	return 0;
}

/*
 * Release the RPC call slot held by `req', if any.
 *
 * If the slot is the head of the pending list, the next entry is woken
 * so it can take over receiving.  The slot is then unlinked from the
 * pending list (when queued), pushed onto the free list, and the
 * congestion counter is lowered.  Sleepers on the backlog queue are
 * woken once there is room again, and a pending shutdown is notified.
 */
void
tcrpc_release(struct tcrpc_sock *rsock, struct tcrpc_ioreq *req)
{
	struct tcrpc_wait	*slot = req->rq_slot;

	if (slot == NULL)
		return;

	dprintk("RPC: release slot %p\n", slot);

	/* Wake up the next receiver */
	if (rsock->pending == slot && slot->w_next != NULL)
		wake_up(&slot->w_next->w_wait);

	/* Drop the slot from the pending queue ... */
	if (slot->w_queued)
		tcrpc_remque(rsock, slot);

	/* ... and return it to the free list. */
	slot->w_next = rsock->free;
	rsock->free = slot;

	/* One request less in flight. */
	rsock->cong -= TCRPC_CWNDSCALE;
	if (rsock->cong < rsock->cwnd && rsock->backlog)
		wake_up(&rsock->backlog);

	if (rsock->shutdown)
		wake_up(&rsock->shutwait);

	req->rq_slot = NULL;
}

/*
 * Adjust the RPC congestion window.
 *
 * timeout != 0: halve the window, but never below TCRPC_CWNDSCALE.
 * timeout == 0: if the congestion counter has reached the window, grow
 *               the window by TCRPC_CWNDSCALE^2 / cwnd (rounded), capped
 *               at TCRPC_MAXCWND; otherwise leave it alone.
 */
static void
tcrpc_cwnd_adjust(struct tcrpc_sock *rsock, int timeout)
{
	unsigned long	win = rsock->cwnd;

	if (timeout) {
		win >>= 1;
		if (win < TCRPC_CWNDSCALE)
			win = TCRPC_CWNDSCALE;
		dprintk("RPC: cwnd decrease %08lx\n", win);
	} else if (rsock->cong >= win) {
		/* Adding win / 2 to the dividend rounds the quotient
		 * to the nearest integer. */
		win += (TCRPC_CWNDSCALE * TCRPC_CWNDSCALE +
				(win >> 1)) / win;
		if (win > TCRPC_MAXCWND)
			win = TCRPC_MAXCWND;
	}

	dprintk("RPC: cong %08lx, cwnd was %08lx, now %08lx\n",
			rsock->cong, rsock->cwnd, win);

	rsock->cwnd = win;
}

/*
 * Sanity-check an outgoing RPC packet.  The second and third 32-bit
 * words must equal htonl(TCRPC_CALL) and htonl(TCRPC_VERSION); if not,
 * the first eight words are dumped via printk, tagged with `where'.
 * The packet itself is never modified and nothing is returned.
 */
static inline void
tcrpc_send_check(char *where, u32 *ptr)
{
	if (ptr[1] == htonl(TCRPC_CALL) && ptr[2] == htonl(TCRPC_VERSION))
		return;

	printk("RPC: %s sending evil packet:\n"
	       "     %08x %08x %08x %08x %08x %08x %08x %08x\n",
	       where,
	       ptr[0], ptr[1], ptr[2], ptr[3],
	       ptr[4], ptr[5], ptr[6], ptr[7]);
}

/*
 * Place the actual RPC call.
 * We have to copy the iovec because sendmsg fiddles with its contents.
 *
 * The XID (first 32-bit word of the first send buffer) is recorded in
 * the slot so tcrpc_grok can match the reply, and the slot is put on
 * the pending list if it is not queued yet.
 *
 * Returns -EIO if the socket is shutting down, -EINVAL if the request's
 * send vector count does not fit the local iovec copy (or is empty),
 * otherwise the result of tcrpc_sendmsg.
 */
static inline int
tcrpc_send(struct tcrpc_sock *rsock, struct tcrpc_wait *slot)
{
	struct tcrpc_ioreq *req = slot->w_req;
	struct iovec	iov[UIO_MAXIOV];

	if (rsock->shutdown)
		return -EIO;

	/* The copy below targets a fixed-size on-stack array and iov[0]
	 * is dereferenced right after, so reject counts that would
	 * overflow the array or leave iov[0] uninitialized. */
	if (req->rq_snr < 1 || req->rq_snr > UIO_MAXIOV)
		return -EINVAL;

	memcpy(iov, req->rq_svec, req->rq_snr * sizeof(iov[0]));
	slot->w_xid = *(u32 *)(iov[0].iov_base);
	if (!slot->w_queued)
		tcrpc_insque(rsock, slot);

	dprintk("tcrpc_send(%p, %x)\n", slot, slot->w_xid);
	tcrpc_send_check("tcrpc_send", (u32 *) req->rq_svec[0].iov_base);
	return tcrpc_sendmsg(rsock, iov, req->rq_snr, req->rq_slen,
				req->rq_addr, req->rq_alen);
}

/*
 * This is the same as tcrpc_send but for the functions exported to tcfsiod
 */
int
tcrpc_transmit(struct tcrpc_sock *rsock, struct tcrpc_ioreq *req)
{
	tcrpc_send_check("tcrpc_transmit", (u32 *) req->rq_svec[0].iov_base);
	return tcrpc_send(rsock, req->rq_slot);
}

/*
 * Receive and dispatch a single reply.
 *
 * Peeks at the first four bytes of the next datagram (the XID), looks
 * for a pending slot with the same w_xid, receives the datagram into
 * that request's receive vector, stores the outcome in the slot and
 * wakes up whoever sleeps on it.
 *
 * Returns:
 *   0             nothing was dispatched: the peek failed with -EAGAIN
 *                 or -ECONNREFUSED, the peek returned fewer than four
 *                 bytes (this includes other negative errors), no slot
 *                 matched the XID, or the slot already had its reply
 *   -ERESTARTSYS  the peek was interrupted
 *   otherwise     the result of the final tcrpc_recvmsg, which may be
 *                 negative
 */
static inline int
tcrpc_grok(struct tcrpc_sock *rsock)
{
	struct tcrpc_wait	*rovr;
	struct tcrpc_ioreq *req;
	struct iovec	iov[UIO_MAXIOV];
	u32		xid;
	int		safe, result;

	/* Peek at the XID without consuming the datagram. */
	iov[0].iov_base = (void *) &xid;
	iov[0].iov_len  = sizeof(xid);
	result = tcrpc_recvmsg(rsock, iov, 1, sizeof(xid), MSG_PEEK);

	if (result < 0) {
		switch (-result) {
		case EAGAIN: case ECONNREFUSED:
			return 0;
		case ERESTARTSYS:
			return result;
		default:
			/* Any other error is only logged here; it then
			 * trips the size check below and yields 0. */
			dprintk("tcrpc_grok: recv error = %d\n", result);
		}
	}
	if (result < 4) {
		printk(KERN_WARNING "RPC: impossible RPC reply size %d\n",
						result);
		return 0;
	}

	dprintk("RPC: tcrpc_grok: got xid %08lx\n", (unsigned long) xid);

	/* Look for the caller */
	/* `safe' bounds the walk so a corrupted (cyclic) list cannot
	 * hang us; it gives up after more than TCRPC_MAXREQS steps. */
	safe = 0;
	for (rovr = rsock->pending; rovr; rovr = rovr->w_next) {
		if (rovr->w_xid == xid)
			break;
		if (safe++ > TCRPC_MAXREQS) {
			printk(KERN_WARNING "RPC: loop in request Q!!\n");
			rovr = NULL;
			break;
		}
	}

	if (!rovr || rovr->w_gotit) {
		/* discard dgram */
		/* Reading the XID again without MSG_PEEK consumes the
		 * datagram (presumably dropping the rest of it, as is
		 * usual for datagram sockets). */
		dprintk("RPC: tcrpc_grok: %s.\n",
			rovr? "duplicate reply" : "bad XID");
		iov[0].iov_base = (void *) &xid;
		iov[0].iov_len  = sizeof(xid);
		tcrpc_recvmsg(rsock, iov, 1, sizeof(xid), 0);
		return 0;
	}
	req = rovr->w_req;

	/* Now receive the reply... Copy the iovec first because of
	 * memcpy_fromiovec fiddling. */
	/* NOTE(review): rq_rnr is not checked against UIO_MAXIOV here;
	 * this relies on callers never building a larger receive vector. */
	memcpy(iov, req->rq_rvec, req->rq_rnr * sizeof(iov[0]));
	result = tcrpc_recvmsg(rsock, iov, req->rq_rnr, req->rq_rlen, 0);
	rovr->w_result = result;
	rovr->w_gotit = 1;

	/* ... and wake up the process */
	wake_up(&rovr->w_wait);

	/* NOTE(review): the result is also returned to the task doing the
	 * receiving, which need not be the owner of `rovr'. */
	return result;
}

/*
 * Wait for the reply to our call.
 *
 * Only the task whose slot is at the head of rsock->pending actually
 * reads from the socket; everybody else sleeps on their own slot until
 * the receiver delivers their reply (w_gotit), they become the head
 * themselves, or the wait is cut short.
 *
 * Returns slot->w_result once a reply has been delivered, -ERESTARTSYS
 * on a pending unblocked signal, -EIO if the socket is shutting down,
 * -ETIMEDOUT if current->timeout reached zero without a reply, or a
 * negative result from tcrpc_select / tcrpc_grok.
 */
static int
tcrpc_recv(struct tcrpc_sock *rsock, struct tcrpc_wait *slot)
{
	int	result;

	do {
		/* If we are not the receiver, wait on the sidelines */
		dprintk("RPC: tcrpc_recv TP1\n");
		while (rsock->pending != slot) {
			/* Don't go to sleep if the reply is already in. */
			if (!slot->w_gotit)
				interruptible_sleep_on(&slot->w_wait);
			/* A delivered reply wins over signal, shutdown
			 * and timeout, hence it is checked first. */
			if (slot->w_gotit)
				return slot->w_result; /* quite important */
			if (current->signal & ~current->blocked)
				return -ERESTARTSYS;
			if (rsock->shutdown)
				return -EIO;
			if (current->timeout == 0)
				return -ETIMEDOUT;
		}

		/* Wait for data to arrive */
		if ((result = tcrpc_select(rsock)) < 0) {
			dprintk("RPC: select error = %d\n", result);
			return result;
		}

		/* Receive and dispatch */
		/* The datagram may belong to another slot; in that case
		 * our own w_gotit stays clear and we go round again. */
		if ((result = tcrpc_grok(rsock)) < 0)
			return result;
	} while (current->timeout && !slot->w_gotit);

	return slot->w_gotit? slot->w_result : -ETIMEDOUT;
}

/*
 * Generic RPC call routine. This handles retries and timeouts.
 *
 * If `sent' is non-zero, it assumes the caller has already sent out the
 * message, so it won't need to do so unless a timeout occurs.
 *
 * If the request has no slot yet, one is reserved (waiting if needed)
 * and queued.  Each round arms current->timeout with the current
 * timeout value, transmits (unless already sent) and waits for the
 * reply.  After a timeout the congestion window is shrunk and the
 * timeout is backed off according to `strategy': doubled when
 * to_exponential is set, otherwise raised by to_increment, and clamped
 * to to_maxval when that is non-zero.  The loop gives up after
 * to_retries timeouts when to_retries is non-zero.
 *
 * Returns the reply result, or a negative error.  current->timeout is
 * always cleared on return.  The slot is NOT released here.
 */
int
tcrpc_doio(struct tcrpc_sock *rsock, struct tcrpc_ioreq *req,
			struct tcrpc_timeout *strategy, int sent)
{
	struct tcrpc_wait	*slot = req->rq_slot;
	unsigned long	timeout = strategy->to_initval;
	int		retries = 0;
	int		result;

	for (;;) {
		dprintk("RPC: tcrpc_doio: TP1 (req %p)\n", req);
		current->timeout = jiffies + timeout;

		if (slot == NULL) {
			result = tcrpc_reserve(rsock, req, 0);
			if (result == -ETIMEDOUT)
				goto timedout;
			if (result < 0)
				break;
			slot = req->rq_slot;
			tcrpc_send_check("tcrpc_doio",
					(u32 *) req->rq_svec[0].iov_base);
			tcrpc_insque(rsock, slot);
		}

		/* This check is for loopback TCFS. Sometimes replies come
		 * in before biod has called tcrpc_doio... */
		if (slot->w_gotit) {
			result = slot->w_result;
			break;
		}

		dprintk("RPC: tcrpc_doio: TP2\n");
		/* Transmit unless the caller did so already; only wait
		 * for a reply if the message actually went out. */
		if (!sent)
			result = tcrpc_send(rsock, slot);
		if (sent || result >= 0) {
			result = tcrpc_recv(rsock, slot);
			sent = 0;
		}

		/* Anything but a timeout ends the call. */
		if (result != -ETIMEDOUT) {
			/* dprintk("RPC: tcrpc_recv returned %d\n", result); */
			tcrpc_cwnd_adjust(rsock, 0);
			break;
		}

		tcrpc_cwnd_adjust(rsock, 1);

	timedout:
		dprintk("RPC: tcrpc_recv returned timeout.\n");
		/* Back off the timeout for the next round. */
		if (strategy->to_exponential)
			timeout <<= 1;
		else
			timeout += strategy->to_increment;
		if (strategy->to_maxval && timeout >= strategy->to_maxval)
			timeout = strategy->to_maxval;
		if (strategy->to_retries && ++retries >= strategy->to_retries)
			break;
	}

	dprintk("RPC: tcrpc_doio: TP3\n");
	current->timeout = 0;
	return result;
}

/*
 * Perform one complete RPC call: run tcrpc_doio with nothing sent yet,
 * then release the request's slot.  A warning is logged if the request
 * has no slot once tcrpc_doio returns.  Returns tcrpc_doio's result.
 */
int
tcrpc_call(struct tcrpc_sock *rsock, struct tcrpc_ioreq *req,
			struct tcrpc_timeout *strategy)
{
	int	status = tcrpc_doio(rsock, req, strategy, 0);

	if (!req->rq_slot)
		printk(KERN_WARNING "RPC: bad: rq_slot == NULL\n");

	tcrpc_release(rsock, req);
	return status;
}

/*
 * Build an RPC socket descriptor on top of the socket behind `file'.
 *
 * Only AF_INET datagram sockets are accepted.  The descriptor is
 * allocated with kmalloc and zeroed, the congestion window starts at
 * TCRPC_INITCWND, and all TCRPC_MAXREQS request slots are chained onto
 * the free list.
 *
 * Returns the new descriptor, or NULL if the socket type is unsupported
 * or the allocation fails.
 */
struct tcrpc_sock *
tcrpc_makesock(struct file *file)
{
	struct socket	*sock;
	struct sock	*inet;
	struct tcrpc_sock	*rsock;
	struct tcrpc_wait	*slot;
	int		left;

	dprintk("RPC: make RPC socket...\n");
	sock = &file->f_inode->u.socket_i;
	if (sock->type != SOCK_DGRAM || sock->ops->family != AF_INET) {
		printk(KERN_WARNING "RPC: only UDP sockets supported\n");
		return NULL;
	}
	inet = (struct sock *) sock->data;

	rsock = kmalloc(sizeof(struct tcrpc_sock), GFP_KERNEL);
	if (rsock == NULL)
		return NULL;
	memset(rsock, 0, sizeof(*rsock)); /* Nnnngh! */

	rsock->file = file;
	rsock->sock = sock;
	rsock->inet = inet;
	rsock->cwnd = TCRPC_INITCWND;

	dprintk("RPC: slots %p, %p, ...\n", rsock->waiting, rsock->waiting + 1);

	/* Thread every slot onto the free list; the last one ends it. */
	slot = rsock->waiting;
	rsock->free = slot;
	for (left = TCRPC_MAXREQS-1; left > 0; left--) {
		slot->w_next = slot + 1;
		slot++;
	}
	slot->w_next = NULL;

	dprintk("RPC: made socket %p\n", rsock);
	return rsock;
}

/*
 * Shut down and free an RPC socket descriptor.
 *
 * Sets the shutdown flag, then sleeps on the shutwait queue until no
 * request is pending and nobody is waiting on the backlog queue.  A
 * warning is logged once if this takes longer than 60 seconds.
 *
 * Returns 0 after freeing the descriptor, or -EINTR if an unblocked
 * signal arrives while waiting; in that case the descriptor is NOT
 * freed and the shutdown flag stays set.
 */
int
tcrpc_closesock(struct tcrpc_sock *rsock)
{
	unsigned long	t0 = jiffies;
	int		warned = 0;

	rsock->shutdown = 1;
	while (rsock->pending || waitqueue_active(&rsock->backlog)) {
		interruptible_sleep_on(&rsock->shutwait);
		if (current->signal & ~current->blocked)
			return -EINTR;

		/* Elapsed time is "now minus start"; with unsigned
		 * arithmetic this also survives a jiffies wrap-around. */
		if (!warned && jiffies - t0 > 60 * HZ) {
			printk(KERN_WARNING "RPC: hanging in tcrpc_closesock.\n");
			warned = 1;
		}
	}

	kfree(rsock);
	return 0;
}
