/*
 * Result codes returned by proxy(). The descriptions below reflect how
 * proxy() uses each value.
 */
typedef enum
{
	/* Both descriptors were read to EOF and all data was forwarded. */
	PROXY_OK = 0,
	/* An fcntl() or select() call failed. */
	PROXY_ERR = -1,
	/* A read() from one of the descriptors failed. */
	PROXY_RERR = 1,
	/* A write() to one of the descriptors failed. */
	PROXY_WERR = 2,
	/* The select() waiting for readable data timed out. */
	PROXY_RTIMEO = 3,
	/* The select() waiting for a blocked descriptor to become writable timed out. */
	PROXY_WTIMEO = 4
}  proxy_rc_T;

/*
 * Option flags for proxy(). proxy() tests them with a bitwise AND against
 * its options argument.
 */
typedef enum
{
   /* No options: the default read behaviour described below applies. */
   PROXY_OPT_NONE = 0,
   /* When set, read each descriptor returned by select() just once, write the
    * data out to the other socket, then call select() again. When not set,
    * the descriptors returned by select() are read one after the other in a
    * loop until one of them generates an EAGAIN or EOF.
    */
   PROXY_OPT_READONCE = 1
} proxy_opt_T;

/*
 * Forward declaration of tcp_proxy(). Its parameter list is identical to
 * that of proxy(), which is defined later in this file.
 *
 * NOTE(review): no definition named tcp_proxy() is visible in this part of
 * the file, while proxy() has exactly this signature -- confirm that
 * tcp_proxy() is defined elsewhere, or that the two names were meant to
 * match.
 */
static proxy_rc_T
tcp_proxy(int fd1, int fd2, proxy_opt_T options, struct timeval* read_tv,
          struct timeval* write_tv, long int* fd1_written,
          long int* fd2_written, int* e_fd, int* e_errno);

/*
 * State needed to undo the descriptor changes made by proxy(): the two
 * descriptors and the file status flags each had before it was modified.
 * A flags value of -1 means no flags were saved for that descriptor, so
 * nothing is restored for it.
 */
typedef struct
{
	int fd1;
	int fd2;
	int fd1_flags;
	int fd2_flags;
} proxy_cleanup_data_T;

/*
 * Restore the file status flags saved in pcd on both descriptors.
 *
 * Restoration is best effort: a failing fcntl() is ignored because there is
 * nothing useful the caller could do about it. The value of errno on entry
 * is preserved, so that calling this function on an error path does not
 * overwrite the error code of the call that actually failed.
 */
static void
proxy_cleanup(proxy_cleanup_data_T* pcd)
{
	const int saved_errno = errno;

	/* Restore socket flags. */
	if (pcd->fd1_flags != -1)
	{
		(void)fcntl(pcd->fd1, F_SETFL, pcd->fd1_flags);
	}

	if (pcd->fd2_flags != -1)
	{
		(void)fcntl(pcd->fd2, F_SETFL, pcd->fd2_flags);
	}

	errno = saved_errno;
}

/*
 * The fd1 and fd2 arguments are the TCP socket descriptors to proxy.
 * 
 * The read_tv and write_tv arguments are used for timing out the select() and
 * blocked write() calls respectively.
 * 
 * The fd1_written and fd2_written are set to 0 upon function entry, and upon
 * function return contain the number of bytes written to each descriptor.
 *
 * The e_fd and e_errno arguments are set to -1 and 0 respectively upon entrance
 * to the function. If a failure is associated with a single descriptor then
 * e_fd is set to that descriptor. If an error condition other than a timeout
 * occurs, the e_errno is set to the system errno value.
 */
static proxy_rc_T
proxy(int fd1, int fd2, proxy_opt_T options, struct timeval* read_tv,
      struct timeval* write_tv, long int* fd1_written, long int* fd2_written,
      int* e_fd, int* e_errno)
{
	struct timeval rtv;
	struct timeval wtv;
	int fd_max;
	int retval;
	int select_retval;
	/*
	 * The s1 and s2 variables are for tracking whether we are still interested
	 * in reading data from fd1 or fd2. Since one side may close while the other
	 * is open, we use these to indicate which one we still need to select()
	 * on.
	 */
	int s1 = fd1;
	int s2 = fd2;
	int i;
	fd_set rset;
	fd_set wset;
	int fd_count = 2;
	int ready_fd;
	int write_fd;
	ssize_t remain;
	ssize_t read_retval;
	ssize_t write_retval;
	char* p;
	int read_loop;
	proxy_cleanup_data_T pcd = {-1, -1, -1, -1};
	int fds[2];
	char buf[BUF_SIZ];

	pcd.fd1 = fd1;
	pcd.fd2 = fd2;
	*fd1_written = 0;
	*fd2_written = 0;
	*e_fd = -1;
	*e_errno = 0;

   /* Get socket flags. */
   if ( (pcd.fd1_flags = fcntl(fd1, F_GETFL, 0)) == -1)
      return PROXY_ERR;

   if ( (pcd.fd2_flags = fcntl(fd2, F_GETFL, 0)) == -1)
      return PROXY_ERR;

   /* Set socket to non-blocking. */
   if (fcntl(fd1, F_SETFL, pcd.fd1_flags | O_NONBLOCK) == -1)
		return PROXY_ERR;

   if (fcntl(fd2, F_SETFL, pcd.fd2_flags | O_NONBLOCK) == -1)
	{
		proxy_cleanup(&pcd);
		return PROXY_ERR;
	}

	FD_ZERO(&rset);
	FD_ZERO(&wset);
	fd_max = (fd1 > fd2) ? fd1 : fd2;

	/*
	 * The fd_count variable stores how many sockets we are interested in
	 * reading data from, and transferring to another socket. When one side
	 * closes we still have one left. When we hit 0 both sides have closed and
	 * we are done.
	 */
	while (fd_count > 0)
	{
		if (s1 != -1)
			FD_SET(fd1, &rset);
		else
			FD_CLR(fd1, &rset);

		if (s2 != -1)
			FD_SET(fd2, &rset);
		else
			FD_CLR(fd2, &rset);

		memcpy((void*)&rtv, (void*)read_tv, sizeof(struct timeval));

		select_retval = select(fd_max + 1, &rset, '\0', '\0', &rtv);

		if (select_retval == 0)
		{
			proxy_cleanup(&pcd);
			return PROXY_RTIMEO;
		}
		else if (select_retval == -1)
		{
			if (errno == EINTR)
				continue;

			proxy_cleanup(&pcd);
			return PROXY_ERR;
		}

		if ( (s1 != -1) && (FD_ISSET(fd1, &rset)) )
			fds[0] = fd1;
		else
			fds[0] = -1;

		if ( (s2 != -1) && (FD_ISSET(fd2, &rset)) )
			fds[1] = fd2;
		else
			fds[1] = -1;

		/*
		 * We have at least one socket ready to be read from. What we want to do
		 * is get in as many read/write sequences as we can before calling
		 * select() again to determine read ready status. The problem is that if
		 * we keep reading from one descriptor we could end up starving the other
		 * when select only returned one socket ready for reading, and the other
		 * socket becomes ready while we are reading from the socket indicated by
		 * select(). To get around this potential starvation problem, if
		 * PROXY_OPT_READONCE is set then a single read/write sequence is
		 * performed for each of the ready sockets, then we loop back to select().
		 */
		if ( (options & PROXY_OPT_READONCE) == 0)
			read_loop = 1;
		else
			read_loop = 0;

		do
		{
			for (i = 0; i < 2; ++i)
			{
				if (fds[i] == -1)
					continue;

				/* Read the data from the ready socket. */
				ready_fd = fds[i];

				while ( (read_retval = read(ready_fd, buf, sizeof(buf))) == -1)
				{
					if (errno == EINTR)
						continue;
	
					if (errno == EAGAIN)
					{
						read_loop = 0;
						break;
					}

					*e_fd = ready_fd;
					*e_errno = errno;
					proxy_cleanup(&pcd);
					return PROXY_RERR;
				}

				if (read_retval == -1)
					continue;
				else if (read_retval == 0)
				{
					/*
					 * We read EOF from the ready socket, so we close the write end
					 * of the other socket, and mark the fd as -1 so we don't try to
					 * read it any more.
					 */
					if (ready_fd == s1)
					{
						shutdown(fd2, SHUT_WR);
						s1 = -1;
						fd_max = fd2;
						--fd_count;
						read_loop = 0;
					}
					else
					{
						shutdown(fd1, SHUT_WR);
						s2 = -1;
						fd_max = fd1;
						--fd_count;
						read_loop = 0;
					}

					continue;
				}
	
				/* Write our data to the other socket. */
				if (ready_fd == fd1)
					write_fd = fd2;
				else
					write_fd = fd1;
		
				remain = read_retval;
				p = buf;

				while (remain > 0)
				{
					while ( (write_retval = write(write_fd, p, remain)) == -1)
					{
						if (errno == EINTR)
							continue;
						else if (errno == EAGAIN)
						{
							for (;;)
							{
								FD_SET(write_fd, &wset);
								memcpy((void*)&wtv, (void*)write_tv,
								       sizeof(struct timeval));

								retval = select(write_fd + 1, '\0', &wset, '\0', &wtv);

								if (retval == 0)
								{
									*e_fd = write_fd;
									proxy_cleanup(&pcd);
									return PROXY_WTIMEO;
								}
								else if (retval == -1)
								{
									if (errno == EINTR)
										continue;
				
									*e_fd = write_fd;
									*e_errno = errno;
									proxy_cleanup(&pcd);
									return PROXY_ERR;
								}

								FD_CLR(write_fd, &wset);
								break;
							}

							continue;
						}
						else
						{
							*e_fd = write_fd;
							*e_errno = errno;
							proxy_cleanup(&pcd);
							return PROXY_WERR;
						}
					}
				
					remain -= write_retval;
					p += write_retval;
	
					if (write_fd == fd1)
						(*fd1_written) += write_retval;
					else
						(*fd2_written) += write_retval;
				}   /* while (remain > 0) */
			}   /* for (i = 0; i < 2; ++i) */
		} while (read_loop == 1);
	}  /* while (fd_count > 0) */

	proxy_cleanup(&pcd);
	return PROXY_OK;
}

