/* ``The contents of this file are subject to the Erlang Public License,
 * Version 1.0, (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.erlang.org/EPL1_0.txt
 * 
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 * 
 * The Original Code is Erlang-4.7.3, December, 1998.
 * 
 * The Initial Developer of the Original Code is Ericsson Telecom
 * AB. Portions created by Ericsson are Copyright (C), 1998, Ericsson
 * Telecom AB. All Rights Reserved.
 * 
 * Contributor(s): ______________________________________.''
 */
/***************************************
  This is the NT version of the TCP driver, tcp_drv, for Erlang 4.5.

  Copyright (C) 1993-1997 Ericsson Telecom AB, Sweden.
  Author: Claes Wikstrom  klacke@erix.ericsson.se
  Purpose: Implement the erlang distribution protocol
  NT-port: Fredrik Tillman, tillman@erix.ericsson.se. 
  Ameliorations by Bjorn Gustavsson, bjorn@erix.ericsson.se and 
  Peter Hogfeldt, peter@erix.ericsson.se, 1997.

  ***************************************/

/*
  CHANGE LOG:

  1997-07-10  Peter Hogfeldt
              1.  Added wrappers of several functions with debug printouts.
	      2.  Added selection of FD_WRITE in activate_name.
	      3.  Changed from init_scheduled_writes to do_scheduled_write 
	          in nb_complete_connection.
	      4.  Removed peekEvent.
	      5.  Removed tcp_select from init_scheduled_writes (the FD_WRITE
	          event does not have to be selected/deselected).
	      6.  Removed 2nd call to WSAEnumNetworkEvents in tcp_dispatch.
	      7.  Added code for detection of cross-over connect.
	      8.  Removed show_time() since it was not used.
	      9.  Removed silly +-signs in comments.
	      10. Changed from WaitForMultipleObjects to WSAWaitForMultiple-
	          Events in cnct().
	      11. I think the tcp_select/deselect of listen_cp in cnct()
	          is not needed (it should be enough to just do EventSelect()
		  on the corresponding event), but I have not changed that.
		  The same is probably true for tcp_select in read_fill().
*/

#ifdef DEBUG
#  define MESSDEBUG
#endif

/*
 * XXX The following is a copy from sys.h, to make sure the socket
 * functions go through our winSock structure.
 */


/*
 * Arguments handed to ioctlsocket(FIONBIO): zero_value switches
 * non-blocking mode off (the socket becomes blocking), one_value
 * switches it on.
 *
 * SET_BLOCKING reports a failure on stderr; the message used to say
 * "non-blocking" although the macro requests blocking mode.
 * SET_NONBLOCKING evaluates to the ioctlsocket() return value and leaves
 * error handling to the caller.
 */
static unsigned long zero_value = 0, one_value = 1;
#define SET_BLOCKING(fd) { if ((*winSock.ioctlsocket)((fd), FIONBIO, &zero_value) != 0) fprintf(stderr, "Error setting socket to blocking: %d\n", (*winSock.WSAGetLastError)()); }
#define SET_NONBLOCKING(fd) (*winSock.ioctlsocket)((fd), FIONBIO, &one_value)

/*
 * Make sure we don't get the macros we defined above.
 */
#undef WANT_NONBLOCKING
#include "sys.h"

#include <winsock2.h>
#include <windows.h>
#include <io.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>

#include "proto.h"
#include "epmd.h"
#include "driver.h"
#include "winsock_func.h"

/* Table of Winsock entry points; the members are assigned by
   tcp_lookup_functions() and called as (*winSock.xxx)(...) in this file. */
WinSockFuncs winSock;

/*+++++++++++++++++++++++++++++++++++++
  Looks up all WSA functions needed by this driver.
  +++++++++++++++++++++++++++++++++++++*/
/* Declared here, defined outside this file; tcp_lookup_functions() calls
   it after printing a failure message. */
EXTERN_FUNCTION(int, send_error_to_logger, (uint32));

/*
 * Resolves one export of the loaded Winsock DLL into winSock.<member>.
 * A symbol that cannot be found aborts the whole lookup, because a NULL
 * member would otherwise be called through later on.
 */
#define TCP_LOOKUP(member, signature, symbol)				\
  do {									\
    winSock.member = (signature) GetProcAddress(module, (symbol));	\
    if (winSock.member == NULL) {					\
      missing = (symbol);						\
      goto lookup_failed;						\
    }									\
  } while (0)

/*
 * Loads ws2_32 and fills in every member of winSock that this driver uses.
 *
 * Returns TRUE when all entry points are available (immediately so if a
 * previous call already succeeded), FALSE if the DLL or any of the
 * functions cannot be found.  Failures are printed through sys_printf()
 * and reported with send_error_to_logger().
 */
int
tcp_lookup_functions(void)
{
  static char dll_name[] = "ws2_32";
  HMODULE module;
  const char *missing = NULL;

  /* WSAStartup is resolved first and reset on failure, so a non-NULL
     value means that a complete lookup has already been done. */
  if (winSock.WSAStartup != NULL) {
      return TRUE;
  }

  module = LoadLibrary(dll_name);

  if (module == NULL) {
    sys_printf(CBUF, "Can't find winsocket DLL %s: %s\n",
	       dll_name, last_error());
    send_error_to_logger(0);
    return FALSE;
  }

  /* Winsock 2 event and startup functions. */
  TCP_LOOKUP(WSAStartup,
	     int (WSAAPI *) (WORD wVersionRequired, LPWSADATA lpWSAData),
	     "WSAStartup");
  TCP_LOOKUP(WSACleanup, int (WSAAPI *)(void), "WSACleanup");
  TCP_LOOKUP(WSAGetLastError, int (WSAAPI *)(void), "WSAGetLastError");
  TCP_LOOKUP(WSAWaitForMultipleEvents,
	     DWORD (WSAAPI *) (DWORD cEvents, const WSAEVENT FAR * lphEvents,
			       BOOL fWaitAll, DWORD dwTimeout,
			       BOOL fAlertable),
	     "WSAWaitForMultipleEvents");
  TCP_LOOKUP(WSACreateEvent, WSAEVENT (WSAAPI *)(void), "WSACreateEvent");
  TCP_LOOKUP(WSACloseEvent, BOOL (WSAAPI *)(WSAEVENT hEvent),
	     "WSACloseEvent");
  TCP_LOOKUP(WSASetEvent, BOOL (WSAAPI *) (WSAEVENT hEvent), "WSASetEvent");
  TCP_LOOKUP(WSAResetEvent, BOOL (WSAAPI *) (WSAEVENT hEvent),
	     "WSAResetEvent");
  TCP_LOOKUP(WSAEventSelect,
	     int (WSAAPI *) (SOCKET s, WSAEVENT hEventObject,
			     long lNetworkEvents),
	     "WSAEventSelect");
  TCP_LOOKUP(WSAEnumNetworkEvents,
	     int (WSAAPI *) (SOCKET s, WSAEVENT hEventObject,
			     LPWSANETWORKEVENTS lpNetworkEvents),
	     "WSAEnumNetworkEvents");

  /* Berkeley-style socket functions. */
  TCP_LOOKUP(accept,
	     SOCKET (PASCAL FAR *) (SOCKET s, struct sockaddr FAR *addr,
				    int FAR *addrlen),
	     "accept");
  TCP_LOOKUP(bind,
	     int (WSAAPI *) (SOCKET s, const struct sockaddr FAR *addr,
			     int namelen),
	     "bind");
  TCP_LOOKUP(closesocket, int (WSAAPI *)(SOCKET s), "closesocket");
  TCP_LOOKUP(connect,
	     int (WSAAPI *) (SOCKET s, const struct sockaddr FAR *name,
			     int namelen),
	     "connect");
  TCP_LOOKUP(ioctlsocket,
	     int (WSAAPI *) (SOCKET s, long cmd, u_long FAR *argp),
	     "ioctlsocket");
  TCP_LOOKUP(getsockopt,
	     int (WSAAPI *) (SOCKET s, int level, int optname,
			     char FAR * optval, int FAR *optlen),
	     "getsockopt");
  TCP_LOOKUP(htonl, u_long (WSAAPI *)(u_long hostlong), "htonl");
  TCP_LOOKUP(htons, u_short (WSAAPI *)(u_short hostshort), "htons");
  TCP_LOOKUP(inet_addr, unsigned long (WSAAPI *)(const char FAR *cp),
	     "inet_addr");
  TCP_LOOKUP(inet_ntoa, char FAR * (WSAAPI *)(struct in_addr in),
	     "inet_ntoa");
  TCP_LOOKUP(listen, int (WSAAPI *)(SOCKET s, int backlog), "listen");
  TCP_LOOKUP(ntohs, u_short (WSAAPI *)(u_short netshort), "ntohs");
  TCP_LOOKUP(recv,
	     int (WSAAPI *)(SOCKET s, char FAR * buf, int len, int flags),
	     "recv");
  TCP_LOOKUP(send,
	     int (WSAAPI *)(SOCKET s, const char FAR * buf, int len,
			    int flags),
	     "send");
  TCP_LOOKUP(setsockopt,
	     int (WSAAPI *)(SOCKET s, int level, int optname,
			    const char FAR * optval, int optlen),
	     "setsockopt");
  TCP_LOOKUP(shutdown, int (WSAAPI *)(SOCKET s, int how), "shutdown");
  TCP_LOOKUP(socket, SOCKET (WSAAPI *)(int af, int type, int protocol),
	     "socket");

  /* Name service and address lookup functions. */
  TCP_LOOKUP(gethostbyaddr,
	     struct hostent FAR * (WSAAPI *)(const char FAR *addr,
					     int addrlen, int addrtype),
	     "gethostbyaddr");
  TCP_LOOKUP(gethostbyname,
	     struct hostent FAR * (WSAAPI *)(const char FAR *name),
	     "gethostbyname");
  TCP_LOOKUP(gethostname, int (WSAAPI *)(char FAR * name, int namelen),
	     "gethostname");
  TCP_LOOKUP(getservbyname,
	     struct servent FAR * (WSAAPI *)(const char FAR * name,
					     const char FAR * proto),
	     "getservbyname");
  TCP_LOOKUP(getsockname,
	     int (WSAAPI *)(SOCKET sock, struct sockaddr FAR *name,
			    int FAR *namelen),
	     "getsockname");
  TCP_LOOKUP(getpeername,
	     int (WSAAPI *)(SOCKET s, struct sockaddr FAR * name,
			    int FAR * namelen),
	     "getpeername");
  TCP_LOOKUP(ntohl, u_long (WSAAPI *)(u_long netlong), "ntohl");

  TCP_LOOKUP(WSASend,
	     int (WSAAPI *)(SOCKET s, LPWSABUF lpBuffers,
			    DWORD dwBufferCount,
			    LPDWORD lpNumberOfBytesSent, DWORD dwFlags,
			    LPWSAOVERLAPPED lpOverlapped,
			    LPWSAOVERLAPPED_COMPLETION_ROUTINE
			    lpCompletionRoutine),
	     "WSASend");
  TCP_LOOKUP(sendto,
	     int (WSAAPI *)(SOCKET s, const char FAR * buf, int len,
			    int flags, const struct sockaddr FAR * to,
			    int tolen),
	     "sendto");
  TCP_LOOKUP(recvfrom,
	     int (WSAAPI *)(SOCKET s, char FAR * buf, int len, int flags,
			    struct sockaddr FAR * from, int FAR * fromlen),
	     "recvfrom");

  return TRUE;

 lookup_failed:
  /* Report before FreeLibrary(), which may change the last error code. */
  sys_printf(CBUF, "Can't find function %s in winsocket DLL %s: %s\n",
	     missing, dll_name, last_error());
  send_error_to_logger(0);
  /* Make sure the "already looked up" test above fails on the next call. */
  winSock.WSAStartup = NULL;
  FreeLibrary(module);
  return FALSE;
}

#undef TCP_LOOKUP


/* The maximum number of connections allowed by tcp_drv. Note that this
   is a variable, despite the upper-case name; it bounds the loops over
   conn_tab. */
static uint16 SOCKET_TAB_SIZE;
/* Max size of the 'node@host.domain' names */
#define MAXHOSTLEN 255
/* Max size of the part before '@' in node names. */
#define MAXALIVELEN 63

/* Whenever a message larger than LARGE_BUFFER is allocated, a
  counter in the connection struct is set to KEEP_LARGE_MESS. The
  counter is decremented each time reception of a message begins, and
  when it reaches zero, the large buffer is freed. */
#define KEEP_LARGE_MESS 4
#define LARGE_BUFFER 66000

/* We maintain a linked fifo queue of these structs in order to
  manage unfinished reads and writes on different fd's. Each
  Connection holds the head and tail of its own queue (firstPend and
  lastPend). */
typedef struct pend {
  int cpos;			/* Offset for next send(). */
  int remain;			/* Bytes left to send. */
  struct pend *next;		/* Pointer to next entry in queue. */
  char buf[1];			/* This is a trick to be able to malloc one chunk:
				   presumably the entry is allocated with room
				   for the whole message after the header -
				   TODO confirm against add_write_entry(). */
} Pend;

/* One entry of the connection table (conn_tab). */
typedef struct connection {
  int fd;			/* The filedescriptor (actually SOCKET). */
  char nodename[MAXHOSTLEN+1];	/* The complete nodename, 'node@host'. */
  /* Holds the nodename passed to the cnct_node() call, if any. This is
     used to check for nodename ambiguity. */
  char conn_nodename[MAXHOSTLEN+1];
  char alivename[MAXALIVELEN+1]; /* The part of the name before '@'. */
  struct in_addr ip_addr;	/* The IP address of the peer node. */
  char *buf;			/* The buffer used for read operations. */
  char large;			/* See comment for #define KEEP_LARGE_MESS. */
  unsigned int status;		/* The status of the connection (FREE, CONNECTED, ... bits). */
  unsigned int readState;	/* Holds the state of a possible outstanding read operation. */
  int bufSize;			/* The number of bytes allocated for the buffer. */
  int readOffset;		/* Offset in the buffer for next read operation. */
  int remain;			/* Number of bytes remaining for current read operation. */
   int bytesRead;		/* Number of bytes currently read in current read operation. */
  /* A counter for the number of calls to nb_read(), to prevent infinite
     loops upon error. */
  int count;
  /* If ticked == tick we haven't received anything the last 4*ticktime
     secs. */
  int ticked;
  char pendings;		/* Pending read/write bits (PENDING_READS, PENDING_WRITES). */
  Pend *firstPend;		/* The head of the list of pending writes. */
  Pend *lastPend;		/* The last entry of the list of pending writes. */
  WSAEVENT event;		/* The event for which driver_select() is issued. */
  long eventMask;		/* The current event mask, which is passed to driver_select(). */
  uint16 erl_ix;		/* This entry's index in the connection table. */
} Connection;			/* The structure used for the connection table. */

/* Bits for the status field in a connection struct. They are distinct
   powers of two, so they can be OR-ed together and tested with '&'. */
#define FREE       1
#define CONNECTED  2		/* Also means the fd has been activated. */
#define TICK_READ  4
#define TICK_WRITE 8
#define HIDDEN    16

/* Possible values for the readState field in a connection struct.
   Unlike the status bits these are mutually exclusive values. */
#define IDLE              0
#define READING_HEADER    1
#define READING_MESSAGE   2
#define GETTING_NAME_SZ   3
#define GETTING_NAME      4

/* Possible return values from different functions (e.g. nb_read()
   returns READY, CONTINUE or the result of bad_cp(cp, READ_ERROR)). */
#define READY 1
#define CONTINUE 2
#define READ_ERROR 3
#define WRITE_ERROR 4
#define WOULD_BLOCK 5
#define INVALID_HOSTNAME 6
#define MEMORY_ERROR 7
#define INVALID_NAME 8

#ifdef MESSDEBUG

/*
 * Debug helper: renders the bits of a connection status word as text.
 *
 * Each bit that is set adds its name, preceded by a space, in the order
 * FREE, CONNECTED, TICK_READ, TICK_WRITE, HIDDEN.  The result lives in a
 * static buffer that is overwritten by the next call.
 */
const char *
status_str(int status)
{
  static char buf[256] = "";
  static const struct {
    int bit;
    const char *label;
  } names[] = {
    { FREE,       " FREE" },
    { CONNECTED,  " CONNECTED" },
    { TICK_READ,  " TICK_READ" },
    { TICK_WRITE, " TICK_WRITE" },
    { HIDDEN,     " HIDDEN" }
  };
  int k;

  buf[0] = '\0';
  for (k = 0; k < (int) (sizeof(names) / sizeof(names[0])); k++) {
    if (status & names[k].bit) {
      strcat(buf, names[k].label);
    }
  }
  return buf;
}

/*
 * Debug helper: renders a Winsock network event mask (FD_READ, FD_WRITE,
 * ...) as text.
 *
 * Each event that is set adds its name followed by a space.  The result
 * lives in a static buffer that is overwritten by the next call.
 */
const char *netw_events_str(long evs)
{
  static char buf[256] = "";
  static const struct {
    long mask;
    const char *label;
  } names[] = {
    { FD_READ,      "READ " },
    { FD_WRITE,     "WRITE " },
    { FD_OOB,       "OOB " },
    { FD_ACCEPT,    "ACCEPT " },
    { FD_CONNECT,   "CONNECT " },
    { FD_CLOSE,     "CLOSE " },
    { FD_QOS,       "QOS " },
    { FD_GROUP_QOS, "GROUP_QOS " }
  };
  int k;

  buf[0] = '\0';
  for (k = 0; k < (int) (sizeof(names) / sizeof(names[0])); k++) {
    if (evs & names[k].mask) {
      strcat(buf, names[k].label);
    }
  }
  return buf;
}
#endif

/* Both macros below contain a return statement: they leave the
   *enclosing function* with -1.  CLOSE_RET closes the socket with
   do_closesocket(); CLOSE_CLEAR instead clears the connection entry
   that get_conn_entry() finds for the socket. */
#define CLOSE_RET(fd) {DEBUGF(("CLOSE_RET(0x%x)\n", fd)); \
  do_closesocket(fd); return(-1);}
#define CLOSE_CLEAR(fd) \
{DEBUGF(("CLOSE_CLEAR(0x%x)\n", fd)); clear_conn_entry(get_conn_entry(fd)); return(-1);}

/* Set, clear and test bits in the 'pendings' field of a Connection.
   NOTE(review): fd_set shares its name with the BSD socket fd_set type;
   here it is a macro taking (connection, mask). */
#define fd_set(cp,mask) { (cp)->pendings |= (mask); }
#define fd_clr(cp,mask) { (cp)->pendings &= ~(mask); }
#define fd_isset(cp, mask) ((cp)->pendings & (mask))
#define PENDING_READS (char)0x01
#define PENDING_WRITES (char)0x02

/* Forward declarations */
static int challoc(Connection *, int);
/* tcp_error() is variadic, so it needs a full prototype: an empty
   parameter list is not compatible with a definition that ends in
   '...'. */
static void tcp_error(char *fmt, ...);

/* Connection set-up and node name handling. cnct(), cnct_node(),
   check_host_name(), find_connection() and pub() are defined right
   after the globals; the others are presumably defined further down in
   this file. */
static int cnct(uint16, struct in_addr *, int, char *);
static int cnct_node(char *, Connection *);
static int new_connection(struct in_addr *, uint16, int *);
static int activate_name(int);
static int tell_name(int);
static int get_name(int);
static int check_host_name(char *);
static Connection *find_connection(char *);
static int get_and_open_local_port(void);
static int pub(char *, uint16);

/* Non-blocking I/O on a connection; nb_read() returns READY/CONTINUE or
   the result of bad_cp(). */
static int nb_write(Connection *, char *, int);
static int nb_read(Connection *, int);
static int read_header(Connection *);
static int nb_read_message(Connection *);
static int nb_get_name_header(Connection *);
static int nb_get_name(Connection *);
static int nb_complete_connection(Connection *);
static int message_to_erlang(Connection *);

/* Error reporting for a connection entry or table index. */
static int bad_cp(Connection *, int);
static void bad_ix(uint16);

/* Connection table management; the int argument is a socket for the
   plain variants (see the get_conn_entry(fd) calls) - the meaning for
   the _ev variants is not visible here, TODO confirm. */
static int get_conn_entry_index(int);
static int get_conn_entry_index_ev(int);
static Connection* create_conn_entry(int);
static Connection* get_conn_entry_ev(int);
static Connection* get_conn_entry(int);
static void clear_conn_entry(Connection *);

/* Event selection for a connection; called as
   tcp_select(cp, mask_to_add, mask_to_remove) judging from its uses in
   read_fill() - TODO confirm. */
static int tcp_select(Connection *, long, long);
static int tcp_deselect(Connection *);

/* Queue of pending (scheduled) writes, see Pend. */
static int do_scheduled_write(Connection *);
static int init_scheduled_writes(Connection *);
static int cancel_scheduled_writes(Connection *);
static int add_write_entry(Connection *, char *, int);

/* Blocking transfer of an exact number of bytes. */
static int write_fill(int, char *, int);
static int read_fill(int, char *, int);

/* Wrappers around the corresponding Winsock calls (the change log
   mentions wrappers with debug printouts). */
static int do_closesocket(int);
static WSAEVENT do_createevent(void);
static BOOL do_closeevent(WSAEVENT);
static BOOL do_setevent(WSAEVENT);
static BOOL do_resetevent(WSAEVENT);
static int do_eventselect(SOCKET, WSAEVENT, long);

/* Driver interface declarations: the callbacks that are placed in
   tcp_driver_entry below. */
static long tcp_start(int, char *);
static int tcp_init(void);
static int tcp_stop(long);
static int tcp_from_erlang(long, char *, int);
static int tcp_dispatch(long, int);
static int tcp_null(long, int);

/* Driver interface specification structure. The callbacks are given
   positionally, so their order must match the members of struct
   driver_entry (declared in driver.h, not visible here); the last
   member is the driver name "tcp". */
const struct driver_entry tcp_driver_entry = {
  tcp_init, tcp_start, tcp_stop, tcp_from_erlang, tcp_dispatch, 
  tcp_null, "tcp"
};

/* Provided by the emulator; selects/deselects an event handle for a port. */
extern int driver_select(int this_port, HANDLE event, int mode, int on);

static int erlang_port;		/* The key used for communicating with the Erlang system */
static char rbuf[BUFSIZ];	/* Temporary buffer; cnct_node() and pub() build their EPMD requests in it */

static int use_fully_qual_names; /* If 1, use only FQDNs, if 0 use plain hostnames */
static int connect_timeout = 2;	/* The timeout in seconds used in cnct() (multiplied by 1000 for the wait call) */
static char thishostname[MAXHOSTLEN+1];	/* Our hostname */
static char thisnodename[MAXHOSTLEN+1+MAXALIVELEN+1]; /* Our nodename, 'node@host' */

static struct in_addr this_ipaddr; /* Our own IP address, in network byte order */
static Connection *listen_cp;	/* The connection used for listen()ing */
static int mappersock;		/* The socket leading to epmd */
static Connection **conn_tab;	/* Table of active connections; SOCKET_TAB_SIZE slots, NULL when unused */

/* Standard set of integer macros: big-endian (network byte order)
   encoding and decoding of 32- and 16-bit integers in byte buffers.

   get_int32 assembles the value in unsigned arithmetic and converts the
   result to int afterwards. Shifting the promoted (signed) top byte left
   by 24 bits overflows int when the byte is >= 0x80; the result type
   (int) is unchanged, so callers see the same values as before.

   The put_ macros expand to a brace-enclosed block and evaluate 'i'
   several times, so the argument should not have side effects. */

#define get_int32(s) ((int) ((((unsigned int) ((unsigned char*) (s))[0]) << 24) | \
			     (((unsigned int) ((unsigned char*) (s))[1]) << 16) | \
			     (((unsigned int) ((unsigned char*) (s))[2]) << 8)  | \
			     (((unsigned int) ((unsigned char*) (s))[3]))))

#define put_int32(i, s) {((unsigned char*)(s))[0] = ((i) >> 24) & 0xff; \
                        ((unsigned char*)(s))[1] = ((i) >> 16) & 0xff; \
                        ((unsigned char*)(s))[2] = ((i) >> 8)  & 0xff; \
                        ((unsigned char*)(s))[3] = (i)         & 0xff;}

#define get_int16(s) ((((unsigned char*)  (s))[0] << 8) | \
		      (((unsigned char*)  (s))[1]))


#define put_int16(i, s) {((unsigned char*)(s))[0] = ((i) >> 8) & 0xff; \
                        ((unsigned char*)(s))[1] = (i)         & 0xff;}

/*
 * We must make sure that the socket handles are not inherited
 * by port programs (if they are inherited, the sockets will not
 * get closed when the emulator terminates, and epmd and other Erlang
 * nodes will not notice that we have exited).
 *
 * XXX It is not clear whether this works/is necessary in Windows 95.
 * There could also be problems with Winsock implementations from other
 * suppliers than Microsoft.
 *
 * Returns a non-inheritable duplicate of 's' after closing 's' itself,
 * or 's' unchanged if the handle could not be duplicated.
 */

static SOCKET
make_noninheritable_handle(SOCKET s)
{
    HANDLE duplicate;
    HANDLE self = GetCurrentProcess();

    if (!DuplicateHandle(self, (HANDLE) s,
			 self, &duplicate, 0,
			 FALSE, DUPLICATE_SAME_ACCESS)) {
	/* Duplication failed: silently use the old handle. */
	return s;
    }

    /* The duplicate replaces the original, which is no longer needed. */
    (*winSock.closesocket)(s);
    return (SOCKET) duplicate;
}

/*+++++++++++++++++++++++++++++++++++++
  Send a formatted error message to Erlang for output on the console.

  char *fmt The format string, cf. printf().

  ... The rest of the arguments, as referenced by the format string.

  The message is sent with driver_output() on erlang_port, prefixed by
  the DISTRIBUTION_ERROR byte. Messages that do not fit in the local
  buffer (BUFSIZ bytes including prefix and terminator) are truncated
  instead of overrunning it.
  +++++++++++++++++++++++++++++++++++++*/
static void
tcp_error(char *fmt, ...)
{
  va_list args;
  char t[BUFSIZ];
  
  *t = DISTRIBUTION_ERROR;
  va_start(args, fmt);
  vsnprintf(t+1, sizeof(t)-1, fmt, args);
  va_end(args);
  t[sizeof(t)-1] = '\0';	/* Belt and braces for pre-C99 libraries. */
  driver_output(erlang_port, t, strlen(t));
#ifdef DEBUG
  /* A va_list must not be reused after it has been consumed; restart
     it for the second formatting pass. */
  va_start(args, fmt);
  vprintf(fmt, args);
  va_end(args);
#endif
}


/*+++++++++++++++++++++++++++++++++++++
  Makes sure that the buffer associated with the specified connection
  will hold at least sz bytes.

  Connection *cp Pointer to the connection record.

  int sz The required number of bytes.

  Returns 0 on success, -1 on failure. When the buffer has to grow it
  is reallocated to sz + BUFSIZ bytes, and if sz exceeds LARGE_BUFFER
  the 'large' counter is set to KEEP_LARGE_MESS. On failure the
  connection keeps its old buffer and size, so that the buffer is
  neither leaked nor believed to be larger than it is.
  +++++++++++++++++++++++++++++++++++++*/
static int
challoc(Connection *cp, int sz)
{
  char *grown;
  int new_size;

  if (sz <= cp->bufSize)
    return(0);			/* Already large enough. */

  new_size = sz + BUFSIZ;
  grown = (char *) sys_realloc(cp->buf, new_size);
  if (grown == NULL)
    return(-1);			/* cp->buf and cp->bufSize are untouched. */

  cp->buf = grown;
  cp->bufSize = new_size;
  if (sz > LARGE_BUFFER) 
    cp->large = KEEP_LARGE_MESS;
  return(0);
}

/*+++++++++++++++++++++++++++++++++++++
  Either we use fully qualified hostnames everywhere, or not at all,
  we can't have both since we need to be able to guarantee that
  nodenames are unique, so whenever we acquire a nodename by means of
  get_name(fd) we must check the name to see if it is of the same form
  as all other node names we have acquired.

  char *hname The hostname to check.

  Returns 1 if the name is OK, 0 otherwise (after reporting the
  mismatch with tcp_error()). A name with a dot is accepted only when
  use_fully_qual_names is 1, a name without dots only when it is 0.
  +++++++++++++++++++++++++++++++++++++*/
static int
check_host_name(char *hname)
{
  int dotted = (strchr(hname, '.') != NULL);
  int accepted;

  if (dotted)
    accepted = (use_fully_qual_names == 1); /* std case */
  else
    accepted = (use_fully_qual_names == 0); /* Only read /etc/hosts */

  if (accepted)
    return(1);

  tcp_error("\n** %s **\n** Hostname %s is illegal **\n",
	    dotted ? "System NOT running to use fully qualified hostnames"
		   : "System running to use fully qualified hostnames",
	    hname);
  return(0);
}

/*+++++++++++++++++++++++++++++++++++++
  Find connection by nodename
  +++++++++++++++++++++++++++++++++++++*/
static Connection *find_connection(nodename)
char *nodename;
{
  int s;

  for (s = 0; s < SOCKET_TAB_SIZE; s++) {
    if ((conn_tab[s] != NULL) &&
	(strcmp(conn_tab[s]->nodename, nodename) == 0))
      return conn_tab[s];
  }
  return NULL;
}
			       
/*+++++++++++++++++++++++++++++++++++++
  Read a specified number of bytes from a filedescriptor, and do not
  return until the requested number of bytes has been read.

  int fd The filedescriptor to read from.

  char *buf The buffer to read to.

  int len The number of bytes to read.

  Returns len when everything was read, 0 if the peer closed the
  socket, or -1 on failure.

  Note! This is a BLOCKING call. If the socket has a connection entry,
  its event selection is removed while reading and restored afterwards.
  On the 0 and -1 returns the socket is left in blocking mode and the
  event selection is not restored.
  +++++++++++++++++++++++++++++++++++++*/
static int
read_fill(int fd, char *buf, int len)
{
  int n;
  int total = 0;
  long savedMask;
  Connection *entry = get_conn_entry(fd);

  if (entry != NULL) {
    savedMask = entry->eventMask;
    tcp_select(entry, 0, ~0);	/* NOTE! We should check return code! */
  }

  SET_BLOCKING(fd);

  for (;;) {
    n = (*winSock.recv)(fd, buf+total, len-total, 0);

#ifdef DEBUG
    erl_debug("recv(0x%x, %d, 0) -> %d [", fd, len-total, n);
    erl_bin_write(buf+total, n, 15);
    erl_debug("]\n");
#endif

    if (n == 0) {		/* Socket gracefully closed? */
      return 0;
    }
    if (n == SOCKET_ERROR) {
      DEBUGF(("recv() failed w/ error %d\n", (*winSock.WSAGetLastError)()));
      return -1;
    }

    total += n;
    if (total >= len) {
      break;
    }
  }

  if (entry != NULL) {
    tcp_select(entry, savedMask, 0);
  }
  SET_NONBLOCKING(fd);
  return (len);
}


/*+++++++++++++++++++++++++++++++++++++
  Write a specified number of bytes to a connection, and do not return
  until the requested number of bytes has been written.

  int fd The filedescriptor to write to.

  char *buf The buffer to write.

  int len The number of bytes to write.

  Returns len when everything was written, or -1 on failure.

  Note! This is a BLOCKING call; send() is called at least once, even
  if len is zero.
  +++++++++++++++++++++++++++++++++++++*/
static int
write_fill(int fd, char *buf, int len)
{
  int sent;
  int total = 0;

#ifdef DEBUG
  erl_debug("write_fill(0x%x, [", fd);
  erl_bin_write(buf, len, 15);
  erl_debug("], %d)\n", len);
#endif

  for (;;) {
    sent = (*winSock.send)(fd, buf+total, len-total, 0);
    if (sent == SOCKET_ERROR) {
      DEBUGF(("send() failed w/ error %d\n", (*winSock.WSAGetLastError)()));
      return -1;
    }

    total += sent;
    if (total >= len) {
      break;
    }
  }
  return (len);
}


/*+++++++++++++++++++++++++++++++++++++
  Attempt to read a number of bytes from a connection. If the
  requested number could not be read, note the number of bytes
  remaining. Errors are handed to bad_cp().

  Connection *cp The connection to read from.

  int sz The number of bytes to read.

  Returns READY if the requested number of bytes were read, CONTINUE
  if only a portion was read (including zero bytes because the call
  would block), or the result of bad_cp(cp, READ_ERROR) on end of
  file, on any other socket error, or when the call counter shows that
  we are looping.
  +++++++++++++++++++++++++++++++++++++*/
static int
nb_read(Connection *cp, int sz)
{
  int got = (*winSock.recv)(cp->fd, cp->buf + cp->readOffset, sz, 0);

#ifdef DEBUG
  erl_debug("recv(0x%x, %d, 0) -> %d [", cp->fd, sz, got);
  erl_bin_write(cp->buf + cp->readOffset, got, 15);
  erl_debug("]\n");
#endif

  /* Everything arrived: reset the read state for the next operation. */
  if (got == sz) {
    cp->bytesRead += got;
    cp->remain = 0;
    cp->readOffset = 0;
    cp->count = 0;
    cp->status |= TICK_READ;
    fd_clr(cp, PENDING_READS);
    return READY;
  }

  /* The counter is only advanced when something other than EOF was
     returned. */
  if (got == 0 || cp->count++ == 10000) { /* EOF or looping madly? */
    DEBUGF(("nb_read(0x%x, %d) got EOF or is looping madly (count %d)\n",
	    cp, sz, cp->count));
    return bad_cp(cp, READ_ERROR);
  }

  if (got == SOCKET_ERROR) {
    if ((*winSock.WSAGetLastError)() != WSAEWOULDBLOCK) {
      DEBUGF(("tcp: nb_read failed w/ reason %d\n", (*winSock.WSAGetLastError)()));
      return bad_cp(cp, READ_ERROR);
    }
    got = 0;			/* Let's pretend we got zero bytes. */
  }

  if (got >= sz) {
    DEBUGF(("tcp: nb_read failed\n"));
    return bad_cp(cp, READ_ERROR);
  }

  /* Partial read: remember how far we got and wait for more data. */
  cp->status |= TICK_READ;
  fd_set(cp, PENDING_READS);
  cp->remain = sz - got;
  cp->readOffset += got;
  cp->bytesRead += got;
  return CONTINUE;
}


/*+++++++++++++++++++++++++++++++++++++
  Connect to a remote node, getting the port number from epmd, and
  interchange handshake messages.

  char *nodename Name of remote node, 'alive@host'.

  Connection *cp A pre-allocated connection structure (with invalid
  filedescriptor).

  Returns READY if another entry is already connecting to the same
  node (cp is then freed and its pending writes are moved to that
  entry), the result of nb_get_name_header() once the connection is
  set up, or the result of bad_cp() on failure.
  +++++++++++++++++++++++++++++++++++++*/
static int
cnct_node(char *nodename, Connection *cp)
{
  int i, s;
  uint16 rp, len;
  char *hostname, alivename[BUFSIZ];
  struct hostent *hp;
  struct in_addr peer_addr;
  
  for (i=0; i<SOCKET_TAB_SIZE; i++) {
    Connection *other = conn_tab[i];

    if (other &&
	other != cp &&
	strcmp(nodename, other->conn_nodename) == 0) {
      int old_ix;
      DEBUGF(("Found entry 0x%x (fd 0x%x) with same nodename\n", cp, cp->fd));

      /* Move the scheduled writes to the found entry. The whole list
	 is appended, so the new tail is the tail of the moved list;
	 nothing is touched if there is nothing to move. */
      if (cp->firstPend != NULL) {
	if (other->firstPend)
	  other->lastPend->next = cp->firstPend;
	else
	  other->firstPend = cp->firstPend;
	other->lastPend = cp->lastPend;
	cp->firstPend = cp->lastPend = NULL;
      }

      /* NOTE(review): only the struct itself is released here; whether
	 cp->buf or cp->event can be allocated at this point is not
	 visible in this function - confirm against create_conn_entry(). */
      old_ix = cp->erl_ix;
      sys_free(cp);
      conn_tab[old_ix] = NULL;
      
      return READY;
    }
  }
  
  /* conn_nodename holds at most MAXHOSTLEN characters. */
  if (strlen(nodename) > MAXHOSTLEN) {
    tcp_error("** Nodename %s illegal, too long (> %d) ** \n",
	      nodename, MAXHOSTLEN);
    return bad_cp(cp, INVALID_HOSTNAME);
  }
  strcpy(cp->conn_nodename, nodename);

  /* first extract the host part from nodename */
  i = 0;
  hostname = nodename;
  while (*hostname != '@') {
    if (*hostname == '\0') {
      tcp_error("** Nodename %s illegeal, no '@' character ** \n", 
		nodename);
      return bad_cp(cp, INVALID_HOSTNAME);
    }
    /* The alive name must fit in alivename[] and, together with the
       two length bytes, the request code and the terminator, in rbuf[]. */
    if (i >= BUFSIZ - 4) {
      tcp_error("** Nodename %s illegal, too long (> %d) ** \n",
		nodename, BUFSIZ - 4);
      return bad_cp(cp, INVALID_HOSTNAME);
    }
    alivename[i++] = *hostname++;
  }
  alivename[i] = '\0';
  hostname++;
  
  /* now hostname points to a host name and alivename contains the
     name of the node */
  
  if (check_host_name(hostname) == 0) 
    return bad_cp(cp, INVALID_HOSTNAME);
  
  if ((hp = (*winSock.gethostbyname)(hostname)) == NULL) {
    tcp_error("\n** Can't find host for nodename %s\n%s\n%s\n",
	      nodename,"** maybe named/resolver configuration error",
	      "** Or unknown host \n");
    return bad_cp(cp, INVALID_HOSTNAME);
  }
  
  /* Copy the address (network byte order) out of the hostent: that
     storage belongs to Winsock and may be overwritten by any resolver
     call made while we are connecting. */
  memcpy((char *) &peer_addr, *hp->h_addr_list, sizeof(struct in_addr));

  /* first we have to get hold of the portnumber to the node through
     epmd at that host */
  
  if ((s = cnct(ERLANG_DAEMON_PORT, &peer_addr, sizeof (struct in_addr),
		nodename)) < 0)
    return bad_cp(cp, READ_ERROR);

  DEBUGF(("cnct_node: cp->event is 0x%x\n", cp ? cp->event : 0));
  
  /* Request: two length bytes, the request code, then the alive name. */
  rbuf[2] = EPMD_PORT_PLEASE;
  strcpy(&rbuf[3], alivename);
  len = strlen(&rbuf[2]);
  put_int16(len, &rbuf[0]);
  len += 2;
  if (write_fill(s, rbuf, len) != len) {
    do_closesocket(s);
    return bad_cp(cp, WRITE_ERROR);
  }
  if (read_fill(s, rbuf, 2) != 2) {
    do_closesocket(s);
    return bad_cp(cp, READ_ERROR);
  }

  rp = get_int16(rbuf);		/* got the portnumber now in hbo */
  
  /* epmd stores all port numbers in host byte order */
  
  do_closesocket(s);
  
  if((s = cnct(rp, &peer_addr, sizeof(struct in_addr), nodename)) < 0)
    return bad_cp(cp, READ_ERROR);
  
  /* Socket is now blocking */
  if (tell_name(s) == -1)
    return bad_cp(cp, READ_ERROR);
  
  cp->fd = s;			/* The entry has been created before, but with no fd. */
  SET_NONBLOCKING(s);
  tcp_select(cp, FD_READ | FD_CLOSE, 0); /* Needed because of nb I/O */
  return nb_get_name_header(cp);
}  /* end cnct_node() */


/*+++++++++++++++++++++++++++++++++++++
  Connect to a specified port/ip-address pair.

  uint16 port The remote port number (in host byte order).

  struct in_addr *ip_addr The address of the remote node.

  int addr_len The size of the ip_addr argument.

  char *nodename Name of the node to connect to.

  Returns a filedescriptor connected to the specified destination, or
  -1 on failure. A socket for which connect() completed has been set
  to blocking mode; if a simultaneous connect was resolved in favour
  of the other side, the accept()ed socket returned by
  new_connection() is returned instead.

  While waiting for connect() to complete, accept events on the
  listen socket are served as well, to detect cross-over connects.
  +++++++++++++++++++++++++++++++++++++*/
static int
cnct(uint16 port, struct in_addr *ip_addr, int addr_len, char *nodename)
{
  int res, s, err, retval=-2;
  struct sockaddr_in iserv_addr;
  int new_fd, sim_connect = 0;
  Connection *cp;
  HANDLE lpHandles[2];	/* 0: wait for connect(), 1: wait for accept() */
  WSANETWORKEVENTS netEv;

  DEBUGF(("cnct(port %d,...)\n", port));

  if ((s = (*winSock.socket)(AF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET) {
    tcp_error("\n** Error creating socket: %d", (*winSock.WSAGetLastError)());
    return -1;
  }
  s = make_noninheritable_handle(s);

  DEBUGF(("cnct: socket 0x%x created\n", s));
  SET_NONBLOCKING(s);
  memzero((char*)&iserv_addr, sizeof(struct sockaddr_in));
  memcpy((char*)&iserv_addr.sin_addr, (char*)ip_addr, addr_len);
  iserv_addr.sin_family = AF_INET;
  iserv_addr.sin_port = (*winSock.htons)(port);
  
  /*
   * We have set the connect socket to non-blocking
   * This code handles the case with simultaneous connects
   */
  
  if ((res = (*winSock.connect)(s, (struct sockaddr*)&iserv_addr, sizeof iserv_addr)) == 0)  {
    DEBUGF(("connect() succeeded immediately on 0x%x\n", s));
    SET_BLOCKING(s);
    return(s);
  }

  if ((err = (*winSock.WSAGetLastError)()) != WSAEWOULDBLOCK) {
    DEBUGF(("connect() failed with error %d\n", err));
    do_closesocket(s);
    return -1;
  }

  DEBUGF(("waiting for connect ...\n"));

  /* Temporarily stop waiting for accept events. */
  tcp_deselect(listen_cp);
    
  /* Both handles start out invalid, so that the clean-up below never
     looks at an uninitialised handle when the first creation fails. */
  lpHandles[0] = WSA_INVALID_EVENT;
  lpHandles[1] = WSA_INVALID_EVENT;

  if (((lpHandles[0] = do_createevent()) == WSA_INVALID_EVENT) ||
      ((lpHandles[1] = do_createevent()) == WSA_INVALID_EVENT) ||
      (do_eventselect(s, lpHandles[0], FD_CONNECT) != 0) ||
      (do_eventselect(listen_cp->fd, lpHandles[1], FD_ACCEPT) != 0)) {
    DEBUGF(("Failed in do_createevent or WSAEventSelect: %d\n", (*winSock.WSAGetLastError)()));
    do_closesocket(s);
    if (lpHandles[0] != WSA_INVALID_EVENT)
      do_closeevent(lpHandles[0]);
    if (lpHandles[1] != WSA_INVALID_EVENT)
      do_closeevent(lpHandles[1]);
    tcp_select(listen_cp, FD_ACCEPT, 0);
    return -1;
  }
    
  /* retval stays -2 for as long as we have to keep waiting. */
  do {
    /* XXX Note that we can get here several times, thus violating
       the connect timeout specified. */
    switch ((*winSock.WSAWaitForMultipleEvents)(2, lpHandles, FALSE, 
					     connect_timeout*1000, FALSE)) {
    case WAIT_OBJECT_0:	/* Connect event on 's' */
      (*winSock.WSAEnumNetworkEvents)(s, lpHandles[0], &netEv);
      if ((err = netEv.iErrorCode[FD_CONNECT_BIT]) != 0) {
	DEBUGF(("Connect on %d failed with error %d\n", s, err));
	do_closesocket(s);
	retval = -1;
	break;
      }
      /* Cancel the event selection before switching to blocking mode. */
      do_eventselect((SOCKET)s, NULL, 0L);
      SET_BLOCKING(s);
      DEBUGF(("connection on 0x%x established\n", s));
      retval = s;
      break;
      
    case WAIT_OBJECT_0+1:	/* Accept event on 'listen_cp->fd' */
      DEBUGF(("WAIT_OBJECT_1 signalled!\n"));
      do_resetevent(lpHandles[1]); /* Must do manual reset */
      sim_connect = 0;
      new_fd = new_connection(ip_addr, port, &sim_connect);
      if ((new_fd > 0) && ((cp = get_conn_entry(new_fd)) != NULL) &&
	  (port == ERLANG_DAEMON_PORT) &&
	  (strcmp(cp->nodename, nodename) == 0)) {
	/* We are done: we do not need to ask the remote EPMD for the
	   port number since we are already connected. */
	do_closesocket(s);
	retval = -1;
	break;
      }

      DEBUGF(("CASE %d\n", sim_connect));

      switch (sim_connect) {
      case 0:
	break;		/* someone else connected */
	  
      case 1:
	/* This means that *we* should do the connect(), and the
	   other party should do the accept(). As we've already
	   issued an asynchronous connect(), just close the
	   returned socket, and wait for the connect() to
	   complete. */
	do_closesocket(new_fd);
	break;
	  
      case  2:
	/* We have accepted the other end which is now in the
	   middle of the cnct message exchange. So we'll just
	   close the socket for which connect() was issued, and
	   return the newly accept()ed socket. */
	do_closesocket(s);
	retval = new_fd;
	break;
      } /* switch(sim_connect) */
      break;

    case WAIT_TIMEOUT:
      DEBUGF(("WFMO() failed(TIMEOUT)\n"));
      do_closesocket(s);
      retval = -1;
      break;

    case WAIT_FAILED:
      DEBUGF(("WFMO() failed(FAILED)\n"));
      do_closesocket(s);
      retval = -1;
      break;

    default:
      DEBUGF(("WFMO() failed(default)\n"));
      do_closesocket(s);
      retval = -1;
      break;
    }	/* switch(res) */
  } while (retval == -2);
    
  /* Resume the normal accept handling on the listen socket. */
  tcp_select(listen_cp, FD_ACCEPT, 0);
  do_closeevent(lpHandles[0]);
  do_closeevent(lpHandles[1]);
  return retval;
} 


/*+++++++++++++++++++++++++++++++++++++
  Tell the local EPMD about our existence and set the 'thisnodename'
  variable. Also (re)sets 'thishostname', 'this_ipaddr', 'mappersock',
  'use_fully_qual_names' and possibly 'connect_timeout', and reports
  ALIVE_REQUEST_OK to Erlang on success.
  +++++++++++++++++++++++++++++++++++++*/
static int
pub(name, port)
     char *name;		/* A string of the form "alivename
				   [longnames|shortnames]
				   [connect_timeout]", as given to
				   alive(). The string is split in
				   place (blanks become NULs). */
     uint16 port;		/* The port number of our listening port. */
     /* Returns 1 on success, -1 on failure. */
{
  struct hostent *host;
  char *mode;			/* the "longnames"/"shortnames" word */
  char *timeout_str;		/* the blank preceding the timeout */
  char *dot;
  int sock, msg_len;

  /* Note: the length check covers the whole, still unsplit, string. */
  if (strlen(name) > MAXALIVELEN) {
    tcp_error("\n** Fatal error, name \"%s\" too long (> %d)\n", 
	      name, MAXALIVELEN);
    return -1;
  }

  mode = strchr(name, ' ');
  if (mode == NULL) {
    /* Only the alive name was given: use the defaults. */
    use_fully_qual_names = 1;
    connect_timeout = 2;  /* global variable */
  } else {
    *mode++ = '\0';		/* terminate the alive name */
    timeout_str = strchr(mode, ' ');
    if (timeout_str != NULL) {
      *timeout_str = '\0';	/* terminate the mode word */
    }

    if (strcmp(mode, "shortnames") == 0) {
      use_fully_qual_names = 0;
    } else if ((strcmp(mode, "longnames") == 0) || (*mode == '\0')) {
      use_fully_qual_names = 1;
    } else {
      tcp_error("Bad args to tcp driver \n");
      return -1;
    }

    /* A timeout, when present, must parse to a non-zero number.
       When absent, connect_timeout keeps its current value. */
    if (timeout_str != NULL) {
      connect_timeout = atoi(timeout_str + 1);
      if (connect_timeout == 0) {
	tcp_error("Bad args to tcp driver \n");
	return -1;
      }
    }
  }

  host = (*winSock.gethostbyname)(thishostname);
  if (host == NULL) {
    tcp_error("\n** Fatal error, Host %s not found in nameserver\n", 
	      thishostname);
    return -1;
  }

  /* With short names, cut the host name at the first dot. */
  if (use_fully_qual_names == 0) {
    dot = strchr(host->h_name, '.');
    if (dot != NULL) {
      *dot = '\0';
    }
  }

  /* Now set thishostname again, to the name the resolver returned. */
  strcpy(thishostname, host->h_name);

  sock = cnct(ERLANG_DAEMON_PORT, (void *)host->h_addr, 
	      host->h_length, "");
  if (sock < 0) {
    tcp_error("** Failed to connect to daemon port (%d) **\n", 
	      ERLANG_DAEMON_PORT);
    return -1;
  }
  memcpy(&this_ipaddr.s_addr, *host->h_addr_list, sizeof(struct in_addr));

  mappersock = sock;

  DEBUGF(("mappersock = 0x%x\n", mappersock));

  sprintf(thisnodename, "%s@%s", name, host->h_name);

  /* Build the request: 2-byte length, EPMD_ALIVE, 2-byte port, name.
     The length counts everything after the length field itself. */
  rbuf[2] = EPMD_ALIVE;
  put_int16(port, &rbuf[3]);
  strcpy(&rbuf[5], name);
  msg_len = 3 + strlen(name);
  put_int16(msg_len, rbuf);
  if (write_fill(sock, rbuf, msg_len+2) != msg_len+2) {
    DEBUGF(("Sending data to epmd failed\n"));
    return -1;
  }

  /* EPMD answers with three bytes; anything less is a rejection. */
  if (read_fill(sock, rbuf, 3) != 3) {
    tcp_error("** Can't go distributed, epmd rejects us\n** maybe "
	      "name %s is already occupied !! \n", thisnodename);
    return -1;
  }

  /* Report success to Erlang. Only byte 0 is overwritten; bytes 1-2
     are still the ones just read from EPMD. The host name follows. */
  rbuf[0] = ALIVE_REQUEST_OK;
  strcpy(&rbuf[3], thishostname);
  driver_output(erlang_port, rbuf, strlen(&rbuf[3]) + 4);
  /* 4 == 3 + the NUL terminator */
  return 1;
}


/*+++++++++++++++++++++++++++++++++++++
  Open our listening socket.
  +++++++++++++++++++++++++++++++++++++*/
static int
get_and_open_local_port(void)
     /* Returns the port number assigned to the listening socket, or
       -1 on failure. */
{
  struct sockaddr_in iserv_addr;
  int length;
  int listensock;
  
  if ((listensock = (*winSock.socket)(AF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET) {
    tcp_error("\n** Error creating socket: %d", (*winSock.WSAGetLastError)());
    driver_failure(erlang_port, -1);
    return(-1);
  }
  DEBUGF(("listensock = 0x%x\n", listensock));

  memzero((char*) &iserv_addr, sizeof(iserv_addr));
  iserv_addr.sin_family = AF_INET;
  iserv_addr.sin_addr.s_addr = (*winSock.htonl)(INADDR_ANY);
  iserv_addr.sin_port = (*winSock.htons)(0);
  if ((*winSock.bind)(listensock,
	   (struct sockaddr*) &iserv_addr, 
	   sizeof(iserv_addr)) == SOCKET_ERROR) {
    DEBUGF(("Error calling bind(): %d\n", (*winSock.WSAGetLastError)()));
    driver_failure(erlang_port, -1);
    return(-1);
  }
  if ((*winSock.listen)(listensock, 5) == SOCKET_ERROR) {
    DEBUGF(("Error calling listen(): %d\n", (*winSock.WSAGetLastError)()));
    driver_failure(erlang_port, -1);
    return(-1);
  }

  /* find out assigned portnumber */
  length = sizeof iserv_addr;
  if ((*winSock.getsockname)(listensock, 
		  (struct sockaddr *)&iserv_addr, 
		  &length) == SOCKET_ERROR) {
    DEBUGF(("Error calling getsockname(): %d\n", (*winSock.WSAGetLastError)()));
    driver_failure(erlang_port, -1);
    return(-1);
  }
  
  /* iserv_addr.sin_port shall now be in network byte order
     as returned by the syscall*/

  listen_cp = create_conn_entry(listensock);
  if (tcp_select(listen_cp, FD_ACCEPT, 0) == -1) {
    DEBUGF(("Error calling tcp_select().\n"));
    driver_failure(erlang_port, -1);
    return -1;
  }
  
  DEBUGF(("get_and_open_local_port() -> port # %d\n", (*winSock.ntohs)(iserv_addr.sin_port)));
  
  return (*winSock.ntohs)(iserv_addr.sin_port);
}


/*+++++++++++++++++++++++++++++++++++++
  Shut down a connection, release everything attached to it, and tell
  Erlang that the node is down. An activated (CONNECTED) connection is
  reported as BAD_CHANNEL with its index; one that never got that far
  is reported as BAD_SYSTEM with the name we tried to connect to.
  +++++++++++++++++++++++++++++++++++++*/
static int
bad_cp(cp, retval)
     Connection *cp;		/* The connection to remove. If NULL,
				  return without action. */
     int retval;		/* The return value from the function. */
     /* Returns the value specified in retval. */
{
  DEBUGF(("bad_cp(cp 0x%x (fd 0x%x, event 0x%x), %d)\n",
	  cp, (cp ? cp->fd : 0), (cp ? cp->event : 0), retval));

  if (cp != NULL) {
    if (!(cp->status & CONNECTED)) {
      /* Never activated: Erlang only knows the node by name. */
      DEBUGF(("Sending BAD_SYSTEM %s to Erlang\n", cp->conn_nodename));
      rbuf[0] = BAD_SYSTEM;
      strcpy(&rbuf[1], cp->conn_nodename);
      driver_output(erlang_port, rbuf, strlen(rbuf+1)+1); /* XXX Check retval? */
    } else {
      /* Activated: Erlang knows the connection by its index. */
      DEBUGF(("Sending BAD_CHANNEL %d to Erlang\n", cp->erl_ix));
      rbuf[0] = BAD_CHANNEL;
      put_int16(cp->erl_ix, &rbuf[1]);
      driver_output(erlang_port, rbuf, 3); /* XXX Check retval? */
    }
    clear_conn_entry(cp);
  }

  return retval;
}


/*+++++++++++++++++++++++++++++++++++++
  Clear the connection stored at a given table index (if there is
  one) and send BAD_CHANNEL for that index to Erlang. The message is
  sent even when the slot was already empty.
  +++++++++++++++++++++++++++++++++++++*/
static void
bad_ix(ix)
     uint16 ix;			/* The index identifying the
				  connection to remove. */
{
  DEBUGF(("bad_ix(ix %d (cp 0x%x))\n", ix, conn_tab[ix]));

  if (conn_tab[ix]) {
    clear_conn_entry(conn_tab[ix]);
  }

  /* Message layout: op code followed by the 16-bit index. */
  *rbuf = BAD_CHANNEL;
  put_int16(ix, rbuf + 1);
  driver_output(erlang_port, rbuf, 3);
}

/*+++++++++++++++++++++++++++++++++++++
  Shutdown a connection, deallocate/clear all associated data, and
  tell Erlang that the node is down. Identical to bad_ix() except that
  the op code sent to Erlang is SYNC_BAD_CHANNEL.
  +++++++++++++++++++++++++++++++++++++*/
static void
sync_bad_ix(ix)
     uint16 ix;			/* The index identifying the
				  connection to remove. */
{
  /* The trace names this function, so that it can be told apart
     from bad_ix() in the debug log. */
  DEBUGF(("sync_bad_ix(ix %d (cp 0x%x))\n", ix, conn_tab[ix]));
  
  if (conn_tab[ix] != NULL)
    clear_conn_entry(conn_tab[ix]);

  /* Message layout: op code followed by the 16-bit index. */
  rbuf[0] = SYNC_BAD_CHANNEL;
  put_int16(ix, &rbuf[1]);
  driver_output(erlang_port, rbuf, 3);
}


/*+++++++++++++++++++++++++++++++++++++
  Look up the connection table slot whose entry uses the given
  filedescriptor. The first matching slot wins.
  +++++++++++++++++++++++++++++++++++++*/
static int
get_conn_entry_index(fd)
     int fd;			/* The filedescriptor searched for. */
     /* Returns the index into the table, or -1 if not found. */
{
  int slot = 0;

  while (slot < SOCKET_TAB_SIZE) {
    /* Empty slots are NULL and never match. */
    if (conn_tab[slot] != NULL && conn_tab[slot]->fd == fd) {
      return slot;
    }
    slot++;
  }
  return -1;
}


/*+++++++++++++++++++++++++++++++++++++
  Look up the connection table slot whose entry uses the given event
  handle. The first matching slot wins.
  +++++++++++++++++++++++++++++++++++++*/
static int
get_conn_entry_index_ev(event)
     int event;			/* The event searched for. */
     /* Returns the index into the table, or -1 if not found. */
{
  int slot = 0;

  while (slot < SOCKET_TAB_SIZE) {
    /* The int argument is compared as a HANDLE. */
    if (conn_tab[slot] != NULL && conn_tab[slot]->event == (HANDLE)event) {
      return slot;
    }
    slot++;
  }
  return -1;
}


/*+++++++++++++++++++++++++++++++++++++
  Fetch the connection record that uses the given filedescriptor.
  +++++++++++++++++++++++++++++++++++++*/
static Connection *
get_conn_entry(fd)
     int fd;			/* The filedescriptor searched for. */
     /* Returns a pointer to the record, or NULL if not found. */
{
  int slot = get_conn_entry_index(fd);

  if (slot < 0) {
    return NULL;
  }
  return conn_tab[slot];
}


/*+++++++++++++++++++++++++++++++++++++
  Fetch the connection record that uses the given event handle.
  +++++++++++++++++++++++++++++++++++++*/
static Connection *
get_conn_entry_ev(event)
     int event;			/* The event searched for. */
     /* Returns a pointer to the entry, or NULL if not found. */
{
  int slot = get_conn_entry_index_ev(event);

  if (slot < 0) {
    return NULL;
  }
  return conn_tab[slot];
}


/*+++++++++++++++++++++++++++++++++++++
  Allocate and initialize a connection struct in the connection table.
  +++++++++++++++++++++++++++++++++++++*/
static Connection *
create_conn_entry(fd)
     int fd;			/* The initial value for the
				  filedescriptor field. */
     /* A pointer to the created struct. */
{
  int i;

  for (i=0; i<SOCKET_TAB_SIZE; i++)
    if (conn_tab[i] == NULL) {	/* Found free entry? */

      if ((conn_tab[i] = (Connection*) sys_alloc(sizeof(Connection))) == NULL)
	return NULL;
      
      conn_tab[i]->fd = fd;
      conn_tab[i]->buf = NULL;
      conn_tab[i]->status = FREE;
      conn_tab[i]->readState = IDLE;
      conn_tab[i]->bufSize = 0;
      conn_tab[i]->large = 0;
      conn_tab[i]->bytesRead = 0;
      conn_tab[i]->readOffset = 0;
      conn_tab[i]->remain = 0;
      conn_tab[i]->count = 0;
      conn_tab[i]->pendings = 0;
      conn_tab[i]->firstPend = NULL;
      conn_tab[i]->lastPend = NULL;
      conn_tab[i]->event = NULL;
      conn_tab[i]->eventMask = 0;
      conn_tab[i]->erl_ix = i;

      return(conn_tab[i]);
    }
  
  return NULL;
}


/*+++++++++++++++++++++++++++++++++++++
  Tear down a connection entry: close its socket, call tcp_deselect(),
  free the queue of pending writes and the receive buffer, free the
  entry itself and mark its table slot as free.
  +++++++++++++++++++++++++++++++++++++*/
static void
clear_conn_entry(cp)
     Connection *cp;		/* The connection to clear. If NULL,
				  return without action. */
{
  Pend *pend, *following;
  int slot;

  DEBUGF(("clear_conn_entry(0x%x (fd==0x%x, event==0x%x))\n",
	  cp, (cp ? cp->fd : 0), (cp ? cp->event : 0)));

  if (cp == NULL) {
    return;
  }

  /* Only strictly positive descriptors are treated as open sockets. */
  if (cp->fd > 0) {
    do_closesocket(cp->fd);
  }

  tcp_deselect(cp);

  /* Free the queue of pending writes; grab each successor before its
     predecessor is released. */
  for (pend = cp->firstPend; pend != NULL; pend = following) {
    following = pend->next;
    sys_free(pend);
  }

  if (cp->buf) {
    sys_free(cp->buf);
  }

  /* Remember the slot before the entry is freed. */
  slot = cp->erl_ix;
  sys_free(cp);
  conn_tab[slot] = NULL;
}


/*+++++++++++++++++++++++++++++++++++++
  Start reading the 4-byte header of an incoming message (without
  ever blocking). When the header is complete, continue directly with
  the message body (if any - it might be just a tick). The connection
  is removed if the receive buffer cannot be allocated.
  +++++++++++++++++++++++++++++++++++++*/
static int
read_header(cp)
     Connection *cp;		/* The connection to read from. */
     /* Returns READY, CONTINUE, or an error code; */
{
  int status;

  DEBUGF(("read_header(cp=0x%x)\n", cp));

  if (cp == NULL) {
    return READY;
  }

  /* A buffer that was grown for a very large message is kept for a
     while: 'large' counts down once per call, and the buffer is given
     back when the count reaches zero. */
  if ((cp->large > 0) && (--cp->large == 0)) {
    if (cp->buf) {
      sys_free(cp->buf);
    }
    cp->buf = NULL;
    cp->bufSize = 0;
    cp->large = 0;
  }

  if (challoc(cp, 4) < 0) {
    tcp_error("\n** Failed allocating receive buffer\n");
    return bad_cp(cp, MEMORY_ERROR);
  }

  status = nb_read(cp, 4);

  DEBUGF(("read_header(): nb_read(cp 0x%x (fd 0x%x), sz=4) -> %d\n",
	  cp, cp->fd, status));

  if (status == READY) {
    /* Whole header available: go on with the body. */
    return nb_read_message(cp);
  }
  if (status == CONTINUE) {
    /* Header incomplete: remember where we are. */
    cp->readState = READING_HEADER;
    return CONTINUE;
  }
  return READ_ERROR;
}


/*+++++++++++++++++++++++++++++++++++++
  Read the message body from a connection. The size of the body must
  be stored in cp->buf. Close the connection upon error.
  +++++++++++++++++++++++++++++++++++++*/
static int
nb_read_message(cp)
     Connection *cp;		/* The connection to read from. */
     /* Returns READY, CONTINUE, or an error code. */
{
  int msgSize = get_int32(cp->buf);

  if (msgSize == 0)		/* The tick */
    return READY;

  cp->remain = msgSize;
  
  /* We know that cp->buf holds at least 4 bytes, as read_header()
     has been called previously. */
  cp->buf[0] = PASS_THROUGH;
  put_int16(cp->erl_ix, cp->buf+1);
  cp->readOffset = 3;
  cp->bytesRead = 3;
  
  if (challoc(cp, msgSize+3) < 0) {
    DEBUGF(("Failed allocating buffer\n"));
    return bad_cp(cp, -1);
  }
  fd_set(cp, PENDING_READS);

  switch (nb_read(cp, cp->remain)) {
  case READY:
    return message_to_erlang(cp);
    
  case CONTINUE:
    cp->readState = READING_MESSAGE;
    return CONTINUE;

  default:			/* READ_ERROR */
    return READ_ERROR;
  }
}

/*+++++++++++++++++++++++++++++++++++++
  Hand a completely read message (the first cp->bytesRead bytes of
  cp->buf) over to Erlang and put the connection back in the IDLE
  read state.
  +++++++++++++++++++++++++++++++++++++*/
static int
message_to_erlang(cp)
     Connection *cp;		/* The connection read from. */
     /* Returns READY. */
{
  DEBUGF(("message_to_erlang(cp 0x%x)\n", cp));

  /* XXX The result of driver_output() is not checked. */
  driver_output(erlang_port, cp->buf, cp->bytesRead);

  cp->readState = IDLE;
  return READY;
}


/*+++++++++++++++++++++++++++++++++++++
  Accept an incoming connect request. If the node connection is the
  same as 'other', meaning that the two nodes are simultaneously trying
  to connect to each other, decide which node should connect() and which
  should accept(). 'port' is the port we are connecting to. We always
  accept if port is equal to ERLANG_DAEMON_PORT.
  +++++++++++++++++++++++++++++++++++++*/
static int
new_connection(other, port, sim_connect)
     struct in_addr *other;	/* The node address to check against. */
     uint16 port;		/* The port we are connecting to. */
     int *sim_connect;		/* Set to 0 on return if there was
				  no simultaneous connect, 1 if we
				  should do the connect(), and 2 if we
				  should do the accept() part. */
     /* The filedescriptor of the opened socket, -1 on error. */
{
  int i, fd;
  struct sockaddr_in icli_addr;	/* workaround for QNX bug - can not */
  int icli_addr_len;		/* handle NULL pointers to accept.  */

  icli_addr_len = sizeof(icli_addr);
  fd = (*winSock.accept)(listen_cp->fd,
	      (struct sockaddr*) &icli_addr,
	      (int*) &icli_addr_len);

  DEBUGF(("accepted socket = 0x%x\n", fd));
  if (fd == INVALID_SOCKET) {
    int err = (*winSock.WSAGetLastError)();
    if ((err == WSAEMFILE) || (err == WSAENOBUFS)) {
      tcp_error("\n** run out of filedecriptors (max == %d)\n", 
		SOCKET_TAB_SIZE);
    }
    DEBUGF(("failed in accept(): error %d\n", err));
    return(-1);
  }
  fd = make_noninheritable_handle(fd);

  /* We have to disable the WSAEventSelect "inherited" from listen_cp->fd */
  do_eventselect((SOCKET)fd, NULL, 0L);
  SET_NONBLOCKING(fd);

  /* If port is ERLANG_DAEMON_PORT we always accept connections */
  if ((port != ERLANG_DAEMON_PORT) &&
      (memcmp((char*) &(icli_addr.sin_addr), (char*) other, 
	      sizeof(struct in_addr)) == 0)) {
    *sim_connect = 1;
    /* Gory case, *we* are sitting in a non-blocking connect to */
    /* the same node which is now connecting to us */
    if (memcmp((char*) &this_ipaddr, (char*) &(icli_addr.sin_addr), 
	       sizeof(struct in_addr)) < 0) { /* we are smaller, we lose */
      *sim_connect = 2;
    }
    return(fd);

  }
  if (tell_name(fd) == -1) 
    return (-1);
  if ((i = get_name(fd)) < 0)
    return(i);

  activate_name(fd);
  return(fd);
}


/*+++++++++++++++++++++++++++++++++++++
  Tell the emulator to wait for input on this socket, and inform
  Erlang that a new node has been connected. Requests notification when
  there is data to read, the socket is writable, or it has been closed.
  +++++++++++++++++++++++++++++++++++++*/
static int
activate_name(fd)
     int fd;			/* The filedescriptor to activate. */
     /* Returns 1 on success, -1 if no connection entry exists for
       the filedescriptor. */
{
  char buf[BUFSIZ];
  Connection *cp;
  
  DEBUGF(("activate_name(fd==0x%x)\n", fd));
  
  /* get_conn_entry() returns NULL when the filedescriptor is not in
     the table; there is nothing to activate in that case. */
  cp = get_conn_entry(fd);
  if (cp == NULL)
    return(-1);

  /* Message to Erlang: op code, 16-bit connection index, node name
     (including its terminating NUL). */
  if (cp->status & HIDDEN)
    buf[0] = NEW_HIDDEN_CONNECTION;
  else
    buf[0] = NEW_CONNECTION;
  put_int16(cp->erl_ix, &buf[1]);
  strcpy(&buf[3], cp->nodename);

  tcp_select(cp, FD_READ | FD_WRITE | FD_CLOSE, 0);
  
  driver_output(erlang_port, buf, strlen(&buf[3])+1+3);
  return(1);
}


/*
 * The messages produced by tell_name(fd) and received by get_name(fd)
 * consist of one byte of type info (MY_NAME or MY_HIDDEN_NAME) followed
 * by the s_addr field of struct in_addr, and then the
 * (null-terminated) nodename.
 */


/*+++++++++++++++++++++++++++++++++++++
  Send this node's name to the other end of a socket. The message is
  a 2-byte length, the MY_NAME type byte, our 4-byte IP address and
  our node name including its terminating NUL.
  +++++++++++++++++++++++++++++++++++++*/
static int
tell_name(fd)
     int fd;			/* The filedescriptor to send data to. */
     /* Returns 1 on success, -1 on failure. */
{
  /* NOTE! This function blocks until the send operation is completed. */
  char msg[BUFSIZ];
  uint16 body_len, total_len;

  msg[2] = MY_NAME;
  memcpy(&msg[3], &this_ipaddr.s_addr, 4);
  strcpy(&msg[7], thisnodename); /* Our node name */

  /* Body: type byte + 4 address bytes + name. The length field in
     front of it adds two more bytes on the wire. */
  body_len = 5 + strlen(&msg[7]);
  put_int16(body_len, msg);
  total_len = body_len + 2;

  if (write_fill(fd, msg, total_len) != total_len)
    CLOSE_RET(fd);
  return 1;
}


/*+++++++++++++++++++++++++++++++++++++
  Read the 2-byte length header of a tell_name() message from a
  connection, and go on with the message itself once the header is
  complete. The connection is removed upon error.
  +++++++++++++++++++++++++++++++++++++*/
static int
nb_get_name_header(cp)
     Connection *cp;		/* The connection to read from. */
     /* Returns the result from nb_get_name(), or CONTINUE or an
       error code. */
{
  int status;

  DEBUGF(("nb_get_name_header(cp 0x%x)\n", cp));

  if (challoc(cp, 2) != 0) {
    return bad_cp(cp, MEMORY_ERROR);
  }

  status = nb_read(cp, 2);
  if (status == READY) {
    return nb_get_name(cp);
  }
  if (status == CONTINUE) {
    /* We're not done, schedule a new read op. */
    cp->readState = GETTING_NAME_SZ;
    return CONTINUE;
  }
  return bad_cp(cp, READ_ERROR);
}


/*+++++++++++++++++++++++++++++++++++++
  Read the body of a tell_name() message from a connection. The body
  length is taken from the 2-byte header already present in cp->buf.
  The connection is removed if the buffer cannot be allocated.
  +++++++++++++++++++++++++++++++++++++*/
static int
nb_get_name(cp)
     Connection *cp;		/* The connection to read from. */
     /* Returns the result from nb_complete_connection(), or CONTINUE
       or an error code. */
{
  int rval = get_int16(cp->buf);

  DEBUGF(("nb_get_name(cp 0x%x)\n", cp));

  /* Ask for one byte more than the message: the terminator stored
     at cp->buf[rval] below must also fit in the buffer. */
  if (challoc(cp, rval + 1) != 0)
    return bad_cp(cp, MEMORY_ERROR);
  
  /* Terminate in advance, so that the node name at the end of the
     message can be handled as a C string once it has been read. */
  cp->buf[rval] = '\0';
  
  switch (nb_read(cp, rval)) {
  case CONTINUE:
    /* We're not done, schedule a new read op. */
    cp->readState = GETTING_NAME;
    return CONTINUE;
    
  case READY:
    return nb_complete_connection(cp);

  default:
    return READ_ERROR;
  }
}


/*+++++++++++++++++++++++++++++++++++++
  Complete the connection setup for a connection, using the
  tell_name() message held in cp->buf: type byte, the peer's IP
  address, and its NUL-terminated node name ("alive@host"). The
  connection is removed if the message is malformed, the host name is
  not acceptable, or the peer is not the node we asked for.
  +++++++++++++++++++++++++++++++++++++*/
static int
nb_complete_connection(cp)
     Connection *cp;		/* The connection. */
     /* Returns READY or an error code. */
{
  char *ascname;
  int i;
  static const int one = 1;
  
  DEBUGF(("nb_complete_connection(cp 0x%x)\n", cp));

  /* OK, now we got the name. */

  if ((*cp->buf != MY_NAME) && (*cp->buf!= MY_HIDDEN_NAME))
    return bad_cp(cp, INVALID_NAME);

  /* The node name starts after the type byte and the IP address. It
     comes from the peer, so it may lack the '@' separator. */
  ascname = cp->buf + 1 + sizeof(struct in_addr);
  ascname = strchr(ascname, '@');
  if (ascname == NULL) {
    DEBUGF(("nb_complete_connection(): no '@' in node name\n"));
    return bad_cp(cp, INVALID_NAME);
  }
  if (check_host_name(ascname+1) == 0) {
    DEBUGF(("check_host_name(%s) failed\n", ascname+1));
    return bad_cp(cp, INVALID_HOSTNAME);
  }
  
  /* Temporarily cut the name at '@' to copy the alive name, then
     restore it and copy the full node name. */
  *ascname = '\0';
  strcpy(cp->alivename, cp->buf + 1 + sizeof(struct in_addr));
  *ascname = '@';
  strcpy(cp->nodename, cp->buf + 1 + sizeof(struct in_addr));
  
  memcpy(&(cp->ip_addr), cp->buf+1, sizeof(struct in_addr));
  
  /* Remove an older entry (other than the listen entry and this
     connection) that has the same IP address and the same alive
     name. At most one such entry is removed. */
  for (i=0; i < SOCKET_TAB_SIZE; i++) {
    if (!conn_tab[i] ||
	conn_tab[i] == listen_cp ||
	conn_tab[i]->fd == cp->fd ||
	conn_tab[i]->ip_addr.s_addr != cp->ip_addr.s_addr)
      continue;
    if (strcmp(conn_tab[i]->alivename, cp->alivename) == 0) {
      DEBUGF(("ERROR: nb_complete_connection(%s)\n", cp->nodename));
      sync_bad_ix(i);
      break;
    }
  }
  
  /* The peer must be the node we set out to connect to. */
  if (strcmp(cp->conn_nodename, cp->nodename) != 0) {
    DEBUGF(("ERROR: cnct_node(%s)\n", cp->conn_nodename));
    tcp_error("** Ambigous nodename %s **\n", cp->conn_nodename);
    return bad_cp(cp, INVALID_HOSTNAME);
  }

  cp->status = (TICK_READ | TICK_WRITE | CONNECTED);
  if (cp->buf[0] == MY_HIDDEN_NAME)
    cp->status |= HIDDEN;
  
  (*winSock.setsockopt)(cp->fd, IPPROTO_TCP, TCP_NODELAY,
			(char *)&one, sizeof(one));
  cp->readState = IDLE;
  activate_name(cp->fd);

  if (cp->firstPend != NULL)	/* is there a message to send? */
    do_scheduled_write(cp);	/* Do the first writing here */

  return READY;
}


/*+++++++++++++++++++++++++++++++++++++
  Read the name of a peer node from the peer itself (a tell_name()
  message: 2-byte length, type byte, IP address, node name) and set up
  a connection entry for it. The socket is closed on every failure.
  +++++++++++++++++++++++++++++++++++++*/
static int
get_name(fd) 
     int fd;			/* The filedescriptor pointing out a
				  specific node. Must be in blocking
				  mode. */
     /* Returns 1 on success, -1 on failure. */
{
  int rval, i;
  char buf[BUFSIZ],  *cp;
  Connection *connp;
  static const int one = 1;
  
  if (read_fill(fd, buf, 2) != 2) 
    CLOSE_RET(fd);

  /* The message must fit in buf together with a terminating NUL, and
     must at least hold the type byte and the IP address; otherwise
     the name pointer computed below would lie beyond the data
     actually received. */
  rval = get_int16(buf);
  if ((rval >= BUFSIZ) || (rval < 1 + (int) sizeof(struct in_addr)))
    CLOSE_RET(fd);
  if ((read_fill(fd, buf, rval)) != rval) 
    CLOSE_RET(fd);
  if ((buf[0] != MY_NAME) && (buf[0] != MY_HIDDEN_NAME))  
    CLOSE_RET(fd);
  
  /* Now we create Connection entry and install */
  /* all the data we have received */
  
  if ((connp = create_conn_entry(fd)) == NULL)
    CLOSE_RET(fd); 
  buf[rval] = '\0';
  
  /* The node name comes from the peer, so it may lack the '@'
     separator; treat that like an unacceptable host name. */
  cp = buf + 1 + sizeof(struct in_addr);
  cp = strchr(cp, '@');
  if ((cp == NULL) || (check_host_name(cp+1) == 0))
    CLOSE_CLEAR(fd);
  
  /* Temporarily cut the name at '@' to copy the alive name, then
     restore it and copy the full node name.
     NOTE(review): the copies are bounded only by BUFSIZ, not by the
     size of the destination fields - confirm the field sizes. */
  *cp = '\0';
  strcpy(connp->alivename, buf + 1 + sizeof(struct in_addr));
  *cp = '@';
  strcpy(connp->nodename, buf + 1 + sizeof(struct in_addr));
  
  memcpy(&(connp->ip_addr), buf+1, sizeof(struct in_addr));
  
  /* Remove an older entry (other than the listen entry and this
     connection) that has the same IP address and the same alive
     name. At most one such entry is removed. */
  for (i=0; i < SOCKET_TAB_SIZE; i++) {
    if (!conn_tab[i] || conn_tab[i]->fd == fd || conn_tab[i] == listen_cp)
      continue;
    if (conn_tab[i]->ip_addr.s_addr != connp->ip_addr.s_addr)
      continue;
    if (strcmp(conn_tab[i]->alivename, connp->alivename) == 0) {
      sync_bad_ix(i);
      break;
    }
  }

  connp->status = (TICK_READ | TICK_WRITE | CONNECTED);
  if (buf[0] == MY_HIDDEN_NAME)
    connp->status |= HIDDEN;
  
  (*winSock.setsockopt)(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&one, sizeof(one));
  SET_NONBLOCKING(fd); 
  return(1);
}


/*+++++++++++++++++++++++++++++++++++++
  Send data to a socket without blocking. If earlier writes are
  already pending, the data is queued behind them. Otherwise a direct
  send is attempted; whatever could not be sent right away is queued
  and Erlang is told to back off.
  +++++++++++++++++++++++++++++++++++++*/
static int
nb_write(cp, buf, len)
     Connection *cp;		/* The connection to write to. */
     char *buf;			/* The data buffer. The data is
				  copied to an internal buffer before
				  the function call returns. */
     int len;			/* The number of bytes to write. */
     /* Returns 1 on success, -1 on failure. */
{
  int sent = 0;			/* bytes already handed to the socket */

  if (cp == NULL) {
    return -1;
  }

#ifdef DEBUG
  erl_debug("nb_write(fd 0x%x, len %d): [", cp->fd, len);
  erl_bin_write(buf, len, 15);
  erl_debug("]\n");
#endif

  /* With writes already pending nothing is sent directly, to keep
     the data in order. */
  if (!fd_isset(cp, PENDING_WRITES)) {
    cp->status |= TICK_WRITE;

    sent = (*winSock.send)(cp->fd, buf, len, 0);
    if (sent == len) {
      return 1;			/* everything went out at once */
    }

    if (sent == SOCKET_ERROR) {
      int err = (*winSock.WSAGetLastError)();

      if (err != WSAEWOULDBLOCK) {
	DEBUGF(("nb_write failed w/ reason %d\n", err));
	return -1;
      }
      sent = 0;			/* Pretend we wrote 0 bytes */
    }
  }

  /* Queue the part that has not been sent. */
  if (add_write_entry(cp, buf+sent, len-sent) != 0) {
    return -1;
  }

  return init_scheduled_writes(cp);
}


/*+++++++++++++++++++++++++++++++++++++
  Record that there is queued data to write on the connection, and
  tell Erlang to back off by sending REAL_BUSY for its index.
  +++++++++++++++++++++++++++++++++++++*/
static int
init_scheduled_writes(cp)
     Connection *cp;		/* The connection to write to. */
     /* Returns 1 on success, -1 on failure. */
{
  byte msg[3];			/* op code + 16-bit connection index */

  if (cp == NULL) {
    return -1;
  }

#ifdef DEBUG
  erl_debug("init_scheduled_writes(cp 0x%x  (fd 0x%x))\n", cp, cp->fd);
#endif

  /* Tell erlang to back off */
  put_int16(cp->erl_ix, msg+1);
  msg[0] = REAL_BUSY;
  driver_output(erlang_port, msg, 3);

  fd_set(cp, PENDING_WRITES);
  return 1;
}

/*+++++++++++++++++++++++++++++++++++++
  Tell Erlang that it is okay to write to the connection again, by
  sending NOT_REAL_BUSY for its index.
  +++++++++++++++++++++++++++++++++++++*/
static int
cancel_scheduled_writes(cp)
     Connection *cp;		/* The connection to write to. */
     /* Returns 1 on success, -1 on failure. */
{
  byte msg[3];			/* op code + 16-bit connection index */

  if (cp == NULL) {
    return -1;
  }

  put_int16(cp->erl_ix, msg+1);
  msg[0] = NOT_REAL_BUSY;
  driver_output(erlang_port, msg, 3);
  return 1;
}


/*+++++++++++++++++++++++++++++++++++++
  Append a copy of the given data to the connection's queue of
  pending writes.
  +++++++++++++++++++++++++++++++++++++*/
static int
add_write_entry(cp, buf, len)
     Connection *cp;		/* The connection to write to. */
     char *buf;			/* The data buffer. The data is
				  copied to an internal buffer before
				  the function call returns. */
     int len;			/* The number of bytes to write. */
     /* Returns 0 on success, -1 on failure. */
{
  Pend *entry;

  /* One allocation holds the queue record and the data copy. */
  entry = (Pend*) sys_alloc(len + sizeof(Pend));
  if (entry == NULL) {
    DEBUGF(("add_write_entry(cp 0x%x, fd 0x%x, p 0x%x) failed.\n",
	    cp, cp->fd, entry));
    return -1;
  }

  DEBUGF(("add_write_entry(cp 0x%x, fd 0x%x, p 0x%x, len %d)\n",
	  cp, cp->fd, entry, len));

  /* Nothing written yet: position 0, all of it remaining. */
  memcpy(entry->buf, buf, len);
  entry->cpos = 0;
  entry->remain = len;
  entry->next = NULL;

  /* Link the entry in at the tail of the queue. */
  if (!cp->firstPend) {
    cp->firstPend = entry;
  } else {
    cp->lastPend->next = entry;
  }
  cp->lastPend = entry;

  return 0;
}


/*** Driver interface ***/  

/*+++++++++++++++++++++++++++++++++++++
  Driver interface function to initialize the TCP driver. Marks the
  driver as not started by setting 'erlang_port' to -1.
  +++++++++++++++++++++++++++++++++++++*/
static int
tcp_init(void)
     /* Returns 0. */
{
  erlang_port = -1;		/* tcp_start() refuses to run otherwise */
  return 0;
}

/*+++++++++++++++++++++++++++++++++++++
  Driver interface function to start the TCP driver: look up the
  Winsock functions, start Winsock 2.0, allocate an empty connection
  table and record the host name. Fails if the driver is already
  started (erlang_port != -1).
  +++++++++++++++++++++++++++++++++++++*/
static long
tcp_start(port, buf) 
     int port;			/* The key used for communicating
				  with the Erlang system. */
     char *buf;			/* Not used. */
     /* The port argument, or -1 on failure. */
{
  /*   Start Winsock2 and initialize the table with active connections. */
  int i;
  WORD wVersionRequested;
  WSADATA wsaData;

  if (!tcp_lookup_functions())
    return -1;

  /* Start Winsock, version 2.0 (?) */
  wVersionRequested = MAKEWORD(2,0);
  if ((*winSock.WSAStartup)(wVersionRequested, &wsaData) != 0)
    return -1;
  if (LOBYTE(wsaData.wVersion) != 2 || HIBYTE(wsaData.wVersion) != 0) {
    (*winSock.WSACleanup)();
    return -1;
  }
  
  SOCKET_TAB_SIZE = sys_max_files();  /* Call direct here */
  if (erlang_port != -1 ||
      (conn_tab =
       (Connection **)sys_alloc(SOCKET_TAB_SIZE * sizeof (Connection*)))
      == NULL) {
    /* WSAStartup() succeeded above, so it must be balanced by a
       WSACleanup() on this failure path too. */
    (*winSock.WSACleanup)();
    return(-1);
  }
  
  /* All slots start out free. */
  for (i=0; i<SOCKET_TAB_SIZE; i++)
    conn_tab[i] = NULL;

  erlang_port = port;
  mappersock = 0;
  listen_cp = NULL;
  /* XXX The result of gethostname() is not checked. */
  (*winSock.gethostname)(thishostname, MAXHOSTLEN+1);
  return(port);
}


/*+++++++++++++++++++++++++++++++++++++
  Shuts the driver down: deselect the listening entry, close the
  EPMD socket, remove every other connection through bad_cp(), and
  free the connection table.
  +++++++++++++++++++++++++++++++++++++*/
static int
tcp_stop(port)
     long port;			/* Not used. */
     /* Returns 0. */
{
  int slot;

  /* Marks the driver as stopped; note that bad_cp() below therefore
     reports to port -1. */
  erlang_port = -1;

  if (listen_cp) {
    tcp_deselect(listen_cp);
  }
  if (mappersock > 0) {
    do_closesocket(mappersock);
  }

  /* NOTE(review): the listen entry is skipped here, so its socket
     and table entry are not released by this function. */
  for (slot = 0; slot < SOCKET_TAB_SIZE; slot++) {
    if (conn_tab[slot] && conn_tab[slot] != listen_cp) {
      (void) bad_cp(conn_tab[slot], 0);
    }
  }

  sys_free((char *)conn_tab);
  return 0;
}


/*+++++++++++++++++++++++++++++++++++++
  Placeholder callback: does nothing apart from a debug warning.
  +++++++++++++++++++++++++++++++++++++*/
static int
tcp_null(port, fd)
     long port;			/* Not used. */
     int fd;			/* Not used. */
     /* Returns 0. */
{
  /* The driver interface requires a function to call when output is
     possible on a filedescriptor. This driver handles that case in
     tcp_dispatch() instead, so a call here is unexpected. */
  DEBUGF(("WARNING! tcp_null(%ld, 0x%d) called.\n", port, fd));

  return 0;
}


/*+++++++++++++++++++++++++++++++++++++
  Process commands from the runtime system. The first byte of the
  command is the op code:

    ALIVE_REQUEST   open the listening port and register with EPMD
    NEW_CONNECTION  connect to a named node, optionally queueing a
                    first message for it
    PASS_THROUGH    send a message on an existing connection
    BAD_CHANNEL     drop a connection
    TICK            periodic liveness check of all connections

  Any other op code makes the driver fail.
  +++++++++++++++++++++++++++++++++++++*/
static int
tcp_from_erlang(port, buf, count)
     long port;			/* Not used. */
     char *buf;			/* The command. */
     int count;			/* The length in bytes of the command. */
     /* Returns 0. */
{
  int len, i, res;
  int localPort;
  uint16 ix;
  char *mess;
  char *t, tmp[MAXHOSTLEN+1];
  unsigned char fourbytes[4];
  static int tick = 0;		/* Cycles through 1..4, see TICK below. */
  Connection *cp;
  
  t = buf;

#ifdef DEBUG
  erl_debug("tcp_from_erlang(port %d, cmd %d, count %d) [", port, buf[0], count);
  erl_bin_write(buf, count, 15);
  erl_debug("]\n");
#endif

  switch (*t) {
  case ALIVE_REQUEST:
    /* The argument string for pub() follows the op code. If either
       step fails, report ALIVE_REQUEST_ERROR and fail the driver. */
    if (((localPort = get_and_open_local_port()) < 0) ||
	(pub(t+1, (uint16)localPort) < 0)) {
      rbuf[0] = ALIVE_REQUEST_ERROR;
      DEBUGF(("tcp_from_erlang(): ALIVE_REQUEST_ERROR\n"));
      driver_output(erlang_port, rbuf, 1);
      driver_failure(erlang_port, -1);
    }
    return(0);
  case NEW_CONNECTION:
    /* The NUL-terminated node name follows the op code. It is copied
       because the command buffer is overwritten below.
       NOTE(review): the copy is not bounded by the size of tmp. */
    strcpy(tmp, t+1);
    
    DEBUGF(("Calling cnct_node(%s)...\n", tmp));
    
    cp = create_conn_entry(0);	/* XXX Check retval! But what to do if
				   NULL? Say "Bad name" isn't too
				   good... */

    /* An optional first message follows the name, after any NUL
       padding. */
    mess = (t+1) + strlen(t+1) + 1; /* beyond the NUL sign */
    while (*mess == '\0')
      mess++;			/* might be padded  */

    if ((mess - buf) <= count) {
      /* Frame the message in place: the byte before it becomes
	 PASS_THROUGH and the four bytes before that the 32-bit
	 length (which counts the PASS_THROUGH byte). This overwrites
	 the end of the name/padding in the command buffer.
	 NOTE(review): add_write_entry() dereferences cp, which is
	 NULL if create_conn_entry() failed above. */
      *(mess-1) = PASS_THROUGH;
      len = count - (mess - buf) + 1;
      put_int32(len, mess-5);
      add_write_entry(cp, mess-5, len+4); /* Add to queue of pend writes */
    }

    res = cnct_node(tmp, cp);
#ifdef DEBUG   
    DEBUGF(("cnct_node(%s) -> %d\n", tmp, res));
    if (res != READY && res != CONTINUE)
      DEBUGF(("cnct_node failed.\n"));
#endif
    return 0;

  case PASS_THROUGH:
    /* Layout on entry: op code, 16-bit connection index, two more
       bytes, then the data. The first five bytes are overwritten
       with the wire header: 32-bit length (count-4) followed by
       PASS_THROUGH.
       NOTE(review): ix is used as a table index without a range
       check against SOCKET_TAB_SIZE. */
    ix = get_int16(t+1);
    if ((cp = conn_tab[ix])) {
      put_int32(count-4, t);
      t[4] = PASS_THROUGH;
      if (nb_write(cp, t, count) >= 0) /* Succeeded? */
	return 0;
    }
    /* No such connection, or the write failed: drop it. */
    bad_ix(ix);
    return(0);
    
  case BAD_CHANNEL:		/* Erlang is not pleased with a channel */
    ix = get_int16(t+1);
    bad_ix(ix);
    return 0;
    
  case TICK:  
    
    /* This will happen every 15 seconds (by default) */
    /* The idea here is that every 15 secs, we write a little */
    /* something on all fd's we haven't written anything on for */
    /* the last 15 secs */
    /* This will ensure that nodes that are not responding due to */
    /* hardware errors (Or being suspended by means of ^Z) will */
    /* be considered to be down. If we do not want to have this  */
    /* we must start the net_kernel (in erlang) without its */
    /* ticker process, In that case this code will never run */

    /* And then every 60 seconds per fd we also check the fds and */
    /* close it if we haven't received anything on it for the */
    /* last 60 secs. If ticked == tick we haven't received anything */
    /* on this fd the last 60 secs. */

    /* The detection time interval is thus, by default, 45s < DT < 75s */

    /* A HIDDEN node is always (if not a pending write) ticked if */
    /* no TICK_READ exists as a hidden node only ticks when it receives */
    /* a TICK !! */
	    
#ifdef DEBUG
    /* NOTE(review): this dump always looks at the first 10 slots,
       whatever SOCKET_TAB_SIZE is. */
    erl_debug("READ/WRITE TICKS:");
    for (i=0; i< 10; i++)
      erl_debug(" %c%c",
		(!conn_tab[i] ? '_' : ((conn_tab[i]->status & TICK_WRITE) ? 'W' : '?')),
		(!conn_tab[i] ? '_' : ((conn_tab[i]->status & TICK_READ) ? 'R' : '?')));
    erl_debug("\n");
#endif

    tick++;
    put_int32(0, fourbytes);	/* The tick: a message of length 0. */
    for(i=0; i<SOCKET_TAB_SIZE; i++) {
      if (!conn_tab[i] || conn_tab[i] == listen_cp) 
	continue;

	    /* Read side: was anything received since the last TICK? */
	    if (!(conn_tab[i]->status & TICK_READ)) {
	        if (conn_tab[i]->ticked == tick) {
                    /* We found a dead fd  */
		    
		    tcp_error("** Node %s not responding **\n",
		              conn_tab[i]->nodename);
		    tcp_error("** Removing (timedout) connection **\n");
		    (void) bad_cp(conn_tab[i], 0);   /* Dead */
		    continue;
		}
		else if ((conn_tab[i]->status & HIDDEN) &&
			 (!fd_isset(conn_tab[i], PENDING_WRITES))) {
		  /* Silent hidden node: tick it unconditionally. */
		  conn_tab[i]->status &= ~TICK_WRITE;   /* Clear our TICK write */	
		  if ((conn_tab[i]->status & CONNECTED) &&
		      (nb_write(conn_tab[i], fourbytes, 4) < 0))
		    (void)bad_cp(conn_tab[i], 0);
		  continue;
		}
	    }
	    else {
	      /* Something was received: remember when, and rearm. */
	      conn_tab[i]->ticked = tick;
	      conn_tab[i]->status &= ~TICK_READ;  /* Clear flag */
	    }


      /* Write side: if we wrote something since the last TICK, just
	 rearm; otherwise send a tick, unless writes are pending. */
      if (conn_tab[i]->status & TICK_WRITE) {
	conn_tab[i]->status &= ~TICK_WRITE;   /* Clear */
	continue;
      }
      if (fd_isset(conn_tab[i], PENDING_WRITES)) 
	continue;
      conn_tab[i]->status &= ~TICK_WRITE;   /* Clear our TICK write */	
      if ((conn_tab[i]->status & CONNECTED) &&
	  (nb_write(conn_tab[i], fourbytes, 4) < 0))
	(void)bad_cp(conn_tab[i], 0);
    }
    
    /* Wrap around, so that tick takes the values 1..4 in the scan. */
    if (tick == 4) 
      tick = 0;
    return(0);
  default:
    DEBUGF(("tcp_drv: unknown op code from erlang %c\n", *t));
    driver_failure(erlang_port, -1);
    return(0);
  }
}


/*+++++++++++++++++++++++++++++++++++++
  Check what operations are possible on the filedescriptor
  (implicitly) pointed out by the specified event, and take appropriate
  actions. This function is called by check_io() when an event supplied
  by this driver has been signalled.
  +++++++++++++++++++++++++++++++++++++*/
static int
tcp_dispatch(port, event)
     long port;			/* Not used. */
     int event;			/* The event that has been signalled. */
     /* Returns 0 on success, -1 on failure. */
{
  int res, zero = 0;
  Connection *cp;
  WSANETWORKEVENTS netEv;
  
  cp = get_conn_entry_ev(event);
  if (!cp) {
    DEBUGF(("tcp_dispatch(event 0x%x): No corresponding connection found.\n", event));
    return -1;
  }
  
#ifdef MESSDEBUG
  erl_debug("tcp_dispatch(port %ld, event 0x%x (fd 0x%x))\n", port, event, cp->fd);
  erl_debug("cp->status:%s\n", status_str(cp->status));
#endif

  if ((*winSock.WSAEnumNetworkEvents)(cp->fd, cp->event, &netEv) != 0) {
    DEBUGF(("WSAEnumNetworkEvents() failed w/ reason %d\n",
	    (*winSock.WSAGetLastError)()));
    driver_failure(erlang_port, -1);
    return -1;
  }
  
#ifdef DEBUG
  {
    int i;
    erl_debug("lNetworkEvents: %lu [%s] : ", netEv.lNetworkEvents,
	      netw_events_str(netEv.lNetworkEvents));
    for (i=0; i<FD_MAX_EVENTS; i++)
      erl_debug(" %u", netEv.iErrorCode[i]);
    erl_debug("\n");
  }
#endif

  if (netEv.lNetworkEvents == 0)
    return 0;

  /* I won't bother checking the iErrorCode array here, as the errors
     will get caught anyway while attempting a read or an accept
     operation. */
  
  if (netEv.lNetworkEvents & FD_CLOSE)
    return bad_cp(cp, 0);

  if (netEv.lNetworkEvents & FD_ACCEPT) {
    struct in_addr zero_addr;
    zero_addr.s_addr = 0;
    new_connection(&zero_addr, 0, &zero);
    return 0;
  }
  
  if (netEv.lNetworkEvents & FD_WRITE) {
    /* Note! This means that writing is now possible, and will so be
       until a send operation returns with EWOULDBLOCK! */
    DEBUGF(("tcp_dispatch(fd==0x%x) : FD_WRITE , calling do_scheduled_write()\n", cp->fd));
    do_scheduled_write(cp);
  }
  
  if (netEv.lNetworkEvents & FD_READ) {
    if (fd_isset(cp, PENDING_READS)) { /* Unfinished business? */
      if (cp->remain <= 0) {
	/* XXX Is there a, er, more appropriate action to take here?
           Perhaps close the connection, or maybe just reset the
           troublesome connection parameters. */
	DEBUGF(("Pending read is set, but remain <= 0!! (fd 0x%x)\n", cp->fd));
      } else {
	switch (nb_read(cp, cp->remain)) {
	case CONTINUE:
	  /* Still not done... */
	  break;
	  
	case READY:
	  switch (cp->readState) {
	  case READING_HEADER:
	    res = nb_read_message(cp);
	    break;

	  case READING_MESSAGE:
	    res = message_to_erlang(cp);
	    break;

	  case GETTING_NAME_SZ:
	    res = nb_get_name(cp);
	    break;

	  case GETTING_NAME:
	    res = nb_complete_connection(cp);
	    break;

	  default:
	    DEBUGF(("Invalid state (%d) for connection 0x%x (fd 0x%x)\n",
		    cp->readState, cp, cp->fd));
	    res = READ_ERROR;	/* Hmmm... */
	  } /* swithc(cp->readState) */

	  /* We actually don't need to check res, because if something
	     went wrong, the connection has already been taken down,
	     so no further action is required. */
#ifdef DEBUG
	  if (res != READY && res != CONTINUE)
	    erl_debug("Caught error in tcp_dispatch: cp 0x%x, res %d\n", cp, res);
#endif
	  break;

	default:
	  /* OK, something went wrong. But the connection has already
	     been taken down, so no further action is required. */
	  DEBUGF(("tcp_dispatch: nb_read() failed\n"));
	  break;
	} /* switch(nb_read(..)) */
      }
    } /* Unfinished business? */
    else {
      /* OK, no pending read, start reading a new message. */
      read_header(cp);
    }
  } /* FD_READ set? */

  return(0);
}


/*+++++++++++++++++++++++++++++++++++++
  Perform as much as possible of the writes scheduled for a
  connection. Entries are sent from the head of the pending queue
  until the queue is empty, the socket would block, or a send fails.
  +++++++++++++++++++++++++++++++++++++*/
static int
do_scheduled_write(cp)
     Connection *cp;		/* The connection for which output is possible. */
     /* Returns 0 when the queue was flushed or the socket would
	block; returns the result of bad_cp() if a send fails. */
{
  int res;
  int err;			/* WinSock error code of a failed send. */
  Pend* curPend;

  curPend = cp->firstPend;
  
#ifdef DEBUG
  if (!curPend)
    DEBUGF(("tcp: No pending writes for connection 0x%x (fd 0x%x)\n", cp, cp->fd));
#endif
  
  while (curPend != NULL) {
    res = (*winSock.send)(cp->fd, curPend->buf + curPend->cpos,
			  curPend->remain, 0);
    /* Pick up the error code right away, before the debug printout
       below gets a chance to overwrite it, and through the same
       WinSock entry point as the rest of this file. */
    err = (res == SOCKET_ERROR) ? (*winSock.WSAGetLastError)() : 0;
    DEBUGF(("Sent %d bytes (of %d)\n", res, curPend->remain));
    
    if (res == curPend->remain) { /* All gone? */
      /* Remove current entry and go on with the next. */
      cp->status |= TICK_WRITE;
      cp->firstPend = curPend->next;
      sys_free(curPend);	/* This gets rid of the buffer as well. */
      curPend = cp->firstPend;
      continue;
    } else if (res >= 1 && res < curPend->remain) {
      /* Partial write: advance within the current entry and retry. */
      cp->status |= TICK_WRITE;
      curPend->cpos += res;
      curPend->remain -= res;
      /* We have to get WSAEWOULDBLOCK to re-enable the FD_WRITE
	 signalling. */
      continue;
    } else if (res == SOCKET_ERROR && err == WSAEWOULDBLOCK) {
      /* Now we have to wait for FD_WRITE before attempting another
         write. */
      return 0;
    } else {
      DEBUGF(("tcp: do_scheduled_write failed on fd %d\n", cp->fd));
      DEBUGF(("     remain = %d , cpos = %d ", curPend->remain, curPend->cpos));
      return bad_cp(cp, 0);
    }
  } /* while() */

  /* Good, all pending writes completed. */
  DEBUGF(("do_scheduled_write(cp 0x%x): Write queue flushed\n", cp));

  cancel_scheduled_writes(cp);

  fd_clr(cp, PENDING_WRITES);
  
  return 0;
}


/*+++++++++++++++++++++++++++++++++++++
  This routine is an interface to driver_select(), and manages the
  creation of events needed in the calls to driver_select(). It
  updates the connection's event mask, creates the event object on
  first use, registers it with the emulator when the mask was
  previously empty, and finally binds the socket to the event.
  +++++++++++++++++++++++++++++++++++++*/
static int
tcp_select(cp, setMask, clearMask)
     Connection *cp;		/* The connection to monitor. */
     long setMask;		/* The events we want to begin checking for. */
     long clearMask;		/* The events we do not want to check for anymore. */
     /* Returns 1 on success, -1 on failure. */
{
  int call_driver_select;

#ifdef DEBUG
  DEBUGF(("tcp_select(0x%x (fd==0x%x) setMask = [%s], ", cp, 
	  (cp ? cp->fd : 0), netw_events_str(setMask)));
  DEBUGF(("clearMask = [%s]\n", netw_events_str(clearMask)));
#endif
  
  if (!cp || !cp->fd)
    return -1;
  
  /* Only call driver_select() once per event, unless we have cleared
     its entry by calling driver_select() with mode=off. */
  call_driver_select = (cp->eventMask == 0);

  cp->eventMask |= setMask;
  cp->eventMask &= ~clearMask;

  if ((cp->event == NULL) && ((cp->event = do_createevent()) == WSA_INVALID_EVENT)) {
    DEBUGF(("Error creating event: %d\n", (*winSock.WSAGetLastError)()));
    cp->event = NULL;
    /* Without an event nothing is selected. Leave the mask empty so
       that a later tcp_select() registers its event with
       driver_select() again. */
    cp->eventMask = 0;
    return -1;
  }

  /* If cp->eventMask is zero, this removes the event from check_io(). */
  if (call_driver_select &&
      (driver_select(erlang_port, cp->event, DO_READ, cp->eventMask) < 0)) {
    DEBUGF(("tcp_select() failed calling driver_select()\n"));
    do_closeevent(cp->event);
    cp->event = NULL;
    cp->eventMask = 0;		/* Same reason as in tcp_deselect(). */
    return -1;
  }
  
  /* An empty mask cancels the association between socket and event. */
  if (do_eventselect(cp->fd, (cp->eventMask ? cp->event : NULL),
				cp->eventMask) != 0) {
    DEBUGF(("tcp_select() failed in WSAES(): %d\n", (*winSock.WSAGetLastError)()));
    driver_select(erlang_port, cp->event, DO_READ, 0);
    do_closeevent(cp->event);
    cp->event = NULL;
    cp->eventMask = 0;		/* Same reason as in tcp_deselect(). */
    return -1;
  }
  
  return 1;
}


/*+++++++++++++++++++++++++++++++++++++
  Stop monitoring a connection: tell the emulator to drop the
  connection's event, close the event, and forget both the event and
  the event mask.
  +++++++++++++++++++++++++++++++++++++*/
static int
tcp_deselect(cp)
     Connection *cp;		/* The connection to stop monitoring. */
     /* Returns -1 if there is no connection or no event to remove,
	otherwise the result of driver_select(). */
{
  int status;

  DEBUGF(("tcp_deselect(0x%x (fd==0x%x, event==0x%x))\n",
	  cp, (cp ? cp->fd : 0), (cp ? cp->event : 0)));

  if (!cp)
    return -1;
  if (!(cp->event))
    return -1;

  status = driver_select(erlang_port, cp->event, DO_READ, 0);
  do_closeevent(cp->event);

  /* An empty mask is required if tcp_select() is to be used on this
     connection again. */
  cp->eventMask = 0;
  cp->event = NULL;

  return status;
}

/* Close a socket through the winSock function table, logging the
   descriptor in debug builds. Returns what closesocket() returns. */
static int do_closesocket(int fd)
{
  int rc;

  DEBUGF(("closesocket(0x%x)\n", fd));
  rc = (*winSock.closesocket)(fd);
  return rc;
}

/* Create a new event object through the winSock function table
   (manual reset, initially non-signalled) and log its handle in
   debug builds. Returns what WSACreateEvent() returns. */
static WSAEVENT do_createevent(void) 
{
  WSAEVENT created = (*winSock.WSACreateEvent)();

  DEBUGF(("CreateEvent() = 0x%x\n", created));
  return created;
}

/* Close an event object through the winSock function table, logging
   the handle in debug builds. Returns what WSACloseEvent() returns. */
static BOOL do_closeevent(WSAEVENT hEvent)
{
  BOOL closed;

  DEBUGF(("CloseEvent(0x%x)\n", hEvent));
  closed = (*winSock.WSACloseEvent)(hEvent);
  return closed;
}

/* Signal an event object through the winSock function table, logging
   the handle in debug builds. Returns what WSASetEvent() returns. */
static BOOL do_setevent(WSAEVENT hEvent)
{
  BOOL signalled;

  DEBUGF(("SetEvent(0x%x)\n", hEvent));
  signalled = (*winSock.WSASetEvent)(hEvent);
  return signalled;
}

/* Reset an event object through the winSock function table, logging
   the handle in debug builds. Returns what WSAResetEvent() returns. */
static BOOL do_resetevent(WSAEVENT hEvent)
{
  BOOL cleared;

  DEBUGF(("ResetEvent(0x%x)\n", hEvent));
  cleared = (*winSock.WSAResetEvent)(hEvent);
  return cleared;
}

/* Bind the network events in iEvents on socket fd to hEvent through
   the winSock function table. The request is logged in debug builds,
   and so is the WinSock error code when the call fails. Returns what
   WSAEventSelect() returns, i.e. 0 when it reports no error. */
static int do_eventselect(SOCKET fd, WSAEVENT hEvent, long iEvents)
{
  int rc;

  DEBUGF(("EventSelect(fd=0x%x, event=0x%x, netw_events = [%s]\n", 
	  fd, hEvent, netw_events_str(iEvents)));

  rc = (*winSock.WSAEventSelect)(fd, hEvent, iEvents);
  if (rc == 0)
    return 0;

  DEBUGF(("EventSelect error: %d\n", (*winSock.WSAGetLastError)()));
  return rc;
}
