/* send_expect.c
 
   The "send_expect" Tcl command.

   The base algorithm of the "send_expect" Network Expect command is
   based on the algorithm of the "sr" command of Scapy, the packet
   manipulation program written in Python by Philippe Biondi. There are
   some differences, especially that Network Expect's implementation
   stores sent and received packets in raw (binary) form, but the basics
   are the same.

   Copyright (C) 2007, 2008, 2009, 2010 Eloy Paris

   This is part of Network Expect.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include "includes.h"

#include <fcntl.h>
#include <glib.h>

#include <epan/epan.h>
#include <wiretap/pcap-encap.h> /* For wtap_pcap_encap_to_wtap_encap() */

#include "nexp_speakers.h"
#include "util-tcl.h"

#if __FreeBSD__ || __APPLE__
#define SELECT_LIES
#endif

/*
 * NExp_SendExpectObjCmd() creates a thread to inject stimuli. This thread
 * needs some data created by NExp_SendExpectObjCmd(). The following structure
 * is used to pass data from NExp_SendExpectObjCmd() to the injection thread
 * (called "send_main()"). NExp_SendExpectObjCmd() fills in the structure prior
 * to creation of the injection thread and when the thread is created a pointer
 * to the structure is passed to the thread.
 *
 * The structure lives on the stack of NExp_SendExpectObjCmd(), so that
 * function must not return while the injection thread is still running.
 */
struct thread_data {
    /* Table of packets to inject; the thread also stores each packet's
       send timestamp in it. */
    struct sent_packet *sent_packets;
    /* Number of entries in sent_packets[] */
    int npackets;
    /* Total number of send passes (the value of "ntries"); only used for
       the verbose progress message. */
    int npasses;
    /* Index of the current pass as set by the caller (the caller's loop
       counter, which starts at 0). */
    int current_pass;
    /* The caller's pipe descriptor pair. The thread writes the send
       timestamps to pfds[1] and then closes pfds[1]. */
    int *pfds; /* Pipe file descriptors */
};

/*
 * One received packet that was matched as an answer to a sent packet.
 * The packet data is a private heap copy made when the match is found and
 * released when the command finishes.
 */
struct answer {
    struct payload packet; /* The actual packet */
    struct timeval ts; /* Time the answer was received */
    int ll_type; /* The link layer type */
};

/*
 * One injected stimulus together with everything needed to recognize and
 * record its answers. Entries are created by build_and_hash(), time-stamped
 * by the injection thread and filled in with answers by the receive loop.
 */
struct sent_packet {
    GByteArray *hash; /* Hash of this packet */
    struct payload packet; /* The actual packet data and its length */
    int ll_type; /* The link layer type */
    struct timeval ts; /* When the packet was sent */
    int nanswers; /* Number of matching answers for this packet */
    struct answer *answers; /* Matching answers for this packet */
};

/*
 * List of listeners we'll monitor for answers to injected stimulus. The
 * maximum number of listeners in this list is just an arbitrary number.
 * I reckon most people will only use a handful of listeners at the most,
 * and dynamically managing the size of this list is not worth the trouble.
 *
 * "nlisteners" is the number of valid entries in llist[]; it is reset to 0
 * at the start of every send_expect invocation.
 */
static struct nexp_listener *llist[16];
static int nlisteners;
/* Capacity of llist[]. Note that this expression has type size_t. */
#define MAX_NUM_LISTENERS (sizeof(llist)/sizeof(llist[0]) )

/*
 * How long to wait for answers once all stimuli have been injected
 * (-timeout switch) and how many send/receive passes to make (-tries
 * switch). Both are reset to their defaults (1 second, 1 try) at the start
 * of every send_expect invocation.
 */
static struct timeval timeout;
static int ntries;

/*
 * PDU Control Block describing what to send: the parsed PDU, its textual
 * definition, the speaker to use and the inter-packet delay. It is zeroed
 * at the start of every send_expect invocation.
 */
static pcb_t pcb;

/* NOTE(review): neither MULTIANSWER_VARNAME nor an assignment to multiflag
   is visible in this part of the file -- presumably they are wired up
   elsewhere; confirm. */
#define MULTIANSWER_VARNAME "multianswers"
static int multiflag; /* Used by the send_expect command: if != 0 then
			 accept multiple answers for the same injected
			 stimulus */

/*
 * Prints a short summary of the send_expect command syntax to stderr.
 *
 * The switch names shown here are the ones accepted by the options[] table
 * in process_options() ("-o" selects the speaker, "-i" a listener).
 */
static void
usage(void)
{
    fprintf(stderr, "\
usage: send_expect [-o <speaker ID>] [-i <listener ID>]\n\
          [-delay <secs>] [-rate <pps>] [-timeout <secs>] [-tries <ntries>]\n\
          <PDU definition>\n");
}

/*
 * Parses the arguments of the "send_expect" command.
 *
 * Recognized switches (each one takes a value):
 *
 *   -o <speaker>     speaker used for injection; stored in pcb.speaker
 *   -i <listener>    live listener to monitor; appended to llist[] (may be
 *                    given several times, up to MAX_NUM_LISTENERS)
 *   -delay <secs>    inter-packet delay; stored in pcb.delay
 *   -rate <pps>      packets per second; converted to a delay and stored
 *                    in pcb.delay
 *   -timeout <secs>  time to wait for answers; stored in "timeout"
 *   -tries <n>       number of send/receive passes; stored in "ntries"
 *
 * Everything that follows the last switch is treated as the PDU definition.
 * It is parsed into pcb.pdu and the definition string is kept in pcb.def.
 *
 * Returns 0 on success and -1 on error. On error a message has been left
 * in the interpreter (or, for a missing PDU definition, the usage text has
 * been printed to stderr) and the PDU definition string, if any, has been
 * freed. Fields of "pcb" and entries of llist[] that were set before the
 * error was detected are left as they are.
 */
static int
process_options(Tcl_Interp *interp, int argc, Tcl_Obj * const *objv)
{
    int i, index;
    char *pdudef = NULL;
    const char *arg;
    char *end;
    long tries;
    double t, delay;
    char errbuf[PDU_ERRBUF_SIZE];
    struct nexp_listener *l;
    static const char *options[] = {
	"-o", "-i", "-delay", "-rate", "-timeout", "-tries", NULL
    };
    enum options {
	OPT_SPEAKER, OPT_LISTENER, OPT_DELAY, OPT_RATE, OPT_TIMEOUT,
	OPT_TRIES
    };

    /*
     * Parse command-line arguments.
     */
    for (i = 1; i < argc && *Tcl_GetString(objv[i]) == '-'; i++) {
	if (Tcl_GetIndexFromObj(interp, objv[i], options, "option", 0, &index)
	    != TCL_OK)
	    goto error;

	switch ( (enum options) index) {
	case OPT_SPEAKER:
	    if (++i >= argc) {
		Tcl_WrongNumArgs(interp, 1, objv, "-o speaker");
		goto error;
	    }

	    pcb.speaker = lookup_speaker(Tcl_GetString(objv[i]) );
	    if (!pcb.speaker) {
		nexp_error(interp, "No speaker named \"%s\". Use "
			   "\"spawn_network -info\" to find out existing "
			   "speakers.", Tcl_GetString(objv[i]) );
		goto error;
	    }
	    break;
	case OPT_LISTENER:
	    if (nlisteners >= (int) MAX_NUM_LISTENERS) {
		/*
		 * MAX_NUM_LISTENERS is a size_t; it must be converted
		 * to match the %d conversion.
		 */
		nexp_error(interp, "Exceeded maximum number (%d) of listeners",
			   (int) MAX_NUM_LISTENERS);
		goto error;
	    }

	    if (++i >= argc) {
		Tcl_WrongNumArgs(interp, 1, objv, "-i listener");
		goto error;
	    }

	    l = lookup_listener(Tcl_GetString(objv[i]) );
	    if (!l) {
		nexp_error(interp, "No listener named \"%s\". Use "
			   "\"spawn_network -info\" to find out existing "
			   "listeners.", Tcl_GetString(objv[i]) );
		goto error;
	    }

	    if (l->type != LISTENER_LIVE) {
		nexp_error(interp, "The send_expect command only supports "
				   "live listeners.");
		goto error;
	    }

	    llist[nlisteners++] = l;
	    break;
	case OPT_DELAY:
	    if (++i >= argc) {
		Tcl_WrongNumArgs(interp, 1, objv, "-delay seconds");
		goto error;
	    }

	    if (Tcl_GetDoubleFromObj(interp, objv[i], &delay) != TCL_OK)
		goto error;

	    /* Split into whole seconds and the fractional microseconds */
	    pcb.delay.tv_sec = delay;
	    pcb.delay.tv_usec = (delay - pcb.delay.tv_sec)*1000000UL;
	    break;
	case OPT_RATE:
	    if (++i >= argc) {
		Tcl_WrongNumArgs(interp, 1, objv, "-rate PPS");
		goto error;
	    }

	    /* Convert a packets-per-second rate to a usecs delay */
	    if (Tcl_GetDoubleFromObj(interp, objv[i], &delay) != TCL_OK)
		goto error;

	    if (delay == 0.0) {
		nexp_error(interp, "Rate can't be 0 packets per second.");
		goto error;
	    }

	    delay = 1/delay;

	    pcb.delay.tv_sec = delay;
	    pcb.delay.tv_usec = (delay - pcb.delay.tv_sec)*1000000UL;
	    break;
	case OPT_TIMEOUT:
	    if (++i >= argc) {
		Tcl_WrongNumArgs(interp, 1, objv, "-timeout seconds");
		goto error;
	    }

	    /*
	     * Parsed the same way as -delay and -rate so that a
	     * non-numeric value is reported instead of silently
	     * becoming a timeout of 0.
	     */
	    if (Tcl_GetDoubleFromObj(interp, objv[i], &t) != TCL_OK)
		goto error;

	    if (t < 0.0) {
		nexp_error(interp, "Timeout can't be negative");
		goto error;
	    }

	    timeout.tv_sec = t;
	    timeout.tv_usec = (t - timeout.tv_sec)*1000000UL;
	    break;
	case OPT_TRIES:
	    if (++i >= argc) {
		Tcl_WrongNumArgs(interp, 1, objv, "-tries ntries");
		goto error;
	    }

	    /*
	     * Reject anything that is not a complete, non-negative
	     * number that fits in an int ("ntries" is an int).
	     */
	    arg = Tcl_GetString(objv[i]);
	    tries = strtol(arg, &end, 0);
	    if (end == arg || *end != '\0' || tries < 0
		|| tries != (long) (int) tries) {
		nexp_error(interp, "Invalid number of tries \"%s\"", arg);
		goto error;
	    }

	    ntries = tries;
	    break;
	}
    }

    /*
     * We treat whatever is left on the command line, i.e. anything that
     * is not an option (anything that doesn't start with '-'), as a PDU
     * definition.
     */
    pdudef = copy_objv(argc - i, &objv[i]);
    if (!pdudef) {
	usage();
	return -1;
    }

#ifdef DEBUG
    printf("PDU definition = %s\n", pdudef);
#endif

    if ( (pcb.pdu = pb_parsedef(pdudef, errbuf) ) == NULL) {
	nexp_error(interp, "%s", errbuf);
	goto error;
    }

    /* The PCB keeps the definition string from now on */
    pcb.def = pdudef;

    return 0;

error:
    free(pdudef);

    return -1;
}

/*
 * Initializes the frame_data structure "fdata" from the pcap packet header
 * "h" and the pcap link-layer type "ll_type".
 *
 * The pcap header is first translated into a wiretap packet header
 * (lengths, encapsulation and timestamp, with microseconds converted to
 * nanoseconds) which is then handed to frame_data_init(). The relative and
 * delta timestamps of the frame are marked as unset.
 */
static void
fill_in_framedata(frame_data *fdata, const struct pcap_pkthdr *h, int ll_type)
{
    struct wtap_pkthdr wth;

    /* Timestamp: pcap has microseconds, wiretap wants nanoseconds */
    wth.ts.secs = h->ts.tv_sec;
    wth.ts.nsecs = h->ts.tv_usec*1000;

    /* Encapsulation and lengths */
    wth.pkt_encap = wtap_pcap_encap_to_wtap_encap(ll_type);
    wth.caplen = h->caplen;
    wth.len = h->len;

    frame_data_init(fdata, 0 /* num */, &wth, 0 /* offset */,
		    0 /* cum_bytes */);

    nstime_set_unset(&fdata->del_dis_ts);
    nstime_set_unset(&fdata->del_cap_ts);
    nstime_set_unset(&fdata->rel_ts);
}

/*
 * Builds every packet described by the PDU Control Block (PCB) "pcb" and
 * calculates a hash for each of them.
 *
 * A table with one "struct sent_packet" per permutation of the PDU is
 * allocated and returned through "sent_packets"; the caller owns the table
 * as well as the packet data and the hash of every entry. Entries start
 * out with no answers.
 *
 * Returns the number of entries in the table.
 */
static int
build_and_hash(Tcl_Interp *interp, struct sent_packet **sent_packets)
{
    int idx, total;
    struct sent_packet *table, *sp;
    frame_data fd;
    struct pcap_pkthdr hdr;

    /* One table entry per packet that we are going to send */
    total = pb_permutation_count(pcb.pdu);
    table = xmalloc(sizeof(*table)*total);

    for (idx = 0; idx < total; idx++) {
	sp = &table[idx];
	memset(sp, 0, sizeof(*sp) );

	/* Only Ethernet PDUs are injected at layer 2 */
	if (strcmp(pb_getname(pcb.pdu), "ether") == 0)
	    sp->ll_type = DLT_EN10MB;
	else
	    sp->ll_type = DLT_RAW;

	/* Build the packet itself... */
	sp->packet.data = xmalloc(pb_len(pcb.pdu) );
	sp->packet.len = pb_build(pcb.pdu, sp->packet.data, NULL);

	/* ... and describe it with a pcap header so it can be hashed */
	hdr.caplen = sp->packet.len;
	hdr.len = hdr.caplen;
	gettimeofday(&hdr.ts, NULL);

	fill_in_framedata(&fd, &hdr, sp->ll_type);
	sp->hash = pkt_hash(interp, sp->packet.data, &fd);
	frame_data_cleanup(&fd);

#ifdef DEBUG
	printf("sent pkt len = %u, packet data:\n", sp->packet.len);
	dump(sp->packet.data, sp->packet.len);

	printf("sent pkt hash len = %u, hash data:\n", sp->hash->len);
	dump(sp->hash->data, sp->hash->len);
#endif
    }

    *sent_packets = table;

    return total;
}

/*
 * Injection thread.
 *
 * "clientData" points to a "struct thread_data" owned by the thread that
 * created us. Every packet of td->sent_packets[] that has no answer yet is
 * sent through the speaker of the PCB, honoring the inter-packet delay of
 * the PCB, and the time at which it was sent is stored in its "ts" field.
 *
 * When injection is finished the send timestamps of *all* packets (also of
 * those that were not sent in this pass) are written to td->pfds[1], which
 * is then closed. The first timestamp showing up on the pipe is what tells
 * the parent that injection is complete.
 */
static void
send_main(ClientData clientData)
{
    struct thread_data *td;
    int i, n;
    ssize_t nwritten;

    td = clientData;

    /*
     * current_pass is the caller's zero-based loop counter; show it to the
     * user as 1..npasses.
     */
    if (vflag)
	printf("Begin stimulus injection (pass %d/%d)... ",
	       td->current_pass + 1, td->npasses);

    for (n = i = 0; i < td->npackets; i++) {
	if (td->sent_packets[i].nanswers)
	    /*
	     * This packet has already been answered so there is no
	     * need to send it again.
	     */
	    continue;

	/*
	 * Handle inter-packet sending delay.
	 */
	if (timerisset(&pcb.delay) && timerisset(&pcb.speaker->ts) ) {
	    struct timeval now, later;

	    timeradd(&pcb.speaker->ts, &pcb.delay, &later);

	    gettimeofday(&now, NULL);
	    /*
	     * Some versions of Solaris (seen in Solaris 8) don't like
	     * using timercmp() with "<=" or ">=". Rearranging the operands
	     * and just using '>' or '<' works around this limitation.
	     */
	    while (timercmp(&later, &now, >) )
		gettimeofday(&now, NULL);
	}

	nexp_pdu_output(&pcb, td->sent_packets[i].packet.data,
			td->sent_packets[i].packet.len);

	/*
	 * Store the timestamp of when the packet was sent. These
	 * timestamps are handed to the parent through the pipe once all
	 * packets have been injected (see below.)
	 */
	gettimeofday(&td->sent_packets[i].ts, NULL);

	n++;
    }

    if (vflag)
	printf("done (sent %d %s)\n", n, n == 1 ? "packet" : "packets");

    /*
     * Write to the pipe to tell the parent that we are done
     * injecting packets. We write all the timestamps so the parent
     * can update the send timestamps of all packets.
     */
    for (i = 0; i < td->npackets; i++) {
	do {
	    nwritten = write(td->pfds[1], &td->sent_packets[i].ts,
			     sizeof(struct timeval) );
	} while (nwritten == -1 && errno == EINTR);

	if (nwritten != (ssize_t) sizeof(struct timeval) ) {
	    /*
	     * Nothing else we can do from here: stop writing and close
	     * the pipe; the parent will notice the premature EOF.
	     */
	    fprintf(stderr, "send_expect: can't write send timestamp to "
			    "pipe: %s\n",
		    nwritten == -1 ? strerror(errno) : "short write");
	    break;
	}
    }

    close(td->pfds[1]);
}

#ifndef __APPLE__

/*
 * This implements the "send_expect" command using select(). This does not work
 * on all platforms since on some platforms select() does not work well, or is
 * completely broken, when used on BPF devices.
 *
 * Outline:
 *
 *   1. Parse the command arguments and make sure there are a speaker and
 *      at least one listener to work with.
 *   2. Build all the packets to send and calculate their hashes.
 *   3. For each pass (see the -tries switch), and while there are packets
 *      without an answer: start a thread that injects the unanswered
 *      packets, and meanwhile read packets from the listeners and match
 *      them against the sent packets. Once the injection thread reports
 *      (through a pipe) that it is done, answers are accepted only until
 *      the -timeout period expires.
 *   4. Store the results in the Tcl array "_": _(sent), _(received) and
 *      _(unanswered).
 *
 * Returns TCL_OK on success and TCL_ERROR on failure.
 */
static int
NExp_SendExpectObjCmd(ClientData clientData _U_, Tcl_Interp *interp, int argc,
		      Tcl_Obj * const *objv)
{
    int retval, cmd_retval = TCL_OK;
    int i, j, n, npackets = 0, nunanswered;
    struct sent_packet *sent_packets = NULL;
    fd_set rfds;
    int highest_fd;
    struct pcap_pkthdr pkthdr, sent_pkthdr;
    const u_char *pkt_data;
    GByteArray *hash;
    int pfds[2]; /* Pipe file descriptors */
    int pipe_open = 0; /* != 0 while the read end of the pipe is open */
    struct timeval timeout_time, curr_time;
    struct timeval time_remaining, *tremainingptr;
    int got_answer;
    int nanswers;
    int call_select, got_packet;
    int fd;
    int datalink_type;
    pcap_t *pd;
    frame_data rcvd_fdata, sent_fdata;
    size_t hsize_adj; /* Layer 2 header size adjustment */
    Tcl_Obj *sent_list, *received_list, *unanswered_list, *packet_obj;
    int isanswer;
    Tcl_ThreadId thread_id;
    struct thread_data td;
    int thread_status;
    int thread_running = 0; /* != 0 while there is a thread to join */

    nlisteners = 0;
    timeout.tv_sec = 1; timeout.tv_usec = 0;
    ntries = 1;

    memset(&pcb, 0, sizeof(pcb) );

    retval = process_options(interp, argc, objv);
    if (retval == -1) {
	pcb_destroy(&pcb);
	return TCL_ERROR;
    }

    /*
     * Make sure we have a listener we can use to listen for answers.
     */
    if (nlisteners == 0) {
	llist[0] = lookup_listener(nexp_get_var(interp,
						LISTENER_SPAWN_ID_VARNAME) );
	if (!llist[0]) {
	    nexp_error(interp, "Can't find a suitable listener! Use "
			       "spawn_network to create one.");
	    pcb_destroy(&pcb);
	    return TCL_ERROR;
	}

	nlisteners++;
    }

    /*
     * Make sure the PCB has an assigned speaker: if the user has not
     * explicitly specified a speaker, then we use the default speaker,
     * which is referred to by name via the Tcl variable "speaker_id"
     * (SPEAKER_SPAWN_ID_VARNAME).
     */
    if (!pcb.speaker) {
	pcb.speaker = lookup_speaker(nexp_get_var(interp,
					SPEAKER_SPAWN_ID_VARNAME) );
	if (!pcb.speaker) {
	    nexp_error(interp, "Can't find a suitable speaker! Use "
			       "spawn_network to create one.");
	    pcb_destroy(&pcb);
	    return TCL_ERROR;
	}
    }

    /*
     * We now can build the packets and calculate their hash.
     */
    npackets = build_and_hash(interp, &sent_packets);

    /* Main send/expect loop */
    for (nunanswered = npackets, n = 0; nunanswered != 0 && n < ntries; n++) {
	/*
	 * Stimulus injection is done by a thread. Matching sent packets and
	 * responses is done by the parent thread.
	 */

	/*
	 * The following pipe is used for communication between the
	 * parent thread and the child thread. The most important use
	 * of this pipe is when the child, after injecting all the stimuli,
	 * writes to the parent all the timestamps of when the packets
	 * were sent. The parent then reads these timestamps and stores
	 * them in an appropriate place so they can be used to measure
	 * things like round-trip times.
	 *
	 * The pipe is also used for the child to let the parent know that
	 * stimulus injection has been completed. This is then used by the
	 * parent to determine when a timeout has occurred, i.e. no responses
	 * received after a certain time.
	 */
	if (pipe(pfds) == -1) {
	    nexp_error(interp, "Couldn't open pipe: %s", strerror(errno) );
	    goto error;
	}

	pipe_open = 1;

	td.sent_packets = sent_packets;
	td.npackets = npackets;
	td.npasses = ntries;
	td.current_pass = n;
	td.pfds = pfds;

	retval = Tcl_CreateThread(&thread_id, send_main, &td,
				  TCL_THREAD_STACK_DEFAULT,
				  TCL_THREAD_JOINABLE);
	if (retval != TCL_OK) {
	    nexp_error(interp, "Tcl_CreateThread() error");
	    /*
	     * The write end is normally closed by the injection thread;
	     * since there is no thread we have to do it ourselves.
	     */
	    close(pfds[1]);
	    goto error;
	}

	thread_running = 1;

	/*
	 * Receive loop. We loop while we still have unanswered packets
	 * and while we have not timed out.
	 */
	timerclear(&curr_time);

	for (timerclear(&timeout_time),
	     call_select = 0 /* Work around FreeBSD silliness */;

	     /*
	      * The first condition to remain in the loop is that we still
	      * have unanswered packets. Then we need to check whether
	      * we have had a timeout, but we only do this check if the
	      * child is finished sending packets (timeout_time is set),
	      * since the timer for the timeout does not start to run until
	      * all packets have been sent.
	      */
	     nunanswered != 0
	     && (!timerisset(&timeout_time)
		 || timercmp(&timeout_time, &curr_time, >) );

	    gettimeofday(&curr_time, NULL) ) {

	    /*
	     * Must be reset on every iteration: it tells, at the bottom of
	     * the loop, whether *this* sweep over the listeners read
	     * anything.
	     */
	    got_packet = 0;

	    FD_ZERO(&rfds);
	    highest_fd = 0;

	    /*
	     * Arm the file descriptor set for select(). We first arm the
	     * file descriptors of the listeners, and then the file descriptor
	     * for the pipe (if necessary)...
	     */
	    for (i = 0; i < nlisteners; i++) {
		fd = listener_fd(llist[i]);

		FD_SET(fd, &rfds);

		if (fd > highest_fd)
		    highest_fd = fd;
	    }

#ifdef SELECT_LIES
	    /*
	     * On certain platforms (FreeBSD, for example) select() will
	     * return "file descriptor ready for reading" only once, even if
	     * there is more than one packet in the queue. The purpose of these
	     * games with "call_select" is that we don't call select() unless
	     * we have tried to read from all file descriptors and no data was
	     * available anywhere. Since the file descriptors were set to
	     * non-blocking mode when the listener was created, it is okay to
	     * call pcap_next() without knowing that there'll be data
	     * available; we will not block.
	     */
	    if (!call_select)
		/*
		 * Bypass preparations for the select() call as well as the
		 * call to select(). All listener descriptors are still set
		 * in rfds so the sweep below will try every listener.
		 */
		goto bypass_select;
#endif

	    if (!timerisset(&timeout_time) ) {
		FD_SET(pfds[0], &rfds);
		if (pfds[0] > highest_fd)
		    highest_fd = pfds[0];
	    }

	    /*
	     * Set the timeout value for select(). This value depends on
	     * whether the child has finished injecting stimulus. If this
	     * is the case the select() timeout value will be the time left
	     * until the send_expect timeout (set with the -timeout switch)
	     * is reached. If the child has not finished injecting stimulus
	     * then we don't want select to timeout.
	     */
	    if (!timerisset(&timeout_time) )
		tremainingptr = NULL;
	    else {
		struct timeval now;

		gettimeofday(&now, NULL);

		/*
		 * The deadline may have passed since the loop condition
		 * was evaluated; never hand a negative timeout to
		 * select() since that makes it fail.
		 */
		if (timercmp(&timeout_time, &now, >) )
		    timersub(&timeout_time, &now, &time_remaining);
		else
		    timerclear(&time_remaining);

		tremainingptr = &time_remaining;
	    }

	    retval = select(highest_fd + 1, &rfds, NULL, NULL, tremainingptr);
	    if (retval < 0) {
		if (errno == EINTR)
		    /* Interrupted by a signal; just try again */
		    continue;

		nexp_error(interp, "select(): %s (errno = %d)",
			   strerror(errno), errno);
		goto error;
	    } else if (retval == 0)
		/*
		 * There was a timeout. Let's break out of the loop
		 * and try again.
		 */
		break;

	    /*
	     * Data is now available to be read (retval > 0). We need to
	     * determine what file descriptor is the one that is ready. We
	     * check first the pipe file descriptor and then all listeners...
	     */

	    if (FD_ISSET(pfds[0], &rfds) ) {
		/*
		 * Child is done injecting stimulus; we need to calculate
		 * the time at which a timeout will occur so we can use
		 * that as a condition to exit the receive loop.
		 */
		gettimeofday(&timeout_time, NULL);
		timeradd(&timeout_time, &timeout, &timeout_time);
	    }

#ifdef SELECT_LIES
bypass_select:
#endif

	    for (i = 0; nunanswered != 0 && i < nlisteners; i++) {
		fd = listener_fd(llist[i]);

		if (!FD_ISSET(fd, &rfds) )
		    continue;

		/*
		 * Located a file descriptor with packets ready to be
		 * read. We now need to read the packet, calculate its
		 * hash, and try to match the answer with a packet
		 * that was sent.
		 */

		/*
		 * No need to check for listener type - it's always
		 * LISTENER_LIVE.
		 */
		pd = llist[i]->_live.pd;

		pkt_data = pcap_next(pd, &pkthdr);
		if (!pkt_data)
		    /* FIXME: need to check for error here? */
		    continue;

		/*
		 * This will be used to determine whether we need to call
		 * select() next time (workaround for what seems to be a
		 * FreeBSD silliness.)
		 */
		got_packet = 1;

		/*
		 * Determine the layer 2 and layer 3 PDUs as well as the size
		 * of the layer 2 PDU header and the length of the layer 2 PDU
		 * hash. This information depends on the datalink type, and is
		 * used later to determine in what part of the received packet
		 * we start matching.
		 *
		 * (there's no need to check for listener type - it's always
		 * LISTENER_LIVE.)
		 */
		datalink_type = llist[i]->_live.datalink_type;

		fill_in_framedata(&rcvd_fdata, &pkthdr, datalink_type);

		hash = pkt_hash(interp, pkt_data, &rcvd_fdata);

		frame_data_cleanup(&rcvd_fdata);

#ifdef DEBUG
		printf("recv'ed pkt len = %u, packet data:\n", pkthdr.len);
		dump(pkt_data, pkthdr.len);

		printf("recv'd pkt hash len = %u, hash data:\n", hash->len);
		dump(hash->data, hash->len);
#endif

		/*
		 * The logic here is that if the PDU we sent was not injected
		 * at layer 2 then the received packet is different from the
		 * sent packet in terms of layers in the protocol stack, i.e.
		 * the received packet is always at layer 2, but the sent
		 * packet is at layer 3. The following code calculates the
		 * right adjustments.  We only check for Ethernet right now
		 * because it's the only layer 2 protocol we can inject.
		 */
		if (!strcmp(pb_getname(pcb.pdu), "ether") ) {
		    /*
		     * There's no need to adjust anything since the packet that
		     * was sent was injected at layer 2.
		     */
		    hsize_adj = 0;
		} else {
		    switch (datalink_type) {
		    case DLT_EN10MB:
			/*
			 * The hash of the Ethernet layer is the ethertype
			 * stored in a uint16_t. See packets/hash.c
			 */
			hsize_adj = sizeof(uint16_t);
			break;
		    case DLT_LINUX_SLL:
			/*
			 * We do not compute a hash for the Linux "cooked"
			 * mode encapsulation. See man pcap(3) for details
			 * on Linux "cooked" mode.
			 */
			hsize_adj = 0;
			break;
		    default:
			{
			    const char *dlt_name;

			    dlt_name = pcap_datalink_val_to_name(datalink_type);

			    nexp_error(interp, "Don't know how to handle "
				       "received packet with data link type "
				       "%d (%s).", datalink_type,
				       dlt_name ? dlt_name : "unknown");
			}

			/* Don't leak the hash of the received packet */
			g_byte_array_free(hash, TRUE);
			goto error;
		    }
		}

		/*
		 * We have a hash for a received packet. Now we need to look in
		 * the table of sent packets for a packet that matches the hash
		 * of the received packet.
		 */
		for (got_answer = j = 0; !got_answer && j < npackets; j++) {
		    if (sent_packets[j].nanswers)
			/*
			 * This packet has already been answered. Nothing
			 * to do; let's keep looking.
			 */
			continue;

		    if (hash->len - hsize_adj != sent_packets[j].hash->len
			|| memcmp(hash->data + hsize_adj,
				  sent_packets[j].hash->data,
				  hash->len - hsize_adj) )
			/* Hash doesn't match; keep looking */
			continue;

#ifdef DEBUG
		    printf("Hash of sent and recv'ed packets is the same!\n");
#endif

		    /*
		     * We have a hash match! Let's see if it is
		     * an answer...
		     */

		    sent_pkthdr.caplen = sent_packets[j].packet.len;
		    sent_pkthdr.len = sent_pkthdr.caplen;
		    gettimeofday(&sent_pkthdr.ts, NULL);

		    fill_in_framedata(&sent_fdata, &sent_pkthdr,
				      sent_packets[j].ll_type);

		    /*
		     * The frame data used for hashing the received packet
		     * has already been cleaned up, so set up a fresh one
		     * for this comparison.
		     */
		    fill_in_framedata(&rcvd_fdata, &pkthdr, datalink_type);

		    isanswer = pkt_isanswer(interp, pkt_data, &rcvd_fdata,
					    sent_packets[j].packet.data,
					    &sent_fdata);

		    frame_data_cleanup(&rcvd_fdata);
		    frame_data_cleanup(&sent_fdata);

		    if (!isanswer)
			/*
			 * Hash does match, but packet is not an answer;
			 * keep looking
			 */
			continue;

		    /*
		     * Finally, we've been able to determine that
		     * the received packet is a response to a packet
		     * that we sent.
		     */

		    nanswers = sent_packets[j].nanswers;

		    /*
		     * Save the received packet. It is saved in the "answers"
		     * array of sent_packets[]. Only "caplen" bytes are
		     * available in the buffer returned by pcap_next(); "len"
		     * is the length of the packet on the wire, which can be
		     * larger if the capture was truncated.
		     */

		    sent_packets[j].answers = xrealloc(sent_packets[j].answers,
			sizeof(struct answer)*(nanswers + 1) );

		    sent_packets[j].answers[nanswers].packet.len
			= pkthdr.caplen;
		    sent_packets[j].answers[nanswers].packet.data
			= xmalloc(pkthdr.caplen);
		    memcpy(sent_packets[j].answers[nanswers].packet.data,
			   pkt_data, pkthdr.caplen);
		    /* Time when the packet was captured */
		    sent_packets[j].answers[nanswers].ts = pkthdr.ts;
		    sent_packets[j].answers[nanswers].ll_type = datalink_type;

		    sent_packets[j].nanswers++;

		    nunanswered--; /* One less we're waiting an answer for */

		    if (vflag)
			putchar('!');
		    got_answer = 1;
		} /* Hash search loop */

		g_byte_array_free(hash, TRUE);

		if (!got_answer)
		    if (vflag)
			putchar('.');
	    } /* Listener sweep loop */

	    /*
	     * If we read at least one packet then we need to bypass select()
	     * again. We only call select() if we tried to read from all file
	     * descriptors and no data was available anywhere.
	     */
	    call_select = !got_packet;
	} /* Receive loop */

	if (vflag)
	    putchar('\n');

	/*
	 * Read the timevals containing the time when the packets were sent.
	 * These timevals are written to the main thread by the stimuli
	 * injection thread via the pipe that we create before launching the
	 * stimuli injection thread.
	 */
	for (i = 0; i < npackets; i++) {
	    retval = read(pfds[0], &sent_packets[i].ts,
			  sizeof(struct timeval) );
	    if (retval == 0) {
		/* EOF */
		nexp_error(interp, "read(): got EOF. This should not happen!");
		goto error;
	    } else if (retval == -1) {
		/* read() error */
		nexp_error(interp, "read(): %s", strerror(errno) );
		goto error;
	    } else if (retval != (int) sizeof(struct timeval) ) {
		/* We got less than what we asked for */
		nexp_error(interp, "read() read less than requested. This "
				   "should not happen!");
		goto error;
	    }
	}

	close(pfds[0]);
	pipe_open = 0;

	/*
	 * Need to wait for the sending thread even though we know it has
	 * finished to avoid a memory leak, per Tcl_JoinThread()'s
	 * documentation.
	 */
	Tcl_JoinThread(thread_id, &thread_status);
	thread_running = 0;
    } /* Main send/expect loop */

    /*
     * Done sending and receiving packets. Now we need to create the
     * "_" array, i.e. _(sent), _(received), and _(unanswered).
     */

    sent_list = Tcl_NewListObj(0, NULL);
    received_list = Tcl_NewListObj(0, NULL);
    unanswered_list = Tcl_NewListObj(0, NULL);

    /*
     * For efficiency this loop should be located within the main
     * send/expect loop above. However, I don't think the added complexity of
     * having it there is worth the few extra cycles that I reckon we'd gain.
     */
    for (nanswers = 0, i = 0; i < npackets; i++) {
	pkthdr.caplen = sent_packets[i].packet.len;
	pkthdr.len = pkthdr.caplen;
	pkthdr.ts = sent_packets[i].ts;

	packet_obj = Tcl_NewPacketObj(sent_packets[i].packet.data, &pkthdr,
				      sent_packets[i].ll_type);

	if (sent_packets[i].nanswers) {
	    nanswers += sent_packets[i].nanswers;

	    Tcl_ListObjAppendElement(interp, sent_list, packet_obj);

	    if (!multiflag) {
		/*
		 * Each element of _(received) is a packet object.
		 */
		pkthdr.caplen = sent_packets[i].answers[0].packet.len;
		pkthdr.len = pkthdr.caplen;
		pkthdr.ts = sent_packets[i].answers[0].ts;

		packet_obj = Tcl_NewPacketObj(
					sent_packets[i].answers[0].packet.data,
					&pkthdr,
					sent_packets[i].answers[0].ll_type);
		Tcl_ListObjAppendElement(interp, received_list, packet_obj);
	    } else {
		/*
		 * Each element of _(received) is a list. This is currently
		 * not implemented.
		 */
	    }
	} else {
	    /*
	     * No answers received for this packet -> add it to the list
	     * of unanswered packets.
	     */
	    Tcl_ListObjAppendElement(interp, unanswered_list, packet_obj);
	}
    }

    Tcl_SetVar2Ex(interp, "_", "sent", sent_list, 0);
    Tcl_SetVar2Ex(interp, "_", "received", received_list, 0);
    Tcl_SetVar2Ex(interp, "_", "unanswered", unanswered_list, 0);

    if (vflag)
	printf("Sent %d packets; received %d answers; %d unanswered\n",
	       npackets, nanswers, npackets - nanswers);

cleanup:
    pcb_destroy(&pcb);

    for (i = 0; i < npackets; i++) {
	free(sent_packets[i].packet.data);
	g_byte_array_free(sent_packets[i].hash, TRUE);

	for (j = 0; j < sent_packets[i].nanswers; j++)
	    free(sent_packets[i].answers[j].packet.data);

	if (sent_packets[i].nanswers)
	    free(sent_packets[i].answers);
    }

    free(sent_packets);

    return cmd_retval;

error:
    cmd_retval = TCL_ERROR;

    if (thread_running) {
	char scratch[256];
	ssize_t nread;

	/*
	 * Wait for sending thread to finish. We need this because parameters
	 * for the sending thread are stored in this function stack, so if we
	 * return without waiting for the sending thread to finish then we're
	 * in trouble.
	 *
	 * The thread writes its timestamps to the pipe before it exits, and
	 * that write blocks if the pipe is full. Discard whatever it writes
	 * until it closes its end of the pipe so that the join below can't
	 * hang.
	 */
	do {
	    nread = read(pfds[0], scratch, sizeof(scratch) );
	} while (nread > 0 || (nread == -1 && errno == EINTR) );

	Tcl_JoinThread(thread_id, &thread_status);
	thread_running = 0;
    }

    if (pipe_open) {
	close(pfds[0]);
	pipe_open = 0;
    }

    goto cleanup;
}

#else /* ifndef __APPLE__ */

/*
 * This implements the "send_expect" command without using select(). This
 * is needed for platforms where select() does not work well, or is
 * completely broken, when used on BPF devices. MacOS X is an example of
 * such a platform. The disadvantage of not using select() is higher
 * CPU utilization since code is always running to do polling.
 */
static int
NExp_SendExpectObjCmd(ClientData clientData _U_, Tcl_Interp *interp, int argc,
		      Tcl_Obj * const *objv)
{
    int retval;
    int i, j, n, npackets = 0, nunanswered;
    struct sent_packet *sent_packets = NULL;
    struct pcap_pkthdr pkthdr, sent_pkthdr;
    const u_char *pkt_data;
    GByteArray *hash;
    int pfds[2]; /* Pipe file descriptors */
    struct timeval timeout_time, curr_time;
    int got_answer;
    int nanswers;
    int datalink_type;
    pcap_t *pd;
    frame_data rcvd_fdata, sent_fdata;
    size_t hsize_adj; /* Layer 2 header size adjustment */
    Tcl_Obj *sent_list, *received_list, *unanswered_list, *packet_obj;
    int isanswer;
    Tcl_ThreadId thread_id;
    struct thread_data td;

    nlisteners = 0;
    timeout.tv_sec = 1; timeout.tv_usec = 0;
    ntries = 1;

    memset(&pcb, 0, sizeof(pcb) );

    retval = process_options(interp, argc, objv);
    if (retval == -1)
	goto error;

    /*
     * Make sure we have a listener we can use to listen for answers.
     */
    if (nlisteners == 0) {
	llist[0] = lookup_listener(nexp_get_var(interp,
						LISTENER_SPAWN_ID_VARNAME) );
	if (!llist[0]) {
	    nexp_error(interp, "Can't find a suitable listener! Use "
			       "spawn_network to create one.");
	    goto error;
	}

	nlisteners++;
    }

    /*
     * Make sure the PCB has an assigned speaker: if the user has not
     * explicitly specified a speaker, then we use the default speaker,
     * which is referred to by name via the Tcl variable "speaker_id"
     * (SPEAKER_SPAWN_ID_VARNAME).
     */
    if (!pcb.speaker) {
	pcb.speaker = lookup_speaker(nexp_get_var(interp,
					SPEAKER_SPAWN_ID_VARNAME) );
	if (!pcb.speaker) {
	    nexp_error(interp, "Can't find a suitable speaker! Use "
			       "spawn_network to create one.");
	    goto error;
	}
    }

    /*
     * We now can build the packets and calculate their hash.
     */
    npackets = build_and_hash(interp, &sent_packets);

    /* Main send/expect loop */
    for (nunanswered = npackets, n = 0; nunanswered != 0 && n < ntries; n++) {
	/*
	 * Stimulus injection is done by a thread. Matching sent packets and
	 * responses is done by the parent thread.
	 */

	/*
	 * The following pipe is used for communication between the
	 * parent thread and the child thread. The most important use
	 * of this pipe is when the child, after injecting all the stimuli,
	 * writes to the parent all the timestamps of when the packets
	 * were sent. The parent then reads these timestamps and stores
	 * them in an appropriate place so they can be used to measure things
	 * like round-trip times.
	 *
	 * The pipe is also used for the child to let the parent know that
	 * stimulus injection has been completed. This is then used by the
	 * parent to determine when a timeout has occurred, i.e. no responses
	 * received after a certain time.
	 */
	if (pipe(pfds) == -1) {
	    nexp_error(interp, "Couldn't open pipe: %s", strerror(errno) );
	    goto error;
	}

#if 0
        /* Block SIGCHLD */
	sigemptyset(&sigmask);
	sigaddset(&sigmask, SIGCHLD);
	if (sigprocmask(SIG_BLOCK, &sigmask, NULL) == -1) {
	    nexp_error(interp, "sigprocmask(): %s", strerror(errno) );
	    goto error;
	}
#endif

	td.sent_packets = sent_packets;
	td.npackets = npackets;
	td.npasses = ntries;
	td.current_pass = n;
	td.pfds = pfds;

	retval = Tcl_CreateThread(&thread_id, send_main, &td,
				  TCL_THREAD_STACK_DEFAULT,
				  TCL_THREAD_NOFLAGS);
	if (retval != TCL_OK) {
	    nexp_error(interp, "Tcl_CreateThread() error");
	    goto error;
	}

	/*
	 * We do not use select() because of its brokenness on this
	 * OS, so we need the read end of the pipe to be non-blocking.
	 */
	fcntl(pfds[0], F_SETFL, O_NONBLOCK);

	/*
	 * Receive loop. We loop while we still have unanswered packets
	 * and while we have not timed out.
	 */
	for (timerclear(&timeout_time);

	     /*
	      * This condition looks a bit complicated, but it is not.
	      * The first condition to remain in the loop is that we still
	      * have unanswered packets. Then we need to check whether
	      * we have had a timeout, but we only do this check if the
	      * child is finished sending packets, since the timer for
	      * the timeout does not start to run until all packets have
	      * been sent.
	      */
	     nunanswered != 0
	     && (!timerisset(&timeout_time)
		 || (timerisset(&timeout_time)
		     && timercmp(&timeout_time, &curr_time, >) ) );

	    gettimeofday(&curr_time, NULL) ) {


	    if (!timerisset(&timeout_time) ) {
		/*
		 * Read the first of the timevals containing the time when the
		 * packets were sent. We read the remaining timevals below,
		 * outside of the reception loop. These timevals are written to
		 * the main thread by the stimuli injection thread via the pipe
		 * that we create before launching the stimuli injection
		 * thread.
		 */
		retval = read(pfds[0], &sent_packets[0].ts,
			      sizeof(struct timeval) );
		if (retval == 0) {
		    /* EOF */
		    nexp_error(interp, "read(): got EOF. This should not happen!");
		    goto error;
		} else if (retval == -1) {
		    /*
		     * read() error. Ignore if the error is EWOULDBLOCK since that
		     * just means that the child is not done injecting stimuli.
		     */
		    if (errno != EWOULDBLOCK) {
			nexp_error(interp, "read(): error %d (%s)", errno,
				   strerror(errno) );
			goto error;
		    }
		} else if (retval != sizeof(struct timeval) ) {
		    /* We got less than what we asked for */
		    nexp_error(interp, "read() read less than requested. This "
				       "should not happen!");
		    goto error;
		} else {
		    /*
		     * We got the first timeval. This means that the child is
		     * done injecting stimulus and we can calculate the time at
		     * which a timeout will occur so we can use that as a
		     * condition to exit the receive loop.
		     */
		    gettimeofday(&timeout_time, NULL);
		    timeradd(&timeout_time, &timeout, &timeout_time);
		}
	    }


	    /* Listener sweep loop */
	    for (i = 0; nunanswered != 0 && i < nlisteners; i++) {

		/*
		 * No need to check for listener type - it's always
		 * LISTENER_LIVE.
		 */
		pd = llist[i]->_live.pd;

		pkt_data = pcap_next(pd, &pkthdr);
		if (!pkt_data)
		    /* FIXME: need to check for error here? */
		    continue;

		/*
		 * Determine the layer 2 and layer 3 PDUs as well as the size
		 * of the layer 2 PDU header and the length of the layer 2 PDU
		 * hash. This information depends on the datalink type, and is
		 * used later to determine in what part of the received packet
		 * we start matching.
		 *
		 * (there's no need to check for listener type - it's always
		 * LISTENER_LIVE.)
		 */
		datalink_type = llist[i]->_live.datalink_type;

		fill_in_framedata(&rcvd_fdata, &pkthdr, datalink_type);

		hash = pkt_hash(interp, pkt_data, &rcvd_fdata);

		frame_data_cleanup(&rcvd_fdata);

#ifdef DEBUG
		printf("recv'ed pkt len = %u, packet data:\n", pkthdr.len);
		dump(pkt_data, pkthdr.len);

		printf("recv'd pkt hash len = %u, hash data:\n", hash->len);
		dump(hash->data, hash->len);
#endif

		/*
		 * The logic here is that if the PDU we sent was not injected
		 * at layer 2 then the received packet is different from the
		 * sent packet in terms of layers in the protocol stack, i.e.
		 * the received packet is always at layer 2, but the sent
		 * packet is at layer 3. The following code calculates the right
		 * adjustments.  We only check for Ethernet right now because
		 * it's the only layer 2 protocol we can inject.
		 */
		if (!strcmp(pb_getname(pcb.pdu), "ether") ) {
		    /*
		     * There's no need to adjust anything since the packet that
		     * was sent was injected at layer 2.
		     */
		    hsize_adj = 0;
		} else {
		    switch (datalink_type) {
		    case DLT_EN10MB:
			/*
			 * The hash of the Ethernet layer is the ethertype
			 * stored in a uint16_t. See packets/hash.c
			 */
			hsize_adj = sizeof(uint16_t);
			break;
		    case DLT_LINUX_SLL:
			/*
			 * We do not compute a hash for the Linux "cooked"
			 * mode encapsulation. See man pcap(3) for details
			 * on Linux "cooked" mode.
			 */
			hsize_adj = 0;
			break;
		    default:
			printf("Don't know how to handle received packet "
			       "with data link type %d (%s).\n",
			       datalink_type,
			       pcap_datalink_val_to_name(datalink_type) );
			goto error;
		    }
		}

		/*
		 * We have a hash for a received packet. Now we need to look in
		 * the table of sent packets for a packet that matches the hash
		 * of the received packet.
		 */
		for (got_answer = j = 0; !got_answer && j < npackets; j++) {
		    if (sent_packets[j].nanswers)
			/*
			 * This packet has already been answered. Nothing
			 * to do; let's keep looking.
			 */
			continue;

		    if (hash->len - hsize_adj != sent_packets[j].hash->len
			|| memcmp(hash->data + hsize_adj,
				  sent_packets[j].hash->data,
				  hash->len - hsize_adj) )
			/* Hash doesn't match; keep looking */
			continue;

#ifdef DEBUG
		    printf("Hash of sent and recv'ed packets is the same!\n");
#endif

		    /*
		     * We have a hash match! Let's see if it is
		     * an answer...
		     */

		    sent_pkthdr.caplen = sent_packets[j].packet.len;
		    sent_pkthdr.len = sent_pkthdr.caplen;
		    gettimeofday(&sent_pkthdr.ts, NULL);

		    fill_in_framedata(&sent_fdata, &sent_pkthdr,
				      sent_packets[j].ll_type);

		    isanswer = pkt_isanswer(interp, pkt_data, &rcvd_fdata,
					    sent_packets[j].packet.data,
					    &sent_fdata);

		    frame_data_cleanup(&sent_fdata);

		    if (!isanswer)
			/*
			 * Hash does match, but packet is not an answer;
			 * keep looking
			 */
			continue;

		    /*
		     * Finally, we've been able to determine that
		     * the received packet is a response to a packet
		     * that we sent.
		     */

		    nanswers = sent_packets[j].nanswers;

		    /*
		     * Save the received packet. It is saved in the "answers"
		     * array of sent_packets[].
		     */

		    sent_packets[j].answers = xrealloc(sent_packets[j].answers,
			sizeof(struct answer)*(nanswers + 1) );

		    sent_packets[j].answers[nanswers].packet.len = pkthdr.len;
		    sent_packets[j].answers[nanswers].packet.data
			= xmalloc(pkthdr.len);
		    memcpy(sent_packets[j].answers[nanswers].packet.data,
			   pkt_data, pkthdr.len);
		    /* Time when the packet was captured */
		    sent_packets[j].answers[nanswers].ts = pkthdr.ts;
		    sent_packets[j].answers[nanswers].ll_type = datalink_type;

		    sent_packets[j].nanswers++;

		    nunanswered--; /* One less we're waiting an answer for */

		    if (vflag)
			putchar('!');
		    got_answer = 1;
		} /* Hash search loop */

		g_byte_array_free(hash, TRUE);

		if (!got_answer)
		    if (vflag)
			putchar('.');
	    } /* Listener sweep loop */

	} /* Receive loop */

	if (vflag)
	    putchar('\n');

#if 0
	/* Install old signal handler */
	if (sigprocmask(SIG_UNBLOCK, &sigmask, NULL) == -1) {
	    nexp_error(interp, "sigprocmask(): %s", strerror(errno) );
	    goto error;
	}
#endif

	/*
	 * Read the rest of the timevals containing the time when the packets
	 * were sent.  Remember that we read the first of these timevals in the
	 * reception loop above.
	 */
	for (i = 1; i < npackets; i++) {
	    retval = read(pfds[0], &sent_packets[i].ts,
			  sizeof(struct timeval) );
	    if (retval == 0) {
		/* EOF */
		nexp_error(interp, "read(): got EOF. This should not happen!");
		goto error;
	    } else if (retval == -1) {
		/* read() error */
		nexp_error(interp, "read(): %s", strerror(errno) );
		goto error;
	    } else if (retval != sizeof(struct timeval) ) {
		/* We got less than what we asked for */
		nexp_error(interp, "read() read less than requested. This "
				   "should not happen!");
		goto error;
	    }
	}

	close(pfds[0]);
    } /* Main send/expect loop */

    /*
     * Done sending and receiving packets. Now we need to create the
     * "_" array, i.e. _(sent), _(received), and _(unanswered).
     */

    sent_list = Tcl_NewListObj(0, NULL);
    received_list = Tcl_NewListObj(0, NULL);
    unanswered_list = Tcl_NewListObj(0, NULL);

    /*
     * For efficiency this loop should be located within the main
     * send/expect loop above. However, I don't think the added complexity of
     * having it there is worth the few extra cycles that I reckon we'd gain.
     */
    for (nanswers = 0, i = 0; i < npackets; i++) {
	struct pcap_pkthdr pkthdr;

	pkthdr.caplen = sent_packets[i].packet.len;
	pkthdr.len = pkthdr.caplen;
	pkthdr.ts = sent_packets[i].ts;

	packet_obj = Tcl_NewPacketObj(sent_packets[i].packet.data, &pkthdr,
				      sent_packets[i].ll_type);

	if (sent_packets[i].nanswers) {
	    nanswers += sent_packets[i].nanswers;

	    Tcl_ListObjAppendElement(interp, sent_list, packet_obj);

	    if (!multiflag) {
		/*
		 * Each element of _(received) is a packet object.
		 */
		pkthdr.caplen = sent_packets[i].answers[0].packet.len;
		pkthdr.len = pkthdr.caplen;
		pkthdr.ts = sent_packets[i].answers[0].ts;

		packet_obj = Tcl_NewPacketObj(
					sent_packets[i].answers[0].packet.data,
					&pkthdr,
					sent_packets[i].answers[0].ll_type);
		Tcl_ListObjAppendElement(interp, received_list, packet_obj);
	    } else {
		/*
		 * Each element of _(received) is a list. This is currently
		 * not implemented.
		 */
	    }
	} else {
	    /*
	     * No answers received for this packet -> add it to the list
	     * of unanswered packets.
	     */
	    Tcl_ListObjAppendElement(interp, unanswered_list, packet_obj);
	}
    }

    Tcl_SetVar2Ex(interp, "_", "sent", sent_list, 0);
    Tcl_SetVar2Ex(interp, "_", "received", received_list, 0);
    Tcl_SetVar2Ex(interp, "_", "unanswered", unanswered_list, 0);

    if (vflag)
	printf("Sent %d packets; received %d answers; %d unanswered\n",
	       npackets, nanswers, npackets - nanswers);

    /*
     * Clean up.
     */

    pcb_destroy(&pcb);

    for (i = 0; i < npackets; i++) {
	free(sent_packets[i].packet.data);
	g_byte_array_free(sent_packets[i].hash, TRUE);

	for (j = 0; j < sent_packets[i].nanswers; j++)
	    free(sent_packets[i].answers[j].packet.data);

	if (sent_packets[i].nanswers)
	    free(sent_packets[i].answers);
    }

    free(sent_packets);

    return TCL_OK;

error:
    return TCL_ERROR;
}

#endif /* ifndef __APPLE__ */

/*
 * Table of the Tcl commands implemented in this file. It is handed to
 * nexp_create_commands() by nexp_init_send_expect_cmd() below.
 *
 * NOTE(review): the all-NULL/zero last entry presumably marks the end of the
 * table, and the meaning of the three trailing fields of each entry is not
 * visible here -- confirm against the definition of struct nexp_cmd_data.
 */
static struct nexp_cmd_data cmd_data[] = {
    /* "send_expect" is implemented by NExp_SendExpectObjCmd() */
    {"send_expect", NExp_SendExpectObjCmd, NULL, 0, 0},

    {NULL, NULL, NULL, 0, 0}
};

/*
 * Module initialization for the "send_expect" command.
 *
 * Installs a variable trace that ties the C global "multiflag" to the Tcl
 * variable named by MULTIANSWER_VARNAME, and then hands the cmd_data table
 * to nexp_create_commands() for the given interpreter.
 */
void
nexp_init_send_expect_cmd(Tcl_Interp *interp)
{
    /*
     * Keep the "multianswers" Tcl variable and the multiflag global
     * mirrored at all times; the trace set up here (Tcl_TraceVar() based)
     * is what does the work.
     *
     * Note that no CLI switch exists for changing the default value.
     */
    setup_tcl_vartrace(interp,
		       TCLVAR_INT,
		       MULTIANSWER_VARNAME,
		       &multiflag);

    /* Hand the commands listed in cmd_data to nexp_create_commands(). */
    nexp_create_commands(interp, cmd_data);
}
