/* ``The contents of this file are subject to the Erlang Public License,
 * Version 1.0, (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.erlang.org/EPL1_0.txt
 * 
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 * 
 * The Original Code is Erlang-4.7.3, December, 1998.
 * 
 * The Initial Developer of the Original Code is Ericsson Telecom
 * AB. Portions created by Ericsson are Copyright (C), 1998, Ericsson
 * Telecom AB. All Rights Reserved.
 * 
 * Contributor(s): ______________________________________.''
 */
/*
 * Author: Bjorn Gustavsson
 * Based on code by: Bogumil Hausman, Tony Rogvall
 */
#include "sys.h"
#include "config.h"
#include "global.h"
#include "erl_process.h"
#include "error.h"
#include "bif.h"		/* MUST BE INCLUDED BEFORE db.h!!! */
#include "db.h"
#include "big.h"
#include "beam_opcodes.h"

/*
 * XXX This should go to a header file together with the
 * companion macros in "beam_load.c"
 */

#if defined(NO_JUMP_TABLE)
#  define OpCase(OpCode)    case op_##OpCode: lb_##OpCode
#  define CountCase(OpCode) case op_count_##OpCode
#  define OpCode(OpCode)    ((uint32*)op_##OpCode)
#  define Goto(Rel) {Go = (int)(Rel); goto emulator_loop;}
#  define LabelAddr(Addr) &&##Addr
#else
#  define OpCase(OpCode)    lb_##OpCode
#  define CountCase(OpCode) lb_count_##OpCode
#  define Goto(Rel) goto *(Rel)
#  define LabelAddr(Addr) &&##Addr
#  define OpCode(OpCode)  (&&lb_##OpCode)
#endif

#define is_both_small(X, Y) (((X) & (Y) & TAGMASK) == SMALL)

#define MY_IS_SSMALL(x) (((unsigned) (((x) >> (BODY-1)) + 1)) < 2)

#define MAX(x, y) (((x) > (y)) ? (x) : (y))

/*
 * We reuse some of fields in the save area in the process structure.
 * This is safe to do, since the this space is only active used when
 * the process is switched out.
 *
 * Reason for resue: The temporary big numbers must be in memory
 * below 256Mb, not on the stack, to be able to have a tagged pointer
 * them.  Using static variables would not be thread-safe.
 */
#define TMP_BIG1(p) ((p)->def_arg_reg)
#define TMP_BIG2(p) ((p)->def_arg_reg+2)
#define REDS_IN(p)  ((p)->def_arg_reg[5])

/*
 * Add a byte offset to a pointer to uint32.  This is useful when the
 * the loader has precalculated a byte offset.
 */
#define ADD_BYTE_OFFSET(ptr, offset) \
   ((uint32 *) (((unsigned char *)ptr) + (offset)))

/* We don't check the range if an ordinary switch is used */
#ifdef NO_JUMP_TABLE
#define VALID_INSTR(IP) (0 <= (sint32)(IP) && ((sint32)(IP) < (NUMBER_OF_OPCODES*2+10)))
#else
#define VALID_INSTR(IP) \
   ((sint32)LabelAddr(emulator_loop) <= (sint32)(IP) && \
    (sint32)(IP) < (sint32)LabelAddr(end_emulator_loop))
#endif /* NO_JUMP_TABLE */

#define SET_CP(p, ip) \
   ASSERT(VALID_INSTR(*(ip))); \
   (p)->cp = (ip)

#define SET_I(ip) \
   ASSERT(VALID_INSTR(* (uint32 *)(ip))); \
   I = (ip)

#define StoreResult(Res) \
    if (I_bis == NULL) { r(0) = (Res); } else { *I_bis = (Res); }

#define StoreBifResult(Dst, Result) \
    I_bis = (uint32 *) Arg(Dst); \
    switch (tag_val_def((uint32)I_bis)) { \
    case X_REG_DEF: xb(unsigned_val((uint32)I_bis)) = (Result); Next((Dst)+1); \
    case Y_REG_DEF: yb(unsigned_val((uint32)I_bis)) = (Result); Next((Dst)+1); \
    default: r(0) = (Result); Next((Dst)+1); \
    }

#define ClauseFail() goto lb_jump_j

#define Badmatch(Term) { \
    c_p->fvalue = (Term); \
    goto lb_jump_j; \
}

#define BadmatchSearch(Term) { \
    c_p->fvalue = (Term); \
    goto find_func_info; \
}

#define SAVE_CP(X)		*(X) = make_cp(c_p->cp)
#define RESTORE_CP(X)		SET_CP(c_p, cp_ptr_val(*(X)))

#define ISCATCHEND(instr) ((uint32 *) *(instr) == OpCode(catch_end_y))
#define ISFUNCBEGIN(A)	  ((uint32 *)*((uint32 *)A) == OpCode(func_info_aaI))

uint32* ready;
uint32* em_call_error_handler;
void** beam_ops;
uint32* beam_exit;
uint32* beam_apply_op;
extern int count_instructions;

#define SWAPIN \
    HTOP = c_p->htop; \
    E = c_p->stop

#define SWAPOUT \
    c_p->htop = HTOP; \
    c_p->stop = E

/*
 * handle registers x(N), local variables y(N) in local frame,
 * N is 0,1,...
 */
#define xb(N) (*(uint32 *) (((unsigned char *)reg) + (N)))
#define yb(N) (*(uint32 *) (((unsigned char *)E) + (N)))
#define x(N) reg[N]
#define y(N) E[N]
#define r(N) x##N

/*
 * Makes sure that there are StackNeed + HeapNeed + 1 words available
 * on the combined heap/stack segment, then allocates StackNeed + 1
 * words on the stack and saves CP.
 *
 * M is number of live registers to preserve during garbage collection
 */

#define AH(StackNeed, HeapNeed, M, L) \
     ASSERT(c_p->htop <= E && E <= c_p->stack); \
     tmp_arg1 = (StackNeed) + CP_SIZE; \
     if (E - (tmp_arg1 + (HeapNeed)) < HTOP) { \
           SWAPOUT; \
           reg[0] = r(0); \
           FCALLS -= do_gc2(c_p, tmp_arg1 + (HeapNeed), reg, (M)); \
           r(0) = reg[0]; \
           SWAPIN; \
     } \
     E -= tmp_arg1; \
     SAVE_CP(E); \
     ASSERT(c_p->htop <= E && E <= c_p->stack);

/*
 * Like the AH macro, but allocates no additional heap space.
 */

#define A(StackNeed, M, L) AH(StackNeed, 0, M, L)

#define D(N) \
     RESTORE_CP(E); \
     E += N + CP_SIZE; \
     ASSERT(c_p->htop <= E && E <= c_p->stack); \

/*
 * Check if Nh words of heap are available and garb if necessary.
 * M is number of active argument register to be preserved.
 */
#define TH(Nh, M, L) \
    ASSERT(c_p->htop <= E && E <= c_p->stack); \
    if (E < (HTOP + (Nh))) { \
       SWAPOUT; \
       reg[0] = r(0); \
       FCALLS -= do_gc2(c_p, (Nh), reg, (M)); \
       r(0) = reg[0]; \
       SWAPIN; \
    } \

/* initialize local variable with address Pointer 'y(N)' */
#define I(N) make_blank(yb(N))

/*
 * Check that we haven't used the reductions and jump to function pointed to by
 * the I register.  If we are out of reductions, do a context switch.
 */

#define DispatchMacro() \
     I_bis = (uint32 *) *I; \
     if (FCALLS > 0) { \
        FCALLS--; \
        Goto(I_bis); \
     } else { \
	goto context_switch; \
     }

#ifdef DEBUG
/*
 * To simplify breakpoint setting, put the code in one place only and jump to it.
 */
#  define Dispatch() goto do_dispatch
#else
/*
 * Inline for speed.
 */
#  define Dispatch() DispatchMacro()
#endif

#define BF_self_0(R) R = c_p->id

void trace_bif(Process* p, BifFunction bif, ...);

#define Arg(N)       I[N+1]
#define Next(N) \
    I += N + 1; \
    ASSERT(VALID_INSTR(*I)); \
    Goto(*I)
#define Fetch(N) I_bis = (uint32 *)*(I + N + 1)
#define NextF(N) \
    I += N + 1; \
    ASSERT(VALID_INSTR(I_bis)); \
    Goto(I_bis)

#define FetchAlt(N) t0 = (uint32 *)*(I + N + 1)
#define NextFAlt(N) \
    I += N + 1; \
    ASSERT(VALID_INSTR(t0)); \
    Goto(t0)

/* N is number of arguments, M is position they start in code, starting
 * from 0
 */

#define GetR(pos, tr) \
   tr = Arg(pos); \
   switch (tag_val_def(tr)) { \
   case R_REG_DEF: tr = r(0); break; \
   case X_REG_DEF: tr = xb(reg_number(tr)); break; \
   case Y_REG_DEF: ASSERT(reg_number(tr) > 0); tr = yb(reg_number(tr)); break; \
   }

#define GetDest(pos, tr) \
   i = Arg(pos); \
   switch (tag_val_def(i)) { \
   case R_REG_DEF: tr = NULL; break; \
   case X_REG_DEF: tr = &(xb(reg_number(i))); break; \
   case Y_REG_DEF: \
        ASSERT(reg_number(i) > 0); \
	tr = &(yb(reg_number(i))); \
        break; \
   }

#define GetArg1(M) \
   GetR((M), tmp_arg1)

#define GetArg2(M) \
   GetR((M), tmp_arg1); \
   GetR((M)+1, tmp_arg2)

#define GetArg3(M) \
   GetR((M), tmp_arg1); \
   GetR((M)+1, tmp_arg2); \
   GetR((M)+2, tmp_arg3)

/*
 * Note: FetchArgs must assign tmp_arg1 last, because it used to unpack packed
 * operands.
 */

#define FetchArgs(S1, S2, Dst) \
    I_bis = (uint32 *) Dst; \
    tmp_arg2 = (S2); \
    tmp_arg1 = (S1)

#define PutList(H, T, Dst) \
   HTOP[0] = (H); HTOP[1] = (T); \
   Dst = make_list(HTOP); HTOP += 2

#define PutListGenDest(H, T, dstp) \
    HTOP[0] = (H); HTOP[1] = (T); \
    if ((dstp) == NULL) { r(0) = make_list(HTOP); } else { *(dstp) = make_list(HTOP); } \
    HTOP += 2

#define Move(src, dst) dst = (src)

#define Move2(src1, dst1, src2, dst2) dst1 = (src1); dst2 = (src2)

#define MoveGenDest(src, dstp) \
   if ((dstp) == NULL) { r(0) = (src); } else { *(dstp) = src; }

#define MoveReturn(Src, Dest) \
    (Dest) = (Src); \
    I = c_p->cp; \
    ASSERT(VALID_INSTR(*c_p->cp)); \
    Goto(*I)

#define DeallocateReturn(Deallocate) \
    tmp_arg1 = (Deallocate); \
    SET_I(cp_ptr_val(*E)); \
    E = ADD_BYTE_OFFSET(E, tmp_arg1); \
    ASSERT(c_p->htop <= E && E <= c_p->stack); \
    Goto(*I)

#define MoveDeallocateReturn(Src, Dest, Deallocate) \
    (Dest) = (Src); \
    DeallocateReturn(Deallocate)

#define MoveCall(Src, Dest, CallDest, Size) \
    (Dest) = (Src); \
    SET_CP(c_p, I+Size+1); \
    SET_I((uint32 *) CallDest); \
    Dispatch();

#define MoveCallLast(Src, Dest, CallDest, Deallocate) \
    (Dest) = (Src); \
    RESTORE_CP(E); \
    E = ADD_BYTE_OFFSET(E, (Deallocate)); \
    ASSERT(c_p->htop <= E && E <= c_p->stack); \
    SET_I((uint32 *) CallDest); \
    Dispatch();

#define GetList(Src, H, T) \
   t0 = ptr_val(Src); \
   H = CAR(t0); \
   T = CDR(t0)

#define GetTupleElement(Src, Element, Dest) \
    (Dest) = (* (uint32 *) (((unsigned char *) ptr_val(Src)) + (Element)))

#define GetTupleElement2(Src, El1, El2, Dst1, Dst2) \
    (Dst1) = (* (uint32 *) (((unsigned char *) ptr_val(Src)) + (El1))); \
    (Dst2) = (* (uint32 *) (((unsigned char *) ptr_val(Src)) + (El2)))

#define PutTuple(Arity, Src, Dest) \
     ASSERT(is_arity_value(Arity)); \
     Dest = make_tuple(HTOP); \
     HTOP[0] = (Arity); \
     HTOP[1] = (Src); \
     HTOP += 2

#define Put(Word) *HTOP++ = (Word)

#define PutFloat(Float1, Float2, Dest) \
    Dest = make_float(HTOP); \
    HTOP[0] = make_thing(2); \
    HTOP[1] = Float1; \
    HTOP[2] = Float2; \
    HTOP += 3;

#define PutBignum(TaggedWord, Dest) \
    ASSERT(is_thing(TaggedWord)); \
    Dest = make_big(HTOP); \
    *HTOP++ = TaggedWord

#define Equal(X, Y, Action) if (X != Y) { Action; }

#define MakeRef(Dst) Dst = make_ref_0(c_p)

#define IsFloat(Src, Fail) if (is_not_float(Src)) { Fail; }

#define IsInteger(Src, Fail) if (is_not_integer(Src)) { Fail; }

#define IsAtom(Src, Fail) if (is_not_atom(Src)) { Fail; }

#define IsIntegerAllocate(Src, Need, Alive, Fail, Label) \
    if (is_not_integer(Src)) { Fail; } \
    A(Need, Alive, allocate##Label)

#define IsNil(Src, Fail) if (is_not_nil(Src)) { Fail; }

#define IsList(Src, Fail) if (is_not_list(Src) && is_not_nil(Src)) { Fail; }

#define IsNonemptyList(Src, Fail) if (is_not_list(Src)) { Fail; }

#define IsNonemptyListAllocate(Src, Need, Alive, Fail, Label) \
    if (is_not_list(Src)) { Fail; } \
    A(Need, Alive, allocate##Label)

#define IsNonemptyListTestHeap(Src, Need, Alive, Fail, Label) \
    if (is_not_list(Src)) { Fail; } \
    TH(Need, Alive, testheap##Label)

#define IsTuple(X, Action) if (is_not_tuple(X)) Action

#define IsTupleOfArity(Src, Arity, Fail) \
    if (is_not_tuple(Src) || *ptr_val(Src) != Arity) { \
        Fail; \
    }

#define IsBinary(Src, Fail) if (is_not_binary(Src)) { Fail; }

#define IsPort(Src, Fail) if (is_not_port(Src)) { Fail; }
#define IsPid(Src, Fail) if (is_not_pid(Src)) { Fail; }
#define IsRef(Src, Fail) if (is_not_refer(Src)) { Fail; }

static uint32 call_error_handler(Process* p, Export* exp, uint32* reg);
static void terminate_process(Process* p, uint32 code, uint32* reg);
static uint32
apply(Process* p, uint32 module, uint32 function, uint32 args, uint32* reg);
static uint32 do_mixed_plus(Process* p, uint32 arg1, uint32 arg2);
static uint32 do_mixed_minus(Process* p, uint32 arg1, uint32 arg2);
static uint32 do_mixed_times(Process* p, uint32 arg1, uint32 arg2);
static uint32 do_mixed_div(Process* p, uint32 arg1, uint32 arg2);
static uint32 do_int_div(Process* p, uint32 arg1, uint32 arg2);
static uint32 do_int_rem(Process* p, uint32 arg1, uint32 arg2);
static uint32 do_band(Process* p, uint32 arg1, uint32 arg2);
static uint32 do_bor(Process* p, uint32 arg1, uint32 arg2);
static uint32 do_bxor(Process* p, uint32 arg1, uint32 arg2);
static uint32 mixed_eq(uint32 arg1, uint32 arg2);
static uint32 mixed_ge(uint32 arg1, uint32 arg2);
static void trace_call_or_ret(Process* c_p, uint32* fi, uint32* reg, uint32 what);


void
init_emulator(void)
{
    process_main(NULL, -1);
}

/*
 * On certain platforms, make sure the main variables really are placed
 * in registers.
 */

#if defined(__GNUC__) && defined(sparc)
#  define REG_x0 asm("%l0")
#  define REG_xregs asm("%l1")
#  define REG_htop asm("%l2")
#  define REG_stop asm("%l3")
#  define REG_I asm("%l4")
#  define REG_fcalls asm("%l5")
#else
#  define REG_x0
#  define REG_xregs
#  define REG_htop
#  define REG_stop
#  define REG_I
#  define REG_fcalls
#endif

int process_main(c_p, reds)
    Process* c_p;
    int reds;
{
    /*
     * X register zero; also called r(0)
     */
    register uint32 x0 REG_x0;

    /* Pointer to X registers: x(1)..x(N); reg[0] is used when doing GC,
     * in all other cases x0 is used.
     */
    register uint32* reg REG_xregs;

    /*
     * Top of heap (next free location); grows upwards.
     */
    register uint32* HTOP REG_htop;

    /* Stack pointer.  Grows downwards; points
     * to last item pushed (normally a saved
     * continuation pointer).
     */
    register uint32* E REG_stop;

    /*
     * Pointer to next threaded instruction.
     */
    register uint32 *I REG_I;

    /* Number of reductions left.  This function
     * returns to the scheduler when FCALLS reaches zero.
     */
    register sint32 FCALLS REG_fcalls;
    
    register uint32 *I_bis;	/* Prefetched pointer to C code for next
				 * instruction.
				 */
    register uint32* t0;

    /*
     * Temporaries used for picking up arguments for instructions.
     */
    uint32 tmp_arg1;
    uint32 tmp_arg2;
    uint32 tmp_arg3;

    int i;
    Export* exp;
    BifFunction bf;

    sint32 ires;		/* Signed temporary. */
    uint32 save_reg[MAX_REG];	/* X register -- not used directly, but
				 * through 'reg', because using it directly
				 * needs two instructions on a SPARC,
				 * while using it through reg needs only
				 * one.
				 */

#ifndef NO_JUMP_TABLE
    static void* opcodes[] = { DEFINE_OPCODES };
    static void* counting_opcodes[] = { DEFINE_COUNTING_OPCODES };
#else
    int Go;
#endif
    /*
     * Note: In this function, we attempt to place rarely executed code towards
     * the end of the function, in the hope that the cache hit rate will be better.
     * The initialization code is only run once, so it is at the very end.
     *
     * Note: c_p->arity must be set to reflect the number of useful terms in
     * c_p->arg_reg before *returning* from this function.  *Inside* this function,
     * there is no need to set it (do_gc2() no longer includes c_p->arg in its
     * rootset; only do_gc() does that).
     */

    if (reds < 0) {
	goto init_emulator;
    }

    reg = save_reg;
    t0 = c_p->arg_reg;
    for (i = c_p->arity - 1; i > 0; i--) {
	reg[i] = t0[i];
    }

    /*
     * We put the original reduction count in the process structure, to reduce
     * the code size (referencing a field in a struct through a pointer stored
     * in a register gives smaller code than referencing a global variable).
     */
    REDS_IN(c_p) = reds;
    FCALLS = REDS_IN(c_p);

    I_bis = c_p->call;
    r(0) = c_p->arg_reg[0];
    SET_I(c_p->i);
    SWAPIN;
    Goto(I_bis);

#if defined(DEBUG) || defined(NO_JUMP_TABLE)
 emulator_loop:
#endif

#ifdef NO_JUMP_TABLE
    switch (Go) {
#endif
#include "beam_hot.h"

 OpCase(i_plus_j):
    FetchAlt(1);
    if (is_both_small(tmp_arg1, tmp_arg2)) {
	i = signed_val(tmp_arg1) + signed_val(tmp_arg2);
	ASSERT(MY_IS_SSMALL(i) == IS_SSMALL(i));
	if (MY_IS_SSMALL(i)) {
	    tmp_arg1 = make_small(i);
	    StoreResult(tmp_arg1);
	    NextFAlt(1);
	}
    }
    tmp_arg1 = do_mixed_plus(c_p, tmp_arg1, tmp_arg2);
    if (tmp_arg1) {
	StoreResult(tmp_arg1);
	NextFAlt(1);
    }
    goto lb_Cl_error;

 OpCase(i_minus_j):
    FetchAlt(1);
    if (is_both_small(tmp_arg1, tmp_arg2)) {
	i = signed_val(tmp_arg1) - signed_val(tmp_arg2);
	ASSERT(MY_IS_SSMALL(i) == IS_SSMALL(i));
	if (MY_IS_SSMALL(i)) {
	    tmp_arg1 = make_small(i);
	    StoreResult(tmp_arg1);
	    NextFAlt(1);
	}
    }
    tmp_arg1 = do_mixed_minus(c_p, tmp_arg1, tmp_arg2);
    if (tmp_arg1) {
	StoreResult(tmp_arg1);
	NextFAlt(1);
    }
    goto lb_Cl_error;

 OpCase(i_is_lt_f):
    if (tmp_arg1 == tmp_arg2 || mixed_ge(tmp_arg1, tmp_arg2)) {
	ClauseFail();
    }
    Next(1);

 OpCase(i_is_ge_f):
    if (tmp_arg1 != tmp_arg2 && !mixed_ge(tmp_arg1, tmp_arg2)) {
	ClauseFail();
    }
    Next(1);

 OpCase(i_is_eq_f):
    if (tmp_arg1 != tmp_arg2 && !mixed_eq(tmp_arg1, tmp_arg2)) {
	ClauseFail();
    }
    Next(1);

 OpCase(i_is_ne_f):
    if (tmp_arg1 == tmp_arg2 || mixed_eq(tmp_arg1, tmp_arg2)) {
	ClauseFail();
    }
    Next(1);

 OpCase(i_is_eq_exact_f):
    if (tmp_arg1 != tmp_arg2 && !eq(tmp_arg1, tmp_arg2)) {
	ClauseFail();
    }
    Next(1);

 OpCase(is_eq_exact_body):
    Fetch(0);
    if (tmp_arg1 == tmp_arg2 || eq(tmp_arg1, tmp_arg2)) {
	NextF(0);
    }
    BadmatchSearch(tmp_arg1);

 OpCase(call_only_p):
    SET_I((uint32 *) Arg(0));
    Dispatch();

 OpCase(call_last_pP):
    RESTORE_CP(E);
    E = ADD_BYTE_OFFSET(E, Arg(1));
    ASSERT(c_p->htop <= E && E <= c_p->stack);
    SET_I((uint32 *) Arg(0));
    Dispatch();

 OpCase(call_p):
    SET_CP(c_p, I+2);
    SET_I((uint32 *) Arg(0));
    Dispatch();

 OpCase(call_ext_last_eP):
    RESTORE_CP(E);
    E = ADD_BYTE_OFFSET(E, Arg(1));
    ASSERT(c_p->htop <= E && E <= c_p->stack);

    /*
     * Note: The pointer to the export entry is never NULL; if the module
     * is not loaded, it points to code which will invoke the error handler
     * (see lb_call_error_handler below).
     */
    SET_I(((Export *) Arg(0))->address);
    Dispatch();

 OpCase(call_ext_e):
    SET_CP(c_p, I+2);
    SET_I(((Export *) Arg(0))->address);
    Dispatch();

 OpCase(init_y):
    Fetch(1); make_blank(yb(Arg(0))); NextF(1);

 OpCase(return):
    SET_I(c_p->cp);
    Goto(*I);

 OpCase(allocate_II):
    Fetch(2); A(Arg(0),Arg(1),_lb_A); NextF(2);

 OpCase(allocate_zero_II):
    Fetch(2);
    i = Arg(0);
    A(i, Arg(1), _lb_A_zero);
    for (t0 = E+1; i > 0; t0++, i--) {
	make_blank(*t0);
    }
    NextF(2);

 OpCase(allocate_heap_III):
    Fetch(3); AH(Arg(0), Arg(1), Arg(2), _lb_allocate_III); NextF(3);

 OpCase(allocate_heap_zero_III):
    Fetch(3);
    i = Arg(0);
    AH(i, Arg(1), Arg(2), _lb_allocate_zero_III);
    for (t0 = E+1; i > 0; t0++, i--) {
	make_blank(*t0);
    }
    NextF(3);

 OpCase(test_heap_II):
    Fetch(2); TH(Arg(0), Arg(1), lb_test_heap); NextF(2);

 OpCase(test_heap_1_move_Iy):
    /*
     * Shortcut for: test_heap Need 1, move r(0) yb(Reg) (common after calls)
     */
    Fetch(2);
    TH(Arg(0), 1, lb_testHeap_1_move_ry);
    yb(Arg(1)) = r(0);
    NextF(2);

 OpCase(test_heap_1_put_list_Iy):
    Fetch(2);
    TH(Arg(0), 1, lb_testHeap_1_putList_ryr);
    PutList(yb(Arg(1)), r(0), r(0));
    NextF(2);

 OpCase(put_string_IId):
    {
      unsigned char* s;

      tmp_arg2 = Arg(0);		/* Length. */
      tmp_arg1 = NIL;
      for (s = (unsigned char *) Arg(1); tmp_arg2 > 0; s--, tmp_arg2--) {
	  PutList(make_small(*s), tmp_arg1, tmp_arg1);
      }
      StoreBifResult(2, tmp_arg1);
    }

#define GTp(Pointer, Arity, Action) \
    if (*(ptr_val(Pointer)) != Arity) Action

 OpCase(test_arity_fxA): Fetch(3); GTp(xb(Arg(1)),Arg(2),goto lb_jump_j); NextF(3);
 OpCase(test_arity_frA): Fetch(2); GTp(r(0),Arg(1),goto lb_jump_j); NextF(2);
 OpCase(test_arity_fyA): Fetch(3); GTp(yb(Arg(1)),Arg(2),goto lb_jump_j); NextF(3);

 OpCase(self_r):   Fetch(0); BF_self_0(r(0)); NextF(0);
 OpCase(self_x):   Fetch(1); BF_self_0(xb(Arg(0))); NextF(1);
 OpCase(self_y):   Fetch(1); BF_self_0(yb(Arg(0))); NextF(1);

#define BF_node_0(R) R = this_node

 OpCase(node_r):   Fetch(0); BF_node_0(r(0)); NextF(0);
 OpCase(node_x):   Fetch(1); BF_node_0(xb(Arg(0))); NextF(1);

    /*
     * Send is almost a standard BIF/2, with the following exceptions:
     *    1) It cannot be traced.
     *	  2) The result is always returned in r(0).
     *	  3) There is no pointer to the send_2 function stored in
     *       the instruction.
     */
 OpCase(i_send):
 send_rescheduled:
    r(0) = send_2(c_p, tmp_arg1, tmp_arg2);
    if (r(0)) {
	Next(0);
    } else if (c_p->freason == RESCHEDULE) {
	c_p->arity = 2;
	c_p->call = OpCode(reschedule_send);
	goto suspend;
    } else if (c_p->freason == TRAP) {
	SET_CP(c_p, I+1);
	SET_I(export_list(unsigned_val(c_p->fvalue))->address);
	r(0) = tmp_arg1;
	x(1) = tmp_arg2;
	Dispatch();
    }
    goto find_func_info2;

 OpCase(tl_jsd):
    GetArg1(1);
    if (is_not_list(tmp_arg1)) {
	goto badarg;
    }
    StoreBifResult(2, ptr_val(tmp_arg1)[1]);

 OpCase(hd_jsd):
    GetArg1(1);
    if (is_not_list(tmp_arg1)) {
	goto badarg;
    }
    StoreBifResult(2, ptr_val(tmp_arg1)[0]);

 OpCase(element_jssd):
    /*
     * Inlined version of element/2 for speed.
     */
    GetArg2(1);
    if (is_small(tmp_arg1) && is_tuple(tmp_arg2)) {
	t0 = ptr_val(tmp_arg2);
	if ((signed_val(tmp_arg1) >= 1) &&
	    (signed_val(tmp_arg1) <= arityval(*t0))) {
	    StoreBifResult(3, t0[signed_val(tmp_arg1)]);
	}
    }
    goto badarg;


 OpCase(element_jIsd):
    /*
     * Inlined version of element/2 for even more speed.
     * The first argument is an untagged integer >= 1.
     */
    GetArg1(2);
    if (is_tuple(tmp_arg1)) {
	t0 = ptr_val(tmp_arg1);
	tmp_arg2 = Arg(1);
	if (tmp_arg2 <= arityval(*t0)) {
	    StoreBifResult(3, t0[tmp_arg2]);
	}
    }
    goto badarg;

 OpCase(catch_yp):
    c_p->catches++;
    yb(Arg(0)) = make_catch(Arg(1));
    Next(2);

 OpCase(catch_end_y):
    c_p->catches--;
    make_blank(yb(Arg(0)));
    Next(1);

#if 0
    /*
     * Start of a receive operation.  Get the next message to process
     * (which is not necessarily the first in queue) or NULL if no
     * messages.
     */
 OpCase(receive):
    Fetch(0);
    msg_ptr = PEEK_MESSAGE(c_p);
    NextF(0);
#endif

    /*
     * Pick up the next message and place it in the selected y register,
     * ready for pattern matching.
     * If no message, jump to a wait or wait_timeout instruction.
     */
 OpCase(loop_rec_py):
    t0 = (uint32 *) PEEK_MESSAGE(c_p);
    if (t0 == NULL) {
	SET_I((uint32 *) Arg(0));
	Goto(*I);		/* Jump to a wait or wait_timeout instruction */
    }
    Fetch(2);
    yb(Arg(1)) = ((ErlMessage *)t0)->mesg;
    NextF(2);

 OpCase(loop_rec_test_heap_0_pyI):
    t0 = (uint32 *) PEEK_MESSAGE(c_p);
    if (t0 == NULL) {
	SET_I((uint32 *) Arg(0));
	Goto(*I);		/* Jump to a wait or wait_timeout instruction */
    }
    Fetch(3);
    yb(Arg(1)) = ((ErlMessage *)t0)->mesg;
    tmp_arg1 = Arg(2);
    ASSERT(c_p->htop <= E && E <= c_p->stack);
    if (E < HTOP + tmp_arg1) {
       SWAPOUT;
       FCALLS -= do_gc2(c_p, tmp_arg1, NULL, 0);
       SWAPIN;
    }
    NextF(3);

    /*
     * Remove a (matched) message from the message queue.
     */
 OpCase(remove_message):
    Fetch(0);
    t0 = (uint32 *) PEEK_MESSAGE(c_p);
    UNLINK_MESSAGE(c_p);
    JOIN_MESSAGE(c_p);
    CANCEL_TIMER(c_p);
    free_message((ErlMessage *) t0);
    NextF(0);

    /*
     * Advance the save pointer to the next message (the current message didn't match),
     * then jump to the LoopRec instruction.
     */
 OpCase(loop_rec_end_p):
    SET_I((uint32 *) Arg(0));
    SAVE_MESSAGE(c_p);
    Goto(*I);		/* To loop_rec */

    /*
     * Prepare to wait for a message or a timeout, whichever occurs first.
     */
 OpCase(i_wait_timeout_ps):
    /*
     * If we have already set the timer, we must NOT set it again.  Therefore,
     * we must test the F_INSLPQUEUE flag as well as the F_TIMO flag.
     */
    if (c_p->flags & (F_INSLPQUEUE | F_TIMO)) {
	goto wait2;
    }
    GetArg1(1);
    if (tmp_arg1 != make_small(0)) {
	if (is_small(tmp_arg1) && signed_val(tmp_arg1) > 0) {
	    c_p->action_time_out = I+3;
	    set_timer(c_p, unsigned_val(tmp_arg1));
	} else if (tmp_arg1 == am_infinity) {
	    c_p->flags |= F_TIMO;
	} else {		/* Wrong time */
	    c_p->freason = TIMEOUT_VALUE;
	    goto find_func_info2;
	}

	/*
	 * Prepare to wait indefinitely for a new message to arrive
	 * (or the time set above if falling through from above).
	 */
#ifndef NO_JUMP_TABLE
    lb_i_wait_p:
    wait2:
#endif
	c_p->i = (uint32 *) Arg(0);
	SWAPOUT;
	c_p->call = (uint32 *) *(c_p->i);
	c_p->arity = 0;
	c_p->status = P_WAITING;
	c_p->current = NULL;
	return REDS_IN(c_p) - FCALLS;
    }
    Next(3);

#ifdef NO_JUMP_TABLE
 OpCase(i_wait_p):
 wait2:
   c_p->i = (uint32 *) Arg(0);
   SWAPOUT;
   c_p->call = (uint32 *) *(c_p->i);
   c_p->arity = 0;
   c_p->status = P_WAITING;
   c_p->current = NULL;
   return REDS_IN(c_p) - FCALLS;
#endif

    /*
     * A timeout has occurred.  Reset the save pointer so that the next
     * receive statement will examine the first message first.
     */
 OpCase(timeout):
    Fetch(0);
    if (IS_TRACED(c_p))
	trace_receive(c_p, am_timeout);
    c_p->flags &= ~F_TIMO;
    JOIN_MESSAGE(c_p);
    NextF(0);

 OpCase(init2_yy):
    Fetch(2);
    make_blank(yb(Arg(0))); make_blank(yb(Arg(1)));
    NextF(2);

 OpCase(allocate_init_IIy):
    Fetch(3);
    A(Arg(0), Arg(1), lb_A9);
    make_blank(yb(Arg(2))); 
    NextF(3);

 OpCase(jmp_switch_sI):
    GetArg1(0);
    if (is_small(tmp_arg1)) {
	tmp_arg1 = signed_val(tmp_arg1);
	if (0 <= tmp_arg1 && tmp_arg1 < Arg(1)) {
	    SET_I((uint32 *) (&Arg(2))[tmp_arg1]);
	    Goto(*I);
	}
    }
    Next(Arg(1) + 2);

 OpCase(lookup_switch_sI):
    GetArg1(0);
    if (is_small(tmp_arg1)) {
	struct Pairs {
	    int val;
	    uint32 *addr;
	};
	struct Pairs* low;
	struct Pairs* high;
	struct Pairs* mid;

	tmp_arg1 = signed_val(tmp_arg1);

    do_lookup:
	low = (struct Pairs *) &Arg(2);
	high = low + Arg(1);
	while (low < high) {
	    mid = low + (high-low) / 2;
	    if (tmp_arg1 < mid->val) {
		high = mid;
	    } else if (tmp_arg1 > mid->val) {
		low = mid + 1;
	    } else {
		SET_I(mid->addr);
		Goto(*I);
	    }
	}
    }
    Next(2*Arg(1)+2);

 OpCase(atom_switch_sI):
    GetArg1(0);
    if (is_atom(tmp_arg1)) {
	goto do_lookup;
    }
    Next(2*Arg(1)+2);

    /*
     * Guard BIFs occur in head only.  On failure, ignore the error and jump
     * to the code for the next clause.  A guard BIF consumes no heap space.
     */

 OpCase(bif1_fbsd):
    GetArg1(2);
    bf = (BifFunction) Arg(1);
    tmp_arg1 = (*bf)(c_p, tmp_arg1);
    if (tmp_arg1) {
	StoreBifResult(3, tmp_arg1);
    }
    SET_I((uint32 *) Arg(0));
    error_info[0] = 0;
    Goto(*I);

    /*
     * Call to BIFs that don't build on the heap.
     */
 OpCase(bif0_bd):
    bf = (BifFunction) Arg(0);
    if (IS_TRACED(c_p))
	trace_bif(c_p, bf);
    tmp_arg1 = (*bf)(c_p);
    StoreBifResult(1, tmp_arg1);

 OpCase(bif1_body_bsd):
    GetArg1(1);
    bf = (BifFunction) Arg(0);
    if (IS_TRACED(c_p))
	trace_bif(c_p, bf, tmp_arg1);
    tmp_arg1 = (*bf)(c_p, tmp_arg1);
    if (tmp_arg1) {
	StoreBifResult(2, tmp_arg1);
    }
    goto find_func_info2;

 OpCase(bif2_body_b):
    FetchAlt(1);
    bf = (BifFunction) Arg(0);
    if (IS_TRACED(c_p))
	trace_bif(c_p, bf, tmp_arg1, tmp_arg2);
    tmp_arg1 = (*bf)(c_p, tmp_arg1, tmp_arg2);
    if (tmp_arg1) {
	StoreResult(tmp_arg1);
	NextFAlt(1);
    }
    goto find_func_info2;

 OpCase(bif3_body_bs):
    FetchAlt(2);
    GetR(1, tmp_arg3);
    bf = (BifFunction) Arg(0);
    if (IS_TRACED(c_p))
	trace_bif(c_p, bf, tmp_arg1, tmp_arg2, tmp_arg3);
    tmp_arg1 = (*bf)(c_p, tmp_arg1, tmp_arg2, tmp_arg3);
    if (tmp_arg1) {
	StoreResult(tmp_arg1);
	NextFAlt(2);
    }
    goto find_func_info2;

    /*
     * The most general BIF call.  The BIF may build any amount of data
     * on the heap.  The result is always returned in r(0).
     * Must be followed by a TestHeap instruction (if anything need to
     * to be constructed on the heap).
     */
 OpCase(gc_bif0_b):
    bf = (BifFunction) Arg(0);
    SWAPOUT;
    c_p->fcalls = FCALLS - 1;
    if (IS_TRACED(c_p))
	trace_bif(c_p, bf);
    r(0) = (*bf)(c_p);
    FCALLS = c_p->fcalls;
    HTOP = c_p->htop;
    Next(1);

 OpCase(gc_bif1_body_bs):
    GetArg1(1);

 Bif_1_rescheduled:
    bf = (BifFunction) Arg(0);
    SWAPOUT;
    c_p->fcalls = FCALLS - 1;
    if (IS_TRACED(c_p))
	trace_bif(c_p, bf, tmp_arg1);
    r(0) = (*bf)(c_p, tmp_arg1);
    FCALLS = c_p->fcalls;
    HTOP = c_p->htop;
    if (r(0)) {
	Next(2);
    } else if (c_p->freason == RESCHEDULE) {
	c_p->arity = 1;
	goto suspend_bif;
    } else if (c_p->freason == TRAP) {
	SET_CP(c_p, I+3);
	SET_I(export_list(unsigned_val(c_p->fvalue))->address);
	r(0) = tmp_arg1;
	Dispatch();
    }
    goto find_func_info2;

 OpCase(gc_bif2_body_b):
 Bif_2_rescheduled:
    bf = (BifFunction) Arg(0);
    SWAPOUT;
    c_p->fcalls = FCALLS - 1;
    if (IS_TRACED(c_p))
	trace_bif(c_p, bf, tmp_arg1, tmp_arg2);
    r(0) = (*bf)(c_p, tmp_arg1, tmp_arg2);
    FCALLS = c_p->fcalls;
    HTOP = c_p->htop;
    if (r(0)) {
	Next(1);
    } else if (c_p->freason == RESCHEDULE) {
	c_p->arity = 2;
	goto suspend_bif;
    } else if (c_p->freason == TRAP) {
	SET_CP(c_p, I+2);
	SET_I(export_list(unsigned_val(c_p->fvalue))->address);
	r(0) = tmp_arg1;
	x(1) = tmp_arg2;
	Dispatch();
    }
    goto find_func_info2;

 OpCase(gc_bif3_body_bs):
    GetR(1, tmp_arg3);

 Bif_3_rescheduled:
    SWAPOUT;
    c_p->fcalls = FCALLS - 1;
    bf = (BifFunction) Arg(0);
    if (IS_TRACED(c_p))
	trace_bif(c_p, bf, tmp_arg1, tmp_arg2, tmp_arg3);
    r(0) = (*bf)(c_p, tmp_arg1, tmp_arg2, tmp_arg3);
    FCALLS = c_p->fcalls;
    HTOP = c_p->htop;
    if (r(0)) {
	Next(2);
    } else if (c_p->freason == RESCHEDULE) {
	/*
	 * Moving c_p->arg to a register is shorter than using c_p->arg_reg
	 * directly, since c_p->arg_reg is a pointer (not an array)
	 * and the compiler generates code to fetch the pointer every time.
	 *
	 * We store tmp_arg3 here to help the compiler keep it in a register.
	 */
	t0[2] = tmp_arg3;
	c_p->arity = 3;
	goto suspend_bif;
    }
    goto find_func_info2;

 OpCase(i_times_j):
    tmp_arg1 = do_mixed_times(c_p, tmp_arg1, tmp_arg2);
    if (tmp_arg1) {
	StoreResult(tmp_arg1);
	Next(1);
    }
    /* Fall through */

 lb_Cl_error:
    /* May occur in head or body */
    if(!ISFUNCBEGIN(Arg(0))) {
    OpCase(jump_j):
	SET_I((uint32 *) Arg(0));
	Goto(*I);
    }
    ASSERT(c_p->freason != BADMATCH || c_p->fvalue != 0);
    goto error_action_set_location;

 OpCase(i_m_div_j):
    tmp_arg1 = do_mixed_div(c_p, tmp_arg1, tmp_arg2);
    if (tmp_arg1) {
	StoreResult(tmp_arg1);
	Next(1);
    }
    goto lb_Cl_error;

 OpCase(i_int_div_j):
    if (tmp_arg2 == SMALL_ZERO) {
	goto badarith;
    } else if (is_both_small(tmp_arg1, tmp_arg2)) {
	tmp_arg1 = make_small(signed_val(tmp_arg1) / signed_val(tmp_arg2));
	StoreResult(tmp_arg1);
	Next(1);
    }
    tmp_arg1 = do_int_div(c_p, tmp_arg1, tmp_arg2);
    if (tmp_arg1) {
	StoreResult(tmp_arg1);
	Next(1);
    }
    goto lb_Cl_error;

 OpCase(i_rem_j):
    if (tmp_arg2 == SMALL_ZERO) {
	goto badarith;
    } else if (is_both_small(tmp_arg1, tmp_arg2)) {
	tmp_arg1 = make_small(signed_val(tmp_arg1) % signed_val(tmp_arg2));
    } else {
	tmp_arg1 = do_int_rem(c_p, tmp_arg1, tmp_arg2);
    }
    if (tmp_arg1) {
	StoreResult(tmp_arg1);
	Next(1);
    }
    goto lb_Cl_error;

 OpCase(i_band_j):
    FetchAlt(1);
    if (is_both_small(tmp_arg1, tmp_arg2)) {
	/*
	 * No need to untag -- TAG & TAG == TAG.
	 */
	tmp_arg1 &= tmp_arg2;
	StoreResult(tmp_arg1);
	NextFAlt(1);
    }
    tmp_arg1 = do_band(c_p, tmp_arg1, tmp_arg2);
    if (tmp_arg1) {
	StoreResult(tmp_arg1);
	NextFAlt(1);
    }
    goto lb_Cl_error;

 OpCase(i_bor_j):
    FetchAlt(1);
    if (is_both_small(tmp_arg1, tmp_arg2)) {
	/*
	 * No need to untag -- TAG | TAG == TAG.
	 */
	tmp_arg1 |= tmp_arg2;
	StoreResult(tmp_arg1);
	NextFAlt(1);
    }
    tmp_arg1 = do_bor(c_p, tmp_arg1, tmp_arg2);
    if (tmp_arg1) {
	StoreResult(tmp_arg1);
	NextFAlt(1);
    }
    goto lb_Cl_error;

 OpCase(i_bxor_j):
    FetchAlt(1);
    if (is_both_small(tmp_arg1, tmp_arg2)) {
	/*
	 * We could extract the tag from one argument, but a tag extraction
	 * could mean a shift.  Therefore, play it safe here.
	 */
	tmp_arg1 = make_small(signed_val(tmp_arg1) ^ signed_val(tmp_arg2));
	StoreResult(tmp_arg1);
	NextFAlt(1);
    }
    tmp_arg1 = do_bxor(c_p, tmp_arg1, tmp_arg2);
    if (tmp_arg1) {
	StoreResult(tmp_arg1);
	NextFAlt(1);
    }
    goto lb_Cl_error;

 OpCase(i_bsl_j):
    if (is_small(tmp_arg2)) {
	i = signed_val(tmp_arg2);

	if (is_small(tmp_arg1)) {
	small_shift:
	    ires = signed_val(tmp_arg1);
	    if (i == 0 || ires == 0) {
		StoreResult(tmp_arg1);
		Next(1);
	    } else if (i < 0)  {	/* Right shift */
		i = -i;
		if (i >= 27) {
		    tmp_arg1 = (ires < 0) ? SMALL_MINUS_ONE : SMALL_ZERO;
		} else {
		    tmp_arg1 = make_small(ires >> i);
		}
		StoreResult(tmp_arg1);
		Next(1);
	    } else if (i < 27) {	/* Left shift */
		if ((ires > 0 && ((-1 << (27-i)) & ires) == 0) ||
		    ((-1 << (27-i)) & ~ires) == 0) {
		    tmp_arg1 = make_small(ires << i);
		    StoreResult(tmp_arg1);
		    Next(1);
		}
	    }
	    tmp_arg1 = small_to_big(ires, TMP_BIG1(c_p));

	big_shift:
	    if (i > 0) {		/* Right shift. */
		ires = big_size(tmp_arg1) + (i / D_EXP);
	    } else {			/* Left shift. */
		ires = big_size(tmp_arg1);
		if (ires <= (-i / D_EXP))
		    ires = 3;
		else
		    ires -= (-i / D_EXP);
	    }
	    t0 = ArithAlloc(c_p, BIG_NEED_SIZE(ires+1));
	    tmp_arg1 = big_lshift(tmp_arg1, i, t0);
	    ArithCheck(c_p);
	    if (is_nil(tmp_arg1)) {
	    system_limit:
		c_p->freason = SYSTEM_LIMIT;
		goto lb_Cl_error;
	    }
	    StoreResult(tmp_arg1);
	    Next(1);
	} else if (is_big(tmp_arg1)) {
	    if (i == 0) {
		StoreResult(tmp_arg1);
		Next(1);
	    }
	    goto big_shift;
	}
    }
 badarith:
    c_p->freason = BADARITH;
    goto lb_Cl_error;
    
 OpCase(i_bsr_j):
    if (is_small(tmp_arg2)) {
	i = -signed_val(tmp_arg2);
	if (is_small(tmp_arg1)) {
	    goto small_shift;
	} else if (is_big(tmp_arg1)) {
	    if (i == 0) {
		StoreResult(tmp_arg1);
		Next(1);
	    }
	    goto big_shift;
	}
    }
    goto badarith;

 OpCase(i_apply_last_sssP):
    GetArg3(0);
    c_p->fcalls = FCALLS-1;
    c_p->i = I;
    SWAPOUT;

 apply_last_rescheduled:
    r(0) = apply(c_p, tmp_arg1, tmp_arg2, tmp_arg3, reg);
    HTOP = c_p->htop;
    FCALLS = c_p->fcalls;
    if (r(0)) {			/* Bif was successfully applied */
	uint32 deallocate = Arg(3);
	SET_I(cp_ptr_val(*E));
	E = ADD_BYTE_OFFSET(E, deallocate);
	ASSERT(c_p->htop <= E && E <= c_p->stack);
	Goto(*I);
    } else if (c_p->freason == DO_APPLY) { /* Apply on threaded code */
	r(0) = reg[0];
	SET_CP(c_p, (uint32 *) E[0]);
	E = ADD_BYTE_OFFSET(E, Arg(3));
	SET_I(c_p->call);
	Dispatch();
    } else if (c_p->freason == RESCHEDULE) { /* Busy port */
	c_p->arg_reg[2] = tmp_arg3;
	c_p->arity = 3;
	c_p->call = OpCode(reschedule_apply_last);
	goto suspend;
    }
    goto find_func_info2;


 OpCase(i_apply_sss):
    GetArg3(0);
    c_p->fcalls = FCALLS-1;
    c_p->i = I;
    SWAPOUT;

 apply3_rescheduled:
    r(0) = apply(c_p, tmp_arg1, tmp_arg2, tmp_arg3, reg);
    HTOP = c_p->htop;
    FCALLS = c_p->fcalls;
    if (r(0)) {			/* Bif was successfully applied */
	Next(3);
    } else if (c_p->freason == DO_APPLY) { /* Apply on threaded code */
	r(0) = reg[0];
	SET_CP(c_p, I+4);
	SET_I(c_p->call);
	Dispatch();
    } else if (c_p->freason == RESCHEDULE) { /* Busy port */
	c_p->arg_reg[2] = tmp_arg3;
	c_p->arity = 3;
	c_p->call = OpCode(reschedule_apply);
	goto suspend;
    }
    goto find_func_info2;

#ifdef DEBUG
    /*
     * Set a breakpoint here to get control just after a call instruction.
     * I points to the first instruction in the called function.
     *
     * In gdb, use 'call dis(I-4, 1)' to show the name of the function.
     */
 do_dispatch:
    DispatchMacro();
#endif

    /*
     * Jumped to from the Dispatch() macro when the reductions are used up.
     *
     * Since the I register points just beyond the FuncBegin instruction, we
     * can (normally) get the module, function, and arity for the function being
     * called from I[-3], I[-2], and I[-1] respectively.
     */
 context_switch:
    if (I_bis == OpCode(call_error_handler)) {
	/*
	 * Since there is no FuncBegin instruction before the lb_call_error_handler
	 * instruction, better go ahead and execute the instruction now,
	 * or c_p->current would be set pointing to garbage.
	 */
	goto lb_call_error_handler;
    }
    c_p->call = I_bis;
    c_p->current = I-3;		/* FuncBegin + 1: points to Mod, Func, Arity */
    c_p->arity = c_p->current[2];

    /*
     * Make sure that there is enough room for the arguments register to be saved.
     */
    if (c_p->arity > c_p->max_arg_reg) {
	/*
	 * Yes, this is an expensive operation, but you only pay it the first
	 * time you call a function with more than 6 arguments which is
	 * scheduled out.  This is better than paying for 26 words of wasted
	 * space for most processes which never call functions with more than
	 * 6 arguments.
	 */
	c_p->max_arg_reg = c_p->arity;
	if (c_p->arg_reg != c_p->def_arg_reg) {
	    c_p->arg_reg = (uint32 *) safe_realloc((char *) c_p->arg_reg,
						   c_p->arity * sizeof(c_p->arg_reg[0]));
	} else {
	    c_p->arg_reg = (uint32 *) safe_alloc(c_p->arity * sizeof(c_p->arg_reg[0]));
	}
    }

    /*
     * Since REDS_IN(c_p) is stored in the save area (c_p->arg_reg) we must read it
     * now before saving registers.
     *
     * The '+ 1' compensates for the last increment which was not done
     * (beacuse the code for the Dispatch() macro becomes shorter that way).
     */
    tmp_arg1 = REDS_IN(c_p) - FCALLS + 1;

    /*
     * Save the argument registers.
     */
    t0 = c_p->arg_reg;
    for (i = c_p->arity - 1; i > 0; i--) {
	t0[i] = reg[i];
    }
    c_p->arg_reg[0] = r(0);
    SWAPOUT;
    c_p->i = I;
    add_to_schedule_q(c_p);
    return tmp_arg1;

 OpCase(int_bnot_jsd):
    GetArg1(1);
    if (is_small(tmp_arg1)) {
	tmp_arg1 = make_small(~signed_val(tmp_arg1));
    } else if (is_big(tmp_arg1)) {
	t0 = ArithAlloc(c_p, BIG_NEED_SIZE(big_size(tmp_arg1)+1));
	tmp_arg1 = big_bnot(tmp_arg1, t0);
	ArithCheck(c_p);
	if (is_nil(tmp_arg1))
	    goto system_limit;
    } else {
	goto badarith;
    }
    StoreBifResult(2, tmp_arg1);

 OpCase(i_is_ne_exact_f):
    if (eq(tmp_arg1, tmp_arg2)) {
	ClauseFail();
    }
    Next(1);

 OpCase(normal_exit):
    SWAPOUT;
    do_exit(c_p, am_normal);
    return REDS_IN(c_p) - FCALLS;

    /*
     * Suspend BIF and prepare BIF to be rescheduled.
     */
 suspend_bif:
    c_p->call = OpCode(reschedule);

 suspend:
    /*
     * Moving c_p->arg to a register is shorter than using c_p->arg_reg
     * directly, since c_p->arg_reg is a pointer (not an array)
     * and the compiler generates code to fetch the pointer every time.
     */
    t0 = c_p->arg_reg;
    t0[0] = tmp_arg1;
    t0[1] = tmp_arg2;
    SWAPOUT;
    c_p->i = I;
    c_p->current = NULL;
    return REDS_IN(c_p) - FCALLS;

 OpCase(reschedule_apply):
    tmp_arg1 = r(0);
    tmp_arg2 = x(1);
    tmp_arg3 = x(2);
    goto apply3_rescheduled;

 OpCase(reschedule_apply_last):
    tmp_arg1 = r(0);
    tmp_arg2 = x(1);
    tmp_arg3 = x(2);
    goto apply_last_rescheduled;

 OpCase(reschedule_send):
    tmp_arg1 = r(0);
    tmp_arg2 = x(1);
    goto send_rescheduled;

 OpCase(reschedule):
    tmp_arg1 = r(0);
    tmp_arg2 = x(1);
    tmp_arg3 = x(2);
    switch (c_p->arity) {
    case 1: goto Bif_1_rescheduled;
    case 2: goto Bif_2_rescheduled;
    case 3: goto Bif_3_rescheduled;
    default: ASSERT(0);
    }

 badarg:
    c_p->freason = BADARG;
    goto lb_Cl_error;

 find_func_info:
    c_p->freason = BADMATCH;

 find_func_info2:
    if ((c_p->current = find_function_from_pc(I)) == NULL) {
	/*
	 * We will assume that this is the initial function
	 * (e.g. spawn_link(erlang, abs, [1])).
	 */
	c_p->current = c_p->initial+1;
    }
    goto lb_error_action_code;

 error_action_set_location:
    c_p->current = (uint32 *) Arg(0)+1;
    /* Fall through into error_action_code */

 OpCase(error_action_code):
    ASSERT(c_p->freason != TRAP); /* Should have been handled earlier. */
    ASSERT(c_p->freason != RESCHEDULE); /* Should have been handled earlier. */
    switch (c_p->freason) {
    case NORMAL:
	tmp_arg1 = am_normal;
	break;
    case BADMATCH:
	ASSERT(c_p->fvalue != 0);
	SWAPOUT;
	HTOP = HAlloc(c_p, 3);
	tmp_arg1 = TUPLE2(HTOP, am_badmatch, c_p->fvalue);
	SWAPIN;
	break;
    case CASE_CLAUSE:
	ASSERT(c_p->fvalue != 0);
	SWAPOUT;
	HTOP = HAlloc(c_p, 3);
	tmp_arg1 = TUPLE2(HTOP, am_case_clause, c_p->fvalue);
	SWAPIN;
	break;
    case IF_CLAUSE:
	tmp_arg1 = am_if_clause;
	break;
    case UNDEF:
	tmp_arg1 = am_undef;
	break;
    case BADARITH:
	tmp_arg1 = am_badarith;
	break;
    case BADARG:
	tmp_arg1 = am_badarg;
	break;
    case FUNCTION_CLAUSE:
	tmp_arg1 = am_function_clause;
	reg[0] = r(0);
	break;
    case BADSIGNAL:
	tmp_arg1 = am_badsig;
	break;
    case TIMEOUT_VALUE:
	tmp_arg1 = am_timeout_value;
	break;
    case NOPROC:
	tmp_arg1 = am_noproc;
	break;
    case USER_EXIT:
    case USER_ERROR:
	ASSERT(c_p->fvalue != 0);
	tmp_arg1 = c_p->fvalue;
	break;
    case THROWN:
	if (c_p->catches != 0) {
	    ASSERT(c_p->fvalue != 0);
	    tmp_arg1 = c_p->fvalue;
	} else
	    tmp_arg1 = am_nocatch;
	break;
    case NOTALIVE:
	tmp_arg1 = am_notalive;
	break;
    case SYSTEM_LIMIT:
	tmp_arg1 = am_system_limit;
	break;
    default:
	c_p->catches = 0;
	tmp_arg1 = am_internal_error;
	break;
    } /* end switch */
#ifdef DEBUG
    c_p->fvalue = 0;
#endif

    if (c_p->catches == 0) {
	/*
	 * No catch active -- terminate the process.
	 */
	SWAPOUT;
	terminate_process(c_p, tmp_arg1, reg);
	return REDS_IN(c_p) - FCALLS;
    } else {
	/*
	 * Search for the first catch.
	 */
	for (t0 = E + 1; t0 < c_p->stack; t0++) {
	    if (is_catch(*t0)) {
		SET_I((uint32 *)(ptr_val(*t0)));
		ASSERT(ISCATCHEND(I));
		I_bis = (uint32 *) *I;
		while (is_not_CP(*t0)) {
		    t0--;
		    ASSERT(E <= t0);
		}
		E = t0;

		/*
		 * When handling errors, we will use HAlloc(), because it
		 * doesn't do any garbage collection.  Not doing GCs will
		 * simplify the code below.
		 */

		SWAPOUT;
		if (c_p->freason == THROWN) {
		    r(0) = tmp_arg1;
		    Goto(I_bis);
		} else if (c_p->freason == NORMAL || c_p->freason == USER_EXIT) {
		    HTOP = HAlloc(c_p, 3);
		    r(0) = TUPLE2(HTOP, am_EXIT, tmp_arg1);
		    SWAPIN;
		    Goto(I_bis);
		} else if (error_info[0]) { /* Bif error */
		    i = error_info[0];
		    HTOP = HAlloc(c_p, 14+2*i);
		    r(0) = NIL;
		    while (i >= 2) {
			r(0) = CONS(HTOP, error_info[i], r(0));
			HTOP += 2;
			i--;
		    }
		    r(0) = CONS(HTOP, r(0), NIL);
		    HTOP += 2;
		    r(0) = CONS(HTOP, error_info[1], r(0));
		    HTOP += 2;
		    error_info[0] = 0;
		} else if (c_p->freason == FUNCTION_CLAUSE) {
		    i = c_p->current[2];
		    HTOP = HAlloc(c_p, 10+2*i);
		    r(0) = NIL;
		    while (i > 0) {
			i--;
			r(0) = CONS(HTOP, reg[i], r(0));
			HTOP += 2;
		    }
		} else {
		    HTOP = HAlloc(c_p, 10);
		    r(0) = make_small(c_p->current[2]);
		}
		r(0) = TUPLE3(HTOP, c_p->current[0], c_p->current[1], r(0));
		HTOP += 4;
		r(0) = TUPLE2(HTOP, tmp_arg1, r(0));
		HTOP += 3;
		r(0) = TUPLE2(HTOP, am_EXIT, r(0));
		SWAPIN;
		Goto(I_bis);
	    }
	}
    }
    erl_exit(1, "Catch not found");

 OpCase(call_error_handler):
    /*
     * Pick up the export entry from the I register.  This code is not
     * designed to look nice, but to avoid a special case in the CEx and
     * CExL instructions.
     *
     * At this point, I points to the op_error_handler field in the export
     * entry for a function which is not loaded.
     */
    exp = 0;
    exp = (Export *) (((char *) I) - ((char *) &exp->op_error_handler));
    SWAPOUT;
    reg[0] = r(0);
    tmp_arg1 = call_error_handler(c_p, exp, reg);
    r(0) = reg[0];
    SWAPIN;
    if (!tmp_arg1 && c_p->freason == DO_APPLY) {
	SET_I(c_p->call);
	Dispatch();
    }
    goto lb_error_action_code;	/* c_p->current is already set */

 OpCase(check_process_code_pss):
    /*
     * The check_process_code/2 BIF is an instruction because we must
     * update c_p->i before calling it.
     */
    GetArg2(1);
    SWAPOUT;
    c_p->i = I;
    if (IS_TRACED(c_p))
	trace_bif(c_p, check_process_code_2, tmp_arg1, tmp_arg2);
    r(0) = check_process_code_2(c_p, tmp_arg1, tmp_arg2);
    SWAPIN;
    if (!r(0))
	goto error_action_set_location;
    Next(3);

 OpCase(process_info_pss):
    /*
     * This BIF is not time-critical, but we must set the current_*
     * fields in the process structure before calling it, which is
     * is not needed for other BIFs.
     * Doing it here avoids a special case for other BIFs with two arguments.
     */
    GetArg2(1);
    c_p->current = (uint32 *)Arg(0)+1;
    SWAPOUT;
    if (IS_TRACED(c_p))
	trace_bif(c_p, process_info_2, tmp_arg1, tmp_arg2);
    r(0) = process_info_2(c_p, tmp_arg1, tmp_arg2);
    SWAPIN;
    if (!r(0))
	goto error_action_set_location;
    Next(3);

 OpCase(i_put_tuple0_d):
    tmp_arg1 = make_tuple(HTOP);
    HTOP[0] = make_arityval(0);
    HTOP++;
    StoreBifResult(0, tmp_arg1);

 OpCase(node_y):   Fetch(1); BF_node_0(yb(Arg(0))); NextF(1);

    /*
     * Infrequently used guard tests.
     */
#define TNmb(X, Action) if (is_not_integer(X) && is_not_float(X)) Action

 OpCase(is_number_fx): Fetch(2); TNmb(xb(Arg(1)),goto lb_jump_j); NextF(2);
 OpCase(is_number_fr): Fetch(1); TNmb(r(0),goto lb_jump_j); NextF(1); 
 OpCase(is_number_fy): Fetch(2); TNmb(yb(Arg(1)),goto lb_jump_j); NextF(2); 

#define TCnst(X, Action) if (is_list(X) || is_nil(X) || is_tuple(X)) Action

 OpCase(is_constant_fx): Fetch(2); TCnst(xb(Arg(1)),goto lb_jump_j); NextF(2);
 OpCase(is_constant_fr): Fetch(1); TCnst(r(0),goto lb_jump_j); NextF(1); 
 OpCase(is_constant_fy): Fetch(2); TCnst(yb(Arg(1)),goto lb_jump_j); NextF(2); 

    /*
     * Candidates for removal (never observed to be executed).
     */
 OpCase(test_arity_pxA): Fetch(3); GTp(xb(Arg(1)),Arg(2),Badmatch(xb(Arg(1)))); NextF(3);
 OpCase(test_arity_prA): Fetch(2); GTp(r(0),Arg(1),Badmatch(r(0))); NextF(2);
 OpCase(test_arity_pyA): Fetch(3); GTp(yb(Arg(1)),Arg(2),Badmatch(yb(Arg(1)))); NextF(3);

 OpCase(i_case_end_s):
    GetArg1(0);
    c_p->fvalue = tmp_arg1;
    c_p->freason = CASE_CLAUSE;
    goto find_func_info2;

 OpCase(i_if_end):
    c_p->freason = IF_CLAUSE;
    goto find_func_info2;

 OpCase(i_function_clause):
    c_p->freason = FUNCTION_CLAUSE;
    goto find_func_info2;

 OpCase(func_info_aaI):
    ASSERT(c_p->fvalue != 0);
    c_p->freason = BADMATCH;
    c_p->current = I + 1;
    goto lb_error_action_code;

 OpCase(badmatch_cp):
    c_p->fvalue = Arg(0);
    SET_I((uint32 *) Arg(1));
    Goto(*I);

#include "beam_cold.h"
    
    /*
     * Trace and debugging support.
     */

 OpCase(deallocate_I):		/* Only used when tracing. */
    Fetch(1); D(Arg(0)); NextF(1);

 OpCase(i_trace_info):
    if (IS_TRACED_FL(c_p, F_TRACE_CALLS)) {
	reg[0] = r(0);
	trace_call_or_ret(c_p, I-3, reg, am_call);
    }
    Next(0);

 OpCase(i_trace_return):
    SET_I(c_p->cp);
    if (IS_TRACED_FL(c_p, F_TRACE_CALLS|F_TIMESTAMP)) {
	c_p->current = find_function_from_pc(I);
	reg[0] = r(0);
	trace_call_or_ret(c_p, c_p->current, reg, am_return);
    }
    Goto(*I);

    DEFINE_COUNTING_LABELS;
#ifndef NO_JUMP_TABLE
#ifdef DEBUG
 end_emulator_loop:
#endif
#endif

 OpCase(int_code_end):
 OpCase(label_L):
    erl_exit(1, "meta op\n");

    /*
     * One-time initialization of Beam emulator.
     */

 init_emulator:
#ifndef NO_JUMP_TABLE
    /* Are tables correctly generated by beam_makeops? */
    ASSERT(sizeof(counting_opcodes) == sizeof(opcodes));

    if (!count_instructions) {
	beam_ops = opcodes;
    } else {
#ifdef DEBUG
	counting_opcodes[op_catch_end_y] = LabelAddr(lb_catch_end_y);
#endif
	counting_opcodes[op_func_info_aaI] = LabelAddr(lb_func_info_aaI);
	beam_ops = counting_opcodes;
    }
#endif /* NO_JUMP_TABLE */

    beam_exit = OpCode(error_action_code);
    ready = OpCode(normal_exit);
    beam_apply_op = OpCode(i_apply_sss);
    em_call_error_handler = OpCode(call_error_handler);
    return 0;

#ifdef NO_JUMP_TABLE
 default:
    erl_exit(1, "unexpected op code %d\n",Go);
  }
#endif
    return 0;			/* Never executed */
}

static uint32
call_error_handler(p, exp, reg)
Process* p; Export* exp; uint32* reg;
{
    uint32* hp;
    int eh;			/* Index for error handler */
    int arity;
    uint32 args;
    int i;

    /*
     * Search for the error_handler module.
     */
    eh = find_function(p->error_handler, am_undefined_function, 3);
    if (eh == -1) {		/* No error handler */
	static uint32 func_info[3];
	func_info[0] = am_error_handler;
	func_info[1] = am_undefined_function;
	func_info[2] = 3;
	p->current = func_info;
	p->freason = UNDEF;
	return 0;
    }
    p->call = export_list(eh)->address;

    /*
     * Create a list with all arguments in the x registers.
     */
    arity = exp->arity;
    hp = HAlloc(p, arity*2);
    args = NIL;
    for (i = arity-1; i >= 0; i--) {
	args = CONS(hp, reg[i], args);
	hp += 2;
    }

    /*
     * Set up registers for call to error_handler:undefined_function/3.
     */
    reg[0] = make_atom(exp->module);
    reg[1] = make_atom(exp->function);
    reg[2] = args;
    p->freason = DO_APPLY;
    return 0;
}

static void
terminate_process(Process* p,
		  uint32 code,	/* Exit code (term) */
		  uint32* reg)	/* X registers (reg[0] == saved x0) */
{
    int i;
    uint32* hp;			/* Heap pointer for building terms. */
    uint32 tmp0;		/* Temporary for building terms. */

    switch (p->freason) {
    case NORMAL:
    case USER_EXIT:
    case THROWN:
	do_exit(p, code);
	return;
    }
	
    if (error_info[0]) {	/* bif error */
	display(p->id, CBUF);
	erl_printf(CBUF, " error in BIF ");
	display(error_info[1], CBUF);
	erl_printf(CBUF, "/%d(", error_info[0]-1);
	i = 2;
	while (i <= error_info[0]) {
	    display(error_info[i++], CBUF);
	    if(i >  error_info[0])
		erl_printf(CBUF, ")\n");
	    else
		erl_printf(CBUF, ",");
	}
	error_info[0] = 0;
    }
    
    display(p->id, CBUF);
    erl_printf(CBUF, " error: ");
    display(code, CBUF);
    erl_printf(CBUF," in ");
    display(p->current[0], CBUF);
    erl_printf(CBUF, ":");
    display(p->current[1], CBUF);
    erl_printf(CBUF, "/%d", p->current[2]);
    if (code == am_function_clause) {
	erl_printf(CBUF, "(");
	i = 0;
	while (i < p->current[2]){
	    display(reg[i++], CBUF);
	    if (i < p->current[2])
		erl_printf(CBUF, ",");
	}
	erl_printf(CBUF, ")");
    }
    erl_printf(CBUF, "\n");
    send_error_to_logger(p->group_leader);

    /*
     * Now build the final exit code.
     */
    i = p->current[2];
    hp = HAlloc(p, 7+i*2);
    if (code != am_function_clause) {
	tmp0 = make_small(i);
    } else {			/* Function clause */
	i--;
	for (tmp0 = NIL; i >= 0; i--) {
	    tmp0 = CONS(hp, reg[i], tmp0);
	    hp += 2;
	}
    }
    tmp0 = TUPLE3(hp, p->current[0], p->current[1], tmp0);
    hp += 4;
    code = TUPLE2(hp, code, tmp0);
    do_exit(p, code);
}

static uint32
apply(Process* p, uint32 module, uint32 function, uint32 args, uint32* reg)
{
    int arity;
    int j;
    uint32 tmp;

 reapply:

    /*
     * Check the arguments which should be of the form apply(Module,
     * Function, Arguments) where Module and Function are atoms and
     * Arguments is an arity long list of terms.
     */
    if (is_not_atom(module) || is_not_atom(function)) {
	/*
	 * No need to test args here -- done below.
	 */
    error:
	p->freason = BADARG;
	error_info[0] = 4;
	error_info[1] = am_apply;
	error_info[2] = module;
	error_info[3] = function;
	error_info[4] = args;
	return 0;
    }

    /*
     * Walk down the 3rd parameter of apply (the argument list) and copy
     * the parameters to the x registers (reg[]).
     */
    
    tmp = args;
    arity = 0;
    while (is_list(tmp)) {
	reg[arity++] = CAR(ptr_val(tmp));
	tmp = CDR(ptr_val(tmp));
    }
    if (is_not_nil(tmp)) {	/* Must be well formed list */
	goto error;
    }

    /*
     * Check if we are calling a BIF.
     */
    if (module == am_erlang) {
	int index = find_bif(unsigned_val(function), arity);

	/*
	 * XXX If we remove the apply BIF from the table, this
	 * test only need to be done if the lookup fails.
	 */
	if (function == am_apply && arity == 3) {
	    module = reg[0];
	    function = reg[1];
	    args = reg[2];
	    goto reapply;
	}

	if (index != -1) {
	    uint32 res;

	    if (IS_TRACED_FL(p, F_TRACE_BIFS)) {
		trace_bif(p, bif_table[index].f, reg[0], reg[1], reg[2]);
	    }
	    p->current = NULL;
	    switch(bif_table[index].arity) {
	    case 0:
		res = (*bif_table[index].f)(p);
		break;
	    case 1:
		res = (*bif_table[index].f)(p, reg[0]);
		break;
	    case 2:
		res = (*bif_table[index].f)(p, reg[0], reg[1]);
		break;
	    case 3:
		res = (*bif_table[index].f)(p, reg[0], reg[1], reg[2]);
		break;
	    default:
		ASSERT(0);
	    }
	    if (res == 0 && p->freason == TRAP) {
		p->call = export_list(unsigned_val(p->fvalue))->address;
		p->freason = DO_APPLY;
	    }
	    return res;
	}

	/* 
	 * Not found, fall through to error handler
	 */
    }
    
    /*
     * Get the index into the export table.
     */

    if ((j = find_function(module, function, arity)) < 0) {
	if ((j = find_function(p->error_handler, am_undefined_function, 3)) < 0) {
	    goto error;
	} else {		/* Load arguments for error handler call */
	    reg[0] = module;
	    reg[1] = function;
	    reg[2] = args;
	}
    }

    p->call = export_list(j)->address;
    p->freason = DO_APPLY;
    return 0;
}

uint32*
arith_alloc(Process* p, uint32 need)
{
    ErlMessageBuffer* bp;
    uint32 n;
    uint32* hp;
#ifdef DEBUG
    uint32 i;
#endif

    n = (need < 128) ? 128 : need;
    bp = new_message_buffer(n+1);
    bp->next = p->mbuf;
    p->mbuf = bp;
    p->mbuf_sz += n+1;
    if (p->mbuf_sz >= p->heap_sz)
	p->flags |= F_NEED_GC;
    p->arith_avail = n - need;
    hp = bp->mem;
#ifdef DEBUG
    for (i = 0; i <= n; i++) {
	hp[i] = ARITH_MARKER;
    }
#endif
    p->arith_heap = hp + need;
#ifdef DEBUG
    p->arith_check_me = p->arith_heap;
#endif
    return hp;
}

static uint32
do_mixed_plus(Process* p, uint32 arg1, uint32 arg2)
{
    uint32 res;
    FloatDef f1, f2;
    dsize_t sz1, sz2, sz;
    int need_heap;
    uint32* hp;
    int ires;

    switch (NUMBER_CODE(arg1, arg2)) {
    case SMALL_SMALL:
	ires = signed_val(arg1) + signed_val(arg2);
	ASSERT(MY_IS_SSMALL(ires) == IS_SSMALL(ires));
	if (MY_IS_SSMALL(ires)) {
	    return make_small(ires);
	} else {
	    hp = ArithAlloc(p, 2);
	    res = small_to_big(ires, hp);
	    ArithCheck(p);
	    return res;
	}
	break;
    case SMALL_BIG:
	if (is_nil(arg2)) {
	    goto badarith;
	}
	if (arg1 == SMALL_ZERO)
	    return arg2;
	arg1 = small_to_big(signed_val(arg1), TMP_BIG1(p));
	goto big_plus;
    case BIG_SMALL:
	if (is_nil(arg1)) {
	    goto badarith;
	}
	if (arg2 == SMALL_ZERO)
	    return(arg1);
	arg2 = small_to_big(signed_val(arg2), TMP_BIG2(p));
	goto big_plus;
    case BIG_BIG:
	if (is_nil(arg1) || is_nil(arg2)) {
	    goto badarith;
	}
    big_plus:
	sz1 = big_size(arg1);
	sz2 = big_size(arg2);
	sz = MAX(sz1, sz2)+1;
	need_heap = BIG_NEED_SIZE(sz);
	hp = ArithAlloc(p, need_heap);
	res = big_plus(arg1, arg2, hp);
	ArithCheck(p);
	if (is_nil(res)) {
	    p->freason = SYSTEM_LIMIT;
	    return 0;
	}
	return res;
    case SMALL_FLOAT:
	f1.fd = signed_val(arg1);
	GET_DOUBLE(arg2, f2);
	goto float_plus;
    case BIG_FLOAT:
	if (is_nil(arg1)) {
	    goto badarith;
	}
	if (!FP_PRE_CHECK_OK()) {
	    goto badarith;
	}
	f1.fd = big_to_double(arg1);
	if (!FP_RESULT_OK(f1.fd)) {
	    goto badarith;
	}
	GET_DOUBLE(arg2, f2);
	goto float_plus;
    case FLOAT_SMALL:
        GET_DOUBLE(arg1, f1);
	f2.fd = signed_val(arg2);
	goto float_plus;
    case FLOAT_BIG:
	if (is_nil(arg2)) {
	    goto badarith;
	}
	if (!FP_PRE_CHECK_OK()) {
	    goto badarith;
	}
	GET_DOUBLE(arg1, f1);
	f2.fd = big_to_double(arg2);
	if (!FP_RESULT_OK(f2.fd)) {
	    goto badarith;
	}
	goto float_plus;
    case FLOAT_FLOAT:
	GET_DOUBLE(arg1, f1);
	GET_DOUBLE(arg2, f2);

    float_plus:
	if (!FP_PRE_CHECK_OK()) {
	    goto badarith;
	}
	f1.fd = f1.fd + f2.fd;
	if (!FP_RESULT_OK(f1.fd)) {
	    goto badarith;
	} else {
	    hp = ArithAlloc(p, 3);
	    res = make_float(hp);
	    ArithCheck(p);
	    PUT_DOUBLE(f1, hp);
	    return res;
	}
	break;
    default:
    badarith:
	p->freason = BADARITH;
	break;
    }
    return 0;
}

static uint32
do_mixed_minus(Process* p, uint32 arg1, uint32 arg2)
{
    uint32 res;
    FloatDef f1, f2;
    dsize_t sz1, sz2, sz;
    int need_heap;
    uint32* hp;
    int ires;

    switch (NUMBER_CODE(arg1, arg2)) {
    case SMALL_SMALL:
	ires = signed_val(arg1) - signed_val(arg2);
	ASSERT(MY_IS_SSMALL(ires) == IS_SSMALL(ires));
	if (MY_IS_SSMALL(ires)) {
	    return make_small(ires);
	} else {
	    hp = ArithAlloc(p, 2);
	    res = small_to_big(ires, hp);
	    ArithCheck(p);
	    return res;
	}
	break;
    case SMALL_BIG:
	if (is_nil(arg2)) {
	    goto badarith;
	}
	arg1 = small_to_big(signed_val(arg1), TMP_BIG1(p));
	goto big_minus;
    case BIG_SMALL:
	if (is_nil(arg1)) {
	    goto badarith;
	}
	if (arg2 == SMALL_ZERO)
	    return(arg1);
	arg2 = small_to_big(signed_val(arg2), TMP_BIG2(p));
	goto big_minus;
    case BIG_BIG:
	if (is_nil(arg1) || is_nil(arg2)) {
	    goto badarith;
	}
    big_minus:
	sz1 = big_size(arg1);
	sz2 = big_size(arg2);
	sz = MAX(sz1, sz2)+1;
	need_heap = BIG_NEED_SIZE(sz);
	hp = ArithAlloc(p, need_heap);
	res = big_minus(arg1, arg2, hp);
	ArithCheck(p);
	if (is_nil(res)) {
	    p->freason = SYSTEM_LIMIT;
	    return 0;
	}
	return res;
    case SMALL_FLOAT:
	f1.fd = signed_val(arg1);
	GET_DOUBLE(arg2, f2);
	goto float_minus;
    case BIG_FLOAT:
	if (is_nil(arg1)) {
	    goto badarith;
	}
	if (!FP_PRE_CHECK_OK()) {
	    goto badarith;
	}
	f1.fd = big_to_double(arg1);
	if (!FP_RESULT_OK(f1.fd)) {
	    goto badarith;
	}
	GET_DOUBLE(arg2, f2);
	goto float_minus;
    case FLOAT_SMALL:
        GET_DOUBLE(arg1, f1);
	f2.fd = signed_val(arg2);
	goto float_minus;
    case FLOAT_BIG:
	if (is_nil(arg2)) {
	    goto badarith;
	}
	if (!FP_PRE_CHECK_OK()) {
	    goto badarith;
	}
	GET_DOUBLE(arg1, f1);
	f2.fd = big_to_double(arg2);
	if (!FP_RESULT_OK(f2.fd)) {
	    goto badarith;
	}
	goto float_minus;
    case FLOAT_FLOAT:
	GET_DOUBLE(arg1, f1);
	GET_DOUBLE(arg2, f2);

    float_minus:
	if (!FP_PRE_CHECK_OK()) {
	    goto badarith;
	}
	f1.fd = f1.fd - f2.fd;
	if (!FP_RESULT_OK(f1.fd)) {
	    goto badarith;
	} else {
	    hp = ArithAlloc(p, 3);
	    res = make_float(hp);
	    ArithCheck(p);
	    PUT_DOUBLE(f1, hp);
	    return res;
	}
	break;
    default:
    badarith:
	p->freason = BADARITH;
	break;
    }
    return 0;
}

static uint32
do_mixed_times(Process* p, uint32 arg1, uint32 arg2)
{
    uint32 res;
    FloatDef f1, f2;
    dsize_t sz1, sz2, sz;
    int need_heap;
    uint32* hp;

    switch (NUMBER_CODE(arg1, arg2)) {
    case SMALL_SMALL:
	if ((arg1 == SMALL_ZERO) || (arg2 == SMALL_ZERO))
	    return(SMALL_ZERO);
	if (arg1 == SMALL_ONE)
	    return(arg2);
	if (arg2 == SMALL_ONE)
	    return(arg1);
	arg1 = small_to_big(signed_val(arg1), TMP_BIG1(p));
	arg2 = small_to_big(signed_val(arg2), TMP_BIG2(p));
	goto big_times;
    case SMALL_BIG:
	if (is_nil(arg2)) {
	    goto badarith;
	}
	if (arg1 == SMALL_ZERO)
	    return(SMALL_ZERO);
	if (arg1 == SMALL_ONE)
	    return(arg2);
	arg1 = small_to_big(signed_val(arg1), TMP_BIG1(p));
	goto big_times;
    case BIG_SMALL:
	if (is_nil(arg1)) {
	    goto badarith;
	}
	if (arg2 == SMALL_ZERO)
	    return(SMALL_ZERO);
	if (arg2 == SMALL_ONE)
	    return(arg1);
	arg2 = small_to_big(signed_val(arg2), TMP_BIG2(p));
	goto big_times;
    case BIG_BIG:
	if (is_nil(arg1) || is_nil(arg2)) {
	    goto badarith;
	}
    big_times:
	sz1 = big_size(arg1);
	sz2 = big_size(arg2);
	sz = sz1 + sz2;
	need_heap = BIG_NEED_SIZE(sz);
	hp = ArithAlloc(p, need_heap);
	res = big_times(arg1, arg2, hp);
	ArithCheck(p);
	if (is_nil(res)) {
	    p->freason = SYSTEM_LIMIT;
	    return 0;
	}
	return res;
    case SMALL_FLOAT:
	f1.fd = signed_val(arg1);
	GET_DOUBLE(arg2, f2);
	goto float_times;
    case BIG_FLOAT:
	if (is_nil(arg1)) {
	    goto badarith;
	}
	if (!FP_PRE_CHECK_OK()) {
	    goto badarith;
	}
	f1.fd = big_to_double(arg1);
	if (!FP_RESULT_OK(f1.fd)) {
	    goto badarith;
	}
	GET_DOUBLE(arg2, f2);
	goto float_times;
    case FLOAT_SMALL:
        GET_DOUBLE(arg1, f1);
	f2.fd = signed_val(arg2);
	goto float_times;
    case FLOAT_BIG:
	if (is_nil(arg2)) {
	    goto badarith;
	}
	if (!FP_PRE_CHECK_OK()) {
	    goto badarith;
	}
	GET_DOUBLE(arg1, f1);
	f2.fd = big_to_double(arg2);
	if (!FP_RESULT_OK(f2.fd)) {
	    goto badarith;
	}
	goto float_times;
    case FLOAT_FLOAT:
	GET_DOUBLE(arg1, f1);
	GET_DOUBLE(arg2, f2);

    float_times:
	if (!FP_PRE_CHECK_OK()) {
	    goto badarith;
	}
	f1.fd = f1.fd * f2.fd;
	if (!FP_RESULT_OK(f1.fd)) {
	    goto badarith;
	} else {
	    hp = ArithAlloc(p, 3);
	    res = make_float(hp);
	    ArithCheck(p);
	    PUT_DOUBLE(f1, hp);
	    return res;
	}
	break;
    default:
    badarith:
	p->freason = BADARITH;
	break;
    }
    return 0;
}

static uint32
do_mixed_div(Process* p, uint32 arg1, uint32 arg2)
{
    FloatDef f1, f2;
    uint32* hp;

    if (!FP_PRE_CHECK_OK()) {
	goto badarith;
    }

    switch (NUMBER_CODE(arg1, arg2)) {
    case SMALL_SMALL:
	f1.fd = signed_val(arg1);
	f2.fd = signed_val(arg2);
	break;
    case SMALL_BIG:
	if (is_nil(arg2)) {
	    goto badarith;
	}
	f1.fd = signed_val(arg1);
	f2.fd = big_to_double(arg2);
	if (!FP_RESULT_OK(f2.fd))
	    goto badarith;
	break;
    case BIG_SMALL:
	if (is_nil(arg1)) {
	    goto badarith;
	}
	f1.fd = big_to_double(arg1);
	if (!FP_RESULT_OK(f1.fd))
	    goto badarith;
	f2.fd = signed_val(arg2);
	break;
    case BIG_BIG:
	if (is_nil(arg1) || is_nil(arg2)) {
	    goto badarith;
	}
	f1.fd = big_to_double(arg1);
	if (!FP_RESULT_OK(f1.fd))
	    goto badarith;
	f2.fd = big_to_double(arg2);
	if (!FP_RESULT_OK(f2.fd))
	    goto badarith;
	break;
    case SMALL_FLOAT:
	f1.fd = signed_val(arg1);
	GET_DOUBLE(arg2, f2);
	break;
    case BIG_FLOAT:
	if (is_nil(arg1)) {
	    goto badarith;
	}
	f1.fd = big_to_double(arg1);
	if (!FP_RESULT_OK(f1.fd))
	    goto badarith;
	GET_DOUBLE(arg2, f2);
	break;
    case FLOAT_SMALL:
        GET_DOUBLE(arg1, f1);
	f2.fd = signed_val(arg2);
	break;
    case FLOAT_BIG:
	if (is_nil(arg2)) {
	    goto badarith;
	}
	GET_DOUBLE(arg1, f1);
	f2.fd = big_to_double(arg2);
	if (!FP_RESULT_OK(f2.fd))
	    goto badarith;
	break;
    case FLOAT_FLOAT:
	GET_DOUBLE(arg1, f1);
	GET_DOUBLE(arg2, f2);
	break;
    default:
    badarith:
	p->freason = BADARITH;
	return 0;
    }
    
    if (f2.fd == 0.0) {
	goto badarith;
    }
    f1.fd = f1.fd / f2.fd;
    if (!FP_RESULT_OK(f1.fd)) {
	goto badarith;
    }
    hp = ArithAlloc(p, 3);
    PUT_DOUBLE(f1, hp);
    ArithCheck(p);
    return make_float(hp);
}

static uint32
do_int_div(Process* p, uint32 arg1, uint32 arg2)
{	
    int ires;

    switch (NUMBER_CODE(arg1, arg2)) {
    case BIG_SMALL:
	if (is_nil(arg1)) {
	    goto badarith;
	}
	arg2 = small_to_big(signed_val(arg2), TMP_BIG2(p));
	goto L_big_div;
    case SMALL_BIG:
	if (is_nil(arg2)) {
	    goto badarith;
	}
	return SMALL_ZERO;
    case BIG_BIG:
    L_big_div:
	if (is_nil(arg1) || is_nil(arg2)) {
	    goto badarith;
	}
	ires = big_ucomp(arg1, arg2);
	if (ires < 0) {
	    arg1 = SMALL_ZERO;
	} else if (ires == 0) {
	    arg1 = (big_sign(arg1) == big_sign(arg2)) ?
		SMALL_ONE : SMALL_MINUS_ONE;
	} else {
	    uint32* hp;
	    int i = big_size(arg1);

	    ires = big_size(arg2);
	    hp = ArithAlloc(p, BIG_NEED_SIZE(i-ires+1) + BIG_NEED_SIZE(i));
	    arg1 = big_div(arg1, arg2, hp);
	    ArithCheck(p);
	    if (is_nil(arg1)) {
		p->freason = SYSTEM_LIMIT;
		return 0;
	    }
	}
	return arg1;
    default:
    badarith:
	p->freason = BADARITH;
	return 0;
    }
}    

static uint32
do_int_rem(Process* p, uint32 arg1, uint32 arg2)
{	
    int ires;

    switch (NUMBER_CODE(arg1, arg2)) {
    case BIG_SMALL:
	if (is_nil(arg1)) {
	    goto badarith;
	}
	arg2 = small_to_big(signed_val(arg2), TMP_BIG2(p));
	goto L_big_rem;
    case SMALL_BIG:
	if (is_nil(arg2)) {
	    goto badarith;
	}
	return arg1;
    case BIG_BIG:
    L_big_rem:
	if (is_nil(arg1) || is_nil(arg2)) {
	    goto badarith;
	}
	ires = big_ucomp(arg1, arg2);
	if (ires == 0) {
	    arg1 = SMALL_ZERO;
	} else if (ires > 0) {
	    uint32* hp = ArithAlloc(p, BIG_NEED_SIZE(big_size(arg1)));
	    arg1 = big_rem(arg1, arg2, hp);
	    ArithCheck(p);
	    if (is_nil(arg1)) {
		p->freason = SYSTEM_LIMIT;
		return 0;
	    }
	}
	return arg1;
    default:
    badarith:
	p->freason = BADARITH;
	return 0;
    }
}

static uint32
do_band(Process* p, uint32 arg1, uint32 arg2)
{
    uint32* hp;
    int ires;

    switch (NUMBER_CODE(arg1, arg2)) {
    case SMALL_BIG:
	if (is_nil(arg2)) {
	    goto badarith;
	}
	arg1 = small_to_big(signed_val(arg1), TMP_BIG1(p));
	break;
    case BIG_SMALL:
	if (is_nil(arg1)) {
	    goto badarith;
	}
	arg2 = small_to_big(signed_val(arg2), TMP_BIG2(p));
	break;
    case BIG_BIG:
	if (is_nil(arg1) || is_nil(arg2)) {
	    goto badarith;
	}
	break;
    default:
    badarith:
	p->freason = BADARITH;
	return 0;
    }
    ires = BIG_NEED_SIZE(MAX(big_size(arg1), big_size(arg2)) + 1);
    hp = ArithAlloc(p, ires);
    arg1 = big_band(arg1, arg2, hp);
    ArithCheck(p);
    ASSERT(is_not_nil(arg1));
    return arg1;
}

static uint32
do_bor(Process* p, uint32 arg1, uint32 arg2)
{
    uint32* hp;
    int ires;

    switch (NUMBER_CODE(arg1, arg2)) {
    case SMALL_BIG:
	if (is_nil(arg2)) {
	    goto badarith;
	}
	arg1 = small_to_big(signed_val(arg1), TMP_BIG1(p));
	break;
    case BIG_SMALL:
	if (is_nil(arg1)) {
	    goto badarith;
	}
	arg2 = small_to_big(signed_val(arg2), TMP_BIG2(p));
	break;
    case BIG_BIG:
	if (is_nil(arg1) || is_nil(arg2)) {
	    goto badarith;
	}
	break;
    default:
    badarith:
	p->freason = BADARITH;
	return 0;
    }
    ires = BIG_NEED_SIZE(MAX(big_size(arg1), big_size(arg2)) + 1);
    hp = ArithAlloc(p, ires);
    arg1 = big_bor(arg1, arg2, hp);
    ArithCheck(p);
    ASSERT(is_not_nil(arg1));
    return arg1;
}

static uint32
do_bxor(Process* p, uint32 arg1, uint32 arg2)
{
    uint32* hp;
    int ires;

    switch (NUMBER_CODE(arg1, arg2)) {
    case SMALL_BIG:
	if (is_nil(arg2)) {
	    goto badarith;
	}
	arg1 = small_to_big(signed_val(arg1), TMP_BIG1(p));
	break;
    case BIG_SMALL:
	if (is_nil(arg1)) {
	    goto badarith;
	}
	arg2 = small_to_big(signed_val(arg2), TMP_BIG2(p));
	break;
    case BIG_BIG:
	if (is_nil(arg1) || is_nil(arg2)) {
	    goto badarith;
	}
	break;
    default:
    badarith:
	p->freason = BADARITH;
	return 0;
    }
    ires = BIG_NEED_SIZE(MAX(big_size(arg1), big_size(arg2)) + 1);
    hp = ArithAlloc(p, ires);
    arg1 = big_bxor(arg1, arg2, hp);
    ArithCheck(p);
    ASSERT(is_not_nil(arg1));
    return arg1;
}

static uint32
mixed_eq(uint32 arg1, uint32 arg2)
{
    FloatDef farg1, farg2;

    switch (NUMBER_CODE(arg1, arg2)) {
    case SMALL_BIG:		/* Has to be false */
    case BIG_SMALL:
	return 0;
    case SMALL_FLOAT:
	GET_DOUBLE(arg2, farg2);
	return signed_val(arg1) == farg2.fd;
    case BIG_FLOAT:
	if (is_nil(arg1)) {
	    return 0;
	}
	GET_DOUBLE(arg2, farg2);
	return big_to_double(arg1) == farg2.fd;
    case BIG_BIG:
	if (is_nil(arg1) || is_nil(arg2)) {
	    return 0;		/* Can't be equal; tested above. */
	}
	return big_comp(arg1, arg2) == 0;
    case FLOAT_SMALL:
	GET_DOUBLE(arg1, farg1);
	return farg1.fd == signed_val(arg2);
    case FLOAT_BIG:
	if (is_nil(arg2)) {
	    return 0;
	}
	GET_DOUBLE(arg1, farg1);
	return farg1.fd == big_to_double(arg2);
    case FLOAT_FLOAT:
	GET_DOUBLE(arg1, farg1);
	GET_DOUBLE(arg2, farg2);
	return farg1.fd == farg2.fd;
    default:
	return eq(arg1, arg2);
    }
}

static uint32
mixed_ge(uint32 arg1, uint32 arg2)
{
    FloatDef farg1, farg2;
    double tmp;

    if (arg1 == arg2)
	return 1;		/* Identity or equal SMALL */

    /*
     * Note: Nil ([]) is a special case of BIG.  Nil is greater than any number.
     */

    switch (NUMBER_CODE(arg1, arg2)) {
    case SMALL_BIG:
	if (is_nil(arg2)) {
	    return 0;
	}
	return big_sign(arg2) != 0; /* Greater if negative big. */
    case BIG_SMALL:
	if (is_nil(arg1)) {
	    return 1;
	}
	return big_sign(arg1) == 0; /* Greater if positive big. */
    case SMALL_FLOAT:
	GET_DOUBLE(arg2, farg2);
	return signed_val(arg1) >= farg2.fd;
    case BIG_FLOAT:
	if (is_nil(arg1)) {
	    return 1;
	}
	GET_DOUBLE(arg2, farg2);
	tmp = big_to_double(arg1);
	if (!FP_RESULT_OK(tmp))
	   return big_sign(arg1) == 0;
	return tmp >= farg2.fd;
    case BIG_BIG:
	/*
	 * Only of these bigs can be []; [] == [] was taken care of above.
	 */
	if (is_nil(arg1)) {
	    return 1;
	} else if (is_nil(arg2)) {
	    return 0;
	} else {
	    return big_comp(arg1, arg2) >= 0;
	}
    case FLOAT_SMALL:
	GET_DOUBLE(arg1, farg1);
	return farg1.fd >= signed_val(arg2);
    case FLOAT_BIG:
	if (is_nil(arg2)) {
	    return 0;
	}
	GET_DOUBLE(arg1, farg1);
	tmp = big_to_double(arg2);
	if (!FP_RESULT_OK(tmp))
	   return big_sign(arg2) != 0;
	return farg1.fd >= tmp;
    case FLOAT_FLOAT:
	GET_DOUBLE(arg1, farg1);
	GET_DOUBLE(arg2, farg2);
	return farg1.fd >= farg2.fd;
    default:
	return cmp(arg1, arg2) >= 0;
    }
}

static void
trace_call_or_ret(Process* p, uint32* fi, uint32* reg, uint32 what)
{
    ErlMessageBuffer* bp;
    uint32* hp;
    uint32 arity = fi[2];
    uint32 mfa;
    uint32 size;
    uint32 sizes[256];
    Process *tracer;
    uint32 mess;
    int i;

    /*
     * Verify that the tracer process is valid.
     */
    tracer = process_tab[get_number(p->tracer_proc)];
    if (INVALID_PID(tracer, p->tracer_proc)) {
	p->flags &= ~TRACE_FLAGS;
	p->tracer_proc = NIL;
	return;
    }

    /*
     * Calculate size of message buffer and allocate it.
     */
    if ((p->flags & F_TIMESTAMP) != 0) {
	size = 4 + 4 + 6;
    } else {
	size = 2*arity + 4 + 5;
	for (i = arity-1; i >= 0; i--) {
	    sizes[i] = size_object(reg[i]);
	    size += sizes[i];
	}	
    }
    bp = new_message_buffer(size);
    hp = bp->mem;
    
    /*
     * Copy the {M, F, A} tuple to the message buffer
     * (A is the arity if timestamps or the list of arguments if no timestamp).
     */
    if (p->flags & F_TIMESTAMP) {
	mfa = make_small(arity);
    } else {
	mfa = NIL;
	for (i = arity-1; i >= 0; i--) {
	    uint32 term = copy_struct(reg[i], sizes[i], &hp, &bp->mso);
	    mfa = CONS(hp, term, mfa);
	    hp += 2;
	}
    }
    mfa = TUPLE3(hp, fi[0], fi[1], mfa);
    hp += 4;

    /*
     * Build the trace tuple and put it into receive queue of the tracer process.
     */
    
    if ((p->flags & F_TIMESTAMP) == 0) {
	mess = TUPLE4(hp, am_trace, p->id, what, mfa);
    } else {
	uint32 mega, sec, micro;
	uint32 ts_tuple;

	get_now(&mega, &sec, &micro);
	ts_tuple = TUPLE3(hp, make_small(mega), make_small(sec), make_small(micro));
	hp += 4;
	mess = TUPLE5(hp, am_trace, p->id, what, mfa, ts_tuple);
    }
    queue_message(tracer, bp, mess);
}
