/*
  Racket
  Copyright (c) 2006-2011 PLT Scheme Inc.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
    Boston, MA 02110-1301 USA.
*/

#include "schpriv.h"
#include "schmach.h"

static Scheme_Object *future_p(int argc, Scheme_Object *argv[])
{
  if (SAME_TYPE(SCHEME_TYPE(argv[0]), scheme_future_type))
    return scheme_true;
  else
    return scheme_false;
}

Scheme_Object *scheme_fsemaphore_p(int argc, Scheme_Object *argv[])
{
  if (SAME_TYPE(SCHEME_TYPE(argv[0]), scheme_fsemaphore_type)) 
    return scheme_true;
  else 
    return scheme_false;
}


#ifdef MZ_PRECISE_GC
static void register_traversers(void);
#endif

#ifndef MZ_USE_FUTURES

/* Futures not enabled, but make a stub module and implementation */

typedef struct future_t {
  Scheme_Object so;
  Scheme_Object *running_sema;
  Scheme_Object *orig_lambda;
  Scheme_Object *retval;
  int multiple_count;
  Scheme_Object **multiple_array;
  int no_retval;
} future_t;

typedef struct fsemaphore_t {
  Scheme_Object so;
  Scheme_Object *sema;
} fsemaphore_t;

Scheme_Object *scheme_future(int argc, Scheme_Object *argv[])
{
  future_t *ft;

  scheme_check_proc_arity("future", 0, 0, argc, argv);

  ft = MALLOC_ONE_TAGGED(future_t);
  ft->so.type = scheme_future_type;

  ft->orig_lambda = argv[0];

  return (Scheme_Object *)ft;
}

static Scheme_Object *touch(int argc, Scheme_Object *argv[])
{
  future_t * volatile ft;

  if (!SAME_TYPE(SCHEME_TYPE(argv[0]), scheme_future_type))
    scheme_wrong_type("touch", "future", 0, argc, argv);

  ft = (future_t *)argv[0];

  while (1) {
    if (ft->retval) {
      if (SAME_OBJ(ft->retval, SCHEME_MULTIPLE_VALUES)) {
        Scheme_Thread *p = scheme_current_thread;
        p->ku.multiple.array = ft->multiple_array;
        p->ku.multiple.count = ft->multiple_count;
      }
      return ft->retval;
    }
    if (ft->no_retval)
      scheme_signal_error("touch: future previously aborted");
    
    if (ft->running_sema) {
      scheme_wait_sema(ft->running_sema, 0);
      scheme_post_sema(ft->running_sema);
    } else {
      Scheme_Object *sema;
      future_t *old_ft;
      mz_jmp_buf newbuf, * volatile savebuf;
      Scheme_Thread *p = scheme_current_thread;
      
      /* In case another Scheme thread touches the future. */
      sema = scheme_make_sema(0);
      ft->running_sema = sema;

      old_ft = p->current_ft;
      p->current_ft = ft;
      
      savebuf = p->error_buf;
      p->error_buf = &newbuf;
      if (scheme_setjmp(newbuf)) {
        ft->no_retval = 1;
        p->current_ft = old_ft;
        scheme_post_sema(ft->running_sema);
        scheme_longjmp(*savebuf, 1);
      } else {
        GC_CAN_IGNORE Scheme_Object *retval, *proc;
        proc = ft->orig_lambda;
        ft->orig_lambda = NULL; /* don't hold on to proc */
        retval = scheme_apply_multi(proc, 0, NULL);
        ft->retval = retval;
        if (SAME_OBJ(retval, SCHEME_MULTIPLE_VALUES)) {
          ft->multiple_array = p->ku.multiple.array;
          ft->multiple_count = p->ku.multiple.count;
          p->ku.multiple.array = NULL;
        }
        scheme_post_sema(ft->running_sema);
        p->current_ft = old_ft;
        p->error_buf = savebuf;
      }
    }
  }

  return NULL;
}



static Scheme_Object *processor_count(int argc, Scheme_Object *argv[])
{
  return scheme_make_integer(1);
}

int scheme_is_multiprocessor(int now)
{
  return 0;
}

Scheme_Object *scheme_current_future(int argc, Scheme_Object *argv[])
{
  future_t *ft = scheme_current_thread->current_ft;

  return (ft ? (Scheme_Object *)ft : scheme_false);
}

Scheme_Object *scheme_make_fsemaphore(int argc, Scheme_Object *argv[])
{
  intptr_t v;
  fsemaphore_t *fsema;
  Scheme_Object *sema;
  
  v = scheme_get_semaphore_init("make-fsemaphore", argc, argv);

  fsema = MALLOC_ONE_TAGGED(fsemaphore_t);
  fsema->so.type = scheme_fsemaphore_type;
  sema = scheme_make_sema(v);
  fsema->sema = sema;

  return (Scheme_Object*)fsema;
}

Scheme_Object *scheme_fsemaphore_post(int argc, Scheme_Object *argv[])
{
  fsemaphore_t *fsema;
  if (argc != 1 || !SAME_TYPE(SCHEME_TYPE(argv[0]), scheme_fsemaphore_type)) 
    scheme_wrong_type("fsemaphore-post", "fsemaphore", 0, argc, argv);

  fsema = (fsemaphore_t*)argv[0];
  scheme_post_sema(fsema->sema);

  return scheme_void;
}

Scheme_Object *scheme_fsemaphore_wait(int argc, Scheme_Object *argv[])
{
  fsemaphore_t *fsema;
  if (argc != 1 || !SAME_TYPE(SCHEME_TYPE(argv[0]), scheme_fsemaphore_type)) 
    scheme_wrong_type("fsemaphore-wait", "fsemaphore", 0, argc, argv);

  fsema = (fsemaphore_t*)argv[0];
  scheme_wait_sema(fsema->sema, 0);

  return scheme_void;
}

Scheme_Object *scheme_fsemaphore_try_wait(int argc, Scheme_Object *argv[])
{
  fsemaphore_t *fsema;
  if (argc != 1 || !SAME_TYPE(SCHEME_TYPE(argv[0]), scheme_fsemaphore_type))
    scheme_wrong_type("fsemaphore-try-wait?", "fsemaphore", 0, argc, argv);

  fsema = (fsemaphore_t*)argv[0];
  if (scheme_wait_sema(fsema->sema, 1))
    return scheme_true;

  return scheme_false;
}

Scheme_Object *scheme_fsemaphore_count(int argc, Scheme_Object *argv[])
{
  fsemaphore_t *fsema;
  if (argc != 1 || !SAME_TYPE(SCHEME_TYPE(argv[0]), scheme_fsemaphore_type)) 
    scheme_wrong_type("fsemaphore-count", "fsemaphore", 0, argc, argv);

  fsema = (fsemaphore_t*)argv[0];
  return scheme_make_integer(((Scheme_Sema *)fsema->sema)->value);
}

# define FUTURE_PRIM_W_ARITY(name, func, a1, a2, env) GLOBAL_PRIM_W_ARITY(name, func, a1, a2, env)

void scheme_init_futures(Scheme_Env *newenv)
{
  FUTURE_PRIM_W_ARITY("future?",          future_p,         1, 1, newenv);
  FUTURE_PRIM_W_ARITY("future",           scheme_future,    1, 1, newenv);
  FUTURE_PRIM_W_ARITY("processor-count",  processor_count,  0, 0, newenv);
  FUTURE_PRIM_W_ARITY("current-future",   scheme_current_future,   0, 0, newenv);
  FUTURE_PRIM_W_ARITY("touch",            touch,            1, 1, newenv);
  FUTURE_PRIM_W_ARITY("make-fsemaphore",  scheme_make_fsemaphore,  1, 1, newenv);
  FUTURE_PRIM_W_ARITY("fsemaphore?",      scheme_fsemaphore_p,     1, 1, newenv);
  FUTURE_PRIM_W_ARITY("fsemaphore-post",  scheme_fsemaphore_post,  1, 1, newenv);
  FUTURE_PRIM_W_ARITY("fsemaphore-wait",  scheme_fsemaphore_wait,  1, 1, newenv);
  FUTURE_PRIM_W_ARITY("fsemaphore-try-wait?", scheme_fsemaphore_try_wait, 1, 1, newenv);
  FUTURE_PRIM_W_ARITY("fsemaphore-count", scheme_fsemaphore_count, 1, 1, newenv);

  scheme_finish_primitive_module(newenv);
  scheme_protect_primitive_provide(newenv, NULL);

#ifdef MZ_PRECISE_GC
  register_traversers();
#endif
}

void scheme_init_futures_once()
{
}

void scheme_init_futures_per_place()
{
}

void scheme_end_futures_per_place()
{
}

#else

#include "future.h"
#include <stdlib.h>
#include <string.h>

#ifdef DEBUG_FUTURES 
#define DO_LOG(pr) do { pthread_t self; self = pthread_self(); fprintf(stderr, "%x:%s:%s:%d ", (unsigned) self, __FILE__, __FUNCTION__, __LINE__); pr; fprintf(stderr, "\n"); fflush(stdout); } while(0)
#define LOG0(t) DO_LOG(fprintf(stderr, t))
#define LOG(t, a) DO_LOG(fprintf(stderr, t, a))
#define LOG2(t, a, b) DO_LOG(fprintf(stderr, t, a, b))
#define LOG3(t, a, b, c) DO_LOG(fprintf(stderr, t, a, b, c))
#define LOG4(t, a, b, c, d) DO_LOG(fprintf(stderr, t, a, b, c, d))
#else
#define LOG0(t)
#define LOG(t, a)
#define LOG2(t, a, b)
#define LOG3(t, a, b, c)
#define LOG4(t, a, b, c, d)
#endif

static Scheme_Object *make_fsemaphore(int argc, Scheme_Object *argv[]);
static Scheme_Object *touch(int argc, Scheme_Object *argv[]);
static Scheme_Object *processor_count(int argc, Scheme_Object *argv[]);
static void futures_init(void);
static void init_future_thread(struct Scheme_Future_State *fs, int i);
static void requeue_future(struct future_t *future, struct Scheme_Future_State *fs);
static void future_do_runtimecall(struct Scheme_Future_Thread_State *fts,
                                  void *func,
                                  int is_atomic,
                                  int can_suspend);
static int capture_future_continuation(future_t *ft, void **storage);

#define INITIAL_C_STACK_SIZE 500000
#define FUTURE_RUNSTACK_SIZE 2000

#define FEVENT_BUFFER_SIZE    512

enum {
  FEVENT_CREATE,
  FEVENT_COMPLETE,
  FEVENT_START_WORK,
  FEVENT_START_RTONLY_WORK,
  FEVENT_END_WORK,
  FEVENT_RTCALL_ATOMIC,
  FEVENT_HANDLE_RTCALL_ATOMIC,
  FEVENT_RTCALL,
  FEVENT_RTCALL_TOUCH,
  FEVENT_HANDLE_RTCALL,
  FEVENT_RTCALL_RESULT,
  FEVENT_HANDLE_RTCALL_RESULT,
  FEVENT_RTCALL_ABORT,
  FEVENT_HANDLE_RTCALL_ABORT,
  FEVENT_RTCALL_SUSPEND,
  FEVENT_TOUCH_PAUSE,
  FEVENT_TOUCH_RESUME,
  FEVENT_MISSING,
  _FEVENT_COUNT_
};

static const char * const fevent_strs[] = { "create", "complete",
                                            "start-work", "start-0-work", "end-work",
                                            "sync", "sync", "block", "touch", "block",
                                            "result", "result", "abort", "abort", 
                                            "suspend", 
                                            "touch-pause", "touch-resume", "missing" };
static const char * const fevent_long_strs[] = { "created", "completed",
                                                 "started work", "started (process 0, only)", "ended work",
                                                 "synchronizing with process 0", "synchronizing", 
                                                 "BLOCKING on process 0", "touching future", "HANDLING",
                                                 "result from process 0", "result determined",
                                                 "abort from process 0", "abort determined",
                                                 "suspended",
                                                 "paused for touch", "resumed for touch",
                                                 "events missing" };

typedef struct Fevent {
  double timestamp;
  int what, fid;
} Fevent;

typedef struct Fevent_Buffer {
  Fevent *a;
  int pos, overflow;
  int i, count; /* used during flush */
} Fevent_Buffer;

typedef struct Scheme_Future_State {
  int thread_pool_size;
  struct Scheme_Future_Thread_State **pool_threads;

  void *signal_handle;

  int future_queue_count;
  future_t *future_queue;
  future_t *future_queue_end;
  future_t *future_waiting_atomic;
  future_t *future_waiting_lwc;
  future_t *future_waiting_touch;
  int next_futureid;

  mzrt_mutex *future_mutex; /* BEWARE: don't allocate while holding this lock */
  mzrt_sema *future_pending_sema;
  mzrt_sema *gc_ok_c;
  mzrt_sema *gc_done_c;

  int gc_not_ok, wait_for_gc, need_gc_ok_post, need_gc_done_post;
  int abort_all_futures;

  int *gc_counter_ptr;

  int future_threads_created;

  Fevent_Buffer runtime_fevents;
  Scheme_Object **fevent_syms;
  Scheme_Struct_Type *fevent_prefab;
} Scheme_Future_State;

typedef struct Scheme_Future_Thread_State {
  mz_proc_thread *t;
  int id;
  int worker_gc_counter;
  mzrt_sema *worker_can_continue_sema;
  intptr_t runstack_size;

  /* After a future thread starts, only the runtime thread
     modifies the values at these pointers. Future threads
     read them without any locks; assembly-level instructions,
     such as mfence, ensure that future threads eventually see 
     changes made by the runtime thread, and the runtime thread 
     waits as needed. */
  volatile int *fuel_pointer;
  volatile uintptr_t *stack_boundary_pointer;
  volatile int *need_gc_pointer;

  Scheme_Thread *thread;

  uintptr_t gen0_start;
  uintptr_t gen0_size;
  uintptr_t gen0_initial_offset;

  int use_fevents1;
  Fevent_Buffer fevents1;
  Fevent_Buffer fevents2;
} Scheme_Future_Thread_State;

THREAD_LOCAL_DECL(static Scheme_Future_State *scheme_future_state);
THREAD_LOCAL_DECL(void *jit_future_storage[2]);

#ifdef MZ_PRECISE_GC
THREAD_LOCAL_DECL(extern uintptr_t GC_gen0_alloc_page_ptr);
THREAD_LOCAL_DECL(extern uintptr_t GC_gen0_alloc_page_end);
THREAD_LOCAL_DECL(extern int GC_gen0_alloc_only);
#endif

static void start_gc_not_ok(Scheme_Future_State *fs);
static void end_gc_not_ok(Scheme_Future_Thread_State *fts, 
                          Scheme_Future_State *fs, 
                          Scheme_Object **current_rs);

static void *worker_thread_future_loop(void *arg);
static void invoke_rtcall(Scheme_Future_State * volatile fs, future_t * volatile future, volatile int is_atomic);
static future_t *enqueue_future(Scheme_Future_State *fs, future_t *ft);;
static future_t *get_pending_future(Scheme_Future_State *fs);
static void receive_special_result(future_t *f, Scheme_Object *retval, int clear);
static void send_special_result(future_t *f, Scheme_Object *retval);
static Scheme_Object *_apply_future_lw(future_t *ft);
static Scheme_Object *apply_future_lw(future_t *ft);
static int fsemaphore_ready(Scheme_Object *obj);
static void init_fevent(Fevent_Buffer *b);
static void free_fevent(Fevent_Buffer *b);

READ_ONLY static int cpucount;
static void init_cpucount(void);

#ifdef MZ_PRECISE_GC
# define scheme_future_setjmp(newbuf) scheme_jit_setjmp((newbuf).jb)
# define scheme_future_longjmp(newbuf, v) scheme_jit_longjmp((newbuf).jb, v)
#else
# define scheme_future_setjmp(newbuf) scheme_setjmp(newbuf)
# define scheme_future_longjmp(newbuf, v) scheme_longjmp(newbuf, v)
#endif

/**********************************************************************/
/* Arguments for a newly created future thread                        */
/**********************************************************************/

typedef struct future_thread_params_t {
  mzrt_sema *ready_sema;
  struct NewGC *shared_GC;
  Scheme_Future_State *fs;
  Scheme_Future_Thread_State *fts;
  Scheme_Object **runstack_start;

  Scheme_Object ***scheme_current_runstack_ptr;
  Scheme_Object ***scheme_current_runstack_start_ptr;
  Scheme_Thread **current_thread_ptr;
  void **jit_future_storage_ptr;
  Scheme_Current_LWC *lwc;
} future_thread_params_t;

/**********************************************************************/
/* Plumbing for Racket initialization                                 */
/**********************************************************************/

/* Invoked by the runtime on startup to make primitives known */
void scheme_init_futures(Scheme_Env *newenv)
{
  Scheme_Object *p;

  scheme_add_global_constant(
                             "future?", 
                             scheme_make_folding_prim(
                                                      future_p, 
                                                      "future?", 
                                                      1, 
                                                      1,
                                                      1), 
                             newenv);

  p = scheme_make_prim_w_arity(scheme_future, "future", 1, 1);
  SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_UNARY_INLINED;
  scheme_add_global_constant("future", p, newenv);

  scheme_add_global_constant(
                             "processor-count", 
                             scheme_make_prim_w_arity(
                                                      processor_count, 
                                                      "processor-count", 
                                                      0, 
                                                      0), 
                             newenv);

  p = scheme_make_prim_w_arity(touch, "touch", 1, 1);
  SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_UNARY_INLINED;
  scheme_add_global_constant("touch", p, newenv);

  p = scheme_make_immed_prim( 
                              scheme_current_future, 
                              "current-future", 
                              0, 
                              0);
  SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_NARY_INLINED;
  scheme_add_global_constant("current-future", p, newenv);

  p = scheme_make_immed_prim(
                              scheme_fsemaphore_p, 
                              "fsemaphore?", 
                              1, 
                              1);

  SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_UNARY_INLINED;
  scheme_add_global_constant("fsemaphore?", p, newenv);

  p = scheme_make_immed_prim(
                              make_fsemaphore, 
                              "make-fsemaphore", 
                              1, 
                              1);
  SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_UNARY_INLINED;
  scheme_add_global_constant("make-fsemaphore", p, newenv);

  p = scheme_make_immed_prim(
                              scheme_fsemaphore_count, 
                              "fsemaphore-count", 
                              1, 
                              1);
  SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_UNARY_INLINED;
  scheme_add_global_constant("fsemaphore-count", p, newenv);
  
  p = scheme_make_immed_prim(
                              scheme_fsemaphore_wait, 
                              "fsemaphore-wait",
                              1, 
                              1);
  SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_UNARY_INLINED;
  scheme_add_global_constant("fsemaphore-wait", p, newenv);

  p = scheme_make_immed_prim(
                              scheme_fsemaphore_post, 
                              "fsemaphore-post", 
                              1, 
                              1);
  SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_UNARY_INLINED;
  scheme_add_global_constant("fsemaphore-post", p, newenv);

  p = scheme_make_immed_prim(
                              scheme_fsemaphore_try_wait, 
                              "fsemaphore-try-wait?", 
                              1, 
                              1);
  SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_UNARY_INLINED;
  scheme_add_global_constant("fsemaphore-try-wait?", p, newenv);  

  scheme_finish_primitive_module(newenv);
  scheme_protect_primitive_provide(newenv, NULL);
}

void scheme_init_futures_once()
{
  init_cpucount();
}

void scheme_init_futures_per_place()
{
  futures_init();
}

void futures_init(void)
{
  Scheme_Future_State *fs;
  Scheme_Future_Thread_State **ftss;
  void *hand;
  Scheme_Object **syms, *sym;
  Scheme_Struct_Type *stype;
  int pool_size;

  fs = (Scheme_Future_State *)malloc(sizeof(Scheme_Future_State));
  memset(fs, 0, sizeof(Scheme_Future_State));
  scheme_future_state = fs;

  pool_size = cpucount * 2;
  ftss = (Scheme_Future_Thread_State **)malloc(pool_size * sizeof(Scheme_Future_Thread_State*));
  memset(ftss, 0, pool_size * sizeof(Scheme_Future_Thread_State*));
  fs->pool_threads = ftss;
  fs->thread_pool_size = pool_size;

  REGISTER_SO(fs->future_queue);
  REGISTER_SO(fs->future_queue_end);
  REGISTER_SO(fs->future_waiting_atomic);
  REGISTER_SO(fs->future_waiting_lwc);
  REGISTER_SO(fs->future_waiting_touch);
  REGISTER_SO(fs->fevent_syms);
  REGISTER_SO(fs->fevent_prefab);
  REGISTER_SO(jit_future_storage);

  mzrt_mutex_create(&fs->future_mutex);
  mzrt_sema_create(&fs->future_pending_sema, 0);
  mzrt_sema_create(&fs->gc_ok_c, 0);
  mzrt_sema_create(&fs->gc_done_c, 0);

  fs->gc_counter_ptr = &scheme_did_gc_count;

  hand = scheme_get_signal_handle();
  fs->signal_handle = hand;

  syms = MALLOC_N(Scheme_Object*, _FEVENT_COUNT_);
  fs->fevent_syms = syms;
  sym = scheme_intern_symbol(fevent_strs[FEVENT_HANDLE_RTCALL_ATOMIC]);
  syms[FEVENT_HANDLE_RTCALL_ATOMIC] = sym;
  sym = scheme_intern_symbol(fevent_strs[FEVENT_HANDLE_RTCALL]);
  syms[FEVENT_HANDLE_RTCALL] = sym;

  sym = scheme_intern_symbol("future-event");
  stype = scheme_lookup_prefab_type(sym, 4);
  fs->fevent_prefab = stype;

  init_fevent(&fs->runtime_fevents);

#ifdef MZ_PRECISE_GC
  register_traversers();
#endif
}

static void init_future_thread(Scheme_Future_State *fs, int i)
{
  Scheme_Future_Thread_State *fts;
  GC_CAN_IGNORE future_thread_params_t params;
  Scheme_Thread *skeleton;
  Scheme_Object **runstack_start;
  mz_proc_thread *t;

  /* Create the worker thread pool.  These threads will
     'queue up' and wait for futures to become available. */

  fts = (Scheme_Future_Thread_State *)malloc(sizeof(Scheme_Future_Thread_State));
  memset(fts, 0, sizeof(Scheme_Future_Thread_State));
  fts->id = i;

  fts->gen0_size = 1;

  fts->use_fevents1 = 1;
  init_fevent(&fts->fevents1);
  init_fevent(&fts->fevents2);

  params.shared_GC = GC_instance;
  params.fts = fts;
  params.fs = fs;

  /* Make enough of a thread record to deal with multiple values. */
  skeleton = MALLOC_ONE_TAGGED(Scheme_Thread);
  skeleton->so.type = scheme_thread_type;

  scheme_register_static(&fts->thread, sizeof(Scheme_Thread*));
  fts->thread = skeleton;

  {
    Scheme_Object **rs_start, **rs;
    intptr_t init_runstack_size = FUTURE_RUNSTACK_SIZE;
    rs_start = scheme_alloc_runstack(init_runstack_size);
    rs = rs_start XFORM_OK_PLUS init_runstack_size;
    runstack_start = rs_start;
    fts->runstack_size = init_runstack_size;
  }

  /* Fill in GCable values just before creating the thread,
     because the GC ignores `params': */
  params.runstack_start = runstack_start;

  mzrt_sema_create(&params.ready_sema, 0);
  t = mz_proc_thread_create_w_stacksize(worker_thread_future_loop, &params, INITIAL_C_STACK_SIZE);
  mzrt_sema_wait(params.ready_sema);
  mzrt_sema_destroy(params.ready_sema);

  fts->t = t;
	
  scheme_register_static(params.scheme_current_runstack_ptr, sizeof(void*));
  scheme_register_static(params.scheme_current_runstack_start_ptr, sizeof(void*));	
  scheme_register_static(params.jit_future_storage_ptr, 2 * sizeof(void*));
  scheme_register_static(params.current_thread_ptr, sizeof(void*));

  fs->pool_threads[i] = fts;
}

void scheme_end_futures_per_place()
{
  Scheme_Future_State *fs = scheme_future_state;

  if (fs) {
    int i;

    mzrt_mutex_lock(fs->future_mutex);
    fs->abort_all_futures = 1;
    fs->wait_for_gc = 1;
    mzrt_mutex_unlock(fs->future_mutex);

    /* post enough semas to ensure that every future
       wakes up and tries to disable GC: */
    for (i = 0; i < fs->thread_pool_size; i++) { 
      if (fs->pool_threads[i]) {
        mzrt_sema_post(fs->future_pending_sema);
        mzrt_sema_post(fs->pool_threads[i]->worker_can_continue_sema);
      }
    }
    
    scheme_future_block_until_gc();

    /* wait for all future threads to end: */
    for (i = 0; i < fs->thread_pool_size; i++) { 
      if (fs->pool_threads[i]) {
        (void)mz_proc_thread_wait(fs->pool_threads[i]->t);

        free_fevent(&fs->pool_threads[i]->fevents1);
        free_fevent(&fs->pool_threads[i]->fevents2);

        free(fs->pool_threads[i]);
      }
    }

    free_fevent(&fs->runtime_fevents);

    mzrt_mutex_destroy(fs->future_mutex);
    mzrt_sema_destroy(fs->future_pending_sema);
    mzrt_sema_destroy(fs->gc_ok_c);
    mzrt_sema_destroy(fs->gc_done_c);

    free(fs->pool_threads);
    free(fs);

    scheme_future_state = NULL;
  }
}

static void check_future_thread_creation(Scheme_Future_State *fs)
{
  if (!fs->future_threads_created && !fs->future_queue_count)
    return;

  if (fs->future_threads_created < fs->thread_pool_size) {
    int count;

    mzrt_mutex_lock(fs->future_mutex);
    count = fs->future_queue_count;
    mzrt_mutex_unlock(fs->future_mutex);

    if (count >= fs->future_threads_created) {
      init_future_thread(fs, fs->future_threads_created);
      fs->future_threads_created++;
    }
  }
}

static void start_gc_not_ok(Scheme_Future_State *fs)
/* must have mutex_lock */
{
  while (fs->wait_for_gc) {
    int quit = fs->abort_all_futures;
    fs->need_gc_done_post++;
    mzrt_mutex_unlock(fs->future_mutex);
    if (quit) mz_proc_thread_exit(NULL);
    mzrt_sema_wait(fs->gc_done_c);
    mzrt_mutex_lock(fs->future_mutex);
  }

  fs->gc_not_ok++;

#ifdef MZ_PRECISE_GC
  {
    Scheme_Future_Thread_State *fts = scheme_future_thread_state;
    if (fts->worker_gc_counter != *fs->gc_counter_ptr) {
      GC_gen0_alloc_page_ptr = 0; /* forces future to ask for memory */
      GC_gen0_alloc_page_end = 0;
      fts->gen0_start = 0;
      if (fts->gen0_size > 1)
        fts->gen0_size >>= 1;
      fts->worker_gc_counter = *fs->gc_counter_ptr;
    }
  }
#endif
}

static void end_gc_not_ok(Scheme_Future_Thread_State *fts, 
                          Scheme_Future_State *fs, 
                          Scheme_Object **current_rs)
/* must have mutex_lock */
{
  Scheme_Thread *p;

  scheme_set_runstack_limits(MZ_RUNSTACK_START, 
                             fts->runstack_size,
                             (current_rs
                              ? current_rs XFORM_OK_MINUS MZ_RUNSTACK_START
                              : fts->runstack_size),
                             fts->runstack_size);
  p = scheme_current_thread;
  p->runstack = MZ_RUNSTACK;
  p->runstack_start = MZ_RUNSTACK_START;
  p->cont_mark_stack = MZ_CONT_MARK_STACK;
  p->cont_mark_pos = MZ_CONT_MARK_POS;

  /* FIXME: clear scheme_current_thread->ku.multiple.array ? */

  --fs->gc_not_ok;
  if (fs->need_gc_ok_post) {
    fs->need_gc_ok_post = 0;
    mzrt_sema_post(fs->gc_ok_c);
  }
}

void scheme_future_block_until_gc()
{
  Scheme_Future_State *fs = scheme_future_state;
  int i;

  if (!fs) return;
  if (!fs->future_threads_created) return;

  mzrt_mutex_lock(fs->future_mutex);
  fs->wait_for_gc = 1;
  mzrt_mutex_unlock(fs->future_mutex);

  for (i = 0; i < fs->thread_pool_size; i++) { 
    if (fs->pool_threads[i]) {
      *(fs->pool_threads[i]->need_gc_pointer) = 1;
      if (*(fs->pool_threads[i]->fuel_pointer)) {
        *(fs->pool_threads[i]->fuel_pointer) = 0;
        *(fs->pool_threads[i]->stack_boundary_pointer) += INITIAL_C_STACK_SIZE;
      }
    }
  }

  if (cpucount > 1) {
    /* In principle, we need some sort of fence to ensure that future
       threads see the change to the fuel pointer. The MFENCE
       instruction would do that, but it requires SSE2. The CPUID
       instruction is a non-privileged serializing instruction that
       should be available on any x86 platform that runs threads. */
#if defined(i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__) || defined(__amd64__)
# ifdef _MSC_VER
    {
      int r[4];
      __cpuid(r, 0);
    }
# else
    {
#  if defined(i386) || defined(__i386__)
#   define MZ_PUSH_EBX "pushl %%ebx"
#   define MZ_POP_EBX "popl %%ebx"
#  endif
#  if defined(__x86_64) || defined(__x86_64__) || defined(__amd64__)
#   define MZ_PUSH_EBX "pushq %%rbx"
#   define MZ_POP_EBX "popq %%rbx"
#  endif
      int _eax, _ebx, _ecx, _edx, op = 0;
      /* we can't always use EBX, so save and restore it: */
      asm (MZ_PUSH_EBX "\n\t"
           "cpuid \n\t" 
           "movl %%ebx, %1 \n\t"
           MZ_POP_EBX
           : "=a" (_eax), "=r" (_ebx), "=c" (_ecx), "=d" (_edx) : "a" (op));
    }
#  undef MZ_PUSH_EBX
#  undef MZ_POP_EBX
# endif
#endif
  }

  mzrt_mutex_lock(fs->future_mutex);
  while (fs->gc_not_ok) {
    fs->need_gc_ok_post = 1;
    mzrt_mutex_unlock(fs->future_mutex);
    mzrt_sema_wait(fs->gc_ok_c);
    mzrt_mutex_lock(fs->future_mutex);
  }
  mzrt_mutex_unlock(fs->future_mutex);
}

void scheme_future_continue_after_gc()
{
  Scheme_Future_State *fs = scheme_future_state;
  int i;

  if (!fs) return;

  for (i = 0; i < fs->thread_pool_size; i++) {
    if (fs->pool_threads[i]) {
      *(fs->pool_threads[i]->need_gc_pointer) = 0;

      if (!fs->pool_threads[i]->thread->current_ft
          || scheme_custodian_is_available(fs->pool_threads[i]->thread->current_ft->cust)) {
        *(fs->pool_threads[i]->fuel_pointer) = 1;
        *(fs->pool_threads[i]->stack_boundary_pointer) -= INITIAL_C_STACK_SIZE;
      } else {
        /* leave fuel exhausted, which will force the thread into a slow 
           path when it resumes to suspend the computation */
      }
    }
  }

  mzrt_mutex_lock(fs->future_mutex);
  fs->wait_for_gc = 0;
  while (fs->need_gc_done_post) {
    --fs->need_gc_done_post;
    mzrt_sema_post(fs->gc_done_c);
  }
  mzrt_mutex_unlock(fs->future_mutex);
}

void scheme_future_gc_pause()
/* Called in future thread */
{
  Scheme_Future_Thread_State *fts = scheme_future_thread_state;
  Scheme_Future_State *fs = scheme_future_state;

  mzrt_mutex_lock(fs->future_mutex); 
  end_gc_not_ok(fts, fs, MZ_RUNSTACK);
  start_gc_not_ok(fs); /* waits until wait_for_gc is 0 */
  mzrt_mutex_unlock(fs->future_mutex);
}

void scheme_future_check_custodians()
{
  scheme_future_block_until_gc();
  scheme_future_continue_after_gc();
}

/**********************************************************************/
/* Future-event logging                                               */
/**********************************************************************/

static double get_future_timestamp() XFORM_SKIP_PROC {
#if 1
  return scheme_get_inexact_milliseconds();
#else
  return 0.0;
#endif
}

static void init_fevent(Fevent_Buffer *b) XFORM_SKIP_PROC
{
  if (b->a) free(b->a);

  b->pos = 0;
  b->overflow = 0;
  b->a = (Fevent *)malloc(FEVENT_BUFFER_SIZE * sizeof(Fevent));
  memset(b->a, 0, FEVENT_BUFFER_SIZE * sizeof(Fevent));
}

static void free_fevent(Fevent_Buffer *b)
{
  if (b->a) {
    free(b->a);
    b->a = NULL;
  }
}

static void record_fevent(int what, int fid) XFORM_SKIP_PROC
/* call with the lock or in the runtime thread */
{
  Scheme_Future_Thread_State *fts = scheme_future_thread_state;
  Fevent_Buffer *b;
  
  if (fts) {
    if (fts->use_fevents1)
      b = &fts->fevents1;
    else
      b = &fts->fevents2;
  } else
    b = &scheme_future_state->runtime_fevents;
  
  b->a[b->pos].timestamp = get_future_timestamp();
  b->a[b->pos].what = what;
  b->a[b->pos].fid = fid;

  b->pos++;
  if (b->pos == FEVENT_BUFFER_SIZE) {
    b->overflow = 1;
    b->pos = 0;
  }
}

static void init_traversal(Fevent_Buffer *b)
{
  if (b->overflow) {
    b->count = FEVENT_BUFFER_SIZE;
    b->i = b->pos;
  } else {
    b->i = 0;
    b->count = b->pos;
  }
}

static void end_traversal(Fevent_Buffer *b)
{
  b->overflow = 0;
  b->pos = 0;
}

static void log_future_event(Scheme_Future_State *fs,
                             const char *msg_str,
                             const char *extra_str,
                             int which,
                             int what,
                             double timestamp,
                             int fid)
{
  Scheme_Object *data, *v;

  data = scheme_make_blank_prefab_struct_instance(fs->fevent_prefab);
  if (what == FEVENT_MISSING)
    ((Scheme_Structure *)data)->slots[0] = scheme_false;
  else
    ((Scheme_Structure *)data)->slots[0] = scheme_make_integer(fid);
  ((Scheme_Structure *)data)->slots[1] = scheme_make_integer((which+1));
  v = fs->fevent_syms[what];
  if (!v) {
    v = scheme_intern_symbol(fevent_strs[what]);
    fs->fevent_syms[what] = v;
  }
  ((Scheme_Structure *)data)->slots[2] = v;
  v = scheme_make_double(timestamp);
  ((Scheme_Structure *)data)->slots[3] = v;
  
  scheme_log_w_data(scheme_main_logger, SCHEME_LOG_DEBUG, 0,
                    data,                 
                    msg_str,
                    fid,
                    which+1,
                    fevent_long_strs[what],
                    extra_str,
                    timestamp);
}

static void log_overflow_event(Scheme_Future_State *fs, int which, double timestamp)
{
  log_future_event(fs,
                   "future ??%-, process %d: %s%s; before time: %f",
                   "",
                   which, 
                   FEVENT_MISSING, 
                   timestamp, 
                   0);
}

static void flush_future_logs(Scheme_Future_State *fs)
{
  Scheme_Future_Thread_State *fts;
  double t, min_t;
  int i, min_which, min_set;
  Fevent_Buffer *b, *min_b;

  if (scheme_log_level_p(scheme_main_logger, SCHEME_LOG_DEBUG)) {
    /* Hold lock while swapping buffers: */
    mzrt_mutex_lock(fs->future_mutex);
    for (i = 0; i < fs->thread_pool_size; i++) {
      fts = fs->pool_threads[i];
      if (fts) {
        fts->use_fevents1 = !fts->use_fevents1;
        if (fts->use_fevents1)
          b = &fts->fevents2;
        else
          b = &fts->fevents1;
        init_traversal(b);
      }
    }  
    mzrt_mutex_unlock(fs->future_mutex);
    init_traversal(&fs->runtime_fevents);

    if (fs->runtime_fevents.overflow)
      log_overflow_event(fs, -1, fs->runtime_fevents.a[fs->runtime_fevents.i].timestamp);
    for (i = 0; i < fs->thread_pool_size; i++) {
        fts = fs->pool_threads[i];
        if (fts) {
          if (fts->use_fevents1)
            b = &fts->fevents2;
          else
            b = &fts->fevents1;
          if (b->overflow)
            log_overflow_event(fs, i, b->a[b->i].timestamp);
        }
    }

    while (1) {
      min_set = 0;
      min_t = 0;
      min_b = NULL;
      min_which = -1;
      if (fs->runtime_fevents.count) {
        t = fs->runtime_fevents.a[fs->runtime_fevents.i].timestamp;
        if (!min_set || (t < min_t)) {
          min_t = t;
          min_b = &fs->runtime_fevents;
          min_set = 1;
        }
      }
      for (i = 0; i < fs->thread_pool_size; i++) {
        fts = fs->pool_threads[i];
        if (fts) {
          if (fts->use_fevents1)
            b = &fts->fevents2;
          else
            b = &fts->fevents1;
      
          if (b->count) {
            t = b->a[b->i].timestamp;
            if (!min_set || (t < min_t)) {
              min_t = t;
              min_b = b;
              min_which = i;
              min_set = 1;
            }
          }
        }
      }

      if (!min_b)
        break;

      log_future_event(fs,
                       "future %d, process %d: %s%s; time: %f",
                       "",
                       min_which, 
                       min_b->a[min_b->i].what, 
                       min_b->a[min_b->i].timestamp, 
                       min_b->a[min_b->i].fid);

      --min_b->count;
      min_b->i++;
      if (min_b->i == FEVENT_BUFFER_SIZE)
        min_b->i = 0;
    }

    for (i = 0; i < fs->thread_pool_size; i++) {
      fts = fs->pool_threads[i];
      if (fts) {
        if (fts->use_fevents1)
          b = &fts->fevents2;
        else
          b = &fts->fevents1;
        end_traversal(b);
      }
    }  
    end_traversal(&fs->runtime_fevents);
  }
}

/**********************************************************************/
/* Primitive implementations                                          */
/**********************************************************************/

static Scheme_Object *make_future(Scheme_Object *lambda)
/* Called in runtime thread --- as atomic on behalf of a future thread
   if `lambda' is known to be a thunk */ 
{
  Scheme_Future_State *fs = scheme_future_state;
  int futureid;
  future_t *ft;
  Scheme_Native_Closure *nc;
  Scheme_Native_Closure_Data *ncd;
  Scheme_Custodian *c;

  if (SAME_TYPE(SCHEME_TYPE(lambda), scheme_native_closure_type)) {
    nc = (Scheme_Native_Closure*)lambda;
    ncd = nc->code;
  } else {
    nc = NULL;
    ncd = NULL;
  }

  /* Create the future descriptor and add to the queue as 'pending' */
  ft = MALLOC_ONE_TAGGED(future_t);     
  ft->so.type = scheme_future_type;

  ft->orig_lambda = lambda;
  ft->status = PENDING;

  if (scheme_current_thread->mref)
    c = scheme_custodian_extract_reference(scheme_current_thread->mref);
  else {
    /* must be in a future thread (if futures can be created in future threads) */
    c = scheme_current_thread->current_ft->cust;
  }
  ft->cust = c;
   
  /* JIT the code if not already JITted */
  if (ncd) {
    if (ncd->code == scheme_on_demand_jit_code)
      scheme_on_demand_generate_lambda(nc, 0, NULL);
  
    if (ncd->max_let_depth > FUTURE_RUNSTACK_SIZE * sizeof(void*)) {
      /* Can't even call it in a future thread */
      ft->status = PENDING_OVERSIZE;
    }

    ft->code = (void*)ncd->code;
  } else
    ft->status = PENDING_OVERSIZE;

  mzrt_mutex_lock(fs->future_mutex);
  futureid = ++fs->next_futureid;
  ft->id = futureid;
  record_fevent(FEVENT_CREATE, futureid);
  if (ft->status != PENDING_OVERSIZE)
    enqueue_future(fs, ft);
  mzrt_mutex_unlock(fs->future_mutex);

  check_future_thread_creation(fs);

  return (Scheme_Object*)ft;
}

static Scheme_Object *do_make_future(int argc, Scheme_Object *argv[])
{
  scheme_check_proc_arity("future", 0, 0, argc, argv);
  return make_future(argv[0]);
}

Scheme_Object *scheme_future(int argc, Scheme_Object *argv[])
  XFORM_SKIP_PROC /* can be called from future thread */
{
  Scheme_Future_Thread_State *fts = scheme_future_thread_state;
  if (!fts)
    return do_make_future(argc, argv);
  else {
    Scheme_Object *proc = argv[0];
    if (SAME_TYPE(SCHEME_TYPE(proc), scheme_native_closure_type)
        && scheme_native_arity_check(proc, 0)
        && (((Scheme_Native_Closure *)proc)->code->code != scheme_on_demand_jit_code)
        && (((Scheme_Native_Closure *)proc)->code->max_let_depth < FUTURE_RUNSTACK_SIZE * sizeof(void*))) {
      /* try to alocate a future in the future thread */
      future_t *ft;
      ft = MALLOC_ONE_TAGGED(future_t);
      if (ft) {
        Scheme_Future_State *fs = scheme_future_state;

        ft->so.type = scheme_future_type;
        ft->orig_lambda = proc;
        ft->status = PENDING;
        ft->cust = scheme_current_thread->current_ft->cust;
        ft->code = ((Scheme_Native_Closure *)proc)->code->code;

        mzrt_mutex_lock(fs->future_mutex);
        ft->id = ++fs->next_futureid;
        record_fevent(FEVENT_CREATE, ft->id);
        enqueue_future(fs, ft);
        mzrt_mutex_unlock(fs->future_mutex);

        return (Scheme_Object *)ft;
      } else {
        /* It would be nice to encourage allocation of a page for
           the future thread in this case, since it might try to
           allocate more futures. */
        return scheme_rtcall_make_future("future", FSRC_OTHER, proc);
      }
    } else {
      return scheme_rtcall_make_future("future", FSRC_OTHER, proc);
    }
  }
}

void fsemaphore_finalize(void *p, void *data)
{
  fsemaphore_t *sema;
  sema = (fsemaphore_t*)p;
  mzrt_mutex_destroy(sema->mut);
}

Scheme_Object *scheme_make_fsemaphore_inl(Scheme_Object *ready)
/* Called in runtime thread */
{
  fsemaphore_t *sema;
  intptr_t v;

  v = scheme_get_semaphore_init("make-fsemaphore", 1, &ready);

  sema = MALLOC_ONE_TAGGED(fsemaphore_t);
  sema->so.type = scheme_fsemaphore_type;
  
  mzrt_mutex_create(&sema->mut);
  sema->ready = SCHEME_INT_VAL(ready);

  scheme_register_finalizer((void*)sema, fsemaphore_finalize, NULL, NULL, NULL);

  return (Scheme_Object*)sema;
}


Scheme_Object *make_fsemaphore(int argc, Scheme_Object **argv)
  /* Called in runtime thread (atomic/synchronized) */
{
  return scheme_make_fsemaphore_inl(argv[0]);
}

Scheme_Object *scheme_fsemaphore_count(int argc, Scheme_Object **argv)
  XFORM_SKIP_PROC 
{
  fsemaphore_t *sema;

  if (!SAME_TYPE(SCHEME_TYPE(argv[0]), scheme_fsemaphore_type)) { 
    SCHEME_WRONG_TYPE_MAYBE_IN_FT("fsemaphore-count", "fsemaphore", 0, argc, argv);
  }

  sema = (fsemaphore_t*)argv[0]; 
  return scheme_make_integer(sema->ready);
}

static void requeue_future_within_lock(future_t *future, Scheme_Future_State *fs) 
{
  if (scheme_custodian_is_available(future->cust)) {
    future->status = PENDING;
    enqueue_future(fs, future);
  } else {
    /* The future's constodian is shut down, so don't try to
       run it in a future thread anymore */
    future->status = SUSPENDED;
  }
}

static void requeue_future(future_t *future, Scheme_Future_State *fs) 
{
  mzrt_mutex_lock(fs->future_mutex);
  requeue_future_within_lock(future, fs);
  mzrt_mutex_unlock(fs->future_mutex);
}

static int try_resume_future_from_fsema_wait(fsemaphore_t *sema, Scheme_Future_State *fs) 
  XFORM_SKIP_PROC
{
  future_t *ft;
  if (!sema->queue_front) { 
    return 0;
  }

  ft = sema->queue_front;
  sema->queue_front = ft->next_in_fsema_queue;
  ft->next_in_fsema_queue = NULL;
  if (!sema->queue_front) { 
    sema->queue_end = NULL;
  } else { 
    sema->queue_front->prev_in_fsema_queue = NULL;
  }

  sema->ready--;
    
  /* Place the waiting future back on the run queue */
  requeue_future(ft, fs);

  return 1;
}

Scheme_Object *scheme_fsemaphore_post(int argc, Scheme_Object **argv)
  XFORM_SKIP_PROC
{
  fsemaphore_t *sema;
  Scheme_Future_State *fs;
  int old_count;

  if (!SAME_TYPE(SCHEME_TYPE(argv[0]), scheme_fsemaphore_type)) { 
    SCHEME_WRONG_TYPE_MAYBE_IN_FT("fsemaphore-post", "fsemaphore", 0, argc, argv);
  }

  sema = (fsemaphore_t*)argv[0];

#ifdef FSEMA_LOGGING 
  printf("[Thread %p]: scheme_fsemaphore_post for sema at %p. Count before V: %d\n", 
    pthread_self(), 
    sema, 
    sema->ready);
#endif

  fs = scheme_future_state;
  mzrt_mutex_lock(sema->mut);

  old_count = sema->ready;
  sema->ready++;
  if (!old_count) { 
    try_resume_future_from_fsema_wait(sema, fs);
  }

  mzrt_mutex_unlock(sema->mut);
  return scheme_void;
}

static void enqueue_future_for_fsema(future_t *ft, fsemaphore_t *sema) 
  /* This function assumed sema->mut has already been acquired! */
{
  future_t *front;

  /* Enqueue this future in the semaphore's queue */ 
  front = sema->queue_front;
  if (!front) { 
    sema->queue_front = ft;
    sema->queue_end = ft;
  } else {
    future_t *end = sema->queue_end;
    end->next_in_fsema_queue = ft;
    ft->prev_in_fsema_queue = end;
    sema->queue_end = ft;
  }
}

static fsemaphore_t *block_until_sema_ready(fsemaphore_t *sema)
{
  /* This little function cooperates with the GC, unlike the
     function that calls it. */
  scheme_block_until(fsemaphore_ready, NULL, (Scheme_Object*)sema, 0);
  return sema;
}

Scheme_Object *scheme_fsemaphore_wait(int argc, Scheme_Object **argv)
  XFORM_SKIP_PROC
{
  fsemaphore_t *sema;
  Scheme_Future_Thread_State *fts = scheme_future_thread_state;
  Scheme_Future_State *fs = scheme_future_state;
  void *storage[3];

  if (!SAME_TYPE(SCHEME_TYPE(argv[0]), scheme_fsemaphore_type)) { 
    SCHEME_WRONG_TYPE_MAYBE_IN_FT("fsemaphore-wait", "fsemaphore", 0, argc, argv);
  }

  sema = (fsemaphore_t*)argv[0];

#ifdef FSEMA_LOGGING 
  printf("[Thread %p]: scheme_fsemaphore_wait for sema at %p. Count before P: %d\n", 
    pthread_self(), 
    sema, 
    sema->ready);
#endif

  mzrt_mutex_lock(sema->mut);
  if (!sema->ready) { 
    if (!fts) { 
      /* Then we are on the runtime thread, block and wait for the 
          fsema to be ready while cooperating with the scheduler */ 
      mzrt_mutex_unlock(sema->mut);
      sema = block_until_sema_ready(sema);
      mzrt_mutex_lock(sema->mut);
    } else {
      /* On a future thread, suspend the future (to be 
        resumed whenever the fsema becomes ready */
      future_t *future = fts->thread->current_ft;
      jit_future_storage[0] = (void*)sema;
      jit_future_storage[1] = (void*)future;
      if (!future) { 
        /* Should never be here */
        scheme_log_abort("fsemaphore-wait: future was NULL for future thread.");
        abort();
      }

      /* Setup for LWC capture */
      mzrt_mutex_unlock(sema->mut);
      scheme_fill_lwc_end();
      future->lwc = scheme_current_lwc;
      future->fts = fts;
      future->arg_p = scheme_current_thread;   

      /* Try to capture it locally (on this thread) */
      if (GC_gen0_alloc_page_ptr 
          && capture_future_continuation(future, storage)) {
        /* This will set fts->thread->current_ft to NULL */
        mzrt_mutex_lock(fs->future_mutex);
        future->status = WAITING_FOR_FSEMA;
      } else { 
        /* Can't capture the continuation locally, so ask the runtime 
            thread to do it */
        mzrt_mutex_lock(fs->future_mutex);
        future->next_waiting_lwc = fs->future_waiting_lwc;
        fs->future_waiting_lwc = future;
        future->want_lw = 1;
      }   

      scheme_signal_received_at(fs->signal_handle);
      if (fts->thread->current_ft) { 
        /* Will get here if relying on runtime thread to capture for us -- 
            wait for the signal that LW continuation was captured 
            by the runtime thread. */ 
        future->can_continue_sema = fts->worker_can_continue_sema;
        end_gc_not_ok(fts, fs, MZ_RUNSTACK);
        mzrt_mutex_unlock(fs->future_mutex);
      
        mzrt_sema_wait(fts->worker_can_continue_sema);

        mzrt_mutex_lock(fs->future_mutex);
        start_gc_not_ok(fs); 
      }
      mzrt_mutex_unlock(fs->future_mutex);

      if (fts->thread->current_ft) { 
        /* Should never get here, cont. capture should remove it */ 
        scheme_log_abort("fsemaphore-wait: current_ft was not NULL after continuation capture.");
        abort();
      }

      /* Fetch the future and sema pointers again, in case moved by a GC */
      sema = (fsemaphore_t*)jit_future_storage[0];
      future = (future_t*)jit_future_storage[1];

      /* Check again to see whether the sema has become ready */ 
      mzrt_mutex_lock(sema->mut);
      if (sema->ready) { 
        /* Then resume the future immediately (requeue) */
        sema->ready--;
        requeue_future(future, fs);
      } else {
        /* Add the future to the sema's wait queue */ 
        enqueue_future_for_fsema(future, sema);
      }

      mzrt_mutex_unlock(sema->mut);
      
      /* Jump back to the worker thread future loop (this thread 
          is now free */ 
      scheme_future_longjmp(*scheme_current_thread->error_buf, 1);
    }
  } 

  /* Semaphore is ready -- decrement and continue */
  sema->ready--;
  mzrt_mutex_unlock(sema->mut);
  return scheme_void;
}

Scheme_Object *scheme_fsemaphore_try_wait(int argc, Scheme_Object **argv)
  XFORM_SKIP_PROC
{
  fsemaphore_t *sema;
  Scheme_Object *ret;

  if (!SAME_TYPE(SCHEME_TYPE(argv[0]), scheme_fsemaphore_type)) { 
    SCHEME_WRONG_TYPE_MAYBE_IN_FT("fsemaphore-try-wait?", "fsemaphore", 0, argc, argv);
  }

  sema = (fsemaphore_t*)argv[0];
  mzrt_mutex_lock(sema->mut);
  if (!sema->ready) { 
    ret = scheme_false;
  } else { 
    sema->ready--;
    ret = scheme_true;
  }

  mzrt_mutex_unlock(sema->mut);
  return ret;
}

static int fsemaphore_ready(Scheme_Object *obj) 
/* Called in runtime thread by Scheme scheduler */
{
  int ret = 0;
  fsemaphore_t *fsema = (fsemaphore_t*)obj;
  mzrt_mutex_lock(fsema->mut);
  ret = fsema->ready;
  mzrt_mutex_unlock(fsema->mut);
  return ret;
}


int future_ready(Scheme_Object *obj)
/* Called in runtime thread by Scheme scheduler */
{
  Scheme_Future_State *fs = scheme_future_state;
  int ret = 0;
  future_t *ft = (future_t*)obj;

  mzrt_mutex_lock(fs->future_mutex);
  if ((ft->status != RUNNING)
      && (ft->status != WAITING_FOR_FSEMA)
      && (ft->status != HANDLING_PRIM)) {
    ret = 1;
  }
  mzrt_mutex_unlock(fs->future_mutex);

  return ret;
}

static void dequeue_future(Scheme_Future_State *fs, future_t *ft)
  XFORM_SKIP_PROC
/* called from both future and runtime threads */
{
  if (ft->prev == NULL)
    fs->future_queue = ft->next;
  else
    ft->prev->next = ft->next;
  
  if (ft->next == NULL)
    fs->future_queue_end = ft->prev;
  else
    ft->next->prev = ft->prev;

  ft->next = NULL;
  ft->prev = NULL;

  --fs->future_queue_count;
}

static void complete_rtcall(Scheme_Future_State *fs, future_t *future)
{
  if (future->suspended_lw) {
    /* Re-enqueue the future so that some future thread can continue */
    requeue_future_within_lock(future, fs);
  } else {
    /* Signal the waiting worker thread that it
       can continue running machine code */
    future->want_lw = 0;
    if (future->can_continue_sema) {
      mzrt_sema_post(future->can_continue_sema);
      future->can_continue_sema = NULL;
    }
  }
}

static void direct_future_to_future_touch(Scheme_Future_State *fs, future_t *ft, future_t *t_ft)
{
  Scheme_Object *retval = ft->retval;

  receive_special_result(ft, retval, 0);
  t_ft->retval_s = retval;
  send_special_result(t_ft, retval);

  t_ft->arg_S1 = NULL;

  complete_rtcall(fs, t_ft);
}

static future_t *get_future_for_touch(future_t *ft)
  XFORM_SKIP_PROC
/* called in any thread with lock held */
{
  if ((ft->status == WAITING_FOR_PRIM) && (ft->prim_func == touch)) {
    /* try to enqueue it... */
    Scheme_Object **a = ft->arg_S1;
    if (ft->suspended_lw)
      a = scheme_adjust_runstack_argument(ft->suspended_lw, a);
    return (future_t *)a[0];
  } else
    return NULL;
}

static void trigger_added_touches(Scheme_Future_State *fs, future_t *ft)
  XFORM_SKIP_PROC
/* lock held; called from both future and runtime threads */ 
{
  if (ft->touching) {
    Scheme_Object *touching = ft->touching;
    while (!SCHEME_NULLP(touching)) {
      Scheme_Object *wb = SCHEME_CAR(touching);
      future_t *t_ft = (future_t *)SCHEME_WEAK_BOX_VAL(wb);

      if (t_ft && (get_future_for_touch(t_ft) == ft)) {
        direct_future_to_future_touch(fs, ft, t_ft);
      }

      touching = SCHEME_CDR(touching);
    }
  }
}

static void future_in_runtime(Scheme_Future_State *fs, future_t * volatile ft, int what)
{    
  mz_jmp_buf newbuf, * volatile savebuf;
  Scheme_Thread *p = scheme_current_thread;  
  Scheme_Object * volatile retval;
  future_t * volatile old_ft;

  old_ft = p->current_ft;
  p->current_ft = ft;

  savebuf = p->error_buf;
  p->error_buf = &newbuf;

  record_fevent(what, ft->id);

  if (scheme_setjmp(newbuf)) {
    ft->no_retval = 1;
    retval = NULL;
  } else {
    if (ft->suspended_lw) {
      retval = apply_future_lw(ft);
    } else {
      retval = scheme_apply_multi(ft->orig_lambda, 0, NULL);
    }
    send_special_result(ft, retval);
  }

  p->error_buf = savebuf;
  p->current_ft = old_ft;

  ft->retval = retval;

  mzrt_mutex_lock(fs->future_mutex);
  ft->status = FINISHED;
  trigger_added_touches(fs, ft);
  mzrt_mutex_unlock(fs->future_mutex);

  record_fevent(FEVENT_COMPLETE, ft->id);

  record_fevent(FEVENT_END_WORK, ft->id);

  if (!retval) {
    scheme_longjmp(*savebuf, 1);
  }
}

static int prefer_to_apply_future_in_runtime()
/* Called with the future mutex held. */
{
  /* Policy question: if the runtime thread is blocked on a
     future, should we just run the future (or its suspended continuation)
     directly in the runtime thread?

     If we don't, then we're better able to handle non-blocking requests
     from future threads. At the same time, the runtime thread shouldn't
     wait if no one is working on the future. We err on the safe side
     and always run when we're waiting on the future: */
  return 1;
}

Scheme_Object *general_touch(int argc, Scheme_Object *argv[])
/* Called in runtime thread */
{
  Scheme_Future_State *fs = scheme_future_state;
  Scheme_Object *retval = NULL;
  future_t *ft;

  if (!SAME_TYPE(SCHEME_TYPE(argv[0]), scheme_future_type))
    scheme_wrong_type("touch", "future", 0, argc, argv);

  ft = (future_t*)argv[0];

#ifdef DEBUG_FUTURES 
  LOG("touch (future %d)", futureid);	
  dump_state();
#endif

  mzrt_mutex_lock(fs->future_mutex);
  if ((((ft->status == PENDING) 
        && prefer_to_apply_future_in_runtime())
       || (ft->status == PENDING_OVERSIZE)
       || (ft->status == SUSPENDED))
      && (!ft->suspended_lw
          || scheme_can_apply_lightweight_continuation(ft->suspended_lw))) {
    int what = FEVENT_START_WORK;
    if (ft->status == PENDING_OVERSIZE) {
      what = FEVENT_START_RTONLY_WORK;
    } else if (ft->status != SUSPENDED) {
      dequeue_future(fs, ft);
    }
    ft->status = RUNNING;
    mzrt_mutex_unlock(fs->future_mutex);

    future_in_runtime(fs, ft, what);

    retval = ft->retval;
    
    receive_special_result(ft, retval, 0);

    flush_future_logs(fs);

    return retval;
  }
  mzrt_mutex_unlock(fs->future_mutex);

  /* Spin waiting for primitive calls or a return value from
     the worker thread */
  while (1) {
    if (!future_ready((Scheme_Object *)ft)) {
      record_fevent(FEVENT_TOUCH_PAUSE, ft->id);
      scheme_block_until(future_ready, NULL, (Scheme_Object*)ft, 0);
      record_fevent(FEVENT_TOUCH_RESUME, ft->id);
    }

    mzrt_mutex_lock(fs->future_mutex);
    if ((ft->status == RUNNING)
        || (ft->status == WAITING_FOR_FSEMA)
        || (ft->status == HANDLING_PRIM)) 
      {
        /* someone else got to it first */
        mzrt_mutex_unlock(fs->future_mutex);
      } 
    else if (ft->status == FINISHED)
      {
        int id;

        retval = ft->retval;

        id = ft->id;

        mzrt_mutex_unlock(fs->future_mutex);
        
        break;
      }
    else if (ft->status == WAITING_FOR_PRIM)
      {
        /* Invoke the primitive and stash the result.
           Release the lock so other threads can manipulate the queue
           while the runtime call executes. */
        ft->status = HANDLING_PRIM;
        mzrt_mutex_unlock(fs->future_mutex);
        LOG("Invoking primitive on behalf of future %d...", ft->id);
        invoke_rtcall(fs, ft, 0);
        LOG0("done.\n");
      }
    else if (ft->maybe_suspended_lw && (ft->status != WAITING_FOR_FSEMA))
      {
        ft->maybe_suspended_lw = 0;
        if (ft->suspended_lw) {
          if (scheme_can_apply_lightweight_continuation(ft->suspended_lw)
              && prefer_to_apply_future_in_runtime()) {
            if (ft->status != SUSPENDED)
              dequeue_future(fs, ft);
            ft->status = RUNNING;
            /* may raise an exception or escape: */
            mzrt_mutex_unlock(fs->future_mutex);
            future_in_runtime(fs, ft, FEVENT_START_WORK);
          } else {
            /* Someone needs to handle the future. We're banking on some
               future thread eventually picking up the future, which is
               not actually guaranteed if they're all busy looping... */
            mzrt_mutex_unlock(fs->future_mutex);
          }
        } else {
          mzrt_mutex_unlock(fs->future_mutex);
        }
      }
    else
      {
        mzrt_mutex_unlock(fs->future_mutex);
      }
  }

  if (!retval) {
    scheme_signal_error("touch: future previously aborted");
  }

  receive_special_result(ft, retval, 0);

  flush_future_logs(fs);

  return retval;
}

Scheme_Object *touch(int argc, Scheme_Object *argv[])
  XFORM_SKIP_PROC
/* can be called in future thread */
{
  Scheme_Future_Thread_State *fts = scheme_future_thread_state;

  if (!fts) {
    return general_touch(argc, argv);
  } else {
    if (SAME_TYPE(SCHEME_TYPE(argv[0]), scheme_future_type)) {
      Scheme_Future_State *fs = scheme_future_state;
      future_t *ft = (future_t *)argv[0];
      int status;

      mzrt_mutex_lock(fs->future_mutex);      
      status = ft->status;
      mzrt_mutex_unlock(fs->future_mutex);

      if (status == FINISHED) {
        Scheme_Object *retval = ft->retval;
        receive_special_result(ft, retval, 0);
        return retval;
      } else {
#ifdef MZ_PRECISE_GC
        /* Try adding current future to ft's chain of touching futures */
        Scheme_Object *wb, *pr;
        future_t *current_ft = scheme_current_thread->current_ft;

        wb = GC_malloc_weak_box(current_ft, NULL, 0, 0);
        if (wb) {
          pr = GC_malloc_pair(wb, scheme_null);
          if (pr) {
            mzrt_mutex_lock(fs->future_mutex);
            if (ft->status != FINISHED) {
              if (ft->touching)
                SCHEME_CDR(pr) = ft->touching;
              ft->touching = pr;
              current_ft->in_touch_queue = 1;
              mzrt_mutex_unlock(fs->future_mutex);
            } else {
              /* `ft' switched to FINISHED while we were trying add,
                 so carry on with its result */
              Scheme_Object *retval = ft->retval;
              mzrt_mutex_unlock(fs->future_mutex);
              receive_special_result(ft, retval, 0);
              return retval;
            }
          }
        }
#endif
      }
    }
    return scheme_rtcall_iS_s("touch", FSRC_PRIM, touch, argc, argv);
  }
}

#if defined(linux)
# include <unistd.h>
#elif defined(OS_X)
# include <sys/param.h>
# include <sys/sysctl.h>
#elif defined(DOS_FILE_SYSTEM)
# include <windows.h>
#endif 

static void init_cpucount(void)
/* Called in runtime thread */
{
#if defined(linux)
  cpucount = sysconf(_SC_NPROCESSORS_ONLN);
#elif defined(OS_X)
  size_t size = sizeof(cpucount);

  if (sysctlbyname("hw.ncpu", &cpucount, &size, NULL, 0))
    cpucount = 2;
#elif defined(DOS_FILE_SYSTEM)
  SYSTEM_INFO sysinfo;
  GetSystemInfo(&sysinfo);
  cpucount = sysinfo.dwNumberOfProcessors;
#else
  /* Conservative guess! */
  /* A result of 1 is not conservative, because claiming a
     uniprocessor means that atomic cmpxchg operations are not used
     for setting pair flags and hash codes. */
  cpucount = 2;
#endif
}

int scheme_is_multiprocessor(int now)
{
  if (cpucount > 1) {
    if (!now) 
      return 1;
    else {
      Scheme_Future_State *fs = scheme_future_state;
      return (fs && fs->future_threads_created);
    }
  } else
    return 0;
}

Scheme_Object *processor_count(int argc, Scheme_Object *argv[])
/* Called in runtime thread */
{
  return scheme_make_integer(cpucount);
}

Scheme_Object *scheme_current_future(int argc, Scheme_Object *argv[])
  XFORM_SKIP_PROC
/* Called from any thread (either runtime or future) */
{
  future_t *ft = scheme_current_thread->current_ft;

  return (ft ? (Scheme_Object *)ft : scheme_false);
}

/* Entry point for a worker thread allocated for
   executing futures.  This function will never terminate
   (until the process dies). */
void *worker_thread_future_loop(void *arg)
  XFORM_SKIP_PROC
/* Called in future thread; runtime thread is blocked until ready_sema
  is signaled. */
{
  /* valid only until signaling */
  future_thread_params_t *params = (future_thread_params_t *)arg;
  Scheme_Future_Thread_State *fts = params->fts;
  Scheme_Future_State *fs = params->fs;
  Scheme_Object *v;
  Scheme_Closed_Prim *jitcode;
  future_t *ft;
  mz_jmp_buf newbuf;
  int fid;

  scheme_future_state = fs;
  scheme_future_thread_state = fts;

  GC_instance = params->shared_GC;

  GC_gen0_alloc_only = 1;

  /* Set processor affinity */
  /*mzrt_mutex_lock(fs->future_mutex);
      static uintptr_t cur_cpu_mask = 1;
    if (pthread_setaffinity_np(pthread_self(), sizeof(g_cur_cpu_mask), &g_cur_cpu_mask))
    {
    printf(
    "Could not set CPU affinity (%lu) for thread %p!\n", 
    ++g_cur_cpu_mask, 
    pthread_self());
    }

    mzrt_mutex_unlock(fs->future_mutex);
  */

  mzrt_sema_create(&fts->worker_can_continue_sema, 0);

  scheme_use_rtcall = 1;

  scheme_current_thread = fts->thread;

  scheme_fuel_counter = 1;
  scheme_jit_stack_boundary = ((uintptr_t)&v) - INITIAL_C_STACK_SIZE;

  fts->need_gc_pointer = &scheme_future_need_gc_pause;
  fts->fuel_pointer = &scheme_fuel_counter;
  fts->stack_boundary_pointer = &scheme_jit_stack_boundary;

  MZ_RUNSTACK_START = params->runstack_start;
  MZ_RUNSTACK = MZ_RUNSTACK_START + fts->runstack_size;

  params->scheme_current_runstack_ptr = &scheme_current_runstack;
  params->scheme_current_runstack_start_ptr = &scheme_current_runstack_start;
  params->current_thread_ptr = &scheme_current_thread;
  params->jit_future_storage_ptr = &jit_future_storage[0];

  scheme_init_thread_lwc();
  params->lwc = scheme_current_lwc;

  mzrt_sema_post(params->ready_sema);

  while (1) {
    mzrt_sema_wait(fs->future_pending_sema);
    mzrt_mutex_lock(fs->future_mutex);
    start_gc_not_ok(fs);

    ft = get_pending_future(fs);

    if (ft) {
      LOG0("Got a signal that a future is pending...");
        
      fid = ft->id;
      record_fevent(FEVENT_START_WORK, fid);

      /* Work is available for this thread */
      ft->status = RUNNING;
      ft->maybe_suspended_lw = 0;
      mzrt_mutex_unlock(fs->future_mutex);

      ft->thread_short_id = fts->id;

      /* Set up the JIT compiler for this thread  */
      scheme_jit_fill_threadlocal_table();

      fts->thread->current_ft = ft;

      MZ_RUNSTACK = MZ_RUNSTACK_START + fts->runstack_size;
      MZ_CONT_MARK_STACK = 0;
      MZ_CONT_MARK_POS = (MZ_MARK_POS_TYPE)1;

      if (ft->suspended_lw) {
        /* invoke a lightweight continuation */
        scheme_current_thread->error_buf = &newbuf;
        if (scheme_future_setjmp(newbuf)) {
          /* failed or suspended */
          v = NULL;
        } else {
          v = _apply_future_lw(ft);
        }
      } else {
        jitcode = ft->code;

        /* Run the code:
           The lambda passed to a future will always be a parameterless
           function.
           From this thread's perspective, this call will never return
           until all the work to be done in the future has been completed,
           including runtime calls. 
           If jitcode asks the runtime thread to do work, then
           a GC can occur. */
        LOG("Running JIT code at %p...\n", ft->code);

        scheme_current_thread->error_buf = &newbuf;
        if (scheme_future_setjmp(newbuf)) {
          /* failed or suspended */
          v = NULL;
        } else {
          scheme_fill_lwc_start();
          v = scheme_call_as_lightweight_continuation(jitcode, ft->orig_lambda, 0, NULL);
          if (SAME_OBJ(v, SCHEME_TAIL_CALL_WAITING)) {
            v = scheme_ts_scheme_force_value_same_mark(v);
          }
        }

        LOG("Finished running JIT code at %p.\n", ft->code);
      }

      /* Get future again, since a GC may have occurred or
         future may have been suspended */
      ft = fts->thread->current_ft;

      mzrt_mutex_lock(fs->future_mutex);

      if (!ft) {
        /* continuation of future will be requeued, and this future
           thread can do something else */
      } else {
        /* Set the return val in the descriptor */
        ft->retval = v;

        /* In case of multiple values: */
        send_special_result(ft, v);
        
        /* Update the status */
        ft->status = FINISHED;
        trigger_added_touches(fs, ft);

        record_fevent(FEVENT_COMPLETE, fid);

        fts->thread->current_ft = NULL;
      }

      /* Clear stacks */
      MZ_RUNSTACK = MZ_RUNSTACK_START + fts->runstack_size;
      MZ_CONT_MARK_STACK = 0;

      if (ft)
        scheme_signal_received_at(fs->signal_handle);

      record_fevent(FEVENT_END_WORK, fid);
    }

    end_gc_not_ok(fts, fs, NULL);
    mzrt_mutex_unlock(fs->future_mutex);
  }

  return NULL;
}

static Scheme_Object *_apply_future_lw(future_t *ft)
  XFORM_SKIP_PROC
/* Called from any thread (either runtime or future) */
{
  struct Scheme_Lightweight_Continuation *lw = ft->suspended_lw;
  Scheme_Object *v;
  int result_is_rs_argv;

  ft->suspended_lw = NULL;
  
  v = ft->retval_s;
  if (ft->retval_is_rs_argv) {
    result_is_rs_argv = 1;
    ft->retval_is_rs_argv = 0;
  } else {
    ft->retval_s = NULL;
    receive_special_result(ft, v, 1);
    result_is_rs_argv = 0;
  }

  v = scheme_apply_lightweight_continuation(lw, v, result_is_rs_argv, 
                                            FUTURE_RUNSTACK_SIZE);
  
  if (SAME_OBJ(v, SCHEME_TAIL_CALL_WAITING)) {
    v = scheme_ts_scheme_force_value_same_mark(v);
  }

  return v;
}

static void *apply_future_lw_k(void)
{
  Scheme_Thread *p = scheme_current_thread;
  future_t *ft = (future_t *)p->ku.k.p1;

  p->ku.k.p1 = NULL;
  
  return _apply_future_lw(ft);
}

static Scheme_Object *apply_future_lw(future_t *ft)
{
  Scheme_Thread *p = scheme_current_thread;

  p->ku.k.p1 = ft;

  return (Scheme_Object *)scheme_top_level_do(apply_future_lw_k, 0);
}

static int capture_future_continuation(future_t *ft, void **storage)
  XFORM_SKIP_PROC
/* This function explicitly cooperates with the GC by storing the
   pointers it needs to save across a collection in `storage', so
   it can be used in a future thread. If future-thread-local 
   allocation fails, the result is 0.

   It also grabs the future-modification lock as needed to modify the
   future. */
{
  Scheme_Lightweight_Continuation *lw;
  Scheme_Object **arg_S;

  storage[2] = ft;

  lw = scheme_capture_lightweight_continuation(ft->arg_p, ft->lwc, storage);
  if (!lw) return 0;
       
  ft = (future_t *)storage[2];

  ft->suspended_lw = lw;
  ft->maybe_suspended_lw = 1;
  
  ft->want_lw = 0;
  ft->fts->thread->current_ft = NULL; /* tells worker thread that it no longer
                                         needs to handle the future */
  

  if (ft->arg_S0) {
    arg_S = scheme_adjust_runstack_argument(lw, ft->arg_S0);
    ft->arg_S0 = arg_S;
  }
  if (ft->arg_S1) {
    arg_S = scheme_adjust_runstack_argument(lw, ft->arg_S1);
    ft->arg_S1 = arg_S;
  }
  if (ft->arg_S2) {
    arg_S = scheme_adjust_runstack_argument(lw, ft->arg_S2);
    ft->arg_S2 = arg_S;
  }

  return 1;
}

void scheme_check_future_work()
/* Called in the runtime thread by the scheduler */
{
  /* Check for work that future threads need from the runtime thread
     and that can be done in any Scheme thread (e.g., get a new page
     for allocation). */
  future_t *ft, *other_ft;
  Scheme_Future_State *fs = scheme_future_state;
  int more;

  if (!fs) return;

  flush_future_logs(fs);

  check_future_thread_creation(fs);

  more = 1;
  while (more) {
    /* Try to get a future waiting on a atomic operation */
    mzrt_mutex_lock(fs->future_mutex);
    ft = fs->future_waiting_atomic;
    if (ft) {
      fs->future_waiting_atomic = ft->next_waiting_atomic;
      ft->next_waiting_atomic = NULL;
      ft->in_queue_waiting_for_lwc = 0;
      if ((ft->status == WAITING_FOR_PRIM) && ft->rt_prim_is_atomic) {
        ft->status = HANDLING_PRIM;
        ft->want_lw = 0; /* we expect to handle it quickly,
                            so the future thread should just wait */
      } else
        ft = NULL;
      more = 1;
    } else
      more = 0;
    mzrt_mutex_unlock(fs->future_mutex);

    if (ft)
      invoke_rtcall(fs, ft, 1);
  }

  more = 1;
  while (more) {
    /* Try to get a future that's waiting to touch another future: */
    mzrt_mutex_lock(fs->future_mutex);
    ft = fs->future_waiting_touch;
    if (ft) {
      fs->future_waiting_touch = ft->next_waiting_touch;
      ft->next_waiting_touch = NULL;
      other_ft = get_future_for_touch(ft);
      more = 1;
    } else {
      other_ft = NULL;
      more = 0;
    }
    mzrt_mutex_unlock(fs->future_mutex);

    if (other_ft) {
      /* Chain to `ft' from `other_ft': */
      Scheme_Object *wb, *pr;
      int was_done;
      
      wb = scheme_make_weak_box((Scheme_Object *)ft);
      pr = scheme_make_pair(wb, scheme_null);

      mzrt_mutex_lock(fs->future_mutex);
      if (other_ft->status == FINISHED) {
        /* Completed while we tried to allocated a chain link. */
        ft->status = HANDLING_PRIM;
        ft->want_lw = 0;
        was_done = 1;
      } else {
        /* enqueue */
        if (other_ft->touching)
          SCHEME_CDR(pr) = other_ft->touching;
        other_ft->touching = pr;
        was_done = 0;
      }
      mzrt_mutex_unlock(fs->future_mutex);

      if (was_done) {
        /* other_ft is done: */
        direct_future_to_future_touch(fs, other_ft, ft);
      }
    }
  }

  while (1) {
    /* Try to get a future waiting to be suspended */
    mzrt_mutex_lock(fs->future_mutex);
    ft = fs->future_waiting_lwc;
    if (ft) {
      fs->future_waiting_lwc = ft->next_waiting_lwc;
      ft->next_waiting_lwc = NULL;
      if (!ft->want_lw)
        ft = NULL;
    }
    mzrt_mutex_unlock(fs->future_mutex);

    if (ft) {
      void *storage[3];

      if (capture_future_continuation(ft, storage)) {
        /* Signal the waiting worker thread that it
           can continue doing other things: */
        mzrt_mutex_lock(fs->future_mutex);
        if (ft->can_continue_sema) {
          mzrt_sema_post(ft->can_continue_sema);
          ft->can_continue_sema = NULL;
        }
        mzrt_mutex_unlock(fs->future_mutex);
      }
    } else
      break;
  }

  /* If any future thread has its fuel revoked (must have been a custodian
     shutdown) but doesn't have a future (shutdown future must have been
     handled), then we can restore the thread's fuel. Races are
     possible, but they should be rare, and they lead at worst to bad
     performance. */
  {
    int i;
    for (i = 0; i < fs->thread_pool_size; i++) { 
      if (fs->pool_threads[i]) {
        if (!*(fs->pool_threads[i]->fuel_pointer)
            && !fs->pool_threads[i]->thread->current_ft) {
          *(fs->pool_threads[i]->fuel_pointer) = 1;
          *(fs->pool_threads[i]->stack_boundary_pointer) -= INITIAL_C_STACK_SIZE;
        }
      }
    }
  }
}

static void future_do_runtimecall(Scheme_Future_Thread_State *fts,
                                  void *func,
                                  int is_atomic,
                                  int can_suspend)
  XFORM_SKIP_PROC
/* Called in future thread */
{
  future_t *future;
  Scheme_Future_State *fs = scheme_future_state;
  void *storage[3];
  int insist_to_suspend, prefer_to_suspend, fid;

  /* Fetch the future descriptor for this thread */
  future = fts->thread->current_ft;

  scheme_fill_lwc_end();
  future->lwc = scheme_current_lwc;
  future->fts = fts;
  future->arg_p = scheme_current_thread;

  fid = future->id;

  /* Policy question: When should the future thread suspend
     the current future? It costs something to suspend and
     resume a future.
     The current policy:
     Always suspend for a non-atomic (i.e, "unsafe") operation,
     because there's no guarantee that `touch' will allow progress
     anytime soon. For atomic operations, only suspend if there's
     more work available in the future queue, and only if we
     can suspend ourselves (because asking the runtime thread
     to suspend wouldn't accomplish anything). */
  insist_to_suspend = !is_atomic;
  prefer_to_suspend = (insist_to_suspend || fs->future_queue_count);

  if (!scheme_custodian_is_available(future->cust)) {
    insist_to_suspend = 1;
    prefer_to_suspend = 1;
  }

  if (!can_suspend) {
    insist_to_suspend = 0;
    prefer_to_suspend = 0;
  }

  if (prefer_to_suspend
      && GC_gen0_alloc_page_ptr
      && capture_future_continuation(future, storage)) {
    /* this future thread will suspend handling the future
       continuation until the result of the blocking call is ready;
       fts->thread->current_ft was set to NULL */
  }

  mzrt_mutex_lock(fs->future_mutex);

  if (func == touch) {
    record_fevent(FEVENT_RTCALL_TOUCH, fid);
  } else {
    record_fevent(is_atomic ? FEVENT_RTCALL_ATOMIC : FEVENT_RTCALL, fid);
  }

  /* Set up the arguments for the runtime call
     to be picked up by the main rt thread */
  future->prim_func = func;
  future->rt_prim_is_atomic = is_atomic;
  future->status = WAITING_FOR_PRIM;

  if (is_atomic) {
    future->next_waiting_atomic = fs->future_waiting_atomic;
    fs->future_waiting_atomic = future;
  }

  if (fts->thread->current_ft) {
    if (insist_to_suspend) {
      /* couldn't capture the continuation locally, so ask
         the runtime thread to capture it: */
      if (!future->in_queue_waiting_for_lwc) { 
        future->next_waiting_lwc = fs->future_waiting_lwc;
        fs->future_waiting_lwc = future;
        future->in_queue_waiting_for_lwc = 1;
      }

      future->want_lw = 1;
    }
  }

  if (func == touch) {
    if (!future->in_touch_queue) {
      /* Ask the runtime thread to put this future on the queue
         of the future being touched: */
      future->next_waiting_touch = fs->future_waiting_touch;
      fs->future_waiting_touch = future;
    } else {
      future->in_touch_queue = 0; /* done with back-door argument */
    }
  }

  scheme_signal_received_at(fs->signal_handle);

  if (fts->thread->current_ft) {
    /* Wait for the signal that the RT call is finished
       or a lightweight continuation has been captured: */
    future->can_continue_sema = fts->worker_can_continue_sema;
    end_gc_not_ok(fts, fs, MZ_RUNSTACK); /* we rely on this putting MZ_CONT_MARK_STACK into the thread record */
    mzrt_mutex_unlock(fs->future_mutex);

    mzrt_sema_wait(fts->worker_can_continue_sema);

    mzrt_mutex_lock(fs->future_mutex);
    start_gc_not_ok(fs);    
  }

  /* Fetch the future instance again, in case the GC has moved the pointer
     or the future has been requeued. */
  future = fts->thread->current_ft;

  if (future) {
    future->want_lw = 0;
    if (future->no_retval) {
      record_fevent(FEVENT_RTCALL_ABORT, fid);
      future->status = FINISHED;
    } else {
      record_fevent(FEVENT_RTCALL_RESULT, fid);
      future->status = RUNNING;
    }
  } else {
    record_fevent(FEVENT_RTCALL_SUSPEND, fid);
  }

  mzrt_mutex_unlock(fs->future_mutex);

  if (!future) {
    /* future continuation was requeued */
    scheme_future_longjmp(*scheme_current_thread->error_buf, 1);
  } else if (future->no_retval) {
    /* there was an error => abort the future */
    future->no_retval = 0;
    scheme_future_longjmp(*scheme_current_thread->error_buf, 1);
  }
}

/**********************************************************************/
/* Functions for primitive invocation          			      */
/**********************************************************************/
void scheme_wrong_type_from_ft(const char *who, const char *expected_type, int what, int argc, Scheme_Object **argv) 
  XFORM_SKIP_PROC 
/* Called in future thread */
{
  Scheme_Future_Thread_State *fts = scheme_future_thread_state;
  future_t *future = fts->thread->current_ft;

  future->prim_protocol = SIG_WRONG_TYPE_EXN;
  future->arg_str0 = who;
  future->arg_str1 = expected_type;
  future->arg_i2 = what;
  future->arg_i3 = argc;
  future->arg_S4 = argv;

  future->time_of_request = get_future_timestamp();
  future->source_of_request = who;
  future_do_runtimecall(fts, (void*)scheme_wrong_type, 0, 1);
  
  /* Fetch the future again, in case moved by a GC */ 
  future = fts->thread->current_ft;
}

Scheme_Object **scheme_rtcall_on_demand(const char *who, int src_type, prim_on_demand_t f, Scheme_Object **argv)
  XFORM_SKIP_PROC
/* Called in future thread */
{
  Scheme_Future_Thread_State *fts = scheme_future_thread_state;
  future_t *future = fts->thread->current_ft;

  future->prim_protocol = SIG_ON_DEMAND;

  if ((MZ_RUNSTACK + 2) != argv) {
    fprintf(stderr, "internal error: expected arguments on runstack");
    abort();
  }

  future->arg_S0 = MZ_RUNSTACK;

  future->time_of_request = get_future_timestamp();
  future->source_of_request = who;
  future->source_type = src_type;

  future_do_runtimecall(fts, (void*)f, 1, 1);

  /* Fetch the future again, in case moved by a GC */ 
  future = fts->thread->current_ft;

  future->arg_S0 = NULL;
  future->retval_is_rs_argv = 0;

  return MZ_RUNSTACK + 2;
}

Scheme_Object *scheme_rtcall_make_fsemaphore(const char *who, int src_type, Scheme_Object *ready)
  XFORM_SKIP_PROC
/* Called in future thread */
{
  Scheme_Object *retval;
  Scheme_Future_Thread_State *fts = scheme_future_thread_state;
  future_t *future = fts->thread->current_ft;
  int is_atomic;
  
  future->prim_protocol = SIG_MAKE_FSEMAPHORE;
  future->arg_s1 = ready;
  future->time_of_request = get_future_timestamp();
  future->source_of_request = who;
  future->source_type = src_type;

  /* conservative check for when creation can succeed atomically: */
  if (SCHEME_INT_VAL(ready) 
      && (SCHEME_INT_VAL(ready) >= 0)
      && (SCHEME_INT_VAL(ready) < 1024))
    is_atomic = 1;
  else
    is_atomic = 0;
  
  future_do_runtimecall(fts, (void*)scheme_make_fsemaphore_inl, is_atomic, 1);

  /* Fetch the future again, in case moved by a GC */ 
  future = fts->thread->current_ft;

  retval = future->retval_s;
  future->retval_s = NULL;

  return retval;
}

Scheme_Object *scheme_rtcall_make_future(const char *who, int src_type, Scheme_Object *proc)
  XFORM_SKIP_PROC
/* Called in future thread */
{
  Scheme_Object *retval;
  Scheme_Future_Thread_State *fts = scheme_future_thread_state;
  future_t *future = fts->thread->current_ft;
  int is_atomic = 0;
  
  if (SAME_TYPE(SCHEME_TYPE(proc), scheme_native_closure_type)
      && scheme_native_arity_check(proc, 0)) {
    is_atomic = 1;
  }
  
  future->prim_protocol = SIG_FUTURE;
  future->arg_s1 = proc;
  future->time_of_request = get_future_timestamp();
  future->source_of_request = who;
  future->source_type = src_type;
  
  future_do_runtimecall(fts, (void*)scheme_future, is_atomic, 1);

  /* Fetch the future again, in case moved by a GC */ 
  future = fts->thread->current_ft;

  retval = future->retval_s;
  future->retval_s = NULL;

  return retval;  
}

void scheme_rtcall_allocate_values(const char *who, int src_type, int count, Scheme_Thread *t, 
                                   prim_allocate_values_t f)
  XFORM_SKIP_PROC
/* Called in future thread */
{
  Scheme_Future_Thread_State *fts = scheme_future_thread_state;
  future_t *future = fts->thread->current_ft;

  future->prim_protocol = SIG_ALLOC_VALUES;

  future->arg_i0 = count;
  future->arg_s0 = (Scheme_Object *)t;

  future->time_of_request = get_future_timestamp();
  future->source_of_request = who;
  future->source_type = src_type;

  future_do_runtimecall(fts, (void*)f, 1, 1);

  /* Fetch the future again, in case moved by a GC */ 
  future = fts->thread->current_ft;

  future->arg_s0 = NULL;
}

#ifdef MZ_PRECISE_GC

uintptr_t scheme_rtcall_alloc(const char *who, int src_type)
  XFORM_SKIP_PROC
/* Called in future thread */
{
  future_t *future;
  uintptr_t retval;
  Scheme_Future_Thread_State *fts = scheme_future_thread_state;
  intptr_t align, sz;
  
  align = GC_alloc_alignment();

  /* Do we actually still have space? */
  if (fts->gen0_start) {
    intptr_t cur;
    cur = GC_gen0_alloc_page_ptr;
    if (cur < (GC_gen0_alloc_page_end - align)) {
      if (cur & (align - 1)) {
        /* round up to next page boundary */
        cur &= ~(align - 1);
        cur += align;
      }
      cur += fts->gen0_initial_offset;
      return cur;
    }
  }

  /* Grow nursery size as long as we don't trigger a GC */
  if (fts->gen0_size < 16)
    fts->gen0_size <<= 1;

  while (1) {
    future = fts->thread->current_ft;
    future->time_of_request = get_future_timestamp();
    future->source_of_request = who;
    future->source_type = src_type;
  
    future->prim_protocol = SIG_ALLOC;
    future->arg_i0 = fts->gen0_size;

    future_do_runtimecall(fts, (void*)GC_make_jit_nursery_page, 1, 0);

    future = fts->thread->current_ft;
    retval = future->alloc_retval;
    sz = future->alloc_sz_retval;
    future->alloc_retval = 0;

    if (fts->worker_gc_counter == future->alloc_retval_counter) {
      fts->gen0_start = retval;
      fts->gen0_initial_offset = retval & (align - 1);
      break;
    }
  }

  GC_gen0_alloc_page_end = retval + sz;

  return retval;
}

#endif

void scheme_rtcall_new_mark_segment(Scheme_Thread *p)
  XFORM_SKIP_PROC
/* Called in future thread */
{
  future_t *future;
  Scheme_Future_Thread_State *fts = scheme_future_thread_state;

  future = fts->thread->current_ft;
  future->time_of_request = get_future_timestamp();
  future->source_of_request = "[allocate_mark_segment]";
  future->source_type = FSRC_OTHER;
  
  future->prim_protocol = SIG_ALLOC_MARK_SEGMENT;
  future->arg_s0 = (Scheme_Object *)p;
  
  future_do_runtimecall(fts, (void*)scheme_new_mark_segment, 1, 0);
}

static int push_marks(future_t *f, Scheme_Cont_Frame_Data *d)
{
  if (f->suspended_lw) {
    return scheme_push_marks_from_lightweight_continuation(f->suspended_lw, d);
  } else if (f->arg_p) {
    return scheme_push_marks_from_thread(f->arg_p, d);
  }

  return 0;
}

static void pop_marks(Scheme_Cont_Frame_Data *d)
{
  scheme_pop_continuation_frame(d);
}

static void receive_special_result(future_t *f, Scheme_Object *retval, int clear)
  XFORM_SKIP_PROC
/* Called in future or runtime thread */
{
  if (SAME_OBJ(retval, SCHEME_MULTIPLE_VALUES)) {
    Scheme_Thread *p = scheme_current_thread;

    p->ku.multiple.array = f->multiple_array;
    p->ku.multiple.count = f->multiple_count;
    if (clear)
      f->multiple_array = NULL;
  } else if (SAME_OBJ(retval, SCHEME_TAIL_CALL_WAITING)) {
    Scheme_Thread *p = scheme_current_thread;

    p->ku.apply.tail_rator = f->tail_rator;
    p->ku.apply.tail_rands = f->tail_rands;
    p->ku.apply.tail_num_rands = f->num_tail_rands;
    if (clear) {
      f->tail_rator = NULL;
      f->tail_rands = NULL;
    }
  }
}

#include "jit_ts_future_glue.c"

static void send_special_result(future_t *f, Scheme_Object *retval)
  XFORM_SKIP_PROC
/* Called in future or runtime thread */
{
  if (SAME_OBJ(retval, SCHEME_MULTIPLE_VALUES)) {
    Scheme_Thread *p = scheme_current_thread;

    f->multiple_array = p->ku.multiple.array;
    f->multiple_count = p->ku.multiple.count;
    if (SAME_OBJ(p->ku.multiple.array, p->values_buffer))
      p->values_buffer = NULL;
    p->ku.multiple.array = NULL;
  } else if (SAME_OBJ(retval, SCHEME_TAIL_CALL_WAITING)) {
    Scheme_Thread *p = scheme_current_thread;

    f->tail_rator = p->ku.apply.tail_rator;
    f->tail_rands = p->ku.apply.tail_rands;
    f->num_tail_rands = p->ku.apply.tail_num_rands;
    p->ku.apply.tail_rator = NULL;
    p->ku.apply.tail_rands = NULL;
  }
}

#define ADJUST_RS_ARG(ft, arg_Sx) if (ft->suspended_lw) arg_Sx = scheme_adjust_runstack_argument(ft->suspended_lw, arg_Sx)

/* Does the work of actually invoking a primitive on behalf of a 
   future.  This function is always invoked on the main (runtime) 
   thread. */
static void do_invoke_rtcall(Scheme_Future_State *fs, future_t *future, int is_atomic)
/* Called in runtime thread */
{
  Scheme_Cont_Frame_Data mark_d;
  int need_pop;

#ifdef DEBUG_FUTURES
  g_rtcall_count++;
#endif

  if (scheme_log_level_p(scheme_main_logger, SCHEME_LOG_DEBUG)) {
    const char *src;

    src = future->source_of_request;
    if (future->source_type == FSRC_RATOR) {
      int len;
      if (SCHEME_PROCP(future->arg_s0)) {
        const char *src2;
        src2 = scheme_get_proc_name(future->arg_s0, &len, 1);
        if (src2) src = src2;
      }
    } else if (future->source_type == FSRC_PRIM) {
      const char *src2;
      src2 = scheme_look_for_primitive(future->prim_func);
      if (src2) src = src2;
    }

    flush_future_logs(fs);
   
    /* use lg_future_event so we can include `str' in the message: */
    log_future_event(fs,
                     "future %d, process %d: %s: %s; time: %f",
                     src,
                     -1, 
                     (is_atomic ? FEVENT_HANDLE_RTCALL_ATOMIC : FEVENT_HANDLE_RTCALL),
                     get_future_timestamp(),
                     future->id);
  }

  if ((future->source_type == FSRC_RATOR)
      || (future->source_type == FSRC_MARKS)
      || (future->source_type == FSRC_PRIM))
    need_pop = push_marks(future, &mark_d);
  else
    need_pop = 0;
  future->arg_p = NULL;

  switch (future->prim_protocol)
    {
    case SIG_ON_DEMAND:
      {
        prim_on_demand_t func = (prim_on_demand_t)future->prim_func;
        GC_CAN_IGNORE Scheme_Object **arg_S0 = future->arg_S0;

        future->arg_S0 = NULL;

        ADJUST_RS_ARG(future, arg_S0);

        func(arg_S0, arg_S0 + 2);

        future->retval_is_rs_argv = 1;

        break;
      }
#ifdef MZ_PRECISE_GC
    case SIG_ALLOC:
      {
        uintptr_t ret, sz;
        int amt = future->arg_i0;
        ret = GC_make_jit_nursery_page(amt, &sz);
        future->alloc_retval = ret;
        future->alloc_sz_retval = sz;
        future->alloc_retval_counter = scheme_did_gc_count;
        break;
      }
#endif
    case SIG_ALLOC_MARK_SEGMENT:
      {
        GC_CAN_IGNORE Scheme_Thread *p_seg;
        p_seg = (Scheme_Thread *)future->arg_s0;
        future->arg_s0 = NULL;
        scheme_new_mark_segment(p_seg);
        break;
      }
    case SIG_MAKE_FSEMAPHORE: 
      {
        Scheme_Object *s = future->arg_s1; 
        future->arg_s1 = NULL;
        s = scheme_make_fsemaphore_inl(s);
        future->retval_s = s;
        break;
      }
    case SIG_FUTURE: 
      {
        Scheme_Object *s = future->arg_s1; 
        future->arg_s1 = NULL;
        s = make_future(s);
        future->retval_s = s;
        break;
      }
    case SIG_ALLOC_VALUES:
      {
        prim_allocate_values_t func = (prim_allocate_values_t)future->prim_func;
        GC_CAN_IGNORE Scheme_Object *arg_s0 = future->arg_s0;

        future->arg_s0 = NULL;

        func(future->arg_i0, (Scheme_Thread *)arg_s0);

        break;
      }
    case SIG_WRONG_TYPE_EXN:
      {
        const char *who;
        const char *expected_type;
        int what;
        int argc;
        Scheme_Object **argv;

        who = future->arg_str0;
        expected_type = future->arg_str1;
        what = future->arg_i2;
        argc = future->arg_i3;
        argv = future->arg_S4;
        
        future->arg_str0 = NULL;
        future->arg_str1 = NULL;
        future->arg_S4 = NULL;

        ADJUST_RS_ARG(future, argv);

        scheme_wrong_type(who, expected_type, what, argc, argv);

        /* doesn't return */

        break;
      }
# define JIT_TS_LOCALIZE(t, f) GC_CAN_IGNORE t f = future->f
# include "jit_ts_runtime_glue.c"
    default:
      scheme_signal_error("unknown protocol %d", future->prim_protocol);
      break;
    }

  if (need_pop)
    pop_marks(&mark_d);

  record_fevent(FEVENT_HANDLE_RTCALL_RESULT, future->id);

  mzrt_mutex_lock(fs->future_mutex);
  complete_rtcall(fs, future);
  mzrt_mutex_unlock(fs->future_mutex);
}

typedef Scheme_Object *(*overflow_k_t)(void);

static void *do_invoke_rtcall_k(void)
{
  Scheme_Thread *p = scheme_current_thread;
  Scheme_Future_State *fs = (Scheme_Future_State *)p->ku.k.p1;
  future_t *future = (future_t *)p->ku.k.p2;

#ifdef DO_STACK_CHECK
  {
# include "mzstkchk.h"
    return scheme_handle_stack_overflow((overflow_k_t)do_invoke_rtcall_k);
  }
#endif

  p->ku.k.p1 = NULL;
  p->ku.k.p2 = NULL;
  
  do_invoke_rtcall(fs, future, p->ku.k.i1);

  return scheme_void;
}

static void invoke_rtcall(Scheme_Future_State * volatile fs, future_t * volatile future,
                          volatile int is_atomic)
{
  Scheme_Thread *p = scheme_current_thread;
  mz_jmp_buf newbuf, * volatile savebuf;

  savebuf = p->error_buf;
  p->error_buf = &newbuf;
  if (scheme_setjmp(newbuf)) {
    record_fevent(FEVENT_HANDLE_RTCALL_ABORT, future->id);
    mzrt_mutex_lock(fs->future_mutex);
    future->no_retval = 1;
    if (future->suspended_lw) {
      /* Abandon the future */
      future->status = FINISHED;
      future->retval = 0;
      future->suspended_lw = NULL;
      mzrt_mutex_unlock(fs->future_mutex);
    } else {
      /* Signal the waiting worker thread that it
         can continue running machine code */
      mzrt_sema_post(future->can_continue_sema);
      future->can_continue_sema = NULL;
      mzrt_mutex_unlock(fs->future_mutex);
    }
    if (is_atomic) {
      scheme_log_abort("internal error: failure during atomic");
      abort();
    }
    scheme_longjmp(*savebuf, 1);
  } else {
    if (future->rt_prim_is_atomic) {
      do_invoke_rtcall(fs, future, is_atomic);
    } else {
      /* call with continuation barrier. */
      p->ku.k.p1 = fs;
      p->ku.k.p2 = future;
      p->ku.k.i1 = is_atomic;

      (void)scheme_top_level_do(do_invoke_rtcall_k, 1);
    }
  }
  p->error_buf = savebuf;
}


/**********************************************************************/
/* Helpers for manipulating the futures queue                         */
/**********************************************************************/

future_t *enqueue_future(Scheme_Future_State *fs, future_t *ft)
/* Called in runtime thread */
{
  if (fs->future_queue_end) {
    fs->future_queue_end->next = ft;
    ft->prev = fs->future_queue_end;
  }
  fs->future_queue_end = ft;
  if (!fs->future_queue)
    fs->future_queue = ft;
  fs->future_queue_count++;
  
  /* Signal that a future is pending */
  mzrt_sema_post(fs->future_pending_sema);
  
  return ft;
}

future_t *get_pending_future(Scheme_Future_State *fs)
  XFORM_SKIP_PROC
/* Called in future thread with lock held */
{
  future_t *f;

  while (1) {
    f = fs->future_queue;
    if (f) {
      dequeue_future(fs, f);
      if (!scheme_custodian_is_available(f->cust)) {
        f->status = SUSPENDED;
      } else {
        return f;
      }
    } else
      return NULL;
  }
}

#endif

/**********************************************************************/
/*                           Precise GC                               */
/**********************************************************************/

#ifdef MZ_PRECISE_GC

START_XFORM_SKIP;

#include "mzmark_future.inc"

static void register_traversers(void)
{
#ifdef MZ_USE_FUTURES
  GC_REG_TRAV(scheme_future_type, future);
  GC_REG_TRAV(scheme_fsemaphore_type, fsemaphore);
#else
  GC_REG_TRAV(scheme_future_type, sequential_future);
  GC_REG_TRAV(scheme_fsemaphore_type, sequential_fsemaphore);
#endif
}

END_XFORM_SKIP;

#endif
