/* Copyright 2002, Jonathan S. Shapiro.
   All Rights Reserved.

   XDCS is a quasi-archiver based on the XDelta copy/insert algorithm
   published by Josh MacDonald.

   XDCS differs from the XDelta in several ways:

   1. It is not intended for use as a streaming delta
      protocol. Therefore, it does limit back-searching when expanding
      matches.

   2. It is intended for use in the same scenarios as RCS/SCCS. It
      therefore assumes that the sequence of inputs is inherently
      self-similar.

   3. The original implementation encodes by taking two versions
      (/base/, /new/) and generating a sequence of copy and insert
      operations relative to /base/ that produce /new/. That is, it is
      conceived as producing one version from another. This
      implementation redefines /base/ to be the entire previous
      insertion history. See the notes below for details of this.

   4. Josh's implementation clobbers hash entries when hashes
      collide. Since the hashes are of relatively short sequences this
      can penalize things quite a bit. We keep all collisions (at
      least for now -- need to experiment) and use the "longest match"
      rule.

   Classical delta algorithms use one of the following strategies:

   a) forward deltas: start from a baseline and then compute all
      deltas relative to the existing baseline.

      PRO: Simple to do.
      PRO: Existing entries never rewritten.
      PRO: Any version can be extracted in time that is linear in
           sizeof(baseline) + sizeof(encoded delta).
      CON: Inserts grow progressively larger, as new content is
	   progressively added.

      The insert problem is usually solved by periodically storing
      a save point and then restarting the sequence from there.

   b) backward deltas: most recent entry is complete, all others are
      computed relative to the most recent.

      PRO: Most recent deltas come out quickly.
      CON: All deltas must be re-encoded every time an insert is
           done, or sequential decoding must be done to extract old
           versions. 
      CON: Because of the sequential decoding requirement, the
           memory required is on the same order as that of two
           entries in the file.

   c) interleaved deltas, in which all encodings are kept in some
      form of weighted tree.

      PRO: Any version extracts in roughly NlogN time, N the size of
           the version.
      CON: I don't know how to do this.
      CON: The algorithms are complicated enough that they do not
           promote confidence.

   The main downside to the forward delta scheme is that version N+1
   is likely to be most similar to version N. Computing a strictly
   forward delta therefore leads to progressively longer diffs.

   Because Xdelta is a copy+insert scheme, the algorithm for archives
   can use a minor *variant* on the forward delta scheme.

   Each entry i in the archive can be thought of as an "insert buffer"
   S_i consisting of bytes and a "command string" C_i. The command
   string is a sequence of operations, each of which is a copy or an
   insert. The "copy" commands copy byte sequences from the base
   buffer, while the insert commands copy successive byte sequences
   from the insert buffer.

   EXPERIMENTS:

   There are two optimizations on this that I plan to try. The first
   takes advantage of self-similarity in any given file (e.g. C
   code). The second takes advantage of self-similarity across the
   entire archive.

   First, note that the *initial* entry in the archive normally
   consists of an insert string that is the entire initial file and a
   single insert command inserting this buffer into the output.

   Proposal 1: Redefine that for any given entry in the archive the
   /base/ buffer should be defined as a (normalized) concatenation of
   all previous insert buffers. This allows later insertions to draw
   their copies from both earlier and later versions, and *should*
   preserve the advantages of backward deltas without introducing the
   extra associated regeneration costs.

   This ought to be pretty simple.

   Proposal 2: More work, but possibly interesting, is to revise the
   construction algorithm such that it considers the insertion buffer
   to be a sequence of individual insertions si_0, si_1,
   si_2. Following each insertion i it appends the resulting si_i to
   the /base/ string, allowing subsequent copy operations to
   self-reference the insertion buffer and thereby achieving some
   degree of compression.

   Proposal 3: Use a slightly trickier encoding of inserts to take
   advantage of short offsets.
 */

#include <opencm.h>

/* #define PARANOID */
/* #define HASH_TEST */
/* #define VERBOSE */

/* Command opcodes. The first byte of every command selects insert vs
   copy (bit 0x40) and how the length is encoded:
     SHORT_*: length (< 64) lives in the low six bits of the opcode,
     MID_*:   a 16-bit length follows the opcode,
     LONG_*:  a 32-bit length follows the opcode. */
#define SHORT_INS  0u
#define SHORT_COPY 0x40u
#define LONG_INS   0x80u
#define MID_INS    0x81u
#define LONG_COPY  0xC0u
#define MID_COPY   0xC1u

/* Nonzero iff CMD is one of the copy opcodes. The argument is
   parenthesized so that expression arguments (e.g. a | b) are tested
   as a whole rather than being split by operator precedence. */
#define ISCOPY(cmd) ((cmd) & 0x40u)

/* Length class of LEN: 'S'hort (< 64), 'M'id (< 65536) or 'L'ong. */
#define CMDTY(len) (((len) < 64) ? 'S' : (((len) < 65536) ? 'M' : 'L'))

/* Size of the hashed window, as a power of two and in bytes, and the
   stride between successive hashed windows of the base buffer. */
#define LG_CHUNKSZ  4
#define CHUNKSZ     16
#define SKIPSZ      CHUNKSZ

/* Allocate a new SXDelta that refers to the given command and insert
   buffers. The buffers are stored by pointer; they are not copied. */
SXDelta *
sxdelta_create(Buffer *cmd, Buffer *ins)
{
  SXDelta *delta = (SXDelta *) GC_MALLOC(sizeof(*delta));

  ser_init(delta, &SXDelta_SerType, SXDelta_SerType.ver);
  SER_MODIFIED(delta);

  delta->cmd = cmd;
  delta->ins = ins;

  return delta;
}

/* Serializer hook: write the SXDelta at VP to STRM, command buffer
   first and insert buffer second. */
void
sxdelta_serialize(SDR_stream *strm, const void *vp)
{
  const SXDelta *delta = (const SXDelta *) vp;

  sdr_write("cmd", strm, delta->cmd);
  sdr_write("ins", strm, delta->ins);
}

/* Deserializer hook: allocate an SXDelta stamped with the version in
   DI and read its command and insert buffers from STRM, in the same
   order sxdelta_serialize() writes them. */
void *
sxdelta_deserialize(const DeserializeInfo *di, SDR_stream *strm)
{
  SXDelta *delta = (SXDelta *) GC_MALLOC(sizeof(*delta));

  ser_init(delta, &SXDelta_SerType, di->ver);

  delta->cmd = sdr_read("cmd", strm);
  delta->ins = sdr_read("ins", strm);

  SER_MODIFIED(delta);

  return delta;
}

/* Consistency-check hook for SXDelta objects. No checks are performed:
   the argument is ignored and the result is always TRUE. */
OC_bool
sxdelta_check(const void *v)
{
  return TRUE;
}

/* Report the sizes of the command and insert buffers of the SXDelta
   at VP. Each length is formatted and reported before the next one is
   formatted. */
void
sxdelta_show(const void *vp)
{
  const SXDelta *delta = (const SXDelta *) vp;

  report(0, "Command bytes:  %s\n", xunsigned64_str(buffer_length(delta->cmd)));

  report(0, "Insert bytes:   %s\n", xunsigned64_str(buffer_length(delta->ins)));
}

/* Mark hook for SXDelta objects. Intentionally empty: none of the
   arguments are examined and nothing is added to memObs. */
void
sxdelta_mark(Repository *r, const void *container,
	    const void *ob, rbtree *memObs)
{
  /* do nothing */
}


/* One hashed window of a buffer: the window's checksum paired with
   the position at which the window begins. */
typedef struct hashpos_t {
  phash_t hash;			/* psum() of the CHUNKSZ bytes starting at offset */
  size_t offset;		/* byte offset of the window within the buffer */
} hashpos_t ;

/* Hash index over a buffer, as built by compute_base_hashes(), which
   leaves the entries ordered by ascending hash value. */
typedef struct hashvec_t {
  unsigned  nhash;		/* number of entries in hash[] */
  hashpos_t *hash;		/* array */
} hashvec_t;

/* qsort() comparator ordering hashpos_t entries by ascending hash
   value. Entries with equal hashes compare equal; their offsets are
   not consulted. */
static int
cmp_hash(const void *v1, const void *v2)
{
  const hashpos_t *lhs = (const hashpos_t *) v1;
  const hashpos_t *rhs = (const hashpos_t *) v2;

  assert(lhs != 0);
  assert(rhs != 0);

  /* Yields -1, 0 or 1 */
  return (lhs->hash > rhs->hash) - (lhs->hash < rhs->hash);
}

/* Search comparator: VKEY points at a bare phash_t, VMEMBER at a
   hashpos_t array element. Returns -1, 0 or 1 as the key is below,
   equal to, or above the member's hash. */
static int
cmp_hashkey(const void *vkey, const void *vmember)
{
  const phash_t wanted = * ((const phash_t *) vkey);
  const hashpos_t *entry = (const hashpos_t *) vmember;

  if (wanted == entry->hash)
    return 0;

  return (wanted < entry->hash) ? -1 : 1;
}

/* Build the hash index for BUF.

   The buffer is cut into windows of CHUNKSZ bytes starting every
   SKIPSZ bytes; any residual bytes after the last whole stride are
   not hashed. Each window's psum() and starting offset are recorded,
   and the resulting array is sorted by hash value so that it can be
   searched by find_first_matching_hash().

   Returns a newly allocated hashvec_t. When BUF is shorter than one
   stride the vector has nhash == 0. */
static hashvec_t *
compute_base_hashes(Buffer *buf)
{
  unsigned i;
  ocmoff_t bufLen = buffer_length(buf);

#ifdef HASH_TEST
  phash_t phash = 0;		/* for the progressive hash */
#endif
  phash_t hash = 0;		/* for the normal hash */
  hashvec_t *bh;

  /* With SKIPSZ >= CHUNKSZ every window [i*SKIPSZ, i*SKIPSZ+CHUNKSZ)
     for i < bufLen/SKIPSZ lies entirely inside the buffer. */
  assert(SKIPSZ >= CHUNKSZ);

  bh = GC_MALLOC(sizeof(hashvec_t));
  bh->nhash = bufLen / SKIPSZ;	/* whole strides only; chops residual */
  bh->hash = GC_MALLOC_ATOMIC(sizeof(hashpos_t) * bh->nhash);

  if (bh->nhash == 0)
    return bh;

  for (i = 0; i < bh->nhash; i++) {
    unsigned char sxdbuf[CHUNKSZ];

    buffer_read(buf, sxdbuf, i * SKIPSZ, CHUNKSZ);

    hash = psum(sxdbuf, CHUNKSZ);

    bh->hash[i].offset = i * SKIPSZ;
    bh->hash[i].hash = hash;

#ifdef HASH_TEST
    /* Cross-check: rolling the previous window's hash forward one
       byte at a time must arrive at the directly computed hash. */
    if (i == 0) {
      phash = hash;
    }
    else {
      unsigned j;

      for (j = 0; j < CHUNKSZ; j++)
	phash = sxd_progressive_sum(phash, 
				    buffer_getc(buf, i*SKIPSZ + j-CHUNKSZ),
				    buffer_getc(buf, i*SKIPSZ + j));

      if (phash != hash)
	log_trace(ERR_SXDELTA, "Progressive 0x%x, base 0x%x\n", phash, hash);
    }
#endif
  }

  qsort(bh->hash, bh->nhash, sizeof(hashpos_t), cmp_hash);

  log_trace(DBG_SXDELTA, "# hashes: %d\n", bh->nhash);

  return bh;
}

/* Locate KEY in the sorted hash vector BH. When several entries share
   the hash, the one at the lowest array index is returned so that the
   caller can walk forward over all of them. Returns 0 when the hash
   is not present. */
static hashpos_t *
find_first_matching_hash(hashvec_t *bh, phash_t key)
{
  hashpos_t *hit =
    xbsearch(&key, bh->hash, bh->nhash, sizeof(*bh->hash), cmp_hashkey);

  if (hit != 0) {
    /* The search may land anywhere in a run of equal hashes; back up
       to the first entry of the run. */
    for ( ; hit != bh->hash && hit[-1].hash == hit->hash; hit--)
      ;
  }

  return hit;
}

/* A candidate copy: a byte range of the source (base) buffer paired
   with the equally long range of the destination buffer it
   reproduces. expand_match() sets len to 0 when the ranges turn out
   not to be byte-identical. */
typedef struct match_t {
  off_t src_start;		/* inclusive */
  off_t src_end;		/* exclusive */
  off_t dst_start;		/* inclusive */
  off_t dst_end;		/* exclusive */
  size_t len;			/* matched length; 0 marks a rejected candidate */
} match_t;

/* Validate and grow a candidate match.

   On entry MATCH describes equally long ranges in SRC and DST that
   hashed alike. If the bytes differ anywhere in that range, match->len
   is set to 0 and the function returns at once. Otherwise the match is
   widened one byte at a time: backwards while neither src_start drops
   below 0 nor dst_start drops to FLOOR or below, then forwards up to
   the end of either buffer. len is kept equal to the width of the
   range. */
static void
expand_match(match_t *match, 
	     Buffer *src, 
	     Buffer *dst, off_t floor)
{
  unsigned k = 0;
  ocmoff_t srcLimit;
  ocmoff_t dstLimit;

  /* A hash hit is only a hint: confirm the bytes themselves agree. */
  while (match->src_start + k < match->src_end) {
    if (buffer_getc(src, match->src_start + k) 
	!= buffer_getc(dst, match->dst_start + k)) {
      match->len = 0;
      return;
    }

    k++;
  }

  /* Grow towards lower offsets, never reaching below floor in dst. */
  while (match->src_start > 0 && match->dst_start > floor
	 && (buffer_getc(src, match->src_start-1) 
	     == buffer_getc(dst, match->dst_start-1))) {
    match->src_start--;
    match->dst_start--;
    match->len ++;
  }

  /* Grow towards higher offsets until either buffer runs out. */
  srcLimit = buffer_length(src);
  dstLimit = buffer_length(dst);

  while (match->src_end < srcLimit && match->dst_end < dstLimit
	 && (buffer_getc(src, match->src_end) 
	     == buffer_getc(dst, match->dst_end))) {
    match->src_end++;
    match->dst_end++;
    match->len ++;
  }

  assert(match->len == match->dst_end - match->dst_start);
}

#define TAKE_BEST_MATCH
/* #define TAKE_LAST_MATCH */

/* NOTE: At the moment this does NOT actually find the best match. It
   finds the *last* match. This is equivalent to the behavior of the
   MacDonald XDelta implementation, though XDelta accomplishes the
   same result by simply throwing away the earlier candidates. */
static OC_bool
find_best_match(hashvec_t *bh, phash_t key, match_t *match,
		Buffer *src, Buffer *dst, off_t floor, 
		off_t pos)
{
  hashpos_t *found;

  found = find_first_matching_hash(bh, key);
  if (!found)
    return FALSE;

  match->dst_start = pos;
  match->dst_end = pos+CHUNKSZ;
  match->src_start = found->offset;
  match->src_end = found->offset + CHUNKSZ;
  match->len = CHUNKSZ;

  expand_match(match, src, dst, floor);

  /* Matching hash found. We now have the correct hash, but the
     actual characters may not match due to hash collision. Also, this
     match needs to be expanded backwards and forwards to maximize the
     match length. */

#if defined(TAKE_BEST_MATCH)
  /* A match is better if (a) found->offset is reduced by using it or
     (b) the total length is longer and the offset doesn't get any
     worse. */
 {
   hashpos_t *last = bh->hash + bh->nhash - 1;

   while (found != last && found[1].hash == found[0].hash) {
      match_t m;

      found++;
      m.dst_start = pos;
      m.dst_end = pos+CHUNKSZ;
      m.src_start = found->offset;
      m.src_end = found->offset + CHUNKSZ;
      m.len = CHUNKSZ;

      expand_match(&m, src, dst, floor);

      /* Take earlier (expanded) match: */
      if (m.len > 0 && m.dst_start < match->dst_start)
	memcpy(match, &m, sizeof(m));
      /* Then take longer match */
      else if (m.dst_start == match->dst_start && m.len > match->len)
	memcpy(match, &m, sizeof(m));
   }
 }
#elif defined(TAKE_LAST_MATCH)
  {
    hashpos_t *last = bh->hash + bh->nhash - 1;

    while (found != last && found[1].hash == found[0].hash) {
      match_t m;

      found++;
      m.dst_start = pos;
      m.dst_end = pos+CHUNKSZ;
      m.src_start = found->offset;
      m.src_end = found->offset + CHUNKSZ;
      m.len = CHUNKSZ;

      expand_match(&m, src, dst, floor);
      if (m.len > 0)
	memcpy(match, &m, sizeof(m));
    }
  }
#else
  {
    hashpos_t *last = bh->hash + bh->nhash - 1;

    while (found != last && found[1].hash == found[0].hash) {
      match_t m;

      found++;
      m.dst_start = pos;
      m.dst_end = pos+CHUNKSZ;
      m.src_start = found->offset;
      m.src_end = found->offset + CHUNKSZ;
      m.len = CHUNKSZ;

      expand_match(&m, src, dst, floor);
      if (m.len > 0) {
	memcpy(match, &m, sizeof(m));
	break;
      }
    }
  }
#endif

   if (match->len == 0)
     return FALSE;

  return TRUE;
}

/* Emit an insert command for the destination bytes [START, END).

   An opcode sized to the length is appended to CMD (short form with
   the length in the opcode byte, or MID/LONG form followed by a 16- or
   32-bit length), and the bytes themselves are appended from DST to
   the INS stream. SRC is not used. */
static void
emit_insert(SDR_stream *cmd, SDR_stream *ins,
	    Buffer *src, Buffer *dst, 
	    off_t start, off_t end)
{
  size_t len = end - start;
  char ty = CMDTY(len);

  if (ty == 'S') {
    sdr_w_u8("cmd", cmd, SHORT_INS|len);
  }
  else if (ty == 'M') {
    sdr_w_u8("cmd", cmd, MID_INS);
    sdr_w_u16("len", cmd, len);
  }
  else if (ty == 'L') {
    sdr_w_u8("cmd", cmd, LONG_INS);
    sdr_w_u32("len", cmd, len);
  }

  /* Payload goes to the insert stream, not the command stream. */
  stream_write_partial_buffer(ins, dst, start, end - start);

  log_trace(DBG_SXDELTA, "%cI %d [%d,%d)\n", 
	    ty, end - start, start, end);
}

/* Emit a copy command for MATCH.

   An opcode sized to the match length is appended to CMD (short form
   with the length in the opcode byte, or MID/LONG form followed by a
   16- or 32-bit length), followed by the 32-bit source start offset.
   INS, SRC and DST are not used. */
static void
emit_copy(SDR_stream *cmd, SDR_stream *ins,
	  Buffer *src, Buffer *dst, 
	  match_t *match)
{
  off_t len = match->src_end - match->src_start;
  char ty = CMDTY(len);

  if (ty == 'S') {
    sdr_w_u8("cmd", cmd, SHORT_COPY |len);
  }
  else if (ty == 'M') {
    sdr_w_u8("cmd", cmd, MID_COPY);
    sdr_w_u16("len", cmd, len);
  }
  else if (ty == 'L') {
    sdr_w_u8("cmd", cmd, LONG_COPY);
    sdr_w_u32("len", cmd, len);
  }

  /* Every copy form ends with the source offset. */
  sdr_w_u32("start", cmd, match->src_start);

  log_trace(DBG_SXDELTA, "%cC %d [%d,%d) matches dst [%d,%d)\n", 
	  ty, 
	  len,
	  match->src_start, match->src_end, 
	  match->dst_start, match->dst_end);
}

/* Mark the end of a command sequence. Nothing is written to CMD (the
   textual "F" terminator is compiled out); only a trace line is
   logged. */
static void
emit_finish(SDR_stream *cmd)
{
#if 0
  stream_printf(cmd, "F\n");
#endif

  log_trace(DBG_SXDELTA, "F\n");
}

#define HT_SHA1  0

/* Build an in-memory archive from stream S.

   A null or zero-length stream yields an empty version-2 archive with
   magic "sxd2". Otherwise the header (magic, version, flags, entry
   count) is read, followed by a name and an SXDelta for each entry.

   In both cases the directory is allocated with one slot more than the
   number of entries. */
XDeltaArchive_t *
xda_fromStream(SDR_stream *s)
{
  XDeltaArchive_t *xda = GC_MALLOC(sizeof(XDeltaArchive_t));
  unsigned ndx;

  if (s == 0 || stream_length(s) == 0) {
    /* Nothing to parse: describe a brand new archive. */
    xda->magic = "sxd2";
    xda->version = 2;
    xda->flags = 0;
    xda->nDirent = 0;
  }
  else {
    xda->magic = (const char *) sdr_r_bytes("magic", s, 4);
    xda->version = sdr_r_u16("version", s);
    xda->flags = sdr_r_u16("flags", s);
    xda->nDirent = sdr_r_u32("nDirent", s);
  }

  /* We allocate an EXTRA directory slot. This is because the insert
     operation adds at most one object, and it is cheaper to avoid
     reallocating the array later. */
  xda->maxDirent = xda->nDirent + 1;
  xda->dir = GC_MALLOC(sizeof(xdirent_t) * xda->maxDirent);

  /* Populate the directory; a new archive has no entries to read. */
  for (ndx = 0; ndx < xda->nDirent; ndx++) {
    xda->dir[ndx].name    = sdr_r_string("name", s);
    xda->dir[ndx].sxdelta = sdr_read("XDELTA", s);
  }

  return xda;
}

#if 0
/* Disabled: qsort() comparator ordering directory entries by name.

   We no longer sort dirents by name, since we need to be able to
   extract by index.  This makes the extract-by-name operation linear,
   but who really cares? */
static int
xdirent_cmp(const void *v1, const void *v2)
{
  const xdirent_t *xd1 = (const xdirent_t *) v1;
  const xdirent_t *xd2 = (const xdirent_t *) v2;

  assert(xd1 != 0);
  assert(xd2 != 0);
  return strcmp(xd1->name, xd2->name);
}
#endif

/* Key-versus-member comparator: VKEY is a NUL-terminated name, VMEMBER
   a directory entry. Returns the strcmp() ordering of the key against
   the entry's name. */
static int
xdirent_keycmp(const void *vkey, const void *vmember)
{
  const xdirent_t *entry = (const xdirent_t *) vmember;

  return strcmp((const char *) vkey, entry->name);
}

/* Serialize archive XDA to stream S: the header (magic, version,
   flags, entry count) followed by the name and SXDelta of every
   directory entry, in directory order. */
static void
xda_write(XDeltaArchive_t *xda, SDR_stream *s)
{
  unsigned ndx = 0;

  sdr_w_bytes("magic", s, 4, xda->magic);
  sdr_w_u16("version", s, xda->version);
  sdr_w_u16("flags", s, xda->flags);
  sdr_w_u32("nDirent", s, xda->nDirent);

  while (ndx < xda->nDirent) {
    sdr_w_string("name", s, xda->dir[ndx].name);
    sdr_write("XDELTA", s, xda->dir[ndx].sxdelta);

    ndx++;
  }
}

/* Return an SXDelta describing how to produce dst from src.

   The source buffer is indexed by hashing its CHUNKSZ-byte chunks;
   the destination is then scanned with a rolling hash for windows
   that also occur in src. Matching regions become copy commands, and
   the destination bytes in between become insert commands whose
   payload is collected in a separate insert stream.

   The result is not attached to any archive entry; the caller stores
   it in a dirent. When compiled with HASH_TEST, a rolling-hash
   mismatch makes the function return 0. */
SXDelta *
xdcs_gen_delta(Buffer *src, Buffer *dst)
{
  hashvec_t *bh = compute_base_hashes(src);
  SDR_stream *ins = 0;
  SDR_stream *cmd = 0;

  OC_bool found = FALSE;
  phash_t h;
  off_t len = buffer_length(dst);
  off_t floor = 0;		/* number of bytes we can already generate */
  match_t match = {0, 0, 0, 0};

  ins = stream_createBuffer(SDR_XDCS);
  cmd = stream_createBuffer(SDR_XDCS);

  /* A destination shorter than one window cannot be hashed; it is
     emitted as a single insert below. */
  if (len < CHUNKSZ)
    goto finish;

  /* Proceed as follows:
   *
   * 1. Iterate to find best match against first matching hash:
   *    A. Find a match.
   *    B. Extend it backwards (not past floor) and forwards (not past
   *       end).
   *    C. If the result is better than the previous candidate, take
   *       it (see find_best_match for what "better" means).
   * 2. If there is a gap between the beginning of this match and
   *    floor, add an "insert" instruction for the gap.
   * 3. Add the discovered match as a "copy" operation.
   */

  do {
    off_t pos = floor;		/* search position in target file */
    found= FALSE;

    /* Prime the rolling hash with the window that starts at floor. */
    {
      unsigned char sxdsum[CHUNKSZ];
      buffer_read(dst, sxdsum, pos, CHUNKSZ);

      h = psum(sxdsum, CHUNKSZ);
    }

    /* The strict bound keeps the byte at pos+CHUNKSZ, which the
       rolling update below reads, inside dst. */
    while (pos + CHUNKSZ < len) {
#ifdef HASH_TEST
      phash_t check;
#endif

      found = find_best_match(bh, h, &match, src, dst, floor, pos);
      if (found)
	break;

      /* Slide the window one byte: drop dst[pos], add dst[pos+CHUNKSZ]. */
      h = psum_progressive(h, LG_CHUNKSZ, 
			   buffer_getc(dst, pos), 
			   buffer_getc(dst, pos+CHUNKSZ));
#ifdef HASH_TEST
      {
	unsigned char sxdsum[CHUNKSZ];
	buffer_read(dst, sxdsum, pos + 1, CHUNKSZ);
	check = psum(sxdsum, CHUNKSZ);
      }

      if (h != check) {
	log_trace(ERR_SXDELTA, "Hash fail at pos %d: progressive 0x%x real 0x%x\n", pos, h, check);
	return 0;
      }
#endif

      /* No match found at this position. Proceed forward. */
      pos++;
    }

    /* Scanned to the end without a match: the remainder is inserted
       after the loop. */
    if (!found)
      break;

    /* See if we need to output an insert instruction before the next
       copy: */
    if (match.dst_start > floor)
      emit_insert(cmd, ins, src, dst, floor, match.dst_start);

    emit_copy(cmd, ins, src, dst, &match);
    
    /* Everything up to the end of the match is now covered. */
    floor = match.dst_end;
  } while (floor + CHUNKSZ < len);

 finish:
  /* Whatever is left past floor could not be matched; insert it. */
  if (floor < buffer_length(dst))
    emit_insert(cmd, ins, src, dst, floor, buffer_length(dst));

  emit_finish(cmd);

  log_trace(DBG_SXDELTA, "Yielded %d bytes from %d bytes\n", 
	    stream_length(cmd) + stream_length(ins), 
	    buffer_length(dst));

  log_trace(DBG_SXDELTA, "Commands take %d bytes inserting %d bytes\n", 
	    stream_length(cmd), stream_length(ins));

  /* Wrap the command and insert buffers (kept separate) in an
     SXDelta for use in the xdirent */
  return sxdelta_create(stream_asBuffer(cmd), stream_asBuffer(ins));
}

/* Reconstruct a buffer by running the commands of XD against BASE.

   The command buffer is decoded one opcode at a time until it is
   exhausted:
     - a copy command carries a length and a 32-bit start offset and
       appends that range of BASE to the output;
     - an insert command carries a length and appends the next LEN
       bytes of the delta's insert buffer to the output.
   Lengths are taken from the low six bits of the opcode for the short
   forms, or from a following 16-bit (MID) or 32-bit (LONG) field.

   Returns the output as a Buffer. Lengths and offsets found in the
   command buffer are used as read; no range checking is done here. */
Buffer *
xdcs_apply_delta(Buffer *base, SXDelta *xd)
{
  off_t insertPos = 0;		/* next unread byte of the insert buffer */
  
  SDR_stream *out = stream_createBuffer(SDR_RAW);
  SDR_stream *cmd = stream_fromBuffer(xd->cmd, SDR_RAW);
  Buffer *insBuf = xd->ins;

  while(stream_position(cmd) != stream_length(cmd)) {
    unsigned char theCmd = sdr_r_u8("cmd", cmd);
    off_t len;

    if (ISCOPY(theCmd)) {
      off_t start;
      off_t startPos = stream_length(out);

      if (theCmd == LONG_COPY)
	len = sdr_r_u32("len", cmd);
      else if (theCmd == MID_COPY)
	len = sdr_r_u16("len", cmd);
      else
	len = theCmd & 0x3fu;

      start = sdr_r_u32("start", cmd);
      stream_write_partial_buffer(out, base, start, len);

      log_trace(DBG_SXDELTA, "Applied %cC %d from %d [%d,%d)\n", 
		CMDTY(len), len, start, 
		startPos, stream_length(out));
    }
    else {
      unsigned startPos = stream_length(out);

      if (theCmd == LONG_INS)
	len = sdr_r_u32("len", cmd);
      else if (theCmd == MID_INS)
	len = sdr_r_u16("len", cmd);
      else
	len = theCmd & 0x3fu;

      /* Inserts consume the insert buffer strictly in order. */
      stream_write_partial_buffer(out, insBuf, insertPos, len);
      insertPos += len;

      /* One argument per conversion: type, length, output range. */
      log_trace(DBG_SXDELTA, "Applied %cI %d [%d,%d)\n", CMDTY(len), 
		len, startPos, stream_length(out));
    }
  }
  return stream_asBuffer(out);
}

/* Reconstruct the content of directory entry INDEX.

   The last entry of the directory is encoded against an empty buffer;
   every other entry is encoded against the content of the entry that
   follows it. Reconstruction therefore starts from an empty buffer
   and applies the deltas from the last entry back down to INDEX.

   Throws ExNoObject when INDEX is not a valid directory index. */
static Buffer *
xdcs_extract_entity(XDeltaArchive_t *xda,
		    unsigned index)
{
  Buffer *content;
  unsigned cur;

  if (index >= xda->nDirent)
    THROW(ExNoObject, format("Requested index %u not found\n", index));

  /* Always believe the in-memory content buffers inside each dirent
     as opposed to the content buffer in xda! */
  content = buffer_create();

  for (cur = xda->nDirent; cur > index; cur--)
    content = xdcs_apply_delta(content, xda->dir[cur - 1].sxdelta);

  return content;
}

/* Given a current archive, insert new content as name and serialize
   result to out stream.

   The newest content is always stored whole (as a delta against the
   empty buffer) in the last directory slot; the entry that used to be
   last is re-encoded as a delta against the new content.

   If an entry called NAME already exists the function returns without
   modifying the archive and without writing anything to OUT. */
void
xdcs_insert(SDR_stream *archive, 
	    SDR_stream *content,
	    const char *name,
	    SDR_stream *out)
{
  XDeltaArchive_t *xda;
  Buffer *curLast = 0;
  Buffer *contentBuffer = stream_asBuffer(content);
  unsigned u;

  xda = xda_fromStream(archive);

  /* The directory is kept in insertion order, not sorted by name, so
     a binary search cannot be used here; scan it linearly. */
  for (u = 0; u < xda->nDirent; u++) {
    if (xdirent_keycmp(name, &xda->dir[u]) == 0)
      return;			/* entity is already stored */
  }

  /* Only need to do this first part if archive isn't empty */
  if (xda->nDirent > 0) {
    /* This only works if xda->dir is sorted by "start" index! (which
       it should be since we're not doing any explicit qsorts
       anymore) */
    curLast = xdcs_extract_entity(xda, xda->nDirent-1);

    /* Generate the newest xdelta (as a reverse from new content to last
       known content) */
    xda->dir[xda->nDirent - 1].sxdelta = xdcs_gen_delta(contentBuffer, 
							curLast);
  }

  /* Build a trivial xdt for the new content. The slot is available
     because xda_fromStream allocates one more than nDirent. */
  xda->dir[xda->nDirent].sxdelta = 
    xdcs_gen_delta(buffer_create(), contentBuffer);
  xda->dir[xda->nDirent].name = name;
  xda->nDirent++;

#if defined(PARANOID)
  /* Test it: the entry just appended is a delta against the empty
     buffer, so applying it must give back the inserted content. */
  {
    ocmoff_t pos;
    Buffer *check = xdcs_apply_delta(buffer_create(), 
				     xda->dir[xda->nDirent - 1].sxdelta);

    if (buffer_length(check) != buffer_length(contentBuffer))
      THROW(ExIntegrityFail, 
	    format("Inserted file length mismatch: %d (orig %d)\n", 
		   (int32_t)(buffer_length(check)), 
		   (int32_t)(buffer_length(contentBuffer))));

    for (pos = 0; pos < buffer_length(check); pos++) {
      if (buffer_getc(check, pos) != buffer_getc(contentBuffer, pos))
	THROW(ExIntegrityFail, 
	      format("Content mismatch at position %d\n", (int32_t) pos));
    }
  }
#endif

  /* Rewrite archive with new xdelta */
  xda_write(xda, out);
}

#if 0
/* Disabled: convenience wrapper that parses an archive from a stream
   and then extracts the entry called name into out. */
void xdcs_extract_fromArchiveStream(SDR_stream *archive,
	     const char *name,
	     SDR_stream *out)
{
  XDeltaArchive_t *xda;
  xda = xda_fromStream(archive);
  xdcs_extract(xda, name, out);
}
#endif

/* Extract an entity by name: scan the directory for the first entry
   whose name matches, reconstruct its content by index, and write the
   resulting buffer to 'out'. Throws ExNoObject when no entry has the
   requested name. */
void
xdcs_extract(XDeltaArchive_t *xda,
	     const char *name,
	     SDR_stream *out)
{
  unsigned scan = 0;
  uint32_t index = 0;
  OC_bool located = FALSE;

  while (!located && scan < xda->nDirent) {
    if (nmequal(xda->dir[scan].name, name)) {
      index = scan;
      located = TRUE;
    }
    else {
      scan++;
    }
  }

  if (!located)
    THROW(ExNoObject, format("Requested hash %s not found\n", name));

  stream_write_buffer(out, xdcs_extract_entity(xda, index));
}

/* Invoke FN once for every directory entry of XDA, in directory
   order, passing the archive, the entry and the caller's AUX pointer. */
void
xdcs_iterate(XDeltaArchive_t *xda, 
	     void (*fn)(XDeltaArchive_t *, xdirent_t *, void *aux),
	     void *aux)
{
  unsigned ndx = 0;

  while (ndx < xda->nDirent) {
    fn(xda, &xda->dir[ndx], aux);
    ndx++;
  }
}
