/*  $Header: /home/cvsroot/dvipdfmx/src/pdfobj.c,v 1.40 2006/12/06 13:14:54 chofchof Exp $

    This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks.

    Copyright (C) 2002 by Jin-Hwan Cho and Shunsaku Hirata,
    the dvipdfmx project team <dvipdfmx@project.ktug.or.kr>
    
    Copyright (C) 1998, 1999 by Mark A. Wicks <mwicks@kettering.edu>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
*/

#if HAVE_CONFIG_H
#include "config.h"
#endif

#include <ctype.h>
#include <string.h>

#include "system.h"
#include "mem.h"
#include "error.h"
#include "mfileio.h"
#include "pdflimits.h"
#include "pdfencrypt.h"
#include "pdfparse.h"

#ifdef HAVE_ZLIB
#include <zlib.h>
#endif /* HAVE_ZLIB */

#include "pdfobj.h"

#define STREAM_ALLOC_SIZE      4096u
#define ARRAY_ALLOC_SIZE       256
#define IND_OBJECTS_ALLOC_SIZE 512

/*
 * Generic object header.  Any of the PDF object types can be
 * represented as follows; the type-specific payload hangs off "data".
 */
struct pdf_obj 
{
  int type;  /* One of the PDF_* object type codes */

  unsigned long  label;  /* Only used for indirect objects;
			    zero for every other object */
  unsigned short generation;  /* Only used if "label" is used */
  unsigned refcount;  /* Number of links to this object */
  void    *data;  /* Type-specific payload (NULL for the null object) */
};

struct pdf_boolean
{
  char  value;
};

/* Payload of a PDF_NUMBER object. */
struct pdf_number
{
  double value;  /* Numeric value, formatted by pdf_sprint_number() on output */
};

/* Payload of a PDF_STRING object. */
struct pdf_string
{
  unsigned char *string;  /* Byte data plus a trailing '\0'; NULL when length is 0 */
  unsigned short length;  /* Number of data bytes (the field cannot exceed 65535) */
};

/* Payload of a PDF_NAME object. */
struct pdf_name
{
  char *name;  /* NUL-terminated name without the leading '/'; NULL if empty */
};

struct pdf_array
{
  unsigned long max;
  unsigned long size;
  struct pdf_obj **values;
};

struct pdf_dict
{
  struct pdf_obj  *key;
  struct pdf_obj  *value;
  struct pdf_dict *next;
};

struct pdf_stream
{
  struct pdf_obj *dict;
  unsigned char  *stream;
  unsigned long   stream_length;
  unsigned long   max_length;
  unsigned char   _flags;
};

/* Payload of a PDF_INDIRECT object: a reference "label generation R". */
struct pdf_indirect
{
  unsigned label;       /* Object number of the referenced object */
  unsigned generation;  /* Generation number of the referenced object */
  int dirty;  /* Dirty objects came from an input file and were not
		 generated by this program.  They have a label in a
		 different numbering sequence.  These are translated
		 when the object is written out */
  FILE *dirty_file;     /* Input file a dirty reference belongs to; NULL otherwise */
};

/* Short names for the payload types; the null object carries no payload. */
typedef void                pdf_null;
typedef struct pdf_boolean  pdf_boolean;
typedef struct pdf_number   pdf_number;
typedef struct pdf_string   pdf_string;
typedef struct pdf_name     pdf_name;
typedef struct pdf_array    pdf_array;
typedef struct pdf_dict     pdf_dict;
typedef struct pdf_stream   pdf_stream;
typedef struct pdf_indirect pdf_indirect;

/* The PDF file being written, and the input PDF file currently in use */
static FILE *pdf_output_file = NULL;
static FILE *pdf_input_file  = NULL;

/*
 * Byte offset within the output file and column since the last newline.
 * Both are maintained by pdf_out() and pdf_out_char().
 */
static long pdf_output_file_position = 0;
static long pdf_output_line_position = 0;
/* Running estimate of the number of bytes saved by stream compression */
static long compression_saved        = 0;

/* Scratch buffer shared by the sprintf()-based formatting code */
#define FORMAT_BUF_SIZE 4096
static char format_buffer[FORMAT_BUF_SIZE];

/* Cross-reference table; entry [label-1] describes the object "label" */
static struct xref_entry 
{
  long     file_position;  /* Printed as the byte offset in the xref table */
  pdf_obj *object;         /* Object that was given this label */
  int      used; /* Used by PDF import */
} *output_xref = NULL;

/* Number of entries allocated in output_xref, and the next label to hand out */
static unsigned long pdf_max_ind_objects = 0;
static unsigned long next_label          = 1;

/* File offset of the xref table, recorded by dump_xref() for the trailer */
static unsigned long startxref;

/* Labels written as /Root, /Info and /Encrypt in the trailer (0 = not set) */
static unsigned pdf_root_obj    = 0;
static unsigned pdf_info_obj    = 0;
static unsigned pdf_encrypt_obj = 0;

/* Internal static routines */

/* Generic object handling */
static void pdf_flush_obj (pdf_obj *object, FILE *file);
static void pdf_label_obj (pdf_obj *object);
static void pdf_write_obj (pdf_obj *object, FILE *file);

/* Low-level output; these keep the file/line position counters up to date */
static void pdf_out_char (FILE *file, char c);
static void pdf_out      (FILE *file, const void *buffer, long length);

/* Per-type release_*() and write_*() routines operating on the payloads */
static void release_indirect (pdf_indirect *data);
static void write_indirect   (pdf_indirect *indirect, FILE *file);

static void release_boolean (pdf_obj *data);
static void write_boolean   (pdf_boolean *data, FILE *file);

static void release_null (pdf_null *data);
static void write_null   (pdf_null *data, FILE *file);

static void release_number (pdf_number *number);
static void write_number   (pdf_number *number, FILE *file);

static void write_string   (pdf_string *str, FILE *file);
static void release_string (pdf_string *str);

static void write_name   (pdf_name *name, FILE *file);
static void release_name (pdf_name *name);

static void write_array   (pdf_array *array, FILE *file);
static void release_array (pdf_array *array);

static void write_dict   (pdf_dict *dict, FILE *file);
static void release_dict (pdf_dict *dict);

static void write_stream   (pdf_stream *stream, FILE *file);
static void release_stream (pdf_stream *stream);

/* Access to objects of the input PDF file */
static pdf_obj *pdf_ref_file_obj (unsigned long obj_num, unsigned obj_gen);
static pdf_obj *pdf_read_object  (unsigned long obj_num, unsigned obj_gen);

/* Verbosity counter, incremented by pdf_obj_set_verbose() */
static int  verbose = 0;
/* Compression level in the range 0..9, see pdf_set_compression() */
static char compression_level = 9;

/*
 * Select the compression level (0..9) used when streams are written.
 * Without zlib support this always reports an error; any level outside
 * the accepted range is reported as an error as well.
 */
void
pdf_set_compression (int level)
{
#ifndef   HAVE_ZLIB
  ERROR("You don't have compression compiled in. Possibly libz wasn't found by configure.");
#else
#ifndef HAVE_ZLIB_COMPRESS2
  /* Without compress2() the level cannot be passed on to zlib. */
  if (level != 0) 
    WARN("Unable to set compression level -- your zlib doesn't have compress2().");
#endif
  if (level < 0 || level > 9) {
    ERROR("set_compression: invalid compression level: %d", level);
  } else {
    compression_level = level;
  }
#endif /* !HAVE_ZLIB */
}

/* Minor version of the generated file ("%PDF-1.<pdf_version>") */
static unsigned pdf_version = 3;

/*
 * Set the PDF minor version.  Only 3..6 are accepted; any other value
 * is silently ignored and the current version is kept.
 */
void
pdf_set_version (unsigned version)
{
  /* Don't forget to update CIDFont_stdcc_def[] in cid.c too! */
  if (version < 3 || version > 6)
    return;

  pdf_version = version;
}

/* Return the PDF minor version currently selected (see pdf_set_version()). */
unsigned
pdf_get_version (void)
{
  return pdf_version;
}

/* Raise the verbosity of this module by one step per call. */
void
pdf_obj_set_verbose(void)
{
  verbose += 1;
}

#define BINARY_MARKER "%\344\360\355\370\n"
void
pdf_out_init (const char *filename)
{
  char v;

  pdf_output_file = MFOPEN(filename, FOPEN_WBIN_MODE);
  if (!pdf_output_file) {
    if (strlen(filename) < 128)
      ERROR("Unable to open \"%s\".", filename);
    else
      ERROR("Unable to open file.");
  }
  pdf_out(pdf_output_file, "%PDF-1.", strlen("%PDF-1."));
  v = '0' + pdf_version;
  pdf_out(pdf_output_file, &v, 1);
  pdf_out(pdf_output_file, "\n", 1);
  pdf_out(pdf_output_file, BINARY_MARKER, strlen(BINARY_MARKER));
}

static void
dump_xref (void)
{
  long length;
  unsigned long i;

  /* Record where this xref is for trailer */
  startxref = pdf_output_file_position;

  pdf_out(pdf_output_file, "xref\n", 5);

  length = sprintf(format_buffer, "%d %lu\n", 0, next_label);
  pdf_out(pdf_output_file, format_buffer, length);

  length = sprintf(format_buffer, "%010ld %05ld f \n", 0L, 65535L);
  /*
   * Every space counts.  The space after the 'f' and 'n' is * *essential*.
   * The PDF spec says the lines must be 20 characters long including the
   * end of line character.
   */
  pdf_out(pdf_output_file, format_buffer, length);
  for (i = 1; i < next_label; i++){
    length = sprintf(format_buffer, "%010ld %05ld n \n",
		     output_xref[i-1].file_position, 0L);
    pdf_out(pdf_output_file, format_buffer, length);
  }
  /* Done with xref table */
  RELEASE(output_xref);
  output_xref = NULL;
}

/*
 * Write the file trailer: the trailer dictionary (/Size, /Root and,
 * when set, /Encrypt, /Info and /ID), the "startxref" offset recorded
 * by dump_xref(), and the end-of-file marker.
 * Reports an error if no root object has been set.
 */
static void
dump_trailer (void)
{
  long length;

  pdf_out(pdf_output_file, "trailer\n", 8);
  pdf_out(pdf_output_file, "<<\n", 3);

  length = sprintf(format_buffer, "/Size %lu\n", next_label);
  pdf_out(pdf_output_file, format_buffer, length);

  if (pdf_root_obj == 0) 
    ERROR ("dump_trailer:  Invalid root object");
  length = sprintf(format_buffer, "/Root %u %u R\n", pdf_root_obj, 0);
  pdf_out(pdf_output_file, format_buffer, length);

  if (pdf_encrypt_obj != 0) {
    length = sprintf(format_buffer, "/Encrypt %u %u R\n", pdf_encrypt_obj, 0);
    pdf_out(pdf_output_file, format_buffer, length);
  }
  if (pdf_info_obj != 0) {
    length = sprintf(format_buffer, "/Info %u %u R\n", pdf_info_obj, 0);
    pdf_out(pdf_output_file, format_buffer, length);
  }
  if (pdf_encrypt_obj != 0) {
    /* The file identifier is only written for encrypted output. */
    const char *id;

    /* "%s" expects a plain char pointer. */
    id = (const char *) pdf_enc_id_string();
    length = sprintf(format_buffer, "/ID [<%s> <%s>]\n", id, id);
    pdf_out(pdf_output_file, format_buffer, length);
  }
  pdf_out(pdf_output_file, ">>\n", 3);
  pdf_out(pdf_output_file, "startxref\n", 10);

  length = sprintf(format_buffer, "%lu\n", startxref);
  pdf_out(pdf_output_file, format_buffer, length);

  /*
   * pdf_out() copies the bytes verbatim (it is not a printf-style call),
   * so this emits the six characters of the "%%EOF" marker line.
   */
  pdf_out(pdf_output_file, "%%EOF\n", 6);
}

/*
 * Finish the output file: write the xref table and the trailer, print
 * the size statistics, and close the file.  Does nothing when no output
 * file is open.
 */
void
pdf_out_flush (void)
{
  if (pdf_output_file) {
    dump_xref();
    dump_trailer();
    MESG("\n");
    if (verbose) {
      if (compression_level > 0) {
	/* compression_saved is a signed long and may be negative. */
	MESG("Compression eliminated approximately %ld bytes\n", compression_saved);
      }
    }
    MESG("%ld bytes written", pdf_output_file_position);

    MFCLOSE(pdf_output_file);
    /*
     * Forget the closed stream so that a later pdf_out_flush() or
     * pdf_error_cleanup() cannot close it a second time.
     */
    pdf_output_file = NULL;
  }
}

void
pdf_error_cleanup (void)
{
  /*
   * This routine is the cleanup required for an abnormal exit.
   * For now, simply close the file.  The pointer is cleared afterwards
   * so that the stream is never closed twice.
   */
  if (pdf_output_file) {
    MFCLOSE(pdf_output_file);
    pdf_output_file = NULL;
  }
}


/*
 * Register "object" as the one referenced by /Root in the trailer.
 * The object is given a label if it has none.  Setting the root twice
 * is reported as an error.
 */
void
pdf_set_root (pdf_obj *object)
{
  if (pdf_root_obj != 0)
    ERROR("Root object already set!");

  /* Make sure this object has a label */
  if (!object->label)
    pdf_label_obj(object);

  pdf_root_obj = object->label;
}

/*
 * Register "object" as the one referenced by /Info in the trailer.
 * The object is given a label if it has none.  Setting it twice is
 * reported as an error.
 */
void
pdf_set_info (pdf_obj *object)
{
  if (pdf_info_obj != 0)
    ERROR ("Info object already set!");

  /* Make sure this object has a label */
  if (!object->label)
    pdf_label_obj(object);

  pdf_info_obj = object->label;
}

/*
 * Register "object" as the one referenced by /Encrypt in the trailer.
 * The object is given a label if it has none.  Setting it twice is
 * reported as an error.
 */
void
pdf_set_encrypt (pdf_obj *object)
{
  if (pdf_encrypt_obj != 0)
    ERROR("Encrypt object already set!");

  /* Make sure this object has a label */
  if (!object->label)
    pdf_label_obj(object);

  pdf_encrypt_obj = object->label;
}

/*
 * Write a single character to "file".  When "file" is the PDF output
 * file, the file offset and the current line length are updated too.
 */
static
void pdf_out_char (FILE *file, char c)
{
  fputc(c, file);

  /* Keep tallys for xref table *only* if writing a pdf file. */
  if (file != pdf_output_file)
    return;

  pdf_output_file_position++;
  pdf_output_line_position = (c == '\n') ? 0 : pdf_output_line_position + 1;
}

/*
 * Write the low eight bits of "c" to "f" as two lowercase hexadecimal
 * digits, high nibble first ('W' + 10 is 'a').  The argument "c" is
 * evaluated twice, so it must not have side effects.
 */
#define pdf_out_xchar(f,c) do {\
  int __tmpnum;\
  __tmpnum = ((c) >> 4) & 0x0f;\
  pdf_out_char((f), (((__tmpnum) >= 10) ? (__tmpnum)+'W' : (__tmpnum)+'0'));\
  __tmpnum = (c) & 0x0f;\
  pdf_out_char((f), (((__tmpnum) >= 10) ? (__tmpnum)+'W' : (__tmpnum)+'0'));\
} while (0)

/*
 * Write "length" bytes from "buffer" to "file".  When "file" is the PDF
 * output file, the file offset is advanced and the line length is
 * either extended or, if the data ends with a newline, reset to zero.
 */
static
void pdf_out (FILE *file, const void *buffer, long length)
{
  const char *bytes = buffer;

  fwrite(buffer, 1, length, file);

  /* Keep tallys for xref table *only* if writing a pdf file */
  if (file != pdf_output_file)
    return;

  pdf_output_file_position += length;
  /* Only the last byte is examined, as in "foo\nbar\n". */
  if (length > 0 && bytes[length - 1] == '\n')
    pdf_output_line_position = 0;
  else
    pdf_output_line_position += length;
}

/*
 * Write one whitespace separator: a newline once the current line of
 * the PDF output file has reached 80 characters, a space otherwise.
 */
static
void pdf_out_white (FILE *file)
{
  char separator = ' ';

  if (file == pdf_output_file && pdf_output_line_position >= 80)
    separator = '\n';

  pdf_out_char(file, separator);
}

#define TYPECHECK(o,t) if (!(o) || (o)->type != (t)) {\
  ERROR("typecheck: Invalid object type. %d %d", (o) ? (o)->type : -1, (t));\
}

#define INVALIDOBJ(o)  ((o) == NULL || (o)->type <= 0 || (o)->type > PDF_INDIRECT)

pdf_obj *
pdf_new_obj(int type)
{
  pdf_obj *result;

  if (type > PDF_INDIRECT || type < PDF_UNDEFINED)
    ERROR("Invalid object type: %d", type);

  result = NEW(1, pdf_obj);
  result->type  = type;
  result->data  = NULL;
  result->label      = 0;
  result->generation = 0;
  result->refcount   = 1;

  return result;
}

/*
 * Return the type code of "object", or PDF_OBJ_INVALID if the pointer
 * is NULL or the stored type code is out of range.
 */
int
pdf_obj_typeof (pdf_obj *object)
{
  return INVALIDOBJ(object) ? PDF_OBJ_INVALID : object->type;
}

/*
 * Give "object" the next free label and record it in the xref table.
 * The table is enlarged in steps of IND_OBJECTS_ALLOC_SIZE as needed.
 * An object that already has a label keeps it.
 */
static void
pdf_label_obj (pdf_obj *object)
{
  struct xref_entry *slot;

  if (INVALIDOBJ(object))
    ERROR("pdf_label_obj(): passed invalid object.");

  if (next_label > pdf_max_ind_objects) {
    pdf_max_ind_objects += IND_OBJECTS_ALLOC_SIZE;
    output_xref = RENEW(output_xref, pdf_max_ind_objects, struct xref_entry);
  }

  /*
   * Don't change label on an already labeled object. Ignore such calls.
   */
  if (object->label != 0)
    return;

  /* Save so we can lookup this object by its number */
  slot = &output_xref[next_label - 1];
  slot->object        = object;
  slot->file_position = 0L;

  object->label      = next_label++;
  object->generation = 0;
}

/*
 * Take an additional link to "object" by incrementing its reference
 * count.  Nothing is copied; the same pointer is returned so that the
 * object can be used without fear that somebody else will free it.
 */
pdf_obj *
pdf_link_obj (pdf_obj *object)
{
  if (INVALIDOBJ(object))
    ERROR("pdf_link_obj(): passed invalid object.");

  object->refcount++;

  return object;
}


/*
 * Create a new indirect reference object pointing at "object".
 * If "object" is itself an indirect reference, its contents are
 * duplicated; otherwise "object" is labeled if necessary and a clean
 * reference to that label is built.
 */
pdf_obj *
pdf_ref_obj (pdf_obj *object)
{
  pdf_obj      *ref;
  pdf_indirect *target;

  if (INVALIDOBJ(object))
    ERROR("pdf_ref_obj(): passed invalid object.");
  
  if (object->refcount == 0) {
    MESG("\nTrying to refer already released object!!!\n");
    pdf_write_obj(object, stderr);
    ERROR("Cannot continue...");
  }

  ref    = pdf_new_obj(PDF_INDIRECT);
  target = NEW(1, pdf_indirect);
  ref->data = target;

  if (object->type == PDF_INDIRECT) {
    /* Copies label, generation, dirty and dirty_file in one go. */
    *target = *((pdf_indirect *) object->data);
  } else {
    if (object->label == 0)
      pdf_label_obj(object);

    target->label      = object->label;
    target->generation = object->generation;
    target->dirty      = 0;
    target->dirty_file = NULL;
  }

  return ref;
}

/* Free the payload of an indirect reference object. */
static void
release_indirect (pdf_indirect *data)
{
  RELEASE(data);
}

/*
 * Write an indirect reference as "label generation R".
 *
 * A dirty reference (one that came from an input file) is handled
 * specially: on stderr it is printed with a "{d}" prefix, on any other
 * file it is first translated with pdf_ref_file_obj() and the resulting
 * object is written instead.  It is an error if the reference does not
 * belong to the current input file.
 */
static void
write_indirect (pdf_indirect *indirect, FILE *file)
{
  long length;

  if (indirect->dirty) {
    if (file == stderr) {
      pdf_out(file, "{d}", 3);
      /* label and generation are unsigned, hence "%u". */
      length = sprintf(format_buffer, "%u %u R", indirect->label, indirect->generation);
      pdf_out(stderr, format_buffer, length);
    } else {
      pdf_obj *clean;

      if (indirect->dirty_file != pdf_input_file) {
        /* "%p" requires a pointer to void. */
        ERROR("Input PDF file doesn't match object: label=%u, from_file=%p, current_file=%p",
              indirect->label, (void *) indirect->dirty_file, (void *) pdf_input_file);
      }
      clean = pdf_ref_file_obj(indirect->label, indirect->generation);
      pdf_write_obj(clean, file);
      pdf_release_obj(clean);
    }
  } else {
    length = sprintf(format_buffer, "%u %u R", indirect->label, indirect->generation);
    pdf_out(file, format_buffer, length);
  }
}

/* Create a null object; it carries no payload. */
pdf_obj *
pdf_new_null (void)
{
  pdf_obj *null_obj = pdf_new_obj(PDF_NULL);

  null_obj->data = NULL;

  return null_obj;
}

/* The null object has no payload, so there is nothing to free. */
static void
release_null (pdf_null *obj)
{
  return;
}

/* Write the keyword "null"; the payload argument is not used. */
static void
write_null (pdf_null *obj, FILE *file)
{
  pdf_out(file, "null", 4);
}

/* Create a boolean object holding "value". */
pdf_obj *
pdf_new_boolean (char value)
{
  pdf_obj     *obj     = pdf_new_obj(PDF_BOOLEAN);
  pdf_boolean *payload = NEW(1, pdf_boolean);

  payload->value = value;
  obj->data      = payload;

  return obj;
}

/*
 * Free a boolean payload.
 * NOTE(review): the parameter is declared as pdf_obj * although the
 * other release_*() routines take their payload type -- confirm what
 * the caller actually passes.
 */
static void
release_boolean (pdf_obj *data)
{
  RELEASE (data);
}

/* Write "true" for a non-zero value and "false" for zero. */
static void
write_boolean (pdf_boolean *data, FILE *file)
{
  if (!data->value)
    pdf_out(file, "false", 5);
  else
    pdf_out(file, "true", 4);
}

/* Replace the value of a boolean object; other types are an error. */
void
pdf_set_boolean (pdf_obj *object, char value)
{
  TYPECHECK(object, PDF_BOOLEAN);

  ((pdf_boolean *) object->data)->value = value;
}

/* Return the value of a boolean object; other types are an error. */
char
pdf_boolean_value (pdf_obj *object)
{
  TYPECHECK(object, PDF_BOOLEAN);

  return ((pdf_boolean *) object->data)->value;
}

/* Create a number object holding "value". */
pdf_obj *
pdf_new_number (double value)
{
  pdf_obj    *obj     = pdf_new_obj(PDF_NUMBER);
  pdf_number *payload = NEW(1, pdf_number);

  payload->value = value;
  obj->data      = payload;

  return obj;
}

/* Free the payload of a number object. */
static void
release_number (pdf_number *data)
{
  RELEASE (data);
}

/* Format the number with pdf_sprint_number() and write the result. */
static void
write_number (pdf_number *number, FILE *file)
{
  int nchars = pdf_sprint_number(format_buffer, number->value);

  pdf_out(file, format_buffer, nchars);
}


/* Replace the value of a number object; other types are an error. */
void
pdf_set_number (pdf_obj *object, double value)
{
  TYPECHECK(object, PDF_NUMBER);

  ((pdf_number *) object->data)->value = value;
}

/* Return the value of a number object; other types are an error. */
double
pdf_number_value (pdf_obj *object)
{
  TYPECHECK(object, PDF_NUMBER);

  return ((pdf_number *) object->data)->value;
}

pdf_obj *
pdf_new_string (const void *str, unsigned length)
{
  pdf_obj    *result;
  pdf_string *data;

  result = pdf_new_obj(PDF_STRING);
  data   = NEW(1, pdf_string);
  result->data = data;
  if (length != 0) {
    data->length = length;
    data->string = NEW(length+1, unsigned char);
    memcpy(data->string, str, length);
    /* Shouldn't assume NULL terminated. */
    data->string[length] = '\0';
  } else {
    data->length = 0;
    data->string = NULL;
  }

  return result;
}

/*
 * Return the byte buffer of a string object (NULL for an empty string).
 * The buffer still belongs to the object.  Other types are an error.
 */
void *
pdf_string_value (pdf_obj *object)
{
  TYPECHECK(object, PDF_STRING);

  return ((pdf_string *) object->data)->string;
}

/* Return the number of bytes in a string object; other types are an error. */
unsigned
pdf_string_length (pdf_obj *object)
{
  const pdf_string *payload;

  TYPECHECK(object, PDF_STRING);

  payload = object->data;

  return (unsigned) payload->length;
}

/*
 * This routine escapes non printable characters and control
 * characters in an output string.
 *
 * Bytes below 32 or above 126 become a backslash followed by three
 * octal digits; '(', ')' and '\\' are preceded by a backslash; every
 * other byte is copied unchanged.
 *
 *   buffer   receives the escaped bytes (it is not NUL terminated)
 *   bufsize  size of buffer in bytes
 *   s, len   input bytes and their count
 *
 * Returns the number of bytes stored in buffer.  An error is reported
 * when fewer than four bytes remain before a character is processed.
 */
int
pdfobj_escape_str (char *buffer, int bufsize, const unsigned char *s, int len)
{
  int result = 0;
  int i;

  for (i = 0; i < len; i++) {
    unsigned char ch;

    ch = s[i];
    /* The longest escape sequence written below is four bytes. */
    if (result > bufsize - 4)
      ERROR("pdfobj_escape_str: Buffer overflow");

    /*
     * We always write three octal digits. Optimization only gives few Kb
     * smaller size for most documents when zlib compressed.
     */
    if (ch < 32 || ch > 126) {
      /*
       * The digits are stored by hand: sprintf() would append a '\0' as
       * a fifth byte, which the check above does not leave room for.
       */
      buffer[result++] = '\\';
      buffer[result++] = (char) ('0' + ((ch >> 6) & 0x07));
      buffer[result++] = (char) ('0' + ((ch >> 3) & 0x07));
      buffer[result++] = (char) ('0' + (ch & 0x07));
    } else {
      switch (ch) {
      case '(':
      case ')':
      case '\\':
	buffer[result++] = '\\';
	buffer[result++] = ch;
	break;
      default:
	buffer[result++] = ch;
	break;
      }
    }
  }

  return result;
}

/*
 * Write a string object.  The bytes are first passed through
 * pdf_encrypt_data().  If more than a third of them are not printable,
 * the string is written as a hexadecimal string <...>; otherwise it is
 * written as a literal string (...) with the necessary escapes.
 */
static void
write_string (pdf_string *str, FILE *file)
{
  char           escaped[FORMAT_BUF_SIZE]; /* Shouldn't use format_buffer[]. */
  unsigned char *bytes = str->string;
  int            unprintable = 0;
  int            pos, nchars;

  pdf_encrypt_data(bytes, str->length);

  /*
   * Count all ASCII non-printable characters.
   */
  for (pos = 0; pos < str->length; pos++)
    unprintable += isprint(bytes[pos]) ? 0 : 1;

  /*
   * If the string contains much escaped chars, then we write it as
   * ASCII hex string.
   */
  if (unprintable > str->length / 3) {
    pdf_out_char(file, '<');
    for (pos = 0; pos < str->length; pos++) {
      pdf_out_xchar(file, bytes[pos]);
    }
    pdf_out_char(file, '>');
    return;
  }

  /*
   * This section of code probably isn't speed critical.  Escaping the
   * characters in the string one at a time may seem slow, but it's
   * safe if the formatted string length exceeds FORMAT_BUF_SIZE.
   * Occasionally you see some long strings in PDF.  pdfobj_escape_str
   * is also used for strings of text with no kerning.  These must be
   * handled as quickly as possible since there are so many of them.
   */
  pdf_out_char(file, '(');
  for (pos = 0; pos < str->length; pos++) {
    nchars = pdfobj_escape_str(escaped, FORMAT_BUF_SIZE, bytes + pos, 1);
    pdf_out(file, escaped, nchars);
  }
  pdf_out_char(file, ')');
}

/* Free the byte buffer of a string payload (if any) and the payload itself. */
static void
release_string (pdf_string *data)
{
  if (data->string) {
    RELEASE(data->string);
    data->string = NULL;
  }

  RELEASE(data);
}

/*
 * Replace the contents of a string object with a copy of the first
 * "length" bytes of "str" (a '\0' is appended to the copy).  The old
 * buffer is freed.  A zero length leaves the object without a buffer.
 *
 * Passing an object of another type is an error, and so is a length
 * that does not fit into the length field of pdf_string, which would
 * otherwise be silently truncated.
 */
void
pdf_set_string (pdf_obj *object, unsigned char *str, unsigned length)
{
  pdf_string *data;

  TYPECHECK(object, PDF_STRING);

  data = object->data;
  if (data->string != 0) {
    RELEASE(data->string);
  }

  /* Store first, then compare: this detects truncation by the field. */
  data->length = length;
  if (data->length != length)
    ERROR("pdf_set_string(): String too long (%u bytes).", length);

  if (length != 0) {
    data->string = NEW(length + 1, unsigned char);
    memcpy(data->string, str, length);
    data->string[length] = '\0';
  } else {
    data->string = NULL;
  }
}

/*
 * Create a name object from a NUL-terminated string.
 * Name does *not* include the /.  An empty string is stored as a NULL
 * name pointer.
 */
pdf_obj *
pdf_new_name (const char *name)
{
  pdf_obj  *obj     = pdf_new_obj(PDF_NAME);
  pdf_name *payload = NEW (1, pdf_name);
  unsigned  nchars;

  obj->data = payload;

  nchars = strlen(name);
  if (nchars == 0) {
    payload->name = NULL;
  } else {
    payload->name = NEW(nchars+1, char);
    memcpy(payload->name, name, nchars);
    payload->name[nchars] = '\0';
  }

  return obj;
}

/*
 * Write a name object: a '/' followed by the characters of the name.
 * Characters outside '!'..'~', the number sign and the delimiter
 * characters are written as '#' plus two hexadecimal digits.
 */
static void
write_name (pdf_name *name, FILE *file)
{
  char *s;
  int i, length;

  s      = name->name;
  length = name->name ? strlen(name->name) : 0;
  /*
   * From PDF Reference, 3rd ed., p.33:
   *
   *  Beginning with PDF 1.2, any character except null (character code 0)
   *  may be included in a name by writing its 2-digit hexadecimal code,
   *  preceded by the number sign character (#); see implementation notes 3
   *  and 4 in Appendix H. This syntax is required in order to represent
   *  any of the delimiter or white-space characters or the number sign
   *  character itself; it is recommended but not required for characters
   *  whose codes are outside the range 33 (!) to 126 (~).
   */
#ifndef is_delim
  /*
   * Avoid '{' and '}' for PostScript compatibility?
   * Both parentheses are delimiters and have to be escaped.
   */
#define is_delim(c) ((c) == '(' || (c) == ')' || \
                     (c) == '/' || \
                     (c) == '<' || (c) == '>' || \
                     (c) == '[' || (c) == ']' || \
                     (c) == '{' || (c) == '}' || \
                     (c) == '%')
#endif
  pdf_out_char(file, '/');
  for (i = 0; i < length; i++) {
    /* Work on the unsigned byte so the result does not depend on the
     * signedness of plain char. */
    unsigned char ch = (unsigned char) s[i];

    if (ch < '!' || ch > '~' || ch == '#' || is_delim(ch)) {
      /*  ^ "space" is here. */
      pdf_out_char (file, '#');
      pdf_out_xchar(file, ch);
    } else {
      pdf_out_char (file, s[i]);
    }
  }
}

/* Free the character buffer of a name payload (if any) and the payload itself. */
static void
release_name (pdf_name *data)
{
  if (data->name) {
    RELEASE(data->name);
    data->name = NULL;
  }

  RELEASE(data);
}

/*
 * Replace the contents of a name object with a copy of "name" (given
 * without the leading '/').  The old buffer is freed; an empty string
 * is stored as a NULL name pointer.  Other object types are an error.
 */
void
pdf_set_name (pdf_obj *object, const char *name)
{
  pdf_name *payload;
  unsigned  nchars;

  TYPECHECK(object, PDF_NAME);

  nchars  = strlen(name);
  payload = object->data;

  if (payload->name != NULL) {
    RELEASE(payload->name);
  }

  if (nchars == 0) {
    payload->name = NULL;
  } else {
    payload->name = NEW(nchars+1, char);
    memcpy(payload->name, name, nchars);
    payload->name[nchars] = 0;
  }
}

/*
 * Return the characters of a name object (NULL for an empty name).
 * The buffer still belongs to the object.  Other types are an error.
 */
char *
pdf_name_value (pdf_obj *object)
{
  TYPECHECK(object, PDF_NAME);

  return ((pdf_name *) object->data)->name;
}

/*
 * We do not have pdf_name_length() since '\0' is not allowed
 * in PDF name object.
 */

/* Create an empty array object; storage is allocated on first insertion. */
pdf_obj *
pdf_new_array (void)
{
  pdf_obj   *obj     = pdf_new_obj(PDF_ARRAY);
  pdf_array *payload = NEW(1, pdf_array);

  payload->size   = 0;
  payload->max    = 0;
  payload->values = NULL;
  obj->data = payload;

  return obj;
}

/*
 * Write an array as "[" element element ... "]".  Elements are
 * separated by pdf_out_white().  A warning naming the index is issued
 * for every NULL element before it is handed to pdf_write_obj().
 */
static void
write_array (pdf_array *array, FILE *file)
{
  pdf_out_char(file, '[');
  if (array->size > 0) {
    unsigned long i;

    for (i = 0; i < array->size; i++) {
      if (i > 0)
	pdf_out_white(file);
      if (!array->values[i])
	WARN("PDF array element #%lu undefined.", i);
      pdf_write_obj(array->values[i], file);
    }
  }
  pdf_out_char(file, ']');
}

/*
 * Return the element at position "idx" of an array object.
 * A negative index counts from the end (-1 is the last element).
 * NULL is returned when the index is out of range in either direction.
 * The element is not linked; it still belongs to the array.
 * Passing an object that is not an array is an error.
 */
pdf_obj *
pdf_get_array (pdf_obj *array, long idx)
{
  pdf_obj   *result = NULL;
  pdf_array *data;

  TYPECHECK(array, PDF_ARRAY);

  data = array->data;
  if (idx < 0) {
    /* Distance from the last element; -(idx + 1) cannot overflow. */
    unsigned long back = (unsigned long) (-(idx + 1));

    if (back < data->size)
      result = data->values[data->size - 1 - back];
  } else if ((unsigned long) idx < data->size) {
    result = data->values[idx];
  }

  return result;
}

/* Return the number of elements in an array object; other types are an error. */
unsigned int
pdf_array_length (pdf_obj *array)
{
  TYPECHECK(array, PDF_ARRAY);

  return (unsigned int) ((pdf_array *) array->data)->size;
}

/*
 * Release every element of an array payload, then free the element
 * vector and the payload itself.
 */
static void
release_array (pdf_array *data)
{
  if (data->values != NULL) {
    unsigned long pos = 0;

    while (pos < data->size) {
      pdf_release_obj(data->values[pos]);
      data->values[pos] = NULL;
      pos++;
    }
    RELEASE(data->values);
    data->values = NULL;
  }

  RELEASE(data);
}

/*
 * Append "object" to the end of an array, growing the element vector
 * by ARRAY_ALLOC_SIZE slots when it is full.  The pointer is stored as
 * is (no link is taken).
 *
 * The name pdf_add_array is misleading. It behaves differently than
 * pdf_add_dict(). This should be pdf_push_array().
 */
void
pdf_add_array (pdf_obj *array, pdf_obj *object)
{
  pdf_array *vec;

  TYPECHECK(array, PDF_ARRAY);

  vec = array->data;
  if (vec->size >= vec->max) {
    vec->max   += ARRAY_ALLOC_SIZE;
    vec->values = RENEW(vec->values, vec->max, pdf_obj *);
  }

  vec->values[vec->size++] = object;
}

/*
 * Store "object" at position "idx" of an array.
 *
 * If "idx" lies beyond the current end, the array is extended and the
 * gap is filled with null objects.  Otherwise the element previously
 * stored at "idx" is released and replaced.  The pointer is stored as
 * is (no link is taken).  Passing a non-array object is an error.
 */
void
pdf_put_array (pdf_obj *array, unsigned idx, pdf_obj *object)
{
  pdf_array     *data;
  unsigned long  pos = idx;
  unsigned long  i;

  TYPECHECK(array, PDF_ARRAY);

  data = array->data;
  if (pos >= data->max) {
    /*
     * Grow to the multiple of ARRAY_ALLOC_SIZE just above "pos".  Adding
     * a single chunk is not enough when "pos" is far beyond the end.
     */
    data->max    = (pos / ARRAY_ALLOC_SIZE + 1) * ARRAY_ALLOC_SIZE;
    data->values = RENEW(data->values, data->max, pdf_obj *);
  }
  /*
   * Rangecheck error in PostScript interpreters if
   * idx > data->size - 1. But pdf_new_array() doesn't set
   * array size, pdf_add_array() dynamically increases size
   * of array. This might be confusing...
   */
  if (pos >= data->size) {
    for (i = data->size; i < pos; i++)
      data->values[i] = pdf_new_null(); /* release_array() won't work without this */
    data->values[pos] = object;
    data->size = pos + 1;
  } else {
    if (data->values[pos])
      pdf_release_obj(data->values[pos]);
    data->values[pos] = object;
  }
}

/*
 * Remove the first element of an array and return it (NULL if the
 * array is empty).  The remaining elements move down by one position.
 *
 * Easily leaks memory: the element is removed without being released,
 * so the caller has to release it.
 */
pdf_obj *
pdf_shift_array (pdf_obj *array)
{
  pdf_array *vec;
  pdf_obj   *first;

  TYPECHECK(array, PDF_ARRAY);

  vec = array->data;
  if (vec->size == 0)
    return NULL;

  first = vec->values[0];
  vec->size--;
  memmove(vec->values, vec->values + 1, vec->size * sizeof(pdf_obj *));

  return first;
}

/*
 * Prepend an object to an array.  The existing elements move up by one
 * position, and the element vector grows by ARRAY_ALLOC_SIZE slots when
 * it is full.  The pointer is stored as is (no link is taken).
 */
void
pdf_unshift_array (pdf_obj *array, pdf_obj *object)
{
  pdf_array *data;

  TYPECHECK(array, PDF_ARRAY);

  data = array->data;
  if (data->size >= data->max) {
    data->max   += ARRAY_ALLOC_SIZE;
    data->values = RENEW(data->values, data->max, pdf_obj *);
  }
  /*
   * Source and destination overlap, so memmove() is required.  Copying
   * element by element from the front would overwrite every slot with
   * the old first element.
   */
  if (data->size > 0)
    memmove(data->values + 1, data->values, data->size * sizeof(pdf_obj *));
  data->values[0] = object;
  data->size++;
}

/*
 * Remove the last element of an array and return it (NULL if the array
 * is empty).  The element is not released by this routine.
 */
pdf_obj *
pdf_pop_array (pdf_obj *array)
{
  pdf_array *vec;

  TYPECHECK(array, PDF_ARRAY);

  vec = array->data;
  if (vec->size == 0)
    return NULL;

  vec->size--;
  return vec->values[vec->size];
}


/*
 * Write a dictionary as "<<", one "key value" pair per line, and ">>".
 * A separator is written between key and value only for value types
 * whose text does not start with a delimiter of its own (booleans,
 * numbers, indirect references and null).
 */
static void
write_dict (pdf_dict *dict, FILE *file)
{
  pdf_out (file, "<<\n", 3); /* dropping \n saves few kb. */
  for ( ; dict->key != NULL; dict = dict->next) {
    pdf_obj *value = dict->value;

    pdf_write_obj(dict->key, file);
    /*
     * pdf_add_dict() accepts a NULL value, so it must not be
     * dereferenced here.  It is treated like a null object for spacing.
     * NOTE(review): this assumes pdf_write_obj() copes with NULL, as
     * write_array() also relies on -- confirm.
     */
    if (value == NULL ||
        value->type == PDF_BOOLEAN  ||
        value->type == PDF_NUMBER   ||
        value->type == PDF_INDIRECT ||
        value->type == PDF_NULL) {
      pdf_out_white(file);
    }
    pdf_write_obj(value, file);
    pdf_out_char (file, '\n'); /* removing this saves few kb. */
  }
  pdf_out(file, ">>", 2);
}

/*
 * Create an empty dictionary object.  Its list consists of just the
 * sentinel node (key, value and next all NULL).
 */
pdf_obj *
pdf_new_dict (void)
{
  pdf_obj  *obj      = pdf_new_obj(PDF_DICT);
  pdf_dict *sentinel = NEW(1, pdf_dict);

  sentinel->next  = NULL;
  sentinel->value = NULL;
  sentinel->key   = NULL;
  obj->data = sentinel;

  return obj;
}

/*
 * Release all key and value objects of a dictionary list and free its
 * nodes, including the sentinel node at the end.
 */
static void
release_dict (pdf_dict *data)
{
  while (data != NULL && data->key != NULL) {
    pdf_dict *following = data->next;

    pdf_release_obj(data->key);
    pdf_release_obj(data->value);
    data->key   = NULL;
    data->value = NULL;
    RELEASE(data);

    data = following;
  }

  /* What is left is the sentinel node. */
  RELEASE(data);
}

/*
 * Store "value" under "key" in a dictionary.
 *
 * The list is ended by a sentinel node whose key pointer is NULL.
 * If an entry with the same name exists, its old value is released, the
 * passed key object is released (it is not needed) and the value is
 * replaced.  Otherwise the sentinel node takes the key and value and a
 * fresh sentinel is appended.  "dict" must be a dictionary and "key" a
 * name object.
 */
void
pdf_add_dict (pdf_obj *dict, pdf_obj *key, pdf_obj *value)
{
  pdf_dict *node;
  pdf_dict *sentinel;

  TYPECHECK(dict, PDF_DICT);
  TYPECHECK(key,  PDF_NAME);

  /* It seems that NULL is sometimes used for null object... */
  if (value != NULL && INVALIDOBJ(value))
    ERROR("pdf_add_dict(): Passed invalid value");

  /* If this key already exists, simply replace the value */
  for (node = dict->data; node->key != NULL; node = node->next) {
    if (strcmp(pdf_name_value(key), pdf_name_value(node->key)) == 0) {
      /* Release the old value */
      pdf_release_obj(node->value);
      /* Release the new key (we don't need it) */
      pdf_release_obj(key);
      node->value = value;
      return;
    }
  }

  /*
   * We didn't find the key: "node" is the old sentinel.  Build a new
   * "end" node and put the new entry just before it.
   */
  sentinel = NEW (1, pdf_dict);
  sentinel->key   = NULL;
  sentinel->value = NULL;
  sentinel->next  = NULL;

  node->next  = sentinel;
  node->key   = key;
  node->value = value;
}

/*
 * Store "value" under the name "key" (a C string without the leading
 * '/') in a dictionary.
 *
 * If an entry with that name exists, its old value is released and
 * replaced.  Otherwise a new name object is created for the key and the
 * entry is added at the end of the list.  A NULL key, an invalid value
 * or a non-dictionary "dict" is reported as an error.
 */
void
pdf_put_dict (pdf_obj *dict, const char *key, pdf_obj *value)
{
  pdf_dict *data;

  TYPECHECK(dict, PDF_DICT);

  if (!key) {
    ERROR("pdf_put_dict(): Passed invalid key.");
  }
  /* It seems that NULL is sometimes used for null object... */
  if (value != NULL && INVALIDOBJ(value)) {
    ERROR("pdf_put_dict(): Passed invalid value.");
  }

  data = dict->data;

  while (data->key != NULL) {
    if (!strcmp(key, pdf_name_value(data->key))) {
      pdf_release_obj(data->value);
      data->value = value;
      break;
    }
    data = data->next;
  }

  /*
   * If we didn't find the key, build a new "end" node and add
   * the new key just before the end
   */
  if (data->key == NULL) {
    pdf_dict *new_node;

    new_node = NEW (1, pdf_dict);
    new_node->key   = NULL;
    new_node->value = NULL;
    new_node->next  = NULL;
    data->next  = new_node;
    data->key   = pdf_new_name(key);
    data->value = value;
  }
}

/*
 * Add every entry of dict2 to dict1 with pdf_add_dict().
 * pdf_merge_dict makes a link for each key and value in dict2 before
 * handing it over, so dict2 itself is left unchanged.
 */
void
pdf_merge_dict (pdf_obj *dict1, pdf_obj *dict2)
{
  pdf_dict *entry;

  TYPECHECK(dict1, PDF_DICT);
  TYPECHECK(dict2, PDF_DICT);

  for (entry = dict2->data; entry->key != NULL; entry = entry->next) {
    pdf_add_dict(dict1, pdf_link_obj(entry->key), pdf_link_obj(entry->value));
  }
}

/*
 * Call proc(key, value, pdata) for each entry of a dictionary, in list
 * order.  The iteration stops at the first call that returns non-zero.
 * Returns that value, or 0 if every call returned 0.
 */
int
pdf_foreach_dict (pdf_obj *dict,
		  int (*proc) (pdf_obj *, pdf_obj *, void *), void *pdata)
{
  pdf_dict *entry;
  int       status = 0;

  ASSERT(proc);

  TYPECHECK(dict, PDF_DICT);

  for (entry = dict->data; entry->key != NULL; entry = entry->next) {
    status = proc(entry->key, entry->value, pdata);
    if (status)
      break;
  }

  return status;
}

/*
 * True if both arguments are non-NULL and the name stored in object "o"
 * equals the C string "s".  The object type is not checked.
 */
#define pdf_match_name(o,s) ((o) && (s) && !strcmp(((pdf_name *)(o)->data)->name, (s)))
/*
 * Return the value stored under "name" in a dictionary, or NULL if
 * there is no such entry.  The value is not linked; it still belongs to
 * the dictionary.
 */
pdf_obj *
pdf_lookup_dict (pdf_obj *dict, const char *name)
{
  pdf_dict *entry;

  ASSERT(name);

  TYPECHECK(dict, PDF_DICT);

  for (entry = dict->data; entry->key != NULL; entry = entry->next) {
    if (strcmp(name, pdf_name_value(entry->key)) == 0)
      return entry->value;
  }

  return NULL;
}

/*
 * Returns a new array object holding a copy of every key of the
 * dictionary, in list order.
 */
pdf_obj *
pdf_dict_keys (pdf_obj *dict)
{
  pdf_obj  *keys;
  pdf_dict *entry;

  TYPECHECK(dict, PDF_DICT);

  keys  = pdf_new_array();
  entry = dict->data;
  while (entry && entry->key != NULL) {
    /* We duplicate name object rather than linking keys.
     * If we forget to free keys, broken PDF is generated.
     */
    pdf_add_array(keys, pdf_new_name(pdf_name_value(entry->key)));
    entry = entry->next;
  }

  return keys;
}

/*
 * Remove the entry called "name" from a dictionary, if there is one.
 * The key and value of the entry are released and its node is unlinked
 * and freed.  Nothing happens when no entry matches.
 */
void
pdf_remove_dict (pdf_obj *dict, const char *name)
{
  pdf_dict  *node;
  pdf_dict **link;   /* The pointer that currently leads to "node" */

  TYPECHECK(dict, PDF_DICT);

  node = dict->data;
  link = (pdf_dict **) &(dict->data);
  for ( ; node->key != NULL; link = &(node->next), node = node->next) {
    if (!pdf_match_name(node->key, name))
      continue;

    pdf_release_obj(node->key);
    pdf_release_obj(node->value);
    *link = node->next;
    RELEASE(node);
    return;
  }
}

/*
 * Create an empty stream object with a new, empty stream dictionary.
 * "flags" is stored with the stream (see STREAM_COMPRESS).
 */
pdf_obj *
pdf_new_stream (int flags)
{
  pdf_obj    *obj     = pdf_new_obj(PDF_STREAM);
  pdf_stream *payload = NEW(1, pdf_stream);

  /*
   * Although we are using an arbitrary pdf_object here, it must have
   * type=PDF_DICT and cannot be an indirect reference.  This will be
   * checked by the output routine.
   */
  payload->dict          = pdf_new_dict();
  payload->_flags        = flags;
  payload->stream        = NULL;
  payload->max_length    = 0;
  payload->stream_length = 0;

  obj->data = payload;

  return obj;
}

/*
 * Write a stream object: its dictionary, the "stream" keyword, the
 * (optionally compressed, then encrypted) data and "endstream".
 *
 * The stream dictionary is modified as a side effect: "Length" is set
 * to the number of bytes written and, when compression is applied,
 * "FlateDecode" is put in front of the "Filter" array.
 */
static void
write_stream (pdf_stream *stream, FILE *file)
{
  unsigned char *filtered;
  unsigned long  filtered_length;
  unsigned long  buffer_length;
  unsigned char *buffer;

  /*
   * Always work from a copy of the stream. All filters read from
   * "filtered" and leave their result in "filtered".
   * One extra byte is requested so that an empty stream never turns
   * into a zero-byte allocation request, and the copy is skipped for
   * empty streams because stream->stream may then be NULL.
   */
  filtered = NEW(stream->stream_length + 1, unsigned char);
  if (stream->stream_length > 0)
    memcpy(filtered, stream->stream, stream->stream_length);
  filtered_length = stream->stream_length;

#ifdef HAVE_ZLIB
  /* Apply compression filter if requested */
  if (stream->stream_length > 0 &&
      (stream->_flags & STREAM_COMPRESS) &&
      compression_level > 0) {

    buffer_length = filtered_length + filtered_length/1000 + 14;
    buffer = NEW(buffer_length, unsigned char);
    {
      pdf_obj *filters;

      filters = pdf_lookup_dict(stream->dict, "Filter");
      if (!filters) {
        /*
         * Only a newly created array is handed to the dictionary.
         * An array that is already stored there is updated in place;
         * adding it again under the same key would ask the dictionary
         * to replace the value with itself, which can drop the only
         * reference to the array while it is still in use.
         */
        filters = pdf_new_array();
        pdf_add_dict(stream->dict, pdf_new_name("Filter"), filters);
      }
      /*
       * FlateDecode is the first filter to be applied to the stream.
       */
      pdf_unshift_array(filters, pdf_new_name("FlateDecode"));
    }
#ifdef HAVE_ZLIB_COMPRESS2    
    if (compress2(buffer, &buffer_length, filtered,
                  filtered_length, compression_level)) {
      ERROR("Zlib error");
    }
#else 
    if (compress(buffer, &buffer_length, filtered,
                 filtered_length)) {
      ERROR ("Zlib error");
    }
#endif /* HAVE_ZLIB_COMPRESS2 */
    RELEASE(filtered);
    compression_saved += filtered_length - buffer_length - strlen("/Filter [/FlateDecode]\n");

    filtered        = buffer;
    filtered_length = buffer_length;
  }
#endif /* HAVE_ZLIB */

  /*
   * No end-of-line is appended to the data itself: an optional
   * end-of-line marker preceding "endstream" is not part of the
   * stream data. See, PDF Reference 4th ed., p. 38.
   */
  pdf_add_dict(stream->dict,
               pdf_new_name("Length"), pdf_new_number(filtered_length));

  pdf_write_obj(stream->dict, file);

  pdf_out(file, "\nstream\n", 8);
  pdf_encrypt_data(filtered, filtered_length);
  if (filtered_length > 0) {
    pdf_out(file, filtered, filtered_length);
  }
  RELEASE(filtered);

  /*
   * This stream length "object" gets reset every time write_stream is
   * called for the stream object.
   * If this stream gets written more than once with different
   * filters, this could be a problem.
   */

  pdf_out(file, "\n", 1);
  pdf_out(file, "endstream", 9);
}

/*
 * Destroy a stream payload: release its dictionary, free the data
 * buffer (if any) and finally the payload structure itself.
 */
static void
release_stream (pdf_stream *stream)
{
  pdf_release_obj(stream->dict);
  stream->dict = NULL;

  if (stream->stream != NULL)
    RELEASE(stream->stream);
  stream->stream = NULL;

  RELEASE(stream);
}

/*
 * Return the dictionary belonging to a stream object.  The pointer
 * returned is the stream's own dictionary, not a new link.
 */
pdf_obj *
pdf_stream_dict (pdf_obj *stream)
{
  TYPECHECK(stream, PDF_STREAM);

  return ((pdf_stream *) stream->data)->dict;
}

/*
 * Return a read-only pointer to the data buffer of a stream object.
 * The pointer is NULL while the stream has no buffer.
 */
const void *
pdf_stream_dataptr (pdf_obj *stream)
{
  pdf_stream *payload;

  TYPECHECK(stream, PDF_STREAM);

  payload = stream->data;
  return (const void *) payload->stream;
}

/* Return the number of data bytes currently stored in a stream object. */
long
pdf_stream_length (pdf_obj *stream)
{
  TYPECHECK(stream, PDF_STREAM);

  return (long) ((pdf_stream *) stream->data)->stream_length;
}

/*
 * Append "length" bytes from "stream_data" to the data of a stream
 * object.  Requests with length < 1 are ignored.
 */
void
pdf_add_stream (pdf_obj *stream, const void *stream_data, long length)
{
  pdf_stream *payload;

  TYPECHECK(stream, PDF_STREAM);

  if (length >= 1) {
    payload = stream->data;
    /* Grow the buffer with some slack so small appends stay cheap. */
    if (payload->stream_length + length > payload->max_length) {
      payload->max_length += length + STREAM_ALLOC_SIZE;
      payload->stream      = RENEW(payload->stream, payload->max_length, unsigned char);
    }
    memcpy(payload->stream + payload->stream_length, stream_data, length);
    payload->stream_length += length;
  }
}

/* Replace the flags word of a stream object with "flags". */
void
pdf_stream_set_flags (pdf_obj *stream, int flags)
{
  TYPECHECK(stream, PDF_STREAM);

  ((pdf_stream *) stream->data)->_flags = flags;
}

/* Return the flags word of a stream object. */
int
pdf_stream_get_flags (pdf_obj *stream)
{
  TYPECHECK(stream, PDF_STREAM);

  return ((pdf_stream *) stream->data)->_flags;
}

/*
 * Write the textual form of "object" to "file" by dispatching on the
 * object type.  A NULL object is written as the null object.
 * When "file" is stderr the reference count is printed in braces in
 * front of the object as a debugging aid.
 */
static void
pdf_write_obj (pdf_obj *object, FILE *file)
{
  if (object == NULL) {
    write_null(NULL, file);
    return;
  }

  if (INVALIDOBJ(object))
    ERROR("pdf_write_obj: Invalid object, type = %d\n", object->type);

  /* refcount is unsigned, so it must be printed with %u. */
  if (file == stderr)
    fprintf(stderr, "{%u}", object->refcount);

  switch (object->type) {
  case PDF_BOOLEAN:
    write_boolean(object->data, file);
    break;
  case PDF_NUMBER:
    write_number (object->data, file);
    break;
  case PDF_STRING:
    write_string (object->data, file);
    break;
  case PDF_NAME:
    write_name(object->data, file);
    break;
  case PDF_ARRAY:
    write_array(object->data, file);
    break;
  case PDF_DICT:
    write_dict (object->data, file);
    break;
  case PDF_STREAM:
    write_stream(object->data, file);
    break;
  case PDF_NULL:
    write_null(NULL, file);
    break;
  case PDF_INDIRECT:
    write_indirect(object->data, file);
    break;
  }
}

/*
 * Emit a labelled object to "file" as
 *   "<label> <generation> obj" ... "endobj"
 * and record where it starts in the output xref table.
 */
static void
pdf_flush_obj (pdf_obj *object, FILE *file)
{
  long header_len;

  /*
   * Remember the file position of this object.  Labels start at 1
   * (no object is numbered 0), hence the index is label - 1.
   */
  output_xref[object->label-1].file_position = pdf_output_file_position;

  header_len = sprintf(format_buffer, "%lu %d obj\n", object->label, object->generation);

  /* Tell the encryption layer which object is about to be written. */
  pdf_enc_set_label(object->label);
  pdf_enc_set_generation(object->generation);

  pdf_out(file, format_buffer, header_len);
  pdf_write_obj(object, file);
  pdf_out(file, "\nendobj\n", 8);
}

/*
 * Drop one reference to "object".  A NULL argument is ignored.
 *
 * When the last reference goes away the object is first written to
 * the output file (only if it carries a nonzero label and an output
 * file is open), then its payload and the object itself are freed.
 * Calling this on an invalid object or one whose reference count is
 * already zero is reported through ERROR.
 */
void
pdf_release_obj (pdf_obj *object)
{
  if (object == NULL)
    return;
  /* refcount is unsigned: "no references left" is exactly zero. */
  if (INVALIDOBJ(object) || object->refcount == 0) {
    MESG("\npdf_release_obj: object=%p, type=%d, refcount=%u\n",
         (void *) object, object->type, object->refcount);
    pdf_write_obj(object, stderr);
    ERROR("pdf_release_obj:  Called with invalid object.");
  }
  object->refcount -= 1;
  if (object->refcount == 0) {
    /*
     * Nothing is using this object so it's okay to remove it.
     * Nonzero "label" means object needs to be written before it's destroyed.
     */
    if (object->label && pdf_output_file != NULL) { 
      pdf_flush_obj(object, pdf_output_file);
    }
    switch (object->type) {
    case PDF_BOOLEAN:
      release_boolean(object->data);
      break;
    case PDF_NULL:
      release_null(object->data);
      break;
    case PDF_NUMBER:
      release_number(object->data);
      break;
    case PDF_STRING:
      release_string(object->data);
      break;
    case PDF_NAME:
      release_name(object->data);
      break;
    case PDF_ARRAY:
      release_array(object->data);
      break;
    case PDF_DICT:
      release_dict(object->data);
      break;
    case PDF_STREAM:
      release_stream(object->data);
      break;
    case PDF_INDIRECT:
      release_indirect(object->data);
      break;
    }
    /* This might help detect freeing already freed objects */
    object->type = -1;
    object->data = NULL;
    RELEASE(object);
  }
}

/*
 * Replace the contents of "dst" with a copy of the contents of "src"
 * without touching dst's label, generation or reference count.
 *
 * The old payload of "dst" is released first.  Array elements are
 * shared (linked) with "src", not duplicated; dictionary entries are
 * merged in through pdf_merge_dict().  The call does nothing when
 * either argument is NULL or when both are the same object (copying
 * an object onto itself would destroy the data before it is read).
 */
void
pdf_copy_object (pdf_obj *dst, pdf_obj *src)
{
  if (!dst || !src || dst == src)
    return;

  switch (dst->type) {
  case PDF_BOOLEAN:  release_boolean(dst->data);  break;
  case PDF_NULL:     release_null(dst->data);     break;
  case PDF_NUMBER:   release_number(dst->data);   break;
  case PDF_STRING:   release_string(dst->data);   break;
  case PDF_NAME:     release_name(dst->data);     break;
  case PDF_ARRAY:    release_array(dst->data);    break;
  case PDF_DICT:     release_dict(dst->data);     break;
  case PDF_STREAM:   release_stream(dst->data);   break;
  case PDF_INDIRECT: release_indirect(dst->data); break;
  }

  dst->type = src->type;
  switch (src->type) {
  case PDF_BOOLEAN:
    dst->data = NEW(1, pdf_boolean);
    pdf_set_boolean(dst, pdf_boolean_value(src));
    break;
  case PDF_NULL:
    dst->data = NULL;
    break;
  case PDF_NUMBER:
    dst->data = NEW(1, pdf_number);
    pdf_set_number(dst, pdf_number_value(src));
    break;
  case PDF_STRING:
    {
      pdf_string *data;

      dst->data = data = NEW(1, pdf_string);
      /*
       * Start from a well-defined empty string: the setter is not
       * visible here and may look at (or free) the previous buffer
       * pointer, so it must never be left uninitialized.
       */
      data->string = NULL;
      data->length = 0;
      pdf_set_string(dst,
                     pdf_string_value(src),
                     pdf_string_length(src));
    }
    break;
  case PDF_NAME:
    {
      pdf_name *data;

      dst->data = data = NEW(1, pdf_name);
      /* Same reasoning as for strings above. */
      data->name = NULL;
      pdf_set_name(dst, pdf_name_value(src));
    }
    break;
  case PDF_ARRAY:
    {
      pdf_array *data;
      unsigned long i;

      dst->data = data = NEW(1, pdf_array);
      data->size = 0;
      data->max  = 0;
      data->values = NULL;
      for (i = 0; i < pdf_array_length(src); i++) {
        pdf_add_array(dst, pdf_link_obj(pdf_get_array(src, i)));
      }
    }
    break;
  case PDF_DICT:
    {
      pdf_dict *data;

      dst->data = data = NEW(1, pdf_dict);
      data->key   = NULL;
      data->value = NULL;
      data->next  = NULL;
      pdf_merge_dict(dst, src);
    }
    break;
  case PDF_STREAM:
    {
      pdf_stream *data;

      dst->data = data = NEW(1, pdf_stream);
      data->dict = pdf_new_dict();
      data->_flags = ((pdf_stream *)src->data)->_flags;
      /*
       * The buffer pointer must be NULL before the first append:
       * pdf_add_stream() passes it to RENEW, and release_stream()
       * frees it whenever it is non-NULL.
       */
      data->stream        = NULL;
      data->stream_length = 0;
      data->max_length    = 0;

      pdf_add_stream(dst, pdf_stream_dataptr(src), pdf_stream_length(src));
      pdf_merge_dict(data->dict, pdf_stream_dict(src));
    }
    break;
  case PDF_INDIRECT:
    {
      pdf_indirect *data;

      dst->data = data = NEW(1, pdf_indirect);
      data->label  = ((pdf_indirect *) (src->data))->label;
      data->generation = ((pdf_indirect *) (src->data))->generation;
      data->dirty      = ((pdf_indirect *) (src->data))->dirty;
      data->dirty_file = ((pdf_indirect *) (src->data))->dirty_file;
    }
    break;
  }

  return;
}

/*
 * Move the read position of pdf_input_file backwards until the
 * character just read is an end-of-line character ('\n' or '\r') or
 * the start of the file is reached.
 * Returns 1 when at least one character could be read on the way,
 * 0 otherwise (position too close to the start, or a read failure).
 */
static int
backup_line (void)
{
  int ch = -1;

  /*
   * Note: this code should work even if \r\n is eol. It could fail on a
   * machine where \n is eol and there is a \r in the stream --- Highly
   * unlikely in the last few bytes where this is likely to be used.
   */
  if (tell_position(pdf_input_file) > 1) {
    for (;;) {
      /* Step back two bytes; reading one then nets one byte backwards. */
      seek_relative (pdf_input_file, -2);
      if (tell_position(pdf_input_file) <= 0)
        break;
      ch = fgetc(pdf_input_file);
      if (ch < 0)
        break;
      if (ch == '\n' || ch == '\r')
        break;
    }
  }

  return (ch < 0) ? 0 : 1;
}

static unsigned long pdf_file_size = 0;

/*
 * Locate the "startxref" keyword near the end of pdf_input_file and
 * return the byte offset written on the line that follows it.
 * Also records the file size in pdf_file_size.
 *
 * Returns 0 when the keyword is not found within the last few lines,
 * when the offset line is missing or not a number, or when the offset
 * does not lie inside the file.
 */
static long
find_xref (void)
{
  const size_t keylen = strlen("startxref");
  long  xref_pos;
  int   tries, found = 0;

  seek_end(pdf_input_file);

  pdf_file_size = tell_position(pdf_input_file);

  /* Walk backwards line by line, giving up after ten lines. */
  for (tries = 10; tries > 0 && !found; tries--) {
    long   currentpos;
    size_t nread;

    if (!backup_line())
      return 0;
    currentpos = tell_position(pdf_input_file);
    nread = fread(work_buffer, sizeof(char), keylen, pdf_input_file);
    seek_absolute(pdf_input_file, currentpos);
    /* A short read leaves stale bytes in the buffer: never a match. */
    if (nread == keylen &&
        !strncmp(work_buffer, "startxref", keylen))
      found = 1;
  }
  if (!found)
    return 0;

  /* Skip rest of this line */
  if (mfgets(work_buffer, WORK_BUFFER_SIZE, pdf_input_file) == NULL)
    return 0;
  /* Next line of input file should contain actual xref location */
  if (mfgets(work_buffer, WORK_BUFFER_SIZE, pdf_input_file) == NULL)
    return 0;

  {
    char *start, *end, *number;

    start = work_buffer;
    end   = start + strlen(work_buffer);
    skip_white(&start, end);
    number = parse_number(&start, end);
    if (!number)
      return 0;
    xref_pos = (long) atof(number);
    RELEASE(number);
  }

  /* An offset outside the file cannot point at an xref table. */
  if (xref_pos < 0 || (unsigned long) xref_pos >= pdf_file_size)
    return 0;

  return xref_pos;
}

/*
 * Parse the trailer dictionary.
 *
 * This routine must be called with the file pointer located
 * at the start of the trailer.  Returns the parsed dictionary, or
 * NULL (after a warning) when the "trailer" keyword is not found.
 */
static pdf_obj *
parse_trailer (void)
{
  pdf_obj *result;
  size_t   nread;
  char    *p, *endptr;

  /*
   * Fill work_buffer and hope trailer fits. This should
   * be made a bit more robust sometime.
   * One byte is kept back for a terminating NUL so the buffer can be
   * printed and compared as a C string, and parsing is limited to the
   * bytes actually read rather than the whole buffer.
   */
  nread = fread(work_buffer, sizeof(char),
                WORK_BUFFER_SIZE - 1, pdf_input_file);
  work_buffer[nread] = '\0';

  if (nread == 0 ||
      strncmp(work_buffer, "trailer", strlen("trailer"))) {
    WARN("No trailer.  Are you sure this is a PDF file?");
    WARN("buffer:\n->%s<-\n", work_buffer);
    result = NULL;
  } else {
    endptr = work_buffer + nread;
    p      = work_buffer + strlen("trailer");
    skip_white(&p, endptr);
    result = parse_pdf_dict(&p, endptr);
  }

  return result;
}

/*
 * One entry of the cross-reference table of the input PDF file.
 * xref_table is indexed by the object number used in the input file
 * and holds num_input_objects entries.
 */
struct object
{
  unsigned long file_position; /* byte offset taken from the xref entry */
  unsigned      generation;    /* generation number from the xref entry */
  /*
   * Objects in the original file and in the new file live in
   * different object number spaces.
   * "direct" holds the object as read from the input file and
   * "indirect" holds the reference created for it in the new file.
   * Both are set the first time the object is referenced and are kept
   * until the input file is closed, so that later references reuse
   * "indirect" instead of reading and copying the object again.
   */
  pdf_obj *direct;
  pdf_obj *indirect;
  int      used;               /* nonzero for in-use ('n') xref entries */
} *xref_table = NULL;
long num_input_objects;

/*
 * Estimate an upper bound for the file offset at which object
 * "obj_num" ends, so the caller knows how large a read buffer it
 * needs (the parsing routines want the entire object in memory).
 * The bound is the smallest offset of any in-use object located after
 * this one, or the file size when there is none.  In the worst case
 * an object placed right before an xref table grabs the whole table.
 */
static long
next_object_offset (unsigned long obj_num)
{
  long  bound, self, idx;

  bound = pdf_file_size;  /* Worst case */
  self  = xref_table[obj_num].file_position;

  /* Scan every entry for the closest in-use object behind this one. */
  for (idx = 0; idx < num_input_objects; idx++) {
    if (!xref_table[idx].used)
      continue;
    if (xref_table[idx].file_position > self &&
        xref_table[idx].file_position < bound)
      bound = xref_table[idx].file_position;
  }

  return  bound;
}

/*
 * checklabel(n,g): true when object number "n" is nonzero, lies
 * inside xref_table and its recorded generation equals "g".
 * labelfreed(n,g): true when the xref entry for "n" is not marked as
 * in use; "g" is accepted but not examined.  It indexes xref_table
 * without a range check, so use it only after checklabel succeeded.
 */
#define checklabel(n,g) ((n) > 0 && (n) < num_input_objects && \
  xref_table[(n)].generation == (g))
#define labelfreed(n,g) (!xref_table[(n)].used)

/*
 * Return a reference to an object that so far exists only in the
 * input file.  On the first request the object is read from the file
 * and a reference is created for it; both are remembered in
 * xref_table.  Every later request returns a new link to that same
 * remembered reference.  Returns NULL (after a warning) when the
 * label is invalid or the object cannot be read.
 */
static pdf_obj *
pdf_ref_file_obj (unsigned long obj_num, unsigned obj_gen)
{
  pdf_obj *direct, *indirect;

  if (!checklabel(obj_num, obj_gen)) {
    WARN("Can't resolve object: %lu %u",
         obj_num, obj_gen);
    return NULL;
  }

  indirect = xref_table[obj_num].indirect;
  if (indirect == NULL) {
    direct = pdf_read_object(obj_num, obj_gen);
    if (direct == NULL) {
      WARN("Could not read object: %lu %u",
           obj_num, obj_gen);
      return NULL;
    }

    indirect = pdf_ref_obj(direct);
    xref_table[obj_num].indirect = indirect;
    xref_table[obj_num].direct   = direct;
  }

  /*
   * Hand out a link: the table keeps its own reference, so the caller
   * releasing the result does not free the remembered object.
   */
  return  pdf_link_obj(indirect);
}


pdf_obj *
pdf_new_ref (unsigned long obj_num, int obj_gen) 
{
  pdf_obj      *result;
  pdf_indirect *indirect;

  if (!checklabel(obj_num, obj_gen)) {
    WARN("Invalid object label: %lu %d", obj_num, obj_gen);
    return NULL;
  }
  result   = pdf_new_obj(PDF_INDIRECT);
  indirect = NEW(1, pdf_indirect);
  result->data = indirect;
  indirect->label      = obj_num;
  indirect->generation = obj_gen;
  indirect->dirty      = 1;
  indirect->dirty_file = pdf_input_file;

  return result;
}


/*
 * Read and parse object "obj_num obj_gen" from the input file.
 *
 * The bytes between the object's xref offset and the estimated start
 * of the next object are loaded into a temporary buffer, the
 * "<num> <gen> obj" header is verified, and the body is parsed.
 * Returns the parsed object, or NULL when the label is invalid or
 * freed, the header does not match, or "obj"/"endobj" is missing.
 *
 * Label without corresponding object definition should
 * be replaced with "null". But we won't do that.
 */ 
static pdf_obj *
pdf_read_object (unsigned long obj_num, unsigned obj_gen) 
{
  pdf_obj *result;
  long     offset, limit, length;
  size_t   nread;
  char    *buffer, *p, *endptr;

  if (!checklabel(obj_num, obj_gen)) {
    WARN("Trying to read nonexistent object: %lu %u",
         obj_num, obj_gen);
    return NULL;
  }
  if (labelfreed(obj_num, obj_gen)) {
    WARN("Trying to read deleted object: %lu %u",
        obj_num, obj_gen);
    return NULL;
  }
  offset = xref_table[obj_num].file_position;
  limit  = next_object_offset(obj_num);
  length = limit - offset;

  if (length <= 0)
    return NULL;

  buffer = NEW(length + 1, char);

  seek_absolute(pdf_input_file, offset);
  /*
   * Only the bytes that were really read may be parsed; the spare
   * byte holds a terminating NUL.
   */
  nread = fread(buffer, sizeof(char), length, pdf_input_file);
  buffer[nread] = '\0';

  p      = buffer;
  endptr = p + nread;

  /* Check for obj_num and obj_gen */
  {
    char         *q = p; /* <== p */
    char         *sp;
    unsigned long n, g;

    skip_white(&q, endptr);
    sp = parse_unsigned(&q, endptr);
    if (!sp) {
      RELEASE(buffer);
      return NULL;
    }
    n = strtoul(sp, NULL, 10);
    RELEASE(sp);

    skip_white(&q, endptr);
    sp = parse_unsigned(&q, endptr);
    if (!sp) {
      RELEASE(buffer);
      return NULL;
    }
    g = strtoul(sp, NULL, 10);
    RELEASE(sp);

    if (n != obj_num || g != obj_gen) {
      RELEASE(buffer);
      return NULL;
    }

    p = q; /* ==> p */
  }


  skip_white(&p, endptr);
  /* Compare the keyword only if that many bytes are left. */
  if (endptr - p < (long) strlen("obj") ||
      memcmp(p, "obj", strlen("obj"))) {
    WARN("Didn't find \"obj\".");
    RELEASE(buffer);
    return NULL;
  }
  p += strlen("obj");

  result = parse_pdf_object(&p, endptr);

  skip_white(&p, endptr);
  if (endptr - p < (long) strlen("endobj") ||
      memcmp(p, "endobj", strlen("endobj"))) {
    WARN("Didn't find \"endobj\".");
    if (result)
      pdf_release_obj(result);
    result = NULL;
  }
  RELEASE(buffer);

  return  result;
}

/*
 * Resolve "obj" to a direct object.
 *
 * pdf_deref_obj always returns a link instead of the original: a
 * non-indirect argument is returned as a new link to itself, and an
 * indirect one is read from the input file, following chains of
 * references.  The caller must release the result.  Returns NULL for
 * a NULL argument, when an object cannot be read, or when the chain
 * of references is longer than DEREF_MAX_DEPTH (which protects
 * against circular references in a broken file).
 */ 
pdf_obj *
pdf_deref_obj (pdf_obj *obj)
{
  pdf_obj  *result;
  int       depth = 0;

  if (obj == NULL)
    return NULL;
  else if (pdf_obj_typeof(obj) != PDF_INDIRECT) {
    return  pdf_link_obj(obj);
  }

#define OBJ_NUM(o) (((pdf_indirect *)((o)->data))->label)
#define OBJ_GEN(o) (((pdf_indirect *)((o)->data))->generation)
#define ISDIRTY(o) (((pdf_indirect *)((o)->data))->dirty)
#define OBJ_SRC(o) (((pdf_indirect *)((o)->data))->dirty_file)
#define DEREF_MAX_DEPTH 30
  if (!ISDIRTY(obj))
    ERROR("Tried to deref a non-file object");
  result = pdf_read_object(OBJ_NUM(obj), OBJ_GEN(obj));
  while (result &&
         pdf_obj_typeof(result) == PDF_INDIRECT
         ) {
    pdf_obj *tmp;

    if (++depth > DEREF_MAX_DEPTH) {
      WARN("Too many nested indirect references. Broken PDF file?");
      pdf_release_obj(result);
      return NULL;
    }
    tmp = pdf_read_object(OBJ_NUM(result), OBJ_GEN(result));
    pdf_release_obj(result);
    result = tmp;
  }

  return  result;
}

static void
extend_xref (long new_size) 
{
  unsigned long i;

  xref_table = RENEW(xref_table, new_size, struct object);
  for (i = num_input_objects; i < new_size; i++) {
    xref_table[i].direct   = NULL;
    xref_table[i].indirect = NULL;
    xref_table[i].used     = 0;
    xref_table[i].generation = 0;
    xref_table[i].file_position = 0L;
  }
  num_input_objects = new_size;
}

/*
 * Read one xref section into xref_table.
 *
 * This routine reads one xref segment.  It must be called positioned
 * at the beginning of an xref table.  It may be called multiple times
 * on the same file.  xref tables sometimes come in pieces.
 * On success the file is left positioned at the "trailer" keyword.
 *
 * Returns 1 on success and 0 (after a warning) when the section is
 * missing or malformed.  A premature end of file inside the section
 * is reported through ERROR.
 */
static int
parse_xref (void)
{
  unsigned long first, size;
  unsigned long i, offset;
  unsigned int  obj_gen;
  char          flag;
  int           r;

#define POSITION_UNSET(t,n)     ((t)[(n)].file_position == 0)
#define SET_XREF_ENTRY(t,n,o,g) \
do { \
  (t)[(n)].file_position = (o); \
  (t)[(n)].generation = (g); \
} while (0)
#define SET_XREF_FLAG(t,n,f) ((t)[(n)].used = ((f) == 'n') ? 1 : 0)

  if (mfgets(work_buffer, WORK_BUFFER_SIZE, pdf_input_file) == NULL ||
      memcmp(work_buffer, "xref", strlen("xref"))) {
    WARN("No xref.  Are you sure this is a PDF file?");
    return 0;
  }
  /* Next line in file has first item and size of table */
  for (;;) {
    unsigned long current_pos;

    current_pos = tell_position(pdf_input_file);
    if (mfgets(work_buffer, WORK_BUFFER_SIZE, pdf_input_file) == NULL)
      ERROR("parse_xref: premature end of PDF file while parsing xref");
    if (!strncmp(work_buffer, "trailer", strlen ("trailer"))) {
      /*
       * Backup... This is ugly, but it seems like the safest thing to
       * do.  It is possible the trailer dictionary starts on the same
       * logical line as the word trailer.  In that case, the mfgets
       * call might have started to read the trailer dictionary and
       * parse_trailer would fail.
       */
      seek_absolute(pdf_input_file, current_pos);
      break;
    }
    /*
     * Both numbers of the subsection header must be present, and
     * their sum must not wrap around; otherwise the table would be
     * indexed with garbage.
     */
    if (sscanf(work_buffer, "%lu %lu", &first, &size) != 2 ||
        first + size < first) {
      WARN("Invalid xref subsection header. PDF file is corrupt...");
      return  0;
    }
    if (num_input_objects < first + size) {
      extend_xref (first + size);
    }

    for (i = first; i < first + size; i++) {
      /* Every xref entry is exactly 20 bytes long. */
      if (fread(work_buffer, sizeof(char), 20, pdf_input_file) != 20) {
        WARN("Invalid xref table entry [%lu]. PDF file is corrupt...", i);
        return  0;
      }
      work_buffer[19] = 0;
      offset = 0UL; obj_gen = 0; flag = 0;
      r = sscanf(work_buffer, "%010lu %05u %c", &offset, &obj_gen, &flag);
      if ( r != 3 ||
          ((flag != 'n' && flag != 'f') ||
           (flag == 'n' &&
           (offset >= pdf_file_size || (offset > 0 && offset < 4))))) {
        WARN("Invalid xref table entry [%lu]. PDF file is corrupt...", i);
        return  0;
      }
      /*
       * Don't overwrite positions that have already been set by a
       * modified xref table.  We are working our way backwards
       * through the reference table, so we only set "position" 
       * if it hasn't been set yet.
       */
      if (POSITION_UNSET(xref_table, i))
        SET_XREF_ENTRY(xref_table, i, offset, obj_gen);
      SET_XREF_FLAG(xref_table, i, flag);
    }
  }

  return  1;
}

/*
 * Read the complete cross-reference information of the input file:
 * the primary xref table and trailer, followed by every older table
 * reachable through the "Prev" entries of the trailers.
 *
 * Returns the primary trailer dictionary (owned by the caller) or
 * NULL after a warning.  On every failure path the primary trailer,
 * if already parsed, is released before returning.
 */
static pdf_obj *
read_xref (void)
{
  pdf_obj *main_trailer;
  long     xref_pos;

  xref_pos = find_xref();
  if (xref_pos == 0) {
    WARN("Can't find xref table.");
    return NULL;
  }

  /* Read primary xref table */
  seek_absolute(pdf_input_file, xref_pos);
  if (!parse_xref()) {
    WARN("Couldn't read xref table. Is this a correct PDF file?");
    return NULL;
  }
  main_trailer = parse_trailer();
  if (main_trailer == NULL) {
    WARN("Couldn't read xref trailer. Is this a correct PDF file?");
    return NULL;
  }

  {
    pdf_obj *xref_size;

    xref_size = pdf_lookup_dict(main_trailer, "Size");
    if (xref_size == NULL ||
        pdf_lookup_dict(main_trailer, "Root") == NULL) {
      WARN("Trailer doesn't have catalog or a size. Is this a correct PDF file?");
      pdf_release_obj(main_trailer);
      return NULL;
    }
    if (num_input_objects < pdf_number_value(xref_size)) {
      extend_xref((long)pdf_number_value(xref_size));
    }
  }

  /* Read any additional xref tables */
  {
    pdf_obj *prev_trailer, *prev_xref;

    /*
     * The loop releases the trailer it has just examined, so it works
     * on an extra link to the primary trailer, not on the primary
     * reference itself.
     */
    prev_trailer = pdf_link_obj(main_trailer);
    while ((prev_xref = pdf_lookup_dict(prev_trailer, "Prev")) != NULL) {
      xref_pos = (long) pdf_number_value(prev_xref);
      seek_absolute(pdf_input_file, xref_pos);
      pdf_release_obj(prev_trailer);
      if (!parse_xref()) {
        WARN("Couldn't read xref table.  Is this a correct PDF file?");
        pdf_release_obj(main_trailer);
        return NULL;
      }
      prev_trailer = parse_trailer();
      if (prev_trailer == NULL) {
        WARN("Couldn't read xref trailer. Is this a correct PDF file?");
        pdf_release_obj(main_trailer);
        return NULL;
      }
    }
    pdf_release_obj(prev_trailer);
  }

  return main_trailer;
}

static char any_open = 0;

/*
 * Open "file" as the current input PDF and read its xref tables.
 *
 * Returns the trailer dictionary on success.  Returns NULL after a
 * warning when the file is not an acceptable PDF file or its xref
 * information cannot be read.  Only one input file may be open at a
 * time: calling this again before pdf_close() terminates the program.
 */
pdf_obj *
pdf_open (FILE *file)
{
  pdf_obj *trailer;

  if (any_open) {
    WARN("Only one PDF file may be open at one time.");
    exit(1);
  }
  pdf_input_file = file;
  if (!check_for_pdf(pdf_input_file)) {
    WARN("pdf_open: Not a PDF 1.[1-3] file.");
    return NULL;
  }
  if ((trailer = read_xref()) == NULL) {
    WARN("No trailer.");
    pdf_close();
    return NULL;
  }

  /*
   * Mark the file as open only once it really is; pdf_close() clears
   * the flag again.  Without this the guard above could never fire.
   */
  any_open = 1;

  return trailer;
}

/*
 * Close the current input PDF: release every object and reference
 * remembered in xref_table, free the table and reset the input state.
 */
void
pdf_close (void)
{
  unsigned long idx;
  int released;

  /*
   * Releasing a direct object can write it out, and each write could
   * trigger an additional indirect reference of an object with a
   * lower number.  So keep sweeping the table until a complete pass
   * finds nothing left to release.
   */
  for (;;) {
    released = 0;
    for (idx = 0; idx < num_input_objects; idx++) {
      if (xref_table[idx].direct == NULL)
        continue;
      pdf_release_obj(xref_table[idx].direct);
      xref_table[idx].direct = NULL;
      released = 1;
    }
    if (!released)
      break;
  }

  /*
   * Now take care of the indirect objects. They had to be left around
   * until all the direct objects were flushed.
   */
  for (idx = 0; idx < num_input_objects; idx++) {
    if (xref_table[idx].indirect != NULL)
      pdf_release_obj(xref_table[idx].indirect);
  }

  RELEASE(xref_table);
  xref_table        = NULL;
  num_input_objects = 0;
  any_open          = 0;
  pdf_input_file    = NULL;
}

/*
 * Check whether "file" starts with a "%PDF-1.x" header whose minor
 * version digit x is between 0 and pdf_version.
 * The file is rewound first.  Returns 1 when acceptable, 0 otherwise;
 * a warning is issued when the header is present but the minor
 * version is outside that range.
 */
int
check_for_pdf (FILE *file) 
{
  const size_t header_len = strlen("%PDF-1.x");
  char minor;

  rewind(file);
  if (fread(work_buffer, sizeof(char), header_len, file) != header_len)
    return 0;
  if (strncmp(work_buffer, "%PDF-1.", strlen("%PDF-1.")) != 0)
    return 0;

  /* Byte 7 is the minor version digit that follows "%PDF-1.". */
  minor = work_buffer[7];
  if (minor >= '0' && minor <= '0' + pdf_version)
    return 1;

  WARN("Version of PDF file (1.%c) is newer than version limit specification.",
       minor);
  return 0;
}

/*
 * pdf_foreach_dict() callback used by pdf_import_object(): import
 * "value" and store it under a link to "key" in the dictionary passed
 * through "pdata".  Returns 0 on success, -1 when the value cannot be
 * imported.
 */
static int CDECL
import_dict (pdf_obj *key, pdf_obj *value, void *pdata)
{
  pdf_obj *target   = (pdf_obj *) pdata;
  pdf_obj *imported = pdf_import_object(value);

  if (imported == NULL)
    return -1;

  pdf_add_dict(target, pdf_link_obj(key), imported);

  return 0;
}

/*
 * Produce a version of "object" that can be used in the output file.
 *
 * References into the input file are replaced by references obtained
 * from pdf_ref_file_obj(); streams, dictionaries and arrays are
 * rebuilt with every member imported in turn; all other objects (and
 * references that are not dirty) are returned as new links.
 * Returns NULL when any part cannot be imported.
 */
pdf_obj *
pdf_import_object (pdf_obj *object)
{
  pdf_obj  *copy;
  pdf_obj  *part;
  int       idx;

  switch (pdf_obj_typeof(object)) {

  case PDF_INDIRECT:
    if (!ISDIRTY(object))
      return pdf_link_obj(object);
    if (OBJ_SRC(object) != pdf_input_file) {
      WARN("Different file is opened before object is imported?");
      return NULL;
    }
    return pdf_ref_file_obj(OBJ_NUM(object), OBJ_GEN(object));

  case PDF_STREAM:
    /* Import the dictionary first, then copy the raw data. */
    part = pdf_import_object(pdf_stream_dict(object));
    if (part == NULL)
      return NULL;

    copy = pdf_new_stream(0);
    pdf_merge_dict(pdf_stream_dict(copy), part);
    pdf_release_obj(part);
    pdf_add_stream(copy,
                   pdf_stream_dataptr(object),
                   pdf_stream_length(object));
    return copy;

  case PDF_DICT:
    copy = pdf_new_dict();
    if (pdf_foreach_dict(object, import_dict, copy) < 0) {
      pdf_release_obj(copy);
      return NULL;
    }
    return copy;

  case PDF_ARRAY:
    copy = pdf_new_array();
    for (idx = 0; idx < pdf_array_length(object); idx++) {
      part = pdf_import_object(pdf_get_array(object, idx));
      if (part == NULL) {
        pdf_release_obj(copy);
        return NULL;
      }
      pdf_add_array(copy, part);
    }
    return copy;

  default:
    break;
  }

  return pdf_link_obj(object);
}


/*
 * Compare two indirect references.
 *
 * Returns 0 when both refer to the same object (same dirty state,
 * same source file, same label and generation) and a nonzero value
 * otherwise.  The fields are compared in that order; the result is
 * -1 or 1, so it can never collapse to 0 for references that differ.
 * The ordering between different source files is consistent but
 * otherwise arbitrary.  Passing anything that is not an indirect
 * reference is reported through ERROR.
 */
int
pdf_compare_reference (pdf_obj *ref1, pdf_obj *ref2)
{
  pdf_indirect *data1, *data2;

  if (!PDF_OBJ_INDIRECTTYPE(ref1) ||
      !PDF_OBJ_INDIRECTTYPE(ref2)) {
    ERROR("Not indirect reference...");
  }

  data1 = (pdf_indirect *) ref1->data;
  data2 = (pdf_indirect *) ref2->data;

  if (data1->dirty != data2->dirty)
    return (data1->dirty < data2->dirty) ? -1 : 1;
  if (data1->dirty_file != data2->dirty_file) {
    /*
     * Subtracting or ordering pointers to unrelated objects is not
     * defined, so order the files by the bytes of the pointer values.
     */
    return (memcmp(&data1->dirty_file, &data2->dirty_file,
                   sizeof(data1->dirty_file)) < 0) ? -1 : 1;
  }
  if (data1->label != data2->label)
    return (data1->label < data2->label) ? -1 : 1;
  if (data1->generation != data2->generation)
    return (data1->generation < data2->generation) ? -1 : 1;

  return 0;
}
