/*
 * Copyright (c) 2004 Jean-Yves Lefort
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of Jean-Yves Lefort nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include <glib.h>
#include "st-entities.h"

/*** function declarations ***************************************************/

/* Maps the text of a character reference to its character; 0 means invalid. */
static gunichar st_sgml_ref_get_unichar (const char *ref);

/*** implementation **********************************************************/

static gunichar
st_sgml_ref_get_unichar (const char *ref)
{
  gunichar c = 0;		/* 0 means "invalid reference" */

  g_return_val_if_fail(ref != NULL, 0);

  if (*ref == '#')
    {				/* numeric reference */
      const char *nptr;
      int base;

      if (*(ref + 1) == 'x' || *(ref + 1) == 'X')
	{			/* hexadecimal number */
	  nptr = ref + 2;
	  base = 16;
	}
      else
	{			/* decimal number */
	  nptr = ref + 1;
	  base = 10;
	}

      if (*nptr)
	{
	  char *end;
	  unsigned long code;
	  
	  code = strtoul(nptr, &end, base);
	  if (*end == 0)	/* could convert */
	    c = code;
	}
    }
  else
    {				/* entity reference */
      int i;

      for (i = 0; i < G_N_ELEMENTS(entities); i++)
	if (! strcmp(ref, entities[i].name))
	  {
	    c = entities[i].character;
	    break;
	  }
    }
  
  return c;
}

/*
 * Parse STR, possibly containing SGML character references, and
 * return a newly-allocated UTF-8 string with the references expanded
 * to their Unicode character. The returned string should be freed
 * with g_free() when no longer needed.
 *
 * A reference is an ampersand, followed by a name or number, followed
 * by a semicolon. Text which looks like a reference but cannot be
 * resolved is copied verbatim. An ampersand is only paired with a
 * semicolon if no other ampersand stands in between, so that a stray
 * '&' does not prevent a later, valid reference from being expanded
 * (for instance, "AT&T &amp; co" yields "AT&T & co").
 */
char *
st_sgml_ref_expand (const char *str)
{
  GString *unescaped;
  const char *start;
  const char *ampersand;

  g_return_val_if_fail(str != NULL, NULL);

  unescaped = g_string_new(NULL);
  start = str;

  while ((ampersand = strchr(start, '&')))
    {
      const char *name = ampersand + 1;
      /* the candidate reference ends at the first ';', '&' or at the end of STR */
      const char *end = name + strcspn(name, "&;");
      gunichar c = 0;

      /* copy the literal text preceding the ampersand */
      g_string_append_len(unescaped, start, ampersand - start);

      if (*end == ';')
	{
	  char *ref = g_strndup(name, end - name);

	  c = st_sgml_ref_get_unichar(ref);
	  g_free(ref);
	}

      if (c)
	{
	  g_string_append_unichar(unescaped, c);
	  start = end + 1;	/* skip the semicolon */
	}
      else
	{
	  /*
	   * Not a valid reference: append it raw, and resume at the
	   * terminator, which is either plain text (';' or the end of
	   * STR) or the ampersand of the next candidate.
	   */
	  g_string_append_len(unescaped, ampersand, end - ampersand);
	  start = end;
	}
    }

  g_string_append(unescaped, start);

  return g_string_free(unescaped, FALSE);
}
