static char rcsid[] = "@(#)$Id: charset.c,v 1.13.2.3 1999/11/09 19:29:14 hurtta Exp $";

/******************************************************************************
 *  The Elm (ME+) Mail System  -  $Revision: 1.13.2.3 $   $State: Exp $
 *
 *  Author: Kari Hurtta <hurtta+elm@ozone.FMI.FI>
 *****************************************************************************/

#include "headers.h"
#include "s_me.h"

#include <errno.h>
#ifndef ANSI_C
extern int errno;
#endif

/* Address of the US-ASCII entry (element 0) of the built-in table below. */
#define ASCII &precompiled_sets[0]

/*
 * Built-in charsets.  US-ASCII comes first and is flagged SET_nodata;
 * every other entry names US-ASCII as its first part.  The last entry
 * has no flags at all, so code that tests SET_valid while scanning the
 * whole array skips it.
 */
static struct charcode_info precompiled_sets[] = {
    { SET_valid|SET_nodata,  "US-ASCII",   { NULL  } },
    { SET_valid,  "ISO-8859-1", { ASCII } },
    { SET_valid,  "ISO-8859-2", { ASCII } },
    { SET_valid,  "ISO-8859-3", { ASCII } },
    { SET_valid,  "ISO-8859-4", { ASCII } },
    { SET_valid,  "ISO-8859-5", { ASCII } },
    { SET_valid,  "ISO-8859-6", { ASCII } },
    { SET_valid,  "ISO-8859-7", { ASCII } },
    { SET_valid,  "ISO-8859-8", { ASCII } },
    { SET_valid,  "ISO-8859-9", { ASCII } },
    { SET_valid,  "ISO-8859-10", { ASCII } },
    { SET_valid,  "ISO-8859-14", { ASCII } },
    { SET_valid,  "ISO-8859-15", { ASCII } },
    { SET_valid,  "KOI8-R", { ASCII } },
    { 0,       NULL, { NULL } }
};

/*
 * Charsets registered at run time.  add_set() appends nodes to this
 * singly linked list: extra_sets is the first node, extra_sets_tail the
 * last one, and extra_sets_count the number of nodes appended so far.
 */
static struct extra_sets {
    struct charcode_info set;
    struct extra_sets    * next;
} * extra_sets = NULL, * extra_sets_tail = NULL;
static int extra_sets_count = 0;

/*
 * Search key handed to a compare_set predicate: either a charset name
 * or a pointer to a charset.
 */
union compare_info {
    char *name;
    struct charcode_info * part;
};

/*
 * Predicate type used by find_set(): returns nonzero when 'set'
 * matches the key in 'data'.
 */
typedef  int compare_set P_((struct charcode_info *set, 
			     union compare_info data));

static struct charcode_info * find_set P_(( compare_set *func,
					    union compare_info data ));

/*
 * find_set -- locate the first valid charset accepted by a predicate.
 *
 * The built-in table (precompiled_sets) is searched first, then the
 * run-time list (extra_sets).  Only entries carrying SET_valid are
 * offered to 'func'.
 *
 *   func   predicate called as func(entry, data)
 *   data   key passed through to 'func' unchanged
 *
 * Returns a pointer to the matching entry, or NULL when none matches.
 */
static struct charcode_info * find_set(func,data)
     compare_set *func;
     union compare_info data;
{
    int i;
    struct extra_sets *walk;

/* Return ITEM's address from find_set() if it is valid and matches. */
#define TEST(ITEM) \
    if (0 != (ITEM.flags & SET_valid) && func(&(ITEM),data)) { \
      dprint(12,(debugfile,"find_set: found=%X (%s)\n",&(ITEM),\
        ITEM.MIME_name ? ITEM.MIME_name : "<no MIME name>")); \
    return &(ITEM); }

/* Apply TEST to every element of the array SET (a real array, not a pointer). */
#define SCAN(SET) \
    for (i = 0; i < sizeof SET / sizeof (struct charcode_info); i++) {\
       dprint(100,(debugfile,"find_set: (scan#%d) %X -- flags=%d\n",\
             i,&(SET[i]),SET[i].flags)); \
      TEST(SET[i]); } 


    dprint(99,(debugfile,"find_set: precompiled_sets\n"));
    SCAN(precompiled_sets);
    
    dprint(99,(debugfile,"find_set: extra_sets\n"));
    for (walk = extra_sets; walk; walk = walk -> next) {
	dprint(100,(debugfile,"find_set: (walk) %X -- flags=%d\n",
		    &(walk->set),walk->set.flags)); 
	TEST(walk->set);
    }

    dprint(99,(debugfile,"find_set: NOT FOUND\n"));
    return NULL;
}	
/* Both helper macros are private to find_set(); drop them here. */
#undef SCAN
#undef TEST

static struct charcode_info * add_set P_((struct charcode_info s));

/*
 * add_set -- append a copy of 's' to the run-time charset list.
 *
 * A new list node is allocated with safe_malloc(), linked after the
 * current tail and counted in extra_sets_count.
 *
 * Returns a pointer to the copy stored inside the new node.
 */
static struct charcode_info * add_set(s)
     struct charcode_info s;
{
    struct extra_sets *node = safe_malloc( sizeof (struct extra_sets));

    node->next = NULL;
    node->set  = s;

    /* First node ever added also becomes the head of the list */
    if (NULL == extra_sets)
	extra_sets = node;

    if (NULL != extra_sets_tail)
	extra_sets_tail->next = node;
    extra_sets_tail = node;

    ++extra_sets_count;

    if (NULL != node->set.MIME_name) {
	dprint(1,(debugfile,"charset: Adding charset %s\n",
		  node->set.MIME_name));
    }

    return &(node->set);
}

static int same_name P_((struct charcode_info *set, union compare_info data));

/*
 * same_name -- compare_set predicate: does 'set' carry the MIME name
 * given in data.name?  The comparison is done with istrcmp().
 *
 * Returns 1 on a match, 0 otherwise (including when 'set' has no
 * MIME name at all).
 */
static int same_name (set,data)
     struct charcode_info *set;
     union compare_info data;
{
    dprint(99,(debugfile,"same_name -- set=%X (MIME_name=%s,data.name=%s)\n",
	       set,set->MIME_name ? set->MIME_name : "<NULL>",data.name));

    if (set->MIME_name && 0 == istrcmp(data.name,set->MIME_name))
	return 1;

    return 0;
}

charset_t MIME_name_to_charset (name,create)
     char *name;
     int create;
{
    charset_t ret;
    struct charcode_info new;
    
    union compare_info D;
    D.name =  name;
    
    ret = find_set(same_name,D);

    if (ret) {
	dprint(12,(debugfile,"MIME_name_to_charset(%s), found=%X\n",
		   name,ret));
	return ret;
    }
    if (!create) {
	dprint(12,(debugfile,"MIME_name_to_charset(%s), not found\n",
		   name));
	return NULL;
    }    

    new.flags          = SET_nodata|SET_valid;
    new.MIME_name      = safe_strdup(name);
    bzero((void *)new.parts, sizeof new.parts);

    ret = add_set(new);

    dprint(12,(debugfile,"MIME_name_to_charset(%s), adding=%X\n",
	       name,ret));

    return ret;
}

static int add_it_ P_((charset_t ascii_ptr,
		       struct charcode_info *item,
		       struct charcode_info **list,
		       int *count, int max));


/*
 * add_it_ -- append 'item' to 'list' if it is a MIME charset whose
 * first part is 'ascii_ptr'.
 *
 *   list   array with room for 'max' pointers
 *   count  number of used slots in 'list'; incremented on append
 *
 * Returns 1 when the item was appended or did not qualify, 0 when it
 * qualified but the list was already full (an error is reported).
 */
static int add_it_ (ascii_ptr,item,list,count,max)
     charset_t ascii_ptr;
     struct charcode_info *item;
     struct charcode_info **list;
     int *count; 
     int max;
{
    /* Pick only MIME charsets built on top of ascii_ptr ... */
    if (!item->MIME_name || item->parts[0] != ascii_ptr)
	return 1;

    if (*count >= max) {
	lib_error(FRM("charset: Internal error -- overflow")); 
	return 0;
    }

    list[*count] = item;
    (*count)++;

    return 1;
}


static int array_walk_ P_((charset_t ascii_ptr,
			   struct charcode_info *arr,
			   int arrsize,
			   struct charcode_info **list,
			   int *count, int max));


/*
 * array_walk_ -- feed every valid element of arr[0..arrsize-1] to
 * add_it_().
 *
 * Returns 1 when the whole array was processed, 0 as soon as add_it_()
 * reports failure.
 */
static int array_walk_ (ascii_ptr,arr,arrsize, list, count, max)
     charset_t ascii_ptr;
     struct charcode_info *arr;
     int arrsize;
     struct charcode_info **list;
     int *count;
     int max;
{
    int idx;

    for (idx = 0; idx < arrsize; idx++) {
	struct charcode_info *entry = &arr[idx];

	if (0 == (entry->flags & SET_valid))
	    continue;              /* invalidated or unused slot */

	if (!add_it_(ascii_ptr,entry,list,count,max))
	    return 0;
    }

    return 1;
}

static int name_ok P_((CONST char *name));

/*
 * name_ok -- can 'name' be written without quoting?
 *
 * A name qualifies when it is non-empty and consists only of upper
 * case ASCII letters, digits and '-'.
 *
 * Returns 1 when the name qualifies, 0 otherwise.
 */
static int name_ok(name)
     CONST char *name;
{
    if ('\0' == *name)
	return 0;

    /* The full alphabet is required here: without 'W' names such as
     * WINDOWS-1252 would needlessly be treated as needing quotes.
     */
    return strlen(name) == strspn(name,"ABCDEFGHIJKLMNOPQRSTUVWXYZ-0123456789");
}

/*
 * charset_compatfunc -- convert the list of charsets that have
 * US-ASCII as their first part between its textual form and the
 * internal charset tables.
 *
 *   value  enter != 0: *value is the text to parse (it is copied, the
 *                      original is not modified)
 *          enter == 0: *value is set to point to a static buffer that
 *                      is rebuilt on every such call
 *   enter  direction of the conversion
 *
 * Parsing (enter != 0): items are separated by space or comma and may
 * carry a ";option" part.  Charsets currently on the list that are
 * named again are kept (and lose SET_nodata), newly named ones are set
 * up with US-ASCII as their first part, and listed charsets that are
 * not mentioned are invalidated.
 *
 * Printing (enter == 0): names are separated by a single space; a name
 * rejected by name_ok() is written through elm_message("%Q;!").
 *
 * Returns 1 on success, 0 on an internal error (already reported with
 * lib_error()).
 */
int charset_compatfunc (value,enter)
     char **value;
     int enter;
{
    struct extra_sets *walk;
    int ret = 0;
    charset_t ascii_ptr = MIME_name_to_charset("US-ASCII",0);

#define ARRSIZE(SET) sizeof SET / sizeof (struct charcode_info)

    /* Upper bound for the number of charsets that can be collected */
    const int size = ARRSIZE(precompiled_sets) + 
	extra_sets_count;

    struct charcode_info **need_modify_list =
	safe_malloc(size * sizeof(struct charcode_info *));
    int count = 0;
    
    if (!ascii_ptr) {
	lib_error(FRM("charset: Internal error -- US_ASCII not available"));
	goto fail;
    }

#define HANDLE(SET) if (!array_walk_(ascii_ptr,SET,ARRSIZE(SET),need_modify_list,&count,size)) goto fail;
#define ADD_IT(ITEM) if (!add_it_(ascii_ptr,ITEM,need_modify_list,&count,size)) goto fail;
    
    /* Collect every valid MIME charset whose first part is US-ASCII */
    HANDLE(precompiled_sets);

    for (walk = extra_sets; walk; walk = walk -> next) 
	if (0 != (walk->set.flags & SET_valid))
	    ADD_IT(&(walk->set));

    if (enter) {
	char * temp = safe_strdup(*value);
	char *wrk,*next;
	int j;
	
	/* Mark everything; the mark is removed from charsets named in
	 * the text, so whatever stays marked was not mentioned.
	 */
	for (j = 0; j < count; j++)
	    need_modify_list[j]->flags |= SET_mark;

	for (wrk = temp; wrk && *wrk; wrk = next) {
	    charset_t charset_ptr = NULL;

	    next = qstrpbrk(wrk," ,;");
	retry:
	    if (next) {
		char *a = next;   /* where this item gets terminated */
		if (';' != *next)
		    next++;
		while (' ' == *next)
		    next++;
		if (';' == *next) {
		    /* An option part follows: it belongs to this item,
		     * so look for the separator after it instead.
		     */
		    next++;
		    while (' ' == *next)
			next++;
		    next = qstrpbrk(next," ,");
		    goto retry;
		}
		*a = '\0';
	    }

	    /* Items without option part are plain names */
	    if (!load_charset_map_info(&charset_ptr,wrk)) 
		charset_ptr = MIME_name_to_charset(wrk,1); 

	    if (0 != (charset_ptr -> flags & SET_mark)) {
		/* Already on the list: keep it */
		charset_ptr -> flags &=  ~SET_mark;
		charset_ptr -> flags &=  ~SET_nodata;
		dprint(98,(debugfile,
			   "charset_compatfunc: modify=%X (%s), flags=%d\n",
			   charset_ptr,
			   charset_ptr->MIME_name ? 
			   charset_ptr->MIME_name : "<no MIME name>",
			   charset_ptr->flags));

	    } else {  /* new charset create information */
		int k;
		charset_ptr -> flags         = SET_valid;
		charset_ptr -> parts[0]      = ascii_ptr;
		for (k = 1; k < MAX_charcode_parts; k++)
		    charset_ptr -> parts[k]      = NULL;
		dprint(98,(debugfile,
			   "charset_compatfunc: new=%X (%s), flags=%d\n",
			   charset_ptr,
			   charset_ptr->MIME_name ? 
			   charset_ptr->MIME_name : "<no MIME name>",
			   charset_ptr->flags));
	    }

	}
	     
	free(temp);

	/* invalidate rest */
	for (j = 0; j < count; j++)
	    if (0 != (need_modify_list[j]->flags & SET_mark)) {
		need_modify_list[j] -> flags &=  ~SET_valid;
		dprint(1,(debugfile,"charset: Invalidating charset %s\n",
			  need_modify_list[j]->MIME_name));	
	    }

    } else {
	int j;
	
	/* static pointer to buffer across invocations */
	static char * return_buffer = NULL;

	return_buffer = strmcpy(return_buffer,"");    /* Reset result */
	for (j = 0; j < count; j++) {
	    /* add_it_() only collects entries that have a MIME name */
	    char * s = need_modify_list[j]->MIME_name;
	    
	    if (return_buffer[0])
		return_buffer = strmcat(return_buffer," ");
	    
	    if (name_ok(s))
		return_buffer = strmcat(return_buffer,s);
	    else {
		char * tmp = elm_message("%Q;!",s);
		return_buffer = strmcat(return_buffer,tmp);
		free(tmp);
	    }
	}

	*value = return_buffer;
    }
    ret = 1;
    
	fail:
    free(need_modify_list);
    return ret;
}
#undef ADD_IT
#undef HANDLE
#undef ARRSIZE

static char * dequote_opt P_((CONST char *source, int len));

/*
 * dequote_opt -- copy the first 'len' characters of 'source' with the
 * quoting removed.
 *
 * Double quotes toggle quoted mode and are not copied.  Inside quotes
 * a backslash makes the following character literal; outside quotes
 * spaces are dropped and a backslash is copied as is.  A backslash
 * followed by end of string and a quote left open are reported with
 * lib_error().
 *
 * Returns a buffer obtained from safe_malloc(); callers in this file
 * release it with free().
 */
static char * dequote_opt(source,len)
     CONST char *source; 
     int len;
{
    char * store = safe_malloc(len + 1);
    CONST char *p = source;
    int used = 0;           /* characters written to store */
    int in_quote = 0;

    while (p - source < len && used < len) {
	char ch = *p;

	if ('"' == ch) {
	    in_quote = !in_quote;
	} else if ('\\' == ch && in_quote) {
	    p++;            /* look at the escaped character */
	    if ('\0' == *p) {
		lib_error(CATGETS(elm_msg_cat, MeSet, 
				  MeTrailingBackslash,
				  "Trailing backslash (\\): %.20s..."),
			  source);
	    } else {
		store[used++] = *p;
	    }
	} else if (in_quote || ' ' != ch) {
	    store[used++] = ch;
	}

	p++;
    }
    store[used] = '\0';

    if (in_quote) {
	lib_error(CATGETS(elm_msg_cat, MeSet, 
			  MeUnbalancedQuote,
			  "Unbalanced quote (\"): %.20s..."),
		  source);
    }

    dprint(95, (debugfile, "dequote_opt(%.*s (len=%d))=%s\n",
		len,source,len,store));

    return store;
}

/*
 * load_charset_map_info -- parse a charset declaration of the form
 * "name;options" and store the resulting charset in *buffer.
 *
 * The name is everything before the first ';' located by qstrpbrk()
 * (presumably quote-aware -- confirm against its definition); it is
 * run through dequote_opt().  The option part may be empty or "!"
 * (no data), or a ';' separated list of which only
 * "MIME-subset=<charset>" is understood.  Other options are reported
 * with lib_error() and ignored.
 *
 * An unknown name is registered as a new charset.  A known name that
 * is given a MIME-subset has its current entry invalidated and a
 * regenerated copy registered in its place.
 *
 * Returns 1 with *buffer set on success, 0 with *buffer == NULL when
 * 'data' contains no ';' separator.
 */
int load_charset_map_info(buffer,data)
     charset_t *buffer; 
     CONST char *data;
{
    char * temp = safe_strdup(data);
    char *c = qstrpbrk(temp,";");
    char *store,*opt;
    struct charcode_info new_vals;
    int reallocation_needed = 0;
    
    if (!c) {
	*buffer = NULL;
	
	dprint (11, (debugfile, 
		     "load_charset_map_info(\"%s\") = 0 (FAIL)\n", 
		     data));
	free(temp);
	return 0;
    }
    store = dequote_opt(temp,c - temp);
    dprint (11, (debugfile, 
		 "load_charset_map_info: charset=%s\n", 
		 store));

    dprint (11, (debugfile, 
		 "load_charset_map_info: info=%s\n", c));

    *buffer = MIME_name_to_charset(store,0);
    if (!*buffer) {
	/* Unknown name: a new entry must be registered below */
	reallocation_needed  = 1;
	new_vals.flags       = SET_valid;
	new_vals.MIME_name   = safe_strdup(store);
	bzero((void *)new_vals.parts, sizeof new_vals.parts);	
    } else
	new_vals = **buffer;

    c++;      /* step over the ';' to the option part */

    if ('\0' == *c || 0 == strcmp(c,"!")) {
	dprint(11,(debugfile,"charset: %s: no data\n",store));
	new_vals.flags       |= SET_nodata;
    } else { 
	for (opt = mime_parse_content_opts(c); 
	     opt; 
	     opt = mime_parse_content_opts(NULL)) {
	    char * eq = strchr(opt,'=');
	    
	    dprint(95, (debugfile, "mime_parse_content_opts gives: %s\n",
			opt));

	    if (eq) {
		char *val; 
		*eq++ = '\0';     /* split "key=value" in place */
		val = dequote_opt(eq,strlen(eq));
		
		if (0 == strcmp(opt,"MIME-subset")) {
		    int j;
		    reallocation_needed = 1;
		    new_vals.parts[0]  = MIME_name_to_charset(val,0);
		    for (j = 1; j < MAX_charcode_parts; j++)
			new_vals.parts[j] = NULL;
		    if (!new_vals.parts[0]) {
			lib_error(CATGETS(elm_msg_cat, MeSet, 
					  MeUnknownMIMEsubset,
					  "Unknown MIME-subset %s"),
				  val);				  
		    }
		} else {
		    lib_error(CATGETS(elm_msg_cat, MeSet, 
				      MeUnsupportedCharsetOption,
				      "Unsupported charset option %s (value %.20s...)"),
			      opt,val);				  
		}
		free(val);
	    } else {
		lib_error(CATGETS(elm_msg_cat, MeSet, 
				  MeUnsupportedCharsetOptionNoValue,
				  "Unsupported charset option %s (no value)"),
			  opt);				  
	    }
	}
    }

    if (reallocation_needed) {
	if (*buffer) {
	    /* The old entry stays in its table but is no longer found */
	    (*buffer) -> flags  &= ~SET_valid;
	    dprint(1,(debugfile,
		      "charset: Invalidating charset %s (and regenerating)\n",
		      (*buffer)->MIME_name ? 
		      (*buffer)->MIME_name : "<no MIME name>"));	
	}
	*buffer = add_set(new_vals);
	dprint(11,(debugfile,
		   "charset: Adding charset %s\n",
		   (*buffer)->MIME_name ? 
		   (*buffer)->MIME_name : "<no MIME name>"));	
    }

    dprint (11, (debugfile, 
		 "load_charset_map_info(\"%s\") = 1 (SUCCEED)\n", 
		 data));    
    free(temp);
    free(store);
    return 1;    
}

static void dump_map_info P_((FILE *f,struct charcode_info *info));

/*
 * dump_map_info -- write the declaration of one charset to 'f'.
 *
 * The name is written bare when name_ok() accepts it and with %Q
 * otherwise; a charset without MIME name gets a "# <No MIME name> "
 * placeholder.  A SET_nodata charset is finished with ";!".  Otherwise
 * ";MIME-subset=<name>" is appended when the first part has a MIME
 * name.  No newline is written.
 */
static void dump_map_info(f,info)
     FILE *f;
     struct charcode_info *info;
{
    if (!info->MIME_name) {
	fputs("# <No MIME name> ",f);
    } else if (name_ok(info->MIME_name)) {   /* never hand NULL to name_ok() */
	fputs(info->MIME_name,f);
    } else {
	elm_fprintf(f,FRM("%Q"),info->MIME_name);
    }
    
    if (0 != (info->flags & SET_nodata)) {
	fputs(";!",f);
	return;
    }

    if (info->parts[0] && 
	info->parts[0]->MIME_name &&
	name_ok(info->parts[0]->MIME_name))
	elm_fprintf(f,FRM(";MIME-subset=%s"),info->parts[0]->MIME_name);
    else if (info->parts[0] && 
	     info->parts[0]->MIME_name)
	elm_fprintf(f,FRM(";MIME-subset=%Q"),info->parts[0]->MIME_name);
}

/*
 * get_charset_map_info -- parse a charset declaration with
 * load_charset_map_info() and hand back the charset's MIME name.
 *
 *   buffer  receives the name (copied with strfcpy), or an empty
 *           string on failure
 *   data    declaration text to parse
 *   size    size passed to strfcpy() for 'buffer'
 *
 * Returns 1 on success, 0 when 'data' is not a charset declaration.
 */
int get_charset_map_info (buffer,data,size)
     char *buffer;
     CONST char *data; 
     int size;
{
    charset_t parsed;

    if (load_charset_map_info(&parsed,data)) {
	strfcpy(buffer,parsed->MIME_name,size);

	dprint (11, (debugfile, 
		     "get_charset_map_info(\"%s\") = 1 (SUCCEED)\n", 
		     data));
	return 1;
    }

    buffer[0] = '\0';

    dprint (11, (debugfile, 
		 "get_charset_map_info(\"%s\") = 0 (FAIL)\n", 
		 data));

    return 0;
}

static int charset_superset_of_p P_((charset_t charset, 
				     charset_t subset,
				     int level));

/*
 * charset_superset_of_p -- recursive worker of charset_superset_of().
 *
 * 'subset' is looked for among the parts of 'charset' first; only if
 * it is not a direct part are the parts themselves searched.  'level'
 * is the recursion depth: beyond 10 the search is given up and a
 * subset definition loop is reported.
 *
 * Returns 1 when 'subset' was found, 0 otherwise.
 */
static int charset_superset_of_p(charset,subset,level)
     charset_t charset,subset;
     int level;
{
    int slot;

    if (level > 10) {
	lib_error(CATGETS(elm_msg_cat, MeSet, MeCharsetSubsetLoop,
			  "Charset subset defination loop detected for %s"),
		  charset->MIME_name ? charset->MIME_name : "<no MIME name>");
	return 0;
    }

    /* Direct parts */
    slot = 0;
    while (slot < MAX_charcode_parts) {
	if (subset == charset->parts[slot])
	    return 1;
	slot++;
    }

    /* Parts of parts */
    slot = 0;
    while (slot < MAX_charcode_parts) {
	if (charset->parts[slot]) {
	    if (charset_superset_of_p(charset->parts[slot],subset,level+1))
		return 1;
	}
	slot++;
    }

    return 0;
}

/*
 * charset_superset_of -- is 'subset' a direct or indirect part of
 * 'charset'?
 *
 * Returns 1 if so, 0 otherwise.  The outcome is written to the debug
 * log.
 */
int charset_superset_of(charset,subset)
     charset_t charset,subset;
{
    int found;

    found = charset_superset_of_p(charset,subset,0);

    dprint(7,(debugfile,"charset_superset_of(%X (%s),%X (%s))=%d\n",
	      charset,
	      charset->MIME_name ? charset->MIME_name :"<no MIME name",
	      subset,
	      subset->MIME_name ? subset->MIME_name :"<no MIME name",
	      found));

    return found;
}

/*
 * charset_ok_p -- does 'ptr' have US-ASCII as a direct or indirect
 * part?
 *
 * Returns the result of charset_superset_of(), or FALSE (after
 * reporting an internal error) when US-ASCII cannot be looked up.
 */
int charset_ok_p(ptr)
     charset_t ptr;
{
    charset_t ascii_ptr = MIME_name_to_charset("US-ASCII",0);

    if (ascii_ptr)
	return charset_superset_of(ptr,ascii_ptr);

    lib_error(FRM("charset: Internal error -- US_ASCII not available"));
    return FALSE;
}

/*
 * load_locale_map -- read a locale to charset map from 'filename'.
 *
 * Each line holds two whitespace separated fields.  Empty lines, lines
 * starting with '#' and lines with an empty second field are skipped.
 * A first field of "-" introduces a pure charset declaration (entry
 * with match == NULL); anything else is stored as the match string and
 * the second field is either a charset declaration or a plain charset
 * name, which is registered if unknown.
 *
 * Reading stops at the first malformed line; entries collected up to
 * that point are still returned.
 *
 * Returns an array allocated with safe_malloc() and terminated by an
 * entry whose match and charset are both NULL, or NULL when the file
 * cannot be opened or contains no newline at all.
 */
struct locale_map_item  * load_locale_map(filename) 
     CONST char *filename;
{
    struct locale_map_item *result;
    int result_len = 0;
    FILE * f = fopen(filename,"r");
    int max_result = 0;
    int ch;
    char buf[LONG_STRING];

    if (!f) {
	int err = errno;
	dprint(2,(debugfile,"locale_map_item: %s: %s\n",
		  filename,error_description(err)));
	return NULL;
    }

    /* The number of lines is an upper bound for the number of entries */
    while(EOF != (ch = fgetc(f)))
	if ('\n' == ch)
	    max_result++;

    dprint(11,(debugfile,"locale_map_item: %s, max_result=%d\n",
	       filename,max_result));

    if (!max_result) {
	fclose(f);
	return NULL;
    }
    rewind(f);

    result = safe_malloc((max_result +1) * sizeof (struct locale_map_item));

    while (result_len < max_result &&
	   fgets(buf,sizeof buf, f) != NULL) {
	char *c,*c1;
	int l1 = strlen(buf);
	
	/* l1 can be 0 if the line starts with a NUL byte; such a line
	 * is rejected here instead of looking at buf[-1].
	 */
	if (l1 > 0 && '\n' == buf[l1 -1]) 
	    buf[--l1] = '\0';
	else {
	    lib_error(CATGETS(elm_msg_cat, MeSet, MeTooLongLine,
			      "%30s: Too long line: %.30s..."),
		      filename,buf);
	    break;
	}
	
	/* Strip trailing whitespace.  l1 now indexes the terminating
	 * NUL, so the scan starts from the last real character.
	 */
	while (l1 > 0 && whitespace(buf[l1-1]))
	    buf[--l1] = '\0';
	
	c = buf;
	while (*c && whitespace (*c)) /* skip leading whitespace */
	    c++;
	if ('#' == *c)
	    continue;
	if (!*c)
	    continue;

	c1 = strpbrk(c," \t");

	if (!c1) {
	    lib_error(CATGETS(elm_msg_cat, MeSet, MeBadLine,
			      "%30s: Bad line: %.30s..."),
		      filename,buf);
	    break;	    
	}
	*c1 = '\0';

	c1++;

	while (*c1 && whitespace (*c1)) /* skip leading whitespace */
	    c1++;
	if (!*c1)
	    continue;

	if (0 == strcmp(c,"-")) {
	    result[result_len].match = NULL;
	    if (!load_charset_map_info(&result[result_len].charset,c1)) {
		lib_error(CATGETS(elm_msg_cat, MeSet, MeNotCharsetDeclaration,
				  "%30s: Not charset declaration: %.30s..."),
			  filename,c1);
		break;	    
	    }
	    result_len++;
	} else {
	    result[result_len].match = safe_strdup(c);

	    if (!load_charset_map_info(&result[result_len].charset,c1))
		result[result_len].charset = MIME_name_to_charset(c1,1); 
	    
	    result_len++;
	}
    }
    result[result_len].match   = NULL;
    result[result_len].charset = NULL;

    /* Every way out of the loop ends up here, so the file is closed
     * exactly once.
     */
    fclose(f);

    dprint(11,(debugfile,"locale_map_item: %s, result_len=%d\n",
	       filename,result_len));

    return result;
}

void dump_locale_map(f,map)
     FILE *f; 
     struct locale_map_item *map;
{
    struct locale_map_item *ptr;

    for (ptr = map; ptr && ptr->charset; ptr++) {
	ptr->charset->flags &= ~SET_mark;
	ptr->charset->flags &= ~SET_printed;
    }
    for (ptr = map; ptr && ptr->charset; ptr++) {
	if (0 != (ptr->charset->flags & ~SET_mark) &&
	    0 == (ptr->charset->flags & SET_nodata) &&
	    0 == (ptr->charset->flags & SET_printed)) {
	    fputs("-\t",f);
	    dump_map_info(f,ptr->charset);
	    ptr->charset->flags |= SET_printed;
	    fputc('\n',f);
	}
	ptr->charset->flags |= SET_mark;
    }

    for (ptr = map; ptr && ptr->charset; ptr++) {
	if (ptr->match) {
	    fputs(ptr->match,f);
	    fputc('\t',f);
	    if ((0 != (ptr->charset->flags & SET_nodata) ||
		 0 != (ptr->charset->flags & SET_printed)) && 
		ptr->charset->MIME_name &&
		NULL == strpbrk(ptr->charset->MIME_name,";\""))
		fputs(ptr->charset->MIME_name,f);
	    else
		dump_map_info(f,ptr->charset);
	    fputc('\n',f);
	}
    }
}

/*
 * mime_parse_content_opts -- split the options of a Content-Type: like
 * field at semicolons, ignoring semicolons inside double quotes.
 *
 * This works like strtok() with a fixed separator: pass the string on
 * the first call and NULL on the following calls.  The string is
 * modified in place (separators are overwritten with NUL) and the scan
 * position is kept in a static variable, so only one string can be
 * scanned at a time.
 *
 * Returns the next token with leading white space skipped, or NULL
 * when the string is exhausted or no string has been given yet.
 */
char * mime_parse_content_opts (str)
     char *str;
{
    static char *ptr;
    char *ret;
    int in_quote = 0;
    
    /* This is the initialization call */
    if (str) {
	dprint(50,(debugfile,"mime_parse_content_opts: string=%s\n",
		   str));
	ptr = str;
    }    

    /* Continuation call without any initialization call before it */
    if (!ptr) {
	dprint(50,(debugfile,"mime_parse_content_opts: EOS\n"));
	return NULL;
    }

    /* skip leading spaces */
    while (*ptr && isascii(*ptr) && isspace (*ptr))
	ptr++;

    if (*ptr == '\0') {
	dprint(50,(debugfile,"mime_parse_content_opts: EOS\n"));
	return NULL;
    }
    
    ret = ptr;
    while (*ptr) {
	if (*ptr == '\\' && in_quote) {		  
	    /* \ escapes next character  
	     * (not allowed outside of quotes) */
	    ptr++;
	    if (*ptr == '\0')
		break;
	} else if (*ptr == '\"') {
	    if (in_quote)
		in_quote = 0;
	    else
		in_quote = 1;
	}
	else if (! in_quote) {
	    if (*ptr == ';') {
		/* End of this token; the next call continues after it */
		*ptr++ = '\0';
		dprint(50,(debugfile,"mime_parse_content_opts: token=%s\n",
			   ret));
		return (ret);
	    }
	} 
	ptr++;
    }
    /* Last token: ptr is left on the terminating NUL, so the next
     * call reports the end of the string.
     */
    dprint(50,(debugfile,"mime_parse_content_opts: token=%s\n",
	       ret));
    return (ret);
}

/*
 * Local Variables:
 *  mode:c
 *  c-basic-offset:4
 * End:
 */
