/***************************************************************************
 *   copyright           : (C) 2002 by Hendrik Sattler                     *
 *   mail                : post@hendrik-sattler.de                         *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

#include "charsets.h"
#include "helpers.h"
#include "common.h"
#include <string.h>

/*
 * These two functions assume that wchar_t holds UCS-4 in little endian format.
 * I really do not know if this is always true. If that is false, it should
 * be fixed.
 *
 * Converting to GSM and back does _NOT_ always result in the same string!
 *
 * You should only use the last 7 bits of the encoded characters!
 * (e.g. using the mask "&127")
 */

/*
 * Look up the GSM encoding of a single wide character.
 *
 * Returns
 *   -1             if the character has no mapping in this table,
 *   0x00..0xff     if the character is encoded as one byte,
 *   0x1b00 | code  if the character is encoded as two bytes: the escape
 *                  byte 0x1b followed by code.
 */
static int gsm_code_for (wchar_t c) {
  switch (c) {
  case 0x0040: // commercial at
    /*
     * we map this to 0x80 instead of 0x00 to avoid problem with C's char*
     * just make sure you only use the last 7 bits of these values,
     * you must do that anyway
     */
    return 0x80;
  case 0x00a3: return 0x01; // pound sign
  case 0x0024: return 0x02; // dollar sign
  case 0x00a5: return 0x03; // yen sign
  case 0x00e8: return 0x04; // small 'e' with grave
  case 0x00e9: return 0x05; // small 'e' with acute
  case 0x00f9: return 0x06; // small 'u' with grave
  case 0x00ec: return 0x07; // small 'i' with grave
  case 0x00f2: return 0x08; // small 'o' with grave
  case 0x00c7:              // capital 'C' with cedilla
  case 0x00e7: return 0x09; // small 'c' with cedilla
  case 0x00d8: return 0x0b; // capital 'O' with stroke
  case 0x00f8: return 0x0c; // small 'o' with stroke
  case 0x00c5: return 0x0e; // capital 'A' with ring above
  case 0x00e5: return 0x0f; // small 'a' with ring above
  case 0x0394: return 0x10; // capital delta
  case 0x005f: return 0x11; // underscore / low line
  case 0x03a6: return 0x12; // capital phi
  case 0x0393: return 0x13; // capital gamma
  case 0x039b: return 0x14; // capital lambda
  case 0x03a9: return 0x15; // capital omega
  case 0x03a0: return 0x16; // capital pi
  case 0x03a8: return 0x17; // capital psi
  case 0x03a3: return 0x18; // capital sigma
  case 0x0398: return 0x19; // capital theta
  case 0x039e: return 0x1a; // capital xi

  /* characters that need the escape byte 0x1b in front of their code */
  case 0x000c: return 0x1b0a; // form feed
  case 0x005e: return 0x1b14; // circumflex accent
  case 0x007b: return 0x1b28; // left curly bracket
  case 0x007d: return 0x1b29; // right curly bracket
  case 0x005c: return 0x1b2f; // reverse solidus (back slash)
  case 0x005b: return 0x1b3c; // left square bracket
  case 0x007e: return 0x1b3d; // tilde
  case 0x005d: return 0x1b3e; // right square bracket
  case 0x007c: return 0x1b40; // vertical line
  case 0x20ac: return 0x1b65; // Euro sign

  case 0x00c6: return 0x1c; // capital 'AE'
  case 0x00e6: return 0x1d; // small 'ae'
  case 0x00df: return 0x1e; // small sharp s (german)
  case 0x00c9: return 0x1f; // capital 'E' with acute
  case 0x00a4:              // currency sign
  case 0x00b0: return 0x24; // degree sign (not official, preference of author)
  case 0x00a1: return 0x40; // inverted exclamation mark

  /* greek capitals that are encoded as the latin capital of the same shape */
  case 0x0391: return 0x41; // capital alpha
  case 0x0392: return 0x42; // capital beta
  case 0x0395: return 0x45; // capital epsilon
  case 0x0397: return 0x48; // capital eta
  case 0x0399: return 0x49; // capital iota
  case 0x039a: return 0x4b; // capital kappa
  case 0x039c: return 0x4d; // capital mu
  case 0x039d: return 0x4e; // capital nu
  case 0x039f: return 0x4f; // capital omicron
  case 0x03a1: return 0x50; // capital rho
  case 0x03a4: return 0x54; // capital tau
  case 0x03a5: return 0x55; // capital upsilon
  case 0x03a7: return 0x58; // capital chi
  case 0x0396: return 0x5a; // capital zeta

  case 0x00c4: return 0x5b; // capital 'A' with diaeresis
  case 0x00d6: return 0x5c; // capital 'O' with diaeresis
  case 0x00d1: return 0x5d; // capital 'N' with tilde
  case 0x00dc: return 0x5e; // capital 'U' with diaeresis
  case 0x00a7: return 0x5f; // section sign
  case 0x00bf: return 0x60; // inverted question mark
  case 0x00e4: return 0x7b; // small 'a' with diaeresis
  case 0x00f6: return 0x7c; // small 'o' with diaeresis
  case 0x00f1: return 0x7d; // small 'n' with tilde
  case 0x00fc: return 0x7e; // small 'u' with diaeresis
  case 0x00e0: return 0x7f; // small 'a' with grave
  default:
    break;
  }

  /*
   * Remaining characters whose code is the same as their Unicode value
   * (see Unicode table for descriptions).
   */
  if (c == 0x000A ||
      c == 0x000D ||
      (c >= 0x0020 && c <= 0x0023) ||
      (c >= 0x0025 && c <= 0x003f) ||
      (c >= 0x0041 && c <= 0x005a) ||
      (c >= 0x0061 && c <= 0x007a)) {
    return (int)c;
  }
  return -1;
}

/*
 * Convert a zero-terminated wide character string to a zero-terminated
 * string of GSM character codes.
 *
 * input must not be NULL (it is passed to wcslen()).
 *
 * Characters without a mapping are reported through myprintf() and left
 * out of the result. The commercial at is stored as 0x80 instead of 0x00,
 * so only the last 7 bits of every returned byte are significant.
 *
 * The result is allocated with mem_alloc() and shrunk to its final size
 * with mem_realloc(); the return value is whatever mem_realloc() returns.
 */
unsigned char* convert_to_gsm (wchar_t* input)  {
  unsigned char* retval;
  size_t len = wcslen(input); /* computed once instead of on every iteration */
  size_t i;
  size_t k = 0;
  int code;

  /*
   * allocate double size of wcslen because some characters have an
   * encoded width of 2 (14 bit instead of 7 bit).
   */
  retval = mem_alloc((len*2)+1,0);

  for (i = 0; i < len; i++) {
    code = gsm_code_for(input[i]);
    if (code < 0) {
      /*
       * %lx takes an unsigned long, so the value is converted explicitly
       * (this assumes myprintf() follows the printf() conventions).
       */
      myprintf(0,"%lc (=0x%04lx) is not a GSM character\n",input[i],(unsigned long)input[i]);
      continue;
    }
    if (code > 0xff) {
      /* two byte encoding: the high byte is the escape byte */
      retval[k++] = (unsigned char)(code >> 8);
    }
    retval[k++] = (unsigned char)(code & 0xff);
  }
  retval[k] = 0;
  return mem_realloc(retval,k+1);
}

/*
 * Map one byte that is not part of an escape sequence to its wide
 * character. Bytes without an entry below are returned unchanged.
 */
static wchar_t ucs_from_gsm_basic (unsigned char code) {
  switch (code) {
  case 0x80: // commercial at
    /*
     * we expect this as 0x80 instead of 0x00 to avoid problem with C's char*
     */
    return 0x0040;
  case 0x01: return 0x00a3; // pound sign
  case 0x02: return 0x0024; // dollar sign
  case 0x03: return 0x00a5; // yen sign
  case 0x04: return 0x00e8; // small 'e' with grave
  case 0x05: return 0x00e9; // small 'e' with acute
  case 0x06: return 0x00f9; // small 'u' with grave
  case 0x07: return 0x00ec; // small 'i' with grave
  case 0x08: return 0x00f2; // small 'o' with grave
  case 0x09: return 0x00e7; // small/capital 'c' with cedilla (we map to small), or 0x00c7 ?
  case 0x0b: return 0x00d8; // capital 'O' with stroke
  case 0x0c: return 0x00f8; // small 'o' with stroke
  case 0x0e: return 0x00c5; // capital 'A' with ring above
  case 0x0f: return 0x00e5; // small 'a' with ring above
  case 0x10: return 0x0394; // capital delta
  case 0x11: return 0x005f; // underscore / low line
  case 0x12: return 0x03a6; // capital phi
  case 0x13: return 0x0393; // capital gamma
  case 0x14: return 0x039b; // capital lambda
  case 0x15: return 0x03a9; // capital omega
  case 0x16: return 0x03a0; // capital pi
  case 0x17: return 0x03a8; // capital psi
  case 0x18: return 0x03a3; // capital sigma
  case 0x19: return 0x0398; // capital theta
  case 0x1a: return 0x039e; // capital xi
  case 0x1c: return 0x00c6; // capital 'AE'
  case 0x1d: return 0x00e6; // small 'ae'
  case 0x1e: return 0x00df; // small sharp s (german)
  case 0x1f: return 0x00c9; // capital 'E' with acute
  case 0x24: return 0x00a4; // currency sign (not the Euro sign 0x20ac)
  case 0x40: return 0x00a1; // inverted exclamation mark
  case 0x5b: return 0x00c4; // capital 'A' with diaeresis
  case 0x5c: return 0x00d6; // capital 'O' with diaeresis
  case 0x5d: return 0x00d1; // capital 'N' with tilde
  case 0x5e: return 0x00dc; // capital 'U' with diaeresis
  case 0x5f: return 0x00a7; // section sign
  case 0x60: return 0x00bf; // inverted question mark
  case 0x7b: return 0x00e4; // small 'a' with diaeresis
  case 0x7c: return 0x00f6; // small 'o' with diaeresis
  case 0x7d: return 0x00f1; // small 'n' with tilde
  case 0x7e: return 0x00fc; // small 'u' with diaeresis
  case 0x7f: return 0x00e0; // small 'a' with grave
  default:
    return code;
  }
}

/*
 * Map the byte that follows the escape byte 0x1b to its wide character.
 * Returns 0 if the byte does not complete a known escape sequence (this
 * includes the terminating zero of the string).
 */
static wchar_t ucs_from_gsm_escaped (unsigned char code) {
  switch (code) {
  /*
   * form feed is 0x1b 0x0a, which is also what convert_to_gsm() writes;
   * this was 0x1a before, so a form feed never survived a round trip
   */
  case 0x0a: return 0x000c; // form feed
  case 0x14: return 0x005e; // circumflex accent
  case 0x28: return 0x007b; // left curly bracket
  case 0x29: return 0x007d; // right curly bracket
  case 0x2f: return 0x005c; // reverse solidus
  case 0x3c: return 0x005b; // left square bracket
  case 0x3d: return 0x007e; // tilde
  case 0x3e: return 0x005d; // right square bracket
  case 0x40: return 0x007c; // vertical line
  case 0x65: return 0x20ac; // Euro sign
  default:
    return 0;
  }
}

/*
 * Convert a zero-terminated string of GSM character codes to a
 * zero-terminated wide character string.
 *
 * input must not be NULL (it is passed to strlen()). The commercial at
 * is expected as 0x80 because 0x00 would terminate the string.
 *
 * The escape byte 0x1b together with a known following byte yields one
 * character. An escape byte followed by anything else yields a
 * non-breaking space, and the following byte is then converted on its own.
 *
 * The result is allocated with mem_alloc() and shrunk to its final size
 * with mem_realloc(); the return value is whatever mem_realloc() returns.
 */
wchar_t* convert_from_gsm (unsigned char* input)  {
  wchar_t* retval;
  wchar_t escaped;
  size_t len = strlen((const char*)input); /* computed once, not per iteration */
  size_t i;
  size_t k = 0;

  /* the result never has more characters than the input has bytes */
  retval = mem_alloc((len+1)*sizeof(wchar_t),1);

  for (i = 0; i < len; i++) {
    if (input[i] != 0x1b) {
      retval[k++] = ucs_from_gsm_basic(input[i]);
      continue;
    }
    /*
     * input[i+1] is at worst the terminating zero, which is not a known
     * escape code, so this never reads past the end of the string.
     */
    escaped = ucs_from_gsm_escaped(input[i+1]);
    if (escaped != 0) {
      retval[k++] = escaped;
      ++i; /* the second byte of the sequence has been consumed */
    } else {
      retval[k++] = 0x00a0; // non-breaking space
    }
  }
  retval[k] = 0;
  /*
   * must be (k+1)*sizeof(wchar_t) because index count from 0
   * but size from 1
   */
  return mem_realloc(retval,(k+1)*sizeof(wchar_t));
}
