/*  GNU Moe - My Own Editor
    Copyright (C) 2005, 2006 Antonio Diaz Diaz.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include <cctype>
#include <string>

#include "encoding.h"
#include "iso_8859.h"


namespace Encoding {

// Returns the 6-bit value (0 to 63) that `ch' stands for in the Base64
// alphabet (RFC 3548), or -1 if `ch' is not part of the alphabet. The
// padding character '=' is not part of the alphabet.
// Charset independent: `ch' is compared against character literals, no
// arithmetic is done on character codes.
//
int base64_value( const unsigned char ch )
  {
  static const unsigned char alphabet[65] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

  // Scan the 64 symbols only; the terminating NUL must not match ch == 0.
  for( int value = 0; value < 64; ++value )
    if( alphabet[value] == ch ) return value;
  return -1;
  }


// Converts the character code `code' to a single byte value.
// Codes from 0 to 255 are returned unchanged. The few codes above 255
// listed in the table below are translated to fixed byte values.
// Returns -1 if `code' is negative or has no byte value assigned.
// NOTE(review): the byte values look like ISO-8859-9 and ISO-8859-15
// positions; confirm against the charsets the callers expect.
//
int map_to_byte( const int code ) throw()
  {
  struct Translation { int ucs; int byte; };
  static const Translation table[] =
    {
    { 0x011E, 0xD0 },		// latin capital letter g with breve
    { 0x011F, 0xF0 },		// latin small letter g with breve
    { 0x0130, 0xDD },		// latin capital letter i with dot above
    { 0x0131, 0xFD },		// latin small letter dotless i
    { 0x015E, 0xDE },		// latin capital letter s with cedilla
    { 0x015F, 0xFE },		// latin small letter s with cedilla
    { 0x0160, 0xA6 },		// latin capital letter s with caron
    { 0x0161, 0xA8 },		// latin small letter s with caron
    { 0x017D, 0xB4 },		// latin capital letter z with caron
    { 0x017E, 0xB8 },		// latin small letter z with caron
    { 0x20AC, 0xA4 }		// euro sign
    };
  const int entries = sizeof table / sizeof table[0];

  if( code < 0 ) return -1;
  if( code < 256 ) return code;			// already fits in one byte
  for( int k = 0; k < entries; ++k )
    if( table[k].ucs == code ) return table[k].byte;
  return -1;
  }


// Decodes the UTF-8 sequence that starts at position `i' of `seq'.
// Sequences from 1 to 6 bytes long are accepted.
// Returns the decoded code and, if `lenp' is not null, stores in *lenp
// the number of bytes that the sequence occupies.
// Returns -1, leaving *lenp untouched, if `i' is out of range, if the
// first or any following byte is invalid, if the sequence is truncated,
// or if the code is not encoded in the minimum number of bytes.
//
int utf8_to_ucs( const std::string & seq, const unsigned int i, int *lenp ) throw()
  {
  if( i >= seq.size() ) return -1;

  const unsigned char lead = seq[i];
  if( lead < 0x80 )				// plain ascii, one byte
    {
    if( lenp ) *lenp = 1;
    return lead;
    }
  // 0x80-0xBF are continuation bytes; 0xFE and 0xFF never start a sequence.
  if( lead < 0xC0 || lead > 0xFD ) return -1;

  // Every additional 1 bit below the top two bits of the lead byte adds
  // one byte to the sequence and removes one data bit from the lead byte.
  int total = 2;
  unsigned char data_bits = 0x1F;
  for( unsigned char probe = 0x20; lead & probe; probe >>= 1 )
    { ++total; data_bits >>= 1; }

  int ucs = lead & data_bits;
  int pos = 1;
  while( pos < total )
    {
    // A truncated sequence is caught here too: seq[seq.size()] on a const
    // string yields '\0', which is not a continuation byte.
    const unsigned char cont = seq[i+pos];
    if( ( cont & 0xC0 ) != 0x80 ) return -1;	// not a continuation byte
    ucs = ( ucs << 6 ) | ( cont & 0x3F );
    ++pos;
    }

  // Smallest code that needs `total' bytes: 0x80, 0x800, 0x10000, ...
  const int smallest = ( total == 2 ) ? 0x80 : ( 0x800 << ( ( total - 3 ) * 5 ) );
  if( ucs < smallest ) return -1;		// overlong encoding
  if( lenp ) *lenp = total;
  return ucs;
  }

} // end namespace Encoding


/* Encode input string `in' as base64 into output string `out'.
   Any previous content of `out' is discarded. Every group of 3 input
   bytes produces 4 output characters; a final group of 1 or 2 bytes is
   completed with '=' padding. No line terminators are inserted.
*/
void Encoding::base64_encode( const std::string & in, std::string & out ) throw()
  {
  const unsigned char b64str[65] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

  out.clear();
  unsigned int pos = 0;
  while( pos < in.size() )
    {
    const bool has_second = ( pos + 1 < in.size() );
    const bool has_third = ( pos + 2 < in.size() );

    // Missing bytes of the last group count as zero.
    const unsigned char b0 = in[pos];
    unsigned char b1 = 0, b2 = 0;
    if( has_second ) b1 = in[pos+1];
    if( has_third ) b2 = in[pos+2];

    // Split the 24 bits into four 6-bit indexes into the alphabet.
    out += b64str[b0 >> 2];
    out += b64str[( ( b0 & 0x03 ) << 4 ) | ( b1 >> 4 )];
    if( has_second ) out += b64str[( ( b1 & 0x0F ) << 2 ) | ( b2 >> 6 )];
    else out += '=';
    if( has_third ) out += b64str[b2 & 0x3F];
    else out += '=';

    pos += 3;
    }
  }


/* Decode base64 encoded input string `in' to output string `out'.
   Any previous content of `out' is discarded.
   Returns true if the whole input was valid base64 data. Decoding stops
   at the first invalid character and false is returned; `out' then keeps
   the bytes decoded so far. Line terminators are invalid characters, so
   the caller must remove them from the input before calling this
   function.
   The padding character '=' is accepted only in the last group of 4
   characters ("xx==" or "xxx=").
*/
bool Encoding::base64_decode( const std::string & in, std::string & out ) throw()
  {
  out.clear();
  unsigned int pos = 0;

  while( pos + 1 < in.size() )		// a group needs at least 2 characters
    {
    const int v0 = base64_value( in[pos] );
    const int v1 = base64_value( in[pos+1] );
    if( v0 < 0 || v1 < 0 ) return false;
    out += ( v0 << 2 ) | ( v1 >> 4 );			// first byte
    if( pos + 2 >= in.size() ) return false;		// group of 2 characters

    const bool last_group = ( pos + 4 == in.size() );
    if( in[pos+2] == '=' )
      {
      // "xx==" must be the complete last group
      if( !last_group || in[pos+3] != '=' ) return false;
      }
    else
      {
      const int v2 = base64_value( in[pos+2] );
      if( v2 < 0 ) return false;
      out += ( ( v1 & 0x0F ) << 4 ) | ( v2 >> 2 );	// second byte
      if( pos + 3 >= in.size() ) return false;		// group of 3 characters

      if( in[pos+3] == '=' )
        {
        if( !last_group ) return false;			// "xxx=" must be last
        }
      else
        {
        const int v3 = base64_value( in[pos+3] );
        if( v3 < 0 ) return false;
        out += ( ( v2 & 0x03 ) << 6 ) | v3;		// third byte
        }
      }
    pos += 4;
    }
  // false if a single character is left over after the last full group
  return ( pos == in.size() );
  }


/* Decode quoted-printable encoded input string `in' to output string
   `out'. Any previous content of `out' is discarded.
   Characters other than '=' are copied unchanged. "=\n" and "=\r\n" are
   removed. '=' followed by two hexadecimal digits is replaced by the
   byte they encode; lower case digits are rejected. A '=' that is the
   last character of the input is dropped.
   Returns true if the whole input was decoded. Decoding stops at the
   first invalid sequence and false is returned; `out' then keeps the
   bytes decoded so far.
*/
bool Encoding::quoted_printable_decode( const std::string & in, std::string & out ) throw()
  {
  out.clear();
  unsigned int pos = 0;

  while( pos < in.size() )
    {
    const unsigned char cur = in[pos];
    if( cur != '=' ) { out += cur; ++pos; continue; }	// literal character
    if( pos + 1 >= in.size() ) { ++pos; continue; }	// trailing '='

    const unsigned char first = in[pos+1];
    if( first == '\n' ) { pos += 2; continue; }		// "=\n"
    if( pos + 2 >= in.size() ) return false;		// incomplete sequence

    const unsigned char second = in[pos+2];
    if( first == '\r' )
      {
      if( second != '\n' ) return false;
      pos += 3; continue;				// "=\r\n"
      }

    // presumably xtoi returns the value of a hexadecimal digit, or a
    // negative number if the character is not one; verify in iso_8859.h
    const int high = ISO_8859::xtoi( first );
    const int low = ISO_8859::xtoi( second );
    if( high < 0 || low < 0 ) return false;
    if( std::islower( first ) || std::islower( second ) ) return false;
    out += ( ( high << 4 ) + low );
    pos += 3;
    }
  return ( pos == in.size() );
  }


/* Returns the letter located 13 positions after `ch' in the 26 letter
   alphabet, wrapping around at the end and keeping the case.
   Any character other than 'A'-'Z' and 'a'-'z' is returned unchanged.
   Charset independent: `ch' is compared against character literals, no
   arithmetic is done on character codes.
*/
unsigned char Encoding::rot13( const unsigned char ch ) throw()
  {
  // rotated[k] is the replacement for plain[k]
  static const unsigned char plain[53] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
  static const unsigned char rotated[53] =
    "NOPQRSTUVWXYZABCDEFGHIJKLMnopqrstuvwxyzabcdefghijklm";

  // Scan the 52 letters only; the terminating NUL must not match ch == 0.
  for( int k = 0; k < 52; ++k )
    if( plain[k] == ch ) return rotated[k];
  return ch;
  }


/* Rotates `ch' by 47 positions inside the range of 94 character codes
   from 33 to 126: codes from 33 to 79 are moved up by 47, codes from 80
   to 126 are moved down by 47. Codes outside the range are returned
   unchanged. Applying the function twice gives back the original code.
*/
unsigned char Encoding::rot47( const unsigned char ch ) throw()
  {
  if( ch < 33 || ch > 126 ) return ch;		// outside the rotated range
  return ( ch <= 79 ) ? ch + 47 : ch - 47;
  }


/* Decode utf8 encoded input string `in' to output string `out', one
   byte per decoded character. Any previous content of `out' is
   discarded.
   Returns true if the whole input was valid utf8 data and every
   character could be converted to a byte by map_to_byte (the
   iso-8859-[1|15] range). Decoding stops at the first character that is
   invalid or can't be converted, and false is returned; `out' then keeps
   the bytes decoded so far.
*/
bool Encoding::utf8_decode( const std::string & in, std::string & out ) throw()
  {
  out.clear();
  unsigned int pos = 0;

  while( pos < in.size() )
    {
    int consumed = 0;		// bytes used by the current character
    const int ucs = utf8_to_ucs( in, pos, &consumed );
    const int byte = map_to_byte( ucs );	// -1 from utf8_to_ucs stays -1
    if( byte < 0 ) return false;
    out += byte;
    pos += consumed;
    }
  return ( pos == in.size() );
  }
