/*
/ TextCsv.cpp
/ methods related to CSV/TXT loading 
/
/ version 1.2, 2008 October 9
/
/ Author: Sandro Furieri a-furieri@lqt.it
/
/ Copyright (C) 2008  Alessandro Furieri
/
/    This program is free software: you can redistribute it and/or modify
/    it under the terms of the GNU General Public License as published by
/    the Free Software Foundation, either version 3 of the License, or
/    (at your option) any later version.
/
/    This program is distributed in the hope that it will be useful,
/    but WITHOUT ANY WARRANTY; without even the implied warranty of
/    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
/    GNU General Public License for more details.
/
/    You should have received a copy of the GNU General Public License
/    along with this program.  If not, see <http://www.gnu.org/licenses/>.
/
*/

#include "Classdef.h"

#include <string>

/* Windows builds other than MinGW: strcasecmp() is mapped onto _stricmp() */
#if defined(_WIN32) && !defined(__MINGW32__)
#define strcasecmp	_stricmp
#endif

/* column type codes, stored one per column into text_buffer.types */
#define VRTTXT_TEXT		1
#define VRTTXT_INTEGER	2
#define VRTTXT_DOUBLE	3

struct row_buffer
{
/* one parsed line of the input file; rows are chained as a linked list */

  /* number of entries stored into the cells array */
  int n_cells;

  /* array of cell values (one string pointer per cell) */
  char **cells;

  /* the following row in the linked list, NULL for the last one */
  struct row_buffer *next;
};

struct text_buffer
{
/* the whole parsed text file */

  /* number of columns, i.e. the cell count of the widest data row */
  int max_n_cells;

  /* array of column titles, one string per column */
  char **titles;

  /* array of column types, one VRTTXT_* code per column */
  char *types;

  /* number of data rows */
  int n_rows;

  /* array of pointers to the data rows */
  struct row_buffer **rows;

  /* head and tail of the linked list the rows are appended to */
  struct row_buffer *first;
  struct row_buffer *last;
};

static void text_insert_row(struct text_buffer *text, char **fields,
                            int max_cell)
{
/*
 * Appends a new row to the tail of the text buffer's linked list.
 *
 * text     - the buffer receiving the row
 * fields   - array of cell pointers; entries 0..max_cell are taken over
 *            by the new row (the pointers are copied, not the strings)
 * max_cell - index of the last cell to store; a negative value creates
 *            a row with no cells array at all
 */
  struct row_buffer *node =
    (struct row_buffer *) malloc(sizeof(struct row_buffer));
  int count = max_cell + 1;
  int idx;

  node->n_cells = count;
  node->cells = NULL;
  node->next = NULL;
  if (max_cell >= 0)
    {
      /* the row takes over the first 'count' field pointers */
      node->cells = (char **) malloc(sizeof(char *) * count);
      for (idx = 0; idx < count; idx++)
        node->cells[idx] = fields[idx];
    }

  /* linking the new row at the end of the list */
  if (text->last != NULL)
    text->last->next = node;
  if (text->first == NULL)
    text->first = node;
  text->last = node;
}

static struct text_buffer *text_buffer_alloc()
{
/*
 * Creates an empty text buffer: no columns, no rows, every pointer NULL.
 * Returns the newly malloc'ed struct.
 */
  struct text_buffer *buf =
    (struct text_buffer *) malloc(sizeof(struct text_buffer));

  /* counters */
  buf->n_rows = 0;
  buf->max_n_cells = 0;

  /* per-column arrays and the rows index */
  buf->types = NULL;
  buf->titles = NULL;
  buf->rows = NULL;

  /* empty linked list */
  buf->last = NULL;
  buf->first = NULL;
  return buf;
}

static void text_buffer_free(struct text_buffer *text)
{
/*
 * Memory cleanup - releases a text buffer and everything it owns:
 * every cell string, every row's cells array, every row node, the column
 * titles, the column types and the rows index.
 *
 * text - the buffer to destroy; NULL is accepted and ignored
 *
 * The titles array, when allocated, is expected to hold max_n_cells
 * strings owned by the buffer (this is how text_parse() fills it).
 */
  int i;
  struct row_buffer *row;
  struct row_buffer *next;
  if (!text)
    return;
  row = text->first;
  while (row)
    {
      /* saving the link before the node itself is released */
      next = row->next;
      if (row->cells)
        {
          for (i = 0; i < row->n_cells; i++)
            {
              if (*(row->cells + i))
                free(*(row->cells + i));
            }
          free(row->cells);
        }
      free(row);
      row = next;
    }
  if (text->titles)
    {
      for (i = 0; i < text->max_n_cells; i++)
        {
          if (*(text->titles + i))
            free(*(text->titles + i));
        }
      free(text->titles);
    }
  /* the rows index only aliases the list nodes already released above */
  if (text->rows)
    free(text->rows);
  if (text->types)
    free(text->types);
  free(text);
}

static int text_is_integer(char *value)
{
/*
 * Checks if this value can be an INTEGER.
 *
 * value - NUL-terminated string to test
 *
 * Returns 1 when the string is made of decimal digits (at least one)
 * with at most one '+' / '-' sign placed either first or last
 * (a trailing sign is accepted, e.g. "123-"); returns 0 otherwise.
 */
  int digits = 0;
  int signs = 0;
  char last = '\0';
  const char *p;
  for (p = value; *p != '\0'; p++)
    {
      last = *p;
      if (*p >= '0' && *p <= '9')
        digits++;
      else if (*p == '+' || *p == '-')
        signs++;
      else
        return 0;               /* any other character is invalid */
    }
  /* a lone sign (or an empty string) is not a number */
  if (digits == 0)
    return 0;
  if (signs > 1)
    return 0;
  if (signs)
    {
      /* the single sign must be leading or trailing */
      if (*value != '+' && *value != '-' && last != '+' && last != '-')
        return 0;
    }
  return 1;
}

static int text_is_double(char *value, char decimal_separator)
{
/*
 * Checks if this value can be a DOUBLE.
 *
 * value             - NUL-terminated string to test
 * decimal_separator - ',' selects the comma as decimal separator;
 *                     any other value selects the point
 *
 * Returns 1 when the string is made of decimal digits (at least one),
 * at most one decimal separator and at most one '+' / '-' sign placed
 * either first or last; returns 0 otherwise.
 */
  int digits = 0;
  int signs = 0;
  int points = 0;
  char last = '\0';
  char separator = (decimal_separator == ',') ? ',' : '.';
  const char *p;
  for (p = value; *p != '\0'; p++)
    {
      last = *p;
      if (*p >= '0' && *p <= '9')
        digits++;
      else if (*p == '+' || *p == '-')
        signs++;
      else if (*p == separator)
        points++;
      else
        return 0;               /* any other character is invalid */
    }
  /* a lone sign or separator (or an empty string) is not a number */
  if (digits == 0)
    return 0;
  if (points > 1)
    return 0;
  if (signs > 1)
    return 0;
  if (signs)
    {
      /* the single sign must be leading or trailing */
      if (*value != '+' && *value != '-' && last != '+' && last != '-')
        return 0;
    }
  return 1;
}

static void text_clean_integer(char *value)
{
/*
 * Cleaning an integer value: a trailing sign ("123-") is moved in front
 * of the digits ("-123"). The string is rewritten in place and keeps its
 * length, so no scratch buffer is required.
 *
 * value - NUL-terminated, writable string; empty or one-char strings are
 *         left untouched
 */
  char last;
  size_t len = strlen(value);
  if (len < 2)
    return;
  last = value[len - 1];
  if (last == '-' || last == '+')
    {
      /* trailing sign; shifting the digits right and putting it first */
      memmove(value + 1, value, len - 1);
      *value = last;
    }
}

static void text_clean_double(char *value)
{
/*
 * Cleaning a double value:
 * - a trailing sign ("1.5-") is moved in front of the digits ("-1.5")
 * - every COMMA is transformed into a POINT
 * The string is rewritten in place and keeps its length.
 *
 * value - NUL-terminated, writable string
 */
  char *p;
  char last;
  size_t len = strlen(value);
  if (len >= 2)
    {
      last = value[len - 1];
      if (last == '-' || last == '+')
        {
          /* trailing sign; shifting the digits right and putting it first */
          memmove(value + 1, value, len - 1);
          *value = last;
        }
    }
  for (p = value; *p != '\0'; p++)
    {
      /* transforming COMMAs into POINTs */
      if (*p == ',')
        *p = '.';
    }
}

static int text_clean_text(char **value, void *toUtf8)
{
/*
 * Cleaning a TEXT value and converting it to UTF-8.
 *
 * value  - address of the cell string; the string is trimmed in place and
 *          then overwritten by its UTF-8 form, being re-allocated when the
 *          converted text is longer than the original
 * toUtf8 - converter handle handed to gaiaConvertToUTF8()
 *
 * Returns 0 on success, 1 when gaiaConvertToUTF8() reports an error.
 */
  char *text = *value;
  char *utf8text;
  int err;
  int i;
  int oldlen = strlen(text);
  int newlen;
  for (i = oldlen - 1; i > 0; i--)
    {
      /* cleaning up trailing spaces, walking backward from the end;
         the very first char is never stripped */
      if (text[i] == ' ')
        text[i] = '\0';
      else
        break;
    }
  utf8text = gaiaConvertToUTF8(toUtf8, text, oldlen, &err);
  if (err)
    return 1;
  newlen = strlen(utf8text);
  if (newlen <= oldlen)
    strcpy(*value, utf8text);   /* fits into the existing allocation */
  else
    {
      free(*value);
      *value = (char *) malloc(newlen + 1);
      strcpy(*value, utf8text);
    }
  /* NOTE(review): utf8text is never released here; if gaiaConvertToUTF8()
     hands ownership of the returned buffer to the caller this leaks on
     every call - confirm against the library documentation */
  return 0;
}

static struct text_buffer *text_parse(const char *path, const char *encoding,
                                      bool first_line_titles,
                                      char field_separator, char text_separator,
                                      char decimal_separator)
{
/* trying to open and parse the text file */
  int c;
  int fld;
  int len;
  int max_cell;
  int is_string = 0;
  char last = '\0';
  char *fields[4096];
  char buffer[35536];
  char *p = buffer;
  struct text_buffer *text;
  int nrows;
  int ncols;
  int errs;
  struct row_buffer *row;
  void *toUtf8;
  int encoding_errors;
  int ir;
  char title[64];
  char *first_valid_row;
  int i;
  char *name;
  for (fld = 0; fld < 4096; fld++)
    {
      /* preparing an empty row */
      fields[fld] = NULL;
    }
/* trying to open the text file */
  FILE *in = fopen(path, "rb");
  if (!in)
    return NULL;
  text = text_buffer_alloc();
  fld = 0;
  while ((c = getc(in)) != EOF)
    {
      /* parsing the file, one char at each time */
      if (c == '\r' && !is_string)
        {
          last = c;
          continue;
        }
      if (c == field_separator && !is_string)
        {
          /* insering a field into the fields tmp array */
          last = c;
          *p = '\0';
          len = strlen(buffer);
          if (len)
            {
              fields[fld] = (char *) malloc(len + 1);
              strcpy(fields[fld], buffer);
            }
          fld++;
          p = buffer;
          *p = '\0';
          continue;
        }
      if (c == text_separator)
        {
          /* found a text separator */
          if (is_string)
            {
              is_string = 0;
              last = c;
          } else
            {
              if (last == text_separator)
                *p++ = text_separator;
              is_string = 1;
            }
          continue;
        }
      last = c;
      if (c == '\n' && !is_string)
        {
          /* inserting the row into the text buffer */
          *p = '\0';
          len = strlen(buffer);
          if (len)
            {
              fields[fld] = (char *) malloc(len + 1);
              strcpy(fields[fld], buffer);
            }
          fld++;
          p = buffer;
          *p = '\0';
          max_cell = -1;
          for (fld = 0; fld < 4096; fld++)
            {
              if (fields[fld])
                max_cell = fld;
            }
          text_insert_row(text, fields, max_cell);
          for (fld = 0; fld < 4096; fld++)
            {
              /* resetting an empty row */
              fields[fld] = NULL;
            }
          fld = 0;
          continue;
        }
      *p++ = c;
    }
  fclose(in);
/* checking if the text file really seems to contain a table */
  nrows = 0;
  ncols = 0;
  errs = 0;
  row = text->first;
  while (row)
    {
      if (first_line_titles == true && row == text->first)
        {
          /* skipping first line */
          row = row->next;
          continue;
        }
      nrows++;
      if (row->n_cells > ncols)
        ncols = row->n_cells;
      row = row->next;
    }
  if (nrows == 0 && ncols == 0)
    {
      text_buffer_free(text);
      return NULL;
    }
  text->n_rows = nrows;
/* going to check the column types */
  text->max_n_cells = ncols;
  text->types = (char *) malloc(sizeof(char) * text->max_n_cells);
  first_valid_row = (char *) malloc(sizeof(char) * text->max_n_cells);
  for (fld = 0; fld < text->max_n_cells; fld++)
    {
      /* initally assuming any cell contains TEXT */
      *(text->types + fld) = VRTTXT_TEXT;
      *(first_valid_row + fld) = 1;
    }
  row = text->first;
  while (row)
    {
      if (first_line_titles == true && row == text->first)
        {
          /* skipping first line */
          row = row->next;
          continue;
        }
      for (fld = 0; fld < row->n_cells; fld++)
        {
          if (*(row->cells + fld))
            {
              if (text_is_integer(*(row->cells + fld)))
                {
                  if (*(first_valid_row + fld))
                    {
                      *(text->types + fld) = VRTTXT_INTEGER;
                      *(first_valid_row + fld) = 0;
                    }
              } else if (text_is_double(*(row->cells + fld), decimal_separator))
                {
                  if (*(first_valid_row + fld))
                    {
                      *(text->types + fld) = VRTTXT_DOUBLE;
                      *(first_valid_row + fld) = 0;
                  } else
                    {
                      /* promoting an INTEGER column to be of the DOUBLE type */
                      if (*(text->types + fld) == VRTTXT_INTEGER)
                        *(text->types + fld) = VRTTXT_DOUBLE;
                    }
              } else
                {
                  /* this column is anyway of the TEXT type */
                  *(text->types + fld) = VRTTXT_TEXT;
                  if (*(first_valid_row + fld))
                    *(first_valid_row + fld) = 0;
                }
            }
        }
      row = row->next;
    }
  free(first_valid_row);
/* preparing the column names */
  text->titles = (char **) malloc(sizeof(char *) * text->max_n_cells);
  if (first_line_titles == true)
    {
      for (fld = 0; fld < text->max_n_cells; fld++)
        {
          if (fld >= text->first->n_cells)
            {
              /* this column name is NULL; setting a default name */
              sprintf(title, "COL%03d", fld + 1);
              len = strlen(title);
              *(text->titles + fld) = (char *) malloc(len + 1);
              strcpy(*(text->titles + fld), title);
          } else
            {
              if (*(text->first->cells + fld))
                {
                  len = strlen(*(text->first->cells + fld));
                  *(text->titles + fld) = (char *) malloc(len + 1);
                  strcpy(*(text->titles + fld), *(text->first->cells + fld));
                  name = *(text->titles + fld);
                  for (i = 0; i < len; i++)
                    {
                      /* masking any space in the column name */
                      if (*(name + i) == ' ')
                        *(name + i) = '_';
                    }
              } else
                {
                  /* this column name is NULL; setting a default name */
                  sprintf(title, "COL%03d", fld + 1);
                  len = strlen(title);
                  *(text->titles + fld) = (char *) malloc(len + 1);
                  strcpy(*(text->titles + fld), title);
                }
            }
        }
  } else
    {
      for (fld = 0; fld < text->max_n_cells; fld++)
        {
          sprintf(title, "COL%03d", fld + 1);
          len = strlen(title);
          *(text->titles + fld) = (char *) malloc(len + 1);
          strcpy(*(text->titles + fld), title);
        }
    }
/* cleaning cell values when needed */
  toUtf8 = gaiaCreateUTF8Converter(encoding);
  if (!toUtf8)
    {
      text_buffer_free(text);
      return NULL;
    }
  encoding_errors = 0;
  row = text->first;
  while (row)
    {
      if (first_line_titles == true && row == text->first)
        {
          /* skipping first line */
          row = row->next;
          continue;
        }
      for (fld = 0; fld < row->n_cells; fld++)
        {
          if (*(row->cells + fld))
            {
              if (*(text->types + fld) == VRTTXT_INTEGER)
                text_clean_integer(*(row->cells + fld));
              else if (*(text->types + fld) == VRTTXT_DOUBLE)
                text_clean_double(*(row->cells + fld));
              else
                encoding_errors += text_clean_text(row->cells + fld, toUtf8);
            }
        }
      row = row->next;
    }
  gaiaFreeUTF8Converter(toUtf8);
  if (encoding_errors)
    {
      text_buffer_free(text);
      return NULL;
    }
/* ok, we can now go to prepare the rows array */
  text->rows =
    (struct row_buffer **) malloc(sizeof(struct text_row *) * text->n_rows);
  ir = 0;
  row = text->first;
  while (row)
    {
      if (first_line_titles == true && row == text->first)
        {
          /* skipping first line */
          row = row->next;
          continue;
        }
      *(text->rows + ir++) = row;
      row = row->next;
    }
  return text;
}

void
  MyFrame::LoadText(wxString & path, wxString & table, wxString & charset,
                    bool first_titles, char decimal_separator, char separator,
                    char text_separator)
{
//
// loading a CSV/TXT as a new DB table
//
// path              - pathname of the CSV/TXT file
// table             - name of the table to be created
// charset           - charset encoding of the file
// first_titles      - true when the first line holds the column names
// decimal_separator - decimal separator used by numeric values
// separator         - field separator
// text_separator    - delimiter of quoted text values
//
// The file is parsed by text_parse(); then the table is created and one
// row for each data line is inserted, all within a single transaction.
// Any SQL error causes a ROLLBACK. Nothing happens (silently) when the
// file can't be parsed.
//
  struct text_buffer *text = NULL;
  struct row_buffer *row;
  const char *name;
  const char *cell;
  int seed;
  int dup;
  int idup;
  char dummy[512];              // short formatted values only
  char **col_name = NULL;
  char *scratch = NULL;         // growable work buffer for TEXT values
  size_t scratch_size = 0;
  size_t needed;
  int i;
  std::string xtable;
  std::string insert_head;
  std::string sql;
  int len;
  int ret;
  int rows = 0;
  char *errMsg = NULL;
  bool sqlError = false;
  int current_row;
  wxString msg;
//
// performing some checks before starting
//
  if (TableAlreadyExists(table) == true)
    {
      wxMessageBox(wxT("a table name '") + table + wxT("' already exists"),
                   wxT("spatialite-gui"), wxOK | wxICON_ERROR, this);
      return;
    }
  text =
    text_parse(path.ToUTF8(), charset.ToUTF8(), first_titles, separator,
               text_separator, decimal_separator);
  if (!text)
    return;
  ::wxBeginBusyCursor();
//
// checking for duplicate / illegal column names and antialising them 
//
  col_name = (char **) malloc(sizeof(char *) * text->max_n_cells);
  seed = 0;
  for (i = 0; i < text->max_n_cells; i++)
    {
      name = *(text->titles + i);
      dup = 0;
      for (idup = 0; idup < i; idup++)
        {
          if (strcasecmp(name, *(col_name + idup)) == 0)
            dup = 1;
        }
      // PK_UID is the primary key column created below, so it can't be
      // reused; PKUID is still rejected as it always was
      if (strcasecmp(name, "PK_UID") == 0)
        dup = 1;
      if (strcasecmp(name, "PKUID") == 0)
        dup = 1;
      if (strcasecmp(name, "Geometry") == 0)
        dup = 1;
      if (dup)
        {
          sprintf(dummy, "COL_%d", seed++);
          name = dummy;
        }
      len = strlen(name);
      *(col_name + i) = (char *) malloc(len + 1);
      strcpy(*(col_name + i), name);
    }
//
// starting a transaction
//
  ret = sqlite3_exec(SqliteHandle, "BEGIN", NULL, 0, &errMsg);
  if (ret != SQLITE_OK)
    {
      wxMessageBox(wxT("load CSV/TXT error:") + wxString::FromUTF8(errMsg),
                   wxT("spatialite-gui"), wxOK | wxICON_ERROR, this);
      sqlite3_free(errMsg);
      sqlError = true;
      goto clean_up;
    }
//
// creating the Table 
//
  xtable = (const char *) table.ToUTF8();
  sql = "CREATE TABLE \"";
  sql += xtable;
  sql += "\" (\n\"PK_UID\" INTEGER PRIMARY KEY AUTOINCREMENT";
  for (i = 0; i < text->max_n_cells; i++)
    {
      sql += ",\n\"";
      sql += *(col_name + i);
      if (*(text->types + i) == VRTTXT_INTEGER)
        sql += "\" INTEGER";
      else if (*(text->types + i) == VRTTXT_DOUBLE)
        sql += "\" DOUBLE";
      else
        sql += "\" TEXT";
    }
  sql += ")";
  ret = sqlite3_exec(SqliteHandle, sql.c_str(), NULL, 0, &errMsg);
  if (ret != SQLITE_OK)
    {
      wxMessageBox(wxT("load text error:") + wxString::FromUTF8(errMsg),
                   wxT("spatialite-gui"), wxOK | wxICON_ERROR, this);
      sqlite3_free(errMsg);
      sqlError = true;
      goto clean_up;
    }
//
// the leading part of the INSERT statement is the same for every row
//
  insert_head = "INSERT INTO \"";
  insert_head += xtable;
  insert_head += "\" (\n\"PK_UID\"";
  for (i = 0; i < text->max_n_cells; i++)
    {
      // columns corresponding to some CSV/TXT column
      insert_head += ",\"";
      insert_head += *(col_name + i);
      insert_head += "\"";
    }
  insert_head += ")\nVALUES (";
  current_row = 0;
  while (current_row < text->n_rows)
    {
      //
      // inserting rows from CSV/TXT
      //
      row = *(text->rows + current_row);
      sql = insert_head;
      sprintf(dummy, "%d", current_row);
      sql += dummy;
      for (i = 0; i < text->max_n_cells; i++)
        {
          // column values; missing or empty cells become NULL
          sql += ",";
          cell = NULL;
          if (i < row->n_cells)
            cell = *(row->cells + i);
          if (!cell)
            sql += "NULL";
          else if (*(text->types + i) == VRTTXT_INTEGER)
            {
              sprintf(dummy, "%d", atoi(cell));
              sql += dummy;
          } else if (*(text->types + i) == VRTTXT_DOUBLE)
            {
              sprintf(dummy, "%1.6f", atof(cell));
              sql += dummy;
          } else
            {
              // CleanSqlString() rewrites its argument in place.
              // NOTE(review): the work buffer is sized assuming the
              // escaped text is at most twice as long as the original
              // (and is never smaller than the former 64KB buffer) -
              // confirm against CleanSqlString()
              needed = 2 * strlen(cell) + 1;
              if (needed < 65536)
                needed = 65536;
              if (needed > scratch_size)
                {
                  free(scratch);
                  scratch = (char *) malloc(needed);
                  scratch_size = scratch ? needed : 0;
                }
              if (!scratch)
                {
                  wxMessageBox(wxT("load text error: insufficient memory"),
                               wxT("spatialite-gui"), wxOK | wxICON_ERROR,
                               this);
                  sqlError = true;
                  goto clean_up;
                }
              strcpy(scratch, cell);
              CleanSqlString(scratch);
              sql += "'";
              sql += scratch;
              sql += "'";
            }
        }
      sql += ")";
      ret = sqlite3_exec(SqliteHandle, sql.c_str(), NULL, 0, &errMsg);
      if (ret != SQLITE_OK)
        {
          wxMessageBox(wxT("load text error:") + wxString::FromUTF8(errMsg),
                       wxT("spatialite-gui"), wxOK | wxICON_ERROR, this);
          sqlite3_free(errMsg);
          sqlError = true;
          goto clean_up;
        }
      rows++;
      current_row++;
    }
clean_up:
  free(scratch);
  if (col_name)
    {
      // releasing memory allocation for column names 
      for (i = 0; i < text->max_n_cells; i++)
        free(*(col_name + i));
      free(col_name);
    }
  // releasing the whole parsed file, not only the top-level struct
  text_buffer_free(text);
  if (sqlError == true)
    {
      // some error occurred - ROLLBACK 
      ret = sqlite3_exec(SqliteHandle, "ROLLBACK", NULL, 0, &errMsg);
      if (ret != SQLITE_OK)
        {
          wxMessageBox(wxT("load text error:") + wxString::FromUTF8(errMsg),
                       wxT("spatialite-gui"), wxOK | wxICON_ERROR, this);
          sqlite3_free(errMsg);
        }
      ::wxEndBusyCursor();
      msg =
        wxT("CSV/TXT not loaded\n\n\na ROLLBACK was automatically performed");
      wxMessageBox(msg, wxT("spatialite-gui"), wxOK | wxICON_WARNING, this);
  } else
    {
      // ok - confirming pending transaction - COMMIT 
      ret = sqlite3_exec(SqliteHandle, "COMMIT", NULL, 0, &errMsg);
      if (ret != SQLITE_OK)
        {
          wxMessageBox(wxT("load text error:") + wxString::FromUTF8(errMsg),
                       wxT("spatialite-gui"), wxOK | wxICON_ERROR, this);
          sqlite3_free(errMsg);
          // the busy cursor must be released on this exit path too
          ::wxEndBusyCursor();
          return;
        }
      ::wxEndBusyCursor();
      sprintf(dummy, "CSV/TXT loaded\n\n%d inserted rows", rows);
      msg = wxString::FromUTF8(dummy);
      wxMessageBox(msg, wxT("spatialite-gui"), wxOK | wxICON_INFORMATION, this);
      InitTableTree();
    }
}
