/*=======================================================================
 * Version: $$
 * Project: MediaTeX
 * Module : Extract metadata to seda v0.2
 *
 * Example using libmediatex.a

 MediaTex is an Electronic Records Management System
 Copyright (C) 2016  Nicolas Roche
 
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 any later version.
 
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 
 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 =======================================================================*/

#include "misc.h"

/* #include "mediatex.h" */
/* #include "misc/log.h" */
#include "misc/command.h"
#include "misc/md5sum.h"
#include "memory/confTree.h"
#include "common/openClose.h"

#include <locale.h>

/* Working context shared by all the functions of this program. */
typedef struct Data {
  char* basename;   // -b argument: prefix of the "<basename>_ext" and "<basename>_cat" files (see loadMetadata)
  char* zipFile;    // -z argument: path of the zip file to describe
  Collection* coll; // collection filled by loadMetadata
  FILE* fd;         // output stream opened (and closed) by serializeZip
  int oldFormat;    // set by -o: serializeZip then sorts with cmpFromAssoAvlSedaOld
} Data;

/*=======================================================================
 * Function   : serializeArchiveTransfert
 * Description: write the head of the ArchiveTransfer element: comment,
 *              date, transfer identifier and the two agencies
 * Synopsis   : int serializeArchiveTransfert(Data* data, Document* self)
 * Input      : Data* data     : provides the output stream (data->fd)
 *              Document* self : document describing the transfer
 * Output     : TRUE on success
 * Note       : the 2 first characteristics must be "Comment" then "Date"
 *              and the 2 first roles "Archival Agency" then
 *              "Transferring Agency"; what was already written stays in
 *              the output when a later check fails
 =======================================================================*/
int 
serializeArchiveTransfert(Data* data, Document* self)
{
  int rc = FALSE;
  AssoCarac* attribute = 0;
  AssoRole* partner = 0;
  RGIT* cursor = 0;
  char* archivalAgency = 0;
  char* transferringAgency = 0;

  logMain(LOG_DEBUG, "%s", "serialize ArchiveTransfer");

  // first characteristic: the comment
  attribute = rgNext_r(self->assoCaracs, &cursor);
  if (attribute == 0) goto error;
  if (strcmp(attribute->carac->label, "Comment") != 0) goto error;
  fprintf(data->fd, "<Comment>%s</Comment>    ", attribute->value);

  // second characteristic: the date
  attribute = rgNext_r(self->assoCaracs, &cursor);
  if (attribute == 0) goto error;
  if (strcmp(attribute->carac->label, "Date") != 0) goto error;
  fprintf(data->fd, "<Date>%s</Date>    ", attribute->value);

  // the document label is used as transfer identifier
  fprintf(data->fd, "<TransferIdentifier>%s</TransferIdentifier>    ",
          self->label);

  // partners: restart the cursor to walk the roles' list
  cursor = 0;
  partner = rgNext_r(self->assoRoles, &cursor);
  if (partner == 0) goto error;
  if (strcmp(partner->role->label, "Archival Agency") != 0) goto error;
  archivalAgency = partner->human->firstName;

  partner = rgNext_r(self->assoRoles, &cursor);
  if (partner == 0) goto error;
  if (strcmp(partner->role->label, "Transferring Agency") != 0) goto error;
  transferringAgency = partner->human->firstName;

  // the transferring agency is written before the archival one
  fprintf(data->fd, "<TransferringAgency>    "
          "<Identification>%s</Identification>    "
          "</TransferringAgency>    ", transferringAgency);
  fprintf(data->fd, "<ArchivalAgency>    "
          "<Identification>%s</Identification>    "
          "</ArchivalAgency>", archivalAgency);

  rc = TRUE;
 error:
  if (rc == FALSE) {
    logMain(LOG_ERR, "%s", "serializeArchiveTransfert fails");
  }
  return rc;
}

/*=======================================================================
 * Function   : serializeIntegrity
 * Description: write one Integrity element (sha1 + path) for each
 *              content of the container
 * Synopsis   : int serializeIntegrity(Data* data, Container* self)
 * Input      : Data* data      : provides the output stream (data->fd)
 *              Container* self : container whose childs are listed
 * Output     : TRUE on success, FALSE as soon as a content has no
 *              "sha1" characteristic
 =======================================================================*/
int 
serializeIntegrity(Data* data, Container* self)
{
  int rc = FALSE;
  FromAsso* fromAsso = 0;
  Archive* archive = 0;
  AVLNode *node = 0;
  AssoCarac* assoCarac = 0;
  RGIT* curr = 0;

  logMain(LOG_DEBUG, "%s", "serialize Integrity");
  
  for (node = self->childs->head; node; node = node->next) {
    fromAsso = node->item;
    archive = fromAsso->archive;

    // look for the "sha1" characteristic of this content
    curr = 0;
    while ((assoCarac = rgNext_r(archive->assoCaracs, &curr))) {
      if (!strcmp(assoCarac->carac->label, "sha1")) break;
    }

    // the loop above ends with assoCarac == 0 when nothing matches:
    // it must not be dereferenced in that case
    if (!assoCarac) {
      logMain(LOG_ERR, "no sha1 found for %s", fromAsso->path);
      goto error;
    }

    fprintf(data->fd, "  <Integrity>    "
            "<Contains"
            " algorithme=\"http://www.w3.org/2000/09/xmldsig#sha1\">"
            "%s</Contains>    "
            "<UnitIdentifier>%s</UnitIdentifier>    "
            "</Integrity>", 
            assoCarac->value,
            fromAsso->path);
  }

  rc = TRUE;
 error:
  if (!rc) {
    logMain(LOG_ERR, "%s", "serializeIntegrity fails");
  }
  return rc;
}


/*=======================================================================
 * Function   : sedaStringBounded
 * Description: copy a string into a buffer of known size, dropping
 *              every backslash (so that \" becomes ")
 * Synopsis   : static char* sedaStringBounded(char* string, 
 *                                             char* buffer, size_t size)
 * Input      : char* string : the string to rewrite (0 gives "")
 *              char* buffer : the buffer to fill
 *              size_t size  : size of the buffer, final 0 included
 * Output     : char* : buffer, always 0-terminated when size > 0;
 *              the result is truncated if it does not fit
 =======================================================================*/
static char*
sedaStringBounded(char* string, char* buffer, size_t size)
{
  char* out = buffer;
  char* last = 0;

  if (!buffer || size == 0) return buffer;
  last = buffer + size - 1; // keep one byte for the final 0

  if (string) {
    for (; *string && out < last; ++string) {
      if (*string != '\\') *out++ = *string;
    }
  }

  *out = 0;
  return buffer;
}

/*=======================================================================
 * Function   : sedaString
 * Description: remove escape sequence for special characters (mainly \"):
 *              every backslash is dropped
 * Synopsis   : char* sedaString(char* string, char* buffer)
 * Input      : char* string : the string to rewrite (0 gives "")
 *              char* buffer : the buffer to use; it must provide at
 *                             least strlen(string)+1 bytes
 * Output     : char* : buffer
 =======================================================================*/
char*
sedaString(char* string, char* buffer)
{
  // the result is never longer than the input
  return sedaStringBounded(string, buffer, 
                           (string ? strlen(string) : 0) + 1);
}

/*=======================================================================
 * Function   : sedaString1
 * Description: remove the backslashes (see sedaString)
 * Synopsis   : char* sedaString1(char* string)
 * Input      : char* string : the string to rewrite
 * Output     : char* : a static buffer, overwritten by the next call
 *              to sedaString1; the result is truncated to 127 chars
 =======================================================================*/
char*
sedaString1(char* string)
{
  static char buffer[128];
  return sedaStringBounded(string, buffer, sizeof(buffer));
}

/*=======================================================================
 * Function   : sedaString2
 * Description: remove the backslashes (see sedaString)
 * Synopsis   : char* sedaString2(char* string)
 * Input      : char* string : the string to rewrite
 * Output     : char* : a static buffer, overwritten by the next call
 *              to sedaString2; the result is truncated to 127 chars
 =======================================================================*/
char*
sedaString2(char* string)
{
  static char buffer[128];
  return sedaStringBounded(string, buffer, sizeof(buffer));
}

/*=======================================================================
 * Function   : sedaString3
 * Description: remove the backslashes (see sedaString)
 * Synopsis   : char* sedaString3(char* string)
 * Input      : char* string : the string to rewrite
 * Output     : char* : a static buffer, overwritten by the next call
 *              to sedaString3; the result is truncated to 127 chars
 =======================================================================*/
char*
sedaString3(char* string)
{
  static char buffer[128];
  return sedaStringBounded(string, buffer, sizeof(buffer));
}

/*=======================================================================
 * Function   : sedaString4
 * Description: remove the backslashes (see sedaString)
 * Synopsis   : char* sedaString4(char* string)
 * Input      : char* string : the string to rewrite
 * Output     : char* : a static buffer, overwritten by the next call
 *              to sedaString4; the result is truncated to 127 chars
 =======================================================================*/
char*
sedaString4(char* string)
{
  static char buffer[128];
  return sedaStringBounded(string, buffer, sizeof(buffer));
}

/*=======================================================================
 * Function   : serializeContains
 * Description: write one Contains element for each content of the
 *              container, using the characteristics of the content
 *              and of its first document
 * Synopsis   : int serializeContains(Data* data, Container* self)
 * Input      : Data* data      : provides the output stream (data->fd)
 *              Container* self : container whose childs are described
 * Output     : TRUE on success, FALSE as soon as an expected
 *              characteristic, document, category or role is missing
 * Note       : values are written as they are (only backslashes are
 *              removed from some of them)
 =======================================================================*/
int 
serializeContains(Data* data, Container* self)
{
  // sizes of the tables below and index of the only optional label
  enum { DOC_NB = 19, ARCH_NB = 7, KEY_NB = 8, DOC_GENREFORM = 14 };

  int rc = FALSE;
  FromAsso* fromAsso = 0;
  Archive* archive = 0;
  AVLNode *node = 0;
  Document* document = 0;
  Category* category = 0;
  AssoCarac* assoCarac = 0;
  AssoRole* assoRole = 0;
  RGIT* curr = 0;

  // characteristics read from the document
  char* docValue[DOC_NB];
  char* docLabel[DOC_NB] = {
    "Access Restriction Code",       //  0
    "Access Restriction Start Date", //  1
    "Appraisal Code",                //  2
    "Appraisal Duration",            //  3
    "Appraisal Start Date",          //  4
    "Archival Agreement",            //  5
    "Content Description",           //  6
    "Description Language",          //  7
    "Description Level",             //  8
    "Format",                        //  9
    "Language",                      // 10
    "Name",                          // 11
    "Originating agency",            // 12
    "annee",                         // 13
    "genreform",                     // 14 (optional)
    "mois",                          // 15
    "numavis",                       // 16
    "numint",                        // 17
    "numloc"                         // 18
  };

  // characteristics read from the archive
  char* archValue[ARCH_NB];
  char* archLabel[ARCH_NB] = {
    "Control",     // 0
    "Copy",        // 1
    "Description", // 2
    "Issue",       // 3
    "Purpose",     // 4
    "Type",        // 5
    "sha1"         // 6 (required but not written here)
  };

  // keywords, in the order they are written
  char* keyValue[KEY_NB];
  char* keyLabel[KEY_NB] = {
    "genreform",
    "numavis",
    "famname",
    "persname",
    "numloc",
    "annee",
    "mois",
    "numint"
  };

  int i = 0;

  logMain(LOG_DEBUG, "%s", "serialize Contains");
  
  for (node = self->childs->head; node; node = node->next) {
    fromAsso = node->item;
    archive = fromAsso->archive;

    // only the first document, category and role are used:
    // check they exist before dereferencing the lists' heads
    if (!archive->documents || !archive->documents->head) {
      logMain(LOG_ERR, "no document for %s", fromAsso->path);
      goto error;
    }
    document = archive->documents->head->it;
    if (!document->categories || !document->categories->head) {
      logMain(LOG_ERR, "no category for %s", fromAsso->path);
      goto error;
    }
    category = document->categories->head->it;
    if (!document->assoRoles || !document->assoRoles->head) {
      logMain(LOG_ERR, "no role for %s", fromAsso->path);
      goto error;
    }
    assoRole = document->assoRoles->head->it;

    // read the document's characteristics
    for (i = 0; i < DOC_NB; ++i) {
      logMain(LOG_DEBUG, "try to read %s", docLabel[i]);      
      docValue[i] = 0;
      curr = 0;
      while ((assoCarac = rgNext_r(document->assoCaracs, &curr))) {
        if (!strcmp(assoCarac->carac->label, docLabel[i])) {
          docValue[i] = assoCarac->value;
          break;
        }
      }
      if (!docValue[i] && i != DOC_GENREFORM) {
        logMain(LOG_ERR, "fails to read %s", docLabel[i]);
        goto error;
      }
    }

    // read the archive's characteristics
    for (i = 0; i < ARCH_NB; ++i) {
      logMain(LOG_DEBUG, "try to read %s", archLabel[i]);
      archValue[i] = 0;
      curr = 0;
      while ((assoCarac = rgNext_r(archive->assoCaracs, &curr))) {
        if (!strcmp(assoCarac->carac->label, archLabel[i])) {
          archValue[i] = assoCarac->value;
          break;
        }
      }
      if (!archValue[i]) {
        logMain(LOG_ERR, "fails to read %s", archLabel[i]);
        goto error;
      }
    }
    
    fprintf(data->fd, "    <Contains>    "
            "<ArchivalAgreement>%s</ArchivalAgreement>    "
            "<ArchivalProfile>%s</ArchivalProfile>    "
            "<DescriptionLanguage listVersionID=\"edition 2009\">"
            "%s</DescriptionLanguage>    "
            "<DescriptionLevel listVersionID=\"edition 2009\">"
            "%s</DescriptionLevel>    "
            "<Name>%s</Name>    "
            "<ContentDescription>    "
            "<Description>%s</Description>    "
            "<Format>%s</Format>    "
            "<Language listVersionID=\"edition 2009\">%s</Language>    "
            "<OriginatingAgency>    "
            "<Identification>%s</Identification>    "
            "</OriginatingAgency>    ",
            docValue[5],               // Archival Agreement
            category->label,
            docValue[7],               // Description Language
            docValue[8],               // Description Level
            docValue[11],              // Name
            sedaString1(docValue[6]),  // Content Description
            docValue[9],               // Format
            docValue[10],              // Language
            docValue[12]);             // Originating agency

    // each sedaStringN call owns its static buffer, so the 2 names
    // below remain valid together until they are written
    keyValue[0] = docValue[DOC_GENREFORM];
    keyValue[1] = docValue[16]; // numavis
    keyValue[2] = sedaString2(assoRole->human->firstName);
    keyValue[3] = sedaString3(assoRole->human->secondName);
    keyValue[4] = docValue[18]; // numloc
    keyValue[5] = docValue[13]; // annee
    keyValue[6] = docValue[15]; // mois
    keyValue[7] = docValue[17]; // numint
    
    for (i = 0; i < KEY_NB; ++i) {
      if (i == 0 && !keyValue[i]) continue; // genreform is optional
      fprintf(data->fd, "<ContentDescriptive>    "
              "<KeywordContent>%s</KeywordContent>    "
              "<KeywordType listVersionID=\"edition 2009\">"
              "%s</KeywordType>    "
              "</ContentDescriptive>    ",
              keyValue[i], keyLabel[i]);
    }

    fprintf(data->fd, "</ContentDescription>    "
            "<Appraisal>    "
            "<Code listVersionID=\"edition 2009\">%s</Code>    "
            "<Duration>%s</Duration>    "
            "<StartDate>%s</StartDate>    "
            "</Appraisal>    "
            "<AccessRestriction>    "
            "<Code listVersionID=\"edition 2009\">%s</Code>    "
            "<StartDate>%s</StartDate>    "
            "</AccessRestriction>    ",
            docValue[2], docValue[3], docValue[4],
            docValue[0], docValue[1]);

    fprintf(data->fd, "<Document>    "
            "<Attachment filename=\"%s\">"
            "</Attachment>    "
            "<Control>%s</Control>    "
            "<Copy>%s</Copy>    "
            "<Description>%s</Description>    "
            "<Issue>%s</Issue>    "
            "<Purpose>%s</Purpose>    "
            "<Type listVersionID=\"edition 2009\">%s</Type>    "
            "</Document>    "
            "</Contains>",
            fromAsso->path,
            archValue[0],
            archValue[1],
            archValue[2],
            archValue[3],
            sedaString4(archValue[4]),
            archValue[5]);
  }

  rc = TRUE;
 error:
  if (!rc) {
    logMain(LOG_ERR, "%s", "serializeContains fails");
  }
  return rc;
}

/*=======================================================================
 * Function   : getNumber1
 * Description: skip XXXXXX_ and final _AVIS_HABITAT76.pdf: parse the
 *              field found between the 2 first '_' that follow the 
 *              first '/'
 * Synopsis   : int getNumber1(char* path)
 * Input      : char* path : something like dir/XXXXXX_NNNN_...
 * Output     : the field converted by atoi
 * Note       : exit the program (status 1) when the path does not
 *              match, or when the field is longer than 9 characters
 =======================================================================*/
int getNumber1(char* path)
{
  int rc = 0;
  char number[10]; // 9 digits at most: fits a 32 bits int
  char* p1 = path;
  char* p2 = number;
  char* last = number + sizeof(number) - 1; // room for the final 0

  if (!path) goto error;

  // state 0 : look for first '/'
  while (*p1 != 0 && *p1 != '/') ++p1;
  if (*p1 != '/') goto error;
  ++p1;

  // state 1 : look for first '_'
  while (*p1 != 0 && *p1 != '_') ++p1;
  if (*p1 != '_') goto error;
  ++p1;

  // state 2 : copy up to the second '_', without overflowing number
  while (*p1 != 0 && *p1 != '_') {
    if (p2 >= last) goto error;
    *p2++ = *p1++;
  }
  if (*p1 != '_') goto error;
  *p2 = 0;

  rc = atoi(number);
  logMain(LOG_DEBUG, "%s => %i", path, rc);
  goto end;
 error:
  logMain(LOG_ERR, "%s", "getNumber1 fails");
  exit(1);
 end:
  return rc;
}

/*=======================================================================
 * Function   : getNumber2
 * Description: get first XXXXXX_ number: parse the field found between
 *              the first '/' and the first '_' that follows it
 * Synopsis   : int getNumber2(char* path)
 * Input      : char* path : something like dir/XXXXXX_...
 * Output     : the field converted by atoi
 * Note       : exit the program (status 1) when the path does not
 *              match, or when the field is longer than 9 characters
 =======================================================================*/
int getNumber2(char* path)
{
  int rc = 0;
  char number[10]; // 9 digits at most: fits a 32 bits int
  char* p1 = path;
  char* p2 = number;
  char* last = number + sizeof(number) - 1; // room for the final 0

  if (!path) goto error;

  // state 0 : look for first '/'
  while (*p1 != 0 && *p1 != '/') ++p1;
  if (*p1 != '/') goto error;
  ++p1;

  // state 1 : copy up to the first '_', without overflowing number
  while (*p1 != 0 && *p1 != '_') {
    if (p2 >= last) goto error;
    *p2++ = *p1++;
  }
  if (*p1 != '_') goto error;
  *p2 = 0;

  rc = atoi(number);
  logMain(LOG_DEBUG, "%s ==> %i", path, rc);
  goto end;
 error:
  logMain(LOG_ERR, "%s", "getNumber2 fails");
  exit(1);
 end:
  return rc;
}

/*=======================================================================
 * Function   : cmpFromAssoAvlSedaOld
 * Description: compare two FromAsso
 * Synopsis   : int cmpFromAssoAvl(const void *p1, const void *p2)
 * Input      : p1 and p2 are pointers on FromAsso
 * Output     : p1 = p2 ? 0 : (p1 < p2 ? -1 : 1)
 =======================================================================*/
int 
cmpFromAssoAvlSedaOld(const void *p1, const void *p2)
{
  int rc = 0;

  /* p1 and p2 are pointers on &items */
  FromAsso* v1 = (FromAsso*)p1;
  FromAsso* v2 = (FromAsso*)p2;

  // from 2013-012 to 2014-02
  rc = getNumber1(v1->path) - getNumber1(v2->path);
  if (!rc) rc = getNumber2(v1->path) - getNumber2(v2->path);
  //logMain(LOG_DEBUG, "%s - %s = %i", v1->path, v2->path, rc);  

  return rc;
}

/*=======================================================================
 * Function   : cmpFromAssoAvl
 * Description: compare two FromAsso
 * Synopsis   : int cmpFromAssoAvl(const void *p1, const void *p2)
 * Input      : p1 and p2 are pointers on FromAsso
 * Output     : p1 = p2 ? 0 : (p1 < p2 ? -1 : 1)
 =======================================================================*/
int 
cmpFromAssoAvlSeda(const void *p1, const void *p2)
{
  int rc = 0;

  /* p1 and p2 are pointers on &items */
  FromAsso* v1 = (FromAsso*)p1;
  FromAsso* v2 = (FromAsso*)p2;;  

  // from 2014-03
  rc = strcmp(v1->path, v2->path);

  return rc;
}

/*=======================================================================
 * Function   : serializeZip
 * Description: write the xml description of the zip file
 * Synopsis   : int serializeZip(Data* data)
 * Input      : Data* data : zipFile, coll and oldFormat are read;
 *                           fd is used for the output stream
 * Output     : TRUE on success
 * Note       : - the output file is named like the zip file, the 3
 *                last characters being replaced by "xml-new"
 *              - the container's contents are re-sorted using 
 *                cmpFromAssoAvlSedaOld or cmpFromAssoAvlSeda
 =======================================================================*/
int 
serializeZip(Data* data)
{
  int rc = FALSE;
  char hash[MAX_SIZE_MD5+1];
  off_t size = 0;
  char* xmlFile = 0;
  size_t len = 0;
  Archive* archive = 0;
  Container* container = 0;
  Document* document = 0;
  FromAsso* fromAsso = 0;
  AVLTree* tree = 0;
  AVLNode *node = 0;

  hash[MAX_SIZE_MD5] = 0;
  if (!(tree = 
        avl_alloc_tree(data->oldFormat?
                       cmpFromAssoAvlSedaOld:cmpFromAssoAvlSeda, 
                       (avl_freeitem_t)destroyFromAsso)))
    goto error;

  // get zip archive
  if (!getArchiveIds(data->zipFile, hash, &size)) goto error;
  if (!(archive = getArchive(data->coll, hash, size))) goto error;
  if (!(container = archive->toContainer)) {
    logMain(LOG_ERR, "'%s' is not a container", data->zipFile);
    goto error;
  }
  if (!archive->documents || !archive->documents->head) {
    logMain(LOG_ERR, "no document for '%s'", data->zipFile);
    goto error;
  }

  // output file name: replace the 3 last characters
  if (!(xmlFile = createString(data->zipFile))) goto error;
  if ((len = strlen(xmlFile)) < 3) {
    logMain(LOG_ERR, "file name too short: '%s'", xmlFile);
    goto error;
  }
  strcpy(xmlFile + len - 3, "xml");
  if (!(xmlFile = catString(xmlFile, "-new"))) goto error;

  logMain(LOG_NOTICE, "serialize '%s'", xmlFile);

  // open output file
  if ((data->fd = fopen(xmlFile, "w")) == 0) {
    logMain(LOG_ERR, "fopen %s fails: %s", xmlFile, strerror(errno));
    goto error;
  }

  // sort archives
  for (node = container->childs->head; node; node = node->next) {
    fromAsso = node->item;    
    if (!avl_insert(tree, fromAsso)) goto error;
  }

  // replace the previous tree without freeing the items, 
  // as they now belong to the new tree
  container->childs->freeitem = 0;
  avl_free_tree(container->childs);
  container->childs = tree;
  tree = 0; // handed over to the container

  fprintf(data->fd, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
          "<ArchiveTransfer"
          " xmlns=\"fr:gouv:ae:archive:draft:standard_echange_v0.2\">  ");
  
  document = archive->documents->head->it;
  if (!serializeArchiveTransfert(data, document)) goto error;
  if (!serializeIntegrity(data, container)) goto error;
  if (!serializeContains(data, container)) goto error;

  fprintf(data->fd, "</ArchiveTransfer>\n");

  rc = TRUE;
 error:
  if (tree) {
    // not handed over: release the tree but not the items, which
    // still belong to the container's tree
    tree->freeitem = 0;
    avl_free_tree(tree);
  }
  if (data->fd) {
    // fclose flushes the stream: a failure means the output is broken
    if (fclose(data->fd) != 0) {
      logMain(LOG_ERR, "fclose fails: %s", strerror(errno));
      rc = FALSE;
    }
    data->fd = 0;
  }
  if (!rc) {
    logMain(LOG_ERR, "%s", "serializeZip fails");
  }
  xmlFile = destroyString(xmlFile);
  return rc;
}

/*=======================================================================
 * Function   : usage
 * Description: Print the usage on stderr: the generic part comes from
 *              mdtxUsage and mdtxOptions, the rest is specific to 
 *              this program.
 * Synopsis   : static void usage(char* programName)
 * Input      : programName = the name of the program; usually
 *                                  argv[0].
 * Output     : N/A
 =======================================================================*/
static void 
usage(char* programName)
{
  mdtxUsage(programName);
  fputs("\n\t\t-b basename -z zipFile [-o]", stderr);

  mdtxOptions();
  fputs("  ---\n", stderr);
  fputs("  -b, --basename\tbase name to use for mdtx inputs\n", stderr);
  fputs("  -z, --zipFile\t\tpath of the zip file\n", stderr);
  fputs("  -o, --old-format\tup to 2014-03 included\n", stderr);
}

/*=======================================================================
 * Function   : loadMetadata
 * Description: parse the mediatex metadata files "<basename>_ext" 
 *              and "<basename>_cat" into a collection named "seda02"
 * Synopsis   : int loadMetadata(Data* data) 
 * Input      : Data* data : basename is read, coll is set
 * Output     : TRUE on success
 * Note       : the 2 paths are stored into the collection; they are
 *              released here only if that was not done yet
 =======================================================================*/
int 
loadMetadata(Data* data) 
{
  int rc = FALSE;
  char* extractDB = 0;
  char* catalogDB = 0;
  
  logMain(LOG_INFO, "load '%s'", data->basename);

  // initialise data
  if (!(data->coll = addCollection("seda02"))) goto error;
  if (!(data->coll->extractTree = createExtractTree())) goto error;
  if (!(data->coll->catalogTree = createCatalogTree())) goto error;

  // get paths
  if (!(extractDB = createString(data->basename))) goto error;
  if (!(extractDB = catString(extractDB, "_ext"))) goto error;
  if (!(catalogDB = createString(data->basename))) goto error;
  if (!(catalogDB = catString(catalogDB, "_cat"))) goto error;

  // parse mediatex metadata in current directory
  strcpy(data->coll->label, "seda02");
  if (!(data->coll->masterLabel = createString("mdtx"))) goto error;
  strcpy(data->coll->masterHost, "localhost");
  data->coll->memoryState |= EXPANDED;
  if (!(data->coll->user = createString("mdtx"))) goto error;

  // from now the paths are held by the collection
  data->coll->extractDB = extractDB;
  extractDB = 0;
  data->coll->catalogDB = catalogDB;
  catalogDB = 0;

  logMain(LOG_NOTICE, "parse '%s'", data->coll->extractDB);
  logMain(LOG_NOTICE, "parse '%s'", data->coll->catalogDB);
  if (!loadCollection(data->coll, EXTR|CTLG)) goto error;
  
  rc = TRUE;
 error:
  if (!rc) {
    logMain(LOG_ERR, "%s", "loadMetadata fails");
  }
  // no-op when the paths were handed over (both are 0 then)
  extractDB = destroyString(extractDB);
  catalogDB = destroyString(catalogDB);
  return rc;
}


/*=======================================================================
 * Function   : main 
 * Author     : Nicolas ROCHE
 * modif      : 2010/12/10
 * Description: Entry point: parse the command line, load the metadata
 *              (loadMetadata) and write the xml file (serializeZip)
 * Synopsis   : see usage(): -b basename -z zipFile [-o]
 * Input      : -b and -z are mandatory
 * Output     : exit status: 0 on success,
 *                           1 when initialisation or processing fails,
 *                           2 on bad or missing argument,
 *                           3 on memory allocation failure
 * Note       : local names are kept as they are because the generic
 *              macros used below may refer to them
 =======================================================================*/
int 
main(int argc, char** argv)
{
  Data data;
  // ---
  int rc = 0;
  int cOption = EOF;
  char* programName = *argv;
  char* options = MDTX_SHORT_OPTIONS"b:z:o";
  struct option longOptions[] = {
    {"basename", required_argument, 0, 'b'},    // as printed by usage()
    {"isoBaseName", required_argument, 0, 'b'}, // previous name, kept
    {"zipFile", required_argument, 0, 'z'},
    {"old-format", no_argument, 0, 'o'},
    MDTX_LONG_OPTIONS,
    {0, 0, 0, 0}
  };

  setlocale (LC_ALL, "");
  setlocale(LC_NUMERIC, "C"); // so as printf do not write comma in float

  // import mdtx environment
  env.allocLimit = 256;
  getEnv(&env);
  memset(&data, 0, sizeof(Data));
  
  // parse the command line
  while((cOption = getopt_long(argc, argv, options, longOptions, 0)) 
        != EOF) {
    switch(cOption) {
      
    case 'b':
      if(isEmptyString(optarg)) {
        fprintf(stderr, 
                "%s: nil or empty argument for the basename\n",
                programName);
        rc = 2;
      }
      else {
        if ((data.basename = 
             (char*)malloc(sizeof(char) * strlen(optarg) + 1))
            == 0) {
          fprintf(stderr, 
                  "%s: cannot allocate memory for the basename\n", 
                  programName);
          rc = 3;
        }
        else {
          strcpy(data.basename, optarg);
        }
      }
      break;

    case 'z':
      if(isEmptyString(optarg)) {
        fprintf(stderr, 
                "%s: nil or empty argument for the zip file\n",
                programName);
        rc = 2;
      }
      else {
        if ((data.zipFile = 
             (char*)malloc(sizeof(char) * strlen(optarg) + 1))
            == 0) {
          fprintf(stderr, 
                  "%s: cannot allocate memory for the zip file\n", 
                  programName);
          rc = 3;
        }
        else {
          strcpy(data.zipFile, optarg);
        }
      }
      break;

    case 'o':
      data.oldFormat = TRUE;
      break;

      GET_MDTX_OPTIONS; // generic options
    }
    if (rc) goto optError;
  }
      
  // export mdtx environment
  env.allocDiseaseCallBack = clientDiseaseAll;
  if (!setEnv(programName, &env)) {
    rc = 1; // do not report a success to the caller
    goto optError;
  }
 
  /************************************************************************/
  if (!data.basename || !data.zipFile) {
    usage(argv[0]); // expect a basename and a zip file to work
    rc = 2;         // same status as for a bad argument
    goto optError;
  }

  logMain(LOG_INFO, "txt2xml %s", data.basename);

  // load metadata (rc is 0 here, so failures below exit with 1)
  if (!loadMetadata(&data)) goto error;

  // get zip container
  if (!serializeZip(&data)) goto error;
  /************************************************************************/

  rc = TRUE;
 error:
  free(data.basename);
  free(data.zipFile);
  freeConfiguration();
  ENDINGS;
  rc=!rc; // TRUE (success) gives exit status 0
 optError:
  exit(rc);
}

/* Local Variables: */
/* mode: c */
/* mode: font-lock */
/* mode: auto-fill */
/* End: */
