/* GNU polyxmass - the massist's program.
   -------------------------------------- 
   Copyright (C) 2000,2001,2002,2003,2004 Filippo Rusconi

   http://www.polyxmass.org

   This file is part of the "GNU polyxmass" project.
   
   The "GNU polyxmass" project is an official GNU project package (see
   www.gnu.org) released ---in its entirety--- under the GNU General
   Public License and was started at the Centre National de la
   Recherche Scientifique (FRANCE), that granted me the formal
   authorization to publish it under this Free Software License.

   This software is free software; you can redistribute it and/or
   modify it under the terms of the GNU  General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.
   
   This software is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.
   
   You should have received a copy of the GNU  General Public
   License along with this software; if not, write to the
   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
   Boston, MA 02110-1301, USA.
*/

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "pxmchem-modif.h"
#include "libpolyxmass-plugin.h"
#include "pxmchem-formula.h"



/* NEW'ING FUNCTIONS, DUPLICATING FUNCTIONS ...
 */
/* Allocate a zero-filled PxmModif and give it an empty property
   array.  The 'name' and 'actform' members are left NULL.
 */
PxmModif *
pxmchem_modif_new (void)
{
  PxmModif *fresh = g_malloc0 (sizeof *fresh);

  fresh->propGPA = g_ptr_array_new ();

  return fresh;
}

PxmProp*
pxmchem_modif_prop_new (void)
{
  PxmProp *prop = libpolyxmass_prop_new ();
  
  PxmModif *modif = pxmchem_modif_new ();

  libpolyxmass_prop_set_name (prop, "MODIF");
  
  prop->data = (gpointer) modif;

  prop->custom_dup = pxmchem_modif_prop_dup;
  prop->custom_cmp = pxmchem_modif_prop_cmp;
  prop->custom_free = pxmchem_modif_prop_free;
  
  return prop;
}

/* Search GPA (an array of PxmModif pointers) for the first modif whose
   name equals 'name' and return a PXM_DUP_NO_DEEP duplicate of it.
   Returns NULL when no modif in GPA has that name.
 */
PxmModif *
pxmchem_modif_new_by_name (gchar *name, GPtrArray *GPA)
{
  gint idx = 0;

  g_assert (name != NULL);
  g_assert (GPA != NULL);

  while (idx < GPA->len)
    {
      PxmModif *candidate = g_ptr_array_index (GPA, idx);

      if (strcmp (candidate->name, name) == 0)
        return pxmchem_modif_dup (candidate, PXM_DUP_NO_DEEP);

      idx++;
    }

  return NULL;
}


/* Return a newly allocated copy of 'modif'.  The name and actform
   strings are always duplicated.  With PXM_DUP_DEEP the property
   array is duplicated through libpolyxmass_prop_GPA_dup (); with any
   other 'how_dup' value the copy receives an empty property array.
   The source modif must have non-NULL name, actform and propGPA.
 */
PxmModif *
pxmchem_modif_dup (const PxmModif *modif, PxmHowDup how_dup)
{
  PxmModif *copy = NULL;

  g_assert (modif != NULL);

  /* Only the bare structure is allocated here; the property array is
     set up at the end, once we know what 'how_dup' asks for.
  */
  copy = g_malloc0 (sizeof *copy);

  /* The source modif is expected to be well formed, that is, without
     NULL members.
  */
  g_assert (modif->name != NULL);
  copy->name = g_strdup (modif->name);

  g_assert (modif->actform != NULL);
  copy->actform = g_strdup (modif->actform);

  g_assert (modif->propGPA != NULL);

  if (how_dup != PXM_DUP_DEEP)
    {
      /* Same state as right after pxmchem_modif_new (): an empty
         array.
      */
      copy->propGPA = g_ptr_array_new ();
    }
  else
    {
      copy->propGPA = libpolyxmass_prop_GPA_dup (modif->propGPA, how_dup);
    }

  return copy;
}


/* Duplicate a prop whose 'data' member is a PxmModif.  The new prop
   gets the same name, a copy of the modif made with
   pxmchem_modif_dup (data, how_dup), and the same three housekeeping
   callbacks as the source prop (all of which must be non-NULL).
 */
PxmProp *
pxmchem_modif_prop_dup (PxmProp *prop, PxmHowDup how_dup)
{
  PxmProp *twin = NULL;

  g_assert (prop != NULL);

  twin = libpolyxmass_prop_new ();
  libpolyxmass_prop_set_name (twin, prop->name);

  twin->data =
    (gpointer) pxmchem_modif_dup ((PxmModif *) prop->data, how_dup);

  /* The source prop must carry its full set of callbacks, which the
     copy simply shares.
  */
  g_assert (prop->custom_dup != NULL);
  g_assert (prop->custom_cmp != NULL);
  g_assert (prop->custom_free != NULL);

  twin->custom_dup = prop->custom_dup;
  twin->custom_cmp = prop->custom_cmp;
  twin->custom_free = prop->custom_free;

  return twin;
}


/* Replace the name of 'modif' with a private copy of 'name'.  Any
   previous name is freed.  Both arguments must be non-NULL.  Always
   returns TRUE.
 */
gboolean
pxmchem_modif_set_name (PxmModif *modif, gchar *name)
{
  gchar *replacement = NULL;

  g_assert (modif != NULL && name != NULL);

  /* Duplicate BEFORE releasing the old string: 'name' may be
     modif->name itself (or point inside it), and freeing first would
     make g_strdup () read freed memory.
  */
  replacement = g_strdup (name);

  /* modif->name may still be NULL right after pxmchem_modif_new ();
     g_free () accepts NULL, so no guard is needed.
  */
  g_free (modif->name);
  modif->name = replacement;

  return TRUE;
}



/* Replace the actform of 'modif' with a private copy of 'actform'.
   Any previous actform is freed.  Both arguments must be non-NULL.
   Always returns TRUE.
 */
gboolean
pxmchem_modif_set_actform (PxmModif *modif, gchar *actform)
{
  gchar *replacement = NULL;

  g_assert (modif != NULL && actform != NULL);

  /* Duplicate BEFORE releasing the old string: 'actform' may be
     modif->actform itself (or point inside it), and freeing first
     would make g_strdup () read freed memory.
  */
  replacement = g_strdup (actform);

  /* modif->actform may still be NULL right after pxmchem_modif_new ();
     g_free () accepts NULL, so no guard is needed.
  */
  g_free (modif->actform);
  modif->actform = replacement;

  return TRUE;
}


/* COMPARISON FUNCTIONS -- PROP AND NO-PROP --
 */
/* Compare two modifs.  Returns 0 when no difference was found and a
   positive count of detected differences otherwise.

   The name and actform strings are always compared (one point each).
   The property arrays are compared only as requested by 'how_cmp':

   - PXM_CMP_1_SUBSET_OF_2: every prop of modif1 must have an equal
     prop in modif2;
   - PXM_CMP_2_SUBSET_OF_1: every prop of modif2 must have an equal
     prop in modif1.

   Setting both flags thus asks for identical property sets.  As soon
   as a subset test fails the function returns, adding one point to
   the running count.
 */
gint
pxmchem_modif_cmp (PxmModif *modif1, PxmModif *modif2, PxmHowCmp how_cmp)
{
  gint differences = 0;
  gint outer = 0;
  gint inner = 0;

  PxmProp *prop1 = NULL;
  PxmProp *prop2 = NULL;

  gboolean matched = FALSE;

  g_assert (modif1 != NULL);
  g_assert (modif2 != NULL);

  if (strcmp (modif1->name, modif2->name) != 0)
    differences++;

  if (strcmp (modif1->actform, modif2->actform) != 0)
    differences++;

  if (how_cmp & PXM_CMP_1_SUBSET_OF_2)
    {
      /* Cheap rejection: modif1 cannot be a subset of modif2 if it
         holds more props than modif2 does.
      */
      if (modif1->propGPA->len > modif2->propGPA->len)
        return differences + 1;

      for (outer = 0; outer < modif1->propGPA->len; outer++)
        {
          prop1 = g_ptr_array_index (modif1->propGPA, outer);
          g_assert (prop1 != NULL);

          /* Scan modif2's props until one compares equal to prop1.
             The props themselves are compared as strictly as
             possible (both subset flags set).
          */
          matched = FALSE;
          inner = 0;

          while (matched == FALSE && inner < modif2->propGPA->len)
            {
              prop2 = g_ptr_array_index (modif2->propGPA, inner);
              g_assert (prop2 != NULL);

              if (libpolyxmass_prop_cmp (prop1, prop2,
                                         PXM_CMP_1_SUBSET_OF_2
                                         | PXM_CMP_2_SUBSET_OF_1) == 0)
                matched = TRUE;

              inner++;
            }

          if (matched == FALSE)
            return differences + 1;
        }
    }

  if (how_cmp & PXM_CMP_2_SUBSET_OF_1)
    {
      /* Cheap rejection: modif2 cannot be a subset of modif1 if it
         holds more props than modif1 does.
      */
      if (modif2->propGPA->len > modif1->propGPA->len)
        return differences + 1;

      for (outer = 0; outer < modif2->propGPA->len; outer++)
        {
          prop2 = g_ptr_array_index (modif2->propGPA, outer);
          g_assert (prop2 != NULL);

          /* Scan modif1's props until one compares equal to prop2;
             the argument order (modif1's prop first) is the same as
             in the test above.
          */
          matched = FALSE;
          inner = 0;

          while (matched == FALSE && inner < modif1->propGPA->len)
            {
              prop1 = g_ptr_array_index (modif1->propGPA, inner);
              g_assert (prop1 != NULL);

              if (libpolyxmass_prop_cmp (prop1, prop2,
                                         PXM_CMP_1_SUBSET_OF_2
                                         | PXM_CMP_2_SUBSET_OF_1) == 0)
                matched = TRUE;

              inner++;
            }

          if (matched == FALSE)
            return differences + 1;
        }
    }

  return differences;
}


/* Compare two props whose 'data' members are PxmModif instances.
   One point is counted if the prop names differ, to which the result
   of pxmchem_modif_cmp () on the two modifs is added.  Returns 0 if
   the entities are identical, > 0 otherwise.
 */
gint
pxmchem_modif_prop_cmp (PxmProp *prop1, PxmProp *prop2, PxmHowCmp how_cmp)
{
  PxmModif *modif1 = NULL;
  PxmModif *modif2 = NULL;

  gint name_penalty = 0;

  g_assert (prop1 != NULL);
  g_assert (prop2 != NULL);

  modif1 = (PxmModif *) prop1->data;
  modif2 = (PxmModif *) prop2->data;

  g_assert (modif1 != NULL);
  g_assert (modif2 != NULL);

  name_penalty = (strcmp (prop1->name, prop2->name) != 0) ? 1 : 0;

  return name_penalty + pxmchem_modif_cmp (modif1, modif2, how_cmp);
}


/* INTEGRITY CHECKING FUNCTIONS
 */
/* Check that 'modif' is well formed:

   - its name is non-NULL and not made of whitespace only;
   - its actform is non-NULL and accepted by pxmchem_actform_check ()
     against 'atom_refGPA'.

   'valid' must point to a NULL gchar pointer.  Returns TRUE when no
   problem was found, in which case *valid is left untouched.
   Otherwise returns FALSE and sets *valid to a newly allocated string
   with one line per problem; the caller owns that string.
 */
gboolean
pxmchem_modif_validate (PxmModif *modif, GPtrArray *atom_refGPA, 
                        gchar **valid)
{
  GString *report = NULL;

  gchar *stripped = NULL;

  g_assert (modif != NULL);
  g_assert (atom_refGPA != NULL);

  /* For integrity reasons the caller must hand us an empty slot.
   */
  g_assert (valid != NULL);
  g_assert (*valid == NULL);

  report = g_string_new ("");

  /* The name.
   */
  if (modif->name != NULL)
    {
      /* Work on a stripped copy so that a name made of blanks only
       * is seen as empty.
       */
      stripped = g_strdup (modif->name);
      stripped = g_strstrip (stripped);

      if (stripped[0] == '\0')
        g_string_append_printf (report, 
                                _("modif has an in valid 'name' member: '%s'\n"),
                                modif->name);

      g_free (stripped);
    }
  else
    {
      g_string_append_printf (report, 
                              _("modif has a NULL 'name' member\n"));
    }

  /* The actform; the string is handed over as is to the checking
   * function.
   */
  if (modif->actform != NULL)
    {
      if (!pxmchem_actform_check (modif->actform, atom_refGPA))
        g_string_append_printf (report, 
                                _("modif has an invalid 'actform' member: '%s'\n"),
                                modif->actform);
    }
  else
    {
      g_string_append_printf (report, 
                              _("modif has a NULL 'actform' member\n"));
    }

  /* An empty report means that everything went fine.
   */
  if (report->len == 0)
    {
      g_string_free (report, TRUE);

      return TRUE;
    }

  /* Hand the character data over to the caller and release only the
   * GString wrapper.
   */
  *valid = g_string_free (report, FALSE);

  return FALSE;
}


/* Tell whether the name of 'modif' occurs at most once in GPA, by
   checking that a search from the top and a search from the bottom
   end on the same index.  If the name is absent from GPA both
   searches return -1 and the result is TRUE as well.
 */
gboolean
pxmchem_modif_unique_by_name (PxmModif *modif, 
                              GPtrArray *GPA)
{
  gint first = -1;
  gint last = -1;

  g_assert (modif != NULL);
  g_assert (modif->name != NULL);
  g_assert (GPA != NULL);

  first = pxmchem_modif_get_index_top_by_name (modif->name, GPA);
  last = pxmchem_modif_get_index_bottom_by_name (modif->name, GPA);

  return (first == last);
}


/* Intended to check that the actform of 'modif' is syntactically
   correct.

   NOTE(review): as written, this function returns FALSE on every
   path: a NULL or empty actform is rejected, but the actual syntax
   check was never wired in, so a non-empty actform is reported as
   FALSE too.  Confirm the intended checker before relying on the
   return value.
 */
gboolean
pxmchem_modif_check_actform_syntax (PxmModif *modif)
{
  g_assert (modif != NULL);

  /* A missing or empty actform cannot be valid.
   */
  if (modif->actform == NULL || modif->actform[0] == '\0')
    return FALSE;

  /* No syntax verification is performed yet (see the note above).
   */
  return FALSE;
}


/*  LOCATING FUNCTIONS
 */
/* Convenience alias for pxmchem_modif_get_index_top_by_name ():
   returns the index of the first modif in GPA named 'name', or -1.
 */
gint
pxmchem_modif_get_index_by_name (gchar *name, GPtrArray *GPA)
{
  gint first_match = pxmchem_modif_get_index_top_by_name (name, GPA);

  return first_match;
}


/* Scan GPA from index 0 upwards and return the index of the first
   modif whose name equals 'name', or -1 if there is none.  'name' is
   passed to strcmp () unchecked.
 */
gint
pxmchem_modif_get_index_top_by_name (gchar *name, GPtrArray *GPA)
{
  gint pos = 0;

  g_assert (GPA != NULL);

  while (pos < GPA->len)
    {
      PxmModif *entry = g_ptr_array_index (GPA, pos);

      if (strcmp (entry->name, name) == 0)
        return pos;

      pos++;
    }

  return -1;
}


/* Scan GPA from its last index downwards and return the index of the
   first modif encountered whose name equals 'name' (that is, the last
   occurrence in the array), or -1 if there is none.  'name' is passed
   to strcmp () unchecked.
 */
gint
pxmchem_modif_get_index_bottom_by_name (gchar *name, GPtrArray *GPA)
{
  gint pos = 0;

  PxmModif *entry = NULL;

  g_assert (GPA != NULL);

  /* Nothing to scan; this also keeps 'len - 1' below from wrapping
     around, as 'len' is unsigned.
  */
  if (GPA->len == 0)
    return -1;

  pos = GPA->len - 1;

  while (pos >= 0)
    {
      entry = g_ptr_array_index (GPA, pos);

      if (strcmp (entry->name, name) == 0)
        return pos;

      pos--;
    }

  return -1;
}


/* Return the index at which the very pointer 'modif' is stored in
   GPA (pointer identity, not name comparison), or -1 if it is not
   in the array.
 */
gint
pxmchem_modif_get_index_by_ptr (PxmModif *modif, GPtrArray *GPA)
{
  gint pos = 0;

  g_assert (GPA != NULL && modif != NULL);

  while (pos < GPA->len)
    {
      PxmModif *entry = (PxmModif *) g_ptr_array_index (GPA, pos);

      if (entry == modif)
        return pos;

      pos++;
    }

  return -1;
}


/* Return the first modif stored in GPA whose name equals 'name', or
   NULL if there is none.  The returned pointer is the array's own
   element, not a copy.
 */
PxmModif *
pxmchem_modif_get_ptr_by_name (gchar *name, GPtrArray *GPA)
{
  gint pos = 0;

  g_assert (GPA != NULL);
  g_assert (name != NULL);

  while (pos < GPA->len)
    {
      PxmModif *entry = g_ptr_array_index (GPA, pos);

      if (strcmp (entry->name, name) == 0)
        return entry;

      pos++;
    }

  return NULL;
}


/* UTILITY FUNCTIONS
 */


/* MASS ACCOUNTING FUNCTIONS
 */
/* Find in 'modifGPA' the first modif named 'name' and account for
   its actform into 'masspair' by calling
   pxmchem_actform_account_mass (), to which 'atom_refGPA' and 'times'
   are forwarded unchanged.

   Returns TRUE if the accounting succeeded.  Returns FALSE, after
   logging a critical message, if the accounting failed or if no
   modif by that name exists in 'modifGPA'.
 */
gboolean
pxmchem_modif_account_mass_by_name (gchar *name,
                                    GPtrArray *modifGPA,
                                    GPtrArray *atom_refGPA,
                                    gint times,
                                    PxmMasspair *masspair)
{
  gint pos = 0;

  PxmModif *candidate = NULL;

  g_assert (modifGPA != NULL);
  g_assert (atom_refGPA != NULL);
  g_assert (name != NULL);
  g_assert (masspair != NULL);

  for (pos = 0; pos < modifGPA->len; pos++)
    {
      candidate = g_ptr_array_index (modifGPA, pos);

      if (strcmp (candidate->name, name) != 0)
        continue;

      /* This is our modif: its actform carries all the chemistry we
       * need.  Whatever the outcome, the search stops here.
       */
      if (pxmchem_actform_account_mass (candidate->actform,
                                        atom_refGPA,
                                        times,
                                        masspair) != FALSE)
        return TRUE;

      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
             _("%s@%d: failed to account for modif: '%s'"
               " with actform: '%s'\n"),
             __FILE__, __LINE__, 
             candidate->name, candidate->actform);

      return FALSE;
    }

  /* No modif in the reference array bears the requested name: this
   * is an error condition.
   */
  g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
         _("%s@%d: failed to find modif in the reference array: '%s'\n"),
         __FILE__, __LINE__, name);

  return FALSE;
}


/* Find in 'modifGPA' the first modif named 'name' and account for
   the elemental composition of its actform into 'acGPA' by calling
   pxmchem_actform_account_elemcompos (), to which 'atom_refGPA' and
   'times' are forwarded unchanged.

   Returns TRUE if the accounting succeeded.  Returns FALSE, after
   logging a critical message, if the accounting failed or if no
   modif by that name exists in 'modifGPA'.
 */
gboolean
pxmchem_modif_account_elemcompos_by_name (gchar *name,
                                          GPtrArray *modifGPA,
                                          GPtrArray *atom_refGPA,
                                          gint times,
                                          GPtrArray *acGPA)
{
  gint pos = 0;

  PxmModif *candidate = NULL;

  g_assert (modifGPA != NULL);
  g_assert (name != NULL);
  g_assert (acGPA != NULL);
  g_assert (atom_refGPA != NULL);

  for (pos = 0; pos < modifGPA->len; pos++)
    {
      candidate = g_ptr_array_index (modifGPA, pos);

      if (strcmp (candidate->name, name) != 0)
        continue;

      /* This is our modif: account for its actform.  Whatever the
       * outcome, the search stops here.
       */
      if (pxmchem_actform_account_elemcompos (candidate->actform,
                                              atom_refGPA,
                                              times,
                                              acGPA) != FALSE)
        return TRUE;

      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
             _("%s@%d: failed to account for modif: '%s'"
               " with formula: '%s'\n"),
             __FILE__, __LINE__, 
             candidate->name, candidate->actform);

      return FALSE;
    }

  /* No modif in the reference array bears the requested name: this
   * is an error condition.
   */
  g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
         _("%s@%d: failed to find modif from the reference array: '%s'\n"),
         __FILE__, __LINE__, name);

  return FALSE;
}




/* XML-format TRANSACTIONS
 */
/* Build the XML text of an <mdf> element describing 'modif':
 *
 * <mdf>
 *   <name>Acetylation</name>
 *   <actform>+CH3COOH-H20</actform>
 * </mdf>
 *
 * The <mdf> tags are prefixed with the lead string obtained from
 * libpolyxmass_globals_format_string_lead (indent, offset) and the
 * two child elements with the one obtained for offset + 1.  The modif
 * must have a non-empty name and a non-empty actform.
 *
 * Returns a newly allocated string that the caller owns.
 *
 * NOTE(review): name and actform are written verbatim; characters
 * such as '&' or '<' are not escaped here -- confirm that such values
 * cannot occur, or escape them before writing.
 */
gchar *
pxmchem_modif_format_xml_string_mdf (PxmModif *modif, 
                                     gchar *indent, gint offset)
{
  GString *gs = NULL;

  gchar *prefix = NULL;

  g_assert (modif != NULL && indent != NULL);

  gs = g_string_new ("");
  g_assert (gs != NULL);

  /* Opening tag, at the caller's indentation level.
   */
  prefix = libpolyxmass_globals_format_string_lead (indent, offset);
  g_string_append_printf (gs, "%s<mdf>\n", prefix);
  g_free (prefix);

  /* The two data elements, one level deeper.
   */
  prefix = libpolyxmass_globals_format_string_lead (indent, offset + 1);

  g_assert (modif->name != NULL && strlen (modif->name) > 0);
  g_string_append_printf (gs, "%s<name>%s</name>\n",
                          prefix, modif->name);

  g_assert (modif->actform != NULL && strlen (modif->actform) > 0);
  g_string_append_printf (gs, "%s<actform>%s</actform>\n",
                          prefix, modif->actform);

  g_free (prefix);

  /* Closing tag, back at the caller's indentation level.
   */
  prefix = libpolyxmass_globals_format_string_lead (indent, offset);
  g_string_append_printf (gs, "%s</mdf>\n", prefix);
  g_free (prefix);

  /* Release the GString wrapper only; its character data is what we
   * return.
   */
  return g_string_free (gs, FALSE);
}



/* Build a PxmModif from an <mdf> XML element structured this way:
 *
 * <mdf>
 *   <name>Acetylation</name>
 *   <actform>+CH3COOH-H20</actform>
 * </mdf>
 *
 * 'xml_node' must point to the <mdf> element itself; its first
 * non-blank child must be <name> and the next non-blank sibling of
 * that child must be <actform>.  Any departure from this layout trips
 * an assertion.  'user_data' is not used by this function.
 *
 * Returns a newly allocated modif whose name and actform are the text
 * contents of the two child elements.
 *
 * NOTE(review): the strings returned by xmlNodeListGetString () are
 * stored directly in the modif and later released with g_free () by
 * pxmchem_modif_free (), whereas libxml2 documents xmlFree () for
 * them -- confirm that the two allocators are compatible.
 */
PxmModif *
pxmchem_modif_render_xml_node_mdf (xmlDocPtr xml_doc,
                                   xmlNodePtr xml_node,
                                   gpointer user_data)
{
  PxmModif *modif = NULL;

  /* Make sure we have parameters pointing bona fide to the right
   * xml element.
   */
  g_assert (xml_node != NULL);
  g_assert (0 == strcmp ((gchar *) xml_node->name, "mdf"));

  /* Go one step down, to the first child of <mdf>.
   */
  xml_node = xml_node->children;

  /* The blanks found in the XML document are nodes of their own:
   * skip them.  xmlIsBlankNode () returns 0 for a NULL node, so the
   * loop also stops when the sibling list is exhausted.
   */
  while (TRUE == xmlIsBlankNode (xml_node))
    xml_node = xml_node->next;

  /* An <mdf> element without any non-blank child leaves us with a
   * NULL node: fail on a clear assertion rather than on a NULL
   * dereference.
   */
  g_assert (xml_node != NULL);
  g_assert (0 == strcmp ((gchar *) xml_node->name, "name"));

  modif = pxmchem_modif_new ();

  /* The modif was allocated just above, so its string members are
   * NULL and can be assigned directly.
   */
  modif->name = 
    (gchar *) xmlNodeListGetString (xml_doc, xml_node->xmlChildrenNode, 1);
  g_assert (modif->name != NULL);

  /* Move on to the next non-blank sibling of <name>, which must be
   * <actform>.
   */
  xml_node = xml_node->next;
  while (TRUE == xmlIsBlankNode (xml_node))
    xml_node = xml_node->next;

  /* Same protection as above, for a <name> that has no <actform>
   * sibling after it.
   */
  g_assert (xml_node != NULL);
  g_assert (0 == strcmp ((gchar *) xml_node->name, "actform"));

  modif->actform = 
    (gchar *) xmlNodeListGetString (xml_doc, xml_node->xmlChildrenNode, 1);
  g_assert (modif->actform != NULL);

  /* Finished the rendering of the current <mdf> node.
   */
  return modif;
}





/* FREE'ING FUNCTIONS
 */
/* Free 'modif' together with everything it holds: its property array
   (through libpolyxmass_prop_GPA_free ()), its name and its actform.
   Always returns TRUE.
 */
gboolean
pxmchem_modif_free (PxmModif *modif)
{
  g_assert (modif != NULL);

  /* Inner material goes first, the structure itself last.
   */
  if (modif->propGPA != NULL)
    libpolyxmass_prop_GPA_free (modif->propGPA);

  /* g_free () ignores NULL pointers, so members that were never set
     need no special care.
  */
  g_free (modif->name);
  g_free (modif->actform);

  g_free (modif);

  return TRUE;
}


/* Free a prop whose 'data' member is a PxmModif: the prop's name, the
   modif (through pxmchem_modif_free ()) and the prop itself.  Always
   returns 1.
 */
gint
pxmchem_modif_prop_free (PxmProp *prop)
{
  PxmModif *payload = NULL;

  g_assert (prop != NULL);

  /* g_free () ignores a NULL name.
   */
  g_free (prop->name);

  payload = (PxmModif *) prop->data;

  if (payload != NULL)
    pxmchem_modif_free (payload);

  g_free (prop);

  return 1;
}
  



/* GPtrArray-RELATED FUNCTIONS
 */
/* Remove and free every PxmModif stored in GPA, leaving the array
   itself allocated but empty.  Returns the number of modifs freed.
   No element of the array may be NULL.
 */
gint
pxmchem_modif_GPA_empty (GPtrArray *GPA)
{
  gint count = 0;

  PxmModif *modif = NULL;

  g_assert (GPA != NULL);

  /* Always take the LAST element out: g_ptr_array_remove_index ()
     moves down every element that follows the removed one, so
     removing index 0 over and over would make the emptying of the
     array quadratic in its length.
  */
  while (GPA->len > 0)
    {
      modif = g_ptr_array_remove_index (GPA, GPA->len - 1);
      g_assert (modif != NULL);
      pxmchem_modif_free (modif);
      count++;
    }

  return count;
}

/* Free every PxmModif stored in GPA, then the array itself.  Returns
   the number of modifs that were freed.
 */
gint
pxmchem_modif_GPA_free (GPtrArray *GPA)
{
  gint freed = 0;

  g_assert (GPA != NULL);

  /* First the contents, then the container.
   */
  freed = pxmchem_modif_GPA_empty (GPA);
  g_ptr_array_free (GPA, TRUE);

  return freed;
}


/* Free a prop whose 'data' member is a GPtrArray of PxmModif
   instances: the prop's name, the array with all its modifs (through
   pxmchem_modif_GPA_free ()) and the prop itself.  Returns the number
   of modifs freed, which is 0 when 'data' is NULL.
 */
gint
pxmchem_modif_GPA_prop_free (PxmProp *prop)
{
  gint freed = 0;

  GPtrArray *modifs = NULL;

  g_assert (prop != NULL);

  /* g_free () ignores a NULL name.
   */
  g_free (prop->name);

  modifs = (GPtrArray *) prop->data;

  if (modifs != NULL)
    freed = pxmchem_modif_GPA_free (modifs);

  g_free (prop);

  return freed;
}











