/*--------------------------------------------------------------------
 *	$Id: grdhisteq.c,v 1.11 2005/12/17 05:59:22 pwessel Exp $
 *
 *	Copyright (c) 1991-2006 by P. Wessel and W. H. F. Smith
 *	See COPYING file for copying and redistribution conditions.
 *
 *	This program is free software; you can redistribute it and/or modify
 *	it under the terms of the GNU General Public License as published by
 *	the Free Software Foundation; version 2 of the License.
 *
 *	This program is distributed in the hope that it will be useful,
 *	but WITHOUT ANY WARRANTY; without even the implied warranty of
 *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *	GNU General Public License for more details.
 *
 *	Contact info: gmt.soest.hawaii.edu
 *--------------------------------------------------------------------*/
/*
 * read a grdfile and find the values which divide its range
 * into n_cell number of quantiles.
 *
 * Author:	W.H.F. Smith
 * Date: 	31 May 1990
 *
 * Modified:	12 June, 1990 by whf smith, adding [-Q] option for
 *	quadratic scaling.  Some rgb color systems consider that
 *	if black = (0,0,0) and white = (1,1,1) or (255,255,255),
 *	then a neutral gray "halfway" between black and while should
 *	be set to gray = (0.75,0.75,0.75) or (191,191,191).  If so,
 *	let 0 <= x <= 1 be the desired gradation between black and
 *	white (the intensity factor used by the coloring program.
 *	Then the gray tone level 0 <= y <= 1 is given by:
 *		y = 2*x - x**2.
 *	Using the -Q option will find the data values which divide
 *	the data range into <n_cells> values of y; default linear
 *	scaling will find the values for <n_cells> divisions of x.
 *
 * Updated to v2.0 15-May-1991 Paul Wessel
 * Updated to v3.1 14-Jun-1998 Paul Wessel
 * Updated to v3.3.5 14-Jun-2000 Paul Wessel
 * Version:	4
 */
 
#include "gmt.h"

struct	INDEXED_DATA {
	float	x;
	int	i;
}	*indexed_data;

struct	CELL {
	float	low;
	float	high;
}	*cell;

float	*data, data_min, data_max;
float	get_cell(float x);
double	qsnorm(double p), norm = 0.0;

int	last_cell, n_cells = 0, n_cells_m1 = 0;
int	i, j, nxy;
int	compare_indexed_floats(const void *point_1, const void *point_2);
int	compare_indices(const void *point_1, const void *point_2);

void do_usual (char *infile, char *outfile, int n_cells, int quadratic, int dump_intervals, int argc, char **argv);
void do_gaussian (char *infile, char *outfile, int argc, char **argv);

int main (int argc, char **argv)
{

	int	i;
	int	dump = FALSE, error = FALSE, quadratic = FALSE, gaussian = FALSE;
	char *infile = CNULL, *outfile = CNULL;

	argc = GMT_begin (argc, argv);

	for (i = 1; i < argc; i++) {
		if (argv[i][0] == '-') {
			switch (argv[i][1]) {

				/* Common parameters */

				case 'V':
				case '\0':
					error += GMT_get_common_args (argv[i], 0, 0, 0, 0);
					break;

				/* Supplemental parameters */

				case 'C':
					n_cells = atoi(&argv[i][2]);
					break;
				case 'D':
					dump = TRUE;
					break;
				case 'G':
					outfile = &argv[i][2];
					break;
				case 'N':
					gaussian = TRUE;
					if (argv[i][2]) norm = atof (&argv[i][2]);
					break;
				case 'Q':
					quadratic = TRUE;
					break;
				default:
					error = TRUE;
					GMT_default_error (argv[i][1]);
					break;
			}
		}
		else
			infile = argv[i];
	}

	if (argc == 1 || GMT_quick) {
		fprintf (stderr,"grdhisteq %s - Histogram equalization for grdfiles\n\n", GMT_VERSION);
		fprintf (stderr, "usage: grdhisteq <infile> -G<outfile> [-C<n_cells> -D -N[<norm>] -Q -V]\n");
		if (GMT_quick) exit (EXIT_FAILURE);
		fprintf (stderr, "\t-C<n_cells> sets how many cells (divisions) of data range to make.\n");
		fprintf (stderr, "\t-D dump level information to stdout\n");
		fprintf (stderr, "\t-G<outfile> will create an equalized output grdfile.\n");
		fprintf (stderr, "\t-N use with -G to make an output grdfile with standard normal scores.\n");
		fprintf (stderr, "\t   Append <norm> to normalize the scores to <-1,+1>\n");
		fprintf (stderr, "\t-Q to use quadratic intensity scaling.  [Default is linear]\n");
		GMT_explain_option ('V');
		exit (EXIT_FAILURE);
	}

	if (!infile) {
		fprintf (stderr, "%s: GMT SYNTAX ERROR:  Must specify input file\n", GMT_program);
		error++;
	}
	if (gaussian && !outfile) {
		fprintf (stderr, "%s: GMT SYNTAX ERROR -N option:  Must also specify output file with -G\n", GMT_program);
		error++;
	}
	if (!gaussian && n_cells <= 0) {
		fprintf (stderr, "%s: GMT SYNTAX ERROR -C option:  n_cells must be positive\n", GMT_program);
		error++;
	}
	if (error) exit (EXIT_FAILURE);

	GMT_put_history (argc, argv);	/* Update .gmtcommands4 */

	if (!strcmp (infile, "=")) {
		fprintf (stderr, "%s: Piping of input grdfile not supported!\n", GMT_program);
		exit (EXIT_FAILURE);
	}

	if (gaussian)
		do_gaussian (infile, outfile, argc, argv);
	else
		do_usual (infile, outfile, n_cells, quadratic, dump, argc, argv);

	GMT_end (argc, argv);

	exit (EXIT_SUCCESS);
}

void do_usual (char *infile, char *outfile, int n_cells, int quadratic, int dump_intervals, int argc, char **argv)
{

	double	delta_cell, target;
	struct GRD_HEADER header;
	int	nxy, nxy_0, current_cell;
	char format[BUFSIZ];

	sprintf (format, "%s\t%s\t%%d\n", gmtdefs.d_format, gmtdefs.d_format);

	if (GMT_read_grd_info (infile, &header)) {
                fprintf (stderr, "%s: GMT SYNTAX ERROR:  File %s not found\n", GMT_program, infile);
		exit (EXIT_FAILURE);
	}

	GMT_grd_init (&header, argc, argv, TRUE);

	nxy_0 = header.nx * header.ny;
	data = (float *) GMT_memory (VNULL, (size_t)nxy_0, sizeof (float), GMT_program);
	GMT_read_grd (infile, &header, data, 0.0, 0.0, 0.0, 0.0, GMT_pad, FALSE);

	cell = (struct CELL *) GMT_memory (VNULL, (size_t)n_cells, sizeof(struct CELL), GMT_program);

	/* Sort the data and find the division points:  */

	qsort ((void *)data, (size_t)nxy_0, sizeof(float), GMT_comp_float_asc);
	nxy = nxy_0;
	while (nxy > 0 && GMT_is_fnan (data[nxy-1])) nxy--;	/* Only deal with real numbers */

	data_min = data[0];
	data_max = data[nxy - 1];
	last_cell = n_cells/2;
	n_cells_m1 = n_cells - 1;

	current_cell = 0;
	i = 0;
	delta_cell = ((double)nxy) / ((double)n_cells);

	while (current_cell < n_cells) {

		if (current_cell == (n_cells - 1) ) {
			j = nxy - 1;
		}
		else if (quadratic) {	/* Use y = 2x - x**2 scaling  */

			target = ( (double) (current_cell + 1) ) / ( (double) n_cells);
			j = (int)floor(nxy * (1.0 - sqrt(1.0 - target)));
		}
		else {	/* Use simple linear scale  */

			j = (int)(floor( (current_cell + 1) * delta_cell)) - 1;
		}

		cell[current_cell].low = data[i];
		cell[current_cell].high = data[j];

		if (dump_intervals) fprintf (GMT_stdout, format, data[i], data[j], current_cell);

		i = j;
		current_cell++;
	}

	if (outfile) {
		GMT_read_grd (infile, &header, data, 0.0, 0.0, 0.0, 0.0, GMT_pad, FALSE);

		for (i = 0; i < nxy_0; i++) data[i] = (GMT_is_fnan (data[i])) ? GMT_f_NaN : get_cell (data[i]);

		GMT_write_grd (outfile, &header, data, 0.0, 0.0, 0.0, 0.0, GMT_pad, FALSE);
	}

	GMT_free ((void *) data);
	GMT_free ((void *) cell);
}

float	get_cell(float x)
{
	int		low, high, i;

	low = 0;
	high = n_cells_m1;
	i = last_cell;

	do {
		if (cell[i].low <= x && cell[i].high >= x) {
			last_cell = i;
			return ( (float)i);
		}
		else if (cell[low].low <= x && cell[low].high >= x) {
			return ( (float)low);
		}
		else if (cell[high].low <= x && cell[high].high >= x) {
			return ( (float)high);
		}
		else if (cell[i].low > x) {
			high = i;
			i = (low + high) / 2;
		}
		else if (cell[i].high < x) {
			low = i;
			i = (low + high) / 2;
		}
	} while (TRUE);
}

void do_gaussian (char *infile, char *outfile, int argc, char **argv)
{
	int	i, j, nxy_0;
	double	dnxy;
	struct GRD_HEADER header;

	if (GMT_read_grd_info (infile, &header)) {
                fprintf (stderr, "%s: GMT SYNTAX ERROR:  File %s not found\n", GMT_program, infile);
		exit (EXIT_FAILURE);
	}

	GMT_grd_init (&header, argc, argv, TRUE);

	nxy_0 = header.nx * header.ny;
	data = (float *) GMT_memory (VNULL, (size_t)nxy_0, sizeof (float), GMT_program);
	GMT_read_grd (infile, &header, data, 0.0, 0.0, 0.0, 0.0, GMT_pad, FALSE);

	indexed_data = (struct INDEXED_DATA *) GMT_memory (VNULL, (size_t)nxy_0, sizeof (struct INDEXED_DATA), GMT_program);

	for (i = j = 0, nxy = nxy_0; i < nxy_0; i++) {
		if (GMT_is_fnan (data[i])) {	/* Put NaNs in the back */
			nxy--;
			indexed_data[nxy].i = i;
			indexed_data[nxy].x = data[i];
		}
		else {
			indexed_data[j].i = i;
			indexed_data[j].x = data[i];
			j++;
		}
	}

	/* Sort on data value  */

	qsort ((void *)indexed_data, (size_t)nxy, sizeof(struct INDEXED_DATA), compare_indexed_floats);

	dnxy = 1.0 / (nxy + 1);

	if (norm != 0.0) norm /= fabs (qsnorm ((double)dnxy));	/* Normalize by abs(max score) */

	for (i = 0; i < nxy; i++) {
		indexed_data[i].x = (float)qsnorm ((double)((i + 1) * dnxy));
		if (norm != 0.0) indexed_data[i].x *= (float)norm;
	}

	/* Sort on data index  */

	qsort ((void *)indexed_data, (size_t)nxy_0, sizeof(struct INDEXED_DATA), compare_indices);

	for (i = 0; i < nxy_0; i++) data[i] = indexed_data[i].x;

	GMT_write_grd (outfile, &header, data, 0.0, 0.0, 0.0, 0.0, GMT_pad, FALSE);

	GMT_free ((void *) indexed_data);
	GMT_free ((void *) data);
}

int	compare_indexed_floats(const void *point_1, const void *point_2)
{
	if ( ((struct INDEXED_DATA *)point_1)->x < ((struct INDEXED_DATA *)point_2)->x )
		return (-1);
	else if ( ((struct INDEXED_DATA *)point_1)->x > ((struct INDEXED_DATA *)point_2)->x )
		return (1);
	else 
		return (0);
}

int	compare_indices(const void *point_1, const void *point_2)
{
	if ( ((struct INDEXED_DATA *)point_1)->i < ((struct INDEXED_DATA *)point_2)->i )
		return (-1);
	else if ( ((struct INDEXED_DATA *)point_1)->i > ((struct INDEXED_DATA *)point_2)->i )
		return (1);
	else 
		return (0);
}

/* double qsnorm(p)
 * double	p;
 *
 * Function to invert the cumulative normal probability
 * function.  If z is a standardized normal random deviate,
 * and Q(z) = p is the cumulative Gaussian probability 
 * function, then z = qsnorm(p).
 *
 * Note that 0.0 < p < 1.0.  Data values outside this range
 * will return +/- a large number (1.0e6).
 * To compute p from a sample of data to test for Normalcy,
 * sort the N samples into non-decreasing order, label them
 * i=[1, N], and then compute p = i/(N+1).
 *
 * Author:	Walter H. F. Smith
 * Date:	19 February, 1991.
 *
 * Based on a Fortran subroutine by R. L. Parker.  I had been
 * using IMSL library routine DNORIN(DX) to do what qsnorm(p)
 * does, when I was at the Lamont-Doherty Geological Observatory
 * which had a site license for IMSL.  I now need to invert the
 * gaussian CDF without calling IMSL; hence, this routine.
 *
 */

double	qsnorm(double p)
{
	double	t, z;

	if (p <= 0.0) {
		fprintf(stderr,"%s: qsnorm:  Bad probability.\n", GMT_program);
		return(-1.0e6);
	}
	else if (p >= 1.0) {
		fprintf(stderr,"%s: qsnorm:  Bad probability.\n", GMT_program);
		return(1.0e6);
	}
	else if (p == 0.5) {
		return(0.0);
	}
	else if (p > 0.5) {
		t = sqrt(-2.0 * log(1.0 - p) );
		z = t - (2.515517 +t*(0.802853 +t*0.010328))/
			(1.0 + t*(1.432788 + t*(0.189269+ t*0.001308)));
		return(z);
	}
	else {
		t = sqrt(-2.0 * log(p) );
		z = t - (2.515517 +t*(0.802853 +t*0.010328))/
			(1.0 + t*(1.432788 + t*(0.189269+ t*0.001308)));
		return(-z);
	}
}
