// -*- C++ -*-
//
// Copyright (C) 1998, 1999, 2000, 2002  Los Alamos National Laboratory,
// Copyright (C) 1998, 1999, 2000, 2002  CodeSourcery, LLC
//
// This file is part of FreePOOMA.
//
// FreePOOMA is free software; you can redistribute it and/or modify it
// under the terms of the Expat license.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Expat
// license for more details.
//
// You should have received a copy of the Expat license along with
// FreePOOMA; see the file LICENSE.
//

//-----------------------------------------------------------------------------
// Contents:
//   Classes needed for Krylov example
//-----------------------------------------------------------------------------
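// Preconditioned Conjugate-Gradient solver for a Poisson-type problem.
// Solves A x = f, where A is a five-point discretization of a diffusion
// operator with a spatially varying coefficient, using several
// preconditioners.
//
// Compares each result to the exact solution.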
//
//-----------------------------------------------------------------------------

// include files

#include <cmath>

#include "Pooma/Arrays.h"
#include "Utilities/Clock.h"
#include "../Tools/PCGSolve.h"

// Inner-product functor passed to the solver.
// Multiplies two arrays element by element and returns the sum of the
// products.
class Dot
{
public:
  Dot() {}

  // Returns sum(a * b) for two 2D arrays of double that share the same
  // engine tag.
  template<class EngineTag>
  double operator()(const Array<2, double, EngineTag>& a,
                    const Array<2, double, EngineTag>& b) const
  {
    const double total = sum(a * b);
    return total;
  }
};

template<class Layout, class EngineTag>
class Initializer
{
public:
  typedef Array<2, double, EngineTag> Array_t;

  Initializer() {}
  Initializer(const Layout &layout)
    : layout_m(layout)
  {
  }

  void operator()(Array<2, double, EngineTag>& a) const
  {
    a.initialize(layout_m);
  }

private:
  Layout layout_m;
};

#include "../Tools/ExtendedPatchEvaluator.h"
#include "FivePointJacobi.h" // new stuff
#include "FivePointSymmetricGaussSeidel.h" // new stuff
#include "FivePointIncompleteCholeski0.h" // new stuff
#include "FivePointIncompleteCholeski1.h" // new stuff
#include "../Tools/NullPreconditioner.h"

#include <iostream>

// Norm functor over the index range 2 .. n-1 in both dimensions.
//
// NOTE(review): despite the name, the value computed here is the largest
// absolute element value (a max-norm) over that index range, not an H1
// norm -- confirm whether the name or the computation is the intended one.
class H1Norm
{
public:
  // n is the upper index bound; elements with indices 2 .. n-1 are visited.
  H1Norm(int n) : n_m(n) {}

  // Returns sqrt(max over i, j in [2, n-1] of a(i, j)^2).
  // Returns 0 when the index range is empty (n < 3).
  template<class EngineTag>
  double operator()(const Array<2, double, EngineTag>& a) const {
    double maxSquare = 0.0;

    for (int j = 2; j <= n_m - 1; ++j) {
      for (int i = 2; i <= n_m - 1; ++i) {
        // Read the element and square it once per point instead of
        // repeating the array access and the product.
        const double value = a(i, j);
        const double square = value * value;
        if (square > maxSquare) maxSquare = square;
      }
    }
    return std::sqrt(maxSquare);
  }
private:
  int n_m;
};


// Stencil functor that builds the five coefficients of a FivePoint value
// at point (i, j) from a coefficient field d and its four neighbors.
struct DiffOp
{
  // dx2 is the squared mesh spacing; the stored factor is 0.5 / dx2.
  DiffOp(double dx2) : fact_m(0.5 / dx2) {}

  // Returns the FivePoint entry for (i, j).  The four outer coefficients
  // are -fact_m times the sum of d at (i, j) and at one neighbor; the
  // middle coefficient is fact_m times (4 * d(i, j) plus all four
  // neighbors), i.e. minus the sum of the four outer coefficients.
  template<class A>
  inline FivePoint
  operator()(const A &d, int i, int j) const
  {
    const double minusFact = -fact_m;

    return FivePoint(
      minusFact * (d(i, j - 1) + d(i, j)),   // pairs (i, j) with (i, j-1)
      minusFact * (d(i - 1, j) + d(i, j)),   // pairs (i, j) with (i-1, j)
      fact_m * (d(i - 1, j) + 4 * d(i, j) + d(i + 1, j)
                + d(i, j - 1) + d(i, j + 1)),
      minusFact * (d(i, j) + d(i + 1, j)),   // pairs (i, j) with (i+1, j)
      minusFact * (d(i, j) + d(i, j + 1))    // pairs (i, j) with (i, j+1)
    );
  }

  // The functor reads one point below and one point above (i, j) in
  // every dimension.
  inline int lowerExtent(int) const { return 1; }
  inline int upperExtent(int) const { return 1; }

  double fact_m;
};

// Specialization of FunctorResult for DiffOp: for any argument type T,
// the result type Type_t is FivePoint, matching the return type of
// DiffOp::operator().
template<class T>
struct FunctorResult<DiffOp, T>
{
  typedef FivePoint Type_t;
};

// Driver for the preconditioned conjugate-gradient example.
//
// Reads the problem size N, the number of blocks in each direction and the
// maximum iteration count from standard input, builds a five-point matrix
// from a variable diffusion coefficient, constructs the right-hand side
// f = A * 1, and then solves A x = f starting from x = 0 with each of:
// no preconditioner, one- and two-step Jacobi, one- and two-step symmetric
// Gauss-Seidel, IC(0) and IC(1).  After every solve the iteration count,
// the elapsed time and the error against the known solution are printed.
//
// Returns 1 if the input is unreadable or non-positive, 0 otherwise.
// NOTE(review): the exit status does not reflect the PASSED/FAILED results.
int main(int argc, char* argv[])
{
  Pooma::initialize(argc, argv);
  Pooma::blockingExpressions(true);

  std::cout << "Conjugate Gradient Solver for Poisson Problem" << std::endl
            << "---------------------------------------------" << std::endl
            << "Solves Laplace(x)=f on an N x N grid         " << std::endl
            << " where:                                      " << std::endl
            << "   x = 0 on the boundary                     " << std::endl
            << "   x = 1 on the interior.                    " << std::endl
            << "   f = A * 1, and the initial guess x0       " << std::endl
            << "   is zero.                                  " << std::endl
            << "                                             " << std::endl
            << "Results are compared to the known solution   " << std::endl
            << " x = 1.                                      " << std::endl
            << "---------------------------------------------" << std::endl
            << std::endl;

  // All inputs start at zero so that a failed read never leaves an
  // indeterminate value behind.
  int n = 0;

  std::cout << "Problem size (N) : ";
  std::cin >> n;
  std::cout << "Problem size (N) = " << n << std::endl;

  int b0 = 0, b1 = 0;

  std::cout << "blocks in x direction : ";
  std::cin >> b0;
  std::cout << "blocks in x direction = " << b0 << std::endl << std::endl;
  std::cout << "blocks in y direction : ";
  std::cin >> b1;
  std::cout << "blocks in y direction = " << b1 << std::endl << std::endl;

  int maxSteps = 0;

  std::cout << "max iterations : ";
  std::cin >> maxSteps;
  std::cout << "max iterations = " << maxSteps << std::endl << std::endl;

  // Refuse to continue with unreadable or non-positive sizes instead of
  // building domains and partitions from garbage.
  if (!std::cin || n < 1 || b0 < 1 || b1 < 1)
    {
      std::cerr << "Invalid input: N and the block counts must be "
                << "positive integers." << std::endl;
      Pooma::finalize();
      return 1;
    }

  typedef MultiPatch<UniformTag, Brick> EngineTag_t;
  typedef UniformGridPartition<2> Partition_t;
  typedef UniformGridLayout<2> Layout_t;
  typedef Interval<2> Domain_t;
  typedef Array<2, double, EngineTag_t> Array_t;
  typedef FivePointMatrix<EngineTag_t> Matrix_t;
  typedef GeneratePreconditioner<Matrix_t, FivePointJacobiTag> GPjac_t;
  typedef GeneratePreconditioner<Matrix_t, FivePointSymmetricGaussSeidelTag>
     GPsgs_t;
  typedef GeneratePreconditioner<Matrix_t, FivePointIncompleteCholeski0Tag>
     GPic0_t;
  typedef GeneratePreconditioner<Matrix_t, FivePointIncompleteCholeski1Tag>
     GPic1_t;

  typedef GPjac_t::Type_t PreconditionerJac_t;
  typedef GPsgs_t::Type_t PreconditionerSGS_t;
  typedef GPic0_t::Type_t PreconditionerIc0_t;
  typedef GPic1_t::Type_t PreconditionerIc1_t;

  // The computational domain is [1, n] x [1, n].
  Interval<1> i1(1, n);
  Domain_t domain(i1, i1);

  Loc<2> blocks(b0, b1);

  Partition_t partition(blocks, Matrix_t::guardLayers());
  Layout_t    layout(domain, partition, ReplicatedTag());

  // NOTE(review): the four preconditioner-specific partitions below are
  // constructed but never used; layoutPreconditioner is built from
  // `partition` (the matrix guard layers).  Confirm whether each
  // preconditioner was meant to get a layout with its own guard layers.
  Partition_t p2jac(blocks, PreconditionerJac_t::guardLayers());
  Partition_t p2sgs(blocks, PreconditionerSGS_t::guardLayers());
  Partition_t p2ic0(blocks, PreconditionerIc0_t::guardLayers());
  Partition_t p2ic1(blocks, PreconditionerIc1_t::guardLayers());
  Layout_t    layoutPreconditioner(domain, partition, ReplicatedTag());

  Initializer<Layout_t, EngineTag_t> init(layout);

  Array_t f, x;
  init(f);
  init(x);

  Dot dot;

// the old problem (Poisson)

/*
  // put the Laplacian coeffs in a FivePoint Array
  
  Matrix_t minusLaplace(layout, domain);
  minusLaplace.values() = (n + 1) * (n + 1) * FivePoint(-1, -1, 4, -1, -1);

// end of the old problem

*/

// Stephen's new problem

  double dx = 1.0 / (n + 1);

  Stencil<DiffOp> diffOp(dx * dx);

  Array_t px, py, diffusion;
  init(px);
  init(py);
  init(diffusion);

  // Point coordinates scaled by the mesh spacing, and the diffusion
  // coefficient 1 + px^2 + py evaluated from them.
  px = dx * iota(px.domain()).comp(0);
  py = dx * iota(py.domain()).comp(1);
  diffusion = 1.0 + px * px + py;

  // put the Laplacian coeffs in a FivePoint Array

  Matrix_t minusLaplace(layout, domain);
  minusLaplace.values() = FivePoint(0.0, 0.0, 0.0, 0.0, 0.0);
  minusLaplace.values()(domain) = diffOp(diffusion);

  Pooma::blockAndEvaluate();

// Stephen's new problem ends

  // Build every preconditioner from the assembled matrix.
  PreconditionerJac_t jac(layoutPreconditioner, domain);
  GPjac_t::fill(minusLaplace, jac);

  PreconditionerSGS_t sgs(layoutPreconditioner, domain);
  GPsgs_t::fill(minusLaplace, sgs);

  PreconditionerIc0_t ic0(layoutPreconditioner, domain);
  GPic0_t::fill(minusLaplace, ic0);

  PreconditionerIc1_t ic1(layoutPreconditioner, domain);
  GPic1_t::fill(minusLaplace, ic1);

  bool passed;
  double error;

  // Right-hand side: apply the matrix to the known solution (1 on the
  // domain), then reset x to the zero initial guess.
  x = 0.0;
  x(domain) = 1.0;
  minusLaplace(x, f);
  x = 0.0;

  Array_t diff, xTrue;
  init(diff);
  init(xTrue);

  xTrue = 0.0;
  xTrue(domain) = 1.0;

  // Scratch array that receives the preconditioner applied to xTrue; its
  // maximum is used to scale the tolerance of each preconditioned solve.
  Array_t t1;
  init(t1);
  t1 = 0.0;

  double scale = 0.0;

  std::cout << std::endl << "Solving ..." << std::endl;

  double tol;
  int maxIter = maxSteps;

// solve using null preconditioner

  tol = 1e-6;
  x = 0.0;

  std::cout << "solving using no preconditioning.";
  std::cout << std::endl;
  double noprectime = Pooma::Clock::value();
  PCGSolve (minusLaplace, NullPreconditioner(), x, f, dot, init, maxIter,
            tol);
  std::cout << std::endl;
  std::cout << "unpreconditioned iterations = " << maxIter;
  std::cout << " unpreconditioned solve time = " << Pooma::Clock::value()
     - noprectime << std::endl << std::endl;

  // NOTE(review): max(diff) is the largest signed difference, not the
  // largest absolute difference -- confirm that this is what "Max Error"
  // is meant to report.
  std::cout << "Checking solution..." << std::endl;
  diff = x - xTrue;
  error = max(diff);
  std::cout << "Max Error: " << error << std::endl << std::endl;
  error = sqrt(dot(diff, diff));
  std::cout << "l2 Error: " << error << std::endl << std::endl;

  passed = (error < 1.0e-5);

  std::cout << "-----------------------------------------------------------"
            << std::endl
            << "success?: " << (passed ? "PASSED" : "FAILED")
            << std::endl
            << "-----------------------------------------------------------"
            << std::endl;

// have to reset tol because the CG solver over-writes it
// (maxIter is reset before every solve as well, since it is printed as
// the iteration count afterwards)

// solve using one-step Jacobi preconditioning

  int jac_steps = 1;
  GPjac_t::set_steps(jac_steps);
  jac(xTrue, t1);
  scale = max(t1);
  tol = 1e-6 * scale;
  x = 0.0;

  std::cout << "solving using one-step Jacobi preconditioning.";
  std::cout << std::endl;
  maxIter = maxSteps;
  double onejactime = Pooma::Clock::value();
  PCGSolve (minusLaplace, jac, x, f, dot, init, maxIter, tol);
  std::cout << std::endl;
  std::cout << "one-step Jacobi iterations = " << maxIter;
  std::cout << " one-step Jacobi solve time = " << Pooma::Clock::value()
     - onejactime << std::endl << std::endl;

  std::cout << "Checking solution..." << std::endl;
  diff = x - xTrue;
  error = max(diff);
  std::cout << "Max Error: " << error << std::endl << std::endl;
  error = sqrt(dot(diff, diff));
  std::cout << "l2 Error: " << error << std::endl << std::endl;

  passed = (error < 1.0e-5);

  std::cout << "-----------------------------------------------------------"
            << std::endl
            << "success?: " << (passed ? "PASSED" : "FAILED")
            << std::endl
            << "-----------------------------------------------------------"
            << std::endl;

// solve using two-step Jacobi preconditioning

  jac_steps = 2;
  GPjac_t::set_steps(jac_steps);
  jac(xTrue, t1);
  scale = max(t1);
  tol = 1e-6 * scale;
  x = 0.0;

  std::cout << "solving using two-step Jacobi preconditioning.";
  std::cout << std::endl;
  maxIter = maxSteps;

  double twojactime = Pooma::Clock::value();
  PCGSolve (minusLaplace, jac, x, f, dot, init, maxIter, tol);
  std::cout << std::endl;
  std::cout << "two-step Jacobi iterations = " << maxIter;
  std::cout << " two-step Jacobi solve time = " << Pooma::Clock::value()
     - twojactime << std::endl << std::endl;

  std::cout << "Checking solution..." << std::endl;
  diff = x - xTrue;
  error = max(diff);
  std::cout << "Max Error: " << error << std::endl << std::endl;
  error = sqrt(dot(diff, diff));
  std::cout << "l2 Error: " << error << std::endl << std::endl;

  passed = (error < 1.0e-5);

  std::cout << "-----------------------------------------------------------"
            << std::endl
            << "success?: " << (passed ? "PASSED" : "FAILED")
            << std::endl
            << "-----------------------------------------------------------"
            << std::endl;

// solve using one-step SGS preconditioning

  int sgs_steps = 1;
  GPsgs_t::set_steps(sgs_steps);
  // Scale the tolerance with the SGS preconditioner itself, as every other
  // section does with its own preconditioner (this previously applied the
  // Jacobi preconditioner by mistake).
  sgs(xTrue, t1);
  scale = max(t1);
  tol = 1e-6 * scale;
  x = 0.0;

  std::cout << "solving using one-step symmetric Gauss-Seidel ";
  std::cout << "preconditioning." << std::endl;
  maxIter = maxSteps;
  double onesgstime = Pooma::Clock::value();
  PCGSolve (minusLaplace, sgs, x, f, dot, init, maxIter, tol);
  std::cout << std::endl;
  std::cout << "one-step SGS iterations = " << maxIter;
  std::cout << " one-step SGS solve time = " << Pooma::Clock::value()
     - onesgstime << std::endl << std::endl;

  std::cout << "Checking solution..." << std::endl;
  diff = x - xTrue;
  error = max(diff);
  std::cout << "Max Error: " << error << std::endl << std::endl;
  error = sqrt(dot(diff, diff));
  std::cout << "l2 Error: " << error << std::endl << std::endl;

  passed = (error < 1.0e-5);

  std::cout << "-----------------------------------------------------------"
            << std::endl
            << "success?: " << (passed ? "PASSED" : "FAILED")
            << std::endl
            << "-----------------------------------------------------------"
            << std::endl;

// solve using two-step SGS preconditioning

  sgs_steps = 2;
  GPsgs_t::set_steps(sgs_steps);
  sgs(xTrue, t1);
  scale = max(t1);
  tol = 1e-6 * scale;
  x = 0.0;

  std::cout << "solving using two-step symmetric Gauss-Seidel ";
  std::cout << "preconditioning." << std::endl;
  maxIter = maxSteps;
  double twosgstime = Pooma::Clock::value();
  PCGSolve (minusLaplace, sgs, x, f, dot, init, maxIter, tol);
  std::cout << std::endl;
  std::cout << "two-step SGS iterations = " << maxIter;
  std::cout << " two-step SGS solve time = " << Pooma::Clock::value()
     - twosgstime << std::endl << std::endl;

  std::cout << "Checking solution..." << std::endl;
  diff = x - xTrue;
  error = max(diff);
  std::cout << "Max Error: " << error << std::endl << std::endl;
  error = sqrt(dot(diff, diff));
  std::cout << "l2 Error: " << error << std::endl << std::endl;

  passed = (error < 1.0e-5);

  std::cout << "-----------------------------------------------------------"
            << std::endl
            << "success?: " << (passed ? "PASSED" : "FAILED")
            << std::endl
            << "-----------------------------------------------------------"
            << std::endl;

// solve using the IC(0) preconditioner

  ic0(xTrue, t1);
  scale = max(t1);
  tol = 1e-6 * scale;
  x = 0.0;

  std::cout << "solving using no-fill Incomplete Choleski preconditioning.";
  std::cout << std::endl;
  maxIter = maxSteps;
  double ic0time = Pooma::Clock::value();
  PCGSolve (minusLaplace, ic0, x, f, dot, init, maxIter, tol);
  std::cout << std::endl;
  std::cout << "no-fill Choleski iterations = " << maxIter;
  std::cout << " no-fill Choleski solve time = " << Pooma::Clock::value()
     - ic0time << std::endl << std::endl;

  std::cout << "Checking solution..." << std::endl;
  diff = x - xTrue;
  error = max(diff);
  std::cout << "Max Error: " << error << std::endl << std::endl;
  error = sqrt(dot(diff, diff));
  std::cout << "l2 Error: " << error << std::endl << std::endl;

  passed = (error < 1.0e-5);

  std::cout << "-----------------------------------------------------------"
            << std::endl
            << "success?: " << (passed ? "PASSED" : "FAILED")
            << std::endl
            << "-----------------------------------------------------------"
            << std::endl;

// solve using the IC(1) preconditioner

  ic1(xTrue, t1);
  scale = max(t1);
  tol = 1e-6 * scale;
  x = 0.0;

  std::cout << "solving using level-one Incomplete Choleski ";
  std::cout << "preconditioning." << std::endl;
  maxIter = maxSteps;
  double ic1time = Pooma::Clock::value();
  PCGSolve (minusLaplace, ic1, x, f, dot, init, maxIter, tol);
  std::cout << std::endl;
  std::cout << "level-one Choleski iterations = " << maxIter;
  std::cout << " level-one Choleski solve time = " << Pooma::Clock::value()
     - ic1time << std::endl << std::endl;

  std::cout << "Checking solution..." << std::endl;
  diff = x - xTrue;
  error = max(diff);
  std::cout << "Max Error: " << error << std::endl << std::endl;
  error = sqrt(dot(diff, diff));
  std::cout << "l2 Error: " << error << std::endl << std::endl;

  passed = (error < 1.0e-5);

  std::cout << "-----------------------------------------------------------"
            << std::endl
            << "success?: " << (passed ? "PASSED" : "FAILED")
            << std::endl
            << "-----------------------------------------------------------"
            << std::endl;

  Pooma::finalize();
  return 0;
}
