/*
 *  Copyright (c) 2007-2008 Cyrille Berger <cberger@cberger.net>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation;
 * either version 2, or (at your option) any later version of the License.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; see the file COPYING.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */

// C++ Headers
#include <iostream>
#include <fstream>
#include <cstdlib>
#include <cmath>
#include <cstring>
#include <sys/time.h>
#include <vector>

#include <GTLCore/Array.h>
#include <GTLCore/PixelDescription.h>
#include <GTLCore/Type.h>
#include <GTLCore/Debug.h>

// Name under which this tool identifies itself.
// NOTE(review): presumably consumed by the GTL debug macros (GTL_DEBUG / GTL_ASSERT),
// in which case the unusual spelling must match what GTLCore/Debug.h expects —
// confirm before renaming.
#define COUMPONENT_NAME "ctlbenchmark"

// OpenCTL Headers
#include <OpenCTL/Module.h>
#include <OpenCTL/ModulesManager.h>
#include <OpenCTL/Program.h>
#include <OpenCTL/Version.h>
#include <GTLCore/CompilationMessages.h>

/**
 * Two dimensional point with double precision coordinates.
 * The coordinates are fixed at construction and exposed through read-only accessors.
 */
class PointF {
  public:
    /// Builds the point (@p x, @p y).
    PointF(double x, double y)
      : m_x(x)
      , m_y(y)
    {}

    /// @return the abscissa given at construction
    double x() const
    {
      return m_x;
    }

    /// @return the ordinate given at construction
    double y() const
    {
      return m_y;
    }

  private:
    double m_x;
    double m_y;
};

float interpolateCubic1D (const std::vector<PointF>& table, double p)
{
  if( p <= table[0].x() ) return table[0].y();
  if( p >= table[table.size()-1].x() ) return table[table.size()-1].y();
  
  for( std::size_t i = 0; i < table.size() - 1; ++i )
  {
    if( table[i].x() <= p && p < table[i+1].x() )
    {
      double s = (p - table[i].x()) / (table[i+1].x() - table[i].x());
      double dx1 = (table[i+1].x() - table[i].x());
      double dy1 = (table[i+1].y() - table[i].y());
      
      double m0 = 0.0;
      double m1 = 0.0;
      if( i > 0 )
      {
        double dy0 = (table[i].y() - table[i-1].y());
        double dx0 = (table[i].x() - table[i-1].x());
        m0 = (dy1 + dx1 * dy0 / dx0) / 2;
      }
      if( i < table.size()-2 )
      {
        double dx2 = (table[i+2].x() - table[i+1].x());
        double dy2 = (table[i+2].y() - table[i+1].y());
        m1 = (dy1 + dx1 * dy2 / dx2) / 2;
      }
      if( i == 0) {
        m0 = (3 * dy1 - m1) / 2;
      }
      if( i == table.size()-2 )
      {
        m1 = (3 * dy1 - m0) / 2;
      }
      return table[i].y() * (2 * s*s*s - 3 * s*s + 1) +
          m0 * (s*s*s - 2 * s*s + s) +
          table[i+1].y() * (-2 * s*s*s + 3 * s*s) +
          m1 * (s*s*s - s*s);

    }
  }
  return 0.0;
}

/// Prints the OpenCTL library names, version string, copyright and the value of
/// OpenCTL::LanguageVersion() on the standard output.
void printVersion()
{
  std::cout << OpenCTL::LibraryShortName()
            << " - " << OpenCTL::LibraryName()
            << " - " << OpenCTL::LibraryVersionString()
            << std::endl;
  std::cout << OpenCTL::LibraryCopyright()
            << std::endl;
  std::cout << "CTL Version : "
            << OpenCTL::LanguageVersion()
            << std::endl;
}
/// Prints the command line usage of the tool on the standard output, one line at a time.
void printHelp()
{
  static const char* const helpLines[] = {
    "Usage : ctlbenchmark",
    "",
    "  -h --help               print this message",
    "  -v --version            print the version information"
  };
  const std::size_t lineCount = sizeof(helpLines) / sizeof(helpLines[0]);
  for( std::size_t lineIdx = 0; lineIdx < lineCount; ++lineIdx )
  {
    std::cout << helpLines[lineIdx] << std::endl;
  }
}

// True when the current command line argument argv[ai] equals either spelling `a` or `b`.
// Expects `argv` and the index `ai` to be in scope where the macro is expanded.
// The expansion is parenthesised so that it remains a single operand when it is negated
// or combined with other conditions at the call site.
#define ARG_IS(a,b) (argv[ai] == GTLCore::String(a) or argv[ai] == GTLCore::String(b))

/**
 * Base class of the benchmarks.
 *
 * start() calls initialise() once, then for each of the runCount iterations calls
 * preRun(), run() and postRun(); only run() is timed (wall clock, in microseconds,
 * measured with gettimeofday). The total, best, worst and mean durations are available
 * afterwards; they are all 0 before start() is called or when runCount is not positive.
 */
class Benchmark {
  public:
    /// @param runCount number of timed iterations executed by start()
    Benchmark(int runCount ) : m_runCount(runCount), m_totalTime(0), m_bestTime(0), m_worseTime(0)  {}
    virtual ~Benchmark() {}
    /// Runs the benchmark and (re)computes the statistics.
    void start()
    {
      initialise();
      m_totalTime = 0;
      m_bestTime = 0;
      m_worseTime = 0;
      timeval timeBefore, timeAfter;
      for(int i = 0; i < m_runCount; ++i)
      {
        preRun();
        gettimeofday( &timeBefore, 0 );
        run();
        gettimeofday( &timeAfter, 0 );
        const int delta = static_cast<int>( timeAfter.tv_usec - timeBefore.tv_usec + 1000000 * (timeAfter.tv_sec - timeBefore.tv_sec) );
        postRun();
        // The first run seeds both extremes, so no magic sentinel value is needed.
        if(i == 0 or delta > m_worseTime) m_worseTime = delta;
        if(i == 0 or delta < m_bestTime) m_bestTime = delta;
        m_totalTime += delta;
      }
    }
    /// @return the sum of the durations of all the runs, in microseconds
    int totalTime() const
    {
      return m_totalTime;
    }
    /// @return the duration of the fastest run, in microseconds
    int bestTime() const
    {
      return m_bestTime;
    }
    /// @return the duration of the slowest run, in microseconds
    int worseTime() const
    {
      return m_worseTime;
    }
    /// @return the mean duration of a run, in microseconds (0 when there is no run)
    int meanTime() const
    {
      // Avoid an integer division by zero when the benchmark is configured with no run.
      if( m_runCount <= 0 ) return 0;
      return m_totalTime / m_runCount;
    }
    /// Prints the statistics on the standard output.
    void dump() const
    {
      std::cout << "Total: " << totalTime() << " mean: " << meanTime() << " best: " << bestTime() << " worse: " << worseTime() << std::endl;
    }
  protected:
    /// Called once by start(), before the first iteration (not timed).
    virtual void initialise() = 0;
    /// The timed part of an iteration.
    virtual void run() = 0;
    /// Called before each run() (not timed).
    virtual void preRun() = 0;
    /// Called after each run() (not timed).
    virtual void postRun() = 0;
  private:
    int m_runCount;
    int m_totalTime;
    int m_bestTime;
    int m_worseTime;
};

/// Deterministic pseudo-random value in [0, 1] derived from the index @p n,
/// computed as 0.5 * (cos(n^4) + 1).
float randomNum( int n )
{
  const double phase = pow( n, 4 );
  return static_cast<float>( ( cos( phase ) + 1.0 ) * 0.5 );
}

/// Fills @p array, viewed as a sequence of floats, with randomNum(index) for each index.
/// The number of floats is the array's size() divided by sizeof(float).
void randomArray( GTLCore::Array* array )
{
  const std::size_t floatCount = array->size() / sizeof(float);
  float* const values = array->data<float>();
  std::size_t index = 0;
  while( index < floatCount )
  {
    values[ index ] = randomNum( index );
    ++index;
  }
}

/// Compares two distinct arrays of the same size, viewed as sequences of floats.
/// @return true when the raw bytes are identical, or when every pair of floats differs
///         by at most 1e-6; false at the first pair that differs by more (the index,
///         both values and their difference are sent to GTL_DEBUG).
bool compareArrays( const GTLCore::Array* array1, const GTLCore::Array* array2 )
{
  GTL_ASSERT( array1 != array2 );
  GTL_ASSERT( array1->size() == array2->size() );

  // Fast path: bitwise identical buffers need no element by element check.
  if( memcmp( array1->rawData(), array2->rawData(), array2->size() ) == 0 )
  {
    return true;
  }

  const float* first = array1->data<float>();
  const float* second = array2->data<float>();
  for( unsigned int idx = 0; idx < array2->size() / sizeof(float); ++idx )
  {
    if( fabs( first[ idx ] - second[ idx ] ) > 1e-6 )
    {
      GTL_DEBUG( idx << " " << first[ idx ] << " " << second[ idx ] << " " << fabs( first[ idx ] - second[ idx ] ) );
      return false;
    }
  }
  return true;
}

/**
 * Benchmark of the native C++ implementation of the interpolation.
 * It keeps its own copy of the control points and works on the array given at
 * construction, which is checked against the reference array after each run.
 */
class NativeBenchmark : public Benchmark {
  public:
    /// @param _points  control points of the interpolation table (copied)
    /// @param runCount number of timed iterations
    /// @param array    working array, overwritten by each run
    /// @param refArray expected content of @p array after a run
    NativeBenchmark( const std::vector< PointF >& _points, int runCount, GTLCore::Array* array, const GTLCore::Array* refArray )
      : Benchmark( runCount )
      , points( _points )
      , m_array( array )
      , m_refArray( refArray )
    {
    }
    virtual ~NativeBenchmark()
    {
    }

  protected:
    /// Nothing to prepare for the native implementation.
    virtual void initialise()
    {
    }
    virtual void preRun();
    virtual void postRun();
    virtual void run();

  private:
    std::vector< PointF > points;
    GTLCore::Array* m_array;
    const GTLCore::Array* m_refArray;
};

/// Refills the working array with the deterministic pseudo-random input before each
/// timed run (run() transforms the array in place, so it has to be reset every time).
void NativeBenchmark::preRun()
{
  randomArray( m_array );
}

/// Timed part of the native benchmark: walks the working array pixel by pixel
/// (4 x Float32) and replaces each of the first three channels by the interpolation
/// applied twice; the fourth channel (alpha) is left untouched.
void NativeBenchmark::run()
{
  GTLCore::PixelDescription pd( GTLCore::Type::Float32, 4);
  // Size of one pixel in bytes; constant for the whole loop, so computed once.
  const int pixelBytes = pd.bitsSize() / 8;
  const int byteCount = m_array->size();
  // Stop before a trailing partial pixel instead of writing past the end of the buffer.
  for( int offset = 0; offset + pixelBytes <= byteCount; offset += pixelBytes )
  {
    float* pixel = reinterpret_cast<float*>(m_array->rawData() + offset);
    pixel[0] = interpolateCubic1D( points, interpolateCubic1D( points, pixel[0] ) );
    pixel[1] = interpolateCubic1D( points, interpolateCubic1D( points, pixel[1] ) );
    pixel[2] = interpolateCubic1D( points, interpolateCubic1D( points, pixel[2] ) );
    // pixel[3] (alpha) is passed through unchanged.
  }
}

/// Verifies the result of the run: aborts the process when the working array does not
/// match the reference array.
void NativeBenchmark::postRun()
{
  const bool matchesReference = compareArrays( m_array, m_refArray );
  if( matchesReference )
  {
    return;
  }
  abort();
}

class CtlBenchmark : public Benchmark {
  protected:
    CtlBenchmark(const std::vector< PointF >& _points, int runCount, GTLCore::Array* array, const GTLCore::Array* refArray ) : Benchmark(runCount ), points(_points), program(0), m_array(array), m_refArray( refArray ) {}
    virtual ~CtlBenchmark() {}
    void preRun();
    void compile();
    virtual void postRun();
    void apply();
    GTLCore::Array* array() { return m_array; }
  public:
    static GTLCore::String sourceCode( const std::vector<PointF>& _points);
    static GTLCore::String valueListToCTL( const std::vector<PointF>& _points, double _scale );
  private:
     std::vector< PointF > points;
     OpenCTL::Program* program;
     GTLCore::Array* m_array;
     const GTLCore::Array* m_refArray;
};

/// Refills the working array with the deterministic pseudo-random input before each
/// timed run (apply() transforms the array in place, so it has to be reset every time).
void CtlBenchmark::preRun()
{
  randomArray( m_array );
}

/// Compiles the CTL source generated from the control points and builds the program
/// calling its "apply" function on 4 x Float32 pixels. Any previously built program is
/// released first. Prints the compilation messages and aborts the process when the
/// compilation fails.
void CtlBenchmark::compile()
{
  // compile() may be called once per run: release the previous program instead of
  // leaking it when the pointer is overwritten below.
  delete program;
  program = 0;

  OpenCTL::Module p;
  p.setSource( "benchmark", sourceCode(points) );
  p.compile();
  if(not p.isCompiled())
  {
    std::cout << "Error: " << std::endl << p.compilationMessages().toString() << std::endl;
    abort();
  }
  program = new OpenCTL::Program( "apply", &p, GTLCore::PixelDescription( GTLCore::Type::Float32, 4) );
}

/// Verifies the result of the run: aborts the process when the working array does not
/// match the reference array.
void CtlBenchmark::postRun()
{
  const bool matchesReference = compareArrays( m_array, m_refArray );
  if( matchesReference )
  {
    return;
  }
  abort();
}

/// Runs the compiled program over the working array, using the same array as source
/// and destination. compile() must have been called before: `program` is null until then.
void CtlBenchmark::apply()
{
  program->apply( *array(), *array() );
}

/// Serialises the points as a brace-enclosed list of "{x,y}" pairs separated by commas,
/// e.g. "{ {0,0},{1,1} }", with each coordinate multiplied by @p _scale.
/// An empty list gives an empty, but still balanced, pair of braces.
GTLCore::String CtlBenchmark::valueListToCTL( const std::vector<PointF>& _points, double _scale )
{
  GTLCore::String result = "{ ";
  for(std::size_t i = 0; i < _points.size(); ++i)
  {
    const PointF& point = _points[i];
    if( i != 0 )
    {
      result += ",";
    }
    // The opening brace is emitted per element, so that no brace is left unclosed
    // when there is no point at all.
    result += "{";
    result += GTLCore::String::number(point.x() * _scale) + "," + GTLCore::String::number(point.y() * _scale) + "}";
  }
  return result + " }";
}

/// Builds the CTL source of the benchmark program: a constant table `lightTable`
/// generated from @p _points (with a scale of 1.0), followed by an `apply` function that
/// runs interpolateCubic1D twice on each of the red, green and blue inputs and copies
/// the alpha input to the alpha output.
GTLCore::String CtlBenchmark::sourceCode(const std::vector<PointF>& _points)
{
  GTLCore::String program16Src = "const float lightTable[][2] = " + valueListToCTL( _points, 1.0 ) + ";";
  // The function below is one single string literal continued with backslash-newlines:
  // the leading white spaces of the continued lines are part of the generated source.
  program16Src += "void apply( float rIn, float gIn, float bIn, float aIn, output float rOut, output float gOut, output float bOut, output float aOut) \
  { \
      rOut = interpolateCubic1D( lightTable, interpolateCubic1D( lightTable, rIn) ); \
      gOut = interpolateCubic1D( lightTable, interpolateCubic1D( lightTable, gIn) ); \
      bOut = interpolateCubic1D( lightTable, interpolateCubic1D( lightTable, bIn) ); \
      aOut = aIn; \
  }";
  return program16Src;
}

/**
 * CTL benchmark where each timed run compiles the program and then applies it, so the
 * compilation time is part of the measure.
 */
class CompileEachTimeCtlBenchmark : public CtlBenchmark {
  public:
    CompileEachTimeCtlBenchmark(const std::vector< PointF >& points, int runCount, GTLCore::Array* array, const GTLCore::Array* refArray  )
      : CtlBenchmark(points, runCount, array, refArray )
    {
    }
    virtual ~CompileEachTimeCtlBenchmark()
    {
    }

  protected:
    /// Nothing is prepared ahead of the runs.
    virtual void initialise() {}

    /// Timed part: compilation followed by the execution of the program.
    virtual void run()
    {
      compile();
      apply();
    }
};

/**
 * CTL benchmark where the program is compiled once, before the runs, so that only the
 * execution of the program is measured.
 */
class CompileOnceCtlBenchmark : public CtlBenchmark {
  public:
    CompileOnceCtlBenchmark(const std::vector< PointF >& points, int runCount, GTLCore::Array* array, const GTLCore::Array* refArray  )
      : CtlBenchmark(points, runCount, array, refArray )
    {
    }
    virtual ~CompileOnceCtlBenchmark()
    {
    }

  protected:
    /// Untimed preparation: the program is compiled here.
    virtual void initialise() { compile(); }

    /// Timed part: execution of the already compiled program.
    virtual void run() { apply(); }
};

/// Computes the reference result in @p _array: the array is filled with the
/// deterministic pseudo-random input and then transformed in place by the CTL program
/// generated from @p points. Prints the compilation messages and aborts the process
/// when the CTL source does not compile.
void computeRefArray( GTLCore::Array& _array, const std::vector< PointF >& points )
{
  OpenCTL::Module refModule;
  refModule.setSource( "benchmark", CtlBenchmark::sourceCode(points) );
  refModule.compile();

  const bool compiled = refModule.isCompiled();
  if( !compiled )
  {
    std::cout << "Error: " << std::endl << refModule.compilationMessages().toString() << std::endl;
    abort();
  }

  OpenCTL::Program refProgram( "apply", &refModule, GTLCore::PixelDescription( GTLCore::Type::Float32, 4) );
  randomArray( &_array );
  refProgram.apply( _array, _array );
}

/// Entry point: handles -h/--help and -v/--version, then computes the reference result
/// with the CTL program and runs the three benchmarks (native, CTL compiled once, CTL
/// compiled at each run), printing the statistics of each of them.
/// @return EXIT_SUCCESS, or EXIT_FAILURE when the command line is invalid
int main(int argc, char** argv)
{
  // A single trailing positional argument is accepted; it is stored here but is not
  // used by any of the benchmarks.
  GTLCore::String fileName = "";
  for(int ai = 1; ai < argc; ai++)
  {
    if(ARG_IS("-h","--help"))
    {
      printHelp();
      return EXIT_SUCCESS;
    } else if(ARG_IS("-v","--version"))
    {
      printVersion();
      return EXIT_SUCCESS;
    } else if( ai != argc - 1)
    {
      // Do not start a long benchmark (and report success) on a command line that was
      // just declared invalid.
      std::cerr << "Invalid command line parameters." << std::endl;
      return EXIT_FAILURE;
    } else {
      fileName = argv[ai];
    }
  }

  // Control points of the curve used by all the benchmarks.
  std::vector< PointF > points;
  points.push_back( PointF( 0.0, 0.0) );
  points.push_back( PointF( 0.3, 0.2) );
  points.push_back( PointF( 0.5, 0.5) );
  points.push_back( PointF( 0.6, 0.8) );
  points.push_back( PointF( 1.0, 1.0) );
  
  GTLCore::PixelDescription pd( GTLCore::Type::Float32, 4);
  
  int runs = 100;
  int arraySize = 1000000; // number of pixels
  
  GTLCore::Array array( arraySize * pd.bitsSize() / 8 );
  GTLCore::Array refArray( arraySize * pd.bitsSize() / 8 );
  
  // Expected content of `array` after a run of any of the benchmarks.
  computeRefArray( refArray, points );
  
  {
    std::cout << "Native" << std::endl;
    NativeBenchmark benchmark( points, runs, &array, &refArray );
    benchmark.start();
    benchmark.dump();
  }

  {
    std::cout << "Compile once " << std::endl;
    CompileOnceCtlBenchmark benchmark(points, runs, &array, &refArray);
    benchmark.start();
    benchmark.dump();
  }

  {
    std::cout << "Compile each time for level" << std::endl;
    CompileEachTimeCtlBenchmark benchmark(points, runs, &array, &refArray);
    benchmark.start();
    benchmark.dump();
  }
  
  return EXIT_SUCCESS;
}
