///////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2012 DreamWorks Animation LLC
//
// All rights reserved. This software is distributed under the
// Mozilla Public License 2.0 ( http://www.mozilla.org/MPL/2.0/ )
//
// Redistributions of source code must retain the above copyright
// and license notice and the following restrictions and disclaimer.
//
// *     Neither the name of DreamWorks Animation nor the names of
// its contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// IN NO EVENT SHALL THE COPYRIGHT HOLDERS' AND CONTRIBUTORS' AGGREGATE
// LIABILITY FOR ALL CLAIMS REGARDLESS OF THEIR BASIS EXCEED US$250.00.
//
///////////////////////////////////////////////////////////////////////////

#ifndef OPENVDB_MATH_MAT4_H_HAS_BEEN_INCLUDED
#define OPENVDB_MATH_MAT4_H_HAS_BEEN_INCLUDED

#include <openvdb/Exceptions.h>
#include <iomanip>
#include <assert.h>
#include <math.h>
#include <algorithm>
#include "Math.h"
#include "Mat3.h"
#include "Vec3.h"
#include "Vec4.h"


namespace openvdb {
OPENVDB_USE_VERSION_NAMESPACE
namespace OPENVDB_VERSION_NAME {
namespace math {

template<typename T> class Vec4;


/// @class Mat4 Mat4.h
/// @brief 4x4 matrix class.
template<typename T>
class Mat4: public Mat<4, T>
{
public:
    /// Data type held by the matrix.
    typedef T value_type;
    typedef T ValueType;

    /// Trivial constructor: the body is empty, so this class does NOT
    /// initialize the matrix elements.
    Mat4() {}

    /// @brief Construct from an array of 16 elements given in row-major order.
    /// @param a  pointer to at least 16 readable elements; each element is
    ///           implicitly converted to @c T on assignment.
    /** @verbatim
        a[0]  a[1]  a[2]  a[3]
        a[4]  a[5]  a[6]  a[7]
        a[8]  a[9]  a[10] a[11]
        a[12] a[13] a[14] a[15]
        @endverbatim */
    template<typename Source>
    Mat4(Source *a)
    {
        // The loop counter is a plain local: the "register" storage class
        // specifier was removed from the language in C++17.
        for (int i = 0; i < 16; ++i) {
            this->mm[i] = a[i];
        }
    }

    /// @brief Construct from sixteen scalars listed in row-major order.
    /** @verbatim
        a b c d
        e f g h
        i j k l
        m n o p
        @endverbatim */
    template<typename Source>
    Mat4(Source a, Source b, Source c, Source d,
        Source e, Source f, Source g, Source h,
        Source i, Source j, Source k, Source l,
        Source m, Source n, Source o, Source p)
    {
        // Gather the arguments in storage order, then copy them across
        // using the usual implicit Source -> T conversion.
        const Source elems[16] = {
            a, b, c, d,
            e, f, g, h,
            i, j, k, l,
            m, n, o, p
        };
        for (int idx = 0; idx < 16; ++idx) {
            this->mm[idx] = elems[idx];
        }
    }

    /// @brief Construct a matrix from four vectors by forwarding to setBasis().
    /// @note setBasis() stores v1, v2, v3, v4 in elements 0-3, 4-7, 8-11 and
    /// 12-15 respectively, i.e. as the rows of the row-major layout used by
    /// operator()(i,j), not as its columns.
    /// @note NOTE(review): setBasis() takes Vec4<T>, so this presumably only
    /// compiles when Vec4<Source> is convertible to Vec4<T> -- confirm.
    template<typename Source>
    Mat4(const Vec4<Source> &v1, const Vec4<Source> &v2,
         const Vec4<Source> &v3, const Vec4<Source> &v4)
    {
        setBasis(v1, v2, v3, v4);
    }

    /// Construct from the generic 4x4 base type, copying element by element
    Mat4(const Mat<4, T> &m)
    {
        for (int row = 0; row < 4; ++row) {
            const int base = row * 4;
            for (int col = 0; col < 4; ++col) {
                this->mm[base + col] = m[row][col];
            }
        }
    }

    /// Conversion constructor: every element of @a m is static_cast to @c T
    template<typename Source>
    explicit Mat4(const Mat4<Source> &m)
    {
        const Source* const from = m.asPointer();

        int n = 0;
        while (n < 16) {
            this->mm[n] = static_cast<T>(from[n]);
            ++n;
        }
    }

    /// @brief Predefined constant for the identity matrix.
    /// @return a const reference to the static member sIdentity
    static const Mat4<T>& identity() {
        return sIdentity;
    }

    /// @brief Predefined constant for the zero matrix.
    /// @return a const reference to the static member sZero
    static const Mat4<T>& zero() {
        return sZero;
    }

    /// Set ith row to vector v
    void setRow(int i, const Vec4<T> &v)
    {
        // assert(i>=0 && i<4);
        int i4 = i * 4;
        this->mm[i4+0] = v[0];
        this->mm[i4+1] = v[1];
        this->mm[i4+2] = v[2];
        this->mm[i4+3] = v[3];
    }

    /// Return a copy of the ith row, e.g.    Vec4f v = m.row(1);
    Vec4<T> row(int i) const
    {
        // assert(i>=0 && i<4);
        const T* const r = &this->mm[4 * i];
        return Vec4<T>(r[0], r[1], r[2], r[3]);
    }

    /// Overwrite the jth column with the four components of v (no bounds check)
    void setCol(int j, const Vec4<T>& v)
    {
        // assert(j>=0 && j<4);
        for (int r = 0; r < 4; ++r) {
            this->mm[4 * r + j] = v[r];
        }
    }

    /// Return a copy of the jth column, e.g.    Vec4f v = m.col(0);
    Vec4<T> col(int j) const
    {
        // assert(j>=0 && j<4);
        return Vec4<T>(this->mm[j], this->mm[4 + j],
                       this->mm[8 + j], this->mm[12 + j]);
    }

    //@{
    /// Array style reference to ith row
    /// e.g.    m[1][3] = 4;
    /// @return a pointer to element i<<2 (= 4*i), the first element of row i.
    /// No bounds check is performed on i.
    T* operator[](int i) { return &(this->mm[i<<2]); }
    const T* operator[](int i) const { return &(this->mm[i<<2]); }
    //@}

    /// For certain legacy APIs, we need to treat the contents as a pointer.
    /// @remark This was originally designed as an "operator T*()", but that
    /// automatic pointer cast would be applied in unexpected cases, such as
    /// std::min/max.  The implicit conversions below are therefore compiled
    /// only when ALLOW_CAST_TO_POINTER is defined (and are marked deprecated
    /// on GCC 3.4 and later); asPointer() is always available.
#ifdef  ALLOW_CAST_TO_POINTER
    /// Cast to T*
    operator T*()
#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
            __attribute__ ((deprecated))
#endif
            {return this->mm;}
    /// Cast to const T*
    operator const T*() const
#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
            __attribute__ ((deprecated))
#endif
            {return this->mm;}
#endif
    /// Explicit access to the element storage (row-major order)
    T* asPointer() {return this->mm;}
    /// Explicit read-only access to the element storage (row-major order)
    const T* asPointer() const {return this->mm;}

    /// Alternative indexed reference to the elements
    /// Note that the indices are row first and column second.
    /// e.g.    m(0,0) = 1;
    /// @return a mutable reference to element 4*i+j; indices are not checked
    T& operator()(int i, int j)
    {
        // assert(i>=0 && i<4);
        // assert(j>=0 && j<4);
        return this->mm[4*i+j];
    }

    /// Alternative indexed read access to the elements,
    /// Note that the indices are row first and column second.
    /// e.g.    float f = m(1,0);
    /// @return a copy (not a reference) of element 4*i+j; indices are not checked
    T operator()(int i, int j) const
    {
        // assert(i>=0 && i<4);
        // assert(j>=0 && j<4);
        return this->mm[4*i+j];
    }

    /// Set the columns of "this" matrix to the vectors v1, v2, v3, v4
    void setBasis(const Vec4<T> &v1, const Vec4<T> &v2,
                         const Vec4<T> &v3, const Vec4<T> &v4)
    {
        this->mm[0 ] = v1[0];
        this->mm[1 ] = v1[1];
        this->mm[2 ] = v1[2];
        this->mm[3 ] = v1[3];
        this->mm[4 ] = v2[0];
        this->mm[5 ] = v2[1];
        this->mm[6 ] = v2[2];
        this->mm[7 ] = v2[3];
        this->mm[8 ] = v3[0];
        this->mm[9 ] = v3[1];
        this->mm[10] = v3[2];
        this->mm[11] = v3[3];
        this->mm[12] = v4[0];
        this->mm[13] = v4[1];
        this->mm[14] = v4[2];
        this->mm[15] = v4[3];
    }


    /// Set every element of "this" matrix to zero
    void setZero()
    {
        for (int idx = 0; idx < 16; ++idx) {
            this->mm[idx] = 0;
        }
    }

    /// Set "this" matrix to identity: ones on the diagonal, zeros elsewhere
    void setIdentity()
    {
        for (int r = 0; r < 4; ++r) {
            for (int c = 0; c < 4; ++c) {
                this->mm[4 * r + c] = (r == c) ? 1 : 0;
            }
        }
    }


    /// Set upper left to a Mat3
    void setMat3(const Mat3<T> &m)
    {
        for (int i = 0; i < 3; i++)
            for (int j=0; j < 3; j++)
                this->mm[i*4+j] = m[i][j];
    }

    /// Return a copy of the upper-left 3x3 block as a Mat3
    Mat3<T> getMat3() const
    {
        Mat3<T> upperLeft;

        for (int r = 0; r < 3; ++r) {
            const T* const srcRow = &this->mm[4 * r];
            for (int c = 0; c < 3; ++c) {
                upperLeft[r][c] = srcRow[c];
            }
        }

        return upperLeft;
    }

    /// Return the translation component, i.e. elements 12, 13 and 14
    /// (the first three entries of the last row)
    Vec3<T> getTranslation() const
    {
        return Vec3<T>(this->mm[12],this->mm[13],this->mm[14]);
    }

    /// Overwrite the translation component (elements 12, 13 and 14) with t;
    /// all other elements are left unchanged.
    void setTranslation(const Vec3<T> &t)
    {
        for (int k = 0; k < 3; ++k) {
            this->mm[12 + k] = t[k];
        }
    }

    /// @brief Assignment from a matrix of a possibly different element type.
    /// @return a const reference to this matrix
    /// @note Copies numElements() values from m's storage
    /// (NOTE(review): presumably 16 for a 4x4 matrix -- confirm in Mat).
    template<typename Source>
    const Mat4& operator=(const Mat4<Source> &m)
    {
        const Source *src = m.asPointer();

        // don't suppress warnings when assigning from different numerical types
        // (std::copy converts implicitly, so narrowing is still diagnosed)
        std::copy(src, (src + this->numElements()), this->mm);
        return *this;
    }

    /// Test if "this" is equivalent to m with tolerance of eps value
    bool eq(const Mat4 &m, T eps=1.0e-8) const
    {
        for (int i = 0; i < 16; i++) {
            if (!isApproxEqual(this->mm[i],m.mm[i],eps))
                return false;
        }
        return true;
    }

    /// Negation operator: returns a matrix whose elements are the negated
    /// elements of this one, e.g.   m1 = -m2;
    Mat4<T> operator-() const
    {
        Mat4<T> negated;
        for (int idx = 0; idx < 16; ++idx) {
            negated.mm[idx] = -this->mm[idx];
        }
        return negated;
    }

    /// @brief Element-wise sum: this = m1 + m2
    /// @return a const reference to this matrix
    /// @note m1, m2 and this need not be distinct objects, e.g., m1.add(m1, m2):
    /// each output element depends only on the input elements at the same index.
    template <typename T0, typename T1>
    const Mat4& add(const Mat4<T0> &m1, const Mat4<T1> &m2)
    {
        const T0* const lhs = m1.asPointer();
        const T1* const rhs = m2.asPointer();

        for (int idx = 0; idx < 16; ++idx) {
            this->mm[idx] = lhs[idx] + rhs[idx];
        }
        return *this;
    }

    /// @brief Element-wise difference: this = m1 - m2
    /// @return a const reference to this matrix
    /// @note m1, m2 and this need not be distinct objects, e.g., m1.sub(m1, m2):
    /// each output element depends only on the input elements at the same index.
    template <typename T0, typename T1>
    const Mat4& sub(const Mat4<T0> &m1, const Mat4<T1> &m2)
    {
        const T0* const lhs = m1.asPointer();
        const T1* const rhs = m2.asPointer();

        for (int idx = 0; idx < 16; ++idx) {
            this->mm[idx] = lhs[idx] - rhs[idx];
        }
        return *this;
    }

    /// @brief Element-wise scaling: this = scalar * m
    /// @param scalar  factor applied to every element of m
    /// @param m       source matrix (may be this matrix itself)
    /// @return a const reference to this matrix
    template <typename T0, typename T1>
    const Mat4& scale(T0 scalar, const Mat4<T1> &m)
    {
        const T1* const from = m.asPointer();

        for (int idx = 0; idx < 16; ++idx) {
            this->mm[idx] = scalar * from[idx];
        }
        return *this;
    }

    /// @brief Matrix product: this = m1 * m2
    /// @param m1  left-hand operand
    /// @param m2  right-hand operand
    /// @return a const reference to this matrix
    /// @note m1 and m2 must not be the same object as this: the result is
    /// written in place while the operands are still being read.
    template <typename T0, typename T1>
    const Mat4& mult(const Mat4<T0> &m1, const Mat4<T1> &m2)
    {
        const T0 *src1 = m1.asPointer();
        const T1 *src2 = m2.asPointer();

        // Plain locals: the "register" storage class specifier was removed
        // from the language in C++17.
        for (int i = 0; i < 4; i++) {
            const int i4 = 4 * i;
            this->mm[i4+0] = static_cast<T>(src1[i4+0]*src2[4*0+0] +
                                    src1[i4+1]*src2[4*1+0] +
                                    src1[i4+2]*src2[4*2+0] +
                                    src1[i4+3]*src2[4*3+0]);

            this->mm[i4+1] = static_cast<T>(src1[i4+0]*src2[4*0+1] +
                                    src1[i4+1]*src2[4*1+1] +
                                    src1[i4+2]*src2[4*2+1] +
                                    src1[i4+3]*src2[4*3+1]);

            this->mm[i4+2] = static_cast<T>(src1[i4+0]*src2[4*0+2] +
                                    src1[i4+1]*src2[4*1+2] +
                                    src1[i4+2]*src2[4*2+2] +
                                    src1[i4+3]*src2[4*3+2]);

            this->mm[i4+3] = static_cast<T>(src1[i4+0]*src2[4*0+3] +
                                    src1[i4+1]*src2[4*1+3] +
                                    src1[i4+2]*src2[4*2+3] +
                                    src1[i4+3]*src2[4*3+3]);
        }

        return *this;
    }

    /// @brief Multiply every element by @a scalar in place:
    /// \f$m_{i,j} *= scalar\f$ for \f$i, j \in [0, 3]\f$
    /// @return a const reference to this matrix
    template <typename S>
    const Mat4<T>& operator*=(S scalar)
    {
        for (int idx = 0; idx < 16; ++idx) {
            this->mm[idx] *= scalar;
        }
        return *this;
    }

    /// @brief Add @a m1 to this matrix element by element:
    /// \f$m0_{i,j} += m1_{i,j}\f$ for \f$i, j \in [0, 3]\f$
    /// @return a const reference to this matrix
    template <typename S>
    const Mat4<T> &operator+=(const Mat4<S> &m1)
    {
        const S* const rhs = m1.asPointer();

        for (int idx = 0; idx < 16; ++idx) {
            this->mm[idx] += rhs[idx];
        }

        return *this;
    }

    /// @brief Subtract @a m1 from this matrix element by element:
    /// \f$m0_{i,j} -= m1_{i,j}\f$ for \f$i, j \in [0, 3]\f$
    /// @return a const reference to this matrix
    template <typename S>
    const Mat4<T> &operator-=(const Mat4<S> &m1)
    {
        const S* const rhs = m1.asPointer();

        for (int idx = 0; idx < 16; ++idx) {
            this->mm[idx] -= rhs[idx];
        }

        return *this;
    }

    /// @brief Post-multiply this matrix by @a m1 in place:
    /// \f$m_{i,j} = \sum_{k} m0_{i,k}*m1_{k,j}\f$ for \f$i, j \in [0, 3]\f$,
    /// where m0 is the value of this matrix before the call.
    /// @return a const reference to this matrix
    template <typename S>
    const Mat4<T> &operator*=(const Mat4<S> &m1)
    {
        // Work from a snapshot of the left operand, since the product is
        // written straight back into this matrix.
        Mat4<T> lhsCopy(*this);

        const T* const lhs = lhsCopy.asPointer();
        const S* const rhs = m1.asPointer();

        for (int row = 0; row < 4; ++row) {
            const T* const a = lhs + 4 * row;
            for (int col = 0; col < 4; ++col) {
                this->mm[4 * row + col] = static_cast<T>(a[0]*rhs[col] +
                                                         a[1]*rhs[4 + col] +
                                                         a[2]*rhs[8 + col] +
                                                         a[3]*rhs[12 + col]);
            }
        }
        return *this;
    }

    /// @return a new matrix that is the transpose of this one
    /// (this matrix is not modified)
    Mat4 transpose() const
    {
        Mat4<T> flipped;
        for (int r = 0; r < 4; ++r) {
            for (int c = 0; c < 4; ++c) {
                flipped.mm[4 * r + c] = this->mm[4 * c + r];
            }
        }
        return flipped;
    }


    /// @brief Compute the inverse of this matrix.
    /// @param tolerance  passed to isApproxEqual() when testing the full
    ///                   determinant (and the scalar h) against zero
    /// @return inverse of this
    /// @throw ArithmeticError if singular
    Mat4 inverse(T tolerance = 0) const
    {
        //
        // inv [ A  | b ]  =  [ E  | f ]    A: 3x3, b: 3x1, c': 1x3 d: 1x1
        //     [ c' | d ]     [ g' | h ]
        //
        // If A is invertible use
        //
        //   E  = A^-1 + p*h*r'
        //   p  = A^-1 * b
        //   f  = -p * h
        //   g' = -h * r'
        //   h  = 1 / (d - c'*p)
        //   r' = c'*A^-1
        //
        // Otherwise use gauss-jordan elimination
        //

        //
        // We create this alias to ourself so we can easily use own subscript
        // operator.
        const Mat4<T>& m(*this);

        // Products shared between the determinant of A and the entries of A^-1
        T m0011 = m[0][0] * m[1][1];
        T m0012 = m[0][0] * m[1][2];
        T m0110 = m[0][1] * m[1][0];
        T m0210 = m[0][2] * m[1][0];
        T m0120 = m[0][1] * m[2][0];
        T m0220 = m[0][2] * m[2][0];

        // Determinant of the upper-left 3x3 block A
        T detA = m0011 * m[2][2] - m0012 * m[2][1] - m0110 * m[2][2]
               + m0210 * m[2][1] + m0120 * m[1][2] - m0220 * m[1][1];

        // "Perspective" here means the last column is not exactly (0,0,0,1)
        bool hasPerspective =
                (!isExactlyEqual(m[0][3], T(0.0)) ||
                 !isExactlyEqual(m[1][3], T(0.0)) ||
                 !isExactlyEqual(m[2][3], T(0.0)) ||
                 !isExactlyEqual(m[3][3], T(1.0)));

        // Full 4x4 determinant, expanded along the last column
        T det;
        if (hasPerspective) {
            det = m[0][3] * det3(m, 1,2,3, 0,2,1)
                + m[1][3] * det3(m, 2,0,3, 0,2,1)
                + m[2][3] * det3(m, 3,0,1, 0,2,1)
                + m[3][3] * detA;
        } else {
            det = detA * m[3][3];
        }

        Mat4<T> inv;
        bool invertible;

        if (isApproxEqual(det,T(0.0),tolerance)) {
            invertible = false;

        } else if (isApproxEqual(detA,T(0.0),T(1e-8))) {
            // detA is too small to rely on inversion by subblocks
            invertible = m.invert(inv, tolerance);

        } else {
            invertible = true;
            // From here on detA holds the reciprocal of the determinant of A
            detA = 1.0 / detA;

            //
            // Calculate A^-1
            //
            inv[0][0] = detA * ( m[1][1] * m[2][2] - m[1][2] * m[2][1]);
            inv[0][1] = detA * (-m[0][1] * m[2][2] + m[0][2] * m[2][1]);
            inv[0][2] = detA * ( m[0][1] * m[1][2] - m[0][2] * m[1][1]);

            inv[1][0] = detA * (-m[1][0] * m[2][2] + m[1][2] * m[2][0]);
            inv[1][1] = detA * ( m[0][0] * m[2][2] - m0220);
            inv[1][2] = detA * ( m0210   - m0012);

            inv[2][0] = detA * ( m[1][0] * m[2][1] - m[1][1] * m[2][0]);
            inv[2][1] = detA * ( m0120 - m[0][0] * m[2][1]);
            inv[2][2] = detA * ( m0011 - m0110);

            if (hasPerspective) {
                //
                // Calculate r, p, and h
                //
                Vec3<T> r;
                r[0] = m[3][0] * inv[0][0] + m[3][1] * inv[1][0]
                     + m[3][2] * inv[2][0];
                r[1] = m[3][0] * inv[0][1] + m[3][1] * inv[1][1]
                     + m[3][2] * inv[2][1];
                r[2] = m[3][0] * inv[0][2] + m[3][1] * inv[1][2]
                     + m[3][2] * inv[2][2];

                Vec3<T> p;
                p[0] = inv[0][0] * m[0][3] + inv[0][1] * m[1][3]
                     + inv[0][2] * m[2][3];
                p[1] = inv[1][0] * m[0][3] + inv[1][1] * m[1][3]
                     + inv[1][2] * m[2][3];
                p[2] = inv[2][0] * m[0][3] + inv[2][1] * m[1][3]
                     + inv[2][2] * m[2][3];

                // At this point h is still the denominator d - c'*p
                T h = m[3][3] - p.dot(Vec3<T>(m[3][0],m[3][1],m[3][2]));
                if (isApproxEqual(h,T(0.0),tolerance)) {
                    invertible = false;

                } else {
                    h = 1.0 / h;

                    //
                    // Calculate h, g, and f
                    //
                    inv[3][3] = h;
                    inv[3][0] = -h * r[0];
                    inv[3][1] = -h * r[1];
                    inv[3][2] = -h * r[2];

                    inv[0][3] = -h * p[0];
                    inv[1][3] = -h * p[1];
                    inv[2][3] = -h * p[2];

                    //
                    // Calculate E
                    //
                    // p is scaled by h first, so the updates below add p*h*r'
                    p *= h;
                    inv[0][0] += p[0] * r[0];
                    inv[0][1] += p[0] * r[1];
                    inv[0][2] += p[0] * r[2];
                    inv[1][0] += p[1] * r[0];
                    inv[1][1] += p[1] * r[1];
                    inv[1][2] += p[1] * r[2];
                    inv[2][0] += p[2] * r[0];
                    inv[2][1] += p[2] * r[1];
                    inv[2][2] += p[2] * r[2];
                }
            } else {
                // Equations are much simpler in the non-perspective case
                inv[3][0] = - (m[3][0] * inv[0][0] + m[3][1] * inv[1][0]
                                + m[3][2] * inv[2][0]);
                inv[3][1] = - (m[3][0] * inv[0][1] + m[3][1] * inv[1][1]
                                + m[3][2] * inv[2][1]);
                inv[3][2] = - (m[3][0] * inv[0][2] + m[3][1] * inv[1][2]
                                + m[3][2] * inv[2][2]);
                inv[0][3] = 0.0;
                inv[1][3] = 0.0;
                inv[2][3] = 0.0;
                inv[3][3] = 1.0;
            }
        }

        if (!invertible) OPENVDB_THROW(ArithmeticError, "Inversion of singular 4x4 matrix");
        return inv;
    }


    /// @brief Determinant of the matrix, by cofactor expansion along row 0.
    /// @details For each column i of row 0, the 3x3 block formed by rows 1-3
    /// with column i removed is copied into a Mat3 and its determinant is
    /// accumulated with alternating sign.
    T det() const
    {
        Mat3<T> cofactorBlock;
        T result = 0;
        int sign = 1;

        for (int skipCol = 0; skipCol < 4; ++skipCol) {
            // Pack rows 1..3, minus the skipped column, in row-major order.
            T* out = cofactorBlock.asPointer();
            for (int r = 1; r < 4; ++r) {
                for (int c = 0; c < 4; ++c) {
                    if (c != skipCol) {
                        *out++ = this->mm[4 * r + c];
                    }
                }
            }

            result += sign * this->mm[skipCol] * cofactorBlock.det();
            sign = -sign;
        }

        return result;
    }

    /// @brief Return a copy of this matrix with one axis snapped to a given
    /// direction, preserving scaling.
    /// @details It does this using minimum energy, thus posing a unique
    /// solution if basis & direction aren't parallel.
    /// Direction need not be unit.  The work is delegated to the free
    /// function of the same name.
    /// @note The call is namespace-qualified on purpose: an unqualified
    /// snapBasis(...) inside this class finds this two-argument member first,
    /// which hides the three-argument free function and suppresses
    /// argument-dependent lookup, so the call could never be instantiated.
    /// @note NOTE(review): assumes the free function is declared in the
    /// enclosing math namespace before this point -- confirm.
    Mat4 snapBasis(Axis axis, const Vec3<T> &direction)
    {return math::snapBasis(*this, axis, direction);}

    /// @brief Return a new matrix that translates by v.
    /// @details The result is the identity with the components of v
    /// (converted to T) stored in elements 12, 13 and 14.  This is a static
    /// factory; no existing matrix is modified.
    static Mat4 translation(const Vec3d& v)
    {
        return Mat4(
            T(1),     T(0),    T(0),     T(0),
            T(0),     T(1),    T(0),     T(0),
            T(0),     T(0),    T(1),     T(0),
            T(v.x()), T(v.y()),T(v.z()), T(1));
    }

    /// @brief Overwrite this matrix with one that translates by v.
    /// @details The result is the identity with v.x(), v.y() and v.z()
    /// stored in elements 12, 13 and 14.
    template <typename T0>
    void setToTranslation(const Vec3<T0>& v)
    {
        // Start from the identity, then fill in the translation row.
        this->setIdentity();

        this->mm[12] = v.x();
        this->mm[13] = v.y();
        this->mm[14] = v.z();
    }

    /// @brief Accumulates a translation of v into the matrix.
    /// @details Only the last row (elements 12-15) is modified: element 12+j
    /// is incremented by the dot product of v with the first three entries
    /// of column j.
    template <typename T0>
    void accumTranslation(const Vec3<T0>& v)
    {
        this->mm[12] += v.dot(Vec3<T>(this->mm[0], this->mm[4], this->mm[8]));
        this->mm[13] += v.dot(Vec3<T>(this->mm[1], this->mm[5], this->mm[9]));
        this->mm[14] += v.dot(Vec3<T>(this->mm[2], this->mm[6], this->mm[10]));
        this->mm[15] += v.dot(Vec3<T>(this->mm[3], this->mm[7], this->mm[11]));
    }

    /// @brief Sets the matrix to a matrix that scales by v.
    /// @details The matrix is reset to identity, then the first three
    /// diagonal elements (0, 5, 10) are set to v.x(), v.y(), v.z().
    template <typename T0>
    void setToScale(const Vec3<T0>& v)
    {
        this->setIdentity();
        this->mm[0] = v.x();
        this->mm[5] = v.y();
        this->mm[10] = v.z();
    }

    /// @brief Accumulates a scaling of v into the matrix.
    /// @details Rows 0, 1 and 2 are multiplied in place by v.x(), v.y() and
    /// v.z() respectively; row 3 is left unchanged.
    template <typename T0>
    void accumScale(const Vec3<T0>& v)
    {
        for (int c = 0; c < 4; ++c) {
            this->mm[c]     *= v.x();
            this->mm[4 + c] *= v.y();
            this->mm[8 + c] *= v.z();
        }
    }

    /// @brief Sets the matrix to a rotation about the given axis.
    /// @details Overwrites this matrix with the result of rotation<Mat4<T> >(axis, angle).
    /// @param axis The axis (one of X, Y, Z) to rotate about.
    /// @param angle The rotation angle, in radians.
    void setToRotation(Axis axis, T angle) {*this = rotation<Mat4<T> >(axis, angle);}

    /// @brief Sets the matrix to a rotation about an arbitrary axis
    /// @details Overwrites this matrix with the result of rotation<Mat4<T> >(axis, angle).
    /// @param axis The axis of rotation (cannot be zero-length)
    /// @param angle The rotation angle, in radians.
    void setToRotation(const Vec3<T>& axis, T angle) {*this = rotation<Mat4<T> >(axis, angle);}

    /// @brief Sets the matrix to a rotation that maps v1 onto v2 about the cross
    /// product of v1 and v2.
    /// @details Overwrites this matrix with the result of rotation<Mat4<T> >(v1, v2).
    void setToRotation(const Vec3<T>& v1, const Vec3<T>& v2) {*this = rotation<Mat4<T> >(v1, v2);}

    /// @brief Accumulates a rotation about the given axis into this matrix.
    /// @details Two of the first three rows are replaced by linear
    /// combinations of each other using cos(angle) and sin(angle).  Only
    /// columns 0-2 of those rows are touched; column 3 and row 3 are left
    /// unchanged.
    /// @param axis The axis (one of X, Y, Z) of rotation.
    /// @param angle The rotation angle, in radians.
    void accumRotation(Axis axis, T angle)
    {
        T c = static_cast<T>(cos(angle));
        T s = static_cast<T>(sin(angle));

        // Temporaries for the first updated row: the second row's update
        // still needs the old values of the first, so the new first row is
        // written back last.
        T a10, a11, a12;
        T a00, a01, a02;

        switch (axis) {
        case X_AXIS:
            // Mixes rows 1 and 2
            a10 = c*this->mm[4] + s*this->mm[8];
            a11 = c*this->mm[5] + s*this->mm[9];
            a12 = c*this->mm[6] + s*this->mm[10];

            this->mm[8]  = -s*this->mm[4] + c*this->mm[8];
            this->mm[9]  = -s*this->mm[5] + c*this->mm[9];
            this->mm[10] = -s*this->mm[6] + c*this->mm[10];

            this->mm[4] = a10;
            this->mm[5] = a11;
            this->mm[6] = a12;

            break;

        case Y_AXIS:
            // Mixes rows 0 and 2 (note the opposite sign of s compared with
            // the X and Z cases)
            a00 = c*this->mm[0] - s*this->mm[8];
            a01 = c*this->mm[1] - s*this->mm[9];
            a02 = c*this->mm[2] - s*this->mm[10];

            this->mm[8]  = s*this->mm[0] + c*this->mm[8];
            this->mm[9]  = s*this->mm[1] + c*this->mm[9];
            this->mm[10] = s*this->mm[2] + c*this->mm[10];

            this->mm[0] = a00;
            this->mm[1] = a01;
            this->mm[2] = a02;

            break;

        case Z_AXIS:
            // Mixes rows 0 and 1
            a00 = c*this->mm[0] + s*this->mm[4];
            a01 = c*this->mm[1] + s*this->mm[5];
            a02 = c*this->mm[2] + s*this->mm[6];

            this->mm[4] = -s*this->mm[0] + c*this->mm[4];
            this->mm[5] = -s*this->mm[1] + c*this->mm[5];
            this->mm[6] = -s*this->mm[2] + c*this->mm[6];

            this->mm[0] = a00;
            this->mm[1] = a01;
            this->mm[2] = a02;

            break;

        default:
            // Any other axis value is a programming error; the matrix is
            // left unchanged (and the assert fires when asserts are enabled).
            assert(axis==X_AXIS || axis==Y_AXIS || axis==Z_AXIS);
        }
    }

    /// @brief Sets the matrix to a shear along axis0 by a fraction of axis1.
    /// @details Overwrites this matrix with the result of
    /// shear<Mat4<T> >(axis0, axis1, shearby).
    /// @param axis0 The fixed axis of the shear.
    /// @param axis1 The shear axis.
    /// @param shearby The shear factor.
    void setToShear(Axis axis0, Axis axis1, T shearby)
    {
        *this = shear<Mat4<T> >(axis0, axis1, shearby);
    }

    /// @brief Accumulates a shearing transformation into the matrix.
    /// @details Adds @a shear times row axis0 to row axis1, where the axes
    /// are converted to row indices with static_cast<int>.
    /// @see setToShear
    void accumShear(Axis axis0, Axis axis1, T shear)
    {
        const int srcRow = 4 * static_cast<int>(axis0);
        const int dstRow = 4 * static_cast<int>(axis1);

        for (int c = 0; c < 4; ++c) {
            this->mm[dstRow + c] += shear*this->mm[srcRow + c];
        }
    }


    /// Transform a Vec4 by post-multiplication, i.e. evaluate v * (*this)
    /// and cast the result back to Vec4<T0>.
    template<typename T0>
    Vec4<T0> transform(const Vec4<T0> &v) const
    {
        return static_cast< Vec4<T0> >(v * *this);
    }

    /// Transform a Vec3 by post-multiplication, without homogeneous division,
    /// i.e. evaluate v * (*this) and cast the result back to Vec3<T0>.
    template<typename T0>
    Vec3<T0> transform(const Vec3<T0> &v) const
    {
        return static_cast< Vec3<T0> >(v * *this);
    }

    /// Transform a Vec4 by pre-multiplication, i.e. evaluate (*this) * v
    /// and cast the result back to Vec4<T0>.
    template<typename T0>
    Vec4<T0> pretransform(const Vec4<T0> &v) const
    {
        return static_cast< Vec4<T0> >(*this * v);
    }

    /// Transform a Vec3 by pre-multiplication, without homogeneous division,
    /// i.e. evaluate (*this) * v and cast the result back to Vec3<T0>.
    template<typename T0>
    Vec3<T0> pretransform(const Vec3<T0> &v) const
    {
        return static_cast< Vec3<T0> >(*this * v);
    }

    /// @brief Transform a Vec3 by post-multiplication, doing homogeneous division.
    /// @details p is treated as the row vector (p[0], p[1], p[2], 1).
    /// @return the transformed point divided by w, or (0, 0, 0) when w is
    /// exactly zero
    template<typename T0>
    Vec3<T0> transformH(const Vec3<T0> &p) const
    {
        T0  w;

        // w = (p, 1) * (*this).col(3);
        w = p[0]*this->mm[3] + p[1]*this->mm[7] + p[2]*this->mm[11] + this->mm[15];

        if ( !isExactlyEqual(w , 0.0) ) {
            return Vec3<T0>(static_cast<T0>((p[0]*this->mm[0] + p[1]*this->mm[4] +
                                            p[2]*this->mm[8] + this->mm[12]) / w),
                            static_cast<T0>((p[0]*this->mm[1] + p[1]*this->mm[5] +
                                            p[2]*this->mm[9] + this->mm[13]) / w),
                            static_cast<T0>((p[0]*this->mm[2]  + p[1]*this->mm[6] +
                                            p[2]*this->mm[10] + this->mm[14]) / w));
        }

        // w == 0: the division is skipped and the zero vector is returned
        return Vec3<T0>(0, 0, 0);
    }

    /// @brief Transform a Vec3 by pre-multiplication, doing homogeneous division.
    /// @details p is treated as the column vector (p[0], p[1], p[2], 1).
    /// @return the transformed point divided by w, or (0, 0, 0) when w is
    /// exactly zero
    template<typename T0>
    Vec3<T0> pretransformH(const Vec3<T0> &p) const
    {
        T0  w;

        // w = (*this).row(3) * (p, 1);
        w = p[0]*this->mm[12] + p[1]*this->mm[13] + p[2]*this->mm[14] + this->mm[15];

        if ( !isExactlyEqual(w , 0.0) ) {
            return Vec3<T0>(static_cast<T0>((p[0]*this->mm[0] + p[1]*this->mm[1] +
                                            p[2]*this->mm[2] + this->mm[3]) / w),
                            static_cast<T0>((p[0]*this->mm[4] + p[1]*this->mm[5] +
                                            p[2]*this->mm[6] + this->mm[7]) / w),
                            static_cast<T0>((p[0]*this->mm[8]  + p[1]*this->mm[9] +
                                            p[2]*this->mm[10] + this->mm[11]) / w));
        }

        // w == 0: the division is skipped and the zero vector is returned
        return Vec3<T0>(0, 0, 0);
    }

    /// @brief Transform a Vec3 by post-multiplication, without translation.
    /// @details Only the upper-left 3x3 block is used: component j of the
    /// result is v[0]*m(0,j) + v[1]*m(1,j) + v[2]*m(2,j), cast to T0.
    template<typename T0>
    Vec3<T0> transform3x3(const Vec3<T0> &v) const
    {
        return Vec3<T0>(
            static_cast<T0>(v[0]*this->mm[0] + v[1]*this->mm[4] + v[2]*this->mm[8]),
            static_cast<T0>(v[0]*this->mm[1] + v[1]*this->mm[5] + v[2]*this->mm[9]),
            static_cast<T0>(v[0]*this->mm[2] + v[1]*this->mm[6] + v[2]*this->mm[10]));
    }


private:
    /// Fallback inversion used by inverse() when the determinant of the
    /// upper-left 3x3 block is close to zero; the returned flag is used there
    /// as "invertible".  Defined outside this class body
    /// (NOTE(review): presumably Gauss-Jordan elimination, per the comment
    /// in inverse() -- confirm against the definition).
    bool invert(Mat4<T> &inverse, T tolerance) const;

    /// Determinant of the 2x2 submatrix of @a a formed by rows i0, i1 and
    /// columns j0, j1 (in that order).
    T det2(const Mat4<T> &a, int i0, int i1, int j0, int j1) const {
        const T* const r0 = &a.mm[4 * i0];
        const T* const r1 = &a.mm[4 * i1];
        return r0[j0]*r1[j1] - r0[j1]*r1[j0];
    }

    /// Determinant of the 3x3 submatrix of @a a formed by rows i0, i1, i2
    /// and columns j0, j1, j2, expanded along row i0 using det2().
    T det3(const Mat4<T> &a, int i0, int i1, int i2,
           int j0, int j1, int j2) const {
        const T* const top = &a.mm[4 * i0];
        return top[j0]*det2(a, i1,i2, j1,j2) +
            top[j1]*det2(a, i1,i2, j2,j0) +
            top[j2]*det2(a, i1,i2, j0,j1);
    }

    static const Mat4<T> sIdentity;
    static const Mat4<T> sZero;
}; // class Mat4


/// Definition of the static identity-matrix constant: ones on the main
/// diagonal and zeros everywhere else.
template <typename T>
const Mat4<T> Mat4<T>::sIdentity = Mat4<T>(1, 0, 0, 0,
                                           0, 1, 0, 0,
                                           0, 0, 1, 0,
                                           0, 0, 0, 1);

/// Definition of the static zero-matrix constant: all sixteen elements are 0.
template <typename T>
const Mat4<T> Mat4<T>::sZero = Mat4<T>(0, 0, 0, 0,
                                       0, 0, 0, 0,
                                       0, 0, 0, 0,
                                       0, 0, 0, 0);

/// @relates Mat4
/// @brief Equality operator: true only if all sixteen elements of @a m0 and
/// @a m1 compare exactly equal (no tolerance is applied).
template <typename T0, typename T1>
bool operator==(const Mat4<T0> &m0, const Mat4<T1> &m1)
{
    const T0 *lhs = m0.asPointer();
    const T1 *rhs = m1.asPointer();

    // Advance past matching elements; stop at the first mismatch.
    int idx = 0;
    while (idx < 16 && isExactlyEqual(lhs[idx], rhs[idx])) {
        ++idx;
    }
    return idx == 16;
}

/// @relates Mat4
/// @brief Inequality operator: the logical negation of the exact,
/// element-wise equality operator.
template <typename T0, typename T1>
bool operator!=(const Mat4<T0> &m0, const Mat4<T1> &m1)
{
    const bool identical = (m0 == m1);
    return !identical;
}

/// @relates Mat4
/// @brief Returns M, where \f$M_{i,j} = m_{i,j} * scalar\f$ for \f$i, j \in [0, 3]\f$
/// @details Scalar-on-the-left form; it forwards to the expression
/// <tt>m * scalar</tt>.
template <typename S, typename T>
Mat4<typename promote<S, T>::type> operator*(S scalar, const Mat4<T> &m)
{
    typedef Mat4<typename promote<S, T>::type> ResultT;
    ResultT scaled = m*scalar;
    return scaled;
}

/// @relates Mat4
/// @brief Returns M, where \f$M_{i,j} = m_{i,j} * scalar\f$ for \f$i, j \in [0, 3]\f$
/// @details The result has the promoted element type of @c S and @c T: it is
/// copy-constructed from @a m and then scaled in place with operator*=.
template <typename S, typename T>
Mat4<typename promote<S, T>::type> operator*(const Mat4<T> &m, S scalar)
{
    typedef Mat4<typename promote<S, T>::type> ResultT;
    ResultT scaled(m);
    scaled *= scalar;
    return scaled;
}

/// @relates Mat4
/// @brief Returns v, where \f$v_{i} = \sum_{n=0}^3 m_{i,n} * v_n \f$ for \f$i \in [0, 3]\f$
/// @details Matrix times column vector: component i of the result is the dot
/// product of row i of @a _m with @a _v.
template<typename T, typename MT>
inline Vec4<typename promote<T, MT>::type>
operator*(const Mat4<MT> &_m,
          const Vec4<T> &_v)
{
    typedef Vec4<typename promote<T, MT>::type> ResultT;

    // Each row occupies four consecutive elements of the matrix storage.
    MT const *row0 = _m.asPointer();
    MT const *row1 = row0 + 4;
    MT const *row2 = row0 + 8;
    MT const *row3 = row0 + 12;

    return ResultT(
        _v[0]*row0[0] + _v[1]*row0[1] + _v[2]*row0[2] + _v[3]*row0[3],
        _v[0]*row1[0] + _v[1]*row1[1] + _v[2]*row1[2] + _v[3]*row1[3],
        _v[0]*row2[0] + _v[1]*row2[1] + _v[2]*row2[2] + _v[3]*row2[3],
        _v[0]*row3[0] + _v[1]*row3[1] + _v[2]*row3[2] + _v[3]*row3[3]);
}

/// @relates Mat4
/// @brief Returns v, where \f$v_{i} = \sum_{n=0}^3 m_{n,i} * v_n \f$ for \f$i \in [0, 3]\f$
/// @details Row vector times matrix: component i of the result is the dot
/// product of @a _v with column i of @a _m.
template<typename T, typename MT>
inline Vec4<typename promote<T, MT>::type>
operator*(const Vec4<T> &_v,
          const Mat4<MT> &_m)
{
    typedef Vec4<typename promote<T, MT>::type> ResultT;

    // Each row occupies four consecutive elements of the matrix storage.
    MT const *row0 = _m.asPointer();
    MT const *row1 = row0 + 4;
    MT const *row2 = row0 + 8;
    MT const *row3 = row0 + 12;

    return ResultT(
        _v[0]*row0[0] + _v[1]*row1[0] + _v[2]*row2[0] + _v[3]*row3[0],
        _v[0]*row0[1] + _v[1]*row1[1] + _v[2]*row2[1] + _v[3]*row3[1],
        _v[0]*row0[2] + _v[1]*row1[2] + _v[2]*row2[2] + _v[3]*row3[2],
        _v[0]*row0[3] + _v[1]*row1[3] + _v[2]*row2[3] + _v[3]*row3[3]);
}

/// @relates Mat4
/// @brief Returns v, where
///     \f$v_{i} = \sum_{n=0}^2 m_{i,n} * v_n + m_{i,3}\f$ for \f$i \in [0, 2]\f$
/// @details Matrix times column vector, with @a _v treated as (v, 1).  Only
/// the first three rows are evaluated and no homogeneous division is done.
template<typename T, typename MT>
inline Vec3<typename promote<T, MT>::type>
operator*(const Mat4<MT> &_m,
          const Vec3<T> &_v)
{
    typedef Vec3<typename promote<T, MT>::type> ResultT;

    // Each row occupies four consecutive elements of the matrix storage.
    MT const *row0 = _m.asPointer();
    MT const *row1 = row0 + 4;
    MT const *row2 = row0 + 8;

    return ResultT(
        _v[0]*row0[0] + _v[1]*row0[1] + _v[2]*row0[2] + row0[3],
        _v[0]*row1[0] + _v[1]*row1[1] + _v[2]*row1[2] + row1[3],
        _v[0]*row2[0] + _v[1]*row2[1] + _v[2]*row2[2] + row2[3]);
}

/// @relates Mat4
/// @brief Returns v, where
///     \f$v_{i} = \sum_{n=0}^2 m_{n,i} * v_n + m_{3,i}\f$ for \f$i \in [0, 2]\f$
/// @details Row vector times matrix, with @a _v treated as (v, 1).  Only the
/// first three columns are evaluated and no homogeneous division is done.
template<typename T, typename MT>
inline Vec3<typename promote<T, MT>::type>
operator*(const Vec3<T> &_v,
          const Mat4<MT> &_m)
{
    typedef Vec3<typename promote<T, MT>::type> ResultT;

    // Each row occupies four consecutive elements of the matrix storage.
    MT const *row0 = _m.asPointer();
    MT const *row1 = row0 + 4;
    MT const *row2 = row0 + 8;
    MT const *row3 = row0 + 12;

    return ResultT(
        _v[0]*row0[0] + _v[1]*row1[0] + _v[2]*row2[0] + row3[0],
        _v[0]*row0[1] + _v[1]*row1[1] + _v[2]*row2[1] + row3[1],
        _v[0]*row0[2] + _v[1]*row1[2] + _v[2]*row2[2] + row3[2]);
}

/// @relates Mat4
/// @brief Returns M, where  \f$M_{i,j} = m0_{i,j} + m1_{i,j}\f$ for \f$i, j \in [0, 3]\f$
/// @details The result has the promoted element type of @c T0 and @c T1: it
/// is copy-constructed from @a m0 and then updated in place with operator+=.
template <typename T0, typename T1>
Mat4<typename promote<T0, T1>::type>
operator+(const Mat4<T0> &m0, const Mat4<T1> &m1)
{
    typedef Mat4<typename promote<T0, T1>::type> SumT;
    SumT sum(m0);
    sum += m1;
    return sum;
}

/// @relates Mat4
/// @brief Returns M, where  \f$M_{i,j} = m0_{i,j} - m1_{i,j}\f$ for \f$i, j \in [0, 3]\f$
/// @details The result has the promoted element type of @c T0 and @c T1: it
/// is copy-constructed from @a m0 and then updated in place with operator-=.
template <typename T0, typename T1>
Mat4<typename promote<T0, T1>::type>
operator-(const Mat4<T0> &m0, const Mat4<T1> &m1)
{
    typedef Mat4<typename promote<T0, T1>::type> DiffT;
    DiffT difference(m0);
    difference -= m1;
    return difference;
}

/// @relates Mat4
/// @brief Returns the product of @a m0 and @a m1 in the promoted element type
/// of @c T0 and @c T1.
/// @details The result is copy-constructed from @a m0 and then multiplied in
/// place by @a m1 using Mat4::operator*=.
/// @note Presumably \f$M_{ij} = \sum_{n=0}^3 m0_{in} * m1_{nj}\f$ for
/// \f$i, j \in [0, 3]\f$; the exact convention is the one implemented by
/// Mat4::operator*=.
template <typename T0, typename T1>
Mat4<typename promote<T0, T1>::type>
operator*(const Mat4<T0> &m0, const Mat4<T1> &m1)
{
    typedef Mat4<typename promote<T0, T1>::type> ProductT;
    ProductT product(m0);
    product *= m1;
    return product;
}


/// @brief Transform a Vec3 by pre-multiplication with the upper-left 3x3
/// block of @a m, without translation.
/// @details Presumes the matrix is the inverse of the coordinate transform.
/// Synonymous to "pretransform3x3".
/// @param m  the matrix whose upper-left 3x3 block is applied
/// @param n  the normal to transform
/// @return the vector whose component r is the dot product of row r of the
/// 3x3 block with @a n
template<typename T0, typename T1>
Vec3<T1> transformNormal(const Mat4<T0> &m, const Vec3<T1> &n)
{
    T1 out[3];
    for (int r = 0; r < 3; ++r) {
        out[r] = static_cast<T1>(m[r][0]*n[0] + m[r][1]*n[1] + m[r][2]*n[2]);
    }
    return Vec3<T1>(out[0], out[1], out[2]);
}


/// @brief Invert via Gauss-Jordan elimination with row pivoting.
/// Modified from mx library
/// @param[out] inverse  reset to the identity and then overwritten with the
///     result of the elimination; it is modified even when false is returned
/// @param tolerance  the squared determinant is compared against the square
///     of this value
/// @return false if some column has no nonzero entry on or below the diagonal,
///     or if the squared determinant is less than the squared tolerance;
///     true otherwise
template<typename T>
bool Mat4<T>::invert(Mat4<T> &inverse, T tolerance) const
{
    // Work on a copy so that this matrix is left untouched.
    Mat4<T> work(*this);
    inverse.setIdentity();

    // Forward elimination: reduce the working copy to unit upper-triangular
    // form while accumulating the determinant from the pivots.
    double det = 1.0;
    for (int col = 0; col < 4; ++col) {
        // Pick the row at or below the diagonal with the largest magnitude
        // in this column.
        int pivotRow = col;
        double largest = fabs(work[col][col]);

        for (int r = col+1; r < 4; ++r) {
            if (fabs(work[r][col]) > largest) {
                pivotRow = r;
                largest = fabs(work[r][col]);
            }
        }

        // Nothing usable in this column: give up.
        if (isExactlyEqual(largest, 0.0)) return false;

        // Bring the pivot onto the diagonal; a row exchange flips the sign
        // of the determinant.
        if (pivotRow != col) {
            det = -det;
            for (int c = 0; c < 4; ++c) {
                std::swap(work[pivotRow][c], work[col][c]);
                std::swap(inverse[pivotRow][c], inverse[col][c]);
            }
        }

        double pivot = work[col][col];
        det *= pivot;

        // Normalize the pivot row so the diagonal entry becomes one.
        for (int c = 0; c < 4; ++c) {
            work[col][c] /= pivot;
            inverse[col][c] /= pivot;
        }

        // Clear the entries below the pivot.
        for (int r = col+1; r < 4; ++r) {
            double factor = work[r][col];
            if (isExactlyEqual(factor, 0.0)) continue;

            // Subtract the scaled pivot row from row r.
            for (int c = 0; c < 4; ++c) {
                work[r][c] -= work[col][c] * factor;
                inverse[r][c] -= inverse[col][c] * factor;
            }
        }
    }

    // Backward elimination: clear the entries above the diagonal.  Only the
    // inverse needs updating; the working copy is just read for the factors.
    for (int col = 3; col > 0; --col) {
        for (int r = 0; r < col; ++r) {
            double factor = work[r][col];
            if (isExactlyEqual(factor, 0.0)) continue;

            for (int c = 0; c < 4; ++c) {
                inverse[r][c] -= inverse[col][c]*factor;
            }
        }
    }
    return det*det >= tolerance*tolerance;
}

template <typename T>
inline bool isAffine(const Mat4<T>& m) {
    return (m.col(3) == Vec4<T>(0, 0, 0, 1));
}

template <typename T>
inline bool hasTranslation(const Mat4<T>& m) {
    return (m.row(3) != Vec4<T>(0, 0, 0, 1));
}
    

/// Single-precision 4x4 matrix.
typedef Mat4<float>  Mat4s;
/// Double-precision 4x4 matrix.
typedef Mat4<double> Mat4d;

// Mat4f is an alias for Mat4d when DWREAL_IS_DOUBLE evaluates to 1 and for
// Mat4s otherwise (including when the macro is not defined).
#if DWREAL_IS_DOUBLE == 1
typedef Mat4d    Mat4f;
#else
typedef Mat4s    Mat4f;
#endif // DWREAL_IS_DOUBLE

} // namespace math
} // namespace OPENVDB_VERSION_NAME
} // namespace openvdb

#endif // OPENVDB_MATH_MAT4_H_HAS_BEEN_INCLUDED

// Copyright (c) 2012 DreamWorks Animation LLC
// All rights reserved. This software is distributed under the
// Mozilla Public License 2.0 ( http://www.mozilla.org/MPL/2.0/ )
