#include <fem.hpp>   

namespace ngfem
{
  using namespace ngfem;


  inline AutoDiff<3> Cross (const AutoDiff<3> & u,
			    const AutoDiff<3> & v)
  {
    AutoDiff<3> hv;
    hv.Value() = 0.0;
    hv.DValue(0) = u.DValue(1)*v.DValue(2)-u.DValue(2)*v.DValue(1);
    hv.DValue(1) = u.DValue(2)*v.DValue(0)-u.DValue(0)*v.DValue(2);
    hv.DValue(2) = u.DValue(0)*v.DValue(1)-u.DValue(1)*v.DValue(0);
    return hv;
  }
			    
  

  //------------------------------------------------------------------------
  // HCurlHighOrderFiniteElement
  //------------------------------------------------------------------------

  template <int D>
  HCurlHighOrderFiniteElement<D> ::
  HCurlHighOrderFiniteElement (ELEMENT_TYPE aeltype)
    : HCurlFiniteElementD<D> (aeltype, -1, -1) 
  { 
    for (int i = 0; i < 8; i++)
      vnums[i] = i;
  }

  template <int D>
  void HCurlHighOrderFiniteElement<D>::
  SetVertexNumbers (FlatArray<int> & avnums)
  {
    for (int i = 0; i < avnums.Size(); i++)
      vnums[i] = avnums[i];
  }
    
  template <int D>
  void HCurlHighOrderFiniteElement<D>::
  SetOrderInner (int oi)
  {
    order_inner = oi; 
  }

  template <int D>
  void HCurlHighOrderFiniteElement<D>::
  SetOrderFace (FlatArray<int> & of)
  {
    for (int i = 0; i < of.Size(); i++)
      order_face[i] = of[i];
  }

  template <int D>
  void HCurlHighOrderFiniteElement<D>::
  SetOrderEdge (FlatArray<int> & oen)
  {
    for (int i = 0; i < oen.Size(); i++)
      order_edge[i] = oen[i];
  }


  template <int D> 
  void HCurlHighOrderFiniteElement<D>:: 
  SetUsegradEdge (FlatArray<int> & uge)
  {
    for (int i=0; i<uge.Size(); i++) usegrad_edge[i]=uge[i]; 
  }
  
  template <int D> 
  void HCurlHighOrderFiniteElement<D>:: 
  SetUsegradFace (FlatArray<int> & ugf)
  {
    for (int i=0; i<ugf.Size(); i++) usegrad_face[i]=ugf[i]; 
  }
  
  template <int D> 
  void HCurlHighOrderFiniteElement<D>:: 
  SetUsegradCell (int ugc)
  {
    usegrad_cell=ugc; 
  }
  
  //------------------------------------------------------------------------
  // HCurlHighOrderSegm
  //------------------------------------------------------------------------
  
  template <class T_ORTHOPOL> 
  HCurlHighOrderSegm<T_ORTHOPOL> :: HCurlHighOrderSegm (int aorder)
    : HCurlHighOrderFiniteElement<1>(ET_SEGM)
  {
    order_inner = aorder;
    usegrad_cell = 1; 
    ComputeNDof(); 
  }

  template <class T_ORTHOPOL>
  void HCurlHighOrderSegm<T_ORTHOPOL> :: ComputeNDof()
  {
    ndof =1; 
    if(usegrad_cell)
      ndof += order_inner;    
    order = order_inner;
    order++; // integration order 
  }

  
  template <class T_ORTHOPOL>
  void HCurlHighOrderSegm<T_ORTHOPOL> :: CalcShape (const IntegrationPoint & ip, 
				     FlatMatrixFixWidth<1> shape) const
  {
    AutoDiff<1> x (ip(0),0); 
    AutoDiff<1> lami[2] = {1-x,x}; 
    
    int es = 0, ee =1;
    if (vnums[es] > vnums[ee]) swap(es,ee); 
    AutoDiff<1> xi = lami[ee] - lami[es]; 
    
    // Nedelec0-shapes
    shape(0,0) = 0.5*(xi.DValue(0)); 
      
    if (order_inner>=1)
      if(usegrad_cell)
	{ 
	  ArrayMem<AutoDiff<1>, 10> pol_xi(order_inner+2);
	  T_ORTHOPOL::Calc (order_inner+1, xi,pol_xi);  
	  
	  for (int j = 0; j < order_inner; j++)
	    shape(j+1,0) = pol_xi[j].DValue(0); 
	}    
  }

   //------------------------------------------------------------------------
  // HCurlHighOrderTrig
  //------------------------------------------------------------------------
  
  template <class T_ORTHOPOL>
  HCurlHighOrderTrig<T_ORTHOPOL> :: HCurlHighOrderTrig (int aorder)
    : HCurlHighOrderFiniteElement<2>(ET_TRIG)
  {
    order_inner = aorder;
    for (int i = 0; i < 3; i++)
      order_edge[i] = aorder; 

    ComputeNDof();
  }
  
  template <class T_ORTHOPOL>
  void HCurlHighOrderTrig<T_ORTHOPOL> :: ComputeNDof()
  {
    ndof = 3; // Nedelec 

    int i; 
    for(i=0; i<3; i++)
      ndof += usegrad_edge[i]*order_edge[i]; 
 	
    if (order_inner > 1)  
      ndof += ((usegrad_cell + 1) * order_inner +2) * (order_inner-1) /2;
	
    order = 1; // max(order_edges_tang,order_inner);  
    for (i = 0; i < 3; i++)
      {
	if (order_edge[i] > order)
	  order = order_edge[i];
      }
   
    if (order_inner > order) 
      order = order_inner;
   
    // order++; //for integration_order  (not necessary, JS)
  }

  template <class T_ORTHOPOL>
  void HCurlHighOrderTrig<T_ORTHOPOL> :: CalcShape (const IntegrationPoint & ip, 
						    FlatMatrixFixWidth<2> shape) const
  {
    AutoDiff<2> x(ip(0),0); 
    AutoDiff<2> y(ip(1),1); 
    AutoDiff<2> lami[3] = {x,y,1-x-y};

    shape = 0.0; 
    
    int i, j, k, l;
    int ii = 3; 
    
    const EDGE * edges = ElementTopology::GetEdges (ET_TRIG);
    ArrayMem<double,30> mem(2*(order+2)); 

    for (i = 0; i < 3; i++)
      {
	int es = edges[i][0]; 
	int ee = edges[i][1];
	
	if (vnums[es] > vnums[ee])  swap (es, ee);

	//Nedelec low order edge shape function 
	for(l=0; l<2; l++) 
	  //SZ 
	  shape(i,l) = lami[ee].DValue(l)*lami[es].Value() - 
	    lami[es].DValue(l)*lami[ee].Value();  
	// shape(i,l) = lami[ee].Value()*lami[es].DValue(l) - 
	//   lami[es].Value()*lami[ee].DValue(l);  //AinsworthCoyle

	int p = order_edge[i]; 
	//Edge tangential shapes  
	if(p>0 && usegrad_edge[i]) 
	  { 
	    AutoDiff<2> xi = lami[ee] - lami[es]; 
	    AutoDiff<2> eta = 1 - lami[ee] - lami[es]; 
	    FlatMatrixFixWidth<2> DExt(p+2,&mem[0]);

	    // HO-Edges = gradient fields 
	    T_ORTHOPOL::CalcTrigExtDeriv(p+1,xi.Value(),eta.Value(), DExt); 
	   
	    for(j=0; j<= p-1;j++,ii++) 
	      for(l=0;l<2;l++)
		shape(ii,l) = xi.DValue(l) * DExt(j,0) + eta.DValue(l) * DExt(j,1); 
	  }
      }   
     
    int p = order_inner;      
    //Inner shapes (Face) 
    if(p>1) 
      {
	int fav[3] = {0,1,2}; 
	//Sort vertices ... v(f0) < v(f1) < v(f2) 
	if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 
	if(vnums[fav[1]] > vnums[fav[2]]) swap(fav[1],fav[2]);
	if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 	  

	AutoDiff<2> xi  = lami[fav[2]]-lami[fav[1]];
	AutoDiff<2> eta = lami[fav[0]]; 

	ArrayMem<AutoDiff<2>,10> adpol1(order),adpol2(order);	
	T_INNERSHAPES::CalcSplitted(p+1,xi,eta,adpol1,adpol2);
	
	
	// gradients:
	if(usegrad_cell)
	  for (j = 0; j < p-1; j++)
	    for (k = 0; k < p-1-j; k++, ii++)
	      for (l = 0; l < 2; l++)
		shape(ii, l) = adpol1[j].DValue(l) * adpol2[k].Value()
		  + adpol1[j].Value() * adpol2[k].DValue(l); 
	
	// other combination
	for (j = 0; j < p-1; j++)
	  for (k = 0; k < p-1-j; k++, ii++)
	    for (l = 0; l < 2; l++)
	       shape(ii, l) = adpol1[j].DValue(l) * adpol2[k].Value()
		 - adpol1[j].Value() * adpol2[k].DValue(l); 
		
	
	double ned0[2] = {lami[fav[2]].Value()*lami[fav[1]].DValue(0) -
			  lami[fav[1]].Value()*lami[fav[2]].DValue(0),
			  lami[fav[2]].Value()*lami[fav[1]].DValue(1) -
			  lami[fav[1]].Value()*lami[fav[2]].DValue(1)};
	// rec_pol * Nedelec0 
	for (j = 0; j < p-1; j++, ii++)
	  for (l = 0; l < 2; l++)
	    shape(ii,l) = adpol2[j].Value() * ned0[l]; 
      }
  }
  
  template <class T_ORTHOPOL>
  void HCurlHighOrderTrig<T_ORTHOPOL> :: CalcCurlShape (const IntegrationPoint & ip, 
							FlatMatrixFixWidth<1> shape) const
  { 
    shape = 0.0; 
    
    AutoDiff<2> x(ip(0),0); 
    AutoDiff<2> y(ip(1),1); 
    AutoDiff<2> lami[3] = {x,y,1-x-y};
    
    const EDGE * edges = ElementTopology::GetEdges (ET_TRIG);
    
    int i, j, k, l, ii;
    ii=3; 
    for (i = 0; i < 3; i++)
      {
	int es = edges[i][0]; 
	int ee = edges[i][1];
	if (vnums[es] > vnums[ee]) swap (es, ee);
	
	//Nedelec low order edge shape function 2*grad(lam_e) x grad(lam_s)
	shape(i,0) = -2*(lami[ee].DValue(0)*lami[es].DValue(1)
			 - lami[ee].DValue(1)*lami[es].DValue(0));  
	// Curl(Edge tangential shapes)  = 0.0; 
 	if(usegrad_edge[i]) ii += order_edge[i];
      }    
      
    int p = order_inner; 
    
    // Inner shapes (Face) 
    
    int fav[3] ={0,1,2};    
    //Sort vertices  first edge op minimal vertex 
    if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 
    if(vnums[fav[1]] > vnums[fav[2]]) swap(fav[1],fav[2]);
    if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 	  
    
    AutoDiff<2> xi  = lami[fav[2]]-lami[fav[1]];
    AutoDiff<2> eta = lami[fav[0]]; // 1- lami[f2] - lami[f1]; 

    ArrayMem<AutoDiff<2>,10> adpol1(order),adpol2(order); 
    T_INNERSHAPES::CalcSplitted(p+1,xi,eta,adpol1,adpol2); 
    
    // curl(gradients) = 0.0
    if(usegrad_cell) ii += (p-1)*p/2 ;

    // curl(other combinations) = 2 grad(v) x grad(u) = 2 ( v1 u2 - v2 u1 ) 
    for (j = 0; j <= p-2; j++)
      for (k = 0; k <= p-2-j; k++)
	shape(ii++, 0) = 2*(adpol2[k].DValue(0) * adpol1[j].DValue(1) 
			    - adpol2[k].DValue(1) * adpol1[j].DValue(0));

    // curl(adpol2 * Nedelec0)   
    double ned0[2] = { lami[fav[2]].Value()*lami[fav[1]].DValue(0) - 
	 	       lami[fav[1]].Value()*lami[fav[2]].DValue(0), 
		       lami[fav[2]].Value()*lami[fav[1]].DValue(1) - 
		       lami[fav[1]].Value()*lami[fav[2]].DValue(1) };
        
    double curlned0 = 2*(lami[fav[2]].DValue(0)*lami[fav[1]].DValue(1)
			 - lami[fav[2]].DValue(1)*lami[fav[1]].DValue(0)); 
 
    for (j = 0; j <= p-2; j++)  
      shape(ii++,0) = curlned0 * adpol2[j].Value() + adpol2[j].DValue(0)*ned0[1] - 
 	adpol2[j].DValue(1)*ned0[0]; 
  }
  
  //------------------------------------------------------------------------
  // HCurlHighOrderQuad
  //------------------------------------------------------------------------
  
  template <class T_ORTHOPOL>  
  HCurlHighOrderQuad<T_ORTHOPOL> :: HCurlHighOrderQuad (int aorder)
    : HCurlHighOrderFiniteElement<2>(ET_QUAD)
  {
    order_inner = aorder;
    for (int i = 0; i < 4; i++)
	order_edge[i] = aorder; 
    
    ComputeNDof();
  }
  
  template <class T_ORTHOPOL>
  void HCurlHighOrderQuad<T_ORTHOPOL> :: ComputeNDof()
  {
    ndof = 4; // Nedelec 

    int i; 
    for(i=0;i<4;i++)
      ndof += usegrad_edge[i]*order_edge[i];
          
    if (order_inner > 0) 
      ndof += ((usegrad_cell+ 1) * order_inner + 2) * order_inner; 
	  
    order = 0; // max(order_edges_tang,order_inner);  
    for (i = 0; i < 4; i++)
      if (order_edge[i] > order) order = order_edge[i];
   
    if (order_inner > order) // order_edge_normal = order_inner; 
      order = order_inner;

    order++; // for integration order
  }

  template <class T_ORTHOPOL>
  void HCurlHighOrderQuad<T_ORTHOPOL> :: CalcShape (const IntegrationPoint & ip, 
				       FlatMatrixFixWidth<2> shape) const
  {
    int i, j, k, l, p;
    AutoDiff<2> x (ip(0),0);
    AutoDiff<2> y (ip(1),1);

    AutoDiff<2> lami[4] = {(1-x)*(1-y),x*(1-y),x*y,(1-x)*y};  
    AutoDiff<2> sigma[4] = {(1-x)+(1-y),x+(1-y),x+y,(1-x)+y};  
 
    const EDGE * edges = ElementTopology::GetEdges (ET_QUAD);

    shape = 0;
    int ii = 4;

    ArrayMem<AutoDiff<2>, 10> pol_xi(order+2);
    ArrayMem<AutoDiff<2>, 10> pol_eta(order+2);

    // edges
    for (i = 0; i < 4; i++)
      {
	p = order_edge[i]; 
	int es = edges[i][0];
	int ee = edges[i][1];
	if (vnums[es] > vnums[ee]) swap (es, ee);

	AutoDiff<2> xi  = sigma[ee]-sigma[es];
	AutoDiff<2> lam_e = lami[ee]+lami[es];  // attention in [0,1]

	// Nedelec0-shapes
	for(l=0; l<2; l++) 
	  shape(i,l) = 0.5*xi.DValue(l)*lam_e.Value(); 
	
	// High Order edges ... Gradient fields 
	if(usegrad_edge[i])
	  {
	    T_ORTHOPOL::Calc (p+1, xi,pol_xi);  
	    for (j = 0; j < p; j++, ii++)
	      for(l=0; l<2; l++)
		shape(ii,l) =  lam_e.DValue(l)*pol_xi[j].Value() + 
		  lam_e.Value()*pol_xi[j].DValue(l);
	  }
      }
     
    p = order_inner;
    int fmax = 0; 
    for (j = 1; j < 4; j++)
      if (vnums[j] > vnums[fmax])
	fmax = j;
    
    int f1 = (fmax+3)%4; 
    int f2 = (fmax+1)%4; 
    if(vnums[f2] > vnums[f1]) swap(f1,f2);  // fmax > f1 > f2; 

    AutoDiff<2> xi = sigma[fmax]-sigma[f1];  // in [-1,1]
    AutoDiff<2> eta = sigma[fmax]-sigma[f2]; // in [-1,1]
    
    T_ORTHOPOL::Calc(p+1, xi,pol_xi);
    T_ORTHOPOL::Calc(p+1,eta,pol_eta);
    
    //Gradient fields 
    if(usegrad_cell)
      for (k = 0; k < p; k++)
	for (j= 0; j < p; j++, ii++)
	  for(l=0; l<2; l++)
	    shape(ii,l) = pol_xi[k].Value()*pol_eta[j].DValue(l) 
	      + pol_xi[k].DValue(l)*pol_eta[j].Value(); 
    
    //Rotation of Gradient fields 
    for (k = 0; k < p; k++)
      for (j= 0; j < p; j++, ii++)
	for(l=0; l<2; l++)
	  shape(ii,l) = pol_xi[k].DValue(l)*pol_eta[j].Value() 
	    - pol_xi[k].Value()*pol_eta[j].DValue(l); 

    //Missing ones 
      for (j= 0; j < p; j++, ii+=2)
	for(l=0; l<2; l++)
	  {  
	    shape(ii,l) = 0.5*xi.DValue(l)*pol_eta[j].Value(); 
	    shape(ii+1,l) = 0.5*eta.DValue(l)*pol_xi[j].Value();
	  }
    
      return;
  }
  
  /* void HCurlHighOrderQuad :: CalcCurlShape (const IntegrationPoint & ip, 
					    FlatMatrixFixWidth<1> shape) const
  {
    int i, j, k, l, p;
    AutoDiff<2> x (ip(0),0);
    AutoDiff<2> y (ip(1),1);
    
    AutoDiff<2> lami[4] = {(1-x)*(1-y),x*(1-y),x*y,(1-x)*y};  
    AutoDiff<2> sigma[4] = {(1-x)+(1-y),x+(1-y),x+y,(1-x)+y};  
    
    const EDGE * edges = ElementTopology::GetEdges (ET_QUAD);
     
    shape = 0;
    int ii = 4;

    ArrayMem<AutoDiff<2>, 10> pol_xi(order+2);
    ArrayMem<AutoDiff<2>, 10> pol_eta(order+2);
   
    // edges
    for (i = 0; i < 4; i++)
      {
	p = order_edge[i]; 
	int es = edges[i][0];
	int ee = edges[i][1];
	if (vnums[es] > vnums[ee]) swap (es, ee);

	AutoDiff<2> xi  = sigma[ee]-sigma[es];
	AutoDiff<2> eta = lami[ee]+lami[es];  // attention in [0,1]

	// Nedelec0-shapes
	shape(i,0) = eta.DValue(0)*xi.DValue(1) - eta.DValue(1)*xi.DValue(0); 
	
	// High Order edges ... Gradient fields 
	if(usegrad_edge[i]) ii+=p; 
      } 
    
    p = order_inner;
    int fmax = 0; 
    for (j = 1; j < 4; j++)
      if (vnums[j] > vnums[fmax])
	fmax = j;
     
    int f1 = (fmax+3)%4; 
    int f2 = (fmax+1)%4; 
    if(vnums[f2] > vnums[f1]) swap(f1,f2);  // fmax > f1 > f2; 
    
    AutoDiff<2> xi  = sigma[fmax]-sigma[f1]; // in [-1,1] 
    AutoDiff<2> eta = sigma[fmax]-sigma[f1];  // in [-1,1]
    
    T_ORTHOPOL::Calc(p+1, xi,pol_xi);
    T_ORTHOPOL::Calc(p+1,eta,pol_eta);
     
    //Gradient fields --> curl x grad() = 0.0 
    if(usegrad_cell) ii+=p*p; 
    
    //Rotation of Gradient fields 
    for (k = 0; k < p; k++)
      for (j= 0; j < p; j++)
	shape(ii++,0) = 2*(pol_eta[j].DValue(0)*pol_xi[k].DValue(1)
	 		   - pol_eta[j].DValue(1)* pol_xi[k].DValue(0)); 
    
    //Missing ones 
    for (j= 0; j < p; j++)
      {	
	shape(ii++,0) = pol_eta[j].DValue(0)*xi.DValue(1) 
	  - pol_eta[j].DValue(0)*xi.DValue(1); 
	shape(ii++,0) = pol_xi[j].DValue(0)*eta.DValue(1) 
	  - pol_xi[j].DValue(1)*eta.DValue(0); 
      }
    
    return;
    }*/
  
  //------------------------------------------------------------------------
  // HCurlHighOrderTet
  //------------------------------------------------------------------------
 
  template <class T_ORTHOPOL>
  HCurlHighOrderTet<T_ORTHOPOL> :: HCurlHighOrderTet (int aorder)
    : HCurlHighOrderFiniteElement<3>(ET_TET)
  {
    nv = 4; 
    ned = 6; 
    nf = 4; 

    int i;
    order_inner = aorder;
    for (i = 0; i < ned; i++)
      order_edge[i] = aorder;
    
    for (i=0; i<nf; i++)  
      order_face[i] = aorder; 

    ComputeNDof();
  }
  
  template <class T_ORTHOPOL>
  void HCurlHighOrderTet<T_ORTHOPOL> :: ComputeNDof()
  {
    ndof = 6; // Nedelec 

    int i;
    order = 1; 
    
    for (i = 0; i < 6; i++)
      ndof += usegrad_edge[i]*order_edge[i];
  
    for(i=0; i<4; i++)
      if (order_face[i] > 1) 
	ndof += ((usegrad_face[i]+1)*order_face[i]+2)*(order_face[i]-1)/2 ;
    
    if(order_inner > 2)
      ndof += ((usegrad_cell + 2) * order_inner + 3) 
	* (order_inner-2) * (order_inner-1) / 6; 

    order = 0; // max(order_edges,order_face,order_inner);  
    for (i = 0; i < 6; i++)
      if (order_edge[i] > order)  order = order_edge[i];
    for(i=0; i<4; i++) 
      if (order_face[i] > order) 
	order = order_face[i]; 
    if (order_inner > order) // order_edge_normal = order_inner; 
      order = order_inner;
    
    // order++; // for integration order .. 
  }

  template <class T_ORTHOPOL>
  void HCurlHighOrderTet<T_ORTHOPOL> ::  
  GetInternalDofs (ARRAY<int> & idofs) const
  {
    idofs.SetSize (0);

    int i, base = 6; // Nedelec 
    for (i = 0; i < 6; i++)
      base += usegrad_edge[i]*order_edge[i];
    for(i=0; i<4; i++)
      if (order_face[i] > 1) 
	base += ((usegrad_face[i]+1)*order_face[i]+2)*(order_face[i]-1)/2 ;
    
    if(order_inner > 2)
      {
	int ni = ((usegrad_cell + 2) * order_inner + 3) 
	  * (order_inner-2) * (order_inner-1) / 6; 
	
	for (i = 0; i < ni; i++)
	  idofs.Append (base+i);
      }
  }


  template <class T_ORTHOPOL>
  void HCurlHighOrderTet<T_ORTHOPOL> :: CalcShape (const IntegrationPoint & ip, 
						   FlatMatrixFixWidth<3> shape) const
  {    
    AutoDiff<3> x(ip(0), 0);
    AutoDiff<3> y(ip(1), 1);
    AutoDiff<3> z(ip(2), 2);
    
    AutoDiff<3> lami[4] = { x, y, z, 1-x-y-z };

    int i, j, ii, k,l,p;
    
    shape = 0.0; 
       
    const EDGE * edges = ElementTopology::GetEdges (ET_TET);
    const FACE * faces = ElementTopology::GetFaces (ET_TET); 
    
    ArrayMem<AutoDiff<3>,20> polxi(order+1), poleta(order+1), polzeta(order+1);
    ArrayMem<double,30> mem(2*order*sizeof(double)); 

    ii = 6; 
    for (i = 0; i < 6; i++)
      { 
	p = order_edge[i]; 
	int es = edges[i][0]; 
	int ee = edges[i][1]; 
	if (vnums[es] > vnums[ee]) swap (es, ee);
	
	//Nedelec low order edge shape function 
	for(l=0; l<3; l++) 
	  shape(i,l) = - lami[ee].Value()*lami[es].DValue(l) 
	    + lami[ee].DValue(l)*lami[es].Value(); //SZ 
	// shape(i,l) = lami[ee].Value()*lami[es].DValue(l) 
	// - lami[ee].DValue(l)*lami[es].Value(); // Ainswort&Coyle
	
	//HO-Edge shape functions (Gradient Fields) 	
	if(p>0 && usegrad_edge[i]) 
	  {
	    AutoDiff<3> xi = lami[ee] - lami[es]; 
	    AutoDiff<3> eta = 1 - lami[ee] - lami[es]; 

	    FlatMatrixFixWidth<2> DExt(p+2,&mem[0]); 
       
	    T_ORTHOPOL::CalcTrigExtDeriv(p+1, xi.Value(), eta.Value(), DExt); 

	    for(j=0; j< p;j++,ii++) 	      
	      for(l=0; l<3; l++) 
		shape(ii,l) = xi.DValue(l) * DExt(j,0) + eta.DValue(l) * DExt(j,1) ;
	  }
      }

    ArrayMem<AutoDiff<3>,10> adpol1(order+2),adpol2(order+2),adpol3(order+2); 
 
    // face shape functions
    for(i=0; i<4; i++) 
      {
	int fav[3] = { faces[i][0], faces[i][1], faces[i][2] };

	//Sort vertices  first edge op minimal vertex 
	if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 
	if(vnums[fav[1]] > vnums[fav[2]]) swap(fav[1],fav[2]);
	if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 	  	

	int vop = 6 - fav[0] - fav[1] - fav[2];  	
	p = order_face[i];

	if (p >= 2)
	  {
	    AutoDiff<3> xi = lami[fav[2]]-lami[fav[1]];
	    AutoDiff<3> eta = lami[fav[0]]; // lo 
	    AutoDiff<3> zeta = lami[vop];   // lz 

	    T_FACESHAPES::CalcSplitted (p+1, xi, eta, zeta, adpol1, adpol2); 

	    // gradients 
	    if (usegrad_face[i])
	      for (j = 0; j <= p-2; j++)
		for (k = 0; k <= p-2-j; k++, ii++)
		  for (l = 0; l < 3; l++)
		    shape(ii, l) =  adpol1[j].DValue(l)*adpol2[k].Value() 
		      + adpol1[j].Value()*adpol2[k].DValue(l); 

	    // other combination
	    for (j = 0; j <= p-2; j++)
	      for (k = 0; k <= p-2-j; k++, ii++)
		  for (l = 0; l < 3; l++)
		    shape(ii, l) =  adpol1[j].DValue(l)*adpol2[k].Value() 
		      - adpol1[j].Value()*adpol2[k].DValue(l); 

	    double ned[3]; 
	    for(l=0;l<3;l++) 
	      ned[l] = lami[fav[2]].Value()*lami[fav[1]].DValue(l) 
		- lami[fav[1]].Value()*lami[fav[2]].DValue(l);
	    // rec_pol2 * Nedelec0
	    for (j = 0; j <= p-2; j++, ii++)
	      for (l = 0; l < 3; l++)
		shape(ii,l) = adpol2[j].Value()*ned[l]; 
	  }
      }
    
    p = order_inner; 
    
    T_INNERSHAPES::CalcSplitted(p+1, x-(1-x-y-z), y, z,adpol1, adpol2, adpol3 );

    //gradient fields 
    if(usegrad_cell)
      for (i = 0; i <= p-3; i++)
	for (j = 0; j <= p-3-i; j++)
	  for (k = 0; k <= p-3-i-j; k++, ii++)
	    for(l=0; l<3; l++) 
	      shape(ii,l) = adpol1[i].DValue(l) * adpol2[j].Value()* adpol3[k].Value() 
		+  adpol1[i].Value() * adpol2[j].DValue(l)* adpol3[k].Value() 
		+  adpol1[i].Value() * adpol2[j].Value()* adpol3[k].DValue(l) ; 
    // other combinations
    for (i = 0; i <= p-3; i++)
      for (j = 0; j <= p-3-i; j++)
	for (k = 0; k <= p-3-i-j; k++, ii++, ii++)
	  for(l=0; l<3; l++) 
	    {
	      shape(ii,l) = adpol1[i].DValue(l) * adpol2[j].Value()* adpol3[k].Value() 
		- adpol1[i].Value() * adpol2[j].DValue(l)* adpol3[k].Value() 
		+  adpol1[i].Value() * adpol2[j].Value()* adpol3[k].DValue(l) ; 
	      shape(ii+1,l) = adpol1[i].DValue(l) * adpol2[j].Value()* adpol3[k].Value() 
		+  adpol1[i].Value() * adpol2[j].DValue(l)* adpol3[k].Value() 
		-  adpol1[i].Value() * adpol2[j].Value()* adpol3[k].DValue(l) ; 
	    } 
       
     // Nedelec-types  l4.DValue()*x.Value() - l4.Value()*x.DValue(); 
     
    AutoDiff<3> l4 = 1-x-y-z; 
    double ned[3] = {- x.Value() - l4.Value(), - x.Value(), - x.Value()};
        
     for (j= 0; j <= p-3; j++)
       for (k = 0; k <= p-3-j; k++, ii++)
	 for(l=0; l<3; l++) 
	   shape(ii,l) = adpol2[j].Value()*adpol3[k].Value()*ned[l]; 
    
  }
  


  template <class T_ORTHOPOL>
  void HCurlHighOrderTet<T_ORTHOPOL> :: CalcCurlShape (const IntegrationPoint & ip, 
						       FlatMatrixFixWidth<3> shape) const
  {  
    /*
    MatrixFixWidth<3> shape2(shape.Height()); 
    HCurlHighOrderFiniteElement<3> :: CalcCurlShape(ip,shape2); 
    */
  
    AutoDiff<3> x (ip(0), 0); 
    AutoDiff<3> y (ip(1), 1); 
    AutoDiff<3> z (ip(2), 2); 
    
    AutoDiff<3> lami[4] = { x, y, z, 1.0-x-y-z };
    
    int i, j, k,l;
    int c[3][2] = {{1,2},{2,0},{0,1}}; 
      
    shape = 0.0; 
    
    const EDGE * edges = ElementTopology::GetEdges (ET_TET);
    const FACE * faces = ElementTopology::GetFaces (ET_TET); 
  
    int ii = 6; 
    for (i = 0; i < 6; i++)
      {
	int es = edges[i][0]; 
	int ee = edges[i][1]; 
	if (vnums[es] > vnums[ee]) swap (es, ee);
	
	//Nedelec low order edge shape function  
	for(l=0; l<3; l++) 
	  shape(i,l) = -2*(lami[ee].DValue(c[l][0])*lami[es].DValue(c[l][1])
			   -lami[ee].DValue(c[l][1])*lami[es].DValue(c[l][0])); 
	 
	// Tangential Edge Shapes are grad-fields ->  0.0
 	if(usegrad_edge[i]) ii += order_edge[i]; 
      }  
    
    ArrayMem<AutoDiff<3>,20 > adpol1(order+2),adpol2(order+2) ,adpol3(order+2); 

    // face shape functions
    for(i=0; i<4; i++) // faces 
      {
	int fav[3];
	for(j=0; j<3; j++) fav[j]=faces[i][j];

	//Sort vertices  first edge op minimal vertex 
	if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 
	if(vnums[fav[1]] > vnums[fav[2]]) swap(fav[1],fav[2]);
	if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 	  	

	int vop = 6 - fav[0] - fav[1] - fav[2];  	
	int p = order_face[i];

	if (p >= 2)
	  {
	    AutoDiff<3> xi = lami[fav[2]]-lami[fav[1]];
	    AutoDiff<3> eta = lami[fav[0]]; // in [0,1]
	    AutoDiff<3> zeta = lami[vop];  // in [0,1] // lam_F
	     
	    T_FACESHAPES::CalcSplitted (p+1, xi, eta, zeta, adpol1, adpol2); 
	    // gradients  =>  0 
	    if(usegrad_face[i]) ii+=(p-1)*p/2; 
	    
	    // other combination => 2 * (nabla pol2) x (nabla pol1) 
	    for (j = 0; j <= p-2; j++)
	      for (k = 0; k <= p-2-j; k++, ii++)
		for (l = 0; l < 3; l++)
		  shape(ii, l) =
		    2*(adpol2[k].DValue(c[l][0])*adpol1[j].DValue(c[l][1])
		       - adpol2[k].DValue(c[l][1])*adpol1[j].DValue(c[l][0])); 
	 						       
	    // curl(adpol2 * Nedelec0)      
	    double ned[3],curlned[3]; 
	    for (l=0;l<3;l++) 
	      {
		ned[l] = lami[fav[2]].Value()*lami[fav[1]].DValue(l) -  
		  lami[fav[1]].Value()*lami[fav[2]].DValue(l);  
		curlned[l] = 
		  2*(lami[fav[2]].DValue(c[l][0])*lami[fav[1]].DValue(c[l][1])
		     -lami[fav[2]].DValue(c[l][1])*lami[fav[1]].DValue(c[l][0])); 
	      } 
	   
	    for (j = 0; j <= p-2; j++, ii++)
	      for (l = 0; l < 3; l++)
		shape(ii,l) = curlned[l] * adpol2[j].Value() 
		  + adpol2[j].DValue(c[l][0])*ned[c[l][1]] 
		  - adpol2[j].DValue(c[l][1])*ned[c[l][0]]; 
	  }
      } 

    // element-based shapes 
    int p = order_inner;
    if (p >= 3)
      {
	T_INNERSHAPES::CalcSplitted(p+1, x-(1-x-y-z), y, z,adpol1, adpol2, adpol3 );

	//gradient fields  => 0.0
	if(usegrad_cell) ii+= (p-2)*(p-1)*p/6; 
		
	// other  combinations => 
	for (i = 0; i <= p-3; i++)
	  for (j = 0; j <= p-3-i; j++)
	    for (k = 0; k <= p-3-i-j; k++, ii+=2)
	      {
		// 2 grad v  x  grad (uw)
		AutoDiff<3> grad_uw = adpol1[i] * adpol3[k];
		// AutoDiff<3> grad_uw = Mult (adpol1[i], adpol3[k]);
		for (l = 0; l < 3; l++)
		  shape(ii,l) = 2 * (adpol2[j].DValue(c[l][0]) * grad_uw.DValue(c[l][1]) -
				     adpol2[j].DValue(c[l][1]) * grad_uw.DValue(c[l][0]));

		// 2 grad w  x  grad (uv)
		AutoDiff<3> grad_uv = adpol1[i] * adpol2[j];
		// AutoDiff<3> grad_uv = Mult (adpol1[i], adpol2[j]);
		for (l = 0; l < 3; l++)
		  shape(ii+1,l) = 2 * (adpol3[k].DValue(c[l][0]) * grad_uv.DValue(c[l][1]) -
				       adpol3[k].DValue(c[l][1]) * grad_uv.DValue(c[l][0]));
	      }
		
	
	// Nedelec-types 
	AutoDiff<3>  l4 = 1-x-y-z; 
 	double ned[3] = {l4.DValue(0)*x.Value() - l4.Value()*x.DValue(0),
			 l4.DValue(1)*x.Value() - l4.Value()*x.DValue(1),
	  		 l4.DValue(2)*x.Value() - l4.Value()*x.DValue(2)};
	double curlned[3]; 
 	for (l=0; l<3; l++) 
	  curlned[l] = 2*(x.DValue(c[l][0])*l4.DValue(c[l][1])
			  - x.DValue(c[l][1])*l4.DValue(c[l][0])); 
          
 	for (j= 0; j <= p-3; j++)
	  for (k = 0; k <= p-3-j; k++, ii++)
	    for(l=0; l<3; l++) 
	      shape(ii,l) = adpol2[j].Value()*adpol3[k].Value()*curlned[l]
		+ adpol3[k].Value()*(adpol2[j].DValue(c[l][0])*ned[c[l][1]]
				     -adpol2[j].DValue(c[l][1])*ned[c[l][0]]) 
		+ adpol2[j].Value()*(adpol3[k].DValue(c[l][0])*ned[c[l][1]] 
				     -adpol3[k].DValue(c[l][1])*ned[c[l][0]]); 

	
      } 
    
  
    /*
    // shape2 -=shape; 
    (*testout) << " curlshape code  " << endl << shape << endl ; 
    (*testout) << " curlshape num " << endl << shape2 << endl; 
    shape2 -= shape; 
    (*testout) << " curlshape diff " << endl << shape2 << endl; 
    */
    }				         
  
  //------------------------------------------------------------------------
  // HCurlHighOrderPrism 
  //------------------------------------------------------------------------

  template <class T_ORTHOPOL>
  HCurlHighOrderPrism<T_ORTHOPOL> :: HCurlHighOrderPrism (int aorder)
    : HCurlHighOrderFiniteElement<3>(ET_PRISM)
  {
    nv = 6; 
    ned = 9; 
    nf = 5; 
    
    int i;
    order_inner = aorder;

    for (i = 0; i < ned; i++)
      order_edge[i] = aorder;
    
    for (i=0; i<nf; i++) 
      order_face[i] = aorder; 
    
    ComputeNDof();
  }

  template <class T_ORTHOPOL>
  void HCurlHighOrderPrism<T_ORTHOPOL> :: ComputeNDof()
  {
    ndof = 9; // Nedelec 
    // if oder_edge <= 1 --> Nedelec
    int i;
    
    for (i = 0; i < 9; i++)
      ndof += usegrad_edge[i]*order_edge[i];
    
    // trig_faces 
    for (i=0; i < 2; i++) 
      if (order_face[i] > 1)  
	ndof += (usegrad_face[i]+1)*(order_face[i]-1)*order_face[i]/2 
	  + (order_face[i] - 1); 

    // quad_faces
    for (i=2; i < 5; i++)
      ndof +=  ((usegrad_face[i]+1)*order_face[i] + 2)*order_face[i];

    // inner_dof
    ndof += ((usegrad_cell+2)*order_inner + 3) * order_inner*(order_inner-1)/2; 
    
    order = 0; // max(order_edges_tang,order_inner);  
    for (i = 0; i < 9; i++)
      {
	if (order_edge[i] > order)
	  order = order_edge[i];
      }
    for(i=0; i<5; i++) 
      { 
	if (order_face[i] > order) 
	  order = order_face[i]; 
      }
    
    if (order_inner > order) // order_edge_normal = order_inner; 
      order = order_inner;
   
    order++; 
  }

  template <class T_ORTHOPOL>
  void HCurlHighOrderPrism<T_ORTHOPOL> :: CalcShape (const IntegrationPoint & ip, 
		  			     FlatMatrixFixWidth<3> shape) const
  {
    AutoDiff<2> x (ip(0), 0);
    AutoDiff<2> y (ip(1), 1);
    AutoDiff<1> z (ip(2), 0);
    AutoDiff<2> lami[6] = { x, y, 1-x-y, x, y, 1-x-y };
    AutoDiff<1> muz[6]  = { 1-z, 1-z, 1-z, z, z, z };
    
    int i, j, k,l, ii,p;
    
    shape = 0.0; 
       
    const EDGE * edges = ElementTopology::GetEdges (ET_PRISM);
    const FACE * faces = ElementTopology::GetFaces (ET_PRISM); 

    // order+1 auf order+3 erhht, test JS
    ArrayMem<AutoDiff<2>,20> adpolxy1(order+3),adpolxy2(order+3); 
    ArrayMem<AutoDiff<1>,20> adpolz(order+3);   

    ii = 9;
    // horizontal edge shapes
    for (i = 0; i < 6; i++)
      {
	p = order_edge[i]; 
        int es = edges[i][0]; 
	int ee = edges[i][1]; 
	if (vnums[es] > vnums[ee]) swap (es, ee);
	
	//Nedelec0
	for(k=0; k<2; k++) 
	  // SZ
	  shape(i,k) = muz[ee].Value()*
	    (-lami[ee].Value()* lami[es].DValue(k)+lami[es].Value()*
	     lami[ee].DValue(k)); 

	//high order \nabla (P_edge(x,y) * muz)
	if(usegrad_edge[i])
	  {
	    T_ORTHOPOL::CalcTrigExt(p+1, lami[ee]-lami[es],
				    1-lami[es]-lami[ee],adpolxy1);
	    
	    for(j=0;j<=p-1;j++)
	      {
		for(l=0;l<2;l++)
		  shape(ii,l) = adpolxy1[j].DValue(l)*muz[ee].Value(); 
		shape(ii++,2) = adpolxy1[j].Value()*muz[ee].DValue(0);
	      }
	  }
      }
    
    //Vertical Edge Shapes
    for (i = 6; i < 9; i++)
      {
	p = order_edge[i]; 
        int es = edges[i][0]; 
	int ee = edges[i][1];
	if (vnums[es] > vnums[ee]) swap (es, ee);
	// Nedelec0: lami_E * grad(mu_E)  
	shape(i,2) = lami[ee].Value()*muz[ee].DValue(0); 
	
	//high order edges:  \nabla (T_ORTHOPOL^{p+1}(2z-1) * lami(x,y))
	if(usegrad_edge[i])
	  {
	    T_ORTHOPOL::Calc (p+1, muz[ee]-muz[es], adpolz);
	    
	    for (j = 0; j < p; j++)
	      {
		for (l = 0; l < 2; l++)
		  shape(ii,l) = adpolz[j].Value() * lami[ee].DValue(l);
		shape(ii++,2) = adpolz[j].DValue(0) * lami[ee].Value();
	      }
	  }
      }

    // trig face shapes
    for (i = 0; i < 2; i++)
      {
	p = order_face[i];
	if (p < 2) continue;

	int fav[3] = {faces[i][0], faces[i][1], faces[i][2]};

	if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 
	if(vnums[fav[1]] > vnums[fav[2]]) swap(fav[1],fav[2]);
	if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 	  	
	
	AutoDiff<2> xi = lami[fav[2]]-lami[fav[1]];
	AutoDiff<2> eta = lami[fav[0]]; // 1-lami[f2]-lami[f1];
	
	T_TRIGFACESHAPES::CalcSplitted(p+1,xi,eta,adpolxy1,adpolxy2); 

	if(usegrad_face[i])
	  // gradient-fields =>  \nabla( adpolxy1*adpolxy2*muz )
	  for (j = 0; j <= p-2; j++)
	    for (k = 0; k <= p-2-j; k++,ii++)
	      {
		for (l = 0; l < 2; l++)
		  shape(ii, l) = (adpolxy1[j].DValue(l)*adpolxy2[k].Value() 
				  + adpolxy1[j].Value()*adpolxy2[k].DValue(l))
		    *muz[fav[2]].Value();
		
		shape(ii,2) = muz[fav[2]].DValue(0)*(adpolxy1[j].Value()
						     * adpolxy2[k].Value());
	      }

	
	// rotations of grad-fields => grad(uj)*vk*w -  uj*grad(vk)*w 

	for (j = 0; j <= p-2; j++)
	  for (k = 0; k <= p-2-j; k++)
	    {
	      for (l = 0; l < 2; l++)
		shape(ii, l) = (adpolxy1[j].DValue(l)*adpolxy2[k].Value() 
				- adpolxy1[j].Value()*adpolxy2[k].DValue(l))
		  *muz[fav[2]].Value();
	      
	    
	       shape(ii++,2) = muz[fav[2]].DValue(0)*(adpolxy1[j].Value()
	      		     *adpolxy2[k].Value());
	       //ii++;
	      /* koennte man durch ii++ ersetzen ... 
	      was spricht dagegen ? (JS) 
		... Nix :-)) (SZ) is nur konsistenter so 
			       ... find ich halt ... 
			       SZ und ich habs ja gsagt:-) 
	      */

	    }
	
	double ned[2]= {lami[fav[2]].Value()*lami[fav[1]].DValue(0) - 
			lami[fav[1]].Value()*lami[fav[2]].DValue(0),
			lami[fav[2]].Value()*lami[fav[1]].DValue(1) - 
			lami[fav[1]].Value()*lami[fav[2]].DValue(1)}; 
	
	//  Ned0*adpolxy2[j]*muz 
	for (j = 0; j <= p-2; j++,ii++)
	  for (l = 0; l < 2; l++)
	    shape(ii,l) = ned[l] * adpolxy2[j].Value()*muz[fav[2]].Value(); 
      }
    
    
    // quad faces
    for (i = 2; i < 5; i++)
      {
	p = order_face[i];
	 
	int fmax = 0;
	for (j = 1; j < 4; j++)
	  if (vnums[faces[i][j]] > vnums[faces[i][fmax]]) fmax = j;

	int fz = 3-fmax; 
	int ftrig = fmax^1; 
	AutoDiff<2> xi = lami[faces[i][fmax]]-lami[faces[i][ftrig]]; 
	AutoDiff<2> eta = 1-lami[faces[i][fmax]]-lami[faces[i][ftrig]]; 
	AutoDiff<1> zeta = muz[faces[i][fmax]]-muz[faces[i][fz]]; 
	
	T_ORTHOPOL::CalcTrigExt(p+1,xi,eta,adpolxy1); 
	T_ORTHOPOL::Calc(p+1,zeta,adpolz); 
	
	if(usegrad_face[i])
	  {
	    // Gradientfields nabla(polxy*polz) 
	    if (vnums[faces[i][ftrig]] > vnums[faces[i][fz]]) //SZ org: < 
	      for (k = 0; k <= p-1; k++)
		for (j = 0; j <= p-1; j++)
		  { 
		    for (l=0; l<2;l++) 
		      shape(ii,l) = adpolxy1[k].DValue(l) * adpolz[j].Value();
		    shape(ii++,2) = adpolxy1[k].Value()*adpolz[j].DValue(0); 
		  } 
	    else
	      for (j = 0; j <= p-1; j++)
		for (k = 0; k <= p-1; k++)
		  {
		    for (l=0; l<2;l++) 
		      shape(ii,l) = adpolxy1[k].DValue(l) * adpolz[j].Value();
		    shape(ii++,2) = adpolxy1[k].Value()*adpolz[j].DValue(0); 
		  }
	  }
	  
	// Rotations of GradFields => nabla(polxy)*polz - polxy*nabla(polz)
	if (vnums[faces[i][ftrig]] > vnums[faces[i][fz]]) //SZ org: <
	  for (k = 0; k <= p-1; k++)
	    for (j = 0; j <= p-1; j++)
	      { 
		for (l=0; l<2;l++) 
		      shape(ii,l) = adpolxy1[k].DValue(l) * adpolz[j].Value();
		shape(ii++,2) = -adpolxy1[k].Value()*adpolz[j].DValue(0); 
	      }
	else
	  for (j = 0; j <= p-1; j++)
	    for (k = 0; k <= p-1; k++)
	      {
		for (l=0; l<2;l++) 
		  shape(ii,l) = -adpolxy1[k].DValue(l) * adpolz[j].Value();
		shape(ii++,2) = adpolxy1[k].Value()*adpolz[j].DValue(0); 
	      }
	
	// Missing ones of Ned0-type 
	// (ned0_trig)*polz, (ned0_quad)* polxy 
	double ned0trig[2] = {
	  lami[faces[i][fmax]].Value()*lami[faces[i][ftrig]].DValue(0)  -
	  lami[faces[i][fmax]].DValue(0)*lami[faces[i][ftrig]].Value(),
	  lami[faces[i][fmax]].Value()*lami[faces[i][ftrig]].DValue(1)  -
	  lami[faces[i][fmax]].DValue(1)*lami[faces[i][ftrig]].Value()};
	  
	for (j= 0; j <= p-1; j++, ii+=2)
	  { 
	    if(vnums[faces[i][ftrig]] > vnums[faces[i][fz]])
	      {
		for(l=0; l<2; l++)
		  shape(ii,l) =  - ned0trig[l]*adpolz[j].Value(); 
		shape(ii+1,2) = 0.5*zeta.DValue(0)*adpolxy1[j].Value();  
	      }
	      else
	      {
		shape(ii,2) = 0.5* zeta.DValue(0)*adpolxy1[j].Value();  
		for(l=0; l<2; l++)
		  shape(ii+1,l) = - ned0trig[l]*adpolz[j].Value(); 
	      }
	  }
      }

    
    
    p = order_inner;
     
    
    if(p>=2) 
      {
	        
	T_TRIGFACESHAPES::CalcSplitted(p+1,x-y,1-x-y,adpolxy1,adpolxy2);
	T_ORTHOPOL::Calc(p+1,2*z-1,adpolz); 
	
	// gradientfields
	if(usegrad_cell)
	  for(i=0;i<=p-2;i++)
	    for(j=0;j<=p-2-i;j++)
	      for(k=0;k<=p-1;k++)
		{
		  for(l=0;l<2;l++)
		    shape(ii,l) = (adpolxy1[i].DValue(l)*adpolxy2[j].Value() +
				   adpolxy1[i].Value()*adpolxy2[j].DValue(l))
		      *adpolz[k].Value(); 
		  shape(ii++,2) = adpolxy1[i].Value()*adpolxy2[j].Value()*
		    adpolz[k].DValue(0); 
		}
	 

	// Rotations of gradientfields
	for(i=0;i<=p-2;i++)
	  for(j=0;j<=p-2-i;j++)
	    for(k=0;k<=p-1;k++)
	      {
		for(l=0;l<2;l++)
		  shape(ii,l) = (adpolxy1[i].DValue(l)*adpolxy2[j].Value() -
				 adpolxy1[i].Value()*adpolxy2[j].DValue(l))
		    *adpolz[k].Value(); 
		shape(ii++,2) = adpolxy1[i].Value()*adpolxy2[j].Value()*adpolz[k].DValue(0); 
		
		for(l=0;l<2;l++)
		  shape(ii,l) = (adpolxy1[i].DValue(l)*adpolxy2[j].Value() +
				  adpolxy1[i].Value()*adpolxy2[j].DValue(l))
		    *adpolz[k].Value(); 
		shape(ii++,2) = -adpolxy1[i].Value()*adpolxy2[j].Value()
		*adpolz[k].DValue(0); 
	      }

	// Missing shapes 
	// ned0(trig) * polxy2[j]*polz 
	// z.DValue(0) * polxy1[i] * polxy2[j] 
	double ned_trig[2] = {y.Value(),-x.Value()};  
	for(j=0;j<=p-2;j++) 
	  for (k=0;k<=p-1;k++,ii++) 
	      for(l=0;l<2;l++) 
		shape(ii,l) = ned_trig[l]*adpolxy2[j].Value()*adpolz[k].Value(); 
	    
    	for(i=0;i<=p-2;i++) 
	  for(j=0;j<=p-2-i;j++) 
	      shape(ii++,2) = adpolxy1[i].Value()*adpolxy2[j].Value(); 
	
      }
    
    //    (*testout) << "shape = " << shape << endl;
  }
  
    
  template <class T_ORTHOPOL>
  void HCurlHighOrderPrism<T_ORTHOPOL> :: CalcCurlShape (const IntegrationPoint & ip, 
							 FlatMatrixFixWidth<3> shape) const
  {
    //   MatrixFixWidth<3> shape2(shape.Height()); 
    //  HCurlHighOrderFiniteElement<3> :: CalcCurlShape(ip,shape2); 
    
	
    AutoDiff<3> x (ip(0), 0);
    AutoDiff<3> y (ip(1), 1);
    AutoDiff<3> z (ip(2), 2);
    AutoDiff<3> lami[6] = { x, y, 1-x-y, x, y, 1-x-y };
    AutoDiff<3> muz[6]  = { 1-z, 1-z, 1-z, z, z, z };
    
    int i, j, k,l, ii,p;
    
    shape = 0.0; 
        
    const EDGE * edges = ElementTopology::GetEdges (ET_PRISM);
    const FACE * faces = ElementTopology::GetFaces (ET_PRISM); 

    // order+1 auf order+3 erhht, test JS
    ArrayMem<AutoDiff<3>,20> adpolxy1(order+3),adpolxy2(order+3); 
    ArrayMem<AutoDiff<3>,20> adpolz(order+3);   
    
    ii = 9;
    // horizontal edge shapes
    for (i = 0; i < 6; i++)
      {
	p = order_edge[i]; 
        int es = edges[i][0]; 
	int ee = edges[i][1]; 
	if (vnums[es] > vnums[ee]) swap (es, ee);
	
	AutoDiff<3> hd = Cross(muz[ee]*lami[es],lami[ee])-Cross(muz[ee]*lami[ee],
							    lami[es]); 
	
	//Nedelec0
	for(l=0; l<3; l++) 
	  shape(i,l) = hd.DValue(l); 
	
	//high order edges = gradfields --> curl = 0.0 
	if(usegrad_edge[i])
	  ii+=p; 
      }
     
    //Vertical Edge Shapes
    for (i = 6; i < 9; i++)
      {
	p = order_edge[i]; 
        int es = edges[i][0]; 
	int ee = edges[i][1];
	if (vnums[es] > vnums[ee]) swap (es, ee);
	// curl(ned0) = grad(lami_ee) x  grad(mu_E)
	AutoDiff<3> hd = Cross(lami[ee],muz[ee]); 
	
	
	for(l=0;l<3;l++)
	  shape(i,l) = hd.DValue(l); 
		
	//high order edges:  \nabla (T_ORTHOPOL^{p+1}(2z-1) * lami(x,y))
	if(usegrad_edge[i]) ii+=p; 
      }
    
    // trig face shapes
    for (i = 0; i < 2; i++)
      {
	p = order_face[i];
	if (p < 2) continue;

	int fav[3] = {faces[i][0], faces[i][1], faces[i][2]};

	if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 
	if(vnums[fav[1]] > vnums[fav[2]]) swap(fav[1],fav[2]);
	if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 	  	
	
	AutoDiff<3> xi = lami[fav[2]]-lami[fav[1]];
	AutoDiff<3> eta = lami[fav[0]]; // 1-lami[f2]-lami[f1];
	
	T_TRIGFACESHAPES::CalcSplitted(p+1,xi,eta,adpolxy1,adpolxy2); 

	// gradient-fields =>  curl = 0.0
	if(usegrad_face[i])
	  ii+=(p-1)*p/2; 


	// rotations of grad-fields => grad(p1)*p2 -  p1*grad(p2)
	// --> 2 * grad(p2) x grad(p1)
	for (j = 0; j <= p-2; j++)
	  for (k = 0; k <= p-2-j; k++)
	    {
	      AutoDiff<3> hd = 2*Cross(adpolxy2[k],adpolxy1[j]*muz[fav[2]]); 
	      for (l = 0; l < 3; l++)
		shape(ii,l) = hd.DValue(l); 
	      ii++;
	    }

	//  Ned0*(adpolxy2[j]*muz) 
	//  --> curlned * a + curl(a) x ned 

	for (j = 0; j <= p-2; j++,ii++)
	  {
	    AutoDiff<3> hd = 
	      Cross(lami[fav[2]]*adpolxy2[j]*muz[fav[2]],lami[fav[1]]) 
	      - Cross(lami[fav[1]]*adpolxy2[j]*muz[fav[2]],lami[fav[2]]); 
	    
	    for (l = 0; l < 3; l++)
	      shape(ii,l) = hd.DValue(l); 
	  }
      }
    
    // quad faces
    for (i = 2; i < 5; i++)
      {
	p = order_face[i]; 

	int fmax = 0;
	for (j = 1; j < 4; j++)
	  if (vnums[faces[i][j]] > vnums[faces[i][fmax]]) fmax = j;

	int fz = 3-fmax; 
	int ftrig = fmax^1; 
	AutoDiff<3> xi = lami[faces[i][fmax]]-lami[faces[i][ftrig]]; 
	AutoDiff<3> eta = 1-lami[faces[i][fmax]]-lami[faces[i][ftrig]]; 
	AutoDiff<3> zeta = muz[faces[i][fmax]]-muz[faces[i][fz]]; 
	
	T_ORTHOPOL::CalcTrigExt(p+1,xi,eta,adpolxy1); 
	T_ORTHOPOL::Calc(p+1,zeta,adpolz); 
	
	// Gradientfields nabla(polxy*polz) 
	if(usegrad_face[i])
	  ii+=p*p; 
	  
	// Rotations of GradFields => nabla(polxy)*polz - polxy*nabla(polz)
	// --> 2* grad(polz) * grad(polxy1) 
	if (vnums[faces[i][ftrig]] > vnums[faces[i][fz]]) //SZ org: <
	  for (k = 0; k <= p-1; k++)
	    for (j = 0; j <= p-1; j++,ii++)
	      {
		AutoDiff<3> hd = 2*Cross(adpolz[j],adpolxy1[k]);
		for (l=0; l<3;l++) 
		  shape(ii,l) = hd.DValue(l); 
	      }
	else
	  // --> -2 *  
	  for (j = 0; j <= p-1; j++)
	    for (k = 0; k <= p-1; k++,ii++)
	      {
		AutoDiff<3> hd = -2*Cross(adpolz[j],adpolxy1[k]);
		for (l=0; l<3;l++) 
		  shape(ii,l) = hd.DValue(l);  
		
	      }
	
 	for (j= 0; j <= p-1; j++, ii++)
	  { 
	    if(vnums[faces[i][ftrig]] > vnums[faces[i][fz]])
	      {
		AutoDiff<3> hd = 
		  Cross(lami[faces[i][ftrig]]*adpolz[j],lami[faces[i][fmax]])
		  -Cross(lami[faces[i][fmax]]*adpolz[j],lami[faces[i][ftrig]]); 
		for(l=0; l<3; l++)
		  shape(ii,l) = hd.DValue(l);
		ii++;
		hd = 0.5*Cross(adpolxy1[j],zeta);
		for(l=0; l<3; l++)
		  shape(ii,l) = hd.DValue(l);  
	      }
   	    else 
	      {
		AutoDiff<3> hd = 0.5*Cross(adpolxy1[j],zeta);
		for(l=0; l<3; l++)
		  shape(ii,l) = hd.DValue(l);  
		ii++; 

		hd = 
		  Cross(lami[faces[i][ftrig]]*adpolz[j],lami[faces[i][fmax]])
		  -Cross(lami[faces[i][fmax]]*adpolz[j],lami[faces[i][ftrig]]); 
		for(l=0; l<3; l++)
		  shape(ii,l) = hd.DValue(l);
	      }
	    
	  }
	  
      }
 
    p = order_inner;
       if(p>=2) 
      {
	//ArrayMem<AutoDiff<3>,20> polxy1(p+2),polxy2(p+2); 
	T_TRIGFACESHAPES::CalcSplitted(p+1,x-y,1-x-y,adpolxy1,adpolxy2);
	T_ORTHOPOL::Calc(p+1,2*z-1,adpolz); 
	
	// gradientfields
	if(usegrad_cell)
	  ii+=p*(p-1)*p/2; 

	// Rotations of gradientfields
	for(i=0;i<=p-2;i++)
	  for(j=0;j<=p-2-i;j++)
	    for(k=0;k<=p-1;k++,ii++)
	      {
		AutoDiff<3> hd = 2*Cross(adpolxy2[j],adpolxy1[i]*adpolz[k]); 
		for(l=0;l<3;l++)
		  shape(ii,l) = hd.DValue(l); 
		ii++; 
		hd = 2*Cross(adpolz[k],adpolxy1[i]*adpolxy2[j]); 
		for(l=0;l<3;l++)
		  shape(ii,l) = hd.DValue(l); 
	      }
	
	// Missing shapes 
	// ned0(trig) * polxy2[j]*polz 
	// z.DValue(0) * polxy1[i] * polxy2[j] 
	double ned0trig[3] = {y.Value(),-x.Value(),0.};  
	double curlned[3] = {0.,0.,-2.};

	for(j=0;j<=p-2;j++) 
	  for (k=0;k<=p-1;k++,ii++)
	    {
	      AutoDiff<3> hd = 
		Cross(adpolz[k]*adpolxy2[j]*y, x) 
		- Cross(adpolz[k]*adpolxy2[j]*x, y);  
		
	      for(l=0;l<3;l++) 
		shape(ii,l) = hd.DValue(l); 
	    }
    	for(i=0;i<=p-2;i++) 
	  for(j=0;j<=p-2-i;j++,ii++) 
	    {
	      AutoDiff<3> hd =	Cross(adpolxy1[i]*adpolxy2[j],z); 
	      
	    for(l=0;l<3;l++)
	      shape(ii,l) = hd.DValue(l); 
      
	    } 
      }
    // shape2 -=shape; 
       /*      (*testout) << " curlshape code  " << endl << shape << endl ; 
    (*testout) << " curlshape num " << endl << shape2 << endl; 
    shape2 -= shape; 
    (*testout) << " curlshape diff " << endl << shape2 << endl; 
       */
       
      } 

//------------------------------------------------------------------------
 // HCurlHighOrderPyr 
 //------------------------------------------------------------------------
  /*
  template <class TRIG_EXT>
  HCurlHighOrderPyr<TRIG_EXT> :: HCurlHighOrderPyr (int aorder)
    : HCurlHighOrderFiniteElement<3>(ET_PYR)
  {
    nv = 5;
    ned = 8; 
    nf = 5; 

    int i;
    order_inner = aorder;
    for (i = 0; i < ned; i++)
      order_edge[i] = aorder;
    
    for (i=0; i<nf; i++) 
      order_face[i] = aorder; 

    ComputeNDof();
  }

  template <class TRIG_EXT>
  void HCurlHighOrderPyr<TRIG_EXT> :: ComputeNDof()
  {
    ndof_edge = 0; 
    ndof = 8; // Nedelec 
    // if oder_edge <= 1 --> Nedelec
    int i;

    for (i = 0; i < 8; i++)
      {
	ndof += order_edge[i];
	ndof_edge += order_edge[i]; 
      }
   
    // dof_face  
    for (i=0; i < 4; i++) // Trig_Faces 
      if (order_face[i] > 1)  
	ndof += order_face[i]*order_face[i]-1;
    // Quad_face 
    ndof += 2*(order_face[4]+1)*order_face[4];
    
    // ???? 
    // inner_dof = dof_inner_face + dof_inner_inner 
    ndof += (order_inner)*(order_inner*order_inner-1) +
      (order_inner+1)*order_inner*(order_inner-1)/2; 
    
    order = 0; // max(order_edges_tang,order_inner);  
    for (i = 0; i < 8; i++)
      {
	if (order_edge[i] > order)
	  order = order_edge[i];
      }
    for(i=0; i<5; i++) 
      { 
	if (order_face[i] > order) 
	  order = order_face[i]; 
      }
    
    if (order_inner > order) // order_edge_normal = order_inner; 
      order = order_inner;

    order++;
  }

  template <class TRIG_EXT>
  void HCurlHighOrderPyr<TRIG_EXT> :: CalcShape (const IntegrationPoint & ip, 
				       FlatMatrixFixWidth<3> shape) const
  {
    AutoDiff<3> x (ip(0), 0);
    AutoDiff<3> y (ip(1), 1);
    AutoDiff<3> z (ip(2), 2);

    int i, j, k,l, ii;
    
    AutoDiff<3> xt = x/(1-z); 
    AutoDiff<3> yt = x/(1-z); 
    AutoDiff<3> sigma[5] = {3-xt-yt-z,2+xt-yt-z, 1+xt+yt-z,2-xt+yt-z,z}; 
    
    AutoDiff<3> lami[5]; 
    lami[0] = (1-xt)*(1-yt)*(1-z); 
    lami[1] = xt*(1-yt)*(1-z); 
    lami[2] = xt*yt*(1-z); 
    lami[3] = (1-xt)*yt*(1-z);
    lami[4] = z; 
    
    shape = 0.0; 
       
    const EDGE * edges = ElementTopology::GetEdges (ET_PYR);
    const FACE * faces = ElementTopology::GetFaces (ET_PYR); 
    
    int is, ie, iop, p;

    ArrayMem<double,10> rec_pol(order+1);   
    ArrayMem<double,10> drec_pol(order+1);   

    
    ii = 0;
    // horizontal edges, Nedelec 0
    for (i = 0; i < 4; i++)
      {
	is = edges[i][0]; 
	ie = edges[i][1]; 
	if (vnums[is] > vnums[ie])
	  swap (is, ie);

	for(j=0; j<3; j++) 
	  shape(ii,j) = 0.5*(sigma[ie]-sigma[is].DValue(j))*(lami[ie] + lami[is]);   
	ii++;
      }
    
    // vertical edges: lami
    for (j = 4; j < 8; j++)
      {
	shape(ii,2) = lami[edges[j][0]];
	if (vnums[edges[j][0]] < vnums[edges[j][1]]) // according to low order space 
	  shape(ii,2) *= -1;
	ii++;
      }

    // high order horizontal edges:
    // \nabla (P_edge(x,y) * lamz)
    for (i = 0; i < 6; i++)
      {
	is = edges[i][0]; 
	ie = edges[i][1]; 
	if (vnums[is] > vnums[ie])
	  swap (is, ie);
	iop = 3 - is % 3 - ie % 3;
	
	MatrixFixWidth<2> DExt(order_edge[i]); 
	Vector<> Ext(order_edge[i]); 
	TRIG_EXT::Calc(order_edge[i]+1, lami[ie]-lami[is], lami[iop], Ext); 	
	TRIG_EXT::CalcDeriv(order_edge[i]+1, lami[ie]-lami[is], lami[iop], DExt); 	

	for(j = 0; j < order_edge[i]; j++) 
	  {
	    for(k=0; k<2; k++) 
	      shape(ii,k) = lamz[is] * ((dlami(ie,k) - dlami(is,k)) * DExt(j,0) + dlami(iop,k) * DExt(j,1));
	    shape(ii,2) = dlamz[is] * Ext(j);
	    ii++; 
	  } 
      }

    // \nabla (b_z * P^{p-1}(z) * lami(x,y))
    for (i = 6; i < 9; i++)
      {
	p = order_edge[i];

	is = edges[i][0]; 
	ie = edges[i][1]; 
	if (vnums[is] > vnums[ie])
	  swap (is, ie);	
	    
	double hz = lamz[ie];
	double dhz = dlamz[ie];

	LegendrePolynomialandDiff (p-1, 2*hz-1, rec_pol, drec_pol);
	
	for (j = 0; j < p; j++)
	  {
	    for (k = 0; k < 2; k++)
	      shape(ii,k) = hz*(1-hz) * rec_pol[j] * dlami(edges[i][0],k);
	    shape(ii,2) = ((1-2*hz)*rec_pol[j]+hz*(1-hz)*2*drec_pol[j]) * dhz * lami[edges[i][0]];
	    ii++;
	  }
      }

    ArrayMem<double,10> drec_polx(order-1),  drec_polt(order-1);
    ArrayMem<double,10> rec_pol2(order-1), drec_pol2x(order-1), drec_pol2t(order-1);  
    ArrayMem<double,30> mem(3*order*sizeof(double));
    ArrayMem<double,30> mem2(3*order*sizeof(double));
    FlatMatrixFixWidth<3> grad_recpol(order, &mem[0]);
    FlatMatrixFixWidth<3> grad_recpol2(order, &mem2[0]);

    for (i = 0; i < 2; i++)
      {
	p = order_face[i];
	if (p < 2) continue;

	int fav[3];
	for(j=0; j<3; j++) fav[j]=faces[i][j];

	//Sort vertices  first edge op minimal vertex 
	if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 
	if(vnums[fav[1]] > vnums[fav[2]]) swap(fav[1],fav[2]);
	if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 	  	

	double ls = lami[fav[0]]; 
	double le = lami[fav[1]]; 
	double lo = lami[fav[2]];  

	double lz = lamz[fav[0]];
	
	ScaledLegendrePolynomialandDiff(p-2, le-ls, 1-lo, 
					rec_pol, drec_polx, drec_polt);  	    
	ScaledLegendrePolynomialandDiff(p-2, 2*lo-1, 1, 
					rec_pol2, drec_pol2x, drec_pol2t);  	    
	
	// \nabla (ls * le * lz * rec_pol), and \nabla (lo rec_pol2)
	for (j = 0; j <= p-2; j++)
	  {
	    for (l = 0; l < 2; l++)
	      {
		grad_recpol(j,l) =
		  ls * le * lz * (drec_polx[j] * (dlami(fav[1],l)-dlami(fav[0],l)) + 
				  drec_polt[j] * (-dlami(fav[2],l))) +
		  (ls * dlami(fav[1],l) + le * dlami(fav[0],l) ) * lz * rec_pol[j];

		grad_recpol2(j,l) = 
		lo * (drec_pol2x[j] * 2*dlami(fav[2],l)) +
		  dlami(fav[2],l) * rec_pol2[j];
	      }
	    grad_recpol(j,2) = ls * le * rec_pol[j] * dlamz[fav[0]];
	    grad_recpol2(j,2) = 0;
	  }

	// gradients:
	for (j = 0; j <= p-2; j++)
	  for (k = 0; k <= p-2-j; k++, ii++)
	    for (l = 0; l < 3; l++)
	      shape(ii, l) = 
		ls*le*lz*rec_pol[j]*grad_recpol2(k,l) + 
		grad_recpol(j,l)*lo*rec_pol2[k];
	
	// other combination
	for (j = 0; j <= p-2; j++)
	  for (k = 0; k <= p-2-j; k++, ii++)
	    for (l = 0; l < 3; l++)
	      shape(ii, l) = 
		ls*le*lz*rec_pol[j]*grad_recpol2(k,l) - 
		grad_recpol(j,l) * lo * rec_pol2[k];
	
	// rec_pol * Nedelec0
	for (j = 0; j <= p-2; j++, ii++)
	  {
	    for (l = 0; l < 2; l++)
	      shape(ii,l) = lo*lz*rec_pol2[j] * (ls*dlami(fav[1],l)-le*dlami(fav[0],l));
	    shape(ii,2) = 0;
	  }
      }


    // quad faces
    for (i = 2; i < 5; i++)
      {
	p = order_face[i];
	int vimax = faces[i][0];
	for (j = 1; j < 4; j++)
	  if (vnums[faces[i][j]] > vnums[vimax])
	    vimax = faces[i][j];
	iop = 3 - faces[i][0]-faces[i][1]; 


	double locx = lami[vimax];
	double locy = lami[iop];
	double locz = lamz[vimax];

	MatrixFixWidth<2> DExt(p); 
	Vector<> Ext(p); 
	TRIG_EXT::Calc(p+1, -1+2*locx+locy, locy, Ext); 	
	TRIG_EXT::CalcDeriv(p+1, -1+2*locx+locy, locy, DExt); 	

	LegendrePolynomialandDiff (p-1, 2*locz-1, rec_pol, drec_pol);

	// b(z)*P^{p-1}(z) * Nedelec_0(x,y)
	for (l = 0; l < p; l++)
	  {
	    // N_0 = (1-y, x)
	    double locux = (1-locy) * rec_pol[l] * locz * (1-locz);
	    double locuy =   locx  * rec_pol[l] * locz * (1-locz);
	    shape(ii,0) = locux * dlami(vimax, 0) + locuy * dlami(iop, 0);
	    shape(ii,1) = locux * dlami(vimax, 1) + locuy * dlami(iop, 1);
	    ii++;
	  }
	for (l = 0; l < p; l++)
	  {
	    shape(ii++,2) = Ext[l] * dlamz[vimax];
	  }
	
	// nabla (b(z)*P^{p-1}(z) * Ext(x,y))
	for(j=0; j < p; j++)
	  for (l = 0; l < p; l++)
	    {
	      for(k=0; k<2; k++) 
		shape(ii,k) = locz*(1-locz) * rec_pol[l] * 
		  ((2*dlami(vimax,k)+dlami(iop,k)) * DExt(j,0) + dlami(iop,k) * DExt(j,1));
	      ii++;
	      shape(ii++,2) = ((1-2*locz)*rec_pol[l] + locz*(1-locz)*drec_pol[l]*2)*dlamz[vimax] * Ext(j);
	  } 
      }


    p = order_inner;

    if (p >= 2)
      {

	Matrix<> hc_trigshape ((p+1)*(p+2), 2);
	HCurlHighOrderTrig<TRIG_EXT> hc_trig (p);
	ArrayMem<int,3> trig_vnums(3);
	
	for (j = 0; j < 3; j++) trig_vnums[j] = vnums[j];
	hc_trig.SetVertexNumbers (trig_vnums);
	
	hc_trig.CalcShape (ip, hc_trigshape);
	
	LegendrePolynomial (p, 2*z-1, rec_pol);
	int first_inner_trig = 3*p+3;
	for (l = 0; l < p; l++)
	  for (j = 0; j < p*p-1; j++)
	    {
	      for (k = 0; k < 2; k++)
		shape(ii,k) = hc_trigshape(first_inner_trig+j,k) * (1-z)*z * rec_pol[l];
	      ii++;
	    }
	
	ArrayMem<double,10> rec_pol2((p-1)*p/2); 
	TrigShapesInnerJacobi::Calc (p+1, -1+2*x+y, y, rec_pol2);
	for (l = 0; l < p+1; l++)
	  for (j = 0; j < (p-1)*p/2; j++)
	    shape(ii++,2) = rec_pol[l]*rec_pol2[j];
      }
  }
  */

  //------------------------------------------------------------------------
  // HCurlHighOrderHex
  //------------------------------------------------------------------------
  template <class T_ORTHOPOL>
  HCurlHighOrderHex<T_ORTHOPOL> :: HCurlHighOrderHex (int aorder)
    : HCurlHighOrderFiniteElement<3>(ET_HEX)
  {
    int i; 

    order_inner = aorder;
    for (i = 0; i < 12; i++)
      order_edge[i] = aorder; 
    for (i=0; i<6; i++) 
      order_face[i] = aorder; 
    ComputeNDof();
  }
  
  template <class T_ORTHOPOL> 
  void HCurlHighOrderHex<T_ORTHOPOL> :: ComputeNDof()
  { 
    ndof = 12; // Nedelec 

    int i;
    for (i = 0; i < 12; i++)
      ndof += usegrad_edge[i]*order_edge[i];
    
    for(i=0; i< 6; i++)   
      ndof += ((usegrad_face[i]+1)*order_face[i] + 2)*order_face[i];
      
    ndof += ((usegrad_cell + 2)* order_inner + 3)*order_inner*order_inner;  
    
    
    order = 0; // max(order_edges_tang,order_faces,order_inner);  
    for (i = 0; i < 12; i++)
      if (order_edge[i] > order)
	order = order_edge[i];
    for (i = 0; i < 6; i++)
      if (order_face[i] > order)
	order = order_face[i];
    
    if (order_inner > order) // order_edge_normal = order_inner; 
      order = order_inner;
    
    order++; // integration order 
  }

  template <class T_ORTHOPOL> 
  void HCurlHighOrderHex<T_ORTHOPOL> :: CalcShape (const IntegrationPoint & ip, 
				       FlatMatrixFixWidth<3> shape) const
  {
    int i, j, k, l, p;
    AutoDiff<3> x (ip(0),0);
    AutoDiff<3> y (ip(1),1);
    AutoDiff<3> z (ip(2),2); 
    
    AutoDiff<3> lami[8]={(1-x)*(1-y)*(1-z),x*(1-y)*(1-z),x*y*(1-z),(1-x)*y*(1-z),
			 (1-x)*(1-y)*z,x*(1-y)*z,x*y*z,(1-x)*y*z}; 
    AutoDiff<3> sigma[8]={(1-x)+(1-y)+(1-z),x+(1-y)+(1-z),x+y+(1-z),(1-x)+y+(1-z),
			  (1-x)+(1-y)+z,x+(1-y)+z,x+y+z,(1-x)+y+z}; 
     
    const EDGE * edges = ElementTopology::GetEdges (ET_HEX);

    shape = 0.0;
    int ii = 12; 

    ArrayMem<AutoDiff<3>, 20> pol_xi(order+2),pol_eta(order+2),pol_zeta(order+2);
   
     // edges
    for (i = 0; i < 12; i++)
      {
	int es = edges[i][0];
	int ee = edges[i][1];
	p = order_edge[i]; 
	if (vnums[es] > vnums[ee]) swap (es, ee);

	AutoDiff<3> xi = sigma[ee]-sigma[es]; 
	AutoDiff<3> lam_e = lami[ee]+lami[es]; 
	
	// Nedelec0-shapes
	for(l=0; l<3; l++) 
	  shape(i,l) = 0.5*xi.DValue(l)*lam_e.Value(); 
	  	
	if(usegrad_edge[i])
	  {
	    // High Order edges ... Gradient fields 
	    T_ORTHOPOL::Calc (p+1, xi, pol_xi);
	    
	    for (j = 0; j < p; j++, ii++)
	      for(l=0; l<3; l++)
		shape(ii,l) = lam_e.DValue(l)*pol_xi[j].Value() 
		  + lam_e.Value()*pol_xi[j].DValue(l); 
	  }
      }
    
    const FACE * faces = ElementTopology::GetFaces (ET_HEX);
    //Faces 
    for (i = 0; i<6; i++)
      {
	p = order_face[i];
	AutoDiff<3> lam_f = 0;
	for (j = 0; j < 4; j++)
	  lam_f += lami[faces[i][j]];
	
	int qmax = 0;
	for (j = 1; j < 4; j++)
	  if (vnums[faces[i][j]] > vnums[faces[i][qmax]])
	    qmax = j;
	
	int q1 = (qmax+3)%4; 
	int q2 = (qmax+1)%4; 

	if(vnums[faces[i][q2]] > vnums[faces[i][q1]])
	  swap(q1,q2);  // fmax > f1 > f2

	// horiz = 1 if sigma_{fmax}-sigma_{f1} is horizontal coordinate 
	// for anisotropic meshes (vertical(esp. z) is the longer one) 
	double horiz=1.; 
	horiz = 1; 
	if( (qmax & 2) == (q2 & 2)) 
	  horiz = -1.; 
		
	int fmax = faces[i][qmax]; 
	int f1 = faces[i][q1]; 
	int f2 = faces[i][q2]; 
	      
	AutoDiff<3> xi = sigma[fmax]-sigma[f1]; 
	AutoDiff<3> eta = sigma[fmax]-sigma[f2]; 
    
	T_ORTHOPOL::Calc(p+1, xi,pol_xi);
	T_ORTHOPOL::Calc(p+1,eta,pol_eta);
	
	//Gradient fields 
	if(usegrad_face[i])
	  for (k = 0; k < p; k++)
	    for (j= 0; j < p; j++, ii++)
	      for(l=0; l<3; l++)
		shape(ii,l) = lam_f.Value()*(pol_xi[k].DValue(l)*pol_eta[j].Value() + 
					     pol_xi[k].Value()*pol_eta[j].DValue(l))
		  + lam_f.DValue(l)*pol_xi[k].Value()*pol_eta[j].Value();
	
	//Rotation of Gradient fields 
	for (k = 0; k < p; k++)
	  for (j= 0; j < p; j++, ii++)
	    for(l=0; l<3; l++)
	      shape(ii,l) =  
		lam_f.Value()*(pol_xi[k].DValue(l)*pol_eta[j].Value()
			       -pol_xi[k].Value()*pol_eta[j].DValue(l)) 
		+ horiz*lam_f.DValue(l)*pol_xi[k].Value()*pol_eta[j].Value();

	//Missing ones 
	for (j= 0; j < p; j++, ii+=2)
	  for(l=0; l<3; l++)
	    {
	      shape(ii,l) = 0.5* xi.DValue(l)*pol_eta[j].Value()*lam_f.Value();
	      shape(ii+1,l) = 0.5* eta.DValue(l)*pol_xi[j].Value()*lam_f.Value(); 
	    }
      }
    

    
    // Element-based shapes
    p = order_inner; 
       
    T_ORTHOPOL::Calc(p+1,2*x-1,pol_xi);
    T_ORTHOPOL::Calc(p+1,2*y-1,pol_eta);
    T_ORTHOPOL::Calc(p+1,2*z-1,pol_zeta); 
    
    //Gradient fields
    if(usegrad_cell)
      for (i=0; i<p; i++)
	for(j=0; j<p; j++) 
	  for(k=0; k<p; k++,ii++)
	    for (l=0; l<3; l++)
	      shape(ii,l) = 
		pol_xi[i].DValue(l)*pol_eta[j].Value()*pol_zeta[k].Value() 
		+ pol_xi[i].Value()*pol_eta[j].DValue(l)*pol_zeta[k].Value()
		+ pol_xi[i].Value()*pol_eta[j].Value()*pol_zeta[k].DValue(l); 
    
    //Rotations of gradient fields
    for (i=0; i<p; i++)
      for(j=0; j<p; j++) 
	for(k=0; k<p; k++,ii+=2)
	  for (l=0; l<3; l++)
	    {
	      shape(ii,l) = pol_xi[i].DValue(l)*pol_eta[j].Value()*pol_zeta[k].Value() 
		- pol_xi[i].Value()*pol_eta[j].DValue(l)*pol_zeta[k].Value()
		+ pol_xi[i].Value()*pol_eta[j].Value()*pol_zeta[k].DValue(l); 
	      shape(ii+1,l) = pol_xi[i].DValue(l)*pol_eta[j].Value()*pol_zeta[k].Value() 
		+ pol_xi[i].Value()*pol_eta[j].DValue(l)*pol_zeta[k].Value()
		- pol_xi[i].Value()*pol_eta[j].Value()*pol_zeta[k].DValue(l); 
	    } 
    
    //Missing ones 
    for(j=0; j<p; j++) 
	for(k=0; k<p; k++,ii+=3)
	  {
	    shape(ii,0)   = pol_eta[j].Value()*pol_zeta[k].Value();  
	    shape(ii+1,1) = pol_xi[j].Value()*pol_zeta[k].Value(); 
	    shape(ii+2,2) = pol_xi[j].Value()*pol_eta[k].Value(); 
	  }
    
    return;
    
  }


 template <class T_ORTHOPOL> 
  void HCurlHighOrderHex<T_ORTHOPOL> :: CalcCurlShape (const IntegrationPoint & ip, 
				       FlatMatrixFixWidth<3> shape) const
  {
    /*    MatrixFixWidth<3> shape2(shape.Height()); 
    HCurlHighOrderFiniteElement<3> :: CalcCurlShape(ip,shape2); 
    */
    int i, j, k, l, p;
    AutoDiff<3> x (ip(0),0);
    AutoDiff<3> y (ip(1),1);
    AutoDiff<3> z (ip(2),2); 
    
    AutoDiff<3> lami[8]={(1-x)*(1-y)*(1-z),x*(1-y)*(1-z),x*y*(1-z),(1-x)*y*(1-z),
			 (1-x)*(1-y)*z,x*(1-y)*z,x*y*z,(1-x)*y*z}; 
    AutoDiff<3> sigma[8]={(1-x)+(1-y)+(1-z),x+(1-y)+(1-z),x+y+(1-z),(1-x)+y+(1-z),
			  (1-x)+(1-y)+z,x+(1-y)+z,x+y+z,(1-x)+y+z}; 
     
    const EDGE * edges = ElementTopology::GetEdges (ET_HEX);

    shape = 0.0;
     int c[3][2] = {{1,2},{2,0},{0,1}}; 

    int ii = 12; 

    ArrayMem<AutoDiff<3>, 20> pol_xi(order+2),pol_eta(order+2),pol_zeta(order+2);
    ArrayMem<AutoDiff<3>, 20> pol_lfxi(order+2),pol_lfeta(order+2);
   
     // edges
    for (i = 0; i < 12; i++)
      {
	int es = edges[i][0];
	int ee = edges[i][1];
	p = order_edge[i]; 
	if (vnums[es] > vnums[ee]) swap (es, ee);

	AutoDiff<3> xi = sigma[ee]-sigma[es]; 
	AutoDiff<3> lam_e = lami[ee]+lami[es]; 
	
	// Nedelec0-shapes
	for(l=0; l<3; l++) 
	  shape(i,l) = 0.5*(lam_e.DValue(c[l][0])*xi.DValue(c[l][1]) - 
			    lam_e.DValue(c[l][1])*xi.DValue(c[l][0]));
	    
	  	
	if(usegrad_edge[i])
	    ii+=p; 
      }
    
    const FACE * faces = ElementTopology::GetFaces (ET_HEX);
    //Faces 
    for (i = 0; i<6; i++)
      {
	p = order_face[i];
	AutoDiff<3> lam_f = 0;
;	for (j = 0; j < 4; j++)
	  lam_f += lami[faces[i][j]];
	
	int qmax = 0;
	for (j = 1; j < 4; j++)
	  if (vnums[faces[i][j]] > vnums[faces[i][qmax]])
	    qmax = j;
	
	int q1 = (qmax+3)%4; 
	int q2 = (qmax+1)%4; 

	if(vnums[faces[i][q2]] > vnums[faces[i][q1]])
	  swap(q1,q2);  // fmax > f1 > f2

	// horiz = 1 if sigma_{fmax}-sigma_{f1} is horizontal coordinate 
	// for anisotropic meshes (vertical(esp. z) is the longer one) 
	double horiz=1.; 
	horiz = 1; 
	if( (qmax & 2) == (q2 & 2)) 
	  horiz = -1.; 
		
	int fmax = faces[i][qmax]; 
	int f1 = faces[i][q1]; 
	int f2 = faces[i][q2]; 
	      
	AutoDiff<3> xi = sigma[fmax]-sigma[f1]; 
	AutoDiff<3> eta = sigma[fmax]-sigma[f2]; 
    
	T_ORTHOPOL::Calc(p+1, xi,pol_xi);
	T_ORTHOPOL::Calc(p+1,eta,pol_eta);
	
	//Gradient fields 
	if(usegrad_face[i])
	  ii+=p*p; 
	
	//Rotation of Gradient fields 

	for (j = 0; j < p; j++)
	  pol_lfxi[j] = lam_f*pol_xi[j];
	for (j = 0; j < p; j++)
	  pol_lfeta[j] = lam_f*pol_eta[j];
	    
	if (horiz == 1)
	  for (k = 0; k < p; k++)
	    for (j= 0; j < p; j++, ii++)
	      {
		AutoDiff<3> hd = 2.0 * Cross (pol_eta[j], pol_lfxi[k]);
		for(l=0; l<3; l++) shape(ii,l) = hd.DValue(l);
	      }
	else
	  for (k = 0; k < p; k++)
	    for (j= 0; j < p; j++, ii++)
	      {
		AutoDiff<3> hd = 2.0 * Cross (pol_lfeta[j], pol_xi[k]);
		for(l=0; l<3; l++) shape(ii,l) = hd.DValue(l);
	      }

	//Missing ones 
	for (j= 0; j < p; j++, ii++)
	  {
	    AutoDiff<3> hd = 0.5 * Cross (pol_lfeta[j], xi);
	    for(l=0; l<3; l++)
	      shape(ii,l) = hd.DValue(l);
	    hd = 0.5 * Cross(pol_lfxi[j],eta);
	    ii++; 
	    for(l=0; l<3; l++)
	      shape(ii,l) = hd.DValue(l);  
	  }
      }
        
    // Element-based shapes
    p = order_inner; 
       
    T_ORTHOPOL::Calc(p+1,2*x-1,pol_xi);
    T_ORTHOPOL::Calc(p+1,2*y-1,pol_eta);
    T_ORTHOPOL::Calc(p+1,2*z-1,pol_zeta); 
    
    //Gradient fields
    if(usegrad_cell)
      ii+=p*p*p; 
    
    //Rotations of gradient fields
    for (i=0; i<p; i++)
      for(j=0; j<p; j++) 
	for(k=0; k<p; k++,ii++)
	  
	    {
	      AutoDiff<3> uw = pol_xi[i]*pol_zeta[k]; 
	      AutoDiff<3> hd = 2* Cross(pol_eta[j],uw); 
	     
	      for (l=0; l<3; l++) shape(ii,l)=hd.DValue(l); 

	      AutoDiff<3> uv = pol_xi[i]*pol_eta[j]; 
	      hd = 2* Cross(pol_zeta[k],uv); 
	      ii++;
	      for (l=0; l<3; l++) shape(ii,l)=hd.DValue(l); 
	      
	    } 
    
    //Missing ones 
    for(j=0; j<p; j++) 
	for(k=0; k<p; k++,ii++)
	  {
	    AutoDiff<3> vw = pol_eta[j]*pol_zeta[k]; 
	    AutoDiff<3> hd = Cross(vw,x); 
	    for (l=0; l<3; l++) shape(ii,l)=hd.DValue(l); 
	    ii++; 
	    AutoDiff<3> uw = pol_xi[j]*pol_zeta[k]; 
	    hd = Cross(uw,y); 
	    for (l=0; l<3; l++) shape(ii,l)=hd.DValue(l); 
	    ii++; 
	    AutoDiff<3> uv = pol_xi[j]*pol_eta[k]; 
	    hd = Cross(uv,z); 
	    for (l=0; l<3; l++) shape(ii,l)=hd.DValue(l); 
	  }
    /*
      (*testout) << " curlshape code  " << endl << shape << endl ; 
    (*testout) << " curlshape num " << endl << shape2 << endl; 
    shape2 -= shape; 
    (*testout) << " curlshape diff " << endl << shape2 << endl; 
    */
    return;
    
  }




  template class  HCurlHighOrderFiniteElement<1>;
  template class  HCurlHighOrderFiniteElement<2>;
  template class  HCurlHighOrderFiniteElement<3>; 
  //template class  HCurlHighOrderTrig<TrigExtensionMonomial>;
  //template class  HCurlHighOrderTrig<TrigExtensionOptimal>; 
  //template class  HCurlHighOrderTrig<TrigExtensionMin>;
  template class  HCurlHighOrderSegm<IntegratedLegendreMonomialExt>;
  template class  HCurlHighOrderTrig<IntegratedLegendreMonomialExt>;
  template class  HCurlHighOrderTet<IntegratedLegendreMonomialExt>; 
  //template class  HCurlHighOrderTet<TrigExtensionMonomial>;
  //template class  HCurlHighOrderTet<TrigExtensionOptimal>; 
  //template class  HCurlHighOrderTet<TrigExtensionMin>; 
  //template class  HCurlHighOrderPrism<TrigExtensionMonomial>;
  //template class  HCurlHighOrderPrism<TrigExtensionOptimal>; 
  //template class  HCurlHighOrderPrism<TrigExtensionMin>; 
  template class  HCurlHighOrderPrism<IntegratedLegendreMonomialExt>; 
  template class  HCurlHighOrderQuad<IntegratedLegendreMonomialExt>; 
  template class  HCurlHighOrderHex<IntegratedLegendreMonomialExt>; 
} 
