/* {{{1 GNU General Public License

Program Tops - a stack-based computing environment
Copyright (C) 1999-2014  Dale R. Williamson

Author: Dale R. Williamson <dale.williamson@prodigy.net>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
1}}} */

/* wapp.c  December 1999

   Copyright (c) 1999-2014   D. R. Williamson

Words for applications and projects.

These are functions for wapp.p, the file included by nativewords() when
the catalog is built at start up.

Prototypes for these functions are in header file wapp.h. 

See file doc/design.doc, "Adding new native functions," for a descrip-
tion of the procedure for adding compiled words to the program. */

/*--------------------------------------------------------------------*/

/* Fri Jul  6 08:22:16 PDT 2012.  The following has been placed in file
   boot.v (and formatting done for width) to show with the -h switch
   output. */

/* Thu Jul  5 04:54:55 PDT 2012.  How to write and test a new word in C.

   Written for mmaxN() (file wapp.c) but general enough to show here.

   int mmaxN() // mmaxN (hA n N --- hB) \ moving maximums, N highest //

   As in most functions of this program, arrays in mmaxN() such as 
   output matrix B and internal working matrices ride on the stack.

   This makes it relatively easy to write, test and debug new words in
   C, as described below for the process used to develop word mmaxN().

   Place "return 1;" at any line in the word's C code, recompile and 
   run the word interactively at the ready prompt.  When the word re-
   turns, inspect the elements on the stack as they are at the return 1
   point.

   Each array is simply an item on the stack and is available to be
   operated upon at the ready prompt by any word.  From the ready 
   prompt, run eview to look at numbers in large arrays with a text 
   program like vi or nedit.

   A few lines of debug output using gprintf() can be added to show in-
   dices as a loop runs, or other variables before return.  Their out-
   put flows to the interactive window of the ready prompt where you 
   are working, and can be scrolled back through after the word returns.

   So the interactive window when running the program interactively 
   provides two things: arrays as items on the stack the way they are
   at return 1, and a flow of debug output that can be scrolled back
   through. 

   Expression return 1; placed in the code is like a breakpoint in a
   debugger, but debuggers cannot get at the arrays and their numbers 
   (perhaps thousands of them) the way you can when the array is right 
   there as an item on the stack.

   As progress is made and new code is added, move "return 1;" and 
   "gprintf()" statements further down.  Finally "return 1;" is at the
   bottom, and the word's own return expression can be used instead.

   When the word finally runs on its own, there is no need to write 
   programs to run test cases since words receive input from items you
   can set up on the stack.  

   Just continue testing as above and run the word through a series of
   test cases and view returned results on the stack.  See this word,
   mmaxN() (C file wapp.c), for the test cases run (and which can be 
   run right now by pasting them at the interactive ready prompt).

   While somewhat tedious (but all programming is), this procedure is
   much more direct and far easier than writing and running a separate
   test program (how do you easily view those large arrays?) just to
   make another word like this one. */

/*--------------------------------------------------------------------*/

#include <ctype.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>

#include "main.h"
#include "stk.h"

#include "ctrl.h"
#include "exe.h"
#include "inpo.h"
#include "mat.h"
#include "math1.h"
#include "mem.h"
#include "mmath.h"
#include "sys.h"
#include "tex.h"
#include "tag.h"
#include "wapp.h"

/*--------------------------------------------------------------------*/

/* Example of new word compilation in file word.h */

int alpha() /* prototype is in wapp.h, pattern "alpha" is in wapp.p */
{
   gprintf(" This is new function alpha\n");
   return 1;
}

int funtest() /* funtest (... --- ...) */
/* A word to test functions.  Put things here to try, then load up
   the stack and say funtest at the ready prompt. */
{
   gprintf(" This is where to put some code to run and test"); nc();
   return 1;
}

#ifdef FUNTEST

/*--------------------------------------------------------------------*/

/* Archive of previous funtest code. */

/* 7. Sat Apr 20 19:40:01 PDT 2013.  Testing function insrt().

   Test cases for insrt().

   Paste at the ready prompt each of the cases below to test function
   insrt() under different conditions.

   Test case 1: insert before first (paste at the ready prompt of the
   recompiled program the two lines shown below that follow the ready
   prompt):
      [tops@kaffia] ready > list: 5 4 3 2 1 ; \
                            list: 10 20 30 40 50 ; -1 35 funtest
      funtest: Test of function insrt()
      funtest: Cnew -1.000000 tnew 35.000000
       Row 1:       -1       35 <<<
       Row 2:        1       10
       Row 3:        2       20
       Row 4:        3       30
       Row 5:        4       40
       Row 6:        5       50
       Row 7:        0        0
      [tops@kaffia] ready > 

   Test case 2: insert at first:
      [tops@kaffia] ready > list: 5 4 3 2 1 ; \
                            list: 10 20 30 40 50 ; 1 35 funtest
      funtest: Test of function insrt()
      funtest: Cnew 1.000000 tnew 35.000000
       Row 1:        1       35 <<<
       Row 2:        1       10
       Row 3:        2       20
       Row 4:        3       30
       Row 5:        4       40
       Row 6:        5       50
       Row 7:        0        0
      [tops@kaffia] ready > 

   Test case 3: insert into body:
      [tops@kaffia] ready > list: 5 4 3 2 1 ; \
                            list: 10 20 30 40 50 ; 3.5 35 funtest
      funtest: Test of function insrt()
      funtest: Cnew 3.500000 tnew 35.000000
       Row 1:        1       10
       Row 2:        2       20
       Row 3:        3       30
       Row 4:      3.5       35 <<<
       Row 5:        4       40
       Row 6:        5       50
       Row 7:        0        0
      [tops@kaffia] ready > 

   Test case 4: insert at last:
      [tops@kaffia] ready > list: 5 4 3 2 1 ; \
                            list: 10 20 30 40 50 ; 5 35 funtest
      funtest: Test of function insrt()
      funtest: Cnew 5.000000 tnew 35.000000
       Row 1:        1       10
       Row 2:        2       20
       Row 3:        3       30
       Row 4:        4       40
       Row 5:        5       50
       Row 6:        5       35 <<<
       Row 7:        0        0
      [tops@kaffia] ready > 

   Test case 5: insert beyond last:
      [tops@kaffia] ready > list: 5 4 3 2 1 ; \
                            list: 10 20 30 40 50 ; 6 35 funtest
      funtest: Test of function insrt()
      funtest: Cnew 6.000000 tnew 35.000000
       Row 1:        1       10
       Row 2:        2       20
       Row 3:        3       30
       Row 4:        4       40
       Row 5:        5       50
       Row 6:        6       35 <<<
       Row 7:        0        0
      [tops@kaffia] ready > 

   These cases cover the range of inserts into a list, and are correct
   by inspection. */
{
   register double *C,*t,*W1,*W2;
   int rnew,rows;

   double Cnew,tnew;
   popd(&tnew);
   popd(&Cnew);

   C=(tos-1)->mat;
   rows=tos->row;
   t=tos->mat;

   qsort1(C,rows,1);

   if(!matstk(rows+2,1,"_W1")) return 0;
   W1=tos->mat;
   memset(W1,0,(rows+2)*sizeof(double));
   memcpy(W1,C,rows*sizeof(double));

   if(!matstk(rows+2,1,"_W2")) return 0;
   W2=tos->mat;
   memset(W2,0,(rows+2)*sizeof(double));
   memcpy(W2,t,rows*sizeof(double));

   gprintf("funtest: Test of function insrt()"); nc();
   gprintf("funtest: Cnew %f tnew %f",Cnew,tnew); nc();

   insrt(W1,rows,&rnew,Cnew,W2,tnew);
   pushstr("park lop lop .m"); xmain(0);

   return 1;
}

/* 6. Test function rmnull().
   int len,len1;
   char S[100];
   len1=tos->col;
   memcpy(S,tos->tex,len1);
   drop();
   len=rmnull(S,len1);
   strstk(len, "rm");
   memcpy(tos->tex,S,len);
   return 1;
*/

int funtest1() /* funtest (... --- ...) */
/* A word to test functions.  Put things here to try, then load up
   the stack and say funtest at the ready prompt. */
{
/* 5. Demonstrate reading a 4-byte int table from the library of high 
   level word USET.

   To run this demo, these lines must be sourced first.  Word export4
   turns matrix A into volume T.  Each *row* of T contains a *column*
   of A, but the values of A have been converted to 4-byte integers.
   Thus the bytes in T is equal to 4 times the rows in A.

      "USET" library
      1 10 items (hA) endian export4 (hT) "USET" "G" bank
      3 3  items (hA) endian export4 (hT) "USET" "S" bank
      7 2  items (hA) endian export4 (hT) "USET" "M" bank

   Running this shows the USET library:
      USET wholib

   Running these will execute this demo.  Function extract1(), which
   is the low-level equivalent of yank, gets the desired set from the 
   library of USET.
   
      "M" funtest
      "S" funtest
      "G" funtest
      "X" funtest \ causes error
*/
   char *set_name;
   int k=0,nrows,*rows=NULL;

   set_name=tos->tex;            /* quoted set name incoming on stack */
   extract1("USET",set_name);    /* putting VOL on stack from USET lib*/
   if(tos->typ != VOL) return 0; /* if error, won't have VOL on stk */

   rows=(int *)tos->tex;       /* set int pointer to VOL array */
   nrows=tos->col/sizeof(int); /* bytes of VOL = nrows*sizeof(int) */

/* Note that we can drop T from stack.  It will not be deallocated
   because T is stored in the catalog (in the library of USET). */
   drop(); /* T from stack */ 

/* This shows that the 4 byte integer array from USET lib is now in 
   array rows: */
   gprintf(" Read %d ints for set %s:\n",nrows,set_name);
   for(;k<nrows;k++) gprintf("    row %d\n",*(rows+k));

   drop(); /* drop qSet from stack */
   return 1;
}
/* 4. Looking at some functions in sys/dir.h.

   DIR *dp;
   dp=opendir(tos->tex);
   gprintf(" dp: %d",dp); nc();
   if(dp) closedir(dp);
   return 1;
*/
/* 3. Looking for memory leaks using memprobe().

   fprintf(stderr,"f: ");
   memprobe();
   memprobe();
   pushq2("TEST_STRING",11);
   memprobe();
   drop();
   memprobe();
   fprintf(stderr,":f\n");
   return 1;
*/
/*  2. Verifying action of system function ntohs().

   From /usr/include/asm/byteorder.h:
      extern __inline__ unsigned short int
      __ntohs(unsigned short int x)

   Use int and .hex to display result.  Shows that ntohs() swaps 
   the 2 bytes of a 2-byte number:

      [tops@gutter] ready # 65534 int .hex 65534 funtest int nl .hex
       FE FF 00 00 00 00 00 00
       FF FE 00 00 00 00 00 00
      [tops@gutter] ready # 

   int m,n;
   if(!popint(&n)) return 0;
   m=ntohs(n);
   return(pushint(m)); 
*/
/* 1. Testing pointer to function hAlarm().

   void (*hAlarm)(int);

   hAlarm=(void (*)(int)) handleAlarm1;
   hAlarm(SIGALRM);

   hAlarm=(void (*)()) handleAlarm2;
   hAlarm(999);
   return 1;
*/ 
/*--------------------------------------------------------------------*/
#endif 

/* Functions to demonstrate overloading */

int starA() /* *A (hA hB --- hC) */
{
   gprintf(" running *A");
   return(star());
}

int starB() /* *B (hA hB --- hC) */
{
   gprintf(" running *B");
   return(star());
}

int starC() /* *C (hA hB --- hC) */
{
   gprintf(" running *C");
   return(star());
}
   
/*--------------------------------------------------------------------*/

/* Functions for dynamic analysis */

int centroid() /* centroid (hW hA --- hC) */
/* For weights W (such as moment arms) applied to rows of A, compute 
   centroid of each column of A. */
{
   int cols,j=0,k,rows;
   double *A,*C,S,T,*W,*W0;

   if(tos->typ!=MAT || (tos-1)->typ!=MAT) {
      stkerr(" centroid: ",MATNOT2);
      return 0;
   }
   if(tos->row!=(tos-1)->row) {
      stkerr(" centroid: ",MATSNOTC);
      return 0;
   }
   A=tos->mat;
   W0=(tos-1)->mat;
   
   rows=tos->row;
   cols=tos->col;
   if(!matstk(cols,1,"_centroid")) return 0;
   C=tos->mat;

   for(;j<cols;j++) {
      S=0;
      T=0;
      W=W0;
      for(k=0;k<rows;k++) {
         S+=*A;
         T+=*A*(*W);
         A++;
         W++;
      }
      if(S) *C=T/S;
      else *C=0;
      C++;
   }
   return(lop() && lop());
}

int cgm() /* cgm (hf hA --- hx) */
/* Solve A*x=f for unknown x using the conjugate gradient method. */
{
   register double *A,*f,*x;
   double *r,*s;
   double a,c,*dnull,eps,eps2=1e-8,rm1;
   register int i,k=0;
   int cols,iterating,j=0,kmax,numiter=0,rows;

   if(tos->typ!=MAT || (tos-1)->typ!=MAT) {
      stkerr(" cgm: ",MATNOT2);
      return 0;
   }
   if((rows=tos->row)!=(tos-1)->row) {
      stkerr(" cgm: ",ROWSNOT);
      return 0;
   }
   if(rows!=tos->col) {
      stkerr(" cgm: ",SQUNOT);
      return 0;
   }
   A=tos->mat;
   f=(tos-1)->mat;
   cols=(tos-1)->col;

   if(!matstk(rows,cols,"_x")) return 0;
   x=tos->mat;
   memset(x,0,rows*cols*sizeof(double));

   if((r=(double *)memget(rows,1))==NULL ||
      (s=(double *)memget(rows,1))==NULL ||
      (dnull=(double *)memget0(rows,1))==NULL) {
      stkerr(" cgm: ",MEMNOT);
      return 0;
   }
/* For sym A, should converge in 2*rows; here, give it 5*rows: */
   kmax=5*rows; /* max iterations */

   while(j<cols && k<kmax) {

      dot1(f,f,rows,&a);
      eps=a*eps2;

      for(i=0;i<rows;i++) *(r+i)=-*(f+i);
      memcpy(s,f,rows*sizeof(double));

      k=0;
      iterating=1;
      while(iterating && k<kmax) {
         numiter++;
         dot1(r,r,rows,&rm1);

         mpyad1(dnull,A,s,rows,rows,1,r);
         dot1(s,r,rows,&a);
         a=rm1/a;

         for(i=0;i<rows;i++) *(x+i)+=*(s+i)*a;

         mpyad1(dnull,A,x,rows,rows,1,r);
         for(i=0;i<rows;i++) *(r+i)-=*(f+i);

         dot1(r,r,rows,&c);
         if(c<eps) iterating=0;
         else {
            c=c/rm1;
            for(i=0;i<rows;i++) *(s+i)=*(s+i)*c - *(r+i);
         }
         k++;
      }
      if(TRACE) {
         gprintf(" cgm: column %d %d iterations",j,numiter); nc();
         numiter=0;
      }
      f+=rows;
      x+=rows;
      j++;
   }
   mallfree((void *)&r);
   mallfree((void *)&s);
   mallfree((void *)&dnull);

   if(k>=kmax) {
      stkerr(" cgm: ","convergence failure");
      drop();
      drop2();
      pushint(0);
      pushint(0);
      null();
      return 0;
   }
   return(lop() && lop());
}

int cmagphase() /* cmagphase (Ar Ai --- hMag hPhase) */
/* Complex matrices into magnitude-phase form.  Phase angle P has units
   of radians. */
{
   register double *Ai,*Ar,*M,*P;
   register int k=0;
   int cols,num=0,rows;
   
   if(is_complex(tos)) {
      cmplxdbl();
   }
   if(stkdepth()>1) {
      if(tos->typ==NUM && (tos-1)->typ==NUM) num=1;
      hand(); swap(); hand(); swap();
   }
   else {
      stkerr(" cmagphase: ",STKNOT);
      return 0;
   }
   if(tos->typ!=MAT || (tos-1)->typ!=MAT) {
      stkerr(" cmagphase: ",STKNOT);
      return 0;
   }
   Ai=tos->mat;
   Ar=(tos-1)->mat;
   if((rows=tos->row)!=(tos-1)->row || (cols=tos->col)!=(tos-1)->col) {
      stkerr(" cmagphase: ",MATSNOTC);
      return 0;
   }
   if(!matstk(rows,cols,"_mag")) return 0;
   M=tos->mat;
   if(!matstk(rows,cols,"_phase")) return 0;
   P=tos->mat;

   for(;k<rows*cols;k++) {
      *M=sqrt(*Ar*(*Ar) + *Ai*(*Ai));
      *P=atan2(*Ai,*Ar);
      Ai++;
      Ar++;
      M++;
      P++;
   }
   if(num) {
      swap();
      pushd(*tos->mat);
      lop();
      swap();
      pushd(*tos->mat);
      lop();
   }
   return(
      rot() &&
      drop() &&
      rot() &&
      drop()
   );
}

int cmagphase1() /* -cmagphase (R P --- Ar Ai) */
/* Undo complex magnitude-phase, convert back to real-imaginary form
   Incoming P contains phase angles in radians.

   Equivalent to this high level word:
   inline: -cmagphase (R P --- Ar Ai) \ magnitude-phase into real-imag
      this cos other *by "_Ar" naming, rev sin *by "_Ai" naming
   end
*/
{
   register double *Ai,*Ar,*P,*R;
   register int k=0;
   int cols,num=0,rows;
   
   if(stkdepth()>1) {
      if(tos->typ==NUM && (tos-1)->typ==NUM) num=1;
      hand(); swap(); hand(); swap();
   }
   else {
      stkerr(" -cmagphase: ",STKNOT);
      return 0;
   }
   if(tos->typ!=MAT || (tos-1)->typ!=MAT) {
      stkerr(" -cmagphase: ",STKNOT);
      return 0;
   }
   P=tos->mat;
   R=(tos-1)->mat;
   if((rows=tos->row)!=(tos-1)->row || (cols=tos->col)!=(tos-1)->col) {
      stkerr(" -cmagphase: ",MATSNOTC);
      return 0;
   }
   if(!matstk(rows,cols,"_Ar")) return 0;
   Ar=tos->mat;
   if(!matstk(rows,cols,"_Ai")) return 0;
   Ai=tos->mat;

   for(;k<rows*cols;k++) {
      *Ar=*R*cos(*P);
      *Ai=*R*sin(*P);
      R++;
      P++;
      Ar++;
      Ai++;
   }
   if(num) {
      swap();
      pushd(*tos->mat);
      lop();
      swap();
      pushd(*tos->mat);
      lop();
   }
   return(
      rot() &&
      drop() &&
      rot() &&
      drop()
   );
}

int detrend() /* detrend (hA hB0 hB1 --- hA1) */
/* Removing linear trend B from each column of N uniformly spaced
   points in A:
      A1(k,j) = A(k,j) - B0(j) - k*B1(j) 
   where k=1,N.

   Word trend provides the matrices B0 and B1. */
{
   double *A,*A1,*B0,*B1;
   int cols,j=0,k,rows;

   if(tos->typ!=MAT || (tos-1)->typ!=MAT || (tos-2)->typ!=MAT) {
      stkerr(" detrend: ",MATNOT);
      return 0;
   }
   A=(tos-2)->mat;
   rows=(tos-2)->row;
   cols=(tos-2)->col;

   B0=(tos-1)->mat;
   B1=tos->mat;

   if(!matstk(rows,cols,"_A1")) return 0;
   A1=tos->mat;

   for(;j<cols;j++) {
      for(k=1;k<=rows;k++) {
         *A1=*A - (*B0) - k*(*B1);
         A++;
         A1++;
      }
      B0++;
      B1++;
   }
   return(lpush() && drop2() && drop() && lpull());
}

int logarea() /* logarea (hP hF --- hA) */
/* Logarithmic trapezoidal integration.  Returned A contains the 
   incremental areas between elements of P and F, discrete points
   of function P(F).  The total area is the sum of all elements
   in A. 

   Assumes F is in ascending order. */
{
   int cols,i,j=0,rows;
   double *A,*F,*P;
   double G;

   if(tos->typ!=MAT || (tos-1)->typ!=MAT) {
      stkerr(" logarea: ",MATSNOT);
      return 0;
   }
   P=(tos-1)->mat;

   cols=(tos-1)->col;
   rows=(tos-1)->row;

   if(rows!=tos->row) {
      stkerr(" logarea: ",MATSNOTC);
      return 0;
   }
   if(!matstk(rows,cols,"_A")) return 0;
   A=tos->mat;

   for(;j<cols;j++) {
      *A=0;
      F=(tos-1)->mat;
      for(i=1;i<rows;i++) {
         A++;
         P++;
         F++;
         G=*P*(*F) - *(P-1)*(*(F-1));

         if(G) {
            if(*F>*(F-1))  
               *A=G/( log10(*P/(*(P-1))) / log10((*F)/(*(F-1))) + 1. );
             
            else *A=0;
         }
         else {
            if(*F>*(F-1)) *A=log(*(F)/(*(F-1)))*(*P)*(*F);
            else *A=0;
         }
      }
      P++;
      A++;
   }
   return(lop() && lop());
}

int moveave() /* ma (hA n --- hB) */
/* Simple moving average; B(i,j) is average of A(i-n+1,j) to A(i,j). */
{
   register int i,j=0;
   register double *A,*B,*B1,sum;
   int cols,n,rows;

   if(tos->typ!=NUM) {
      stkerr(" ma: ",NUMNOT);
      return 0;
   }
   if((tos-1)->typ!=MAT) {
      stkerr(" ma: ",MATNOT);
      return 0;
   }
   if(!popint(&n)) return 0;

   rows=tos->row;
   n=MIN(n,rows);

   cols=tos->col;
   if(!matstk(rows,cols,"_ma")) return 0;

   B=tos->mat;
   A=(tos-1)->mat;

   for(;j<cols;j++) {
      sum=0;
      for(i=0;i<n;i++) {
         *B=*A;
         sum+=*B;
         A++;
         B++;
      }
      if(n>1) {
         B1=B-1;
         if(n<rows) *B1=sum/n;
         for(;i<rows;i++) {
            *B=*B1+(*A-*(A-n))/n;
            A++;
            B++;
            B1++;
         }
      }
   }
   return(lop());
}

int movetot() /* mtot (hA n --- hB) */
/* Moving total; B(i,j) is total of A(i-n+1,j) to A(i,j).

   Sun Aug 26 17:22:50 PDT 2012.  Revised to give valid partial sums
   before index n is reached.  Here are the test cases used to validate
   the updated code: 

      20 ones 4 clone 15 mtot .m 
      20 ones 4 clone 25 mtot .m 
*/
{
   register int i,j=0;
   register double *A,*B;
   int cols,n,rows;

   if(tos->typ!=NUM) {
      stkerr(" mtot: ",NUMNOT);
      return 0;
   }
   if((tos-1)->typ!=MAT) {
      stkerr(" mtot: ",MATNOT);
      return 0;
   }
   if(!popint(&n)) return 0;

   rows=tos->row;
   n=MIN(n,rows);

   cols=tos->col;
   if(!matstk(rows,cols,"_mtot")) return 0;

   B=tos->mat;
   A=(tos-1)->mat;

   for(;j<cols;j++) {
      *B=*A;
      B++;
      A++;
      for(i=1;i<n;i++) { 
         *B=*(B-1)+*A; 
         B++;
         A++;
      }
      for(;i<rows;i++) {
         *B=*(B-1)+*A-*(A-n);
         B++;
         A++;
      }
   }
   return(lop());
}

int op2mat1() /* op2mat1 (hFile hMap rows cols type endian --- hA) */
/* COMPLEX TYPES NOT CHECKED OUT. */

/* Reading Nastran Output2 style matrix, rows-by-cols, from File.

   Map has been prepared by word op2mat in nas.v, from which the call
   to this function has originated.

   The four columns of Map contain:
      0 - Fortran record number, 1-based, suitable for word fortseek
      1 - record size, bytes
      2 - value of the first four bytes of the record interpreted as
          an integer; in this case, the integer is the starting row
          for a column of data to be read
      3 - the number of file bytes offset to the record, a 0-based
          offset suitable for fseek()

   The values for type are: 1=SP, 2=DP, 3=SP complex, 4=DP complex.
  
   Informal timings for the following matrix:
      PHIU1, 2064-by-1230, 0.8081 density:
         - No endian conversion: 11 sec
         - With endian conversion: 16 sec
         - High level op2map version in Appendix of file.v, that
           prompted writing this low-level version: 105 sec */
{
   int byord,cols,endi,i=0,mrows,nw=1,r0,rows,size=8,type;
   double *A,*Ai,*hFile,*recsize,*recstrt,*rowstrt;
   char *T=NULL;
   FILE *fp;
   long bytes;
   size_t items=1,red;

   if(!popint(&endi)) return 0;
   byord=byteorder(NULL);
 
   if(!popint(&type)) return 0;

/* import() is unnecessary if same endian and DP type: */
   if(byord==endi && (type==2 || type==4)) endi=0;

   if(!popint(&cols)) return 0;
   if(!popint(&rows)) return 0;

/* Double the rows if complex: */
   if(type==3 || type==4) rows+=rows;

/* If SP, double the bytes transferred after import(): */
   if(type==1 || type==3) {
      nw=2;
      size=-4; /* changing flag for import() */
   }
   if((tos-1)->typ!=MAT ||
      !(fp=filehand((tos-1)->mat,(tos-1)->row*(tos-1)->col))) {
      stkerr(" op2mat1: ",FILHNDNOT);
      return 0;
   }
   hFile=(tos-1)->mat;

   mrows=tos->row;
   recsize=(tos->mat)+locvec(1,mrows); /* column 1 of Map */
   rowstrt=(tos->mat)+locvec(2,mrows); /* column 2 of Map */
   recstrt=(tos->mat)+locvec(3,mrows); /* column 3 of Map */
   
   if(!matstk(rows,cols,"_op2mat")) return 0;
   A=tos->mat;
   memset(A,0,rows*cols*sizeof(double));
   if(type==3 || type==4) set_complex(tos);

   if(endi) { /* Using this branch if endian conversion is done. */

      if(!volstk(1,rows*sizeof(double),"_op2char")) return 0;
      T=tos->tex;

      for(;i<mrows;i++) {
         Ai=A;
         if((r0=(int)*(rowstrt+i))>0) { /* r0=1-based start row */
            if((bytes=(long)(*(recsize+i))-sizeof(int))>0) {

               fseek(fp,(long)(*(recstrt+i)+sizeof(int)),SEEK_SET);

               red=fread(T,(size_t)bytes,items,fp); /* read bytes */
               tos->col=bytes; /* 4- or 8-byte data in 1-row VOL */
               dup1s(); /* want T on the stack after import() */

               import(size,endi); /* on return, 8-byte col is on stk */

               Ai+=r0-1; /* bytes go to this 0-based starting row */
               memcpy(Ai,tos->mat,bytes*nw); /* 8-byte col into A */
               drop(); /* 8-byte col off stk */
            }
         }
         else A+=rows;
      }
      drop() ; /* T off stk */
   }
   else {
      for(;i<mrows;i++) {
         Ai=A;
         if((r0=(int)*(rowstrt+i))>0) { /* r0=1-based start row */
            if((bytes=(long)(*(recsize+i))-sizeof(int))>0) {

               fseek(fp,(long)(*(recstrt+i)+sizeof(int)),SEEK_SET);

               Ai+=r0-1; /* bytes go to this 0-based starting row */

               red=fread(Ai,(size_t)bytes,items,fp); /* read bytes */
            }
         }
         else A+=rows;
      }
   }
   fupdate(hFile,fp); /* keeping File structure up-to-date */
   return(
      lop() && /* Map off stk */
      lop()    /* File off stk */
   );
}

int retrend() /* retrend (hA1 hB0 hB1 --- hA) */
/* Sun Jul 28 14:12:54 PDT 2013

   Apply linear trend B to detrended A1 and reconstruct original A:

      A(k,j) = A1(k,j) + B0(j) + k*B1(j)

   where there are N uniformly spaced points in A1 and k=1,N.

   Refer to functions trend() and detrend() that were used to compute
   incoming A1. */
{
   double *A,*A1,b0,*B0,b1,*B1;
   int cols,j=0,k,rows;

   if(tos->typ!=MAT || (tos-1)->typ!=MAT || (tos-2)->typ!=MAT) {
      stkerr(" retrend: ",MATNOT);
      return 0;
   }
   A1=(tos-2)->mat;
   rows=(tos-2)->row;
   cols=(tos-2)->col;

   B0=(tos-1)->mat;
   B1=tos->mat;

   if(!matstk(rows,cols,"_A")) return 0;
   A=tos->mat;

   for(;j<cols;j++) {
      b0=*B0;
      b1=*B1;
      for(k=1;k<=rows;k++) {
         *A=*A1 + b0 + k*b1;
         A++;
         A1++;
      }
      B0++;
      B1++;
   }
   return(lpush() && drop2() && drop() && lpull());
}

int stardircos9() /* *dircos9 (hCba hXa --- hXb) */
/* Rotational transformation.
   Compute Xb=Cba*Xa, where each 9-element column of Cba holds a 
   3-by-3 matrix stacked by columns, and each column of Xa is a 
   3-by-1 matrix. */
{
   register double *Cba,*Xa,*Xb;
   register int cols,j=0;

   if(tos->typ!=MAT || (tos-1)->typ!=MAT) {
      stkerr(" *dircos9: ",MATNOT2);
      return 0;
   }
   if(tos->row!=3 || (tos-1)->row!=9) {
      stkerr(" *dircos9: ","expecting matrices of 9 and 3 rows");
      return 0;
   }
   if((cols=tos->col)!=(tos-1)->col) {
      stkerr(" *dircos9: ",COLSNOT);
      return 0;
   }
   if(!matstk(3,cols,"_*dircos9")) return 0;

   Xb=tos->mat;
   Xa=(tos-1)->mat;
   Cba=(tos-2)->mat;

   for(;j<cols;j++) {
      *(Xb  )=*(Cba  )*(*(Xa))+*(Cba+3)*(*(Xa+1))+*(Cba+6)*(*(Xa+2));
      *(Xb+1)=*(Cba+1)*(*(Xa))+*(Cba+4)*(*(Xa+1))+*(Cba+7)*(*(Xa+2));
      *(Xb+2)=*(Cba+2)*(*(Xa))+*(Cba+5)*(*(Xa+1))+*(Cba+8)*(*(Xa+2));
      Cba+=9;
      Xa+=3;
      Xb+=3;
   }
   return(lop() && lop());
}

int steady() /* steady (hZ hW hQ hF --- hqr hqi) */
/* Steady-state frequency response solution of uncoupled equations.  

   Solving 2nd order differential equations of the form:

      qdd + (2*Z*W)*qd + (W*W)*q = Q(F)

   for steady-state response to harmonic input Q(F).

   Adapted from the corresponding fortran function of program express.

      function freqresp() ! steady state frequency response
c
c     Stack description:
c            steady (hZ hW hQ hF --- hqr hqi)
c      where
c          hZ is nW vector of critical damping factors
c          hW is nW vector of undamped natural frequencies, rad/sec
c          hQ is nW-by-nF matrix of generalized force amplitudes (real)
c          hF is nF vector of frequencies, rad/sec, that correspond to
c             each column of Q
c
c          hqr is nW-by-nF matrix of response real part
c          hqi is nW-by-nF matrix of response imaginary part
c
c     It is ok if n-by-1 vectors are stored as 1-by-n. */
{
   double *F,*Q,*qi,*qr,*W0,*W,*Z0,*Z;
   register double A,D,Fj,P,R;
   int j=0,nF,nQrows,nW,nZ;
   register int i=0,k=0;
  
   if(stkdepth()>3) {
      i=0;
      while(i<4) { /* making any NUMs into 1-by-1 MATs */
         pushint(3); pushq2("roll",4); xmain(0); /* doing 3 roll */
         hand();
         i++;
      }
   }
   else {
      stkerr(" steady: ",STKNOT);
      return 0;
   }
   F=tos->mat;
   nF=(tos->row)*(tos->col); /* tos->col should be 1 */

   if(is_complex(tos-1)) {
      stkerr(" steady: ","generalized force matrix Q must be real");
      return 0;
   }
   Q=(tos-1)->mat;
   nQrows=(tos-1)->row;

   Z0=(tos-3)->mat;
   nZ=((tos-3)->row)*((tos-2)->col);

   W0=(tos-2)->mat;
   nW=((tos-2)->row)*((tos-2)->col);

   if(nW!=nQrows) {
      stkerr(" steady: ", \
         "rows of matrix Q must match length of vector W");
      return 0;
   }
   if(nF!=(tos-1)->col) { /* nF len and cols of Q must agree */
      stkerr(" steady: ", \
         "columns of matrix Q must match length of vector F");
      return 0;
   }
   if(nZ!=nW) {
      stkerr(" steady: ", \
         "vector Z length and vector W length do not agree");
      return 0;
   }
   if(!matstk(nW,nF,"_qr")) return 0;
   qr=tos->mat;
   if(!matstk(nW,nF,"_qi")) return 0;
   qi=tos->mat;

   for(;j<nF;j++) { /* looping over nF frequencies */
      W=W0;
      Z=Z0;
      Fj=*(F+j);
      for(i=0;i<nW;i++) { /* looping over nW degrees-of-freedom */
         if(*W) {
            R=Fj/(*W);
            D=(1-R*R);
            R*=2*(*Z);
            A=1/(sqrt(D*D+R*R));
            A*=(*(Q+k))/(*W*(*W));
            P=atan2(R,D); /* phase ang P from Q to q; q always lags Q */
            *(qr+k)=A*cos(P);  /* component along real axis */
            *(qi+k)=-A*sin(P); /* component along imaginary axis */
         }
         else { /* rigid body motion: */
            *(qr+k)=-(*(Q+k))/(Fj*Fj);
            *(qi+k)=0;
         }
         W++;
         Z++;
         k++;
      }
   }
   return( /* (hZ hW hQ hF hqr hqi) dropping F, Q, W, and Z */
      lpush() &&
      lpush() &&
      drop2() &&
      drop2() &&
      lpull() &&
      lpull()
   );
}

int tr() /* tr (hSig n --- hTr) */
/* The trace Tr at step k depends upon signal Sig at step k and the 
   trace at the previous step, in the recursive equation:

      Tr(k) = Tr(k-1) + [Sig(k) - Tr(k-1)]/(n+1)

   To get the recursion started, an initial value is assumed:

      Tr(0)=Sig(0)

   The equation for Tr(k) can be written into a "difference" equation
   form with decay constant A and signal forcing coefficient B:

      Tr(k) = A*Tr(k-1) + B*S(k)

   where 

      A = 1 - 1/(1+n) = n/(1+n)
      B = 1/(1+n)

   Term A*Tr(k-1) represents an exponential decay, and Tr may be called
   an exponential trace. */
{
   register double A,B,*Sig,*Tr;
   register int i=1,j=0;
   int cols,n,rows;

   if(tos->typ!=NUM) {
      stkerr(" tr: ",NUMNOT);
      return 0;
   }
   if((tos-1)->typ!=MAT) {
      stkerr(" tr: ",MATNOT);
      return 0;
   }
   popint(&n);

   rows=tos->row;
   cols=tos->col;
   Sig=tos->mat;

   if(!matstk(rows,cols,"_Tr")) return 0;
   Tr=tos->mat;

   B=1.0/(n+1);
   A=n*B;

   for(;j<cols;j++) {
      *Tr=*Sig;
      Tr++;
      Sig++;
      for(i=1;i<rows;i++) {
         *Tr=*(Tr-1)*A + *Sig*B;
         Tr++;
         Sig++;
      }
   }
   return(lop());
}

int tr1() /* tr1 (hSig Tr0 n --- hTr) */
/* The trace Tr at step k depends upon signal Sig at step k and the 
   trace at the previous step, in the recursive equation:

      Tr(k) = Tr(k-1) + [Sig(k) - Tr(k-1)]/(n+1)

   This word computes Tr for the last r rows in Sig, so returned Tr
   has only r rows.

   The equation for Tr(k) can be written into a "difference" equation
   form with decay constant A and signal forcing coefficient B:

      Tr(k) = A*Tr(k-1) + B*S(k)

   where 

      A = 1 - 1/(1+n) = n/(1+n)
      B = 1/(1+n)

   Incoming Tr0 is the initial value for Tr needed to start the recur-
   sion, when k=0.  In real time processing, Tr0 is the last Tr value
   from the last set of data processed, and Sig is a new set of data:

      Tr(-1) = Tr0

   so the first value in returned Tr is:

      Tr(0) = A*Tr0 + B*S(0)

   Note that Tr0 is a row vector if Sig contains more than one column.

   Term A*Tr(k-1) represents an exponential decay, and Tr may be called
   an exponential trace. 

   This test shows usage and verifies matching results with tr() batch
   processing:

      if(missing("sine")) source("math.v");
   
      (S, t) = sine(<< 1 10 2pi * 0 0.001 100 >>); // sine curve
      L = uniform(.01, 100); // straight line
      A = [S, L];

      n = 20;
      trA = tr(A, n); // batch 

      plot([A, trA], t);
 
      B = A[rows(A)/2:rows(A)];
      trB = tr1(B, trA[rows(A)/2-1], n); // second half using tr1

      N = trA[rows(A)/2:rows(A)] - trB; // compare tr1 with tr batch

      if(null?(N)) dot("OK, N is null"); // expect exactly zero
      else dot(" ERROR."); */
{
   register double A,B,*Sig,*Tr0,*Tr;
   register int i=1,j=0;
   int cols,n,rows;

   if(tos->typ!=NUM) {
      stkerr(" tr1: ",NUMNOT);
      return 0;
   }
   if((tos-1)->typ!=MAT || (tos-2)->typ!=MAT) {
      stkerr(" tr1: ",MATNOT2);
      return 0;
   }
   popint(&n);
   hand();

   if(tos->col!=(tos-1)->col) {
      stkerr(" tr1: ",MATSNOTC);
      return 0;
   }
   cols=tos->col;
   rows=(tos-1)->row;

   Tr0=tos->mat;
   Sig=(tos-1)->mat;

   if(!matstk(rows,cols,"_Tr")) return 0;
   Tr=tos->mat;

   B=1.0/(n+1);
   A=n*B;

   for(;j<cols;j++) {
      *Tr=*Tr0*A + *Sig*B;
      Tr0++;
      Tr++;
      Sig++;
      for(i=1;i<rows;i++) {
         *Tr=*(Tr-1)*A + *Sig*B;
         Tr++;
         Sig++;
      }
   }
   return(lop() && lop());
}

int trend() /* trend (hA --- hB0 hB1) */
/* Linear trend for data in A uniformly spaced.

   This Least-squares solution is subject to illconditioning, so double 
   precision is important.

   Each column in A is a data record of N uniformly spaced points.

   For A(k,j), k=1,N, the one-row trend is:
      trend(k,j) = B0(j) + k*B1(j)

   Size of matrices B0 and B1 is one row by the number of columns
   in A.

   Reference:

      Bendat, J. S., and Piersol, A. G., "Random Data Analysis and
      Measurement Procedures," 3rd edition, 2000, J. W. Wiley & Sons,
      Inc., page 397, equation 11.16. */
{
   double *A,*B0,*B1,sumA,sumiA;
   double drows,N1,N2,N3,N4;
   int cols,i,j=0,rows;

   if(tos->typ!=MAT) {
      stkerr(" trend: ",MATNOT);
      return 0;
   }
   A=tos->mat;
   rows=tos->row;
   cols=tos->col;

   drows=rows; /* needs to be double or N4 calc won't work */
   N1=(2*drows+1)*2;
   N2=drows*(drows-1);
   N3=(drows+1)*6;
   N4=drows*(drows-1)*(drows+1); /* very large when rows is > 50000 */

   if(!matstk(1,cols,"_B0")) return 0;
   B0=tos->mat;

   if(!matstk(1,cols,"_B1")) return 0;
   B1=tos->mat;

   for(;j<cols;j++) {
      sumA=0;
      sumiA=0;
      for(i=1;i<=rows;i++) {
         sumA+=*A;
         sumiA+=i*(*A);
         A++;
      }
      *B0=(sumA*N1 - 6*(sumiA))/N2;
      *B1=(sumiA*12 - sumA*N3)/N4;
      B0++;
      B1++;
   }
   return(rot() && drop());
}

/*--------------------------------------------------------------------*/

/* Back-propagation network.
   Functions for netbp.v */

enum OutCols {value,grad,utype,decay,bias,dbias,input,error};

int logistic() /* logistic (hH nBeta --- hFexp) */
/* For values in array H, compute the logistic function given by:

      Fexp(H) = 1/[1 + exp(-2*Beta*H)]
*/
{
   double Beta,*Fexp,*H;
   unsigned int len,k=0;

   if(tos->typ!=NUM) {
      stkerr(" logistic: ",NUMNOT);
      return 0;
   }

   if((tos-1)->typ!=MAT && (tos-1)->typ!=NUM) {
      stkerr(" logistic: ",NUMORMATNOT);
      return 0;
   }
   popd(&Beta);
   Beta*=-2.0;

   hand();
   H=tos->mat;

   if(!matstk(tos->row,tos->col,"_Fexp")) return 0;
   Fexp=tos->mat;
   len=tos->row*tos->col;

   for(;k<len;k++) {
      *(Fexp)=1.0/(1.0 + exp(Beta*(*H)));
      Fexp++;
      H++;
   }
   return(lop());
}
   
int net_activate() /* (hIn hOut --- ) */
/* Compute the output, Out, from activation of units in a layer.  

   Matrix Out is a cataloged item, so it is not deallocated when its
   handle is dropped from the stack.  In general, matrix In is not
   cataloged, so its handle is kept on the stack until it is no longer
   needed. 

   In addition to receiving computed outputs, matrix Out contains pro-
   perties of the units needed to compute their outputs.  The columns 
   of Out are defined by enum OutCols. */
{
   double *Grad,*Decay,*Bias,*Utype;
   double A,*In,*Out;

   int i=0,units;

   In=(tos-1)->mat;
   units=tos->row;

   Out=tos->mat;
   Grad=Out+locvec(grad,units);
   Decay=Out+locvec(decay,units);
   Bias=Out+locvec(bias,units);
   Utype=Out+locvec(utype,units);
   drop();

   for(;i<units;i++) { 

      if(*Utype<2) { /* exp sigmoid function, type 0 or 1: */

         /* for 0 to +1 sigmoid range: */
         A=-(*Decay)*(*In-*Bias);           /* -a*(In-b) */
         *Out=1./(1+exp(A));                /* 1/[1 + e^(-a*(In-b))] */
         *Grad=(*Decay)*(1. - *Out)*(*Out); /* a*[1 - f]*f */

         if(*(Utype)) { /* adjusting for -1 to +1 sigmoid range: */
            *(Out)*=2; *(Out)-=1;           /* 2*f - 1 */
            *(Grad)*=2;                     /* 2*f' */
         }
      }
      else { /* linear function, type 2: */
         *Out=(*In-*Bias);
         *Grad=1.;
      }
      In++;
      Out++;
      Grad++;
      Decay++;
      Bias++;
      Utype++;
   }
   return(drop()); /* dropping In from stack */
}

int net_step() /* stepping (hGoal hIn qNet --- ) */
/* Stepping the back-propagation network called Net.  If Goal is purged,
   then just forward pass is done.

   This function is intended to work inside a loop, and so it does not 
   check if Net is a valid network word, or even if qNet is type STR.  
   Word 'net?' can be used to do this ahead of time, outside the loop.

   Uses extract1() to fetch items from the local library of Net. 

   Uses exe1() to fire ptrs that are elements in vectors of ptrs; the
   vectors of ptrs are fetched from the local library of Net.

   In general, after a matrix address is obtained from a MAT that is
   obtained from a ptr list, the ptr list and the MAT itself can be 
   dropped from the stack since they are both in the catalog (in this 
   case, in Net's local library) and therefore will not be deallocated.

   Example usage of vectors of ptrs.  

      Matrix handle hOut is extracted from the local library of word 
      Net using function extract1() (which is a speedy version of
      extract(), the native function for word extract):

         extract1(Net,"Out");
         hOut=tos->mat;
         drop(); // will not deallocate because Out is in lib of Net //

      If hOut is a vector of ptrs, exe1() will fire one of its elements.
      The following puts onto the stack the handle to the matrix of out-
      put units for the 3rd layer, by firing the ptr to the matrix at 
      the 3rd element of hOut:

         exe1(*(hOut+2)); 

   How to develop and debug a function like this one, which really is
   an application:

      A function like this one is developed step by step, so get ready 
      to do cycles of recompile-run-edit.

      Review notes in doc/design.doc: "Adding new native functions"
      and "Easing the pain of debugging."

      Make a simple demo case that can be run interactively.  File
      usr/netbp.v, a companion to sys/netbp.v, has example cases.

      Run the recompiled program interactively from the ready prompt.  

      This function runs straight through from beginning to end, so
      simply insert "return 1;" at the spot of interest, recompile,
      run the demo from the ready prompt and review results on the
      stack when return occurs.
      
      Make changes, perhaps insert gprintf() or fprintf() lines to 
      display function variables, move "return 1;" and repeat the 
      cycle. */ 
{
/* DEBUG=1 compiles the writing of unit inputs and unit errors to 
   columns of Out, which are displayed by word netprops. */
   #define DEBUG 1

   char Net[33];

   int i,j,k,layers,punits,train=0,units;

   double A,Alpha,Eta;
   double *Out,*W;
   double *hdW,*hOut,*hW; 
   double *Atmp,*Bias,*dW,*dBias,*E,*Ek,*Grad,*In,*NetGoal,*NetIn;

   #if DEBUG
      double *Esav,*Insav;
   #endif

   strncpy(Net,tos->tex,32);
   drop();

/* Setting up network input, and network target output. */

   if(tos->typ!=MAT) {
      stkerr(" stepping: ",MATNOT);
      return 0;
   }
   extract1(Net,"NetIn");
   NetIn=tos->mat;
   units=tos->row;

/* Storing incoming stack item In into local library NetIn (no bounds 
   checking): */
   memcpy(NetIn,(tos-1)->mat,sizeof(double)*units);
   drop2();

   if(tos->typ!=MAT) {
      stkerr(" stepping: ",MATNOT);
      return 0;
   }
   if(tos->row) { /* If hGoal is not purged, set up for training: */
      extract1(Net,"NetGoal");
      NetGoal=tos->mat;
      punits=tos->row;
      /* Storing incoming stack item Goal into local library NetGoal 
         (no bounds checking): */
      memcpy(NetGoal,(tos-1)->mat,sizeof(double)*punits);
      drop2();
      train=1;
   }
   else drop();

/* Fetching vectors of ptrs from local library of Net. */

   extract1(Net,"Out");
   hOut=tos->mat;
   layers=tos->row;
   drop();

   extract1(Net,"W");
   hW=tos->mat;
   drop();

/* Forward pass. */

   /* In this implementation, the input layer (1st layer) applies 
      activation (weights and biases in an activation function) to 
      the network input.  It is subject to training, and does not 
      simply pass the network input to the 2nd layer. */

/* Input to 1st layer is the weighted network input: */
   if(!matstk(units,1,"_In")) return 0;
   In=tos->mat;

   exe1(*hW); /* hW(input_layer) weights */
   W=tos->mat;
   drop();

   for(i=0;i<units;i++) { /* In(i)=W(i)*NetIn(i) */
      *(In+i)=(*W)*(*NetIn); /* weighted input */
      W++;
      NetIn++;
   }

/* Output from the 1st layer comes from driving the weighted inputs 
   through the activation function: */
   exe1(*hOut); /* hOut(input_layer) */

   #if DEBUG
      Insav=(tos->mat)+locvec(input,units);
      memcpy(Insav,In,sizeof(double)*units);
   #endif

   net_activate(); /* output from the units of the input layer */

/* Forward pass through the 2nd and following layers: */
   for(k=1;k<layers;k++) {

   /* Input to layer k is the weight matrix times the output from 
      the previous layer: In(k)=W(k)*Out(k-1) */

      exe1(*(hW+k));     /* hW(k) on stk */
      exe1(*(hOut+k-1)); /* hOut(k-1) on stk */
      tos->col=1;        /* 1st col of Out holds values for mpy */

      mpy();             /* hIn(k)=W(k)*Out(k-1) on stk */

      #if DEBUG
         exe1(*(hOut+k)); 
         Insav=(tos->mat)+locvec(input,tos->row); 
         drop();
         memcpy(Insav,tos->mat,sizeof(double)*tos->row);
      #endif

      exe1(*(hOut+k));   /* hOut(k) on stk */

   /* Output from layer k comes from driving the inputs to layer k
      through the activation function: */
      net_activate();
   }
   if(!train) return 1;

/* Back-propagation. */

/* Fetching vectors of ptrs for training, from local library of Net. */
   extract1(Net,"dW");
   hdW=tos->mat;
   drop();

   extract1(Net,"NetGoal");
   NetGoal=tos->mat;
   drop();

/* Fetching learning parameters from the local library of Net. */
   extract1(Net,"Alpha");
   popd(&Alpha);

   extract1(Net,"Eta");
   popd(&Eta);

   exe1(*(hOut+layers-1)); /* hOut(output) */
   Out=tos->mat;
   units=tos->row;
   drop();

   Grad=Out+locvec(grad,units);
   Bias=Out+locvec(bias,units);
   dBias=Out+locvec(dbias,units);

   #if DEBUG
      Esav=Out+locvec(error,units); 
   #endif

   matstk(units,1,"_E");
   E=tos->mat;

   /* Errors in the output layer: 
         E(output) = Grad(output)*(NetGoal - Out(output)) */

   for(i=0;i<units;i++) {
      *(E+i)=*Grad*(*NetGoal - *Out);
      Grad++;
      NetGoal++;
      Out++;
   }

   #if DEBUG
      memcpy(Esav,E,sizeof(double)*units);
   #endif
   
   /* Change in output layer weights due to output layer errors:
         dW(output,k) = Eta*E(output)*Out(k) + Alpha*dW(output,k)~

      Change in output layer bias values due to output layer errors:
         dBias(output) = Eta*E(output) + Alpha*dBias(output)~

      where Out(k) is output from kth unit in the previous layer. 
      and ~ denotes previous change. */

   exe1(*(hdW+layers-1));
   dW=tos->mat;  
   drop();

   exe1(*(hOut+layers-2)); 
   Out=tos->mat;
   punits=tos->row;
   drop();

   for(j=0;j<punits;j++) { /* changes for output layer weights: */
      A=Eta*(*(Out+j));
      for(i=0;i<units;i++) {
         *dW=(*(E+i))*A + Alpha*(*dW);
         dW++;
      }
   }
   for(i=0;i<units;i++) { /* changes for output layer unit biases: */
      *dBias=Eta*(*(E+i)) + Alpha*(*dBias);
      dBias++;
   }
/* Errors in layers below the output layer: 
      E(k) = Grad(k)*(W(k+1,k)'*E(k+1) */

   for(k=layers-2;k>-1;k--) { /* top of back-prop loop */

      E=tos->mat; /* E(k+1), errors in next-higher layer ride on stk */

      exe1(*(hOut+k)); /* hOut(k) */
      Out=tos->mat;
      units=tos->row;
      Grad=Out+locvec(grad,units);
      Bias=Out+locvec(bias,units);
      dBias=Out+locvec(dbias,units);

      #if DEBUG
         Esav=Out+locvec(error,units);
      #endif

      drop(); /* dropping hOut(k) */

      if(!matstk(units,1,"_Ek")) return 0;
      Ek=tos->mat; /* vector for errors, Ek, onto stk */

      exe1(*(hW+k+1));  
      bend(); /* W'(k+1) onto stk */

      rot(); /* stk: Ek W' E */

      mpy(); /* E(k) = W'(k)*E(k+1) */
      Atmp=tos->mat;

      for(i=0;i<units;i++) { /* this layer's errors */
         *(Ek+i)=(*Grad)*(*Atmp); /* E(k) = Grad(k)*W'(k)*E(k+1) */
         Grad++;
         Atmp++;
      }
      drop(); /* dropping Atmp */

      #if DEBUG
         memcpy(Esav,Ek,sizeof(double)*units);
      #endif

      /* Change in layer k weights due to layer k errors:
            dW(k) = Eta*E(k)*Out(k-1) + Alpha*dW(k)~

         Change in layer k bias values due to layer k errors:
            dBias(k) = Eta*E(k) + Alpha*dBias(k)~

         where Out(k-1) is output from the previous layer
         and ~ denotes previous change. */

      exe1(*(hdW+k));
      dW=tos->mat;
      drop();

      if(k) { /* weight updates for an intermediate layer */
         exe1(*(hOut+k-1)); /* output of previous layer to stk */
         Out=tos->mat;
         punits=tos->row;
         drop();
      
         for(j=0;j<punits;j++) { /* updates for weight matrix: */
            A=*(Out+j)*Eta;
            for(i=0;i<units;i++) { 
               *dW=(*(Ek+i))*A + Alpha*(*dW);
               dW++;
            }
         }
      }
      else { /* weight updates for the input layer */
         extract1(Net,"NetIn"); /* network input to stk */
         Out=tos->mat;
         punits=tos->row;
         drop();

         for(i=0;i<punits;i++) { /* updates for weight vector: */
            *dW=Eta*(*(Ek+i))*(*(Out+i)) + Alpha*(*dW);
            dW++;
         }
      }
      for(i=0;i<units;i++) { /* updates for layer bias vector: */
         *dBias=Eta*(*(Ek+i)) + Alpha*(*dBias);
         dBias++;
      }
   } /* bottom of back-prop loop */

   drop(); /* Ek off stack */

/* Applying updates to all biases and weights. */

   for(k=0;k<layers;k++) {
      
      exe1(*(hOut+k));
      Out=tos->mat;
      units=tos->row; /* all terms in vector */
      drop();

      Bias=Out+locvec(bias,units);
      dBias=Out+locvec(dbias,units);

      for(i=0;i<units;i++) {
         *Bias-=*dBias; /* applying negative change because dBias has 
            been computed in the same form as dW for weights, which are
            used positively.  But Bias is used negatively, as -b in 
            net_activate. */
         Bias++;
         dBias++;
      }
      exe1(*(hdW+k));
      dW=tos->mat;
      drop();

      exe1(*(hW+k));
      W=tos->mat;
      units=tos->row*tos->col; /* all terms in matrix */
      drop();

      for(i=0;i<units;i++) {
         *W+=*dW;
         W++;
         dW++;
      }
   }
   return 1;

   #undef DEBUG
}

/* End of functions for netbp.v */

/*--------------------------------------------------------------------*/

/* Functions for linear prediction.

   Reference: 
      Press, W. H., et al, "Numerical Recipes in C," Cambridge 
      University Press, second edition, 1999; chapter 13.6. */

int lpcoef() /* lpcoef (hA m --- hC hMsq) */
/* Each column in returned C contains m linear prediction coefficients 
   for the corresponding column of data in A.  

   Equal spacing between points in the rows of A is assumed.

   If the values in A are called y, and coefficients in C are called d,
   then the estimation equation for point k is:

      yk = [Sum(j=1,m)dj*y(k-j)] + xk  

   This word determines coefficients d, and returns them in matrix C.  
   The evaluation of yk using the equation above is done by word lpeval.

   The mean square discrepancy is <x^2>, and is returned in Msq.  Each 
   row in Msq goes with the corresponding column of A, and each holds 
   the mean square discrepancy of the estimated residual error as the
   coefficients are constructed; see the Reference, p. 568.

   Word lpeval uses C and A (only the last m values) to extend A by a
   number of data points into the future. */
{
   double *A,*C,den,*Msq,num,p,*wk1,*wk2,*wkm;
   int cols,i,j=0,k,m,rows;

   if((tos-1)->typ!=MAT) {
      stkerr(" lpcoef: ",MATNOT);
      return 0;
   }
   if(!popint(&m)) return 0;

   A=tos->mat;
   rows=tos->row;
   cols=tos->col;

   if(!matstk(m,cols,"_C")) return 0;
   C=tos->mat;

   if(!matstk(cols,1,"_Msq")) return 0;
   Msq=tos->mat;

   if(!matstk(rows,1,"_wk1")) return 0;
   wk1=tos->mat;

   if(!matstk(rows,1,"_wk2")) return 0;
   wk2=tos->mat;

   if(!matstk(m,1,"_wkm")) return 0;
   wkm=tos->mat;

   for(;j<cols;j++) { 

   /* The following is based on function memcof() in the Reference. */

      p=0;
      for(i=0;i<rows;i++) p+=*(A+i) * (*(A+i));

      *Msq=p/rows;

      memcpy(wk1,A,rows*sizeof(double)); /* wk1(i)=A(i) */
      memcpy(wk2,A+1,(rows-1)*sizeof(double)); /* wk2(i)=A(i+1) */

      for(k=0;k<m;k++) {
         num=0;
         den=0;
         for(i=0;i<(rows-1-k);i++) {
            num+=*(wk1+i) * (*(wk2+i));
            den+=*(wk1+i)*(*(wk1+i)) + *(wk2+i)*(*(wk2+i));
         }
         if(!den) *(C+k)=0; /* do not divide by zero */
         else *(C+k)=2*num/den;

         *Msq*=(1 - *(C+k)*(*(C+k)));

         for(i=0;i<k;i++) 
            *(C+i)=*(wkm+i) - *(C+k)*(*(wkm+k-i-1));

         if(k+1<m) {
            memcpy(wkm,C,(k+1)*sizeof(double)); /* wkm(i)=C(i) */
            for(i=0;i<(rows-k-2);i++) {
               *(wk1+i)-=*(wkm+k) * (*(wk2+i));
               *(wk2+i)=*(wk2+i+1) - *(wkm+k) * (*(wk1+i+1));
            }
         }
      }
      A+=rows;
      C+=m;
      Msq++;
   }
   return(drop2() && drop() && rot() && drop());
}

int lpeval() /* lpeval (hA hC n --- hP) */
/* Evaluate the data series in each column of A, using linear prediction
   coefficients in the corresponding column of C, for n points beyond 
   the end (last row) of A.  

   Equal spacing between points in the rows of A is assumed.

   Evaluation of points n is made using 

         yn = [Sum(j=1,m)dj*y(n-j)] + xn  

   and points are returned in matrix P.  Residual xn is assumed to be 
   zero.

   Coefficients d contained in incoming matrix C may have been computed
   by word lpcoef. */
{
   double *A,*C,*P,sum,*wrk;
   int cols,i,j=0,k,m,n,rows;

   if(tos->typ!=NUM) {
      stkerr(" lpeval: ",NUMNOT);
      return 0;
   }
   if((tos-1)->typ!=MAT || (tos-2)->typ!=MAT) {
      stkerr(" lpeval: ",MATNOT2);
      return 0;
   }
   if((cols=(tos-1)->col)!=(tos-2)->col) {
      stkerr(" lpeval: ",MATSNOTC);
      return 0;
   }
   m=(tos-1)->row; /* rows (coefficients) in C */
   rows=(tos-2)->row; /* rows (past data) in A */

   if(rows<m) {
      stkerr(" lpeval: ","rows in A must equal or exceed rows in C");
      return 0;
   }
   popint(&n);
   n=MAX(n,0);

   C=tos->mat;
   A=(tos-1)->mat;

   if(!matstk(m,1,"_wrk")) return 0;
   wrk=tos->mat;

   if(!matstk(n,cols,"_P")) return 0;
   P=tos->mat;

   for(;j<cols;j++) {

   /* The following is based on function predic() in the Reference. */

      for(i=0;i<m;i++) *(wrk+i)=*(A+rows-i-1); /* ith thru mth */

      for(i=0;i<n;i++) {
         sum=0;

         /* dot product of wrk (imagine a row) and coefficients C 
         (imagine a column): */
         for(k=0;k<m;k++) sum+=*(wrk+k) * *(C+k); /* sum across row i */

         for(k=(m-1);k>0;k--) *(wrk+k)=*(wrk+k-1); /* shift wrk right */

         *(P+i)=*wrk=sum; /* P(k) is sum, and so is first term in wrk */
      }
      A+=rows;
      C+=m;
      P+=n;
   }
   return(lop() && lop() && lop());
}

/*--------------------------------------------------------------------*/

/* Functions for bdf.v: processing bulk data files */

int cylvec() /* cylvec (hRTZ --- hXYZ) */
/* R, Theta, Z cylindrical coordinates transformed into rectangular
   X, Y, Z.  The unit of angles in incoming Theta is degrees. 

   RTZ is a 3-by-N matrix, containing coordinates for N points. */
{
   register double *RTZ,*XYZ;
   register int j=0;
   int cols;

   if(tos->typ!=MAT) {
      stkerr(" cylvec: ",MATNOT);
      return 0;
   }
   if(tos->row!=3) {
      stkerr(" cylvec: ","expect matrix of 3 rows");
      return 0;
   }
   if(!matstk(3,(cols=tos->col),"_cylvec")) return 0;

   XYZ=tos->mat;
   RTZ=(tos-1)->mat;

   for(;j<cols;j++) {
      *(XYZ  )=*(RTZ)*(cos(*(RTZ+1)*RADPERDEG));
      *(XYZ+1)=*(RTZ)*(sin(*(RTZ+1)*RADPERDEG));
      *(XYZ+2)=*(RTZ)*(*(RTZ+2));
      XYZ+=3;
      RTZ+=3;
   }
   return(lop());
}

int render() /* render (hT --- hT1) */
/* Rendering electronic bulk data cards of field width 8 and where 
   double fields may be used to represent numbers.

   Each row of incoming volume T is a series of 9-field cards, each
   field 8 bytes wide, and continuation cards extending left to right
   as might be obtained from word cards, file bdf.v.  

   Saying something like "'PSHELL' cards render" will run this func-
   tion to produce volume T1 on the stack, ready to extract patterns
   and numbers from the appropriate fields of PSHELL cards.

   The first field in each incoming card of 72 bytes is an 8-byte 
   identifier, followed by ascii data in either 8 fields of 8 bytes
   (single field) or 4 fields of 16 bytes (double field).  The first-
   field identifier contains * somewhere in its pattern when the card
   type is double field.

   This function returns volume T1 having rows with two 8-byte parts 
   (side by side) for each original data field:

      The first part contains 8 bytes of the original text, right
      justified for proper sorting of patterns that might represent 
      integer numbers.

      The second part contains an 8-byte floating point number as 
      follows, extracted from the text of the original field:

         the actual number if the text can be converted

         INF (000000000000F07Fh little endian machine) if the
            text cannot be converted

         -INF (000000000000F0FFh little endian) if the text
            field is blank (all 20h).

   The last 8 bytes of double field text will be missing in the first 
   part returned, but double field text is assumed to be used only to 
   represent numbers--and all 64 floating point bits of numbers are 
   contained in the second, binary part.

   The beginning 8-byte identifier field in each card of T is not re-
   turned in T1.

   An example showing output from this function is given after its
   code. */
{
   char *T,*T1;
   register char *t,*t1;
   int i=0,j=0;
   register int k=0,m=0,n=0;

   double xblank,x;
   int cards,chars,chars1,rows;

   char buf[32],id[16],*name="_render";

   if(tos->typ!=VOL) { /* top of stack (tos) type must be VOL */
      stkerr(" render: ",VOLNOT);
      return 0;
   }
   chars=tos->col; /* chars is tos->col when tos->typ is VOL */
   rows=tos->row;
 
   if(!rows || !chars) {
      return(
         volstk(rows,chars,name) &&
         lop() 
      );
   }
   T=tos->tex; /* pointer to incoming text */

   cards=MAX(1,(chars/72)); /* assumes chars is int multiple of 72 */
   chars1=64*cards*2; /* upper bound for T1: all fields of 8 into 16 */

   if(!volstk(rows,chars1,name)) return 0;
   T1=tos->tex; /* T1 is now on top of stack */
   memset(T1,' ',rows*chars1); /* initial T1 to all blanks */

   *(id+8)='\0';
   xblank=-INF;

   for(;i<rows;i++) {
      t=T+loclin(i,chars);
      t1=T1+loclin(i,chars1);

      for(j=0;j<cards;j++) {
         memcpy(id,t,8);
         t+=8;

         if(strchr(id,'*')) {
            for(k=0;k<4;k++) { /* doing double fields */
               memcpy(buf,t,16);
               t+=16;

               *(buf+16)='\0';
               n=strspn(buf," "); /* n is offset to 1st char */

               if(n<16) {

                  m=15;
                  while(isspace((int)*(buf+m))) m--; 
                  m++; /* m is len of buf */
                  *(buf+m)='\0'; 

                  m=MIN(8,(m-n)); /* m is len to copy, <= 8 */
                  t1+=8;
                  memcpy(t1-m,buf+n,m); /* text right justified */

                  if(number((buf+n),&x)) memcpy(t1,(char *)&x,8);

                  else memcpy(t1,(char *)&INF,8);
               }
               else { /* field is blank */
                  t1+=8;
                  memcpy(t1,(char *)&xblank,8);
               }
               t1+=8;
            }
         }
         else {
            for(k=0;k<8;k++) { /* doing single fields */
               memcpy(buf,t,8);
               t+=8;

               *(buf+8)='\0';
               n=strspn(buf," "); /* n is offset to 1st char */

               if(n<8) {

                  m=7;
                  while(isspace((int)*(buf+m))) m--;
                  m++; /* m is len of buf */
                  *(buf+m)='\0';

                  m=MIN(8,(m-n)); /* m is length, <= 8 */
                  t1+=8;
                  memcpy(t1-m,buf+n,m); /* text right justified */

                  if(number((buf+n),&x)) memcpy(t1,(char *)&x,8);

                  else memcpy(t1,(char *)&INF,8);
               }
               else { /* field is blank */
                  t1+=8;
                  memcpy(t1,(char *)&xblank,8);
               }
               t1+=8;
            }
         }
      }
   }
   return(lop());
}
/* 
An example of word render.

Note: this example is best viewed in a window set to character width 72
or 80.

For these 80-character width bulk data cards (two single field, one 
double, and lines ignored with $ in the first column):

$2345678123456781234567812345678123456781234567812345678123456781234567812345678
JOINT     1981    1100    x8.5    -6d-1   41.9    1000

$23456780123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef12345678
JOINT*          10010   0               7.345678901234565.34567890123456JOINT  1
*OINT  11.345678901234561

JOINT     1951    1100    88.5    -62.0   41.9    1000

here is an xray of volume T1 from word render, verifying right justified
text for the first 8 bytes and double precision floating point numbers
(in little endian byte order from host gutter) for the second: 

[tops@gutter] ready > "se.bdf" bdf
 file mapping complete

[tops@gutter] ready > "joint" cards render, INF xray .
       0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F  0123456789ABCDEF
   0  20 20 20 20 31 39 38 31 00 00 00 00 00 F4 9E 40      1981.......@
   2  20 20 20 20 31 31 30 30 00 00 00 00 00 30 91 40      1100.....0.@
   4  20 20 20 20 78 38 2E 35 00 00 00 00 00 00 F0 7F      x8.5........
   6  20 20 20 2D 36 64 2D 31 33 33 33 33 33 33 E3 BF     -6d-1333333..
   8  20 20 20 20 34 31 2E 39 33 33 33 33 33 F3 44 40      41.933333.D@
  10  20 20 20 20 31 30 30 30 00 00 00 00 00 40 8F 40      1000.....@.@
  12  20 20 20 20 20 20 20 20 00 00 00 00 00 00 F0 FF          ........
...
  30  20 20 20 20 20 20 20 20 00 00 00 00 00 00 F0 FF          ........
  32  20 20 20 31 30 30 31 30 00 00 00 00 00 8D C3 40     10010.......@
  34  20 20 20 20 20 20 20 30 00 00 00 00 00 00 00 00         0........
  36  37 2E 33 34 35 36 37 38 EC E0 5E A6 F9 61 1D 40  7.345678..^..a.@
  38  35 2E 33 34 35 36 37 38 EC E0 5E A6 F9 61 15 40  5.345678..^..a.@
  40  31 2E 33 34 35 36 37 38 AF 83 7B 99 E6 87 F5 3F  1.345678..{....?
  42  20 20 20 20 20 20 20 31 00 00 00 00 00 00 F0 3F         1.......?
  44  20 20 20 20 20 20 20 20 00 00 00 00 00 00 F0 FF          ........
  46  20 20 20 20 20 20 20 20 00 00 00 00 00 00 F0 FF          ........
  48  20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
...
  62  20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
  64  20 20 20 20 31 39 35 31 00 00 00 00 00 7C 9E 40      1951.....|.@
  66  20 20 20 20 31 31 30 30 00 00 00 00 00 30 91 40      1100.....0.@
  68  20 20 20 20 38 38 2E 35 00 00 00 00 00 20 56 40      88.5..... V@
  70  20 20 20 2D 36 32 2E 30 00 00 00 00 00 00 4F C0     -62.0......O.
  72  20 20 20 20 34 31 2E 39 33 33 33 33 33 F3 44 40      41.933333.D@
  74  20 20 20 20 31 30 30 30 00 00 00 00 00 40 8F 40      1000.....@.@
  76  20 20 20 20 20 20 20 20 00 00 00 00 00 00 F0 FF          ........
...

The following agrees with the hex pattern in line 36, and verifies that
all 16 characters for 7.3456... in the double field card of JOINT 10010
are accounted for:

[tops@gutter] ready > 7.34567890123456 .hex
 EC E0 5E A6 F9 61 1D 40
[tops@gutter] ready > 

Later error checking will reject the x8.5 in line 4, since x8.5 could
not be converted to a number and in JOINT cards it is the value of the
X coordinate.  The INF in the second 8-byte part of the card, shown 
again below, exposes bad numbers like this to a rake of INFs, to be 
easily culled out:

   4  20 20 20 20 78 38 2E 35 00 00 00 00 00 00 F0 7F      x8.5........
*/

int sphvec() /* sphvec (hRTP --- hXYZ) */
/* R, Theta, Phi spherical coordinates transformed into rectangular
   X, Y, Z.  The unit of angles in Theta and Phi is degrees. */
{
   register double *RTP,sinP,*XYZ;
   register int j=0;
   int cols;
   const double radperdeg=0.017453292519943295;

   if(tos->typ!=MAT) {
      stkerr(" sphvec: ",MATNOT);
      return 0;
   }
   if(tos->row!=3) {
      stkerr(" sphvec: ","expect matrix of 3 rows");
      return 0;
   }
   if(!matstk(3,(cols=tos->col),"_sphvec")) return 0;

   XYZ=tos->mat;
   RTP=(tos-1)->mat;

   for(;j<cols;j++) {
      sinP=sin(*(RTP+1)*radperdeg);
      *(XYZ  )=*(RTP)*sinP*cos(*(RTP+2)*radperdeg);
      *(XYZ+1)=*(RTP)*sinP*sin(*(RTP+2)*radperdeg);
      *(XYZ+2)=*(RTP)*cos(*(RTP+1)*radperdeg);
      XYZ+=3;
      RTP+=3;
   }
   return(lop());
}

/* End of functions for bdf.v */

/*--------------------------------------------------------------------*/

/* Functions for genetic algorithm */

int ga_bit() /* ga_bit (hV hR --- hB) */
/* Fetch the bits in V that are listed in R.  B(i)=-1 if R(i) is on,
   and 0 otherwise.
   Bits are numbered from left to right, starting with 1st. */
{
   char *V;
   double *B,*R;
   int bit,byte,k=0,maxbit,rows;
   unsigned char mask[8]={128,64,32,16,8,4,2,1};

   if(tos->typ!=MAT && tos->typ!=NUM) {
      stkerr(" ga_bit: ",NUMORMATNOT);
      return 0;
   }
   if((tos-1)->typ!=VOL && (tos-1)->typ!=STR) {
      stkerr(" ga_bit: ",STRORVOLNOT);
      return 0;
   }
   hand();
   R=tos->mat;

   V=(tos-1)->tex;
   maxbit=(tos-1)->col<<3; /* multiply by 8 */

   if(!matstk((rows=tos->row),1,"_bit")) return 0;
   B=tos->mat;

   for(;k<rows;k++) {
      bit=*(R+k) - XBASE;
      if(bit<0 || 1+bit>maxbit) {
         stkerr(" ga_bit: ","bit index out of range");
         return 0;
      }
      byte=bit>>3; /* divide by 8 to get byte in V */
      bit-=byte<<3; /* bit local within byte = mask index */

      if(*(V+byte) & *(mask+bit)) *(B+k)=xTRUE;
      else *(B+k)=xFALSE;
   }
   return(lop() && lop());
}

int ga_bitoff() /* ga_bitoff (hV hR --- hV1) */
/* Turn off the bits in V that are listed in R.
   Bits are numbered from left to right, starting with 1st. */
{
   char *V1;
   double *R;
   int bit,byte,k=0,maxbit,rows;
   unsigned char mask[8]={128,64,32,16,8,4,2,1};

   if(tos->typ!=MAT && tos->typ!=NUM) {
      stkerr(" ga_bitoff: ",NUMORMATNOT);
      return 0;
   }
   if((tos-1)->typ!=VOL && (tos-1)->typ!=STR) {
      stkerr(" ga_bitoff: ",STRORVOLNOT);
      return 0;
   }
   hand();
   R=tos->mat;
   rows=tos->row;

   swap();
   cop();
   V1=tos->tex;
   maxbit=tos->col<<3; /* multiply by 8 */

   for(;k<rows;k++) {
      bit=*(R+k) - XBASE;
      if(bit<0 || 1+bit>maxbit) {
         stkerr(" ga_bitoff: ","bit index out of range");
         return 0;
      }
      byte=bit>>3; /* divide by 8 to get byte in V */
      bit-=byte<<3; /* bit local within byte = mask index */

      *(V1+byte)=*(V1+byte) | *(mask+bit); /* turn bit on */
      *(V1+byte)=*(V1+byte) ^ *(mask+bit); /* xor bit the mask's 1 */

   }
   return(lop());
}

int ga_biton() /* ga_biton (hV hR --- hV1) */
/* Turn on the bits in V that are listed in R.
   Bits are numbered from left to right, starting with 1st. */
{
   char *V1;
   double *R;
   int bit,byte,k=0,maxbit,rows;
   unsigned char mask[8]={128,64,32,16,8,4,2,1};

   if(tos->typ!=MAT && tos->typ!=NUM) {
      stkerr(" ga_biton: ",NUMORMATNOT);
      return 0;
   }
   if((tos-1)->typ!=VOL && (tos-1)->typ!=STR) {
      stkerr(" ga_biton: ",STRORVOLNOT);
      return 0;
   }
   hand();
   R=tos->mat;
   rows=tos->row;

   swap();
   cop();
   V1=tos->tex;
   maxbit=tos->col<<3; /* multiply by 8 */

   for(;k<rows;k++) {
      bit=*(R+k) - XBASE;
      if(bit<0 || 1+bit>maxbit) {
         stkerr(" ga_biton: ","bit index out of range");
         return 0;
      }
      byte=bit>>3; /* divide by 8 to get byte in V */
      bit-=byte<<3; /* bit local within byte = mask index */

      *(V1+byte)=*(V1+byte) | *(mask+bit); /* turn bit on */
   }
   return(lop());
}

int ga_crossover() /* ga_crossover (hA hB nbit --- hC hD) */
/* Cut A and B at nbit and join the head of A to the tail of B to make 
   C, and the head of B to the tail of A to make D.  Bit nbit goes with
   the head segment.

   Bits are numbered from left to right, starting with 1st. 

   If nbit = 1st-1, then C=B and D=A.
   If nbit = Abits = Bbits, then C=A and D=B.

\  Demo (works for 0based and 1based, BIG_ENDIAN and LITTLE_ENDIAN):

      32 ga_random 1st 32 items ga_biton "A" book
      4 nulls "B" book
      A .bin nl B .bin nl

      20 12 ndx DO A B I ga_crossover .bin nl LOOP nl nl \
      20 12 ndx DO .bin nl LOOP nl

\  Bounding cases:
      A B 0  ndx ga_crossover .bin nl .bin
      A B 32 ndx ga_crossover .bin nl .bin
*/
{
   char *A,*B,*C,*D;
   int byte=0,bytes,nbit;

/* Bit masks */
   unsigned char mask[8]={128,192,224,240,248,252,254,255};
/* nask is not(mask): */
   unsigned char nask[8]={127, 63, 31, 15,  7,  3,  1,  0};

/* Looking at some bit masks:
      [tops@rugger] ready > 240 int4 1st orderbyte catch .bin
       11110000
      [tops@rugger] ready > 240 int4 1st orderbyte catch nott .bin
       00001111
      [tops@rugger] ready > 15 int4 1st orderbyte catch .bin      
       00001111
      [tops@rugger] ready > 
*/
   if(!popint(&nbit)) return 0;
   nbit-=XBASE; /* can range from -1 to 8*(tos->col)-1 */

   if(tos->typ!=VOL && tos->typ!=STR) {
      stkerr(" ga_crossover: ",STRORVOLNOT);
      return 0;
   }
   if((tos-1)->typ!=VOL && (tos-1)->typ!=STR) {
      stkerr(" ga_crossover: ",STRORVOLNOT);
      return 0;
   }
   bytes=tos->col;

   if(bytes!=(tos-1)->col) {
      stkerr(" ga_crossover: ","strings are not the same length");
      return 0;
   }
   if(nbit<-1 || nbit>(bytes<<3)-1) {
      stkerr(" ga_crossover: ","crossover point is outside strings");
      return 0;
   }
   B=tos->tex;
   A=(tos-1)->tex;

   if(!ga_strstk(bytes,"_C")) return 0;
   C=tos->tex;

   if(!ga_strstk(bytes,"_D")) return 0;
   D=tos->tex;

   if(nbit>0) byte=nbit>>3; /* divide by 8 to get byte in V */

   memcpy(C,A,byte); /* head of A to C */
   memcpy(C+byte+1,B+byte+1,bytes-byte-1); /* tail of B to C */

   memcpy(D,B,byte); /* head of B to D */
   memcpy(D+byte+1,A+byte+1,bytes-byte-1); /* tail of A to D */

/* Create byte at the head/tail intersection: */
   nbit-=byte<<3; /* bit local within byte = index for mask and nask */

   if(nbit>-1) {
      *(C+byte)=(*(A+byte) & *(mask+nbit)) | (*(B+byte) & *(nask+nbit));
      *(D+byte)=(*(B+byte) & *(mask+nbit)) | (*(A+byte) & *(nask+nbit));
   }
   else {
      *(C+byte)=*(B+byte);
      *(D+byte)=*(A+byte);
   }
   return(rot() && drop() && rot() && drop());
}

int ga_fill() /* ga_fill (hR --- hV) */
/* Create a 1-row volume of bits where bit k is 0 if R(k)=0, and 1
   otherwise.  Bits are numbered from left to right, starting with 
   1st, and 1st bit corresponds to R(1st). */
{
   char *V;
   double *R;
   int b,bit,byte,h,k=0,n;
   unsigned char mask[8]={128,64,32,16,8,4,2,1};

   if(tos->typ!=MAT && tos->typ!=NUM) {
      stkerr(" ga_fill: ",NUMORMATNOT);
      return 0;
   }
   hand();
   R=tos->mat;
   n=tos->row;

   h=(((b=n>>3)<<3)!=n);

   if(!ga_strstk(h+b,"_V")) return 0;
   V=tos->tex;
   memset(V,0,tos->col);

   for(;k<n;k++) {
      if(*R) {
         bit=k;

         byte=bit>>3; /* divide by 8 to get byte in V */
         bit-=byte<<3; /* bit local within byte = mask index */

         *(V+byte)=*(V+byte) | *(mask+bit); /* turn bit on */
      }
      R++;
   }
   return(lop());
}

int ga_random() /* ga_random (n --- hV) */
/* Create a 1-row volume of n random bits.
   Bits are numbered from left to right, starting with 1st. */
{
   char *V;
   int b,bit,byte,h,k=0,n;
   unsigned char mask[8]={128,64,32,16,8,4,2,1};

   if(!popint(&n)) return 0;

   h=(((b=n>>3)<<3)!=n);

   if(!ga_strstk(h+b,"_V")) return 0;
   V=tos->tex;
   memset(V,0,tos->col);

   for(;k<n;k++) {
      if(rand0(&SEED)>0.5) {
         bit=k;

         byte=bit>>3; /* divide by 8 to get byte in V */
         bit-=byte<<3; /* bit local within byte = mask index */

         *(V+byte)=*(V+byte) | *(mask+bit); /* turn bit on */
      }
   }
   return 1;
}

int ga_strstk(int bytes, char *name) 
/* Push uninitialized string to stack, stored as a 1-row VOL that is 
   bytes long. */
{
   char *S;

   bytes=MAX(0,bytes);

   if((S=malloc(1+bytes))==NULL) {
      stkerr(" ga_strstk: ",MEMNOT);
      return 0;
   }
   *(S+bytes)='\0';

   return(push(VOL,(char *)memgetn(name,strlen(name)),NOTAG,0,NULL,S, \
      1,bytes,NULL));
}

int ga_toggle() /* ga_toggle (hV hR --- hV1) */
/* Toggle the bits in V that are listed in R.
   Bits are numbered from left to right, starting with 1st. */
{
   char *V1;
   double *R;
   int bit,byte,k=0,maxbit,rows;
   unsigned char mask[8]={128,64,32,16,8,4,2,1};

   if(tos->typ!=MAT && tos->typ!=NUM) {
      stkerr(" ga_biton: ",NUMORMATNOT);
      return 0;
   }
   if((tos-1)->typ!=VOL && (tos-1)->typ!=STR) {
      stkerr(" ga_biton: ",STRORVOLNOT);
      return 0;
   }
   hand();
   R=tos->mat;
   rows=tos->row;

   swap();
   cop();
   V1=tos->tex;
   maxbit=tos->col<<3; /* multiply by 8 */

   for(;k<rows;k++) {
      bit=*(R+k) - XBASE;
      if(bit<0 || 1+bit>maxbit) {
         stkerr(" ga_biton: ","bit index out of range");
         return 0;
      }
      byte=bit>>3; /* divide by 8 to get byte in V */
      bit-=byte<<3; /* bit local within byte = mask index */

      *(V1+byte)=*(V1+byte) ^ *(mask+bit); /* xor with the mask's 1 */
   }
   return(lop());
}

/*--------------------------------------------------------------------*/

/* Functions for mday.n and other related files. */

int cposn() /* cposn (hS hC ht nT --- hS1 hC1 ht1 ht2) */
/* Wed Jul 23 10:24:16 PDT 2014

*/
{
   double *C,*S,*t,*W;
   double *C1,Ck,*S1,*t1,*t2;
   int i,k1,k2,n,nr,*p,T;

   if(!popint(&T)) return 0;

   if(tos->typ!=MAT || (tos-1)->typ!=MAT || (tos-2)->typ!=MAT) {
      stkerr(" cposn: ",STKNOT);
      return 0;
   }
/* Make copies of incoming stack items if they are in the catalog: */
   cop(); t=tos->mat; rev();
   cop(); C=tos->mat; rev();
   cop(); S=tos->mat; rev();

   nr=tos->row;
   if((tos-1)->row!=nr || (tos-2)->row!=nr) {
      gprintf(\
         " cposn: S, C and t number of rows must be equal");
      nc();
      stkerr("","");
      drop2(); drop();
      return 0;
   }
   lpush(); lpush(); lpush();

   if(!matstk(nr,1,"_S1")) return 0;
   S1=tos->mat;
   if(!matstk(nr,1,"_C1")) return 0;
   C1=tos->mat;
   if(!matstk(nr,1,"_t1")) return 0;
   t1=tos->mat;
   if(!matstk(nr,1,"_t2")) return 0;
   t2=tos->mat;
   if(!matstk(nr,1,"_W")) return 0;
   W=tos->mat;

   lpull(); lpull(); lpull(); /* tos items: hS hC ht */

/* Sort C and reorder t and S accordingly: */
   p=qsort2(C,nr,1);
   if(p==NULL) {
      gprintf(" cposn: qsort2 error: null p");
      nc();
      stkerr("","");
      return 0;
   }
   memcpy(W,t,nr*sizeof(double));
   for(i=0;i<nr;i++) *(t+i)=*(W+*(p+i));
   memcpy(W,S,nr*sizeof(double));
   for(i=0;i<nr;i++) *(S+i)=*(W+*(p+i));
   mallfree((void *)&p);

   k2=1;
   k1=0;
   Ck=*C;
   while(k2<nr) {
gprintf("k1 %d k2 %d Ck %d Ck2 %d\n",k1,k2,(int)Ck,(int)*(C+k2));
      if(Ck==*(C+k2)) k2++;
      else {
         n=k2-k1;

gprintf("   n %d\n",n);
         if(n>1) {
gprintf("      sort %d:%d\n",k1,k2-1);
         /* Sort t(k1:k2-1) and reorder S(k1:k2-1) accordingly: */
            p=qsort2(t+k1,n,1);
            if(p==NULL) {
               gprintf(" cposn: qsort2 error: null p");
               nc();
               stkerr("","");
               return 0;
            }
            memcpy(W,S+k1,n*sizeof(double));
            for(i=0;i<n;i++) *(S+i)=*(W+*(p+i));
            mallfree((void *)&p);
          
         /* Store values for t1: */
            for(i=k1;i<k2-1;i++) {
               *(t1+i)=*(t+i+1)-1;
            }
         }
         *(t1+k2-1)=*(t+k2-1)+T;
         Ck=*(C+k2);
         k2++;
         k1=k2-1;
gprintf("   k1 %d k2 %d Ck %d\n",k1,k2,(int)Ck);
      }
   }

return 1;
   return(drop2() && drop2());
}

int crossing() /* crossing (hX hY --- hF) */
/* Wed Mar 30 12:18:17 PDT 2011

   Incoming X and Y are compatible matrices.

   Returned flag F(k,j) is +1 when X(k,j) has crossed above Y(k,j), -1
   when X(k,j) has crossed below Y(k,j), and 0 otherwise.

   Sun May 12 09:35:23 PDT 2013.  Revised to remove occasional double
   counts.  Renamed from crossover to crossing.

   Testing.  Paste the following lines at the ready prompt:

      >>                                                           \
      r = 100000;                                                  \
      DO (10, 1)                                                   \
         R = random(r, 1);                                         \
         A = [R , R , R];                                          \
         B = fill(0.5, dims(A));                                   \
         C = crossing(A, B);                                       \
         ndif = abs(totals(C==1) - totals(C==-1));                 \
         << I "%3.0f" format . ndif "%3.0f" format vol2str . nl >> \
      LOOP; <<

      Results.  The loop above runs ten tests of random numbers, zero 
      to one, in random sequences of 100,000.  Tests show the number of
      plus and minus 0.5 crossings does not vary by more than one.

         Below is typical output, showing the difference in number of
         crossings above and below 0.5 does not vary by more than one. 

         All values in each row being equal is a necessary condition
         that each column of A is processed the same, and implies that 
         each of the identical columns of A gave the same result and
         that the bumping of indices in the while loops is correct as
         each column is processed.

            [tops@kaffia] ready >  
              1  1   1   1 
              2  0   0   0 
              3  0   0   0 
              4  0   0   0 
              5  1   1   1 
              6  0   0   0 
              7  0   0   0 
              8  0   0   0 
              9  1   1   1 
             10  0   0   0 

         Continuing to paste the same lines and thereby run other ran-
         dom sequences did not produce plus and minus crossings that
         varied by more than one.  

      This function has been verified by testing. */
{
   double *F,*X,*Y;
   int cols,j=0,k,rows;

   if(tos->typ!=MAT || (tos-1)->typ!=MAT) {
      stkerr(" crossing: ",MATNOT2);
      return 0;
   }
   if(tos->row!=(tos-1)->row || tos->col!=(tos-1)->col) {
      stkerr(" crossing: ",MATSNOTC);
      return 0;
   }
   Y=tos->mat;
   X=(tos-1)->mat;

   rows=tos->row;
   cols=tos->col;
   if(!matstk(rows,cols,"_crossing")) return 0;
   F=tos->mat;
   memset(F,0,rows*cols*sizeof(double));

   X++; Y++; F++;
   while(j<cols) {
      k=1;
      while(k<rows) {
         if(*(X-1)<=*(Y-1) && *X>*Y) {
            *F=1;
            k++; X++; Y++; F++;
            while(k<rows && *X>=*Y) { k++; X++; Y++; F++; }
         }
         else {
            if(*(X-1)>=*(Y-1) && *X<*Y) {
               *F=-1;
               k++; X++; Y++; F++;
               while(k<rows && *X<=*Y) { k++; X++; Y++; F++; }
            }
            else { 
               if(k<rows) { k++; X++; Y++; F++; } 
            }
         }
      }
      if(j<cols) { j++; X++; Y++; F++; }
   }
   return(lop() && lop());
}

int crossing1() /* crossing1 (hX hY --- hF) */
/* Wed May 15 09:13:37 PDT 2013.

   Incoming X and Y are compatible matrices.

   Processing in this function matches processing in crossing(); the 
   difference is in output matrix F: where F(i,j) from crossing()
   equals zero, F(i,j) from crossing1() will equal F(i-1,j).

   In detail:

      Returned flag F(n,j) is +1 when, for k<=n, X(k,j) has crossed
      above Y(k,j) and X(n,j) is still above Y(n,j),

      returned flag F(n,j) is -1 when, for k<=n, X(k,j) has crossed
      below Y(k,j) and X(n,j) is still below Y(n,j).

   Initial rows of F(n,j) will contain zero before enough values have
   been processed. 

   See man crossing1 for example usage, where the various runs of all
   heads and all tails in a row are summed up for 10,000 flips of a
   balanced coin. */
{
   double *F,*X,*Y;
   int cols,j=0,k,rows;

   if(tos->typ!=MAT || (tos-1)->typ!=MAT) {
      stkerr(" crossing: ",MATNOT2);
      return 0;
   }
   if(tos->row!=(tos-1)->row || tos->col!=(tos-1)->col) {
      stkerr(" crossing: ",MATSNOTC);
      return 0;
   }
   Y=tos->mat;
   X=(tos-1)->mat;

   rows=tos->row;
   cols=tos->col;
   if(!matstk(rows,cols,"_crossing")) return 0;
   F=tos->mat;
   memset(F,0,rows*cols*sizeof(double));

   X++; Y++; F++;
   while(j<cols) {
      k=1;
      while(k<rows) {
         if(*(X-1)<=*(Y-1) && *X>*Y) {
            *F=1;
            k++; X++; Y++; F++;
            while(k<rows && *X>=*Y) { *F=1; k++; X++; Y++; F++; }
         }
         else {
            if(*(X-1)>=*(Y-1) && *X<*Y) {
               *F=-1;
               k++; X++; Y++; F++;
               while(k<rows && *X<=*Y) { *F=-1; k++; X++; Y++; F++; }
            }
            else { 
               if(k<rows) { *F=*(F-1); k++; X++; Y++; F++; } 
            }
         }
      }
      if(j<cols) { j++; X++; Y++; F++; }
   }
   return(lop() && lop());
}

int dposn() /* dposn (hC hNd --- hP) */
/* Mon Mar  4 13:24:03 PST 2013.  Decimated positions P for C.

   Fri Jan  3 05:28:16 PST 2014.  Values in Nd are number of steps (the
   same as number of rows) which matches usage in lag().  This means 
   that values in row k of P now roughly correspond to the values in
   row k+1 of former P and may contain C(k).  

   Sun Sep 29 22:02:50 PDT 2013.  Number of points N has been removed
   from stack input formerly given by dposn: (hC hNd N --- hP).  N is
   computed from maximum step in Nd as N=max(Nd)+1.

   Positions are computed at every point and then decimated to a smaller
   set afterward to avoid a form of aliasing that appears as spurious
   oscillations of positions.

   At every point in vector C, compute and sort a set of positions at
   1, 2, ... N steps ago, where N is one plus the maximum step from 
   list Nd.

   From the full set of N positions in working vector W, fetch the 
   subset defined by the steps in Nd (these are called decimated posi-
   tions).  

   For example, if Nd has values 0, 10, 99, then there are N=99+1 sorted
   positions in W(0:99), and values at W(0), W(10) and W(99) will be
   returned in each row of P.

   Returned matrix P has number of rows matching steps in C, and number
   of columns equal to the number of rows in vector Nd.  

   To avoid aliasing, N positions must be computed at every step in C
   and then sorted before decimating to the subset of rows defined by
   vector Nd.  Each decimation operation produces another column for P.

   Wed Mar  6 06:24:21 PST 2013.  This version uses rsort() to insert a
   new value into an ascending order vector of numbers, and avoid sort-
   ing the entire vector.  Since the vector is already in ascending or-
   der, binary search is used in rsort() to really kick up the speed  
   of search and replace.

   The initial version using qsort() is now called dposn1().  

   Speed up of this version over dposn1() is about a factor of 30.  See
   below "5. Timing tests" for results of timing tests.

   Testing

      Fri Jan  3 05:28:16 PST 2014.  Cases 1, 2 and 3 shown below are
      for revised output in P (see note above).
      
      Tue Oct 15 15:11:02 PDT 2013.  Cases 1, 2 and 3 have been rerun
      for revised definition of Nd steps as zero based offsets, and 
      results match previous.

      Mon Sep 30 12:23:23 PDT 2013.  Cases 1, 2 and 3 have been rerun
      for revised stack, and revised definition of Nd as steps, not in-
      dices (so XBASE is not subtracted), and results match previous.

      1. No decimation.

      [tops@kaffia] ready > 1 20 items reversed dup (hC) \
                            list: 0 4 thru ; (hNd) dposn \
                            (hC hP) park .m

                     C        P1       P2       P3       P4       P5

        Row 1:       20       20       20       20       20       20
        Row 2:       19       19       20       20       20       20
        Row 3:       18       18       19       20       20       20
        Row 4:       17       17       18       19       20       20
        Row 5:       16       16       17       18       19       20
        Row 6:       15       15       16       17       18       19
        Row 7:       14       14       15       16       17       18
        Row 8:       13       13       14       15       16       17
        Row 9:       12       12       13       14       15       16
       Row 10:       11       11       12       13       14       15
       Row 11:       10       10       11       12       13       14
       Row 12:        9        9       10       11       12       13
       Row 13:        8        8        9       10       11       12
       Row 14:        7        7        8        9       10       11
       Row 15:        6        6        7        8        9       10
       Row 16:        5        5        6        7        8        9
       Row 17:        4        4        5        6        7        8
       Row 18:        3        3        4        5        6        7
       Row 19:        2        2        3        4        5        6
       Row 20:        1        1        2        3        4        5
      [tops@kaffia] ready >

      These results are correct by inspection.

      2. With decimation.

      [tops@kaffia] ready > 1 20 items reversed dup (hC) \
                            list: 0 2 4 ; (hNd) dposn    \
                            (hC hP) park .m

                     C        P1       P2       P3

        Row 1:       20       20       20       20
        Row 2:       19       19       20       20
        Row 3:       18       18       20       20
        Row 4:       17       17       19       20
        Row 5:       16       16       18       20
        Row 6:       15       15       17       19
        Row 7:       14       14       16       18
        Row 8:       13       13       15       17
        Row 9:       12       12       14       16
       Row 10:       11       11       13       15
       Row 11:       10       10       12       14
       Row 12:        9        9       11       13
       Row 13:        8        8       10       12
       Row 14:        7        7        9       11
       Row 15:        6        6        8       10
       Row 16:        5        5        7        9
       Row 17:        4        4        6        8
       Row 18:        3        3        5        7
       Row 19:        2        2        4        6
       Row 20:        1        1        3        5
      [tops@kaffia] ready >

      These results are correct by inspection.

      3. Shuffled case. 

      [tops@kaffia] ready > list: 17 9 1 3 19 8 11 4 12 16 15       \
                                  6 7 2 20 13 5 10 14 18 ; dup (hC) \
                            list: 0 4 thru ; (hNd) dposn            \
                            (hC hP) park .m

                     C        P1       P2       P3       P4       P5
 
        Row 1:       17       17       17       17       17       17
        Row 2:        9        9       17       17       17       17
        Row 3:        1        1        9       17       17       17
        Row 4:        3        1        3        9       17       17
        Row 5:       19        1        3        9       17       19
        Row 6:        8        1        3        8        9       19
        Row 7:       11        1        3        8       11       19
        Row 8:        4        3        4        8       11       19
        Row 9:       12        4        8       11       12       19
       Row 10:       16        4        8       11       12       16
       Row 11:       15        4       11       12       15       16
       Row 12:        6        4        6       12       15       16
       Row 13:        7        6        7       12       15       16
       Row 14:        2        2        6        7       15       16
       Row 15:       20        2        6        7       15       20
       Row 16:       13        2        6        7       13       20
       Row 17:        5        2        5        7       13       20
       Row 18:       10        2        5       10       13       20
       Row 19:       14        5       10       13       14       20
       Row 20:       18        5       10       13       14       18
      [tops@kaffia] ready >
 
      These results no longer match results from dposn1() due to the 
      revision of P (any future use of dposn1() should incorporate the
      revision).  

      These results are correct by inspection.  For example, row 20 in-
      cludes four values of C from row 19 (14, 10, 5, 13) in sorted 
      order.  Value 20 in row 19, originally from row 15, is now more 
      than 4 steps old, and in row 20 it is replaced by 18, the value
      of C in row 20.

      4. Real time case.  Updating in real time uncovered a bug due to
      having incoming Nd booked in the catalog and then changing Nd by
      XBASE in the code below.

      Changing incoming arrays, like Nd, should only be done on copies
      because an incoming array could be stored in a library.  Using
      cop() below fixed the problem and results exactly match results
      using high level expressions that this function will replace.

      [Mon Sep 30 12:23:23 PDT 2013.  Nd is no longer updated, so this
      case is moot.]

      5. Timing tests.

      Results below show that dposn() is about 30 times faster than 
      dposn1(), and that results from each function agree exactly (as
      they must).

      Problem size for dposn(C, Nd, N):

         [tops@kaffia] ready > t rows .i 
          22080 [steps (rows) in C]

         [tops@kaffia] ready > N .i 
          481 [steps 1, 2, 3, ..., N]

         [tops@kaffia] ready > Nd .i 
          Column 1:
               1     31     61     91    121    151    181    211
             241    271    301    331    361    391    421    451
             481 [decimate to steps Nd]

      This shows phrases that run the timing test using a vector C:
           
         C = Pt[*, .C];
         << "Timing test for dposn for problem size (steps):" .
         C rows .i nl "Begin test" . timeprobe nl >>
 
         P = dposn(C, Nd, N);
         << "ET dposn (microsec): " . timeprobe nl >>

         P1 = dposn1(C, Nd, N);
         << "ET dposn1 (microsec):" . timeprobe nl >>
 
         NULL = null?(P - P1);
         << NULL IF "Results agree" ELSE "Results do not agree" THEN 
         . nl HALT >>

      This shows two timing tests using new function rsort() to replace
      qsort() when creating sorted vectors for dposn(); speed up of 
      dposn() over dposn1() using qsort() is about 8 to 10:

         [dale@kaffia] /home/dale > tops
                  Tops 3.2.0
         Wed Mar  6 10:46:35 PST 2013
         [tops@kaffia] ready > "mobius.n" psource eu
         EU real time
         Analyzing the last 45 days ...
         Timing test for dposn for problem size (steps): 22080
         Begin test 1362595624709063
         ET dposn (microsec):  487731
         ET dposn1 (microsec): 4814876 [factor of 9.9]
         Results agree
         [tops@kaffia] ready > exit
 
         [dale@kaffia] /home/dale > tops
                  Tops 3.2.0
         Wed Mar  6 10:47:04 PST 2013
         [tops@kaffia] ready > "mobius.n" psource eu
         EU real time
         Analyzing the last 45 days ...
         Timing test for dposn for problem size (steps): 22080
         Begin test 1362595654843379
         ET dposn (microsec):  939031
         ET dposn1 (microsec): 7412009 [factor of 7.9]
         Results agree
         [tops@kaffia] ready >

      This shows two timing tests after implementing binary search in
      new function rsort() used by dposn().  From about 8 to 10 in the
      previous test, speed up of dposn() over dposn1() has risen to 
      about 30:
 
         [dale@kaffia] /home/dale > tops
                  Tops 3.2.0
         Wed Mar  6 14:42:59 PST 2013
         [tops@kaffia] ready > "mobius.n" psource eu
         EU real time
         Analyzing the last 45 days ...
         Timing test for dposn for problem size (steps): 22080
         Begin test 1362609804881307
         ET dposn (microsec):  154713
         ET dposn1 (microsec): 4721720 [factor of 30.5]
         Results agree
         [tops@kaffia] ready > exit
 
         [dale@kaffia] /home/dale > tops
                  Tops 3.2.0
         Wed Mar  6 14:43:26 PST 2013
         [tops@kaffia] ready > "mobius.n" psource eu
         EU real time
         Analyzing the last 45 days ...
         Timing test for dposn for problem size (steps): 22080
         Begin test 1362609828853364
         ET dposn (microsec):  154678
         ET dposn1 (microsec): 4764052 [factor of 30.8]
         Results agree
         [tops@kaffia] ready > */
{ 
   register int k=0,r=0;
   register double *C,*D,*Nd,*P,*W,*X;
   int cols,N=-INF,rnew,rows;

   if(tos->typ!=MAT || (tos-1)->typ!=MAT) {
      stkerr(" dposn: ",MATNOT2);
      return 0;
   }
   Nd=tos->mat;
   cols=tos->row;
   for(;k<cols;k++) { N=MAX(N,*(Nd+k)); }
   N+=1;

   swap(); 
   C=tos->mat; /* hC, incoming C tos) */
   rows=tos->row;
   if(!matstk(N+rows,1,"_C")) return 0;
   D=tos->mat;

/* Pad the beginning of C with N fictitious past values: */
   for(k=0;k<N;k++) { *D=*C; D++; }

   memcpy(D,C,rows*sizeof(double));
   C=D; /* real time C values start here */
   lop(); /* drop incoming C off the stack */

   if(!matstk(N,1,"_W")) return 0;
   W=tos->mat;

   if(!matstk(rows,cols,"_P")) return 0;
   P=tos->mat;
   
/*gprintf(" dposn.  rows %d cols %d N %d Nd0 %d Ndn %d\n",\
rows,cols,N,(int)*Nd,(int)*(Nd+cols-1));*/

   memcpy(W,C-N,N*sizeof(double));
   qsort1(W,N,1);

   X=P;
   for(k=0;k<cols;k++) {
      *X=*(W+(int)*(Nd+k));

/*gprintf("   r %d k %d Nd(k) %d *X %f\n",r,k,(int)*(Nd+k),*X);*/

      X+=rows;
   }
   for(r=0;r<rows;r++) { /* Fri Jan  3 05:28:16 PST 2014.  Loop starts
                            at r=0, not r=1. */
      if(!rsort(W,N,&rnew,*C,*(C-N))) {
         gprintf(" dposn: error in rsort");
         nc();
         stkerr("","");
         return 0;
      }

/*if(r==rows-1) return 1;*/

      X=P+r;
      for(k=0;k<cols;k++) {
         *X=*(W+(int)*(Nd+k));

/* Sat Oct 19 10:58:32 PDT 2013.  Test for uniformity.*/
/*if(r==20593) gprintf("   r %d k %d Nd(k) %d *X %f\n",r,k,\
    (int)*(Nd+k),*X);*/

         X+=rows;
      }
/*return 1;*/

/* Sat Oct 19 10:58:32 PDT 2013.  Test for uniformity.*/
/*if(r==20593) return 1;*/

      C++;
   }
   return(lop() && lop() && lop());
}

/* Dposn1() is no longer used.  Dposn() gives the same result and is
   30 times faster.  But dposn1() is simpler, and may be useful for 
   studying updates to dposn().

   To reactivate dposn1(), change $ to * and redefine dposn1() in 
   wapp.h and wapp.p. */

/* Sat Sep 28 19:07:58 PDT 2013.  Use dposn1() to develop updates that
   will be put into dposn() when completed. */

/*---
int dposn1() /$ dposn1 (hC hNd N --- hP) $/
/$ Mon Mar  4 13:24:03 PST 2013.  Decimated positions.
  
   Positions are computed at every step and then decimated to a smaller
   set afterward to avoid a form of aliasing that appears as spurious
   oscillations of positions.

   At every step (row) in vector C, compute and sort a set of positions
   at 1, 2, ... N steps ago.

   Then at every step decimate the set of N positions to the subset of
   rows (between 1 and N or 0 and N-1 depending upon index base) listed
   in vector Nd, and return the result as a row in P.

   Returned matrix P has number of rows matching steps in C, and number
   of columns equal to the number of rows in vector Nd.  

   To avoid aliasing, N positions must be computed at every step in C
   and then sorted before decimating to the subset of rows defined by
   vector Nd.  Each decimation operation produces another column for P.

   Testing
      See function dposn(). $/
{ 
   register int k=0,r=0;
   register double *C,*D,*Nd,*P,*W,*X;
   int cols,N,*p,rows;
   int Nd0;

   if(tos->typ!=NUM) {
      stkerr(" dposn1: ",NUMNOT);
      return 0;
   }
   if((tos-1)->typ!=MAT || (tos-2)->typ!=MAT) {
      stkerr(" dposn1: ",MATNOT2);
      return 0;
   }
   popint(&N);

   cop(); /$ makes a copy of Nd if it is in the library $/
   pushq2("_Nd",3);
   naming();
   Nd=tos->mat;
   cols=tos->row;
   for(;r<cols;r++) *(Nd+r)-=XBASE; /$ subtract XBASE from Nd indices $/
   Nd0=(int)*Nd;

   swap(); 
   C=tos->mat; /$ hC, incoming C tos) $/
   rows=tos->row;
   if(!matstk(N+rows,1,"_C")) return 0;
   D=tos->mat;

/$ Pad the beginning of C with N fictitious past values: $/
   for(;k<N;k++) { 
      *D=*C; 
      D++; 
   }
   memcpy(D,C,rows*sizeof(double));
   C=D; /$ real time C values start here $/
   lop(); /$ drop incoming C off the stack $/

   if(!matstk(N,1,"_W")) return 0;
   W=tos->mat;

   if(!matstk(rows,cols,"_P")) return 0;
   P=tos->mat;
   
gprintf(" dposn1.  rows %d cols %d N %d Nd0 %d Ndn %d\n",rows,cols,N,(int)*Nd,(int)*(Nd+cols-1));
   
   for(r=0;r<rows;r++) {
      /$memcpy(W,C-N-Nd0,(N+Nd0)*sizeof(double));
      memcpy(W,C-N,N*sizeof(double)); $/

      memcpy(W,C-N,N*sizeof(double));
      p=qsort2(W,N,1);
      if(p==NULL) {
         gprintf(" dposn1: qsort2 error: null p");
         stkerr("","");
         nc();
         return 0;
      }
      X=P+r;
      for(k=0;k<cols;k++) {

gprintf("   r %d k %d Nd(k) %d\n",r,k,(int)*(Nd+k));

         *X=*(W+(int)*(Nd+k));

/$    Sun Sep 29 19:53:08 PDT 2013.  Using p from qsort2()
         p=qsort2(W,N,1);
      in this expression
         *X=*((C-N)+*(p+(int)*(Nd+k))); 
      is equivalent to 
         *X=*(W+(int)*(Nd+k));
      because W was defined as 
         memcpy(W,C-N,N*sizeof(double)); $/
      X+=rows;

      }
      mallfree((void *)&p);
      C++;
if(r>0) return 1;
   }
   return(lop() && lop() && lop());
}
---*/

int dposn2() /* dposn2 (hC hNd N --- hP hV) */
/* Mon Apr  1 07:19:46 PDT 2013.  Decimated positions P for C.

   This function operates the same as dposn() and returns vector V of
   indices.  V(k) is equal to the index (under the current index base)
   that corresponds to C(k-1) within the ascending order set of posi-
   tions at 1, 2, ... N steps ago.

   Positions are computed at every step and then decimated to a smaller
   set afterward to avoid a form of aliasing that appears as spurious
   oscillations of positions.

   At every step (row) in vector C, compute and sort a set of positions
   at 1, 2, ... N steps ago.

   Then at every step decimate the set of N positions to the subset of
   rows (between 1 and N or 0 and N-1 depending upon index base) listed
   in vector Nd, and return the result as a row in P.

   Returned matrix P has number of rows matching steps in C, and number
   of columns equal to the number of rows in vector Nd.  

   Like P, returned vector V has number of rows matching steps in C.

   Testing

      1. Shuffled case (from dposn() case 3).

      Previously this case was verified with dposn() and dposn1() com-
      puting the same result after speedup changes.  Results here agree
      with earlier.  In addition, vector V is verified by inspection.

      [tops@kaffia] ready > list: 17 9 1 3 19 8 11 4 12 16 15 \
                                  6 7 2 20 13 5 10 14 18 ; dup (hC) \
                            list: 1 5 thru ; (hNd) 5 (N) dposn2 \
                            (hC hP hV) 3 parkn "%6.0f" mformatset .m

                   C      P1     P2     P3     P4     P5      V

        Row 1:     17     17     17     17     17     17      5
        Row 2:      9     17     17     17     17     17      5
        Row 3:      1      9     17     17     17     17      1
        Row 4:      3      1      9     17     17     17      1
        Row 5:     19      1      3      9     17     17      2
        Row 6:      8      1      3      9     17     19      5
        Row 7:     11      1      3      8      9     19      3
        Row 8:      4      1      3      8     11     19      4
        Row 9:     12      3      4      8     11     19      2
       Row 10:     16      4      8     11     12     19      4
       Row 11:     15      4      8     11     12     16      5
       Row 12:      6      4     11     12     15     16      4
       Row 13:      7      4      6     12     15     16      2
       Row 14:      2      6      7     12     15     16      2
       Row 15:     20      2      6      7     15     16      1
       Row 16:     13      2      6      7     15     20      5
       Row 17:      5      2      6      7     13     20      4
       Row 18:     10      2      5      7     13     20      2
       Row 19:     14      2      5     10     13     20      3
       Row 20:     18      5     10     13     14     20      4
      [tops@kaffia] ready >

      C(8)=4, and in row 9 it is P2 (2nd) in the list, which agrees
      with V(9)=2.

      C(19)=14 and in row 20 it is P4 (4th) in the list, which agrees
      with V(20)=4.

      These results are correct by inspection. */
{ 
   register int k=0,r=0;
   register double *C,*D,*Nd,*P,*V,*W,*X;
   int cols,N,rnew,rows;

   if(tos->typ!=NUM) {
      stkerr(" dposn: ",NUMNOT);
      return 0;
   }
   if((tos-1)->typ!=MAT || (tos-2)->typ!=MAT) {
      stkerr(" dposn: ",MATNOT2);
      return 0;
   }
   popint(&N);

   cop(); /* makes a copy of Nd if it is in the catalog */
   Nd=tos->mat;
   cols=tos->row;
   for(;r<cols;r++) *(Nd+r)-=XBASE; /* subtract XBASE from Nd indices */

   swap(); 
   C=tos->mat; /* hC, incoming C tos) */
   rows=tos->row;
   if(!matstk(N+rows,1,"_C")) return 0;
   D=tos->mat;

/* Pad the beginning of C with N fictitious past values: */
   for(;k<N;k++) { 
      *D=*C;
      D++; 
   }
   memcpy(D,C,rows*sizeof(double));
   C=D; /* real time C values start here */
   lop(); /* drop incoming C off the stack */

   if(!matstk(N,1,"_W")) return 0;
   W=tos->mat;

   if(!matstk(rows,cols,"_P")) return 0;
   P=tos->mat;

   if(!matstk(rows,1,"_V")) return 0;
   V=tos->mat;
   
   memcpy(W,C-N,N*sizeof(double));
   qsort1(W,N,1);

   X=P;
   for(k=0;k<cols;k++) {
      *X=*(W+(int)*(Nd+k));
      X+=rows;
   }
   *V=(N-1)+XBASE;
   V++;

   for(r=1;r<rows;r++) {
      if(!rsort(W,N,&rnew,*C,*(C-N))) {
         gprintf(" dposn: error in rsort");
         nc();
         stkerr("","");
         return 0;
      }
      X=P+r;
      for(k=0;k<cols;k++) {
         *X=*(W+(int)*(Nd+k));
         X+=rows;
      }
      *V=rnew+XBASE;
      V++;
      C++;
   }
   return(lpush() && lop() && lop() && lop() && lpull());
}

/*
Sun May 12 09:17:42 PDT 2013.  Work in progress, now archived.  
Reference to gposn in wapp.h and wapp.p have been removed.

int gposn() // gposn (hC hB1 hB2 nb --- hR1 hR2) //
// Fri May  3 03:39:15 PDT 2013 

   Lower and upper boundaries B1 and B2 contain C and correspond to the
   range of positions over period b.  When B1 is pushed lower or B2 is
   pushed higher to contain C, the range of positions is increasing, 
   and for the direction of push this corresponds to a longer period p,
   where p>b.

   For each step (row) in C, return period p.  When B1(k) is pushed 
   lower, period R1(k) is greater than b; and when B2(k) is pushed 
   higher, period R2(k) is greater than b.
 
   When B1(k) is not pushed lower, R1(k)=b.  When B2(k) is not pushed
   higher, R2(k)=b. 

   All arrays are column vectors and all are assumed to have the same 
   number of rows as incoming B2 (which is not checked). 

   Sat May  4 11:34:36 PDT 2013.  B1, B2 and b must be consistent; for
   example, B1 and B2 could come from (B1, B2) = fposn(C, b).  That 
   means any B1, B2 and b will produce the same output R1 and R2, after
   an initial period less than b where there is mismatch as higher b
   requires more steps to properly initialize.

   Testing.  

      Initial testing:

      // Fri May  3 05:44:02 PDT 2013

         if(missing("fposn")) source("mmath.v");
         mformatset("%6.1f");

      // Data with two future values ("5, 5") appended:
         C = numbers("5, 4, 3, 2, 1, 2.1, 3.1, 4.1, 5.1, " + "5, 5");

         N = 2; // steps (equals N+1 points)

         P = fposn(C, N);
         P0 = P[*, 1];
         P1 = P[*, 2];
         P2 = P[*, 3];

         (R1, R2) = gposn(C, P1, P2, N); 

      Final testing was done with real time data. //
{
   register int k,r;
   register double *B1,*B2,*C1,C2,*R1,*R2,*X;
   double *C;
   int b,rows;

   if(tos->typ!=NUM) {
      stkerr(" gposn: ",NUMNOT);
      return 0;
   }
   if((tos-1)->typ!=MAT || (tos-2)->typ!=MAT || (tos-3)->typ!=MAT) {
      gprintf(" gposn: expect three matrices on stack");
      nc();
      stkerr("","");
      return 0;
   }
   popint(&b);

   B2=tos->mat;
   rows=tos->row;

   B1=(tos-1)->mat;
   C=(tos-2)->mat;

   if(!matstk(rows,1,"_R1")) return 0;
   R1=tos->mat;

   if(!matstk(rows,1,"_R2")) return 0;
   R2=tos->mat;

   *R1=*R2=1;
   X=C;
   for(k=1;k<rows;k++) {
      X++;

      B1++;
      R1++;
      C2=*X;
      C1=X-1;
      r=k-b+1;
      while(r>0 && *C1>C2) { C1--; r--; }
      if(r>0) *R1=MAX(1,(k-b+1)-r);
      else *R1=k;

      B2++;
      R2++;
      C2=*X;
      C1=X-1;
      r=k-b+1;
      while(r>0 && *C1<C2) { C1--; r--; }
      if(r>0) *R2=MAX(1,(k-b+1)-r);
      else *R2=k;
   }
   return(lpush() && lpush() && drop() && drop2() && 
          lpull() && lpull());
}
*/

int mmax() /* mmax (hA n --- hB) */
/* Moving max; B(i,j) is max of A(i-n+1,j) to A(i,j).

   Tue Jan 14 11:45:03 PST 2014.  Code at the heart of this function
   has been moved to function _mmax() in file mmath.c.

   It is amazing what a little rearranging can do.  Words mmax and mmin
   are 100 to 150 times faster than old versions mmaxX and mminX (and
   of course they even produce matching results).

      [tops@kaffia] ready > ww
      mmax and mmaxX results match
      mmin and mminX results match
      mmax  (microsec):  22376
      mmaxX (microsec):  2504952
      mmin  (microsec):  24064
      mminX (microsec):  3720356

   Here are the phrases that produced the results above:

      seed0 seedset
      20000 1 random 1000 * integer "A" book

      1000 "N" book

      A N mmaxX "B1" book
      A N mmax  "B2" book
      B2 B1 - null? IF "match" ELSE "do not match" THEN
      " mmax and mmaxX results " swap + . nl

      A N mminX "C1" book
      A N mmin  "C2" book
      C2 C1 - null? IF "match" ELSE "do not match" THEN
      " mmin and mminX results " swap + . nl

      time push 10 1 DO A N mmax drop LOOP time pull less
      " mmax  (microsec): " . 1E6 * .i nl

      time push 10 1 DO A N mmaxX drop LOOP time pull less
      " mmaxX (microsec): " . 1E6 * .i nl

      time push 10 1 DO A N mmin drop LOOP time pull less
      " mmin  (microsec): " . 1E6 * .i nl

      time push 10 1 DO A N mminX drop LOOP time pull less
      " mminX (microsec): " . 1E6 * .i nl \ */
{
   register int j=0;
   register double *A,*B;
   int cols,n,rows;

   if(tos->typ!=NUM) {
      stkerr(" mmax: ",NUMNOT);
      return 0;
   }
   if((tos-1)->typ!=MAT) {
      stkerr(" mmax: ",MATNOT);
      return 0;
   }
   if(!popint(&n)) return 0;

   cols=tos->col;
   rows=tos->row;

   n=MIN(n,rows);
   n=MAX(n,1);

   if(!matstk(rows,cols,"_mmax")) return 0;

   for(;j<cols;j++) {
      A=(tos-1)->mat+j*rows;
      B=tos->mat+j*rows;
      _mmax(A,rows,n,B);
   }
   return(lop());
}

int mmaxf() /* mmaxf (hA n --- hB hR hf) */
/* Thu Mar 21 15:04:28 PDT 2013

   Moving max; B(i,j) is max of A(i-n+1,j) to A(i,j).

   This function operates identically to mmax():

      B = mmax(A, n)

   and also returns vectors of row numbers R and truth flags f.  

   Row numbers R (in the current index base) replicate B from A in the
   following expression:

      B = reach(A, R)

   Truth flags f apply to A and B and are true at step k where A(k) and
   B(k) have moved to a new moving max from step k-1:

      B' = looking(A, f) = looking(B, f);

   Where R is equal to its index and B is moving higher, flag f is true.
   For index base 0 or 1, this can be expressed as:

      f = ([xbase:ndx(rows(R))] - R)==0 && B>lag(B,1);

   This means that when f is true, A is driving B higher to new max
   values.  Testing below verifies that this expression holds.
  
   Testing.

      Paste at the ready prompt the two lines following:
         syspath "../src/wapp.c" + \ 
         "#def Testing mmaxf" "#end Testing mmaxf" msource1
\
      #def Testing mmaxf 
      {" 

      seedset(seed0);
      A = 1000*random(200,1);
      n = 30;

      nl;

      B = mmax(A, n);
      (B1, R, f) = mmaxf(A, n);
      if(null?(B1-B)) nl(dot(" mmax and mmaxf results match"));
      else nl(dot(" mmax and mmaxf results do not match"));

      B2 = looking(B1, f);
      f2 = ([xbase:ndx(rows(R))] - R)==0 && B1>lag(B1,1);
      if(null?(f-f2)) nl(dot(" flag from mmaxf is valid"));
      else nl(dot(" flag from mmaxf is not valid"));

      B3 = reach(A, R);
      if(null?(B1-B3)) nl(dot(" rows from mmaxf are valid"));
      else nl(dot(" rows from mmaxf are not valid"));

      C = mmin(A, n);
      (C1, R, f) = mminf(A, n);
      if(null?(C1-C)) nl(dot(" mmin and mminf results match"));
      else nl(dot(" mmin and mminf results do not match"));

      C2 = looking(C1, f);
      f2 = ([xbase:ndx(rows(R))] - R)==0 && C1<lag(C1,1);
      if(null?(f-f2)) nl(dot(" flag from mminf is valid"));
      else nl(dot(" flag from mminf is not valid"));

      C3 = reach(A, R);
      if(null?(C1-C3)) nl(dot(" rows from mminf are valid"));
      else nl(dot(" rows from mminf are not valid"));

      plot([A, B1, B2+10, C1, C2-10], 1:rows(A));
      halt(plotclose(pause));

      "} (hT) eval

      #end Testing mmaxf 

   Running test case for mmaxf() and mminf():  

               Tops 3.2.1
      Sat Dec  7 14:57:22 PST 2013
      [tops@kaffia] ready > syspath "../src/wapp.c" + \
      >          "#def Testing mmaxf" "#end Testing mmaxf" msource1

       mmax and mmaxf results match
       flag from mmaxf is valid
       rows from mmaxf are valid
       mmin and mminf results match
       flag from mminf is valid
       rows from mminf are valid

       press Enter to continue 

      [tops@kaffia] ready > 

   Results match mmax() and mmin(). */
{
   register int i,j=0,k1,k2;
   register double *A,*B,*f,*R,X;
   int cols,kX,n,rows;

   if(tos->typ!=NUM) {
      stkerr(" mmaxf: ",NUMNOT);
      return 0;
   }
   if((tos-1)->typ!=MAT) {
      stkerr(" mmaxf: ",MATNOT);
      return 0;
   }
   if(!popint(&n)) return 0;

   cols=tos->col;
   rows=tos->row;

   n=MIN(n,rows);
   n=MAX(n,1);

   if(!matstk(rows,cols,"_mmaxf")) return 0;

   if(!matstk(rows,cols,"_R")) return 0;

   if(!matstk(rows,cols,"_f")) return 0;
   memset(tos->mat,0,rows*cols*sizeof(double));

   for(;j<cols;j++) {
      A=(tos-3)->mat+j*rows;
      B=(tos-2)->mat+j*rows;
      X=*B=*A;

      R=(tos-1)->mat+j*rows;
      *R=XBASE;

      f=tos->mat+j*rows;

      k2=1;
      kX=0;
      while(k2<n) {
         if(*(A+k2)>X) {
            X=*(A+k2);
            *(R+k2)=k2+XBASE;
            *(f+k2)=xTRUE;
            kX=k2;
         }
         *(B+k2)=X;
         *(R+k2)=kX+XBASE;
         k2++;
      }
      for(;k2<rows;k2++) {
         if(*(A+k2)>X) { /* newest is max */
            X=*(A+k2);
            *(R+k2)=k2+XBASE;
            *(f+k2)=xTRUE;
            kX=k2;
         }
         else {
            k1=k2-n+1;
            if(kX<k1) { /* max is gone; find new max */
               X=*(A+k1);
               kX=k1;
               for(i=k1+1;i<=k2;i++) {
                  if(*(A+i)>X) {
                     X=*(A+i);
                     kX=i;
                  }
               }
            }
         }
         *(B+k2)=X;
         *(R+k2)=kX+XBASE;
      }
   }
   return(pushint(3) && roll() && drop());
}

int mmaxN() /* mmaxN (hA n N --- hB) \ moving maximums, N highest */
/* Tue Jul  3 17:36:00 PDT 2012

   Moving max; B(j,i) is the jth highest max of A(i-n+1) to A(i). 

   Incoming A is a column vector. 

   The N row vectors of returned B contain different moving max traces
   of A for the same period n, where row 1 of B holds the trace of 
   lowest moving max of A and row N the trace of highest.

   Answers questions like "What are the N highest values of A during 
   the latest period n?" 

   Examples and test cases for mmaxN(), mmaxN2(), mminN(), mminN2():
     
      1. Pasting the following at the ready prompt,

         >> 
            n = 5;
            N = 3;
            A = 2 + uniform(1, 15);
            B = mmaxN(A, n, N);
         << A B transpose park .m 
     
         gives this result (with column header added),

                           A      B[1]'    B[2]'    B[3]'
             Row 1:        2        2        2        2
             Row 2:        3        2        3        3
             Row 3:        4        2        3        4
             Row 4:        5        3        4        5
             Row 5:        6        4        5        6
             Row 6:        7        5        6        7
             Row 7:        8        6        7        8
             Row 8:        9        7        8        9
             Row 9:       10        8        9       10
            Row 10:       11        9       10       11
            Row 11:       12       10       11       12
            Row 12:       13       11       12       13
            Row 13:       14       12       13       14
            Row 14:       15       13       14       15
            Row 15:       16       14       15       16
           [tops@kaffia] ready > 

         which is correct by cursory inspection.

      2. This example is more complex and points out the difficulties
         in this type of calculation:

         >>
            seedset(seed0);
            n = 5;
            N = 3;
            A = integer(1000*random(15, 1));
            B = mmaxN(A, n, N);
         << A B transpose park .m

                          A      B[1]'    B[2]'    B[3]'
             Row 1:      509      509      509      509
             Row 2:      332      332      332      509
             Row 3:       70       70      332      509
             Row 4:      268      268      332      509
             Row 5:      808      332      509      808
             Row 6:       65      268      332      808
             Row 7:      390      268      390      808
             Row 8:      753      390      753      808
             Row 9:      418      418      753      808
            Row 10:      812      418      753      812
            Row 11:       73      418      753      812
            Row 12:      515      515      753      812
            Row 13:      126      418      515      812
            Row 14:      396      396      515      812
            Row 15:      635      396      515      635
           [tops@kaffia] ready > 

         The first three rows show the result of initialization for N=3,
         where the rows are simply populated as each new A value comes
         along.

         In row 4, value 70 is replaced by 268.

         From row 5 to row 6, 509 disappears because it is older than
         the period n=5.  It is replaced by 268 which had been in row 4
         but got crowded out in row 5 by new high 808.

         Continued inspection along these lines shows no discrepancies
         and this function is judged to be working correctly for this
         somewhat complex problem. 

      3. This function should produce the same result as mmax() for a
         case of N=1.  Here is a test using a vector of 30000 random
         numbers:

            >> seedset(seed0); N=1;
            A = integer(1000*random(30000, 1));
            E = null?(mmaxN(A, 5, N)' - mmax(A, 5));
            if(E) dot(" mmaxN() and mmax() results are identical"); <<

            mmaxN() and mmax() results are identical
           [tops@kaffia] ready > 

         When first run, this third case did not produce identical
         results in a small fraction of the 30000 rows.  It might be 
         tempting to call the disagreement minor and just let it go.
         But in this, less than perfect agreement is major disagreement
         no matter how small. 

         Getting correct index resets in all branches (setting kX,
         kXold and kXprev) in mmaxN2() made the agreement perfect.

      4. Wed Jul  4 07:14:58 PDT 2012.  Feeling confident after case 3?
         Forget it.  

         The trace of highest moving max for any N, the one that is 
         output in row N, should produce the same result as mmax() just
         like case 3 did.  The following runs such a case:

            >> seedset(seed0); N=3;
            A = integer(1000*random(30000, 1));
            E = null?(mmaxN(A, 5, N)[N]' - mmax(A, 5));
            if(E) dot(" mmaxN() and mmax() results are identical"); <<

            mmaxN() and mmax() results are identical
           [tops@kaffia] ready > 

         and did not produce identical results the first time.  The 
         else branch where processing flows when an item in the N high-
         est is too old, was missing this expression to reset X after
         new max *(A+kX) was stored in B:

             X=*B; // minimum max (do not use X=*(A+kX);) //

         This bug does not cause a machine problem, like a seg fault, 
         which might be better.  Function mmaxN2() just hums merrily
         along producing garbage results once in a while.  The problem
         is, they look kind of ok. 

         Here is the same test applied to mminN():

         Wed Jul  4 08:37:59 PDT 2012.  The trace of lowest moving min
         for any N, the one that is output in row N, should produce the
         same result as mmin().  The following runs such a case and
         obtains identical results:

            >> seedset(seed0); N=3;
            A = integer(1000*random(30000, 1));
            E = null?(mminN(A, 5, N)[N]' - mmin(A, 5));
            if(E) dot(" mminN() and mmin() results are identical"); <<

            mminN() and mmin() results are identical
           [tops@kaffia] ready >

      5. Thu Jul  5 03:26:52 PDT 2012.  Verify that mapping matrix J 
         from mmaxN2() or mminN2() exactly maps original function A to
         each of the N returned functions in B.  

         Transpose (') is used to align rows and columns of J with A,
         and reach1() is used to operate on A with row numbers from J.

         Pasting these lines at the ready prompt 

            >> seedset(seed0); N=3;
            if(missing("reach1")) source(sourceof("reach1"));

            A = integer(1000*random(30000, 1));

            (B1, J1) = mmaxN2(A, 5, N);
            B2 = reach1(A, J1');

            (B3, J2) = mminN2(A, 5, N);
            B4 = reach1(A, J2');

            E = null?(B1' - B2) && null?(B3' - B4);
            if(E) dot(" OK mapping mmaxN2() and mminN2()"); <<

         produces the OK message:

            OK mapping mmaxN2() and mminN2()
           [tops@kaffia] ready > 

         and verifies that J correctly tracks the transformations that
         make the N traces in B from the single trace A. */
{
   return(mmaxN2() && drop());
}

int mmaxN2() /* mmaxN2 (hA n N --- hB hJ) */
/* Tue Jul  3 03:37:41 PDT 2012

   Moving max; B(j,i) is the jth highest max of A(i-n+1) to A(i). 

   Incoming A is a column vector.  

   The N row vectors of returned B contain different moving max traces
   of A for the same period n, where row 1 of B holds the trace of 
   lowest moving max of A and row N the trace of highest.

   The N row vectors of returned J correlate with those of B.  Term 
   J(j,i) contains the row number of A that provided term B(j,i). 

   Function mmaxN() is identical to this one except J is not returned;
   see mmaxN() for some examples and test cases.

   Adapted from mmax() with its 100 factor speed up rearrangement (see
   mmax(), this file). */
{
   register int i,j,k2;
   register double *A,*B,*J,*W,*W1,*W2,X;
   int kX=0,kXold=0,kXprev=0,n,N,*p,rows;

   if(tos->typ!=NUM || (tos-1)->typ!=NUM) {
      stkerr(" mmaxN2: ",NUMS2NOT);
      return 0;
   }
   if((tos-2)->typ!=MAT) {
      stkerr(" mmaxN2: ",MATNOT);
      return 0;
   }
   if(!popint(&N)) return 0;
   if(!popint(&n)) return 0;

   A=tos->mat;
   rows=tos->row;

   n=MIN(n,rows);
   n=MAX(n,1);

   if(N>n) {
      gprintf(" mmaxN2: count N cannot exceed period n");
      nc();
      stkerr("","");
      return 0;
   }
   if(!matstk(n,3,"_W")) return 0;
   W=tos->mat; W1=W+n; W2=W1+n; /* work arrays */

   if(!matstk(N,rows,"_B")) return 0;
   if(!matstk(N,rows,"_J")) return 0;

   B=(tos-1)->mat; /* moving max traces from A */
   J=tos->mat;     /* moving max rows of A */

   for(k2=0;k2<N;k2++) {
      for(i=k2;i<N;i++) {
         *(B+i)=*(A+k2);
         *(J+i)=k2;
      }
      memcpy(B+N,B,N*sizeof(double));
      memcpy(J+N,J,N*sizeof(double));
      B+=N;
      J+=N;
   }
   B=(tos-1)->mat;
   J=tos->mat;

   j=0;
   while(j<N) {
      p=qsort2(B,N,1);
      if(p==NULL) {
         gprintf(" mmaxN2: qsort2 error: null p");
         nc();
         stkerr("","");
         return 0;
      }
      memcpy(W,J,N*sizeof(double));
      for(i=0;i<N;i++) *(J+i)=*(W+*(p+i));
      mallfree((void *)&p);
      J+=N;
      B+=N;
      j++;
   }
   B-=N;
   J-=N;
   X=*B;

   for(;k2<rows;k2++) {
      if(*(A+k2)>X) { /* newest is max */
   /* if(*(A+k2)>=X) { removed Aug  6 10:04:42 PDT 2012 */ 
         X=*(A+k2);
         kX=k2;
      }
      if(k2<kXold+n) {
         memcpy(B+N,B,N*sizeof(double));
         memcpy(J+N,J,N*sizeof(double));
         B+=N;
         J+=N;
         if(kX>kXprev && X>*B) { 
      /* if(kX>kXprev && X>=*B) { removed Aug  6 10:04:42 PDT 2012 */
            *B=X; /* put X into the list of N highest */
            *J=kX; 
            p=qsort2(B,N,1); /* lowest high is first */
            if(p==NULL) {
               gprintf(" mmaxN2: qsort2 error: null p");
               nc();
               stkerr("","");
               return 0;
            }
            memcpy(W,J,N*sizeof(double));
            for(i=0;i<N;i++) *(J+i)=*(W+*(p+i));
            mallfree((void *)&p);
         }
      }
      else { /* an item in N highest is too old */
         memcpy(W,A+k2-n+1,n*sizeof(double)); /* A data to W */
         for(i=0;i<n;i++) *(W2+i)=k2-n+i+1;   /* A rows to W2 */

         p=qsort2(W,n,0); /* descending sort over period n */
         if(p==NULL) {
            gprintf(" mmaxN2: qsort2 error: null p");
            nc();
            stkerr("","");
            return 0;
         }
         for(i=0;i<n;i++) *(W1+i)=*(W2+*(p+i)); /* A rows to W1 */
         mallfree((void *)&p);

         p=qsort2(W,N,1); /* ascending sort over N highest within n */
         if(p==NULL) {
            gprintf(" mmaxN2: qsort2 error: null p");
            nc();
            stkerr("","");
            return 0;
         }
         memcpy(B+N,W,N*sizeof(double)); /* A data to B */
         B+=N;

         for(i=0;i<N;i++) *(W+i)=*(W1+*(p+i));
         mallfree((void *)&p);
         memcpy(J+N,W,N*sizeof(double)); /* A rows to J */
         J+=N;

         kXold=*W;
         for(i=1;i<N;i++) if(*(W+i)<kXold) kXold=*(W+i);

         kX=*W;
         for(i=1;i<N;i++) if(*(W+i)>kX) kX=*(W+i);
      }
      X=*B; /* minimum max (do not use X=*(A+kX);) */
      kXprev=kX;
   }
   J=tos->mat;
   for(i=0;i<N*rows;i++) *(J+i)+=XBASE; /* add XBASE to J row offsets */

   return(rot() && drop() && rot() && drop());
}

/* int mmaxX() */ /* mmax (hA n --- hB) */
/* Moving max; B(i,j) is max of A(i-n+1,j) to A(i,j). */
/*
{
   register int i,j=0,k1,k2;
   register double *A,*B,X;
   int cols,kX,n,rows;

   if(tos->typ!=NUM) {
      stkerr(" mmax: ",NUMNOT);
      return 0;
   }
   if((tos-1)->typ!=MAT) {
      stkerr(" mmax: ",MATNOT);
      return 0;
   }
   if(!popint(&n)) return 0;

   cols=tos->col;
   rows=tos->row;

   n=MIN(n,rows);
   n=MAX(n,1);

   if(!matstk(rows,cols,"_mmax")) return 0;

   for(;j<cols;j++) {
      A=(tos-1)->mat+j*rows;
      B=tos->mat+j*rows;

      k2=1;
      kX=0;
      X=*B=*A;
      
      while(k2<n) {
         if(*(A+k2)>X) {
            X=*(A+k2);
            kX=k2;
         }
         *(B+k2)=X;
         k2++;
      }
      for(;k2<rows;k2++) {
         k1=k2-n+1;
         if(kX>=k1 && *(A+k2)>X) {
            X=*(A+k2);
            kX=k2;
         }
         else {
            X=*(A+k1);
            kX=k1;
            for(i=k1+1;i<=k2;i++) {
               if(*(A+i)>X) {
                  X=*(A+i);
                  kX=i;
               }
            }
         }
         *(B+k2)=X;
      }
   }
   return(lop());
}
*/

int mmaxr() /* mmaxr (hA hB0 n --- hB) */
/* Moving max; B(i,j) is max of A(i-n+1,j) to A(i,j). 
   For use in real time, as vector A grows.  B0 is B from a previous 
   call to this function, or to function mmax(), and has number of rows
   equal to A at that time.  Now, additional rows have been added to A,
   and this function computes moving max for these new rows and appends
   them to B0 to create B. 

   Here is some high level code for testing this word, and verifying 
   that the incremental result agrees with batch function mmax():

      seed0 seedset 
      30001 "L" book L 1 random 1000 * integer "A" book
      200 "N" book

      A dup pile N mmax "B" book \ batch values from mmax

      A purged N mmaxr (hB0)                \ first set
      A dup pile swap (hA hB0) N mmaxr (hB) \ appended set
      "Br" book \ incremental values from mmaxr

      Br B - null? \ get true flag when valid */
{
   register int i,j=0,k;
   register double *A,*B,max;
   int cols,kmin,n,rows,rows0;

   if(tos->typ!=NUM) {
      stkerr(" mmax: ",NUMNOT);
      return 0;
   }
   if((tos-1)->typ!=MAT || (tos-2)->typ!=MAT) {
      stkerr(" mmax: ",MATNOT2);
      return 0;
   }
   if((tos-1)->col<=0) { /* B0 is purged; run mmax() */
      lop(); /* B0 off stack */
      return(mmax());
   }
   if(!popint(&n)) return 0;

   cols=tos->col;
   if((tos-1)->col != cols) {
      stkerr(" mmax: ","columns of A and B0 do not match");
      return 0;
   }
   rows=(tos-1)->row; /* rows of A and B */
   rows0=tos->row;    /* rows of B0 */

   if(rows0>rows) {
      stkerr(" mmax: ","rows of B0 cannot exceed rows of A");
      return 0;
   }
   else {
      if(rows0==rows) return(lop()); /* B = B0 */
   }
   n=MIN(n,rows);
   n=MAX(n,1);

   if(!matstk(rows-rows0,cols,"_mmax")) return 0;
   pile();

   kmin=MAX(0,rows0-n-1);

   for(;j<cols;j++) {
      A=(tos-1)->mat+rows*cols-1 - j*rows;
      B=tos->mat+rows*cols-1 - j*rows;
      k=rows-1;
      for(;k>kmin;k--) {
         max=*A;
         i=1;
         for(;i<n && i<=k;i++) max=MAX(max,*(A-i));
         if(k>=rows0) *B=max;
         A--;
         B--;
      }
      if(k==0) *B=*A;
   }
   return(lop());
}

int mmaxV() /* mmaxV (hV hN --- hB) */
/* Tue Jan 14 12:53:09 PST 2014

   Moving max; B(i,j) is max of vector V(i-n+1) to V(i) where
   n=N(j).

   Test case: see function mminV(). */
{
   register int j=0;
   register double *B,*N,*V;
   int cols,n,rows;

   if(tos->typ==NUM) hand();

   if((tos-1)->typ!=MAT || tos->typ!=MAT) {
      stkerr(" mmaxV: ",MATNOT2);
      return 0;
   }
   V=(tos-1)->mat;
   N=tos->mat;

   cols=tos->row;
   rows=(tos-1)->row;

   if(!matstk(rows,cols,"_mmaxV")) return 0;

   for(;j<cols;j++) {
      n=MIN(*(N+j),rows);
      n=MAX(n,1);
      B=tos->mat+j*rows;
      _mmax(V,rows,n,B);
   }
   return(lop() && lop());
}

int mmaxv() /* mmaxv (hA hf --- hB) */
/* Wed Nov 14 19:39:00 PST 2012

   Moving max of A over ranges defined by true flags in f.  B is max
   of A over ranges of consecutive zeroes in f that are preceded by 
   nonzero; that is, while f is false, B is the max of A and when f
   is true B is reset to A.
 
   Example: 

      [tops@kaffia] ready > seed0 seedset \
                            10 1 random "A" book \
                            list: 0 0 -1 0 0 -1 0 -1 0 0 ; "f" book

      [tops@kaffia] ready > A f 2dup mmaxv 3 parkn .m
                   A           f     B
        Row 1:   0.5098        0   0.5098
        Row 2:   0.3328        0   0.5098
        Row 3:  0.07094       -1  0.07094
        Row 4:   0.2681        0   0.2681
        Row 5:   0.8089        0   0.8089
        Row 6:  0.06571       -1  0.06571
        Row 7:     0.39        0     0.39
        Row 8:   0.7532       -1   0.7532
        Row 9:   0.4184        0   0.7532
       Row 10:   0.8124        0   0.8124
      [tops@kaffia] ready > 

      This example shows reset of B to A where f is nonzero, and selec-
      tion of correct maximums with respect to the reset point. */
{
   register int j=0,k2;
   register double *A,*B,*f,X;
   int cols,kX,rows;

   if(tos->typ!=MAT || (tos-1)->typ!=MAT) {
      stkerr(" mmaxv: ",MATNOT2);
      return 0;
   }
   if(tos->row!=(tos-1)->row) {
      stkerr(" mmaxv: ",MATSNOTC);
      return 0;
   }
   rows=tos->row;
   cols=(tos-1)->col;

   f=tos->mat;
   if(!matstk(rows,cols,"_mmaxv")) return 0;

   for(;j<cols;j++) {
      A=(tos-2)->mat+j*rows;
      f=(tos-1)->mat;
      B=tos->mat+j*rows;

      k2=1;
      kX=0;
      X=*B=*A;
      
      while(k2<rows) {
         if(*(A+k2)>X || *(f+k2)) {
            X=*(A+k2);
            kX=k2;
         }
         *(B+k2)=X;
         k2++;
      }
   }
   return(lop() && lop());
}

int mmin() /* mmin (hA n --- hB) */
/* Moving min; B(i,j) is min of A(i-n+1,j) to A(i,j).

   Tue Jan 14 12:08:25 PST 2014.  Code at the heart of this function
   has been moved to function _mmin() in file mmath.c.

   See companion function mmax() for tests that verify speed up of this
   function of 100 to 150 times. */
{
   register int j=0;
   register double *A,*B;
   int cols,n,rows;

   if(tos->typ!=NUM) {
      stkerr(" mmin: ",NUMNOT);
      return 0;
   }
   if((tos-1)->typ!=MAT) {
      stkerr(" mmin: ",MATNOT);
      return 0;
   }
   if(!popint(&n)) return 0;

   cols=tos->col;
   rows=tos->row;

   n=MIN(n,rows);
   n=MAX(n,1);

   if(!matstk(rows,cols,"_mmin")) return 0;

   for(;j<cols;j++) {
      A=(tos-1)->mat+j*rows;
      B=tos->mat+j*rows;
      _mmin(A,rows,n,B);
   }
   return(lop());
}

int mminf() /* mminf (hA n --- hB hR hf) */
/* Thu Mar 21 15:04:28 PDT 2013

   Moving min; B(i,j) is min of A(i-n+1,j) to A(i,j).

   This function operates identically to mmin():

      B = mmin(A, n)

   and also returns vectors of row numbers R and truth flags f.

   Row numbers R (in the current index base) replicate B from A in the
   following expression:

      B = reach(A, R)

   Truth flags f apply to A and B and are true at step k where A(k) and
   B(k) have moved to a new moving min from step k-1:

      B' = looking(A, f) = looking(B, f);

   Where R is equal to its index and B is moving lower, flag f is true.
   For index base 0 or 1, this can be expressed as:

      f = ([xbase:ndx(rows(R))] - R)==0 && B<lag(B,1);

   This means that when f is true, A is driving B lower to new mmin
   values.  Testing below verifies that this expression holds.

   Testing.  See mmaxf() in this file. */
{
   register int i,j=0,k1,k2;
   register double *A,*B,*f,*R,X;
   int cols,kX,n,rows;

   if(tos->typ!=NUM) {
      stkerr(" mminf: ",NUMNOT);
      return 0;
   }
   if((tos-1)->typ!=MAT) {
      stkerr(" mminf: ",MATNOT);
      return 0;
   }
   if(!popint(&n)) return 0;

   cols=tos->col;
   rows=tos->row;

   n=MIN(n,rows);
   n=MAX(n,1);

   if(!matstk(rows,cols,"_mminf")) return 0;

   if(!matstk(rows,cols,"_R")) return 0;

   if(!matstk(rows,cols,"_f")) return 0;
   memset(tos->mat,0,rows*cols*sizeof(double));

   for(;j<cols;j++) {
      A=(tos-3)->mat+j*rows;
      B=(tos-2)->mat+j*rows;
      X=*B=*A;

      R=(tos-1)->mat+j*rows;
      *R=XBASE;

      f=tos->mat+j*rows;

      k2=1;
      kX=0;
      while(k2<n) {
         if(*(A+k2)<X) {
            X=*(A+k2);
            *(R+k2)=k2+XBASE;
            *(f+k2)=xTRUE;
            kX=k2;
         }
         *(B+k2)=X;
         *(R+k2)=kX+XBASE;
         k2++;
      }
      for(;k2<rows;k2++) {
         if(*(A+k2)<X) { /* newest is min */
            X=*(A+k2);
            *(R+k2)=k2+XBASE;
            *(f+k2)=xTRUE;
            kX=k2;
         }
         else {
            k1=k2-n+1;
            if(kX<k1) { /* min is gone; find new min */
               X=*(A+k1);
               kX=k1;
               for(i=k1+1;i<=k2;i++) {
                  if(*(A+i)<X) {
                     X=*(A+i);
                     kX=i;
                  }
               }
            }
         }
         *(B+k2)=X;
         *(R+k2)=kX+XBASE;
      }
   }
   return(pushint(3) && roll() && drop());
}

int mminN() /* mminN (hA n N --- hB) \ moving minimums, N lowest */
/* Wed Jul  4 04:24:58 PDT 2012

   Moving min; B(i,j) is min of A(i-n+1,j) to A(i,j).

   Incoming A is a column vector. 

   The N row vectors of returned B contain different moving min traces
   of A for the same period n, where row 1 of B holds the trace of 
   highest moving min of A and row N the trace of lowest.

   See mmaxN() for examples and test cases. */
{
   return(mminN2() && drop());
}

int mminN2() /* mminN2 (hA n N --- hB hJ) */
/* Wed Jul  4 07:21:00 PDT 2012

   Moving min; B(j,i) is the jth highest min of A(i-n+1) to A(i).

   Incoming A is a column vector.

   The N row vectors of returned B contain different moving min traces
   of A for the same period n, where row 1 of B holds the trace of 
   highest moving min of A and row N the trace of lowest.

   The N row vectors of returned J correlate with those of B.  Term
   J(j,i) contains the row number of A that provided term B(j,i).

   Function mminN() is identical to this one except J is not returned;
   see related function mmaxN() for some examples and test cases.

   Adapted from mmin() with its 100 factor speed up rearrangement (see
   mmin(), this file). */
{
   register int i,j,k2;
   register double *A,*B,*J,*W,*W1,*W2,X;
   int kX=0,kXold=0,kXprev=0,n,N,*p,rows;

   if(tos->typ!=NUM || (tos-1)->typ!=NUM) {
      stkerr(" mminN2: ",NUMS2NOT);
      return 0;
   }
   if((tos-2)->typ!=MAT) {
      stkerr(" mminN2: ",MATNOT);
      return 0;
   }
   if(!popint(&N)) return 0;
   if(!popint(&n)) return 0;

   A=tos->mat;
   rows=tos->row;

   n=MIN(n,rows);
   n=MAX(n,1);

   if(N>n) {
      gprintf(" mminN2: count N cannot exceed period n");
      nc();
      stkerr("","");
      return 0;
   }
   if(!matstk(n,3,"_W")) return 0;
   W=tos->mat; W1=W+n; W2=W1+n; /* work arrays */

   if(!matstk(N,rows,"_B")) return 0;
   if(!matstk(N,rows,"_J")) return 0;

   B=(tos-1)->mat; /* moving min traces from A */
   J=tos->mat;     /* moving min rows of A */

   for(k2=0;k2<N;k2++) {
      for(i=k2;i<N;i++) {
         *(B+i)=*(A+k2);
         *(J+i)=k2;
      }
      memcpy(B+N,B,N*sizeof(double));
      memcpy(J+N,J,N*sizeof(double));
      B+=N;
      J+=N;
   }
   B=(tos-1)->mat;
   J=tos->mat;

   j=0;
   while(j<N) {
      p=qsort2(B,N,0);
      if(p==NULL) {
         gprintf(" mminN2: qsort2 error: null p");
         nc();
         stkerr("","");
         return 0;
      }
      memcpy(W,J,N*sizeof(double));
      for(i=0;i<N;i++) *(J+i)=*(W+*(p+i));
      mallfree((void *)&p);
      J+=N;
      B+=N;
      j++;
   }
   B-=N;
   J-=N;
   X=*B;

   for(;k2<rows;k2++) {
      if(*(A+k2)<X) { /* newest is min */
   /* if(*(A+k2)<=X) { removed Aug  6 10:04:42 PDT 2012 */
         X=*(A+k2);
         kX=k2;
      }
      if(k2<kXold+n) {
         memcpy(B+N,B,N*sizeof(double));
         memcpy(J+N,J,N*sizeof(double));
         B+=N;
         J+=N;
         if(kX>kXprev && X<*B) { 
      /* if(kX>kXprev && X<=*B) { removed Aug  6 10:04:42 PDT 2012 */
            *B=X; /* put X into the list of N lowest */
            *J=kX; 
            p=qsort2(B,N,0); /* highest low is first */
            if(p==NULL) {
               gprintf(" mminN2: qsort2 error: null p");
               nc();
               stkerr("","");
               return 0;
            }
            memcpy(W,J,N*sizeof(double));
            for(i=0;i<N;i++) *(J+i)=*(W+*(p+i));
            mallfree((void *)&p);
         }
      }
      else { /* an item in N highest is too old */
         memcpy(W,A+k2-n+1,n*sizeof(double)); /* A data to W */
         for(i=0;i<n;i++) *(W2+i)=k2-n+i+1;   /* A rows to W2 */

         p=qsort2(W,n,1); /* ascending sort over period n */
         if(p==NULL) {
            gprintf(" mminN2: qsort2 error: null p");
            nc();
            stkerr("","");
            return 0;
         }
         for(i=0;i<n;i++) *(W1+i)=*(W2+*(p+i)); /* A rows to W1 */
         mallfree((void *)&p);

         p=qsort2(W,N,0); /* descending sort over N lowest within n */
         if(p==NULL) {
            gprintf(" mminN2: qsort2 error: null p");
            nc();
            stkerr("","");
            return 0;
         }
         memcpy(B+N,W,N*sizeof(double)); /* A data to B */
         B+=N;

         for(i=0;i<N;i++) *(W+i)=*(W1+*(p+i));
         mallfree((void *)&p);
         memcpy(J+N,W,N*sizeof(double)); /* A rows to J */
         J+=N;

         kXold=*W;
         for(i=1;i<N;i++) if(*(W+i)<kXold) kXold=*(W+i);

         kX=*W;
         for(i=1;i<N;i++) if(*(W+i)>kX) kX=*(W+i);
      }
      X=*B; /* maximum min (do not use X=*(A+kX);) */
      kXprev=kX;
   }
   J=tos->mat;
   for(i=0;i<N*rows;i++) *(J+i)+=XBASE; /* add XBASE to J row offsets */

   return(rot() && drop() && rot() && drop());
}

/* int mminX() */ /* mmin (hA n --- hB) */
/* Moving min; B(i,j) is min of A(i-n+1,j) to A(i,j). */
/*
{
   register int i,j=0,k1,k2;
   register double *A,*B,X;
   int cols,kX,n,rows;

   if(tos->typ!=NUM) {
      stkerr(" mmin: ",NUMNOT);
      return 0;
   }
   if((tos-1)->typ!=MAT) {
      stkerr(" mmin: ",MATNOT);
      return 0;
   }
   if(!popint(&n)) return 0;

   cols=tos->col;
   rows=tos->row;

   n=MIN(n,rows);
   n=MAX(n,1);

   if(!matstk(rows,cols,"_mmin")) return 0;

   for(;j<cols;j++) {
      A=(tos-1)->mat+j*rows;
      B=tos->mat+j*rows;

      k2=1;
      kX=0;
      X=*B=*A;
      
      while(k2<n) {
         if(*(A+k2)<X) {
            X=*(A+k2);
            kX=k2;
         }
         *(B+k2)=X;
         k2++;
      }
      for(;k2<rows;k2++) {
         k1=k2-n+1;
         if(kX>=k1 && *(A+k2)<X) {
            X=*(A+k2);
            kX=k2;
         }
         else {
            X=*(A+k1);
            kX=k1;
            for(i=k1+1;i<=k2;i++) {
               if(*(A+i)<X) {
                  X=*(A+i);
                  kX=i;
               }
            }
         }
         *(B+k2)=X;
      }
   }
   return(lop());
}
*/

int mminr() /* mminr (hA hB0 n --- hB) */
/* Moving min; B(i,j) is min of A(i-n+1,j) to A(i,j). 
   For use in real time, as vector A grows.  B0 is B from a previous 
   call to this function, or to function mmin(), and has number of rows
   equal to A at that time.  Now, additional rows have been added to A,
   and this function computes moving min for these new rows and appends
   them to B0 to create B. 

   Here is some high level code for testing this word, and verifying 
   that the incremental result agrees with batch function mmin():

      seed0 seedset 
      30001 "L" book L 1 random 1000 * integer "A" book
      200 "N" book

      A dup pile N mmin "B" book \ batch values from mmin

      A purged N mminr (hB0)                \ first set
      A dup pile swap (hA hB0) N mminr (hB) \ appended set
      "Br" book \ incremental values from mminr

      Br B - null? \ get true flag when valid */
{
   register int i,j=0,k;
   register double *A,*B,min;
   int cols,kmin,n,rows,rows0;

   if(tos->typ!=NUM) {
      stkerr(" mmin: ",NUMNOT);
      return 0;
   }
   if((tos-1)->typ!=MAT || (tos-2)->typ!=MAT) {
      stkerr(" mmin: ",MATNOT2);
      return 0;
   }
   if((tos-1)->col<=0) { /* B0 is purged; run mmin() */
      lop(); /* B0 off stack */
      return(mmin());
   }
   if(!popint(&n)) return 0;

   cols=tos->col;
   if((tos-1)->col != cols) {
      stkerr(" mmin: ","columns of A and B0 do not match");
      return 0;
   }
   rows=(tos-1)->row; /* rows of A and B */
   rows0=tos->row;    /* rows of B0 */

   if(rows0>rows) {
      stkerr(" mmin: ","rows of B0 cannot exceed rows of A");
      return 0;
   }
   else {
      if(rows0==rows) return(lop()); /* B = B0 */
   }
   n=MIN(n,rows);
   n=MAX(n,1);

   if(!matstk(rows-rows0,cols,"_mmin")) return 0;
   pile();

   kmin=MAX(0,rows0-n-1);

   for(;j<cols;j++) {
      A=(tos-1)->mat+rows*cols-1 - j*rows;
      B=tos->mat+rows*cols-1 - j*rows;
      k=rows-1;
      for(;k>kmin;k--) {
         min=*A;
         i=1;
         for(;i<n && i<=k;i++) min=MIN(min,*(A-i));
         if(k>=rows0) *B=min;
         A--;
         B--;
      }
      if(k==0) *B=*A;
   }
   return(lop());
}

int mminV() /* mminV (hV hN --- hB) */
/* Tue Jan 14 12:29:06 PST 2014

   Moving min; B(i,j) is min of vector V(i-n+1) to V(i) where
   n=N(j). 

   Test case: Paste the following lines at the ready prompt:

      seed0 seedset \
      20000 1 random 1000 * integer "V" book \
      list: 100 200 400 ; "N" book \
\
      V N mminV "B" book \
      V N 1st pry mmin B 1st catch - null? \
      V N 2nd pry mmin B 2nd catch - null? and \
      V N 3rd pry mmin B 3rd catch - null? and \
      IF " mminV results agree with mmin" \
      ELSE " mminV results do not agree with mmin" \
      THEN . nl \
\
      V N mmaxV "B" book \
      V N 1st pry mmax B 1st catch - null? \
      V N 2nd pry mmax B 2nd catch - null? and \
      V N 3rd pry mmax B 3rd catch - null? and \
      IF " mmaxV results agree with mmax" \
      ELSE " mmaxV results do not agree with mmax" \
      THEN . nl \ */
{
   register int j=0;
   register double *B,*N,*V;
   int cols,n,rows;

   if(tos->typ==NUM) hand();

   if((tos-1)->typ!=MAT || tos->typ!=MAT) {
      stkerr(" mminV: ",MATNOT2);
      return 0;
   }
   V=(tos-1)->mat;
   N=tos->mat;

   cols=tos->row;
   rows=(tos-1)->row;

   if(!matstk(rows,cols,"_mminV")) return 0;

   for(;j<cols;j++) {
      n=MIN(*(N+j),rows);
      n=MAX(n,1);
      B=tos->mat+j*rows;
      _mmin(V,rows,n,B);
   }
   return(lop() && lop());
}

int mminv() /* mminv (hA hf --- hB) */
/* Wed Nov 14 19:08:57 PST 2012 

   Moving min of A over ranges defined by true flags in f.  B is min 
   of A over ranges of consecutive zeroes in f that are preceded by 
   nonzero; that is, while f is false, B is the min of A and when f
   is true B is reset to A.
 
   Example: 

      [tops@kaffia] ready > seed0 seedset \
                            10 1 random "A" book \
                            list: 0 0 1 0 0 1 0 1 0 0 ; "f" book

      [tops@kaffia] ready > A f 2dup mminv 3 parkn .m
                   A           f     B
        Row 1:   0.5098        0   0.5098
        Row 2:   0.3328        0   0.3328
        Row 3:  0.07094        1  0.07094
        Row 4:   0.2681        0  0.07094
        Row 5:   0.8089        0  0.07094
        Row 6:  0.06571        1  0.06571
        Row 7:     0.39        0  0.06571
        Row 8:   0.7532        1   0.7532
        Row 9:   0.4184        0   0.4184
       Row 10:   0.8124        0   0.4184
      [tops@kaffia] ready > 

      This example shows reset of B to A where f is nonzero, and selec-
      tion of correct minimums with respect to the reset point. */
{
   register int j=0,k2;
   register double *A,*B,*f,X;
   int cols,kX,rows;

   if(tos->typ!=MAT || (tos-1)->typ!=MAT) {
      stkerr(" mminv: ",MATNOT2);
      return 0;
   }
   if(tos->row!=(tos-1)->row) {
      stkerr(" mminv: ",MATSNOTC);
      return 0;
   }
   rows=tos->row;
   cols=(tos-1)->col;

   f=tos->mat;
   if(!matstk(rows,cols,"_mminv")) return 0;

   for(;j<cols;j++) {
      A=(tos-2)->mat+j*rows;
      f=(tos-1)->mat;
      B=tos->mat+j*rows;

      k2=1;
      kX=0;
      X=*B=*A;
      
      while(k2<rows) {
         if(*(A+k2)<X || *(f+k2)) {
            X=*(A+k2);
            kX=k2;
         }
         *(B+k2)=X;
         k2++;
      }
   }
   return(lop() && lop());
}

int newHL() /* newHL (hC hN --- hR2 hR1) */
/* Mon Jun  2 15:34:03 PDT 2014

   Given vector C that defines a function at r time points, and vector
   N that defines a set of s time steps.

   The high and low bounds of function C (i.e., its envelope) for a 
   period given by the number of steps N(j) are

      E2(j)=_mmax(C,1+N(j))
      E1(j)=_mmin(C,1+N(j))

   where plus 1 converts N(j) from number of steps to number of points
   as required for input to _mmax() and _mmin(), and j=1,2,...,s. 

   When C(k) is increasing (i.e., C(k)>C(k-1)) and E2(k,j) equals C(k),
   one more occurrence of C pushing E2 higher is recorded in R2(k). 

   When C(k) is decreasing and E1(k,j) equals C(k), one more occurrence
   of C pushing E1 lower is recorded in R1(k).

   On return, each time point R2(k) contains, after examining all N(j)
   for j=1,2,...,s, the total occurrences where C(k) pushed E2(k,j) to 
   a new high, and each R1(k) contains the total occurrences where C(k)
   pushed E1(k,j) to a new low.

   It follows that the values of R1(k) and R2(k), k=1,2,...,r, are in-
   tegers in the range of 0 to s, inclusive. */
{
   register int j=0, k;
   int n,r,s;
   register double *C,*dC,*E1,*E2,*R1,*R2;
   double *N;

   if((tos-1)->typ!=MAT || tos->typ!=MAT) {
      stkerr(" newHL: ",MATNOT2);
      return 0;
   }
   C=(tos-1)->mat;
   r=(tos-1)->row;

   N=tos->mat;
   s=tos->row;

   if(!matstk(r,1,"_R2")) return 0;
   memset(tos->mat,0,r*sizeof(double));
   rev();

   if(!matstk(r,1,"_R1")) return 0;
   memset(tos->mat,0,r*sizeof(double));
   rev();

   if(!matstk(r,1,"_E1")) return 0;
   if(!matstk(r,1,"_E2")) return 0;

   if(!matstk(r,1,"_dC")) return 0;
   dC=tos->mat;
   memcpy(dC,C,r*sizeof(double));
   *dC=0;
   dC++;
   for(k=1;k<r;k++) {
      *dC-=*C;
      dC++;
      C++;
   }
   for(;j<s;j++) {
      dC=tos->mat; C=(tos-4)->mat;
      E2=(tos-1)->mat; E1=(tos-2)->mat;
      R2=(tos-6)->mat; R1=(tos-5)->mat;

      n=1+MAX(MIN(*(N+j),r-1),1);
      _mmax(C,r,n,E2);
      _mmin(C,r,n,E1);

      for(k=1;k<r;k++) {
         dC++; C++;
         E1++; E2++;
         R1++; R2++;
         if(*dC>0 && *C==*E2) *R2+=1;
         if(*dC<0 && *C==*E1) *R1+=1;
      }
   }
   return(drop2() && drop2() && drop()); 
}

int pnckclps() /* pnckclps (hC hP d --- hF) */
/* Sat Jun 29 10:24:36 PDT 2013

   Pancake collapse in vector C.  For longer and longer time periods in
   vector P (given by number of points), determine states of pancake 
   collapse in plus or minus direction d.

   Returned matrix F, containing 0 and -1 flags, has rows equal to rows
   of C and columns equal to rows of P. 

   Verification was by null vector difference between results from this
   function and the high level expressions that it replaces. */
{
   double *C,*f,*F,*F0,*P,*X;
   int c,d,j=0,k,r;

   if(!popint(&d)) return 0;

   hand();
   if(tos->typ!=MAT || (tos-1)->typ!=MAT) {
      stkerr(" pnckclps: ",MATNOT2);
      return 0;
   }
   P=tos->mat;
   c=tos->row;

   C=(tos-1)->mat;
   r=(tos-1)->row;

   if(!matstk(r,c,"_F")) return 0;
   F=tos->mat;

   for(;j<c;j++) {
      F0=F;
      other(); /* C to tos */
      pushd(*P); /* P(j) to tos */

      if(d<0) {
         mminf();
         lop();
         X=1+(tos-1)->mat;
         *F=*(f=tos->mat);
         F++;
         f++;
         for(k=1;k<r;k++) { /* collapse down */
            *F=(((int)(*X-*(X-1))<0) && ((int)*f)!=0)*xTRUE; 
            X++;
            F++;
            f++;
         }
      }
      else {
         mmaxf();
         lop();
         X=1+(tos-1)->mat;
         *F=*(f=tos->mat);
         F++;
         f++;
         for(k=1;k<r;k++) { /* collapse up */
            *F=(((int)(*X-*(X-1))>0) && ((int)*f)!=0)*xTRUE; 
            X++;
            F++;
            f++;
         }
      }
      drop2(); /* X and f off stack */

   /* The nature of pancake collapse makes this loop unnecessary.
      if(j>1) {
         F=F0;
         for(k=0;k<r;k++) { // pancake //
            *F=((int)*F!=0 && (int)*(F-r)!=0)*xTRUE;
            F++;
         }
      }
   */ P++;
      F=F0+r;
   }
   return(lop() && lop());
}

int rolldelta() /* rolldelta (hK hC hChg --- hDelta) */
/* Sun Feb 24 19:43:44 UTC 2002  
   Version to CVS.  Used by word data() in mfil.v. */
{
   register double *C,*Chg,*Delta,dsum=0,*K;
   register int k;
   int rows;

   rows=tos->row;

   if(!matstk(rows,1,"_delta")) return 0;

   Delta=(tos)->mat;
   Chg=(tos-1)->mat;
   C=(tos-2)->mat;
   K=(tos-3)->mat;

   *(Delta+rows-1)=dsum;

   for(k=rows-2;k>-1;k--) {

      if(*(K+k)!=*(K+k+1)) dsum=dsum + *(C+k+1) - *(C+k) - *(Chg+k+1);

      *(Delta+k)=dsum;
   }
   return(lop() && lop() && lop());
}

int runs() /* runs (hC hF --- hR) */
/* Mon Apr 22 16:16:37 PDT 2013   
   Revised: Wed May 15 11:57:00 PDT 2013

   Values in C are assumed to be nonzero. 

   Sequential nonzero values of vector F define a run.  During each run,
   compute the running sum of values in vector C and store in R; when a
   run ends, F equals zero and R also equals zero.  R remains zero until
   the next run that begins when F is nonzero.

   R(0) is set to 0.

   Test case.

      [tops@kaffia] ready > list: 0.5 1 2 3 9 1 2 3 9 1 2 3 ; \
                            list: 1 1 1 1 0 1 1 1 0 1 1 1 ; 2dup runs \
                            3 parkn .m nl
 
                      C        F        R 
        Row 1:      0.5        1      0.5
        Row 2:        1        1      1.5
        Row 3:        2        1      3.5
        Row 4:        3        1      6.5
        Row 5:        9        0        0
        Row 6:        1        1        1
        Row 7:        2        1        3
        Row 8:        3        1        6
        Row 9:        9        0        0
       Row 10:        1        1        1
       Row 11:        2        1        3
       Row 12:        3        1        6

      [tops@kaffia] ready > 

      Results are correct by inspection. */
{
   register int k=0;
   register double *C,*F,*R,S,*W;
   int rows;

   if(tos->typ!=MAT || (tos-1)->typ!=MAT) {
      stkerr(" runs: ",MATNOT2);
      return 0;
   }
   F=tos->mat;
   rows=tos->row;

   C=(tos-1)->mat;
   if((tos-1)->row!=rows) {
      stkerr(" runs: ",ROWSNOT);
      return 0;
   }
   if(!matstk(rows,1,"_W")) return 0;
   W=tos->mat;
   memset(W,0,rows*sizeof(double));

   for(;k<rows;k++) {
      if(*(F+k)) *(W+k)=*(C+k);
   }
   if(!matstk(rows,1,"_R")) return 0;
   R=tos->mat;

   k=S=0;

   while(k<rows) {
      if(*W>0) {
         S+=*W; 
         *R=S;
      }
      else *R=S=0;
      k++;
      W++;
      R++;
   }
   return(lop() && lop() && lop());
}

int vec_origin() /* vec_origin (hE hL --- hO1 hL1) */
/* Sun Jan 19 16:30:57 PST 2014

   For a set of one-dimensional vectors of lengths L(k) ending at E(k), 
   compute origins, O(k)=E(k)-L(k).  

   Returned origins O1 are taken from O with no duplicates and are in 
   ascending order; for vectors that share the same origin, only the 
   longest one is returned in L1.

   Testing Sun Jan 19 21:08:09 PST 2014: drop the following line at the
   ready prompt:
      syspath "../src/wapp.c" + "test_vec_origin___" msource

      test_vec_origin___

         list: 5 -7 7 8 6 9 10 3 ; "E" book
         list: 3 -4 4 5 3 7 9 INF ; "L" book

         "E        L      O=E-L" 15 indent . nl
         E L 2dup - (hO) 3 parkn .m nl nl

         "O1       L1" 14 indent . nl
         E L vec_origin (hO1 hL1) park .m nl

         " Results are correct by inspection." nl . nl
      halt

   Results are correct by inspection. */
{
   register int k=0;
   register double *E,*L,*L1,*O,*O1;
   int *p,r1=0,rows;
   double Len,Org;

   if(tos->typ!=MAT || (tos-1)->typ!=MAT) {
      stkerr(" vec_origin: ",MATNOT2);
      return 0;
   }
   L1=tos->mat;
   rows=tos->row;

   E=(tos-1)->mat;
   if((tos-1)->row!=rows) {
      stkerr(" vec_origin: ",ROWSNOT);
      return 0;
   }
   if(!matstk(rows,1,"_O")) return 0;
   O=tos->mat;
   for(;k<rows;k++) *(O+k)=*(E+k)-*(L1+k);
   
   p=qsort2(O,rows,1); 
   if(p==NULL) {
      gprintf(" vec_origin: qsort2 error: null p");
      nc();
      stkerr("","");
      return 0;
   }
   if(!matstk(rows,1,"_L")) return 0;
   L=tos->mat;
   for(k=0;k<rows;k++) *(L+k)=*(L1+*(p+k));
   mallfree((void *)&p);

   rot(); drop();
   rot(); drop();

   if(!matstk(rows,1,"_O1")) return 0;
   O1=tos->mat;
   if(!matstk(rows,1,"_L1")) return 0;
   L1=tos->mat;

   k=0;
   while(k<rows) {
      Len=*(L+k);
      Org=*(O+k);
      k++;
      while(k<rows && Org==*(O+k)) {
         if(ABS(*(L+k))>ABS(Len)) Len=*(L+k);
         k++;
      }
      *(O1+r1)=Org;
      *(L1+r1)=Len;
      r1++;
   }
   rot(); drop();
   rot(); drop();

   if(!matstk(r1,1,"_O")) return 0;
   O=tos->mat;
   memcpy(O,O1,r1*sizeof(double));
   if(!matstk(r1,1,"_L")) return 0;
   L=tos->mat;
   memcpy(L,L1,r1*sizeof(double));

   return(rot() && drop() && rot() && drop());
}

/* End of functions for mday.n */

/*--------------------------------------------------------------------*/

/* Transient analysis */

int uncoupled() /* uncoupled (hB hK hP hqd0 hq0 step --- hqdd hqd hq) */
/*
c     Perform transient response analysis
c
c     Adapted from program Express subroutines transresp.f, integuc.f.
c
c     Integrate an uncoupled set of equations of the form:
c
c                      qdd + B*qd + K*q = P
c
c     using constant step and subject to initial conditions qd0, q0.
c
c     Incoming B and K are diagonal matrices stored as column vectors
c     of length m, P is an m by nt matrix of loads for nt time points,
c     where the time points range in value from 0 to step*(nt - 1).
c
c     Stack description:
c            (hB hK hP hqd0 hq0 step --- hqdd hqd hq)
c     where
c        step is time step between columns of p, sec
c        q0, qd0 are initial displacements, velocities
c        P is matrix of time varying loads
c        K, B are stiffness and damping for unity mass
*/
{
   double *B,*B0,*g,*g0,*h,*h0,*K,*K0,*P,*P0,*q,*q0,*qd,*qd0,*qdd,step;
   int err=1,i=0,j=1,ndof,nstep;

   if(!popd(&step)) return 0;

   B0=(tos-4)->mat;
   K0=(tos-3)->mat;
   P=(tos-2)->mat;

/* Get sizes from P: */
   ndof=(tos-2)->row;
   nstep=(tos-2)->col;

   lpush(); /* q0 to temp stack */
   lpush(); /* qd0 to temp stack */

   if(!matstk(ndof,nstep,"_qdd")) return 0;
   qdd=tos->mat;

   if(!matstk(ndof,nstep,"_qd")) return 0;
   qd=tos->mat;
   lpull(); /* qd0 */
   memcpy(qd,tos->mat,ndof*sizeof(double)); /* qd0 to 1st col */
   drop();

   if(!matstk(ndof,nstep,"_q")) return 0;
   q=tos->mat;
   lpull(); /* q0 */
   memcpy(q,tos->mat,ndof*sizeof(double)); /* q0 to 1st col */
   drop();

   if(!matstk(4,ndof,"_g")) return 0; /* working space, 4 rows/dof */
   g0=tos->mat;

   if(!matstk(4,ndof,"_h")) return 0; /* working space, 4 rows/dof */
   h0=tos->mat;

/* Compute initial accelerations and constants of integration: */
   for(;i<ndof;i++) {
      *(qdd+i)=*(P+i) - *(K0+i)*(*(q+i)) - *(B0+i)*(*(qd+i));
      RAMP0(&step,(B0+i),(K0+i),(g0+locvec(i,4)),(h0+locvec(i,4)),&err);
      if(err) {
         gprintf(" uncoupled: bad B or K data for mode %d",i);
         nc();
         stkerr("","");
         return 0;
      }
   }
   P0=P;
   P+=ndof;

   qdd+=ndof;

   qd0=qd;
   qd+=ndof;
  
   q0=q;
   q+=ndof;
  
/* Perform the integration (time points 2 thru nstep): */
   for(;j<nstep;j++) { /* loop over time steps */
      g=g0;
      h=h0;

      B=B0;
      K=K0;

      for(i=0;i<ndof;i++) { /* loop over dofs */
         RAMP(&step,P0,P,qd0,q0,g,h,B,K,qdd,qd,q);

         P0++;
         P++;

         g+=4;
         h+=4;

         B++;
         K++;

         qdd++;

         qd0++;
         qd++;

         q0++;
         q++;
      }
   }
   return(drop2() && /* g and h off stack */
          lpush() && lpush() && lpush() && /* q, qd, qdd on temp stk */
          drop() && drop2() && /* B, K, and P off stack */
          lpull() && lpull() && lpull() /* qd, qd, q on stk */
   );
}
/*--------------------------------------------------------------------*/

/* end wapp.c functions */
