{  File usr/netbp.v  April 2002

   Copyright (c) 2002   D. R. Williamson

   Training back-propagation networks

   References:

      1. Rumelhart, D. E. and J. L. McClelland, "Parallel Distributed
         Processing," Volumes 1 and 2, MIT Press, 1988.

      2. Hertz, J., A. Krogh and R. G. Palmer, "Introduction to the
         Theory of Neural Computation," Perseus Publishing, 1991.
}
\-----------------------------------------------------------------------

   "ranreal" missing IF math.v source THEN

   "net?" missing
   IF syspath "netbp.v" cat source \ sourcing syspath file first
   THEN "netbp.v" "Words" msource  \ then Words from this file

\  Choose a case:

\     "netbp.v" "\  XOR network demo." msource
\     "netbp.v" "\  XOR4 network demo." msource
\     "netbp.v" "\  Encoding problem." msource
\     "netbp.v" "\  Using intermediate values." msource
\     "netbp.v" "\  Symmetry detector." msource
\     "netbp.v" "\  T-C problem." msource
      "netbp.v" "\  Learning sines." msource

   private halt

\-----------------------------------------------------------------------

\  XOR network demo.
\  Reference 1, pp. 330-334.

   "XOR" list: 2 2 1 ; makeNet

   0.1 XOR "Eta" bank   \ learning constant (required)
                        \ Jumps around if Eta is too big, takes 
                        \ many iterations if too small

   0.1 XOR "Alpha" bank \ momentum constant (it can help)

   seed0 seedset

   XOR -1 1 Wrandom
   list: 1 -1 ; XOR "W" extract 1st pryexe (hList hW1) move

   XOR 3.0 Udecay \ if bigger, Eta must be smaller, and vice-versa
   XOR 0.0 Ubias  \ biases are updated like weights
   XOR 1 Utype    \ Utype=0 (0 to +1 range) trains slower
 
   inline: xtrain (n qNet --- ) \ run n training cycles on XOR network
      [ \ Patterns for XOR training.  Each column is a case, where the
        \ first two rows are inputs, the third row is output goal:
        \          input  output
             list:  0 0     0    ;
             list:  1 0     1    ;
             list:  0 1     1    ;
             list:  1 1     0    ; 4 parkn into patterns
        patterns cols is cases
        "1 4 1 1 ranint ontop ndx" "random_case" inlinex
      ]
      "Net" book 1st 
       DO patterns random_case catch,
         this 1 endmost (hGoal)
         swap 1st 2 items reach (hInput)
         Net stepping
       LOOP
   end

   inline: xtest (qNet --- hOut) \ test a trained XOR network
      [ "xtrain" "patterns" extract, 1st two items reach makes pat ]
      push
      list: pat cols 1st
         DO purged pat I catch peek stepping peek netout ontop LOOP
      end
      pull drop
   end

{  Training and testing the XOR network:
      [tops@clacker] ready > 600 XOR xtrain, XOR xtest .m1
       Row 1: 0.01
       Row 2: 0.95
       Row 3: 0.95
       Row 4: 0.01
      [tops@clacker] ready > 

   Reset everything and use a linear unit (type 2) for output:
      [tops@clacker] ready > "netbp.v" source

      [tops@clacker] ready > 2 XOR "Out" extract 3rd pryexe \
      [tops@clacker] ready > onet.utype poke

      [tops@clacker] ready > 400 XOR xtrain, XOR xtest .m1
       Row 1: 0.00
       Row 2: 1.00
       Row 3: 1.00
       Row 4: 0.00
      [tops@clacker] ready > 
}
   private halt

\-----------------------------------------------------------------------

\  XOR4 network demo.
{  Reference 2, pp. 131.

   Training took many trials of different parameters.  

   Sensitivity to random seed.  With 1E6=seed0, the demo below con-
   verges nicely in 10,000 iterations.  Using 1E5 for seed0 leads to 
   convergence difficulty for some of the units.
}
   "XOR4" list: 4 4 1 ; makeNet

   0.05 XOR4 "Eta" bank
   0.05 XOR4 "Alpha" bank 

   1E6 seedset

   XOR4 -1 1 Wrandom

   XOR4 3.0 Udecay
   XOR4 0.0 Ubias
   XOR4 1 Utype

   inline: x4train (n qNet --- ) \ run n training cycles on XOR4 network
      [ \ Patterns for XOR4 training. 
        \   Inputs:
           \ Even number of each will output 0:
             list: 0 0 0 0 ;
             list: 1 1 1 1 ;
             list: 1 1 0 0 ;
             list: 0 0 1 1 ;
             list: 0 1 1 0 ;
             list: 1 0 0 1 ;
             list: 1 0 1 0 ;
             list: 0 1 0 1 ;

           \ Odd number of each will output 1:
             list: 1 0 0 0 ;
             list: 0 1 0 0 ;
             list: 0 0 1 0 ;
             list: 0 0 0 1 ;
             list: 1 1 1 0 ;
             list: 0 1 1 1 ; 
             list: 1 0 1 1 ;
             list: 1 1 0 1 ;
             16 parkn into In

        \   Outputs:
             8 1 null, 8 ones pile bend into Out

        "1 In cols 1 1 ranint ontop ndx" "random_case" inlinex
      ]
      "Net" book 1st
       DO random_case (k)
         Out that (k) catch (hGoal)
         In rot (k) catch (hInput)
         Net stepping
       LOOP
   end

   inline: x4test (qNet --- hOut) \ test a trained XOR4 network
      [ "x4train" "Out" yank into Out, "x4train" "In" yank into In ]
      push
      Out cols 1st
      DO purged In I catch peek stepping peek netout
         In I catch peek "NetIn" bank
         Out I catch peek "NetGoal" bank
      LOOP
      pull drop Out cols parkn
   end

{  Training and testing the XOR4 network:

      [tops@clacker] ready > 10000 XOR4 x4train, XOR4 x4test bend .m1
        Row 1: -0.00
        Row 2: -0.01
        Row 3:  0.02
        Row 4: -0.02
        Row 5:  0.00
        Row 6:  0.01
        Row 7: -0.00
        Row 8:  0.01
        Row 9:  0.93
       Row 10:  0.93
       Row 11:  1.00
       Row 12:  1.00
       Row 13:  1.00
       Row 14:  0.90
       Row 15:  0.92
       Row 16:  1.00
      [tops@clacker] ready > 
}
   private halt

\-----------------------------------------------------------------------

\  Encoding problem.
\  Reference 1, pp. 335-337.

   "EP" list: 8 3 8 ; makeNet

   0.2 EP "Eta" bank
   0.2 EP "Alpha" bank 

   seed0 seedset

   EP -1 1 Wrandom
   8 ones EP "W" extract 1st pryexe (hOnes hW1) move

   EP 3.0 Udecay 
   EP 0.0 Ubias  
   EP 0 Utype \ 0 to +1 works best, with Ubias=0

   inline: etrain (n qNet --- ) \ run n training cycles on EP network
      [ \ Patterns for EP training:
             8 identity is In
             8 identity is Out

        \ Function to choose random number from 1 to 8:
        "1 8 1 1 ranint ontop ndx" "random_case" inlinex
      ]
      "Net" book (n) 1st
       DO random_case (k)
         Out that (k) catch (hGoal)
         In rot (k) catch (hInput)
         Net stepping
       LOOP
   end

   inline: etest (qNet --- hOut) \ test a trained EP network
      [ "etrain" "Out" yank into Out, "etrain" "In" yank makes In ]
      push
      Out cols 1st
      DO purged In I catch peek stepping peek netout
         In I catch peek "NetIn" bank
         Out I catch peek "NetGoal" bank
      LOOP
      pull drop Out cols parkn
   end

   5000 EP etrain, EP etest nl bend .m1 nl
{
       Row 1: 0.91 0.04 0.07 0.07 0.00 0.00 0.00 0.00
       Row 2: 0.00 0.93 0.00 0.00 0.04 0.00 0.00 0.05
       Row 3: 0.08 0.01 0.90 0.00 0.00 0.00 0.00 0.06
       Row 4: 0.05 0.00 0.00 0.91 0.05 0.04 0.00 0.00
       Row 5: 0.01 0.05 0.00 0.04 0.94 0.00 0.01 0.00
       Row 6: 0.00 0.00 0.07 0.05 0.00 0.92 0.06 0.02
       Row 7: 0.00 0.00 0.00 0.00 0.05 0.00 0.94 0.06
       Row 8: 0.00 0.05 0.05 0.00 0.00 0.00 0.05 0.92
}
   8 1st
   DO purged "etrain" "In" yank I catch EP stepping \
      EP "Out" extract 2nd pryexe 1st catch bend \
   LOOP 8 pilen nl .m1 nl
{
   The DO loop above extracts the 2nd layer outputs, which do some-
   thing akin to binary encoding.  They are shown below with the 
   nearest binary pattern in parentheses:

       Row 1: 0.85 0.01 0.28  (1 0 0)
       Row 2: 0.00 0.16 0.05  (0 0 0)
       Row 3: 0.91 0.62 0.01  (1 1 0)
       Row 4: 0.97 0.17 1.00  (1 0 1)
       Row 5: 0.05 0.01 0.92  (0 0 1)
       Row 6: 0.99 0.99 0.80  (1 1 1)
       Row 7: 0.01 0.98 0.98  (0 1 1)
       Row 8: 0.15 0.98 0.13  (0 1 0)
}
   private halt

\-----------------------------------------------------------------------

\  Using intermediate values.
\  Reference 1, pp. 337-339.

   "IV" list: 2 1 4 4 ; makeNet

   0.1 IV "Eta" bank
   0.1 IV "Alpha" bank

   5E6 seedset

   IV -1 1 Wrandom
   list: 1 1 ; IV "W" extract 1st pryexe (hOnes hW1) move

   IV 3.0 Udecay
   IV 0.0 Ubias
   IV 0 Utype

   inline: itrain (n qNet --- ) \ run n training cycles on IV network
      [ \ Patterns for IV training:

        \ Input:
           list: 0 0 ;
           list: 0 1 ;
           list: 1 0 ;
           list: 1 1 ; 4 parkn into In

        \ Output:
           4 identity into Out

        \ Function to choose random number from 1 to 4:
        "1 4 1 1 ranint ontop ndx" "random_case" inlinex
      ]
      "Net" book (n) 1st
       DO random_case (k)
         Out that (k) catch (hGoal)
         In rot (k) catch (hInput)
         Net stepping
       LOOP
   end

   inline: itest (qNet --- hOut) \ test a trained IV network
      [ "itrain" "Out" yank into Out, "itrain" "In" yank makes In ]
      push
      Out cols 1st
      DO purged In I catch peek stepping peek netout LOOP
      pull drop In cols parkn
   end

   10000 IV itrain, IV itest .m1
{
       Row 1: 0.96 0.04 0.00 0.00
       Row 2: 0.04 0.94 0.06 0.00
       Row 3: 0.00 0.03 0.94 0.04
       Row 4: 0.00 0.00 0.04 0.97
      [tops@clacker] ready > 
}

   4 1st
   DO purged "itrain" "In" yank I catch IV stepping \
      IV "Out" extract 2nd pryexe 1st catch bend \
   LOOP 4 pilen nl .m1 nl

{ 
   This shows that the single hidden unit maps each input, shown in
   parentheses, to different activation levels roughly evenly spaced
   apart at 0, 1/3, 2/3, 1: 

                    (input)
       Row 1: 0.01   (0 0)
       Row 2: 0.35   (0 1)
       Row 3: 0.68   (1 0)
       Row 4: 0.99   (1 1)
}
   private halt

\-----------------------------------------------------------------------

\  Symmetry detector.
\  Reference 1, pp. 340-341.

   "SYM" "7 2 1" numbers makeNet

   0.2 SYM "Eta" bank
   0.2 SYM "Alpha" bank

   seed0 seedset

   SYM -1 1 Wrandom

   SYM 3.0 Udecay
   SYM 0.0 Ubias
   SYM 0 Utype

   inline: symtrain (n qNet --- ) \ run n training cycles on SYM network
      [ \ Patterns for SYM training:
        \ Input:
             list: 1 1 1 0 1 1 1 ;
             list: 0 0 0 1 0 0 0 ;
             list: 1 0 0 0 0 0 1 ;
             list: 0 1 1 0 1 1 0 ;
             list: 3 2 1 0 1 2 3 ;

             list: 0 0 0 0 1 1 1 ;
             list: 1 1 1 1 0 0 0 ;
             list: 0 0 0 0 0 0 1 ;
             list: 1 1 1 0 1 1 0 ;
             list: 1 2 1 0 1 2 3 ;

             10 parkn into In

        \ Output:
             In cols 2 slash ones 
             In cols 2 slash one null pile bend into Out

        \ Function to choose random case number:
        "1 In cols 1 1 ranint ontop ndx" "random_case" inlinex
      ]
      "Net" book (n) 1st
       DO random_case (k)
         Out that (k) catch (hGoal)
         In rot (k) catch (hInput)
         Net stepping
       LOOP
   end

   inline: symtest (qNet --- hOut) \ test a trained SYM network
      [ "symtrain" "Out" yank into Out, "symtrain" "In" yank makes In ]
      push
      Out cols 1st
      DO purged In I catch peek stepping peek netout
         In I catch peek "NetIn" bank
         Out I catch peek "NetGoal" bank
      LOOP pull drop In cols parkn
   end
{
   Three trainings:

      [tops@clacker] ready > 1000 SYM symtrain, SYM symtest .m1 nl
       Row 1: 0.27 0.29 0.29 0.29 0.28 0.25 0.30 0.27 0.30 0.26 

      [tops@clacker] ready > 1000 SYM symtrain, SYM symtest .m1 nl
       Row 1: 0.72 0.89 0.77 0.87 0.69 0.05 0.19 0.07 0.66 0.08 

      [tops@clacker] ready > 1000 SYM symtrain, SYM symtest .m1 nl
       Row 1: 0.93 0.96 0.95 0.92 0.89 0.02 0.02 0.04 0.08 0.06 

   Trying patterns not seen:

      [tops@clacker] ready > purged "10 9 -8 0 -8 9 10" numbers negate \
      >                      SYM stepping SYM netout ontop

       stack elements:
             0 number: 9.4631E-01
       [1] ok!

      [tops@clacker] ready > purged list: -10 -10 -10 1 8 9 10 ; \
      >                      negate SYM stepping SYM netout ontop

       stack elements:
             0 number: 2.0515E-02
       [1] ok!
      [tops@clacker] ready > 
}
   private halt

\-----------------------------------------------------------------------

\  T-C problem.
\  Reference 1, pp. 348-351.

\  This is greatly simplified from the example in Reference 1, and
\  just looks at a 3-by-3 grid.

   "TC" "9 2 1" numbers makeNet

   0.2 TC "Eta" bank
   0.2 TC "Alpha" bank

   seed0 seedset

   TC -1 1 Wrandom

   TC 3.0 Udecay
   TC 0.0 Ubias
   TC 1 Utype

   inline: tctrain (n qNet --- ) \ run n training cycles on TC network
      [ \ Patterns for TC training:
        \ Input:
        \  Ts:
           list: 1 0 0 1 1 1 1 0 0 ;
           list: 0 1 0 0 1 0 1 1 1 ;
           list: 0 0 1 1 1 1 0 0 1 ;
           list: 1 1 1 0 1 0 0 1 0 ;

        \  Cs:
           list: 1 1 1 1 0 1 1 0 1 ;
           list: 0 1 1 0 1 0 0 1 1 ;
           list: 1 0 1 1 1 1 0 0 0 ;
           list: 0 1 1 0 0 1 0 1 1 ;

           8 parkn into In

        \ Output:
           In cols 2 slash one null \ Ts are zeroes
           In cols 2 slash ones     \ Cs are ones
           pile bend into Out

        \ Function to choose random number from the cases:
        "1 In cols 1 1 ranint ontop ndx" "random_case" inlinex
      ]
      "Net" book (n) 1st
       DO random_case (k)
         Out that (k) catch (hGoal)
         In rot (k) catch (hInput)
         Net stepping
       LOOP
   end

   inline: tctest (qNet --- hOut) \ test a trained TC network
      [ "tctrain" "Out" yank into Out, "tctrain" "In" yank makes In ]
      push
      Out cols 1st
      DO purged In I catch peek stepping peek netout LOOP
      pull drop In cols parkn
   end

{
   The first four tiny values are Ts, the last four near-one values
   are Cs:

      [tops@clacker] ready > 900 TC tctrain, TC tctest nl .m1

       Row 1: 0.01 0.04 0.05 0.08 0.97 0.95 0.95 0.98 
}
   private halt

\-----------------------------------------------------------------------

\  Learning sines.

\  Predict the next value of the sum of two beating sines.

\  This case is similar to the man example for lpcoef.  Since that does
\  well with just four coefficients for the four previous values, a 
\  network should do fine with four inputs and one output.

\  The first 500 points are used to train the network, then results
\  for all 4000 points are displayed.

   "sine" missing IF mmath.v source THEN
   "pgrid_off" missing IF "plot.v" source THEN
   "SINE" net? IF SINE purgeNet THEN

   5E6 seedset

   "SINE" list: 4 1 ; makeNet

   0.1 SINE "Eta"   bank \ learning rate
   0.0 SINE "Alpha" bank \ momentum is unnecessary

   SINE -.1 .1 Wrandom \ range of initial random weights

   SINE   2 Utype \ units are linear type
   SINE 0.0 Ubias
   SINE 0.0 Udecay 

   inline: random_case ( --- hGoal hIn) \ goal and inputs for training
\     The first 500 points go into S1 for training.
\     All 4000 points are used for testing.
      [ 1 10 2pi star 0 0.001 4000 sine drop
        1 11 2pi star 0 0.001 4000 sine drop plus into S
        S 1st 500 5 + items reach "S1" book
      ]
      1 S1 rows 4 less 1 1 ranint ontop push
      S1 peek 4 + reach (hGoal) 
      S1 pull (n) 4 items reach (hIn)
   end

   inline: s_train (n qNet --- ) \ run n training cycles on SINE network
      "Net" book (n) 1st
       DO random_case (hIn hOut) Net stepping LOOP
   end

   inline: s_test (qNet --- hOut) \ test the trained SINE network
      "random_case" "S" yank into S
      push
      S 1st 4 items reach
      list:
         S rows 5 ndx
         DO purged S I 4 less, 4 items reach peek stepping
            peek netout ontop
         LOOP
      end
      pile
      pull drop
   end

\  Training, testing and plotting (with a good fit, the predicted green
\  curve will cover the original blue one):
   4000 SINE s_train, SINE s_test "s_test" "S" yank swap park
   1 those rows items pgrid_off 2dup plot pause plotclose

   private halt

\-----------------------------------------------------------------------
;
   Words

   inline: .m1 (hA --- ) \ display a small matrix
      mformat "%4.2f" mformatset swap .m mformatset
   end

   inline: netout (qNet --- hOut) \ output layer for Net
      this net? not IF " not a network: " . . return THEN
      (qNet) "Out" extract (hOptr), 1 endmost ontop exe 1st catch
   end

   inline: Ubias (qNet b --- ) \ set all unit bias constants to b
\     Initialize all units in all layers to bias constant b.
      [ onet.bias is Col ]
      that net? not IF " not a network: " . . return THEN
      "b" book, "Out" extract (hOptr) these rows 1st
      DO dup I pryexe (hB) b those rows 1 fill Col rot (hb c hB) cram
      LOOP drop
   end

\  Note: the following word can be used as a template for setting
\  data for a single unit.
   inline: ubias_set (qNet layer row b --- ) \ set a unit to bias t
\     Set to bias b the unit at row in layer of Net.
      [ onet.bias is col ]
      three pick net? not IF " not a network: " . . return THEN
      "b" book "row" book "layer" book

      (qNet) "Out" extract (hOptr)
      layer pryexe b swap row col store
   end

   inline: Udecay (qNet d --- ) \ set all unit decay constants to d
\     Initialize all units in all layers to decay constant d.
      [ onet.decay is Col ]
      that net? not IF " not a network: " . . return THEN
      "d" book, "Out" extract (hOptr) these rows 1st
      DO dup I pryexe (hD) d those rows 1 fill Col rot (hd c hD) cram 
      LOOP drop
   end

   inline: Utype (qNet t --- ) \ set all units to type t
\     Initialize all units in all layers to type t.
      [ onet.utype is Col ]
      that net? not IF " not a network: " . . return THEN
      "t" book, "Out" extract (hOptr) these rows 1st
      DO dup I pryexe (hR) t those rows 1 fill Col rot (hr c hR) cram
      LOOP drop
   end

\  Note: the following word can be used as a template for setting
\  data for a single unit.
   inline: utype_set (qNet layer row t --- ) \ set a unit to type t
\     Set to type t the unit at row in layer of Net.
      [ onet.utype is col ]
      three pick net? not IF " not a network: " . . return THEN
      "t" book "row" book "layer" book

      (qNet) "Out" extract (hOptr)
      layer pryexe t swap row col store
   end

   inline: Wrandom (qNet w1 w2 --- ) \ initialize weights, random w1-w2
\     Initialize all weights in all layers to random values w1 - w2.
      other net? not IF " not a network: " . . return THEN
      "w2" book, "w1" book, "W" extract (hWptr) these rows 1st
      DO dup I pryexe (hW) w1 w2 them dims ranreal swap (hw hW) move
      LOOP drop
   end

   inline: Wrandom1 (qNet --- ) \ initialize weights plus-or-minus one
\     Initialize all weights in all layers to +1 or -1.
      this net? not IF " not a network: " . . return THEN
      (qNet) "W" extract (hWptr) these rows 1st
      DO dup I pryexe (hW) these dims star
         flips 2 *f 1 +d swap (hw hW) move
      LOOP drop
   end

