%% ``The contents of this file are subject to the Erlang Public License,
%% Version 1.0, (the "License"); you may not use this file except in
%% compliance with the License. You may obtain a copy of the License at
%% http://www.erlang.org/EPL1_0.txt
%% 
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%% 
%% The Original Code is Erlang-4.7.3, December, 1998.
%% 
%% The Initial Developer of the Original Code is Ericsson Telecom
%% AB. Portions created by Ericsson are Copyright (C), 1998, Ericsson
%% Telecom AB. All Rights Reserved.
%% 
%% Contributor(s): ______________________________________.''
%%
%% Copyright (C) 1993, Ellemtel Telecommunications Systems Laboratories
%%
%% File    : beam_optimize.erl
%% Author  : Bogumil Hausman
%% Purpose : Code OptimizerCompiler for Turbo Erlang

-module(beam_optimize).
-copyright('Copyright (c) 1991-97 Ericsson Telecom AB').
-vsn('$Revision: /main/release/free/1').

-export([optimize_comp/6, optimize_arith/6, optimize_clause/1]).
-export([reg_to_save/1,remove_move/2]).

-import(lists, [reverse/1, reverse/2, append/1, append/2, flatten/1]).
-import(beam_compile,  [new_next_pattern_lb/0, new_next_lb/0,
			get_var_type/2,new_reg/1,symbol_table/1]).

% reg_to_save(Env) -> N
% get highest register number containing var + 1
reg_to_save(E) ->
    {N0,_} = new_reg(E), % get next available reg #
    N1 = highest_var(symbol_table(E)), % get highest reg # with var + 1
    if N0 > N1 -> N0;
       true -> N1
    end.


% highest_var(Table) -> N1
% get highest reg # with var + 1
highest_var([]) -> 0;
highest_var(Table) ->
    highest_var(Table, 0).

highest_var([{Key,{{x,N},Type}}|T], M) when N > M -> 
    highest_var(T,N);
highest_var([{Key,{x,N}}|T], M) when N > M -> 
    highest_var(T,N);
highest_var([_|T], M) -> 
    highest_var(T,M);
highest_var([], M) -> 
    M+1.

%______________________________________________________________________
% optimize_comp(Type0,Type1,Op,Reg0,Reg1,E) -> MacroName
% optimize arithmetic comp according to types of operands

optimize_comp(integer,integer,'>',_,_,_)   -> intComp;
optimize_comp(integer,integer,'<',_,_,_)   -> intComp;
optimize_comp(integer,integer,'>=',_,_,_)  -> intComp;
optimize_comp(integer,integer,'=<',_,_,_)  -> intComp;
optimize_comp(integer,_,'=:=',_,_,_) -> equal;
optimize_comp(integer,_,'=/=',_,_,_) -> nEqual;
optimize_comp(integer,_,'==',_,_,_)  -> equal;
optimize_comp(integer,_,'/=',_,_,_)  -> nEqual;

optimize_comp(atom,_,'=:=',_,_,_) -> equal;
optimize_comp(atom,_,'=/=',_,_,_) -> nEqual;
optimize_comp(atom,_,'==',_,_,_)  -> equal;
optimize_comp(atom,_,'/=',_,_,_)  -> nEqual;

optimize_comp(_,integer,'=:=',_,_,_) -> equal;
optimize_comp(_,integer,'=/=',_,_,_) -> nEqual;
optimize_comp(_,integer,'==',_,_,_)  -> equal;
optimize_comp(_,integer,'/=',_,_,_)  -> nEqual;

optimize_comp(_,atom,'=:=',_,_,_) -> equal;
optimize_comp(_,atom,'=/=',_,_,_) -> nEqual;
optimize_comp(_,atom,'==',_,_,_)  -> equal;
optimize_comp(_,atom,'/=',_,_,_)  -> nEqual;

optimize_comp(var,var,Op,R1,R2,E) ->
    optimize_comp(get_var_type(R1,E),get_var_type(R2,E),Op,R1,R2,E);
optimize_comp(var,T2,Op,R1,R2,E) ->
    optimize_comp(get_var_type(R1,E),T2,Op,R1,R2,E);
optimize_comp(T1,var,Op,R1,R2,E) ->
    optimize_comp(T1,get_var_type(R2,E),Op,R1,R2,E);

optimize_comp(_,_,_,_,_,_)  -> comp.

%______________________________________________________________________
% optimize_arith(Op,Type0,Type1,Reg0,Reg1,E) -> 
%                                       {MacroName,TypeOfResult,Reg2,Reg3}
% optimize arithmetic operations according to types of operands
% we assume that types generated by parser are 'integer','atom','float' !!!!!!

optimize_arith(Op,var,var,R1,R2,E) ->
    optimize_arith(Op,get_var_type(R1,E),get_var_type(R2,E),R1,R2,E);
optimize_arith(Op,var,T2,R1,R2,E) ->
    optimize_arith(Op,get_var_type(R1,E),T2,R1,R2,E);
optimize_arith(Op,T1,var,R1,R2,E) ->
    optimize_arith(Op,T1,get_var_type(R2,E),R1,R2,E);

optimize_arith('+',float,_,R0,R1,_)           -> {arith,float,R0,R1};
optimize_arith('+',_,float,R0,R1,_)           -> {arith,float,R0,R1};

optimize_arith('-',float,_,R0,R1,_)           -> {arith,float,R0,R1};
optimize_arith('-',_,float,R0,R1,_)           -> {arith,float,R0,R1};

optimize_arith('*',float,_,R0,R1,_)           -> {arith,float,R0,R1};
optimize_arith('*',_,float,R0,R1,_)           -> {arith,float,R0,R1};

optimize_arith('/',_,_,R0,R1,_)               -> {arith,float,R0,R1};

optimize_arith('div',_,_,R0,R1,_)    -> {arith,integer,R0,R1};
optimize_arith('rem',_,_,R0,R1,_)    -> {arith,integer,R0,R1};
optimize_arith('bsl',_,_,R0,R1,_)    -> {arith,integer,R0,R1};

optimize_arith('band',_,_,R0,R1,_)         -> {arith,integer,R0,R1};

optimize_arith('bor',_,_,R0,R1,_)          -> {arith,integer,R0,R1};

optimize_arith('bxor',_,_,R0,R1,_)         -> {arith,integer,R0,R1};

optimize_arith('bsr',_,_,R0,R1,_)          -> {arith,integer,R0,R1};

optimize_arith(_,_,_,R0,R1,_) -> {arith,no,R0,R1}.

%______________________________________________________________________
% optimize_clause(Code) -> CodeOpt

optimize_clause([{label,Name,Arity,FuncInfo,E}, [Clause|Clauses]|T]) -> 
    Code = optimize_clause0([[{label,Name,Arity}|Clause]|Clauses]),
    Opt = extract_common_cl(Code),
    Opt2 = check_indexing(Opt),
    Opt3 = check_tests(Opt2),
    [{label, Name, Arity, FuncInfo, E}|Opt3]++T.

optimize_clause0([H|T]) ->
    [optimize_clause1(H)|optimize_clause0(T)];
optimize_clause0([]) -> [].

optimize_clause1(Code) -> 
    case optimize_clause(Code,[],false) of
	{true,Code1} -> optimize_clause1(Code1);
	{false,[{label,_,_}|Code1]} -> Code1;
	{false,Code1} -> Code1
    end.

%______________________________________________________________________
% move up testHeap instructions
% group testHeap instr between calls
optimize_clause([{allocate,N0,E},{newTestHeap, Ng, Nb}|T],L,_) ->
    optimize_clause([{allocateH, N0, Ng+Nb, E}|T],L,true);
optimize_clause([{allocateH,N0,N1,E},{newTestHeap, Ng, Nb}|T],L,_) ->
    optimize_clause([{allocateH, N0, N1+Ng+Nb, E}|T],L,true);
optimize_clause([{testHeap, N0, E}, {newTestHeap, Ng, Nb}|T],L,_) ->
    optimize_clause([{testHeap, N0+Ng+Nb, E}|T], L, true);
optimize_clause([{newTestHeap, Ng1, Nb1}, {newTestHeap, Ng2, Nb2}|T],L,_) ->
    optimize_clause([{newTestHeap, Ng1+Ng2, Nb1+Nb2}|T],L,true);

optimize_clause([{put,Put},Next|T],L,F) ->
    In = {put,Put},
    case Next of
	{newTestHeap,_,_} ->
	    % to speed up compiling of strings
	    remove_testHeap(T,In,Next,L,F);
	{put,{putList2,_,_,_}} ->
	    % to speed up compiling of strings
	    remove_testHeap(T,In,Next,L,F);

	{move,{x,M},R1} ->
	    case Put of
		{putString,{x,M}, Len, Str} ->
		    case remove_move(T, M) of
			true ->
			    optimize_clause([{put, {putString, R1, Len, Str}}|T],
					    L, true);
			false ->
			    optimize_clause([Next|T],[In|L],F)
		    end;
		{putInt,{x,M},A,S} ->
		    case remove_move(T,M) of
			true -> optimize_clause([{put,{putInt,R1,A,S}}|T],
						L,true);
			false -> optimize_clause([Next|T],[In|L],F)
		    end;
		{putFloat,{x,M},N} ->
		    case remove_move(T,M) of
			true -> optimize_clause([{put,{putFloat,R1,N}}|T],
						L,true);
			false -> optimize_clause([Next|T],[In|L],F)
		    end;
		{putTuple0,{x,M},A} ->
		    case remove_move(T,M) of
			true -> optimize_clause([{put,{putTuple0,R1,A}}|T],
						L,true);
			false -> optimize_clause([Next|T],[In|L],F)
		    end;
		{putList2,{x,M},R2,R3} ->
		    case remove_move(T,M) of
			true -> optimize_clause([{put,{putList2,R1,R2,R3}}|T],
						L,true);
			false -> optimize_clause([Next|T],[In|L],F)
		    end;
		{putTuple,{x,M},A,R2} when R1 =/= R2 ->
                % tuple pointer is set before elements are build
		    case remove_move(T,M) of
			true -> optimize_clause([{put,{putTuple,R1,A,R2}}|T],
						L,true);
			false -> optimize_clause([Next|T],[In|L],F)
		    end;
		{putTuple,R2,A,R3} 
		when {x,M} =/= R2, {x,M} =/= R3, R1 =/= R2, R1 =/= R3 ->
		% can pass putTuple only when 
		% {putTuple,X,A,Y} {move,Z,W} X,Y =/= Z,W
		    optimize_clause([In|T],[Next|L],true);
		{putIntVal,_} ->
		% can pass putIntVal to be optimize with putInt
		    optimize_clause([In|T],[Next|L],true);
		Other ->
		    optimize_clause([Next|T],[In|L],F)
	    end;
	{move,R0,R1} ->
	    case Put of
		{putTuple,R2,A,R3} 
		when R0 =/= R2, R0 =/= R3, R1 =/= R2, R1 =/= R3 ->
		    optimize_clause([In|T],[Next|L],true);
		{putIntVal,_} ->
		    optimize_clause([In|T],[Next|L],true);
		Other ->
		    optimize_clause([Next|T],[In|L],F)
	    end;
	Other ->
	    optimize_clause([Next|T],[In|L],F)
    end;

optimize_clause([{get,Get},Next|T],L,F) ->
    In = {get,Get},
    case Next of
	TH when element(1, TH) == newTestHeap ->
	    optimize_clause([In|T], [TH|L], true);
	TH when element(1, TH) == testHeap ->
	    optimize_clause([In|T], [TH|L], true);
	{move,{x,N},R3} ->
	    case Get of
		{getList2, {x, 0}, {x, 0}, {x, N}} when R3 == {x, 0} ->
		    %% There is no getList2 rrr instruction -- can't optimize.
		    optimize_clause([Next|T], [In|L], F);
		{getList2,R0,R1,{x,N}}  ->
		    case remove_move(T,N) of
			true -> optimize_clause([{get,{getList2,R0,R1,R3}}|T],
						L,true);
			false -> optimize_clause([Next|T],[In|L],F)
		    end;
		{getList2,R0,{x,N},R2}  ->
		    case remove_move(T,N) of
			true -> optimize_clause([{get,{getList2,R0,R3,R2}}|T],
						L,true);
			false -> optimize_clause([Next|T],[In|L],F)
		    end;
		{getList2,R0,X,Y} when R0 =/= {x,N}, R0 =/= R3, 
                % can pass GetList2 only 
		% when {getList2,R0,X,Y} {move,R1,R2} R1,R2 =/= R0,X,Y
		X =/= {x,N}, X =/= R3, Y =/= {x,N}, Y =/= R3 ->
		    optimize_clause([In|T],[Next|L],true);
                % can pass GetTupleElement only
                % when {getTupleElement,R0,E,N} {move,R1,R2} R1,R2 =/= R0,E
		Other ->
		    optimize_clause([Next|T],[In|L],F)
	    end;
	{move,R4,R3} ->
	    case Get of
		{getList2,R0,X,Y} when R0 =/= R4, R0 =/= R3, 
		X =/= R4, X =/= R3, Y =/= R4, Y =/= R3 ->
		    optimize_clause([In|T],[Next|L],true);
		Other ->
		    optimize_clause([Next|T],[In|L],F)
	    end;
	% move allocate, init instruction up
	{allocate,N1,None} ->
	    optimize_clause([In|T],[Next|L],true);
	{init,N} ->
	    optimize_clause([In|T],[Next|L],true);
	Other ->
	    optimize_clause([Next|T],[In|L],F)
    end;

optimize_clause([{move,R0,R1},Next|T],L,F) ->
    In = {move,R0,R1},
    case R1 of
	R0 -> optimize_clause([Next|T],L,true);
	R1 ->
	    case Next of
		{testHeap, _, _} ->
		    optimize_clause([In|T],[Next|L],true);
		{newTestHeap, _, _} ->
		    optimize_clause([In|T],[Next|L],true);
		{move,{x,N},R2} when R1 == {x,N} ->
		    case remove_move(T,N) of
			true -> optimize_clause([{move,R0,R2}|T],L,true);
			false -> optimize_clause([Next|T],
				 [In|L],F)
		    end;
		{move,R1,R0} ->
		    optimize_clause([In|T],L,true);
		{put,{Put,R2,{x,N},R3}} when R1 == {x,N} ->
		    case remove_move(T,N) of
			true -> 
			    optimize_clause([{put,{Put,R2,R0,R3}}|T],L,true);
			false -> optimize_clause([Next|T],
				 [In|L],F)
		    end;
		{put,{Put,R2,R3,{x,N}}} when R1 == {x,N} ->
		    case remove_move(T,N) of
			true -> 
			    optimize_clause([{put,{Put,R2,R3,R0}}|T],L,true);
			false -> optimize_clause([Next|T],
				 [In|L],F)
		    end;
		Other -> 
		    optimize_clause([Next|T],[In|L],F)
	    end
    end;

%
%  Don't move the testHeap instruction above a catchEnd since we don't
%  want to forget to check the stack if we jump straight to catchend
%  We could move it past the catch if we knew we could move it before the
%  whole catch, but a catch expression will usually include a call of some
%  sort.							/Dalle
%
optimize_clause([{'catch',{catchEnd,A1,A2}},Next|T],L,F) ->
    case Next of
	{newTestHeap, Ng, Nb} ->
	    optimize_clause(T,[{testHeap, Ng+Nb, 1},
			       {'catch',{catchEnd,A1,A2}}|L],F);
	Other ->
	    optimize_clause([Next|T],[{'catch',{catchEnd,A1,A2}}|L],F)
    end;
optimize_clause([{'catch',R}, TH|T],L,_) when element(1, TH) == newTestHeap ->
    optimize_clause([{'catch',R}|T], [TH|L], true);

% assuming that all bifs have format {bif,{Func,Args,Res},W,E}
%    or {bif,{Func,Res},W,E}
% maybe move should pass bifs with different registers involved ?
optimize_clause([{bif,Bif,W,E},Next|T],L,F) ->
    In = {bif,Bif,W,E},
    case Next of
	{move,{x,M},R1} ->
	    case Bif of
		{Func,Args,{x,M}} ->
		    case remove_move(T,M) of
			true -> optimize_clause([{bif,{Func,Args,R1},W,E}|T],
						L,true);
			false -> optimize_clause([Next|T],[In|L],F)
		    end;
		{throw,R2,R3} ->
		    optimize_clause([In|T],L,true);
		{Func,{x,M}} ->
		    case remove_move(T,M) of
			true -> optimize_clause([{bif,{Func,R1},W,E}|T],
						L,true);
			false -> optimize_clause([Next|T],[In|L],F)
		    end;
		Other ->
		    optimize_clause([Next|T],[In|L],F)
	    end;
	{move,R0,R1} ->
	    case Bif of
		{Func,Args,Res} ->
		    case different([R0,R1],[Res|Args]) of
			true -> optimize_clause([In|T],[Next|L],true);
			false -> optimize_clause([Next|T],[In|L],F)
		    end;
		{throw,R2,R3} ->
		    optimize_clause([In|T],L,true);
		{Func,Res} when R0 =/= Res, R1 =/= Res ->
		    optimize_clause([In|T],[Next|L],true);
		Other ->
		    optimize_clause([Next|T],[In|L],F)
	    end;
	{newTestHeap, Ng, Nb} ->
        % can pass bifs in body but not in head
	    case W of
		body -> optimize_clause([In|T],[Next|L],true);
		body_case -> optimize_clause([In|T],[Next|L],true);
		{head_case,Lb} -> optimize_clause([In|T],[Next|L],true);
		{head,Lb} -> 
		    optimize_clause([{testHeap, Ng+Nb, reg_to_save(E)}|T],
				    [In|L],F);
		Other ->
		    optimize_clause([Next|T],[In|L],F)
	    end;
	Other ->
	    optimize_clause([Next|T],[In|L],F)
    end;


optimize_clause([{arith,Ar,W,E},Next|T],L,F) ->
    In = {arith,Ar,W,E},
    case Next of
	{newTestHeap, Ng, Nb} ->
	    optimize_clause([In|T],[Next|L],true);
	{move,{x,N},R3} ->
	    case Ar of
		{Arith,Op,R0,R1,{x,N}} ->
		    case remove_move(T,N) of
			true -> optimize_clause([{arith,{Arith,Op,R0,R1,R3},
						  W,E}|T],L,true);
			false -> optimize_clause([Next|T],[In|L],F)
		    end;
		{Arith,Op,R0,R1,R2} when R0 =/= R3, R0 =/= {x,N}, R1 =/= R3,
		R1 =/= {x,N}, R2 =/= R3, R2 =/= {x,N} ->
		    optimize_clause([In|T],[Next|L],true);
		{Arith,R0,{x,N}} ->
		    case remove_move(T,N) of
			true -> optimize_clause([{arith,{Arith,R0,R3},
						  W,E}|T],L,true);
			false -> optimize_clause([Next|T],[In|L],F)
		    end;
		{Arith,R0,R1}
		when R0 =/= R3, R0 =/= {x,N}, R1 =/= R3, R1 =/= {x,N} ->
		    optimize_clause([In|T],[Next|L],true);
		Other ->
		    optimize_clause([Next|T],[In|L],F)
	    end;
	{move,R4,R3} ->
	    case Ar of
		{Arith,Op,R0,R1,R2} when R0 =/= R3, R0 =/= R4, R1 =/= R3,
		R1 =/= R4, R2 =/= R3, R2 =/= R4 ->
		    optimize_clause([In|T],[Next|L],true);
		{Arith,R0,R1}
		when R0 =/= R3, R0 =/= R4, R1 =/= R3, R1 =/= R4 ->
		    optimize_clause([In|T],[Next|L],true);
		Other ->
		    optimize_clause([Next|T],[In|L],F)
	    end;
	Other ->
	    optimize_clause([Next|T],[In|L],F)
    end;

optimize_clause([removeMessage, TH|T], L, _) when element(1, TH) == newTestHeap ->
    optimize_clause([removeMessage|T], [TH|L], true);

optimize_clause([{test,Test,W,E},Next|T],L,F) ->
    In = {test,Test,W,E},
    case Next of
	{newTestHeap, Ng, Nb} ->
	    case W of
	    % can pass tests in body and in case head
		body -> optimize_clause([In|T],[Next|L],true);
		body_case -> 
		    optimize_clause([In|T],[Next|L],true);
		{head_case,Lb} -> 
		    optimize_clause([In|T],[Next|L],true);
		{head,Lb} ->
		    optimize_clause([{testHeap, Ng+Nb, reg_to_save(E)}|T],
		    [In|L],F);	    
		Other -> 
		    optimize_clause([Next|T],[In|L],F)
	    end;
	{move,R0,R1} ->
	    case Test of
		{comp,'=:=',R0,R1} ->
		    optimize_clause([In|T],L,true);
		{comp,'=:=',R1,R0} ->
		    optimize_clause([In|T],L,true);
		{equal,X,R0,R1} ->
		    optimize_clause([In|T],L,true);
		{equal,X,R1,R0} ->
		    optimize_clause([In|T],L,true);
		{test,nil,R1} when R0 == nil ->
		    optimize_clause([In|T],L,true);
		Other -> 
		    optimize_clause([Next|T],[In|L],F)
	    end;
	{allocate,N1,none} when element(1,W) == head ->
	    optimize_clause([{allocate,N1,reg_to_save(E)}|T],
		    [In|L],F);
	{allocateH,N1,N2,none} when element(1,W) == head ->
	    optimize_clause([{allocateH,N1,N2,reg_to_save(E)}|T],
		    [In|L],F);
	Other ->
	    optimize_clause([Next|T],[In|L],F)
    end;

% init cannot pass TestHeap as testHeap must pass init 
%    to be absorbed by Allocate
optimize_clause([{init,N},Next|T],L,F) ->
    In = {init,N},
    case Next of
	{newTestHeap,_,_} ->
        % can pass init to be absorbed later by allocate	    
	    optimize_clause([In|T],[Next|L],true);
	{move,_,_} ->
	    case remove_init([Next|T],N) of
		true -> optimize_clause([Next|T],L,true);
		false -> optimize_clause([Next|T],[In|L],F)
	    end; 
	{arith,_,_,_} ->
	    case remove_init([Next|T],N) of
		true -> optimize_clause([Next|T],L,true);
		false -> optimize_clause([Next|T],[In|L],F)
	    end;
	{init,_} ->
	    case remove_init(T,N) of
		true -> optimize_clause([Next|T],L,true);
		false -> optimize_clause([Next|T],[In|L],F)
	    end;
	{put,_} ->
	    case remove_init([Next|T],N) of
		true -> optimize_clause([Next|T],L,true);
		false -> optimize_clause([Next|T],[In|L],F)
	    end;	 
	{get,_} ->
	    case remove_init([Next|T],N) of
		true -> optimize_clause([Next|T],L,true);
		false -> optimize_clause([Next|T],[In|L],F)
	    end;
	{'catch',_} -> 
	    case remove_init([Next|T],N) of
		true -> optimize_clause([Next|T],L,true);
		false -> optimize_clause([Next|T],[In|L],F)
	    end;
        {call,_,_,_,_} ->
            case remove_init([Next|T],N) of
                true -> optimize_clause([Next|T],L,true);
                false -> optimize_clause([Next|T],[In|L],F)
            end;
        {callEx,_,_,_,_,_} ->
            case remove_init([Next|T],N) of
                true -> optimize_clause([Next|T],L,true);
                false -> optimize_clause([Next|T],[In|L],F)
            end;                                                         
	Other ->
	    optimize_clause([Next|T],[In|L],F)
    end;

optimize_clause([{label,Name,Arity},Next|T],L,F) ->
    In = {label,Name,Arity},
    case Next of
	{newTestHeap, Ng, Nb} ->
	    optimize_clause([{testHeap, Ng+Nb,Arity}|T],[In|L],F);
	{allocate,N1,none} ->
	    optimize_clause([{allocate,N1,Arity}|T],[In|L],F);
	{allocateH,N1,N2,none} ->
	    optimize_clause([{allocateH,N1,N2,Arity}|T],[In|L],F);
	Other ->
	    optimize_clause([Next|T],[In|L],F)
    end;

optimize_clause([{clause_header,Arity},Next|T],L,F) ->
    In = {clause_header,Arity},
    case Next of
	{newTestHeap, Ng, Nb} ->
	    optimize_clause([{testHeap, Ng+Nb, Arity}|T],[In|L],F);
	{allocate,N1,none} ->
	    optimize_clause([{allocate,N1,Arity}|T],[In|L],F);
	{allocateH,N1,N2,none} ->
	    optimize_clause([{allocateH,N1,N2,Arity}|T],[In|L],F);
	Other ->
	    optimize_clause([Next|T],[In|L],F)
    end;

optimize_clause([{call,Label,Arity,Nm,FNm},Next|T],L,F) ->
    In = {call,Label,Arity,Nm,FNm},
    case Next of
	{newTestHeap, Ng, Nb} ->
	    optimize_clause([{testHeap, Ng+Nb, 1}|T],[In|L],F);
	{deallocate,N} ->
	    case T of
		[{return}|T0] ->
		    optimize_clause(T0,[{callLast,Label,Arity,N,Nm,FNm}|L],F);
		Other ->
		    optimize_clause([Next|T],[In|L],F)
	    end;
	{return} ->
	    optimize_clause(T,[{callOnly,Label,Arity,Nm,FNm}|L],true);
	% the following can happen when saving exported var from case structures
	% some move instructions are not needed (those to local frame)
	% the problem is that var_to_export is generated by appending 
	% all options lists
	{move,_,{y,_}} ->
	    case can_remove_move_y(T) of
		false ->
		    optimize_clause([Next|T],[In|L],F);
		{T0,N} ->
		    optimize_clause(T0,[{callLast,Label,Arity,N,Nm,FNm}|L],F)
	    end;
	Other ->
	    optimize_clause([Next|T],[In|L],F)
    end;	    

optimize_clause([{callEx,Label,M,Func,A,Nm},Next|T],L,F) ->
    In = {callEx,Label,M,Func,A,Nm},
    case Next of
	{newTestHeap, Ng, Nb} ->
	    optimize_clause([{testHeap, Ng+Nb, 1}|T], [In|L], F);
	{deallocate,N} ->
	    case T of
		[{return}|T0] ->
		    optimize_clause(T0,[{callExLast,Label,M,Func,A,N,Nm}|L],F);
		Other ->
		  optimize_clause([Next|T],[In|L],F)
	    end;
	Other ->
	    optimize_clause([Next|T],[In|L],F)
    end;

optimize_clause([{bif_gc,X,W,E},{newTestHeap, Ng, Nb}|T],L,F) ->
    optimize_clause([{testHeap, Ng+Nb, 1}|T],[{bif_gc,X,W,E}|L],F);
optimize_clause([{bif_gc,{exit,[R0],R1},W,E},{move,_,_}|T],L,_) ->
    optimize_clause([{bif_gc,{exit,[R0],R1},W,E}|T],L,true);

optimize_clause([{{'case',R},Lb,Options,Nm}|T],L,_) ->
    {true, reverse(L, optimize_options(Options,Lb,T,{'case',R},Nm))};
optimize_clause([{'if',Lb,Options,Nm}|T],L,_) ->
    {true, reverse(L, optimize_options(Options,Lb,T,'if',Nm))};
optimize_clause([{'receive',Lb0,Lb1,Options,Wait,Nm,M}|T],L,_) ->
    {true, reverse(L, optimize_options(Options,Lb0,T,{'receive',Lb1,Wait,M},Nm))};

%______________________________________________________________________
optimize_clause([H|T],L,F) ->
    optimize_clause(T,[H|L],F);
optimize_clause([],L,F) -> {F,reverse(L)}.

%______________________________________________________________________
% optimize_options(Options,Lb,T,What,Nm) -> Code
%
% Optimizes Options (branch) list, moves out testHeap instruction.
%
% Heap needed before a case:
% 	The sum of the heap requirements for all guards +
%	the maxium of the heap requirements for bodies
%
% The heap requirements for bodies include heap space needed after
% the case.
%
% Lb   - case return label
% T    - code after all options
% What - {'case', R}, 'if' or 'receive'
% Nm   - is the current function name atom index

optimize_options(Options,Lb,T0,What,Nm) -> 
    {T2,{TestHeapCode,Code}} =
	case optimize_clause1(T0) of
	    [TH|T1] when element(1, TH) == newTestHeap ->
		{T1, optimize_options0(Options, [TH], 0, 0)};
	    [{deallocate,N},{return}|T1] ->
		{T1,optimize_options0(Options,[{deallocate,N},{return}],0,0)};
	    [{move,R0,R1},{deallocate,N},{return}|T1] ->
		{T1,optimize_options0(Options,
				      [{move,R0,R1},{deallocate,N},{return}],0,0)};
	    [{move,R0,R1},{return}|T1] ->
		{T1,optimize_options0(Options,
				      [{move,R0,R1},{return}],0,0)};
	    [{return}|T1]  ->
		{T1,optimize_options0(Options,
				      [{return}],0,0)};
	    T1 ->
		{T1,optimize_options0(Options,[],0,0)}
	end,
    case What of
	{'case',R} ->
	    Code1 = extract_common(Code),
	    Code2 = check_indexing(Code1),
	    append([TestHeapCode,[{'case',R}|flatten(Code2)],
		    [{caseEnd,Lb,Nm,R}|T2]]);
	'if' ->
	    Code1 = extract_common(Code),
	    Code2 = check_indexing(Code1),
	    append([TestHeapCode,['if'| flatten(Code2)],
		    [{ifEnd,Lb,Nm}|T2]]);
	% TestHeap cannot leave receive.
	% We have already saved all registers.
	{'receive',Lb1,{wait,L},M} -> 
	    TestHeapCode0 =
		case TestHeapCode of
		    [{newTestHeap, Ng, Nb}] -> 
			[{testHeap, Ng+Nb, 0}];
		    _ ->
			TestHeapCode
		end,
	    Code1 = extract_common(Code),
	    Code2 = check_indexing(Code1),
	    append([[{'receive',Lb1,M}|TestHeapCode0], flatten(Code2),
		    [{wait,L,Nm},{receiveEnd,Lb}],T2]);

	{'receive',Lb1,{waitTimeOut,L,R,Nm},M} -> 
	    [ActionTimeOut|Code0] = Code,
	    TestHeapCode0 =
		case TestHeapCode of
		    [{newTestHeap, Ng, Nb}] -> 
			[{testHeap, Ng+Nb, 0}];
		    _ ->
			TestHeapCode
		end,
	    Code1 = extract_common(Code0),
	    Code2 = check_indexing(Code1),
	    append([[{'receive',Lb1,M}|TestHeapCode0],flatten(Code2),
		    [{waitTimeOut,L,R,Nm}|TestHeapCode0],
		    flatten(ActionTimeOut),
		    [{waitTimeOutEnd,Nm},{receiveEnd,Lb}],T2])
    end.
	
optimize_options0([H|T], Code0, NeedGuard, NeedBody) ->
    [TP|L0] = optimize_clause1(append(H, Code0)),
    {testPattern, LR, LP} = TP,
    {Code1, Ng1, Nb1} =
	case L0 of
	    [{newTestHeap, Ng0, Nb0}|L1] when Nb0 > NeedBody ->
		{[TP|L1], NeedGuard+Ng0, Nb0};
	    [{newTestHeap, Ng0, _}|L1] ->
		{[TP|L1], NeedGuard+Ng0, NeedBody};
	    L1 ->
		{[TP|L1], NeedGuard, NeedBody}
	end,
    {TestHeapCode, Code2} = optimize_options0(T, Code0, Ng1, Nb1),
    {TestHeapCode, [append(Code1, [{testPattern_end,LR,LP}])|Code2]};
optimize_options0([], _, 0, 0) ->
    {[], []};
optimize_options0([], _, NeedGuard, NeedBody) ->
    %% This instruction will always be part of a body.
    {[{newTestHeap, 0, NeedGuard+NeedBody}], []}.

% remove_init(T,N)
% if T contains sequence with some instr initializing N -> true
% otherwise false
% T is before other optimizations
remove_init([{move,_,{y,N}}|_],N) -> true;
remove_init([{get,{_,_,{y,N},_}}|_],N) -> true;
remove_init([{get,{_,_,_,{y,N}}}|_],N) -> true;

% we can do the following as Move is optimized later with put
remove_init([{get,_}|T],N) -> remove_init(T,N);
remove_init([{put,{_,{y,N}}}|_],N) -> true;
remove_init([{put,{_,{y,N},_}}|_],N) -> true;
remove_init([{put,{_,{y,N},_,_}}|_],N) -> true;
remove_init([{put,_}|T],N) -> remove_init(T,N);
remove_init([{arith,{_,_,_,_,{y,N}},W,E}|_],N) -> true;
remove_init([{arith,{_,_,{y,N}},W,E}|_],N) -> true;
remove_init([{arith,_,W,E}|T],N) -> remove_init(T,N);
remove_init([{'catch',{'catch',{y,N},_}}|_],N) -> true;
remove_init([{move,_,_}|T],N) -> remove_init(T,N);
remove_init([{init,_}|T],N) -> remove_init(T,N);

% we can do the following as testHeap moves up later
remove_init([{newTestHeap, _, _}|T],N) -> remove_init(T,N);
remove_init([{testHeap, _, _}|T],N) -> remove_init(T,N);
remove_init([{'catch',_}|T],N) -> remove_init(T,N);
remove_init(_,_) -> false.

% remove_move(T,N)
% if T contains sequence with move instr using {x,N} -> false
% otherwise true
% T is after some optimizations (testHeap can show up)
remove_move([{move,{x,N},_}|_],N) -> false;
remove_move([{move,_,{x,N}}|_],N) -> true;
remove_move([{move,_,_}|T],N) -> remove_move(T,N);
remove_move([{newTestHeap,_,_}|T],N) -> remove_move(T,N);
remove_move([{testHeap,_,_}|T],N) -> remove_move(T,N);

% {putInt,R,A,S}, {putList2,R1,R2,R3}, {putTuple,R1,A,R2}, {putString,R,Len,Str}
remove_move([{put,{putString, {x,N}, _, _}}|_],N) -> true;
remove_move([{put,{_,_,{x,N},_}}|_],N) -> false;
remove_move([{put,{_,_,_,{x,N}}}|_],N) -> false;
remove_move([{put,{_,{x,N},_,_}}|_],N) -> true;
% {putFloat,R,N}, {putTuple0,R,A}
remove_move([{put,{_,{x,N},_}}|_],N) -> true;
% {putTupleElement,R}
remove_move([{put,{_,{x,N}}}|_],N) -> false;
remove_move([{put,_}|T],N) -> remove_move(T,N);

remove_move([{get,{_,{x,N},_,_}}|_],N) -> false;
remove_move([{get,{_,_,{x,N},_}}|_],N) -> true;
remove_move([{get,{_,_,_,{x,N}}}|_],N) -> true;
remove_move([{get,_}|T],N) -> remove_move(T,N);

remove_move([{arith,{Arth,Op,{x,N},_,_},_,_}|_],N) -> false;
remove_move([{arith,{Arth,Op,_,{x,N},_},_,_}|_],N) -> false;
remove_move([{arith,{Arth,Op,_,_,{x,N}},_,_}|_],N) -> true;
remove_move([{arith,{Arth,{x,N},_},_,_}|_],N) -> false;
remove_move([{arith,{Arth,_,{x,N}},_,_}|_],N) -> true;
remove_move([{arith,_,_,_}|T],N) -> remove_move(T,N);

remove_move([{deallocate,_}|_],N) -> true;
remove_move([{deallocate_return,_}|_],N) -> true;
remove_move([{return}|_],N) -> true;

remove_move([{call,_,Arity,_,_}|_],N) when N >= Arity -> true;
remove_move([{callOnly,_,Arity,_,_}|_],N) when N >= Arity -> true;
remove_move([{callLast,_,Arity,_,_,_}|_],N) when N >= Arity -> true;
remove_move([{callEx,_,_,_,Arity,_}|_],N) when N >= Arity -> true;
remove_move([{callExLast,_,_,_,Arity,_,_}|_],N) when N >= Arity -> true;

remove_move(_,_) -> false.

% different(L0,L1) -> (true || false)
% true if intersection of L0 L1 is en empty list
% false otherwise
different([],L) -> true;
different([H|T],L) -> 
    case lists:member(H,L) of
	true -> false;
	false -> different(T,L)
    end.


% extract common code in different parts of options list
% extract_common(Options) -> Options'
% Options is a list of different options 

extract_common([H]) -> 
    [H];
extract_common([H|T]) -> 
    {Code,CodeRest,Rest} = extract_common0(H,T,[]),
    case Code of
	[] ->
	    Rest;
	_ ->
	    [{testPattern,N0,_}|CodeT] = Code,
	    Lb = new_next_pattern_lb(),
	    [ append([[{commonTestPattern,Lb}|update(CodeT,Lb,[])],
		      CodeRest,[{commonTestPattern_end,Lb}]]) | Rest]
    end;
extract_common([]) -> 
    [].


% extract_common0(H,T,[]) -> {Code,CodeRest,Rest}
% Code is common part for options in CodeRest,
% Rest contains other options
% H is the first list in options
extract_common0(H0,[H1|T],L) -> 
    {Code,T0,T1} = extract(H0,H1,[],[],[]),
    case Code of
	[{testPattern,_,_}] -> 
	    {[],[],[H0 | extract_common([H1|T])]};
	_ ->
	    extract_common1(Code,T,[T1,T0|L])
    end;
extract_common0(H,[],L) -> 
    {[],[],H}.

% as above but H0 is the common part from the first and second
% lists in options
extract_common1(H0,[H1|T],L) -> 
    {Code,T0,T1} = extract(H0,H1,[],[],[]),
    case Code of
	H0 ->
	    extract_common1(Code,T,[T1|L]);
	_ -> 
	    F = extract_common(reverse(L)),
	    %% G = check_indexing(F),
	    %% flatten(F)
	    {H0,F,extract_common([H1|T])}
    end;
extract_common1(H,[],L) -> 
    F = extract_common(reverse(L)),
    %% G = check_indexing(F),
    %%   io:format("extract_common1: ~w~n",[F]),
    %% flatten(F)
    {H,F,[]}.


% extract common part of two option lists
% extract(List0,List1,[],TestPattern0,TestPattern1) -> {Code,T0,T1}
% Code is the common part
% List0 = [Code|T0]
% List1 = [Code|T1]
% T = [{testPattern,_,_},...,{testPattern_end,_,_}]
% the following instructions canot be split
%   {putInt,R,A,S}, {putIntVal,V}
%   {putTuple,R0,A,R1}, {putTupleElement,R}, 
extract([H|T0],[H|T1],L,TP0,TP1) -> 
    case H of
	{testPattern,_,_} -> 
	% added after introducing {testPattern,_,0} which means not failing
	    extract(T0,T1,[H|L],[H|TP0],[H|TP1]); 
	{put,{putInt,_,_,_}} -> 
	    create_result(H,H,T0,T1,L,TP0,TP1);
	{put,{putTuple,_,_,_}} ->
	    create_result(H,H,T0,T1,L,TP0,TP1);
	_ ->
	    extract(T0,T1,[H|L],TP0,TP1)
    end;
extract([H0|T0],[H1|T1],L,TP0,TP1) -> 
    case H0 of
	{test,Test,{head_case,_},_} ->
	    case H1 of
		{test,Test,{head_case,_},_} ->
		    extract(T0,T1,[H0|L],TP0,TP1);
		_ -> 
		    create_result(H0,H1,T0,T1,L,TP0,TP1)
	    end;
	{testPattern,N0,N1} ->
	    case H1 of
		{testPattern,N0,N2} ->
		    extract(T0,T1,[H0|L],
			 [{testPattern,N0,N1}|TP0],[{testPattern,N0,N2}|TP1]);
		_ -> 
		    create_result(H0,H1,T0,T1,L,TP0,TP1)
	    end;
	_ ->
	    create_result(H0,H1,T0,T1,L,TP0,TP1)
    end;
extract(T0,T1,L,[TP00|_],[TP11|_]) -> 
    {reverse(L),
     [TP00|T0],
     [TP11|T1]
    }.


create_result(H0,H1,T0,T1,L,[TP00|_],[TP11|_]) ->
    {reverse(L),
     [TP00,H0|T0],
     [TP11,H1|T1]
    }.

% upade labels in tests
% update(Code,Lb,L) -> Code'
update([H|T],Lb,L) ->
    case H of 
	{test,Test,{head_case,_},E} ->
	    update(T,Lb,[{test,Test,{head_case,Lb},E}|L]);
	_ ->
	    update(T,Lb,[H|L])
    end;
update([],_,L) -> reverse(L).
    

check_indexing(L) ->
    case check_indexing(L, 0) of
	{Type, R, N} when N > 3 ->
	    build_switch(Type, L, R);
	_ ->
	    append(L)
    end.

%
%  Look for equal_int instructions.  First clause, no register yet.
%
check_indexing([], N) -> false;
check_indexing([[Pat,Test | _]|T], N) ->
    case check_pat(Pat,Test) of
	{atom,R} -> check_atom_indexing(T, N + 1, R);
	{int,R} ->  check_int_indexing(T, N + 1, R);
	_ -> false
    end;
check_indexing(_, N) -> false.

check_pat({clause_header,_},{test,{equal_int, R, _}, _, _}) -> {int,R};
check_pat({clause_header,_},{test,{equal_atom, R, _}, _, _}) -> {atom,R};
check_pat({commonCl,_},{test,{equal_int, R, _}, _, _}) -> {int,R};
check_pat({commonCl,_},{test,{equal_atom, R, _}, _, _}) -> {atom,R};
check_pat({testPattern,_,_},{test,{equal_int, R, _}, _, _}) -> {int,R};
check_pat({testPattern,_,_},{test,{equal_atom, R, _}, _, _}) -> {atom,R};
check_pat({commonTestPattern,_},{test,{equal_int, R, _}, _, _}) -> {int,R};
check_pat({commonTestPattern,_},{test,{equal_atom, R, _}, _, _}) -> {atom,R};
check_pat(_, _) -> false.

%
%  Look for equal_int instructions for register R.
%
check_int_indexing([], N, R) -> {equal_int, R, N};
check_int_indexing([[Pat,Test | _]|T], N, R) ->
    case check_pat(Pat,Test) of
	{int,R} -> check_int_indexing(T, N+1, R);
	_ ->
	    case T of	% Last case might be something else, but that's OK
		[] -> {equal_int, R, N};
		_ -> false
	    end
    end;
check_int_indexing([_], N, R) -> {equal_int,R,N};
check_int_indexing(_, N,R) -> false.

%
%  Look for equal_int instructions for register R.
%
check_atom_indexing([], N, R) -> {equal_atom, R, N};
check_atom_indexing([[Pat,Test | _]|T], N, R) ->
    case check_pat(Pat,Test) of
	{atom,R} -> check_atom_indexing(T, N + 1, R);
	_ ->
	    case T of	% Last case might be something else, but that's OK
		[] -> {equal_atom, R, N};
		_ -> false
	    end
    end;
check_atom_indexing([_], N, R) -> {equal_atom,R,N};
check_atom_indexing(_, N, R) -> false.

%
%  build switch instruction if there are enough cases
%
build_switch(Type, L, R) ->
    {L2, Cases, Fail} = insert_switch_labels(L, [], Type),
    C2 = lists:keysort(1,Cases),
    Len = length(C2),
    SwitchType = switch_type(Type, reverse(C2)),
    case SwitchType of
	jmp_switch when Len > 3 ->
	    [ {SwitchType, R, C2} | append(L2) ];
	lookup_switch when Len > 5 ->
	    [ {SwitchType, R, C2} | append(L2) ];
	atom_switch when Len > 5 ->
	    [ {SwitchType, R, C2} | append(L2) ];
	_ ->
	    append(L)
    end.


%
% If all the items in the lists are consequtive generate a special case
%
switch_type(equal_int, [{0,_}]) ->
    jmp_switch;
switch_type(equal_int, [{N1, _}, {N2, L} | T]) when N1 - 1 ==  N2 ->
    switch_type(equal_int, [{N2, L} | T]);
switch_type(equal_int, A) ->
    lookup_switch;
switch_type(equal_atom, _) ->
    atom_switch.

switch_pat({clause_header,N}) -> {clause_header,N};
switch_pat({commonCl,N}) -> {commonCl,N};
switch_pat({testPattern,LR,LP}) -> {testPattern,LR,LP};
switch_pat({commonTestPattern,Lb}) -> {commonTestPattern,Lb};
switch_pat(_) -> false.

%
% Insert an extra label for the switch after the initial equal_int
% instruction
%
insert_switch_labels([], _, _) ->
    Lb = new_next_lb(),
    {[[{switchlabel, Lb}]], [], Lb};

insert_switch_labels([[Pat,{test,{Test,R,V},H,E} | T] | T0], OldCases,Test) ->
    Pat = switch_pat(Pat),
    {T1, C1, F} = insert_switch_labels(T0, [V | OldCases], Test),
    {T2, C2} = case lists:member(V, OldCases) of
		   true ->			% Already in list, do nothing
		       {T, C1};
		   false ->			% Add label in code and list
		       Lb = new_next_lb(),
		       {[{switchlabel, Lb} | T], [{V, Lb} | C1]}
	       end,
    {[[Pat,{test,{Test, R, V}, H, E} | T2] | T1], C2, F};
insert_switch_labels([L], _, _) ->   % Catchall clause at the end of function
    Lb = new_next_lb(),
    {[[{switchlabel, Lb} | L]], [], Lb}.

% extract common code in different parts of function clauses
% extract_common_cl(Clauses) -> Clauses'

extract_common_cl([H]) -> 
    [H];
extract_common_cl([H|T]) -> 
    {Code,CodeRest,Rest} = extract_common_cl0(H,T,[]),
    case Code of
	[] -> 
	    Rest;
	_ ->
	    Lb = new_next_lb(),
	    [append([[{commonCl,Lb}], update_cl(Code,Lb,[]), CodeRest, 
		     [{commonCl_end, Lb}]]) | Rest]
    end;
extract_common_cl([]) -> 
    [].


% extract_common_cl0(H,T,[]) -> {Code,CodeRest,Rest}
% Code is common part for clauses in CodeRest,
% Rest contains other clauses
% H is the first clause
extract_common_cl0(H0,[H1|T],L) -> 
    {Code,T0,T1} = extract_cl(H0,H1,[]),
    case Code of
	[] -> 
	    {[],[],[H0 | extract_common_cl([H1|T])]};
	[{clause_header,_}] -> 
	    {[],[], [H0 | extract_common_cl([H1|T])]};
	_ ->
	    extract_common_cl1(Code,T,[T1,T0|L])
    end;
extract_common_cl0(H,[],L) -> 
    {[],[],H}.

% as above but H0 is the common part from the first and second
% clause
extract_common_cl1(H0,[H1|T],L) -> 
    {Code,T0,T1} = extract_cl(H0,H1,[]),
    case Code of
	H0 ->
	    extract_common_cl1(Code,T,[T1|L]);
	_ -> 
	    F = extract_common_cl(reverse(L)),
	    G = check_indexing(F),
	    {H0,flatten(G),extract_common_cl([H1|T])}
    end;
extract_common_cl1(H,[],L) -> 
    F = extract_common_cl(reverse(L)),
    G = check_indexing(F),
    {H,flatten(G),[]}.



% extract common part of two clauses
% extract_cl(List0,List1,[]) -> {Code,T0,T1}
% Code is the common part
% List0 = [Code|T0]
% List1 = [Code|T1]
% T = [{clause_header,_},...,{clause_end,_}]
% the following instructions cannot be split
%   {putInt,R,A,S}, {putIntVal,V}
%   {putTuple,R0,A,R1}, {putTupleElement,R}, 
extract_cl([H|T0],[H|T1],L) -> 
    case H of
	{put,{putInt,_,_,_}} -> 
	    extract_cl0([H|T0],[H|T1],L);
	{put,{putTuple,_,_,_}} ->
	    extract_cl0([H|T0],[H|T1],L);
	_ ->
	    extract_cl(T0,T1,[H|L])
    end;
extract_cl([H0|T0],[H1|T1],L) -> 
    case H0 of
	{test,Test,{head,_},_} ->
	    case H1 of
		{test,Test,{head,_},_} ->
		    extract_cl(T0,T1,[H0|L]);
		_ -> 
		    extract_cl0([H0|T0],[H1|T1],L)
	    end;
	_ ->
	    extract_cl0([H0|T0],[H1|T1],L)
    end;
extract_cl(T0,T1,L) -> 
    extract_cl0(T0,T1,L).

extract_cl0(T0,T1,L) -> 
    {reverse(L),[{clause_header,0}|T0],[{clause_header,0}|T1]}.


% upade labels in tests
% remove {clause_header,_}
% update_cl(Code,Lb,L) -> Code'
update_cl([H|T],Lb,L) ->
    case H of 
	{test,Test,{head,_},E} ->
	    update_cl(T,Lb,[{test,Test,{head,Lb},E}|L]);
	_ ->
	    update_cl(T,Lb,[H|L])
    end;
update_cl([],_,[]) -> [];
update_cl([],_,L) -> 
    [{clause_header,_}|L0] = reverse(L), 
    L0.
% remove all 'move to local frame' instructions to make last call optimization
can_remove_move_y([{move,_,{y,_}}|T]) ->
    can_remove_move_y(T);
can_remove_move_y([{deallocate,N},{return}|T0]) ->
    {T0,N};
can_remove_move_y(_) ->
    false.


% to speed up compilation of strings
% check for following testHeap instructions and sum them up
remove_testHeap(T,In,Next,L,F) ->
    case In of
	{put,{putList2,_,_,_}} ->
	    {Ng0, Nb0, T0, L0} = remove_testHeap0(T, 0, 0, []),
	    case Next of
		{newTestHeap, Ng, Nb} ->
		    optimize_clause(T0, L0++[In,{newTestHeap, Ng+Ng0, Nb+Nb0}|L],
				    true);
		_ -> % putList2
		    case Ng0+Nb0 of
			0 ->
			    optimize_clause(T0, L0++[Next, In|L], F);
			_ ->
			    optimize_clause(T0, L0++[Next, In, 
						     {newTestHeap, Ng0, Nb0}|L],
					    true)
		    end
	    end;
	_ -> % other {put,Put}
	    case Next of
		{newTestHeap,_,_} -> % testHeap can pass {put,Put}
		    optimize_clause([In|T],[Next|L],true);
		_ -> % Put cannot pass other Put 
		    optimize_clause([Next|T],[In|L],F)
	    end
    end.

remove_testHeap0([{put,{putList2,R1,R2,R3}}|T], Ng, Nb, L) ->
    remove_testHeap0(T, Ng, Nb, [{put,{putList2,R1,R2,R3}}|L]);
remove_testHeap0([{newTestHeap, Ng1, Nb1}|T], Ng0, Nb0, L) ->
    remove_testHeap0(T, Ng0+Ng1, Nb0+Nb1, L);
remove_testHeap0(T, Ng, Nb, [H|L]) ->
    {Ng, Nb, [H|T], L};
remove_testHeap0(T, Ng, Nb, []) ->
    {Ng, Nb, T, []}.

%% Replace always failing and true tests
check_tests(L) ->
    check_tests(L, []).
check_tests([{test, Test, W, E}|T], L) ->
    case Test of
	{test, Type, R} ->
	    check_test(Type, R, W, T, L, {test, Test, W, E});
	{testTupleArity, R, _} ->
	    check_test(testTupleArity, R, W, T, L, {test, Test, W, E});
	_ ->
	    check_tests(T, [{test, Test, W, E}|L])
    end;
check_tests([{get,{getTupleElement,R0,R1,N}}|T],L) -> 
    case check_outcome(getTupleElement, R0) of
	true ->
	    check_tests(T,L);
	_ ->
	    check_tests(T, [{get,{getTupleElement,R0,R1,N}}|L])
    end;
check_tests([H|T], L) ->
    check_tests(T, [H|L]);
check_tests([], L) ->
    reverse(L).

check_test(Type, R, W, T, L, Test) ->
    case check_outcome(Type, R) of
	true ->
	    check_tests(T, L);
	fail ->
	    check_tests(T, [{fail, R, W}|L]);
	unknown ->
	    check_tests(T, [Test|L])
    end.

%% Returns the outcome of a test: true (always true), fail (always failing),
%% or unknown (impossible to determine at compile-time).

check_outcome(nil,nil) ->               true;
check_outcome(nil,{i,_}) ->             fail;
check_outcome(nil,{atom, _}) ->         fail;
check_outcome(nonEmptyList,nil) ->      fail;
check_outcome(nonEmptyList,{i,_}) ->    fail;
check_outcome(nonEmptyList,{atom, _}) ->fail;
check_outcome(integer,nil) ->           fail;
check_outcome(integer,{i,_}) ->         true;
check_outcome(integer,{atom, _}) ->     fail;
check_outcome(float,nil) ->             fail;
check_outcome(float,{i,_}) ->           fail;
check_outcome(float,{atom, _}) ->       fail;
check_outcome(number,nil) ->            fail;
check_outcome(number,{i,_}) ->          true;
check_outcome(number,{atom, _}) ->      fail;
check_outcome(atom,nil) ->              fail;
check_outcome(atom,{i,_}) ->            fail;
check_outcome(atom,{atom, _}) ->        true;
check_outcome(constant,nil) ->          fail;
check_outcome(constant,{i,_}) ->        true;
check_outcome(constant,{atom, _}) ->    true;
check_outcome(tuple,nil) ->             fail;
check_outcome(tuple,{i,_}) ->           fail;
check_outcome(tuple,{atom, _}) ->       fail;
check_outcome(list,nil) ->              true;
check_outcome(list,{i,_}) ->            fail;
check_outcome(list,{atom, _}) ->        fail;
check_outcome(pid,nil) ->               fail;
check_outcome(pid,{i,_}) ->             fail;
check_outcome(pid,{atom, _}) ->         fail;
check_outcome(reference,nil) ->         fail;
check_outcome(reference,{i,_}) ->       fail;
check_outcome(reference,{atom, _}) ->   fail;
check_outcome(port,nil) ->              fail;
check_outcome(port,{i,_}) ->            fail;
check_outcome(port,{atom, _}) ->        fail;
check_outcome(binary,nil) ->            fail;
check_outcome(binary,{i,_}) ->          fail;
check_outcome(binary,{atom, _}) ->      fail;
check_outcome(testTupleArity,nil) ->    true;
check_outcome(testTupleArity,{i,_}) ->  true;
check_outcome(testTupleArity,{atom, _}) -> true;
check_outcome(getTupleElement,nil) ->   true;
check_outcome(getTupleElement,{i,_}) -> true;
check_outcome(getTupleElement,{atom, _}) -> true;
check_outcome(_,_) ->                   unknown.
