Added .globl, fix in Xfit call

1987-08-26 14:45:27 +00:00
parent ab5d79a8ce 7c64a2f075
commit 8feda2f95c
3805 changed files with 199429 additions and 14298 deletions
--- a/.distr
+++ b/.distr
@@ -0,0 +1,19 @@
+Action
+Copyright
+NEW
+README
+TakeAction
+bin
+doc
+emtest
+etc
+first
+h
+include
+modules
+lang
+lib
+mach
+man
+mkun
+util
--- a/59
+++ b/59
@@ -7,12 +7,25 @@ end
 name	"EM definition"
 dir etc
 end
-name "C preprocessor"
-dir util/cpp
+name "LL(1) Parser generator"
+dir util/LLgen
 end
 name "EM definition library"
 dir util/data
 end
+name "C utilities"
+dir util/cmisc
+end
+name "Modules"
+dir modules/src
+indir
+end
+name "C preprocessor"
+dir util/cpp
+end
+name "ACK object utilities"
+dir util/amisc
+end
 name "Encode/Decode"
 dir util/misc
 end
@@ -25,6 +38,10 @@ end
 name "EM Peephole optimizer"
 dir util/opt
 end
+name "EM Global optimizer"
+dir util/ego
+indir
+end
 name "ACK archiver"
 dir util/arch
 end
@@ -34,18 +51,24 @@ end
 name "Bootstrap for backend tables"
 dir util/cgg
 end
-name "LL(1) Parser generator"
-dir util/LLgen
-end
 name "Bootstrap for newest form of backend tables"
 dir util/ncgg
 end
+name "LED link editor"
+dir util/led
+end
+name "TOPGEN target optimizer generator"
+dir util/topgen
+end
 name "C frontend"
-dir lang/cem/comp
+dir lang/cem/cemcom
 end
 name "Basic frontend"
 dir lang/basic/src
 end
+name "Occam frontend"
+dir lang/occam/comp
+end
 name "Intel 8086 support"
 dir mach/i86
 indir
@@ -82,10 +105,6 @@ name "4-4 Interpreter support"
 dir mach/int44
 indir
 end
-name "IBM PC/IX support"
-dir mach/ix
-indir
-end
 name "Motorola 68000 2-4 support"
 dir mach/m68k2
 indir
@@ -114,14 +133,26 @@ name "Signetics 2650 support"
 dir mach/s2650
 indir
 end
-name "Vax 2-4 support"
-dir mach/vax2
-indir
-end
 name "Vax 4-4 support"
 dir mach/vax4
 indir
 end
+name "M68020 System V/68 support"
+dir mach/m68020
+indir
+end
+name "Sun 3 M68020 support"
+dir mach/sun3
+indir
+end
+name "Sun 2 M68000 support"
+dir mach/sun2
+indir
+end
+name "Mantra M68000 System V.0 support"
+dir mach/mantra
+indir
+end
 name "Z80 support"
 dir mach/z80
 indir
--- a/5
+++ b/5
@@ -1,7 +1,7 @@
 /*
- * (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands.
+ *     A M S T E R D A M   C O M P I L E R   K I T
 *
- *          This product is part of the Amsterdam Compiler Kit.
+ * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
 *
 * Permission to use, sell, duplicate or disclose this software must be
 * obtained in writing. Requests for such permissions may be sent to
@@ -14,4 +14,3 @@
 *      The Netherlands
 *
 */
-
--- a/35
+++ b/35
@@ -0,0 +1,35 @@
+cmp:           # compile everything and compare
+	(cd etc  ; make cmp )
+	(cd util ; make cmp )
+	(cd lang ; make cmp )
+	(cd mach ; make cmp )
+
+install:         # compile everything to machine code
+	(cd etc  ; make install )
+	(cd util ; make install )
+	(cd lang/cem ; make install )
+	(cd mach ; make install )
+	(cd lang/pc ; make install )
+
+clean:        # remove all non-sources, except boot-files
+	(cd doc ; make clean )
+	(cd man ; make clean )
+	(cd h   ; make clean )
+	(cd etc  ; make clean )
+	(cd util ; make clean )
+	(cd lang ; make clean )
+	(cd mach ; make clean )
+
+opr:            # print all sources
+	make pr | opr
+
+pr:             # print all sources
+	@( pr Makefile ; \
+	  (cd doc ; make pr ) ; \
+	  (cd man ; make pr ) ; \
+	  (cd h ; make pr ) ; \
+	  (cd etc  ; make pr ) ; \
+	  (cd lang ; make pr ) ; \
+	  (cd util ; make pr ) ; \
+	  (cd mach ; make pr ) \
+	)
--- a/38
+++ b/38
@@ -1,17 +1,27 @@
 What's new:
-	A lot of things have changed since that previous distribution.
+	A lot of things have changed since the previous distribution.
 It is not wise to mix files created by the previous version of the Kit
 with files belonging to this version, although that might sometimes work.
-The major additions are:
-	- Basic frontend
-	- New codegenerator
-	- LL(1) parser generator
-	- Vax backend with 4-byte wordsize
-	- Motorola 68000 backend with 4-byte wordsize
-	- Motorola 68000 interpreter for 2- and 4-byte wordsize
-	- Z8000 assembler and backend.
-	- 6805 assembler
-	- NatSem 16032 assembler
-	- Intel 8080 backend
-	- Zilog Z80 backend
-	- Signetics 2650 assembler
+The major changes are:
+	- a new C-compiler and runtime system
+	- a new C preprocessor
+	- new assembler framework, allowing the generation of relocatable
+	  object code for most processors
+	- new versions of all assemblers, using the new assembler framework
+	- a new link-editor, linking is now a separate and fast phase for most
+	  machines
+	- improved Pascal compiler, now also handles 4-byte wordsize
+	- Motorola M68020 backend and assembler
+	- Support for (some) SUN systems
+	- improved version of LL(1) parser generator, producing faster code
+	- a new language: Occam
+	- better System V support, the Kit should now just compile and run
+
+				Ceriel J.H. Jacobs
+				Dept. of Math. and Computer Science
+				Vrije Universiteit
+				Postbus 7161
+				1007 MC  Amsterdam
+				The Netherlands
+
+				(UseNet: ceriel@cs.vu.nl)
--- a/16
+++ b/16
@@ -19,7 +19,7 @@ RETC=0
 do
 	eval set $LINE
 	case x"$1" in
-	x#*)	;;
+	x!*)	;;
 	xname)		SYS="$2"
 			ACTION='make $PAR'
 			DIR=.
@@ -44,12 +44,18 @@ do
 			FAIL="$2" ;;
 	xsuccess)	SUCC="$2" ;;
 	xdir)		DIR="$2" ;;
-	xsystem)	case `ack_sys` in
-			$2)	;;
-			*)	echo "Sorry, $SYS can only be made on $2 systems"
+	xsystem)	PAT="$2"
+			oIFS=$IFS
+			IFS="|"
+			eval set $2
+			case x`ack_sys` in
+			x$1|x$2|x$3|x$4|x$5|x$6|x$7)	;;
+			*)	echo "Sorry, $SYS can only be made on $PAT systems"
 				DOIT=no
 				;;
-			esac ;;
+			esac
+			IFS=$oIFS
+			;;
 	xend)		case $DOIT in
 			no)	continue ;;
 			esac
--- a/bin/.distr
+++ b/bin/.distr
@@ -0,0 +1 @@
+em.pascal
--- a/bin/em.pascal
+++ b/bin/em.pascal
@@ -1 +1 @@
-exec /usr/em/doc/em.doc/int/em /usr/em/doc/em.doc/int/tables ${1-e.out} core
+exec /usr/em/doc/em/int/em /usr/em/doc/em/int/tables ${1-e.out} core
--- a/distr/Action
+++ b/distr/Action
@@ -1,15 +1,12 @@
 name "Installation manual"
 dir doc
 end
-name "EM documentation"
-dir doc/em.doc
-end
 name "Pascal bootstrap files"
 dir lang/pc/pem
 end
 name "LLgen bootstrap files"
 dir util/LLgen
 end
-name "MSC6500 vend_library"
-dir mach/6500/libem
+name "ego share pop_push file"
+dir util/ego/share
 end
--- a/distr/Action1
+++ b/distr/Action1
@@ -1,6 +1,9 @@
-name "vax2/cg bootstrap files"
-dir mach/vax2/cg
+name "m68k2/cg bootstrap files"
+dir mach/m68k2/cg
 end
 name "vax4/cg bootstrap files"
 dir mach/vax4/cg
 end
+name "m68020/ncg bootstrap files"
+dir mach/m68020/ncg
+end
--- a/distr/Exceptions
+++ b/distr/Exceptions
@@ -1,5 +1,3 @@
-- ./bin/em.pascal no RCS file
-- ./doc/em.doc/doc.pr no RCS file
 -- ./doc/install.pr no RCS file
 -- ./h/em_mnem.h no RCS file
 -- ./h/em_pseu.h no RCS file
@@ -8,29 +6,15 @@
 -- ./lang/basic/src/y.tab.h no RCS file
 -- ./lang/pc/pem/pem22.m no RCS file
 -- ./lang/pc/pem/pem24.m no RCS file
+-- ./lang/pc/pem/pem44.m no RCS file
 -- ./lib/LLgen/incl no RCS file
 -- ./lib/LLgen/rec no RCS file
-- ./lib/ix/head_em no RCS file
-- ./lib/ix/head_i no RCS file
-- ./lib/ix/tail_em no RCS file
-- ./lib/ix/tail_em.vend no RCS file
-- ./lib/ix/tail_mon no RCS file
-- ./mach/6500/libem/tail_em.ve.s.a no RCS file
-- ./mach/vax2/cg/tables1.c no RCS file
-- ./mach/vax2/cg/tables1.h no RCS file
+-- ./mach/m68k2/cg/tables1.c no RCS file
+-- ./mach/m68k2/cg/tables1.h no RCS file
+-- ./mach/m68020/ncg/tables1.c no RCS file
+-- ./mach/m68020/ncg/tables1.h no RCS file
 -- ./mach/vax4/cg/tables1.c no RCS file
 -- ./mach/vax4/cg/tables1.h no RCS file
-- ./mach/z80/int/libpc/pc_tail.c.a no RCS file
-- ./mkun/pubmac no distr2 yet
-- ./mkun/tmac.q no distr2 yet
-- ./mkun/tmac.q1 no distr2 yet
-- ./mkun/tmac.q2 no distr2 yet
-- ./mkun/tmac.q3 no distr2 yet
-- ./mkun/tmac.q4 no distr2 yet
-- ./mkun/tmac.q5 no distr2 yet
-- ./mkun/tmac.q6 no distr2 yet
-- ./mkun/tmac.q7 no distr2 yet
-- ./mkun/tmac.q8 no distr2 yet
 -- ./util/LLgen/src/parser no RCS file
 -- ./util/LLgen/src/LLgen.c no RCS file
 -- ./util/LLgen/src/Lpars.c no RCS file
@@ -39,4 +23,4 @@
 -- ./util/data/em_flag.c no RCS file
 -- ./util/data/em_mnem.c no RCS file
 -- ./util/data/em_pseu.c no RCS file
-- ./util/data/em_ptyp.c no RCS file
+-- ./util/ego/share/pop_push.h no RCS file
--- a/distr/How_To
+++ b/distr/How_To
@@ -4,7 +4,7 @@ The EM home directory contains a file called ".distr". It contains
 the names of all the files and directories you want to have in the distribution.
 The directories should contain .distr files, the other files should
 be placed under RCS.
-The current RCS revision name is "distr2".
+The current RCS revision name is "distr3".
 The are files that derive from other files and yet should be placed
 in the distribution.
 These files should not be placed under RCS.
@@ -25,12 +25,12 @@ destination tree.
 For each file mentioned there it performes certain actions:
 1- Directory	Change to that directory and call yourself recursively.
 2- File
-   a-           Try to do "co -rdistr2 destination_tree/path/destination_file"
+   a-           Try to do "co -rdistr3 destination_tree/path/destination_file"
                on succes "chmod +w destination_file" 
              else
   b-           Try to do "co destination_tree/destination_file"
                on succes "chmod +w destination_file" and
-                give message that says "Missing distr2 entry" (or some such). 
+                give message that says "Missing distr3 entry" (or some such). 
              else
   c-           I   Does a file LIST exist in this directory AND
                    is the first line of LIST equal to the name of the
@@ -52,23 +52,23 @@ Some files derive from other files in the tree, those derivations should
 be done with the use of an already installed distribution.
 The files Action and Action1 in this directory contain the actions
 we now take. (Confession: most of the time we use /usr/em)
-One warning, to re-nroff the IR-81 report it takes more then just nroff
-because most nroff's can't stand that report and stop half-way.
-The ntroff program does the trick, but only on the 11's.
-	tbl sources | ntroff -Tlp | ntlp

 After running these re-derivation programs the distrubtion tree starts
 to look like the tree you need.
 There are too many files there though, especially the files created by
 the derivation process.
 That is why we now give the command:
-	dtar cdf distr2 .
-The file distr2 is the one you should put on tape!
+	dtar cdf distr3 .
+The file distr3 is the one you should put on tape!
 But,.... before doing that: Try it out!
 Repeat the process described in the installation manual.
 Only if that succeeds you are sure that you included the files needed,
-and gave all other files the correct "distr2" RCS id.
-After you sent the tape away, forbid ANYBODY to touch the distr2 id
+and gave all other files the correct "distr3" RCS id.
+After you sent the tape away, forbid ANYBODY to touch the distr3 id
 in your RCS files.
 					Good Luck,
 						Ed Keizer, 85/4/15.
+
+Updated for 3rd distribution by Ceriel Jacobs, 87/3/11.
+And again,
+					Good Luck!
--- a/distr/dwalk
+++ b/distr/dwalk
@@ -14,7 +14,7 @@ do
 			${DD-:} $CDIR $i
 			CDIR=$CDIR/$i
 			export CDIR
-			exec /usr/em/distr/dwalk
+			exec /proj/em/distr/dwalk
 		  else
 			echo ++ Could not access $CDIR/$i
 		  fi
--- a/distr/f.attf
+++ b/distr/f.attf
@@ -1,5 +1,3 @@
-- ./bin/em.pascal no RCS file
-- ./doc/em.doc/doc.pr no RCS file
 -- ./doc/install.pr no RCS file
 -- ./h/em_mnem.h no RCS file
 -- ./h/em_pseu.h no RCS file
@@ -8,29 +6,15 @@
 -- ./lang/basic/src/y.tab.h no RCS file
 -- ./lang/pc/pem/pem22.m no RCS file
 -- ./lang/pc/pem/pem24.m no RCS file
+-- ./lang/pc/pem/pem44.m no RCS file
 -- ./lib/LLgen/incl no RCS file
 -- ./lib/LLgen/rec no RCS file
-- ./lib/ix/head_em no RCS file
-- ./lib/ix/head_i no RCS file
-- ./lib/ix/tail_em no RCS file
-- ./lib/ix/tail_em.vend no RCS file
-- ./lib/ix/tail_mon no RCS file
-- ./mach/6500/libem/tail_em.ve.s.a no RCS file
-- ./mach/vax2/cg/tables1.c no RCS file
-- ./mach/vax2/cg/tables1.h no RCS file
+-- ./mach/m68k2/cg/tables1.c no RCS file
+-- ./mach/m68k2/cg/tables1.h no RCS file
+-- ./mach/m68020/ncg/tables1.c no RCS file
+-- ./mach/m68020/ncg/tables1.h no RCS file
 -- ./mach/vax4/cg/tables1.c no RCS file
 -- ./mach/vax4/cg/tables1.h no RCS file
-- ./mach/z80/int/libpc/pc_tail.c.a no RCS file
-- ./mkun/pubmac no distr2 yet
-- ./mkun/tmac.q no distr2 yet
-- ./mkun/tmac.q1 no distr2 yet
-- ./mkun/tmac.q2 no distr2 yet
-- ./mkun/tmac.q3 no distr2 yet
-- ./mkun/tmac.q4 no distr2 yet
-- ./mkun/tmac.q5 no distr2 yet
-- ./mkun/tmac.q6 no distr2 yet
-- ./mkun/tmac.q7 no distr2 yet
-- ./mkun/tmac.q8 no distr2 yet
 -- ./util/LLgen/src/parser no RCS file
 -- ./util/LLgen/src/LLgen.c no RCS file
 -- ./util/LLgen/src/Lpars.c no RCS file
@@ -39,4 +23,4 @@
 -- ./util/data/em_flag.c no RCS file
 -- ./util/data/em_mnem.c no RCS file
 -- ./util/data/em_pseu.c no RCS file
-- ./util/data/em_ptyp.c no RCS file
+-- ./util/ego/share/pop_push.h no RCS file
--- a/distr/mkf
+++ b/distr/mkf
@@ -1,10 +1,10 @@
-if co -q -rdistr2 $DESTDIR/$1/$2 >/dev/null 2>&1
+if co -q -rdistr3 $DESTDIR/$1/$2 >/dev/null 2>&1
 then
 	chmod +w $DESTDIR/$1/$2
 elif co -q $DESTDIR/$1/$2 >/dev/null 2>&1
 then
 	chmod +w $DESTDIR/$1/$2
-	echo -- $1/$2 no distr2 yet
+	echo -- $1/$2 no distr3 yet
 elif grep LIST .distr >/dev/null 2>&1 &&
     (test "$2" = "`head -1 $DESTDIR/$1/LIST`") >/dev/null 2>&1 &&
     ${DA-false} "$1" "$2"
--- a/distr/mktree
+++ b/distr/mktree
@@ -2,7 +2,7 @@ case $# in
 1)	;;
 *)	echo $0 directory ; exit 1 ;;
 esac
-DDIR=/usr/em/distr
+DDIR=/proj/em/distr
 case $1 in
 /*)	DESTDIR=$1 ;;
 *)	DESTDIR=`pwd`/$1 ;;
--- a/distr/todistr
+++ b/distr/todistr
@@ -23,4 +23,4 @@ esac
 case x$REV in
 x)	exit 2 ;;
 esac
-rcs -ndistr2:$REV $FLAGS $FILE
+rcs -ndistr3:$REV $FLAGS $FILE
--- a/doc/.distr
+++ b/doc/.distr
@@ -0,0 +1,23 @@
+Makefile
+ack.doc
+basic.doc
+cg.doc
+crefman.doc
+em
+install.doc
+install.pr
+ncg.doc
+pcref.doc
+peep.doc
+regadd.doc
+toolkit.doc
+v7bugs.doc
+val.doc
+LLgen
+6500.doc
+i80.doc
+z80.doc
+m68020.doc
+top
+ego
+occam
--- a/doc/6500.doc
+++ b/doc/6500.doc
@@ -1,6 +1,6 @@
 . \" $Header$"
-.po +10
-.ND
+.RP
+.ND Dec 1984
 .TL
 .B
 A backend table for the 6500 microprocessor
@@ -12,212 +12,6 @@ The backend table is part of the Amsterdam Compiler Kit (ACK).
 It translates the intermediate language family EM to a machine
 code for the MCS6500 microprocessor family.
 .AE
-.PP
-.bp
-.NH
-Introduction.
-.PP
-As more and more organizations aquire many micro and minicomputers,
-the need for portable compilers is becoming more and more acute.
-The present situation, in which each harware vendor provides its
-own compilers -- each with its own deficiencies and extensions, and
-none of them compatible -- leaves much to be desired.
-The ideal situation would be an integrated system containing
-a family of (cross) compilers, each compiler accepting a standard
-source language and, producing code for a wide variety of target
-machines. Furthermore, the compilers should be compatible, so programs 
-written in one language can call procedures written in another
-language. Finally, the system should be designed so as to make
-adding new languages and, new machines easy. Such an integerated
-system is being built at the Vrije Universiteit.
-.PP
-The compiler building system, which is called the "Amsterdam Compiler
-Kit" (ACK), can be thought of as a "tool kit." It consists of
-a number of parts that can be combined to form compilers (and
-interpreters) with various properties. The tool kit is based
-on an idea (UNCOL) that was first suggested in 1960 [5],
-but which never really caught on then. The problem which UNCOL
-attemps to solve is how to make a compiler for each of
-.B
-N
-.R
-languages on
-.B
-M
-.R
-different machines without having to write
-.B
-N
-.R
-x
-.B
-M
-.R
-programs.
-.PP
-As shown in Fig. 1, the UNCOL approach is to write
-.B
-N
-.R
-"front ends," each of which translates
-one source language to a common
-intermediate language, UNCOL (UNiversal Computer Oriented
-Language), and
-.B
-M
-.R
-"back ends," each of which translates programs
-in UNCOL to a specific machine language. Under these conditions,
-only
-.B
-N
-.R
-+
-.B
-M
-.R
-programs must be written to provide all
-.B
-N
-.R
-languages on all
-.B
-M
-.R
-machines, instead of
-.B
-N
-.R
-x
-.B
-M
-.R
-programs.
-.PP
-Various reseachers have attempted to design a suitable UNCOL [1,6],
-but none of these have become popular. It is the believe of the
-designers of the Amsterdam Compiler Kit that previous attemps 
-have failed because they have been too ambitious, that is, they have
-tried to cover all languages and all machines using a single UNCOL.
-The approach of the designers is more modest:
-they cater only to algebraic languages and machines whose memory
-consist of 8-bit bytes, each with its own address.
-Typical languages that could be handled include Ada, ALGOL 60,
-ALGOL 68, BASIC, C, FORTRAN, Modula, Pascal, PL/I, PL/M, PLAIN and
-RATFOR, where COBOL, LISP and SNOBOL would be less efficient.
-Examples of machines that could be included are the Intel 8080 and
-8086, Motorola 6800, 6809 and 68000, Zilog Z80 and Z8000, DEC PDP-11
-and Vax, MOS Technology MCS6500 family and IBM but not the Burroughs
-6700, CDC Cyber or Univac 1108 (because they are not byte_oriented).
-With these restrictions the designers believe that the old UNCOL
-idea can be used as the basis of a practical compiler-building 
-system.
-.sp 10
-.bp
-.NH
-An overview of the Amsterdam Compiler kit
-.PP
-The tool kit consists of eight components:
-.IP 1.
-The preprocessor.
-.IP 2.
-The front ends.
-.IP 3.
-The peephole optimizer.
-.IP 4.
-The global optimizer.
-.IP 5.
-The back end.
-.IP 6.
-The target machine optimizer.
-.IP 7.
-The universal assembler/linker.
-.IP 8.
-The utility package.
-.PP
-A fully optimizing compiler, depicted in Fig. 2, has seven cascaded
-phases. Conceptually, each component reads an input file and writes
-a transformed output file to be used as input to the next component.
-In practice, some components may use temporary files to allow
-multiple passes over the input or internal intermediate files.
-.sp 20
-.PP
-In the following paragraphs a brief decription of each component
-is given.
-A more detailed description of the back end will be given in the 
-rest of this document. For a more detailed descripiton on the rest
-of the components see [7]. A program to be compiled is first fed
-into the (language independed) preprocessor, which provides a
-simple macro facility and similar textual facilities.
-The preprocessor's ouput is a legal program in one of the programming
-languages supported, whereas the input is a program possibly
-augmented with macro's, etc.
-.PP
-This output goes into the appropriate front end, whose job it is to 
-produce intermediate cade.
-This intermediate code (the UNCOL of ACK) is the machine language
-for a simple stack machine EM (Encoding Machine).
-A typical front end might build a parse tree from the input
-and then use the parse tree to generate EM cade,
-which is similar to reverse Polish.
-In order to perform this work, the front end has to maintain tables of declare
-tables of declared variables, labels, etc., determine where
-to place the data structures in memory and so on.
-.PP
-The EM code generated by the front end is fed into the peephole
-optimizer, which scans it with a window of a view instructions,
-replacing certain inefficient code sequences by better ones.
-Such a search is important because EM contains instructions to
-handle numerous important special cases efficiently
-(e.g. incrementing a variable by 1).
-It is our strategy to relieve the front ends of the burden
-of hunting for special cases because there are many front ends
-and just one peephole optimizer.
-By handeling the special cases in the peephole optimizer,
-the front ends become simpler, easier to write and easier to maintain.
-.PP
-Following the peephole optimizer is a global optimizer [2],
-which unlike the peephole optimizer, examines the program as a whole.
-It builts a data flow graph to make possible a variety of global
-optimizations, among them, moving invariant code out of loops,
-avoiding redundant computations, live/dead analysis and
-eliminating tail recursion.
-Note that the output of the global optimizer is still EM code.
-.PP
-Next comes the back end, which differs from the front ends in a
-fundamental way.
-Each front end is a separate program, whereas the back end is a 
-single program that is driven by a machine dependent driving table.
-The driving table for a specific machine tells how EM code is
-mapped onto the machine's assembly language.
-Although a simple driving table just might macro expand each
-EM instruction into a sequence of target machine instructions,
-a much more sophisticated translation strategy is normaly used,
-as described later.
-For speech, the back end does not actually read in the driving
-table at run time.
-Instead, the tables are compiled along with the back end in advance,
-resulting in one binairy program per machine.
-.PP
-The output of the back end is a program in the assembly language
-of some particular machine.
-The next component in the pipeline reads this program and performs
-peephole optimization on it.
-The optimizations performed here involve idiosyncrasies of the
-target machine that cannot be performed by the machine-independent
-EM-to-EM peephole optimizer.
-Typically these optimizations take advantage of the special
-instructions or special addressing modes.
-.PP
-The optimized target machine assembly code then goes into the final
-component in the pipeline, the universal assembler/linker.
-This program assembles the input to object format, extracting
-routines from libraries and including them as needed.
-.PP
-The final component of the tool kit is the utility package,
-which contains various test programs, interpreters for EM code,
-EM libraries, conversion programs and other aids for the
-implementer and user.
 .bp
 .DS C
 .B
@@ -264,7 +58,7 @@ manufactured by Acorn Computer Ltd..
 The MOS Technology MCS6500
 .PP
 The MCS6500 is as a family of CPU devices developed by MOS
-Technology.
+Technology [1].
 The members of the MCS6500 family are the same chips in a 
 different housing.
 The MCS6502, the big brother in the family, can handle 64k
@@ -861,7 +655,7 @@ The above description of the machine table is
 a description of the table for the MCS6500.
 It uses only a part of the possibilities which the code generator
 generator offers.
-For a more precise and detailed description see [4].
+For a more precise and detailed description see [2].
 .DS C
 .B
 THE BACK END TABLE.
@@ -1141,7 +935,7 @@ This subroutine expects the multiplicand in zero page
 at locations ARTH, ARTH+1, while the multiplier is in zero
 page locations ARTH+2, ARTH+3.
 For a description of the algorithms used for multiplication and
-division, see [9].
+division, see [3].
 A table content is:
 .sp  1
 .br
@@ -2071,34 +1865,6 @@ if it is to be used on a MCS6500.
 REFERENCES.
 .R
 .IP 1.
-Haddon. B.K., and Waite, W.M.
-Experience with the Universal Intermediate Language Janus.
-.B
-Software Practice & Experience 8
-.R
-,
-5 (Sept.-Oct. 1978), 601-616.
-.RS
-.PP
-An intermediate language for use with Algol 68, Pascal, etc.
-is described.
-The paper discusses some problems encountered and how they were
-dealt with.
-.RE
-.IP 2.
-Lowry, E.S., and Medlock, C.W. Object Code Optimization.
-.B
-Commun. ACM 12
-.R
-,
-(Jan. 1969), 13-22.
-.RS
-.PP
-A classical paper on global object code optimization.
-It covers data flow analysis, common subexpressions, code motion,
-register allocation and other techniques.
-.RE
-.IP 3.
 Osborn, A., Jacobson, S., and Kane, J. The Mos Technology MCS6500.
 .B
 An Introduction to Microcomputers ,
@@ -2109,7 +1875,7 @@ Volume II, Some Real Products (june 1977) chap. 9.
 A hardware description of some real existing CPU's, such as
 the Intel Z80, MCS6500, etc. is given in this book.
 .RE
-.IP 4.
+.IP 2.
 van Staveren, H.
 The table driven code generator from the Amsterdam Compiler Kit.
 Vrije Universiteit, Amsterdam, (July 11, 1983).
@@ -2117,43 +1883,7 @@ Vrije Universiteit, Amsterdam, (July 11, 1983).
 .PP
 The defining document for writing a back end table.
 .RE
-.IP 5.
-Steel, T.B., Jr. UNCOL: The Myth and the Fact. in
-.B
-Ann. Rev. Auto. Prog.
-.R
-Goodman, R. (ed.), vol 2., (1960), 325-344.
-.RS
-.PP
-An introduction to the UNCOL idea by its originator.
-.RE
-.IP 6.
-Steel. T.B., Jr. A first Version of UNCOL.
-.B
-Proc. Western Joint Comp. Conf.
-.R
-,
-(1961), 371-377.
-.IP 7.
-Tanenbaum, A.S., Stevenson, J.W., Keizer, E.G., and van Staveren,
-H.
-A Practical Tool Kit for Making Portable Compilers.
-Informatica Rapport 74, Vrije Universiteit, Amsterdam, 1983.
-.RS
-.PP
-An overview on the Amsterdam Compiler Kit.
-.RE
-.IP 8.
-Tanenbaum, A.S., Stevenson, J.W., Keizer, E.G., and van Staveren,
-H.
-Description of an Experimental Machine Architecture for use with
-Block Structured Languages.
-Informatica Rapport 81, Vrije Universiteit, Amsterdam, 1983.
-.RS
-.PP
-The defining document for EM.
-.RE
-.IP 9.
+.IP 3.
 Tanenbaum, A.S. Structured Computer Organization.
 Prentice Hall. (1976).
 .RS
--- a/doc/LLgen/.distr
+++ b/doc/LLgen/.distr
@@ -0,0 +1,3 @@
+LLgen.n
+LLgen.refs
+Makefile
--- a/doc/LLgen/LLgen.n
+++ b/doc/LLgen/LLgen.n
--- a/doc/LLgen/LLgen.refs
+++ b/doc/LLgen/LLgen.refs
@@ -0,0 +1,54 @@
+%T An ALL(1) Compiler Generator
+%A D. R. Milton
+%A L. W. Kirchhoff
+%A B. R. Rowland
+%B Proc. of the SIGPLAN '79 Symposium on Compiler Construction
+%D August 1979 
+%J SIGPLAN Notices
+%N 8
+%P 152-157
+%V 14
+
+%T Lex - A Lexical Analyser Generator
+%A M. E. Lesk
+%I Bell Laboratories
+%D October 1975
+%C Murray Hill, New Jersey
+%R Comp. Sci. Tech. Rep. No. 39
+
+%T Yacc: Yet Another Compiler Compiler
+%A S. C. Johnson
+%I Bell Laboratories
+%D 1975
+%C Murray Hill, New Jersey
+%R Comp. Sci. Tech. Rep. No. 32
+
+%T The C Programming Language
+%A B. W. Kernighan
+%A D. M. Ritchie
+%I Prentice-Hall, Inc.
+%C Englewood Cliffs, New Jersey
+%D 1978
+
+%A M. Griffiths
+%T LL(1) Grammars and Analysers
+%E F. L. Bauer and J. Eickel
+%B Compiler Construction, An Advanced Course
+%I Springer-Verlag
+%C New York, N.Y.
+%D 1974
+
+%T Make - A Program for Maintaining Computer Programs
+%A S. I. Feldman
+%J Software - Practice and Experience
+%V 10
+%N 8
+%P 255-265
+%D August 1979
+
+%T Methods for the Automatic Construction of Error Correcting Parsers
+%A J. R\*:ohrich
+%J Acta Informatica
+%V 13
+%P 115-139
+%D 1980
--- a/doc/LLgen/Makefile
+++ b/doc/LLgen/Makefile
@@ -0,0 +1,8 @@
+# $Header$
+
+EQN=eqn
+REFER=refer
+TBL=tbl
+
+../LLgen.doc:	LLgen.n LLgen.refs
+		$(REFER) -sA+T -p LLgen.refs LLgen.n | $(EQN) | $(TBL) > $@
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -2,52 +2,67 @@

 SUF=pr
 PRINT=cat
-RESFILES=cref.$(SUF) pcref.$(SUF) val.$(SUF) v7bugs.$(SUF) install.$(SUF)\
-ack.$(SUF) cg.$(SUF) regadd.$(SUF) peep.$(SUF) toolkit.$(SUF) LLgen.$(SUF)\
-basic.$(SUF) 6500.$(SUF) ncg.$(SUF)
 NROFF=nroff
+TBL=tbl
+EQN=eqn
+PIC=pic
+REFER=refer
 MS=-ms

-cref.$(SUF):        cref.doc
-		tbl $? | $(NROFF) >$@
-v7bugs.$(SUF):      v7bugs.doc
-		$(NROFF) $(MS) $? >$@
-ack.$(SUF):         ack.doc
-		$(NROFF) $(MS) $? >$@
-cg.$(SUF):		cg.doc
-		$(NROFF) $(MS) $? >$@
-ncg.$(SUF):		ncg.doc
-		$(NROFF) $(MS) $? >$@
-regadd.$(SUF):		regadd.doc
-		$(NROFF) $(MS) $? >$@
-install.$(SUF):     install.doc
-		$(NROFF) $(MS) $? >$@
-pcref.$(SUF):       pcref.doc
-		$(NROFF) $? >$@
-basic.$(SUF):       basic.doc
-		$(NROFF) $(MS) $? >$@
-peep.$(SUF):	peep.doc
-		$(NROFF) $(MS) $? >$@
-val.$(SUF):         val.doc
-		$(NROFF) $? >$@
-toolkit.$(SUF):	toolkit.doc
-		$(NROFF) $(MS) $? >$@
-LLgen.$(SUF):	LLgen.doc
-		eqn $? | $(NROFF) $(MS) >$@
+RESFILES= \
+	toolkit.$(SUF) install.$(SUF) em.$(SUF) ack.$(SUF) v7bugs.$(SUF) \
+	peep.$(SUF) cg.$(SUF) ncg.$(SUF) regadd.$(SUF) LLgen.$(SUF) \
+	basic.$(SUF) crefman.$(SUF) pcref.$(SUF) val.$(SUF) \
+	6500.$(SUF) i80.$(SUF) z80.$(SUF) top.$(SUF) ego.$(SUF) \
+	m68020.$(SUF) occam.$(SUF) nopt.$(SUF)
+
+.SUFFIXES: .doc .$(SUF)
+
+.doc.$(SUF):
+		$(NROFF) $(MS) $< > $@
+
+crefman.$(SUF):	crefman.doc
+		$(EQN) crefman.doc | $(NROFF) $(MS) >$@
+v7bugs.$(SUF):	v7bugs.doc
+		$(NROFF) v7bugs.doc >$@
+install.$(SUF):	install.doc
+		$(TBL) install.doc | $(NROFF) $(MS) >$@
+pcref.$(SUF):	pcref.doc
+		$(NROFF) pcref.doc >$@
+val.$(SUF):	val.doc
+		$(NROFF) val.doc >$@
 6500.$(SUF):	6500.doc
-		$(NROFF) $(MS) $? >$@
+		$(TBL) 6500.doc | $(NROFF) $(MS) >$@
+LLgen.doc:	LLgen.X
+LLgen.X:
+		cd LLgen; make "EQN="$(EQN) "TBL="$(TBL) "REFER="$(REFER)
+top.doc:	top.X
+top.X:
+		cd top; make "EQN="$(EQN) "TBL="$(TBL) "REFER="$(REFER)
+occam.doc:	occam.X
+occam.X:
+		cd occam; make "PIC="$(PIC) "TBL="$(TBL) "EQN="$(EQN)
+ego.doc:	ego.X
+ego.X:
+		cd ego; make "REFER="$(REFER)
+em.$(SUF):	em.X
+em.X:
+		cd em; make "TBL="$(TBL) "NROFF="$(NROFF) "SUF="$(SUF)

 install cmp:

 distr:		install.doc
-		nroff -Tlp install.doc >install.pr
+		tbl install.doc | nroff -Tlp $(MS) >install.pr
+
 pr:
-		@make "SUF="$SUF "NROFF="$NROFF "PRINT="$PRINT $(RESFILES) \
-			>make.pr.out 2>&1
+		@make "SUF="$(SUF) "NROFF="$(NROFF) "EQN="$(EQN) "TBL="$(TBL) \
+			"PIC="$(PIC) "MS="$(MS) \
+			$(RESFILES) >make.pr.out 2>&1
 		@$(PRINT) $(RESFILES)

 opr:
 		make pr | opr

 clean:
-		-rm -f *.old $(RESFILES) *.t
+		-rm -f *.old $(RESFILES) *.t *.out LLgen.doc top.doc \
+			occam.doc ego.doc
--- a/doc/ack.doc
+++ b/doc/ack.doc
@@ -1,7 +1,6 @@
 .\" $Header$
-.nr LL 7.5i
-.tr ~
 .nr PD 1v
+.tr ~
 .TL
 Ack Description File
 .br
@@ -9,7 +8,7 @@ Reference Manual
 .AU
 Ed Keizer
 .AI
-Wiskundig Seminarium
+Vakgroep Informatica
 Vrije Universiteit
 Amsterdam
 .NH
@@ -24,16 +23,16 @@ source file.
 Each transformation table entry tells which input suffixes are
 allowed and what suffix/name the output file has.
 When the output file does not already satisfy the request of the
-user, with the flag \fB-c.suffix\fP, the table is scanned
+user, with the flag \fB\-c.suffix\fP, the table is scanned
 starting with the next transformation in the table for another
 transformation that has as input suffix the output suffix of
 the previous transformation.
 A few special transformations are recognized, among them is the
-combiner.
-A program combining several files into one.
-When no stop suffix was specified (flag \fB-c.suffix\fP) \fIack\fP
-stops after executing the combiner with as arguments the -
-possibly transformed - input files and libraries.
+combiner, which is
+a program combining several files into one.
+When no stop suffix was specified (flag \fB\-c.suffix\fP) \fIack\fP
+stops after executing the combiner with as arguments the \-
+possibly transformed \- input files and libraries.
 \fIAck\fP will only perform the transformations in the order in
 which they are presented in the table.
 .LP
@@ -60,7 +59,7 @@ convoluted.
 First, when the last filename in the program call name is not
 one of \fIack\fP, \fIcc\fP, \fIacc\fP, \fIpc\fP or \fIapc\fP,
 this filename is used as the backend description name.
-Second, when the \fB-m\fP is present the \fB-m\fP is chopped of this
+Second, when the \fB\-m\fP is present the \fB\-m\fP is chopped off this
 flag and the rest is used as the backend description name.
 Third, when both failed the shell environment variable ACKM is
 used.
@@ -75,7 +74,8 @@ This descriptions are simply files read in at compile time.
 At the moment of writing this document, the descriptions
 included are: pdp, fe, i86, m68k2, vax2 and int.
 The name of a description is first searched for internally,
-then in the directory lib/ack and finally in the current
+then in lib/descr/\fIname\fP, then in
+lib/\fIname\fP/descr, and finally in the current
 directory of the user.
 .NH
 Using the description file
@@ -119,8 +119,8 @@ Syntax:  (\fIsuffix sequence\fP:\fIsuffix sequence\fP=\fItext\fP)
 .br
 Example: (.c.p.e:.e=tail_em)
 .br
-If the two suffix sequences have a common member -~\&.e in this
-case~- the text is produced.
+If the two suffix sequences have a common member \-~\&.e in this
+case~\- the text is produced.
 When no common member is present the empty string is produced.
 Thus the example given is a constant expression.
 Normally, one of the suffix sequences is produced by variable
@@ -134,17 +134,17 @@ the text following the \fIneed\fP is appended to both the HEAD and
 TAIL variable.
 The value of the variable RTS is determined by the first
 transformation used with a \fIrts\fP property.
-.LP
+.IP
 Two runtime flags have effect on the value of one or more of
 these variables.
-The flag \fB-.suffix\fP has the same effect on these three variables
+The flag \fB\-.suffix\fP has the same effect on these three variables
 as if a file with that \fBsuffix\fP was included in the argument list
 and had to be translated.
-The flag \fB-r.suffix\fP only has that effect on the TAIL
+The flag \fB\-r.suffix\fP only has that effect on the TAIL
 variable.
 The program call names \fIacc\fP and \fIcc\fP have the effect
-of an automatic \fB-.c\fB flag.
-\fIApc\fP and \fIpc\fP have the effect of an automatic \fB-.p\fP flag.
+of an automatic \fB\-.c\fP flag.
+\fIApc\fP and \fIpc\fP have the effect of an automatic \fB\-.p\fP flag.
 .IP "Line splitting"
 .br
 The string is transformed into a sequence of strings by replacing
@@ -168,7 +168,7 @@ of the line.
 Three special two-characters sequences exist: \e#, \e\e and
 \e<newline>.
 Their effect is described under 'backslashing' above.
-Each - nonempty - line starts with a keyword, possibly
+Each \- nonempty \- line starts with a keyword, possibly
 preceded by blank space.
 The keyword can be followed by a further specification.
 The two are separated by blank space.
@@ -193,7 +193,7 @@ The lines in between associate properties to a transformation
 and may be presented in any order.
 The identifier after the \fIname\fP keyword determines the name
 of the transformation.
-This name is used for debugging and by the \fB-R\fP flag.
+This name is used for debugging and by the \fB\-R\fP flag.
 The keywords are used to specify which input suffices are
 recognized by that transformation,
 the program to run, the arguments to be handed to that program
@@ -205,14 +205,13 @@ The possible keywords are:
 .br
 followed by a sequence of suffices.
 Each file with one of these suffices is allowed as input file.
-Preprocessor transformations, those with the \fBP\fP property
-after the \fIprop\fP keyword, do not need the \fIfrom\fP
+Preprocessor transformations do not need the \fIfrom\fP
 keyword. All other transformations do.
 .nr PD 0
 .IP \fIto\fP
 .br
 followed by the suffix of the output file name or in the case of a
-linker -~indicated by C option after the \fIprop\fP keyword~-
+linker
 the output file name.
 .IP \fIprogram\fP
 .br
@@ -235,9 +234,9 @@ assignment separated by blank space.
 As soon as both description files are read, \fIack\fP looks
 at all transformations in these files to find a match for the
 flags given to \fIack\fP.
-The flags \fB-m\fP, \fB-o\fP,
-\fI-O\fP, \fB-r\fP, \fB-v\fP, \fB-g\fP, -\fB-c\fP, \fB-t\fP,
-\fB-k\fP, \fB-R\fP and -\f-.\fP are specific to \fIack\fP and
+The flags \fB\-m\fP, \fB\-o\fP,
+\fB\-O\fP, \fB\-r\fP, \fB\-v\fP, \fB\-g\fP, \fB\-c\fP, \fB\-t\fP,
+\fB\-k\fP, \fB\-R\fP and \fB\-.\fP are specific to \fIack\fP and
 not handed down to any transformation.
 The matching is performed in the order in which the entries
 appear in the definition.
@@ -249,11 +248,11 @@ replaced by the characters matched by
 the * in the expression.
 The right hand part is also subject to variable replacement.
 The variable will probably be used in the program arguments.
-The \fB-l\fP flags are special,
+The \fB\-l\fP flags are special,
 the order in which they are presented to \fIack\fP must be
 preserved.
 The identifier LNAME is used in conjunction with the scanning of
-\fB-l\fP flags.
+\fB\-l\fP flags.
 The value assigned to LNAME is used to replace the flag.
 The example further on shows the use all this.
 .IP \fIargs\fP
@@ -261,39 +260,51 @@ The example further on shows the use all this.
 The keyword is followed by the program call arguments.
 It is subject to backslashing, variable replacement, expression
 replacement, line splitting and IO replacement.
-The variables assigned to by \fImapflags\P will probably be
+The variables assigned to by \fImapflags\fP will probably be
 used here.
 The flags not recognized by \fIack\fP or any of the transformations
 are passed to the linker and inserted before all other arguments.
-.IP \fIprop\fB
+.IP \fIstdin\fP
 .br
-This -~optional~- keyword is followed by a sequence of options,
-each option is indicated by one character
-signifying a special property of the transformation.
+This keyword indicates that the transformation reads from standard input.
+.IP \fIstdout\fP
+.br
+This keyword indicates that the transformation writes on standard output.
+.IP \fIoptimizer\fP
+.br
+This keyword indicates that this transformation is an optimizer.
+.IP \fIlinker\fP
+.br
+This keyword indicates that this transformation is the linker.
+.IP \fIcombiner\fP
+.br
+This keyword indicates that this transformation is a combiner. A combiner
+is a program combining several files into one, but is not a linker.
+An example of a combiner is the global optimizer.
+.IP \fIprep\fP
+.br
+This \-~optional~\- keyword is followed by an option indicating its relation
+to the preprocessor.
 The possible options are:
 .DS X
-   <            the input file will be read from standard input
-   >            the output file will be written on standard output
-   p            the input files must be preprocessed
-   m            the input files must be preprocessed when starting with #
-   O            this transformation is an optimizer and may be skipped
-   P            this transformation is the preprocessor
-   C            this transformation is the linker
+  always	the input files must be preprocessed
+  cond	the input files must be preprocessed when starting with #
+  is	this transformation is the preprocessor
 .DE
 .IP \fIrts\fP
 .br
-This -~optional~- keyword indicates that the rest of the line must be
+This \-~optional~\- keyword indicates that the rest of the line must be
 used to set the variable RTS, if it was not already set.
 Thus the variable RTS is set by the first transformation
 executed which such a property or as a result from \fIack\fP's program
-call name (acc, cc, apc or pc) or by the \fB-.suffix\fP flag.
+call name (acc, cc, apc or pc) or by the \fB\-.suffix\fP flag.
 .IP \fIneed\fP
 .br
-This -~optional~- keyword indicates that the rest of the line must be
+This \-~optional~\- keyword indicates that the rest of the line must be
 concatenated to the NEEDS variable.
 This is done once for every transformation used or indicated
 by one of the program call names mentioned above or indicated
-by the \fB-.suffix\fP flag.
+by the \fB\-.suffix\fP flag.
 .br
 .nr PD 1v
 .NH
@@ -302,119 +313,118 @@ Conventions used in description files
 \fIAck\fP reads two description files.
 A few of the variables defined in the machine specific file
 are used by the descriptions of the front-ends.
-Other variables, set by \fack\fB, are of use to all
+Other variables, set by \fIack\fP, are of use to all
 transformations.
 .PP
 \fIAck\fP sets the variable EM to the home directory of the
 Amsterdam Compiler Kit.
 The variable SOURCE is set to the name of the argument that is currently
 being massaged, this is usefull for debugging.
+The variable SUFFIX is set to the suffix of the argument that is
+currently being massaged.
 .br
 The variable M indicates the
-directory in mach/{M}/lib/tail_..... and NAME is the string to
-be defined by the preprocessor with -D{NAME}.
+directory in lib/{M}/tail_..... and NAME is the string to
+be defined by the preprocessor with \-D{NAME}.
 The definitions of {w}, {s}, {l}, {d}, {f} and {p} indicate
 EM_WSIZE, EM_SSIZE, EM_LSIZE, EM_DSIZE, EM_FSIZE and EM_PSIZE
 respectively.
 .br
-The variable INCLUDES is used as the last argument to \fIcpp\fP,
-it is currently used to add the directory {EM}/include to
+The variable INCLUDES is used as the last argument to \fIcpp\fP.
+It is used to add directories to
 the list of directories containing #include files.
-{EM}/include contains a few files used by the library routines
-for part III from the
-.UX
-manual.
-These routines are included in the kit.
 .PP
 The variables HEAD, TAIL and RTS are set by \fIack\fP and used
 to compose the arguments for the linker.
 .NH
 Example
-.sp 1
-description for front-end
+.PP
+Description for front-end
 .DS X
-name cpp                        # the C-preprocessor
-        # no from, it's governed by the P property
-        to .i                   # result files have suffix i
-        program {EM}/lib/cpp    # pathname of loadfile
-        mapflag -I* CPP_F={CPP_F?} -I*          # grab -I.. -U.. and
-        mapflag -U* CPP_F={CPP_F?} -U*          # -D.. to use as arguments
-        mapflag -D* CPP_F={CPP_F?} -D*          # in the variable CPP_F
-        args {CPP_F?} {INCLUDES?} -D{NAME} -DEM_WSIZE={w} -DEM_PSIZE={p} \
-DEM_SSIZE={s} -DEM_LSIZE={l} -DEM_FSIZE={f} -DEM_DSIZE={d} <
-                                # The arguments are: first the -[IUD]...
-                                #  then the include dir's for this machine
-                                #  then the NAME and size valeus finally
-                                #  followed by the input file name
-        prop >P                 # Output on stdout, is preprocessor
+.ta 4n 40n
+name cpp	# the C-preprocessor
+		# no from, it's governed by the prep keyword
+	to .i	# result files have suffix i
+	program {EM}/lib/cpp	# pathname of loadfile
+	mapflag \-I* CPP_F={CPP_F?} \-I*	# grab \-I.. \-U.. and
+	mapflag \-U* CPP_F={CPP_F?} \-U*	# \-D.. to use as arguments
+	mapflag \-D* CPP_F={CPP_F?} \-D*	# in the variable CPP_F
+	args {CPP_F?} {INCLUDES?} \-D{NAME} \-DEM_WSIZE={w} \-DEM_PSIZE={p} \e
+	    \-DEM_SSIZE={s} \-DEM_LSIZE={l} \-DEM_FSIZE={f} \-DEM_DSIZE={d} <
+		# The arguments are: first the \-[IUD]...
+		#  then the include dir's for this machine
+		#  then the NAME and size values finally
+		#  followed by the input file name
+	stdout	# Output on stdout
+	prep is	# Is preprocessor
 end
-name cem                        # the C-compiler proper
-        from .c                 # used for files with suffix .c
-        to .k                   # produces compact code files
-        program {EM}/lib/em_cem # pathname of loadfile
-        mapflag -p CEM_F={CEM_F?} -Xp   # pass -p as -Xp to cem
-        mapflag -L CEM_F={CEM_F?} -l    # pass -L as -l to cem
-        args -Vw{w}i{w}p{p}f{f}s{s}l{l}d{d} {CEM_F?}
-                                # the arguments are the object sizes in
-                                # the -V... flag and possibly -l and -Xp
-        prop <>p                # input on stdin, output on stdout, use cpp
-        rts .c                  # use the C run-time system
-        need .c                 # use the C libraries
+name cem	# the C-compiler proper
+	from .c	# used for files with suffix .c
+	to .k	# produces compact code files
+	program {EM}/lib/em_cem	# pathname of loadfile
+	mapflag \-p CEM_F={CEM_F?} \-Xp	# pass \-p as \-Xp to cem
+	mapflag \-L CEM_F={CEM_F?} \-l	# pass \-L as \-l to cem
+	args \-Vw{w}i{w}p{p}f{f}s{s}l{l}d{d} {CEM_F?}
+		# the arguments are the object sizes in
+		# the \-V... flag and possibly \-l and \-Xp
+	stdin	# input from stdin
+	stdout	# output on stdout
+	prep always	# use cpp
+	rts .c	# use the C run-time system
+	need .c	# use the C libraries
 end
-name decode                     # make human readable files from compact code
-        from .k.m               # accept files with suffix .k or .m
-        to .e                   # produce .e files
-        program {EM}/lib/em_decode      # pathname of loadfile
-        args <                  # the input file name is the only argument
-        prop >                  # the output comes on stdout
+name decode	# make human readable files from compact code
+	from .k.m	# accept files with suffix .k or .m
+	to .e	# produce .e files
+	program {EM}/lib/em_decode	# pathname of loadfile
+	args <	# the input file name is the only argument
+	stdout	# the output comes on stdout
 end
 .DE

 .DS X
+.ta 4n 40n
 Example of a backend, in this case the EM assembler/loader.

-var w=2                         # wordsize 2
-var p=2                         # pointersize 2
-var s=2                         # short size 2
-var l=4                         # long size 4
-var f=4                         # float size 4
-var d=8                         # double size 8
-var M=int                       # Unused in this example
-var NAME=int22                  # for cpp (NAME=int results in #define int 1)
-var LIB=mach/int/lib/tail_      # part of file name for libraries
-var RT=mach/int/lib/head_       # part of file name for run-time startoff
-var SIZE_FLAG=-sm               # default internal table size flag
-var INCLUDES=-I{EM}/include     # use {EM}/include for #include files
-name asld                       # Assembler/loader
-        from .k.m.a             # accepts compact code and archives
-        to e.out                # output file name
-        program {EM}/lib/em_ass         # load file pathname
-        mapflag -l* LNAME={EM}/{LIB}*   # e.g. -ly becomes
-                                        #   {EM}/mach/int/lib/tail_y
-        mapflag -+* ASS_F={ASS_F?} -+*  # recognize -+ and --
-        mapflag --* ASS_F={ASS_F?} --*
-        mapflag -s* SIZE_FLAG=-s*       # overwrite old value of SIZE_FLAG
-        args {SIZE_FLAG} \
-                ({RTS}:.c={EM}/{RT}cc) ({RTS}:.p={EM}/{RT}pc) -o > < \
-                (.p:{TAIL}={EM}/{LIB}pc) \
-                (.c:{TAIL}={EM}/{LIB}cc.1s {EM}/{LIB}cc.2g) \
-                (.c.p:{TAIL}={EM}/{LIB}mon)
-                # -s[sml] must be first argument
-                # the next line contains the choice for head_cc or head_pc
-                # and the specification of in- and output.
-                # the last three args lines choose libraries
-        prop C  # This is the final stage
+var w=2	# wordsize 2
+var p=2	# pointersize 2
+var s=2	# short size 2
+var l=4	# long size 4
+var f=4	# float size 4
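+var d=8	# double size 8
+var M=int	# Unused in this example
+var NAME=int22	# for cpp (NAME=int results in #define int 1)
+var LIB=mach/int/lib/tail_	# part of file name for libraries
+var RT=mach/int/lib/head_	# part of file name for run-time startoff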
+var SIZE_FLAG=\-sm	# default internal table size flag
+var INCLUDES=\-I{EM}/include	# use {EM}/include for #include files
+name asld	# Assembler/loader
+	from .k.m.a	# accepts compact code and archives
+	to e.out	# output file name
+	program {EM}/lib/em_ass	# load file pathname
+	mapflag \-l* LNAME={EM}/{LIB}*	# e.g. \-ly becomes
+		#	{EM}/mach/int/lib/tail_y
+	mapflag \-+* ASS_F={ASS_F?} \-+*  # recognize \-+ and \-\-
+	mapflag \-\-* ASS_F={ASS_F?} \-\-*
+	mapflag \-s* SIZE_FLAG=\-s*	# overwrite old value of SIZE_FLAG
+	args {SIZE_FLAG} \e
+	    ({RTS}:.c={EM}/{RT}cc) ({RTS}:.p={EM}/{RT}pc) \-o > < \e
+	    (.p:{TAIL}={EM}/{LIB}pc) \e
+	    (.c:{TAIL}={EM}/{LIB}cc.1s {EM}/{LIB}cc.2g) \e
+	    (.c.p:{TAIL}={EM}/{LIB}mon)
+		# \-s[sml] must be first argument
+		# the next line contains the choice for head_cc or head_pc
+		# and the specification of in- and output.
+		# the last three args lines choose libraries
+	linker
 end
 .DE

-The command "ack -mint -v -v -I../h -L -ly prog.c"
- would result in the following
+The command \fIack \-mint \-v \-v \-I../h \-L \-ly prog.c\fP
+would result in the following
 calls (with exec(II)):
 .DS X
-1)  /lib/cpp -I../h -I/usr/em/include -Dint22 -DEM_WSIZE=2 -DEM_PSIZE=2
-      -DEM_SSIZE=2 -DEM_LSIZE=4 -DEM_FSIZE=4 -DEM_DSIZE=8 prog.c
-2)  /usr/em/lib/em_cem -Vw2i2p2f4s2l4d8 -l
-3)  /usr/em/lib/em_ass -sm /usr/em/mach/int/lib/head_cc -o e.out prog.k
-      /usr/em/mach/int/lib/tail_y /usr/em/mach/int/lib/tail_cc.1s
-      /usr/em/mach/int/lib/tail_cc.2g /usr/em/mach/int/lib/tail_mon
+.ta 4n
+1)	/usr/em/lib/cpp \-I../h \-I/usr/em/include \-Dint22 \-DEM_WSIZE=2 \-DEM_PSIZE=2 \e
+	    \-DEM_SSIZE=2 \-DEM_LSIZE=4 \-DEM_FSIZE=4 \-DEM_DSIZE=8 prog.c
+2)	/usr/em/lib/em_cem \-Vw2i2p2f4s2l4d8 \-l
+3)	/usr/em/lib/em_ass \-sm /usr/em/mach/int/lib/head_cc \-o e.out prog.k
+	/usr/em/mach/int/lib/tail_y /usr/em/mach/int/lib/tail_cc.1s
+	/usr/em/mach/int/lib/tail_cc.2g /usr/em/mach/int/lib/tail_mon
 .DE
--- a/doc/cg.doc
+++ b/doc/cg.doc
@@ -1,5 +1,6 @@
 .\" $Header$
 .RP
+.ND Nov 1984
 .TL
 The table driven code generator from 
 .br
@@ -17,6 +18,11 @@ The Amsterdam Compiler Kit is such a collection of tools.
 This document provides a description of the internal workings
 of the table driven code generator in the Amsterdam Compiler Kit,
 and a description of syntax and semantics of the driving table.
+.PP
+>>>  NOTE  <<<
+.br
+This document pertains to the \fBold\fP code generator.  Refer to the
+"Second Revised Edition" for the new code generator.
 .AE
 .NH 1
 Introduction
@@ -197,10 +203,10 @@ This is given as
 .DS
 FORMAT = string
 .DE
-The default for string is "%d" or "%ld" depending on the wordsize of 
-the machine. For example on the PDP 11 one can use
+The default for string is "%ld".
+For example on the PDP 11 one can use
 .DS
-FORMAT= "0%o"
+FORMAT= "0%lo"
 .DE
 to satisfy the old UNIX assembler that reads octal unless followed by
 a period, and the ACK assembler that follows C conventions.
@@ -974,7 +980,7 @@ and their range depends on the machine at hand.
 The type 'int' is used for things like labelcounters that won't require
 more than 16 bits precision.
 The type 'word' is used among others to assemble datawords and
-is of type 'long' if EM_WSIZE>2.
+is of type 'long'.
 The type 'full' is used for addresses and is of type 'long' if
 EM_WSIZE>2 or EM_PSIZE>2.
 .PP
@@ -1115,13 +1121,13 @@ Example mach.h for the PDP-11

 #define cst_fmt		"$%d."
 #define off_fmt		"%d."
-#define ilb_fmt		"I%02x%x"
+#define ilb_fmt		"I%x_%x"
 #define dlb_fmt		"_%d"
 #define	hol_fmt		"hol%d"

-#define hol_off		"%d.+hol%d"
+#define hol_off		"%ld.+hol%d"

-#define con_cst(x)	fprintf(codefile,"%d.\en",x)
+#define con_cst(x)	fprintf(codefile,"%ld.\en",x)
 #define con_ilb(x)	fprintf(codefile,"%s\en",x)
 #define con_dlb(x)	fprintf(codefile,"%s\en",x)

--- a/doc/cref.doc
+++ b/doc/cref.doc
@@ -1,5 +1,4 @@
 .\" $Header$
-.ll 72
 .nr ID 4
 .de hd
 'sp 2
--- a/doc/crefman.doc
+++ b/doc/crefman.doc
@@ -0,0 +1,627 @@
+.EQ
+delim $$
+.EN
+.RP
+.TL
+ACK/CEM Compiler
+.br
+Reference Manual
+.AU
+Erik H. Baalbergen
+.AI
+Department of Mathematics and Computer Science
+Vrije Universiteit
+Amsterdam
+The Netherlands
+.AB no
+.AE
+.NH
+C Language
+.PP
+This section discusses the extensions to and deviations from the C language,
+as described in [1].
+The issues are numbered according to the reference manual.
+.SH
+2.2 Identifiers
+.PP
+Upper and lower case letters are different.
+The number of significant letters
+is 32 by default, but may be set to another value using the \fB\-M\fP option.
+The identifier length should be set according to the rest of the compilation
+programs.
+.SH
+2.3 Keywords
+.SH
+\f5asm\fP
+.PP
+The keyword \f5asm\fP
+is recognized.
+However, the statement
+.DS
+.ft 5
+asm(string);
+.ft R
+.DE
+is skipped, while a warning is given.
+.SH
+\f5enum\fP
+.PP
+The \f5enum\fP keyword is recognized and interpreted.
+.SH
+\f5entry\fP, \f5fortran\fP
+.PP
+The words \f5entry\fP and \f5fortran\fP
+are reserved under the restricted option.
+The words are not interpreted by the compiler.
+.SH
+2.4.1 Integer Constants
+.PP
+An octal or hex constant which is less than or equal to the largest unsigned
+(target) machine integer is taken to be \f5unsigned\fP.
+An octal or hex constant which exceeds the largest unsigned (target) machine
+integer is taken to be \f5long\fP.
+.SH
+2.4.3 Character Constants
+.PP
+A character constant is a sequence of 1 up to \f5sizeof(int)\fP characters
+enclosed in single quotes.
+The value of a character constant '$c sub 1 c sub 2 ... c sub n$'
+is $d sub n + M \(mu d sub {n - 1} + ... + M sup {n - 1} \(mu d sub 2 + M sup n \(mu d sub 1$,
+where M is 1 + maximum unsigned number representable in an \f5unsigned char\fP,
+and $d sub i$ is the signed value (ASCII)
+of character $c sub i$.
+.SH
+2.4.4 Floating Constants
+.PP
+The compiler does not support compile-time floating point arithmetic.
+.SH
+2.6 Hardware characteristics
+.PP
+The compiler is capable of producing EM code for machines with the following
+properties
+.IP \(bu
+a \f5char\fP is 8 bits
+.IP \(bu
+the size of \f5int\fP is equal to the word size
+.IP \(bu
+the size of \f5short\fP may not exceed the size of \f5int\fP
+.IP \(bu
+the size of \f5int\fP may not exceed the size of \f5long\fP
+.IP \(bu
+the size of pointers is equal to the size of either \f5short\fP, \f5int\fP
+or \f5long\fP
+.LP
+.SH
+4 What's in a name?
+.SH
+\f5char\fP
+.PP
+Objects of type \f5char\fP are taken to be signed.
+The combination \f5unsigned char\fP is legal.
+.SH
+\f5unsigned\fP
+.PP
+The type combinations \f5unsigned char\fP, \f5unsigned short\fP and
+\f5unsigned long\fP are supported.
+.SH
+\f5enum\fP
+.PP
+The data type \f5enum\fP is implemented as described 
+in \fIRecent Changes to C\fP (see appendix A).
+.I Cem
+treats enumeration variables as if they were \f5int\fP.
+.SH
+\f5void\fP
+.PP
+Type \f5void\fP is implemented.
+The type specifies an empty set of values, which takes no storage space.
+.SH
+\fRFundamental types\fP
+.PP
+The names of the fundamental types can be redefined by the user, using
+\f5typedef\fP.
+.SH
+7 Expressions
+.PP
+The order of evaluation of expressions depends on the complexity of the
+subexpressions.
+In case of commutative operations, the most complex subexpression is
+evaluated first.
+Parameter lists are evaluated from right to left.
+.SH
+7.2 Unary operators
+.PP
+The type of a \f5sizeof\fP expression is \f5unsigned int\fP.
+.SH
+7.13 Conditional operator
+.PP
+Both the second and the third expression in a conditional expression may
+include assignment operators.
+They may be structs or unions.
+.SH
+7.14 Assignment operators
+.PP
+Structures may be assigned, passed as arguments to functions, and returned
+by functions.
+The types of operands taking part must be the same.
+.SH
+8.2 Type specifiers
+.PP
+The combinations \f5unsigned char\fP, \f5unsigned short\fP
+and \f5unsigned long\fP are implemented.
+.SH
+8.5 Structure and union declarations
+.PP
+Fields of any integral type, either signed or unsigned,
+are supported, as long as the type fits in a word on the target machine.
+.PP
+Fields are left adjusted by default; the first field is put into the left
+part of a word, the next one on the right side of the first one, etc.
+The \f5-Vr\fP option in the call of the compiler
+causes fields to be right adjusted within a machine word.
+.PP
+The tags of structs and unions occupy a different name space from that of 
+variables and that of member names.
+.SH
+9.7 Switch statement
+.PP
+The type of \fIexpression\fP in
+.DS
+.ft 5
+\f5switch (\fP\fIexpression\fP\f5)\fP \fIstatement\fP
+.ft
+.DE
+must be integral.
+A warning is given under the restricted option if the type is \f5long\fP.
+.SH
+10 External definitions
+.PP
+See [4] for a discussion on this complicated issue.
+.SH
+10.1 External function definitions
+.PP
+Structures may be passed as arguments to functions, and returned
+by functions.
+.SH
+11.1 Lexical scope
+.PP
+Typedef names may be redeclared like any other variable name; the ice mentioned
+in \(sc11.1 is walked correctly.
+.SH
+12 Compiler control lines
+.PP
+Lines which do not occur within comment, and with \f5#\fP as first
+character, are interpreted as compiler control lines.
+There may be an arbitrary number of spaces, tabs and comments (collectively
+referred to as \fIwhite space\fP) following the \f5#\fP.
+Comments may contain newline characters.
+Control lines with only white space between the \f5#\fP and the line separator
+are skipped.
+.PP
+The #\f5include\fP, #\f5ifdef\fP, #\f5ifndef\fP, #\f5undef\fP, #\f5else\fP and
+#\f5endif\fP control lines and line directives consist of a fixed number of
+arguments.
+The list of arguments may be followed by an arbitrary sequence of characters,
+in which comment is interpreted as such.
+(I.e., the text between \f5/*\fP and \f5*/\fP is skipped, regardless of
+newlines; note that commented-out lines beginning with \f5#\fP are not
+considered to be control lines.)
+.SH
+12.1 Token replacement
+.PP
+The replacement text of macros is taken to be a string of characters, in which
+an identifier may stand for a formal parameter, and in which comment is
+interpreted as such.
+Comments and newline characters, preceded by a backslash, in the replacement
+text are replaced by a space character.
+.PP
+The actual parameters of a macro are considered tokens and are
+balanced with regard to \f5()\fP, \f5{}\fP and \f5[]\fP.
+This prevents the use of macros like
+.DS
+.ft 5
+CTL([)
+.ft
+.DE
+.PP
+Formal parameters of a macro must have unique names within the formal-parameter
+list of that macro.
+.PP
+A message is given at the definition of a macro if the macro has 
+already been #\f5defined\fP, while the number of formal parameters differs or
+the replacement texts are not equal (apart from leading and trailing
+white space).
+.PP
+Recursive use of macros is detected by the compiler.
+.PP
+Standard #\f5defined\fP macros are
+.DS
+\f5__FILE__\fP  name of current input file as string constant
+\f5__DATE__\fP  current date as string constant; e.g. \f5"Tue Wed  2 14:45:23 1986"\fP
+\f5__LINE__\fP  current line number as an integer
+.DE
+.PP
+No message is given if \fIidentifier\fP is not known in
+.DS
+.ft 5
+#undef \fIidentifier\fP
+.ft
+.DE
+.SH
+12.2 File inclusion
+.PP
+A newline character is appended to each file which is included.
+.SH
+12.3 Conditional compilation
+.PP
+The #\f5if\fP, #\f5ifdef\fP and #\f5ifndef\fP control lines may be followed
+by an arbitrary number of
+.DS
+.ft 5
+#elif \fIconstant-expression\fP
+.ft
+.DE
+control lines, before the corresponding #\f5else\fP or #\f5endif\fP
+is encountered.
+The construct
+.DS
+.ft 5
+#elif \fIconstant-expression\fP
+some text
+#endif /* corresponding to #elif */
+.ft
+.DE
+is equivalent to
+.DS
+.ft 5
+#else
+#if \fIconstant-expression\fP
+some text
+#endif /* corresponding to #if */
+#endif /* corresponding to #else */
+.ft
+.DE
+.PP
+The \fIconstant-expression\fP in #\f5if\fP and #\f5elif\fP control lines
+may contain the construction
+.DS
+.ft 5
+defined(\fIidentifier\fP)
+.ft
+.DE
+which is replaced by \f51\fP, if \fIidentifier\fP has been #\f5defined\fP,
+and by \f50\fP, if not.
+.PP
+Comments in skipped lines are interpreted as such.
+.SH
+12.4 Line control
+.PP
+Line directives may occur in the following forms:
+.DS
+.ft 5
+#line \fIconstant\fP
+#line \fIconstant\fP "\fIfilename\fP"
+#\fIconstant\fP
+#\fIconstant\fP "\fIfilename\fP"
+.ft
+.DE
+Note that \fIfilename\fP is enclosed in double quotes.
+.SH
+14.2 Functions
+.PP
+If a pointer to a function is called, the function the pointer points to
+is called instead.
+.SH
+15 Constant expressions
+.PP
+The compiler distinguishes the following types of integral constant expressions
+.IP \(bu
+field-width specifier
+.IP \(bu
+case-entry specifier
+.IP \(bu
+array-size specifier
+.IP \(bu
+global variable initialization value
+.IP \(bu
+enum-value specifier
+.IP \(bu
+truth value in \f5#if\fP control line
+.LP
+.PP
+Constant integral expressions are compile-time evaluated while an effort
+is made to report overflow.
+Constant floating expressions are not compile-time evaluated.
+.NH
+Compiler flags
+.IP \fB\-C\fR
+Run the preprocessor stand-alone while maintaining the comments.
+Line directives are produced whenever needed.
+.IP \fB\-D\fP\fIname\fP=\fIstring-of-characters\fP
+.br
+Define \fIname\fR as macro with \fIstring-of-characters\fR as
+replacement text.
+.IP \fB\-D\fP\fIname\fP
+.br
+Equal to \fB\-D\fP\fIname\fP\fB=1\fP.
+.IP \fB\-E\fP
+Run the preprocessor stand-alone, i.e.,
+list the sequence of input tokens and delete any comments.
+Line directives are produced whenever needed.
+.IP \fB\-I\fIpath\fR
+.br
+Prepend \fIpath\fR to the list of include directories.
+To put the directories "include", "sys/h" and "util/h" into the
+include directory list in that order, the user has to specify
+.DS
+.ft 5
+-Iinclude -Isys/h -Iutil/h
+.ft R
+.DE
+An empty \fIpath\fP causes the standard include
+directory (usually \f5/usr/include\fP) to be forgotten.
+.IP \fB\-M\fP\fIn\fP
+.br
+Set maximum significant identifier length to \fIn\fP.
+.IP \fB\-n\fP
+Suppress EM register messages.
+The user-declared variables are not stored into registers on the target
+machine.
+.IP \fB\-p\fP
+Generate the EM \fBfil\fP and \fBlin\fP instructions in order to enable
+an interpreter to keep track of the current location in the source code.
+.IP \fB\-P\fP
+Equivalent to \fB\-E\fP, but without line directives.
+.IP \fB\-R\fP
+Interpret the input as restricted C (according to the language as 
+described in [1]).
+.IP \fB\-T\fP\fIpath\fP
+.br
+Create temporary files, if necessary, in directory \fIpath\fP.
+.IP \fB\-U\fP\fIname\fP
+.br
+Get rid of the compiler-predefined macro \fIname\fP, i.e.,
+consider
+.DS
+.ft 5
+#undef \fIname\fP
+.ft R
+.DE
+to appear in the beginning of the file.
+.IP \fB\-V\fIcm\fR.\fIn\fR,\ \fB\-V\fIcm\fR.\fIncm\fR.\fIn\fR\ ...
+.br
+Set the size and alignment requirements.
+The letter \fIc\fR indicates the simple type, which is one of
+\fBs\fR(short), \fBi\fR(int), \fBl\fR(long), \fBf\fR(float), \fBd\fR(double)
+or \fBp\fR(pointer).
+If \fIc\fR is \fBS\fP or \fBU\fP, then \fIn\fP is taken to be the initial
+alignment of structs or unions, respectively.
+The effective alignment of a struct or union is the least common multiple
+of the initial struct/union alignment and the alignments of its members.
+The \fIm\fR parameter can be used to specify the length of the type (in bytes)
+and the \fIn\fR parameter for the alignment of that type.
+Absence of \fIm\fR or \fIn\fR causes the default value to be retained.
+To specify that the bitfields should be right adjusted instead of the
+default left adjustment, specify \fBr\fR as \fIc\fR parameter.
+.IP \fB\-w\fR
+Suppress warning messages.
+.IP \fB\-\-\fIcharacter\fR
+.br
+Set debug-flag \fIcharacter\fP.
+This enables some special features offered by a debug and develop version of
+the compiler.
+Some particular flags may be recognized, others may have surprising effects.
+.RS
+.IP \fBd\fP
+Generate a dependency graph, reflecting the calling structure of functions.
+Lines of the form
+.DS
+.ft 5
+DFA: \fIcalling-function\fP: \fIcalled-function\fP
+.ft
+.DE
+are generated whenever a function call is encountered.
+.IP \fBf\fP
+Dump whole identifier table, including macros and reserved words.
+.IP \fBh\fP
+Supply hash-table statistics.
+.IP \fBi\fP
+Print names of included files.
+.IP \fBm\fP
+Supply statistics concerning the memory allocation.
+.IP \fBt\fP
+Dump table of identifiers.
+.IP \fBu\fP
+Generate extra statistics concerning the predefined types and identifiers.
+Works in combination with \fBf\fP or \fBt\fP.
+.IP \fBx\fP
+Print expression trees in human-readable format.
+.RE
+.LP
+.SH
+References
+.IP [1]
+Brian W. Kernighan, Dennis M. Ritchie,
+.I
+The C Programming Language
+.R
+.IP [2]
+L. Rosler,
+.I
+Draft Proposed Standard - Programming Language C,
+.R
+ANSI X3J11 Language Subcommittee
+.IP [3]
+Erik H. Baalbergen, Dick Grune, Maarten Waage,
+.I
+The CEM Compiler,
+.R
+Informatica Manual IM-4, Dept. of Mathematics and Computer Science, Vrije
+Universiteit, Amsterdam, The Netherlands
+.IP [4]
+Erik H. Baalbergen,
+.I
+Modeling global declarations in C,
+.R
+internal paper
+.LP
+.bp
+.SH
+Appendix A - Enumeration Type
+.PP
+The syntax is
+.sp
+.RS
+.I enum-specifier :
+.RS
+\&\f5enum\fP { \fIenum-list\fP }
+.br
+\&\f5enum\fP \fIidentifier\fP { \fIenum-list\fP }
+.br
+\&\f5enum\fP \fIidentifier\fP
+.RE
+.sp
+\&\fIenum-list\fP :
+.RS
+\&\fIenumerator\fP
+.br
+\&\fIenum-list\fP , \fIenumerator\fP
+.RE
+.sp
+\&\fIenumerator\fP :
+.RS
+\&\fIidentifier\fP
+.br
+\&\fIidentifier\fP = \fIconstant-expression\fP
+.RE
+.sp
+.RE
+The identifier has the same role as the structure tag in a struct specification.
+It names a particular enumeration type.
+.PP
+The identifiers in the enum-list are declared as constants, and may appear
+whenever constants are required.
+If no enumerators with
+.B = 
+appear, then the values of the constants begin at 0 and increase by 1 as the
+declaration is read from left to right.
+An enumerator with
+.B =
+gives the associated identifier the value indicated; subsequent identifiers 
+continue the progression from the assigned value.
+.PP
+Enumeration tags and constants must all be distinct, and, unlike structure
+tags and members, are drawn from the same set as ordinary identifiers.
+.PP
+Objects of a given enumeration type are regarded as having a type distinct
+from objects of all other types.
+.bp
+.SH
+Appendix B:  C grammar in LL(1) form
+.PP
+The \fBbold-faced\fP and \fIitalicized\fP tokens represent terminal symbols.
+.vs 16
+.nf
+\fBexternal definitions\fP
+program:  external-definition*
+external-definition:  ext-decl-specifiers [declarator [function  |  non-function]  |  '\fB;\fP']  |  asm-statement
+ext-decl-specifiers:  decl-specifiers?
+non-function:  initializer? ['\fB,\fP' init-declarator]* '\fB;\fP'
+function:  declaration* compound-statement
+.sp 1
+\fBdeclarations\fP
+declaration:  decl-specifiers init-declarator-list? '\fB;\fP'
+decl-specifiers:  other-specifier+ [single-type-specifier other-specifier*]?  |  single-type-specifier other-specifier*
+other-specifier:  \fBauto\fP  |  \fBstatic\fP  |  \fBextern\fP  |  \fBtypedef\fP  |  \fBregister\fP  |  \fBshort\fP  |  \fBlong\fP  |  \fBunsigned\fP
+type-specifier:  decl-specifiers
+single-type-specifier:  \fItype-identifier\fP  |  struct-or-union-specifier  |  enum-specifier
+init-declarator-list:  init-declarator ['\fB,\fP' init-declarator]*
+init-declarator:  declarator initializer?
+declarator:  primary-declarator ['\fB(\fP' formal-list ? '\fB)\fP'  |  arrayer]*  |  '\fB*\fP' declarator
+primary-declarator:  identifier  |  '\fB(\fP' declarator '\fB)\fP'
+arrayer:  '\fB[\fP' constant-expression? '\fB]\fP'
+formal-list:  formal ['\fB,\fP' formal]*
+formal:  identifier
+enum-specifier:  \fBenum\fP [enumerator-pack  |  identifier enumerator-pack?]
+enumerator-pack:  '\fB{\fP' enumerator ['\fB,\fP' enumerator]* '\fB,\fP'? '\fB}\fP'
+enumerator:  identifier ['\fB=\fP' constant-expression]?
+struct-or-union-specifier:  [ \fBstruct\fP  |  \fBunion\fP] [ struct-declaration-pack  |  identifier struct-declaration-pack?]
+struct-declaration-pack:  '\fB{\fP' struct-declaration+ '\fB}\fP'
+struct-declaration:  type-specifier struct-declarator-list '\fB;\fP'?
+struct-declarator-list:  struct-declarator ['\fB,\fP' struct-declarator]*
+struct-declarator:  declarator bit-expression?  |  bit-expression
+bit-expression:  '\fB:\fP' constant-expression
+initializer:  '\fB=\fP'? initial-value
+cast:  '\fB(\fP' type-specifier abstract-declarator '\fB)\fP'
+abstract-declarator:  primary-abstract-declarator ['\fB(\fP' '\fB)\fP'  |  arrayer]*  |  '\fB*\fP' abstract-declarator
+primary-abstract-declarator:  ['\fB(\fP' abstract-declarator '\fB)\fP']?
+.sp 1
+\fBstatements\fP
+statement:
+	 expression-statement
+	| label '\fB:\fP' statement
+	| compound-statement
+	| if-statement
+	| while-statement
+	| do-statement
+	| for-statement
+	| switch-statement
+	| case-statement
+	| default-statement
+	| break-statement
+	| continue-statement
+	| return-statement
+	| jump
+	| '\fB;\fP'
+	| asm-statement
+	;
+expression-statement:  expression '\fB;\fP'
+label:  identifier
+if-statement:  \fBif\fP '\fB(\fP' expression '\fB)\fP' statement [\fBelse\fP statement]?
+while-statement:  \fBwhile\fP '\fB(\fP' expression '\fB)\fP' statement
+do-statement:  \fBdo\fP statement \fBwhile\fP '\fB(\fP' expression '\fB)\fP' '\fB;\fP'
+for-statement:  \fBfor\fP '\fB(\fP' expression? '\fB;\fP' expression? '\fB;\fP' expression? '\fB)\fP' statement
+switch-statement:  \fBswitch\fP '\fB(\fP' expression '\fB)\fP' statement
+case-statement:  \fBcase\fP constant-expression '\fB:\fP' statement
+default-statement:  \fBdefault\fP '\fB:\fP' statement
+break-statement:  \fBbreak\fP '\fB;\fP'
+continue-statement:  \fBcontinue\fP '\fB;\fP'
+return-statement:  \fBreturn\fP expression? '\fB;\fP'
+jump:  \fBgoto\fP identifier '\fB;\fP'
+compound-statement:  '\fB{\fP' declaration* statement* '\fB}\fP'
+asm-statement:  \fBasm\fP '\fB(\fP' \fIstring\fP '\fB)\fP' '\fB;\fP'
+.sp 1
+\fBexpressions\fP
+initial-value:  assignment-expression  |  initial-value-pack
+initial-value-pack:  '\fB{\fP' initial-value-list '\fB}\fP'
+initial-value-list:  initial-value ['\fB,\fP' initial-value]* '\fB,\fP'?
+primary:  \fIidentifier\fP  |  constant  |  \fIstring\fP  |  '\fB(\fP' expression '\fB)\fP'
+secundary:  primary [index-pack  |  parameter-pack  |  selection]*
+index-pack:  '\fB[\fP' expression '\fB]\fP'
+parameter-pack:  '\fB(\fP' parameter-list? '\fB)\fP'
+selection:  ['\fB.\fP'  |  '\fB\->\fP'] identifier
+parameter-list:  assignment-expression ['\fB,\fP' assignment-expression]*
+postfixed:  secundary postop?
+unary:  cast unary  |  postfixed  |  unop unary  |  size-of
+size-of:  \fBsizeof\fP [cast  |  unary]
+binary-expression:  unary [binop binary-expression]*
+conditional-expression:  binary-expression ['\fB?\fP' expression '\fB:\fP' assignment-expression]?
+assignment-expression:  conditional-expression [asgnop assignment-expression]?
+expression:  assignment-expression ['\fB,\fP' assignment-expression]*
+unop:  '\fB*\fP'  |  '\fB&\fP'  |  '\fB\-\fP'  |  '\fB!\fP'  |  '\fB~ \fP'  |  '\fB++\fP'  |  '\fB\-\-\fP'
+postop:  '\fB++\fP'  |  '\fB\-\-\fP'
+multop:  '\fB*\fP'  |  '\fB/\fP'  |  '\fB%\fP'
+addop:  '\fB+\fP'  |  '\fB\-\fP'
+shiftop:  '\fB<<\fP'  |  '\fB>>\fP'
+relop:  '\fB<\fP'  |  '\fB>\fP'  |  '\fB<=\fP'  |  '\fB>=\fP'
+eqop:  '\fB==\fP'  |  '\fB!=\fP'
+arithop:  multop  |  addop  |  shiftop  |  '\fB&\fP'  |  '\fB^ \fP'  |  '\fB|\fP'
+binop:  arithop  |  relop  |  eqop  |  '\fB&&\fP'  |  '\fB||\fP'
+asgnop:  '\fB=\fP'  |  '\fB+\fP' '\fB=\fP'  |  '\fB\-\fP' '\fB=\fP'  |  '\fB*\fP' '\fB=\fP'  |  '\fB/\fP' '\fB=\fP'  |  '\fB%\fP' '\fB=\fP'
+	| '\fB<<\fP' '\fB=\fP'  |  '\fB>>\fP' '\fB=\fP'  |  '\fB&\fP' '\fB=\fP'  |  '\fB^ \fP' '\fB=\fP'  |  '\fB|\fP' '\fB=\fP'
+	| '\fB+=\fP'  |  '\fB\-=\fP'  |  '\fB*=\fP'  |  '\fB/=\fP'  |  '\fB%=\fP'
+	| '\fB<<=\fP'  |  '\fB>>=\fP'  |  '\fB&=\fP'  |  '\fB^=\fP'  |  '\fB|=\fP'
+constant:  \fIinteger\fP  |  \fIfloating\fP
+constant-expression:  assignment-expression
+identifier:  \fIidentifier\fP  |  \fItype-identifier\fP
+.fi
--- a/doc/ego/.distr
+++ b/doc/ego/.distr
@@ -0,0 +1,18 @@
+Makefile
+bo
+ca
+cf
+cj
+cs
+ic
+il
+intro
+lv
+ov
+ra
+refs.gen
+refs.opt
+refs.stat
+sp
+sr
+ud
--- a/doc/ego/Makefile
+++ b/doc/ego/Makefile
@@ -0,0 +1,52 @@
+REFS=-p refs.opt -p refs.stat -p refs.gen
+INTRO=intro/intro?
+OV=ov/ov?
+IC=ic/ic?
+CF=cf/cf?
+IL=il/il?
+SR=sr/sr?
+CS=cs/cs?
+SP=sp/sp?
+UD=ud/ud?
+LV=lv/lv?
+CJ=cj/cj?
+BO=bo/bo?
+RA=ra/ra?
+CA=ca/ca?
+EGO=$(INTRO) $(OV) $(IC) $(CF) $(IL) $(SR) $(CS) $(SP) $(CJ) $(BO) \
+    $(UD) $(LV) $(RA) $(CA)
+REFER=refer
+
+../ego.doc:	$(EGO)	# whole report: all chapters run through refer only; output stays unformatted
+	 $(REFER) -sA+T -l4,2 $(REFS) intro/head $(EGO) intro/tail > ../ego.doc
+
+ego.f:	$(EGO)	# whole report, refer output piped through nroff -ms
+	 $(REFER)  -sA+T -l4,2 $(REFS) intro/head $(EGO) intro/tail | nroff -ms > ego.f
+intro.f:	$(INTRO)	# introduction only; NOTE(review): recipe uses ov/head, not intro/head -- confirm intended
+	 $(REFER)  -sA+T -l4,2 $(REFS) ov/head $(INTRO) intro/tail | nroff -ms > intro.f
+ov.f:	$(OV)
+	 $(REFER)  -sA+T -l4,2 $(REFS) ov/head $(OV) intro/tail | nroff -ms > ov.f
+ic.f:	$(IC)
+	 $(REFER)  -sA+T -l4,2 $(REFS) ic/head $(IC) intro/tail | nroff -ms > ic.f
+cf.f:	$(CF)	# Control Flow phase chapter on its own
+	 $(REFER)  -sA+T -l4,2 $(REFS) cf/head $(CF) intro/tail | nroff -ms > cf.f
+il.f:	$(IL)
+	 $(REFER)  -sA+T -l4,2 $(REFS) il/head $(IL) intro/tail | nroff -ms > il.f
+sr.f:	$(SR)
+	 $(REFER)  -sA+T -l4,2 $(REFS) sr/head $(SR) intro/tail | nroff -ms > sr.f
+cs.f:	$(CS)
+	 $(REFER)	-sA+T -l4,2 $(REFS) cs/head $(CS) intro/tail | nroff -ms > cs.f
+sp.f:	$(SP)
+	 $(REFER)  -sA+T -l4,2 $(REFS) sp/head $(SP) intro/tail | nroff -ms > sp.f
+cj.f:	$(CJ)	# Cross Jumping chapter on its own
+	 $(REFER)  -sA+T -l4,2 $(REFS) cj/head $(CJ) intro/tail | nroff -ms > cj.f
+bo.f:	$(BO)	# Branch Optimization chapter on its own
+	 $(REFER)  -sA+T -l4,2 $(REFS) bo/head $(BO) intro/tail | nroff -ms > bo.f
+ud.f:	$(UD)
+	 $(REFER)  -sA+T -l4,2 $(REFS) ud/head $(UD) intro/tail | nroff -ms > ud.f
+lv.f:	$(LV)
+	 $(REFER)  -sA+T -l4,2 $(REFS) lv/head $(LV) intro/tail | nroff -ms > lv.f
+ra.f:	$(RA)
+	 $(REFER)  -sA+T -l4,2 $(REFS) ra/head $(RA) intro/tail | nroff -ms > ra.f
+ca.f:	$(CA)	# Compact Assembly generation chapter on its own
+	 $(REFER)  -sA+T -l4,2 $(REFS) ca/head $(CA) intro/tail | nroff -ms > ca.f
--- a/doc/ego/bo/.distr
+++ b/doc/ego/bo/.distr
@@ -0,0 +1 @@
+bo1
--- a/doc/ego/bo/bo1
+++ b/doc/ego/bo/bo1
@@ -0,0 +1,151 @@
+.bp
+.NH 1
+Branch Optimization
+.NH 2
+Introduction
+.PP
+The Branch Optimization phase (BO) performs two related
+(branch) optimizations.
+.NH 3
+Fusion of basic blocks
+.PP
+If two basic blocks B1 and B2 have the following properties:
+.DS
+SUCC(B1) = {B2}
+PRED(B2) = {B1}
+.DE
+then B1 and B2 can be combined into one basic block.
+If B1 ends in an unconditional jump to the beginning of B2, this
+jump can be eliminated,
+hence saving a little execution time and object code size.
+This technique can be used to eliminate some deficiencies
+introduced by the front ends (for example, the "C" front end
+translates switch statements inefficiently due to its one pass nature).
+.NH 3
+While-loop optimization
+.PP
+The straightforward way to translate a while loop is to
+put the test for loop termination at the beginning of the loop.
+.DS
+while cond loop                  LAB1: Test cond
+   body of the loop     --->           Branch On False To LAB2
+end loop                               code for body of loop
+				       Branch To LAB1
+				 LAB2:
+
+Fig. 10.1 Example of Branch Optimization
+.DE
+If the condition fails at the Nth iteration, the following code
+gets executed (dynamically):
+.DS
+N   *  conditional branch (which fails N-1 times)
+N-1 *  unconditional branch
+N-1 *  body of the loop
+.DE
+An alternative translation is:
+.DS
+     Branch To LAB2
+LAB1:
+     code for body of loop
+LAB2:
+     Test cond
+     Branch On True To LAB1
+.DE
+This translation results in the following profile:
+.DS
+N   *  conditional branch (which succeeds N-1 times)
+1   *  unconditional branch
+N-1 *  body of the loop
+.DE
+So the second translation will be significantly faster if N >> 2.
+If N=1, execution time will be slightly increased (for N=2 it is unchanged).
+On the average, the program will be sped up.
+Note that the code sizes of the two translations will be the same.
+.NH 2
+Implementation
+.PP
+The basic block fusion technique is implemented
+by traversing the control flow graph of a procedure,
+looking for basic blocks B with only one successor (S).
+If one is found, it is checked if S has only one predecessor
+(which has to be B).
+If so, the two basic blocks can in principle be combined.
+However, as one basic block will have to be moved,
+the textual order of the basic blocks will be altered.
+This reordering causes severe problems in the presence
+of conditional jumps.
+For example, if S ends in a conditional branch,
+the basic block that comes textually next to S must stay
+in that position.
+So the transformation in Fig. 10.2 is illegal.
+.DS
+LAB1: S1              LAB1: S1
+      BRA LAB2        S2
+      ...       -->   BEQ LAB3
+LAB2: S2              ...
+      BEQ LAB3        S3
+      S3
+
+Fig. 10.2 An illegal transformation of Branch Optimization
+.DE
+If B is moved towards S the same problem occurs if the block before B
+ends in a conditional jump.
+The problem could be solved by adding one extra branch,
+but this would reduce the gains of the optimization to zero.
+Hence the optimization will only be done if the block that
+follows S (in the textual order) is not a successor of S.
+This condition assures that S does not end in a conditional branch.
+The condition always holds for the code generated by the "C"
+front end for a switch statement.
+.PP
+After the transformation has been performed,
+some attributes of the basic blocks involved (such as successor and
+predecessor sets and immediate dominator) must be recomputed.
+.PP
+The while-loop technique is applied to one loop at a time.
+The list of basic blocks of the loop is traversed to find
+a block B that satisfies the following conditions:
+.IP 1.
+the textually next block to B is not part of the loop
+.IP 2.
+the last instruction of B is an unconditional branch;
+hence B has only one successor, say S
+.IP 3.
+the textually next block of B is a successor of S
+.IP 4.
+the last instruction of S is a conditional branch
+.LP
+If such a block B is found, the control flow graph is changed
+as depicted in Fig. 10.3.
+.DS
+       |				    |
+       |				    v
+       v				    |
+       |-----<------|			    ----->-----|
+   ____|____	    |				       |
+   |	   |	    |		    |-------|	       |
+   |  S1   |	    |		    |	    v	       |
+   |  Bcc  |	    |		    |	  ....	       |
+|--|	   |	    |		    |		       |
+|  ---------	    |		    |	----|----      |
+|		    |		    |	|	|      |
+|     ....	    ^		    |	|  S2	|      |
+|		    |		    |	|	|      |
+|   ---------	    |		    |	|	|      |
+v   |	    |	    |		    ^	---------      |
+|   |  S2   |	    |		    |	    |	       |
+|   | BRA   |	    |		    |	    |-----<-----
+|   |	    |	    |		    |	    v
+|   ---------	    |		    |	____|____
+|	|	    |		    |	|	|
+|	------>------		    |	|  S1	|
+|				    |	|  Bnn  |
+|-------|			    |	|	|
+	|			    |	----|----
+	v			    |	    |
+				    |----<--|
+					    |
+					    v
+
+Fig. 10.3 Transformation of the CFG by Branch Optimization
+.DE
--- a/doc/ego/ca/.distr
+++ b/doc/ego/ca/.distr
@@ -0,0 +1 @@
+ca1
--- a/doc/ego/ca/ca1
+++ b/doc/ego/ca/ca1
@@ -0,0 +1,75 @@
+.bp
+.NH 1
+Compact assembly generation
+.NH 2
+Introduction
+.PP
+The "Compact Assembly generation phase" (CA) transforms the
+intermediate code of the optimizer into EM code in
+Compact Assembly Language (CAL) format.
+In the intermediate code, all program entities
+(such as procedures, labels, global variables)
+are denoted by a unique identifying number (see 3.5).
+In the CAL output of the optimizer these numbers have to
+be replaced by normal identifiers (strings).
+The original identifiers of the input program are used whenever possible.
+Recall that the IC phase generates two files that can be
+used to map unique identifying numbers to procedure names and
+global variable names.
+For instruction labels CA always generates new names.
+The reasons for doing so are:
+.IP -
+instruction labels are only visible inside one procedure, so they can
+not be referenced in other modules
+.IP -
+the names are not very suggestive anyway, as they must be integer numbers
+.IP -
+the optimizer considerably changes the control structure of the program,
+so there is really no one to one mapping of instruction labels in
+the input and the output program.
+.LP
+As the optimizer combines all input modules into one module,
+visibility problems may occur.
+Two modules M1 and M2 can both define an identifier X (provided that
+X is not externally visible in any of these modules).
+If M1 and M2 are combined into one module M, two distinct
+entities with the same name would exist in M, which
+is not allowed.
+.[~[
+tanenbaum machine architecture
+.], section 11.1.4.3]
+In these cases, CA invents a new unique name for one of the entities.
+.NH 2
+Implementation
+.PP
+CA first reads the files containing the procedure and global variable names
+and stores the names in two tables.
+It scans these tables to make sure that all names are different.
+Subsequently it reads the EM text, one procedure at a time,
+and outputs it in CAL format.
+The major part of the code that does the latter transformation
+is adapted from the EM Peephole Optimizer.
+.PP
+The main problem of the implementation of CA is to
+assure that the visibility rules are obeyed.
+If an identifier must be externally visible (i.e.
+it was externally visible in the input program)
+and the identifier is defined (in the output program) before
+being referenced,
+an EXA or EXP pseudo must be generated for it.
+(Note that the optimizer may change the order of definitions and
+references, so some pseudos may be needed that were not
+present in the input program).
+On the other hand, an identifier may be only internally visible.
+If such an identifier is referenced before being defined,
+an INA or INP pseudo must be emitted prior to its first reference.
+.UH
+Acknowledgements
+.PP
+The author would like to thank Andy Tanenbaum for his guidance,
+Duk Bekema for implementing the Common Subexpression Elimination phase
+and writing the initial documentation of that phase,
+Dick Grune for reading the manuscript of this report
+and Ceriel Jacobs, Ed Keizer, Martin Kersten, Hans van Staveren
+and the members of the S.T.W. user's group for their
+interest and assistance.
--- a/doc/ego/cf/.distr
+++ b/doc/ego/cf/.distr
@@ -0,0 +1,6 @@
+cf1
+cf2
+cf3
+cf4
+cf5
+cf6
--- a/doc/ego/cf/cf1
+++ b/doc/ego/cf/cf1
@@ -0,0 +1,94 @@
+.bp
+.NH
+The Control Flow Phase
+.PP
+In the previous chapter we described the intermediate
+code of the global optimizer.
+We also specified which part of this code
+was constructed by the IC phase of the optimizer.
+The Control Flow Phase (\fICF\fR) does
+the remainder of the job,
+i.e. it determines:
+.IP -
+the control flow graphs
+.IP -
+the loop tables
+.IP -
+the calling, change and use attributes of
+the procedure table entries
+.LP
+CF operates on one procedure at a time.
+For every procedure it first reads the EM instructions
+from the EM-text file and groups them into basic blocks.
+For every basic block, its successors and
+predecessors are determined,
+resulting in the control flow graph.
+Next, the immediate dominator of every basic block
+is computed.
+Using these dominators, any loop in the
+procedure is detected.
+Finally, interprocedural analysis is done,
+after which we will know the global effects of
+every procedure call on its environment.
+.sp
+CF uses the same internal data structures
+for the procedure table and object table as IC.
+.NH 2
+Partitioning into basic blocks
+.PP
+With regard to flow of control, we distinguish
+three kinds of EM instructions:
+jump instructions, instruction label definitions and
+normal instructions.
+Jump instructions are all conditional or unconditional
+branch instructions,
+the case instructions (CSA/CSB)
+and the RET (return) instruction.
+A procedure call (CAL) is not considered to be a jump.
+A defining occurrence of an instruction label
+is regarded as an EM instruction.
+.PP
+An instruction starts
+a new basic block, in any of the following cases:
+.IP 1.
+It is the first instruction of a procedure
+.IP 2.
+It is the first of a list of instruction label
+defining occurrences
+.IP 3.
+It follows a jump
+.LP
+If there are several consecutive instruction labels
+(which is highly unusual),
+all of them are put in the same basic block.
+Note that several cases may overlap,
+e.g. a label definition at the beginning of a procedure
+or a label following a jump.
+.PP
+A simple Finite State Machine is used to model
+the above rules.
+It also recognizes the end of a procedure,
+marked by an END pseudo.
+The basic blocks are stored internally as a doubly linked
+linear list.
+The blocks are linked in textual order.
+Every node of this list has the attributes described
+in the previous chapter (see syntax rule for
+basic_block).
+Furthermore, every node contains a pointer to its
+EM instructions,
+which are represented internally
+as a linear, doubly linked list,
+just as in the IC phase.
+However, instead of one list per procedure (as in IC)
+there is now one list per basic block.
+.PP
+On the fly, a table is built that maps
+every label identifier to the label definition
+instruction.
+This table is used for computing the control flow.
+The table is stored as a dynamically allocated array.
+The length of the array is the number of labels
+of the current procedure;
+this value can be found in the procedure table,
+where it was stored by IC.
--- a/doc/ego/cf/cf2
+++ b/doc/ego/cf/cf2
@@ -0,0 +1,50 @@
+.NH 2
+Control Flow
+.PP
+A \fIsuccessor\fR of a basic block B is a block C
+that can be executed immediately after B.
+B is said to be a \fIpredecessor\fR of C.
+A block ending with a RET instruction
+has no successors.
+Such a block is called a \fIreturn block\fR.
+Any block that has no predecessors cannot be
+executed at all (i.e. it is unreachable),
+unless it is the first block of a procedure,
+called the \fIprocedure entry block\fR.
+.PP
+Internally, the successor and predecessor
+attributes of a basic block are stored as \fIsets\fR.
+Alternatively, one may regard all these
+sets of all basic blocks as a conceptual \fIgraph\fR,
+in which there is an edge from B to C if C
+is in the successor set of B.
+We call this conceptual graph
+the \fIControl Flow Graph\fR.
+.PP
+The only successor of a basic block ending on an
+unconditional branch instruction is the block that
+contains the label definition of the target of the jump.
+The target instruction can be found via the LAB_ID
+that is the operand of the jump instruction,
+by using the label-map table mentioned
+above.
+If the last instruction of a block is a
+conditional jump,
+the successors are the target block and the textually
+next block.
+The last instruction can also be a case jump
+instruction (CSA or CSB).
+We then analyze the case descriptor,
+to find all possible target instructions
+and their associated blocks.
+We require the case descriptor to be allocated in
+a ROM, so it cannot be changed dynamically.
+A case jump via an alterable descriptor could in principle
+go to any label in the program.
+In the presence of such an uncontrolled jump,
+hardly any optimization can be done.
+We do not expect any front end to generate such a descriptor,
+however, because of the controlled nature
+of case statements in high level languages.
+If the basic block does not end in a jump instruction,
+its only successor is the textually next block.
--- a/doc/ego/cf/cf3
+++ b/doc/ego/cf/cf3
@@ -0,0 +1,53 @@
+.NH 2
+Immediate dominators
+.PP
+A basic block B dominates a block C if every path
+in the control flow graph from the procedure entry block
+to C goes through B.
+The immediate dominator of C is the closest dominator
+of C on any path from the entry block.
+See also
+.[~[
+aho compiler design
+.], section 13.1.]
+.PP
+There are a number of algorithms to compute
+the immediate dominator relation.
+.IP 1.
+Purdom and Moore give an algorithm that is
+easy to program and easy to describe (although the
+description they give is unreadable;
+it is given in a very messy Algol60 program full of gotos).
+.[
+predominators 
+.]
+.IP 2.
+Aho and Ullman present a bitvector algorithm, which is also
+easy to program and to understand.
+(See 
+.[~[
+aho compiler design
+.], section 13.1.]).
+.IP 3.
+Lengauer and Tarjan introduce a fast algorithm that is
+hard to understand, yet remarkably easy to implement.
+.[
+lengauer dominators
+.]
+.LP
+The Purdom-Moore algorithm is very slow if the
+number of basic blocks in the flow graph is large.
+The Aho-Ullman algorithm in fact computes the
+dominator relation,
+from which the immediate dominator relation can be computed
+in time quadratic in the number of basic blocks, worst case.
+The storage requirement is also quadratic in the number
+of blocks.
+The running time of the third algorithm is proportional
+to:
+.DS
+(number of edges in the graph) * log(number of blocks).
+.DE
+We have chosen this algorithm because it is fast
+(as shown by experiments done by Lengauer and Tarjan),
+it is easy to program and requires little data space.
--- a/doc/ego/cf/cf4
+++ b/doc/ego/cf/cf4
@@ -0,0 +1,93 @@
+.NH 2
+Loop detection
+.PP
+Loops are detected by using the loop construction
+algorithm of Aho and Ullman.
+.[~[
+aho compiler design
+.], section 13.1.]
+This algorithm uses \fIback edges\fR.
+A back edge is an edge from B to C in the CFG,
+whose head (C) dominates its tail (B).
+The loop associated with this back edge
+consists of C plus all nodes in the CFG
+that can reach B without going through C.
+.PP
+As an example of how the algorithm works,
+consider the piece of program of Fig. 4.1.
+First just look at the program and think for
+yourself what part of the code constitutes the loop.
+.DS
+loop
+   if cond then                       1
+      -- lots of simple
+      -- assignment
+      -- statements              2          3
+      exit; -- exit loop
+   else
+      S; -- one statement
+   end if;
+end loop;
+
+Fig. 4.1 A misleading loop
+.DE
+Although a human being may be easily deceived
+by the brackets "loop" and "end loop",
+the loop detection algorithm will correctly
+reply that only the test for "cond" and
+the single statement in the false-part
+of the if statement are part of the loop!
+The statements in the true-part only get
+executed once, so there really is no reason at all
+to say they're part of the loop too.
+The CFG contains one back edge, "3->1".
+As node 3 cannot be reached from node 2,
+the latter node is not part of the loop.
+.PP
+A source of problems with the algorithm is the fact
+that different back edges may result in
+the same loop.
+Such an ill-structured loop is
+called a \fImessy\fR loop.
+After a loop has been constructed, it is checked
+if it is really a new loop.
+.PP
+Loops can partly overlap, without one being nested
+inside the other.
+This is the case in the program of Fig. 4.2.
+.DS
+1:                              1
+   S1;
+2:
+   S2;                          2
+   if cond then
+      goto 4;
+   S3;                     3         4
+   goto 1;
+4:
+   S4;
+   goto 1;
+
+Fig. 4.2 Partly overlapping loops
+.DE
+There are two back edges "3->1" and "4->1",
+resulting in the loops {1,2,3} and {1,2,4}.
+With every basic block we associate a set of
+all loops it is part of.
+It is not sufficient just to record its
+innermost enclosing loop.
+.PP
+After all loops of a procedure are detected, we determine
+the nesting level of every loop.
+Finally, we find all strong and firm blocks of the loop.
+If the loop has only one back edge (i.e. it is not messy),
+the set of firm blocks consists of the
+tail of this back edge and its dominators
+in the loop (including the loop entry block).
+A firm block is also strong if it is not a
+successor of a block that may exit the loop;
+a block may exit a loop if it has an (immediate) successor
+that is not part of the loop.
+For messy loops we do not determine the strong
+and firm blocks. These loops are expected
+to occur very rarely.
--- a/doc/ego/cf/cf5
+++ b/doc/ego/cf/cf5
@@ -0,0 +1,79 @@
+.NH 2
+Interprocedural analysis
+.PP
+It is often desirable to know the effects
+a procedure call may have.
+The optimization below is only possible if
+we know for sure that the call to P cannot
+change A.
+.DS
+A := 10;                        A:= 10;
+P;  -- procedure call    -->    P;
+B := A + 2;                     B := 12;
+.DE
+Although it is not possible to predict exactly
+all the effects a procedure call has, we may
+determine a kind of upper bound for it.
+So we compute all variables that may be
+changed by P, although they need not be
+changed at every invocation of P.
+We can get hold of this set by just looking
+at all assignment (store) instructions
+in the body of P.
+EM also has a set of \fIindirect\fR assignment
+instructions,
+i.e. assignment through a pointer variable.
+In general, it is not possible to determine
+which variable is affected by such an assignment.
+In these cases, we just record the fact that P
+does an indirect assignment.
+Note that this does not mean that all variables
+are potentially affected, as the front ends
+may generate messages telling that certain
+variables can never be accessed indirectly.
+We also set a flag if P does a use (load) indirect.
+Note that we only have to look at \fIglobal\fR
+variables.
+If P changes or uses any of its locals,
+this has no effect on its environment.
+Local variables of a lexically enclosing
+procedure can only be accessed indirectly.
+.PP
+A procedure P may of course call another procedure.
+To determine the effects of a call to P,
+we also must know the effects of a call to the second procedure.
+This second one may call a third one, and so on.
+Effectively, we need to compute the \fItransitive closure\fR
+of the effects.
+To do this, we determine for every procedure
+which other procedures it calls.
+This set is the "calling" attribute of a procedure.
+One may regard all these sets as a conceptual graph,
+in which there is an edge from P to Q
+if Q is in the calling set of P. This graph will
+be referred to as the \fIcall graph\fR.
+(Note the resemblance with the control flow graph).
+.PP
+We can detect which procedures are called by P
+by looking at all CAL instructions in its body.
+Unfortunately, a procedure may also be
+called indirectly, via a CAI instruction.
+Yet, only procedures that are used as operand of an LPI
+instruction can be called indirect,
+because this is the only way to take the address of a procedure.
+We determine for every procedure whether it does
+a CAI instruction.
+We also build a set of all procedures used as
+operand of an LPI.
+.sp
+After all procedures have been processed (i.e. all CFGs
+are constructed, all loops are detected,
+all procedures are analyzed to see which variables
+they may change, which procedures they call,
+whether they do a CAI or are used in an LPI) the
+transitive closure of all interprocedural
+information is computed.
+During the same process,
+the calling set of every procedure that uses a CAI
+is extended with the above mentioned set of all
+procedures that can be called indirect.
--- a/doc/ego/cf/cf6
+++ b/doc/ego/cf/cf6
@@ -0,0 +1,21 @@
+.NH 2
+Source files
+.PP
+The sources of CF are in the following files and packages:
+.IP cf.h: 14
+declarations of global variables and data structures
+.IP cf.c:
+the routine main; interprocedural analysis;
+transitive closure
+.IP succ:
+control flow (successor and predecessor)
+.IP idom:
+immediate dominators
+.IP loop:
+loop detection
+.IP get:
+read object and procedure table;
+read EM text and partition it into basic blocks
+.IP put:
+write tables, CFGs and EM text
+.LP
--- a/doc/ego/cj/.distr
+++ b/doc/ego/cj/.distr
@@ -0,0 +1 @@
+cj1
--- a/doc/ego/cj/cj1
+++ b/doc/ego/cj/cj1
@@ -0,0 +1,136 @@
+.bp
+.NH 1
+Cross jumping
+.NH 2
+Introduction
+.PP
+The "Cross Jumping" optimization technique (CJ)
+.[
+wulf design optimizing compiler
+.]
+is basically a space optimization technique. It looks for pairs of
+basic blocks (B1,B2), for which:
+.DS
+SUCC(B1) = SUCC(B2) = {S}
+.DE
+(So B1 and B2 both have one and the same successor).
+If the last few non-branch instructions are the same for B1 and B2,
+one such sequence can be eliminated.
+.DS
+Pascal:
+
+if cond then
+    S1
+    S3
+else
+    S2
+    S3
+
+(pseudo) EM:
+
+TEST COND		TEST COND
+BNE *1			BNE *1
+S1			S1
+S3	   --->		BRA *2
+BRA *2			1:
+1:			S2
+S2			2:
+S3			S3
+2:
+
+Fig. 9.1 An example of Cross Jumping
+.DE
+As the basic blocks have the same successor,
+at least one of them ends in an unconditional branch instruction (BRA).
+Hence no extra branch instruction is ever needed, just the target
+of an existing branch needs to be changed; neither the program size
+nor the execution time will ever increase.
+In general, the execution time will remain the same, unless
+further optimizations can be applied because of this optimization.
+.PP
+This optimization is particularly effective,
+because it cannot always be done by the programmer at the source level,
+as demonstrated by Fig. 9.2.
+.DS
+	Pascal:
+
+	if cond then
+	   x := f(4)
+	else
+	   x := g(5)
+
+
+	EM:
+
+	...                     ...
+	LOC 4			LOC 5
+	CAL F			CAL G
+	ASP 2			ASP 2
+	LFR 2			LFR 2
+	STL X			STL X
+
+Fig. 9.2 Effectiveness of Cross Jumping
+.DE
+At the source level there is no common tail,
+but at the EM level there is a common tail.
+.NH 2
+Implementation
+.PP
+The implementation of cross jumping is rather straightforward.
+The technique is applied to one procedure at a time.
+The control flow graph of the procedure 
+is scanned for pairs of basic blocks
+with the same (single) successor and with common tails.
+Note that there may be more than two such blocks (e.g. as the result
+of a case statement).
+This is dealt with by repeating the entire process until no
+further optimizations can be done for the current procedure.
+.sp
+If a suitable pair of basic blocks has been found, the control flow
+graph must be altered. One of the basic
+blocks must be split into two.
+The control flow graphs before and after the optimization are shown
+in Fig. 9.3 and Fig. 9.4.
+.DS
+
+	--------				--------
+	|      |				|      |
+	| S1   |			        | S2   |
+	| S3   |   				| S3   |
+	|      |				|      |
+	--------				--------
+	   |					   |
+	   |------------------|--------------------|
+			      |
+			      v
+
+Fig. 9.3 CFG before optimization
+.DE
+.DS
+
+	--------				--------
+	|      |				|      |
+	| S1   |			        | S2   |
+	|      |				|      |
+	--------				--------
+	   |					   |
+	   |--------------------<------------------|
+	   v
+	--------
+	|      |
+	| S3   |
+	|      |
+	--------
+	   |
+	   v
+
+Fig. 9.4 CFG after optimization
+.DE
+Some attributes of the three resulting blocks (such as immediate dominator)
+are updated.
+.PP
+In some cases, cross jumping might split the computation of an expression
+into two, by inserting a branch somewhere in the middle.
+Most code generators will generate very poor assembly code when
+presented with such EM code. 
+Therefore, cross jumping is not performed in these cases.
--- a/doc/ego/cs/.distr
+++ b/doc/ego/cs/.distr
@@ -0,0 +1,5 @@
+cs1
+cs2
+cs3
+cs4
+cs5
--- a/doc/ego/cs/cs1
+++ b/doc/ego/cs/cs1
@@ -0,0 +1,42 @@
+.bp
+.NH 1
+Common subexpression elimination
+.NH 2
+Introduction
+.PP
+The Common Subexpression Elimination optimization technique (CS)
+tries to eliminate multiple computations of EM expressions
+that yield the same result.
+It places the result of one such computation
+in a temporary variable,
+and replaces the other computations by a reference
+to this temporary variable.
+The primary goal of this technique is to decrease
+the execution time of the program,
+but in general it will save space too.
+.PP
+As an example of the application of Common Subexpression Elimination,
+consider the piece of program in Fig. 7.1(a).
+.DS
+x := a * b;          TMP := a * b;       x := a * b;
+CODE;                x := TMP;           CODE
+y := c + a * b;      CODE                y := c + x;
+                     y := c + TMP;
+
+   (a)                  (b)                 (c)
+
+Fig. 7.1  Examples of Common Subexpression Elimination
+.DE
+If neither a nor b is changed in CODE,
+the instructions can be replaced by those of Fig. 7.1(b),
+which saves one multiplication,
+but costs an extra store instruction.
+If the value of x is not changed in CODE either,
+the instructions can be replaced by those of Fig. 7.1(c).
+In this case
+the extra store is not needed.
+.PP
+In the following sections we will describe
+which transformations are done
+by CS and how this phase
+was implemented.
--- a/doc/ego/cs/cs2
+++ b/doc/ego/cs/cs2
@@ -0,0 +1,83 @@
+.NH 2
+Specification of the Common Subexpression Elimination phase
+.PP
+In this section we will describe
+the window
+through which CS examines the code,
+the expressions recognized by CS,
+and finally the changes made to the code.
+.NH 3
+The working window
+.PP
+The CS algorithm is applied to the
+largest sequence of textually adjacent basic blocks
+B1,..,Bn, for which
+.DS
+PRED(Bj) = {Bj-1},  j = 2,..,n.
+.DE
+Intuitively, this window consists of straight line code,
+with only one entry point (at the beginning); it may
+contain jumps, which should all have their targets outside the window.
+This is illustrated in Fig. 7.2.
+.DS
+x := a * b;	(1)
+if x < 10 then	(2)
+    y := a * b;	(3)
+
+Fig. 7.2 The working window of CS
+.DE
+Line (2) can only be executed after line (1).
+Likewise, line (3) can only be executed after
+line (2).
+Both a and b have the same values at line (1) and at line (3).
+.PP
+Larger windows were avoided.
+In Fig. 7.3, the value of a at line (4) may have been obtained
+at more than one point.
+.DS
+x := a * b;	(1)
+if x < 10 then	(2)
+    a := 100;	(3)
+y := a * b;	(4)
+
+Fig. 7.3 Several working windows
+.DE
+.NH 3
+Recognized expressions.
+.PP
+The computations eliminated by CS need not be normal expressions
+(like "a * b"),
+but can even consist of a single operand that is expensive to access,
+such as an array element or a record field.
+If an array element is used,
+its address is computed implicitly.
+CS is able to eliminate either the element itself or its
+address, whichever one is most profitable.
+A variable of a textually enclosing procedure may also be
+expensive to access, depending on the lexical level difference.
+.NH 3
+Transformations
+.PP
+CS creates a new temporary local variable (TMP)
+for every eliminated expression,
+unless it is able to use an existing local variable.
+It emits code to initialize this variable with the
+result of the expression.
+Most recurrences of the expression
+can simply be replaced by a reference to TMP.
+If the address of an array element is recognized as
+a common subexpression,
+references to the element itself are replaced by
+indirect references through TMP (see Fig. 7.4).
+.DS
+x := A[i];			TMP := &A[i];
+  . . .			-->	x := *TMP;
+A[i] := y;			   . . .
+				*TMP := y;
+
+Fig. 7.4 Elimination of an array address computation
+.DE
+Here, '&' is the 'address of' operator,
+and unary '*' is the indirection operator.
+(Note that EM actually has different instructions to do
+a use-indirect or an assign-indirect.)
--- a/doc/ego/cs/cs3
+++ b/doc/ego/cs/cs3
@@ -0,0 +1,243 @@
+.NH 2
+Implementation
+.PP
+.NH 3
+The value number method
+.PP
+To determine whether two expressions have the same result,
+there must be some way to determine whether their operands have
+the same values.
+We use a system of \fIvalue numbers\fP
+.[
+kennedy data flow analysis 
+.]
+in which each distinct value of whatever type,
+created or used within the working window,
+receives a unique identifying number, its value number.
+Two items have the same value number if and only if,
+based only upon information from the instructions in the window,
+their values are provably identical.
+For example, after processing the statement
+.DS
+a := 4;
+.DE
+the variable a and the constant 4 have the same value number.
+.PP
+The value number of the result of an expression depends only
+on the kind of operator and the value number(s) of the operand(s).
+The expressions need not be textually equal, as shown in Fig. 7.5.
+.DS
+a := c;		(1)
+use(a * b);	(2)
+d := b;		(3)
+use(c * d);	(4)
+
+Fig. 7.5 Different expressions with the same value number
+.DE
+At line (1) a receives the same value number as c.
+At line (3) d receives the same value number as b.
+At line (4) the expression "c * d" receives the same value number
+as the expression "a * b" at line (2),
+because the value numbers of their left and right operands are the same,
+and the operator (*) is the same.
+.PP
+As another example of the value number method, consider Fig. 7.6.
+.DS
+use(a * b);	(1)
+a := 123;	(2)
+use(a * b);	(3)
+
+Fig. 7.6 Identical expressions with different value numbers
+.DE
+Although textually the expressions "a * b" in line 1 and line 3 are equal,
+a will have different value numbers at line 3 and line 1.
+The two expressions will not mistakenly be recognized as equivalent.
+.NH 3
+Entities
+.PP
+The Value Number Method distinguishes between operators and operands.
+The value numbers of operands are stored in a table,
+called the \fIsymbol table\fR.
+The value number of a subexpression depends on the
+(root) operator of the expression and on the value numbers
+of its operands.
+A table of "available expressions" is used to do this mapping.
+.PP
+CS recognizes the following kinds of EM operands, called \fIentities\fR:
+.IP
+- constant
+- local variable
+- external variable
+- indirectly accessed entity
+- offsetted entity
+- address of local variable
+- address of external variable
+- address of offsetted entity
+- address of local base
+- address of argument base
+- array element
+- procedure identifier
+- floating zero
+- local base
+- heap pointer
+- ignore mask
+.LP
+Whenever a new entity is encountered in the working window,
+it is entered in the symbol table and given a brand new value number.
+Most entities have attributes (e.g. the offset in
+the current stackframe for local variables),
+which are also stored in the symbol table.
+.PP
+An entity is called static if its value cannot be changed
+(e.g. a constant or an address).
+.NH 3
+Parsing expressions
+.PP
+Common subexpressions are recognized by simulating the behaviour
+of the EM machine.
+The EM code is parsed from left to right;
+as EM is postfix code, this is a bottom up parse.
+At any point the current state of the EM runtime stack is
+reflected by a simulated "fake stack",
+containing descriptions of the parsed operands and expressions.
+A descriptor consists of:
+.DS
+(1) the value number of the operand or expression
+(2) the size of the operand or expression
+(3) a pointer to the first line of EM-code
+    that constitutes the operand or expression
+.DE
+Note that operands may consist of several EM instructions.
+Whenever an operator is encountered, the
+descriptors of its operands are on top of the fake stack.
+The operator and the value numbers of the operands 
+are used as indices in the table of available expressions,
+to determine the value number of the expression.
+.PP
+During the parsing process,
+we keep track of the first line of each expression;
+we need this information when we decide to eliminate the expression.
+.NH 3
+Updating entities
+.PP
+An entity is assigned a value number when it is
+used for the first time
+in the working window.
+If the entity is used as left hand side of an assignment,
+it gets the value number of the right hand side.
+Sometimes the effects of an instruction on an entity cannot
+be determined exactly;
+the current value and value number of the entity may become
+inconsistent.
+Hence the current value number must be forgotten.
+This is achieved by giving the entity a new value number
+that was not used before.
+The entity is said to be \fIkilled\fR.
+.PP
+As information is lost when an entity is killed,
+CS tries to save as many entities as possible.
+In case of an indirect assignment through a pointer,
+some analysis is done to see which variables cannot be altered.
+For a procedure call, the interprocedural information contained
+in the procedure table is used to restrict the set of entities that may
+be changed by the call.
+Local variables for which the front end generated 
+a register message can never be changed by an indirect assignment
+or a procedure call.
+.NH 3
+Changing the EM text
+.PP
+When a new expression comes available,
+it is checked whether its result is saved in a local
+that may go in a register.
+The last line of the expression must be followed
+by a STL or SDL instruction
+(depending on the size of the result)
+and a register message must be present for
+this local.
+If there is such a local,
+it is recorded in the available expressions table.
+Each time a new occurrence of this expression
+is found,
+the value number of the local is compared against
+the value number of the result.
+If they are different the local cannot be used and is forgotten.
+.PP
+The available expressions are linked in a list.
+New expressions are linked at the head of the list.
+In this way expressions that are contained within other
+expressions appear later in the list,
+because EM-expressions are postfix.
+The elimination process walks through the list,
+starting at the head, to find the largest expressions first.
+If an expression is eliminated,
+any expression later on in the list, contained in the former expression,
+is removed from the list,
+as expressions can only be eliminated once.
+.PP
+A STL or SDL is emitted after the first occurrence of the expression,
+unless there was an existing local variable that could hold the result.
+.NH 3
+Desirability analysis
+.PP
+Although the global optimizer works on EM code,
+the goal is to improve the quality of the object code.
+Therefore some machine-dependent information is needed
+to decide whether it is desirable to
+eliminate a given expression.
+Because it is impossible for the CS phase to know
+exactly what code will be generated,
+some heuristics are used.
+CS essentially looks for some special cases
+that should not be eliminated.
+These special cases can be turned on or off for a given machine,
+as indicated in a machine descriptor file.
+.PP
+Some operators can sometimes be translated
+into an addressing mode for the machine at hand.
+Such an operator is only eliminated
+if its operand is itself expensive,
+i.e. it is not just a simple load.
+The machine descriptor file contains a set of such operators.
+.PP
+Eliminating the loading of the Local Base or
+the Argument Base by the LXL resp. LXA instruction
+is only beneficial if the difference in lexical levels
+exceeds a certain threshold.
+The machine descriptor file contains this threshold.
+.PP
+Replacing a SAR or a LAR by an AAR followed by a LOI
+may possibly increase the size of the object code.
+We assume that this is only possible when the
+size of the array element is greater than some limit.
+.PP
+There are back ends that can very efficiently translate
+the index computing instruction sequence LOC SLI ADS.
+If this is the case,
+the SLI instruction between a LOC
+and an ADS is not eliminated.
+.PP
+To handle unforeseen cases, the descriptor file may also contain
+a set of operators that should never be eliminated.
+.NH 3
+The algorithm
+.PP
+After these preparatory explanations,
+the algorithm itself is easy to understand.
+For each instruction within the current window,
+the following steps are performed in the given order :
+.IP 1.
+Check if this instruction defines an entity.
+If so, the set of entities is updated accordingly.
+.IP 2.
+Kill all entities that might be affected by this instruction.
+.IP 3.
+Simulate the instruction on the fake-stack.
+If this instruction is an operator,
+update the list of available expressions accordingly.
+.PP
+The result of this process is
+a list of available expressions plus the information
+needed to eliminate them.
+Expressions that are desirable to eliminate are eliminated.
+Next, the window is shifted and the process is repeated.
--- a/doc/ego/cs/cs4
+++ b/doc/ego/cs/cs4
@@ -0,0 +1,305 @@
+.NH 2
+Implementation.
+.PP
+In this section we will discuss the implementation of the CS phase.
+We will first describe the basic actions that are undertaken
+by the algorithm, then the algorithm itself.
+.NH 3
+Partitioning the EM instructions
+.PP
+There are over 100 EM instructions.
+For our purpose we partition this huge set into groups of
+instructions which can be more or less conveniently handled together.
+.PP
+There are groups for all sorts of load instructions:
+simple loads, expensive loads, loads of an array element.
+A load is considered \fIexpensive\fP when more than one EM instruction
+is involved in loading it.
+The load of a lexical entity is also considered expensive.
+For instance: LOF is expensive, LAL is not.
+LAR forms a group on its own, 
+because it is not only an expensive load,
+but also implicitly includes the ternary operator AAR,
+which computes the address of the array element.
+.PP
+There are groups for all sorts of operators:
+unary, binary, and ternary.
+The groups of operators are further partitioned according to the size
+of their operand(s) and result.
+\" .PP
+\" The distinction between operators and expensive loads is not always clear.
+\" The ADP instruction for example,
+\" might seem a unary operator because it pops one item
+\" (a pointer) from the stack.
+\" However, two ADP-instructions which pop an item with the same value number
+\" need not have the same result,
+\" because the attributes (an offset, to be added to the pointer)
+\" can be different.
+\" Is it then a binary operator?
+\" That would give rise to the strange, and undesirable,
+\" situation that some binary operators pop two operands
+\" and others pop one.
+\" The conclusion is inevitable:
+\" we have been fooled by the name (ADd Pointer).
+\" The ADP-instruction is an expensive load.
+\" In this context LAF, meaning Load Address of oFfsetted,
+\" would have been a better name,
+\" corresponding to LOF, like LAL,
+\" Load Address of Local, corresponds to LOL.
+.PP
+There are groups for all sorts of stores:
+direct, indirect, array element.
+The SAR forms a group on its own for the same reason
+as appeared with LAR.
+.PP
+The effect of the remaining instructions is less clear.
+They do not help very much in parsing expressions or
+in constructing our pseudo symboltable.
+They are partitioned according to the following criteria:
+.RS
+.IP "-"
+They change the value of an entity without using the stack
+(e.g. ZRL, DEE).
+.IP "-"
+They are subroutine calls (CAI, CAL).
+.IP "-"
+They change the stack in some irreproducible way (e.g. ASP, LFR, DUP).
+.IP "-"
+They have no effect whatever on the stack or on the entities.
+This does not mean they can be deleted,
+but they can be ignored for the moment
+(e.g. MES, LIN, NOP).
+.IP "-"
+Their effect is too complicated to compute,
+so we just assume worst case behaviour.
+Hopefully, they do not occur very often.
+(e.g. MON, STR, BLM).
+.IP "-"
+They signal the end of the basic block (e.g. BLT, RET, TRP).
+.RE
+.NH 3
+Parsing expressions
+.PP
+To recognize expressions,
+we simulate the behaviour of the EM machine,
+by means of a fake-stack.
+When we scan the instructions in sequential order,
+we first encounter the instructions that load
+the operands on the stack,
+and then the instruction that indicates the operator,
+because EM expressions are postfix.
+When we find an instruction to load an operand,
+we load on the fake-stack a struct with the following information:
+.DS
+(1) the value number of the operand
+(2) the size of the operand
+(3) a pointer to the first line of EM-code
+    that constitutes the operand
+.DE
+In most cases, (3) will point to the line
+that loaded the operand (e.g. LOL, LOC),
+i.e. there is only one line that refers to this operand,
+but sometimes some information must be popped
+to load the operand (e.g. LOI, LAR).
+This information must have been pushed before,
+so we also pop a pointer to the first line that pushed
+the information.
+This line is now the first line that defines the operand.
+.PP
+When we find the operator instruction,
+we pop its operand(s) from the fake-stack.
+The first line that defines the first operand is
+now the first line of the expression.
+We now have all information to determine
+whether the just parsed expression has occurred before.
+We also know the first and last line of the expression;
+we need this when we decide to eliminate it.
+Associated with each available expression is a set,
+each element of which contains the first and last line of
+a recurrence of this expression.
+.PP
+Not only will the operand(s) be popped from the fake-stack,
+but the following will be pushed:
+.DS
+(1) the value number of the result
+(2) the size of the result
+(3) a pointer to the first line of the expression
+.DE
+In this way an item on the fake-stack always contains
+the necessary information.
+As you see, EM expressions are parsed bottom up.
+.NH 3
+Updating entities
+.PP
+As said before,
+we build our private "symboltable",
+while scanning the EM-instructions.
+The behaviour of the EM-machine is not only reflected
+in the fake-stack,
+but also in the entities.
+When an entity is created,
+we do not yet know its value,
+so we assign a brand new value number to it.
+Each time a store-instruction is encountered,
+we change the value number of the target entity of this store
+to the value number of the token that was popped
+from the fake-stack.
+Because entities may overlap,
+we must also "forget" the value numbers of entities
+that might be affected by this store.
+Each such entity will be \fIkilled\fP,
+i.e. assigned a brand new value number.
+.PP
+Because we lose information when we forget
+the value number of an entity,
+we try to save as many entities as possible.
+When we store into an external,
+we don't have to kill locals and vice versa.
+Furthermore, we can see whether two locals or
+two externals overlap,
+because we know the offset from the local base,
+resp. the offset within the data block,
+and the size.
+The situation becomes more complicated when we have
+to consider indirection.
+The worst case is that we store through an unknown pointer.
+In that case we kill all entities except those locals
+for which a so-called \fIregister message\fP has been generated;
+this register message indicates that this local can never be
+accessed indirectly.
+If we know this pointer we can be more careful.
+If it points to a local then the entity that is accessed through
+this pointer can never overlap with an external.
+If it points to an external this entity can never overlap with a local.
+Furthermore, in the latter case,
+we can find the data block this entity belongs to.
+Since pointer arithmetic is only defined within a data block,
+this entity can never overlap with entities that are known to
+belong to another data block.
+.PP
+Not only after a store-instruction but also after a 
+subroutine-call it may be necessary to kill entities;
+the subroutine may affect global variables or store
+through a pointer.
+If a subroutine is called that is not available as EM-text,
+we assume worst case behaviour,
+i.e. we kill all entities without register message.
+.NH 3
+Additions and replacements.
+.PP
+When a new expression comes available,
+we check whether the result is saved in a local
+that may go in a register.
+The last line of the expression must be followed
+by a STL or SDL instruction,
+depending on the size of the result
+(resp. WS and 2*WS),
+and a register message must be present for
+this local.
+If we have found such a local,
+we store a pointer to it with the available expression.
+Each time a new occurrence of this expression
+is found,
+we compare the value number of the local against
+the value number of the result.
+When they are different we remove the pointer to it,
+because we cannot use it.
+.PP
+The available expressions are singly linked in a list.
+When a new expression comes available,
+we link it at the head of the list.
+In this way expressions that are contained within other
+expressions appear later in the list,
+because EM-expressions are postfix.
+When we are going to eliminate expressions,
+we walk through the list,
+starting at the head, to find the largest expressions first.
+When we decide to eliminate an expression,
+we look at the expressions in the tail of the list,
+starting from where we are now,
+to delete expressions that are contained within
+the chosen one because
+we cannot eliminate an expression more than once.
+.PP
+When we are going to eliminate expressions,
+and we do not have a local that holds the result,
+we emit a STL or SDL after the line where the expression
+was first found.
+The other occurrences are simply removed,
+unless they contain instructions that not only have
+effect on the stack; e.g. messages, stores, calls.
+Before each instruction that needs the result on the stack,
+we emit a LOL or LDL.
+When the expression was an AAR,
+but the instruction was a LAR or a SAR,
+we append a LOI resp. a STI of the number of bytes
+in an array-element after each LOL/LDL.
+.NH 3
+Desirability analysis
+.PP
+Although the global optimizer works on EM code,
+the goal is to improve the quality of the object code.
+Therefore we need some machine dependent information
+to decide whether it is desirable to
+eliminate a given expression.
+Because it is impossible for the CS phase to know
+exactly what code will be generated,
+we use some heuristics.
+In most cases it will save time when we eliminate an
+operator, so we just do it.
+We only look for some special cases.
+.PP
+Some operators can in some cases be translated
+into an addressing mode for the machine at hand.
+We only eliminate such an operator,
+when its operand is itself "expensive",
+i.e. not just a simple load.
+The user of the CS phase has to supply
+a set of such operators.
+.PP
+Eliminating the loading of the Local Base or
+the Argument Base by the LXL resp. LXA instruction
+is only beneficial when the number of lexical levels
+we have to go back exceeds a certain threshold.
+This threshold will be different when registers
+are saved by the back end.
+The user must supply this threshold.
+.PP
+Replacing a SAR or a LAR by an AAR followed by a LOI
+may possibly increase the size of the object code.
+We assume that this is only possible when the
+size of the array element is greater than some
+(user-supplied) limit.
+.PP
+There are back ends that can very efficiently translate
+the index computing instruction sequence LOC SLI ADS.
+If this is the case,
+we do not eliminate the SLI instruction between a LOC
+and an ADS.
+.PP
+To handle unforeseen cases, the user may also supply
+a set of operators that should never be eliminated.
+.NH 3
+The algorithm
+.PP
+After these preparatory explanations,
+we can be short about the algorithm itself.
+For each instruction within our window,
+the following steps are performed in the order given:
+.IP 1.
+We check if this instruction defines an entity.
+If this is the case the set of entities is updated accordingly.
+.IP 2.
+We kill all entities that might be affected by this instruction.
+.IP 3.
+The instruction is simulated on the fake-stack.
+Copy propagation is done.
+If this instruction is an operator,
+we update the list of available expressions accordingly.
+.PP
+When we have processed all instructions this way,
+we have built a list of available expressions plus the information we
+need to eliminate them.
+We eliminate those expressions that desirability analysis
+indicates are worth eliminating.
+Then we shift our window and continue.
--- a/doc/ego/cs/cs5
+++ b/doc/ego/cs/cs5
@@ -0,0 +1,46 @@
+.NH 2
+Source files of CS
+.PP
+The sources of CS are in the following files and packages:
+.IP cs.h 14
+declarations of global variables and data structures
+.IP cs.c
+the routine main;
+a driving routine to process
+the basic blocks in the right order
+.IP vnm
+implements a procedure that performs
+the value numbering on one basic block
+.IP eliminate
+implements a procedure that does the
+transformations, if desirable
+.IP avail
+implements a procedure that manipulates the list of available expressions
+.IP entity
+implements a procedure that manipulates the set of entities
+.IP getentity
+implements a procedure that extracts the
+pseudo symboltable information from EM-instructions;
+uses a small table
+.IP kill
+implements several routines that find the entities
+that might be changed by EM-instructions
+and kill them
+.IP partition
+implements several routines that partition the huge set
+of EM-instructions into more or less manageable,
+more or less logical chunks
+.IP profit
+implements a procedure that decides whether it
+is advantageous to eliminate an expression;
+also removes expressions with side-effects
+.IP stack
+implements the fake-stack and operations on it
+.IP alloc
+implements several allocation routines
+.IP aux
+implements several auxiliary routines
+.IP debug
+implements several routines to provide debugging
+and verbose output
+.LP
--- a/doc/ego/ic/.distr
+++ b/doc/ego/ic/.distr
@@ -0,0 +1,5 @@
+ic1
+ic2
+ic3
+ic4
+ic5
--- a/doc/ego/ic/ic1
+++ b/doc/ego/ic/ic1
@@ -0,0 +1,57 @@
+.bp
+.NH
+The Intermediate Code and the IC phase
+.PP
+In this chapter the intermediate code of the EM global optimizer
+will be defined.
+The 'Intermediate Code construction' phase (IC),
+which builds the initial intermediate code from
+EM Compact Assembly Language,
+will be described.
+.NH 2
+Introduction
+.PP
+The EM global optimizer is a multi pass program,
+hence there is a need for an intermediate code.
+Usually, programs in the Amsterdam Compiler Kit use the
+Compact Assembly Language format
+.[~[
+keizer architecture
+.], section 11.2]
+for this purpose.
+Although this code has some convenient features,
+such as being compact,
+it is quite unsuitable in our case,
+because of a number of reasons.
+First, the code lacks global information
+about whole procedures or whole basic blocks.
+Second, it uses identifiers ('names') to bind
+defining and applied occurrences of
+procedures, data labels and instruction labels.
+Although this is usual in high level programming
+languages, it is awkward in an intermediate code
+that must be read many times.
+Each pass of the optimizer would have
+to incorporate an identifier look-up mechanism
+to associate a defining occurrence with each
+applied occurrence of an identifier.
+Finally, EM programs are used to declare blocks of bytes,
+rather than variables. A 'hol 6' instruction may be used to
+declare three 2-byte variables.
+Clearly, the optimizer wants to deal with variables, and
+not with rows of bytes.
+.PP
+To overcome these problems, we have developed a new
+intermediate code.
+This code does not merely consist of the EM instructions,
+but also contains global information in the
+form of tables and graphs.
+Before describing the intermediate code we will
+first leap aside to outline
+the problems one generally encounters
+when trying to store complex data structures such as
+graphs outside the program, i.e. in a file.
+We trust this will enhance the
+comprehensibility of the
+intermediate code definition and the design and implementation
+of the IC phase.
--- a/doc/ego/ic/ic2
+++ b/doc/ego/ic/ic2
@@ -0,0 +1,146 @@
+.NH 2
+Representation of complex data structures in a sequential file
+.PP
+Most programmers are quite used to dealing with
+complex data structures, such as
+arrays, graphs and trees.
+There are some particular problems that occur
+when storing such a data structure
+in a sequential file.
+We call data that is kept in
+main memory
+.UL internal
+,as opposed to
+.UL external
+data
+that is kept in a file outside the program.
+.sp
+We assume a simple data structure of a
+scalar type (integer, floating point number)
+has some known external representation.
+An
+.UL array
+having elements of a scalar type can be represented
+externally easily, by successively
+representing its elements.
+The external representation may be preceded by a
+number, giving the length of the array.
+Now, consider a linear, singly linked list,
+the elements of which look like:
+.DS
+record
+        data: scalar_type;
+        next: pointer_type;
+end;
+.DE
+It is significant to note that the "next"
+fields of the elements only have a meaning within
+main memory.
+The field contains the address of some location in
+main memory.
+If a list element is written to a file in
+some program,
+and read by another program,
+the element will be allocated at a different
+address in main memory.
+Hence this address value is completely
+useless outside the program.
+.sp
+One may represent the list by ignoring these "next" fields
+and storing the data items in the order they are linked.
+The "next" fields are represented \fIimplicitly\fR.
+When the file is read again,
+the same list can be reconstructed.
+In order to know where the external representation of the
+list ends,
+it may be useful to put the length of
+the list in front of it.
+.sp
+Note that arrays and linear lists have the
+same external representation.
+.PP
+A doubly linked, linear list,
+with elements of the type:
+.DS
+record
+        data: scalar_type;
+        next,
+        previous: pointer_type;
+end
+.DE
+can be represented in precisely the same way.
+Both the "next" and the "previous" fields are represented
+implicitly.
+.PP
+Next, consider a binary tree,
+the nodes of which have type:
+.DS
+record
+        data: scalar_type;
+        left,
+        right: pointer_type;
+end
+.DE
+Such a tree can be represented sequentially,
+by storing its nodes in some fixed order, e.g. prefix order.
+A special null data item may be used to
+denote a missing left or right son.
+For example, let the scalar type be integer,
+and let the null item be 0.
+Then the tree of fig. 3.1(a)
+can be represented as in fig. 3.1(b).
+.DS
+                        4
+
+                    9      12
+
+                12    3   4   6
+
+                     8  1  5 1
+
+Fig. 3.1(a) A binary tree
+
+
+4 9 12 0 0 3 8 0 0 1 0 0 12 4 0 5 0 0 6 1 0 0 0
+
+Fig. 3.1(b) Its sequential representation
+.DE
+We are still able to represent the pointer fields ("left"
+and "right") implicitly.
+.PP
+Finally, consider a general
+.UL graph
+, where each node has a "data" field and
+pointer fields,
+with no restriction on where they may point to.
+Now we're at the end of our tale.
+There is no way to represent the pointers implicitly,
+like we did with lists and trees.
+In order to represent them explicitly,
+we use the following scheme.
+Every node gets an extra field,
+containing some unique number that identifies the node.
+We call this number its
+.UL id.
+A pointer is represented externally as the id of the node
+it points to.
+When reading the file we use a table that maps
+an id to the address of its node.
+In general this table will not be completely filled in
+until we have read the entire external representation of
+the graph and allocated internal memory locations for
+every node.
+Hence we cannot reconstruct the graph in one scan.
+That is, there may be some pointers from node A to B,
+where B is placed after A in the sequential file.
+When we read the node of A we cannot map the id of B
+to the address of node B,
+as we have not yet allocated node B.
+We can overcome this problem if the size
+of every node is known in advance.
+In this case we can allocate memory for a node
+on first reference.
+Else, the mapping from id to pointer
+cannot be done while reading nodes.
+The mapping can be done either in an extra scan
+or at every reference to the node.
--- a/doc/ego/ic/ic3
+++ b/doc/ego/ic/ic3
@@ -0,0 +1,414 @@
+.NH 2
+Definition of the intermediate code
+.PP
+The intermediate code of the optimizer consists
+of several components:
+.IP -
+the object table
+.IP -
+the procedure table
+.IP -
+the em code
+.IP -
+the control flow graphs
+.IP -
+the loop table
+.LP -
+.PP
+These components are described in
+the next sections.
+The syntactic structure of every component
+is described by a set of context free syntax rules,
+with the following conventions:
+.DS
+x               a non-terminal symbol
+A               a terminal symbol (in capitals)
+x: a b c;       a grammar rule
+a | b           a or b
+(a)+            1 or more occurrences of a
+{a}             0 or more occurrences of a
+.DE
+.NH 3
+The object table
+.PP
+EM programs declare blocks of bytes rather than (global) variables.
+A typical program may declare 'HOL 7780'
+to allocate space for 8 I/O buffers,
+2 large arrays and 10 scalar variables.
+The optimizer wants to deal with
+.UL objects
+like variables, buffers and arrays
+and certainly not with huge numbers of bytes.
+Therefore the intermediate code contains information
+about which global objects are used.
+This information can be obtained from an EM program
+by just looking at the operands of instructions
+such as LOE, LAE, LDE, STE, SDE, INE, DEE and ZRE.
+.PP
+The object table consists of a list of
+.UL datablock
+entries.
+Each such entry represents a declaration like HOL, BSS,
+CON or ROM.
+There are five kinds of datablock entries.
+The fifth kind,
+UNKNOWN, denotes a declaration in a
+separately compiled file that is not made
+available to the optimizer.
+Each datablock entry contains the type of the block,
+its size, and a description of the objects that
+belong to it.
+If it is a rom,
+it also contains a list of values given
+as arguments to the rom instruction,
+provided that this list contains only integer numbers.
+An object has an offset (within its datablock)
+and a size.
+The size need not always be determinable.
+Both datablock and object contain a unique
+identifying number
+(see previous section for their use).
+.DS
+.UL syntax
+  object_table:
+                {datablock} ;
+  datablock:
+                D_ID            -- unique identifying number
+                PSEUDO          -- one of ROM,CON,BSS,HOL,UNKNOWN
+                SIZE            -- # bytes declared
+                FLAGS
+                {value}         -- contents of rom
+                {object} ;      -- objects of the datablock
+  object:
+                O_ID            -- unique identifying number
+                OFFSET          -- offset within the datablock
+                SIZE ;          -- size of the object in bytes
+  value:
+                argument ;
+.DE
+A data block has only one flag: "external", indicating
+whether the data label is externally visible.
+The syntax for "argument" will be given later on
+(see em_text).
+.NH 3
+The procedure table
+.PP
+The procedure table contains global information
+about all procedures that are made available
+to the optimizer
+and that are needed by the EM program.
+(Library units may not be needed, see section 3.5).
+The table has one entry for
+every procedure.
+.DS
+.UL syntax
+  procedure_table:
+                {procedure}
+  procedure:
+                P_ID            -- unique identifying number
+                #LABELS         -- number of instruction labels
+                #LOCALS         -- number of bytes for locals 
+		#FORMALS        -- number of bytes for formals
+                FLAGS           -- flag bits
+                calling         -- procedures called by this one
+                change          -- info about global variables changed
+                use ;           -- info about global variables used
+  calling:
+                {P_ID} ;        -- procedures called
+  change:
+                ext             -- external variables changed
+                FLAGS ;
+  use:
+                FLAGS ;
+  ext:
+                {O_ID} ;        -- a set of objects
+.DE
+.PP
+The number of bytes of formal parameters accessed by
+a procedure is determined by the front ends and
+passed via a message (parameter message) to the optimizer.
+If the front end is not able to determine this number
+(e.g. the parameter may be an array of dynamic size or
+the procedure may have a variable number of arguments) the attribute
+contains the value 'UNKNOWN_SIZE'.
+.sp 0
+A procedure has the following flags:
+.IP -
+external: true if the proc. is externally visible
+.IP -
+bodyseen: true if its code is available as EM text
+.IP -
+calunknown: true if it calls a procedure that has its bodyseen
+flag not set
+.IP -
+environ: true if it uses or changes a (non-global) variable in
+a lexically enclosing procedure
+.IP -
+lpi: true if it is used as operand of an lpi instruction, so
+it may be called indirectly
+.LP
+The change and use attributes both have one flag: "indirect",
+indicating whether the procedure does a 'use indirect'
+or a 'store indirect' (indirect means through a pointer).
+.NH 3
+The EM text
+.PP
+The EM text contains the EM instructions.
+Every EM instruction has an operation code (opcode)
+and 0 or 1 operands.
+EM pseudo instructions can have more than
+1 operand.
+The opcode is just a small (8 bit) integer.
+.sp
+There are several kinds of operands, which we will
+refer to as
+.UL types.
+Many EM instructions can have more than one type of operand.
+The types and their encodings in Compact Assembly Language
+are discussed extensively in
+.[~[
+keizer architecture 
+.], section 11.2].
+Of special interest is the way numeric values
+are represented.
+Of prime importance is the machine independence of
+the representation.
+Ultimately, one could store every integer
+just as a string of the characters '0' to '9'.
+As doing arithmetic on strings is awkward,
+Compact Assembly Language allows several alternatives.
+The main idea is to look at the value of the integer.
+Integers that fit in 16, 32 or 64 bits are
+represented as a row of resp. 2, 4 and 8 bytes,
+preceded by an indication of how many bytes are used.
+Longer integers are represented as strings;
+this is only allowed within pseudo instructions, however.
+This concept works very well for target machines
+with reasonable word sizes.
+At present, most ACK software cannot be used for word sizes
+higher than 32 bits,
+although the handles for using larger word sizes are
+present in the design of the EM code.
+In the intermediate code we essentially use the
+same ideas.
+We allow three representations of integers.
+.IP -
+integers that fit in a short are represented as a short
+.IP -
+integers that fit in a long but not in a short are represented
+as longs
+.IP -
+all remaining integers are represented as strings
+(only allowed in pseudos).
+.LP
+The terms short and long are defined in
+.[~[
+ritchie reference manual programming language
+.], section 4]
+and depend only on the source machine
+(i.e. the machine on which ACK runs),
+not on the target machines.
+For historical reasons a long will often be called an
+.UL offset.
+.PP
+Operands can also be instruction labels,
+objects or procedures.
+Instruction labels are denoted by a
+.UL label
+.UL identifier,
+which can be distinguished from a normal identifier.
+.sp
+The operand of a pseudo instruction can be a list of
+.UL arguments.
+Arguments can have the same type as operands, except
+for the type short, which is not used for arguments.
+Furthermore, an argument can be a string or
+a string representation of a signed integer, unsigned integer
+or floating point number.
+If the number of arguments is not fully determined by
+the pseudo instruction (e.g. a ROM pseudo can have any number
+of arguments), then the list is terminated by a special
+argument of type CEND.
+.DS
+.UL syntax
+  em_text:
+                {line} ;
+  line:
+                INSTR           -- opcode
+                OPTYPE          -- operand type
+                operand ;
+  operand:
+                empty |         -- OPTYPE = NO
+                SHORT |         -- OPTYPE = SHORT
+                OFFSET |        -- OPTYPE = OFFSET
+                LAB_ID |        -- OPTYPE = INSTRLAB
+                O_ID |          -- OPTYPE = OBJECT
+                P_ID |          -- OPTYPE = PROCEDURE
+                {argument} ;    -- OPTYPE = LIST
+  argument:
+                ARGTYPE
+                arg ;
+  arg:
+                empty |         -- ARGTYPE = CEND
+                OFFSET |
+                LAB_ID |
+                O_ID |
+                P_ID |
+                string |        -- ARGTYPE = STRING
+                const ;         -- ARGTYPE = ICON,UCON or FCON
+  string:
+                LENGTH          -- number of characters
+                {CHARACTER} ;
+  const:
+                SIZE            -- number of bytes
+                string ;        -- string representation of (un)signed
+                                -- or floating point constant
+.DE
+.NH 3
+The control flow graphs
+.PP
+Each procedure can be divided
+into a number of basic blocks.
+A basic block is a piece of code with
+no jumps in, except at the beginning,
+and no jumps out, except at the end.
+.PP
+Every basic block has a set of
+.UL successors,
+which are basic blocks that can follow it immediately in
+the dynamic execution sequence.
+The
+.UL predecessors
+are the basic blocks of which this one
+is a successor.
+The successor and predecessor attributes
+of all basic blocks of a single procedure
+are said to form the
+.UL control
+.UL flow
+.UL graph
+of that procedure.
+.PP
+Another important attribute is the
+.UL immediate
+.UL dominator.
+A basic block B dominates a block C if
+every path in the graph from the procedure entry block
+to C goes through B.
+The immediate dominator of C is the closest dominator
+of C on any path from the entry block.
+(Note that the dominator relation is transitive,
+so the immediate dominator is well defined.)
+.PP
+A basic block also has an attribute containing
+the identifiers of every
+.UL loop
+that the block belongs to (see next section for loops).
+.DS
+.UL syntax
+  control_flow_graph:
+                {basic_block} ;
+  basic_block:
+                B_ID            -- unique identifying number
+                #INSTR          -- number of EM instructions
+                succ
+                pred
+                idom            -- immediate dominator
+                loops           -- set of loops
+		FLAGS ;         -- flag bits
+  succ:
+                {B_ID} ;
+  pred:
+                {B_ID} ;
+  idom:
+                B_ID ;
+  loops:
+                {LP_ID} ;
+.DE
+The flag bits can have the values 'firm' and 'strong',
+which are explained below.
+.NH 3
+The loop tables
+.PP
+Every procedure has an associated
+.UL loop
+.UL table
+containing information about all the loops
+in the procedure.
+Loops can be detected by a close inspection of
+the control flow graph.
+The main idea is to look for two basic blocks,
+B and C, for which the following holds:
+.IP -
+B is a successor of C
+.IP -
+B is a dominator of C
+.LP
+B is called the loop
+.UL entry
+and C is called the loop
+.UL end.
+Intuitively, C contains a jump backwards to
+the beginning of the loop (B).
+.PP
+A loop L1 is said to be
+.UL nested
+within loop L2 if all basic blocks of L1
+are also part of L2.
+It is important to note that loops could
+originally be written as a well structured for- or
+while loop or as a messy goto loop.
+Hence loops may partly overlap without one
+being nested inside the other.
+The
+.UL nesting
+.UL level
+of a loop is the number of loops in
+which it is nested (so it is 0 for
+an outermost loop).
+The details of loop detection will be discussed later.
+.PP
+It is often desirable to know whether a
+basic block gets executed during every iteration
+of a loop.
+This leads to the following definitions:
+.IP -
+A basic block B of a loop L is said to be a \fIfirm\fR block
+of L if B is executed on all successive iterations of L,
+with the only possible exception of the last iteration.
+.IP -
+A basic block B of a loop L is said to be a \fIstrong\fR block
+of L if B is executed on all successive iterations of L.
+.LP
+Note that a strong block is also a firm block.
+If a block is part of a conditional statement, it is neither
+strong nor firm, as it may be skipped during some iterations
+(see Fig. 3.2).
+.DS
+loop
+       if cond1 then
+	      ... -- this code will not
+		  -- result in a firm or strong block
+       end if;
+       ...  -- strong (always executed)
+       exit when cond2;
+       ...  -- firm (not executed on
+            -- last iteration).
+end loop;
+
+Fig. 3.2 Example of firm and strong block
+.DE
+.DS
+.UL syntax
+  looptable:
+                {loop} ;
+  loop:
+                LP_ID           -- unique identifying number
+                LEVEL           -- loop nesting level
+                entry           -- loop entry block
+                end ;
+  entry:
+                B_ID ;
+  end:
+                B_ID ;
+.DE
--- a/doc/ego/ic/ic4
+++ b/doc/ego/ic/ic4
@@ -0,0 +1,80 @@
+.NH 2
+External representation of the intermediate code
+.PP
+The syntax of the intermediate code was given
+in the previous section.
+In this section we will make some remarks about
+the representation of the code in sequential files.
+.sp
+We use sequential files in order to avoid
+the bookkeeping of complex file indices.
+As a consequence of this decision
+we can't store all components
+of the intermediate code
+in one file.
+If a phase wishes to change some attribute
+of a procedure,
+or wants to add or delete entire procedures
+(inline substitution may do the latter),
+the procedure table will only be fully updated
+after the entire EM text has been scanned.
+Yet, the next phase undoubtedly wants
+to read the procedure table before it
+starts working on the EM text.
+Hence there is an ordering problem, which
+can be solved easily by putting the
+procedure table in a separate file.
+Similarly, the data block table is kept
+in a file of its own.
+.PP
+The control flow graphs (CFGs) could be mixed
+with the EM text.
+Rather, we have chosen to put them
+in a separate file too.
+The control flow graph file should be regarded as a
+file that imposes some structure on the EM-text file,
+just as an overhead sheet containing a picture
+of a Flow Chart may be put on an overhead sheet
+containing statements.
+The loop tables are also put in the CFG file.
+A loop imposes an extra structure on the
+CFGs and hence on the EM text.
+So there are four files:
+.IP -
+the EM-text file
+.IP -
+the procedure table file
+.IP -
+the object table file
+.IP -
+the CFG and loop tables file
+.LP
+Every table is preceded by its length, in order to
+tell where it ends.
+The CFG file also contains the number of instructions of
+every basic block,
+indicating which part of the EM text belongs
+to that block.
+.DS
+.UL syntax
+  intermediate_code:
+                object_table_file
+                proctable_file
+                em_text_file
+                cfg_file ;
+  object_table_file:
+                LENGTH          -- number of objects
+                object_table ;
+  proctable_file:
+                LENGTH          -- number of procedures
+                procedure_table ;
+  em_text_file:
+                em_text ;
+  cfg_file:
+                {per_proc} ;    -- one for every procedure
+  per_proc:
+                BLENGTH         -- number of basic blocks
+                LLENGTH         -- number of loops
+                control_flow_graph
+                looptable ;
+.DE
--- a/doc/ego/ic/ic5
+++ b/doc/ego/ic/ic5
@@ -0,0 +1,163 @@
+.NH 2
+The Intermediate Code construction phase
+.PP
+The first phase of the global optimizer,
+called
+.UL IC,
+constructs a major part of the intermediate code.
+To be specific, it produces:
+.IP -
+the EM text
+.IP -
+the object table
+.IP -
+part of the procedure table
+.LP
+The calling, change and use attributes of a procedure
+and all its flags except the external and bodyseen flags
+are computed by the next phase (Control Flow phase).
+.PP
+As explained before,
+the intermediate code does not contain
+any names of variables or procedures.
+The normal identifiers are replaced by identifying
+numbers.
+Yet, the output of the global optimizer must
+contain normal identifiers, as this
+output is in Compact Assembly Language format.
+We certainly want all externally visible names
+to be the same in the input as in the output,
+because the optimized EM module may be a library unit,
+used by other modules.
+IC dumps the names of all procedures and data labels
+on two files:
+.IP -
+the procedure dump file, containing tuples (P_ID, procedure name)
+.IP -
+the data dump file, containing tuples (D_ID, data label name)
+.LP
+The names of instruction labels are not dumped,
+as they are not visible outside the procedure
+in which they are defined.
+.PP
+The input to IC consists of one or more files.
+Each file is either an EM module in Compact Assembly Language
+format, or a Unix archive file (library) containing such modules.
+IC only extracts those modules from a library that are
+needed somehow, just as a linker does.
+It is advisable to present as much code
+of the EM program as possible to the optimizer,
+although it is not required to present the whole program.
+If a procedure is called somewhere in the EM text,
+but its body (text) is not included in the input,
+its bodyseen flag in the procedure table will still
+be off.
+Whenever such a procedure is called,
+we assume the worst case for everything;
+it will change and use all variables it has access to,
+it will call every procedure etc.
+.sp
+Similarly, if a data label is used
+but not defined, the PSEUDO attribute in its data block
+will be set to UNKNOWN.
+.NH 3
+Implementation
+.PP
+Part of the code for the EM Peephole Optimizer
+.[
+staveren peephole toplass
+.]
+has been used for IC.
+Especially the routines that read and unravel
+Compact Assembly Language and the identifier
+lookup mechanism have been used.
+New code was added to recognize objects,
+build the object and procedure tables and to
+output the intermediate code.
+.PP
+IC uses singly linked linear lists for both the
+procedure and object table.
+Hence there are no limits on the size of such
+a table (except for the trivial fact that it must fit
+in main memory).
+Both tables are outputted after all EM code has
+been processed.
+IC reads the EM text of one entire procedure
+at a time,
+processes it and appends the modified code to
+the EM text file.
+EM code is represented internally as a doubly linked linear
+list of EM instructions.
+.PP
+Objects are recognized by looking at the operands
+of instructions that reference global data.
+If we come across the instructions:
+.DS
+LDE X+6         -- Load Double External
+LAE X+20        -- Load Address External
+.DE
+we conclude that the data block
+preceded by the data label X contains an object
+at offset 6 of size twice the word size,
+and an object at offset 20 of unknown size.
+.sp
+A data block entry of the object table is allocated
+at the first reference to a data label.
+If this reference is a defining occurrence
+or an INA pseudo instruction,
+the label is not externally visible
+.[~[
+keizer architecture
+.], section 11.1.4.3].
+In this case, the external flag of the data block
+is turned off.
+If the first reference is an applied occurrence
+or an EXA pseudo instruction, the flag is set.
+We record this information, because the
+optimizer may change the order of defining and
+applied occurrences.
+The INA and EXA pseudos are removed from the EM text.
+They may be regenerated by the last phase
+of the optimizer.
+.sp
+Similar rules hold for the procedure table
+and the INP and EXP pseudos.
+.NH 3
+Source files of IC
+.PP
+The source files of IC consist
+of the files ic.c, ic.h and several packages.
+.UL ic.h
+contains type definitions, macros and
+variable declarations that may be used by
+ic.c and by every package.
+.UL ic.c
+contains the definitions of these variables,
+the procedure
+.UL main
+and some high level I/O routines used by main.
+.sp
+Every package xxx consists of two files.
+ic_xxx.h contains type definitions,
+macros, variable declarations and
+procedure declarations that may be used by
+every .c file that includes this .h file.
+The file ic_xxx.c provides the
+definitions of these variables and
+the implementation of the declared procedures.
+IC uses the following packages:
+.IP lookup: 18
+procedures that look up procedure, data label
+and instruction label names; procedures to dump
+the procedure and data label names.
+.IP lib:
+one procedure that gets the next useful input module;
+while scanning archives, it skips unnecessary modules.
+.IP aux:
+several auxiliary routines.
+.IP io:
+low-level I/O routines that unravel the Compact
+Assembly Language.
+.IP put:
+routines that output the intermediate code.
+.LP
--- a/doc/ego/il/.distr
+++ b/doc/ego/il/.distr
@@ -0,0 +1,6 @@
+il1
+il2
+il3
+il4
+il5
+il6
--- a/doc/ego/il/il1
+++ b/doc/ego/il/il1
@@ -0,0 +1,112 @@
+.bp
+.NH 1
+Inline substitution
+.NH 2
+Introduction
+.PP
+The Inline Substitution technique (IL)
+tries to decrease the overhead associated
+with procedure calls (invocations).
+During a procedure call, several actions
+must be undertaken to set up the right
+environment for the called procedure.
+.[
+johnson calling sequence
+.]
+On return from the procedure, most of these
+effects must be undone.
+This entire process introduces significant
+costs in execution time as well as
+in object code size.
+.PP
+The inline substitution technique replaces
+some of the calls by the modified body of
+the called procedure, hence eliminating
+the overhead.
+Furthermore, as the calling and called procedure
+are now integrated, they can be optimized
+together, using other techniques of the optimizer.
+This often leads to extra opportunities for
+optimization
+.[
+ball predicting effects
+.]
+.[
+carter code generation cacm
+.]
+.[
+scheifler inline cacm
+.]
+.PP
+An inline substitution of a call to a procedure P increases
+the size of the program, unless P is very small or P is
+called only once.
+In the latter case, P can be eliminated.
+In practice, procedures that are called only once occur
+quite frequently, due to the
+introduction of structured programming.
+(Carter
+.[
+carter umi ann arbor
+.]
+states that almost 50% of the Pascal procedures
+he analyzed were called just once).
+.PP
+Scheifler
+.[
+scheifler inline cacm
+.]
+has a more general view of inline substitution.
+In his model, the program under consideration is
+allowed to grow by a certain amount,
+i.e. code size is sacrificed to speed up the program.
+The above two cases are just special cases of
+his model, obtained by setting the size-change to
+(approximately) zero.
+He formulates the substitution problem as follows:
+.IP
+"Given a program, a subset of all invocations,
+a maximum program size, and a maximum procedure size,
+find a sequence of substitutions that minimizes
+the expected execution time."
+.LP
+Scheifler shows that this problem is NP-complete
+.[~[
+aho hopcroft ullman analysis algorithms
+.], chapter 10]
+by reduction to the Knapsack Problem.
+Heuristics will have to be used to find a near-optimal
+solution.
+.PP
+In the following chapters we will extend
+Scheifler's view and adapt it to the EM Global Optimizer.
+We will first describe the transformations that have
+to be applied to the EM text when a call is substituted
+in line.
+Next we will examine in which cases inline substitution
+is not possible or desirable.
+Heuristics will be developed for
+choosing a good sequence of substitutions.
+These heuristics make no demand on the user
+(such as making profiles
+.[
+scheifler inline cacm
+.]
+or giving pragmats
+.[~[
+ichbiah ada military standard
+.], section 6.3.2]),
+although the model could easily be extended
+to use such information.
+Finally, we will discuss the implementation
+of the IL phase of the optimizer.
+.PP
+We will often use the term inline expansion
+as a synonym of inline substitution.
+.sp 0
+The inverse technique of procedure abstraction
+(automatic subroutine generation)
+.[
+shaffer subroutine generation
+.]
+will not be discussed in this report.
--- a/doc/ego/il/il2
+++ b/doc/ego/il/il2
@@ -0,0 +1,93 @@
+.NH 2
+Parameters and local variables.
+.PP
+In the EM calling sequence, the calling procedure
+pushes its parameters on the stack
+before doing the CAL.
+The called routine first saves some
+status information on the stack and then
+allocates space for its own locals
+(also on the stack).
+Usually, one special purpose register,
+the Local Base (LB) register,
+is used to access both the locals and the
+parameters.
+If memory is highly segmented,
+the stack frames of the caller and the callee
+may be allocated in different fragments;
+an extra Argument Base (AB) register is used
+in this case to access the actual parameters.
+See 4.2 of
+.[
+keizer architecture
+.]
+for further details.
+.PP
+If a procedure call is expanded in line,
+there are two problems:
+.IP 1. 3
+No stack frame will be allocated for the called procedure;
+we must find another place to put its locals.
+.IP 2.
+The LB register cannot be used to access the actual
+parameters;
+as the CAL instruction is deleted, the LB will
+still point to the local base of the \fIcalling\fR procedure.
+.LP
+The local variables of the called procedure will
+be put in the stack frame of the calling procedure,
+just after its own locals.
+The size of the stack frame of the
+calling procedure will be increased
+during its entire lifetime.
+Therefore our model will allow a
+limit to be set on the number of bytes
+for locals that the called procedure may have
+(see next section).
+.PP
+There are several alternatives to access the parameters.
+An actual parameter may be any auxiliary expression,
+which we will refer to as
+the \fIactual parameter expression\fR.
+The value of this expression is stored
+in a location on the stack (see above),
+the \fIparameter location\fR.
+.sp 0
+The alternatives for accessing parameters are:
+.IP -
+save the value of the stackpointer at the point of the CAL
+in a temporary variable X;
+this variable can be used to simulate the AB register,  i.e.
+parameter locations are accessed via an offset to
+the value of X.
+.IP -
+create a new temporary local variable T for
+the parameter (in the stack frame of the caller);
+every access to the parameter location must be changed
+into an access to T.
+.IP -
+do not evaluate the actual parameter expression before the call;
+instead, substitute this expression for every use of the
+parameter location.
+.LP
+The first method may be expensive if X is not
+put in a register.
+We will not use this method.
+The time required to evaluate and access the
+parameters when the second method is used
+will not differ much from the normal
+calling sequence (i.e. not in line call).
+It is not expensive, but there are no
+extra savings either.
+The third method is essentially the 'by name'
+parameter mechanism of Algol60.
+If the actual parameter is just a numeric constant,
+it is advantageous to use it.
+Yet, there are several circumstances
+under which it cannot or should not be used.
+We will deal with this in the next section.
+.sp 0
+In general we will use the third method,
+if it is possible and desirable.
+Such parameters will be called \fIin line parameters\fR.
+In all other cases we will use the second method.
--- a/doc/ego/il/il3
+++ b/doc/ego/il/il3
@@ -0,0 +1,164 @@
+.NH 2
+Feasibility and desirability analysis
+.PP
+Feasibility and desirability analysis
+of in line substitution differ
+somewhat from most other techniques.
+Usually, much effort is needed to find
+a feasible opportunity for optimization
+(e.g. a redundant subexpression).
+Desirability analysis then checks
+if it is really advantageous to do
+the optimization.
+For IL, opportunities are easy to find.
+To see if an in line expansion is
+desirable will not be hard either.
+Yet, the main problem is to find the most
+desirable ones.
+We will deal with this problem later and
+we will first attend to feasibility and
+desirability analysis.
+.PP
+There are several reasons why a procedure invocation
+cannot or should not be expanded in line.
+.sp
+A call to a procedure P cannot be expanded in line
+in any of the following cases:
+.IP 1. 3
+The body of P is not available as EM text.
+Clearly, there is no way to do the substitution.
+.IP 2.
+P, or any procedure called by P (transitively),
+follows the chain of statically enclosing
+procedures (via a LXL or LXA instruction)
+or follows the chain of dynamically enclosing
+procedures (via a DCH).
+If the call were expanded in line,
+one level would be removed from the chains,
+leading to total chaos.
+This chaos could be solved by patching up
+every LXL, LXA or DCH in all procedures
+that could be part of the chains,
+but this is hard to implement.
+.IP 3.
+P, or any procedure called by P (transitively),
+calls a procedure whose body is not
+available as EM text.
+The unknown procedure may use an LXL, LXA or DCH.
+However, in several languages a separately
+compiled procedure has no access to the
+static or dynamic chain.
+In this case
+this point does not apply.
+.IP 4.
+P, or any procedure called by P (transitively),
+uses the LPB instruction, which converts a
+local base to an argument base;
+as the locals and parameters are stored
+in a non-standard way (differing from the
+normal EM calling sequence) this instruction
+would yield incorrect results.
+.IP 5.
+The total number of bytes of the parameters
+of P is not known.
+P may be a procedure with a variable number
+of parameters or may have an array of dynamic size
+as value parameter.
+.LP
+It is undesirable to expand a call to a procedure P in line
+in any of the following cases:
+.IP 1. 3
+P is large, i.e. the number of EM instructions
+of P exceeds some threshold.
+The expanded code would be large too.
+Furthermore, several programs in ACK,
+including the global optimizer itself,
+may run out of memory if they have to run
+in a small address space and are provided
+very large procedures.
+The threshold may be set to infinite,
+in which case this point does not apply.
+.IP 2.
+P has many local variables.
+All these variables would have to be allocated
+in the stack frame of the calling procedure.
+.PP
+If a call may be expanded in line, we have to
+decide how to access its parameters.
+In the previous section we stated that we would
+use in line parameters whenever possible and desirable.
+There are several reasons why a parameter
+cannot or should not be expanded in line.
+.sp
+No parameter of a procedure P can be expanded in line,
+in any of the following cases:
+.IP 1. 3
+P, or any procedure called by P (transitively),
+does a store-indirect or a use-indirect (i.e. through
+a pointer).
+However, if the front-end has generated messages
+telling that certain parameters can not be accessed
+indirectly, those parameters may be expanded in line.
+.IP 2.
+P, or any procedure called by P (transitively),
+calls a procedure whose body is not available as EM text.
+The unknown procedure may do a store-indirect
+or a use-indirect.
+However, the same remark about front-end messages
+as for 1. holds here.
+.IP 3.
+The address of a parameter location is taken (via a LAL).
+In the normal calling sequence, all parameters
+are stored sequentially. If the address of one
+parameter location is taken, the address of any
+other parameter location can be computed from it.
+Hence we must put every parameter in a temporary location;
+furthermore, all these locations must be in
+the same order as for the normal calling sequence.
+.IP 4.
+P has overlapping parameters; for example, it uses
+the parameter at offset 10 both as a 2 byte and as a 4 byte
+parameter.
+Such code may be produced by the front ends if
+the formal parameter is of some record type
+with variants.
+.PP
+Sometimes a specific parameter must not be expanded in line.
+.sp 0
+An actual parameter expression cannot be expanded in line
+in any of the following cases:
+.IP 1. 3
+P stores into the parameter location.
+Even if the actual parameter expression is a simple
+variable, it is incorrect to change the 'store into
+formal' into a 'store into actual', because of
+the parameter mechanism used.
+In Pascal, the following expansion is incorrect:
+.DS
+procedure p (x:integer);
+begin
+   x := 20;
+end;
+...
+a := 10;                a := 10;
+p(a);        --->       a := 20;
+write(a);               write(a);
+.DE
+.IP 2.
+P changes any of the operands of the
+actual parameter expression.
+If the expression is expanded and evaluated
+after the operand has been changed,
+the wrong value will be used.
+.IP 3.
+The actual parameter expression has side effects.
+It must be evaluated only once,
+at the place of the call.
+.LP
+It is undesirable to expand an actual parameter in line
+in the following case:
+.IP 1. 3
+The parameter is used more than once
+(dynamically) and the actual parameter expression
+is not just a simple variable or constant.
+.LP
--- a/doc/ego/il/il4
+++ b/doc/ego/il/il4
@@ -0,0 +1,132 @@
+.NH 2
+Heuristic rules
+.PP
+Using the information described
+in the previous section,
+we can find all calls that can
+be expanded in line, and for which
+this expansion is desirable.
+In general, we cannot expand all these calls,
+so we have to choose the 'best' ones.
+With every CAL instruction
+that may be expanded, we associate
+a \fIpay off\fR,
+which expresses how desirable it is
+to expand this specific CAL.
+.sp
+Let Tc denote the portion of EM text involved
+in a specific call, i.e. the pushing of the actual
+parameter expressions, the CAL itself,
+the popping of the parameters and the
+pushing of the result (if any, via an LFR).
+Let Te denote the EM text that would be obtained
+by expanding the call in line.
+Let Pc be the original program and Pe the program
+with Te substituted for Tc.
+The pay off of the CAL depends on two factors:
+.IP -
+T = execution_time(Pe) - execution_time(Pc)
+.IP -
+S = code_size(Pe) - code_size(Pc)
+.LP
+The change in execution time (T) depends on:
+.IP -
+T1 = execution_time(Te) - execution_time(Tc)
+.IP -
+N = number of times Te or Tc get executed.
+.LP
+We assume that T1 will be the same every
+time the code gets executed.
+This is a reasonable assumption.
+(Note that we are talking about one CAL,
+not about different calls to the same procedure).
+Hence
+.DS
+T = N * T1
+.DE
+T1 can be estimated by a careful analysis
+of the transformations that are performed.
+Below, we list everything that will be
+different when a call is expanded in line:
+.IP -
+The CAL instruction is not executed.
+This saves a subroutine jump.
+.IP -
+The instructions in the procedure prolog
+are not executed.
+These instructions, generated from the PRO pseudo,
+save some machine registers 
+(including the old LB), set the new LB and allocate space
+for the locals of the called routine.
+The savings may be less if there are no
+locals to allocate.
+.IP -
+In line parameters are not evaluated before the call
+and are not pushed on the stack.
+.IP -
+All remaining parameters are stored in local variables,
+instead of being pushed on the stack.
+.IP -
+If the number of parameters is nonzero,
+the ASP instruction after the CAL is not executed.
+.IP -
+Every reference to an in line parameter is
+substituted by the parameter expression.
+.IP -
+RET (return) instructions are replaced by
+BRA (branch) instructions.
+If the called procedure 'falls through'
+(i.e. it has only one RET, at the end of its code),
+even the BRA is not needed.
+.IP -
+The LFR (fetch function result) is not executed.
+.PP
+Besides these changes, which are caused directly by IL,
+other changes may occur as IL influences other optimization
+techniques, such as Register Allocation and Constant Propagation.
+Our heuristic rules do not take into account the quite
+unpredictable effects on Register Allocation.
+They do, however, favour calls that have numeric \fIconstants\fR
+as parameter; especially the constant "0" as an inline
+parameter gets high scores,
+as further optimizations may often be possible.
+.PP
+It cannot be determined statically how often a CAL instruction gets
+executed.
+We will use \fIloop nesting\fR information here.
+The nesting level of the loop in which
+the CAL appears (if any) will be used as an
+indication for the number of times it gets executed.
+.PP
+Based on all these facts,
+the pay off of a call will be computed.
+The following model was developed empirically.
+Assume procedure P calls procedure Q.
+The call takes place in basic block B.
+.DS
+ZP = # zero parameters
+CP = # constant parameters - ZP
+LN = Loop Nesting level (0 if outside any loop)
+F  = \fIif\fR # formal parameters of Q > 0 \fIthen\fR 1 \fIelse\fR 0
+FT = \fIif\fR Q falls through \fIthen\fR 1 \fIelse\fR 0
+S  = size(Q) - 1 - # inline_parameters - F
+L  = \fIif\fR # local variables of P > 0 \fIthen\fR 0 \fIelse\fR -1
+A  = CP + 2 * ZP
+N  = \fIif\fR LN=0 and P is never called from a loop \fIthen\fR 0 \fIelse\fR (LN+1)**2
+FM = \fIif\fR B is a firm block \fIthen\fR 2 \fIelse\fR 1
+
+pay_off = (100/S + FT + F + L + A) * N * FM
+.DE
+S stands for the size increase of the program,
+which is slightly less than the size of Q.
+The size of a procedure is taken to be its number
+of (non-pseudo) EM instructions.
+The terms "loop nesting level" and "firm" were defined
+in the chapter on the Intermediate Code (section "loop tables").
+If a call is not inside a loop and the calling procedure
+is itself never called from a loop (transitively),
+then the call will probably be executed at most once.
+Such a call is never expanded in line (its pay off is zero).
+If the calling procedure doesn't have local variables, a penalty (L)
+is introduced, as it will most likely get local variables if the
+call gets expanded.
--- a/doc/ego/il/il5
+++ b/doc/ego/il/il5
@@ -0,0 +1,440 @@
+.NH 2
+Implementation
+.PP
+A major factor in the implementation
+of Inline Substitution is the requirement
+not to use an excessive amount of memory.
+IL essentially analyzes the entire program;
+it makes decisions based on which procedure calls
+appear in the whole program.
+Yet, because of the memory restriction, it is
+not feasible to read the entire program
+in main memory.
+To solve this problem, the IL phase has been
+split up into three subphases that are executed sequentially:
+.IP 1.
+analyze every procedure; see how it accesses its parameters;
+simultaneously collect all calls
+appearing in the whole program and put them
+in a \fIcall-list\fR.
+.IP 2.
+use the call-list and decide which calls will be substituted
+in line.
+.IP 3.
+take the decisions of subphase 2 and modify the
+program accordingly.
+.LP
+Subphases 1 and 3 scan the input program; only
+subphase 3 modifies it.
+It is essential that the decisions can be made
+in subphase 2
+without using the input program,
+provided that subphase 1 puts enough information
+in the call-list.
+Subphase 2 keeps the entire call-list in main memory
+and repeatedly scans it, to
+find the next best candidate for expansion.
+.PP
+We will specify the
+data structures used by IL before 
+describing the subphases.
+.NH 3
+Data structures
+.NH 4
+The procedure table
+.PP
+In subphase 1 information is gathered about every procedure
+and added to the procedure table.
+This information is used by the heuristic rules.
+A proctable entry for procedure p has
+the following extra information:
+.IP -
+is it allowed to substitute an invocation of p in line?
+.IP -
+is it allowed to put any parameter of such a call in line?
+.IP -
+the size of p (number of EM instructions)
+.IP -
+does p 'fall through'?
+.IP -
+a description of the formal parameters that p accesses; this information
+is obtained by looking at the code of p. For every parameter f,
+we record:
+.RS
+.IP -
+the offset of f
+.IP -
+the type of f (word, double word, pointer)
+.IP -
+may the corresponding actual parameter be put in line?
+.IP -
+is f ever accessed indirectly?
+.IP -
+is f used: never, once or more than once?
+.RE
+.IP -
+the number of times p is called (see below)
+.IP -
+the file address of its call-count information (see below).
+.LP
+.NH 4
+Call-count information
+.PP
+As a result of Inline Substitution, some procedures may
+become useless, because all their invocations have been
+substituted in line.
+One of the tasks of IL is to keep track of which
+procedures are no longer called.
+Note that IL is especially keen on procedures that are
+called only once
+(possibly as a result of expanding all other calls to it).
+So we want to know how many times a procedure
+is called \fIduring\fR Inline Substitution.
+It is not good enough to compute this
+information afterwards.
+The task is rather complex, because
+the number of times a procedure is called
+varies during the entire process:
+.IP 1.
+If a call to p is substituted in line,
+the number of calls to p gets decremented by 1.
+.IP 2.
+If a call to p is substituted in line,
+and p contains n calls to q, then the number of calls to q
+gets incremented by n.
+.IP 3.
+If a procedure p is removed (because it is no
+longer called) and p contains n calls to q,
+then the number of calls to q gets decremented by n.
+.LP
+(Note that p may be the same as q, if p is recursive).
+.sp 0
+So we actually want to have the following information:
+.DS
+NRCALL(p,q) = number of calls to q appearing in p,
+
+for all procedures p and q that may be put in line.
+.DE
+This information, called \fIcall-count information\fR is
+computed by the first subphase.
+It is stored in a file.
+It is represented as a number of lists, rather than as
+a (very sparse) matrix.
+Every procedure has a list of (proc,count) pairs,
+telling which procedures it calls, and how many times.
+The file address of its call-count list is stored
+in its proctable entry.
+Whenever this information is needed, it is fetched from
+the file, using direct access.
+The proctable entry also contains the number of times
+a procedure is called, at any moment.
+.NH 4
+The call-list
+.PP
+The call-list is the major data structure used by IL.
+Every item of the list describes one procedure call.
+It contains the following attributes:
+.IP -
+the calling procedure (caller)
+.IP -
+the called procedure (callee)
+.IP -
+identification of the CAL instruction (sequence number)
+.IP -
+the loop nesting level; our heuristic rules appreciate
+calls inside a loop (or even inside a loop nested inside
+another loop, etc.) more than other calls
+.IP -
+the actual parameter expressions involved in the call;
+for every actual, we record:
+.RS
+.IP -
+the EM code of the expression
+.IP -
+the number of bytes of its result (size)
+.IP -
+an indication if the actual may be put in line
+.RE
+.LP
+The structure of the call-list is rather complex.
+Whenever a call is expanded in line, new calls
+will suddenly appear in the program,
+that were not contained in the original body
+of the calling subroutine.
+These calls are inherited from the called procedure.
+We will refer to these invocations as \fInested calls\fR
+(see Fig. 5.1).
+.DS
+procedure p is
+begin                           .
+     a();                       .
+     b();                       .
+end;
+
+procedure r is            procedure r is
+begin                     begin
+     x();                      x();
+     p();  -- in line          a();  -- nested call
+     y();                      b();  -- nested call
+end;                           y();
+                          end;
+
+Fig. 5.1 Example of nested procedure calls
+.DE
+Nested calls may subsequently be put in line too
+(probably resulting in a yet deeper nesting level, etc.).
+So the call-list does not always reflect the source program,
+but changes dynamically, as decisions are made.
+If a call to p is expanded, all calls appearing in p
+will be added to the call-list.
+.sp 0
+A convenient and elegant way to represent
+the call-list is to use a LISP-like list.
+.[
+poel lisp trac
+.]
+Calls that appear at the same level
+are linked in the CDR direction. If a call C
+to a procedure p is expanded,
+all calls appearing in p are put in a sub-list
+of C, i.e. in its CAR.
+In the example above, before the decision
+to expand the call to p is made, the
+call-list of procedure r looks like:
+.DS
+(call-to-x, call-to-p, call-to-y)
+.DE
+After the decision, it looks like:
+.DS
+(call-to-x, (call-to-p*, call-to-a, call-to-b), call-to-y)
+.DE
+The call to p is marked, because it has been
+substituted.
+Whenever IL wants to traverse the call-list of some procedure,
+it uses the well-known LISP technique of
+recursion in the CAR direction and
+iteration in the CDR direction
+(see page 1.19-2 of
+.[
+poel lisp trac
+.]
+).
+All list traversals look like:
+.DS
+traverse(list)
+{
+    for (c = first(list); c != 0; c = CDR(c)) {
+	if (c is marked) {
+	    traverse(CAR(c));
+	} else {
+	    do something with c
+	}
+    }
+}
+.DE
+The entire call-list consists of a number of LISP-like lists,
+one for every procedure.
+The proctable entry of a procedure contains a pointer
+to the beginning of the list.
+.NH 3
+The first subphase: procedure analysis
+.PP
+The tasks of the first subphase are to determine
+several attributes of every procedure
+and to construct the basic call-list,
+i.e. without nested calls.
+The size of a procedure is determined
+by simply counting its EM instructions.
+Pseudo instructions are skipped.
+A procedure does not 'fall through' if its CFG
+contains a basic block
+that is not the last block of the CFG and
+that ends on a RET instruction.
+The formal parameters of a procedure are determined
+by inspection of
+its code.
+.PP
+The call-list is constructed by looking at all CAL instructions
+appearing in the program.
+The call-list should only contain calls to procedures
+that may be put in line.
+This fact is only known if the procedure was
+analyzed earlier.
+If a call to a procedure p appears in the program
+before the body of p,
+the call will always be put in the call-list.
+If p is later found to be unsuitable,
+the call will be removed from the list by the
+second subphase.
+.PP
+An important issue is the recognition
+of the actual parameter expressions of the call.
+The front ends produce messages telling how many
+bytes of formal parameters every procedure accesses.
+(If there is no such message for a procedure, it
+cannot be put in line).
+The actual parameters together must account for
+the same number of bytes. A recursive descent parser is used
+to parse side-effect free EM expressions.
+It uses a table and some
+auxiliary routines to determine
+how many bytes every EM instruction pops from the stack
+and how many bytes it pushes onto the stack.
+These numbers depend on the EM instruction, its argument,
+and the wordsize and pointersize of the target machine.
+Initially, the parser has to recognize the
+number of bytes specified in the formals-message,
+say N.
+Assume the first instruction before the CAL pops S bytes
+and pushes R bytes.
+If R > N, too many bytes are recognized
+and the parser fails.
+Else, it calls itself recursively to recognize the
+S bytes used as operand of the instruction.
+If it succeeds in doing so, it continues with the next instruction,
+i.e. the first instruction before the code recognized by
+the recursive call, to recognize N-R more bytes.
+The result is a number of EM instructions that collectively push N bytes.
+If an instruction is come across that has side-effects
+(e.g. a store or a procedure call) or of which R and S cannot
+be computed statically (e.g. a LOS), it fails.
+.sp 0
+Note that the parser traverses the code backwards.
+As EM code is essentially postfix code, the parser works top down.
+.PP
+If the parser fails to recognize the parameters, the call will not
+be substituted in line.
+If the parameters can be determined, they still have to
+match the formal parameters of the called procedure.
+This check is performed by the second subphase; it cannot be
+done here, because it is possible that the called
+procedure has not been analyzed yet.
+.PP
+The entire call-list is written to a file,
+to be processed by the second subphase.
+.NH 3
+The second subphase: making decisions
+.PP
+The task of the second subphase is quite easy
+to understand.
+It reads the call-list file,
+builds an incore call-list and deletes every
+call that may not be expanded in line (either because the called
+procedure may not be put in line, or because the actual parameters
+of the call do not match the formal parameters of the called procedure).
+It assigns a \fIpay-off\fR to every call,
+indicating how desirable it is to expand it.
+.PP
+The subphase repeatedly scans the call-list and takes
+the call with the highest ratio.
+The chosen one gets marked,
+and the call-list is extended with the nested calls,
+as described above.
+These nested calls are also assigned a ratio,
+and will be considered too during the next scans.
+.sp 0
+After every decision the number of times
+every procedure is called is updated, using
+the call-count information.
+Meanwhile, the subphase keeps track of the amount of space left
+available.
+If all space is used, or if there are no more calls left to
+be expanded, it exits this loop.
+Finally, calls to procedures that are called only
+once are also chosen.
+.PP
+The actual parameters of a call are only needed by
+this subphase to assign a ratio to a call.
+To save some space, these actuals are not kept in main memory.
+They are removed after the call has been read and a ratio
+has been assigned to it.
+So this subphase works with \fIabstracts\fR of calls.
+After all work has been done,
+the actual parameters of the chosen calls are retrieved
+from a file,
+as they are needed by the transformation subphase.
+.NH 3
+The third subphase: doing transformations
+.PP
+The third subphase makes the actual modifications to
+the EM text.
+It is directed by the decisions made in the previous subphase,
+as expressed via the call-list.
+The call-list read by this subphase contains
+only calls that were selected for expansion.
+The list is ordered in the same way as the EM text,
+i.e. if a call C1 appears before a call C2 in the call-list,
+C1 also appears before C2 in the EM text.
+So the EM text is traversed linearly,
+the calls that have to be substituted are determined
+and the modifications are made.
+If a procedure is come across that is no longer needed,
+it is simply not written to the output EM file.
+The substitution of a call takes place in distinct steps:
+.IP "change the calling sequence" 7
+.sp 0
+The actual parameter expressions are changed.
+Parameters that are put in line are removed.
+All remaining ones must store their result in a
+temporary local variable, rather than
+push it on the stack.
+The CAL instruction and any ASP (to pop actual parameters)
+or LFR (to fetch the result of a function)
+are deleted.
+.IP "fetch the text of the called procedure"
+.sp 0
+Direct disk access is used to read the text of the
+called procedure.
+The file offset is obtained from the proctable entry.
+.IP "allocate bytes for locals and temporaries"
+.sp 0
+The local variables of the called procedure will be put in the
+stack frame of the calling procedure.
+The same applies to any temporary variables
+that hold the result of parameters
+that were not put in line.
+The proctable entry of the caller is updated.
+.IP "put a label after the CAL"
+.sp 0
+If the called procedure contains a RET (return) instruction
+somewhere in the middle of its text (i.e. it does
+not fall through), the RET must be changed into
+a BRA (branch), to jump over the
+remainder of the text.
+This label is not needed if the called
+procedure falls through.
+.IP "copy the text of the called procedure and modify it"
+.sp 0
+References to local variables of the called routine
+and to parameters that are not put in line
+are changed to refer to the
+new local of the caller.
+References to in line parameters are replaced
+by the actual parameter expression.
+Returns (RETs) are either deleted or
+replaced by a BRA.
+Messages containing information about local
+variables or parameters are changed.
+Global data declarations and the PRO and END pseudos
+are removed.
+Instruction labels and references to them are
+changed to make sure they do not have the
+same identifying number as
+labels in the calling procedure.
+.IP "insert the modified text"
+.sp 0
+The pseudos of the called procedure are put after the pseudos
+of the calling procedure.
+The real text of the callee is put at
+the place where the CAL was.
+.IP "take care of nested substitutions"
+.sp 0
+The expanded procedure may contain calls that
+have to be expanded too (nested calls).
+If the descriptor of this call contains actual
+parameter expressions,
+the code of the expressions has to be changed
+the same way as the code of the callee was changed.
+Next, the entire process of finding CALs and doing
+the substitutions is repeated recursively.
+.LP
--- a/doc/ego/il/il6
+++ b/doc/ego/il/il6
@@ -0,0 +1,27 @@
+.NH 2
+Source files of IL
+.PP
+The sources of IL are in the following files
+and packages (the prefixes 1_, 2_ and 3_ refer to the three subphases):
+.IP il.h: 14
+declarations of global variables and
+data structures
+.IP il.c:
+the routine main; the driving routines of the three subphases
+.IP 1_anal:
+contains a subroutine that analyzes a procedure
+.IP 1_cal:
+contains a subroutine that analyzes a call
+.IP 1_aux:
+implements auxiliary procedures used by subphase 1
+.IP 2_aux:
+implements auxiliary procedures used by subphase 2
+.IP 3_subst:
+the driving routine for doing the substitution
+.IP 3_change:
+lower level routines that do certain modifications
+.IP 3_aux:
+implements auxiliary procedures used by subphase 3
+.IP aux:
+implements auxiliary procedures used by several subphases.
+.LP
--- a/doc/ego/intro/.distr
+++ b/doc/ego/intro/.distr
@@ -0,0 +1,3 @@
+head
+intro1
+tail
--- a/doc/ego/intro/head
+++ b/doc/ego/intro/head
@@ -0,0 +1,7 @@
+.ND
+.ll 80m
+.nr LL 80m
+.nr tl 78m
+.tr ~ 
+.ds >. .
+.ds [. " \[
--- a/doc/ego/intro/intro1
+++ b/doc/ego/intro/intro1
@@ -0,0 +1,79 @@
+.TL
+The design and implementation of
+the EM Global Optimizer
+.AU
+H.E. Bal
+.AI
+Vrije Universiteit
+Wiskundig Seminarium, Amsterdam
+.AB
+The EM Global Optimizer is part of the Amsterdam Compiler Kit,
+a toolkit for making retargetable compilers.
+It optimizes the intermediate code common to all compilers of
+the toolkit (EM),
+so it can be used for all programming languages and
+all processors supported by the kit.
+.PP
+The optimizer is based on well-understood concepts like
+control flow analysis and data flow analysis.
+It performs the following optimizations:
+Inline Substitution, Strength Reduction, Common Subexpression Elimination,
+Stack Pollution, Cross Jumping, Branch Optimization, Copy Propagation,
+Constant Propagation, Dead Code Elimination and Register Allocation.
+.PP
+This report describes the design of the optimizer and several
+of its implementation issues.
+.AE
+.bp
+.NH 1
+Introduction
+.PP
+.FS
+This work was supported by the
+Stichting Technische Wetenschappen (STW)
+under grant VWI00.0001.
+.FE
+The EM Global Optimizer is part of a software toolkit
+for making production-quality retargetable compilers.
+This toolkit,
+called the Amsterdam Compiler Kit
+.[
+tanenbaum toolkit rapport
+.]
+.[
+tanenbaum toolkit cacm
+.]
+runs under the Unix*
+.FS
+*Unix is a Trademark of Bell Laboratories
+.FE
+operating system.
+.sp 0
+The main design philosophy of the toolkit is to use
+a language- and machine-independent
+intermediate code, called EM.
+.[
+keizer architecture
+.]
+The basic compilation process can be split up into
+two parts.
+A language-specific front end translates the source program into EM.
+A machine-specific back end transforms EM to assembly code
+of the target machine.
+.PP
+The global optimizer is an optional phase of the
+compilation process, and can be used to obtain
+machine code of a higher quality.
+The optimizer transforms EM-code to better EM-code,
+so it comes between the front end and the back end.
+It can be used with any combination of languages
+and machines, as far as they are supported by
+the compiler kit.
+.PP
+This report describes the design of the
+global optimizer and several of its
+implementation issues.
+Measurements can be found in.
+.[
+bal tanenbaum global
+.]
--- a/doc/ego/intro/tail
+++ b/doc/ego/intro/tail
@@ -0,0 +1,3 @@
+.[
+$LIST$
+.]
--- a/doc/ego/lv/.distr
+++ b/doc/ego/lv/.distr
@@ -0,0 +1 @@
+lv1
--- a/doc/ego/lv/lv1
+++ b/doc/ego/lv/lv1
@@ -0,0 +1,95 @@
+.bp
+.NH 1
+Live-Variable analysis
+.NH 2
+Introduction
+.PP
+The "Live-Variable analysis" optimization technique (LV)
+performs some code improvements and computes information that may be
+used by subsequent optimizations.
+The main task of this phase is the 
+computation of \fIlive-variable information\fR.
+.[~[
+aho compiler design
+.] section 14.4]
+A variable A is said to be \fIdead\fR at some point p of the
+program text, if on no path in the control flow graph
+from p to a RET (return), A can be used before being changed;
+else A is said to be \fIlive\fR. 
+.PP
+A statement of the form
+.DS
+VARIABLE := EXPRESSION
+.DE
+is said to be dead if the left hand side variable is dead just after
+the statement and the right hand side expression has no
+side effects (i.e. it doesn't change any variable).
+Such a statement can be eliminated entirely.
+Dead code will seldom be present in the original program,
+but it may be the result of earlier optimizations,
+such as copy propagation.
+.PP
+Live-variable information is passed to other phases via
+messages in the EM code.
+Live/dead messages are generated at points in the EM text where
+variables become dead or live.
+This information is especially useful for the Register
+Allocation phase.
+.NH 2
+Implementation
+.PP
+The implementation uses algorithm 14.6 of.
+.[
+aho compiler design
+.]
+First two sets DEF and USE are computed for every basic block b:
+.IP DEF(b) 9
+the set of all variables that are assigned a value in b before
+being used
+.IP USE(b) 9
+the set of all variables that may be used in b before being changed.
+.LP
+(So variables that may, but need not, be used resp. changed via a procedure
+call or through a pointer are included in USE but not in DEF).
+The next step is to compute the sets IN and OUT :
+.IP IN[b] 9
+the set of all variables that are live at the beginning of b
+.IP OUT[b] 9
+the set of all variables that are live at the end of b
+.LP
+IN and OUT can be computed for all blocks simultaneously by solving the
+data flow equations:
+.DS
+(1)   IN[b] = OUT[b] - DEF[b] + USE[b]
+(2)   OUT[b] = IN[s1] + ... + IN[sn] ;
+	where SUCC[b] = {s1, ... , sn}
+.DE
+The equations are solved by an algorithm similar to that for
+the Use Definition equations (see previous chapter).
+.PP
+Finally, each basic block is visited in turn to remove its dead code
+and to emit the live/dead messages.
+Every basic block b is traversed from its last
+instruction backwards to the beginning of b.
+Initially, all variables that are dead at the end
+of b are marked dead. All others are marked live.
+If we come across an assignment to a variable X that
+was marked live, a live-message is put after the
+assignment and X is marked dead;
+if X was marked dead, the assignment may be removed, provided that
+the right hand side expression contains no side effects.
+If we come across a use of a variable X that
+was marked dead, a dead-message is put after the
+use and X is marked live.
+So at any point, the mark of X tells whether X is
+live or dead immediately before that point.
+A message is also generated at the start of a basic block
+for every variable that was live at the end of the (textually)
+previous block, but dead at the entry of this block, or v.v.
+.PP
+Only local variables are considered.
+This significantly reduces the memory needed by this phase,
+eases the implementation and is hardly less efficient than
+considering all variables.
+(Note that it is very hard to prove that an assignment to
+a global variable is dead).
--- a/doc/ego/ov/.distr
+++ b/doc/ego/ov/.distr
@@ -0,0 +1 @@
+ov1
--- a/doc/ego/ov/ov1
+++ b/doc/ego/ov/ov1
@@ -0,0 +1,371 @@
+.bp
+.NH 1
+Overview of the global optimizer
+.NH 2
+The ACK compilation process
+.PP
+The EM Global Optimizer is one of three optimizers that are
+part of the Amsterdam Compiler Kit (ACK).
+The phases of ACK are:
+.IP 1.
+A Front End translates a source program to EM
+.IP 2.
+The Peephole Optimizer
+.[
+tanenbaum staveren peephole toplass
+.]
+reads EM code and produces 'better' EM code.
+It performs a number of optimizations (mostly peephole
+optimizations)
+such as constant folding, strength reduction and unreachable code
+elimination.
+.IP 3.
+The Global Optimizer further improves the EM code.
+.IP 4.
+The Code Generator transforms EM to assembly code
+of the target computer.
+.IP 5.
+The Target Optimizer improves the assembly code.
+.IP 6.
+An Assembler/Loader generates an executable file.
+.LP
+For a more extensive overview of the ACK compilation process,
+we refer to.
+.[
+tanenbaum toolkit rapport
+.]
+.[
+tanenbaum toolkit cacm
+.]
+.PP
+The input of the Global Optimizer may consist of files and
+libraries.
+Every file or module in the library must contain EM code in
+Compact Assembly Language format.
+.[~[
+tanenbaum machine architecture
+.], section 11.2]
+The output consists of one such EM file.
+The input files and libraries together need not
+constitute an entire program,
+although as much of the program as possible should be supplied.
+The more information about the program the optimizer 
+gets, the better its output code will be.
+.PP
+The Global Optimizer is language- and machine-independent,
+i.e. it can be used for all languages and machines supported by ACK.
+Yet, it puts some unavoidable restrictions on the EM code
+produced by the Front End (see below).
+It must have some knowledge of the target machine.
+This knowledge is expressed in a machine description table
+which is passed as argument to the optimizer.
+This table does not contain very detailed information about the
+target (such as its instruction set and addressing modes).
+.NH 2
+The EM code
+.PP
+The definition of EM, the intermediate code of all ACK compilers,
+is given in a separate document.
+.[
+tanenbaum machine architecture
+.]
+We will only discuss some features of EM that are most relevant
+to the Global Optimizer.
+.PP
+EM is the assembly code of a virtual \fIstack machine\fR.
+All operations are performed on the top of the stack.
+For example, the statement "A := B + 3" may be expressed in EM as:
+.DS
+LOL -4         -- push local variable B
+LOC 3          -- push constant 3
+ADI 2          -- add two 2-byte items on top of
+	       -- the stack and push the result
+STL -2         -- pop A
+.DE
+So EM is essentially a \fIpostfix\fR code.
+.PP
+EM has a rich instruction set, containing several arithmetic
+and logical operators.
+It also contains special-case instructions (such as INCrement).
+.PP
+EM has \fIglobal\fR (\fIexternal\fR) variables, accessible
+by all procedures and \fIlocal\fR variables, accessible by a few
+(nested) procedures.
+The local variables of a lexically enclosing procedure may
+be accessed via a \fIstatic link\fR. 
+EM has instructions to follow the static chain.
+There are EM instructions to allow a procedure
+to access its local variables directly (such as LOL and STL above).
+Local variables are referenced via an offset in the stack frame
+of the procedure, rather than by their names (e.g. -2 and -4 above).
+The EM code does not contain the (source language) type
+of the variables.
+.PP
+All structured statements in the source program are expressed in
+low level jump instructions.
+Besides conditional and unconditional branch instructions, there are 
+two case instructions (CSA and CSB),
+to allow efficient translation of case statements.
+.NH 2
+Requirements on the EM input
+.PP
+As the optimizer should be useful for all languages,
+it clearly should not put severe restrictions on the EM code
+of the input.
+There is, however, one immovable requirement:
+it must be possible to determine the \fIflow of control\fR of the
+input program.
+As virtually all global optimizations are based on control flow information,
+the optimizer would be totally powerless without it.
+For this reason we restrict the usage of the case jump instructions (CSA/CSB)
+of EM.
+Such an instruction is always called with the address of a case descriptor
+on top of the stack.
+.[~[
+tanenbaum machine architecture
+.] section 7.4]
+This descriptor contains the labels of all possible
+destinations of the jump.
+We demand that all case descriptors are allocated in a global
+data fragment of type ROM, i.e. the case descriptors
+may not be modifiable.
+Furthermore, any case instruction should be immediately preceded by
+a LAE (Load Address External) instruction, that loads the
+address of the descriptor,
+so the descriptor can be uniquely identified.
+.PP
+The optimizer will work improperly if the user deceives the control flow.
+We will give two methods to do this.
+.PP
+In "C" the notorious library routines "setjmp" and "longjmp"
+.[
+unix programmer's manual McIlroy
+.]
+may be used to jump out of a procedure,
+but can also be used for a number of other stuffy purposes,
+for example, to create an extra entry point in a loop.
+.DS
+ while (condition) {
+	 ....
+	 setjmp(buf);
+	 ...
+ }
+ ...
+ longjmp(buf);
+.DE
+The invocation of longjmp actually is a jump to the place of
+the last call to setjmp with the same argument (buf).
+As the calls to setjmp and longjmp are indistinguishable from
+normal procedure calls, the optimizer will not see the danger.
+Needless to say, several loop optimizations will behave
+unexpectedly when presented with such pathological input.
+.PP
+Another way to deceive the flow of control is
+by using exception handling routines.
+Ada*
+.FS
+* Ada is a registered trademark of the U.S. Government
+(Ada Joint Program Office).
+.FE
+has clearly recognized the dangers of exception handling,
+but other languages (such as PL/I) have not.
+.[
+ada rationale
+.]
+.PP
+The optimizer will be more effective if the EM input contains
+some extra information about the source program.
+Especially the \fIregister message\fR is very important.
+These messages indicate which local variables may never be
+accessed indirectly.
+Most optimizations benefit significantly by this information.
+.PP
+The Inline Substitution technique needs to know how many bytes
+of formal parameters every procedure accesses.
+Only calls to procedures for which the EM code contains this information
+will be substituted in line.
+.NH 2
+Structure of the optimizer
+.PP
+The Global Optimizer is organized as a number of \fIphases\fR,
+each one performing some task.
+The main structure is as follows:
+.IP IC 6
+the Intermediate Code construction phase transforms EM into the
+intermediate code (ic) of the optimizer
+.IP CF
+the Control Flow phase extends the ic with control flow
+information and interprocedural information
+.IP OPTs
+zero or more optimization phases, each one performing one or
+more related optimizations
+.IP CA
+the Compact Assembly phase generates Compact Assembly Language EM code
+out of ic.
+.LP
+.PP
+An important issue in the design of a global optimizer is the
+interaction between optimization techniques.
+It is often advantageous to combine several techniques in
+one algorithm that takes into account all interactions between them.
+Ideally, one single algorithm should be developed that does
+all optimizations simultaneously and deals with all possible interactions.
+In practice, such an algorithm is still far out of reach.
+Instead some rather ad hoc (albeit important) combinations are chosen,
+such as Common Subexpression Elimination and Register Allocation.
+.[
+prabhala sethi common subexpressions
+.]
+.[
+sethi ullman optimal code
+.]
+.PP
+In the EM Global Optimizer there is one separate algorithm for
+every technique.
+Note that this does not mean that all techniques are independent
+of each other.
+.PP
+In principle, the optimization phases can be run in any order;
+a phase may even be run more than once.
+However, the following rules should be obeyed:
+.IP -
+the Live Variable analysis phase (LV) must be run prior to
+Register Allocation (RA), as RA uses information output by LV.
+.IP -
+RA should be the last phase; this is a consequence of the way
+the interface between RA and the Code Generator is defined.
+.LP
+The ordering of the phases has significant impact on
+the quality of the produced code.
+In
+.[
+wulf overview production quality carnegie-mellon
+.]
+two kinds of phase ordering problems are distinguished.
+If two techniques A and B both take away opportunities of each other,
+there is a "negative" ordering problem.
+If, on the other hand, both A and B introduce new optimization
+opportunities for each other, the problem is called "positive".
+In the Global Optimizer the following interactions must be
+taken into account:
+.IP -
+Inline Substitution (IL) may create new opportunities for most
+other techniques, so it should be run as early as possible
+.IP -
+Use Definition analysis (UD) may introduce opportunities for LV.
+.IP -
+Strength Reduction may create opportunities for UD
+.LP
+The optimizer has a default phase ordering, which can
+be changed by the user.
+.NH 2
+Structure of this document
+.PP
+The remaining chapters of this document each describe one
+phase of the optimizer.
+For every phase, we describe its task, its design,
+its implementation, and its source files.
+The latter two sections are intended to aid the
+maintenance of the optimizer and
+can be skipped by the initial reader.
+.NH 2
+References
+.PP
+There are very 
+few modern textbooks on optimization.
+Chapters 12, 13, and 14 of
+.[
+aho compiler design
+.]
+are a good introduction to the subject.
+Wulf et al.
+.[
+wulf optimizing compiler
+.]
+describe one specific optimizing (Bliss) compiler.
+Anklam et al.
+.[
+anklam vax-11
+.]
+discuss code generation and optimization in
+compilers for one specific machine (a Vax-11).
+Kirchgaesner et al.
+.[
+optimizing ada compiler
+.]
+present a brief description of many
+optimizations; the report also contains a lengthy (over 60 pages)
+bibliography.
+.PP
+The number of articles on optimization is quite impressive.
+The Lowry and Medlock paper on the Fortran H compiler
+.[
+object code optimization Lowry Medlock
+.]
+is a classical one.
+Other papers on global optimization are.
+.[
+faiman optimizing pascal
+.]
+.[
+perkins sites
+.]
+.[
+harrison general purpose optimizing
+.]
+.[
+morel partial redundancies
+.]
+.[
+Mintz global optimizer
+.]
+Freudenberger
+.[
+freudenberger setl optimizer
+.]
+describes an optimizer for a Very High Level Language (SETL).
+The Production-Quality Compiler-Compiler (PQCC) project uses
+very sophisticated compiler techniques, as described in.
+.[
+wulf overview ieee
+.]
+.[
+wulf overview carnegie-mellon
+.]
+.[
+wulf machine-relative
+.]
+.PP
+Several Ph.D. theses are dedicated to optimization.
+Davidson
+.[
+davidson simplifying
+.]
+outlines a machine-independent peephole optimizer that
+improves assembly code.
+Katkus
+.[
+katkus
+.]
+describes how efficient programs can be obtained at little cost by
+optimizing only a small part of a program.
+Photopoulos
+.[
+photopoulos mixed code
+.]
+discusses the idea of generating interpreted intermediate code as well
+as assembly code, to obtain programs that are both small and  fast.
+Shaffer
+.[
+shaffer automatic
+.]
+describes the theory of automatic subroutine generation.
+Leverett
+.[
+leverett register allocation compilers
+.]
+deals with register allocation in the PQCC compilers.
+.PP
+References to articles about specific optimization techniques
+will be given in later chapters.
--- a/doc/ego/ra/.distr
+++ b/doc/ego/ra/.distr
@@ -0,0 +1,4 @@
+ra1
+ra2
+ra3
+ra4
--- a/doc/ego/ra/ra1
+++ b/doc/ego/ra/ra1
@@ -0,0 +1,33 @@
+.bp
+.NH 1
+Register Allocation
+.NH 2
+Introduction
+.PP
+The efficient usage of the general purpose registers
+of the target machine plays a key role in any optimizing compiler.
+This subject, often referred to as \fIRegister Allocation\fR,
+has great impact on both the code generator and the
+optimizing part of such a compiler.
+The code generator needs registers for at least the evaluation of
+arithmetic expressions;
+the optimizer uses the registers to decrease the access costs
+of frequently used entities (such as variables).
+The design of an optimizing compiler must pay great
+attention to the cooperation of optimization, register allocation
+and code generation.
+.PP
+Register allocation has received much attention in literature (see
+.[
+leverett register allocation compilers
+.]
+.[
+chaitin register coloring
+.]
+.[
+freiburghouse usage counts
+.]
+and
+.[~[
+sites register
+.]]).
--- a/doc/ego/ra/ra2
+++ b/doc/ego/ra/ra2
@@ -0,0 +1,139 @@
+.NH 2
+Usage of registers in ACK compilers
+.PP
+We will first describe the major design decisions 
+of the Amsterdam Compiler Kit,
+as far as they concern register allocation.
+Subsequently we will outline 
+the role of the Global Optimizer in the register
+allocation process and the interface
+between the code generator and the optimizer.
+.NH 3
+Usage of registers without the intervention of the Global Optimizer
+.PP
+Registers are used for two purposes:
+.IP 1.
+for the evaluation of arithmetic expressions
+.IP 2.
+to hold local variables, for the duration of the procedure they
+are local to.
+.LP
+It is essential to note that no translation part of the compilers,
+except for the code generator, knows anything at all
+about the register set of the target computer.
+Hence all decisions about registers are ultimately made by
+the code generator.
+Earlier phases of a compiler can only \fIadvise\fR the code generator.
+.PP
+The code generator splits the register set into two:
+a fixed part for the evaluation of expressions (called \fIscratch\fR
+registers) and a fixed part to store local variables.
+This partitioning, which depends only on the target computer, significantly
+reduces the complexity of register allocation, at the penalty
+of some loss of code quality.
+.PP
+The code generator has some (machine-dependent) knowledge of the access costs
+of memory locations and registers and of the costs of saving and
+restoring registers. (Registers are always saved by the \fIcalled\fR
+procedure).
+This knowledge is expressed in a set of procedures for each target machine.
+The code generator also knows how many registers there are and of
+which type they are.
+A register can be of type \fIpointer\fR, \fIfloating point\fR
+or \fIgeneral\fR.
+.PP
+The front ends of the compilers determine which local variables may
+be put in a register;
+such a variable may never be accessed indirectly (i.e. through a pointer).
+The front end also determines the types and sizes of these variables.
+The type can be any of the register types or the type \fIloop variable\fR,
+which denotes a general-typed variable that is used as loop variable
+in a for-statement.
+All this information is collected in a \fIregister message\fR in
+the EM code.
+Such a message is a pseudo EM instruction.
+This message also contains a \fIscore\fR field,
+indicating how desirable it is to put this variable in a register.
+A front end may assign a high score to a variable if it
+was declared as a register variable (which is only possible in
+some languages, such as "C").
+Any compiler phase before the code generator may change this score field,
+if it has reason to do so.
+The code generator bases its decisions on the information contained
+in the register message, most notably on the score.
+.PP
+If the global optimizer is not used,
+the score fields are set by the Peephole Optimizer.
+This optimizer simply counts the number of occurrences
+of every local (register) variable and adds this count
+to the score provided by the front end.
+In this way a simple, yet quite effective
+register allocation scheme is achieved.
+.NH 3
+The role of the Global Optimizer
+.PP
+The Global Optimizer essentially tries to improve the scheme
+outlined above.
+It uses the following principles for this purpose:
+.IP -
+Entities are not always assigned a register for the duration
+of an entire procedure; smaller regions of the program text
+may be considered too.
+.IP -
+several variables may be put in the same register simultaneously,
+provided at most one of them is live at any point.
+.IP -
+besides local variables, other entities (such as constants and addresses of
+variables and procedures) may be put in a register.
+.IP -
+more accurate cost estimates are used.
+.LP
+To perform its task, the optimizer must have some
+knowledge of the target machine.
+.NH 3
+The interface between the register allocator and the code generator
+.PP
+The RA phase of the optimizer must somehow be able to express its
+decisions.
+Such decisions may look like: 'put constant 1283 in a register from
+line 12 to line 40'.
+To be precise, RA must be able to tell the code generator to:
+.IP -
+initialize a register with some value
+.IP -
+update an entity from a register
+.IP -
+replace all occurrences of an entity in a certain region
+of text by a reference to the register.
+.LP
+At least three problems occur here: the code generator is only used to
+put local variables in registers,
+it only assigns a register to a variable for the duration of an entire
+procedure and it is not used to have some earlier compiler phase
+make all the decisions.
+.PP
+All problems are solved by one mechanism, that involves no changes
+to the code generator.
+With every (non-scratch) register R that will be used in
+a procedure P, we associate a new variable T, local to P.
+The size of T is the same as the size of R.
+A register message is generated for T with an exceptionally high score.
+The scores of all original register messages are set to zero.
+Consequently, the code generator will always assign precisely those new
+variables to a register.
+If the optimizer wants to put some entity, say the constant 1283, in
+a register, it emits the code "T := 1283" and replaces all occurrences
+of '1283' by T.
+Similarly, it can put the address of a procedure in T and replace all
+calls to that procedure by indirect calls.
+Furthermore, it can put several different entities in T (and thus in R)
+during the lifetime of P.
+.PP
+In principle, the code generated by the optimizer in this way would
+always be valid EM code, even if the optimizer would be presented
+a totally wrong description of the target computer register set.
+In practice, it would be a waste of data as well as text space to
+allocate memory for these new variables, as they will always be assigned
+a register (in the correct order of events).
+Hence, no memory locations are allocated for them.
+For this reason they are called pseudo local variables.
--- a/doc/ego/ra/ra3
+++ b/doc/ego/ra/ra3
@@ -0,0 +1,383 @@
+.NH 2
+The register allocation phase
+.PP
+.NH 3
+Overview
+.PP
+The RA phase deals with one procedure at a time.
+For every procedure, it first determines which entities
+may be put in a register. Such an entity
+is called an \fIitem\fR.
+For every item it decides during which parts of the procedure it
+might be assigned a register.
+Such a region is called a \fItimespan\fR.
+For any item, several (possibly overlapping) timespans may
+be considered.
+A pair (item,timespan) is called an \fIallocation\fR.
+If the items of two allocations are both live at some
+point of time in the intersections of their timespans,
+these allocations are said to be \fIrivals\fR of each other,
+as they cannot be assigned the same register.
+The rivals-set of every allocation is computed.
+Next, the gains of assigning a register to an allocation are estimated,
+for every allocation.
+With all this information, decisions are made which allocations
+to store in which registers (\fIpacking\fR).
+Finally, the EM text is transformed to reflect these decisions.
+.NH 3
+The item recognition subphase
+.PP
+RA tries to put the following entities in a register:
+.IP -
+a local variable for which a register message was found
+.IP -
+the address of a local variable for which no
+register message was found
+.IP -
+the address of a global variable
+.IP -
+the address of a procedure
+.IP -
+a numeric constant.
+.LP
+Only the \fIaddress\fR of a global variable
+may be put in a register, not the variable itself.
+This approach avoids the very complex problems that would be
+caused by procedure calls and indirect pointer references (see
+.[~[
+aho design compiler
+.] sections 14.7 and 14.8]
+and 
+.[~[
+spillman side-effects
+.]]).
+Still, on most machines accessing a global variable using indirect
+addressing through a register is much cheaper than
+accessing it via its address.
+Similarly, if the address of a procedure is put in a register, the
+procedure can be called via an indirect call.
+.PP
+With every item we associate a register type.
+This type is
+.DS
+for local variables: the type contained in the register message
+for addresses of variables and procedures: the pointer type
+for constants: the general type
+.DE
+An entity other than a local variable is not taken to be an item
+if it is used only once within the current procedure.
+.PP
+An item is said to be \fIlive\fR at some point of the program text
+if its value may be used before it is changed.
+As addresses and constants are never changed, all items but local
+variables are always live.
+The region of text during which a local variable is live is
+determined via the live/dead messages generated by the
+Live Variable analysis phase of the Global Optimizer.
+.NH 3
+The allocation determination subphase
+.PP
+If a procedure has more items than registers,
+it may be advantageous to put an item in a register
+only during those parts of the procedure where it is most
+heavily used.
+Such a part will be called a timespan.
+With every item we may associate a set of timespans.
+If two timespans of an item overlap,
+at most one of them may be granted a register,
+as there is no use in putting the same item in two
+registers simultaneously.
+If two timespans of an item are distinct,
+both may be chosen;
+the item will possibly be put in two
+different registers during different parts of the procedure.
+The timespan may also consist
+of the whole procedure.
+.PP
+A list of (item,timespan) pairs (allocations)
+is built, which will be the input to the decision making
+subphase of RA (packing subphase).
+This allocation list is the main data structure of RA.
+The description of the remainder of RA will be in terms
+of allocations rather than items.
+The phrase "to assign a register to an allocation" means "to assign
+a register to the item of the allocation for the duration of
+the timespan of the allocation".
+Subsequent subphases will add more information
+to this list.
+.PP
+Several factors must be taken into account when a
+timespan for an item is constructed:
+.IP 1.
+At any \fIentry point\fR of the timespan where the
+item is live,
+the register must be initialized with the item
+.IP 2.
+At any exit point of the timespan where the item is live,
+the item must be updated.
+.LP
+In order to decrease these costs, we will only consider timespans with
+one entry point
+and no live exit points.
+.NH 3
+The rivals computation subphase
+.PP
+As stated before, several different items may be put in the
+same register, provided they are not live simultaneously.
+For every allocation we determine the intersection
+of its timespan and the lifetime of its item (i.e. the part of the
+procedure during which the item is live).
+The allocation is said to be busy during this intersection.
+If two allocations are ever busy simultaneously they are
+said to be rivals of each other.
+The rivals information is added to the allocation list.
+.NH 3
+The profits computation subphase
+.PP
+To make good decisions, the packing subphase needs to
+know which allocations can be assigned the same register
+(rivals information) and how much is gained by
+granting an allocation a register.
+.PP
+Besides the gains of using a register instead of an
+item,
+two kinds of overhead costs must be
+taken into account:
+.IP -
+the register must be initialized with the item
+.IP -
+the register must be saved at procedure entry
+and restored at procedure exit.
+.LP
+The latter costs should not be due to a single
+allocation, as several allocations can be assigned the same register.
+These costs are dealt with after packing has been done.
+They do not influence the decisions of the packing algorithm,
+they may only undo them.
+.PP
+The actual profits consist of improvements
+of execution time and code size.
+As the former is far more difficult to estimate, we will
+discuss code size improvements first.
+.PP
+The gains of putting a certain item in a register
+depend on how the item is used.
+Suppose the item is
+a pointer variable.
+On machines that do not have a
+double-indirect addressing mode,
+two instructions are needed to dereference the variable
+if it is not in a register, but only one if it is put in a register.
+If the variable is not dereferenced, but simply copied, one instruction
+may be sufficient in both cases.
+So the gains of putting a pointer variable in a register are higher
+if the variable is dereferenced often.
+.PP
+To make accurate estimates, detailed knowledge of
+the target machine and of the code generator
+would be needed.
+Therefore, a simplification has been made that substantially limits
+the amount of target machine information that is needed.
+The estimation of the number of bytes saved does
+not take into account how an item is used.
+Rather, an average number is used.
+So these gains are computed as follows:
+.DS
+#bytes_saved = #occurrences * gains_per_occurrence
+.DE
+The number of occurrences is derived from
+the EM code.
+Note that this is not exact either,
+as there is no one-to-one correspondence between occurrences in
+the EM code and in the assembler code.
+.PP
+The gains of one occurrence depend on:
+.IP 1.
+the type of the item
+.IP 2.
+the size of the item
+.IP 3.
+the type of the register
+.LP
+and for local variables and addresses of local variables:
+.IP 4.
+the type of the local variable
+.IP 5.
+the offset of the variable in the stackframe
+.LP
+For every allocation we try two types of registers: the register type
+of the item and the general register type.
+Only the type with the highest profits will subsequently be used.
+This type is added to the allocation information.
+.PP
+To compute the gains, RA uses a machine-dependent table
+that is read from a machine descriptor file.
+By means of this table the number of bytes saved can be computed
+as a function of the five properties.
+.PP
+The costs of initializing a register with an item
+are determined in a similar way.
+The cost of one initialization is also
+obtained from the descriptor file.
+Note that there can be at most one initialization for any
+allocation.
+.PP
+To summarize, the number of bytes a certain allocation would
+save is computed as follows:
+.DS
+net_bytes_saved =  bytes_saved - init_cost
+bytes_saved =      #occurrences * gains_per_occ
+init_cost =        #initializations * costs_per_init
+.DE
+.PP
+It is inherently more difficult to estimate the execution
+time saved by putting an item in a register,
+because it is impossible to predict how
+many times an item will be used dynamically.
+If an occurrence is part of a loop,
+it may be executed many times.
+If it is part of a conditional statement, 
+it may never be executed at all.
+In the latter case, the speed of the program may even get
+worse if an initialization is needed.
+As a clear example, consider the piece of "C" code in Fig. 13.1.
+.DS
+switch(expr) {
+      case 1:  p(); break;
+      case 2:  p(); p(); break;
+      case 3:  p(); break;
+      default: break;
+}
+
+Fig. 13.1 A "C" switch statement
+.DE
+Lots of bytes may be saved by putting the address of procedure p
+in a register, as p is called four times (statically).
+Dynamically, p will be called zero, one or two times,
+depending on the value of the expression.
+.PP
+The optimizer uses the following strategy for optimizing
+execution time:
+.IP 1.
+try to put items in registers during \fIloops\fR first
+.IP 2.
+always keep the initializing code outside the loop
+.IP 3.
+if an item is not used in a loop, do not put it in a register if
+the initialization costs may be higher than the gains
+.LP
+The latter condition can be checked by determining the 
+minimal number of usages (dynamically) of the item during the procedure,
+via a shortest path algorithm.
+In the example above, this minimal number is zero, so the address of
+p is not put in a register.
+.PP
+The cost of one occurrence is estimated as described above for the
+code size.
+The number of dynamic occurrences is guessed by looking at the
+loop nesting level of every occurrence.
+If the item is never used in a loop,
+the minimal number of occurrences is used.
+From these facts, the execution time improvement is assessed
+for every allocation.
+.NH 3
+The packing subphase
+.PP
+The packing subphase takes as input the allocation
+list and outputs a
+description of which allocations should be put
+in which registers.
+So it is essentially the decision making part of RA.
+.PP
+The packing system tries to assign a register to allocations one
+at a time, in some yet to be defined order.
+For every allocation A, it first checks if there is a register
+(of the right type)
+that is already assigned to one or more allocations,
+none of which are rivals of A.
+In this case A is assigned the same register.
+Else, A is assigned a new register, if one exists.
+A table containing the number of free registers for every type
+is maintained.
+It is initialized with the number of non-scratch registers of
+the target computer and updated whenever a
+new register is handed out.
+The packing algorithm stops when no more allocations can 
+or need be assigned a register.
+.PP
+After an allocation A has been packed,
+all allocations with non-disjoint timespans (including
+A itself) are removed from the allocation list.
+.PP
+In case the number of items exceeds the number of registers, it
+is important to choose the most profitable allocations.
+Due to the possibility of having several allocations
+occupying the same register,
+this problem is quite complex.
+Our packing algorithm uses simple heuristic rules
+and avoids any combinatorial search.
+It has distinct rules for different cost measures.
+.PP
+If object code size is the most important factor,
+the algorithm is greedy and chooses allocations in
+decreasing order of their profits attribute.
+It does not take into account the fact that
+other allocations may be passed over because of
+this decision.
+.PP
+If execution time is at prime stake, the algorithm
+first considers allocations whose timespans consist of loops.
+After all these have been packed, it considers the remaining
+allocations.
+Within the two subclasses, it considers allocations
+with the highest profits first.
+When assigning a register to an allocation with a loop
+as timespan, the algorithm checks if the item has
+already been put in a register during another loop.
+If so, it tries to use the same register for the
+new allocation.
+After all packing has been done,
+it checks if the item has always been assigned the same
+register (although not necessarily during all loops).
+If so, it tries to put the item in that register during
+the entire procedure. This is possible
+if the allocation (item,whole_procedure) is not a rival
+of any allocation with a different item that has been
+assigned to the same register.
+Note that this approach is essentially 'bottom up',
+as registers are first assigned over small regions
+of text which are later collapsed into larger regions.
+The advantage of this approach is the fact that
+the decisions for one loop can be made independently
+of all other loops.
+.PP
+After the entire packing process has been completed,
+we compute for each register how much is gained in using
+this register, by simply adding the net profits
+of all allocations assigned to it.
+This total yield should outweigh the costs of
+saving/restoring the register at procedure entry/exit.
+As most modern processors (e.g. 68000, Vax) have special
+instructions to save/restore several registers,
+the differential costs of saving one extra register are by
+no means constant.
+The costs are read from the machine descriptor file and
+compared to the total yields of the registers.
+As a consequence of this analysis, some allocations 
+may have their registers taken away.
+.NH 3
+The transformation subphase
+.PP
+The final subphase of RA transforms the EM text according to the
+decisions made by the packing system.
+It traverses the text of the currently optimized procedure and
+changes all occurrences of items at points where
+they are assigned a register.
+It also clears the score field of the register messages for
+normal local variables and emits register messages with a very
+high score for the pseudo locals.
+At points where registers have to be initialized with items,
+it generates EM code to do so.
+Finally it tries to decrease the size of the stackframe
+of the procedure by looking at which local variables need not
+be given memory locations.
--- a/doc/ego/ra/ra4
+++ b/doc/ego/ra/ra4
@@ -0,0 +1,28 @@
+.NH 2
+Source files of RA
+.PP
+The sources of RA are in the following files and packages:
+.IP ra.h: 14
+declarations of global variables and data structures
+.IP ra.c:
+the routine main; initialization of target machine-dependent tables
+.IP items:
+a routine to build the list of items of one procedure;
+routines to manipulate items
+.IP lifetime:
+contains a subroutine that determines when items are live/dead
+.IP alloclist:
+contains subroutines that build the initial allocations list
+and that compute the rivals sets.
+.IP profits:
+contains a subroutine that computes the profits of the allocations
+and a routine that determines the costs of saving/restoring registers
+.IP pack:
+contains the packing subphase
+.IP xform:
+contains the transformation subphase
+.IP interval:
+contains routines to manipulate intervals of time
+.IP aux:
+contains auxiliary routines
+.LP
--- a/doc/ego/refs.gen
+++ b/doc/ego/refs.gen
@@ -0,0 +1,120 @@
+%T A Practical Toolkit for Making Portable Compilers
+%A A.S. Tanenbaum
+%A H. van Staveren
+%A E.G. Keizer
+%A J.W. Stevenson
+%I Vrije Universiteit, Amsterdam
+%R Rapport nr IR-74
+%D October 1981
+
+%T A Practical Toolkit for Making Portable Compilers
+%A A.S. Tanenbaum
+%A H. van Staveren
+%A E.G. Keizer
+%A J.W. Stevenson
+%J CACM
+%V 26
+%N 9
+%P 654-660
+%D September 1983
+
+%T A Unix Toolkit for Making Portable Compilers
+%A A.S. Tanenbaum
+%A H. van Staveren
+%A E.G. Keizer
+%A J.W. Stevenson
+%J Proceedings USENIX conf.
+%C Toronto, Canada
+%V 26
+%D July 1983
+%P 255-261
+
+%T Using Peephole Optimization on Intermediate Code
+%A A.S. Tanenbaum
+%A H. van Staveren
+%A J.W. Stevenson
+%J TOPLAS
+%V 4
+%N 1
+%P 21-36
+%D January 1982
+
+%T Language- and Machine-independent Global Optimization on Intermediate Code
+%A H.E. Bal
+%A A.S. Tanenbaum
+%J Computer Languages
+%V 11
+%N 2
+%P 105-121
+%D April 1986
+
+%T Description of a machine architecture for use with
+block structured languages
+%A A.S. Tanenbaum
+%A H. van Staveren
+%A E.G. Keizer
+%A J.W. Stevenson
+%I Vrije Universiteit, Amsterdam
+%R Rapport nr IR-81
+%D August 1983
+
+%T Amsterdam Compiler Kit documentation
+%A A.S. Tanenbaum et al.
+%I Vrije Universiteit, Amsterdam
+%R Rapport nr IR-90
+%D June 1984
+
+%T The C Programming Language - Reference Manual
+%A D.M. Ritchie
+%I Bell Laboratories
+%C Murray Hill, New Jersey
+%D 1978
+
+%T Unix programmer's manual, Seventh Edition
+%A B.W. Kernighan
+%A M.D. McIlroy
+%I Bell Laboratories
+%C Murray Hill, New Jersey
+%V 1
+%D January 1979
+
+%T A Tour Through the Portable C Compiler
+%A S.C. Johnson
+%I Bell Laboratories
+%B Unix programmer's manual, Seventh Edition
+%C Murray Hill, New Jersey
+%D January 1979
+
+
+%T Ada Programming Language - MILITARY STANDARD 
+%A J.D. Ichbiah
+%I U.S. Department of Defense
+%R ANSI/MIL-STD-1815A
+%D 22 January 1983
+
+%T Rationale for the Design of the Ada Programming Language
+%A J.D. Ichbiah
+%J SIGPLAN Notices
+%V 14
+%N 6
+%D June 1979
+
+%T The Programming Languages LISP and TRAC
+%A W.L. van der Poel
+%I Technische Hogeschool Delft
+%C Delft
+%D 1972
+
+%T Compiler construction
+%A W.M. Waite
+%A G. Goos
+%I Springer-Verlag
+%C New York
+%D 1984
+
+%T The C Programming Language
+%A B.W. Kernighan
+%A D.M. Ritchie
+%I Prentice-Hall, Inc
+%C Englewood Cliffs,NJ
+%D 1978
--- a/doc/ego/refs.opt
+++ b/doc/ego/refs.opt
@@ -0,0 +1,546 @@
+%T Principles of compiler design
+%A A.V. Aho
+%A J.D. Ullman
+%I Addison-Wesley
+%C Reading, Massachusetts
+%D 1978
+
+%T The Design and Analysis of Computer Algorithms
+%A A.V. Aho
+%A J.E. Hopcroft
+%A J.D. Ullman
+%I Addison-Wesley
+%C Reading, Massachusetts
+%D 1974
+
+%T Code generation in a machine-independent compiler
+%A R.G.G. Cattell
+%A J.M. Newcomer
+%A B.W. Leverett
+%J SIGPLAN Notices
+%V 14
+%N 8
+%P 65-75
+%D August 1979
+
+%T An algorithm for Reduction of Operator Strength
+%A J. Cocke
+%A K. Kennedy
+%J CACM
+%V 20
+%N 11
+%P 850-856
+%D November 1977
+
+%T Reduction of Operator Strength
+%A F.E. Allen
+%A J. Cocke
+%A K. Kennedy
+%B Program Flow Analysis
+%E S.S. Muchnick and  D. Jones
+%I Prentice-Hall
+%C Englewood Cliffs, N.J.
+%D 1981
+
+%T Simplifying Code Generation Through Peephole Optimization
+%A J.W. Davidson
+%R Ph.D. thesis
+%I Dept. of Computer Science
+%C Univ. of Arizona
+%D December 1981
+
+%T A study of selective optimization techniques
+%A G.R. Katkus
+%R Ph.D. Thesis
+%C University of Southern California
+%D 1973
+
+%T Automatic subroutine generation in an optimizing compiler
+%A J.B. Shaffer
+%R Ph.D. Thesis
+%C University of Maryland
+%D 1978
+
+%T Optimal mixed code generation for microcomputers
+%A D.S. Photopoulos
+%R Ph.D. Thesis
+%C Northeastern University
+%D 1981
+
+%T The Design of an Optimizing Compiler
+%A W.A. Wulf
+%A R.K. Johnsson
+%A C.B. Weinstock
+%A S.O. Hobbs
+%A C.M. Geschke
+%I American Elsevier Publishing Company
+%C New York
+%D 1975
+
+%T Retargetable Compiler Code Generation
+%A M. Ganapathi
+%A C.N. Fischer
+%A J.L. Hennessy
+%J ACM Computing Surveys
+%V 14
+%N 4
+%P 573-592
+%D December 1982
+
+%T An Optimizing Pascal Compiler
+%A R.N. Faiman
+%A A.A. Kortesoja
+%J IEEE Trans. on Softw. Eng.
+%V 6
+%N 6
+%P 512-518
+%D November 1980
+
+%T Experience with the SETL Optimizer
+%A S.M. Freudenberger
+%A J.T. Schwartz
+%J TOPLAS
+%V 5
+%N 1
+%P 26-45
+%D January 1983
+
+%T An Optimizing Ada Compiler
+%A W. Kirchgaesner
+%A J. Uhl
+%A G. Winterstein
+%A G. Goos
+%A M. Dausmann
+%A S. Drossopoulou
+%I Institut fur Informatik II, Universitat Karlsruhe
+%D February 1983
+
+%T A Fast Algorithm for Finding Dominators
+in a Flowgraph
+%A T. Lengauer
+%A R.E. Tarjan
+%J TOPLAS
+%V 1
+%N 1
+%P 121-141
+%D July 1979
+
+%T Optimization of hierarchical directed graphs
+%A M.T. Lepage
+%A D.T. Barnard
+%A A. Rudmik
+%J Computer Languages
+%V 6
+%N 1
+%P 19-34
+%D January 1981
+
+%T Object Code Optimization
+%A E.S. Lowry
+%A C.W. Medlock
+%J CACM
+%V 12
+%N 1
+%P 13-22
+%D January 1969
+
+%T Automatic Program Improvement:
+Variable Usage Transformations
+%A B. Maher
+%A D.H. Sleeman
+%J TOPLAS
+%V 5
+%N 2
+%P 236-264
+%D April 1983
+
+%T The design of a global optimizer
+%A R.J. Mintz
+%A G.A. Fisher
+%A M. Sharir
+%J SIGPLAN Notices
+%V 14
+%N 9
+%P 226-234
+%D September 1979
+
+%T Global Optimization by Suppression of Partial Redundancies
+%A E. Morel
+%A C. Renvoise
+%J CACM
+%V 22
+%N 2
+%P 96-103
+%D February 1979
+
+%T Efficient Computation of Expressions with Common Subexpressions
+%A B. Prabhala
+%A R. Sethi
+%J JACM
+%V 27
+%N 1
+%P 146-163
+%D January 1980
+
+%T An Analysis of Inline Substitution for a Structured
+Programming Language
+%A R.W. Scheifler
+%J CACM
+%V 20
+%N 9
+%P 647-654
+%D September 1977
+
+%T Immediate Predominators in a Directed Graph
+%A P.W. Purdom
+%A E.F. Moore
+%J CACM
+%V 15
+%N 8
+%P 777-778
+%D August 1972
+
+%T The Generation of Optimal Code for Arithmetic Expressions
+%A R. Sethi
+%A J.D. Ullman
+%J JACM
+%V 17
+%N 4
+%P 715-728
+%D October 1970
+
+%T Exposing side-effects in a PL/I optimizing compiler
+%A T.C. Spillman
+%B Information Processing 1971
+%I North-Holland Publishing Company
+%C Amsterdam
+%P 376-381
+%D 1971
+
+%T Inner Loops in Flowgraphs and Code Optimization
+%A S. Vasudevan
+%J Acta Informatica
+%N 17
+%P 143-155
+%D 1982
+
+%T A New Strategy for Code Generation - the General-Purpose
+Optimizing Compiler
+%A W.H. Harrison
+%J IEEE Trans. on Softw. Eng.
+%V 5
+%N 4
+%P 367-373
+%D July 1979
+
+%T PQCC: A Machine-Relative Compiler Technology
+%A W.A. Wulf
+%R CMU-CS-80-144
+%I Carnegie-Mellon University
+%C Pittsburgh
+%D 25 September 1980
+
+%T Machine-independent Pascal code optimization
+%A D.R. Perkins
+%A R.L. Sites
+%J SIGPLAN Notices
+%V 14
+%N 8
+%P 201-207
+%D August 1979
+
+%T A Case Study of a New Code Generation Technique for Compilers
+%A J.L. Carter
+%J CACM
+%V 20
+%N 12
+%P 914-920
+%D December 1977
+
+%T Table-driven Code Generation
+%A S.L. Graham
+%J IEEE Computer
+%V 13
+%N 8
+%P 25-33
+%D August 1980
+
+%T Register Allocation in Optimizing Compilers
+%A B.W. Leverett
+%R Ph.D. Thesis, CMU-CS-81-103
+%I Carnegie-Mellon University
+%C Pittsburgh
+%D February 1981
+
+%T Register Allocation via Coloring
+%A G.J. Chaitin
+%A M.A. Auslander
+%A A.K. Chandra
+%A J. Cocke
+%A M.E. Hopkins
+%A P.W. Markstein
+%J Computer Languages
+%V 6
+%N 1
+%P 47-57
+%D January 1981
+
+%T How to Call Procedures, or Second Thoughts on
+Ackermann's Function
+%A B.A. Wichmann
+%J Software - Practice and Experience
+%V 7
+%P 317-329
+%D 1977
+
+%T Register Allocation Via Usage Counts
+%A R.A. Freiburghouse
+%J CACM
+%V 17
+%N 11
+%P 638-642
+%D November 1974
+
+%T Machine-independent register allocation
+%A R.L. Sites
+%J SIGPLAN Notices
+%V 14
+%N 8
+%P 221-225
+%D August 1979
+
+%T An Overview of the Production-Quality Compiler-Compiler Project
+%A B.W. Leverett
+%A R.G.G. Cattell
+%A S.O. Hobbs
+%A J.M. Newcomer
+%A A.H. Reiner
+%A B.R. Schatz
+%A W.A. Wulf
+%J IEEE Computer
+%V 13
+%N 8
+%P 38-49
+%D August 1980
+
+%T An Overview of the Production-Quality Compiler-Compiler Project
+%A B.W. Leverett
+%A R.G.G. Cattell
+%A S.O. Hobbs
+%A J.M. Newcomer
+%A A.H. Reiner
+%A B.R. Schatz
+%A W.A. Wulf
+%R CMU-CS-79-105
+%I Carnegie-Mellon University
+%C Pittsburgh
+%D 1979
+
+%T Topics in Code Generation and Register Allocation
+%A B.W. Leverett
+%R CMU-CS-82-130
+%I Carnegie-Mellon University
+%C Pittsburgh
+%D 28 July 1982
+
+%T Predicting the Effects of Optimization on a Procedure Body
+%A J.E. Ball
+%J SIGPLAN Notices
+%V 14
+%N 8
+%P 214-220
+%D August 1979
+
+%T The C Language Calling Sequence
+%A S.C. Johnson
+%A D.M. Ritchie
+%I Bell Laboratories
+%C Murray Hill, New Jersey
+%D September 1981
+
+%T A Generalization of Two Code Ordering Optimizations
+%A C.W. Fraser
+%R TR 82-11
+%I Department of Computer Science
+%C The University of Arizona, Tucson
+%D October 1982
+
+%T A Survey of Data Flow Analysis Techniques
+%A K. Kennedy
+%B Program Flow Analysis
+%E S.S. Muchnick and  D. Jones
+%I Prentice-Hall
+%C Englewood Cliffs
+%D 1981
+
+%T Delayed Binding in PQCC Generated Compilers
+%A W.A. Wulf
+%A K.V. Nori
+%R CMU-CS-82-138
+%I Carnegie-Mellon University
+%C Pittsburgh
+%D 1982
+
+%T Interprocedural Data Flow Analysis in the presence
+of Pointers, Procedure Variables, and Label Variables
+%A W.E. Weihl
+%J Conf. Rec. of the 7th ACM Symp. on Principles of
+Programming Languages
+%C Las Vegas, Nevada
+%P 83-94
+%D 1980
+
+%T Low-Cost, High-Yield Code Optimization
+%A D.R. Hanson
+%R TR 82-17
+%I Department of Computer Science
+%C The University of Arizona, Tucson
+%D November 1982
+
+%T Program Flow Analysis
+%E S.S. Muchnick and  D. Jones
+%I Prentice-Hall
+%C Englewood Cliffs
+%D 1981
+
+%T A machine independent algorithm for code generation and its
+use in retargetable compilers
+%A R. Glanville
+%R Ph.D. thesis
+%C University of California, Berkeley
+%D December 1977
+
+%T A formal framework for the derivation of machine-specific optimizers
+%A R. Giegerich
+%J TOPLAS
+%V 5
+%N 3
+%P 478-498
+%D July 1983
+
+%T Engineering a compiler: Vax-11 code generation and optimization
+%A P. Anklam
+%A D. Cutler
+%A R. Heinen
+%A M. MacLaren
+%I Digital Equipment Corporation
+%D 1982
+
+%T Analyzing exotic instructions for a retargetable code generator
+%A T.M. Morgan
+%A L.A. Rowe
+%J SIGPLAN Notices
+%V 17
+%N 6
+%P 197-204
+%D June 1982
+
+%T TCOLAda and the Middle End of the PQCC Ada Compiler
+%A B.M. Brosgol
+%J SIGPLAN Notices
+%V 15
+%N 11
+%P 101-112
+%D November 1980
+
+%T Implementation Implications of Ada Generics
+%A G. Bray
+%J Ada Letters
+%V III
+%N 2
+%P 62-71
+%D September 1983
+
+%T Attributed Linear Intermediate Representations for Retargetable
+Code Generators
+%A M. Ganapathi
+%A C.N. Fischer
+%J Software-Practice and Experience
+%V 14
+%N 4
+%P 347-364
+%D April 1984
+
+%T UNCOL: The myth and the fact
+%A T.B. Steel
+%J Annu. Rev. Autom. Program.
+%V 2
+%D 1960
+%P 325-344
+
+%T Experience with a Graham-Glanville Style Code Generator
+%A P. Aigrain
+%A S.L. Graham
+%A R.R. Henry
+%A M.K. McKusick
+%A E.P. Llopart
+%J SIGPLAN Notices
+%V 19
+%N 6
+%D June 1984
+%P 13-24
+
+%T Using Dynamic Programming to generate Optimized Code in a
+Graham-Glanville Style Code Generator
+%A T.W. Christopher
+%A P.J. Hatcher
+%A R.C. Kukuk
+%J SIGPLAN Notices
+%V 19
+%N 6
+%D June 1984
+%P 25-36
+
+%T Peep - An Architectural Description Driven Peephole Optimizer
+%A R.R. Kessler
+%J SIGPLAN Notices
+%V 19
+%N 6
+%D June 1984
+%P 106-110
+
+%T Automatic Generation of Peephole Optimizations
+%A J.W. Davidson
+%A C.W. Fraser
+%J SIGPLAN Notices
+%V 19
+%N 6
+%D June 1984
+%P 111-116
+
+%T Analysing and Compressing Assembly Code
+%A C.W. Fraser
+%A E.W. Myers
+%A A.L. Wendt
+%J SIGPLAN Notices
+%V 19
+%N 6
+%D June 1984
+%P 117-121
+
+%T Register Allocation by Priority-based Coloring
+%A F. Chow
+%A J. Hennessy
+%J SIGPLAN Notices
+%V 19
+%N 6
+%D June 1984
+%P 222-232
+
+%T Code Selection through Object Code Optimization
+%A J.W. Davidson
+%A C.W. Fraser
+%I Dept. of Computer Science
+%C Univ. of Arizona
+%D November 1981
+
+%T A Portable Machine-Independent Global Optimizer - Design
+and Measurements
+%A F.C. Chow
+%I Computer Systems Laboratory
+%C Stanford University
+%D December 1983
--- a/doc/ego/refs.stat
+++ b/doc/ego/refs.stat
@@ -0,0 +1,29 @@
+%T An analysis of Pascal Programs
+%A L.R. Carter
+%I UMI Research Press
+%C Ann Arbor, Michigan
+%D 1982
+
+%T An Empirical Study of FORTRAN Programs
+%A D.E. Knuth
+%J Software - Practice and Experience
+%V 1
+%P 105-133
+%D 1971
+
+%T F77 Performance
+%A D.A. Mosher
+%A R.P. Corbett
+%J ;login:
+%V 7
+%N 3
+%D June 1982
+
+%T Ada Language Statistics for the iMAX 432 Operating System
+%A S.F. Zeigler
+%A R.P. Weicker
+%J Ada LETTERS
+%V 2
+%N 6
+%P 63-67
+%D May 1983
--- a/doc/ego/sp/.distr
+++ b/doc/ego/sp/.distr
@@ -0,0 +1 @@
+sp1
--- a/doc/ego/sp/sp1
+++ b/doc/ego/sp/sp1
@@ -0,0 +1,171 @@
+.bp
+.NH 1
+Stack pollution
+.NH 2
+Introduction
+.PP
+The "Stack Pollution" optimization technique (SP) decreases the costs
+(time as well as space) of procedure calls.
+In the EM calling sequence, the actual parameters are popped from
+the stack by the \fIcalling\fR procedure.
+The ASP (Adjust Stack Pointer) instruction is used for this purpose.
+A call in EM is shown in Fig. 8.1.
+.DS
+Pascal:                EM:
+
+f(a,2)                 LOC 2
+		       LOE A
+		       CAL F
+		       ASP 4    -- pop 4 bytes
+
+Fig. 8.1 An example procedure call in Pascal and EM
+.DE
+As procedure calls occur often in most programs,
+the ASP is one of the most frequently used EM instructions.
+.PP
+The main intention of removing the actual parameters after a procedure call
+is to prevent the stack size from increasing rapidly.
+Yet, in some cases, it is possible to \fIdelay\fR or even \fIavoid\fR the
+removal of the parameters without letting the stack grow
+significantly.
+In this way, considerable savings in code size and execution time may
+be achieved, at the cost of a slightly increased stack size.
+.PP
+A stack adjustment may be delayed if there is some other stack adjustment
+later on in the same basic block.
+The two ASPs can be combined into one.
+.DS
+Pascal:           EM:               optimized EM:
+
+f(a,2)            LOC 2             LOC 2
+g(3,b,c)          LOE A             LOE A
+		  CAL F             CAL F
+		  ASP 4             LOE C
+		  LOE C             LOE B
+		  LOE B             LOC 3
+		  LOC 3             CAL G
+		  CAL G             ASP 10
+		  ASP 6
+
+Fig. 8.2 An example of local Stack Pollution
+.DE
+The stacksize will be increased only temporarily.
+If the basic block contains another ASP, the ASP 10 may subsequently be
+combined with that next ASP, and so on.
+.PP
+For some back ends, a stack adjustment also takes place
+at the point of a procedure return.
+There is no need to specify the number of bytes to be popped at a
+return.
+This provides an opportunity to remove ASPs more globally.
+If all ASPs outside any loop are removed, the increase of the
+stack size will still only be small, as no such ASP is executed more
+than once without an intervening return from the procedure it is part of.
+.PP
+This second approach is not generally applicable to all target machines,
+as some back ends require the stack to be cleaned up at the point of
+a procedure return.
+.NH 2
+Implementation
+.PP
+There is one main problem the implementation has to solve.
+In EM, the stack is not only used for passing parameters,
+but also for evaluating expressions.
+Hence, ASP instructions can only be combined or removed
+if certain conditions are satisfied.
+.PP
+Two consecutive ASPs of one basic block can only be combined
+(as described above) if:
+.IP 1.
+At no point in the text between the two ASPs is any item popped from
+the stack that was pushed onto it before the first ASP.
+.IP 2.
+The number of bytes popped from the stack by the second ASP must equal
+the number of bytes pushed since the first ASP.
+.LP
+Condition 1. is not satisfied in Fig. 8.3.
+.DS
+Pascal:               EM:
+
+5 + f(10) + g(30)     LOC 5
+		      LOC 10
+		      CAL F
+		      ASP 2    -- cannot be removed
+		      LFR 2    -- push function result
+		      ADI 2
+		      LOC 30
+		      CAL G
+		      ASP 2
+		      LFR 2
+		      ADI 2
+Fig. 8.3 An illegal transformation
+.DE
+If the first ASP were removed (delayed), the first ADI would add
+10 and f(10), instead of 5 and f(10).
+.sp
+Condition 2. is not satisfied in Fig. 8.4.
+.DS
+Pascal:               EM:
+
+f(10) + 5 * g(30)     LOC 10
+		      CAL F
+		      ASP 2
+		      LFR 2
+		      LOC 5
+		      LOC 30
+		      CAL G
+		      ASP 2
+		      LFR 2
+		      MLI 2   --  5 * g(30)
+		      ADI 2
+
+Fig. 8.4 A second illegal transformation
+.DE
+If the two ASPs were combined into one 'ASP 4', the constant 5 would
+have been popped, rather than the parameter 10 (so '10 + f(10)*g(30)'
+would have been computed).
+.PP
+The second approach to deleting ASPs (i.e. let the procedure return
+do the stack clean-up)
+is only applied to the last ASP of every basic block.
+Any preceding ASPs are dealt with by the first approach.
+The last ASP of a basic block B will only be removed if:
+.IP -
+on no path in the control flow graph from B to any block containing a
+RET (return) is there a basic block that, at some point of its text, pops
+items from the stack that it has not itself pushed earlier.
+.LP
+Clearly, if this condition is satisfied, no harm can be done; no
+other basic block will ever access items that were pushed
+on the stack before the ASP.
+.PP
+The number of bytes pushed onto or popped from the stack can be
+easily encoded in a so-called "pop-push table".
+The numbers in general depend on the target machine word- and pointer
+size and on the argument given to the instruction.
+For example, an ADS instruction is described by:
+.DS
+   -a-p+p
+.DE
+which means: an 'ADS n' first pops an n-byte value (n being the argument),
+next pops a pointer-size value and finally pushes a pointer-size value.
+For some infrequently used EM instructions the pop-push numbers
+cannot be computed statically.
+.PP
+The stack pollution algorithm first performs a depth first search over
+the control flow graph and marks all blocks that do not satisfy
+the global condition.
+Next it visits all basic blocks in turn.
+For every pair of adjacent ASPs, it checks conditions 1. and 2. and
+combines the ASPs if they are satisfied.
+The new ASP may be used as first ASP in the next pair.
+If a condition fails, it simply continues with the next ASP.
+Finally, the last ASP is removed if:
+.IP -
+nothing has been popped from the stack after the last ASP that was
+pushed before it
+.IP -
+the block was not marked by the depth first search
+.IP -
+the block is not in a loop
+.LP
--- a/doc/ego/sr/.distr
+++ b/doc/ego/sr/.distr
@@ -0,0 +1,4 @@
+sr1
+sr2
+sr3
+sr4
--- a/doc/ego/sr/sr1
+++ b/doc/ego/sr/sr1
@@ -0,0 +1,44 @@
+.bp
+.NH 1
+Strength reduction
+.NH 2
+Introduction
+.PP
+The Strength Reduction optimization technique (SR)
+tries to replace expensive operators
+by cheaper ones,
+in order to decrease the execution time
+of the program.
+A classical example is replacing a 'multiplication by 2'
+by an addition or a shift instruction.
+These kinds of local transformations are already
+done by the EM Peephole Optimizer.
+Strength reduction can also be applied
+more generally to operators used in a loop.
+.DS
+i := 1;                    i := 1;
+while i < 100 loop  -->    TMP := i * 118;
+   put(i * 118);           while i < 100 loop
+   i := i + 1;                put(TMP);
+end loop;                     i := i + 1;
+			      TMP := TMP + 118;
+			   end loop;
+
+Fig. 6.1 An example of Strength Reduction
+.DE
+In Fig. 6.1, a multiplication inside a loop is
+replaced by an addition inside the loop and a multiplication
+outside the loop.
+Clearly, this is a global optimization; it cannot
+be done by a peephole optimizer.
+.PP
+In some cases a related technique, \fItest replacement\fR,
+can be used to eliminate the
+loop variable i.
+This technique will not be discussed in this report.
+.sp 0
+In the example above, the resulting code
+can be further optimized by using
+constant propagation.
+Obviously, this is not the task of the
+Strength Reduction phase.
--- a/doc/ego/sr/sr2
+++ b/doc/ego/sr/sr2
@@ -0,0 +1,217 @@
+.NH 2
+The model of strength reduction
+.PP
+In this section we will describe 
+the transformations performed by
+Strength Reduction (SR).
+Before doing so, we will introduce the
+central notion of an induction variable.
+.NH 3
+Induction variables
+.PP
+SR looks for variables whose
+values form an arithmetic progression
+at the beginning of a loop.
+These variables are called induction variables.
+The most frequently occurring example of such
+a variable is a loop-variable in a high-order
+programming language.
+Several quite sophisticated models of strength
+reduction can be found in the literature.
+.[
+cocke reduction strength cacm
+.]
+.[
+allen cocke kennedy reduction strength
+.]
+.[
+lowry medlock cacm
+.]
+.[
+aho compiler design
+.]
+In these models the notion of an induction variable
+is far more general than the intuitive notion
+of a loop-variable.
+The definition of an induction variable we present here
+is more restricted,
+yielding a simpler model and simpler transformations.
+We think the principal source for strength reduction lies in
+expressions using a loop-variable,
+i.e. a variable that is incremented or decremented
+by the same amount after every loop iteration,
+and that cannot be changed in any other way.
+.PP
+Of course, the EM code does not contain high level constructs
+such as for-statements.
+We will define an induction variable in terms
+of the Intermediate Code of the optimizer.
+Note that the notions of a loop in the
+EM text and of a firm basic block
+were defined in section 3.3.5.
+.sp
+.UL definition
+.sp 0
+An induction variable i of a loop L is a local variable
+that is never accessed indirectly,
+whose size is the word size of the target machine, and
+that is assigned exactly once within L,
+the assignment:
+.IP -
+being of the form i := i + c or i := c + i,
+where c is a constant
+called the \fIstep value\fR of i.
+.IP -
+occurring in a firm block of L.
+.LP
+(Note that the first restriction on the assignment
+is not described in terms of the Intermediate Code;
+we will give such a description later; the current
+definition is easier to understand however).
+.NH 3
+Recognized expressions
+.PP
+SR recognizes certain expressions using
+an induction variable and replaces
+them by cheaper ones.
+Two kinds of expensive operations are recognized:
+multiplication and array address computations.
+The expressions that are simplified must
+use an induction variable
+as an operand of
+a multiplication or as index in an array expression.
+.PP
+Often a linear function of an induction variable is used,
+rather than the variable itself.
+In these cases optimization is still possible.
+We call such expressions \fIiv-expressions\fR.
+.sp
+.UL definition:
+.sp 0
+An iv-expression of an induction variable i of a loop L is
+an expression that:
+.IP -
+uses only the operators + and - (unary as well as binary)
+.IP -
+uses i as operand exactly once
+.IP -
+uses (besides i) only constants or variables that are
+never changed in L as operands.
+.LP
+.PP
+The expressions recognized by SR are of the following forms:
+.IP (1)
+iv-expression * constant
+.IP (2)
+constant * iv-expression
+.IP (3)
+A[iv-expression] :=       (assign to array element)
+.IP (4)
+A[iv-expression]          (use array element)
+.IP (5)
+& A[iv-expression]        (take address of array element)
+.LP
+(Note that EM has different instructions to use an array element,
+store into one, or take the address of one, resp. LAR, SAR, and AAR).
+.sp 0
+The size of the elements of A must
+be known statically.
+In cases (3) and (4) this size 
+must equal the word size of the
+target machine.
+.NH 3
+Transformations
+.PP
+With every recognized expression we associate
+a new temporary local variable TMP,
+allocated in the stack frame of the
+procedure containing the expression.
+At any program point within the loop, TMP will
+contain the following value:
+.IP multiplication: 18
+the current value of iv-expression * constant
+.IP arrays:
+the current value of &A[iv-expression].
+.LP
+In the second case, TMP essentially is a pointer variable,
+pointing to the element of A that is currently in use.
+.sp 0
+If the same expression occurs several times in the loop,
+the same temporary local is used each time.
+.PP
+Three transformations are applied to the EM text:
+.IP (1)
+TMP is initialized with the right value.
+This initialization takes place just
+before the loop.
+.IP (2)
+The recognized expression is simplified.
+.IP (3)
+TMP is incremented; this takes place just
+after the induction variable is incremented.
+.LP
+For multiplication, the initial value of TMP
+is the value of the recognized expression at
+the program point immediately before the loop.
+For arrays, TMP is initialized with the address
+of the first array element that is accessed.
+So the initialization code is:
+.DS
+TMP := iv-expression * constant;  or
+TMP := &A[iv-expression]
+.DE
+At the point immediately before the loop,
+the induction variable will already have been
+initialized,
+so the value used in the code above will be the
+value it has during the first iteration.
+.PP
+For multiplication, the recognized expression can simply be
+replaced by TMP.
+For array optimizations, the replacement
+depends on the form:
+.DS
+\fIform\fR                         \fIreplacement\fR
+(3) A[iv-expr] :=            *TMP :=     (assign indirect)
+(4) A[iv-expr]               *TMP        (use indirect)
+(5) &A[iv-expr]              TMP
+.DE
+The '*' denotes the indirect operator. (Note that
+EM has different instructions to do
+an assign-indirect and a use-indirect).
+As the size of the array elements is restricted
+to be the word size in case (3) and (4),
+only one EM instruction needs to
+be generated in all cases.
+.PP
+The amount by which TMP is incremented is:
+.IP multiplication: 18
+step value * constant
+.IP arrays:
+step value * element size
+.LP
+Note that the step value (see definition of induction variable above),
+the constant, and the element size (see previous section) can all
+be determined statically.
+If the sign of the induction variable in the
+iv-expression is negative, the amount
+must be negated.
+.PP
+The transformations are demonstrated by an example.
+.DS
+i := 100;                     i := 100;
+while i > 1 loop              TMP := (6-i) * 5;
+   X := (6-i) * 5 + 2;        while i > 1 loop
+   Y := (6-i) * 5 - 8;   -->     X := TMP + 2;
+   i := i - 3;                   Y := TMP - 8;
+end loop;                        i := i - 3;
+			         TMP := TMP + 15;
+			      end loop;
+
+Fig. 6.2 Example of complex Strength Reduction transformations
+.DE
+The expression '(6-i)*5' is recognized twice. The constant
+is 5.
+The step value is -3.
+The sign of i in the recognized expression is '-'.
+So the increment value of TMP is -(-3*5) = +15.
--- a/doc/ego/sr/sr3
+++ b/doc/ego/sr/sr3
@@ -0,0 +1,232 @@
+.NH 2
+Implementation
+.PP
+Like most phases, SR deals with one procedure
+at a time.
+Within a procedure, SR works on one loop at a time.
+Loops are processed in textual order.
+If loops are nested inside each other,
+SR starts with the outermost loop and proceeds in the
+inwards direction.
+This order is chosen,
+because it enables the optimization
+of multi-dimensional array address computations,
+if the elements are accessed in the usual way
+(i.e. row after row, rather than column after column).
+For every loop, SR first detects all induction variables
+and then tries to recognize
+expressions that can be optimized.
+.NH 3
+Finding induction variables
+.PP
+The process of finding induction variables
+can conveniently be split up
+into two parts.
+First, the EM text of the loop is scanned to find
+all \fIcandidate\fR induction variables,
+which are word-sized local variables
+that are assigned precisely once
+in the loop, within a firm block.
+Second, for every candidate, the single assignment
+is inspected, to see if it has the form
+required by the definition of an induction variable.
+.PP
+Candidates are found by scanning the EM code of the loop.
+During this scan, two sets are maintained.
+The set "cand" contains all variables that were
+assigned exactly once so far, within a firm block.
+The set "dismiss" contains all variables that
+should not be made a candidate.
+Initially, both sets are empty.
+If a variable is assigned to, it is put
+in the cand set, if three conditions are met:
+.IP 1.
+the variable was not in cand or dismiss already
+.IP 2.
+the assignment takes place in a firm block
+.IP 3.
+the assignment is not a ZRL instruction (assignment by zero)
+or a SDL instruction (store double local).
+.LP
+If any condition fails, the variable is dismissed from cand
+(if it was there already) and put in dismiss
+(if it was not there already).
+.sp 0
+All variables for which no register message was generated (i.e. those
+variables that may be accessed indirectly) are assumed
+to be changed in the loop.
+.sp 0
+All variables that remain in cand are candidate induction variables.
+.PP
+From the set of candidates, the induction variables can
+be determined, by inspecting the single assignment.
+The assignment must match one of the EM patterns below.
+('x' is the candidate. 'ws' is the word size of the target machine.
+'n' is any number.)
+.DS
+\fIpattern\fR                                     \fIstep size\fR
+INL x  |                                      +1
+DEL x  |                                      -1
+LOL x ; (INC | DEC) ; STL x  |                +1 | -1
+LOL x ; LOC n ; (ADI ws | SBI ws) ; STL x  |  +n | -n
+LOC n ; LOL x ; ADI ws ; STL x.               +n
+.DE
+From the patterns the step size of the induction variable
+can also be determined.
+These step sizes are displayed on the right hand side.
+.sp
+For every induction variable we maintain the following information:
+.IP -
+the offset of the variable in the stackframe of its procedure
+.IP -
+a pointer to the EM text of the assignment statement
+.IP -
+the step value
+.LP
+.NH 3
+Optimizing expressions
+.PP
+If any induction variables of the loop were found,
+the EM text of the loop is scanned again,
+to detect expressions that can be optimized.
+SR scans for multiplication and array instructions.
+Whenever it finds such an instruction, it analyses the
+code in front of it.
+If an expression is to be optimized, it must
+be generated by the following syntax rules.
+.DS
+   optimizable_expr:
+		iv_expr const mult |
+		const iv_expr mult |
+		address iv_expr address array_instr;
+   mult:
+		MLI ws |
+		MLU ws ;
+   array_instr:
+		LAR ws |
+		SAR ws |
+		AAR ws ;
+   const:
+		LOC n ;
+.DE
+An 'address' is an EM instruction that loads an
+address on the stack.
+An instruction like LOL may be an 'address', if
+the size of an address (pointer size, =ps) is
+the same as the word size.
+If the pointer size is twice the word size,
+instructions like LDL are an 'address'.
+(The addresses in the third grammar rule
+denote the array address and the
+array descriptor address, respectively).
+.DS
+   address:
+		LAE |
+		LAL |
+		LOL if ps=ws |
+		LOE    ,,    |
+		LIL    ,,    |
+		LDL if ps=2*ws |
+		LDE    ,,      ;
+.DE
+The notion of an iv-expression was introduced earlier.
+.DS
+   iv_expr:
+		iv_expr unary_op |
+		iv_expr iv_expr binary_op |
+		loopconst |
+		iv ;
+   unary_op:
+		NGI ws |
+		INC |
+		DEC ;
+   binary_op:
+		ADI ws |
+		ADU ws |
+		SBI ws |
+		SBU ws ;
+   loopconst:
+		const |
+		LOL x  if x is not changed in loop ;
+   iv:
+		LOL x  if x is an induction variable ;
+.DE
+An iv_expression must satisfy one additional constraint:
+it must use exactly one operand that is an induction
+variable.
+A simple, hand-written, top-down parser is used
+to recognize an iv-expression.
+It scans the EM code from right to left
+(recall that EM is essentially postfix).
+It uses semantic attributes (inherited as well as
+derived) to check the additional constraint.
+.PP
+All information assembled during the recognition
+process is put in a 'code_info' structure.
+This structure contains the following information:
+.IP -
+the optimizable code itself
+.IP -
+the loop and basic block the code is part of
+.IP -
+the induction variable
+.IP -
+the iv-expression
+.IP -
+the sign of the induction variable in the
+iv-expression
+.IP -
+the offset and size of the temporary local variable
+.IP -	
+the expensive operator (MLI, LAR etc.)
+.IP -
+the instruction that loads the constant
+(for multiplication) or the array descriptor
+(for arrays).
+.LP
+The entire transformation process is driven
+by this information.
+As the EM text is represented internally
+as a list, this process consists
+mainly of straightforward list manipulations.
+.sp 0
+The initialization code must be put
+immediately before the loop entry.
+For this purpose a \fIheader block\fR is
+created that has the loop entry block as
+its only successor and that dominates the
+entry block.
+The CFG and all relations (SUCC,PRED, IDOM, LOOPS etc.)
+are updated.
+.sp 0
+An EM instruction that will
+replace the optimizable code
+is created and put at the place of the old code.
+The list representing the old optimizable code
+is used to create a list for the initializing code,
+as they are similar.
+Only two modifications are required:
+.IP -
+if the expensive operator is a LAR or SAR,
+it must be replaced by an AAR, as the initial value
+of TMP is the \fIaddress\fR of the first
+array element that is accessed.
+.IP -
+code must be appended to store the result of the
+expression in TMP.
+.LP
+Finally, code to increment TMP is created and put after
+the code of the single assignment to the
+induction variable.
+The generated code uses either an integer addition
+(ADI) or an integer-to-pointer addition (ADS)
+to do the increment.
+.PP
+SR maintains a set of all expressions that have already
+been recognized in the present loop.
+Such expressions are said to be \fIavailable\fR.
+If an expression is recognized that is
+already available,
+no new temporary local variable is allocated for it,
+and the code to initialize and increment the local
+is not generated.
--- a/doc/ego/sr/sr4
+++ b/doc/ego/sr/sr4
@@ -0,0 +1,28 @@
+.NH 2
+Source files of SR
+.PP
+The sources of SR are in the following files
+and packages:
+.IP sr.h: 14
+declarations of global variables and
+data structures
+.IP sr.c:
+the routine main; a driving routine to process
+(possibly nested) loops in the right order
+.IP iv
+implements a procedure that finds the induction variables
+of a loop
+.IP reduce
+implements a procedure that finds optimizable expressions
+and that does the transformations
+.IP cand
+implements a procedure that finds the candidate induction
+variables; used to implement iv
+.IP xform
+implements several useful routines that transform
+lists of EM text or a CFG; used to implement reduce
+.IP expr
+implements a procedure that parses iv-expressions
+.IP aux
+implements several auxiliary procedures.
+.LP
--- a/doc/ego/ud/.distr
+++ b/doc/ego/ud/.distr
@@ -0,0 +1,5 @@
+ud1
+ud2
+ud3
+ud4
+ud5
--- a/doc/ego/ud/ud1
+++ b/doc/ego/ud/ud1
@@ -0,0 +1,58 @@
+.bp
+.NH 1
+Use-Definition analysis
+.NH 2
+Introduction
+.PP
+The "Use-Definition analysis" phase (UD) consists of two related optimization
+techniques that both depend on "Use-Definition" information.
+The techniques are Copy Propagation and Constant Propagation.
+They are best explained via an example (see Figs. 11.1 and 11.2).
+.DS
+   (1)  A := B                  A := B
+	 ...          -->        ...
+   (2)  use(A)                  use(B)
+
+Fig. 11.1 An example of Copy Propagation
+.DE
+.DS
+   (1)  A := 12                  A := 12
+	 ...          -->        ...
+   (2)  use(A)                  use(12)
+
+Fig. 11.2 An example of Constant Propagation
+.DE
+Both optimizations have to check that the value of A at line (2)
+can only be obtained at line (1).
+Copy Propagation also has to assure that the value of B is
+the same at line (1) as at line (2).
+.PP
+One purpose of both transformations is to introduce
+opportunities for the Dead Code Elimination optimization.
+If the variable A is used nowhere else, the assignment A := B
+becomes useless and can be eliminated.
+.sp 0
+If B is less expensive to access than A (e.g. this is sometimes the case
+if A is a local variable and B is a global variable),
+Copy Propagation directly improves the code itself.
+If A is cheaper to access, the transformation will not be performed.
+Likewise, a constant as operand may be cheaper than a variable.
+Having a constant as operand may also facilitate other optimizations.
+.PP
+The design of UD is based on the theory described in sections
+14.1 and 14.3 of.
+.[
+aho compiler design
+.]
+As a main departure from that theory,
+we do not demand the statement A := B to become redundant after
+Copy Propagation.
+If B is cheaper to access than A, the optimization is always performed;
+if B is more expensive than A, we never do the transformation.
+If A and B are equally expensive, UD uses the heuristic rule to
+replace infrequently used variables by frequently used ones.
+This rule increases the chances of the assignment to become useless.
+.PP
+In the next section we will give a brief outline of the data
+flow theory used
+for the implementation of UD.
--- a/doc/ego/ud/ud2
+++ b/doc/ego/ud/ud2
@@ -0,0 +1,64 @@
+.NH 2
+Data flow information
+.PP
+.NH 3
+Use-Definition information
+.PP
+A \fIdefinition\fR of a variable A is an assignment to A.
+A definition is said to \fIreach\fR a point p if there is a
+path in the control flow graph from the definition to p, such that
+A is not redefined on that path.
+.PP
+For every basic block b, we define the following sets:
+.IP GEN[b] 9
+the set of definitions in b that reach the end of b.
+.IP KILL[b]
+the set of definitions outside b that define a variable that
+is changed in b.
+.IP IN[b]
+the set of all definitions reaching the beginning of b.
+.IP OUT[b]
+the set of all definitions reaching the end of b.
+.LP
+GEN and KILL can be determined by inspecting the code of the procedure.
+IN and OUT are computed by solving the following data flow equations:
+.DS
+(1)    OUT[b] = IN[b] - KILL[b] + GEN[b]
+(2)    IN[b]  = OUT[p1] + ... + OUT[pn],
+	 where PRED(b) = {p1, ... , pn}
+.DE
+.NH 3
+Copy information
+.PP
+A \fIcopy\fR is a definition of the form "A := B".
+A copy is said to be \fIgenerated\fR in a basic block n if
+it occurs in n and there is no subsequent assignment to B in n.
+A copy is said to be \fIkilled\fR in n if:
+.IP (i)
+it occurs in n and there is a subsequent assignment to B within n, or
+.IP (ii)
+it occurs outside n, the definition A := B reaches the beginning of n
+and B is changed in n (note that a copy also is a definition).
+.LP
+A copy \fIreaches\fR a point p, if there are no assignments to B
+on any path in the control flow graph from the copy to p.
+.PP
+We define the following sets:
+.IP C_GEN[b] 11
+the set of all copies in b generated in b.
+.IP C_KILL[b]
+the set of all copies killed in b.
+.IP C_IN[b]
+the set of all copies reaching the beginning of b.
+.IP C_OUT[b]
+the set of all copies reaching the end of b.
+.LP
+C_IN and C_OUT are computed by solving the following equations:
+(root is the entry node of the current procedure; '*' denotes
+set intersection)
+.DS
+(1)    C_OUT[b] = C_IN[b] - C_KILL[b] + C_GEN[b]
+(2)    C_IN[b]  = C_OUT[p1] * ... * C_OUT[pn],
+	 where PRED(b) = {p1, ... , pn} and b /= root
+       C_IN[root] = {all copies}
+.DE
--- a/doc/ego/ud/ud3
+++ b/doc/ego/ud/ud3
@@ -0,0 +1,26 @@
+.NH 2
+Pointers and subroutine calls
+.PP
+The theory outlined above assumes that variables can
+only be changed by a direct assignment.
+This condition does not hold for EM.
+In case of an assignment through a pointer variable,
+it is in general impossible to see which variable is affected
+by the assignment.
+Similar problems occur in the presence of procedure calls.
+Therefore we distinguish two kinds of definitions:
+.IP -
+an \fIexplicit\fR definition is a direct assignment to one
+specific variable
+.IP -
+an \fIimplicit\fR definition is the potential alteration of
+a variable as a result of a procedure call or an indirect assignment.
+.LP
+An indirect assignment causes implicit definitions to
+all variables that may be accessed indirectly, i.e. 
+all local variables for which no register message was generated
+and all global variables.
+If a procedure contains an indirect assignment it may change the
+same set of variables, else it may change some global variables directly.
+The KILL, GEN, IN and OUT sets contain explicit as well
+as implicit definitions.
--- a/doc/ego/ud/ud4
+++ b/doc/ego/ud/ud4
@@ -0,0 +1,78 @@
+.NH 2
+Implementation
+.PP
+UD first builds a number of tables:
+.IP locals: 9
+contains information about the local variables of the
+current procedure (offset,size,whether a register message was found
+for it and, if so, the score field of that message)
+.IP defs:
+a table of all explicit definitions appearing in the
+current procedure.
+.IP copies:
+a table of all copies appearing in the
+current procedure.
+.LP
+Every variable (local as well as global), definition and copy
+is identified by a unique number, which is the index
+in the table.
+All tables are constructed by traversing the EM code.
+A fourth table, "vardefs", is used, indexed by a 'variable number',
+which contains for every variable the set of explicit definitions of it.
+Also, for each basic block b, the set CHGVARS containing all variables
+changed by it is computed.
+.PP
+The GEN sets are obtained in one scan over the EM text,
+by analyzing every EM instruction.
+The KILL set of a basic block b is computed by looking at the
+set of variables
+changed by b (i.e. CHGVARS[b]).
+For every such variable v, all explicit definitions to v
+(i.e. vardefs[v]) that are not in GEN[b] are added to KILL[b].
+Also, the implicit definition of v is added to KILL[b].
+Next, the data flow equations for use-definition information
+are solved,
+using a straightforward, iterative algorithm.
+All sets are represented as bitvectors, so the operations
+on sets (union, difference) can be implemented efficiently.
+.PP
+The C_GEN and C_KILL sets are computed simultaneously in one scan
+over the EM text.
+For every copy A := B appearing in basic block b we do
+the following:
+.IP 1.
+for every basic block n /= b that changes B, see if the definition A := B
+reaches the beginning of n (i.e. check if the index number of A := B in
+the "defs" table is an element of IN[n]);
+if so, add the copy to C_KILL[n]
+.IP 2.
+if B is redefined later on in b, add the copy to C_KILL[b], else
+add it to C_GEN[b]
+.LP
+C_IN and C_OUT are computed from C_GEN and C_KILL via the second set of
+data flow equations.
+.PP
+Finally, in one last scan all opportunities for optimization are
+detected.
+For every use u of a variable A, we check if
+there is a unique explicit definition d reaching u.
+.sp
+If the definition is a copy A := B and B has the same value at d as
+at u, then the use of A at u may be changed into B.
+The latter condition can be verified as follows:
+.IP -
+if u and d are in the same basic block, see if there is
+any assignment to B in between d and u
+.IP -
+if u and d are in different basic blocks, the condition is
+satisfied if there is no assignment to B in the block b of u prior to u
+and d is in C_IN[b].
+.LP
+Before the transformation is actually done, UD first makes sure the
+alteration is really desirable, as described before.
+The information needed for this purpose (access costs of local and
+global variables) is read from a machine descriptor file.
+.sp
+If the only definition reaching u has the form "A := constant", the use
+of A at u is replaced by the constant.
+
--- a/doc/ego/ud/ud5
+++ b/doc/ego/ud/ud5
@@ -0,0 +1,19 @@
+
+.NH 2
+Source files of UD
+.PP
+The sources of UD are in the following files and packages:
+.IP ud.h: 14
+declarations of global variables and data structures
+.IP ud.c:
+the routine main; initialization of target machine dependent tables
+.IP defs:
+routines to compute the GEN and KILL sets and routines to analyse
+EM instructions
+.IP const:
+routines involved in constant propagation
+.IP copy:
+routines involved in copy propagation
+.IP aux:
+contains auxiliary routines
+.LP
--- a/doc/em/.distr
+++ b/doc/em/.distr
@@ -0,0 +1,32 @@
+Makefile
+READ_ME
+addend.n
+app.codes.nr
+app.exam.nr
+app.int.nr
+assem.nr
+cont.nr
+descr.nr
+dspace.nr
+em.i
+env.nr
+even.c
+exam.e
+exam.p
+int
+intro.nr
+ip.awk
+ispace.nr
+mach.nr
+macr.nr
+mapping.nr
+mem.nr
+print
+show
+title.nr
+traps.nr
+types.nr
+mkdispatch.c
+dispat1.sed
+dispat2.sed
+dispat3.sed
--- a/doc/em/Makefile
+++ b/doc/em/Makefile
@@ -1,31 +1,36 @@
-head:   doc.pr
+HOME=../..

+TBL=tbl
 NROFF=nroff
-FILES = macr.nr title.nr intro.nr mem.nr ispace.nr dspace.nr mapping.nr types.nr descr.nr iotrap.nr mach.nr assem.nr app.nr
-IOP=../../util/ass/ip_spec.t
+SUF=pr

-doc.pr: $(FILES) itables em.i
-	tbl $(FILES) | $(NROFF) >doc.pr
+head:   ../em.$(SUF)

-distr:	$(FILES) itables em.i
-	tbl $(FILES) | nroff -Tlp >doc.pr
+FILES = macr.nr title.nr intro.nr mem.nr ispace.nr dspace.nr mapping.nr \
+	types.nr descr.nr env.nr traps.nr mach.nr assem.nr \
+	app.int.nr app.codes.nr app.exam.nr cont.nr

-opr:	doc.pr
-	make pr | opr
+IOP=$(HOME)/util/ass/ip_spec.t#			# to construct itables from

-pr:
-	@make "NROFF="$NROFF doc.pr >makepr.out 2>&1
-	@cat doc.pr
+../em.$(SUF):	$(FILES) itables dispatdummy em.i Makefile
+		$(TBL) $(FILES) | $(NROFF) > ../em.$(SUF)

-app.t:	itables em.i
+app.codes.pr: app.codes.nr itables dispatdummy

-em.i:	int/em.p
-	@echo Sorry, this copy was edited by hand from int/em.p
+itables: $(IOP) ip.awk
+	awk -f ip.awk $(IOP) | sed 's/-/\\-/g' | $(TBL) >itables

-itables: $(IOP)
-	awk -f ip.awk $(IOP) | tbl >itables
+dispatdummy:	$(IOP) mkdispatch
+	mkdispatch < $(IOP) > dispatdummy
+	sed -f dispat1.sed < dispatdummy | $(TBL) > dispat1
+	sed -f dispat2.sed < dispatdummy | $(TBL) > dispat2
+	sed -f dispat3.sed < dispatdummy | $(TBL) > dispat3
+
+mkdispatch:	mkdispatch.c
+	cc -I$(HOME)/util/ass -I$(HOME)/h -o mkdispatch mkdispatch.c $(HOME)/lib/em_data.a

 .SUFFIXES : .pr .nr
-.nr.pr: ; tbl macr.nr $*.nr | $(NROFF) >$@
+.nr.pr: ; $(TBL) macr.nr $*.nr | $(NROFF) >$@

-cont.t intro.t mem.t ispace.t dspace.t mapping.t succ.t descr.t iotrap.t mach.t assem.t kern.t app.t: macr.nr
+clean:
+	rm -f *.pr itables *.out dispatdummy dispat? *.o mkdispatch
--- a/doc/em/READ_ME
+++ b/doc/em/READ_ME
@@ -1 +1,9 @@
-Sorry, the kun macro package is not ours to distribute.
+This is the text of IR-81,
+DESCRIPTION OF A MACHINE ARCHITECTURE FOR USE WITH BLOCK STRUCTURED LANGUAGES
+
+The file em.i (text of the defining interpreter) was hand-edited from int/em.p
+
+To print, set NROFF and TBL in the Makefile and call  make.
+It uses the kun macro package which is also distributed.
+
+The directory int contains the interpreter.
--- a/doc/em/app.codes.nr
+++ b/doc/em/app.codes.nr
@@ -0,0 +1,153 @@
+.BP
+.AP "EM CODE TABLES"
+The following table is used by the assembler for EM machine
+language.
+It specifies the opcodes used for each instruction and
+how arguments are mapped to machine language arguments.
+The table is presented in three columns;
+each line in each column contains three or four fields.
+Each line describes a range of interpreter opcodes by
+specifying for which instruction the range is used, the type of the
+opcodes (mini, shortie, etc..) and range for the instruction
+argument.
+.A
+The first field on each line gives the EM instruction mnemonic,
+the second field gives some flags.
+If the opcodes are minis or shorties the third field specifies
+how many minis/shorties are used.
+The last field gives the number of the (first) interpreter
+opcode.
+.N 1
+Flags :
+.IS 3
+.N 1
+Opcode type, only one of the following may be specified.
+.PS - 5 "  "
+.PT \-
+opcode without argument
+.PT m
+mini
+.PT s
+shortie
+.PT 2
+opcode with 2-byte signed argument
+.PT 4
+opcode with 4-byte signed argument
+.PT 8
+opcode with 8-byte signed argument
+.PE
+Secondary (escaped) opcodes.
+.PS - 5 "  "
+.PT e
+The opcode thus marked is in the secondary opcode group instead
+of the primary
+.PE
+restrictions on arguments
+.PS - 5 "  "
+.PT N
+Negative arguments only
+.PT P
+Positive and zero arguments only
+.PE
+mapping of arguments
+.PS - 5 "  "
+.PT w
+argument must be divisible by the wordsize and is divided by the
+wordsize before use as opcode argument.
+.PT o
+argument ( possibly after division ) must be >= 1 and is
+decremented before use as opcode argument
+.PE
+.IE
+If the opcode type is 2,4 or 8 the resulting argument is used as
+opcode argument (least significant byte first).
+.N
+If the opcode type is mini, the argument is added
+to the first opcode \- if in range \- .
+If the argument is negative, the absolute value minus one is
+used in the algorithm above.
+.N
+For shorties with positive arguments the first opcode is used
+for arguments in the range 0..255, the second for the range
+256..511, etc..
+For shorties with negative arguments the first opcode is used
+for arguments in the range \-1..\-256, the second for the range
+\-257..\-512, etc..
+The byte following the opcode contains the least significant
+byte of the argument.
+First some examples of these specifications.
+.PS - 5
+.PT "aar mwPo 1 34"
+Indicates that opcode 34 is used as a mini for Positive
+instruction arguments only.
+The w and o indicate division and decrementing of the
+instruction argument.
+Because the resulting argument must be zero ( only opcode 34 may be used
+), this mini can only be used for instruction argument 2.
+Conclusion: opcode 34 is for "AAR 2".
+.PT "adp sP 1 41"
+Opcode 41 is used as shortie for ADP with arguments in the range
+0..255.
+.PT "bra sN 2 60"
+Opcode 60 is used as shortie for BRA with arguments \-1..\-256,
+61 is used for arguments \-257..\-512.
+.PT "zer e\- 145"
+Escaped opcode 145 is used for ZER.
+.PE
+The interpreter opcode table:
+.N 1
+.IS 3
+.so itables
+.IE
+.P
+The table above results in the following dispatch tables.
+Dispatch tables are used by interpreters to jump to the
+routines implementing the EM instructions, indexed by the next opcode.
+Each line of the dispatch tables gives the routine names
+of eight consecutive opcodes, preceded by the first opcode number
+on that line.
+Routine names consist of an EM mnemonic followed by a suffix.
+The suffixes show the encoding used for each opcode.
+.N
+The following suffixes exist:
+.N 1
+.VS 1 0
+.IS 4
+.PS - 11
+.PT .z
+no arguments
+.PT .l
+16-bit argument
+.PT .lw
+16-bit argument divided by the wordsize
+.PT .p
+positive 16-bit argument
+.PT .pw
+positive 16-bit argument divided by the wordsize
+.PT .n
+negative 16-bit argument
+.PT .nw
+negative 16-bit argument divided by the wordsize
+.PT .s<num>
+shortie with <num> as high order argument byte
+.PT .w<num>
+shortie with argument divided by the wordsize
+.PT .<num>
+mini with <num> as argument
+.PT .<num>W
+mini with <num>*wordsize as argument
+.PE 1
+<num> is a possibly negative integer.
+.VS 1 1
+.IE
+The dispatch table for the 256 primary opcodes:
+.N 1
+.so dispat1
+.N 2
+The list of secondary opcodes (escape1):
+.N 1
+.so dispat2
+.N 2
+Finally, the list of opcodes with four byte arguments (escape2).
+.N 1
+.so dispat3
--- a/doc/em/app.exam.nr
+++ b/doc/em/app.exam.nr
@@ -0,0 +1,277 @@
+.BP
+.AP "AN EXAMPLE PROGRAM"
+.A 1 0
+.NA
+.ta 4n 8n 12n 16n 20n
+.nf
+ 1	program example(output);
+ 2	{This program just demonstrates typical EM code.}
+ 3	type rec = record r1: integer; r2:real; r3: boolean end;
+ 4	var mi: integer;  mx:real;  r:rec;
+ 5
+ 6	function sum(a,b:integer):integer;
+ 7	begin
+ 8		sum := a + b
+ 9	end;
+10
+11	procedure test(var r: rec);
+12	label 1;
+13	var i,j: integer;
+14		x,y: real;
+15		b: boolean;
+16		c: char;
+17		a: array[1..100] of integer;
+18
+19	begin
+20		j := 1;
+21		i := 3 * j + 6;
+22		x := 4.8;
+23		y := x/0.5;
+24		b := true;
+25		c := 'z';
+26		for i:= 1 to 100 do a[i] := i * i;
+27		r.r1 := j+27;
+28		r.r3 := b;
+29		r.r2 := x+y;
+30		i := sum(r.r1, a[j]);
+31		while i > 0 do begin j := j + r.r1; i := i - 1 end;
+32		with r do begin r3 := b;  r2 := x+y;  r1 := 0 end;
+33		goto 1;
+34	1:	writeln(j, i:6, x:9:3, b)
+35	end; {test}
+36	begin {main program}
+37		mx := 15.96;
+38		mi := 99;
+39		test(r)
+40	end.
+.fi
+.AD
+.BP
+The EM code as produced by the Pascal-VU compiler is given below. Comments
+have been added manually.  Note that this code has already been  optimized.
+.A 1 0
+.NA
+.nf
+.ta 1n 24n
+	mes 2,2,2	; wordsize 2, pointersize 2
+\&.1
+	rom 't.p\e000'	; the name of the source file
+	hol 552,\-32768,0	; externals and buf occupy 552 bytes
+	exp $sum	; sum can be called from other modules
+	pro $sum,2	; procedure sum	; 2 bytes local storage
+	lin 8	; code from source line 8
+	ldl 0	; load two locals ( a and b )
+	adi 2	; add them
+	ret 2	; return the result
+	end 2	; end of procedure ( still two bytes local storage )
+\&.2
+	rom 1,99,2	; descriptor of array a[]
+	exp $test	; the compiler exports all level 0 procedures
+	pro $test,226	; procedure test, 226 bytes local storage
+\&.3
+	rom 4.8F8	; assemble Floating point 4.8 (8 bytes) in
+\&.4		; global storage
+	rom 0.5F8	; same for 0.5
+	mes 3,\-226,2,2	; compiler temporary not referenced by address
+	mes 3,\-24,2,0	; the same is true for i, j, b and c in test
+	mes 3,\-22,2,0
+	mes 3,\-4,2,0
+	mes 3,\-2,2,0
+	mes 3,\-20,8,0	; and for x and y
+	mes 3,\-12,8,0
+	lin 20	; maintain source line number
+	loc 1
+	stl \-4	; j := 1
+	lni	; lin 21 prior to optimization
+	lol \-4
+	loc 3
+	mli 2
+	loc 6
+	adi 2
+	stl \-2	; i := 3 * j + 6
+	lni	; lin 22 prior to optimization
+	lae .3
+	loi 8
+	lal \-12
+	sti 8	; x := 4.8
+	lni	; lin 23 prior to optimization
+	lal \-12
+	loi 8
+	lae .4
+	loi 8
+	dvf 8
+	lal \-20
+	sti 8	; y := x / 0.5
+	lni	; lin 24 prior to optimization
+	loc 1
+	stl \-22	; b := true
+	lni	; lin 25 prior to optimization
+	loc 122
+	stl \-24	; c := 'z'
+	lni	; lin 26 prior to optimization
+	loc 1
+	stl \-2	; for i:= 1
+2
+	lol \-2
+	dup 2
+	mli 2	; i*i
+	lal \-224
+	lol \-2
+	lae .2
+	sar 2	; a[i] :=
+	lol \-2
+	loc 100
+	beq *3	; to 100 do
+	inl \-2	; increment i and loop
+	bra *2
+3
+	lin 27
+	lol \-4
+	loc 27
+	adi 2	; j + 27
+	sil 0	; r.r1 :=
+	lni	; lin 28 prior to optimization
+	lol \-22	; b
+	lol 0
+	stf 10	; r.r3 :=
+	lni	; lin 29 prior to optimization
+	lal \-20
+	loi 16
+	adf 8	; x + y
+	lol 0
+	adp 2
+	sti 8	; r.r2 :=
+	lni	; lin 30 prior to optimization
+	lal \-224
+	lol \-4
+	lae .2
+	lar 2	; a[j]
+	lil 0	; r.r1
+	cal $sum	; call now
+	asp 4	; remove parameters from stack
+	lfr 2	; get function result
+	stl \-2	; i :=
+4
+	lin 31
+	lol \-2
+	zle *5	; while i > 0 do
+	lol \-4
+	lil 0
+	adi 2
+	stl \-4	; j := j + r.r1
+	del \-2	; i := i - 1
+	bra *4	; loop
+5
+	lin 32
+	lol 0
+	stl \-226	; make copy of address of r
+	lol \-22
+	lol \-226
+	stf 10	; r3 := b
+	lal \-20
+	loi 16
+	adf 8
+	lol \-226
+	adp 2
+	sti 8	; r2 := x + y
+	loc 0
+	sil \-226	; r1 := 0
+	lin 34	; note the absence of the unnecessary jump
+	lae 22	; address of output structure
+	lol \-4
+	cal $_wri	; write integer with default width
+	asp 4	; pop parameters
+	lae 22
+	lol \-2
+	loc 6
+	cal $_wsi	; write integer width 6
+	asp 6
+	lae 22
+	lal \-12
+	loi 8
+	loc 9
+	loc 3
+	cal $_wrf	; write fixed format real, width 9, precision 3
+	asp 14
+	lae 22
+	lol \-22
+	cal $_wrb	; write boolean, default width
+	asp 4
+	lae 22
+	cal $_wln	; writeln
+	asp 2
+	ret 0	; return, no result
+	end 226
+	exp $_main
+	pro $_main,0	; main program
+\&.6
+	con 2,\-1,22	; description of external files
+\&.5
+	rom 15.96F8
+	fil .1	; maintain source file name
+	lae .6	; description of external files
+	lae 0	; base of hol area to relocate buffer addresses
+	cal $_ini	; initialize files, etc...
+	asp 4
+	lin 37
+	lae .5
+	loi 8
+	lae 2
+	sti 8	; mx := 15.96
+	lni	; lin 38 prior to optimization
+	loc 99
+	ste 0	; mi := 99
+	lni	; lin 39 prior to optimization
+	lae 10	; address of r
+	cal $test
+	asp 2
+	loc 0	; normal exit
+	cal $_hlt	; cleanup and finish
+	asp 2
+	end 0
+	mes 5	; reals were used
+.fi
+.AD
+.A 1 0
+The compact code corresponding to the above program is listed below.
+Read it horizontally, line by line, not column by column.
+Each number represents a byte of compact code, printed in decimal.
+The first two bytes form the magic word.
+.N 1
+.IS 3
+.Dr 33
+ 173   0 159 122 122 122 255 242   1 161 250 124 116  46 112   0
+ 255 156 245  40   2 245   0 128 120 155 249 123 115 117 109 160
+ 249 123 115 117 109 122  67 128  63 120   3 122  88 122 152 122
+ 242   2 161 121 219 122 255 155 249 124 116 101 115 116 160 249
+ 124 116 101 115 116 245 226   0 242   3 161 253 128 123  52  46
+  56 255 242   4 161 253 128 123  48  46  53 255 159 123 245  30
+ 255 122 122 255 159 123  96 122 120 255 159 123  98 122 120 255
+ 159 123 116 122 120 255 159 123 118 122 120 255 159 123 100 128
+ 120 255 159 123 108 128 120 255  67 140  69 121 113 116  68  73
+ 116  69 123  81 122  69 126   3 122 113 118  68  57 242   3  72
+ 128  58 108 112 128  68  58 108  72 128  57 242   4  72 128  44
+ 128  58 100 112 128  68  69 121 113  98  68  69 245 122   0 113
+  96  68  69 121 113 118 182  73 118  42 122  81 122  58 245  32
+ 255  73 118  57 242   2  94 122  73 118  69 220  10 123  54 118
+  18 122 183  67 147  73 116  69 147   3 122 104 120  68  73  98
+  73 120 111 130  68  58 100  72 136   2 128  73 120   4 122 112
+ 128  68  58 245  32 255  73 116  57 242   2  59 122  65 120  20
+ 249 123 115 117 109   8 124  64 122 113 118 184  67 151  73 118
+ 128 125  73 116  65 120   3 122 113 116  41 118  18 124 185  67
+ 152  73 120 113 245  30 255  73  98  73 245  30 255 111 130  58
+ 100  72 136   2 128  73 245  30 255   4 122 112 128  69 120 104
+ 245  30 255  67 154  57 142  73 116  20 249 124  95 119 114 105
+   8 124  57 142  73 118  69 126  20 249 124  95 119 115 105   8
+ 126  57 142  58 108  72 128  69 129  69 123  20 249 124  95 119
+ 114 102   8 134  57 142  73  98  20 249 124  95 119 114  98   8
+ 124  57 142  20 249 124  95 119 108 110   8 122  88 120 152 245
+ 226   0 155 249 125  95 109  97 105 110 160 249 125  95 109  97
+ 105 110 120 242   6 151 122 119 142 255 242   5 161 253 128 125
+  49  53  46  57  54 255  50 242   1  57 242   6  57 120  20 249
+ 124  95 105 110 105   8 124  67 157  57 242   5  72 128  57 122
+ 112 128  68  69 219 110 120  68  57 130  20 249 124 116 101 115
+ 116   8 122  69 120  20 249 124  95 104 108 116   8 122 152 120
+ 159 124 160 255 159 125 255
+.De
+.IE
--- a/doc/em/app.int.nr
+++ b/doc/em/app.int.nr
@@ -0,0 +1,8 @@
+.BP
+.AP "EM INTERPRETER"
+.nf
+.ft CW
+.ta 8 16 24 32 40 48 56 64 72 80
+.so em.i
+.ft P
+.fi
--- a/doc/em/assem.nr
+++ b/doc/em/assem.nr
@@ -34,7 +34,7 @@ The scope of an instruction label is its procedure.
 .A
 The pseudoinstructions CON, ROM and BSS may be preceded by a
 line containing a
-1-8 character data label, the first character of which is a
+1\-8 character data label, the first character of which is a
 letter, period or underscore.
 The period may only be followed by
 digits, the others may be followed by letters, digits and underscores.
@@ -66,7 +66,7 @@ They do not belong to a specific procedure.
 All constants in EM are interpreted in the decimal base.
 The ASCII assembly language accepts constant expressions
 wherever constants are allowed.
-The operators recognized are: +, -, *, % and / with the usual
+The operators recognized are: +, \-, *, % and / with the usual
 precedence order.
 Use of the parentheses ( and ) to alter the precedence order is allowed.
 .S3 "Instruction arguments"
@@ -109,16 +109,16 @@ integers on top of the stack are to be compared.
 on top of the stack that specifies the size of the integers to
 be compared.
 Thus the following two sequences are equivalent:
-.N 2
+.N 1
 .TS
 center, tab(:) ;
 l r 30 l r.
-LDL:-10:LDL:-10
-LDL:-14:LDL:-14
+LDL:\-10:LDL:\-10
+LDL:\-14:LDL:\-14
 ::LOC:4
 CMI:4:CMI:
 ZEQ:*1:ZEQ:*1
-.TE 2
+.TE 1
 Section 11.1.6 shows the arguments allowed for each instruction.
 .S3 "Pseudoinstruction arguments"
 Pseudoinstruction arguments can be divided in two classes:
@@ -139,7 +139,7 @@ initializer's size.
 This integer is governed by the same restrictions as for
 transfer of objects to/from memory.
 As in instruction arguments, initializers include expressions of the form:
-\&"LABEL+offset" and "LABEL-offset".
+\&"LABEL+offset" and "LABEL\-offset".
 The offset must be an unsigned decimal constant.
 The 'IUF' indicators cannot be used in the offsets.
 .P
@@ -167,7 +167,7 @@ double quote:":\e"
 bit pattern:\fBddd\fP:\e\fBddd\fP
 .TE
 .DE
-The escape \fBddd\fP consists of the backslash followed by 1,
+The escape \fB\eddd\fP consists of the backslash followed by 1,
 2, or 3 octal digits specifying the value of
 the desired character.
 If the character following a backslash is not one of those
@@ -190,9 +190,9 @@ instructions and pseudoinstructions.
 .TS
 tab(:);
 l l l.
-<cst>:\&=:integer constant (current range -2**31..2**31-1)
+<cst>:\&=:integer constant (current range \-2**31..2**31\-1)
 <dlb>:\&=:data label
-<arg>:\&=:<cst> or <dlb> or <dlb>+<cst> or <dlb>-<cst>
+<arg>:\&=:<cst> or <dlb> or <dlb>+<cst> or <dlb>\-<cst>
 <con>:\&=:integer constant, unsigned constant, floating-point constant
 <str>:\&=:string constant (surrounded by double quotes),
 <ilb>:\&=:instruction label
@@ -425,13 +425,13 @@ etc. represent the succeeding bytes.
 tab(:) ;
 rw17 4 l.
 0:Reserved for future use
-1-129:Machine instructions, see Appendix A, alphabetical list
-130-149:Reserved for future use
-150-161:BSS,CON,END,EXA,EXC,EXP,HOL,INA,INP,MES,PRO,ROM
-162-179:Reserved for future pseudoinstructions
-180-239:Instruction labels 0 - 59  (180 is local label 0 etc.)
-240-244:See the Common Table below
-245-255:Not used
+1\-129:Machine instructions, see Appendix A, alphabetical list
+130\-149:Reserved for future use
+150\-161:BSS,CON,END,EXA,EXC,EXP,HOL,INA,INP,MES,PRO,ROM
+162\-179:Reserved for future pseudoinstructions
+180\-239:Instruction labels 0 \- 59  (180 is local label 0 etc.)
+240\-244:See the Common Table below
+245\-255:Not used
 .TE 1
 .DE 0
 After a label, the assembler is back in neutral state; it can immediately
@@ -449,9 +449,9 @@ encoded as follows:
 .TS
 tab(:);
 r l.
-0-239:Offsets from -120 to 119
+0\-239:Offsets from \-120 to 119

-240-255:See the Common Table below
+240\-255:See the Common Table below
 .TE 1
 Absence of an optional argument is indicated by a special
 byte.
@@ -467,8 +467,8 @@ class:bytes:description

 <ilb>:240:b1:Instruction label b1  (Not used for branches)
 <ilb>:241:b1 b2:16 bit instruction label  (256*b2 + b1)
-<dlb>:242:b1:Global label .0-.255, with b1 being the label
-<dlb>:243:b1 b2:Global label .0-.32767
+<dlb>:242:b1:Global label .0\-.255, with b1 being the label
+<dlb>:243:b1 b2:Global label .0\-.32767
 :::with 256*b2+b1 being the label
 <dlb>:244:<string>:Global symbol not of the form .nnn
 <cst>:245:b1 b2:16 bit constant
@@ -488,7 +488,7 @@ class:bytes:description
 The bytes specifying the value of a 16, 32 or 64 bit constant
 are presented in two's complement notation, with the least
 significant byte first. For example: the value of a 32 bit
-constant is ((s4*256+b3)*256+b2)*256+b1, where s4 is b4-256 if
+constant is ((s4*256+b3)*256+b2)*256+b1, where s4 is b4\-256 if
 b4 is greater than 127 else s4 takes the value of b4.
 A <string> consists of a <cst> immediately followed by
 a sequence of bytes with length <cst>.
@@ -498,10 +498,10 @@ The pseudoinstructions fall into several categories, depending on their
 arguments:
 .N 1
 .DS
- Group 1 -- EXC, BSS, HOL have a known number of arguments
- Group 2 -- EXA, EXP, INA, INP have a string as argument
- Group 3 -- CON, MES, ROM have a variable number of various things
- Group 4 -- END, PRO have a trailing optional argument.
+ Group 1 \- EXC, BSS, HOL have a known number of arguments
+ Group 2 \- EXA, EXP, INA, INP have a string as argument
+ Group 3 \- CON, MES, ROM have a variable number of various things
+ Group 4 \- END, PRO have a trailing optional argument.
 .DE 1
 Groups 1 and 2
 use the encoding described above.
@@ -522,7 +522,7 @@ Example  ASCII|Example compact
 2||182
 1||181
 LOC|10|69 130
- LOC|-10|69 110
+ LOC|\-10|69 110
 LOC|300|69 245 44 1
 BRA|*19|18 139
 300||241 44 1
@@ -531,7 +531,6 @@ Example  ASCII|Example compact
 CON|.35|151 242 35 255
 .TE 0
 .IE 0
-.BP
 .S2 "Assembly language instruction list"
 .P
 For each instruction in the list the range of argument values
@@ -556,7 +555,7 @@ are indicated by letters:
 .ds s \fBs\fP
 .ds z \fBz\fP
 .ds o \fBo\fP
-.ds - \fB-\fP
+.ds - \fB\-\fP
 .N 1
 .TS
 tab(:);
@@ -589,185 +588,214 @@ Instructions that check for undefined integer or floating-point
 values and underflow or overflow
 are indicated below by (*).
 .N 1
-.DS B
-GROUP 1 - LOAD
+.DS
+.ta 12n
+GROUP 1 \- LOAD

-  LOC \*c : Load constant (i.e. push one word onto the stack)
-  LDC \*d : Load double constant ( push two words )
-  LOL \*l : Load word at \*l-th local (\*l<0) or parameter (\*l>=0)
-  LOE \*g : Load external word \*g
-  LIL \*l : Load word pointed to by \*l-th local or parameter
-  LOF \*f : Load offsetted (top of stack + \*f yield address)
-  LAL \*l : Load address of local or parameter
-  LAE \*g : Load address of external
-  LXL \*n : Load lexical (address of LB \*n static levels back)
-  LXA \*n : Load lexical (address of AB \*n static levels back)
-  LOI \*o : Load indirect \*o bytes (address is popped from the stack)
-  LOS \*w : Load indirect, \*w-byte integer on top of stack gives object size
-  LDL \*l : Load double local or parameter (two consecutive words are stacked)
-  LDE \*g : Load double external (two consecutive externals are stacked)
-  LDF \*f : Load double offsetted (top of stack + \*f yield address)
-  LPI \*p : Load procedure identifier
+  LOC \*c :	Load constant (i.e. push one word onto the stack)
+  LDC \*d :	Load double constant ( push two words )
+  LOL \*l :	Load word at \*l-th local (\*l<0) or parameter (\*l>=0)
+  LOE \*g :	Load external word \*g
+  LIL \*l :	Load word pointed to by \*l-th local or parameter
+  LOF \*f :	Load offsetted (top of stack + \*f yield address)
+  LAL \*l :	Load address of local or parameter
+  LAE \*g :	Load address of external
+  LXL \*n :	Load lexical (address of LB \*n static levels back)
+  LXA \*n :	Load lexical (address of AB \*n static levels back)
+  LOI \*o :	Load indirect \*o bytes (address is popped from the stack)
+  LOS \*w :	Load indirect, \*w-byte integer on top of stack gives object size
+  LDL \*l :	Load double local or parameter (two consecutive words are stacked)
+  LDE \*g :	Load double external (two consecutive externals are stacked)
+  LDF \*f :	Load double offsetted (top of stack + \*f yield address)
+  LPI \*p :	Load procedure identifier
+.DE

-GROUP 2 - STORE
+.DS
+GROUP 2 \- STORE

-  STL \*l : Store local or parameter
-  STE \*g : Store external
-  SIL \*l : Store into word pointed to by \*l-th local or parameter
-  STF \*f : Store offsetted
-  STI \*o : Store indirect \*o bytes (pop address, then data)
-  STS \*w : Store indirect, \*w-byte integer on top of stack gives object size
-  SDL \*l : Store double local or parameter
-  SDE \*g : Store double external
-  SDF \*f : Store double offsetted
+  STL \*l :	Store local or parameter
+  STE \*g :	Store external
+  SIL \*l :	Store into word pointed to by \*l-th local or parameter
+  STF \*f :	Store offsetted
+  STI \*o :	Store indirect \*o bytes (pop address, then data)
+  STS \*w :	Store indirect, \*w-byte integer on top of stack gives object size
+  SDL \*l :	Store double local or parameter
+  SDE \*g :	Store double external
+  SDF \*f :	Store double offsetted
+.DE

-GROUP 3 - INTEGER ARITHMETIC
+.DS
+GROUP 3 \- INTEGER ARITHMETIC

-  ADI \*w : Addition (*)
-  SBI \*w : Subtraction (*)
-  MLI \*w : Multiplication (*)
-  DVI \*w : Division (*)
-  RMI \*w : Remainder (*)
-  NGI \*w : Negate (two's complement) (*)
-  SLI \*w : Shift left (*)
-  SRI \*w : Shift right (*)
+  ADI \*w :	Addition (*)
+  SBI \*w :	Subtraction (*)
+  MLI \*w :	Multiplication (*)
+  DVI \*w :	Division (*)
+  RMI \*w :	Remainder (*)
+  NGI \*w :	Negate (two's complement) (*)
+  SLI \*w :	Shift left (*)
+  SRI \*w :	Shift right (*)
+.DE

-GROUP 4 - UNSIGNED ARITHMETIC
+.DS
+GROUP 4 \- UNSIGNED ARITHMETIC

-  ADU \*w : Addition
-  SBU \*w : Subtraction
-  MLU \*w : Multiplication
-  DVU \*w : Division
-  RMU \*w : Remainder
-  SLU \*w : Shift left
-  SRU \*w : Shift right
+  ADU \*w :	Addition
+  SBU \*w :	Subtraction
+  MLU \*w :	Multiplication
+  DVU \*w :	Division
+  RMU \*w :	Remainder
+  SLU \*w :	Shift left
+  SRU \*w :	Shift right
+.DE

-GROUP 5 - FLOATING POINT ARITHMETIC
+.DS
+GROUP 5 \- FLOATING POINT ARITHMETIC

-  ADF \*w : Floating add (*)
-  SBF \*w : Floating subtract (*)
-  MLF \*w : Floating multiply (*)
-  DVF \*w : Floating divide (*)
-  NGF \*w : Floating negate (*)
-  FIF \*w : Floating multiply and split integer and fraction part (*)
-  FEF \*w : Split floating number in exponent and fraction part (*)
+  ADF \*w :	Floating add (*)
+  SBF \*w :	Floating subtract (*)
+  MLF \*w :	Floating multiply (*)
+  DVF \*w :	Floating divide (*)
+  NGF \*w :	Floating negate (*)
+  FIF \*w :	Floating multiply and split integer and fraction part (*)
+  FEF \*w :	Split floating number in exponent and fraction part (*)
+.DE

-GROUP 6 - POINTER ARITHMETIC
+.DS
+GROUP 6 \- POINTER ARITHMETIC

-  ADP \*f : Add \*f to pointer on top of stack
-  ADS \*w : Add \*w-byte value and pointer
-  SBS \*w : Subtract pointers in same fragment and push diff as size \*w integer
+  ADP \*f :	Add \*f to pointer on top of stack
+  ADS \*w :	Add \*w-byte value and pointer
+  SBS \*w :	Subtract pointers in same fragment and push diff as size \*w integer
+.DE

-GROUP 7 - INCREMENT/DECREMENT/ZERO
+.DS
+GROUP 7 \- INCREMENT/DECREMENT/ZERO

-  INC \*- : Increment word on top of stack by 1 (*)
-  INL \*l : Increment local or parameter (*)
-  INE \*g : Increment external (*)
-  DEC \*- : Decrement word on top of stack by 1 (*)
-  DEL \*l : Decrement local or parameter (*)
-  DEE \*g : Decrement external (*)
-  ZRL \*l : Zero local or parameter
-  ZRE \*g : Zero external
-  ZRF \*w : Load a floating zero of size \*w
-  ZER \*w : Load \*w zero bytes
+  INC \*- :	Increment word on top of stack by 1 (*)
+  INL \*l :	Increment local or parameter (*)
+  INE \*g :	Increment external (*)
+  DEC \*- :	Decrement word on top of stack by 1 (*)
+  DEL \*l :	Decrement local or parameter (*)
+  DEE \*g :	Decrement external (*)
+  ZRL \*l :	Zero local or parameter
+  ZRE \*g :	Zero external
+  ZRF \*w :	Load a floating zero of size \*w
+  ZER \*w :	Load \*w zero bytes
+.DE

-GROUP 8 - CONVERT    (stack: source, source size, dest. size (top))
+.DS				\" ???
+GROUP 8 \- CONVERT    (stack:	source, source size, dest. size (top))

-  CII \*- : Convert integer to integer (*)
-  CUI \*- : Convert unsigned to integer (*)
-  CFI \*- : Convert floating to integer (*)
-  CIF \*- : Convert integer to floating (*)
-  CUF \*- : Convert unsigned to floating (*)
-  CFF \*- : Convert floating to floating (*)
-  CIU \*- : Convert integer to unsigned
-  CUU \*- : Convert unsigned to unsigned
-  CFU \*- : Convert floating to unsigned
+  CII \*- :	Convert integer to integer (*)
+  CUI \*- :	Convert unsigned to integer (*)
+  CFI \*- :	Convert floating to integer (*)
+  CIF \*- :	Convert integer to floating (*)
+  CUF \*- :	Convert unsigned to floating (*)
+  CFF \*- :	Convert floating to floating (*)
+  CIU \*- :	Convert integer to unsigned
+  CUU \*- :	Convert unsigned to unsigned
+  CFU \*- :	Convert floating to unsigned
+.DE

-GROUP 9 - LOGICAL
+.DS
+GROUP 9 \- LOGICAL

-  AND \*w : Boolean and on two groups of \*w bytes
-  IOR \*w : Boolean inclusive or on two groups of \*w bytes
-  XOR \*w : Boolean exclusive or on two groups of \*w bytes
-  COM \*w : Complement (one's complement of top \*w bytes)
-  ROL \*w : Rotate left a group of \*w bytes
-  ROR \*w : Rotate right a group of \*w bytes
+  AND \*w :	Boolean and on two groups of \*w bytes
+  IOR \*w :	Boolean inclusive or on two groups of \*w bytes
+  XOR \*w :	Boolean exclusive or on two groups of \*w bytes
+  COM \*w :	Complement (one's complement of top \*w bytes)
+  ROL \*w :	Rotate left a group of \*w bytes
+  ROR \*w :	Rotate right a group of \*w bytes
+.DE

-GROUP 10 - SETS
+.DS
+GROUP 10 \- SETS

-  INN \*w : Bit test on \*w byte set (bit number on top of stack)
-  SET \*w : Create singleton \*w byte set with bit n on (n is top of stack)
+  INN \*w :	Bit test on \*w byte set (bit number on top of stack)
+  SET \*w :	Create singleton \*w byte set with bit n on (n is top of stack)
+.DE

-GROUP 11 - ARRAY
+.DS
+GROUP 11 \- ARRAY

-  LAR \*w : Load array element, descriptor contains integers of size \*w
-  SAR \*w : Store array element
-  AAR \*w : Load address of array element
+  LAR \*w :	Load array element, descriptor contains integers of size \*w
+  SAR \*w :	Store array element
+  AAR \*w :	Load address of array element
+.DE

-GROUP 12 - COMPARE
+.DS
+GROUP 12 \- COMPARE

-  CMI \*w : Compare \*w byte integers, Push negative, zero, positive for <, = or >
-  CMF \*w : Compare \*w byte reals
-  CMU \*w : Compare \*w byte unsigneds
-  CMS \*w : Compare \*w byte values, can only be used for bit for bit equality test
-  CMP \*- : Compare pointers
+  CMI \*w :	Compare \*w byte integers, Push negative, zero, positive for <, = or >
+  CMF \*w :	Compare \*w byte reals
+  CMU \*w :	Compare \*w byte unsigneds
+  CMS \*w :	Compare \*w byte values, can only be used for bit for bit equality test
+  CMP \*- :	Compare pointers

-  TLT \*- : True if less, i.e. iff top of stack < 0
-  TLE \*- : True if less or equal, i.e. iff top of stack <= 0
-  TEQ \*- : True if equal, i.e. iff top of stack = 0
-  TNE \*- : True if not equal, i.e. iff top of stack non zero
-  TGE \*- : True if greater or equal, i.e. iff top of stack >= 0
-  TGT \*- : True if greater, i.e. iff top of stack > 0
+  TLT \*- :	True if less, i.e. iff top of stack < 0
+  TLE \*- :	True if less or equal, i.e. iff top of stack <= 0
+  TEQ \*- :	True if equal, i.e. iff top of stack = 0
+  TNE \*- :	True if not equal, i.e. iff top of stack non zero
+  TGE \*- :	True if greater or equal, i.e. iff top of stack >= 0
+  TGT \*- :	True if greater, i.e. iff top of stack > 0
+.DE

-GROUP 13 - BRANCH
+.DS				\" ???
+GROUP 13 \- BRANCH

-  BRA \*b : Branch unconditionally to label \*b
+  BRA \*b :	Branch unconditionally to label \*b

-  BLT \*b : Branch less (pop 2 words, branch if top > second)
-  BLE \*b : Branch less or equal
-  BEQ \*b : Branch equal
-  BNE \*b : Branch not equal
-  BGE \*b : Branch greater or equal
-  BGT \*b : Branch greater
+  BLT \*b :	Branch less (pop 2 words, branch if top > second)
+  BLE \*b :	Branch less or equal
+  BEQ \*b :	Branch equal
+  BNE \*b :	Branch not equal
+  BGE \*b :	Branch greater or equal
+  BGT \*b :	Branch greater

-  ZLT \*b : Branch less than zero (pop 1 word, branch negative)
-  ZLE \*b : Branch less or equal to zero
-  ZEQ \*b : Branch equal zero
-  ZNE \*b : Branch not zero
-  ZGE \*b : Branch greater or equal zero
-  ZGT \*b : Branch greater than zero
+  ZLT \*b :	Branch less than zero (pop 1 word, branch negative)
+  ZLE \*b :	Branch less or equal to zero
+  ZEQ \*b :	Branch equal zero
+  ZNE \*b :	Branch not zero
+  ZGE \*b :	Branch greater or equal zero
+  ZGT \*b :	Branch greater than zero
+.DE

-GROUP 14 - PROCEDURE CALL
+.DS
+GROUP 14 \- PROCEDURE CALL

-  CAI \*- : Call procedure (procedure identifier on stack)
-  CAL \*p : Call procedure (with identifier \*p)
-  LFR \*s : Load function result
-  RET \*z : Return (function result consists of top \*z bytes)
+  CAI \*- :	Call procedure (procedure identifier on stack)
+  CAL \*p :	Call procedure (with identifier \*p)
+  LFR \*s :	Load function result
+  RET \*z :	Return (function result consists of top \*z bytes)
+.DE

-GROUP 15 - MISCELLANEOUS
+.DS
+GROUP 15 \- MISCELLANEOUS

-  ASP \*f : Adjust the stack pointer by \*f
-  ASS \*w : Adjust the stack pointer by \*w-byte integer
-  BLM \*z : Block move \*z bytes; first pop destination addr, then source addr
-  BLS \*w : Block move, size is in \*w-byte integer on top of stack
-  CSA \*w : Case jump; address of jump table at top of stack
-  CSB \*w : Table lookup jump; address of jump table at top of stack
-  DCH \*- : Follow dynamic chain, convert LB to LB of caller
-  DUP \*s : Duplicate top \*s bytes
-  DUS \*w : Duplicate top \*w bytes
-  EXG \*w : Exchange top \*w bytes
-  FIL \*g : File name (external 4 := \*g)
-  GTO \*g : Non-local goto, descriptor at \*g
-  LIM \*- : Load 16 bit ignore mask
-  LIN \*n : Line number (external 0 := \*n)
-  LNI \*- : Line number increment
-  LOR \*r : Load register (0=LB, 1=SP, 2=HP)
-  LPB \*- : Convert local base to argument base
-  MON \*- : Monitor call
-  NOP \*- : No operation
-  RCK \*w : Range check; trap on error
-  RTT \*- : Return from trap
-  SIG \*- : Trap errors to proc identifier on top of stack, -2 resets default
-  SIM \*- : Store 16 bit ignore mask
-  STR \*r : Store register (0=LB, 1=SP, 2=HP)
-  TRP \*- : Cause trap to occur (Error number on stack)
+  ASP \*f :	Adjust the stack pointer by \*f
+  ASS \*w :	Adjust the stack pointer by \*w-byte integer
+  BLM \*z :	Block move \*z bytes; first pop destination addr, then source addr
+  BLS \*w :	Block move, size is in \*w-byte integer on top of stack
+  CSA \*w :	Case jump; address of jump table at top of stack
+  CSB \*w :	Table lookup jump; address of jump table at top of stack
+  DCH \*- :	Follow dynamic chain, convert LB to LB of caller
+  DUP \*s :	Duplicate top \*s bytes
+  DUS \*w :	Duplicate top \*w bytes
+  EXG \*w :	Exchange top \*w bytes
+  FIL \*g :	File name (external 4 := \*g)
+  GTO \*g :	Non-local goto, descriptor at \*g
+  LIM \*- :	Load 16 bit ignore mask
+  LIN \*n :	Line number (external 0 := \*n)
+  LNI \*- :	Line number increment
+  LOR \*r :	Load register (0=LB, 1=SP, 2=HP)
+  LPB \*- :	Convert local base to argument base
+  MON \*- :	Monitor call
+  NOP \*- :	No operation
+  RCK \*w :	Range check; trap on error
+  RTT \*- :	Return from trap
+  SIG \*- :	Trap errors to proc identifier on top of stack, \-2 resets default
+  SIM \*- :	Store 16 bit ignore mask
+  STR \*r :	Store register (0=LB, 1=SP, 2=HP)
+  TRP \*- :	Cause trap to occur (Error number on stack)
 .DE 0
--- a/doc/em/cont.nr
+++ b/doc/em/cont.nr
@@ -0,0 +1,6 @@
+.MS T A 0
+.ME
+.BP
+.MS B A 0
+.ME
+.CT
--- a/doc/em/descr.nr
+++ b/doc/em/descr.nr
@@ -36,7 +36,7 @@ Array descriptors contain the following three integers:
 .PT
 lower bound~~~~~~~~~~~~~~~~~~~~~signed
 .PT
-upper bound - lower bound~~~~~~~unsigned
+upper bound \- lower bound~~~~~~~unsigned
 .PT
 number of bytes per element~~~~~unsigned
 .PE
@@ -60,7 +60,7 @@ LAR n (n is the size of the integers in the descriptor and I)
 All array instructions first pop the address of the descriptor
 and the index.
 If the index is not within the bounds specified, a trap occurs.
-If ok, (I~-~lower bound) is multiplied
+If ok, (I~\-~lower bound) is multiplied
 by the number of bytes per element (the third word).  The result is added
 to the address of A and replaces A on the stack.
 .A
@@ -128,12 +128,12 @@ each source language case statement
 is up to the front end.
 If the range of the index value is dense, i.e
 .DS
-(highest value - lowest value) / number of cases
+(highest value \- lowest value) / number of cases
 .DE 1
 is less than some threshold, then CSA is the obvious choice.
 If the range is sparse, CSB is better.
 .N 2
-.DS
+.Dr 30
   |--------------------|        |--------------------|  high address
   | pointer for upb    |        |    pointer n-1     |
   |--------------------|        |-  -  -  -  -  -  - |
@@ -157,7 +157,6 @@ If the range is sparse, CSB is better.
   |--------------------|        |--------------------|

       CSA descriptor                CSB descriptor
-
-
-      Figure 4. Descriptor layout for CSA and CSB
-.DE
+.Df
+Figure 4. Descriptor layout for CSA and CSB
+.De
--- a/Show More
+++ b/Show More