Compare commits

master ... hs

3 Commits

Author   SHA1        Message                                                Date
cvs2hg   6555bf6893  fixup commit for branch 'hs'                           1985-02-26 15:05:52 +00:00
em       bd796849ef  Added some xtra output for Henk Schouten's debugger.   1985-02-26 15:04:08 +00:00
cvs2hg   9afbc0ad32  fixup commit for branch 'hs'                           1984-10-23 15:02:04 +00:00

7342 changed files with 283 additions and 632294 deletions

.distr

@@ -1,57 +0,0 @@
README
CHANGES
Copyright
pm
pmfile
config.pm
h
modules/h
first
util/data
util/LLgen
modules/src/alloc
modules/src/assert
modules/src/system
modules/src/string
modules/src/read_em
modules/src/em_code
modules/src/em_mes
modules/src/print
modules/src/object
modules/src/idf
modules/src/input
modules/src/flt_arith
util/amisc
util/cmisc
util/ack
lib/descr/fe
util/arch
#util/cpp
util/cgg
util/ncgg
util/misc
util/opt
util/ego
util/topgen
util/led
lang/cem
lang/pc
lang/m2
#lang/occam
#lang/basic
mach/proto
mach/i80
mach/i86
mach/i386
plat/cpm
plat/pc86
plat/linux386
examples

Action

@@ -1,276 +0,0 @@
name "System definition"
dir first
action ack_sys
failure "You have to run the shell script first/first"
fatal
end
name "Manual pages"
dir man
end
! name "EM definition"
! dir etc
! end
name "EM definition library"
dir util/data
end
name "C utilities"
dir util/cmisc
end
name "Yacc parser generator"
dir util/byacc
end
name "Flex lexical analyzer generator"
dir util/flex
action "make firstinstall && make clean"
end
name "Include files for modules"
dir modules/h
end
name "Modules"
dir modules/src
indir
end
! name "LL(1) Parser generator"
! dir util/LLgen
! action "make firstinstall && make clean"
! end
name "C preprocessor"
dir util/cpp
end
name "Peephole optimizer libraries"
dir modules/src/em_opt
end
name "ACK object utilities"
dir util/amisc
end
name "Encode/Decode"
dir util/misc
end
name "Shell files in bin"
dir util/shf
end
name "EM assembler"
dir util/ass
end
name "EM Peephole optimizer"
dir util/opt
end
name "EM Global optimizer"
dir util/ego
indir
end
name "ACK archiver"
dir util/arch
end
name "Program 'ack'"
dir util/ack
end
name "Bootstrap for backend tables"
dir util/cgg
end
name "Bootstrap for newest form of backend tables"
dir util/ncgg
end
name "Bootstrap for code expanders"
dir util/ceg
indir
end
name "LED link editor"
dir util/led
end
name "TOPGEN target optimizer generator"
dir util/topgen
end
name "C frontend"
dir lang/cem/cemcom
end
name "ANSI-C frontend"
dir lang/cem/cemcom.ansi
end
name "ANSI-C preprocessor"
dir lang/cem/cpp.ansi
end
name "ANSI-C header files"
dir lang/cem/libcc.ansi
end
name "LINT C program checker"
dir lang/cem/lint
end
name "EM definition lint-library"
action "make lintlib"
dir util/data
end
name "Modules lint libraries"
dir modules/src
indir "Action.lint"
end
name "Global optimizer lint libraries"
dir util/ego/share
action "make lintlib"
end
name "Pascal frontend"
dir lang/pc/comp
end
name "Basic frontend"
dir lang/basic/src
end
name "Occam frontend"
dir lang/occam/comp
end
name "Modula-2 frontend"
dir lang/m2/comp
end
name "Modula-2 definition modules"
dir lang/m2/libm2
end
name "Modula-2 makefile generator"
dir lang/m2/m2mm
end
name "Fortran to C compiler"
dir lang/fortran/comp
end
name "EM interpreter in C"
dir util/int
end
name "Symbolic debugger"
dir util/grind
end
name "Intel 8086 support"
dir mach/i86
indir
end
name "Intel 80286 support for Xenix"
dir mach/xenix3
indir
end
name "Intel 80386 support for Xenix 386 System V"
dir mach/i386
indir
end
name "MSC6500 support"
dir mach/6500
indir
end
name "Motorola 6800 support"
dir mach/6800
indir
end
name "Motorola 6805 support"
dir mach/6805
indir
end
name "Motorola 6809 support"
dir mach/6809
indir
end
name "Intel 8080 support"
dir mach/i80
indir
end
name "2-2 Interpreter support"
dir mach/em22
indir
end
name "2-4 Interpreter support"
dir mach/em24
indir
end
name "4-4 Interpreter support"
dir mach/em44
indir
end
name "Motorola 68000 2-4 support"
dir mach/m68k2
indir
end
name "Motorola 68000 4-4 support"
dir mach/m68k4
indir
end
name "NS16032 support"
dir mach/ns
indir
end
name "PDP 11 support"
dir mach/pdp
indir
end
name "PMDS support"
dir mach/pmds
indir
end
name "PMDS 4/4 support"
dir mach/pmds4
indir
end
name "Signetics 2650 support"
dir mach/s2650
indir
end
name "Vax 4-4 support"
dir mach/vax4
indir
end
name "M68020 System V/68 support"
dir mach/m68020
indir
end
name "Sun 3 M68020 support"
dir mach/sun3
indir
end
name "Sun 4 SPARC SunOs 4 support"
dir mach/sparc
system "sparc|sparc_solaris"
indir
end
name "Sun 4 SPARC Solaris support"
dir mach/sparc_solaris
system "sparc_solaris"
indir
end
name "Sun 2 M68000 support"
dir mach/sun2
indir
end
name "Mantra M68000 System V.0 support"
dir mach/mantra
indir
end
name "PC Minix support"
dir mach/minix
indir
end
name "Atari ST Minix support"
dir mach/minixST
indir
end
name "Z80 support"
dir mach/z80
indir
end
name "Zilog Z8000 support"
dir mach/z8000
indir
end
name "Acorn Archimedes support"
dir mach/arm
indir
end
name "Documentation"
dir doc
end
name "Motorola 68000 interpreters"
system "m68*|sun*"
dir mach/mantra/int
end
name "Fast compilers"
system "m68020|sun3|i386|vax*"
dir fast
indir
end
name "Fast cc-compatible C compiler"
system "sun3|vax*"
dir fcc
indir
end

CHANGES

@@ -1,35 +0,0 @@
# $Source$
# $State$
# $Revision$
6.0pre4
Fixed some minor bit-rotting issues that were preventing compilation on
modern Linux systems.
6.0pre3
Added the cpm platform. Made some optimisations to the i80 code generator,
including getting topgen up and running and adding some peephole optimiser
rules. Fixed loads of bugs in ego so that it now works on platforms that
support it (pc86 and linux386). Made the floating point work on platforms
that support it (pc86 and linux386 again). Made stdint.h work. Lots and lots
of bugfixes and tweaks everywhere.
6.0pre2
Much simplified the syscall interface by disabling libmon and instead
calling the syscalls directly. Disabled the K&R C compiler and libc because
it doesn't actually gain us anything and has a high maintenance load --- the
ANSI C compiler works fine with K&R C. Adapted the rest of the system to
build with the ANSI C compiler. Rewrote the pc86 syscall interface and added
linux386 support, using the i386 code generator. Lots and lots of bugfixes
and tweaks everywhere.
6.0pre1
First working version of the 6.0 release stream. Working frontends: both C
compilers, Pascal, Modula-2, Basic and Occam. Working backends: i86. Working
platforms: pc86, the very noddy testbed setup that produces floppy disk
images.

@@ -1,32 +0,0 @@
Copyright (c) 1987, 1990, 1993, 2005 Vrije Universiteit, Amsterdam, The Netherlands.
All rights reserved.
Redistribution and use of the Amsterdam Compiler Kit in source and
binary forms, with or without modification, are permitted provided
that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of Vrije Universiteit nor the names of the
software authors or contributors may be used to endorse or
promote products derived from this software without specific
prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS, AUTHORS, AND
CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL VRIJE UNIVERSITEIT OR ANY AUTHORS OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

NEW

@@ -1,45 +0,0 @@
This is ACK distribution 5.6.
This is a minor update of 5.5, the last public release from Vrije University.
Only minor changes have been made to make the system build on modern
platforms.
The NEW document from the previous release follows.
David Given
dg@cowlark.com 2005-06-24
-----------------------------------------------------------------------------
The only addition with respect to the 5th ACK distribution is the support
for Solaris 2 on SPARCs. It also contains many bug fixes.
Notes for the 5th ACK distribution:
It is not wise to mix files created by the previous version of the Kit
with files belonging to this version, although that might sometimes work.
Many problems with the previous distribution have been fixed.
The major additions are:
- an ANSI C compiler
- a LINT C program checker, both non-ansi and ansi
- an Intel 80386 back-end
- a SPARC code expander
- a source level debugger for Pascal, Modula-2, C, and ANSI C
- an Acorn Archimedes back-end
- code-expanders for VAX, Intel 80386 and Motorola M68020 processors,
and very fast Pascal, Modula-2, ANSI C, and C compilers constructed
using these code expanders
- a cc-compatible very fast C compiler for SUN-3 and VAX.
Also added, but not part of the Kit proper are
- flex: a lexical analyzer generator
- byacc: yacc-clone by UCB
- f2c: a Fortran to C compiler by AT&T.
See the ACK installation manual for their copyright notices.
--
Ceriel Jacobs, Dept. of Mathematics and Computer Science, Vrije Universiteit,
De Boelelaan 1081a, 1081 HV Amsterdam, The Netherlands
Email: ceriel@cs.vu.nl Fax: +31 20 6427705

README

@@ -1,170 +0,0 @@
THE AMSTERDAM COMPILER KIT V6.0pre4
===================================
© 1987-2005 Vrije Universiteit, Amsterdam
2010-08-08
INTRODUCTION
============
The Amsterdam Compiler Kit is a complete compiler toolchain consisting of
front end compilers for a number of different languages, code generators,
support libraries, and all the tools necessary to go from source code to
executable on any of the platforms it supports.
This is an early prerelease of the apocryphal version 6.0 release. Not a
lot is supported, the build mechanism needs work, and a lot of things are
probably broken. However, what's there should be sufficient to get things
done and to evaluate how the full 6.0 release should work.
SUPPORT
=======
Languages:
ANSI C, Pascal, Modula 2. K&R is supported via the ANSI C compiler.
Platforms:
pc86 produces bootable floppy disk images for 8086 PCs
linux386 produces ELF executables for PC Linux systems
cpm produces i80 CP/M .COM files
INSTALLATION
============
The version 6.0 build mechanism has been completely rewritten and is based
around the Prime Mover build tool (see http://primemover.sf.net for more
information). Installation ought to be fairly straightforward.
Requirements:
- an ANSI C compiler. Currently, I'm afraid, it's hard-coded to use gcc.
To change, try changing the variable definitions in first/c.pm. This also
needs to be available as 'cc' from the shell.
- about 20MB free in /tmp (or some other temporary directory).
- about 6MB in the target directory.
Instructions:
- edit config.pm. There's a small section at the top containing some editable
variables. Probably the only one you may want to edit is PREFIX, which
changes where the ACK installs to.
- Run:
./pm configure
...from the command line. This will write out a configuration file.
- Run:
./pm
...from the command line. This will actually do the build. This takes
about two minutes on my 1.6GHz Athlon Linux machine and about 30 on my
166MHz Pentium OpenBSD machine.
- Run:
./pm install
...from the command line (possibly with sudo). This will install the built
ACK into whatever directory you nominated in PREFIX.
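Putting those steps together, a first-time build might look like this (the
choice of editor, the use of sudo and the exact PREFIX value are only examples):
  vi config.pm          # set PREFIX, e.g. PREFIX = "/usr/local"
  ./pm configure        # write out the configuration file
  ./pm                  # do the build into the staging area under the temp dir
  sudo ./pm install     # copy the result into PREFIX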
The ACK should now be ready to use.
USAGE
=====
Currently I haven't sorted out all the documentation --- it's supplied in the
distribution, but not all of it gets installed yet --- so here is a quickstart
guide.
The main command to use is 'ack'. This invokes the compiler and the linker.
Some useful options include:
-m<platform> build for the specified platform
-o <file> specifies the output file
-c produce a .o file
-c.s produce a .s assembly file
-O enable optimisation
-ansi compile ANSI C (when using the C compiler)
<file> build file
ack figures out which language to use from the file extension:
.c C (ANSI or K&R)
.b Basic
.mod Modula-2
.ocm Occam 1
.p Pascal
.o object files
.s assembly files
For further information, see the man page (which actually does get
installed, but is rather out of date).
There are some (known working) example programs in the 'examples' directory.
A sample command line is:
ack -mlinux386 -O examples/paranoia.c
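Some more illustrative command lines, using only the options listed above
(hello.c, hello.mod and the output names are invented for the example):
  ack -mcpm -O -o hello.com hello.c      # optimised CP/M .COM file from C
  ack -mpc86 -c.s examples/paranoia.c    # stop after producing assembly
  ack -mlinux386 -c hello.mod            # compile a Modula-2 module to a .o
  ack -mlinux386 -o hello hello.o        # link a previously produced .o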
GOTCHAS
=======
There are some things you should be aware of.
- Look at plat/<PLATFORMNAME>/README for information about the two supported
platforms.
- The library support is fairly limited; for C, it's at roughly the ANSI C
level, and for the other languages it's similar.
- When compiling languages other than C, the ACK will usually look at the
first character of the file. If it's a #, then the file will be run through
the C preprocessor anyway.
- BSD systems may need to up the number of file descriptors (e.g.
'ulimit -n 200') before the ACK will compile.
- The ACK uses its own .o format. You won't be able to mix the ACK's object
files and another compiler's.
DISCLAIMER
==========
The ACK is mature, well-tested software, but the environment it was
developed for and tested under is rather different from that available on
today's machines. There will probably be little in the way of logical bugs,
but there may be many compilation and API bugs.
If you wish to use the ACK, *please* join the mailing list. We are interested
in any reports of success and particularly, failure. If it does fail for you,
we would love to know why, in as much detail as possible. Bug fixes are even
more welcome.
The ACK is licensed under a BSD-like license. Please see the 'Copyright' file
for the full text.
You can find the mailing list on the project's web site:
http://tack.sourceforge.net/
Please enjoy.
David Given (dtrg on Sourceforge)
dg@cowlark.com
2010-08-08
# $Source$
# $State$
# $Revision$

TODO

@@ -1,20 +0,0 @@
# $Source$
# $State$
This file contains things that I have noticed need fixing, but have not
yet been fixed. Everything here should be reasonably low priority. Some
bugs have been bodged around to make things work; these are all marked in
the source with FIXME tags.
* util/int needs to be rewritten to emulate sgtty with termios; look for
FIXMEs.
* mach/i80/dl/nascom.c needs to be rewritten to use termios, not sgtty.
# Revision history
# $Log$
# Revision 2.1 2005-06-24 23:20:41 dtrg
# Added some new readmes at the top level.
#

@@ -1,146 +0,0 @@
#!/bin/sh
case $# in
0) PAR='make install && make clean' ; CMD=Action ;;
1) PAR="$1" ; CMD=Action ;;
2) PAR="$1" ; CMD="$2" ;;
*) echo Syntax: "$0" [command [file]] ; exit 1 ;;
esac
if test -r "$CMD"
then :
else
case "$CMD" in
Action) echo No Action file present ;;
*) echo No Action file "($CMD)" present ;;
esac
fi
case $0 in
/*) THISFILE=$0
;;
*) if [ -f $0 ]
then
THISFILE=`pwd`/$0
else
THISFILE=$0
fi
;;
esac
SYS=
RETC=0
{ while read LINE
do
eval set $LINE
case x"$1" in
x!*) ;;
xname) SYS="$2"
ACTION='$PAR'
DIR=.
FM=no
FAIL='Failed for $SYS, see $DIR/Out'
SUCC='$SYS -- done'
ATYPE=
FATAL=no
DOIT=yes
;;
xfatal) FATAL=yes ;;
xaction|xindir) case x$ATYPE in
x) ACTION=$2 ; ATYPE=$1
case $ATYPE$FM in
indirno) FAIL='Failed for $SYS' ;;
esac
;;
*) echo Already specified an $ATYPE for this name
RETC=65 ;;
esac ;;
xfailure) FM=yes
FAIL="$2" ;;
xsuccess) SUCC="$2" ;;
xdir) DIR="$2" ;;
xsystem) PAT="$2"
oIFS=$IFS
IFS="|"
eval set $2
case x`ack_sys` in
x$1|x$2|x$3|x$4|x$5|x$6|x$7) ;;
*) echo "Sorry, $SYS can only be made on $PAT systems"
DOIT=no
;;
esac
IFS=$oIFS
;;
xend) case $DOIT in
no) continue ;;
esac
case x$SYS in
x) echo Missing name line; RETC=65 ;;
*) if test -d $DIR
then (
cd $DIR
X=
case $ATYPE in
indir)
if $THISFILE "$PAR" $ACTION
then eval echo $SUCC
else RETC=2 ; eval echo $FAIL
fi ;;
*)
case "$ACTION" in
'$PAR')
ACTION="$PAR"
;;
*) ;;
esac
if [ -f No$CMD ]
then
x=`cat No$CMD`
if [ "$ACTION" = "$x" ]
then
ACTION='echo "No actions performed, No$CMD file present"'
SUCC='$SYS -- skipped'
fi
fi
if eval "{ $ACTION ; } >Out 2>&1 </dev/null"
then eval echo $SUCC
if [ "$SUCC" = '$SYS -- skipped' ]
then :
else echo "$ACTION" > No$CMD 2>/dev/null
fi
else RETC=1 ; X=: ; eval echo $FAIL
fi
;;
esac
(echo ------- `pwd`
cat Out
$X rm -f Out
) 2>/dev/null 1>&- 1>&3
exit $RETC
)
case $? in
0) ;;
*) case $RETC in
0) RETC=$? ;;
esac ;;
esac
else
echo Directory $DIR for $SYS is inaccessible
RETC=66
fi ;;
esac
case $FATAL$RETC in
yes0) ;;
yes*) echo Fatal error, installation stopped.
exit $RETC ;;
esac
SYS=
;;
*) echo Unknown keyword "$1"
RETC=67 ;;
esac
done
exit $RETC
} <$CMD
RETX=$?
case $RETX in
0) exit $RETC ;;
*) exit $RETX ;;
esac

@@ -1,9 +0,0 @@
cc-and-mkdep.ack
cc-and-mkdep.all
cc-and-mkdep.sun
do_deps
do_resolve
lint-lib.ack
lint-lib.unix
mk_manpage
rm_deps

@@ -1,8 +0,0 @@
#!/bin/sh
: '$Id$'
: Compile and make dependencies. First argument is the file on which the
: dependencies must be produced. This version is for ACK.
n=$1
shift
exec $CC -Rcem-A$n -Rcem-m $*

@@ -1,21 +0,0 @@
#!/bin/sh
: '$Id$'
: Compile and make dependencies. First argument is the file on which the
: dependencies must be produced. This version is a generic one that should
: work for all Unix systems.
n=$1
shift
cpp_args=
for i in $*
do
case $i in
-I*|-D*|-U*) cpp_args="$cpp_args $i"
;;
-*) ;;
*) cpp_args="$cpp_args $i"
;;
esac
done
$UTIL_HOME/lib.bin/cpp -d -m $cpp_args > $n 2>/dev/null
exec $CC $*

@@ -1,8 +0,0 @@
#!/bin/sh
: '$Id$'
: Compile and make dependencies. First argument is the file on which the
: dependencies must be produced. This version is for the SUN cc.
n=$1
shift
exec $CC -Qpath $UTIL_HOME/lib.bin -Qoption cpp -d$n -Qoption cpp -m $*

@@ -1,19 +0,0 @@
#!/bin/sh
: '$Id$'
: Produce dependencies for all argument files
for i in $*
do
n=`basename $i .c`
if [ -f $n.dep ]
then
:
else
echo $n.'$(SUF): '$i > $n.dep
echo " head -5 $n.dep > $n.dp1" >> $n.dep
echo ' CC="$(CC)" UTIL_HOME="$(UTIL_HOME)" $(CC_AND_MKDEP) '$n.dp2 '$(CFLAGS)' -c $i >> $n.dep
echo " cat $n.dp1 $n.dp2 > $n.dep" >> $n.dep
echo " rm -f $n.dp1 $n.dp2" >> $n.dep
fi
done

@@ -1,48 +0,0 @@
#!/bin/sh
: '$Id$'
: Resolve name clashes in the files on the argument list. If these
: files reside in another directory, a copy is made in the current
: directory. If not, it is overwritten. Never do this in a source
: directory! A list of the new files is produced on standard output.
UTIL_BIN=$UTIL_HOME/bin
trap "rm -f tmp$$ a.out nmclash.* longnames clashes" 0 1 2 3 15
: first find out if we have to resolve problems with identifier significance.
cat > nmclash.c <<'EOF'
/* Accepted if many characters of long names are significant */
abcdefghijklmnopr() { }
abcdefghijklmnopq() { }
main() { }
EOF
if $CC nmclash.c
then : no identifier significance problem
for i in $*
do
echo $i
done
else
$UTIL_BIN/prid -l7 $* > longnames
: remove code generating routines from the clashes list.
: code generating routine names start with C_.
: also remove names starting with flt_.
sed '/^C_/d' < longnames | sed '/^flt_/d' > tmp$$
$UTIL_BIN/cclash -c -l7 tmp$$ > clashes
for i in $*
do
$UTIL_BIN/cid -Fclashes < $i > tmp$$
n=`basename $i .xxx`
if cmp -s $n tmp$$
then
rm -f tmp$$
else
mv tmp$$ $n
fi
echo $n
done
fi

@@ -1,13 +0,0 @@
#!/bin/sh
: '$Id$'
: Create a lint library file. The name of the library file is constructed
: from the first argument. The second argument indicates the directory where
: the result is to be placed. This version is for ACK lint.
n=$1
shift
d=$1
shift
lint -L$n $*
mv $n.llb $d

@@ -1,13 +0,0 @@
#!/bin/sh
: '$Id$'
: Create a lint library file. The name of the library file is constructed
: from the first argument. The second argument indicates the directory where
: the result is to be placed. This version is for Unix lint.
n=$1
shift
d=$1
shift
/usr/bin/lint -C$n $*
mv llib-l$n.ln $d

@@ -1,20 +0,0 @@
#!/bin/sh
num=`expr $1 : '.*\.\([1-8]\)'`
if [ -d $2/man ] ; then : ; else mkdir $2/man ; fi
if [ -f $2/man/head ] ; then : ; else cat > $2/man/head <<'EOF'
.rn TH yy
.de TH
.di zz
.yy "\\$1" "\\$2" "\\$3" "\\$4"
.ds ]W 5th ACK distribution
.ds ]D Amsterdam Compiler Kit
.ds ]L "\\$3
.di
.rm zz
..
EOF
fi
if [ -d $2/man/man$num ] ; then : ; else mkdir $2/man/man$num ; fi
cat $2/man/head $1 | sed "s!TARGETHOME!$2!" > $2/man/man$num/`expr //$1 : '.*/\([^/]*\)'`

@@ -1,9 +0,0 @@
#!/bin/sh
: $Id$
: remove dependencies from a makefile, write result on standard output.
: we cannot do this directly in a makefile because some make versions
: have # start a comment, always.
sed -e '/^#DEPENDENCIES/,$d' $1
echo '#DEPENDENCIES'

@@ -1,71 +0,0 @@
-- ======================================================================= --
-- ACK CONFIGURATION --
-- (Edit this before building) --
-- ======================================================================= --
-- What platform to build for by default?
DEFAULT_PLATFORM = "pc86"
-- Where should the ACK put its temporary files?
ACK_TEMP_DIR = "/tmp"
-- Where is the ACK going to be installed, eventually?
PREFIX = "/usr/local"
-- ======================================================================= --
-- BROKEN ACK CONFIGURATION --
-- (Currently not editable) --
-- ======================================================================= --
-- FIXME: the following two variables must be set to their Minix variants
-- due to hard-coded references in the descr files.
-- Name of the platform-independent library directory; 'share' on modern
-- systems, 'lib' on Minix-like systems.
PLATIND = "lib"
-- Name of the platform-dependent library directory; 'lib' on modern
-- systems, 'lib.bin' on Minix-like systems.
PLATDEP = "lib.bin"
-- ======================================================================= --
-- BUILD SYSTEM CONFIGURATION --
-- (Not user serviceable) --
-- ======================================================================= --
-- Absolute path to the ACK source directory.
ROOTDIR = posix.getcwd().."/"
-- Temporary directory used during the build process.
TEMPDIR = "/tmp/ack-temp/"
-- Directory in which dynamically generated header files will go during
-- the build process.
HEADERDIR = TEMPDIR.."headers/"
-- Directory in which tools used by the build process but which are not actually
-- deployed with the ACK will go.
TOOLDIR = TEMPDIR.."tools/"
-- Directory in which the libraries used to build the ACK tools but which are
-- not actually deployed with the ACK will go.
LIBDIR = TEMPDIR.."lib/"
-- Staging area where the installation will be built before actually copying
-- it.
BINDIR = TEMPDIR.."staging/"
-- Directory that the pm cache goes in.
pm.intermediate_cache_dir = TEMPDIR.."pmcache/"

@@ -1,15 +0,0 @@
++ ./doc/install.pr made
++ ./doc/int/.distr made
++ ./etc/new_table_done made
++ ./lang/cem/cemcom.ansi/Version.c made
++ ./lang/cem/libcc.ansi/stdlib/malloc.c made
++ ./lang/cem/cemcom/Version.c made
++ ./lang/pc/comp/Version.c made
++ ./lang/m2/comp/Version.c made
++ ./lang/m2/m2mm/Version.c made
++ ./mach/sparc/ce/EM_table made
++ ./mach/sparc_solaris/libem/LIST made
++ ./util/LLgen/src/LLgen.c.dist made
++ ./util/cpp/Version.c made
++ ./util/ego/share/pop_push.h made
++ ./util/grind/ops.c made

@@ -1,90 +0,0 @@
How to make a distribution
--------------------------
I have written a new tool to generate the distributions that does not rely on
having a local CVS server --- distr/mkdist.
To use it, you need to specify your CVS work tree, the destination directory
that the distribution will be written to, plus flags. It should be self-
documenting; use:
mkdist --help
...to get documentation.
It uses .distr files in exactly the same way as the previous mechanism.
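As an illustration only (the source and destination paths are placeholders),
a typical invocation might be:
mkdist --srcdir $HOME/ack-cvs --destdir /tmp/ack-dist --copy
...where --copy makes physical copies of the files rather than the default
hard links.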
The documentation for the old distribution tools follows.
David Given
dg@cowlark.com
2005-06-25
-----------------------------------------------------------------------------
How to make a fresh distribution:
For a distribution you need ".distr" files and RCS files.
The EM home directory contains a file called ".distr". It contains
the names of all the files and directories you want to have in the distribution.
The directories should contain .distr files, the other files should
be placed under CVS.
There are files that derive from other files and yet should be placed
in the distribution.
These files should not be placed under RCS or CVS.
The file "Exceptions" in this directory contains the current list of
these files.
When all this is correct, use the shell script mktree to extract
the distribution from the EM tree.
sh mktree destination_tree repository_tree <distrname> 2>f.attf
Use the "cvs rtag" command to give the distribution a name first!
Make sure that the destination tree exists and is empty!
Failing to do that will almost certainly result in a welter of
error messages.
The file f.attf contains mktree error messages and should be compared
to Exceptions.
The actions of mktree are quite complicated. It starts in the current
directory creating a version in the destination directory.
Then it reads the .distr file.
For each file mentioned there it performs certain actions:
1- Directory Change to that directory and call yourself recursively.
2- File
a- Does a file LIST exist in this directory AND
is the first line of LIST equal to the name of the
destination file? If so, try to extract all the files
named in the rest of the LIST file and call the program
arch to create a library "arch cDr `cat LIST`".
In this manner libraries can be distributed whose members
have their own RCS file.
else
b- Try to run 'make distr'
else
c- Try to run 'make <filename>'
else
d- give message that says "not present" (or some such).
Now, the tree contains all the files in the distribution, but it also contains
files that should not be in the distribution, especially the files created
by CVS.
That is why we now give the command:
dtar cdf distr .
The file distr is the one you should put on tape!
But,.... before doing that: Try it out!
Repeat the process described in the installation manual.
Only if that succeeds you are sure that you included the files needed.
Good Luck,
Ed Keizer, 85/4/15.
Updated for 3rd distribution by Ceriel Jacobs, 87/3/11.
And again,
Good Luck!
Updated for 4th distribution by Ceriel Jacobs, 88/4/08.
And again,
Good Luck!
Updated for 5th distribution by Ceriel Jacobs, 91/19/12.
And again,
Good Luck!
Updated for 1st upgrade to 5th distribution by Ceriel Jacobs, 91/12/11.
And again,
Good Luck!

@@ -1,26 +0,0 @@
#!/bin/sh
: ${CDIR=.}
${DF-:} $CDIR .distr
if test ! -r $DESTDIR/$CDIR/.distr
then
echo ++ no .distr in $CDIR 1>&2
exit 0
fi
for i in `cat $DESTDIR/$CDIR/.distr`
do
if test -d $i
then
( if cd $i
then
CDIR=$CDIR/$i
export CDIR
exec $DDIR/dwalk $*
else
echo ++ Could not access $CDIR/$i 1>&2
fi
)
else
${DF-:} $CDIR $i
fi
done

@@ -1 +0,0 @@
echo $1

@@ -1,10 +0,0 @@
case $# in
0) DESTDIR=. ;;
1) DESTDIR=$1 ;;
*) echo $0 [directory] ; exit 1 ;;
esac
DD=`pwd`/listall.d
DW=`pwd`/dwalk
export DD DESTDIR
cd $DESTDIR
$DW

@@ -1,2 +0,0 @@
echo "<$1>"
ls -bCdx `cat .distr`

@@ -1,10 +0,0 @@
case $# in
0) DIR=. ;;
1) DIR=$1 ;;
*) echo $0 [directory] ; exit 1 ;;
esac
DD=`pwd`/echod
DW=`pwd`/dwalk
export DD
cd $DIR
$DW

@@ -1,40 +0,0 @@
#!/bin/sh
: Utility to make a tree of symbolic links to source tree.
: Mount the source tree read-only, use this script, and then try installation.
case $# in
2) ;;
*) echo "Usage: $0 <source-tree> <symlink-tree>" 1>&2
exit 1
;;
esac
if [ -f $1/.distr ]
then
for i in `cat $1/.distr`
do
if [ -d $1/$i ]
then
if mkdir $2/$i && $0 $1/$i $2/$i
then
:
else
exit 2
fi
else
if [ -f $1/$i ]
then
if ln -s $1/$i $2/$i
then
:
else
exit 3
fi
else
echo "Missing file $1/$i" 1>&2
exit 4
fi
fi
done
else
echo "No .distr file in $1" 1>&2
exit 5
fi

@@ -1,11 +0,0 @@
#!/bin/sh
set -e
for i in `tail +2 $DESTDIR/$1/LIST`
do
${DF-false} $1 $i
done
cd $DESTDIR/$1
arch cDr `cat LIST`
: I do not remove the files constituting the library, because
: they might be present in .distr

@@ -1,177 +0,0 @@
#!/bin/sh
# $Source$
# $State$
# Set up default variables.
destdir=
srcdir=`pwd`
arch=/usr/local/bin/arch
delete=no
copy="ln"
# --- Options parsing -------------------------------------------------------
while [ "$1" != "" ]; do
case "$1" in
-s|--srcdir)
srcdir="$2"
shift
;;
-d|--destdir)
destdir="$2"
shift
;;
-x|--delete)
delete=yes
;;
-c|--copy)
copy="cp -Rp"
;;
-S|--symlink)
copy="ln -s"
;;
-a|--arch)
arch="$2"
shift
;;
-h|--help)
echo "mkdist [options]"
echo "Options are:"
echo " -s --srcdir <path> The CVS tree to read from. (default: CWD)"
echo " -d --destdir <path> The directory to create the distribution in."
echo " -x --delete Erase the destination directory first."
echo " -c --copy Make physical copies of the files. (default: hardlink)"
echo " -S --symlink Make symbolic links instead of copying or hardlinking."
echo " -a --arch <path> Where the ACK 'arch' tool is."
echo " -h --help Display this message."
exit 0
;;
*)
echo "Unrecognised option. Try --help for help."
exit 1
esac
shift
done
if [ "$destdir" = "" ]; then
echo "You must specify a destination directory. (Try --help for help.)"
exit 1
fi
# --- Main routines ---------------------------------------------------------
# These two routines do the work of traversing the source tree and building
# the distribution tree.
addfile() {
local f
f="${1##$srcdir/}"
mkdir -p $destdir/`dirname $f`
$copy "$1" "$destdir/$f"
}
process_dir() {
local path
local archivename
path=$1
cd $path
echo $PWD
# Look for a LIST file and cache the first line.
archivename=
if [ -f LIST ]; then
archivename=`head -1 LIST`
fi
for i in `cat $path/.distr`; do
case "$i" in
\#*) # Comment. Do nothing.
;;
*)
if [ -d $i ]; then
# This is a directory. Recurse into it.
( process_dir $path/$i )
elif [ -f $i ]; then
# This is a file.
addfile $path/$i
elif [ "$i" = "$archivename" ]; then
# Build the named archive.
$arch cDr `cat LIST`
addfile $path/$archivename
else
echo "Don't know what to do with $i, listed in $PWD/.distr."
exit 1
fi
;;
esac
done
}
# --- Main program ----------------------------------------------------------
# Test to make sure that $arch points to the right thing.
if !(strings $arch | grep archiver > /dev/null); then
echo "$arch does not seem to point at the ACK archiver tool."
echo "(Don't confuse this with the Linux tool for displaying your"
echo "architecture.)"
echo ""
echo "Press RETURN to go ahead anyway, or CTRL+C to abort."
read ignored
fi
# Actually do the work.
echo "Creating distribution from CVS tree: $srcdir"
echo " into destination tree: $destdir"
echo ""
if [ -e $destdir ]; then
if [ "$delete" = "yes" ]; then
echo "Press RETURN to erase $destdir and its contents, or CTRL+C to abort."
read
echo "Erasing..."
rm -rf "$destdir"
else
echo "$destdir exists. Aborting."
exit 1
fi
fi
echo "Working..."
mkdir -p $destdir
process_dir $srcdir
echo "Done."
# Revision history
# $Log$
# Revision 1.5 2007-04-24 19:48:41 dtrg
# Removed bashish.
#
# Revision 1.4 2007/02/25 20:56:41 dtrg
# Performed major renovations to make the script work on OpenBSD.
#
# Revision 1.3 2007/02/24 02:05:56 dtrg
# Removed some bashish; added comment support; removed the make
# distr functionality, as nothing was using it any more and it was
# causing problems.
#
# Revision 1.2 2005/06/24 23:19:23 dtrg
# Added new mkdist tool.
#
# Revision 1.1 2005/06/24 22:13:57 dtrg
# Created new tool to generate distributions.

@@ -1,19 +0,0 @@
#!/bin/sh
if [ -f $DESTDIR/$1/$2 ]
then
:
elif grep LIST $DESTDIR/$1/.distr >/dev/null 2>&1 &&
(test "$2" = "`head -1 $DESTDIR/$1/LIST`") >/dev/null 2>&1 &&
${DA-false} "$1" "$2"
then
: Fetched library contents one by one and put them together
elif ( cd $DESTDIR/$1 ; make distr ) > /dev/null 2>&1
then
echo ++ $1/$2 made 1>&2
elif ( cd $DESTDIR/$1 ; make $2 ) > /dev/null 2>&1
then
echo ++ $1/$2 made 1>&2
else
echo ++ $1/$2 not present 1>&2
fi

@@ -1,42 +0,0 @@
case $# in
2|3) ;;
*) echo Usage: $0 directory repdir [ SVrecord ] 1>&2 ; exit 1 ;;
esac
case $0 in
/*) DDIR=`dirname $0`
;;
*) DDIR=`pwd`/`dirname $0`
;;
esac
case $1 in
/*) DESTDIR=$1 ;;
*) DESTDIR=`pwd`/$1 ;;
esac
case $2 in
/*) REPDIR=$2 ;;
*) REPDIR=`pwd`/$2 ;;
esac
# DD=$DDIR/mkd
# export DD
mkdir -p $DESTDIR
CVSROOT=/usr/proj/em/Repositories
export CVSROOT
cd $DESTDIR
case $# in
3)
cvs checkout world -r $3
;;
2)
cvs checkout world
;;
esac
cd $REPDIR
DF=$DDIR/mkf
DA=$DDIR/mka
export DDIR DESTDIR DF DA REPDIR
$DDIR/dwalk
cd $DESTDIR
find . -type d -print | xargs chmod "uog+rx"
chmod -R "og-w,u+w,uog+r" .

@@ -1,26 +0,0 @@
REV=
FILE=
while :
do
case $# in
0) break ;;
esac
ARG="$1"
shift
case "$ARG" in
-r*) REV=`echo "$ARG"| sed s/-r//` ;;
-*) FLAGS="$FLAGS $ARG" ;;
*) case x$FILE in
x) FILE="$ARG" ;;
*) echo todistr can only be done on one file at the time
exit 1 ;;
esac
esac
done
case x$REV in
x) REV=`rlog -h "$FILE"|sed -n -e '/head/s/^head:[ ]*//p'` ;;
esac
case x$REV in
x) exit 2 ;;
esac
rcs -ndistr4:$REV $FLAGS $FILE

@@ -1,2 +0,0 @@
DD=`pwd`/ts
echo OK

@@ -1,32 +0,0 @@
READ_ME
Makefile
proto.make
ack.doc
basic.doc
cg.doc
crefman.doc
ansi_C.doc
em
install.doc
install.pr
ncg.doc
pcref.doc
peep.doc
regadd.doc
toolkit.doc
v7bugs.doc
val.doc
6500.doc
i80.doc
z80.doc
m68020.doc
m2ref.doc
nopt.doc
top
ego
occam
int
ceg
sparc
lint
pascal

File diff suppressed because it is too large.

@@ -1,4 +0,0 @@
LLgen.n
LLgen_NCER.n
LLgen.refs
proto.make

File diff suppressed because it is too large.

@@ -1,54 +0,0 @@
%T An ALL(1) Compiler Generator
%A D. R. Milton
%A L. W. Kirchhoff
%A B. R. Rowland
%B Proc. of the SIGPLAN '79 Symposium on Compiler Construction
%D August 1979
%J SIGPLAN Notices
%N 8
%P 152-157
%V 14
%T Lex - A Lexical Analyser Generator
%A M. E. Lesk
%I Bell Laboratories
%D October 1975
%C Murray Hill, New Jersey
%R Comp. Sci. Tech. Rep. No. 39
%T Yacc: Yet Another Compiler Compiler
%A S. C. Johnson
%I Bell Laboratories
%D 1975
%C Murray Hill, New Jersey
%R Comp. Sci. Tech. Rep. No. 32
%T The C Programming Language
%A B. W. Kernighan
%A D. M. Ritchie
%I Prentice-Hall, Inc.
%C Englewood Cliffs, New Jersey
%D 1978
%A M. Griffiths
%T LL(1) Grammars and Analysers
%E F. L. Bauer and J. Eickel
%B Compiler Construction, An Advanced Course
%I Springer-Verlag
%C New York, N.Y.
%D 1974
%T Make - A Program for Maintaining Computer Programs
%A S. I. Feldman
%J Software - Practice and Experience
%V 10
%N 8
%P 255-265
%D August 1979
%T Methods for the Automatic Construction of Error Correcting Parsers
%A J. R\*:ohrich
%J Acta Informatica
%V 13
%P 115-139
%D 1980

File diff suppressed because it is too large.

@@ -1,15 +0,0 @@
# $Id$
GRAP=grap
PIC=pic
EQN=eqn
REFER=refer
TBL=tbl
all: ../LLgen.doc ../LLgen_NCER.doc
../LLgen.doc: LLgen.n LLgen.refs
$(REFER) -sA+T -p LLgen.refs LLgen.n | $(EQN) | $(TBL) > $@
../LLgen_NCER.doc: LLgen_NCER.n
$(GRAP) LLgen_NCER.n | pic | eqn > $@

@@ -1,20 +0,0 @@
# $Id$
#PARAMS do not remove this line!
SRC_DIR = $(SRC_HOME)/doc/LLgen
GRAP=grap
PIC=pic
EQN=eqn
REFER=refer
TBL=tbl
all: $(TARGET_HOME)/doc/LLgen.doc $(TARGET_HOME)/doc/LLgen_NCER.doc
$(TARGET_HOME)/doc/LLgen.doc: $(SRC_DIR)/LLgen.n $(SRC_DIR)/LLgen.refs
$(REFER) -sA+T -p $(SRC_DIR)/LLgen.refs $(SRC_DIR)/LLgen.n | $(EQN) | $(TBL) > $@
$(TARGET_HOME)/doc/LLgen_NCER.doc: $(SRC_DIR)/LLgen_NCER.n
$(GRAP) $(SRC_DIR)/LLgen_NCER.n | pic | eqn > $@

@@ -1,82 +0,0 @@
# $Id$
# This Makefile is not supposed to be used in the doc source directory.
# Instead, it is supposed to be copied to the target doc directory.
SUF=dit
PRINT=dis
NROFF=troff
MS=-ms
OPR=dip
RESFILES= \
toolkit.$(SUF) install.$(SUF) em.$(SUF) ack.$(SUF) v7bugs.$(SUF) \
peep.$(SUF) cg.$(SUF) ncg.$(SUF) regadd.$(SUF) LLgen.$(SUF) \
basic.$(SUF) crefman.$(SUF) pascal.$(SUF) pcref.$(SUF) val.$(SUF) \
ansi_C.$(SUF) \
6500.$(SUF) i80.$(SUF) z80.$(SUF) top.$(SUF) ego.$(SUF) \
m68020.$(SUF) occam.$(SUF) m2ref.$(SUF) ceg.$(SUF) nopt.$(SUF) \
sparc.$(SUF) int.$(SUF) lint.$(SUF)
.SUFFIXES: .doc .$(SUF) .lpr .out
.doc.$(SUF):
$(NROFF) $(MS) $< > $@
# directly to the printer:
.doc.lpr:
$(NROFF) $(MS) $< | $(OPR)
# to standard output
.doc.out:
@$(NROFF) $(MS) $<
# Exceptions, to be run without -ms
v7bugs.$(SUF): v7bugs.doc
$(NROFF) v7bugs.doc >$@
v7bugs.lpr: v7bugs.doc
$(NROFF) v7bugs.doc | $(OPR)
v7bugs.out: v7bugs.doc
@$(NROFF) v7bugs.doc
pcref.$(SUF): pcref.doc
$(NROFF) pcref.doc >$@
pcref.lpr: pcref.doc
$(NROFF) pcref.doc | $(OPR)
pcref.out: pcref.doc
@$(NROFF) pcref.doc
val.$(SUF): val.doc
$(NROFF) val.doc >$@
val.lpr: val.doc
$(NROFF) val.doc | $(OPR)
val.out: val.doc
@$(NROFF) val.doc
pr:
@make "SUF="$(SUF) "NROFF="$(NROFF) "MS="$(MS) \
$(RESFILES) >make.pr.out 2>&1
@$(PRINT) $(RESFILES)
# The 'opr' entry creates a lot of paper ... but the user must be able
# to write the doc directory. I hope that this limits the users of
# this entry to persons that know what they are doing.
opr:
@make "SUF="$(SUF) "NROFF="$(NROFF) "MS="$(MS) $(RESFILES)
$(OPR) $(RESFILES)
clean:
-rm -f $(RESFILES)
# The distr entry is only used when making a distribution tree.
# It makes a version of the installation manual, suitable for a simple
# line printer.
distr: install.doc
tbl install.doc | nroff -Tlp $(MS) >install.pr

@@ -1,8 +0,0 @@
Some of these documents use a font called CW.
If this font is not available, reference to it can be changed with
a sed-script like
s/\.ft CW/.ft yourfont/
s/\\f(CW/\\fyourfont/g
s/^.fp\(.*\)CW$/.fp\1yourfont/
However, the font must be a constant-width font for the documents to look
reasonable.

@@ -1,444 +0,0 @@
.\" $Id$
.nr PD 1v
.tr ~
.TL
Ack Description File
.br
Reference Manual
.AU
Ed Keizer
.AI
Vakgroep Informatica
Vrije Universiteit
Amsterdam
.NH
Introduction
.PP
The program \fIack\fP(I) internally maintains a table of
possible transformations and a table of string variables.
The transformation table contains one entry for each possible
transformation of a file.
Which transformations are used depends on the suffix of the
source file.
Each transformation table entry tells which input suffixes are
allowed and what suffix/name the output file has.
When the output file does not already satisfy the request of the
user (indicated with the flag \fB\-c.suffix\fP), the table is scanned
starting with the next transformation in the table for another
transformation that has as input suffix the output suffix of
the previous transformation.
A few special transformations are recognized, among them is the
combiner, which is
a program combining several files into one.
When no stop suffix was specified (flag \fB\-c.suffix\fP) \fIack\fP
stops after executing the combiner with as arguments the \-
possibly transformed \- input files and libraries.
\fIAck\fP will only perform the transformations in the order in
which they are presented in the table.
.LP
The string variables are used while creating the argument list
and program call name for
a particular transformation.
.NH
Which descriptions are used
.PP
\fIAck\fP always uses two description files: one to define the
front-end transformations and one for the machine dependent
back-end transformations.
Each description has a name.
First the way of determining
the name of the descriptions needed is described.
.PP
When the shell environment variable ACKFE is set \fIack\fP uses
that to determine the front-end table name, otherwise it uses
\fBfe\fP.
.PP
The way the backend table name is determined is more
convoluted.
.br
First, when the last filename in the program call name is not
one of \fIack\fP or the front-end call-names,
this filename is used as the backend description name.
Second, when the \fB\-m\fP is present the \fB\-m\fP is chopped off this
flag and the rest is used as the backend description name.
Third, when both failed the shell environment variable ACKM is
used.
Last, when also ACKM was not present the default backend is
used, determined by the definition of ACKM in h/local.h.
The presence and value of the definition of ACKM is
determined at compile time of \fIack\fP.
.PP
Now, we have the names, but that is only the first step.
\fIAck\fP stores a few descriptions at compile time.
This descriptions are simply files read in at compile time.
At the moment of writing this document, the descriptions
included are: pdp, fe, i86, m68k2, vax2 and int.
The name of a description is first searched for internally,
then in lib/descr/\fIname\fP, then in
lib/\fIname\fP/descr, and finally in the current
directory of the user.
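.PP
For example, with the \fIem22\fP description used in the examples at the end
of this manual, and an arbitrary source file \fIprog.c\fP, either of
.DS X
ack \-mem22 prog.c
ACKM=em22 ack prog.c
.DE
selects the em22 back-end; the first form uses the \fB\-m\fP flag, the
second relies on the ACKM environment variable.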
.NH
Using the description file
.PP
Before starting on a narrative of the description file,
the introduction of a few terms is necessary.
All these terms are used to describe the scanning of zero
terminated strings, thereby producing another string or
sequence of strings.
.IP Backslashing 5
.br
All characters preceded by \e are modified to prevent
recognition at further scanning.
This modification is undone before a string is passed to the
outside world as argument or message.
When reading the description files the
sequences \e\e, \e# and \e<newline> have a special meaning.
\e\e translates to a single \e, \e# translates to a single #
that is not
recognized as the start of comment, but can be used in
recognition and finally, \e<newline> translates to nothing at
all, thereby allowing continuation lines.
.nr PD 0
.IP "Variable replacement"
.br
The scan recognizes the sequences {{, {NAME} and {NAME?text},
where NAME can be any combination of characters excluding ? and
} and text may be anything excluding }.
(~\e} is allowed of course~)
The first sequence produces an unescaped single {.
The second produces the contents of the NAME, definitions are
done by \fIack\fP and in description files.
When the NAME is not defined an error message is produced on
the diagnostic output.
The last sequence produces the contents of NAME if it is
defined and text otherwise.
.PP
.IP "Expression replacement"
.br
Syntax: (\fIsuffix sequence\fP:\fIsuffix sequence\fP=\fItext\fP)
.br
Example: (.c.p.e:.e=tail_em)
.br
If the two suffix sequences have a common member \-~\&.e in this
case~\- the text is produced.
When no common member is present the empty string is produced.
Thus the example given is a constant expression.
Normally, one of the suffix sequences is produced by variable
replacement.
\fIAck\fP sets three variables while performing the diverse
transformations: HEAD, TAIL and RTS.
All three variables depend on the properties \fIrts\fP and
\fIneed\fP from the transformations used.
Whenever a transformation is used for the first time,
the text following the \fIneed\fP is appended to both the HEAD and
TAIL variable.
The value of the variable RTS is determined by the first
transformation used with a \fIrts\fP property.
.IP
Two runtime flags have effect on the value of one or more of
these variables.
The flag \fB\-.suffix\fP has the same effect on these three variables
as if a file with that \fBsuffix\fP was included in the argument list
and had to be translated.
The flag \fB\-r.suffix\fP only has that effect on the TAIL
variable.
The program call names \fIacc\fP and \fIcc\fP have the effect
of an automatic \fB\-.c\fP flag.
\fIApc\fP and \fIpc\fP have the effect of an automatic \fB\-.p\fP flag.
.IP "Line splitting"
.br
The string is transformed into a sequence of strings by replacing
the blank space by string separators (nulls).
.IP "IO replacement"
.br
The > in the string is replaced by the output file name.
The < in the string is replaced by the input file name.
When multiple input files are present the string is duplicated
for each input file name.
.nr PD 1v
.LP
Each description is a sequence of variable definitions followed
by a sequence of transformation definitions.
Variable definitions use a line each, transformations
definitions consist of a sequence of lines.
Empty lines are discarded, as are lines with nothing but
comment.
Comment is started by a # character, and continues to the end
of the line.
Three special two-characters sequences exist: \e#, \e\e and
\e<newline>.
Their effect is described under 'backslashing' above.
Each \- nonempty \- line starts with a keyword, possibly
preceded by blank space.
The keyword can be followed by a further specification.
The two are separated by blank space.
.PP
Variable definitions use the keyword \fIvar\fP and look like this:
.DS X
var NAME=text
.DE
The name can be any identifier, the text may contain any
character.
Blank space before the equal sign is not part of the NAME.
Blank space after the equal is considered as part of the text.
The text is scanned for variable replacement before it is
associated with the variable name.
.br
.sp 2
The start of a transformation definition is indicated by the
keyword \fIname\fP.
The last line of such a definition contains the keyword
\fIend\fP.
The lines in between associate properties to a transformation
and may be presented in any order.
The identifier after the \fIname\fP keyword determines the name
of the transformation.
This name is used for debugging and by the \fB\-R\fP flag.
The keywords are used to specify which input suffices are
recognized by that transformation,
the program to run, the arguments to be handed to that program
and the name or suffix of the resulting output file.
Two keywords are used to indicate which run-time startoffs and
libraries are needed.
The possible keywords are:
.IP \fIfrom\fP
.br
followed by a sequence of suffices.
Each file with one of these suffices is allowed as input file.
Preprocessor transformations do not need the \fIfrom\fP
keyword. All other transformations do.
.nr PD 0
.IP \fIto\fP
.br
followed by the suffix of the output file name or in the case of a
linker
the output file name.
.IP \fIprogram\fP
.br
followed by the name of the load file of the program; a pathname most likely
starts with either a / or {EM}.
This keyword must be
present, the remainder of the line
is subject to backslashing and variable replacement.
.IP \fImapflag\fP
.br
The mapflags are used to grab flags given to \fIack\fP and
pass them on to a specific transformation.
This feature uses a few simple pattern matching and replacement
facilities.
Multiple occurrences of this keyword are allowed.
The text following the keyword is
subjected to backslashing.
The keyword is followed by a match expression and a variable
assignment separated by blank space.
As soon as both description files are read, \fIack\fP looks
at all transformations in these files to find a match for the
flags given to \fIack\fP.
The flags \fB\-m\fP, \fB\-o\fP,
\fB\-O\fP, \fB\-r\fP, \fB\-v\fP, \fB\-g\fP, \-\fB\-c\fP, \fB\-t\fP,
\fB\-k\fP, \fB\-R\fP and \-\fB\-.\fP are specific to \fIack\fP and
not handed down to any transformation.
The matching is performed in the order in which the entries
appear in the definition.
The scanning stops after first match is found.
When a match is found, the variable assignment is executed.
A * in the match expression matches any sequence of characters,
a * in the right hand part of the assignment is
replaced by the characters matched by
the * in the expression.
The right hand part is also subject to variable replacement.
The variable will probably be used in the program arguments.
The \fB\-l\fP flags are special,
the order in which they are presented to \fIack\fP must be
preserved.
The identifier LNAME is used in conjunction with the scanning of
\fB\-l\fP flags.
The value assigned to LNAME is used to replace the flag.
The example further on shows the use of all this.
.IP \fIargs\fP
.br
The keyword is followed by the program call arguments.
It is subject to backslashing, variable replacement, expression
replacement, line splitting and IO replacement.
The variables assigned to by \fImapflags\fP will probably be
used here.
The flags not recognized by \fIack\fP or any of the transformations
are passed to the linker and inserted before all other arguments.
.IP \fIstdin\fP
.br
This keyword indicates that the transformation reads from standard input.
.IP \fIstdout\fP
.br
This keyword indicates that the transformation writes on standard output.
.IP \fIoptimizer\fP
.br
The presence of this keyword indicates that this transformation is an optimizer.
It can be followed by a number, indicating the "level" of the
optimizer (see description of the -O option in the ack(1ACK) manual page).
.IP \fIpriority\fP
.br
This \-~optional~\- keyword is followed by a number. Positive priority means
that the transformation is likely to be used, negative priority means that
the transformation is unlikely to be used.
Priorities can also be set with an ack(1ACK) command line option.
Priorities come in handy when there are several implementations of a
certain transformation. They can then be used to select a default one.
.IP \fIlinker\fP
.br
This keyword indicates that this transformation is the linker.
.IP \fIcombiner\fP
.br
This keyword indicates that this transformation is a combiner. A combiner
is a program combining several files into one, but is not a linker.
An example of a combiner is the global optimizer.
.IP \fIprep\fP
.br
This \-~optional~\- keyword is followed by an option indicating its relation
to the preprocessor.
The possible options are:
.DS X
always the input files must be preprocessed
cond the input files must be preprocessed when starting with #
is this transformation is the preprocessor
.DE
.IP \fIrts\fP
.br
This \-~optional~\- keyword indicates that the rest of the line must be
used to set the variable RTS, if it was not already set.
Thus the variable RTS is set by the first transformation
executed with such a property or as a result from \fIack\fP's program
call name (acc, cc, apc or pc) or by the \fB\-.suffix\fP flag.
.IP \fIneed\fP
.br
This \-~optional~\- keyword indicates that the rest of the line must be
concatenated to the HEAD and TAIL variables.
This is done once for every transformation used or indicated
by one of the program call names mentioned above or indicated
by the \fB\-.suffix\fP flag.
.br
.nr PD 1v
.NH
Conventions used in description files
.PP
\fIAck\fP reads two description files.
A few of the variables defined in the machine specific file
are used by the descriptions of the front-ends.
Other variables, set by \fIack\fP, are of use to all
transformations.
.PP
\fIAck\fP sets the variable EM to the home directory of the
Amsterdam Compiler Kit.
The variable SOURCE is set to the name of the argument that is currently
being massaged, this is useful for debugging.
The variable SUFFIX is set to the suffix of the argument that is
currently being massaged.
.br
The variable M indicates the
directory in lib/{M}/tail_..... and NAME is the string to
be defined by the preprocessor with \-D{NAME}.
The definitions of {w}, {s}, {l}, {d}, {f} and {p} indicate
EM_WSIZE, EM_SSIZE, EM_LSIZE, EM_DSIZE, EM_FSIZE and EM_PSIZE
respectively.
.br
The variable INCLUDES is used as the last argument to \fIcpp\fP.
It is used to add directories to
the list of directories containing #include files.
.PP
The variables HEAD, TAIL and RTS are set by \fIack\fP and used
to compose the arguments for the linker.
.NH
Example
.PP
Description for front-end
.DS X
.ta 4n 40n
name cpp # the C-preprocessor
# no from, it's governed by the P property
to .i # result files have suffix i
program {EM}/lib/cpp # pathname of loadfile
mapflag \-I* CPP_F={CPP_F?} \-I* # grab \-I.. \-U.. and
mapflag \-U* CPP_F={CPP_F?} \-U* # \-D.. to use as arguments
mapflag \-D* CPP_F={CPP_F?} \-D* # in the variable CPP_F
args {CPP_F?} {INCLUDES?} \-D{NAME} \-DEM_WSIZE={w} \-DEM_PSIZE={p} \e
\-DEM_SSIZE={s} \-DEM_LSIZE={l} \-DEM_FSIZE={f} \-DEM_DSIZE={d} <
# The arguments are: first the \-[IUD]...
# then the include dir's for this machine
# then the NAME and size values finally
# followed by the input file name
stdout # Output on stdout
prep is # Is preprocessor
end
name cem # the C-compiler proper
from .c # used for files with suffix .c
to .k # produces compact code files
program {EM}/lib/em_cem # pathname of loadfile
mapflag \-p CEM_F={CEM_F?} \-Xp # pass \-p as \-Xp to cem
mapflag \-L CEM_F={CEM_F?} \-l # pass \-L as \-l to cem
args \-Vw{w}i{w}p{p}f{f}s{s}l{l}d{d} {CEM_F?}
# the arguments are the object sizes in
# the \-V... flag and possibly \-l and \-Xp
stdin # input from stdin
stdout # output on stdout
prep always # use cpp
rts .c # use the C run-time system
need .c # use the C libraries
end
name decode # make human readable files from compact code
from .k.m # accept files with suffix .k or .m
to .e # produce .e files
program {EM}/lib/em_decode # pathname of loadfile
args < # the input file name is the only argument
stdout # the output comes on stdout
end
.DE
.DS X
.ta 4n 40n
Example of a backend, in this case the EM assembler/loader.
var w=2 # wordsize 2
var p=2 # pointersize 2
var s=2 # short size 2
var l=4 # long size 4
var f=4 # float size 4
var d=8 # double size 8
var M=em22
var NAME=em22 # for cpp (NAME=em22 results in #define em22 1)
var LIB=lib/{M}/tail_ # part of file name for libraries
var RT=lib/{M}/head_ # part of file name for run-time startoff
var SIZE_FLAG=\-sm # default internal table size flag
var INCLUDES=\-I{EM}/include # use {EM}/include for #include files
name asld # Assembler/loader
from .k.m.a # accepts compact code and archives
to e.out # output file name
program {EM}/lib/em_ass # load file pathname
mapflag \-l* LNAME={EM}/{LIB}* # e.g. \-ly becomes
# {EM}/mach/int/lib/tail_y
mapflag \-+* ASS_F={ASS_F?} \-+* # recognize \-+ and \-\-
mapflag \-\-* ASS_F={ASS_F?} \-\-*
mapflag \-s* SIZE_FLAG=\-s* # overwrite old value of SIZE_FLAG
args {SIZE_FLAG} \e
({RTS}:.c={EM}/{RT}cc) ({RTS}:.p={EM}/{RT}pc) \-o > < \e
(.p:{TAIL}={EM}/{LIB}pc) \e
(.c:{TAIL}={EM}/{LIB}cc.1s {EM}/{LIB}cc.2g) \e
(.c.p:{TAIL}={EM}/{LIB}mon)
# \-s[sml] must be first argument
# the next line contains the choice for head_cc or head_pc
# and the specification of in- and output.
# the last three args lines choose libraries
linker
end
.DE
The command \fIack \-mem22 \-v \-v \-I../h \-L \-ly prog.c\fP
would result in the following
calls (with exec(II)):
.DS X
.ta 4n
1) /lib/cpp \-I../h \-I/usr/em/include \-Dem22 \-DEM_WSIZE=2 \-DEM_PSIZE=2 \e
\-DEM_SSIZE=2 \-DEM_LSIZE=4 \-DEM_FSIZE=4 \-DEM_DSIZE=8 prog.c
2) /usr/em/lib/em_cem \-Vw2i2p2f4s2l4d8 \-l
3) /usr/em/lib/em_ass \-sm /usr/em/lib/em22/head_cc \-o e.out prog.k
/usr/em/lib/em22/tail_y /usr/em/lib/em22/tail_cc.1s
/usr/em/lib/em22/tail_cc.2g /usr/em/lib/em22/tail_mon
.DE

@@ -1,365 +0,0 @@
.de NS
.sp
.in 0
\\fBANS \\$1:\\fP
..
.TL
Amsterdam Compiler Kit-ANSI C compiler compliance statements
.AU
Hans van Eck
.AI
Dept. of Mathematics and Computer Science
Vrije Universiteit
Amsterdam, The Netherlands
.PP
This document specifies the implementation-defined behaviour of the ANSI-C
front end of the Amsterdam Compiler Kit as required by ANS X3.159-1989. Since
the implementation-defined behaviour sometimes depends on the machine
compiling on or for, some items will be left unspecified in this
document\(dg.
.FS
\(dg when cross-compiling, run-time behaviour may be different from
compile-time behaviour
.FE
The compiler assumes that it runs on a UNIX system.
.NS A.6.3.1
.IP -
Diagnostics are placed on the standard error output. They have the
following specification:
.br
"<file>", line <nr>: [(<class>)] <diagnostic>
.br
There are three classes of diagnostics: "error", "strict" and "warning".
When the class is "error", the <class> is absent.
.br
The class "strict" is used for violations of the standard which are
not severe enough to stop compilation. An example is the occurrence
of non white-space after an '#else' or '#endif' pre-processing
directive. The class "warning" is used for legal but dubious
constructions. An example is overflow of constant expressions.
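.br
As an illustration of this format (the file name, line number and exact
message text here are invented), a diagnostic of class "warning" could look like:
.br
"hello.c", line 12: (warning) overflow in constant expression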
.NS A.6.3.2
.IP -
The function 'main' can have two arguments. The first argument is an
integer specifying the number of arguments on the command line. The second
argument is a pointer to an array of pointers to the arguments (as
strings).
.IP -
Interactive devices are terminals.
.NS A.6.3.3
.IP -
The number of significant characters is an option. By default it is 64.
There is a distinction between upper and lower case.
.NS A.6.3.4
.IP -
The compiler assumes ASCII-characters in both the source and execution
character set.
.IP -
There are no multi-byte characters.
.IP -
There are 8 bits in a character.
.IP -
Character constants with values that can not be represented in 8 bits
are truncated.
.IP -
Character constants that are more than 1 character wide will have the
first character specified in the least significant byte.
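For illustration (assuming 8-bit ASCII characters and 2-byte ints):
.DS
.ft CW
int v = 'AB';   /* 'A' (65) ends up in the least significant byte, */
                /* so v == 'A' + 256 * 'B' == 16961                */
.ft R
.DE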
.IP -
The only supported locale is "C".
.IP -
A plain 'char' has the same range of values as 'signed char'.
.NS A.6.3.5
.IP -
The compiler assumes that it works on and compiles for a
two's complement binary-number system. Shorts will use 2 bytes and longs
will use 4 bytes. The size of an 'int' is machine dependent.
.IP -
Converting an integer to a shorter signed integer is implemented by
ignoring the high-order byte(s) of the former.
Converting an unsigned integer to the corresponding signed type is
purely administrative. This means that the bit-pattern remains
unchanged.
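For illustration (assuming 2-byte ints and shorts, 4-byte longs and
two's complement arithmetic, as described above):
.DS
.ft CW
int f(void)
{
        long         l = 0x12345678L;
        short        s = (short)l;  /* high-order bytes ignored: s == 0x5678 */
        unsigned int u = 0xFFFFu;
        int          i = (int)u;    /* bit-pattern unchanged: i == -1        */

        return s + i;
}
.ft R
.DE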
.IP -
The results of bitwise operations on signed integers are what can be
expected on a two's complement machine.
.IP -
If either operand is negative, whether the result of the / operator is the
largest integer less than or equal to the algebraic quotient or the
smallest integer greater than or equal to the algebraic quotient is machine
dependent, as is the sign of the result of the % operator.
.IP -
The right-shift of a negative value is negative.
.NS A.6.3.6
.IP -
The representation of floating-point values is machine-dependent.
When native floating-point is not present an IEEE-emulation is used.
The compiler uses high-precision floating-point for constant folding.
.IP -
Truncation is always to the nearest floating-point number that can
be represented.
.NS A.6.3.7
.IP -
The type returned by the sizeof-operator (also known as size_t)
is 'unsigned int'. This is done for backward compatibility reasons.
.IP -
Casting an integer to a pointer or vice versa has no effect in
bit-pattern when the sizes are equal. Otherwise the value will be
truncated or zero-extended (depending on the direction of the
conversion and the relative sizes).
.IP -
When a pointer is as large as an integer, the type of a 'ptrdiff_t' will
be 'int'. Otherwise the type will be 'long'.
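For illustration (assuming pointers and ints of the same size; the
address and values are arbitrary):
.DS
.ft CW
int f(void)
{
        char  buf[10];
        char *p = (char *)0x1000;    /* bit-pattern of 0x1000 is kept  */
        int   n = (int)p;            /* n == 0x1000 again              */
        int   d = &buf[8] - &buf[3]; /* ptrdiff_t is plain int: d == 5 */

        return n + d;
}
.ft R
.DE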
.NS A.6.3.8
.IP -
Since the front end has only limited control over the registers, it can
only make it more likely that variables that are declared as
registers also end up in registers. The only things that can possibly be
put into registers are: 'int', 'long', 'float', 'double', 'long double'
and pointers.
.NS A.6.3.9
.IP -
When a member of a union object is accessed using a member of a
different type, the resulting value will usually be garbage. The
compiler makes no effort to catch these errors.
.IP -
The alignment of types is a compile-time option. The alignment of
a structure-member is the alignment of its type. Usually, the
alignment is passed on to the compiler by the 'ack' program. When a
user wants to do this manually, he/she should be prepared for trouble.
.IP -
A "plain" 'int' bit-field is taken as a 'signed int'. This means that
a field with a size of 1 bit can only store the values 0 and -1.
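For illustration (an arbitrary struct):
.DS
.ft CW
struct flags {
        int on : 1;     /* a "plain" int field, so it is signed */
};

int f(void)
{
        struct flags x;

        x.on = 1;       /* the single bit is set ...            */
        return x.on;    /* ... but reads back as -1             */
}
.ft R
.DE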
.IP -
The order of allocation of bit-fields is a compile-time option. By
default, high-order bits are allocated first.
.IP -
An enum has the same size as a "plain" 'int'.
.NS A.6.3.10
.IP -
An access to a volatile declared variable is done by just mentioning
the variable. E.g. the statement "x;" where x is declared volatile,
constitutes an access.
.NS A.6.3.11
.IP -
There is no fixed limit on the number of declarators that may modify an
arithmetic, structure or union type, although specifying too many may
cause the compiler to run out of memory.
.NS A.6.3.12
.IP -
The maximum number of cases in a switch-statement is in the order of
1e9, although the compiler may run out of memory somewhat earlier.
.NS A.6.3.13
.IP -
Since both the pre-processor and the compiler assume ASCII-characters,
a single character constant in a conditional-inclusion directive
matches the same value in the execution character set.
.IP -
The pre-processor recognizes -I... command-line options. The
directories thus specified are searched first. After that, depending on the
command that the preprocessor is called with, machine/system-dependent
directories are searched. After that, ~em/include/_tail_ac and
/usr/include are visited.
.IP -
Quoted names are first looked for in the directory in which the file
which does the include resides.
.IP -
The characters in an h- or q- char-sequence are taken to be UNIX
paths.
.IP -
Neither the compiler nor the preprocessor know any pragmas.
.IP -
Since the compiler runs on UNIX, __DATE__ and __TIME__ will always be
defined.
.NS A.6.3.14
.IP -
NULL is defined as ((void *)0). This is done in order to flag dubious
constructions like "int x = NULL;".
.IP -
The diagnostic printed by 'assert' is as follows:
.ti +4n
"Assertion "<expr>" failed, file "<file>", line <line>",
.br
where <expr> is the argument to the assert macro, printed as string.
(the <file> and <line> should be clear)
.KS
.IP -
The character sets accepted by the character test macros:
.TS
l l.
name: set:
isalnum() 0-9A-Za-z
isalpha() A-Za-z
iscntrl() \e000-\e037\e177
islower() a-z
isupper() A-Z
isprint() <space>-~ (== \e040-\e176)
.TE
.KE
As an addition, there is an isascii() macro, which tests whether a character
is an ascii character. Characters in the range from \e000 to \e177 are ascii
characters.
.KS
.IP -
The behaviour of the mathematical functions on a domain error:
.TS
l c
l n.
name: returns:
asin() 0.0
acos() 0.0
atan2() 0.0
fmod() 0.0
log() -HUGE_VAL
log10() -HUGE_VAL
pow() 0.0
sqrt() 0.0
.TE
.KE
.IP -
Underflow range errors do not cause errno to be set.
.IP -
The function fmod() returns 0.0 and sets errno to EDOM when the second
argument is 0.0.
.IP -
The set of signals for the signal() function depends on the UNIX-system
which the compiler is compiling for. The default handling, semantics
and behaviour of these signals are those specified by the operating
system vendor. The default handling is not reset when SIGILL is
received.
.IP -
A text-stream need not end in a new-line character.
.IP -
White-space characters written immediately before a new-line also appear when the stream is read back.
.IP -
There may be any number of null characters appended to a binary
stream.
.IP -
The file position indicator of an append mode stream is initially
positioned at the beginning of the file.
.IP -
A write on a text stream does not cause the associated file to be
truncated beyond that point.
.IP -
The buffering intended by the standard is fully supported.
.IP -
A zero-length file actually exists.
.IP -
A file name can consist of any character, except for the '\e0' and
the '/'.
.IP -
A file can be open multiple times.
.IP -
When a remove() is done on an open file, reading and writing behave
just as can be expected from a non-removed file. When the associated
stream is closed, all written data will be lost.
.IP -
When a file exists prior to a call to rename(), the behaviour is that
of the underlying UNIX system. Normally, the call would fail.
.IP -
The %p conversion in fprintf() has the same effect as %#x or %#lx,
depending on the sizes of pointer and integer.
.IP -
The %p conversion in fscanf() has the same effect as %x or %lx,
depending on the sizes of pointer and integer.
.IP -
A - character that is neither the first nor the last character in the
scanlist for %[ conversion is taken to be a range indicator. When the
first character has a higher ASCII-value than the second, the - will
just be put into the scanlist.
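For illustration (the input string is arbitrary):
.DS
.ft CW
#include <stdio.h>

int f(void)
{
        char word[32];

        /* "a-z" is a range; in "%[z-a]" the '-' would simply be
         * put into the scanlist, as described above. */
        return sscanf("hello world", "%31[a-z]", word); /* word: "hello" */
}
.ft R
.DE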
.IP -
The value of errno when fgetpos() or ftell() failed is that of lseek().
This means:
.RS
.IP "EBADF \-" 10
when the stream is not valid
.IP "ESPIPE \-"
when fildes is associated with a pipe (and on some systems: sockets)
.IP "EINVAL \-"
the resulting file pointer would be negative
.RE
.LP
.IP -
The messages generated by perror() depend on the value of errno.
The mapping of errors to strings is done by strerror().
.IP -
When the requested size is zero, malloc(), calloc() and realloc()
return a null-pointer.
.IP -
When abort() is called, output buffers will be flushed. Temporary files
(made with the tmpfile() function) will have disappeared when SIGABRT
is not caught or ignored.
.IP -
The exit() function returns the low-order eight bits of its argument
to the environment.
.IP -
The predefined environment names are controlled by the user.
Setting environment variables is done through the putenv() function.
This function accepts a pointer to char as its argument.
To set f.i. the environment variable TERM to a230 one writes
.ti +4n
putenv("TERM=a230");
.br
The argument to putenv() is stored in an internal table, so malloc'ed
strings can not be freed until another call to putenv() (which sets the
same environment variable) is made. The function returns 1 if it fails,
0 otherwise.
.LP
.IP -
The argument to system is passed as argument to /bin/sh -c.
.IP -
The strings returned by strerror() depend on errno in the following
way:
.TS
l l.
errno string
0 "Error 0",
EPERM "Not owner",
ENOENT "No such file or directory",
ESRCH "No such process",
EINTR "Interrupted system call",
EIO "I/O error",
ENXIO "No such device or address",
E2BIG "Arg list too long",
ENOEXEC "Exec format error",
EBADF "Bad file number",
ECHILD "No children",
EAGAIN "No more processes",
ENOMEM "Not enough core",
EACCES "Permission denied",
EFAULT "Bad address",
ENOTBLK "Block device required",
EBUSY "Mount device busy",
EEXIST "File exists",
EXDEV "Cross-device link",
ENODEV "No such device",
ENOTDIR "Not a directory",
EISDIR "Is a directory",
EINVAL "Invalid argument",
ENFILE "File table overflow",
EMFILE "Too many open files",
ENOTTY "Not a typewriter",
ETXTBSY "Text file busy",
EFBIG	"File too large",
ENOSPC "No space left on device",
ESPIPE "Illegal seek",
EROFS "Read-only file system",
EMLINK "Too many links",
EPIPE "Broken pipe",
EDOM "Math argument",
ERANGE "Result too large"
.TE
Everything else causes strerror() to return "unknown error".
.IP -
The local time zone is by default MET (GMT + 1:00:00). This can be
changed through the TZ environment variable, or by some changes in the
sources.
.IP -
The clock() function returns the number of ticks since process
startup.
.SH
References
.IP [1]
ANS X3.159-1989
.I
American National Standard for Information Systems -
Programming Language C
.R

View File

@ -1,949 +0,0 @@
.\" $Id$
.TL
.de Sy
.LP
.IP \fBsyntax\fR 10
..
.de PU
.IP \fBpurpose\fR 10
..
.de RM
.IP \fBremarks\fR 10
..
The ABC compiler
.AU
Martin L. Kersten
Gert-Jan Akkerman
Marcel Worring
Edo Westerhuis
Frans Kunst
Ronnie Lachniet
.AI
Department of Mathematics and Computer Science.
.br
Free University
.br
Amsterdam
.AB
This manual describes the
programming language BASIC and its compiler
included in the Amsterdam Compiler Kit.
.AE
.SH
INTRODUCTION.
.LP
The BASIC-EM compiler is an extensive implementation of the
programming language BASIC.
The language structure and semantics are modelled after the
BASIC interpreter/compiler of Microsoft (tr); a short comparison
is provided in appendix A.
.LP
The compiler generates code for a virtual machine, the EM machine
[[ACM, etc]].
Using EM as an intermediate machine results in a highly portable
compiler and BASIC code.
.br
The drawback of EM is that it does not directly reflect one particular
hardware design, which means that many of the low level operations available
within BASIC are ill-defined or even inapplicable.
To mention a few, the peek and poke instructions are likely
to behave erroneously, while line printer and tape deck
primitives are unknown.
.LP
This manual is divided into three chapters.
.br
Chapter 1 discusses the general language syntax and semantics.
.br
Chapter 2 describes the statements available in BASIC-EM.
.br
Chapter 3 describes the predefined functions, ordered alphabetically.
.LP
Appendix A discusses the differences with Microsoft BASIC.
.br
Appendix B describes all reserved symbols.
.LP
.LP
.SH
SYNTAX NOTATION
.LP
The conventions for syntax presentation are as follows:
.IP CAPS 10
Items are reserved words, must be input as shown.
.IP <> 10
Items in lowercase letters enclosed in angular brackets
are to be supplied by the user.
.IP [] 10
Items are optional.
.IP \.\.\. 10
Items may be repeated any number of times
.IP {} 10
A choice between two or more alternatives. At least one of the entries
must be chosen.
.IP | 10
Vertical bars separate the choices within braces.
.LP
All punctuation must be included where shown.
.bp
.NH 1
GENERAL INFORMATION
.LP
The BASIC-EM compiler is designed for a UNIX based environment.
It accepts a text file with a BASIC program (suffix .b) and generates
an executable file, called a.out.
.NH 2
LINE FORMAT
.LP
A BASIC program consists of a series of lines, starting with a
positive line number in the range 0 to 32767.
A line may consist of more than one physical line on a terminal, but
is limited to 1024 characters.
Multiple BASIC statements may be placed on a single line, provided
they are separated by a colon (:).
.NH 2
CONSTANTS
.LP
The BASIC compiler character set comprises alphabetic
characters, numeric characters, and special characters shown below.
.DS
= + - * / ^ ( ) % # $ \\ _
! [ ] , . ; : & ' ? > < \\ (blank)
.DE
.LP
BASIC uses two different types of constants during processing:
numeric and string constants.
.br
A string constant is a sequence of characters taken from the ASCII
character set enclosed by double quotation marks.
.br
Numeric constants are positive or negative numbers, grouped into
five different classes.
.IP "a) integer constants" 25
.br
Whole numbers in the range -32768 to 32767. Integer constants do
not contain decimal points.
.IP "b) fixed point constants" 25
.br
Positive or negative real numbers, i.e. numbers with a decimal point.
.IP "c) floating point constants" 25
.br
Real numbers in scientific notation. A floating point constant
consists of an optional signed integer or fixed point number
followed by the letter E (or D) and an optional signed integer
(the exponent).
The allowable range of floating point constants is 10^-38 to 10^+38.
.IP "d) Hex constants" 25
.br
Hexadecimal numbers, denoted by the prefix &H.
.IP "e) Octal constants" 25
.br
Octal numbers, denoted by the prefix &O.
.NH 2
VARIABLES
.LP
Variables are names used to represent values in a BASIC program.
A variable is assigned a value by an assignment specified in the program.
Before a variable is assigned a value, its value is assumed to be zero.
.br
Variable names are composed of letters, digits or the decimal point,
starting with a letter. Up to 40 characters are significant.
A variable name can be followed by any of the following type
declaration characters:
.IP % 5
Defines an integer variable
.IP ! 5
Defines a single precision variable (see below)
.IP # 5
Defines a double precision variable
.IP $ 5
Defines a string variable.
.LP
Beside single valued variables, values may be grouped into tables or arrays.
Each element in an array is referenced by the array name and an index,
such a variable is called a subscripted variable.
An array has as many subscripts as there are dimensions in the array,
the maximum of which is 11.
.br
If a variable starts with FN it is assumed to be a call to a user defined
function.
.br
A variable name may not be a reserved word nor the name
of a predefined function.
A list of all reserved identifiers is included as Appendix B.
.LP
NOTES:
.br
Two variables with the same name but different types are
considered illegal.
.br
The type of a variable without a type declaration character is set,
at its first occurrence in the program,
to the default type, which is (in this implementation) double precision.
.br
Multi-dimensional arrays must be declared before use (see
DIM-statement ).
.br
BASIC-EM differs from Microsoft BASIC in supporting floats in one precision
only (due to EM), i.e. doubles and floats have the same precision.
.NH 2
EXPRESSIONS
.LP
When necessary the compiler will convert a numeric value from
one type to another.
A value is always converted to the precision of the variable it is assigned
to.
When a floating point value is converted to an integer the fractional
portion is rounded.
In an expression all values are converted to the same degree of precision,
i.e. that of the most precise operand.
.br
Division by zero results in the message "Division by zero".
If overflow (or underflow) occurs, the "Overflow (underflow)" message is
displayed and execution is terminated (contrary to Microsoft).
.SH
Arithmetic
.LP
The arithmetic operators, in order of precedence, are:
.DS L
^ Exponentiation
- Negation
*,/,\\\\\\\\,MOD Multiplication, Division, Remainder
+,- Addition, Subtraction
.DE
The operator \\\\ denotes integer division, its operands are rounded to
integers before the operator is applied.
Modulus arithmetic is denoted by the operator MOD, which yields the
integer value that is the remainder of an integer division.
.br
The order in which operators are performed can be changed with parentheses.
.SH
Relational
.LP
The relational operators in order of precedence, are:
.DS
= Equality
<> Inequality
< Less than
> Greater than
<= Less than or equal to
>= Greater than or equal to
.DE
The relational operators are used to compare two values and return
either "true" (-1) or "false" (0) (See IF statement).
The precedence of the relational operators is lower
than that of the arithmetic operators.
.SH
Logical
.LP
The logical operators perform tests on multiple relations, bit manipulations,
or boolean operations.
The logical operators return a bitwise result ("true" or "false").
In an expression, logical operators are performed after the relational and
arithmetic operators.
The logical operators work by converting their operands to signed
two's complement integers in the range -32768 to 32767.
.DS
NOT Bitwise negation
AND Bitwise and
OR Bitwise or
XOR Bitwise exclusive or
EQV Bitwise equivalence
IMP Bitwise implies
.DE
.SH
Functional
.LP
A function is used in an expression to call a system or user defined
function.
A list of predefined functions is presented in chapter 3.
.SH
String operations
.LP
Strings can be concatenated by using +. Strings can be compared with
the relational operators. String comparison is performed in lexicographic
order.
.NH 2
ERROR MESSAGES
.LP
The occurrence of an error results in termination of the program
unless an ON....ERROR statement has been encountered.
.bp
.NH 1
B-EM STATEMENTS
.LP
This chapter describes the statements available within the BASIC-EM
compiler. Each description is formatted as follows:
.Sy
Shows the correct syntax for the statement. See introduction of
syntax notation above.
.PU
Describes the purpose and details of the instructions.
.RM
Describes special cases, deviation from Microsoft BASIC etc.
.LP
.NH 2
CALL
.Sy
CALL <variable name>[(<argument list>)]
.PU
The CALL statement provides the means to execute procedures
and functions written in another language included in the
Amsterdam Compiler Kit.
The argument list consists of (subscripted) variables.
The BASIC compiler pushes the addresses of the arguments on the stack in order
of encounter.
.RM
Not yet available.
.NH 2
CLOSE
.Sy
CLOSE [[#]<file number>[,[#]<file number...>]]
.PU
To terminate I/O on a disk file.
<file number> is the number associated with the file
when it was OPENed (See OPEN-statement). Omission of the parameters results in closing
all files.
.sp
The END statement and STOP statement always issue a CLOSE of
all files.
.NH 2
DATA
.Sy
DATA <list of constants>
.PU
DATA statements are used to construct a data bank of values that are
accessed by the program's READ statement.
DATA statements are non-executable,
the data items are assembled in a data file by the BASIC compiler.
This file can be replaced, provided the layout remains
the same (otherwise the RESTORE won't function properly).
.sp
The list of data items consists of numeric and string constants
as discussed in section 1.
Moreover, string constants starting with a letter and not
containing blanks, newlines, commas or colons need not be enclosed with
the string quotes.
.sp
DATA statements can be reread using the RESTORE statement.
.NH 2
DEF FN
.Sy
DEF FN<name> [(<parameterlist>)]=<expression>
.PU
To define and name a function that is written by the user.
<name> must be an identifier and should be preceded by FN,
which is considered an integral part of the function name.
<expression> defines the expression to be evaluated upon function call.
.sp
The parameter list is a comma separated
list of variable names, used within the function definition,
that are to be replaced by values upon function call.
The variable names defined in the parameterlist, called formal
parameters, do not affect the definition and use of variables
defined with the same name in the rest of the BASIC program.
.sp
A type declaration character may be suffixed to the function name to
designate the data type of the function result.
.NH 2
DEFINT/SNG/DBL/STR
.Sy
DEF<type> <range of letters>
.PU
Any undefined variable starting with a letter included in the range of
letters is declared of type <type> unless a type declaration character
is appended.
The range of letters is a comma separated list of characters and
character ranges (<letter>-<letter>).
.NH 2
DIM
.Sy
DIM <list of subscripted variable>
.PU
The DIM statement allocates storage for subscripted variables.
If an undefined subscripted variable is used
the maximum value of the array subscript is assumed to be 10.
A subscript out of range is signalled by the program (when ACK works).
The minimum subscript value is 0, unless the OPTION BASE statement has been
encountered.
.sp
All variables in a subscripted variable are initially zero.
.sp
BUGS. Multi-dimensional arrays MUST be defined. Subscripts out of range are
not reported.
.NH 2
END
.Sy
END
.PU
END terminates a BASIC program and returns to the UNIX shell.
An END statement at the end of the BASIC program is optional.
.NH 2
ERR and ERL
.Sy
<identifier name>= ERR
.br
<identifier name>= ERL
.PU
Whenever an error occurs the variable ERR contains the
error number and ERL the BASIC line where the error occurred.
The variables are usually used in error handling routines
provided by the user.
.NH 2
ERROR
.Sy
ERROR <integer expression>
.PU
To simulate the occurrence of a BASIC error.
To define a private error code a value must be used that is not already in
use by the BASIC runtime system.
The list of error messages currently in use can be found in appendix B.
.NH 2
FIELD
.PU
To be implemented.
.NH 2
FOR...NEXT
.Sy
FOR <variable>= <low>TO<high>[STEP<size>]
.br
......
.br
NEXT [<variable>][,<variable>...]
.PU
The FOR statement allows a series of statements to be performed
repeatedly. <variable> is used as a counter. During the first
execution pass it is assigned the value <low>,
an arithmetic expression. After each pass the counter
is incremented (decremented) by the step size <size>, an expression.
Omission of the step size is interpreted as an increment of 1.
.br
Execution of the program lines specified between the FOR and the NEXT
statement is terminated as soon as the counter is greater (less) than <high>.
.sp
The NEXT statement is labeled with the name(s) of the counter to be
incremented.
.sp
The variables mentioned in the NEXT statement may be omitted, in which case
the counter of the most recent FOR statement is incremented.
If a NEXT statement is encountered before its corresponding FOR statement,
the error message "NEXT without FOR" is generated.
.NH 2
GET
.Sy
GET [#]<file number>[, <record number>]
.PU
To be implemented.
.NH 2
GOSUB...RETURN
.Sy
GOSUB <line number>
...
.br
RETURN
.PU
The GOSUB statement branches to the first statement of a subroutine.
The RETURN statement causes a branch back to the statement following the
most recent GOSUB statement.
A subroutine may contain more than one RETURN statement.
.sp
Subroutines may be called recursively.
Nesting of subroutine calls is limited; upon exceeding the maximum depth
the error message "XXXXX" is displayed.
.NH 2
GOTO
.Sy
GOTO <line number>
.PU
To branch unconditionally to a specified line in the program.
If <line number> does not exist, the compilation error message
"Line not defined" is displayed.
.RM
Microsoft BASIC continues at the first line
with a number equal to or greater than the line specified.
.NH 2
IF...THEN
.Sy
.br
IF <expression> THEN {<statements>|<line number>}
[ELSE {<statements>|<line number>}]
.br
.Sy
IF <expression> GOTO <line number>
[ELSE {<statements>|<line number>}]
.PU
The IF statement is used
to make a decision regarding the program flow based on the
result of the expressions.
If the expression is not zero, the THEN or GOTO clause is
executed. If the result of <expression> is zero, the THEN or
GOTO clause is ignored and the ELSE clause, if present, is
executed.
.br
IF..THEN..ELSE statements may be nested.
Nesting is limited by the length of the line.
The ELSE clause matches the closest unmatched THEN.
.sp
When using IF to test equality for a value that is the
result of a floating point expression, remember that the
internal representation of the value may not be exact.
Therefore, the test should be against a range to
handle the relative error.
.RM
Microsoft BASIC allows a comma before THEN.
.NH 2
INPUT
.Sy
INPUT [;][<"prompt string">;]<list of variables>
.PU
An INPUT statement can be used to obtain values from the user at the
terminal.
When an INPUT statement is encountered a question mark is printed
to indicate the program is awaiting data.
IF <"prompt string"> is included, the string is printed before the
the question mark. The question mark is suppressed when the prompt
string is followed by a comma, rather then a semicolon.
.sp
For each variable in the variable list a value should be supplied.
Data items presented should be separated by a comma.
.sp
The type of the variable in the variable list must agree with the
type of the data item entered. Responding with too few or too many
data items causes the message "?Redo". No assignment of input values
is made until an acceptable response is given.
.RM
The option to discard the carriage return with the semicolon after the
input symbol is not yet implemented.
.NH 2
INPUT [#]
.Sy
INPUT #<file number>,<list of variables>
.PU
The purpose of the INPUT# statement is to read data items from a sequential
file and assign them to program variables.
<file number> is the number used to open the file for input.
The variables mentioned are (subscripted) variables.
The type of the data items read should agree with the type of the variables.
A type mismatch results in the error message "XXXXX".
.sp
The data items on the sequential file are separated by commas and newlines.
In scanning the file, leading spaces, new lines, tabs, and
carriage returns are ignored. The first character encountered
is assumed to be the start of a new item.
String items need not be enclosed in double quotes, provided
they do not contain spaces, tabs, newlines or commas.
.RM
Microsoft BASIC won't assign values until the end of input statement.
This means that the user has to supply all the information.
.NH 2
LET
.Sy
[LET]<variable>=<expression>
.PU
To assign the value of an expression to a (subscripted) variable.
The type conversions as dictated in chapter 1 apply.
.NH 2
LINE INPUT
.Sy
LINE INPUT [;][<"prompt string">;]<string variable>
.PU
An entire line of input is assigned to the string variable.
See INPUT for the meaning of the <"prompt string"> option.
.NH 2
LINE INPUT [#]
.Sy
LINE INPUT #<file number>,<string variable>
.PU
Read an entire line of text from a sequential file <file number>
and assign it to a string variable.
.NH 2
LSET and RSET
.PU
To be implemented
.NH 2
MID$
.Sy
MID$(<string expr1>,n[,m])=<string expr2>
.PU
To replace a portion of a string with another string value.
The characters of <string expr2> replace characters in <string expr1>
starting at position n. If m is present, at most m characters are copied,
otherwise all characters are copied.
However, the string obtained never exceeds the length of string expr1.
.NH 2
ON ERROR GOTO
.Sy
ON ERROR GOTO <line number>
.PU
To enable error handling within the BASIC program.
An error may result from arithmetic errors, disk problems, interrupts, or
as a result of the ERROR statement.
After printing an error message the program is continued at the
statements associated with <line number>.
.sp
Error handling is disabled using ON ERROR GOTO 0.
Subsequent errors result in an error message and program termination.
.NH 2
ON...GOSUB and ON ...GOTO
.Sy
ON <expression> GOSUB <list of line numbers>
.br
ON <expression> GOTO <list of line numbers>
.PU
To branch to one of several specified line numbers or subroutines, based
on the result of the <expression>. The line numbers in the list are considered
the first, second, etc alternative. Branching to the first occurs when
the expression evaluates to one, to the second alternative on two, etc.
If the value of the expression is zero or greater than the number of alternatives, processing continues at the first statement following the ON..GOTO
(ON GOSUB) statement.
.sp
When the expression results in a negative number,
an "Illegal function call" error occurs.
.sp
BUG If the value of the expression is zero or greater than the number of
alternatives, processing does NOT continue at the first statement
following the ON..GOTO (ON GOSUB) statement.
.NH 2
OPEN
.Sy
OPEN {"i" | "o" | "r" } , [#]<file number> , <file-name>
.PU
To open <file-name> (the filename should be quoted) for input/reading or output.
If the file is not opened for output it has to exist, otherwise a
"file not found" error will occur.
.NH 2
OPTION BASE
.Sy
OPTION BASE n
.PU
To declare the lower bound of subsequent array subscripts as either
0 or 1. The default lower bound is zero.
.NH 2
POKE
.Sy
POKE <expr1>,<expr2>
.PU
To poke around in memory. The use of this statement is not recommended,
because it requires full understanding of both
the implementation of the Amsterdam
Compiler Kit and the hardware characteristics.
.NH 2
PRINT
.Sy
PRINT <list of variables and/or constants>
.PU
To print constants or the contents of variables on the terminal-device.
If the variables or constants are separated by commas the values will
be printed separated by tabs.
If the variables or constants are separated by semicolons the values
will be printed without spaces in between.
The new-line generated at the end of the print-statement can be suppressed by
a semi-colon at the end of list of variables or constants.
.NH 2
PRINT USING
.PU
To be implemented
.NH 2
PUT
.PU
To be implemented
.NH 2
RANDOMIZE
.Sy
RANDOMIZE [<expression>]
.PU
To reset the random seed. When the expression is omitted, the system
will ask for a value between -32768 and 32767.
The random number generator returns the same sequence of values provided
the same seed is used.
.NH 2
READ
.Sy
READ <list of variables>
.PU
To read values from the DATA statements and assign them to variables.
The type of the variables should match the type of the items being read,
otherwise a "Syntax error" occurs. If all data is read the message "Out of
data" will be displayed.
.NH 2
REM
.Sy
REM <remark>
.PU
To include explanatory information in a program.
The REM statements are not executed.
A single quote has the same effect as : REM, which
allows for the inclusion of comment at the end of the line.
.RM
Microsoft BASIC does not allow REM statements as part of
DATA lines.
.NH 2
RESTORE
.Sy
RESTORE [<line number>]
.PU
To allow DATA statements to be re-read from a specific line.
After a RESTORE statement is executed, the next READ accesses
the first item of the DATA statements.
If <line number> is specified, the next READ accesses the first
item in the specified line.
.sp
Note that DATA statements result in a sequential data file generated
by the compiler, which is read by the READ statements.
This data file may be replaced using the operating system functions
with a modified version, provided the same layout of items
(same number of lines and items per line) is used.
.NH 2
STOP
.Sy
STOP
.PU
To terminate the execution of a program and return to the operating system
command interpreter. A STOP statement results in the message "Break in line
???"
.NH 2
SWAP
.Sy
SWAP <variable>,<variable>
.PU
To exchange the values of two variables.
.sp
BUG. Strings cannot be swapped !
.NH 2
TRON/TROFF
.Sy
TRON
.Sy
TROFF
.PU
As an aid in debugging, the TRON statement results in the program
listing each line as it is executed. TROFF disables generation of
this code.
.NH 2
WHILE...WEND
.Sy
WHILE <expression>
.....
WEND
.PU
To execute a series of BASIC statements as long as a conditional expression
is true. WHILE...WEND loops may be nested.
.NH 2
WRITE
.Sy
WRITE [<list of expressions>]
.PU
To write data at the terminal in DATA statement layout conventions.
The expressions should be separated by commas.
.NH 2
WRITE #
.Sy
WRITE #<file number> ,<list of expressions>
.PU
To write a sequential data file, opened in the "o" mode.
The values are written using the DATA statement layout conventions.
.bp
.NH
FUNCTIONS
.LP
.IP ABS(X) 25
Returns the absolute value of expression X
.IP ASC(X$) 25
Returns the numeric value of the first character of the string.
If X$ is not initialized an "Illegal function call" error
is returned.
.IP ATN(X) 25
Returns the arctangent of X in radians. Result is in the range
of -pi/2 to pi/2.
.IP CDBL(X) 25
Converts X to a double precision number.
.IP CHR$(X) 25
Converts the integer value X to its ASCII character.
X must be in the range 0 to 255.
It is used for cursor addressing and generating bel signals.
.IP CINT(X) 25
Converts X to an integer by rounding the fractional portion.
If X is not in the range -32768 to 32767 an "Overflow"
error occurs.
.IP COS(X) 25
Returns the cosine of X in radians.
.IP CSNG(X) 25
Converts X to a single precision number.
.IP CVI(<2-bytes>) 25
Convert two byte string value to integer number.
.IP CVS(<4-bytes>) 25
Convert four byte string value to single precision number.
.IP CVD(<8-bytes>) 25
Convert eight byte string value to double precision number.
.IP EOF[(<file-number>)] 25
Returns -1 (true) if the end of a sequential file has been reached.
.IP EXP(X) 25
Returns e(base of natural logarithm) to the power of X.
X should be less than 10000.0.
.IP FIX(X) 25
Returns the truncated integer part of X. FIX(X) is
equivalent to SGN(X)*INT(ABS(X)).
The major difference between FIX and INT is that FIX does not
return the next lower number for negative X.
.IP HEX$(X) 25
Returns the string which represents the hexadecimal value of
the decimal argument. X is rounded to an integer using CINT
before HEX$ is evaluated.
.IP INT(X) 25
Returns the largest integer <= X.
.IP INP$(X[,[#]Y]) 25
Returns the string of X characters read from the terminal or
the designated file.
.IP LEN(X$) 25
Returns the number of characters in the string X$.
Non-printable characters and blanks are counted too.
.IP LOC(<file\ number>) 25
For sequential files LOC returns the
position of the read/write head, counted in bytes.
For random files the function returns the record number just
read or written from a GET or PUT statement.
If nothing was read or written 0 is returned.
.IP LOG(X) 25
Returns the natural logarithm of X. X must be greater than zero.
.IP MID$(X,I,[J]) 25
Returns the first J characters from string X starting at position I in X.
If J is omitted, all characters from position I in X onwards are returned.
.IP MKI$(X) 25
Converts an integer expression to a two-byte string.
.IP MKS$(X) 25
Converts a single precision expression to a four-byte string.
.IP MKD$(X) 25
Converts a double precision expression to an eight-byte string.
.IP OCT$(X) 25
Returns the string which represents the octal value of the decimal
argument. X is rounded to an integer using CINT before OCT$ is evaluated.
.IP PEEK(I) 25
Returns the byte read from the indicated memory. (Of limited use
in the context of ACK)
.IP POS(I) 25
Returns the current cursor position. To be implemented.
.IP RIGHT$(X$,I)
Returns the right most I characters of string X$.
If I=0 then the empty string is returned.
.IP RND(X) 25
Returns a random number between 0 and 1. X is a dummy argument.
.IP SGN(X) 25
If X>0 , SGN(X) returns 1.
.br
if X=0, SGN(X) returns 0.
.br
if X<0, SGN(X) returns -1.
.IP SIN(X) 25
Returns the sine of X in radians.
.IP SPACE$(X) 25
Returns a string of X spaces. The expression
X is rounded to an integer using CINT.
.IP STR$(X)
Returns the string representation value of X.
.IP STRING$(I,J) 25
Returns the string of length I whose characters all
have ASCII code J (or the code of the first character, when J is a string).
.IP TAB(I) 25
Spaces to position I on the terminal. If the current
print position is already beyond position I, TAB
goes to that position on the next line.
Position 1 is the leftmost position, and the rightmost position
is the width minus 1. To be used within PRINT statements only.
.IP TAN(X) 25
Returns the tangent of X in radians. If TAN overflows
the "Overflow" message is displayed.
.IP VAL(X$) 25
Returns the numerical value of string X$.
The VAL function strips leading blanks and tabs from the
argument string.
.bp
.SH
APPENDIX A DIFFERENCES WITH MICROSOFT BASIC
.LP
The following Microsoft commands and statements are
not recognized by the compiler.
.DS
SPC
USR
VARPTR
AUTO
CHAIN
CLEAR
CLOAD
COMMON
CONT
CSAVE
DELETE
EDIT
ERASE
FRE
KILL
LIST
LLIST
LOAD
LPRINT
MERGE
NAME
NEW
NULL
RENUM
RESUME
RUN
SAVE
WAIT
WIDTH LPRINT
.DE
Some statements are not available in the current implementation,
but will be soon. These include:
.DS
CALL
DEFUSR
FIELD
GET
INKEY
INPUT$
INSTR$
LEFT$
LSET
RSET
PUT
.DE
.bp
.SH
APPENDIX B RESERVED WORDS IN BASIC-EM
.LP
The following words/symbols/names/identifiers are reserved, which
means that they cannot be used as variable names.
.DS
ABS AND ASC AS
ATN AUTO BASE CALL
CDBL CHAIN CHR CINT
CLEAR CLOAD CLOSE COMMON
CONT COS CSNG CSAVE
CVI CVS CVD DATA
DEFINT DEFSNG DEFDBL DEFSTR
DEF DELETE DIM EDIT
ELSE END EOF ERASE
ERROR ERR ERL ELSE
EQV EXP FIELD FIX
FOR FRE GET GOSUB
GOTO HEX IF IMP
INKEY INPUT INP INSTR
INT KILL LEFT LEN
LET LINE LIST LLIST
LOAD LOC LOG LPOS
LPRINT LSET MERGE MID
MKI MKS MKD MOD
NAME NEW NEXT NOT
NULL ON OCT OPEN
OPTION OR OUT PEEK
POKE PRINT POS PUT
RANDOMIZE READ REM RENUM
REN RESTORE RESUME RETURN
RIGHT RND RUN SAVE
STEP SGN SIN SPACE
SPC SQR STOP STRING
STR SWAP TAB TAN
THEN TO TRON TROFF
USING USR VAL VARPTR
WAIT WHILE WEND WIDTH
WRITE XOR
.DE

View File

@ -1,3 +0,0 @@
proto.make
ceg.ref
ceg.tr

View File

@ -1,42 +0,0 @@
%T A Practical Toolkit For Making Compilers
%A A.S. Tanenbaum
%A H. v. Staveren
%A E.G. Keizer
%A J.W. Stevenson
%J Communications of the ACM
%V 26
%N 9
%D September 1983
%T Description of a Machine Architecture for Use with Block Structured Languages
%A A.S. Tanenbaum
%A H. v. Staveren
%A E.G. Keizer
%A J.W. Stevenson
%R IR-81
%I Dept. Mathematics and Computer Science, Vrije Universiteit
%C Amsterdam
%D August 1983
%T EM_CODE(3ACK)
%A ACK Documentation
%I Dept. Mathematics and Computer Science, Vrije Universiteit
%C Amsterdam
%T ACK.OUT(5ACK)
%A ACK Documentation
%I Dept. Mathematics and Computer Science, Vrije Universiteit
%C Amsterdam
%K aout
%T PRINT(3ACK)
%A ACK Documentation
%I Dept. Mathematics and Computer Science, Vrije Universiteit
%C Amsterdam
%T The C Programming Language
%A B.W. Kernighan
%A D.M. Ritchie
%I Prentice-Hall Inc.
%C Englewood Cliffs, New Jersey
%D 1978

File diff suppressed because it is too large Load Diff

View File

@ -1,12 +0,0 @@
# $Id$
#PARAMS do not remove this line!
SRC_DIR = $(SRC_HOME)/doc/ceg
PIC=pic
TBL=tbl
REFER=refer
$(TARGET_HOME)/doc/ceg.doc: $(SRC_DIR)/ceg.tr $(SRC_DIR)/ceg.ref
$(PIC) $(SRC_DIR)/ceg.tr | $(REFER) -e -p $(SRC_DIR)/ceg.ref | $(TBL) > $@

1864
doc/cg.doc

File diff suppressed because it is too large Load Diff

View File

@ -1,629 +0,0 @@
\." $Id$
.\" eqn crefman.doc | troff -ms
.EQ
delim $$
.EN
.RP
.TL
ACK/CEM Compiler
.br
Reference Manual
.AU
Erik H. Baalbergen
.AI
Department of Mathematics and Computer Science
Vrije Universiteit
Amsterdam
The Netherlands
.AB no
.AE
.NH
C Language
.PP
This section discusses the extensions to and deviations from the C language,
as described in [1].
The issues are numbered according to the reference manual.
.SH
2.2 Identifiers
.PP
Upper and lower case letters are different.
The number of significant letters
is 32 by default, but may be set to another value using the \fB\-M\fP option.
The identifier length should be set according to the rest of the compilation
programs.
.SH
2.3 Keywords
.SH
\f(CWasm\fP
.PP
The keyword \f(CWasm\fP
is recognized.
However, the statement
.DS
.ft CW
asm(string);
.ft R
.DE
is skipped, while a warning is given.
.SH
\f(CWenum\fP
.PP
The \f(CWenum\fP keyword is recognized and interpreted.
.SH
\f(CWentry\fP, \f(CWfortran\fP
.PP
The words \f(CWentry\fP and \f(CWfortran\fP
are reserved under the restricted option.
The words are not interpreted by the compiler.
.SH
2.4.1 Integer Constants
.PP
The type of an integer constant is the first of the corresponding list
in which its value can be represented. Decimal: \f(CWint, long, unsigned long\fP;
octal or hexadecimal: \f(CWint, unsigned, long, unsigned long\fP; suffixed by
the letter L or l: \f(CWlong, unsigned long\fP.
.SH
2.4.3 Character Constants
.PP
A character constant is a sequence of 1 up to \f(CWsizeof(int)\fP characters
enclosed in single quotes.
The value of a character constant '$c sub 1 c sub 2 ... c sub n$'
is $d sub n + M \(mu d sub {n - 1} + ... + M sup {n - 2} \(mu d sub 2 + M sup {n - 1} \(mu d sub 1$,
where M is 1 + maximum unsigned number representable in an \f(CWunsigned char\fP,
and $d sub i$ is the signed value (ASCII)
of character $c sub i$.
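For example, with 8-bit characters M is 256; assuming ASCII values, the
constant \f(CW'AB'\fP then gets the value 66 + 256 \(mu 65, i.e. 16706.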
.SH
2.4.4 Floating Constants
.PP
The compiler does not support compile-time floating point arithmetic.
.SH
2.6 Hardware characteristics
.PP
The compiler is capable of producing EM code for machines with the following
properties
.IP \(bu
a \f(CWchar\fP is 8 bits
.IP \(bu
the size of \f(CWint\fP is equal to the word size
.IP \(bu
the size of \f(CWshort\fP may not exceed the size of \f(CWint\fP
.IP \(bu
the size of \f(CWint\fP may not exceed the size of \f(CWlong\fP
.IP \(bu
the size of pointers is equal to the size of either \f(CWshort\fP, \f(CWint\fP
or \f(CWlong\fP
.LP
.SH
4 What's in a name?
.SH
\f(CWchar\fP
.PP
Objects of type \f(CWchar\fP are taken to be signed.
The combination \f(CWunsigned char\fP is legal.
.SH
\f(CWunsigned\fP
.PP
The type combinations \f(CWunsigned char\fP, \f(CWunsigned short\fP and
\f(CWunsigned long\fP are supported.
.SH
\f(CWenum\fP
.PP
The data type \f(CWenum\fP is implemented as described
in \fIRecent Changes to C\fP (see appendix A).
.I Cem
treats enumeration variables as if they were \f(CWint\fP.
.SH
\f(CWvoid\fP
.PP
Type \f(CWvoid\fP is implemented.
The type specifies an empty set of values, which takes no storage space.
.SH
\fRFundamental types\fP
.PP
The names of the fundamental types can be redefined by the user, using
\f(CWtypedef\fP.
.SH
7 Expressions
.PP
The order of evaluation of expressions depends on the complexity of the
subexpressions.
In case of commutative operations, the most complex subexpression is
evaluated first.
Parameter lists are evaluated from right to left.
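As an illustration (the function names are arbitrary):
.DS
.ft CW
int f(), g(), h();

int x()
{
        /* the parameter list is evaluated from right to left,
         * so h() is called before g() */
        return f(g(), h());
}
.ft R
.DE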
.SH
7.2 Unary operators
.PP
The type of a \f(CWsizeof\fP expression is \f(CWunsigned int\fP.
.SH
7.13 Conditional operator
.PP
Both the second and the third expression in a conditional expression may
include assignment operators.
They may be structs or unions.
.SH
7.14 Assignment operators
.PP
Structures may be assigned, passed as arguments to functions, and returned
by functions.
The types of operands taking part must be the same.
.SH
8.2 Type specifiers
.PP
The combinations \f(CWunsigned char\fP, \f(CWunsigned short\fP
and \f(CWunsigned long\fP are implemented.
.SH
8.5 Structure and union declarations
.PP
Fields of any integral type, either signed or unsigned,
are supported, as long as the type fits in a word on the target machine.
.PP
Fields are left adjusted by default; the first field is put into the left
part of a word, the next one on the right side of the first one, etc.
The \f(CW-Vr\fP option in the call of the compiler
causes fields to be right adjusted within a machine word.
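As an illustration (an arbitrary struct; the exact layout remains
machine dependent):
.DS
.ft CW
struct s {
        unsigned a : 4;  /* left adjusted by default: 'a' occupies the  */
        unsigned b : 4;  /* leftmost bits of the word and 'b' the bits  */
};                       /* directly to its right; with -Vr the fields  */
                         /* are right adjusted instead                  */
.ft R
.DE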
.PP
The tags of structs and unions occupy a different name space from that of
variables and that of member names.
.SH
9.7 Switch statement
.PP
The type of \fIexpression\fP in
.DS
.ft CW
\f(CWswitch (\fP\fIexpression\fP\f(CW)\fP \fIstatement\fP
.ft
.DE
must be integral.
A warning is given under the restricted option if the type is \f(CWlong\fP.
.SH
10 External definitions
.PP
See [4] for a discussion on this complicated issue.
.SH
10.1 External function definitions
.PP
Structures may be passed as arguments to functions, and returned
by functions.
.SH
11.1 Lexical scope
.PP
Typedef names may be redeclared like any other variable name; the ice mentioned
in \(sc11.1 is walked correctly.
.SH
12 Compiler control lines
.PP
Lines which do not occur within comment, and with \f(CW#\fP as first
character, are interpreted as compiler control lines.
There may be an arbitrary number of spaces, tabs and comments (collectively
referred to as \fIwhite space\fP) following the \f(CW#\fP.
Comments may contain newline characters.
Control lines with only white space between the \f(CW#\fP and the line separator
are skipped.
.PP
The #\f(CWinclude\fP, #\f(CWifdef\fP, #\f(CWifndef\fP, #\f(CWundef\fP, #\f(CWelse\fP and
#\f(CWendif\fP control lines and line directives consist of a fixed number of
arguments.
The list of arguments may be followed by an arbitrary sequence of characters,
in which comment is interpreted as such.
(I.e., the text between \f(CW/*\fP and \f(CW*/\fP is skipped, regardless of
newlines; note that commented-out lines beginning with \f(CW#\fP are not
considered to be control lines.)
.SH
12.1 Token replacement
.PP
The replacement text of macros is taken to be a string of characters, in which
an identifier may stand for a formal parameter, and in which comment is
interpreted as such.
Comments, and newline characters preceded by a backslash, in the replacement
text are replaced by a space character.
.PP
The actual parameters of a macro are considered tokens and are
balanced with regard to \f(CW()\fP, \f(CW{}\fP and \f(CW[]\fP.
This prevents the use of macros like
.DS
.ft CW
CTL([)
.ft
.DE
.PP
Formal parameters of a macro must have unique names within the formal-parameter
list of that macro.
.PP
A message is given at the definition of a macro if the macro has
already been #\f(CWdefined\fP, while the number of formal parameters differ or
the replacement texts are not equal (apart from leading and trailing
white space).
.PP
Recursive use of macros is detected by the compiler.
.PP
Standard #\f(CWdefined\fP macros are
.DS
\f(CW__FILE__\fP name of current input file as string constant
\f(CW__DATE__\fP	current date as string constant; e.g. \f(CW"Tue Wed 2 14:45:23 1986"\fP
\f(CW__LINE__\fP current line number as an integer
.DE
.PP
No message is given if \fIidentifier\fP is not known in
.DS
.ft CW
#undef \fIidentifier\fP
.ft
.DE
.SH
12.2 File inclusion
.PP
A newline character is appended to each file which is included.
.SH
12.3 Conditional compilation
.PP
The #\f(CWif\fP, #\f(CWifdef\fP and #\f(CWifndef\fP control lines may be followed
by an arbitrary number of
.DS
.ft CW
#elif \fIconstant-expression\fP
.ft
.DE
control lines, before the corresponding #\f(CWelse\fP or #\f(CWendif\fP
is encountered.
The construct
.DS
.ft CW
#elif \fIconstant-expression\fP
some text
#endif /* corresponding to #elif */
.ft
.DE
is equivalent to
.DS
.ft CW
#else
#if \fIconstant-expression\fP
some text
#endif /* corresponding to #if */
#endif /* corresponding to #else */
.ft
.DE
.PP
The \fIconstant-expression\fP in #\f(CWif\fP and #\f(CWelif\fP control lines
may contain the construction
.DS
.ft CW
defined(\fIidentifier\fP)
.ft
.DE
which is replaced by \f(CW1\fP, if \fIidentifier\fP has been #\f(CWdefined\fP,
and by \f(CW0\fP, if not.
.PP
Comments in skipped lines are interpreted as such.
.SH
12.4 Line control
.PP
Line directives may occur in the following forms:
.DS
.ft CW
#line \fIconstant\fP
#line \fIconstant\fP "\fIfilename\fP"
#\fIconstant\fP
#\fIconstant\fP "\fIfilename\fP"
.ft
.DE
Note that \fIfilename\fP is enclosed in double quotes.
.SH
14.2 Functions
.PP
If a pointer to a function is called, the function the pointer points to
is called instead.
.SH
15 Constant expressions
.PP
The compiler distinguishes the following types of integral constant expressions
.IP \(bu
field-width specifier
.IP \(bu
case-entry specifier
.IP \(bu
array-size specifier
.IP \(bu
global variable initialization value
.IP \(bu
enum-value specifier
.IP \(bu
truth value in \f(CW#if\fP control line
.LP
.PP
Constant integral expressions are compile-time evaluated while an effort
is made to report overflow.
Constant floating expressions are not compile-time evaluated.
.NH
Compiler flags
.IP \fB\-C\fR
Run the preprocessor stand-alone while maintaining the comments.
Line directives are produced whenever needed.
.IP \fB\-D\fP\fIname\fP=\fIstring-of-characters\fP
.br
Define \fIname\fR as macro with \fIstring-of-characters\fR as
replacement text.
.IP \fB\-D\fP\fIname\fP
.br
Equal to \fB\-D\fP\fIname\fP\fB=1\fP.
.IP \fB\-E\fP
Run the preprocessor stand alone, i.e.,
list the sequence of input tokens and delete any comments.
Line directives are produced whenever needed.
.IP \fB\-I\fIpath\fR
.br
Prepend \fIpath\fR to the list of include directories.
To put the directories "include", "sys/h" and "util/h" into the
include directory list in that order, the user has to specify
.DS
.ft CW
-Iinclude -Isys/h -Iutil/h
.ft R
.DE
An empty \fIpath\fP causes the standard include
directory (usually \f(CW/usr/include\fP) to be forgotten.
.IP \fB\-M\fP\fIn\fP
.br
Set maximum significant identifier length to \fIn\fP.
.IP \fB\-n\fP
Suppress EM register messages.
The user-declared variables are not stored into registers on the target
machine.
.IP \fB\-p\fP
Generate the EM \fBfil\fP and \fBlin\fP instructions in order to enable
an interpreter to keep track of the current location in the source code.
.IP \fB\-P\fP
Equivalent with \fB\-E\fP, but without line directives.
.IP \fB\-R\fP
Interpret the input as restricted C (according to the language as
described in [1]).
.IP \fB\-T\fP\fIpath\fP
.br
Create temporary files, if necessary, in directory \fIpath\fP.
.IP \fB\-U\fP\fIname\fP
.br
Get rid of the compiler-predefined macro \fIname\fP, i.e.,
consider
.DS
.ft CW
#undef \fIname\fP
.ft R
.DE
to appear in the beginning of the file.
.IP \fB\-V\fIcm\fR.\fIn\fR,\ \fB\-V\fIcm\fR.\fIncm\fR.\fIn\fR\ ...
.br
Set the size and alignment requirements.
The letter \fIc\fR indicates the simple type, which is one of
\fBs\fR(short), \fBi\fR(int), \fBl\fR(long), \fBf\fR(float), \fBd\fR(double)
or \fBp\fR(pointer).
If \fIc\fR is \fBS\fP or \fBU\fP, then \fIn\fP is taken to be the initial
alignment of structs or unions, respectively.
The effective alignment of a struct or union is the least common multiple
of the initial struct/union alignment and the alignments of its members.
The \fIm\fR parameter can be used to specify the length of the type (in bytes)
and the \fIn\fR parameter for the alignment of that type.
Absence of \fIm\fR or \fIn\fR causes the default value to be retained.
To specify that the bitfields should be right adjusted instead of the
default left adjustment, specify \fBr\fR as \fIc\fR parameter.
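A hypothetical example:
.DS
.ft CW
-Vi4.4l4.4p4.4
.ft R
.DE
would set \f(CWint\fP, \f(CWlong\fP and pointers to a size of 4 bytes,
each aligned on 4-byte boundaries; the appropriate values are of course
machine dependent.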
.IP \fB\-w\fR
Suppress warning messages
.IP \fB\-\-\fIcharacter\fR
.br
Set debug-flag \fIcharacter\fP.
This enables some special features offered by a debug and develop version of
the compiler.
Some particular flags may be recognized, others may have surprising effects.
.RS
.IP \fBd\fP
Generate a dependency graph, reflecting the calling structure of functions.
Lines of the form
.DS
.ft CW
DFA: \fIcalling-function\fP: \fIcalled-function\fP
.ft
.DE
are generated whenever a function call is encountered.
.IP \fBf\fP
Dump whole identifier table, including macros and reserved words.
.IP \fBh\fP
Supply hash-table statistics.
.IP \fBi\fP
Print names of included files.
.IP \fBm\fP
Supply statistics concerning the memory allocation.
.IP \fBt\fP
Dump table of identifiers.
.IP \fBu\fP
Generate extra statistics concerning the predefined types and identifiers.
Works in combination with \fBf\fP or \fBt\fP.
.IP \fBx\fP
Print expression trees in human-readable format.
.RE
.LP
.SH
References
.IP [1]
Brian W. Kernighan, Dennis M. Ritchie,
.I
The C Programming Language
.R
.IP [2]
L. Rosler,
.I
Draft Proposed Standard - Programming Language C,
.R
ANSI X3J11 Language Subcommittee
.IP [3]
Erik H. Baalbergen, Dick Grune, Maarten Waage,
.I
The CEM Compiler,
.R
Informatica Manual IM-4, Dept. of Mathematics and Computer Science, Vrije
Universiteit, Amsterdam, The Netherlands
.IP [4]
Erik H. Baalbergen,
.I
Modeling global declarations in C,
.R
internal paper
.LP
.bp
.SH
Appendix A - Enumeration Type
.PP
The syntax is
.sp
.RS
.I enum-specifier :
.RS
\&\f(CWenum\fP { \fIenum-list\fP }
.br
\&\f(CWenum\fP \fIidentifier\fP { \fIenum-list\fP }
.br
\&\f(CWenum\fP \fIidentifier\fP
.RE
.sp
\&\fIenum-list\fP :
.RS
\&\fIenumerator\fP
.br
\&\fIenum-list\fP , \fIenumerator\fP
.RE
.sp
\&\fIenumerator\fP :
.RS
\&\fIidentifier\fP
.br
\&\fIidentifier\fP = \fIconstant-expression\fP
.RE
.sp
.RE
The identifier has the same role as the structure tag in a struct specification.
It names a particular enumeration type.
.PP
The identifiers in the enum-list are declared as constants, and may appear
whenever constants are required.
If no enumerators with
.B =
appear, then the values of the constants begin at 0 and increase by 1 as the
declaration is read from left to right.
An enumerator with
.B =
gives the associated identifier the value indicated; subsequent identifiers
continue the progression from the assigned value.
.PP
Enumeration tags and constants must all be distinct, and, unlike structure
tags and members, are drawn from the same set as ordinary identifiers.
.PP
Objects of a given enumeration type are regarded as having a type distinct
from objects of all other types.
.bp
.SH
Appendix B: C grammar in LL(1) form
.PP
The \fBbold-faced\fP and \fIitalicized\fP tokens represent terminal symbols.
.vs 16
.nf
\fBexternal definitions\fP
program: external-definition*
external-definition: ext-decl-specifiers [declarator [function | non-function] | '\fB;\fP'] | asm-statement
ext-decl-specifiers: decl-specifiers?
non-function: initializer? ['\fB,\fP' init-declarator]* '\fB;\fP'
function: declaration* compound-statement
.sp 1
\fBdeclarations\fP
declaration: decl-specifiers init-declarator-list? '\fB;\fP'
decl-specifiers: other-specifier+ [single-type-specifier other-specifier*]? | single-type-specifier other-specifier*
other-specifier: \fBauto\fP | \fBstatic\fP | \fBextern\fP | \fBtypedef\fP | \fBregister\fP | \fBshort\fP | \fBlong\fP | \fBunsigned\fP
type-specifier: decl-specifiers
single-type-specifier: \fItype-identifier\fP | struct-or-union-specifier | enum-specifier
init-declarator-list: init-declarator ['\fB,\fP' init-declarator]*
init-declarator: declarator initializer?
declarator: primary-declarator ['\fB(\fP' formal-list ? '\fB)\fP' | arrayer]* | '\fB*\fP' declarator
primary-declarator: identifier | '\fB(\fP' declarator '\fB)\fP'
arrayer: '\fB[\fP' constant-expression? '\fB]\fP'
formal-list: formal ['\fB,\fP' formal]*
formal: identifier
enum-specifier: \fBenum\fP [enumerator-pack | identifier enumerator-pack?]
enumerator-pack: '\fB{\fP' enumerator ['\fB,\fP' enumerator]* '\fB,\fP'? '\fB}\fP'
enumerator: identifier ['\fB=\fP' constant-expression]?
struct-or-union-specifier: [ \fBstruct\fP | \fBunion\fP] [ struct-declaration-pack | identifier struct-declaration-pack?]
struct-declaration-pack: '\fB{\fP' struct-declaration+ '\fB}\fP'
struct-declaration: type-specifier struct-declarator-list '\fB;\fP'?
struct-declarator-list: struct-declarator ['\fB,\fP' struct-declarator]*
struct-declarator: declarator bit-expression? | bit-expression
bit-expression: '\fB:\fP' constant-expression
initializer: '\fB=\fP'? initial-value
cast: '\fB(\fP' type-specifier abstract-declarator '\fB)\fP'
abstract-declarator: primary-abstract-declarator ['\fB(\fP' '\fB)\fP' | arrayer]* | '\fB*\fP' abstract-declarator
primary-abstract-declarator: ['\fB(\fP' abstract-declarator '\fB)\fP']?
.sp 1
\fBstatements\fP
statement:
expression-statement
| label '\fB:\fP' statement
| compound-statement
| if-statement
| while-statement
| do-statement
| for-statement
| switch-statement
| case-statement
| default-statement
| break-statement
| continue-statement
| return-statement
| jump
| '\fB;\fP'
| asm-statement
;
expression-statement: expression '\fB;\fP'
label: identifier
if-statement: \fBif\fP '\fB(\fP' expression '\fB)\fP' statement [\fBelse\fP statement]?
while-statement: \fBwhile\fP '\fB(\fP' expression '\fB)\fP' statement
do-statement: \fBdo\fP statement \fBwhile\fP '\fB(\fP' expression '\fB)\fP' '\fB;\fP'
for-statement: \fBfor\fP '\fB(\fP' expression? '\fB;\fP' expression? '\fB;\fP' expression? '\fB)\fP' statement
switch-statement: \fBswitch\fP '\fB(\fP' expression '\fB)\fP' statement
case-statement: \fBcase\fP constant-expression '\fB:\fP' statement
default-statement: \fBdefault\fP '\fB:\fP' statement
break-statement: \fBbreak\fP '\fB;\fP'
continue-statement: \fBcontinue\fP '\fB;\fP'
return-statement: \fBreturn\fP expression? '\fB;\fP'
jump: \fBgoto\fP identifier '\fB;\fP'
compound-statement: '\fB{\fP' declaration* statement* '\fB}\fP'
asm-statement: \fBasm\fP '\fB(\fP' \fIstring\fP '\fB)\fP' '\fB;\fP'
.sp 1
\fBexpressions\fP
initial-value: assignment-expression | initial-value-pack
initial-value-pack: '\fB{\fP' initial-value-list '\fB}\fP'
initial-value-list: initial-value ['\fB,\fP' initial-value]* '\fB,\fP'?
primary: \fIidentifier\fP | constant | \fIstring\fP | '\fB(\fP' expression '\fB)\fP'
secundary: primary [index-pack | parameter-pack | selection]*
index-pack: '\fB[\fP' expression '\fB]\fP'
parameter-pack: '\fB(\fP' parameter-list? '\fB)\fP'
selection: ['\fB.\fP' | '\fB\->\fP'] identifier
parameter-list: assignment-expression ['\fB,\fP' assignment-expression]*
postfixed: secundary postop?
unary: cast unary | postfixed | unop unary | size-of
size-of: \fBsizeof\fP [cast | unary]
binary-expression: unary [binop binary-expression]*
conditional-expression: binary-expression ['\fB?\fP' expression '\fB:\fP' assignment-expression]?
assignment-expression: conditional-expression [asgnop assignment-expression]?
expression: assignment-expression ['\fB,\fP' assignment-expression]*
unop: '\fB*\fP' | '\fB&\fP' | '\fB\-\fP' | '\fB!\fP' | '\fB~ \fP' | '\fB++\fP' | '\fB\-\-\fP'
postop: '\fB++\fP' | '\fB\-\-\fP'
multop: '\fB*\fP' | '\fB/\fP' | '\fB%\fP'
addop: '\fB+\fP' | '\fB\-\fP'
shiftop: '\fB<<\fP' | '\fB>>\fP'
relop: '\fB<\fP' | '\fB>\fP' | '\fB<=\fP' | '\fB>=\fP'
eqop: '\fB==\fP' | '\fB!=\fP'
arithop: multop | addop | shiftop | '\fB&\fP' | '\fB^ \fP' | '\fB|\fP'
binop: arithop | relop | eqop | '\fB&&\fP' | '\fB||\fP'
asgnop: '\fB=\fP' | '\fB+\fP' '\fB=\fP' | '\fB\-\fP' '\fB=\fP' | '\fB*\fP' '\fB=\fP' | '\fB/\fP' '\fB=\fP' | '\fB%\fP' '\fB=\fP'
| '\fB<<\fP' '\fB=\fP' | '\fB>>\fP' '\fB=\fP' | '\fB&\fP' '\fB=\fP' | '\fB^ \fP' '\fB=\fP' | '\fB|\fP' '\fB=\fP'
| '\fB+=\fP' | '\fB\-=\fP' | '\fB*=\fP' | '\fB/=\fP' | '\fB%=\fP'
| '\fB<<=\fP' | '\fB>>=\fP' | '\fB&=\fP' | '\fB^=\fP' | '\fB|=\fP'
constant: \fIinteger\fP | \fIfloating\fP
constant-expression: assignment-expression
identifier: \fIidentifier\fP | \fItype-identifier\fP
.fi

View File

@ -1,18 +0,0 @@
proto.make
bo
ca
cf
cj
cs
ic
il
intro
lv
ov
ra
refs.gen
refs.opt
refs.stat
sp
sr
ud

View File

@ -1 +0,0 @@
bo1

View File

@ -1,162 +0,0 @@
.bp
.NH 1
Branch Optimization
.NH 2
Introduction
.PP
The Branch Optimization phase (BO) performs two related
(branch) optimizations.
.NH 3
Fusion of basic blocks
.PP
If two basic blocks B1 and B2 have the following properties:
.DS
SUCC(B1) = {B2}
PRED(B2) = {B1}
.DE
then B1 and B2 can be combined into one basic block.
If B1 ends in an unconditional jump to the beginning of B2, this
jump can be eliminated,
hence saving a little execution time and object code size.
This technique can be used to eliminate some deficiencies
introduced by the front ends (for example, the "C" front end
translates switch statements inefficiently due to its one pass nature).
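.PP
The fusion test itself fits in a few lines of C.
The fragment below is only an illustrative sketch, not the actual
optimizer source; nsucc(), npred(), only_succ() and only_pred() are
assumed helpers over the successor and predecessor sets of a block.
.DS
.ft 5
struct bblock;                       /* basic block, details omitted */

/* B1 and B2 may be fused when SUCC(B1) = {B2} and PRED(B2) = {B1}. */
static int can_fuse(struct bblock *b1, struct bblock *b2)
{
    return nsucc(b1) == 1 && only_succ(b1) == b2
        && npred(b2) == 1 && only_pred(b2) == b1;
}
.ft R
.DE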
.NH 3
While-loop optimization
.PP
The straightforward way to translate a while loop is to
put the test for loop termination at the beginning of the loop.
.DS
while cond loop \kyLAB1: \kxTest cond
body of the loop --->\h'|\nxu'Branch On False To LAB2
end loop\h'|\nxu'code for body of loop
\h'|\nxu'Branch To LAB1
\h'|\nyu'LAB2:
Fig. 10.1 Example of Branch Optimization
.DE
If the condition fails at the Nth iteration, the following code
gets executed (dynamically):
.DS
.TS
l l l.
N * conditional branch (which fails N-1 times)
N-1 * unconditional branch
N-1 * body of the loop
.TE
.DE
An alternative translation is:
.DS
Branch To LAB2
LAB1:
code for body of loop
LAB2:
Test cond
Branch On True To LAB1
.DE
This translation results in the following profile:
.DS
.TS
l l l.
N * conditional branch (which succeeds N-1 times)
1 * unconditional branch
N-1 * body of the loop
.TE
.DE
So the second translation will be significantly faster if N >> 2.
If N=2, execution time will be slightly increased.
On average, the program will be sped up.
Note that the code sizes of the two translations will be the same.
.NH 2
Implementation
.PP
The basic block fusion technique is implemented
by traversing the control flow graph of a procedure,
looking for basic blocks B with only one successor (S).
If one is found, it is checked if S has only one predecessor
(which has to be B).
If so, the two basic blocks can in principle be combined.
However, as one basic block will have to be moved,
the textual order of the basic blocks will be altered.
This reordering causes severe problems in the presence
of conditional jumps.
For example, if S ends in a conditional branch,
the basic block that comes textually next to S must stay
in that position.
So the transformation in Fig. 10.2 is illegal.
.DS
.TS
l l l l l.
LAB1: S1 LAB1: S1
BRA LAB2 S2
... --> BEQ LAB3
LAB2: S2 ...
BEQ LAB3 S3
S3
.TE
Fig. 10.2 An illegal transformation of Branch Optimization
.DE
If B is moved towards S the same problem occurs if the block before B
ends in a conditional jump.
The problem could be solved by adding one extra branch,
but this would reduce the gains of the optimization to zero.
Hence the optimization will only be done if the block that
follows S (in the textual order) is not a successor of S.
This condition assures that S does not end in a conditional branch.
The condition always holds for the code generated by the "C"
front end for a switch statement.
.PP
After the transformation has been performed,
some attributes of the basic blocks involved (such as successor and
predecessor sets and immediate dominator) must be recomputed.
.PP
The while-loop technique is applied to one loop at a time.
The list of basic blocks of the loop is traversed to find
a block B that satisfies the following conditions:
.IP 1.
the textually next block to B is not part of the loop
.IP 2.
the last instruction of B is an unconditional branch;
hence B has only one successor, say S
.IP 3.
the textually next block of B is a successor of S
.IP 4.
the last instruction of S is a conditional branch
.LP
If such a block B is found, the control flow graph is changed
as depicted in Fig. 10.3.
.DS
.ft 5
| |
| v
v |
|-----<------| ----->-----|
____|____ | |
| | | |-------| |
| S1 | | | v |
| Bcc | | | .... |
|--| | | | |
| --------- | | ----|---- |
| | | | | |
| .... ^ | | S2 | |
| | | | | |
| --------- | | | | |
v | | | ^ --------- |
| | S2 | | | | |
| | BRA | | | |-----<-----
| | | | | v
| --------- | | ____|____
| | | | | |
| ------>------ | | S1 |
| | | Bnn |
|-------| | | |
| | ----|----
v | |
|----<--|
|
v
.ft R
Fig. 10.3 Transformation of the CFG by Branch Optimization
.DE

View File

@ -1 +0,0 @@
ca1

View File

@ -1,65 +0,0 @@
.bp
.NH 1
Compact assembly generation
.NH 2
Introduction
.PP
The "Compact Assembly generation phase" (CA) transforms the
intermediate code of the optimizer into EM code in
Compact Assembly Language (CAL) format.
In the intermediate code, all program entities
(such as procedures, labels, global variables)
are denoted by a unique identifying number (see 3.5).
In the CAL output of the optimizer these numbers have to
be replaced by normal identifiers (strings).
The original identifiers of the input program are used whenever possible.
Recall that the IC phase generates two files that can be
used to map unique identifying numbers to procedure names and
global variable names.
For instruction labels CA always generates new names.
The reasons for doing so are:
.IP -
instruction labels are only visible inside one procedure, so they can
not be referenced in other modules
.IP -
the names are not very suggestive anyway, as they must be integer numbers
.IP -
the optimizer considerably changes the control structure of the program,
so there is really no one to one mapping of instruction labels in
the input and the output program.
.LP
As the optimizer combines all input modules into one module,
visibility problems may occur.
Two modules M1 and M2 can both define an identifier X (provided that
X is not externally visible in any of these modules).
If M1 and M2 are combined into one module M, two distinct
entities with the same name would exist in M, which
is not allowed.
.[~[
tanenbaum machine architecture
.], section 11.1.4.3]
In these cases, CA invents a new unique name for one of the entities.
.NH 2
Implementation
.PP
CA first reads the files containing the procedure and global variable names
and stores the names in two tables.
It scans these tables to make sure that all names are different.
Subsequently it reads the EM text, one procedure at a time,
and outputs it in CAL format.
The major part of the code that does the latter transformation
is adapted from the EM Peephole Optimizer.
.PP
The main problem of the implementation of CA is to
assure that the visibility rules are obeyed.
If an identifier must be externally visible (i.e.
it was externally visible in the input program)
and the identifier is defined (in the output program) before
being referenced,
an EXA or EXP pseudo must be generated for it.
(Note that the optimizer may change the order of definitions and
references, so some pseudos may be needed that were not
present in the input program).
On the other hand, an identifier may be only internally visible.
If such an identifier is referenced before being defined,
an INA or INP pseudo must be emitted prior to its first reference.

View File

@ -1,6 +0,0 @@
cf1
cf2
cf3
cf4
cf5
cf6

View File

@ -1,94 +0,0 @@
.bp
.NH
The Control Flow Phase
.PP
In the previous chapter we described the intermediate
code of the global optimizer.
We also specified which part of this code
was constructed by the IC phase of the optimizer.
The Control Flow Phase (\fICF\fR) does
the remainder of the job,
i.e. it determines:
.IP -
the control flow graphs
.IP -
the loop tables
.IP -
the calling, change and use attributes of
the procedure table entries
.LP
CF operates on one procedure at a time.
For every procedure it first reads the EM instructions
from the EM-text file and groups them into basic blocks.
For every basic block, its successors and
predecessors are determined,
resulting in the control flow graph.
Next, the immediate dominator of every basic block
is computed.
Using these dominators, any loop in the
procedure is detected.
Finally, interprocedural analysis is done,
after which we will know the global effects of
every procedure call on its environment.
.sp
CF uses the same internal data structures
for the procedure table and object table as IC.
.NH 2
Partitioning into basic blocks
.PP
With regard to flow of control, we distinguish
three kinds of EM instructions:
jump instructions, instruction label definitions and
normal instructions.
Jump instructions are all conditional or unconditional
branch instructions,
the case instructions (CSA/CSB)
and the RET (return) instruction.
A procedure call (CAL) is not considered to be a jump.
A defining occurrence of an instruction label
is regarded as an EM instruction.
.PP
An instruction starts
a new basic block, in any of the following cases:
.IP 1.
It is the first instruction of a procedure
.IP 2.
It is the first of a list of instruction label
defining occurrences
.IP 3.
It follows a jump
.LP
If there are several consecutive instruction labels
(which is highly unusual),
all of them are put in the same basic block.
Note that several cases may overlap,
e.g. a label definition at the beginning of a procedure
or a label following a jump.
.PP
A simple Finite State Machine is used to model
the above rules.
It also recognizes the end of a procedure,
marked by an END pseudo.
The basic blocks are stored internally as a doubly linked
linear list.
The blocks are linked in textual order.
Every node of this list has the attributes described
in the previous chapter (see syntax rule for
basic_block).
Furthermore, every node contains a pointer to its
EM instructions,
which are represented internally
as a linear, doubly linked list,
just as in the IC phase.
However, instead of one list per procedure (as in IC)
there is now one list per basic block.
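.PP
The rules given above can be summarized by a small C predicate.
The fragment below is a sketch with invented names (the real
implementation uses the finite state machine mentioned above);
is_label() and is_jump() are assumed helpers.
.DS
.ft 5
struct instr;                     /* EM instruction, details omitted */

/* Does instruction ip start a new basic block?  prev is the
 * textually preceding instruction, or 0 at the start of a procedure. */
static int starts_block(struct instr *ip, struct instr *prev)
{
    if (prev == 0)                        /* 1. first instruction of the procedure  */
        return 1;
    if (is_label(ip) && !is_label(prev))  /* 2. first of a run of label definitions */
        return 1;
    if (is_jump(prev))                    /* 3. it follows a jump (branch, CSA/CSB, RET) */
        return 1;
    return 0;
}
.ft R
.DE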
.PP
On the fly, a table is built that maps
every label identifier to the label definition
instruction.
This table is used for computing the control flow.
The table is stored as a dynamically allocated array.
The length of the array is the number of labels
of the current procedure;
this value can be found in the procedure table,
where it was stored by IC.

View File

@ -1,50 +0,0 @@
.NH 2
Control Flow
.PP
A \fIsuccessor\fR of a basic block B is a block C
that can be executed immediately after B.
C is said to be a \fIpredecessor\fR of B.
A block ending with a RET instruction
has no successors.
Such a block is called a \fIreturn block\fR.
Any block that has no predecessors cannot be
executed at all (i.e. it is unreachable),
unless it is the first block of a procedure,
called the \fIprocedure entry block\fR.
.PP
Internally, the successor and predecessor
attributes of a basic block are stored as \fIsets\fR.
Alternatively, one may regard all these
sets of all basic blocks as a conceptual \fIgraph\fR,
in which there is an edge from B to C if C
is in the successor set of B.
We call this conceptual graph
the \fIControl Flow Graph\fR.
.PP
The only successor of a basic block ending on an
unconditional branch instruction is the block that
contains the label definition of the target of the jump.
The target instruction can be found via the LAB_ID
that is the operand of the jump instruction,
by using the label-map table mentioned
above.
If the last instruction of a block is a
conditional jump,
the successors are the target block and the textually
next block.
The last instruction can also be a case jump
instruction (CSA or CSB).
We then analyze the case descriptor,
to find all possible target instructions
and their associated blocks.
We require the case descriptor to be allocated in
a ROM, so it cannot be changed dynamically.
A case jump via an alterable descriptor could in principle
go to any label in the program.
In the presence of such an uncontrolled jump,
hardly any optimization can be done.
We do not expect any front end to generate such a descriptor,
however, because of the controlled nature
of case statements in high level languages.
If the basic block does not end in a jump instruction,
its only successor is the textually next block.
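.PP
The following C fragment sketches how the successors of a block are
derived from its last instruction.
All names are invented for the purpose of illustration, and the
handling of the ROM case descriptor is only hinted at.
.DS
.ft 5
struct instr;
struct bblock { struct instr *last; /* other fields omitted */ };

static void add_successors(struct bblock *b, struct bblock *textnext)
{
    struct instr *last = b->last;

    if (is_uncond_branch(last)) {
        add_succ(b, block_of_label(label_operand(last)));
    } else if (is_cond_branch(last)) {
        add_succ(b, block_of_label(label_operand(last)));
        add_succ(b, textnext);
    } else if (is_case_jump(last)) {
        /* CSA/CSB: one successor per entry of the ROM case descriptor */
        add_case_successors(b, last);
    } else if (is_return(last)) {
        ;                                  /* a return block has no successors */
    } else {
        add_succ(b, textnext);             /* fall through to the next block */
    }
}
.ft R
.DE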

View File

@ -1,53 +0,0 @@
.NH 2
Immediate dominators
.PP
A basic block B dominates a block C if every path
in the control flow graph from the procedure entry block
to C goes through B.
The immediate dominator of C is the closest dominator
of C on any path from the entry block.
See also
.[~[
aho compiler design
.], section 13.1.]
.PP
There are a number of algorithms to compute
the immediate dominator relation.
.IP 1.
Purdom and Moore give an algorithm that is
easy to program and easy to describe (although the
description they give is unreadable;
it is given in a very messy Algol60 program full of gotos).
.[
predominators
.]
.IP 2.
Aho and Ullman present a bitvector algorithm, which is also
easy to program and to understand.
(See
.[~[
aho compiler design
.], section 13.1.]).
.IP 3.
Lengauer and Tarjan introduce a fast algorithm that is
hard to understand, yet remarkably easy to implement.
.[
lengauer dominators
.]
.LP
The Purdom-Moore algorithm is very slow if the
number of basic blocks in the flow graph is large.
The Aho-Ullman algorithm in fact computes the
dominator relation,
from which the immediate dominator relation can be computed
in time quadratic to the number of basic blocks, worst case.
The storage requirement is also quadratic to the number
of blocks.
The running time of the third algorithm is proportional
to:
.DS
(number of edges in the graph) * log(number of blocks).
.DE
We have chosen this algorithm because it is fast
(as shown by experiments done by Lengauer and Tarjan),
it is easy to program and requires little data space.

View File

@ -1,93 +0,0 @@
.NH 2
Loop detection
.PP
Loops are detected by using the loop construction
algorithm of
.[~[
aho compiler design
.], section 13.1.]
This algorithm uses \fIback edges\fR.
A back edge is an edge from B to C in the CFG,
whose head (C) dominates its tail (B).
The loop associated with this back edge
consists of C plus all nodes in the CFG
that can reach B without going through C.
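.PP
The construction of the loop belonging to one back edge can be
sketched as follows.
The names are invented; set_add(), set_member(), npred() and pred()
are assumed helpers, and the worklist has a fixed size only for the
purpose of the sketch.
.DS
.ft 5
/* Build the natural loop of the back edge tail -> head, i.e. head
 * plus every block that can reach tail without passing through head. */
static void natural_loop(struct bblock *head, struct bblock *tail,
                         struct set *loop)
{
    struct bblock *work[512];
    int top = 0, i;

    set_add(loop, head);
    if (!set_member(loop, tail)) {
        set_add(loop, tail);
        work[top++] = tail;
    }
    while (top > 0) {
        struct bblock *b = work[--top];

        for (i = 0; i < npred(b); i++)        /* walk backwards over predecessors */
            if (!set_member(loop, pred(b, i))) {
                set_add(loop, pred(b, i));    /* new loop member */
                work[top++] = pred(b, i);
            }
    }
}
.ft R
.DE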
.PP
As an example of how the algorithm works,
consider the piece of program of Fig. 4.1.
First just look at the program and try to
see what part of the code constitutes the loop.
.DS
loop
if cond then 1
-- lots of simple
-- assignment
-- statements 2 3
exit; -- exit loop
else
S; -- one statement
end if;
end loop;
Fig. 4.1 A misleading loop
.DE
Although a human being may be easily deceived
by the brackets "loop" and "end loop",
the loop detection algorithm will correctly
reply that only the test for "cond" and
the single statement in the false-part
of the if statement are part of the loop!
The statements in the true-part only get
executed once, so there really is no reason at all
to say they're part of the loop too.
The CFG contains one back edge, "3->1".
As node 3 cannot be reached from node 2,
the latter node is not part of the loop.
.PP
A source of problems with the algorithm is the fact
that different back edges may result in
the same loop.
Such an ill-structured loop is
called a \fImessy\fR loop.
After a loop has been constructed, it is checked
if it is really a new loop.
.PP
Loops can partly overlap, without one being nested
inside the other.
This is the case in the program of Fig. 4.2.
.DS
1: 1
S1;
2:
S2; 2
if cond then
goto 4;
S3; 3 4
goto 1;
4:
S4;
goto 1;
Fig. 4.2 Partly overlapping loops
.DE
There are two back edges "3->1" and "4->1",
resulting in the loops {1,2,3} and {1,2,4}.
With every basic block we associate a set of
all loops it is part of.
It is not sufficient just to record its
most enclosing loop.
.PP
After all loops of a procedure are detected, we determine
the nesting level of every loop.
Finally, we find all strong and firm blocks of the loop.
If the loop has only one back edge (i.e. it is not messy),
the set of firm blocks consists of the
head of this back edge and its dominators
in the loop (including the loop entry block).
A firm block is also strong if it is not a
successor of a block that may exit the loop;
a block may exit a loop if it has an (immediate) successor
that is not part of the loop.
For messy loops we do not determine the strong
and firm blocks. These loops are expected
to occur very rarely.

View File

@ -1,82 +0,0 @@
.NH 2
Interprocedural analysis
.PP
It is often desirable to know the effects
a procedure call may have.
The optimization below is only possible if
we know for sure that the call to P cannot
change A.
.DS
.TS
l l.
A := 10; A:= 10;
P; -- procedure call --> P;
B := A + 2; B := 12;
.TE
.DE
Although it is not possible to predict exactly
all the effects a procedure call has, we may
determine a kind of upper bound for it.
So we compute all variables that may be
changed by P, although they need not be
changed at every invocation of P.
We can get hold of this set by just looking
at all assignment (store) instructions
in the body of P.
EM also has a set of \fIindirect\fR assignment
instructions,
i.e. assignment through a pointer variable.
In general, it is not possible to determine
which variable is affected by such an assignment.
In these cases, we just record the fact that P
does an indirect assignment.
Note that this does not mean that all variables
are potentially affected, as the front ends
may generate messages telling that certain
variables can never be accessed indirectly.
We also set a flag if P does a use (load) indirect.
Note that we only have to look at \fIglobal\fR
variables.
If P changes or uses any of its locals,
this has no effect on its environment.
Local variables of a lexically enclosing
procedure can only be accessed indirectly.
.PP
A procedure P may of course call another procedure.
To determine the effects of a call to P,
we also must know the effects of a call to the second procedure.
This second one may call a third one, and so on.
Effectively, we need to compute the \fItransitive closure\fR
of the effects.
To do this, we determine for every procedure
which other procedures it calls.
This set is the "calling" attribute of a procedure.
One may regard all these sets as a conceptual graph,
in which there is an edge from P to Q
if Q is in the calling set of P. This graph will
be referred to as the \fIcall graph\fR.
(Note the resemblance with the control flow graph).
.PP
We can detect which procedures are called by P
by looking at all CAL instructions in its body.
Unfortunately, a procedure may also be
called indirectly, via a CAI instruction.
Yet, only procedures that are used as operand of an LPI
instruction can be called indirectly,
because this is the only way to take the address of a procedure.
We determine for every procedure whether it does
a CAI instruction.
We also build a set of all procedures used as
operand of an LPI.
.sp
After all procedures have been processed (i.e. all CFGs
are constructed, all loops are detected,
all procedures are analyzed to see which variables
they may change, which procedures they call,
whether they do a CAI or are used in an LPI) the
transitive closure of all interprocedural
information is computed.
During the same process,
the calling set of every procedure that uses a CAI
is extended with the above mentioned set of all
procedures that can be called indirectly.
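.PP
The closure computation itself is a straightforward iteration.
The sketch below uses invented field and helper names
(set_union_into() is assumed to return whether it added anything);
it is not the actual optimizer source.
.DS
.ft 5
struct proc {
    struct proc  *next;          /* next procedure in the table      */
    int           ncallees;      /* size of the "calling" set        */
    struct proc **callee;        /* the procedures called            */
    struct set   *change, *use;  /* changed and used global objects  */
    int           calunknown;    /* calls a procedure without body   */
};

/* Fold the effects of callees into their callers until nothing
 * changes any more. */
static void transitive_closure(struct proc *ptab)
{
    int changed = 1;

    while (changed) {
        struct proc *p;

        changed = 0;
        for (p = ptab; p != 0; p = p->next) {
            int i;

            for (i = 0; i < p->ncallees; i++) {
                struct proc *q = p->callee[i];

                if (set_union_into(p->change, q->change)) changed = 1;
                if (set_union_into(p->use, q->use))       changed = 1;
                if (q->calunknown && !p->calunknown) {
                    p->calunknown = 1;     /* a callee has an unknown body */
                    changed = 1;
                }
            }
        }
    }
}
.ft R
.DE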

View File

@ -1,21 +0,0 @@
.NH 2
Source files
.PP
The sources of CF are in the following files and packages:
.IP cf.h: 14
declarations of global variables and data structures
.IP cf.c:
the routine main; interprocedural analysis;
transitive closure
.IP succ:
control flow (successor and predecessor)
.IP idom:
immediate dominators
.IP loop:
loop detection
.IP get:
read object and procedure table;
read EM text and partition it into basic blocks
.IP put:
write tables, CFGs and EM text
.LP

View File

@ -1 +0,0 @@
cj1

View File

@ -1,144 +0,0 @@
.bp
.NH 1
Cross jumping
.NH 2
Introduction
.PP
The "Cross Jumping" optimization technique (CJ)
.[
wulf design optimizing compiler
.]
is basically a space optimization technique. It looks for pairs of
basic blocks (B1,B2), for which:
.DS
SUCC(B1) = SUCC(B2) = {S}
.DE
(So B1 and B2 both have one and the same successor).
If the last few non-branch instructions are the same for B1 and B2,
one such sequence can be eliminated.
.DS
Pascal:
if cond then
S1
S3
else
S2
S3
(pseudo) EM:
.TS
l l l.
TEST COND TEST COND
BNE *1 BNE *1
S1 S1
S3 ---> BRA *2
BRA *2 1:
1: S2
S2 2:
S3 S3
2:
.TE
Fig. 9.1 An example of Cross Jumping
.DE
As the basic blocks have the same successor,
at least one of them ends in an unconditional branch instruction (BRA).
Hence no extra branch instruction is ever needed, just the target
of an existing branch needs to be changed; neither the program size
nor the execution time will ever increase.
In general, the execution time will remain the same, unless
further optimizations can be applied because of this optimization.
.PP
This optimization is particularly effective,
because it cannot always be done by the programmer at the source level,
as demonstrated by Fig. 9.2.
.DS
Pascal:
if cond then
x := f(4)
else
x := g(5)
EM:
.TS
l l.
... ...
LOC 4 LOC 5
CAL F CAL G
ASP 2 ASP 2
LFR 2 LFR 2
STL X STL X
.TE
Fig. 9.2 Effectiveness of Cross Jumping
.DE
At the source level there is no common tail,
but at the EM level there is a common tail.
.NH 2
Implementation
.PP
The implementation of cross jumping is rather straightforward.
The technique is applied to one procedure at a time.
The control flow graph of the procedure
is scanned for pairs of basic blocks
with the same (single) successor and with common tails.
Note that there may be more than two such blocks (e.g. as the result
of a case statement).
This is dealt with by repeating the entire process until no
further optimizations can be done for the current procedure.
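.PP
The search for a common tail is a simple backward walk over the two
instruction lists.
The fragment below is only a sketch with invented names;
last_nonbranch() and same_instr() are assumed helpers.
.DS
.ft 5
struct instr { int opcode; struct instr *prev, *next; };  /* simplified */
struct bblock;

/* Count how many trailing non-branch instructions of B1 and B2 are
 * identical; this is the tail that can be shared. */
static int common_tail(struct bblock *b1, struct bblock *b2)
{
    struct instr *i1 = last_nonbranch(b1);   /* skip a trailing BRA */
    struct instr *i2 = last_nonbranch(b2);
    int n = 0;

    while (i1 != 0 && i2 != 0 && same_instr(i1, i2)) {
        n++;                                 /* one more shareable instruction */
        i1 = i1->prev;
        i2 = i2->prev;
    }
    return n;
}
.ft R
.DE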
.sp
If a suitable pair of basic blocks has been found, the control flow
graph must be altered. One of the basic
blocks must be split into two.
The control flow graphs before and after the optimization are shown
in Fig. 9.3 and Fig. 9.4.
.DS
.ft 5
-------- --------
| | | |
| S1 | | S2 |
| S3 | | S3 |
| | | |
-------- --------
| |
|------------------|--------------------|
|
v
.ft R
Fig. 9.3 CFG before optimization
.DE
.DS
.ft 5
-------- --------
| | | |
| S1 | | S2 |
| | | |
-------- --------
| |
|--------------------<------------------|
v
--------
| |
| S3 |
| |
--------
|
v
.ft R
Fig. 9.4 CFG after optimization
.DE
Some attributes of the three resulting blocks (such as immediate dominator)
are updated.
.PP
In some cases, cross jumping might split the computation of an expression
into two, by inserting a branch somewhere in the middle.
Most code generators will generate very poor assembly code when
presented with such EM code.
Therefore, cross jumping is not performed in these cases.

View File

@ -1,5 +0,0 @@
cs1
cs2
cs3
cs4
cs5

View File

@ -1,45 +0,0 @@
.bp
.NH 1
Common subexpression elimination
.NH 2
Introduction
.PP
The Common Subexpression Elimination optimization technique (CS)
tries to eliminate multiple computations of EM expressions
that yield the same result.
It places the result of one such computation
in a temporary variable,
and replaces the other computations by a reference
to this temporary variable.
The primary goal of this technique is to decrease
the execution time of the program,
but in general it will save space too.
.PP
As an example of the application of Common Subexpression Elimination,
consider the piece of program in Fig. 7.1(a).
.DS
.TS
l l l.
x := a * b; TMP := a * b; x := a * b;
CODE; x := TMP; CODE
y := c + a * b; CODE y := x;
y := c + TMP;
(a) (b) (c)
.TE
Fig. 7.1 Examples of Common Subexpression Elimination
.DE
If neither a nor b is changed in CODE,
the instructions can be replaced by those of Fig. 7.1(b),
which saves one multiplication,
but costs an extra store instruction.
If the value of x is not changed in CODE either,
the instructions can be replaced by those of Fig. 7.1(c).
In this case
the extra store is not needed.
.PP
In the following sections we will describe
which transformations are done
by CS and how this phase
was implemented.

View File

@ -1,86 +0,0 @@
.NH 2
Specification of the Common Subexpression Elimination phase
.PP
In this section we will describe
the window
through which CS examines the code,
the expressions recognized by CS,
and finally the changes made to the code.
.NH 3
The working window
.PP
The CS algorithm is applied to the
largest sequence of textually adjacent basic blocks
B1,..,Bn, for which
.DS
PRED(Bj) = {Bj-1}, j = 2,..,n.
.DE
Intuitively, this window consists of straight line code,
with only one entry point (at the beginning); it may
contain jumps, which should all have their targets outside the window.
This is illustrated in Fig. 7.2.
.DS
x := a * b; (1)
if x < 10 then (2)
y := a * b; (3)
Fig. 7.2 The working window of CS
.DE
Line (2) can only be executed after line (1).
Likewise, line (3) can only be executed after
line (2).
Both a and b have the same values at line (1) and at line (3).
.PP
Larger windows were avoided.
In Fig. 7.3, the value of a at line (4) may have been obtained
at more than one point.
.DS
x := a * b; (1)
if x < 10 then (2)
a := 100; (3)
y := a * b; (4)
Fig. 7.3 Several working windows
.DE
.NH 3
Recognized expressions.
.PP
The computations eliminated by CS need not be normal expressions
(like "a * b"),
but can even consist of a single operand that is expensive to access,
such as an array element or a record field.
If an array element is used,
its address is computed implicitly.
CS is able to eliminate either the element itself or its
address, whichever one is most profitable.
A variable of a textually enclosing procedure may also be
expensive to access, depending on the lexical level difference.
.NH 3
Transformations
.PP
CS creates a new temporary local variable (TMP)
for every eliminated expression,
unless it is able to use an existing local variable.
It emits code to initialize this variable with the
result of the expression.
Most recurrences of the expression
can simply be replaced by a reference to TMP.
If the address of an array element is recognized as
a common subexpression,
references to the element itself are replaced by
indirect references through TMP (see Fig. 7.4).
.DS
.TS
l l l.
x := A[i]; TMP := &A[i];
. . . --> x := *TMP;
A[i] := y; . . .
*TMP := y;
.TE
Fig. 7.4 Elimination of an array address computation
.DE
Here, '&' is the 'address of' operator,
and unary '*' is the indirection operator.
(Note that EM actually has different instructions to do
a use-indirect or an assign-indirect.)

View File

@ -1,250 +0,0 @@
.NH 2
Implementation
.PP
.NH 3
The value number method
.PP
To determine whether two expressions have the same result,
there must be some way to determine whether their operands have
the same values.
We use a system of \fIvalue numbers\fP
.[
kennedy data flow analysis
.]
in which each distinct value of whatever type,
created or used within the working window,
receives a unique identifying number, its value number.
Two items have the same value number if and only if,
based only upon information from the instructions in the window,
their values are provably identical.
For example, after processing the statement
.DS
a := 4;
.DE
the variable a and the constant 4 have the same value number.
.PP
The value number of the result of an expression depends only
on the kind of operator and the value number(s) of the operand(s).
The expressions need not be textually equal, as shown in Fig. 7.5.
.DS
.TS
l l.
a := c; (1)
use(a * b); (2)
d := b; (3)
use(c * d); (4)
.TE
Fig. 7.5 Different expressions with the same value number
.DE
At line (1) a receives the same value number as c.
At line (2) d receives the same value number as b.
At line (4) the expression "c * d" receives the same value number
as the expression "a * b" at line (2),
because the value numbers of their left and right operands are the same,
and the operator (*) is the same.
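.PP
The mapping can be pictured as a small table indexed by the operator
and the value numbers of the operands.
The C fragment below is a sketch for binary operators only
(invented names, linear search, no overflow handling);
it is not the actual CS source.
.DS
.ft 5
#define MAXAVAIL 512

struct avail {
    int op, vn1, vn2;      /* operator and operand value numbers */
    int result;            /* value number of the result         */
};

static struct avail table[MAXAVAIL];
static int navail;
static int next_vn = 1;

static int value_number(int op, int vn1, int vn2)
{
    int i;

    for (i = 0; i < navail; i++)
        if (table[i].op == op && table[i].vn1 == vn1 && table[i].vn2 == vn2)
            return table[i].result;       /* same expression seen before */

    table[navail].op = op;                /* a brand new expression:    */
    table[navail].vn1 = vn1;              /* remember the triple and    */
    table[navail].vn2 = vn2;              /* hand out a fresh number    */
    table[navail].result = next_vn++;
    return table[navail++].result;
}
.ft R
.DE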
.PP
As another example of the value number method, consider Fig. 7.6.
.DS
.TS
l l.
use(a * b); (1)
a := 123; (2)
use(a * b); (3)
.TE
Fig. 7.6 Identical expressions with different value numbers
.DE
Although textually the expressions "a * b" in line 1 and line 3 are equal,
a will have different value numbers at line 3 and line 1.
The two expressions will not mistakenly be recognized as equivalent.
.NH 3
Entities
.PP
The Value Number Method distinguishes between operators and operands.
The value numbers of operands are stored in a table,
called the \fIsymbol table\fR.
The value number of a subexpression depends on the
(root) operator of the expression and on the value numbers
of its operands.
A table of "available expressions" is used to do this mapping.
.PP
CS recognizes the following kinds of EM operands, called \fIentities\fR:
.DS
- constant
- local variable
- external variable
- indirectly accessed entity
- offsetted entity
- address of local variable
- address of external variable
- address of offsetted entity
- address of local base
- address of argument base
- array element
- procedure identifier
- floating zero
- local base
- heap pointer
- ignore mask
.DE
.LP
Whenever a new entity is encountered in the working window,
it is entered in the symbol table and given a brand new value number.
Most entities have attributes (e.g. the offset in
the current stackframe for local variables),
which are also stored in the symbol table.
.PP
An entity is called static if its value cannot be changed
(e.g. a constant or an address).
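.PP
In C, the entity kinds could be pictured as an enumeration together
with a symbol table record.
The declarations below are purely illustrative; the real symbol
table uses its own encoding and attributes.
.DS
.ft 5
enum entity_kind {
    EN_CONST, EN_LOCAL, EN_EXTERNAL, EN_INDIRECT, EN_OFFSETTED,
    EN_ALOCAL, EN_AEXTERNAL, EN_AOFFSETTED, EN_ALB, EN_AAB,
    EN_ARRAYELEM, EN_PROC, EN_FZERO, EN_LB, EN_HP, EN_IGNMASK
};

struct entity {
    enum entity_kind kind;
    int  vn;            /* current value number           */
    long attr;          /* e.g. offset in the stack frame */
    int  is_static;     /* value can never change         */
};
.ft R
.DE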
.NH 3
Parsing expressions
.PP
Common subexpressions are recognized by simulating the behaviour
of the EM machine.
The EM code is parsed from left to right;
as EM is postfix code, this is a bottom up parse.
At any point the current state of the EM runtime stack is
reflected by a simulated "fake stack",
containing descriptions of the parsed operands and expressions.
A descriptor consists of:
.DS
(1) the value number of the operand or expression
(2) the size of the operand or expression
(3) a pointer to the first line of EM-code
that constitutes the operand or expression
.DE
Note that operands may consist of several EM instructions.
Whenever an operator is encountered, the
descriptors of its operands are on top of the fake stack.
The operator and the value numbers of the operands
are used as indices in the table of available expressions,
to determine the value number of the expression.
.PP
During the parsing process,
we keep track of the first line of each expression;
we need this information when we decide to eliminate the expression.
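.PP
The descriptor and the handling of a binary operator can be sketched
as follows (invented names; value_number() is the lookup from the
previous sketch and result_size() is an assumed helper).
.DS
.ft 5
struct instr;

struct descr {
    int           vn;       /* value number of operand or expression  */
    int           size;     /* size in bytes                          */
    struct instr *first;    /* first EM line of operand or expression */
};

static struct descr fake[128];      /* the fake stack         */
static int sp;                      /* fake stack pointer     */

static void parse_binary(int op)
{
    struct descr right = fake[--sp];
    struct descr left  = fake[--sp];
    struct descr res;

    res.vn    = value_number(op, left.vn, right.vn);
    res.size  = result_size(op);    /* assumed helper */
    res.first = left.first;         /* expression starts at its left operand */
    fake[sp++] = res;
}
.ft R
.DE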
.NH 3
Updating entities
.PP
An entity is assigned a value number when it is
used for the first time
in the working window.
If the entity is used as left hand side of an assignment,
it gets the value number of the right hand side.
Sometimes the effects of an instruction on an entity cannot
be determined exactly;
the current value and value number of the entity may become
inconsistent.
Hence the current value number must be forgotten.
This is achieved by giving the entity a new value number
that was not used before.
The entity is said to be \fIkilled\fR.
.PP
As information is lost when an entity is killed,
CS tries to save as many entities as possible.
In case of an indirect assignment through a pointer,
some analysis is done to see which variables cannot be altered.
For a procedure call, the interprocedural information contained
in the procedure table is used to restrict the set of entities that may
be changed by the call.
Local variables for which the front end generated
a register message can never be changed by an indirect assignment
or a procedure call.
.NH 3
Changing the EM text
.PP
When a new expression comes available,
it is checked whether its result is saved in a local
that may go in a register.
The last line of the expression must be followed
by a STL or SDL instruction
(depending on the size of the result)
and a register message must be present for
this local.
If there is such a local,
it is recorded in the available expressions table.
Each time a new occurrence of this expression
is found,
the value number of the local is compared against
the value number of the result.
If they are different the local cannot be used and is forgotten.
.PP
The available expressions are linked in a list.
New expressions are linked at the head of the list.
In this way expressions that are contained within other
expressions appear later in the list,
because EM-expressions are postfix.
The elimination process walks through the list,
starting at the head, to find the largest expressions first.
If an expression is eliminated,
any expression later on in the list, contained in the former expression,
is removed from the list,
as expressions can only be eliminated once.
.PP
A STL or SDL is emitted after the first occurrence of the expression,
unless there was an existing local variable that could hold the result.
.NH 3
Desirability analysis
.PP
Although the global optimizer works on EM code,
the goal is to improve the quality of the object code.
Therefore some machine-dependent information is needed
to decide whether it is desirable to
eliminate a given expression.
Because it is impossible for the CS phase to know
exactly what code will be generated,
some heuristics are used.
CS essentially looks for some special cases
that should not be eliminated.
These special cases can be turned on or off for a given machine,
as indicated in a machine descriptor file.
.PP
Some operators can sometimes be translated
into an addressing mode for the machine at hand.
Such an operator is only eliminated
if its operand is itself expensive,
i.e. it is not just a simple load.
The machine descriptor file contains a set of such operators.
.PP
Eliminating the loading of the Local Base or
the Argument Base by the LXL resp. LXA instruction
is only beneficial if the difference in lexical levels
exceeds a certain threshold.
The machine descriptor file contains this threshold.
.PP
Replacing a SAR or a LAR by an AAR followed by a LOI
may possibly increase the size of the object code.
We assume that this is only possible when the
size of the array element is greater than some limit.
.PP
There are back ends that can very efficiently translate
the index computing instruction sequence LOC SLI ADS.
If this is the case,
the SLI instruction between a LOC
and an ADS is not eliminated.
.PP
To handle unforeseen cases, the descriptor file may also contain
a set of operators that should never be eliminated.
.NH 3
The algorithm
.PP
After these preparatory explanations,
the algorithm itself is easy to understand.
For each instruction within the current window,
the following steps are performed in the given order :
.IP 1.
Check if this instruction defines an entity.
If so, the set of entities is updated accordingly.
.IP 2.
Kill all entities that might be affected by this instruction.
.IP 3.
Simulate the instruction on the fake-stack.
If this instruction is an operator,
update the list of available expressions accordingly.
.PP
The result of this process is
a list of available expressions plus the information
needed to eliminate them.
Expressions that are desirable to eliminate are eliminated.
Next, the window is shifted and the process is repeated.
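.PP
The resulting per-instruction loop is short.
The fragment below is a sketch with invented helper names;
it only shows the order of the three steps.
.DS
.ft 5
struct instr { int opcode; struct instr *next, *prev; };   /* simplified */

/* Process the instructions of one working window. */
static void do_window(struct instr *first, struct instr *last)
{
    struct instr *ip;

    for (ip = first; ip != 0 && ip != last->next; ip = ip->next) {
        update_entities(ip);    /* 1. does it define an entity?          */
        kill_entities(ip);      /* 2. forget entities it may change      */
        simulate(ip);           /* 3. run it on the fake stack; update   */
                                /*    the list of available expressions  */
    }
    eliminate_profitable();     /* rewrite the EM text where desirable */
}
.ft R
.DE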

View File

@ -1,311 +0,0 @@
.NH 2
Implementation.
.PP
In this section we will discuss the implementation of the CS phase.
We will first describe the basic actions that are undertaken
by the algorithm, then the algorithm itself.
.NH 3
Partitioning the EM instructions
.PP
There are over 100 EM instructions.
For our purpose we partition this huge set into groups of
instructions which can be more or less conveniently handled together.
.PP
There are groups for all sorts of load instructions:
simple loads, expensive loads, loads of an array element.
A load is considered \fIexpensive\fP when more than one EM instruction
is involved in loading it.
The load of a lexical entity is also considered expensive.
For instance: LOF is expensive, LAL is not.
LAR forms a group on its own,
because it is not only an expensive load,
but also implicitly includes the ternary operator AAR,
which computes the address of the array element.
.PP
There are groups for all sorts of operators:
unary, binary, and ternary.
The groups of operators are further partitioned according to the size
of their operand(s) and result.
.\" .PP
.\" The distinction between operators and expensive loads is not always clear.
.\" The ADP instruction for example,
.\" might seem a unary operator because it pops one item
.\" (a pointer) from the stack.
.\" However, two ADP-instructions which pop an item with the same value number
.\" need not have the same result,
.\" because the attributes (an offset, to be added to the pointer)
.\" can be different.
.\" Is it then a binary operator?
.\" That would give rise to the strange, and undesirable,
.\" situation that some binary operators pop two operands
.\" and others pop one.
.\" The conclusion is inevitable:
.\" we have been fooled by the name (ADd Pointer).
.\" The ADP-instruction is an expensive load.
.\" In this context LAF, meaning Load Address of oFfsetted,
.\" would have been a better name,
.\" corresponding to LOF, like LAL,
.\" Load Address of Local, corresponds to LOL.
.PP
There are groups for all sorts of stores:
direct, indirect, array element.
The SAR forms a group on its own for the same reason
as appeared with LAR.
.PP
The effect of the remaining instructions is less clear.
They do not help very much in parsing expressions or
in constructing our pseudo symboltable.
They are partitioned according to the following criteria:
.RS
.IP "-"
They change the value of an entity without using the stack
(e.g. ZRL, DEE).
.IP "-"
They are subroutine calls (CAI, CAL).
.IP "-"
They change the stack in some irreproducible way (e.g. ASP, LFR, DUP).
.IP "-"
They have no effect whatever on the stack or on the entities.
This does not mean they can be deleted,
but they can be ignored for the moment
(e.g. MES, LIN, NOP).
.IP "-"
Their effect is too complicated to compute,
so we just assume worst case behaviour.
Hopefully, they do not occur very often.
(e.g. MON, STR, BLM).
.IP "-"
They signal the end of the basic block (e.g. BLT, RET, TRP).
.RE
.NH 3
Parsing expressions
.PP
To recognize expressions,
we simulate the behaviour of the EM machine,
by means of a fake-stack.
When we scan the instructions in sequential order,
we first encounter the instructions that load
the operands on the stack,
and then the instruction that indicates the operator,
because EM expressions are postfix.
When we find an instruction to load an operand,
we load on the fake-stack a struct with the following information:
.DS
.TS
l l.
(1) the value number of the operand
(2) the size of the operand
(3) a pointer to the first line of EM-code
that constitutes the operand
.TE
.DE
In most cases, (3) will point to the line
that loaded the operand (e.g. LOL, LOC),
i.e. there is only one line that refers to this operand,
but sometimes some information must be popped
to load the operand (e.g. LOI, LAR).
This information must have been pushed before,
so we also pop a pointer to the first line that pushed
the information.
This line is now the first line that defines the operand.
.PP
When we find the operator instruction,
we pop its operand(s) from the fake-stack.
The first line that defines the first operand is
now the first line of the expression.
We now have all information to determine
whether the just parsed expression has occurred before.
We also know the first and last line of the expression;
we need this when we decide to eliminate it.
Associated with each available expression is a set
whose elements contain the first and last line of
a recurrence of this expression.
.PP
Not only will the operand(s) be popped from the fake-stack,
but the following will be pushed:
.DS
.TS
l l.
(1) the value number of the result
(2) the size of the result
(3) a pointer to the first line of the expression
.TE
.DE
In this way an item on the fake-stack always contains
the necessary information.
EM expressions are parsed bottom up.
.NH 3
Updating entities
.PP
As said before,
we build our private "symboltable",
while scanning the EM-instructions.
The behaviour of the EM-machine is not only reflected
in the fake-stack,
but also in the entities.
When an entity is created,
we do not yet know its value,
so we assign a brand new value number to it.
Each time a store-instruction is encountered,
we change the value number of the target entity of this store
to the value number of the token that was popped
from the fake-stack.
Because entities may overlap,
we must also "forget" the value numbers of entities
that might be affected by this store.
Each such entity will be \fIkilled\fP,
i.e. assigned a brand new value number.
.PP
Because we lose information when we forget
the value number of an entity,
we try to save as many entities as possible.
When we store into an external,
we don't have to kill locals and vice versa.
Furthermore, we can see whether two locals or
two externals overlap,
because we know the offset from the local base,
resp. the offset within the data block,
and the size.
The situation becomes more complicated when we have
to consider indirection.
The worst case is that we store through an unknown pointer.
In that case we kill all entities except those locals
for which a so-called \fIregister message\fP has been generated;
this register message indicates that this local can never be
accessed indirectly.
If we know this pointer we can be more careful.
If it points to a local then the entity that is accessed through
this pointer can never overlap with an external.
If it points to an external this entity can never overlap with a local.
Furthermore, in the latter case,
we can find the data block this entity belongs to.
Since pointer arithmetic is only defined within a data block,
this entity can never overlap with entities that are known to
belong to another data block.
.PP
Not only after a store-instruction but also after a
subroutine-call it may be necessary to kill entities;
the subroutine may affect global variables or store
through a pointer.
If a subroutine is called that is not available as EM-text,
we assume worst case behaviour,
i.e. we kill all entities without register message.
.NH 3
Additions and replacements.
.PP
When a new expression comes available,
we check whether the result is saved in a local
that may go in a register.
The last line of the expression must be followed
by a STL or SDL instruction,
depending on the size of the result
(resp. WS and 2*WS),
and a register message must be present for
this local.
If we have found such a local,
we store a pointer to it with the available expression.
Each time a new occurrence of this expression
is found,
we compare the value number of the local against
the value number of the result.
When they are different we remove the pointer to it,
because we cannot use it.
.PP
The available expressions are singly linked in a list.
When a new expression comes available,
we link it at the head of the list.
In this way expressions that are contained within other
expressions appear later in the list,
because EM-expressions are postfix.
When we are going to eliminate expressions,
we walk through the list,
starting at the head, to find the largest expressions first.
When we decide to eliminate an expression,
we look at the expressions in the tail of the list,
starting from where we are now,
to delete expressions that are contained within
the chosen one because
we cannot eliminate an expression more than once.
.PP
When we are going to eliminate expressions,
and we do not have a local that holds the result,
we emit a STL or SDL after the line where the expression
was first found.
The other occurrences are simply removed,
unless they contain instructions whose effect is not limited
to the stack, e.g. messages, stores, calls.
Before each instruction that needs the result on the stack,
we emit a LOL or LDL.
When the expression was an AAR,
but the instruction was a LAR or a SAR,
we append a LOI resp. a STI of the number of bytes
in an array-element after each LOL/LDL.
.NH 3
Desirability analysis
.PP
Although the global optimizer works on EM code,
the goal is to improve the quality of the object code.
Therefore we need some machine dependent information
to decide whether it is desirable to
eliminate a given expression.
Because it is impossible for the CS phase to know
exactly what code will be generated,
we use some heuristics.
In most cases it will save time when we eliminate an
operator, so we just do it.
We only look for some special cases.
.PP
Some operators can in some cases be translated
into an addressing mode for the machine at hand.
We only eliminate such an operator,
when its operand is itself "expensive",
i.e. not just a simple load.
The user of the CS phase has to supply
a set of such operators.
.PP
Eliminating the loading of the Local Base or
the Argument Base by the LXL resp. LXA instruction
is only beneficial when the number of lexical levels
we have to go back exceeds a certain threshold.
This threshold will be different when registers
are saved by the back end.
The user must supply this threshold.
.PP
Replacing a SAR or a LAR by an AAR followed by a LOI
may possibly increase the size of the object code.
We assume that this is only possible when the
size of the array element is greater than some
(user-supplied) limit.
.PP
There are back ends that can very efficiently translate
the index computing instruction sequence LOC SLI ADS.
If this is the case,
we do not eliminate the SLI instruction between a LOC
and an ADS.
.PP
To handle unforeseen cases, the user may also supply
a set of operators that should never be eliminated.
.NH 3
The algorithm
.PP
After these preparatory explanations,
we can be short about the algorithm itself.
For each instruction within our window,
the following steps are performed in the order given:
.IP 1.
We check if this instruction defines an entity.
If this is the case the set of entities is updated accordingly.
.IP 2.
We kill all entities that might be affected by this instruction.
.IP 3.
The instruction is simulated on the fake-stack.
Copy propagation is done.
If this instruction is an operator,
we update the list of available expressions accordingly.
.PP
When we have processed all instructions this way,
we have built a list of available expressions plus the information we
need to eliminate them.
We eliminate those expressions for which
desirability analysis tells us this is worthwhile.
Then we shift our window and continue.

View File

@ -1,46 +0,0 @@
.NH 2
Source files of CS
.PP
The sources of CS are in the following files and packages:
.IP cs.h 14
declarations of global variables and data structures
.IP cs.c
the routine main;
a driving routine to process
the basic blocks in the right order
.IP vnm
implements a procedure that performs
the value numbering on one basic block
.IP eliminate
implements a procedure that does the
transformations, if desirable
.IP avail
implements a procedure that manipulates the list of available expressions
.IP entity
implements a procedure that manipulates the set of entities
.IP getentity
implements a procedure that extracts the
pseudo symboltable information from EM-instructions;
uses a small table
.IP kill
implements several routines that find the entities
that might be changed by EM-instructions
and kill them
.IP partition
implements several routines that partition the huge set
of EM-instructions into more or less manageable,
more or less logical chunks
.IP profit
implements a procedure that decides whether it
is advantageous to eliminate an expression;
also removes expressions with side-effects
.IP stack
implements the fake-stack and operations on it
.IP alloc
implements several allocation routines
.IP aux
implements several auxiliary routines
.IP debug
implements several routines to provide debugging
and verbose output
.LP

View File

@ -1,5 +0,0 @@
ic1
ic2
ic3
ic4
ic5

View File

@ -1,57 +0,0 @@
.bp
.NH
The Intermediate Code and the IC phase
.PP
In this chapter the intermediate code of the EM global optimizer
will be defined.
The 'Intermediate Code construction' phase (IC),
which builds the initial intermediate code from
EM Compact Assembly Language,
will be described.
.NH 2
Introduction
.PP
The EM global optimizer is a multi pass program,
hence there is a need for an intermediate code.
Usually, programs in the Amsterdam Compiler Kit use the
Compact Assembly Language format
.[~[
keizer architecture
.], section 11.2]
for this purpose.
Although this code has some convenient features,
such as being compact,
it is quite unsuitable in our case,
because of a number of reasons.
First, the code lacks global information
about whole procedures or whole basic blocks.
Second, it uses identifiers ('names') to bind
defining and applied occurrences of
procedures, data labels and instruction labels.
Although this is usual in high level programming
languages, it is awkward in an intermediate code
that must be read many times.
Each pass of the optimizer would have
to incorporate an identifier look-up mechanism
to associate a defining occurrence with each
applied occurrence of an identifier.
Finally, EM programs declare blocks of bytes
rather than variables. A 'hol 6' instruction may be used to
declare three 2-byte variables.
Clearly, the optimizer wants to deal with variables, and
not with rows of bytes.
.PP
To overcome these problems, we have developed a new
intermediate code.
This code does not merely consist of the EM instructions,
but also contains global information in the
form of tables and graphs.
Before describing the intermediate code we will
first leap aside to outline
the problems one generally encounters
when trying to store complex data structures such as
graphs outside the program, i.e. in a file.
We trust this will enhance the
comprehensibility of the
intermediate code definition and the design and implementation
of the IC phase.

View File

@ -1,150 +0,0 @@
.NH 2
Representation of complex data structures in a sequential file
.PP
Most programmers are quite used to dealing with
complex data structures, such as
arrays, graphs and trees.
There are some particular problems that occur
when storing such a data structure
in a sequential file.
We call data that is kept in
main memory
.UL internal
,as opposed to
.UL external
data
that is kept in a file outside the program.
.sp
We assume a simple data structure of a
scalar type (integer, floating point number)
has some known external representation.
An
.UL array
having elements of a scalar type can be represented
externally easily, by successively
representing its elements.
The external representation may be preceded by a
number, giving the length of the array.
Now, consider a linear, singly linked list,
the elements of which look like:
.DS
record
data: scalar_type;
next: pointer_type;
end;
.DE
It is significant to note that the "next"
fields of the elements only have a meaning within
main memory.
The field contains the address of some location in
main memory.
If a list element is written to a file in
some program,
and read by another program,
the element will be allocated at a different
address in main memory.
Hence this address value is completely
useless outside the program.
.sp
One may represent the list by ignoring these "next" fields
and storing the data items in the order they are linked.
The "next" fields are represented \fIimplicitly\fR.
When the file is read again,
the same list can be reconstructed.
In order to know where the external representation of the
list ends,
it may be useful to put the length of
the list in front of it.
.sp
Note that arrays and linear lists have the
same external representation.
.PP
A doubly linked, linear list,
with elements of the type:
.DS
record
data: scalar_type;
next,
previous: pointer_type;
end
.DE
can be represented in precisely the same way.
Both the "next" and the "previous" fields are represented
implicitly.
.PP
Next, consider a binary tree,
the nodes of which have type:
.DS
record
data: scalar_type;
left,
right: pointer_type;
end
.DE
Such a tree can be represented sequentially,
by storing its nodes in some fixed order, e.g. prefix order.
A special null data item may be used to
denote a missing left or right son.
For example, let the scalar type be integer,
and let the null item be 0.
Then the tree of fig. 3.1(a)
can be represented as in fig. 3.1(b).
.DS
.ft 5
4
/ \e
9 12
/ \e / \e
12 3 4 6
/ \e \e /
8 1 5 1
.ft R
Fig. 3.1(a) A binary tree
.ft 5
4 9 12 0 0 3 8 0 0 1 0 0 12 4 0 5 0 0 6 1 0 0 0
.ft R
Fig. 3.1(b) Its sequential representation
.DE
We are still able to represent the pointer fields ("left"
and "right") implicitly.
.PP
Finally, consider a general
.UL graph
, where each node has a "data" field and
pointer fields,
with no restriction on where they may point to.
Now we're at the end of our tale.
There is no way to represent the pointers implicitly,
like we did with lists and trees.
In order to represent them explicitly,
we use the following scheme.
Every node gets an extra field,
containing some unique number that identifies the node.
We call this number its
.UL id.
A pointer is represented externally as the id of the node
it points to.
When reading the file we use a table that maps
an id to the address of its node.
In general this table will not be completely filled in
until we have read the entire external representation of
the graph and allocated internal memory locations for
every node.
Hence we cannot reconstruct the graph in one scan.
That is, there may be some pointers from node A to B,
where B is placed later in the sequential file than A.
When we read node A we cannot map the id of B
to the address of node B,
as we have not yet allocated node B.
We can overcome this problem if the size
of every node is known in advance.
In this case we can allocate memory for a node
on first reference.
Otherwise, the mapping from id to pointer
cannot be done while reading nodes.
The mapping can be done either in an extra scan
or at every reference to the node.
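.PP
The id-to-address table can be pictured as follows.
This is only a sketch with invented names; it assumes that the size
of every node is known in advance, so that memory for a node can be
allocated on first reference.
.DS
.ft 5
#include <stdlib.h>

#define MAXID 1024

struct gnode {
    int data;
    struct gnode *ptr[2];       /* outgoing edges, filled in as ids are resolved */
};

static struct gnode *idmap[MAXID];      /* id -> internal address */

static struct gnode *node_of_id(int id)
{
    if (idmap[id] == 0)
        idmap[id] = calloc(1, sizeof(struct gnode));   /* allocate on first reference */
    return idmap[id];
}
.ft R
.DE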

View File

@ -1,431 +0,0 @@
.NH 2
Definition of the intermediate code
.PP
The intermediate code of the optimizer consists
of several components:
.IP -
the object table
.IP -
the procedure table
.IP -
the em code
.IP -
the control flow graphs
.IP -
the loop table
.LP
.PP
These components are described in
the next sections.
The syntactic structure of every component
is described by a set of context free syntax rules,
with the following conventions:
.DS
.TS
l l.
x a non-terminal symbol
A a terminal symbol (in capitals)
x: a b c; a grammar rule
a | b a or b
(a)+ 1 or more occurrences of a
{a} 0 or more occurrences of a
.TE
.DE
.NH 3
The object table
.PP
EM programs declare blocks of bytes rather than (global) variables.
A typical program may declare 'HOL 7780'
to allocate space for 8 I/O buffers,
2 large arrays and 10 scalar variables.
The optimizer wants to deal with
.UL objects
like variables, buffers and arrays
and certainly not with huge numbers of bytes.
Therefore the intermediate code contains information
about which global objects are used.
This information can be obtained from an EM program
by just looking at the operands of instructions
such as LOE, LAE, LDE, STE, SDE, INE, DEE and ZRE.
.PP
The object table consists of a list of
.UL datablock
entries.
Each such entry represents a declaration like HOL, BSS,
CON or ROM.
There are five kinds of datablock entries.
The fifth kind,
UNKNOWN, denotes a declaration in a
separately compiled file that is not made
available to the optimizer.
Each datablock entry contains the type of the block,
its size, and a description of the objects that
belong to it.
If it is a rom,
it also contains a list of values given
as arguments to the rom instruction,
provided that this list contains only integer numbers.
An object has an offset (within its datablock)
and a size.
The size need not always be determinable.
Both datablock and object contain a unique
identifying number
(see previous section for their use).
.DS
.UL syntax
.TS
lw(1i) l l.
object_table:
{datablock} ;
datablock:
D_ID -- unique identifying number
PSEUDO -- one of ROM,CON,BSS,HOL,UNKNOWN
SIZE -- # bytes declared
FLAGS
{value} -- contents of rom
{object} ; -- objects of the datablock
object:
O_ID -- unique identifying number
OFFSET -- offset within the datablock
SIZE ; -- size of the object in bytes
value:
argument ;
.TE
.DE
A data block has only one flag: "external", indicating
whether the data label is externally visible.
The syntax for "argument" will be given later on
(see em_text).
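.PP
By way of illustration, the table above might be mapped onto C data
structures roughly as follows; the field names are chosen for this
sketch only and need not coincide with those used in the actual sources.
.DS
struct object {
    int o_id;               /* unique identifying number */
    long off;               /* offset within the datablock */
    long size;              /* size of the object in bytes */
    struct object *next;
};

struct datablock {
    int d_id;               /* unique identifying number */
    int pseudo;             /* ROM, CON, BSS, HOL or UNKNOWN */
    long size;              /* number of bytes declared */
    int flags;              /* "external" */
    struct value *values;   /* contents of a rom, if known */
    struct object *objects; /* objects of the datablock */
    struct datablock *next;
};
.DE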
.NH 3
The procedure table
.PP
The procedure table contains global information
about all procedures that are made available
to the optimizer
and that are needed by the EM program.
(Library units may not be needed, see section 3.5).
The table has one entry for
every procedure.
.DS
.UL syntax
.TS
lw(1i) l l.
procedure_table:
{procedure}
procedure:
P_ID -- unique identifying number
#LABELS -- number of instruction labels
#LOCALS -- number of bytes for locals
#FORMALS -- number of bytes for formals
FLAGS -- flag bits
calling -- procedures called by this one
change -- info about global variables changed
use ; -- info about global variables used
calling:
{P_ID} ; -- procedures called
change:
ext -- external variables changed
FLAGS ;
use:
FLAGS ;
ext:
{O_ID} ; -- a set of objects
.TE
.DE
.PP
The number of bytes of formal parameters accessed by
a procedure is determined by the front ends and
passed via a message (parameter message) to the optimizer.
If the front end is not able to determine this number
(e.g. the parameter may be an array of dynamic size or
the procedure may have a variable number of arguments), the attribute
contains the value 'UNKNOWN_SIZE'.
.sp 0
A procedure has the following flags:
.IP -
external: true if the proc. is externally visible
.IP -
bodyseen: true if its code is available as EM text
.IP -
calunknown: true if it calls a procedure that has its bodyseen
flag not set
.IP -
environ: true if it uses or changes a (non-global) variable in
a lexically enclosing procedure
.IP -
lpi: true if it is used as operand of an lpi instruction, so
it may be called indirectly
.LP
The change and use attributes both have one flag: "indirect",
indicating whether the procedure does a 'use indirect'
or a 'store indirect' (indirect means through a pointer).
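.PP
A procedure table entry might be declared in C along the following
lines; the names are invented for this sketch and do not necessarily
reflect the actual sources.
.DS
struct proc {
    int p_id;               /* unique identifying number */
    int nlabels;            /* number of instruction labels */
    long nlocals;           /* number of bytes for locals */
    long nformals;          /* bytes for formals, or UNKNOWN_SIZE */
    int flags;              /* external, bodyseen, calunknown, ... */
    struct pset *calling;   /* procedures called by this one */
    struct oset *change;    /* global objects changed, plus flags */
    struct oset *use;       /* global objects used, plus flags */
    struct proc *next;
};
.DE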
.NH 3
The EM text
.PP
The EM text contains the EM instructions.
Every EM instruction has an operation code (opcode)
and 0 or 1 operands.
EM pseudo instructions can have more than
1 operand.
The opcode is just a small (8 bit) integer.
.sp
There are several kinds of operands, which we will
refer to as
.UL types.
Many EM instructions can have more than one type of operand.
The types and their encodings in Compact Assembly Language
are discussed extensively in.
.[~[
keizer architecture
.], section 11.2]
Of special interest is the way numeric values
are represented.
Of prime importance is the machine independency of
the representation.
Ultimately, one could store every integer
just as a string of the characters '0' to '9'.
As doing arithmetic on strings is awkward,
Compact Assembly Language allows several alternatives.
The main idea is to look at the value of the integer.
Integers that fit in 16, 32 or 64 bits are
represented as a row of 2, 4 or 8 bytes respectively,
preceded by an indication of how many bytes are used.
Longer integers are represented as strings;
this is only allowed within pseudo instructions, however.
This concept works very well for target machines
with reasonable word sizes.
At present, most ACK software cannot be used for word sizes
higher than 32 bits,
although the handles for using larger word sizes are
present in the design of the EM code.
In the intermediate code we essentially use the
same ideas.
We allow three representations of integers.
.IP -
integers that fit in a short are represented as a short
.IP -
integers that fit in a long but not in a short are represented
as longs
.IP -
all remaining integers are represented as strings
(only allowed in pseudos).
.LP
The terms short and long are defined in
.[~[
ritchie reference manual programming language
.], section 4]
and depend only on the source machine
(i.e. the machine on which ACK runs),
not on the target machines.
For historical reasons a long will often be called an
.UL offset.
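.PP
As a small sketch, the choice between the first two representations
could be made as follows; integers that do not fit in a long never
reach this point, as they are kept as strings. The names are invented
for this example.
.DS
#include <limits.h>

enum irepr { R_SHORT, R_OFFSET };

/* pick the representation of an integer that fits in a long;
 * anything larger is kept as a string (pseudos only) */
enum irepr int_repr(long v)
{
    if (v >= SHRT_MIN && v <= SHRT_MAX)
        return R_SHORT;     /* fits in a short */
    return R_OFFSET;        /* fits in a long only */
}
.DE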
.PP
Operands can also be instruction labels,
objects or procedures.
Instruction labels are denoted by a
.UL label
.UL identifier,
which can be distinguished from a normal identifier.
.sp
The operand of a pseudo instruction can be a list of
.UL arguments.
Arguments can have the same type as operands, except
for the type short, which is not used for arguments.
Furthermore, an argument can be a string or
a string representation of a signed integer, unsigned integer
or floating point number.
If the number of arguments is not fully determined by
the pseudo instruction (e.g. a ROM pseudo can have any number
of arguments), then the list is terminated by a special
argument of type CEND.
.DS
.UL syntax
.TS
lw(1i) l l.
em_text:
{line} ;
line:
INSTR -- opcode
OPTYPE -- operand type
operand ;
operand:
empty | -- OPTYPE = NO
SHORT | -- OPTYPE = SHORT
OFFSET | -- OPTYPE = OFFSET
LAB_ID | -- OPTYPE = INSTRLAB
O_ID | -- OPTYPE = OBJECT
P_ID | -- OPTYPE = PROCEDURE
{argument} ; -- OPTYPE = LIST
argument:
ARGTYPE
arg ;
arg:
empty | -- ARGTYPE = CEND
OFFSET |
LAB_ID |
O_ID |
P_ID |
string | -- ARGTYPE = STRING
const ; -- ARGTYPE = ICON,UCON or FCON
string:
LENGTH -- number of characters
{CHARACTER} ;
const:
SIZE -- number of bytes
string ; -- string representation of (un)signed
-- or floating point constant
.TE
.DE
.NH 3
The control flow graphs
.PP
Each procedure can be divided
into a number of basic blocks.
A basic block is a piece of code with
no jumps in, except at the beginning,
and no jumps out, except at the end.
.PP
Every basic block has a set of
.UL successors,
which are basic blocks that can follow it immediately in
the dynamic execution sequence.
The
.UL predecessors
are the basic blocks of which this one
is a successor.
The successor and predecessor attributes
of all basic blocks of a single procedure
are said to form the
.UL control
.UL flow
.UL graph
of that procedure.
.PP
Another important attribute is the
.UL immediate
.UL dominator.
A basic block B dominates a block C if
every path in the graph from the procedure entry block
to C goes through B.
The immediate dominator of C is the closest dominator
of C on any path from the entry block.
(Note that the dominator relation is transitive,
so the immediate dominator is well defined.)
.PP
A basic block also has an attribute containing
the identifiers of every
.UL loop
that the block belongs to (see next section for loops).
.DS
.UL syntax
.TS
lw(1i) l l.
control_flow_graph:
{basic_block} ;
basic_block:
B_ID -- unique identifying number
#INSTR -- number of EM instructions
succ
pred
idom -- immediate dominator
loops -- set of loops
FLAGS ; -- flag bits
succ:
{B_ID} ;
pred:
{B_ID} ;
idom:
B_ID ;
loops:
{LP_ID} ;
.TE
.DE
The flag bits can have the values 'firm' and 'strong',
which are explained below.
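.PP
In C, a basic block entry might be declared along the following lines;
the names are invented for this sketch.
.DS
struct bblock {
    int b_id;               /* unique identifying number */
    int ninstr;             /* number of EM instructions */
    struct bset *succ;      /* successors */
    struct bset *pred;      /* predecessors */
    struct bblock *idom;    /* immediate dominator */
    struct lset *loops;     /* loops this block belongs to */
    int flags;              /* firm, strong */
    struct bblock *next;
};
.DE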
.NH 3
The loop tables
.PP
Every procedure has an associated
.UL loop
.UL table
containing information about all the loops
in the procedure.
Loops can be detected by a close inspection of
the control flow graph.
The main idea is to look for two basic blocks,
B and C, for which the following holds:
.IP -
B is a successor of C
.IP -
B is a dominator of C
.LP
B is called the loop
.UL entry
and C is called the loop
.UL end.
Intuitively, C contains a jump backwards to
the beginning of the loop (B).
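.PP
As a sketch, this test can be expressed in C as follows, using the
immediate dominator chain; the block structure and the routine
record_loop are placeholders for this example, and the procedure entry
block is assumed to have a null immediate dominator.
.DS
struct blk {
    struct blk *idom;       /* immediate dominator, 0 for the entry */
    struct blk **succ;      /* successors */
    int nsucc;
};

extern void record_loop(struct blk *entry, struct blk *end);

/* does b dominate c?  follow the immediate-dominator chain upwards */
static int dominates(struct blk *b, struct blk *c)
{
    while (c != 0) {
        if (c == b)
            return 1;
        c = c->idom;
    }
    return 0;
}

/* every pair (b,c) with b a successor of c and b dominating c
 * identifies a loop with entry b and end c */
void find_loops(struct blk *blocks[], int n)
{
    int i, j;

    for (i = 0; i < n; i++)
        for (j = 0; j < blocks[i]->nsucc; j++)
            if (dominates(blocks[i]->succ[j], blocks[i]))
                record_loop(blocks[i]->succ[j], blocks[i]);
}
.DE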
.PP
A loop L1 is said to be
.UL nested
within loop L2 if all basic blocks of L1
are also part of L2.
It is important to note that loops could
originally have been written as well-structured for- or
while-loops, or as messy goto loops.
Hence loops may partly overlap without one
being nested inside the other.
The
.UL nesting
.UL level
of a loop is the number of loops in
which it is nested (so it is 0 for
an outermost loop).
The details of loop detection will be discussed later.
.PP
It is often desirable to know whether a
basic block gets executed during every iteration
of a loop.
This leads to the following definitions:
.IP -
A basic block B of a loop L is said to be a \fIfirm\fR block
of L if B is executed on all successive iterations of L,
with the only possible exception of the last iteration.
.IP -
A basic block B of a loop L is said to be a \fIstrong\fR block
of L if B is executed on all successive iterations of L.
.LP
Note that a strong block is also a firm block.
If a block is part of a conditional statement, it is neither
strong nor firm, as it may be skipped during some iterations
(see Fig. 3.2).
.DS
loop
if cond1 then
... \kx-- this code will not
\h'|\nxu'-- result in a firm or strong block
end if;
... -- strong (always executed)
exit when cond2;
... \kx-- firm (not executed on last iteration).
end loop;
Fig. 3.2 Example of firm and strong block
.DE
.DS
.UL syntax
.TS
lw(1i) l l.
looptable:
{loop} ;
loop:
LP_ID -- unique identifying number
LEVEL -- loop nesting level
entry -- loop entry block
end ;
entry:
B_ID ;
end:
B_ID ;
.TE
.DE

View File

@ -1,83 +0,0 @@
.NH 2
External representation of the intermediate code
.PP
The syntax of the intermediate code was given
in the previous section.
In this section we will make some remarks about
the representation of the code in sequential files.
.sp
We use sequential files in order to avoid
the bookkeeping of complex file indices.
As a consequence of this decision
we can't store all components
of the intermediate code
in one file.
If a phase wishes to change some attribute
of a procedure,
or wants to add or delete entire procedures
(inline substitution may do the latter),
the procedure table will only be fully updated
after the entire EM text has been scanned.
Yet, the next phase undoubtedly wants
to read the procedure table before it
starts working on the EM text.
Hence there is an ordering problem, which
can be solved easily by putting the
procedure table in a separate file.
Similarly, the data block table is kept
in a file of its own.
.PP
The control flow graphs (CFGs) could be mixed
with the EM text.
Rather, we have chosen to put them
in a separate file too.
The control flow graph file should be regarded as a
file that imposes some structure on the EM-text file,
just as an overhead sheet containing a picture
of a Flow Chart may be put on an overhead sheet
containing statements.
The loop tables are also put in the CFG file.
A loop imposes an extra structure on the
CFGs and hence on the EM text.
So there are four files:
.IP -
the EM-text file
.IP -
the procedure table file
.IP -
the object table file
.IP -
the CFG and loop tables file
.LP
Every table is preceded by its length, in order to
tell where it ends.
The CFG file also contains the number of instructions of
every basic block,
indicating which part of the EM text belongs
to that block.
.DS
.UL syntax
.TS
lw(1i) l l.
intermediate_code:
object_table_file
proctable_file
em_text_file
cfg_file ;
object_table_file:
LENGTH -- number of objects
object_table ;
proctable_file:
LENGTH -- number of procedures
procedure_table ;
em_text_file:
em_text ;
cfg_file:
{per_proc} ; -- one for every procedure
per_proc:
BLENGTH -- number of basic blocks
LLENGTH -- number of loops
control_flow_graph
looptable ;
.TE
.DE
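.PP
The sketch below shows in C how such a length-prefixed table might be
read; the byte-level encoding is not shown (a raw fread merely stands
in for it) and the names are invented for this example.
.DS
#include <stdio.h>

/* read the procedure table file: a length (number of procedures)
 * followed by that many entries; the layout of one entry is left
 * out of this sketch */
int read_proctable(FILE *f)
{
    long i, nprocs;

    if (fread(&nprocs, sizeof nprocs, 1, f) != 1)
        return -1;
    for (i = 0; i < nprocs; i++) {
        /* read one procedure entry and add it to the table */
    }
    return 0;
}
.DE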

View File

@ -1,166 +0,0 @@
.NH 2
The Intermediate Code construction phase
.PP
The first phase of the global optimizer,
called
.UL IC,
constructs a major part of the intermediate code.
To be specific, it produces:
.IP -
the EM text
.IP -
the object table
.IP -
part of the procedure table
.LP
The calling, change and use attributes of a procedure
and all its flags except the external and bodyseen flags
are computed by the next phase (Control Flow phase).
.PP
As explained before,
the intermediate code does not contain
any names of variables or procedures.
The normal identifiers are replaced by identifying
numbers.
Yet, the output of the global optimizer must
contain normal identifiers, as this
output is in Compact Assembly Language format.
We certainly want all externally visible names
to be the same in the input as in the output,
because the optimized EM module may be a library unit,
used by other modules.
IC dumps the names of all procedures and data labels
on two files:
.IP -
the procedure dump file, containing tuples (P_ID, procedure name)
.IP -
the data dump file, containing tuples (D_ID, data label name)
.LP
The names of instruction labels are not dumped,
as they are not visible outside the procedure
in which they are defined.
.PP
The input to IC consists of one or more files.
Each file is either an EM module in Compact Assembly Language
format, or a Unix archive file (library) containing such modules.
IC only extracts those modules from a library that are
needed somehow, just as a linker does.
It is advisable to present as much code
of the EM program as possible to the optimizer,
although it is not required to present the whole program.
If a procedure is called somewhere in the EM text,
but its body (text) is not included in the input,
its bodyseen flag in the procedure table will still
be off.
Whenever such a procedure is called,
we assume the worst case for everything;
it will change and use all variables it has access to,
it will call every procedure etc.
.sp
Similarly, if a data label is used
but not defined, the PSEUDO attribute in its data block
will be set to UNKNOWN.
.NH 3
Implementation
.PP
Part of the code for the EM Peephole Optimizer
.[
staveren peephole toplass
.]
has been used for IC.
Especially the routines that read and unravel
Compact Assembly Language and the identifier
lookup mechanism have been used.
New code was added to recognize objects,
build the object and procedure tables and to
output the intermediate code.
.PP
IC uses singly linked linear lists for both the
procedure and object table.
Hence there are no limits on the size of such
a table (except for the trivial fact that it must fit
in main memory).
Both tables are written out after all EM code has
been processed.
IC reads the EM text of one entire procedure
at a time,
processes it and appends the modified code to
the EM text file.
EM code is represented internally as a doubly linked linear
list of EM instructions.
.PP
Objects are recognized by looking at the operands
of instructions that reference global data.
If we come across the instructions:
.DS
.TS
l l.
LDE X+6 -- Load Double External
LAE X+20 -- Load Address External
.TE
.DE
we conclude that the data block
preceded by the data label X contains an object
at offset 6 of size twice the word size,
and an object at offset 20 of unknown size.
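.PP
The recognition step might be sketched in C as follows; the opcode
names, wordsize and the helper add_object are placeholders for this
example and do not reflect the actual source code.
.DS
#define UNKNOWN_SIZE (-1L)

extern long wordsize;       /* word size of the target machine */
extern void add_object(int d_id, long off, long size);
enum { op_loe, op_lae, op_lde, op_ste, op_sde, op_ine, op_dee, op_zre };

/* called for every instruction that references global data,
 * with the datablock id and the offset taken from its operand */
void note_object(int d_id, long off, int opcode)
{
    long size;

    switch (opcode) {
    case op_lde:
    case op_sde:
        size = 2 * wordsize;    /* double-word access */
        break;
    case op_lae:
        size = UNKNOWN_SIZE;    /* only the address is taken */
        break;
    default:
        size = wordsize;        /* single-word access */
    }
    add_object(d_id, off, size);  /* creates it if not yet present */
}
.DE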
.sp
A data block entry of the object table is allocated
at the first reference to a data label.
If this reference is a defining occurrence
or an INA pseudo instruction,
the label is not externally visible
.[~[
keizer architecture
.], section 11.1.4.3]
In this case, the external flag of the data block
is turned off.
If the first reference is an applied occurrence
or an EXA pseudo instruction, the flag is set.
We record this information, because the
optimizer may change the order of defining and
applied occurrences.
The INA and EXA pseudos are removed from the EM text.
They may be regenerated by the last phase
of the optimizer.
.sp
Similar rules hold for the procedure table
and the INP and EXP pseudos.
.NH 3
Source files of IC
.PP
The source files of IC consist
of the files ic.c, ic.h and several packages.
.UL ic.h
contains type definitions, macros and
variable declarations that may be used by
ic.c and by every package.
.UL ic.c
contains the definitions of these variables,
the procedure
.UL main
and some high level I/O routines used by main.
.sp
Every package xxx consists of two files.
ic_xxx.h contains type definitions,
macros, variable declarations and
procedure declarations that may be used by
every .c file that includes this .h file.
The file ic_xxx.c provides the
definitions of these variables and
the implementation of the declared procedures.
IC uses the following packages:
.IP lookup: 18
procedures that look up procedure, data label
and instruction label names; procedures to dump
the procedure and data label names.
.IP lib:
one procedure that gets the next useful input module;
while scanning archives, it skips unnecessary modules.
.IP aux:
several auxiliary routines.
.IP io:
low-level I/O routines that unravel the Compact
Assembly Language.
.IP put:
routines that output the intermediate code.
.LP

View File

@ -1,6 +0,0 @@
il1
il2
il3
il4
il5
il6

View File

@ -1,112 +0,0 @@
.bp
.NH 1
Inline substitution
.NH 2
Introduction
.PP
The Inline Substitution technique (IL)
tries to decrease the overhead associated
with procedure calls (invocations).
During a procedure call, several actions
must be undertaken to set up the right
environment for the called procedure.
.[
johnson calling sequence
.]
On return from the procedure, most of these
effects must be undone.
This entire process introduces significant
costs in execution time as well as
in object code size.
.PP
The inline substitution technique replaces
some of the calls by the modified body of
the called procedure, hence eliminating
the overhead.
Furthermore, as the calling and called procedure
are now integrated, they can be optimized
together, using other techniques of the optimizer.
This often leads to extra opportunities for
optimization
.[
ball predicting effects
.]
.[
carter code generation cacm
.]
.[
scheifler inline cacm
.]
.PP
An inline substitution of a call to a procedure P increases
the size of the program, unless P is very small or P is
called only once.
In the latter case, P can be eliminated.
In practice, procedures that are called only once occur
quite frequently, due to the
introduction of structured programming.
(Carter
.[
carter umi ann arbor
.]
states that almost 50% of the Pascal procedures
he analyzed were called just once).
.PP
Scheifler
.[
scheifler inline cacm
.]
has a more general view of inline substitution.
In his model, the program under consideration is
allowed to grow by a certain amount,
i.e. code size is sacrificed to speed up the program.
The above two cases are just special cases of
his model, obtained by setting the size-change to
(approximately) zero.
He formulates the substitution problem as follows:
.IP
"Given a program, a subset of all invocations,
a maximum program size, and a maximum procedure size,
find a sequence of substitutions that minimizes
the expected execution time."
.LP
Scheifler shows that this problem is NP-complete
.[~[
aho hopcroft ullman analysis algorithms
.], chapter 10]
by reduction to the Knapsack Problem.
Heuristics will have to be used to find a near-optimal
solution.
.PP
In the following chapters we will extend
Scheifler's view and adapt it to the EM Global Optimizer.
We will first describe the transformations that have
to be applied to the EM text when a call is substituted
in line.
Next we will examine in which cases inline substitution
is not possible or desirable.
Heuristics will be developed for
choosing a good sequence of substitutions.
These heuristics make no demand on the user
(such as making profiles
.[
scheifler inline cacm
.]
or giving pragmats
.[~[
ichbiah ada military standard
.], section 6.3.2]),
although the model could easily be extended
to use such information.
Finally, we will discuss the implementation
of the IL phase of the optimizer.
.PP
We will often use the term inline expansion
as a synonym of inline substitution.
.sp 0
The inverse technique of procedure abstraction
(automatic subroutine generation)
.[
shaffer subroutine generation
.]
will not be discussed in this report.

View File

@ -1,93 +0,0 @@
.NH 2
Parameters and local variables.
.PP
In the EM calling sequence, the calling procedure
pushes its parameters on the stack
before doing the CAL.
The called routine first saves some
status information on the stack and then
allocates space for its own locals
(also on the stack).
Usually, one special purpose register,
the Local Base (LB) register,
is used to access both the locals and the
parameters.
If memory is highly segmented,
the stack frames of the caller and the callee
may be allocated in different fragments;
an extra Argument Base (AB) register is used
in this case to access the actual parameters.
See 4.2 of
.[
keizer architecture
.]
for further details.
.PP
If a procedure call is expanded in line,
there are two problems:
.IP 1. 3
No stack frame will be allocated for the called procedure;
we must find another place to put its locals.
.IP 2.
The LB register cannot be used to access the actual
parameters;
as the CAL instruction is deleted, the LB will
still point to the local base of the \fIcalling\fR procedure.
.LP
The local variables of the called procedure will
be put in the stack frame of the calling procedure,
just after its own locals.
The size of the stack frame of the
calling procedure will be increased
during its entire lifetime.
Therefore our model will allow a
limit to be set on the number of bytes
for locals that the called procedure may have
(see next section).
.PP
There are several alternatives to access the parameters.
An actual parameter may be any auxiliary expression,
which we will refer to as
the \fIactual parameter expression\fR.
The value of this expression is stored
in a location on the stack (see above),
the \fIparameter location\fR.
.sp 0
The alternatives for accessing parameters are:
.IP -
save the value of the stackpointer at the point of the CAL
in a temporary variable X;
this variable can be used to simulate the AB register, i.e.
parameter locations are accessed via an offset to
the value of X.
.IP -
create a new temporary local variable T for
the parameter (in the stack frame of the caller);
every access to the parameter location must be changed
into an access to T.
.IP -
do not evaluate the actual parameter expression before the call;
instead, substitute this expression for every use of the
parameter location.
.LP
The first method may be expensive if X is not
put in a register.
We will not use this method.
The time required to evaluate and access the
parameters when the second method is used
will not differ much from the normal
calling sequence (i.e. not in line call).
It is not expensive, but there are no
extra savings either.
The third method is essentially the 'by name'
parameter mechanism of Algol60.
If the actual parameter is just a numeric constant,
it is advantageous to use it.
Yet, there are several circumstances
under which it cannot or should not be used.
We will deal with this in the next section.
.sp 0
In general we will use the third method,
if it is possible and desirable.
Such parameters will be called \fIin line parameters\fR.
In all other cases we will use the second method.

View File

@ -1,164 +0,0 @@
.NH 2
Feasibility and desirability analysis
.PP
Feasibility and desirability analysis
of in line substitution differ
somewhat from most other techniques.
Usually, much effort is needed to find
a feasible opportunity for optimization
(e.g. a redundant subexpression).
Desirability analysis then checks
if it is really advantageous to do
the optimization.
For IL, opportunities are easy to find.
To see if an in line expansion is
desirable will not be hard either.
Yet, the main problem is to find the most
desirable ones.
We will deal with this problem later and
we will first attend to feasibility and
desirability analysis.
.PP
There are several reasons why a procedure invocation
cannot or should not be expanded in line.
.sp
A call to a procedure P cannot be expanded in line
in any of the following cases:
.IP 1. 3
The body of P is not available as EM text.
Clearly, there is no way to do the substitution.
.IP 2.
P, or any procedure called by P (transitively),
follows the chain of statically enclosing
procedures (via a LXL or LXA instruction)
or follows the chain of dynamically enclosing
procedures (via a DCH).
If the call were expanded in line,
one level would be removed from the chains,
leading to total chaos.
This chaos could be solved by patching up
every LXL, LXA or DCH in all procedures
that could be part of the chains,
but this is hard to implement.
.IP 3.
P, or any procedure called by P (transitively),
calls a procedure whose body is not
available as EM text.
The unknown procedure may use an LXL, LXA or DCH.
However, in several languages a separately
compiled procedure has no access to the
static or dynamic chain.
In this case
this point does not apply.
.IP 4.
P, or any procedure called by P (transitively),
uses the LPB instruction, which converts a
local base to an argument base;
as the locals and parameters are stored
in a non-standard way (differing from the
normal EM calling sequence) this instruction
would yield incorrect results.
.IP 5.
The total number of bytes of the parameters
of P is not known.
P may be a procedure with a variable number
of parameters or may have an array of dynamic size
as value parameter.
.LP
It is undesirable to expand a call to a procedure P in line
in any of the following cases:
.IP 1. 3
P is large, i.e. the number of EM instructions
of P exceeds some threshold.
The expanded code would be large too.
Furthermore, several programs in ACK,
including the global optimizer itself,
may run out of memory if they have to run
in a small address space and are provided with
very large procedures.
The threshold may be set to infinite,
in which case this point does not apply.
.IP 2.
P has many local variables.
All these variables would have to be allocated
in the stack frame of the calling procedure.
.PP
If a call may be expanded in line, we have to
decide how to access its parameters.
In the previous section we stated that we would
use in line parameters whenever possible and desirable.
There are several reasons why a parameter
cannot or should not be expanded in line.
.sp
No parameter of a procedure P can be expanded in line,
in any of the following cases:
.IP 1. 3
P, or any procedure called by P (transitively),
does a store-indirect or a use-indirect (i.e. through
a pointer).
However, if the front-end has generated messages
telling that certain parameters cannot be accessed
indirectly, those parameters may be expanded in line.
.IP 2.
P, or any procedure called by P (transitively),
calls a procedure whose body is not available as EM text.
The unknown procedure may do a store-indirect
or a use-indirect.
However, the same remark about front-end messages
as for 1. holds here.
.IP 3.
The address of a parameter location is taken (via a LAL).
In the normal calling sequence, all parameters
are stored sequentially. If the address of one
parameter location is taken, the address of any
other parameter location can be computed from it.
Hence we must put every parameter in a temporary location;
furthermore, all these locations must be in
the same order as for the normal calling sequence.
.IP 4.
P has overlapping parameters; for example, it uses
the parameter at offset 10 both as a 2 byte and as a 4 byte
parameter.
Such code may be produced by the front ends if
the formal parameter is of some record type
with variants.
.PP
Sometimes a specific parameter must not be expanded in line.
.sp 0
An actual parameter expression cannot be expanded in line
in any of the following cases:
.IP 1. 3
P stores into the parameter location.
Even if the actual parameter expression is a simple
variable, it is incorrect to change the 'store into
formal' into a 'store into actual', because of
the parameter mechanism used.
In Pascal, the following expansion is incorrect:
.DS
procedure p (x:integer);
begin
x := 20;
end;
\&...
a := 10; \kxa := 10;
p(a); ---> \h'|\nxu'a := 20;
write(a); \h'|\nxu'write(a);
.DE
.IP 2.
P changes any of the operands of the
actual parameter expression.
If the expression is expanded and evaluated
after the operand has been changed,
the wrong value will be used.
.IP 3.
The actual parameter expression has side effects.
It must be evaluated only once,
at the place of the call.
.LP
It is undesirable to expand an actual parameter in line
in the following case:
.IP 1. 3
The parameter is used more than once
(dynamically) and the actual parameter expression
is not just a simple variable or constant.
.LP

View File

@ -1,135 +0,0 @@
.NH 2
Heuristic rules
.PP
Using the information described
in the previous section,
we can find all calls that can
be expanded in line, and for which
this expansion is desirable.
In general, we cannot expand all these calls,
so we have to choose the 'best' ones.
With every CAL instruction
that may be expanded, we associate
a \fIpay off\fR,
which expresses how desirable it is
to expand this specific CAL.
.sp
Let Tc denote the portion of EM text involved
in a specific call, i.e. the pushing of the actual
parameter expressions, the CAL itself,
the popping of the parameters and the
pushing of the result (if any, via an LFR).
Let Te denote the EM text that would be obtained
by expanding the call in line.
Let Pc be the original program and Pe the program
with Te substituted for Tc.
The pay off of the CAL depends on two factors:
.IP -
T = execution_time(Pe) - execution_time(Pc)
.IP -
S = code_size(Pe) - code_size(Pc)
.LP
The change in execution time (T) depends on:
.IP -
T1 = execution_time(Te) - execution_time(Tc)
.IP -
N = number of times Te or Tc get executed.
.LP
We assume that T1 will be the same every
time the code gets executed.
This is a reasonable assumption.
(Note that we are talking about one CAL,
not about different calls to the same procedure).
Hence
.DS
T = N * T1
.DE
T1 can be estimated by a careful analysis
of the transformations that are performed.
Below, we list everything that will be
different when a call is expanded in line:
.IP -
The CAL instruction is not executed.
This saves a subroutine jump.
.IP -
The instructions in the procedure prolog
are not executed.
These instructions, generated from the PRO pseudo,
save some machine registers
(including the old LB), set the new LB and allocate space
for the locals of the called routine.
The savings may be less if there are no
locals to allocate.
.IP -
In line parameters are not evaluated before the call
and are not pushed on the stack.
.IP -
All remaining parameters are stored in local variables,
instead of being pushed on the stack.
.IP -
If the number of parameters is nonzero,
the ASP instruction after the CAL is not executed.
.IP -
Every reference to an in line parameter is
substituted by the parameter expression.
.IP -
RET (return) instructions are replaced by
BRA (branch) instructions.
If the called procedure 'falls through'
(i.e. it has only one RET, at the end of its code),
even the BRA is not needed.
.IP -
The LFR (fetch function result) is not executed.
.PP
Besides these changes, which are caused directly by IL,
other changes may occur as IL influences other optimization
techniques, such as Register Allocation and Constant Propagation.
Our heuristic rules do not take into account these rather
unpredictable effects on Register Allocation.
They do, however, favour calls that have numeric \fIconstants\fR
as parameter; especially the constant "0" as an inline
parameter gets high scores,
as further optimizations may often be possible.
.PP
It cannot be determined statically how often a CAL instruction gets
executed.
We will use \fIloop nesting\fR information here.
The nesting level of the loop in which
the CAL appears (if any) will be used as an
indication for the number of times it gets executed.
.PP
Based on all these facts,
the pay off of a call will be computed.
The following model was developed empirically.
Assume procedure P calls procedure Q.
The call takes place in basic block B.
.DS
.TS
l l l.
ZP \&= # zero parameters
CP \&= # constant parameters - ZP
LN \&= Loop Nesting level (0 if outside any loop)
F \&= \fIif\fR # formal parameters of Q > 0 \fIthen\fR 1 \fIelse\fR 0
FT \&= \fIif\fR Q falls through \fIthen\fR 1 \fIelse\fR 0
S \&= size(Q) - 1 - # inline_parameters - F
L \&= \fIif\fR # local variables of P > 0 \fIthen\fR 0 \fIelse\fR -1
A \&= CP + 2 * ZP
N \&= \fIif\fR LN=0 and P is never called from a loop \fIthen\fR 0 \fIelse\fR (LN+1)**2
FM \&= \fIif\fR B is a firm block \fIthen\fR 2 \fIelse\fR 1
pay_off \&= (100/S + FT + F + L + A) * N * FM
.TE
.DE
S stands for the size increase of the program,
which is slightly less than the size of Q.
The size of a procedure is taken to be its number
of (non-pseudo) EM instructions.
The terms "loop nesting level" and "firm" were defined
in the chapter on the Intermediate Code (section "loop tables").
If a call is not inside a loop and the calling procedure
is itself never called from a loop (transitively),
then the call will probably be executed at most once.
Such a call is never expanded in line (its pay off is zero).
If the calling procedure doesn't have local variables, a penalty (L)
is introduced, as it will most likely get local variables if the
call gets expanded.
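.PP
Written out as a C function, the computation reads as follows; the
operands are those defined in the table above and are assumed to have
been computed by the caller. The function name and the guard against a
non-positive S are added only for this sketch.
.DS
int pay_off(int ZP, int CP, int LN, int F, int FT,
            int S, int L, int never_in_loop, int firm)
{
    int A, N, FM;

    A = CP + 2 * ZP;
    N = (LN == 0 && never_in_loop) ? 0 : (LN + 1) * (LN + 1);
    FM = firm ? 2 : 1;
    if (S < 1)
        S = 1;              /* guard for this sketch only */
    return (100 / S + FT + F + L + A) * N * FM;
}
.DE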

View File

@ -1,446 +0,0 @@
.NH 2
Implementation
.PP
A major factor in the implementation
of Inline Substitution is the requirement
not to use an excessive amount of memory.
IL essentially analyzes the entire program;
it makes decisions based on which procedure calls
appear in the whole program.
Yet, because of the memory restriction, it is
not feasible to read the entire program
in main memory.
To solve this problem, the IL phase has been
split up into three subphases that are executed sequentially:
.IP 1.
analyze every procedure; see how it accesses its parameters;
simultaneously collect all calls
appearing in the whole program and put them
in a \fIcall-list\fR.
.IP 2.
use the call-list and decide which calls will be substituted
in line.
.IP 3.
take the decisions of subphase 2 and modify the
program accordingly.
.LP
Subphases 1 and 3 scan the input program; only
subphase 3 modifies it.
It is essential that the decisions can be made
in subphase 2
without using the input program,
provided that subphase 1 puts enough information
in the call-list.
Subphase 2 keeps the entire call-list in main memory
and repeatedly scans it, to
find the next best candidate for expansion.
.PP
We will specify the
data structures used by IL before
describing the subphases.
.NH 3
Data structures
.NH 4
The procedure table
.PP
In subphase 1 information is gathered about every procedure
and added to the procedure table.
This information is used by the heuristic rules.
A proctable entry for procedure p has
the following extra information:
.IP -
is it allowed to substitute an invocation of p in line?
.IP -
is it allowed to put any parameter of such a call in line?
.IP -
the size of p (number of EM instructions)
.IP -
does p 'fall through'?
.IP -
a description of the formal parameters that p accesses; this information
is obtained by looking at the code of p. For every parameter f,
we record:
.RS
.IP -
the offset of f
.IP -
the type of f (word, double word, pointer)
.IP -
may the corresponding actual parameter be put in line?
.IP -
is f ever accessed indirectly?
.IP -
is f used: never, once or more than once?
.RE
.IP -
the number of times p is called (see below)
.IP -
the file address of its call-count information (see below).
.LP
.NH 4
Call-count information
.PP
As a result of Inline Substitution, some procedures may
become useless, because all their invocations have been
substituted in line.
One of the tasks of IL is to keep track of which
procedures are no longer called.
Note that IL is especially keen on procedures that are
called only once
(possibly as a result of expanding all other calls to it).
So we want to know how many times a procedure
is called \fIduring\fR Inline Substitution.
It is not good enough to compute this
information afterwards.
The task is rather complex, because
the number of times a procedure is called
varies during the entire process:
.IP 1.
If a call to p is substituted in line,
the number of calls to p gets decremented by 1.
.IP 2.
If a call to p is substituted in line,
and p contains n calls to q, then the number of calls to q
gets incremented by n.
.IP 3.
If a procedure p is removed (because it is no
longer called) and p contains n calls to q,
then the number of calls to q gets decremented by n.
.LP
(Note that p may be the same as q, if p is recursive).
.sp 0
So we actually want to have the following information:
.DS
NRCALL(p,q) = number of calls to q appearing in p,
for all procedures p and q that may be put in line.
.DE
This information, called \fIcall-count information\fR is
computed by the first subphase.
It is stored in a file.
It is represented as a number of lists, rather than as
a (very sparse) matrix.
Every procedure has a list of (proc,count) pairs,
telling which procedures it calls, and how many times.
The file address of its call-count list is stored
in its proctable entry.
Whenever this information is needed, it is fetched from
the file, using direct access.
The proctable entry also contains the number of times
a procedure is called, at any moment.
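.PP
A sketch of this list representation and of the updates for rules 2
and 3 above is given below; the names are invented for this example.
.DS
/* one (procedure, count) pair of a call-count list */
struct cnt {
    int proc;               /* P_ID of the called procedure */
    int count;              /* number of calls to it within p */
    struct cnt *next;
};

/* rule 2: a call to p was expanded in line (sign = +1);
 * rule 3: procedure p was removed         (sign = -1) */
void adjust_counts(struct cnt *p_list, int ncalled[], int sign)
{
    struct cnt *c;

    for (c = p_list; c != 0; c = c->next)
        ncalled[c->proc] += sign * c->count;
}
.DE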
.NH 4
The call-list
.PP
The call-list is the major data structure used by IL.
Every item of the list describes one procedure call.
It contains the following attributes:
.IP -
the calling procedure (caller)
.IP -
the called procedure (callee)
.IP -
identification of the CAL instruction (sequence number)
.IP -
the loop nesting level; our heuristic rules appreciate
calls inside a loop (or even inside a loop nested inside
another loop, etc.) more than other calls
.IP -
the actual parameter expressions involved in the call;
for every actual, we record:
.RS
.IP -
the EM code of the expression
.IP -
the number of bytes of its result (size)
.IP -
an indication if the actual may be put in line
.RE
.LP
The structure of the call-list is rather complex.
Whenever a call is expanded in line, new calls
will suddenly appear in the program,
that were not contained in the original body
of the calling subroutine.
These calls are inherited from the called procedure.
We will refer to these invocations as \fInested calls\fR
(see Fig. 5.1).
.DS
.TS
lw(2.5i) l.
procedure p is
begin .
a(); .
b(); .
end;
.TE
.TS
lw(2.5i) l.
procedure r is procedure r is
begin begin
x(); x();
p(); -- in line a(); -- nested call
y(); b(); -- nested call
end; y();
end;
.TE
Fig. 5.1 Example of nested procedure calls
.DE
Nested calls may subsequently be put in line too
(probably resulting in a yet deeper nesting level, etc.).
So the call-list does not always reflect the source program,
but changes dynamically, as decisions are made.
If a call to p is expanded, all calls appearing in p
will be added to the call-list.
.sp 0
A convenient and elegant way to represent
the call-list is to use a LISP-like list.
.[
poel lisp trac
.]
Calls that appear at the same level
are linked in the CDR direction. If a call C
to a procedure p is expanded,
all calls appearing in p are put in a sub-list
of C, i.e. in its CAR.
In the example above, before the decision
to expand the call to p is made, the
call-list of procedure r looks like:
.DS
(call-to-x, call-to-p, call-to-y)
.DE
After the decision, it looks like:
.DS
(call-to-x, (call-to-p*, call-to-a, call-to-b), call-to-y)
.DE
The call to p is marked, because it has been
substituted.
Whenever IL wants to traverse the call-list of some procedure,
it uses the well-known LISP technique of
recursion in the CAR direction and
iteration in the CDR direction
(see page 1.19-2 of
.[
poel lisp trac
.]
).
All list traversals look like:
.DS
traverse(list)
{
    for (c = first(list); c != 0; c = CDR(c)) {
        if (marked(c)) {
            /* expanded call: visit its nested calls */
            traverse(CAR(c));
        } else {
            /* process the call c itself */
        }
    }
}
.DE
The entire call-list consists of a number of LISP-like lists,
one for every procedure.
The proctable entry of a procedure contains a pointer
to the beginning of the list.
.NH 3
The first subphase: procedure analysis
.PP
The tasks of the first subphase are to determine
several attributes of every procedure
and to construct the basic call-list,
i.e. without nested calls.
The size of a procedure is determined
by simply counting its EM instructions.
Pseudo instructions are skipped.
A procedure does not 'fall through' if its CFG
contains a basic block
that is not the last block of the CFG and
that ends on a RET instruction.
The formal parameters of a procedure are determined
by inspection of
its code.
.PP
The call-list is constructed by looking at all CAL instructions
appearing in the program.
The call-list should only contain calls to procedures
that may be put in line.
This fact is only known if the procedure was
analyzed earlier.
If a call to a procedure p appears in the program
before the body of p,
the call will always be put in the call-list.
If p is later found to be unsuitable,
the call will be removed from the list by the
second subphase.
.PP
An important issue is the recognition
of the actual parameter expressions of the call.
The front ends produce messages telling how many
bytes of formal parameters every procedure accesses.
(If there is no such message for a procedure, it
cannot be put in line).
The actual parameters together must account for
the same number of bytes. A recursive descent parser is used
to parse side-effect free EM expressions.
It uses a table and some
auxiliary routines to determine
how many bytes every EM instruction pops from the stack
and how many bytes it pushes onto the stack.
These numbers depend on the EM instruction, its argument,
and the wordsize and pointersize of the target machine.
Initially, the parser has to recognize the
number of bytes specified in the formals-message,
say N.
Assume the first instruction before the CAL pops S bytes
and pushes R bytes.
If R > N, too many bytes are recognized
and the parser fails.
Else, it calls itself recursively to recognize the
S bytes used as operand of the instruction.
If it succeeds in doing so, it continues with the next instruction,
i.e. the first instruction before the code recognized by
the recursive call, to recognize N-R more bytes.
The result is a number of EM instructions that collectively push N bytes.
If an instruction is encountered that has side-effects
(e.g. a store or a procedure call), or whose R and S cannot
be computed statically (e.g. a LOS), the parser fails.
.sp 0
Note that the parser traverses the code backwards.
As EM code is essentially postfix code, the parser works top down.
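.PP
A sketch of this parser in C is given below; the instruction type and
the helpers prev, has_side_effects, bytes_pushed and bytes_popped are
placeholders for this example, and for simplicity running off the
beginning of the procedure is treated as failure.
.DS
struct instr;                           /* one EM instruction */
extern struct instr *prev(struct instr *);
extern int has_side_effects(struct instr *);
extern long bytes_pushed(struct instr *);   /* R */
extern long bytes_popped(struct instr *);   /* S */

/* recognize, scanning backwards, code ending at ip that pushes
 * exactly n bytes; return the instruction just before that code,
 * or 0 on failure */
struct instr *parse(struct instr *ip, long n)
{
    long r, s;

    while (n > 0) {
        if (ip == 0 || has_side_effects(ip))
            return 0;
        r = bytes_pushed(ip);
        s = bytes_popped(ip);
        if (r > n)
            return 0;       /* too many bytes recognized */
        /* first recognize the s operand bytes of ip ... */
        ip = parse(prev(ip), s);
        if (ip == 0)
            return 0;
        n -= r;             /* ... then n-r more bytes */
    }
    return ip;
}
.DE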
.PP
If the parser fails to recognize the parameters, the call will not
be substituted in line.
If the parameters can be determined, they still have to
match the formal parameters of the called procedure.
This check is performed by the second subphase; it cannot be
done here, because it is possible that the called
procedure has not been analyzed yet.
.PP
The entire call-list is written to a file,
to be processed by the second subphase.
.NH 3
The second subphase: making decisions
.PP
The task of the second subphase is quite easy
to understand.
It reads the call-list file,
builds an incore call-list and deletes every
call that may not be expanded in line (either because the called
procedure may not be put in line, or because the actual parameters
of the call do not match the formal parameters of the called procedure).
It assigns a \fIpay-off\fR to every call,
indicating how desirable it is to expand it.
.PP
The subphase repeatedly scans the call-list and takes
the call with the highest ratio.
The chosen one gets marked,
and the call-list is extended with the nested calls,
as described above.
These nested calls are also assigned a ratio,
and will be considered too during the next scans.
.sp 0
After every decision the number of times
every procedure is called is updated, using
the call-count information.
Meanwhile, the subphase keeps track of the amount of space left
available.
If all space is used, or if there are no more calls left to
be expanded, it exits this loop.
Finally, calls to procedures that are called only
once are also chosen.
.PP
The actual parameters of a call are only needed by
this subphase to assign a ratio to a call.
To save some space, these actuals are not kept in main memory.
They are removed after the call has been read and a ratio
has been assigned to it.
So this subphase works with \fIabstracts\fR of calls.
After all work has been done,
the actual parameters of the chosen calls are retrieved
from a file,
as they are needed by the transformation subphase.
.NH 3
The third subphase: doing transformations
.PP
The third subphase makes the actual modifications to
the EM text.
It is directed by the decisions made in the previous subphase,
as expressed via the call-list.
The call-list read by this subphase contains
only calls that were selected for expansion.
The list is ordered in the same way as the EM text,
i.e. if a call C1 appears before a call C2 in the call-list,
C1 also appears before C2 in the EM text.
So the EM text is traversed linearly,
the calls that have to be substituted are determined
and the modifications are made.
If a procedure is encountered that is no longer needed,
it is simply not written to the output EM file.
The substitution of a call takes place in distinct steps:
.IP "change the calling sequence" 7
.sp 0
The actual parameter expressions are changed.
Parameters that are put in line are removed.
All remaining ones must store their result in a
temporary local variable, rather than
push it on the stack.
The CAL instruction and any ASP (to pop actual parameters)
or LFR (to fetch the result of a function)
are deleted.
.IP "fetch the text of the called procedure"
.sp 0
Direct disk access is used to read the text of the
called procedure.
The file offset is obtained from the proctable entry.
.IP "allocate bytes for locals and temporaries"
.sp 0
The local variables of the called procedure will be put in the
stack frame of the calling procedure.
The same applies to any temporary variables
that hold the result of parameters
that were not put in line.
The proctable entry of the caller is updated.
.IP "put a label after the CAL"
.sp 0
If the called procedure contains a RET (return) instruction
somewhere in the middle of its text (i.e. it does
not fall through), the RET must be changed into
a BRA (branch), to jump over the
remainder of the text.
This label is not needed if the called
procedure falls through.
.IP "copy the text of the called procedure and modify it"
.sp 0
References to local variables of the called routine
and to parameters that are not put in line
are changed to refer to the
new local of the caller.
References to in line parameters are replaced
by the actual parameter expression.
Returns (RETs) are either deleted or
replaced by a BRA.
Messages containing information about local
variables or parameters are changed.
Global data declarations and the PRO and END pseudos
are removed.
Instruction labels and references to them are
changed to make sure they do not have the
same identifying number as
labels in the calling procedure.
.IP "insert the modified text"
.sp 0
The pseudos of the called procedure are put after the pseudos
of the calling procedure.
The real text of the callee is put at
the place where the CAL was.
.IP "take care of nested substitutions"
.sp 0
The expanded procedure may contain calls that
have to be expanded too (nested calls).
If the descriptor of this call contains actual
parameter expressions,
the code of the expressions has to be changed
the same way as the code of the callee was changed.
Next, the entire process of finding CALs and doing
the substitutions is repeated recursively.
.LP

View File

@ -1,27 +0,0 @@
.NH 2
Source files of IL
.PP
The sources of IL are in the following files
and packages (the prefixes 1_, 2_ and 3_ refer to the three subphases):
.IP il.h: 14
declarations of global variables and
data structures
.IP il.c:
the routine main; the driving routines of the three subphases
.IP 1_anal:
contains a subroutine that analyzes a procedure
.IP 1_cal:
contains a subroutine that analyzes a call
.IP 1_aux:
implements auxiliary procedures used by subphase 1
.IP 2_aux:
implements auxiliary procedures used by subphase 2
.IP 3_subst:
the driving routine for doing the substitution
.IP 3_change:
lower level routines that do certain modifications
.IP 3_aux:
implements auxiliary procedures used by subphase 3
.IP aux:
implements auxiliary procedures used by several subphases.
.LP

View File

@ -1,3 +0,0 @@
head
intro1
tail

View File

@ -1,10 +0,0 @@
.ND
.\".ll 80m
.\".nr LL 80m
.\".nr tl 78m
.tr ~
.ds >. .
.ds >, ,
.ds [. " [
.ds .] ]
.cs 5 22

View File

@ -1,79 +0,0 @@
.TL
The design and implementation of
the EM Global Optimizer
.AU
H.E. Bal
.AI
Vrije Universiteit
Wiskundig Seminarium, Amsterdam
.AB
The EM Global Optimizer is part of the Amsterdam Compiler Kit,
a toolkit for making retargetable compilers.
It optimizes the intermediate code common to all compilers of
the toolkit (EM),
so it can be used for all programming languages and
all processors supported by the kit.
.PP
The optimizer is based on well-understood concepts like
control flow analysis and data flow analysis.
It performs the following optimizations:
Inline Substitution, Strength Reduction, Common Subexpression Elimination,
Stack Pollution, Cross Jumping, Branch Optimization, Copy Propagation,
Constant Propagation, Dead Code Elimination and Register Allocation.
.PP
This report describes the design of the optimizer and several
of its implementation issues.
.AE
.bp
.NH 1
Introduction
.PP
.FS
This work was supported by the
Stichting Technische Wetenschappen (STW)
under grant VWI00.0001.
.FE
The EM Global Optimizer is part of a software toolkit
for making production-quality retargetable compilers.
This toolkit,
called the Amsterdam Compiler Kit
.[
tanenbaum toolkit rapport
.]
.[
tanenbaum toolkit cacm
.]
runs under the Unix*
.FS
*Unix is a Trademark of Bell Laboratories
.FE
operating system.
.sp 0
The main design philosophy of the toolkit is to use
a language- and machine-independent
intermediate code, called EM.
.[
keizer architecture
.]
The basic compilation process can be split up into
two parts.
A language-specific front end translates the source program into EM.
A machine-specific back end transforms EM to assembly code
of the target machine.
.PP
The global optimizer is an optional phase of the
compilation process, and can be used to obtain
machine code of a higher quality.
The optimizer transforms EM-code to better EM-code,
so it comes between the front end and the back end.
It can be used with any combination of languages
and machines, as far as they are supported by
the compiler kit.
.PP
This report describes the design of the
global optimizer and several of its
implementation issues.
Measurements can be found in.
.[
bal tanenbaum global
.]

View File

@ -1,17 +0,0 @@
.SH
Acknowledgements
.PP
The author would like to thank Andy Tanenbaum for his guidance,
Duk Bekema for implementing the Common Subexpression Elimination phase
and writing the initial documentation of that phase,
Dick Grune for reading the manuscript of this report
and Ceriel Jacobs, Ed Keizer, Martin Kersten, Hans van Staveren
and the members of the S.T.W. user's group for their
interest and assistance.
.bp
.SH
References
.LP
.[
$LIST$
.]

View File

@ -1 +0,0 @@
lv1

View File

@ -1,95 +0,0 @@
.bp
.NH 1
Live-Variable analysis
.NH 2
Introduction
.PP
The "Live-Variable analysis" optimization technique (LV)
performs some code improvements and computes information that may be
used by subsequent optimizations.
The main task of this phase is the
computation of \fIlive-variable information\fR.
.[~[
aho compiler design
.] section 14.4]
A variable A is said to be \fIdead\fR at some point p of the
program text, if on no path in the control flow graph
from p to a RET (return), A can be used before being changed;
else A is said to be \fIlive\fR.
.PP
A statement of the form
.DS
VARIABLE := EXPRESSION
.DE
is said to be dead if the left hand side variable is dead just after
the statement and the right hand side expression has no
side effects (i.e. it doesn't change any variable).
Such a statement can be eliminated entirely.
Dead code will seldom be present in the original program,
but it may be the result of earlier optimizations,
such as copy propagation.
.PP
Live-variable information is passed to other phases via
messages in the EM code.
Live/dead messages are generated at points in the EM text where
variables become dead or live.
This information is especially useful for the Register
Allocation phase.
.NH 2
Implementation
.PP
The implementation uses algorithm 14.6 of.
.[
aho compiler design
.]
First two sets DEF and USE are computed for every basic block b:
.IP DEF(b) 9
the set of all variables that are assigned a value in b before
being used
.IP USE(b) 9
the set of all variables that may be used in b before being changed.
.LP
(So variables that may, but need not, be used or changed via a procedure
call or through a pointer are included in USE but not in DEF).
The next step is to compute the sets IN and OUT :
.IP IN[b] 9
the set of all variables that are live at the beginning of b
.IP OUT[b] 9
the set of all variables that are live at the end of b
.LP
IN and OUT can be computed for all blocks simultaneously by solving the
data flow equations:
.DS
(1) IN[b] = OUT[b] - DEF[b] + USE[b]
(2)	OUT[b] = IN[s1] + ... + IN[sn] ;
where SUCC[b] = {s1, ... , sn}
.DE
The equations are solved by an algorithm similar to the one used for
the Use Definition equations (see previous chapter).
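.PP
The solution process can be sketched in C with bit vectors as sets,
one bit per local variable; the structure and names below are invented
for this example.
.DS
typedef unsigned long bitset;   /* one bit per (local) variable */

struct blk {
    bitset def, use;            /* DEF(b) and USE(b) */
    bitset in, out;             /* IN[b] and OUT[b], initially 0 */
    struct blk **succ;          /* successors */
    int nsucc;
};

/* iterate until no IN set changes any more */
void solve(struct blk *b[], int n)
{
    int i, j, changed = 1;
    bitset newin;

    while (changed) {
        changed = 0;
        for (i = 0; i < n; i++) {
            b[i]->out = 0;                      /* equation (2) */
            for (j = 0; j < b[i]->nsucc; j++)
                b[i]->out |= b[i]->succ[j]->in;
            /* equation (1): IN = (OUT - DEF) + USE */
            newin = (b[i]->out ^ (b[i]->out & b[i]->def)) | b[i]->use;
            if (newin != b[i]->in) {
                b[i]->in = newin;
                changed = 1;
            }
        }
    }
}
.DE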
.PP
Finally, each basic block is visited in turn to remove its dead code
and to emit the live/dead messages.
Every basic block b is traversed from its last
instruction backwards to the beginning of b.
Initially, all variables that are dead at the end
of b are marked dead. All others are marked live.
If we come across an assignment to a variable X that
was marked live, a live-message is put after the
assignment and X is marked dead;
if X was marked dead, the assignment may be removed, provided that
the right hand side expression contains no side effects.
If we come across a use of a variable X that
was marked dead, a dead-message is put after the
use and X is marked live.
So at any point, the mark of X tells whether X is
live or dead immediately before that point.
A message is also generated at the start of a basic block
for every variable that was live at the end of the (textually)
previous block, but dead at the entry of this block, or vice versa.
.PP
Only local variables are considered.
This significantly reduces the memory needed by this phase,
eases the implementation and is hardly less efficient than
considering all variables.
(Note that it is very hard to prove that an assignment to
a global variable is dead).

View File

@ -1 +0,0 @@
ov1

View File

@ -1,374 +0,0 @@
.bp
.NH 1
Overview of the global optimizer
.NH 2
The ACK compilation process
.PP
The EM Global Optimizer is one of three optimizers that are
part of the Amsterdam Compiler Kit (ACK).
The phases of ACK are:
.IP 1.
A Front End translates a source program to EM
.IP 2.
The Peephole Optimizer
.[
tanenbaum staveren peephole toplass
.]
reads EM code and produces 'better' EM code.
It performs a number of optimizations (mostly peephole
optimizations)
such as constant folding, strength reduction and unreachable code
elimination.
.IP 3.
The Global Optimizer further improves the EM code.
.IP 4.
The Code Generator transforms EM to assembly code
of the target computer.
.IP 5.
The Target Optimizer improves the assembly code.
.IP 6.
An Assembler/Loader generates an executable file.
.LP
For a more extensive overview of the ACK compilation process,
we refer to.
.[
tanenbaum toolkit rapport
.]
.[
tanenbaum toolkit cacm
.]
.PP
The input of the Global Optimizer may consist of files and
libraries.
Every file or module in the library must contain EM code in
Compact Assembly Language format.
.[~[
tanenbaum machine architecture
.], section 11.2]
The output consists of one such EM file.
The input files and libraries together need not
constitute an entire program,
although as much of the program as possible should be supplied.
The more information about the program the optimizer
gets, the better its output code will be.
.PP
The Global Optimizer is language- and machine-independent,
i.e. it can be used for all languages and machines supported by ACK.
Yet, it puts some unavoidable restrictions on the EM code
produced by the Front End (see below).
It must have some knowledge of the target machine.
This knowledge is expressed in a machine description table
which is passed as argument to the optimizer.
This table does not contain very detailed information about the
target (such as its instruction set and addressing modes).
.NH 2
The EM code
.PP
The definition of EM, the intermediate code of all ACK compilers,
is given in a separate document.
.[
tanenbaum machine architecture
.]
We will only discuss some features of EM that are most relevant
to the Global Optimizer.
.PP
EM is the assembly code of a virtual \fIstack machine\fR.
All operations are performed on the top of the stack.
For example, the statement "A := B + 3" may be expressed in EM as:
.DS
.TS
l l.
LOL -4 -- push local variable B
LOC 3 -- push constant 3
ADI 2 -- add two 2-byte items on top of
-- the stack and push the result
STL -2 -- pop A
.TE
.DE
So EM is essentially a \fIpostfix\fR code.
.PP
EM has a rich instruction set, containing several arithmetic
and logical operators.
It also contains special-case instructions (such as INCrement).
.PP
EM has \fIglobal\fR (\fIexternal\fR) variables, accessible
by all procedures and \fIlocal\fR variables, accessible by a few
(nested) procedures.
The local variables of a lexically enclosing procedure may
be accessed via a \fIstatic link\fR.
EM has instructions to follow the static chain.
There are EM instructions that allow a procedure
to access its local variables directly (such as LOL and STL above).
Local variables are referenced via an offset in the stack frame
of the procedure, rather than by their names (e.g. -2 and -4 above).
The EM code does not contain the (source language) type
of the variables.
.PP
All structured statements in the source program are expressed in
low level jump instructions.
Besides conditional and unconditional branch instructions, there are
two case instructions (CSA and CSB),
to allow efficient translation of case statements.
.NH 2
Requirements on the EM input
.PP
As the optimizer should be useful for all languages,
it clearly should not put severe restrictions on the EM code
of the input.
There is, however, one immovable requirement:
it must be possible to determine the \fIflow of control\fR of the
input program.
As virtually all global optimizations are based on control flow information,
the optimizer would be totally powerless without it.
For this reason we restrict the usage of the case jump instructions (CSA/CSB)
of EM.
Such an instruction is always called with the address of a case descriptor
on top of the stack.
.[~[
tanenbaum machine architecture
.] section 7.4]
This descriptor contains the labels of all possible
destinations of the jump.
We demand that all case descriptors be allocated in a global
data fragment of type ROM, i.e. the case descriptors
may not be modifiable.
Furthermore, any case instruction should be immediately preceded by
an LAE (Load Address External) instruction that loads the
address of the descriptor,
so that the descriptor can be uniquely identified.
.PP
The optimizer will work improperly if the user obscures the flow of control.
We give two examples of how this can be done.
.PP
In "C" the notorious library routines "setjmp" and "longjmp"
.[
unix programmer's manual McIlroy
.]
may be used to jump out of a procedure,
but can also be used for a number of more dubious purposes,
for example, to create an extra entry point in a loop.
.DS
while (condition) {
....
setjmp(buf);
...
}
...
longjmp(buf);
.DE
An invocation of longjmp is actually a jump to the place of
the last call to setjmp with the same argument (buf).
As the calls to setjmp and longjmp are indistinguishable from
normal procedure calls, the optimizer will not see the danger.
Needless to say, several loop optimizations will behave
unexpectedly when presented with such pathological input.
.PP
Another way to obscure the flow of control is
to use exception handling routines.
Ada*
.FS
* Ada is a registered trademark of the U.S. Government
(Ada Joint Program Office).
.FE
has clearly recognized the dangers of exception handling,
but other languages (such as PL/I) have not.
.[
ada rationale
.]
.PP
The optimizer will be more effective if the EM input contains
some extra information about the source program.
The \fIregister messages\fR are especially important.
These messages indicate which local variables may never be
accessed indirectly.
Most optimizations benefit significantly from this information.
.PP
The Inline Substitution technique needs to know how many bytes
of formal parameters every procedure accesses.
Only calls to procedures for which the EM code contains this information
will be substituted in line.
.NH 2
Structure of the optimizer
.PP
The Global Optimizer is organized as a number of \fIphases\fR,
each one performing some task.
The main structure is as follows:
.IP IC 6
the Intermediate Code construction phase transforms EM into the
intermediate code (ic) of the optimizer
.IP CF
the Control Flow phase extends the ic with control flow
information and interprocedural information
.IP OPTs
zero or more optimization phases, each one performing one or
more related optimizations
.IP CA
the Compact Assembly phase generates Compact Assembly Language EM code
out of ic.
.LP
.PP
An important issue in the design of a global optimizer is the
interaction between optimization techniques.
It is often advantageous to combine several techniques in
one algorithm that takes into account all interactions between them.
Ideally, one single algorithm should be developed that does
all optimizations simultaneously and deals with all possible interactions.
In practice, such an algorithm is still far out of reach.
Instead some rather ad hoc (albeit important) combinations are chosen,
such as Common Subexpression Elimination and Register Allocation.
.[
prabhala sethi common subexpressions
.]
.[
sethi ullman optimal code
.]
.PP
In the EM Global Optimizer there is a separate algorithm for
every technique.
Note that this does not mean that all techniques are independent
of each other.
.PP
In principle, the optimization phases can be run in any order;
a phase may even be run more than once.
However, the following rules should be obeyed:
.IP -
the Live Variable analysis phase (LV) must be run prior to
Register Allocation (RA), as RA uses information produced by LV.
.IP -
RA should be the last phase; this is a consequence of the way
the interface between RA and the Code Generator is defined.
.LP
The ordering of the phases has significant impact on
the quality of the produced code.
In
.[
wulf overview production quality carnegie-mellon
.]
two kinds of phase ordering problems are distinguished.
If two techniques A and B each take away opportunities from the other,
there is a "negative" ordering problem.
If, on the other hand, both A and B introduce new optimization
opportunities for each other, the problem is called "positive".
In the Global Optimizer the following interactions must be
taken into account:
.IP -
Inline Substitution (IL) may create new opportunities for most
other techniques, so it should be run as early as possible
.IP -
Use Definition analysis (UD) may introduce opportunities for LV.
.IP -
Strength Reduction may create opportunities for UD
.LP
The optimizer has a default phase ordering, which can
be changed by the user.
.NH 2
Structure of this document
.PP
The remaining chapters of this document each describe one
phase of the optimizer.
For every phase, we describe its task, its design,
its implementation, and its source files.
The latter two sections are intended to aid the
maintenance of the optimizer and
can be skipped by the initial reader.
.NH 2
References
.PP
There are very
few modern textbooks on optimization.
Chapters 12, 13, and 14 of
.[
aho compiler design
.]
are a good introduction to the subject.
Wulf et al.
.[
wulf optimizing compiler
.]
describe one specific optimizing (Bliss) compiler.
Anklam et al.
.[
anklam vax-11
.]
discuss code generation and optimization in
compilers for one specific machine (a Vax-11).
Kirchgaesner et al.
.[
optimizing ada compiler
.]
present a brief description of many
optimizations; the report also contains a lengthy (over 60 pages)
bibliography.
.PP
The number of articles on optimization is quite impressive.
The Lowry and Medlock paper on the Fortran H compiler
.[
object code optimization Lowry Medlock
.]
is a classical one.
Other papers on global optimization are.
.[
faiman optimizing pascal
.]
.[
perkins sites
.]
.[
harrison general purpose optimizing
.]
.[
morel partial redundancies
.]
.[
Mintz global optimizer
.]
Freudenberger
.[
freudenberger setl optimizer
.]
describes an optimizer for a Very High Level Language (SETL).
The Production-Quality Compiler-Compiler (PQCC) project uses
very sophisticated compiler techniques, as described in.
.[
wulf overview ieee
.]
.[
wulf overview carnegie-mellon
.]
.[
wulf machine-relative
.]
.PP
Several Ph.D. theses are dedicated to optimization.
Davidson
.[
davidson simplifying
.]
outlines a machine-independent peephole optimizer that
improves assembly code.
Katkus
.[
katkus
.]
describes how efficient programs can be obtained at little cost by
optimizing only a small part of a program.
Photopoulos
.[
photopoulos mixed code
.]
discusses the idea of generating interpreted intermediate code as well
as assembly code, to obtain programs that are both small and fast.
Shaffer
.[
shaffer automatic
.]
describes the theory of automatic subroutine generation.
Leverett
.[
leverett register allocation compilers
.]
deals with register allocation in the PQCC compilers.
.PP
References to articles about specific optimization techniques
will be given in later chapters.


@ -1,64 +0,0 @@
# $Id$
#PARAMS do not remove this line!
SRC_DIR = $(SRC_HOME)/doc/ego
REFS=-p $(SRC_DIR)/refs.opt -p $(SRC_DIR)/refs.stat -p $(SRC_DIR)/refs.gen
REFFILES = $(SRC_DIR)/refs.opt $(SRC_DIR)/refs.stat $(SRC_DIR)/refs.gen
INTRO=$(SRC_DIR)/intro/intro?
OV=$(SRC_DIR)/ov/ov?
IC=$(SRC_DIR)/ic/ic?
CF=$(SRC_DIR)/cf/cf?
IL=$(SRC_DIR)/il/il?
SR=$(SRC_DIR)/sr/sr?
CS=$(SRC_DIR)/cs/cs?
SP=$(SRC_DIR)/sp/sp?
UD=$(SRC_DIR)/ud/ud?
LV=$(SRC_DIR)/lv/lv?
CJ=$(SRC_DIR)/cj/cj?
BO=$(SRC_DIR)/bo/bo?
RA=$(SRC_DIR)/ra/ra?
CA=$(SRC_DIR)/ca/ca?
EGO=$(INTRO) $(OV) $(IC) $(CF) $(IL) $(SR) $(CS) $(SP) $(CJ) $(BO) \
$(UD) $(LV) $(RA) $(CA)
REFER=refer
TROFF=troff
TBL=tbl
TARGET=-Tlp
HEAD = $(SRC_DIR)/intro/head
TAIL = $(SRC_DIR)/intro/tail
$(TARGET_HOME)/doc/ego.doc: $(REFFILES) $(HEAD) $(TAIL) $(EGO)
$(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(EGO) $(TAIL) | $(TBL) > $(TARGET_HOME)/doc/ego.doc
ego.f: $(REFFILES) $(HEAD) $(TAIL) $(EGO)
$(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(EGO) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > ego.f
intro.f: $(REFFILES) $(HEAD) $(TAIL) $(INTRO)
$(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(INTRO) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > intro.f
ov.f: $(REFFILES) $(HEAD) $(TAIL) $(OV)
$(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(OV) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > ov.f
ic.f: $(REFFILES) $(HEAD) $(TAIL) $(IC)
$(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(IC) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > ic.f
cf.f: $(REFFILES) $(HEAD) $(TAIL) $(CF)
$(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(CF) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > cf.f
il.f: $(REFFILES) $(HEAD) $(TAIL) $(IL)
$(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(IL) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > il.f
sr.f: $(REFFILES) $(HEAD) $(TAIL) $(SR)
$(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(SR) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > sr.f
cs.f: $(REFFILES) $(HEAD) $(TAIL) $(CS)
$(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(CS) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > cs.f
sp.f: $(REFFILES) $(HEAD) $(TAIL) $(SP)
$(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(SP) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > sp.f
cj.f: $(REFFILES) $(HEAD) $(TAIL) $(CJ)
$(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(CJ) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > cj.f
bo.f: $(REFFILES) $(HEAD) $(TAIL) $(BO)
$(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(BO) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > bo.f
ud.f: $(REFFILES) $(HEAD) $(TAIL) $(UD)
$(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(UD) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > ud.f
lv.f: $(REFFILES) $(HEAD) $(TAIL) $(LV)
$(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(LV) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > lv.f
ra.f: $(REFFILES) $(HEAD) $(TAIL) $(RA)
$(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(RA) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > ra.f
ca.f: $(REFFILES) $(HEAD) $(TAIL) $(CA)
$(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(CA) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > ca.f


@ -1,4 +0,0 @@
ra1
ra2
ra3
ra4


@ -1,33 +0,0 @@
.bp
.NH 1
Register Allocation
.NH 2
Introduction
.PP
The efficient usage of the general purpose registers
of the target machine plays a key role in any optimizing compiler.
This subject, often referred to as \fIRegister Allocation\fR,
has great impact on both the code generator and the
optimizing part of such a compiler.
The code generator needs registers for at least the evaluation of
arithmetic expressions;
the optimizer uses the registers to decrease the access costs
of frequently used entities (such as variables).
The design of an optimizing compiler must pay great
attention to the cooperation of optimization, register allocation
and code generation.
.PP
Register allocation has received much attention in the literature (see
.[
leverett register allocation compilers
.]
.[
chaitin register coloring
.]
.[
freiburghouse usage counts
.]
and
.[~[
sites register
.]]).


@ -1,139 +0,0 @@
.NH 2
Usage of registers in ACK compilers
.PP
We will first describe the major design decisions
of the Amsterdam Compiler Kit,
as far as they concern register allocation.
Subsequently we will outline
the role of the Global Optimizer in the register
allocation process and the interface
between the code generator and the optimizer.
.NH 3
Usage of registers without the intervention of the Global Optimizer
.PP
Registers are used for two purposes:
.IP 1.
for the evaluation of arithmetic expressions
.IP 2.
to hold local variables, for the duration of the procedure they
are local to.
.LP
It is essential to note that no translation part of the compilers,
except for the code generator, knows anything at all
about the register set of the target computer.
Hence all decisions about registers are ultimately made by
the code generator.
Earlier phases of a compiler can only \fIadvise\fR the code generator.
.PP
The code generator splits the register set into two:
a fixed part for the evaluation of expressions (called \fIscratch\fR
registers) and a fixed part to store local variables.
This partitioning, which depends only on the target computer, significantly
reduces the complexity of register allocation, at the cost
of some loss of code quality.
.PP
The code generator has some (machine-dependent) knowledge of the access costs
of memory locations and registers and of the costs of saving and
restoring registers. (Registers are always saved by the \fIcalled\fR
procedure).
This knowledge is expressed in a set of procedures for each target machine.
The code generator also knows how many registers there are and of
which type they are.
A register can be of type \fIpointer\fR, \fIfloating point\fR
or \fIgeneral\fR.
.PP
The front ends of the compilers determine which local variables may
be put in a register;
such a variable may never be accessed indirectly (i.e. through a pointer).
The front end also determines the types and sizes of these variables.
The type can be any of the register types or the type \fIloop variable\fR,
which denotes a general-typed variable that is used as loop variable
in a for-statement.
All this information is collected in a \fIregister message\fR in
the EM code.
Such a message is a pseudo EM instruction.
This message also contains a \fIscore\fR field,
indicating how desirable it is to put this variable in a register.
A front end may assign a high score to a variable if it
was declared as a register variable (which is only possible in
some languages, such as "C").
Any compiler phase before the code generator may change this score field,
if it has reason to do so.
The code generator bases its decisions on the information contained
in the register message, most notably on the score.
.PP
If the global optimizer is not used,
the score fields are set by the Peephole Optimizer.
This optimizer simply counts the number of occurrences
of every local (register) variable and adds this count
to the score provided by the front end.
In this way a simple, yet quite effective
register allocation scheme is achieved.
.NH 3
The role of the Global Optimizer
.PP
The Global Optimizer essentially tries to improve the scheme
outlined above.
It uses the following principles for this purpose:
.IP -
Entities are not always assigned a register for the duration
of an entire procedure; smaller regions of the program text
may be considered too.
.IP -
Several variables may be put in the same register simultaneously,
provided at most one of them is live at any point.
.IP -
Besides local variables, other entities (such as constants and addresses of
variables and procedures) may be put in a register.
.IP -
More accurate cost estimates are used.
.LP
To perform its task, the optimizer must have some
knowledge of the target machine.
.NH 3
The interface between the register allocator and the code generator
.PP
The RA phase of the optimizer must somehow be able to express its
decisions.
Such decisions may look like: 'put constant 1283 in a register from
line 12 to line 40'.
To be precise, RA must be able to tell the code generator to:
.IP -
initialize a register with some value
.IP -
update an entity from a register
.IP -
replace all occurrences of an entity in a certain region
of text by a reference to the register.
.LP
At least three problems occur here: the code generator only puts
local variables in registers,
it only assigns a register to a variable for the duration of an entire
procedure, and it does not expect an earlier compiler phase
to make all the decisions for it.
.PP
All three problems are solved by one mechanism that involves no changes
to the code generator.
With every (non-scratch) register R that will be used in
a procedure P, we associate a new variable T, local to P.
The size of T is the same as the size of R.
A register message is generated for T with an exceptionally high score.
The scores of all original register messages are set to zero.
Consequently, the code generator will always assign precisely those new
variables to a register.
If the optimizer wants to put some entity, say the constant 1283, in
a register, it emits the code "T := 1283" and replaces all occurrences
of '1283' by T.
Similarly, it can put the address of a procedure in T and replace all
calls to that procedure by indirect calls.
Furthermore, it can put several different entities in T (and thus in R)
during the lifetime of P.
.PP
In principle, the code generated by the optimizer in this way would
always be valid EM code, even if the optimizer were presented with
a totally wrong description of the target computer's register set.
In practice, it would be a waste of data as well as text space to
allocate memory for these new variables, as they will always be assigned
a register (in the correct order of events).
Hence, no memory locations are allocated for them.
For this reason they are called pseudo local variables.


@ -1,386 +0,0 @@
.NH 2
The register allocation phase
.PP
.NH 3
Overview
.PP
The RA phase deals with one procedure at a time.
For every procedure, it first determines which entities
may be put in a register. Such an entity
is called an \fIitem\fR.
For every item it decides during which parts of the procedure it
might be assigned a register.
Such a region is called a \fItimespan\fR.
For any item, several (possibly overlapping) timespans may
be considered.
A pair (item,timespan) is called an \fIallocation\fR.
If the items of two allocations are both live at some
point of time in the intersection of their timespans,
these allocations are said to be \fIrivals\fR of each other,
as they cannot be assigned the same register.
The rivals-set of every allocation is computed.
Next, the gains of assigning a register to an allocation are estimated,
for every allocation.
With all this information, decisions are made which allocations
to store in which registers (\fIpacking\fR).
Finally, the EM text is transformed to reflect these decisions.
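.PP
The allocation list that is passed from one subphase to the next may
be pictured by the C sketch below; it merely illustrates the notions
introduced above, and the actual declarations in the RA sources differ
in detail.
.DS
struct timespan {               /* region of the procedure text       */
    int first_instr, last_instr;
};

struct item {                   /* entity that may get a register     */
    int kind;                   /* local variable, address, constant  */
    int regtype;                /* pointer, floating point or general */
};

struct allocation {
    struct item        *al_item;
    struct timespan     al_span;
    struct allocation **al_rivals;   /* allocations busy simultaneously    */
    int                 al_nrivals;
    long                al_profits;  /* estimated gain of using a register */
    int                 al_reg;      /* register chosen by packing, or -1  */
    struct allocation  *al_next;     /* next entry of the allocation list  */
};
.DE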
.NH 3
The item recognition subphase
.PP
RA tries to put the following entities in a register:
.IP -
a local variable for which a register message was found
.IP -
the address of a local variable for which no
register message was found
.IP -
the address of a global variable
.IP -
the address of a procedure
.IP -
a numeric constant.
.LP
Only the \fIaddress\fR of a global variable
may be put in a register, not the variable itself.
This approach avoids the very complex problems that would be
caused by procedure calls and indirect pointer references (see
.[~[
aho design compiler
.] sections 14.7 and 14.8]
and
.[~[
spillman side-effects
.]]).
Still, on most machines accessing a global variable using indirect
addressing through a register is much cheaper than
accessing it via its address.
Similarly, if the address of a procedure is put in a register, the
procedure can be called via an indirect call.
.PP
With every item we associate a register type.
This type is
.DS
for local variables: the type contained in the register message
for addresses of variables and procedures: the pointer type
for constants: the general type
.DE
An entity other than a local variable is not taken to be an item
if it is used only once within the current procedure.
.PP
An item is said to be \fIlive\fR at some point of the program text
if its value may be used before it is changed.
As addresses and constants are never changed, all items but local
variables are always live.
The region of text during which a local variable is live is
determined via the live/dead messages generated by the
Live Variable analysis phase of the Global Optimizer.
.NH 3
The allocation determination subphase
.PP
If a procedure has more items than registers,
it may be advantageous to put an item in a register
only during those parts of the procedure where it is most
heavily used.
Such a part will be called a timespan.
With every item we may associate a set of timespans.
If two timespans of an item overlap,
at most one of them may be granted a register,
as there is no use in putting the same item in two
registers simultaneously.
If two timespans of an item are distinct,
both may be chosen;
the item will possibly be put in two
different registers during different parts of the procedure.
The timespan may also consist
of the whole procedure.
.PP
A list of (item,timespan) pairs (allocations)
is built, which will be the input to the decision making
subphase of RA (packing subphase).
This allocation list is the main data structure of RA.
The description of the remainder of RA will be in terms
of allocations rather than items.
The phrase "to assign a register to an allocation" means "to assign
a register to the item of the allocation for the duration of
the timespan of the allocation".
Subsequent subphases will add more information
to this list.
.PP
Several factors must be taken into account when a
timespan for an item is constructed:
.IP 1.
At any \fIentry point\fR of the timespan where the
item is live,
the register must be initialized with the item
.IP 2.
At any exit point of the timespan where the item is live,
the item must be updated.
.LP
In order to decrease these costs, we will only consider timespans with
one entry point
and no live exit points.
.NH 3
The rivals computation subphase
.PP
As stated before, several different items may be put in the
same register, provided they are not live simultaneously.
For every allocation we determine the intersection
of its timespan and the lifetime of its item (i.e. the part of the
procedure during which the item is live).
The allocation is said to be busy during this intersection.
If two allocations are ever busy simultaneously they are
said to be rivals of each other.
The rivals information is added to the allocation list.
.NH 3
The profits computation subphase
.PP
To make good decisions, the packing subphase needs to
know which allocations can be assigned the same register
(rivals information) and how much is gained by
granting an allocation a register.
.PP
Besides the gains of using a register instead of an
item,
two kinds of overhead costs must be
taken into account:
.IP -
the register must be initialized with the item
.IP -
the register must be saved at procedure entry
and restored at procedure exit.
.LP
The latter costs cannot be attributed to a single
allocation, as several allocations can be assigned the same register.
These costs are dealt with after packing has been done.
They do not influence the decisions of the packing algorithm,
they may only undo them.
.PP
The actual profits consist of improvements
of execution time and code size.
As the former is far more difficult to estimate, we will
discuss code size improvements first.
.PP
The gains of putting a certain item in a register
depend on how the item is used.
Suppose the item is
a pointer variable.
On machines that do not have a
double-indirect addressing mode,
two instructions are needed to dereference the variable
if it is not in a register, but only one if it is put in a register.
If the variable is not dereferenced, but simply copied, one instruction
may be sufficient in both cases.
So the gains of putting a pointer variable in a register are higher
if the variable is dereferenced often.
.PP
To make accurate estimates, detailed knowledge of
the target machine and of the code generator
would be needed.
Therefore, a simplification has been made that substantially limits
the amount of target machine information that is needed.
The estimation of the number of bytes saved does
not take into account how an item is used.
Rather, an average number is used.
So these gains are computed as follows:
.DS
#bytes_saved = #occurrences * gains_per_occurrence
.DE
The number of occurrences is derived from
the EM code.
Note that this is not exact either,
as there is no one-to-one correspondence between occurrences in
the EM code and in the assembler code.
.PP
The gains of one occurrence depend on:
.IP 1.
the type of the item
.IP 2.
the size of the item
.IP 3.
the type of the register
.LP
and for local variables and addresses of local variables:
.IP 4.
the type of the local variable
.IP 5.
the offset of the variable in the stackframe
.LP
For every allocation we try two types of registers: the register type
of the item and the general register type.
Only the type with the highest profits will subsequently be used.
This type is added to the allocation information.
.PP
To compute the gains, RA uses a machine-dependent table
that is read from a machine descriptor file.
By means of this table the number of bytes saved can be computed
as a function of the five properties.
.PP
The cost of initializing a register with an item
is determined in a similar way.
The cost of one initialization is also
obtained from the descriptor file.
Note that there can be at most one initialization for any
allocation.
.PP
To summarize, the number of bytes a certain allocation would
save is computed as follows:
.DS
.TS
l l.
net_bytes_saved = bytes_saved - init_cost
bytes_saved = #occurrences * gains_per_occ
init_cost = #initializations * costs_per_init
.TE
.DE
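.PP
Purely as an illustration, this computation amounts to the following;
the two parameters gains_per_occ and cost_per_init stand for lookups
in the machine-dependent table read from the descriptor file.
.DS
/* E.g. 10 occurrences saving 2 bytes each and one initialization
 * costing 3 bytes yield 10*2 - 1*3 = 17 bytes saved.
 */
long net_bytes_saved(int occurrences, int initializations,
                     long gains_per_occ, long cost_per_init)
{
    long bytes_saved = (long) occurrences * gains_per_occ;
    long init_cost   = (long) initializations * cost_per_init;

    return bytes_saved - init_cost;
}
.DE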
.PP
It is inherently more difficult to estimate the execution
time saved by putting an item in a register,
because it is impossible to predict how
many times an item will be used dynamically.
If an occurrence is part of a loop,
it may be executed many times.
If it is part of a conditional statement,
it may never be executed at all.
In the latter case, the speed of the program may even get
worse if an initialization is needed.
As a clear example, consider the piece of "C" code in Fig. 13.1.
.DS
switch(expr) {
case 1: p(); break;
case 2: p(); p(); break;
case 3: p(); break;
default: break;
}
Fig. 13.1 A "C" switch statement
.DE
Lots of bytes may be saved by putting the address of procedure p
in a register, as p is called four times (statically).
Dynamically, p will be called zero, one or two times,
depending on the value of the expression.
.PP
The optimizer uses the following strategy for optimizing
execution time:
.IP 1.
try to put items in registers during \fIloops\fR first
.IP 2.
always keep the initializing code outside the loop
.IP 3.
if an item is not used in a loop, do not put it in a register if
the initialization costs may be higher than the gains
.LP
The latter condition can be checked by determining the
minimal number of usages (dynamically) of the item during the procedure,
via a shortest path algorithm.
In the example above, this minimal number is zero, so the address of
p is not put in a register.
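.PP
Such a shortest path computation may be sketched as follows, taking
the number of static occurrences of the item in a basic block as the
weight of that block; the control flow representation is again
invented for the illustration.
.DS
#define MAXBLOCKS 500          /* assumed upper bound on nblocks      */
#define INFINITE  1000000000L

struct cfgblock {
    int uses;                  /* static occurrences of the item here */
    int succ[4], nsucc;        /* successor blocks                    */
    int is_exit;               /* nonzero for an exit block           */
};

/* minimal number of dynamic usages = length of the shortest path
 * from the entry block to any exit block
 */
long minimal_usages(struct cfgblock *b, int nblocks, int entry)
{
    long dist[MAXBLOCKS], best = INFINITE;
    int i, j, changed = 1;

    for (i = 0; i < nblocks; i++)
        dist[i] = INFINITE;
    dist[entry] = b[entry].uses;

    while (changed) {          /* relax edges until nothing improves  */
        changed = 0;
        for (i = 0; i < nblocks; i++) {
            if (dist[i] == INFINITE)
                continue;
            for (j = 0; j < b[i].nsucc; j++) {
                int s = b[i].succ[j];
                if (dist[i] + b[s].uses < dist[s]) {
                    dist[s] = dist[i] + b[s].uses;
                    changed = 1;
                }
            }
        }
    }
    for (i = 0; i < nblocks; i++)
        if (b[i].is_exit && dist[i] < best)
            best = dist[i];
    return best;               /* zero for the switch example above   */
}
.DE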
.PP
The cost of one occurrence is estimated as described above for the
code size.
The number of dynamic occurrences is guessed by looking at the
loop nesting level of every occurrence.
If the item is never used in a loop,
the minimal number of occurrences is used.
From these facts, the execution time improvement is assessed
for every allocation.
.NH 3
The packing subphase
.PP
The packing subphase takes as input the allocation
list and outputs a
description of which allocations should be put
in which registers.
So it is essentially the decision making part of RA.
.PP
The packing system tries to assign a register to allocations one
at a time, in an order that is defined below.
For every allocation A, it first checks if there is a register
(of the right type)
that is already assigned to one or more allocations,
none of which are rivals of A.
In this case A is assigned the same register.
Else, A is assigned a new register, if one exists.
A table containing the number of free registers for every type
is maintained.
It is initialized with the number of non-scratch registers of
the target computer and updated whenever a
new register is handed out.
The packing algorithm stops when no more allocations can
or need be assigned a register.
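.PP
The core of this loop may be sketched as follows; the is_rival() test,
the register administration and the ordering of the allocations (see
the rules discussed below) are simplified stand-ins for the real data
structures of RA.
.DS
struct alloc {
    int regtype;               /* register type chosen for this one   */
    int reg;                   /* assigned register, initially -1     */
};

extern int is_rival(struct alloc *, struct alloc *);  /* busy overlap */

/* free_regs[t] holds the number of registers of type t still unused;
 * next_reg[t] numbers the registers of type t that are handed out.
 */
void pack(struct alloc *a[], int n, int free_regs[], int next_reg[])
{
    int i, j, k;

    for (i = 0; i < n; i++) {          /* one allocation at a time    */
        int t = a[i]->regtype;

        /* first try a register already handed out, provided none of
         * its current occupants is a rival of a[i]
         */
        for (j = 0; j < i && a[i]->reg < 0; j++) {
            int ok = (a[j]->reg >= 0 && a[j]->regtype == t);

            for (k = 0; ok && k < i; k++)
                if (a[k]->regtype == t && a[k]->reg == a[j]->reg &&
                    is_rival(a[i], a[k]))
                    ok = 0;
            if (ok)
                a[i]->reg = a[j]->reg;
        }
        /* otherwise hand out a fresh register of this type, if any   */
        if (a[i]->reg < 0 && free_regs[t] > 0) {
            a[i]->reg = next_reg[t]++;
            free_regs[t]--;
        }
    }
}
.DE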
.PP
After an allocation A has been packed,
all allocations of the same item whose timespans overlap
that of A (including A itself) are removed from the allocation list.
.PP
In case the number of items exceeds the number of registers, it
is important to choose the most profitable allocations.
Due to the possibility of having several allocations
occupying the same register,
this problem is quite complex.
Our packing algorithm uses simple heuristic rules
and avoids any combinatorial search.
It has distinct rules for different cost measures.
.PP
If object code size is the most important factor,
the algorithm is greedy and chooses allocations in
decreasing order of their profits attribute.
It does not take into account the fact that
other allocations may be passed over because of
this decision.
.PP
If execution time is the prime concern, the algorithm
first considers allocations whose timespans consist of loops.
After all these have been packed, it considers the remaining
allocations.
Within the two subclasses, it considers allocations
with the highest profits first.
When assigning a register to an allocation with a loop
as timespan, the algorithm checks if the item has
already been put in a register during another loop.
If so, it tries to use the same register for the
new allocation.
After all packing has been done,
it checks if the item has always been assigned the same
register (although not necessarily during all loops).
If so, it tries to put the item in that register during
the entire procedure. This is possible
if the allocation (item,whole_procedure) is not a rival
of any allocation with a different item that has been
assigned to the same register.
Note that this approach is essentially 'bottom up',
as registers are first assigned over small regions
of text which are later collapsed into larger regions.
The advantage of this approach is the fact that
the decisions for one loop can be made independently
of all other loops.
.PP
After the entire packing process has been completed,
we compute for each register how much is gained in using
this register, by simply adding the net profits
of all allocations assigned to it.
This total yield should outweigh the costs of
saving/restoring the register at procedure entry/exit.
As most modern processors (e.g. 68000, Vax) have special
instructions to save/restore several registers,
the differential costs of saving one extra register are by
no means constant.
The costs are read from the machine descriptor file and
compared to the total yields of the registers.
As a consequence of this analysis, some allocations
may have their registers taken away.
.NH 3
The transformation subphase
.PP
The final subphase of RA transforms the EM text according to the
decisions made by the packing system.
It traverses the text of the currently optimized procedure and
changes all occurrences of items at points where
they are assigned a register.
It also clears the score field of the register messages for
normal local variables and emits register messages with a very
high score for the pseudo locals.
At points where registers have to be initialized with items,
it generates EM code to do so.
Finally it tries to decrease the size of the stackframe
of the procedure by looking at which local variables need not
be given memory locations.


@ -1,28 +0,0 @@
.NH 2
Source files of RA
.PP
The sources of RA are in the following files and packages:
.IP ra.h: 14
declarations of global variables and data structures
.IP ra.c:
the routine main; initialization of target machine-dependent tables
.IP items:
a routine to build the list of items of one procedure;
routines to manipulate items
.IP lifetime:
contains a subroutine that determines when items are live/dead
.IP alloclist:
contains subroutines that build the initial allocations list
and that compute the rivals sets.
.IP profits:
contains a subroutine that computes the profits of the allocations
and a routine that determines the costs of saving/restoring registers
.IP pack:
contains the packing subphase
.IP xform:
contains the transformation subphase
.IP interval:
contains routines to manipulate intervals of time
.IP aux:
contains auxiliary routines
.LP

Some files were not shown because too many files have changed in this diff.