diff --git a/.distr b/.distr deleted file mode 100644 index 139840f92..000000000 --- a/.distr +++ /dev/null @@ -1,21 +0,0 @@ -Action -Copyright -NEW -README -TODO -TakeAction -bin -doc -emtest -etc -fast -fcc -first -h -include -modules -lang -lib -mach -man -util diff --git a/Action b/Action deleted file mode 100644 index 54a724dbe..000000000 --- a/Action +++ /dev/null @@ -1,276 +0,0 @@ -name "System definition" -dir first -action ack_sys -failure "You have to run the shell script first/first" -fatal -end -name "Manual pages" -dir man -end -! name "EM definition" -! dir etc -! end -name "EM definition library" -dir util/data -end -name "C utilities" -dir util/cmisc -end -name "Yacc parser generator" -dir util/byacc -end -name "Flex lexical analyzer generator" -dir util/flex -action "make firstinstall && make clean" -end -name "Include files for modules" -dir modules/h -end -name "Modules" -dir modules/src -indir -end -name "LL(1) Parser generator" -dir util/LLgen -action "make firstinstall && make clean" -end -name "C preprocessor" -dir util/cpp -end -name "Peephole optimizer libraries" -dir modules/src/em_opt -end -name "ACK object utilities" -dir util/amisc -end -name "Encode/Decode" -dir util/misc -end -name "Shell files in bin" -dir util/shf -end -name "EM assembler" -dir util/ass -end -name "EM Peephole optimizer" -dir util/opt -end -name "EM Global optimizer" -dir util/ego -indir -end -name "ACK archiver" -dir util/arch -end -name "Program 'ack'" -dir util/ack -end -name "Bootstrap for backend tables" -dir util/cgg -end -name "Bootstrap for newest form of backend tables" -dir util/ncgg -end -name "Bootstrap for code expanders" -dir util/ceg -indir -end -name "LED link editor" -dir util/led -end -name "TOPGEN target optimizer generator" -dir util/topgen -end -name "C frontend" -dir lang/cem/cemcom -end -name "ANSI-C frontend" -dir lang/cem/cemcom.ansi -end -name "ANSI-C preprocessor" -dir lang/cem/cpp.ansi -end -name "ANSI-C header files" -dir 
lang/cem/libcc.ansi -end -name "LINT C program checker" -dir lang/cem/lint -end -name "EM definition lint-library" -action "make lintlib" -dir util/data -end -name "Modules lint libraries" -dir modules/src -indir "Action.lint" -end -name "Global optimizer lint libraries" -dir util/ego/share -action "make lintlib" -end -name "Pascal frontend" -dir lang/pc/comp -end -name "Basic frontend" -dir lang/basic/src -end -name "Occam frontend" -dir lang/occam/comp -end -name "Modula-2 frontend" -dir lang/m2/comp -end -name "Modula-2 definition modules" -dir lang/m2/libm2 -end -name "Modula-2 makefile generator" -dir lang/m2/m2mm -end -name "Fortran to C compiler" -dir lang/fortran/comp -end -name "EM interpreter in C" -dir util/int -end -name "Symbolic debugger" -dir util/grind -end -name "Intel 8086 support" -dir mach/i86 -indir -end -name "Intel 80286 support for Xenix" -dir mach/xenix3 -indir -end -name "Intel 80386 support for Xenix 386 System V" -dir mach/i386 -indir -end -name "MSC6500 support" -dir mach/6500 -indir -end -name "Motorola 6800 support" -dir mach/6800 -indir -end -name "Motorola 6805 support" -dir mach/6805 -indir -end -name "Motorola 6809 support" -dir mach/6809 -indir -end -name "Intel 8080 support" -dir mach/i80 -indir -end -name "2-2 Interpreter support" -dir mach/em22 -indir -end -name "2-4 Interpreter support" -dir mach/em24 -indir -end -name "4-4 Interpreter support" -dir mach/em44 -indir -end -name "Motorola 68000 2-4 support" -dir mach/m68k2 -indir -end -name "Motorola 68000 4-4 support" -dir mach/m68k4 -indir -end -name "NS16032 support" -dir mach/ns -indir -end -name "PDP 11 support" -dir mach/pdp -indir -end -name "PMDS support" -dir mach/pmds -indir -end -name "PMDS 4/4 support" -dir mach/pmds4 -indir -end -name "Signetics 2650 support" -dir mach/s2650 -indir -end -name "Vax 4-4 support" -dir mach/vax4 -indir -end -name "M68020 System V/68 support" -dir mach/m68020 -indir -end -name "Sun 3 M68020 support" -dir mach/sun3 -indir -end -name "Sun 
4 SPARC SunOs 4 support" -dir mach/sparc -system "sparc|sparc_solaris" -indir -end -name "Sun 4 SPARC Solaris support" -dir mach/sparc_solaris -system "sparc_solaris" -indir -end -name "Sun 2 M68000 support" -dir mach/sun2 -indir -end -name "Mantra M68000 System V.0 support" -dir mach/mantra -indir -end -name "PC Minix support" -dir mach/minix -indir -end -name "Atari ST Minix support" -dir mach/minixST -indir -end -name "Z80 support" -dir mach/z80 -indir -end -name "Zilog Z8000 support" -dir mach/z8000 -indir -end -name "Acorn Archimedes support" -dir mach/arm -indir -end -name "Documentation" -dir doc -end -name "Motorola 68000 interpreters" -system "m68*|sun*" -dir mach/mantra/int -end -name "Fast compilers" -system "m68020|sun3|i386|vax*" -dir fast -indir -end -name "Fast cc-compatible C compiler" -system "sun3|vax*" -dir fcc -indir -end diff --git a/Copyright b/Copyright deleted file mode 100644 index a27fd6332..000000000 --- a/Copyright +++ /dev/null @@ -1,32 +0,0 @@ -Copyright (c) 1987, 1990, 1993, 2005 Vrije Universiteit, Amsterdam, The Netherlands. -All rights reserved. - -Redistribution and use of the Amsterdam Compiler Kit in source and -binary forms, with or without modification, are permitted provided -that the following conditions are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - * Neither the name of Vrije Universiteit nor the names of the - software authors or contributors may be used to endorse or - promote products derived from this software without specific - prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS, AUTHORS, AND -CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, -INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -IN NO EVENT SHALL VRIJE UNIVERSITEIT OR ANY AUTHORS OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, -EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/DistrAction b/DistrAction deleted file mode 100755 index 90295286b..000000000 --- a/DistrAction +++ /dev/null @@ -1,3 +0,0 @@ -p=/proj/em/Work -sh TakeAction 'make distr' $p/distr/Action -sh TakeAction 'make distr' $p/distr/Action1 diff --git a/Makefile b/Makefile deleted file mode 100644 index bf4572b29..000000000 --- a/Makefile +++ /dev/null @@ -1,35 +0,0 @@ -cmp: # compile everything and compare - (cd etc ; make cmp ) - (cd util ; make cmp ) - (cd lang ; make cmp ) - (cd mach ; make cmp ) - -install: # compile everything to machine code - (cd etc ; make install ) - (cd util ; make install ) - (cd lang/cem ; make install ) - (cd mach ; make install ) - (cd lang/pc ; make install ) - -clean: # remove all non-sources, except boot-files - (cd doc ; make clean ) - (cd man ; make clean ) - (cd h ; make clean ) - (cd etc ; make clean ) - (cd util ; make clean ) - (cd lang ; make clean ) - (cd mach ; make clean ) - -opr: # print all sources - make pr | opr - -pr: # print all sources - @( pr Makefile ; \ - (cd doc ; make pr ) ; \ - (cd man ; make pr ) ; \ - (cd h ; make pr ) ; \ - (cd etc ; make pr ) ; \ - (cd lang ; make pr ) ; \ - (cd util ; make pr ) ; \ - (cd mach ; make pr 
) \ - ) diff --git a/NEW b/NEW deleted file mode 100644 index bb47c71d1..000000000 --- a/NEW +++ /dev/null @@ -1,45 +0,0 @@ -This is ACK distribution 5.6. - -This is a minor update of 5.5, the last public release from Vrije University. -Only minor changes have been made to make the system build on modern -platforms. - -The NEW document from the previous release follows. - -David Given -dg@cowlark.com 2005-06-24 - ------------------------------------------------------------------------------ - -The only addition with respect to the 5th ACK distribution is the support -for Solaris 2 on SPARCs. It also contains many bug fixes. - -Notes for the 5th ACK distribution: - -It is not wise to mix files created by the previous version of the Kit -with files belonging to this version, although that might sometimes work. -Many problems with the previous distribution have been fixed. -The major additions are: - - - an ANSI C compiler - - a LINT C program checker, both non-ansi and ansi - - an Intel 80386 back-end - - a SPARC code expander - - a source level debugger for Pascal, Modula-2, C, and ANSI C - - an Acorn Archimedes back-end - - code-expanders for VAX, Intel 80386 and Motorola M68020 processors, - and very fast Pascal, Modula-2, ANSI C, and C compilers constructed - using these code expanders - - a cc-compatible very fast C compiler for SUN-3 and VAX. - -Also added, but not part of the Kit proper are - - flex: a lexical analyzer generator - - byacc: yacc-clone by UCB - - f2c: a Fortran to C compiler by AT&T. - -See the ACK installation manual for their copyright notices. - --- -Ceriel Jacobs, Dept. 
of Mathematics and Computer Science, Vrije Universiteit, -De Boelelaan 1081a, 1081 HV Amsterdam, The Netherlands -Email: ceriel@cs.vu.nl Fax: +31 20 6427705 diff --git a/README b/README deleted file mode 100644 index 037d88c66..000000000 --- a/README +++ /dev/null @@ -1,174 +0,0 @@ -# $Source$ -# $State$ - -Installing the ACK on a modern platform -======================================= - -This document provides some very quick and dirty instructions for installing -the ACK on a modern platform. It is not intended as a substitute for the -real instructions, which can be found in doc/install.pr. - -Let me repeat myself: - -THE FULL INSTALLATION INSTRUCTIONS ARE IN doc/install.pr. - -The ACK is a very large and complex package and has received minimal -maintenance for the best part of a decade. During that time, the Unix -world has moved on, and many APIs have changed. It compiles cleanly on -my, dtrg's, test machine, which is a Debian Ubuntu Linux system. Your -mileage may vary. - -All disclaimers now done, now on to the good stuff: - -Building the ACK ----------------- - -I'm assuming you're using Linux here, because that's what I use. If you -don't use Linux, please let me know if you have any trouble and I'll update -the instructions. - -1. Configure the build. - - To do this, run the first/first script. You will be asked several - questions. - - * What is the root of the ACK source tree? - - This is the directory that you have unpacked the distribution into. - For example, /home/dg/src/Ack-5.6. - - * What is the root of the configuration tree? - - This is the directory that the build process will use for temporary - files. You'll only need this during the compilation process; it can - be removed afterwards. - For example, /tmp/ack-conf - - * What is the root of the ACK binaries? - - This is the ACK's installation path; where the binaries will live. 
- This needs to be writable during the build process --- if you want - to install in /usr/local, you either have to make /usr/local - writable or compile as root. Sorry! - - * What is your system type? - - Linux isn't on the list. Choose ANY. - - * Is this the system you are running on? - - Yes. - - * Are you satisfied? - - Yes. - - * What default machine do you wish to compile for? - - The ACK wants to know what architecture to target if you don't manually - specify an architecture. Unfortunately, it can't generate runnable - binaries for Linux or any other modern system (except possible Solaris - on Sparc). I'd recommend you choose em44. This will produce portable - binaries using the ACK's intermediate format, which you can run using - the int interpreter. - - * What kind of Unix are you running? - - Linux is a mixture, but I pick SYS_5 and it works. - - * Do you wish to limit the installation? - - No. If you pick Yes, the script will ask detailed questions about - exactly what you want to build. Modern systems are fast enough that - we may as well build everything. - - * Which system call library do you wish to use on the VAX? - - I don't have a VAX; the only person I know who has one uses it to vacuum - his carpets. I pick libsysV_2 with no ill effects. - - If the configuration script is happy, it will generate a script called - INSTALL. - -2. Do the compilation. - - The configuration script will recommend a command line. Execute this. On - modern systems, the compilation doesn't take long. - - Check the output of the configuration script for "Failed" lines. On my - system there are two: - - $ grep Failed INSTALL.out - Failed for Intel 8080 download programs, see dl/Out - Failed for Intel 8080 support - - You can ignore these. They aren't important. - -3. Use the ACK. - - Ensure that the ACK's binary directory is on your path; this is /bin in - the directory you specified during the configuration process. In my - example, this is /usr/local/bin. 
The /man subdirectory should go on your - manpath. - - To test your path, do: ack - - This should return silently. - - To test your manpath, do: man ack - - This will produce the documentation for the main compiler driver. - - If this works, you can remove the conf tree (/tmp/ack-conf in my example). - -Gotchas -------- - -There are some things you should be aware of. - -* The ACK's archiver tool is called 'arch'. This conflicts on Linux platforms - with a utility that displays the current architecture. If your compilation - occasionally fails obscurely and displays something like 'i686', you are - running afoul of this. As a workaround, rearrange your path so the ACK's - bin directory comes first --- but do be aware that some Linux system - tools may stop working. - -* By default, the ack tool will compile K&R C. Practically all C source these - days is ANSI C --- use the -ansi switch to enable ANSI mode. No, the ACK is - not C99 compatible. - -* Not all combinations of optimisation and architectures work. This is - perfectly normal, but the combinations are not well documented. Everything - supports -O. - -Disclaimer ----------- - -The ACK is mature, well-tested software, but the environment in which it was -developed for and tested under is rather different from that available on -today's machines. There will probably be little in the way of logical bugs, -but there may be many compilation and API bugs. - -If you wish to use the ACK, *please* join the mailing list. We are interested -in any reports of success and particularly, failure. If it does fail for you, -we would love to know why, in as much detail as possible. Bug fixes are even -more welcome. - -The ACK is licensed under a BSD-like license. Please see the 'Copyright' file -for the full text. - -You can find the mailing list on the project's web site: - - http://tack.sourceforge.net/ - -Please enjoy. 
- -David Given (dtrg on Sourceforge) -dg@cowlark.com -2005-06-24, 23:53 - -# Revision history -# $Log$ -# Revision 2.2 2005-06-24 23:20:41 dtrg -# Added some new readmes at the top level. -# diff --git a/TODO b/TODO deleted file mode 100644 index 83fd134ae..000000000 --- a/TODO +++ /dev/null @@ -1,20 +0,0 @@ -# $Source$ -# $State$ - -This file contains things that I have noticed need fixing, but have not -yet been fixed. Everything here should be reasonably low priority. Some -bugs have been bodged around to make things work; these are all marked in -the source with FIXME tags. - - -* util/int needs to be rewritten to emulate sgtty with termios; look for - FIXMEs. - -* mach/i80/dl/nascom.c needs to be rewritten to use termios, not sgtty. - - -# Revision history -# $Log$ -# Revision 2.1 2005-06-24 23:20:41 dtrg -# Added some new readmes at the top level. -# diff --git a/TakeAction b/TakeAction deleted file mode 100755 index 6a25701da..000000000 --- a/TakeAction +++ /dev/null @@ -1,144 +0,0 @@ -case $# in -0) PAR='make install && make clean' ; CMD=Action ;; -1) PAR="$1" ; CMD=Action ;; -2) PAR="$1" ; CMD="$2" ;; -*) echo Syntax: "$0" [command [file]] ; exit 1 ;; -esac -if test -r "$CMD" -then : -else - case "$CMD" in - Action) echo No Action file present ;; - *) echo No Action file "($CMD)" present ;; - esac -fi -case $0 in -/*) THISFILE=$0 - ;; -*) if [ -f $0 ] - then - THISFILE=`pwd`/$0 - else - THISFILE=$0 - fi - ;; -esac -SYS= -RETC=0 -{ while read LINE -do - eval set $LINE - case x"$1" in - x!*) ;; - xname) SYS="$2" - ACTION='$PAR' - DIR=. 
- FM=no - FAIL='Failed for $SYS, see $DIR/Out' - SUCC='$SYS -- done' - ATYPE= - FATAL=no - DOIT=yes - ;; - xfatal) FATAL=yes ;; - xaction|xindir) case x$ATYPE in - x) ACTION=$2 ; ATYPE=$1 - case $ATYPE$FM in - indirno) FAIL='Failed for $SYS' ;; - esac - ;; - *) echo Already specified an $ATYPE for this name - RETC=65 ;; - esac ;; - xfailure) FM=yes - FAIL="$2" ;; - xsuccess) SUCC="$2" ;; - xdir) DIR="$2" ;; - xsystem) PAT="$2" - oIFS=$IFS - IFS="|" - eval set $2 - case x`ack_sys` in - x$1|x$2|x$3|x$4|x$5|x$6|x$7) ;; - *) echo "Sorry, $SYS can only be made on $PAT systems" - DOIT=no - ;; - esac - IFS=$oIFS - ;; - xend) case $DOIT in - no) continue ;; - esac - case x$SYS in - x) echo Missing name line; RETC=65 ;; - *) if test -d $DIR - then ( - cd $DIR - X= - case $ATYPE in - indir) - if $THISFILE "$PAR" $ACTION - then eval echo $SUCC - else RETC=2 ; eval echo $FAIL - fi ;; - *) - case "$ACTION" in - '$PAR') - ACTION="$PAR" - ;; - *) ;; - esac - if [ -f No$CMD ] - then - x=`cat No$CMD` - if [ "$ACTION" = "$x" ] - then - ACTION='echo "No actions performed, No$CMD file present"' - SUCC='$SYS -- skipped' - fi - fi - if eval "{ $ACTION ; } >Out 2>&1 No$CMD 2>/dev/null - fi - else RETC=1 ; X=: ; eval echo $FAIL - fi - ;; - esac - (echo ------- `pwd` - cat Out - $X rm -f Out - ) 2>/dev/null 1>&- 1>&3 - exit $RETC - ) - case $? in - 0) ;; - *) case $RETC in - 0) RETC=$? ;; - esac ;; - esac - else - echo Directory $DIR for $SYS is inaccessible - RETC=66 - fi ;; - esac - case $FATAL$RETC in - yes0) ;; - yes*) echo Fatal error, installation stopped. - exit $RETC ;; - esac - SYS= - ;; - *) echo Unknown keyword "$1" - RETC=67 ;; - esac -done -exit $RETC -} <$CMD -RETX=$? 
-case $RETX in -0) exit $RETC ;; -*) exit $RETX ;; -esac diff --git a/bin/.distr b/bin/.distr deleted file mode 100644 index 96768b1ae..000000000 --- a/bin/.distr +++ /dev/null @@ -1,9 +0,0 @@ -cc-and-mkdep.ack -cc-and-mkdep.all -cc-and-mkdep.sun -do_deps -do_resolve -lint-lib.ack -lint-lib.unix -mk_manpage -rm_deps diff --git a/bin/cc-and-mkdep.ack b/bin/cc-and-mkdep.ack deleted file mode 100755 index fb7103eb8..000000000 --- a/bin/cc-and-mkdep.ack +++ /dev/null @@ -1,7 +0,0 @@ -: '$Id$' - -: Compile and make dependencies. First argument is the file on which the -: dependencies must be produced. This version is for ACK. -n=$1 -shift -exec $CC -Rcem-A$n -Rcem-m $* diff --git a/bin/cc-and-mkdep.all b/bin/cc-and-mkdep.all deleted file mode 100755 index 3a5271ef6..000000000 --- a/bin/cc-and-mkdep.all +++ /dev/null @@ -1,20 +0,0 @@ -: '$Id$' - -: Compile and make dependencies. First argument is the file on which the -: dependencies must be produced. This version is a generic one that should -: work for all Unix systems. -n=$1 -shift -cpp_args= -for i in $* -do - case $i in - -I*|-D*|-U*) cpp_args="$cpp_args $i" - ;; - -*) ;; - *) cpp_args="$cpp_args $i" - ;; - esac -done -$UTIL_HOME/lib.bin/cpp -d -m $cpp_args > $n -exec $CC $* diff --git a/bin/cc-and-mkdep.sun b/bin/cc-and-mkdep.sun deleted file mode 100755 index 0245b9cf6..000000000 --- a/bin/cc-and-mkdep.sun +++ /dev/null @@ -1,7 +0,0 @@ -: '$Id$' - -: Compile and make dependencies. First argument is the file on which the -: dependencies must be produced. This version is for the SUN cc. 
-n=$1 -shift -exec $CC -Qpath $UTIL_HOME/lib.bin -Qoption cpp -d$n -Qoption cpp -m $* diff --git a/bin/do_deps b/bin/do_deps deleted file mode 100755 index 51418c741..000000000 --- a/bin/do_deps +++ /dev/null @@ -1,18 +0,0 @@ -: '$Id$' - -: Produce dependencies for all argument files - -for i in $* -do - n=`basename $i .c` - if [ -f $n.dep ] - then - : - else - echo $n.'$(SUF): '$i > $n.dep - echo " head -5 $n.dep > $n.dp1" >> $n.dep - echo ' CC="$(CC)" UTIL_HOME="$(UTIL_HOME)" $(CC_AND_MKDEP) '$n.dp2 '$(CFLAGS)' -c $i >> $n.dep - echo " cat $n.dp1 $n.dp2 > $n.dep" >> $n.dep - echo " rm -f $n.dp1 $n.dp2" >> $n.dep - fi -done diff --git a/bin/do_resolve b/bin/do_resolve deleted file mode 100755 index 064bcbedf..000000000 --- a/bin/do_resolve +++ /dev/null @@ -1,47 +0,0 @@ -: '$Id$' - -: Resolve name clashes in the files on the argument list. If these -: files reside in another directory, a copy is made in the current -: directory. If not, it is overwritten. Never do this in a source -: directory! A list of the new files is produced on standard output. - -UTIL_BIN=$UTIL_HOME/bin - -trap "rm -f tmp$$ a.out nmclash.* longnames clashes" 0 1 2 3 15 - -: first find out if we have to resolve problems with identifier significance. - -cat > nmclash.c <<'EOF' -/* Accepted if many characters of long names are significant */ -abcdefghijklmnopr() { } -abcdefghijklmnopq() { } -main() { } -EOF -if $CC nmclash.c -then : no identifier significance problem - for i in $* - do - echo $i - done -else - $UTIL_BIN/prid -l7 $* > longnames - - : remove code generating routines from the clashes list. - : code generating routine names start with C_. - : also remove names starting with flt_. 
- - sed '/^C_/d' < longnames | sed '/^flt_/d' > tmp$$ - $UTIL_BIN/cclash -c -l7 tmp$$ > clashes - for i in $* - do - $UTIL_BIN/cid -Fclashes < $i > tmp$$ - n=`basename $i .xxx` - if cmp -s $n tmp$$ - then - rm -f tmp$$ - else - mv tmp$$ $n - fi - echo $n - done -fi diff --git a/bin/em.pascal b/bin/em.pascal deleted file mode 100755 index 2e03475b9..000000000 --- a/bin/em.pascal +++ /dev/null @@ -1 +0,0 @@ -exec /usr/em/doc/em/int/em /usr/em/doc/em/int/tables ${1-e.out} core diff --git a/bin/lint-lib.ack b/bin/lint-lib.ack deleted file mode 100755 index b851ac270..000000000 --- a/bin/lint-lib.ack +++ /dev/null @@ -1,12 +0,0 @@ -: '$Id$' - -: Create a lint library file. The name of the library file is constructed -: from the first argument. The second argument indicates the directory where -: the result is to be placed. This version is for ACK lint. - -n=$1 -shift -d=$1 -shift -lint -L$n $* -mv $n.llb $d diff --git a/bin/lint-lib.unix b/bin/lint-lib.unix deleted file mode 100755 index eb20c2625..000000000 --- a/bin/lint-lib.unix +++ /dev/null @@ -1,12 +0,0 @@ -: '$Id$' - -: Create a lint library file. The name of the library file is constructed -: from the first argument. The second argument indicates the directory where -: the result is to be placed. This version is for Unix lint. - -n=$1 -shift -d=$1 -shift -/usr/bin/lint -C$n $* -mv llib-l$n.ln $d diff --git a/bin/mk_manpage b/bin/mk_manpage deleted file mode 100755 index 6b7d95302..000000000 --- a/bin/mk_manpage +++ /dev/null @@ -1,18 +0,0 @@ -num=`expr $1 : '.*\.\([1-8]\)'` - -if [ -d $2/man ] ; then : ; else mkdir $2/man ; fi -if [ -f $2/man/head ] ; then : ; else cat > $2/man/head <<'EOF' -.rn TH yy -.de TH -.di zz -.yy "\\$1" "\\$2" "\\$3" "\\$4" -.ds ]W 5th ACK distribution -.ds ]D Amsterdam Compiler Kit -.ds ]L "\\$3 -.di -.rm zz -.. -EOF -fi -if [ -d $2/man/man$num ] ; then : ; else mkdir $2/man/man$num ; fi -cat $2/man/head $1 | sed "s!TARGETHOME!$2!" 
> $2/man/man$num/`expr //$1 : '.*/\([^/]*\)'` diff --git a/bin/rm_deps b/bin/rm_deps deleted file mode 100755 index 6a136f132..000000000 --- a/bin/rm_deps +++ /dev/null @@ -1,8 +0,0 @@ -: $Id$ - -: remove dependencies from a makefile, write result on standard output. -: we cannot do this directly in a makefile because some make versions -: have # start a comment, always. - -sed -e '/^#DEPENDENCIES/,$d' $1 -echo '#DEPENDENCIES' diff --git a/distr/Action b/distr/Action deleted file mode 100644 index 1cbc5442c..000000000 --- a/distr/Action +++ /dev/null @@ -1,3 +0,0 @@ -name "EM tables" -dir etc -end diff --git a/distr/Action1 b/distr/Action1 deleted file mode 100644 index dc1e371f9..000000000 --- a/distr/Action1 +++ /dev/null @@ -1,16 +0,0 @@ -name "m68k2/cg bootstrap files" -dir mach/m68k2/cg -action "make EMHOME=/proj/em/Work distr" -end -name "vax4/cg bootstrap files" -dir mach/vax4/cg -action "make EMHOME=/proj/em/Work distr" -end -name "m68020/ncg bootstrap files" -dir mach/m68020/ncg -action "make EMHOME=/proj/em/Work distr" -end -name "m68k4/cg bootstrap files" -dir mach/m68k4/cg -action "make EMHOME=/proj/em/Work distr" -end diff --git a/distr/Exceptions b/distr/Exceptions deleted file mode 100644 index edd5a5724..000000000 --- a/distr/Exceptions +++ /dev/null @@ -1,15 +0,0 @@ -++ ./doc/install.pr made -++ ./doc/int/.distr made -++ ./etc/new_table_done made -++ ./lang/cem/cemcom.ansi/Version.c made -++ ./lang/cem/libcc.ansi/stdlib/malloc.c made -++ ./lang/cem/cemcom/Version.c made -++ ./lang/pc/comp/Version.c made -++ ./lang/m2/comp/Version.c made -++ ./lang/m2/m2mm/Version.c made -++ ./mach/sparc/ce/EM_table made -++ ./mach/sparc_solaris/libem/LIST made -++ ./util/LLgen/src/LLgen.c.dist made -++ ./util/cpp/Version.c made -++ ./util/ego/share/pop_push.h made -++ ./util/grind/ops.c made diff --git a/distr/How_To b/distr/How_To deleted file mode 100644 index 17ec1ad39..000000000 --- a/distr/How_To +++ /dev/null @@ -1,90 +0,0 @@ -How to make a distribution 
--------------------------- - -I have written a new tool to generate the distributions that does not rely on -having a local CVS server --- distr/mkdist. - -To use it, you need to specify your CVS work tree, the destination directory -that the distribution will be written to, plus flags. It should be self- -documenting; use: - - mkdist --help - -...to get documentation. - -It uses .distr files in exactly the same way as the previous mechanism. - -The documentation for the old distribution tools follows. - -David Given -dg@cowlark.com -2005-06-25 - ------------------------------------------------------------------------------ - -How to make a fresh distribution: -For a distribution you need ".distr" files and RCS files. -The EM home directory contains a file called ".distr". It contains -the names of all the files and directories you want to have in the distribution. -The directories should contain .distr files, the other files should -be placed under CVS. -There are files that derive from other files and yet should be placed -in the distribution. -These files should not be placed under RCS or CVS. -The file "Exceptions" in this directory contains the current list of -these files. - -When all this is correct, use the shell script mktree the extract -the distribution from the EM tree. - sh mktree destination_tree repository_tree 2>f.attf -Use the "cvs rtag" command to give the distribution a name first! -Make sure that the destination tree exists and is empty! -Failing to do that will almost certainly result in a welter of -error messages. -The file f.attf contains mktree error messages and should be compared -to Exceptions. -The actions of mktree are quite complicated. It starts in the current -directory creating a version in the destination directory. -Then it reads the .distr file. -For each file mentioned there it performes certain actions: -1- Directory Change to that directory and call yourself recursively. 
-2- File - a- Does a file LIST exist in this directory AND - is the first line of LIST equal to the name of the - destination file? If so, try to extract all the files - named in the rest of the LIST file and call the program - arch to create a library "arch cDr `cat LIST`". - In this manner libraries can be distributed whose members - have their own RCS file. - else - b- Try to run 'make distr' - else - c- Try to run 'make ' - else - d- give message that says "not present" (or some such). - -Now, the tree contains all the files in the distribution, but it also contains -files that should not be in the distribution, especially the files created -by CVS. -That is why we now give the command: - dtar cdf distr . -The file distr is the one you should put on tape! -But,.... before doing that: Try it out! -Repeat the process described in the installation manual. -Only if that succeeds you are sure that you included the files needed. - Good Luck, - Ed Keizer, 85/4/15. - -Updated for 3rd distribution by Ceriel Jacobs, 87/3/11. -And again, - Good Luck! - -Updated for 4th distribution by Ceriel Jacobs, 88/4/08. -And again, - Good Luck! -Updated for 5th distribution by Ceriel Jacobs, 91/19/12. -And again, - Good Luck! -Updated for 1st upgrade to 5th distribution by Ceriel Jacobs, 91/12/11. -And again, - Good Luck! diff --git a/distr/dwalk b/distr/dwalk deleted file mode 100755 index 6db9f0e02..000000000 --- a/distr/dwalk +++ /dev/null @@ -1,24 +0,0 @@ -: ${CDIR=.} -${DF-:} $CDIR .distr -if test ! 
-r $DESTDIR/$CDIR/.distr -then - echo ++ no .distr in $CDIR 1>&2 - exit 0 -fi -for i in `cat $DESTDIR/$CDIR/.distr` -do - if test -d $i - then - ( if cd $i - then - CDIR=$CDIR/$i - export CDIR - exec $DDIR/dwalk $* - else - echo ++ Could not access $CDIR/$i 1>&2 - fi - ) - else - ${DF-:} $CDIR $i - fi -done diff --git a/distr/echod b/distr/echod deleted file mode 100755 index 9fb9840d0..000000000 --- a/distr/echod +++ /dev/null @@ -1 +0,0 @@ -echo $1 diff --git a/distr/f.attf b/distr/f.attf deleted file mode 100644 index a95572a51..000000000 --- a/distr/f.attf +++ /dev/null @@ -1,26 +0,0 @@ --- ./doc/install.pr no RCS file --- ./h/em_mnem.h no RCS file --- ./h/em_pseu.h no RCS file --- ./h/em_spec.h no RCS file --- ./lang/basic/src/y.tab.c no RCS file --- ./lang/basic/src/y.tab.h no RCS file --- ./lang/pc/pem/pem22.m no RCS file --- ./lang/pc/pem/pem24.m no RCS file --- ./lang/pc/pem/pem44.m no RCS file --- ./lib/LLgen/incl no RCS file --- ./lib/LLgen/rec no RCS file --- ./mach/m68k2/cg/tables1.c no RCS file --- ./mach/m68k2/cg/tables1.h no RCS file --- ./mach/m68020/ncg/tables1.c no RCS file --- ./mach/m68020/ncg/tables1.h no RCS file --- ./mach/vax4/cg/tables1.c no RCS file --- ./mach/vax4/cg/tables1.h no RCS file --- ./util/LLgen/src/parser no RCS file --- ./util/LLgen/src/LLgen.c no RCS file --- ./util/LLgen/src/Lpars.c no RCS file --- ./util/LLgen/src/Lpars.h no RCS file --- ./util/LLgen/src/tokens.c no RCS file --- ./util/data/em_flag.c no RCS file --- ./util/data/em_mnem.c no RCS file --- ./util/data/em_pseu.c no RCS file --- ./util/ego/share/pop_push.h no RCS file diff --git a/distr/listall b/distr/listall deleted file mode 100755 index 0bacb4932..000000000 --- a/distr/listall +++ /dev/null @@ -1,10 +0,0 @@ -case $# in -0) DESTDIR=. 
;; -1) DESTDIR=$1 ;; -*) echo $0 [directory] ; exit 1 ;; -esac -DD=`pwd`/listall.d -DW=`pwd`/dwalk -export DD DESTDIR -cd $DESTDIR -$DW diff --git a/distr/listall.d b/distr/listall.d deleted file mode 100755 index d66cf179c..000000000 --- a/distr/listall.d +++ /dev/null @@ -1,2 +0,0 @@ -echo "<$1>" -ls -bCdx `cat .distr` diff --git a/distr/listdirs b/distr/listdirs deleted file mode 100755 index 9d5d3c1f1..000000000 --- a/distr/listdirs +++ /dev/null @@ -1,10 +0,0 @@ -case $# in -0) DIR=. ;; -1) DIR=$1 ;; -*) echo $0 [directory] ; exit 1 ;; -esac -DD=`pwd`/echod -DW=`pwd`/dwalk -export DD -cd $DIR -$DW diff --git a/distr/mk_distr_syms b/distr/mk_distr_syms deleted file mode 100755 index 2075f7597..000000000 --- a/distr/mk_distr_syms +++ /dev/null @@ -1,39 +0,0 @@ -: Utility to make a tree of symbolic links to source tree. -: Mount the source tree read-only, use this script, and then try installation. -case $# in -2) ;; -*) echo "Usage: $0 " 1>&2 - exit 1 - ;; -esac -if [ -f $1/.distr ] -then - for i in `cat $1/.distr` - do - if [ -d $1/$i ] - then - if mkdir $2/$i && $0 $1/$i $2/$i - then - : - else - exit 2 - fi - else - if [ -f $1/$i ] - then - if ln -s $1/$i $2/$i - then - : - else - exit 3 - fi - else - echo "Missing file $1/$i" 1>&2 - exit 4 - fi - fi - done -else - echo "No .distr file in $1" 1>&2 - exit 5 -fi diff --git a/distr/mka b/distr/mka deleted file mode 100755 index 21f738cb6..000000000 --- a/distr/mka +++ /dev/null @@ -1,9 +0,0 @@ -set -e -for i in `tail +2 $DESTDIR/$1/LIST` -do - ${DF-false} $1 $i -done -cd $DESTDIR/$1 -arch cDr `cat LIST` -: I do not remove the files constituating the library, because -: they might be present in .distr diff --git a/distr/mkd b/distr/mkd deleted file mode 100755 index e69de29bb..000000000 diff --git a/distr/mkdist b/distr/mkdist deleted file mode 100755 index c4a307367..000000000 --- a/distr/mkdist +++ /dev/null @@ -1,165 +0,0 @@ -#!/bin/sh -# $Source$ -# $State$ - -# Set up default variables. 
- -destdir= -srcdir=`pwd` -arch=/usr/local/bin/arch -delete=no -copy=ln - -# --- Options parsing ------------------------------------------------------- - -while [ "$1" != "" ]; do - case "$1" in - -s|--srcdir) - srcdir="$2" - shift - ;; - - -d|--destdir) - destdir="$2" - shift - ;; - - -x|--delete) - delete=yes - ;; - - -c|--copy) - copy="cp -dp" - ;; - - -a|--arch) - arch="$2" - shift - ;; - - -h|--help) - echo "mkdist [options]" - echo "Options are:" - echo " -s --srcdir The CVS tree to read from. (default: CWD)" - echo " -d --destdir The directory to create the distribution in." - echo " -x --delete Erase the destination directory first." - echo " -c --copy Make physical copies of the files. (default: hardlink)" - echo " -a --arch Where the ACK 'arch' tool is." - echo " -h --help Display this message." - exit 0 - ;; - - *) - echo "Unrecognised option. Try --help for help." - exit 1 - esac - shift -done - -if [ "$destdir" == "" ]; then - echo "You must specify a destination directory. (Try --help for help.)" - exit 1 -fi - -# --- Main routines --------------------------------------------------------- - -# These two routines do the work of traversing the source tree and building -# the distribution tree. - -addfile() { - local f - f="${1##$srcdir/}" - mkdir -p $destdir/`dirname $f` - $copy "$1" "$destdir/$f" -} - -process_dir() { - local path - local archivename - - path=$1 - cd $path - - # Look for a LIST file and cache the first line. - - archivename= - if [ -f LIST ]; then - archivename=`head -1 LIST` - fi - - for i in `cat $path/.distr`; do - if [ -d $i ]; then - # This is a directory. Recurse into it. - - ( process_dir $path/$i ) - elif [ -f $i ]; then - # This is a file. - - addfile $path/$i - elif [ "$i" = "$archivename" ]; then - # Build the named archive. - - $arch cDr `cat LIST` - addfile $path/$archivename - else - ( - PATH=$PATH:. - export PATH - make distr || make $i || ( - echo "Don't know what to do with $i, listed in $1/.distr." 
- exit 1 - ) - - if [ ! -f "$path/$i" ]; then - echo "Make failed for $i, listed in $path/.distr" - exit 1 - fi - addfile $path/$i - ) - fi - done -} - -# --- Main program ---------------------------------------------------------- - -# Test to make sure that $arch points to the right thing. - -if !(strings $arch | grep archiver > /dev/null); then - echo "$arch does not seem to point at the ACK archiver tool." - echo "(Don't confuse this with the Linux tool for displaying your" - echo "architecture.)" - echo "" - echo "Press RETURN to go ahead anyway, or CTRL+C to abort." - read -fi - -# Actually do the work. - -echo "Creating distribution from CVS tree: $srcdir" -echo " into destination tree: $destdir" -echo "" - -if [ -e $destdir ]; then - if [ "$delete" == "yes" ]; then - echo "Press RETURN to erase $destdir and its contents, or CTRL+C to abort." - read - echo "Erasing..." - rm -rf "$destdir" - else - echo "$destdir exists. Aborting." - exit 1 - fi -fi - -echo "Working..." -mkdir -p $destdir -process_dir $srcdir -echo "Done." - -# Revision history -# $Log$ -# Revision 1.2 2005-06-24 23:19:23 dtrg -# Added new mkdist tool. -# -# Revision 1.1 2005/06/24 22:13:57 dtrg -# Created new tool to generate distributions. 
diff --git a/distr/mkf b/distr/mkf deleted file mode 100755 index 6ba970378..000000000 --- a/distr/mkf +++ /dev/null @@ -1,17 +0,0 @@ -if [ -f $DESTDIR/$1/$2 ] -then - : -elif grep LIST $DESTDIR/$1/.distr >/dev/null 2>&1 && - (test "$2" = "`head -1 $DESTDIR/$1/LIST`") >/dev/null 2>&1 && - ${DA-false} "$1" "$2" -then -: Fetched library contents one by one and put them together -elif ( cd $DESTDIR/$1 ; make distr ) > /dev/null 2>&1 -then - echo ++ $1/$2 made 1>&2 -elif ( cd $DESTDIR/$1 ; make $2 ) > /dev/null 2>&1 -then - echo ++ $1/$2 made 1>&2 -else - echo ++ $1/$2 not present 1>&2 -fi diff --git a/distr/mks b/distr/mks deleted file mode 100755 index be169fb6f..000000000 --- a/distr/mks +++ /dev/null @@ -1 +0,0 @@ -cp .distr $DESTDIR/$1 diff --git a/distr/mktree b/distr/mktree deleted file mode 100644 index 037fa4086..000000000 --- a/distr/mktree +++ /dev/null @@ -1,42 +0,0 @@ -case $# in -2|3) ;; -*) echo Usage: $0 directory repdir [ SVrecord ] 1>&2 ; exit 1 ;; -esac -case $0 in -/*) DDIR=`dirname $0` - ;; -*) DDIR=`pwd`/`dirname $0` - ;; -esac -case $1 in -/*) DESTDIR=$1 ;; -*) DESTDIR=`pwd`/$1 ;; -esac -case $2 in -/*) REPDIR=$2 ;; -*) REPDIR=`pwd`/$2 ;; -esac -# DD=$DDIR/mkd -# export DD -mkdir -p $DESTDIR -CVSROOT=/usr/proj/em/Repositories -export CVSROOT -cd $DESTDIR -case $# in -3) - cvs checkout world -r $3 - ;; -2) - cvs checkout world - ;; -esac -cd $REPDIR -DF=$DDIR/mkf -DA=$DDIR/mka -export DDIR DESTDIR DF DA REPDIR - -$DDIR/dwalk - -cd $DESTDIR -find . -type d -print | xargs chmod "uog+rx" -chmod -R "og-w,u+w,uog+r" . 
diff --git a/distr/todistr b/distr/todistr deleted file mode 100644 index 5b171453b..000000000 --- a/distr/todistr +++ /dev/null @@ -1,26 +0,0 @@ -REV= -FILE= -while : -do - case $# in - 0) break ;; - esac - ARG="$1" - shift - case "$ARG" in - -r*) REV=`echo "$ARG"| sed s/-r//` ;; - -*) FLAGS="$FLAGS $ARG" ;; - *) case x$FILE in - x) FILE="$ARG" ;; - *) echo todistr can only be done on one file at the time - exit 1 ;; - esac - esac -done -case x$REV in -x) REV=`rlog -h "$FILE"|sed -n -e '/head/s/^head:[ ]*//p'` ;; -esac -case x$REV in -x) exit 2 ;; -esac -rcs -ndistr4:$REV $FLAGS $FILE diff --git a/distr/ts b/distr/ts deleted file mode 100755 index 8350341b2..000000000 --- a/distr/ts +++ /dev/null @@ -1,2 +0,0 @@ -DD=`pwd`/ts -echo OK diff --git a/doc/.distr b/doc/.distr deleted file mode 100644 index bc07de978..000000000 --- a/doc/.distr +++ /dev/null @@ -1,33 +0,0 @@ -READ_ME -Makefile -proto.make -ack.doc -basic.doc -cg.doc -crefman.doc -ansi_C.doc -em -install.doc -install.pr -ncg.doc -pcref.doc -peep.doc -regadd.doc -toolkit.doc -v7bugs.doc -val.doc -LLgen -6500.doc -i80.doc -z80.doc -m68020.doc -m2ref.doc -nopt.doc -top -ego -occam -int -ceg -sparc -lint -pascal diff --git a/doc/6500.doc b/doc/6500.doc deleted file mode 100644 index 173675807..000000000 --- a/doc/6500.doc +++ /dev/null @@ -1,1893 +0,0 @@ -. \" $Id$" -.RP -.ND Dec 1984 -.TL -.B -A backend table for the 6500 microprocessor -.R -.AU -Jan van Dalen -.AB -The backend table is part of the Amsterdam Compiler Kit (ACK). -It translates the intermediate language family EM to a machine -code for the MCS6500 microprocessor family. -.AE -.bp -.DS C -.B -THE MCS6500 MICROPROCESSOR. -.R -.DE -.NH 0 -Introduction -.PP -Why a back end table for the MCS6500 microprocessor family. -Although the MCS6500 microprocessor family has an simple -instruction set and internal structure, it is used in a -variety of microcomputers and homecomputers. -This is because of is low cost. 
-As an example the Apple II, a well known and width spread -microprocessor, uses the MCS6502 CPU. -Also the BBC homecomputer, whose popularity is growing day -by day uses the MCS6502 CPU. -The BBC homecomputer is based on the MCS6502 CPU although -better and stronger microprocessors are available. -The designers of Acorn computer Industries have probably -choosen for the MCS6502 because of the amount of software -available for this CPU. -Since its width spreaded use, a variaty of software -will be needed for it. -One can think of games!!, administration programs, -teaching programs, basic interpreters and other application -programs. -Even do it will not be possible to run the total compiler kit -on a MCS6500 based computer, it is possible to write application -programs in a high level language, such as Pascal or C on a -minicomputer. -These application programs can be tested and compiled on that -minicomputer and put in a ROM (Read Only Memory), for example, -cso that it an be executed by a MCS6500 CPU. -The strategy of writing testprograms on a minicomputer, -compile it and then execute it on a MCS6500 based -microprocessor is used by the development of the back end. -The minicomputer used is M68000 based one, manufactured by -Bleasdale Computer Systems Ltd.. -The micro- or homecomputer used is a BBC microcomputer, -manufactured by Acorn Computer Ltd.. -.NH -The MOS Technology MCS6500 -.PP -The MCS6500 is as a family of CPU devices developed by MOS -Technology [1]. -The members of the MCS6500 family are the same chips in a -different housing. -The MCS6502, the big brother in the family, can handle 64k -bytes of memory, while for example the MCS6504 can only handle -8k bytes of memory. -This difference is due to the fact that the MCS6502 is in a -40 pins house and the MCS6504 has a 28 pins house, so less -address lines are available. 
-.bp -.NH -The MCS6500 CPU programmable registers -.PP -The MCS6500 series is based on the same chip so all have the -same programmable registers. -.sp 9 -.NH 2 -The accumulator A. -.PP -The accumulator A is the only register on which the arithmetic -and logical instructions can be used. -For example, the instruction ADC (add with carry) adds the -contents of the accumulator A and a byte from memory or data. -.NH 2 -The index register X. -.PP -As the name suggests this register can be used for some -indirect addressing modes. -The modes are explaned below. -.NH 2 -The index register Y. -.PP -This register is, just as the index register X, used for -certain indirect addressing modes. -These addressing modes are different from the modes which -use index register X. -.NH 2 -The program counter PC -.PP -This is the only 16-bit register available. -It is used to point to the next instruction to be -carried out. -.NH 2 -The stack pointer SP -.PP -The stack pointer is an 8-bit register, so the stack can contain -at most 256 bytes. -The CPU always appends 00000001 as highbyte of any stack address, -which means that memory locations -.B -0100 -.R -through -.B -01FF -.R -are permanently assigned to the stack. -.sp 12 -.NH 2 -The status register -.PP -The status register maintains six status flags and a master -interrupt control bit. -.br -These are the six status flags: - Carry (c) - Zero (z) - Overflow (o) - Sign (n) - Decimal mode (d) - Break (b) - - - - - -The bit (i) is the master interrupt control bit. -.NH -The MCS6500 memory layout. -.PP -In the MCS6500 memory space three area's have special meaning. -These area's are: -.IP 1) -Top page. -.IP 2) -Zero page. -.IP 3) -The stack. -.PP -MCS6500 memory is divided up into pages. -These pages consist 256 bytes. -So in a memory address the highbyte denotes the page number -and the lowbyte the offset within the page. -.NH 2 -Top page. -.PP -When a MCS6500 is restared it jumps indirect via memory address -.B -FFFC. 
-.R -At -.B -FFFC -.R -(lowbyte) and -.B -FFFD -.R -(highbyte) there must be the address of the bootstrap subroutine. -When a break instruction (BRK) occurs or an interrupt takes place, -the MCS6500 jumps indirect through memory address -.B -FFFE. -.R -.B -FFFE -.R -and -.B -FFFF -.R -thus, must contain the address of the interrupt routine. -The former only goes for maskeble interrupt. -There also exist a nonmaskeble interrupt. -This cause the MCS6500 to jump indirect through memory address -.B -FFFA. -.R -So the top six bytes of memory are used by the operating system -and therefore not available for the back end. -.NH 2 -Zero page. -.PP -This page has a special meaning in the sence that addressing -this page uses special opcodes. -Since a page consists of 256 bytes, only one byte is needed -for addressing zero page. -So an instruction which uses zero page occupies two bytes. -It also uses less clock cycle's while carrying out the instruction. -Zero page is also needed when indirect addressing is used. -This means that when indirect addressing is used, the address must -reside in zero page (two consecutive bytes). -In this case (the back end), zero page is used, for example -to hold the local base, the second local base, the stack pointer -etc. -.NH 2 -The stack. -.PP -The stack is described in paragraph 3.5 about the MCS6500 -programmable registers. -.NH -The memory adressing modes -.PP -MCS6500 memory reference instructions use direct addressing, -indexed addressing, and indirect addressing. -.NH 2 -direct addressing. -.PP -Three-byte instructions use the second and third bytes of the -object code to provide a direct 16-bit address: -therefore, 65.536 bytes of memory can be addressed directly. -The commonly used memory reference instructions also have a two-byte -object code variation, where the second byte directly addresses -one of the first 256 bytes. -.NH 2 -Base page, indexed addressing. -.PP -In this case, the instruction has two bytes of object code. 
-The contents of either the X or Y index registers are added to the -second object code byte in order to compute a memory address. -This may be illustrated as follows: -.sp 15 -Base page, indexed addressing, as illustrated above, is -wraparound - which means that there is no carry. -If the sum of the index register and second object code byte contents -is more than -.B -FF -.R -, the carry bit will be dicarded. -This may be illustrated as follows: -.sp 9 -.NH 2 -Absolute indexed addressing. -.PP -In this case, the contents of either the X or Y register are added -to a 16-bit direct address provided by the second and third bytes -of an instruction's object code. -This may be illustrated as follows: -.sp 10 -.NH 2 -Indirect addressing. -.PP -Instructions that use simple indirect addressing have three bytes of -object code. -The second and third object code bytes provide a 16-bit address; -therefore, the indirect address can be located anywhere in -memory. -This is straightforward indirect addressing. -.NH 3 -Pre-indexed indirect addressing. -.PP -In this case, the object code consists of two bytes and the -second object code byte provides an 8-bit address. -Instructions that use pre-indexed indirect addressing add the contents -of the X index register and the second object code byte to access -a memory location in the first 256 bytes of memory, where the -indirect address will be found: -.sp 18 -When using pre-indexed indirect addressing, once again wraparound -addition is used, which means that when the X index register contents -are added to the second object code byte, any carry will be discarded. -Note that only the X index register can be used with pre-indexed -addressing. -.NH 3 -Post-indexed indirect addressing. -.PP -In this case, the object code consists of two bytes and the -second object code byte provides an 8-bit address. -Now the second object code byte indentifies a location -in the first 256 bytes of memory where an indirect address -will be found. 
-The contents of the Y index register are added to this indirect -address. -This may be illustrated as follows: -.sp 18 -Note that only the Y index register can be used with post-indexed -indirect addressing. -.bp -.NH -What the CPU has and doesn't has. -.PP -Although the designers of the MCS6500 CPUs family state that -there is nothing very significant about the short stack (only -256 bytes) this stack caused problems for the back end. -The designers say that a 256-byte stack usually is sufficient -for any typical microcomputer, this is only true if the stack -is used only for return addresses of the JSR (jump to -subroutine) instruction. -But since the EM machine is suppost to be a stack machine and -high level languages need the ability of parameters and -locals in there procedures and function, this short stack -is unsufficiant. -So an software stack is implemented in this back end, requiring two -additional subroutines for stack handling. -These two stack handling subroutines slow down the processing time -of a program since the stack is used heavely. -.PP -Since parameters and locals of EM procedures are offseted -from the localbase of that procedure, indirect addressing -is havily used. -Offsets are positive (for parameters) and negative (for -local variables). -As explaned before the addressing modes the MCS6500 have a -post indexed indirect addressing mode. -This addressing mode can only handle positive offsets. -This raises a problem for accessing the local variables -I have chosen for the next solution. -A second local base is introduced. -This second local base is the real local base subtracted by -a constant BASE. -In the present situation of the back end the value of BASE -is 240. -This means that there are 240 bytes reseved for local -variables to be indirect addressed and 14 bytes for -the parameters. -.DS C -.B -THE CODE GENERATOR. -.R -.DE -.NH 0 -Description of the machine table. 
-.PP -The machine description table consists of the following sections: -.IP 1. -The macro definitions. -.IP 2. -Constant definitions. -.IP 3. -Register definitions. -.IP 4. -Token definitions. -.IP 5. -Token expressions. -.IP 6. -Code rules. -.IP 7. -Move definitions. -.IP 8. -Test definitions. -.IP 9. -Stack definitions. -.NH 2 -Macro definitions. -.PP -The macro definitions at the top of the table are expanded -by the preprocessor on occurence in the rest of the table. -.NH 2 -Constant definitions. -.PP -There are three constants which must be defined at first. -The are: -.IP EM_WSIZE: 11 -Number of bytes in a machine word. -This is the number of bytes a simple -.B -loc -.R -instruction will put on the stack. -.IP EM_PSIZE: -Number of bytes in a pointer. -This is the number of bytes a -.B -lal -.R -instruction will put on the stack. -.IP EM_BSIZE: -Number of bytes in the hole between AB and LB. -The calling sequence only saves LB on the stack so this -constant is equal to the pointer size. -.NH 1 -Register definitions. -.PP -The only important register definition is the definition of -the registerpair AX. -Since the rest of the machine's registers Y, PC, ST serve -special purposes, the code generator cannot use them. -.NH 2 -Token definitions -.PP -There is a fake token. -This token is put in the table, since the code generator generator -complains if it cannot find one. -.NH 2 -Token expression definitions. -.PP -The token expression is also a fake one. -This token expression is put in the table, since the code generator -generator complains if it cannot find one. -.NH 2 -Code rules. -.PP -The code rule section is the largest section in the table. -They specify EM patterns, stack patterns, code to be generated, -etc. -The syntax is: -.IP code rule: -EM pattern '|' stack pattern '|' code '|' -stack replacement '|' EM replacement '|' -.PP -All patterns are optional, however there must be at least one -pattern present. 
-If the EM pattern is missing the rule becomes a rewriting -rule or a -.B -coercion -.R -to be used when code generation cannot continue because of an -invalid stack pattern. -The code rules are preceeded by the word CODE:. -.NH 3 -The EM pattern. -.PP -The EM pattern consists of a list of EM mnemonics followed by -a boolean expression. Examples: -.sp 1 -.br -.B -loe -.R -.sp 1 -will match a single -.B -loe -.R -instruction, -.sp 1 -.br -.B -loc loc cif -.R -$1==2 && $2==8 -.sp 1 -is a pattern that will match -.sp 1 -.br -.B -loc -.R -2 -.br -.B -loc -.R -8 -.br -.B -cif -.R -.sp 1 -and -.sp 1 -.br -.B -lol -inc -stl -.R -$1==$3 -.sp 1 -will match for example -.sp 1 -.br -.B -lol -.R -6 -.br -.B -inc -.R -.br -.B -stl -.R -6 -.sp 1 -A missing boolean expession evaluates to TRUE. -.PP -The code generator will match the longest EM pattern on every occasion, -if two patterns of the same length match the first in the table -will be chosen, while all patterns of length greater than or equal -to three are considered to be of the same length. -.NH 3 -The stack pattern. -.PP -The only stack pattern that can occur is R16, which means that the -registerpair AX contains the word on top of the stack. -If this is not the case a coersion occurs. -This coersion generates a "jsr Pop", which means that the top -of the stack is popped and stored in the registerpair AX. -.NH 3 -The code part. -.PP -The code part consists of three parts, stack cleanup, register -allocation, and code to be generated. -All of these may be omitted. -.NH 4 -Stack cleanup. -.PP -When generating something like a branch instruction it might be -needed to empty the fake stack, that is, remove the AX registerpair. -This is done by the instruction remove(ALL) -.NH 4 -Register allocation. -.PP -If the machine code to be generated uses the registerpair AX, -this is signaled to the code generator by the allocate(R16) -instruction. 
-If the registerpair AX resides on the fake stack, this will result -in a "jsr Push", which means that the registerpair AX is pushed on -the stack and will be free for further use. -If registerpair AX is not on the fake stack nothing happens. -.NH 4 -Code to be generated. -.PP -Code to be generated is specified as a list of items of the following -kind: -.IP 1) -A string in double quotes("This is a string"). -This is copied to the codefile and a newline ('\n') is appended. -Inside the string all normal C string conventions are allowed, -and substitutions can be made of the following sorts. -.RS -.IP a) -$1, $2 etc. These are the operand of the corresponding EM -instructions and are printed according to there type. -To put a real '$' inside the string it must be doubled ('$$'). -.IP b) -%[1], %[2.reg], %[b.1] etc. these have there obvious meaning. -If they describe a complete token (%[1]) the printformat for -the token is used. -If they stand fo a basic term in an expression they will be -printed according to their type. -To put a real '%' inside the string it must be doubled ('%%'). -.IP c) -%( arbitrary expression %). This allows inclusion of arbitrary -expressions inside strings. -Usually not needed very often, so that the akward notation -is not too bad. -Note that %(%[1]%) is equivalent to %[1]. -.RE -.NH 3 -stack replacement. -.PP -The stack replacement is a possibly empty list of items to be -pushed on the fake stack. -Three things can occur: -.IP 1) -%[1] is used if the registerpair AX was on the fake stack and is -to be pushed back onto it. -.IP 2) -%[a] is used if the registerpair AX is allocated with allocate(R16) -and is to be pushed onto the fake stack. -.IP 3) -It can also be empty. -.NH 3 -EM replacement. -.PP -In exeptional cases it might be useful to leave part of the an EM -pattern undone. -For example, a -.B -sdl -.R -instruction might be split into two -.B -stl -.R -instructions when there is no 4-byte quantity on the stack. 
-The EM replacement part allows one to express this. -Example: -.sp 1 -.br -.B -stl -.R -$1 -.B -stl -.R -$1+2 -.sp 1 -The instructions are inserted in the stream so they can match -the first part of a pattern in the next step. -Note that since the code generator traverses the EM instructions -in a strict linear fashion, it is impossible to let the EM -replacement match later parts of a pattern. -So if there is a pattern -.sp 1 -.br -.B -loc -stl -.R -$1==0 -.sp1 -and the input is -.sp 1 -.br -.B -loc -.R -0 -.B -sdl -.R -4 -.sp 1 -the -.B -loc -.R -0 -will be processed first, then the -.B -sdl -.R -might be split into two -.B -stl -.R -'s but the pattern cannot match now. -.NH 3 -Move definitions. -.PP -This definition is a fake. This definition is put in the -table, since the code generator generator complains if it -cannot find one. -.NH 3 -Test definitions. -.PP -Test definitions aren't used by the table. -.NH 3 -Stack definitions. -.PP -When the generator has to push the registerpair AX, it must -know how to do so. -The machine code to be generated is defined here. -.NH 1 -Some remarks. -.PP -The above description of the machine table is -a description of the table for the MCS6500. -It uses only a part of the possibilities which the code generator -generator offers. -For a more precise and detailed description see [2]. -.DS C -.B -THE BACK END TABLE. -.R -.DE -.NH 0 -Introduction. -.PP -The code rules are divided in 15 groups. -These groups are: -.IP 1. -Load instructions. -.IP 2. -Store instructions. -.IP 3. -Integer arithmetic instructions. -.IP 4. -Unsigned arithmetic instructions. -.IP 5. -Floating point arithmetic instructions. -.IP 6. -Pointer arithmetic instructions. -.IP 7. -Increment, decrement and zero instructions. -.IP 8. -Convert instructions. -.IP 9. -Logical instructions. -.IP 10. -Set manipulation instructions. -.IP 11. -Array instructions. -.IP 12. -Compare instructions. -.IP 13. -Branch instructions. -.IP 14. -Procedure call instructions. 
-.IP 15. -Miscellaneous instructions. -.PP -From all of these groups one or two typical EM pattern will be explained -in the next paragraphs. -Comment is placed between /* and */ (/* This is a comment */). -.NH -The instructions. -.NH 2 -The load instructions. -.PP -In this group a typical instruction is -.B -lol -.R -. -A -.B -lol -.R -instruction pushes the word at local base + offset, where offset -is the instructions argument, onto the stack. -Since the MCS6500 can only offset by 256 bytes, as explaned at the -memory addressing modes, there is a need for two code rules in the -table. -One which can offset directly and one that must explicit -calculate the address of the local. -.NH 3 -The lol instruction with indirect offsetting. -.PP -In this case an indirect offsetted load from the second local base -is possible. -The table content is: -.sp 1 -.br -.B -lol -.R -IN($1) | | -.br -allocate(R16) /* allocate registerpair AX */ -.br -"ldy #BASE+$1" /* load Y with the offset from the second -.br - local base */ -.br -"lda (LBl),y" /* load indirect the lowbyte of the word */ -.br -"tax" /* move register A to register X */ -.br -"iny" /* increment register Y (offset) */ -.br -"lda (LBl),y" /* load indirect the highbyte of the word */ -.br -| %[a] | | /* push the word onto the fake stack */ -.NH 3 -The lol instruction whose offset is to big. -.PP -In this case, the library subroutine "Lol" is used. -This subroutine expects the offset in registerpair AX, then -calculates the address of the local or parameter, and loads -it into registerpair AX. -The table content is: -.sp 1 -.br -.B -lol -.R -| | -.br -allocate(R16) /* allocate registerpair AX */ -.br -"lda #[$1].h" /* load highbyte of offset into register A */ -.br -"ldx #[$1].l" /* load lowbyte of offset into register X */ -.br -"jsr Lol" /* perform the subroutine */ -.br -| %[a] | | /* push word onto the fake stack */ -.NH 2 -The store instructions. -.PP -In this group a typical instruction is -.B -stl. 
-.R -A -.B -stl -.R -instruction poppes a word from the stack and stores it in the word -at local base + offset, where offset is the instructions argument. -Here also is the need for two code rules in the table as a result -of the offset limits. -.NH 3 -The stl instruction with indirect offsetting. -.PP -In this case it an indirect offsetted store from the second local -base is possible. -The table content is: -.sp 1 -.br -.B -stl -.R -IN($1) | R16 | /* expect registerpair AX on top of the -.br - fake stack */ -.br -"ldy #BASE+1+$1" /* load Y with the offset from the -.br - second local base */ -.br -"sta (LBl),y" /* store the highbyte of the word from A */ -.br -"txa" /* move register X to register A */ -.br -"dey" /* decrement offset */ -.br -"sta (LBl),y" /* store the lowbyte of the word from A */ -.br -| | | -.NH 3 -The stl instruction whose offset is to big. -.PP -In this case the library subroutine 'Stl' is used. -This subroutine expects the offset in registerpair AX, then -calculates the address, poppes the word stores it at its place. -The table content is: -.sp 1 -.br -.B -stl -.R -| | -.br -allocate(R16) /* allocate registerpair AX */ -.br -"lda #[$1].h" /* load highbyte of offset in register A */ -.br -"ldx #[$1].l" /* load lowbyte of offset in register X */ -.br -"jsr Stl" /* perform the subroutine */ -.br -| | | -.NH 2 -Integer arithmetic instructions. -.PP -In this group typical instructions are -.B -adi -.R -and -.B -mli. -.R -These instructions, in this table, are implemented for 2-byte -and 4-byte integers. -The only arithmetic instructions available on the MCS6500 are -the ADC (add with carry), and SBC (subtract with not(carry)). -Not(carry) here means that in a subtraction, the one's complement -of the carry is taken. -The absence of multiply and division instructions forces the -use of subroutines to handle these cases. -Because there are no registers left to perform on the multiply -and division, zero page is used here. 
-The 4-byte integer arithmetic is implemented, because in C there -exists the integer type long. -A user is freely to use the type long, but will pay in performance. -.NH 3 -The adi instruction. -.PP -In case of the -.B -adi -.R -2 (and -.B -sbi -.R -2) instruction there are many EM -patterns, so that the instruction can be performed in line in -most cases. -For the worst case there exists a subroutine in the library -which deals with the EM instruction. -In case of a -.B -adi -.R -4 (or -.B -sbi -.R -4) there only is a subroutine to deal with it. -A table content is: -.sp 1 -.br -.B -lol lol adi -.R -(IN($1) && IN($2) && $3==2) | | /* is it in range */ -.br -allocate(R16) /* allocate registerpair AX */ -.br -"ldy #BASE+$1+1" /* load Y with offset for first operand */ -.br -"lda (LBl),y" /* load indirect highbyte first operand */ -.br -"pha" /* save highbyte first operand on hard_stack */ -.br -"dey" /* decrement offset first operand */ -.br -"lda (LBl),y" /* load indirect lowbyte first operand */ -.br -"ldy #BASE+$2" /* load Y with offset for second operand */ -.br -"clc" /* clear carry for addition */ -.br -"adc (LBl),y" /* add the lowbytes of the operands */ -.br -"tax" /* store lowbyte of result in place */ -.br -"iny" /* increment offset second operand */ -.br -"pla" /* get highbyte first operand */ -.br -"adc (LBl),y" /* add the highbytes of the operands */ -.br -| %[a] | | /* push the result onto the fake stack */ -.NH 3 -The mli instruction. -.PP -The -.B -mli -.R -2 instruction uses most the subroutine 'Mlinp'. -This subroutine expects the multiplicand in zero page -at locations ARTH, ARTH+1, while the multiplier is in zero -page locations ARTH+2, ARTH+3. -For a description of the algorithms used for multiplication and -division, see [3]. 
-A table content is: -.sp 1 -.br -.B -lol lol mli -.R -(IN($1) && IN($2) && $3==2) | | -.br -allocate(R16) /* allocate registerpair AX */ -.br -"ldy #BASE+$1" /* load Y with offset of multiplicand */ -.br -"lda (LBl),y" /* load indirect lowbyte of multiplicand */ -.br -"sta ARTH" /* store lowbyte in zero page */ -.br -"iny" /* increment offset of multiplicand */ -.br -"lda (LBl),y" /* load indirect highbyte of multiplicand */ -.br -"sta ARTH+1" /* store highbyte in zero page */ -.br -"ldy #BASE+$2" /* load Y with offset of multiplier */ -.br -"lda (LBl),y" /* load indirect lowbyte of multiplier */ -.br -"sta ARTH+2" /* store lowbyte in zero page */ -.br -"iny" /* increment offset of multiplier */ -.br -"lda (LBl),y" /* load indirect highbyte of multiplier */ -.br -"sta ARTH+3" /* store highbyte in zero page */ -.br -"jsr Mlinp" /* perform the multiply */ -.br -| %[a] | | /* push result onto fake stack */ -.NH 2 -The unsgned arithmetic instructions. -.PP -Since unsigned addition an subtraction is performed in the same way -as signed addition and subtraction, these cases are dealt with by -an EM replacement. -For mutiplication and division there are special subroutines. -.NH 3 -Unsigned addition. -.PP -This is an example of the EM replacement strategy. -.sp 1 -.br -.B -lol lol adu -.R - | | | | -.B -lol -.R -$1 -.B -lol -.R -$2 -.B -adi -.R -$3 | -.NH 2 -Floating point arithmetic. -.PP -Floating point arithmetic isn't implemented in this table. -.NH 2 -Pointer arithmetic instructions. -.PP -A typical pointer arithmetic instruction is -.B -adp -.R -2. -This instruction adds an offset and a pointer. -A table content is: -.sp 1 -.br -.B -adp -.R - | | | | -.B -loc -.R -$1 -.B -adi -.R -2 | -.NH 2 -Increment, decrement and zero instructions. -.PP -In this group a typical instruction is -.B -inl -.R -, which increments a local or parameter. -The MCS6500 doesn't have an instruction to increment the -accumulator A, so the 'ADC' instruction must be used. 
-A table content is: -.sp 1 -.br -.B -inl -.R -IN($1) | | -.br -allocate(R16) /* allocate registerpair AX */ -.br -"ldy #BASE+$1" /* load Y with offset of the local */ -.br -"clc" /* clear carry for addition */ -.br -"lda (LBl),y" /* load indirect lowbyte of local */ -.br -"adc #1" /* increment lowbyte */ -.br -"sta (LBl),y" /* restore indirect the incremented lowbyte */ -.br -"bcc 1f" /* if carry is clear then ready */ -.br -"iny" /* increment offset of local */ -.br -"lda (LBl),y" /* load indirect highbyte of local */ -.br -"adc #0" /* add carry to highbyte */ -.br -"sta (LBl),y\\n1:" /* restore indirect the highbyte */ -.PP -If the offset of the local or parameter is to big, first the -local or parameter is fetched, than incremented, and then -restored. -.NH 2 -Convert instructions. -.PP -In this case there are two convert instructions -which really do something. -One of them is in line code, and deals with the extension of -a character (1-byte) to an integer. -The other one is a subroutine which handles the conversion -between 2-byte integers and 4-byte integers. -.NH 3 -The in line conversion. -.PP -The table content is: -.sp 1 -.br -.B -loc loc cii -.R -$1==1 && $2==2 | R16 | -.br -"txa" /* see if sign extension is needed */ -.br -"bpl 1f" /* there is no need for sign extension */ -.br -"lda #0FFh" /* sign extension here */ -.br -"bne 2f" /* conversion ready */ -.br -"1: lda #0\\n2:" /* no sign extension here */ -.NH 2 -Logical instructions. -.PP -A typical instruction in this group is the logical -.B -and -.R -on two 2-byte words. -The logical -.B -and -.R -on groups of more than two bytes (max 254) -is also possible and uses a library subroutine. -.NH 3 -The logical and on 2-byte groups. 
-.PP -The table content is: -.sp 1 -.br -.B -and -.R -$1==2 | R16 | /* one group must be on the fake stack */ -.br -"sta ARTH+1" /* temporary save of first group highbyte */ -.br -"stx ARTH" /* temporary save of first group lowbyte */ -.br -"jsr Pop" /* pop second group from the stack */ -.br -"and ARTH+1" /* logical and on highbytes */ -.br -"pha" /* temporary save the result's highbyte */ -.br -"txa" /* logical and can only be done in A */ -.br -"and ARTH" /* logical and on lowbytes */ -.br -"tax" /* restore results lowbyte */ -.br -"pla" /* restore results highbyte */ -.br -| %[1] | | /* push result onto fake stack */ -.NH 2 -Set manipulation instructions. -.PP -A typical EM pattern in this group is -.B -loc inn zeq -.R -$1>0 && $1<16 && $2==2. -This EM pattern works on sets of 16 bits. -Sets can be bigger (max 256 bytes = 2048 bits), but than a -library routine is used instead of in line code. -The table content of the above EM pattern is: -.sp 1 -.br -.B -loc inn zeq -.R -$1>0 && $1<16 && $2==2 | R16 | -.br -"ldy #$1+1" /* load Y with bit number */ -.br -"stx ARTH" /* cannot rotate X, so use zero page */ -.br -"1: lsr a" /* right shift A */ -.br -"ror ARTH" /* right rotate zero page location */ -.br -"dey" /* decrement Y */ -.br -"bne 1b" /* shift $1 times */ -.br -"bcc $1" /* no carry, so bit is zero */ -.NH 2 -Array instructions. -.PP -In this group a typical EM pattern is -.B -lae lar -.R -defined(rom(1,3)) | | | | -.B -lae -.R -$1 -.B -aar -.R -$2 -.B -loi -.R -rom(1,3). 
-This pattern uses the -.B -aar -.R -instruction, which is part of a typical EM pattern: -.sp 1 -.br -.B -lae aar -.R -$2==2 && rom(1,3)==2 && rom(1,1)==0 | R16 | /* registerpair AX contains -the index in the array */ -.br -"pha" /* save highbyte of index */ -.br -"txa" /* move lowbyte of index to A */ -.br -"asl a" /* shift left lowbyte == 2 times lowbyte */ -.br -"tax" /* restore lowbyte */ -.br -"pla" /* restore highbyte */ -.br -"rol a" /* rotate left highbyte == 2 times highbyte */ -.br -| %[1] | adi 2 | /* push new index, add to lowerbound array */ -.NH 2 -Compare instructions. -.PP -In this group all EM patterns are performed by calling -a subroutine. -Subroutines are used here because comparison is only -possible byte by byte. -This means a lot of code, and since compare are used frequently -a lot of in line code would be generated, and thus reducing -the space left for the software stack. -These subroutines can be found in the library. -.NH 2 -Branch instructions. -.PP -A typical branch instruction is -.B -beq. -.R -The table content for it is: -.sp 1 -.br -.B -beq -.R -| R16 | -.br -"sta BRANCH+1" /* save highbyte second operand in zero page */ -.br -"stx BRANCH" /* save lowbyte second operand in zero page */ -.br -"jsr Pop" /* pop the first operand */ -.br -"cmp BRANCH+1" /* compare the highbytes */ -.br -"bne 1f" /* there not equal so go on */ -.br -"cpx BRANCH" /* compare the lowbytes */ -.br -"beq $1\\n1:" /* lowbytes are also equal, so branch */ -.PP -Another typical instruction in this group is -.B -zeq. -.R -The table content is: -.sp 1 -.br -.B -zeq -.R -| R16 | -.br -"tay" /* move A to Y for setting testbits */ -.br -"bmi $1" /* highbyte s minus so branch */ -.br -"txa" /* move X to A for setting testbits */ -.br -"beq $1\\n1:" /* lowbyte also zero, thus branch */ -.NH 2 -Procedure call instructions. -.PP -In this group one code generation might seem a little -akward. -It is the EM instruction -.B -cai -.R -which generates a 'jsr Indir'. 
-This is because there is no indirect jump_subroutine in the -MCS6500. -The only solution is to store the address in zero page, and then -do a 'jsr' to a known label. -At this label there must be an indirect jump instruction, which -perform a jump to the address stored in zero page. -In this case the label is Indir, and the address is stored in -zero page at the addresses ADDR, ADDR+1. -The tabel content is: -.sp 1 -.br -.B -cai -.R -| R16 | -.br -"stx ADDR" /* store lowbyte of address in zero page */ -.br -"sta ADDR+1" /* store highbyte of address in zero page */ -.br -"jsr Indir" /* use the indirect jump */ -.br -| | | -.NH 2 -Miscellaneous instructions. -.PP -In this group, as the name suggests, there is no -typical EM instruction or EM pattern. -Most of the MCS6500 code to be generated uses a library subroutine -or is straightforward. -.DS C -.B -PERFORMANCE. -.R -.DE -.NH 0 -Introduction. -.PP -To measure the performance of the back end table some timing -tests are done. -What to time? -In this case, the execution time of several Pascal statements -are timed. -Statements in C, which have a Pascal equivalence are timed also. -The statements are timed as follows. -A test program is been written, which executes two -nested for_loops from 1 to 1.000. -Within these for_loops the statement, which is to be tested, is placed, -so the statement will be executed 1.000.000 times. -Then the same program is executed without the test statement. -The time difference between the two executions is the time -neccesairy to execute the test statement 1.000.000 times. -The total time to execute the test statement requires thus the -time difference divided by 1.000.000. -.NH 0 -Testing Pascal statements. -.PP -The next statements are tested. 
-.IP 1) -int1 := 0; -.IP 2) -int1 := int2 - 1; -.IP 3) -int1 := int1 + 1; -.IP 4) -int1 := icon1 - icon2; -.IP 5) -int1 := icon2 div icon1; -.IP 6) -int1 := int2 * int3; -.IP 7) -bool := (int1 < 0); -.IP 8) -bool := (int1 < 3); -.IP 9) -bool := ((int1 > 3) or (int1 < 3)) -.IP 10) -case int1 of 1: bool := false; 2: bool := true end; -.IP 11) -if int1 = 0 then int2 := 3; -.IP 12) -while int1 > 0 do int1 := int1 - 1; -.IP 13) -m := a[k]; -.IP 14) -let2 := ['a'..'c']; -.IP 15) -P3(x); -.IP 16) -dum := F3(x); -.IP 17) -s.overhead := 5400; -.IP 18) -with s do overhead := 5400; -.PP -These statement were tested in a procedure test. -.sp 1 -.br -procedure test; -.br -var i, j, ... : integer; -.br - bool : boolean; -.br - let2 : set of char; -.br -begin -.br - for i := 1 to 1000 -.br - for j := 1 to 1000 -.br - STATEMENT -.br -end; -.sp 1 -.PP -STATEMENT is one of the statements as shown above, or it is -the empty statement. -The assignment of used variables, if neccesairy, is done before -the first for_loop. -In case of the statement which uses the procedure call, statement -15, a dummy procedure is declared whose body is empty. -In case of the statement which uses the function, statement 16, -this function returns its argument. -for the timing of C statements a similar test program was -written. -.sp 1 -.br -main() -.br -{ -.br - int i, j, ...; -.br - for (i = 1; i <= 1000; i++) -.br - for (j = 1; j <= 1000; j++) -.br - STATEMENT -.br -} -.sp 1 -.NH -The results. -.PP -Here are tables with the results of the time measurments. -Times are in microseconds (10^-6). -Some statements appear twice in the tables. -In the second case an array of 200 integers was declerated -before the variable to be tested, so this variable cannot -be accessed by indirect addressing from the second local base. -This results in a larger execution time of the statement to be -tested. -The column 68000 contains the times measured on a Bleasdale, -M68000 based, computer. 
-The times in column pdp are measured on a DEC pdp11/44, where -the times from column 6500 come from a BBC microcomputer. -.bp -.TS -expand; -c s s s -c c c c -lw35 nw7 nw7 nw7. -Pascal timing results -statement 68000 pdp 6500 -_ -T{ -int1 := 0; -T} 4.0 5.8 16.7 - 4.0 4.2 97.8 -_ -T{ -int1 := int2 - 1; -T} 7.2 7.1 27.2 - 6.9 7.1 206.5 -_ -T{ -int1 := int1 + 1; -T} 6.9 6.8 27.2 - 6.4 6.7 106.5 -_ -T{ -int1 := icon1 + icon2; -T} 6.2 6.2 25.6 - 6.2 6.0 106.6 -_ -T{ -int1 := icon2 div icon1; -T} 14.9 14.3 372.6 - 14.9 14.7 453.7 -_ -T{ -int1 := int2 * int3; -T} 11.5 12.0 558.1 - 11.3 11.6 728.6 -_ -T{ -bool := (int1 < 0); -T} 7.2 6.9 122.8 - 7.8 8.1 453.2 -_ -T{ -bool := (int1 < 3); -T} 7.3 7.6 126.0 - 7.2 8.1 232.2 -_ -T{ -bool := ((int1 > 3) or (int1 < 3)) -T} 10.1 12.0 307.8 - 10.2 11.9 440.1 -_ -T{ -case int1 of 1: bool := false; 2: bool := true end; -T} 18.3 17.9 165.7 -_ -T{ -if int1 = 0 then int2 := 3; -T} 9.5 8.5 133.8 -_ -T{ -while int1 > 0 do int1 := int1 - 1; -T} 6.9 6.9 126.0 -_ -T{ -m := a[k]; -T} 7.2 6.8 134.3 -_ -T{ -let2 := ['a'..'c']; -T} 38.4 38.8 447.4 -_ -T{ -P3(x); -T} 18.9 18.8 180.3 -_ -T{ -dum := F3(x); -T} 26.8 27.1 343.3 -_ -T{ -s.overhead := 5400; -T} 4.6 4.1 16.7 -_ -T{ -with s do overhead := 5400; -T} 4.2 4.3 16.7 -.TE -.TS -expand; -c s s s -c c c c -lw35 nw7 nw7 nw7. 
-C timing results -statement 68000time pdptime 6500time -_ -T{ -int1 = 0; -T} 4.1 3.6 17.2 - 4.1 4.1 97.7 -_ -T{ -int1 = int2 - 1; -T} 6.6 6.9 27.2 - 6.1 6.5 206.4 -_ -T{ -int1 = int1 + 1; -T} 6.4 7.3 27.2 - 6.3 6.2 206.4 -_ -T{ -int1 = int2 * int3; -T} 11.4 12.3 522.6 - 9.6 10.1 721.2 -_ -T{ -int1 = (int2 < 0); -T} 7.2 7.6 126.4 - 7.4 7.7 232.5 -_ -T{ -int1 = (int2 < 3); -T} 7.0 7.5 126.0 - 7.8 7.8 232.6 -_ -T{ -int1 = ((int2 > 3) || (int2 < 3)); -T} 11.8 12.2 193.4 - 11.5 13.2 245.6 -_ -T{ -switch (int1) { case 1: int1 = 0; break; case 2: int1 = 1; break; } -T} 28.3 29.2 164.1 -_ -T{ -if (int1 == 0) int2 = 3; -T} 4.8 4.8 19.4 -_ -T{ -while (int2 > 0) int2 = int2 - 1; -T} 5.8 6.0 125.9 -_ -T{ -int2 = a[int2]; -T} 4.8 5.1 192.8 -_ -T{ -P3(int2); -T} 18.8 18.4 180.3 -_ -T{ -int2 = F3(int2); -T} 27.0 27.2 309.4 -_ -T{ -s.overhead = 5400; -T} 5.0 4.1 16.7 -.TE -.NH -Pascal statements which don't have a C equivalent. -.PP -At first, the two statements who perform an operation on constants -are left out. -These are left out while the C front end does constant folding, -while the Pascal front end doesn't. -So in C the statements int1 = icon1 + icon2; and int1 = icon1 / icont2; -will use the same amount of time since the expression is evaluated -by the front end. -The two other statements (let2 := ['a'..'c']; and -.B -with -.R -s -.B -do -.R -overhead := 5400;), aren't included in the C statement timing table, -because there constructs do not exist in C. -Although in C there can be direct bit manipulation, and thus can -be used to implement sets I have not used it here. -The -.B -with -.R -statement does not exists in C and there is nothing with the slightest -resemblance to it. -.PP -At first sight in the table , it looked if there is no much difference -in the times for the M68000 and the pdp11/44, in comparison with the -times needed by the MCS6500. -To verify this impression, I calculated the correlation coefficient -between the times of the M68000 and pdp11/44. 
-It turned out to be 0.997 for both the Pascal time tests and the C -time tests. -Since the correlation coefficient is near to one and the difference -between the times is small, they can be considered to be the same -as seen from the times of the MCS6500. -Then I tried to make a graph of the times from the M68000 and -the MCS6500. -Well, there wasn't any correlation to be seen, taking all the times. -The only correlation one could see, with some effort, was in the -times for the first three Pascal statements. -The first two C statements also show a correlation, which two points -always do. -.PP -Also the three Pascal statements -.B -case -.R -, -.B -if -.R -, -and -.B -while -.R -have a correlation coefficient of 0.999. -This is probably because the -.B -case -.R -statement uses a subroutine in both cases and the other two -statements -.B -if -.R -and -.B -while -.R -generate in line code. -The last two Pascal statements use the same time, since the front -end will generate the same EM code for both. -.PP -The independence between the rest of the test times is because -in these cases the object code for the MCS6500 uses library -subroutines, while the other processors can handle the EM code -with in line code. -.PP -It is clear that the MCS6500 is a slower device: it needs longer -execution times and more library subroutines, but -there is no constant factor between its execution times and those -of other processors. -.PP -The slowing down of the MCS6500 as a result of the need of a -library subroutine is illustrated by the multiplication -statement. -The MCS6500 needs a library subroutine, while the other -two processors have a machine instruction to perform the -multiply. -This results in a factor of 48.5, when the operands can be accessed -indirectly by the MCS6500. -When the MCS6500 cannot access the operands indirectly the situation -is even worse. 
-The slight differences between the MCS6500 execution times for -Pascal statements and C statements are probably the result of the -front end, and thus beyond the scope of this discussion. -.PP -Another timing test is done in C on the statement k = i + j + 1983. -This statement is tested on many UNIX* -.FS -* UNIX is a Trademark of Bell Laboratories. -.FE -systems. -For a complete list see appendix A. -The slowest one is the IBM XT, which runs on an 8088 microprocessor. -The fastest one is the Amdahl computer. -Here is a short table to illustrate the performance of the -MCS6500. -.TS -c c c -c n n. -machine short int -IBM XT 53.4 53.4 -Amdahl 0.5 0.3 -MCS6500 150.2 150.2 -.TE -The MCS6500 is three times slower than the IBM XT, but three hundred -times slower than the Amdahl. -The reason why the times on the IBM XT and the MCS6500 are the -same for short's and int's, is that most C compilers make the types -short and integer the same size on 16-bit machines. -In this project the MCS6500 is regarded as a 16-bit machine. -.NH -Length tests. -.PP -I have also compiled several programs written in Pascal and C to -see if there is a resemblance between the number of bytes generated -in the machine's language. -In the tables: -.IP length: 9 -The number of bytes of the source program. -.IP 68000: -The number of bytes of the a.out file for a M68000. -.IP pdp: -The number of bytes of the a.out file for a pdp11/44. -.IP 6500: -The number of bytes of the a.out file for a MCS6500. -.LP -These are the results: -.TS -c s s s -c c c c -n n n n. -Pascal programs -length 68000 pdp 6500 -_ -19946 14383 16090 26710 -19484 20169 20190 35416 -10849 10469 11464 18949 -273 4221 5106 7944 -1854 5807 6610 10301 -.TE -.TS -c s s s -c c c c -n n n n. 
-C programs -length 68000 pdp 6500 -_ -9444 6927 8234 11559 -7655 14353 18240 26251 -4775 11309 15934 19910 -639 6337 9660 12494 -.TE -.PP -In contrast to the execution times of the test statements, the -object code file sizes show a constant factor between them. -After calculating the correlation coefficient, I have calculated -the line fitted between sizes. -.FS -* x is the number of bytes -.FE -.TS -c s s -c c c -l c c. -Pascal programs -processor corr. coef. fitted line -_ -68000-pdp 0.996 -68000-6500 0.999 1.76x + 502* -pdp-6500 0.999 1.80x - 1577 -.TE -.TS -c s s -c c c -l c c. -C programs -processor corr. coef. fitted line -_ -68000-pdp 0.974 -68000-6500 0.992 1.80x + 502* -pdp-6500 0.980 1.40x - 1577 -.TE -.PP -As seen from the tables above the correlation coefficient for -Pascal programs is better than the ones for C programs. -Thus the line fits best for Pascal programs. -With the formula of the best fitted line one can now estimate -the size of the object code, which a program needs, for a MCS6500 -without having the compiler at hand. -One also can see from these formulas that the object code -generated for a MCS6500 is about 1.8 times more than for the other -processors. -Since the number of bytes in the source file heavily depends on the -programmer, how many spaces he or she uses, the size of the indenting -in structured programs, etc., there is no correlation between the -size of the source file and the size of the object file. -Also the use of comments has its influence on the size. -.bp -.DS C -.B -SUMMARY. -.R -.DE -.NH 0 -Summary -.PP -In this chapter some final conclusions are made. -.PP -In spite of its simplicity, the MCS6500 is strong enough to -implement an EM machine. -A serious deficiency of the MCS6500 is the lack of 16-bit -general purpose registers, and especially the lack of a -16-bit stackpointer. 
-As pointed out before, one 16-bit register can be simulated -by a pair of 8-bit registers, in fact, the accumulator A to -hold the highbyte, and the index register X to hold the lowbyte -of the word. -For lack of a 16-bit stackpointer, zero page must be used to hold -a stackpointer and there are also two subroutines needed for -manipulating the stack (Push and Pop). -.PP -As seen in the timing tests, the simple instruction set of the -MCS6500 forces the use of library subroutines. -These library subroutines increase the execution time of the -programs. -.PP -The sizes of the object code files show a strong correlation -in contrast to the execution times. -With this correlation one can estimate the size of a program -if it is to be used on a MCS6500. -.bp -.NH 0 -.B -REFERENCES. -.R -.IP 1. -Osborne, A., Jacobson, S., and Kane, J. The Mos Technology MCS6500. -.B -An Introduction to Microcomputers , -.R -Volume II, Some Real Products (June 1977) chap. 9. -.RS -.PP -A hardware description of some existing CPU's, such as -the Zilog Z80, MCS6500, etc. is given in this book. -.RE -.IP 2. -van Staveren, H. -The table driven code generator from the Amsterdam Compiler Kit. -Vrije Universiteit, Amsterdam, (July 11, 1983). -.RS -.PP -The defining document for writing a back end table. -.RE -.IP 3. -Tanenbaum, A.S. Structured Computer Organization. -Prentice Hall. (1976). -.RS -.PP -In this book computers are described as a hierarchy of levels, -with each one performing some well-defined function. 
-.RE diff --git a/doc/LLgen/.distr b/doc/LLgen/.distr deleted file mode 100644 index 5215a59c7..000000000 --- a/doc/LLgen/.distr +++ /dev/null @@ -1,4 +0,0 @@ -LLgen.n -LLgen_NCER.n -LLgen.refs -proto.make diff --git a/doc/LLgen/LLgen.n b/doc/LLgen/LLgen.n deleted file mode 100644 index 3d9786a5b..000000000 --- a/doc/LLgen/LLgen.n +++ /dev/null @@ -1,1077 +0,0 @@ -.\" $Id$ -.\" Run this paper off with -.\" refer [options] -p LLgen.refs LLgen.doc | [n]eqn | tbl | (nt)roff -ms -.if '\*(>.'' \{\ -. if '\*(<.'' \{\ -. if n .ds >. . -. if n .ds >, , -. if t .ds <. . -. if t .ds <, ,\ -\}\ -\} -.cs 5 22u -.ND -.EQ -delim @@ -.EN -.TL -LLgen, an extended LL(1) parser generator -.AU -Ceriel J. H. Jacobs -.AI -Dept. of Mathematics and Computer Science -Vrije Universiteit -Amsterdam, The Netherlands -.AB -\fILLgen\fR provides a -tool for generating an efficient recursive descent parser -with no backtrack from -an Extended Context Free syntax. -The \fILLgen\fR -user specifies the syntax, together with code -describing actions associated with the parsing process. -\fILLgen\fR -turns this specification into a number of subroutines that handle the -parsing process. -.PP -The grammar may be ambiguous. -\fILLgen\fR contains both static and dynamic facilities -to resolve these ambiguities. -.PP -The specification can be split into several files, for each of -which \fILLgen\fR generates an output file containing the -corresponding part of the parser. -Furthermore, only output files that differ from their previous -version are updated. -Other output files are not affected in any -way. -This allows the user to recompile only those output files that have -changed. -.PP -The subroutine produced by \fILLgen\fR calls a user supplied routine -that must return the next token. This way, the input to the -parser can be split into single characters or higher level -tokens. -.PP -An error recovery mechanism is generated almost completely -automatically. 
-It is based on so-called \fBdefault choices\fR, which are -implicitly or explicitly specified by the user. -.PP -\fILLgen\fR has successfully been used to create recognizers for -Pascal, C, and Modula-2. -.AE -.NH -Introduction -.PP -\fILLgen\fR -provides a tool for generating an efficient recursive -descent parser with no backtrack from an Extended Context Free -syntax. -A parser generated by -\fILLgen\fR -will be called -\fILLparse\fR -for the rest of this document. -It is assumed that the reader has some knowledge of LL(1) grammars and -recursive descent parsers. -For a survey on the subject, see reference -.[ ( -griffiths -.]). -.PP -Extended LL(1) parsers are an extension of LL(1) parsers. They are -derived from an Extended Context-Free (ECF) syntax instead of a Context-Free -(CF) syntax. -ECF syntax is described in section 2. -Section 3 provides an outline of a -specification as accepted by -\fILLgen\fR and also discusses the lexical conventions of -grammar specification files. -Section 4 provides a description of the way the -\fILLgen\fR -user can associate -actions with the syntax. These actions must be written in the programming -language C, -.[ -kernighan ritchie -.] -which also is the target language of \fILLgen\fR. -The error recovery technique is discussed in section 5. -This section also discusses what the user can do about it. -Section 6 discusses -the facilities \fILLgen\fR offers -to resolve ambiguities and conflicts. -\fILLgen\fR offers facilities to resolve them both at parser -generation time and during the execution of \fILLparse\fR. -Section 7 discusses the -\fILLgen\fR -working environment. -It also discusses the lexical analyzer that must be supplied by the -user. -This lexical analyzer must read the input stream and break it -up into basic input items, called \fBtokens\fR for the rest of -this document. -Appendix A gives a summary of the -\fILLgen\fR -input syntax. -Appendix B gives an example. 
-It is very instructive to compare this example with the one -given in reference -.[ ( -yacc -.]). -It demonstrates the struggle \fILLparse\fR and other LL(1) -parsers have with expressions. -Appendix C gives an example of the \fILLgen\fR features -allowing the user to recompile only those output files that -have changed, using the \fImake\fR program. -.[ -make -.] -.NH -The Extended Context-Free Syntax -.PP -The extensions of an ECF syntax with respect to an ordinary CF syntax are: -.IP 1. 10 -An ECF syntax contains the repetition operator: "N" (N represents a positive -integer). -.IP 2. 10 -An ECF syntax contains the closure set operator without and with -upperbound: "*" and "*N". -.IP 3. 10 -An ECF syntax contains the positive closure set operator without and with -upperbound: "+" and "+N". -.IP 4. 10 -An ECF syntax contains the optional operator: "?", which is a -shorthand for "*1". -.IP 5. 10 -An ECF syntax contains parentheses "[" and "]" which can be -used for grouping. -.PP -We can describe the syntax of an ECF syntax with an ECF syntax : -.DS -.ft CW -grammar : rule + - ; -.ft R -.DE -This grammar rule states that a grammar consists of one or more -rules. -.DS -.ft CW -rule : nonterminal ':' productionrule ';' - ; -.ft R -.DE -A rule consists of a left hand side, the nonterminal, -followed by ":", -the \fBproduce symbol\fR, followed by a production rule, followed by a -";", in\%di\%ca\%ting the end of the rule. -.DS -.ft CW -productionrule : production [ '|' production ]* - ; -.ft R -.DE -A production rule consists of one or -more alternative productions separated by "|". This symbol is called the -\fBalternation symbol\fR. -.DS -.ft CW -production : term * - ; -.ft R -.DE -A production consists of a possibly empty list of terms. -So, empty productions are allowed. -.DS -.ft CW -term : element repeats - ; -.ft R -.DE -A term is an element, possibly with a repeat specification. 
-.DS -.ft CW -element : LITERAL - | IDENTIFIER - | '[' productionrule ']' - ; -.ft R -.DE -An element can be a LITERAL, which basically is a single character -between apostrophes, it can be an IDENTIFIER, which is either a -nonterminal or a token, and it can be a production rule -between square parentheses. -.DS -.ft CW -repeats : '?' - | [ '*' | '+' ] NUMBER ? - | NUMBER ? - ; -.ft R -.DE -These are the repeat specifications discussed above. Notice that -this specification may be empty. -.PP -The class of ECF languages -is identical with the class of CF languages. However, in many -cases recursive definitions of language features can now be -replaced by iterative ones. This tends to reduce the number of -nonterminals and gives rise to very efficient recursive descent -parsers. -.NH -Grammar Specifications -.PP -The major part of a -\fILLgen\fR -grammar specification consists of an -ECF syntax specification. -Names in this syntax specification refer to either tokens or nonterminal -symbols. -\fILLgen\fR -requires token names to be declared as such. This way it -can be avoided that a typing error in a nonterminal name causes it to -be accepted as a token name. The token declarations will be -discussed later. -A name will be regarded as a nonterminal symbol, unless it is declared -as a token name. -If there is no production rule for a nonterminal symbol, \fILLgen\fR -will complain. -.PP -A grammar specification may also include some C routines, -for instance the lexical analyzer and an error reporting -routine. -Thus, a grammar specification file can contain declarations, -grammar rules and C-code. -.PP -Blanks, tabs and newlines are ignored, but may not appear in names or -keywords. -Comments may appear wherever a name is legal (which is almost -everywhere). -They are enclosed in -/* ... */, as in C. Comments do not nest. -.PP -Names may be of arbitrary length, and can be made up of letters, underscore -"\_" and non-initial digits. 
Upper and lower case letters are distinct. -Only the first 50 characters are significant. -Notice however, that the names for the tokens will be used by the -C-preprocessor. -The number of significant characters therefore depends on the -underlying C-implementation. -A safe rule is to make the identifiers distinct in the first six -characters, case ignored. -.PP -There are two kinds of tokens: -those that are declared and are denoted by a name, -and literals. -.PP -A literal consists of a character enclosed in apostrophes "'". -The "\e" is an escape character within literals. The following escapes -are recognized : -.TS -center; -l l. -\&'\en' newline -\&'\er' return -\&'\e'' apostrophe "'" -\&'\e\e' backslash "\e" -\&'\et' tab -\&'\eb' backspace -\&'\ef' form feed -\&'\exxx' "xxx" in octal -.TE -.PP -Names representing tokens must be declared before they are used. -This can be done using the "\fB%token\fR" keyword, -by writing -.nf -.ft CW -.sp 1 -%token name1, name2, . . . ; -.ft R -.fi -.PP -\fILLparse\fR is designed to recognize special nonterminal -symbols called \fBstart symbols\fR. -\fILLgen\fR allows for more than one start symbol. -Thus, grammars with more than one entry point are accepted. -The start symbols must be declared explicitly using the -"\fB%start\fR" keyword. It can be used whenever a declaration is -legal, f.i.: -.nf -.ft CW -.sp 1 -%start LLparse, specification ; -.ft R -.fi -.sp 1 -declares "specification" as a start symbol and associates the -identifier "LLparse" with it. -"LLparse" will now be the name of the C-function that must be -called to recognize "specification". -.NH -Actions -.PP -\fILLgen\fR -allows arbitrary insertions of actions within the right hand side -of a production rule in the ECF syntax. An action consists of a number of C -statements, enclosed in the brackets "{" and "}". -.PP -\fILLgen\fR -generates a parsing routine for each rule in the grammar. The actions -supplied by the user are just inserted in the proper place. 
-There may also be declarations before the statements in the -action, as -the "{" and "}" are copied into the target code along with the -action. The scope of these declarations terminates with the -closing bracket "}" of the action. -.PP -In addition to actions, it is also possible to declare local variables -in the parsing routine, which can then be used in the actions. -Such a declaration consists of a number of C variable declarations, -enclosed in the brackets "{" and "}". It must be placed -right in front of the ":" in the grammar rule. -The scope of these local variables consists of the complete -grammar rule. -.PP -In order to facilitate communication between the actions and -\fILLparse\fR, -the parsing routines can be given C-like parameters. -Each parameter must be declared separately, and each of these declarations must -end with a semicolon. -For the last parameter, the semicolon is optional. -.PP -So, for example -.nf -.ft CW -.sp 1 -expr(int *pval;) { int fact; } : - /* - * Rule with one parameter, a pointer to an int. - * Parameter specifications are ordinary C declarations. - * One local variable, of type int. - */ - factor (&fact) { *pval = fact; } - /* - * factor is another nonterminal symbol. - * One actual parameter is supplied. - * Notice that the parameter passing mechanism is that - * of C. - */ - [ '+' factor (&fact) { *pval += fact; } ]* - /* - * remember the '*' means zero or more times - */ - ; -.sp 1 -.ft R -.fi -is a rule to recognize a number of factors, separated by "+", and -to compute their sum. -.PP -\fILLgen\fR -generates C code, so the parameter passing mechanism is that of -C, as is shown in the example above. -.PP -Actions often manipulate attributes of the token just read. -For instance, when an identifier is read, its name must be -looked up in a symbol table. -Therefore, \fILLgen\fR generates code -such that at a number of places in the grammar rule -it is defined which token has last been read. 
-After a token, the last token read is this token. -After a "[" or a "|", the last token read is the next token to -be accepted by \fILLparse\fR. -At all other places, it is undefined which token has last been -read. -The last token read is available in the global integer variable -\fILLsymb\fR. -.PP -The user may also specify C-code wherever a \fILLgen\fR-declaration is -legal. -Again, this code must be enclosed in the brackets "{" and "}". -This way, the user can define global declarations and -C-functions. -To avoid name-conflicts with identifiers generated by -\fILLgen\fR, \fILLparse\fR only uses names beginning with -"LL"; the user should avoid such names. -.NH -Error Recovery -.PP -The error recovery technique used by \fILLgen\fR is a -modification of the one presented in reference -.[ ( -automatic construction error correcting -.]). -It is based on \fBdefault choices\fR, which just are -what the word says, default choices at -every point in the grammar where there is a -choice. -Thus, in an alternation, one of the productions is marked as a -default choice, and in a term with a non-fixed repetition -specification there will also be a default choice (between -doing the term (once more) and continuing with the rest of the -production in which the term appears). -.PP -When \fILLparse\fR detects an error after having parsed the -string @s@, the default choices enable it to compute one -syntactically correct continuation, -consisting of the tokens @t sub 1~...~t sub n@, -such that @s~t sub 1~...~t sub n@ is a string of tokens that -is a member of the language defined by the grammar. -Notice, that the computation of this continuation must -terminate, which implies that the default choices may not -invoke recursive rules. -.PP -At each point in this continuation, a certain number of other -tokens could also be syntactically correct, f.i. 
the token -@t@ is syntactically correct at point @t sub i@ in this -continuation, if the string @s~t sub 1~...~t sub i~t~s sub 1@ -is a string of the language defined by the grammar for some -string @s sub 1@ and i >= 0. -.PP -The set @T@ -containing all these tokens (including @t sub 1 ,~...,~t sub n@) is computed. -Next, \fILLparse\fR discards zero -or more tokens from its input, until a token -@t@ \(mo @T@ is found. -The error is then corrected by inserting i (i >= 0) tokens -@t sub 1~...~t sub i@, such that the string -@s~t sub 1~...~t sub i~t~s sub 1@ is a string of the language -defined by the grammar, for some @s sub 1@. -Then, normal parsing is resumed. -.PP -The above is difficult to implement in a recursive descent -parser, and is not the way \fILLparse\fR does it, but the -effect is the same. In fact, \fILLparse\fR maintains a list -of tokens that may not be discarded, which is adjusted as -\fILLparse\fR proceeds. This list is just a representation -of the set @T@ mentioned -above. When an error occurs, \fILLparse\fR discards tokens until -a token @t@ that is a member of this list is found. -Then, it continues parsing, following the default choices, -inserting tokens along the way, until this token @t@ is legal. -The selection of -the default choices must guarantee that this will always -happen. -.PP -The default choices are explicitly or implicitly -specified by the user. -By default, the default choice in an alternation is the -alternative with the shortest possible terminal production. -The user can select one of the other productions in the -alternation as the default choice by putting the keyword -"\fB%default\fR" in front of it. -.PP -By default, for terms with a repetition count containing "*" or -"?" the default choice is to continue with the rest of the rule -in which the term appears, and -.sp 1 -.ft CW -.nf - term+ -.fi -.ft R -.sp 1 -is treated as -.sp 1 -.nf -.ft CW - term term* .
-.ft R -.fi -.PP -It is also clear, that it can never be the default choice to do -the term (once more), because this could cause the parser to -loop, inserting tokens forever. -However, when the user does not want the parser to skip -tokens that would not have been skipped if the term -would have been the default choice, -the skipping of such a term can be prevented by -using the keyword "\fB%persistent\fR". -For instance, the rule -.sp 1 -.ft CW -.nf -commandlist : command* ; -.fi -.ft R -.sp 1 -could be changed to -.sp 1 -.ft CW -.nf -commandlist : [ %persistent command ]* ; -.fi -.ft R -.sp 1 -The effects of this in case of a syntax error are twofold: -The set @T@ mentioned above will be extended as if "command" were -in the default production, so that fewer tokens will be -skipped. -Also, if the first token that is not skipped is a member of the -subset of @T@ arising from the grammar rule for "command", -\fILLparse\fR will enter that rule. -So, in fact the default choice -is determined dynamically (by \fILLparse\fR). -Again, \fILLgen\fR checks (statically) -that \fILLparse\fR will always terminate, and if not, -\fILLgen\fR will complain. -.PP -An important property of this error recovery method is that, -once a rule is started, it will be finished. -This means that all actions in the rule will be executed -normally, so that the user can be sure that there will be no -inconsistencies in his data structures because of syntax -errors. -Also, as the method is in fact error correcting, the -actions in a rule only have to deal with syntactically correct -input. -.NH -Ambiguities and conflicts -.PP -As \fILLgen\fR generates a recursive descent parser with no backtrack, -it must at all times be able to determine what to do, -based on the current input symbol. -Unfortunately, this cannot be done for all grammars. 
-Two kinds of conflicts can arise : -.IP 1) 10 -the grammar rule is of the form "production1 | production2", -and \fILLparse\fR cannot decide which production to choose. -This we call an \fBalternation conflict\fR. -.IP 2) 10 -the grammar rule is of the form "[ productionrule ]...", -where ... specifies a non-fixed repetition count, -and \fILLparse\fR cannot decide whether to -choose "productionrule" once more, or to continue. -This we call a \fBrepetition conflict\fR. -.PP -There can be several causes for conflicts: the grammar may be -ambiguous, or the grammar may require a more complex parser -than \fILLgen\fR can construct. -The conflicts can be examined by inspecting the verbose -(-\fBv\fR) option output file. -The conflicts can be resolved by rewriting the grammar -or by using \fBconflict resolvers\fR. -The mechanism described here is based on the attributed parsing -of reference -.[ ( -milton -.]). -.PP -An alternation conflict can be resolved by putting an \fBif condition\fR -in front of the first conflicting production. -It consists of a "\fB%if\fR" followed by a -C-expression between parentheses. -\fILLparse\fR will then evaluate this expression whenever a -token is met at this point on which there is a conflict, so -the conflict will be resolved dynamically. -If the expression evaluates to -non-zero, the first conflicting production is chosen, -otherwise one of the remaining ones is chosen. -.PP -An alternation conflict can also be resolved using the keywords -"\fB%prefer\fR" or "\fB%avoid\fR". "\fB%prefer\fR" -is equivalent in behaviour to -"\fB%if\fR (1)". "\fB%avoid\fR" is equivalent to "\fB%if\fR (0)". -In these cases however, "\fB%prefer\fR" and "\fB%avoid\fR" should be used, -as they resolve the conflict statically and thus -give rise to better C-code. -.PP -A repetition conflict can be resolved by putting a \fBwhile condition\fR -right after the opening parentheses.
This while condition -consists of a "\fB%while\fR" followed by a C-expression between -parentheses. Again, \fILLparse\fR will then -evaluate this expression whenever a token is met -at this point on which there is a conflict. -If the expression evaluates to non-zero, the -repeating part is chosen, otherwise the parser continues with -the rest of the rule. -Appendix B will give an example of these features. -.PP -A useful aid in writing conflict resolvers is the "\fB%first\fR" keyword. -It is used to declare a C-macro that forms an expression -returning 1 if the parameter supplied can start a specified -nonterminal, f.i.: -.sp 1 -.nf -.ft CW -%first fmac, nonterm ; -.ft R -.sp 1 -.fi -declares "fmac" as a macro with one parameter, whose value -is a token number. If the parameter -X can start the nonterminal "nonterm", "fmac(X)" is true, -otherwise it is false. -.NH -The LLgen working environment -.PP -\fILLgen\fR generates a number of files: one for each input -file, and two other files: \fILpars.c\fR and \fILpars.h\fR. -\fILpars.h\fR contains "#-define"s for the tokennames. -\fILpars.c\fR contains the error recovery routines and tables. -Only those output files that differ from their previous version -are updated. See appendix C for a possible application of this -feature. -.PP -The names of the output files are constructed as -follows: -in the input file name, the suffix after the last point is -replaced by a "c". If no point is present in the input file -name, ".c" is appended to it. \fILLgen\fR checks that the -filename constructed this way in fact represents a previous -version, or does not exist already. -.PP -The user must provide some environment to obtain a complete -program. -Routines called \fImain\fR and \fILLmessage\fR must be defined. -Also, a lexical analyzer must be provided. -.PP -The routine \fImain\fR must be defined, as it must be in every -C-program. It should eventually call one of the startsymbol -routines. 
-.PP -The routine \fILLmessage\fR must accept one -parameter, whose value is a token number, zero or -1. -.br -A zero parameter indicates that the current token (the one in -the external variable \fILLsymb\fR) is deleted. -.br -A -1 parameter indicates that the parser expected end of file, but didn't get -it. -The parser will then skip tokens until end of file is detected. -.br -A parameter that is a token number (a positive parameter) -indicates that this -token is to be inserted in front of the token currently in -\fILLsymb\fR. -The user can give the token the proper attributes. -Also, the user must take care, that the token currently in -\fILLsymb\fR is again returned by the \fBnext\fR call to the -lexical analyzer, with the proper attributes. -So, the lexical analyzer must have a facility to push back one -token. -.PP -The user may also supply his own error recovery routines, or handle -errors differently. For this purpose, the name of a routine to be called -when an error occurs may be declared using the keyword \fB%onerror\fR. -This routine takes two parameters. -The first one is either the token number of the -token expected, or 0. In the last case, the error occurred at a choice. -In both cases, the routine must ensure that the next call to the lexical -analyser returns the token that replaces the current one. Of course, -that could well be the current one, in which case -.I LLparse -recovers from the error. -The second parameter contains a list of tokens that are not skipped at the -error point. The list is in the form of a null-terminated array of integers, -whose address is passed. -.PP -The user must supply a lexical analyzer to read the input stream and -break it up into tokens, which are passed to -.I LLparse. -It should be an integer valued function, returning the token number. -The name of this function can be declared using the -"\fB%lexical\fR" keyword. 
-This keyword can be used wherever a declaration is legal and may appear -only once in the grammar specification, f.i.: -.sp 1 -.nf -.ft CW -%lexical scanner ; -.ft R -.fi -.sp 1 -declares "scanner" as the name of the lexical analyzer. -The default name for the lexical analyzer is "yylex". -The reason for this funny name is that a useful tool for constructing -lexical analyzers is the -.I Lex -program, -.[ -lex -.] -which generates a routine of that name. -.PP -The token numbers are chosen by \fILLgen\fR. -The token number for a literal -is the numerical value of the character in the local character set. -If the tokens have a name, -the "#\ define" mechanism of C is used to give them a value and -to allow the lexical analyzer to return their token numbers symbolically. -These "#\ define"s are collected in the file \fILpars.h\fR which -can be "#\ include"d in any file that needs the token-names. -The maximum token number chosen is defined in the macro \fILL_MAXTOKNO\fP. -.PP -The lexical analyzer must signal the end -of input to \fILLparse\fR -by returning a number less than or equal to zero. -.NH -Programs with more than one parser -.PP -\fILLgen\fR offers a simple facility for having more than one parser in -a program: in this case, the user can change the names of global procedures, -variables, etc, by giving a different prefix, like this: -.sp 1 -.nf -.ft CW -%prefix XX ; -.ft R -.fi -.sp 1 -The effect of this is that all global names start with XX instead of LL, for -the parser that has this prefix. This holds for the variables \fILLsymb\fP, -which now is called \fIXXsymb\fP, for the routine \fILLmessage\fP, -which must now be called \fIXXmessage\fP, and for the macro \fILL_MAXTOKNO\fP, -which is now called \fIXX_MAXTOKNO\fP. -\fILL.output\fP is now \fIXX.output\fP, and \fILpars.c\fP and \fILpars.h\fP -are now called \fIXXpars.c\fP and \fIXXpars.h\fP. -.bp -.SH -References -.[ -$LIST$ -.] 
-.bp -.SH -Appendix A : LLgen Input Syntax -.PP -This appendix has a description of the \fILLgen\fR input syntax, -as a \fILLgen\fR specification. As a matter of fact, the current -version of \fILLgen\fR is written with \fILLgen\fR. -.nf -.ft CW -.sp 2 -/* - * First the declarations of the terminals - * The order is not important - */ - -%token IDENTIFIER; /* terminal or nonterminal name */ -%token NUMBER; -%token LITERAL; - -/* - * Reserved words - */ - -%token TOKEN; /* %token */ -%token START; /* %start */ -%token PERSISTENT; /* %persistent */ -%token IF; /* %if */ -%token WHILE; /* %while */ -%token AVOID; /* %avoid */ -%token PREFER; /* %prefer */ -%token DEFAULT; /* %default */ -%token LEXICAL; /* %lexical */ -%token PREFIX; /* %prefix */ -%token ONERROR; /* %onerror */ -%token FIRST; /* %first */ - -/* - * Declare LLparse to be a C-routine that recognizes "specification" - */ - -%start LLparse, specification; - -specification - : declaration* - ; - -declaration - : START - IDENTIFIER ',' IDENTIFIER - ';' - | '{' - /* Read C-declaration here */ - '}' - | TOKEN - IDENTIFIER - [ ',' IDENTIFIER ]* - ';' - | FIRST - IDENTIFIER ',' IDENTIFIER - ';' - | LEXICAL - IDENTIFIER - ';' - | PREFIX - IDENTIFIER - ';' - | ONERROR - IDENTIFIER - ';' - | rule - ; - -rule : IDENTIFIER parameters? ldecl? - ':' productions - ';' - ; - -ldecl : '{' - /* Read C-declaration here */ - '}' - ; - -productions - : simpleproduction - [ '|' simpleproduction ]* - ; - -simpleproduction - : DEFAULT? - [ IF '(' /* Read C-expression here */ ')' - | PREFER - | AVOID - ]? - [ element repeats ]* - ; - -element : '{' - /* Read action here */ - '}' - | '[' [ WHILE '(' /* Read C-expression here */ ')' ]? - PERSISTENT? - productions - ']' - | LITERAL - | IDENTIFIER parameters? - ; - -parameters - : '(' /* Read C-parameters here */ ')' - ; - -repeats : /* empty */ - | [ '*' | '+' ] NUMBER? - | NUMBER - | '?' 
- ; - -.fi -.ft R -.bp -.SH -Appendix B : An example -.PP -This example gives the complete \fILLgen\fR specification of a simple -desk calculator. It has 26 registers, labeled "a" through "z", -and accepts arithmetic expressions made up of the C operators -+, -, *, /, %, &, and |, with their usual priorities. -The value of the expression is -printed. As in C, an integer that begins with 0 is assumed to -be octal; otherwise it is assumed to be decimal. -.PP -Although the example is short and not very complicated, it -demonstrates the use of if and while conditions. In -the example they are in fact used to reduce the number of -nonterminals, and to reduce the overhead due to the recursion -that would be involved in parsing an expression with an -ordinary recursive descent parser. In an ordinary LL(1) -grammar there would be one nonterminal for each operator -priority. The example shows how we can do it all with one -nonterminal, no matter how many priority levels there are. -.sp 1 -.nf -.ft CW -{ -#include <stdio.h> -#include <ctype.h> -#define MAXPRIO 5 -#define prio(op) (ptab[op]) - -struct token { - int t_tokno; /* token number */ - int t_tval; /* Its attribute */ -} stok = { 0,0 }, tok; - -int nerrors = 0; -int regs[26]; /* Space for the registers */ -int ptab[128]; /* Attribute table */ - -struct token -nexttok() { /* Read next token and return it */ - register c; - struct token new; - - while ((c = getchar()) == ' ' || c == '\et') { /* nothing */ } - if (isdigit(c)) new.t_tokno = DIGIT; - else if (islower(c)) new.t_tokno = IDENT; - else new.t_tokno = c; - if (c >= 0) new.t_tval = ptab[c]; - return new; -} } - -%token DIGIT, IDENT; -%start parse, list; - -list : stat* ; - -stat { int ident, val; } : - %if (stok = nexttok(), - stok.t_tokno == '=') - /* The conflict is resolved by looking one further - * token ahead.
The grammar is LL(2) - */ - IDENT - { ident = tok.t_tval; } - '=' expr(1,&val) '\en' - { if (!nerrors) regs[ident] = val; } - | expr(1,&val) '\en' - { if (!nerrors) printf("%d\en",val); } - | '\en' - ; - -expr(int level; int *val;) { int expr; } : - factor(val) - [ %while (prio(tok.t_tokno) >= level) - /* Swallow operators as long as their priority is - * larger than or equal to the level of this invocation - */ - '+' expr(prio('+')+1,&expr) - { *val += expr; } - /* This states that '+' groups left to right. If it - * should group right to left, the rule should read: - * '+' expr(prio('+'),&expr) - */ - | '-' expr(prio('-')+1,&expr) - { *val -= expr; } - | '*' expr(prio('*')+1,&expr) - { *val *= expr; } - | '/' expr(prio('/')+1,&expr) - { *val /= expr; } - | '%' expr(prio('%')+1,&expr) - { *val %= expr; } - | '&' expr(prio('&')+1,&expr) - { *val &= expr; } - | '|' expr(prio('|')+1,&expr) - { *val |= expr; } - ]* - /* Notice the "*" here. It is important. - */ - ; - -factor(int *val;): - '(' expr(1,val) ')' - | '-' expr(MAXPRIO+1,val) - { *val = -*val; } - | number(val) - | IDENT - { *val = regs[tok.t_tval]; } - ; - -number(int *val;) { int base; } - : DIGIT - { base = (*val=tok.t_tval)==0?8:10; } - [ DIGIT - { *val = base * *val + tok.t_tval; } - ]* ; - -%lexical scanner ; -{ -scanner() { - if (stok.t_tokno) { /* a token has been inserted or read ahead */ - tok = stok; - stok.t_tokno = 0; - return tok.t_tokno; - } - if (nerrors && tok.t_tokno == '\en') { - printf("ERROR\en"); - nerrors = 0; - } - tok = nexttok(); - return tok.t_tokno; -} - -LLmessage(insertedtok) { - nerrors++; - if (insertedtok) { /* token inserted, save old token */ - stok = tok; - tok.t_tval = 0; - if (insertedtok < 128) tok.t_tval = ptab[insertedtok]; - } -} - -main() { - register *p; - - for (p = ptab; p < &ptab[128]; p++) *p = 0; - /* for letters, their attribute is their index in the regs array */ - for (p = &ptab['a']; p <= &ptab['z']; p++) *p = p - &ptab['a']; - /* for digits, their 
attribute is their value */ - for (p = &ptab['0']; p <= &ptab['9']; p++) *p = p - &ptab['0']; - /* for operators, their attribute is their priority */ - ptab['*'] = 4; - ptab['/'] = 4; - ptab['%'] = 4; - ptab['+'] = 3; - ptab['-'] = 3; - ptab['&'] = 2; - ptab['|'] = 1; - parse(); - exit(nerrors); -} } -.fi -.ft R -.bp -.SH -Appendix C. How to use \fILLgen\fR. -.PP -This appendix demonstrates how \fILLgen\fR can be used in -combination with the \fImake\fR program, to make effective use -of the \fILLgen\fR-feature that it only changes output files -when necessary. \fIMake\fR uses a "makefile", which -is a file containing dependencies and associated commands. -A dependency usually indicates that some files depend on other -files. When a file depends on another file and is older than -that other file, the commands associated with the dependency -are executed. -.PP -So, \fImake\fR seems just the program that we always wanted. -However, it -is not very good at handling programs that generate more than -one file. -As usual, there is a way around this problem. -A sample makefile follows: -.sp 1 -.ft CW -.nf -# The grammar consists of the files decl.g, stat.g and expr.g. -# The ".o"-files are the result of a C-compilation. - -GFILES = decl.g stat.g expr.g -OFILES = decl.o stat.o expr.o Lpars.o -LLOPT = - -# As make doesn't handle programs that generate more than one -# file well, we just don't tell make about it. -# We just create a dummy file, and touch it whenever LLgen is -# executed. This way, the dummy in fact depends on the grammar -# files. -# Then, we execute make again, to do the C-compilations and -# such.
- -all: dummy - make parser - -dummy: $(GFILES) - LLgen $(LLOPT) $(GFILES) - touch dummy - -parser: $(OFILES) - $(CC) -o parser $(LDFLAGS) $(OFILES) - -# Some dependencies without actions : -# make already knows what to do about them - -Lpars.o: Lpars.h -stat.o: Lpars.h -decl.o: Lpars.h -expr.o: Lpars.h - -.fi -.ft R diff --git a/doc/LLgen/LLgen.refs b/doc/LLgen/LLgen.refs deleted file mode 100644 index df73595b8..000000000 --- a/doc/LLgen/LLgen.refs +++ /dev/null @@ -1,54 +0,0 @@ -%T An ALL(1) Compiler Generator -%A D. R. Milton -%A L. W. Kirchhoff -%A B. R. Rowland -%B Proc. of the SIGPLAN '79 Symposium on Compiler Construction -%D August 1979 -%J SIGPLAN Notices -%N 8 -%P 152-157 -%V 14 - -%T Lex - A Lexical Analyser Generator -%A M. E. Lesk -%I Bell Laboratories -%D October 1975 -%C Murray Hill, New Jersey -%R Comp. Sci. Tech. Rep. No. 39 - -%T Yacc: Yet Another Compiler Compiler -%A S. C. Johnson -%I Bell Laboratories -%D 1975 -%C Murray Hill, New Jersey -%R Comp. Sci. Tech. Rep. No. 32 - -%T The C Programming Language -%A B. W. Kernighan -%A D. M. Ritchie -%I Prentice-Hall, Inc. -%C Englewood Cliffs, New Jersey -%D 1978 - -%A M. Griffiths -%T LL(1) Grammars and Analysers -%E F. L. Bauer and J. Eickel -%B Compiler Construction, An Advanced Course -%I Springer-Verlag -%C New York, N.Y. -%D 1974 - -%T Make - A Program for Maintaining Computer Programs -%A S. I. Feldman -%J Software - Practice and Experience -%V 10 -%N 8 -%P 255-265 -%D August 1979 - -%T Methods for the Automatic Construction of Error Correcting Parsers -%A J. 
R\*:ohrich -%J Acta Informatica -%V 13 -%P 115-139 -%D 1980 diff --git a/doc/LLgen/LLgen_NCER.n b/doc/LLgen/LLgen_NCER.n deleted file mode 100644 index 3693a1525..000000000 --- a/doc/LLgen/LLgen_NCER.n +++ /dev/null @@ -1,2712 +0,0 @@ -.RP -.TL - - - -Top-down Non-Correcting Error Recovery - in LLgen -.AU -Arthur van Deudekom -Peter Kooiman -.AI -Department of Mathematics and Computer Science -Vrije Universiteit -Amsterdam - - - - - -Supervised by -.AU -dr. D. Grune -.AI -Department of Mathematics and Computer Science -Vrije Universiteit -Amsterdam - -.AB -This paper describes the design and implementation of a parser -generator with non-correcting error recovery based on the extended LL(1) -parser generator LLgen. It describes a top-down algorithm for implementing -this error recovery technique that can handle left-recursive grammars. -The parser generator has been tested with several existing ACK-compilers, -among which C and Modula-2. Various optimizations have been tried and are -discussed in this paper. -.AE -.LP -.nr PS 12 -.nr VS 14 - -.NH -Introduction -.EQ -delim $$ -.EN - -.nr PS 10 -.nr VS 12 -.RS -.LP -One of the trickier problems in constructing parser-generators is what -to do when the input to the generated parser is not well formed. Several -approaches are known, most of which are `correcting', meaning that they -modify the input to make it correct. However, in most cases there are -several possible corrections, and often the one chosen will turn out -to be the wrong one. As a result of such an incorrect choice, spurious error -messages can occur. Every programmer knows from experience how the omission -of a single `)' can on occasion lead to pages of error messages. - -.LP -A radically different approach is to just discard all the input up to -and including the offending token, and start with a clean slate at the -token following the offending one. 
[RICHTER] describes how -this idea can be used to construct a non-correcting error recovery system -that will never introduce spurious error messages. It is, however, -possible that errors are overlooked. - -.LP -In this paper we describe the incorporation of this non-correcting error -recovery into LLgen, an existing LL(1) parser generator. -In this introduction, we will describe in detail this non-correcting error -recovery technique, give an overview of LLgen and how it handles -errors, and finally describe how we have incorporated noncorrecting -error recovery in LLgen. -.RE - -.NH 2 -Non-correcting syntax error recovery - -.LP -Richter describes how syntax error recovery can be done -without making any corrections to the input text. Richter gives three -reasons why recovery without correction is desirable: - -.IP 1 -In most cases there are many possible corrections, the choice among which -will severely influence the further processing of the input. Thus, the -probability of selecting the right correction is not high. - -.IP 2 -The harm done by selecting the wrong correction is often unlimited. - -.IP 3 -The loss of information to the user of a non-correcting recovery technique -need not be grave. - -.LP -The non-correcting technique described by Richter can be summarized as -follows: When a syntax-error has occurred, the input up to and including the -erroneous symbol is discarded; the remainder of the -input is processed by a substring parser of the input -language, that is a parser that recognizes any substring of a string in the input -language. When the substring parser detects a syntax error, the offending -symbol is reported as another error, and the input up to and including the -erroneous symbol is discarded. The process is then repeated with the remaining input, possibly -finding other syntax errors, until all the input is scanned. -This process yields what Richter calls a -.I -suffix analysis -.R -of an input string. 
Formally, given an input string -.I x -, suffix analysis produces a set of strings $w sub k$ and a set of symbols -$ a sub k$ such that -.br - -.IP -$x~ =~ w sub 0 a sub 0 w sub 1 a sub 1~...w sub n-1 a sub n-1 w sub n$ -.LP -and such that: -.br -.IP - $w sub 0$ is the longest prefix of $x$ that is a prefix of -a string in the input language L, formally: there is a string $y$ such that -$w sub 0 y$ is in L, but there is no string $z$ such that $w sub 0 a sub 0 z$ -is in L; -.IP -For $0 < k < n$, $w sub k$ is a longest substring of $x$ that is also a -substring of a string in L, formally there are strings $u$ and $v$ such that -$u w sub k v$ is in L, but there are no strings $y$ and $z$ such that -$y w sub k a sub k z$ is in L; -.IP -$w sub n$ is a substring of $x$ -that is a substring of a string in L, formally: -there exist $u$ and $v$, such that $u w sub n v$ is in L. Note that -$w sub n$ need not be a suffix of a string in L; if $x$ represents incomplete -input, $w sub n$ is not a suffix of a string in L. - -.LP -Now, the $a sub k$ indicate points at which an error is detected. The -"real" error need not be at $a sub k$, it can have occurred anywhere -within $w sub k a sub k$. -In his paper, Richter shows that, although this method may miss errors, it -will never introduce spurious errors. - -.LP -For implementing the technique, a parser that recognizes any -substring of the input language is needed. If we confine ourselves to -syntactical analysis, it is sufficient to construct a substring -recognizer. Richter himself does not give a practical construction, but -[CORMACK] describes how an LR substring parser can be constructed -that handles BC-LR(1,1) grammars. In this paper, we describe the construction -of an LL substring recognizer that can handle any grammar. Furthermore, -our recognizer is actually a suffix-recognizer, that is, a recognizer that -recognizes any suffix of a string in the input language.
Our suffix recognizer has the -correct-prefix property, -meaning that it detects the first syntax error as early as possible -in a left-to-right scan of the input. Specifically, if the input language -is L and the invalid input is $x$ , it finds a string $w$ and an input symbol -$a$ such that $x = way$ , there is a string $z$ such that $wz$ -is in L, and there is no string $z$ such that $waz$ is in L. -Because the suffix parser has this correct-prefix property, it can be -used as a substring parser, because it will detect the first input symbol that -is not part of a substring of the language. Because it is a suffix-recognizer, -it additionally will detect incomplete input, because in that case -at the end of the input the parser will not be in an accepting state. - -.NH 2 -Overview of LLgen - -.LP -LLgen is an extended LL(1) parser generator. For a complete description, -see [GRUNE]. -LLgen can actually handle grammars that are not LL(1), because it allows -the use of conflict-resolvers. In case of an LL(1) conflict, these resolvers -are used to statically or dynamically decide which rule to use. As we will see -later, this feature makes it necessary for the suffix-recognizer to -handle grammars that are not LL(1). Semantic actions can occur anywhere -in the grammar rules, and they are executed when their position is -reached during parsing. A typical LLgen rule looks like -.br -.IP -S: A { -.I action -} B -.LP -where the action is a piece of C-code, that will be executed -when the parser is using the rule for S and has recognized A. - -.LP -LLgen-generated parsers use correcting syntax error recovery, based on a -scheme designed by R\*:ohrich [ROEHRICH], inserting or deleting symbols at the point of error detection -until correct input results. 
This means that actions in the parser will -always be executed in an order that could also have resulted from -syntactically correct input, and most importantly, once a grammar-rule -is started it is guaranteed to be completed. This means that syntactic -errors can never result in inconsistencies for the actions. Actions -only have to deal with syntactically correct input. In a nutshell, the -error recovery in LLgen-parsers works as follows: Suppose the parser is -presented with correct input that breaks off before the end. The error -recovery mechanism now provides a continuation path, chosen in such a -way that all active rules are left as soon as possible. Effectively, the -continuation path is the `shortest way out'. The symbols on this path are -called `acceptable', and end-of-file is also `acceptable'. Furthermore, at -each point along this `shortest path' there can be other terminals that -would be correct; these are `acceptable' as well. Now, when an -error occurs, all symbols that are not acceptable are discarded, until -an acceptable symbol appears in the input. The tokens on the path up to -but not including the acceptable input symbol are inserted. -From then on, normal parsing resumes. - -.NH 2 -Incorporation of non-correcting error recovery in LLgen - -.LP -An important consideration in incorporating the non-correcting recovery -in LLgen was that correct programs should suffer as little as possible -in what regards compilation speed. Furthermore, the existing error -recovery method has the highly desirable property that rules that are -started will be finished too, thus ensuring that errors in the -input text will not cause inconsistencies in the semantic actions. We have -implemented the non-correcting error recovery in such a way that this -property is preserved. - -.LP -The way we have achieved these goals is by actually including -the suffix recognizer as a `second recognizer' in the generated parser. 
-Correct programs are handled in the usual way by the parser, but if an error -occurs the following happens: instead of going to the standard error -recovery routine, the parser starts executing the non-correcting error -handler. This process continues, reporting all errors, until the -end of the input text is reached. Then, control is handed back to -the standard error recovery routine. This routine will now think -there is no more input, and thus start inserting tokens so as to construct -a `shortest way out'. This ensures that all rules that were started are -also finished, and no inconsistencies can occur in the semantic actions. -However, this method does require some modifications to the error reporting -routine. Normally, if the generated parser inserts a token, it reports -this to the user, but in this case this is undesirable. The insertions only -serve to maintain consistency in the semantic actions -and do not signify errors, so reporting of insertions should be suppressed. -.bp -.nr PS 12 -.nr VS 14 -.PS -boxwid = boxwid / 1.5 -boxht = boxht / 1.5 -arcrad = arcrad / 1.5 -movewid = movewid / 1.5 -moveht = moveht / 1.5 -arrowwid = arrowwid / 1.5 -arrowht = arrowht / 1.5 -arrowhead = arrowhead / 1.5 -linewid = linewid / 1.5 -lineht = lineht / 1.5 -.PE -.NH -The LL suffix parser - -.nr PS 10 -.nr VS 12 -.RS -.LP -In this chapter, we describe the construction of the LL suffix parser. -The described parser is not restricted to LL(1) grammars, because the -presence of conflict resolvers in LLgen allows for more general grammars, -that may even be left-recursive. We start this chapter with a discussion -of the implications of conflict resolvers, and continue with descriptions -of the parser algorithm, the used data-structures, -the handling of left- and right recursion, and some possible optimizations. 
-.RE - -.NH 2 -LLgen conflict resolvers and their implications - -.LP -In grammars that are nearly but not completely LL(1), conflicts -will arise in the two places where parsing decisions are made: the choice -of which alternative to start (`alternation conflicts') and the decision -to stop or continue a repeated item (`repetition conflicts'). In order to -allow LLgen to handle this type of grammar, the user can -specify conflict resolvers in those places where conflicts arise. -These resolvers are Boolean expressions labeling an alternative, -and are evaluated when a conflict arises during parsing. If the -expression evaluates to `true' the labeled alternative will be taken. -The Boolean expressions are expressions in C, and can consult -any information available at the point they occur. -However, if a syntactic error has occurred in the input, and the non-correcting -error recovery starts, we can no longer rely on the conflict resolvers to -guide parsing decisions. The suffix recognizer is only concerned with -syntax, and will not execute any semantic actions. It recognizes suffixes -of correct input, but does not know or care what prefix would make -the suffix a correct program; as a result, the information that conflict -resolvers could use is not available, because the semantic actions -that would build this information have not been executed. -Therefore, the information used by the conflict resolvers is no longer -reliable, and the suffix parser needs to be able to handle the underlying -grammar without their help. In particular, it has to be able to handle -left-recursive grammars. - -.NH 2 -The suffix parser algorithm - -.LP -Our algorithm needs easy access to the grammar rules; in the description -we assume there is an efficient way to access the grammar rules. In -the next chapter we will describe the details of the actual implementation. -For the moment, we will only consider grammars that are not left- or -right-recursive.
In the next section, we will discuss how the algorithm has to be adapted -to handle left- and right recursion. - -.LP -Suppose the grammar is G, and the input to the suffix recognizer is -$a sub 0 a sub 1 ... a sub n-1 a sub n$. Remember that parsing is -always started by the `normal' LLgen generated parser. It's only after -a syntactic error has occurred that the suffix recognizer will be started. -The input to the suffix recognizer thus is the `tail' of the input, starting -at the first symbol after the position where the first syntax error was -found. - -.LP -Now, in order to get parsing going again, the parser scans the grammar -for rules which contain symbol $a sub 0$ in the right hand side: -.br - - A: $alpha ~ a sub 0 ~ beta$ -.br - -.LP -where $alpha$ and $beta$ represent a string of terminals and non-terminals, -possibly empty. Now, for each of these rules found, and for any string -$b sub 0 b sub 1$...$ b sub m$ that can be generated by $beta$ it holds that -$a sub 0 b sub 0 b sub 1$...$b sub m$ is a substring of some string in L. -This can be shown as follows, supposing that the start symbol is S and -S $-> sup * gamma$ A $delta$: -.br - -S $-> sup * gamma$ A $delta$ $-> sup * gamma ~ alpha ~ a sub 0 beta ~ delta --> sup * gamma ~ alpha ~ a sub 0 b sub 0 b sub 1$...$b sub m delta$ - -.br -Of course, there may very well be more than one such string -$b sub 0 b sub 1$..$b sub m$, and one of these strings can be empty as well, if -$beta$ can produce empty. Now, in what we will call the -.I -predicting phase -.R -the algorithm will -produce all possible symbols $b sub 0$. Then, in what we will call the -.I -accepting phase -.R -these symbols are matched against -the input, and those not matching are discarded. Then, entering the next -predicting phase, the algorithm will produce -all symbols $b sub 1$, and match them against the next input symbol in -the subsequent accepting phase, -etc.
In case one of the strings $b sub 0$...$b sub m$ is empty, or -the end of one of the strings is reached, some way to continue is -needed; we will discuss this later. First let's see how the -algorithm produces the strings $b sub 0$...$b sub m$ . - -.LP -For each rule in the grammar of the form -.br - - A: $alpha a sub 0 W sub 1 W sub 2$...$W sub p$ -.br - -with each $W sub k$ a terminal or nonterminal, a -.I -prediction graph -.R -is created that looks like this: - -.PS -down; box "$a sub 0$"; arrow; box "$W sub 1$"; arrow -box "$W sub 2$"; arrow dashed; box "$W sub p$" -arrow; box "END" "$[A]$" -.PE - -.LP -The bottom element of these prediction graphs is an end-marker containing the -left-hand side of the rule used. All these graphs have $a sub 0$ on top, and -this $a sub 0$ is matched against the $a sub 0$ in the input in the -accepting phase that follows, removing the -$a sub 0$ from the graph. If the prediction graph is now empty, we have to find a way -to continue; this case is treated later. First we will consider what to do if -the prediction graph is not empty. There are two possibilities: either $W sub 1$ is a -terminal, or it is a nonterminal. If it is a terminal, we are finished for -the moment; if not, the algorithm scans for rules of the form -.br - - $W sub 1$: $U sub 1 U sub 2$...$U sub i$ -.br - -.LP -with each $U sub k$ a terminal or nonterminal. Now, the algorithm substitutes -the top of the prediction graph with the right-hand sides -of all the rules found. Because there can be more than one rule, the -prediction graph can now become a DAG (Directed Acyclic Graph). 
-Supposing there are two rules with $W sub 1$ in the LHS: - -.br - - $W sub 1$: $U sub 1 U sub 2$...$U sub i$ -.br - $W sub 1$: $V sub 1 V sub 2$...$V sub j$ - -.LP -the prediction graph will now look like this: - -.PS -B1: box "$U sub 1$" -move -B2: box "$V sub 1$" -arrow dashed down from bottom of B1 -B3: box "$U sub i$" -arrow dashed down from bottom of B2 -B4:box "$V sub j$" -move to 0.5 -down;move -B5:box "$[W sub 1 ]$" -arrow dashed; -box "$W sub p$" -arrow; -box "END" "$[A]$" -arrow from B3.bottom to B5.top -arrow from B4.bottom to B5.top -.PE - -.LP -The graph element representing $W sub 1$ is left in the stack, the -notation $[W sub 1 ]$ indicates it has been substituted. These substituted -elements will from now on be ignored by the algorithm. The elements -$U sub 1$ and $V sub 1$ are now `on top' of the prediction graph. - -.LP -If $W sub 1$ can also produce empty, its successor in the prediction graph -has to be processed -as well; the algorithm walks down the graph to this successor, and -there the process is repeated; if it is a terminal we are finished, else we -substitute it with the right hand sides of its grammar rules. -However, the element that we want to substitute now, say $W sub k$, cannot -be marked `substituted' just like that, because it can be on another -path, on which it cannot be substituted yet. Therefore, a copy of element -$W sub k$ is made, it is marked $[W sub k ]$, and an edge is created -from $[W sub k ]$ to the successor of $W sub k$.
This produces graphs like -this: -.br -.PS -B1: box "$U sub 1$" -move -B2: box "$V sub 1$" -move -X1:box "$X sub 1$" -arrow dashed down from bottom of B1 -B3: box "$U sub m$" -arrow dashed down from bottom of B2 -B4:box "$V sub j$" -arrow dashed down from bottom of X1 -Xj: box "$X sub j$" -move to 0.5 -down;move -B5:box "$[W sub 1 ]$" -arrow dashed; -B6: box "$W sub k$" -arrow -Wk1:box "$W sub k+1$" -arrow dashed -box "$W sub n$" -arrow; -box "END" "$[A]$" -arrow from B3.bottom to B5.top -arrow from B4.bottom to B5.top -move down from Xj.top;move;move;move -Wk: box "$[W sub k ]$" -arrow from Xj.bottom to Wk.top -arrow from Wk.bottom to Wk1.top -.PE - -.LP -This process of substituting is repeated with all nonterminals that are -now on top of the prediction graph, until there are only terminals on top of -the graph. -This completes the prediction phase of the algorithm, not taking into account -what to do if an END marker appears on top of the graph. -Now, the algorithm enters its accepting phase, in which -the terminals on top are compared with the next symbol in the input. -If a terminal in the graph matches the input, its element is deleted -from the graph, and the substitution process will continue with its -successors, in the next prediction phase. -If a terminal on top of the graph does not -match the input, the path it is on represents a `dead-end', which -does not need to be processed any further. The terminal is no longer -a `top', and the algorithm will not visit it again. 
- -.LP -There is one tricky situation: consider again this graph: - -.PS -B1: box "$U$" -move -B2: box "$a$" -move to 0.5 -down;move -B5:box "$W sub 1 $" -arrow dashed; -box "$W sub n$" -arrow; -box "END" "$[A]$" -arrow from B1.bottom to B5.top -arrow from B2.bottom to B5.top -.PE - -.LP -Here, the algorithm is processing $W sub 1$ in the predicting phase, and -using some rule it has produced $a$ on top; there is another rule with -$W sub 1$ in its LHS which has produced nonterminal $U$ on top. -Now, suppose $U$ is a nonterminal that can -produce empty. Now, the algorithm starts substituting $U$, and walks -down $W sub 1$. What we definitely do not want -is the algorithm to start substituting $W sub 1$ again, because then we -would loop forever. Therefore, if the algorithm starts processing -element $W sub 1$ it should make it $[W sub 1 ]$ before it does -anything else. On entering the element -for the second time in the prediction phase , it sees that it is already substituted, -so there is nothing to do. -It then just walks to the successor of $W sub 1$ and -starts substituting it. This is correct, since the fact that the algorithm -enters an element for the second time in a prediction phase means that the element -indirectly can produce the empty string, and thus its successor must -be substituted as well in the prediction phase. - -.LP -It is easy to see that the substitution process will stop: the algorithm can -only loop if it starts processing an element for the second time in a -prediction phase, -or if the processing of an element eventually yields a graph with that -same element on top. -The first case cannot occur because the algorithm marks elements it is -processing as `substituted' before it does anything else, meaning that those elements will not -be processed again; the second case can only occur if the grammar is -left-recursive, which we assumed it was not. 
- -.LP -The algorithm simulates -left-most derivations of strings $a sub 0 b sub 0 b sub 1$..$b sub n$ -starting from $a sub 0 W sub 1$..$W sub p$; as we showed before, if -the algorithm recognizes a string $a sub 0 b sub 0$..$b sub n$ that -string is a substring of some string in L. Conversely, because the -algorithm starts out by using all rules of the form -A: $alpha a sub 0 beta$, and then proceeds to simulate all -possible left-most derivations, it will recognize all input -$a sub 0 b sub 0$... $b sub n$ that can be produced starting from -$a sub 0 beta$. - -.LP -Now we will discuss what has to be done if an END marker appears as -top of the prediction graph. -When this happens, it means that starting from some rule -.br - - A: $alpha a sub 0 beta$ - -.br -the algorithm has produced a leftmost-derivation of a string -$a sub 0 b sub 0 .. b sub n$ starting from $a sub 0 beta$, or that $beta$ can produce -empty and the string so far is just $a sub 0$. The next step is to assume -that we have recognized A and that some string produced by $alpha$ -is part of the prefix that makes the suffix we are recognizing a -correct string in L. Remember that in the END marker we kept record of -the LHS of the rule that has started the graph, and we will now use this -LHS to continue recognizing. What the algorithm does is scan for all -rules of the form: -.br - - B: $gamma$ A $delta$ -.br - -with $gamma$ and $delta$ possibly empty strings of terminals and nonterminals. -The algorithm now starts a new component in the prediction graph, and if $delta$ is -$W sub 1 W sub 2$...$W sub n$ it looks like this: - -.PS -down;box "$W sub 1$"; arrow -box "$W sub 2$"; line dashed; box "$W sub n$" -arrow; box "END" "$[B]$" -.PE - -.LP -Note that the END marker now contains B, because we have started to match -a rule for B.
If the $delta$ in the rule for B was empty, this just produces -an END marker with B in it; in this case, the process is just repeated -with all rules of the form: -.br - - C: $zeta$ B $eta$ -.br - -.LP -etc, until we have a prediction graph with a nonterminal or terminal on top. -Now, the substitution algorithm is again applied over all nonterminals on -top, until every top contains a terminal. It is possible that during -substitution again an END marker will turn up; if this happens -we again scan for rules to continue with etc. -This `continuation algorithm' can only loop if, when -trying to build a new prediction graph for matched symbol A, it produces an empty -graph with again matched symbol A. If this happens, the grammar was -(directly or indirectly) right-recursive, and we assumed that it was not. -Therefore, the algorithm will terminate. The terminals on top of the -new graph after applying this `continuation' algorithm are exactly those -that could follow the string $a sub 0 b sub 0$..$b sub n$ in a substring -of a string in L. -To see this, suppose we have `recognized' the rule -.br - - A: $alpha a sub 0 beta$ - -.br -and $a sub 0 b sub 0 b sub 1$...$b sub n$ is the string produced from -$a sub 0 beta$ by the algorithm. Now, using rule: -.br - - B: $gamma$ A $delta$ - -.br -and supposing that S $->$ $zeta$ B $eta$ we get -.br - - S $->$ $zeta$ B $eta$ $->$ $zeta gamma$ A $delta$ $eta$ $->$ $zeta gamma a sub 0 b sub 0 b sub 1$ ... $b sub n$ $delta$ $eta$ - -.br -.LP -and thus any string produced by a derivation starting from -$delta$ can come right after $a sub 0 b sub 0$...$b sub n$ in a substring -of some string in L. The algorithm will proceed to generate all these -strings starting from $delta$. If $delta$ produces empty, the above -is just repeated. Because in the `continuation' part -all possible rules are considered, the whole algorithm will recognize -all substrings of any string in L.
In order to determine if we -have actually recognized a suffix of some string in L, we need to -remember if within a predicting phase the `continuation' part of the algorithm has been run -on an END marker containing the start-symbol S; -if this is the case, then the input seen until now is a suffix of some string in L. -Formally, it means that there is a derivation starting from start symbol -$S$ such that if the -input seen until now is $a sub 0 a sub 1$..$a sub n$, then: -.br - - S $-> sup * alpha beta$ $-> sup * alpha a sub 0 a sub 1$..$a sub n$ -.br - -.LP -where $alpha$ can be empty, $beta$ is not empty. - -.NH 2 -The prediction graph data structure - -.LP -The graphs that are produced by the suffix recognizer may grow extremely -large; to facilitate an efficient -implementation we have devised a way of keeping the size of the -data structure under control, in a way that is very similar to -the way described in [TOMITA]. - -.LP -The basic idea is, that in a prediction phase of the algorithm, it is not -necessary to explicitly substitute each nonterminal every time it -turns up as a `top'; it is sufficient to do it once, because the -second substitution will produce exactly the same subgraph starting at -the substituted nonterminal. Here is an example: - -.PS -down;box "$a$";arrow;box "A";arrow dashed;box "[B]";arrow -box "C";arrow dashed;box "END" "[X]" -move right from last box.e; -box "END" "[Y]"; -arrow <- dashed up from last box.top; -box "D";arrow <- up from last box.top -box "B" -.PE - -.LP -Here, in the left component of the graph, nonterminal B has been -substituted. Now, in the same prediction phase, the algorithm again runs into -B, now in the right component. There is no need to compute again -what the substitution will produce, it is exactly the part on top -of B in the left component. 
Therefore, all that is needed is: - -.PS -down;box "$a$";arrow;box "A";arrow dashed; -B1: box "[B]";arrow -box "C";arrow dashed;box "END" "[X]" -move right from last box.e; -box "END" "[Y]"; -arrow <- dashed up from last box.top; -box "D" -arrow from B1.bottom to last box.top -.PE - -So, when, in a prediction phase of the algorithm, a nonterminal is substituted, -the nonterminal is placed on a list, together with a pointer to -the substituted nonterminal. If in the same prediction phase a nonterminal that -is on the list becomes a top, all we need to do is place an edge -between the already substituted one and the successor of the top we are currently -processing. When a prediction phase is finished, the list is cleared. -There is one catch: if we consider again the last picture, -note that if nonterminal B can (directly or indirectly) produce empty, -it is also necessary to substitute D. However, it is not difficult to -determine if a nonterminal can produce empty. LLgen already computes -this information for each nonterminal. - -.LP -Without this `joining together' of graph components, each -element in the graph has exactly one successor, except the END marker, -which has none. -Now that components get joined as described, an element can have any -number of successors. The recognizer algorithm now has to consider all -successors of a graph element instead of one. - -.NH 2 -Handling right recursion - -.LP -The only problem right-recursive grammars cause in the algorithm is in the -`continuation' part; they can cause this part of the algorithm to loop -forever. As an example, consider: -.br - - A: $alpha$ B -.br - B: $beta$ C -.br - C: $gamma$ A - -.LP -Now suppose the `substitution' part of the algorithm has turned up -an END marker with nonterminal A in it. The continuation algorithm will -now produce: - -.PS -box "END" "[A]";move;box "END" "[C]";move;box "END" "[B]";move -box "END" "[A]";move;box "END" "[C]" -.PE - -.LP -etc. etc. 
However, a slight modification to the algorithm suffices -to eliminate this problem; within each prediction phase of the algorithm, we -simply maintain a list of nonterminals that have turned up in an -END marker. As soon as an END marker turns up whose nonterminal is -already in the list, we stop the `continuation' algorithm; the part -of the graph that would be produced by it already has been generated -by an earlier invocation of the algorithm in the same prediction phase. -At the end -of a prediction phase, when all heads are terminals, we clear the list. -This way, no looping can occur; even if the right recursion is -indirect, for instance if in the above example the rule for A had been -.br - - A: $alpha$ B $delta$ -.br -.LP -where $delta$ can produce empty, the algorithm still works; the substitution -of $delta$ will yield an END marker on top, and when trying to find -a continuation for LHS A the algorithm notices A is already on the list. - - -.NH 2 -Handling left recursion - -.LP -Left-recursion is, unfortunately, a much tougher problem than -right-recursion. The result of left-recursive grammar rules is that -the substitution algorithm never stops, because it can keep on building -the graph with the same set of rules without ever turning up a terminal. -One course of action would be to pre-process the grammar rules to -eliminate left-recursion; there are algorithms that eliminate direct -and indirect left-recursion. However, we have taken another course; by -allowing the produced graphs to contain loops, we can handle left -recursion without any modifications to the grammar. As soon as -we come to the point that we want to substitute a nonterminal -which was already substituted earlier on the same path and in -the same prediction phase, we can -make a link from the `older' nonterminal to the successor of -the `new' nonterminal. In this way we have constructed a loop -in the graph. 
As an example, suppose we have the following rules: -.br - -D: A - -A: B a - -B: A | x - -.br -Suppose also that we have nonterminal `D' on top of a stack. We -now start substituting `D': - -.PS -A: box "A" -move -X: box "x" -move to 0.5 -down -move -B: box "[B]" -arrow -box "a" -arrow -box "[A]" -arrow -box "[D]" -arrow dashed -box "END" "[S]" - -arrow from A.s to B.n -arrow from X.s to B.n - -.PE - -.LP -We now have an `A' on top of the stack which was already -substituted on the same path and also in the same prediction phase. To avoid -never ending substitution we make a loop as follows: - -.PS -A: box "A" dashed -move -X: box "x" -move to 0.5 -down -move -B: box "[B]" -arrow -box "a" -arrow -A2: box "[A]" -arrow -box "[D]" -arrow dashed -box "END" "[S]" - -arrow dashed from A.s to B.n -arrow from X.s to B.n -arc <- from B.w to A2.w -.PE - -.LP -The dashed box with `A' in it means that it can be deleted, because -there is already an occurrence of it in the loop. - -.LP -The most beautiful result of loops in graphs is -that the original parsing algorithm needs only one minor change. -When the algorithm visits an element which has more than one -outgoing edge the algorithm starts tracking down both paths, -just like before, only now there may be one or more backedges among -these edges, but the algorithm need not be aware of this fact. -The only difficulty with loops is that the algorithm might go into -a loop; it continues searching for terminals but it might happen -that there are no valid terminals in the loop. The solution to this -problem is not very difficult; just set a flag at all elements we -visit. When we reach an element which has this flag turned on, we -don't have to search any further. At the end of the prediction phase, when we -have found all possible new heads, all flags are cleared.
-Even if there are no loops in the -prediction graph, setting flags may be used as an optimization: -it is possible that two paths come together at one point. In that situation -it is useless to scan for the second time the part of the graph which -both paths have in common. - -.NH 2 -Some optimizations using reference counts - -.LP -As explained in section 2.2, it is sometimes necessary to copy a -prediction graph element before substituting it. In order to determine -if a certain element has to be copied, it is convenient to maintain -a reference count in each graph element. This reference count keeps -track of the number of edges that enter an element. Now, when we want -to substitute an element with reference count not 0, we need to -copy it, because there is another path in the prediction graph that -contains the element we want to substitute, and on this other path -the element cannot be substituted yet. - -.LP -Maintaining reference counts also enables us to perform another -optimization: remember that if, in a prediction phase, a terminal -is predicted that does not match the current inputsymbol, we from -then on just ignore the path in the graph starting at the terminal. -However, we can safely delete the terminal from the graph; furthermore, -all its successors in the prediction graph that have reference count -0 can be deleted as well, as can their successors with reference -count 0, etc. This way, we delete from the prediction graph -most elements that are no longer accessible, but not all of them; as will -be explained in the next section, loops in the prediction graph -can cause problems. - -.NH 2 -The algorithm to delete inaccessible loops - -.LP -Deleting graph elements which are no longer reachable is not as easy -as it looks when there are loops in the graph, introduced by -the extension to the algorithm that handles left recursive grammars. 
-Suppose for example that we have a very simple loop as in the left -picture below: - -.PS -down -X: box "x" "(0)" -arrow -box "[B]" "(2)" -arrow -box "a" "(1)" -arrow -box "[A]" "(1)" -arrow -box "[D]" "(1)" -arc <- from 2nd box.w to 2nd last box.w - -move right from X.ne -move -move -move -move -move -move -down -box "x" "(0)" dashed -arrow dashed -B: box "[B]" "(1)" -arrow -box "a" "(1)" -arrow -box "[A]" "(1)" -arrow -box "[D]" "(1)" -arc <- from B.w to 2nd last box.w -.PE - -.LP -The number below each symbol indicates the reference count of that element. -Suppose now that we delete `x', then we have the situation depicted in the -picture on the right. The loop consisting of `[B]', `a' and `[A]' is now -unreachable, so all these elements can be deallocated. -The reference count of `[B]' is 1, so it will not be deleted. To be precise -all elements in the loop have their reference counts on 1, and -consequently none of these will be deleted. But we stated earlier -that all elements of the loop cannot be reached anymore and that the -loop had to be deleted! In this example the reference counts of the -loop elements are all 1, but in more complex situations it is also -possible that some of the elements have a reference count of more -than 1. - -.LP -To solve this problem we present an algorithm, devised by E. Wattel, that -determines whether a loop can be deleted or not. -The algorithm consists of two parts. The first part of the algorithm goes as -follows: it presumes that all elements of the loop will indeed be -deleted. Every time it deletes an element it decreases the reference -count of all the successors of the element that are also member of the same -loop. How the algorithm knows which elements belong to the loop and which -do not will be explained later. 
The situation of the example above will now -look like this: - -.PS -down -box "[B]" "(0)" -arrow -box "a" "(0)" -arrow -box "[A]" "(0)" -arrow -box "[D]" "(1)" -arc <- from 1st box.w to 2nd last box.w -.PE - -.LP -The number below each symbol indicates again the reference count -after we have applied the first part of the algorithm. - -.LP -The second part of the algorithm checks and restores the -reference counts of all members of the loop. When it finds -out that one or more reference counts are not 0, it concludes -that it is still possible to enter the loop in some way, and -that it cannot be -deleted yet. In the other case it reports that the loop can be -deleted, which is also true in our example. - -.LP -We will now formally describe the first part of the algorithm -that finds all directed circuits from a given vertex, and determines if -the vertices on those circuits can be deleted. -The algorithm works on prediction-graphs in which every edge that -is in a circuit is marked. Note that a marked edge may be in more than one circuit. -We will call this mark `C'. -The input to the algorithm is such a prediction graph, and a start vertex, -say A. The first part of the algorithm is: - -.IP 1 -Put the start vertex A on a list L; mark all edges `unused' -.IP 2 -If L is empty, stop -.IP 3 -For each vertex in list L, check if there are edges marked both `C' and -`unused'. For each edge found, mark it `used', and traverse it to its -other endpoint; put this endpoint on a new list M, initially empty -.IP 4 -Decrease the reference count of all vertices on M by 1 -.IP 5 -L := M; go to 2 - -.LP -It is clear that the algorithm will terminate: each edge is only traversed once, -and the number of edges is finite. We will now prove some properties of this -part of the algorithm. - -.LP -.I -An edge is traversed by the algorithm if and only if it is on some -directed circuit $A ->$...$->A$.
-.R -.br - -The if-part is easy; if an edge $e$ connecting vertices $W$ and $V$ is on some directed circuit starting in -$A$, then there is a path $A ->$...$-> W -> V$; let $A ->$...$-> W -> V$ be a path -of minimum length from $A$ to $V$. If the length of the path from $A$ to -$W$ is $k$, then after turn $k$ of the algorithm $W$ will be on list L. To see -that this is the case, suppose that $W$ is not on list L after turn $k$; -this means that the edge entering $W$ was already marked used in a -previous turn, but then there would be a shorter path from $A$ -to $W$, contradicting the assumption that the path is of -minimum length. The edge -$e$ is marked `C', because it is in a circuit; it is marked `unused', for if -it were marked used, there would be a shorter path from $A$ to $V$. So, -in turn $k + 1$, the edge $e$ will be traversed. - -.LP -On the other hand, suppose that an edge $e$ is traversed by the algorithm; -we will show by induction on the number of turns the algorithm has made -that $e$ is on a directed circuit $A->$..$->A$. In the first turn, all -edges from $A$ that are marked `C' are traversed, and clearly, if an edge -from $A$ is part of a circuit then that edge is part of a circuit from $A$ to $A$. -Now suppose that in turn $n+1$ an edge $e$ connecting vertices $W$ and -$V$ is traversed. This means the edge is -marked `C', so it is part of some circuit. If there is a path from $V$ to $A$, -we can simply trace a circuit -$A->$...$-> W -> V -> $...$-> A$, and clearly $e$ is on a circuit from -$A$ to $A$. Now, suppose there is no path from $V$ to -$A$. We can always trace a circuit $W -> V ->$...$-> W$ because the -edge from $W$ to $V$ is part of a circuit; and by the -induction hypothesis there is a circuit $A ->$...$-> W ->$...$-> A$. We can -now make a `detour' at $W$, yielding a circuit $A->$...$-> W -> V$... -$-> W ->$...$-> A$. This case is shown in the picture below. -So in either case $e$ is on a circuit from $A$ to $A$. 
- -.PS -down; -B1: box "A"; -arrow dashed; -B3: box dashed; -arrow dashed; -B2: box "W"; -arrow dashed; box dashed; -arc <- from B1.w to last box.w -arrow right "$e$" "C" from B2.e -box "V"; arrow dashed; box dashed; -arrow dashed -> from last box.n to B3.e -.PE - -.LP -.I -A vertex appears on list L if and only if it is on some directed -circuit from $A$ to $A$. -.R -.br - -.LP -If a vertex is in such a circuit, there is an edge that enters it, which -is part of a circuit from $A$ to $A$; we already showed that this edge -is traversed by the algorithm, and thus the vertex will appear on list -L. Conversely, if a vertex appears on list L, then an edge entering -that vertex has been traversed by the algorithm; we showed that this -edge is part of a circuit from $A$ to $A$, and thus the vertex is -part of a circuit from $A$ to $A$. - -.LP -.I -When the algorithm is finished, each vertex that is part of some -directed circuit from $A$ to $A$ has its reference count decreased by exactly -the number of edges entering it that are part of a directed circuit from $A$ to $A$. -.R -.br - -.LP -Each edge that is part of some circuit from $A$ to $A$ is traversed -exactly once; the reference count of the endpoint is decreased -by one after an edge has been traversed. Thus, if a vertex is endpoint -of $k$ such edges, its reference count is decreased by $k$. - -.LP -.I -If the reference count of each of the vertices visited by the algorithm -is 0 after the algorithm has finished, all these vertices can be deleted; -if the reference count is not zero for one or more of the visited -vertices, then none of them can be deleted. -.R -.br - -.LP -Suppose all visited vertices have reference count 0; this means that -each of the vertices is only entered by edges that are on a circuit -from $A$ to $A$. Therefore, it holds that any path leading to any -of the visited vertices has to start in one of the visited vertices; there -is no path starting in an unvisited vertex to a visited one.
Thus, -all the visited vertices are unreachable. -Conversely, if one of the visited vertices has reference count not zero, -then there is a path from an unvisited vertex to this vertex. Because from -the vertex with reference count nonzero, we can get to $A$, and from $A$ -we can get to any of the other vertices, all visited vertices are -reachable. - -.LP -The second part of the algorithm now checks if all reference counts are -zero, and if they are, it deletes all visited vertices. - - -.NH 2 -Marking loop elements - -.LP -One point we have omitted so far is how the edges in the prediction -graph that are part of a loop get marked. -Basically, a loop can be detected: - - a. when it is made; -.br - b. when we want to know about it. - -.LP -The first approach checks if a loop is constructed -as soon as we join two paths in the graph, and if so, marks all -edges of the loop. The other approach does not do any checking when two -paths are joined together; it starts looking for loops when we want -to delete an element with reference count not 0, marking all edges -belonging to the loops it discovers. In practice it turns out that -we very often encounter elements that we would like to delete, but that have -reference count not 0, whereas the joining of paths occurs relatively -infrequently. We therefore have chosen to check if a loop is created -when two paths in a prediction graph are joined. - -.LP -Now the question arises how to find and mark all edges of -the loop. For this problem we also devised an algorithm. -Because we already know that there is an edge from the element on which -the new path is connected to the successor of the joined element, the -algorithm only has to find a path from this last element back to the first one. -This can be done by a backtracking depth first search; to find a path from -one element to another we have to find a possibly empty path -from one of the successors of the first element to the last element.
As -soon as we have found a path, we can mark all the edges on the path and also -the backedge as loop edges. In case that there is more than one path -back to the first element it is necessary that the algorithm continues -searching after it has found one path. - -.LP -To avoid looping of this algorithm we have to set a flag at the elements -which are on the path already. When the algorithm is backtracking it can -clear the flags at the elements it is leaving. - -.LP -To speed up the searching process we can set flags at the edges we have already -visited but did not lead back to the first element. When the algorithm -encounters such an edge it already knows that this edge is not worth -searching again and can be skipped. At the end of the algorithm these -flags have to be cleared again. - -.LP -One might propose another optimization: as soon as -we reach an edge that is already marked as a loop edge, we -can stop searching for other loop edges. There is, however, -a case in which this can go wrong. Imagine the following situation: - -.PS -down -E: box "[E]" -arrow " C" ljust -D: box "[D]" -arrow " C" ljust -C: box "c" -arrow " C" ljust -box "b" -arrow " C" ljust -A: box "[A]" -arrow -box "a" - -move right from D -move right -J: box "[J]" -down -arrow from J.s " C" ljust -I: box "i" -arrow " C" ljust -H: box "[H]" -arrow from H.s to A.e - -arc <- from E.w to A.w -move left from C -move left -"C" -arc -> from H.e to J.e -move right from I -move right -"C" - -arrow dashed from E.s to J.n - - -.PE - -What we have here is a prediction graph with two loops; all edges that belong -to a loop are again marked with an `C'. Note that the edge between `[H]' -and `[A]' is not a loop edge. Suppose that `[J]' is not yet -completely substituted, i.e. there is another production rule for -J: -.br - -J: E - -.br -The `E' on top of the right path is now joined with the `[E]' -on the left path, which is depicted by the dashed arrow -between `[E]' and `[J]'. 
When we take a good look at the graph -we see that the two loops are merged into one. But that is not -the most important observation we have to make: not only the -edge between `[E]' and `[J]' must be marked as a loop edge, but -also the edge between `[H]' and `[A]'! So it is not possible -to stop searching for loop edges as soon as we have found an -edge which was already marked as a loop edge. We have to continue -until we reach the element at which we started: `[E]'. So the -optimization proposed above is incorrect. - - -.NH 2 -Optimizations using FIRST and FOLLOW sets - -.LP -In the algorithm as we have described it, every nonterminal on top of the graph -is substituted until only terminals remain on top; these terminals are -then matched against the current input symbol. However, by using -FIRST sets, we can save considerably on the number of computations -necessary. Suppose one of the top elements of the graph is nonterminal A, -and the current inputsymbol is $a$. Then, it is of no use to substitute -A if terminal $a$ is not in FIRST(A), because then substituting A will -never produce $a$ on top of the graph. So, before substituting a -nonterminal we check if the current inputsymbol is in its FIRST set; if -it is not, we can declare the path the nonterminal is on a dead end, and -delete it, without having to perform the actual substitution. Of course, if -A can produce empty, we still have to consider its successor in the graph. - -.LP -Similarly, when we have an END marker on top, with nonterminal B in -it, and we consider using rule -.br - - D: $alpha$ B C $gamma$ - -.br -We first check if the current inputsymbol is in FIRST(C); if this is -not the case, there is no need to start a graph component with this -rule, because it will never produce the next inputsymbol on top. -Again, if C produces empty, we still have to evaluate the part of the -rule following C. 
- -.LP -To circumvent the problems caused in the FIRST set optimization by -nonterminals that produce empty, we can also make use of FOLLOW-sets. -When substituting, if we encounter a nonterminal whose FIRST set does -not contain the current inputsymbol but which can produce empty, -we check if the current inputsymbol is in its FOLLOW set. If it is not, -there is no need to process its successor. Similarly, in case we -are processing an END marker as explained above, there is no need -to process the part of the rule following C if FIRST(C) does not -contain the input symbol, or C produces empty but the inputsymbol -is not in FOLLOW(C). -.bp -.nr PS 12 -.nr VS 14 - -.NH -Test results - -.nr PS 10 -.nr VS 12 -.RS - -.LP -In this chapter, we discuss some test results that were obtained -by recompiling existing ACK compilers with the modified LLgen. -We tried several combinations of possible optimizations, including -`dumb' ones, like no optimization at all, not even deleting unreachable -prediction graph elements. -The incorporation of LLgen with non-correcting error recovery went -smoothly; only minor modifications to the Make-files were necessary. -Specifically, these modifications consisted of passing an extra -flag to LLgen, and including the new generated C-file Lncor.c in -the list of generated C-files. Also, the LLmessage error reporting -routine had to be adapted. We successfully recompiled the C, Modula-2 -and Occam compilers; in the next sections, we discuss some test results -that were obtained with the Modula-2 and C compilers. - -.RE -.LP -.NH 2 -Performance - -.LP -We will now present and discuss, with the aid of some -diagrams, time and space measurements on the non-correcting error -recovery. We have measured the effect of various optimizations. -These optimizations include the first-set optimization and the follow-set -optimization. We also measured the effect of leaving out the loop-deletion -algorithm, regarding both time and space.
We performed our measurements using -C- and Modula-2-programs of three different sizes; one of approximately -750 tokens, one of appr. 5000 tokens and one of appr. 15000 tokens. We have -chosen to represent the sizes of programs in the number of tokens instead of -number of lines, because the number of tokens more realistically -reflects the load the programs put on the error recovery mechanism. Also we give -our time measurements in usertime instead of realtime, because realtime -depends heavily on the load of the system, which usertime does not. -Our space measurements are based on the size of the prediction graphs. -Note that all files are entirely recognized by the non-correcting error -recovery technique. We achieved this by putting a `1' at the beginning -of each file; because then each file starts with a syntax error LLgen -is forced to continue with the non-correcting error recovery. - -.NH 3 -Time and space measurements on the effect of the first-set optimization - -.LP -In the diagram below we show the time measurements we got from recognizing -the C-programs both with and without first-set optimization. - -.G1 -coord x 0, 17000 y 0, 65 -ticks bot out at 750, 5000, 15000 -label bot "Number of tokens" -label left "User Time" "(sec)" left .3 -draw no_opt dashed -draw first_opt dashed - -copy thru X - times size +2 at $1, $2 - times size +2 at $1, $3 - next no_opt at $1, $2 - next first_opt at $1, $3 -X until "XXX" - -742 2.5 .9 -5010 16.3 5.8 -14308 54.2 16.8 -XXX - -copy thru X "$1 $2" size -2 at 11000, $3 X until "XXX" -No optimization 55 -First-set optimization 20 -XXX -.G2 - -.I -.ce -Time measurements of three C-programs with and without first-set optimization -.R - -.LP -Notice the considerable time savings we -get when the first-set optimization is turned on; a factor of about -3. Obviously this is an extremely useful optimization.
On the other hand -we found there were no measurable time savings when using the follow-set -optimization; for that reason we did not chart the result of this optimization. -It seems that the time savings gained by the optimization are -wasted again by the extra processing time needed. We conclude that -this optimization is of little or no use when we want to save on time. - -.LP -In the following picture the time measurements of three Modula-2 programs -are given, again with and without first-set optimization. - -.G1 -coord x 0, 17000 y 0, 65 -ticks bot out at 750, 5000, 15000 -label bot "Number of tokens" -label left "User Time" "(sec)" left .3 -draw no_opt dashed -draw first_opt dashed -copy thru X - times size +2 at $1, $2 - times size +2 at $1, $3 - next no_opt at $1, $2 - next first_opt at $1, $3 -X until "XXX" - -823 1.3 .6 -4290 7.6 3.5 -16530 30.5 14.3 -XXX - -copy thru X "$1 $2" size -2 at 13000, $3 X until "XXX" -No optimization 30 -First-set optimization 15 -XXX -.G2 - -.I -.ce -Time measurements of three Modula-2-programs with and without first-set optimization -.R - -.LP -From this picture we can conclude mainly the same as above; considerable -time savings when we use the first-set optimization; -the factor is somewhat less, but still more than 2. Again we have omitted -the results of the follow-set optimization, for the same reason as before. - -.LP -There is however one remarkable difference between the two languages: parsing -C-programs needs almost twice as much time as parsing programs of comparable -sizes written in Modula-2. This can be explained by the fact that the -C-grammar is far more complicated than that of Modula-2, and also the -production rules are longer in C, so building, deleting and definitely -traversing the graph will consume more time. - -.LP -Now we come to the space measurements of both C- and Modula-2 programs.
-In the picture below we present the maximum sizes of the prediction graphs, -during the recognition of the three C-programs. - -.G1 -coord x 0, 17000 y 0, 18000 -ticks bot out at 750, 5000, 15000 -label bot "Number of tokens" -label left "Maximum size of" "the prediction graph" "(bytes)"left .3 -draw no_opt dashed -draw first_opt dashed -copy thru X - times size +2 at $1, $2 - times size +2 at $1, $3 - next no_opt at $1, $2 - next first_opt at $1, $3 -X until "XXX" - -742 5568 10444 -5010 7668 12664 -14308 13636 17308 -XXX - -copy thru X "$1 $2" size -2 at 8000, $3 X until "XXX" -No optimization 16000 -First-set optimization 7000 -XXX -.G2 - -.I -.ce -Maximum sizes of the prediction graphs when recognizing three C-programs -.R - -.LP -From this diagram we see that, although the prediction graphs -are smaller when the first-set optimization is used, the space savings are -not as spectacular as the time savings achieved by this optimization. - -.LP -In Modula-2 the first-set optimization also causes a decrease in memory -usage. The savings are less than in C, but still about 1.5 Kb. Again -this can be explained by the fact that the rules of the Modula-2 grammar -are shorter than that of C. 
- -.G1 -coord x 0, 17000 y 0, 12000 -ticks bot out at 750, 5000, 15000 -label bot "Number of tokens" -label left "Maximum size of" "the prediction graph" "(bytes)" left .3 -draw no_opt dashed -draw first_opt dashed -copy thru X - times size +2 at $1, $2 - times size +2 at $1, $3 - next no_opt at $1, $2 - next first_opt at $1, $3 -X until "XXX" - -823 5056 3292 -4290 6420 4664 -16530 11388 9632 -XXX - -copy thru X "$1 $2" size -2 at 8000, $3 X until "XXX" -No optimization 10000 -First-set optimization 4000 -XXX -.G2 - -.I -.ce -Maximum sizes of the prediction graphs when recognizing three Modula-2-programs -.R - -.NH 3 -Input that is recognized in quadratic time - -.LP -The measurements presented may suggest that the time required to -recognize input depends linearly on the length of the input; however, -this is not always the case. When there are recursive rules in the -grammar, the time needed to recognize input that is produced by this -rules can become proportional to the square of the input length. -Consider this set of grammar rules: -.br -.nf - - S: '{' A '}' - A: 'a' A | $epsilon$ - -.fi -.LP -When the input is `{aaa....', the algorithm will produce the following -prediction graphs: - -.PS -up; B1: box "END" "S"; arrow <- ;box "}";arrow <- ;box "A";arrow <- ;box "{"; -move right from B1.se; move -up; B2: box "END" "S"; arrow <-; box "}"; arrow <-; box "[A]"; -arrow <-; box "A"; arrow <-; box "a"; -move right from B2.se; move -up; B3: box "END" "S"; arrow <-; box "}"; arrow <-; box "[A]"; -arrow <-; box "[A]"; arrow <-; box "A"; arrow <-; box "a"; -move right from B3.se;move -up; B4: box "END" "S"; arrow <-; box "}"; arrow <-; box "[A]"; -arrow <-; box "[A]"; arrow <-; box "[A]"; arrow <- ; box "A"; arrow <-;box "a"; -.PE - -.LP -In each prediction phase, a new [A] appears on the prediction graph. However, -since A also produces empty, the prediction algorithm has to traverse all the -elements [A] until it finds the element `}'. 
In the first prediction phase, -there is one element [A], in the second there are two, etc, so in all -1 + 2 + 3 + ... + k = $k(k+1) over 2$ elements have to be traversed if -there are k prediction phases, making this proportional to the square -of the input length. We constructed a parser with this simple input grammar -and measured the processing time the error recovery mechanism used. -In the following diagram the dashed line shows the processing time needed; -the dotted line is the curve $t = 13 times 10 sup {-6} n sup 2$. Clearly the processing time -is proportional to the square of the number of tokens. - -.G1 -coord x 0, 2100 y 0, 60 -ticks bot out at 500, 1000, 1500, 2000 -label bot "Number of tokens" -label left "User Time" "(sec)" left .3 -draw quad dashed - -copy thru X - times size +2 at $1, $2 - next quad at $1, $2 -X until "XXX" - -500 3.0 -1000 12.4 -1500 28.6 -2000 51.4 -XXX - -draw dotted -for i from 0 to 2100 by 25 do { next at i, 0.000013 * i * i } -.G2 - -.LP -In the grammar used for the C compiler, array initializations are handled by a recursive -rule, so we would expect that the error recovery mechanism needs quadratic -processing time to recognize such an initialization; we made measurements on -the processing time and indeed, the -processing time needed grows proportionally to the square of the size of the input, as the -next figure shows. Here, the processing times are about half of those in -the previous example; this is so because the recursion appears after two -tokens are recognized. Note that the algorithm only takes quadratic time -when it is recognizing input that is generated by a recursive grammar rule. -Other input is still recognized in linear time, regardless of the fact that -there are recursive grammar rules.
- -.G1 -coord x 0, 5000 y 0, 85 -ticks bot out at 1150, 2400, 3600, 4800 -label bot "Number of tokens" -label left "User Time" "(sec)" left .3 -draw quad dashed - -copy thru X - times size +2 at $1, $2 - next quad at $1, $2 -X until "XXX" - -1150 5.1 -2400 20.3 -3600 43.7 -4800 78.6 -XXX -.G2 - -.LP -Unfortunately, there is no easy way to speed up the recognition of these -recursively defined language elements; they are caused by the substituted -tokens that are left in the prediction graph, and we cannot just delete those -`dummies' from the graph during a prediction phase because the `join' part of the -prediction algorithm depends on them. One could traverse the graph after -a prediction phase to delete the dummies, but then the processing -time needed to recognize non-recursively defined language elements would -increase dramatically. However, we feel that in practice things -like large array initializations will not occur in hand-made programs; when -they occur, it is probably in computer-generated programs, which normally -will be correct anyway, meaning that the error recovery never sees them. -When testing such generated programs, one is likely -to use small test-cases, which are handled well by the error recovery. - -.NH 3 -Time measurements on the effect of leaving out the loop-deletion algorithm - -.LP -We now show what effect the loop-deletion algorithm has on processing time. -To put it another way: how much time can be saved when we turn off the -loop-deletion algorithm. In the diagram below we give the measurements of -the three C-programs; note that we do use the first-set optimization. 
- -.G1 -coord x 0, 17000 y 0, 22 -ticks bot out at 750, 5000, 15000 -label bot "Number of tokens" -label left "User Time" "(sec)" left .3 -draw no_loop dashed -draw loop dashed -copy thru X - times size +2 at $1, $2 - times size +2 at $1, $3 - next no_loop at $1, $2 - next loop at $1, $3 -X until "XXX" - -742 .9 .4 -5010 5.8 6.8 -14308 16.8 20.5 -XXX - -copy thru X "$1 $2" size -2 at 11300, $3 X until "XXX" -With loop-deletion 20 -Without loop-deletion 9 -XXX -.G2 - -.I -.ce -Time measurements on processing three C-programs with and without the loop-deletion algorithm -.R - -The diagram shows that the loop-deletion algorithm -does not dramatically slow down the recognizing process. There is, however, -a measurable time loss of \(+-25%. As we will see later, the loop-deletion -algorithm will turn out to be extremely useful in efficient use of memory -when there are many loops in the graph. - -The effect of the loop-deletion algorithm on parsing Modula-2 programs -is even less than with C-programs; in fact there is no measurable -time loss: - -.G1 -coord x 0, 17000 y 0, 15 -ticks bot out at 750, 5000, 15000 -label bot "Number of tokens" -label left "User Time" "(sec)" left .3 -draw no_loop dashed -draw loop dashed -copy thru X - times size +2 at $1, $2 - times size +2 at $1, $3 - next no_loop at $1, $2 - next loop at $1, $3 -X until "XXX" - -823 .6 .6 -4290 3.5 3.8 -16530 14.3 14.3 -XXX - -copy thru X "$1 $2" size -2 at 11800, $3 X until "XXX" -With loop-deletion 13 -Without loop-deletion 7 -XXX -.G2 - -.I -.ce -Time measurements on processing three Modula-2-programs with and without a loop-deletion algorithm -.R - -There are at least two reasons for this; both result from the relative -simplicity of the Modula-2 grammar. The distance from a head to an -end of stack marker is shorter than in C, and secondly Modula-2 -causes fewer joins to occur than C, meaning that the loop marking algorithm -is run less often and when it is run it has fewer paths to search.
- - -.NH 3 -Space measurements on the effect of leaving out the loop-deletion algorithm - -.LP -Clearly, to make any measurements on the space-usage effects of leaving out -the loop-deletion algorithm we need a program that causes the prediction -graph to contain loops; however, we have not been able to devise a C -or Modula-2 program that does this. In order to be able to make measurements, -we added an extra alternative to a rule of the C compiler grammar, making -it directly left-recursive. To make LLgen accept this new grammar, we -put a `%if' directive in the rule. - -.LP -We have input our standard C test program consisting of 800 tokens to -the error recovery routine for this `doctored' C compiler, -and compared the storage needed for the prediction graphs with the -loop deletion algorithm enabled with the storage needed when the -algorithm is disabled. With the loop-deletion algorithm enabled, the -maximum size of the prediction graph was 5576 bytes. When the loop-deletion -algorithm was disabled, the maximum size of the prediction graph -grew to 12676 bytes; furthermore, 12676 bytes of heap were allocated -for the prediction graph, but not deallocated again, because they were -in use by graph elements that were in inaccessible loops. The user-time -the program needed decreased only slightly, from 1.0 to 0.9 seconds. Given the -relatively small input program, this data suggests that when loops -are actually being made, the loop deletion algorithm is definitely -worth the extra overhead it costs, considering the space -that would otherwise be occupied by inaccessible loops. To verify this, -we input the C program consisting of 15000 tokens to the compiler; -execution time increased from 17.3 to 21.1 seconds after enabling -the loop deletion algorithm, while the maximum size of the prediction graph -shrank from 328664 to 13664 bytes. With the loop-deletion algorithm -disabled, 326720 bytes allocated for the graph were not deallocated again.
-Again, given the relatively small increase in execution time and the -large reduction of memory usage, we feel that the loop-deletion -algorithm is useful enough to justify the overhead it creates. - -.NH 2 -Problems encountered - -.LP -In this section we describe some of the problems we encountered -while testing the non-correcting error recovery. - -.NH 3 -The LLgen error reporting mechanism. - -.LP -The parsers generated by LLgen call a user-supplied error reporting -routine, usually called LLmessage. This routine is called with an -integer parameter that is positive, zero or negative. When the parameter -is positive the parser has just inserted a token, whose -number is equal to the parameter; if it is zero, the parser -has deleted a token whose number is in a global variable called LLsymb; if -it is negative, it means that LLgen expected end-of-file, but did not -find it. The routine LLmessage is supposed to print an error message, -and when a token is inserted, it should set all necessary attributes. - -.LP -However, when non-correcting error recovery is used, the situation becomes slightly -different; when the parser inserts a token, it is only to keep the -semantic actions consistent, and does no longer signify an error. -However, the LLmessage routine still has to be called because the -attributes of the inserted token need to be set. Therefore, when -non-correcting error recovery is used, the LLmessage routine should not -print an error message when the parameter is positive, or else it will -print highly confusing error messages indeed. Furthermore, the -LLmessage routine will usually print a message like `token ... deleted' when -it is called with parameter equal to zero; however, when the non-correcting -error recovery is used, it is more appropriate to report something -like `token ... illegal', as the non-correcting error recovery does -not delete tokens. 
Finally, when an unexpected end-of-file is encountered, -LLgen normally just inserts the missing tokens and calls -LLmessage with the parameter equal to the token number; -when non-correcting error recovery is used we need a way to -actually report we have encountered an unexpected end-of-file. The -way we achieved this is by calling LLmessage with parameter 0 and the -global variable LLsymb set to EOFILE when this situation occurs; the -routine LLmessage should print something like `unexpected end of file' -when it is called with parameter 0 and LLsymb is EOFILE. To facilitate -switching between correcting and non-correcting error recovery, the -file Lpars.h contains a statement `#define LLNONCORR' if non-correcting -error recovery is used. - - -.NH 3 -Parsers being started in semantic actions - -.LP -LLgen allows the programmer to define more than one nonterminal as the -start symbol of the input grammar; it will generate a parsing routine -for each of the start symbols. However, the error recovery code -is generated only once; it is shared by all parsers. -The programmer is free to call any -of the generated parsers whenever he wants; for instance, in the C-compiler -a separate parser for expressions in #if and #elif statements is used. Whenever -the lexical analyzer encounters such a statement, it calls the expression -parser. It is also possible to call a parser in a semantic action of -another parser; in the MODULA-2 compiler a separate parser for -definition modules is used. When the main parser encounters a -FROM defmod IMPORT statement a semantic -action opens the definition module defmod and starts the parser for -definition modules. - -.LP -The fact that subparsers can be started just about anywhere causes -problems when non-correcting error recovery is used. -Suppose a parser calls another parser in a semantic action -to parse a separate input file.
In the Modula-2 compiler, after -seeing the FROM defmod IMPORT statement a semantic action opens -defmod and parses it; now, if a syntax error occurred before the -FROM IMPORT statement, the non-correcting error recovery will not -execute the action that opens and parses the definition module, but -it will not report an error either, because the statement -FROM defmod IMPORT is part of the input language of the main parser. -However, suppose that during the parsing of a definition module -an error occurs; then, some semantic actions that would normally -be executed during parsing of the definition module will not have -taken place. When normal parsing is now resumed by the main parser, -after the non-correcting error recovery has finished with the -definition module, a lot of spurious semantic errors are likely to be -reported, because the semantic actions that would normally have been -executed during the definition module parsing have not been executed -by the error recovery. Therefore, it is desirable that the main parser -does not resume normal parsing, but instead continues with the non-correcting -error recovery as well. Any syntactic errors in the main program will -still be reported, but no spurious semantic errors will be reported -that way. - -.LP -When the lexical analyzer calls other parsers, as is the case in -the ACK C compiler, recursive invocations of the non-correcting error -recovery routine can occur. This will happen if a parser starts the -error recovery, the error recovery calls the lexical analyzer, which -starts another parser that finds a syntax error and calls the -error recovery again. This is not really a problem, but it has -consequences for the implementation of the error recovery routine. - -.LP -The worst case -occurs when two parsers are involved in parsing one input file, and -the secondary parser (e.g. an inline assembly parser) is called in a semantic -action of the main parser.
Suppose now that the input text contains -a syntax error; after detecting this error, the parser starts the -non-correcting error recovery. This recovery does not execute any -semantic actions; therefore it will not start the subparser at those points -where the original LLgen generated parser would. As a result, parts -of the program that would be accepted by the subparser will now probably -be rejected as illegal, because the error recovery does not know it -should use another grammar to check these parts. This is a serious -problem, and we have devised and implemented two ways to solve it. - -.LP -The first solution is based on the assumption that whenever a semantic -action occurs in the grammar, another parser can be started at that -point. Obviously, we have no way of knowing which semantic actions start -a parser and which don't, so we assume the worst. -Now, assume that in the grammar there are k symbols defined as -start symbols, say $W sub 1 , W sub 2 , ..., W sub k$. Each of these symbols -will cause LLgen to generate a parser that can be called in any -of the semantic actions of the grammar. We now introduce a new -symbol $X$, and a new grammar rule $X -> W sub 1 X | W sub 2 X | ... | -W sub k X | -epsilon$. -In the grammar the error recovery algorithm uses, we insert this symbol -X at all positions where there are semantic actions in the original grammar, -so a rule $A -> alpha$ { action } $beta$ becomes $A -> alpha X beta$. As a -result, at each position in a grammar rule where a semantic action -occurs, we now accept any input that would be accepted by any of the -parsers. Clearly, this solution is somewhat of a kludge, as it will -accept a lot of input that is not accepted by the original parser. -However, it is guaranteed to never give spurious error messages, because -whenever a parser would be started by the original parser, there now -is an $X$ in the grammar that produces all the strings that would be -accepted by that parser. 
We have implemented this solution, and found -it to be extremely slow, which of course was to be expected given the -number of semantic actions in the average grammar. Furthermore, -because each time a semantic action occurs in the grammar -a string accepted by any of the generated parsers is accepted, including -strings recognized by the currently running parser, error messages -become hard to interpret. As an example, consider the following -C program: -.br -.nf - - - main() - { - int i, j; - - while (i < j - j++; - - i = 1; - j = 2; - - } - - -.fi -.LP -Clearly, there is a `)' missing in the while-statement; -however, if this program is input to the error recovery it will complain -"} illegal", since after recognizing the -expression controlling the while the original parser starts a -semantic action, so the non-correcting recovery will accept a valid -C program at that point; after recognizing the three statements -following the while-statement as a separate program the -recognizer expects the missing `)', but gets `}' instead. - -.LP -Our second solution is based on the observation that if we knew -which semantic actions can start other parsers, we would only -have to introduce the new symbol $X$ at those places where parsers -can get started. We have therefore extended LLgen with a new directive -%substart, which is used to indicate to the parser generator that -another parser may be started. The %substart is followed by the -startsymbols that will produce the parsers that can be called, -so %substart A, B, C; indicates that in the semantic action -following the directive the parsers produced by startsymbols -A, B, and C can be started. In the grammar used by the error -recovery, a new symbol $X$ will be introduced at this point, -along with a new rule $X -> AX | BX | CX | epsilon$.
Of course, this -solution can still accept input that would not have been accepted -by original parser, for instance if a parser is started -conditionally, based on other semantic information. However, it -is a big improvement over the first solution, both in performance -and the input it accepts. - -.NH 3 -Syntactic errors being handled in semantic actions - -.LP -A programmer may decide to handle certain syntactic errors -in semantic actions, for instance because he is not satisfied with -the standard error recovery. However, since the non-correcting error -recovery does not execute semantic actions, this may cause errors -to remain undetected. We encountered the following example in the ACK -Modula-2 compiler, in the grammar rule for assignment statement: -.br -.nf - - - Assignment_statement: lvalue - [ - '=' - { - error(":= expected"); - } - - | - - ':=' - ] - expression - ; - -.fi -.LP -This works well in the original LLgen; however, statements like -`j=9' are not treated as syntactic, but as semantic errors. -The original LLgen generated parser -will print the (semantic) error message, but the non-correcting recovery -will not execute the semantic action and therefore the erroneous -input will be accepted. - -.LP -To facilitate the incorporation of non-correcting error recovery in parsers -that use this kind of `trick', we extended LLgen with the %erroneous -directive. The directive indicates to the non-correcting recovery -mechanism that the token following it is not really part of the grammar. -When recognizing input, the error recovery will ignore tokens in the -grammar that have %erroneous in front of them. If in the example above, -the '=' is replaced with %erroneous '=', the non-correcting mechanism will -report an error when it sees a statement like 'j = 9'. See appendix B -for details about the implementation of the %erroneous directive. - -.LP -Another example is in the ACK C compiler. 
For some reason, the -grammar accepts function definitions without `()', so according -to the syntax a function definition can look like: -.br -.nf - - int func - { - .... - } -.fi - -.LP -The absence of the `()', however, causes `func' to be entered in the -symbol table as a non-function, and when the parser encounters the body -a semantic action will complain with the error message "Making function body -for non-function". This again will cause the non-correcting error -recovery to miss errors. Consider this piece of code: -.br -.nf - -int i int j = 1; -{} - -.fi - -.LP -where apparently there's a `;' missing between the declarations -of i and j. The original LLgen-generated parser only gives semantic errors: -.br -.nf -"Making function body for non-function" -"j is not in parameter list" -"Illegal initialization of formal parameter, ignored" -.fi -.LP -As a result, the non-correcting error recovery will not report -any errors in this piece of code, because it does not execute the -semantic actions that recognize and report the error. Unfortunately, -due to the way the C-grammar is written, it is not possible to solve -this problem using a %erroneous directive; the part of the grammar -that deals with declarations would have to be rewritten so as to -syntactically reject functions without `()'. - -.NH 3 -Semantic actions that read input - -.LP -There are no restrictions on what a semantic action can do; -there is nothing to stop the programmer from writing a parser in such -a way that some of the input to the parser is processed by semantic -actions. Obviously, because the non-correcting error recovery does not -execute semantic actions, this kind of parser will not work at all -with the new error recovery. Ironically, LLgen itself is written in -such a fashion; {}-enclosed C-code in its input is processed by -a semantic action in the LLgen grammar.
We feel that it is bad -practice to write parsers this way; the `eating' of parts of -the input should be done in the lexical analyzer, not in the parser. -After all, in the case of LLgen, one can regard a semantic action -in the input as one token, and thus it should be handled by -the lexical analyzer as such. - -.NH 2 -Examples of error recovery - -.LP -We will now give some examples that compare non-correcting error -recovery with the correcting error recovery used by parsers generated -by `standard' LLgen. - -Consider the next C program, where there is a `)' missing in the -header of function `test'. -.br -.nf - - 1 int test(a,b - 2 - 3 int a,b; - 4 - 5 { - 6 if (a < b) - 7 return(1); - 8 else - 9 return(0); - 10 } -.fi - -.LP -This small error derails the `standard' parser; it produces the -following error messages, where we have left out 7 messages reporting -semantic errors: -.br -.nf - - line 3: , missing before type_identifier - line 3: , missing before identifier - line 3: ) missing before ; - line 5: { deleted - line 6: if deleted - line 6: < deleted - line 6: ) missing before identifier - line 6: ) deleted - line 7: identifier missing before return - line 7: ; missing before return - line 7: { missing before return - line 8: else deleted - -.fi -.LP -In contrast, the parser using non-correcting error recovery produces -only one error message: -.br - - line 3: type_identifier illegal - -This error message correctly pin-points the error: there should -have been a `)' at the position where type-identifier `int' is. - -.LP -Now, an example with Modula-2; consider this program: -.br -.nf - - 1 MODULE test; - 2 - 3 TYPES - 4 ElementRecordType = RECORD - 5 Element: ElementType; - 6 Next, - 7 Prior: ElementPointerType; - 8 END; - 9 - 10 VARS a,b,c: ElementRecordType; - 11 - 12 - 13 BEGIN - 14 - 15 a := b; - 16 - 17 END test. - -.fi -.LP -There are two syntactic errors in this program; on line 3, TYPES should be TYPE, and -on line 10, VARS should be VAR. 
We have left out the type declarations of -ElementType and ElementPointerType; clearly this will generate semantic -errors, but we are only interested in syntactic errors anyway. -The correcting error recovery parser -again derails on this program; it produces the following syntactic error messages: -.br -.nf - - line 3: CONST missing before identifier - line 4: '=' missing before identifier - line 4: RECORD deleted - line 5: ':' deleted - line 5: ';' missing before identifier - line 5: '=' missing before ';' - line 5: number missing before ';' - line 6: ',' deleted - line 7: '=' missing before identifier - line 7: ':' deleted - line 7: ';' missing before identifier - line 7: '=' missing before ';' - line 7: number missing before ';' - line 8: ';' deleted - line 10: identifier deleted - line 10: ',' deleted - line 10: identifier deleted - line 10: ',' deleted - line 10: identifier deleted - line 10: ':' deleted - line 10: identifier deleted - line 10: ';' deleted - line 13: BEGIN deleted - line 15: identifier deleted - line 15: := deleted - line 15: identifier deleted - line 15: ';' deleted - line 17: END deleted - line 17: identifier deleted - -.fi -.LP -The error correction mechanism clearly makes the wrong guess by inserting -CONST on line 3; as a result, all that follows is rejected as incorrect. -In contrast, the non-correcting error recovery mechanism only produces -two error messages: -.br -.nf - - line 3: identifier illegal - line 10: identifier illegal - -.fi -.LP -This again exactly pin-points the errors: the identifiers TYPES and -VARS constitute the only errors in the program. Note that the -presence of more than one error does not cause any problems to the -non-correcting recovery mechanism. 
- -.bp -.nr PS 12 -.nr VS 14 - -.NH -Conclusion - -.nr PS 10 -.nr VS 12 - -.LP -After implementing and testing a non-correcting error recovery mechanism -we have come to the conclusion that it indeed is superior to correcting -mechanisms with regard to the error messages it produces; -the examples we have given clearly show this. However, there is a -clear loss of performance when errors are present in a program, -although we have found this performance -degradation to be acceptable. We feel that the benefits of -better error messages outweigh the loss of performance. In any case, -correct programs do not suffer at all from the incorporation -of a non-correcting recovery mechanism. -The error recovery mechanism we implemented does not make -unreasonable demands on resources; the size of the prediction -graphs stays within reasonable limits. - -.LP -The main problems we encountered had to do with recognizing -`languages within languages', and semantic actions that did -unreasonable things like eating input. The more `well-behaved' a -parser is, the better the results the non-correcting error recovery -mechanism gives. This is also true for the input grammars: with a -language like Modula-2, whose syntax has been designed with parser -generators in mind, the performance of the non-correcting mechanism -is better than with C, whose syntax is extremely hard, if not -impossible, to describe with an LL(1) grammar. - -.bp -.nr PS 12 -.nr VS 14 - -.NH -Bibliography - -.nr PS 10 -.nr VS 12 - -.IP [CORMACK] 12 -Gordon V. Cormack, `An LR substring parser for noncorrecting syntax error -recovery', ACM SIGPLAN Notices, vol. 24, no. 7, p. 161-169, July 1989 - -.IP [GRUNE] 12 -Dick Grune, Ceriel J.H. Jacobs, `A programmer friendly LL(1) parser -generator', Softw. Pract. Exper., vol. 18, no. 1, p. 29-38, Jan 1988 - -.IP [RICHTER] 12 -Helmut Richter, `Noncorrecting syntax error recovery', ACM Trans. Prog. Lang. -Sys., vol.7, no.3, p. 
478-489, July 1985 - -.IP [ROEHRICH] 12 -Johannes R\*:ohrich, `Methods for the automatic construction of error -correcting parsers', Acta Inform., vol. 13, no. 2, p. 115-139, Feb 1980 - -.IP [TOMITA] 12 -Masaru Tomita, Efficient parsing for natural language, Kluwer Academic -Publishers, Boston, p.210, 1986 -.bp -.SH -Appendix A: Implementation Issues - -.nr PS 10 -.nr VS 12 -.RS -.LP -In this appendix we will describe some implementation issues: -the data structure used to store the grammar during non-correcting -error recovery, postponing deletions of graph elements until after -the prediction phase, and the implementation of the %erroneous directive. -.RE - -.SH -A.1 The grammar data structure - -.LP -The grammar data structure used by the non-correcting error recovery technique has -to meet two conditions: easy access to a rule as a whole to make -substituting nonterminals efficient and easy access to each symbol in the RHS -of a rule to make starting error recovery and finding continuations -efficient. To fulfill these conditions we decided to construct the -storage of the grammar as follows. - -.LP -A rule in the grammar is divided into two -parts: a LHS and a RHS. The LHS is represented by a struct `lhs' and -for each symbol in the RHS a struct `symbol' is constructed. -A struct `lhs' contains the number of the -nonterminal forming the LHS of the rule, a pointer to the RHS, the -first- and follow-sets of the nonterminal and a flag `empty' which -indicates whether the nonterminal produces empty or not. A struct -`symbol' contains a field indicating the type of the symbol, i.e. -a terminal or a nonterminal, the number of the symbol, a `link' pointer -to a struct `symbol' that represents the same symbol, a `next' pointer -to the rest of the RHS and a pointer back to the LHS. - -.LP -A special struct `symbol' is added to the end of the RHS to indicate -the end of a rule. 
The type of this struct is LLEORULE, the number -is set to -1 and the pointers 'link' and `next' are nil. - -.LP -In case that there is more than one RHS for a LHS, all the RHS's -are put after each other and separated by another special struct -`symbol'. The type of this struct is LLALT, the number is set to --1 and the 'link' pointer is nil. After the last RHS a `LLEORULE'-struct -marker is added. - -.LP -Finally, to make searching efficient there are two arrays: `terminals' -and `nonterminals'. `terminals' is indexed by the number of a terminal -and contains for each terminal a struct containing a 'link' pointer -to a symbol, representing this terminal, in the RHS of a rule. Because -this symbol has again a 'link' pointer to another symbol representing -the terminal, it is possible by following this chain of pointers -to find all rules containing such a terminal. In a similar way `nonterminals' -is indexed by the number of a nonterminal and contains for each -nonterminal a struct. This struct not only contains a 'link' pointer -linking all rules with this nonterminal, but also contains a 'rule' -pointer. This pointer points to the RHS or RHS's of the rules of which -the nonterminal forms the LHS. - -.LP -As an example, consider the following grammar: - -.br -A: a B -.br -B: a | $epsilon$ -.br - -This will result in the picture below. Note that `pointer' fields -without an arrow indicate nil pointers. 
- -.PS -dx = 0.05 - -down -A_a: box ht boxht/2 "link" -box invis "a" ljust with .e at A_a.w - -move to A_a.s -move -move - -A: box "link" "rule" -B: box "link" "rule" -line dashed from A.w to A.e -line dashed from B.w to B.e -box invis "A" ljust with .e at A.w -box invis "B" ljust with .e at B.w - -move to A.ne -right -move -move -down - -LHS_A: box wid 1.2 * boxwid ht 2.5 * boxht "`A'" "rhs" "first" "follow" "empty 0" -line dashed from 0.2 to 0.2 -line dashed from 0.4 to 0.4 -line dashed from 0.6 to 0.6 -line dashed from 0.8 to 0.8 - -move to LHS_A.ne + (1,0) - -RHS_a1: box wid 2.0 * boxwid ht 2.5 * boxht "LLTERM" "`a'" "link" "next" "lhs" -line dashed from 0.2 to 0.2 -line dashed from 0.4 to 0.4 -line dashed from 0.6 to 0.6 -line dashed from 0.8 to 0.8 - -move to RHS_a1.ne + (1,0) - -RHS_B: box wid 2.0 * boxwid ht 2.5 * boxht "LLNONTERM" "`B'" "link" "next" "lhs" -line dashed from 0.2 to 0.2 -line dashed from 0.4 to 0.4 -line dashed from 0.6 to 0.6 -line dashed from 0.8 to 0.8 - -move to RHS_B.ne + (1,0) - -RHS_END1: box wid 2.0 * boxwid ht 2.5 *boxht "LLEORULE" "-1" "link" "next" "lhs" -line dashed from 0.2 to 0.2 -line dashed from 0.4 to 0.4 -line dashed from 0.6 to 0.6 -line dashed from 0.8 to 0.8 - - -move to LHS_A.s - (0,1) - -LHS_B: box wid 1.2 * boxwid ht 2.5 * boxht "`B'" "rhs" "first" "follow" "empty 1" -line dashed from 0.2 to 0.2 -line dashed from 0.4 to 0.4 -line dashed from 0.6 to 0.6 -line dashed from 0.8 to 0.8 - -move to LHS_B.ne + (1,0) - -RHS_a2: box wid 2.0 * boxwid ht 2.5 * boxht "LLTERM" "`a'" "link" "next" "lhs" -line dashed from 0.2 to 0.2 -line dashed from 0.4 to 0.4 -line dashed from 0.6 to 0.6 -line dashed from 0.8 to 0.8 - -move to RHS_a2.ne + (1,0) - -RHS_ALT: box wid 2.0 * boxwid ht 2.5 * boxht "LLALT" "-1" "link" "next" "lhs" -line dashed from 0.2 to 0.2 -line dashed from 0.4 to 0.4 -line dashed from 0.6 to 0.6 -line dashed from 0.8 to 0.8 - -move to RHS_ALT.ne + (1,0) - -RHS_END2: box wid 2.0 * boxwid ht 2.5 *boxht "LLEORULE" "-1" 
"link" "next" "lhs" -line dashed from 0.2 to 0.2 -line dashed from 0.4 to 0.4 -line dashed from 0.6 to 0.6 -line dashed from 0.8 to 0.8 - -# Next pointers upper row -.ps 30 -circle radius .01 at 0.75 - (dx, 0) -circle radius .01 at 0.3 - (dx, 0) -circle radius .01 at 0.7 - (dx, 0) -circle radius .01 at 0.7 - (dx, 0) -.ps 10 - -arrow from 0.75 - (dx, 0) to 0.3 -arrow from 0.3 - (dx, 0) to 0.3 -arrow from 0.7 - (dx, 0) to 0.7 -arrow from 0.7 - (dx, 0) to 0.7 - - -# Next pointers lower row -.ps 30 -circle radius .01 at 0.75 - (dx, 0) -circle radius .01 at 0.3 - (dx, 0) -circle radius .01 at 0.7 - (dx, 0) -circle radius .01 at 0.7 - (dx, 0) -.ps 10 - -arrow from 0.75 - (dx, 0) to 0.3 -arrow from 0.3 - (dx, 0) to 0.3 -arrow from 0.7 - (dx, 0) to 0.7 -arrow from 0.7 - (dx, 0) to 0.7 - - -# Link pointers -.ps 30 -circle radius .01 at 0.5 - (2*dx, 0) -circle radius .01 at 0.5 - (dx, 0) -circle radius .01 at 0.25 - (dx, 0) -.ps 10 - -arrow dashed from 0.5 - (2*dx, 0) to RHS_a2.ne - (2*dx,0) -line dashed from 0.5 - (dx, 0) right 4.0 * boxwid then to RHS_a1.ne - (2*dx, 0) -> -line dashed from 0.25 - (dx, 0) right then up .75 then right 7.0 * boxwid then to RHS_B.ne - (2*dx, 0) -> - - -# LHS pointers upper row -.ps 30 -circle radius .01 at 0.9 - (3*dx, 0) -circle radius .01 at 0.9 - (3*dx, 0) -circle radius .01 at 0.9 - (3*dx, 0) -.ps 10 - -line from 0.9 - (3*dx, 0) down -> -line from 0.9 - (3*dx, 0) down -> -line from 0.9 - (3*dx, 0) down then left 8.0 * boxwid then to LHS_A.se -> - - -# LHS pointers lower row -.ps 30 -circle radius .01 at 0.9 - (3*dx, 0) -circle radius .01 at 0.9 - (3*dx, 0) -circle radius .01 at 0.9 - (3*dx, 0) -.ps 10 - -line from 0.9 - (3*dx, 0) down -> -line from 0.9 - (3*dx, 0) down -> -line from 0.9 - (3*dx, 0) down then left 8.0 * boxwid then to LHS_B.se -> - - -# Text above structs -box invis ht boxht/2 "terminals" with .s at A_a.n -box invis ht boxht/2 "nonterminals" with .s at A.n -box invis ht boxht/2 "lhs" with .s at LHS_A.n -box invis ht boxht/2 
"lhs" with .s at LHS_B.n -box invis ht boxht/2 "symbol" with .s at RHS_a1.n -box invis ht boxht/2 "symbol" with .s at RHS_B.n -box invis ht boxht/2 "symbol" with .s at RHS_END1.n -box invis ht boxht/2 "symbol" with .s at RHS_a2.n -box invis ht boxht/2 "symbol" with .s at RHS_ALT.n -box invis ht boxht/2 "symbol" with .s at RHS_END2.n -.PE - -.LP -Note that the empty alternative for `B' is represented in the -data structure by the `LLEORULE-struct' immediately following -the `LLALT'-struct. When there are still other alternatives -the `LLEORULE'-struct is replaced by a `LLALT'-struct followed -by the other alternatives and a `LLEORULE'-struct. -Finally, when the empty rule is the only rule for a -nonterminal the RHS will consist only of a `LLEORULE'-struct. - -.SH -A.2 Delayed deletes - -.LP -We encountered a problem with deleting elements during the -prediction phase. Imagine that we have a nonterminal `B' on top of -the graph, and `B' has two alternatives. Now suppose that we -apply the first alternative and we find out that this alternative leads -to a `dead end', i.e. a head that does not match the input symbol, so we want -to get rid of it. When we delete it immediately the deletion algorithm -will also deallocate `[B]' and possibly some elements below `[B]'. -However, there was another alternative for `[B]' which was not yet -developed and maybe this alternative leads to a head which is legal. -But `[B]' has already been deleted and thus cannot be used anymore. A similar -situation can occur when we want to delete a joined element; -the substitution of a nonterminal -that only produces empty and thus has no element above it in the graph -can also lead to such a situation. We therefore decided to put `dead ends' -on a list, `cleanup_arr[]', and after the prediction phase has -finished we delete all elements on this list, and all their descendants -that become unreachable of course. 
- -.SH -A.3 Clearing flags - -.LP -We implemented two different ways to clear the flags set by the prediction -phase of the algorithm; the first recursively tracks down the whole graph -following the flags, the second puts all elements visited by -the prediction phase -on a list; after the prediction phase has finished the algorithm walks -through this list clearing the flags of all elements on it. We took measurements -on both algorithms and found out that with small programs the times -did not differ much but large programs were processed faster by the -second algorithm. Therefore we decided to use the second algorithm. - -.LP -To speed up the algorithm even more, we do not deallocate the list -after a prediction phase has finished. We just set the number of -elements on the list to 0. This saves considerably on the number -of `Malloc'-calls. - -.SH -A.4 Implementation of %erroneous directive - -.LP -As explained in chapter 3, the user can put a %erroneous directive -in front of a terminal, making the non-correcting error recovery -mechanism ignore that terminal. However, implementing this directive -was not entirely straightforward; consider, for example, the rule -.br -.nf - - A: 'a' | %erroneous 'b' | 'c'; - -.fi -.LP -Just leaving out terminal 'b' will not do, because then nonterminal -A produces empty all of a sudden, which it did not before. -The rule should become -.br -.nf - - A: 'a' | 'c'; - -.fi -but this is hard to implement in LLgen. We took a different approach: -we introduce a new terminal 'ERRONEOUS', and substitute it for all -terminals with an %erroneous directive in front of them. Thus, the -example rule becomes -.br -.nf - - A: 'a' | ERRONEOUS | 'c'; - -.fi -.LP -Since the terminal ERRONEOUS will never be in the input to the parser, -this has exactly the desired effect; when a predicting phase produces -ERRONEOUS as head of a prediction graph this head will never match the -input. 
In particular, it will not match the terminal that was -originally there (in this case 'b') so that terminal is no longer -regarded as part of the input language at that point. -.bp -.SH -Appendix B: Using the non-correcting error recovery - -.LP -To use the new non-correcting error recovery mechanism, LLgen has to -be called with the new flag -n. LLgen will then create an extra file -called `Lncor.c' which contains the code for the non-correcting recovery -mechanism. This file has to be compiled and linked with the rest -of the program, just like the file `Lpars.c'. - -.LP -The user-supplied error reporting routine `LLmessage' will have to be -modified slightly; when it is called with a positive parameter, it -should only set the attributes of the inserted token, but not report an -error. Note that the lexical analyzer still must return the same token -as it did the last time it was called. When LLmessage is called with -parameter 0, it should report that the token in global variable LLsymb -is illegal; if the value of LLsymb is `EOFILE', the routine should -report an unexpected End-of-file. When LLmessage is called with parameter --1, it should report that end-of-file was expected. To facilitate -switching between correcting and non-correcting error recovery, -the file Lpars.h contains a statement `#define LLNONCORR' -which indicates that the non-correcting -mechanism is enabled. 
-Here is a -skeleton for the modified LLmessage routine: -.nr PS 8 -.nr VS 10 -.LP -.br -.nf - - #include "Lpars.h" - extern int LLsymb; - - LLmessage(flag) - int flag; - { - if (flag < 0) - { - /* Error message "end-of-file expected" */; - } - else if (flag) - { - /* flag equals the number of the inserted token */ -#ifndef LLNONCORR - - /* Error message "token inserted" */; -#endif - - /* Code to set attributes for inserted token */ - /* Code to make lexical analyzer return same token as before */ - } - else - { - /* The number of the illegal or deleted token is in LLsymb */ -#ifndef LLNONCORR - - /* Error message "token deleted" */; -#else - - if (LLsymb == EOFILE) - { - /* Error message "unexpected end of file" */ - } - else - { - /* Error message "token illegal" */; - } -#endif - - } - - } - -.fi -.nr PS 10 -.nr VS 12 - -.LP -For best results, one should check if the parser calls other parsers -in semantic actions; if this is the case, and the called parser -processes the same input file as the calling parser, then a %substart -should be put in front of the semantic action that starts a parser. -If a semantic action calls parsers defined by startsymbols say -A and B, then `%substart A, B;' should be put in front of the action. -As an alternative, one can use the -s flag of LLgen; this has the -same effect as putting `%substart X, Y, ....;' in front of all -semantic actions, where X, Y, .... are the startsymbols of the grammar. -Clearly, it is preferable to analyze the grammar and put %substart -directives only where appropriate. - -Finally, beware of syntactic errors being handled in semantic -actions; e.g., one could have a rule like -.nr PS 8 -.nr VS 10 -.LP -.br -.nf - - Assignment_statement: lvalue - [ - '=' - { - error(":= expected"); - } - - | - - ':=' - ] - expression - ; -.fi - -.nr PS 10 -.nr VS 12 -.LP -To ensure that the non-correcting mechanism will recognize the -`=' as a syntactic error, a `%erroneous' directive should be -put in front of it. 
diff --git a/doc/LLgen/Makefile b/doc/LLgen/Makefile deleted file mode 100644 index 058e6f111..000000000 --- a/doc/LLgen/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -# $Id$ - -GRAP=grap -PIC=pic -EQN=eqn -REFER=refer -TBL=tbl - -all: ../LLgen.doc ../LLgen_NCER.doc - -../LLgen.doc: LLgen.n LLgen.refs - $(REFER) -sA+T -p LLgen.refs LLgen.n | $(EQN) | $(TBL) > $@ - -../LLgen_NCER.doc: LLgen_NCER.n - $(GRAP) LLgen_NCER.n | pic | eqn > $@ diff --git a/doc/LLgen/proto.make b/doc/LLgen/proto.make deleted file mode 100644 index 4eede6b8d..000000000 --- a/doc/LLgen/proto.make +++ /dev/null @@ -1,20 +0,0 @@ -# $Id$ - -#PARAMS do not remove this line! - -SRC_DIR = $(SRC_HOME)/doc/LLgen - -GRAP=grap -PIC=pic -EQN=eqn -REFER=refer -TBL=tbl - -all: $(TARGET_HOME)/doc/LLgen.doc $(TARGET_HOME)/doc/LLgen_NCER.doc - -$(TARGET_HOME)/doc/LLgen.doc: $(SRC_DIR)/LLgen.n $(SRC_DIR)/LLgen.refs - $(REFER) -sA+T -p $(SRC_DIR)/LLgen.refs $(SRC_DIR)/LLgen.n | $(EQN) | $(TBL) > $@ - -$(TARGET_HOME)/doc/LLgen_NCER.doc: $(SRC_DIR)/LLgen_NCER.n - $(GRAP) $(SRC_DIR)/LLgen_NCER.n | pic | eqn > $@ - diff --git a/doc/Makefile b/doc/Makefile deleted file mode 100644 index c63ad8c6d..000000000 --- a/doc/Makefile +++ /dev/null @@ -1,82 +0,0 @@ -# $Id$ - -# This Makefile is not supposed to be used in the doc source directory. -# Instead, it is supposed to be copied to the target doc directory. 
- -SUF=dit -PRINT=dis -NROFF=troff -MS=-ms -OPR=dip - -RESFILES= \ - toolkit.$(SUF) install.$(SUF) em.$(SUF) ack.$(SUF) v7bugs.$(SUF) \ - peep.$(SUF) cg.$(SUF) ncg.$(SUF) regadd.$(SUF) LLgen.$(SUF) \ - basic.$(SUF) crefman.$(SUF) pascal.$(SUF) pcref.$(SUF) val.$(SUF) \ - ansi_C.$(SUF) \ - 6500.$(SUF) i80.$(SUF) z80.$(SUF) top.$(SUF) ego.$(SUF) \ - m68020.$(SUF) occam.$(SUF) m2ref.$(SUF) ceg.$(SUF) nopt.$(SUF) \ - sparc.$(SUF) int.$(SUF) lint.$(SUF) - -.SUFFIXES: .doc .$(SUF) .lpr .out - -.doc.$(SUF): - $(NROFF) $(MS) $< > $@ - -# directly to the printer: -.doc.lpr: - $(NROFF) $(MS) $< | $(OPR) - -# to standard output -.doc.out: - @$(NROFF) $(MS) $< - -# Exceptions, to be run without -ms - -v7bugs.$(SUF): v7bugs.doc - $(NROFF) v7bugs.doc >$@ - -v7bugs.lpr: v7bugs.doc - $(NROFF) v7bugs.doc | $(OPR) - -v7bugs.out: v7bugs.doc - @$(NROFF) v7bugs.doc - -pcref.$(SUF): pcref.doc - $(NROFF) pcref.doc >$@ - -pcref.lpr: pcref.doc - $(NROFF) pcref.doc | $(OPR) - -pcref.out: pcref.doc - @$(NROFF) pcref.doc - -val.$(SUF): val.doc - $(NROFF) val.doc >$@ - -val.lpr: val.doc - $(NROFF) val.doc | $(OPR) - -val.out: val.doc - @$(NROFF) val.doc - -pr: - @make "SUF="$(SUF) "NROFF="$(NROFF) "MS="$(MS) \ - $(RESFILES) >make.pr.out 2>&1 - @$(PRINT) $(RESFILES) - -# The 'opr' entry creates a lot of paper ... but the user must be able -# to write the doc directory. I hope that this limits the users of -# this entry to persons that know what they are doing. -opr: - @make "SUF="$(SUF) "NROFF="$(NROFF) "MS="$(MS) $(RESFILES) - $(OPR) $(RESFILES) - -clean: - -rm -f $(RESFILES) - -# The distr entry is only used when making a distribution tree. -# It makes a version of the installation manual, suitable for a simple -# line printer. -distr: install.doc - tbl install.doc | nroff -Tlp $(MS) >install.pr diff --git a/doc/READ_ME b/doc/READ_ME deleted file mode 100644 index 1ca1295b6..000000000 --- a/doc/READ_ME +++ /dev/null @@ -1,8 +0,0 @@ -Some of these documents use a font called CW. 
-If this font is not available, reference to it can be changed with -a sed-script like - s/\.ft CW/.ft yourfont/ - s/\\f(CW/\\fyourfont/g - s/^.fp\(.*\)CW$/.fp\1yourfont/ -However, the font must be a constant-width font for the documents to look -reasonable. diff --git a/doc/ack.doc b/doc/ack.doc deleted file mode 100644 index 89637991f..000000000 --- a/doc/ack.doc +++ /dev/null @@ -1,444 +0,0 @@ -.\" $Id$ -.nr PD 1v -.tr ~ -.TL -Ack Description File -.br -Reference Manual -.AU -Ed Keizer -.AI -Vakgroep Informatica -Vrije Universiteit -Amsterdam -.NH -Introduction -.PP -The program \fIack\fP(I) internally maintains a table of -possible transformations and a table of string variables. -The transformation table contains one entry for each possible -transformation of a file. -Which transformations are used depends on the suffix of the -source file. -Each transformation table entry tells which input suffixes are -allowed and what suffix/name the output file has. -When the output file does not already satisfy the request of the -user (indicated with the flag \fB\-c.suffix\fP), the table is scanned -starting with the next transformation in the table for another -transformation that has as input suffix the output suffix of -the previous transformation. -A few special transformations are recognized, among them is the -combiner, which is -a program combining several files into one. -When no stop suffix was specified (flag \fB\-c.suffix\fP) \fIack\fP -stops after executing the combiner with as arguments the \- -possibly transformed \- input files and libraries. -\fIAck\fP will only perform the transformations in the order in -which they are presented in the table. -.LP -The string variables are used while creating the argument list -and program call name for -a particular transformation. -.NH -Which descriptions are used -.PP -\fIAck\fP always uses two description files: one to define the -front-end transformations and one for the machine dependent -back-end transformations. 
-Each description has a name. -First the way of determining -the name of the descriptions needed is described. -.PP -When the shell environment variable ACKFE is set \fIack\fP uses -that to determine the front-end table name, otherwise it uses -\fBfe\fP. -.PP -The way the backend table name is determined is more -convoluted. -.br -First, when the last filename in the program call name is not -one of \fIack\fP or the front-end call-names, -this filename is used as the backend description name. -Second, when the \fB\-m\fP flag is present the \fB\-m\fP is chopped off this -flag and the rest is used as the backend description name. -Third, when both failed the shell environment variable ACKM is -used. -Last, when also ACKM was not present the default backend is -used, determined by the definition of ACKM in h/local.h. -The presence and value of the definition of ACKM are -determined at compile time of \fIack\fP. -.PP -Now, we have the names, but that is only the first step. -\fIAck\fP stores a few descriptions at compile time. -These descriptions are simply files read in at compile time. -At the moment of writing this document, the descriptions -included are: pdp, fe, i86, m68k2, vax2 and int. -The name of a description is first searched for internally, -then in lib/descr/\fIname\fP, then in -lib/\fIname\fP/descr, and finally in the current -directory of the user. -.NH -Using the description file -.PP -Before starting on a narrative of the description file, -the introduction of a few terms is necessary. -All these terms are used to describe the scanning of zero -terminated strings, thereby producing another string or -sequence of strings. -.IP Backslashing 5 -.br -All characters preceded by \e are modified to prevent -recognition at further scanning. -This modification is undone before a string is passed to the -outside world as argument or message. -When reading the description files the -sequences \e\e, \e# and \e have a special meaning. 
-\e\e translates to a single \e, \e# translates to a single # -that is not -recognized as the start of a comment, but can be used in -recognition and finally, \e translates to nothing at -all, thereby allowing continuation lines. -.nr PD 0 -.IP "Variable replacement" -.br -The scan recognizes the sequences {{, {NAME} and {NAME?text}, -where NAME can be any combination of characters excluding ? and -} and text may be anything excluding }. -(~\e} is allowed of course~) -The first sequence produces an unescaped single {. -The second produces the contents of the NAME, definitions are -done by \fIack\fP and in description files. -When the NAME is not defined an error message is produced on -the diagnostic output. -The last sequence produces the contents of NAME if it is -defined and text otherwise. -.PP -.IP "Expression replacement" -.br -Syntax: (\fIsuffix sequence\fP:\fIsuffix sequence\fP=\fItext\fP) -.br -Example: (.c.p.e:.e=tail_em) -.br -If the two suffix sequences have a common member \-~\&.e in this -case~\- the text is produced. -When no common member is present the empty string is produced. -Thus the example given is a constant expression. -Normally, one of the suffix sequences is produced by variable -replacement. -\fIAck\fP sets three variables while performing the diverse -transformations: HEAD, TAIL and RTS. -All three variables depend on the properties \fIrts\fP and -\fIneed\fP from the transformations used. -Whenever a transformation is used for the first time, -the text following the \fIneed\fP is appended to both the HEAD and -TAIL variable. -The value of the variable RTS is determined by the first -transformation used with a \fIrts\fP property. -.IP -Two runtime flags have effect on the value of one or more of -these variables. -The flag \fB\-.suffix\fP has the same effect on these three variables -as if a file with that \fBsuffix\fP was included in the argument list -and had to be translated. 
-The flag \fB\-r.suffix\fP only has that effect on the TAIL -variable. -The program call names \fIacc\fP and \fIcc\fP have the effect -of an automatic \fB\-.c\fP flag. -\fIApc\fP and \fIpc\fP have the effect of an automatic \fB\-.p\fP flag. -.IP "Line splitting" -.br -The string is transformed into a sequence of strings by replacing -the blank space by string separators (nulls). -.IP "IO replacement" -.br -The > in the string is replaced by the output file name. -The < in the string is replaced by the input file name. -When multiple input files are present the string is duplicated -for each input file name. -.nr PD 1v -.LP -Each description is a sequence of variable definitions followed -by a sequence of transformation definitions. -Variable definitions use a line each, transformations -definitions consist of a sequence of lines. -Empty lines are discarded, as are lines with nothing but -comment. -Comment is started by a # character, and continues to the end -of the line. -Three special two-characters sequences exist: \e#, \e\e and -\e. -Their effect is described under 'backslashing' above. -Each \- nonempty \- line starts with a keyword, possibly -preceded by blank space. -The keyword can be followed by a further specification. -The two are separated by blank space. -.PP -Variable definitions use the keyword \fIvar\fP and look like this: -.DS X - var NAME=text -.DE -The name can be any identifier, the text may contain any -character. -Blank space before the equal sign is not part of the NAME. -Blank space after the equal is considered as part of the text. -The text is scanned for variable replacement before it is -associated with the variable name. -.br -.sp 2 -The start of a transformation definition is indicated by the -keyword \fIname\fP. -The last line of such a definition contains the keyword -\fIend\fP. -The lines in between associate properties to a transformation -and may be presented in any order. 
-The identifier after the \fIname\fP keyword determines the name -of the transformation. -This name is used for debugging and by the \fB\-R\fP flag. -The keywords are used to specify which input suffixes are -recognized by that transformation, -the program to run, the arguments to be handed to that program -and the name or suffix of the resulting output file. -Two keywords are used to indicate which run-time startoffs and -libraries are needed. -The possible keywords are: -.IP \fIfrom\fP -.br -followed by a sequence of suffixes. -Each file with one of these suffixes is allowed as input file. -Preprocessor transformations do not need the \fIfrom\fP -keyword. All other transformations do. -.nr PD 0 -.IP \fIto\fP -.br -followed by the suffix of the output file name or in the case of a -linker -the output file name. -.IP \fIprogram\fP -.br -followed by the name of the load file of the program, a pathname most likely -starts with either a / or {EM}. -This keyword must be -present, the remainder of the line -is subject to backslashing and variable replacement. -.IP \fImapflag\fP -.br -The mapflags are used to grab flags given to \fIack\fP and -pass them on to a specific transformation. -This feature uses a few simple pattern matching and replacement -facilities. -Multiple occurrences of this keyword are allowed. -The text following the keyword is -subjected to backslashing. -The keyword is followed by a match expression and a variable -assignment separated by blank space. -As soon as both description files are read, \fIack\fP looks -at all transformations in these files to find a match for the -flags given to \fIack\fP. -The flags \fB\-m\fP, \fB\-o\fP, -\fB\-O\fP, \fB\-r\fP, \fB\-v\fP, \fB\-g\fP, \-\fB\-c\fP, \fB\-t\fP, -\fB\-k\fP, \fB\-R\fP and \-\fB\-.\fP are specific to \fIack\fP and -not handed down to any transformation. -The matching is performed in the order in which the entries -appear in the definition. -The scanning stops after the first match is found. 
-When a match is found, the variable assignment is executed. -A * in the match expression matches any sequence of characters, -a * in the right hand part of the assignment is -replaced by the characters matched by -the * in the expression. -The right hand part is also subject to variable replacement. -The variable will probably be used in the program arguments. -The \fB\-l\fP flags are special, -the order in which they are presented to \fIack\fP must be -preserved. -The identifier LNAME is used in conjunction with the scanning of -\fB\-l\fP flags. -The value assigned to LNAME is used to replace the flag. -The example further on shows the use of all this. -.IP \fIargs\fP -.br -The keyword is followed by the program call arguments. -It is subject to backslashing, variable replacement, expression -replacement, line splitting and IO replacement. -The variables assigned to by \fImapflags\fP will probably be -used here. -The flags not recognized by \fIack\fP or any of the transformations -are passed to the linker and inserted before all other arguments. -.IP \fIstdin\fP -.br -This keyword indicates that the transformation reads from standard input. -.IP \fIstdout\fP -.br -This keyword indicates that the transformation writes on standard output. -.IP \fIoptimizer\fP -.br -The presence of this keyword indicates that this transformation is an optimizer. -It can be followed by a number, indicating the "level" of the -optimizer (see description of the -O option in the ack(1ACK) manual page). -.IP \fIpriority\fP -.br -This \-~optional~\- keyword is followed by a number. Positive priority means -that the transformation is likely to be used, negative priority means that -the transformation is unlikely to be used. -Priorities can also be set with a ack(1ACK) command line option. -Priorities come in handy when there are several implementations of a -certain transformation. They can then be used to select a default one. 
-.IP \fIlinker\fP -.br -This keyword indicates that this transformation is the linker. -.IP \fIcombiner\fP -.br -This keyword indicates that this transformation is a combiner. A combiner -is a program combining several files into one, but is not a linker. -An example of a combiner is the global optimizer. -.IP \fIprep\fP -.br -This \-~optional~\- keyword is followed by an option indicating its relation -to the preprocessor. -The possible options are: -.DS X - always the input files must be preprocessed - cond the input files must be preprocessed when starting with # - is this transformation is the preprocessor -.DE -.IP \fIrts\fP -.br -This \-~optional~\- keyword indicates that the rest of the line must be -used to set the variable RTS, if it was not already set. -Thus the variable RTS is set by the first transformation -executed with such a property or as a result from \fIack\fP's program -call name (acc, cc, apc or pc) or by the \fB\-.suffix\fP flag. -.IP \fIneed\fP -.br -This \-~optional~\- keyword indicates that the rest of the line must be -concatenated to the HEAD and TAIL variables. -This is done once for every transformation used or indicated -by one of the program call names mentioned above or indicated -by the \fB\-.suffix\fP flag. -.br -.nr PD 1v -.NH -Conventions used in description files -.PP -\fIAck\fP reads two description files. -A few of the variables defined in the machine specific file -are used by the descriptions of the front-ends. -Other variables, set by \fIack\fP, are of use to all -transformations. -.PP -\fIAck\fP sets the variable EM to the home directory of the -Amsterdam Compiler Kit. -The variable SOURCE is set to the name of the argument that is currently -being massaged, this is useful for debugging. -The variable SUFFIX is set to the suffix of the argument that is -currently being massaged. -.br -The variable M indicates the -directory in lib/{M}/tail_..... and NAME is the string to -be defined by the preprocessor with \-D{NAME}.
-The definitions of {w}, {s}, {l}, {d}, {f} and {p} indicate -EM_WSIZE, EM_SSIZE, EM_LSIZE, EM_DSIZE, EM_FSIZE and EM_PSIZE -respectively. -.br -The variable INCLUDES is used as the last argument to \fIcpp\fP. -It is used to add directories to -the list of directories containing #include files. -.PP -The variables HEAD, TAIL and RTS are set by \fIack\fP and used -to compose the arguments for the linker. -.NH -Example -.PP -Description for front-end -.DS X -.ta 4n 40n -name cpp # the C-preprocessor - # no from, it's governed by the P property - to .i # result files have suffix i - program {EM}/lib/cpp # pathname of loadfile - mapflag \-I* CPP_F={CPP_F?} \-I* # grab \-I.. \-U.. and - mapflag \-U* CPP_F={CPP_F?} \-U* # \-D.. to use as arguments - mapflag \-D* CPP_F={CPP_F?} \-D* # in the variable CPP_F - args {CPP_F?} {INCLUDES?} \-D{NAME} \-DEM_WSIZE={w} \-DEM_PSIZE={p} \e - \-DEM_SSIZE={s} \-DEM_LSIZE={l} \-DEM_FSIZE={f} \-DEM_DSIZE={d} < - # The arguments are: first the \-[IUD]... - # then the include dir's for this machine - # then the NAME and size values finally - # followed by the input file name - stdout # Output on stdout - prep is # Is preprocessor -end -name cem # the C-compiler proper - from .c # used for files with suffix .c - to .k # produces compact code files - program {EM}/lib/em_cem # pathname of loadfile - mapflag \-p CEM_F={CEM_F?} \-Xp # pass \-p as \-Xp to cem - mapflag \-L CEM_F={CEM_F?} \-l # pass \-L as \-l to cem - args \-Vw{w}i{w}p{p}f{f}s{s}l{l}d{d} {CEM_F?} - # the arguments are the object sizes in - # the \-V... 
flag and possibly \-l and \-Xp - stdin # input from stdin - stdout # output on stdout - prep always # use cpp - rts .c # use the C run-time system - need .c # use the C libraries -end -name decode # make human readable files from compact code - from .k.m # accept files with suffix .k or .m - to .e # produce .e files - program {EM}/lib/em_decode # pathname of loadfile - args < # the input file name is the only argument - stdout # the output comes on stdout -end -.DE - -.DS X -.ta 4n 40n -Example of a backend, in this case the EM assembler/loader. - -var w=2 # wordsize 2 -var p=2 # pointersize 2 -var s=2 # short size 2 -var l=4 # long size 4 -var f=4 # float size 4 -var d=8 # double size 8 -var M=em22 -var NAME=em22 # for cpp (NAME=em22 results in #define em22 1) -var LIB=lib/{M}/tail_ # part of file name for libraries -var RT=lib/{M}/head_ # part of file name for run-time startoff -var SIZE_FLAG=\-sm # default internal table size flag -var INCLUDES=\-I{EM}/include # use {EM}/include for #include files -name asld # Assembler/loader - from .k.m.a # accepts compact code and archives - to e.out # output file name - program {EM}/lib/em_ass # load file pathname - mapflag \-l* LNAME={EM}/{LIB}* # e.g. \-ly becomes - # {EM}/mach/int/lib/tail_y - mapflag \-+* ASS_F={ASS_F?} \-+* # recognize \-+ and \-\- - mapflag \-\-* ASS_F={ASS_F?} \-\-* - mapflag \-s* SIZE_FLAG=\-s* # overwrite old value of SIZE_FLAG - args {SIZE_FLAG} \e - ({RTS}:.c={EM}/{RT}cc) ({RTS}:.p={EM}/{RT}pc) \-o > < \e - (.p:{TAIL}={EM}/{LIB}pc) \e - (.c:{TAIL}={EM}/{LIB}cc.1s {EM}/{LIB}cc.2g) \e - (.c.p:{TAIL}={EM}/{LIB}mon) - # \-s[sml] must be first argument - # the next line contains the choice for head_cc or head_pc - # and the specification of in- and output. 
- # the last three args lines choose libraries - linker -end -.DE - -The command \fIack \-mem22 \-v \-v \-I../h \-L \-ly prog.c\fP -would result in the following -calls (with exec(II)): -.DS X -.ta 4n -1) /lib/cpp \-I../h \-I/usr/em/include \-Dem22 \-DEM_WSIZE=2 \-DEM_PSIZE=2 \e - \-DEM_SSIZE=2 \-DEM_LSIZE=4 \-DEM_FSIZE=4 \-DEM_DSIZE=8 prog.c -2) /usr/em/lib/em_cem \-Vw2i2p2f4s2l4d8 \-l -3) /usr/em/lib/em_ass \-sm /usr/em/lib/em22/head_cc \-o e.out prog.k - /usr/em/lib/em22/tail_y /usr/em/lib/em22/tail_cc.1s - /usr/em/lib/em22/tail_cc.2g /usr/em/lib/em22/tail_mon -.DE diff --git a/doc/ansi_C.doc b/doc/ansi_C.doc deleted file mode 100755 index df9d0c2a7..000000000 --- a/doc/ansi_C.doc +++ /dev/null @@ -1,365 +0,0 @@ -.de NS -.sp -.in 0 -\\fBANS \\$1:\\fP -.. -.TL -Amsterdam Compiler Kit-ANSI C compiler compliance statements -.AU -Hans van Eck -.AI -Dept. of Mathematics and Computer Science -Vrije Universiteit -Amsterdam, The Netherlands -.PP -This document specifies the implementation-defined behaviour of the ANSI-C -front end of the Amsterdam Compiler Kit as required by ANS X3.159-1989. Since -the implementation-defined behaviour sometimes depends on the machine -compiling on or for, some items will be left unspecified in this -document\(dg. -.FS -\(dg when cross-compiling, run-time behaviour may be different from -compile-time behaviour -.FE -The compiler assumes that it runs on a UNIX system. -.NS A.6.3.1 -.IP - -Diagnostics are placed on the standard error output. They have the -following specification: -.br -"", line : [()] -.br -There are three classes of diagnostics: "error", "strict" and "warning". -When the class is "error", the is absent. -.br -The class "strict" is used for violations of the standard which are -not severe enough to stop compilation. An example is the the occurrence -of non white-space after an '#else' or '#endif' pre-processing -directive. The class "warning" is used for legal but dubious -constructions. 
An example is overflow of constant expressions. -.NS A.6.3.2 -.IP - -The function 'main' can have two arguments. The first argument is an -integer specifying the number of arguments on the command line. The second -argument is a pointer to an array of pointers to the arguments (as -strings). -.IP - -Interactive devices are terminals. -.NS A.6.3.3 -.IP - -The number of significant characters is an option. By default it is 64. -There is a distinction between upper and lower case. -.NS A.6.3.4 -.IP - -The compiler assumes ASCII-characters in both the source and execution -character set. -.IP - -There are no multi-byte characters. -.IP - -There are 8 bits in a character. -.IP - -Character constants with values that can not be represented in 8 bits -are truncated. -.IP - -Character constants that are more than 1 character wide will have the -first character specified in the least significant byte. -.IP - -The only supported locale is "C". -.IP - -A plain 'char' has the same range of values as 'signed char'. -.NS A.6.3.5 -.IP - -The compiler assumes that it works on and compiles for a -2-complement binary-number system. Shorts will use 2 bytes and longs -will use 4 bytes. The size of integers is machine dependent. -.IP - -Converting an integer to a shorter signed integer is implemented by -ignoring the high-order byte(s) of the former. -Converting an unsigned integer to a signed integer of the same type is -only done in administration. This means that the bit-pattern remains -unchanged. -.IP - -The results of bitwise operations on signed integers are what can be -expected on a 2-complement machine. -.IP - -If either operand is negative, whether the result of the / operator is the -largest integer less than or equal to the algebraic quotient or the -smallest integer greater than or equal to the algebraic quotient is machine -dependent, as is the sign of the result of the % operator. -.IP - -The right-shift of a negative value is negative.
-.NS A.6.3.6 -.IP - -The representation of floating-point values is machine-dependent. -When native floating-point is not present an IEEE-emulation is used. -The compiler uses high-precision floating-point for constant folding. -.IP - -Truncation is always to the nearest floating-point number that can -be represented. -.NS A.6.3.7 -.IP - -The type returned by the sizeof-operator (also known as size_t) -is 'unsigned int'. This is done for backward compatibility reasons. -.IP - -Casting an integer to a pointer or vice versa has no effect in -bit-pattern when the sizes are equal. Otherwise the value will be -truncated or zero-extended (depending on the direction of the -conversion and the relative sizes). -.IP - -When a pointer is as large as an integer, the type of a 'ptrdiff_t' will -be 'int'. Otherwise the type will be 'long'. -.NS A.6.3.8 -.IP - -Since the front end has only limited control over the registers, it can -only make it more likely that variables that are declared as -registers also end up in registers. The only things that can possibly be -put into registers are : 'int', 'long', 'float', 'double', 'long double' -and pointers. -.NS A.6.3.9 -.IP - -When a member of a union object is accessed using a member of a -different type, the resulting value will usually be garbage. The -compiler makes no effort to catch these errors. -.IP - -The alignment of types is a compile-time option. The alignment of -a structure-member is the alignment of its type. Usually, the -alignment is passed on to the compiler by the 'ack' program. When a -user wants to do this manually, he/she should be prepared for trouble. -.IP - -A "plain" 'int' bit-field is taken as a 'signed int'. This means that -a field with a size of 1 bit can only store the values 0 and -1. -.IP - -The order of allocation of bit-fields is a compile-time option. By -default, high-order bits are allocated first. -.IP - -An enum has the same size as a "plain" 'int'. 
-.NS A.6.3.10 -.IP - -An access to a volatile declared variable is done by just mentioning -the variable. E.g. the statement "x;" where x is declared volatile, -constitutes an access. -.NS A.6.3.11 -.IP - -There is no fixed limit on the number of declarators that may modify an -arithmetic, structure or union type, although specifying too many may -cause the compiler to run out of memory. -.NS A.6.3.12 -.IP - -The maximum number of cases in a switch-statement is in the order of -1e9, although the compiler may run out of memory somewhat earlier. -.NS A.6.3.13 -.IP - -Since both the pre-processor and the compiler assume ASCII-characters, -a single character constant in a conditional-inclusion directive -matches the same value in the execution character set. -.IP - -The pre-processor recognizes -I... command-line options. The -directories thus specified are searched first. After that, depending on the -command that the preprocessor is called with, machine/system-dependent -directories are searched. After that, ~em/include/_tail_ac and -/usr/include are visited. -.IP - -Quoted names are first looked for in the directory in which the file -which does the include resides. -.IP - -The characters in a h- or q- char-sequence are taken to be UNIX -paths. -.IP - -Neither the compiler nor the preprocessor knows any pragmas. -.IP - -Since the compiler runs on UNIX, __DATE__ and __TIME__ will always be -defined. -.NS A.6.3.14 -.IP - -NULL is defined as ((void *)0). This is in order to flag dubious -constructions like "int x = NULL;". -.IP - -The diagnostic printed by 'assert' is as follows: -.ti +4n -"Assertion "" failed, file "", line ", -.br -where is the argument to the assert macro, printed as string. -(the and should be clear) -.KS -.IP - -The sets for character test macros. -.TS -l l.
-name: set: -isalnum() 0-9A-Za-z -isalpha() A-Za-z -iscntrl() \e000-\e037\e177 -islower() a-z -isupper() A-Z -isprint() -~ (== \e040-\e176) -.TE -.KE -As an addition, there is an isascii() macro, which tests whether a character -is an ascii character. Characters in the range from \e000 to \e177 are ascii -characters. -.KS -.IP - -The behaviour of mathematic functions on domain error: -.TS -l c -l n. -name: returns: -asin() 0.0 -acos() 0.0 -atan2() 0.0 -fmod() 0.0 -log() -HUGE_VAL -log10() -HUGE_VAL -pow() 0.0 -sqrt() 0.0 -.TE -.KE -.IP - -Underflow range errors do not cause errno to be set. -.IP - -The function fmod() returns 0.0 and sets errno to EDOM when the second -argument is 0.0. -.IP - -The set of signals for the signal() function depends on the UNIX-system -which the compiler is compiling for. The default handling, semantics -and behaviour of these signals are those specified by the operating -system vendor. The default handling is not reset when SIGILL is -received. -.IP - -A text-stream need not end in a new-line character. -.IP - -White space characters before a new-line appear when read in. -.IP - -There may be any number of null characters appended to a binary -stream. -.IP - -The file position indicator of an append mode stream is initially -positioned at the beginning of the file. -.IP - -A write on a text stream does not cause the associated file to be -truncated beyond that point. -.IP - -The buffering intended by the standard is fully supported. -.IP - -A zero-length file actually exists. -.IP - -A file name can consist of any character, except for the '\e0' and -the '/'. -.IP - -A file can be open multiple times. -.IP - -When a remove() is done on an open file, reading and writing behave -just as can be expected from a non-removed file. When the associated -stream is closed, all written data will be lost. -.IP - -When a file exists prior to a call to rename(), the behaviour is that -of the underlying UNIX system. Normally, the call would fail. 
-.IP - -The %p conversion in fprintf() has the same effect as %#x or %#lx, -depending on the sizes of pointer and integer. -.IP - -The %p conversion in fscanf() has the same effect as %x or %lx, -depending on the sizes of pointer and integer. -.IP - -A - character that is neither the first nor the last character in the -scanlist for %[ conversion is taken to be a range indicator. When the -first character has a higher ASCII-value than the second, the - will -just be put into the scanlist. -.IP - -The value of errno when fgetpos() or ftell() failed is that of lseek(). -This means: -.RS -.IP "EBADF \-" 10 -when the stream is not valid -.IP "ESPIPE \-" -when fildes is associated with a pipe (and on some systems: sockets) -.IP "EINVAL \-" -the resulting file pointer would be negative -.RE -.LP -.IP - -The messages generated by perror() depend on the value of errno. -The mapping of errors to strings is done by strerror(). -.IP - -When the requested size is zero, malloc(), calloc() and realloc() -return a null-pointer. -.IP - -When abort() is called, output buffers will be flushed. Temporary files -(made with the tmpfile() function) will have disappeared when SIGABRT -is not caught or ignored. -.IP - -The exit() function returns the low-order eight bits of its argument -to the environment. -.IP - -The predefined environment names are controlled by the user. -Setting environment variables is done through the putenv() function. -This function accepts a pointer to char as its argument. -To set f.i. the environment variable TERM to a230 one writes -.ti +4n -putenv("TERM=a230"); -.br -The argument to putenv() is stored in an internal table, so malloc'ed -strings can not be freed until another call to putenv() (which sets the -same environment variable) is made. The function returns 1 if it fails, -0 otherwise. -.LP -.IP - -The argument to system is passed as argument to /bin/sh -c. -.IP - -The strings returned by strerror() depend on errno in the following -way: -.TS -l l. 
-errno string -0 "Error 0", -EPERM "Not owner", -ENOENT "No such file or directory", -ESRCH "No such process", -EINTR "Interrupted system call", -EIO "I/O error", -ENXIO "No such device or address", -E2BIG "Arg list too long", -ENOEXEC "Exec format error", -EBADF "Bad file number", -ECHILD "No children", -EAGAIN "No more processes", -ENOMEM "Not enough core", -EACCES "Permission denied", -EFAULT "Bad address", -ENOTBLK "Block device required", -EBUSY "Mount device busy", -EEXIST "File exists", -EXDEV "Cross-device link", -ENODEV "No such device", -ENOTDIR "Not a directory", -EISDIR "Is a directory", -EINVAL "Invalid argument", -ENFILE "File table overflow", -EMFILE "Too many open files", -ENOTTY "Not a typewriter", -ETXTBSY "Text file busy", -EFBIG "File too large", -ENOSPC "No space left on device", -ESPIPE "Illegal seek", -EROFS "Read-only file system", -EMLINK "Too many links", -EPIPE "Broken pipe", -EDOM "Math argument", -ERANGE "Result too large" -.TE -everything else causes strerror() to return "unknown error" -.IP - -The local time zone is per default MET (GMT + 1:00:00). This can be -changed through the TZ environment variable, or by some changes in the -sources. -.IP - -The clock() function returns the number of ticks since process -startup. -.SH -References -.IP [1] -ANS X3.159-1989 -.I -American National Standard for Information Systems - -Programming Language C -.R diff --git a/doc/basic.doc b/doc/basic.doc deleted file mode 100644 index 52b767fbc..000000000 --- a/doc/basic.doc +++ /dev/null @@ -1,949 +0,0 @@ -.\" $Id$ -.TL -.de Sy -.LP -.IP \fBsyntax\fR 10 -.. -.de PU -.IP \fBpurpose\fR 10 -.. -.de RM -.IP \fBremarks\fR 10 -.. -The ABC compiler -.AU -Martin L. Kersten -Gert-Jan Akkerman -Marcel Worring -Edo Westerhuis -Frans Kunst -Ronnie Lachniet -.AI -Department of Mathematics and Computer Science.
-.br -Free University -.br -Amsterdam -.AB -This manual describes the -programming language BASIC and its compiler -included in the Amsterdam Compiler Kit. -.AE -.SH -INTRODUCTION. -.LP -The BASIC-EM compiler is an extensive implementation of the -programming language BASIC. -The language structure and semantics are modelled after the -BASIC interpreter/compiler of Microsoft (tr), a short comparison -is provided in appendix A. -.LP -The compiler generates code for a virtual machine, the EM machine -[[ACM, etc]]. -Using EM as an intermediate machine results in a highly portable -compiler and BASIC code. -.br -The drawback of EM is that it does not directly reflect one particular -hardware design, which means that many of the low level operations available -within BASIC are ill-defined or even inapplicable. -To mention a few, the peek and poke instructions are likely -to behave erroneously, while line printer and tapedeck -primitives are unknown. -.LP -This manual is divided into three chapters. -.br -Chapter 1 discusses the general language syntax and semantics. -.br -Chapter 2 describes the statements available in BASIC-EM. -.br -Chapter 3 describes the predefined functions, ordered alphabetically. -.LP -Appendix A discusses the differences with Microsoft BASIC. -.br -Appendix B describes all reserved symbols. -.LP -.LP -.SH -SYNTAX NOTATION -.LP -The conventions for syntax presentation are as follows: -.IP CAPS 10 -Items are reserved words, must be input as shown. -.IP <> 10 -Items in lowercase letters enclosed in angular brackets -are to be supplied by the user. -.IP [] 10 -Items are optional. -.IP \.\.\. 10 -Items may be repeated any number of times -.IP {} 10 -A choice between two or more alternatives. At least one of the entries -must be chosen. -.IP | 10 -Vertical bars separate the choices within braces. -.LP -All punctuation must be included where shown. -.bp -.NH 1 -GENERAL INFORMATION -.LP -The BASIC-EM compiler is designed for a UNIX based environment.
-It accepts a text file with a BASIC program (suffix .b) and generates -an executable file, called a.out. -.NH 2 -LINE FORMAT -.LP -A BASIC program consists of a series of lines, starting with a -positive line number in the range 0 to 32767. -A line may consist of more than one physical line on a terminal, but -is limited to 1024 characters. -Multiple BASIC statements may be placed on a single line, provided -they are separated by a colon (:). -.NH 2 -CONSTANTS -.LP -The BASIC compiler character set is comprised of alphabetic -characters, numeric characters, and special characters shown below. -.DS -= + - * / ^ ( ) % # $ \\ _ -! [ ] , . ; : & ' ? > < \\ (blank) -.DE -.LP -BASIC uses two different types of constants during processing: -numeric and string constants. -.br -A string constant is a sequence of characters taken from the ASCII -character set enclosed by double quotation marks. -.br -Numeric constants are positive or negative numbers, grouped into -five different classes. -.IP "a) integer constants" 25 -.br -Whole numbers in the range -32768 to 32767. Integer constants do -not contain decimal points. -.IP "b) fixed point constants" 25 -.br -Positive or negative real numbers, i.e. numbers with a decimal point. -.IP "c) floating point constants" 25 -.br -Real numbers in scientific notation. A floating point constant -consists of an optionally signed integer or fixed point number -followed by the letter E (or D) and an optionally signed integer -(the exponent). -The allowable range of floating point constants is 10^-38 to 10^+38. -.IP "d) Hex constants" 25 -.br -Hexadecimal numbers, denoted by the prefix &H. -.IP "e) Octal constants" 25 -.br -Octal numbers, denoted by the prefix &O. -.NH 2 -VARIABLES -.LP -Variables are names used to represent values in a BASIC program. -A variable is assigned a value by assignment specified in the program. -Before a variable is assigned its value is assumed to be zero.
-.br -Variable names are composed of letters, digits or the decimal point, -starting with a letter. Up to 40 characters are significant. -A variable name can be followed by any of the following type -declaration characters: -.IP % 5 -Defines an integer variable -.IP ! 5 -Defines a single precision variable (see below) -.IP # 5 -Defines a double precision variable -.IP $ 5 -Defines a string variable. -.LP -Besides single valued variables, values may be grouped into tables or arrays. -Each element in an array is referenced by the array name and an index, -such a variable is called a subscripted variable. -An array has as many subscripts as there are dimensions in the array, -the maximum of which is 11. -.br -If a variable starts with FN it is assumed to be a call to a user defined -function. -.br -A variable name may not be a reserved word nor the name -of a predefined function. -A list of all reserved identifiers is included as Appendix B. -.LP -NOTES: -.br -Two variables with the same name but different types are -considered illegal. -.br -The type of a variable without typedeclaration-character is set, -at its first occurrence in the program, -to the defaulttype which is (in this implementation) double precision. -.br -Multi-dimensional arrays must be declared before use (see -DIM-statement ). -.br -BASIC-EM differs from Microsoft BASIC in supporting floats in one precision -only (due to EM), i.e. doubles and floats have the same precision. -.NH 2 -EXPRESSIONS -.LP -When necessary the compiler will convert a numeric value from -one type to another. -A value is always converted to the precision of the variable it is assigned -to. -When a floating point value is converted to an integer the fractional -portion is rounded. -In an expression all values are converted to the same degree of precision, -i.e. that of the most precise operand. -.br -Division by zero results in the message "Division by zero".
-If overflow (or underflow) occurs, the "Overflow (underflow)" message is -displayed and execution is terminated (contrary to Microsoft). -.SH -Arithmetic -.LP -The arithmetic operators in order of precedence, are: -.DS L -^ Exponentiation -- Negation -*,/,\\\\\\\\,MOD Multiplication, Division, Remainder -+,- Addition, Subtraction -.DE -The operator \\\\ denotes integer division, its operands are rounded to -integers before the operator is applied. -Modulus arithmetic is denoted by the operator MOD, which yields the -integer value that is the remainder of an integer division. -.br -The order in which operators are performed can be changed with parentheses. -.SH -Relational -.LP -The relational operators in order of precedence, are: -.DS -= Equality -<> Inequality -< Less than -> Greater than -<= Less than or equal to ->= Greater than or equal to -.DE -The relational operators are used to compare two values and return -either "true" (-1) or "false" (0) (See IF statement). -The precedence of the relational operators is lower -than that of the arithmetic operators. -.SH -Logical -.LP -The logical operators perform tests on multiple relations, bit manipulations, -or boolean operations. -The logical operators return a bitwise result ("true" or "false"). -In an expression, logical operators are performed after the relational and -arithmetic operators. -The logical operators work by converting their operands to signed -two-complement integers in the range -32768 to 32767. -.DS -NOT Bitwise negation -AND Bitwise and -OR Bitwise or -XOR Bitwise exclusive or -EQV Bitwise equivalence -IMP Bitwise implies -.DE -.SH -Functional -.LP -A function is used in an expression to call a system or user defined -function. -A list of predefined functions is presented in chapter 3. -.SH -String operations -.LP -Strings can be concatenated by using +. Strings can be compared with -the relational operators. String comparison is performed in lexicographic -order.
-.NH 2 -ERROR MESSAGES -.LP -The occurrence of an error results in termination of the program -unless an ON....ERROR statement has been encountered. -.bp -.NH 1 -B-EM STATEMENTS -.LP -This chapter describes the statements available within the BASIC-EM -compiler. Each description is formatted as follows: -.Sy -Shows the correct syntax for the statement. See introduction of -syntax notation above. -.PU -Describes the purpose and details of the instructions. -.RM -Describes special cases, deviation from Microsoft BASIC etc. -.LP -.NH 2 -CALL -.Sy -CALL [()] -.PU -The CALL statement provides the means to execute procedures -and functions written in another language included in the -Amsterdam Compiler Kit. -The argument list consists of (subscripted) variables. -The BASIC compiler pushes the address of the arguments on the stack in order -of encounter. -.RM -Not yet available. -.NH 2 -CLOSE -.Sy -CLOSE [[#][,[#]]] -.PU -To terminate I/O on a disk file. - is the number associated with the file -when it was OPENed (See OPEN-statement). Omission of parameters results in closing -all files. -.sp -The END statement and STOP statement always issue a CLOSE of -all files. -.NH 2 -DATA -.Sy -DATA -.PU -DATA statements are used to construct a data bank of values that are -accessed by the program's READ statement. -DATA statements are non-executable, -the data items are assembled in a data file by the BASIC compiler. -This file can be replaced, provided the layout remains -the same (otherwise the RESTORE won't function properly). -.sp -The list of data items consists of numeric and string constants -as discussed in section 1. -Moreover, string constants starting with a letter and not -containing blanks, newlines, commas or colons need not be enclosed with -the string quotes. -.sp -DATA statements can be reread using the RESTORE statement. -.NH 2 -DEF FN -.Sy -DEF FN [()]= -.PU -To define and name a function that is written by the user.
- must be an identifier and should be preceded by FN, -which is considered integral part of the function name. - defines the expression to be evaluated upon function call. -.sp -The parameter list is comprised of a comma separated -list of variable names, used within the function definition, -that are to replaced by values upon function call. -The variable names defined in the parameterlist, called formal -parameters, do not affect the definition and use of variables -defined with the same name in the rest of the BASIC program. -.sp -A type declaration character may be suffixed to the function name to -designate the data type of the function result. -.NH 2 -DEFINT/SNG/DBL/STR -.Sy -DEF -.PU -Any undefined variable starting with the letter included in the range of -letters is declared of type unless a type declaration character -is appended. -The range of letters is a comma separated list of characters and -character ranges (-). -.NH 2 -DIM -.Sy -DIM -.PU -The DIM statement allocates storage for subscripted variables. -If an undefined subscripted variable is used -the maximum value of the array subscript is assumed to be 10. -A subscript out of range is signalled by the program (when ACK works) -The minimum subscript value is 0, unless the OPTION BASE statement has been -encountered. -.sp -All variables in a subscripted variable are initially zero. -.sp -BUGS. Multi-dimensional arrays MUST be defined. Subscript out of range is -left unnotified. -.NH 2 -END -.Sy -END -.PU -END terminates a BASIC program and returns to the UNIX shell. -An END statement at the end of the BASIC program is optional. -.NH 2 -ERR and ERL -.Sy -= ERR -.br -= ERL -.PU -Whenever an error occurs the variable ERR contains the -error number and ERL the BASIC line where the error occurred. -The variables are usually used in error handling routines -provided by the user. -.NH 2 -ERROR -.Sy -ERROR -.PU -To simulate the occurrence of a BASIC error. 
-To define a private error code a value must be used that is not already in -use by the BASIC runtime system. -The list of error messages currently in use can be found in appendix B. -.NH 2 -FIELD -.PU -To be implemented. -.NH 2 -FOR...NEXT -.Sy -FOR = TO[STEP] -.br - ...... -.br -NEXT [][,...] -.PU -The FOR statement allows a series of statements to be performed -repeatedly. is used as a counter. During the first -execution pass it is assigned the value , -an arithmetic expression. After each pass the counter -is incremented (decremented) with the step size , an expression. -Omission of the step size is interpreted as an increment of 1. -.br -Execution of the program lines specified between the FOR and the NEXT -statement is terminated as soon as is greater (less) than -.sp -The NEXT statement is labeled with the name(s) of the counter to be -incremented. -.sp -The variables mentioned in the NEXT statement may be omitted, in which case -the counter of the most recent FOR statement is incremented. -If a NEXT statement is encountered before its corresponding FOR statement, -the error message "NEXT without FOR" is generated. -.NH 2 -GET -.Sy -GET [#][, ] -.PU -To be implemented. -.NH 2 -GOSUB...RETURN -.Sy -GOSUB - ... -.br -RETURN -.PU -The GOSUB statement branches to the first statement of a subroutine. -The RETURN statement causes a branch back to the statement following the -most recent GOSUB statement. -A subroutine may contain more than one RETURN statement. -.sp -Subroutines may be called recursively. -Nesting of subroutine calls is limited, upon exceeding the maximum depth -the error message "XXXXX" is displayed. -.NH 2 -GOTO -.Sy -GOTO -.PU -To branch unconditionally to a specified line in the program. -If does not exist, the compilation error message -"Line not defined" is displayed. -.RM -Microsoft BASIC continues at the first line -equal or greater than the line specified.
-.NH 2 -IF...THEN -.Sy -.br -IF THEN {|} -[ELSE {|}] -.br -.Sy -IF GOTO -[ELSE {|}] -.PU -The IF statement is used -to make a decision regarding the program flow based on the -result of the expression. -If the expression is not zero, the THEN or GOTO clause is -executed. If the result of is zero, the THEN or -GOTO clause is ignored and the ELSE clause, if present, is -executed. -.br -IF..THEN..ELSE statements may be nested. -Nesting is limited by the length of the line. -The ELSE clause matches with the closest unmatched THEN. -.sp -When using IF to test equality for a value that is the -result of a floating point expression, remember that the -internal representation of the value may not be exact. -Therefore, the test should be against a range to -handle the relative error. -.RM -Microsoft BASIC allows a comma before THEN. -.NH 2 -INPUT -.Sy -INPUT [;][<"prompt string">;] -.PU -An INPUT statement can be used to obtain values from the user at the -terminal. -When an INPUT statement is encountered a question mark is printed -to indicate the program is awaiting data. -If <"prompt string"> is included, the string is printed before the -question mark. The question mark is suppressed when the prompt -string is followed by a comma, rather than a semicolon. -.sp -For each variable in the variable list a value should be supplied. -Data items presented should be separated by a comma. -.sp -The type of the variable in the variable list must agree with the -type of the data item entered. Responding with too few or too many -data items causes the message "?Redo". No assignment of input values -is made until an acceptable response is given. -.RM -The option to discard the carriage return with the semicolon after the -input symbol is not yet implemented. -.NH 2 -INPUT [#] -.Sy -INPUT #, -.PU -The purpose of the INPUT# statement is to read data items from a sequential -file and assign them to program variables. - is the number used to open the file for input.
-The variables mentioned are (subscripted) variables. -The type of the data items read should agree with the type of the variables. -A type mismatch results in the error message "XXXXX". -.sp -The data items on the sequential file are separated by commas and newlines. -In scanning the file, leading spaces, new lines, tabs, and -carriage returns are ignored. The first character encountered -is assumed to be the start of a new item. -String items need not be enclosed with double quotes, provided -they do not contain spaces, tabs, newlines or commas. -.RM -Microsoft BASIC won't assign values until the end of the input statement. -This means that the user has to supply all the information. -.NH 2 -LET -.Sy -[LET]= -.PU -To assign the value of an expression to a (subscripted) variable. -The type conversions as dictated in chapter 1 apply. -.NH 2 -LINE INPUT -.Sy -LINE INPUT [;][<"prompt string">;] -.PU -An entire line of input is assigned to the string variable. -See INPUT for the meaning of the <"prompt string"> option. -.NH 2 -LINE INPUT [#] -.Sy -LINE INPUT #, -.PU -Read an entire line of text from a sequential file -and assign it to a string variable. -.NH 2 -LSET and RSET -.PU -To be implemented -.NH 2 -MID$ -.Sy -MID$(,n[,m])= -.PU -To replace a portion of a string with another string value. -The characters of replace characters in -starting at position n. If m is present, at most m characters are copied, -otherwise all characters are copied. -However, the string obtained never exceeds the length of string expr1. -.NH 2 -ON ERROR GOTO -.Sy -ON ERROR GOTO -.PU -To enable error handling within the BASIC program. -An error may result from arithmetic errors, disk problems, interrupts, or -as a result of the ERROR statement. -After printing an error message the program is continued at the -statements associated with . -.sp -Error handling is disabled using ON ERROR GOTO 0. -Subsequent errors result in an error message and program termination.
-.NH 2 -ON...GOSUB and ON ...GOTO -.Sy -ON GOSUB -.br -ON GOTO -.PU -To branch to one of several specified line numbers or subroutines, based -on the result of the . The line numbers in the list are considered -the first, second, etc. alternatives. Branching to the first occurs when -the expression evaluates to one, to the second alternative on two, etc. -If the value of the expression is zero or greater than the number of alternatives, processing continues at the first statement following the ON..GOTO -(ON GOSUB) statement. -.sp -When the expression results in a negative number, -an "Illegal function call" error occurs. -.sp -BUG If the value of the expression is zero or greater than the number of -alternatives, processing does NOT continue at the first statement -following the ON..GOTO (ON GOSUB) statement. -.NH 2 -OPEN -.Sy -OPEN {"i" | "o" | "r" } , [#] , -.PU -To open (filename should be quoted) for input/reading or output. -If the file is not opened for output it has to exist, otherwise a -"file not found" error will occur. -.NH 2 -OPTION BASE -.Sy -OPTION BASE n -.PU -To declare the lower bound of subsequent array subscripts as either -0 or 1. The default lower bound is zero. -.NH 2 -POKE -.Sy -POKE , -.PU -To poke around in memory. The use of this statement is not recommended, -because it requires full understanding of both -the implementation of the Amsterdam -Compiler Kit and the hardware characteristics. -.NH 2 -PRINT -.Sy -PRINT -.PU -To print constants or the contents of variables on the terminal-device. -If the variables or constants are separated by commas the values will -be printed separated by tabs. -If the variables or constants are separated by semicolons the values -will be printed without spaces in between. -The new-line generated at the end of the print-statement can be suppressed by -a semicolon at the end of the list of variables or constants.
-.NH 2 -PRINT USING -.PU -To be implemented -.NH 2 -PUT -.PU -To be implemented -.NH 2 -RANDOMIZE -.Sy -RANDOMIZE [] -.PU -To reset the random seed. When the expression is omitted, the system -will ask for a value between -32768 and 32767. -The random number generator returns the same sequence of values provided -the same seed is used. -.NH 2 -READ -.Sy -READ -.PU -To read values from the DATA statements and assign them to variables. -The type of the variables should match the type of the items being read, -otherwise a "Syntax error" occurs. If all data is read the message "Out of -data" will be displayed. -.NH 2 -REM -.Sy -REM -.PU -To include explanatory information in a program. -The REM statements are not executed. -A single quote has the same effect as : REM, which -allows for the inclusion of a comment at the end of the line. -.RM -Microsoft BASIC does not allow REM statements as part of -DATA lines. -.NH 2 -RESTORE -.Sy -RESTORE [] -.PU -To allow DATA statements to be re-read from a specific line. -After a RESTORE statement is executed, the next READ accesses -the first item of the DATA statements. -If is specified, the next READ accesses the first -item in the specified line. -.sp -Note that data statements result in a sequential datafile generated -by the compiler, being read by the read statements. -This data file may be replaced using the operating system functions -with a modified version, provided the same layout of items -(same number of lines and items per line) is used. -.NH 2 -STOP -.Sy -STOP -.PU -To terminate the execution of a program and return to the operating system -command interpreter. A STOP statement results in the message "Break in line -???" -.NH 2 -SWAP -.Sy -SWAP , -.PU -To exchange the values of two variables. -.sp -BUG. Strings cannot be swapped ! -.NH 2 -TRON/TROFF -.Sy -TRON -.Sy -TROFF -.PU -As an aid in debugging the TRON statement results in a program -listing each line being interpreted.
TROFF disables generation of -this code. -.NH 2 -WHILE...WEND -.Sy -WHILE - ..... -WEND -.PU -To execute a series of BASIC statements as long as a conditional expression -is true. WHILE...WEND loops may be nested. -.NH 2 -WRITE -.Sy -WRITE [] -.PU -To write data at the terminal in DATA statement layout conventions. -The expressions should be separated by commas. -.NH 2 -WRITE # -.Sy -WRITE # , -.PU -To write a sequential data file, being opened with the "O" mode. -The values are written using the DATA statement layout conventions. -.bp -.NH -FUNCTIONS -.LP -.IP ABS(X) 25 -Returns the absolute value of expression X -.IP ASC(X$) 25 -Returns the numeric value of the first character of the string. -If X$ is not initialized an "Illegal function call" error -is returned. -.IP ATN(X) 25 -Returns the arctangent of X in radians. Result is in the range -of -pi/2 to pi/2. -.IP CDBL(X) 25 -Converts X to a double precision number. -.IP CHR$(X) 25 -Converts the integer value X to its ASCII character. -X must be in the range of 0 to 257. -It is used for cursor addressing and generating bell signals. -.IP CINT(X) 25 -Converts X to an integer by rounding the fractional portion. -If X is not in the range -32768 to 32767 an "Overflow" -error occurs. -.IP COS(X) 25 -Returns the cosine of X in radians. -.IP CSNG(X) 25 -Converts X to a single precision number. -.IP CVI(<2-bytes>) 25 -Convert two byte string value to integer number. -.IP CVS(<4-bytes>) 25 -Convert four byte string value to single precision number. -.IP CVD(<8-bytes>) 25 -Convert eight byte string value to double precision number. -.IP EOF[()] 25 -Returns -1 (true) if the end of a sequential file has been reached. -.IP EXP(X) 25 -Returns e (base of natural logarithm) to the power of X. -X should be less than 10000.0. -.IP FIX(X) 25 -Returns the truncated integer part of X. FIX(X) is -equivalent to SGN(X)*INT(ABS(X)).
-The major difference between FIX and INT is that FIX does not -return the next lower number for negative X. -.IP HEX$(X) 25 -Returns the string which represents the hexadecimal value of -the decimal argument. X is rounded to an integer using CINT -before HEX$ is evaluated. -.IP INT(X) 25 -Returns the largest integer <= X. -.IP INP$(X[,[#]Y]) 25 -Returns the string of X characters read from the terminal or -the designated file. -.IP LEN(X$) 25 -Returns the number of characters in the string X$. -Non-printable characters and blanks are counted too. -.IP LOC() 25 -For sequential files LOC returns the -position of the read/write head, counted in number of bytes. -For random files the function returns the record number just -read or written from a GET or PUT statement. -If nothing was read or written 0 is returned. -.IP LOG(X) 25 -Returns the natural logarithm of X. X must be greater than zero. -.IP MID$(X,I,[J]) 25 -Returns the first J characters from string X starting at position I in X. -If J is omitted all characters starting from position I in X are returned. -.IP MKI$(X) 25 -Converts an integer expression to a two-byte string. -.IP MKS$(X) 25 -Converts a single precision expression to a four-byte string. -.IP MKD$(X) 25 -Converts a double precision expression to an eight-byte string. -.IP OCT$(X) 25 -Returns the string which represents the octal value of the decimal -argument. X is rounded to an integer using CINT before OCT$ is evaluated. -.IP PEEK(I) 25 -Returns the byte read from the indicated memory. (Of limited use -in the context of ACK) -.IP POS(I) 25 -Returns the current cursor position. To be implemented. -.IP RIGHT$(X$,I) -Returns the rightmost I characters of string X$. -If I=0 then the empty string is returned. -.IP RND(X) 25 -Returns a random number between 0 and 1. X is a dummy argument. -.IP SGN(X) 25 -If X>0, SGN(X) returns 1. -.br -if X=0, SGN(X) returns 0. -.br -if X<0, SGN(X) returns -1. -.IP SIN(X) 25 -Returns the sine of X in radians.
-.IP SPACE$(X) 25 -Returns a string of spaces of length X. The expression -X is rounded to an integer using CINT. -.IP STR$(X) -Returns the string representation of the value of X. -.IP STRING$(I,J) 25 -Returns the string of length I whose characters all -have ASCII code J (or the first character when J is a string). -.IP TAB(I) 25 -Spaces to position I on the terminal. If the current -print position is already beyond space I, TAB -goes to that position on the next line. -Space 1 is the leftmost position, and the rightmost position -is width minus 1. To be used within PRINT statements only. -.IP TAN(X) 25 -Returns the tangent of X in radians. If TAN overflows -the "Overflow" message is displayed. -.IP VAL(X$) 25 -Returns the numerical value of string X$. -The VAL function strips leading blanks and tabs from the -argument string. -.bp -.SH -APPENDIX A DIFFERENCES WITH MICROSOFT BASIC -.LP -The following Microsoft commands and statements are -not recognized by the compiler. -.DS -SPC -USR -VARPTR -AUTO -CHAIN -CLEAR -CLOAD -COMMON -CONT -CSAVE -DELETE -EDIT -ERASE -FRE -KILL -LIST -LLIST -LOAD -LPRINT -MERGE -NAME -NEW -NULL -RENUM -RESUME -RUN -SAVE -WAIT -WIDTH LPRINT -.DE -Some statements are not available in the current implementation, -but will be soon. These include: -.DS -CALL -DEFUSR -FIELD -GET -INKEY -INPUT$ -INSTR$ -LEFT$ -LSET -RSET -PUT -.DE -.bp -.SH -APPENDIX B RESERVED WORDS IN BASIC-EM -.LP -The following words/symbols/names/identifiers are reserved, which -means that they cannot be used for variable-names.
-.DS -ABS AND ASC AS -ATN AUTO BASE CALL -CDBL CHAIN CHR CINT -CLEAR CLOAD CLOSE COMMON -CONT COS CSNG CSAVE -CVI CVS CVD DATA -DEFINT DEFSNG DEFDBL DEFSTR -DEF DELETE DIM EDIT -ELSE END EOF ERASE -ERROR ERR ERL ELSE -EQV EXP FIELD FIX -FOR FRE GET GOSUB -GOTO HEX IF IMP -INKEY INPUT INP INSTR -INT KILL LEFT LEN -LET LINE LIST LLIST -LOAD LOC LOG LPOS -LPRINT LSET MERGE MID -MKI MKS MKD MOD -NAME NEW NEXT NOT -NULL ON OCT OPEN -OPTION OR OUT PEEK -POKE PRINT POS PUT -RANDOMIZE READ REM RENUM -REN RESTORE RESUME RETURN -RIGHT RND RUN SAVE -STEP SGN SIN SPACE -SPC SQR STOP STRING -STR SWAP TAB TAN -THEN TO TRON TROFF -USING USR VAL VARPTR -WAIT WHILE WEND WIDTH -WRITE XOR -.DE diff --git a/doc/ceg/.distr b/doc/ceg/.distr deleted file mode 100644 index 92c3fa3af..000000000 --- a/doc/ceg/.distr +++ /dev/null @@ -1,3 +0,0 @@ -proto.make -ceg.ref -ceg.tr diff --git a/doc/ceg/Makefile b/doc/ceg/Makefile deleted file mode 100644 index de1e6080f..000000000 --- a/doc/ceg/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -PIC=pic -TBL=tbl -REFER=refer - -../ceg.doc: ceg.tr ceg.ref - $(PIC) ceg.tr | $(REFER) -e -p ceg.ref | $(TBL) > $@ diff --git a/doc/ceg/ceg.ref b/doc/ceg/ceg.ref deleted file mode 100644 index 54de7bf93..000000000 --- a/doc/ceg/ceg.ref +++ /dev/null @@ -1,42 +0,0 @@ -%T A Practical Toolkit For Making Compilers -%A A.S. Tanenbaum -%A H. v. Staveren -%A E.G. Keizer -%A J.W. Stevenson -%J Communications of the ACM -%V 26 -%N 9 -%D September 1983 - -%T Description of a Machine Architecture for Use with Block Structured Languages -%A A.S. Tanenbuum -%A H. v. Staveren -%A E.G. Keizer -%A J.W. Stevenson -%R IR-81 -%I Dept. Mathematics and Computer Science, Vrije Universiteit -%C Amsterdam -%D August 1983 - -%T EM_CODE(3ACK) -%A ACK Documentation -%I Dept. Mathematics and Computer Science, Vrije Universiteit -%C Amsterdam - -%T ACK.OUT(5ACK) -%A ACK Documentation -%I Dept. 
Mathematics and Computer Science, Vrije Universiteit -%C Amsterdam -%K aout - -%T PRINT(3ACK) -%A ACK Documentation -%I Dept. Mathematics and Computer Science, Vrije Universiteit -%C Amsterdam - -%T The C Programming Language -%A B.W. Kernighan -%A D.M. Ritchie -%I Prentice-Hall Inc. -%C Englewood Cliffs, New Jersey -%D 1978 diff --git a/doc/ceg/ceg.tr b/doc/ceg/ceg.tr deleted file mode 100644 index f26d40000..000000000 --- a/doc/ceg/ceg.tr +++ /dev/null @@ -1,1587 +0,0 @@ -.nr PS 12 -.nr VS 14 -.nr LL 6i -.tr ~ -.TL -The Code Expander Generator -.AU -Frans Kaashoek -Koen Langendoen -.AI -Dept. of Mathematics and Computer Science -Vrije Universiteit -Amsterdam, The Netherlands -.NH -Introduction -.PP -A \fBcode expander\fR (\fBce\fR for short) is a part of the -Amsterdam Compiler Kit -.[ -toolkit -.] -(\fBACK\fR) and provides the user with -high-speed generation of medium-quality code. Although conceptually -equivalent to the more usual \fBcode generator\fR, it differs in some -aspects. -.PP -Normally, a program to be compiled with \fBACK\fR -is first fed to the preprocessor. The output of the preprocessor goes -into the appropriate front end, which produces EM -.[ -block -.] -(a -machine independent low level intermediate code). The generated EM code is fed -into the peephole optimizer, which scans it with a window of a few instructions, -replacing certain inefficient code sequences by better ones. After the -peephole optimizer a back end follows, which produces high-quality assembly code. -The assembly code goes via the target optimizer into the assembler and the -object code then goes into the -linker/loader, the final component in the pipeline. -.PP -For various applications -this scheme is too slow. When debugging, for example, -compile time is more important than execution time of a program. 
-For this purpose a new scheme is introduced: -.IP \ \ 1: -The code generator and assembler are -replaced by a library, the \fBcode expander\fR, consisting of a set of -routines, one for every EM-instruction. Each routine expands its EM-instruction -into relocatable object code. In contrast, the usual ACK code generator uses -expensive pattern matching on sequences of EM-instructions. -The peephole and target optimizers are not used. -.IP \ \ 2: -These routines replace the usual EM-generating routines in the front end; this -eliminates the overhead of intermediate files. -.LP -This results in a fast compiler producing object files, ready to be -linked and loaded, at the cost of unoptimized object code. -.PP -Because of the -simple nature of the code expander, it is much easier to build, to debug, and to -test. Experience has demonstrated that a code expander can be constructed, -debugged, and tested in less than two weeks. -.PP -This document describes the tools for automatically generating a -\fBce\fR (a library of C files) from two tables and -a few machine-dependent functions. -A thorough knowledge of EM is necessary to understand this document. -.NH -The code expander generator -.PP -The code expander generator (\fBceg\fR) generates a code expander from -two tables and a few machine-dependent functions. This section explains how -\fBceg\fR works. The first half describes the transformations that are done on -the two tables. The -second half tells how these transformations are done by the \fBceg\fR. -.PP -A code expander consists of a set of routines that convert EM-instructions -directly to relocatable object code. These routines are called by a front -end through the EM_CODE(3ACK) -.[ -EM_CODE -.] -interface. To free the table writer of the burden of building -an object file, we supply a set of routines that build an object file -in the ACK.OUT(5ACK) -.[ -aout -.] -format (see appendix B).
This set of routines is called -the -\fBback\fR-primitives (see appendix A). In short, a code expander consists of a -set of routines that map the EM_CODE interface on the -\fBback\fR-primitives interface. -.PP -To avoid repetition of the same sequences of -\fBback\fR-primitives in different -EM-instructions -and to improve readability, the EM-to-object information must be supplied in -two -tables. The EM_table maps EM to an assembly language, and the as_table -maps -assembly code to \fBback\fR-primitives. The assembly language is chosen by the -table writer. It can either be an actual assembly language or his ad-hoc -designed language. -.LP -The following picture shows the dependencies between the different components: -.sp -.PS -linewid = 0.5i -A: line down 2i -B: line down 2i with .start at A.start + (1.5i, 0) -C: line down 2i with .start at B.start + (1.5i, 0) -D: arrow right with .start at A.center - (0.25i, 0) -E: arrow right with .start at B.center - (0.25i, 0) -F: arrow right with .start at C.center - (0.25i, 0) -"EM_CODE(3ACK)" at A.start above -"EM_table" at B.start above -"as_table" at C.start above -"source language " at D.start rjust -"EM" at 0.5 of the way between D.end and E.start -G: "assembly" at 0.5 of the way between E.end and F.start -H: " back primitives" at F.end ljust -"(user defined)" at G - (0, 0.2i) -" (ACK.OUT)" at H - (0, 0.2i) ljust -.PE -.PP -The picture suggests that, during compilation, the EM instructions are -first transformed into assembly instructions and then the assembly instructions -are transformed into object-generating calls. This -is not what happens in practice, although the user is free to think it does. -Actually, however the EM_table and the as_table are combined during code -expander generation time, yielding an imaginary compound table that results in -routines from the EM_CODE interface that generate object code directly. -.PP -As already indicated, the compound table does not exist either. 
Instead, each -assembly instruction in the as_table is converted to a routine generating C -.[ -Kernighan -.] -code -to generate C code to call the \fBback\fR-primitives. The EM_table is -converted into a program that for each EM instruction generates a routine, -using the routines generated from the as_table. Execution of the latter program -will then generate the code expander. -.PP -This scheme allows great flexibility -in the table writing, while still -resulting in a very efficient code expander. One implication is that the -as_table is interpreted twice and the EM_table only once. This has consequences -for their structure. -.PP -To illustrate what happens, we give an example. The example is an entry in -the tables for the VAX-machine. The assembly language chosen is a subset of the -VAX assembly language. -.PP -One of the most fundamental operations in EM is ``loc c'', load the value of c -on the stack. To expand this instruction the -tables contain the following information: -.DS -EM_table : -.ft CW - C_loc ==> "pushl $$$1". - /* $1 refers to the first argument of C_loc. - * $$ is a quoted $. */ - - -\fRas_table : -.ft CW - pushl src : CONST ==> - @text1( 0xd0); - @text1( 0xef); - @text4( %$( src->num)). -\fR -.DE -.LP -The as_table is transformed in the following routine: -.DS -.ft CW -pushl_instr(src) -t_operand *src; -/* ``t_operand'' is a struct defined by the - * table writer. */ -{ - printf("swtxt();"); - printf("text1( 0xd0 );"); - printf("text1( 0xef );"); - printf("text4(%s);", substitute_dollar( src->num)); -} -\fR -.DE -Using ``pushl_instr()'', the following routine is generated from the EM_table: -.DS -.ft CW -C_loc( c) -arith c; -/* text1() and text4() are library routines that fill the - * text segment. */ -{ - swtxt(); - text1( 0xd0); - text1( 0xef); - text4( c); -} -\fR -.DE -.LP -A compiler call to ``C_loc()'' will cause the 1-byte numbers ``0xd0'' -and ``0xef'' -and the 4-byte value of the variable ``c'' to be stored in the text segment. 
-.PP -The transformations on the tables are done automatically by the code expander -generator. -The code expander generator is made up of two tools: -\fBemg\fR and \fBasg\fR. \fBAsg\fR -transforms -each assembly instruction into a C routine. These C routines generate calls -to the \fBback\fR-primitives. The generated C routines are used -by \fBemg\fR to generate the actual code expander from the EM_table. -.PP -The link between \fBemg\fR and \fBasg\fR is an assembly language. -We did not enforce a specific syntax for the assembly language; -instead we have given the table writer the freedom -to make an ad-hoc assembly language or to use an actual assembly language -suitable for his purpose. Apart from a greater flexibility this -has another advantage; if the table writer adopts the assembly language that -runs on the machine at hand, he can test the EM_table independently from the -as_table. Of course there is a price to pay: the table writer has to -do the decoding of the operands himself. See section 4 for more details. -.PP -Before we describe the structure of the tables in detail, we will give -an overview of the four main phases. -.IP "phase 1:" -.br -The as_table is transformed by \fBasg\fR. This results in a set of C routines. -Each assembly-opcode generates one C routine. Note that a call to such a -routine does not generate the corresponding object code; it generates C code, -which, when executed, generates the desired object code. -.IP "phase 2:" -.br -The C routines generated by \fBasg\fR are used by emg to expand the EM_table. -This -results in a set of C routines, the code expander, which conform to the -procedural interface EM_CODE(3ACK). A call to such a routine does indeed -generate the desired object code. -.IP "phase 3:" -.br -The front end that uses the procedural interface is linked/loaded with the -code expander generated in phase 2 and the \fBback\fR-primitives (a supplied -library). This results in a compiler. 
-.IP "phase 4:" -.br -The compiler runs. The routines in the code expander are -executed and produce object code. -.RE -.NH -Description of the EM_table -.PP -This section describes the EM_table. It contains four subsections. -The first three subsections describe the syntax of the EM_table, -the -semantics of the EM_table, and the functions and -constants that must be present in the EM_table, in the file ``mach.c'' or in -the file ``mach.h''. The last subsection explains how a table writer can generate -assembly code instead of object code. The subsection on -semantics contains many examples. -.NH 2 -Grammar -.PP -The following grammar describes the syntax of the EM_table. -.VS +4 -.TS -center tab(%); -l c l. -TABLE%::=%( RULE)* -RULE%::=%C_instr ( COND_SEQUENCE | SIMPLE) -COND_SEQUENCE%::=%( condition SIMPLE)* ``default'' SIMPLE -SIMPLE%::=% ``==>'' ACTION_LIST -ACTION_LIST%::=%[ ACTION ( ``;'' ACTION)* ] ``.'' -ACTION%::=%AS_INSTR -%|%function-call -AS_INSTR%::=%``"'' [ label ``:''] [ INSTR] ``"'' -INSTR%::=%mnemonic [ operand ( ``,'' operand)* ] -.TE -.VS -4 -.PP -The ``('' ``)'' brackets are used for grouping, ``['' ... ``]'' -means ... 0 or 1 time, -a ``*'' means zero or more times, and -a ``|'' means -a choice between left or right. A \fBC_instr\fR is -a name in the EM_CODE(3ACK) interface. \fBcondition\fR is a C expression. -\fBfunction-call\fR is a call of a C function. \fBlabel\fR, \fBmnemonic\fR, -and \fBoperand\fR are arbitrary strings. If an \fBoperand\fR -contains brackets, the -brackets must match. There is an upper bound on the number of -operands; the maximum number is defined by the constant MAX_OPERANDS in the -file ``const.h'' in the directory assemble.c. Comments in the table should be -placed between ``/*'' and ``*/''. -The table is processed by the C preprocessor, before being parsed by -\fBemg\fR. -.NH 2 -Semantics -.PP -The EM_table is processed by \fBemg\fR. \fBEmg\fR generates a C function -for every instruction in the EM_CODE(3ACK).
-For every EM-instruction not mentioned in the EM_table, a -C function that prints an error message is generated. -It is possible to divide the EM_CODE(3ACK)-interface into four parts : -.IP \0\01: -text instructions (e.g., C_loc, C_adi, ..) -.IP \0\02: -pseudo instructions (e.g., C_open, C_df_ilb, ..) -.IP \0\03: -storage instructions (e.g., C_rom_icon, ..) -.IP \0\04: -message instructions (e.g., C_mes_begin, ..) -.LP -This section starts with giving the semantics of the grammar. The examples -are text instructions. The section ends with remarks on the pseudo -instructions and the storage instructions. Since message instructions are not -useful for a code expander, they are ignored. -.PP -.NH 3 -Actions -.PP -The EM_table is made up of rules describing how to expand a \fBC_instr\fR -defined by the EM_CODE(3ACK)-interface (corresponding -to an EM instruction) into actions. -There are two kinds of actions: assembly instructions and C function calls. -An assembly instruction is defined as a mnemonic followed by zero or more -operands separated by commas. The semantics of an assembly instruction is -defined by the table writer. When the assembly language is not expressive -enough, then, as an escape route, function calls can be made. However, this -reduces -the speed of the actual code expander. Finally, actions can be grouped into -a list of actions; actions are separated by a semicolon and terminated -by a ``.''. -.DS -.ft CW -C_nop ==> . - /* Empty action list : no operation. */ - -C_inc ==> "incl (sp)". - /* Assembler instruction, which is evaluated - * during expansion of the EM_table */ - -C_slu ==> C_sli( $1). - /* Function call, which is evaluated during - * execution of the compiler. */ -\fR -.DE -.NH 3 -Labels -.PP -Since an assembly language without instruction labels is a rather weak -language, labels inside a contiguous block of assembly instructions are -allowed. 
When using labels two rules must be observed: -.IP \0\01: -The name of a label should be unique inside an action list. -.IP \0\02: -The labels used in an assembler instruction should be defined in the same -action list. -.LP -The following example illustrates the usage of labels. -.DS -.ft CW - /* Compare the two top elements on the stack. */ -C_cmp ==> "pop bx"; - "pop cx"; - "xor ax, ax"; - "cmp cx, bx"; - /* Forward jump to local label */ - "je 2f"; - "jb 1f"; - "inc ax"; - "jmp 2f"; - "1: dec ax"; - "2: push ax". -\fR -.DE -We will come back to labels in the section on the as_table. -.NH 3 -Arguments of an EM instruction -.PP -In most cases the translation of a \fBC_instr\fR depends on its arguments. -The arguments of a \fBC_instr\fR are numbered from 1 to \fIn\fR, where \fIn\fR -is the -total number of arguments of the current \fBC_instr\fR (there are a few -exceptions, see Implicit arguments). The table writer may -refer to an argument as $\fIi\fR. If a plain $-sign is needed in an -assembly instruction, it must be preceded by an extra $-sign. -.PP -There are two groups of \fBC_instr\fRs whose arguments are handled specially: -.RS -.IP "1: Instructions dealing with local offsets" -.br -The value of the $\fIi\fR argument referring to a parameter ($\fIi\fR >= 0) -is increased by ``EM_BSIZE''. ``EM_BSIZE'' is the size of the return status block -and must be defined in the file ``mach.h'' (see section 3.3). For example : -.DS -.ft CW -C_lol ==> "push $1(bp)". - /* automatic conversion of $1 */ -\fR -.DE -.IP "2: Instructions using global names or instruction labels" -.br -All the arguments referring to global names or instruction labels will be -transformed into a unique assembly name. To prevent name clashes with library -names the table writer has to provide the -conversions in the file ``mach.h''. For example : -.DS -.ft CW -C_bra ==> "jmp $1".
- /* automatic conversion of $1 */ - /* type arith is converted to string */ -\fR -.DE -.RE -.NH 3 -Conditionals -.PP -The rules in the EM_table can be divided into two groups: simple rules and -conditional rules. The simple rules are made up of a \fBC_instr\fR followed by -a list of actions, as described above. The conditional rules (COND_SEQUENCE) -allow the table writer to select an action list depending on the value of -a condition. -.PP -A CONDITIONAL is a list of a boolean expression with the corresponding -simple rule. If -the expression evaluates to true then the corresponding simple rule is carried -out. If more than one condition evaluates to true, the first one is chosen. -The last case of a COND_SEQUENCE of a \fBC_instr\fR must handle -the default case. -The boolean expressions in a COND_SEQUENCE must be C expressions. Besides the -ordinary C operators and constants, $\fIi\fR references can be used -in an expression. -.DS -.ft CW - /* Load address of LB $1 levels back. */ -C_lxl - $1 == 0 ==> "pushl fp". - $1 == 1 ==> "pushl 4(ap)". - default ==> "movl $$$1, r0"; - "jsb .lxl"; - "pushl r0". -\fR -.DE -.NH 3 -Abbreviations -.PP -EM instructions with an external as an argument come in three variants in -the EM_CODE(3ACK) interface. In most cases it will be possible to take -these variants together. For this purpose the ``..'' notation is introduced. -For the code expander there is no difference between the -following instructions. -.DS -.ft CW -C_loe_dlb ==> "pushl $1 + $2". -C_loe_dnam ==> "pushl $1 + $2". -C_loe ==> "pushl $1 + $2". -\fR -.DE -So it can be written in the following way. -.DS -.ft CW -C_loe.. ==> "pushl $1 + $2". -\fR -.DE -.NH 3 -Implicit arguments -.PP -In the last example ``C_loe'' has two arguments, but in the EM_CODE interface -it has one argument. This argument depends on the current ``hol'' -block; in the EM_table this is made explicit. 
Every \fBC_instr\fR whose -argument depends on a ``hol'' block has one extra argument; argument 1 refers -to the ``hol'' block. -.NH 3 -Pseudo instructions -.PP -Most pseudo instructions are machine independent and are provided -by \fBceg\fR. The table writer has only to supply the following functions, -which are used to build a stackframe: -.DS -.ft CW -C_prolog() -/* Performs the prolog, for example save - * return address */ - -C_locals( n) -arith n; -/* Allocate n bytes for locals on the stack */ - -C_jump( label) -char *label; -/* Generates code for a jump to ``label'' */ -\fR -.DE -.LP -These functions can be defined in ``mach.c'' or in the EM_table (see -section 3.3). -.NH 3 -Storage instructions -.PP -The storage instructions ``C_bss_\fIcstp()\fR'', ``C_hol_\fIcstp()\fR'', -''C_con_\fIcstp()\fR'', and ``C_rom_\fIcstp()\fR'', except for the instructions -dealing with constants of type string (C_..._icon, C_..._ucon, C_..._fcon), are -generated automatically. No information is needed in the table. -To generate the C_..._icon, C_..._ucon, C_..._fcon instructions -\fBceg\fR only has to know how to convert a number of type string to bytes; -this can be defined with the constants ONE_BYTE, TWO_BYTES, and FOUR_BYTES. -C_rom_icon, C_con_icon, C_bss_icon, C_hol_icon can be abbreviated by ..icon. -This also holds for ..ucon and ..fcon. -For example : -.DS -.ft CW -\\.\\.icon - $2 == 1 ==> gen1( (ONE_BYTE) atoi( $1)). - $2 == 2 ==> gen2( (TWO_BYTES) atoi( $1)). - $2 == 4 ==> gen4( (FOUR_BYTES) atol( $1)). - default ==> arg_error( "..icon", $2). -\fR -.DE -Gen1(), gen2() and gen4() are \fBback\fR-primitives (see appendix A), and -generate one, two, or four byte constants. Atoi() is a C library function that -converts strings to integers. -The constants ``ONE_BYTE'', ``TWO_BYTES'', and ``FOUR_BYTES'' must be defined in -the file ``mach.h''. 
-.NH 2 -User supplied definitions and functions -.PP -If the table writer uses all the default functions he has only to supply -the following constants and functions : -.TS -tab(#); -l c lw(10c). -C_prolog()#:#T{ -Do prolog -T} -C_jump( l)#:#T{ -Perform a jump to label l -T} -C_locals( n)#:#T{ -Allocate n bytes on the stack -T} -# -NAME_FMT#:#T{ -Print format describing name to a unique name conversion. The format must -contain %s. -T} -DNAM_FMT#:#T{ -Print format describing data-label to a unique name conversion. The format -must contain %s. -T} -DLB_FMT#:#T{ -Print format describing numerical-data-label to a unique name conversion. -The format must contain a %ld. -T} -ILB_FMT#:#T{ -Print format describing instruction-label to a unique name conversion. -The format must contain %d followed by %ld. -T} -HOL_FMT#:#T{ -Print format describing hol-block-number to a unique name conversion. -The format must contain %d. -T} -# -EM_WSIZE#:#T{ -Size of a word in bytes on the target machine -T} -EM_PSIZE#:#T{ -Size of a pointer in bytes on the target machine -T} -EM_BSIZE#:#T{ -Size of base block in bytes on the target machine -T} -# -ONE_BYTE#:#T{ -\\C suitable type that can hold one byte on the machine where the \fBce\fR runs -T} -TWO_BYTES#:#T{ -\\C suitable type that can hold two bytes on the machine where the \fBce\fR runs -T} -FOUR_BYTES#:#T{ -\\C suitable type that can hold four bytes on the machine where the \fBce\fR runs -T} -# -BSS_INIT#:#T{ -The default value that the loader puts in the bss segment -T} -# -BYTES_REVERSED#:#T{ -Must be defined if the byte order must be reversed. -By default the least significant byte is outputted first.\fR\(dg -.FS -\fR\(dg When both byte orders are used, for -example NS 16032, the table writer has to -supply his own set of routines. -.FE -T} -WORDS_REVERSED#:#T{ -Must be defined if the word order must be reversed. -By default the least significant word is outputted first. 
-T} -.TE -.LP -An example of the file ``mach.h'' for the vax4. -.TS -tab(:); -l l l. -#define : ONE_BYTE : int -#define : TWO_BYTES : int -#define : FOUR_BYTES : long -: -#define : EM_WSIZE : 4 -#define : EM_PSIZE : 4 -#define : EM_BSIZE : 0 -: -#define : BSS_INIT : 0 -: -#define : NAME_FMT : "_%s" -#define : DNAM_FMT : "_%s" -#define : DLB_FMT : "_%ld" -#define : ILB_FMT : "I%03d%ld" -#define : HOL_FMT : "hol%d" -.TE -Notice that EM_BSIZE is zero. The vax ``call'' instruction takes automatically -care of the base block. -.PP -There are three primitives that have to be defined by the table writer, either -as functions in the file ``mach.c'' or as rules in the EM_table. -For example, for the 8086 they look like this: -.DS -.ft CW -C_jump ==> "jmp $1". - -C_prolog ==> "push bp"; - "mov bp, sp". - -C_locals - $1 == 0 ==> . - $1 == 2 ==> "push ax". - $1 == 4 ==> "push ax"; - "push ax". - default ==> "sub sp, $1". -\fR -.DE -.NH 2 -Generating assembly code -.PP -When the code expander generator is used for generating assembly instead of -object code (see section 5), additional print formats have to be defined -in ``mach.h''. The following table lists these formats. -.TS -tab(#); -l c lw(10c). -BYTE_FMT#:#T{ -Print format to allocate and initialize one byte. The format must -contain %ld. -T} -WORD_FMT#:#T{ -Print format to allocate and initialize one word. The format must -contain %ld. -T} -LONG_FMT#:#T{ -Print format to allocate and initialize one long. The format must -contain %ld. -T} -BSS_FMT#:#T{ -Print format to allocate space in the bss segment. The format must -contain %ld (number of bytes). -T} -COMM_FMT#:#T{ -Print format to declare a "common". The format must contain a %s (name to be declared -common), followed by a %ld (number of bytes). -T} - -SEGTXT_FMT#:#T{ -Print format to switch to the text segment. -T} -SEGDAT_FMT#:#T{ -Print format to switch to the data segment. -T} -SEGBSS_FMT#:#T{ -Print format to switch to the bss segment. 
-T} - -SYMBOL_DEF_FMT#:#T{ -Print format to define a label. The format must contain %s. -T} -GLOBAL_FMT#:#T{ -Print format to declare a global name. The format must contain %s. -T} -LOCAL_FMT#:#T{ -Print format to declare a local name. The format must contain %s. -T} - -RELOC1_FMT#:#T{ -Print format to initialize a byte with an address expression. The format must -contain %s (name) and %ld (offset). -T} -RELOC2_FMT#:#T{ -Print format to initialize a word with an address expression. The format must -contain %s (name) and %ld (offset). -T} -RELOC4_FMT#:#T{ -Print format to initialize a long with an address expression. The format must -contain %s (name) and %ld (offset). -T} - -ALIGN_FMT#:#T{ -Print format to align a segment. -T} -.TE -.NH 1 -Description of the as_table -.PP -This section describes the as_table. Like the previous section, it is divided -into -four parts: the first two parts describe the grammar and the semantics of the -as_table; the third part gives an overview -of the functions and the constants that must be present in the as_table (in -the file ``as.h'' or in the file ``as.c''); the last part describes the case when -assembly is generated instead of object code. -The part on semantics contains examples that appear in the as_table for the -VAX or for the 8086. -.NH 2 -Grammar -.PP -The form of the as_table is given by the following grammar : -.VS +4 -.TS -center tab(#); -l c l. 
-TABLE#::=#( RULE)* -RULE#::=#( mnemonic | ``...'') DECL_LIST ``==>'' ACTION_LIST -DECL_LIST#::=#DECLARATION ( ``,'' DECLARATION)* -DECLARATION#::=#operand [ ``:'' type] -ACTION_LIST#::=#ACTION ( ``;'' ACTION)* ``.'' -ACTION#::=#IF_STATEMENT -#|#function-call -#|#``@''function-call -IF_STATEMENT#::=#``@if'' ``('' condition ``)'' ACTION_LIST -##( ``@elsif'' ``('' condition ``)'' ACTION_LIST)* -##[ ``@else'' ACTION_LIST] -##``@fi'' -function-call#::=#function-identifier ``('' [arg (,arg)*] ``)'' -arg#::=#argument -#|#reference -.TE -.VS -4 -.LP -\fBmnemonic\fR, \fBoperand\fR, and \fBtype\fR are all C identifiers; -\fBcondition\fR is a normal C expression; -\fBfunction-call\fR must be a C function call. A function can be called with -standard C arguments or with a reference (see section 4.2.4). -Since the as_table is -interpreted during code expander generation as well as during code -expander execution, two levels of calls are present in it. A ``function-call'' -is done during code expander generation, a ``@function-call'' during code -expander execution. -.NH 2 -Semantics -.PP -The as_table is made up of rules that map assembly instructions onto -\fBback\fR-primitives, a set of functions that construct an object file. -The table is processed by \fBasg\fR, which generates a C function -for each assembler mnemonic. The names of -these functions are the assembler mnemonics postfixed -with ``_instr'' (e.g., ``add'' becomes ``add_instr()''). These functions -will be used by the function -assemble() during the expansion of the EM_table. -After explaining the semantics of the as_table the function -assemble() will be described. -.NH 3 -Rules -.PP -A rule in the as_table is made up of a left and a right hand side; -the left hand side describes an assembler -instruction (mnemonic and operands); the -right hand side gives the corresponding actions as \fBback\fR-primitives or as -functions defined by the table writer, which call \fBback\fR-primitives.
-Two simple examples from the VAX as_table and the 8086 as_table, resp.: -.DS -.ft CW -movl src, dst ==> @text1( 0xd0); - gen_operand( src); - gen_operand( dst). - /* ``gen_operand'' is a function that encodes - * operands by calling back-primitives. */ - -rep ens:MOVS ==> @text1( 0xf3); - @text1( 0xa5). - -\fR -.DE -.NH 3 -Declaration of types. -.PP -In general, a machine instruction is encoded as an opcode followed by zero or -more -operands. There are two methods for mapping assembler mnemonics -onto opcodes: the mnemonic determines the opcode, or mnemonic and operands -together determine the opcode. Both cases can be -easily expressed in the as_table. -The first case is obvious. -The second case is handled by introducing type fields for the operands. -.PP -When mnemonic and operands together determine the opcode, the table writer has -to give several rules for each combination of mnemonic and operands. The rules -differ in the type fields of the operands. -The table writer has to supply functions that check the type -of the operand. The name of such a function is the name of the type; it -has one argument: a pointer to a struct of type \fIt_operand\fR; it returns -non-zero when the operand is of this type, otherwise it returns 0. -.PP -This will usually lead to a list of rules per mnemonic. To reduce the amount of -work an abbreviation is supplied. Once the mnemonic is specified it can be -referred to in the following rules by ``...''. -One has to make sure -that each mnemonic is mentioned only once in the as_table, otherwise -\fBasg\fR will generate more than one function with the same name. -.PP -The following example shows the usage of type fields. -.DS -.ft CW - mov dst:REG, src:EADDR ==> - @text1( 0x8b); /* opcode */ - mod_RM( %d(dst->reg), src). /* operands */ - - ... dst:EADDR, src:REG ==> - @text1( 0x89); /* opcode */ - mod_RM( %d(src->reg), dst).
/* operands */ -\fR -.DE -The table-writer must supply the restriction functions, -.ft CW -REG\fR and -.ft CW -EADDR\fR in the previous example, in ``as.c'' or ''as.h''. -.NH 3 -The function of the @-sign and the if-statement. -.PP -The right hand side of a rule is made up of function calls. -Since the as_table is -interpreted on two levels, during code expander generation and during code -expander execution, two levels of calls are present in it. A function-call -without an ``@''-sign -is called during code expander generation (e.g., the -.ft CW -gen_operand()\fR in the -first example). -A function call with an ``@''-sign is called during code -expander execution (e.g., -the \fBback\fR-primitives). So the last group will be part of the compiler. -.PP -The need for the ``@''-sign construction arises, for example, when -implementing push/pop optimization (e.g., ``push x'' followed by ``pop y'' -can be replaced by ``move x, y''). -In this case flags need to be set, unset, and tested during the execution of -the compiler: -.DS L -.ft CW -PUSH src ==> /* save in ax */ - mov_instr( AX_oper, src); - /* set flag */ - @assign( push_waiting, TRUE). -\fR -.DE -.DS -.ft CW -POP dst ==> @if ( push_waiting) - /* ``mov_instr'' is asg-generated */ - mov_instr( dst, AX_oper); - @assign( push_waiting, FALSE). - @else - /* ``pop_instr'' is asg-generated */ - pop_instr( dst). - @fi. -\fR -.DE -.LP -Although the @-sign is followed syntactically by a -function name, this function can very well be the name of a macro defined in C. -This is in fact the case with ``@assign()'' in the above example. -.PP -The case may arise when information is needed that is not known -until execution of -the compiler. For example one needs to know if a ``$\fIi\fR'' argument fits in -one byte. -In this case one can use a special if-statement provided -by \fBasg\fR: @if, @elsif, @else, @fi. This means that the conditions -will be evaluated at -run time of the \fBce\fR. 
In such a condition one may of course refer -to the ''$\fIi\fR'' arguments. For example, constants can be -packed into one or two byte arguments as follows: -.DS -.ft CW -mov dst:ACCU, src:DATA ==> - @if ( fits_byte( %$(dst->expr))) - @text1( 0xc0); - @text1( %$(dst->expr)). - @else - @text1( 0xc8); - @text2( %$(dst->expr)). - @fi. -.DE -.NH 3 -References to operands -.PP -As noted before, the operands of an assembler instruction may be used as -pointers to the struct \fIt_operand\fR in the right hand side of the table. -Because of the free format assembler, the types of the fields in the struct -\fIt_operand\fR are unknown to \fBasg\fR. As these fields can appear in calls -to functions, \fBasg\fR must know -these types. This section explains how these types must be specified. -.PP -References to operands come in three forms: ordinary operands, operands that -contain ``$\fIi\fR'' references, and operands that refer to names of local labels. -The ``$\fIi\fR'' in operands represent names or numbers of a \fBC_instr\fR and must -be given as arguments to the \fBback\fR-primitives. Labels in operands -must be converted to a number that tells the distance, the number of bytes, -between the label and the current position in the text-segment. -.LP -All these three cases are treated in an uniform way. When the table writer -makes a reference to an operand of an assembly instruction, he must describe -the type of the operand in the following way. -.VS +4 -.TS -center tab(#); -l c l. -reference#::=#``%'' conversion -##``('' operand-name ``\->'' field-name ``)'' -conversion#::=# printformat -#|#``$'' -#|#``dist'' -printformat#::=#see PRINT(3ACK) -.[ -PRINT -.] -.TE -.VS -4 -.LP -The three cases differ only in the conversion field. The printformat conversion -applies to ordinary operands. The ``%$'' applies to operands that contain -a ``$\fIi\fR''. The expression between parentheses must result in a pointer to -a char. The -result of ``%$'' is of the type of ``$\fIi\fR''. 
The ``%dist'' -applies to operands that refer to a local label. The expression between -the brackets must result in a pointer to a char. The result of ``%dist'' is -of type arith. -.PP -The following example illustrates the usage of ``%$''. (For an -example that illustrates the usage of ordinary fields see -the section on ``User supplied definitions and functions''). -.DS -.ft CW -jmp dst ==> - @text1( 0xe9); - @reloc2( %$(dst->lab), %$(dst->off), PC_REL). -\fR -.DE -.PP -A useful function concerning $\fIi\fRs is arg_type(), which takes as input a -string starting with $\fIi\fR and returns the type of the \fIi\fR''th argument -of the current EM-instruction, which can be STRING, ARITH or INT. One may need -this function while decoding operands if the context of the $\fIi\fR does not -give enough information. -If the function arg_type() is used, the file -arg_type.h must contain the definition of STRING, ARITH and INT. -.PP -%dist is only guaranteed to work when called as a parameter of text1(), text2() or text4(). -The goal of the %dist conversion is to reduce the number of reloc1(), reloc2() -and reloc4() -calls, saving space and time (no relocation at compiler run time). -The following example illustrates the usage of ``%dist''. -.DS -.ft CW - jmp dst:ILB ==> /* label in an instruction list */ - @text1( 0xeb); - @text1( %dist( dst->lab)). - - ... dst:LABEL ==> /* global label */ - @text1( 0xe9); - @reloc2( %$(dst->lab), %$(dst->off), PC_REL). -\fR -.DE -.NH 3 -The functions assemble() and block_assemble() -.PP -The functions assemble() and block_assemble() are provided by \fBceg\fR. -If, however, the table writer is not satisfied with the way they work -he can -supply his own assemble() or block_assemble(). -The default function assemble() splits an assembly string into a -label, mnemonic, -and operands and performs the following actions on them: -.IP \0\01: -It processes the local label; it records the name and current position. 
Thereafter it calls the function process_label() with one argument of type string, -the label. The table writer has to define this function. -.IP \0\02: -Thereafter it calls the function process_mnemonic() with one argument of -type string, the mnemonic. The table writer has to define this function. -.IP \0\03: -It calls process_operand() for each operand. Process_operand() must be -written by the table-writer since no fixed representation for operands -is enforced. It has two arguments: a string (the operand to decode) -and a pointer to the struct \fIt_operand\fR. The declaration of the struct -\fIt_operand\fR must be given in the -file ``as.h'', and the table-writer can put all the information needed for -encoding the operand in machine format in it. -.IP \0\04: -It examines the mnemonic and calls the associated function, generated by -\fBasg\fR, with pointers to the decoded operands as arguments. This makes it -possible to use the decoded operands in the right hand side of a rule (see -below). -.LP -If the default assemble() does not work the way the table writer wants, he -can supply his own version of it. Assemble() has the following arguments: -.DS -.ft CW -assemble( instruction ) - char *instruction; -\fR -.DE -\fIinstruction\fR points to a null-terminated string. -.PP -The default function block_assemble() is called with a sequence of assembly -instructions that belong to one action list. It calls assemble() for -every assembly instruction in -this block. But if a special action is -required on a block of assembly instructions, the table writer only has to -rewrite this function to get a new \fBceg\fR that complies with his wishes. -The function block_assemble has the following arguments: -.DS -.ft CW -block_assemble( instructions, nr, first, last) - char **instructions; - int nr, first, last; -\fR -.DE -\fIInstructions\fR points to an array of pointers to strings representing -assembly instructions. \fINr\fR is -the number of instructions that must be assembled.
\fIFirst\fR -and \fIlast\fR have no function in the default block_assemble(), but are -useful when optimizations are done in block_assemble(). -.PP -Four things have to be specified in ``as.h'' and ``as.c''. First the user must -give the declaration of struct \fIt_operand\fR in ``as.h'', and the functions -process_operand(), process_mnemonic(), and process_label() must be given -in ``as.c''. If the right hand side of the as_table -contains function calls other than the \fBback\fR-primitives, these functions -must also be present in ``as.c''. Note that both the ``@''-sign (see 4.2.3) -and ``references'' (see 4.2.4) also work in the functions defined in ``as.c''. -.PP -The following example shows the representative and essential parts of the -8086 ``as.h'' and ``as.c'' files. -.nr PS 10 -.nr VS 12 -.LP -.DS L -.ft CW -/* Constants and type definitions in as.h */ - -#define UNKNOWN 0 -#define IS_REG 0x1 -#define IS_ACCU 0x2 -#define IS_DATA 0x4 -#define IS_LABEL 0x8 -#define IS_MEM 0x10 -#define IS_ADDR 0x20 -#define IS_ILB 0x40 - -#define AX 0 -#define BX 3 -#define CL 1 -#define SP 4 -#define BP 5 -#define SI 6 -#define DI 7 - -#define REG( op) ( op->type & IS_REG) -#define ACCU( op) ( op->type & IS_REG && op->reg == AX) -#define REG_CL( op) ( op->type & IS_REG && op->reg == CL) -#define DATA( op) ( op->type & IS_DATA) -#define LABEL( op) ( op->type & IS_LABEL) -#define ILB( op) ( op->type & IS_ILB) -#define MEM( op) ( op->type & IS_MEM) -#define ADDR( op) ( op->type & IS_ADDR) -#define EADDR( op) ( op->type & ( IS_ADDR | IS_MEM | IS_REG)) -#define CONST1( op) ( op->type & IS_DATA && strcmp( "1", op->expr) == 0) -#define MOVS( op) ( op->type & IS_LABEL&&strcmp("\"movs\"", op->lab) == 0) -#define IMMEDIATE( op) ( op->type & ( IS_DATA | IS_LABEL)) - -struct t_operand { - unsigned type; - int reg; - char *expr, *lab, *off; - }; - -extern struct t_operand saved_op, *AX_oper; -\fR -.DE -.nr PS 12 -.nr VS 14 -.LP -.nr PS 10 -.nr VS 12 -.DS L -.ft CW - -/* Some functions 
in as.c. */ - -#include "arg_type.h" -#include "as.h" - -#define last( s) ( s + strlen( s) - 1) -#define LEFT '(' -#define RIGHT ')' -#define DOLLAR '$' - -process_operand( str, op) -char *str; -struct t_operand *op; - -/* expr -> IS_DATA en IS_LABEL - * reg -> IS_REG en IS_ACCU - * (expr) -> IS_ADDR - * expr(reg) -> IS_MEM - */ -{ - char *ptr, *index(); - - op->type = UNKNOWN; - if ( *last( str) == RIGHT) { - ptr = index( str, LEFT); - *last( str) = '\0'; - *ptr = '\0'; - if ( is_reg( ptr+1, op)) { - op->type = IS_MEM; - op->expr = ( *str == '\0' ? "0" : str); - } - else { - set_label( ptr+1, op); - op->type = IS_ADDR; - } - } - else - if ( is_reg( str, op)) - op->type = IS_REG; - else { - if ( contains_label( str)) - set_label( str, op); - else { - op->type = IS_DATA; - op->expr = str; - } - } -} - -/*********************************************************************/ - -mod_RM( reg, op) -int reg; -struct t_operand *op; - -/* This function helps to decode operands in machine format. - * Note the $-operators - */ -{ - if ( REG( op)) - R233( 0x3, reg, op->reg); - else if ( ADDR( op)) { - R233( 0x0, reg, 0x6); - @reloc2( %$(op->lab), %$(op->off), ABSOLUTE); - } - else if ( strcmp( op->expr, "0") == 0) - switch( op->reg) { - case SI : R233( 0x0, reg, 0x4); - break; - - case DI : R233( 0x0, reg, 0x5); - break; - - case BP : R233( 0x1, reg, 0x6); /* exception! 
*/ - @text1( 0); - break; - - case BX : R233( 0x0, reg, 0x7); - break; - - default : fprint( STDERR, "Wrong index register %d\en", - op->reg); - } - else { - @if ( fit_byte( %$(op->expr))) - switch( op->reg) { - case SI : R233( 0x1, reg, 0x4); - break; - - case DI : R233( 0x1, reg, 0x5); - break; - - case BP : R233( 0x1, reg, 0x6); - break; - - case BX : R233( 0x1, reg, 0x7); - break; - - default : fprint( STDERR, "Wrong index register %d\en", - op->reg); - } - @text1( %$(op->expr)); - @else - switch( op->reg) { - case SI : R233( 0x2, reg, 0x4); - break; - - case DI : R233( 0x2, reg, 0x5); - break; - - case BP : R233( 0x2, reg, 0x6); - break; - - case BX : R233( 0x2, reg, 0x7); - break; - - default : fprint( STDERR, "Wrong index register %d\en", - op->reg); - } - @text2( %$(op->expr)); - @fi - } -} -\fR -.DE -.nr PS 12 -.nr VS 14 -.NH 2 -Generating assembly code -.PP -It is possible to generate assembly instead of object files (see section 5), in -which case there is no need to supply ``as_table'', ``as.h'', and ``as.c''. -This option is useful for debugging the EM_table. -.NH 1 -Building a code expander -.PP -This section describes how to generate a code expander in two phases. -In phase one, the EM_table is -written and assembly code is generated. If the assembly code is an actual -language, the EM_table can be tested by assembling and running the generated -code. -If an ad-hoc assembly language is used by the table writer, it is not possible -to test the EM_table, but the code generated is at least in readable form. -In the second phase, the as_table is written and object code is generated. -After the generated object code is fed into the loader, it can be tested. -.NH 2 -Phase one -.PP -The following is a list of instructions to make a -code expander that generates assembly instructions. -.IP \0\01: -Create a new directory. 
-.IP \0\02: -Create the ``EM_table'', ``mach.h'', and ``mach.c'' files; there is no need -for ``as_table'', ``as.h'', and ``as.c'' at this moment. -.IP \0\03: -type -.br -.ft CW -install_ceg -as -\fR -.br -install_ceg will create a Makefile and three directories : ceg, ce, and back. -Ceg will contain the program ceg; this program will be -used to turn ``EM_table'' into a set of C source files (in the ce directory), -one for each -EM-instruction. All these files will be compiled and put in a library called -\fBce.a\fR. -.br -The option -.ft CW --as\fR means that a \fBback\fR-library will be -generated (in the directory ``back'') that -supports the generation of assembly language. The library is named ``back.a''. -.IP \0\04: -Link a front end, ``ce.a'', and ``back.a'' together resulting in a compiler -that generates assembly code. -.LP -If the table writer has chosen an actual assembly language, the EM_table can be -tested (e.g., by running the compiler on the EM test set). If an error occurs, -change the EM_table and type -.IP -.br -.ft CW -update_ceg\fR \fBC_instr -\fR -.br -.LP -where \fBC_instr\fR stands for the name of the erroneous EM-instruction. -If the table writer has chosen an ad-hoc assembly language, he can at least -read the generated code and look for possible errors. If an error is found, -the same procedure as described above can be followed. -.NH 2 -Phase two -.PP -The next phase is to generate a \fBce\fR that produces relocatable object -code. -.IP \0\01: -Remove the ``ce'', ``ceg'', and ``back'' directories. -.IP \0\02: -Write the ``as_table'', ``as.h'', and ``as.c'' files. -.IP \0\03: -type -.sp -.ft CW -install_ceg -obj \fR -.sp -The option -.ft CW --obj\fR means that ``back.a'' will contain a library -for generating -ACK.OUT(5ACK) object files, see appendix B. 
-If the writer does not want to use the default ``back.a'', -the -.ft CW --obj\fR flag must be omitted and a ``back.a'' should be supplied that -generates object code in the desired format. -.IP \0\04: -Link a front end, ``ce.a'', and ``back.a'' together resulting in a compiler -that generates object code. -.LP -The as_table is ready to be tested. If an error occurs, adapt the table. -Then there are two ways to proceed: -.IP \0\01: -recompile the whole EM_table, -.sp -.ft CW -update_ceg ALL \fR -.sp -.IP \0\02: -recompile just the few EM-instructions that contained the error, -.sp -.ft CW -update_ceg \fBC_instr\fR -.sp -where \fBC_instr\fR is an erroneous EM-instruction. -This has to be done for every EM-instruction that contained the erroneous -assembly instruction. -.NH -Acknowledgements -.PP -We want to thank Henri Bal, Dick Grune, and Ceriel Jacobs for their -valuable suggestions and the critical reading of this paper. -.NH -References -.LP -.[ -$LIST$ -.] -.bp -.SH -Appendix A, \fRthe \fBback\fR-primitives -.PP -This appendix describes the routines available to generate relocatable -object code. If the default back.a is used, the object code is in -ACK.OUT(5ACK) format. -In the default back.a, the names defined here are remapped to more hidden names, -to avoid name conflicts with, for instance, names used in the front-end. This -remapping is done in an include-file, "back.h". -A user-implemented back.a should do the same thing. -.nr PS 10 -.nr VS 12 -.PP -.IP A1. -Text and data generation; with ONE_BYTE b; TWO_BYTES w; FOUR_BYTES l; arith n; -.VS +4 -.TS -tab(#); -l c lw(10c). -text1( b)#:#T{ -Put one byte in text-segment. -T} -text2( w)#:#T{ -Put word (two bytes) in text-segment, byte-order is defined by -BYTES_REVERSED in mach.h. -T} -text4( l)#:#T{ -Put long ( two words) in text-segment, word-order is defined by -WORDS_REVERSED in mach.h. -T} -# -con1( b)#:#T{ -Same for CON-segment.
-T} -con2( w)#: -con4( l)#: -# -rom1( b)#:#T{ -Same for ROM-segment. -T} -rom2( w)#: -rom4( l)#: -# -gen1( b)#:#T{ -Same for the current segment, only to be used in the ``..icon'', ``..ucon'', etc. -pseudo EM-instructions. -T} -gen2( w)#: -gen4( l)#: -# -bss( n)#:#T{ -Put n bytes in bss-segment, value is BSS_INIT. -T} -common( n)#:#T{ -If there is a saved label, generate a "common" for it, of size -n. Otherwise, it is equivalent to bss(n). -(see also the save_label routine). -T} -.TE -.VS -4 -.IP A2. -Relocation; with char *s; arith o; int r; -.VS +4 -.TS -tab(#); -l c lw(10c). -reloc1( s, o, r)#:#T{ -Generates relocation-information for 1 byte in the current segment. -T} -##s\0:\0the string which must be relocated -##o\0:\0the offset in bytes from the string. -##T{ -r\0:\0relocation type. It can have the values ABSOLUTE or PC_REL. These -two constants are defined in the file ``back.h'' -T} -reloc2( s, o, r)#:#T{ -Generates relocation-information for 1 word in the -current segment. Byte-order according to BYTES_REVERSED in mach.h. -T} -reloc4( s, o, r)#:#T{ -Generates relocation-information for 1 long in the -current segment. Word-order according to WORDS_REVERSED in mach.h. -T} -.TE -.VS -4 -.IP A3. -Symbol table interaction; with int seg; char *s; -.VS +4 -.TS -tab(#); -l c lw(10c). -switch_segment( seg)#:#T{ -sets current segment to ``seg'', and does alignment if necessary. ``seg'' -can be one of the four constants defined in ``back.h'': SEGTXT, SEGROM, -SEGCON, SEGBSS. -T} -# -symbol_definition( s)#:#T{ -Define s in symbol-table. -T} -set_local_visible( s)#:#T{ -Record scope-information in symbol table. -T} -set_global_visible( s)#:#T{ -Record scope-information in symbol table. -T} -.TE -.VS -4 -.IP A4. -Start/end actions; with char *f; -.VS +4 -.TS -tab(#); -l c lw(10c). -open_back( f)#:#T{ -Directs output to file ``f'', if f is the null pointer output must be given on -standard output. -T} -close_back()#:#T{ -close output stream. 
-T} -init_back()#:#T{ -Only used with user-written back-library, gives the opportunity to initialize. -T} -end_back()#:#T{ -Only used with user-written back-library. -T} -.TE -.VS -4 -.IP A5. -Label generation routines; with int n; arith g; char *l; These routines all -return a "char *" to a static area, which is overwritten at each call. -.VS +4 -.TS -tab(#); -l c lw(10c). -extnd_pro( n)#:#T{ -Label set at the end of procedure \fIn\fP, to generate space for locals. -T} -extnd_start( n)#:#T{ -Label set at the beginning of procedure \fIn\fP, to jump back to after generating -space for locals. -T} -extnd_name( l)#:#T{ -Create a name for a procedure named \fIl\fP. -T} -extnd_dnam( l)#:#T{ -Create a name for an external variable named \fIl\fP. -T} -extnd_dlb( g)#:#T{ -Create a name for numeric data label \fIg\fP. -T} -extnd_ilb( l, n)#:#T{ -Create a name for instruction label \fIl\fP in procedure \fIn\fP. -T} -extnd_hol( n)#:#T{ -Create a name for HOL block number \fIn\fP. -T} -extnd_part( n)#:#T{ -Create a unique label for the C_insertpart mechanism. -T} -extnd_cont( n)#:#T{ -Create another unique label for the C_insertpart mechanism. -T} -extnd_main( n)#:#T{ -Create yet another unique label for the C_insertpart mechanism. -T} -.TE -.VS -4 -.IP A6. -Some miscellaneous routines, with char *l; -.VS +4 -.TS -tab(#); -l c lw(10c). -save_label( l)#:#T{ -Save label \fIl\fP. Unfortunately, in EM, when a label is encountered, -it is not yet -known in which segment it will end up. The save_label/dump_label mechanism -is there to solve this problem. -T} -dump_label()#:#T{ -If there is a label saved, force definition for it now. -T} -align_word()#:#T{ -Align to a word boundary, if the current segment is not a text segment. -T} -.TE -.VS -4 -.nr PS 12 -.nr VS 14 -.bp -.SH -Appendix B, description of ACK-a.out library -.PP -The object file produced by \fBce\fR is by default in ACK.OUT(5ACK) -format. 
The object file is made up of one header, followed by -four segment headers, followed by text, data, relocation information, -symbol table, and the string area. The object file is tuned for the ACK-LED, -so there are some special things done just before the object file is dumped. -First, four relocation records are added which contain the names of the four -segments. Second, all the local relocation is resolved. This is done by the -function do_relo(). If there is a record belonging to a local -name this address is relocated in the segment to which the record belongs. -Besides doing the local relocation, do_relo() changes the ``nami''-field -of the local relocation records. This field receives the index of one of the -four -relocation records belonging to a segment. After the local -relocation has been resolved the routine output_back() dumps the -ACK object file. -.LP -If a different a.out format is wanted, one can choose between three strategies: -.IP \ \1: -The simplest one is to use a conversion program, which converts the ACK -a.out format to the wanted a.out format. This program exists for almost -all machines on which ACK runs. However, -not all conversion programs can generate relocation information. -The disadvantage is that the compiler will become slower. -.IP \ \2: -A better solution is to change the functions output_back(), do_relo(), -open_back(), and close_back() in such a way -that they produce the wanted a.out format. This strategy saves a lot of I/O. -.IP \ \3: -If this still is not satisfactory, the -\fBback\fR-primitives can be adapted to produce the wanted a.out format. diff --git a/doc/ceg/proposal.tr b/doc/ceg/proposal.tr deleted file mode 100644 index 0063bb623..000000000 --- a/doc/ceg/proposal.tr +++ /dev/null @@ -1,284 +0,0 @@ -.TL - -Code Expander -.br -(proposal) - -.SH -Introduction -.LP -The \fBcode expander\fR, \fBce\fR, is a program that translates EM-code to -objectcode. The main goal is to translate very fast.
\fBce\fR is an instance -of the EM_CODE(3L)-interface. During execution of \fBce\fR, \fBce\fR will build -in core a machine independent objectfile ( NEW A.OUT(5L)). With \fBcv\fR or -with routines supplied by the user the machine independent objectcode will -be converted to a machine dependent object code. \fBce\fR needs -information about the targetmachine (e.g. the opcode's). We divide the -information into two parts: -.IP -- The description in assembly instructions of EM-code instructions. -.IP -- The description in objectcode of assembly instructions. -.LP -With these two tables we can make a \fBcode expander generator\fR which -generates a \fBce\fR. It is possible to put the information in one table -but that will probably introduce (propable) more bugs in the table. So we -divide and conquer. With this approach it is also possible to generate -assembly code ( rather yhan objectcode), wich is useful for debugging. -There is of course a link between the two tables, the link -consist of a restriction on the assembly format. Every assembly -instruction must have the following format: -.sp - INSTR ::= LABEL : MNEMONIC [ OPERAND ( "," OPERAND)* ] -.sp -.LP -\fBCeg\fR uses the following algorithm: -.IP \0\0a) -The assembly table will be converted to a (C-)routine assemble(). -assemble() gets as argument a string, the assembler instruction, -and can use the MNEMONIC to execute the corresponding action in the -assembly table. -.IP \0\0b) -The routine assemble() can now be used to convert the EM-code table to -a set of C-routines, wich together form an instance of the -EM_CODE(3L). -.SH -The EM-instruction table -.LP -We use the following grammar: -.sp -.TS -center box ; -l. -TABLE ::= (ROW)* -ROW ::= C_instr ( SPECIAL | SIMPLE) -SPECIAL ::= ( CONDITION SIMPLE)+ 'default' SIMPLE -SIMPLE ::= '==>' ACTIONLIST | '::=' ACTIONLIST -ACTIONLIST ::= [ ACTION ( ';' ACTION)* ] '.' 
-ACTION ::= function-call | assembly-instruction -.TE -.LP -An example for the 8086: -.LP -.DS -C_lxl - $arg1 == 0 ==> "push bp". - $arg1 == 1 ==> "push EM_BSIZE(bp)". - default ==> "mov cx, $arg1"; - "mov si, bp"; - "1: mov si, EM_BSIZE(si); - "loop 1b" - "push si". -.DE -.sp -Some remarks: -.sp -* The C_instr is a function indentifier in the EM_CODE(3L)-interface. -.LP -* CONDITION is a "boolean" C-expression. -.LP -* The arguments of an EM-instruction can be used in CONDITION and in assembly -instructions. They are referred by $arg\fIi\fR. \fBceg\fR modifies the -arguments as follows: -.IP \0\0- -For local variables at positive offsets it increases this offset by EM_BSIZE -.IP \0\0- -It makes names en labels unique. The user must supply the formats (see mach.h). -.LP -* function-call is allowed to implement e.g. push/pop optimization. -For example: -.LP -.DS -C_adi - $arg1 == 2 ==> combine( "pop ax"); - combine( "pop bx"); - "add ax, bx"; - save( "push ax"). - default ==> arg_error( "C_adi", $arg1). -.DE -.LP -* The C-functions called in the EM-instructions table have to use the routine -assemble()/gen?(). "assembler-instr" is in fact assemble( "assembler-instr"). -.LP -* \fBceg\fR takes care not only about the conversions of arguments but also -about -changes between segments. There are situation when one doesn't want -conversion of arguments. This can be done by using ::= in stead of ==>. -This is usefull when two C_instr are equivalent. For example: -.IP -C_slu ::= C_sli( $arg1) -.LP -* There are EM-CODE instructions wich are machine independent (e.g. C_open()). -For these EM_CODE instructions \fBceg\fR will generate \fIdefault\fR- -instructions. There is one exception: in the case of C_pro() the tablewriter -has to supply a function prolog(). -.LP -* Also the EM-pseudoinstructions C_bss_\fIcstp\fR(), C_hol_\fIcstp\fR(), -C_con_\fIcstp\fR() and C_rom_\fIcstp\fR can be translated automaticly. 
-\fBceg\fR only has to know how to interpretate string-constants: -.DS -\&..icon $arg2 == 1 ==> gen1( (char) atoi( $arg1)) - $arg2 == 2 ==> gen2( atoi( $arg1)) - $arg2 == 4 ==> gen4( atol( $arg1)) -\&..ucon $arg2 == 1 ==> gen1( (char) atoi( $arg1)) - $arg2 == 2 ==> gen2( atoi( $arg1)) - $arg2 == 4 ==> gen4( atol( $arg1)) -\&..fcon ::= not_implemented( "..fcon") -.DE -.LP -* Still, life can be made easier for the tablewriter; For the routines wich -he/she didn't implement \fBceg\fR will generate a default instruction wich -generates an error-message. \fBceg\fR seems to generate : -.IP -C_xxx ::= not_implemented( "C_xxx") -.SH -The assembly table -.LP -How to map assembly on objectcode. -.LP -Each row in the table consists of two fields, one field for the assembly -instruction, the other field for the corresponding objectcode. The tablewriter -can use the following primitives to generate code for the machine -instructions : -.IP "\0\0gen1( b)\0\0:" 17 -generates one byte in de machine independent objectfile. -.IP "\0\0gen2( w)\0\0:" 17 -generates one word ( = two bytes), the table writer can change the byte -order by setting the flag BYTES_REVERSED. -.IP "\0\0gen4( l)\0\0:" 17 -generates two words ( = four bytes), the table writer can change the word -order by setting the flag WORDS_REVERSED. -.IP "\0\0reloc( n, o, r)\0\0:" 17 -generates relocation information for a label ( = name + offset + -relocationtype). -.LP -Besides these primitives the table writer may use his self written -C-functions. This allows the table writer e.g. to write functions to set -bitfields within a byte. -.LP -There are more or less two methods to encode the assembly instructions: -.IP \0\0a) -MNEMONIC and OPERAND('s) are encoded independently of each other. This can be -done when the target machine has an orthogonal instruction set (e.g. pdp-11). -.IP \0\0b) -MNEMONIC and OPERAND('s) together determine the opcode. 
In this case the -assembler often uses overloading: one MNEMONIC is used for several -different machine-instructions. For example : (8086) -.br - mov ax, bx -.br - mov ax, variable -.br -These instructions have different opcodes. -.LP -As the transformation MNEMONIC-OPCODE is not one to -one the table writer must be allowed to put restrictions on the operands. -This can be done with type declarations. For example: -.LP -.DS - mov dst:REG, src:MEM ==> - gen1( 0x8b); - modRM( op2.reg, op1); -.DE -.DS - mov dst:REG, src:REG ==> - gen1( 0x89); - modRM( op2.reg, op1); -.DE -.LP -modRM() is a function written by the tablewriter and is used to encode -the operands. This frees the table writer of endless typing. -.LP -The table writer has to do the "typechecking" by himself. But typechecking -is almost the same as operand decoding. So it's more efficient to do this -in one function. We now have all the tools to describe the function -assemble(). -.IP -assemble() first calls the function -decode_operand() ( by the table writer written), with two arguments: a -string ( the operand) and a -pointer to a struct. The struct is declared by the table writer and must -consist of at least a field called type. ( the other fields in the struct can -be used to remember information about the decoded operand.) Now assemble() -fires a row wich is selected by mapping the MNEMONIC and the type of the -operands. -.br -In the second field of a row there may be references to other -fields in the struct (e.g. op2.reg in the example above). -.LP -We ignored one problem. It's possible when the operands are encoded, that -not everything is known. For example $arg\fIi\fR arguments in the -EM-instruction table get their value at runtime. This problem is solved by -introducing a function eval(). eval() has a string as argument and returns -an arith. The string consists of constants and/or $arg\fIi\fR's and the value -returned by eval() is the value of the string. 
To encode the $arg\fIi\fR's -in as few bytes as possible the table writer can use the statements %if, -%else and %endif. They can be used in the same manner as #if, #else and -#endif in C and result in a runtime test. An example : -.LP -.DS - -- Some rows of the assembly table - - mov dst:REG, src:DATA ==> - %if sfit( eval( src), 8) /* does the immediate-data fit in 1 byte? */ - R53( 0x16 , op1.reg); - gen1( eval( src)); - %else - R53( 0x17 , op1.reg); - gen2( eval( src)); - %endif -.LD - - mov dst:REG, src:REG ==> - gen1( 0x8b); - modRM( op1.reg, op2); - -.DE -.DS - -- The corresponding part in the function assemble() : - - case MNEM_mov : - decode_operand( arg1, &op1); - decode_operand( arg2, &op2); - if ( REG( op1.type) && DATA( op2.type)) { - printf( "if ( sfit( %s, 8)) {\\\\n", eval( src)); - R53( 0x16 , op1.reg); - printf( "gen1( %s)\\\\n", eval( arg2)); - printf( "}\\\\nelse {\\\\n"); - R53( 0x17 , op1.reg); - printf( "gen2( %s)\\\\n", eval( arg2)); - printf( "}\\\\n"); - } - else if ( REG( op1.type) && REG( op2.type)) { - gen1( 0x8b); - modRM( op1.reg, op2); - } - - -.DE -.DS - -- Some rows of the right part of the EM-instruction table are translated - -- in the following C-functions. - - "mov ax, $arg1" ==> - if ( sfit( w, 8)) { /* w is the actual argument of C_xxx( w) */ - gen1( 176); /* R53() */ - gen1( w); - } - else { - gen1( 184); - gen2( w); - } -.LD - - "mov ax, bx" ==> - gen1( 138); - gen1( 99); /* modRM() */ -.DE -.SH -Restrictions -.LP -.IP \0\01) -The EM-instructions C_exc() is not implemented. -.IP \0\03) -All messages are ignored. diff --git a/doc/ceg/proto.make b/doc/ceg/proto.make deleted file mode 100644 index f1069b0a6..000000000 --- a/doc/ceg/proto.make +++ /dev/null @@ -1,12 +0,0 @@ -# $Id$ - -#PARAMS do not remove this line! 
- -SRC_DIR = $(SRC_HOME)/doc/ceg - -PIC=pic -TBL=tbl -REFER=refer - -$(TARGET_HOME)/doc/ceg.doc: $(SRC_DIR)/ceg.tr $(SRC_DIR)/ceg.ref - $(PIC) $(SRC_DIR)/ceg.tr | $(REFER) -e -p $(SRC_DIR)/ceg.ref | $(TBL) > $@ diff --git a/doc/ceg/prototype.tr b/doc/ceg/prototype.tr deleted file mode 100644 index c5c5d91bd..000000000 --- a/doc/ceg/prototype.tr +++ /dev/null @@ -1,276 +0,0 @@ -.TL -A prototype Code expander -.NH -Introduction -.PP -A program to be compiled with ACK is first fed into the preprocessor. -The output of the preprocessor goes into the appropiate front end, -whose job it is to produce EM. The EM code generated is -fed into the peephole optimizer, wich scans it with a window of few -instructions, replacing certain inefficient code sequences by better -ones. Following the peephole optimizer follows a backend wich produces -good assembly code. The assembly code goes into the assembler and the objectcode -then goes into the loader/linker, the final component in the pipeline. -.PP -For various applications this scheme is too slow. For example for testing -programs; In this case the program has to be translated fast and the -runtime of the objectcode may be slower. A solution is to build a code -expander ( \fBce\fR) wich translates EM code to objectcode. Of course this -has to -be done automaticly by a code expander generator, but to get some feeling -for the problem we started out to build prototypes. -We built two types of ce's. One wich tranlated EM to assembly, one -wich translated EM to objectcode. -.NH -EM to assembly -.PP -We made one for the 8086 and one for the vax4. These ce's are instances of the -EM_CODE(3L)-interface and produce for a single EM instruction a set -of assembly instruction wich are semantic equivalent. -We implemented in the 8086-ce push/pop-optimalization. -.NH -EM to objectcode -.PP -Instead of producing assembly code we tried to produce vax4-objectcode. 
-During execution of ce, ce builds in core a machine independent -objectfile ( NEW A.OUT(5L)) and just before dumping the tables this -objectfile is converted to a Berkly 4.2BSD a.out-file. We build two versions; -One with static memory allocation and one with dynamic memory allocation. -If the first one runs out of memory it will give an error message and stop, -the second one will allocate more memory and proceed with producing -objectcode. -.PP -The C-frontend calls the EM_CODE-interface. So after linking the frontend -and the ce we have a pipeline in a program saving a lot of i/o. -It is interesting to compare this C-compiler ( called fcemcom) with "cc -c". -fcemcom1 (the dynamic variant of fcemcom) is tuned in such a way, that -alloc() won't be called. -.NH 2 -Compile time -.PP -fac.c is a small program that produces n! ( see below). foo.c is small program -that loops a lot. -.TS -center, box, tab(:); -c | c | c | c | c | c -c | c | n | n | n | n. -compiler : program : real : user : sys : object size -= -fcemcom : sort.c : 31.0 : 17.5 : 1.8 : 23824 -fcemcom1 : : 59.0 : 21.2 : 3.3 : -cc -c : : 50.0 : 38.0 : 3.5 : 6788 -_ -fcemcom : ed.c : 37.0 : 23.6 : 2.3 : 41744 -fcemcom1 : : 1.16.0 : 28.3 : 4.6 : -cc -c : : 1.19.0 : 54.8 : 4.3 : 11108 -_ -fcemcom : cp.c : 4.0 : 2.4 : 0.8 : 4652 -fcemcom1 : : 9.0 : 3.0 : 1.0 : -cc -c : : 8.0 : 5.2 : 1.6 : 1048 -_ -fcemcom : uniq.c : 5.0 : 2.5 : 0.8 : 5568 -fcemcom1 : : 9.0 : 2.9 : 0.8 : -cc -c : : 13.0 : 5.4 : 2.0 : 3008 -_ -fcemcom : btlgrep.c : 24.0 : 7.2 : 1.4 : 12968 -fcemcom1 : : 23.0 : 8.1 : 1.2 : -cc -c : : 1.20.0 : 15.3 : 3.8 : 2392 -_ -fcemcom : fac.c : 1.0 : 0.1 : 0.5 : 216 -fecmcom1 : : 2.0 : 0.2 : 0.5 : -cc -c : : 3.0 : 0.7 : 1.3 : 92 -_ -fcemcom : foo.c : 4.0 : 0.2 : 0.5 : 272 -fcemcom1 : : 11.0 : 0.3 : 0.5 : -cc -c : : 7.0 : 0.8 : 1.6 : 108 -.TE -.NH 2 -Run time -.LP -Is the runtime very bad? -.TS -tab(:), box, center; -c | c | c | c | c -c | c | n | n | n. 
-compiler : program : real : user : system -= -fcem : sort.c : 22.0 : 17.5 : 1.5 -cc : : 5.0 : 2.4 : 1.1 -_ -fcem : btlgrep.c : 1.58.0 : 27.2 : 4.2 -cc : : 12.0 : 3.6 : 1.1 -_ -fcem : foo.c : 1.0 : 0.7 : 0.1 -cc : : 1.0 : 0.4 : 0.1 -_ -fcem : uniq.c : 2.0 : 0.5 : 0.3 -cc : : 1.0 : 0.1 : 0.2 -.TE -.NH 2 -quality object code -.LP -The runtime is very bad so its interesting to have look at the code which is -produced by fcemcom and by cc -c. I took a program which computes recursively -n!. -.DS -long fac(); - -main() -{ - int n; - - scanf( "%D", &n); - printf( "fac is %D\\\\n", fac( n)); -} - -long fac( n) -int n; -{ - if ( n == 0) - return( 1); - else - return( n * fac( n-1)); -} -.DE -.br -.br -.br -.br -.LP -"cc -c fac.c" produces : -.DS -fac: tstl 4(ap) - bnequ 7f - movl $1, r0 - ret -7f: subl3 $1, 4(ap), r0 - pushl r0 - call $1, fac - movl r0, -4(fp) - mull3 -4(fp), 4(ap), r0 - ret -.DE -.br -.br -.LP -"fcem fac.c fac.o" produces : -.DS -_fac: 0 -42: jmp be -48: pushl 4(ap) -4e: pushl $0 -54: subl2 (sp)+,(sp) -57: tstl (sp)+ -59: bnequ 61 -5b: jmp 67 -61: jmp 79 -67: pushl $1 -6d: jmp ba -73: jmp b9 -79: pushl 4(ap) -7f: pushl $1 -85: subl2 (sp)+,(sp) -88: calls $0,_fac -8f: addl2 $4,sp -96: pushl r0 -98: pushl 4(ap) -9e: pushl $4 -a4: pushl $4 -aa: jsb .cii -b0: mull2 (sp)+,(sp) -b3: jmp ba -b9: ret -ba: movl (sp)+,r0 -bd: ret -be: jmp 48 -.DE -.NH 1 -Conclusions -.PP -comparing "cc -c" with "fcemcom" -.LP -.TS -center, box, tab(:); -c | c s | c | c s -^ | c s | ^ | c s -^ | c | c | ^ | c | c -l | n | n | n | n | n. 
-program : compile time : object size : runtime -:_::_ -: user : sys :: user : sys -= -sort.c : 0.47 : 0.5 : 3.5 : 7.3 : 1.4 -_ -ed.c : 0.46 : 0.5 : 3.8 : : : -_ -cp.c : 0.46 : 0.5 : 4.4 : : : -_ -uniq.c : 0.46 : 0.4 : 1.8 : : : -_ -btlgrep.c : 0.47 : 0.3 : 5.4 : 7.5 : 3.8 -_ -fac.c : 0.14 : 0.4 : 2.3 : 1.8 : 1.0 -_ -foo.c : 0.25 : 0.3 : 2.5 : 5.0 : 1.5 -.TE -.PP -The results for fcemcom1 are almost identical; The only thing that changes -is that fcemcom1 is 1.2 slower than fcemcom. ( compile time) This is due to -to an another datastructure . In the static version we use huge array's for -the text- and -data-segment, the relocation information, the symboltable and stringarea. -In the dynamic version we use linked lists, wich makes it expensive to get -and to put a byte on a abritrary memory location. So it is probably better -to use realloc(), because in the most cases there will be enough memory. -.PP -The quality of the objectcode is very bad. The reason is that the frontend -generates bad code and expects the peephole-optimizer to improve the code. -This is also one of the main reasons that the runtime is very bad. -(e.g. the expensive "cii" with arguments 4 and 4 could be deleted.) -So its seems a good -idea to put a new peephole-optimizer between the frontend and the ce. -.PP -Using the peephole optimizer the ce would produce : -.DS -_fac: 0 - pushl 4(ap) - tstl (sp)+ - beqlu 1f - jmp 3f - 1 : pushl $1 - jmp 2f - 3 : pushl 4(ap) - decl (sp) - calls $0,_fac - addl2 $4,sp - pushl r0 - pushl 4(ap) - mull2 (sp)+,(sp) - movl (sp)+,r0 - 2 : ret -.DE -.PP -Bruce McKenzy already implemented it and made some improvements in the -source code of the ce. The compile-time is two to two and a half times better -and the -size of the objectcode is two to three times bigger.(comparing with "cc -c") -Still we could do better. 
-.PP -Using peephole- and push/pop-optimization ce could produce : -.DS -_fac: 0 - tstl 4(ap) - beqlu 1f - jmp 2f - 1 : pushl $1 - jmp 3f - 2 : decl 4(ap) - calls $0,_fac - addl2 $4,sp - mull3 4(ap), r0, -(sp) - movl (sp)+, r0 - 3 : ret -.DE -.PP -prof doesn't cooperate, so no profile information. -.PP diff --git a/doc/cg.doc b/doc/cg.doc deleted file mode 100644 index c1cece750..000000000 --- a/doc/cg.doc +++ /dev/null @@ -1,1864 +0,0 @@ -.\" $Id$ -.RP -.ND Nov 1984 -.TL -The table driven code generator from -.br -the Amsterdam Compiler Kit -.AU -Hans van Staveren -.AI -Dept. of Mathematics and Computer Science -Vrije Universiteit -Amsterdam, The Netherlands -.AB -It is possible to automate the process of compiler building -to a great extent using collections of tools. -The Amsterdam Compiler Kit is such a collection of tools. -This document provides a description of the internal workings -of the table driven code generator in the Amsterdam Compiler Kit, -and a description of syntax and semantics of the driving table. -.PP ->>> NOTE <<< -.br -This document pertains to the \fBold\fP code generator. Refer to the -"Second Revised Edition" for the new code generator. -.AE -.NH 1 -Introduction -.PP -Part of the Amsterdam Compiler Kit is a code generator system consisting -of a code generator generator (\fIcgg\fP for short) and some machine -independent C code. -.I Cgg -reads a machine description table and creates two files, -tables.h and tables.c. -These are then used together with other C code to produce -a code generator for the machine at hand. -.PP -This in turn reads compact EM code and produces -assembly code. -The remainder of this document will first broadly describe -the working of the code generator, -then a description of the machine table follows after which -the internal workings of the code generator will be explained. -.PP -The reader is assumed to have at least a vague notion about the -semantics of the intermediary EM code. 
-Someone wishing to write a table for a new machine -should be thoroughly acquainted with EM code -and the assembly code of the machine at hand. -.NH 1 -Global overview of the workings of the code generator. -.PP -The code generator or -.I cg -tries to generate good code by simulating the runtime stack -of the program compiled and delaying emission of code as long -as possible. -It also keeps track of register contents, which enables it to -eliminate redundant moves, and tries to eliminate redundant tests -by keeping information about condition code status, -if applicable for the machine. -.PP -.I Cg -maintains a `fakestack' containing `tokens' that are built -by executing the pseudo code contained in the code rules given -by the table writer. -One can think of the fakestack as a logical extension of the real -stack the program compiled will have when run. -During code generation tokens will be kept on the fakestack as long -as possible but when they are moved to the real stack, -by generating code for the push, -all tokens above\u*\d -.FS -* in the rest of this document the stack is assumed to grow downwards, -although the top of the stack will mean the first element that will -be popped. -.FE -the tokens pushed will be pushed also, -so that the fakestack will not contain holes. -.PP -The main loop of -.I cg -is this: -.IP 1) -find a pattern of EM instructions starting at the current one to -generate code for. -This pattern will usually be of length one but longer patterns can be used. -.IP 2) -Select one of the possibly many stack patterns that go with this -EM pattern on the basis of heuristics and/or lookahead. -.IP 3) -Force the current fakestack contents to match the pattern. -This may involve -copying tokens to registers, making dummy transformations, e.g. 
to -transform a "local" into an "register offsetted" or might even -cause to have the complete fakestack contents put to the real stack -and then back into registers if no suitable transformations -were provided by the table writer. -.IP 4) -Execute the pseudocode associated with the code rule just selected, -this may cause registers to be allocated, -code to be emitted etc.. -.IP 5) -Put tokens onto the fakestack to reflect the result of the operation. -.IP 6) -Insert some EM instructions into the stream, -this is possible but not common. -.IP 7) -Account for the cost. -The cost is kept in a (space, time) vector and lookahead decisions -are based on a linear combination of these. -.PP -The table that drives -.I cg -is not read in every time, -but instead is used at compiletime -of -.I cg -to set parameters and to load pseudocode tables. -A program called -.I cgg -reads the table and produces large lists of numbers that are -compiled together with machine independent code to produce -a code generator for the machine at hand. -.NH 1 -Description of the machine table -.PP -The machine description table consists of the following sections: -.IP 1) -Constant definitions -.IP 2) -Register definitions -.IP 3) -Token definitions -.IP 4) -Token expression definitions -.IP 5) -Code rules -.IP 6) -Move definitions -.IP 7) -Test definitions -.IP 8) -Stacking definitions -.PP -Input is in free format, white space and newlines may be used -at will to improve legibility. -Identifiers used in the table have the same syntax as C identifiers, -upper and lower case considered different, all characters significant. -There is however one exception: -identifiers must be more than one character long for parsing reasons. -C style comments are accepted -.DS - /* this is a comment */ -.DE -and #define macros may be used if the need arises. -.NH 2 -Some constants -.PP -Before anything else three constants must be defined, -all with the syntax NAME=value, value being an integer. 
-These constants are: -.IP EM_WSIZE 10 -Number of bytes in a machine word. -This is the number of bytes -a simple \fBloc\fP instruction will put on the stack. -.IP EM_PSIZE -Number of bytes in a pointer. -This is the number of bytes -a \fBlal\fP instruction will put on the stack. -.IP EM_BSIZE -Number of bytes in the hole between AB and LB. -If the calling sequence just saves PC and LB this -size will be twice the pointersize. -.PP -EM_WSIZE and EM_PSIZE are checked when a program is compiled -with the resulting code generator. -EM_BSIZE is used by -.I cg -to add to the offset of instructions dealing with locals -having positive offsets, -i.e. parameters. -.PP -Optionally one can give here the factors with which the size and time -parts of the cost function have to be multiplied to ensure they have the -same order of magnitude. -This can be done as -.DS -TIMEFACTOR = C\d1\u/C\d2\u -SIZEFACTOR = C\d3\u/C\d4\u -.DE -Above numbers must be read as rational numbers. -Defaults are 1/1 for both of them. -These constants set the default size/time tradeoff in the code generator, -so if TIMEFACTOR and SIZEFACTOR are both 1 the code generator will choose -at random between two codesequences where one has -cost (10,4) and the other has cost (8,6). -See also the description of the cost field below. -.PP -Also optional is the definition of a printformat for integers in the codefile. -This is given as -.DS -FORMAT = string -.DE -The default for string is "%ld". -For example on the PDP 11 one can use -.DS -FORMAT= "0%lo" -.DE -to satisfy the old UNIX assembler that reads octal unless followed by -a period, and the ACK assembler that follows C conventions. -.NH 2 -Register definition -.PP -The next part of the tables describes the various registers of the -machine and defines identifiers -to be used in later parts of the tables. -Example for the PDP-11: -.DS L -REGISTERS: -R0 = ( "r0",2), REG. -R1 = ( "r1",2), REG, ODDREG. -R2 = ( "r2",2), REG. -R3 = ( "r3",2), REG, ODDREG. 
-R4 = ( "r4",2), REG. -LB = ( "r5",2), LOCALBASE. -R01= ( "r0",4,R0,R1), REGPAIR. -R23= ( "r2",4,R2,R3), REGPAIR. -FR0= ( "r0",4), FREG. -FR1= ( "r1",4), FREG. -FR2= ( "r2",4), FREG. -FR3= ( "r3",4), FREG. -DR0= ( "r0",8,FR0), DREG. -DR1= ( "r1",8,FR1), DREG. -DR2= ( "r2",8,FR2), DREG. -DR3= ( "r3",8,FR3), DREG. -.DE -.PP -The identifier before the '=' sign is the name of the register -as used further on in the table. -The string is the name of the register as far as the assembler is concerned. -The number is the size of the register in bytes. -Identifiers following the number but within the parentheses are previously -defined registernames that are contained in the register being defined. -The identifiers following the closing parenthesis are properties -of the register. -So for example R23 is a register with assembler name r2, 4 bytes long, -contains the registers R2 and R3 and has the property REGPAIR. -.PP -It might seem wise to list each and every property of a register, -so one might give R0 the extra property MFPTREG named after the not -too well known MFPT instruction on newer PDP-11 types, -but this is not a good idea. -Every extra property means the registerset is more unorthogonal -and -.I cg -execution time is influenced by that, -because it has to take into account a larger set of registers -that are not equivalent. -.PP -There is a predefined property SCRATCH that is dynamic, -i.e. a register can have the property SCRATCH one time, -and loose it the next. -A register has the property SCRATCH when it has a reference count of one. -One needs to be able to discriminate between SCRATCH registers -and others, -because it is only allowed to do arithmetic on -SCRATCH registers. -.NH 2 -Stack token definition -.PP -The next part describes all possible tokens that can reside on -the fakestack during code generation. 
-Attributes of a token are described in the form of a C struct declaration, -this is followed by the size in bytes of the token, -optionally followed by the cost of the token when used as an addressing mode -and the format -to be used on output. -.PP -Tokens should usually be declared for every addressing mode -of the machine at hand and for every size directly usable in -a machine instruction. -Example for the PDP-11 (incomplete): -.DS L -TOKENS: -IREG2 = { REGISTER reg; } 2 "*%[reg]" /* indirect register */ -REGCONST = { REGISTER reg; STRING off; } 2 /* not really addressable */ -REGOFF2 = { REGISTER reg; STRING off; } 2 "%[off](%[reg])" -IREGOFF2 = { REGISTER reg; STRING off; } 2 "*%[off](%[reg])" -CONST = { INT off; } 2 cost=(2,850) "$%[off]." -EXTERN2 = { STRING off; } 2 "%[off]" -IEXTERN2 = { STRING off; } 2 "*%[off]" -PAIRSIGNED = { REGISTER regeven,regodd; } 2 "%[regeven]" -.DE -.PP -Types allowed in the struct are REGISTER, INT and STRING. -Tokens without a printformat should never be output. -.PP -Notice that tokens need not correspond to addressing modes, -the REGCONST token listed above, -meaning the sum of the contents of the register and the constant, -has no corresponding addressing mode on the PDP-11, -but is included so that a sequence of add constant, load indirect, -can be handled efficiently. -This REGCONST token is needed as part of the path -.DS -REGISTER -> REGCONST -> REGOFF -.DE -of which the first and the last "exist" and the middle is needed -only as an intermediate step. -.NH 2 -Token expressions -.PP -Usually machines have certain collections of addressing modes that -can be used with certain instructions. -The stack patterns in the table are lists of these collections -and since it is cumbersome to write out these long lists -every time, there is a section here to give names to these -collections. 
-Please note that it is not forbidden to write out a token expression -in the remainder of the table, -but for clarity it is usually better not to. -Example for the PDP-11 (incomplete): -.DS L -TOKENEXPRESSIONS: -SOURCE2 = REG + IREG2 + REGOFF2 + IREGOFF2 + CONST + EXTERN2 + - IEXTERN2 -SREG = REG * SCRATCH -.DE -Permissible in the expressions are all PASCAL set operators, i.e. -.IP + -set union -.IP - -set difference -.IP * -set intersection -.PP -Every tokenidentifier is also a token expression identifier -denoting the singleton collection of tokens containing -just itself. -Every register property as defined above is also a token expression -matching all registers with that property when on the fakestack. -The standard token expression identifier ALL denotes the collection of -all tokens. -.NH 2 -Expressions -.PP -Throughout the rest of the table expressions can be used in some -places. -This section will give the syntax and semantics of expressions. -There are four types of expressions: integer, string, register and undefined. -Type checking is performed by -.I cgg . -An operator with at least one undefined operand returns undefined except -for the defined() function mentioned below. -An undefined expression is interpreted as FALSE when it is needed -as a truth value. -Basic terms in an expression are -.IP number 16 -A number is a constant of type integer. -.IP "string" -A string within double quotes is a constant of type string. -All the normal C style escapes may be used within the string. -.IP REGIDENT -The name of a register is a constant of type register. -.IP $\fIi\fP -A dollarsign followed by a number is the representation of the argument -of EM instruction \fI\fP. -The type of the operand is dependent on the instruction, -sometimes it is integer, -sometimes it is string. -It is undefined when the instruction has no operand. -.br -Although an exhaustive list could be given describing all the types -the following rule of thumb will suffice. 
-If it is unimaginable for the operand of the instruction ever to be -something different from a plain integer, the type is integer, -otherwise it is string. -.br -.I Cg -makes all necessary conversions, -like adding EM_BSIZE to positive arguments of instructions -dealing with locals, -prepending underlines to global names, -converting codelabels into a unique representation etc. -Details about this can be found in the section about -machine dependent C code. -.IP %[1] -This in general means the token mentioned first in the -stack pattern. -When used inside an expression the token must be a simple register. -Type of this is register. -.IP %[1.off] -This means field "off" of the first stack pattern token. -Type is the same as that of field "off". -To use this expression implies a check that all tokens -in the token expression used have the same attributes. -.IP %[1.1] -This is the first subregister of the first token. -Previous comments apply. -.IP %[b] -The second allocated register. -.IP %[a.2] -The second subregister of the first allocated register. -.PP -All normal C operators apply to integers, -the + operator serves for string concatenation -and register expressions can only be compared to each other. -Furthermore there are some special "functions": -.IP tostring(e) 16 -Converts an integer expression e to a string. -.IP defined(e) -Returns 1 if expression e is defined, 0 otherwise. -.IP samesign(e1,e2) -Returns 1 if integer expression e1 and e2 have the same sign. -.IP sfit(e1,e2) -Returns 1 if integer expression e1 fits as a signed integer -into a field of e2 bits, 0 otherwise. -.IP ufit(e1,e2) -Same as above but now for unsigned e1. -.IP rom(a,n) -Integer expression giving the n'th argument from the \fBrom\fP descriptor -pointed at by the a'th EM instruction. -Undefined if that descriptor does not exist. -.IP loww(a) -Returns the lower half of the argument of the a'th EM instruction. -This is used to split the arguments of a \fBldc\fP instruction. 
-.IP highw(a) -Same for upper half. -.NH 2 -Code rules -.PP -The largest section of the tables consists of the code generation rules. -They specify EM patterns, stack patterns, code to be generated etc. -Syntax is -.DS L -code rule : EM pattern '|' stack pattern '|' code '|' - stack replacement '|' EM replacement '|' cost ; -.DE -All parts are optional, however there must be at least one pattern present. -If the empattern is missing the rule becomes a rewriting rule or -.I coercion -to be used when code generation cannot continue -because of an invalid stack pattern. -The code rules are preceded by the word -.DS -CODE: -.DE -The next paragraphs describe the various parts in detail. -.NH 3 -The EM pattern -.PP -The EM pattern consists of a list of EM mnemonics followed -by a boolean expression. -Examples: -.DS -\fBloe\fP -.DE -will match a single \fBloe\fP instruction, -.DS -\fBloc\fP \fBloc\fP \fBcif\fP $1==2 && $2==8 -.DE -is a pattern that will match -.DS -\fBloc\fP 2 -\fBloc\fP 8 -\fBcif\fP -.DE -and -.DS -\fBlol\fP \fBinc\fP \fBstl\fP $1==$3 -.DE -will match for example -.DS -.ta 10m 20m 30m 40m 50m 60m -\fBlol\fP 6 \fBlol\fP -2 \fBlol\fP 4 -\fBinc\fP \fBinc\fP but \fInot\fP \fBinc\fP -\fBstl\fP 6 \fBstl\fP -2 \fBstl\fP -4 -.DE -A missing boolean expression evaluates to TRUE. -.PP -When the EM pattern is the same as in the previous code rule the pattern -should be given as `...'. -The code generator will match the longest EM pattern on every occasion, -if two patterns of the same length match the first in the table will be chosen, -while all patterns of length greater than or equal to three are considered -to be of the same length. -.NH 3 -The stack pattern -.PP -The stack pattern is a list of token expressions, -usually token expression identifiers for clarity. -No boolean expression is allowed here. -The first expression is the one that matches the top of the stack. 
-.PP -The pattern can be followed by the word STACK -in which case the pattern only matches if there is nothing -else on the fakestack. -The code generator will stack everything not matched at the start -of the rule. -.PP -The pattern can be preceded with the word -.DS -nocoercions: -.DE -which tells the code generator not to try to coerce to the pattern -but only to use it when it is already there. -There are two reasons for this construction, -correctness and speed. -It is needed for correctness when the pattern contains a register -that is not transparent when data is moved through it. -.PP -Example: on the PDP-11 the shortest code for -.DS -\fBlae\fP a -\fBloi\fP 8 -\fBlae\fP b -\fBsti\fP 8 -.DE -is -.DS -movf _a,fr0 -movf fr0,_b -.DE -assuming that the floating point processor is in double -precision mode and fr0 is free. -Unfortunately this is not correct since a trap can occur on certain -kinds of data. -This could happen if there was a pattern for \fBsti\fP\ 8 that allowed -one to move a floating point register not preceded by nocoercions: . -The code generator would then find that moving the 8-byte global _a -to a floating point register and then storing it to _b was the cheapest, -assuming that the space/time knob was turned far enough to space. -It is unfortunate that the type information is no longer present, -since if _a really is a floating point number the move could be -made without error. -.PP -The second reason for the nocoercions: construct is speed. -When the code generator has a long list of possible stack patterns -for one EM pattern it can waste a lot of time trying to find coercions -to all of them, while the mere presence of such a long list -indicates that the table writer has given a lot of special cases. -In this case prepending all the special cases by nocoercions: -will stop the code generator from trying to find things there aren't. 
-.NH 3 -The code part -.PP -The code part consists of three parts, stack cleanup, register allocation -and code to generate. -All of these may be omitted. -.NH 4 -Stack cleanup -.PP -The stack cleanup part describes certain stacktokens that should neither remain on -the fakestack, nor remembered as contents of registers. -This is usually only required with store operations. -The entire fakestack, except for the part matched in the stack pattern, -is searched for tokens matching the expression and they are copied -to the real stack. -Every register that contains the stacktoken is marked as empty. -.PP -Syntax is -.DS -remove(token expression) \fIor\fP -remove(token expression, boolean expression) -.DE -Example: -.DS -remove(REGOFF2,%[reg] != LB || %[off] == $1) -.DE -is part of a remove() call for use in the \fBstl\fP code rule. -It removes all register offsetted tokens where the register is not the -localbase plus the local wherein the store is done. -The necessity for this can be seen from the following example: -.DS -\fBlol\fP 4 -\fBinl\fP 4 -\fBstl\fP 6 -.DE -Without a proper remove() call in the rule for \fBinl\fP code would -be generated as here -.DS -inc 4(r5) -mov 4(r5),6(r5) -.DE -so local 6 would be given the new value of local 4 instead of the old -as the EM code prescribed. -.PP -When generating something like a branch instruction it -might be needed to empty the fakestack completely. -This can of course be done with -.DS -remove(ALL) -.DE -.NH 4 -Register allocation -.PP -The register allocation part describes the kind of registers needed. -Syntax for allocate() is -.DS -allocate(itemlist) -.DE -where itemlist is a list of three kinds of things: -.IP 1) -a tokendescription, for example %[1]. -.br -This will instruct the code generator to temporarily decrement the reference count -of all registers contained in the token, -so that they are available for allocation in this allocate() call -if they were only used in that token. -See example below. 
-.IP 2) -a register property. -.br -This will allocate a register with that property. -The register will be marked as empty at this point. -Lookahead will be performed if necessary. -.IP 3) -a register property with initialization. -.br -This will allocate the register as in 2) but will also -initialize it. -This eases the task of the code generator because it can -find a register already filled with the right value -if it exists. -.PP -Examples: -.DS -allocate(OREG) -.DE -will allocate an odd register, while -.DS -allocate(REG={REGOFF2,LB,$1}) -.DE -will allocate a register while simultaneously filling it with -the asked value. -.br -Inside the coercion from SOURCE2 to REGISTER in the PDP-11 table -the following allocate() can be found. -.DS -allocate(%[1],REG=%[1]) -.DE -This tells the code generator that registers contained in %[1] can be used -again and asks to fill the register allocated with %[1]. -So if %[1]={REGOFF2,R3,"4"} and R3 has a reference count of 1 -the following code might be generated. -.DS -mov 4(r3),r3 -.DE -In the rest of the line the registers allocated can be named by -%[a] and %[b.1],%[b.2], i.e. with lower case letters -in order of allocation. -.PP -Warning: -.DS -allocate(R3) -.DE -is \fRnot\fP the way to allocate R3. -R3 is not a register property, so it will be seen as a token description -and the effect is that R3 will have its reference count decremented. -.NH 4 -Code -.PP -Code to be generated is specified as a list of items of the following kind: -.IP 1) -a string in double quotes ("This is a string"). -.br -This is copied to the codefile and a newline ( \en ) is appended. -Inside the string all normal C string conventions are allowed, -and substitutions can be made of the following sorts. -.RS -.IP a) -$1, $2 etc. -These are the operands of the corresponding EM instructions -and are printed according to their type. -To put a real '$' inside the string it must be doubled ('$$'). -.IP b) -%[1], %[2.reg], %[b.1] etc. 
-These have their obvious meaning. -If they describe a complete token ( %[1] ) -the printformat for the token is used. -If they stand for a basic term in an expression -they will be printed according to their type. -To put a real '%' inside the string it must be doubled ('%%'). -.IP c) -%( arbitrary expression %). -This allows inclusion of arbitrary expressions inside strings. -Usually not needed very often, -so that the awkward notation is not too bad. -Note that %(%[1]%) is equivalent to %[1]. -.RE -.IP 2) -a move() call. -This has the following syntax: -.DS -move(token description, token description) -.DE -Moves are handled specially since that enables the code generator -to keep track of register contents. -Example: -.DS -move(R3,{REGOFF2,LB,$1}) -.DE -will generate code to move R3 to $1(r5) except when -R3 already was a copy of $1(r5). -Then the code will be omitted. -The rules describing how to move things to each other -can be found in the MOVES section described below. -.IP 3) -an erase() call. -This has the following syntax: -.DS -erase(register expression) -.DE -This tells the code generator that the register mentioned no longer has any -useful value. -This is -.I necessary -after code in the table has changed the contents of registers. -For example, after an add to a register the register must be erased, -because the contents no longer match any token. -.IP 4) -For machines that have condition codes, -alas most of them do, -there are provisions to remember condition code setting -and prevent needless testing. -To set the condition code to a token put in the code the following call: -.DS -test(token) -.DE -where token can be all of the standard forms that can also be used in move(). -This will generate a test if the condition codes -were not already set to that token.
-It is also possible to tell -.I cg -that a certain operation, like a preceding add -has set the condition codes to some token with the call -.DS -setcc(token) -.DE -So a sequence of a setcc and a test on the same token will generate -no code. -Another allowed call within the code is -.DS -samecc -.DE -which tells the code generator that condition codes were unaffected -in this rule. -If no setcc or samecc has been given the default is -.DS -nocc -.DE -when a piece of code contained strings, -which tells the code generator that the condition codes -have no useful value any more. -.NH 3 -Stack replacement -.PP -The stack replacement is a possibly empty list of items to be pushed onto -the fakestack. Three kinds of items are possible: -.IP 1) -An item of the form %[1]. This will push the stacktoken mentioned back -onto the stack unchanged. -.IP 2) -A register expression. This will push the register mentioned -onto the fakestack. -.IP 3) -An item of the form { REGOFF2,%[1.reg],$1 }. -This generates a token with tokenidentifier REGOFF2 and attributes -in order of declaration. -.PP -All tokens matched by the stack pattern at the beginning of the code rule -are first removed and their registers deallocated. -Items are pushed in the order of appearance. -This means that the last item will be on the top of the -stack after the push. -So if the stack pattern contained two token expressions -and they must be pushed back unchanged, -they have to be specified as stack replacement -.DS -%[2] %[1] -.DE -and not the other way around. -.NH 3 -EM replacement -.PP -In exceptional cases it might be useful to leave part of an empattern -undone. -For example, a \fBsdl\fP instruction might be split into two \fBstl\fP instructions -when there is no 4-byte quantity on the stack. The emreplacement part allows -one to express this. 
-Example: -.DS -\fBstl\fP $1 \fBstl\fP $1+2 -.DE -The instructions are inserted in the stream so that they can match -the first part of a pattern in the next step. -Note that since the code generator traverses the EM instructions in a strict -linear fashion, -it is impossible to let the EM replacement match later parts of a pattern. -So if there is a pattern -.DS -\fBloc\fP \fBstl\fP $1==0 -.DE -and the input is -.DS -\fBloc\fP 0 \fBsdl\fP 4 -.DE -the \fBloc\fP\ 0 will be processed first, -then the \fBsdl\fP might be split into two \fBstl\fP's but the pattern -cannot match now. -.NH 3 -Cost -.PP -The cost field can be specified when there is more than one -code rule with the same empattern. -If the code generator has a choice between two possibilities -to generate code it will choose the cheapest according to -the cost field. -The cost for a code generation is the sum of the costs -of all the coercions needed, plus the cost for freeing -registers plus the cost of the code rule itself. -.PP -The format of the costfield is -.DS -( nbytes, time ) or -( nbytes, time ) + %[\fIi\fP] -.DE -with time in the metric desired, like nanoseconds or states. -See constants section above. -The %[\fIi\fP] in the second example is used for adding the cost of a certain -address mode used in the code generated. -This can of course be repeated if desired. -The cost of the address mode must then be specified in the token definition -section. -.NH 3 -Examples -.PP -A list of examples for the PDP-11 is given here. -Far from being complete it gives examples of most kinds -of instructions. -.DS L -\fBadi\fP $1==2 | SREG,SOURCE2 | - "add %[2],%[1]" erase(%[1]) setcc(%[1]) - | %[1] | | (2,450) + %[2] -\&... | SOURCE2,SREG | - "add %[1],%[2]" erase(%[2]) setcc(%[2]) - | %[2] | | (2,450) + %[1] -.DE -is an example of the use of the `...' construct -and shows how to place erase() and setcc() calls. 
-.DS L - -\fBdvi\fP $1==2 | SOURCE2,SPAIRSIGNED | - "div %[1],%[2]" erase(%[2]) - | %[2.regeven] | | - -\fBcmi\fP \fBtgt\fP $1==2 | SOURCE2,SOURCE2 | allocate(REG={CONST,0}) - "cmp %[2],%[1];ble 1f;inc %[a];1:" erase(%[a]) - | %[a] | | - -\fBcal\fP | STACK | - "jsr pc,$1" - | | | - -\fBlol\fP | | | { REGOFF2, LB, $1 } | | - -\fBstl\fP | SOURCE2 | - remove(REGOFF2,%[off]==$1) - move(%[1],{REGOFF2,LB,$1}) - | | | - -| SOURCE2 | - allocate(%[1],REGPAIR) - move(%[1],%[a.2]) - test(%[a.2]) - "sxt %[a.even]" | { PAIRSIGNED, %[a.1], %[a.2] }| | -.DE -This coercion shows how to use the move and test calls. -At first one might think that the testcall is unnecessary, -since the move will have set the condition codes, -but the move may never have been executed -if the register already contained the value, -in which case it is necessary to do the test. -If the move was executed the test will be omitted. -.DS L -| SOURCE2 | allocate(%[1],REG=%[1]) | %[a] | | - -\fBsdl\fP | SOURCE2 | | %[1] | \fBstl\fP $1 \fBstl\fP $1+2 | - -\fBexg\fP $1==2 | SOURCE2 SOURCE2 | | %[1] %[2] | | -.DE -This last example again shows the difference in the order -of the stack pattern and the stack replacement. -.NH 2 -Move code rules -.PP -When issuing a move() call as described above or a register allocation -with initialization, the code generator has to know which -instruction to use for the move. -The code will of course only be generated if it cannot be omitted. -This is listed in the move section of the tables by giving a list -of tuples: -.DS -( source, destination, codepart [ , costfield ] ) -.DE -where the square brackets mean the costfield is optional. -Example for the PDP-11 -.DS -MOVES: -( CONST %[off]==0 , SOURCE2, "clr %[2]" ) -( SOURCE2, SOURCE2, "mov %[1],%[2]" ) -.DE -The moves are scanned from top to bottom, -so the first one that matches will be chosen. 
-.NH 2 -Test code rules -.PP -When issuing a test() call as described above, -the code generator has to know which instruction -to use for the test. -The code will only be generated if the condition codes -were not already set to the token. -This is listed in the test section of the tables by giving -a list of tuples: -.DS -( source, codepart [ , costfield ] ) -.DE -Example for the PDP-11 -.DS -TESTS: -( SOURCE2, "tst %[1]") -( DREG, "tstf %[1]\encfcc") -.DE -The tests are scanned from top to bottom, -so the first one that matches will be chosen. -.NH 2 -Stacking code rules. -.PP -When the code generator has to stack a token it must know -which code to use. -Since it must at all times be possible to empty the fakestack -even when no registers are free, -it is mandatory that all -tokens used must have a rule attached for stacking them -without using a scratch register. -Since however this might be clumsy and -a register might in practice be available -it is also possible to give rules -which use a register. -On the Intel 8086 for example, -there is no instruction to push a constant without using a register, -and the code needed to do it without, must use global data -and as such is very complicated and wasteful of memory and time. -It can therefore be left to be used in extreme cases, -while in general the constant is pushed through a register. -The stacking rules are listed in the stack section of the table as a list -of tuples: -.DS -(source, [ register property ] , codepart [ , costfield ] ) -.DE -Example for the Intel 8086: -.DS -STACKS: -(CONST, REG, move(%[1],%[a]) "push %[a]") -(REG ,, "push %[1]") -.DE -.NH 1 -The files mach.h and mach.c -.PP -The table writer must also supply two files containing -machine dependent declarations and C code. -These files are mach.h and mach.c. -.NH 2 -Types in the code generator -.PP -Three different types of integer coexist in the code generator -and their range depends on the machine at hand. 
-The type 'int' is used for things like labelcounters that won't require -more than 16 bits precision. -The type 'word' is used among others to assemble datawords and -is of type 'long'. -The type 'full' is used for addresses and is of type 'long' if -EM_WSIZE>2 or EM_PSIZE>2. -.PP -In macro and function definitions in later paragraphs implicit typing -will be used for parameters, that is parameters starting with an 's' -will be of type string, and the letters 'i','w','f' will stand for -int, word and full respectively. -.NH 2 -Global variables to work with -.PP -Some global variables are present in the code generator -that can be manipulated by the routines in mach.h and mach.c. -.LP -The declarations are: -.DS L -.ta 20 -FILE *codefile; /* code is emitted on this stream */ -word part_word; /* words to be output are put together here */ -int part_size; /* number of bytes already put in part_word */ -char str[]; /* Last string read in */ -long argval; /* Last int read and kept */ -.DE -.NH 2 -Macros in mach.h -.PP -In the file mach.h a collection of macros is defined that have -to do with formatting of assembly code for the machine at hand. -Some of these macros can of course be left undefined in which case the -macro calls are left in the source and will be treated as -function calls. -These functions can then be defined in \fImach.c\fR. -.PP -The macros to be defined are: -.IP ex_ap(s) 16 -Must print the magic incantations that will mark the symbol \fIs\fR -to be exported to other modules. -This is the translation of the EM \fBexa\fP and \fBexp\fP instructions. -.IP in_ap(s) -Same to import the symbol. -Translation of \fBina\fP and \fBinp\fP. -.IP newplb(s) -Must print the definition of procedure label \fIs\fR. -If left undefined the newilb() macro is used instead. -.IP newilb(s) -Must print the definition of instruction label \fIs\fR. -.IP newdlb(s) -Must print the definition of data label \fIs\fR.
-.IP dlbdlb(s1,s2) -Must define data label -.I s1 -to be equal to -.I s2 . -.IP newlbss(s,f) -Must declare a piece of memory initialized to BSS_INIT(see below) -of length -.I f -and with label -.I s . -.IP cst_fmt -Format to be used when converting constant arguments of -EM instructions to string. -Argument to be formatted will be 'full'. -.IP off_fmt -Format to be used for integer part of label+constant, -argument will be 'full'. -.IP fmt_ilb(ip,il,s) -Must use the numbers -.I ip -and -.I il -which are a procedure number -and a label number respectively and copy a string to -.I s -that must be unique for that combination. -This procedure is optional, if it is not given ilb_fmt -must be defined as below. -.IP ilb_fmt -Format to be used for creation of unique instruction labels. -Arguments will be a unique procedure number (int) and the label -number (int). -.IP dlb_fmt -Format to be used for printing numeric data labels. -Argument will be 'int'. -.IP hol_fmt -Format to be used for generation of labels for -space generated by a -.B hol -pseudo. -Argument will be 'int'. -.IP hol_off -Format to be used for printing of the address of an element in -.B hol -space. -Arguments will be the offset in the -.B hol -block (word) and the number of the -.B hol -(int). -.IP con_cst(w) -Must generate output that will assemble into one machineword. -.IP con_ilb(s) -Must generate output that will put the address of the instruction label -into the datastream. -.IP con_dlb(s) -Must generate output that will put the address of the data label -into the datastream. -.IP fmt_id(sf,st) -Must take the string in -.I sf -which is a nonnumeric global label, and transform it into a copy made to -.I st -which will not collide with reserved assembler words and system labels. -This procedure is optional, if it is not given the id_first macro is used -as defined below. -.IP id_first -Must be a character. 
-This is prepended to all nonnumeric global labels if their length -is shorter than the maximum allowed(currently 8) or if they already -start with that character. -This is to avoid conflicts of user labels with system labels. -.IP BSS_INIT -Must be a constant. -This is the value filled in all the words not initialized explicitly. -This is loader and system dependent. -If omitted no initialization is assumed. -.NH 3 -Example mach.h for the PDP-11 -.DS L -.ta 8 16 24 32 40 48 56 -#define ex_ap(y) fprintf(codefile,"\et.globl %s\en",y) -#define in_ap(y) /* nothing */ - -#define newplb(x) fprintf(codefile,"%s:\en",x) -#define newilb(x) fprintf(codefile,"%s:\en",x) -#define newdlb(x) fprintf(codefile,"%s:\en",x) -#define dlbdlb(x,y) fprintf(codefile,"%s=%s\en",x,y) -#define newlbss(l,x) fprintf(codefile,"%s:.=.+%d.\en",l,x); - -#define cst_fmt "$%d." -#define off_fmt "%d." -#define ilb_fmt "I%x_%x" -#define dlb_fmt "_%d" -#define hol_fmt "hol%d" - -#define hol_off "%ld.+hol%d" - -#define con_cst(x) fprintf(codefile,"%ld.\en",x) -#define con_ilb(x) fprintf(codefile,"%s\en",x) -#define con_dlb(x) fprintf(codefile,"%s\en",x) - -#define id_first '_' -#define BSS_INIT 0 -.DE -.NH 2 -Functions in mach.c -.PP -In mach.c some functions must be supplied, -mostly manipulating data resulting from pseudoinstructions. -The specifications are given here, -implicit typing of parameters as above. -.IP con_part(isz,word) 20 -This function must manipulate the globals -part_word and part_size to append the isz bytes -contained in word to the output stream. -If part_word is full, i.e. part_size==EM_WSIZE -the function part_flush() may be called to empty the buffer. -This is the function that must go through the trouble of -doing byte order in words correct. -.IP con_mult(w_size) -This function must take the string str[] and create an integer -from the string of size w_size and generate code to assemble global -data for that integer. 
-Only the sizes for which arithmetic is implemented need be -handled, -so if 200-byte integer division is not implemented, -200-byte integer global data do not have to be implemented. -Here one must take care of word order in long integers. -.IP con_float() -This function must generate code to assemble a floating -point number of which the size is contained in argval -and the ASCII representation in str[]. -.IP prolog(f_nlocals) -This function is called at the start of every procedure. -Function prolog code must be generated, -and room made for local variables for a total of f_nlocals bytes. -.IP mes(w_mesno) -This function is called when a -.B mes -pseudo is seen that is not handled by the machine independent part. -The example below probably shows all the table writer ever has to know -about that. -.IP segname[] -This is not a function, -but an array of four strings. -These strings are put out whenever the code generator -switches segments. -Segments are SEGTXT, SEGCON, SEGROM and SEGBSS in that order. -.NH 3 -Example mach.c for the PDP-11 -.PP -As an example of the sort of code expected, -the mach.c for the PDP-11 is presented here. -.DS L -.ta 8 16 24 32 40 48 56 64 -/* - * machine dependent back end routines for the PDP-11 - */ - -con_part(sz,w) register sz; word w; { - - while (part_size % sz) - part_size++; - if (part_size == EM_WSIZE) - part_flush(); - if (sz == 1) { - w &= 0xFF; - if (part_size) - w <<= 8; - part_word |= w; - } else { - assert(sz == 2); - part_word = w; - } - part_size += sz; -} - -con_mult(sz) word sz; { - long l; - - if (sz != 4) - fatal("bad icon/ucon size"); - l = atol(str); - fprintf(codefile,"\et%o;%o\en",(int)(l>>16),(int)l); -} - -con_float() { - double f; - register short *p,i; - - /* - * This code is correct only when the code generator is - * run on a PDP-11 or VAX-11 since it assumes native - * floating point format is PDP-11 format. 
- */ - - if (argval != 4 && argval != 8) - fatal("bad fcon size"); - f = atof(str); - p = (short *) &f; - i = *p++; - if (argval == 8) { - fprintf(codefile,"\et%o;%o;",i,*p++); - i = *p++; - } - fprintf(codefile,"\et%o;%o\en",i,*p++); -} - -prolog(nlocals) full nlocals; { - - fprintf(codefile,"mov r5,-(sp)\enmov sp,r5\en"); - if (nlocals == 0) - return; - if (nlocals == 2) - fprintf(codefile,"tst -(sp)\en"); - else - fprintf(codefile,"sub $%d.,sp\en",nlocals); -} - -mes(type) word type; { - int argt ; - - switch ( (int)type ) { - case ms_ext : - for (;;) { - switch ( argt=getarg( - ptyp(sp_cend)|ptyp(sp_pnam)|sym_ptyp) ) { - case sp_cend : - return ; - default: - strarg(argt) ; - fprintf(codefile,".globl %s\en",argstr) ; - break ; - } - } - default : - while ( getarg(any_ptyp) != sp_cend ) ; - break ; - } -} - -char *segname[] = { - ".text", /* SEGTXT */ - ".data", /* SEGCON */ - ".data", /* SEGROM */ - ".bss" /* SEGBSS */ -}; -.DE -.NH 1 -Coercions -.PP -A central part in code generation is taken by the -.I coercions . -It is the responsibility of the table writer to provide -all necessary coercions so that code generation can continue. -The very minimal set of coercions are -the coercions to unstack every token expression, -in combination with the rules to stack every token. -.PP -If these are present the code generator can always make the necessary -transformations by stacking and unstacking. -Of course for codequality it is usually best to provide extra coercions -to prevent this stacking to take place. -.I Cg -discriminates three types of coercions: -.IP 1) -Unstacking coercions. -This category can use the allocate() call in its code. -.IP 2) -Splitting coercions, these are the coercions that split -larger tokens into smaller ones. -.IP 3) -Transforming coercions, these are the coercions that transform -a token into another one of the same size. -This category can use the allocate() call in its code. 
-.PP -When a stack configuration does not match the stack pattern -.I coercions -are searched for in the following order: -.IP 1) -First tokens are split if necessary to get their sizes right. -.IP 2) -Then transforming coercions are found that will make the pattern match. -.IP 3) -Finally if the stack pattern is longer than the fakestack contents -unstacking coercions will be used to fill up the pattern. -.PP -At any point, when coercions are missing so code generation could not -continue, the offending tokens are stacked. -.NH 1 -Internal workings of the code generator. -.NH 2 -Description of tables.c and tables.h contents -.PP -In this section the intermediate files will be described -that are produced by -.I cgg -and compiled with machine independent code to produce a code generator. -.NH 3 -Tables.c -.PP -Tables.c contains a large number of initialized arrays of all sorts. -Description of each follows: -.br -.in 1i -.ti -0.5i -byte coderules[] -.br -Pseudo code interpreted by the code generator. -Always starts with some opcode followed by operands depending -on the opcode. -Integers in this table are between 0 and 32767 and have a one byte -encoding if between 0 and 127. -.ti -0.5i -char stregclass[] -.br -Number of computed static register class per register. -Two registers are in the same class if they have the same properties -and don't share a common subregister. -.ti -0.5i -struct reginfo machregs[] -.br -Info per register. -Initialized with representation string, size, -members of the register and set of registers affected when this -one is changed. -Also contains room for runtime information, -like contents and reference count. -.ti -0.5i -tkdef_t tokens[] -.br -Information per tokentype. -Initialized with size, cost, type of operands and formatstring. -.ti -0.5i -node_t enodes[] -.br -List of triples representing expressions for the code generator. -.ti -0.5i -string codestrings[] -.br -List of strings.
-All strings are put in a list and checked for duplication, -so only one copy per string will reside here. -.ti -0.5i -set_t machsets[] -.br -List of token expression sets. -Bit 0 of the set is used for the SCRATCH property of registers, -bit 1 upto NREG are for the corresponding registers -and bit NREG+1 upto the end are for corresponding tokens. -.ti -0.5i -inst_t tokeninstances[] -.br -List of descriptions for building tokens. -Contains type of rule for building one, -plus operands depending on the type. -.ti -0.5i -move_t moves[] -.br -List of move rules. -Contains token expressions for source and destination -plus cost and index for code rule. -.ti -0.5i -byte pattern[] -.br -EM patterns. -This is structured internally as chains of patterns, -each chain pointed at by pathash[]. -After each pattern the list of possible code rules is given. -.ti -0.5i -int pathash[256] -.br -Indices into pattern[] for all patterns with a certain low order -byte of the hashing function. -.ti -0.5i -c1_t c1coercs[] -.br -List of rules to stack tokens. -Contains token expressions, -register needed, -cost -and code rule. -.ti -0.5i -c2_t c2coercs[] -.br -List of splitting coercions. -Token expressions, -split factor, -replacements -and code rule. -.ti -0.5i -c3_t c3coercs[] -.br -List of one to one coercions. -Token expressions, -register needed, -replacement -and code rule. -.ti -0.5i -struct reginfo **reglist[] -.br -List of lists of pointers to register information. -For every property the list is here -to find the registers corresponding to it. -.in 0 -.NH 3 -tables.h -.PP -In tables.h various derived constants for the tables are -given. -They are then used to determine array sizes in the actual code generator, -plus loop termination in some cases. -.NH 2 -Other important data structures -.PP -During code generation some other data structures are used -and here is a short description of some of the important ones. 
-.PP -Tokens are kept in the code generator as a struct consisting of -one integer -.I t_token -which is -1 if the token is a register, -and the number of the token otherwise, -plus an array of -.I TOKENSIZE -unions -.I t_att -of which the first is the register number in case of a register. -.PP -The fakestack is an array of these tokens, -there is a global variable -.I stackheight . -.PP -The results of expressions are kept in a struct -.I result -with elements -.I e_typ , -giving the type of the expression: -.I EV_INT , -.I EV_REG -or -.I EV_STR , -and a union -.I e_v -which contains the real result. -.NH 2 -A tour through the sources -.NH 3 -codegen.c -.PP -The file codegen.c contains one large function consisting -of one giant switch statement. -It is the interpreter for the code generator pseudo code -as contained in coderules[]. -This function can call itself recursively when doing lookahead. -Arguments are: -.IP codep 10 -Pointer into code rules, pseudo program counter. -.IP ply -Number of EM pattern lookahead allowed. -.IP toplevel -Boolean telling whether this is the toplevel codegen() or -a deeper incarnation. -.IP costlimit -A cutoff value to limit searches. -If the cost crosses costlimit the incarnation can terminate. -.IP forced -A register number if nonzero. -This is used inside coercions to force the allocate() call to allocate -a register determined by earlier lookahead. -.PP -The instructions implemented in the switch: -.NH 4 -DO_NEXTEM -.PP -Matches the next EM pattern and does lookahead if necessary to find the best -code rule associated with this pattern. -Heuristics are used to determine best code rule when possible. -This is done by calling the distance() function. -.NH 4 -DO_COERC -.PP -This sets the code generator in the state to do a from stack coercion. -.NH 4 -DO_XMATCH -.PP -This is done when a match no longer has to be checked. -Used when the nocoercions: trick is used in the table.
-.NH 4 -DO_MATCH -.PP -This is the big one inside this function. -It has the task to transform the contents of the current -fakestack to match the pattern given after it. -.PP -Since the code generator does not know combining coercions, -i.e. there is no way to make a big token out of two smaller ones, -the first thing done is to stack every token that is too small. -After that all tokens too big are split if possible to the right size. -.PP -Next the coercions are sought that would transform tokens in place to -the right one, plus the coercions that would pop tokens of the stack. -Each of those might need a register, so a list of registers is generated -and at the end of looking for coercions the function -.I tuples() -is called to generate the list of all possible \fIn\fP-tuples, -where -.I n -equals the number of registers needed. -.PP -Lookahead is now performed if the number of tuples is greater than one. -If no possibility is found within the costlimit, -the fakestack is made smaller by pushing the bottom token, -and this process is repeated until either a way is found or -the fakestack is completely empty and there is still no way -to make the match. -.PP -If there is a way the corresponding coercions are executed -and the code is finished. -.NH 4 -DO_REMOVE -.PP -Here the remove() call is executed, all tokens matched by the -token expression plus boolean expression are pushed. -In the current implementation there is no attempt to move those -tokens to registers, but that is a possible future extension. -.NH 4 -DO_DEALLOCATE -.PP -This one temporarily decrements by one the reference count of all registers -contained in the token given as argument. -.NH 4 -DO_REALLOCATE -.PP -Here all temporary deallocates are made undone. -.NH 4 -DO_ALLOCATE -.PP -This is the part that allocates a register and decides which one to use. -If the -.I forced -argument was given its task is simple, -otherwise some work must be done. 
-First the list of possible registers is scanned, -all free registers noted and it is noted whether any of those -registers is already -containing the initialization. -If no registers are available some fakestack token is stacked and the -process is repeated. -.PP -After that if an exact match was found, -the list of registers is reduced to one register matching exactly -out of every register class. -Now lookahead is performed if necessary and the register chosen. -If an initialization was given the corresponding move is performed, -otherwise the register is marked empty. -.NH 4 -DO_LOUTPUT -.PP -This prints a string and an expression. -Only done on toplevel. -.NH 4 -DO_ROUTPUT -.PP -Prints a string and a new line. -Only on toplevel. -.NH 4 -DO_MOVE -.PP -Calls the move() function in the code generator to implement the move() -function in the table. -.NH 4 -DO_ERASE -.PP -Marks the register that is its argument as empty. -.NH 4 -DO_TOKREPLACE -.PP -This is the token replacement part. -It is also called if there is no token replacement because it has -some other functions as well. -.PP -First the tokens that will be pushed on the fakestack are computed -and stored in a temporary array. -Then the tokens that were matched in this rule are popped -and their embedded registers have their reference count -decremented. -After that the replacement tokens are pushed. -.PP -Finally all registers allocated in this rule have their reference count -decremented. -If they were not pushed on the fakestack they will be available again -in the next code rule. -.NH 4 -DO_EMREPLACE -.PP -Places replacement EM instructions back into the instruction stream. -.NH 4 -DO_COST -.PP -Accounts for cost as given in the code rule. -.NH 4 -DO_RETURN -.PP -Returns from this level of codegen(). -Is used at the end of coercions, -move rules etc.. -.NH 3 -compute.c -.PP -This module computes the various expressions as given -in the enodes[] array. 
-Nothing very special happens here, -it is just a recursive function computing leaves -of expressions and applying the operator. -.NH 3 -equiv.c -.PP -In this module the tuples() function is implemented. -It is given the number of registers needed and -a list of register lists and it constructs a list of tuples -where the \fIn\fP'th register comes from the \fIn\fP'th list. -Before the list is constructed however -the dynamic register classes are computed. -Two registers are in the same dynamic class if they are in the -same static class and their contents is the same. -.PP -After that the permute() recursive function is called to -generate the list of tuples. -After construction a generated tuple is added to the list -if it is not already pairwise in the same class -or if the register relations are not the same, -i.e. if the first and second register share a common -subregister in one tuple and not in the other they are considered different. -.NH 3 -fillem.c -.PP -This is the routine that does the reading of EM instructions -and the handling of pseudos. -The mach.c module provided by the table writer is included -at the end of this module. -The routine fillemlines() is called by nextem() at toplevel -to make sure there are enough instruction to match. -It fills the EM instruction buffer up to 5 places from the end to -keep room for EM replacement instructions, -or up to a pseudo. -.PP -The dopseudo() function performs the function of the pseudo last -encountered. -If the pseudo is a -.B rom -the corresponding label is saved with the contents of the -.B rom -to be available to the code generator later. -The rest of the routines are small service routines for either -input or data output. -.NH 3 -gencode.c -.PP -This module contains routines called by codegen() to generate the real -code to the codefile. 
-The function gencode() gets a string as argument and copies it to codefile -while processing certain embedded control characters implementing -the $2 and [1.reg] escapes. -The function genexpr() prints the expression given as argument. -It is used to implement the %(\ expr\ %) escape. -The prtoken() function interprets the tokenformat as given in -the tokens[] array. -.NH 3 -glosym.c -.PP -This module maintains a list of global symbols that have a -.B rom -pseudo associated. -There are functions to enter a symbol and to find a symbol. -.NH 3 -main.c -.PP -Main routine of the code generator. -Processes arguments and flags. -Flags available are: -.IP -d -Sets debug mode if the code generator was not compiled with -the NDEBUG macro defined. -Debug mode gives very long output on stderr indicating -all steps of the code generation process including nesting -of the codegen() function. -.IP -p\fIn\fP -Sets the lookahead depth to -.I n , -the -.I p -stands for ply, -a well known word in chess playing programs. -.IP -w\fIn\fP -Sets the weight percentage for size in the cost function to -.I n -percent. -Uses Euclides algorithm to simplify rationals. -.NH 3 -move.c -.PP -Function to implement the move() pseudo function in the tables, -register initialization and the setcc and test pseudo functions. -First tests are made to try to prevent the move from really happening. -The condition code register is treated special here. -After that, if there is an after that, -the move rule is found and the code executed. -.NH 3 -nextem.c -.PP -The entry point of this module is nextem(). -It hashes the next three EM instructions, -and uses the low order byte of the hash -as an index into the array pathash[], -to find a chain of patterns in the array -pattern[], -that are all tried for a match. -.PP -The function trypat() does most of the work -checking patterns. -When a pattern is found to match all instructions -the operands of the instruction are placed into the dollar[] array. 
-Then the boolean expression is tried. -If it matches the function can return, -leaving the operands still in the dollar[] array, -so later in the code rule they can still be used. -.NH 3 -reg.c -.PP -Collection of routines to handle registers. -Reference count routines are here, -chrefcount() and getrefcount(), -plus routines to erase a single register or all of them, -erasereg() and cleanregs(). -.PP -If NDEBUG hasn't been defined, here is also the routine that checks -if the reference count kept with the register information is in -agreement with the number of times it occurs on the fakestack. -.NH 3 -salloc.c -.PP -Module for string allocation and garbage collection. -Contains entry points myalloc(), -a routine calling malloc() and checking whether room is left, -myfree(), just free(), -popstr() a function called from state.c to free all strings -made since the last saved status. -Furthermore there is salloc() which has the size of the string as parameter -and returns a pointer to the allocated space, -while keeping a copy of the pointer for garbage allocation purposes. -.PP -The function garbage_collect is called from codegen() at toplevel -every now and then, -and checks all places where strings may reside to mark strings -as being in use. -Strings not in use are returned to the pool of free space. -.NH 3 -state.c -.PP -Set of routines called to save current status, -restore a previous saved state and to free the room -occupied by a saved state. -A list of structs is kept here to save the state. -If this is not done, -small allocates will take space -from the holes big enough for state saves, -and as a result every new state save will need a new struct. -The code generator runs out of room very rapidly under these conditions. -.NH 3 -subr.c -.PP -Random set of leftover routines. -.NH 4 -match -.PP -Computes whether a certain token matches a certain token expression. 
-Just computes a bitnumber according to the algorithm explained with -machsets[], -and tests the bit and the boolean expression if it is there. -.NH 4 -instance,cinstance -.PP -These two functions compute a token from a description. -They differ very slightly, cinstance() is used to compute -the result of a coercion in a certain context -and therefore has more arguments, which it uses instead of -the global information instance() works on. -.NH 4 -eqtoken -.PP -eqtoken computes whether two tokens can be considered identical. -Used to check register contents during moves mainly. -.NH 4 -distance -.PP -This is the heuristic function that computes a distance from -the current fakestack contents to the token pattern in the table. -It likes exact matches most, then matches where at least the sizes are correct -and if the sizes are not correct it likes too large sizes more than too -small, since splitting a token is easier than combining one. -.NH 4 -split -.PP -This function tries to find a splitting coercion -and executes it immediately when found. -The fakestack is shuffled thoroughly when this happens, -so pieces below the token that must be split are saved first. -.NH 4 -docoerc -.PP -This function executes a coercion that was found. -The same shuffling is done, so the top of the stack is again saved. -.NH 4 -stackupto -.PP -This function gets a pointer into the fakestack and must stack -every token including the one pointed at up to the bottom of the fakestack. -The first stacking rule possible is used, -so rules using registers must come first. -.NH 4 -findcoerc -.PP -Looks for a one to one coercion, if found it returns a pointer -to it and leaves a list of possible registers to use in the global -variable curreglist. -This is used by codegen(). -.NH 3 -var.c -.PP -Global variables used by more than one module. -External definitions are in extern.h.
diff --git a/doc/cref.doc b/doc/cref.doc deleted file mode 100644 index 76c4f84d6..000000000 --- a/doc/cref.doc +++ /dev/null @@ -1,323 +0,0 @@ -.\" $Header$ -.nr ID 4 -.de hd -'sp 2 -'tl ''-%-'' -'sp 3 -.. -.de fo -'bp -.. -.tr ~ -. TITLE -.de TL -.sp 15 -.ce -\\fB\\$1\\fR -.. -. AUTHOR -.de AU -.sp 15 -.ce -by -.sp 2 -.ce -\\$1 -.. -. DATE -.de DA -.sp 3 -.ce -( Dated \\$1 ) -.. -. INSTITUTE -.de VU -.sp 3 -.ce 4 -Wiskundig Seminarium -Vrije Universteit -De Boelelaan 1081 -Amsterdam -.. -. PARAGRAPH -.de PP -.sp -.ti +\n(ID -.. -.nr CH 0 1 -. CHAPTER -.de CH -.nr SH 0 1 -.bp -.in 0 -\\fB\\n+(CH.~\\$1\\fR -.PP -.. -. SUBCHAPTER -.de SH -.sp 3 -.in 0 -\\fB\\n(CH.\\n+(SH.~\\$1\\fR -.PP -.. -. INDENT START -.de IS -.sp -.in +\n(ID -.. -. INDENT END -.de IE -.in -\n(ID -.sp -.. -.de PT -.ti -\n(ID -.ta \n(ID -.fc " @ -"\\$1@"\c -.fc -.. -. DOUBLE INDENT START -.de DS -.sp -.in +\n(ID -.ll -\n(ID -.. -. DOUBLE INDENT END -.de DE -.ll +\n(ID -.in -\n(ID -.sp -.. -. EQUATION START -.de EQ -.sp -.nf -.. -. EQUATION END -.de EN -.fi -.sp -.. -. ITEM -.de IT -.sp -.in 0 -\\fB~\\$1\\fR -.ti +5 -.. -.de CS -.br -~-~\\ -.. -.br -.fi -.TL "Ack-C reference manual" -.AU "Ed Keizer" -.DA "September 12, 1983" -.VU -.wh 0 hd -.wh 60 fo -.CH "Introduction" -The C frontend included in the Amsterdam Compiler Kit -translates UNIX-V7 C into compact EM code [1]. -The language accepted is described in [2] and [3]. -This document describes which implementation dependent choices were -made in the Ack-C frontend and -some restrictions and additions. -.CH "The language" -.PP -Under the same heading as used in [2] we describe the -properties of the Ack-C frontend. -.IT "2.2 Identifiers" -External identifiers are unique up to 7 characters and allow -both upper and lower case. -.IT "2.3 Keywords" -The word \fBvoid\fP is also reserved as a keyword. -.IT "2.4.3 Character constants" -The ASCII-mapping is used when a character is converted to an -integer. 
-.IT "2.4.4 Floating constants" -To prevent loss of precision the compiler does not perform -floating point constant folding. -.IT "2.6 Hardware characteristics" -The size of objects of the several arithmetic types and -pointers depends on the EM-implementation used. -The ranges of the arithmetic types depend on the size used, -the C-frontend assumes two's complement representation for the -integral types. -All sizes are multiples of bytes. -The calling program \fIack\fP[4] passes information about the -size of the types to the compiler proper. -.br -However, a few general remarks must be made: -.sp 1 -.IS -.PT (a) -The size of pointers is a multiple of -(or equal to) the size of an \fIint\fP. -.PT (b) -The following relations exist for the sizes of the types -mentioned: -.br -.ti +5 -\fIchar<=short<=int<=long\fP -.PT (c) -Objects of type \fIchar\fP use one 8-bit byte of storage, -although several bytes are allocated sometimes. -.PT (d) -All sizes are in multiples of bytes. -.PT (e) -Most EM implementations use 4 bytes for floats and 8 bytes -for doubles, but exceptions to this rule occur. -.IE -.IT "4 What's in a name" -The type \fIvoid\fP is added. -Objects of type void do not exist. -Functions declared as returning void, do not return a value at all. -.IT "6.1 Characters and integers" -Objects of type \fIchar\fP are unsigned and do not cause -sign-extension when converted to \fIint\fP. -The range of character values is from 0 to 255. -.IT "6.3 Floating and integral" -Floating point numbers are truncated towards zero when -converted to the integral types. -.IT "6.4 Pointers and integers" -When a \fIlong\fP is added to or subtracted from a pointer and -longs are larger than pointers the \fIlong\fP is converted to an -\fIint\fP before the operation is performed. -.IT "7.2 Unary operators" -It is allowed to cast any expression to the type \fIvoid\fP.
-.IT "8.2 Type specifiers" -One type is added to the type-specifiers: -.br -.IS -void -.IE -.IT "8.5 Structure and union declarations" -The only type allowed for fields is \fIint\fP. -Fields with exactly the size of \fIint\fP are signed, -all other fields are unsigned. -.br -The size of any single structure must be less than 4096 bytes. -.IT "8.6 Initialization" -Initialization of structures containing bit fields is not -allowed. -There is one restriction when using an 'address expression' to initialize -an integral variable. -The integral variable must have the same size as a pointer. -Conversions altering the size of the address expression are not allowed. -.IT "9.10 Return statement" -Return statements of the form: -.IS - return ; -.IE -are the only form of return statement allowed in a function of type -function returning void. -.IT "10.1 External function definitions" -The total amount of storage used for parameters -in any function must be less than 4096 bytes. -The same holds for the total amount of storage occupied by the -automatic variables declared inside any function. -.sp -Using formal parameters whose size is smaller than the size of an int -is less efficient on several machines. -At procedure entry these parameters are converted from integer to the -declared type, because the compiler doesn't know where the least -significant bytes are stored in the int. -.IT "11.2 Scope of externals" -Most C compilers are rather lax in enforcing the restriction -that only one external definition without the keyword -\fIextern\fP is allowed in a program. -The Ack-C frontend is very strict in this. -The only exception is that declarations of arrays with a -missing first array bounds expression are regarded to have an -explicit keyword \fIextern\fP. -.IT "14.4 Explicit pointer conversions" -Pointers may be larger than ints, thus assigning a pointer to an -int and back will not always result in the same pointer.
-The process mentioned above works with integrals -of the same size as or larger than pointers in all EM implementations -having such integrals. -When converting pointers to an integral type or vice-versa, -the pointer is seen as an unsigned int. -.br -EM guarantees that any object can be placed at a word boundary, -this allows the C-programs to use \fIint\fP pointers -as pointers to objects of any type not smaller than an \fIint\fP. -.CH "Frontend options" -The C-frontend has a few options, these are controlled -by flags: -.IS -.PT -V -This flag is followed by a sequence of letters each followed by -positive integers. Each letter indicates a -certain type, the integer following it specifies the size of -objects of that type. One letter indicates the wordsize used. -.IS -.sp 1 -.TS -center tab(:); -l l16 l l. -letter:type:letter:type - -w:wordsize:i:int -s:short:l:long -f:float:d:double -p:pointer:: -.TE -.sp 1 -All existing implementations use an integer size equal to the -wordsize. -.IE -The calling program \fIack\fP[4] provides the frontend with -this flag, with values depending on the machine used. -.sp 1 -.PT -l -The frontend normally generates code to keep track of the line -number and source file name at runtime for debugging purposes. -Currently a pointer to a -string containing the filename is stored at a fixed place in -memory at each function -entry and the line number at the start of every expression. -At the return from a function these memory locations are not reset to -the values they had before the call. -Most library routines do not use this feature and thus do not -ruin the current line number and filename when called. -However, you are really unlucky when your program crashes due -to a bug in such a library function, because the line number -and filename do not indicate that something went wrong inside -the library function. -.br -Providing the flag -l to the frontend tells it not to generate -the code updating line number and file name.
-This is, for example, used when translating the stdio library. -.br -When the \fIack\fP[4] is called with the -L flag it provides -the frontend with this flag. -.sp 1 -.PT -Xp -When this flag is present the frontend generates a call to -the function \fBprocentry\fP at each function entry and a -call to \fBprocexit\fP at each function exit. -Both functions are provided with one parameter, -a pointer to a string containing the function name. -.br -When \fIack\fP is called with the -p flag it provides the -frontend with this flag. -.IE -.CH References -.IS -.PT [1] -A.S. Tanenbaum, Hans van Staveren, Ed Keizer and Johan -Stevenson \fIDescription of a machine architecture for use with -block structured languages\fP Informatica report IR-81. -.sp 1 -.PT [2] -B.W. Kernighan and D.M. Ritchie, \fIThe C Programming -language\fP, Prentice-Hall, 1978 -.PT [3] -D.M. Ritchie, \fIC Reference Manual\fP -.sp -.PT [4] -UNIX manual ack(I). diff --git a/doc/crefman.doc b/doc/crefman.doc deleted file mode 100644 index 7378c072d..000000000 --- a/doc/crefman.doc +++ /dev/null @@ -1,629 +0,0 @@ -\." $Id$ -.\" eqn crefman.doc | troff -ms -.EQ -delim $$ -.EN -.RP -.TL -ACK/CEM Compiler -.br -Reference Manual -.AU -Erik H. Baalbergen -.AI -Department of Mathematics and Computer Science -Vrije Universiteit -Amsterdam -The Netherlands -.AB no -.AE -.NH -C Language -.PP -This section discusses the extensions to and deviations from the C language, -as described in [1]. -The issues are numbered according to the reference manual. -.SH -2.2 Identifiers -.PP -Upper and lower case letters are different. -The number of significant letters -is 32 by default, but may be set to another value using the \fB\-M\fP option. -The identifier length should be set according to the rest of the compilation -programs. -.SH -2.3 Keywords -.SH -\f(CWasm\fP -.PP -The keyword \f(CWasm\fP -is recognized. -However, the statement -.DS -.ft CW -asm(string); -.ft R -.DE -is skipped, while a warning is given. 
-.SH -\f(CWenum\fP -.PP -The \f(CWenum\fP keyword is recognized and interpreted. -.SH -\f(CWentry\fP, \f(CWfortran\fP -.PP -The words \f(CWentry\fP and \f(CWfortran\fP -are reserved under the restricted option. -The words are not interpreted by the compiler. -.SH -2.4.1 Integer Constants -.PP -The type of an integer constant is the first of the corresponding list -in which its value can be represented. Decimal: \f(CWint, long, unsigned long\fP; -octal or hexadecimal: \f(CWint, unsigned, long, unsigned long\fP; suffixed by -the letter L or l: \f(CWlong, unsigned long\fP. -.SH -2.4.3 Character Constants -.PP -A character constant is a sequence of 1 up to \f(CWsizeof(int)\fP characters -enclosed in single quotes. -The value of a character constant '$c sub 1 c sub 2 ... c sub n$' -is $d sub n + M \(mu d sub {n - 1} + ... + M sup {n - 1} \(mu d sub 2 + M sup n \(mu d sub 1$, -where M is 1 + maximum unsigned number representable in an \f(CWunsigned char\fP, -and $d sub i$ is the signed value (ASCII) -of character $c sub i$. -.SH -2.4.4 Floating Constants -.PP -The compiler does not support compile-time floating point arithmetic. -.SH -2.6 Hardware characteristics -.PP -The compiler is capable of producing EM code for machines with the following -properties -.IP \(bu -a \f(CWchar\fP is 8 bits -.IP \(bu -the size of \f(CWint\fP is equal to the word size -.IP \(bu -the size of \f(CWshort\fP may not exceed the size of \f(CWint\fP -.IP \(bu -the size of \f(CWint\fP may not exceed the size of \f(CWlong\fP -.IP \(bu -the size of pointers is equal to the size of either \f(CWshort\fP, \f(CWint\fP -or \f(CWlong\fP -.LP -.SH -4 What's in a name? -.SH -\f(CWchar\fP -.PP -Objects of type \f(CWchar\fP are taken to be signed. -The combination \f(CWunsigned char\fP is legal. -.SH -\f(CWunsigned\fP -.PP -The type combinations \f(CWunsigned char\fP, \f(CWunsigned short\fP and -\f(CWunsigned long\fP are supported. 
-.SH -\f(CWenum\fP -.PP -The data type \f(CWenum\fP is implemented as described -in \fIRecent Changes to C\fP (see appendix A). -.I Cem -treats enumeration variables as if they were \f(CWint\fP. -.SH -\f(CWvoid\fP -.PP -Type \f(CWvoid\fP is implemented. -The type specifies an empty set of values, which takes no storage space. -.SH -\fRFundamental types\fP -.PP -The names of the fundamental types can be redefined by the user, using -\f(CWtypedef\fP. -.SH -7 Expressions -.PP -The order of evaluation of expressions depends on the complexity of the -subexpressions. -In case of commutative operations, the most complex subexpression is -evaluated first. -Parameter lists are evaluated from right to left. -.SH -7.2 Unary operators -.PP -The type of a \f(CWsizeof\fP expression is \f(CWunsigned int\fP. -.SH -7.13 Conditional operator -.PP -Both the second and the third expression in a conditional expression may -include assignment operators. -They may be structs or unions. -.SH -7.14 Assignment operators -.PP -Structures may be assigned, passed as arguments to functions, and returned -by functions. -The types of operands taking part must be the same. -.SH -8.2 Type specifiers -.PP -The combinations \f(CWunsigned char\fP, \f(CWunsigned short\fP -and \f(CWunsigned long\fP are implemented. -.SH -8.5 Structure and union declarations -.PP -Fields of any integral type, either signed or unsigned, -are supported, as long as the type fits in a word on the target machine. -.PP -Fields are left adjusted by default; the first field is put into the left -part of a word, the next one on the right side of the first one, etc. -The \f(CW-Vr\fP option in the call of the compiler -causes fields to be right adjusted within a machine word. -.PP -The tags of structs and unions occupy a different name space from that of -variables and that of member names. 
-.SH -9.7 Switch statement -.PP -The type of \fIexpression\fP in -.DS -.ft CW -\f(CWswitch (\fP\fIexpression\fP\f(CW)\fP \fIstatement\fP -.ft -.DE -must be integral. -A warning is given under the restricted option if the type is \f(CWlong\fP. -.SH -10 External definitions -.PP -See [4] for a discussion on this complicated issue. -.SH -10.1 External function definitions -.PP -Structures may be passed as arguments to functions, and returned -by functions. -.SH -11.1 Lexical scope -.PP -Typedef names may be redeclared like any other variable name; the ice mentioned -in \(sc11.1 is walked correctly. -.SH -12 Compiler control lines -.PP -Lines which do not occur within comment, and with \f(CW#\fP as first -character, are interpreted as compiler control lines. -There may be an arbitrary number of spaces, tabs and comments (collectively -referred to as \fIwhite space\fP) following the \f(CW#\fP. -Comments may contain newline characters. -Control lines with only white space between the \f(CW#\fP and the line separator -are skipped. -.PP -The #\f(CWinclude\fP, #\f(CWifdef\fP, #\f(CWifndef\fP, #\f(CWundef\fP, #\f(CWelse\fP and -#\f(CWendif\fP control lines and line directives consist of a fixed number of -arguments. -The list of arguments may be followed by an arbitrary sequence of characters, -in which comment is interpreted as such. -(I.e., the text between \f(CW/*\fP and \f(CW*/\fP is skipped, regardless of -newlines; note that commented-out lines beginning with \f(CW#\fP are not -considered to be control lines.) -.SH -12.1 Token replacement -.PP -The replacement text of macros is taken to be a string of characters, in which -an identifier may stand for a formal parameter, and in which comment is -interpreted as such. -Comments and newline characters, preceded by a backslash, in the replacement -text are replaced by a space character. -.PP -The actual parameters of a macro are considered tokens and are -balanced with regard to \f(CW()\fP, \f(CW{}\fP and \f(CW[]\fP.
-This prevents the use of macros like -.DS -.ft CW -CTL([) -.ft -.DE -.PP -Formal parameters of a macro must have unique names within the formal-parameter -list of that macro. -.PP -A message is given at the definition of a macro if the macro has -already been #\f(CWdefined\fP, while the number of formal parameters differs or -the replacement texts are not equal (apart from leading and trailing -white space). -.PP -Recursive use of macros is detected by the compiler. -.PP -Standard #\f(CWdefined\fP macros are -.DS -\f(CW__FILE__\fP name of current input file as string constant -\f(CW__DATE__\fP current date as string constant; e.g. \f(CW"Tue Wed 2 14:45:23 1986"\fP -\f(CW__LINE__\fP current line number as an integer -.DE -.PP -No message is given if \fIidentifier\fP is not known in -.DS -.ft CW -#undef \fIidentifier\fP -.ft -.DE -.SH -12.2 File inclusion -.PP -A newline character is appended to each file which is included. -.SH -12.3 Conditional compilation -.PP -The #\f(CWif\fP, #\f(CWifdef\fP and #\f(CWifndef\fP control lines may be followed -by an arbitrary number of -.DS -.ft CW -#elif \fIconstant-expression\fP -.ft -.DE -control lines, before the corresponding #\f(CWelse\fP or #\f(CWendif\fP -is encountered. -The construct -.DS -.ft CW -#elif \fIconstant-expression\fP -some text -#endif /* corresponding to #elif */ -.ft -.DE -is equivalent to -.DS -.ft CW -#else -#if \fIconstant-expression\fP -some text -#endif /* corresponding to #if */ -#endif /* corresponding to #else */ -.ft -.DE -.PP -The \fIconstant-expression\fP in #\f(CWif\fP and #\f(CWelif\fP control lines -may contain the construction -.DS -.ft CW -defined(\fIidentifier\fP) -.ft -.DE -which is replaced by \f(CW1\fP, if \fIidentifier\fP has been #\f(CWdefined\fP, -and by \f(CW0\fP, if not. -.PP -Comments in skipped lines are interpreted as such.
-.SH -12.4 Line control -.PP -Line directives may occur in the following forms: -.DS -.ft CW -#line \fIconstant\fP -#line \fIconstant\fP "\fIfilename\fP" -#\fIconstant\fP -#\fIconstant\fP "\fIfilename\fP" -.ft -.DE -Note that \fIfilename\fP is enclosed in double quotes. -.SH -14.2 Functions -.PP -If a pointer to a function is called, the function the pointer points to -is called instead. -.SH -15 Constant expressions -.PP -The compiler distinguishes the following types of integral constant expressions -.IP \(bu -field-width specifier -.IP \(bu -case-entry specifier -.IP \(bu -array-size specifier -.IP \(bu -global variable initialization value -.IP \(bu -enum-value specifier -.IP \(bu -truth value in \f(CW#if\fP control line -.LP -.PP -Constant integral expressions are compile-time evaluated while an effort -is made to report overflow. -Constant floating expressions are not compile-time evaluated. -.NH -Compiler flags -.IP \fB\-C\fR -Run the preprocessor stand-alone while maintaining the comments. -Line directives are produced whenever needed. -.IP \fB\-D\fP\fIname\fP=\fIstring-of-characters\fP -.br -Define \fIname\fR as macro with \fIstring-of-characters\fR as -replacement text. -.IP \fB\-D\fP\fIname\fP -.br -Equal to \fB\-D\fP\fIname\fP\fB=1\fP. -.IP \fB\-E\fP -Run the preprocessor stand alone, i.e., -list the sequence of input tokens and delete any comments. -Line directives are produced whenever needed. -.IP \fB\-I\fIpath\fR -.br -Prepend \fIpath\fR to the list of include directories. -To put the directories "include", "sys/h" and "util/h" into the -include directory list in that order, the user has to specify -.DS -.ft CW --Iinclude -Isys/h -Iutil/h -.ft R -.DE -An empty \fIpath\fP causes the standard include -directory (usually \f(CW/usr/include\fP) to be forgotten. -.IP \fB\-M\fP\fIn\fP -.br -Set maximum significant identifier length to \fIn\fP. -.IP \fB\-n\fP -Suppress EM register messages. 
-The user-declared variables are not stored into registers on the target -machine. -.IP \fB\-p\fP -Generate the EM \fBfil\fP and \fBlin\fP instructions in order to enable -an interpreter to keep track of the current location in the source code. -.IP \fB\-P\fP -Equivalent with \fB\-E\fP, but without line directives. -.IP \fB\-R\fP -Interpret the input as restricted C (according to the language as -described in [1]). -.IP \fB\-T\fP\fIpath\fP -.br -Create temporary files, if necessary, in directory \fIpath\fP. -.IP \fB\-U\fP\fIname\fP -.br -Get rid of the compiler-predefined macro \fIname\fP, i.e., -consider -.DS -.ft CW -#undef \fIname\fP -.ft R -.DE -to appear in the beginning of the file. -.IP \fB\-V\fIcm\fR.\fIn\fR,\ \fB\-V\fIcm\fR.\fIncm\fR.\fIn\fR\ ... -.br -Set the size and alignment requirements. -The letter \fIc\fR indicates the simple type, which is one of -\fBs\fR(short), \fBi\fR(int), \fBl\fR(long), \fBf\fR(float), \fBd\fR(double) -or \fBp\fR(pointer). -If \fIc\fR is \fBS\fP or \fBU\fP, then \fIn\fP is taken to be the initial -alignment of structs or unions, respectively. -The effective alignment of a struct or union is the least common multiple -of the initial struct/union alignment and the alignments of its members. -The \fIm\fR parameter can be used to specify the length of the type (in bytes) -and the \fIn\fR parameter for the alignment of that type. -Absence of \fIm\fR or \fIn\fR causes the default value to be retained. -To specify that the bitfields should be right adjusted instead of the -default left adjustment, specify \fBr\fR as \fIc\fR parameter. -.IP \fB\-w\fR -Suppress warning messages -.IP \fB\-\-\fIcharacter\fR -.br -Set debug-flag \fIcharacter\fP. -This enables some special features offered by a debug and develop version of -the compiler. -Some particular flags may be recognized, others may have surprising effects. -.RS -.IP \fBd\fP -Generate a dependency graph, reflecting the calling structure of functions. 
-Lines of the form -.DS -.ft CW -DFA: \fIcalling-function\fP: \fIcalled-function\fP -.ft -.DE -are generated whenever a function call is encountered. -.IP \fBf\fP -Dump whole identifier table, including macros and reserved words. -.IP \fBh\fP -Supply hash-table statistics. -.IP \fBi\fP -Print names of included files. -.IP \fBm\fP -Supply statistics concerning the memory allocation. -.IP \fBt\fP -Dump table of identifiers. -.IP \fBu\fP -Generate extra statistics concerning the predefined types and identifiers. -Works in combination with \fBf\fP or \fBt\fP. -.IP \fBx\fP -Print expression trees in human-readable format. -.RE -.LP -.SH -References -.IP [1] -Brian W. Kernighan, Dennis M. Ritchie, -.I -The C Programming Language -.R -.IP [2] -L. Rosler, -.I -Draft Proposed Standard - Programming Language C, -.R -ANSI X3J11 Language Subcommittee -.IP [3] -Erik H. Baalbergen, Dick Grune, Maarten Waage, -.I -The CEM Compiler, -.R -Informatica Manual IM-4, Dept. of Mathematics and Computer Science, Vrije -Universiteit, Amsterdam, The Netherlands -.IP [4] -Erik H. Baalbergen, -.I -Modeling global declarations in C, -.R -internal paper -.LP -.bp -.SH -Appendix A - Enumeration Type -.PP -The syntax is -.sp -.RS -.I enum-specifier : -.RS -\&\f(CWenum\fP { \fIenum-list\fP } -.br -\&\f(CWenum\fP \fIidentifier\fP { \fIenum-list\fP } -.br -\&\f(CWenum\fP \fIidentifier\fP -.RE -.sp -\&\fIenum-list\fP : -.RS -\&\fIenumerator\fP -.br -\&\fIenum-list\fP , \fIenumerator\fP -.RE -.sp -\&\fIenumerator\fP : -.RS -\&\fIidentifier\fP -.br -\&\fIidentifier\fP = \fIconstant-expression\fP -.RE -.sp -.RE -The identifier has the same role as the structure tag in a struct specification. -It names a particular enumeration type. -.PP -The identifiers in the enum-list are declared as constants, and may appear -whenever constants are required. -If no enumerators with -.B = -appear, then the values of the constants begin at 0 and increase by 1 as the -declaration is read from left to right. 
-An enumerator with -.B = -gives the associated identifier the value indicated; subsequent identifiers -continue the progression from the assigned value. -.PP -Enumeration tags and constants must all be distinct, and, unlike structure -tags and members, are drawn from the same set as ordinary identifiers. -.PP -Objects of a given enumeration type are regarded as having a type distinct -from objects of all other types. -.bp -.SH -Appendix B: C grammar in LL(1) form -.PP -The \fBbold-faced\fP and \fIitalicized\fP tokens represent terminal symbols. -.vs 16 -.nf -\fBexternal definitions\fP -program: external-definition* -external-definition: ext-decl-specifiers [declarator [function | non-function] | '\fB;\fP'] | asm-statement -ext-decl-specifiers: decl-specifiers? -non-function: initializer? ['\fB,\fP' init-declarator]* '\fB;\fP' -function: declaration* compound-statement -.sp 1 -\fBdeclarations\fP -declaration: decl-specifiers init-declarator-list? '\fB;\fP' -decl-specifiers: other-specifier+ [single-type-specifier other-specifier*]? | single-type-specifier other-specifier* -other-specifier: \fBauto\fP | \fBstatic\fP | \fBextern\fP | \fBtypedef\fP | \fBregister\fP | \fBshort\fP | \fBlong\fP | \fBunsigned\fP -type-specifier: decl-specifiers -single-type-specifier: \fItype-identifier\fP | struct-or-union-specifier | enum-specifier -init-declarator-list: init-declarator ['\fB,\fP' init-declarator]* -init-declarator: declarator initializer? -declarator: primary-declarator ['\fB(\fP' formal-list ? '\fB)\fP' | arrayer]* | '\fB*\fP' declarator -primary-declarator: identifier | '\fB(\fP' declarator '\fB)\fP' -arrayer: '\fB[\fP' constant-expression? '\fB]\fP' -formal-list: formal ['\fB,\fP' formal]* -formal: identifier -enum-specifier: \fBenum\fP [enumerator-pack | identifier enumerator-pack?] -enumerator-pack: '\fB{\fP' enumerator ['\fB,\fP' enumerator]* '\fB,\fP'? '\fB}\fP' -enumerator: identifier ['\fB=\fP' constant-expression]? 
-struct-or-union-specifier: [ \fBstruct\fP | \fBunion\fP] [ struct-declaration-pack | identifier struct-declaration-pack?] -struct-declaration-pack: '\fB{\fP' struct-declaration+ '\fB}\fP' -struct-declaration: type-specifier struct-declarator-list '\fB;\fP'? -struct-declarator-list: struct-declarator ['\fB,\fP' struct-declarator]* -struct-declarator: declarator bit-expression? | bit-expression -bit-expression: '\fB:\fP' constant-expression -initializer: '\fB=\fP'? initial-value -cast: '\fB(\fP' type-specifier abstract-declarator '\fB)\fP' -abstract-declarator: primary-abstract-declarator ['\fB(\fP' '\fB)\fP' | arrayer]* | '\fB*\fP' abstract-declarator -primary-abstract-declarator: ['\fB(\fP' abstract-declarator '\fB)\fP']? -.sp 1 -\fBstatements\fP -statement: - expression-statement - | label '\fB:\fP' statement - | compound-statement - | if-statement - | while-statement - | do-statement - | for-statement - | switch-statement - | case-statement - | default-statement - | break-statement - | continue-statement - | return-statement - | jump - | '\fB;\fP' - | asm-statement - ; -expression-statement: expression '\fB;\fP' -label: identifier -if-statement: \fBif\fP '\fB(\fP' expression '\fB)\fP' statement [\fBelse\fP statement]? -while-statement: \fBwhile\fP '\fB(\fP' expression '\fB)\fP' statement -do-statement: \fBdo\fP statement \fBwhile\fP '\fB(\fP' expression '\fB)\fP' '\fB;\fP' -for-statement: \fBfor\fP '\fB(\fP' expression? '\fB;\fP' expression? '\fB;\fP' expression? '\fB)\fP' statement -switch-statement: \fBswitch\fP '\fB(\fP' expression '\fB)\fP' statement -case-statement: \fBcase\fP constant-expression '\fB:\fP' statement -default-statement: \fBdefault\fP '\fB:\fP' statement -break-statement: \fBbreak\fP '\fB;\fP' -continue-statement: \fBcontinue\fP '\fB;\fP' -return-statement: \fBreturn\fP expression? 
'\fB;\fP' -jump: \fBgoto\fP identifier '\fB;\fP' -compound-statement: '\fB{\fP' declaration* statement* '\fB}\fP' -asm-statement: \fBasm\fP '\fB(\fP' \fIstring\fP '\fB)\fP' '\fB;\fP' -.sp 1 -\fBexpressions\fP -initial-value: assignment-expression | initial-value-pack -initial-value-pack: '\fB{\fP' initial-value-list '\fB}\fP' -initial-value-list: initial-value ['\fB,\fP' initial-value]* '\fB,\fP'? -primary: \fIidentifier\fP | constant | \fIstring\fP | '\fB(\fP' expression '\fB)\fP' -secundary: primary [index-pack | parameter-pack | selection]* -index-pack: '\fB[\fP' expression '\fB]\fP' -parameter-pack: '\fB(\fP' parameter-list? '\fB)\fP' -selection: ['\fB.\fP' | '\fB\->\fP'] identifier -parameter-list: assignment-expression ['\fB,\fP' assignment-expression]* -postfixed: secundary postop? -unary: cast unary | postfixed | unop unary | size-of -size-of: \fBsizeof\fP [cast | unary] -binary-expression: unary [binop binary-expression]* -conditional-expression: binary-expression ['\fB?\fP' expression '\fB:\fP' assignment-expression]? -assignment-expression: conditional-expression [asgnop assignment-expression]? 
-expression: assignment-expression ['\fB,\fP' assignment-expression]* -unop: '\fB*\fP' | '\fB&\fP' | '\fB\-\fP' | '\fB!\fP' | '\fB~ \fP' | '\fB++\fP' | '\fB\-\-\fP' -postop: '\fB++\fP' | '\fB\-\-\fP' -multop: '\fB*\fP' | '\fB/\fP' | '\fB%\fP' -addop: '\fB+\fP' | '\fB\-\fP' -shiftop: '\fB<<\fP' | '\fB>>\fP' -relop: '\fB<\fP' | '\fB>\fP' | '\fB<=\fP' | '\fB>=\fP' -eqop: '\fB==\fP' | '\fB!=\fP' -arithop: multop | addop | shiftop | '\fB&\fP' | '\fB^ \fP' | '\fB|\fP' -binop: arithop | relop | eqop | '\fB&&\fP' | '\fB||\fP' -asgnop: '\fB=\fP' | '\fB+\fP' '\fB=\fP' | '\fB\-\fP' '\fB=\fP' | '\fB*\fP' '\fB=\fP' | '\fB/\fP' '\fB=\fP' | '\fB%\fP' '\fB=\fP' - | '\fB<<\fP' '\fB=\fP' | '\fB>>\fP' '\fB=\fP' | '\fB&\fP' '\fB=\fP' | '\fB^ \fP' '\fB=\fP' | '\fB|\fP' '\fB=\fP' - | '\fB+=\fP' | '\fB\-=\fP' | '\fB*=\fP' | '\fB/=\fP' | '\fB%=\fP' - | '\fB<<=\fP' | '\fB>>=\fP' | '\fB&=\fP' | '\fB^=\fP' | '\fB|=\fP' -constant: \fIinteger\fP | \fIfloating\fP -constant-expression: assignment-expression -identifier: \fIidentifier\fP | \fItype-identifier\fP -.fi diff --git a/doc/ego/.distr b/doc/ego/.distr deleted file mode 100644 index 17d03f8d5..000000000 --- a/doc/ego/.distr +++ /dev/null @@ -1,18 +0,0 @@ -proto.make -bo -ca -cf -cj -cs -ic -il -intro -lv -ov -ra -refs.gen -refs.opt -refs.stat -sp -sr -ud diff --git a/doc/ego/Makefile b/doc/ego/Makefile deleted file mode 100644 index f6aefa1ca..000000000 --- a/doc/ego/Makefile +++ /dev/null @@ -1,55 +0,0 @@ -REFS=-p refs.opt -p refs.stat -p refs.gen -INTRO=intro/intro? -OV=ov/ov? -IC=ic/ic? -CF=cf/cf? -IL=il/il? -SR=sr/sr? -CS=cs/cs? -SP=sp/sp? -UD=ud/ud? -LV=lv/lv? -CJ=cj/cj? -BO=bo/bo? -RA=ra/ra? -CA=ca/ca? 
-EGO=$(INTRO) $(OV) $(IC) $(CF) $(IL) $(SR) $(CS) $(SP) $(CJ) $(BO) \ - $(UD) $(LV) $(RA) $(CA) -REFER=refer -TROFF=troff -TBL=tbl -TARGET=-Tlp - -../ego.doc: refs.opt refs.stat refs.gen intro/head intro/tail $(EGO) - $(REFER) -sA+T -l4,2 $(REFS) intro/head $(EGO) intro/tail | $(TBL) > ../ego.doc - -ego.f: refs.opt refs.stat refs.gen intro/head intro/tail $(EGO) - $(REFER) -sA+T -l4,2 $(REFS) intro/head $(EGO) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > ego.f -intro.f: refs.opt refs.stat refs.gen intro/head intro/tail $(INTRO) - $(REFER) -sA+T -l4,2 $(REFS) intro/head $(INTRO) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > intro.f -ov.f: refs.opt refs.stat refs.gen intro/head intro/tail $(OV) - $(REFER) -sA+T -l4,2 $(REFS) intro/head $(OV) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > ov.f -ic.f: refs.opt refs.stat refs.gen intro/head intro/tail $(IC) - $(REFER) -sA+T -l4,2 $(REFS) intro/head $(IC) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > ic.f -cf.f: refs.opt refs.stat refs.gen intro/head intro/tail $(CF) - $(REFER) -sA+T -l4,2 $(REFS) intro/head $(CF) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > cf.f -il.f: refs.opt refs.stat refs.gen intro/head intro/tail $(IL) - $(REFER) -sA+T -l4,2 $(REFS) intro/head $(IL) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > il.f -sr.f: refs.opt refs.stat refs.gen intro/head intro/tail $(SR) - $(REFER) -sA+T -l4,2 $(REFS) intro/head $(SR) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > sr.f -cs.f: refs.opt refs.stat refs.gen intro/head intro/tail $(CS) - $(REFER) -sA+T -l4,2 $(REFS) intro/head $(CS) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > cs.f -sp.f: refs.opt refs.stat refs.gen intro/head intro/tail $(SP) - $(REFER) -sA+T -l4,2 $(REFS) intro/head $(SP) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > sp.f -cj.f: refs.opt refs.stat refs.gen intro/head intro/tail $(CJ) - $(REFER) -sA+T -l4,2 $(REFS) intro/head $(CJ) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > cj.f -bo.f: refs.opt refs.stat refs.gen intro/head 
intro/tail $(BO) - $(REFER) -sA+T -l4,2 $(REFS) intro/head $(BO) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > bo.f -ud.f: refs.opt refs.stat refs.gen intro/head intro/tail $(UD) - $(REFER) -sA+T -l4,2 $(REFS) intro/head $(UD) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > ud.f -lv.f: refs.opt refs.stat refs.gen intro/head intro/tail $(LV) - $(REFER) -sA+T -l4,2 $(REFS) intro/head $(LV) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > lv.f -ra.f: refs.opt refs.stat refs.gen intro/head intro/tail $(RA) - $(REFER) -sA+T -l4,2 $(REFS) intro/head $(RA) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > ra.f -ca.f: refs.opt refs.stat refs.gen intro/head intro/tail $(CA) - $(REFER) -sA+T -l4,2 $(REFS) intro/head $(CA) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > ca.f diff --git a/doc/ego/bo/.distr b/doc/ego/bo/.distr deleted file mode 100644 index 8f41f100d..000000000 --- a/doc/ego/bo/.distr +++ /dev/null @@ -1 +0,0 @@ -bo1 diff --git a/doc/ego/bo/bo1 b/doc/ego/bo/bo1 deleted file mode 100644 index 58c17c764..000000000 --- a/doc/ego/bo/bo1 +++ /dev/null @@ -1,162 +0,0 @@ -.bp -.NH 1 -Branch Optimization -.NH 2 -Introduction -.PP -The Branch Optimization phase (BO) performs two related -(branch) optimizations. -.NH 3 -Fusion of basic blocks -.PP -If two basic blocks B1 and B2 have the following properties: -.DS -SUCC(B1) = {B2} -PRED(B2) = {B1} -.DE -then B1 and B2 can be combined into one basic block. -If B1 ends in an unconditional jump to the beginning of B2, this -jump can be eliminated, -hence saving a little execution time and object code size. -This technique can be used to eliminate some deficiencies -introduced by the front ends (for example, the "C" front end -translates switch statements inefficiently due to its one pass nature). -.NH 3 -While-loop optimization -.PP -The straightforward way to translate a while loop is to -put the test for loop termination at the beginning of the loop. 
-.DS -while cond loop \kyLAB1: \kxTest cond - body of the loop --->\h'|\nxu'Branch On False To LAB2 -end loop\h'|\nxu'code for body of loop -\h'|\nxu'Branch To LAB1 -\h'|\nyu'LAB2: - -Fig. 10.1 Example of Branch Optimization -.DE -If the condition fails at the Nth iteration, the following code -gets executed (dynamically): -.DS -.TS -l l l. -N * conditional branch (which fails N-1 times) -N-1 * unconditional branch -N-1 * body of the loop -.TE -.DE -An alternative translation is: -.DS - Branch To LAB2 -LAB1: - code for body of loop -LAB2: - Test cond - Branch On True To LAB1 -.DE -This translation results in the following profile: -.DS -.TS -l l l. -N * conditional branch (which succeeds N-1 times) -1 * unconditional branch -N-1 * body of the loop -.TE -.DE -So the second translation will be significantly faster if N >> 2. -If N=2, execution time will be slightly increased. -On the average, the program will be speeded up. -Note that the code sizes of the two translations will be the same. -.NH 2 -Implementation -.PP -The basic block fusion technique is implemented -by traversing the control flow graph of a procedure, -looking for basic blocks B with only one successor (S). -If one is found, it is checked if S has only one predecessor -(which has to be B). -If so, the two basic blocks can in principle be combined. -However, as one basic block will have to be moved, -the textual order of the basic blocks will be altered. -This reordering causes severe problems in the presence -of conditional jumps. -For example, if S ends in a conditional branch, -the basic block that comes textually next to S must stay -in that position. -So the transformation in Fig. 10.2 is illegal. -.DS -.TS -l l l l l. -LAB1: S1 LAB1: S1 - BRA LAB2 S2 - ... --> BEQ LAB3 -LAB2: S2 ... - BEQ LAB3 S3 - S3 -.TE - -Fig. 10.2 An illegal transformation of Branch Optimization -.DE -If B is moved towards S the same problem occurs if the block before B -ends in a conditional jump. 
-The problem could be solved by adding one extra branch, -but this would reduce the gains of the optimization to zero. -Hence the optimization will only be done if the block that -follows S (in the textual order) is not a successor of S. -This condition assures that S does not end in a conditional branch. -The condition always holds for the code generated by the "C" -front end for a switch statement. -.PP -After the transformation has been performed, -some attributes of the basic blocks involved (such as successor and -predecessor sets and immediate dominator) must be recomputed. -.PP -The while-loop technique is applied to one loop at a time. -The list of basic blocks of the loop is traversed to find -a block B that satisfies the following conditions: -.IP 1. -the textually next block to B is not part of the loop -.IP 2. -the last instruction of B is an unconditional branch; -hence B has only one successor, say S -.IP 3. -the textually next block of B is a successor of S -.IP 4. -the last instruction of S is a conditional branch -.LP -If such a block B is found, the control flow graph is changed -as depicted in Fig. 10.3. -.DS -.ft 5 - | | - | v - v | - |-----<------| ----->-----| - ____|____ | | - | | | |-------| | - | S1 | | | v | - | Bcc | | | .... | -|--| | | | | -| --------- | | ----|---- | -| | | | | | -| .... ^ | | S2 | | -| | | | | | -| --------- | | | | | -v | | | ^ --------- | -| | S2 | | | | | -| | BRA | | | |-----<----- -| | | | | v -| --------- | | ____|____ -| | | | | | -| ------>------ | | S1 | -| | | Bnn | -|-------| | | | - | | ----|---- - v | | - |----<--| - | - v -.ft R - -Fig. 
10.3 Transformation of the CFG by Branch Optimization -.DE diff --git a/doc/ego/ca/.distr b/doc/ego/ca/.distr deleted file mode 100644 index 4723880e2..000000000 --- a/doc/ego/ca/.distr +++ /dev/null @@ -1 +0,0 @@ -ca1 diff --git a/doc/ego/ca/ca1 b/doc/ego/ca/ca1 deleted file mode 100644 index ab06af430..000000000 --- a/doc/ego/ca/ca1 +++ /dev/null @@ -1,65 +0,0 @@ -.bp -.NH 1 -Compact assembly generation -.NH 2 -Introduction -.PP -The "Compact Assembly generation phase" (CA) transforms the -intermediate code of the optimizer into EM code in -Compact Assembly Language (CAL) format. -In the intermediate code, all program entities -(such as procedures, labels, global variables) -are denoted by a unique identifying number (see 3.5). -In the CAL output of the optimizer these numbers have to -be replaced by normal identifiers (strings). -The original identifiers of the input program are used whenever possible. -Recall that the IC phase generates two files that can be -used to map unique identifying numbers to procedure names and -global variable names. -For instruction labels CA always generates new names. -The reasons for doing so are: -.IP - -instruction labels are only visible inside one procedure, so they can -not be referenced in other modules -.IP - -the names are not very suggestive anyway, as they must be integer numbers -.IP - -the optimizer considerably changes the control structure of the program, -so there is really no one to one mapping of instruction labels in -the input and the output program. -.LP -As the optimizer combines all input modules into one module, -visibility problems may occur. -Two modules M1 and M2 can both define an identifier X (provided that -X is not externally visible in any of these modules). -If M1 and M2 are combined into one module M, two distinct -entities with the same name would exist in M, which -is not allowed. 
-.[~[ -tanenbaum machine architecture -.], section 11.1.4.3] -In these cases, CA invents a new unique name for one of the entities. -.NH 2 -Implementation -.PP -CA first reads the files containing the procedure and global variable names -and stores the names in two tables. -It scans these tables to make sure that all names are different. -Subsequently it reads the EM text, one procedure at a time, -and outputs it in CAL format. -The major part of the code that does the latter transformation -is adapted from the EM Peephole Optimizer. -.PP -The main problem of the implementation of CA is to -assure that the visibility rules are obeyed. -If an identifier must be externally visible (i.e. -it was externally visible in the input program) -and the identifier is defined (in the output program) before -being referenced, -an EXA or EXP pseudo must be generated for it. -(Note that the optimizer may change the order of definitions and -references, so some pseudos may be needed that were not -present in the input program). -On the other hand, an identifier may be only internally visible. -If such an identifier is referenced before being defined, -an INA or INP pseudo must be emitted prior to its first reference. diff --git a/doc/ego/cf/.distr b/doc/ego/cf/.distr deleted file mode 100644 index 6aff9ca72..000000000 --- a/doc/ego/cf/.distr +++ /dev/null @@ -1,6 +0,0 @@ -cf1 -cf2 -cf3 -cf4 -cf5 -cf6 diff --git a/doc/ego/cf/cf1 b/doc/ego/cf/cf1 deleted file mode 100644 index e65547458..000000000 --- a/doc/ego/cf/cf1 +++ /dev/null @@ -1,94 +0,0 @@ -.bp -.NH -The Control Flow Phase -.PP -In the previous chapter we described the intermediate -code of the global optimizer. -We also specified which part of this code -was constructed by the IC phase of the optimizer. -The Control Flow Phase (\fICF\fR) does -the remainder of the job, -i.e. 
it determines: -.IP - -the control flow graphs -.IP - -the loop tables -.IP - -the calling, change and use attributes of -the procedure table entries -.LP -CF operates on one procedure at a time. -For every procedure it first reads the EM instructions -from the EM-text file and groups them into basic blocks. -For every basic block, its successors and -predecessors are determined, -resulting in the control flow graph. -Next, the immediate dominator of every basic block -is computed. -Using these dominators, any loop in the -procedure is detected. -Finally, interprocedural analysis is done, -after which we will know the global effects of -every procedure call on its environment. -.sp -CF uses the same internal data structures -for the procedure table and object table as IC. -.NH 2 -Partitioning into basic blocks -.PP -With regard to flow of control, we distinguish -three kinds of EM instructions: -jump instructions, instruction label definitions and -normal instructions. -Jump instructions are all conditional or unconditional -branch instructions, -the case instructions (CSA/CSB) -and the RET (return) instruction. -A procedure call (CAL) is not considered to be a jump. -A defining occurrence of an instruction label -is regarded as an EM instruction. -.PP -An instruction starts -a new basic block, in any of the following cases: -.IP 1. -It is the first instruction of a procedure -.IP 2. -It is the first of a list of instruction label -defining occurrences -.IP 3. -It follows a jump -.LP -If there are several consecutive instruction labels -(which is highly unusual), -all of them are put in the same basic block. -Note that several cases may overlap, -e.g. a label definition at the beginning of a procedure -or a label following a jump. -.PP -A simple Finite State Machine is used to model -the above rules. -It also recognizes the end of a procedure, -marked by an END pseudo. -The basic blocks are stored internally as a doubly linked -linear list. 
-The blocks are linked in textual order. -Every node of this list has the attributes described -in the previous chapter (see syntax rule for -basic_block). -Furthermore, every node contains a pointer to its -EM instructions, -which are represented internally -as a linear, doubly linked list, -just as in the IC phase. -However, instead of one list per procedure (as in IC) -there is now one list per basic block. -.PP -On the fly, a table is built that maps -every label identifier to the label definition -instruction. -This table is used for computing the control flow. -The table is stored as a dynamically allocated array. -The length of the array is the number of labels -of the current procedure; -this value can be found in the procedure table, -where it was stored by IC. diff --git a/doc/ego/cf/cf2 b/doc/ego/cf/cf2 deleted file mode 100644 index c4dd95d5d..000000000 --- a/doc/ego/cf/cf2 +++ /dev/null @@ -1,50 +0,0 @@ -.NH 2 -Control Flow -.PP -A \fIsuccessor\fR of a basic block B is a block C -that can be executed immediately after B. -B is said to be a \fIpredecessor\fR of C. -A block ending with a RET instruction -has no successors. -Such a block is called a \fIreturn block\fR. -Any block that has no predecessors cannot be -executed at all (i.e. it is unreachable), -unless it is the first block of a procedure, -called the \fIprocedure entry block\fR. -.PP -Internally, the successor and predecessor -attributes of a basic block are stored as \fIsets\fR. -Alternatively, one may regard all these -sets of all basic blocks as a conceptual \fIgraph\fR, -in which there is an edge from B to C if C -is in the successor set of B. -We call this conceptual graph -the \fIControl Flow Graph\fR. -.PP -The only successor of a basic block ending on an -unconditional branch instruction is the block that -contains the label definition of the target of the jump.
-The target instruction can be found via the LAB_ID -that is the operand of the jump instruction, -by using the label-map table mentioned -above. -If the last instruction of a block is a -conditional jump, -the successors are the target block and the textually -next block. -The last instruction can also be a case jump -instruction (CSA or CSB). -We then analyze the case descriptor, -to find all possible target instructions -and their associated blocks. -We require the case descriptor to be allocated in -a ROM, so it cannot be changed dynamically. -A case jump via an alterable descriptor could in principle -go to any label in the program. -In the presence of such an uncontrolled jump, -hardly any optimization can be done. -We do not expect any front end to generate such a descriptor, -however, because of the controlled nature -of case statements in high level languages. -If the basic block does not end in a jump instruction, -its only successor is the textually next block. diff --git a/doc/ego/cf/cf3 b/doc/ego/cf/cf3 deleted file mode 100644 index 42e8827b1..000000000 --- a/doc/ego/cf/cf3 +++ /dev/null @@ -1,53 +0,0 @@ -.NH 2 -Immediate dominators -.PP -A basic block B dominates a block C if every path -in the control flow graph from the procedure entry block -to C goes through B. -The immediate dominator of C is the closest dominator -of C on any path from the entry block. -See also -.[~[ -aho compiler design -.], section 13.1.] -.PP -There are a number of algorithms to compute -the immediate dominator relation. -.IP 1. -Purdom and Moore give an algorithm that is -easy to program and easy to describe (although the -description they give is unreadable; -it is given in a very messy Algol60 program full of gotos). -.[ -predominators -.] -.IP 2. -Aho and Ullman present a bitvector algorithm, which is also -easy to program and to understand. -(See -.[~[ -aho compiler design -.], section 13.1.]). 
-.IP 3 -Lengauer and Tarjan introduce a fast algorithm that is -hard to understand, yet remarkably easy to implement. -.[ -lengauer dominators -.] -.LP -The Purdom-Moore algorithm is very slow if the -number of basic blocks in the flow graph is large. -The Aho-Ullman algorithm in fact computes the -dominator relation, -from which the immediate dominator relation can be computed -in time quadratic to the number of basic blocks, worst case. -The storage requirement is also quadratic to the number -of blocks. -The running time of the third algorithm is proportional -to: -.DS -(number of edges in the graph) * log(number of blocks). -.DE -We have chosen this algorithm because it is fast -(as shown by experiments done by Lengauer and Tarjan), -it is easy to program and requires little data space. diff --git a/doc/ego/cf/cf4 b/doc/ego/cf/cf4 deleted file mode 100644 index 843a41117..000000000 --- a/doc/ego/cf/cf4 +++ /dev/null @@ -1,93 +0,0 @@ -.NH 2 -Loop detection -.PP -Loops are detected by using the loop construction -algorithm of. -.[~[ -aho compiler design -.], section 13.1.] -This algorithm uses \fIback edges\fR. -A back edge is an edge from B to C in the CFG, -whose head (C) dominates its tail (B). -The loop associated with this back edge -consists of C plus all nodes in the CFG -that can reach B without going through C. -.PP -As an example of how the algorithm works, -consider the piece of program of Fig. 4.1. -First just look at the program and try to -see what part of the code constitutes the loop. -.DS -loop - if cond then 1 - -- lots of simple - -- assignment - -- statements 2 3 - exit; -- exit loop - else - S; -- one statement - end if; -end loop; - -Fig. 4.1 A misleading loop -.DE -Although a human being may be easily deceived -by the brackets "loop" and "end loop", -the loop detection algorithm will correctly -reply that only the test for "cond" and -the single statement in the false-part -of the if statement are part of the loop! 
-The statements in the true-part only get -executed once, so there really is no reason at all -to say they're part of the loop too. -The CFG contains one back edge, "3->1". -As node 3 cannot be reached from node 2, -the latter node is not part of the loop. -.PP -A source of problems with the algorithm is the fact -that different back edges may result in -the same loop. -Such an ill-structured loop is -called a \fImessy\fR loop. -After a loop has been constructed, it is checked -if it is really a new loop. -.PP -Loops can partly overlap, without one being nested -inside the other. -This is the case in the program of Fig. 4.2. -.DS -1: 1 - S1; -2: - S2; 2 - if cond then - goto 4; - S3; 3 4 - goto 1; -4: - S4; - goto 1; - -Fig. 4.2 Partly overlapping loops -.DE -There are two back edges "3->1" and "4->1", -resulting in the loops {1,2,3} and {1,2,4}. -With every basic block we associate a set of -all loops it is part of. -It is not sufficient just to record its -most enclosing loop. -.PP -After all loops of a procedure are detected, we determine -the nesting level of every loop. -Finally, we find all strong and firm blocks of the loop. -If the loop has only one back edge (i.e. it is not messy), -the set of firm blocks consists of the -head of this back edge and its dominators -in the loop (including the loop entry block). -A firm block is also strong if it is not a -successor of a block that may exit the loop; -a block may exit a loop if it has an (immediate) successor -that is not part of the loop. -For messy loops we do not determine the strong -and firm blocks. These loops are expected -to occur very rarely. diff --git a/doc/ego/cf/cf5 b/doc/ego/cf/cf5 deleted file mode 100644 index 1926c450c..000000000 --- a/doc/ego/cf/cf5 +++ /dev/null @@ -1,82 +0,0 @@ -.NH 2 -Interprocedural analysis -.PP -It is often desirable to know the effects -a procedure call may have. -The optimization below is only possible if -we know for sure that the call to P cannot -change A. 
-.DS -.TS -l l. -A := 10; A:= 10; -P; -- procedure call --> P; -B := A + 2; B := 12; -.TE -.DE -Although it is not possible to predict exactly -all the effects a procedure call has, we may -determine a kind of upper bound for it. -So we compute all variables that may be -changed by P, although they need not be -changed at every invocation of P. -We can get hold of this set by just looking -at all assignment (store) instructions -in the body of P. -EM also has a set of \fIindirect\fR assignment -instructions, -i.e. assignment through a pointer variable. -In general, it is not possible to determine -which variable is affected by such an assignment. -In these cases, we just record the fact that P -does an indirect assignment. -Note that this does not mean that all variables -are potentially affected, as the front ends -may generate messages telling that certain -variables can never be accessed indirectly. -We also set a flag if P does a use (load) indirect. -Note that we only have to look at \fIglobal\fR -variables. -If P changes or uses any of its locals, -this has no effect on its environment. -Local variables of a lexically enclosing -procedure can only be accessed indirectly. -.PP -A procedure P may of course call another procedure. -To determine the effects of a call to P, -we also must know the effects of a call to the second procedure. -This second one may call a third one, and so on. -Effectively, we need to compute the \fItransitive closure\fR -of the effects. -To do this, we determine for every procedure -which other procedures it calls. -This set is the "calling" attribute of a procedure. -One may regard all these sets as a conceptual graph, -in which there is an edge from P to Q -if Q is in the calling set of P. This graph will -be referred to as the \fIcall graph\fR. -(Note the resemblance with the control flow graph). -.PP -We can detect which procedures are called by P -by looking at all CAL instructions in its body. 
-Unfortunately, a procedure may also be -called indirectly, via a CAI instruction. -Yet, only procedures that are used as operand of an LPI -instruction can be called indirectly, -because this is the only way to take the address of a procedure. -We determine for every procedure whether it does -a CAI instruction. -We also build a set of all procedures used as -operand of an LPI. -.sp -After all procedures have been processed (i.e. all CFGs -are constructed, all loops are detected, -all procedures are analyzed to see which variables -they may change, which procedures they call, -whether they do a CAI or are used in an LPI) the -transitive closure of all interprocedural -information is computed. -During the same process, -the calling set of every procedure that uses a CAI -is extended with the above-mentioned set of all -procedures that can be called indirectly. diff --git a/doc/ego/cf/cf6 b/doc/ego/cf/cf6 deleted file mode 100644 index a560b48e3..000000000 --- a/doc/ego/cf/cf6 +++ /dev/null @@ -1,21 +0,0 @@ -.NH 2 -Source files -.PP -The sources of CF are in the following files and packages: -.IP cf.h: 14 -declarations of global variables and data structures -.IP cf.c: -the routine main; interprocedural analysis; -transitive closure -.IP succ: -control flow (successor and predecessor) -.IP idom: -immediate dominators -.IP loop: -loop detection -.IP get: -read object and procedure table; -read EM text and partition it into basic blocks -.IP put: -write tables, CFGs and EM text -.LP diff --git a/doc/ego/cj/.distr b/doc/ego/cj/.distr deleted file mode 100644 index a92acc3b1..000000000 --- a/doc/ego/cj/.distr +++ /dev/null @@ -1 +0,0 @@ -cj1 diff --git a/doc/ego/cj/cj1 b/doc/ego/cj/cj1 deleted file mode 100644 index e7174af61..000000000 --- a/doc/ego/cj/cj1 +++ /dev/null @@ -1,144 +0,0 @@ -.bp -.NH 1 -Cross jumping -.NH 2 -Introduction -.PP -The "Cross Jumping" optimization technique (CJ) -.[ -wulf design optimizing compiler -.] -is basically a space optimization technique.
It looks for pairs of -basic blocks (B1,B2), for which: -.DS -SUCC(B1) = SUCC(B2) = {S} -.DE -(So B1 and B2 both have one and the same successor). -If the last few non-branch instructions are the same for B1 and B2, -one such sequence can be eliminated. -.DS -Pascal: - -if cond then - S1 - S3 -else - S2 - S3 - -(pseudo) EM: -.TS -l l l. - TEST COND TEST COND - BNE *1 BNE *1 - S1 S1 - S3 ---> BRA *2 - BRA *2 1: -1: S2 - S2 2: - S3 S3 -2: -.TE - -Fig. 9.1 An example of Cross Jumping -.DE -As the basic blocks have the same successor, -at least one of them ends in an unconditional branch instruction (BRA). -Hence no extra branch instruction is ever needed, just the target -of an existing branch needs to be changed; neither the program size -nor the execution time will ever increase. -In general, the execution time will remain the same, unless -further optimizations can be applied because of this optimization. -.PP -This optimization is particularly effective, -because it cannot always be done by the programmer at the source level, -as demonstrated by Fig. 9.2. -.DS - Pascal: - -if cond then - x := f(4) -else - x := g(5) - - -EM: - -.TS -l l. -... ... -LOC 4 LOC 5 -CAL F CAL G -ASP 2 ASP 2 -LFR 2 LFR 2 -STL X STL X -.TE - -Fig. 9.2 Effectiveness of Cross Jumping -.DE -At the source level there is no common tail, -but at the EM level there is a common tail. -.NH 2 -Implementation -.PP -The implementation of cross jumping is rather straightforward. -The technique is applied to one procedure at a time. -The control flow graph of the procedure -is scanned for pairs of basic blocks -with the same (single) successor and with common tails. -Note that there may be more than two such blocks (e.g. as the result -of a case statement). -This is dealt with by repeating the entire process until no -further optimizations can be done for the current procedure. -.sp -If a suitable pair of basic blocks has been found, the control flow -graph must be altered.
One of the basic -blocks must be split into two. -The control flow graphs before and after the optimization are shown -in Fig. 9.3 and Fig. 9.4. -.DS -.ft 5 - - -------- -------- - | | | | - | S1 | | S2 | - | S3 | | S3 | - | | | | - -------- -------- - | | - |------------------|--------------------| - | - v -.ft R - -Fig. 9.3 CFG before optimization -.DE -.DS -.ft 5 - -------- -------- - | | | | - | S1 | | S2 | - | | | | - -------- -------- - | | - |--------------------<------------------| - v - -------- - | | - | S3 | - | | - -------- - | - v -.ft R - -Fig. 9.4 CFG after optimization -.DE -Some attributes of the three resulting blocks (such as immediate dominator) -are updated. -.PP -In some cases, cross jumping might split the computation of an expression -into two, by inserting a branch somewhere in the middle. -Most code generators will generate very poor assembly code when -presented with such EM code. -Therefor, cross jumping is not performed in these cases. diff --git a/doc/ego/cs/.distr b/doc/ego/cs/.distr deleted file mode 100644 index 99b8c7793..000000000 --- a/doc/ego/cs/.distr +++ /dev/null @@ -1,5 +0,0 @@ -cs1 -cs2 -cs3 -cs4 -cs5 diff --git a/doc/ego/cs/cs1 b/doc/ego/cs/cs1 deleted file mode 100644 index 842e514a4..000000000 --- a/doc/ego/cs/cs1 +++ /dev/null @@ -1,45 +0,0 @@ -.bp -.NH 1 -Common subexpression elimination -.NH 2 -Introduction -.PP -The Common Subexpression Elimination optimization technique (CS) -tries to eliminate multiple computations of EM expressions -that yield the same result. -It places the result of one such computation -in a temporary variable, -and replaces the other computations by a reference -to this temporary variable. -The primary goal of this technique is to decrease -the execution time of the program, -but in general it will save space too. -.PP -As an example of the application of Common Subexpression Elimination, -consider the piece of program in Fig. 7.1(a). -.DS -.TS -l l l. 
-x := a * b; TMP := a * b; x := a * b; -CODE; x := TMP; CODE -y := c + a * b; CODE y := x; - y := c + TMP; - - (a) (b) (c) -.TE - -Fig. 7.1 Examples of Common Subexpression Elimination -.DE -If neither a nor b is changed in CODE, -the instructions can be replaced by those of Fig. 7.1(b), -which saves one multiplication, -but costs an extra store instruction. -If the value of x is not changed in CODE either, -the instructions can be replaced by those of Fig. 7.1(c). -In this case -the extra store is not needed. -.PP -In the following sections we will describe -which transformations are done -by CS and how this phase -was implemented. diff --git a/doc/ego/cs/cs2 b/doc/ego/cs/cs2 deleted file mode 100644 index 0fe4dfc16..000000000 --- a/doc/ego/cs/cs2 +++ /dev/null @@ -1,86 +0,0 @@ -.NH 2 -Specification of the Common Subexpression Elimination phase -.PP -In this section we will describe -the window -through which CS examines the code, -the expressions recognized by CS, -and finally the changes made to the code. -.NH 3 -The working window -.PP -The CS algorithm is applied to the -largest sequence of textually adjacent basic blocks -B1,..,Bn, for which -.DS -PRED(Bj) = {Bj-1}, j = 2,..,n. -.DE -Intuitively, this window consists of straight line code, -with only one entry point (at the beginning); it may -contain jumps, which should all have their targets outside the window. -This is illustrated in Fig. 7.2. -.DS -x := a * b; (1) -if x < 10 then (2) - y := a * b; (3) - -Fig. 7.2 The working window of CS -.DE -Line (2) can only be executed after line (1). -Likewise, line (3) can only be executed after -line (2). -Both a and b have the same values at line (1) and at line (3). -.PP -Larger windows were avoided. -In Fig. 7.3, the value of a at line (4) may have been obtained -at more than one point. -.DS -x := a * b; (1) -if x < 10 then (2) - a := 100; (3) -y := a * b; (4) - -Fig. 7.3 Several working windows -.DE -.NH 3 -Recognized expressions. 
-.PP -The computations eliminated by CS need not be normal expressions -(like "a * b"), -but can even consist of a single operand that is expensive to access, -such as an array element or a record field. -If an array element is used, -its address is computed implicitly. -CS is able to eliminate either the element itself or its -address, whichever one is most profitable. -A variable of a textually enclosing procedure may also be -expensive to access, depending on the lexical level difference. -.NH 3 -Transformations -.PP -CS creates a new temporary local variable (TMP) -for every eliminated expression, -unless it is able to use an existing local variable. -It emits code to initialize this variable with the -result of the expression. -Most recurrences of the expression -can simply be replaced by a reference to TMP. -If the address of an array element is recognized as -a common subexpression, -references to the element itself are replaced by -indirect references through TMP (see Fig. 7.4). -.DS -.TS -l l l. -x := A[i]; TMP := &A[i]; - . . . --> x := *TMP; -A[i] := y; . . . - *TMP := y; -.TE - -Fig. 7.4 Elimination of an array address computation -.DE -Here, '&' is the 'address of' operator, -and unary '*' is the indirection operator. -(Note that EM actually has different instructions to do -a use-indirect or an assign-indirect.) diff --git a/doc/ego/cs/cs3 b/doc/ego/cs/cs3 deleted file mode 100644 index 416d9e4bc..000000000 --- a/doc/ego/cs/cs3 +++ /dev/null @@ -1,250 +0,0 @@ -.NH 2 -Implementation -.PP -.NH 3 -The value number method -.PP -To determine whether two expressions have the same result, -there must be some way to determine whether their operands have -the same values. -We use a system of \fIvalue numbers\fP -.[ -kennedy data flow analysis -.] -in which each distinct value of whatever type, -created or used within the working window, -receives a unique identifying number, its value number. 
-Two items have the same value number if and only if, -based only upon information from the instructions in the window, -their values are provably identical. -For example, after processing the statement -.DS -a := 4; -.DE -the variable a and the constant 4 have the same value number. -.PP -The value number of the result of an expression depends only -on the kind of operator and the value number(s) of the operand(s). -The expressions need not be textually equal, as shown in Fig. 7.5. -.DS -.TS -l l. -a := c; (1) -use(a * b); (2) -d := b; (3) -use(c * d); (4) -.TE - -Fig. 7.5 Different expressions with the same value number -.DE -At line (1) a receives the same value number as c. -At line (3) d receives the same value number as b. -At line (4) the expression "c * d" receives the same value number -as the expression "a * b" at line (2), -because the value numbers of their left and right operands are the same, -and the operator (*) is the same. -.PP -As another example of the value number method, consider Fig. 7.6. -.DS -.TS -l l. -use(a * b); (1) -a := 123; (2) -use(a * b); (3) -.TE - -Fig. 7.6 Identical expressions with different value numbers -.DE -Although textually the expressions "a * b" in line 1 and line 3 are equal, -a will have different value numbers at line 3 and line 1. -The two expressions will not mistakenly be recognized as equivalent. -.NH 3 -Entities -.PP -The Value Number Method distinguishes between operators and operands. -The value numbers of operands are stored in a table, -called the \fIsymbol table\fR. -The value number of a subexpression depends on the -(root) operator of the expression and on the value numbers -of its operands. -A table of "available expressions" is used to do this mapping.
-.PP -CS recognizes the following kinds of EM operands, called \fIentities\fR: -.DS -- constant -- local variable -- external variable -- indirectly accessed entity -- offsetted entity -- address of local variable -- address of external variable -- address of offsetted entity -- address of local base -- address of argument base -- array element -- procedure identifier -- floating zero -- local base -- heap pointer -- ignore mask -.DE -.LP -Whenever a new entity is encountered in the working window, -it is entered in the symbol table and given a brand new value number. -Most entities have attributes (e.g. the offset in -the current stackframe for local variables), -which are also stored in the symbol table. -.PP -An entity is called static if its value cannot be changed -(e.g. a constant or an address). -.NH 3 -Parsing expressions -.PP -Common subexpressions are recognized by simulating the behaviour -of the EM machine. -The EM code is parsed from left to right; -as EM is postfix code, this is a bottom up parse. -At any point the current state of the EM runtime stack is -reflected by a simulated "fake stack", -containing descriptions of the parsed operands and expressions. -A descriptor consists of: -.DS -(1) the value number of the operand or expression -(2) the size of the operand or expression -(3) a pointer to the first line of EM-code - that constitutes the operand or expression -.DE -Note that operands may consist of several EM instructions. -Whenever an operator is encountered, the -descriptors of its operands are on top of the fake stack. -The operator and the value numbers of the operands -are used as indices in the table of available expressions, -to determine the value number of the expression. -.PP -During the parsing process, -we keep track of the first line of each expression; -we need this information when we decide to eliminate the expression. 
-.NH 3 -Updating entities -.PP -An entity is assigned a value number when it is -used for the first time -in the working window. -If the entity is used as left hand side of an assignment, -it gets the value number of the right hand side. -Sometimes the effects of an instruction on an entity cannot -be determined exactly; -the current value and value number of the entity may become -inconsistent. -Hence the current value number must be forgotten. -This is achieved by giving the entity a new value number -that was not used before. -The entity is said to be \fIkilled\fR. -.PP -As information is lost when an entity is killed, -CS tries to save as many entities as possible. -In case of an indirect assignment through a pointer, -some analysis is done to see which variables cannot be altered. -For a procedure call, the interprocedural information contained -in the procedure table is used to restrict the set of entities that may -be changed by the call. -Local variables for which the front end generated -a register message can never be changed by an indirect assignment -or a procedure call. -.NH 3 -Changing the EM text -.PP -When a new expression comes available, -it is checked whether its result is saved in a local -that may go in a register. -The last line of the expression must be followed -by a STL or SDL instruction -(depending on the size of the result) -and a register message must be present for -this local. -If there is such a local, -it is recorded in the available expressions table. -Each time a new occurrence of this expression -is found, -the value number of the local is compared against -the value number of the result. -If they are different the local cannot be used and is forgotten. -.PP -The available expressions are linked in a list. -New expressions are linked at the head of the list. -In this way expressions that are contained within other -expressions appear later in the list, -because EM-expressions are postfix. 
-The elimination process walks through the list, -starting at the head, to find the largest expressions first. -If an expression is eliminated, -any expression later on in the list, contained in the former expression, -is removed from the list, -as expressions can only be eliminated once. -.PP -A STL or SDL is emitted after the first occurrence of the expression, -unless there was an existing local variable that could hold the result. -.NH 3 -Desirability analysis -.PP -Although the global optimizer works on EM code, -the goal is to improve the quality of the object code. -Therefore some machine-dependent information is needed -to decide whether it is desirable to -eliminate a given expression. -Because it is impossible for the CS phase to know -exactly what code will be generated, -some heuristics are used. -CS essentially looks for some special cases -that should not be eliminated. -These special cases can be turned on or off for a given machine, -as indicated in a machine descriptor file. -.PP -Some operators can sometimes be translated -into an addressing mode for the machine at hand. -Such an operator is only eliminated -if its operand is itself expensive, -i.e. it is not just a simple load. -The machine descriptor file contains a set of such operators. -.PP -Eliminating the loading of the Local Base or -the Argument Base by the LXL resp. LXA instruction -is only beneficial if the difference in lexical levels -exceeds a certain threshold. -The machine descriptor file contains this threshold. -.PP -Replacing a SAR or a LAR by an AAR followed by a LOI -may possibly increase the size of the object code. -We assume that this is only possible when the -size of the array element is greater than some limit. -.PP -There are back ends that can very efficiently translate -the index computing instruction sequence LOC SLI ADS. -If this is the case, -the SLI instruction between a LOC -and an ADS is not eliminated. 
-.PP -To handle unforeseen cases, the descriptor file may also contain -a set of operators that should never be eliminated. -.NH 3 -The algorithm -.PP -After these preparatory explanations, -the algorithm itself is easy to understand. -For each instruction within the current window, -the following steps are performed in the given order: -.IP 1. -Check if this instruction defines an entity. -If so, the set of entities is updated accordingly. -.IP 2. -Kill all entities that might be affected by this instruction. -.IP 3. -Simulate the instruction on the fake-stack. -If this instruction is an operator, -update the list of available expressions accordingly. -.PP -The result of this process is -a list of available expressions plus the information -needed to eliminate them. -Expressions that are desirable to eliminate are eliminated. -Next, the window is shifted and the process is repeated. diff --git a/doc/ego/cs/cs4 b/doc/ego/cs/cs4 deleted file mode 100644 index c0a42d5fc..000000000 --- a/doc/ego/cs/cs4 +++ /dev/null @@ -1,311 +0,0 @@ -.NH 2 -Implementation. -.PP -In this section we will discuss the implementation of the CS phase. -We will first describe the basic actions that are undertaken -by the algorithm, then the algorithm itself. -.NH 3 -Partitioning the EM instructions -.PP -There are over 100 EM instructions. -For our purpose we partition this huge set into groups of -instructions which can be more or less conveniently handled together. -.PP -There are groups for all sorts of load instructions: -simple loads, expensive loads, loads of an array element. -A load is considered \fIexpensive\fP when more than one EM instruction -is involved in loading it. -The load of a lexical entity is also considered expensive. -For instance: LOF is expensive, LAL is not. -LAR forms a group on its own, -because it is not only an expensive load, -but also implicitly includes the ternary operator AAR, -which computes the address of the array element.
-.PP -There are groups for all sorts of operators: -unary, binary, and ternary. -The groups of operators are further partitioned according to the size -of their operand(s) and result. -.\" .PP -.\" The distinction between operators and expensive loads is not always clear. -.\" The ADP instruction for example, -.\" might seem a unary operator because it pops one item -.\" (a pointer) from the stack. -.\" However, two ADP-instructions which pop an item with the same value number -.\" need not have the same result, -.\" because the attributes (an offset, to be added to the pointer) -.\" can be different. -.\" Is it then a binary operator? -.\" That would give rise to the strange, and undesirable, -.\" situation that some binary operators pop two operands -.\" and others pop one. -.\" The conclusion is inevitable: -.\" we have been fooled by the name (ADd Pointer). -.\" The ADP-instruction is an expensive load. -.\" In this context LAF, meaning Load Address of oFfsetted, -.\" would have been a better name, -.\" corresponding to LOF, like LAL, -.\" Load Address of Local, corresponds to LOL. -.PP -There are groups for all sorts of stores: -direct, indirect, array element. -The SAR forms a group on its own for the same reason -as appeared with LAR. -.PP -The effect of the remaining instructions is less clear. -They do not help very much in parsing expressions or -in constructing our pseudo symboltable. -They are partitioned according to the following criteria: -.RS -.IP "-" -They change the value of an entity without using the stack -(e.g. ZRL, DEE). -.IP "-" -They are subroutine calls (CAI, CAL). -.IP "-" -They change the stack in some irreproducible way (e.g. ASP, LFR, DUP). -.IP "-" -They have no effect whatever on the stack or on the entities. -This does not mean they can be deleted, -but they can be ignored for the moment -(e.g. MES, LIN, NOP). -.IP "-" -Their effect is too complicated to compute, -so we just assume worst case behaviour.
-Hopefully, they do not occur very often. -(e.g. MON, STR, BLM). -.IP "-" -They signal the end of the basic block (e.g. BLT, RET, TRP). -.RE -.NH 3 -Parsing expressions -.PP -To recognize expressions, -we simulate the behaviour of the EM machine, -by means of a fake-stack. -When we scan the instructions in sequential order, -we first encounter the instructions that load -the operands on the stack, -and then the instruction that indicates the operator, -because EM expressions are postfix. -When we find an instruction to load an operand, -we load on the fake-stack a struct with the following information: -.DS -.TS -l l. -(1) the value number of the operand -(2) the size of the operand -(3) a pointer to the first line of EM-code - that constitutes the operand -.TE -.DE -In most cases, (3) will point to the line -that loaded the operand (e.g. LOL, LOC), -i.e. there is only one line that refers to this operand, -but sometimes some information must be popped -to load the operand (e.g. LOI, LAR). -This information must have been pushed before, -so we also pop a pointer to the first line that pushed -the information. -This line is now the first line that defines the operand. -.PP -When we find the operator instruction, -we pop its operand(s) from the fake-stack. -The first line that defines the first operand is -now the first line of the expression. -We now have all information to determine -whether the just parsed expression has occurred before. -We also know the first and last line of the expression; -we need this when we decide to eliminate it. -Associated with each available expression is a set of -which the elements contains the first and last line of -a recurrence of this expression. -.PP -Not only will the operand(s) be popped from the fake-stack, -but the following will be pushed: -.DS -.TS -l l. 
-(1) the value number of the result -(2) the size of the result -(3) a pointer to the first line of the expression -.TE -.DE -In this way an item on the fake-stack always contains -the necessary information. -EM expressions are parsed bottom up. -.NH 3 -Updating entities -.PP -As said before, -we build our private "symboltable", -while scanning the EM-instructions. -The behaviour of the EM-machine is not only reflected -in the fake-stack, -but also in the entities. -When an entity is created, -we do not yet know its value, -so we assign a brand new value number to it. -Each time a store-instruction is encountered, -we change the value number of the target entity of this store -to the value number of the token that was popped -from the fake-stack. -Because entities may overlap, -we must also "forget" the value numbers of entities -that might be affected by this store. -Each such entity will be \fIkilled\fP, -i.e. assigned a brand new value number. -.PP -Because we lose information when we forget -the value number of an entity, -we try to save as many entities as possible. -When we store into an external, -we don't have to kill locals and vice versa. -Furthermore, we can see whether two locals or -two externals overlap, -because we know the offset from the local base, -resp. the offset within the data block, -and the size. -The situation becomes more complicated when we have -to consider indirection. -The worst case is that we store through an unknown pointer. -In that case we kill all entities except those locals -for which a so-called \fIregister message\fP has been generated; -this register message indicates that this local can never be -accessed indirectly. -If we know this pointer we can be more careful. -If it points to a local then the entity that is accessed through -this pointer can never overlap with an external. -If it points to an external this entity can never overlap with a local.
-Furthermore, in the latter case, -we can find the data block this entity belongs to. -Since pointer arithmetic is only defined within a data block, -this entity can never overlap with entities that are known to -belong to another data block. -.PP -Not only after a store-instruction but also after a -subroutine-call it may be necessary to kill entities; -the subroutine may affect global variables or store -through a pointer. -If a subroutine is called that is not available as EM-text, -we assume worst case behaviour, -i.e. we kill all entities without register message. -.NH 3 -Additions and replacements. -.PP -When a new expression comes available, -we check whether the result is saved in a local -that may go in a register. -The last line of the expression must be followed -by a STL or SDL instruction, -depending on the size of the result -(resp. WS and 2*WS), -and a register message must be present for -this local. -If we have found such a local, -we store a pointer to it with the available expression. -Each time a new occurrence of this expression -is found, -we compare the value number of the local against -the value number of the result. -When they are different we remove the pointer to it, -because we cannot use it. -.PP -The available expressions are singly linked in a list. -When a new expression comes available, -we link it at the head of the list. -In this way expressions that are contained within other -expressions appear later in the list, -because EM-expressions are postfix. -When we are going to eliminate expressions, -we walk through the list, -starting at the head, to find the largest expressions first. -When we decide to eliminate an expression, -we look at the expressions in the tail of the list, -starting from where we are now, -to delete expressions that are contained within -the chosen one because -we cannot eliminate an expression more than once. 
-.PP -When we are going to eliminate expressions, -and we do not have a local that holds the result, -we emit a STL or SDL after the line where the expression -was first found. -The other occurrences are simply removed, -unless they contain instructions that not only have -effect on the stack; e.g. messages, stores, calls. -Before each instruction that needs the result on the stack, -we emit a LOL or LDL. -When the expression was an AAR, -but the instruction was a LAR or a SAR, -we append a LOI resp. a STI of the number of bytes -in an array-element after each LOL/LDL. -.NH 3 -Desirability analysis -.PP -Although the global optimizer works on EM code, -the goal is to improve the quality of the object code. -Therefore we need some machine dependent information -to decide whether it is desirable to -eliminate a given expression. -Because it is impossible for the CS phase to know -exactly what code will be generated, -we use some heuristics. -In most cases it will save time when we eliminate an -operator, so we just do it. -We only look for some special cases. -.PP -Some operators can in some cases be translated -into an addressing mode for the machine at hand. -We only eliminate such an operator, -when its operand is itself "expensive", -i.e. not just a simple load. -The user of the CS phase has to supply -a set of such operators. -.PP -Eliminating the loading of the Local Base or -the Argument Base by the LXL resp. LXA instruction -is only beneficial when the number of lexical levels -we have to go back exceeds a certain threshold. -This threshold will be different when registers -are saved by the back end. -The user must supply this threshold. -.PP -Replacing a SAR or a LAR by an AAR followed by a LOI -may possibly increase the size of the object code. -We assume that this is only possible when the -size of the array element is greater than some -(user-supplied) limit. 
-.PP -There are back ends that can very efficiently translate -the index computing instruction sequence LOC SLI ADS. -If this is the case, -we do not eliminate the SLI instruction between a LOC -and an ADS. -.PP -To handle unforeseen cases, the user may also supply -a set of operators that should never be eliminated. -.NH 3 -The algorithm -.PP -After these preparatory explanations, -we can be short about the algorithm itself. -For each instruction within our window, -the following steps are performed in the order given: -.IP 1. -We check if this instruction defines an entity. -If this is the case the set of entities is updated accordingly. -.IP 2. -We kill all entities that might be affected by this instruction. -.IP 3. -The instruction is simulated on the fake-stack. -Copy propagation is done. -If this instruction is an operator, -we update the list of available expressions accordingly. -.PP -When we have processed all instructions this way, -we have built a list of available expressions plus the information we -need to eliminate them. -We eliminate those expressions for which desirability analysis -tells us to do so. -Then we shift our window and continue.
diff --git a/doc/ego/cs/cs5 b/doc/ego/cs/cs5 deleted file mode 100644 index eaf884020..000000000 --- a/doc/ego/cs/cs5 +++ /dev/null @@ -1,46 +0,0 @@ -.NH 2 -Source files of CS -.PP -The sources of CS are in the following files and packages: -.IP cs.h 14 -declarations of global variables and data structures -.IP cs.c -the routine main; -a driving routine to process -the basic blocks in the right order -.IP vnm -implements a procedure that performs -the value numbering on one basic block -.IP eliminate -implements a procedure that does the -transformations, if desirable -.IP avail -implements a procedure that manipulates the list of available expressions -.IP entity -implements a procedure that manipulates the set of entities -.IP getentity -implements a procedure that extracts the -pseudo symboltable information from EM-instructions; -uses a small table -.IP kill -implements several routines that find the entities -that might be changed by EM-instructions -and kill them -.IP partition -implements several routines that partition the huge set -of EM-instructions into more or less manageable, -more or less logical chunks -.IP profit -implements a procedure that decides whether it -is advantageous to eliminate an expression; -also removes expressions with side-effects -.IP stack -implements the fake-stack and operations on it -.IP alloc -implements several allocation routines -.IP aux -implements several auxiliary routines -.IP debug -implements several routines to provide debugging -and verbose output -.LP diff --git a/doc/ego/ic/.distr b/doc/ego/ic/.distr deleted file mode 100644 index eabb41472..000000000 --- a/doc/ego/ic/.distr +++ /dev/null @@ -1,5 +0,0 @@ -ic1 -ic2 -ic3 -ic4 -ic5 diff --git a/doc/ego/ic/ic1 b/doc/ego/ic/ic1 deleted file mode 100644 index 6347bc73f..000000000 --- a/doc/ego/ic/ic1 +++ /dev/null @@ -1,57 +0,0 @@ -.bp -.NH -The Intermediate Code and the IC phase -.PP -In this chapter the intermediate code of the EM global optimizer -will be defined. 
-The 'Intermediate Code construction' phase (IC), -which builds the initial intermediate code from -EM Compact Assembly Language, -will be described. -.NH 2 -Introduction -.PP -The EM global optimizer is a multi pass program, -hence there is a need for an intermediate code. -Usually, programs in the Amsterdam Compiler Kit use the -Compact Assembly Language format -.[~[ -keizer architecture -.], section 11.2] -for this purpose. -Although this code has some convenient features, -such as being compact, -it is quite unsuitable in our case, -because of a number of reasons. -At first, the code lacks global information -about whole procedures or whole basic blocks. -Second, it uses identifiers ('names') to bind -defining and applied occurrences of -procedures, data labels and instruction labels. -Although this is usual in high level programming -languages, it is awkward in an intermediate code -that must be read many times. -Each pass of the optimizer would have -to incorporate an identifier look-up mechanism -to associate a defining occurrence with each -applied occurrence of an identifier. -Finally, EM programs are used to declare blocks of bytes, -rather than variables. A 'hol 6' instruction may be used to -declare three 2-byte variables. -Clearly, the optimizer wants to deal with variables, and -not with rows of bytes. -.PP -To overcome these problems, we have developed a new -intermediate code. -This code does not merely consist of the EM instructions, -but also contains global information in the -form of tables and graphs. -Before describing the intermediate code we will -first leap aside to outline -the problems one generally encounters -when trying to store complex data structures such as -graphs outside the program, i.e. in a file. -We trust this will enhance the -comprehensibility of the -intermediate code definition and the design and implementation -of the IC phase. 
diff --git a/doc/ego/ic/ic2 b/doc/ego/ic/ic2 deleted file mode 100644 index f55e69927..000000000 --- a/doc/ego/ic/ic2 +++ /dev/null @@ -1,150 +0,0 @@ -.NH 2 -Representation of complex data structures in a sequential file -.PP -Most programmers are quite used to deal with -complex data structures, such as -arrays, graphs and trees. -There are some particular problems that occur -when storing such a data structure -in a sequential file. -We call data that is kept in -main memory -.UL internal -,as opposed to -.UL external -data -that is kept in a file outside the program. -.sp -We assume a simple data structure of a -scalar type (integer, floating point number) -has some known external representation. -An -.UL array -having elements of a scalar type can be represented -externally easily, by successively -representing its elements. -The external representation may be preceded by a -number, giving the length of the array. -Now, consider a linear, singly linked list, -the elements of which look like: -.DS -record - data: scalar_type; - next: pointer_type; -end; -.DE -It is significant to note that the "next" -fields of the elements only have a meaning within -main memory. -The field contains the address of some location in -main memory. -If a list element is written to a file in -some program, -and read by another program, -the element will be allocated at a different -address in main memory. -Hence this address value is completely -useless outside the program. -.sp -One may represent the list by ignoring these "next" fields -and storing the data items in the order they are linked. -The "next" fields are represented \fIimplicitly\fR. -When the file is read again, -the same list can be reconstructed. -In order to know where the external representation of the -list ends, -it may be useful to put the length of -the list in front of it. -.sp -Note that arrays and linear lists have the -same external representation. 
-.PP -A doubly linked, linear list, -with elements of the type: -.DS -record - data: scalar_type; - next, - previous: pointer_type; -end -.DE -can be represented in precisely the same way. -Both the "next" and the "previous" fields are represented -implicitly. -.PP -Next, consider a binary tree, -the nodes of which have type: -.DS -record - data: scalar_type; - left, - right: pointer_type; -end -.DE -Such a tree can be represented sequentially, -by storing its nodes in some fixed order, e.g. prefix order. -A special null data item may be used to -denote a missing left or right son. -For example, let the scalar type be integer, -and let the null item be 0. -Then the tree of fig. 3.1(a) -can be represented as in fig. 3.1(b). -.DS -.ft 5 - 4 - / \e - 9 12 - / \e / \e - 12 3 4 6 - / \e \e / - 8 1 5 1 -.ft R - -Fig. 3.1(a) A binary tree - - -.ft 5 -4 9 12 0 0 3 8 0 0 1 0 0 12 4 0 5 0 0 6 1 0 0 0 -.ft R - -Fig. 3.1(b) Its sequential representation -.DE -We are still able to represent the pointer fields ("left" -and "right") implicitly. -.PP -Finally, consider a general -.UL graph -, where each node has a "data" field and -pointer fields, -with no restriction on where they may point to. -Now we're at the end of our tale. -There is no way to represent the pointers implicitly, -like we did with lists and trees. -In order to represent them explicitly, -we use the following scheme. -Every node gets an extra field, -containing some unique number that identifies the node. -We call this number its -.UL id. -A pointer is represented externally as the id of the node -it points to. -When reading the file we use a table that maps -an id to the address of its node. -In general this table will not be completely filled in -until we have read the entire external representation of -the graph and allocated internal memory locations for -every node. -Hence we cannot reconstruct the graph in one scan. 
-That is, there may be some pointers from node A to B, -where B is placed after A in the sequential file. -When we read the node of A we cannot map the id of B -to the address of node B, -as we have not yet allocated node B. -We can overcome this problem if the size -of every node is known in advance. -In this case we can allocate memory for a node -on first reference. -Else, the mapping from id to pointer -cannot be done while reading nodes. -The mapping can be done either in an extra scan -or at every reference to the node. diff --git a/doc/ego/ic/ic3 b/doc/ego/ic/ic3 deleted file mode 100644 index d140160b7..000000000 --- a/doc/ego/ic/ic3 +++ /dev/null @@ -1,431 +0,0 @@ -.NH 2 -Definition of the intermediate code -.PP -The intermediate code of the optimizer consists -of several components: -.IP - -the object table -.IP - -the procedure table -.IP - -the em code -.IP - -the control flow graphs -.IP - -the loop table -.LP - -.PP -These components are described in -the next sections. -The syntactic structure of every component -is described by a set of context free syntax rules, -with the following conventions: -.DS -.TS -l l. -x a non-terminal symbol -A a terminal symbol (in capitals) -x: a b c; a grammar rule -a | b a or b -(a)+ 1 or more occurrences of a -{a} 0 or more occurrences of a -.TE -.DE -.NH 3 -The object table -.PP -EM programs declare blocks of bytes rather than (global) variables. -A typical program may declare 'HOL 7780' -to allocate space for 8 I/O buffers, -2 large arrays and 10 scalar variables. -The optimizer wants to deal with -.UL objects -like variables, buffers and arrays -and certainly not with huge numbers of bytes. -Therefore the intermediate code contains information -about which global objects are used. -This information can be obtained from an EM program -by just looking at the operands of instructions -such as LOE, LAE, LDE, STE, SDE, INE, DEE and ZRE. -.PP -The object table consists of a list of -.UL datablock -entries.
-Each such entry represents a declaration like HOL, BSS, -CON or ROM. -There are five kinds of datablock entries. -The fifth kind, -UNKNOWN, denotes a declaration in a -separately compiled file that is not made -available to the optimizer. -Each datablock entry contains the type of the block, -its size, and a description of the objects that -belong to it. -If it is a rom, -it also contains a list of values given -as arguments to the rom instruction, -provided that this list contains only integer numbers. -An object has an offset (within its datablock) -and a size. -The size need not always be determinable. -Both datablock and object contain a unique -identifying number -(see previous section for their use). -.DS -.UL syntax -.TS -lw(1i) l l. -object_table: - {datablock} ; -datablock: - D_ID -- unique identifying number - PSEUDO -- one of ROM,CON,BSS,HOL,UNKNOWN - SIZE -- # bytes declared - FLAGS - {value} -- contents of rom - {object} ; -- objects of the datablock -object: - O_ID -- unique identifying number - OFFSET -- offset within the datablock - SIZE ; -- size of the object in bytes -value: - argument ; -.TE -.DE -A data block has only one flag: "external", indicating -whether the data label is externally visible. -The syntax for "argument" will be given later on -(see em_text). -.NH 3 -The procedure table -.PP -The procedure table contains global information -about all procedures that are made available -to the optimizer -and that are needed by the EM program. -(Library units may not be needed, see section 3.5). -The table has one entry for -every procedure. -.DS -.UL syntax -.TS -lw(1i) l l. 
-procedure_table: - {procedure} -procedure: - P_ID -- unique identifying number - #LABELS -- number of instruction labels - #LOCALS -- number of bytes for locals - #FORMALS -- number of bytes for formals - FLAGS -- flag bits - calling -- procedures called by this one - change -- info about global variables changed - use ; -- info about global variables used -calling: - {P_ID} ; -- procedures called -change: - ext -- external variables changed - FLAGS ; -use: - FLAGS ; -ext: - {O_ID} ; -- a set of objects -.TE -.DE -.PP -The number of bytes of formal parameters accessed by -a procedure is determined by the front ends and -passed via a message (parameter message) to the optimizer. -If the front end is not able to determine this number -(e.g. the parameter may be an array of dynamic size or -the procedure may have a variable number of arguments) the attribute -contains the value 'UNKNOWN_SIZE'. -.sp 0 -A procedure has the following flags: -.IP - -external: true if the proc. is externally visible -.IP - -bodyseen: true if its code is available as EM text -.IP - -calunknown: true if it calls a procedure that has its bodyseen -flag not set -.IP - -environ: true if it uses or changes a (non-global) variable in -a lexically enclosing procedure -.IP - -lpi: true if is used as operand of an lpi instruction, so -it may be called indirect -.LP -The change and use attributes both have one flag: "indirect", -indicating whether the procedure does a 'use indirect' -or a 'store indirect' (indirect means through a pointer). -.NH 3 -The EM text -.PP -The EM text contains the EM instructions. -Every EM instruction has an operation code (opcode) -and 0 or 1 operands. -EM pseudo instructions can have more than -1 operand. -The opcode is just a small (8 bit) integer. -.sp -There are several kinds of operands, which we will -refer to as -.UL types. -Many EM instructions can have more than one type of operand. 
-The types and their encodings in Compact Assembly Language -are discussed extensively in. -.[~[ -keizer architecture -.], section 11.2] -Of special interest is the way numeric values -are represented. -Of prime importance is the machine independency of -the representation. -Ultimately, one could store every integer -just as a string of the characters '0' to '9'. -As doing arithmetic on strings is awkward, -Compact Assembly Language allows several alternatives. -The main idea is to look at the value of the integer. -Integers that fit in 16, 32 or 64 bits are -represented as a row of resp. 2, 4 and 8 bytes, -preceded by an indication of how many bytes are used. -Longer integers are represented as strings; -this is only allowed within pseudo instructions, however. -This concept works very well for target machines -with reasonable word sizes. -At present, most ACK software cannot be used for word sizes -higher than 32 bits, -although the handles for using larger word sizes are -present in the design of the EM code. -In the intermediate code we essentially use the -same ideas. -We allow three representations of integers. -.IP - -integers that fit in a short are represented as a short -.IP - -integers that fit in a long but not in a short are represented -as longs -.IP - -all remaining integers are represented as strings -(only allowed in pseudos). -.LP -The terms short and long are defined in -.[~[ -ritchie reference manual programming language -.], section 4] -and depend only on the source machine -(i.e. the machine on which ACK runs), -not on the target machines. -For historical reasons a long will often be called an -.UL offset. -.PP -Operands can also be instruction labels, -objects or procedures. -Instruction labels are denoted by a -.UL label -.UL identifier, -which can be distinguished from a normal identifier. -.sp -The operand of a pseudo instruction can be a list of -.UL arguments. 
-Arguments can have the same type as operands, except -for the type short, which is not used for arguments. -Furthermore, an argument can be a string or -a string representation of a signed integer, unsigned integer -or floating point number. -If the number of arguments is not fully determined by -the pseudo instruction (e.g. a ROM pseudo can have any number -of arguments), then the list is terminated by a special -argument of type CEND. -.DS -.UL syntax -.TS -lw(1i) l l. -em_text: - {line} ; -line: - INSTR -- opcode - OPTYPE -- operand type - operand ; -operand: - empty | -- OPTYPE = NO - SHORT | -- OPTYPE = SHORT - OFFSET | -- OPTYPE = OFFSET - LAB_ID | -- OPTYPE = INSTRLAB - O_ID | -- OPTYPE = OBJECT - P_ID | -- OPTYPE = PROCEDURE - {argument} ; -- OPTYPE = LIST -argument: - ARGTYPE - arg ; -arg: - empty | -- ARGTYPE = CEND - OFFSET | - LAB_ID | - O_ID | - P_ID | - string | -- ARGTYPE = STRING - const ; -- ARGTYPE = ICON,UCON or FCON -string: - LENGTH -- number of characters - {CHARACTER} ; -const: - SIZE -- number of bytes - string ; -- string representation of (un)signed - -- or floating point constant -.TE -.DE -.NH 3 -The control flow graphs -.PP -Each procedure can be divided -into a number of basic blocks. -A basic block is a piece of code with -no jumps in, except at the beginning, -and no jumps out, except at the end. -.PP -Every basic block has a set of -.UL successors, -which are basic blocks that can follow it immediately in -the dynamic execution sequence. -The -.UL predecessors -are the basic blocks of which this one -is a successor. -The successor and predecessor attributes -of all basic blocks of a single procedure -are said to form the -.UL control -.UL flow -.UL graph -of that procedure. -.PP -Another important attribute is the -.UL immediate -.UL dominator. -A basic block B dominates a block C if -every path in the graph from the procedure entry block -to C goes through B. 
-The immediate dominator of C is the closest dominator -of C on any path from the entry block. -(Note that the dominator relation is transitive, -so the immediate dominator is well defined.) -.PP -A basic block also has an attribute containing -the identifiers of every -.UL loop -that the block belongs to (see next section for loops). -.DS -.UL syntax -.TS -lw(1i) l l. -control_flow_graph: - {basic_block} ; -basic_block: - B_ID -- unique identifying number - #INSTR -- number of EM instructions - succ - pred - idom -- immediate dominator - loops -- set of loops - FLAGS ; -- flag bits -succ: - {B_ID} ; -pred: - {B_ID} ; -idom: - B_ID ; -loops: - {LP_ID} ; -.TE -.DE -The flag bits can have the values 'firm' and 'strong', -which are explained below. -.NH 3 -The loop tables -.PP -Every procedure has an associated -.UL loop -.UL table -containing information about all the loops -in the procedure. -Loops can be detected by a close inspection of -the control flow graph. -The main idea is to look for two basic blocks, -B and C, for which the following holds: -.IP - -B is a successor of C -.IP - -B is a dominator of C -.LP -B is called the loop -.UL entry -and C is called the loop -.UL end. -Intuitively, C contains a jump backwards to -the beginning of the loop (B). -.PP -A loop L1 is said to be -.UL nested -within loop L2 if all basic blocks of L1 -are also part of L2. -It is important to note that loops could -originally be written as a well structured for -or -while loop or as a messy goto loop. -Hence loops may partly overlap without one -being nested inside the other. -The -.UL nesting -.UL level -of a loop is the number of loops in -which it is nested (so it is 0 for -an outermost loop). -The details of loop detection will be discussed later. -.PP -It is often desirable to know whether a -basic block gets executed during every iteration -of a loop. 
-This leads to the following definitions: -.IP - -A basic block B of a loop L is said to be a \fIfirm\fR block -of L if B is executed on all successive iterations of L, -with the only possible exception of the last iteration. -.IP - -A basic block B of a loop L is said to be a \fIstrong\fR block -of L if B is executed on all successive iterations of L. -.LP -Note that a strong block is also a firm block. -If a block is part of a conditional statement, it is neither -strong nor firm, as it may be skipped during some iterations -(see Fig. 3.2). -.DS -loop - if cond1 then - ... \kx-- this code will not - \h'|\nxu'-- result in a firm or strong block - end if; - ... -- strong (always executed) - exit when cond2; - ... \kx-- firm (not executed on last iteration). -end loop; - -Fig. 3.2 Example of firm and strong block -.DE -.DS -.UL syntax -.TS -lw(1i) l l. -looptable: - {loop} ; -loop: - LP_ID -- unique identifying number - LEVEL -- loop nesting level - entry -- loop entry block - end ; -entry: - B_ID ; -end: - B_ID ; -.TE -.DE diff --git a/doc/ego/ic/ic4 b/doc/ego/ic/ic4 deleted file mode 100644 index b75f13f88..000000000 --- a/doc/ego/ic/ic4 +++ /dev/null @@ -1,83 +0,0 @@ -.NH 2 -External representation of the intermediate code -.PP -The syntax of the intermediate code was given -in the previous section. -In this section we will make some remarks about -the representation of the code in sequential files. -.sp -We use sequential files in order to avoid -the bookkeeping of complex file indices. -As a consequence of this decision -we can't store all components -of the intermediate code -in one file. -If a phase wishes to change some attribute -of a procedure, -or wants to add or delete entire procedures -(inline substitution may do the latter), -the procedure table will only be fully updated -after the entire EM text has been scanned. -Yet, the next phase undoubtedly wants -to read the procedure table before it -starts working on the EM text. 
-Hence there is an ordering problem, which -can be solved easily by putting the -procedure table in a separate file. -Similarly, the data block table is kept -in a file of its own. -.PP -The control flow graphs (CFGs) could be mixed -with the EM text. -Rather, we have chosen to put them -in a separate file too. -The control flow graph file should be regarded as a -file that imposes some structure on the EM-text file, -just as an overhead sheet containing a picture -of a Flow Chart may be put on an overhead sheet -containing statements. -The loop tables are also put in the CFG file. -A loop imposes an extra structure on the -CFGs and hence on the EM text. -So there are four files: -.IP - -the EM-text file -.IP - -the procedure table file -.IP - -the object table file -.IP - -the CFG and loop tables file -.LP -Every table is preceded by its length, in order to -tell where it ends. -The CFG file also contains the number of instructions of -every basic block, -indicating which part of the EM text belongs -to that block. -.DS -.UL syntax -.TS -lw(1i) l l. -intermediate_code: - object_table_file - proctable_file - em_text_file - cfg_file ; -object_table_file: - LENGTH -- number of objects - object_table ; -proctable_file: - LENGTH -- number of procedures - procedure_table ; -em_text_file: - em_text ; -cfg_file: - {per_proc} ; -- one for every procedure -per_proc: - BLENGTH -- number of basic blocks - LLENGTH -- number of loops - control_flow_graph - looptable ; -.TE -.DE diff --git a/doc/ego/ic/ic5 b/doc/ego/ic/ic5 deleted file mode 100644 index eb91bd5d9..000000000 --- a/doc/ego/ic/ic5 +++ /dev/null @@ -1,166 +0,0 @@ -.NH 2 -The Intermediate Code construction phase -.PP -The first phase of the global optimizer, -called -.UL IC, -constructs a major part of the intermediate code. 
-To be specific, it produces: -.IP - -the EM text -.IP - -the object table -.IP - -part of the procedure table -.LP -The calling, change and use attributes of a procedure -and all its flags except the external and bodyseen flags -are computed by the next phase (Control Flow phase). -.PP -As explained before, -the intermediate code does not contain -any names of variables or procedures. -The normal identifiers are replaced by identifying -numbers. -Yet, the output of the global optimizer must -contain normal identifiers, as this -output is in Compact Assembly Language format. -We certainly want all externally visible names -to be the same in the input as in the output, -because the optimized EM module may be a library unit, -used by other modules. -IC dumps the names of all procedures and data labels -on two files: -.IP - -the procedure dump file, containing tuples (P_ID, procedure name) -.IP - -the data dump file, containing tuples (D_ID, data label name) -.LP -The names of instruction labels are not dumped, -as they are not visible outside the procedure -in which they are defined. -.PP -The input to IC consists of one or more files. -Each file is either an EM module in Compact Assembly Language -format, or a Unix archive file (library) containing such modules. -IC only extracts those modules from a library that are -needed somehow, just as a linker does. -It is advisable to present as much code -of the EM program as possible to the optimizer, -although it is not required to present the whole program. -If a procedure is called somewhere in the EM text, -but its body (text) is not included in the input, -its bodyseen flag in the procedure table will still -be off. -Whenever such a procedure is called, -we assume the worst case for everything; -it will change and use all variables it has access to, -it will call every procedure etc. -.sp -Similarly, if a data label is used -but not defined, the PSEUDO attribute in its data block -will be set to UNKNOWN. 
-.NH 3 -Implementation -.PP -Part of the code for the EM Peephole Optimizer -.[ -staveren peephole toplass -.] -has been used for IC. -Especially the routines that read and unravel -Compact Assembly Language and the identifier -lookup mechanism have been used. -New code was added to recognize objects, -build the object and procedure tables and to -output the intermediate code. -.PP -IC uses singly linked linear lists for both the -procedure and object table. -Hence there are no limits on the size of such -a table (except for the trivial fact that it must fit -in main memory). -Both tables are written out after all EM code has -been processed. -IC reads the EM text of one entire procedure -at a time, -processes it and appends the modified code to -the EM text file. -EM code is represented internally as a doubly linked linear -list of EM instructions. -.PP -Objects are recognized by looking at the operands -of instructions that reference global data. -If we come across the instructions: -.DS -.TS -l l. -LDE X+6 -- Load Double External -LAE X+20 -- Load Address External -.TE -.DE -we conclude that the data block -preceded by the data label X contains an object -at offset 6 of size twice the word size, -and an object at offset 20 of unknown size. -.sp -A data block entry of the object table is allocated -at the first reference to a data label. -If this reference is a defining occurrence -or an INA pseudo instruction, -the label is not externally visible -.[~[ -keizer architecture -.], section 11.1.4.3] -In this case, the external flag of the data block -is turned off. -If the first reference is an applied occurrence -or an EXA pseudo instruction, the flag is set. -We record this information, because the -optimizer may change the order of defining and -applied occurrences. -The INA and EXA pseudos are removed from the EM text. -They may be regenerated by the last phase -of the optimizer. -.sp -Similar rules hold for the procedure table -and the INP and EXP pseudos.
-.NH 3 -Source files of IC -.PP -The source files of IC consist -of the files ic.c, ic.h and several packages. -.UL ic.h -contains type definitions, macros and -variable declarations that may be used by -ic.c and by every package. -.UL ic.c -contains the definitions of these variables, -the procedure -.UL main -and some high level I/O routines used by main. -.sp -Every package xxx consists of two files. -ic_xxx.h contains type definitions, -macros, variable declarations and -procedure declarations that may be used by -every .c file that includes this .h file. -The file ic_xxx.c provides the -definitions of these variables and -the implementation of the declared procedures. -IC uses the following packages: -.IP lookup: 18 -procedures that look up procedure, data label -and instruction label names; procedures to dump -the procedure and data label names. -.IP lib: -one procedure that gets the next useful input module; -while scanning archives, it skips unnecessary modules. -.IP aux: -several auxiliary routines. -.IP io: -low-level I/O routines that unravel the Compact -Assembly Language. -.IP put: -routines that output the intermediate code. -.LP diff --git a/doc/ego/il/.distr b/doc/ego/il/.distr deleted file mode 100644 index 2aac56865..000000000 --- a/doc/ego/il/.distr +++ /dev/null @@ -1,6 +0,0 @@ -il1 -il2 -il3 -il4 -il5 -il6 diff --git a/doc/ego/il/il1 b/doc/ego/il/il1 deleted file mode 100644 index 5bc33e6af..000000000 --- a/doc/ego/il/il1 +++ /dev/null @@ -1,112 +0,0 @@ -.bp -.NH 1 -Inline substitution -.NH 2 -Introduction -.PP -The Inline Substitution technique (IL) -tries to decrease the overhead associated -with procedure calls (invocations). -During a procedure call, several actions -must be undertaken to set up the right -environment for the called procedure. -.[ -johnson calling sequence -.] -On return from the procedure, most of these -effects must be undone.
-This entire process introduces significant -costs in execution time as well as -in object code size. -.PP -The inline substitution technique replaces -some of the calls by the modified body of -the called procedure, hence eliminating -the overhead. -Furthermore, as the calling and called procedure -are now integrated, they can be optimized -together, using other techniques of the optimizer. -This often leads to extra opportunities for -optimization -.[ -ball predicting effects -.] -.[ -carter code generation cacm -.] -.[ -scheifler inline cacm -.] -.PP -An inline substitution of a call to a procedure P increases -the size of the program, unless P is very small or P is -called only once. -In the latter case, P can be eliminated. -In practice, procedures that are called only once occur -quite frequently, due to the -introduction of structured programming. -(Carter -.[ -carter umi ann arbor -.] -states that almost 50% of the Pascal procedures -he analyzed were called just once). -.PP -Scheifler -.[ -scheifler inline cacm -.] -has a more general view of inline substitution. -In his model, the program under consideration is -allowed to grow by a certain amount, -i.e. code size is sacrificed to speed up the program. -The above two cases are just special cases of -his model, obtained by setting the size-change to -(approximately) zero. -He formulates the substitution problem as follows: -.IP -"Given a program, a subset of all invocations, -a maximum program size, and a maximum procedure size, -find a sequence of substitutions that minimizes -the expected execution time." -.LP -Scheifler shows that this problem is NP-complete -.[~[ -aho hopcroft ullman analysis algorithms -.], chapter 10] -by reduction to the Knapsack Problem. -Heuristics will have to be used to find a near-optimal -solution. -.PP -In the following chapters we will extend -Scheifler's view and adapt it to the EM Global Optimizer. 
-We will first describe the transformations that have -to be applied to the EM text when a call is substituted -in line. -Next we will examine in which cases inline substitution -is not possible or desirable. -Heuristics will be developed for -choosing a good sequence of substitutions. -These heuristics make no demand on the user -(such as making profiles -.[ -scheifler inline cacm -.] -or giving pragmats -.[~[ -ichbiah ada military standard -.], section 6.3.2]), -although the model could easily be extended -to use such information. -Finally, we will discuss the implementation -of the IL phase of the optimizer. -.PP -We will often use the term inline expansion -as a synonym of inline substitution. -.sp 0 -The inverse technique of procedure abstraction -(automatic subroutine generation) -.[ -shaffer subroutine generation -.] -will not be discussed in this report. diff --git a/doc/ego/il/il2 b/doc/ego/il/il2 deleted file mode 100644 index ea69b35d7..000000000 --- a/doc/ego/il/il2 +++ /dev/null @@ -1,93 +0,0 @@ -.NH 2 -Parameters and local variables. -.PP -In the EM calling sequence, the calling procedure -pushes its parameters on the stack -before doing the CAL. -The called routine first saves some -status information on the stack and then -allocates space for its own locals -(also on the stack). -Usually, one special purpose register, -the Local Base (LB) register, -is used to access both the locals and the -parameters. -If memory is highly segmented, -the stack frames of the caller and the callee -may be allocated in different fragments; -an extra Argument Base (AB) register is used -in this case to access the actual parameters. -See 4.2 of -.[ -keizer architecture -.] -for further details. -.PP -If a procedure call is expanded in line, -there are two problems: -.IP 1. 3 -No stack frame will be allocated for the called procedure; -we must find another place to put its locals. -.IP 2.
-The LB register cannot be used to access the actual -parameters; -as the CAL instruction is deleted, the LB will -still point to the local base of the \fIcalling\fR procedure. -.LP -The local variables of the called procedure will -be put in the stack frame of the calling procedure, -just after its own locals. -The size of the stack frame of the -calling procedure will be increased -during its entire lifetime. -Therefore our model will allow a -limit to be set on the number of bytes -for locals that the called procedure may have -(see next section). -.PP -There are several alternatives to access the parameters. -An actual parameter may be any auxiliary expression, -which we will refer to as -the \fIactual parameter expression\fR. -The value of this expression is stored -in a location on the stack (see above), -the \fIparameter location\fR. -.sp 0 -The alternatives for accessing parameters are: -.IP - -save the value of the stackpointer at the point of the CAL -in a temporary variable X; -this variable can be used to simulate the AB register, i.e. -parameter locations are accessed via an offset to -the value of X. -.IP - -create a new temporary local variable T for -the parameter (in the stack frame of the caller); -every access to the parameter location must be changed -into an access to T. -.IP - -do not evaluate the actual parameter expression before the call; -instead, substitute this expression for every use of the -parameter location. -.LP -The first method may be expensive if X is not -put in a register. -We will not use this method. -The time required to evaluate and access the -parameters when the second method is used -will not differ much from the normal -calling sequence (i.e. not in line call). -It is not expensive, but there are no -extra savings either. -The third method is essentially the 'by name' -parameter mechanism of Algol60. -If the actual parameter is just a numeric constant, -it is advantageous to use it. 
-Yet, there are several circumstances -under which it cannot or should not be used. -We will deal with this in the next section. -.sp 0 -In general we will use the third method, -if it is possible and desirable. -Such parameters will be called \fIin line parameters\fR. -In all other cases we will use the second method. diff --git a/doc/ego/il/il3 b/doc/ego/il/il3 deleted file mode 100644 index 398b4c8bf..000000000 --- a/doc/ego/il/il3 +++ /dev/null @@ -1,164 +0,0 @@ -.NH 2 -Feasibility and desirability analysis -.PP -Feasibility and desirability analysis -of in line substitution differ -somewhat from most other techniques. -Usually, much effort is needed to find -a feasible opportunity for optimization -(e.g. a redundant subexpression). -Desirability analysis then checks -if it is really advantageous to do -the optimization. -For IL, opportunities are easy to find. -To see if an in line expansion is -desirable will not be hard either. -Yet, the main problem is to find the most -desirable ones. -We will deal with this problem later and -we will first attend feasibility and -desirability analysis. -.PP -There are several reasons why a procedure invocation -cannot or should not be expanded in line. -.sp -A call to a procedure P cannot be expanded in line -in any of the following cases: -.IP 1. 3 -The body of P is not available as EM text. -Clearly, there is no way to do the substitution. -.IP 2. -P, or any procedure called by P (transitively), -follows the chain of statically enclosing -procedures (via a LXL or LXA instruction) -or follows the chain of dynamically enclosing -procedures (via a DCH). -If the call were expanded in line, -one level would be removed from the chains, -leading to total chaos. -This chaos could be solved by patching up -every LXL, LXA or DCH in all procedures -that could be part of the chains, -but this is hard to implement. -.IP 3. -P, or any procedure called by P (transitively), -calls a procedure whose body is not -available as EM text. 
-The unknown procedure may use an LXL, LXA or DCH. -However, in several languages a separately -compiled procedure has no access to the -static or dynamic chain. -In this case -this point does not apply. -.IP 4. -P, or any procedure called by P (transitively), -uses the LPB instruction, which converts a -local base to an argument base; -as the locals and parameters are stored -in a non-standard way (differing from the -normal EM calling sequence) this instruction -would yield incorrect results. -.IP 5. -The total number of bytes of the parameters -of P is not known. -P may be a procedure with a variable number -of parameters or may have an array of dynamic size -as value parameter. -.LP -It is undesirable to expand a call to a procedure P in line -in any of the following cases: -.IP 1. 3 -P is large, i.e. the number of EM instructions -of P exceeds some threshold. -The expanded code would be large too. -Furthermore, several programs in ACK, -including the global optimizer itself, -may run out of memory if they have to run -in a small address space and are provided with -very large procedures. -The threshold may be set to infinite, -in which case this point does not apply. -.IP 2. -P has many local variables. -All these variables would have to be allocated -in the stack frame of the calling procedure. -.PP -If a call may be expanded in line, we have to -decide how to access its parameters. -In the previous section we stated that we would -use in line parameters whenever possible and desirable. -There are several reasons why a parameter -cannot or should not be expanded in line. -.sp -No parameter of a procedure P can be expanded in line, -in any of the following cases: -.IP 1. 3 -P, or any procedure called by P (transitively), -does a store-indirect or a use-indirect (i.e. through -a pointer). -However, if the front-end has generated messages -telling that certain parameters can not be accessed -indirectly, those parameters may be expanded in line. -.IP 2.
-P, or any procedure called by P (transitively), -calls a procedure whose body is not available as EM text. -The unknown procedure may do a store-indirect -or a use-indirect. -However, the same remark about front-end messages -as for 1. holds here. -.IP 3. -The address of a parameter location is taken (via a LAL). -In the normal calling sequence, all parameters -are stored sequentially. If the address of one -parameter location is taken, the address of any -other parameter location can be computed from it. -Hence we must put every parameter in a temporary location; -furthermore, all these locations must be in -the same order as for the normal calling sequence. -.IP 4. -P has overlapping parameters; for example, it uses -the parameter at offset 10 both as a 2 byte and as a 4 byte -parameter. -Such code may be produced by the front ends if -the formal parameter is of some record type -with variants. -.PP -Sometimes a specific parameter must not be expanded in line. -.sp 0 -An actual parameter expression cannot be expanded in line -in any of the following cases: -.IP 1. 3 -P stores into the parameter location. -Even if the actual parameter expression is a simple -variable, it is incorrect to change the 'store into -formal' into a 'store into actual', because of -the parameter mechanism used. -In Pascal, the following expansion is incorrect: -.DS -procedure p (x:integer); -begin - x := 20; -end; -\&... -a := 10; \kxa := 10; -p(a); ---> \h'|\nxu'a := 20; -write(a); \h'|\nxu'write(a); -.DE -.IP 2. -P changes any of the operands of the -actual parameter expression. -If the expression is expanded and evaluated -after the operand has been changed, -the wrong value will be used. -.IP 3. -The actual parameter expression has side effects. -It must be evaluated only once, -at the place of the call. -.LP -It is undesirable to expand an actual parameter in line -in the following case: -.IP 1. 
3 -The parameter is used more than once -(dynamically) and the actual parameter expression -is not just a simple variable or constant. -.LP diff --git a/doc/ego/il/il4 b/doc/ego/il/il4 deleted file mode 100644 index 8ef685829..000000000 --- a/doc/ego/il/il4 +++ /dev/null @@ -1,135 +0,0 @@ -.NH 2 -Heuristic rules -.PP -Using the information described -in the previous section, -we can find all calls that can -be expanded in line, and for which -this expansion is desirable. -In general, we cannot expand all these calls, -so we have to choose the 'best' ones. -With every CAL instruction -that may be expanded, we associate -a \fIpay off\fR, -which expresses how desirable it is -to expand this specific CAL. -.sp -Let Tc denote the portion of EM text involved -in a specific call, i.e. the pushing of the actual -parameter expressions, the CAL itself, -the popping of the parameters and the -pushing of the result (if any, via an LFR). -Let Te denote the EM text that would be obtained -by expanding the call in line. -Let Pc be the original program and Pe the program -with Te substituted for Tc. -The pay off of the CAL depends on two factors: -.IP - -T = execution_time(Pe) - execution_time(Pc) -.IP - -S = code_size(Pe) - code_size(Pc) -.LP -The change in execution time (T) depends on: -.IP - -T1 = execution_time(Te) - execution_time(Tc) -.IP - -N = number of times Te or Tc get executed. -.LP -We assume that T1 will be the same every -time the code gets executed. -This is a reasonable assumption. -(Note that we are talking about one CAL, -not about different calls to the same procedure). -Hence -.DS -T = N * T1 -.DE -T1 can be estimated by a careful analysis -of the transformations that are performed. -Below, we list everything that will be -different when a call is expanded in line: -.IP - -The CAL instruction is not executed. -This saves a subroutine jump. -.IP - -The instructions in the procedure prolog -are not executed. 
-These instructions, generated from the PRO pseudo, -save some machine registers -(including the old LB), set the new LB and allocate space -for the locals of the called routine. -The savings may be less if there are no -locals to allocate. -.IP - -In line parameters are not evaluated before the call -and are not pushed on the stack. -.IP - -All remaining parameters are stored in local variables, -instead of being pushed on the stack. -.IP - -If the number of parameters is nonzero, -the ASP instruction after the CAL is not executed. -.IP - -Every reference to an in line parameter is -substituted by the parameter expression. -.IP - -RET (return) instructions are replaced by -BRA (branch) instructions. -If the called procedure 'falls through' -(i.e. it has only one RET, at the end of its code), -even the BRA is not needed. -.IP - -The LFR (fetch function result) is not executed. -.PP -Besides these changes, which are caused directly by IL, -other changes may occur as IL influences other optimization -techniques, such as Register Allocation and Constant Propagation. -Our heuristic rules do not take into account the quite -unpredictable effects on Register Allocation. -They do, however, favour calls that have numeric \fIconstants\fR -as parameter; especially the constant "0" as an inline -parameter gets high scores, -as further optimizations may often be possible. -.PP -It cannot be determined statically how often a CAL instruction gets -executed. -We will use \fIloop nesting\fR information here. -The nesting level of the loop in which -the CAL appears (if any) will be used as an -indication for the number of times it gets executed. -.PP -Based on all these facts, -the pay off of a call will be computed. -The following model was developed empirically. -Assume procedure P calls procedure Q. -The call takes place in basic block B. -.DS -.TS -l l l.
-ZP \&= # zero parameters -CP \&= # constant parameters - ZP -LN \&= Loop Nesting level (0 if outside any loop) -F \&= \fIif\fR # formal parameters of Q > 0 \fIthen\fR 1 \fIelse\fR 0 -FT \&= \fIif\fR Q falls through \fIthen\fR 1 \fIelse\fR 0 -S \&= size(Q) - 1 - # inline_parameters - F -L \&= \fIif\fR # local variables of P > 0 \fIthen\fR 0 \fIelse\fR -1 -A \&= CP + 2 * ZP -N \&= \fIif\fR LN=0 and P is never called from a loop \fIthen\fR 0 \fIelse\fR (LN+1)**2 -FM \&= \fIif\fR B is a firm block \fIthen\fR 2 \fIelse\fR 1 - -pay_off \&= (100/S + FT + F + L + A) * N * FM -.TE -.DE -S stands for the size increase of the program, -which is slightly less than the size of Q. -The size of a procedure is taken to be its number -of (non-pseudo) EM instructions. -The terms "loop nesting level" and "firm" were defined -in the chapter on the Intermediate Code (section "loop tables"). -If a call is not inside a loop and the calling procedure -is itself never called from a loop (transitively), -then the call will probably be executed at most once. -Such a call is never expanded in line (its pay off is zero). -If the calling procedure doesn't have local variables, a penalty (L) -is introduced, as it will most likely get local variables if the -call gets expanded. diff --git a/doc/ego/il/il5 b/doc/ego/il/il5 deleted file mode 100644 index 2c434de69..000000000 --- a/doc/ego/il/il5 +++ /dev/null @@ -1,446 +0,0 @@ -.NH 2 -Implementation -.PP -A major factor in the implementation -of Inline Substitution is the requirement -not to use an excessive amount of memory. -IL essentially analyzes the entire program; -it makes decisions based on which procedure calls -appear in the whole program. -Yet, because of the memory restriction, it is -not feasible to read the entire program -in main memory. -To solve this problem, the IL phase has been -split up into three subphases that are executed sequentially: -.IP 1. 
-analyze every procedure; see how it accesses its parameters; -simultaneously collect all calls -appearing in the whole program and put them -in a \fIcall-list\fR. -.IP 2. -use the call-list and decide which calls will be substituted -in line. -.IP 3. -take the decisions of subphase 2 and modify the -program accordingly. -.LP -Subphases 1 and 3 scan the input program; only -subphase 3 modifies it. -It is essential that the decisions can be made -in subphase 2 -without using the input program, -provided that subphase 1 puts enough information -in the call-list. -Subphase 2 keeps the entire call-list in main memory -and repeatedly scans it, to -find the next best candidate for expansion. -.PP -We will specify the -data structures used by IL before -describing the subphases. -.NH 3 -Data structures -.NH 4 -The procedure table -.PP -In subphase 1 information is gathered about every procedure -and added to the procedure table. -This information is used by the heuristic rules. -A proctable entry for procedure p has -the following extra information: -.IP - -is it allowed to substitute an invocation of p in line? -.IP - -is it allowed to put any parameter of such a call in line? -.IP - -the size of p (number of EM instructions) -.IP - -does p 'fall through'? -.IP - -a description of the formal parameters that p accesses; this information -is obtained by looking at the code of p. For every parameter f, -we record: -.RS -.IP - -the offset of f -.IP - -the type of f (word, double word, pointer) -.IP - -may the corresponding actual parameter be put in line? -.IP - -is f ever accessed indirectly? -.IP - -is f used: never, once or more than once? -.RE -.IP - -the number of times p is called (see below) -.IP - -the file address of its call-count information (see below). -.LP -.NH 4 -Call-count information -.PP -As a result of Inline Substitution, some procedures may -become useless, because all their invocations have been -substituted in line.
-One of the tasks of IL is to keep track of which -procedures are no longer called. -Note that IL is especially keen on procedures that are -called only once -(possibly as a result of expanding all other calls to it). -So we want to know how many times a procedure -is called \fIduring\fR Inline Substitution. -It is not good enough to compute this -information afterwards. -The task is rather complex, because -the number of times a procedure is called -varies during the entire process: -.IP 1. -If a call to p is substituted in line, -the number of calls to p gets decremented by 1. -.IP 2. -If a call to p is substituted in line, -and p contains n calls to q, then the number of calls to q -gets incremented by n. -.IP 3. -If a procedure p is removed (because it is no -longer called) and p contains n calls to q, -then the number of calls to q gets decremented by n. -.LP -(Note that p may be the same as q, if p is recursive). -.sp 0 -So we actually want to have the following information: -.DS -NRCALL(p,q) = number of calls to q appearing in p, - -for all procedures p and q that may be put in line. -.DE -This information, called \fIcall-count information\fR is -computed by the first subphase. -It is stored in a file. -It is represented as a number of lists, rather than as -a (very sparse) matrix. -Every procedure has a list of (proc,count) pairs, -telling which procedures it calls, and how many times. -The file address of its call-count list is stored -in its proctable entry. -Whenever this information is needed, it is fetched from -the file, using direct access. -The proctable entry also contains the number of times -a procedure is called, at any moment. -.NH 4 -The call-list -.PP -The call-list is the major data structure used by IL. -Every item of the list describes one procedure call.
-It contains the following attributes: -.IP - -the calling procedure (caller) -.IP - -the called procedure (callee) -.IP - -identification of the CAL instruction (sequence number) -.IP - -the loop nesting level; our heuristic rules appreciate -calls inside a loop (or even inside a loop nested inside -another loop, etc.) more than other calls -.IP - -the actual parameter expressions involved in the call; -for every actual, we record: -.RS -.IP - -the EM code of the expression -.IP - -the number of bytes of its result (size) -.IP - -an indication if the actual may be put in line -.RE -.LP -The structure of the call-list is rather complex. -Whenever a call is expanded in line, new calls -will suddenly appear in the program, -that were not contained in the original body -of the calling subroutine. -These calls are inherited from the called procedure. -We will refer to these invocations as \fInested calls\fR -(see Fig. 5.1). -.DS -.TS -lw(2.5i) l. -procedure p is -begin . - a(); . - b(); . -end; -.TE - -.TS -lw(2.5i) l. -procedure r is procedure r is -begin begin - x(); x(); - p(); -- in line a(); -- nested call - y(); b(); -- nested call -end; y(); - end; -.TE - -Fig. 5.1 Example of nested procedure calls -.DE -Nested calls may subsequently be put in line too -(probably resulting in a yet deeper nesting level, etc.). -So the call-list does not always reflect the source program, -but changes dynamically, as decisions are made. -If a call to p is expanded, all calls appearing in p -will be added to the call-list. -.sp 0 -A convenient and elegant way to represent -the call-list is to use a LISP-like list. -.[ -poel lisp trac -.] -Calls that appear at the same level -are linked in the CDR direction. If a call C -to a procedure p is expanded, -all calls appearing in p are put in a sub-list -of C, i.e. in its CAR. 
-In the example above, before the decision -to expand the call to p is made, the -call-list of procedure r looks like: -.DS -(call-to-x, call-to-p, call-to-y) -.DE -After the decision, it looks like: -.DS -(call-to-x, (call-to-p*, call-to-a, call-to-b), call-to-y) -.DE -The call to p is marked, because it has been -substituted. -Whenever IL wants to traverse the call-list of some procedure, -it uses the well-known LISP technique of -recursion in the CAR direction and -iteration in the CDR direction -(see page 1.19-2 of -.[ -poel lisp trac -.] -). -All list traversals look like: -.DS -traverse(list) -{ - for (c = first(list); c != 0; c = CDR(c)) { - if (c is marked) { - traverse(CAR(c)); - } else { - do something with c - } - } -} -.DE -The entire call-list consists of a number of LISP-like lists, -one for every procedure. -The proctable entry of a procedure contains a pointer -to the beginning of the list. -.NH 3 -The first subphase: procedure analysis -.PP -The tasks of the first subphase are to determine -several attributes of every procedure -and to construct the basic call-list, -i.e. without nested calls. -The size of a procedure is determined -by simply counting its EM instructions. -Pseudo instructions are skipped. -A procedure does not 'fall through' if its CFG -contains a basic block -that is not the last block of the CFG and -that ends on a RET instruction. -The formal parameters of a procedure are determined -by inspection of -its code. -.PP -The call-list is constructed by looking at all CAL instructions -appearing in the program. -The call-list should only contain calls to procedures -that may be put in line. -This fact is only known if the procedure was -analyzed earlier. -If a call to a procedure p appears in the program -before the body of p, -the call will always be put in the call-list. -If p is later found to be unsuitable, -the call will be removed from the list by the -second subphase.
-.PP -An important issue is the recognition -of the actual parameter expressions of the call. -The front ends produce messages telling how many -bytes of formal parameters every procedure accesses. -(If there is no such message for a procedure, it -cannot be put in line). -The actual parameters together must account for -the same number of bytes. A recursive descent parser is used -to parse side-effect free EM expressions. -It uses a table and some -auxiliary routines to determine -how many bytes every EM instruction pops from the stack -and how many bytes it pushes onto the stack. -These numbers depend on the EM instruction, its argument, -and the wordsize and pointersize of the target machine. -Initially, the parser has to recognize the -number of bytes specified in the formals-message, -say N. -Assume the first instruction before the CAL pops S bytes -and pushes R bytes. -If R > N, too many bytes are recognized -and the parser fails. -Else, it calls itself recursively to recognize the -S bytes used as operand of the instruction. -If it succeeds in doing so, it continues with the next instruction, -i.e. the first instruction before the code recognized by -the recursive call, to recognize N-R more bytes. -The result is a number of EM instructions that collectively push N bytes. -If an instruction is come across that has side-effects -(e.g. a store or a procedure call) or of which R and S cannot -be computed statically (e.g. a LOS), it fails. -.sp 0 -Note that the parser traverses the code backwards. -As EM code is essentially postfix code, the parser works top down. -.PP -If the parser fails to recognize the parameters, the call will not -be substituted in line. -If the parameters can be determined, they still have to -match the formal parameters of the called procedure. -This check is performed by the second subphase; it cannot be -done here, because it is possible that the called -procedure has not been analyzed yet.
-.PP -The entire call-list is written to a file, -to be processed by the second subphase. -.NH 3 -The second subphase: making decisions -.PP -The task of the second subphase is quite easy -to understand. -It reads the call-list file, -builds an incore call-list and deletes every -call that may not be expanded in line (either because the called -procedure may not be put in line, or because the actual parameters -of the call do not match the formal parameters of the called procedure). -It assigns a \fIpay-off\fR to every call, -indicating how desirable it is to expand it. -.PP -The subphase repeatedly scans the call-list and takes -the call with the highest ratio. -The chosen one gets marked, -and the call-list is extended with the nested calls, -as described above. -These nested calls are also assigned a ratio, -and will be considered too during the next scans. -.sp 0 -After every decision the number of times -every procedure is called is updated, using -the call-count information. -Meanwhile, the subphase keeps track of the amount of space left -available. -If all space is used, or if there are no more calls left to -be expanded, it exits this loop. -Finally, calls to procedures that are called only -once are also chosen. -.PP -The actual parameters of a call are only needed by -this subphase to assign a ratio to a call. -To save some space, these actuals are not kept in main memory. -They are removed after the call has been read and a ratio -has been assigned to it. -So this subphase works with \fIabstracts\fR of calls. -After all work has been done, -the actual parameters of the chosen calls are retrieved -from a file, -as they are needed by the transformation subphase. -.NH 3 -The third subphase: doing transformations -.PP -The third subphase makes the actual modifications to -the EM text. -It is directed by the decisions made in the previous subphase, -as expressed via the call-list. 
-The call-list read by this subphase contains -only calls that were selected for expansion. -The list is ordered in the same way as the EM text, -i.e. if a call C1 appears before a call C2 in the call-list, -C1 also appears before C2 in the EM text. -So the EM text is traversed linearly, -the calls that have to be substituted are determined -and the modifications are made. -If a procedure is come across that is no longer needed, -it is simply not written to the output EM file. -The substitution of a call takes place in distinct steps: -.IP "change the calling sequence" 7 -.sp 0 -The actual parameter expressions are changed. -Parameters that are put in line are removed. -All remaining ones must store their result in a -temporary local variable, rather than -push it on the stack. -The CAL instruction and any ASP (to pop actual parameters) -or LFR (to fetch the result of a function) -are deleted. -.IP "fetch the text of the called procedure" -.sp 0 -Direct disk access is used to read the text of the -called procedure. -The file offset is obtained from the proctable entry. -.IP "allocate bytes for locals and temporaries" -.sp 0 -The local variables of the called procedure will be put in the -stack frame of the calling procedure. -The same applies to any temporary variables -that hold the result of parameters -that were not put in line. -The proctable entry of the caller is updated. -.IP "put a label after the CAL" -.sp 0 -If the called procedure contains a RET (return) instruction -somewhere in the middle of its text (i.e. it does -not fall through), the RET must be changed into -a BRA (branch), to jump over the -remainder of the text. -This label is not needed if the called -procedure falls through. -.IP "copy the text of the called procedure and modify it" -.sp 0 -References to local variables of the called routine -and to parameters that are not put in line -are changed to refer to the -new local of the caller.
-References to in line parameters are replaced -by the actual parameter expression. -Returns (RETs) are either deleted or -replaced by a BRA. -Messages containing information about local -variables or parameters are changed. -Global data declarations and the PRO and END pseudos -are removed. -Instruction labels and references to them are -changed to make sure they do not have the -same identifying number as -labels in the calling procedure. -.IP "insert the modified text" -.sp 0 -The pseudos of the called procedure are put after the pseudos -of the calling procedure. -The real text of the callee is put at -the place where the CAL was. -.IP "take care of nested substitutions" -.sp 0 -The expanded procedure may contain calls that -have to be expanded too (nested calls). -If the descriptor of this call contains actual -parameter expressions, -the code of the expressions has to be changed -the same way as the code of the callee was changed. -Next, the entire process of finding CALs and doing -the substitutions is repeated recursively. 
-.LP diff --git a/doc/ego/il/il6 b/doc/ego/il/il6 deleted file mode 100644 index a7e37a4a9..000000000 --- a/doc/ego/il/il6 +++ /dev/null @@ -1,27 +0,0 @@ -.NH 2 -Source files of IL -.PP -The sources of IL are in the following files -and packages (the prefixes 1_, 2_ and 3_ refer to the three subphases): -.IP il.h: 14 -declarations of global variables and -data structures -.IP il.c: -the routine main; the driving routines of the three subphases -.IP 1_anal: -contains a subroutine that analyzes a procedure -.IP 1_cal: -contains a subroutine that analyzes a call -.IP 1_aux: -implements auxiliary procedures used by subphase 1 -.IP 2_aux: -implements auxiliary procedures used by subphase 2 -.IP 3_subst: -the driving routine for doing the substitution -.IP 3_change: -lower level routines that do certain modifications -.IP 3_aux: -implements auxiliary procedures used by subphase 3 -.IP aux: -implements auxiliary procedures used by several subphases. -.LP diff --git a/doc/ego/intro/.distr b/doc/ego/intro/.distr deleted file mode 100644 index 45de40776..000000000 --- a/doc/ego/intro/.distr +++ /dev/null @@ -1,3 +0,0 @@ -head -intro1 -tail diff --git a/doc/ego/intro/head b/doc/ego/intro/head deleted file mode 100644 index ccc710bd3..000000000 --- a/doc/ego/intro/head +++ /dev/null @@ -1,10 +0,0 @@ -.ND -.\".ll 80m -.\".nr LL 80m -.\".nr tl 78m -.tr ~ -.ds >. . -.ds >, , -.ds [. " [ -.ds .] ] -.cs 5 22 diff --git a/doc/ego/intro/intro1 b/doc/ego/intro/intro1 deleted file mode 100644 index de7a5ae89..000000000 --- a/doc/ego/intro/intro1 +++ /dev/null @@ -1,79 +0,0 @@ -.TL -The design and implementation of -the EM Global Optimizer -.AU -H.E. Bal -.AI -Vrije Universiteit -Wiskundig Seminarium, Amsterdam -.AB -The EM Global Optimizer is part of the Amsterdam Compiler Kit, -a toolkit for making retargetable compilers. 
-It optimizes the intermediate code common to all compilers of -the toolkit (EM), -so it can be used for all programming languages and -all processors supported by the kit. -.PP -The optimizer is based on well-understood concepts like -control flow analysis and data flow analysis. -It performs the following optimizations: -Inline Substitution, Strength Reduction, Common Subexpression Elimination, -Stack Pollution, Cross Jumping, Branch Optimization, Copy Propagation, -Constant Propagation, Dead Code Elimination and Register Allocation. -.PP -This report describes the design of the optimizer and several -of its implementation issues. -.AE -.bp -.NH 1 -Introduction -.PP -.FS -This work was supported by the -Stichting Technische Wetenschappen (STW) -under grant VWI00.0001. -.FE -The EM Global Optimizer is part of a software toolkit -for making production-quality retargetable compilers. -This toolkit, -called the Amsterdam Compiler Kit -.[ -tanenbaum toolkit rapport -.] -.[ -tanenbaum toolkit cacm -.] -runs under the Unix* -.FS -*Unix is a Trademark of Bell Laboratories -.FE -operating system. -.sp 0 -The main design philosophy of the toolkit is to use -a language- and machine-independent -intermediate code, called EM. -.[ -keizer architecture -.] -The basic compilation process can be split up into -two parts. -A language-specific front end translates the source program into EM. -A machine-specific back end transforms EM to assembly code -of the target machine. -.PP -The global optimizer is an optional phase of the -compilation process, and can be used to obtain -machine code of a higher quality. -The optimizer transforms EM-code to better EM-code, -so it comes between the front end and the back end. -It can be used with any combination of languages -and machines, as far as they are supported by -the compiler kit. -.PP -This report describes the design of the -global optimizer and several of its -implementation issues. -Measurements can be found in. 
-.[ -bal tanenbaum global -.] diff --git a/doc/ego/intro/tail b/doc/ego/intro/tail deleted file mode 100644 index 46cced8ac..000000000 --- a/doc/ego/intro/tail +++ /dev/null @@ -1,17 +0,0 @@ -.SH -Acknowledgements -.PP -The author would like to thank Andy Tanenbaum for his guidance, -Duk Bekema for implementing the Common Subexpression Elimination phase -and writing the initial documentation of that phase, -Dick Grune for reading the manuscript of this report -and Ceriel Jacobs, Ed Keizer, Martin Kersten, Hans van Staveren -and the members of the S.T.W. user's group for their -interest and assistance. -.bp -.SH -References -.LP -.[ -$LIST$ -.] diff --git a/doc/ego/lv/.distr b/doc/ego/lv/.distr deleted file mode 100644 index b82f3da53..000000000 --- a/doc/ego/lv/.distr +++ /dev/null @@ -1 +0,0 @@ -lv1 diff --git a/doc/ego/lv/lv1 b/doc/ego/lv/lv1 deleted file mode 100644 index 7574ca6f8..000000000 --- a/doc/ego/lv/lv1 +++ /dev/null @@ -1,95 +0,0 @@ -.bp -.NH 1 -Live-Variable analysis -.NH 2 -Introduction -.PP -The "Live-Variable analysis" optimization technique (LV) -performs some code improvements and computes information that may be -used by subsequent optimizations. -The main task of this phase is the -computation of \fIlive-variable information\fR. -.[~[ -aho compiler design -.] section 14.4] -A variable A is said to be \fIdead\fR at some point p of the -program text, if on no path in the control flow graph -from p to a RET (return), A can be used before being changed; -else A is said to be \fIlive\fR. -.PP -A statement of the form -.DS -VARIABLE := EXPRESSION -.DE -is said to be dead if the left hand side variable is dead just after -the statement and the right hand side expression has no -side effects (i.e. it doesn't change any variable). -Such a statement can be eliminated entirely. -Dead code will seldom be present in the original program, -but it may be the result of earlier optimizations, -such as copy propagation. 
-.PP -Live-variable information is passed to other phases via -messages in the EM code. -Live/dead messages are generated at points in the EM text where -variables become dead or live. -This information is especially useful for the Register -Allocation phase. -.NH 2 -Implementation -.PP -The implementation uses algorithm 14.6 of. -.[ -aho compiler design -.] -First two sets DEF and USE are computed for every basic block b: -.IP DEF(b) 9 -the set of all variables that are assigned a value in b before -being used -.IP USE(b) 9 -the set of all variables that may be used in b before being changed. -.LP -(So variables that may, but need not, be used resp. changed via a procedure -call or through a pointer are included in USE but not in DEF). -The next step is to compute the sets IN and OUT : -.IP IN[b] 9 -the set of all variables that are live at the beginning of b -.IP OUT[b] 9 -the set of all variables that are live at the end of b -.LP -IN and OUT can be computed for all blocks simultaneously by solving the -data flow equations: -.DS -(1) IN[b] = OUT[b] - DEF[b] + USE[b] -(2) OUT[b] = IN[s1] + ... + IN[sn] ; - where SUCC[b] = {s1, ... , sn} -.DE -The equations are solved by a similar algorithm as for -the Use Definition equations (see previous chapter). -.PP -Finally, each basic block is visited in turn to remove its dead code -and to emit the live/dead messages. -Every basic block b is traversed from its last -instruction backwards to the beginning of b. -Initially, all variables that are dead at the end -of b are marked dead. All others are marked live. -If we come across an assignment to a variable X that -was marked live, a live-message is put after the -assignment and X is marked dead; -if X was marked dead, the assignment may be removed, provided that -the right hand side expression contains no side effects. -If we come across a use of a variable X that -was marked dead, a dead-message is put after the -use and X is marked live.
-So at any point, the mark of X tells whether X is -live or dead immediately before that point. -A message is also generated at the start of a basic block -for every variable that was live at the end of the (textually) -previous block, but dead at the entry of this block, or v.v. -.PP -Only local variables are considered. -This significantly reduces the memory needed by this phase, -eases the implementation and is hardly less efficient than -considering all variables. -(Note that it is very hard to prove that an assignment to -a global variable is dead). diff --git a/doc/ego/ov/.distr b/doc/ego/ov/.distr deleted file mode 100644 index 9170d50dc..000000000 --- a/doc/ego/ov/.distr +++ /dev/null @@ -1 +0,0 @@ -ov1 diff --git a/doc/ego/ov/ov1 b/doc/ego/ov/ov1 deleted file mode 100644 index 78d4326e6..000000000 --- a/doc/ego/ov/ov1 +++ /dev/null @@ -1,374 +0,0 @@ -.bp -.NH 1 -Overview of the global optimizer -.NH 2 -The ACK compilation process -.PP -The EM Global Optimizer is one of three optimizers that are -part of the Amsterdam Compiler Kit (ACK). -The phases of ACK are: -.IP 1. -A Front End translates a source program to EM -.IP 2. -The Peephole Optimizer -.[ -tanenbaum staveren peephole toplass -.] -reads EM code and produces 'better' EM code. -It performs a number of optimizations (mostly peephole -optimizations) -such as constant folding, strength reduction and unreachable code -elimination. -.IP 3. -The Global Optimizer further improves the EM code. -.IP 4. -The Code Generator transforms EM to assembly code -of the target computer. -.IP 5. -The Target Optimizer improves the assembly code. -.IP 6. -An Assembler/Loader generates an executable file. -.LP -For a more extensive overview of the ACK compilation process, -we refer to. -.[ -tanenbaum toolkit rapport -.] -.[ -tanenbaum toolkit cacm -.] -.PP -The input of the Global Optimizer may consist of files and -libraries. -Every file or module in the library must contain EM code in -Compact Assembly Language format. 
-.[~[ -tanenbaum machine architecture -.], section 11.2] -The output consists of one such EM file. -The input files and libraries together need not -constitute an entire program, -although as much of the program as possible should be supplied. -The more information about the program the optimizer -gets, the better its output code will be. -.PP -The Global Optimizer is language- and machine-independent, -i.e. it can be used for all languages and machines supported by ACK. -Yet, it puts some unavoidable restrictions on the EM code -produced by the Front End (see below). -It must have some knowledge of the target machine. -This knowledge is expressed in a machine description table -which is passed as argument to the optimizer. -This table does not contain very detailed information about the -target (such as its instruction set and addressing modes). -.NH 2 -The EM code -.PP -The definition of EM, the intermediate code of all ACK compilers, -is given in a separate document. -.[ -tanenbaum machine architecture -.] -We will only discuss some features of EM that are most relevant -to the Global Optimizer. -.PP -EM is the assembly code of a virtual \fIstack machine\fR. -All operations are performed on the top of the stack. -For example, the statement "A := B + 3" may be expressed in EM as: -.DS -.TS -l l. -LOL -4 -- push local variable B -LOC 3 -- push constant 3 -ADI 2 -- add two 2-byte items on top of - -- the stack and push the result -STL -2 -- pop A -.TE -.DE -So EM is essentially a \fIpostfix\fR code. -.PP -EM has a rich instruction set, containing several arithmetic -and logical operators. -It also contains special-case instructions (such as INCrement). -.PP -EM has \fIglobal\fR (\fIexternal\fR) variables, accessible -by all procedures and \fIlocal\fR variables, accessible by a few -(nested) procedures. -The local variables of a lexically enclosing procedure may -be accessed via a \fIstatic link\fR. -EM has instructions to follow the static chain. 
-There are EM instructions to allow a procedure -to access its local variables directly (such as LOL and STL above). -Local variables are referenced via an offset in the stack frame -of the procedure, rather than by their names (e.g. -2 and -4 above). -The EM code does not contain the (source language) type -of the variables. -.PP -All structured statements in the source program are expressed in -low level jump instructions. -Besides conditional and unconditional branch instructions, there are -two case instructions (CSA and CSB), -to allow efficient translation of case statements. -.NH 2 -Requirements on the EM input -.PP -As the optimizer should be useful for all languages, -it clearly should not put severe restrictions on the EM code -of the input. -There is, however, one immovable requirement: -it must be possible to determine the \fIflow of control\fR of the -input program. -As virtually all global optimizations are based on control flow information, -the optimizer would be totally powerless without it. -For this reason we restrict the usage of the case jump instructions (CSA/CSB) -of EM. -Such an instruction is always called with the address of a case descriptor -on top of the stack. -.[~[ -tanenbaum machine architecture -.] section 7.4] -This descriptor contains the labels of all possible -destinations of the jump. -We demand that all case descriptors are allocated in a global -data fragment of type ROM, i.e. the case descriptors -may not be modifiable. -Furthermore, any case instruction should be immediately preceded by -a LAE (Load Address External) instruction, that loads the -address of the descriptor, -so the descriptor can be uniquely identified. -.PP -The optimizer will work improperly if the user deceives the control flow. -We will give two methods to do this. -.PP -In "C" the notorious library routines "setjmp" and "longjmp" -.[ -unix programmer's manual McIlroy -.]
-may be used to jump out of a procedure, -but can also be used for a number of other stuffy purposes, -for example, to create an extra entry point in a loop. -.DS - while (condition) { - .... - setjmp(buf); - ... - } - ... - longjmp(buf); -.DE -The invocation to longjmp actually is a jump to the place of -the last call to setjmp with the same argument (buf). -As the calls to setjmp and longjmp are indistinguishable from -normal procedure calls, the optimizer will not see the danger. -No need to say that several loop optimizations will behave -unexpectedly when presented with such pathological input. -.PP -Another way to deceive the flow of control is -by using exception handling routines. -Ada* -.FS -* Ada is a registered trademark of the U.S. Government -(Ada Joint Program Office). -.FE -has clearly recognized the dangers of exception handling, -but other languages (such as PL/I) have not. -.[ -ada rationale -.] -.PP -The optimizer will be more effective if the EM input contains -some extra information about the source program. -Especially the \fIregister message\fR is very important. -These messages indicate which local variables may never be -accessed indirectly. -Most optimizations benefit significantly by this information. -.PP -The Inline Substitution technique needs to know how many bytes -of formal parameters every procedure accesses. -Only calls to procedures for which the EM code contains this information -will be substituted in line. -.NH 2 -Structure of the optimizer -.PP -The Global Optimizer is organized as a number of \fIphases\fR, -each one performing some task. 
-The main structure is as follows: -.IP IC 6 -the Intermediate Code construction phase transforms EM into the -intermediate code (ic) of the optimizer -.IP CF -the Control Flow phase extends the ic with control flow -information and interprocedural information -.IP OPTs -zero or more optimization phases, each one performing one or -more related optimizations -.IP CA -the Compact Assembly phase generates Compact Assembly Language EM code -out of ic. -.LP -.PP -An important issue in the design of a global optimizer is the -interaction between optimization techniques. -It is often advantageous to combine several techniques in -one algorithm that takes into account all interactions between them. -Ideally, one single algorithm should be developed that does -all optimizations simultaneously and deals with all possible interactions. -In practice, such an algorithm is still far out of reach. -Instead some rather ad hoc (albeit important) combinations are chosen, -such as Common Subexpression Elimination and Register Allocation. -.[ -prabhala sethi common subexpressions -.] -.[ -sethi ullman optimal code -.] -.PP -In the EM Global Optimizer there is one separate algorithm for -every technique. -Note that this does not mean that all techniques are independent -of each other. -.PP -In principle, the optimization phases can be run in any order; -a phase may even be run more than once. -However, the following rules should be obeyed: -.IP - -the Live Variable analysis phase (LV) must be run prior to -Register Allocation (RA), as RA uses information produced by LV. -.IP - -RA should be the last phase; this is a consequence of the way -the interface between RA and the Code Generator is defined. -.LP -The ordering of the phases has significant impact on -the quality of the produced code. -In -.[ -wulf overview production quality carnegie-mellon -.] -two kinds of phase ordering problems are distinguished.
-If two techniques A and B both take away opportunities of each other, -there is a "negative" ordering problem. -If, on the other hand, both A and B introduce new optimization -opportunities for each other, the problem is called "positive". -In the Global Optimizer the following interactions must be -taken into account: -.IP - -Inline Substitution (IL) may create new opportunities for most -other techniques, so it should be run as early as possible -.IP - -Use Definition analysis (UD) may introduce opportunities for LV. -.IP - -Strength Reduction may create opportunities for UD -.LP -The optimizer has a default phase ordering, which can -be changed by the user. -.NH 2 -Structure of this document -.PP -The remaining chapters of this document each describe one -phase of the optimizer. -For every phase, we describe its task, its design, -its implementation, and its source files. -The latter two sections are intended to aid the -maintenance of the optimizer and -can be skipped by the initial reader. -.NH 2 -References -.PP -There are very -few modern textbooks on optimization. -Chapters 12, 13, and 14 of -.[ -aho compiler design -.] -are a good introduction to the subject. -Wulf et. al. -.[ -wulf optimizing compiler -.] -describe one specific optimizing (Bliss) compiler. -Anklam et. al. -.[ -anklam vax-11 -.] -discuss code generation and optimization in -compilers for one specific machine (a Vax-11). -Kirchgaesner et. al. -.[ -optimizing ada compiler -.] -present a brief description of many -optimizations; the report also contains a lengthy (over 60 pages) -bibliography. -.PP -The number of articles on optimization is quite impressive. -The Lowry and Medlock paper on the Fortran H compiler -.[ -object code optimization Lowry Medlock -.] -is a classical one. -Other papers on global optimization are. -.[ -faiman optimizing pascal -.] -.[ -perkins sites -.] -.[ -harrison general purpose optimizing -.] -.[ -morel partial redundancies -.] -.[ -Mintz global optimizer -.] 
-Freudenberger -.[ -freudenberger setl optimizer -.] -describes an optimizer for a Very High Level Language (SETL). -The Production-Quality Compiler-Compiler (PQCC) project uses -very sophisticated compiler techniques, as described in. -.[ -wulf overview ieee -.] -.[ -wulf overview carnegie-mellon -.] -.[ -wulf machine-relative -.] -.PP -Several Ph.D. theses are dedicated to optimization. -Davidson -.[ -davidson simplifying -.] -outlines a machine-independent peephole optimizer that -improves assembly code. -Katkus -.[ -katkus -.] -describes how efficient programs can be obtained at little cost by -optimizing only a small part of a program. -Photopoulos -.[ -photopoulos mixed code -.] -discusses the idea of generating interpreted intermediate code as well -as assembly code, to obtain programs that are both small and fast. -Shaffer -.[ -shaffer automatic -.] -describes the theory of automatic subroutine generation. -.] -Leverett -.[ -leverett register allocation compilers -.] -deals with register allocation in the PQCC compilers. -.PP -References to articles about specific optimization techniques -will be given in later chapters. diff --git a/doc/ego/proto.make b/doc/ego/proto.make deleted file mode 100644 index 2d4afd782..000000000 --- a/doc/ego/proto.make +++ /dev/null @@ -1,64 +0,0 @@ -# $Id$ - -#PARAMS do not remove this line! - -SRC_DIR = $(SRC_HOME)/doc/ego - -REFS=-p $(SRC_DIR)/refs.opt -p $(SRC_DIR)/refs.stat -p $(SRC_DIR)/refs.gen -REFFILES = $(SRC_DIR)/refs.opt $(SRC_DIR)/refs.stat $(SRC_DIR)/refs.gen -INTRO=$(SRC_DIR)/intro/intro? -OV=$(SRC_DIR)/ov/ov? -IC=$(SRC_DIR)/ic/ic? -CF=$(SRC_DIR)/cf/cf? -IL=$(SRC_DIR)/il/il? -SR=$(SRC_DIR)/sr/sr? -CS=$(SRC_DIR)/cs/cs? -SP=$(SRC_DIR)/sp/sp? -UD=$(SRC_DIR)/ud/ud? -LV=$(SRC_DIR)/lv/lv? -CJ=$(SRC_DIR)/cj/cj? -BO=$(SRC_DIR)/bo/bo? -RA=$(SRC_DIR)/ra/ra? -CA=$(SRC_DIR)/ca/ca? 
-EGO=$(INTRO) $(OV) $(IC) $(CF) $(IL) $(SR) $(CS) $(SP) $(CJ) $(BO) \ - $(UD) $(LV) $(RA) $(CA) -REFER=refer -TROFF=troff -TBL=tbl -TARGET=-Tlp -HEAD = $(SRC_DIR)/intro/head -TAIL = $(SRC_DIR)/intro/tail - -$(TARGET_HOME)/doc/ego.doc: $(REFFILES) $(HEAD) $(TAIL) $(EGO) - $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(EGO) $(TAIL) | $(TBL) > $(TARGET_HOME)/doc/ego.doc - -ego.f: $(REFFILES) $(HEAD) $(TAIL) $(EGO) - $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(EGO) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > ego.f -intro.f: $(REFFILES) $(HEAD) $(TAIL) $(INTRO) - $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(INTRO) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > intro.f -ov.f: $(REFFILES) $(HEAD) $(TAIL) $(OV) - $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(OV) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > ov.f -ic.f: $(REFFILES) $(HEAD) $(TAIL) $(IC) - $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(IC) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > ic.f -cf.f: $(REFFILES) $(HEAD) $(TAIL) $(CF) - $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(CF) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > cf.f -il.f: $(REFFILES) $(HEAD) $(TAIL) $(IL) - $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(IL) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > il.f -sr.f: $(REFFILES) $(HEAD) $(TAIL) $(SR) - $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(SR) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > sr.f -cs.f: $(REFFILES) $(HEAD) $(TAIL) $(CS) - $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(CS) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > cs.f -sp.f: $(REFFILES) $(HEAD) $(TAIL) $(SP) - $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(SP) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > sp.f -cj.f: $(REFFILES) $(HEAD) $(TAIL) $(CJ) - $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(CJ) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > cj.f -bo.f: $(REFFILES) $(HEAD) $(TAIL) $(BO) - $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(BO) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > bo.f -ud.f: $(REFFILES) $(HEAD) $(TAIL) $(UD) - $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(UD) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > ud.f 
-lv.f: $(REFFILES) $(HEAD) $(TAIL) $(LV) - $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(LV) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > lv.f -ra.f: $(REFFILES) $(HEAD) $(TAIL) $(RA) - $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(RA) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > ra.f -ca.f: $(REFFILES) $(HEAD) $(TAIL) $(CA) - $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(CA) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > ca.f diff --git a/doc/ego/ra/.distr b/doc/ego/ra/.distr deleted file mode 100644 index d9cbc6df4..000000000 --- a/doc/ego/ra/.distr +++ /dev/null @@ -1,4 +0,0 @@ -ra1 -ra2 -ra3 -ra4 diff --git a/doc/ego/ra/ra1 b/doc/ego/ra/ra1 deleted file mode 100644 index fb5343f93..000000000 --- a/doc/ego/ra/ra1 +++ /dev/null @@ -1,33 +0,0 @@ -.bp -.NH 1 -Register Allocation -.NH 2 -Introduction -.PP -The efficient usage of the general purpose registers -of the target machine plays a key role in any optimizing compiler. -This subject, often referred to as \fIRegister Allocation\fR, -has great impact on both the code generator and the -optimizing part of such a compiler. -The code generator needs registers for at least the evaluation of -arithmetic expressions; -the optimizer uses the registers to decrease the access costs -of frequently used entities (such as variables). -The design of an optimizing compiler must pay great -attention to the cooperation of optimization, register allocation -and code generation. -.PP -Register allocation has received much attention in literature (see -.[ -leverett register allocation compilers -.] -.[ -chaitin register coloring -.] -.[ -freiburghouse usage counts -.] -and -.[~[ -sites register -.]]). diff --git a/doc/ego/ra/ra2 b/doc/ego/ra/ra2 deleted file mode 100644 index e6dfc138f..000000000 --- a/doc/ego/ra/ra2 +++ /dev/null @@ -1,139 +0,0 @@ -.NH 2 -Usage of registers in ACK compilers -.PP -We will first describe the major design decisions -of the Amsterdam Compiler Kit, -as far as they concern register allocation. 
-Subsequently we will outline -the role of the Global Optimizer in the register -allocation process and the interface -between the code generator and the optimizer. -.NH 3 -Usage of registers without the intervention of the Global Optimizer -.PP -Registers are used for two purposes: -.IP 1. -for the evaluation of arithmetic expressions -.IP 2. -to hold local variables, for the duration of the procedure they -are local to. -.LP -It is essential to note that no translation part of the compilers, -except for the code generator, knows anything at all -about the register set of the target computer. -Hence all decisions about registers are ultimately made by -the code generator. -Earlier phases of a compiler can only \fIadvise\fR the code generator. -.PP -The code generator splits the register set into two: -a fixed part for the evaluation of expressions (called \fIscratch\fR -registers) and a fixed part to store local variables. -This partitioning, which depends only on the target computer, significantly -reduces the complexity of register allocation, at the penalty -of some loss of code quality. -.PP -The code generator has some (machine-dependent) knowledge of the access costs -of memory locations and registers and of the costs of saving and -restoring registers. (Registers are always saved by the \fIcalled\fR -procedure). -This knowledge is expressed in a set of procedures for each target machine. -The code generator also knows how many registers there are and of -which type they are. -A register can be of type \fIpointer\fR, \fIfloating point\fR -or \fIgeneral\fR. -.PP -The front ends of the compilers determine which local variables may -be put in a register; -such a variable may never be accessed indirectly (i.e. through a pointer). -The front end also determines the types and sizes of these variables. 
-The type can be any of the register types or the type \fIloop variable\fR, -which denotes a general-typed variable that is used as loop variable -in a for-statement. -All this information is collected in a \fIregister message\fR in -the EM code. -Such a message is a pseudo EM instruction. -This message also contains a \fIscore\fR field, -indicating how desirable it is to put this variable in a register. -A front end may assign a high score to a variable if it -was declared as a register variable (which is only possible in -some languages, such as "C"). -Any compiler phase before the code generator may change this score field, -if it has reason to do so. -The code generator bases its decisions on the information contained -in the register message, most notably on the score. -.PP -If the global optimizer is not used, -the score fields are set by the Peephole Optimizer. -This optimizer simply counts the number of occurrences -of every local (register) variable and adds this count -to the score provided by the front end. -In this way a simple, yet quite effective -register allocation scheme is achieved. -.NH 3 -The role of the Global Optimizer -.PP -The Global Optimizer essentially tries to improve the scheme -outlined above. -It uses the following principles for this purpose: -.IP - -Entities are not always assigned a register for the duration -of an entire procedure; smaller regions of the program text -may be considered too. -.IP - -several variables may be put in the same register simultaneously, -provided at most one of them is live at any point. -.IP - -besides local variables, other entities (such as constants and addresses of -variables and procedures) may be put in a register. -.IP - -more accurate cost estimates are used. -.LP -To perform its task, the optimizer must have some -knowledge of the target machine. 
-.NH 3 -The interface between the register allocator and the code generator -.PP -The RA phase of the optimizer must somehow be able to express its -decisions. -Such decisions may look like: 'put constant 1283 in a register from -line 12 to line 40'. -To be precise, RA must be able to tell the code generator to: -.IP - -initialize a register with some value -.IP - -update an entity from a register -.IP - -replace all occurrences of an entity in a certain region -of text by a reference to the register. -.LP -At least three problems occur here: the code generator is only used to -put local variables in registers, -it only assigns a register to a variable for the duration of an entire -procedure and it is not used to have some earlier compiler phase -make all the decisions. -.PP -All problems are solved by one mechanism, that involves no changes -to the code generator. -With every (non-scratch) register R that will be used in -a procedure P, we associate a new variable T, local to P. -The size of T is the same as the size of R. -A register message is generated for T with an exceptionally high score. -The scores of all original register messages are set to zero. -Consequently, the code generator will always assign precisely those new -variables to a register. -If the optimizer wants to put some entity, say the constant 1283, in -a register, it emits the code "T := 1283" and replaces all occurrences -of '1283' by T. -Similarly, it can put the address of a procedure in T and replace all -calls to that procedure by indirect calls. -Furthermore, it can put several different entities in T (and thus in R) -during the lifetime of P. -.PP -In principle, the code generated by the optimizer in this way would -always be valid EM code, even if the optimizer would be presented -a totally wrong description of the target computer register set. 
-In practice, it would be a waste of data as well as text space to -allocate memory for these new variables, as they will always be assigned -a register (in the correct order of events). -Hence, no memory locations are allocated for them. -For this reason they are called pseudo local variables. diff --git a/doc/ego/ra/ra3 b/doc/ego/ra/ra3 deleted file mode 100644 index d02ed3bc9..000000000 --- a/doc/ego/ra/ra3 +++ /dev/null @@ -1,386 +0,0 @@ -.NH 2 -The register allocation phase -.PP -.NH 3 -Overview -.PP -The RA phase deals with one procedure at a time. -For every procedure, it first determines which entities -may be put in a register. Such an entity -is called an \fIitem\fR. -For every item it decides during which parts of the procedure it -might be assigned a register. -Such a region is called a \fItimespan\fR. -For any item, several (possibly overlapping) timespans may -be considered. -A pair (item,timespan) is called an \fIallocation\fR. -If the items of two allocations are both live at some -point of time in the intersections of their timespans, -these allocations are said to be \fIrivals\fR of each other, -as they cannot be assigned the same register. -The rivals-set of every allocation is computed. -Next, the gains of assigning a register to an allocation are estimated, -for every allocation. -With all this information, decisions are made which allocations -to store in which registers (\fIpacking\fR). -Finally, the EM text is transformed to reflect these decisions. -.NH 3 -The item recognition subphase -.PP -RA tries to put the following entities in a register: -.IP - -a local variable for which a register message was found -.IP - -the address of a local variable for which no -register message was found -.IP - -the address of a global variable -.IP - -the address of a procedure -.IP - -a numeric constant. -.LP -Only the \fIaddress\fR of a global variable -may be put in a register, not the variable itself. 
-This approach avoids the very complex problems that would be -caused by procedure calls and indirect pointer references (see -.[~[ -aho design compiler -.] sections 14.7 and 14.8] -and -.[~[ -spillman side-effects -.]]). -Still, on most machines accessing a global variable using indirect -addressing through a register is much cheaper than -accessing it via its address. -Similarly, if the address of a procedure is put in a register, the -procedure can be called via an indirect call. -.PP -With every item we associate a register type. -This type is -.DS -for local variables: the type contained in the register message -for addresses of variables and procedures: the pointer type -for constants: the general type -.DE -An entity other than a local variable is not taken to be an item -if it is used only once within the current procedure. -.PP -An item is said to be \fIlive\fR at some point of the program text -if its value may be used before it is changed. -As addresses and constants are never changed, all items but local -variables are always live. -The region of text during which a local variable is live is -determined via the live/dead messages generated by the -Live Variable analysis phase of the Global Optimizer. -.NH 3 -The allocation determination subphase -.PP -If a procedure has more items than registers, -it may be advantageous to put an item in a register -only during those parts of the procedure where it is most -heavily used. -Such a part will be called a timespan. -With every item we may associate a set of timespans. -If two timespans of an item overlap, -at most one of them may be granted a register, -as there is no use in putting the same item in two -registers simultaneously. -If two timespans of an item are distinct, -both may be chosen; -the item will possibly be put in two -different registers during different parts of the procedure. -The timespan may also consist -of the whole procedure. 
-.PP -A list of (item,timespan) pairs (allocations) -is built, which will be the input to the decision making -subphase of RA (packing subphase). -This allocation list is the main data structure of RA. -The description of the remainder of RA will be in terms -of allocations rather than items. -The phrase "to assign a register to an allocation" means "to assign -a register to the item of the allocation for the duration of -the timespan of the allocation". -Subsequent subphases will add more information -to this list. -.PP -Several factors must be taken into account when a -timespan for an item is constructed: -.IP 1. -At any \fIentry point\fR of the timespan where the -item is live, -the register must be initialized with the item -.IP 2. -At any exit point of the timespan where the item is live, -the item must be updated. -.LP -In order to decrease these costs, we will only consider timespans with -one entry point -and no live exit points. -.NH 3 -The rivals computation subphase -.PP -As stated before, several different items may be put in the -same register, provided they are not live simultaneously. -For every allocation we determine the intersection -of its timespan and the lifetime of its item (i.e. the part of the -procedure during which the item is live). -The allocation is said to be busy during this intersection. -If two allocations are ever busy simultaneously they are -said to be rivals of each other. -The rivals information is added to the allocation list. -.NH 3 -The profits computation subphase -.PP -To make good decisions, the packing subphase needs to -know which allocations can be assigned the same register -(rivals information) and how much is gained by -granting an allocation a register. -.PP -Besides the gains of using a register instead of an -item, -two kinds of overhead costs must be -taken into account: -.IP - -the register must be initialized with the item -.IP - -the register must be saved at procedure entry -and restored at procedure exit.
-.LP -The latter costs should not be due to a single -allocation, as several allocations can be assigned the same register. -These costs are dealt with after packing has been done. -They do not influence the decisions of the packing algorithm, -they may only undo them. -.PP -The actual profits consist of improvements -of execution time and code size. -As the former is far more difficult to estimate, we will -discuss code size improvements first. -.PP -The gains of putting a certain item in a register -depend on how the item is used. -Suppose the item is -a pointer variable. -On machines that do not have a -double-indirect addressing mode, -two instructions are needed to dereference the variable -if it is not in a register, but only one if it is put in a register. -If the variable is not dereferenced, but simply copied, one instruction -may be sufficient in both cases. -So the gains of putting a pointer variable in a register are higher -if the variable is dereferenced often. -.PP -To make accurate estimates, detailed knowledge of -the target machine and of the code generator -would be needed. -Therefore, a simplification has been made that substantially limits -the amount of target machine information that is needed. -The estimation of the number of bytes saved does -not take into account how an item is used. -Rather, an average number is used. -So these gains are computed as follows: -.DS -#bytes_saved = #occurrences * gains_per_occurrence -.DE -The number of occurrences is derived from -the EM code. -Note that this is not exact either, -as there is no one-to-one correspondence between occurrences in -the EM code and in the assembler code. -.PP -The gains of one occurrence depend on: -.IP 1. -the type of the item -.IP 2. -the size of the item -.IP 3. -the type of the register -.LP -and for local variables and addresses of local variables: -.IP 4. -the type of the local variable -.IP 5.
-the offset of the variable in the stackframe -.LP -For every allocation we try two types of registers: the register type -of the item and the general register type. -Only the type with the highest profits will subsequently be used. -This type is added to the allocation information. -.PP -To compute the gains, RA uses a machine-dependent table -that is read from a machine descriptor file. -By means of this table the number of bytes saved can be computed -as a function of the five properties. -.PP -The costs of initializing a register with an item -is determined in a similar way. -The cost of one initialization is also -obtained from the descriptor file. -Note that there can be at most one initialization for any -allocation. -.PP -To summarize, the number of bytes a certain allocation would -save is computed as follows: -.DS -.TS -l l. -net_bytes_saved = bytes_saved - init_cost -bytes_saved = #occurrences * gains_per_occ -init_cost = #initializations * costs_per_init -.TE -.DE -.PP -It is inherently more difficult to estimate the execution -time saved by putting an item in a register, -because it is impossible to predict how -many times an item will be used dynamically. -If an occurrence is part of a loop, -it may be executed many times. -If it is part of a conditional statement, -it may never be executed at all. -In the latter case, the speed of the program may even get -worse if an initialization is needed. -As a clear example, consider the piece of "C" code in Fig. 13.1. -.DS -switch(expr) { - case 1: p(); break; - case 2: p(); p(); break; - case 3: p(); break; - default: break; -} - -Fig. 13.1 A "C" switch statement -.DE -Lots of bytes may be saved by putting the address of procedure p -in a register, as p is called four times (statically). -Dynamically, p will be called zero, one or two times, -depending on the value of the expression. -.PP -The optimizer uses the following strategy for optimizing -execution time: -.IP 1. 
-try to put items in registers during \fIloops\fR first -.IP 2. -always keep the initializing code outside the loop -.IP 3. -if an item is not used in a loop, do not put it in a register if -the initialization costs may be higher than the gains -.LP -The latter condition can be checked by determining the -minimal number of usages (dynamically) of the item during the procedure, -via a shortest path algorithm. -In the example above, this minimal number is zero, so the address of -p is not put in a register. -.PP -The cost of one occurrence is estimated as described above for the -code size. -The number of dynamic occurrences is guessed by looking at the -loop nesting level of every occurrence. -If the item is never used in a loop, -the minimal number of occurrences is used. -From these facts, the execution time improvement is assessed -for every allocation. -.NH 3 -The packing subphase -.PP -The packing subphase takes as input the allocation -list and outputs a -description of which allocations should be put -in which registers. -So it is essentially the decision making part of RA. -.PP -The packing system tries to assign a register to allocations one -at a time, in some yet to be defined order. -For every allocation A, it first checks if there is a register -(of the right type) -that is already assigned to one or more allocations, -none of which are rivals of A. -In this case A is assigned the same register. -Else, A is assigned a new register, if one exists. -A table containing the number of free registers for every type -is maintained. -It is initialized with the number of non-scratch registers of -the target computer and updated whenever a -new register is handed out. -The packing algorithm stops when no more allocations can -or need be assigned a register. -.PP -After an allocation A has been packed, -all allocations with non-disjoint timespans (including -A itself) are removed from the allocation list.
-.PP -In case the number of items exceeds the number of registers, it -is important to choose the most profitable allocations. -Due to the possibility of having several allocations -occupying the same register, -this problem is quite complex. -Our packing algorithm uses simple heuristic rules -and avoids any combinatorial search. -It has distinct rules for different cost measures. -.PP -If object code size is the most important factor, -the algorithm is greedy and chooses allocations in -decreasing order of their profits attribute. -It does not take into account the fact that -other allocations may be passed over because of -this decision. -.PP -If execution time is at prime stake, the algorithm -first considers allocations whose timespans consist of loops. -After all these have been packed, it considers the remaining -allocations. -Within the two subclasses, it considers allocations -with the highest profits first. -When assigning a register to an allocation with a loop -as timespan, the algorithm checks if the item has -already been put in a register during another loop. -If so, it tries to use the same register for the -new allocation. -After all packing has been done, -it checks if the item has always been assigned the same -register (although not necessarily during all loops). -If so, it tries to put the item in that register during -the entire procedure. This is possible -if the allocation (item,whole_procedure) is not a rival -of any allocation with a different item that has been -assigned to the same register. -Note that this approach is essentially 'bottom up', -as registers are first assigned over small regions -of text which are later collapsed into larger regions. -The advantage of this approach is the fact that -the decisions for one loop can be made independently -of all other loops.
-.PP -After the entire packing process has been completed, -we compute for each register how much is gained in using -this register, by simply adding the net profits -of all allocations assigned to it. -This total yield should outweigh the costs of -saving/restoring the register at procedure entry/exit. -As most modern processors (e.g. 68000, Vax) have special -instructions to save/restore several registers, -the differential costs of saving one extra register are by -no means constant. -The costs are read from the machine descriptor file and -compared to the total yields of the registers. -As a consequence of this analysis, some allocations -may have their registers taken away. -.NH 3 -The transformation subphase -.PP -The final subphase of RA transforms the EM text according to the -decisions made by the packing system. -It traverses the text of the currently optimized procedure and -changes all occurrences of items at points where -they are assigned a register. -It also clears the score field of the register messages for -normal local variables and emits register messages with a very -high score for the pseudo locals. -At points where registers have to be initialized with items, -it generates EM code to do so. -Finally it tries to decrease the size of the stackframe -of the procedure by looking at which local variables need not -be given memory locations. 
diff --git a/doc/ego/ra/ra4 b/doc/ego/ra/ra4 deleted file mode 100644 index 4bfeef74a..000000000 --- a/doc/ego/ra/ra4 +++ /dev/null @@ -1,28 +0,0 @@ -.NH 2 -Source files of RA -.PP -The sources of RA are in the following files and packages: -.IP ra.h: 14 -declarations of global variables and data structures -.IP ra.c: -the routine main; initialization of target machine-dependent tables -.IP items: -a routine to build the list of items of one procedure; -routines to manipulate items -.IP lifetime: -contains a subroutine that determines when items are live/dead -.IP alloclist: -contains subroutines that build the initial allocations list -and that compute the rivals sets. -.IP profits: -contains a subroutine that computes the profits of the allocations -and a routine that determines the costs of saving/restoring registers -.IP pack: -contains the packing subphase -.IP xform: -contains the transformation subphase -.IP interval: -contains routines to manipulate intervals of time -.IP aux: -contains auxiliary routines -.LP diff --git a/doc/ego/refs.gen b/doc/ego/refs.gen deleted file mode 100644 index 408fc50d9..000000000 --- a/doc/ego/refs.gen +++ /dev/null @@ -1,120 +0,0 @@ -%T A Practical Toolkit for Making Portable Compilers -%A A.S. Tanenbaum -%A H. van Staveren -%A E.G. Keizer -%A J.W. Stevenson -%I Vrije Universiteit, Amsterdam -%R Rapport nr IR-74 -%D October 1981 - -%T A Practical Toolkit for Making Portable Compilers -%A A.S. Tanenbaum -%A H. van Staveren -%A E.G. Keizer -%A J.W. Stevenson -%J CACM -%V 26 -%N 9 -%P 654-660 -%D September 1983 - -%T A Unix Toolkit for Making Portable Compilers -%A A.S. Tanenbaum -%A H. van Staveren -%A E.G. Keizer -%A J.W. Stevenson -%J Proceedings USENIX conf. -%C Toronto, Canada -%V 26 -%D July 1983 -%P 255-261 - -%T Using Peephole Optimization on Intermediate Code -%A A.S. Tanenbaum -%A H. van Staveren -%A J.W. 
Stevenson -%J TOPLAS -%V 4 -%N 1 -%P 21-36 -%D January 1982 - -%T Language- and Machine-independent Global Optimization on Intermediate Code -%A H.E. Bal -%A A.S. Tanenbaum -%J Computer Languages -%V 11 -%N 2 -%P 105-121 -%D April 1986 - -%T Description of a machine architecture for use with -block structured languages -%A A.S. Tanenbaum -%A H. van Staveren -%A E.G. Keizer -%A J.W. Stevenson -%I Vrije Universiteit, Amsterdam -%R Rapport nr IR-81 -%D August 1983 - -%T Amsterdam Compiler Kit documentation -%A A.S. Tanenbaum et. al. -%I Vrije Universiteit, Amsterdam -%R Rapport nr IR-90 -%D June 1984 - -%T The C Programming Language - Reference Manual -%A D.M. Ritchie -%I Bell Laboratories -%C Murray Hill, New Jersey -%D 1978 - -%T Unix programmer's manual, Seventh Edition -%A B.W. Kernighan -%A M.D. McIlroy -%I Bell Laboratories -%C Murray Hill, New Jersey -%V 1 -%D January 1979 - -%T A Tour Through the Portable C Compiler -%A S.C. Johnson -%I Bell Laboratories -%B Unix programmer's manual, Seventh Edition -%C Murray Hill, New Jersey -%D January 1979 - - -%T Ada Programming Language - MILITARY STANDARD -%A J.D. Ichbiah -%I U.S. Department of Defense -%R ANSI/MIL-STD-1815A -%D 22 January 1983 - -%T Rationale for the Design of the Ada Programming Language -%A J.D. Ichbiah -%J SIGPLAN Notices -%V 14 -%N 6 -%D June 1979 - -%T The Programming Languages LISP and TRAC -%A W.L. van der Poel -%I Technische Hogeschool Delft -%C Delft -%D 1972 - -%T Compiler construction -%A W.M. Waite -%A G. Goos -%I Springer-Verlag -%C New York -%D 1984 - -%T The C Programming Language -%A B.W. Kernighan -%A D.M. Ritchie -%I Prentice-Hall, Inc -%C Englewood Cliffs,NJ -%D 1978 diff --git a/doc/ego/refs.opt b/doc/ego/refs.opt deleted file mode 100644 index 6029c7bcf..000000000 --- a/doc/ego/refs.opt +++ /dev/null @@ -1,546 +0,0 @@ -%T Principles of compiler design -%A A.V. Aho -%A J.D. 
Ullman -%I Addison-Wesley -%C Reading, Massachusetts -%D 1978 - -%T The Design and Analysis of Computer Algorithms -%A A.V. Aho -%A J.E. Hopcroft -%A J.D. Ullman -%I Addison-Wesley -%C Reading, Massachusetts -%D 1974 - -%T Code generation in a machine-independent compiler -%A R.G.G. Cattell -%A J.M. Newcomer -%A B.W. Leverett -%J SIGPLAN Notices -%V 14 -%N 8 -%P 65-75 -%D August 1979 - -%T An algorithm for Reduction of Operator Strength -%A J. Cocke -%A K. Kennedy -%J CACM -%V 20 -%N 11 -%P 850-856 -%D November 1977 - -%T Reduction of Operator Strength -%A F.E. Allen -%A J. Cocke -%A K. Kennedy -%B Program Flow Analysis -%E S.S. Muchnick and D. Jones -%I Prentice-Hall -%C Englewood Cliffs, N.J. -%D 1981 - -%T Simplifying Code Generation Through Peephole Optimization -%A J.W. Davidson -%R Ph.D. thesis -%I Dept. of Computer Science -%C Univ. of Arizona -%D December 1981 - -%T A study of selective optimization techniques -%A G.R. Katkus -%R Ph.D. Thesis -%C University of Southern California -%D 1973 - -%T Automatic subroutine generation in an optimizing compiler -%A J.B. Shaffer -%R Ph.D. Thesis -%C University of Maryland -%D 1978 - -%T Optimal mixed code generation for microcomputers -%A D.S. Photopoulos -%R Ph.D. Thesis -%C Northeastern University -%D 1981 - -%T The Design of an Optimizing Compiler -%A W.A. Wulf -%A R.K. Johnsson -%A C.B. Weinstock -%A S.O. Hobbs -%A C.M. Geschke -%I American Elsevier Publishing Company -%C New York -%D 1975 - -%T Retargetable Compiler Code Generation -%A M. Ganapathi -%A C.N. Fischer -%A J.L. Hennessy -%J ACM Computing Surveys -%V 14 -%N 4 -%P 573-592 -%D December 1982 - -%T An Optimizing Pascal Compiler -%A R.N. Faiman -%A A.A. Kortesoja -%J IEEE Trans. on Softw. Eng. -%V 6 -%N 6 -%P 512-518 -%D November 1980 - -%T Experience with the SETL Optimizer -%A S.M. Freudenberger -%A J.T. Schwartz -%J TOPLAS -%V 5 -%N 1 -%P 26-45 -%D Januari 1983 - -%T An Optimizing Ada Compiler -%A W. Kirchgaesner -%A J. Uhl -%A G. Winterstein -%A G. 
Goos -%A M. Dausmann -%A S. Drossopoulou -%I Institut fur Informatik II, Universitat Karlsruhe -%D February 1983 - -%T A Fast Algorithm for Finding Dominators -in a Flowgraph -%A T. Lengauer -%A R.E. Tarjan -%J TOPLAS -%V 1 -%N 1 -%P 121-141 -%D July 1979 - -%T Optimization of hierarchical directed graphs -%A M.T. Lepage -%A D.T. Barnard -%A A. Rudmik -%J Computer Languages -%V 6 -%N 1 -%P 19-34 -%D Januari 1981 - -%T Object Code Optimization -%A E.S. Lowry -%A C.W. Medlock -%J CACM -%V 12 -%N 1 -%P 13-22 -%D Januari 1969 - -%T Automatic Program Improvement: -Variable Usage Transformations -%A B. Maher -%A D.H. Sleeman -%J TOPLAS -%V 5 -%N 2 -%P 236-264 -%D April 1983 - -%T The design of a global optimizer -%A R.J. Mintz -%A G.A. Fisher -%A M. Sharir -%J SIGPLAN Notices -%V 14 -%N 9 -%P 226-234 -%D September 1979 - -%T Global Optimization by Suppression of Partial Redundancies -%A E. Morel -%A C. Renvoise -%J CACM -%V 22 -%N 2 -%P 96-103 -%D February 1979 - -%T Efficient Computation of Expressions with Common Subexpressions -%A B. Prabhala -%A R. Sethi -%J JACM -%V 27 -%N 1 -%P 146-163 -%D Januari 1980 - -%T An Analysis of Inline Substitution for a Structured -Programming Language -%A R.W. Scheifler -%J CACM -%V 20 -%N 9 -%P 647-654 -%D September 1977 - -%T Immediate Predominators in a Directed Graph -%A P.W. Purdom -%A E.F. Moore -%J CACM -%V 15 -%N 8 -%P 777-778 -%D August 1972 - -%T The Generation of Optimal Code for Arithmetic Expressions -%A R. Sethi -%A J.D. Ullman -%J JACM -%V 17 -%N 4 -%P 715-728 -%D October 1970 - -%T Exposing side-effects in a PL/I optimizing compiler -%A T.C. Spillman -%B Information Processing 1971 -%I North-Holland Publishing Company -%C Amsterdam -%P 376-381 -%D 1971 - -%T Inner Loops in Flowgraphs and Code Optimization -%A S. Vasudevan -%J Acta Informatica -%N 17 -%P 143-155 -%D 1982 - -%T A New Strategy for Code Generation - the General-Purpose -Optimizing Compiler -%A W.H. Harrison -%J IEEE Trans. on Softw. Eng. 
-%V 5 -%N 4 -%P 367-373 -%D July 1979 - -%T PQCC: A Machine-Relative Compiler Technology -%A W.M. Wulf -%R CMU-CS-80-144 -%I Carnegie-Mellon University -%C Pittsburgh -%D 25 september 1980 - -%T Machine-independent Pascal code optimization -%A D.R. Perkins -%A R.L. Sites -%J SIGPLAN Notices -%V 14 -%N 8 -%P 201-207 -%D August 1979 - -%T A Case Study of a New Code Generation Technique for Compilers -%A J.L. Carter -%J CACM -%V 20 -%N 12 -%P 914-920 -%D December 1977 - -%T Table-driven Code Generation -%A S.L. Graham -%J IEEE Computer -%V 13 -%N 8 -%P 25-33 -%D August 1980 - -%T Register Allocation in Optimizing Compilers -%A B.W. Leverett -%R Ph.D. Thesis, CMU-CS-81-103 -%I Carnegie-Mellon University -%C Pittsburgh -%D February 1981 - -%T Register Allocation via Coloring -%A G.J. Chaitin -%A M.A. Auslander -%A A.K. Chandra -%A J. Cocke -%A M.E. Hopkins -%A P.W. Markstein -%J Computer Languages -%V 6 -%N 1 -%P 47-57 -%D January 1981 - -%T How to Call Procedures, or Second Thoughts on -Ackermann's Function -%A B.A. Wichmann -%J Software - Practice and Experience -%V 7 -%P 317-329 -%D 1977 - -%T Register Allocation Via Usage Counts -%A R.A. Freiburghouse -%J CACM -%V 17 -%N 11 -%P 638-642 -%D November 1974 - -%T Machine-independent register allocation -%A R.L. Sites -%J SIGPLAN Notices -%V 14 -%N 8 -%P 221-225 -%D August 1979 - -%T An Overview of the Production-Quality Compiler-Compiler Project -%A B.W. Leverett -%A R.G.G Cattell -%A S.O. Hobbs -%A J.M. Newcomer -%A A.H. Reiner -%A B.R. Schatz -%A W.A. Wulf -%J IEEE Computer -%V 13 -%N 8 -%P 38-49 -%D August 1980 - -%T An Overview of the Production-Quality Compiler-Compiler Project -%A B.W. Leverett -%A R.G.G Cattell -%A S.O. Hobbs -%A J.M. Newcomer -%A A.H. Reiner -%A B.R. Schatz -%A W.A. Wulf -%R CMU-CS-79-105 -%I Carnegie-Mellon University -%C Pittsburgh -%D 1979 - -%T Topics in Code Generation and Register Allocation -%A B.W. 
Leverett -%R CMU-CS-82-130 -%I Carnegie-Mellon University -%C Pittsburgh -%D 28 July 1982 - -%T Predicting the Effects of Optimization on a Procedure Body -%A J.E. Ball -%J SIGPLAN Notices -%V 14 -%N 8 -%P 214-220 -%D August 1979 - -%T The C Language Calling Sequence -%A S.C. Johnson -%A D.M. Ritchie -%I Bell Laboratories -%C Murray Hill, New Jersey -%D September 1981 - -%T A Generalization of Two Code Ordering Optimizations -%A C.W. Fraser -%R TR 82-11 -%I Department of Computer Science -%C The University of Arizona, Tucson -%D October 1982 - -%T A Survey of Data Flow Analysis Techniques -%A K. Kennedy -%B Program Flow Analysis -%E S.S. Muchnick and D. Jones -%I Prentice-Hall -%C Englewood Cliffs -%D 1981 - -%T Delayed Binding in PQCC Generated Compilers -%A W.A. Wulf -%A K.V. Nori -%R CMU-CS-82-138 -%I Carnegie-Mellon University -%C Pittsburgh -%D 1982 - -%T Interprocedural Data Flow Analysis in the presence -of Pointers, Procedure Variables, and Label Variables -%A W.E. Weihl -%J Conf. Rec. of the 7th ACM Symp. on Principles of -Programming Languages -%C Las Vegas, Nevada -%P 83-94 -%D 1980 - -%T Low-Cost, High-Yield Code Optimization -%A D.R. Hanson -%R TR 82-17 -%I Department of Computer Science -%C The University of Arizona, Tucson -%D November 1982 - -%T Program Flow Analysis -%E S.S. Muchnick and D. Jones -%I Prentice-Hall -%C Englewood Cliffs -%D 1981 - -%T A machine independent algorithm for code generation and its -use in retargetable compilers -%A R. Glanville -%R Ph.D. thesis -%C University of California, Berkeley -%D December 1977 - -%T A formal framework for the derivation of machine-specific optimizers -%A R. Giegerich -%J TOPLAS -%V 5 -%N 3 -%P 478-498 -%D July 1983 - -%T Engineering a compiler: Vax-11 code generation and optimization -%A P. Anklam -%A D. Cutler -%A R. Heinen -%A M. MacLaren -%I Digital Equipment Corporation -%D 1982 - -%T Analyzing exotic instructions for a retargetable code generator -%A T.M. Morgan -%A L.A. 
Rowe -%J SIGPLAN Notices -%V 17 -%N 6 -%P 197-204 -%D June 1982 - -%T TCOLAda and the Middle End of the PQCC Ada Compiler -%A B.M. Brosgol -%J SIGPLAN Notices -%V 15 -%N 11 -%P 101-112 -%D November 1980 - -%T Implementation Implications of Ada Generics -%A G. Bray -%J Ada Letters -%V III -%N 2 -%P 62-71 -%D September 1983 - -%T Attributed Linear Intermediate Representations for Retargetable -Code Generators -%A M. Ganapathi -%A C.N. Fischer -%J Software-Practice and Experience -%V 14 -%N 4 -%P 347-364 -%D April 1984 - -%T UNCOL: The myth and the fact -%A T.B. Steel -%J Annu. Rev. Autom. Program. -%V 2 -%D 1960 -%P 325-344 - -%T Experience with a Graham-Glanville Style Code Generator -%A P. Aigrain -%A S.L. Graham -%A R.R. Henry -%A M.K. McKusick -%A E.P. Llopart -%J SIGPLAN Notices -%V 19 -%N 6 -%D June 1984 -%P 13-24 - -%T Using Dynamic Programming to generate Optimized Code in a -Graham-Glanville Style Code Generator -%A T.W. Christopher -%A P.J. Hatcher -%A R.C. Kukuk -%J SIGPLAN Notices -%V 19 -%N 6 -%D June 1984 -%P 25-36 - -%T Peep - An Architectural Description Driven Peephole Optimizer -%A R.R. Kessler -%J SIGPLAN Notices -%V 19 -%N 6 -%D June 1984 -%P 106-110 - -%T Automatic Generation of Peephole Optimizations -%A J.W. Davidson -%A C.W. Fraser -%J SIGPLAN Notices -%V 19 -%N 6 -%D June 1984 -%P 111-116 - -%T Analysing and Compressing Assembly Code -%A C.W. Fraser -%A E.W. Myers -%A A.L. Wendt -%J SIGPLAN Notices -%V 19 -%N 6 -%D June 1984 -%P 117-121 - -%T Register Allocation by Priority-based Coloring -%A F. Chow -%A J. Hennessy -%J SIGPLAN Notices -%V 19 -%N 6 -%D June 1984 -%P 222-232 -%V 19 -%N 6 -%D June 1984 -%P 117-121 - -%T Code Selection through Object Code Optimization -%A J.W. Davidson -%A C.W. Fraser -%I Dept. of Computer Science -%C Univ. of Arizona -%D November 1981 - -%T A Portable Machine-Independent Global Optimizer - Design -and Measurements -%A F.C. 
Chow -%I Computer Systems Laboratory -%C Stanford University -%D December 1983 diff --git a/doc/ego/refs.stat b/doc/ego/refs.stat deleted file mode 100644 index 56fcd7fd1..000000000 --- a/doc/ego/refs.stat +++ /dev/null @@ -1,29 +0,0 @@ -%T An analysis of Pascal Programs -%A L.R. Carter -%I UMI Research Press -%C Ann Arbor, Michigan -%D 1982 - -%T An Empirical Study of FORTRAN Programs -%A D.E. Knuth -%J Software - Practice and Experience -%V 1 -%P 105-133 -%D 1971 - -%T F77 Performance -%A D.A. Mosher -%A R.P. Corbett -%J ;login: -%V 7 -%N 3 -%D June 1982 - -%T Ada Language Statistics for the iMAX 432 Operating System -%A S.F. Zeigler -%A R.P. Weicker -%J Ada LETTERS -%V 2 -%N 6 -%P 63-67 -%D May 1983 diff --git a/doc/ego/sp/.distr b/doc/ego/sp/.distr deleted file mode 100644 index fb3527e1b..000000000 --- a/doc/ego/sp/.distr +++ /dev/null @@ -1 +0,0 @@ -sp1 diff --git a/doc/ego/sp/sp1 b/doc/ego/sp/sp1 deleted file mode 100644 index 86df413fd..000000000 --- a/doc/ego/sp/sp1 +++ /dev/null @@ -1,184 +0,0 @@ -.bp -.NH 1 -Stack pollution -.NH 2 -Introduction -.PP -The "Stack Pollution" optimization technique (SP) decreases the costs -(time as well as space) of procedure calls. -In the EM calling sequence, the actual parameters are popped from -the stack by the \fIcalling\fR procedure. -The ASP (Adjust Stack Pointer) instruction is used for this purpose. -A call in EM is shown in Fig. 8.1 -.DS -.TS -l l. -Pascal: EM: - -f(a,2) LOC 2 - LOE A - CAL F - ASP 4 -- pop 4 bytes -.TE - -Fig. 8.1 An example procedure call in Pascal and EM -.DE -As procedure calls occur often in most programs, -the ASP is one of the most frequently used EM instructions. -.PP -The main intention of removing the actual parameters after a procedure call -is to prevent the stack size from increasing rapidly. -Yet, in some cases, it is possible to \fIdelay\fR or even \fIavoid\fR the -removal of the parameters without letting the stack grow -significantly. 
-In this way, considerable savings in code size and execution time may -be achieved, at the cost of a slightly increased stack size. -.PP -A stack adjustment may be delayed if there is some other stack adjustment -later on in the same basic block. -The two ASPs can be combined into one. -.DS -.TS -l l l. -Pascal: EM: optimized EM: - -f(a,2) LOC 2 LOC 2 -g(3,b,c) LOE A LOE A - CAL F CAL F - ASP 4 LOE C - LOE C LOE B - LOE B LOC 3 - LOC 3 CAL G - CAL G ASP 10 - ASP 6 -.TE - -Fig. 8.2 An example of local Stack Pollution -.DE -The stacksize will be increased only temporarily. -If the basic block contains another ASP, the ASP 10 may subsequently be -combined with that next ASP, and so on. -.PP -For some back ends, a stack adjustment also takes place -at the point of a procedure return. -There is no need to specify the number of bytes to be popped at a -return. -This provides an opportunity to remove ASPs more globally. -If all ASPs outside any loop are removed, the increase of the -stack size will still only be small, as no such ASP is executed more -than once without an intervening return from the procedure it is part of. -.PP -This second approach is not generally applicable to all target machines, -as some back ends require the stack to be cleaned up at the point of -a procedure return. -.NH 2 -Implementation -.PP -There is one main problem the implementation has to solve. -In EM, the stack is not only used for passing parameters, -but also for evaluating expressions. -Hence, ASP instructions can only be combined or removed -if certain conditions are satisfied. -.PP -Two consecutive ASPs of one basic block can only be combined -(as described above) if: -.IP 1. -On no point of text in between the two ASPs, any item is popped from -the stack that was pushed onto it before the first ASP. -.IP 2. -The number of bytes popped from the stack by the second ASP must equal -the number of bytes pushed since the first ASP. -.LP -Condition 1. is not satisfied in Fig. 8.3. 
-.DS -.TS -l l. -Pascal: EM: - -5 + f(10) + g(30) LOC 5 - LOC 10 - CAL F - ASP 2 -- cannot be removed - LFR 2 -- push function result - ADI 2 - LOC 30 - CAL G - ASP 2 - LFR 2 - ADI 2 -.TE - -Fig. 8.3 An illegal transformation -.DE -If the first ASP were removed (delayed), the first ADI would add -10 and f(10), instead of 5 and f(10). -.sp -Condition 2. is not satisfied in Fig. 8.4. -.DS -.TS -l l. -Pascal: EM: - -f(10) + 5 * g(30) LOC 10 - CAL F - ASP 2 - LFR 2 - LOC 5 - LOC 30 - CAL G - ASP 2 - LFR 2 - MLI 2 -- 5 * g(30) - ADI 2 -.TE - -Fig. 8.4 A second illegal transformation -.DE -If the two ASPs were combined into one 'ASP 4', the constant 5 would -have been popped, rather than the parameter 10 (so '10 + f(10)*g(30)' -would have been computed). -.PP -The second approach to deleting ASPs (i.e. let the procedure return -do the stack clean-up) -is only applied to the last ASP of every basic block. -Any preceding ASPs are dealt with by the first approach. -The last ASP of a basic block B will only be removed if: -.IP - -on no path in the control flow graph from B to any block containing a -RET (return) there is a basic block that, at some point of its text, pops -items from the stack that it has not itself pushed earlier. -.LP -Clearly, if this condition is satisfied, no harm can be done; no -other basic block will ever access items that were pushed -on the stack before the ASP. -.PP -The number of bytes pushed onto or popped from the stack can be -easily encoded in a so called "pop-push table". -The numbers in general depend on the target machine word- and pointer -size and on the argument given to the instruction. -For example, an ADS instruction is described by: -.DS - -a-p+p -.DE -which means: an 'ADS n' first pops an n-byte value (n being the argument), -next pops a pointer-size value and finally pushes a pointer-size value. -For some infrequently used EM instructions the pop-push numbers -cannot be computed statically. 
-.PP -The stack pollution algorithm first performs a depth first search over -the control flow graph and marks all blocks that do not satisfy -the global condition. -Next it visits all basic blocks in turn. -For every pair of adjacent ASPs, it checks conditions 1. and 2. and -combines the ASPs if they are satisfied. -The new ASP may be used as first ASP in the next pair. -If a condition fails, it simply continues with the next ASP. -Finally, the last ASP is removed if: -.IP - -nothing has been popped from the stack after the last ASP that was -pushed before it -.IP - -the block was not marked by the depth first search -.IP - -the block is not in a loop -.LP diff --git a/doc/ego/sr/.distr b/doc/ego/sr/.distr deleted file mode 100644 index 6de854118..000000000 --- a/doc/ego/sr/.distr +++ /dev/null @@ -1,4 +0,0 @@ -sr1 -sr2 -sr3 -sr4 diff --git a/doc/ego/sr/sr1 b/doc/ego/sr/sr1 deleted file mode 100644 index 7273d8ffa..000000000 --- a/doc/ego/sr/sr1 +++ /dev/null @@ -1,47 +0,0 @@ -.bp -.NH 1 -Strength reduction -.NH 2 -Introduction -.PP -The Strength Reduction optimization technique (SR) -tries to replace expensive operators -by cheaper ones, -in order to decrease the execution time -of the program. -A classical example is replacing a 'multiplication by 2' -by an addition or a shift instruction. -These kinds of local transformations are already -done by the EM Peephole Optimizer. -Strength reduction can also be applied -more generally to operators used in a loop. -.DS -.TS -l l. -i := 1; i := 1; -while i < 100 loop\ \ \ \ \ \ \ --> TMP := i * 118; - put(i * 118); while i < 100 loop - i := i + 1; put(TMP); -end loop; i := i + 1; - TMP := TMP + 118; - end loop; -.TE - -Fig. 6.1 An example of Strength Reduction -.DE -In Fig. 6.1, a multiplication inside a loop is -replaced by an addition inside the loop and a multiplication -outside the loop. -Clearly, this is a global optimization; it cannot -be done by a peephole optimizer. 
-.PP -In some cases a related technique, \fItest replacement\fR, -can be used to eliminate the -loop variable i. -This technique will not be discussed in this report. -.sp 0 -In the example above, the resulting code -can be further optimized by using -constant propagation. -Obviously, this is not the task of the -Strength Reduction phase. diff --git a/doc/ego/sr/sr2 b/doc/ego/sr/sr2 deleted file mode 100644 index 9f7f01e88..000000000 --- a/doc/ego/sr/sr2 +++ /dev/null @@ -1,223 +0,0 @@ -.NH 2 -The model of strength reduction -.PP -In this section we will describe -the transformations performed by -Strength Reduction (SR). -Before doing so, we will introduce the -central notion of an induction variable. -.NH 3 -Induction variables -.PP -SR looks for variables whose -values form an arithmetic progression -at the beginning of a loop. -These variables are called induction variables. -The most frequently occurring example of such -a variable is a loop-variable in a high-order -programming language. -Several quite sophisticated models of strength -reduction can be found in the literature. -.[ -cocke reduction strength cacm -.] -.[ -allen cocke kennedy reduction strength -.] -.[ -lowry medlock cacm -.] -.[ -aho compiler design -.] -In these models the notion of an induction variable -is far more general than the intuitive notion -of a loop-variable. -The definition of an induction variable we present here -is more restricted, -yielding a simpler model and simpler transformations. -We think the principal source for strength reduction lies in -expressions using a loop-variable, -i.e. a variable that is incremented or decremented -by the same amount after every loop iteration, -and that cannot be changed in any other way. -.PP -Of course, the EM code does not contain high level constructs -such as for-statements. -We will define an induction variable in terms -of the Intermediate Code of the optimizer. 
-Note that the notions of a loop in the -EM text and of a firm basic block -were defined in section 3.3.5. -.sp -.UL definition -.sp 0 -An induction variable i of a loop L is a local variable -that is never accessed indirectly, -whose size is the word size of the target machine, and -that is assigned exactly once within L, -the assignment: -.IP - -being of the form i := i + c or i := c +i, -c is a constant -called the \fIstep value\fR of i. -.IP - -occurring in a firm block of L. -.LP -(Note that the first restriction on the assignment -is not described in terms of the Intermediate Code; -we will give such a description later; the current -definition is easier to understand however). -.NH 3 -Recognized expressions -.PP -SR recognizes certain expressions using -an induction variable and replaces -them by cheaper ones. -Two kinds of expensive operations are recognized: -multiplication and array address computations. -The expressions that are simplified must -use an induction variable -as an operand of -a multiplication or as index in an array expression. -.PP -Often a linear function of an induction variable is used, -rather than the variable itself. -In these cases optimization is still possible. -We call such expressions \fIiv-expressions\fR. -.sp -.UL definition: -.sp 0 -An iv-expression of an induction variable i of a loop L is -an expression that: -.IP - -uses only the operators + and - (unary as well as binary) -.IP - -uses i as operand exactly once -.IP - -uses (besides i) only constants or variables that are -never changed in L as operands. 
-.LP -.PP -The expressions recognized by SR are of the following forms: -.IP (1) -iv_expression * constant -.IP (2) -constant * iv_expression -.IP (3) -A[iv-expression] := \kx(assign to array element) -.IP (4) -A[iv-expression] \h'|\nxu'(use array element) -.IP (5) -& A[iv-expression] \h'|\nxu'(take address of array element) -.LP -(Note that EM has different instructions to use an array element, -store into one, or take the address of one, resp. LAR, SAR, and AAR). -.sp 0 -The size of the elements of A must -be known statically. -In cases (3) and (4) this size -must equal the word size of the -target machine. -.NH 3 -Transformations -.PP -With every recognized expression we associate -a new temporary local variable TMP, -allocated in the stack frame of the -procedure containing the expression. -At any program point within the loop, TMP will -contain the following value: -.IP multiplication: 18 -the current value of iv-expression * constant -.IP arrays: -the current value of &A[iv-expression]. -.LP -In the second case, TMP essentially is a pointer variable, -pointing to the element of A that is currently in use. -.sp 0 -If the same expression occurs several times in the loop, -the same temporary local is used each time. -.PP -Three transformations are applied to the EM text: -.IP (1) -TMP is initialized with the right value. -This initialization takes place just -before the loop. -.IP (2) -The recognized expression is simplified. -.IP (3) -TMP is incremented; this takes place just -after the induction variable is incremented. -.LP -For multiplication, the initial value of TMP -is the value of the recognized expression at -the program point immediately before the loop. -For arrays, TMP is initialized with the address -of the first array element that is accessed. 
-So the initialization code is: -.DS -TMP := iv-expression * constant; or -TMP := &A[iv-expression] -.DE -At the point immediately before the loop, -the induction variable will already have been -initialized, -so the value used in the code above will be the -value it has during the first iteration. -.PP -For multiplication, the recognized expression can simply be -replaced by TMP. -For array optimizations, the replacement -depends on the form: -.DS -.TS -l l l. -\fIform\fR \fIreplacement\fR -(3) A[iv-expr] := *TMP := (assign indirect) -(4) A[iv-expr] *TMP (use indirect) -(5) &A[iv-expr] TMP -.TE -.DE -The '*' denotes the indirect operator. (Note that -EM has different instructions to do -an assign-indirect and a use-indirect). -As the size of the array elements is restricted -to be the word size in case (3) and (4), -only one EM instruction needs to -be generated in all cases. -.PP -The amount by which TMP is incremented is: -.IP multiplication: 18 -step value * constant -.IP arrays: -step value * element size -.LP -Note that the step value (see definition of induction variable above), -the constant, and the element size (see previous section) can all -be determined statically. -If the sign of the induction variable in the -iv-expression is negative, the amount -must be negated. -.PP -The transformations are demonstrated by an example. -.DS -.TS -l l. -i := 100; i := 100; -while i > 1 loop TMP := (6-i) * 5; - X := (6-i) * 5 + 2; while i > 1 loop - Y := (6-i) * 5 - 8;\ \ \ \ \ \ \ --> X := TMP + 2; - i := i - 3; Y := TMP - 8; -end loop; i := i - 3; - TMP := TMP + 15; - end loop; -.TE - -Fig. 6.2 Example of complex Strength Reduction transformations -.DE -The expression '(6-i)*5' is recognized twice. The constant -is 5. -The step value is -3. -The sign of i in the recognized expression is '-'. -So the increment value of TMP is -(-3*5) = +15. 
diff --git a/doc/ego/sr/sr3 b/doc/ego/sr/sr3 deleted file mode 100644 index 10dbf64a7..000000000 --- a/doc/ego/sr/sr3 +++ /dev/null @@ -1,244 +0,0 @@ -.NH 2 -Implementation -.PP -Like most phases, SR deals with one procedure -at a time. -Within a procedure, SR works on one loop at a time. -Loops are processed in textual order. -If loops are nested inside each other, -SR starts with the outermost loop and proceeds in the -inwards direction. -This order is chosen, -because it enables the optimization -of multi-dimensional array address computations, -if the elements are accessed in the usual way -(i.e. row after row, rather than column after column). -For every loop, SR first detects all induction variables -and then tries to recognize -expressions that can be optimized. -.NH 3 -Finding induction variables -.PP -The process of finding induction variables -can conveniently be split up -into two parts. -First, the EM text of the loop is scanned to find -all \fIcandidate\fR induction variables, -which are word-sized local variables -that are assigned precisely once -in the loop, within a firm block. -Second, for every candidate, the single assignment -is inspected, to see if it has the form -required by the definition of an induction variable. -.PP -Candidates are found by scanning the EM code of the loop. -During this scan, two sets are maintained. -The set "cand" contains all variables that were -assigned exactly once so far, within a firm block. -The set "dismiss" contains all variables that -should not be made a candidate. -Initially, both sets are empty. -If a variable is assigned to, it is put -in the cand set, if three conditions are met: -.IP 1. -the variable was not in cand or dismiss already -.IP 2. -the assignment takes place in a firm block -.IP 3. -the assignment is not a ZRL instruction (assignment by zero) -or a SDL instruction (store double local). 
-.LP -If any condition fails, the variable is dismissed from cand -(if it was there already) and put in dismiss -(if it was not there already). -.sp 0 -All variables for which no register message was generated (i.e. those -variables that may be accessed indirectly) are assumed -to be changed in the loop. -.sp 0 -All variables that remain in cand are candidate induction variables. -.PP -From the set of candidates, the induction variables can -be determined, by inspecting the single assignment. -The assignment must match one of the EM patterns below. -('x' is the candidate. 'ws' is the word size of the target machine. -'n' is any number.) -.DS -.TS -l l. -\fIpattern\fR \fIstep size\fR -INL x | +1 -DEL x | -1 -LOL x ; (INC | DEC) ; STL x | +1 | -1 -LOL x ; LOC n ; (ADI ws | SBI ws) ; STL x | +n | -n -LOC n ; LOL x ; ADI ws ; STL x +n -.TE -.DE -From the patterns the step size of the induction variable -can also be determined. -These step sizes are displayed on the right hand side. -.sp -For every induction variable we maintain the following information: -.IP - -the offset of the variable in the stackframe of its procedure -.IP - -a pointer to the EM text of the assignment statement -.IP - -the step value -.LP -.NH 3 -Optimizing expressions -.PP -If any induction variables of the loop were found, -the EM text of the loop is scanned again, -to detect expressions that can be optimized. -SR scans for multiplication and array instructions. -Whenever it finds such an instruction, it analyses the -code in front of it. -If an expression is to be optimized, it must -be generated by the following syntax rules. -.DS -.TS -l l. -optimizable_expr: - iv_expr const mult | - const iv_expr mult | - address iv_expr address array_instr; -mult: - MLI ws | - MLU ws ; -array_instr: - LAR ws | - SAR ws | - AAR ws ; -const: - LOC n ; -.TE -.DE -An 'address' is an EM instruction that loads an -address on the stack. 
-An instruction like LOL may be an 'address', if -the size of an address (pointer size, =ps) is -the same as the word size. -If the pointer size is twice the word size, -instructions like LDL are an 'address'. -(The addresses in the third grammar rule -denote resp. the array address and the -array descriptor address). -.DS -.TS -l l. -address: - LAE | - LAL | - LOL if ps=ws | - LOE ,, | - LIL ,, | - LDL if ps=2*ws | - LDE ,, ; -.TE -.DE -The notion of an iv-expression was introduced earlier. -.DS -.TS -l l. -iv_expr: - iv_expr unair_op | - iv_expr iv_expr binary_op | - loopconst | - iv ; -unair_op: - NGI ws | - INC | - DEC ; -binary_op: - ADI ws | - ADU ws | - SBI ws | - SBU ws ; -loopconst: - const | - LOL x if x is not changed in loop ; -iv: - LOL x if x is an induction variable ; -.TE -.DE -An iv_expression must satisfy one additional constraint: -it must use exactly one operand that is an induction -variable. -A simple, hand written, top-down parser is used -to recognize an iv-expression. -It scans the EM code from right to left -(recall that EM is essentially postfix). -It uses semantic attributes (inherited as well as -derived) to check the additional constraint. -.PP -All information assembled during the recognition -process is put in a 'code_info' structure. -This structure contains the following information: -.IP - -the optimizable code itself -.IP - -the loop and basic block the code is part of -.IP - -the induction variable -.IP - -the iv-expression -.IP - -the sign of the induction variable in the -iv-expression -.IP - -the offset and size of the temporary local variable -.IP - -the expensive operator (MLI, LAR etc.) -.IP - -the instruction that loads the constant -(for multiplication) or the array descriptor -(for arrays). -.LP -The entire transformation process is driven -by this information. -As the EM text is represented internally -as a list, this process consists -mainly of straightforward list manipulations. 
-.sp 0 -The initialization code must be put -immediately before the loop entry. -For this purpose a \fIheader block\fR is -created that has the loop entry block as -its only successor and that dominates the -entry block. -The CFG and all relations (SUCC,PRED, IDOM, LOOPS etc.) -are updated. -.sp 0 -An EM instruction that will -replace the optimizable code -is created and put at the place of the old code. -The list representing the old optimizable code -is used to create a list for the initializing code, -as they are similar. -Only two modifications are required: -.IP - -if the expensive operator is a LAR or SAR, -it must be replaced by an AAR, as the initial value -of TMP is the \fIaddress\fR of the first -array element that is accessed. -.IP - -code must be appended to store the result of the -expression in TMP. -.LP -Finally, code to increment TMP is created and put after -the code of the single assignment to the -induction variable. -The generated code uses either an integer addition -(ADI) or an integer-to-pointer addition (ADS) -to do the increment. -.PP -SR maintains a set of all expressions that have already -been recognized in the present loop. -Such expressions are said to be \fIavailable\fR. -If an expression is recognized that is -already available, -no new temporary local variable is allocated for it, -and the code to initialize and increment the local -is not generated. 
diff --git a/doc/ego/sr/sr4 b/doc/ego/sr/sr4 deleted file mode 100644 index ae8764378..000000000 --- a/doc/ego/sr/sr4 +++ /dev/null @@ -1,28 +0,0 @@ -.NH 2 -Source files of SR -.PP -The sources of SR are in the following files -and packages: -.IP sr.h: 14 -declarations of global variables and -data structures -.IP sr.c: -the routine main; a driving routine to process -(possibly nested) loops in the right order -.IP iv -implements a procedure that finds the induction variables -of a loop -.IP reduce -implements a procedure that finds optimizable expressions -and that does the transformations -.IP cand -implements a procedure that finds the candidate induction -variables; used to implement iv -.IP xform -implements several useful routines that transform -lists of EM text or a CFG; used to implement reduce -.IP expr -implements a procedure that parses iv-expressions -.IP aux -implements several auxiliary procedures. -.LP diff --git a/doc/ego/ud/.distr b/doc/ego/ud/.distr deleted file mode 100644 index f64dc1145..000000000 --- a/doc/ego/ud/.distr +++ /dev/null @@ -1,5 +0,0 @@ -ud1 -ud2 -ud3 -ud4 -ud5 diff --git a/doc/ego/ud/ud1 b/doc/ego/ud/ud1 deleted file mode 100644 index 8f2a12f53..000000000 --- a/doc/ego/ud/ud1 +++ /dev/null @@ -1,58 +0,0 @@ -.bp -.NH 1 -Use-Definition analysis -.NH 2 -Introduction -.PP -The "Use-Definition analysis" phase (UD) consists of two related optimization -techniques that both depend on "Use-Definition" information. -The techniques are Copy Propagation and Constant Propagation. -They are best explained via an example (see Figs. 11.1 and 11.2). -.DS - (1) A := B A := B - ... --> ... - (2) use(A) use(B) - -Fig. 11.1 An example of Copy Propagation -.DE -.DS - (1) A := 12 A := 12 - ... --> ... - (2) use(A) use(12) - -Fig. 11.2 An example of Constant Propagation -.DE -Both optimizations have to check that the value of A at line (2) -can only be obtained at line (1). 
-Copy Propagation also has to assure that the value of B is -the same at line (1) as at line (2). -.PP -One purpose of both transformations is to introduce -opportunities for the Dead Code Elimination optimization. -If the variable A is used nowhere else, the assignment A := B -becomes useless and can be eliminated. -.sp 0 -If B is less expensive to access than A (e.g. this is sometimes the case -if A is a local variable and B is a global variable), -Copy Propagation directly improves the code itself. -If A is cheaper to access the transformation will not be performed. -Likewise, a constant as operand may be cheaper than a variable. -Having a constant as operand may also facilitate other optimizations. -.PP -The design of UD is based on the theory described in section -14.1 and 14.3 of. -.[ -aho compiler design -.] -As a main departure from that theory, -we do not demand the statement A := B to become redundant after -Copy Propagation. -If B is cheaper to access than A, the optimization is always performed; -if B is more expensive than A, we never do the transformation. -If A and B are equally expensive UD uses the heuristic rule to -replace infrequently used variables by frequently used ones. -This rule increases the chances of the assignment to become useless. -.PP -In the next section we will give a brief outline of the data -flow theory used -for the implementation of UD. diff --git a/doc/ego/ud/ud2 b/doc/ego/ud/ud2 deleted file mode 100644 index 21174f459..000000000 --- a/doc/ego/ud/ud2 +++ /dev/null @@ -1,64 +0,0 @@ -.NH 2 -Data flow information -.PP -.NH 3 -Use-Definition information -.PP -A \fIdefinition\fR of a variable A is an assignment to A. -A definition is said to \fIreach\fR a point p if there is a -path in the control flow graph from the definition to p, such that -A is not redefined on that path. -.PP -For every basic block B, we define the following sets: -.IP GEN[b] 9 -the set of definitions in b that reach the end of b. 
-.IP KILL[b] -the set of definitions outside b that define a variable that -is changed in b. -.IP IN[b] -the set of all definitions reaching the beginning of b. -.IP OUT[b] -the set of all definitions reaching the end of b. -.LP -GEN and KILL can be determined by inspecting the code of the procedure. -IN and OUT are computed by solving the following data flow equations: -.DS -(1) OUT[b] = IN[b] - KILL[b] + GEN[b] -(2) IN[b] = OUT[p1] + ... + OUT[pn], - where PRED(b) = {p1, ... , pn} -.DE -.NH 3 -Copy information -.PP -A \fIcopy\fR is a definition of the form "A := B". -A copy is said to be \fIgenerated\fR in a basic block n if -it occurs in n and there is no subsequent assignment to B in n. -A copy is said to be \fIkilled\fR in n if: -.IP (i) -it occurs in n and there is a subsequent assignment to B within n, or -.IP (ii) -it occurs outside n, the definition A := B reaches the beginning of n -and B is changed in n (note that a copy also is a definition). -.LP -A copy \fIreaches\fR a point p, if there are no assignments to B -on any path in the control flow graph from the copy to p. -.PP -We define the following sets: -.IP C_GEN[b] 11 -the set of all copies in b generated in b. -.IP C_KILL[b] -the set of all copies killed in b. -.IP C_IN[b] -the set of all copies reaching the beginning of b. -.IP C_OUT[b] -the set of all copies reaching the end of b. -.LP -C_IN and C_OUT are computed by solving the following equations: -(root is the entry node of the current procedure; '*' denotes -set intersection) -.DS -(1) C_OUT[b] = C_IN[b] - C_KILL[b] + C_GEN[b] -(2) C_IN[b] = C_OUT[p1] * ... * C_OUT[pn], - where PRED(b) = {p1, ... , pn} and b /= root - C_IN[root] = {all copies} -.DE diff --git a/doc/ego/ud/ud3 b/doc/ego/ud/ud3 deleted file mode 100644 index 99bf2a036..000000000 --- a/doc/ego/ud/ud3 +++ /dev/null @@ -1,26 +0,0 @@ -.NH 2 -Pointers and subroutine calls -.PP -The theory outlined above assumes that variables can -only be changed by a direct assignment. 
-This condition does not hold for EM. -In case of an assignment through a pointer variable, -it is in general impossible to see which variable is affected -by the assignment. -Similar problems occur in the presence of procedure calls. -Therefore we distinguish two kinds of definitions: -.IP - -an \fIexplicit\fR definition is a direct assignment to one -specific variable -.IP - -an \fIimplicit\fR definition is the potential alteration of -a variable as a result of a procedure call or an indirect assignment. -.LP -An indirect assignment causes implicit definitions to -all variables that may be accessed indirectly, i.e. -all local variables for which no register message was generated -and all global variables. -If a procedure contains an indirect assignment it may change the -same set of variables, else it may change some global variables directly. -The KILL, GEN, IN and OUT sets contain explicit as well -as implicit definitions. diff --git a/doc/ego/ud/ud4 b/doc/ego/ud/ud4 deleted file mode 100644 index c31ad64b2..000000000 --- a/doc/ego/ud/ud4 +++ /dev/null @@ -1,78 +0,0 @@ -.NH 2 -Implementation -.PP -UD first builds a number of tables: -.IP locals: 9 -contains information about the local variables of the -current procedure (offset,size,whether a register message was found -for it and, if so, the score field of that message) -.IP defs: -a table of all explicit definitions appearing in the -current procedure. -.IP copies: -a table of all copies appearing in the -current procedure. -.LP -Every variable (local as well as global), definition and copy -is identified by a unique number, which is the index -in the table. -All tables are constructed by traversing the EM code. -A fourth table, "vardefs" is used, indexed by a 'variable number', -which contains for every variable the set of explicit definitions of it. -Also, for each basic block b, the set CHGVARS containing all variables -changed by it is computed. 
-.PP -The GEN sets are obtained in one scan over the EM text, -by analyzing every EM instruction. -The KILL set of a basic block b is computed by looking at the -set of variables -changed by b (i.e. CHGVARS[b]). -For every such variable v, all explicit definitions to v -(i.e. vardefs[v]) that are not in GEN[b] are added to KILL[b]. -Also, the implicit definition of v is added to KILL[b]. -Next, the data flow equations for use-definition information -are solved, -using a straightforward, iterative algorithm. -All sets are represented as bitvectors, so the operations -on sets (union, difference) can be implemented efficiently. -.PP -The C_GEN and C_KILL sets are computed simultaneously in one scan -over the EM text. -For every copy A := B appearing in basic block b we do -the following: -.IP 1. -for every basic block n /= b that changes B, see if the definition A := B -reaches the beginning of n (i.e. check if the index number of A := B in -the "defs" table is an element of IN[n]); -if so, add the copy to C_KILL[n] -.IP 2. -if B is redefined later on in b, add the copy to C_KILL[b], else -add it to C_GEN[b] -.LP -C_IN and C_OUT are computed from C_GEN and C_KILL via the second set of -data flow equations. -.PP -Finally, in one last scan all opportunities for optimization are -detected. -For every use u of a variable A, we check if -there is a unique explicit definition d reaching u. -.sp -If the definition is a copy A := B and B has the same value at d as -at u, then the use of A at u may be changed into B. -The latter condition can be verified as follows: -.IP - -if u and d are in the same basic block, see if there is -any assignment to B in between d and u -.IP - -if u and d are in different basic blocks, the condition is -satisfied if there is no assignment to B in the block of u prior to u -and d is in C_IN[b]. -.LP -Before the transformation is actually done, UD first makes sure the -alteration is really desirable, as described before. 
-The information needed for this purpose (access costs of local and -global variables) is read from a machine descriptor file. -.sp -If the only definition reaching u has the form "A := constant", the use -of A at u is replaced by the constant. - diff --git a/doc/ego/ud/ud5 b/doc/ego/ud/ud5 deleted file mode 100644 index 1d617e128..000000000 --- a/doc/ego/ud/ud5 +++ /dev/null @@ -1,19 +0,0 @@ - -.NH 2 -Source files of UD -.PP -The sources of UD are in the following files and packages: -.IP ud.h: 14 -declarations of global variables and data structures -.IP ud.c: -the routine main; initialization of target machine dependent tables -.IP defs: -routines to compute the GEN and KILL sets and routines to analyse -EM instructions -.IP const: -routines involved in constant propagation -.IP copy: -routines involved in copy propagation -.IP aux: -contains auxiliary routines -.LP diff --git a/doc/em/.distr b/doc/em/.distr deleted file mode 100644 index 76d9cae6b..000000000 --- a/doc/em/.distr +++ /dev/null @@ -1,28 +0,0 @@ -proto.make -READ_ME -app.codes.nr -app.exam.nr -assem.nr -cont.nr -descr.nr -dspace.nr -em.i -env.nr -even.c -exam.e -exam.p -int -intro.nr -ip.awk -ispace.nr -mach.nr -macr.nr -mapping.nr -mem.nr -title.nr -traps.nr -types.nr -mkdispatch.c -dispat1.sed -dispat2.sed -dispat3.sed diff --git a/doc/em/Makefile b/doc/em/Makefile deleted file mode 100644 index e8bc072d8..000000000 --- a/doc/em/Makefile +++ /dev/null @@ -1,37 +0,0 @@ -HOME=../.. 
- -TBL=tbl -NROFF=nroff -SUF=pr -TARGET=-Tlp - -head: ../em.$(SUF) - -FILES = macr.nr title.nr intro.nr mem.nr ispace.nr dspace.nr mapping.nr \ - types.nr descr.nr env.nr traps.nr mach.nr assem.nr \ - app.int.nr app.codes.nr app.exam.nr cont.nr - -IOP=$(HOME)/etc/ip_spec.t# # to construct itables from - -../em.$(SUF): $(FILES) itables dispatdummy em.i Makefile - $(TBL) $(FILES) | $(NROFF) -mkun $(TARGET) > ../em.$(SUF) - -app.codes.pr: app.codes.nr itables dispatdummy - -itables: $(IOP) ip.awk - awk -f ip.awk $(IOP) | sed 's/-/\\-/g' | $(TBL) >itables - -dispatdummy: $(IOP) mkdispatch - mkdispatch < $(IOP) > dispatdummy - sed -f dispat1.sed < dispatdummy | $(TBL) > dispat1 - sed -f dispat2.sed < dispatdummy | $(TBL) > dispat2 - sed -f dispat3.sed < dispatdummy | $(TBL) > dispat3 - -mkdispatch: mkdispatch.c - $(CC) -I$(HOME)/h -o mkdispatch mkdispatch.c $(HOME)/lib.bin/em_data.a - -.SUFFIXES : .pr .nr -.nr.pr: ; $(TBL) macr.nr $*.nr | $(NROFF) -mkun >$@ - -clean: - rm -f *.pr itables *.out dispatdummy dispat? *.o mkdispatch diff --git a/doc/em/READ_ME b/doc/em/READ_ME deleted file mode 100644 index 1d0ae71a4..000000000 --- a/doc/em/READ_ME +++ /dev/null @@ -1,6 +0,0 @@ -This it the text of IR-81, -DESCRIPTION OF A MACHINE ARCHITECTURE FOR USE WITH BLOCK STRUCTURED LANGUAGES - -The file em.i (text of the defining interpreter) was hand-edited from int/em.p - -The directory int contains the interpreter. diff --git a/doc/em/addend.n b/doc/em/addend.n deleted file mode 100644 index 368ca2d12..000000000 --- a/doc/em/addend.n +++ /dev/null @@ -1,1122 +0,0 @@ -.lg 0 -.ta 8 16 24 32 40 48 56 64 72 80 -.hw iden-ti-fi-er -.nr a 0 1 -.nr f 1 1 -.de x1 -'sp 2 -'tl '''%' -'sp 2 -.ns -.. -.wh 0 x1 -.de fo -'bp -.. -.wh 60 fo -.ll 79 -.lt 79 -.de HT -.ti -4 -.. -.de PP -.sp -.ne 2 -.ti +5 -.. -.de SE -.bp -\fB\\n+a. \\$1\fR -.nr b 0 1 -.. -.de SB -.br -.ne 10 -.sp 5 -\fB\\na.\\n+b. \\$1\fR -.. -.de DC -.ti -14 -DECISION~\\$1: -.. -.de IN -.in +6 -.. -.de OU -.in -6 -.. 
-.tr ~ -.sp 5 -.rs -.sp 10 -.ce 3 -Changes in EM-1 - -Addendum to Informatica Rapport IR-54 -.sp 5 -.PP -This document describes a revision of EM-1. -A list of differences is presented roughly in the order IR-54 -describes the original architecture. -A complete list of EM-1 pseudo's and instructions is also included. -.SE Introduction -.PP -EM is a family of intermediate languages, resembling assembly -language for a stack machine. -EM defines the layout of data memory and a partitioning -of instruction memory. -EM can do operations on five basic types: -pointers, signed integers, unsigned integers, floating point numbers -and sets of bits. -The size of pointers is fixed in each member, -in contrast to the sizes of the other types. -Each member has one more fixed size: the word size. -This is the minimum size of any object on the stack. -The sizes of all objects on the stack are assumed to be -multiples of the word size. -We assume that pointer and word-sizes are both powers of two. -.PP -It is possible to load objects smaller than the word size from memory. -These objects are converted to objects of the word size by -clearing the most significant bytes. -(A separate conversion instruction can do sign extension). -When objects smaller than the word size are stored in memory, -the most significant bytes are ignored. -The size of such objects has to be a divisor of the word size. -.PP -Put in other terms, instructions such as LOC, LOL, LOE, STF, etc. -manipulate WORDS. Up until now, a word was defined as 16 bits. -It is now possible to define a word size other than 16 bits. For -example, MES 2,1,2 defines a word to be 8 bits and a pointer to be -16 bits. As another example, MES 2,4,4 defines a word to be 32 bits -and a pointer to be 32 bits. 
-.PP -If a compiler receives flags telling it to use 32 bit integers, it now -has a choice of setting the word length to 16 bits and using LDL etc -for dealing with integers, or setting the word length to 32 bits and using -LOL etc for integers. -For example, x:=a+b for 32-bit integers would become: - - MES 2,2,4 MES 2,4,4 - LDL a LOL a - LDL b LOL b - ADI 4 ADI 4 - SDL x STL x - -In many cases, the target machine code that is finally produced from either -of the above sequences will not show any traces of the stack machine, however -for some instructions actual pushes and pops at run time will be necessary. -Choosing a wider EM word will usually produce fewer stack operations than -a narrower word, but it eliminates the possibility of doing arithmetic on -quantities smaller than a word. If, for example, a compiler chooses a 32-bit -EM word, it will be difficult to add two 16 bit integers with ADI, since -the argument must be a multiple of the word size. -(The operation can be done by converting the operands to 32 bits using CII, -adding the 32-bit numbers, and reconverting the result.) -On the other hand, choosing a 16-bit EM word makes it possible to do both -16-bit adds (ADI 2) and 32-bit adds (ADI 4), -but the price paid is that 32-bit operations will be viewed as double -precision, and may be slightly less efficient on target machines with a -32-bit word, i.e. the EM to target translator may not take full advantage -of the 32 bit facilities. -.PP -Note that since LOC pushes a WORD on the stack, the argument of LOC -must fit in a word. LOC 256 on an EM machine with a 1-byte word length -is not allowed. LDC 256 is allowed, however. -.PP -A general rule of thumb is that the compiler should choose an EM word -length equal to the width of a single precision integer. -Obviously, compilers should be well parameterized to allow the integer -size(s) and word size(s) to be changed by just changing a few constants. 
-.PP -The size of an instruction space pointer is the same -as the size of a data space pointer. -.PP -EM assumes two's complement arithmetic on signed integers, -but does not define an ordering of the bytes in an integer. -The lowest numbered byte of a two-byte object can contain -either the most or the least significant part. -.SE Memory -.PP -EM has two separate addressing spaces, instruction and data. -The sizes of these spaces are not specified. -The layout of instruction space is not defined. -Any interpreter or translator may assume a layout fitting his/her needs. -The layout of data memory is specified by EM. -EM data memory consists of a sequence of 8-bit bytes each separately -addressable. -Certain alignment restrictions exist for objects consisting of multiple bytes. -Objects smaller than the word size can only be addressed -at multiples of the object size. -For example: in a member with a four-byte word size, two-byte integers -can only be accessed from even addresses. -Objects larger than the word size can only be placed at multiples -of the word size. -For example: in a member with a four-byte word size, -eight-byte floating point numbers can be fetched at addresses -0, 4, 8, 12, etc. -.SB "Procedure identifiers" -.PP -Procedure identifiers in EM have the same size -as pointers. -Any implementation of EM is free to use any method of identifying procedures. -Common methods are indices into tables containing further information -and addresses of the first instructions of procedures. -.SB "Heap and Stack in global data" -.PP -The stack grows downward, the heap grows upward. -The stack pointer points to the lowest occupied word on the stack. -The heap pointer marks the first free word in the heap area. 
-.br -.ne 39 -.sp 1 -.nf - 65534 -> |-------------------------------| - |///////////////////////////////| - |//// unimplemented memory /////| - |///////////////////////////////| - SB -> |-------------------------------| - | | - | stack and local area | <- LB - | | - | | - |-------------------------------| <- SP - |///////////////////////////////| - |// implementation dependent //| - |///////////////////////////////| - |-------------------------------| <- HP - | | - | heap area | - | | - | | - |-------------------------------| - | | - | global area | - | | - EB -> |-------------------------------| - | | - | | - | program text | <- PC - | | - | | - PB -> |-------------------------------| - |///////////////////////////////| - |////////// undefined //////////| - |///////////////////////////////| - 0 -> |-------------------------------| - - Fig. \nf. Example of memory layout showing typical register - positions during execution of an EM program. -.fi -.SB "Data addresses as arguments" -.PP -Anywhere previous versions of the EM assembly language -allowed identifiers of objects in -data space, -it is also possible to use 'identifier+constant' or 'identifier-constant'. -For example, both "CON LABEL+4" and "LAE SAVED+3" are allowed. -More complicated expressions are illegal. -.SB "Local data area" -.PP -The mark block has been banished. -When calling a procedure, -the calling routine first has to push the actual parameters. -All language implementations currently push their arguments -in reverse order, to be compatible with C. -Then the procedure is called using a CAL or CAI instruction. -Either the call or the procedure prolog somehow has to save -the return address and dynamic link. -The prolog allocates the space needed for locals and is free to -surround this space with saved registers and other information it -deems necessary. -.PP -The locals are now accessed using negative offsets in LOL, LDL, SDL, LAL, -LIL, SIL and STL instructions. 
-The parameters are accessed using positive offsets in LOL, LDL, SDL, LAL, -LIL, STL and -STL instructions. -The prolog might have stored information in the area between parameters and -locals. -As a consequence there are two bases, AB(virtual) and LB. -AB stands for Argument Base and LB stands for Local Base. -Positive arguments to LOL etc ... are interpreted as offsets from AB, -negative arguments as offsets from LB. -.PP -The BEG instruction is not needed to allocate the locals because -storage for locals is set aside in the prolog. -The instruction still exists under the name ASP (Adjust Stack Pointer). -.PP -Procedures return using the RET instruction. -The RET pops the function result from the stack and -brings the stack pointer and other relevant registers to the state -they had just before the procedure was called. -The RET instruction expects that - aside from possible function results - -the stack pointer has the value it had after execution of the prolog. -RET finally returns control to the calling routine. -The actual parameters have to be removed from the stack by the calling routine, -and not by the called procedure. -.sp 1 -.ne 38 -.nf - - - - |===============================| - | actual argument n | - |-------------------------------| - | . | - | . | - | . 
| - |-------------------------------| - | actual argument 1 | ( <- AB ) - |===============================| - |///////////////////////////////| - |// implementation dependent //| - |///////////////////////////////| <- LB - |===============================| - | | - | local variables | - | | - |-------------------------------| - | | - | compiler temporaries | - | | - |===============================| - |///////////////////////////////| - |// implementation dependent //| - |///////////////////////////////| - |===============================| - | | - | dynamic local generators | - | | - |===============================| - | operand | - |-------------------------------| - | operand | <- SP - |===============================| - - A sample procedure frame. - -.fi -.sp 1 -This scheme allows procedures to be called with a variable number -of parameters. -The parameters have to be pushed in reverse order, -because the called procedure has to be able to locate the first one. -.PP -.PP -Since the mark block has disappeared, a new mechanism for static -links had to be created. -All compilers use the convention that EM procedures needing -a static link will find a link in their zero'th parameter, -i.e. the last one pushed on the stack. -This parameter should be invisible to users of the compiler. -The link needs to be in a fixed place because the lexical instructions -have to locate it. -The LEX instruction is replaced by two instructions: LXL and LXA. -\&"LXL~n" finds the LB of a procedure n static levels removed. -\&"LXA~n" finds the (virtual) AB. -The value used for static link is LB. -.PP -When a procedure needing a static link is called, first the actual -parameters are pushed, then the static link is pushed using LXL -and finally the procedure is called with a CAL with the procedure's -name as argument. -.br -.ne 40 -.nf - - - - |===============================| - | actual argument n | - |-------------------------------| - | . | - | . | - | . 
| - |-------------------------------| - | actual argument 1 | - |-------------------------------| - | static link | ( <- AB ) - |===============================| - |///////////////////////////////| - |// implementation dependent //| - |///////////////////////////////| <- LB - |===============================| - | | - | local variables | - | | - |-------------------------------| - | | - | compiler temporaries | - | | - |===============================| - |///////////////////////////////| - |// implementation dependent //| - |///////////////////////////////| - |===============================| - | | - | dynamic local generators | - | | - |===============================| - | operand | - |-------------------------------| - | operand | <- SP - |===============================| - - A procedure frame with static link. - -.fi -.sp 1 -.sp 1 -.PP -Pascal and other languages have to use procedure -instance identifiers containing -the procedure identifier -'ul -and -the static link the procedure has to be called with. -A static link having a value of zero signals -that the called procedure does not need a static link. -C uses the same convention for pointers to C-routines. -In pointers to C-routines the static link is set to zero. -.PP -Note: The distance from LB to AB must be known for each procedure, otherwise -LXA can not be implemented. -Most implementations will have a fixed size area between -the parameter and local storage. -The zone between the compiler temporaries and the dynamic -local generators can be used -to save a variable number of registers. -.PP -.ne 11 -Prolog examples: -.sp 2 -.nf - - proc1 proc2 - - mov lb,-(sp) mov lb,-(sp) - mov sp,lb mov sp,lb - sub $loc_size,sp sub $loc_size,sp - mov r2,-(sp) ; save r2 mov r2,-(sp) - mov r4,-(sp) ; save r4 - -.fi -.SB "Return values" -.PP -The return value popped by RET is stored in an unnamed 'function return area'. -This area can be different for different sized objects returned, -e.g. 
one register for two byte objects, -two registers for four byte objects, -memory for larger objects. -The area is available for 'READ-ONCE' access using the LFR instruction. -The result of a LFR is only defined if the sizes used to store and -fetch are identical. -The only instructions guaranteed not to destroy the contents of -any 'function return area' are ASP and BRA. -Thus parameters can be popped before fetching the function result. -The maximum size of all function return areas is -implementation dependant, -but allows procedure instance identifiers and all -implemented objects of type integer, unsigned, float -and pointer to be returned. - -.SE "EM Assembly Language" -.nr b 0 1 -.SB "Object types and instructions" -.PP -EM knows five basic object types: -pointers, -signed integers, -unsigned integers, -floating point numbers and -sets of bits. -Operations on objects of the last four types do not assume -a specific size. -Pointers (including procedure identifiers) have a fixed size in each -implementation. -Instructions acting on one or more objects of the last four types need -explicit size information. -This information can be given either as the argument of the -instruction or on top of the stack. -.sp 1 -For example: -.nf -addition of integers LOL a, LOL b, ADI 2 -subtraction of two floats LDL a, LDL b, SBF 4 -integer to float LOL a, LOC 2, LOC 4, CIF, SDL b -.fi -.sp -Note that conversion instructions always expect size -before and size after conversion on the stack. -.sp -No obligation exists to implement all operations on all possible sizes. -.PP -The EM assembly language -allows constants as instruction arguments up to a size of four bytes. -In all EM's it is possible to initialize any type and size object. -BSS, HOL, CON and ROM allow type and size indication in initializers. -.SB "Conversion instructions" -.PP -The conversion operators can convert from any type and size to any -type and size. 
-The types are specified by the instruction, -the sizes should be in words on top of the stack. -Normally the sizes are multiples of the word size. -There is one exception: the CII instruction sign-extends if the -size of the source is a divisor of the word size. -.SB "CSA and CSB" -.PP -The tables used by these instructions do not contain the procedure -identifier any more. -See also "Descriptors". -.SB EXG -.PP -The EXG instruction is deleted from the EM instruction set. -If future applications show any need for this instruction, -it will be added again. -.SB "FIL" -.PP -A FIL instruction has been introduced. -When using separate compilation, -the LIN feature of EM was insufficient. -FIL expects as argument an address in global data. -This address is stored in a fixed place in memory, -where it can be used by any implementation for diagnostics etc. -Like LIN, it provides access to the ABS fragment at the start -of external data. -.SB "LAI and SAI" -.PP -LAI and SAI have been dropped, they thwarted register optimization. -.SB LNC -.PP -The LNC instruction is deleted from the instruction set. -LOC -n will do what it is supposed to. -.SB "Branch instructions" -.PP -The branch instructions are allowed to branch both forward and backward. -Consequently BRF and BRB are deleted and a BRA instruction is added. -BRA branches unconditionally in any direction. -.SB LDC -.PP -Loads a double word constant on the stack. -.SB LEX -.PP -LXA and LXL replace LEX. -.SB LFR -.PP -LFR loads the function result stored by RET. -.SB "LIL and SIL" -.PP -They replace LOP and STP. (Name change only) -.SB "Traps and Interrupts" -.PP -The numbers used for distinguishing the various types -of traps and interrupts have been reassigned. -The new instructions LIM and SIM -allow setting and clearing of bits in a mask. -The bits in the mask control the action taken upon encountering certain -errors at runtime. 
-A 1 bit causes the corresponding error to be ignored, -a 0 bit causes the run-time system to trap. -.SB LPI -.PP -Loads a procedure identifier on the stack. -LOC cannot be used to do this anymore. -.SB "ZER and ZRF" -.PP -ZER loads S zero bytes on the stack. -ZRF loads a floating point zero of size S. -.SB "Descriptors" -.PP -All instructions using descriptors have the size of the integer used -in the descriptor as argument. -The descriptors are: case descriptors (CSA and CSB), -range check descriptors (RCK) and -array descriptors ( LAR, SAR, AAR). -.SB "Case descriptors" -.PP -The value used in a case descriptor to indicate the absence of a label -is zero instead of -1. -.SE "EM assembly language" -.SB "Instruction arguments" -.PP -The previous EM had different instructions for distinguishing -between operand on the stack and explicit argument in the instruction. -For example, LOI and LOS. -This distinction has been removed. -Several instructions have two possible forms: -with explicit argument and with implicit argument on top of the stack. -The size of the implicit argument is the word size. -The implicit argument is always popped before all other operands. -Appendix 1 shows what is allowed for each instruction. -.SB Notation -.PP -First the notation used for the arguments of -instructions and pseudo instructions. -.in +12 -.ti -11 -~~=~~an integer number in the range -32768..32767 -.ti -11 -~~=~~an offset -2**31..2**31~-~1 -.ti -11 -~~=~~an identifier -.ti -11 -~~=~~ or or + or - -.ti -11 -~~=~~integer constant, -unsigned constant, -floating point constant -.ti -11 -~~=~~string constant (surrounded by double quotes), -.ti -11 -~~=~~instruction label ('*' followed by an integer in the range -0..32767). -.ti -11 -~~=~~procedure number ('$' followed by a procedure name) -.ti -11 -~~=~~, -, - or -. -.ti -11 -<...>*~=~~zero or more of <...> -.ti -11 -<...>+~=~~one or more of <...> -.ti -11 -[...]~~=~~optional ... 
-.in -12 -.SB Labels -.PP -No label, instruction or data, can have a (pseudo) instruction -on the same line. -.SB Constants -.PP -All constants in EM are interpreted in the decimal base. -.PP -In BSS, HOL, CON and ROM pseudo-instructions -numbers must be followed by I, U or F -indicating Integer, Unsigned or Float. -If no character is present I is assumed. -This character can be followed by an even positive number or a 1. -The number indicates the size in bytes of the object to be initialized, -up to 32766. -Double precision integers can no longer be indicated by a trailing L. -As said before CON and ROM also allow expressions of the form: -\&"LABEL+offset" and "LABEL-offset". -The offset must be an unsigned decimal number. -The 'IUF' indicators cannot be used with the offsets. -.PP -Areas reserved in the global data area by HOL or BSS can be -initialized. -BSS and HOL have a third parameter indicating whether the initialization -is mandatory or optional. -.PP -Since EM needs aligment of objects, this alignment is enforced by the -pseudo instructions. -All objects are aligned on a multiple of their size or the word size -whichever is smaller. -Switching to another type of fragment or placing a label forces word-alignment. -There are three types of fragments in global data space: CON, ROM and BSS-HOL. -.sp -.SB "Pseudo instructions" -.PP -The LET, IMC and FWC pseudo's have disappeared. -The only application of these pseudo's was in postponing the -specification of the size of the local storage to just before -the END of the procedure. -A new mechanism has been introduced to handle this problem. -.ti +5 -The pseudos involved in separate compilation and linking have -been reorganized. -.ti +5 -PRO and END are altered and reflect the new calling sequence. -EOF has disappeared. -.ti +5 -BSS and HOL allow initialization of the requested data areas. -.sp 2 -Four pseudo instructions request global data: -.sp 2 - BSS ,, -.IN -Reserve bytes. 
- is the value used to initialize the area. - must be a multiple of the size of . - is 0 if the initialization is not strictly necessary, -1 otherwise. -.OU -.sp - HOL ,, -.IN -Idem, but all following absolute global data references will -refer to this block. -Only one HOL is allowed per procedure, -it has to be placed before the first instruction. -.OU -.sp - CON + -.IN -Assemble global data words initialized with the constants. -.OU -.sp - ROM + -.IN -Idem, but the initialized data will never be changed by the program. -.OU -.sp 2 -Two pseudo instructions partition the input into procedures: -.sp 2 - PRO [,] -.IN -Start of procedure. - is the procedure name. - is the number of bytes for locals. -The number of bytes for locals must be specified in the PRO or -END pseudo-instruction. -When specified in both, they must be identical. -.OU -.sp - END [] -.IN -End of Procedure. - is the number of bytes for locals. -The number of bytes for locals must be specified in either the PRO or -END pseudo-instruction or both. -.OU -.PP -Names of data and procedures in a EM module can either be -internal or external. -External names are known outside the module and are used to link -several pieces of a program. -Internal names are not known outside the modules they are used in. -Other modules will not 'see' an internal name. -.ti +5 -In order to reduce the number of passes needed, -it must be known at the first occurrence whether -a name is internal or external. -If the first occurrence of a name is in a definition, -the name is considered to be internal. -If the first occurrence of a name is a reference, -the name is considered to be external. -If the first occurrence is in one of the following pseudo instructions, -the effect of the pseudo has precedence. -.sp 2 - EXA -.IN -External name. - is external to this module. -Note that may be defined in the same module. -.OU -.sp - EXP -.IN -External procedure identifier. -Note that may be defined in the same module. 
-.OU -.sp - INA -.IN -Internal name. - is internal to this module and must be defined in this module. -.OU -.sp - INP -.IN -Internal procedure. - is internal to this module and must be defined in this module. -.OU -.sp 2 -Two other pseudo instructions provide miscellaneous features: -.sp 2 - EXC , -.IN -Two blocks of instructions preceding this one are -interchanged before being processed. - gives the number of lines of the first block. - gives the number of lines of the second one. -Blank and pure comment lines do not count. -This instruction is obsolete. Its use is strongly discouraged. -.OU -.sp - MES ,* -.IN -A special type of comment. Used by compilers to communicate with the -optimizer, assembler, etc. as follows: -.br - MES 0 - -.IN -An error has occurred, stop further processing. -.OU -.br - MES 1 - -.IN -Suppress optimization -.OU -.br - MES 2,, -.IN -Use word-size and pointer size . -.OU -.br - MES 3,,, - -.IN -Indicates that a local variable is never referenced indirectly. - is offset in bytes from LB if positive -and offset from AB if negative. - gives the size of the variable. - indicates the class of the variable. -.OU -.br - MES 4,, -.IN -Number of source lines in file (for profiler). -.OU -.br - MES 5 - -.IN -Floating point used. -.OU -.br - MES 6,* - -.IN -Comment. Used to provide comments in compact assembly language (see below). -.OU -.sp 1 -Each back end is free to skip irrelevant MES pseudos. -.OU -.SB "The Compact Assembly Language" -.PP -The assembler accepts input in a highly encoded form. This -form is intended to reduce the amount of file transport between the compiler -and assembler, and also reduce the amount of storage required for storing -libraries. -Libraries are stored as archived compact assembly language, not machine language. -.PP -When beginning to read the input, the assembler is in neutral state, and -expects either a label or an instruction (including the pseudoinstructions). 
-The meaning of the next byte(s) when in neutral state is as follows, where b1, b2 -etc. represent the succeeding bytes. -.sp - 0 Reserved for future use - 1-129 Machine instructions, see Appendix 2, alphabetical list - 130-149 Reserved for future use - 150-161 BSS,CON,END,EXC,EXA,EXP,HOL,INA,INP,MES,PRO,ROM - 162-179 Reserved for future pseudoinstructions - 180-239 Instruction labels 0 - 59 (180 is local label 0 etc.) - 240-244 See the Common Table below - 245-255 Not used - -After a label, the assembler is back in neutral state; it can immediately -accept another label or an instruction in the very next byte. There are -no linefeeds used to separate lines. -.PP -If an opcode expects no arguments, -the assembler is back in neutral state after -reading the one byte containing the instruction number. If it has one or -more arguments (only pseudos have more than 1), the arguments follow directly, -encoded as follows: -.sp - 0-239 Offsets from -120 to 119 -.br - 240-255 See the Common Table below -.sp 2 -If an opcode has one optional argument, -a special byte is used to announce that the argument is not present. -.ce 1 -Common Table for Neutral State and Arguments -.sp -.nf - 240 b1 Instruction label b1 (Not used for branches) - 241 b1 b2 16 bit instruction label (256*b2 + b1) - 242 b1 Global label .0-.255, with b1 being the label - 243 b1 b2 Global label .0-.32767 - with 256*b2+b1 being the label - 244 Global symbol not of the form .nnn -. \" Only the previous can occur in neutral state. 
- 245 b1 b2 (16 bit constant) 256*b2+b1 - 246 b1 b2 b3 b4 (32 bit constant) (256*(256*(256*b4)+b3)+b2)+b1 - 247 Global label + (possibly negative) constant - 248 Procedure name (not including $) - 249 String used in CON or ROM (no quotes) - 250 Integer constant, size bytes - 251 Unsigned constant, size bytes - 252 Floating constant, size bytes - 255 Delimiter for argument lists or - indicates absence of optional argument - -.fi -.PP -The notation consists first of a length field, and then an -arbitrary string of bytes. -The length is specified by a . -.PP -.ne 8 -The pseudoinstructions fall into several categories, depending on their -arguments: -.sp - Group 1 -- EXC, BSS, HOL have a known number of arguments - Group 2 -- EXA, EXP, INA, INP start with a string - Group 3 -- CON, MES, ROM have a variable number of various things - Group 4 -- END, PRO have a trailing optional argument. - -Groups 1 and 2 -use the encoding described above. -Group 3 also uses the encoding listed above, with a byte after the -last argument to indicate the end of the list. -Group 4 uses -a byte if the trailing argument is not present. - -.ad -.fi -.sp 2 -.ne 12 -.nf -Example ASCII Example compact -(LOC = 66, BRA = 18 here): - - 2 182 - 1 181 - LOC 10 66 130 - LOC -10 66 110 - LOC 300 66 245 44 1 - BRA 19 18 139 - 300 241 44 1 - .3 242 3 - CON 4,9,*2,$foo 151 124 130 240 2 248 3 102 111 111 255 - LOC .35 66 242 35 -.fi -.nr a 0 1 -.SE "ASSEMBLY LANGUAGE INSTRUCTION LIST" -.PP -For each instruction in the list the range of operand values -in the assembly language is given. -All constants, offsets and sizes are in the range -2**31~..~2**31-1. -The column headed \fIassem\fP contains the mnemonics defined -in 4.1. -The following column indicates restrictions in the range of the operand. -Addresses have to obey the restrictions mentioned in chapter 2 - Memory -. -The size parameter of most instructions has to be a multiple -of the word size. 
-The classes of operands -are indicated by letters: -.ds b \fBb\fP -.ds c \fBc\fP -.ds d \fBd\fP -.ds g \fBg\fP -.ds f \fBf\fP -.ds l \fBl\fP -.ds n \fBn\fP -.ds i \fBi\fP -.ds p \fBp\fP -.ds r \fBr\fP -.ds s \fBs\fP -.ds z \fBz\fP -.ds - \fB-\fP -.nf - - \fIassem\fP constraints rationale - -\&\*c off 1-word constant -\&\*d off 2-word constant -\&\*l off local offset -\&\*g arg >= 0 global offset -\&\*f off fragment offset -\&\*n num >= 0 counter -\&\*s off > 0 object size -\&\*z off >= 0 object size -\&\*i off > 0 object size * -\&\*p pro pro identifier -\&\*b lab >= 0 label number -\&\*r num 0,1,2 register number -\&\*- no operand - -.fi -.PP -The * at the rationale for \*i indicates that the operand -can either be given as argument or on top of the stack. -If the operand has to be fetched from the stack, -it is assumed to be a word-sized unsigned integer. -.PP -Instructions that check for undefined operands and underflow or overflow -are indicated by (*). -.nf - -GROUP 1 - LOAD - - LOC \*c : Load constant (i.e. push one word onto the stack) - LDC \*d : Load double constant ( push two words ) - LOL \*l : Load word at \*l-th local (l<0) or parameter (l>=0) - LOE \*g : Load external word \*g - LIL \*l : Load word pointed to by \*l-th local or parameter - LOF \*f : Load offsetted. (top of stack + \*f yield address) - LAL \*l : Load address of local or parameter - LAE \*g : Load address of external - LXL \*n : Load lexical. (address of LB \*n static levels back) - LXA \*n : Load lexical. (address of AB \*n static levels back) - LOI \*s : Load indirect \*s bytes (address is popped from the stack) - LOS \*i : Load indirect. 
\*i-byte integer on top of stack gives object size - LDL \*l : Load double local or parameter (two consecutive words are stacked) - LDE \*g : Load double external (two consecutive externals are stacked) - LDF \*f : Load double offsetted (top of stack + \*f yield address) - LPI \*p : Load procedure identifier - -GROUP 2 - STORE - - STL \*l : Store local or parameter - STE \*g : Store external - SIL \*l : Store into word pointed to by \*l-th local or parameter - STF \*f : Store offsetted - STI \*s : Store indirect \*s bytes (pop address, then data) - STS \*i : Store indirect. \*i-byte integer on top of stack gives object size - SDL \*l : Store double local or parameter - SDE \*g : Store double external - SDF \*f : Store double offsetted - -GROUP 3 - INTEGER ARITHMETIC - - ADI \*i : Addition (*) - SBI \*i : Subtraction (*) - MLI \*i : Multiplication (*) - DVI \*i : Division (*) - RMI \*i : Remainder (*) - NGI \*i : Negate (two's complement) (*) - SLI \*i : Shift left (*) - SRI \*i : Shift right (*) - -GROUP 4 - UNSIGNED ARITHMETIC - - ADU \*i : Addition - SBU \*i : Subtraction - MLU \*i : Multiplication - DVU \*i : Division - RMU \*i : Remainder - SLU \*i : Shift left - SRU \*i : Shift right - -GROUP 5 - FLOATING POINT ARITHMETIC (Format not defined) - - ADF \*i : Floating add (*) - SBF \*i : Floating subtract (*) - MLF \*i : Floating multiply (*) - DVF \*i : Floating divide (*) - NGF \*i : Floating negate (*) - FIF \*i : Floating multiply and split integer and fraction part (*) - FEF \*i : Split floating number in exponent and fraction part (*) - -GROUP 6 - POINTER ARITHMETIC - - ADP \*f : Add \*c to pointer on top of stack - ADS \*i : Add \*i-byte value and pointer - SBS \*i : Subtract pointers in same fragment and push diff as size \*i integer - -GROUP 7 - INCREMENT/DECREMENT/ZERO - - INC \*- : Increment top of stack by 1 (*) - INL \*l : Increment local or parameter (*) - INE \*g : Increment external (*) - DEC \*- : Decrement top of stack by 1 (*) - DEL \*l : 
Decrement local or parameter (*) - DEE \*g : Decrement external (*) - ZRL \*l : Zero local or parameter - ZRE \*g : Zero external - ZRF \*i : Load a floating zero of size \*i - ZER \*i : Load \*i zero bytes - -GROUP 8 - CONVERT ( stack: source, source size, dest. size (top) ) - - CII \*- : Convert integer to integer (*) - CUI \*- : Convert unsigned to integer (*) - CFI \*- : Convert floating to integer (*) - CIF \*- : Convert integer to floating (*) - CUF \*- : Convert unsigned to floating (*) - CFF \*- : Convert floating to floating (*) - CIU \*- : Convert integer to unsigned - CUU \*- : Convert unsigned to unsigned - CFU \*- : Convert floating to unsigned - -GROUP 9 - LOGICAL - - AND \*i : Boolean and on two groups of \*i bytes - IOR \*i : Boolean inclusive or on two groups of \*i bytes - XOR \*i : Boolean exclusive or on two groups of \*i bytes - COM \*i : Complement (one's complement of top \*i bytes) - ROL \*i : Rotate left a group of \*i bytes - ROR \*i : Rotate right a group of \*i bytes - -GROUP 10 - SETS - - INN \*i : Bit test on \*i byte set (bit number on top of stack) - SET \*i : Create singleton \*i byte set with bit n on (n is top of stack) - -GROUP 11 - ARRAY - - LAR \*i : Load array element, descriptor contains integers of size \*i - SAR \*i : Store array element - AAR \*i : Load address of array element - -GROUP 12 - COMPARE - - CMI \*i : Compare \*i byte integers. Push negative, zero, positive for <, = or > - CMF \*i : Compare \*i byte reals - CMU \*i : Compare \*i byte unsigneds - CMS \*i : Compare \*i byte sets. can only be used for equality test. - CMP \*- : Compare pointers - - TLT \*- : True if less, i.e. iff top of stack < 0 - TLE \*- : True if less or equal, i.e. iff top of stack <= 0 - TEQ \*- : True if equal, i.e. iff top of stack = 0 - TNE \*- : True if not equal, i.e. iff top of stack non zero - TGE \*- : True if greater or equal, i.e. iff top of stack >= 0 - TGT \*- : True if greater, i.e. 
iff top of stack > 0 - -GROUP 13 - BRANCH - - BRA \*b : Branch unconditionally to label \*b - - BLT \*b : Branch less (pop 2 words, branch if top > second) - BLE \*b : Branch less or equal - BEQ \*b : Branch equal - BNE \*b : Branch not equal - BGE \*b : Branch greater or equal - BGT \*b : Branch greater - - ZLT \*b : Branch less than zero (pop 1 word, branch negative) - ZLE \*b : Branch less or equal to zero - ZEQ \*b : Branch equal zero - ZNE \*b : Branch not zero - ZGE \*b : Branch greater or equal zero - ZGT \*b : Branch greater than zero - -GROUP 14 - PROCEDURE CALL - - CAI \*- : Call procedure (procedure instance identifier on stack) - CAL \*p : Call procedure (with name \*p) - LFR \*s : Load function result - RET \*z : Return (function result consists of top \*z bytes) - -GROUP 15 - MISCELLANEOUS - - ASP \*f : Adjust the stack pointer by \*f - ASS \*i : Adjust the stack pointer by \*i-byte integer - BLM \*z : Block move \*z bytes; first pop destination addr, then source addr - BLS \*i : Block move, size is in \*i-byte integer on top of stack - CSA \*i : Case jump; address of jump table at top of stack - CSB \*i : Table lookup jump; address of jump table at top of stack - DUP \*s : Duplicate top \*s bytes - DUS \*i : Duplicate top \*i bytes - FIL \*g : File name (external 4 := \*g) - LIM \*- : Load 16 bit ignore mask - LIN \*n : Line number (external 0 := \*n) - LNI \*- : Line number increment - LOR \*r : Load register (0=LB, 1=SP, 2=HP) - MON \*- : Monitor call - NOP \*- : No operation - RCK \*i : Range check; trap on error - RTT \*- : Return from trap - SIG \*- : Trap errors to proc nr on top of stack (-2 resets default). Static - link of procedure is below procedure number. 
Old values returned - SIM \*- : Store 16 bit ignore mask - STR \*r : Store register (0=LB, 1=SP, 2=HP) - TRP \*- : Cause trap to occur (Error number on stack) -.fi diff --git a/doc/em/app.codes.nr b/doc/em/app.codes.nr deleted file mode 100644 index 256e8b294..000000000 --- a/doc/em/app.codes.nr +++ /dev/null @@ -1,153 +0,0 @@ -.bp -.AP "EM CODE TABLES" -The following table is used by the assembler for EM machine -language. -It specifies the opcodes used for each instruction and -how arguments are mapped to machine language arguments. -The table is presented in three columns, -each line in each column contains three or four fields. -Each line describes a range of interpreter opcodes by -specifying for which instruction the range is used, the type of the -opcodes (mini, shortie, etc..) and range for the instruction -argument. -.QQ -The first field on each line gives the EM instruction mnemonic, -the second field gives some flags. -If the opcodes are minis or shorties the third field specifies -how many minis/shorties are used. -The last field gives the number of the (first) interpreter -opcode. -.LP -Flags : -.IP "" -Opcode type, only one of the following may be specified. -.RS -.IP \- -opcode without argument -.IP m -mini -.IP s -shortie -.IP 2 -opcode with 2-byte signed argument -.IP 4 -opcode with 4-byte signed argument -.IP 8 -opcode with 8-byte signed argument -.IP u -opcode with 2-byte unsigned argument -.RE -.IP "" -Secondary (escaped) opcodes. -.RS -.IP e -The opcode thus marked is in the secondary opcode group instead -of the primary -.RE -.IP "" -restrictions on arguments -.RS -.IP N -Negative arguments only -.IP P -Positive and zero arguments only -.RE -.IP "" -mapping of arguments -.RS -.IP w -argument must be divisible by the wordsize and is divided by the -wordsize before use as opcode argument. 
-.IP o -argument ( possibly after division ) must be >= 1 and is -decremented before use as opcode argument -.RE -.LP -If the opcode type is 2,4 or 8 the resulting argument is used as -opcode argument (least significant byte first). -If the opcode type is mini, the argument is added -to the first opcode \- if in range \- . -If the argument is negative, the absolute value minus one is -used in the algorithm above. -.br -For shorties with positive arguments the first opcode is used -for arguments in the range 0..255, the second for the range -256..511, etc.. -For shorties with negative arguments the first opcode is used -for arguments in the range \-1..\-256, the second for the range -\-257..\-512, etc.. -The byte following the opcode contains the least significant -byte of the argument. -First some examples of these specifications. -.IP "aar mwPo 1 34" -.br -Indicates that opcode 34 is used as a mini for Positive -instruction arguments only. -The w and o indicate division and decrementing of the -instruction argument. -Because the resulting argument must be zero ( only opcode 34 may be used), -this mini can only be used for instruction argument 2. -Conclusion: opcode 34 is for "AAR 2". -.IP "adp sP 1 41" -.br -Opcode 41 is used as shortie for ADP with arguments in the range -0..255. -.IP "bra sN 2 60" -.br -Opcode 60 is used as shortie for BRA with arguments \-1..\-256, -61 is used for arguments \-257..\-512. -.IP "zer e\- 145" -.br -Escaped opcode 145 is used for ZER. -.LP -The interpreter opcode table: -.DS -.so itables -.DE -.PP -The table above results in the following dispatch tables. -Dispatch tables are used by interpreters to jump to the -routines implementing the EM instructions, indexed by the next opcode. -Each line of the dispatch tables gives the routine names -of eight consecutive opcodes, preceded by the first opcode number -on that line. -Routine names consist of an EM mnemonic followed by a suffix. 
-The suffices show the encoding used for each opcode. -.LP -The following suffices exist: -.TS -tab(:); -l l. -.z:no arguments -.l:16-bit argument -.L:32-bit argument -.u:16-bit unsigned argument -.lw:16-bit argument divided by the wordsize -.Lw:32-bit argument divided by the wordsize -.p:positive 16-bit argument -.P:positive 32-bit argument -.pw:positive 16-bit argument divided by the wordsize -.Pw:positive 32-bit argument divided by the wordsize -.n:negative 16-bit argument -.N:negative 32-bit argument -.nw:negative 16-bit argument divided by the wordsize -.Nw:negative 32-bit argument divided by the wordsize -.s:shortie with as high order argument byte -.w:shortie with argument divided by the wordsize -.:mini with as argument -.W:mini with *wordsize as argument -.TE -.LP - is a possibly negative integer. -.LP -The dispatch table for the 256 primary opcodes: -.sp 1 -.so dispat1 -.sp 2 -The list of secondary opcodes (escape1): -.sp 1 -.so dispat2 -.sp 2 -Finally, the list of opcodes with four byte arguments (escape2). 
-.sp 1 -.so dispat3 diff --git a/doc/em/app.exam.nr b/doc/em/app.exam.nr deleted file mode 100644 index 3080d6ad8..000000000 --- a/doc/em/app.exam.nr +++ /dev/null @@ -1,275 +0,0 @@ -.bp -.AP "AN EXAMPLE PROGRAM" -.PP -.na -.ta 4n 8n 12n 16n 20n -.nf - 1 program example(output); - 2 {This program just demonstrates typical EM code.} - 3 type rec = record r1: integer; r2:real; r3: boolean end; - 4 var mi: integer; mx:real; r:rec; - 5 - 6 function sum(a,b:integer):integer; - 7 begin - 8 sum := a + b - 9 end; -10 -11 procedure test(var r: rec); -12 label 1; -13 var i,j: integer; -14 x,y: real; -15 b: boolean; -16 c: char; -17 a: array[1..100] of integer; -18 -19 begin -20 j := 1; -21 i := 3 * j + 6; -22 x := 4.8; -23 y := x/0.5; -24 b := true; -25 c := 'z'; -26 for i:= 1 to 100 do a[i] := i * i; -27 r.r1 := j+27; -28 r.r3 := b; -29 r.r2 := x+y; -30 i := sum(r.r1, a[j]); -31 while i > 0 do begin j := j + r.r1; i := i - 1 end; -32 with r do begin r3 := b; r2 := x+y; r1 := 0 end; -33 goto 1; -34 1: writeln(j, i:6, x:9:3, b) -35 end; {test} -36 begin {main program} -37 mx := 15.96; -38 mi := 99; -39 test(r) -40 end. -.fi -.ad -.bp -The EM code as produced by the Pascal-VU compiler is given below. Comments -have been added manually. Note that this code has already been optimized. 
-.LP -.na -.nf -.ta 1n 24n - mes 2,2,2 ; wordsize 2, pointersize 2 -\&.1 - rom 't.p\e000' ; the name of the source file - hol 552,\-32768,0 ; externals and buf occupy 552 bytes - exp $sum ; sum can be called from other modules - pro $sum,2 ; procedure sum ; 2 bytes local storage - lin 8 ; code from source line 8 - ldl 0 ; load two locals ( a and b ) - adi 2 ; add them - ret 2 ; return the result - end 2 ; end of procedure ( still two bytes local storage ) -\&.2 - rom 1,99,2 ; descriptor of array a[] - exp $test ; the compiler exports all level 0 procedures - pro $test,226 ; procedure test, 226 bytes local storage -\&.3 - rom 4.8F8 ; assemble Floating point 4.8 (8 bytes) in -\&.4 ; global storage - rom 0.5F8 ; same for 0.5 - mes 3,\-226,2,2 ; compiler temporary not referenced by address - mes 3,\-24,2,0 ; the same is true for i, j, b and c in test - mes 3,\-22,2,0 - mes 3,\-4,2,0 - mes 3,\-2,2,0 - mes 3,\-20,8,0 ; and for x and y - mes 3,\-12,8,0 - lin 20 ; maintain source line number - loc 1 - stl \-4 ; j := 1 - lni ; lin 21 prior to optimization - lol \-4 - loc 3 - mli 2 - loc 6 - adi 2 - stl \-2 ; i := 3 * j + 6 - lni ; lin 22 prior to optimization - lae .3 - loi 8 - lal \-12 - sti 8 ; x := 4.8 - lni ; lin 23 prior to optimization - lal \-12 - loi 8 - lae .4 - loi 8 - dvf 8 - lal \-20 - sti 8 ; y := x / 0.5 - lni ; lin 24 prior to optimization - loc 1 - stl \-22 ; b := true - lni ; lin 25 prior to optimization - loc 122 - stl \-24 ; c := 'z' - lni ; lin 26 prior to optimization - loc 1 - stl \-2 ; for i:= 1 -2 - lol \-2 - dup 2 - mli 2 ; i*i - lal \-224 - lol \-2 - lae .2 - sar 2 ; a[i] := - lol \-2 - loc 100 - beq *3 ; to 100 do - inl \-2 ; increment i and loop - bra *2 -3 - lin 27 - lol \-4 - loc 27 - adi 2 ; j + 27 - sil 0 ; r.r1 := - lni ; lin 28 prior to optimization - lol \-22 ; b - lol 0 - stf 10 ; r.r3 := - lni ; lin 29 prior to optimization - lal \-20 - loi 16 - adf 8 ; x + y - lol 0 - adp 2 - sti 8 ; r.r2 := - lni ; lin 30 prior to optimization - lal 
\-224 - lol \-4 - lae .2 - lar 2 ; a[j] - lil 0 ; r.r1 - cal $sum ; call now - asp 4 ; remove parameters from stack - lfr 2 ; get function result - stl \-2 ; i := -4 - lin 31 - lol \-2 - zle *5 ; while i > 0 do - lol \-4 - lil 0 - adi 2 - stl \-4 ; j := j + r.r1 - del \-2 ; i := i - 1 - bra *4 ; loop -5 - lin 32 - lol 0 - stl \-226 ; make copy of address of r - lol \-22 - lol \-226 - stf 10 ; r3 := b - lal \-20 - loi 16 - adf 8 - lol \-226 - adp 2 - sti 8 ; r2 := x + y - loc 0 - sil \-226 ; r1 := 0 - lin 34 ; note the absence of the unnecessary jump - lae 22 ; address of output structure - lol \-4 - cal $_wri ; write integer with default width - asp 4 ; pop parameters - lae 22 - lol \-2 - loc 6 - cal $_wsi ; write integer width 6 - asp 6 - lae 22 - lal \-12 - loi 8 - loc 9 - loc 3 - cal $_wrf ; write fixed format real, width 9, precision 3 - asp 14 - lae 22 - lol \-22 - cal $_wrb ; write boolean, default width - asp 4 - lae 22 - cal $_wln ; writeln - asp 2 - ret 0 ; return, no result - end 226 - exp $_main - pro $_main,0 ; main program -\&.6 - con 2,\-1,22 ; description of external files -\&.5 - rom 15.96F8 - fil .1 ; maintain source file name - lae .6 ; description of external files - lae 0 ; base of hol area to relocate buffer addresses - cal $_ini ; initialize files, etc... - asp 4 - lin 37 - lae .5 - loi 8 - lae 2 - sti 8 ; mx := 15.96 - lni ; lin 38 prior to optimization - loc 99 - ste 0 ; mi := 99 - lni ; lin 39 prior to optimization - lae 10 ; address of r - cal $test - asp 2 - loc 0 ; normal exit - cal $_hlt ; cleanup and finish - asp 2 - end 0 - mes 5 ; reals were used -.fi -.ad -.PP -The compact code corresponding to the above program is listed below. -Read it horizontally, line by line, not column by column. -Each number represents a byte of compact code, printed in decimal. -The first two bytes form the magic word. 
-.LP -.Dr 33 - 173 0 159 122 122 122 255 242 1 161 250 124 116 46 112 0 - 255 156 245 40 2 245 0 128 120 155 249 123 115 117 109 160 - 249 123 115 117 109 122 67 128 63 120 3 122 88 122 152 122 - 242 2 161 121 219 122 255 155 249 124 116 101 115 116 160 249 - 124 116 101 115 116 245 226 0 242 3 161 253 128 123 52 46 - 56 255 242 4 161 253 128 123 48 46 53 255 159 123 245 30 - 255 122 122 255 159 123 96 122 120 255 159 123 98 122 120 255 - 159 123 116 122 120 255 159 123 118 122 120 255 159 123 100 128 - 120 255 159 123 108 128 120 255 67 140 69 121 113 116 68 73 - 116 69 123 81 122 69 126 3 122 113 118 68 57 242 3 72 - 128 58 108 112 128 68 58 108 72 128 57 242 4 72 128 44 - 128 58 100 112 128 68 69 121 113 98 68 69 245 122 0 113 - 96 68 69 121 113 118 182 73 118 42 122 81 122 58 245 32 - 255 73 118 57 242 2 94 122 73 118 69 220 10 123 54 118 - 18 122 183 67 147 73 116 69 147 3 122 104 120 68 73 98 - 73 120 111 130 68 58 100 72 136 2 128 73 120 4 122 112 - 128 68 58 245 32 255 73 116 57 242 2 59 122 65 120 20 - 249 123 115 117 109 8 124 64 122 113 118 184 67 151 73 118 - 128 125 73 116 65 120 3 122 113 116 41 118 18 124 185 67 - 152 73 120 113 245 30 255 73 98 73 245 30 255 111 130 58 - 100 72 136 2 128 73 245 30 255 4 122 112 128 69 120 104 - 245 30 255 67 154 57 142 73 116 20 249 124 95 119 114 105 - 8 124 57 142 73 118 69 126 20 249 124 95 119 115 105 8 - 126 57 142 58 108 72 128 69 129 69 123 20 249 124 95 119 - 114 102 8 134 57 142 73 98 20 249 124 95 119 114 98 8 - 124 57 142 20 249 124 95 119 108 110 8 122 88 120 152 245 - 226 0 155 249 125 95 109 97 105 110 160 249 125 95 109 97 - 105 110 120 242 6 151 122 119 142 255 242 5 161 253 128 125 - 49 53 46 57 54 255 50 242 1 57 242 6 57 120 20 249 - 124 95 105 110 105 8 124 67 157 57 242 5 72 128 57 122 - 112 128 68 69 219 110 120 68 57 130 20 249 124 116 101 115 - 116 8 122 69 120 20 249 124 95 104 108 116 8 122 152 120 - 159 124 160 255 159 125 255 -.De diff --git a/doc/em/app.int.nr b/doc/em/app.int.nr deleted 
file mode 100644 index 26dd3a7c6..000000000 --- a/doc/em/app.int.nr +++ /dev/null @@ -1,11 +0,0 @@ -.BP -.AP "EM INTERPRETER" -.nf -.ft CW -.lg 0 -.nr x \w' ' -.ta \nxu +\nxu +\nxu +\nxu +\nxu +\nxu +\nxu +\nxu +\nxu +\nxu -.so em.i -.ft P -.lg 1 -.fi diff --git a/doc/em/app.nr b/doc/em/app.nr deleted file mode 100644 index 78e082fcc..000000000 --- a/doc/em/app.nr +++ /dev/null @@ -1,488 +0,0 @@ -.BP -.AP "EM INTERPRETER" -.nf -.ta 8 16 24 32 40 48 56 64 72 80 -.so em.i -.fi -.BP -.AP "EM CODE TABLES" -The following table is used by the assembler for EM machine -language. -It specifies the opcodes used for each instruction and -how arguments are mapped to machine language arguments. -The table is presented in three columns, -each line in each column contains three or four fields. -Each line describes a range of interpreter opcodes by -specifying for which instruction the range is used, the type of the -opcodes (mini, shortie, etc..) and range for the instruction -argument. -.A -The first field on each line gives the EM instruction mnemonic, -the second field gives some flags. -If the opcodes are minis or shorties the third field specifies -how many minis/shorties are used. -The last field gives the number of the (first) interpreter -opcode. -.N 1 -Flags : -.IS 3 -.N 1 -Opcode type, only one of the following may be specified. -.PS - 5 " " -.PT - -opcode without argument -.PT m -mini -.PT s -shortie -.PT 2 -opcode with 2-byte signed argument -.PT 4 -opcode with 4-byte signed argument -.PT 8 -opcode with 8-byte signed argument -.PE -Secondary (escaped) opcodes. -.PS - 5 " " -.PT e -The opcode thus marked is in the secondary opcode group instead -of the primary -.PE -restrictions on arguments -.PS - 5 " " -.PT N -Negative arguments only -.PT P -Positive and zero arguments only -.PE -mapping of arguments -.PS - 5 " " -.PT w -argument must be divisible by the wordsize and is divided by the -wordsize before use as opcode argument. 
-.PT o -argument ( possibly after division ) must be >= 1 and is -decremented before use as opcode argument -.PE -.IE -If the opcode type is 2,4 or 8 the resulting argument is used as -opcode argument (least significant byte first). -.N -If the opcode type is mini, the argument is added -to the first opcode - if in range - . -If the argument is negative, the absolute value minus one is -used in the algorithm above. -.N -For shorties with positive arguments the first opcode is used -for arguments in the range 0..255, the second for the range -256..511, etc.. -For shorties with negative arguments the first opcode is used -for arguments in the range -1..-256, the second for the range --257..-512, etc.. -The byte following the opcode contains the least significant -byte of the argument. -First some examples of these specifications. -.PS - 5 -.PT "aar mwPo 1 34" -Indicates that opcode 34 is used as a mini for Positive -instruction arguments only. -The w and o indicate division and decrementing of the -instruction argument. -Because the resulting argument must be zero ( only opcode 34 may be used -), this mini can only be used for instruction argument 2. -Conclusion: opcode 34 is for "AAR 2". -.PT "adp sP 1 41" -Opcode 41 is used as shortie for ADP with arguments in the range -0..255. -.PT "bra sN 2 60" -Opcode 60 is used as shortie for BRA with arguments -1..-256, -61 is used for arguments -257..-512. -.PT "zer e- 145" -Escaped opcode 145 is used for ZER. -.PE -The interpreter opcode table: -.N 1 -.IS 3 -.DS B -.so itables -.DE 0 -.IE -.P -The table above results in the following dispatch tables. -Dispatch tables are used by interpreters to jump to the -routines implementing the EM instructions, indexed by the next opcode. -Each line of the dispatch tables gives the routine names -of eight consecutive opcodes, preceded by the first opcode number -on that line. -Routine names consist of an EM mnemonic followed by a suffix. 
-The suffices show the encoding used for each opcode. -.N -The following suffices exist: -.N 1 -.VS 1 0 -.IS 4 -.PS - 11 -.PT .z -no arguments -.PT .l -16-bit argument -.PT .lw -16-bit argument divided by the wordsize -.PT .p -positive 16-bit argument -.PT .pw -positive 16-bit argument divided by the wordsize -.PT .n -negative 16-bit argument -.PT .nw -negative 16-bit argument divided by the wordsize -.PT .s -shortie with as high order argument byte -.PT .sw -shortie with argument divided by the wordsize -.PT . -mini with as argument -.PT .W -mini with *wordsize as argument -.PE 3 - is a possibly negative integer. -.VS 1 1 -.IE -The dispatch table for the 256 primary opcodes: -.DS B - 0 loc.0 loc.1 loc.2 loc.3 loc.4 loc.5 loc.6 loc.7 - 8 loc.8 loc.9 loc.10 loc.11 loc.12 loc.13 loc.14 loc.15 - 16 loc.16 loc.17 loc.18 loc.19 loc.20 loc.21 loc.22 loc.23 - 24 loc.24 loc.25 loc.26 loc.27 loc.28 loc.29 loc.30 loc.31 - 32 loc.32 loc.33 aar.1W adf.s0 adi.1W adi.2W adp.l adp.1 - 40 adp.2 adp.s0 adp.s-1 ads.1W and.1W asp.1W asp.2W asp.3W - 48 asp.4W asp.5W asp.w0 beq.l beq.s0 bge.s0 bgt.s0 ble.s0 - 56 blm.s0 blt.s0 bne.s0 bra.l bra.s-1 bra.s-2 bra.s0 bra.s1 - 64 cal.1 cal.2 cal.3 cal.4 cal.5 cal.6 cal.7 cal.8 - 72 cal.9 cal.10 cal.11 cal.12 cal.13 cal.14 cal.15 cal.16 - 80 cal.17 cal.18 cal.19 cal.20 cal.21 cal.22 cal.23 cal.24 - 88 cal.25 cal.26 cal.27 cal.28 cal.s0 cff.z cif.z cii.z - 96 cmf.s0 cmi.1W cmi.2W cmp.z cms.s0 csa.1W csb.1W dec.z - 104 dee.w0 del.w-1 dup.1W dvf.s0 dvi.1W fil.l inc.z ine.lw - 112 ine.w0 inl.-1W inl.-2W inl.-3W inl.w-1 inn.s0 ior.1W ior.s0 - 120 lae.l lae.w0 lae.w1 lae.w2 lae.w3 lae.w4 lae.w5 lae.w6 - 128 lal.p lal.n lal.0 lal.-1 lal.w0 lal.w-1 lal.w-2 lar.W - 136 ldc.0 lde.lw lde.w0 ldl.0 ldl.w-1 lfr.1W lfr.2W lfr.s0 - 144 lil.w-1 lil.w0 lil.0 lil.1W lin.l lin.s0 lni.z loc.l - 152 loc.-1 loc.s0 loc.s-1 loe.lw loe.w0 loe.w1 loe.w2 loe.w3 - 160 loe.w4 lof.l lof.1W lof.2W lof.3W lof.4W lof.s0 loi.l - 168 loi.1 loi.1W loi.2W loi.3W loi.4W loi.s0 
lol.pw lol.nw - 176 lol.0 lol.1W lol.2W lol.3W lol.-1W lol.-2W lol.-3W lol.-4W - 184 lol.-5W lol.-6W lol.-7W lol.-8W lol.w0 lol.w-1 lxa.1 lxl.1 - 192 lxl.2 mlf.s0 mli.1W mli.2W rck.1W ret.0 ret.1W ret.s0 - 200 rmi.1W sar.1W sbf.s0 sbi.1W sbi.2W sdl.w-1 set.s0 sil.w-1 - 208 sil.w0 sli.1W ste.lw ste.w0 ste.w1 ste.w2 stf.l stf.W - 216 stf.2W stf.s0 sti.1 sti.1W sti.2W sti.3W sti.4W sti.s0 - 224 stl.pw stl.nw stl.0 stl.1W stl.-1W stl.-2W stl.-3W stl.-4W - 232 stl.-5W stl.w-1 teq.z tgt.z tlt.z tne.z zeq.l zeq.s0 - 240 zeq.s1 zer.s0 zge.s0 zgt.s0 zle.s0 zlt.s0 zne.s0 zne.s-1 - 248 zre.lw zre.w0 zrl.-1W zrl.-2W zrl.w-1 zrl.nw escape1 escape2 -.DE 2 -The list of secondary opcodes (escape1): -.N 1 -.DS B - 0 aar.l aar.z adf.l adf.z adi.l adi.z ads.l ads.z - 8 adu.l adu.z and.l and.z asp.lw ass.l ass.z bge.l - 16 bgt.l ble.l blm.l bls.l bls.z blt.l bne.l cai.z - 24 cal.l cfi.z cfu.z ciu.z cmf.l cmf.z cmi.l cmi.z - 32 cms.l cms.z cmu.l cmu.z com.l com.z csa.l csa.z - 40 csb.l csb.z cuf.z cui.z cuu.z dee.lw del.pw del.nw - 48 dup.l dus.l dus.z dvf.l dvf.z dvi.l dvi.z dvu.l - 56 dvu.z fef.l fef.z fif.l fif.z inl.pw inl.nw inn.l - 64 inn.z ior.l ior.z lar.l lar.z ldc.l ldf.l ldl.pw - 72 ldl.nw lfr.l lil.pw lil.nw lim.z los.l los.z lor.s0 - 80 lpi.l lxa.l lxl.l mlf.l mlf.z mli.l mli.z mlu.l - 88 mlu.z mon.z ngf.l ngf.z ngi.l ngi.z nop.z rck.l - 96 rck.z ret.l rmi.l rmi.z rmu.l rmu.z rol.l rol.z - 104 ror.l ror.z rtt.z sar.l sar.z sbf.l sbf.z sbi.l - 112 sbi.z sbs.l sbs.z sbu.l sbu.z sde.l sdf.l sdl.pw - 120 sdl.nw set.l set.z sig.z sil.pw sil.nw sim.z sli.l - 128 sli.z slu.l slu.z sri.l sri.z sru.l sru.z sti.l - 136 sts.l sts.z str.s0 tge.z tle.z trp.z xor.l xor.z - 144 zer.l zer.z zge.l zgt.l zle.l zlt.l zne.l zrf.l - 152 zrf.z zrl.pw dch.z exg.s0 exg.l exg.z lpb.z gto.l -.DE 2 -Finally, the list of opcodes with four byte arguments (escape2). 
-.DS - - 0 loc -.DE 0 -.BP -.AP "AN EXAMPLE PROGRAM" -.DS B - 1 program example(output); - 2 {This program just demonstrates typical EM code.} - 3 type rec = record r1: integer; r2:real; r3: boolean end; - 4 var mi: integer; mx:real; r:rec; - 5 - 6 function sum(a,b:integer):integer; - 7 begin - 8 sum := a + b - 9 end; -10 -11 procedure test(var r: rec); -12 label 1; -13 var i,j: integer; -14 x,y: real; -15 b: boolean; -16 c: char; -17 a: array[1..100] of integer; -18 -19 begin -20 j := 1; -21 i := 3 * j + 6; -22 x := 4.8; -23 y := x/0.5; -24 b := true; -25 c := 'z'; -26 for i:= 1 to 100 do a[i] := i * i; -27 r.r1 := j+27; -28 r.r3 := b; -29 r.r2 := x+y; -30 i := sum(r.r1, a[j]); -31 while i > 0 do begin j := j + r.r1; i := i - 1 end; -32 with r do begin r3 := b; r2 := x+y; r1 := 0 end; -33 goto 1; -34 1: writeln(j, i:6, x:9:3, b) -35 end; {test} -36 begin {main program} -37 mx := 15.96; -38 mi := 99; -39 test(r) -40 end. -.DE 0 -.BP -The EM code as produced by the Pascal-VU compiler is given below. Comments -have been added manually. Note that this code has already been optimized. 
-.DS B - mes 2,2,2 ; wordsize 2, pointersize 2 - .1 - rom 't.p\e000' ; the name of the source file - hol 552,-32768,0 ; externals and buf occupy 552 bytes - exp $sum ; sum can be called from other modules - pro $sum,2 ; procedure sum; 2 bytes local storage - lin 8 ; code from source line 8 - ldl 0 ; load two locals ( a and b ) - adi 2 ; add them - ret 2 ; return the result - end 2 ; end of procedure ( still two bytes local storage ) - .2 - rom 1,99,2 ; descriptor of array a[] - exp $test ; the compiler exports all level 0 procedures - pro $test,226 ; procedure test, 226 bytes local storage - .3 - rom 4.8F8 ; assemble Floating point 4.8 (8 bytes) in - .4 ; global storage - rom 0.5F8 ; same for 0.5 - mes 3,-226,2,2 ; compiler temporary not referenced by address - mes 3,-24,2,0 ; the same is true for i, j, b and c in test - mes 3,-22,2,0 - mes 3,-4,2,0 - mes 3,-2,2,0 - mes 3,-20,8,0 ; and for x and y - mes 3,-12,8,0 - lin 20 ; maintain source line number - loc 1 - stl -4 ; j := 1 - lni ; lin 21 prior to optimization - lol -4 - loc 3 - mli 2 - loc 6 - adi 2 - stl -2 ; i := 3 * j + 6 - lni ; lin 22 prior to optimization - lae .3 - loi 8 - lal -12 - sti 8 ; x := 4.8 - lni ; lin 23 prior to optimization - lal -12 - loi 8 - lae .4 - loi 8 - dvf 8 - lal -20 - sti 8 ; y := x / 0.5 - lni ; lin 24 prior to optimization - loc 1 - stl -22 ; b := true - lni ; lin 25 prior to optimization - loc 122 - stl -24 ; c := 'z' - lni ; lin 26 prior to optimization - loc 1 - stl -2 ; for i:= 1 - 2 - lol -2 - dup 2 - mli 2 ; i*i - lal -224 - lol -2 - lae .2 - sar 2 ; a[i] := - lol -2 - loc 100 - beq *3 ; to 100 do - inl -2 ; increment i and loop - bra *2 - 3 - lin 27 - lol -4 - loc 27 - adi 2 ; j + 27 - sil 0 ; r.r1 := - lni ; lin 28 prior to optimization - lol -22 ; b - lol 0 - stf 10 ; r.r3 := - lni ; lin 29 prior to optimization - lal -20 - loi 16 - adf 8 ; x + y - lol 0 - adp 2 - sti 8 ; r.r2 := - lni ; lin 30 prior to optimization - lal -224 - lol -4 - lae .2 - lar 2 ; a[j] - lil 0 ;
r.r1 - cal $sum ; call now - asp 4 ; remove parameters from stack - lfr 2 ; get function result - stl -2 ; i := - 4 - lin 31 - lol -2 - zle *5 ; while i > 0 do - lol -4 - lil 0 - adi 2 - stl -4 ; j := j + r.r1 - del -2 ; i := i - 1 - bra *4 ; loop - 5 - lin 32 - lol 0 - stl -226 ; make copy of address of r - lol -22 - lol -226 - stf 10 ; r3 := b - lal -20 - loi 16 - adf 8 - lol -226 - adp 2 - sti 8 ; r2 := x + y - loc 0 - sil -226 ; r1 := 0 - lin 34 ; note the absence of the unnecessary jump - lae 22 ; address of output structure - lol -4 - cal $_wri ; write integer with default width - asp 4 ; pop parameters - lae 22 - lol -2 - loc 6 - cal $_wsi ; write integer width 6 - asp 6 - lae 22 - lal -12 - loi 8 - loc 9 - loc 3 - cal $_wrf ; write fixed format real, width 9, precision 3 - asp 14 - lae 22 - lol -22 - cal $_wrb ; write boolean, default width - asp 4 - lae 22 - cal $_wln ; writeln - asp 2 - ret 0 ; return, no result - end 226 - exp $_main - pro $_main,0 ; main program - .6 - con 2,-1,22 ; description of external files - .5 - rom 15.96F8 - fil .1 ; maintain source file name - lae .6 ; description of external files - lae 0 ; base of hol area to relocate buffer addresses - cal $_ini ; initialize files, etc... - asp 4 - lin 37 - lae .5 - loi 8 - lae 2 - sti 8 ; mx := 15.96 - lni ; lin 38 prior to optimization - loc 99 - ste 0 ; mi := 99 - lni ; lin 39 prior to optimization - lae 10 ; address of r - cal $test - asp 2 - loc 0 ; normal exit - cal $_hlt ; cleanup and finish - asp 2 - end 0 - mes 5 ; reals were used -.DE 0 -The compact code corresponding to the above program is listed below. -Read it horizontally, line by line, not column by column. -Each number represents a byte of compact code, printed in decimal. -The first two bytes form the magic word.
-.N 1 -.IS 3 -.DS B -173 0 159 122 122 122 255 242 1 161 250 124 116 46 112 0 -255 156 245 40 2 245 0 128 120 155 249 123 115 117 109 160 -249 123 115 117 109 122 67 128 63 120 3 122 88 122 152 122 -242 2 161 121 219 122 255 155 249 124 116 101 115 116 160 249 -124 116 101 115 116 245 226 0 242 3 161 253 128 123 52 46 - 56 255 242 4 161 253 128 123 48 46 53 255 159 123 245 30 -255 122 122 255 159 123 96 122 120 255 159 123 98 122 120 255 -159 123 116 122 120 255 159 123 118 122 120 255 159 123 100 128 -120 255 159 123 108 128 120 255 67 140 69 121 113 116 68 73 -116 69 123 81 122 69 126 3 122 113 118 68 57 242 3 72 -128 58 108 112 128 68 58 108 72 128 57 242 4 72 128 44 -128 58 100 112 128 68 69 121 113 98 68 69 245 122 0 113 - 96 68 69 121 113 118 182 73 118 42 122 81 122 58 245 32 -255 73 118 57 242 2 94 122 73 118 69 220 10 123 54 118 - 18 122 183 67 147 73 116 69 147 3 122 104 120 68 73 98 - 73 120 111 130 68 58 100 72 136 2 128 73 120 4 122 112 -128 68 58 245 32 255 73 116 57 242 2 59 122 65 120 20 -249 123 115 117 109 8 124 64 122 113 118 184 67 151 73 118 -128 125 73 116 65 120 3 122 113 116 41 118 18 124 185 67 -152 73 120 113 245 30 255 73 98 73 245 30 255 111 130 58 -100 72 136 2 128 73 245 30 255 4 122 112 128 69 120 104 -245 30 255 67 154 57 142 73 116 20 249 124 95 119 114 105 - 8 124 57 142 73 118 69 126 20 249 124 95 119 115 105 8 -126 57 142 58 108 72 128 69 129 69 123 20 249 124 95 119 -114 102 8 134 57 142 73 98 20 249 124 95 119 114 98 8 -124 57 142 20 249 124 95 119 108 110 8 122 88 120 152 245 -226 0 155 249 125 95 109 97 105 110 160 249 125 95 109 97 -105 110 120 242 6 151 122 119 142 255 242 5 161 253 128 125 - 49 53 46 57 54 255 50 242 1 57 242 6 57 120 20 249 -124 95 105 110 105 8 124 67 157 57 242 5 72 128 57 122 -112 128 68 69 219 110 120 68 57 130 20 249 124 116 101 115 -116 8 122 69 120 20 249 124 95 104 108 116 8 122 152 120 -159 124 160 255 159 125 255 -.DE 0 -.IE -.MS T A 0 -.ME -.BP -.MS B A 0 -.ME -.CT diff --git a/doc/em/assem.nr 
b/doc/em/assem.nr deleted file mode 100644 index 59ea75536..000000000 --- a/doc/em/assem.nr +++ /dev/null @@ -1,802 +0,0 @@ -.bp -.P1 "EM ASSEMBLY LANGUAGE" -.PP -We use two representations for assembly language programs, -one is in ASCII and the other is the compact assembly language. -The latter needs less space than the first for the same program -and therefore allows faster processing. -Our only program accepting ASCII assembly -language converts it to the compact form. -All other programs expect compact assembly input. -The first part of the chapter describes the ASCII assembly -language and its semantics. -The second part describes the syntax of the compact assembly -language. -The last part lists the EM instructions with the type of -arguments allowed and an indication of the function. -Appendix A gives a detailed description of the effect of all -instructions in the form of a Pascal program. -.P2 "ASCII assembly language" -.PP -An assembly language program consists of a series of lines, each -line may be blank, contain one (pseudo)instruction or contain one -label. -Input to the assembler is in lower case. -Upper case is used in this -document merely to distinguish keywords from the surrounding prose. -Comment is allowed at the end of each line and starts with a semicolon ";". -This kind of comment does not exist in the compact form. -.QQ -Labels must be placed all by themselves on a line and start in -column 1. -There are two kinds of labels, instruction and data labels. -Instruction labels are unsigned positive integers. -The scope of an instruction label is its procedure. -.QQ -The pseudoinstructions CON, ROM and BSS may be preceded by a -line containing a -1\-8 character data label, the first character of which is a -letter, period or underscore. -The period may only be followed by -digits, the others may be followed by letters, digits and underscores. -The use of the character "." followed by a constant, -which must be in the range 1 to 32767 (e.g.
".40") is recommended -for compiler -generated programs. -These labels are considered as a special case and handled -more efficiently in compact assembly language (see below). -Note that a data label on its own or two consecutive labels are not -allowed. -.PP -Each statement may contain an instruction mnemonic or pseudoinstruction. -These must begin in column 2 or later (not column 1) and must be followed -by a space, tab, semicolon or LF. -Everything on the line following a semicolon is -taken as a comment. -.PP -Each input file contains one module. -A module may contain many procedures, -which may be nested. -A procedure consists of -a PRO statement, a (possibly empty) -collection of instructions and pseudoinstructions and finally an END -statement. -Pseudoinstructions are also allowed between procedures. -They do not belong to a specific procedure. -.PP -All constants in EM are interpreted in the decimal base. -The ASCII assembly language accepts constant expressions -wherever constants are allowed. -The operators recognized are: +, \-, *, % and / with the usual -precedence order. -Use of the parentheses ( and ) to alter the precedence order is allowed. -.P3 "Instruction arguments" -.PP -Unlike many other assembly languages, the EM assembly -language requires all arguments of normal and pseudoinstructions -to be either a constant or an identifier, but not a combination -of these two. -There is one exception to this rule: when a data label is used -for initialization or as an instruction argument, -expressions of the form 'label+constant' and 'label-constant' -are allowed. -This makes it possible to address, for example, the -third word of a ten word BSS block -directly. -Thus LOE LABEL+4 is permitted and so is CON LABEL+3. -The resulting address must be in the same fragment as the label. -It is not allowed to add or subtract from instruction labels or procedure -identifiers, -which certainly is not a severe restriction and greatly aids -optimization.
-.PP -Instruction arguments can be constants, -data labels, data labels offsetted by a constant, instruction -labels and procedure identifiers. -The range of integers allowed depends on the instruction. -Most instructions allow only integers -(signed or unsigned) -that fit in a word. -Arguments used as offsets to pointers should fit in a -pointer-sized integer. -Finally, arguments to LDC should fit in a double-word integer. -.PP -Several instructions have two possible forms: -with an explicit argument and with an implicit argument on top of the stack. -The size of the implicit argument is the wordsize. -The implicit argument is always popped before all other operands. -For example: 'CMI 4' specifies that two four-byte signed -integers on top of the stack are to be compared. -\&'CMI' without an argument expects a wordsized integer -on top of the stack that specifies the size of the integers to -be compared. -Thus the following two sequences are equivalent: -.KS -.TS -center, tab(:) ; -l r 30 l r. -LDL:\-10:LDL:\-10 -LDL:\-14:LDL:\-14 -::LOC:4 -CMI:4:CMI: -ZEQ:*1:ZEQ:*1 -.TE -.KE -Section 11.1.6 shows the arguments allowed for each instruction. -.P3 "Pseudoinstruction arguments" -.PP -Pseudoinstruction arguments can be divided in two classes: -Initializers and others. -The following initializers are allowed: signed integer constants, -unsigned integer constants, floating-point constants, strings, -data labels, data labels offsetted by a constant, instruction -labels and procedure identifiers. -.PP -Constant initializers in BSS, HOL, CON and ROM pseudoinstructions -can be followed by a letter I, U or F. -This indicator -specifies the type of the initializer: Integer, Unsigned or Float. -If no indicator is present I is assumed. -The size of the initializer is the wordsize unless -the indicator is followed by an integer specifying the -initializer's size. -This integer is governed by the same restrictions as for -transfer of objects to/from memory. 
-As in instruction arguments, initializers include expressions of the form: -\&"LABEL+offset" and "LABEL\-offset". -The offset must be an unsigned decimal constant. -The 'IUF' indicators cannot be used in the offsets. -.PP -Data labels are referred to by their name. -.PP -Strings are surrounded by double quotes ("). -Semicolons in a string do not indicate the start of a comment. -In the ASCII representation the escape character \e (backslash) -alters the meaning of subsequent character(s). -This feature allows inclusion of zeroes, graphic characters and -the double quote in the string. -The following escape sequences exist: -.TS -center, tab(:); -l l l. -newline:NL\|(LF):\en -horizontal tab:HT:\et -backspace:BS:\eb -carriage return:CR:\er -form feed:FF:\ef -backslash:\e:\e\e -double quote:":\e" -bit pattern:\fBddd\fP:\e\fBddd\fP -.TE -The escape \fB\eddd\fP consists of the backslash followed by 1, -2, or 3 octal digits specifying the value of -the desired character. -If the character following a backslash is not one of those -specified, -the backslash is ignored. -Example: CON "hello\e012\e0". -Each string element initializes a single byte. -The ASCII character set is used to map characters onto values. -.PP -Instruction labels are referred to as *1, *2, etc. in both branch -instructions and as initializers. -.PP -The notation $procname means the identifier for the procedure -with the specified name. -This identifier has the size of a pointer. -.P3 Notation -.PP -First, the notation used for the arguments, classes of -instructions and pseudoinstructions. -.DS -.TS -tab(:); -l l l. -:\&=:integer constant (current range \-2**31..2**31\-1) -:\&=:data label -:\&=: or or + or \- -:\&=:integer constant, unsigned constant, floating-point constant -:\&=:string constant (surrounded by double quotes), -:\&=:instruction label -::'*' followed by an integer in the range 0..32767. -:\&=:procedure number ('$' followed by a procedure name) -:\&=:, , or .
-:\&=: or -<...>*:\&=:zero or more of <...> -<...>+:\&=:one or more of <...> -[...]:\&=:optional ... -.TE -.DE -.P3 "Pseudoinstructions" -.P4 "Storage declaration" -.PP -Initialized global data is allocated by the pseudoinstruction CON, -which needs at least one argument. -Each argument is used to allocate and initialize a number of -consecutive bytes in data memory. -The number of bytes to be allocated and the alignment depend on the type -of the argument. -For each argument, an integral number of words, -determined by the argument type, is allocated and initialized. -.PP -The pseudoinstruction ROM is the same as CON, -except that it guarantees that the initialized words -will not change during the execution of the program. -This information allows optimizers to do -certain calculations such as array indexing and -subrange checking at compile time instead -of at run time. -.PP -The pseudoinstruction BSS allocates -uninitialized global data or large blocks of data initialized -by the same value. -The first argument to this pseudo is the number -of bytes required, which must be a multiple of the wordsize. -The other arguments specify the value used for initialization and -whether the initialization is only for convenience or a strict necessity. -The pseudoinstruction HOL is similar to BSS in that it requests an -(un)initialized global data block. -Addressing of a HOL block, however, is quasi absolute. -The first byte is addressed by 0, -the second byte by 1 etc. in assembly language. -The assembler/loader adds the base address of -the HOL block to these numbers to obtain the -absolute address in the machine language. -.PP -The scope of a HOL block starts at the HOL pseudo and -ends at the next HOL pseudo or at the end of a module -whatever comes first. -Each instruction falls in the scope of at most one -HOL block, the current HOL block. -It is not allowed to have more than one HOL block per procedure. 
-.PP -The alignment restrictions are enforced by the -pseudoinstructions. -All initializers are aligned on a multiple of their size or the wordsize -whichever is smaller. -Strings form an exception, they are to be seen as a sequence of initializers -each for one byte, i.e. strings are not padded with zero bytes. -Switching to another type of fragment or placing a label forces -word-alignment. -There are three types of fragments in global data space: CON, ROM and -BSS/HOL. -.IP "BSS ,," -.br -Reserve bytes. - is the value used to initialize the area. - must be a multiple of the size of . - is 0 if the initialization is not strictly necessary, -1 if it is. -.IP "HOL ,," -.br -Idem, but all following absolute global data references will -refer to this block. -Only one HOL is allowed per procedure, -it has to be placed before the first instruction. -.IP "CON +" -.br -Assemble global data words initialized with the constants. -.IP "ROM +" -.br -Idem, but the initialized data will never be changed by the program. -.P4 "Partitioning" -.PP -Two pseudoinstructions partition the input into procedures: -.IP "PRO [,]" -.br -Start of procedure. - is the procedure name. - is the number of bytes for locals. -The number of bytes for locals must be specified in the PRO or -END pseudoinstruction. -When specified in both, they must be identical. -.IP "END []" -.br -End of Procedure. - is the number of bytes for locals. -The number of bytes for locals must be specified in either the PRO or -END pseudoinstruction or both. -.P4 "Visibility" -.PP -Names of data and procedures in an EM module can either be -internal or external. -External names are known outside the module and are used to link -several pieces of a program. -Internal names are not known outside the modules they are used in. -Other modules will not 'see' an internal name. -.QQ -To reduce the number of passes needed, -it must be known at the first occurrence whether -a name is internal or external. 
-If the first occurrence of a name is in a definition, -the name is considered to be internal. -If the first occurrence of a name is a reference, -the name is considered to be external. -If the first occurrence is in one of the following pseudoinstructions, -the effect of the pseudo has precedence. -.IP "EXA " -.br -External name. - is known, possibly defined, outside this module. -Note that may be defined in the same module. -.IP "EXP " -.br -External procedure identifier. -Note that may be defined in the same module. -.IP "INA " -.br -Internal name. - is internal to this module and must be defined in this module. -.IP "INP " -.br -Internal procedure. - is internal to this module and must be defined in this module. -.P4 "Miscellaneous" -.PP -Two other pseudoinstructions provide miscellaneous features: -.IP "EXC ," -.br -Two blocks of instructions preceding this one are -interchanged before being processed. - gives the number of lines of the first block. - gives the number of lines of the second one. -Blank and pure comment lines do not count. -This instruction is obsolete. Its use is strongly discouraged. -.IP "MES [,]*" -.br -A special type of comment. -Used by compilers to communicate with the -optimizer, assembler, etc. as follows: -.RS -.IP "MES 0" -.br -An error has occurred, stop further processing. -.IP "MES 1" -.br -Suppress optimization. -.IP "MES 2,," -.br -Use wordsize and pointer size . -.IP "MES 3,,,," -.br -Indicates that a local variable is never referenced indirectly. -Used to indicate that a register may be used for a specific -variable. - is offset in bytes from AB if positive -and offset from LB if negative. - gives the size of the variable. - indicates the class of the variable. -The following values are currently recognized: -.br -0\0\0\0The variable can be used for anything. -.br -1\0\0\0The variable is used as a loopindex. -.br -2\0\0\0The variable is used as a pointer. -.br -3\0\0\0The variable is used as a floating point number. 
-.br - gives the priority of the variable, -higher numbers indicate better candidates. -.IP "MES 4,," -.br -Number of source lines in file (for profiler). -.IP "MES 5" -.br -Floating point used. -.IP "MES 6,*" -.br -Comment. Used to provide comments in compact assembly language. -.IP "MES 7,....." -.br -Reserved. -.IP "MES 8,[,]..." -.br -Library module. Indicates that the module may only be loaded -if it is useful, that is, if it can satisfy any unresolved -references during the loading process. -May not be preceded by any other pseudo, except MES's. -.IP "MES 9," -.br -Guarantees that no more than bytes of parameters are -accessed, either directly or indirectly. -.IP "MES 10,[,]* -.br -This message number is reserved for the global optimizer. -It inserts these messages in its output as hints to backends. - indicates the type of hint. -.IP "MES 11" -.br -Procedures containing this message are possible destinations of -non-local goto's with the GTO instruction. -Some backends keep locals in registers, -the locals in this procedure should not be kept in registers and -all registers containing locals of other procedures should be -saved upon entry to this procedure. -.RE -.IP "" -Each backend is free to skip irrelevant MES pseudos. -.P2 "The Compact Assembly Language" -.PP -The assembler accepts input in a highly encoded form. -This -form is intended to reduce the amount of file transport between the -front ends, optimizers -and back ends, and also reduces the amount of storage required for storing -libraries. -Libraries are stored as archived compact assembly language, not machine -language. -.PP -When beginning to read the input, the assembler is in neutral state, and -expects either a label or an instruction (including the pseudoinstructions). -The meaning of the next byte(s) when in neutral state is as follows, where -b1, b2 -etc. represent the succeeding bytes. -.TS -tab(:); -rw17 4 l. 
-0:Reserved for future use -1\-129:Machine instructions, see Appendix A, alphabetical list -130\-149:Reserved for future use -150\-161:BSS,CON,END,EXA,EXC,EXP,HOL,INA,INP,MES,PRO,ROM -162\-179:Reserved for future pseudoinstructions -180\-239:Instruction labels 0 \- 59 (180 is local label 0 etc.) -240\-244:See the Common Table below -245\-255:Not used -.TE -After a label, the assembler is back in neutral state; it can immediately -accept another label or an instruction in the next byte. -No linefeeds are used to separate lines. -.PP -If an opcode expects no arguments, -the assembler is back in neutral state after -reading the one byte containing the instruction number. -If it has one or -more arguments (only pseudos have more than 1), the arguments follow directly, -encoded as follows: -.TS -tab(:); -r l. -0\-239:Offsets from \-120 to 119 -240\-255:See the Common Table below -.TE -Absence of an optional argument is indicated by a special -byte. -.TS -tab(:); -c s s s -c c s c -l4 l l4 l. -Common Table for Neutral State and Arguments -class:bytes:description - -:240:b1:Instruction label b1 (Not used for branches) -:241:b1 b2:16 bit instruction label (256*b2 + b1) -:242:b1:Global label .0\-.255, with b1 being the label -:243:b1 b2:Global label .0\-.32767 -:::with 256*b2+b1 being the label -:244::Global symbol not of the form .nnn -:245:b1 b2:16 bit constant -:246:b1 b2 b3 b4:32 bit constant -:247:b1 .. b8:64 bit constant -:248::Global label + (possibly negative) constant -:249::Procedure name (not including $) -:250::String used in CON or ROM (no quotes-no escapes) -:251::Integer constant, size bytes -:252::Unsigned constant, size bytes -:253::Floating constant, size bytes -:254::unused -:255::Delimiter for argument lists or -:::indicates absence of optional argument -.TE 1 -.PP -The bytes specifying the value of a 16, 32 or 64 bit constant -are presented in two's complement notation, with the least -significant byte first. 
For example: the value of a 32 bit -constant is ((s4*256+b3)*256+b2)*256+b1, where s4 is b4\-256 if -b4 is greater than 128 else s4 takes the value of b4. -A consists of a immediately followed by -a sequence of bytes with length . -.PP -.ne 8 -The pseudoinstructions fall into several categories, depending on their -arguments: -.DS -Group 1 \- EXC, BSS, HOL have a known number of arguments -Group 2 \- EXA, EXP, INA, INP have a string as argument -Group 3 \- CON, MES, ROM have a variable number of various things -Group 4 \- END, PRO have a trailing optional argument. -.DE -Groups 1 and 2 -use the encoding described above. -Group 3 also uses the encoding listed above, with an byte after the -last argument to indicate the end of the list. -Group 4 uses -an byte if the trailing argument is not present. -.TS -tab(|); -l s l -l s s -l 2 lw(30) l. -Example ASCII|Example compact -(LOC = 69, BRA = 18 here): - -2||182 -1||181 -\0LOC|10|69 130 -\0LOC|\-10|69 110 -\0LOC|300|69 245 44 1 -\0BRA|*19|18 139 -300||241 44 1 -.3||242 3 -\0CON|4,9,*2,$foo|151 124 129 240 2 249 123 102 111 111 255 -\0CON|.35|151 242 35 255 -.TE -.P2 "Assembly language instruction list" -.PP -For each instruction in the list the range of argument values -in the assembly language is given. -The column headed \fIassem\fP contains the mnemonics defined -in 11.1.3. -The following column specifies restrictions of the argument -value. -Addresses have to obey the restrictions mentioned in chapter 2. -The classes of arguments -are indicated by letters: -.ds b \fBb\fP -.ds c \fBc\fP -.ds d \fBd\fP -.ds g \fBg\fP -.ds f \fBf\fP -.ds l \fBl\fP -.ds n \fBn\fP -.ds w \fBw\fP -.ds p \fBp\fP -.ds r \fBr\fP -.ds s \fBs\fP -.ds z \fBz\fP -.ds o \fBo\fP -.ds - \fB\-\fP -.sp -.TS -tab(:); -c s l l -l l 15 l l. 
-\fIassem\fP:constraints:rationale - -\&\*c:cst:fits word:constant -\&\*d:cst:fits double word:constant -\&\*l:cst::local offset -\&\*g:arg:>= 0:global offset -\&\*f:cst::fragment offset -\&\*n:cst:>= 0:counter -\&\*s:cst:>0 , word multiple:object size -\&\*z:cst:>= 0 , zero or word multiple:object size -\&\*o:cst:> 0 , word multiple or fraction:object size -\&\*w:cst:> 0 , word multiple:object size * -\&\*p:pro::pro identifier -\&\*b:ilb:>= 0:label number -\&\*r:cst:0,1,2:register number -\&\*-:::no argument -.TE -.PP -The * at the rationale for \*w indicates that the argument -can either be given as argument or on top of the stack. -If the argument is omitted, the argument is fetched from the -stack; -it is assumed to be a wordsized unsigned integer. -Instructions that check for undefined integer or floating-point -values and underflow or overflow -are indicated below by (*). -.sp 1 -.DS -.ta 12n -GROUP 1 \- LOAD - - LOC \*c : Load constant (i.e. push one word onto the stack) - LDC \*d : Load double constant ( push two words ) - LOL \*l : Load word at \*l-th local (\*l<0) or parameter (\*l>=0) - LOE \*g : Load external word \*g - LIL \*l : Load word pointed to by \*l-th local or parameter - LOF \*f : Load offsetted (top of stack + \*f yield address) - LAL \*l : Load address of local or parameter - LAE \*g : Load address of external - LXL \*n : Load lexical (address of LB \*n static levels back) - LXA \*n : Load lexical (address of AB \*n static levels back) - LOI \*o : Load indirect \*o bytes (address is popped from the stack) - LOS \*w : Load indirect, \*w-byte integer on top of stack gives object size - LDL \*l : Load double local or parameter (two consecutive words are stacked) - LDE \*g : Load double external (two consecutive externals are stacked) - LDF \*f : Load double offsetted (top of stack + \*f yield address) - LPI \*p : Load procedure identifier -.DE - -.DS -GROUP 2 \- STORE - - STL \*l : Store local or parameter - STE \*g : Store external - SIL \*l : 
Store into word pointed to by \*l-th local or parameter - STF \*f : Store offsetted - STI \*o : Store indirect \*o bytes (pop address, then data) - STS \*w : Store indirect, \*w-byte integer on top of stack gives object size - SDL \*l : Store double local or parameter - SDE \*g : Store double external - SDF \*f : Store double offsetted -.DE - -.DS -GROUP 3 \- INTEGER ARITHMETIC - - ADI \*w : Addition (*) - SBI \*w : Subtraction (*) - MLI \*w : Multiplication (*) - DVI \*w : Division (*) - RMI \*w : Remainder (*) - NGI \*w : Negate (two's complement) (*) - SLI \*w : Shift left (*) - SRI \*w : Shift right (*) -.DE - -.DS -GROUP 4 \- UNSIGNED ARITHMETIC - - ADU \*w : Addition - SBU \*w : Subtraction - MLU \*w : Multiplication - DVU \*w : Division - RMU \*w : Remainder - SLU \*w : Shift left - SRU \*w : Shift right -.DE - -.DS -GROUP 5 \- FLOATING POINT ARITHMETIC - - ADF \*w : Floating add (*) - SBF \*w : Floating subtract (*) - MLF \*w : Floating multiply (*) - DVF \*w : Floating divide (*) - NGF \*w : Floating negate (*) - FIF \*w : Floating multiply and split integer and fraction part (*) - FEF \*w : Split floating number in exponent and fraction part (*) -.DE - -.DS -GROUP 6 \- POINTER ARITHMETIC - - ADP \*f : Add \*f to pointer on top of stack - ADS \*w : Add \*w-byte value and pointer - SBS \*w : Subtract pointers in same fragment and push diff as size \*w integer -.DE - -.DS -GROUP 7 \- INCREMENT/DECREMENT/ZERO - - INC \*- : Increment word on top of stack by 1 (*) - INL \*l : Increment local or parameter (*) - INE \*g : Increment external (*) - DEC \*- : Decrement word on top of stack by 1 (*) - DEL \*l : Decrement local or parameter (*) - DEE \*g : Decrement external (*) - ZRL \*l : Zero local or parameter - ZRE \*g : Zero external - ZRF \*w : Load a floating zero of size \*w - ZER \*w : Load \*w zero bytes -.DE - -.DS -GROUP 8 \- CONVERT (stack: source, source size, dest. 
size (top)) - - CII \*- : Convert integer to integer (*) - CUI \*- : Convert unsigned to integer (*) - CFI \*- : Convert floating to integer (*) - CIF \*- : Convert integer to floating (*) - CUF \*- : Convert unsigned to floating (*) - CFF \*- : Convert floating to floating (*) - CIU \*- : Convert integer to unsigned - CUU \*- : Convert unsigned to unsigned - CFU \*- : Convert floating to unsigned -.DE - -.DS -GROUP 9 \- LOGICAL - - AND \*w : Boolean and on two groups of \*w bytes - IOR \*w : Boolean inclusive or on two groups of \*w bytes - XOR \*w : Boolean exclusive or on two groups of \*w bytes - COM \*w : Complement (one's complement of top \*w bytes) - ROL \*w : Rotate left a group of \*w bytes - ROR \*w : Rotate right a group of \*w bytes -.DE - -.DS -GROUP 10 \- SETS - - INN \*w : Bit test on \*w byte set (bit number on top of stack) - SET \*w : Create singleton \*w byte set with bit n on (n is top of stack) -.DE - -.DS -GROUP 11 \- ARRAY - - LAR \*w : Load array element, descriptor contains integers of size \*w - SAR \*w : Store array element - AAR \*w : Load address of array element -.DE - -.DS -GROUP 12 \- COMPARE - - CMI \*w : Compare \*w byte integers, Push negative, zero, positive for <, = or > - CMF \*w : Compare \*w byte reals - CMU \*w : Compare \*w byte unsigneds - CMS \*w : Compare \*w byte values, can only be used for bit for bit equality test - CMP \*- : Compare pointers - - TLT \*- : True if less, i.e. iff top of stack < 0 - TLE \*- : True if less or equal, i.e. iff top of stack <= 0 - TEQ \*- : True if equal, i.e. iff top of stack = 0 - TNE \*- : True if not equal, i.e. iff top of stack non zero - TGE \*- : True if greater or equal, i.e. iff top of stack >= 0 - TGT \*- : True if greater, i.e. 
iff top of stack > 0 -.DE - -.DS -GROUP 13 \- BRANCH - - BRA \*b : Branch unconditionally to label \*b - - BLT \*b : Branch less (pop 2 words, branch if top > second) - BLE \*b : Branch less or equal - BEQ \*b : Branch equal - BNE \*b : Branch not equal - BGE \*b : Branch greater or equal - BGT \*b : Branch greater - - ZLT \*b : Branch less than zero (pop 1 word, branch negative) - ZLE \*b : Branch less or equal to zero - ZEQ \*b : Branch equal zero - ZNE \*b : Branch not zero - ZGE \*b : Branch greater or equal zero - ZGT \*b : Branch greater than zero -.DE - -.DS -GROUP 14 \- PROCEDURE CALL - - CAI \*- : Call procedure (procedure identifier on stack) - CAL \*p : Call procedure (with identifier \*p) - LFR \*s : Load function result - RET \*z : Return (function result consists of top \*z bytes) -.DE - -.DS -GROUP 15 \- MISCELLANEOUS - - ASP \*f : Adjust the stack pointer by \*f - ASS \*w : Adjust the stack pointer by \*w-byte integer - BLM \*z : Block move \*z bytes; first pop destination addr, then source addr - BLS \*w : Block move, size is in \*w-byte integer on top of stack - CSA \*w : Case jump; address of jump table at top of stack - CSB \*w : Table lookup jump; address of jump table at top of stack - DCH \*- : Follow dynamic chain, convert LB to LB of caller - DUP \*s : Duplicate top \*s bytes - DUS \*w : Duplicate top \*w bytes - EXG \*w : Exchange top \*w bytes - FIL \*g : File name (external 4 := \*g) - GTO \*g : Non-local goto, descriptor at \*g - LIM \*- : Load 16 bit ignore mask - LIN \*n : Line number (external 0 := \*n) - LNI \*- : Line number increment - LOR \*r : Load register (0=LB, 1=SP, 2=HP) - LPB \*- : Convert local base to argument base - MON \*- : Monitor call - NOP \*- : No operation - RCK \*w : Range check; trap on error - RTT \*- : Return from trap - SIG \*- : Trap errors to proc identifier on top of stack, \-2 resets default - SIM \*- : Store 16 bit ignore mask - STR \*r : Store register (0=LB, 1=SP, 2=HP) - TRP \*- : Cause trap to occur 
(Error number on stack) -.DE diff --git a/doc/em/cont.nr b/doc/em/cont.nr deleted file mode 100644 index e61369f64..000000000 --- a/doc/em/cont.nr +++ /dev/null @@ -1,4 +0,0 @@ -.de PT -.. -.bp -.Ct diff --git a/doc/em/descr.nr b/doc/em/descr.nr deleted file mode 100644 index 7035a6ea5..000000000 --- a/doc/em/descr.nr +++ /dev/null @@ -1,153 +0,0 @@ -.bp -.P1 "DESCRIPTORS" -.PP -Several instructions use descriptors, notably the range check instruction, -the array instructions, the goto instruction and the case jump instructions. -Descriptors reside in data space. -They may be constructed at run time, but -more often they are fixed and allocated in ROM data. -.PP -All instructions using descriptors, except GTO, have as argument -the size of the integers in the descriptor. -All implementations have to allow integers of the size of a -word in descriptors. -All integers popped from the stack and used for indexing or comparing -must have the same size as the integers in the descriptor. -.P2 "Range check descriptors" -.PP -Range check descriptors consist of two integers: -.IP 1. -lower bound signed -.IP 2. -upper bound signed -.LP -The range check instruction checks an integer on the stack against -these bounds and causes a trap if the value is outside the interval. -The value itself is neither changed nor removed from the stack. -.P2 "Array descriptors" -.PP -Each array descriptor describes a single dimension. -For multi-dimensional arrays, several array instructions are -needed to access a single element. -Array descriptors contain the following three integers: -.IP 1. -lower bound signed -.IP 2. -upper bound \- lower bound unsigned -.IP 3. -number of bytes per element unsigned -.LP -The array instructions LAR, SAR and AAR have the pointer to the start -of the descriptor as operand on the stack. -.LP -The element A[I] is fetched as follows: -.IP 1. -Stack the address of A (e.g., using LAE or LAL) -.IP 2. -Stack the value of I (n-byte integer) -.IP 3. 
-Stack the pointer to the descriptor (e.g., using LAE) -.IP 4. -LAR n (n is the size of the integers in the descriptor and I) -.LP -All array instructions first pop the address of the descriptor -and the index. -If the index is not within the bounds specified, a trap occurs. -If ok, (I~\-~lower bound) is multiplied -by the number of bytes per element (the third word). The result is added -to the address of A and replaces A on the stack. -.QQ -At this point LAR, SAR and AAR diverge. -AAR is finished. LAR pops the address and fetches the data -item, -the size being specified by the descriptor. -The usual restrictions for memory access must be obeyed. -SAR pops the address and stores the -data item now exposed. -.P2 "Non-local goto descriptors" -.PP -The GTO instruction provides a way of returning directly to any -active procedure invocation. -The argument of the instruction is the address of a descriptor -containing three pointers: -.IP 1. -value of PC after the jump -.IP 2. -value of SP after the jump -.IP 3. -value of LB after the jump -.LP -GTO loads PC, SP and LB from the descriptor, -thereby jumping to a procedure -and removing zero or more frames from the stack. -The LB, SP and PC in the descriptor must belong to a -dynamically enclosing procedure, -because some EM implementations will need to backtrack through -the dynamic chain and use the implementation dependent data -in frames to restore registers etc. -.P2 "Case descriptors" -.PP -The case jump instructions CSA and CSB both -provide multiway branches selected by a case index. -Both fetch two operands from the stack: -first a pointer to the low address of the case descriptor -and then the case index. -CSA uses the case index as index in the descriptor table, but CSB searches -the table for an occurrence of the case index. -Therefore, the descriptors for CSA and CSB, -as shown in figure 4, are different.
-All pointers in the table must be addresses of instructions in the -procedure executing the case instruction. -.PP -CSA selects the new PC by indexing. -If the index, a signed integer, is greater than or equal to -the lower bound and less than or equal to the upper bound, -then fetch the new PC from the list of instruction pointers by indexing with -index-lower. -The table does not contain the value of the upper bound, -but the value of upper-lower as an unsigned integer. -The default instruction pointer is used when the index is out of bounds. -If the resulting PC is 0, then trap. -.PP -CSB selects the new PC by searching. -The table is searched for an entry with index value equal to the case index. -That entry or, if none is found, the default entry contains the -new PC. -When the resulting PC is 0, a trap is performed. -.PP -The choice of which case instruction to use for -each source language case statement -is up to the front end. -If the range of the index value is dense, i.e -.DS -(highest value \- lowest value) / number of cases -.DE -is less than some threshold, then CSA is the obvious choice. -If the range is sparse, CSB is better. -.Dr 30 - |--------------------| |--------------------| high address - | pointer for upb | | pointer n-1 | - |--------------------| |- - - - - - - | - | . | | index n-1 | - | . | |--------------------| - | . | | . | - | . | | . | - | . | | . | - | . | |--------------------| - | . | | pointer 1 | - |--------------------| |- - - - - - - | - | pointer for lwb+1 | | index 1 | - |--------------------| |--------------------| - | pointer for lwb | | pointer 0 | - |--------------------| |- - - - - - - | - | upper - lower | | index 0 | - |--------------------| |--------------------| - | lower bound | | number of entries | - |--------------------| |--------------------| - | default pointer | | default pointer | low address - |--------------------| |--------------------| - - CSA descriptor CSB descriptor -.Df -Figure 4. 
Descriptor layout for CSA and CSB -.De diff --git a/doc/em/dispat1.sed b/doc/em/dispat1.sed deleted file mode 100644 index c459211c7..000000000 --- a/doc/em/dispat1.sed +++ /dev/null @@ -1,6 +0,0 @@ -1c\ -.TS\ -r l l l l l l l l. -s/-/\\-/g -/DISPATCH2/,$c\ -.TE diff --git a/doc/em/dispat2.sed b/doc/em/dispat2.sed deleted file mode 100644 index 8955df58d..000000000 --- a/doc/em/dispat2.sed +++ /dev/null @@ -1,6 +0,0 @@ -1,/DISPATCH2/c\ -.TS\ -r l l l l l l l l. -s/-/\\-/g -/DISPATCH3/,$c\ -.TE diff --git a/doc/em/dispat3.sed b/doc/em/dispat3.sed deleted file mode 100644 index 881881ef9..000000000 --- a/doc/em/dispat3.sed +++ /dev/null @@ -1,6 +0,0 @@ -1,/DISPATCH3/c\ -.TS\ -r l l l l l l l l. -s/-/\\-/g -$a\ -.TE diff --git a/doc/em/dspace.nr b/doc/em/dspace.nr deleted file mode 100644 index 810520db9..000000000 --- a/doc/em/dspace.nr +++ /dev/null @@ -1,376 +0,0 @@ -.bp -.P1 "DATA ADDRESS SPACE" -.PP -The data address space is divided into three parts, called 'areas', -each with its own addressing method: -global data area, -local data area (including the stack), -and heap data area. -These data areas must be part of the same -address space because all data is accessed by -the same type of pointers. -.PP -Space for global data is reserved using several pseudoinstructions in the -assembly language, as described in -the next paragraph and chapter 11. -The size of the global data area is fixed per program. -.QQ -Global data is addressed absolutely in the machine language. -Many instructions are available to address global data. -They all have an absolute address as argument. -Examples are LOE, LAE and STE. -.PP -Part of the global data area is initialized by the -compiler, the -rest is not initialized at all or is initialized -with a value, typically \-32768 or 0. -Part of the initialized global data may be made read-only -if the implementation supports protection. 
-.PP -The local data area is used as a stack, -which grows from high to low addresses -and contains some data for each active procedure -invocation, called a 'frame'. -The size of the local data area varies dynamically during -execution. -Below the current procedure frame resides the operand stack. -The stack pointer SP always points to the bottom of -the local data area. -Local data is addressed by offsetting from the local base pointer LB. -LB always points to the frame of the current procedure. -Only the words of the current frame and the parameters -can be addressed directly. -Variables in other active procedures are addressed by following -the chain of statically enclosing procedures using the LXL or LXA instruction. -The variables in dynamically enclosing procedures can be -addressed with the use of the DCH instruction. -.QQ -Many instructions have offsets to LB as argument, -for instance LOL, LAL and STL. -The arguments of these instructions range from \-1 to some -(negative) minimum -for the access of local storage and from 0 to some (positive) -maximum for parameter access. -.PP -The procedure call instructions CAL and CAI each create a new frame -on the stack. -Each procedure has an assembly-time parameter specifying -the number of bytes needed for local storage. -This storage is allocated each time the procedure is called and -must be a multiple of the wordsize. -Each procedure, therefore, starts with a stack with the local variables -already allocated. -The return instructions RET and RTT remove a frame. -The actual parameters must be removed by the calling procedure. -.PP -RET may copy some words from the stack of -the returning procedure to an unnamed 'function return area'. -This area is available for 'READ-ONCE' access using the LFR instruction. -The result of a LFR is only defined if the size used to fetch -is identical to the size used in the last return. 
-The instruction ASP, used to remove the parameters from the -stack, the branch instruction BRA and the non-local goto -instruction GTO are the only ones that leave the contents of -the 'function return area' intact. -All other instructions are allowed to destroy the function -return area. -Thus parameters can be popped before fetching the function result. -The maximum size of all function return areas is -implementation dependent, -but should allow procedure instance identifiers and all -implemented objects of type integer, unsigned, float -and pointer to be returned. -In most implementations -the maximum size of the function return -area is twice the pointer size, -because we want to be able to handle 'procedure instance -identifiers' which consist of a procedure identifier and the LB -of a frame belonging to that procedure. -.PP -The heap data area grows upwards, to higher numbered -addresses. -It is initially empty. -The initial value of the heap pointer HP -marks the low end. -The heap pointer may be manipulated -by the LOR and STR instructions. -The heap can only be addressed indirectly, -by pointers derived from previous values of HP. -.P2 "Global data area" -.PP -The initial size of the global data area is determined at assembly time. -Global data is allocated by several -pseudoinstructions in the EM assembly -language. -Each pseudoinstruction allocates one or more bytes. -The bytes allocated for a single pseudo form -a 'block'. -A block differs from a fragment, because, -under certain conditions, several blocks are allocated -in a single fragment. -This guarantees that the bytes of these blocks -are consecutive. -.PP -Global data is addressed absolutely in binary -machine language. -Most compilers, however, -cannot assign absolute addresses to their global variables, -especially not if the language -allows programs to be composed of several separately compiled modules. 
-The assembly language therefore allows the compiler to name -the first address of a global data block with an alphanumeric label. -Moreover, the only way to address such a named global data block -in the assembly language is by using its name. -It is the task of the assembler/loader to -translate these labels into absolute addresses. -These labels may also be used -in CON and ROM pseudoinstructions to initialize pointers. -.PP -The pseudoinstruction CON allocates initialized data. -ROM acts like CON but indicates that the initialized data will -not change during execution of the program. -The pseudoinstruction BSS allocates a block of uninitialized -or identically initialized -data. -The pseudoinstruction HOL is similar to BSS, -but it alters the meaning of subsequent absolute addressing in -the assembly language. -.PP -Another type of global data is a small block, -called the ABS block, with an implementation defined size. -Storage in this type of block can only be addressed -absolutely in assembly language. -The first word has address 0 and is used to maintain the -source line number. -Special instructions LIN and LNI are provided to -update this counter. -A pointer at location 4 points to a string containing the -current source file name. -The instruction FIL can be used to update the pointer. -.PP -All numeric arguments of the instructions that address -the global data area refer to locations in the -ABS block unless -they are preceded by at least one HOL pseudo in the same -module, -in which case they refer to the storage area allocated by the -last HOL pseudoinstruction. -Thus LOE 0 loads the zeroth word of the most recent HOL, unless no HOL has -appeared in the current file so -far, in which case it loads the zeroth word of the -ABS fragment. -.PP -The global data area is highly fragmented. -The ABS block and each HOL and BSS block are separate fragments. -The way fragments are formed from CON and ROM blocks is more complex. 
-The assemblers group several blocks into a single fragment. -A fragment only contains blocks of the same type: CON or ROM. -It is guaranteed that the bytes allocated for two consecutive CON pseudos are -allocated consecutively in a single fragment, unless -these CON pseudos are separated in the assembly language program -by a data label definition or one or more of the following pseudos: -.DS -ROM, BSS, HOL and END -.DE -An analogous rule holds for ROM pseudos. -.P2 "Local data area" -.PP -The local data area consists of a sequence of frames, one for -each active procedure. -Below the frame of the current procedure resides the -expression stack. -Frames are generated by procedure calls and are -removed by procedure returns. -A procedure frame consists of six 'zones': -.DS -1. The return status block -2. The local variables and compiler temporaries -3. The register save block -4. The dynamic local generators -5. The operand stack. -6. The parameters of a procedure one level deeper -.DE -A sample frame is shown in Figure 1. -.PP -Before a procedure call is performed the actual -parameters are pushed onto the stack of the calling procedure. -The exact details are compiler dependent. -EM allows procedures to be called with a variable number of -parameters. -The implementation of the C-language almost forces its runtime -system to push the parameters in reverse order, that is, -the first positional parameter last. -Most compilers use the C calling convention to be compatible. -The parameters of a procedure belong to the frame of the -calling procedure. -Note that the evaluation of the actual parameters may imply -the calling of procedures. -The parameters can be accessed with certain instructions using -offsets of 0 and greater. -The first byte of the last parameter pushed has offset 0. -Note that the parameter at offset 0 has a special use in the -instructions following the static chain (LXL and LXA). 
-These instructions assume that this parameter contains the LB of -the statically enclosing procedure. -Procedures that do not have a statically enclosing procedure -do not need a static link at offset 0. -.PP -Two instructions are available to perform procedure calls, CAL -and CAI. -Several tasks are performed by these call instructions. -.QQ -First, a part of the status of the calling procedure is -saved on the stack in the return status block. -This block should contain the return address of the calling -procedure, its LB and other implementation dependent data. -The size of this block is fixed for any given implementation -because the lexical instructions LPB, LXL and LXA must be able to -obtain the base addresses of the procedure parameters \fBand\fP local -variables. -An alternative solution can be used on machines with a highly -segmented address space. -The stack frames need not be contiguous then and the first -status save area can contain the parameter base AB, -which has the value of SP just after the last parameter has -been pushed. -.QQ -Second, the LB is changed to point to the -first word above the local variables. -The new LB is a copy of the SP after the return status -block has been pushed. -.QQ -Third, the amount of local storage needed by the procedure is -reserved. -The parameters and local storage are accessed by the same instructions. -Negative offsets are used for access to local variables. -The highest byte, that is the byte nearest -to LB, has to be accessed with offset \-1. -The pseudoinstruction specifying the entry point of a -procedure has an argument that specifies the amount of local -storage needed. -The local variables allocated by the CAI or CAL instructions -are the only ones that can be accessed with a fixed negative offset. -The initial value of the allocated words is -not defined, but implementations that check for undefined -values will probably initialize them with a -special 'undefined' pattern, typically \-32768.
-.QQ -Fourth, any EM implementation is allowed to reserve a variable size -block beneath the local variables. -This block could, for example, be used to save a variable number -of registers. -.QQ -Finally, the address of the entry point of the called procedure -is loaded into the Program Counter. -.PP -The ASP instruction can be used to allocate further (dynamic) -local storage. -The base address of such storage must be obtained with a LOR~SP -instruction. -This same instruction ASP may also be used -to remove some words from the stack. -.PP -There is a version of ASP, called ASS, which fetches the number -of bytes to allocate from the stack. -It can be used to allocate space for local -objects whose size is unknown at compile time, -so called 'dynamic local generators'. -.PP -Control is returned to the calling procedure with a RET instruction. -Any return value is then copied to the 'function return area'. -The frame created by the call is deallocated and the status of -the calling procedure is restored. -The value of SP just after the return value has been popped must -be the same as the -value of SP just before executing the first instruction of this -invocation. -This means that when a RET is executed the operand stack can -only contain the return value and all dynamically generated locals must be -deallocated. -Violating this restriction might result in hard to detect -errors. -The calling procedure has to remove the parameters from the stack. -This can be done with the aforementioned ASP instruction. -.PP -Each procedure frame is a separate fragment. -Because any fragment may be placed anywhere in memory, -procedure frames need not be contiguous. -.Dr 47 - |===============================| - | actual parameter n-1 | - |-------------------------------| - | . | - | . | - | . 
| - |-------------------------------| - | actual parameter 0 | ( <\- AB ) - |===============================| - - - |===============================| - |///////////////////////////////| - |///// return status block /////| - |///////////////////////////////| <\- LB - |===============================| - | | - | local variables | - | | - |-------------------------------| - | | - | compiler temporaries | - | | - |===============================| - |///////////////////////////////| - |///// register save block /////| - |///////////////////////////////| - |===============================| - | | - | dynamic local generators | - | | - |===============================| - | operand | - |-------------------------------| - | operand | - |===============================| - | parameter m-1 | - |-------------------------------| - | . | - | . | - | . | - |-------------------------------| - | parameter 0 | <\- SP - |===============================| -.Df -Figure 1. A sample procedure frame and parameters. -.De -.P2 "Heap data area" -.PP -The heap area starts empty, with HP -pointing to the low end of it. -HP always contains a word address. -A copy of HP can always be obtained with the LOR instruction. -A new value may be stored in the heap pointer using the STR instruction. -If the new value is greater than the old one, -then the heap grows. -If it is smaller, then the heap shrinks. -HP may never point below its original value. -All words between the current HP and the original HP -are allocated to the heap. -The heap may not grow into a part of memory that is already allocated. -When this is attempted, the STR instruction will cause a trap to occur. -In this case, HP retains its old value. -.PP -The only way to address the heap is indirectly. -Whenever an object is allocated by increasing HP, -then the old HP value must be saved and can be used later to address -the allocated object. 
-If, in the meantime, HP is decreased so that the object -is no longer part of the heap, then an attempt to access -the object is not allowed. -Furthermore, if the heap pointer is increased again to above -the object address, then access to the old object gives undefined results. -.PP -The heap is a single fragment. -All bytes have consecutive addresses. -No limits are imposed on the size of the heap as long as it fits -in the available data address space. diff --git a/doc/em/em.i b/doc/em/em.i deleted file mode 100644 index 20b05fe8b..000000000 --- a/doc/em/em.i +++ /dev/null @@ -1,1678 +0,0 @@ -.bp -.AP "EM INTERPRETER" -.nf -.ft CW -.lg 0 -.nr x \w' ' -.ta \nxu +\nxu +\nxu +\nxu +\nxu +\nxu +\nxu +\nxu +\nxu +\nxu - -{ This is an interpreter for EM. It serves as the official machine - definition. This interpreter must run on a machine which supports - arithmetic with words and memory offsets. - - Certain aspects of the definition are over specified. In particular: - - 1. The representation of an address on the stack need not be the - numerical value of the memory location. - - 2. The state of the stack is not defined after a trap has aborted - an instruction in the middle. For example, it is officially un- - defined whether the second operand of an ADD instruction has - been popped or not if the first one is undefined ( -32768 or - unsigned 32768). - - 3. The memory layout is implementation dependent. Only the most - basic checks are performed whenever memory is accessed. - - 4. The representation of an integer or set on the stack is not fixed - in bit order. - - 5. The format and existence of the procedure descriptors depends on - the implementation. - - 6. The result of the compare operators CMI etc. are -1, 0 and 1 - here, but other negative and positive values will do and they - need not be the same each time. - - 7. The shift count for SHL, SHR, ROL and ROR must be in the range 0 - to object size in bits - 1. 
The effect of a count not in this - range is undefined. -} -.bp -{$i256} {$d+} -program em(tables,prog,input,output); - -label 8888,9999; - -const - t15 = 32768; { 2**15 } - t15m1 = 32767; { 2**15 -1 } - t16 = 65536; { 2**16 } - t16m1 = 65535; { 2**16 -1 } - t31m1 = 2147483647; { 2**31 -1 } - - wsize = 2; { number of bytes in a word } - asize = 2; { number of bytes in an address } - fsize = 4; { number of bytes in a floating point number } - maxret =4; { number of words in the return value area } - - signbit = t15; { the power of two indicating the sign bit } - negoff = t16; { the next power of two } - maxsint = t15m1; { the maximum signed integer } - maxuint = t16m1; { the maximum unsigned integer } - maxdbl = t31m1; { the maximum double signed integer } - maxadr = t16m1; { the maximum address } - maxoffs = t15m1; { the maximum offset from an address } - maxbitnr= 15; { the number of the highest bit } - - lineadr = 0; { address of the line number } - fileadr = 4; { address of the file name } - maxcode = 8191; { highest byte in code address space } - maxdata = 8191; { highest byte in data address space } - - { format of status save area } - statd = 4; { how far is static link from lb } - dynd = 2; { how far is dynamic link from lb } - reta = 0; { how far is the return address from lb } - savsize = 4; { size of save area in bytes } - - { procedure descriptor format } - pdlocs = 0; { offset for size of local variables in bytes } - pdbase = asize; { offset for the procedure base } - pdsize = 4; { size of procedure descriptor in bytes = 2*asize } - - { header words } - NTEXT = 1; - NDATA = 2; - NPROC = 3; - ENTRY = 4; - NLINE = 5; - SZDATA = 6; - - escape1 = 254; { escape to secondary opcodes } - escape2 = 255; { escape to tertiary opcodes } - undef = signbit; { the range of integers is -32767 to +32767 } - - { error codes } - EARRAY = 0; ERANGE = 1; ESET = 2; EIOVFL = 3; EFOVFL = 4; - EFUNFL = 5; EIDIVZ = 6; EFDIVZ = 7; EIUND = 8; EFUND = 9; - ECONV = 10; ESTACK = 16; 
EHEAP = 17; EILLINS = 18; EODDZ = 19; - ECASE = 20; EMEMFLT = 21; EBADPTR = 22; EBADPC = 23; EBADLAE = 24; - EBADMON = 25; EBADLIN = 26; EBADGTO = 27; -.ne 20 -.bp -{---------------------------------------------------------------------------} -{ Declarations } -{---------------------------------------------------------------------------} - -type - bitval= 0..1; { one bit } - bitnr= 0..maxbitnr; { bits in machine words are numbered 0 to 15 } - byte= 0..255; { memory is an array of bytes } - adr= {0..maxadr} long; { the range of addresses } - word= {0..maxuint} long;{ the range of unsigned integers } - offs= -maxoffs..maxoffs; { the range of signed offsets from addresses } - size= 0..maxoffs; { the range of sizes is the positive offsets } - sword= {-signbit..maxsint} long; { the range of signed integers } - full= {-maxuint..maxuint} long; { intermediate results need this range } - double={-maxdbl..maxdbl} long; { double precision range } - bftype= (andf,iorf,xorf); { tells which boolean operator needed } - insclass=(prim,second,tert); { tells which opcode table is in use } - instype=(implic,explic); { does opcode have implicit or explicit operand } - iflags= (mini,short,sbit,wbit,zbit,ibit); - ifset= set of iflags; - - mnem = ( NON, - AAR, ADF, ADI, ADP, ADS, ADU,XAND, ASP, ASS, BEQ, - BGE, BGT, BLE, BLM, BLS, BLT, BNE, BRA, CAI, CAL, - CFF, CFI, CFU, CIF, CII, CIU, CMF, CMI, CMP, CMS, - CMU, COM, CSA, CSB, CUF, CUI, CUU, DCH, DEC, DEE, - DEL, DUP, DUS, DVF, DVI, DVU, EXG, FEF, FIF, FIL, - GTO, INC, INE, INL, INN, IOR, LAE, LAL, LAR, LDC, - LDE, LDF, LDL, LFR, LIL, LIM, LIN, LNI, LOC, LOE, - LOF, LOI, LOL, LOR, LOS, LPB, LPI, LXA, LXL, MLF, - MLI, MLU, MON, NGF, NGI, NOP, RCK, RET, RMI, RMU, - ROL, ROR, RTT, SAR, SBF, SBI, SBS, SBU, SDE, SDF, - SDL,XSET, SIG, SIL, SIM, SLI, SLU, SRI, SRU, STE, - STF, STI, STL, STR, STS, TEQ, TGE, TGT, TLE, TLT, - TNE, TRP, XOR, ZEQ, ZER, ZGE, ZGT, ZLE, ZLT, ZNE, - ZRE, ZRF, ZRL); - - dispatch = record - iflag: ifset; - instr: mnem; - 
case instype of - implic: (implicit:sword); - explic: (ilength:byte); - end; - - -var - code: packed array[0..maxcode] of byte; { code space } - data: packed array[0..maxdata] of byte; { data space } - retarea: array[1..maxret ] of word; { return area } - pc,lb,sp,hp,pd: adr; { internal machine registers } - i: integer; { integer scratch variable } - s,t :word; { scratch variables } - sz:size; { scratch variables } - ss,st: sword; { scratch variables } - k :double; { scratch variables } - j:size; { scratch variable used as index } - a,b:adr; { scratch variable used for addresses } - dt,ds:double; { scratch variables for double precision } - rt,rs,x,y:real; { scratch variables for real } - found:boolean; { scratch } - opcode: byte; { holds the opcode during execution } - iclass: insclass; { true for escaped opcodes } - dispat: array[insclass,byte] of dispatch; - retsize:size; { holds size of last LFR } - insr: mnem; { holds the instruction number } - halted: boolean; { normally false } - exitstatus:word; { parameter of MON 1 } - ignmask:word; { ignore mask for traps } - uerrorproc:adr; { number of user defined error procedure } - intrap:boolean; { Set when executing trap(), to catch recursive calls} - trapval:byte; { Set to number of last trap } - header: array[1..8] of adr; - - tables: text; { description of EM instructions } - prog: file of byte; { program and initialized data } -.ne 20 -.sp 2 -{---------------------------------------------------------------------------} -{ Various check routines } -{---------------------------------------------------------------------------} - -{ Only the most basic checks are performed. These routines are inherently - implementation dependent. 
} - -procedure trap(n:byte); forward; - -procedure memadr(a:adr); -begin if (a>maxdata) or ((a=hp)) then trap(EMEMFLT) end; - -procedure wordadr(a:adr); -begin memadr(a); if (a mod wsize<>0) then trap(EBADPTR) end; - -procedure chkadr(a:adr; s:size); -begin memadr(a); memadr(a+s-1); { assumption: size is ok } - if s0 then trap(EBADPTR) end - else if a mod wsize<>0 then trap(EBADPTR) -end; - -procedure newpc(a:double); -begin if (a<0) or (a>maxcode) then trap(EBADPC); pc:=a end; - -procedure newsp(a:adr); -begin if (a>lb) or (a0) then trap(ESTACK); sp:=a end; - -procedure newlb(a:adr); -begin if (a0) then trap(ESTACK); lb:=a end; - -procedure newhp(a:adr); -begin if (a>sp) or (a>maxdata+1) or (a mod wsize<>0) - then trap(EHEAP) - else hp:=a -end; - -function argc(a:double):sword; -begin if (a<-signbit) or (a>maxsint) then trap(EILLINS); argc:=a end; - -function argd(a:double):double; -begin if (a<-maxdbl) or (a>maxdbl) then trap(EILLINS); argd:=a end; - -function argl(a:double):offs; -begin if (a<-maxoffs) or (a>maxoffs) then trap(EILLINS); argl:=a end; - -function argg(k:double):adr; -begin if (k<0) or (k>maxadr) then trap(EILLINS); argg:=k end; - -function argf(a:double):offs; -begin if (a<-maxoffs) or (a>maxoffs) then trap(EILLINS); argf:=a end; - -function argn(a:double):word; -begin if (a<0) or (a>maxuint) then trap(EILLINS); argn:=a end; - -function args(a:double):size; -begin if (a<=0) or (a>maxoffs) - then trap(EODDZ) - else if (a mod wsize)<>0 then trap(EODDZ); - args:=a ; -end; - -function argz(a:double):size; -begin if (a<0) or (a>maxoffs) - then trap(EODDZ) - else if (a mod wsize)<>0 then trap(EODDZ); - argz:=a ; -end; - -function argo(a:double):size; -begin if (a<=0) or (a>maxoffs) - then trap(EODDZ) - else if (a mod wsize<>0) and (wsize mod a<>0) then trap(EODDZ); - argo:=a ; -end; - -function argw(a:double):size; -begin if (a<=0) or (a>maxoffs) or (a>maxuint) - then trap(EODDZ) - else if (a mod wsize)<>0 then trap(EODDZ); - argw:=a ; -end; - -function 
argp(a:double):size; -begin if (a<0) or (a>=header[NPROC]) then trap(EILLINS); argp:=a end; - -function argr(a:double):word; -begin if (a<0) or (a>2) then trap(EILLINS); argr:=a end; - -procedure argwf(s:double); -begin if argw(s)<>fsize then trap(EILLINS) end; - -function szindex(s:double):integer; -begin s:=argw(s); if (s mod wsize <> 0) or (s>2*wsize) then trap(EILLINS); - szindex:=s div wsize -end; - -function locadr(l:double):adr; -begin l:=argl(l); if l<0 then locadr:=lb+l else locadr:=lb+l+savsize end; - -function signwd(w:word):sword; -begin if w = undef then trap(EIUND); - if w >= signbit then signwd:=w-negoff else signwd:=w -end; - -function dosign(w:word):sword; -begin if w >= signbit then dosign:=w-negoff else dosign:=w end; - -function unsign(w:sword):word; -begin if w<0 then unsign:=w+negoff else unsign:=w end; - -function chopw(dw:double):word; -begin chopw:=dw mod negoff end; - -function fitsw(w:full;trapno:byte):word; -{ checks whether value fits in signed word, returns unsigned representation} -begin - if (w>maxsint) or (w<-signbit) then - begin trap(trapno); - if w<0 then fitsw:=negoff- (-w)mod negoff - else fitsw:=w mod negoff; - end - else fitsw:=unsign(w) -end; - -function fitd(w:full):double; -begin - if abs(w) > maxdbl then trap(ECONV); - fitd:=w -end; -.ne 20 -.sp 2 -{---------------------------------------------------------------------------} -{ Memory access routines } -{---------------------------------------------------------------------------} - -{ memw returns a machine word as an unsigned integer - memb returns a single byte as a positive integer: 0 <= memb <= 255 - mems(a,s) fetches an object smaller than a word and returns a word - store(a,v) stores the word v at machine address a - storea(a,v) stores the address v at machine address a - storeb(a,b) stores the byte b at machine address a - stores(a,s,v) stores the s least significant bytes of a word at address a - memi returns an offset from the instruction space - Note that the 
procedure descriptors are part of instruction space. - nextpc returns the next byte addressed by pc, incrementing pc - - lino changes the line number word. - filna changes the pointer to the file name. - - All routines check to make sure the address is within range and valid for - the size of the object. If an addressing error is found, a trap occurs. -} - - -function memw(a:adr):word; -var b:word; i:integer; -begin wordadr(a); b:=0; - for i:=wsize-1 downto 0 do b:=256*b + data[a+i] ; - memw:=b -end; - -function memd(a:adr):double; { Always signed } -var b:double; i:integer; -begin wordadr(a); b:=data[a+2*wsize-1]; - if b>=128 then b:=b-256; - for i:=2*wsize-2 downto 0 do b:=256*b + data[a+i] ; - memd:=b -end; - -function mema(a:adr):adr; -var b:adr; i:integer; -begin wordadr(a); b:=0; - for i:=asize-1 downto 0 do b:=256*b + data[a+i] ; - mema:=b -end; - -function mems(a:adr;s:size):word; -var i:integer; b:word; -begin chkadr(a,s); b:=0; for i:=1 to s do b:=b*256+data[a+s-i]; mems:=b end; - -function memb(a:adr):byte; -begin memadr(a); memb:=data[a] end; - -procedure store(a:adr; x:word); -var i:integer; -begin wordadr(a); - for i:=0 to wsize-1 do - begin data[a+i]:=x mod 256; x:=x div 256 end -end; - -procedure storea(a:adr; x:adr); -var i:integer; -begin wordadr(a); - for i:=0 to asize-1 do - begin data[a+i]:=x mod 256; x:=x div 256 end -end; - -procedure stores(a:adr;s:size;v:word); -var i:integer; -begin chkadr(a,s); - for i:=0 to s-1 do begin data[a+i]:=v mod 256; v:=v div 256 end; -end; - -procedure storeb(a:adr; b:byte); -begin memadr(a); data[a]:=b end; - -function memi(a:adr):adr; -var b:adr; i:integer; -begin if (a mod wsize<>0) or (a+asize-1>maxcode) then trap(EBADPTR); b:=0; - for i:=asize-1 downto 0 do b:=256*b + code[a+i] ; - memi:=b -end; - -function nextpc:byte; -begin if pc>=pd then trap(EBADPC); nextpc:=code[pc]; newpc(pc+1) end; - -procedure lino(w:word); -begin store(lineadr,w) end; - -procedure filna(a:adr); -begin storea(fileadr,a) end; -.ne 
20 -.sp 2 -{---------------------------------------------------------------------------} -{ Stack Manipulation Routines } -{---------------------------------------------------------------------------} - -{ push puts a word on the stack - pushsw takes a signed one word integer and pushes it on the stack - pop removes a machine word from the stack and delivers it as a word - popsw removes a machine word from the stack and delivers a signed integer - pusha pushes an address on the stack - popa removes a machine word from the stack and delivers it as an address - pushd pushes a double precision number on the stack - popd removes two machine words and returns a double precision integer - pushr pushes a float (floating point) number on the stack - popr removes several machine words and returns a float number - pushx puts an object of arbitrary size on the stack - popx removes an object of arbitrary size - } - -procedure push(x:word); -begin newsp(sp-wsize); store(sp,x) end; - -procedure pushsw(x:sword); -begin newsp(sp-wsize); store(sp,unsign(x)) end; - -function pop:word; -begin pop:=memw(sp); newsp(sp+wsize) end; - -function popsw:sword; -begin popsw:=signwd(pop) end; - -procedure pusha(x:adr); -begin newsp(sp-asize); storea(sp,x) end; - -function popa:adr; -begin popa:=mema(sp); newsp(sp+asize) end; - -procedure pushd(y:double); -begin { push double integer onto the stack } newsp(sp-2*wsize) end; - -function popd:double; -begin { pop double integer from the stack } newsp(sp+2*wsize); popd:=0 end; - -procedure pushr(z:real); -begin { Push a float onto the stack } newsp(sp-fsize) end; - -function popr:real; -begin { pop float from the stack } newsp(sp+fsize); popr:=0.0 end; - -procedure pushx(objsize:size; a:adr); -var i:integer; -begin - if objsize= 0 then w := w div 2 else w := (w-1) div 2 end; - -procedure suright(var w:word); { 1 bit right shift without sign extension } -begin w := w div 2 end; - -procedure sdright(var d:double); { 1 bit right shift } -begin { shift 
two word signed integer } end; - -procedure rleft(var w:word); { 1 bit left rotate } -begin if w >= t15 - then w:=(w-t15)*2 + 1 - else w:=w*2 -end; - -procedure rright(var w:word); { 1 bit right rotate } -begin if w mod 2 = 1 - then w:=w div 2 + t15 - else w:=w div 2 -end; - -function sextend(w:word;s:size):word; -var i:size; -begin - for i:=1 to (wsize-s)*8 do rleft(w); - for i:=1 to (wsize-s)*8 do sright(w); - sextend:=w; -end; - -function bit(b:bitnr; w:word):bitval; { return bit b of the word w } -var i:bitnr; -begin for i:= 1 to b do rright(w); bit:= w mod 2 end; - -function bf(ty:bftype; w1,w2:word):word; { return boolean fcn of 2 words } -var i:bitnr; j:word; -begin j:=0; - for i:= maxbitnr downto 0 do - begin j := 2*j; - case ty of - andf: if bit(i,w1)+bit(i,w2) = 2 then j:=j+1; - iorf: if bit(i,w1)+bit(i,w2) > 0 then j:=j+1; - xorf: if bit(i,w1)+bit(i,w2) = 1 then j:=j+1 - end - end; - bf:=j -end; - -{---------------------------------------------------------------------------} -{ Array indexing } -{---------------------------------------------------------------------------} - -function arraycalc(c:adr):adr; { subscript calculation } -var j:full; objsize:size; a:adr; -begin j:= popsw - signwd(memw(c)); - if (j<0) or (j>memw(c+wsize)) then trap(EARRAY); - objsize := argo(memw(c+wsize+wsize)); - a := j*objsize+popa; chkadr(a,objsize); - arraycalc:=a -end; -.ne 20 -.sp 2 -{---------------------------------------------------------------------------} -{ Double and Real Arithmetic } -{---------------------------------------------------------------------------} - -{ All routines for doubles and floats are dummy routines, since the format of - doubles and floats is not defined in EM. 
-} - -function doadi(ds,dt:double):double; -begin { add two doubles } doadi:=0 end; - -function dosbi(ds,dt:double):double; -begin { subtract two doubles } dosbi:=0 end; - -function domli(ds,dt:double):double; -begin { multiply two doubles } domli:=0 end; - -function dodvi(ds,dt:double):double; -begin { divide two doubles } dodvi:=0 end; - -function dormi(ds,dt:double):double; -begin { modulo of two doubles } dormi:=0 end; - -function dongi(ds:double):double; -begin { negative of a double } dongi:=0 end; - -function doadf(x,y:real):real; -begin { add two floats } doadf:=0.0 end; - -function dosbf(x,y:real):real; -begin { subtract two floats } dosbf:=0.0 end; - -function domlf(x,y:real):real; -begin { multiply two floats } domlf:=0.0 end; - -function dodvf(x,y:real):real; -begin { divide two floats } dodvf:=0.0 end; - -function dongf(x:real):real; -begin { negate a float } dongf:=0.0 end; - -procedure dofif(x,y:real;var intpart,fraction:real); -begin { dismember x*y into integer and fractional parts } - intpart:=0.0; { integer part of x*y, same sign as x*y } - fraction:=0.0; - { fractional part of x*y, 0<=abs(fraction)<1 and same sign as x*y } -end; - -procedure dofef(x:real;var mantissa:real;var exponent:sword); -begin { dismember x into mantissa and exponent parts } - mantissa:=0.0; { mantissa of x , >= 1/2 and <1 } - exponent:=0; { base 2 exponent of x } -end; -.bp -{---------------------------------------------------------------------------} -{ Trap and Call } -{---------------------------------------------------------------------------} - -procedure call(p:adr); { Perform the call } -begin - pusha(lb);pusha(pc); - newlb(sp);newsp(sp - memi(pd + pdsize*p + pdlocs)); - newpc(memi(pd + pdsize*p+ pdbase)) -end; - -procedure dotrap(n:byte); -var i:size; -begin - if (uerrorproc=0) or intrap then - begin - if intrap then - writeln('Recursive trap, first trap number was ', trapval:1); - writeln('Error ', n:1); - writeln('With',ord(insr):4,' arg ',k:1); - goto 9999 - 
end; - { Deposit all interpreter variables that need to be saved on - the stack. This includes all scratch variables that can - be in use at the moment and ( not possible in this interpreter ) - the internal address of the interpreter where the error occurred. - This would make it possible to execute an RTT instruction totally - transparent to the user program. - It can, for example, occur within an ADD instruction that both - operands are undefined and that the result overflows. - Although this will generate 3 error traps it must be possible - to ignore them all. -} - intrap:=true; trapval:=n; - for i:=retsize div wsize downto 1 do push(retarea[i]); - push(retsize); { saved return area } - pusha(mema(fileadr)); { saved current file name pointer } - push(memw(lineadr)); { saved line number } - push(n); { push error number } - a:=argp(uerrorproc); - uerrorproc:=0; { reset signal } - call(a); { call the routine } - intrap:=false; { Don't catch recursive traps anymore } - goto 8888; { reenter main loop } -end; - -procedure trap; -{ This routine is invoked for overflow, and other run time errors. - For non-fatal errors, trap returns to the calling routine -} -begin - if n>=16 then dotrap(n) else if bit(n,ignmask)=0 then dotrap(n); -end; - -procedure dortt; -{ The restoration of file address and line number is not essential. - The restoration of the return save area is. 
-} -var i:size; - n:word; -begin - newsp(lb); lb:=maxdata+1 ; { to circumvent ESTACK for the popa + pop } - newpc(popa); newlb(popa); { So far a plain RET 0 } - n:=pop; if (n>=16) and (n<64) then goto 9999 ; - lino(pop); filna(popa); retsize:=pop; - for i:=1 to retsize div wsize do retarea[i]:=pop ; -end; -.sp 2 -{---------------------------------------------------------------------------} -{ monitor calls } -{---------------------------------------------------------------------------} - - -procedure domon(entry:word); -var index: 1..63; - dummy: double; - count,rwptr: adr; - token: byte; - i: integer; -begin - if (entry<=0) or (entry>63) then entry:=63 ; - index:=entry; - case index of - 1: begin { exit } exitstatus:=pop; halted:=true end; - 3: begin { read } dummy:=pop; { All input is from stdin } - rwptr:=popa; count:=popa; - i:=0 ; - while (not eof(input)) and (i0 then - begin i:=20; found:=false; - while (i<>0) and not found do - begin c:=memb(a); a:=a+1; found:=true; i:=i-1; - if (c>=48) and (c<=57) then - begin found:=false; write(chr(ord('0')+c-48)) end; - if (c>=65) and (c<=90) then - begin found:=false; write(chr(ord('A')+c-65)) end; - if (c>=97) and (c<=122) then - begin found:=false; write(chr(ord('a')+c-97)) end; - end; - end; - writeln; -end; - -procedure initialize; { start the ball rolling } -{ This is not part of the machine definition } -var cset:set of char; - f:ifset; - iclass:insclass; - insno:byte; - nops:integer; - opcode:byte; - i,j,n:integer; - wtemp:sword; - count:integer; - repc:adr; - nexta,firsta:adr; - elem:byte; - amount,ofst:size; - c:char; - - function readb(n:integer):double; - var b:byte; - begin read(prog,b); if n>1 then readb:=readb(n-1)*256+b else readb:=b end; - - function readbyte:byte; - begin readbyte:=readb(1) end; - - function readword:word; - begin readword:=readb(wsize) end; - - function readadr:adr; - begin readadr:=readb(asize) end; - - function ifind(ordinal:byte):mnem; - var loopvar:mnem; - found:boolean; - begin 
ifind:=NON; - loopvar:=insr; found:=false; - repeat - if ordinal=ord(loopvar) then - begin found:=true; ifind:=loopvar end; - if loopvar<>ZRL then loopvar:=succ(loopvar) else loopvar:=NON; - until found or (loopvar=insr) ; - end; - - procedure readhdr; - type hdrw=0..32767 ; { 16 bit header words } - var hdr: hdrw; - i: integer; - begin - for i:=0 to 7 do - begin hdr:=readb(2); - case i of - 0: if hdr<>3757 then { 07255 } - begin writeln('Not an em load file'); halt end; - 2: if hdr<>0 then - begin writeln('Unsolved references'); halt end; - 3: if hdr<>3 then - begin writeln('Incorrect load file version'); halt end; - 4: if hdr<>wsize then - begin writeln('Incorrect word size'); halt end; - 5: if hdr<>asize then - begin writeln('Incorrect pointer size'); halt end; - 1,6,7:; - end - end - end; - - procedure noinit; - begin writeln('Illegal initialization'); halt end; - - procedure readint(a:adr;s:size); - var i:size; - begin { construct integer out of byte sequence } - for i:=1 to s do { construct the value and initialize at a } - begin storeb(a,readbyte); a:=a+1 end - end; - - procedure readuns(a:adr;s:size); - begin { construct unsigned out of byte sequence } - readint(a,s) { identical to readint } - end; - - procedure readfloat(a:adr;s:size); - var i:size; b:byte; - begin { construct float out of string} - if (s<>4) and (s<>8) then noinit; i:=0; - repeat { eat the bytes, construct the value and intialize at a } - b:=readbyte; i:=i+1; - until b=0 ; - end; - -begin - halted:=false; - exitstatus:=undef; - uerrorproc:=0; intrap:=false; - - { initialize tables } - for i:=0 to maxcode do code[i]:=0; - for i:=0 to maxdata do data[i]:=0; - for iclass:=prim to tert do - for i:=0 to 255 do - with dispat[iclass][i] do - begin instr:=NON; iflag:=[zbit] end; - - { read instruction table file. 
see appendix B } - { The table read here is a simple transformation of the table on page xx } - { - instruction names were transformed to numbers } - { - the '-' flag was transformed to an 'i' flag for 'w' type instructions } - { - the 'S' flag was added for instructions having signed operands } - reset(tables); - insr:=NON; - repeat - read(tables,insno) ; cset:=[]; f:=[]; - insr:=ifind(insno); - if insr=NON then begin writeln('Incorrect table'); halt end; - repeat read(tables,c) until c<>' ' ; - repeat - cset:=cset+[c]; - read(tables,c) - until c=' ' ; - if 'm' in cset then f:=f+[mini]; - if 's' in cset then f:=f+[short]; - if '-' in cset then f:=f+[zbit]; - if 'i' in cset then f:=f+[ibit]; - if 'S' in cset then f:=f+[sbit]; - if 'w' in cset then f:=f+[wbit]; - if (mini in f) or (short in f) then read(tables,nops) else nops:=1 ; - readln(tables,opcode); - if ('4' in cset) or ('8' in cset) then - begin iclass:=tert end - else if 'e' in cset then - begin iclass:=second end - else iclass:=prim; - for i:=0 to nops-1 do - begin - with dispat[iclass,opcode+i] do - begin - iflag:=f; instr:=insr; - if '2' in cset then ilength:=2 - else if 'u' in cset then ilength:=2 - else if '4' in cset then ilength:=4 - else if '8' in cset then ilength:=8 - else if (mini in f) or (short in f) then - begin - if 'N' in cset then wtemp:=-1-i else wtemp:=i ; - if 'o' in cset then wtemp:=wtemp+1 ; - if short in f then wtemp:=wtemp*256 ; - implicit:=wtemp - end - end - end - until eof(tables); - - { read in program text, data and procedure descriptors } - reset(prog); - readhdr; { verify first header } - for i:=1 to 8 do header[i]:=readadr; { read second header } - hp:=maxdata+1; sp:=maxdata+1; lino(0); - { read program text } - if header[NTEXT]+header[NPROC]*pdsize>maxcode then - begin writeln('Text size too large'); halt end; - if header[SZDATA]>maxdata then - begin writeln('Data size too large'); halt end; - for i:=0 to header[NTEXT]-1 do code[i]:=readbyte; - { read data blocks } - 
nexta:=0; - for i:=1 to header[NDATA] do - begin - n:=readbyte; - if n<>0 then - begin - elem:=readbyte; firsta:=nexta; - case n of - 1: { uninitialized words } - for j:=1 to elem do - begin store(nexta,undef); nexta:=nexta+wsize end; - 2: { initialized bytes } - for j:=1 to elem do - begin storeb(nexta,readbyte); nexta:=nexta+1 end; - 3: { initialized words } - for j:=1 to elem do - begin store(nexta,readword); nexta:=nexta+wsize end; - 4,5: { instruction and data pointers } - for j:=1 to elem do - begin storea(nexta,readadr); nexta:=nexta+asize end; - 6: { signed integers } - begin readint(nexta,elem); nexta:=nexta+elem end; - 7: { unsigned integers } - begin readuns(nexta,elem); nexta:=nexta+elem end; - 8: { floating point numbers } - begin readfloat(nexta,elem); nexta:=nexta+elem end; - end - end - else - begin - repc:=readadr; amount:=nexta-firsta; - for count:=1 to repc do - begin - for ofst:=0 to amount-1 do data[nexta+ofst]:=data[firsta+ofst]; - nexta:=nexta+amount; - end - end - end; - if header[SZDATA]<>nexta then writeln('Data initialization error'); - hp:=nexta; - { read descriptor table } - pd:=header[NTEXT]; - for i:=1 to header[NPROC]*pdsize do code[pd+i-1]:=readbyte; - { call the entry point routine } - ignmask:=0; { catch all traps, higher numbered traps cannot be ignored} - retsize:=0; - lb:=maxdata; { illegal dynamic link } - pc:=maxcode; { illegal return address } - push(0); a:=sp; { No environment } - push(0); b:=sp; { No args } - pusha(a); { envp } - pusha(b); { argv } - push(0); { argc } - call(argp(header[ENTRY])); -end; -.bp -{---------------------------------------------------------------------------} -{ MAIN LOOP OF THE INTERPRETER } -{---------------------------------------------------------------------------} -{ It should be noted that the interpreter (microprogram) for an EM - machine can be written in two fundamentally different ways: (1) the - instruction operands are fetched in the main loop, or (2) the in- - struction operands are 
fetched after the 256 way branch, by the exe- - cution routines themselves. In this interpreter, method (1) is used - to simplify the description of execution routines. The dispatch - table dispat is used to determine how the operand is encoded. There - are 4 possibilities: - - 0. There is no operand - 1. The operand and instruction are together in 1 byte (mini) - 2. The operand is one byte long and follows the opcode byte(s) - 3. The operand is two bytes long and follows the opcode byte(s) - 4. The operand is four bytes long and follows the opcode byte(s) - - In this interpreter, the main loop determines the operand type, - fetches it, and leaves it in the global variable k for the execution - routines to use. Consequently, instructions such as LOL, which use - three different formats, need only be described once in the body of - the interpreter. - However, for a production interpreter, or a hardware EM - machine, it is probably better to use method (2), i.e. to let the - execution routines themselves fetch their own operands. The reason - for this is that each opcode uniquely determines the operand format, - so no table lookup in the dispatch table is needed. The whole table - is not needed. Method (2) therefore executes much faster. - However, separate execution routines will be needed for LOL with - a one byte offset, and LOL with a two byte offset. It is to avoid - this additional clutter that method (1) is used here. In a produc- - tion interpreter, it is envisioned that the main loop will fetch the - next instruction byte, and use it as an index into a 256 word table - to find the address of the interpreter routine to jump to. The - routine jumped to will begin by fetching its operand, if any, - without any table lookup, since it knows which format to expect. - After doing the work, it returns to the main loop by jumping in- - directly to a register that contains the address of the main loop. 
- A slight variation on this idea is to have the register contain - the address of the branch table, rather than the address of the main - loop. - Another issue is whether the execution routines for LOL 0, LOL - 2, LOL 4, etc. should all be have distinct execution routines. Doing - so provides for the maximum speed, since the operand is implicit in - the routine itself. The disadvantage is that many nearly identical - execution routines will then be needed. Another way of doing it is - to keep the instruction byte fetched from memory (LOL 0, LOL 2, LOL - 4, etc.) in some register, and have all the LOL mini format instruc- - tions branch to a common routine. This routine can then determine - the operand by subtracting the code for LOL 0 from the register, - leaving the true operand in the register (as a word quantity of - course). This method makes the interpreter smaller, but is a bit - slower. -.bp - To make this important point a little clearer, consider how a - production interpreter for the PDP-11 might appear. Let us assume the - following opcodes have been assigned: - - 31: LOL -2 (2 bytes, i.e. next word) - 32: LOL -4 - 33: LOL -6 - 34: LOL b (format with a one byte offset) - 35: LOL w (format with a one word, i.e. two byte offset) - - Further assume that each of the 5 opcodes will have its own execution - routine, i.e. we are making a tradeoff in favor of fast execution and - a slightly larger interpreter. - Register r5 is the em program counter. - Register r4 is the em LB register - Register r3 is the em SP register (the stack grows toward low core) - Register r2 contains the interpreter address of the main loop - - The main loop looks like this: - - movb (r5)+,r0 /fetch the opcode into r0 and increment r5 - asl r0 /shift r0 left 1 bit. Now: -256<=r0<=+254 - jmp *table(r0) /jump to execution routine - - Notice that no operand fetching has been done. 
The execution routines for - the 5 sample instructions given above might be as follows: - - lol2: mov -2(r4),-(sp) /push local -2 onto stack - jmp (r2) /go back to main loop - lol4: mov -4(r4),-(sp) /push local -4 onto stack - jmp (r2) /go back to main loop - lol6: mov -6(r4),-(sp) /push local -6 onto stack - jmp (r2) /go back to main loop - lolb: mov $177400,r0 /prepare to fetch the 1 byte operand - bisb (r5)+,r0 /operand is now in r0 - asl r0 /r0 is now offset from LB in bytes, not words - add r4,r0 /r0 is now address of the needed local - mov (r0),-(sp) /push the local onto the stack - jmp (r2) - lolw: clr r0 /prepare to fetch the 2 byte operand - bisb (r5)+,r0 /fetch high order byte first !!! - swab r0 /insert high order byte in place - bisb (r5)+,r0 /insert low order byte in place - asl r0 /convert offset to bytes, from words - add r4,r0 /r0 is now address of needed local - mov (r0),-(sp) /stack the local - jmp (r2) /done - - The important thing to notice is where and how the operand fetch occurred: - lol2, lol4, and lol6, (the mini's) have implicit operands - lolb knew it had to fetch one byte, and did so without any table lookup - lolw knew it had to fetch a word, and did so, high order byte first } -.bp -.sp 4 -{---------------------------------------------------------------------------} -{ Routines for the individual instructions } -{---------------------------------------------------------------------------} -procedure loadops; -var j:integer; -begin - case insr of - { LOAD GROUP } - LDC: pushd(argd(k)); - LOC: pushsw(argc(k)); - LOL: push(memw(locadr(k))); - LOE: push(memw(argg(k))); - LIL: push(memw(mema(locadr(k)))); - LOF: push(memw(popa+argf(k))); - LAL: pusha(locadr(k)); - LAE: pusha(argg(k)); - LXL: begin a:=lb; for j:=1 to argn(k) do a:=mema(a+savsize); pusha(a) end; - LXA: begin a:=lb; - for j:=1 to argn(k) do a:= mema(a+savsize); - pusha(a+savsize) - end; - LOI: pushx(argo(k),popa); - LOS: begin k:=argw(k); if k<>wsize then trap(EILLINS); - 
k:=pop; pushx(argo(k),popa) - end; - LDL: begin a:=locadr(k); push(memw(a+wsize)); push(memw(a)) end; - LDE: begin k:=argg(k); push(memw(k+wsize)); push(memw(k)) end; - LDF: begin k:=argf(k); - a:=popa; push(memw(a+k+wsize)); push(memw(a+k)) - end; - LPI: push(argp(k)) - end -end; - -procedure storeops; -begin - case insr of - { STORE GROUP } - STL: store(locadr(k),pop); - STE: store(argg(k),pop); - SIL: store(mema(locadr(k)),pop); - STF: begin a:=popa; store(a+argf(k),pop) end; - STI: popx(argo(k),popa); - STS: begin k:=argw(k); if k<>wsize then trap(EILLINS); - k:=popa; popx(argo(k),popa) - end; - SDL: begin a:=locadr(k); store(a,pop); store(a+wsize,pop) end; - SDE: begin k:=argg(k); store(k,pop); store(k+wsize,pop) end; - SDF: begin k:=argf(k); a:=popa; store(a+k,pop); store(a+k+wsize,pop) end - end -end; - -procedure intarith; -var i:integer; -begin - case insr of - { SIGNED INTEGER ARITHMETIC } - ADI: case szindex(argw(k)) of - 1: begin st:=popsw; ss:=popsw; push(fitsw(ss+st,EIOVFL)) end; - 2: begin dt:=popd; ds:=popd; pushd(doadi(ds,dt)) end; - end ; - SBI: case szindex(argw(k)) of - 1: begin st:=popsw; ss:= popsw; push(fitsw(ss-st,EIOVFL)) end; - 2: begin dt:=popd; ds:=popd; pushd(dosbi(ds,dt)) end; - end ; - MLI: case szindex(argw(k)) of - 1: begin st:=popsw; ss:= popsw; push(fitsw(ss*st,EIOVFL)) end; - 2: begin dt:=popd; ds:=popd; pushd(domli(ds,dt)) end; - end ; - DVI: case szindex(argw(k)) of - 1: begin st:= popsw; ss:= popsw; - if st=0 then trap(EIDIVZ) else pushsw(ss div st) - end; - 2: begin dt:=popd; ds:=popd; pushd(dodvi(ds,dt)) end; - end; - RMI: case szindex(argw(k)) of - 1: begin st:= popsw; ss:=popsw; - if st=0 then trap(EIDIVZ) else pushsw(ss - (ss div st)*st) - end; - 2: begin dt:=popd; ds:=popd; pushd(dormi(ds,dt)) end - end; - NGI: case szindex(argw(k)) of - 1: begin st:=popsw; pushsw(-st) end; - 2: begin ds:=popd; pushd(dongi(ds)) end - end; - SLI: begin t:=pop; - case szindex(argw(k)) of - 1: begin ss:=popsw; - for i:= 1 to t do sleft(ss); 
pushsw(ss) - end - end - end; - SRI: begin t:=pop; - case szindex(argw(k)) of - 1: begin ss:=popsw; - for i:= 1 to t do sright(ss); pushsw(ss) - end; - 2: begin ds:=popd; - for i:= 1 to t do sdright(ss); pushd(ss) - end - end - end - end -end; - -procedure unsarith; -var i:integer; -begin - case insr of - { UNSIGNED INTEGER ARITHMETIC } - ADU: case szindex(argw(k)) of - 1: begin t:=pop; s:= pop; push(chopw(s+t)) end; - 2: trap(EILLINS); - end ; - SBU: case szindex(argw(k)) of - 1: begin t:=pop; s:= pop; push(chopw(s-t)) end; - 2: trap(EILLINS); - end ; - MLU: case szindex(argw(k)) of - 1: begin t:=pop; s:= pop; push(chopw(s*t)) end; - 2: trap(EILLINS); - end ; - DVU: case szindex(argw(k)) of - 1: begin t:= pop; s:= pop; - if t=0 then trap(EIDIVZ) else push(s div t) - end; - 2: trap(EILLINS); - end; - RMU: case szindex(argw(k)) of - 1: begin t:= pop; s:=pop; - if t=0 then trap(EIDIVZ) else push(s - (s div t)*t) - end; - 2: trap(EILLINS); - end; - SLU: case szindex(argw(k)) of - 1: begin t:=pop; s:=pop; - for i:= 1 to t do suleft(s); push(s) - end; - 2: trap(EILLINS); - end; - SRU: case szindex(argw(k)) of - 1: begin t:=pop; s:=pop; - for i:= 1 to t do suright(s); push(s) - end; - 2: trap(EILLINS); - end - end -end; - -procedure fltarith; -begin - case insr of - { FLOATING POINT ARITHMETIC } - ADF: begin argwf(k); rt:=popr; rs:=popr; pushr(doadf(rs,rt)) end; - SBF: begin argwf(k); rt:=popr; rs:=popr; pushr(dosbf(rs,rt)) end; - MLF: begin argwf(k); rt:=popr; rs:=popr; pushr(domlf(rs,rt)) end; - DVF: begin argwf(k); rt:=popr; rs:=popr; pushr(dodvf(rs,rt)) end; - NGF: begin argwf(k); rt:=popr; pushr(dongf(rt)) end; - FIF: begin argwf(k); rt:=popr; rs:=popr; - dofif(rt,rs,x,y); pushr(y); pushr(x) - end; - FEF: begin argwf(k); rt:=popr; dofef(rt,x,ss); pushr(x); pushsw(ss) end - end -end; - -procedure ptrarith; -begin - case insr of - { POINTER ARITHMETIC } - ADP: pusha(popa+argf(k)); - ADS: case szindex(argw(k)) of - 1: begin st:=popsw; pusha(popa+st) end; - 2: begin 
dt:=popd; pusha(popa+dt) end; - end; - SBS: begin - a:=popa; b:=popa; - case szindex(argw(k)) of - 1: push(fitsw(b-a,EIOVFL)); - 2: pushd(b-a) - end - end - end -end; - -procedure incops; -var j:integer; -begin - case insr of - { INCREMENT/DECREMENT/ZERO } - INC: push(fitsw(popsw+1,EIOVFL)); - INL: begin a:=locadr(k); store(a,fitsw(signwd(memw(a))+1,EIOVFL)) end; - INE: begin a:=argg(k); store(a,fitsw(signwd(memw(a))+1,EIOVFL)) end; - DEC: push(fitsw(popsw-1,EIOVFL)); - DEL: begin a:=locadr(k); store(a,fitsw(signwd(memw(a))-1,EIOVFL)) end; - DEE: begin a:=argg(k); store(a,fitsw(signwd(memw(a))-1,EIOVFL)) end; - ZRL: store(locadr(k),0); - ZRE: store(argg(k),0); - ZER: for j:=1 to argw(k) div wsize do push(0); - ZRF: pushr(0); - end -end; - -procedure convops; -begin - case insr of - { CONVERT GROUP } - CII: begin s:=pop; t:=pop; - if tmaxsint then trap(ECONV); push(s) end; - 2: trap(EILLINS); - end; - 2: case szindex(argw(pop)) of - 1: pushd(pop); - 2: trap(EILLINS); - end; - end; - CUU: case szindex(argw(pop)) of - 1: if szindex(argw(pop))=2 then trap(EILLINS); - 2: trap(EILLINS); - end; - CUF: begin argwf(pop); - if szindex(argw(pop))=1 then pushr(pop) else trap(EILLINS) - end; - CFI: begin sz:=argw(pop); argwf(pop); rt:=popr; - case szindex(sz) of - 1: push(fitsw(trunc(rt),ECONV)); - 2: pushd(fitd(trunc(rt))); - end - end; - CFU: begin sz:=argw(pop); argwf(pop); rt:=popr; - case szindex(sz) of - 1: push( chopw(trunc(abs(rt)-0.5)) ); - 2: trap(EILLINS); - end - end; - CFF: begin argwf(pop); argwf(pop) end - end -end; - -procedure logops; -var i,j:integer; -begin - case insr of - { LOGICAL GROUP } - XAND: - begin k:=argw(k); - for j:= 1 to k div wsize do - begin a:=sp+k; t:=pop; store(a,bf(andf,memw(a),t)) end; - end; - IOR: - begin k:=argw(k); - for j:= 1 to k div wsize do - begin a:=sp+k; t:=pop; store(a,bf(iorf,memw(a),t)) end; - end; - XOR: - begin k:=argw(k); - for j:= 1 to k div wsize do - begin a:=sp+k; t:=pop; store(a,bf(xorf,memw(a),t)) end; - end; - COM: 
- begin k:=argw(k); - for j:= 1 to k div wsize do - begin - store(sp+k-wsize*j, bf(xorf,memw(sp+k-wsize*j), negoff-1)) - end - end; - ROL: begin k:=argw(k); if k<>wsize then trap(EILLINS); - t:=pop; s:=pop; for i:= 1 to t do rleft(s); push(s) - end; - ROR: begin k:=argw(k); if k<>wsize then trap(EILLINS); - t:=pop; s:=pop; for i:= 1 to t do rright(s); push(s) - end - end -end; - -procedure setops; -var i,j:integer; -begin - case insr of - { SET GROUP } - INN: - begin k:=argw(k); - t:=pop; - i:= t mod 8; t:= t div 8; - if t>=k then - begin trap(ESET); s:=0 end - else - begin s:=memb(sp+t) end; - newsp(sp+k); push(bit(i,s)); - end; - XSET: - begin k:=argw(k); - t:=pop; - i:= t mod 8; t:= t div 8; - for j:= 1 to k div wsize do push(0); - if t>=k then - trap(ESET) - else - begin s:=1; for j:= 1 to i do rleft(s); storeb(sp+t,s) end - end - end -end; - -procedure arrops; -begin - case insr of - { ARRAY GROUP } - LAR: - begin k:=argw(k); if k<>wsize then trap(EILLINS); a:=popa; - pushx(argo(memw(a+2*k)),arraycalc(a)) - end; - SAR: - begin k:=argw(k); if k<>wsize then trap(EILLINS); a:=popa; - popx(argo(memw(a+2*k)),arraycalc(a)) - end; - AAR: - begin k:=argw(k); if k<>wsize then trap(EILLINS); a:=popa; - push(arraycalc(a)) - end - end -end; - -procedure cmpops; -begin - case insr of - { COMPARE GROUP } - CMI: case szindex(argw(k)) of - 1: begin st:=popsw; ss:=popsw; - if ss memw(sp+k+j) then t:=1; - j:=j+wsize - end; - newsp(sp+wsize*k); push(t); - end; - - TLT: if popsw < 0 then push(1) else push(0); - TLE: if popsw <= 0 then push(1) else push(0); - TEQ: if pop = 0 then push(1) else push(0); - TNE: if pop <> 0 then push(1) else push(0); - TGE: if popsw >= 0 then push(1) else push(0); - TGT: if popsw > 0 then push(1) else push(0); - end -end; - -procedure branchops; -begin - case insr of - { BRANCH GROUP } - BRA: newpc(pc+k); - - BLT: begin st:=popsw; if popsw < st then newpc(pc+k) end; - BLE: begin st:=popsw; if popsw <= st then newpc(pc+k) end; - BEQ: begin t :=pop ; if 
pop = t then newpc(pc+k) end; - BNE: begin t :=pop ; if pop <> t then newpc(pc+k) end; - BGE: begin st:=popsw; if popsw >= st then newpc(pc+k) end; - BGT: begin st:=popsw; if popsw > st then newpc(pc+k) end; - - ZLT: if popsw < 0 then newpc(pc+k); - ZLE: if popsw <= 0 then newpc(pc+k); - ZEQ: if pop = 0 then newpc(pc+k); - ZNE: if pop <> 0 then newpc(pc+k); - ZGE: if popsw >= 0 then newpc(pc+k); - ZGT: if popsw > 0 then newpc(pc+k) - end -end; - -procedure callops; -var j:integer; -begin - case insr of - { PROCEDURE CALL GROUP } - CAL: call(argp(k)); - CAI: begin call(argp(popa)) end; - RET: begin k:=argz(k); if k div wsize>maxret then trap(EILLINS); - for j:= 1 to k div wsize do retarea[j]:=pop; retsize:=k; - newsp(lb); lb:=maxdata+1; { To circumvent stack overflow error } - newpc(popa); - if pc=maxcode then - begin - halted:=true; - if retsize=wsize then exitstatus:=retarea[1] - else exitstatus:=undef - end - else - newlb(popa); - end; - LFR: begin k:=args(k); if k<>retsize then trap(EILLINS); - for j:=k div wsize downto 1 do push(retarea[j]); - end - end -end; - -procedure miscops; -var i,j:integer; -begin - case insr of - { MISCELLANEOUS GROUP } - ASP,ASS: - begin if insr=ASS then - begin k:=argw(k); if k<>wsize then trap(EILLINS); k:=popsw end; - k:=argf(k); - if k<0 - then for j:= 1 to -k div wsize do push(undef) - else newsp(sp+k); - end; - BLM,BLS: - begin if insr=BLS then - begin k:=argw(k); if k<>wsize then trap(EILLINS); k:=pop end; - k:=argz(k); - b:=popa; a:=popa; - for j := 1 to k div wsize do - store(b-wsize+wsize*j,memw(a-wsize+wsize*j)) - end; - CSA: begin k:=argw(k); if k<>wsize then trap(EILLINS); - a:=popa; - st:= popsw - signwd(memw(a+asize)); - if (st>=0) and (st<=memw(a+wsize+asize)) then - b:=mema(a+2*wsize+asize+asize*st) else b:=mema(a); - if b=0 then trap(ECASE) else newpc(b) - end; - CSB: begin k:=argw(k); if k<>wsize then trap(EILLINS); a:=popa; - t:=pop; i:=1; found:=false; - while (i<=memw(a+asize)) and not found do - if 
t=memw(a+(asize+wsize)*i) then found:=true else i:=i+1; - if found then b:=memw(a+(asize+wsize)*i+wsize) else b:=memw(a); - if b=0 then trap(ECASE) else newpc(b); - end; - DCH: begin pusha(mema(popa+dynd)) end; - DUP,DUS: - begin if insr=DUS then - begin k:=argw(k); if k<>wsize then trap(EILLINS); k:=pop end; - k:=args(k); - for i:=1 to k div wsize do push(memw(sp+k-wsize)); - end; - EXG: begin - k:=argw(k); - for i:=1 to k div wsize do push(memw(sp+k-wsize)); - for i:=0 to k div wsize - 1 do - store(sp+k+i*wsize,memw(sp+k+k+i*wsize)); - for i:=1 to k div wsize do - begin t:=pop ; store(sp+k+k-wsize,t) end; - end; - FIL: filna(argg(k)); - GTO: begin k:=argg(k); - newlb(mema(k+2*asize)); newsp(mema(k+asize)); newpc(mema(k)) - end; - LIM: push(ignmask); - LIN: lino(argn(k)); - LNI: lino(memw(0)+1); - LOR: begin i:=argr(k); - case i of 0:pusha(lb); 1:pusha(sp); 2:pusha(hp) end; - end; - LPB: pusha(popa+statd); - MON: domon(pop); - NOP: writeln('NOP at line ',memw(0):5) ; - RCK: begin a:=popa; - case szindex(argw(k)) of - 1: if (signwd(memw(sp))signwd(memw(a+wsize))) then trap(ERANGE); - 2: if (memd(sp)memd(a+2*wsize)) then trap(ERANGE); - end - end; - RTT: dortt; - SIG: begin a:=popa; pusha(uerrorproc); uerrorproc:=a end; - SIM: ignmask:=pop; - STR: begin i:=argr(k); - case i of 0: newlb(popa); 1: newsp(popa); 2: newhp(popa) end; - end; - TRP: trap(pop) - end -end; -.bp -{---------------------------------------------------------------------------} -{ Main Loop } -{---------------------------------------------------------------------------} - -begin initialize; -8888: - repeat - opcode := nextpc; { fetch the first byte of the instruction } - if opcode=escape1 then iclass:=second - else if opcode=escape2 then iclass:=tert - else iclass:=prim; - if iclass<>prim then opcode := nextpc; - with dispat[iclass][opcode] do - begin insr:=instr; - if not (zbit in iflag) then - if ibit in iflag then k:=pop else - begin - if mini in iflag then k:=implicit else - begin - if short in 
iflag then k:=implicit+nextpc else - begin k:=nextpc; - if (sbit in iflag) and (k>=128) then k:=k-256; - for i:=2 to ilength do k:=256*k + nextpc - end - end; - if wbit in iflag then k:=k*wsize; - end - end; -case insr of - - NON: trap(EILLINS); - - { LOAD GROUP } - LDC,LOC,LOL,LOE,LIL,LOF,LAL,LAE,LXL,LXA,LOI,LOS,LDL,LDE,LDF,LPI: - loadops; - - { STORE GROUP } - STL,STE,SIL,STF,STI,STS,SDL,SDE,SDF: - storeops; - - { SIGNED INTEGER ARITHMETIC } - ADI,SBI,MLI,DVI,RMI,NGI,SLI,SRI: - intarith; - - { UNSIGNED INTEGER ARITHMETIC } - ADU,SBU,MLU,DVU,RMU,SLU,SRU: - unsarith; - - { FLOATING POINT ARITHMETIC } - ADF,SBF,MLF,DVF,NGF,FIF,FEF: - fltarith; - - { POINTER ARITHMETIC } - ADP,ADS,SBS: - ptrarith; - - { INCREMENT/DECREMENT/ZERO } - INC,INL,INE,DEC,DEL,DEE,ZRL,ZRE,ZER,ZRF: - incops; - - { CONVERT GROUP } - CII,CIU,CIF,CUI,CUU,CUF,CFI,CFU,CFF: - convops; - - { LOGICAL GROUP } - XAND,IOR,XOR,COM,ROL,ROR: - logops; - - { SET GROUP } - INN,XSET: - setops; - - { ARRAY GROUP } - LAR,SAR,AAR: - arrops; - - { COMPARE GROUP } - CMI,CMU,CMP,CMF,CMS, TLT,TLE,TEQ,TNE,TGE,TGT: - cmpops; - - { BRANCH GROUP } - BRA, BLT,BLE,BEQ,BNE,BGE,BGT, ZLT,ZLE,ZEQ,ZNE,ZGE,ZGT: - branchops; - - { PROCEDURE CALL GROUP } - CAL,CAI,RET,LFR: - callops; - - { MISCELLANEOUS GROUP } - ASP,ASS,BLM,BLS,CSA,CSB,DCH,DUP,DUS,EXG,FIL,GTO,LIM, - LIN,LNI,LOR,LPB,MON,NOP,RCK,RTT,SIG,SIM,STR,TRP: - miscops; - - end; { end of case statement } - if not ( (insr=RET) or (insr=ASP) or (insr=BRA) or (insr=GTO) ) then - retsize:=0 ; - until halted; -9999: - writeln('halt with exit status: ',exitstatus:1); - doident; -end. -.ft P -.lg 1 -.fi diff --git a/doc/em/env.nr b/doc/em/env.nr deleted file mode 100644 index 45ead04a6..000000000 --- a/doc/em/env.nr +++ /dev/null @@ -1,193 +0,0 @@ -.bp -.P1 "ENVIRONMENT INTERACTIONS" -.PP -EM programs can interact with their environment in three ways. -Two, starting/stopping and monitor calls, are dealt with in this chapter. 
-The remaining way to interact, interrupts, will be treated -together with traps in chapter 9. -.P2 "Program starting and stopping" -.PP -EM user programs start with a call to a procedure called -_m_a_i_n. -The assembler and backends look for the definition of a procedure -with this name in their input. -The call passes three parameters to the procedure. -The parameters are similar to the parameters supplied by the -.UX -operating system to C programs. -These parameters are often called \fBargc\fP, \fBargv\fP and \fBenvp\fP. -Argc is the parameter nearest to LB and is a wordsized integer. -The other two are pointers to the first element of an array of -string pointers. -The \fBargv\fP array contains \fBargc\fP -strings, the first of which contains the program call name. -The other strings in the \fBargv\fP -array are the program parameters. -.PP -The \fBenvp\fP -array contains strings in the form "name=string", where 'name' -is the name of an environment variable and string its value. -The \fBenvp\fP -is terminated by a zero pointer. -.PP -An EM user program stops if the program returns from the first -invocation of _m_a_i_n. -The contents of the function return area are used to procure a -wordsized program return code. -EM programs also stop when traps and interrupts occur that are -not caught and when the exit monitor call is executed. -.P2 "Input/Output and other monitor calls" -.PP -EM differs from most conventional machines in that it has high level i/o -instructions. -Typical instructions are OPEN FILE and READ FROM FILE instead -of low level instructions such as setting and clearing -bits in device registers. -By providing such high level i/o primitives, the task of implementing -EM on various non EM machines is made considerably easier. -.PP -I/O is initiated by the MON instruction, which expects an iocode on top -of the stack. -Often there are also parameters which are pushed on the -stack in reverse order, that is: last -parameter first. 
-Some i/o functions also provide results, which are returned on the stack. -In the list of monitor calls we use several types of parameters and results, -these types consist of integers and unsigneds of varying sizes, but never -smaller than the wordsize, and the two pointer types. -.LP -The names of the types used are: -.DS -.TS -tab(:); -l l. -int:an integer of wordsize -int2:an integer whose size is the maximum of the wordsize and 2 bytes -int4:an integer whose size is the maximum of the wordsize and 4 bytes -intp:an integer with the size of a pointer -uns2:an unsigned integer whose size is the maximum of the wordsize and 2 -unsp:an unsigned integer with the size of a pointer -ptr:a pointer into data space -.TE -.DE -.LP -The table below lists the i/o codes with their results and -parameters. -This list is similar to the system calls of the UNIX Version 7 -operating system. -.QQ -To execute a monitor call, proceed as follows: -.IP a) -Stack the parameters, in reverse order, last parameter first. -.IP b) -Push the monitor call number (iocode) onto the stack. -.IP c) -Execute the MON instruction. -.LP -An error code is present on the top of the stack after -execution of most monitor calls. -If this error code is zero, the call performed the action -requested and the results are available on top of the stack. -Non-zero error codes indicate a failure, in this case no -results are available and the error code has been pushed twice. -This construction enables programs to test for failure with a -single instruction (~TEQ or TNE~) and still find out the cause of -the failure. -The result name 'e' is reserved for the error code. -.ne 5 -.LP -List of monitor calls. 
-.LP -.nf -.na -.ta 4n 13n 29n 52n -nr name parameters results function - -1 Exit status:int Terminate this process -2 Fork e,flag,pid:int Spawn new process -3 Read fildes:int;buf:ptr;nbytes:unsp - e:int;rbytes:unsp Read from file -4 Write fildes:int;buf:ptr;nbytes:unsp - e:int;wbytes:unsp Write on a file -5 Open string:ptr;flag:int - e,fildes:int Open file for read and/or write -6 Close fildes:int e:int Close a file -7 Wait e:int;status,pid:int2 - Wait for child -8 Creat string:ptr;mode:int - e,fildes:int Create a new file -9 Link string1,string2:ptr - e:int Link to a file -10 Unlink string:ptr e:int Remove directory entry -12 Chdir string:ptr e:int Change default directory -14 Mknod string:ptr;mode,addr:int2 - e:int Make a special file -15 Chmod string:ptr;mode:int2 - e:int Change mode of file -16 Chown string:ptr;owner,group:int2 - e:int Change owner/group of a file -18 Stat string,statbuf:ptr - e:int Get file status -19 Lseek fildes:int;off:int4;whence:int - e:int;oldoff:int4 Move read/write pointer -20 Getpid pid:int2 Get process identification -21 Mount special,string:ptr;rwflag:int - e:int Mount file system -22 Umount special:ptr e:int Unmount file system -23 Setuid userid:int2 e:int Set user ID -24 Getuid e_uid,r_uid:int2 Get user ID -25 Stime time:int4 e:int Set time and date -26 Ptrace request:int;pid:int2;addr:ptr;data:int - e,value:int Process trace -27 Alarm seconds:uns2 previous:uns2 Schedule signal -28 Fstat fildes:int;statbuf:ptr - e:int Get file status -29 Pause Stop until signal -30 Utime string,timep:ptr - e:int Set file times -33 Access string:ptr;mode:int - e:int Determine file accessibility -34 Nice incr:int Set program priority -35 Ftime bufp:ptr e:int Get date and time -36 Sync Update filesystem -37 Kill pid:int2;sig:int - e:int Send signal to a process -41 Dup fildes,newfildes:int - e,fildes:int Duplicate a file descriptor -42 Pipe e,w_des,r_des:int Create a pipe -43 Times buffer:ptr Get process times -44 Profil 
buff:ptr;bufsiz,offset,scale:intp - Execution time profile -46 Setgid gid:int2 e:int Set group ID -47 Getgid e_gid,r_gid:int Get group ID -48 Sigtrp trapno,signo:int - e,prevtrap:int See below -51 Acct file:ptr e:int Turn accounting on or off -53 Lock flag:int e:int Lock a process -54 Ioctl fildes,request:int;argp:ptr - e:int Control device -56 Mpxcall cmd:int;vec:ptr e:int Multiplexed file handling -59 Exece name,argv,envp:ptr - e:int Execute a file -60 Umask mask:int2 oldmask:int2 Set file creation mode mask -61 Chroot string:ptr e:int Change root directory -.fi -.ad -.LP -Codes 0, 11, 13, 17, 31, 32, 38, 39, 40, 45, 49, 50, 52, -55, 57, 58, 62, and 63 are -not used. -.PP -All monitor calls, except fork and sigtrp -are the same as the UNIX version 7 system calls. -.PP -The sigtrp entry maps UNIX signals onto EM interrupts. -Normally, trapno is in the range 0 to 252. -In that case it requests that signal signo -will cause trap trapno to occur. -When given trap number \-2, default signal handling is reset, and when given -trap number \-3, the signal is ignored. -.PP -The flag returned by fork is 1 in the child process and 0 in -the parent. -The pid returned is the process-id of the other process. 
diff --git a/doc/em/even.c b/doc/em/even.c deleted file mode 100644 index 645d9b6b0..000000000 --- a/doc/em/even.c +++ /dev/null @@ -1,9 +0,0 @@ -main() { - register int l,j ; - - for ( j=0 ; (l=getchar()) != -1 ; j++ ) { - if ( j%16 == 15 ) printf("%3d\n",l&0377 ) ; - else printf("%3d ",l&0377 ) ; - } - printf("\n") ; -} diff --git a/doc/em/exam.e b/doc/em/exam.e deleted file mode 100644 index ff5e210b6..000000000 --- a/doc/em/exam.e +++ /dev/null @@ -1,178 +0,0 @@ - mes 2,2,2 ; wordsize 2, pointersize 2 - .1 - rom 't.p\000' ; the name of the source file - hol 552,-32768,0 ; externals and buf occupy 552 bytes - exp $sum ; sum can be called from other modules - pro $sum,2 ; procedure sum; 2 bytes local storage - lin 8 ; code from source line 8 - ldl 0 ; load two locals ( a and b ) - adi 2 ; add them - ret 2 ; return the result - end 2 ; end of procedure ( still two bytes local storage ) - .2 - rom 1,99,2 ; descriptor of array a[] - exp $test ; the compiler exports all level 0 procedures - pro $test,226 ; procedure test, 226 bytes local storage - .3 - rom 4.8F8 ; assemble Floating point 4.8 (8 bytes) in - .4 ; global storage - rom 0.5F8 ; same for 0.5 - mes 3,-226,2,2 ; compiler temporary not referenced indirect - mes 3,-24,2,0 ; the same is true for i, j, b and c in test - mes 3,-22,2,0 - mes 3,-4,2,0 - mes 3,-2,2,0 - mes 3,-20,8,0 ; and for x and y - mes 3,-12,8,0 - lin 20 ; maintain source line number - loc 1 - stl -4 ; j := 1 - lni ; was lin 21 prior to optimization - lol -4 - loc 3 - mli 2 - loc 6 - adi 2 - stl -2 ; i := 3 * j + 6 - lni ; was lin 22 prior to optimization - lae .3 - loi 8 - lal -12 - sti 8 ; x := 4.8 - lni ; was lin 23 prior to optimization - lal -12 - loi 8 - lae .4 - loi 8 - dvf 8 - lal -20 - sti 8 ; y := x / 0.5 - lni ; was lin 24 prior to optimization - loc 1 - stl -22 ; b := true - lni ; was lin 25 prior to optimization - loc 122 - stl -24 ; c := 'z' - lni ; was lin 26 prior to optimization - loc 1 - stl -2 ; for i:= 1 - 2 - lol -2 - dup 2 
- mli 2 ; i*i - lal -224 - lol -2 - lae .2 - sar 2 ; a[i] := - lol -2 - loc 100 - beq *3 ; to 100 do - inl -2 ; increment i and loop - bra *2 - 3 - lin 27 - lol -4 - loc 27 - adi 2 ; j + 27 - sil 0 ; r.r1 := - lni ; was lin 28 prior to optimization - lol -22 ; b - lol 0 - stf 10 ; r.r3 := - lni ; was lin 29 prior to optimization - lal -20 - loi 16 - adf 8 ; x + y - lol 0 - adp 2 - sti 8 ; r.r2 := - lni ; was lin 30 prior to optimization - lal -224 - lol -4 - lae .2 - lar 2 ; a[j] - lil 0 ; r.r1 - cal $sum ; call now - asp 4 ; remove parameters from stack - lfr 2 ; get function result - stl -2 ; i := - 4 - lin 31 - lol -2 - zle *5 ; while i > 0 do - lol -4 - lil 0 - adi 2 - stl -4 ; j := j + r.r1 - del -2 ; i := i - 1 - bra *4 ; loop - 5 - lin 32 - lol 0 - stl -226 ; make copy of address of r - lol -22 - lol -226 - stf 10 ; r3 := b - lal -20 - loi 16 - adf 8 - lol -226 - adp 2 - sti 8 ; r2 := x + y - loc 0 - sil -226 ; r1 := 0 - lin 34 ; note the absence of the unnecessary jump - lae 22 ; address of output structure - lol -4 - cal $_wri ; write integer with default width - asp 4 ; pop parameters - lae 22 - lol -2 - loc 6 - cal $_wsi ; write integer width 6 - asp 6 - lae 22 - lal -12 - loi 8 - loc 9 - loc 3 - cal $_wrf ; write fixed format real, width 9, precision 3 - asp 14 - lae 22 - lol -22 - cal $_wrb ; write boolean, default width - asp 4 - lae 22 - cal $_wln ; writeln - asp 2 - ret 0 ; return, no result - end 226 - exp $_main - pro $_main,0 ; main program - .6 - con 2,-1,22 ; description of external files - .5 - rom 15.96F8 - fil .1 ; maintain source file name - lae .6 ; description of external files - lae 0 ; base of hol area to relocate buffer addresses - cal $_ini ; initialize files, etc... 
- asp 4 - lin 37 - lae .5 - loi 8 - lae 2 - sti 8 ; x := 15.9 - lni ; was lin 38 prior to optimization - loc 99 - ste 0 ; mi := 99 - lni ; was lin 39 prior to optimization - lae 10 ; address of r - cal $test - asp 2 - loc 0 ; normal exit - cal $_hlt ; cleanup and finish - asp 2 - end 0 - mes 4,40 ; length of source file is 40 lines - mes 5 ; reals were used diff --git a/doc/em/exam.p b/doc/em/exam.p deleted file mode 100644 index 5d2e985cc..000000000 --- a/doc/em/exam.p +++ /dev/null @@ -1,40 +0,0 @@ - program example(output); - {This program just demonstrates typical EM code.} - type rec = record r1: integer; r2:real; r3: boolean end; - var mi: integer; mx:real; r:rec; - - function sum(a,b:integer):integer; - begin - sum := a + b - end; - - procedure test(var r: rec); - label 1; - var i,j: integer; - x,y: real; - b: boolean; - c: char; - a: array[1..100] of integer; - - begin - j := 1; - i := 3 * j + 6; - x := 4.8; - y := x/0.5; - b := true; - c := 'z'; - for i:= 1 to 100 do a[i] := i * i; - r.r1 := j+27; - r.r3 := b; - r.r2 := x+y; - i := sum(r.r1, a[j]); - while i > 0 do begin j := j + r.r1; i := i - 1 end; - with r do begin r3 := b; r2 := x+y; r1 := 0 end; - goto 1; - 1: writeln(j, i:6, x:9:3, b) - end; {test} - begin {main program} - mx := 15.96; - mi := 99; - test(r) - end. diff --git a/doc/em/int/.distr b/doc/em/int/.distr deleted file mode 100644 index 8fb14befa..000000000 --- a/doc/em/int/.distr +++ /dev/null @@ -1,5 +0,0 @@ -proto.make -READ_ME -em.p -emdmp.c -mktables.c diff --git a/doc/em/int/Makefile b/doc/em/int/Makefile deleted file mode 100644 index 7895cb20e..000000000 --- a/doc/em/int/Makefile +++ /dev/null @@ -1,32 +0,0 @@ -CFLAGS=-O -HOME=../../.. 
- -install \ -all: em emdmp tables - -tables: mktables $(HOME)/etc/ip_spec.t - mktables $(HOME)/etc/ip_spec.t tables - -mktables: mktables.c $(HOME)/h/em_spec.h $(HOME)/h/em_flag.h \ - $(HOME)/lib.bin/em_data.a $(HOME)/h/ip_spec.h - $(CC) -I$(HOME)/h -O -o mktables mktables.c $(HOME)/lib.bin/em_data.a - -em.out: em.p - apc -mint -O em.p >emerrs ; mv e.out em.out - -em: em.p - apc -O -i em.p >emerrs ; mv a.out em - -nem.p: em.p - sed -e '/maxadr = t16/s//maxadr =t15/' -e '/maxdata = 8191; /s//maxdata = 14335;/' -e '/ adr=.*long/s// adr= 0..maxadr/' nem.p - -nem: nem.p - apc -O -i nem.p >emerrs ; mv a.out nem - -emdmp: emdmp.c - $(CC) -I$(HOME)/h -I$(HOME)/config -o emdmp -O emdmp.c - -cmp: - -pr: - @pr em.p mktables.c emdmp.c diff --git a/doc/em/int/READ_ME b/doc/em/int/READ_ME deleted file mode 100644 index bd14ade37..000000000 --- a/doc/em/int/READ_ME +++ /dev/null @@ -1,5 +0,0 @@ -This interpreter is meant for inclusion in the EM manual. -Although slow, it showed decent behaviour on several tests. -The only monitor calls implemented are exit, read(untested), -write and ioctl - just reurns the correct code for telling it's -a terminal - diff --git a/doc/em/int/em.p b/doc/em/int/em.p deleted file mode 100644 index 1a6cbcfc9..000000000 --- a/doc/em/int/em.p +++ /dev/null @@ -1,1768 +0,0 @@ -# -{ This is an interpreter for EM. It serves as a specification for the - EM machine. This interpreter must run on a machine which supports - arithmetic with words and memory offsets. - - Certain aspects are over specified. In particular: - - 1. The representation of an address on the stack need not be the - numerical value of the memory location. - - 2. The state of the stack is not defined after a trap has aborted - an instruction in the middle. For example, it is officially un- - defined whether the second operand of an ADD instruction has - been popped or not if the first one is undefined ( -32768 or - unsigned 32768). - - 3. The memory layout is implementation dependent. 
Only the most - basic checks are performed whenever memory is accessed. - - 4. The representation of an integer or set on the stack is not fixed - in bit order. - - 5. The format and existence of the procedure descriptors depends on - the implementation. - - 6. The result of the compare operators CMI etc. are -1, 0 and 1 - here, but other negative and positive values will do and they - need not be the same each time. - - 7. The shift count for SHL, SHR, ROL and ROR must be in the range 0 - to object size in bits - 1. The effect of a count not in this - range is undefined. - - 8. This interpreter does not work for double word integers, although - any decent EM implementation will include double word arithmetic. - } - - - - - - - - - - - - - - - - - - - - - - -{$i256} -{$d+} -#ifndef DOC -program em(tables,prog,core,input,output); -#else -program em(tables,prog,input,output); -#endif - - -label 8888,9999; - -const - t15 = 32768; { 2**15 } - t15m1 = 32767; { 2**15 -1 } - t16 = 65536; { 2**16 } - t16m1 = 65535; { 2**16 -1 } - t31m1 = 2147483647; { 2**31 -1 } - - { constants indicating the size of words and addresses } - wsize = 2; { number of bytes in a word } - asize = 2; { number of bytes in an address } - fsize = 4; { number of bytes in a floating point number } - maxret =4; { number of words in the return value area } - - signbit = t15; { the power of two indicating the sign bit } - negoff = t16; { the next power of two } - maxsint = t15m1; { the maximum signed integer } - maxuint = t16m1; { the maximum unsigned integer } - maxdbl = t31m1; { the maximum double signed integer } - maxadr = t16m1; { the maximum address } - maxoffs = t15m1; { the maximum offset from an address } - maxbitnr= 15; { the number of the highest bit } - - lineadr = 0; { address of the line number } - fileadr = 4; { address of the file name } - maxcode = 8191; { highest byte in code address space } - maxdata = 8191; { highest byte in data address space } - - { format of status save area } - 
statd = 4; { how far is static link from lb } - dynd = 2; { how far is dynamic link from lb } - reta = 0; { how far is the return address from lb } - savsize = 4; { size of save area in bytes } - - { procedure descriptor format } - pdlocs = 0; { offset for size of local variables in bytes } - pdbase = asize; { offset for the procedure base } - pdsize = 4; { size of procedure descriptor in bytes = 2*asize } - - { header words } - NTEXT = 1; - NDATA = 2; - NPROC = 3; - ENTRY = 4; - NLINE = 5; - SZDATA = 6; - - escape1 = 254; { escape to secondary opcodes } - escape2 = 255; { escape to tertiary opcodes } - undef = signbit; { the range of integers is -32767 to +32767 } - - { error codes } - EARRAY = 0; ERANGE = 1; ESET = 2; EIOVFL = 3; - EFOVFL = 4; EFUNFL = 5; EIDIVZ = 6; EFDIVZ = 7; - EIUND = 8; EFUND = 9; ECONV = 10; ESTACK = 16; - EHEAP = 17; EILLINS = 18; EODDZ = 19; ECASE = 20; - EMEMFLT = 21; EBADPTR = 22; EBADPC = 23; EBADLAE = 24; - EBADMON = 25; EBADLIN = 26; EBADGTO = 27; -{ -.ne 20 -.bp -----------------------------------------------------------------------------} -{ Declarations } -{---------------------------------------------------------------------------} - -type - bitval= 0..1; { one bit } - bitnr= 0..maxbitnr; { bits in machine words are numbered 0 to 15 } - byte= 0..255; { memory is an array of bytes } - adr= {0..maxadr} long; { the range of addresses } - word= {0..maxuint} long;{ the range of unsigned integers } - offs= -maxoffs..maxoffs; { the range of signed offsets from addresses } - size= 0..maxoffs; { the range of sizes is the positive offsets } - sword= {-signbit..maxsint} long; { the range of signed integers } - full= {-maxuint..maxuint} long; { intermediate results need this range } - double={-maxdbl..maxdbl} long; { double precision range } - bftype= (andf,iorf,xorf); { tells which boolean operator needed } - insclass=(prim,second,tert); { tells which opcode table is in use } - instype=(implic,explic); { does opcode have implicit or 
explicit operand } - iflags= (mini,short,sbit,wbit,zbit,ibit); - ifset= set of iflags; - - mnem = ( NON, - AAR, ADF, ADI, ADP, ADS, ADU,XAND, ASP, ASS, BEQ, - BGE, BGT, BLE, BLM, BLS, BLT, BNE, BRA, CAI, CAL, - CFF, CFI, CFU, CIF, CII, CIU, CMF, CMI, CMP, CMS, - CMU, COM, CSA, CSB, CUF, CUI, CUU, DCH, DEC, DEE, - DEL, DUP, DUS, DVF, DVI, DVU, EXG, FEF, FIF, FIL, - GTO, INC, INE, INL, INN, IOR, LAE, LAL, LAR, LDC, - LDE, LDF, LDL, LFR, LIL, LIM, LIN, LNI, LOC, LOE, - LOF, LOI, LOL, LOR, LOS, LPB, LPI, LXA, LXL, MLF, - MLI, MLU, MON, NGF, NGI, NOP, RCK, RET, RMI, RMU, - ROL, ROR, RTT, SAR, SBF, SBI, SBS, SBU, SDE, SDF, - SDL,XSET, SIG, SIL, SIM, SLI, SLU, SRI, SRU, STE, - STF, STI, STL, STR, STS, TEQ, TGE, TGT, TLE, TLT, - TNE, TRP, XOR, ZEQ, ZER, ZGE, ZGT, ZLE, ZLT, ZNE, - ZRE, ZRF, ZRL); - - dispatch = record - iflag: ifset; - instr: mnem; - case instype of - implic: (implicit:sword); - explic: (ilength:byte); - end; - - -var - code: packed array[0..maxcode] of byte; { code space } - data: packed array[0..maxdata] of byte; { data space } - retarea: array[1..maxret ] of word; { return area } - pc,lb,sp,hp,pd: adr; { internal machine registers } - i: integer; { integer scratch variable } - s,t :word; { scratch variables } - sz:size; { scratch variables } - ss,st: sword; { scratch variables } - k :double; { scratch variables } - j:size; { scratch variable used as index } - a,b:adr; { scratch variable used for addresses } - dt,ds:double; { scratch variables for double precision } - rt,rs,x,y:real; { scratch variables for real } - found:boolean; { scratch } - opcode: byte; { holds the opcode during execution } - iclass: insclass; { true for escaped opcodes } - dispat: array[insclass,byte] of dispatch; - retsize:size; { holds size of last LFR } - insr: mnem; { holds the instruction number } - halted: boolean; { normally false } - exitstatus:word; { parameter of MON 1 } - ignmask:word; { ignore mask for traps } - uerrorproc:adr; { number of user defined error procedure } 
- intrap:boolean; { Set when executing trap(), to catch recursive calls} - trapval:byte; { Set to number of last trap } - header: array[1..8] of adr; - - tables: text; { description of EM instructions } - prog: file of byte; { program and initialized data } -#ifndef DOC - core: file of byte; { post mortem dump } -#endif -{ -.ne 20 -.sp 5 -{---------------------------------------------------------------------------} -{ Various check routines } -{---------------------------------------------------------------------------} - -{ Only the most basic checks are performed. These routines are inherently - implementation dependent. } - -procedure trap(n:byte); forward; -#ifndef DOC -procedure writecore(n:byte); forward; -#endif - -procedure memadr(a:adr); -begin if (a>maxdata) or ((a=hp)) then trap(EMEMFLT) end; - -procedure wordadr(a:adr); -begin memadr(a); if (a mod wsize<>0) then trap(EBADPTR) end; - -procedure chkadr(a:adr; s:size); -begin memadr(a); memadr(a+s-1); { assumption: size is ok } - if s0 then trap(EBADPTR) end - else if a mod wsize<>0 then trap(EBADPTR) -end; - -procedure newpc(a:double); -begin if (a<0) or (a>maxcode) then trap(EBADPC); pc:=a end; - -procedure newsp(a:adr); -begin if (a>lb) or (a0) then trap(ESTACK); sp:=a end; - -procedure newlb(a:adr); -begin if (a0) then trap(ESTACK); lb:=a end; - -procedure newhp(a:adr); -begin if (a>sp) or (a>maxdata+1) or (a mod wsize<>0) - then trap(EHEAP) - else hp:=a -end; - -function argc(a:double):sword; -begin if (a<-signbit) or (a>maxsint) then trap(EILLINS); argc:=a end; - -function argd(a:double):double; -begin if (a<-maxdbl) or (a>maxdbl) then trap(EILLINS); argd:=a end; - -function argl(a:double):offs; -begin if (a<-maxoffs) or (a>maxoffs) then trap(EILLINS); argl:=a end; - -function argg(k:double):adr; -begin if (k<0) or (k>maxadr) then trap(EILLINS); argg:=k end; - -function argf(a:double):offs; -begin if (a<-maxoffs) or (a>maxoffs) then trap(EILLINS); argf:=a end; - -function argn(a:double):word; -begin 
if (a<0) or (a>maxuint) then trap(EILLINS); argn:=a end; - -function args(a:double):size; -begin if (a<=0) or (a>maxoffs) - then trap(EODDZ) - else if (a mod wsize)<>0 then trap(EODDZ); - args:=a ; -end; - -function argz(a:double):size; -begin if (a<0) or (a>maxoffs) - then trap(EODDZ) - else if (a mod wsize)<>0 then trap(EODDZ); - argz:=a ; -end; - -function argo(a:double):size; -begin if (a<=0) or (a>maxoffs) - then trap(EODDZ) - else if (a mod wsize<>0) and (wsize mod a<>0) then trap(EODDZ); - argo:=a ; -end; - -function argw(a:double):size; -begin if (a<=0) or (a>maxoffs) or (a>maxuint) - then trap(EODDZ) - else if (a mod wsize)<>0 then trap(EODDZ); - argw:=a ; -end; - -function argp(a:double):size; -begin if (a<0) or (a>=header[NPROC]) then trap(EILLINS); argp:=a end; - -function argr(a:double):word; -begin if (a<0) or (a>2) then trap(EILLINS); argr:=a end; - -procedure argwf(s:double); -begin if argw(s)<>fsize then trap(EILLINS) end; - -function szindex(s:double):integer; -begin s:=argw(s); if (s mod wsize <> 0) or (s>2*wsize) then trap(EILLINS); - szindex:=s div wsize -end; - -function locadr(l:double):adr; -begin l:=argl(l); if l<0 then locadr:=lb+l else locadr:=lb+l+savsize end; - -function signwd(w:word):sword; -begin if w = undef then trap(EIUND); - if w >= signbit then signwd:=w-negoff else signwd:=w -end; - -function dosign(w:word):sword; -begin if w >= signbit then dosign:=w-negoff else dosign:=w end; - -function unsign(w:sword):word; -begin if w<0 then unsign:=w+negoff else unsign:=w end; - -function chopw(dw:double):word; -begin chopw:=dw mod negoff end; - -function fitsw(w:full;trapno:byte):word; -{ checks whether value fits in signed word, returns unsigned representation} -begin - if (w>maxsint) or (w<-signbit) then - begin trap(trapno); - if w<0 then fitsw:=negoff- (-w)mod negoff - else fitsw:=w mod negoff; - end - else fitsw:=unsign(w) -end; - -function fitd(w:full):double; -begin - if abs(w) > maxdbl then trap(ECONV); - fitd:=w -end; - -{ -.ne 
20 -.sp 5 -{---------------------------------------------------------------------------} -{ Memory access routines } -{---------------------------------------------------------------------------} - -{ memw returns a machine word as an unsigned integer - memb returns a single byte as a positive integer: 0 <= memb <= 255 - mems(a,s) fetches an object smaller than a word and returns a word - store(a,v) stores the word v at machine address a - storea(a,v) stores the address v at machine address a - storeb(a,b) stores the byte b at machine address a - stores(a,s,v) stores the s least significant bytes of a word at address a - memi returns an offset from the instruction space - Note that the procedure descriptors are part of instruction space. - nextpc returns the next byte addressed by pc, incrementing pc - - lino changes the line number word. - filna changes the pointer to the file name. - - All routines check to make sure the address is within range and valid for - the size of the object. If an addressing error is found, a trap occurs. 
-} - - -function memw(a:adr):word; -var b:word; i:integer; -begin wordadr(a); b:=0; - for i:=wsize-1 downto 0 do b:=256*b + data[a+i] ; - memw:=b -end; - -function memd(a:adr):double; { Always signed } -var b:double; i:integer; -begin wordadr(a); b:=data[a+2*wsize-1]; - if b>=128 then b:=b-256; - for i:=2*wsize-2 downto 0 do b:=256*b + data[a+i] ; - memd:=b -end; - -function mema(a:adr):adr; -var b:adr; i:integer; -begin wordadr(a); b:=0; - for i:=asize-1 downto 0 do b:=256*b + data[a+i] ; - mema:=b -end; - -function mems(a:adr;s:size):word; -var i:integer; b:word; -begin chkadr(a,s); b:=0; for i:=1 to s do b:=b*256+data[a+s-i]; mems:=b end; - -function memb(a:adr):byte; -begin memadr(a); memb:=data[a] end; - -procedure store(a:adr; x:word); -var i:integer; -begin wordadr(a); - for i:=0 to wsize-1 do - begin data[a+i]:=x mod 256; x:=x div 256 end -end; - -procedure storea(a:adr; x:adr); -var i:integer; -begin wordadr(a); - for i:=0 to asize-1 do - begin data[a+i]:=x mod 256; x:=x div 256 end -end; - -procedure stores(a:adr;s:size;v:word); -var i:integer; -begin chkadr(a,s); - for i:=0 to s-1 do begin data[a+i]:=v mod 256; v:=v div 256 end; -end; - -procedure storeb(a:adr; b:byte); -begin memadr(a); data[a]:=b end; - -function memi(a:adr):adr; -var b:adr; i:integer; -begin if (a mod wsize<>0) or (a+asize-1>maxcode) then trap(EBADPTR); b:=0; - for i:=asize-1 downto 0 do b:=256*b + code[a+i] ; - memi:=b -end; - -function nextpc:byte; -begin if pc>=pd then trap(EBADPC); nextpc:=code[pc]; newpc(pc+1) end; - -procedure lino(w:word); -begin store(lineadr,w) end; - -procedure filna(a:adr); -begin storea(fileadr,a) end; -{ -.ne 20 -.sp 5 -{---------------------------------------------------------------------------} -{ Stack Manipulation Routines } -{---------------------------------------------------------------------------} - -{ push puts a word on the stack - pushsw takes a signed one word integer and pushes it on the stack - pop removes a machine word from the stack and 
delivers it as a word - popsw removes a machine word from the stack and delivers a signed integer - pusha pushes an address on the stack - popa removes a machine word from the stack and delivers it as an address - pushd pushes a double precision number on the stack - popd removes two machine words and returns a double precision integer - pushr pushes a float (floating point) number on the stack - popr removes several machine words and returns a float number - pushx puts an object of arbitrary size on the stack - popx removes an object of arbitrary size - } - -procedure push(x:word); -begin newsp(sp-wsize); store(sp,x) end; - -procedure pushsw(x:sword); -begin newsp(sp-wsize); store(sp,unsign(x)) end; - -function pop:word; -begin pop:=memw(sp); newsp(sp+wsize) end; - -function popsw:sword; -begin popsw:=signwd(pop) end; - -procedure pusha(x:adr); -begin newsp(sp-asize); storea(sp,x) end; - -function popa:adr; -begin popa:=mema(sp); newsp(sp+asize) end; - -procedure pushd(y:double); -begin { push double integer onto the stack } newsp(sp-2*wsize) end; - -function popd:double; -begin { pop double integer from the stack } newsp(sp+2*wsize); popd:=0 end; - -procedure pushr(z:real); -begin { Push a float onto the stack } newsp(sp-fsize) end; - -function popr:real; -begin { pop float from the stack } newsp(sp+fsize); popr:=0.0 end; - -procedure pushx(objsize:size; a:adr); -var i:integer; -begin - if objsize= 0 then w := w div 2 else w := (w-1) div 2 end; - -procedure suright(var w:word); { 1 bit right shift without sign extension } -begin w := w div 2 end; - -procedure sdright(var d:double); { 1 bit right shift } -begin { shift two word signed integer } end; - -procedure rleft(var w:word); { 1 bit left rotate } -begin if w >= t15 - then w:=(w-t15)*2 + 1 - else w:=w*2 -end; - -procedure rright(var w:word); { 1 bit right rotate } -begin if w mod 2 = 1 - then w:=w div 2 + t15 - else w:=w div 2 -end; - -function sextend(w:word;s:size):word; -var i:size; -begin - for i:=1 to 
(wsize-s)*8 do rleft(w); - for i:=1 to (wsize-s)*8 do sright(w); - sextend:=w; -end; - -function bit(b:bitnr; w:word):bitval; { return bit b of the word w } -var i:bitnr; -begin for i:= 1 to b do rright(w); bit:= w mod 2 end; - -function bf(ty:bftype; w1,w2:word):word; { return boolean fcn of 2 words } -var i:bitnr; j:word; -begin j:=0; - for i:= maxbitnr downto 0 do - begin j := 2*j; - case ty of - andf: if bit(i,w1)+bit(i,w2) = 2 then j:=j+1; - iorf: if bit(i,w1)+bit(i,w2) > 0 then j:=j+1; - xorf: if bit(i,w1)+bit(i,w2) = 1 then j:=j+1 - end - end; - bf:=j -end; - -{---------------------------------------------------------------------------} -{ Array indexing -{---------------------------------------------------------------------------} - -function arraycalc(c:adr):adr; { subscript calculation } -var j:full; objsize:size; a:adr; -begin j:= popsw - signwd(memw(c)); - if (j<0) or (j>memw(c+wsize)) then trap(EARRAY); - objsize := argo(memw(c+wsize+wsize)); - a := j*objsize+popa; chkadr(a,objsize); - arraycalc:=a -end; -{ -.ne 20 -.sp 5 -{---------------------------------------------------------------------------} -{ Double and Real Arithmetic } -{---------------------------------------------------------------------------} - -{ All routines for doubles and floats are dummy routines, since the format of - doubles and floats is not defined in EM. 
-} - -function doadi(ds,dt:double):double; -begin { add two doubles } doadi:=0 end; - -function dosbi(ds,dt:double):double; -begin { subtract two doubles } dosbi:=0 end; - -function domli(ds,dt:double):double; -begin { multiply two doubles } domli:=0 end; - -function dodvi(ds,dt:double):double; -begin { divide two doubles } dodvi:=0 end; - -function dormi(ds,dt:double):double; -begin { modulo of two doubles } dormi:=0 end; - -function dongi(ds:double):double; -begin { negative of a double } dongi:=0 end; - -function doadf(x,y:real):real; -begin { add two floats } doadf:=0.0 end; - -function dosbf(x,y:real):real; -begin { subtract two floats } dosbf:=0.0 end; - -function domlf(x,y:real):real; -begin { multiply two floats } domlf:=0.0 end; - -function dodvf(x,y:real):real; -begin { divide two floats } dodvf:=0.0 end; - -function dongf(x:real):real; -begin { negate a float } dongf:=0.0 end; - -procedure dofif(x,y:real;var intpart,fraction:real); -begin { dismember x*y into integer and fractional parts } - intpart:=0.0; { integer part of x*y, same sign as x*y } - fraction:=0.0; - { fractional part of x*y, 0<=abs(fraction)<1 and same sign as x*y } -end; - -procedure dofef(x:real;var mantissa:real;var exponent:sword); -begin { dismember x into mantissa and exponent parts } - mantissa:=0.0; { mantissa of x , >= 1/2 and <1 } - exponent:=0; { base 2 exponent of x } -end; - -{ -.ne 20 -.sp 5 -.bp -{---------------------------------------------------------------------------} -{ Trap and Call } -{---------------------------------------------------------------------------} - -procedure call(p:adr); { Perform the call } -begin - pusha(lb);pusha(pc); - newlb(sp);newsp(sp - memi(pd + pdsize*p + pdlocs)); - newpc(memi(pd + pdsize*p+ pdbase)) -end; - -procedure dotrap(n:byte); -var i:size; -begin - if (uerrorproc=0) or intrap then - begin - if intrap then - writeln('Recursive trap, first trap number was ', trapval:1); - writeln('Error ', n:1); - writeln('With',ord(insr):4,' arg 
',k:1); -#ifndef DOC - writecore(n); -#endif - goto 9999 - end; - { Deposit all interpreter variables that need to be saved on - the stack. This includes all scratch variables that can - be in use at the moment and ( not possible in this interpreter ) - the internal address of the interpreter where the error occurred. - This would make it possible to execute an RTT instruction totally - transparent to the user program. - It can, for example, occur within an ADD instruction that both - operands are undefined and that the result overflows. - Although this will generate 3 error traps it must be possible - to ignore them all. - - } - intrap:=true; trapval:=n; - for i:=retsize div wsize downto 1 do push(retarea[i]); - push(retsize); { saved return area } - pusha(mema(fileadr)); { saved current file name pointer } - push(memw(lineadr)); { saved line number } - push(n); { push error number } - a:=argp(uerrorproc); - uerrorproc:=0; { reset signal } - call(a); { call the routine } - intrap:=false; { Do not catch recursive traps anymore } - goto 8888; { reenter main loop } -end; - -procedure trap; -{ This routine is invoked for overflow, and other run time errors. - For non-fatal errors, trap returns to the calling routine -} -begin - if n>=16 then dotrap(n) else if bit(n,ignmask)=0 then dotrap(n); -end; - -procedure dortt; -{ The restoration of file address and line number is not essential. - The restoration of the return save area is. 
-} -var i:size; - n:word; -begin - newsp(lb); lb:=maxdata+1 ; { to circumvent ESTACK for the popa + pop } - newpc(popa); newlb(popa); { So far a plain RET 0 } - n:=pop; if (n>=16) and (n<64) then - begin -#ifndef DOC - writecore(n); -#endif - goto 9999 - end; - lino(pop); filna(popa); retsize:=pop; - for i:=1 to retsize div wsize do retarea[i]:=pop ; -end; -{ -.sp 5 -{---------------------------------------------------------------------------} -{ monitor calls } -{---------------------------------------------------------------------------} - - -procedure domon(entry:word); -var index: 1..63; - dummy: double; - count,rwptr: adr; - token: byte; - i: integer; -begin - if (entry<=0) or (entry>63) then entry:=63 ; - index:=entry; - case index of - 1: begin { exit } exitstatus:=pop; halted:=true end; - 3: begin { read } dummy:=pop; { All input is from stdin } - rwptr:=popa; count:=popa; - i:=0 ; - while (not eof(input)) and (i0 then - begin i:=20; found:=false; - while (i<>0) and not found do - begin c:=memb(a); a:=a+1; found:=true; i:=i-1; - if (c>=48) and (c<=57) then - begin found:=false; write(chr(ord('0')+c-48)) end; - if (c>=65) and (c<=90) then - begin found:=false; write(chr(ord('A')+c-65)) end; - if (c>=97) and (c<=122) then - begin found:=false; write(chr(ord('a')+c-97)) end; - end; - end; - writeln; -end; - -#ifndef DOC -{---------------------------------------------------------------------------} -{ Post Mortem Dump } -{ } -{This a not a part of the machine definition, but an ad hoc debugging method} -{---------------------------------------------------------------------------} - -procedure writecore; -var ncoreb,i:integer; - -procedure wrbyte(b:byte); -begin write(core,b); ncoreb:=ncoreb+1 end; - -procedure wradr(a:adr); -var i:integer; -begin for i:=1 to asize do begin wrbyte(a mod 256); a:=a div 256 end end; - -begin - rewrite(core); ncoreb:=0; - wrbyte(173); wrbyte(16); { Magic } - wrbyte(3);wrbyte(0); { Version } - wrbyte(wsize);wrbyte(0); { Wordsize } - 
wrbyte(asize);wrbyte(0); { Address size } - wradr(0); { Text size in dump } - wradr(maxdata+1); { Data size in dump } - wradr(ignmask); - wradr(uerrorproc); - wradr(n); { Cause } - wradr(pc); wradr(sp); wradr(lb); wradr(hp); wradr(pd); wradr(0){pb} ; - while ncoreb<>512 do wradr(0); { Fill } - for i:=0 to maxdata do wrbyte(data[i]) -end; - -#endif - -procedure initialize; { start the ball rolling } -{ This is not part of the machine definition } -var cset:set of char; - f:ifset; - iclass:insclass; - insno:byte; - nops:integer; - opcode:byte; - i,j,n:integer; - wtemp:sword; - count:integer; - repc:adr; - nexta,firsta:adr; - elem:byte; - amount,ofst:size; - c:char; - - function readb(n:integer):double; - var b:byte; - begin read(prog,b); if n>1 then readb:=readb(n-1)*256+b else readb:=b end; - - function readbyte:byte; - begin readbyte:=readb(1) end; - - function readword:word; - begin readword:=readb(wsize) end; - - function readadr:adr; - begin readadr:=readb(asize) end; - - function ifind(ordinal:byte):mnem; - var loopvar:mnem; - found:boolean; - begin ifind:=NON; - loopvar:=insr; found:=false; - repeat - if ordinal=ord(loopvar) then - begin found:=true; ifind:=loopvar end; - if loopvar<>ZRL then loopvar:=succ(loopvar) else loopvar:=NON; - until found or (loopvar=insr) ; - end; - - procedure readhdr; - type hdrw=0..32767 ; { 16 bit header words } - var hdr: hdrw; - i: integer; - begin - for i:=0 to 7 do - begin hdr:=readb(2); - case i of - 0: if hdr<>3757 then { 07255 } - begin writeln('Not an em load file'); halt end; - 2: if hdr<>0 then - begin writeln('Unsolved references'); halt end; - 3: if hdr<>3 then - begin writeln('Incorrect load file version'); halt end; - 4: if hdr<>wsize then - begin writeln('Incorrect word size'); halt end; - 5: if hdr<>asize then - begin writeln('Incorrect pointer size'); halt end; - 1,6,7:; - end - end - end; - - procedure noinit; - begin writeln('Illegal initialization'); halt end; - - procedure readint(a:adr;s:size); - var i:size; 
- begin { construct integer out of byte sequence } - for i:=1 to s do { construct the value and initialize at a } - begin storeb(a,readbyte); a:=a+1 end - end; - - procedure readuns(a:adr;s:size); - begin { construct unsigned out of byte sequence } - readint(a,s) { identical to readint } - end; - - procedure readfloat(a:adr;s:size); - var i:size; b:byte; - begin { construct float out of string} - if (s<>4) and (s<>8) then noinit; i:=0; - repeat { eat the bytes, construct the value and intialize at a } - b:=readbyte; i:=i+1; - until b=0 ; - end; - -begin - halted:=false; - exitstatus:=undef; - uerrorproc:=0; intrap:=false; - - { initialize tables } - for i:=0 to maxcode do code[i]:=0; - for i:=0 to maxdata do data[i]:=0; - for iclass:=prim to tert do - for i:=0 to 255 do - with dispat[iclass][i] do - begin instr:=NON; iflag:=[zbit] end; - - { read instruction table file. see appendix B } - { The table read here is a simple transformation of the table on page xx } - { - instruction names were transformed to numbers } - { - the '-' flag was transformed to an 'i' flag for 'w' type instructions } - { - the 'S' flag was added for instructions having signed operands } - reset(tables); - insr:=NON; - repeat - read(tables,insno) ; cset:=[]; f:=[]; - insr:=ifind(insno); - if insr=NON then begin writeln('Incorrect table'); halt end; - repeat read(tables,c) until c<>' ' ; - repeat - cset:=cset+[c]; - read(tables,c) - until c=' ' ; - if 'm' in cset then f:=f+[mini]; - if 's' in cset then f:=f+[short]; - if '-' in cset then f:=f+[zbit]; - if 'i' in cset then f:=f+[ibit]; - if 'S' in cset then f:=f+[sbit]; - if 'w' in cset then f:=f+[wbit]; - if (mini in f) or (short in f) then read(tables,nops) else nops:=1 ; - readln(tables,opcode); - if ('4' in cset) or ('8' in cset) then - begin iclass:=tert end - else if 'e' in cset then - begin iclass:=second end - else iclass:=prim; - for i:=0 to nops-1 do - begin - with dispat[iclass,opcode+i] do - begin - iflag:=f; instr:=insr; - if '2' 
in cset then ilength:=2 - else if 'u' in cset then ilength:=2 - else if '4' in cset then ilength:=4 - else if '8' in cset then ilength:=8 - else if (mini in f) or (short in f) then - begin - if 'N' in cset then wtemp:=-1-i else wtemp:=i ; - if 'o' in cset then wtemp:=wtemp+1 ; - if short in f then wtemp:=wtemp*256 ; - implicit:=wtemp - end - end - end - until eof(tables); - - { read in program text, data and procedure descriptors } - reset(prog); - readhdr; { verify first header } - for i:=1 to 8 do header[i]:=readadr; { read second header } - hp:=maxdata+1; sp:=maxdata+1; lino(0); - { read program text } - if header[NTEXT]+header[NPROC]*pdsize>maxcode then - begin writeln('Text size too large'); halt end; - if header[SZDATA]>maxdata then - begin writeln('Data size too large'); halt end; - for i:=0 to header[NTEXT]-1 do code[i]:=readbyte; - { read data blocks } - nexta:=0; - for i:=1 to header[NDATA] do - begin - n:=readbyte; - if n<>0 then - begin - elem:=readbyte; firsta:=nexta; - case n of - 1: { uninitialized words } - for j:=1 to elem do - begin store(nexta,undef); nexta:=nexta+wsize end; - 2: { initialized bytes } - for j:=1 to elem do - begin storeb(nexta,readbyte); nexta:=nexta+1 end; - 3: { initialized words } - for j:=1 to elem do - begin store(nexta,readword); nexta:=nexta+wsize end; - 4,5: { instruction and data pointers } - for j:=1 to elem do - begin storea(nexta,readadr); nexta:=nexta+asize end; - 6: { signed integers } - begin readint(nexta,elem); nexta:=nexta+elem end; - 7: { unsigned integers } - begin readuns(nexta,elem); nexta:=nexta+elem end; - 8: { floating point numbers } - begin readfloat(nexta,elem); nexta:=nexta+elem end; - end - end - else - begin - repc:=readadr; - amount:=nexta-firsta; - for count:=1 to repc do - begin - for ofst:=0 to amount-1 do data[nexta+ofst]:=data[firsta+ofst]; - nexta:=nexta+amount; - end - end - end; - if header[SZDATA]<>nexta then writeln('Data initialization error'); - hp:=nexta; - { read descriptor table } - 
pd:=header[NTEXT]; - for i:=1 to header[NPROC]*pdsize do code[pd+i-1]:=readbyte; - { call the entry point routine } - ignmask:=0; { catch all traps, higher numbered traps cannot be ignored} - retsize:=0; - lb:=maxdata; { illegal dynamic link } - pc:=maxcode; { illegal return address } - push(0); a:=sp; { No environment } - push(0); b:=sp; { No args } - pusha(a); { envp } - pusha(b); { argv } - push(0); { argc } - call(argp(header[ENTRY])); -end; -{ -.bp -{---------------------------------------------------------------------------} -{ MAIN LOOP OF THE INTERPRETER } -{---------------------------------------------------------------------------} -{ It should be noted that the interpreter (microprogram) for an EM - machine can be written in two fundamentally different ways: (1) the - instruction operands are fetched in the main loop, or (2) the in- - struction operands are fetched after the 256 way branch, by the exe- - cution routines themselves. In this interpreter, method (1) is used - to simplify the description of execution routines. The dispatch - table dispat is used to determine how the operand is encoded. There - are 4 possibilities: - - 0. There is no operand - 1. The operand and instruction are together in 1 byte (mini) - 2. The operand is one byte long and follows the opcode byte(s) - 3. The operand is two bytes long and follows the opcode byte(s) - 4. The operand is four bytes long and follows the opcode byte(s) - - In this interpreter, the main loop determines the operand type, - fetches it, and leaves it in the global variable k for the execution - routines to use. Consequently, instructions such as LOL, which use - three different formats, need only be described once in the body of - the interpreter. - However, for a production interpreter, or a hardware EM - machine, it is probably better to use method (2), i.e. to let the - execution routines themselves fetch their own operands. 
The reason - for this is that each opcode uniquely determines the operand format, - so no table lookup in the dispatch table is needed. The whole table - is not needed. Method (2) therefore executes much faster. - However, separate execution routines will be needed for LOL with - a one byte offset, and LOL with a two byte offset. It is to avoid - this additional clutter that method (1) is used here. In a produc- - tion interpreter, it is envisioned that the main loop will fetch the - next instruction byte, and use it as an index into a 256 word table - to find the address of the interpreter routine to jump to. The - routine jumped to will begin by fetching its operand, if any, - without any table lookup, since it knows which format to expect. - After doing the work, it returns to the main loop by jumping in- - directly to a register that contains the address of the main loop. - A slight variation on this idea is to have the register contain - the address of the branch table, rather than the address of the main - loop. - Another issue is whether the execution routines for LOL 0, LOL - 2, LOL 4, etc. should all be have distinct execution routines. Doing - so provides for the maximum speed, since the operand is implicit in - the routine itself. The disadvantage is that many nearly identical - execution routines will then be needed. Another way of doing it is - to keep the instruction byte fetched from memory (LOL 0, LOL 2, LOL - 4, etc.) in some register, and have all the LOL mini format instruc- - tions branch to a common routine. This routine can then determine - the operand by subtracting the code for LOL 0 from the register, - leaving the true operand in the register (as a word quantity of - course). This method makes the interpreter smaller, but is a bit - slower. -.bp - To make this important point a little clearer, consider how a - production interpreter for the PDP-11 might appear. 
Let us assume the - following opcodes have been assigned: - - 31: LOL -2 (2 bytes, i.e. next word) - 32: LOL -4 - 33: LOL -6 - 34: LOL b (format with a one byte offset) - 35: LOL w (format with a one word, i.e. two byte offset) - - Further assume that each of the 5 opcodes will have its own execution - routine, i.e. we are making a tradeoff in favor of fast execution and - a slightly larger interpreter. - Register r5 is the em program counter. - Register r4 is the em LB register - Register r3 is the em SP register (the stack grows toward low core) - Register r2 contains the interpreter address of the main loop - - The main loop looks like this: - - movb (r5)+,r0 /fetch the opcode into r0 and increment r5 - asl r0 /shift r0 left 1 bit. Now: -256<=r0<=+254 - jmp *table(r0) /jump to execution routine - - Notice that no operand fetching has been done. The execution routines for - the 5 sample instructions given above might be as follows: - - lol2: mov -2(r4),-(sp) /push local -2 onto stack - jmp (r2) /go back to main loop - lol4: mov -4(r4),-(sp) /push local -4 onto stack - jmp (r2) /go back to main loop - lol6: mov -6(r4),-(sp) /push local -6 onto stack - jmp (r2) /go back to main loop - lolb: mov $177400,r0 /prepare to fetch the 1 byte operand - bisb (r5)+,r0 /operand is now in r0 - asl r0 /r0 is now offset from LB in bytes, not words - add r4,r0 /r0 is now address of the needed local - mov (r0),-(sp) /push the local onto the stack - jmp (r2) - lolw: clr r0 /prepare to fetch the 2 byte operand - bisb (r5)+,r0 /fetch high order byte first !!! 
- swab r0 /insert high order byte in place - bisb (r5)+,r0 /insert low order byte in place - asl r0 /convert offset to bytes, from words - add r4,r0 /r0 is now address of needed local - mov (r0),-(sp) /stack the local - jmp (r2) /done - - The important thing to notice is where and how the operand fetch occurred: - lol2, lol4, and lol6, (the minis) have implicit operands - lolb knew it had to fetch one byte, and did so without any table lookup - lolw knew it had to fetch a word, and did so, high order byte first } -{ -.bp -.sp 4 -{---------------------------------------------------------------------------} -{ Routines for the individual instructions } -{---------------------------------------------------------------------------} -procedure loadops; -var j:integer; -begin - case insr of - { LOAD GROUP } - LDC: pushd(argd(k)); - LOC: pushsw(argc(k)); - LOL: push(memw(locadr(k))); - LOE: push(memw(argg(k))); - LIL: push(memw(mema(locadr(k)))); - LOF: push(memw(popa+argf(k))); - LAL: pusha(locadr(k)); - LAE: pusha(argg(k)); - LXL: begin a:=lb; for j:=1 to argn(k) do a:=mema(a+savsize); pusha(a) end; - LXA: begin a:=lb; - for j:=1 to argn(k) do a:= mema(a+savsize); - pusha(a+savsize) - end; - LOI: pushx(argo(k),popa); - LOS: begin k:=argw(k); if k<>wsize then trap(EILLINS); - k:=pop; pushx(argo(k),popa) - end; - LDL: begin a:=locadr(k); push(memw(a+wsize)); push(memw(a)) end; - LDE: begin k:=argg(k); push(memw(k+wsize)); push(memw(k)) end; - LDF: begin k:=argf(k); - a:=popa; push(memw(a+k+wsize)); push(memw(a+k)) - end; - LPI: push(argp(k)) - end -end; - -procedure storeops; -begin - case insr of - { STORE GROUP } - STL: store(locadr(k),pop); - STE: store(argg(k),pop); - SIL: store(mema(locadr(k)),pop); - STF: begin a:=popa; store(a+argf(k),pop) end; - STI: popx(argo(k),popa); - STS: begin k:=argw(k); if k<>wsize then trap(EILLINS); - k:=popa; popx(argo(k),popa) - end; - SDL: begin a:=locadr(k); store(a,pop); store(a+wsize,pop) end; - SDE: begin k:=argg(k); store(k,pop); 
store(k+wsize,pop) end; - SDF: begin k:=argf(k); a:=popa; store(a+k,pop); store(a+k+wsize,pop) end - end -end; - -procedure intarith; -var i:integer; -begin - case insr of - { SIGNED INTEGER ARITHMETIC } - ADI: case szindex(argw(k)) of - 1: begin st:=popsw; ss:=popsw; push(fitsw(ss+st,EIOVFL)) end; - 2: begin dt:=popd; ds:=popd; pushd(doadi(ds,dt)) end; - end ; - SBI: case szindex(argw(k)) of - 1: begin st:=popsw; ss:= popsw; push(fitsw(ss-st,EIOVFL)) end; - 2: begin dt:=popd; ds:=popd; pushd(dosbi(ds,dt)) end; - end ; - MLI: case szindex(argw(k)) of - 1: begin st:=popsw; ss:= popsw; push(fitsw(ss*st,EIOVFL)) end; - 2: begin dt:=popd; ds:=popd; pushd(domli(ds,dt)) end; - end ; - DVI: case szindex(argw(k)) of - 1: begin st:= popsw; ss:= popsw; - if st=0 then trap(EIDIVZ) else pushsw(ss div st) - end; - 2: begin dt:=popd; ds:=popd; pushd(dodvi(ds,dt)) end; - end; - RMI: case szindex(argw(k)) of - 1: begin st:= popsw; ss:=popsw; - if st=0 then trap(EIDIVZ) else pushsw(ss - (ss div st)*st) - end; - 2: begin dt:=popd; ds:=popd; pushd(dormi(ds,dt)) end - end; - NGI: case szindex(argw(k)) of - 1: begin st:=popsw; pushsw(-st) end; - 2: begin ds:=popd; pushd(dongi(ds)) end - end; - SLI: begin t:=pop; - case szindex(argw(k)) of - 1: begin ss:=popsw; - for i:= 1 to t do sleft(ss); pushsw(ss) - end - end - end; - SRI: begin t:=pop; - case szindex(argw(k)) of - 1: begin ss:=popsw; - for i:= 1 to t do sright(ss); pushsw(ss) - end; - 2: begin ds:=popd; - for i:= 1 to t do sdright(ss); pushd(ss) - end - end - end - end -end; - -procedure unsarith; -var i:integer; -begin - case insr of - { UNSIGNED INTEGER ARITHMETIC } - ADU: case szindex(argw(k)) of - 1: begin t:=pop; s:= pop; push(chopw(s+t)) end; - 2: trap(EILLINS); - end ; - SBU: case szindex(argw(k)) of - 1: begin t:=pop; s:= pop; push(chopw(s-t)) end; - 2: trap(EILLINS); - end ; - MLU: case szindex(argw(k)) of - 1: begin t:=pop; s:= pop; push(chopw(s*t)) end; - 2: trap(EILLINS); - end ; - DVU: case szindex(argw(k)) of - 1: 
begin t:= pop; s:= pop; - if t=0 then trap(EIDIVZ) else push(s div t) - end; - 2: trap(EILLINS); - end; - RMU: case szindex(argw(k)) of - 1: begin t:= pop; s:=pop; - if t=0 then trap(EIDIVZ) else push(s - (s div t)*t) - end; - 2: trap(EILLINS); - end; - SLU: case szindex(argw(k)) of - 1: begin t:=pop; s:=pop; - for i:= 1 to t do suleft(s); push(s) - end; - 2: trap(EILLINS); - end; - SRU: case szindex(argw(k)) of - 1: begin t:=pop; s:=pop; - for i:= 1 to t do suright(s); push(s) - end; - 2: trap(EILLINS); - end - end -end; - -procedure fltarith; -begin - case insr of - { FLOATING POINT ARITHMETIC } - ADF: begin argwf(k); rt:=popr; rs:=popr; pushr(doadf(rs,rt)) end; - SBF: begin argwf(k); rt:=popr; rs:=popr; pushr(dosbf(rs,rt)) end; - MLF: begin argwf(k); rt:=popr; rs:=popr; pushr(domlf(rs,rt)) end; - DVF: begin argwf(k); rt:=popr; rs:=popr; pushr(dodvf(rs,rt)) end; - NGF: begin argwf(k); rt:=popr; pushr(dongf(rt)) end; - FIF: begin argwf(k); rt:=popr; rs:=popr; - dofif(rt,rs,x,y); pushr(y); pushr(x) - end; - FEF: begin argwf(k); rt:=popr; dofef(rt,x,ss); pushr(x); pushsw(ss) end - end -end; - -procedure ptrarith; -begin - case insr of - { POINTER ARITHMETIC } - ADP: pusha(popa+argf(k)); - ADS: case szindex(argw(k)) of - 1: begin st:=popsw; pusha(popa+st) end; - 2: begin dt:=popd; pusha(popa+dt) end; - end; - SBS: begin - a:=popa; b:=popa; - case szindex(argw(k)) of - 1: push(fitsw(b-a,EIOVFL)); - 2: pushd(b-a) - end - end - end -end; - -procedure incops; -var j:integer; -begin - case insr of - { INCREMENT/DECREMENT/ZERO } - INC: push(fitsw(popsw+1,EIOVFL)); - INL: begin a:=locadr(k); store(a,fitsw(signwd(memw(a))+1,EIOVFL)) end; - INE: begin a:=argg(k); store(a,fitsw(signwd(memw(a))+1,EIOVFL)) end; - DEC: push(fitsw(popsw-1,EIOVFL)); - DEL: begin a:=locadr(k); store(a,fitsw(signwd(memw(a))-1,EIOVFL)) end; - DEE: begin a:=argg(k); store(a,fitsw(signwd(memw(a))-1,EIOVFL)) end; - ZRL: store(locadr(k),0); - ZRE: store(argg(k),0); - ZER: for j:=1 to argw(k) div wsize do 
push(0); - ZRF: pushr(0); - end -end; - -procedure convops; -begin - case insr of - { CONVERT GROUP } - CII: begin s:=pop; t:=pop; - if tmaxsint then trap(ECONV); push(s) end; - 2: trap(EILLINS); - end; - 2: case szindex(argw(pop)) of - 1: pushd(pop); - 2: trap(EILLINS); - end; - end; - CUU: case szindex(argw(pop)) of - 1: if szindex(argw(pop))=2 then trap(EILLINS); - 2: trap(EILLINS); - end; - CUF: begin argwf(pop); - if szindex(argw(pop))=1 then pushr(pop) else trap(EILLINS) - end; - CFI: begin sz:=argw(pop); argwf(pop); rt:=popr; - case szindex(sz) of - 1: push(fitsw(trunc(rt),ECONV)); - 2: pushd(fitd(trunc(rt))); - end - end; - CFU: begin sz:=argw(pop); argwf(pop); rt:=popr; - case szindex(sz) of - 1: push( chopw(trunc(abs(rt)-0.5)) ); - 2: trap(EILLINS); - end - end; - CFF: begin argwf(pop); argwf(pop) end - end -end; - -procedure logops; -var i,j:integer; -begin - case insr of - { LOGICAL GROUP } - XAND: - begin k:=argw(k); - for j:= 1 to k div wsize do - begin a:=sp+k; t:=pop; store(a,bf(andf,memw(a),t)) end; - end; - IOR: - begin k:=argw(k); - for j:= 1 to k div wsize do - begin a:=sp+k; t:=pop; store(a,bf(iorf,memw(a),t)) end; - end; - XOR: - begin k:=argw(k); - for j:= 1 to k div wsize do - begin a:=sp+k; t:=pop; store(a,bf(xorf,memw(a),t)) end; - end; - COM: - begin k:=argw(k); - for j:= 1 to k div wsize do - begin - store(sp+k-wsize*j, bf(xorf,memw(sp+k-wsize*j), negoff-1)) - end - end; - ROL: begin k:=argw(k); if k<>wsize then trap(EILLINS); - t:=pop; s:=pop; for i:= 1 to t do rleft(s); push(s) - end; - ROR: begin k:=argw(k); if k<>wsize then trap(EILLINS); - t:=pop; s:=pop; for i:= 1 to t do rright(s); push(s) - end - end -end; - -procedure setops; -var i,j:integer; -begin - case insr of - { SET GROUP } - INN: - begin k:=argw(k); - t:=pop; - i:= t mod 8; t:= t div 8; - if t>=k then - begin trap(ESET); s:=0 end - else - begin s:=memb(sp+t) end; - newsp(sp+k); push(bit(i,s)); - end; - XSET: - begin k:=argw(k); - t:=pop; - i:= t mod 8; t:= t div 8; - for 
j:= 1 to k div wsize do push(0); - if t>=k then - trap(ESET) - else - begin s:=1; for j:= 1 to i do rleft(s); storeb(sp+t,s) end - end - end -end; - -procedure arrops; -begin - case insr of - { ARRAY GROUP } - LAR: - begin k:=argw(k); if k<>wsize then trap(EILLINS); a:=popa; - pushx(argo(memw(a+2*k)),arraycalc(a)) - end; - SAR: - begin k:=argw(k); if k<>wsize then trap(EILLINS); a:=popa; - popx(argo(memw(a+2*k)),arraycalc(a)) - end; - AAR: - begin k:=argw(k); if k<>wsize then trap(EILLINS); a:=popa; - push(arraycalc(a)) - end - end -end; - -procedure cmpops; -begin - case insr of - { COMPARE GROUP } - CMI: case szindex(argw(k)) of - 1: begin st:=popsw; ss:=popsw; - if ss memw(sp+k+j) then t:=1; - j:=j+wsize - end; - newsp(sp+wsize*k); push(t); - end; - - TLT: if popsw < 0 then push(1) else push(0); - TLE: if popsw <= 0 then push(1) else push(0); - TEQ: if pop = 0 then push(1) else push(0); - TNE: if pop <> 0 then push(1) else push(0); - TGE: if popsw >= 0 then push(1) else push(0); - TGT: if popsw > 0 then push(1) else push(0); - end -end; - -procedure branchops; -begin - case insr of - { BRANCH GROUP } - BRA: newpc(pc+k); - - BLT: begin st:=popsw; if popsw < st then newpc(pc+k) end; - BLE: begin st:=popsw; if popsw <= st then newpc(pc+k) end; - BEQ: begin t :=pop ; if pop = t then newpc(pc+k) end; - BNE: begin t :=pop ; if pop <> t then newpc(pc+k) end; - BGE: begin st:=popsw; if popsw >= st then newpc(pc+k) end; - BGT: begin st:=popsw; if popsw > st then newpc(pc+k) end; - - ZLT: if popsw < 0 then newpc(pc+k); - ZLE: if popsw <= 0 then newpc(pc+k); - ZEQ: if pop = 0 then newpc(pc+k); - ZNE: if pop <> 0 then newpc(pc+k); - ZGE: if popsw >= 0 then newpc(pc+k); - ZGT: if popsw > 0 then newpc(pc+k) - end -end; - -procedure callops; -var j:integer; -begin - case insr of - { PROCEDURE CALL GROUP } - CAL: call(argp(k)); - CAI: begin call(argp(popa)) end; - RET: begin k:=argz(k); if k div wsize>maxret then trap(EILLINS); - for j:= 1 to k div wsize do retarea[j]:=pop; 
retsize:=k; - newsp(lb); lb:=maxdata+1; { To circumvent stack overflow error } - newpc(popa); - if pc=maxcode then - begin - halted:=true; - if retsize=wsize then exitstatus:=retarea[1] - else exitstatus:=undef - end - else - newlb(popa); - end; - LFR: begin k:=args(k); if k<>retsize then trap(EILLINS); - for j:=k div wsize downto 1 do push(retarea[j]); - end - end -end; - -procedure miscops; -var i,j:integer; -begin - case insr of - { MISCELLANEOUS GROUP } - ASP,ASS: - begin if insr=ASS then - begin k:=argw(k); if k<>wsize then trap(EILLINS); k:=popsw end; - k:=argf(k); - if k<0 - then for j:= 1 to -k div wsize do push(undef) - else newsp(sp+k); - end; - BLM,BLS: - begin if insr=BLS then - begin k:=argw(k); if k<>wsize then trap(EILLINS); k:=pop end; - k:=argz(k); - b:=popa; a:=popa; - for j := 1 to k div wsize do - store(b-wsize+wsize*j,memw(a-wsize+wsize*j)) - end; - CSA: begin k:=argw(k); if k<>wsize then trap(EILLINS); - a:=popa; - st:= popsw - signwd(memw(a+asize)); - if (st>=0) and (st<=memw(a+wsize+asize)) then - b:=mema(a+2*wsize+asize+asize*st) else b:=mema(a); - if b=0 then trap(ECASE) else newpc(b) - end; - CSB: begin k:=argw(k); if k<>wsize then trap(EILLINS); a:=popa; - t:=pop; i:=1; found:=false; - while (i<=memw(a+asize)) and not found do - if t=memw(a+(asize+wsize)*i) then found:=true else i:=i+1; - if found then b:=memw(a+(asize+wsize)*i+wsize) else b:=memw(a); - if b=0 then trap(ECASE) else newpc(b); - end; - DCH: begin pusha(mema(popa+dynd)) end; - DUP,DUS: - begin if insr=DUS then - begin k:=argw(k); if k<>wsize then trap(EILLINS); k:=pop end; - k:=args(k); - for i:=1 to k div wsize do push(memw(sp+k-wsize)); - end; - EXG: begin - k:=argw(k); - for i:=1 to k div wsize do push(memw(sp+k-wsize)); - for i:=0 to k div wsize - 1 do - store(sp+k+i*wsize,memw(sp+k+k+i*wsize)); - for i:=1 to k div wsize do - begin t:=pop ; store(sp+k+k-wsize,t) end; - end; - FIL: filna(argg(k)); - GTO: begin k:=argg(k); - newlb(mema(k+2*asize)); newsp(mema(k+asize)); 
newpc(mema(k)) - end; - LIM: push(ignmask); - LIN: lino(argn(k)); - LNI: lino(memw(0)+1); - LOR: begin i:=argr(k); - case i of 0:pusha(lb); 1:pusha(sp); 2:pusha(hp) end; - end; - LPB: pusha(popa+statd); - MON: domon(pop); - NOP: writeln('NOP at line ',memw(0):5) ; - RCK: begin a:=popa; - case szindex(argw(k)) of - 1: if (signwd(memw(sp))signwd(memw(a+wsize))) then trap(ERANGE); - 2: if (memd(sp)memd(a+2*wsize)) then trap(ERANGE); - end - end; - RTT: dortt; - SIG: begin a:=popa; pusha(uerrorproc); uerrorproc:=a end; - SIM: ignmask:=pop; - STR: begin i:=argr(k); - case i of 0: newlb(popa); 1: newsp(popa); 2: newhp(popa) end; - end; - TRP: trap(pop) - end -end; -{ -.bp -{---------------------------------------------------------------------------} -{ Main Loop } -{---------------------------------------------------------------------------} - -begin initialize; -8888: - repeat - opcode := nextpc; { fetch the first byte of the instruction } - if opcode=escape1 then iclass:=second - else if opcode=escape2 then iclass:=tert - else iclass:=prim; - if iclass<>prim then opcode := nextpc; - with dispat[iclass][opcode] do - begin insr:=instr; - if not (zbit in iflag) then - if ibit in iflag then k:=pop else - begin - if mini in iflag then k:=implicit else - begin - if short in iflag then k:=implicit+nextpc else - begin k:=nextpc; - if (sbit in iflag) and (k>=128) then k:=k-256; - for i:=2 to ilength do k:=256*k + nextpc - end - end; - if wbit in iflag then k:=k*wsize; - end - end; -case insr of - - NON: trap(EILLINS); - - { LOAD GROUP } - LDC,LOC,LOL,LOE,LIL,LOF,LAL,LAE,LXL,LXA,LOI,LOS,LDL,LDE,LDF,LPI: - loadops; - - { STORE GROUP } - STL,STE,SIL,STF,STI,STS,SDL,SDE,SDF: - storeops; - - { SIGNED INTEGER ARITHMETIC } - ADI,SBI,MLI,DVI,RMI,NGI,SLI,SRI: - intarith; - - { UNSIGNED INTEGER ARITHMETIC } - ADU,SBU,MLU,DVU,RMU,SLU,SRU: - unsarith; - - { FLOATING POINT ARITHMETIC } - ADF,SBF,MLF,DVF,NGF,FIF,FEF: - fltarith; - - { POINTER ARITHMETIC } - ADP,ADS,SBS: - ptrarith; - - { 
INCREMENT/DECREMENT/ZERO } - INC,INL,INE,DEC,DEL,DEE,ZRL,ZRE,ZER,ZRF: - incops; - - { CONVERT GROUP } - CII,CIU,CIF,CUI,CUU,CUF,CFI,CFU,CFF: - convops; - - { LOGICAL GROUP } - XAND,IOR,XOR,COM,ROL,ROR: - logops; - - { SET GROUP } - INN,XSET: - setops; - - { ARRAY GROUP } - LAR,SAR,AAR: - arrops; - - { COMPARE GROUP } - CMI,CMU,CMP,CMF,CMS, TLT,TLE,TEQ,TNE,TGE,TGT: - cmpops; - - { BRANCH GROUP } - BRA, BLT,BLE,BEQ,BNE,BGE,BGT, ZLT,ZLE,ZEQ,ZNE,ZGE,ZGT: - branchops; - - { PROCEDURE CALL GROUP } - CAL,CAI,RET,LFR: - callops; - - { MISCELLANEOUS GROUP } - ASP,ASS,BLM,BLS,CSA,CSB,DCH,DUP,DUS,EXG,FIL,GTO,LIM, - LIN,LNI,LOR,LPB,MON,NOP,RCK,RTT,SIG,SIM,STR,TRP: - miscops; - - end; { end of case statement } - if not ( (insr=RET) or (insr=ASP) or (insr=BRA) or (insr=GTO) ) then - retsize:=0 ; - until halted; -9999: - writeln('halt with exit status: ',exitstatus:1); - doident; -end. diff --git a/doc/em/int/emdmp.c b/doc/em/int/emdmp.c deleted file mode 100644 index f6e3ab6e5..000000000 --- a/doc/em/int/emdmp.c +++ /dev/null @@ -1,200 +0,0 @@ -/* $Id$ */ -/* - * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. - * See the copyright notice in the ACK home directory, in the file "Copyright". - * - */ - -/* Author: E.G. 
Keizer */ - -/* Print a readable version of the data in the post mortem dump */ -/* dmpc [-s] [-dn,m] [file] */ - -#include -#include -#include - -int dflag = 0 ; -long l_low,l_high; - -int sflag; - -int wsize,asize; -long tsize,dsize; -long ignmask,uerrorproc,cause; -long pc,sp,lb,hp,pd,pb; - -char *cstr[] = { - "Array bound error", - "Range bound error", - "Set error", - "Integer overflow", - "Float overflow", - "Float underflow", - "Divide by 0", - "Divide by 0.0", - "Integer undefined", - "Float undefined", - "Conversion error", - "User error 11", - "User error 12", - "User error 13", - "User error 14", - "User error 15", - "Stack overflow", - "Heap overflow", - "Illegal instruction", - "Illegal size parameter", - "Case error", - "Memory fault", - "Illegal pointer", - "Illegal pc", - "Bad argument of LAE", - "Bad monitor call", - "Bad line number", - "GTO descriptor error" -}; - -FILE *fcore; -char *core = "core" ; -int nbyte=0; - -char *pname; - -int readbyte(); -int read2(); -long readaddr(); -long readword(); -unsigned getbyte(); -long getword(); -long getaddr(); - -main(argc,argv) char **argv; -{ - register i ; - long line,fileaddr; - char tok ; - - scanargs(argc,argv); fcore=fopen(core,"r") ; - if ( fcore==NULL ) fatal("Can't open %s",core) ; - - if ( read2()!=010255 ) fatal("not a post mortem dump"); - if ( read2()!=VERSION ) fatal("wrong version dump file"); - wsize=read2(); asize=read2(); - if ( wsize>4 ) fatal("cannot handle word size %d",wsize) ; - if ( asize>4 ) fatal("cannot handle pointer size %d",asize) ; - tsize=readaddr(); dsize=readaddr(); - ignmask=readaddr(); uerrorproc=readaddr(); cause=readaddr(); - pc=readaddr(); sp=readaddr(); lb=readaddr(); hp=readaddr(); - pd=readaddr(); pb=readaddr(); - if ( sflag==0 ) { - line=getword(0L); - fileaddr=getaddr(4L); - if ( fileaddr ) { - for ( i=0 ; i<40 ; i++ ) { - tok=getbyte(fileaddr++) ; - if ( !isprint(tok) ) break ; - putc(tok,stdout); - } - printf(" "); - } - if ( line ) { - printf("line 
%ld",line) ; - } - if ( fileaddr || line ) printf(", "); - fseek(fcore,512L,0); - - if ( cause>27 ) { - printn("cause",cause) ; - } else { - prints("cause",cstr[(int)cause]); - } - printn("pc",pc);printn("sp",sp);printn("lb",lb); - printn("hp",hp); - if ( pd ) printn("pd",pd) ; - if ( pb ) printn("pb",pb) ; - printn("errproc",uerrorproc) ; - printn("ignmask",ignmask) ; - if ( tsize ) printn("Text size",tsize) ; - if ( dsize ) printn("Data size",dsize) ; - } - if ( dflag==0 ) exit(0); - fatal("d-flag not implemeted (yet)"); - exit(1) ; -} - -scanargs(argc,argv) char **argv ; { - pname=argv[0]; - while ( argv++, argc-- > 1 ) { - switch( argv[0][0] ) { - case '-': switch( argv[0][1] ) { - case 's': sflag++ ; break ; - case 'l': dflag++ ; break ; - default : fatal(": [-s] [-ln.m] [file]") ; - } ; - break ; - default :core=argv[0] ; - } - } -} - -prints(s1,s2) char *s1,*s2; { - printf("%-15s %s\n",s1,s2); -} - -printn(s1,d) char *s1; long d; { - printf("%-15s %15ld\n",s1,d); -} - -/* VARARGS1 */ -fatal(s1,p1,p2,p3,p4,p5) char *s1 ; { - fprintf(stderr,"%s: ",pname); - fprintf(stderr,s1,p1,p2,p3,p4,p5) ; - fprintf(stderr,"\n") ; - exit(1) ; -} - -int getb() { - int i ; - i=getc(fcore) ; - if ( i==EOF ) fatal("Premature EOF"); - return i&0377 ; -} - -int read2() { - int i ; - i=getb() ; return getb()*256 + i ; -} - -long readaddr() { - long res ; - register int i ; - - res=0 ; - for (i=0 ; i -#include -#include -#include - -/* This program reads the human readable interpreter specification - and produces a efficient machine representation that can be - translated by a C-compiler. 
-*/ - -#define ESCAP 256 - -int nerror = 0 ; -int atend = 0 ; -int line = 1 ; -int maxinsl= 0 ; - -extern char em_mnem[][4] ; -char esca[] = "escape" ; -#define ename(no) ((no)==ESCAP?esca:em_mnem[(no)]) - -extern char em_flag[] ; - -main(argc,argv) char **argv ; { - if ( argc>1 ) { - if ( freopen(argv[1],"r",stdin)==NULL) { - fatal("Cannot open %s",argv[1]) ; - } - } - if ( argc>2 ) { - if ( freopen(argv[2],"w",stdout)==NULL) { - fatal("Cannot create %s",argv[2]) ; - } - } - if ( argc>3 ) { - fatal("%s [ file [ file ] ]",argv[0]) ; - } - atend=0 ; - readin(); - atend=1 ; - exit(nerror) ; -} - -readin() { - char *ident(); - char *firstid ; - int opcode,flags; - int c; - - while ( !feof(stdin) ) { - firstid=ident() ; - if ( *firstid=='\n' || feof(stdin) ) continue ; - opcode = getmnem(firstid) ; - printf("%d ",opcode+1) ; - flags = decflag(ident(),opcode) ; - switch(em_flag[opcode]&EM_PAR) { - case PAR_D: case PAR_F: case PAR_B: case PAR_L: case PAR_C: - putchar('S') ; - } - putchar(' '); - while ( (c=readchar())!='\n' && c!=EOF ) putchar(c) ; - putchar('\n') ; - } -} - -char *ident() { - /* skip spaces and tabs, anything up to space,tab or eof is - a identifier. - Anything from # to end-of-line is an end-of-line. - End-of-line is an identifier all by itself. 
- */ - - static char array[200] ; - register int c ; - register char *cc ; - - do { - c=readchar() ; - } while ( c==' ' || c=='\t' ) ; - for ( cc=array ; cc<&array[(sizeof array) - 1] ; cc++ ) { - if ( c=='#' ) { - do { - c=readchar(); - } while ( c!='\n' && c!=EOF ) ; - } - *cc = c ; - if ( c=='\n' && cc==array ) break ; - c=readchar() ; - if ( c=='\n' ) { - pushback(c) ; - break ; - } - if ( c==' ' || c=='\t' || c==EOF ) break ; - } - *++cc=0 ; - return array ; -} - -int getmnem(str) char *str ; { - char (*ptr)[4] ; - - for ( ptr = em_mnem ; *ptr<= &em_mnem[sp_lmnem][0] ; ptr++ ) { - if ( strcmp(*ptr,str)==0 ) return (ptr-em_mnem) ; - } - error("Illegal mnemonic") ; - return 0 ; -} - -error(str,a1,a2,a3,a4,a5,a6) /* VARARGS1 */ char *str ; { - if ( !atend ) fprintf(stderr,"line %d: ",line) ; - fprintf(stderr,str,a1,a2,a3,a4,a5,a6) ; - fprintf(stderr,"\n"); - nerror++ ; -} - -mess(str,a1,a2,a3,a4,a5,a6) /* VARARGS1 */ char *str ; { - if ( !atend ) fprintf(stderr,"line %d: ",line) ; - fprintf(stderr,str,a1,a2,a3,a4,a5,a6) ; - fprintf(stderr,"\n"); -} - -fatal(str,a1,a2,a3,a4,a5,a6) /* VARARGS1 */ char *str ; { - error(str,a1,a2,a3,a4,a5,a6) ; - exit(1) ; -} - -#define ILLGL -1 - -check(val) int val ; { - if ( val!=ILLGL ) error("Illegal flag combination") ; -} - -int decflag(str,opc) char *str ; { - int type ; - int escape ; - int range ; - int wordm ; - int notzero ; - char c; - - type=escape=range=wordm=notzero= ILLGL ; - while ( c= *str++ ) { - switch ( c ) { - case 'm' : - check(type) ; type=OPMINI ; break ; - case 's' : - check(type) ; type=OPSHORT ; break ; - case '-' : - check(type) ; type=OPNO ; - if ( (em_flag[opc]&EM_PAR)==PAR_W ) c='i' ; - break ; - case '1' : - check(type) ; type=OP8 ; break ; - case '2' : - check(type) ; type=OP16 ; break ; - case '4' : - check(type) ; type=OP32 ; break ; - case '8' : - check(type) ; type=OP64 ; break ; - case 'u' : - check(type) ; type=OP16U ; break ; - case 'e' : - check(escape) ; escape=0 ; break ; - case 'N' : - 
check(range) ; range= 2 ; break ; - case 'P' : - check(range) ; range= 1 ; break ; - case 'w' : - check(wordm) ; wordm=0 ; break ; - case 'o' : - check(notzero) ; notzero=0 ; break ; - default : - error("Unknown flag") ; - } - putchar(c); - } - if ( type==ILLGL ) error("Type must be specified") ; - switch ( type ) { - case OP64 : - case OP32 : - if ( escape!=ILLGL ) error("Conflicting escapes") ; - escape=ILLGL ; - case OP16 : - case OP16U : - case OP8 : - case OPSHORT : - case OPNO : - if ( notzero!=ILLGL ) mess("Improbable OPNZ") ; - if ( type==OPNO && range!=ILLGL ) { - mess("No operand in range") ; - } - } - if ( escape!=ILLGL ) type|=OPESC ; - if ( wordm!=ILLGL ) type|=OPWORD ; - switch ( range) { - case ILLGL : type|=OP_BOTH ; break ; - case 1 : type|=OP_POS ; break ; - case 2 : type|=OP_NEG ; break ; - } - if ( notzero!=ILLGL ) type|=OPNZ ; - return type ; -} - -static int pushchar ; -static int pushf ; - -int readchar() { - int c ; - - if ( pushf ) { - pushf=0 ; - c = pushchar ; - } else { - if ( feof(stdin) ) return EOF ; - c=getc(stdin) ; - } - if ( c=='\n' ) line++ ; - return c ; -} - -pushback(c) { - if ( pushf ) { - fatal("Double pushback") ; - } - pushf++ ; - pushchar=c ; - if ( c=='\n' ) line-- ; -} diff --git a/doc/em/int/proto.make b/doc/em/int/proto.make deleted file mode 100644 index f6866cf05..000000000 --- a/doc/em/int/proto.make +++ /dev/null @@ -1,33 +0,0 @@ -# $Id$ - -#PARAMS do not remove this line! 
- -SRC_DIR = $(SRC_HOME)/doc/em/int - -install \ -all: em emdmp tables - -tables: mktables $(SRC_HOME)/etc/ip_spec.t - mktables $(SRC_HOME)/etc/ip_spec.t tables - -mktables: $(SRC_DIR)/mktables.c $(TARGET_HOME)/h/em_spec.h \ - $(TARGET_HOME)/h/em_flag.h \ - $(TARGET_HOME)/lib.bin/em_data.$(LIBSUF) $(TARGET_HOME)/h/ip_spec.h - $(CC) -I$(TARGET_HOME)/h -O -o mktables $(SRC_DIR)/mktables.c $(TARGET_HOME)/lib.bin/em_data.$(LIBSUF) - -em: $(SRC_DIR)/em.p - apc -O $(SRC_DIR)/em.p >emerrs ; mv a.out em - -nem.p: $(SRC_DIR)/em.p - sed -e '/maxadr = t16/s//maxadr =t15/' -e '/maxdata = 8191; /s//maxdata = 14335;/' -e '/ adr=.*long/s// adr= 0..maxadr/' <$(SRC_DIR)/em.p >nem.p - -nem: nem.p - apc -O nem.p >emerrs ; mv a.out nem - -emdmp: $(SRC_DIR)/emdmp.c - $(CC) -I$(TARGET_HOME)/h -I$(TARGET_HOME)/config -o emdmp -O $(SRC_DIR)/emdmp.c - -cmp: - -pr: - @pr $(SRC_DIR)/em.p $(SRC_DIR)/mktables.c $(SRC_DIR)/emdmp.c diff --git a/doc/em/intro.nr b/doc/em/intro.nr deleted file mode 100644 index 7f7c711a0..000000000 --- a/doc/em/intro.nr +++ /dev/null @@ -1,173 +0,0 @@ -.bp -.P1 "INTRODUCTION" -.PP -EM is a family of intermediate languages designed for producing -portable compilers. -The general strategy is for a program called \fBfront end\fP -to translate the source program to EM. -Another program, \fBback end\fP, -translates EM to target assembly language. -Alternatively, the EM code can be assembled to a binary form -and interpreted. -These considerations led to the following goals: -.IP 1 -The design should allow translation to, -or interpretation on, a wide range of existing machines. -Design decisions should be delayed as far as possible -and the implications of these decisions should -be localized as much as possible. -.br -The current microcomputer technology offers 8, 16 and 32 bit machines -with various sizes of address space. -EM should be flexible enough to be useful on most of these -machines. 
-The differences between the members of the EM family should only -concern the wordsize and address space size. -.IP 2 -The architecture should ease the task of code generation for -high level languages such as Pascal, C, Ada, Algol 68, BCPL. -.IP 3 -The instruction set used by the interpreter should be compact, -to reduce the amount of memory needed -for program storage, and to reduce the time needed to transmit -programs over communication lines. -.IP 3 -It should be designed with microprogrammed implementations in -mind; in particular, the use of many short fields within -instruction opcodes should be avoided, because their extraction by the -microprogram or conversion to other instruction formats is inefficient. -.PP -The basic architecture is based on the concept of a stack. The stack -is used for procedure return addresses, actual parameters, local variables, -and arithmetic operations. -There are several built-in object types, -for example, signed and unsigned integers, -floating point numbers, pointers and sets of bits. -There are instructions to push and pop objects -to and from the stack. -The push and pop instructions are not typed. -They only care about the size of the objects. -For each built-in type there are -reverse Polish type instructions that pop one or more -objects from the top of -the stack, perform an operation, and push the result back onto the -stack. -For all types except pointers, -these instructions have the object size -as argument. -.PP -There are no visible general registers used for arithmetic operands -etc. This is in contrast to most third generation computers, which usually -have 8 or 16 general registers. The decision not to have a group of -general registers was fully intentional, and follows W.L. Van der -Poel's dictum that a machine should have 0, 1, or an infinite -number of any feature. General registers have two primary uses: to hold -intermediate results of complicated expressions, e.g. 
-.DS -((a*b + c*d)/e + f*g/h) * i -.DE -and to hold local variables. -.PP -Various studies -have shown that the average expression has fewer than two operands, -making the former use of registers of doubtful value. The present trend -toward structured programs consisting of many small -procedures greatly reduces the value of registers to hold local variables -because the large number of procedure calls implies a large overhead in -saving and restoring the registers at every call. -.PP -Although there are no general purpose registers, there are a -few internal registers with specific functions as follows: -.TS -tab(:); -l 1 l l l. -PC:\-:Program Counter:Pointer to next instruction -LB:\-:Local Base:Points to base of the local variables -:::in the current procedure. -SP:\-:Stack Pointer:Points to the highest occupied word on the stack. -HP:\-:Heap Pointer:Points to the top of the heap area. -.TE -.PP -Furthermore, reverse Polish code is much easier to generate than -multi-register machine code, especially if highly efficient code is -desired. -When translating to assembly language the back end can make -good use of the target machine's registers. -An EM machine can -achieve high performance by keeping part of the stack -in high speed storage (a cache or microprogram scratchpad memory) rather -than in primary memory. -.PP -Again according to van der Poel's dictum, -all EM instructions have zero or one argument. -We believe that instructions needing two arguments -can be split into two simpler ones. -The simpler ones can probably be used in other -circumstances as well. -Moreover, these two instructions together often -have a shorter encoding than the single -instruction before. -.PP -This document describes EM at three different levels: -the abstract level, the assembly language level and -the machine language level. -.QQ -The most important level is that of the abstract EM architecture. -This level deals with the basic design issues. 
-Only the functional capabilities of instructions are relevant, not their -format or encoding. -Most chapters of this document refer to the abstract level -and it is explicitly stated whenever -another level is described. -.QQ -The assembly language is intended for the compiler writer. -It presents a more or less orthogonal instruction -set and provides symbolic names for data. -Moreover, it facilitates the linking of -separately compiled 'modules' into a single program -by providing several pseudoinstructions. -.QQ -The machine language is designed for interpretation with a compact -program text and easy decoding. -The binary representation of the machine language instruction set is -far from orthogonal. -Frequent instructions have a short opcode. -The encoding is fully byte oriented. -These bytes do not contain small bit fields, because -bit fields would slow down decoding considerably. -.PP -A common use for EM is for producing portable (cross) compilers. -When used this way, the compilers produce -EM assembly language as their output. -To run the compiled program on the target machine, -the back end, translates the EM assembly language to -the target machine's assembly language. -When this approach is used, the format of the EM -machine language instructions is irrelevant. -On the other hand, when writing an interpreter for EM machine language -programs, the interpreter must deal with the machine language -and not with the symbolic assembly language. -.PP -As mentioned above, the -current microcomputer technology offers 8, 16 and 32 bit -machines with address spaces ranging from -.Ex 2 16 -to -.Ex 2 32 -bytes. -Having one size of pointers and integers restricts -the usefulness of the language. -We decided to have a different language for each combination of -word and pointer size. -All languages offer the same instruction set and differ only in -memory alignment restrictions and the implicit size assumed in -several instructions. 
-The languages -differ slightly for the -different size combinations. -For example: the -size of any object on the stack and alignment restrictions. -The wordsize is restricted to powers of 2 and -the pointer size must be a multiple of the wordsize. -Almost all programs handling EM will be parametrized with word -and pointer size. diff --git a/doc/em/iotrap.nr b/doc/em/iotrap.nr deleted file mode 100644 index 716f363b9..000000000 --- a/doc/em/iotrap.nr +++ /dev/null @@ -1,376 +0,0 @@ -.SN 8 -.VS 1 0 -.BP -.S1 "ENVIRONMENT INTERACTIONS" -EM programs can interact with their environment in three ways. -Two, starting/stopping and monitor calls, are dealt with in this chapter. -The remaining way to interact, interrupts, will be treated -together with traps in chapter 9. -.S2 "Program starting and stopping" -EM user programs start with a call to a procedure called -m_a_i_n. -The assembler and backends look for the definition of a procedure -with this name in their input. -The call passes three parameters to the procedure. -The parameters are similar to the parameters supplied by the -UNIX -.FS -UNIX is a Trademark of Bell Laboratories. -.FE -operating system to C programs. -These parameters are often called -.BW argc , -.B argv -and -.BW envp . -Argc is the parameter nearest to LB and is a wordsized integer. -The other two are pointers to the first element of an array of -string pointers. -.N -The -.B argv -array contains -.B argc -strings, the first of which contains the program call name. -The other strings in the -.B argv -array are the program parameters. -.P -The -.B envp -array contains strings in the form "name=string", where 'name' -is the name of an environment variable and string its value. -The -.B envp -is terminated by a zero pointer. -.P -An EM user program stops if the program returns from the first -invocation of m_a_i_n. -The contents of the function return area are used to procure a -wordsized program return code. 
-EM programs also stop when traps and interrupts occur that are -not caught and when the exit monitor call is executed. -.S2 "Input/Output and other monitor calls" -EM differs from most conventional machines in that it has high level i/o -instructions. -Typical instructions are OPEN FILE and READ FROM FILE instead -of low level instructions such as setting and clearing -bits in device registers. -By providing such high level i/o primitives, the task of implementing -EM on various non EM machines is made considerably easier. -.P -I/O is initiated by the MON instruction, which expects an iocode on top -of the stack. -Often there are also parameters which are pushed on the -stack in reverse order, that is: last -parameter first. -Some i/o functions also provide results, which are returned on the stack. -In the list of monitor calls we use several types of parameters and results, -these types consist of integers and unsigneds of varying sizes, but never -smaller than the wordsize, and the two pointer types. -.N 1 -The names of the types used are: -.IS 4 -.PS - 10 -.PT int -an integer of wordsize -.PT int2 -an integer whose size is the maximum of the wordsize and 2 -bytes -.PT int4 -an integer whose size is the maximum of the wordsize and 4 -bytes -.PT intp -an integer with the size of a pointer -.PT uns2 -an unsigned integer whose size is the maximum of the wordsize and 2 -.PT unsp -an unsigned integer with the size of a pointer -.PT ptr -a pointer into data space -.PE 1 -.IE 0 -The table below lists the i/o codes with their results and -parameters. -This list is similar to the system calls of the UNIX Version 7 -operating system. -.BP -.A -To execute a monitor call, proceed as follows: -.IS 2 -.N 1 -.PS a 4 "" ) -.PT -Stack the parameters, in reverse order, last parameter first. -.PT -Push the monitor call number (iocode) onto the stack. -.PT -Execute the MON instruction. 
-.PE 1 -.IE -An error code is present on the top of the stack after -execution of most monitor calls. -If this error code is zero, the call performed the action -requested and the results are available on top of the stack. -Non-zero error codes indicate a failure, in this case no -results are available and the error code has been pushed twice. -This construction enables programs to test for failure with a -single instruction (~TEQ or TNE~) and still find out the cause of -the failure. -The result name 'e' is reserved for the error code. -.N 1 -List of monitor calls. -.DS B -number name parameters results function - - 1 Exit status:int Terminate this process - 2 Fork e,flag,pid:int Spawn new process - 3 Read fildes:int;buf:ptr;nbytes:unsp - e:int;rbytes:unsp Read from file - 4 Write fildes:int;buf:ptr;nbytes:unsp - e:int;wbytes:unsp Write on a file - 5 Open string:ptr;flag:int - e,fildes:int Open file for read and/or write - 6 Close fildes:int e:int Close a file - 7 Wait e:int;status,pid:int2 - Wait for child - 8 Creat string:ptr;mode:int - e,fildes:int Create a new file - 9 Link string1,string2:ptr - e:int Link to a file - 10 Unlink string:ptr e:int Remove directory entry - 12 Chdir string:ptr e:int Change default directory - 14 Mknod string:ptr;mode,addr:int2 - e:int Make a special file - 15 Chmod string:ptr;mode:int2 - e:int Change mode of file - 16 Chown string:ptr;owner,group:int2 - e:int Change owner/group of a file - 18 Stat string,statbuf:ptr - e:int Get file status - 19 Lseek fildes:int;off:int4;whence:int - e:int;oldoff:int4 Move read/write pointer - 20 Getpid pid:int2 Get process identification - 21 Mount special,string:ptr;rwflag:int - e:int Mount file system - 22 Umount special:ptr e:int Unmount file system - 23 Setuid userid:int2 e:int Set user ID - 24 Getuid e_uid,r_uid:int2 Get user ID - 25 Stime time:int4 e:int Set time and date - 26 Ptrace request:int;pid:int2;addr:ptr;data:int - e,value:int Process trace - 27 Alarm seconds:uns2 previous:uns2 
Schedule signal - 28 Fstat fildes:int;statbuf:ptr - e:int Get file status - 29 Pause Stop until signal - 30 Utime string,timep:ptr - e:int Set file times - 33 Access string,mode:int e:int Determine file accessibility - 34 Nice incr:int Set program priority - 35 Ftime bufp:ptr e:int Get date and time - 36 Sync Update filesystem - 37 Kill pid:int2;sig:int - e:int Send signal to a process - 41 Dup fildes,newfildes:int - e,fildes:int Duplicate a file descriptor - 42 Pipe e,w_des,r_des:int Create a pipe - 43 Times buffer:ptr Get process times - 44 Profil buff:ptr;bufsiz,offset,scale:intp Execution time profile - 46 Setgid gid:int2 e:int Set group ID - 47 Getgid e_gid,r_gid:int Get group ID - 48 Sigtrp trapno,signo:int - e,prevtrap:int See below - 51 Acct file:ptr e:int Turn accounting on or off - 53 Lock flag:int e:int Lock a process - 54 Ioctl fildes,request:int;argp:ptr - e:int Control device - 56 Mpxcall cmd:int;vec:ptr e:int Multiplexed file handling - 59 Exece name,argv,envp:ptr - e:int Execute a file - 60 Umask complmode:int2 oldmask:int2 Set file creation mode mask - 61 Chroot string:ptr e:int Change root directory -.DE 1 -Codes 0, 11, 13, 17, 31, 32, 38, 39, 40, 45, 49, 50, 52, -55, 57, 58, 62, and 63 are -not used. -.P -All monitor calls, except fork and sigtrp -are the same as the UNIX version 7 system calls. -.P -The sigtrp entry maps UNIX signals onto EM interrupts. -Normally, trapno is in the range 0 to 252. -In that case it requests that signal signo -will cause trap trapno to occur. -When given trap number -2, default signal handling is reset, and when given -trap number -3, the signal is ignored. -.P -The flag returned by fork is 1 in the child process and 0 in -the parent. -The pid returned is the process-id of the other process. -.BP -.S1 "TRAPS AND INTERRUPTS" -EM provides a means for the user program to catch all traps -generated by the program itself, the hardware, or external conditions. 
-This mechanism uses five instructions: LIM, SIM, SIG, TRP and RTT. -This section of the manual may be omitted on the first reading since it -presupposes knowledge of the EM instruction set. -.P -The action taken when a trap occurs is determined by the value -of an internal EM trap register. -This register contains a pointer to a procedure. -Initially the pointer used is zero and all traps halt the -program with, hopefully, a useful message to the outside world. -The SIG instruction can be used to alter the trap register; -it pops a procedure pointer from the -stack into the trap register. -When a trap occurs after storing a nonzero value in the trap -register, the procedure pointed to by the trap register -is called with the trap number -as the only parameter (see below). -SIG returns the previous value of the trap register on the -stack. -Two consecutive SIGs are a no-op. -When a trap occurs, the trap register is reset to its initial -condition, to prevent recursive traps from hanging the machine up, -e.g. stack overflow in the stack overflow handling procedure. -.P -The runtime systems for some languages need to ignore some EM -traps. -EM offers a feature called the ignore mask. -It contains one bit for each of the lowest 16 trap numbers. -The bits are numbered 0 to 15, with the least significant bit -having number 0. -If a certain bit is 1 the corresponding trap never -occurs and processing simply continues. -The actions performed by the offending instruction are -described by the Pascal program in appendix A. -.N -If the bit is 0, traps are not ignored. -The instructions LIM and SIM allow copying and replacement of -the ignore mask.~ -.P -The TRP instruction generates a trap, the trap number being found on the -stack. -This is, among other things, -useful for library procedures and runtime systems. -It can also be used by a low level trap procedure to pass the trap to a -higher level one (see example below). 
-.P -The RTT instruction returns from the trap procedure and continues after the -trap. -In the list below all traps marked with an asterisk ('*') are -considered to be fatal and it is explicitly undefined what happens if -you try to restart after the trap. -.P -The way a trap procedure is called is completely compatible -with normal calling conventions. The only way a trap procedure -differs from normal procedures is the return. It has to use RTT instead -of RET. This is necessary because the complete runtime status is saved on the -stack before calling the procedure and all this status has to be reloaded. -Error numbers are in the range 0 to 252. -The trap numbers are divided into three categories: -.IS 4 -.N 1 -.PS - 10 -.PT ~~0-~63 -EM machine errors, e.g. illegal instruction. -.PS - 8 -.PT ~0-15 -maskable -.PT 16-63 -not maskable -.PE -.PT ~64-127 -Reserved for use by compilers, run time systems, etc. -.PT 128-252 -Available for user programs. -.PE 1 -.IE -EM machine errors are numbered as follows: -.DS I 5 -.TS -tab(@); -n l l. -0@EARRAY@Array bound error -1@ERANGE@Range bound error -2@ESET@Set bound error -3@EIOVFL@Integer overflow -4@EFOVFL@Floating overflow -5@EFUNFL@Floating underflow -6@EIDIVZ@Divide by 0 -7@EFDIVZ@Divide by 0.0 -8@EIUND@Undefined integer -9@EFUND@Undefined float -10@ECONV@Conversion error -16*@ESTACK@Stack overflow -17*@EHEAP@Heap overflow -18*@EILLINS@Illegal instruction -19*@EODDZ@Illegal size argument -20*@ECASE@Case error -21*@EMEMFLT@Addressing non existent memory -22*@EBADPTR@Bad pointer used -23*@EBADPC@Program counter out of range -24@EBADLAE@Bad argument of LAE -25@EBADMON@Bad monitor call -26@EBADLIN@Argument of LIN too high -27@EBADGTO@GTO descriptor error -.TE -.DE 0 -.P -As an example, -suppose a subprocedure has to be written to do a numeric -calculation. -When an overflow occurs the computation has to be stopped and -the higher level procedure must be resumed. 
-This can be programmed as follows using the mechanism described above: -.DS B - mes 2,2,2 ; set sizes -ersave - bss 2,0,0 ; Room to save previous value of trap procedure -msave - bss 2,0,0 ; Room to save previous value of trap mask - - pro calcule,0 ; entry point - lxl 0 ; fill in non-local goto descriptor with LB - ste jmpbuf+4 - lor 1 ; and SP - ste jmpbuf+2 - lim ; get current ignore mask - ste msave ; save it - lim - loc 16 ; bit for EFOVFL - ior 2 ; set in mask - sim ; ignore EFOVFL from now on - lpi $catch ; load procedure identifier - sig ; catch will get all traps now - ste ersave ; save previous trap procedure identifier -; perform calculation now, possibly generating overflow -1 ; label jumped to by catch procedure - loe ersave ; get old trap procedure - sig ; refer all following traps to old procedure - asp 2 ; remove result of sig - loe msave ; restore previous mask - sim ; done now -; load result of calculation - ret 2 ; return result -jmpbuf - con *1,0,0 - end -.DE 0 -.VS 1 1 -.DS -Example of catch procedure - pro catch,0 ; Local procedure that must catch the overflow trap - lol 2 ; Load trap number - loc 4 ; check for overflow - bne *1 ; if other trap, call higher trap procedure - gto jmpbuf ; return to procedure calcule -1 ; other trap has occurred - loe ersave ; previous trap procedure - sig ; other procedure will get the traps now - asp 2 ; remove the result of sig - lol 2 ; stack trap number - trp ; call other trap procedure - rtt ; if other procedure returns, do the same - end -.DE diff --git a/doc/em/ip.awk b/doc/em/ip.awk deleted file mode 100644 index 6c3658698..000000000 --- a/doc/em/ip.awk +++ /dev/null @@ -1,11 +0,0 @@ -BEGIN { printf(".TS\n"); - for (i = 0; i < 3; i++) - printf("lw(4) 0 lw(6) 0 rw(2) 0 rw(5) 8 "); - printf(".\n"); - } -NF == 4 { printf "%s\t%s\t%d\t%d",$1,$2,$3,$4 } -NF == 3 { printf "%s\t%s\t\t%d",$1,$2,$3 } - { if ( NR%3 == 0 ) printf("\n") ; else printf("\t"); } -END { if ( NR%3 != 0 ) printf("\n"); - printf(".TE\n"); - 
} diff --git a/doc/em/ispace.nr b/doc/em/ispace.nr deleted file mode 100644 index db1c71fb2..000000000 --- a/doc/em/ispace.nr +++ /dev/null @@ -1,57 +0,0 @@ -.bp -.P1 "INSTRUCTION ADDRESS SPACE" -The instruction space of the EM machine contains -the code for procedures. -Tables necessary for the execution of this code, for example, procedure -descriptor tables, may also be present. -The instruction space does not change during -the execution of a program, so that it may be -protected. -No further restrictions to the instruction address space are -necessary for the abstract and assembly language level. -.PP -Each procedure has a single entry point: the first instruction. -A special type of pointer identifies a procedure. -Pointers into the instruction -address space have the same size as pointers into data space and -can, for example, contain the address of the first instruction -or an index in a procedure descriptor table. -.QQ -There is a single EM program counter, PC, pointing -to the next instruction to be executed. -The procedure pointed to by PC is -called the 'current' procedure. -A procedure may call another procedure using the CAL or CAI -instruction. -The calling procedure remains 'active' and is resumed whenever the called -procedure returns. -Note that a procedure has several 'active' invocations when -called recursively. -.PP -Each procedure must return properly. -It is not allowed to fall through to the -code of the next procedure. -There are several ways to exit from a procedure: -.IP - -the RET instruction, which returns to the -calling procedure. -.IP - -the RTT instruction, which exits a trap handling routine and resumes -the trapping instruction (see next chapter). -.IP - -the GTO instruction, which is used for non-local goto's. -It can remove several frames from the stack and transfer -control to an active procedure. 
-(see also MES~11 in paragraph 11.1.4.4) -.PP -All branch instructions can transfer control -to any label within the same procedure. -Branch instructions can never jump out of a procedure. -.PP -Several language implementations use a so called procedure -instance identifier, a combination of a procedure identifier and -the LB of a stack frame, also called static link. -.PP -The program text for each procedure, as well as any tables, -are fragments and can be allocated anywhere -in the instruction address space. diff --git a/doc/em/itables b/doc/em/itables deleted file mode 100644 index a4825dc9f..000000000 --- a/doc/em/itables +++ /dev/null @@ -1,2922 +0,0 @@ -.TS -.if \n+(b.=1 .nr d. \n(.c-\n(c.-1 -.de 35 -.ps \n(.s -.vs \n(.vu -.in \n(.iu -.if \n(.u .fi -.if \n(.j .ad -.if \n(.j=0 .na -.. -.nf -.nr #~ 0 -.if \n(.T .if n .nr #~ 0.6n -.ds #d .d -.if \(ts\n(.z\(ts\(ts .ds #d nl -.fc -.nr 33 \n(.s -.rm 66 67 68 69 70 71 72 73 74 75 76 77 -.nr 66 0 -.nr 38 \waar -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wadp -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wadp -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wasp -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wbeq -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wble -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wbne -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wbra -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wcff -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wcmf -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wcms -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wdec -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wdup -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wfil -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wine -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \winn -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wlae -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wlal -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wlal -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wldc -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wldl -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wlfr -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wlil -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wlni -.if 
\n(66<\n(38 .nr 66 \n(38 -.nr 38 \wloc -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wloe -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wlof -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wloi -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wlol -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wlol -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wlxa -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wmli -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wret -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wsbf -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wset -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wsli -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wstf -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wsti -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wstl -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wstl -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wtgt -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wzeq -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wzge -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wzlt -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wzre -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wzrl -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \waar -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wadi -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wads -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wand -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wass -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wbgt -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wbls -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wbne -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wcfi -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wcmf -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wcmi -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wcmu -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wcom -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wcsb -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wcui -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wdel -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wdus -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wdvf -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wdvu -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wfef -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \winl -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \winn -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wlar -.if \n(66<\n(38 .nr 66 
\n(38 -.nr 38 \wldf -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wlfr -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wlim -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wlor -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wlxl -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wmli -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wmlu -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wngf -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wnop -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wret -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wrmu -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wrol -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wrtt -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wsbf -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wsbi -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wsbu -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wsdf -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wset -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wsil -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wsli -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wslu -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wsru -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wsts -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wtge -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wxor -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wzer -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wzle -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wzrf -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wdch -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wexg -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wldc -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wlal -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wldl -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wlil -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wlof -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wlpi -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wbeq -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wble -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wbne -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wdee -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wfil -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \winl -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wsde -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wsdl -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wste -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wstl 
-.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wzgt -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wzne -.if \n(66<\n(38 .nr 66 \n(38 -.nr 38 \wzrl -.if \n(66<\n(38 .nr 66 \n(38 -.66 -.rm 66 -.nr 38 4n -.if \n(66<\n(38 .nr 66 \n(38 -.nr 67 0 -.nr 38 \wmwPo -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \w2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wsN -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wmwPo -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wsP -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wsP -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wsP -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wsP -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \w- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wsP -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wsP -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \w- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wmwPo -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \w2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wsw -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wsP -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \w2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wN2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wswP -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wmP -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wmP -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wsP -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wmwP -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \w- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wmN -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \ww2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wmwPo -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wmPo -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wwP2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wmwN -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wmPo -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wmwPo -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wsP -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wsP -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wsP -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wmwPo -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \w2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wmPo -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wwP2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wmwN -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \w- -.if \n(67<\n(38 .nr 67 
\n(38 -.nr 38 \w2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wsP -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wsP -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \ww2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wswN -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wewP2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wewP2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wesP -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 
\wewP2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we2 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \we- -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \w4 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wN4 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wwP4 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wwN4 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \w4 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \w4 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \w4 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \w4 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \w4 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \ww4 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \w4 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wwP4 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \w4 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wwN4 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \ww4 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wwN4 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \w4 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \w4 -.if \n(67<\n(38 .nr 67 \n(38 -.nr 38 \wwN4 -.if \n(67<\n(38 .nr 67 \n(38 -.67 -.rm 67 -.nr 38 6n -.if \n(67<\n(38 .nr 67 \n(38 -.nr 68 0 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w5 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w2 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if 
\n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w2 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w4 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w8 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w2 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w5 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.nr 38 \w1 -.if \n(68<\n(38 .nr 68 \n(38 -.68 -.rm 68 -.nr 38 2n -.if \n(68<\n(38 .nr 68 \n(38 -.nr 69 0 -.nr 38 \w34 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w38 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w42 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w45 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w52 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w55 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w58 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w62 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w93 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w96 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w100 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w103 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w106 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w109 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w112 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w117 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w120 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w129 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w132 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w136 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w139 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w143 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w146 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w150 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w152 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w155 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w162 -.if 
\n(69<\n(38 .nr 69 \n(38 -.nr 38 \w168 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w174 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w180 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w190 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w194 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w199 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w202 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w206 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w209 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w214 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w218 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w224 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w228 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w235 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w238 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w242 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w245 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w248 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w252 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w1 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w4 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w7 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w10 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w13 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w16 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w19 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w22 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w25 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w28 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w31 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w34 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w37 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w40 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w43 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w46 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w49 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w52 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w55 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w58 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w61 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w64 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w67 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w70 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w73 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w76 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w79 -.if 
\n(69<\n(38 .nr 69 \n(38 -.nr 38 \w82 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w85 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w88 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w91 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w94 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w97 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w100 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w103 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w106 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w109 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w112 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w115 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w118 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w121 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w124 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w127 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w130 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w133 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w136 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w139 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w142 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w145 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w148 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w151 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w154 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w157 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w0 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w3 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w6 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w9 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w12 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w15 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w18 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w21 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w24 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w27 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w30 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w33 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w36 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w39 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w42 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w45 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w48 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w51 -.if \n(69<\n(38 .nr 69 \n(38 -.nr 38 \w54 -.if \n(69<\n(38 .nr 69 \n(38 -.69 -.rm 69 -.nr 38 5n 
-.if \n(69<\n(38 .nr 69 \n(38 -.nr 70 0 -.nr 38 \wadf -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wadp -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wads -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wasp -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wbge -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wblm -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wbra -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wcal -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wcif -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wcmi -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wcsa -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wdee -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wdvf -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \winc -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \winl -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wior -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wlae -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wlal -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wlal -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wlde -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wldl -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wlil -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wlin -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wloc -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wloc -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wloe -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wlof -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wloi -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wlol -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wlol -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wlxl -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wrck -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wrmi -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wsbi -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wsil -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wste -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wstf -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wsti -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wstl -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wstl -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wtlt -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wzeq -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wzgt -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wzne -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wzre -.if 
\n(70<\n(38 .nr 70 \n(38 -.nr 38 \wzrl -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wadf -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wadi -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wadu -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wand -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wass -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wble -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wbls -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wcai -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wcfu -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wcmf -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wcms -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wcmu -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wcsa -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wcsb -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wcuu -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wdel -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wdus -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wdvi -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wdvu -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wfif -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \winl -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wior -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wlar -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wldl -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wlil -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wlos -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wlpi -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wmlf -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wmli -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wmon -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wngi -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wrck -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wrmi -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wrmu -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wror -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wsar -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wsbf -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wsbs -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wsbu -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wsdl -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wset -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wsil -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wsli -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wsri -.if \n(70<\n(38 .nr 70 
\n(38 -.nr 38 \wsru -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wsts -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wtle -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wxor -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wzge -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wzlt -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wzrf -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wexg -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wlpb -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wlae -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wlde -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wldl -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wloc -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wlol -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wadp -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wbge -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wblm -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wbra -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wdel -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wgto -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \winl -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wsdf -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wsil -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wstf -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wzeq -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wzle -.if \n(70<\n(38 .nr 70 \n(38 -.nr 38 \wzre -.if \n(70<\n(38 .nr 70 \n(38 -.70 -.rm 70 -.nr 38 4n -.if \n(70<\n(38 .nr 70 \n(38 -.nr 71 0 -.nr 38 \wsP -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wmPo -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wmwPo -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wswP -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wsP -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wsP -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \w2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wmPo -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \w- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wmwPo -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wmwPo -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wsw -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wsP -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \w- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wmwN -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wmwPo -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wsw -.if \n(71<\n(38 .nr 71 \n(38 
-.nr 38 \wmP -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wswN -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \ww2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wswN -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wswN -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \w2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \w2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wsP -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wsw -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wsP -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wmwPo -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wwN2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wswP -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wmPo -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wmwPo -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wmwPo -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wmwPo -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wswN -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \ww2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wmwPo -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wmwPo -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wwN2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wswN -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \w- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wsP -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wsP -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wsP -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wsw -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wwN2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wewN2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 
-.nr 38 \we2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wewN2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wewP2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wewP2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wewP2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wewN2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we2 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wesP -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \we- -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \w4 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \ww4 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wwN4 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \w4 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wwP4 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \w4 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \w4 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \w4 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \w4 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wwP4 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \w4 
-.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wwN4 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \w4 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \wwP4 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \w4 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \w4 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \w4 -.if \n(71<\n(38 .nr 71 \n(38 -.nr 38 \ww4 -.if \n(71<\n(38 .nr 71 \n(38 -.71 -.rm 71 -.nr 38 6n -.if \n(71<\n(38 .nr 71 \n(38 -.nr 72 0 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w2 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w28 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w2 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w3 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w7 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w2 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w5 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w4 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w2 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w2 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w2 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w4 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w2 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.nr 38 \w1 -.if \n(72<\n(38 .nr 72 \n(38 -.72 -.rm 72 -.nr 38 2n -.if \n(72<\n(38 .nr 72 \n(38 -.nr 73 0 -.nr 38 \w35 -.if \n(73<\n(38 
.nr 73 \n(38 -.nr 38 \w39 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w43 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w50 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w53 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w56 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w59 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w64 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w94 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w97 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w101 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w104 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w107 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w110 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w113 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w118 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w121 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w130 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w133 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w137 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w140 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w144 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w148 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w151 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w153 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w156 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w166 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w169 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w175 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w188 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w191 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w196 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w200 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w203 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w207 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w210 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w215 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w219 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w225 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w233 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w236 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w239 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w243 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w246 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w249 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w253 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w2 
-.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w5 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w8 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w11 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w14 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w17 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w20 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w23 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w26 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w29 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w32 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w35 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w38 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w41 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w44 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w47 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w50 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w53 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w56 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w59 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w62 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w65 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w68 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w71 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w74 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w77 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w80 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w83 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w86 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w89 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w92 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w95 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w98 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w101 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w104 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w107 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w110 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w113 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w116 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w119 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w122 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w125 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w128 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w131 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w134 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w137 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w140 -.if 
\n(73<\n(38 .nr 73 \n(38 -.nr 38 \w143 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w146 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w149 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w152 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w155 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w158 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w1 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w4 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w7 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w10 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w13 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w16 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w19 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w22 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w25 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w28 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w31 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w34 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w37 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w40 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w43 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w46 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w49 -.if \n(73<\n(38 .nr 73 \n(38 -.nr 38 \w52 -.if \n(73<\n(38 .nr 73 \n(38 -.73 -.rm 73 -.nr 38 5n -.if \n(73<\n(38 .nr 73 \n(38 -.nr 74 0 -.nr 38 \wadi -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wadp -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wand -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wbeq -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wbgt -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wblt -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wbra -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wcal -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wcii -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wcmp -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wcsb -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wdel -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wdvi -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wine -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \winl -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wior -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wlal -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wlal -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wlar -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wlde -.if \n(74<\n(38 .nr 74 
\n(38 -.nr 38 \wlfr -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wlil -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wlin -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wloc -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wloc -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wlof -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wloi -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wloi -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wlol -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wlol -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wmlf -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wret -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wsar -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wsdl -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wsil -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wste -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wstf -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wsti -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wstl -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wteq -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wtne -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wzer -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wzle -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wzne -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wzrl -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \waar -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wadf -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wads -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wadu -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wasp -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wbge -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wblm -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wblt -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wcal -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wciu -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wcmi -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wcms -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wcom -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wcsa -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wcuf -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wdee -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wdup -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wdvf -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wdvi -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wfef -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wfif 
-.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \winn -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wior -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wldc -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wldl -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wlil -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wlos -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wlxa -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wmlf -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wmlu -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wngf -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wngi -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wrck -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wrmi -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wrol -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wror -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wsar -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wsbi -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wsbs -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wsde -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wsdl -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wsig -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wsim -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wslu -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wsri -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wsti -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wstr -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wtrp -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wzer -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wzgt -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wzne -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wzrl -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wexg -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wgto -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wlal -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wldf -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wlil -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wloe -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wlol -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wasp -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wbgt -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wblt -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wcal -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wdel -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wine -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wlin -.if \n(74<\n(38 
.nr 74 \n(38 -.nr 38 \wsdl -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wsil -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wstl -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wzge -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wzlt -.if \n(74<\n(38 .nr 74 \n(38 -.nr 38 \wzrl -.if \n(74<\n(38 .nr 74 \n(38 -.74 -.rm 74 -.nr 38 4n -.if \n(74<\n(38 .nr 74 \n(38 -.nr 75 0 -.nr 38 \wmwPo -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wsP -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wmwPo -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \w2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wsP -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wsP -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wsN -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wsP -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \w- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \w- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wmwPo -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wswN -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wmwPo -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \ww2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wswN -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wsP -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wP2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wmN -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wmwPo -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wsw -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wmwPo -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wswP -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wsP -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wmP -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wsN -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \w2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \w2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wsP -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wmwP -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wswN -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wsP -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wmwP -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wmwPo -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wswN -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wswP -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wsw -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wsP -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wsP -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 
\wmwP -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \w- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \w- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wsP -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wsP -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wsN -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wmwN -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wew2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wew2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wewN2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wewN2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if 
\n(75<\n(38 .nr 75 \n(38 -.nr 38 \wewN2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wesP -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we- -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wewP2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \we2 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wP4 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \w4 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wwP4 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \ww4 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wwN4 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \ww4 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \w4 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \w4 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \w4 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wwN4 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \ww4 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \w4 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wwP4 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wwN4 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wwP4 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \w4 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \w4 -.if \n(75<\n(38 .nr 75 \n(38 -.nr 38 \wwP4 -.if \n(75<\n(38 .nr 75 \n(38 -.75 -.rm 75 -.nr 38 6n -.if \n(75<\n(38 .nr 75 \n(38 -.nr 76 0 -.nr 38 \w2 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w2 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if 
\n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w2 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w34 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w4 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w2 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w3 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w2 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w2 -.if \n(76<\n(38 .nr 76 \n(38 -.nr 38 \w1 -.if \n(76<\n(38 .nr 76 \n(38 -.76 -.rm 76 -.nr 38 2n -.if \n(76<\n(38 .nr 76 \n(38 -.nr 77 0 -.nr 38 \w36 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w41 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w44 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w51 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w54 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w57 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w60 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w92 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w95 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w99 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w102 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w105 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w108 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w111 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w116 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w119 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w128 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w131 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w135 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w138 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w141 -.if \n(77<\n(38 
.nr 77 \n(38 -.nr 38 \w145 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w149 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w0 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w154 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w161 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w167 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w173 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w176 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w189 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w193 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w197 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w201 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w205 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w208 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w211 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w217 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w223 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w226 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w234 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w237 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w241 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w244 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w247 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w250 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w0 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w3 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w6 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w9 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w12 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w15 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w18 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w21 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w24 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w27 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w30 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w33 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w36 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w39 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w42 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w45 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w48 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w51 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w54 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w57 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w60 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w63 -.if \n(77<\n(38 .nr 
77 \n(38 -.nr 38 \w66 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w69 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w72 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w75 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w78 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w81 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w84 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w87 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w90 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w93 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w96 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w99 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w102 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w105 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w108 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w111 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w114 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w117 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w120 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w123 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w126 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w129 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w132 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w135 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w138 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w141 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w144 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w147 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w150 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w153 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w156 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w159 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w2 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w5 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w8 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w11 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w14 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w17 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w20 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w23 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w26 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w29 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w32 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w35 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w38 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w41 -.if \n(77<\n(38 .nr 77 
\n(38 -.nr 38 \w44 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w47 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w50 -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 \w53 -.if \n(77<\n(38 .nr 77 \n(38 -.77 -.rm 77 -.nr 38 5n -.if \n(77<\n(38 .nr 77 \n(38 -.nr 38 1n -.nr 65 0 -.nr 40 \n(65+((0*\n(38)/2) -.nr 66 +\n(40 -.nr 41 \n(66+((0*\n(38)/2) -.nr 67 +\n(41 -.nr 42 \n(67+((0*\n(38)/2) -.nr 68 +\n(42 -.nr 43 \n(68+((0*\n(38)/2) -.nr 69 +\n(43 -.nr 44 \n(69+((16*\n(38)/2) -.nr 70 +\n(44 -.nr 45 \n(70+((0*\n(38)/2) -.nr 71 +\n(45 -.nr 46 \n(71+((0*\n(38)/2) -.nr 72 +\n(46 -.nr 47 \n(72+((0*\n(38)/2) -.nr 73 +\n(47 -.nr 48 \n(73+((16*\n(38)/2) -.nr 74 +\n(48 -.nr 49 \n(74+((0*\n(38)/2) -.nr 75 +\n(49 -.nr 50 \n(75+((0*\n(38)/2) -.nr 76 +\n(50 -.nr 51 \n(76+((0*\n(38)/2) -.nr 77 +\n(51 -.nr TW \n(77 -.if t .if (\n(TW+\n(.o)>7.65i .tm Table at line 121 file Input is too wide - \n(TW units -.fc   -.nr #T 0-1 -.nr #a 0-1 -.eo -.de T# -.nr 35 1m -.ds #d .d -.if \(ts\n(.z\(ts\(ts .ds #d nl -.mk ## -.nr ## -1v -.ls 1 -.ls -.. 
-.ec -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'aar\h'|\n(41u'mwPo\h'|\n(42u'1\h'|\n(43u'34\h'|\n(44u'adf\h'|\n(45u'sP\h'|\n(46u'1\h'|\n(47u'35\h'|\n(48u'adi\h'|\n(49u'mwPo\h'|\n(50u'2\h'|\n(51u'36 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'adp\h'|\n(41u'2\h'|\n(42u'\h'|\n(43u'38\h'|\n(44u'adp\h'|\n(45u'mPo\h'|\n(46u'2\h'|\n(47u'39\h'|\n(48u'adp\h'|\n(49u'sP\h'|\n(50u'1\h'|\n(51u'41 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'adp\h'|\n(41u'sN\h'|\n(42u'1\h'|\n(43u'42\h'|\n(44u'ads\h'|\n(45u'mwPo\h'|\n(46u'1\h'|\n(47u'43\h'|\n(48u'and\h'|\n(49u'mwPo\h'|\n(50u'1\h'|\n(51u'44 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'asp\h'|\n(41u'mwPo\h'|\n(42u'5\h'|\n(43u'45\h'|\n(44u'asp\h'|\n(45u'swP\h'|\n(46u'1\h'|\n(47u'50\h'|\n(48u'beq\h'|\n(49u'2\h'|\n(50u'\h'|\n(51u'51 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'beq\h'|\n(41u'sP\h'|\n(42u'1\h'|\n(43u'52\h'|\n(44u'bge\h'|\n(45u'sP\h'|\n(46u'1\h'|\n(47u'53\h'|\n(48u'bgt\h'|\n(49u'sP\h'|\n(50u'1\h'|\n(51u'54 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'ble\h'|\n(41u'sP\h'|\n(42u'1\h'|\n(43u'55\h'|\n(44u'blm\h'|\n(45u'sP\h'|\n(46u'1\h'|\n(47u'56\h'|\n(48u'blt\h'|\n(49u'sP\h'|\n(50u'1\h'|\n(51u'57 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'bne\h'|\n(41u'sP\h'|\n(42u'1\h'|\n(43u'58\h'|\n(44u'bra\h'|\n(45u'2\h'|\n(46u'\h'|\n(47u'59\h'|\n(48u'bra\h'|\n(49u'sN\h'|\n(50u'2\h'|\n(51u'60 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u 
\n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'bra\h'|\n(41u'sP\h'|\n(42u'2\h'|\n(43u'62\h'|\n(44u'cal\h'|\n(45u'mPo\h'|\n(46u'28\h'|\n(47u'64\h'|\n(48u'cal\h'|\n(49u'sP\h'|\n(50u'1\h'|\n(51u'92 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'cff\h'|\n(41u'-\h'|\n(42u'\h'|\n(43u'93\h'|\n(44u'cif\h'|\n(45u'-\h'|\n(46u'\h'|\n(47u'94\h'|\n(48u'cii\h'|\n(49u'-\h'|\n(50u'\h'|\n(51u'95 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'cmf\h'|\n(41u'sP\h'|\n(42u'1\h'|\n(43u'96\h'|\n(44u'cmi\h'|\n(45u'mwPo\h'|\n(46u'2\h'|\n(47u'97\h'|\n(48u'cmp\h'|\n(49u'-\h'|\n(50u'\h'|\n(51u'99 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'cms\h'|\n(41u'sP\h'|\n(42u'1\h'|\n(43u'100\h'|\n(44u'csa\h'|\n(45u'mwPo\h'|\n(46u'1\h'|\n(47u'101\h'|\n(48u'csb\h'|\n(49u'mwPo\h'|\n(50u'1\h'|\n(51u'102 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'dec\h'|\n(41u'-\h'|\n(42u'\h'|\n(43u'103\h'|\n(44u'dee\h'|\n(45u'sw\h'|\n(46u'1\h'|\n(47u'104\h'|\n(48u'del\h'|\n(49u'swN\h'|\n(50u'1\h'|\n(51u'105 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'dup\h'|\n(41u'mwPo\h'|\n(42u'1\h'|\n(43u'106\h'|\n(44u'dvf\h'|\n(45u'sP\h'|\n(46u'1\h'|\n(47u'107\h'|\n(48u'dvi\h'|\n(49u'mwPo\h'|\n(50u'1\h'|\n(51u'108 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'fil\h'|\n(41u'2\h'|\n(42u'\h'|\n(43u'109\h'|\n(44u'inc\h'|\n(45u'-\h'|\n(46u'\h'|\n(47u'110\h'|\n(48u'ine\h'|\n(49u'w2\h'|\n(50u'\h'|\n(51u'111 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f 
-\&\h'|\n(40u'ine\h'|\n(41u'sw\h'|\n(42u'1\h'|\n(43u'112\h'|\n(44u'inl\h'|\n(45u'mwN\h'|\n(46u'3\h'|\n(47u'113\h'|\n(48u'inl\h'|\n(49u'swN\h'|\n(50u'1\h'|\n(51u'116 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'inn\h'|\n(41u'sP\h'|\n(42u'1\h'|\n(43u'117\h'|\n(44u'ior\h'|\n(45u'mwPo\h'|\n(46u'1\h'|\n(47u'118\h'|\n(48u'ior\h'|\n(49u'sP\h'|\n(50u'1\h'|\n(51u'119 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'lae\h'|\n(41u'2\h'|\n(42u'\h'|\n(43u'120\h'|\n(44u'lae\h'|\n(45u'sw\h'|\n(46u'7\h'|\n(47u'121\h'|\n(48u'lal\h'|\n(49u'P2\h'|\n(50u'\h'|\n(51u'128 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'lal\h'|\n(41u'N2\h'|\n(42u'\h'|\n(43u'129\h'|\n(44u'lal\h'|\n(45u'mP\h'|\n(46u'1\h'|\n(47u'130\h'|\n(48u'lal\h'|\n(49u'mN\h'|\n(50u'1\h'|\n(51u'131 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'lal\h'|\n(41u'swP\h'|\n(42u'1\h'|\n(43u'132\h'|\n(44u'lal\h'|\n(45u'swN\h'|\n(46u'2\h'|\n(47u'133\h'|\n(48u'lar\h'|\n(49u'mwPo\h'|\n(50u'1\h'|\n(51u'135 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'ldc\h'|\n(41u'mP\h'|\n(42u'1\h'|\n(43u'136\h'|\n(44u'lde\h'|\n(45u'w2\h'|\n(46u'\h'|\n(47u'137\h'|\n(48u'lde\h'|\n(49u'sw\h'|\n(50u'1\h'|\n(51u'138 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'ldl\h'|\n(41u'mP\h'|\n(42u'1\h'|\n(43u'139\h'|\n(44u'ldl\h'|\n(45u'swN\h'|\n(46u'1\h'|\n(47u'140\h'|\n(48u'lfr\h'|\n(49u'mwPo\h'|\n(50u'2\h'|\n(51u'141 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f 
-\&\h'|\n(40u'lfr\h'|\n(41u'sP\h'|\n(42u'1\h'|\n(43u'143\h'|\n(44u'lil\h'|\n(45u'swN\h'|\n(46u'1\h'|\n(47u'144\h'|\n(48u'lil\h'|\n(49u'swP\h'|\n(50u'1\h'|\n(51u'145 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'lil\h'|\n(41u'mwP\h'|\n(42u'2\h'|\n(43u'146\h'|\n(44u'lin\h'|\n(45u'2\h'|\n(46u'\h'|\n(47u'148\h'|\n(48u'lin\h'|\n(49u'sP\h'|\n(50u'1\h'|\n(51u'149 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'lni\h'|\n(41u'-\h'|\n(42u'\h'|\n(43u'150\h'|\n(44u'loc\h'|\n(45u'2\h'|\n(46u'\h'|\n(47u'151\h'|\n(48u'loc\h'|\n(49u'mP\h'|\n(50u'34\h'|\n(51u'0 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'loc\h'|\n(41u'mN\h'|\n(42u'1\h'|\n(43u'152\h'|\n(44u'loc\h'|\n(45u'sP\h'|\n(46u'1\h'|\n(47u'153\h'|\n(48u'loc\h'|\n(49u'sN\h'|\n(50u'1\h'|\n(51u'154 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'loe\h'|\n(41u'w2\h'|\n(42u'\h'|\n(43u'155\h'|\n(44u'loe\h'|\n(45u'sw\h'|\n(46u'5\h'|\n(47u'156\h'|\n(48u'lof\h'|\n(49u'2\h'|\n(50u'\h'|\n(51u'161 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'lof\h'|\n(41u'mwPo\h'|\n(42u'4\h'|\n(43u'162\h'|\n(44u'lof\h'|\n(45u'sP\h'|\n(46u'1\h'|\n(47u'166\h'|\n(48u'loi\h'|\n(49u'2\h'|\n(50u'\h'|\n(51u'167 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'loi\h'|\n(41u'mPo\h'|\n(42u'1\h'|\n(43u'168\h'|\n(44u'loi\h'|\n(45u'mwPo\h'|\n(46u'4\h'|\n(47u'169\h'|\n(48u'loi\h'|\n(49u'sP\h'|\n(50u'1\h'|\n(51u'173 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f 
-\&\h'|\n(40u'lol\h'|\n(41u'wP2\h'|\n(42u'\h'|\n(43u'174\h'|\n(44u'lol\h'|\n(45u'wN2\h'|\n(46u'\h'|\n(47u'175\h'|\n(48u'lol\h'|\n(49u'mwP\h'|\n(50u'4\h'|\n(51u'176 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'lol\h'|\n(41u'mwN\h'|\n(42u'8\h'|\n(43u'180\h'|\n(44u'lol\h'|\n(45u'swP\h'|\n(46u'1\h'|\n(47u'188\h'|\n(48u'lol\h'|\n(49u'swN\h'|\n(50u'1\h'|\n(51u'189 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'lxa\h'|\n(41u'mPo\h'|\n(42u'1\h'|\n(43u'190\h'|\n(44u'lxl\h'|\n(45u'mPo\h'|\n(46u'2\h'|\n(47u'191\h'|\n(48u'mlf\h'|\n(49u'sP\h'|\n(50u'1\h'|\n(51u'193 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'mli\h'|\n(41u'mwPo\h'|\n(42u'2\h'|\n(43u'194\h'|\n(44u'rck\h'|\n(45u'mwPo\h'|\n(46u'1\h'|\n(47u'196\h'|\n(48u'ret\h'|\n(49u'mwP\h'|\n(50u'2\h'|\n(51u'197 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'ret\h'|\n(41u'sP\h'|\n(42u'1\h'|\n(43u'199\h'|\n(44u'rmi\h'|\n(45u'mwPo\h'|\n(46u'1\h'|\n(47u'200\h'|\n(48u'sar\h'|\n(49u'mwPo\h'|\n(50u'1\h'|\n(51u'201 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'sbf\h'|\n(41u'sP\h'|\n(42u'1\h'|\n(43u'202\h'|\n(44u'sbi\h'|\n(45u'mwPo\h'|\n(46u'2\h'|\n(47u'203\h'|\n(48u'sdl\h'|\n(49u'swN\h'|\n(50u'1\h'|\n(51u'205 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'set\h'|\n(41u'sP\h'|\n(42u'1\h'|\n(43u'206\h'|\n(44u'sil\h'|\n(45u'swN\h'|\n(46u'1\h'|\n(47u'207\h'|\n(48u'sil\h'|\n(49u'swP\h'|\n(50u'1\h'|\n(51u'208 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f 
-\&\h'|\n(40u'sli\h'|\n(41u'mwPo\h'|\n(42u'1\h'|\n(43u'209\h'|\n(44u'ste\h'|\n(45u'w2\h'|\n(46u'\h'|\n(47u'210\h'|\n(48u'ste\h'|\n(49u'sw\h'|\n(50u'3\h'|\n(51u'211 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'stf\h'|\n(41u'2\h'|\n(42u'\h'|\n(43u'214\h'|\n(44u'stf\h'|\n(45u'mwPo\h'|\n(46u'2\h'|\n(47u'215\h'|\n(48u'stf\h'|\n(49u'sP\h'|\n(50u'1\h'|\n(51u'217 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'sti\h'|\n(41u'mPo\h'|\n(42u'1\h'|\n(43u'218\h'|\n(44u'sti\h'|\n(45u'mwPo\h'|\n(46u'4\h'|\n(47u'219\h'|\n(48u'sti\h'|\n(49u'sP\h'|\n(50u'1\h'|\n(51u'223 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'stl\h'|\n(41u'wP2\h'|\n(42u'\h'|\n(43u'224\h'|\n(44u'stl\h'|\n(45u'wN2\h'|\n(46u'\h'|\n(47u'225\h'|\n(48u'stl\h'|\n(49u'mwP\h'|\n(50u'2\h'|\n(51u'226 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'stl\h'|\n(41u'mwN\h'|\n(42u'5\h'|\n(43u'228\h'|\n(44u'stl\h'|\n(45u'swN\h'|\n(46u'1\h'|\n(47u'233\h'|\n(48u'teq\h'|\n(49u'-\h'|\n(50u'\h'|\n(51u'234 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'tgt\h'|\n(41u'-\h'|\n(42u'\h'|\n(43u'235\h'|\n(44u'tlt\h'|\n(45u'-\h'|\n(46u'\h'|\n(47u'236\h'|\n(48u'tne\h'|\n(49u'-\h'|\n(50u'\h'|\n(51u'237 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'zeq\h'|\n(41u'2\h'|\n(42u'\h'|\n(43u'238\h'|\n(44u'zeq\h'|\n(45u'sP\h'|\n(46u'2\h'|\n(47u'239\h'|\n(48u'zer\h'|\n(49u'sP\h'|\n(50u'1\h'|\n(51u'241 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f 
-\&\h'|\n(40u'zge\h'|\n(41u'sP\h'|\n(42u'1\h'|\n(43u'242\h'|\n(44u'zgt\h'|\n(45u'sP\h'|\n(46u'1\h'|\n(47u'243\h'|\n(48u'zle\h'|\n(49u'sP\h'|\n(50u'1\h'|\n(51u'244 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'zlt\h'|\n(41u'sP\h'|\n(42u'1\h'|\n(43u'245\h'|\n(44u'zne\h'|\n(45u'sP\h'|\n(46u'1\h'|\n(47u'246\h'|\n(48u'zne\h'|\n(49u'sN\h'|\n(50u'1\h'|\n(51u'247 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'zre\h'|\n(41u'w2\h'|\n(42u'\h'|\n(43u'248\h'|\n(44u'zre\h'|\n(45u'sw\h'|\n(46u'1\h'|\n(47u'249\h'|\n(48u'zrl\h'|\n(49u'mwN\h'|\n(50u'2\h'|\n(51u'250 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'zrl\h'|\n(41u'swN\h'|\n(42u'1\h'|\n(43u'252\h'|\n(44u'zrl\h'|\n(45u'wN2\h'|\n(46u'\h'|\n(47u'253\h'|\n(48u'aar\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'0 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'aar\h'|\n(41u'e-\h'|\n(42u'\h'|\n(43u'1\h'|\n(44u'adf\h'|\n(45u'e2\h'|\n(46u'\h'|\n(47u'2\h'|\n(48u'adf\h'|\n(49u'e-\h'|\n(50u'\h'|\n(51u'3 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'adi\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'4\h'|\n(44u'adi\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'5\h'|\n(48u'ads\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'6 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'ads\h'|\n(41u'e-\h'|\n(42u'\h'|\n(43u'7\h'|\n(44u'adu\h'|\n(45u'e2\h'|\n(46u'\h'|\n(47u'8\h'|\n(48u'adu\h'|\n(49u'e-\h'|\n(50u'\h'|\n(51u'9 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f 
-\&\h'|\n(40u'and\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'10\h'|\n(44u'and\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'11\h'|\n(48u'asp\h'|\n(49u'ew2\h'|\n(50u'\h'|\n(51u'12 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'ass\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'13\h'|\n(44u'ass\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'14\h'|\n(48u'bge\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'15 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'bgt\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'16\h'|\n(44u'ble\h'|\n(45u'e2\h'|\n(46u'\h'|\n(47u'17\h'|\n(48u'blm\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'18 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'bls\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'19\h'|\n(44u'bls\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'20\h'|\n(48u'blt\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'21 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'bne\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'22\h'|\n(44u'cai\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'23\h'|\n(48u'cal\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'24 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'cfi\h'|\n(41u'e-\h'|\n(42u'\h'|\n(43u'25\h'|\n(44u'cfu\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'26\h'|\n(48u'ciu\h'|\n(49u'e-\h'|\n(50u'\h'|\n(51u'27 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'cmf\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'28\h'|\n(44u'cmf\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'29\h'|\n(48u'cmi\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'30 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f 
-\&\h'|\n(40u'cmi\h'|\n(41u'e-\h'|\n(42u'\h'|\n(43u'31\h'|\n(44u'cms\h'|\n(45u'e2\h'|\n(46u'\h'|\n(47u'32\h'|\n(48u'cms\h'|\n(49u'e-\h'|\n(50u'\h'|\n(51u'33 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'cmu\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'34\h'|\n(44u'cmu\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'35\h'|\n(48u'com\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'36 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'com\h'|\n(41u'e-\h'|\n(42u'\h'|\n(43u'37\h'|\n(44u'csa\h'|\n(45u'e2\h'|\n(46u'\h'|\n(47u'38\h'|\n(48u'csa\h'|\n(49u'e-\h'|\n(50u'\h'|\n(51u'39 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'csb\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'40\h'|\n(44u'csb\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'41\h'|\n(48u'cuf\h'|\n(49u'e-\h'|\n(50u'\h'|\n(51u'42 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'cui\h'|\n(41u'e-\h'|\n(42u'\h'|\n(43u'43\h'|\n(44u'cuu\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'44\h'|\n(48u'dee\h'|\n(49u'ew2\h'|\n(50u'\h'|\n(51u'45 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'del\h'|\n(41u'ewP2\h'|\n(42u'\h'|\n(43u'46\h'|\n(44u'del\h'|\n(45u'ewN2\h'|\n(46u'\h'|\n(47u'47\h'|\n(48u'dup\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'48 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'dus\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'49\h'|\n(44u'dus\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'50\h'|\n(48u'dvf\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'51 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f 
-\&\h'|\n(40u'dvf\h'|\n(41u'e-\h'|\n(42u'\h'|\n(43u'52\h'|\n(44u'dvi\h'|\n(45u'e2\h'|\n(46u'\h'|\n(47u'53\h'|\n(48u'dvi\h'|\n(49u'e-\h'|\n(50u'\h'|\n(51u'54 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'dvu\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'55\h'|\n(44u'dvu\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'56\h'|\n(48u'fef\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'57 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'fef\h'|\n(41u'e-\h'|\n(42u'\h'|\n(43u'58\h'|\n(44u'fif\h'|\n(45u'e2\h'|\n(46u'\h'|\n(47u'59\h'|\n(48u'fif\h'|\n(49u'e-\h'|\n(50u'\h'|\n(51u'60 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'inl\h'|\n(41u'ewP2\h'|\n(42u'\h'|\n(43u'61\h'|\n(44u'inl\h'|\n(45u'ewN2\h'|\n(46u'\h'|\n(47u'62\h'|\n(48u'inn\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'63 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'inn\h'|\n(41u'e-\h'|\n(42u'\h'|\n(43u'64\h'|\n(44u'ior\h'|\n(45u'e2\h'|\n(46u'\h'|\n(47u'65\h'|\n(48u'ior\h'|\n(49u'e-\h'|\n(50u'\h'|\n(51u'66 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'lar\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'67\h'|\n(44u'lar\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'68\h'|\n(48u'ldc\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'69 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'ldf\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'70\h'|\n(44u'ldl\h'|\n(45u'ewP2\h'|\n(46u'\h'|\n(47u'71\h'|\n(48u'ldl\h'|\n(49u'ewN2\h'|\n(50u'\h'|\n(51u'72 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f 
-\&\h'|\n(40u'lfr\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'73\h'|\n(44u'lil\h'|\n(45u'ewP2\h'|\n(46u'\h'|\n(47u'74\h'|\n(48u'lil\h'|\n(49u'ewN2\h'|\n(50u'\h'|\n(51u'75 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'lim\h'|\n(41u'e-\h'|\n(42u'\h'|\n(43u'76\h'|\n(44u'los\h'|\n(45u'e2\h'|\n(46u'\h'|\n(47u'77\h'|\n(48u'los\h'|\n(49u'e-\h'|\n(50u'\h'|\n(51u'78 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'lor\h'|\n(41u'esP\h'|\n(42u'1\h'|\n(43u'79\h'|\n(44u'lpi\h'|\n(45u'e2\h'|\n(46u'\h'|\n(47u'80\h'|\n(48u'lxa\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'81 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'lxl\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'82\h'|\n(44u'mlf\h'|\n(45u'e2\h'|\n(46u'\h'|\n(47u'83\h'|\n(48u'mlf\h'|\n(49u'e-\h'|\n(50u'\h'|\n(51u'84 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'mli\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'85\h'|\n(44u'mli\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'86\h'|\n(48u'mlu\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'87 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'mlu\h'|\n(41u'e-\h'|\n(42u'\h'|\n(43u'88\h'|\n(44u'mon\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'89\h'|\n(48u'ngf\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'90 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'ngf\h'|\n(41u'e-\h'|\n(42u'\h'|\n(43u'91\h'|\n(44u'ngi\h'|\n(45u'e2\h'|\n(46u'\h'|\n(47u'92\h'|\n(48u'ngi\h'|\n(49u'e-\h'|\n(50u'\h'|\n(51u'93 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f 
-\&\h'|\n(40u'nop\h'|\n(41u'e-\h'|\n(42u'\h'|\n(43u'94\h'|\n(44u'rck\h'|\n(45u'e2\h'|\n(46u'\h'|\n(47u'95\h'|\n(48u'rck\h'|\n(49u'e-\h'|\n(50u'\h'|\n(51u'96 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'ret\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'97\h'|\n(44u'rmi\h'|\n(45u'e2\h'|\n(46u'\h'|\n(47u'98\h'|\n(48u'rmi\h'|\n(49u'e-\h'|\n(50u'\h'|\n(51u'99 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'rmu\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'100\h'|\n(44u'rmu\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'101\h'|\n(48u'rol\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'102 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'rol\h'|\n(41u'e-\h'|\n(42u'\h'|\n(43u'103\h'|\n(44u'ror\h'|\n(45u'e2\h'|\n(46u'\h'|\n(47u'104\h'|\n(48u'ror\h'|\n(49u'e-\h'|\n(50u'\h'|\n(51u'105 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'rtt\h'|\n(41u'e-\h'|\n(42u'\h'|\n(43u'106\h'|\n(44u'sar\h'|\n(45u'e2\h'|\n(46u'\h'|\n(47u'107\h'|\n(48u'sar\h'|\n(49u'e-\h'|\n(50u'\h'|\n(51u'108 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'sbf\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'109\h'|\n(44u'sbf\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'110\h'|\n(48u'sbi\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'111 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'sbi\h'|\n(41u'e-\h'|\n(42u'\h'|\n(43u'112\h'|\n(44u'sbs\h'|\n(45u'e2\h'|\n(46u'\h'|\n(47u'113\h'|\n(48u'sbs\h'|\n(49u'e-\h'|\n(50u'\h'|\n(51u'114 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f 
-\&\h'|\n(40u'sbu\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'115\h'|\n(44u'sbu\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'116\h'|\n(48u'sde\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'117 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'sdf\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'118\h'|\n(44u'sdl\h'|\n(45u'ewP2\h'|\n(46u'\h'|\n(47u'119\h'|\n(48u'sdl\h'|\n(49u'ewN2\h'|\n(50u'\h'|\n(51u'120 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'set\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'121\h'|\n(44u'set\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'122\h'|\n(48u'sig\h'|\n(49u'e-\h'|\n(50u'\h'|\n(51u'123 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'sil\h'|\n(41u'ewP2\h'|\n(42u'\h'|\n(43u'124\h'|\n(44u'sil\h'|\n(45u'ewN2\h'|\n(46u'\h'|\n(47u'125\h'|\n(48u'sim\h'|\n(49u'e-\h'|\n(50u'\h'|\n(51u'126 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'sli\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'127\h'|\n(44u'sli\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'128\h'|\n(48u'slu\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'129 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'slu\h'|\n(41u'e-\h'|\n(42u'\h'|\n(43u'130\h'|\n(44u'sri\h'|\n(45u'e2\h'|\n(46u'\h'|\n(47u'131\h'|\n(48u'sri\h'|\n(49u'e-\h'|\n(50u'\h'|\n(51u'132 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'sru\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'133\h'|\n(44u'sru\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'134\h'|\n(48u'sti\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'135 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f 
-\&\h'|\n(40u'sts\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'136\h'|\n(44u'sts\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'137\h'|\n(48u'str\h'|\n(49u'esP\h'|\n(50u'1\h'|\n(51u'138 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'tge\h'|\n(41u'e-\h'|\n(42u'\h'|\n(43u'139\h'|\n(44u'tle\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'140\h'|\n(48u'trp\h'|\n(49u'e-\h'|\n(50u'\h'|\n(51u'141 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'xor\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'142\h'|\n(44u'xor\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'143\h'|\n(48u'zer\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'144 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'zer\h'|\n(41u'e-\h'|\n(42u'\h'|\n(43u'145\h'|\n(44u'zge\h'|\n(45u'e2\h'|\n(46u'\h'|\n(47u'146\h'|\n(48u'zgt\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'147 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'zle\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'148\h'|\n(44u'zlt\h'|\n(45u'e2\h'|\n(46u'\h'|\n(47u'149\h'|\n(48u'zne\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'150 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'zrf\h'|\n(41u'e2\h'|\n(42u'\h'|\n(43u'151\h'|\n(44u'zrf\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'152\h'|\n(48u'zrl\h'|\n(49u'ewP2\h'|\n(50u'\h'|\n(51u'153 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'dch\h'|\n(41u'e-\h'|\n(42u'\h'|\n(43u'154\h'|\n(44u'exg\h'|\n(45u'esP\h'|\n(46u'1\h'|\n(47u'155\h'|\n(48u'exg\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'156 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f 
-\&\h'|\n(40u'exg\h'|\n(41u'e-\h'|\n(42u'\h'|\n(43u'157\h'|\n(44u'lpb\h'|\n(45u'e-\h'|\n(46u'\h'|\n(47u'158\h'|\n(48u'gto\h'|\n(49u'e2\h'|\n(50u'\h'|\n(51u'159 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'ldc\h'|\n(41u'4\h'|\n(42u'\h'|\n(43u'0\h'|\n(44u'lae\h'|\n(45u'4\h'|\n(46u'\h'|\n(47u'1\h'|\n(48u'lal\h'|\n(49u'P4\h'|\n(50u'\h'|\n(51u'2 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'lal\h'|\n(41u'N4\h'|\n(42u'\h'|\n(43u'3\h'|\n(44u'lde\h'|\n(45u'w4\h'|\n(46u'\h'|\n(47u'4\h'|\n(48u'ldf\h'|\n(49u'4\h'|\n(50u'\h'|\n(51u'5 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'ldl\h'|\n(41u'wP4\h'|\n(42u'\h'|\n(43u'6\h'|\n(44u'ldl\h'|\n(45u'wN4\h'|\n(46u'\h'|\n(47u'7\h'|\n(48u'lil\h'|\n(49u'wP4\h'|\n(50u'\h'|\n(51u'8 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'lil\h'|\n(41u'wN4\h'|\n(42u'\h'|\n(43u'9\h'|\n(44u'loc\h'|\n(45u'4\h'|\n(46u'\h'|\n(47u'10\h'|\n(48u'loe\h'|\n(49u'w4\h'|\n(50u'\h'|\n(51u'11 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'lof\h'|\n(41u'4\h'|\n(42u'\h'|\n(43u'12\h'|\n(44u'lol\h'|\n(45u'wP4\h'|\n(46u'\h'|\n(47u'13\h'|\n(48u'lol\h'|\n(49u'wN4\h'|\n(50u'\h'|\n(51u'14 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'lpi\h'|\n(41u'4\h'|\n(42u'\h'|\n(43u'15\h'|\n(44u'adp\h'|\n(45u'4\h'|\n(46u'\h'|\n(47u'16\h'|\n(48u'asp\h'|\n(49u'w4\h'|\n(50u'\h'|\n(51u'17 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f 
-\&\h'|\n(40u'beq\h'|\n(41u'4\h'|\n(42u'\h'|\n(43u'18\h'|\n(44u'bge\h'|\n(45u'4\h'|\n(46u'\h'|\n(47u'19\h'|\n(48u'bgt\h'|\n(49u'4\h'|\n(50u'\h'|\n(51u'20 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'ble\h'|\n(41u'4\h'|\n(42u'\h'|\n(43u'21\h'|\n(44u'blm\h'|\n(45u'4\h'|\n(46u'\h'|\n(47u'22\h'|\n(48u'blt\h'|\n(49u'4\h'|\n(50u'\h'|\n(51u'23 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'bne\h'|\n(41u'4\h'|\n(42u'\h'|\n(43u'24\h'|\n(44u'bra\h'|\n(45u'4\h'|\n(46u'\h'|\n(47u'25\h'|\n(48u'cal\h'|\n(49u'4\h'|\n(50u'\h'|\n(51u'26 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'dee\h'|\n(41u'w4\h'|\n(42u'\h'|\n(43u'27\h'|\n(44u'del\h'|\n(45u'wP4\h'|\n(46u'\h'|\n(47u'28\h'|\n(48u'del\h'|\n(49u'wN4\h'|\n(50u'\h'|\n(51u'29 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'fil\h'|\n(41u'4\h'|\n(42u'\h'|\n(43u'30\h'|\n(44u'gto\h'|\n(45u'4\h'|\n(46u'\h'|\n(47u'31\h'|\n(48u'ine\h'|\n(49u'w4\h'|\n(50u'\h'|\n(51u'32 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'inl\h'|\n(41u'wP4\h'|\n(42u'\h'|\n(43u'33\h'|\n(44u'inl\h'|\n(45u'wN4\h'|\n(46u'\h'|\n(47u'34\h'|\n(48u'lin\h'|\n(49u'4\h'|\n(50u'\h'|\n(51u'35 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'sde\h'|\n(41u'4\h'|\n(42u'\h'|\n(43u'36\h'|\n(44u'sdf\h'|\n(45u'4\h'|\n(46u'\h'|\n(47u'37\h'|\n(48u'sdl\h'|\n(49u'wP4\h'|\n(50u'\h'|\n(51u'38 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f 
-\&\h'|\n(40u'sdl\h'|\n(41u'wN4\h'|\n(42u'\h'|\n(43u'39\h'|\n(44u'sil\h'|\n(45u'wP4\h'|\n(46u'\h'|\n(47u'40\h'|\n(48u'sil\h'|\n(49u'wN4\h'|\n(50u'\h'|\n(51u'41 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'ste\h'|\n(41u'w4\h'|\n(42u'\h'|\n(43u'42\h'|\n(44u'stf\h'|\n(45u'4\h'|\n(46u'\h'|\n(47u'43\h'|\n(48u'stl\h'|\n(49u'wP4\h'|\n(50u'\h'|\n(51u'44 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'stl\h'|\n(41u'wN4\h'|\n(42u'\h'|\n(43u'45\h'|\n(44u'zeq\h'|\n(45u'4\h'|\n(46u'\h'|\n(47u'46\h'|\n(48u'zge\h'|\n(49u'4\h'|\n(50u'\h'|\n(51u'47 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'zgt\h'|\n(41u'4\h'|\n(42u'\h'|\n(43u'48\h'|\n(44u'zle\h'|\n(45u'4\h'|\n(46u'\h'|\n(47u'49\h'|\n(48u'zlt\h'|\n(49u'4\h'|\n(50u'\h'|\n(51u'50 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'zne\h'|\n(41u'4\h'|\n(42u'\h'|\n(43u'51\h'|\n(44u'zre\h'|\n(45u'w4\h'|\n(46u'\h'|\n(47u'52\h'|\n(48u'zrl\h'|\n(49u'wP4\h'|\n(50u'\h'|\n(51u'53 -.ta \n(66u \n(67u \n(68u \n(69u \n(70u \n(71u \n(72u \n(73u \n(74u \n(75u \n(76u \n(77u -.nr 35 1m -.nr 31 \n(.f -\&\h'|\n(40u'zrl\h'|\n(41u'wN4\h'|\n(42u'\h'|\n(43u'54\h'|\n(44u'\h'|\n(45u'\h'|\n(46u'\h'|\n(47u'\h'|\n(48u'\h'|\n(49u'\h'|\n(50u'\h'|\n(51u' -.fc -.nr T. 1 -.T# 1 -.35 -.TE -.if \n-(b.=0 .nr c. \n(.c-\n(d.-120 diff --git a/doc/em/mach.nr b/doc/em/mach.nr deleted file mode 100644 index 2f82202d6..000000000 --- a/doc/em/mach.nr +++ /dev/null @@ -1,360 +0,0 @@ -.bp -.P1 "EM MACHINE LANGUAGE" -.PP -The EM machine language is designed to make program text compact -and to make decoding easy. 
-Compact program text has many advantages: programs execute faster, -programs occupy less primary and secondary storage and loading -programs into satellite processors is faster. -The decoding of EM machine language is so simple, -that it is feasible to use interpreters as long as EM hardware -machines are not available. -This chapter is irrelevant when back ends are used to -produce executable target machine code. -.P2 "Instruction encoding" -.PP -A design goal of EM is to make the -program text as compact as possible. -Decoding must be easy, however. -The encoding is fully byte oriented, without any small bit fields. -There are 256 primary opcodes, two of which are an escape to -two groups of 256 secondary opcodes each. -.QQ -EM instructions without arguments have a single opcode assigned, -possibly escaped: -.ta 12n 24n -.Dr 6 - |--------------| - | opcode | - |--------------| -.De - or -.Dr 6 - |--------------|--------------| - | escape | opcode | - |--------------|--------------| -.De -The encoding for instructions with an argument is more complex. -Several instructions have an address from the global data area -as argument. -Other instructions have different opcodes for positive -and negative arguments. -.LP -There is always an opcode that takes the next two bytes as argument, -high byte first: -.Dr 6 - |--------------|--------------|--------------| - | opcode | hibyte | lobyte | - |--------------|--------------|--------------| -.De - or -.Dr 6 - |--------------|--------------|--------------|--------------| - | escape | opcode | hibyte | lobyte | - |--------------|--------------|--------------|--------------| -.De -An extra escape is provided for instructions with four or eight byte arguments. -.Dr 6 - |--------------|--------------|--------------| |--------------| - | ESCAPE | opcode | hibyte |...| lobyte | - |--------------|--------------|--------------| |--------------| -.De -For most instructions some argument values predominate. 
-The most frequent combinations of instruction and argument -will be encoded in a single byte, called a mini: -.Dr 6 - |---------------| - |opcode+argument| (mini) - |---------------| -.De -The number of minis is restricted, because only -254 primary opcodes are available. -Many instructions have the bulk of their arguments -fall in the range 0 to 255. -Instructions that address global data have their arguments -distributed over a wider range, -but small values of the high byte are common. -For all these cases there is another encoding -that combines the instruction and the high byte of the argument -into a single opcode. -These opcodes are called shorties. -Shorties may be escaped. -.Dr 6 - |--------------|--------------| - | opcode+high | lobyte | (shortie) - |--------------|--------------| -.De - or -.Dr 6 - |--------------|--------------|--------------| - | escape | opcode+high | lobyte | - |--------------|--------------|--------------| -.De -Escaped shorties are useless if the normal encoding has a primary opcode. -Note that for some instruction-argument combinations -several different encodings are available. -It is the task of the assembler to select the shortest of these. -The savings from these mini and shortie -opcodes are considerable, about 55%. -.PP -Further improvements are possible: -the arguments of -many instructions are a multiple of the wordsize. -Some also do not allow zero as an argument. -If these arguments are divided by the wordsize and, -when zero is not allowed, then decremented by 1, more of them can -be encoded as shortie or mini. -The arguments of some other instructions -rarely or never assume the value 0, but start at 1. -The value 1 is then encoded as 0, -2 as 1 and so on. -.PP -Assigning opcodes to instructions by the assembler is completely -table driven. -For details see appendix B. -.P2 "Procedure descriptors" -.PP -The procedure identifiers used in the interpreter are indices -into a table of procedure descriptors.
-Each descriptor contains: -.IP 1. -the number of bytes to be reserved for locals at each -invocation. -.br -This is a pointer-sized integer. -.IP 2. -the start address of the procedure. -.P2 "Load format" -.PP -The EM machine language load format defines the interface between -the EM assembler/loader and the EM machine itself. -A load file consists of a header, the program text to be executed, -a description of the global data area and the procedure descriptor table, -in this order. -All integers in the load file are presented with the -least significant byte first. -.PP -The header has two parts: the first half (eight 16-bit integers) -aids in selecting -the correct EM machine or interpreter. -Some EM machines, for instance, may have hardware floating point -instructions. -.N -The header entries are as follows (bit 0 is rightmost): -.IP 1: -magic number (07255) -.IP 2: -flag bits with the following meaning: -.RS -.IP "bit 0" -TEST; test for integer overflow etc. -.IP "bit 1" -PROFILE; for each source line: count the number of memory -cycles executed. -.IP "bit 2" -FLOW; for each source line: set a bit in a bit map table if -instructions on that line are executed. -.IP "bit 3" -COUNT; for each source line: increment a counter if that line -is entered. -.IP "bit 4" -REALS; set if a program uses floating point instructions. -.IP "bit 5" -EXTRA; more tests during compiler debugging. -.RE -.IP 3: -number of unresolved references. -.IP 4: -version number; used to detect obsolete EM load files. -.IP 5: -wordsize; the number of bytes in each machine word. -.IP 6: -pointer size; the number of bytes available for addressing. -.IP 7: -unused -.IP 8: -unused -.LP -The second part of the header (eight entries, of pointer size bytes each) -describes the load file itself: -.IP 1: -NTEXT; the program text size in bytes. -.IP 2: -NDATA; the number of load-file descriptors (see below). -.IP 3: -NPROC; the number of entries in the procedure descriptor table.
-.IP 4: -ENTRY; procedure number of the procedure to start with. -.IP 5: -NLINE; the maximum source line number. -.IP 6: -SZDATA; the address of the lowest uninitialized data byte. -.IP 7: -unused -.IP 8: -unused -.PP -The program text consists of NTEXT bytes. -NTEXT is always a multiple of the wordsize. -The first byte of the program text is the -first byte of the instruction address -space, i.e. it has address 0. -Pointers into the program text are found in the procedure descriptor -table where relocation is simple and in the global data area. -The initialization of the global data area allows easy -relocation of pointers into both address spaces. -.PP -The global data area is described by the NDATA descriptors. -Each descriptor describes a number of consecutive words (of~wordsize) -and consists of a sequence of bytes. -While reading the descriptors from the load file, one can -initialize the global data area from low to high addresses. -The size of the initialized data area is given by SZDATA, -this number can be used to check the initialization. -.br -The header of each descriptor consists of a byte, describing the type, -and a count. -The number of bytes used for this (unsigned) count depends on the -type of the descriptor and -is either a pointer-sized integer -or one byte. -The meaning of the count depends on the descriptor type. -At load time an interpreter can -perform any conversion deemed necessary, such as -reordering bytes in integers -and pointers and adding base addresses to pointers. -.QQ -In the following pictures we show a graphical notation of the -initializers. -The leftmost rectangle represents the leading byte. -.LP -Fields marked with -.TS -tab(:); -l l. 
-n:contain a pointer-sized integer used as a count -m:contain a one-byte integer used as a count -b:contain a one-byte integer -w:contain a wordsized integer -p:contain a data or instruction pointer -s:contain a null terminated ASCII string -.TE -.Dr 6 - ------------------- - | 0 | n | repeat last initialization n times - ------------------- -.De -.Dr 4 - --------- - | 1 | m | m uninitialized words - --------- -.De -.Dr 6 - ____________ - / bytes \e - ----------------- ----- - | 2 | m | b | b |...| b | m initialized bytes - ----------------- ----- -.De -.Dr 6 - _________ - / word \e - ----------------------- - | 3 | m | w |... m initialized wordsized integers - ----------------------- -.De -.Dr 6 - _________ - / pointer \e - ----------------------- - | 4 | m | p |... m initialized data pointers - ----------------------- -.De -.Dr 6 - _________ - / pointer \e - ----------------------- - | 5 | m | p |... m initialized instruction pointers - ----------------------- -.De -.Dr 6 - ____________ - / bytes \e - ------------------------- - | 6 | m | b | b |...| b | initialized integer of size m - ------------------------- -.De -.Dr 6 - ____________ - / bytes \e - ------------------------- - | 7 | m | b | b |...| b | initialized unsigned of size m - ------------------------- -.De -.Dr 6 - ____________ - / string \e - ------------------------- - | 8 | m | s | initialized float of size m - ------------------------- -.De -.IP type~0: 10 -If the last initialization initialized k bytes starting -at address \fIa\fP, do the same initialization again n times, -starting at \fIa\fP+k, \fIa\fP+2*k, .... \fIa\fP+n*k. -This is the only descriptor whose starting byte -is followed by an integer with the -size of a -pointer, -in all other descriptors the first byte is followed by a one-byte count. -This descriptor must be preceded by a descriptor of -another type. -.IP type~1: 10 -Reserve m words, not explicitly initialized (BSS and HOL). 
-.IP type~2: 10 -The m bytes following the descriptor header are -initializers for the next m bytes of the -global data area. -m is divisible by the wordsize. -.IP type~3: 10 -The m words following the header are initializers for the next m words of the -global data area. -.IP type~4: 10 -The m data address space pointers following the header are -initializers for the next -m data pointers in the global data area. -Interpreters that represent EM pointers by -target machine addresses must relocate all data pointers. -.IP type~5: 10 -The m instruction address space pointers following the header are -initializers for the next -m instruction pointers in the global data area. -Interpreters that represent EM instruction pointers by -target machine addresses must relocate these pointers. -.IP type~6: 10 -The m bytes following the header form -a signed integer number with a size of m bytes, -which is an initializer for the next m bytes -of the global data area. -m is governed by the same restrictions as for -transfer of objects to/from memory. -.IP type~7: 10 -The m bytes following the header form -an unsigned integer number with a size of m bytes, -which is an initializer for the next m bytes -of the global data area. -m is governed by the same restrictions as for -transfer of objects to/from memory. -.IP type~8: 10 -The header is followed by an ASCII string, null terminated, to -initialize, in global data, -a floating point number with a size of m bytes. -m is governed by the same restrictions as for -transfer of objects to/from memory. -The ASCII string contains the notation of a real as used in the -Pascal language. -.PP -The NPROC procedure descriptors on the load file consist of -an instruction space address (of~pointer~size) and -an integer (of~pointer~size) specifying the number of bytes for -locals. 
diff --git a/doc/em/macr.nr b/doc/em/macr.nr deleted file mode 100644 index f13e8e124..000000000 --- a/doc/em/macr.nr +++ /dev/null @@ -1,113 +0,0 @@ -.LP -.if n \{\ -.nr LL 78 -.ll 78 \} -.tr ~ -.\" below are three simple macros to get the drawings right -.\" added by Dick Grune -.de Dr \" Drawing $1 (size) -.sp 1 -.ne \\$1 -.na -.ft CW \" constant spacing -.lg 0 \" no ligatures -.. -.de Df \" Drawing Footer -.br -.sp 1 -.ft R -.ce 1000 -.lg 1 -.. -.de De \" Drawing End $1 (lines) -.br -.ft R -.lg 1 -.ce 0 -.ad -.sp \\$1 -.. -.\" macro for exponents, added by Ceriel Jacobs -.de Ex \" Exponent $1 $2 [$3] -\\$1\v'-0.5m'\s-2\\$2\s+2\v'0.5m'\\$3 -.. -.\" QQ is like PP, but without space -. \" use .PP, with PD 0. -.de QQ -.nr xx \\n(PD -.nr PD 0 -.PP -.nr PD \\n(xx -.. -.nr N1 0 -.nr N2 0 -.nr N3 0 -.nr N4 0 -.nr N5 0 -.nr A5 0 -.af A5 A -.de P1 -.nr N2 0 -.nr N1 \\n(N1+1 -.ds Tl "\\n(N1. \\$1 -.Ca 0 -.sp -.LP -\\fB\\n(N1. \\$1\\fP -.sp -.. -.de P2 -.nr N3 0 -.nr N2 \\n(N2+1 -.ds Tl "\\n(N1.\\n(N2 \\$1 -.ne 5 -.Ca 2 -.sp -.LP -\\fB\\n(N1.\\n(N2 \\$1\fP -.. -.de P3 -.nr N4 0 -.nr N3 \\n(N3+1 -.ds Tl "\\n(N1.\\n(N2.\\n(N3 \\$1 -.Ca 4 -.LP -\\fI\\n(N1.\\n(N2.\\n(N3 \\$1\fP -.. -.de P4 -.nr N4 \\n(N4+1 -.ds Tl "\\n(N1.\\n(N2.\\n(N3.\\n(N4 \\$1 -.ne 5 -.Ca 6 -.LP -\\fI\\n(N1.\\n(N2.\\n(N3.\\n(N4 \\$1\fP -.. -.de AP -.nr N5 \\n(N5+1 -.nr A5 \\n(N5 -.ds Tl "\\n(A5. \\$1 -.ne 5 -.Ca 0 -.LP -\\fB\\n(A5. \\$1\\fP -.sp -.. -.de Ca -.da Cc -.if \\$1=0 \!.sp \\\\n(PDu -\!\l\&\\$1n\ \&\\*(Tl \l\&|\\\\n(LLu-\w\&\ \\n(PN\&u.\&\ \\n(PN -\!.br -.da -.. -.de Ct -.Cc -.rm Cc -.. -.de PT -.lt \\n(LLu -.pc % -.nr PN \\n%-1 -.if \\n(PN%2=1 .tl '''\\n(PN' -.if (\\n(PN%2=0)&(\\n(PN) .tl '\\n(PN''' -.lt \\n(.lu -.. 
diff --git a/doc/em/mapping.nr b/doc/em/mapping.nr deleted file mode 100644 index 22a75c8a3..000000000 --- a/doc/em/mapping.nr +++ /dev/null @@ -1,232 +0,0 @@ -.bp -.P1 "MAPPING OF EM DATA MEMORY ONTO TARGET MACHINE MEMORY" -.PP -The EM architecture is designed to be implemented -on many existing and future machines. -EM memory is highly fragmented to make -adaptation to various memory architectures possible. -Format and encoding of pointers is explicitly undefined. -.PP -This chapter gives solutions to some of the -anticipated problems. -First, we describe a possible memory layout for machines -with 64K bytes of address space. -Here we use a member of the EM family with 2-byte word and pointer -size. -The most straightforward layout is shown in figure 2. -.Dr 40 - 65534 \-> |-------------------------------| - |///////////////////////////////| - |//// unimplemented memory /////| - |///////////////////////////////| - ML \-> |-------------------------------| - | | - | | <\- LB - | stack and local area | - | | - |-------------------------------| <\- SP - |///////////////////////////////| - |//////// inaccessible /////////| - |///////////////////////////////| - |-------------------------------| <\- HP - | | - | heap area | - | | - | | - HB \-> |-------------------------------| - | | - | global data area | - | | - EB \-> |-------------------------------| - | | - | program text | <\- PC - | | - | ( and tables ) | - | | - | | - PB \-> |-------------------------------| - |///////////////////////////////| - |////////// undefined //////////| - |///////////////////////////////| - 0 \-> |-------------------------------| -.Df -Figure 2. Memory layout showing typical register -positions during execution of an EM program. -.De -.sp 1 -The base registers for the various memory pieces can be stored -in target machine registers or memory. -.TS -tab(;); -l 1 l l l. -PB;:;program base;points to the base of the instruction address space. 
-EB;:;external base;points to the base of the data address space. -HB;:;heap base;points to the base of the heap area. -ML;:;memory limit;marks the high end of the addressable data space. -.TE -.LP -The stack grows from high -EM addresses to low EM addresses, and the heap the -other way. -The memory between SP and HP is not accessible, -but may be allocated later to the stack or the heap if needed. -The local data area is allocated starting at the high end of -memory. -.PP -Because EM address 0 is not mapped onto target -address 0, a problem arises when pointers are used. -If a program pushed a constant, say 6, onto the stack, -and then tried to indirect through it, -the wrong word would be fetched, -because EM address 6 is mapped onto target address EB+6 -and not target address 6 itself. -This particular problem is solved by explicitly declaring -the format of a pointer to be undefined, -so that using a constant as a pointer is completely illegal. -However, the general problem of mapping pointers still exists. -.PP -There are two possible solutions. -In the first solution, EM pointers are represented -in the target machine as true EM addresses, -for example, a pointer to EM address 6 really is -stored as a 6 in the target machine. -This solution implies that every time a pointer is fetched -EB must be added before referencing -the target machine's memory. -If the target machine has powerful indexing -facilities, EB can be kept in a target machine register, -and the relocation can indeed be done on -every reference to the data address space -at a modest cost in speed. -.PP -The other solution consists of having EM pointers -refer to the true target machine address. -Thus the instruction LAE 6 (Load Address of External 6) -would push the value of EB+6 onto the stack. -When this approach is chosen, back ends must know -how to offset from EB, to translate all -instructions that manipulate EM addresses. 
-However, the problem is not completely solved, -because a front end may have to initialize a pointer -in CON or ROM data to point to a global address. -This pointer must also be relocated by the back end or the interpreter. -.PP -Although the EM stack grows from high to low EM addresses, -some machines have hardware PUSH and POP -instructions that require the stack to grow upwards. -If reasons of efficiency demand the use of these -instructions, then EM -can be implemented with the memory layout -upside down, as shown in figure 3. -This is possible because the pointer format is explicitly undefined. -The first element of a word array will have a -lower physical address than the second element. -.Dr 18 - | | | | - | EB=60 | | ^ | - | | | | | - |-----------------| |-----------------| - 105 | 45 | 44 | 104 214 | 41 | 40 | 215 - |-----------------| |-----------------| - 103 | 43 | 42 | 102 212 | 43 | 42 | 213 - |-----------------| |-----------------| - 101 | 41 | 40 | 100 210 | 45 | 44 | 211 - |-----------------| |-----------------| - | | | | | - | v | | EB=255 | - | | | | - - Type A Type B -.Df -Figure 3. Two possible memory implementations. -Numbers within the boxes are EM addresses. -The other numbers are physical addresses. -.De -.LP -So, we have two different EM memory implementations: -.IP "A~\-" -stack downwards -.IP "B~\-" -stack upwards -.PP -For each of these two possibilities we give the translation of -the EM instructions to push the third byte of a global data -block starting at EM address 40 onto the stack and to load the -word at address 40. -All translations assume a word and pointer size of two bytes. -The target machine used is a PDP-11 augmented with push and pop instructions. -Registers 'r0' and 'r1' are used and suffer from sign extension for byte -transfers. -Push $40 means push the constant 40, not word 40. -.PP -The translation of the EM instructions depends on the pointer representation -used. 
-For each of the two solutions explained above the translation is given. -.PP -First, the translation for the two implementations using EM addresses as -pointer representation: -.KS -.TS -tab(:), center; -l s l s l s -l 2 l 6 l 2 l 6 l 2 l. -EM:type A:type B -_ -LAE:40:push:$40:push:$40 - -ADP:3:pop:r0:pop:r0 -::add:$3,r0:add:$3,r0 -::push:r0:push:r0 - -LOI:1:pop:r0:pop:r0 -::\-::neg:r0 -::clr:r1:clr:r1 -::bisb:eb(r0),r1:bisb:eb(r0),r1 -::push:r1:push:r1 - -LOE:40:push:eb+40:push:eb-41 -.TE -.KE -.PP -The translation for the two implementations, if the target machine address is -used as pointer representation, is: -.KS -.TS -tab(:), center; -l s l s l s -l 2 l 6 l 2 l 6 l 2 l. -EM:type A:type B -_ -LAE:40:push:$eb+40:push:$eb-40 - -ADP:3:pop:r0:pop:r0 -::add:$3,r0:sub:$3,r0 -::push:r0:push:r0 - -LOI:1:pop:r0:pop:r0 -::clr:r1:clr:r1 -::bisb:(r0),r1:bisb:(r0),r1 -::push:r1:push:r1 - -LOE:40:push:eb+40:push:eb-41 -.TE -.KE -.PP -The translation presented above is not intended to be optimal. -Most machines can handle these simple cases in one or two instructions. -It demonstrates, however, the flexibility of the EM design. -.PP -There are several possibilities to implement EM on machines with -address spaces larger than 64k bytes. -For EM with two byte pointers one could allocate instruction and -data space each in a separate 64k piece of memory. -EM pointers still have to fit in two bytes, -but the base registers PB and EB may be loaded in hardware registers -wider than 16 bits, if available. -EM implementations can also make efficient use of a machine -with separate instruction and data space. -.PP -EM with 32 bit pointers allows one to make use of machines -with large address spaces. -In a virtual, segmented memory system one could use a separate -segment for each fragment. 
diff --git a/doc/em/mem.nr b/doc/em/mem.nr deleted file mode 100644 index ee1364ff6..000000000 --- a/doc/em/mem.nr +++ /dev/null @@ -1,80 +0,0 @@ -.bp -.P1 MEMORY -.PP -The EM machine has two distinct address spaces, -one for instructions and one for data. -The data space is divided up into 8-bit bytes. -The smallest addressable unit is a byte. -Bytes are numbered consecutively from 0 to some maximum. -All sizes in EM are expressed in bytes. -.PP -Some EM instructions can transfer objects containing several bytes -to and/or from memory. -The size of all objects larger than a word must be a multiple of -the wordsize. -The size of all objects smaller than a word must be a divisor -of the wordsize. -For example: if the wordsize is 2 bytes, objects of the sizes 1, -2, 4, 6,... are allowed. -The address of such an object is the lowest address of all bytes it contains. -For objects smaller than the wordsize, the -address must be a multiple of the object size. -For all other objects the address must be a multiple of the -wordsize. -For example, if an instruction transfers a 4-byte object to memory at -location \fIm\fP and the wordsize is 2, -\fIm\fP must be a multiple of 2 and the bytes at -locations \fIm\fP, \fIm\fP\|+\|1,\fIm\fP\|+\|2 and -\fIm\fP\|+\|3 are overwritten. -.PP -The size of almost all objects in EM -is an integral number of words. -Only two operations are allowed on -objects whose size is a divisor of the wordsize: -push it onto the stack and pop it from the stack. -The addressing of these objects in memory is always indirect. -If such a small object is pushed onto the stack -it is assumed to be a small integer and stored -in the least significant part of a word. -The rest of the word is cleared to zero, -although -EM provides a way to sign-extend a small integer. -Popping a small object from the stack removes a word -from the stack, stores the least significant byte(s) -of this word in memory and discards the rest of the word. 
-.PP -The format of pointers into both address spaces is explicitly undefined. -The size of a pointer, however, is fixed for a member of EM, so that -the compiler writer knows how much storage to allocate for a pointer. -.PP -A minor problem is raised by the undefined pointer format. -Some languages, notably Pascal, require a special, -otherwise illegal, pointer value to represent the nil pointer. -The current Pascal-VU compiler uses the -integer value 0 as nil pointer. -This value is also used by many C programs as a normally impossible address. -A better solution would be to have a special -instruction loading an illegal pointer value, -but it is hard to imagine an implementation -for which the current solution is inadequate, -especially because the first word in the EM data space -is special and probably not the target of any pointer. -.PP -The next two chapters describe the EM memory -in more detail. -One describes the instruction address space, -the other the data address space. -.PP -A design goal of EM has been to allow -its implementation on a wide range of existing machines, -as well as allowing a new one to be built in hardware. -To this end we have tried to minimize the demands -of EM on the memory structure of the target machine. -Therefore, apart from the logical partitioning, -EM memory is divided into 'fragments'. -A fragment consists of consecutive machine -words and has a base address and a size. -Pointer arithmetic is only defined within a fragment. -The only exception to this rule is comparison with the null -pointer. -All fragments must be word aligned. diff --git a/doc/em/mkdispatch.c b/doc/em/mkdispatch.c deleted file mode 100644 index 613b582c6..000000000 --- a/doc/em/mkdispatch.c +++ /dev/null @@ -1,492 +0,0 @@ -/* - * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. - * See the copyright notice in the ACK home directory, in the file "Copyright".
- * - */ - -#include "ip_spec.h" -#include -#include "em_spec.h" -#include "em_flag.h" - -#ifndef NORCSID -static char rcs_id[] = "$Id$" ; -#endif - -/* This program reads the human readable interpreter specification - and produces a efficient machine representation that can be - translated by a C-compiler. -*/ - -#define NOTAB 600 /* The max no of interpreter specs */ -#define ESCAP1 256 -#define ESCAP2 257 - -struct opform intable[NOTAB] ; -struct opform *lastform = intable-1 ; - -int nerror = 0 ; -int atend = 0 ; -int line = 1 ; - -extern char em_mnem[][4] ; -char esca1[] = "escape1" ; -char esca2[] = "escape2" ; -#define ename(no) ((no)==ESCAP1?esca1:(no)==ESCAP2?esca2:em_mnem[(no)]) - -extern char em_flag[] ; - -main(argc,argv) char **argv ; { - if ( argc>1 ) { - if ( freopen(argv[1],"r",stdin)==NULL) { - fatal("Cannot open %s",argv[1]) ; - } - } - if ( argc>2 ) { - if ( freopen(argv[2],"w",stdout)==NULL) { - fatal("Cannot create %s",argv[2]) ; - } - } - if ( argc>3 ) { - fatal("%s [ file [ file ] ]",argv[0]) ; - } - atend=0 ; - readin(); - atend=1 ; - checkall(); - if ( nerror==0 ) { - writeout(); - } - exit(nerror) ; -} - -readin() { - register struct opform *nextform ; - char *ident(); - char *firstid ; - - for ( nextform=intable ; - !feof(stdin) && nextform<&intable[NOTAB] ; ) { - firstid=ident() ; - if ( *firstid=='\n' || feof(stdin) ) continue ; - lastform=nextform ; - nextform->i_opcode = getmnem(firstid) ; - nextform->i_flag = decflag(ident()) ; - switch ( nextform->i_flag&OPTYPE ) { - case OPMINI: - case OPSHORT: - nextform->i_num = atoi(ident()) ; - break ; - } - nextform->i_low = atoi(ident()) ; - if ( *ident()!='\n' ) { - int c ; - error("End of line expected"); - while ( (c=readchar())!='\n' && c!=EOF ) ; - } - nextform++ ; - } - if ( !feof(stdin) ) fatal("Internal table too small") ; -} - -char *ident() { - /* skip spaces and tabs, anything up to space,tab or eof is - a identifier. - Anything from # to end-of-line is an end-of-line. 
- End-of-line is an identifier all by itself. - */ - - static char array[200] ; - register int c ; - register char *cc ; - - do { - c=readchar() ; - } while ( c==' ' || c=='\t' ) ; - for ( cc=array ; cc<&array[(sizeof array) - 1] ; cc++ ) { - if ( c=='#' ) { - do { - c=readchar(); - } while ( c!='\n' && c!=EOF ) ; - } - *cc = c ; - if ( c=='\n' && cc==array ) break ; - c=readchar() ; - if ( c=='\n' ) { - pushback(c) ; - break ; - } - if ( c==' ' || c=='\t' || c==EOF ) break ; - } - *++cc=0 ; - return array ; -} - -int getmnem(str) char *str ; { - char (*ptr)[4] ; - - for ( ptr = em_mnem ; *ptr<= &em_mnem[sp_lmnem-sp_fmnem][0] ; ptr++ ) { - if ( strcmp(*ptr,str)==0 ) return (ptr-em_mnem) ; - } - error("Illegal mnemonic") ; - return 0 ; -} - -error(str,a1,a2,a3,a4,a5,a6) /* VARARGS1 */ char *str ; { - if ( !atend ) fprintf(stderr,"line %d: ",line) ; - fprintf(stderr,str,a1,a2,a3,a4,a5,a6) ; - fprintf(stderr,"\n"); - nerror++ ; -} - -mess(str,a1,a2,a3,a4,a5,a6) /* VARARGS1 */ char *str ; { - if ( !atend ) fprintf(stderr,"line %d: ",line) ; - fprintf(stderr,str,a1,a2,a3,a4,a5,a6) ; - fprintf(stderr,"\n"); -} - -fatal(str,a1,a2,a3,a4,a5,a6) /* VARARGS1 */ char *str ; { - error(str,a1,a2,a3,a4,a5,a6) ; - exit(1) ; -} - -#define ILLGL -1 - -check(val) int val ; { - if ( val!=ILLGL ) error("Illegal flag combination") ; -} - -int decflag(str) char *str ; { - int type ; - int escape ; - int range ; - int wordm ; - int notzero ; - - type=escape=range=wordm=notzero= ILLGL ; - while ( *str ) switch ( *str++ ) { - case 'm' : - check(type) ; type=OPMINI ; break ; - case 's' : - check(type) ; type=OPSHORT ; break ; - case '-' : - check(type) ; type=OPNO ; break ; - case '1' : - check(type) ; type=OP8 ; break ; - case '2' : - check(type) ; type=OP16 ; break ; - case '4' : - check(type) ; type=OP32 ; break ; - case '8' : - check(type) ; type=OP64 ; break ; - case 'u' : - check(type) ; type=OP16U ; break ; - case 'e' : - check(escape) ; escape=0 ; break ; - case 'N' : - check(range) 
; range= 2 ; break ; - case 'P' : - check(range) ; range= 1 ; break ; - case 'w' : - check(wordm) ; wordm=0 ; break ; - case 'o' : - check(notzero) ; notzero=0 ; break ; - default : - error("Unknown flag") ; - } - if ( type==ILLGL ) error("Type must be specified") ; - switch ( type ) { - case OP64 : - case OP32 : - if ( escape!=ILLGL ) error("Conflicting escapes") ; - escape=ILLGL ; - case OP16 : - case OP16U : - case OP8 : - case OPSHORT : - case OPNO : - if ( notzero!=ILLGL ) mess("Improbable OPNZ") ; - if ( type==OPNO && range!=ILLGL ) { - mess("No operand in range") ; - } - } - if ( escape!=ILLGL ) type|=OPESC ; - if ( wordm!=ILLGL ) type|=OPWORD ; - switch ( range) { - case ILLGL : type|=OP_BOTH ; - if ( type==OPMINI || type==OPSHORT ) - error("Minies and shorties must have P or N") ; - break ; - case 1 : type|=OP_POS ; break ; - case 2 : type|=OP_NEG ; break ; - } - if ( notzero!=ILLGL ) type|=OPNZ ; - return type ; -} - -/* ----------- checking --------------*/ - -int ecodes[256],codes[256],lcodes[256] ; -char eflags[256], flags[256], lflags[256] ; -int elows[256], lows[256], llows[256]; - -#define NMNEM (sp_lmnem-sp_fmnem+1) -#define MUST 1 -#define MAY 2 -#define FORB 3 - -char negc[NMNEM], zc[NMNEM], posc[NMNEM], lnegc[NMNEM], lposc[NMNEM] ; - -checkall() { - register i,flag ; - register struct opform *next ; - int opc,low ; - - for ( i=0 ; ii_flag&0377 ; - opc = next->i_opcode&0377 ; - low = next->i_low&0377 ; - chkc(flag,low,opc,low) ; - switch(flag&OPTYPE) { - case OPNO : zc[opc]++ ; break ; - case OPMINI : - case OPSHORT : - for ( i=1 ; i<((next->i_num)&0377) ; i++ ) { - chkc(flag,low+i,opc,low) ; - } - if ( !(em_flag[opc]&PAR_G) && - (flag&OPRANGE)==OP_BOTH) { - mess("Mini's and shorties should have P or N"); - } - break ; - case OP8 : - error("OP8 is removed") ; - break ; - case OP16 : - if ( flag&OP_NEG ) - negc[opc]++ ; - else if ( flag&OP_POS ) - posc[opc]++ ; - break ; - case OP32 : - if ( flag&OP_NEG ) - lnegc[opc]++ ; - else if ( flag&OP_POS ) 
- lposc[opc]++ ; - break ; - case OP16U : - break ; - default : - error("Illegal type") ; - break ; - } - } - atend=1 ; - for ( i=0 ; i<256 ; i++ ) if ( codes[i]== -1 ) { - mess("interpreter opcode %d not used",i) ; - } - for ( opc=0 ; opc1 ) mess("More then one OPNO for %s",ename(emc)) ; - if ( posc[emc]>1 ) mess("More then one OP16(pos) for %s",ename(emc)) ; - if ( negc[emc]>1 ) mess("More then one OP16(neg) for %s",ename(emc)) ; - if ( lposc[emc]>1 ) mess("More then one OP32(pos) for %s",ename(emc)) ; - if ( lnegc[emc]>1 ) mess("More then one OP32(neg) for %s",ename(emc)) ; - switch(zf) { - case MUST: - if ( zc[emc]==0 ) mess("No OPNO for %s",ename(emc)) ; - break ; - case FORB: - if ( zc[emc]==1 ) mess("Forbidden OPNO for %s",ename(emc)) ; - break ; - } - switch(pf) { - case MUST: - if ( posc[emc]==0 ) mess("No OP16(pos) for %s",ename(emc)) ; - break ; - case FORB: - if ( posc[emc]==1 ) - mess("Forbidden OP16(pos) for %s",ename(emc)) ; - break ; - } - switch(nf) { - case MUST: - if ( negc[emc]==0 ) mess("No OP16(neg) for %s",ename(emc)) ; - break ; - case FORB: - if ( negc[emc]==1 ) - mess("Forbidden OP16(neg) for %s",ename(emc)) ; - break ; - } -} - -static int pushchar ; -static int pushf ; - -int readchar() { - int c ; - - if ( pushf ) { - pushf=0 ; - c = pushchar ; - } else { - if ( feof(stdin) ) return EOF ; - c=getc(stdin) ; - } - if ( c=='\n' ) line++ ; - return c ; -} - -pushback(c) { - if ( pushf ) { - fatal("Double pushback") ; - } - pushf++ ; - pushchar=c ; - if ( c=='\n' ) line-- ; -} - -writeout() { - register int i; - - printf("DISPATCH1"); - for (i = 0; i < 256;) { - if (!(i % 8)) printf("\n%d", i); - printf("\t%s", ename(codes[i])); - if (i < 254) { - prx(flags[i],lows[i],i); - } - i++; - } - - printf("\nDISPATCH2"); - for (i = 0; i < 256;) { - if (ecodes[i] != -1) { - if (!(i % 8)) printf("\n%d", i); - printf("\t%s", ename(ecodes[i])); - prx(eflags[i],elows[i],i); - } - else break; - i++; - } - - printf("\nDISPATCH3"); - i = 0; - while 
(lcodes[i] != -1) { - if (!(i % 8)) printf("\n%d", i); - printf("\t%s", ename(lcodes[i])); - prx(lflags[i],llows[i],i); - i++; - } - while (i++ % 8) putchar('\t'); - putchar('\n'); -} - -prx(flg,low,opc) - register int flg; -{ - int arg = opc - low; - - putchar('.'); - switch(flg&OPTYPE) { - case OPNO: - putchar('z'); - break; - case OP16U: - putchar('u'); - break; - case OP16: - if (flg&OP_POS) putchar('p'); - else if (flg&OP_NEG) putchar('n'); - else putchar('l'); - if (flg&OPWORD) putchar('w'); - break; - case OP32: - if (flg&OP_POS) putchar('P'); - else if (flg&OP_NEG) putchar('N'); - else putchar('L'); - if (flg&OPWORD) putchar('w'); - break; - case OPSHORT: - if (flg & OPWORD) putchar('w'); - else putchar('s'); - /* fall through */ - case OPMINI: - if (flg & OPNZ) arg++; - if (flg & OP_NEG) arg = -arg - 1; - printf("%d",arg); - if((flg&OPTYPE) == OPMINI && (flg & OPWORD)) putchar('W'); - } -} diff --git a/doc/em/print b/doc/em/print deleted file mode 100755 index a9b9b0335..000000000 --- a/doc/em/print +++ /dev/null @@ -1,5 +0,0 @@ - -case $# in -1) make "$1".t ; ntlp "$1".t^lpr ;; -*) echo $0 heeft een argument nodig ;; -esac diff --git a/doc/em/proto.make b/doc/em/proto.make deleted file mode 100644 index f75fa2122..000000000 --- a/doc/em/proto.make +++ /dev/null @@ -1,49 +0,0 @@ -# $Id$ - -#PARAMS do not remove this line! 
- -TBL=tbl - -TARGET_DIR = $(TARGET_HOME)/doc -SRC_DIR = $(SRC_HOME)/doc/em - -head: $(TARGET_DIR)/em.doc - -FILES = $(SRC_DIR)/macr.nr \ - $(SRC_DIR)/title.nr \ - $(SRC_DIR)/intro.nr \ - $(SRC_DIR)/mem.nr \ - $(SRC_DIR)/ispace.nr \ - $(SRC_DIR)/dspace.nr \ - $(SRC_DIR)/mapping.nr \ - $(SRC_DIR)/types.nr \ - $(SRC_DIR)/descr.nr \ - $(SRC_DIR)/env.nr \ - $(SRC_DIR)/traps.nr \ - $(SRC_DIR)/mach.nr \ - $(SRC_DIR)/assem.nr \ - $(SRC_DIR)/em.i \ - $(SRC_DIR)/app.codes.nr \ - $(SRC_DIR)/app.exam.nr \ - $(SRC_DIR)/cont.nr - -IOP=$(SRC_HOME)/etc/ip_spec.t# # to construct itables from - -$(TARGET_DIR)/em.doc: $(FILES) itables dispatdummy - $(TBL) $(FILES) | soelim > $(TARGET_DIR)/em.doc - -itables: $(IOP) $(SRC_DIR)/ip.awk - awk -f $(SRC_DIR)/ip.awk $(IOP) | sed 's/-/\\-/g' | $(TBL) >itables - -dispatdummy: $(IOP) mkdispatch - mkdispatch < $(IOP) > dispatdummy - sed -f $(SRC_DIR)/dispat1.sed < dispatdummy | $(TBL) > dispat1 - sed -f $(SRC_DIR)/dispat2.sed < dispatdummy | $(TBL) > dispat2 - sed -f $(SRC_DIR)/dispat3.sed < dispatdummy | $(TBL) > dispat3 - -mkdispatch: $(SRC_DIR)/mkdispatch.c - $(UCC) -c -I$(UTIL_HOME)/h $(SRC_DIR)/mkdispatch.c - $(UCC) $(ULDOPTIONS) -o mkdispatch mkdispatch.$(USUF) $(UTIL_HOME)/lib.bin/em_data.$(ULIBSUF) - -clean: - rm -f itables dispatdummy dispat? *.o mkdispatch Out diff --git a/doc/em/show b/doc/em/show deleted file mode 100755 index f60e8e463..000000000 --- a/doc/em/show +++ /dev/null @@ -1,4 +0,0 @@ -case $# in -1) make $1.t ; ntout $1.t ;; -*) echo $0 heeft een argument nodig ;; -esac diff --git a/doc/em/title.nr b/doc/em/title.nr deleted file mode 100644 index a31051041..000000000 --- a/doc/em/title.nr +++ /dev/null @@ -1,34 +0,0 @@ -.LP -\& -.sp 10 -.ce 4 -DESCRIPTION OF A MACHINE -ARCHITECTURE FOR USE WITH -BLOCK STRUCTURED LANGUAGES -.sp 6 -.ce 4 -Andrew S. Tanenbaum -Hans van Staveren -Ed G. Keizer -Johan W. 
Stevenson\v'-0.5m'*\v'0.5m' -.sp 2 -.ce -August 1983 -.sp 2 -.ce -Informatica Rapport IR-81 -.sp 13 -Abstract -.sp 2 -.ti +5 -EM is a family of intermediate languages -designed for producing portable compilers. -A program called \fBfront end\fP -translates source programs to EM. -Another program, \fBback end\fP, -translates EM to the assembly language of the target machine. -Alternatively, the EM program can be assembled to a highly -efficient binary format for interpretation. -This document describes the EM languages in detail. -.sp 4 -\v'-0.5m'*\v'0.5m' Present affiliation: NV Philips, Eindhoven diff --git a/doc/em/traps.nr b/doc/em/traps.nr deleted file mode 100644 index 13c8db6f8..000000000 --- a/doc/em/traps.nr +++ /dev/null @@ -1,169 +0,0 @@ -.bp -.P1 "TRAPS AND INTERRUPTS" -.PP -EM provides a means for the user program to catch all traps -generated by the program itself, the hardware, or external conditions. -This mechanism uses five instructions: LIM, SIM, SIG, TRP and RTT. -This section of the manual may be omitted on the first reading since it -presupposes knowledge of the EM instruction set. -.PP -The action taken when a trap occurs is determined by the value -of an internal EM trap register. -This register contains a pointer to a procedure. -Initially the pointer used is zero and all traps halt the -program with, hopefully, a useful message to the outside world. -The SIG instruction can be used to alter the trap register, -it pops a procedure pointer from the -stack into the trap register. -When a trap occurs after storing a nonzero value in the trap -register, the procedure pointed to by the trap register -is called with the trap number -as the only parameter (see below). -SIG returns the previous value of the trap register on the -stack. -Two consecutive SIGs are a no-op. -When a trap occurs, the trap register is reset to its initial -condition, to prevent recursive traps from hanging the machine up, -e.g. 
stack overflow in the stack overflow handling procedure. -.PP -The runtime systems for some languages need to ignore some EM -traps. -EM offers a feature called the ignore mask. -It contains one bit for each of the lowest 16 trap numbers. -The bits are numbered 0 to 15, with the least significant bit -having number 0. -If a certain bit is 1 the corresponding trap never -occurs and processing simply continues. -The actions performed by the offending instruction are -described by the Pascal program in appendix A. -.br -If the bit is 0, traps are not ignored. -The instructions LIM and SIM allow copying and replacement of -the ignore mask.~ -.PP -The TRP instruction generates a trap, the trap number being found on the -stack. -This is, among other things, -useful for library procedures and runtime systems. -It can also be used by a low level trap procedure to pass the trap to a -higher level one (see example below). -.PP -The RTT instruction returns from the trap procedure and continues after the -trap. -In the list below all traps marked with an asterisk ('*') are -considered to be fatal and it is explicitly undefined what happens when -restarting after the trap. -.PP -The way a trap procedure is called is completely compatible -with normal calling conventions. The only way a trap procedure -differs from normal procedures is the return. It has to use RTT instead -of RET. This is necessary because the complete runtime status is saved on the -stack before calling the procedure and all this status has to be reloaded. -Error numbers are in the range 0 to 252. -The trap numbers are divided into three categories: -.IP "\0\00\-\063" 12 -EM machine errors, e.g. illegal instruction. -.RS -.IP "\00\-15" 8 -maskable -.IP "16\-63" 8 -not maskable -.RE -.IP "\064\-127" 12 -Reserved for use by compilers, run time systems, etc. -.IP "128\-252" 12 -Available for user programs. -.LP -EM machine errors are numbered as follows: -.TS -tab(@); -n l l. 
-0@EARRAY@Array bound error -1@ERANGE@Range bound error -2@ESET@Set bound error -3@EIOVFL@Integer overflow -4@EFOVFL@Floating overflow -5@EFUNFL@Floating underflow -6@EIDIVZ@Divide by 0 -7@EFDIVZ@Divide by 0.0 -8@EIUND@Undefined integer -9@EFUND@Undefined float -10@ECONV@Conversion error -16*@ESTACK@Stack overflow -17@EHEAP@Heap overflow -18*@EILLINS@Illegal instruction -19*@EODDZ@Illegal size argument -20*@ECASE@Case error -21*@EMEMFLT@Addressing non existent memory -22*@EBADPTR@Bad pointer used -23*@EBADPC@Program counter out of range -24@EBADLAE@Bad argument of LAE -25@EBADMON@Bad monitor call -26@EBADLIN@Argument of LIN too high -27@EBADGTO@GTO descriptor error -.TE -.PP -As an example, -suppose a subprocedure has to be written to do a numeric -calculation. -When an overflow occurs the computation has to be stopped and -the higher level procedure must be resumed. -This can be programmed as follows using the mechanism described above: -.LP -.KS -.nf -.ta 1n 24n - mes 2,2,2 ; set sizes -ersave - bss 2,0,0 ; Room to save previous value of trap procedure -msave - bss 2,0,0 ; Room to save previous value of trap mask - - pro $calcule,0 ; entry point - lxl 0 ; fill in non-local goto descriptor with LB - ste jmpbuf+4 - lor 1 ; and SP - ste jmpbuf+2 - lim ; get current ignore mask - ste msave ; save it - lim - loc 16 ; bit for EFOVFL - ior 2 ; set in mask - sim ; ignore EFOVFL from now on - lpi $catch ; load procedure identifier - sig ; catch will get all traps now - ste ersave ; save previous trap procedure identifier - ; perform calculation now, possibly generating overflow -1 ; label jumped to by catch procedure - loe ersave ; get old trap procedure - sig ; refer all following traps to old procedure - asp 2 ; remove result of sig - loe msave ; restore previous mask - sim ; done now - ; load result of calculation - ret 2 ; return result -jmpbuf - con *1,0,0 - end -.KE -.KS -.LP -Example of catch procedure -.LP -.nf -.ta 1n 24n - pro $catch,0 ; Local procedure that must 
catch the overflow trap - lol 2 ; Load trap number - loc 4 ; check for overflow - bne *1 ; if other trap, call higher trap procedure - gto jmpbuf ; return to procedure calcule -1 ; other trap has occurred - loe ersave ; previous trap procedure - sig ; other procedure will get the traps now - asp 2 ; remove the result of sig - lol 2 ; stack trap number - trp ; call other trap procedure - rtt ; if other procedure returns, do the same - end -.KE -.fi diff --git a/doc/em/types.nr b/doc/em/types.nr deleted file mode 100644 index 12f6a5b13..000000000 --- a/doc/em/types.nr +++ /dev/null @@ -1,142 +0,0 @@ -.bp -.P1 "TYPE REPRESENTATIONS" -.PP -The representations used for typed objects are not precisely -specified by EM. -Sometimes we only specify that a typed object occupies a -certain amount of space and state no further restrictions. -If one wants to have a different representation of the value of -an object on the stack one has to use a convert instruction -in most cases. -We do specify some relations between the representations of -types. -This allows some intermixed use of operators for different types -on the same object(s). -For example, the instruction ZER pushes signed and -unsigned integers with the value zero and empty sets. -ZER has as only argument the size of the object. -.QQ -The representation of floating point numbers is a good example, -it allows widely varying implementations. -The only ways to create floating point numbers are via -initialization and via conversions from integer numbers. -Only by using conversions to integers and comparing -two floating point numbers with each other, can these numbers -be converted to human readable output. -Implementations may use base 10, base 2 or any other -base for exponents, and have freedom in choosing the range of -exponent and mantissa. -.QQ -Other types are more precisely described. -In the following paragraphs a description will be given of the -restrictions imposed on the representation of the types used. 
-A number \fBn\fP used in these paragraphs indicates the size of -the object in \fIbits\fP. -.P2 "Unsigned integers" -.PP -The range of unsigned integers is 0.. -.Ex 2 "\fBn\fP" -1. -A binary representation is assumed. -The order of the bits within an object is knowingly left -unspecified. -Discussing bit order within each 8-bit byte is academic, -so the only real freedom of this specification lies in the byte -order. -We really do not care whether an implementation of a 4-byte -integer has its bytes in a particular order of significance. -This of course means that some sequences of instructions have -unpredictable effects. -For example: -.DS -LOC 258 ; STL 0 ; LAL 0 ; LOI 1 ( wordsize >=2 ) -.DE -The value on the stack after executing this sequence -can be anything, -but will most likely be 1 or 2. -.QQ -Conversions between unsigned integers of different sizes have to -be done with explicit convert instructions. -One cannot simply pad an unsigned integer with zeros at either end -and expect a correct result. -.QQ -We assume existence of at least single word unsigned arithmetic -in any implementation. -.P2 "Signed Integers" -.PP -The range of signed integers is -.Ex \-2 "\fBn\fP\-1" ~.. -.Ex 2 "\fBn\fP\-1" \-1, -in other words the range of signed integers of \fBn\fP bits -using two's complement arithmetic. -The representation is the same as for unsigned integers except the range -.Ex 2 "\fBn\fP\-1" ~.. -.Ex 2 "\fBn\fP" \-1 -is mapped on the -range -.Ex \-2 "\fBn\fP\-1" ~..~\-1. -In other words, the most significant bit is used as sign bit. -The convert instructions between signed and unsigned integers -of the same size can be used to catch errors. -.QQ -The value -.Ex \-2 "\fBn\fP\-1" -is used for undefined -signed integers. -EM implementations should trap when this value is used in an -operation on signed integers. -The ignore mask, accessed with SIM and LIM \-~see chapter 9~\-, -can be used to disable such traps. 
-.QQ -We assume existence of at least single word signed arithmetic -in any implementation. -.P2 "Floating point values" -.PP -Floating point values must have a signed mantissa and a signed -exponent. -Although no base is specified, base 2 is the normal choice, -because the FEF instruction pushes the exponent in base 2. -.QQ -The implementation of floating point arithmetic is optional. -The compilers currently in use have runtime parameters for the -size of the floating point values they should use. -Common choices are 4 and/or 8 bytes. -.P2 Pointers -.PP -EM has two kinds of pointers: for instruction and for data -space. -Each kind can only be used for its own space, conversion between -these two subtypes is impossible. -We assume that pointers have a range from 0 upwards. -Any implementation may have holes in the pointer range between -fragments. -One can of course not expect to be able to address two megabyte -of memory using a 2-byte pointer. -Normally, a 2-byte pointer allows up to 65536 bytes of -addressable memory. -.QQ -Pointer representation has one restriction. -The pointer with the same representation as the integer zero of -the same size should be invalid. -Some languages and/or runtime systems represent the nil -pointer as zero. -.P2 "Bit sets" -.PP -All bit sets of size \fBn\fP are subsets of the set -{~i~|~i>=0,~i<\fBn\fP~}. -A bit set contains a bit for each element showing its -presence or absence. -Bit sets are subdivided into words. -The word with the lowest EM address governs the subset -{~i~|~i>=0,~i<\fBm\fP~}, where \fBm\fP is the number of bits in -a word. -The next higher words each govern the next higher \fBm\fP set elements. -The relation between a set with size of -a word and an unsigned integer word is that -the value of the unsigned integer is the summation of the -2\v'-0.5m'i\v'0.5m' where i is in the set. 
-.QQ -Example: a 2-word bit set (wordsize 2) containing the -elements 1, 6, 8, 15, 18, 21, 27 and 28 is composed of two -integers, e.g. at addresses 40 and 42. -The word at 40 contains the value 33090 (or~\-32446), -the word at 42 contains the value 6180. diff --git a/doc/i80.doc b/doc/i80.doc deleted file mode 100644 index e2019b0ad..000000000 --- a/doc/i80.doc +++ /dev/null @@ -1,816 +0,0 @@ -. \" $Id$ -.RP -.ND April 1985 -.TL -Back end table for the Intel 8080 micro-processor -.AU -Gerard Buskermolen -.AB -A back end is a part of the Amsterdam Compiler Kit (ACK). -It translates EM, a family of intermediate languages, into the -assembly language of some target machine, here the Intel 8080 and Intel 8085 microprocessors. -.AE -.NH1 -INTRODUCTION -.PP -To simplify the task of producing portable (cross) compilers and -interpreters, the Vrije Universiteit designed an integrated collection -of programs, the Amsterdam Compiler Kit (ACK). -It is based on the old UNCOL-idea ([4]) which attempts to solve the problem -of making a compiler for each of -.B N -languages on -.B M -different machines without having to write -.B N\ *\ M -programs. -.sp 1 -The UNCOL approach is to write -.B N -"front ends", each of which translates one source language into -a common intermediate language, UNCOL (UNiversal Computer Oriented -Language), and -.B M -"back ends", each of which translates programs in UNCOL into a -specific machine language. -Under these conditions, only -.B N\ +\ M -programs should be written to provide all -.B N -languages on all -.B M -machines, instead of -.B N\ *\ M -programs. -.sp 1 -The intermediate language for the Amsterdam Compiler Kit is the machine -language for a simple stack machine called EM (Encoding Machine). -So a back end for the Intel 8080 micro translates EM code into -8080 assembly language. -.sp 1 -The back end is a single program that is driven by a machine dependent -driving table. 
-This driving table, or back end table, -defines the mapping from EM code to the machine's assembly language. -.NH 1 -THE 8080 MICRO PROCESSOR -.PP -This back end table can be used without modification for the Intel 8085 -processor. -Except for two additional instructions, the 8085 instruction set -is identical and fully compatible with the 8080 instruction set. -So everywhere in this document '8080' can be read as '8080 and 8085'. -.NH 2 -Registers -.PP -The 8080 processor has an 8 bit accumulator, -six general purpose 8-bit registers, -a 16 bit programcounter and a 16 bit stackpointer. -Assembler programs can refer the accumulator by A and -the general purpose registers by B, C, D, E, H and L. (*) -.FS -* In this document 8080 registers and mnemonics are referenced by capitals, for the sake of clarity. -Nevertheless the assembler expects small letters. -.FE -Several instructions address registers in groups of two, thus creating -16 bit registers: -.DS -Registers referenced: Symbolic reference: - B and C B - D and E D - H and L H -.DE -The first named register, contains the high order byte -(H and L stand for High and Low). -.br -The instruction determines how the processor interprets the reference. -For example, ADD B is an 8 bit operation, adding the contents of -register B to accumulator A. By contrast PUSH B is a 16 bit operation -pushing B and C onto the stack. -.sp 1 -There are no index registers. -.sp 1 -.NH 2 -Flip-flops -.PP -The 8080 microprocessor provides five flip-flops used as condition flags -(S, Z, P, C, AC) and one interrupt enable flip-flop IE. -.br -The sign bit S is set (cleared) by certain instructions when the most significant -bit of the result of an operation equals one (zero). -The zero bit Z is set (cleared) by certain operations when the -8-bit result of an operation equals (does not equal) zero. -The parity bit P is set (cleared) if the 8-bit result of an -operation includes an even (odd) number of ones. -C is the normal carry bit. 
-AC is an auxiliary carry that indicates whether there has been a carry -out of bit 3 of the accumulator. -This auxiliary carry is used only by the DAA instruction, which -adjusts the 8-bit value in the accumulator to form two 4-bit -binary coded decimal digits. -Needless to say this instruction is not used in the back-end. -.sp 1 -The interrupt enable flip-flop IE is set and cleared under -program control using the instructions EI (Enable Interrupt) and -DI (Disable Interrupt). -It is automatically cleared when the CPU is reset and when -an interrupt occurs, disabling further interrupts until IE = 1 again. -.NH 2 -Addressing modes -.NH 3 -Implied addressing -.PP -The addressing mode of some instructions is implied by the instruction itself. -For example, the RAL (rotate accumulator left) instruction deals only with -the accumulator, and PCHL loads the programcounter with the contents -of register-pair HL. -.NH 3 -Register addressing -.PP -With each instruction using register addressing, -only one register is specified (except for the MOV instruction), -although in many of them the accumulator is implied as -second operand. -Examples are CMP E, which compares register E with the accumulator, -and DCR B, which decrements register B. -A few instructions deal with 16 bit register-pairs: -examples are DCX B, which decrements register-pair BC and the -PUSH and POP instructions. -.NH 3 -Register indirect addressing -.PP -Each instruction that may refer to an 8 bit register, may -refer also to a memory location. In this case the letter M -(for Memory) has to be used instead of a register. -It indicates the memory location pointed to by H and L, -so ADD M adds the contents of the memory location specified -by H and L to the contents of the accumulator. -.br -The register-pairs BC and DE can also be used for indirect addressing, -but only to load or store the accumulator. 
-For example, STAX B stores the contents of the accumulator -into the memory location addressed by register-pair BC. -.NH 3 -Immediate addressing -.PP -The immediate value can be an 8 bit value, as in ADI 10 which -adds 10 to the accumulator, or a 16 bit value, as in -LXI H,1000, which loads 1000 in the register-pair HL. -.NH 3 -Direct addressing -.PP -Jump instructions include a 16 bit address as part of the instruction. -.br -The instruction SHLD 1234 stores the contents of register -pair HL on memory locations 1234 and 1235. -The high order byte is stored at the highest address. -.NH 1 -THE 8080 BACK END TABLE -.PP -The back end table is designed as described in [5]. -For an overall design of a back end table I refer to this document. -.br -This section deals with problems encountered in writing the -8080 back-end table. -Some remarks are made about particular parts -of the table that might not seem clear at first sight. -.NH 2 -Constant definitions -.PP -Word size (EM_WSIZE) and pointer size (EM_PSIZE) are both -defined as two bytes. -The hole between AB and LB (EM_BSIZE) is four bytes: only the -return address and the local base are saved. -.NH 2 -Registers and their properties -.PP -All properties have the default size of two bytes, because one-byte -registers also cover two bytes when put on the real stack. -.sp 1 -The next considerations led to the choice of register-pair BC -as local base. -Though saving the local base in memory would leave one more register-pair -available as scratch register, it would slow down instructions -as 'lol' and 'stl' too much. -So a register-pair should be sacrificed as local base. -Because a back-end without a free register-pair HL is completely -broken-winged, the only reasonable choices are BC and DE. -Though the choice between them might seem arbitrary at first sight, -there is a difference between register-pairs BC and DE: -the instruction XCHG exchanges the contents of register-pairs DE and -HL. 
-When DE and HL are both heavily used on the fake-stack, this instruction -is very useful. -Since it won't be useful too often to exchange HL with the local base -and since an instruction exchanging BC and HL does not exist, BC is -chosen as local base. -.sp 1 -Many of the register properties are never mentioned in the -PATTERNS part of the table. -They are only needed to define the INSTRUCTIONS correctly. -.sp 1 -The properties really used in the PATTERNS part are: -.IP areg: 24 -the accumulator only -.IP reg: -any of the registers A, D, E, H or L. Of course the registers B and C which are -used as local base don't possess this property. -When there is a single register on the fake-stack, its value -is always considered non-negative. -.IP dereg: -register-pair DE only -.IP hlreg: -register-pair HL only -.IP hl_or_de: -register-pairs HL and DE both have this property -.IP local base: -used only once (i.e. in the EM-instruction 'str 0') -.PP -.sp 1 -The stackpointer SP and the processor status word PSW have to be -defined explicitly because they are needed in some instructions -(i.e. SP in LXI, DCX and INX and PSW in PUSH and POP). -.br -It doesn't matter that the processor status word is not just register A -but includes the condition flags. -.NH 2 -Tokens -.PP -The tokens 'm' and 'const1' are used in the INSTRUCTIONS- and MOVES parts only. -They will never be on the fake-stack. -.sp 1 -The token 'label' reflects addresses known at assembly time. -It is used to take full profit of the instructions LHLD -(Load HL Direct) and SHLD (Store HL Direct). -.sp 1 -Compared with many other back-end tables, there are only a small number of -different tokens (four). -Reasons are the limited addressing modes of the 8080 microprocessor, -no index registers etc. 
-For example to translate the EM-instruction -.DS -lol 10 -.DE -the next 8080 instructions are generated: -.DS L -LXI H,10 /* load registers pair HL with value 10 */ -DAD B /* add local base (BC) to HL */ -MOV E,M /* load E with byte pointed to by HL */ -INX H /* increment HL */ -MOV D,M /* load D with next byte */ -.DE -Of course, instead of emitting code immediately, it could be postponed -by placing something like a {LOCAL,10} on the fake-stack, but some day the above -mentioned code will have to be generated, so a LOCAL-token is -hardly useful. -See also the comment on the load instructions. -.NH 2 -Sets -.PP -Only 'src1or2' is used in the PATTERNS. -.NH 2 -Instructions -.PP -Each instruction indicates whether or not the condition flags -are affected, but this information will never have any influence -because there are no tests in the PATTERNS part of the table. -.sp 1 -For each instruction a cost vector indicates the number of bytes -the instruction occupies and the number of time periods it takes -to execute the instruction. -The length of a time period depends on the clock frequency -and may range from 480 nanoseconds to 2 microseconds on a -8080 system and from 320 nanoseconds to 2 microseconds -on a 8085 system. -.sp 1 -In the TOKENS-part the cost of token 'm' is defined as (0,3). -In fact it usually takes 3 extra time periods when this register indirect mode -is used instead of register mode, but since the costs are not completely -orthogonal this results in small deficiencies for the DCR, INR and MOV -instructions. -Although it is not particularly useful these deficiencies are -corrected in the INSTRUCTIONS part, by treating the register indirect -mode separately. -.sp 1 -The costs of the conditional call and return instructions really -depend on whether or not the call resp. return is actually made. -However, this is not important to the behaviour of the back end. -.sp 1 -Instructions not used in this table have been commented out. 
-Of course many of them are used in the library routines. -.NH 2 -Moves -.PP -This section is supposed to be straight-forward. -.NH 2 -Tests -.PP -The TESTS section is only included to refrain -.B cgg -from complaining. -.NH 2 -Stacking rules -.PP -When, for example, the token {const2,10} has to be stacked while -no free register-pair is available, the next code is generated: -.DS -PUSH H -LXI H,10 -XTHL -.DE -The last instruction exchanges the contents of HL with the value -on top of the stack, giving HL its original value again. -.NH 2 -Coercions -.PP -The coercion to unstack register A, is somewhat tricky, -but unfortunately just popping PSW leaves the high-order byte in -the accumulator. -.sp 1 -The cheapest way to coerce HL to DE (or DE to HL) is by using -the XCHG instruction, but it is not possible to explain -.B cgg -this instruction in fact exchanges the contents of these -register-pairs. -Before the coercion is carried out other appearances of DE and HL -on the fake-stack will be moved to the real stack, because in -the INSTRUCTION-part is told that XCHG destroys the contents -of both DE and HL. -The coercion transposing one register-pair to another one by -emitting two MOV-instructions, will be used only if -one of the register-pairs is the local base. -.NH 2 -Patterns -.PP -As a general habit I have allocated (uses ...) all registers -that should be free to generate the code, although it is not -always necessary. -For example in the code rule -.DS -pat loe -uses hlreg -gen lhld {label,$1} yields hl -.DE -the 'uses'-clause could have been omitted because -.B cgg -knows that LHLD destroys register-pair HL. -.sp 1 -Since there is only one register with property 'hlreg', -there is no difference between 'uses hlreg' (allocate a -register with property 'hlreg') and 'kills hlreg' (remove -all registers with property 'hlreg' from the fake-stack). -The same applies for the property 'dereg'. -.br -Consequently 'kills' is rarely used in this back-end table. 
-.NH 3 -Group 1: Load instructions -.PP -When a local variable must be squared, there will probably be EM-code like: -.DS -lol 10 -lol 10 -mli 2 -.DE -When the code for the first 'lol 10' has been executed, DE contains the -wanted value. -To refrain -.B cgg -from emitting the code for 'lol 10' again, an extra -pattern is included in the table for cases like this. -The same applies for two consecutive 'loe'-s or 'lil'-s. -.sp 1 -A bit tricky is 'lof'. -It expects either DE or HL on the fake-stack, moves {const2,$1} -into the other one, and eventually adds them. -The 'kills' part is necessary here because if DE was on the fake-stack, -.B cgg -doesn't see that the contents of DE is destroyed by the code -(in fact 'kills dereg' would have been sufficient: because of the -DAD instruction -.B cgg -knows that HL is destroyed). -.sp 1 -By lookahead, -.B cgg -can make a clever choice between the first and -second code rule of 'loi 4'. -The same applies for several other instructions. -.NH 3 -Group 2: Store instructions -.PP -A similar idea as with the two consecutive identical load instructions -in Group 1, applies for a store instruction followed by a corresponding load instruction. -.NH 3 -Groups 3 and 4: Signed and unsigned integer arithmetic -.PP -Since the 8080 instruction set doesn't provide multiply and -divide instructions, special routines are made to accomplish these tasks. -.sp 1 -Instead of providing four slightly differing routines for 16 bit signed or -unsigned division, yielding the quotient or the remainder, -the routines are merged. -This saves space and assembly time -when several variants are used in a particular program, -at the cost of a little speed. -When the routine is called, bit 7 of register A indicates whether -the operands should be considered as signed or as unsigned integers, -and bit 0 of register A indicates whether the quotient or the -remainder has to be delivered. -.br -The same applies for 32 bit division. 
-.sp 1 -The routine doing the 16 bit unsigned multiplication could -have been used for 16 bit signed multiplication too. -Nevertheless a special 16 bit signed multiplication routine is -provided, because this one will usually be much faster. -.NH 3 -Group 5: Floating point arithmetic -.PP -Floating point is not implemented. -Whenever an EM-instruction involving floating points is offered -to the code-generator, it calls the corresponding -library routine with the proper parameters. -Each floating point library routine calls 'eunimpl', -trapping with trap number 63. -Some of the Pascal and C library routines output floating point -EM-instructions, so code has to be generated for them. -Of course this does not imply the code will ever be executed. -.NH 3 -Group 12: Compare instructions -.PP -The code for 'cmu 2', with its 4 labels, is terrible. -But it is the best I could find. -.NH 3 -Group 9: Logical instructions -.PP -I have tried to merge both variants of the instructions 'and 2', 'ior 2' and 'xor 2', -as in -.DS -pat and $1==2 -with hl_or_de hl_or_de -uses reusing %1, reusing %2, hl_or_de, areg -gen mov a,%1.2 - ana %2.2 - mov %a.2,a - mov a,%1.1 - ana %2.1 - mov %a.1,a yields %a -.DE -but the current version of -.B cgg -doesn't approve this. -In any case -.B cgg -chooses either DE or HL to store the result, using lookahead. -.NH 3 -Group 14: Procedure call instructions -.PP -There is an 8 bytes function return area, called '.fra'. -If only 2 bytes have to be returned, register-pair DE is used. -.NH 1 -LIBRARY ROUTINES -.PP -Most of the library routines start with saving the return address -and the local base, so that the parameters are on the top of the stack -and the registers B and C are available as scratch registers. -Since register-pair HL is needed to accomplish these tasks, -and also to restore everything just before the routine returns, -it is not possible to transfer data between the routines and the -surrounding world through register H or L. 
-Only registers A, D and E can be used for this. -.sp -When a routine returns 2 bytes, they are usually returned in -registers-pair DE. -When it returns more than 2 bytes they are pushed onto the stack. -.br -It would have been possible to let the 32 bit arithmetic routines -return 2 bytes in DE and the remaining 2 bytes on the stack -(this often would have saved some space and execution time), -but I don't consider that as well-structured programming. -.NH 1 -TRAPS -.PP -Whenever a trap, for example trying to divide by zero, -occurs in a program that originally was written in C or Pascal, -a special trap handler is called. -This trap handler wants to write an appropriate error message on the -monitor. -It tries to read the message from a file (e.g. etc/pc_rt_errors in the -EM home directory for Pascal programs), but since the 8080 back-end -doesn't know about files, we are in trouble. -This problem is solved, as far as possible, by including the 'open'-monitor call in the mon-routine. -It returns with file descriptor -1. -The trap handler reacts by generating another trap, with the original -trap number. -But this time, instead of calling the C- or Pascal trap handler again, -the next message is printed on the monitor: -.DS L - trap number - line of file - -where is the trap number (decimal) - is the line number (decimal) - is the filename of the original program -.DE -.sp 1 -Trap numbers are subdivided as follows: -.IP 1-27: 20 -EM-machine error, as described in [3] -.IP 63: -an unimplemented EM-instruction is used -.IP 64-127: -generated by compilers, runtime systems, etc. -.IP 128-252: -generated by user programs -.NH 1 -IMPLEMENTATION -.PP -It will not be possible to run the entire Amsterdam Compiler Kit on a -8080-based computer system. -One has to write a program on another -system, a system where the compiler kit runs on. -This program may be a mixture of high-level languages, such as -C or Pascal, EM and 8080 assembly code. 
-The program should be compiled using the compiler kit, producing 8080 machine code. -This code should be made available to the 8080 machine -for example by downloading or -by storing it in ROM (Read Only Memory). -.sp 1 -Depending on the characteristics of the particular 8080 based system, some -adaptations have to be made: -.IP 1) 10 -In 'head_em': the base address, which is the address where the first -8080 instruction will be stored, and the initial value of the -stackpointer are set to 0x1000 and 0x8000 respectively. -.br -Other systems require other values. -.IP 2) -In 'head_em': before calling "__m_a_i_n", the environment -pointer, argument vector and argument count will have to be pushed -onto the stack. -Since this back-end is tested on a system without any knowledge -of these things, dummies are pushed now. -.IP 3) -In 'tail_em': proper routines "putchar" and "getchar" should -be provided. -They should write resp. read a character on/from the monitor. -Maybe some conversions will have to be made. -.IP 4) -In 'head_em': an application program returns control to the monitor by -jumping to address 0xFB52. -This may have to be changed for different systems. -.IP 5) -In 'tail_em': the current version of the 8080 back-end has very limited I/O -capabilities, because it was tested on a system that -had no knowledge of files. -So the implementation of the EM-instruction 'mon' is very simple; -it can only do the following things: -.RS -.IP Monitor\ call\ 1: 40 -exit -.IP Monitor\ call\ 3: -read, always reads from the monitor. -.br -echoes the read character. -.br -ignores file descriptor. -.IP Monitor\ call\ 4: -write, always writes on the monitor. -.br -ignores file descriptor. -.IP Monitor\ call\ 5: -open file, returns file descriptor -1. -.br -(compare chapter about TRAPS) -.IP Monitor\ call\ 6: -close file, returns error code = 0. -.IP Monitor\ call\ 54: -io-control, returns error code = 0. 
-.RE -.sp -If the system should do file-handling the routine ".mon" -should be extended thoroughly. -.NH 1 -INTEL 8080 VERSUS ZILOG Z80 AND INTEL 8086 -.NH 2 -Introduction -.PP -At about the same time I developed the back end -for the Intel 8080 and Intel 8085, -Frans van Haarlem did the same job for the Zilog z80 microprocessor. -Since the z80 processor is an extension of the 8080, -any machine code offered to an 8080 processor can be offered -to a z80 too. -The assembly languages are quite different however. -.br -During the developments of the back ends we have used -two micro-computers, both equipped with a z80 microprocessor. -Of course the output of the 8080 back end is assembled by an -8080 assembler. This should assure I have never used any of -the features that are potentially available in the z80 processor, -but are not part of a true 8080 processor. -.sp 1 -As a final job, I have -investigated the differences between the 8080 and z80 processors -and their influence on the back ends. -I have tried to measure this influence by examining the length of -the generated code. -I have also involved the 8086 micro-processor in these measurements. -.NH 2 -Differences between the 8080 and z80 processors -.PP -Except for some features that are less important concerning back ends, -there are two points where the z80 improves upon the 8080: -.IP First, 18 -the z80 has two additional index registers, IX and IY. -They are used as in -.DS - LD B,(IX+10) -.DE -The offset, here 10, should fit in one byte. -.IP Second, -the z80 has several additional instructions. -The most important ones are: -.RS -.IP 1) 8 -The 8080 can only load or store register-pair HL direct -(using LHLD or SHLD). -The z80 can handle BC, DE and SP too. -.IP 2) -Instructions are included to ease block movements. -.IP 3) -There is a 16 bit subtract instruction. -.IP 4) -While the 8080 can only rotate the accumulator, the z80 -can rotate and shift each 8 bit register. 
-.IP 5) -Special instructions are included to jump to near locations, saving 1 byte. -.RE -.NH 2 -Consequences for the 8080 and z80 back end -.PP -The most striking difference between the 8080 and z80 back ends -is the choice of the local base. -The writer of the z80 back end chose index register IY as local base, -because this results in the cheapest coding of EM-instructions -like 'lol' and 'stl'. -The z80 instructions that load local 10, for example -.DS -LD E,(IY+10) -LD D,(IY+11) -.DE -occupy 6 bytes and take 38 time periods to execute. -The five corresponding 8080 instructions loading a local -occupy 7 bytes and take 41 time periods. -Although the profit of the z80 might be not world-shocking, -it should be noted that as a side effect it may save some -pushing and popping since register pair HL is not used. -.sp 1 -The choice of IY as local base has its drawbacks too. -The root of the problem is that it is not possible to add -IY to HL. -For the EM-instruction -.DS -lal 20 -.DE -the z80 back end generates code like -.DS -LD BC,20 -PUSH IY -POP HL -ADD HL,BC -.DE -leaving the wanted address in HL. -.br -These annoying push and pop instructions are also needed in some -other instructions, for instance in 'lol' when the offset -doesn't fit in one byte. -.sp 1 -Besides the choice of the local base, I think there is no -fundamental difference between the 8080 and z80 back ends, -except of course that the z80 back end has register pair BC -and, less important, index register IX available as scratch registers. -.sp 1 -Most of the PATTERNS in the 8080 and z80 tables are more or less -a direct translation of each other. -.NH 2 -What did I do? -.PP -To get an idea of the quality of the code generated by -the 8080, z80 and 8086 back ends I have gathered -some C programs and some Pascal programs. -Then I produced 8080, z80 and 8086 code for them. -Investigating the assembler listing I found the -lengths of the different parts of the generated code. 
-I have checked two areas: -.IP 1) 8 -the entire text part -.IP 2) -the text part without any library routine, so only the plain user program -.LP -I have to admit that neither one of them is really honest. -When the entire text part is checked, the result is disturbed -because not always the same library routines are loaded. -And when only the user program itself is considered, the result is -disturbed too. -For example the 8086 has a multiply instruction, -so the EM-instruction 'mli 2' is translated in the main program, -but the 8080 and z80 call a library routine that is not counted. -Also the 8080 uses library routines at some places where the -z80 does not. -.sp 1 -But nevertheless I think the measurements will give an idea -about the code produced by the three back ends. -.NH 2 -The results -.PP -The table below should be read as follows. -For all programs I have computed the ratio of the code-lengths -of the 8080, z80 and 8086. -The averages of all Pascal/C programs are listed in the table, -standardized to '100' for the 8080. -So the listed '107' indicates that the lengths -of the text parts of the z80 programs that originally were Pascal programs, -averaged 7 percent larger than in the corresponding 8080 programs. -.DS C - -------------------------------------------------- -| | 8080 | z80 | 8086 | - -------------------------------------------------- -| C, text part | 100 | 103 | 65 | -| Pascal, text part | 100 | 107 | 55 | -| C, user program | 100 | 110 | 71 | -| Pascal, user program | 100 | 118 | 67 | - -------------------------------------------------- -.DE -.TE -The most striking thing in this table is that the z80 back end appears -to produce larger code than the 8080 back end. -The reason is that the current z80 back end table is -not very sophisticated yet. -For instance it doesn't look for any EM-pattern longer than one. 
-So the table shows that the preparations in the 8080 back end table -to produce faster code (like recognizing special EM-patterns -and permitting one byte registers on the fake-stack) -were not just for fun, but really improved the generated code -significantly. -.sp 1 -The table shows that the 8080 table is relatively better -when only the plain user program is considered instead of the entire text part. -This is not very surprising since the 8080 back end sometimes -uses library routines where the z80 and especially the 8086 don't. -.sp 1 -The difference between the 8080 and z80 on the one hand and the 8086 -on the other is very big. -But of course it was not an equal game: -the 8086 is a 16 bit processor that is much more advanced than the -8080 or z80 and the 8086 back end is known to produce -very good code. -.bp -.B REFERENCES -.sp 2 -.IP [1] 10 -8080/8085 Assembly Language Programming Manual, -.br -Intel Corporation (1977,1978) -.IP [2] -Andrew S. Tanenbaum, Hans van Staveren, E.G. Keizer and Johan W. Stevenson, -.br -A practical tool kit for making portable compilers, -.br -Informatica report 74, Vrije Universiteit, Amsterdam, 1983. -.sp -An overview on the Amsterdam Compiler Kit. -.IP [3] -Tanenbaum, A.S., Stevenson, J.W., Keizer, E.G., and van Staveren, H. -.br -Description of an experimental machine architecture for use with block -structured languages, -.br -Informatica report 81, Vrije Universiteit, Amsterdam, 1983. -.sp -The defining document for EM. -.IP [4] -Steel, T.B., Jr. -.br -UNCOL: The myth and the Fact. in Ann. Rev. Auto. Prog. -.br -Goodman, R. (ed.), vol. 2, (1960), p325-344. -.sp -An introduction to the UNCOL idea by its originator. -.IP [5] -van Staveren, Hans -.br -The table driven code generator from the Amsterdam Compiler Kit -(Second Revised Edition), -.br -Vrije Universiteit, Amsterdam. -.sp -The defining document for writing a back end table.
-.IP [6] -Voors, Jan -.br -A back end for the Zilog z8000 micro, -.br -Vrije Universiteit, Amsterdam. -.sp -A document like this one, but for the z8000. diff --git a/doc/install.doc b/doc/install.doc deleted file mode 100644 index fd9dae84a..000000000 --- a/doc/install.doc +++ /dev/null @@ -1,1237 +0,0 @@ -.\" $Id$ -.if n .nr PD 1v -.if n .nr LL 78m -.if n .ll 78m -.TL -Amsterdam Compiler Kit Installation Guide -.AU -Ed Keizer -(revised for 3rd, 4th and 5th distribution by Ceriel Jacobs) -.AI -Vakgroep Informatica -Vrije Universiteit -Amsterdam -.NH -Introduction -.PP -This document -describes the process of installing the Amsterdam Compiler Kit (ACK). -It depends on the combination of hard- and software how -hard it will be to install the Kit. -This description is intended for a Sun-3 or SPARC workstation. -Installation on VAXen running Berkeley -.UX -or Ultrix, -Sun-2 systems and most System V -.UX -systems should be easy. -As of this distribution, installation on PDP-11's or other -systems with a small address space is no longer supported. -See section 8 for installation on other systems. -.NH -The ACK installation process -.PP -In the ACK installation process, three directory trees are used: -.IP "-" -the ACK source tree. This is the tree on the ACK distribution medium. -For the rest of this document, we will refer to this directory -as $SRC_HOME; -.IP "-" -a configuration tree. This tree is built by the installation process and -is used to do compilations in. Its structure reflects that of the source tree, -but this tree will mostly contain Makefiles and relocatable objects. -For the rest of this document, we will refer to this directory -as $CONFIG; -.IP "-" -an ACK users tree. This tree is also built by the installation process. 
-For the rest of this document, we will refer to this directory -as $TARGET_HOME; -.LP -After installation, -the directories in $TARGET_HOME contain the following information: -.if n .sp 1 -.if n .nr PD 0 -.IP "bin" 14 -the few utilities that knot things together. -See the section about "Commands". -.IP "lib" -root of a tree containing almost all libraries used by -commands. -Files specific to a certain machine are collected in one subtree -per machine. E.g. "lib/pdp", "lib/z8000". -The names used here are the same names as used for subtrees -of "$SRC_HOME/mach". -.IP "lib/descr" -command descriptor files used by the program ack. -.IP "lib/LLgen" -files used by the LL(1) parser generator. -.IP "lib/flex" -files used by the lexical analyzer generator Flex. -.IP "lib/m2" -definition modules for Modula-2. -.IP "lib.bin" -root of a tree containing almost all binaries used by -commands. -All programs specific to a certain machine are collected in one subtree -per machine. E.g. "lib.bin/pdp", "lib.bin/z8000". -The names used here are the same names as used for subtrees -of "$SRC_HOME/mach". -.IP "lib.bin/ego" -files used by the global optimizer. -.IP "lib.bin/lint" -binaries for the lint passes and lint libraries. -.IP "lib.bin/ceg" -files used by the code-expander-generator. -.IP "etc" -contains the file "ip_spec.t" needed for EM interpreters and EM documentation. -.IP "config" -contains two include files: -.TS -l l. -em_path.h path names used by \fIack\fP, intended for all utilities -local.h various definitions for local versions -.TE -These include files are specific for the current machine, so they -are in a separate directory. -.IP "include/_tail_cc" -.br -include files needed by modules -in the C library from lang/cem/libcc. -.IP "include/tail_ac" -.br -include files for ANSI C. -.IP "include/occam" -include files for occam. -.IP "include/_tail_mon" -.br -more or less system independent include files needed by modules -in the library lang/cem/libcc/mon. 
-.IP "h" -the #include files for: -.TS -l l. -arch.h definition of the ACK archive format -as_spec.h used by EM assembler and interpreters -bc_io.h used by the Basic run-time system -bc_string.h used by the Basic run-time system -cg_pattern.h used by the backend program "cg" and its bootstrap -cgg_cg.h used by the backend program "ncg" and its bootstrap -em_abs.h contains trap numbers and address for lin and fil -em_ego.h definition of names for some global optimizer - messages -em_flag.h definition of bits in array em_flag in - $TARGET_HOME/lib.bin/em_data.a. Describes parameters - effect on flow of instructions -em_mes.h definition of names for mes pseudo numbers -em_mnem.h instruction => compact mapping -em_pseu.h pseudo instruction => compact mapping -em_ptyp.h useful for compact code reading/writing, - defines classes of parameters -em_reg.h definition of mnemonics indicating register type -em_spec.h definition of constants used in compact code -ip_spec.h used by programs that read e.out files -m2_traps.h used by the Modula-2 run-time system -ocm_chan.h used by the occam run-time system -ocm_parco.h used by the occam run-time system -ocm_proc.h used by the occam run-time system -out.h defines the ACK a.out format -pc_err.h definitions of error numbers in Pascal -pc_file.h macro's used in file handling in Pascal -pc_math.h used by the Pascal runtime system -ranlib.h defines symbol table format for archives -stb.h defines debugger symbol table types -.TE -.IP "modules" -root of a tree containing modules for compiler writers. -.IP "modules/man" -manual pages for all modules. -.IP "modules/lib" -contains module objects. -.IP "modules/h" -include files for some of the modules. -.IP "modules/pkg" -include files for some of the modules. -.IP "doc" -this directory contains the unformatted documents for the Kit. -A list of the available documents can be found in the last section. -These documents must be processed by [nt]roff. 
-.IP "man" -man files for various utilities. -.if n .nr PD 1v -.LP -When installing ACK on several types of machines with a shared file system, -it may be useful to know that the "doc", "etc", "h", -"include", "lib" and "man" sub-directories do not depend on this -particular installation. They do not contain binaries or path-dependent -information. These directories can therefore be shared between the -ACK installations. This can be accomplished by creating the tree and -suitable symbolic links before starting the installation process. -.LP -For instance, let us say there is a file-system that is accessible from -the different machines as "/usr/share/local", and the ACK binary tree -must be installed in "/usr/local/ack". In this case, proceed as follows: -.IP \- -create a directory "/usr/share/local/ack", with subdirectories -"doc", "etc", "h", "include", "lib" and "man". -.IP \- -create a directory "/usr/local/ack" and -then create symbolic links "doc" to "/usr/share/local/ack/doc", etc. -.LP -If this is done on all machines on which ACK will be installed, the -machine-independent part only has to be installed once, preferably -on the fastest processor (it takes a long time to install all libraries). -.LP -The directories in the source tree contain the following information: -.if n .sp 1 -.if n .nr PD 0 -.IP "bin" 14 -source of some shell-scripts. -.IP "lib" -mostly description files for the "ack" program. -.IP "etc" -the main description of EM sits here. -Files (e.g. em_table) describing -the opcodes and pseudos in use, -the operands allowed, effect in stack etc. etc. -.IP "mach" -just there to group the directories with all sources for each machine. -The section about "Machines" of this manual indicates which subdirectories -are used for which systems. -.br -These directories have subdirectories named: -.in +3n -.TS -l l. 
-cg the backend (*.m => *.s) -ncg the new backend (*.m => *.s) -as the assembler (*.s => *.o) or - assembler/linker (*.s + libraries => a.out) -cv conversion programs for a.out files -dl down-load programs -top the target optimizer -int source for an interpreter - -libbc to create Basic run-time system and libraries -libcc to create C run-time system and libraries -libcc.ansi to create ANSI C run-time system and libraries -libpc to create Pascal run-time system and libraries -libf77 to create Fortran run-time system and libraries -libm2 to create Modula-2 run-time system and libraries -liboc to create occam run-time system and libraries -libem EM runtime system, only depending on CPU type -libend library defining end, edata, etext -libfp to create floating point library -libdb to create debugger support library -libsys system-dependent EM library -libce fast cc-compatible C compiler library support - -ce code expander (fast back-end) - -test various tests -.TE -.in -3n -Actually, some of these directories will only appear in the configuration tree. -.br -The directory proto contains files used by most machines, -like machine-independent sources and Makefiles. -.in +3n -.TS -l l. -mach/proto/cg current backend sources -mach/proto/ncg new backend sources -mach/proto/as assembler sources -mach/proto/top target optimizer sources -mach/proto/fp floating point package sources -mach/proto/libg makefiles for compiling libraries -mach/proto/grind machine-independent debugger support -.TE -.IP "emtest" -contains prototype of em test set. -.IP "lang" -just there to group the directories for all front-ends. -.IP "lang/pc" -the Pascal front-end. -.IP "lang/pc/libpc" -.br -source of Pascal run-time system (in EM or C). -.IP "lang/pc/test" -some test programs written in Pascal. -.IP "lang/pc/comp" -the Pascal compiler proper. -.IP "lang/cem" -the C front-end. -.IP "lang/cem/libcc" -.br -directories with sources of C runtime system, libraries (in EM or C). 
-.IP "lang/cem/libcc/gen" -.br -sources for routines in chapter III of -.UX -programmers manual, -excluding stdio. -.IP "lang/cem/libcc/stdio" -.br -stdio sources. -.IP "lang/cem/libcc/math" -.br -sources for mathematical routines, normally available with the -\fB-lm\fP option to \fIcc\fP. -.IP "lang/cem/libcc/mon" -.br -sources for routines in chapter II, mostly written in EM. -.IP "lang/cem/cemcom" -.br -the compiler proper. -.IP "lang/cem/cemcom.ansi" -.br -the ANSI C compiler proper. -.IP "lang/cem/cpp.ansi" -.br -the ANSI C preprocessor. -.IP "lang/cem/libcc.ansi" -.br -the ANSI C library sources. -.IP "lang/cem/ctest" -.br -the C test set. -.IP "lang/cem/ctest/cterr" -.br -programs developed for pinpointing previous errors. -.IP "lang/cem/ctest/ct*" -.br -the test programs. -.IP "lang/cem/lint" -a C program checker. -.IP "lang/cem/lint/lpass1" -.br -the first pass of lint. -.IP "lang/cem/lint/lpass1.ansi" -.br -the first pass of lint, this time for ANSI C. -.IP "lang/cem/lint/lpass2" -.br -the second pass of lint, shared between ANSI C and "old-fashioned" C. -.IP "lang/cem/lint/llib" -.br -programs for producing lint libraries. -.IP "lang/basic" -the Basic front-end. -.IP "lang/basic/src" -.br -the compiler proper. -.IP "lang/basic/lib" -.br -the Basic run-time library source. -.IP "lang/basic/test" -.br -various Basic programs. -.IP "lang/occam" -the occam front-end. -.IP "lang/occam/comp" -.br -the compiler proper. -.IP "lang/occam/lib" -.br -source of occam run-time system (in EM or C). -.IP "lang/occam/test" -.br -some occam programs. -.IP "lang/m2" -the Modula-2 front-end. -.IP "lang/m2/comp" -the compiler proper. -.IP "lang/m2/libm2" -source of Modula-2 run-time system (in EM, C and Modula-2). -.IP "lang/m2/m2mm" -the Modula-2 makefile generator. -.IP "lang/m2/test" -some Modula-2 example programs. -.IP "lang/fortran" -the Fortran front-end (translates Fortran into C). 
This compiler is not -a part of ACK, but is included because it adds another language. -The Fortran system carries the following copyright notice: -.IP "" -.nf -/************************************************************** -Copyright 1990, 1991 by AT&T Bell Laboratories and Bellcore. - -Permission to use, copy, modify, and distribute this software -and its documentation for any purpose and without fee is hereby -granted, provided that the above copyright notice appear in all -copies and that both that the copyright notice and this -permission notice and warranty disclaimer appear in supporting -documentation, and that the names of AT&T Bell Laboratories or -Bellcore or any of their entities not be used in advertising or -publicity pertaining to distribution of the software without -specific, written prior permission. - -AT&T and Bellcore disclaim all warranties with regard to this -software, including all implied warranties of merchantability -and fitness. In no event shall AT&T or Bellcore be liable for -any special, indirect or consequential damages or any damages -whatsoever resulting from loss of use, data or profits, whether -in an action of contract, negligence or other tortious action, -arising out of or in connection with the use or performance of -this software. -**************************************************************/ -.fi -.IP "lang/fortran/comp" -.br -the compiler proper. -.IP "lang/fortran/lib" -.br -source of Fortran runtime system and libraries. -.IP "fast" -contains sub-directories for installing the fast ACK compatible compilers. -.IP "fast/driver" -.br -contains the sources of the fast ACK compatible compiler drivers. -.IP "fcc" -contains the fast cc-compatible C compiler for SUN-3 and VAX. -.IP "util" -contains directories with sources for various utilities. -.IP "util/ack" -the program used for translation with the Kit. -.IP "util/opt" -the EM peephole optimizer (*.k => *.m). -.IP "util/ego" -the global optimizer. 
-.IP "util/topgen" -the target optimizer generator. -.IP "util/misc" -decode (*.[km] => *.e) + encode (*.e => *.k). -.IP "util/data" -the C-code for $TARGET_HOME/lib.bin/em_data.a. -These sources are created by the Makefile in `etc`. -.IP "util/ass" -the EM assembler (*.[km] + libraries => e.out). -.IP "util/arch" -the archivers to be used for all EM utilities. -.IP "util/cgg" -a program needed for compiling backends. -.IP "util/ncgg" -a program needed for compiling the newest backends. -.IP "util/cpp" -the C preprocessor. -.IP "util/shf" -various shell files. -.IP "util/LLgen" -the extended LL(1) parser generator. -.IP "util/amisc" -contains some programs handling ACK a.out format, such as anm, asize. -.IP "util/cmisc" -contains some programs to help in resolving name conflicts, and -a dependency generator for makefiles. -.IP "util/led" -the ACK link-editor, reading ACK relocatable a.out format, and writing -ACK a.out format. -.IP "util/int" -an EM interpreter, written in C. Very useful for checking out software, -but slow. -.IP "util/ceg" -code expander generator. -.IP "util/grind" -a symbolic debugger. -.IP "util/byacc" -this is Berkeley yacc, in the public domain. -.IP "util/flex" -this is a replacement for lex. It carries the following copyright notice: -.IP "" -.nf -Copyright (c) 1990 The Regents of the University of California. -All rights reserved. - -This code is derived from software contributed to Berkeley by -Vern Paxson. - -The United States Government has rights in this work pursuant -to contract no. DE-AC03-76SF00098 between the United States -Department of Energy and the University of California. 
- -Redistribution and use in source and binary forms are permitted -provided that: (1) source distributions retain this entire -copyright notice and comment, and (2) distributions including -binaries display the following acknowledgement: ``This product -includes software developed by the University of California, -Berkeley and its contributors'' in the documentation or other -materials provided with the distribution and in all advertising -materials mentioning features or use of this software. Neither the -name of the University nor the names of its contributors may be -used to endorse or promote products derived from this software -without specific prior written permission. - -THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. -.fi -.ne 4 -.if n .nr PD 1v -.LP -All path names mentioned in the text of this document are relative to -$SRC_HOME, unless they start with '/' or one of $SRC_HOME, -$TARGET_HOME or $CONFIG. -.NH -Restoring the ACK tree -.PP -The process of installing the Amsterdam Compiler Kit is quite simple. -The first step is to restore the Amsterdam Compiler Kit -distribution tree structure. -Proceed as follows -.IP " \-" 10 -Create a directory, for example /usr/share/local/src/ack, on a device -with at least 15 Megabytes left. This directory will be $SRC_HOME. -.IP " \-" -Change to that directory (cd ...). -.IP " \-" -Extract all files from the distribution medium, for instance -magtape: -\fBtar x\fP. -.IP " \-" -Keep a copy of the original distribution to be able to repeat the process -of installation in case of disasters. -This copy is also useful as a reference point for diff-listings. -.NH -Adapting ACK to the local system -.PP -Before compiling the sources in the Kit some installation dependent -actions have to be taken. 
-Most of these are performed by an interactive shell script in the file -.I $SRC_HOME/first/first. -Calling this script should be done -from another directory, for instance an empty directory which will later -become $CONFIG. -.LP -The actions of the -.I first -script are: -.if n .sp 1 -.if n .nr PD 0 -.IP \- -Asking for the path names of the ACK source directory ($SRC_HOME), the -configuration directory ($CONFIG), and the ACK users directory ($TARGET_HOME). -About 5M are needed for the configuration tree. The disk space needed -for the ACK users tree depends on which front-ends and back-ends are to be -installed. -For instance, on our SPARC systems -we have installed all languages and 6 back-ends, including the -system-independent part. This amounts to about 16M. -On our SUN-3 systems, we have installed all front-ends and 5 back-ends, -but only the machine-dependent part. The machine-independent directories are -symbolic links to the SPARC ACK users tree. -We also have the fast ACK compilers -installed on the SUN-3's. -The total amount of disk-space used is less than 8M. -.IP \- -Asking for what type of system the binary tree must be produced for -and creating the shell script "ack_sys" in the Kit's bin directory. -Several utilities make use of "ack_sys" to determine the type of -system. -The current choice is between: -.TS -c c c -l l l. -answer system type default machine -vax_bsd4_1a VAX11 + BSD4.1a vax4 -vax_bsd4_2 VAX11 + BSD4.2 vax4 -vax_sysV_2 VAX11 + System V.2 vax4 -i386 Intel 80386 system + Xenix System V i386 -sun3 Sun-3 Motorola 68020 workstation sun3 -sun2 Sun-2 Motorola 68010 workstation sun2 -m68_sysV_0 68000 + Uniplus System V.0 mantra -m68020 Motorola 68020 VME131 + System V/68 R2V2.1 m68020 -sparc Sun-4 or SPARC workstation running SunOs 4 sparc -sparc_solaris Sun-4 or SPARC workstation running Solaris 2 sparc_solaris -ANY Neither of the above ??? -.TE -For some of these, the installation procedure has not been tested, as -we don't have them. 
-For others, the installation procedure has only been tested with earlier -distributions, as we don't have those systems anymore. -However, the sun3 and sparc systems are known to behave reasonably. -The sparc_solaris system has only been tested with the GNU C compiler, -because we don't have the SUN C compiler (it is unbundled in Solaris 2). -The Sun systems should run SunOs Release 3.0 or newer. -The i386 choice may also be used for Intel 80386 or 80486 systems -running -.UX -System V Release 4. These systems are also able to run Xenix System V -binaries. -If the target system is not on this list, choose one that comes close. -If none of them come close, use the "ANY" choice. -For ANY, any name can be used, -but the Kit will not be able to compile programs for the target system. -See the section about "compilation -on a different machine". -.IP \- -Setting the default machine for which code is -produced to the local type of system according to the table above. -This is done in the file "$TARGET_HOME/config/local.h". -See also section 9.1. -.IP \- -Asking for things that don't have to be installed. -.IP \- -Producing a shell script called "INSTALL" that will take care of the -ACK installation process. -.NH -Compiling the Kit -.PP -The next step in the installation process is to run the "INSTALL" -shell-script. When using a Bourne-shell, type: -.DS -sh INSTALL > INSTALL.out 2>&1 & -.DE -When using a C-shell, type: -.DS -sh INSTALL >& INSTALL.out & -.DE -This shell-script performs the following steps: -.if n .sp 1 -.if n .nr PD 0 -.IP \- -Produce a configuration tree ($CONFIG), reflecting the structure of the -source tree. -.IP \- -Produce Makefiles in $CONFIG. -As mentioned before, compilations -will be done in the configuration tree, not in the source tree. -Most configuration directories will have Makefiles -used to compile and install the programs in that -directory.
-All programs needed for compilation and/or cross compilation -with the Kit are installed in $TARGET_HOME by these Makefiles. -These Makefiles are produced from corresponding files called -"proto.make" in the source tree. In fact, the "proto.make" files -are almost complete Makefiles, except for some macro definitions that -are collected by the \fIfirst\fP script. -The Makefiles adhere to a standard which is described in the -section 9. -.IP \- -Copy "Action" files to the configuration tree and editing them to -reflect the choices concerning the parts of ACK that have to be -installed. "Action" files are described below. -.IP \- -Copy part of the source tree to the ACK users tree (include files, -manual pages, documentation, et cetera). -.IP \- -Calling the "TakeAction" script. -All these Makefiles do not have to be called separately. -We wrote a shell script calling the make's needed to install -the whole Kit. -This script consists of the file $SRC_HOME/TakeAction -and a few files called Action in some configuration directories. -The Action files describe in a very simple form which actions -have to be performed in which directories. -The default action is to start "make install && make clean". -The output of each make is diverted to a file called "Out" -in the same directory as the make was started in. -If the make was successful (return code 0) the Out file is removed -and the script TakeAction produces a small message indicating -that it succeeded in fulfilling its goal. -If the make was not successful (any other return code) the Out file -is left alone for further examination and the script TakeAction -produces a small message indicating that it failed. -.br -For some programs the scripts already know they can't be -installed on the local type of system. -In that case they produce a message "Sorry, ....." and -happily proceed with further installation commands. 
-.if n .sp 1 -.if n .nr PD 1v -.LP -Installation of the Kit might take anything from a few -hours to more than a day, depending on the speed of the local machine and -what must be installed. -.LP -If the installation succeeded, the Kit is ready to be used. -Read section 6 and the manuals provided -with the Kit (in the $TARGET_HOME/man directory) on how to use it. -.NH 2 -Problems -.NH 3 -on Unisoft m68000 systems. -.PP -The Unisoft C compiler has a bug which impedes the correct -translation of the peephole optimizer. -For a more detailed description of this phenomenon see -the file "$SRC_HOME/mach/m68k2/Unisoft_bug". -(This observation was made in 1985 or so, so it is probably -no longer true). -.NH 3 -with backends -.PP -The backends for the PDP11, VAX, Motorola 68000 and 68020, -SPARC, Intel 8086, and Intel 80386 -have been heavily used by ourselves and are well tested. -The backends for the other machines are known to run our own -test programs, -but might reveal errors when more heavily used. -.NH 2 -An example output of TakeAction. -.LP -.sp 1 -.nf - System definition -- done - EM definition library -- done - C utilities -- done - Flex lexical analyzer generator -- done - Yacc parser generator -- done - system-call interface module -- done - . - . - . - EM Global optimizer -- done - ACK archiver -- done - Program 'ack' -- done - Bootstrap for backend tables -- done - Bootstrap for newest form of backend tables -- done - . - . - . - C frontend -- done - ANSI-C frontend -- done - ANSI-C preprocessor -- done - ANSI-C header files -- done - Failed for LINT C program checker, see lang/cem/lint/Out - Pascal frontend -- done - Basic frontend -- done - . - . - . 
- Vax 4-4 assembler -- done - Vax 4-4 backend -- done - Vax target optimizer -- done - ACK a.out to VAX a.out conversion program -- done - Sorry, Vax code expander library can only be made on vax* systems - Vax 4-4 EM library -- done - Vax 4-4 debugger support library -- done - Vax 4-4 etext,edata,end library -- done - Vax 4-4 systemcall interface -- done - . - . - . -.sp 1 -.fi -.LP -The lines starting with "Sorry, " indicate that certain programs cannot -be translated on the local machine. -The lines starting with "Failed for" indicate -that certain programs/libraries were expected to, -but did not compile. -In this example, the installation of LINT failed. -To repeat a certain part of the installation, look in -the Action file, which resides in the root of the configuration tree, -for the directory in which that part is to be found. -If that directory contains an Action file issue the command -"sh $CONFIG/bin/TakeAction", otherwise type "make install". -.NH -Commands -.PP -The following commands are available in the $TARGET_HOME/bin directory after compilation -of the Kit: -.IP "\fIack\fP, \fIacc\fP, \fIabc\fP, \fIapc\fP, \fIocm\fP, \fIm2\fP, \fIf2c\fP and their links" 14 -.br -the names mentioned here can be used to compile Pascal, C, etc... programs. -Most of the links can be used to generate code for a particular -machine. -See also the section about "Machines". -.IP \fIarch\fP -the archiver used for the EM- and universal assembler/loader. -.IP \fIaal\fP -the archiver used for ACK objects. -.IP \fIem\fP -this program selects an interpreter to execute an e.out file. -Interpreters exist for PDP-11 and Motorola 68000 systems. -.IP \fIeminform\fP -the program to unravel the post-mortem information of -the EM interpreter for the PDP-11. -.IP \fILLgen\fP -the LL(1) parser generator. -.IP \fIack_sys\fP -a shell script producing an identification of the target system. -Used by some utilities to determine what is, and what is -not feasible on the target system.
-.IP \fImarch\fP -a shell script used while compiling libraries. -.IP "\fIasize\fP, \fIanm\fP, \fIastrip\fP" -.br -do the same as \fIsize\fP, \fInm\fP and \fIstrip\fP, but for ACK object format. -.IP \fImkdep\fP -a dependency generator for makefiles. -.IP "\fIcid\fP, \fIprid\fP, \fIcclash\fP" -.br -some utilities for handling name clashes in C programs. Some -systems have C-compilers with only 7 or 8 characters significant in -identifiers. -.IP \fItabgen\fP -a utility for generating character tables for C-programs. -.IP \fIint\fP -an EM interpreter. This one is written in C, and is very useful for checking -out programs. -.IP \fIgrind\fP -a source level debugger for C, ANSI-C, Modula-2 and Pascal. -.IP "\fIafcc\fP, \fIafm2\fP, \fIafpc\fP" -.br -these are ACK-compatible fast C, Modula-2 and Pascal compilers, -available for M68020, VAX and Intel 80386 systems. They compile very fast, -but produce slow code. -.IP \fIfcc\fP -this is a cc-compatible fast C compiler, available on SUN-3 and VAX -systems. It compiles very fast, but produces slow code. -.LP -We currently make the Kit available to our users by telling -them that they should include the $TARGET_HOME/bin directory in -their PATH shell variable. -The programs will still work when moved to a different -directory or linked to. -Copying should preferably be done with tar, since links are -heavily used. -Renaming of the programs linked to \fIack\fP will not always -produce the desired result. -This program uses its call name as an argument. -Any call name not being \fIcc\fP, \fIacc\fP, \fIabc\fP, \fIpc\fP, \fIf2c\fP, -\fIocm\fP, \fIm2\fP, or \fIapc\fP will be -interpreted as the name of a 'machine description' and the -program will try to find a description file with that name. -The installation process will only touch the utilities in the $TARGET_HOME/bin -directory, not copies of these utilities. 
-.NH -Machines -.PP -Below is a table with entries for all commands in -the bin directory used to (cross)compile for a particular machine. -The name in the first column gives the name in the bin directory. -The column headed dir indicates which subdirectories of -$TARGET_HOME/lib and/or $TARGET_HOME/lib.bin are needed for compilation. -The column head i/p contains the integer and pointer size used in units of -bytes. -The subdirectories with the same name in mach contain the sources. -A * in the column headed 'fp' indicates that floating point can be used -for that particular machine. A + in that column indicates that floating -point is available under the '-fp' option. In this case, software -floating point emulation is used. -.TS -l l l l l l l. -command system i/p languages fp dir remarks - -pdp PDP/UNIX V7 2/2 C * pdp - Pascal - Basic - occam - Modula-2 - -vax4 VAX/BSD 4.? 4/4 C * vax4 - System V.2 Pascal - Basic - occam - Modula-2 - Fortran - -sparc Sun-4 4/4 C * sparc - Pascal - Basic - occam - Modula-2 - Fortran - -sparc_solaris Sun-4 4/4 C * sparc_solaris - Pascal - Basic - occam - Modula-2 - Fortran - -m68k2 M68000/ 2/4 C + m68k2 - Unisoft Pascal - Basic - occam - Modula-2 - -m68k4 M68000/ 4/4 C + m68k4 - Unisoft Pascal m68k2 - Basic - occam - Modula-2 - Fortran - -pmds M68000/ 2/4 C + pmds Philips Micro - PMDS Pascal m68k2 Devel. System - Basic - occam - Modula-2 - -pmds4 M68000/ 4/4 C + pmds4 Philips Micro - PMDS Pascal m68k2 Devel. 
System - Basic m68k4 - occam - Modula-2 - Fortran - -mantra M68000/ 4/4 C + mantra - Sys V.0 Pascal m68k2 - Basic m68k4 - occam - Modula-2 - Fortran - -m68020 M68020/ 4/4 C + m68020 - Sys V/68 R2V2.1 Pascal - Basic - occam - Modula-2 - Fortran - -sun3 Sun-3 R4.1 4/4 C + sun3 - Pascal m68020 - Basic - occam - Modula-2 - Fortran - -sun2 Sun-2 R3.0 4/4 C + sun2 - Pascal m68k4 - Basic m68k2 - occam - Modula-2 - Fortran - -i86 IBM PC/IX 2/2 C + i86 IBM PC with PC/IX - Pascal Causes kernel crashes - Basic - occam - Modula-2 - -xenix3 Microsoft 2/2 C + xenix3 IBM AT with Xenix - Xenix V3 Pascal i86 - Basic - occam - Modula-2 - -i386 SCO Xenix 4/4 C + i386 Intel 80386 - System V Pascal Xenix System V - Basic - occam - Modula-2 - Fortran - -minix Minix PC 2/2 C + minix IBM PC running Minix - Pascal i86 - Basic - occam - Modula-2 - -minixST ST Minix 2/4 C + minixST Atari ST running Minix - Pascal m68k2 - Basic - occam - Modula-2 - -z8000 Zilog 8000 2/2 C z8000 Central Data - Pascal CPU board - Basic Assembler/loader - occam - Modula-2 - -em22 EM machine 2/2 C * em22 Needs interpreter - Pascal - Basic - occam - Modula-2 - -em24 EM machine 2/4 C * em24 Needs interpreter - Pascal - Basic - occam - Modula-2 - -em44 EM machine 4/4 C * em44 Needs interpreter - Pascal - Basic - occam - Modula-2 - Fortran - -6500 6502/BBC 2/2 C 6500 Assembler/loader - Pascal - Basic - occam - Modula-2 - -6800 Bare 6800 6800 Assembler only - -6805 Bare 6805 6805 Assembler only - -6809 Bare 6809 6809 Assembler only - -ns Bare NS16032 4/4 C ns - Pascal - Basic - occam - Modula-2 - Fortran - -i80 Hermac/z80 2/2 C i80 - Pascal - Basic - occam - Modula-2 - -z80 Hermac/z80 2/2 C z80 \fIi80\fP is faster - Pascal - Basic - occam - Modula-2 - -s2650 Signetics s2650 Assembler only - -arm Acorn 4/4 C * arm Assembler/loader - Archimedes Pascal - Basic - occam - Modula-2 - Fortran -.TE -.LP -The commands \fBem22\fP, \fBem24\fP and \fBem44\fP -produce e.out files with EM machine code which must be interpreted. 
-The Kit contains three interpreters: one running under PDP 11/V7 UNIX, -one for the M68000, running under the PMDS system, Sun systems, -the Mantra system, etc, and a portable one, written in C. -The first one can only interpret 2/2 e.out files, -the second takes 2/4 and 4/4 files, -and the last one takes 2/2, 2/4 and 4/4. -The PDP 11 interpreter executes floating point instructions. -.LP -The program \fB$TARGET_HOME/bin/em\fP calls the appropriate -interpreter. -The interpreters are looked for in the em22, em24 and em44 -subdirectories of $TARGET_HOME/lib.bin. -The third interpreter is available as the program \fB$TARGET_HOME/bin/int\fP -in the bin directory. -.NH -Compilation on a different machine. -.PP -The installation mechanism of the Kit is supposed to be portable across -.UX -machines, so -the Kit can be installed and used as a cross-compiler -for the languages it supports on any -.UX -machine. -The presence of most -.UX -utilities is essential for compilation. -A few of the programs certainly needed are: sh, C-compiler, sed, ed, -make, and awk. -.NH 2 -Backend -.PP -The existence of a backend with a system call library -for the target system is essential -for producing executable files for that system. -Rewriting the system call library if the one supplied does -not work on the target system is fairly straightforward. -If no backend exists for the target CPU type, a new backend has to be written -which is a major undertaking. -.NH 2 -Universal assembler/loader, link editor -.PP -For most machines, the description files in $TARGET_HOME/lib/*/descr use our -universal assembler and our link editor. -The load file produced is not directly -usable in any system known to us, -but has to be converted before it can be put to use. -The \fIcv\fP programs convert our a.out format into -executable files. -The \fIdl\fP programs present for some machines unravel -our a.out files and transmit commands to load memory -to a microprocessor over a serial line. 
-The file $TARGET_HOME/man/man5/ack.out.5 contains a description of the format of -the universal assembler load file. -It might be useful to those who wish or need to write their -own conversion programs. -Also, a module is included to read and write our a.out format. -See $TARGET_HOME/man/man3/object.3. -.NH -Options -.NH 2 -Default machine -.PP -There is one important option in $TARGET_HOME/config/local.h. -The utility \fIack\fP uses a default machine name when called -as \fIacc\fP, \fIcc\fP, \fIabc\fP, \fIapc\fP, \fIpc\fP, \fIocm\fP, -\fIm2\fP, \fIf2c\fP, or \fIack\fP. -The machine name used by default is determined by the -definition of ACKM in $TARGET_HOME/config/local.h. -The Kit is distributed with "sun3" as the default machine, -but the shell script "first" in the directory "first" alters this -to suit the target system. -There is nothing against using the Kit as a cross-compiler -and by default produce code that can't run on the local system. -.NH 2 -Pathnames -.PP -Absolute path names are concentrated in "$TARGET_HOME/config/em_path.h". -Only the utilities \fIack\fP, \fIflex\fP, and \fILLgen\fP use -absolute path names to access files in the Kit. -The tree is distributed with /usr/em as the working -directory. -The definition of EM_DIR in em_path.h should be altered to -specify the root -directory for the Compiler Kit binaries on the local system ($TARGET_HOME). -This is done automatically by the shell script "first" in the -directory "first". -Em_path.h also specifies which directory should be used for -temporary files. -Most programs from the Kit do indeed use that directory -although some remain stubborn and use /tmp. -.LP -The shape of the tree should not be altered lightly because -most Makefiles and the -utility \fIack\fP know the shape of the ACK tree. -The knowledge of the utility \fIack\fP about the shape of the tree is -concentrated in the files in the directory $TARGET_HOME/lib/*/descr and $TARGET_HOME/lib/descr/*. 
-.NH -Makefiles -.PP -Most directories contain a "proto.make", from which a Makefile is derived. -Apart from commands applying to that specific directory these -files all recognize a few special commands. -When called with one of these they will apply the command to -their own directory. -The special commands are: -.sp 1 -.IP "install" 20 -recompile and install all binaries and libraries. -.br -Some Makefiles allow errors to occur in the programs they call. -They ignore such errors and notify the user with the message -"~....... error code n: ignored". -Whenever such a message appears in the output it can be ignored. -.IP "cmp" -recompile all binaries and libraries and compare them to the -ones already installed. -.IP pr -print the sources and documentation on the standard output. -.IP opr -make pr | opr -.br -Opr should be an off-line printer daemon. -On some systems it exists under another name e.g. lpr. -The easiest way to call such a spooler is using a shell script -with the name opr that calls lpr. -This script should be placed in /usr/bin or $TARGET_HOME/bin or -one of the directories in the PATH environment variable. -.IP clean -remove all files not needed for day-to-day use, -that is binaries not in $TARGET_HOME/bin or $TARGET_HOME/lib.bin, object files etc. -.LP -Example: -.DS -make install -.DE -given as command in a configuration directory will cause -compilation of all programs in the directory and copying of the results -to the $TARGET_HOME/bin and $TARGET_HOME/lib.bin directories. -.NH -Testing -.PP -Test sets are available in Pascal, C, Basic and EM assembly: -.IP EM 8 -the directory $SRC_HOME/emtest contains a few EM test programs. -The EM assembly files in these tests must be transformed into -load files. -These tests use the LIN and NOP instructions to mark the passing of each -test. -The NOP instruction prints the current line number during the -test phase. 
-Each test notifies its correctness by calling LIN with a unique -number followed by a NOP which prints this line number. -The test finishes normally with 0 as the last number printed. -In all other cases a bug showed its -existence. -.IP Pascal -the directory $SRC_HOME/lang/pc/test contains a few Pascal test programs. -All these programs print the number of errors found and an -identification of these errors. -.sp 1 -.ti +4 -We also tested Pascal with the Validation Suite. -The Validation Suite is a collection of more than 200 Pascal programs, -designed by Brian Wichmann and Arthur Sale to test Pascal compilers. -We are not allowed to distribute it, but a copy may -be requested from -.DS -Richard J. Cichelli -A.N.P.A. -1350 Sullivan Trail -P.O. Box 598 -Easton, Pennsylvania 18042 -USA -.DE -.IP C -the sub-directories in $SRC_HOME/lang/cem/ctest contain C test programs. -The idea behind these tests is: -if there is a program called xx.c, compile it into xx.cem. -Run it with standard output to xx.cem.r, compare this file to -xx.cem.g, a file containing the 'ideal' output. -Any differences will point to implementation differences or -bugs. -Giving the command "run gen" or plain "run" starts this -process. -The differences will be presented on standard output. -The contents of the result files depend on the word size, -the xx.cem.g files on the distribution are intended for a -32-bit machine. -.IP Basic -the directory $SRC_HOME/lang/basic/test contains some forty Basic programs. -Not all of these programs are correct, some have syntactic errors, -some simply don't work. -The Makefile in that directory attempts to compile and run -these tests. -If it compiles its output is compared to a file with suffix .g -which contains the output to be expected. -The make should be started with its standard input diverted -to /dev/null. -An example of the output of a make is present in the file Out.std.
-.NH -Documentation -.PP -After installation, the manual pages for Amsterdam Compiler Kit can be found -in the $TARGET_HOME/man directory. Also, the following documents are provided -in the $TARGET_HOME/doc directory: -.TS -l l. -toolkit.doc general overview (CACM article) -em.doc description of the EM machine architecture -ack.doc format of machine description files (lib/*/descr) -ansi_C.doc ANSI C implementation description -basic.doc Basic reference manual -pcref.doc Pascal-frontend reference manual -val.doc results of running the Pascal Validation Suite -crefman.doc C-frontend description -LLgen description of the LL(1) parser generator -peep.doc internal documentation for the peephole optimizer -cg.doc documentation for backend writers and maintainers -regadd.doc addendum to previous document describing register variables -ncg.doc documentation for the newest backends -v7bugs.doc bugs in the V7 system and how to fix them -6500.doc MSC 6500 backend description -i80.doc Intel 8080 backend description -z80.doc Zilog Z80 backend description -m68020.doc Motorola M68000/M68020 backend description -sparc.doc SPARC code expander description -occam.doc occam-frontend description -ego.doc Global Optimizer description -top.doc Target Optimizer description -int.doc description of the EM interpreter written in C -ceg.doc documentation for code-expander writers and maintainers -lint.doc documentation of LINT -m2ref.doc Modula-2 frontend description -install.doc this document -install.pr this document (formatted for a simple line printer) -.TE -.LP -Use the Makefile to get readable copies. -.LP -Good luck. 
diff --git a/doc/int/Makefile b/doc/int/Makefile deleted file mode 100755 index b823352b9..000000000 --- a/doc/int/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# $Id$ - -DOC = draw.mac cover txt1 txt2 txt3 appA appB bib - -FLS = README proto.make Makefile $(DOC) - -.distr: Makefile - echo $(FLS) | tr ' ' '\012' >.distr - -distr: .distr diff --git a/doc/int/README b/doc/int/README deleted file mode 100644 index 2088afc6d..000000000 --- a/doc/int/README +++ /dev/null @@ -1,4 +0,0 @@ -# $Id$ - -This directory contains the text of the documentation for the -Production Quality Interpreter "int". diff --git a/doc/int/appA b/doc/int/appA deleted file mode 100644 index 3d7abbe8a..000000000 --- a/doc/int/appA +++ /dev/null @@ -1,280 +0,0 @@ -.\" List of all warnings; source of warn_msg and warn.h -.\" -.\" $Id$ -.\" -.\" This file contains the warnings issued by the interpreter, together -.\" with their names and values in the code of the interpreter. Some of -.\" the source files of the interpreter are generated from the Wn -.\" macros in this file. -.\" When modifying this file, preserve the parameters of the Wn macros. -.de Wn \" -.IP \\$3. 7 -.B "\\$1" -.br -.. Wn -.bp -.DS C -APPENDIX A -.DE -.SH -List of Warnings. -.PP -The shadow-byte administration makes it possible to check for a -wide range of errors during run-time. -We have tried to make the diagnostics self-explanatory and especially useful -for the C-programmer. -The warnings are printed in the message file, together with source file -and line number. -The complete list of warnings is presented here, followed by an -explanation of what might be wrong. -Often, these explanations implicitly assume that the program -being interpreted, was originally written in C (and not Pascal, Basic etc.). 
-.LP -.I "Reading the load file" -.Wn "Floating point instructions flag in header ignored" WFLUSED 1 -.Wn "No float initialisation in this version" WFLINIT 2 -The interpreter was compiled with the NOFLOAT option; code involving -floating point operations can be run as long as the actual -instructions are avoided. -.Wn "Extra-test flag in header ignored" WEXTRIGN 4 -The interpreter already tests anything conceivable. -.Wn "Maximum line number in header was 0" WNLINEZR 5 -This number could be used to allocate tables for tallying; these tables are, -however, expanded as needed, so the number is immaterial. -.Wn "Bad float initialisation" WBADFLOAT 7 -The loadfile contains a floating point denotation which does not -satisfy the syntax (see 2.6). -Examining the loadfile (with \fBod \-c\fP) might show the syntax error. -Probably there is a bug in the front-end, creating floats with -a bad syntax. -.LP -.I "System calls" -.Wn "IOCTL \- bad or unimplemented request" WBADIOCTL 11 -The second parameter to the ioctl() request (the operation code) is invalid or -not implemented; since there are many different opcodes on the various UNIX -systems, it is difficult to tell which. The system call fails. -.Wn "MPXCALL \- not (yet) implemented" WMPXIMP 14 -.Wn "PROFIL \- not (yet) implemented" WPROFILIMP 15 -.Wn "PTRACE \- not (yet) implemented" WPTRACEIMP 16 -The monitor calls \fImpxcall()\fP, \fIprofil()\fP and \fIptrace()\fP -have not been implemented. The monitor call fails. -.Wn "Inaccessible memory in system call" WMONFLT 21 -Bad pointers passed to system calls do not cause a memory fault (which in UNIX -would happen to the kernel), but cause the system call to fail with the UNIX -variable errno set to 14 (EFAULT). It seems likely that the program is at -fault, but there is also a good possibility that a library routine made -unwarranted assumptions about word size and pointer size. 
-.Wn "READ \- buffer resides in unallocated memory" WRUMEM 23 -.Wn "READ \- buffer across global data area and heap" WRGDAH 24 -When the buffer passed to the read() system call is situated (completely -or partially) in unallocated memory (beyond \fIHP\fP) or begins -in the global data area and ends in the heap, the appropriate warning -is given. -The buffer is not written. -.Wn "WRITE \- buffer resides in unallocated memory" WWUMEM 25 -.Wn "WRITE \- buffer across global data area and heap" WWGDAH 26 -.Wn "WRITE \- (part of) global buffer is undefined" WWGUNDEF 27 -.Wn "WRITE \- (part of) local buffer is undefined" WWLUNDEF 28 -The first two are equivalent to the READ-errors above. -Writing out a buffer usually makes no sense when the contents are undefined, -so one of the latter two warnings will be generated in this case. -A global buffer resides in the data partition; a local buffer resides in -the stack partition. -This corresponds to global and local variables in a C-program. -In the first two cases the WRITE is not performed, in the latter two cases -it is. -.LP -.I "Traps and signals" -.Wn "SIGTRP \- bad signo argument" WILLSN 31 -The \fIsigtrp()\fP monitor call allows \fIsig_no\fP arguments in the -range [1..17] (UNIX Version 7 signals); the actual argument is out of range. -.Wn "SIGTRP \- signo argument is a synchronous trap" WUNIXTR 32 -The signal is one that can only be caused synchronously by the running program -on UNIX; it cannot occur to an interpreted program. -.Wn "SIGTRP \- bad trapno argument" WILLTN 33 -The \fIsigtrp()\fP monitor call allows \fItrap_no\fP arguments between 0 and -252, and the special values \-2 and \-3; the actual argument is not one of -these. -.Wn "Heap overflow due to command line limitation" WEHEAP 36 -.Wn "Stack overflow due to command line limitation" WESTACK 37 -The maximum sizes of the heap and the stack can be limited by options on the -command line. 
If overflow occurs due to such limitations, the corresponding -trap is taken, preceded by one of the above warnings. If the memory of the -interpreter itself is exhausted, a fatal error follows. -.LP -.I "Run-time type checking" -.Wn "Local character expected" WLCEXP 41 -.Wn "Global character expected" WGCEXP 42 -.Wn "Local integer expected" WLIEXP 43 -.Wn "Global integer expected" WGIEXP 44 -.Wn "Local float expected" WLFEXP 45 -.Wn "Global float expected" WGFEXP 46 -.Wn "Local data pointer expected" WLDPEXP 47 -.Wn "Global data pointer expected" WGDPEXP 48 -.Wn "Local instruction pointer expected" WLIPEXP 49 -.Wn "Global instruction pointer expected" WGIPEXP 50 -In general, a type violation has taken place when one of -these warnings is given. -The \fBfloat\fP- and \fBinstruction pointer\fP warnings are rare and will -usually be easily traceable. -\fBInteger/character expected\fP will normally occur when unsigned arithmetic -is performed on datapointers or when memory containing objects other than -integers is copied bytewise. -Often, this warning is followed by a warning \fBdatapointer expected\fP. -This is due to our decision of transforming pointers to (unsigned) integers -after doing unsigned arithmetic on them. -When such a transformed integer is dereferenced (as if it were a pointer) -or, in general, when it is treated as a pointer, this results in a warning. -The present library implementation of malloc() causes such a -sequence of errors. -.LP -These messages are always followed by a tentative description of what is found -in memory at the offending place.
-.Wn "Actual memory is undefined" WWASUND 61 -.Wn "Actual memory contains an integer" WWASINT 62 -.Wn "Actual memory contains a float" WWASFLOAT 63 -.Wn "Actual memory contains a data pointer" WWASDATAP 64 -.Wn "Actual memory contains an instruction pointer" WWASINSP 65 -.Wn "Actual memory contains mixed information" WWASMISC 66 -If the contents of the area was undefined, -check the source code for an uninitialized variable of the mentioned type. -Officially, the use of an undefined value -should result in a EIUND or EFUND trap but the occurrence is -so common that a warning is more appropriate. -The contents of memory are described as mixed if the data consists of pieces -of different types. This happens, e.g., when caller and callee do not agree on -the types and lengths of the parameters. -.LP -.I "Protection" -.br -.Wn "Destroying contents of ROM (at or near loc 0)" WDESROM 71 -The program stores a value in Read-Only Memory; the only ROM in the present -implementation is the area near location 0. The warning probably results from -storing under a NULL pointer. This is only a warning, the store operation is -executed normally. Reads from location 0 are not detected. -.Wn "Destroying contents of Return Status Block" WDESRSB 72 -The Return Status Block is the stack area containing the return address, the -dynamic link, etc. -This may or may not be an error. -The current implementation of \fIsetjmp()\fP/\fIlongjmp()\fP -may be responsible for it. -If the program does not use setjmp(), there \fIis\fP something -very wrong (e.g. argument for ASP too large). -Note that there are some library routines (such as \fIalarm()\fP) which -use \fIsetjmp()\fP. -.Wn "Logical operation using undefined operand(s)" WUNLOG 81 -.Wn "Comparing undefined operand(s)" WUNCMP 82 -The logical operations AND, XOR, IOR, COM and the compare operation -CMS do their jobs bytewise. 
-If one of the bytes is found to be undefined, the corresponding warning -is given, and the operation is stopped immediately. -The stack is adjusted so interpretation may continue. -.br -It is hard to say what went wrong. -Possibly, the argument of the instruction at hand (which indicates the -size of the objects to be compared), was too large. -.LP -.I "Bad operands" -.Wn "Shift over negative distance" WSHNEG 91 -.Wn "Shift over too large distance" WSHLARGE 92 -Shift instructions yield undefined results if the shift distance is negative -or larger than the object size. -.Wn "Pointer arithmetic yields pointer to bad segment" WSEGADP 93 -When doing pointer arithmetic (ADP, ADS), the operand and result pointer -must be in the same \fIsegment\fP (see sec. 4). -E.g. loading the address of the first local and adding 20 to it will -certainly give this warning. -.Wn "Subtracting pointers to different segments" WSEGSBS 94 -Pointers may be subtracted only if they point into the same segment. -.Wn "Pointer arithmetic with NULL pointer" WNULLPA 96 -By definition it is illegal to do arithmetic with null pointers. -Integers with the size of a pointer and the value zero are recognized -as NULL pointers. -A well-known C-trick to compute the offset of some field in a struct -is converting the null-pointer to the type of the struct and simply -taking the address of the field. -This trick will \-when translated and interpreted\- generate this warning -because it results in arithmetic with the NULL pointer. -.LP -.I "Return area" -.Wn "Returned function result too large" WRFUNLAR 101 -.Wn "Returned function result too small" WRFUNSML 102 -This warning is generated when the size of the expected return value -is not equal to the size actually returned. -.br -An interpreted program may have fallen through the end of -the code without explicitly doing an \fIexit()\fP or \fIreturn()\fP. 
-The start-up routine (\fIcrt0()\fP) however always expects to get some -value returned by the program proper. -.br -Another (less probable) possibility of course is that the code contains -a subroutine or function call that does not return properly (e.g. -it returns a short instead of a long). -.Wn "Returned function result may be garbled" WRFUNGAR 103 -This warning will be generated, when the contents of the FRA are fetched -after some instruction is executed which can mess up the area. -Compiler-generated loadfiles should not generate this message. -.LP -.I "Return Status Block" -.Wn "RET did not find a Return Status Block" WRETBAD 111 -.Wn "Used RET to return from a trap" WRETTRAP 112 -The RET instruction found a garbled Return Status Block, or one that resulted -from a trap. -.Wn "RTT did not find a Return Status Block" WRTTBAD 115 -.Wn "RTT on empty stack" WRTTEMPTY 116 -.Wn "Used RTT to return from a call" WRTTCALL 117 -.Wn "Used RTT to return from a non-returnable trap" WRTTNRTT 118 -The RTT (Return from Trap) instruction found a Return Status Block that was not -created properly by a trap. -.Wn "Stack Pointer too large in RET" WRETSTL 121 -.Wn "Stack Pointer too small in RET" WRETSTS 122 -.Wn "Stack Pointer too large in RTT" WRTTSTL 125 -.Wn "Stack Pointer too small in RTT" WRTTSTS 126 -According to the EM Manual (4.2), "the value of SP just after the return -value has been popped must be the same as the -value of SP just before executing the first instruction of the -invocation." -If the Stack Pointer is too large, some dynamically allocated item or some -temporary result may have been left behind on the stack. -If the Stack Pointer is too small, some locals have been unstacked. -Since the interpreter has enough information in the Return Status Block, it -recovers correctly from these errors. -.LP -.I "Traps" -.LP -Some traps have ambiguous or non-obvious causes.
-As far as possible, these are preceded by a warning, explaining the -circumstances of the trap. -.Wn "Trap ESTACK: DCH on bad LB" WDCHBADLB 131 -.Wn "Trap ESTACK: LPB on bad LB" WLPBBADLB 132 -.Wn "Trap ESTACK: SP retracted over Return Status Block" WSPGTLB 133 -.Wn "Trap ESTACK: SP moved into data area" WSPINHEAP 134 -.Wn "Trap ESTACK: SP set to non-word-boundary" WSPODD 135 -.Wn "Trap ESTACK: LB set out of stack" WLBOUT 136 -.Wn "Trap ESTACK: LB set to non-word-boundary" WLBODD 137 -.Wn "Trap ESTACK: LB set to position where there is no RSB" WLBRSB 138 -.Wn "Trap EHEAP: HP retracted into Global Data Area" WHPGDA 141 -.Wn "Trap EHEAP: HP pushed into stack" WHPSTACK 142 -.Wn "Trap EHEAP: HP set to non-word-boundary" WHPODD 143 -.Wn "Trap EILLINS: unknown opcode" WBADOPC 151 -.Wn "Trap EILLINS: conversion with unacceptable size for this machine" WILLCONV 152 -.Wn "Trap EILLINS: FIL with non-existing address" WILLFIL 153 -.Wn "Trap EILLINS: LFR with too large size" WILLLFR 154 -.Wn "Trap EILLINS: RET with too large size" WILLRET 155 -.Wn "Trap EILLINS: instruction argument of class c does not fit a word" WARGC 156 -.Wn "Trap EILLINS: instruction on double word on machine with word size 4" WARGD 157 -.Wn "Trap EILLINS: local offset too large" WARGL 158 -.Wn "Trap EILLINS: instruction argument of class g not in GDA" WARGG 159 -.Wn "Trap EILLINS: fragment offset too large" WARGF 160 -.Wn "Trap EILLINS: counter in lexical instruction out of range" WARGN 161 -.Wn "Trap EILLINS: non-existent procedure identifier" WARGP 162 -.Wn "Trap EILLINS: illegal register number" WARGR 163 -.Wn "Trap EBADPC: jump out of text segment" WPCOVFL 172 -.Wn "Trap EBADPC: jump out of procedure fragment" WPCPROC 173 -.Wn "Trap EBADGTO: GTO does not restore an existing RSB" WGTORSB 181 -.Wn "Trap EBADGTO: GTO descriptor on the stack" WGTOSTACK 182 -.Wn "Trap caused by TRP instruction" WTRP 191 -.ig -.Wn "Last warning" WMSG 199 -!Leave these lines here! -.. 
diff --git a/doc/int/appB b/doc/int/appB deleted file mode 100644 index 44d023831..000000000 --- a/doc/int/appB +++ /dev/null @@ -1,486 +0,0 @@ -.\" A simple tutorial -.\" -.\" $Id$ -.\" -.bp -.DS -APPENDIX B -.DE -.SH -How to use the interpreter -.PP -The interpreter is not normally used for the debugging of programs under -construction. Its primary application is as a verification tool for almost -completed programs. Although the proper operation of the interpreter is -obviously a black art, this chapter tries to provide some guidelines. -.LP -For the sake of the argument, the source language is assumed to be C, but most -hints apply equally well to other languages supported by ACK. -.sp -.LP -.I "Initial measures" -.PP -Start with a test case of trivial size; to be on the safe side, reckon with a -time dilatation factor of about 500, i.e., a second grows into 10 minutes. -(The interpreter takes 0.5 msec to do one EM instruction on a Sun 3/50). -Fortunately many trivial test cases are much shorter than one second. -.PP -Compile the program into an \fIe.out\fP, the EM machine version of a -\fIa.out\fP, by calling \fIem22\fP (for 2-byte integers and 2-byte pointers), -\fIem24\fP (for 2 and 4) or \fIem44\fP (for 4 and 4) as seems appropriate; -if in doubt, use \fIem44\fP. These compilers can be found in the ACK -\fIbin\fP directory, and should be used instead of \fIacc\fP (or normal -.UX -\fIcc\fP). Alternatively, \fIacc \-memNN\fP can be used instead of -\fIemNN\fP. -.LP -If a C program consists of more than one file, as it usually does, there is -a small problem. The \fIacc\fP and \fIcc\fP compilers generate .o files, -whereas the \fIemNN\fP compilers generate .m files as object files. -A simple technique to avoid the problem is to call -.DS -em44 *.c -.DE -if possible. If not, the following hack on the \fIMakefile\fP generally works. 
-.IP \- -Make sure the \fIMakefile\fP is reasonably clean and complete: all calls to -the compiler are through \fI$(CC)\fP, \fICFLAGS\fP is used properly and all -dependencies are specified. -.IP \- -Add the following lines to the \fIMakefile\fP (possibly permanently): -.DS -\&.SUFFIXES: .o -\&.c.o: -\& $(CC) \-c $(CFLAGS) $< -.DE -.IP \- -Set CC to \fIem44 \-.c\fP (for example). Make sure CFLAGS includes -the \-O option; this yields a speed-up of about 15 %. -.IP \- -Change all .o to .m (or .k if the \-O option is not used). -.IP \- -If necessary, change \fIa.out\fP to \fIe.out\fP. -.PP -With these changes, \fImake\fP will produce an EM object; -\fIesize\fP can be used to verify that it is indeed an EM object and obtain some -statistics. Then call the interpreter: -.DS -int [ parameters ] -.DE -where the parameters are the normal parameters of the program. This should -work exactly like the original program, though slower. It reads from the -terminal if the original does, it opens and closes files like the original and -it accepts interrupts. -.sp -.LP -.I "Interpreting the results" -.PP -Now there are several possibilities. -.PP -It does all this. Great! This means the program -does not do very uncouth things. Now -read the file \fIint.mess\fP to see if any messages were generated. If there -are none, the program did not really run (perhaps the original cc \fIa.out\fP -got called instead?) Normally there is at least a termination message like -.DS -(Message): program exits with status 0 at "awa.p", line 64, INR = 4124 -.DE -This says that the program terminated through an exit(0) on line 64 of the -file \fIawa.p\fP after 4124 EM instructions. -If this is the only message it is time to move to a bigger test case. -.PP -On the other hand, the program may come to a grinding halt with an error -message. 
-All messages (errors and warnings) have a format in which the sequence -.DS -"", line -.DE -occurs, which is the same sequence many compilers produce for their error -messages. Consequently, the \fIint.mess\fP file can be processed as any -compiler message output. -.PP -One such message can be -.DS -(Fatal error) a.em: trap "Addressing non existent memory" not caught at "a.c", line 2, INR = 16 -.DE -produced by the abysmal program -.DS -main() { - *(int*)200000 = 1; -} -.DE -.LP -Often the effects are more subtle, however. The program -.DS -main() { - int *a, b = 777; - - b = *a; -} -.DE -produces the following five warnings (in far less than a second): -.DS -(Warning 47, #1): Local data pointer expected at "t.c", line 4, INR = 17 -(Warning 61, cont.): Actual memory is undefined at "t.c", line 4, INR = 17 -(Warning 102, #1): Returned function result too small at "", line 0, INR = 21 -(Warning 43, #1): Local integer expected at "exit.c", line 11, INR = 34 -(Warning 61, cont.): Actual memory is undefined at "exit.c", line 11, INR = 34 -.DE -The one about the function result looks the most frightening, -but is the most easily solved: -\fImain\fP is a function returning an int, so the start-up routine expects a -(four-byte) integer but gets an empty (zero-byte) return area. -.LP -\fINote\fP: The experts are divided about this. The traditional school holds -that \fImain\fP is an int function and its result is the return code; this -leaves them with two ways of supplying a return code: one as the parameter -of \fIexit()\fP and one as the result -of \fImain\fP. The modern school (Berkeley 4.2 etc.) claims that -return codes are supplied exclusively -by \fIexit()\fP, and they have an \fIexit(0)\fP in -the start-up routine, just after the call to \fImain()\fP; leaving \fImain()\fP -through the bottom implies successful termination. 
-.LP -We shall satisfy both groups by -.DS -main() { - int *a, b = 777; - - b = *a; - exit(0); -} -.DE -This results in -.DS -(Warning 47, #1): Local data pointer expected at "t.c", line 4, INR = 17 -(Warning 61, cont.): Actual memory is undefined at "t.c", line 4, INR = 17 -(Message): program exits with status 0 at "exit.c", line 11, INR = 33 -.DE -which is pretty clear as it stands. -.sp -.LP -.I "Using stack dumps" -.PP -Let's, for the sake of argument -and to avoid the fierce realism of 10000-line programs, assume that the above -still does not give enough information. -Since the error occurred in EM instruction number 17, we should like to see -more information around that moment. Call the interpreter again, now with the -shell variable AT set at 17: -.DS -int AT=17 t.em -.DE -(The interpreter has a number of internal variables that can be set by -assignments on the command line, like with \fImake\fP.) -This gives a file called \fIint.log\fP containing the -stack dump of 150 lines presented at the end of this chapter. -.PP -Since dumping is a subfacility of logging in the interpreter, the formats of -the lines are -the same. If a line starts with an @, it will contain a file-name/line-number -indication; the next two characters are the subject and the log -level. Then comes the information, preceded by a space. The text contains -three stack dumps, one before the offending instruction, one at it, and one -after it; then the interpreter stops. All kinds of other dumps can be -obtained, but this is default. -.PP -For each instruction we have, in order: -.IP \- -an @x9 line, giving the position in the program, -.IP \- -the messages, warnings and errors from the instruction as it is being executed, -.IP \- -dump(s), as requested. 
-.PP -The first two lines mean that at line 4 in file \fIt.c\fP the interpreter -performed its 16-th instruction, with the Program Counter at 30 pointing at -opcode 180 in the text segment; the instruction was an LOL (LOad Local) -with the operand \-4 derived from the opcode. It copies the local at offset -\-4 to the top of the stack. The effect can be seen from the subsequent stack -dump, where the undefined word at addresses 2147483568 to ...571 (the variable -\fIa\fP) has been copied to the top of the stack at 2147483560 (copying -undefined values does not generate a warning). -Since we used the \fIem44\fP compiler, all pointers and ints in our dump are -4 bytes long. -So a variable at address X in reality extends from address X to X+3. -.br -Note that this is not the offending instruction; this stack dump represents -the situation just before the error. -.PP -The stack consists of a sequence of frames, each containing data followed by -a Return Status Block resulting from a call; the last frame ends in -top-of-stack. The first frame represents the stack when the program starts, -through a call to the start-up routine. This routine prepares the second -stack frame with the actual parameters to \fImain()\fP: -\fIargc\fP at 2147483596, \fIargv\fP at 2147483600 and \fIenviron\fP at -2147483604. -.LP -The RSB line shows that the call to \fImain()\fP was made from procedure 0 -which has 0 locals, with PC at -16, an LB of 2147483608 and file name and line number still unknown. -The \fIcode\fP in the RSB tells how this RSB was made; possible values are STP -(start-up), CAL, RTT (returnable trap) and NRT (non-returnable trap). -.PP -The next frame shows the local variable(s) of \fImain()\fP; there are two of -them, the pointer \fIa\fP at 2147483568, which is undefined, and variable -\fIb\fP at 2147483564, which has the value 777. Then comes a copy of \fIa\fP, -just made by the LOL instruction, at 2147483560. 
The following line shows that -the Function Return Area (which does not reside at the end of the stack, but -just happens to be printed here) has size 0 and is presently undefined. -The stack dump ends -by showing that the Actuals Base is at 2147483596 (pointing at \fIargc\fP), the -Locals Base at 2147483572 (pointing just above the local \fIa\fP), the Stack -Pointer at 2147483560 (pointing at the undefined pointer), the line count is 4 -and the file name is "t.c". -.LP -(Notice that there is one more stack frame than one would probably expect, the -one above the start-up routine.) -.LP -The Function Return Area -could have a size larger than 0 and still be undefined, for -example when an instruction that does not preserve the contents of the FRA has -just been executed; likewise the FRA could have size 0 and be defined -nevertheless, for example just after a RET 0 instruction. -.PP -All this has set the scene for the disaster which is about to strike in the -next instruction. This is indeed a LOI (LOad Indirect) of size 4, opcode 169; -it causes the message -.DS -warning: Local data pointer expected [stack.c: 242] -.DE -and its continuation -.DS -warning cont.: Actual memory is undefined -.DE -(detected in the interpreter file \fIstack.c\fP at line 242; this can be -useful for sorting out dubious semantics). We see that the effect, as shown in -the third frame of this stack dump (at instruction number 17) is somewhat -unexpected: the LOI has fetched the value 4 and stacked it. The reason is -that, unfortunately, undefinedness is not transitive in the interpreter. When -an undefined value is used in an operation (other than copying) a warning is -given, but thereafter the value is treated as if it were zero. So, after the -warning a normal null pointer remains, which is then used to pick up the value -at location 0. This is the place where the EM machine stores its current line -number, which is presently 4.
-.PP -The third stack dump shows the final effect: the value 4 has been unstacked -and copied to variable \fIb\fP at 2147483564 through an STL (STore Local) -instruction. -.PP -Since this form of logging dumps the stack only, the log file is relatively -small as dumps go. -Nevertheless, a useful excerpt can be obtained with the command -.DS -grep 'd1' int.log -.DE -This extracts the Return Status Block lines from the log, thus producing three -traces of calls, one for each instruction in the log: -.DS - d1 >> RSB: code = STP, PI = uninit, PC = 0, LB = 2147483644, LIN = 0, FIL = NULL - d1 >> RSB: code = CAL, PI = (0,0), PC = 16, LB = 2147483608, LIN = 0, FIL = NULL - d1 >> AB = 2147483596, LB = 2147483572, SP = 2147483560, HP = 848, LIN = 4, FIL = "t.c" - d1 >> RSB: code = STP, PI = uninit, PC = 0, LB = 2147483644, LIN = 0, FIL = NULL - d1 >> RSB: code = CAL, PI = (0,0), PC = 16, LB = 2147483608, LIN = 0, FIL = NULL - d1 >> AB = 2147483596, LB = 2147483572, SP = 2147483560, HP = 848, LIN = 4, FIL = "t.c" - d1 >> RSB: code = STP, PI = uninit, PC = 0, LB = 2147483644, LIN = 0, FIL = NULL - d1 >> RSB: code = CAL, PI = (0,0), PC = 16, LB = 2147483608, LIN = 0, FIL = NULL - d1 >> AB = 2147483596, LB = 2147483572, SP = 2147483564, HP = 848, LIN = 4, FIL = "t.c" -.DE -Theoretically, the pertinent trace is the middle one, but in practice all three -are equal. In the present case there isn't much to trace, but in real programs -the trace can be useful. -.sp -.LP -.I "Errors in libraries" -.PP -Since libraries are generally compiled with suppression of line number and -file name information, the line number and file name in the interpreter will -not be updated when it enters a library routine. Consequently, all messages -generated by interpreting library routines will seem to originate from the -line of the call. This is especially true for the routine malloc(), which, -from the nature of its business, often contains dubitable code. 
-.PP -A usual message is: -.DS -(Warning 43, #1): Local integer expected at "buff.c", line 18, INR = 266 -(Warning 64, cont.): Actual memory contains a data pointer at "buff.c", line 18, INR = 266 -.DE -and indeed at line 18 of the file buff.c we find: -.DS - buff = malloc(buff_size = BFSIZE); -.DE -This problem can be avoided by using a specially compiled version of the -library that contains the correct LIN and FIL instructions, or, less -elegantly, by including the source code of the library routines in the -program; in the latter case, one has to be sure to have them all. -.sp -.LP -.I "Unavoidable messages" -.br -Some messages produced by the logging are almost unavoidable; sometimes the -writer of a library routine is forced to take liberties with the semantics of -EM. -.LP -Examples from C include the memory allocation routines. -For efficiency reasons, one bit of an pointer in the administration is used as -a flag; setting, clearing and reading this bit requires bitwise operations on -pointers, which gives the above messages. -Realloc causes a problem in that it may have to copy the originally allocated -area to a different place; this area may contain uninitialised bytes. -.bp -.DS -.ft CW -@x9 "t.c", line 4, INR = 16, PC = 30 OPCODE = 180 -@L6 "t.c", line 4, INR = 16, DoLOLm(-4) - d2 - d2 . . STACK_DUMP[4/4] . . INR = 16 . . STACK_DUMP . . 
- d2 ---------------------------------------------------------------- - d2 ADDRESS BYTE ITEM VALUE SHADOW - d2 2147483643 0 (Dp) - d2 2147483642 0 (Dp) - d2 2147483641 0 (Dp) - d2 2147483640 40 [ 40] (Dp) - d2 2147483639 0 (Dp) - d2 2147483638 0 (Dp) - d2 2147483637 3 (Dp) - d2 2147483636 64 [ 832] (Dp) - d2 2147483635 0 (In) - d2 2147483634 0 (In) - d2 2147483633 0 (In) - d2 2147483632 1 [ 1] (In) - d1 >> RSB: code = STP, PI = uninit, PC = 0, LB = 2147483644, LIN = 0, FIL = NULL - d2 - d2 ADDRESS BYTE ITEM VALUE SHADOW - d2 2147483607 0 (Dp) - d2 2147483606 0 (Dp) - d2 2147483605 0 (Dp) - d2 2147483604 40 [ 40] (Dp) - d2 2147483603 0 (Dp) - d2 2147483602 0 (Dp) - d2 2147483601 3 (Dp) - d2 2147483600 64 [ 832] (Dp) - d2 2147483599 0 (In) - d2 2147483598 0 (In) - d2 2147483597 0 (In) - d2 2147483596 1 [ 1] (In) - d1 >> RSB: code = CAL, PI = (0,0), PC = 16, LB = 2147483608, LIN = 0, FIL = NULL - d2 - d2 ADDRESS BYTE ITEM VALUE SHADOW - d2 2147483571 undef - d2 | | | | | | - d2 2147483568 undef (1 word) - d2 2147483567 0 (In) - d2 2147483566 0 (In) - d2 2147483565 3 (In) - d2 2147483564 9 [ 777] (In) - d2 2147483563 undef - d2 | | | | | | - d2 2147483560 undef (1 word) - d2 FRA: size = 0, undefined - d1 >> AB = 2147483596, LB = 2147483572, SP = 2147483560, HP = 848, \e - LIN = 4, FIL = "t.c" - d2 ---------------------------------------------------------------- - d2 -@x9 "t.c", line 4, INR = 17, PC = 31 OPCODE = 169 -@w1 "t.c", line 4, INR = 17, warning: Local data pointer expected [stack.c: 242] -@w1 "t.c", line 4, INR = 17, warning cont.: Actual memory is undefined -@L6 "t.c", line 4, INR = 17, DoLOIm(4) - d2 - d2 . . STACK_DUMP[4/4] . . INR = 17 . . STACK_DUMP . . 
- d2 ---------------------------------------------------------------- - d2 ADDRESS BYTE ITEM VALUE SHADOW - d2 2147483643 0 (Dp) - d2 2147483642 0 (Dp) - d2 2147483641 0 (Dp) - d2 2147483640 40 [ 40] (Dp) - d2 2147483639 0 (Dp) - d2 2147483638 0 (Dp) - d2 2147483637 3 (Dp) - d2 2147483636 64 [ 832] (Dp) - d2 2147483635 0 (In) - d2 2147483634 0 (In) - d2 2147483633 0 (In) - d2 2147483632 1 [ 1] (In) - d1 >> RSB: code = STP, PI = uninit, PC = 0, LB = 2147483644, LIN = 0, FIL = NULL - d2 - d2 ADDRESS BYTE ITEM VALUE SHADOW - d2 2147483607 0 (Dp) - d2 2147483606 0 (Dp) - d2 2147483605 0 (Dp) - d2 2147483604 40 [ 40] (Dp) - d2 2147483603 0 (Dp) - d2 2147483602 0 (Dp) - d2 2147483601 3 (Dp) - d2 2147483600 64 [ 832] (Dp) - d2 2147483599 0 (In) - d2 2147483598 0 (In) - d2 2147483597 0 (In) - d2 2147483596 1 [ 1] (In) - d1 >> RSB: code = CAL, PI = (0,0), PC = 16, LB = 2147483608, LIN = 0, FIL = NULL - d2 - d2 ADDRESS BYTE ITEM VALUE SHADOW - d2 2147483571 undef - d2 | | | | | | - d2 2147483568 undef (1 word) - d2 2147483567 0 (In) - d2 2147483566 0 (In) - d2 2147483565 3 (In) - d2 2147483564 9 [ 777] (In) - d2 2147483563 0 (In) - d2 2147483562 0 (In) - d2 2147483561 0 (In) - d2 2147483560 4 [ 4] (In) - d2 FRA: size = 0, undefined - d1 >> AB = 2147483596, LB = 2147483572, SP = 2147483560, HP = 848, \e - LIN = 4, FIL = "t.c" - d2 ---------------------------------------------------------------- - d2 -@x9 "t.c", line 4, INR = 18, PC = 32 OPCODE = 229 -@S6 "t.c", line 4, INR = 18, DoSTLm(-8) - d2 - d2 . . STACK_DUMP[4/4] . . INR = 18 . . STACK_DUMP . . 
- d2 ---------------------------------------------------------------- - d2 ADDRESS BYTE ITEM VALUE SHADOW - d2 2147483643 0 (Dp) - d2 2147483642 0 (Dp) - d2 2147483641 0 (Dp) - d2 2147483640 40 [ 40] (Dp) - d2 2147483639 0 (Dp) - d2 2147483638 0 (Dp) - d2 2147483637 3 (Dp) - d2 2147483636 64 [ 832] (Dp) - d2 2147483635 0 (In) - d2 2147483634 0 (In) - d2 2147483633 0 (In) - d2 2147483632 1 [ 1] (In) - d1 >> RSB: code = STP, PI = uninit, PC = 0, LB = 2147483644, LIN = 0, FIL = NULL - d2 - d2 ADDRESS BYTE ITEM VALUE SHADOW - d2 2147483607 0 (Dp) - d2 2147483606 0 (Dp) - d2 2147483605 0 (Dp) - d2 2147483604 40 [ 40] (Dp) - d2 2147483603 0 (Dp) - d2 2147483602 0 (Dp) - d2 2147483601 3 (Dp) - d2 2147483600 64 [ 832] (Dp) - d2 2147483599 0 (In) - d2 2147483598 0 (In) - d2 2147483597 0 (In) - d2 2147483596 1 [ 1] (In) - d1 >> RSB: code = CAL, PI = (0,0), PC = 16, LB = 2147483608, LIN = 0, FIL = NULL - d2 - d2 ADDRESS BYTE ITEM VALUE SHADOW - d2 2147483571 undef - d2 | | | | | | - d2 2147483568 undef (1 word) - d2 2147483567 0 (In) - d2 2147483566 0 (In) - d2 2147483565 0 (In) - d2 2147483564 4 [ 4] (In) - d2 FRA: size = 0, undefined - d1 >> AB = 2147483596, LB = 2147483572, SP = 2147483564, HP = 848, \e - LIN = 4, FIL = "t.c" - d2 ---------------------------------------------------------------- - d2 -.DE diff --git a/doc/int/bib b/doc/int/bib deleted file mode 100644 index 058c3d838..000000000 --- a/doc/int/bib +++ /dev/null @@ -1,25 +0,0 @@ -.\" Bibliography -.\" -.\" $Id$ -.bp -.DS C -BIBLIOGRAPHY -.DE -.LP -[1] A.S. Tanenbaum, H. van Staveren, E.G. Keizer and J.W. Stevenson. -\fIDescription of a Machine Architecture for use with Block Structured -Languages\fP. VU Informatica Rapport IR-81, august 1983. -.LP -[2] E.G. Keizer. \fIAck description file reference manual.\fP -.LP -[3] K. Jensen and N. Wirth. -\fIPASCAL, User Manual and Report\fP. Springer Verlag. -.LP -[4] B.W. Kernighan and D.M. Ritchie. -\fIThe C Programming Language\fP. Prentice-Hall, 1978. -.LP -[5] D.M. 
Ritchie. \fIC Reference Manual\fP. -.LP -[6] \fIAmsterdam Compiler Kit, reference manual.\fP -.LP -[7] \fIUnix Programmer's Manual, 4.1BSD\fP. UCB, August 1983. diff --git a/doc/int/cover b/doc/int/cover deleted file mode 100644 index b52f7a9c2..000000000 --- a/doc/int/cover +++ /dev/null @@ -1,26 +0,0 @@ -.\" Front page -.\" -.\" $Id$ -.TL -The EM Interpreter -.AU -Eddo de Groot -Leo van den Berge -Dick Grune -.AI -Faculteit Wiskunde en Informatica -Vrije Universiteit, Amsterdam -.AB -This document describes the implementation -and usage of a new interpreter for the EM machine language. -This interpreter implements the full EM machine -and can be helpful to people writing new front-ends. -Moreover, it can be used as a thorough testing and debugging -tool by anyone familiar with the EM language. -.PP -A list of all warnings is given in appendix A; appendix B is a simple -tutorial. -.AE -.PP -.pn 1 -.bp diff --git a/doc/int/draw.mac b/doc/int/draw.mac deleted file mode 100644 index e035e264b..000000000 --- a/doc/int/draw.mac +++ /dev/null @@ -1,24 +0,0 @@ -.\" Macros for simple constant width drawings (uses font CW) -.\" -.\" $Id$ -.de Dr \" Drawing $1 (size) -.sp 1 -.ne \\$1 -.na -.nf -.ft CW \" constant width font -.lg 0 \" no ligatures -.. -.de Df \" Drawing Footer -.sp 1 -.ft R -.ce 1000 -.lg 1 -.. -.de De \" Drawing End $1 (lines) -.Df \" if it has not happened yet -.ce -.ad -.fi -.sp \\$1 -.. diff --git a/doc/int/proto.make b/doc/int/proto.make deleted file mode 100755 index e2d99f207..000000000 --- a/doc/int/proto.make +++ /dev/null @@ -1,18 +0,0 @@ -# $Id$ - -#PARAMS do not remove this line! 
- -SRC_DIR = $(SRC_HOME)/doc/int -TBL=tbl - -DOC = $(SRC_DIR)/draw.mac \ - $(SRC_DIR)/cover \ - $(SRC_DIR)/txt1 \ - $(SRC_DIR)/txt2 \ - $(SRC_DIR)/txt3 \ - $(SRC_DIR)/appA \ - $(SRC_DIR)/appB \ - $(SRC_DIR)/bib - -$(TARGET_HOME)/doc/int.doc: $(DOC) - $(TBL) $(DOC) > $@ diff --git a/doc/int/txt1 b/doc/int/txt1 deleted file mode 100644 index 4a37f4f59..000000000 --- a/doc/int/txt1 +++ /dev/null @@ -1,215 +0,0 @@ -.\" Introduction -.\" -.\" $Id$ -.NH -INTRODUCTION. -.PP -This document describes an EM interpreter which does extensive checking. -The interpreter exists in two versions: the normal version with full checking -and debugging facilities, and a fast stripped version that does interpretation -only. -This document assumes that the full version is used. -.LP -First the virtual EM machine embodied by the interpreter (called \fBint\fP) is -described, followed by some remarks on performance. -The second section gives some specific implementation decisions. -Section three explains the usage of the built-in debugging tool. -.LP -Appendix A gives an overview of the various warnings \fBint\fP gives, -with possible causes and solutions. -Appendix B is a simple tutorial on the use of \fBint\fP. -A separate manual page exists. -.PP -The document assumes a good understanding of what EM is and what -the assembly code looks like [1]. -Notions like 'procedure descriptor', 'mini', 'shortie' etc. are not -explained. -In the sequel, any word in \fIthis font\fP refers to the name of a -variable, constant, function or whatever, used in the source code under -the same name. -.LP -To avoid confusion: \fBint\fP interprets EM machine language (e.out files), -\fInot\fP the assembly language (.e files) and \fInot\fP the compact -code (.k files). -.NH 2 -The virtual EM machine. -.PP -The memory layout of the virtual EM machine represented by the interpreter -differs in details from the description in [1]. 
-Virtual memory is split up into two separate spaces: -one space containing the instructions, -the other all the data, including stack and heap (D-space). -The procedure descriptors are preprocessed and stored in a separate array, -\fIproctab[]\fP. -Both spaces start off at address 0. -This is possible because pointers in the two different spaces are -distinguishable by context (and shadow-bytes: see 2.6). -.NH 3 -Instruction Space -.PP -Figure 1 shows the I-space, together with the position of some important -EM registers. -.Dr 12 - NEXT --> |________________| <-- DB \e - | | | - | | | T - | | <-- PC | - | Program | | e - | | | - | Text | | x - | | | - | | | t - 0 --> |________________| <--(PB) / -.Df -\fI Fig 1. Virtual instruction space (I-space).\fP -.De -.PP -The I-space is just big enough to contain all the instructions. -The size needed for the program text (\fINTEXT\fP) is found from the -header-bytes of the loadfile. -Legal values for the program counter (\fIPC\fP) consist of all -addresses in the range from 0 through \fINTEXT\fP \- 1. -If the \fIPC\fP is made to point to an illegal address, a trap will occur. -.NH 3 -The Procedure Table -.PP -The \fINProc\fP constant indicates how many procedure descriptors there -are in the proctab array. -Elements of this array contain for each procedure: the number of locals, the -entry point and the entry point of the textually following procedure. This is -used in testing the restriction that the program counter may not wander from -procedure to procedure. -.NH 3 -The Data Space -.PP -Figure 2 shows the layout of the data space, which closely conforms to the EM -Manual. -.Dr 36 - __________________ - maxaddr(psize) --> | | <-- ML \e - | | | S - | Locals | | t - | & | | a - | RSBs | | c - | | | k - |________________| <-- SP / - . . - . . - . Unused . - . . - . . - . . - . . - . . - . Unused . - . . - . . 
- |________________| <-- HP - | | \e - | Heap | | - |________________| <-- HB | - | | | D - | Arguments | | - | Environ | | a - | _ _ _ _ | | - | | | t - | | | - | | | a - | Global data | | - | | | - | | | - 0 --> |________________| <--(EB) / -.Df -\fI Fig 2. Virtual dataspace (D-space).\fP -.De -.PP -D-space begins at address 0, and ends at the largest address -representable by the pointer size (\fIpsize\fP) being used; -for a 2-byte pointer size this maximum address is -.DS -((2 ^ 16 \- 1) / word size * word size) \- 1 -.DE -for a 4-byte pointer size it is -.DS -((2 ^ 31 \- 1) / word size * word size) \- 1 -.DE -(not 2 ^ 32, to allow illegal pointers to be implemented in the future). The -funny rounding construction is required to make ML+1 expressible as the -initialisation value of LB and SP. -.PP -D-space is split into two partitions: Data and Stack (indicated by the -brackets). -The Data partition holds the global data area (GDA) and the heap. -Its initial size is given by the loadfile constant SZDATA. -Some space is added to it, because arguments and environment are -stored here also. -This total size is static while interpreting. -However, as the heap may grow during execution (e.g. caused by dynamic -allocation) this results in a variable size for the Data partition. -Initially, the size for the Data partition is the sum of the space needed -by the GDA (including the space needed for arguments and environment) and -the initial heapspace. -The lowest legal Data address is 0; the highest \fIHP\fP \- 1. -.LP -The Stack partition holds the stack. -It begins at the highest available D-space address, and grows -towards the low addresses, so the Stack partition is of variable size too. -The lowest legal Stack address is the stackpointer (\fISP\fP), -the highest is the memory limit (\fIML\fP). -.NH 2 -Physical lay-out -.PP -Each partition is mapped onto a piece of physical memory with the -same name: \fItext\fP (fig. 1), \fIstack\fP and \fIdata\fP (fig. 2). 
-These are the storage structures which \fBint\fP uses to physically -store the contents of the virtual EM spaces. -Figure 2 thus shows the mapping of D-space onto two -different physical parts: \fIstack\fP and \fIdata\fP. -The I-space is represented by one physical part: \fItext\fP. -.LP -Each time more space is needed, the actual partition is reallocated, -with the new size being computed with the formula: -.DS -\fInew size\fP = 1.5 \(mu (\fIold size\fP + \fIextra\fP) -.DE -\fIextra\fP is the number of bytes exceeding the \fIold size\fP. -One can prove that using this method, there is a -linear relationship between allocation time and needed partition size. -.PP -A virtual D-space starting at address 0 is in correspondence with -the definition in [1], p. 3\-6. -The main reason for having D-space start at address 0, is that it induces -a one-one correspondence between the heap \- and GDA -addresses on the virtual machine (and hence the definition) on one hand, -and the offset within the \fIdata\fP partition on the other. -This implies that no extra calculation is needed to perform load and -storage operations. -.LP -Some calculation however cannot be avoided, because the stack part of -the D-space grows downwards by EM definition. -The first address of the virtual stack (\fIML\fP, the maximum address for -the given \fIpsize\fP) is mapped onto the -beginning of the \fIstack\fP partition. -When the stack grows (i.e. EM addresses get lower), the offset within the -\fIstack\fP partition gets higher. -By taking offset \fIML \- A\fP in the stack partition, one obtains the -physical address corresponding to some virtual EM (stack) address \fIA\fP. -.NH 2 -Speed. -.PP -From several test results with both versions of the interpreter, the -following may be concluded. -The speed of the interpreter depends strongly on the type of -program being interpreted. -If plain CPU arithmetic is performed, the interpreter is -relatively slow (1000 \(mu the cc version). 
-When stack manipulation is at hand, the interpreter is -quite fast (100 \(mu the cc version). -.LP -Most programs however will not be this extreme, so an interpretation -time of somewhere between 300 and 500 times direct execution -for a normal program is to be expected. -.LP -The fast version runs in about 60% of the time of the full version, at the -expense of a considerably lower functionality. -Tallying costs about 10%. diff --git a/doc/int/txt2 b/doc/int/txt2 deleted file mode 100644 index e9acd9f8c..000000000 --- a/doc/int/txt2 +++ /dev/null @@ -1,589 +0,0 @@ -.\" Implementation details -.\" -.\" $Id$ -.bp -.NH -IMPLEMENTATION DETAILS. -.PP -The pertinent issues are addressed below, in arbitrary order. -.NH 2 -Stack manipulation and start-up -.PP -It is not at all easy to start the EM machine with the stack in a reasonable -and consistent state. One reason is the anomalous value of the ML register -and another is the absence of a proper RSB. It may be argued that the initial -stack does not have to be in a consistent state, since the first instruction -proper is only executed after \fIargc\fP, \fIargv\fP and \fIenviron\fP -have been stacked (which takes care of the empty stack) and the initial -procedure has been called (which creates a RSB). We would, however, like to -perform the stacking of these values and the calling of the initial procedure -using the normal stack and call routines, which again require the stack to be -in an acceptable state. -.NH 3 -The anomalous value of the ML register -.PP -All registers in the EM machine point to word boundaries, and all of them, -except ML, address the even-numbered byte at the boundary. -The exception has a good reason: the even numbered byte at the ML boundary does -not exist.
-This problem is not particular to EM but is inherent in the number system: the -number of N-digit numbers can itself not be expressed in an N-digit number, and -the number of addresses in an N-bit machine will itself not fit in an N-bit -address. The problem is solved in the interpreter by having ML point to the -highest word boundary that has bytes on either side; this makes ML+1 -expressible. -.NH 3 -The absence of an initial Return Status Block -.PP -When the stack is empty, there is no legal value for AB, since there are no -actuals; LB can be set naturally to ML+1. This is all right when the -interpreter starts with a call of the initial routine which stores the value -of LB in the first RSB, but causes problems when finally this call returns. We -want this call to return completely before stopping the interpreter, to check -the integrity of the last RSB; restoring information from it will, however, -cause illegal values to be stored in LB and AB (ML+1 and ML+1+rsbsize, resp.). -On top of this, the initial (illegal) Procedure Identifier of the running -procedure will be restored; then, upon restoring the likewise illegal PC will -cause a check to see if it still is inside the running procedure. After a few -attempts at writing special cases, we have decided that it is possible, but not -worth the effort; the final (= initial) RSB will not be unstacked. -.NH 2 -Floating point numbers. -.PP -The interpreter is capable of working with 4- and 8-byte floating point (FP) -numbers. -In C-terms, this corresponds to objects of type float and double respectively. -Both types fit in a C-double so the obvious way to manipulate these entities -internally is in doubles. -Pushing a 8-byte FP, all bytes of the C-double are pushed. -Pushing a 4-byte FP causes the 4 bytes representing the smallest fraction -to be discarded. -.PP -In EM, floats can be obtained in two different ways: via conversion -of another type, or via initialization in the loadfile. 
-Initialized floats are represented in the loadfile by an ASCII string in -the syntax of a Pascal real (signed \fIUnsignedReal\fP). -I.e. a float looks like: -.DS -[ \fISign\fP ] \fIDigit\fP+ [ . \fIDigit\fP+ ] [ \fIExp\fP [ \fISign\fP ] \fIDigit\fP+ ] (G1) -.DE -followed by a null byte. -Here \fISign\fP = {+, \-}; \fIDigit\fP = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; -\fIExp\fP = {e, E}; [ \fIAnything\fP ] means that \fIAnything\fP is optional; -and a + means one or more times. -To accommodate some loose code generators, the actual grammar accepted is: -.DS -[ \fISign\fP ] \fIDigit\fP\(** [ . \fIDigit\fP\(** ] [ \fIExp\fP [ \fISign\fP ] \fIDigit\fP+ ] (G2) -.DE -followed by a null byte. Here \(** means zero or more times. A floating -denotation which is in G2 but not in G1 draws a warning, one that is not even -in G2 causes a fatal error. -.LP -A string, representing a float which does not fit in a double causes a -warning to be given. -In that case, the returned value will be the double 0.0. -.LP -Floating point arithmetic is handled by some simple routines, checking for -over/underflow, and returning appropriate values in case of an ignored error. -.PP -Since not all C compilers provide floating point operations, there is a -compile time flag NOFLOAT, which, if defined, suppresses the use of all -fp operations in the interpreter. The resulting interpreter will still load -EM files with floats in the global data area (and ignore them) but will give a -fatal error upon attempt to execute a floating point instruction; consequently -code involving floating point operations can be run as long as the actual -instructions are avoided. -.NH 2 -Pointers. -.PP -The following sub-sections both deal with problems concerning pointers. -First, something is said about pointer arithmetic in general. -Then, the null-pointer problem is dealt with. -.NH 3 -Pointer arithmetic. -.PP -Strictly speaking, pointer arithmetic is defined only within a \fBfragment\fP.
-From the explanation of the term fragment however (as given in [1], page 3), -it is not quite clear what a fragment should look like -from an interpreter's point of view. -For this reason we introduced the term \fBsegment\fP, -bordering the various areas within which pointer arithmetic is allowed. -Every stack-frame is a segment, and so are the global data area (GDA) and -the heap area. -Thus, the number of segments varies over time, and at some point in time is -given by the number of currently active stack-frames -(#CAL + #CAI \- #RET \- #RTT) plus 2 (gda, heap). -Pointers in the area between heap and stack (which is inaccessible by -definition), are assumed to be in the heap segment. -.PP -The interpreter, while building a new stack-frame (i.e. segment), stores the -value of the last ActualBase in a pointer-array (\fIAB_list[\ ]\fP). -When a pointer (say \fIP\fP) is available for arithmetic, the number -of the segment where it points (say \fIS\d\s-2P\s+2\u\fP), -is determined first. -Next, the arithmetic is performed, followed by a check on the number -of the segment where the resulting pointer \fIR\fP points -(say \fIS\d\s-2R\s+2\u\fP). -Now, if \fIS\d\s-2P\s+2\u != S\d\s-2R\s+2\u\fP, a warning is given: -\fBPointer arithmetic yields pointer to bad segment\fP. -.br -It may also be clear now, why the illegal area between heap and stack -was joined with the heap segment. -When calculating a new heap pointer (\fIHP\fP), one will obtain intermediate -results being pointers in this area just before it is made legal. -We do not want error messages all of the time, just because someone is -allocating space in the heap. -.LP -A similar treatment is given to the pointers in the SBS instruction; they have -to point into the same fragment for subtraction to be meaningful. -.LP -The length of the \fIAB_list[\ ]\fP is initially 100, -and it is reallocated in the same way the dynamically growing partitions -are (see 1.1). -.NH 3 -Null pointer. 
-.PP -Because the EM language lacks an instruction for loading a null pointer, -most programs solve this problem by loading a pointer-sized integer of -value zero, and using this as a null pointer (this is also proposed in [1]). -\fBInt\fP allows this, and will not complain. -A warning is given however, when an attempt is made to add something to a -null pointer (i.e. the pointer-sized integer zero). -.LP -Since many programming languages use a pointer to location 0 as an illegal -value, it is desirable to detect its use. -The big problem is though that 0 is a perfectly legal EM address; -address 0 holds the current line number in the source file. It may be freely -read but is written only by means of the LIN instruction. This allows us to -declare the area consisting of the line number and the file name pointer to be -read-only memory. Thus a store will be caught (and result in a warning) but a -read will succeed (and yield the EM information stored there). -.NH 2 -Function Return Area (FRA). -.PP -The Function Return Area (\fIFRA[\ ]\fP) has a default size of 8 bytes; -this default can -be overridden through the use of the \fB\-r\fP-option, but cannot be -made smaller than the size of two pointers, in accordance with the -remark on page 5 of [1]. -The global variable \fIFRASize\fP keeps track of how many bytes were -stored in the FRA, the last time a RET instruction was executed. -The LFR instruction only works when its argument is equal to this size. -If not, the FRA contents are loaded anyhow, but one of the following warnings -is given: -\fBReturned function result too large\fP (\fIFRASize\fP > LFR size) or -\fBReturned function result too small\fP (\fIFRASize\fP < LFR size). -.LP -Note that a C-program, falling through the end of its code without doing -a proper \fIreturn\fP or \fIexit()\fP, will generate this warning. -.PP -The only instructions that do not disturb the contents of the FRA are -GTO, BRA, ASP and RET. 
-This is expressed in the program by setting \fIFRA_def\fP to "undefined" -in any instruction except these four. -We realize this is a useless action most of the time, but a more -efficient solution does not seem to be at hand. -If a result is loaded when \fIFRA_def\fP is "undefined", the warning: -\fBReturned function result may be garbled\fP is generated. -.LP -Note that the FRA needs a shadow-FRA in order to store the shadow -information when performing a LFR instruction. -.NH 2 -Environment interaction. -.PP -The EM machine represented by \fBint\fP can communicate with -the environment in three different ways. -A first possibility is by means of (UNIX) interrupts; -the second by executing (relatively) high level system calls (called -monitor calls). -A third means of interaction, especially interesting for the debugging -programmer, is via internal variables set on the command line. -The former two techniques, and the way they are implemented will be described -in this section. -The latter has been allotted a separate section (3). -.NH 3 -Traps and interrupts. -.PP -Simple user programs will generally not mess around with UNIX-signals. -In interpreting these programs, the default actions will be taken -when a signal is received by the program: it gives a message and -stops running. -.LP -There are programs however, which try to handle certain signals -themselves. -In C, this is achieved by the system call \fIsignal(\ sig_no,\ catch\ )\fP, -which calls the handling routine \fIcatch()\fP, as soon as signal -\fBsig_no\fP occurs. -EM does not provide this call; instead, the \fIsigtrp()\fP monitor call -is available for mapping UNIX signals onto EM traps. -This implies that a \fIsignal()\fP call in a C-program -must be translated by the EM library routine to a \fIsigtrp()\fP call in EM. -.PP -The interpreter keeps an administration of the mapping of UNIX-signals -onto EM traps in the array \fIsig_map[NSIG]\fP. -Initially, the signals all have their default values. 
-Now assume a \fIsigtrp()\fP occurs, telling to map signal \fBsig_no\fP onto -trap \fBtrap_no\fP. -This results in: -.IP 1. -setting the relevant array element -\fIsig_map[sig_no]\fP to \fBtrap_no\fP (after saving the old value), -.IP 2. -catching the next to come \fBsig_no\fP signal with the handling routine -\fIHndlEMSig\fP (by a plain UNIX \fIsignal()\fP of course), and -.IP 3. -returning the saved map-value on the stack so the user can know the previous -trap value onto which \fBsig_no\fP was mapped. -.LP -On an incoming signal, -the handling routine for signal \fBsig_no\fP arms the -correct EM trap by calling the routine \fIarm_trap()\fP with argument -\fIsig_map[sig_no]\fP. -At the end of the EM instruction the proper call of \fItrap()\fP is done. -\fITrap()\fP on its turn examines the value of the \fIHaltOnTrap\fP variable; -if it is set, the interpreter will stop with a message. In the normal case of -controlled trap handling this bit is not on and the interpreter examines -the value of the \fITrapPI\fP variable, -which contains the procedure identifier of the EM trap handling routine. -It then initiates a call to this routine and performs a \fIlongjmp()\fP -to the main -loop to bypass all further processing of the instruction that caused the trap. -\fITrapPI\fP should be set properly by the library routines, through the -SIG instruction. -.LP -In short: -.IP 1. -A UNIX interrupt is caught by the interpreter. -.IP 2. -A handling routine is called which generates the corresponding EM trap -(according to the mapping). -.IP 3. -The trap handler calls the corresponding EM routine which emulates a UNIX -interrupt for the benefit of the interpreted program. -.PP -When considering UNIX signals, it is important to notice that some of them -are real signals, i.e., messages coming from outside the program, like DEL -and QUIT, but some are actually program-caused synchronous traps, like Illegal -Instruction. 
The latter, if they happen, are incurred by the interpreter -itself and consequently are of no concern to the interpreted program: it -cannot catch them. The present code assumes that the UNIX signals between -SIGILL (4) and SIGSYS (12) are really traps; \fIdo_sigtrp()\fP -will fail on them. -.LP -To avoid losing the last line(s) of output files, the interpreter should -always do a proper close-down, even in the presence of signals. To this end, -all non-ignored genuine signals are initially caught by the interpreter, -through the routine \fIHndlIntSig\fP, which gives a message and performs a -proper close-down. -Synchronous traps can only be caused by the interpreter itself; they are never -caught, and consequently the UNIX default action prevails. Generally they -cause a core dump. -Signals requested by the interpreted program are caught by the routine -\fIHndlEMSig\fP, as explained above. -.NH 3 -Monitor calls. -.PP -For the convenience of the programmer, as many monitor calls as possible -have been implemented. -The list of monitor calls given in [1] pages 20/21, has been implemented -completely, except for \fIptrace()\fP, \fIprofil()\fP and \fImpxcall()\fP. -The semantics of \fIptrace()\fP and \fIprofil()\fP from an interpreted program -is unclear; the data structure passed to \fImpxcall()\fP is non-trivial -and the system call has low portability and applicability. -For these calls, on invocation a warning is generated, and the arguments which -were meant for the call are popped properly, so the program can continue -without the stack being messed up. -The errorcode 5 (IOERROR) is pushed onto the stack (twice), in order to -fake an unsuccessful monitor call. -No other \- more meaningful \- errorcode is available in the errno-list. -.LP -Now for the implemented monitor calls. -The returned value is zero for a successful call.
-When something goes wrong, the value of the external \fIerrno\fP variable -is pushed, thus enabling the user to find out what the reason of failure was. -The implementation of the majority of the monitor calls is straightforward. -Those working with a special format buffer, (e.g. \fIioctl()\fP, -\fItime()\fP and \fIstat()\fP variants), need some extra attention. -This is due to the fact that working with varying word/pointer size -combinations may cause alignment problems. -.LP -The data structure returned by the UNIX system call results from -C code that has been translated with the regular C compiler, which, -on the VAX, happens to be a 4-4 compiler. -The data structure expected by the interpreted program conforms -to the translation by \fBack\fP of the pertinent include file. -Depending on the exact call of \fBack\fP, sizes and alignment may differ. -.LP -An example is in order. The EM MON 18 instruction in the interpreted program -leads to a UNIX \fIstat()\fP system call by the interpreter. -This call fills the given struct with stat information, the contents -and alignments of which are determined by the version of UNIX and the -used C compiler, resp. -The interpreter, like any program wishing to do system calls that fill -structs, has to be translated by a C compiler that uses the -appropriate struct definition and alignments, so that it can use, e.g., -\fIstab.st_mtime\fP and expect to obtain the right field. -This struct cannot be copied directly to the EM memory to fulfill the -MON instruction. -First, the struct may contain extraneous, system-dependent fields, -pertaining, e.g., to symbolic links, sockets, etc. -Second, it may contain holes, due to alignment requirements. -The EM program runs on an EM machine, knows nothing about these -requirements and expects UNIX Version 7 fields, with offsets as -determined by the em22, em24 or em44 compiler, resp. 
-To do the conversion, the interpreter has a built-in table of the -offsets of all the fields in the structs that are filled by the MON -instruction. -The appropriate fields from the result of the UNIX \fIstat()\fP are copied -one by one to the appropriate positions in the EM memory to be filled -by MON 18. -.PP -The \fIioctl()\fP call (MON 54) poses additional problems. Not only does it -have a second argument which is a pointer to a struct, the type of -which is dynamically determined, but its first argument is an opcode -that varies considerably between the versions of UNIX. -To solve the first problem, the interpreter examines the opcode (request) and -treats the second argument accordingly. The second problem can be solved by -translating the UNIX Version 7 \fIioctl()\fP request codes to their proper -values on the various systems. This is, however, not always useful, since -some EM run-time systems use the local request codes. There is a compile-time -flag, V7IOCTL, which, if defined, will restrict the \fIioctl()\fP call to the -version 7 request codes and emulate them on the local system; otherwise the -request codes of the local system will be used (as far as implemented). -.PP -Minor problems also showed up with the implementation of \fIexecve()\fP -and \fIfork()\fP. -\fIExecve()\fP expects three pointers on the stack. -The first points to the name of the program to be executed, -the second and third are the beginnings of the \fBargv\fP and \fBenvp\fP -pointer arrays respectively. -We cannot pass these pointers to the system call however, because -the EM addresses to which they point do not correspond with UNIX -addresses. -Moreover, (it is not very likely to happen but) what if someone constructs -a program holding the contents for one of these pointers in the stack? -The stack is implemented upside down, so passing the pointer to -\fIexecve()\fP causes trouble for this reason too. 
-The only solution was to copy the pointer contents completely -to fresh UNIX memory, constructing vectors which can be passed to the -system call. -Any impending memory fault while making these copies results in failure of the -system call, with \fIerrno\fP set to EFAULT. -.PP -The implementation of the \fIfork()\fP call faced us with problems -concerning IO-channels. -Checking messages (as well as logging) must be divided over different files. -Otherwise, these messages will coincide. -This problem was solved by post-fixing the default message file -\fBint.mess\fP (as well as the logging file \fBint.log\fP) with an -automatically leveled number for every new forked process. -Children of the original process do their diagnostics -in files with postfix 1,2,3 etc. -Second generation processes are assigned files numbered 11, 12, 21 etc. -When 6 generations of processes exist at one moment, the seventh will -get the same message file as the sixth, for the length of the filename -will become too long. -.PP -Some of the monitor calls receive pointers (addresses) from the program, to be -passed to the kernel; examples are the struct stat for \fIstat()\fP, the area -to be filled for \fIread()\fP, etc. If the address is wrong, the kernel does -not generate a trap, but rather the system call returns with failure, while -\fIerrno\fP is set to EFAULT. This is implemented by consistent checking of -all pointers in the MON instruction. -.NH 2 -Internal arithmetic. -.PP -Doing arithmetic on signed integers, the smallest negative integer -(\fIminsint\fP) is considered a legal value. -This is in contradiction with the EM Manual [1], page 14, which proposes using -\fIminsint\fP for uninitialized integers. -The shadow bytes already check for uninitialized integers however, -so we do not need this special illegal value. -Although the EM Manual provides two traps, for undefined integers and floats, -undefined objects occur so frequently (e.g.
in block copying partially -initialized areas) that the interpreter just gives a warning. -.LP -Except for arithmetic on unsigneds, all arithmetic checks for overflow. -The value that is pushed on the stack after an overflow occurs depends -on the UNIX behavior with regard to that particular calculation. -If UNIX would not accept the calculation (e.g. division by zero), a zero -is pushed as a convention. -Illegal computations which UNIX does accept in silence (e.g. one's -complement of \fIminsint\fP), simply push the UNIX-result after giving a -trap message. -.NH 2 -Shadow bytes implementation. -.PP -A great deal of run-time checking is performed by the interpreter (except if -used in the fast version). -This section gives all details about the shadow bytes. -In order to keep track of information about the contents of D-space (stack -and global data area), there is one shadow-byte for each byte in these spaces. -Each bit in a shadow-byte represents some piece -of information about the contents of its corresponding 'sun-byte'. -All bits off indicates an undefined sun-byte. -One or more bits on always guarantees a well-defined sun-byte. -The bits have the following meaning: -.IP "\(bu bit 0:" 8 -indicates that the sun-byte is (a part of) an integer. -.IP "\(bu bit 1:" 8 -the sun-byte is a part of a floating point number. -.IP "\(bu bit 2:" 8 -the sun-byte is a part of a pointer in dataspace. -.IP "\(bu bit 3:" 8 -the sun-byte is a part of a pointer in the instruction space. -According to [1] (paragraph 6.4), there are two types of pointers which -must be distinguishable. -Conversion between these two types is impossible. -The shadow-bytes make the distinction here. -.IP "\(bu bit 4:" 8 -protection bit. -Indicates that the sun-byte is part of a protected piece of memory. -There is a protected area in the stack, the Return Status Block. -The EM machine language has no possibility to declare protected -memory, as is possible in EM assembly (the ROM instruction).
The protection -bit is, however, set for the line number and filename pointer area near -location 0, to aid in catching references to location 0. -.IP "\(bu bit 5/6/7:" 8 -free for later use. -.LP -The shadow bytes are managed by the routines declared in \fIshadow.h\fP. -The warnings originating from checking these shadow-bytes during -run-time are various. -A list of them is given in appendix A, together with suggestions -(primarily for the C-programmer) where to look for the trouble maker(s). -.LP -A point to notice is, that once a warning is generated, it may be repeated -thousands of times. -Since repetitive warnings carry little information, but consume much -file space, the interpreter keeps track of the number of times a given warning -has been produced from a given line in a given file. -The warning message will -be printed only if the corresponding counter is a power of four (starting at -1). In this way, a logarithmic back-off in warning generation is established. -.LP -It might be argued that the counter should be kept for each (warning, PC -value) pair rather than for each (warning, file position) pair. Suppose, -however, that two instructions in a given line would cause the same message -regularly; this would produce two intertwined streams of identical messages, -with their counters jumping up and down. This does not seem desirable. -.NH 2 -Return Status Block (RSB) -.PP -According to the description in [1], at least the return address and the -base address of the previous RSB have to be pushed when performing a call. -Besides these two pointers, other information can be stored in the RSB -also.
-The interpreter pushes the following items: -.IP \- -a pointer to the current filename, -.IP \- -the current line number (always four bytes), -.IP \- -the Local Base, -.IP \- -the return address (Program Counter), -.IP \- -the current procedure identifier -.IP \- -the RSB code, which distinguishes between initial start-up, normal call, -returnable trap and non-returnable trap (a word-size integer). -.LP -Consequently, the size of the RSB varies, depending on -word size and pointer size; its value is available as \fIrsbsize\fP. -When the RSB is removed from the stack (by a RET or RTT) the RSB code is under -the Stack Pointer for immediate checking. It is not clear what should be done -if RSB code and return instruction do not match; at present we give a message -and continue, for what it is worth. -.PP -The reason for pushing filename and line number is that some front-ends tend -to forget the LIN and FIL instructions after returning from a function. -This may result in error messages in wrong source files and/or line numbers. -.PP -The procedure identifier is kept and restored to check that the PC will not -move out of the running procedure. The PI is an index in the proctab, which -tells the limits in the text segment of the running procedure. -.PP -If the Return Status Block is generated as a result of a trap, more is -stacked. Before stacking the normal RSB, the trap function pushes the -following items: -.IP \- -the contents of the entire Function Return Area, -.IP \- -the number of bytes significant in the above (a word-size integer), -.IP \- -a word-size flag indicating if the contents of the FRA are valid, -.IP \- -the trap number (a word-size integer). -.LP -The latter is followed directly by the RSB, and consequently acts as the only -parameter to the trap handler. -.NH 2 -Operand access. -.PP -The EM Manual mentions two ways to access the operands of an instruction. 
It -should be noticed that the operand in EM is often not the direct operand of the -operation; the operand of the ADI instruction, e.g., is the width of the -integers to be added, not one of the integers themselves. The various operand -types are described in [1]. Each opcode in the text segment identifies an -instruction with a particular operand type; these relations are described in -computer-readable format in a file in the EM tree, \fIip_spec.t\fP. -.PP -The interpreter uses the third method. Several other approaches -can be designed, with increasing efficiency and equally increasing complexity. -They are briefly treated below. -.NH 3 -The Dispatch Table, Method 1. -.PP -When the interpreter starts, it reads the ip_spec.t file and constructs from it -a dispatch table. This table (of which there are actually three, -for primary, secondary -and tertiary opcodes) has 256 entries, each describing an instruction with -indications on how to decode the operand. For each instruction executed, the -interpreter finds the entry in the dispatch table, finds information there on -how to access the operand, constructs the operand and calls the appropriate -routine with the operand as calculated. There is one routine for each -instruction, which is called with the ready-made operand. Method 1 is easy to -program but requires constant interpretation of the dispatch table. -.NH 3 -Intelligent Routines, Method 2. -.PP -For each opcode there is a separate routine, and since an opcode uniquely -defines the instruction and the operand format, the routine knows how to get -the operand; this knowledge is built into the routine. Preferably the heading -of the routine is generated automatically from the ip_spec.t file. Operand -decoding is immediate, and no dispatch table is needed. Generation of the -469 required routines is, however, far from simple. Either a generated array -of routine names or a generated switch statement is used to map the opcode onto -the correct routine. 
The switch approach has the advantage that parameters can -be passed to the routines. -.NH 3 -Intelligent Calls, Method 3. -.PP -The call in the switch statement does full operand construction, and the -resulting operand is passed to the routine. This reduces the number of -routines to 133, the number of EM instructions. Generation of the switch -statement from ip_spec.t is more complicated, but the routine space is -much cleaner. This does not give any speed-up since the same actions are still -required; they are just performed in a different place. -.NH 3 -Static Evaluation. -.PP -It can be observed that the evaluation of the operand of a given instruction in -the text segment will always give the same result. It is therefore possible to -preprocess the text segment, decomposing the instructions into structs which -contain the address, the instruction code and the operand. No operand decoding -will be necessary at run-time: all operands have been precalculated. This will -probably give a considerable speed-up. Jumps, especially GTO jumps, will, -however, require more attention. -.NH 2 -Disassembly. -.PP -A disassembly facility is available, which gives a readable but not -letter-perfect disassembly of the EM object. The procedure structure is -indicated by placing the indication \fBP[n]\fP at the entry point of each -procedure, where \fBn\fP is the procedure identifier. The number of locals is -given in a comment. -.LP -The disassembler was generated by the software in the directory \fIswitch\fP -and then further processed by hand. diff --git a/doc/int/txt3 b/doc/int/txt3 deleted file mode 100644 index 3056c2383..000000000 --- a/doc/int/txt3 +++ /dev/null @@ -1,180 +0,0 @@ -.\" Logging -.\" -.\" $Id$ -.bp -.NH -THE LOGGING MACHINE. -.PP -Since messages and warnings provided by \fBint\fP include source code file -names and line numbers, they alone often suffice to identify the error. 
-If, however, the necessity arises, much more extensive debugging information -can be obtained by activating the Logging Machine. -This Logging Machine, which monitors all actions of the EM machine, is the -subject of this chapter. -.NH 2 -Implementation. -.PP -When inspecting the source code of \fBint\fP, many lines in the -following format will show up: -.DS -LOG(("@<\fIletter\fP><\fIdigit\fP> message", args)); -.DE -or -.DS -LOG(("\ <\fIletter\fP><\fIdigit\fP> message", args)); -.DE -The double parentheses are needed, because \fILOG()\fP is -declared as a define, and has a printf-like argument structure. -.PP -The <\fIletter\fP> classifies the log message and corresponds to an entry in -the \fIlogmask\fP, which holds a threshold for each class of messages. -The following classes exist: -.TS -tab(@); -l l l. -\(bu A\-Z@the flow of instructions: -@A: array -@B: branch -@C: convert -@F: floating point arithmetic -@I: integer arithmetic -@L: load -@M: miscellaneous -@P: procedure call -@R: pointer arithmetic -@S: store -@T: compare -@U: unsigned arithmetic -@X: logical -@Y: sets -@Z: increment/decrement/zero -\(bu d@stack dumping. -\(bu g@gda & heap manipulation. -\(bu s@stack manipulation. -\(bu r@reading the loadfile. -\(bu q@floating point calculations during reading the loadfile. -\(bu x@the instruction count, contents and file position. -\(bu m@monitor calls. -\(bu p@procedure calls and returns. -\(bu t@traps. -\(bu w@warnings. -.TE -.LP -When the interpreter reaches a LOG(()) statement it scans its first argument; -if \fIletter\fP -occurs in the logmask, and if \fIdigit\fP is lower than or equal to the -threshold in the logmask, the message is given. -Depending on the first character, the message will be preceded by a -position indication (with the @) or will be printed as is (with the -space).
-The \fIletter\fP determines the message class -and the \fIdigit\fP is used to distinguish various levels -of logging, with a lower digit indicating a more important message. -We will call the <\fIletter\fP><\fIdigit\fP> combination the \fBid\fP of -the logging. -.LP -In general, the lower the \fIdigit\fP following the \fIletter\fP, -the more important the message. -E.g. m5 reports about unsuccessful monitor calls only, m9 also reports -about successful monitors (which are obviously less interesting). -New logging messages can be added to the source code on relevant places. -.LP -Reasonable settings for the logmask are: -.TS -tab(@); -l l l. - @A\-Z9d4twx9@advised setting when trouble shooting (default). - @A\-Zx9@shows the flow of instructions & global information. - @pm9@shows the procedure & monitor calls. - @tw9@shows warning & trap information. -.TE -.PP -An EM interpreter without a Logging Machine can be obtained by undefining the -macro \fICHECKING\fP in the file \fIchecking.h\fP. -.NH 2 -Controlling the Logging machine. -.PP -The actions of the Logging Machine are controlled by a set of internal -variables (one of which is the log mask). -These variables can be set through assignments on the command line, as -explained in the manual page \fIint.1\fP, q.v. -Since there are a great many logging statements in the program, of which only a -few will be executed in any call of the interpreter, it is important to be able -to decide quickly if a given \fIid\fP has to be checked at all. -To this end all logging statements are guarded (in the #define) by a test for -the boolean variable \fIlogging\fP. -This variable will only be set if the command line assignments show the -potential need for logging (\fImust_log\fP) and the instruction count -(\fIinr\fP) is at least equal to \fIlog_start\fP (which derives from the -parameter \fBLOG\fP).
-.LP -The log mask can be set by the assignment -.DS -"LOGMASK=\fIlogstring\fP" -.DE -which sets the current logmask to \fIlogstring\fP. -A logstring has the following form: -.DS -[ [ \fIletter\fP | \fIletter\fP \- \fIletter\fP ]+ \fIdigit\fP ]+ -.DE -E.g. LOGMASK=A\-D8x9R7c0hi4 will print all messages belonging to loggings -with \fBid\fPs: -\fIA0..A8,B0..B8,C0..C8,D0..D8,x0..x9,R0..R7,c0,h0..h4,i0..i4\fP. -.PP -The logging variable STOP can be used to prevent run-away logging -past the point where the user expects an error to occur. -STOP=\fInr\fP will stop the interpreter after instruction number \fInr\fP. -.PP -To simplify the use of the logging machine, a number of abbreviations have been -defined. -E.g., AT=\fInr\fP can be thought of as an abbreviation of LOG=\fInr\-1\fP -STOP=\fInr+1\fP; this causes three stack dumps, one before the suspect -instruction, one on it and one after it; then the interpreter stops. -.PP -Logging results will appear in a special logging file (default: \fIint.log\fP). -.NH 2 -Dumps. -.PP -There are three routines available to examine the memory contents: -.TS -tab(@); -l l l. - @\fIstd_all()\fP@dumps the contents of the stack (\fId1\fP or \fId2\fP must be in the logmask). - @\fIgdad_all()\fP@dumps the contents of the gda (\fI+1\fP must be in the logmask). - @\fIhpd_all()\fP@dumps the contents of the heap (\fI*1\fP must be in the logmask). -.TE -.LP -These routines can be used everywhere in the program to examine the -contents of memory. -The internal variables allow the -gda and heap to be dumped only once (according to the -corresponding internal variable). -The stack is dumped after each -instruction if the log mask contains d1 or d2; d2 gives a full formatted -dump, d1 produces a listing of the Return Status Blocks only. -An attempt is made to format the stack correctly, based on the shadow -bytes, which identify the Return Status Block. -.LP -Remember to set the correct \fBid\fP in the LOGMASK, and to give -LOG the correct value. 
-If dumping is needed before the first instruction, then LOG must be -set to 0. -.LP -The dumps of the global data area and the heap are controlled internally by -the id-s +1 and *1 resp.; the corresponding logmask entries are set -automatically by setting the GDA and HEAP variables. -.NH 2 -Forking. -.PP -As mentioned earlier, a call to \fIfork()\fP, causes an image of the current -program to start running. -To prevent a messy logfile, the child process gets its own logfile -(and message file, tally file, etc.). -These logfiles are distinguished from the parent logfile by the a -postfix, e.g., -\fIlogfile_1\fP for the first child, \fIlogfile_2\fP for the second child, -\fIlogfile_1_2\fP for the second child of the first child, etc. -.br -\fINote\fP: the implementation of this feature is shaky; it works for the log -file but should also work for other files and for the names of the logging -variables. diff --git a/doc/lint/.distr b/doc/lint/.distr deleted file mode 100644 index 9c1cf0954..000000000 --- a/doc/lint/.distr +++ /dev/null @@ -1,15 +0,0 @@ -proto.make -abstract -appendix_A -appendix_B -chap1 -chap2 -chap3 -chap4 -chap5 -chap6 -chap7 -chap8 -chap9 -contents -frontpage diff --git a/doc/lint/Makefile b/doc/lint/Makefile deleted file mode 100644 index 3a17c50b3..000000000 --- a/doc/lint/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -# $Header$ - -FP = frontpage - -DOC = abstract contents chap1 chap2 chap3 chap4 chap5 chap6 chap7\ - chap8 chap9 appendix_A appendix_B - -../lint.doc: $(FP) $(DOC) - cat $(FP) $(DOC) > ../lint.doc diff --git a/doc/lint/abstract b/doc/lint/abstract deleted file mode 100644 index 01b05e603..000000000 --- a/doc/lint/abstract +++ /dev/null @@ -1,18 +0,0 @@ -.TL -Lint, a C Program Checker -.AU -Frans Kunst -.AI -Vrije Universiteit -Amsterdam -.AB -This document describes an implementation of a program which -does an extensive consistency and plausibility check on a set -of C program files. 
-This may lead to warnings which help the programmer to debug -the program, to remove useless code and to improve his style. -The program has been used to test itself and has found -bugs in sources of some heavily used code. -.AE -.LP -.bp diff --git a/doc/lint/appendix_A b/doc/lint/appendix_A deleted file mode 100644 index ce62b34bc..000000000 --- a/doc/lint/appendix_A +++ /dev/null @@ -1,56 +0,0 @@ -.ds -.SH -Appendix A -.LP -.SH -The warnings -.LP -.SH -Pass one warnings -.DS -.ft CW -%s may be used before set -maybe %s used before set -%s unused in function %s -%s set but not used in function %s -argument %s unused in function %s -static [variable, function] %s unused -%s declared extern but never used - -long conversion may lose accuracy -comparison of unsigned with negative constant -unsigned comparison with 0? -degenerate unsigned comparison -nonportable character comparison -possible pointer alignment problem - -%s evaluation order undefined - -null effect -constant in conditional context -use if-else construction -while (0) ? -do ... while (0) ? 
-[case, default] statement in strange context - -function %s has return(e); and return; -statement not reached -function %s declared %s but no value returned -.ft P -.DE -.SH -Pass two warnings -.DS -.ft CW -%s variable # of args -%s arg %d used inconsistently -%s multiply defined -%s value declared inconsistently -%s used but not defined -%s defined (%s(%d)) but never used -%s declared but never defined -%s value is used but none is returned -%s returns value which is [sometimes, always] ignored -%s also defined in library -.ft P -.DE diff --git a/doc/lint/appendix_B b/doc/lint/appendix_B deleted file mode 100644 index 362dbd2d4..000000000 --- a/doc/lint/appendix_B +++ /dev/null @@ -1,52 +0,0 @@ -.SH -Appendix B -.TL -The Ten Commandments for C Programmers -.AU -Henry Spencer -.IP 1 -Thou shalt run \fIlint\fR frequently and study its pronouncements with -care, for verily its perception and judgement oft exceed thine. -.IP 2 -Thou shalt not follow the NULL pointer, for chaos and madness await thee at -its end. -.IP 3 -Thou shalt cast all function arguments to the expected type if they are not -of that type already, even when thou art convinced that this is unnecessary, -lest they take cruel vengeance upon thee when thou least expect it. -.IP 4 -If thy header files fail to declare the return types of thy library functions, -thou shalt declare them thyself with the most meticulous care, -lest grievous harm befall thy program. -.IP 5 -Thou shalt check the array bounds of all strings (indeed, all arrays), -for surely where thou typest ``foo'' someone someday shall type -``supercalifragilisticexpialidocious''. -.IP 6 -If a function be advertised to return an error code in the event of -difficulties, thou shalt check for that code, yea, even though the checks -triple the size of thy code and produce aches in thy typing fingers, -for if thou thinkest ``it cannot happen to me'', -the gods shall surely punish thee for thy arrogance. 
-.IP 7 -Thou shalt study thy libraries and strive not to re-invent them without cause, -that thy code may be short and readable and thy days pleasant and productive. -.IP 8 -Thou shalt make thy program's purpose and structure -clear to thy fellow man by using the -One True Brace Style, -even if thou likest it not, -for thy creativity is better used in solving problems than in creating -beautiful new impediments to understanding. -.IP 9 -Thy external identifiers shall be unique in the first six characters, -though this harsh discipline be irksome and the years of its necessity -stretch before thee seemingly without end, -lest thou tear thy hair out and go mad on that fateful day when -thou desirest to make thy program run on an old system. -.IP 10 -Thou shalt foreswear, renounce, and abjure the vile heresy which claimeth -that ``All the world's a VAX'', and have no commerce with the -benighted heathens who cling to this barbarous belief, -that the days of thy program may be long even though the days of thy -current machine be short. diff --git a/doc/lint/chap1 b/doc/lint/chap1 deleted file mode 100644 index 580e9b6b1..000000000 --- a/doc/lint/chap1 +++ /dev/null @@ -1,34 +0,0 @@ -.NH 1 -Introduction -.PP -C [1][2] is a dangerous programming language. -The programmer is allowed to do almost anything, as long as -the syntax of the program is correct. -This has a reason. In this way it is possible to make a fast -compiler which produces fast code. -The compiler will be fast because it doesn't do much checking -at compile time. -The code is fast because the compiler doesn't generate run time -checks. -The programmer should protect himself against producing error -prone code. -One way to do that is to obey the -.I -Ten Commandments for C programmers -.R -[appendix B]. -This document describes an implementation of the -.I lint -program, as referred to in Commandment 1. -It is a common error to run -.I lint -only after a few hours of debugging and some -bug can't be found. 
-.I Lint -should be run when large pieces of new code are accepted by the -compiler and as soon as bugs arise. -Even for working programs it is useful to run -.I lint, -because it can find constructions that may lead to problems in -the future. -.bp diff --git a/doc/lint/chap2 b/doc/lint/chap2 deleted file mode 100644 index aac7c33bb..000000000 --- a/doc/lint/chap2 +++ /dev/null @@ -1,57 +0,0 @@ -.NH -Outline of the program -.PP -The program can be divided into three parts. A first pass, which -parses C program files and outputs definitions, a second pass which -processes the definitions and a driver, -which feeds the set of files to the first pass and -directs its output to the second pass. Both passes produce the -warnings on standard error output, which are redirected to standard -output by the driver. -.PP -The first pass is based on an existing C front end, called -.I cem -[3]. -.I Cem -is part of the Amsterdam Compiler Kit (ACK), as described in [4]. -.PP -Most of the code of -.I cem -is left unchanged. This has several reasons. A lot of work, which -is done by -.I cem -, must also be done by -.I lint. -E.g. the lexical analysis, the macro expansions, -the parsing part and the semantical analysis. -Only the code generation part is turned off. -An advantage of this approach is, that a person who understands -.I cem -will not have to spend too much time in understanding -.I lint. -.PP -All changes and extensions to -.I cem -can be turned off by not defining the compiler directive -.ft CW -LINT. -.R -Compiling should then result in the original C compiler. -.PP -The second pass is a much less complex program. -It reads simple definitions generated by the first pass and -checks their consistency. -This second pass gives warnings -about wrong usage of function arguments, their results and -about external variables, which are used and defined in more -than one file. -.PP -The driver is a shell program, to be executed by the -.UX -shell -.I sh.
-It executes the two passes and lets them communicate through a -filter (sort). -Actually it is simplex communication: the first pass only talks to -the second pass through the filter. -.bp diff --git a/doc/lint/chap3 b/doc/lint/chap3 deleted file mode 100644 index 333529c16..000000000 --- a/doc/lint/chap3 +++ /dev/null @@ -1,294 +0,0 @@ -.NH -What lint checks -.NH 2 -Set, used and unused variables -.PP -We make a distinction between two classes of variables: -the class of automatic variables (including register variables) -and the other variables. -The other variables, global variables, static variables, formal -parameters et cetera, are assumed to have a defined value. -Global variables e.g., are initialized by the compiled code at -zeros; formal parameters have a value which is equal to the value -of the corresponding actual parameter. -These variables can be used without explicitly initializing them. -The initial value of automatic variables is undefined (if they are -not initialized at declaration). -These variables should be set before they are used. -A variable is set by -.IP -.RS -.IP 1. -an assignment (including an initialization) -.IP 2. -taking the address -.RE -.PP -The first case is clear. The second case is plausible. -It would take too much effort (if at all possible) to check -if a variable is set through one of its aliases. -Because -.I lint -should not warn about correct constructs, it takes this conservative -approach. -Structures (and unions) can also be set by setting at -least one member. -Again a conservative approach. -An array can be set by using its name (e.g. as actual parameter -of a function call). -.I Lint -warns for usage as -.I rvalue -of automatic variables which are not set. -.PP -A variable is used if -.IP -.RS -.IP 1. -it is used as a -.I rvalue -.IP 2. -its address is taken -.IP -Arrays and structures (and unions) are also used if one entry -or one member respectively is used.
-.RE -.PP -When a variable is never used in the part of the program where it is -visible, a warning is given. -For variables declared at the beginning of a compound statement, -a check is made at the end of this statement. -For formal parameters a check is made at the end of the function -definition. -At the end of a file this is done for global static definitions. -For external variables a warning can be given when all the files -are parsed. -.NH 2 -Flow of control -.PP -The way -.I lint -keeps track of the flow of control is best explained by means of -an example. -See the program of figure 1. -.KF -.DS B -.ft CW -if (cond) - /* a statement which is executed if cond is true, - * the if-part - */ -else - /* the else-part */ -.DE -.br -.ce -.I -figure\ 1. -.R -.KE -.PP -After evaluation of \f(CWcond\fP, two things can happen. -The if-part is executed or the else-part is executed (but not both). -Variables which are set in the if-part but not in the else-part, -need not be set after the if statement, and vice versa. -.I Lint -detects this and assumes these variables after the if statement to -be \fImaybe set\fR. -(See figure 2.) -.KF -.DS B -.ft CW -int cond; - -main() -{ - int i, j; - - if (cond) { - i = 0; - j = 0; - } - else - use(i); /* i may be used before set */ - use(j); /* maybe j used before set */ -} -.DE -.br -.ce -.I -figure 2. -.R -.KE -.PP -If both the if-part and the else-part are never left (i.e. they -contain an endless loop or a return statement), -.I lint -knows that the if statement is never left too. -Besides the if statement, -.I lint -knows the possible flows of control in while, do, for and -switch statements. -It also detects some endless loops like \f(CWwhile(1)\fP, -\f(CWdo ... while (1)\fP, \f(CWfor (;;)\fP. -.NH 2 -Functions -.PP -Most C compilers will not complain if a function is called with actual -parameters of a different type than the function expects. 
-Using a function in one file as a function of -type -.I A -while defining it in another file as a function of type -.I B -is also allowed by most compilers. -It needs no explanation that this can lead to serious trouble. -.PP -.I Lint -checks if functions are called with the correct number of arguments, -if the types of the actual parameters correspond with the types of -the formal parameters and if function values are used in a way -consistent with their declaration. -When the result of a function is used, a check is made to see if -the function returns a value. -When a function returns a value, -.I lint -checks if the values of all calls of this function are used. -.NH 2 -Undefined evaluation order -.PP -The semantics of C do not define evaluation orders for some -constructs, which, at first sight, seem well defined. -The evaluation order of the expression -.ft CW -a[i]\ =\ i++; -.R -e.g., is undefined. -It can be translated to something with the semantics of -.ft CW -a[i]\ =\ i; i++; -.R -which is what probably was meant, or -.ft CW -a[i+1]\ =\ i; i++;. -.R -An easier example to explain why, is -.ft CW -j\ =\ a[i]\ +\ i++;. -.R -`\f(CW+\fR' Is a so called -.I commutative -operator (with respect to the evaluation order) , as is `\f(CW=\fR'. -This allows the compiler to choose which term to evaluate first. -It is easy to see, that it makes a difference for the value of -.ft CW -j, -.R -which order is chosen. -The expression -.ft CW -i++ -.R -is said to have -.I -side effects. -.R -It affects the value of -.ft CW -i. -.R -Because this value is used in the other term, this gives a conflict. -.PP -A function call with reference to a variable as argument can have -side effects too. -Therefore, the evaluation order of -.ft CW -i -.R -in the expression -.ft CW -f(&i)\ +\ i -.R -is undefined. -When a function is called with an array as argument, this array -can be affected by the function, because only the address of the -array is passed to the function.
-(In Pascal a copy of the array is passed to the function if the -formal parameter is not declared \fIvar\fP.) -So the evaluation order of -.ft CW -a -.R -in the expression -.ft CW -f(a)\ +\ a[0] -.R -is undefined. -This one is not yet detected by -.I lint. -.PP -Global variables can still cause trouble. -If function -.ft CW -f -.R -affects the global variable -.ft CW -i, -.R -the value of the expression -.ft CW -f()\ +\ i -.R -is undefined, because the evaluation order of \f(CWi\fP is undefined. -.PP -The evaluation order of the arguments of a function is not -defined, so the expression -.ft CW -f(i,\ i++) -.R -gives a warning -.ft CW -i evaluation order undefined. -.R -.NH 2 -Pointer alignment problems -.PP -For pointers to objects of different types there are different -alignment restrictions. -On some machines pointers to type char can have both odd and even -values, whereas pointers to type int should contain an even address. -.I Lint -could warn for all pointer conversions. -This is not what -.I lint -does. -.I Lint -assumes that some pointers are more restricted than others, and -that pointers of some types can safely be converted to a pointer -of a less restrictive type. -The order of restriction is as follows (`\(<=' means -`is not more restricted than') : -.PP -.ce -char \(<= short \(<= int \(<= long -.ce -float \(<= double -.NH 2 -Libraries -.PP -C is a small language. -As a matter of fact it has no i/o routines. -To make it a useful language, C is supported by libraries. -These libraries contain functions and variables that can be used by any -C program. -.I Lint -knows some libraries too. -At this moment it knows the `-\fIlc\fR', `-\fIlm\fR' and -`-\fIlcurses\fR' libraries. -The `-\fIlc\fR' library, containing definitions for functions from -chapter two and three of the \s-2UNIX\s+2 programmers manual, is default. -.I Lint -warns for definitions of functions or global variables with the -same name as a function definition in a library. 
-.bp diff --git a/doc/lint/chap4 b/doc/lint/chap4 deleted file mode 100644 index 009caa3c3..000000000 --- a/doc/lint/chap4 +++ /dev/null @@ -1,979 +0,0 @@ -.NH 1 -How lint checks -.NH 2 -The first pass data structure -.PP -The data structure of -.I cem -is changed a little and some structures have been added. -.NH 3 -The changes -.NH 4 -Idf descriptor -.PP -A member -.ft CW -id_line -.R -is added -to the -.I idf -selector. -This line number is used for some warnings. -.NH 4 -Def descriptor -.PP -The -.I def -selector is extended with the members -.ft CW -df_set -.R and -df_line. -.R -The -.ft CW -df_used -.R -member did exist already, but was only used for code generation. -This usage is eliminated so it can be used by -.I lint. -The meaning of these members should be clear. -.NH 3 -The additions -.NH 4 -Lint_stack_entry descriptor -.DS B -.ft CW -struct lint_stack_entry { - struct lint_stack_entry *next; - struct lint_stack_entry *previous; - short ls_class; - int ls_level; - struct state *ls_current; - union { - struct state *S_if; - struct state *S_end; - struct switch_states switch_state; - } ls_states; -}; -.R -.DE -.PP -Structure to simulate a stacking mechanism. -.IP \f(CWnext\fP 15 -Pointer to the entry on top of this one. -.IP \f(CWprevious\fP -Pointer to the entry beneath this one. -.IP \f(CWls_class\fP -The class of statement this entry belongs to. -Possible classes are \f(CWIF\fP, \f(CWWHILE\fP, \f(CWDO\fP, -\f(CWFOR\fP, \f(CWSWITCH\fP and \f(CWCASE\fP. -.IP \f(CWls_level\fP -The level the corresponding statement is nested. -.IP \f(CWls_current\fP -A pointer to the state descriptor which describes the state -of the function (the state of the automatic variables, if the next -statement can be reached, et cetera) if control passes the -flow of control to the part of the program currently parsed.
-The initialization of this state is as follows -.RS -.IP -If \f(CWls_class\fP in [\f(CWIF\fP, \f(CWSWITCH\fP] the state -after parsing the conditional expression. -.IP -If \f(CWls_class\fP in [\f(CWWHILE\fP, \f(CWFOR\fP] the state -after parsing the code between the brackets. -.IP -If \f(CWls_class\fP in [\f(CWDO\fP, \f(CWCASE\fP] the state at -entrance of the statement after the \f(CWDO\fP or \f(CWCASE\fP -token. -.RE -.IP \f(CWls_states\fP 15 -Union of pointers to state descriptors containing different information -for different values of \f(CWls_class\fP. -.RS -.IP -If \f(CWls_class\fP is \f(CWIF\fP and in case of parsing an else part, -\f(CWls_states.S_if\fP points to the state that is reached after the -if part. -.IP -If \f(CWls_class\fP in [\f(CWWHILE\fP, \f(CWFOR\fP, \f(CWDO\fP] -then \f(CWls_states.S_end\fP contains a conservative description -of the state of the program after `jumping' -to the end of the statement after the \f(CWWHILE\fP, \f(CWDO\fP -or \f(CWFOR\fP token. -I.e. the state at reaching a break (not inside a switch) or -continue statement. -.IP -If ls_class is \f(CWSWITCH\fP, \f(CWls_states\fP is used as a structure -.DS B -.ft CW -struct switch_states { - struct state S_case; - struct state S_break; -}; -.R -.DE -containing two pointers to state descriptors. -\f(CWls_states.switch_state.S_case\fP contains -a conservative description -of the state of the program after \f(CWcase ... case\fP -parts are parsed. -\f(CWls_states.switch_state.S_break\fP the state after parsing -all the \f(CWcase ... break\fP parts. -The reason for \f(CWls_states.switch_state.default_met\fP should be -self-explanatory. -.IP -In case \f(CWls_class\fP is \f(CWCASE\fP, \f(CWls_states\fP is not used. 
-.RE -.NH 4 -State descriptor -.DS B -.ft CW -struct state { - struct state *next; - struct auto_def *st_auto_list; - int st_nrchd; - int st_warned; -}; -.R -.DE -.IP \f(CWst_auto_list\fP 15 -Pointer to a list of definitions of the automatic variables whose -scope contain the current position in the program. -.IP \f(CWst_nrchd\fP -True if the next statement can't be reached. -.IP \f(CWst_warned\fP -True if a warning has already been given. -.NH 4 -Auto_def descriptor -.DS B -.ft CW -struct auto_def { - struct auto_def *next; - struct idf *ad_idf; - struct def *ad_def; - int ad_used; - int ad_set; - int ad_maybe_set; -}; -.R -.DE -.IP \f(CWnext\fP 15 -Points to the next auto_definition of the list. -.IP \f(CWad_idf\fP -Pointer to the idf descriptor associated with this auto_definition. -.IP \f(CWad_def\fP -Ditto for def descriptor. -.IP \f(CWad_used\fP -Indicates the state of this automatic variable. -Ditto for \f(CWad_set\fP and \f(CWad_maybe_set\fP. -Only one of \f(CWad_set\fP and \f(CWad_maybe_set\fP may be true. -.NH 4 -Expr_state descriptor -.DS B -.ft CW -struct expr_state { - struct expr_state *next; - struct idf *es_idf; - arith es_offset; - int es_used; - int es_set; -}; -.R -.DE -.PP -This structure is introduced to keep track of which variables, -array entries and structure members (union members) are set -and/or used in evaluating an expression. -.IP \f(CWnext\fP 15 -Pointer to the next descriptor of this list. -.IP \f(CWes_idf\fP -Pointer to the idf descriptor this descriptor belongs to. -.IP \f(CWes_offset\fP -In case of an array, a structure or union, this member contains -the offset the compiler would generate for locating the array -entry or structure/union member. -.IP \f(CWes_used\fP -True if the indicated memory location is used in evaluating the -expression. -.IP \f(CWes_set\fP -Ditto for set. 
-.NH 4 -Outdef descriptor -.DS B -.ft CW -struct outdef { - int od_class; - char *od_name; - char *od_file; - unsigned int od_line; - int od_nrargs; - struct tp_entry *od_entry; - int od_returns; - struct type *od_type; -}; -.DE -.R -.PP -As structures of this type are not allocated dynamically by a -storage allocator, it contains no next member. -An outdef can be given to \f(CWoutput_def()\fP to be passed to the -second pass. -Basically this forms the interface with the second pass. -.IP \f(CWod_class\fP 15 -Indicates what kind of definition it is. -Possible classes are \f(CWEFDF\fP, \f(CWEVDF\fP, \f(CWSFDF\fP, -\f(CWSVDF\fP, \f(CWLFDF\fP, \f(CWLVDF\fP, -\f(CWEFDC\fP, \f(CWEVDC\fP, \f(CWIFDC\fP, \f(CWFC\fP, \f(CWVU\fP. -([\f(CWE\fPxternal, \f(CWS\fPtatic, \f(CWL\fPibrary, \f(CWI\fPmplicit] -[\f(CWF\fPunction, \f(CWV\fPariable] -[\f(CWD\fPe\f(CWF\fPinition, \f(CWD\fPe\f(CWC\fPlaration, -\f(CWC\fPall, \f(CWU\fPsage]) -.IP \f(CWod_name\fP -The name of the function or variable. -.IP \f(CWod_file\fP -The file this definition comes from. -.IP \f(CWod_nrargs\fP -If \f(CWod_class\fP is one of \f(CWEFDF\fP, \f(CWSFDF\fP or -\f(CWLFDF\fP, this member contains the -number of arguments this function has. -If the function was preceded by the pseudocomment -\f(CW/*\ VARARGS\ */\fP, -\f(CWod_nrargs\fP gets the value \f(CW-1-n\fP. -.IP \f(CWod_entry\fP -A pointer to a list of \f(CWod_nrargs\fP cells, each containing a -pointer to the type descriptor of an argument. (\f(CW-1-od_nrargs\fP -cells if -\f(CWod_nrargs < 0\fP.) -\f(CWTp_entry\fP is defined as -.DS B -.ft CW -struct tp_entry { - struct tp_entry *next; /* pointer to next cell */ - struct type *te_type; /* an argument type */ -}; -.R -.DE -.IP \f(CWod_returns\fP 15 -For classes \f(CWEFDF\fP, \f(CWSFDF\fP and \f(CWLFDF\fP this -member tells if the function returns an expression or not. -In case \f(CWod_class\fP is \f(CWFC\fP it is true if the value -of the function is used, false otherwise.
-For other classes this member is not used. -.IP \f(CWod_type\fP -A pointer to the type of the function or variable defined or -declared. -Not used for classes \f(CWFC\fP and \f(CWVU\fP. -.NH 2 -The first pass checking mechanism -.PP -In the description of the implementation of the pass one -warnings, it is assumed that the reader is familiar with the -\fILLgen\fP parser generator, as described in [6]. -.NH 3 -Used and/or set variables -.PP -To be able to give warnings like -.ft CW -%s used before set -.R -and -.ft CW -%s set but not used in function %s -.R -, there needs to be a way to keep track of the state of a variable. -A first approach to do this was by adding two fields to the -\fIdef\fP selector: -.ft CW -df_set -.R -and -.ft CW -df_used. -.R -While parsing the program, each time an expression was met -this expression was analyzed and the fields of each \fIdef\fP -selector were possibly set during this analysis. -This analysis was done by passing each expression to a -function -.ft CW -lint_expr -.R -, which walks the expression tree in a way similar to the function -\f(CWEVAL\fP in the file \fIeval.c\fP of the original -.I -cem -.R -compiler. -This approach has one big disadvantage: it is impossible to keep -track of the flow of control of the program. -No warning will be given for the program fragment of figure 3. -.KF -.DS B -.ft CW -func() -{ - int i; - - if (cond) - i = 0; - else - use(i); /* i may be used before set */ -} -.I -.DE -.br -.ce -figure\ 3. -.R -.KE -.PP -It is clear that it would be nice having -.I lint -warn for this construction. -.PP -This was done in the second approach. -When there was a choice between two statements, each statement -was parsed with its own copy of the state at entrance of the -.I -choosing statement. -.R -A state consisted of the state of the automatic variables -(including register variables). -In addition to the possibilities of being used and set, -a variable could be \fImaybe set\fP. 
-These states were passed between the statement parsing routines -using the \fILLgen\fP parameter mechanism. -At the end of a choosing statement, the two states were merged -into one state, which became the state after this statement. -The construction of figure 3 was now detected, but switch -statements still gave problems and continue and break statements -were not understood. -The main problem of a switch statement is, that the closing bracket -(`\f(CW)\fP') has to be followed by a \fIstatement\fP. -The syntax shows no choice of statements, as is the case with -if, while, do and for statements. -Using the \fILLgen\fP parameter mechanism, it is not a trivial -task to parse the different case parts of a switch statement -with the same initial state and to merge the results into one -state. -This observation led to the third and final approach, as described -next. -.PP -Instead of passing the state of the program through the statements -parsing routines using the \fILLgen\fP parameters, a special stack is -introduced, the -.I lint_stack. -When a choosing statement is parsed, an entry is pushed on the stack -containing the information that is needed to keep track of the -state of the program. -Each entry contains a description of the -.I current -state of the program and a field that indicates what part of the -program the parser is currently parsing. -For all the possible choosing statements I describe the actions -to be taken. -.PP -At entrance of an if statement, an entry is pushed on the stack -with the current state being a copy of the current state of the -stack element one below. -The class of this entry is \f(CWIF\fP. -At reaching the else part, the current state is moved to -another place in this stack entry (to \f(CWS_IF\fP), and a new copy -of the current state at entrance of this if statement is made. -At the end of the else part, the two states are merged into -one state, the new current state, and the \f(CWIF\fP entry is -popped from the stack.
-If there is no else part, then the state that is reached after -parsing the if part is merged with the current state at entrance -of the if statement into the new current state. -.PP -At entrance of a while statement a \f(CWWHILE\fP entry is pushed -on the stack containing a copy of the current state. -If a continue or break statement is met in the while statement, -the state at reaching this continue or break statement is -merged with a special state in the \f(CWWHILE\fP entry, called -\f(CWS_END\fP. -(If \f(CWS_END\fP did not yet contain a state, the state is copied -to \f(CWS_END\fP.) -At the end of the while statement this \f(CWS_END\fP is merged with the -current state, which result is merged with the state at entrance -of the while statement into the new current state. -.PP -A for statement is treated similarly. -A do statement is treated the same way too, except that \f(CWS_END\fP -isn't merged with the state at entrance of the do statement, -but becomes the new current state. -.PP -For switch statements a \f(CWSWITCH\fP entry is pushed on the stack. -Apart from the current state, this entry contains two other -states, \f(CWS_BREAK\fP and \f(CWS_CASE\fP. -\f(CWS_BREAK\fP initially contains no state, \f(CWS_CASE\fP -initially contains a -copy of the current state at entrance of the switch statement. -After parsing a case label, a \f(CWCASE\fP entry is pushed on the stack, -containing a copy of the current state. -If, after zero or more statements, we meet another case label, -the state at reaching this case label is merged with \f(CWS_CASE\fP -of the \f(CWSWITCH\fP entry below and a new copy of the state -at entrance -of the switch statement is put in the \f(CWCASE\fP entry. -If we meet a break statement, we merge the current state with -\f(CWS_BREAK\fP of the \f(CWSWITCH\fP entry below and pop the -\f(CWCASE\fP entry. -In addition to this, the occurrence of a default statement -inside the switch statement is recorded in the \f(CWSWITCH\fP entry. 
-At the end of the switch statement we check if we have met a -default statement. -If not, \f(CWS_BREAK\fP is merged with the current state at entrance -of the switch statement. (Because it is possible that no case -label will be chosen.) -Next the \f(CWS_CASE\fP is `special_merged' with \f(CWS_BREAK\fP -into the new current state. -For more details about these merge functions see the sources. -.PP -With the approach described above, -.I lint -is aware of the flow -of control in the program. -There still are some doubtful constructions -.I lint -will not detect and there are some constructions (although rare) -for which -.I lint -gives an incorrect warning (see figure 4). -.KF -.DS B -.ft CW -{ - int i; - - for (;;) { - if (cond) { - i = 0; - break; - } - } - use(i); - /* lint warns: maybe i used before set - * although the fragment is correct - */ -} -.DE -.br -.I -.ce -figure\ 4. -.R -.KE -.PP -A nice advantage of the method is, that the parser stays clear, -i.e. it isn't extended with extra parameters which must pass the -states. -In this way the parser still is very readable and we have a nice -interface with -.I lint -using function calls. -.NH 3 -Undefined evaluation orders -.PP -In expressions the values of some variables are used and some -variables are set. -Of course, the same holds for subexpressions. -The compiler is allowed to choose the order of evaluation of -subexpressions involving a commutative and associative operator -(\f(CW*\fP, \f(CW+\fP, \f(CW&\fP, \f(CW|\fP, \f(CW^\fP), -the comma in a parameter list or an assignment operator. -In section 3.4 it is made clear that this will lead to -statements with ambiguous semantics. -.PP -The way these constructs are detected is rather straight forward. -The function which parses an expression (\f(CWlint_expr\fP) -returns a linked -list containing information telling which variables are set and -which variables are used. -A variable is indicated by its -.I idf -descriptor and an -.I offset. 
-This offset is needed for discriminating entries of the same -array and members of the same structure or union, so it is -possible to warn about the statement -.ft CW -a[b[0]]\ =\ b[0]++;. -.R -When \f(CWlint_expr\fP meets a commutative operator (with respect to the -evaluation order), it calls itself recursively with the operands -of the operator as expression. -The returned results are checked for undefined evaluation orders -and are put together. -This is done by the function \f(CWcheck_and_merge\fP. -.NH 3 -Useless statements -.PP -Statements which compute a value that is not used, -are said to have a \fInull effect\fP. -Examples are \f(CWx = 2, 3;\fP, \f(CWf() + g();\fP and -\f(CW*p++;\fP. -(\f(CW*\fP and \f(CW++\fP have the same precedence and associate -from right to left.) -.PP -A conditional expression computes a value too. -If this value isn't used, it is better to use an if-else -statement. -So, if -.I lint -sees -.DS B -.ft CW -b ? f() : g(); -.R -.DE -.LP -it warns \f(CWuse if-else construction\fP. -.NH 3 -Not-reachable statements -.PP -The algorithm to detect not-reachable statements (including not -reachable initializations) is as follows. -Statements after a label and a case statement and the compound -statement of a function are always reachable. -Other statements are not-reachable after: -.QS -.RS -.IP - 1 -a goto statement -.IP - -a return statement -.IP - -a break statement -.IP - -a continue statement -.IP - -a switch statement -.IP - -an endless loop (a while, do or for loop with a conditional -which always evaluates to true and without a break statement) -.IP - -an if-else statement of which both if part and else part -end up in a not-reachable state -.IP - -a switch statement of which all \f(CWcase ... break\fP parts -(including -a \f(CWdefault ... 
break\fP part) end up in a not-reachable state -.IP - -the pseudocomment \f(CW/*\ NOTREACHED\ */\fP -.RE -.QE -.PP -The algorithm is easily implemented using the \f(CWst_nrchd\fP selector -in the -.I state -descriptor. -The \f(CWst_warned\fP selector is used to prevent superfluous warnings. -To detect an endless loop, after a while (), for (..;;..) -and do part the current state of the stack entry beneath the top one -is set to not reached. -If, in the statement following, a break statement is met, this same -state is set to reached. -If the while () part of the do statement is met, this state -is set to reached if it doesn't evaluate to true. -The detection of not-reachable statements after a switch statement -is done in a similar way. -In addition it is checked if a default statement isn't met, in -which case the statement after the switch statement can be reached. -The warning \f(CWstatement not reached\fP is not given for compound -statements. -If -.I lint -did, it would warn for the compound statement in a switch statement, -which would be incorrect. -.PP -Not-reachable statements are still interpreted by -.I lint. -I.e. when -.I lint -warns that some statement can't be reached, it assumes this is -not what the programmer really wants and it ignores this fact. -In this way a lot of useless warnings are prevented in the case of -a not-reachable statement. -See figure 5. -.KF -.DS B -.ft CW -{ - int i; - - for (;;) { - /* A loop in which the programmer - * forgot to introduce a conditional - * break statement. - * Suppose i is not used in this part. - */ - } - /* some more code in which i is used */ -} -/* The warning "statement not reached" highlights the bug. - * An additional warning "i unused in function %s" is - * formally correct, but doesn't provide the programmer - * with useful information. - */ -.DE -.I -.ce -figure\ 5.
-.R -.KE -.NH 3 -Functions returning expressions and just returning -.PP -Each time a return statement is met, -.I lint -checks if an expression is returned or not. -If a function has a return with expression and a return without -expression, -.I lint -warns -.ft CW -function %s has return(e); and return;. -.R -If the flow of control can -.I -fall through -.R -the end of the compound statement of a function, this indicates -an implicit return statement without an expression. -If the end of the compound statement of the function can be reached, -.I lint -introduces this implicit return statement without expression. -.PP -Sometimes the programmer knows for sure that all case parts inside -a switch statement include all possible cases, so he doesn't -introduce a default statement. -This can lead to an incorrect warning. -Figure 6 shows how to prevent this warning. -.KF -.DS B -.ft CW - func() - { - switch (cond) { - case 0: return(e0); - case 1: return(e1); - } - /* NOTREACHED */ - } -/* no warning: "function func has return(e); and return; */ -.DE -.I -.ce -figure\ 6. -.R -.KE -.PP -The pseudocomment \f(CW/*\ NOTREACHED\ */\fP can also be used to tell -.I lint -that some function doesn't return. See figure 7. -.KS -.DS B -.ft CW - func() - { - switch (cond) { - case 0: return(e0); - case 1: return(e1); - default: error(); /* calls exit or abort */ - /* NOTREACHED */ - } - } -/* no warning: "function func has return(e); and return;" */ -.I -.DE -.ce -figure\ 7. -.R -.KE -.NH 3 -Output definitions for the second pass -.PP -The first pass can only process one program file. -To be able to process a program that spreads over more than one file, -the first pass outputs definitions that are processed by a second -pass. -The format of such a definition is different for different classes: -.PP -For class in {EFDF, SFDF, LFDF} -.DS C -::::::: -.DE -.LP -A negative \fInr of args\fP indicates that the function can be called with -a varying number of arguments. 
-.PP -For class = FC -.DS C -::::: -.DE -.LP -The \fIvalue is used\fP part can have three meanings: -the value of the function is ignored; -the value of the function is used; -the value of the function is cast to type \fIvoid\fP. -.PP -For other classes -.DS C -:::: -.DE -.LP -Definitions of class VU (Variable Usage) are only output for \fIused\fP -global variables. -.PP -Structure and union types that are output to the intermediate file -are simplified. -(The following occurrences of \fIstructure\fP should be -read as \fIstructure or union\fP and \fIstruct\fP as \fIstruct or -union\fP.) -Structures that are identified by a \fIstructure tag\fP are output -to the intermediate file as \f(CWstruct \fP. -Structures without a structure tag are output as -\f(CWstruct {}\fP with \f(CW\fP a semicolon-separated -list of types of the members of this structure. -An alternative way would be to output the complete structure definition. -However, this gives practical problems. -It is allowed to define some object of a structure type with a -structure tag, without this structure being defined at that place. -The first approach leaves errors, such as in figure 8, undetected. -.KF -.DS B -.ft CW - "a.c" "b.c" - -struct str { struct str { - float f; int i; -} s; }; - -main() func(s) -{ struct str s; - func(s); {} -} -.I -.DE -.ce -figure\ 8. -.R -.KE -.PP -To be able to detect these errors, the first pass should also output -definitions of structure tags. -The example of figure 8 would then get a warning like -.ft CW -structure str defined inconsistently -.R -.PP -More information on these definitions in section 4.3 and 4.4. -.NH 3 -Generating libraries -.PP -.I Lint -knows the library `-lc', `-lm' and `-lcurses'. -If a program uses some other library, it is possible to generate -a corresponding \fIlint library\fP. -To do this, precede all the C source files of this library by -the pseudocomment \f(CW/*\ LINTLIBRARY\ */\fP. 
-Then feed these files one by one to the first pass of -.I lint -collecting the standard output in a file and ignoring the warnings. -The resulting file contains library definitions of the functions -and external variables defined in the library sources, and not more -than that. -If this file is called `llib-l\fIname\fP.ln -.I lint -can be told to search the library by passing it as argument in -the command line `-llib-l\fIname\fP.ln. -The implementation of this feature is simple. -.PP -As soon as the pseudocomment \f(CW/*\ LINTLIBRARY\ */\fP is met, -only function and variable definitions are output with class LFDF -and LVDF respectively. -Other definitions, which otherwise would have been output, are -discarded. -.PP -Instead of generating a special lint library file, one can make a -file containing the library definitions and starting with -\f(CW/* LINTLIBRARY */\fP. -This file can then be passed to -.I lint -just by its name. -This method isn't as efficient as the first one. -.NH 3 -Interpreting the pseudocomments -.PP -The interpretation of the pseudocomments is done by the lexical -analyzer, because this part of the program already took care of the -comments. -At first sight this seems very easy: as soon as some pseudocomment -is met, raise the corresponding flag. -Unfortunately this doesn't work. -The lexical analyzer is a \fIone token look ahead scanner\fP. -This causes the above procedure to raise the flags one token too -soon. -A solution to get the right effect is to reserve two flags per -pseudocomment. -The first is set as soon as the corresponding pseudocomment is -scanned. -At the returning of each token this flag is moved to the second flag. -The delay in this way achieved makes the pseudocomments have effect -at the correct place. 
-.NH 2 -The second pass data structure -.NH 3 -Inp_def descriptor -.DS B -.ft CW -struct inp_def { - struct inp_def *next; - int id_class; - char id_name[NAMESIZE]; - char id_file[FNAMESIZE]; - unsigned int id_line; - int id_nrargs; - char argtps[ARGSTPSSIZE]; - int id_returns; - char id_type[TYPESIZE]; - int id_called; - int id_used; - int id_ignored; - int id_voided; -}; -.R -.DE -.PP -This description is almost similar to the \fIoutdef\fP descriptor as -described in 4.1.2.5. -There are some differences too. -.IP \f(CWnext\fP 15 -As structures of this type are allocated dynamically, this field -is added so the same memory allocator as used in the first pass can be -used. -.LP -\f(CWid_called -.br -id_used -.br -id_ignored\fP -.IP \f(CWid_voided\fP 15 -Some additional fields only used for function definitions.Their -meaning should be clear. -.PP -The other fields have the same meaning as the corresponding fields -in the \fIoutdef\fP descriptor. -Some attention should be paid to \f(CWid_argtps\fP and \f(CWid_type\fP. -These members have type \f(CWarray of char\fP, in contrast to -their counterparts in the \fIoutdef\fP descriptor. -The only operation performed on types is a check on equality. -Types are output by the first pass as a string describing the type. -The type of \f(CWi\fP in \f(CWint *i();\fP e.g. is output as -\f(CWint *()\fP. -Such a string is best put in an \f(CWarray of char\fP to be compared -easily. -.NH 2 -The second pass checking mechanism -.PP -After all the definitions that are output by the first pass are -sorted by name, the definitions belonging to one name are ordered -as follows. -.QS -.RS -.IP - 1 -external definitions -.IP - -static definitions -.IP - -library definitions -.IP - -declarations -.IP - -function calls -.IP - -variable usages -.RE -.QE -.PP -The main program of the second pass is easily explained. -For all different names, do the following. -First read the definitions. 
-If there is more than one definition, check for conflicts. -Then read the declarations, function calls and variable usages and -check them against the definitions. -After having processed all the declarations, function calls and -variable usages, check the definitions to see if they are used -correctly. -The next three paragraphs will explain the three most important -functions of the program. -.NH 3 -Read_defs() -.PP -This function reads all definitions belonging to the same name. -Only one external definition is allowed, so if there are more, a -warning is given. -In different files it is allowed to define static functions or -variables with the same name. -So if a static function is read, \f(CWread_defs\fP checks if there isn't -already an external definition, and if not it puts the static -definition in the list of static definitions, to be used later. -If no external or static definitions are met, a library definition is -taken as definition. -If a function or a variable is defined with the same name as a function -or a variable in a library (which is allowed) -.I lint -gives a warning. -Of course it is also possible that there is no definition at all. -In that case \f(CWcheck\fP will warn. -.NH 3 -Check() -.PP -\f(CWCheck\fP verifies declarations, function calls and variable -usages against the definitions. -For each of these entries the corresponding definition is looked up. -As there may be more than one static definition, first a static -definition from the same file as the entry is searched. -If not present, the external definition (which may be a library -definition) is taken as definition. -If no definition can be found and the current entry is an external -declaration, -.I lint -warns. -However in the case of an implicit function declaration -.I lint -will not warn, because -we will get a warning \f(CW%s used but not defined\fP later on. -Next a check is done if the declarations are consistent with their -definitions. 
-After the declarations, the function calls and variable usages are -verified against their corresponding definitions. -If no definition exists, -.I lint -warns. -Else the field \f(CWid_called\fP is set to 1. -(For variable definitions this should be interpreted as \fIused\fP.) -For variable usages this will be all. -If we are processing a function call we also check the number and types -of the arguments and we warn for function values which are used from -functions that don't return a value. -For each function call we administrate if a function value is used, -ignored or voided. -.NH 3 -Check_usage() -.PP -Checks if the external definition and static definitions are used -correctly. -If a function or variable is defined but never used, -.I lint -warns, except for library definitions. -Functions, which return a value but whose value is always or -sometimes ignored, get a warning. -(A function value which is voided (cast to void) is not ignored, -but it isn't used either.) -.bp diff --git a/doc/lint/chap5 b/doc/lint/chap5 deleted file mode 100644 index 28c4f7c30..000000000 --- a/doc/lint/chap5 +++ /dev/null @@ -1,107 +0,0 @@ -.NH 1 -How to make lint shut up -.PP -It can be very annoying having -.I lint -warn about questionable constructs of which the programmer already is -aware. -There should be a mechanism to give -.I lint -some extra information in the source code. -This could be done by introducing some special keywords, which -would have a special meaning to -.I lint. -This is a bad solution, because these keywords would cause existing -C compilers not to work on these programs. -A neater solution is to invent some comments having a special -meaning to -.I lint. -We call these comments -.I pseudocomments. -The pseudocomments have no meaning to existing C compilers, so -compilers will not have to be rewritten for C programs containing -the previously proposed special keywords. -The following pseudocomments are recognized by -.I lint. 
-.LP -\f(CW/* VARARGS\fIn\fP */\fR -.br -.in 5 -The next function can be called with a variable number of arguments. -Only check the first \fIn\fP arguments. -The \fIn\fP must follow the word \f(CWVARARGS\fP immediately. -This pseudocomment is useful for functions like e.g. printf. -(The definition of the function printf should be preceded by -\f(CW/*\ VARARGS1\ */\fP.) -.in -.LP -\f(CW/* VARARGS */\fP -.br -.in 5 -Means the same as \f(CW/* VARARGS0 */\fP. -.in -.LP -\f(CW/* ARGSUSED */\fP -.br -.in 5 -Don't complain about unused arguments in the next function. -When we are developing a program we sometimes write functions of -which we do not yet use the arguments. -Because we do want to use -.I lint -on these programs, it is nice to have this pseudocomment. -.in -.LP -\f(CW/* NOTREACHED */\fP -.br -.in 5 -.I Lint -makes no attempt to discover functions which never return, -although it \fIis\fP possible to find functions that don't return. -This would require a transitive closure with respect to the already -known \fInot-returning\fP functions; an inacceptable time consuming -process. -To make -.I lint -aware of a function that doesn't return, a call of this function -should be followed by the pseudocomment \f(CW/*\ NOTREACHED\ */\fP. -This pseudocomment can also be used to indicate that some case part -inside a switch (especially a default part) can't be reached. -The above mentioned cases of use of this pseudocomment are -examples. -The comment can be used just to indicate that some part of the -program can't be reached. -It sometimes is necessary to introduce an extra compound statement -to get the right effect. -See figure 9. -.KF -.DS B -.ft CW - if (cond) - /* if part */ ; - else { - error(); /* doesn't return */ - /* NOTREACHED */ - } -/* Without the compound else part, lint would assume - * the statement after the if statement to be NOTREACHED, - * instead of the end of the else part. - */ -.I -.DE -.ce -figure\ 9. 
-.R -.KE -.in -.LP -\f(CW/* LINTLIBRARY */\fP -.br -.in 5 -All definitions following this comment are assumed to be library -definitions. -It shuts off complaints about unused functions and variables. -See also section 4.2.7 for how to use this comment for generating -lint libraries. -.in -.bp diff --git a/doc/lint/chap6 b/doc/lint/chap6 deleted file mode 100644 index 6ba75b309..000000000 --- a/doc/lint/chap6 +++ /dev/null @@ -1,107 +0,0 @@ -.NH 1 -User options -.PP -.I Lint -recognizes the following command line flags. -Some of them are identical to the flags of -.I cem. -.I Lint -warns for flags it doesn't know. -.LP -\f(CW-D -.br --D=\fP -.br -.in 5 -Causes \f(CW\fP to be defined as a macro. -The first form is equivalent to `\f(CW-D=1\fP'. -The second form is equivalent to putting `\f(CW#define \fP' -in front of all the source files. -.in -.LP -\f(CW-U\fP -.br -.in 5 -Acts as if the line `\f(CW#undef \fP' is put in front of all -the source files. -.in -.LP -\f(CW-I\fP -.br -.in 5 -This puts \f(CW\fP in the include directory -list. -.in -.LP -\f(CW-R\fP -.br -.in 5 -Turn off the `strict' option. -Default -.I lint -checks the program according to the Reference Manual, because this -gives a definition of the language with which there is a better chance -of writing portable programs. -With this flag on, some constructs, otherwise not allowed, are -accepted. -.in -.LP -\f(CW-l -.br --llib-l.ln -.br --l\fP -.br -.in 5 -`\f(CW-l\fP' tells -.I lint -to search the lint library -\f(CWllib-l.ln\fP for missing -definitions of functions and variables. -The option `\f(CW-llib-l.ln\fP' makes -.I lint -search the lint library file \f(CWllib-l.ln\fP in the current -directory for missing definitions. -Default is `\f(CW-lc\fP'; this default can be suppressed by -`\f(CW-l\fP'. -.in -.LP -\f(CW-a\fP -.br -.in 5 -Warn for conversions from integer to long and vice versa. -.in -.LP -\f(CW-b\fP -.br -.in 5 -Don't report not-reachable break statements. 
-This flag is useful for running -.I lint -on a \fIlex\fP- or \fIyacc\fP-generated source file. -.in -.LP -\f(CW-h\fP -.br -.in 5 -Check for useless statements and possible pointer alignment problems. -.in -.LP -\f(CW-n\fP -.br -.in 5 -Don't complain about unused and undefined functions and variables. -.in -.LP -\f(CW-v\fP -.br -.in 5 -Don't warn about unused arguments of functions. -.in -.LP -\f(CW-x\fP -.br -.in 5 -Complain about unused external variables. -.in -.bp diff --git a/doc/lint/chap7 b/doc/lint/chap7 deleted file mode 100644 index d224f1fdb..000000000 --- a/doc/lint/chap7 +++ /dev/null @@ -1,139 +0,0 @@ -.NH -Ideas for further development -.PP -Although the program in its current state is a useful program, -there are still a lot of features that should be implemented -in following versions. -I'll summarize them in this section. -.IP \(bu -Actually the program consists of three passes. -The filter -.I sort -is a complete pass, just as the first and the second pass. -I think we speed up the program by removing the filter and making -the second pass accept an unsorted file. -The sorting process can be done in parallel to the first pass if -both processes communicate through a pipe. -In addition to this sorting, the second pass can generate already -some warnings. -(Warnings like \f(CW%s defined but never used\fP can only be -generated after having processed all the input.) -These warnings generated in parallel to the warnings of the first pass, -should be sent to an intermediate file, otherwise the warnings would -get messed up. -Such an improvement will have best effect on a multi processing -machine, but even on single processing machines this will give a better -performance. (On a single processing machine the pipe should be -replaced by an intermediate file.) -.IP \(bu -Expressions could be classified so -.I lint -can warn for some classes of expressions in strange contexts. -Suppose as class . -\f(CWb\fP Will be of class if e.g. 
\f(CWb\fP is assigned to -the expression \f(CW || \fP. -The following expression should then give a warning -.DS B -.ft CW -b + i; /* weird expression */ -.R -.DE -.IP \(bu -A mechanism to check printf like routines. -This mechanism should verify the format string against the following -arguments. -There is a public domain program that can be used to do this job. -It is called printfck and should be used as a filter between the -source files and -.I lint. -.IP \(bu -Raise warnings for incomplete initializer lists like -.DS B -.ft CW -int a[10] = {0, 1, 2}; -/* initializer list not complete */ -.R -.DE -.IP \(bu -Warnings for constructs like -.DS B -.ft CW -for (i = 0; i < 10; i++) { - . . . . - i--; - /* loop control variable affected */ - . . . . -} -.R -.DE -and -.DS B -.ft CW -while (var) { - /* statements in which the value - * of var is never changed - */ -} -/* loop control variable not updated */ -.R -.DE -.IP \(bu -A warning \f(CWbad layout\fP for program fragments like -.DS B -.ft CW -if (cond1) - if (cond2) - statement(); -else /* bad layout */ - statement(); -.R -.DE -.IP \(bu -A warning \f(CWassignment in conditional context\fP in case of -.DS B -.ft CW -if (a = b) -.R -.DE -.IP -The programmer probably meant \f(CWif (a == b)\fP. -No warning should be given for \f(CWif ((a = b) != c)\fP, -nor for \f(CWif ((a = b))\fP. -.IP \(bu -Warnings for empty statements in strange contexts, like -.DS B -.ft CW -if (cond); /* mistake */ - statement(); -.R -.DE -.IP -(This mistake would also be detected by a warning \f(CWbad layout\fP.) -.IP \(bu -A mechanism to prevent the warning \f(CWpossible pointer alignment -problem\fP for functions of which the programmer already knows that -no problem will arise. -E.g. for functions like malloc and family. -.IP \(bu -The current version of -.I lint -warns for conversions from long to int (if -a flag is -on). -It even warns if the programmer used the proper cast, as e.g. 
-.DS B -.ft CW -int i; -long l = 0L; - -i = (int)l; -.R -.DE -.IP -In this case I think -.I lint -need not warn. -The explicit cast indicates that the programmer knows what he is -doing. -This feature is not implemented because the expression tree doesn't -show if the cast was implicit or explicit. -.bp diff --git a/doc/lint/chap8 b/doc/lint/chap8 deleted file mode 100644 index eac0ea6b3..000000000 --- a/doc/lint/chap8 +++ /dev/null @@ -1,56 +0,0 @@ -.NH 1 -Testing the program -.PP -There is no test-suite for testing -.I lint. -I have written a lot of small files that each test one -particular property of the program. -At this moment there are about 220 test programs. -.PP -It would take a lot of time and effort to run these tests by hand. -To ease this work I wrote a program that runs these tests -automatically. -The test program (the program that runs the tests) needs, associated -with each .c file, a .w file, containing from each expected warning -a substring. E.g. when the following warnings should be given by -.I lint: -.DS B -.ft CW - file t.c, line 3, i evaluation order undefined - file t.c, line 6, a set but not used in function main -.R -.DE -it is sufficient to write a file \f(CWt.w\fP containing -.DS B -.ft CW - a set but not used in function main - i evaluation order undefined -.R -.DE -The test program is called with all the .c files to be tested -as arguments. -.PP -Sometimes it is necessary to test -.I lint -on two files. -The test program runs -.I lint -on two files when two consecutive -arguments are of the form \fIname\fPa.c and \fIname\fPb.c. -It then compares the output of -.I lint -with the file \fIname\fP.w. -.PP -.I Lint -is also tested by running it on existing programs. -.I Lint -has been run on some \s-2UNIX\s+2 utility programs in -/usr/src/cmd, on Unipress Emacs (consisting of more than 30,000 -lines of code) and the program itself. -Bugs have been found in e.g. /usr/src/cmd/cat.c and -/usr/src/cmd/ld.c. 
-To test the robustness of the program, it was run on the -password file /etc/passwd and on `mixed' C program files. -These mixed C program files are C program files that were -broken in chunks and then put together in a different order. -.bp diff --git a/doc/lint/chap9 b/doc/lint/chap9 deleted file mode 100644 index fca2bb637..000000000 --- a/doc/lint/chap9 +++ /dev/null @@ -1,48 +0,0 @@ -.NH 1 -References -.IP [1] -Dennis M. Ritchie, -.I -C Reference Manual, -.R -Bell Laboratories, -Murray Hill, -New Jersey, -1978. -.IP [2] -B.W. Kernighan and D.M. Ritchie, -.I -The C Programming Language, -.R -Prentice Hall, -1978. -.IP [3] -Eric H. Baalbergen, Dick Grune, Maarten Waage, -.I -The CEM Compiler, -.R -Manual IM-4, Vrije Universiteit, Amsterdam, -1985. -.IP [4] -Andrew S. Tanenbaum et al., -.I -A practical tool kit for making portable compilers, -.R -Comm. ACM, -Sep. 1983. -.IP [5] -S. C. Johnson, -.I -Lint, a C program verifier, -.R -Bell Laboratories, -Murray Hill, -New Jersey, -1978. -.IP [6] -Dick Grune, Ceriel J. H. Jacobs, -.I -A Programmer-friendly LL(1) Parser Generator, -.R -IR 127, Vrije Universiteit, Amsterdam, -1987. diff --git a/doc/lint/contents b/doc/lint/contents deleted file mode 100644 index 93538fe71..000000000 --- a/doc/lint/contents +++ /dev/null @@ -1,59 +0,0 @@ -.DS - - - - - - -.DE -.SH -Contents -.R -.sp 1 -.IP 1. -Introduction -.IP 2. -Outline of the program -.IP 3. -What lint checks -.RS -.IP 3.1 -Set, used and unused variables -.IP 3.2 -Flow of control -.IP 3.3 -Functions -.IP 3.4 -Undefined evaluation order -.IP 3.5 -Pointer alignment problems -.IP 3.6 -Libraries -.RE -.IP 4. -How lint checks -.RS -.IP 4.1 -The first pass data structure -.IP 4.2 -The first pass checking mechanism -.IP 4.3 -The second pass data structure -.IP 4.4 -The second pass checking mechanism -.RE -.IP 5. -How to make lint shut up -.IP 6. -User options -.IP 7. -Ideas for further development -.IP 8. -Testing the program -.IP 9. 
-References -.LP -Appendix A \- The warnings -.br -Appendix B \- The Ten Commandments for C programmers -.bp diff --git a/doc/lint/frontpage b/doc/lint/frontpage deleted file mode 100644 index 7ac226657..000000000 --- a/doc/lint/frontpage +++ /dev/null @@ -1,14 +0,0 @@ -.TL -.sp 5 -Lint, a C Program Checker -.AU -Frans Kunst -.AI -Vrije Universiteit -Amsterdam -.LP -.sp 8 -.ce -Afstudeer verslag -.ce -18 mei 1988 diff --git a/doc/lint/proto.make b/doc/lint/proto.make deleted file mode 100644 index c8e589da7..000000000 --- a/doc/lint/proto.make +++ /dev/null @@ -1,24 +0,0 @@ -# $Id$ - -#PARAMS do not remove this line! - -SRC_DIR = $(SRC_HOME)/doc/lint - -FP = $(SRC_DIR)/frontpage - -DOC = $(SRC_DIR)/abstract \ - $(SRC_DIR)/contents \ - $(SRC_DIR)/chap1 \ - $(SRC_DIR)/chap2 \ - $(SRC_DIR)/chap3 \ - $(SRC_DIR)/chap4 \ - $(SRC_DIR)/chap5 \ - $(SRC_DIR)/chap6 \ - $(SRC_DIR)/chap7 \ - $(SRC_DIR)/chap8 \ - $(SRC_DIR)/chap9 \ - $(SRC_DIR)/appendix_A \ - $(SRC_DIR)/appendix_B - -$(TARGET_HOME)/doc/lint.doc: $(FP) $(DOC) - cat $(FP) $(DOC) > $(TARGET_HOME)/doc/lint.doc diff --git a/doc/m2ref.doc b/doc/m2ref.doc deleted file mode 100644 index 261db7219..000000000 --- a/doc/m2ref.doc +++ /dev/null @@ -1,545 +0,0 @@ -.\" $Id$ -.\" troff -ms m2ref.doc -.TL -The ACK Modula-2 Compiler -.AU -Ceriel J.H. Jacobs -.AI -Department of Mathematics and Computer Science -Vrije Universiteit -Amsterdam -The Netherlands -.AB no -.AE -.NH -Introduction -.PP -This document describes the implementation-specific features of the -ACK Modula-2 compiler. -It is not intended to teach Modula-2 programming. -For a description of the Modula-2 language, -the reader is referred to [1]. -.PP -The ACK Modula-2 compiler is currently available for use with the VAX, -Motorola MC68020, -Motorola MC68000, -PDP-11, -and Intel 8086 code-generators. -For the 8086, -MC68000, -and MC68020, -floating point emulation is used. -This is made available with the \fI-fp\fP -option, -which must be passed to \fIack\fP[4,5]. 
-.NH -The language implemented -.PP -This section discusses the deviations from the Modula-2 language as described -in the "Report on The Programming Language Modula-2", -as it appeared in [1], -from now on referred to as "the Report". -Also, -the Report sometimes leaves room for interpretation. -The section numbers -mentioned are the section numbers of the Report. -.NH 2 -Syntax (section 2) -.PP -The syntax recognized is that of the Report, -with some extensions to -also recognize the syntax of an earlier definition, -given in [2]. -Only one compilation unit per file is accepted. -.NH 2 -Vocabulary and Representation (section 3) -.PP -The input "\f(CW10..\fP" is parsed as two tokens: "\f(CW10\fP" and "\f(CW..\fP". -.PP -The empty string \f(CW""\fP has type -.DS -.ft CW -ARRAY [0 .. 0] OF CHAR -.ft P -.DE -and contains one character: \f(CW0C\fP. -.PP -When the text of a comment starts with a '\f(CW$\fP', -it may be a pragma. -Currently, -the following pragmas exist: -.DS -.ft CW -(*$F (F stands for Foreign) *) -(*$R[+|-] (Runtime checks, on or off, default on) *) -(*$A[+|-] (Array bound checks, on or off, default off) *) -(*$U (Allow for underscores within identifiers) *) -.ft P -.DE -The Foreign pragma is only meaningful in a \f(CWDEFINITION MODULE\fP, -and indicates that this -\f(CWDEFINITION MODULE\fP describes an interface to a module written in another -language (for instance C, -Pascal, -or EM). -Runtime checks that can be disabled are: -range checks, -\f(CWCARDINAL\fP overflow checks, -checks when assigning a \f(CWCARDINAL\fP to an \f(CWINTEGER\fP and vice versa, -and checks that \f(CWFOR\fP-loop control-variables are not changed -in the body of the loop. -Array bound checks can be enabled, -because many EM implementations do not -implement the array bound checking of the EM array instructions. -When enabled, -the compiler generates a check before generating an -EM array instruction. 
-Even when underscores are enabled, -they still may not start an identifier. -.PP -Constants of type \f(CWLONGINT\fP are integers with a suffix letter \f(CWD\fP -(for instance \f(CW1987D\fP). -Constants of type \f(CWLONGREAL\fP have suffix \f(CWD\fP if a scale factor is missing, -or have \f(CWD\fP in place of \f(CWE\fP in the scale factor (f.i. \f(CW1.0D\fP, -\f(CW0.314D1\fP). -This addition was made, -because there was no way to indicate long constants, -and also because the addition was made in Wirth's newest Modula-2 compiler. -.NH 2 -Declarations and scope rules (section 4) -.PP -Standard identifiers are considered to be predeclared, -and valid in all -parts of a program. -They are called \fIpervasive\fP. -Unfortunately, -the Report does not state how this pervasiveness is accomplished. -However, -page 87 of [1] states: "Standard identifiers are automatically -imported into all modules". -Our implementation therefore allows -redeclarations of standard identifiers within procedures, -but not within -modules. -.NH 2 -Constant expressions (section 5) -.PP -Each operand of a constant expression must be a constant: -a string, -a number, -a set, -an enumeration literal, -a qualifier denoting a -constant expression, -a type transfer with a constant argument, -or one of the standard procedures -\f(CWABS\fP, -\f(CWCAP\fP, -\f(CWCHR\fP, -\f(CWLONG\fP, -\f(CWMAX\fP, -\f(CWMIN\fP, -\f(CWODD\fP, -\f(CWORD\fP, -\f(CWSIZE\fP, -\f(CWSHORT\fP, -\f(CWTSIZE\fP, -or \f(CWVAL\fP, -with constant argument(s); -\f(CWTSIZE\fP and \f(CWSIZE\fP may also have a variable as argument. -.PP -Floating point expressions are never evaluated compile time, -because the compiler basically functions as a cross-compiler, -and thus cannot -use the floating point instructions of the machine on which it runs. -Also, -\f(CWMAX(REAL)\fP and \f(CWMIN(REAL)\fP are not allowed. 
-.NH 2 -Type declarations (section 6) -.NH 3 -Basic types (section 6.1) -.PP -The type \f(CWCHAR\fP includes the ASCII character set as a subset. -Values range from -\f(CW0C\fP to \f(CW377C\fP, -not from \f(CW0C\fP to \f(CW177C\fP. -.NH 3 -Enumerations (section 6.2) -.PP -The maximum number of enumeration literals in any one enumeration type -is \f(CWMAX(INTEGER)\fP. -.NH 3 -Record types (section 6.5) -.PP -The syntax of variant sections in [1] is different from the one in [2]. -Our implementation recognizes both, -giving a warning for the older one. -However, -see section 3. -.NH 3 -Set types (section 6.6) -.PP -The only limitation imposed by the compiler is that the base type of the -set must be a subrange type, -an enumeration type, -\f(CWCHAR\fP, -or \f(CWBOOLEAN\fP. -So, -the lower bound may be negative. -However, -if a negative lower bound is used, -the compiler gives a warning of the \fIrestricted\fP class (see the manual -page of the compiler). -.PP -The standard type \f(CWBITSET\fP is defined as -.DS -.ft CW -TYPE BITSET = SET OF [0 .. 8*SIZE(INTEGER)-1]; -.ft P -.DE -.NH 2 -Expressions (section 8) -.NH 3 -Operators (section 8.2) -.NH 4 -Arithmetic operators (section 8.2.1) -.PP -The Report does not specify the priority of the unary -operators \f(CW+\fP or \f(CW-\fP: -It does not specify whether -.DS -.ft CW -- 1 + 1 -.ft P -.DE -means -.DS -.ft CW -- (1 + 1) -.ft P -.DE -or -.DS -.ft CW -(-1) + 1 -.ft P -.DE -I have seen some compilers that implement the first alternative, -and others that implement the second. -Our compiler implements the second, -which is suggested by the fact that their priority is not specified, -which might indicate that it is the same as that of their binary counterparts. -And then the rule about left to right decides for the second. -On the other hand one might argue that, -since the grammar only allows for one unary operator in a simple expression, -it must apply to the whole simple expression, -not just the first term. 
-.NH 2 -Statements (section 9) -.NH 3 -Assignments (section 9.1) -.PP -The Report does not define the evaluation order in an assignment. -Our compiler certainly chooses an evaluation order, -but it is explicitly left undefined. -Therefore, -programs that depend on it may cease to work later. -.PP -The types \f(CWINTEGER\fP and \f(CWCARDINAL\fP are assignment-compatible with -\f(CWLONGINT\fP, -and \f(CWREAL\fP is assignment-compatible with \f(CWLONGREAL\fP. -.NH 3 -Case statements (section 9.5) -.PP -The size of the type of the case-expression must be less than or equal to -the word-size. -.PP -The Report does not specify what happens if the value of the case-expression -does not occur as a label of any case, -and there is no \f(CWELSE\fP-part. -In our implementation, -this results in a runtime error. -.NH 3 -For statements (section 9.8) -.PP -The Report does not specify the legal types for a control variable. -Our implementation allows the basic types (except \f(CWREAL\fP), -enumeration types, -and subranges. -A runtime warning is generated when the value of the control variable -is changed by the statement sequence that forms the body of the loop, -unless runtime checking is disabled. -.NH 3 -Return and exit statements (section 9.11) -.PP -The Report does not specify which result-types are legal. -Our implementation allows any result type. -.NH 2 -Procedure declarations (section 10) -.PP -Function procedures must exit through a RETURN statement, -or a runtime error occurs. -.NH 3 -Standard procedures (section 10.2) -.PP -Our implementation supports \f(CWNEW\fP and \f(CWDISPOSE\fP -for backwards compatibility, -but issues warnings for their use. -However, -see section 3. -.PP -Also, -some new standard procedures were added, -similar to the new standard procedures in Wirth's newest compiler: -.IP \- -\f(CWLONG\fP converts an argument of type \f(CWINTEGER\fP or \f(CWREAL\fP to the -types \f(CWLONGINT\fP or \f(CWLONGREAL\fP. 
-.IP \- -\f(CWSHORT\fP performs the inverse transformation, -without range checks. -.IP \- -\f(CWFLOATD\fP is analogous to \f(CWFLOAT\fP, -but yields a result of type -\f(CWLONGREAL\fP. -.IP \- -\f(CWTRUNCD\fP is analogous to \f(CWTRUNC\fP, -but yields a result of type -\f(CWLONGINT\fP. -.NH 2 -System-dependent facilities (section 12) -.PP -The type \f(CWBYTE\fP is added to the \f(CWSYSTEM\fP module. -It occupies a storage unit of 8 bits. -\f(CWARRAY OF BYTE\fP has a similar effect to \f(CWARRAY OF WORD\fP, -but is safer. -In some obscure cases the \f(CWARRAY OF WORD\fP mechanism does not quite -work properly. -.PP -The procedure \f(CWIOTRANSFER\fP is not implemented. -.NH 1 -Backwards compatibility -.PP -Besides recognizing the language as described in [1], -the compiler recognizes most of the language described in [2], -for backwards compatibility. -It warns the user for old-fashioned -constructions (constructions that [1] does not allow). -If the \fI-Rm2-3\fP option (see [6]) is passed to \fIack\fP, -this backwards compatibility feature is disabled. -Also, -it may not be present on some -smaller machines, -like the PDP-11. -.NH 1 -Compile time errors -.PP -The compile time error messages are intended to be self-explanatory, -and not listed here. -The compiler also sometimes issues warnings, -recognizable by a warning-classification between parentheses. -Currently, -there are 3 classifications: -.IP "(old-fashioned use)" -.br -These warnings are given on constructions that are not allowed by [1], -but are allowed by [2]. -.IP (strict) -.br -These warnings are given on constructions that are supported by the -ACK Modula-2 compiler, -but might not be supported by others. -Examples: functions returning structured types, -SET types of subranges with -negative lower bound. -.IP (warning) -.br -The other warnings, -such as warnings about variables that are never assigned, -never used, -etc. 
-.NH 1 -Runtime errors -.PP -The ACK Modula-2 compiler produces code for an EM machine as defined in [3]. -Therefore, -it depends on the implementation -of the EM machine for detection of some of the runtime errors that could occur. -.PP -The \fITraps\fP module enables the user to install his own runtime -error handler. -The default one just displays what happened and exits. -Basically, -a trap handler is just a procedure that takes an INTEGER as -parameter. -The INTEGER is the trap number. -This INTEGER can be one of the -EM trap numbers, -listed in [3], -or one of the numbers listed in the -\fITraps\fP definition module. -.PP -The following runtime errors may occur: -.IP "array bound error" -.br -The detection of this error depends on the EM implementation. -.IP "range bound error" -.br -Range bound errors are always detected, -unless runtime checks are disabled. -.IP "set bound error" -.br -The detection of this error depends on the EM implementation. -The current implementations detect this error. -.IP "integer overflow" -.br -The detection of this error depends on the EM implementation. -.IP "cardinal overflow" -.br -This error is detected, -unless runtime checks are disabled. -.IP "cardinal underflow" -.br -This error is detected, -unless runtime checks are disabled. -.IP "real overflow" -.br -The detection of this error depends on the EM implementation. -.IP "real underflow" -.br -The detection of this error depends on the EM implementation. -.IP "divide by 0" -.br -The detection of this error depends on the EM implementation. -.IP "divide by 0.0" -.br -The detection of this error depends on the EM implementation. -.IP "undefined integer" -.br -The detection of this error depends on the EM implementation. -.IP "undefined real" -.br -The detection of this error depends on the EM implementation. 
-.IP "conversion error" -.br -This error occurs when assigning a negative value of type INTEGER to a -variable of type CARDINAL, -or when assigning a value of CARDINAL that is > MAX(INTEGER), -to a variable of type INTEGER. -It is detected, -unless runtime checking is disabled. -.IP "stack overflow" -.br -The detection of this error depends on the EM implementation. -.IP "heap overflow" -.br -The detection of this error depends on the EM implementation. -Might happen when ALLOCATE fails. -.IP "case error" -.br -This error occurs when none of the cases in a CASE statement are selected, -and the CASE statement has no ELSE part. -The detection of this error depends on the EM implementation. -All current EM implementations detect this error. -.IP "stack size of process too large" -.br -This is most likely to happen if the reserved space for a coroutine stack -is too small. -In this case, -increase the size of the area given to -\f(CWNEWPROCESS\fP. -It can also happen if the stack needed for the main -process is too large and there are coroutines. -In this case, -the only fix is to reduce the stack size needed by the main process, -f.i. by avoiding local arrays. -.IP "too many nested traps + handlers" -.br -This error can only occur when the user has installed his own trap handler. -It means that during execution of the trap handler another trap has occurred, -and that several times. -In some cases, -this is an error because of overflow of some internal tables. -.IP "no RETURN from function procedure" -.br -This error occurs when a function procedure does not return properly -("falls" through). -.IP "illegal instruction" -.br -This error might occur when floating point operations are used on an -implementation that does not have floating point. -.PP -In addition, -some of the library modules may give error messages. -The \fBTraps\fP-module has a suitable mechanism for this. -.NH 1 -Calling the compiler -.PP -See [4,5,6] for a detailed explanation.
-.PP -The compiler itself has no version checking mechanism. -A special linker -would be needed to do that. -Therefore, -a makefile generator is included [7]. -.NH 1 -The procedure call interface -.PP -Parameters are pushed on the stack in reversed order, -so that the EM AB -(argument base) register indicates the first parameter. -For VAR parameters, -its address is passed, -for value parameters its value. -The only exception to this rule is with conformant arrays. -For conformant arrays, -the address is passed, -and an array descriptor is -passed. -The descriptor is an EM array descriptor. -It consists of three -fields: the lower bound (always 0), -upper bound - lower bound, -and the size of the elements. -The descriptor is pushed first. -If the parameter is a value parameter, -the called routine must make sure -that its value is never changed, -for instance by making its own copy -of the array. -The Modula-2 compiler does exactly this. -.PP -When the size of the return value of a function procedure is larger than -the maximum of \f(CWSIZE(LONGREAL)\fP and twice the pointer-size, -the caller reserves this space on the stack, -above the parameters. -Callee then stores -its result there, -and returns no other value. 
-.NH 1 -References -.IP [1] -Niklaus Wirth, -.I -Programming in Modula-2, third, corrected edition, -.R -Springer-Verlag, Berlin (1985) -.IP [2] -Niklaus Wirth, -.I -Programming in Modula-2, -.R -Springer-Verlag, Berlin (1983) -.IP [3] -A.S.Tanenbaum, J.W.Stevenson, Hans van Staveren, E.G.Keizer, -.I -Description of a machine architecture for use with block structured languages, -.R -Informatica rapport IR-81, Vrije Universiteit, Amsterdam -.IP [4] -UNIX manual \fIack\fP(1) -.IP [5] -UNIX manual \fImodula-2\fP(1) -.IP [6] -UNIX manual \fIem_m2\fP(6) -.IP [7] -UNIX manual \fIm2mm\fP(1) diff --git a/doc/m68020.doc b/doc/m68020.doc deleted file mode 100644 index daeb464d8..000000000 --- a/doc/m68020.doc +++ /dev/null @@ -1,1408 +0,0 @@ -.nr PS 11 -.nr VS 13p -.EQ -delim @@ -.EN -.EQ -gfont R -.EN -.ND -.RP -.TL -A back end table for the Motorola MC68000, MC68010 and MC68020 microprocessors -.AU -Frank Doodeman -.AB -A back end table is part of the Amsterdam Compiler Kit (ACK). It is used -to produce the actual back end, a program that translates the intermediate -language family EM to assembly language for some target machine. The table -discussed here can be used for two back ends, suitable for in total three -machines: the MC68000 and MC68010 (the difference between these two is -so small that one back end table can be used for either one), or -for the MC68020. -.AE -.NH -Introduction -.PP -To simplify the task of producing portable (cross) compilers and interpreters -the Vrije Universiteit designed an integrated collection of programs, the -Amsterdam Compiler Kit (ACK) [2]. It is based on the old UNCOL idea [1] which -attempts to solve the problem of how to make a compiler for each of @ N @ -languages on @ M @ different machines without having to write @ N times M @ -programs.
-.PP -The UNCOL approach is to write @ N @ -.I -front ends, -.R -which translate the -source language into a common intermediate language UNCOL (Universal Computer -Oriented Language), and @ M @ -.I -back ends, -.R -each of which translates programs in -UNCOL into a specific machine language. Under these conditions only @ M + N @ -programs must be written to provide all @ N @ languages on all @ M @ -machines, instead of @ M times N @ programs. -.PP -The intermediate language for the Amsterdam Compiler Kit is the machine language -for a simple stack machine called EM (Encoding Machine) [3]. So a back end for -the MC68020 translates EM code into MC68020 assembly language. Writing such a -table [4] suffices to get the back end. -.PP -The back end is a single program that is driven by a machine dependent driving -table. This table, the back end table, defines the mapping of EM code to -the MC68000, MC68010 or MC68020 assembly language. -.NH -The MC68000 and MC68020 micro processors -.PP -In this document the name MC68000 will be used for both the MC68000 and the -MC68010 micro processors, because as far as the back end table is concerned -there is no difference between them. For a complete and detailed description -of the MC68020 one is referred to [5]; for the MC68000 one might also use [6]. -In this section some relevant parts will be handled. -.NH 2 -Registers -.PP -Both the MC68000 and the MC68020 have eight 32-bit data registers (@ D sub 0 @-@ D sub 7 @) that can -be used for byte (8-bit), word (16-bit) and long word (32-bit) data operations. -They also have seven 32-bit address registers (@ A sub 0 @-@ A sub 6 @) that may be used as -software stack pointers and base address registers; address register @ A sub 7 @ is -used as the system stack pointer. Address registers may also be used for -word and long word address operations. -.NH 2 -Addressing modes -.PP -First the MC68000 addressing modes will be discussed. 
Since the MC68020's -set of addressing modes is an extension of the MC68000's set, of course this -section also applies to the MC68020. -.PP -In the description we use: -.IP @ A sub n @ -for address register; -.IP @ D sub n @ -for data register; -.IP @ R sub n @ -for address or data register; -.IP @ X sub n @ -for index register (either data or address register); -.IP @ PC @ -for program counter; -.IP @ d sub 8 @ -for 8 bit displacement integer; -.IP @ d sub 16 @ -for 16 bit displacement integer; -.IP @ bd @ -for base displacement (may be null, word or long); -.IP @ od @ -for outer displacement (may be null, word or long). -.NH 3 -General addressing modes -.NH 4 -Register Direct Addressing -.IP Syntax: 8 -@ R sub n @ -.PP -This addressing mode (it can be used with either a data register or an address -register) specifies that the operand is in one of -the 16 multifunction registers. -.NH 4 -Address Register Indirect -.IP Syntax: 8 -@ ( A sub n ) @ -.PP -The address of the operand is in the address register specified. -.NH 4 -Address Register Indirect With Postincrement -.IP Syntax: 8 -@ ( A sub n )+ @ -.PP -The address of the operand is in the address register specified. After the -operand address is used, the address register is incremented by one, two or -four depending upon whether the size of the operand is byte, word or long. -If the address register is the stack pointer and the operand size is byte, the -address register is incremented by two rather than one to keep the stack pointer -on a word boundary. -.NH 4 -Address Register Indirect With Predecrement -.IP Syntax: 8 -@ -( A sub n ) @ -.PP -The address of the operand is in the address register specified. Before the -operand address is used, the address register is decremented by one, two or -four depending upon whether the size of the operand is byte, word or long. 
-If the address register is the stack pointer and the operand size is byte, the -address register is decremented by two rather than one to keep the stack pointer -on a word boundary. -.NH 4 -Address Register Indirect With Displacement -.IP Syntax: 8 -@ d sub 16 ( A sub n ) @ for the MC68000, @ ( d sub 16 , A sub n ) @ for the MC68020 -.PP -This address mode requires one word of extension. The address of the operand is -the sum of the contents of the address register and the sign extended 16-bit -integer in the extension word. -.NH 4 -Address Register Indirect With Index -.IP Syntax: 8 -@ d sub 8 ( A sub n , X sub n .size) @ for the MC68000, @ ( d sub 8 , A sub n , X sub n .size) @ for the MC68020 -.PP -This address mode requires one word of extension according to a certain format, -which specifies -.IP 1. -which register to use as index register; -.IP 2. -a flag that indicates whether the index register is a data register or an -address register; -.IP 3. -a flag that indicates the index size; this is -.I word -when the low order part of the index register is to be used, and -.I long -when the whole long value in the register is to be used as index; -.IP 4. -an 8-bit displacement integer (the low order byte of the extension word). -.PP -The address of the operand is the sum of the contents of the address register, -the possibly sign extended contents of index register and the sign -extended 8-bit displacement. -.NH 4 -Absolute Data Addressing -.IP Syntax: 8 -@ address @ for the MC68000, @ ( address ) @ for the MC68020 -.PP -Two different kinds of this mode are available: -.IP 1. -Absolute Short Address; this mode requires one word of extension. The address of -the operand is the sign extended 16-bit extension word. -.IP 2. -Absolute Long Address; this mode requires two words of extension. 
The address of -the operand is developed by concatenation of the two extension words; the high -order part of the address is the first extension word, the low order part is -the second. -.NH 4 -Program Counter With Displacement. -.IP Syntax: 8 -@ d sub 16 ( PC ) @ for the MC68000, @ ( d sub 16 , PC ) @ for the MC68020 -.PP -This mode requires one word of extension. The address of the operand is the sum -of the address in the program counter and the sign extended 16-bit displacement -integer in the extension word. The value in the program counter is the -address of the extension word. -.NH 4 -Program Counter With Index -.IP Syntax: 8 -@ d sub 8 ( PC , X sub n .size ) @ for the MC68000, @ ( d sub 8 , PC, X sub n .size ) @ for the MC68020 -.PP -This mode requires one word of extension as described under -.I -Address Register Indirect With Index. -.R -The address of the operand is the sum of the value in the -program counter, the possibly sign extended index register and the sign -extended 8-bit displacement integer in the extension word. -The value in the program counter is the address of the extension word. -.NH 4 -Immediate Data -.IP Syntax: 8 -@ "\#data" @ -.PP -This addressing mode requires either one or two words of extension, depending -on the size of the operation; -.IP -byte operation - the operand is in the low order byte of extension word; -.IP -word operation - the operand is in the extension word; -.IP -long operation - the operand is in the two extension words, the high order -16-bits are in the first extension word, the low order 16-bits in the second. -.NH 3 -Extra MC68020 addressing modes -.PP -The MC68020 has three more addressing modes. These modes all use a displacement -(some even two), an address register and an index register. Instead of the -address register one may also use the program counter. Any of these -may be omitted. If all addends are omitted the processor creates an -effective address of zero. 
All of these three modes require at least one -extension word, the -.I -Full Format Extension Word, -.R -which specifies: -.IP 1. -the index register number (0-7); -.IP 2. -the index register type (address or data register); -.IP 3. -the size of the index (only low order part or the whole register) -.IP 4. -a scale factor. This is a number from 0 to 3 which specifies how many bits -the contents of the index register is to be shifted to the left before being -used as an index; -.IP 5. -a flag that specifies whether the base (address) register is to be added or -to be suppressed; -.IP 6. -a flag that specifies whether to add or suppress the index operand; -.IP 7. -two bits that specify the size of the base displacement (null, word or long); -.IP 8. -three bits that in combination with (6) above specify which of the three -addressing modes (described below) to use and, if used, the size of the -outer displacement (null, word or long). -.IP N.B. -All modes mentioned above for the MC68000 -that use an index register may have this register -scaled (only when using the MC68020). -.PP -The three extra addressing modes are: -.NH 4 -Address Register Indirect With Index (Base Displacement) -.IP Syntax: 8 -@ ( bd , A sub n , X sub n .size*scale ) @ (MC68020 only) -.PP -The address of the operand is the sum of the contents of the address register, -the scaled contents of the possibly sign extended index register and the possibly -sign extended base displacement. When the program counter is used instead -of the address register, the value in the program counter is the address -of the full format extension word. This mode requires one or two more extension -words when the size of the base displacement is word or long respectively. -.PP -Note that without the index operand, this mode is an extension of the -.I -Address Register Indirect With Displacement -.R -mode; when using the MC68020 one is no longer limited to a 16-bit displacement.
-Also note that with the index operand added, this mode is an extension -of the -.I -Address Register Indirect With Index -.R -mode; when using the MC68020 one is no longer limited to an 8-bit displacement. -.NH 4 -Memory Indirect Post-Indexed -.IP Syntax: 8 -@ ( [ bd , A sub n ] , X sub n .size*scale , od ) @ (MC68020 only) -.PP -This mode may use an outer displacement. First an intermediate memory -address is calculated by adding the contents of the address register and -the possibly sign extended base displacement. This address is used -for an indirect memory access of a long word, followed by adding -the index operand (scaled and possibly sign extended). Finally the -outer displacement is added to yield the address of the operand. -When the program counter is used, the value in the program counter is the -address of the full format extension word. -.NH 4 -Memory Indirect Pre-Indexed -.IP Syntax: 8 -@ ( [ bd , A sub n , X sub n .size*scale ] , od ) @ (MC68020 only) -.PP -This mode may use an outer displacement. First an intermediate memory -address is calculated by adding the contents of the address register, -the scaled contents of the possibly sign extended index register and -the possibly sign extended base displacement. This address is used -for an indirect memory access of a long word, followed by adding -the outer displacement to yield the address of the operand. -When the program counter is used, the value in the program counter is the -address of the full format extension word. -.NH 3 -Addressing modes used in the table -.PP -Not all addressing modes mentioned above are used in code generation. It is -clear that none of the modes that use the program counter PC can be used, -since at code generation time nothing is known about the value in PC. -Also some of the possibilities of the three MC68020 addressing modes are not -used; e.g.
it is possible to use a -.I -Data Register Indirect -.R -mode, which actually is the -.I -Address Register Indirect With Index -.R -mode, with the address register and the displacement left out. However -such a mode would require two extra bytes for the full format extension word, -and it would also be much slower than using -.I -Address Register Indirect. -.R -For reasons of this kind several possible addressing modes are not used in the -generation of code. -In the table address registers are only used for holding addresses, and -for index registers only data registers are used. -.NH -The MC68000 and MC68020 back end table -.PP -The table itself has to be run through the C preprocessor -before it can be used to generate -the back end (called -.I -code generator -.R -or -.I cg -for short). When no flags are given to -the preprocessor an MC68020 code generator is produced; for the MC68000 -code generator one has to run the table through the preprocessor using the -.I -Dm68k4 -flag. -.PP -The table is designed as described in [4]. For the overall design of a back -end table one is referred to this document. This section only deals -with problems encountered in writing the table and other things worth noting. -.NH 2 -Constant Definitions -.PP -Wordsize and pointersize (EM_WSIZE and EM_PSIZE respectively) are defined -as four (bytes). EM_BSIZE, the hole between AB (the parameter base) and -LB (the local base), is eight bytes: only -the return address and the localbase are saved. -.NH 2 -Properties -.PP -Since Hans van Staveren in his document [4] clearly states that -.I cg -execution time is negatively influenced by the number of properties, only -four different properties have been defined. Besides, since the registers -really are multifunctional, these four are really all that are needed.
-.NH 2 -Registers -.PP -The table uses register variables: @ D sub 3 @ - @ D sub 7 @ are used as general register -variables, and address registers @ A sub 2 @ - @ A sub 5 @ are used as pointer register -variables. @ A sub 6 @ is reserved for the localbase. -.NH 2 -Tokens -.PP -At first glance one might wonder about the amount of tokens, especially -for the MC68020, considering the small amount of different addressing modes. -However, the last three addressing modes mentioned for the MC68020 may -omit any of the addends, and this leads to a large amount of different tokens. -I did consider the possibility of enlarging the number of tokens and sets -even further, because there might be assemblers that don't handle displacements -of zero optimally (they might generate a 2 byte extension word holding zero). -The small profit in bytes in the generated code -however does not justify the increase -in size of the token section, the set section and the patterns section, -so this idea was not developed any further. -.PP -The timing cost of the tokens may be incorrect for some MC68000 tokens. -This is because the MC68000 uses a 16-bit data bus which causes the need -of two separate memory accesses for getting 32-bit operands. -.NH 3 -Token names -.PP -The amount of tokens and the limited capability of the author's imagination -might have caused the names of some tokens not to be very clarifying. -Some information about the names may be in place here. -.PP -Whenever part of a token name is in capitals that part is memory indirected -(i.e. in square brackets). In token names -.I OFF -and -.I off -mean an offsetted address register, so an address register with a displacement -(either base displacement or outer displacement). -.I -IND, ind -.R -and -.I index -stand for indexed, or index register. -.I ABS -and -.I abs -stand for absolute, which actually is just a displacement (base or outer). -These `rules' only apply to names of tokens that represent actual operands.
-There are also tokens that represent addresses of operands. These -(with a few exceptions) contain -.I -regA, regX -.R -and -.I con -as parts of their names, which stand for address register, index register and -displacement (always base displacement) respectively. If the address to which -the token refers uses memory indirection, that part of the name comes first -(in small letters), followed by an underscore. The memory indirection part -follows the `rules' for operand token names. -.PP -Of course there are exceptions to these `rules' but in those cases the names -are self explanatory. -.PP -Two special cases: -.I ext_regX -is the name of the token that represents the -address of an absolute indexed operand, syntax @ ( bd , X sub n .size*scale ) @; -.I regX -does not represent any real mode, but is used with EM array instructions and -pointer arithmetic. -.NH 3 -Special tokens for the MC68000 -.PP -The MC68000 requires two extra tokens, which are called -.I t_regAcon -and -.I -t_regAregXcon. -.R -They are necessary because -.I regAcon -can only have a 16-bit displacement on the MC68000, and -.I regAregXcon -uses only 8 bits for its displacement. To prevent these addressing modes from -being used with displacements that are too large, the extra tokens are needed. -Whenever the displacements become too large and they need -to be used in the generation -of assembly code, these tokens are transformed into other tokens. -To prevent the table from becoming too messy I defined -.I t_regAcon -and -.I t_regAregXcon -to be identical to -.I regAcon -and -.I regAregXcon -respectively for the MC68020. -.NH 2 -Sets -.PP -Most set names used in the table are self explanatory, especially to the reader -who is familiar with the four addressing categories as mentioned in [5]: -.I -data, memory, alterable -.R -and -.I -control.
-.R -In the sets definition part some sets are defined that are not used elsewhere in -the table, but are only used to be part of the definition of -some other set. This keeps the -set definition part from getting too unreadable. -.PP -The sets called -.I imm_cmp -consist of all tokens that can be used to compare with a constant. -.NH 2 -Instructions -.PP -Only the instructions that are used in code generation are listed here. -The first few instructions are meant especially for the use with register -variables. The operand LOCAL used here refers to a register variable. -The reader may not conclude that these operations are also allowed on -ordinary locals. The space and timing cost of these instructions have been -adapted, but the use of the word LOCAL for register variables causes these cost -to be inaccurate anyway. -.PP -The -.I killreg -instruction, which generates a comment in the assembly language output and -which is meant to let -.I cg -know that the data register operand has its contents destroyed, -needs some explaining but this explanation is better in place -in the discussion of groups 3 and 4 of the section about patterns. -.PP -The timing cost of the instructions are probably not very accurate for the -MC68020 because the MC68020 uses an instruction cache and prefetch. The -cost used in the table are the `worst case cost' as mentioned in section 9 -of [5]. -.NH 2 -Moves -.PP -These are all pretty straightforward, except perhaps when -.I t_regAcon -and -.I t_regAregXcon -are used. In these cases the size of the displacement has to be checked -before moving. This also applies to the stacking rules and the coercions. -.NH 2 -Tests -.PP -These three tests (one for each operation size) could not be more -straightforward than they are now. -.NH 2 -Stackingrules -.PP -The only peculiar stackingrule is the one for -.I -regX. -.R -This token is only used with EM array instructions and -with pointer arithmetic.
Whenever it is put -on the fake stack, some EM instructions are left in the instruction stream -to remove this token. Consequently it should never have to be stacked. However -the -.I -code generator generator -.R -(or -.I cgg -for short) -complained about not having a stackingrule for this token, so it had to -be added nevertheless. -.NH 2 -Coercions -.PP -These are all straightforward. There are no splitting coercions since -the fake stack never contains any tokens that can be split. -There are only two unstacking coercions. -The rest are all transforming coercions. Almost all coercions transform -tokens into either a data register or an address register, except in the -MC68000 part of the table the -.I t_regAcon -and -.I t_regAregXcon -tokens are transformed into real -.I regAcon -and -.I regAregXcon -tokens with displacements that are properly sized. -.NH 2 -Patterns -.PP -This is the largest part of the table. It is subdivided into 17 groups. -We will take a closer look at the more interesting groups. -.NH 3 -Group 0: rules for register variables -.PP -This group makes sure that EM instructions using register variables are -handled efficiently. This group includes: local loads and -stores; arithmetic, shifts and logical operations on locals and indirect locals -and pointer handling, where C expressions like -.I -*cp++ -.R -are handled. For such an expression there are several EM instruction -sequences the front end might generate. For an integer pointer e.g.: -.DS -.B -lol lol adp stl loi $1==$2 && $1==$4 && $3==4 && $5==4 -.I -.DE -or -.DS -.B -lol loi lol adp stl $1==$3 && $3==$5 && $2==4 && $5==4 -.I -.DE -or perhaps even -.DS -.B -lil lol adp stl $1==$2 && $2==$4 && $3==4 -.I -.DE -Each of these is included, since which one is generated is up to the front -end. If the front end is consistent this will mean that some of these patterns -will never be used in code generation.
This might seem a waste, but anyone -who thinks that will certainly change his mind when his new C front end -generates a different EM instruction sequence. -.NH 3 -Groups 1 and 2: load and store instructions -.PP -In these groups -.B lof -and -.B stf -, -.B loi -and -.B sti -, -.B ldf -and -.B sdf -are the important instructions. -These are the large parts in this group, especially the -.B loi -and -.B sti -instructions, because they come in three basic sizes (byte, word and long). -Note that with these instructions in the MC68000 part the -.I exact -is omitted in front of -.I regAcon -and -.I -regAregXcon. -.R -This makes sure that -.I t_regAcon -and -.I t_regAregXcon -are transformed into proper tokens before they are used as addresses. -.PP -Also note that the -.I regAregXcon -token is completely left out from the -\fBlof\fR, \fBstf\fR, \fBldf\fR and \fBsdf\fR -instruction handling. This is because the sum of the token displacement -and the offset provided in the instruction cannot be checked and is likely -to exceed 8 bits. Unfortunately -.I cgg -does not allow the inspection of subregisters of tokens that are on the -fake stack. This same problem might also occur with the -.I regAcon -token, but this is less likely because it -uses 16-bit displacements. Besides if it would have been left out the -\fBlof\fR, \fBstf\fR, \fBldf\fR and \fBsdf\fR -instructions would have been handled considerably less efficiently. -.NH 3 -Groups 3 and 4: integer and unsigned arithmetic -.PP -EM instruction -.B sbi -also works with address registers, because the -.B cmp -instruction in group 12 is replaced by \fBsbi 4\fR. -.PP -For the MC68000 \fBmli\fR, \fBmlu\fR, \fBdvi\fR, \fBdvu\fR, \fBrmi\fR -and \fBrmu\fR are handled -by library routines. This is because the MC68000 has only 16-bit multiplications -and divisions.
-.PP -The MC68020 does have 32-bit multiplications and divisions, but for the -.B rmi -and -.B rmu -EM instructions peculiar things happen anyway: they generate the -.I killreg -instruction. This is necessary because the data register that -first held the dividend now holds the quotient; the original contents are -destroyed without -.I cg -knowing about it (the destruction of the two registers that make up the -.I DREG_pair -token couldn't be noted in the instructions part of the table). -To let -.I cg -know that these contents are destroyed, we have to use this `pseudo instruction' -from lack of a better solution. -.NH 3 -Group 5: floating point arithmetic -.PP -Since floating point arithmetic is not implemented traps will be generated here. -.NH 3 -Group 6: pointer arithmetic -.PP -This also is a very important group, along with groups 1 and 2. The MC68020 -has many different addressing modes and if possible they should be used in -the generation of assembly language. -.PP -The -.I regX -token is generated here too. It is meant to make efficient use of the -MC68020 possibility of scaling index registers. -.PP -Note that I would have liked one extra pattern to handle C-statements -like -.DS -.I -pointer += expr ? constant1 : constant2; -.R -.DE -efficiently. This pattern would have looked like: -.DS -pat ads -with const -leaving adp %1.num -.DE -but when -.I cg -is coming to the EM replacement part, the constant has already been removed -from the fake stack, causing -.I %1.num -to have a wrong value. -.NH 3 -Group 9: logical instructions -.PP -The EM instructions \fBand\fR, -.B ior -and -.B xor -are so much alike that procedures can be used here, except for the -.B -xor $1==4 -.R -instruction, because the MC68000 -.I eor -instruction does not allow as many kinds of operands as -.I and -and -.I -or. 
-.R -.NH 3 -Group 11: arrays -.PP -This group also tries to make efficient use of the available addressing modes, -but it leaves the actual work to group 6 mentioned above. -.PP -The -.I regX -token is also generated here. In this group this token is very useful for -handling array instructions for arrays with one, two, four or eight byte -elements; the array index goes into the index register, which can then -be scaled appropriately. An offset is used when the -first array element has an index other than zero. -.PP -I would have liked some extra patterns here too but they won't work -for the same reasons as explained in the discussion of group 6. -.NH 3 -Group 14: procedure calls instructions -.PP -The function return area consists of registers @ D sub 0 @ and @ D sub 1 @. -.NH 3 -Group 15: miscellaneous instructions -.PP -In many cases here library routines are called. These will be discussed -later. -.PP -Two special EM instructions are included here: \fBdch\fR, and \fBlpb\fR. -I don't know when they are generated by a front end, but these -instructions were also in the back end table for the PDP. In the PDP table -these instructions were replaced by -.B -loi 4 -.R -and -.B -adp 8 -.R -respectively. I included them both, since they couldn't do any harm. -.NH 3 -Extra group: optimization -.PP -This group is handling EM patterns with more than one instruction. This group -is not absolutely necessary but it makes the generation of code -more efficient. Among the things that are handled here are: arithmetic and -logical operations on locals, externals and indirect locals; shifting -of locals, externals and indirect locals by one; some pointer arithmetic; tests -in combination with logical and's and or's or with branches. Finally -there are sixteen patterns about divisions that could be handled more -efficiently by right shifts and which I think should be handled by the -peephole optimizer (since it also handles -the same patterns with multiplication).
-.NH -The library routines -.PP -The table is supplied with two separate libraries: one for the MC68000 and one -for the MC68020. The MC68000 uses a couple more routines than the MC68020 -because it doesn't have 32-bit division and multiplication. -.PP -The routines that need to pop their operands first store their return address. -Routines that need other registers besides @ D sub 0 @-@ D sub 2 @ and @ A sub 0 @-@ A sub 1 @ first store -the original contents of those registers. @ D sub 0 @-@ D sub 2 @ and @ A sub 0 @-@ A sub 1 @ do not have -to be saved because if they contain anything useful, their contents -are pushed on the stack before the routine is called. -.PP -The -.I .trp -routine just prints a message stating the trap number and exits (except -of course when that particular trap number is masked). Usually higher -level languages use their own trap handling routines. -.PP -The -.I .mon -routine doesn't do anything useful at all. It just prints a message stating that -the specified system call is not implemented and then exits. Front ends -usually generate calls to special routines rather than the EM -instruction \fBmon\fR. -These routines have to be supplied in another library. They -may be system dependent (e.g. the MC68000 machine this table was tested on -first moves the parameters to registers, then moves the system call number -to @ D sub 0 @ and then executes -.I -trap #0, -.R -whereas the MC68020 machine this table was tested on required the parameters -to be on the stack rather than in registers). Therefore this library is not -discussed here. -.PP -The -.I .printf -routine is included for EM diagnostic messages. It can print strings using %s, -16-bit decimal numbers using %d and 32-bit hexadecimal numbers using %x. -.PP -The -.I .strhp -routine stores a new EM heap pointer, and sometimes it needs to allocate more -heap space. This is done by calling the system call routine \fI_brk\fR.
-Chunks of 1K bytes are allocated, but this can easily be changed into -larger or smaller chunks. -.PP -The MC68000 library also contains a routine to handle the EM instruction \fBrck\fR. -The MC68020 has an instruction -.I cmp2 -that is specially meant for range checking so the MC68020 library can do without -that routine. -.PP -The MC68000 library has two multiplication routines, one for unsigned and the other -for signed multiplication. The one for unsigned multiplication -first tests the sizes of the operands, to see if it can perform -the 16 bit machine instruction instead of the routine. If not, it considers -its two operands to be two-digit numbers in a 65536-radix system. It -uses the 16-bit unsigned multiply instruction -.I mulu -three times (it does not calculate the high order result), -and adds up the intermediary results the proper way. The signed -multiplication routine calculates the sign of the result, calculates -the result as if it were an unsigned multiplication, and -adjusts the sign of the result. Here testing -the operands for their sizes would be less simple, because the operands -are signed; so that is not done here. -.PP -The MC68000 library also has two division routines. The routine for unsigned -division uses the popular algorithm, where the divisor is shifted out and -the quotient shifted in. The signed division routine calculates the sign of -both the quotient and the remainder, calls the unsigned division routine -and adjusts the signs for the quotient and the remainder. -.PP -The -.I .nop -routine is included for testing purposes. This routine prints the line -number and the value in the stack pointer. Calls to this routine -are generated by the EM instruction \fBnop\fR, which is ordinarily -left out by the peephole optimizer. -.NH -Testing the table -.PP -There are special test programs available for testing back end tables.
-First there is the EM test set, which tests most EM instructions, making -good use of the -.B nop -instruction. Then there are the Pascal and C test programs. The Pascal -test programs report errors, which makes it relatively easy -to find out what was wrong in the table. The C test programs just -generate some output, which then has to be compared to the expected -output. Differences are -not only caused by errors but also e.g. by the use of four -byte integers and unsigneds (which this table does), -the use of signed characters -instead of unsigned characters (the C front end I used generated signed -characters) or because the back end -does not support floating point. -These differences have to be `filtered out' to reveal -the differences caused by actual errors in the back end table. -These errors then have to be found out by examining the assembly code, for -no proper diagnostic messages are generated. -.PP -After these three basic tests there still remain a number of patterns that -haven't been tested yet. Fortunately -.I cgg -offers the possibility of generating a special -.I cg -that can print a list of patterns that haven't been used in -code generation yet. -For these patterns the table writer has to write his own test programs. -This may complicate things a bit because errors may now be caused by -errors in the back end table as well as errors in the test programs. -The latter happened quite often to me, because I found EM -to be an uncomfortable programming language (of course it isn't meant to -be a programming language, but an intermediary language). -.PP -There still remain a couple of patterns in this table that haven't been tested -yet. However these patterns all have very similar cases that have been -tested (an example of this is mentioned in the section on group 0 -of the patterns section of the table). Some patterns have to -do with floating point numbers. These EM instructions all generate -traps, so they didn't all have to be tested. 
The two instructions -.B dch -and -.B lpb -haven't been tested in this table, but since they only use EM replacement -and they have been tested in the PDP back end table, these two should -be all right. -.NH -Performance of the back end -.PP -To test the performance of the back end I gathered a couple of -C programs and compiled them on the machines I used to test the back ends on. -I compiled them using the C compiler that was available there and -I also compiled them using the back end. I then compared the sizes -of the text segments in the object files. -The final results of these comparisons are in fig. 1 and fig. 2. -.KF -.TS -center box; -cfI s s s s s -c s s s s s -c c | c s | c s -c c | c s | c s -c | c | c c | c c -l | n | n n | n n. -Differences in text segment sizes for the MC68000 -parts of the back end compiled by itself -_ -original old m68k4 new MC68000 -compiler (100%) back end back end -_ -name size size perc. size perc. -_ -codegen.c 13892 16224 116.7% 12860 92.5% -compute.c 4340 4502 103.7% 4530 104.3% -equiv.c 680 662 97.3% 598 87.9% -fillem.c 8016 7304 91.1% 6880 85.8% -gencode.c 1356 1194 88.0% 1130 83.3% -glosym.c 224 202 90.1% 190 84.8% -main.c 732 672 91.8% 634 86.6% -move.c 1876 1526 81.3% 1410 75.1% -nextem.c 1288 1594 123.7% 1192 92.5% -reg.c 1076 1014 94.2% 916 85.1% -regvar.c 1352 1188 87.8% 1150 85.0% -salloc.c 1240 1100 88.7% 1024 82.5% -state.c 628 600 95.5% 532 84.7% -subr.c 6948 6382 91.8% 5680 81.7% -= -averages 2939 3155 95.8% 2766 86.6% -.TE -.DS C -fig 1. -.DE -.KE -.KF -.TS -center box; -cfI s s s -cfI s s s -c s s s -c s s s -c c | c s -c c | c s -c | c | c c -l | n | n n. -Differences in text segment sizes -for the MC68020 -parts of the back end -compiled by itself -_ -original MC68020 -compiler (100%) back end -_ -name size size perc. 
-_ -codegen.c 12608 12134 96.2% -compute.c 4624 4416 95.5% -equiv.c 572 504 88.1% -fillem.c 7780 6976 89.6% -gencode.c 1320 1086 82.2% -glosym.c 228 182 79.8% -main.c 736 596 80.9% -move.c 1392 1280 91.9% -nextem.c 1176 1066 90.6% -reg.c 1052 836 79.4% -regvar.c 1196 968 80.9% -salloc.c 1200 932 77.6% -state.c 580 528 91.0% -subr.c 6136 5268 85.8% -= -averages 2900 2627 86.4% -.TE -.DS C -fig 2. -.DE -.KE -Fig. 1 also includes results of an old m68k4 back end (a back end -for the MC68000 with four byte word and pointersize). The table for -this back end was given to me as an example, but I thought it didn't make -good use of the MC68000's addressing capabilities, it hardly did any -optimalization, and it sometimes even -generated code that the assembler would not swallow. -This was sufficient reason for me to write a completely new table. -.PP -The results from the table may not be taken too seriously. The sizes measured -are the sizes of the text segments of the user programs, i.e. without the -inclusion of library routines. Of course these segments do contain calls -to these routines. Another thing is that the -.I rom -segment may be included in the text segment (this is why the -results for the MC68000 for -.I compute.c -look so bad). -.PP -Some other things must be said about these results. -The quality of EM code -generated by the C front end is certainly not optimal. The front end -uses temporary locals (extra locals that are used to evaluate expressions) -far too quickly: for a simple C expression like -.DS -.I -*(pointer) += constant -.R -.DE -where -.I pointer -is a register variable, the C front end generates (for obscure reasons) -a temporary local that holds the contents of \fIpointer\fR. This way -the pattern for -.DS -.B -loc lil adi sil $2==$4 && $3==4 -.R -.DE -for register variables is not used and longer, less efficient -code is generated. But even in spite of this, the back end seems to -generate rather compact code. 
-.NH -Some timing results -.PP -In order to measure the performance of the code generated by the back end -some timing tests were done. The reason I chose these particular tests is -that they were also done for many other back ends; the reader can compare -the results if he so wishes (of course comparing the results only -show a global difference in speed of the various machines; it doesn't -show whether some back end generates relatively better code than another). -.PP -On the MC68000 machine the statements were executed one million times. -On the MC68020 machine the statements had to be executed four million times -because this machine was so fast that timing results would be very -unreliable if the statements were executed only one million times. -.PP -For testing I used the following C test program: -.DS -.I -main() -{ - int i, j, ... - ... - for (i=0; i<1000; i++) - for (j=0; j<1000; j++) - STATEMENT; -} -.R -.DE -where -.I STATEMENT -is any of the test statements or the empty statement. For the MC68020 -tests I used 2000 instead of 1000. -The results of the test with the empty statement were used to calculate -the execution times of the other test statements. -.PP -Figures 3 and 4 show many results. For each machine actually two tests were -done: one with register variables, and the other without them. -I noticed that the original C compilers on both machines did not generate -the use of register variables, unless specifically requested. The -back end uses register variables when and where they are profitable, even -if the user did not ask for them. -.KF -.TS -center box; -cfI s s s s -c s s s s -c | c s | c s -cw(1.5i) | c c | c c -c | c c | c c -lp-2fI | n n | n n. 
-timing results for the MC68000 -times in @ mu @seconds -_ -test statement without register variables with register variables -_ - original new MC68000 original new MC68000 - C compiler back end C compiler back end -_ -int1=0; 2.8 2.7 0.5 0.5 -int1=int2-1; 4.1 4.1 1.3 1.3 -int1=int1+1; 4.1 4.1 1.3 1.3 -int1=int2*int3; 40.0 40.5 36.2 36.8 -T{ -int1=(int2<0); -\/*true*/ -T} 5.5 7.3 2.0 4.5 -T{ -int1=(int2<0); -\/*false*/ -T} 4.7 8.5 2.8 5.6 -T{ -int1=(int2<3); -\/*true*/ -T} 6.2 7.7 2.6 5.4 -T{ -int1=(int2<3); -\/*false*/ -T} 5.4 8.9 3.6 6.5 -T{ -.na -int1=((int2>3)||(int2<3)); -\/* true || false */ -T} 6.0 7.8 3.4 5.4 -T{ -.na -int1=((int2>3)||(int2<3)); -\/* false || true */ -T} 9.1 10.2 5.7 7.1 -T{ -.na -switch (int1) { -case 1: int1=0; break; -case 2: int1=1; break; -} -T} 6.3 17.8 5.3 14.0 -T{ -.na -if (int1=0) int2=3; -\/*true*/ -T} 5.1 4.7 1.3 1.3 -T{ -.na -if (int1=0) int2=3; -\/*false*/ -T} 2.2 2.1 1.9 1.1 -while (int1>0) int1=int1-1; 2.2 2.1 1.1 1.1 -int1=a[int2]; 6.8 6.7 4.0 3.1 -p3(int1); 14.3 11.1 13.4 10.0 -int1=f(int2); 17.7 14.5 14.8 11.7 -s.overhead=5400; 2.8 2.7 2.9 2.7 -.TE -.DS C -Fig. 3 -.DE -.KE -.KF -.TS -center box; -cfI s s s s -c s s s s -c | c s | c s -cw(1.5i) | c c | c c -c | c c | c c -lp-2fI | n n | n n. 
-timing results for the MC68020 -times in @ mu @seconds -_ -test statement without register variables with register variables -_ - original new MC68020 original new MC68020 - C compiler back end C compiler back end -_ -int1=0; .25 .25 .15 .15 -int1=int2-1; 1.3 1.3 .38 .38 -int1=int1+1; 1.2 .90 .38 .15 -int1=int2*int3; 4.4 4.2 3.0 3.1 -T{ -int1=(int2<0); -\/*true*/ -T} 1.6 2.7 1.1 2.3 -T{ -int1=(int2<0); -\/*false*/ -T} 1.9 2.9 .80 2.1 -T{ -int1=(int2<3); -\/*true*/ -T} 1.7 2.8 1.2 2.6 -T{ -int1=(int2<3); -\/*false*/ -T} 2.1 3.0 .85 2.3 -T{ -.na -int1=((int2>3)||(int2<3)); -\/* true || false */ -T} 2.1 3.1 1.2 2.5 -T{ -.na -int1=((int2>3)||(int2<3)); -\/* false || true */ -T} 3.4 4.2 1.8 3.2 -T{ -.na -switch (int1) { -case 1: int1=0; break; -case 2: int1=1; break; -} -T} 2.7 8.0 2.0 6.9 -T{ -.na -if (int1=0) int2=3; -\/*true*/ -T} 1.2 1.3 .63 .63 -T{ -.na -if (int1=0) int2=3; -\/*false*/ -T} 1.7 1.6 .50 .53 -while (int1>0) int1=int1-1; 1.2 1.3 .55 .53 -int1=a[int2]; 1.8 1.8 1.0 1.0 -p3(int1); 14.8 5.5 14.1 5.0 -int1=f(int2); 16.3 6.6 15.2 5.9 -s.overhead=5400; .48 .48 .50 .50 -.TE -.DS C -Fig. 4 -.DE -.KE -.PP -The reader may have noticed that on both machines the back end seems -to generate considerably slower code for tests where a `condition' is -used in the rhs of an assignment statement. This is in fact not true: it is -the front end that generates bad code. Two examples: for the C statement -.DS -.I -int1 = (int2 < 0); -.R -.DE -the front end generates the following code for the rhs (I -used arbitrary labels): -.DS -.B -lol -16 -zlt *10 -loc 0 -bra *11 -10 -loc 1 -11 -.R -.DE -while in this case (to my opinion) it should have generated -.DS -.B -lol -16 -tlt -.R -.DE -which is much shorter. 
Another example: for the C statement -.DS -.I -int1 = (int2 < 3); -.B -.DE -the front end generates for the rhs -.DS -.B -lol -16 -loc 3 -blt *10 -loc 0 -bra *11 -10 -loc 1 -11 -.R -.DE -while a much better translation would be -.DS -.B -lol -16 -loc 3 -cmi 4 -tlt -.R -.DE -.PP -Another statement that the back end seems to generate slower code for is -the C switch statement. This is true, but it is also caused by -the way these things are done in EM. EM uses the -.B csa -or -.B csb -instruction, and for these two I had to use library routines. On larger -switch statements the -.I .csa -routine will perform relatively better. -.PP -The back end generates considerably faster code for procedure and function -calls, especially in the MC68020 case, and also for the C statement -.DS -.I -int1 = int1 + 1; -.R -.DE -The original C compilers use the same method for this instruction -as for -.DS -.I -int1 = int2 - 1; -.R -.DE -they perform the addition in a scratch register, and then store the -result. For the former C statement this is not necessary, because -the MC68000 and MC68020 have an instruction that can add constants -to almost anything (in this case: to locals). The MC68000 and MC68020 -back ends do use this instruction. -.NH -Some final remarks -.PP -As mentioned a few times before, the C front end compiler does not -generate optimal code and as a consequence of this the -back end does not always generate optimal code. This is especially -the case with temporary locals, which the front end generates much -too quickly, and also with conditional expressions that are -used in the rhs of an assignment statement (fortunately this is not -needed so much). -.PP -If -.I cgg -would have been able to accept operands separated by any character -instead of just by commas (in the instruction definitions part), -I wouldn't have had the need of the -.I killreg -pseudo instruction. It would also be handy to have -.I cgg -accept all normal C operators. 
At the moment -.I cgg -does not accept binary ands, ors and exors, even though in [4] -it is stated that -.I cgg -does accept all normal C operators. As it happens I did not need the -binary operators, but at some time in developing the table I thought -I did. -.PP -I would also like -.I cg -to do more with the condition codes information that is supplied with -each instruction in the instruction definitions section of the table. -Sometimes -.I cg -generates test instructions which actually were not necessary. This -of course causes the generated -programs to be slightly larger and slightly slower. -.PP -In spite of the few minor shortcomings mentioned above I found -.I cgg -a very comfortable tool to use. -.SH -References -.PP -.IP [1] -T. B. Steel Jr., -.I -UNCOL: The myth and the Fact, -.R -in Ann. Rev. Auto. Prog., -R. Goodman (ed.), Vol. 2 (1969), pp 325 - 344 -.IP [2] -A. S. Tanenbaum, H. van Staveren, E. G. Keizer, J. W. Stevenson, -.I -A practical toolkit for making portable compilers, -.R -Informatica Report 74, Vrije Universiteit, Amsterdam, 1983 -.IP [3] -A. S. Tanenbaum, H. van Staveren, E. G. Keizer, J. W. Stevenson, -.I -Description of an experimental machine architecture for use with -block structured languages, -.R -Informatica Report 81, Vrije Universiteit, Amsterdam, 1983 -.IP [4] -H. van Staveren -.I -The table driven code generator from the Amsterdam Compiler Kit, -Second Revised Edition, -.R -Vrije Universiteit, Amsterdam -.IP [5] -.I -MC68020 32-bit Microprocessor User's Manual, -.R -Second Edition, -Motorola Inc., 1985, 1984 -.IP [6] -.I -MC68000 16-bit Microprocessor User's Manual, -Preliminary, -.R -Motorola Inc., 1979 diff --git a/doc/ncg.doc b/doc/ncg.doc deleted file mode 100644 index b77056880..000000000 --- a/doc/ncg.doc +++ /dev/null @@ -1,3020 +0,0 @@ -.\" $Id$ -.RP -.ND -.TL -The table driven code generator -.br -from the -.br -Amsterdam Compiler Kit -.br -Second Revised Edition -.AU -Hans van Staveren -.AI -Dept. 
of Mathematics and Computer Science -Vrije Universiteit -Amsterdam, The Netherlands -.AB -The Amsterdam Compiler Kit is a collection of tools -designed to help automate the process of compiler building. -Part of it is a table driven code generator, -called -.I cg , -and a program to check and translate machine description -tables called -.I cgg . -This document provides a description of the internal workings of -.I cg , -and a description of syntax and semantics of the driving table. -This is required reading for those wishing to write a new table. -.AE -.NH 1 -Introduction -.PP -Part of the Amsterdam Compiler Kit is a code generator system consisting -of a code generator generator (\fIcgg\fP for short) and some machine -independent C code. -.I Cgg -reads a machine description table and creates two files, -tables.h and tables.c. -These are then used together with other C code to produce -a code generator for the machine at hand. -.PP -This in turn reads compact EM code and produces -assembly code. -The remainder of this document will first broadly describe -the working of the code generator, -then the machine table will be described after which -some light is shed onto -the internal workings of the code generator. -.PP -The reader is assumed to have at least a vague notion about the -semantics of the intermediary EM code. -Someone wishing to write a table for a new machine -should be thoroughly acquainted with EM code -and the assembly code of the machine at hand. -.NH 1 -What has changed since version 1 ? -.PP -This section can be skipped by anyone not familiar with the first version. -It is not needed to understand the current version. -.PP -This paper describes the second version of the code generator system. -Although the code generator itself is for the main part unchanged, -the table format has been drastically redesigned and the opportunities -to make faulty tables are reduced. 
-The format is now aesthetically more pleasing (according to \fIme\fP that is), -mainly because the previous version was designed for one line code rules, -which did not work out that way. -.PP -The `SCRATCH' property is now automatically generated by -.I cgg , -.I erase -and -.I setcc -calls and their ilk are now no longer needed -(read: can no longer be forgotten) -and all this because the table now -.I knows -what the machine instructions look like and what arguments they -destroy. -.PP -Checks are now made for register types, so it is no longer possible -to generate a `regind2' token with a floating point register as a base. -In general, if the instructions of the machine are correctly defined, -it is no longer possible to generate code that does not assemble, -which of course does not mean that it is not possible to generate -assembly code that does not do what was intended! -.PP -Checks are made now for missing moves, tests, coercions, etc. -There is a form of procedure call now to reduce table size: -it is no longer necessary to write the code for conditional -instructions six times. -.PP -The inreg() pseudo-function returns other results!! -.NH 1 -Global overview of the workings of the code generator. -.PP -The code generator or -.I cg -tries to generate good code by simulating the stack -of the compiled program and delaying emission of code as long -as possible. -It also keeps track of register contents, which enables it to -eliminate redundant moves, and tries to eliminate redundant tests -by keeping information about condition code status, -if applicable for the machine. -.PP -.I Cg -maintains a `fake stack' containing `tokens' that are built -by executing the pseudo code contained in the code rules given -by the table writer. -One can think of the fake stack as a logical extension of the real -stack the compiled program will have when run. -Alternatively one can think of the real stack as an infinite extension -at the bottom of the fake stack.
-Both ways, the concatenation of the real stack and the fake stack -will be the stack as it would have been on a real EM machine (see figure). -.TS -center; -cw(3.5c) cw(3c) cw(3.5c) -cw(3.5c) cw(3c) cw(3.5c) -|cw(3.5c)| cw(3c) |cw(3.5c)| . -EM machine target machine - - - - - - real stack - stack - grows -EM stack \s+2\(br\s0 - \s+2\(br\s0 - \s+2\(br\s0 _ - \s+2\(br\s0 - \s+2\(da\s0 - fake stack - - - -_ _ -.T& -ci s s. -Relation between EM stack, real stack and fake stack. -.TE -During code generation tokens will be kept on the fake stack as long -as possible but when they are moved to the real stack, -by generating code for the push, -all tokens above\v'-.25m'\(dg\v'.25m' -.FS -\(dg in this document the stack is assumed to grow downwards, -although the top of the stack will mean the first element that will -be popped. -.FE -the pushed tokens will be pushed also, -so the fake stack will not contain holes. -.PP -The information about the machine that -.I cg -needs has to be given in a machine description table, -with as a major part a list of code rules telling -.I cg -what to do when certain EM-instructions occur -with certain tokens on the fake stack. -Not all possible fake stack possibilities have to be given of course, -there is a possibility for providing rewriting rules, or -.I coercions -as they are called in this document. -.PP -The main loop of -.I cg -is: -.IP 1) -find a pattern of EM instructions starting at the current one to -generate code for. -This pattern will usually be of length one but longer patterns can be used. -Process any pseudo-instructions found. -.IP 2) -Select one of the possibly many stack patterns that go with this -EM pattern on the basis of heuristics, look ahead or both. -The cost fields provided in the token definitions and -instruction definitions are used -to compute costs during look ahead. -.IP 3) -Force the current fake stack contents to match the pattern. 
-This may involve -copying tokens to registers, making dummy transformations, e.g. to -transform a `local' into an `indexed from register' or might even -cause the move of the complete fake stack contents to the real stack -and then back into registers if no suitable coercions -were provided by the table writer. -.IP 4) -Execute the pseudocode associated with the code rule just selected, -this may cause registers to be allocated, -code to be emitted etc.. -.IP 5) -Put tokens onto the fake stack to reflect the result of the operation. -.IP 6) -Insert some EM instructions into the stream; -this is possible but not common. -.IP 7) -Account for the cost. -The cost is kept in a (space, time) vector and look ahead decisions -are based on a linear combination of these. -The code generator calls on itself recursively during look ahead, -and the recursive incarnations return the costs they made. -The costs the top-level code generator makes is of course irrelevant. -.PP -The table that drives -.I cg -is not read in every time, -but instead is used at compile time -of -.I cg -to set parameters and to load pseudocode tables. -A program called -.I cgg -reads the table and produces large lists of numbers that are -compiled together with machine independent code to produce -a code generator for the machine at hand. -.PP -Part of the information needed is not easily expressed in this table -format and must be supplied in two separate files, -mach.h and mach.c. -Their contents are described later in this document. -.NH 1 -Register variables -.PP -If the machine has more than enough registers to generate code with, -it is possible to reserve some of them for use as register variables. -If it has not, this section may be skipped and any references -to register variables in the rest of this document may be ignored. -.PP -The front ends generate messages to the back ends telling them which -local variables could go into registers. 
-The information given is the offset of the local, its size and type -and a scoring number, roughly the number of times it occurs. -.PP -The decision which variable to put in which register is taken by the -machine independent part of -.I cg -with the help of a scoring function provided by the table writer in mach.c. -The types of variables known are -.IP reg_any 12 -Just a variable of some integer type. -Nothing special known about it. -.IP reg_float -A floating point variable. -.IP reg_loop -A loop control variable. -.IP reg_pointer -A pointer variable. -Usually they are better candidates to put in registers. -.PP -If register variables are used, -more functions must be supplied in mach.c. -These functions are explained later. -.NH 1 -Description of the machine table -.PP -The machine description table consists of the -concatenation of the following sections: -.IP 1) -Constant definitions -.IP 2) -Property definitions -.IP 3) -Register definitions -.IP 4) -Token definitions -.IP 5) -Set definitions -.IP 6) -Instruction definitions -.IP 7) -Move definitions -.IP 8) -Test definitions -.IP 9) -Stack definitions -.IP 10) -Coercions -.IP 11) -Code rules -.PP -This is the order in the table -but the descriptions in this document will use a slightly different -order. -All sections except the first start with an uppercase header word. -Examples may be given in early stages that use knowledge that is explained -in a later stage. -If something is not clear the first time, please read on. -All will clear up in a couple of pages. -.PP -Input is in free format, white space and newlines may be used -at will to improve legibility. -Identifiers used in the table have the same syntax as C identifiers, -upper and lower case considered different, all characters significant. -Here is a list of reserved words; all of these are unavailable as identifiers. -.TS -box; -l l l l l. 
-ADDR STACKINGRULES gen proc test -COERCIONS TESTS highw reg_any to -INSTRUCTIONS TIMEFACTOR inreg reg_float topeltsize -INT TOKENS is_rom reg_loop ufit -MOVES call kills reg_pointer uses -PATTERNS cost lab regvar with -PROPERTIES defined labeldef return yields -REGISTERS exact leaving reusing -SETS example loww rom -SIZEFACTOR fallthrough move samesign -STACK from pat sfit -.TE -C style comments are accepted. -.DS -/* this is a comment */ -.DE -If the standard constant facility is not enough the C-preprocessor can -be used to enhance the table format. -.PP -Integers in the table have the normal C-style syntax. -Decimal by default, octal when preceded by a 0 -and hexadecimal when preceded by 0x. -.NH 2 -Constant section -.PP -In the first part of the table some constants can be defined, -most with the syntax -.DS -NAME=value -.DE -value being an integer or string. -Three constants must be defined here: -.IP EM_WSIZE 14 -Number of bytes in a machine word. -This is the number of bytes -a \fBloc\fP instruction will put on the stack. -.IP EM_PSIZE -Number of bytes in a pointer. -This is the number of bytes -a \fBlal\fP instruction will put on the stack. -.IP EM_BSIZE -Number of bytes in the hole between AB and LB. -If the calling sequence just saves PC and LB this -size will be twice the pointersize. -.PP -EM_WSIZE and EM_PSIZE are checked when a program is compiled -with the resulting code generator. -EM_BSIZE is used by -.I cg -to add to the offset of instructions dealing with locals -having positive offsets, -i.e. parameters. -.PP -Other constants can be defined here to be used as mnemonics -later in the table. -.PP -Optional is the definition of a printformat for integers in the code file. -This is given as -.DS -FORMAT = string -.DE -The string must be a valid printf(III) format, -and defaults to "%ld". 
-For example on the PDP-11 one can use -.DS -FORMAT= "0%lo" -.DE -to satisfy the old UNIX assembler that reads octal unless followed by -a period, and the ACK assembler that follows C conventions. -.PP -Tables under control of source code control systems like -.I sccs -or -.I rcs -can put their id-string here, for example -.DS -rcsid="$\&Header$" -.DE -These strings, like all strings in the table, will eventually -end up in the binary code generator produced. -.PP -Optionally one can give the factors with which the size and time -parts of the cost vector have to be multiplied to ensure they have the -same order of magnitude. -This can be done as -.DS -SIZEFACTOR = C\d3\u/C\d4\u -.sp -TIMEFACTOR = C\d1\u/C\d2\u -.DE -Above numbers must be read as rational numbers. -Defaults are 1/1 for both of them. -These constants set the default size/time tradeoff in the code generator, -so if TIMEFACTOR and SIZEFACTOR are both 1 the code generator will choose -at random between two code sequences where one has -cost (10,4) and the other has cost (8,6). -See also the description of the cost field below. -.NH 2 -Property definition -.PP -This part of the table defines the list of properties that can be used -to differentiate between register classes. -It consists of a list of user-defined -identifiers optionally followed by the size -of the property in parentheses, default EM_WSIZE. -Example for the PDP-11: -.TS -l l. 
-PROPERTIES /* The header word for this section */ - -GENREG /* All PDP registers */ -REG /* Normal registers (allocatable) */ -ODDREG /* All odd registers (allocatable) */ -REGPAIR(4) /* Register pairs for division */ -FLTREG(4) /* Floating point registers */ -DBLREG(8) /* Same, double precision */ -GENFREG(4) /* generic floating point */ -GENDREG(8) /* Same, double precision */ -FLTREGPAIR(8) /* register pair for modf */ -DBLREGPAIR(16) /* Same, double precision */ -LOCALBASE /* Guess what */ -STACKPOINTER -PROGRAMCOUNTER -.TE -Registers are allocated by asking for a property, -so if for some reason in later parts of the table -one particular register must be allocated it -has to have a unique property. -.NH 2 -Register definition -.PP -The next part of the tables describes the various registers of the -machine and defines identifiers -to be used in later parts of the tables. -Syntax: -.DS - : REGISTERS - : ':' '.' - : ident [ '(' string ')' ] [ '=' ident [ '+' ident ] ] -.DE -Example for the PDP-11: -.TS -l l. -REGISTERS - -r0,r2,r4 : GENREG,REG. -r1,r3 : GENREG,REG,ODDREG. -r01("r0")=r0+r1 : REGPAIR. -fr0("r0"),fr1("r1"),fr2("r2"),fr3("r3") : GENFREG,FLTREG. -dr0("r0")=fr0,dr1("r1")=fr1, - dr2("r2")=fr2,dr3("r3")=fr3 : GENDREG,DBLREG. -fr01("r0")=fr0+fr1,fr23("r2")=fr2+fr3 : FLTREGPAIR. -dr01("r0")=dr0+dr1,dr23("r2")=dr2+dr3 : DBLREGPAIR. -lb("r5") : GENREG,LOCALBASE. -sp : GENREG,STACKPOINTER. -pc : GENREG,PROGRAMCOUNTER. -.TE -.PP -The names in the left hand lists are names of registers as used -in the table. -They can optionally be followed by a string in parentheses, -their name as far as the assembler is concerned. -The default assembler name is the same as the table name. -A name can also be followed by -.DS -= othername -.DE -or -.DS -= othername + othername -.DE -which says that the register is composed of the parts -after the '=' sign. -The identifiers at the right hand side of the lists are -names of properties. 
-The end of each register definition is a period. -.PP -It might seem wise to list every property of a register, -so one might give r0 the extra property MFPTREG named after the not -too well known MFPT instruction on newer PDP-11 types, -but this is not a good idea, -especially since no use can be made of that instruction anyway. -Every extra property means the register set is more unorthogonal -and -.I cg -execution time is influenced by that, -because it has to take into account a larger set of registers -that are not equivalent. -So try to keep the number of different register classes to a minimum. -When faced with the choice between two possible code rules -for a nonfrequent EM sequence, -one being elegant but requiring an extra property, -and the other less elegant, -elegance should probably lose. -.PP -Tables that implement register variables must mark registers to be used -for variable storage here by following the list of properties by one -of the following: -.DS -regvar \fIor\fP regvar(reg_any) -regvar(reg_loop) -regvar(reg_pointer) -regvar(reg_float) -.DE -meaning they are candidates for that type of variable. -All register variables of one type must be of the same size, -and they may have no subregisters. -Such registers are not available for normal code generation. -.NH 2 -Stack token definition -.PP -The next part describes all possible tokens that can reside on -the fake stack during code generation. -Attributes of a token are described as a C struct declaration; -this is followed by the size of the token in bytes, -optionally followed by the cost of the token when used as an addressing mode -and the format to be used on output. -.PP -In general, when writing a table, it is not wise to try -to think of all necessary tokens in advance. -While writing, the necessity or advisability for some token -will be seen and it can then be added together with the -stacking rules and coercions needed.
-.PP -Tokens should usually be declared for every addressing mode -of the machine at hand and for every size directly usable in -a machine instruction. -Example for the PDP-11 (incomplete): -.TS -l l. -TOKENS - -const2 = { INT num; } 2 cost(2,300) "$" num . -addr_local = { INT ind; } 2 . -addr_external = { ADDR off; } 2 "$" off. - -regdef2 = { GENREG reg; } 2 "*" reg. -regind2 = { GENREG reg; ADDR off; } 2 off "(" reg ")" . -reginddef2 = { GENREG reg; ADDR off; } 2 "*" off "(" reg ")" . -regconst2 = { GENREG reg; ADDR off; } 2 . -relative2 = { ADDR off; } 2 off . -reldef2 = { ADDR off; } 2 "*" off. -.TE -.PP -Types allowed in the struct are ADDR, INT and all register properties. -The type ADDR means a string and an integer, -which is output as string+integer, -and arithmetic on mixed ADDR and INT is possible. -This is the right mode for anything that can be an -assembler address expression. -The type of the register in the token is strict. -At any assignment of an expression of type register to a token attribute -of type register -.I cgg -will check if the set of possible results from the expression is a subset -of the set of permissible values for the token attribute. -.PP -The cost-field is made up by the word -.I cost -followed by two numbers in parentheses, the size and timecosts -of this token when output in the code file. -If omitted, zero cost is assumed. -While generating code, -.I cg -keeps track of a linear combination of these costs together -with the costs of the instructions itself which we will see later. -The coefficients of this linear combination are influenced -by two things: -.IP 1) -The SIZEFACTOR and TIMEFACTOR constants, -as mentioned above. -.IP 2) -A run time option to -.I cg -that can adjust the time/space tradeoff to all positions -from 100% time to 100% space. -.LP -By supplying different code rules in certain situations -it is possible to get a code generator that can adjust its -code to the need of the moment. 
-This is probably most useful with small machines, -experience has shown that on the larger micro's and mini's -the difference between time-optimal and space-optimal code -is often small. -.PP -The printformat consists of a list of strings intermixed with -attributes from the token. -Strings are output literally, attributes are printed according -to their type and value. -Tokens without a printformat should never be output, -and -.I cgg -checks for this. -.PP -Notice that tokens need not correspond to addressing modes; -the regconst2 token listed above, -meaning the sum of the contents of the register and the constant, -has no corresponding addressing mode on the PDP-11, -but is included so that a sequence of add constant, load indirect, -can be handled efficiently. -This regconst2 token is needed as part of the path -.DS -REG -> regconst2 -> regind2 -.DE -of which the first and the last "exist" and the middle is needed -only as an intermediate step. -.PP -Tokens with name `LOCAL' or `DLOCAL' are a special case when -register variables are used, this is explained further in the -section on token descriptions. -.NH 2 -Sets -.PP -Usually machines have certain collections of addressing modes that -can be used with certain instructions. -The stack patterns in the table are lists of these collections -and since it is cumbersome to write out these long lists -every time, there is a section here to give names to these -collections. -Please note that it is not forbidden to write out a set -in the remainder of the table, -but for clarity it is usually better not to. -.LP -Example for the PDP-11 (incomplete): -.TS -l l. -SETS - -src2 = GENREG + regdef2 + regind2 + reginddef2 + relative2 + - \h'\w'= 'u'reldef2 + addr_external + const2 + LOCAL + ILOCAL + - \h'\w'= 'u'autodec + autoinc . -dst2 = src2 - ( const2 + addr_external ) . -xsrc2 = src2 + ftoint . -src1 = regdef1 + regind1 + reginddef1 + relative1 + reldef1 . -dst1 = src1 . -src1or2 = src1 + src2 . 
-src4 = relative4 + regdef4 + DLOCAL + regind4 . -dst4 = src4 . -.TE -Permissible in the set construction are all the usual set operators, i.e. -.IP + -set union -.IP - -set difference -.IP * -set intersection -.PP -Normal operator priorities apply, and parentheses can be -used. -Every token identifier is also a set identifier -denoting the singleton collection of tokens containing -just itself. -Every register property as defined above is also a set -matching all registers with that property. -The standard set identifier ALL denotes the collection of -all tokens. -.NH 2 -Instruction definitions -.PP -In the next part of the table the instructions for the machine -are declared together with information about their operands. -Example for the PDP-11(very incomplete): -.DS -.ta 8 16 24 32 40 48 56 64 -INSTRUCTIONS -/* default cost */ - -cost(2,600) - -/* Normal instructions */ - -adc dst2:rw:cc . -add src2:ro,dst2:rw:cc cost(2,450). -ash src2:ro,REG:rw:cc . -ashc src2:ro,REGPAIR+ODDREG:rw . -asl dst2:rw:cc . -asr dst2:rw:cc . -bhis "bcc" label . - -/* floating point instructions */ - -movf "ldf" fsrc,freg . -movf "stf" freg,fdst . -.DE -As the examples show an instruction definition consists of the name -of the instruction, -optionally followed by an assembler mnemonic in -quotes-default is the name itself-and then -a list of operands, -optionally followed by the cost and then a period. -If the cost is omitted the cost just after the word -INSTRUCTIONS is assumed, -if that is also omitted the cost is zero. -The cost must be known by -.I cg -of course if it has multiple -code generation paths to choose from. -.PP -For each operand we have the set of possible token values, -followed by a qualifier that can be -.IP :ro -signifies that this operand is read only, -so it can be replaced by a register with the same contents -if available. -.IP :rw -signifies that the operand is read-write -.IP :wo -signifies that the operand is write only. 
-.IP :cc -says that after the instruction is finished, the condition codes -are set to this operand. -If none of the operands have the :cc qualifier set, -.I cg -will assume that condition codes were unaffected -(but see below). -.PP -The first three qualifiers are of course mutually exclusive. -The :ro qualifier does not cause any special action in the current -implementation, and the :wo and :rw qualifiers are treated equally. -It must be recommended however to be precise in the specifications, -since later enhancements to the code generator might use them. -.PP -As the last examples show it is not necessary to give only one definition -for an instruction. -There are machines that have very unorthogonal instruction sets, -in fact most of them do, -and it is possible to declare each possible combination -of operands. -The -.I cgg -program will check all uses of the instruction to find out which -one was meant. -.PP -Although not in the PDP-11 example above there is a possibility -to describe instructions that have side effects on registers not -in the operand list. -The only thing possible is to say that the instruction is destructive -to some registers or the condition codes, by following the operand list -with the word -.I kills -and a list of the things destroyed. -Example for some hypothetical accumulator machine: -.DS -add source2:ro kills ACCU :cc . -.DE -.PP -The cost fields in the definitions for tokens and instructions -are added together when generating code. -It depends on the machine at hand whether the costs are orthogonal -enough to make use of both these costs, -in extreme cases every combination of instructions and operands -can be given in this section, -all with their own costs. -.NH 2 -Expressions -.PP -Throughout the rest of the table expressions can be used in some -places. -This section will give the syntax and semantics of expressions. -There are four types of expressions: integer, address, register and undefined.
-Really the type register is nonexistent as such, -for each register expression -.I cgg -keeps a set of possible values, -and this set can be seen as the real type. -.PP -Type checking is performed by -.I cgg . -An operator with at least one undefined operand returns undefined except -for the defined() function mentioned below. -An undefined expression is interpreted as FALSE when it is needed -as a truth value. -It is the responsibility of the table writer to ensure no undefined -expressions are ever used as initialisers for token attributes. -This is unfortunately almost impossible to check for -.I cgg -so be careful. -.LP -Basic terms in an expression are -.IP number 16 -A number is a constant of type integer. -Also usable is an identifier defined to a number in the constant -definition section. -.IP """string""" -A string within double quotes is a constant of type address. -All the normal C style escapes may be used within the string. -Also usable is an identifier defined to a string in the constant -definition section. -.IP [0-9][bf] -This must be read as a grep-pattern. -It evaluates to a string that is the label name for the -temporary label meant. -More about this in the section on code rules. -.IP REGIDENT -The name of a register is a constant of type register. -.IP $\fIi\fP -A dollar sign followed by a number is the representation of the argument -of EM instruction \fIi\fP. -The type of the operand is dependent on the instruction, -sometimes it is integer, -sometimes it is address. -It is undefined when the instruction has no operand. -Instructions with type-letter w can occur without an operand. -This can be checked in the code rule with the defined() pseudo function. -.br -If it is unimaginable for the operand of the instruction ever to be -something different from a plain integer, the type is integer, -otherwise it is address.
-.br -Those who want to know it exactly, the integer instruction types -are the instructions marked with the -type-letters c,f,l,n,o,s,r,w,z in the EM manual. -.br -.I Cg -makes all necessary conversions, -like adding EM_BSIZE to positive arguments of instructions -dealing with locals, -prepending underlines to global names, -converting code labels into a unique representation etc. -Details about this can be found in the section about -machine dependent C code. -.IP %1 -This in general means the token mentioned first in the -stack pattern. -When used inside an expression the token must be a simple register. -Type of this is register. -.IP %1.off -This means attribute "off" of the first stack pattern token. -Type is the same as that of attribute "off". -To use this expression implies a check that all tokens -in the set used have the same attribute in the same place. -.IP %off -This means attribute "off" in the `current' token. -This can only be used when no confusion is possible about which token -was meant, eg. in the optional boolean expressions following token sets -in the move and test rules, in coercions or in the kills section inside -the code rules. -Same check as above. -.IP %1.1 -This is the first subregister of the first token. -Previous comments apply. -.IP %b -A percent sign followed by a lowercase letter -stands for an allocated register. -This is the second allocated register. -.IP %a.2 -The second subregister of the first allocated register. -.PP -All normal C operators apply to integers, -the + operator on addresses behaves as one would expect -and the only operators allowed on register expressions -are == and != . -Furthermore there are some special `functions': -.IP defined(e) 16 -Returns 1 if expression -.I e -is defined, 0 otherwise. -.IP samesign(e1,e2) -Returns 1 if integer expression -.I e1 -and -.I e2 -have the same sign. 
-.IP sfit(e1,e2) -Returns 1 if integer expression -.I e1 -fits as a signed integer -into a field of -.I e2 -bits, 0 otherwise. -.IP ufit(e1,e2) -Same as above but now for unsigned -.I e1 . -.IP rom($a,n) -Integer expression giving word -.I n -from the \fBrom\fP descriptor -pointed at by EM instruction -number -.I a -in the EM-pattern. -Undefined if that descriptor does not exist. -.IP is_rom($a) -Integer expression indicating whether EM instruction number -.I a -in the EM-pattern refers to ROM. This may be useful for generating -position-independent code with the ROM in read-only memory. -.I Is_rom -enables one to see the difference between ROM references and other data -references. -.IP loww($a) -Returns the lower half of the argument of EM instruction number -.I a . -This is used to split the arguments of a \fBldc\fP instruction. -.IP highw($a) -Same for upper half. -.LP -The next two `functions' are only needed in a table that -implements register variables. -.IP inreg(e) 16 -Returns the status of the local variable with offset -.I e -from the localbase. -Value is an integer, -negative if the local was not allowed as a register -variable, -zero if it was allowed but not assigned to a register, -and the type of the register if it was assigned to a register. -This makes it possible to write -.DS -inreg($1)==reg_pointer -.DE -and similar things. -.IP regvar(e,t) -Type of this is register. -It returns the register the local with offset -.I e -is assigned to. -The table writer guarantees the register is one of type -.I t , -with -.I t -one of reg_any, reg_loop, reg_pointer or reg_float. -If -.I t -is omitted reg_any is assumed. -Undefined if inreg(\fIe\fP)<=0 . -.LP -The next two `functions' are only needed in a table that -uses the top element size information. -.IP topeltsize($a) 16 -Returns the size of the element on top of the EM-stack at the label -identified by $a. 
This can be used to put the top of the stack in a -register at the moment of an unconditional jump. At a conditional jump, -the size of the top-element will always look 0. -.IP fallthrough($a) -Returns 1 if the label identified by $a can be reached via fallthrough, 0 -otherwise. -.NH 2 -Token descriptions -.PP -Throughout the rest of the table tokens must be described, -be it as operands of instructions or as stack-replacements. -In all those cases we will speak about a token description. -The possibilities for these will be described here. -.PP -All expressions of type register are token descriptions. -The construct %1 means the token matched first in the stack pattern. -All other token descriptions are those that are built on the spot. -They look like this: -.DS -{ <tokenname> , <list of token attribute initializing expressions> } -.DE -All expressions are type-checked by -.I cgg , -and the number of initializers is also checked. -.PP -A special case of the last token descriptions occurs when -the token name is `LOCAL' or `DLOCAL' and the table uses register -variables. The first token attribute then must be of type integer and -the token description is automagically replaced by the register chosen -if the LOCAL (wordsize) or DLOCAL (twice the wordsize) was assigned -to a register. -.NH 2 -Code rules -.PP -The largest section of the tables consists of the code generation rules. -They specify EM patterns, stack patterns, code to be generated etc. -Broadly the syntax is -.DS L -code rule : EM-part code-part -EM-part : EM-pattern | procedure-heading -code-part : code-description | procedure-call -code-description : stackpattern kills allocates generates yields leaving -.DE -Ignoring the "procedure"-part for now, the description for the EM-pattern -and the code-description follows. -Almost everything here is optional, the minimum code rule -is: -.DS -pat nop -.DE -that will simply throw away -.I nop -instructions.
-.NH 3 -The EM pattern -.PP -The EM pattern consists of a list of EM mnemonics -preceded by the word -.I pat -optionally followed by a boolean expression. -Examples: -.DS -pat \fBloe\fP -.DE -will match a single \fBloe\fP instruction, -.DS -pat \fBloc\fP \fBloc\fP \fBcif\fP $1==2 && $2==8 -.DE -is a pattern that will match -.DS -\fBloc\fP 2 -\fBloc\fP 8 -\fBcif\fP -.DE -and -.DS -pat \fBlol\fP \fBinc\fP \fBstl\fP $1==$3 -.DE -will match for example -.DS -.ta 10m 20m 30m 40m 50m 60m -\fBlol\fP 6 \fBlol\fP -2 \fBlol\fP 4 -\fBinc\fP \fBinc\fP but \fInot\fP \fBinc\fP -\fBstl\fP 6 \fBstl\fP -2 \fBstl\fP -4 -.DE -A missing boolean expression evaluates to TRUE. -.PP -The code generator will match the longest EM pattern on every occasion, -if two patterns of the same length match the first in the table will be chosen, -while all patterns of length greater than or equal to three are considered -to be of the same length. -This rule of three is an unfortunate implementation dependent restriction, -but patterns longer than three EM instructions are luckily not needed -too often. -.PP -The EM mnemonic may also be the pseudo-instruction \fBlab\fP, which matches -a label. Its argument can be used in testing on topeltsize and -fallthrough. When this pattern is specified, the label should be defined -explicitly with a -.I labeldef -statement. -.PP -Following the EM-pattern there may be more than one code -rule, -.I cg -will choose using heuristics and the cost -information provided with the instruction and token -definitions. -Owing to parsing reasons of the table, the word -.I with -(see below) -is mandatory when there are more code rules attached to one -EM-pattern. -The stack pattern may be empty however. -.NH 3 -The stack pattern -.PP -The optional stack pattern is a list of token sets preceded by the word -.I with . -The token sets are usually represented by set identifiers for clarity. -No boolean expression is allowed here. 
-The first expression is the one that matches the top of the stack. -.PP -If the pattern is followed by the word STACK -it only matches if there is nothing -else on the fake stack, -and the code generator will stack everything not matched at the start -of the rule. -.PP -The pattern can be preceded with the word -.I exact -following the -.I with -that tells the code generator not to try to coerce to the pattern -but only to use it when it is already present on the fake stack. -There are two reasons for this construction, -correctness and speed. -It is needed for correctness when the pattern contains a register -that is not transparent when data is moved through it. -.LP -Example: on the PDP-11 the shortest code for -.DS -\fBlae\fP a -\fBloi\fP 8 -\fBlae\fP b -\fBsti\fP 8 -.DE -is -.DS -movf _a,fr0 -movf fr0,_b -.DE -if the floating point processor is in double -precision mode and fr0 is free. -Unfortunately this is not correct since a trap can occur on certain -kinds of data. -This could happen if there was a stack pattern for \fBsti\fP\ 8 -like this: -.DS -with DBLREG -.DE -The code generator would then find that coercing the 8-byte global _a -to a floating point register and then storing it to _b was the cheapest, -if the space/time knob was turned far enough to space. -This can be prevented by changing the stack pattern to -.DS -with exact DBLREG -.DE -It is unfortunate that the type information is no longer present, -since if _a really is a floating point number the move could be -made without error. -.PP -The second reason for the -.I exact -construct is speed. -When the code generator has a long list of possible stack patterns -for one EM pattern it can waste much time trying to find coercions -to all of them, while the mere presence of such a long list -indicates that the table writer has given many special cases. 
-Prepending all the special cases by -.I exact -will stop the code generator from trying to find things -that either cannot be done, -or are too expensive anyway. -.PP -So in general it is wise to prepend all stack patterns that -cannot be made by coercions with -.I exact . -.PP -Using both -.I exact -and STACK in the stack pattern has the effect that the rule will -only be taken if there is nothing else on the fake stack. -.NH 3 -The kills part -.PP -The optional kills part describes certain tokens -that should neither remain on -the fake stack, nor remembered as contents of registers. -This is usually only required with store operations. -The entire fake stack, except for the part matched in the stack pattern, -is searched for tokens matching the expression and they are copied -to the real stack. -Every register that contains the token is marked as empty. -.PP -Syntax is -.DS -kills -thing to kill : token set optionally followed by boolean expression -.DE -Example: -.DS -kills regind2 %reg != lb || %off == $1 -.DE -is a kills part used for example in the \fBinl\fP or \fBstl\fP code rule. -It removes all register offsetted tokens where the register is not the -localbase plus the local in which the store is done. -The necessity for this can be seen from the following example: -.DS -\fBlol\fP 4 -\fBinl\fP 4 -\fBstl\fP 6 -.DE -Without a proper kills part in the rule for \fBinl\fP code would -be generated as here -.DS -inc 4(r5) -mov 4(r5),6(r5) -.DE -so local 6 would be given the new value of local 4 instead of the old -as the EM code prescribed. -.PP -When generating code for an EM-instruction like -.B sti -it is necessary to write a line in the table like -.DS -kills all_except_constant_or_register -.DE -where the long identifier is a set containing all tokens -that can be the destination of some random indirect store. -These indirect stores are the main reason to prevent this -.I kills -line to be deduced automatically by -.I cgg . 
-.PP -When generating something like a branch instruction it -might be needed to empty the fake stack completely. -This can of course be done with -.DS -kills ALL -.DE -or by ending the stack pattern with the word STACK, -if the stack pattern does not start with -.I exact . -The latter does not erase the contents of registers. -.PP -It is unfortunate that this part is still present in the table -but it is too much for now to let the -.I cgg -program discover what rules ruin what kind of tokens. -Maybe some day ..... -.NH 3 -The allocates part -.PP -The optional register allocation part describes the registers needed. -Syntax is -.DS -uses -.DE -where itemlist is a list of three kinds of things: -.IP 1) -.I reusing -< a token description >, for example %1. -.br -This will instruct the code generator that all registers -contained in this token can be reused if they are not used -in another token on the fakestack, -so that they are available for allocation in this -.I uses -line -if they were only used in that token. -See example below. -.IP 2) -a register property. -.br -This will allocate a register with that property, -that is marked as empty at this point. -Look ahead can be performed if there is more than one register available. -.IP 3) -a register property with initialization. -.br -This will allocate the register as in 2) but will also -initialize it. -This eases the task of the code generator because it can -find a register already filled with the right value -if it exists. -.LP -Examples: -.DS -uses ODDREG -.DE -will allocate an odd register, while -.DS -uses REG={regind2,lb,$1} -.DE -will allocate a register while simultaneously filling it with -the asked value. -.br -Inside the coercion from xsrc2 to REG in the PDP-11 table -the following line can be found. -.DS -uses reusing %1, REG=%1 -.DE -This tells the code generator that registers contained in %1 can be used -again and asks to fill the register allocated with %1. 
-So if %1={regind2,r3,"4"} and r3 is not in use elsewhere on the fake stack -the following code might be generated. -.DS -mov 4(r3),r3 -.DE -In the rest of the line the registers allocated can be named by -%a and %b.1,%b.2, i.e. with lower case letters -in order of allocation. -.NH 3 -The generates part -.PP -Code to be generated, also optionally, is specified as -the word -.I gen -followed by a list of items of the following kind: -.IP 1) -An instruction name followed by a comma-separated -list of token descriptions. -.I Cgg -will search the instruction definitions for the machine to find a suitable -instruction. -At code generation time the assembler name of the -instruction will be output followed by a space, -followed by a comma separated list of tokens. -.br -In the table an instruction without operands must be -followed by a period. -The author of -.I cgg -could not get -.I yacc -to accept his syntax without it. -Sorry about this. -.IP 2) -a -.I move -call. -This has the following syntax: -.DS -move , -.DE -Moves are handled specially since that enables the code generator -to keep track of register contents. -Example: -.DS -move r3,{regind2,lb,$1} -.DE -will generate code to move r3 to $1(r5) except when -r3 already was a copy of $1(r5). -Then the code will be omitted. -The rules describing how to move things to each other -can be found in the move definitions section described below. -.IP 3) -For machines that have condition codes, -which alas most of them do, -there are provisions to remember condition code settings -and prevent needless testing. -To set the condition code to a token put in the code the following call: -.DS -test -.DE -This will generate a test if the condition codes -were not already set to that token. -The rules describing how to test things -can be found in the test definitions section described below. -See also the :cc qualifier that can be used at instruction -definition time. -.IP 4) -The -.I return -statement. 
-Only used when register variables are in use. -This statement causes a call to the machine dependent -C-routine -.I regreturn . -Explanation of this must wait for the description of the -file mach.c below. -.IP 5) -The -.I labeldef -statement. Its only argument should be that of the -.I lab -pseudo-instruction. This is needed to generate local labels when the -top element size information is used. It takes the form -.DS - labeldef $i -.DE -.IP 6) -A temporary label of the form <digit>: may be placed here. -Expressions of the form [0-9][bf] in this code rule -generate the same string as is used for this label. -The code generator system could probably easily be changed -to make this work for assemblers that do not support this -type of label by generating unique labels itself. -Implementation of this is not contemplated at the moment. -.NH 3 -Stack replacement -.PP -The optional stack replacement is a possibly empty list -of tokens to be pushed onto the fake stack. -It starts with the word -.I yields , -and is followed by a list of token descriptions. -.PP -All tokens matched by the stack pattern at the beginning of the code rule -are first removed and their registers deallocated. -Items are pushed in the order of appearance. -This means that the last item will be on the top of the -stack after the push. -So if the stack pattern contained two sets -and they must be pushed back unchanged, -they have to be specified as stack replacement -.DS -yields %2 %1 -.DE -and not the other way around. -This is known to cause errors in tables so watch out for -this! -.NH 3 -EM replacement -.PP -In exceptional cases it might be useful to leave part of an EM-pattern -undone. -For example, a \fBsdl\fP instruction might -be split into two \fBstl\fP instructions -when there is no 4-byte quantity on the stack. -The EM replacement part allows -one to express this. -It is activated by the word -.I leaving .
-.LP -Example: -.DS -leaving \fBstl\fP $1 \fBstl\fP $1+2 -.DE -The instructions are inserted in the stream so that they can match -the first part of a pattern in the next step. -Note that since the code generator traverses the EM instructions in a strict -linear fashion, -it is impossible to let the EM replacement match later parts of a pattern. -So if there is a pattern -.DS -\fBloc\fP \fBstl\fP $1==0 -.DE -and the input is -.DS -\fBloc\fP 0 \fBsdl\fP 4 -.DE -the \fBloc\fP\ 0 will be processed first, -then the \fBsdl\fP might be split into two \fBstl\fP's but the pattern -cannot match now. -.NH 3 -Examples -.PP -A list of examples for the PDP-11 is given here. -Far from being complete it gives examples of most kinds -of instructions. -.DS -.ta 7.5c -pat loc yields {const2, $1} - -pat ldc yields {const2, loww($1)} {const2, highw($1)} -.DE -These simple patterns just push one or more tokens onto the fake stack. -.DS -.ta 7.5c -pat lof -with REG yields {regind2,%1,$1} -with exact regconst2 yields {regind2,%1.reg,$1+%1.off} -with exact addr_external yields {relative2,$1+%1.off} -with exact addr_local yields {LOCAL, %1.ind + $1,2} -.DE -This pattern shows the possibility to do different things -depending on the fake stack contents, -there are some rules for some specific cases plus a general rule, -not preceded by -.I exact -that can always be taken after a coercion, -if necessary. -.DS -.ta 7.5c -pat lxl $1>3 -uses REG={LOCAL, SL, 2}, REG={const2,$1-1} -gen 1: - move {regind2,%a, SL},%a - sob %b,{label,1b} yields %a -.DE -This rule shows register allocation with initialisation, -and the use of a temporary label. -The constant SL used here is defined to be the offset from lb -of the static link, -that is pushed by the Pascal compiler as the last argument of -a function. 
-.DS -.ta 7.5c -pat stf -with regconst2 xsrc2 - kills allexeptcon - gen move %2,{regind2,%1.reg,$1+%1.off} -with addr_external xsrc2 - kills allexeptcon - gen move %2,{relative2,$1+%1.off} -.DE -This rule shows the use of a -.I kills -part in a store instruction. -The set allexeptcon contains all tokens that can be the destination -of an indirect store. -.DS -.ta 7.5c -pat sde -with exact FLTREG - kills posextern - gen move %1,{relative4,$1} -with exact ftolong - kills posextern - gen setl. - movfi %1.reg,{relative4,$1} - seti. -with src2 src2 - kills posextern - gen move %1, {relative2, $1 } - move %2, {relative2, $1+2} -.DE -The rule for -.B sde -shows the use of the -.I exact -clause in both qualities, -the first is for correctness, -the second for efficiency. -The third rule is taken by default, -resulting in two separate stores, -nothing better exists on the PDP-11. -.DS -.ta 7.5c -pat sbi $1==2 -with src2 REG - gen sub %1,%2 yields %2 -with exact REG src2-REG - gen sub %2,%1 - neg %1 yields %1 -.DE -This rule for -.I sbi -has a normal first part, -and a hand optimized special case as its second part. -.DS -.ta 7.5c -pat mli $1==2 -with ODDREG src2 - gen mul %2,%1 yields %1 -with src2 ODDREG - gen mul %1,%2 yields %2 -.DE -This shows the general property for rules with commutative -operators, -heuristics or look ahead will have to decide which rule is the best. -.DS -.ta 7.5c -pat loc sli $1==1 && $2==2 -with REG -gen asl %1 yields %1 -.DE -A simple rule involving a longer EM-pattern, -to make use of a specialized instruction available. -.DS -.ta 7.5c -pat loc loc cii $1==1 && $2==2 -with src1or2 -uses reusing %1,REG -gen movb %1,%a yields %a -.DE -A somewhat more complicated example of the same. -Note the -.I reusing -clause. -.DS -.ta 7.5c -pat loc loc loc cii $1>=0 && $2==2 && $3==4 - leaving loc $1 loc 0 -.DE -Shows a trivial example of EM-replacement. -This is a rule that could be done by the -peephole optimizer, -if word order in longs was defined in EM. 
-On a `big-endian' machine the two replacement -instructions would be the other way around. -.DS -.ta 7.5c -pat and $1==2 -with const2 REG - gen bic {const2,~%1.num},%2 yields %2 -with REG const2 - gen bic {const2,~%2.num},%1 yields %1 -with REG REG - gen com %1 - bic %1,%2 yields %2 -.DE -Shows the way to handle the absence -of an -.I and -instruction. -.DS -.ta 7.5c -pat set $1==2 -with REG -uses REG={const2,1} -gen ash %1,%a yields %a -.DE -Shows the building of a word-size set. -.DS -.ta 7.5c -pat lae aar $2==2 && rom($1,3)==1 && rom($1,1)==0 - leaving adi 2 - -pat lae aar $2==2 && rom($1,3)==1 && rom($1,1)!=0 - leaving adi 2 adp 0-rom($1,1) -.DE -Two rules showing the use of the rom pseudo function, -and some array optimisation. -.DS -.ta 7.5c -pat bra -with STACK -gen jbr {label, $1} -.DE -A simple jump. -The stack pattern guarantees that everything will be stacked -before the jump is taken. -.DS -pat lab topeltsize($1)==2 && !fallthrough($1) -gen labeldef $1 yields r0 - -pat lab topeltsize($1)==2 && fallthrough($1) -with src2 -gen move %1,r0 - labeldef $1 yields r0 - -pat lab topeltsize($1)!=2 -with STACK -kills ALL -gen labeldef $1 - -pat bra topeltsize($1)==2 -with src2 STACK - gen move %1,r0 - jbr {label, $1} - -pat bra topeltsize($1)!=2 -with STACK - gen jbr {label, $1} -.DE -The combination of these patterns makes sure that the top of the EM-stack will -be in register r0 whenever necessary. The top element size mechanism will -also show a size of 0 whenever a conditional branch to a label -occurs. This saves a lot of patterns and hardly decreases performance. -When the same register is used to return function results, this can save -many moves to and from the stack. -.DS -.ta 7.5c -pat cal -with STACK -gen jsr pc,{label, $1} -.DE -A simple call. -Same comments as previous rule. -.DS -.ta 7.5c -pat lfr $1==2 yields r0 -pat lfr $1==4 yields r1 r0 -.DE -Shows the return area conventions of the PDP-11 table.
-At this point a reminder: -the -.B asp -instruction, and some other instructions must leave -the function return area intact. -See the defining document for EM for exact information. -.DS -.ta 7.5c -pat ret $1==0 -with STACK -gen mov lb,sp - rts pc -.DE -This shows a rule for -.B ret -in a table not using register variables. -In a table with register variables the -.I gen -part would just contain -.I return . -.DS -.ta 7.5c -pat blm -with REG REG -uses REG={const2,$1/2} -gen 1: - mov {autoinc,%2},{autoinc,%1} - sob %a,{label,1b} -.DE -This rule for -.B blm -already uses three registers of the same type. -.I Cgg -contains code to check all rules -to see if they can be applied from an empty fakestack. -It uses the marriage theorem of Hall, -a theorem from combinatorial mathematics, -to accomplish this. -.DS -.ta 7.5c -pat exg $1==2 -with src2 src2 yields %1 %2 -.DE -This rule shows the exchanging of two elements on the fake stack. -.NH 2 -Code rules using procedures -.PP -To start this section it must be admitted at once that the -word procedure is chosen here mainly for its advertising -value. -It more resembles a glorified goto but this of course can -not be admitted in the glossy brochures. -This document will continue to use the word -procedure. -.PP -The need for procedures was felt after the first version of -the code generator system was made, -mainly because of conditional instructions. -Often the code sequences for -.B tlt , -.B tle , -.B teq , -.B tne , -.B tge -and -.B tgt -were identical apart from one opcode in the code rule. -The code sequence had to be written out six times however. -Not only did this increase the table size and bore the -table writer, it also led to errors when changing the table -since it happened now and then that five out of six -rules were changed.
-.PP -In general the procedures in this table format are used to -keep one copy instead of six of the code rules for all -sorts of conditionals and one out of two for things like -increment/decrement. -.PP -And now the syntax, first the procedure definition, -which must indeed be defined before the call because -.I cgg -is one-pass. -The procedure heading replaces the EM-pattern in a code rule -and looks like this: -.DS -proc <identifier> <optional example> -.DE -The identifier is used in later calls and the example must -be used if expressions like $1 are used in the code rule. -.DS -<optional example> : example <list of EM-instructions> -.DE -so an example looks just like an EM-pattern, but without -the optional boolean expression. -The example is needed to know the types of $1 expressions. -The current version of -.I cgg -does not check correctness of the example, so be careful. -.PP -A procedure is called with string-parameters, -that are assembler opcodes. -They can be accessed by appending the string `[<number>]' -to a table opcode, where <number> is the parameter number. -The string `*' can be used as an equivalent for `[1]'. -Just in case this is not clear, here is an example for -a procedure to increment/decrement a register. -.DS -.ta 7.5c -incop REG:rw:cc . /* in the INSTRUCTIONS part of course */ - -proc incdec -with REG -gen incop* %1 yields %1 -.DE -The procedure is called with parameter "inc" or "dec". -.PP -The procedure call is given instead of the code-part of the -code rule and looks like this -.DS -call <identifier> '(' <comma-separated list of strings> ')' -.DE -which leads to the following large example: -.DS -.ta 7.5c -proc bxx example beq -with src2 src2 STACK -gen cmp %2,%1 - jxx* {label, $1} - -pat blt call bxx("jlt") -pat ble call bxx("jle") -pat beq call bxx("jeq") -pat bne call bxx("jne") -pat bgt call bxx("jgt") -pat bge call bxx("jge") -.DE -.NH 2 -Move definitions -.PP -We now jump back to near the beginning of the table -where the move definitions are found. -The move definitions directly follow the instruction -definitions.
-.PP -In certain cases a move is called for, -either explicitly when a -.I move -instruction is used in a code rule, -or implicitly in a register initialization. -The different code rules possible to move data from one -spot to another are described here. -Example for the PDP-11: -.DS -.ta 8 16 24 32 40 48 56 64 -MOVES - -from const2 %num==0 to dst2 -gen clr %2 - -from src2 to dst2 -gen mov %1,%2 - -from FLTREG to longf4-FLTREG -gen movfo %1,%2 - -from longf4-FLTREG to FLTREG -gen movof %1,%2 -.DE -The example shows that the syntax is just -.DS -from <source> to <destination> gen <list of instructions> -.DE -Source and destination are a token set, optionally followed by -a boolean expression. -The code generator will take the first move that matches, -whenever a move is necessary. -.I Cgg -checks whether all moves called for in the table are present. -.NH 2 -Test definitions -.PP -This part describes the instructions necessary to set the condition codes -to a certain token. -These rules are needed when the -.I test -instruction is used in code rules. -Example for the PDP-11: -.DS -.ta 8 16 24 32 40 48 56 64 -TESTS - -to test src2 -gen tst %1 -.DE -So syntax is just -.DS -to test <source> gen <list of instructions> -.DE -Source is the same thing as in the move definition. -.I Cgg -checks whether all tests called for in the table are present. -.NH 2 -Some explanation about the rules behind coercions -.PP -A central part in code generation is taken by the -.I coercions . -It is the responsibility of the table writer to provide -all necessary coercions so that code generation can continue. -The minimal set of coercions consists of -the coercions to unstack every token expression, -in combination with the rules to stack every token. -It should not be possible to smuggle a table through -.I cgg -without this basic set available. -.PP -If these are present the code generator can always make the necessary -transformations by stacking and unstacking.
-Of course for code quality it is usually best to provide extra coercions -to prevent this stacking to take place. -.I Cg -discriminates three types of coercions: -.IP 1) -Unstacking coercions. -This category can use the -.I uses -clause in its code. -.IP 2) -Splitting coercions, these are the coercions that split -larger tokens into smaller ones. -.IP 3) -Transforming coercions, these are the coercions that transform -a token into another of the same size. -This category can use the -.I uses -clause in its code. -.PP -When a stack configuration does not match the stack pattern -.I coercions -are searched for in the following order: -.IP 1) -First tokens are split if necessary to get their sizes right. -.IP 2) -Then transforming coercions are found that will make the pattern match. -.IP 3) -Finally if the stack pattern is longer than the fake stack contents -unstacking coercions will be used to fill up the pattern. -.PP -At any point, when coercions are missing so code generation could not -continue, the offending tokens are stacked. -.NH 2 -Stack definitions -.PP -The next part of the table defines the stacking rules for the machine. -Each token that may reside on the fake stack must have a rule attached -to put it on the real stack. -Example for the PDP-11: -.DS -.ta 8 16 24 32 40 48 56 64 -STACKINGRULES - -from const2 %num==0 to STACK -gen clr {autodec,sp} - -from src2 to STACK -gen mov %1,{autodec,sp} - -from regconst2 to STACK -gen mov %1.reg,{autodec,sp} - add {addr_external, %1.off},{regdef2,sp} - -from DBLREG to STACK -gen movf %1,{autodec,sp} - -from FLTREG to STACK -gen movfo %1,{autodec,sp} - -from regind8 to STACK -uses REG -gen move %1.reg,%a - add {addr_external, 8+%1.off},%a - mov {autodec, %a},{autodec,sp} - mov {autodec, %a},{autodec,sp} - mov {autodec, %a},{autodec,sp} - mov {autodec, %a},{autodec,sp} -.DE -.PP -These examples should be self-explanatory, except maybe for the last one. -It is possible inside a stacking-rule to use a register. 
-Since however the stacking might also take place at a moment -when no registers are free, it is mandatory that for each token -there is one stackingrule that does not use a register. -The code generator uses the first rule possible. -.NH 2 -Coercions -.PP -The next part of the table defines the coercions that are possible -on the defined tokens. -Example for the PDP-11: -.DS -.ta 7.5c -COERCIONS - -from STACK -uses REG -gen mov {autoinc,sp},%a yields %a - -from STACK -uses DBLREG -gen movf {autoinc,sp},%a yields %a - -from STACK -uses REGPAIR -gen mov {autoinc,sp},%a.1 - mov {autoinc,sp},%a.2 yields %a -.DE -These three coercions just deliver a certain type -of register by popping it from the real stack. -.DS -.ta 7.5c -from LOCAL yields {regind2,lb,%1.ind} - -from DLOCAL yields {regind4,lb,%1.ind} - -from REG yields {regconst2, %1, 0} -.DE -These three are zero-cost rewriting rules. -.DS -.ta 7.5c -from regconst2 %1.off==1 -uses reusing %1,REG=%1.reg -gen inc %a yields %a - -from regconst2 -uses reusing %1,REG=%1.reg -gen add {addr_external, %1.off},%a yields %a - -from addr_local -uses REG -gen mov lb,%a - add {const2, %1.ind},%a yields %a -.DE -The last three are three different cases of the coercion -register+constant to register. -Only in the last case is it always necessary to allocate -an extra register, -since arithmetic on the localbase is unthinkable. -.DS -.ta 7.5c -from xsrc2 -uses reusing %1, REG=%1 yields %a - -from longf4 -uses FLTREG=%1 yields %a - -from double8 -uses DBLREG=%1 yields %a - -from src1 -uses REG={const2,0} -gen bisb %1,%a yields %a -.DE -These examples show the coercion of different -tokens to a register of the needed type. -The last one shows the trouble needed on a PDP-11 to -ensure bytes are not sign-extended. -In EM it is defined that the result of a \fBloi\fP\ 1 -instruction is an integer in the range 0..255. 
-.DS -.ta 7.5c -from REGPAIR yields %1.2 %1.1 - -from regind4 yields {regind2,%1.reg,2+%1.off} - {regind2,%1.reg,%1.off} - -from relative4 yields {relative2,2+%1.off} - {relative2,%1.off} -.DE -The last examples are splitting rules. -.PP -The examples show that -all coercions change one token on the fake stack into one or more others, -possibly generating code. -The STACK token is supposed to be on the fake stack when it is -really empty, and can only be changed into one other token. -.NH 1 -The files mach.h and mach.c -.PP -The table writer must also supply two files containing -machine dependent declarations and C code. -These files are mach.h and mach.c. -.NH 2 -Types in the code generator -.PP -Three different types of integer coexist in the code generator -and their range depends on the machine at hand. -They are defined depending on the Target EM_WSIZE, or TEM_WSIZE, -and TEM_PSIZE. -The type 'int' is used for things like counters that won't require -more than 16 bits precision. -The type 'word' is used among others to assemble datawords and -is of type 'long'. -The type 'full' is used for addresses and is of type 'long' if -TEM_WSIZE>2 or TEM_PSIZE>2. -.PP -In macro and function definitions in later paragraphs implicit typing -will be used for parameters, that is parameters starting with an 's' -will be of type string, and the letters 'i','w','f' will stand for -int, word and full respectively. -.NH 2 -Global variables to work with -.PP -Some global variables are present in the code generator -that can be manipulated by the routines in mach.h and mach.c. 
-.LP -The declarations are: -.DS L -.ta 20 -FILE *codefile; /* code is emitted on this stream */ -word part_word; /* words to be output are put together here */ -int part_size; /* number of bytes already put in part_word */ -char str[]; /* Last string read in */ -long argval; /* Last int read and kept */ -.DE -.NH 2 -Macros in mach.h -.PP -In the file mach.h a collection of macros is defined that have -to do with formatting of assembly code for the machine at hand. -Some of these macros can of course be left undefined in which case the -macro calls are left in the source and will be treated as -function calls. -These functions can then be defined in \fImach.c\fR. -.PP -The macros to be defined are: -.IP ex_ap(s) 16 -Must print the magic incantations that will mark the symbol \fIs\fR -to be exported to other modules. -This is the translation of the EM \fBexa\fP and \fBexp\fP instructions. -.IP in_ap(s) -Same to import the symbol. -Translation of \fBina\fP and \fBinp\fP. -.IP newplb(s) -Must print the definition of procedure label \fIs\fR. -If left undefined the newilb() macro is used instead. -.IP newilb(s) -Must print the definition of instruction label \fIs\fR. -.IP newdlb(s) -Must print the definition of data label \fIs\fR. -.IP dlbdlb(s1,s2) -Must define data label -.I s1 -to be equal to -.I s2 . -.IP newlbss(s,f) -Must declare a piece of memory initialized to BSS_INIT (see below) -of length -.I f -and with label -.I s . -.IP cst_fmt -Format to be used when converting constant arguments of -EM instructions to string. -Argument to be formatted will be 'full'. -.IP off_fmt -Format to be used for integer part of label+constant, -argument will be 'full'. -.IP fmt_ilb(ip,il,s) -Must use the numbers -.I ip -and -.I il -that are a procedure number -and a label number respectively and copy a string to -.I s -that must be unique for that combination. -This procedure is optional, if it is not given ilb_fmt -must be defined as below.
-.IP ilb_fmt -Format to be used for creation of unique instruction labels. -Arguments will be a unique procedure number (int) and the label -number (int). -.IP dlb_fmt -Format to be used for printing numeric data labels. -Argument will be 'int'. -.IP hol_fmt -Format to be used for generation of labels for -space generated by a -.B hol -pseudo. -Argument will be 'int'. -.IP hol_off -Format to be used for printing of the address of an element in -.B hol -space. -Arguments will be the offset in the -.B hol -block (word) and the number of the -.B hol -(int). -.IP con_cst(w) -Must generate output that will assemble into one machine word. -.IP con_ilb(s) -Must generate output that will put the address of the instruction label -into the datastream. -.IP con_dlb(s) -Must generate output that will put the address of the data label -into the datastream. -.IP fmt_id(sf,st) -Must take the string in -.I sf -that is a nonnumeric global label, and transform it into a copy made to -.I st -that will not collide with reserved assembler words and system labels. -This procedure is optional, if it is not given the id_first macro is used -as defined below. -.IP id_first -Must be a character. -This is prepended to all nonnumeric global labels if their length -is shorter than the maximum allowed(currently 8) or if they already -start with that character. -This is to avoid conflicts of user labels with system labels. -.IP BSS_INIT -Must be a constant. -This is the value filled in all the words not initialized explicitly. -This is loader and system dependent. -If omitted no initialization is assumed. 
-.NH 3 -Example mach.h for the PDP-11 -.DS L -.ta 4c -#define ex_ap(y) fprintf(codefile,"\et.globl %s\en",y) -#define in_ap(y) /* nothing */ - -#define newplb(x) fprintf(codefile,"%s:\en",x) -#define newilb(x) fprintf(codefile,"%s:\en",x) -#define newdlb(x) fprintf(codefile,"%s:\en",x) -#define dlbdlb(x,y) fprintf(codefile,"%s=%s\en",x,y) -#define newlbss(l,x) fprintf(codefile,"%s:.=.+%d.\en",l,x); - -#define cst_fmt "$%d." -#define off_fmt "%d." -#define ilb_fmt "I%x_%x" -#define dlb_fmt "_%d" -#define hol_fmt "hol%d" - -#define hol_off "%ld.+hol%d" - -#define con_cst(x) fprintf(codefile,"%ld.\en",x) -#define con_ilb(x) fprintf(codefile,"%s\en",x) -#define con_dlb(x) fprintf(codefile,"%s\en",x) - -#define id_first '_' -#define BSS_INIT 0 -.DE -.NH 2 -Functions in mach.c -.PP -In mach.c some functions must be supplied, -mostly manipulating data resulting from pseudoinstructions. -The specifications are given here, -implicit typing of parameters as above. -.IP - -con_part(isz,word) -.br -This function must manipulate the globals -part_word and part_size to append the isz bytes -contained in word to the output stream. -If part_word is full, i.e. part_size==TEM_WSIZE -the function part_flush() may be called to empty the buffer. -This is the function that must go through the trouble of -doing byte order in words correct. -.IP - -con_mult(w_size) -.br -This function must take the string str[] and create an integer -from the string of size w_size and generate code to assemble global -data for that integer. -Only the sizes for which arithmetic is implemented need be -handled, -so if 200-byte integer division is not implemented, -200-byte integer global data don't have to be implemented. -Here one must take care of word order in long integers. -.IP - -con_float() -.br -This function must generate code to assemble a floating -point number of which the size is contained in argval -and the ASCII representation in str[]. 
-.IP - -prolog(f_nlocals) -.br -This function is called at the start of every procedure. -Function prolog code must be generated, -and room made for local variables for a total of f_nlocals bytes. -.IP - -mes(w_mesno) -.br -This function is called when a -.B mes -pseudo is seen that is not handled by the machine independent part. -The example below shows all one probably have to know about that. -.IP - -segname[] -.br -This is not a function, -but an array of four strings. -These strings are put out whenever the code generator -switches segments. -Segments are SEGTXT, SEGCON, SEGROM and SEGBSS in that order. -.PP -If register variables are used in a table, the program -.I cgg -will define the word REGVARS during compilation of the sources. -So the following functions described here should be bracketed -by #ifdef REGVARS and #endif. -.IP - -regscore(off,size,typ,freq,totyp) long off; -.br -This function should assign a score to a register variable, -the score should preferably be the estimated number of bytes -gained when it is put in a register. -Off and size are the offset and size of the variable, -typ is the type, that is reg_any, reg_pointer, reg_loop or reg_float. -Freq is the count of static occurrences, and totyp -is the type of the register it is planned to go into. -.br -Keep in mind that the gain should be net, that is the cost for -register save/restore sequences and the cost of initialisation -in the case of parameters should already be included. -.IP - -i_regsave() -.br -This function is called at the start of a procedure, just before -register saves are done. -It can be used to initialise some variables if needed. -.IP - -f_regsave() -.br -This function is called at end of the register save sequence. -It can be used to do the real saving if multiple register move -instructions are available. -.IP - -regsave(regstr,off,size) char *regstr; long off; -.br -Should either do the real saving or set up a table to have -it done by f_regsave. 
-Note that initialisation of parameters should also be done, -or planned here. -.IP - -regreturn() -.br -Should restore saved registers and return. -The function result is already in the function return area by now. -.NH 3 -Example mach.c for the PDP-11 -.PP -As an example of the sort of code expected, -the mach.c for the PDP-11 is presented here. -.DS L -.ta 0.5i 1i 1.5i 2i 2.5i 3i 3.5i 4i 4.5i -/* - * machine dependent back end routines for the PDP-11 - */ - -con_part(sz,w) register sz; word w; { - - while (part_size % sz) - part_size++; - if (part_size == 2) - part_flush(); - if (sz == 1) { - w &= 0xFF; - if (part_size) - w <<= 8; - part_word |= w; - } else { - assert(sz == 2); - part_word = w; - } - part_size += sz; -} - -con_mult(sz) word sz; { - long l; - - if (sz != 4) - fatal("bad icon/ucon size"); - l = atol(str); - fprintf(codefile,"\et%o;%o\en",(int)(l>>16),(int)l); -} - -con_float() { - double f; - register short *p,i; - - /* - * This code is correct only when the code generator is - * run on a PDP-11 or VAX-11 since it assumes native - * floating point format is PDP-11 format. - */ - - if (argval != 4 && argval != 8) - fatal("bad fcon size"); - f = atof(str); - p = (short *) &f; - i = *p++; - if (argval == 8) { - fprintf(codefile,"\et%o;%o;",i,*p++); - i = *p++; - } - fprintf(codefile,"\et%o;%o\en",i,*p++); -} - -#ifdef REGVARS - -char Rstring[10]; -full lbytes; -struct regadm { - char *ra_str; - long ra_off; -} regadm[2]; -int n_regvars; - -regscore(off,size,typ,score,totyp) long off; { - - /* - * This function is full of magic constants. - * They are a result of experimentation. 
- */ - - if (size != 2) - return(-1); - score -= 1; /* allow for save/restore */ - if (off>=0) - score -= 2; - if (typ==reg_pointer) - score *= 17; - else if (typ==reg_loop) - score = 10*score+50; /* Guestimate */ - else - score *= 10; - return(score); /* 10 * estimated # of words of profit */ -} - -i_regsave() { - - Rstring[0] = 0; - n_regvars=0; -} - -f_regsave() { - register i; - - if (n_regvars==0 || lbytes==0) { - fprintf(codefile,"mov r5,-(sp)\enmov sp,r5\en"); - if (lbytes == 2) - fprintf(codefile,"tst -(sp)\en"); - else if (lbytes!=0) - fprintf(codefile,"sub $0%o,sp\en",lbytes); - for (i=0;i<n_regvars;i++) - fprintf(codefile,"mov %s,-(sp)\en",regadm[i].ra_str); - } else { - if (lbytes>6) { - fprintf(codefile,"mov $0%o,r0\en",lbytes); - fprintf(codefile,"jsr r5,PR%s\en",Rstring); - } else { - fprintf(codefile,"jsr r5,PR%d%s\en",lbytes,Rstring); - } - } - for (i=0;i<n_regvars;i++) - if (regadm[i].ra_off>=0) - fprintf(codefile,"mov 0%lo(r5),%s\en",regadm[i].ra_off, - regadm[i].ra_str); -} - -regsave(regstr,off,size) char *regstr; long off; { - - fprintf(codefile,"/ Local %ld into %s\en",off,regstr); - strcat(Rstring,regstr); - regadm[n_regvars].ra_str = regstr; - regadm[n_regvars].ra_off = off; - n_regvars++; -} - -regreturn() { - - fprintf(codefile,"jmp RT%s\en",Rstring); -} - -#endif - -prolog(nlocals) full nlocals; { - -#ifndef REGVARS - fprintf(codefile,"mov r5,-(sp)\enmov sp,r5\en"); - if (nlocals == 0) - return; - if (nlocals == 2) - fprintf(codefile,"tst -(sp)\en"); - else - fprintf(codefile,"sub $0%o,sp\en",nlocals); -#else - lbytes = nlocals; -#endif -} - -mes(type) word type; { - int argt ; - - switch ( (int)type ) { - case ms_ext : - for (;;) { - switch ( argt=getarg( - ptyp(sp_cend)|ptyp(sp_pnam)|sym_ptyp) ) { - case sp_cend : - return ; - default: - strarg(argt) ; - fprintf(codefile,".globl %s\en",argstr) ; - break ; - } - } - default : - while ( getarg(any_ptyp) != sp_cend ) ; - break ; - } -} - -char *segname[] = { - ".text", /* SEGTXT */ - ".data", /* SEGCON */ - ".data", /* SEGROM */ - ".bss" /* SEGBSS */ -}; -.DE -.NH 1 -Internal workings of the code generator.
-.NH 2 -Description of tables.c and tables.h contents -.PP -In this section the intermediate files will be described -that are produced by -.I cgg -and compiled with machine independent code to produce a code generator. -.NH 3 -Tables.c -.PP -Tables.c contains a large number of initialized arrays of all sorts. -Description of each follows: -.br -.in 1i -.ti -0.5i -byte coderules[] -.br -Pseudo code interpreted by the code generator. -Always starts with some opcode followed by operands depending -on the opcode. -Some of the opcodes have an argument encoded in the upper three -bits of the opcode byte. -Integers in this table are between 0 and 32767 and have a one byte -encoding if between 0 and 127. -.ti -0.5i -char wrd_fmt[] -.br -The format used for output of words. -.ti -0.5i -char stregclass[] -.br -Number of computed static register class per register. -Two registers are in the same class if they have the same properties -and don't share a common subregister. -.ti -0.5i -struct reginfo machregs[] -.br -Info per register. -Initialized with representation string, size, -members of the register and set of registers affected when this -one is changed. -Also contains room for run time information, -like contents and reference count. -.ti -0.5i -tkdef_t tokens[] -.br -Information per tokentype. -Initialized with size, cost, type of operands and formatstring. -.ti -0.5i -node_t enodes[] -.br -List of triples representing expressions for the code generator. -.ti -0.5i -string codestrings[] -.br -List of strings. -All strings are put in a list and checked for duplication, -so only one copy per string will reside here. -.ti -0.5i -set_t machsets[] -.br -List of token expression sets. -Bit 0 of the set is used for the SCRATCH property of registers, -bits 1 up to NREG are for the corresponding registers -and bits NREG+1 up to the end are for corresponding tokens. -.ti -0.5i -inst_t tokeninstances[] -.br -List of descriptions for building tokens.
-Contains type of rule for building one, -plus operands depending on the type. -.ti -0.5i -move_t moves[] -.br -List of move rules. -Contains token expressions for source and destination -plus index for code rule. -.ti -0.5i -test_t tests[] -.br -List of test rules. -Contains token expressions for source -plus index for code rule. -.ti -0.5i -byte pattern[] -.br -EM patterns. -This is structured internally as chains of patterns, -each chain pointed at by pathash[]. -After each pattern the list of possible code rules is given. -.ti -0.5i -int pathash[256] -.br -Indices into pattern[] for all patterns with a certain low order -byte of the hashing function. -.ti -0.5i -c1_t c1coercs[] -.br -List of rules to stack tokens. -Contains token expressions, -register needed, -cost -and code rule. -.ti -0.5i -c2_t c2coercs[] -.br -List of splitting coercions. -Token expressions, -split factor, -replacements -and code rule. -.ti -0.5i -c3_t c3coercs[] -.br -List of one to one coercions. -Token expressions, -register needed, -replacement -and code rule. -.ti -0.5i -struct reginfo **reglist[] -.br -List of lists of pointers to register information. -For every property the list is here -to find the registers corresponding to it. -.in 0 -.NH 3 -tables.h -.PP -In tables.h various derived constants for the tables are -given. -They are then used to determine array sizes in the actual code generator, -plus loop termination in some cases. -.NH 2 -Other important data structures -.PP -During code generation some other data structures are used -and here is a short description of some of the important ones. -.PP -Tokens are kept in the code generator as a struct consisting of -one integer -.I t_token -which is -1 if the token is a register, -and the number of the token otherwise, -plus an array of -.I TOKENSIZE -unions -.I t_att -of which the first is the register number in case of a register. -.PP -The fakestack is an array of these tokens, -there is a global variable -.I stackheight . 
-.PP -The results of expressions are kept in a struct -.I result -with elements -.I e_typ , -giving the type of the expression: -.I EV_INT , -.I EV_REG -or -.I EV_ADDR , -and a union -.I e_v -which contains the real result. -.NH 2 -A tour through the sources -.NH 3 -codegen.c -.PP -The file codegen.c contains one large function consisting -of one giant switch statement. -It is the interpreter for the code generator pseudo code -as contained in coderules[]. -This function can call itself recursively when doing look ahead. -Arguments are: -.IP codep 10 -Pointer into code rules, pseudo program counter. -.IP ply -Number of EM pattern look ahead allowed. -.IP toplevel -Boolean telling whether this is the toplevel codegen() or -a deeper incarnation. -.IP costlimit -A cutoff value to limit searches. -If the cost crosses costlimit the incarnation can terminate. -.IP forced -A register number if nonzero. -This is used inside coercions to force the allocate() call to allocate -a register determined by earlier look ahead. -.PP -The instructions implemented in the switch: -.NH 4 -DO_DLINE -.PP -Prints debugging information if the code generator runs in debug mode. -This information is only generated if -.I cgg -was called with the -d flag. -.NH 4 -DO_NEXTEM -.PP -Matches the next EM pattern and does look ahead if necessary to find the best -code rule associated with this pattern. -Heuristics are used to determine best code rule when possible. -This is done by calling the distance() function. -It can also handle the procedure mechanism. -.NH 4 -DO_COERC -.PP -This sets the code generator in the state to do a from stack coercion. -.NH 4 -DO_XMATCH -.PP -This is done when a match no longer has to be checked. -Used when the nocoercions: trick is used in the table. -.NH 4 -DO_MATCH -.PP -This is the big one inside this function. -It has the task to transform the contents of the current -fake stack to match the pattern given after it.
-.PP -Since the code generator does not know combining coercions, -i.e. there is no way to make a big token out of two smaller ones, -the first thing done is to stack every token that is too small. -After that all tokens too big are split if possible to the right size. -.PP -Next the coercions are sought that would transform tokens in place to -the right one, plus the coercions that would pop tokens off the stack. -Each of those might need a register, so a list of registers is generated -and at the end of looking for coercions the function -.I tuples() -is called to generate the list of all possible \fIn\fP-tuples, -where -.I n -equals the number of registers needed. -.PP -Look ahead is now performed if the number of tuples is greater than one. -If no possibility is found within the costlimit, -the fake stack is made smaller by pushing the bottom token, -and this process is repeated until either a way is found or -the fake stack is completely empty and there is still no way -to make the match. -.PP -If there is a way the corresponding coercions are executed -and the code is finished. -.NH 4 -DO_REMOVE -.PP -Here the kills clause is executed, all tokens matched by the -token expression plus boolean expression are pushed. -In the current implementation there is no attempt to move those -tokens to registers, but that is a possible future extension. -.NH 4 -DO_DEALLOCATE -.PP -This one temporarily decrements by one the reference count of all registers -contained in the token given as argument. -.NH 4 -DO_REALLOCATE -.PP -Here all temporary deallocates are undone. -.NH 4 -DO_ALLOCATE -.PP -This is the part that allocates a register and decides which one to use. -If the -.I forced -argument was given its task is simple, -otherwise some work must be done. -First the list of possible registers is scanned, -all free registers noted and it is noted whether any of those -registers is already -containing the initialization.
-If no registers are available some fakestack token is stacked and the -process is repeated. -.PP -After that if an exact match was found, -the list of registers is reduced to one register matching exactly -out of every register class. -Now look ahead is performed if necessary and the register chosen. -If an initialization was given the corresponding move is performed, -otherwise the register is marked empty. -.NH 4 -DO_INSTR -.PP -This prints an instruction and its operands. -Only done on toplevel. -.NH 4 -DO_MOVE -.PP -Calls the move() function in the code generator to implement the move -instruction in the table. -.NH 4 -DO_TEST -.PP -Calls the test() function in the code generator to implement the test -instruction in the table. -.NH 4 -DO_ERASE -.PP -Marks the register that is its argument as empty. -.NH 4 -DO_TOKREPLACE -.PP -This is the token replacement part. -It is also called if there is no token replacement because it has -some other functions as well. -.PP -First the tokens that will be pushed on the fake stack are computed -and stored in a temporary array. -Then the tokens that were matched in this rule are popped -and their embedded registers have their reference count -decremented. -After that the replacement tokens are pushed. -.PP -Finally all registers allocated in this rule have their reference count -decremented. -If they were not pushed on the fake stack they will be available again -in the next code rule. -.NH 4 -DO_EMREPLACE -.PP -Places replacement EM instructions back into the instruction stream. -.NH 4 -DO_COST -.PP -Accounts for cost as given in the code rule. -.NH 4 -DO_RETURN -.PP -Returns from this level of codegen(). -Is used at the end of coercions, -move rules etc.. -.NH 4 -DO_LABDEF -.PP -This prints a label when the top element size mechanism is used. Only done on -toplevel. -.NH 3 -compute.c -.PP -This module computes the various expressions as given -in the enodes[] array. 
-Nothing very special happens here, -it is just a recursive function computing leaves -of expressions and applying the operator. -.NH 3 -equiv.c -.PP -In this module the tuples() function is implemented. -It is given the number of registers needed and -a list of register lists and it constructs a list of tuples -where the \fIn\fP'th register comes from the \fIn\fP'th list. -Before the list is constructed however -the dynamic register classes are computed. -Two registers are in the same dynamic class if they are in the -same static class and their contents is the same. -.PP -After that the permute() recursive function is called to -generate the list of tuples. -After construction a generated tuple is added to the list -if it is not already pairwise in the same class -or if the register relations are not the same, -i.e. if the first and second register share a common -subregister in one tuple and not in the other they are considered different. -.NH 3 -fillem.c -.PP -This is the routine that does the reading of EM instructions -and the handling of pseudos. -The mach.c module provided by the table writer is included -at the end of this module. -The routine fillemlines() is called by nextem() at toplevel -to make sure there are enough instruction to match. -It fills the EM instruction buffer up to 5 places from the end to -keep room for EM replacement instructions, -or up to a pseudo. -.PP -The dopseudo() function performs the function of the pseudo last -encountered. -If the pseudo is a -.B rom -the corresponding label is saved with the contents of the -.B rom -to be available to the code generator later. -The rest of the routines are small service routines for either -input or data output. -.NH 3 -gencode.c -.PP -This module contains routines called by codegen() to generate the real -code to the codefile. -The function genstr() gets a string as argument and copies it to codefile. -The prtoken() function interprets the tokenformat as given in -the tokens[] array. 
-.NH 3 -glosym.c -.PP -This module maintains a list of global symbols that have a -.B rom -pseudo associated. -There are functions to enter a symbol and to find a symbol. -.NH 3 -label.c -.PP -This module contains routines to handle the top element size messages. -.NH 3 -main.c -.PP -Main routine of the code generator. -Processes arguments and flags. -Flags available are: -.IP -d -Sets debug mode if the code generator was not compiled with -the NDEBUG macro defined. -The flag can be followed by a digit specifying the amount of debugging -wanted, -and by @labelname giving the start of debugging. -Debug mode gives very long output on stderr indicating -all steps of the code generation process including nesting -of the codegen() function. -.IP -p\fIn\fP -Sets the look ahead depth to -.I n , -the -.I p -stands for ply, -a well known word in chess playing programs. -.IP -w\fIn\fP -Sets the weight percentage for size in the cost function to -.I n -percent. -Uses Euclides algorithm to simplify rationals. -.NH 3 -move.c -.PP -Function to implement the move instruction in the tables, -register initialization and the test instruction and associated bookkeeping. -First tests are made to try to prevent the move from really happening. -After that, if there is an after that, -the move rule is found and the code executed. -.NH 3 -nextem.c -.PP -The entry point of this module is nextem(). -It hashes the next three EM instructions, -and uses the low order byte of the hash -as an index into the array pathash[], -to find a chain of patterns in the array -pattern[], -that are all tried for a match. -.PP -The function trypat() does most of the work -checking patterns. -When a pattern is found to match all instructions -the operands of the instruction are placed into the dollar[] array. -Then the boolean expression is tried. -If it matches the function can return, -leaving the operands still in the dollar[] array, -so later in the code rule they can still be used. 
-.NH 3 -reg.c -.PP -Collection of routines to handle registers. -Reference count routines are here, -chrefcount() and getrefcount(), -plus routines to erase a single register or all of them, -erasereg() and cleanregs(). -.PP -If NDEBUG hasn't been defined, here is also the routine that checks -if the reference count kept with the register information is in -agreement with the number of times it occurs on the fake stack. -.NH 3 -salloc.c -.PP -Module for string allocation and garbage collection. -Contains entry points myalloc(), -a routine calling malloc() and checking whether room is left, -myfree(), just free(), -popstr() a function called from state.c to free all strings -made since the last saved status. -Furthermore there is salloc() which has the size of the string as parameter -and returns a pointer to the allocated space, -while keeping a copy of the pointer for garbage allocation purposes. -.PP -The function garbage_collect is called from codegen() at toplevel -every now and then, -and checks all places where strings may reside to mark strings -as being in use. -Strings not in use are returned to the pool of free space. -.NH 3 -state.c -.PP -Set of routines called to save current status and -restore a previous saved state. -.NH 3 -subr.c -.PP -Random set of leftover routines. -.NH 4 -match -.PP -Computes whether a certain token matches a certain token expression. -Just computes a bitnumber according to the algorithm explained with -machsets[], -and tests the bit and the boolean expression if it is there. -.NH 4 -instance,cinstance -.PP -These two functions compute a token from a description. -They differ very slight, cinstance() is used to compute -the result of a coercion in a certain context -and therefore has more arguments, which it uses instead of -the global information instance() works on. -.NH 4 -eqtoken -.PP -eqtoken computes whether two tokens can be considered identical. -Used to check register contents during moves mainly. 
-.NH 4 -distance -.PP -This is the heuristic function that computes a distance from -the current fake stack contents to the token pattern in the table. -It likes exact matches most, then matches where at least the sizes are correct -and if the sizes are not correct it likes too large sizes more than too -small, since splitting a token is easier than combining one. -.NH 4 -split -.PP -This function tries to find a splitting coercion -and executes it immediately when found. -The fake stack is shuffled thoroughly when this happens, -so pieces below the token that must be split are saved first. -.NH 4 -docoerc -.PP -This function executes a coercion that was found. -The same shuffling is done, so the top of the stack is again saved. -.NH 4 -stackupto -.PP -This function gets a pointer into the fake stack and must stack -every token including the one pointed at up to the bottom of the fake stack. -The first stacking rule possible is used, -so rules using registers must come first. -.NH 4 -findcoerc -.PP -Looks for a one to one coercion, if found it returns a pointer -to it and leaves a list of possible registers to use in the global -variable curreglist. -This is used by codegen(). -.NH 3 -var.c -.PP -Global variables used by more than one module. -External definitions are in extern.h. diff --git a/doc/nopt.doc b/doc/nopt.doc deleted file mode 100644 index 173790392..000000000 --- a/doc/nopt.doc +++ /dev/null @@ -1,591 +0,0 @@ -.\" $Id$ -.TL -A Tour of the New Peephole Optimizer -.AU -B. J. McKenzie -.NH -Introduction -.LP -The peephole optimizer consists of four major parts: -.IP a) -the table describing the optimization to be performed -.IP b) -a program to parse these tables and build input and output routines to -interface to the library and a dfa based routine to recognize patterns and -make the requested replacements. -.IP c) -common routines for the library that are independent of the table of a) -.IP d) -a stand alone version of the optimizer. 
-.LP -The library conforms to the -.I EM_CODE(3) -module interface but with routine names of the form -.BI C_ xxx -replaced by names like -.BI O_ xxx. -Furthermore there is also no routine -.I O_getid -and no variable -.I O_tmpdir -in the module. -The library module results in calls to the usual -.I EM_CODE(3) -module. It is possible to write a front end so that it can call either the -normal -.I EM_CODE(3) -module or this new module by adding -.B -#define PEEPHOLE -.R -before the line -.B -#include -.R -This will map all calls to the routine -.BI C_ xxx -into a call to the routine -.BI O_ xxx. - -.LP -We shall now describe each of these major parts in some detail. - -.NH -The optimization table -.LP -The file -.I patterns -contains the patterns of EM instructions to be recognized by the optimizer -and the EM instructions to replace them. Each pattern may have an -optional restriction that must be satisfied before the replacement is made. -The syntax of the table will be described using extended BNF notation -used by -.I LLGen -where: -.DS -.I - [...] - are used to group items - | - is used to separate alternatives - ; - terminates a rule - ? - indicates item is optional - * - indicates item is repeated zero or more times - + - indicates item is repeated one or more times -.R -.DE -The format of each rule in the table is: -.DS -.I - rule : pattern global_restriction? ':' replacement - ; -.R -.DE -Each rule must be on a single line except that it may be broken after the -colon if the next line begins with a tab character. -The pattern has the syntax: -.DS -.I - pattern : [ EM_mnem [ local_restriction ]? ]+ - ; - EM-mnem : "An EM instruction mnemonic" - | 'lab' - ; -.R -.DE -and consists of a sequence of one or more EM instructions or -.I lab -which stands for a defined instruction label. 
Each EM-mnem may optionally be -followed by a local restriction on the argument of the mnemonic and takes -one of the following forms depending on the type of the EM instruction it -follows: -.DS -.I - local_restriction : normal_restriction - | opt_arg_restriction - | ext_arg_restriction - ; -.R -.DE -A normal restriction is used after all types of EM instruction except for -those that allow an optional argument, (such as -.I adi -) or those involving external names, (such as -.I lae -) -and takes the form: -.DS -.I - normal_restriction : [ rel_op ]? expression - ; - rel_op : '==' - | '!=' - | '<=' - | '<' - | '>=' - | '>' - ; -.R -.DE -If the rel_op is missing, the equality -.I == -operator is assumed. The general form of expression is defined later but -basically it involves simple constants, references to EM_mnem arguments -that appear earlier in the pattern and expressions similar to those used -in C expressions. - -The form of the restriction after those EM instructions like -.I adi -whose arguments are optional takes the form: -.DS -.I - opt_arg_restriction : normal_restriction - | 'defined' - | 'undefined' - ; -.R -.DE -The -.I defined -and -.I undefined -indicate that the argument is present -or absent respectively. The normal restriction form implies that the -argument is present and satisfies the restriction. - -The form of the restriction after those EM instructions like -.I lae -whose arguments refer to external objects takes the form: -.DS -.I - ext_arg_restriction : patarg offset_part? - ; - offset_part : [ '+' | '-' ] expression - ; -.R -.DE -Such an argument has one of three forms: an offset with no name, an -offset from a name or an offset from a label. With no offset part -the restriction requires the argument to be identical to a previous -external argument. With an offset part it requires an identical name -part, (either empty, same name or same label) and supplies a relationship -among the offset parts.
It is possible to test for the same -external argument, the same name or to obtain the offset part of an external -argument using the -.I sameext -, -.I samenam -and -.I offset -functions given below. -.LP -The general form of an expression is: -.DS -.I - expression : expression binop expression - | unaryop expression - | '(' expression ')' - | bin_function '(' expression ',' expression ')' - | ext_function '(' patarg ',' patarg ')' - | 'offset' '(' patarg ')' - | patarg - | 'p' - | 'w2' - | 'w' - | INTEGER - ; -.R -.DE -.DS -.I - bin_function : 'sfit' - | 'ufit' - | 'samesign' - | 'rotate' - ; -.R -.DE -.DS -.I - ext_function : 'samenam' - | 'sameext' - ; - patarg : '$' INTEGER - ; - binop : "As for C language" - unaryop : "As for C language" -.R -.DE -The INTEGER in the -.I patarg -refers to the first, second, etc. argument in the pattern and it is -required to refer to a pattern that appears earlier in the pattern. -The -.I w -and -.I p -refer to the word size and pointer size (in bytes) respectively. -The -.I w2 -refers to twice the word size. -The -various functions test for: -.IP sfit 10 -the first argument fits as a signed value of -the number of bits specified by the second argument. -.IP ufit 10 -as for sfit but for unsigned values. -.IP samesign 10 -the first argument has the same sign as the second. -.IP rotate 10 -the value of the first argument rotated by the number of bits specified -by the second argument. -.IP samenam 10 -both arguments refer to externals and have either no name, the same name -or same label. -.IP sameext 10 -both arguments refer to the same external. -.IP offset 10 -the argument is an external and this yields its offset part. - -.LP -The global restriction takes the form: -.DS -.I - global_restriction : '?' expression - ; -.R -.DE -and is used to express restrictions that cannot be expressed as simple -restrictions on a single argument or can be expressed in a more -readable fashion as a global restriction.
An example of such a rule is: -.DS -.I - dup w ldl stf ? p==2*w : ldl $2 stf $3 ldl $2 lof $3 -.R -.DE -which says that this rule only applies if the pointer size is twice the -word size. - -.NH -Incompatibilities with Previous Optimizer -.LP -The current table format is not compatible with previous versions of the -peephole optimizer tables. In particular the previous table had no provision -for local restrictions and only the equivalent of the global restriction. -This meant that our -.I '?' -character that announces the presence of the optional global restriction was -not required. The previous optimizer performed a number of other tasks that -were unrelated to optimization that were possible because the old optimizer -read the EM code for a complete procedure at a time. This included tasks such -as register variable reference counting and moving the information regarding -the number of bytes of local storage required by a procedure from its -.I end -pseudo instruction to its -.I pro -pseudo instruction. These tasks are no longer done by this module but have -been moved to other modules or programs in the pipeline. The register variable -reference counting is now performed by the front end. The reordering of -code, such as the moving of mes instructions and the local storage -requirements from the end to beginning of procedures, is now performed using -the insertpart mechanism in the -.I EM_CODE -(or -.I EM_OPT -) module. -The removal of dead code is performed by the global optimizer. -Various -.I ext_functions -available in the old tables are no longer available as they rely on -information that is not available to the current program. -These are the -.I notreg -and the -.I rom -functions. -The previous optimizer allowed the use of -.I LLP, -.I LEP, -.I SLP -and -.I SEP -in patterns. For example -.I LLP -stood for either -.I lol -if the pointer size was the same as the word size, or for -.I ldl -if the pointer size was twice the word size.
-In the current optimizer it is necessary to include two patterns for each -such single pattern in the old table. For example for a pattern containing -.I LLP -there would be one pattern with -.I lol -and with a global restriction of the form -.I p=w -and another pattern with ldl and a global restriction of the form -.I p=2*w. - -.NH -The Parser -.LP -The program to parse the tables and build the pattern table dependent dfa -routines is built from the files: -.IP parser.h 15 -header file -.IP parser.g 15 -LLGen source file defining syntax of table -.IP syntax.l 15 -Lex source file defining form of tokens in table. -.IP initlex.c 15 -Uses the data in the library -.I em_data.a -to initialize the lexical analyzer to recognize EM instruction mnemonics. -.IP outputdfa.c 15 -Routines to output the dfa when it has been constructed. It outputs the files -.I dfa.c -and -.I trans.c -.IP outcalls.c 15 -Routines to output the file -.I incalls.r -defined in the next section. -.IP findworst.c 15 -Routines to analyze patterns to find how to continue matching after a -successful replacement or failed match. - -.LP -The parser checks that the tables conform to the syntax outlined in the -previous section and also makes a number of semantic checks on their -validity. Further versions could make further checks such as looking for -cycles in the rules or checking that each replacement leaves the same -number of bytes on the stack as the pattern it replaces. The parser -builds an internal dfa representation of the rules by combining rules with -common prefixes. All local and global restrictions are combined into a single -test to be performed after a complete pattern has been detected in the input. -The idea is to build a structure so that each of the patterns can be matched -and then the corresponding tests made and the first that succeeds is replaced. -If two rules have the same pattern and both their tests also succeed the one -that appears first in the tables file will be done.
Somewhat less obvious -is that if one pattern is a proper prefix of a longer pattern and its test -succeeds then the second pattern will not be checked for. - -A major task of the parser is to decide on the action to take when a rule has -been partially matched or when a pattern has been completely matched but its -test does not succeed. This requires a search of all patterns to see if any -part of the part matched could be part of some other pattern. For example -given the two patterns: -.DS -.I - loc adi w loc adi w : loc $1+$3 adi w - loc adi w loc sbi w : loc $1-$3 adi w -.R -.DE -If the first pattern fails after seeing the input: -.DS -.I - loc adi loc -.R -.DE -the parser will still need to check whether the second pattern matches. -This requires a decision on how to fix up any internal data structures in -the dfa matcher, such as moving some instructions from the pattern to the -output queue and moving the pattern along and then deciding what state -it should continue from. Similar decisions are required after a pattern -has been replaced. For example if the replacement is empty it is necessary -to back up -.I n-1 -instructions where -.I n -is the length of the longest pattern in the tables. - -.NH -Structure of the Resulting Library - -.LP -The major data structures maintained by the library consist of three queues; -an -.I output -queue of instructions awaiting output, a -.I pattern -queue containing instructions that match the current prefix, and a -.I backup -queue of instructions that have been backed up over and need to be reparsed -for further pattern matches. -These three queues are maintained in a single fixed size buffer as explained -in more detail in the next section. -Also, after a successful match, a replacement queue is constructed.
- - -.LP -If no errors are detected by the parser in the tables it outputs the following -files if they have changed from the existing version of the file: -.IP dfa.c 10 -this contains the dfa encoded into a number of arrays using the technique -of row displacement for compacted sparse matrices. Given an opcode and -the current state, the value of -.I OO_base[OO_state] -is consulted to obtain a pointer into the array -.I OO_checknext. -If this pointer is zero or the -.I check -field of the addressed structure does -not correspond to the current state then it is known there is no entry for -this opcode/state pair and the -.I OO_default -array is consulted instead. -If the check field does match then the -.I next -field contains the new state. -After each transition the array -.I OO_ftrans -is consulted to see if this state corresponds to a final state -(i.e. a complete pattern) and if so the corresponding function is called. -.IP trans.c 10 -this contains external declarations of transition routines with names like -.B OO_xxxdotrans -(where -.I xxx -is a small integer). -These are called when there is a transition to state -.I xxx -that corresponds to a -complete pattern. Any tests are performed if necessary to confirm that the -pattern matches and then the replacement instructions are placed on the -output queue and the routine -.I OO_mkrepl -is called to make the replacement and, if necessary, back up the amount required. -If there are a number of patterns with the same instructions but different -tests, these will all appear in the same routine and the tests performed in -the order they appear in the original -.I patterns -file. -.IP incalls.r 10 -this contains an entry for every EM instruction (plus -.I lab -) giving information on how to build a routine with the name -.BI O_ xxx -for the library version of the module.
-If the EM instruction does not appear in the tables -patterns at all then the dfa routine is called to flush any current queued -output and the output -.BI C_ xxx -routine is called. If the EM instruction does appear in a pattern then the -instruction data structure fields are -initialized and it is added onto the end of the pattern queue. -The dfa routines are then called to attempt to make a transition. -This file is input to the -.I awk -program -.I makefuns.awk. - -.LP -The following files contain code that is independent of the pattern tables: -.IP main.c 10 -this is used only in the stand alone version of the optimizer and consists -of code to open the input file, read the input using the -.I READ_EM(3) -module and call the dfa routines. This version does not require the routines -constructed from the incalls.r file described above. -.IP nopt.c 10 -general routines to initialize, and maintain the data structures. The file -handling routines -.I O_open -etc are defined here. Also defined are routines for flushing the output queue -by calling the -.I EM_mkcalls -routine from the -.I READ_EM(3) -module and moving instructions from the output to the backup queue. -Routines to free the strings stored in instructions -with types of -.I sof_ptyp, -.I pro_ptyp, -.I str_ptyp, -.I ico_ptyp, -.I uco_ptyp, -and -.I fco_ptyp are also defined. These strings are copied to a large array that -is extended by -.I Realloc -if it overflows. The strings can be thrown away on any flush that occurs when -the backup queue is empty. -.IP mkstrct.c 10 -contains routines to build the data structure from the input -.BI C_ xxx -routines and place the structure on the pattern queue. These routines are also -used to build the data structures when a replacement is constructed. -.IP aux.c 10 -routines to implement the external functions used in the pattern table.
- -.LP -The following files are also used in building the module library: -.IP makefuns.awk 10 -this -.I awk -program is used to produce individual C files with names like -.BI O_ xxx.c -each containing a single function definition and then call the -.I cc -compiler to produce a single output file. -This enables the loader to only load those routines that are actually -needed when the library is loaded. -.IP pseudo.r 10 -this file is like the -.I incalls.r -file produced by the parser but is built by hand and handles the pseudo -EM instructions. It is also processed by -.I makefuns.awk. - -.NH -Miscellaneous Issues -.LP -The output, pattern and backup queues are maintained in a fixed length array, -.I OO_buffer -allocated of size -.I MAXBUFFER -(a constant declared in nopt.h) at run time. -It consists of an array of the -.I e_instr -data structure used by the -.I READ_EM(3) -module. -At any time the pointers -.I OO_patternqueue -and -.I OO_nxtpatt -point to the beginning and end of the current pattern prefix that corresponds -to the current state. Any instructions on the backup queue are between -.I OO_nxtpatt -and -.I OO_endbackup. -If there are no instructions on the backup queue then -.I OO_endbackup -will be 0 (zero). -The size of the replacement queue is set to the length of the maximum -replacement length by the tables output by the parser. - -.LP -The fixed size of the buffer causes no difficulty in -practice and can only result in some potential optimizations being missed. -When space for a new instruction is required and the buffer is full the -routine -.I OO_halfflush -is called to flush half the buffer and move all the data structures left. -It should be noted that it is not possible to statically determine the -maximum possible size for these queues as they need to be unbounded in -the worst case.
-A study of the rule -.DS -.I - inc dec : -.R -.DE -with the input consisting of -.I N -.I inc -and then -.I N -.I dec -instructions requires an output queue length of -.I N-1 -to find all possible replacements. diff --git a/doc/occam/.distr b/doc/occam/.distr deleted file mode 100644 index 08cf9cf3b..000000000 --- a/doc/occam/.distr +++ /dev/null @@ -1,12 +0,0 @@ -proto.make -ctot -p0 -p1 -p2 -p3 -p4 -p5 -p6 -p7 -p8 -p9 diff --git a/doc/occam/Makefile b/doc/occam/Makefile deleted file mode 100644 index 9a5ba5b45..000000000 --- a/doc/occam/Makefile +++ /dev/null @@ -1,18 +0,0 @@ -EMHOME=../.. -FILES= p0 p1 p2 p3 p4 p5 p6 p7 p8 p9 - -PIC=pic -EQN=eqn -TBL=tbl -TARGET=-Tlp -../occam.doc: p0 p1 p2 p3 p4 p5 p6 p7 p8 p9 channel.h.t channel.c.t - soelim $(FILES) | $(PIC) $(TARGET) | $(TBL) | $(EQN) $(TARGET) > $@ - -channel.h.t: $(EMHOME)/h/ocm_chan.h - ctot <$(EMHOME)/h/ocm_chan.h >channel.h.t - -channel.c.t: channel.c - ctot channel.c.t - -channel.c: $(EMHOME)/lang/occam/lib/tail_ocm.a - arch x $(EMHOME)/lang/occam/lib/tail_ocm.a channel.c diff --git a/doc/occam/ctot b/doc/occam/ctot deleted file mode 100755 index f4fe6481d..000000000 --- a/doc/occam/ctot +++ /dev/null @@ -1,8 +0,0 @@ -sed 's/^$/.sp 0.5/ -s/\\/\\e/g -s/^ $/.ft\ -.DE\ -.bp\ -.DS\ -.ft CW\ -.ta 0.65i 1.3i 1.95i 2.6i 3.25i 3.9i 4.55i 5.2i 5.85i 6.5i/' diff --git a/doc/occam/p0 b/doc/occam/p0 deleted file mode 100644 index 1055ec1c4..000000000 --- a/doc/occam/p0 +++ /dev/null @@ -1,21 +0,0 @@ -.pl 11.7i -.ND -.de PT -.if \\n%>0 .if e .tl '\fB%\fP''' -.if \\n%>1 .if o .tl '''\fB%\fP' -.. -.TL -An Occam Compiler -.AU -Kees Bot -Edwin Scheffer -.AI -Vrije Universiteit -Amsterdam, The Netherlands -.AB -This document describes the implementation of an \fBOccam\fP to \fBEM\fP -compiler. The lexical analysis is done using \fBLex\fP. -For the semantic analysis the extended LL(1) parser generator \fBLLgen\fP is -used. 
To handle the Occam-specific features as channels and parallelism some -library routines are required. -.AE diff --git a/doc/occam/p1 b/doc/occam/p1 deleted file mode 100644 index 1d45e806d..000000000 --- a/doc/occam/p1 +++ /dev/null @@ -1,87 +0,0 @@ -.NH -Introduction -.PP -Occam [1] is a programming language which is based on the concepts of -concurrency and communication. These concepts enable today's applications of -microprocessors and computers to be implemented more effectively. -.PP -An Occam program consists of a (dynamically determined) number -of processes communicating through channels. -To communicate with the outside world some predefined channels are needed. -A channel has only one writer and one reader; it carries machine words and -bytes, at the reader/writer's discretion. The process with its communication -in Occam replaces the procedure with parameters in other languages (there are -no procedures in Occam). -.PP -In addition to the normal assignment statement, Occam has two more -information-transfer statements, the input and the output: -.DS -.ft CW - chan1 ? x -- reads a value from chan1 into x - chan2 ! x -- writes the value of x onto chan2 -.ft -.DE -Both the outputting and the inputting processes wait until the other is there. -Channels are declared and given names. Arrays of channels are possible. -.PP -Processes come in 5 varieties: sequential, parallel, alternative, -conditional and repetitive. A process starts with a reserved word telling -its nature, followed by an indented list of other processes. (Indentation -is used to indicate block structure.) It may be preceded by declarations. -The processes in a sequential/parallel process are executed sequentially/in -parallel. The processes in an alternative process have guards based on the -availability of input; the first to be ready is executed (this is waiting -for multiple input). The conditional and repetitive processes are normal -\fBIF\fPs and \fBWHILE\fPs. 
-.PP -\fIProducer-consumer example:\fP -.DS -.ft CW -.nf -CHAN buffer: -- declares the channel buffer -PAR - WHILE TRUE -- the producer - VAR x: -- a local variable - SEQ - produce(x) -- in some way - buffer ! x -- and send it - WHILE TRUE -- the consumer - VAR x: - SEQ - buffer ? x -- get a value - consume(x) -- in some way -.ft -.fi -.DE -.bp -.PP -Processes can be replicated from a given template; this combines -with arrays of variables and/or channels. -.PP -\fIExample: 20 window-sorters in series:\fP -.DS -.ft CW -.nf -CHAN s[20]: -- 20 channels -PAR i = [ 0 FOR 19 ] -- 19 processes - WHILE TRUE - VAR v1, v2: - SEQ - s[i] ? v1; v2 -- wait for 2 variables from s[i] - IF - v1 <= v2 -- ok - s[i+1] ! v1; v2 - v1 > v2 -- reorder - s[i+1] ! v2; v1 -.fi -.ft -.DE -.PP -A process may wait for a condition, which must include a comparison -with \fBNOW\fP, the present clock value. -.PP -Processes may be distributed over several processors; all processes -under a \fBVAR\fP declaration must run on the same processor. Concurrency can be -improved by avoiding \fBVAR\fP declarations, and replacing them by \fBCHAN\fP -declarations. Processes can be allocated explicitly on named processors and -channels can be connected to physical ports. diff --git a/doc/occam/p2 b/doc/occam/p2 deleted file mode 100644 index b7003a6b6..000000000 --- a/doc/occam/p2 +++ /dev/null @@ -1,151 +0,0 @@ -.NH -The Compiler -.PP -The compiler is written in \fBC\fP using LLgen and Lex and compiles -Occam programs to EM code, using the procedural interface as defined for EM. -In the following sub-sections we describe the LLgen parser generator and -the aspect of indentation. -.NH 2 -The LLgen Parser Generator -.PP -LLgen accepts a Context Free syntax extended with the operators `\f(CW*\fP', `\f(CW?\fP' and `\f(CW+\fP' -that have effects similar to those in regular expressions. 
-The `\f(CW*\fP' is the closure set operator without an upperbound; `\f(CW+\fP' is the positive -closure operator without an upperbound; `\f(CW?\fP' is the optional operator; -`\f(CW[\fP' and `\f(CW]\fP' can be used for grouping. -For example, a comma-separated list of expressions can be described as: -.DS -.ft CW - expression_list: - expression [ ',' expression ]* - ; -.ft -.DE -.LP -Alternatives must be separated by `\f(CW|\fP'. -C code (``actions'') can be inserted at all points between the colon and the -semicolon. -Variables global to the complete rule can be declared just in front of the -colon enclosed in the brackets `\f(CW{\fP' and `\f(CW}\fP'. All other declarations are local to -their actions. -Nonterminals can have parameters to pass information. -A more mature version of the above example would be: -.DS -.ft CW - expression_list(expr *e;) { expr e1, e2; } : - expression(&e1) - [ ',' expression(&e2) - { e1=append(e1, e2); } - ]* - { *e=e1; } - ; -.ft -.DE -As LLgen generates a recursive-descent parser with no backtrack, it must at all -times be able to determine what to do, based on the current input symbol. -Unfortunately, this cannot be done for all grammars. Two kinds of conflicts -are possible, viz. the \fBalternation\fP and \fBrepetition\fP conflict. -An alternation conflict arises if two sides of an alternation can start with the -same symbol. E.g. -.DS -.ft CW - plus: '+' | '+' ; -.ft -.DE -The parser doesn't know which `\f(CW+\fP' to choose (neither do we). -Such a conflict can be resolved by putting an \fBif-condition\fP in front of -the first conflicting production. It consists of a \fB``%if''\fP followed by a -C-expression between parentheses. -If a conflict occurs (and only if it does) the C-expression is evaluated and -parsing continues along this path if non-zero.
Example: -.DS -.ft CW - plus: - %if (some_plusses_are_more_equal_than_others()) - '+' - | - '+' - ; -.ft -.DE -A repetition conflict arises when the parser cannot decide whether -``\f(CWproductionrule\fP'' in e.g. ``\f(CW[ productionrule ]*\fP'' must be chosen -once more, or that it should continue. -This kind of conflicts can be resolved by putting a \fBwhile-condition\fP right -after the opening parentheses. It consists of a \fB``%while''\fP -followed by a C-expression between parentheses. As an example, we can look at -the \fBcomma-expression\fP in C. The comma may only be used for the -comma-expression if the total expression is not part of another comma-separated -list: -.DS -.nf -.ft CW - comma_expression: - sub_expression - [ %while (not_part_of_comma_separated_list()) - ',' sub_expression - ]* - ; -.ft -.fi -.DE -Again, the \fB``%while''\fP is only used in case of a conflict. -.LP -Error recovery is done almost completely automatically. All the LLgen-user has to do -is write a routine called \fILLmessage\fP to give the necessary error -messages and supply information about terminals found missing. -.NH 2 -Indentation -.PP -The way conflicts can be resolved are of great use to Occam. The use of -indentation, to group statements, leads to many conflicts because the spaces -used for indentation are just token separators to the lexical analyzer, i.e. -``white space''. The lexical analyzer can be instructed to generate `BEGIN' and -`END' tokens at each indentation change, but that leads to great difficulties -as expressions may occupy several lines, thus leading to indentation changes -at the strangest moments. So we decided to resolve the conflicts by looking -at the indentation ourselves. The lexical analyzer puts the current indentation -level in the global variable \fIind\fP for use by the parser. 
The best example -is the \fBSEQ\fP construct, which exists in two flavors, one with a replicator -and one process: -.DS -.nf -.ft CW - seq i = [ 1 for str[byte 0] ] - out ! str[byte i] -.ft -.fi -.DE -and one without a replicator and several processes: -.DS -.nf -.ft CW - seq - in ? c - out ! c -.ft -.fi -.DE -The LLgen skeleton grammar to handle these two is: -.DS -.nf -.ft CW - SEQ { line=yylineno; oind=ind; } - [ %if (line==yylineno) - replicator - process - | - [ %while (ind>oind) process ]* - ] -.ft -.fi -.DE -This shows clearly that a replicator must be on the same line as the \fBSEQ\fP, -and new processes are collected as long as the indentation level of each process -is greater than the indentation level of \fBSEQ\fP (with appropriate checks on this -indentation). -.PP -Different indentation styles are accepted, as long as the same amount of spaces -is used for each indentation shift. The ascii tab character sets the indentation -level to an eight space boundary. The first indentation level found in a file -is used to compare all other indentation levels to. diff --git a/doc/occam/p3 b/doc/occam/p3 deleted file mode 100644 index fca586dcd..000000000 --- a/doc/occam/p3 +++ /dev/null @@ -1,337 +0,0 @@ -.NH -Implementation -.PP -It is now time to describe the implementation of some of the occam-specific -features such as channels and \fBNOW\fP. Also the way communication with -UNIX\(dg is performed must be described. -.FS -\(dg UNIX is a trademark of Bell Laboratories -.FE -For a thorough description of the library routines to simulate parallelism, -which are e.g. used by the channel routines and by the \fBPAR\fP construct -in Appendix B, see [6]. -.NH 2 -Channels -.PP -There are currently two types of channels (see Figure 1.) indicated by the type -field of a channel variable: -.IP - -An interprocess communication channel with two additional fields: -.RS -.IP - -A synchronization field to hold the state of an interprocess communication -channel.
-.IP - -An integer variable to hold the value to be sent. -.RE -.IP - -An outside world communication channel. This is a member of an array of -channels connected to UNIX files. Its additional fields are: -.RS -.IP - -A flags field holding a readahead flag and a flag that tells if this channel -variable is currently connected to a file. -.IP - -A preread character, if readahead is done. -.IP - -An index field to find the corresponding UNIX file. -.RE -.LP -.PS -box ht 3.0 wid 3.0 -box ht 0.75 wid 0.75 with .nw at 1st box.nw + (0.5, -0.5) "Process 1" -box ht 0.75 wid 0.75 with .ne at 1st box.ne + (-0.5, -0.5) "Process 2" -box ht 0.75 wid 0.75 with .sw at 1st box.sw + (0.5, 0.5) "Process 3" -box ht 0.75 wid 0.75 with .se at 1st box.se + (-0.5, 0.5) "Process 4" -line right from 5/12 <2nd box.ne, 2nd box.se> to 5/12 <3nd box.nw, 3nd box.sw> -line right from 7/12 <2nd box.ne, 2nd box.se> to 7/12 <3nd box.nw, 3nd box.sw> -line right from 5/12 <4th box.ne, 4th box.se> to 5/12 <5nd box.nw, 5nd box.sw> -line right from 7/12 <4th box.ne, 4th box.se> to 7/12 <5nd box.nw, 5nd box.sw> -line down from 5/12 <2nd box.sw, 2nd box.se> to 5/12 <4nd box.nw, 4nd box.ne> -line down from 7/12 <2nd box.sw, 2nd box.se> to 7/12 <4nd box.nw, 4nd box.ne> -line down from 5/12 <3rd box.sw, 3rd box.se> to 5/12 <5nd box.nw, 5nd box.ne> -line down from 7/12 <3rd box.sw, 3rd box.se> to 7/12 <5nd box.nw, 5nd box.ne> -line right 1.0 from 5/12 <5th box.ne, 5th box.se> -line right 1.0 from 7/12 <5th box.ne, 5th box.se> -line left 1.0 from 5/12 <2nd box.nw, 2nd box.sw> -line left 1.0 from 7/12 <2nd box.nw, 2nd box.sw> -.PE -.DS C -\fIFigure 1. Interprocess and outside world communication channels\fP -.DE -The basic channel handling is done by \f(CWchan_in\fP and \f(CWchan_out\fP. All -other routines are based on them. The routine \f(CWchan_any\fP only checks if -there's a value available on a given channel. (It does not read this value!)
-\f(CWC_init\fP initializes an array of interprocess communication channels. -.LP -The following table shows Occam statements paired with the routines used to -execute them. -.TS H -center, box; -c | c | c -lf5 | lf5 | lf5. -Occam statement Channel handling routine Called as -= -.sp 0.5 -.TH -T{ -.nf -CHAN c: -CHAN c[z]: -.fi -T} T{ -.nf -c_init(c, z) -chan *c; unsigned z; -.fi -T} T{ -.nf -c_init(&c, 1); -c_init(&c, z); -.fi -T} -.sp 0.5 -_ -.sp 0.5 -T{ -.nf -c ? v -.fi -T} T{ -.nf -chan_in(v, c) -long *v; chan *c; -.fi -T} T{ -.nf -chan_in(&v, &c); -.fi -T} -.sp 0.5 -T{ -.nf -c ? b[byte i] -.fi -T} T{ -.nf -cbyte_in(b, c) -char *b; chan *c; -.fi -T} T{ -.nf -cbyte_in(&b[i], &c); -.fi -T} -.sp 0.5 -T{ -.nf -c ? a[i for z] -.fi -T} T{ -.nf -c_wa_in(a, z, c) -long *a; unsigned z; chan *c; -.fi -T} T{ -.nf -c_wa_in(&a[i], z, &c); -.fi -T} -.sp 0.5 -T{ -.nf -c ? a[byte i for z] -.fi -T} T{ -.nf -c_ba_in(a, z, c) -long *a; unsigned z; chan *c; -.fi -T} T{ -.nf -c_ba_in(&a[i], z, &c); -.fi -T} -.sp 0.5 -_ -.sp 0.5 -T{ -.nf -c ! v -.fi -T} T{ -.nf -chan_out(v, c) -long *v; chan *c; -.fi -T} T{ -.nf -chan_out(&v, &c); -.fi -T} -.sp 0.5 -T{ -.nf -c ! a[i for z] -.fi -T} T{ -.nf -c_wa_out(a, z, c) -long *a; unsigned z; chan *c; -.fi -T} T{ -.nf -c_wa_out(&a[i], z, &c); -.fi -T} -.sp 0.5 -T{ -.nf -c ! a[byte i for z] -.fi -T} T{ -.nf -c_ba_out(a, z, c) -long *a; unsigned z; chan *c; -.fi -T} T{ -.nf -c_ba_out(&a[i], z, &c); -.fi -T} -.sp 0.5 -_ -.sp 0.5 -T{ -.nf -alt - c ? .... - .... -.fi -T} T{ -.nf -int chan_any(c) -chan *c; -.fi -T} T{ -.nf -deadlock=0; -for(;;) { - if (chan_any(&c)) { - .... - .... -.fi -T} -.sp 0.5 -.TE -The code of \f(CWc_init\fP, \f(CWchan_in\fP, \f(CWchan_out\fP and \f(CWchan_any\fP -can be found in Appendix A. -.NH 3 -Synchronization on interprocess communication channels -.PP -The synchronization field can hold three different values indicating the -state the channel is in: -.IP "- \fBC\(ulS\(ulFREE\fP:" 15 -Ground state, channel not in use. 
-.IP "- \fBC\(ulS\(ulANY\fP:" 15 -Channel holds a value, the sending process is waiting for an acknowledgement -about its receipt. -.IP "- \fBC\(ulS\(ulACK\fP:" 15 -Channel data has been removed by a receiving process, the sending process can -set the channel free now. -.LP -A sending process cannot simply wait until the channel changes state C\(ulS\(ulANY -to state C\(ulS\(ulFREE before it continues. There is a third state needed to prevent -a third process from using the channel before our sending process is -acknowledged. Note, however that it is not allowed to use a channel for input -or output in more than one parallel process. This is too difficult to check -in practice, so we tried to smooth it a little. -.NH 2 -NOW -.PP -\fBNOW\fP evaluates to the current time returned by the time(2) system call. -The code is simply: -.DS -.ft CW -.nf - long now() - { - deadlock=0; - return time((long *) 0); - } -.fi -.ft -.DE -The ``deadlock=0'' prevents deadlocks while using the clock. -.NH 2 -UNIX interface -.PP -To handle the communication with the outside world the following channels are -defined: -.IP - -\fBinput\fP, that corresponds with the standard input file, -.IP - -\fBoutput\fP, that corresponds with the standard output file, -.IP - -\fBerror\fP, that corresponds with the standard error file. -.IP - -\fBfile\fP, an array of channels that can be subscripted with an index -obtained by the builtin named process ``\f(CWopen\fP''. Note that -\fBinput\fP=\fBfile\fP[0], \fBoutput\fP=\fBfile\fP[1] and -\fBerror\fP=\fBfile\fP[2]. -.LP -Builtin named processes to open and close files are defined as -.DS -.nf -.ft CW -proc open(var index, value name[], mode[]) = ..... : -proc close(value index) = ..... : -.fi -.ft -.DE -To open a file `junk', write nonsense onto it, and close it, goes as follows: -.DS -.ft CW -.nf - var i: - seq - open(i, "junk", "w") - file[i] ! 
nonsense - close(i) -.fi -.ft -.DE -Errors opening a file are reported by a negative index, which is the -negative value of the error number (called \fIerrno\fP in UNIX). -.LP -Bytes read from or written onto these channels are taken from occam variables. -As these variables can hold more than 256 values, some negative values are used -to control channels. These values are: -.IP "- \fBEOF\fP" 9 -(-1): Eof from file channel is read as -1. -.IP "- \fBTEXT\fP" 9 -(-2): A -2 written onto any channel connected to a terminal puts this -terminal in the normal line oriented mode (i.e. characters typed are echoed -and lines are buffered before they are read). -.IP "- \fBRAW\fP" 9 -(-3): A -3 written onto any channel connected to a terminal puts it in raw mode -(i.e. no echoing of typed characters and no line buffering). -.LP -To exit an Occam program, e.g. after an error, a builtin named process -\f(CWexit\fP is available that takes an exit code as its argument. -.NH 2 -Replicators and slices -.PP -Both the base and the count of replicators like in -.DS -.ft CW - par i = [ base for count ] -.ft -.DE -may be arbitrary expressions. The count in array slices like in -.DS -.ft CW - c ? A[ base for count ] -.ft -.DE -must be a constant expression however, the base is again free. diff --git a/doc/occam/p4 b/doc/occam/p4 deleted file mode 100644 index afa9ec106..000000000 --- a/doc/occam/p4 +++ /dev/null @@ -1,42 +0,0 @@ -.NH -Particular details -.NH 2 -Lower case/Upper case -.PP -Keywords must be either fully written in lower case or in upper case, thus -\fBPAR\fP is equivalent to \fBpar\fP but \fBPar\fP is not a keyword. Identifiers -may be of mixed case. Different styles are used in our examples just to indicate -what's accepted by the compiler. 
-.NH 2 -File inclusion -.PP -The C preprocessor is applied to the input file before -compilation, so that files containing useful \fBPROC\fP and \fBDEF\fP -declarations can be used in the program by using the \fB#include\fP-directive -of the preprocessor. -.NH 2 -Substitution -.PP -Named processes are not textually substituted. A procedure call is used instead. -The semantics of occam substitution imply this by letting a global variable -(i.e. not declared inside the named process' body) be found where the named -process is defined and not where it is substituted. -.NH 2 -ANY -.PP -According to the occam syntax the \fBANY\fP keyword may be the only argument of -an input or output process. Thus, -.DS -.ft CW - c ? ANY; x -.ft -.DE -is not allowed. Because it was easy to add, and it was used by some programs, -our compiler allows it. (If portability is an issue, usage of this feature -is not advisable). -.NH 2 -Configuration -.PP -The special configuration keywords like \fBPLACED\fP, \fBALLOCATE\fP, \fBPORT\fP -and \fBLOAD\fP are not implemented. Only \fBPRI\fP works because \fBPAR\fP and -\fBALT\fP work the same without it. diff --git a/doc/occam/p5 b/doc/occam/p5 deleted file mode 100644 index 1dc98e02c..000000000 --- a/doc/occam/p5 +++ /dev/null @@ -1,18 +0,0 @@ -.NH -Conclusions -.PP -Writing the compiler was very straightforward using the LLgen parser generator. -Its extended grammar and its way of conflict resolving were of great use to us, -especially -the indentation handling could be implemented quite easily. The automatic -error recovery given by LLgen took a great weight off our shoulders. -.PP -A set of parallelism simulation routines makes implementing \fBPAR\fP constructs -very simple. And we consider it a necessity to have such a layer to shield the -compiler writer from these details. -.PP -The translation to EM code was fairly direct, no great tricks were needed to -make things work.
Only the different sizes of words and pointers that are given -as parameters to the compiler must be carefully watched. Variables or pointers -must sometimes be handled with double word instructions for different word or -pointer sizes. diff --git a/doc/occam/p6 b/doc/occam/p6 deleted file mode 100644 index 2ce3d9da4..000000000 --- a/doc/occam/p6 +++ /dev/null @@ -1,5 +0,0 @@ -.NH -Acknowledgement -.PP -We want to thank Dick Grune for his description of Occam which is used -in the introduction. diff --git a/doc/occam/p7 b/doc/occam/p7 deleted file mode 100644 index c9397d156..000000000 --- a/doc/occam/p7 +++ /dev/null @@ -1,23 +0,0 @@ -.bp -.NH -References -.LP -.IP [1] -INMOS limited, \fIOCCAM Programming manual\fP, Prentice-Hall, 1984. -.IP [2] -C. J. H. Jacobs, \fISome Topics in Parser Generation\fP, -Informatica Rapport IR-105, Vrije Universiteit, Amsterdam, October 1985. -.IP [3] -B. W. Kernighan and D. M. Ritchie, \fIThe C Programming Language\fP, -Prentice-Hall, 1978. -.IP [4] -M. E. Lesk, \fILex - A Lexical Analyser Generator\fP, Comp. Sci. Tech. Rep. -No. 39, Bell Laboratories, Murray Hill, New Jersey, October 1975. -.IP [5] -A. S. Tanenbaum, H. van Staveren, E. G. Keizer, J. W. Stevenson, -\fIDescription of a Machine Architecture for use with Block Structured -Languages\fP, Informatica Rapport IR-81, Vrije Universiteit, Amsterdam, 1983. -.IP [6] -K. Bot and E. Scheffer, \fIA set of multi-process primitives for stack based -machines\fP, Vrije Universiteit, Amsterdam, 1986.
-.LP diff --git a/doc/occam/p8 b/doc/occam/p8 deleted file mode 100644 index 71e622bd4..000000000 --- a/doc/occam/p8 +++ /dev/null @@ -1,16 +0,0 @@ -.bp -.NH -Appendix A: Implementation of the channel routines -.DS L -.ft CW -.ta 0.65i 1.3i 1.95i 2.6i 3.25i 3.9i 4.55i 5.2i 5.85i 6.5i -.so channel.h.t -.ft -.DE -.bp -.DS L -.ft CW -.ta 0.65i 1.3i 1.95i 2.6i 3.25i 3.9i 4.55i 5.2i 5.85i 6.5i -.so channel.c.t -.ft -.DE diff --git a/doc/occam/p9 b/doc/occam/p9 deleted file mode 100644 index 3096fe16e..000000000 --- a/doc/occam/p9 +++ /dev/null @@ -1,60 +0,0 @@ -.bp -.NH -Appendix B: Translation of a \fBPAR\fP construct to EM code using the library -routines to simulate parallelism -.PP -Translation of the parallel construct: -.DS -.ft CW - par - P0 - par i = [ 1 for n ] - P(i) -.DE -is -.TS -center; -lf5 lf5. - lal -20 ; Assume 20 bytes of local variables at this moment - cal $parbegin ; Set up a process group - asp 4 ; Assume pointersize = 4 - cal $parfork ; Split stack in two from local -20 - lfr 4 ; Assume wordsize = 4 - zne *23 ; One end jumps to second process, other continues here - lor 0 ; Static link - cal $P0 - asp 4 - bra *24 ; Jump to the outer parend -23 - cal $parfork ; Fork off `par i = ...' 
process - lfr 4 - zne *25 ; One end jumps to end of outer par - lal -20 ; Place break just above i - cal $parbegin ; Set up another process group for the P(i) - loc 1 - stl -24 ; i:=1 - lol n ; Assume n can be addressed this simply - stl -28 ; A nameless counter - bra *26 ; Branch to counter test -27 - cal $parfork ; Fork off one P(i) - lfr 4 - zne *28 ; One jumps away to increment i, the other calls P(i) - lol -24 - lor 0 - cal $P - asp 8 - bra *29 -28 - inl -24 ; i:=i+1 - del -28 ; counter:=counter-1 -26 - lol -28 - zgt *27 ; while counter>0 repeat loop -29 - cal $parend ; Wait for the P(i) to finish, then delete group - bra *24 ; Jump to the higher up meeting place with P0 -25 ; Note that the bra will be optimized away -24 - cal $parend ; Wait for both processes to end, then delete group -.TE diff --git a/doc/occam/proto.make b/doc/occam/proto.make deleted file mode 100644 index 8ee34e754..000000000 --- a/doc/occam/proto.make +++ /dev/null @@ -1,32 +0,0 @@ -# $Id$ - -#PARAMS do not remove this line! 
- -SRC_DIR = $(SRC_HOME)/doc/occam - -FILES= $(SRC_DIR)/p0 \ - $(SRC_DIR)/p1 \ - $(SRC_DIR)/p2 \ - $(SRC_DIR)/p3 \ - $(SRC_DIR)/p4 \ - $(SRC_DIR)/p5 \ - $(SRC_DIR)/p6 \ - $(SRC_DIR)/p7 \ - $(SRC_DIR)/p8 \ - $(SRC_DIR)/p9 - -PIC=pic -EQN=eqn -TBL=tbl - -$(TARGET_HOME)/doc/occam.doc: $(FILES) channel.h.t channel.c.t - soelim $(FILES) | $(PIC) | $(TBL) | $(EQN) > $@ - -channel.h.t: $(SRC_HOME)/h/ocm_chan.h - $(SRC_DIR)/ctot <$(SRC_HOME)/h/ocm_chan.h >channel.h.t - -channel.c.t: channel.c - $(SRC_DIR)/ctot <channel.c >channel.c.t - -channel.c: $(SRC_HOME)/lang/occam/lib/tail_ocm.a - $(UTIL_HOME)/bin/arch x $(SRC_HOME)/lang/occam/lib/tail_ocm.a channel.c diff --git a/doc/pascal/.distr b/doc/pascal/.distr deleted file mode 100644 index 44bb76092..000000000 --- a/doc/pascal/.distr +++ /dev/null @@ -1,20 +0,0 @@ -ab+intro.doc -compar.doc -conf.doc -contents.doc -deviations.doc -example.doc -extensions.doc -hints.doc -his.doc -improv.doc -internal.doc -options.doc -proto.make -reference.doc -rtl.doc -syntax.doc -test.doc -titlepg.doc -transpem.doc -vrk.doc diff --git a/doc/pascal/ab+intro.doc b/doc/pascal/ab+intro.doc deleted file mode 100644 index bd99d00ec..000000000 --- a/doc/pascal/ab+intro.doc +++ /dev/null @@ -1,50 +0,0 @@ -.TL -The ACK Pascal Compiler -.AU -Aad Geudeke -Frans Hofmeester -.AI -Dept. of Mathematics and Computer Science -Vrije Universiteit -Amsterdam, The Netherlands -.AB -This document describes the implementation of a Pascal to EM compiler. The -compiler is written in C. The lexical analysis is done using a hand-written -lexical analyzer. Semantic analysis makes use of the extended LL(1) parser -generator LLgen. Several EM utility modules are used in the compiler. -.AE -.sp 2 -.NH -Introduction - -.PP -.nh -The Pascal front end of the Amsterdam Compiler Kit (ACK) complies with the -requirements of the international standard published by the International -Organization for Standardization (ISO) [ISO].
An informal description, which -unfortunately is not conforming to the standard, of the programming language -Pascal is given in [JEN]. - -.PP -The main reason for rewriting the Pascal compiler was that the old Pascal -compiler was written in Pascal itself, and a disadvantage of it was its -lack of flexibility. The compiler did not meet the needs of the current -ACK-framework, which makes use of modern parsing techniques and utility -modules. In this framework it is, for example, possible to use a fast back -end. Such a back end translates directly to object code [ACK]. Our compiler is -written in C and it is designed similar to the current C and Modula-2 compiler -of ACK. - -.PP -Chapter 2 describes the basic structure of the compiler. Chapter 3 discusses -the code generation of the main Pascal constructs. Chapter 4 covers one of -the major components of Pascal, viz. the conformant array. In Chapter 5 the -various compiler options that can be used are enumerated. The extensions -to the standard and the deviations from the standard are listed in Chapter -6 and 7. Chapter 8 presents some ideas to improve the standard. Chapter 9 -gives a short overview of testing the compiler. The major differences -between the old and new compiler can be found in Chapter 10. Suggestions -to improve the compiler are described in Chapter 11. The appendices -contain the grammar of Pascal and the changes made to the ACK Pascal run time -library. A translation of a Pascal program to EM code as example is presented. -.bp diff --git a/doc/pascal/compar.doc b/doc/pascal/compar.doc deleted file mode 100644 index e712435c3..000000000 --- a/doc/pascal/compar.doc +++ /dev/null @@ -1,89 +0,0 @@ -.sp 2 -.NH -Comparison with the Pascal-VU compiler -.nh - -.LP -In this chapter, the differences with the Pascal-VU compiler [IM2] are listed. -The points enumerated below can be used as improvements to the compiler (see -also Chapter 11). 
-.sp -.NH 2 -Deviations -.LP -.sp -- large labels -.in +3m -only labels in the closed interval 0..9999 are allowed, as opposed to the -Pascal-VU compiler. The Pascal-VU compiler allows every unsigned integer -as label. -.in -3m - -- goto -.in +3m -the new compiler conforms to the standard as opposed to the old one. The -following program, which contains an illegal jump to label 1, is accepted -by the Pascal-VU compiler. - -.nf -\fBprogram\fR illegal_goto(output); -\fBlabel\fR 1; -\fBvar\fR i : integer; -\fBbegin\fR - \fBgoto\fR 1; - \fBfor\fR i := 1 \fBto\fR 10 \fBdo\fR - \fBbegin\fR - 1 : writeln(i); - \fBend\fR; -\fBend\fR. -.fi - -This program is rejected by the new compiler. -.in -3m - -.NH 2 -Extensions -.LP -.sp -The extensions implemented by the Pascal-VU compiler are listed in -Chapter 5 of [IM2]. -.sp -- separate compilation -.ti +3m -the new compiler only accepts programs, not modules. - -- assertions -.ti +3m -not implemented. - -- additional procedures -.ti +3m -the procedures \fIhalt, mark\fR and \fIrelease\fR are not available. -.bp -- UNIX\(tm interfacing -.ti +3m -the \-c option is not implemented. -.FS -\(tm UNIX is a Trademark of Bell Laboratories. -.FE - -- double length integers -.ti +3m -integer size can be set with the \-V option, so the additional type \fIlong\fR -is not implemented. - - -.NH 2 -Compiler options -.LP -.sp -The options implemented by the Pascal-VU compiler are listed in -Chapter 7 of [IM2]. -.sp -The construction "{$....}" is not recognized. - -The options: \fIa, c, d, s\fR and \fIt\fR are not available. - -The \-l option has been changed into the \-L option. - -The size of reals can be set with the \-V option. 
diff --git a/doc/pascal/conf.doc b/doc/pascal/conf.doc deleted file mode 100644 index ff85003bc..000000000 --- a/doc/pascal/conf.doc +++ /dev/null @@ -1,88 +0,0 @@ -.sp 1.5i -.nr H1 3 -.NH -Conformant Arrays -.nh -.LP -.sp -A fifth kind of parameter, besides the value, variable, procedure, and function -parameter, is the conformant array parameter (\fBISO 6.6.3.7\fR). This -parameter, undoubtedly the major addition to Pascal from the compiler writer's -point of view, has been implemented. With this kind of parameter, the required -bounds of the index-type of an actual parameter are not fixed, but are -restricted to a specified range of values. Two types of conformant array -parameters can be distinguished: variable conformant array parameters and -value conformant array parameters. -.sp -.NH 2 -Variable conformant array parameters -.LP -.sp -The treatment of variable conformant array parameters is comparable with the -normal variable parameter. -Both have in common that the parameter mechanism used is \fIcall by -reference\fR. -.br -An example is: -.br -.in +5m -to sort variable length arrays of integers, the following Pascal procedure could be used: - -.nf -\fBprocedure\fR bubblesort(\fBvar\fR A : \fBarray\fR[low..high : integer] \fBof\fR integer); -\fBvar\fR i, j : integer; -\fBbegin - for\fR j := high - 1 \fBdownto\fR low \fBdo - for\fR i := low \fBto\fR j \fBdo - if\fR A[i+1] < A[i] \fBthen\fI interchange A[i] and A[i+1] -\fBend\fR; -.fi -.in -5m - -For every actual parameter, the base address of the array is pushed on the -stack and for every index-type-specification, exactly one array descriptor -is pushed. -.sp -.NH 2 -Value conformant array parameters -.LP -.sp -The treatment of value conformant array parameters is more complex than its -variable counterpart. 
-.br -An example is: -.br -.in +5m -an unpacked array of characters could be printed as a string with the following program part: - -.nf -\fBprocedure\fR WriteAsString( A : \fBarray\fR[low..high : integer] \fBof\fR char); -\fBvar\fR i : integer; -\fBbegin - for\fR i := low \fBto\fR high \fBdo\fR write(A[i]); -\fBend\fR; -.fi -.in -5m - -The calling procedure pushes the base address of the actual parameter and -the array descriptors belonging to it on the stack. Subsequently the procedure -using the conformant array parameter is called. Because it is a \fIcall by -value\fR, the called procedure has to create a copy of the actual parameter. -This implies that the calling procedure knows how much space on the stack -must be reserved for the parameters. If the actual-parameter is a conformant -array, the called procedure keeps track of the size of the activation record. -Hence the restrictions on the use of value conformant array parameters, as -specified in \fBISO 6.6.3.7.2\fR, are dropped. - -A description of the EM code generated by the compiler is: - -.nf -.ft I -load the stack adjustment sofar -load base address of array parameter -compute the size in bytes of the array -add this size to the stack adjustment -copy the array -remember the new address of the array -.ft R -.fi diff --git a/doc/pascal/contents.doc b/doc/pascal/contents.doc deleted file mode 100644 index f744c9e32..000000000 --- a/doc/pascal/contents.doc +++ /dev/null @@ -1,41 +0,0 @@ -.sp 1.5i -.ps 12 -.vs 14 -.ft B -Contents\fR\h'+108u'\h'+5i'Page - - -\h'+34u'1. Introduction \h'+34u'\h'+1.5i'1 - -\h'+34u'2. The compiler \h'+34u'\h'+1.5i'2 - -\h'+34u'3. Translation of Pascal to EM \h'+34u'\h'+1.5i'5 - -\h'+34u'4. Conformant arrays \h'+1.5i'10 - -\h'+34u'5. Compiler options \h'+1.5i'11 - -\h'+34u'6. Extensions to the standard \h'+1.5i'13 - -\h'+34u'7. Deviations from the standard \h'+1.5i'13 - -\h'+34u'8. Hints to change the standard \h'+1.5i'15 - -\h'+34u'9. Testing the compiler \h'+1.5i'16 - -10. 
Comparison with the old compiler \h'+1.5i'16 - -11. Improvements to the compiler \h'+1.5i'17 - -12. History & Acknowledgements \h'+1.5i'18 - -13. References \h'+1.5i'19 - - -\fBAppendices\fR - -\h'+16u'A. ISO-PASCAL Grammar \h'+1.5i'20 - -\h'+24u'B. Changes to run time library \h'+1.5i'26 - -\h'+20u'C. An example \h'+1.5i'28 diff --git a/doc/pascal/deviations.doc b/doc/pascal/deviations.doc deleted file mode 100644 index 53ee571ac..000000000 --- a/doc/pascal/deviations.doc +++ /dev/null @@ -1,118 +0,0 @@ -.sp 2 -.NH -Deviations from the standard -.nh - -.PP -The compiler deviates from the ISO 7185 standard with respect to the -following clauses: - -.IP "\fBISO 6.1.3:\fR" 14 -\h'-5u'Identifiers may be of any length and all characters of an identifier -shall be significant in distinguishing between them. -.sp -.in +3m -The constant IDFSIZE, defined in the file \fIidfsize.h\fR, determines -the (maximum) significant length of an identifier. It can be set at run -time with the \-M option (see also section on compiler options). -.in -3m -.sp -.IP "\fBISO 6.1.8:\fR" -\h'-5u'There shall be at least one separator between any pair of consecutive tokens -made up of identifiers, word-symbols, labels or unsigned-numbers. -.sp -.in +3m -A token separator is not needed when a number is followed by an identifier -or a word-symbol. For example the input sequence, 2\fBthen\fR, is recognized -as the integer 2 followed by the keyword \fBthen\fR. -.in -3m -.sp -.IP "\fBISO 6.2.1:\fR" -\h'-29u'The label-declaration-part shall specify all labels that prefix a statement -in the corresponding statement-part. -.sp -.ti +3m -The compiler generates a warning if a label is declared but never defined. -.bp -.IP "\fBISO 6.2.2:\fR" -\h'-9u'The scope of identifiers and labels should start at the beginning of the -block in which these identifiers or labels are declared. 
-.sp -.in +3m -The compiler, as most other one pass compilers deviates in this respect, -because the scope of variables and labels start at their defining-point. -.nf -.in +4m -\fBprogram\fR deviates\fB; -const\fR - x \fB=\fR 3\fB; -procedure\fR p\fB; -const\fR - y \fB=\fR x\fB;\fR - x \fB=\fR true\fB; -begin end; -begin -end.\fR -.in -4m -.fi - -In procedure p, the constant y has the integer value 3. This program does not -conform to the standard. In [SAL] a simple algorithm is described for -enforcing the scope rules, it involves numbering all scopes encountered in the -program in order of their opening, and recording in each identifier table -entry the number of the latest scope in which it is used. - -Note: The compiler does not deviate from the standard in the following program: -.nf -.in +4m -\fBprogram\fR conforms\fB; -type\fR - x \fB=\fR real\fB; -procedure\fR p\fB; -type\fR - y \fB= ^\fRx\fB;\fR - x \fB=\fR boolean\fB; -var\fR - p \fB:\fR y\fB; -begin end; -begin -end.\fR -.in -4m -.fi - -In procedure p, the variable p is a pointer to boolean. -.fi -.in -3m -.sp -.IP "\fBISO 6.4.3.2:\fR" -The standard specifies that any ordinal type is allowed as index-type. -.sp -.in +3m -The required type \fIinteger\fR is not allowed as index-type, i.e. -.ti +2m -\fBARRAY [ \fIinteger\fB ] OF\fR -is not permitted. -.br -This could be implemented, but this might cause problems on machines with -a small memory. -.in -3m -.sp -.IP "\fBISO 6.4.3.3:\fR" -\h'-1u'The type possessed by the variant-selector, called the tag-type, must -be an ordinal type, so the integer type is permitted. The values denoted by -all case-constants shall be distinct and the set thereof shall be equal -to the set of values specified by the tag-type. -.sp -.in +3m -Because it is impracticable to enumerate all integers as case-constants, -the integer type is not permitted as tag-type. It would not make a great -difference to allow it as tagtype.
-.in -3m -.sp -.IP "\fBISO 6.8.3.9:\fR" -The standard specifies that the control-variable of a for-statement is not -allowed to be modified while executing the loop. -.sp -.in +3m -Violation of this rule is not detected. An algorithm to implement this rule -can be found in [PCV]. diff --git a/doc/pascal/example.doc b/doc/pascal/example.doc deleted file mode 100644 index f8350f01e..000000000 --- a/doc/pascal/example.doc +++ /dev/null @@ -1,92 +0,0 @@ -.sp 1.5i -.ft B -Appendix C: An example -.ft R -.nh -.nf - - -\h'+10u' 1 \fBprogram\fR factorials(input, output); -\h'+10u' 2 { This program prints factorials } -\h'+10u' 3 -\h'+10u' 4 \fBconst\fR -\h'+10u' 5 FAC1 = 1; -\h'+10u' 6 \fBvar\fR -\h'+10u' 7 i : integer; -\h'+10u' 8 -\h'+10u' 9 \fBfunction\fR factorial(n : integer) : integer; -10 \fBbegin\fR -11 \fBif\fR n = FAC1 \fBthen\fR -12 factorial := FAC1 -13 \fBelse\fR -14 factorial := n * factorial(n-1); -15 \fBend\fR; -16 -17 \fBbegin\fR -18 write('Give a number : '); -19 readln(i); -20 \fBif\fR i < 1 \fBthen\fR -21 writeln('No factorial') -22 \fBelse\fR -23 writeln(factorial(i):1); -24 \fBend\fR. -.bp -.po -.DS - mes 2,4,4 loc 16 -\&.1 cal $_wrs - rom 'factorials.p\(rs000' asp 12 -i lin 19 - bss 4,0,0 lae input -output cal $_rdi - bss 540,0,0 asp 4 -input lfr 4 - bss 540,0,0 ste i - exp $factorial lae input - pro $factorial, ? cal $_rln - mes 9,4 asp 4 - lin 11 lin 20 - lol 0 loe i - loc 1 loc 1 - cmi 4 cmi 4 - teq tlt - zeq *1 zeq *1 - lin 12 lin 21 - loc 1 .4 - stl -4 rom 'No factorial' - bra *2 lae output -1 lae .4 - lin 14 loc 12 - lol 0 cal $_wrs - lol 0 asp 12 - loc 1 lae output - sbi 4 cal $_wln - cal $factorial asp 4 - asp 4 bra *2 - lfr 4 1 - mli 4 lin 23 - stl -4 lae output -2 loe i - lin 15 cal $factorial - mes 3,0,4,0,0 asp 4 - lol -4 lfr 4 - ret 4 loc 1 - end 4 cal $_wsi - exp $m_a_i_n asp 12 - pro $m_a_i_n, ? 
lae output - mes 9,0 cal $_wln - fil .1 asp 4 -\&.2 2 - con input, output lin 24 - lxl 0 loc 0 - lae .2 cal $_hlt - loc 2 end 0 - lxa 0 mes 4,24,'factorials.p\(rs000' - cal $_ini - asp 16 - lin 18 -\&.3 - rom 'Give a number : ' - lae output - lae .3 -.DE diff --git a/doc/pascal/extensions.doc b/doc/pascal/extensions.doc deleted file mode 100644 index 44febcc49..000000000 --- a/doc/pascal/extensions.doc +++ /dev/null @@ -1,60 +0,0 @@ -.pl 12i -.sp 1.5i -.NH -Extensions to Pascal as specified by ISO 7185 -.nh - -.IP "\fBISO 6.1.3:\fR" 14 -\h'-11u'The underscore is treated as a letter when the \-u option is turned -on (see also section 5.2). This is implemented to be compatible with -Pascal-VU and can be used in identifiers to increase readability. -.sp -.IP "\fBISO 6.1.4:\fR" -\h'-12u'The directive \fIextern\fR can be used in a procedure-declaration or -function-declaration to specify that the procedure-block or function-block -corresponding to that declaration is external to the program-block. This can -be used in conjunction with library routines. -.sp -.IP "\fBISO 6.1.9:\fR" -\h'-22u'An alternative representation for the following tokens and delimiting -characters is recognized: -.in +5m -.ft 5 -\fBtoken -.ft 5 -\& \fBalternative token -.ft 5 -.sp -^ -\& @ -.br -[ -\& (. -.br -] -\& .) - -.ft 5 -\fBdelimiting character -.ft 5 -\& \fBalternative delimiting pair of characters -.ft 5 -.sp -{ -\& (* -.br -} -\& *) -.in -5m -.sp -.IP "\fBISO 6.6.3.7.2:\fR" -\h'-1u'A conformant array parameter can be passed as value conformant array -parameter without the restrictions imposed by the standard. The compiler -gives a warning. This is implemented to keep the parameter mechanism orthogonal (see also Chapter 4). -.sp -.IP "\fBISO 6.9.3.1:\fR" -\h'-16u'If the value of the argument \fITotalWidth\fR of the required -procedure \fIwrite\fR is zero or negative, no characters are written for -character, string or boolean type arguments. 
If the value of the argument -\fIFracDigits\fR of the required procedure \fIwrite\fR is zero or negative, -the fraction and '.' character are suppressed for fixed-point arguments. diff --git a/doc/pascal/hints.doc b/doc/pascal/hints.doc deleted file mode 100644 index a1c7fc1ba..000000000 --- a/doc/pascal/hints.doc +++ /dev/null @@ -1,76 +0,0 @@ -.sp 1.5i -.nr H1 7 -.NH -Hints to change the standard -.nh -.sp -.LP -We encoutered some difficulties when the compiler was developed. In this -chapter some hints are presented to change the standard, which would make -the implementation of the compiler less difficult. The semantics of Pascal -would not be altered by these adaptions. -.sp 2 -.LP -\- Some minor changes in the grammar of Pascal from the user's point of view, -but which make the writing of an LL(1) parser considerably easier, could be: -.in +3m -.nf -field-list : [ ( fixed-part [ variant-part ] | variant-part ) ] . -fixed-part : record-section \fB;\fR { record-section \fB;\fR } . -variant-part : \fBcase\fR variant-selector \fBof\fR variant \fB;\fR { variant \fB;\fR } . - -case-statement : \fBcase\fR case-index \fBof\fR case-list-element \fB;\fR { case-list-element \fB;\fR } \fBend\fR . -.fi -.in -3m - - -.LP -\- To ease the semantic checking on sets, the principle of qualified sets could -be used, every set-constructor must be preceeded by its type-identifier: -.nf -.ti +3m -set-constructor : type-identifier \fB[\fR [ member-designator { \fB,\fR member-designator } ] \fB]\fR . - -Example: - t1 = set of 1..5; - t2 = set of integer; - -The type of [3, 5] would be ambiguous, but the type of t1[3, 5] not. -.fi - - -.LP -\- Another problem arises from the fact that a function name can appear in -three distinct 'use' contexts: function call, assignment of function -result and as function parameter. -.br -Example: -.in +5m -.nf -\fBprogram\fR function_name; - -\fBfunction\fR p(x : integer; function y : integer) : integer; -\fBbegin\fR .. 
\fBend\fR; - -\fBfunction\fR f : integer; -\fBbegin\fR - f := p(f, f); (*) -\fBend\fR; - -\fBbegin\fR .. \fBend\fR. -.fi -.in -5m - -A possible solution in case of a call (also a procedure call) would be to -make the (possibly empty) actual-parameter-list mandatory. The assignment -of the function result could be changed in a \fIreturn\fR statement. -Though this would change the semantics of the program slightly. -.br -The above statement (*) would look like this: return p(f(), f); - - -.LP -\- Another extension to the standard could be the implementation of an -\fIotherwise\fR clause in a case-statement. This would behave exactly like -the \fIdefault\fR clause in a switch-statement in C. -.bp diff --git a/doc/pascal/his.doc b/doc/pascal/his.doc deleted file mode 100644 index d4c64a2a5..000000000 --- a/doc/pascal/his.doc +++ /dev/null @@ -1,36 +0,0 @@ -.sp 2 -.NH -History & Acknowledgements -.nh -.sp 2 -.ft B -History -.ft R -.sp -.LP -The purpose of this project was to make a Pascal compiler which should satisfy -the conditions of the ISO standard. The task was considerably simplified, -because parts of the Modula-2 compiler were used. This gave the advantage of -increasing the uniformity of the compilers in ACK. -.br -While developing the compiler, a number of errors were detected in the Modula-2 -compiler, EM utility modules and the old Pascal compiler. - -.sp 2 -.ft B -Acknowledgements -.ft R -.sp -.LP -During the development of the compiler, valuable support was received from -a number of persons. In this regard we owe a debt of gratitude to -Fred van Beek, Casper Capel, Rob Dekker, Frank Engel, Jos\('e Gouweleeuw -and Sonja Keijzer (Jut and Jul !!), Herold Kroon, Martin van Nieuwkerk, -Sjaak Schouten, Eric Valk, and Didan Westra. -.br -Special thanks are reserved for Dick Grune, who introduced us to the field of -Compiler Design and who helped testing the compiler. Ceriel Jacobs, who -developed LLgen and the Modula-2 compiler of ACK. 
Finally we would like to -thank Erik Baalbergen, who had the supervision on this entire project and -gave us many valuable suggestions. -.bp diff --git a/doc/pascal/improv.doc b/doc/pascal/improv.doc deleted file mode 100644 index 3c15ee8b8..000000000 --- a/doc/pascal/improv.doc +++ /dev/null @@ -1,87 +0,0 @@ -.sp 2 -.NH -Improvements to the compiler -.nh -.sp -.LP -In consideration of portability, a restricted option could be implemented. -Under this option, the extensions and warnings should be considered as errors. - - -.LP -The restrictions imposed by the standard on the control variable of a -for-statment should be implemented (\fBISO 6.8.3.9\fR). - -.LP -To check whether a function returns a valid result, the following algorithm -could be used. When a function is entered a hidden temporary variable of -type boolean is created. This variable is initialized with the value false. -The variable is set to true, when an assignment to the function name occurs. -On exit of the function a test is performed on the variable. If the value -of the variable is false, a run-time error occurs. -.br -Note: The check has to be done run-time. - - -.LP -The \fIundefined value\fR should be implemented. A problem arises with -local variables, for which space on the stack is allocated. A possible -solution would be to generate code for the initialization of the local -variables with the undefined value at the beginning of a procedure or -function. -.br -The implementation for the global variables is easy, because \fBbss\fR -blocks are used. - - -.LP -Closely related to the last point is the generation of warnings when -variables are never used or assigned. This is not yet implemented. - - -.LP -The error messages could specify more details about the errors occurred, -if some additional testing is done. - -.bp -.LP -Every time the compiler detects sets with different base-types, a warning -is given. Sometimes this is superfluous. 
- -.nf -\fBprogram\fR sets(output); -\fBtype\fR - week = (sunday, monday, tuesday, wednesday, thursday, friday, saturday); - workweek = monday..friday; -\fBvar\fR - s : \fBset of\fR workweek; - day : week; -\fBbegin\fR - day := monday; - s := [day]; (* warning *) - day := saturday; - s := [day]; (* warning *) -\fBend\fR. -.fi -The new compiler gives two warnings, the first one is redundant. - - -.LP -A nasty point in the compiler is the way the procedures \fIread, readln, -write\fR and \fIwriteln\fR are handled (see also section 2.2). They have -been added to the grammar. This implies that they can not be redefined as -opposed to the other required procedures and functions. They should be -removed from the grammar altogether. This could imply that more semantic -checks have to be performed. - - -.LP -No effort is made to detect possible run-time errors during compilation. -.br -E.g. a : \fBarray\fR[1..10] \fBof\fI something\fR, and the array selection -a[11] would occur. - - -.LP -Some assistance to implement the improvements mentioned above, can be -obtained from [PCV]. diff --git a/doc/pascal/internal.doc b/doc/pascal/internal.doc deleted file mode 100644 index d1a94e7ae..000000000 --- a/doc/pascal/internal.doc +++ /dev/null @@ -1,342 +0,0 @@ -.pl 12.5i -.sp 1.5i -.NH -The compiler - -.nh -.LP -The compiler can be divided roughly into four modules: - -\(bu lexical analysis -.br -\(bu syntax analysis -.br -\(bu semantic analysis -.br -\(bu code generation -.br - -The four modules are grouped into one pass. The activity of these modules -is interleaved during the pass. -.br -The lexical analyzer, some expression handling routines and various -datastructures from the Modula-2 compiler contributed to the project. -.sp 2 -.NH 2 -Lexical Analysis - -.LP -The first module of the compiler is the lexical analyzer. In this module, the -stream of input characters making up the source program is grouped into -\fItokens\fR, as defined in \fBISO 6.1\fR. 
The analyzer is hand-written, -because the lexical analyzer generator, which was at our disposal, -\fILex\fR [LEX], produces much slower analyzers. A character table, in the file -\fIchar.c\fR, is created using the program \fItab\fR which takes as input -the file \fIchar.tab\fR. In this table each character is placed into a -particular class. The classes, as defined in the file \fIclass.h\fR, -represent a set of tokens. The strategy of the analyzer is as follows: the -first character of a new token is used in a multiway branch to eliminate as -many candidate tokens as possible. Then the remaining characters of the token -are read. The constant INP_NPUSHBACK, defined in the file \fIinput.h\fR, -specifies the maximum number of characters the analyzer looks ahead. The -value has to be at least 3, to handle input sequences such as: -.br - 1e+4 (which is a real number) -.br - 1e+a (which is the integer 1, followed by the identifier "e", a plus, and the identifier "a") - -Another aspect of this module is the insertion and deletion of tokens -required by the parser for the recovery of syntactic errors (see also section -2.2). A generic input module [ACK] is used to avoid the burden of I/O. -.sp 2 -.NH 2 -Syntax Analysis - -.LP -The second module of the compiler is the parser, which is the central part of -the compiler. It invokes the routines of the other modules. The tokens obtained -from the lexical analyzer are grouped into grammatical phrases. These phrases -are stored as parse trees and handed over to the next part. The parser is -generated using \fILLgen\fR[LL], a tool for generating an efficient recursive -descent parser with no backtrack from an Extended Context Free Syntax. -.br -An error recovery mechanism is generated almost completely automatically. A -routine called \fILLmessage\fR had to be written, which gives the necessary -error messages and deals with the insertion and deletion of tokens. 
-The routine \fILLmessage\fR must accept one parameter, whose value is -a token number, zero or -1. A zero parameter indicates that the current token -(the one in the external variable \fILLsymb\fR) is deleted. -A -1 parameter indicates that the parser expected end of file, but did -not get it. The parser will then skip tokens until end of file is detected. -A parameter that is a token number (a positive parameter) indicates that -this token is to be inserted in front of the token currently in \fILLsymb\fR. -Also, care must be taken, that the token currently in \fILLsymb\fR is again -returned by the \fBnext\fR call to the lexical analyzer, with the proper -attributes. So, the lexical analyzer must have a facility to push back one -token. -.br -Calls to the two standard procedures \fIwrite\fR and \fIwriteln\fR can be -different from calls to other procedures. The syntax of a write-parameter -is different from the syntax of an actual-parameter. We decided to include -them, together with \fIread\fR and \fIreadln\fR, in the grammar. An alternate -solution would be to make the syntax of an actual-parameter identical to the -syntax of a write-parameter. Afterwards the parameter has to be checked to -see whether it is used properly or not. -.bp -As the parser is LL(1), it must always be able to determine what to do, -based on the last token read (\fILLsymb\fR). Unfortunately, this was not the -case with the grammar as specified in [ISO]. Two kinds of problems -appeared, viz. the \fBalternation\fR and \fBrepetition\fR conflict. -The examples given in the following paragraphs are taken from the grammar. - -.NH 3 -Alternation conflict - -.LP -An alternation conflict arises when the parser can not decide which -production to choose. 
-.br -\fBExample:\fR -.in +2m -.ft 5 -.nf -procedure-declaration : procedure-heading \fB';'\f5 directive | -.br -\h'\w'procedure-declaration : 'u'procedure-identification \fB';'\f5 procedure-block | -.br -\h'\w'procedure-declaration : 'u'procedure-heading \fB';'\f5 procedure-block ; -.br -procedure-heading : \fBprocedure\f5 identifier [ formal-parameter-list ]? ; -.br -procedure-identification : \fBprocedure\f5 procedure-identifier ; -.fi -.ft R -.in -2m - -A sentence that starts with the terminal \fBprocedure\fR is derived from the -three alternative productions. This conflict can be resolved in two ways: -adjusting the grammar, usually some rules are replaced by one rule and more -work has to be done in the semantic analysis; using the LLgen conflict -resolver, "\fB%if\fR (C-expression)", if the C-expression evaluates to -non-zero, the production in question is chosen, otherwise one of the -remaining rules is chosen. The grammar rules were rewritten to solve this -conflict. The new rules are given below. For more details see the file -\fIdeclar.g\fR. - -.in +2m -.ft 5 -.nf -procedure-declaration : procedure-heading \fB';'\f5 ( directive | procedure-block ) ; -.br -procedure-heading : \fBprocedure\f5 identifier [ formal-parameter-list ]? ; -.fi -.ft R -.in -2m - -A special case of an alternation conflict, which is common to many block -structured languages, is the \fI"dangling-else"\fR ambiguity. - -.in +2m -.ft 5 -.nf -if-statement : \fBif\f5 boolean-expression \fBthen\f5 statement [ else-part ]? 
; -.br -else-part : \fBelse\f5 statement ; -.fi -.ft R -.in -2m - -The following statement that can be derived from the rules above is ambiguous: - -.ti +2m -\fBif\f5 boolean-expr-1 \fBthen\f5 \fBif\f5 boolean-expr-2 \fBthen\f5 statement-1 \fBelse\f5 statement-2 -.ft R - - -.ps 8 -.vs 7 -.PS -move right 1.1i -S: line down 0.5i -"if-statement" at S.start above -.ft B -"then" at S.end below -.ft R -move to S.start then down 0.25i -L: line left 0.5i then down 0.25i -box ht 0.33i wid 0.6i "boolean" "expression-1" -move to L.start then left 0.5i -L: line left 0.5i then down 0.25i -.ft B -"if" at L.end below -.ft R -move to L.start then right 0.5i -L: line right 0.5i then down 0.25i -"statement" at L.end below -move to L.end then down 0.10i -L: line down 0.25i dashed -"if-statement" at L.end below -move to L.end then down 0.10i -L: line down 0.5i -.ft B -"then" at L.end below -.ft R -move to L.start then down 0.25i -L: line left 0.5i then down 0.25i -box ht 0.33i wid 0.6i "boolean" "expression-2" -move to L.start then left 0.5i -L: line left 0.5i then down 0.25i -.ft B -"if" at L.end below -.ft R -move to L.start then right 0.5i -L: line right 0.5i then down 0.25i -box ht 0.33i wid 0.6i "statement-1" -move to L.start then right 0.5i -L: line right 0.5i then down 0.25i -.ft B -"else" at L.end below -.ft R -move to L.start then right 0.5i -L: line right 0.5i then down 0.25i -box ht 0.33i wid 0.6i "statement-2" -move to S.start -move right 3.5i -L: line down 0.5i -"if-statement" at L.start above -.ft B -"then" at L.end below -.ft R -move to L.start then down 0.25i -L: line left 0.5i then down 0.25i -box ht 0.33i wid 0.6i "boolean" "expression-1" -move to L.start then left 0.5i -L: line left 0.5i then down 0.25i -.ft B -"if" at L.end below -.ft R -move to L.start then right 0.5i -S: line right 0.5i then down 0.25i -"statement" at S.end below -move to S.start then right 0.5i -L: line right 0.5i then down 0.25i -.ft B -"else" at L.end below -.ft R -move to L.start then right 
0.5i -L: line right 0.5i then down 0.25i -box ht 0.33i wid 0.6i "statement-2" -move to S.end then down 0.10i -L: line down 0.25i dashed -"if-statement" at L.end below -move to L.end then down 0.10i -L: line down 0.5i -.ft B -"then" at L.end below -.ft R -move to L.start then down 0.25i -L: line left 0.5i then down 0.25i -box ht 0.33i wid 0.6i "boolean" "expression-2" -move to L.start then left 0.5i -L: line left 0.5i then down 0.25i -.ft B -"if" at L.end below -.ft R -move to L.start then right 0.5i -L: line right 0.5i then down 0.25i -box ht 0.33i wid 0.6i "statement-1" -.PE -.ps -.vs -\h'615u'(a)\h'1339u'(b) -.sp -.ce -Two parse trees showing the \fIdangling-else\fR ambiguity -.sp 2 -According to the standard, \fBelse\fR is matched with the nearest preceding -unmatched \fBthen\fR, i.e. parse tree (a) is valid (\fBISO 6.8.3.4\fR). -This conflict is statically resolved in LLgen by using "\fB%prefer\fR", -which is equivalent in behaviour to "\fB%if\fR(1)". -.bp -.NH 3 -Repetition conflict - -.LP -A repetition conflict arises when the parser can not decide whether to choose -a production once more, or not. -.br -\fBExample:\fR -.in +2m -.ft 5 -.nf -field-list : [ ( fixed-part [ \fB';'\f5 variant-part ]? | variantpart ) [;]? ]? ; -.br -fixed-part : record-section [ \fB';'\f5 record-section ]* ; -.fi -.in -2m -.ft R - -When the parser sees the semicolon, it can not decide whether another -record-section or a variant-part follows. This conflict can be resolved in -two ways: adjusting the grammar or using the conflict resolver, -"\fB%while\fR (C-expression)". The grammar rules that deal with this conflict -were completely rewritten. For more details, the reader is referred to the -file \fIdeclar.g\fR. -.sp 2 -.NH 2 -Semantic Analysis - -.LP -The third module of the compiler is the checking of semantic conventions of -ISO-Pascal. To check the program being parsed, actions have been used in -LLgen. 
An action consists of several C-statements, enclosed in brackets -"{" and "}". In order to facilitate communication between the actions and -\fILLparse\fR, the parsing routines can be given C-like parameters and -local variables. An important part of the semantic analyzer is the symbol -table. This table stores all information concerning identifiers and their -definitions. Symbol-table lookup and hashing is done by a generic namelist -module [ACK]. The parser turns each program construction into a parse tree, -which is the major datastructure in the compiler. This parse tree is used -to exchange information between various routines. -.sp 2 -.NH 2 -Code Generation - -.LP -The final module in the compiler is that of code generation. The information -stored in the parse trees is used to generate the EM code [EM]. EM code is -generated with the help of a procedural EM-code interface [ACK]. The use of -static exchanges is not desired, since the fast back end can not cope with -static code exchanges, hence the EM pseudoinstruction \fBexc\fR is never -generated. -.br -Chapter 3 discusses the code generation in more detail. -.sp 2 -.NH 2 -Error Handling - -.LP -The first three modules have in common that they can detect errors in the -Pascal program being compiled. If this is the case, a proper message is given -and some action is performed. If code generation has to be aborted, an error -message is given, otherwise a warning is given. The constant MAXERR_LINE, -defined in the file \fIerrout.h\fR, specifies the maximum number of messages -given per line. This can be used to avoid long lists of error messages caused -by, for example, the omission of a ';'. Three kinds of errors can be -distinguished: the lexical error, the syntactic error, and the semantic error. -Examples of these errors are respectively, nested comments, an expression with -unbalanced parentheses, and the addition of two characters. 
-.sp 2 -.NH 2 -Memory Allocation and Garbage Collection - -.LP -The routines \fIst_alloc\fR and \fIst_free\fR provide a mechanism for -maintaining free lists of structures, whose first field is a pointer called -\fBnext\fR. This field is used to chain free structures together. Each -structure, suppose the tag of the structure is ST, has a free list pointed -by h_ST. Associated with this list are the operations: \fInew_ST()\fR, an -allocating mechanism which supplies the space for a new ST struct; and -\fIfree_ST()\fR, a garbage collecting mechanism which links the specified -structure into the free list. -.bp diff --git a/doc/pascal/options.doc b/doc/pascal/options.doc deleted file mode 100644 index a278b5e69..000000000 --- a/doc/pascal/options.doc +++ /dev/null @@ -1,166 +0,0 @@ -.sp 1.5i -.NH -Compiler options -.nh -.PP -There are some options available to control the behaviour of the compiler. -Two types of options can be distinguished: compile-time options and -run-time options. -.sp -.NH 2 -Compile time options -.LP -.sp -There are some options that can be set when the compiler is installed. -Those options can be found in the file \fIParameters\fR. To set a parameter -just modify its definition in the file \fIParameters\fR. The shell script -in the file \fImake.hfiles\fR creates for each parameter a separate .h file. -This mechanism is derived from the C compiler in ACK. -.sp -\fBIDFSIZE\fR -.in +3m -The maximum number of characters that are significant in an identifier. This -value has to be at least the value of \fBMINIDFSIZE\fR, defined in the file -\fIoptions.c\fR. A compile-time check is included to see if the value of -\fBMINIDFSIZE\fR is legal. The compiler will not recognize some keywords -if \fBIDFSIZE\fR is too small. -.in -3m -.sp -\fBISTRSIZE\fR, \fBRSTRSIZE\fR -.in +3m -The lexical analyzer uses these two values for the allocation of memory needed -to store a string. \fBISTRSIZE\fR is the initial number of bytes allocated. 
-\fBRSTRSIZE\fR is the step size used for enlarging the memory needed. -.in -3m -.sp -\fBNUMSIZE\fR -.in +3m -The maximum length of a numeric constant recognized by the lexical analyzer. -It is an error if this length is exceeded. -.in -3m -.sp -\fBERROUT\fR, \fBMAXERR_LINE\fR -.in +3m -Used for error messages. \fBERROUT\fR defines the file on which the -messages are written. \fBMAXERR_LINE\fR is the maximum number of error -messages given per line. -.in -3m -.sp -\fBSZ_CHAR\fR, \fBAL_CHAR\fR, etc -.in +3m -The default values of the target machine sizes and alignments. The values -can be overruled with the \-V option. -.in -3m -.sp -\fBMAXSIZE\fR -.in +3m -This value must be set to the maximum of the values of the target machine -sizes. This parameter is used in overflow detection (see also section 3.2). -.in -3m -.sp -\fBDENSITY\fR -.in +3m -This parameter is used to decide what EM instruction has to be generated -for a case-statement. If the range of the index value is sparse, i.e. -.br -.ti +5m -(upperbound - lowerbound) / number_of_cases -.br -is more than some threshold (\fBDENSITY\fR) the \fBcsb\fR instruction is -chosen. If the range is dense a jump table is generated (\fBcsa\fR). This -uses more space. Reasonable values are 2, 3 or 4. -.br -Higher values might also be reasonable on machines, which have lots of -address space and memory (see also section 3.3.3). -.in -3m -.sp -\fBINP_READ_IN_ONE\fR -.in +3m -Used by the generic input module. It can either be defined or not defined. -Defining it has the effect that files will be read completely into memory -using only one read-system call. This should be used only on machines with -lots of memory. -.in -3m -.sp -.bp -\fBDEBUG\fR -.in +3m -.nf -If this parameter is defined some built-in compiler-debugging tools can be used: -.in +2m -\(bu only lexical analyzing is done, if the \-l option is given. -\(bu if the \-I option is turned on, the allocated number of structures is printed. 
-\(bu the routine debug can be used to print miscellaneous information. -\(bu the routine PrNode prints a tree of nodes. -\(bu the routine DumpType prints information about a type structure. -\(bu the macro DO_DEBUG(x,y) defined as ((x) && (y)) can be used to perform - several actions. -.in -2m -.in -3m -.sp -.NH 2 -Run time options -.LP -.sp -The run time options can be given in the command line when the compiler is -called. -.br -They all have the form: \- -.br -Depending on the option, a character string has to be specified. The following -options are currently available: -.sp -.IP \-\fBC\fR 18 -The lower case and upper case letters are treated different (\fBISO 6.1.1\fR). -.sp -.IP \-\fBu\fR -The character '_' is treated like a letter, so it is allowed to use the -underscore in identifiers. -.br -Note: identifiers starting with an underscore may cause problems, because -.br -\h'\w'Note: 'u'most identifiers in library routines start with an underscore. -.sp -.IP \-\fBn\fR -This option suppresses the generation of register messages. -.sp -.IP \-\fBr\fR -With this option rangechecks are generated where necessary. -.sp -.IP \-\fBL\fR -Do not generate EM \fBlin\fR and \fBfil\fR instructions. These instructions -are used only for profiling. -.sp -.IP \-\fBM\fR -Set the number of characters that are significant in an identifier to . -The maximum significant identifier length depends on the constant IDFSIZE, -defined in \fIidfsize.h\fR. -.sp -.IP \-\fBi\fR -With this flag the setsize for a set of integers can be changed. The number must -be the number of bits per set. Default value : (#bits in a word) \- 1 -.sp -.IP \-\fBw\fR -Suppress warning messages (see also section 2.5). -.sp -.IP \-\fBV\fR[[\fBw\fR|\fBi\fR|\fBf\fR|\fBp\fR|\fBS\fR][\fIsize\fR]?[\fI.alignment\fR]?]* -.br -Option to set the object sizes and alignments on the target machine -dynamically. 
The objects that can be manipulated are: -.br -\fBw\fR\h'\w'ifpS'u' word -.br -\fBi\fR\h'\w'wfpS'u' integer -.br -\fBf\fR\h'\w'wipS'u' float -.br -\fBp\fR\h'\w'wifS'u' pointer -.br -\fBS\fR\h'\w'wifp'u' structure -.br -In case of a structure, \fIsize\fR is discarded and the \fIalignment\fR is -the initial alignment of the structure. The effective alignment is the least -common multiple of \fIalignment\fR and the alignment of its members. This -option has been implemented so that the compiler can be used as cross -compiler. -.bp diff --git a/doc/pascal/p1-9 b/doc/pascal/p1-9 deleted file mode 100755 index 7455d2ef7..000000000 --- a/doc/pascal/p1-9 +++ /dev/null @@ -1 +0,0 @@ -pic ab+intro.doc internal.doc transpem.doc | troff -ms > p1-9.dit diff --git a/doc/pascal/p10-14 b/doc/pascal/p10-14 deleted file mode 100755 index 529162663..000000000 --- a/doc/pascal/p10-14 +++ /dev/null @@ -1 +0,0 @@ -troff -ms -n10 conf.doc options.doc extensions.doc deviations.doc > p10-14.dit diff --git a/doc/pascal/p15-19 b/doc/pascal/p15-19 deleted file mode 100755 index 808edc2fe..000000000 --- a/doc/pascal/p15-19 +++ /dev/null @@ -1 +0,0 @@ -troff -ms -n15 hints.doc test.doc compar.doc improv.doc his.doc reference.doc > p15-19.dit diff --git a/doc/pascal/p20-29 b/doc/pascal/p20-29 deleted file mode 100755 index 11c4b4ec6..000000000 --- a/doc/pascal/p20-29 +++ /dev/null @@ -1 +0,0 @@ -troff -ms -n20 syntax.doc rtl.doc example.doc > p20-29.dit diff --git a/doc/pascal/proto.make b/doc/pascal/proto.make deleted file mode 100644 index 1aa820275..000000000 --- a/doc/pascal/proto.make +++ /dev/null @@ -1,28 +0,0 @@ -# $Id$ - -#PARAMS do not remove this line! 
- -SRC_DIR = $(SRC_HOME)/doc/pascal - -PIC = pic - -SRC = \ - $(SRC_DIR)/ab+intro.doc \ - $(SRC_DIR)/internal.doc \ - $(SRC_DIR)/transpem.doc \ - $(SRC_DIR)/conf.doc \ - $(SRC_DIR)/options.doc \ - $(SRC_DIR)/extensions.doc \ - $(SRC_DIR)/deviations.doc \ - $(SRC_DIR)/hints.doc \ - $(SRC_DIR)/test.doc \ - $(SRC_DIR)/compar.doc \ - $(SRC_DIR)/improv.doc \ - $(SRC_DIR)/his.doc \ - $(SRC_DIR)/reference.doc \ - $(SRC_DIR)/syntax.doc \ - $(SRC_DIR)/rtl.doc \ - $(SRC_DIR)/example.doc - -$(TARGET_HOME)/doc/pascal.doc: $(SRC) - $(PIC) $(SRC) > $@ diff --git a/doc/pascal/reference.doc b/doc/pascal/reference.doc deleted file mode 100644 index e99f16da6..000000000 --- a/doc/pascal/reference.doc +++ /dev/null @@ -1,50 +0,0 @@ -.ps 12 -.vs 14 -.NH -References -.sp -.nh -.IP [ISO] 8 -ISO 7185 Specification for Computer Programming Language Pascal, 1982, -Acornsoft ISO-PASCAL, 1984 -.sp -.IP [EM] -A.S. Tanenbaum, H. van Staveren, E.G. Keizer and J.W. Stevenson, -\fIDescription Of A Machine Architecture for use with Block Structured -Languages\fR, Informatica Rapport IR-81, Vrije Universiteit, Amsterdam, 1983 -.sp -.IP [C] -B.W. Kernighan and D.M. Ritchie, \fIThe C Programming Language\fR, -Prentice-Hall, 1978 -.sp -.IP [LL] -C.J.H. Jacobs, \fISome Topics in Parser Generation\fR, Informatica Rapport -IR-105, Vrije Universiteit, Amsterdam, October 1985 -.sp -.IP [IM2] -J.W. Stevenson, \fIPascal-VU Reference Manual and Unix Manual Pages\fR, -Informatica Manual IM-2, Vrije Universiteit, Amsterdam, 1980 -.sp -.IP [JEN] -K. Jensen and N.Wirth, \fIPascal User Manual and Report\fR, -Springer-Verlag, 1978 -.sp -.IP [ACK] -\fIACK Manual Pages\fR: ALLOC, ASSERT, EM_CODE, EM_MES, IDF, INPUT, PRINT, -STRING, SYSTEM -.sp -.IP [AHO] -A.V. Aho, R. Sethi and J.D. Ullman, \fICompiler Principles, Techniques, and -Tools\fR, Addison Wesley, 1985 -.sp -.IP [LEX] -M.E. Lesk, \fILex - A Lexical Analyser Generator\fR, Comp. Sci. Tech. Rep. -No. 
39, Bell Laboratories, Murray Hill, New Jersey, October 1975 -.sp -.IP [PCV] -B.A. Wichmann and Z.J. Ciechanowicz, \fIPascal Compiler Validation\fR, John -Wiley & Sons, 1983 -.sp -.IP [SAL] -A.H.J. Sale, \fIA Note on Scope, One-Pass Compilers and Pascal\fR, Australian -Communications, 1, 1, 80-82, 1979 diff --git a/doc/pascal/rtl.doc b/doc/pascal/rtl.doc deleted file mode 100644 index 011375b14..000000000 --- a/doc/pascal/rtl.doc +++ /dev/null @@ -1,85 +0,0 @@ -.sp 1.5i -.ft B -Appendix B: Changes to the run time library -.ft R -.nh -.sp -Some minor changes in the run time library have been made concerning the -external files (i.e. program arguments). The old compiler reserved -space for the file structures of the external files in one \fBhol\fR block. -In the new compiler, every file structure is placed in a separate \fBbss\fR -block. This implies that the arguments with which \fI_ini\fR is called are -slightly different. The second argument was the base of the \fBhol\fR block -to relocate the buffer addresses, it is changed into an integer denoting the -size of the array passed as third argument. The third argument was a pointer -to an array of integers containing the description of external files, this -argument is changed into a pointer to an array of pointers to file structures. - -The differences in the generated EM code for an arbitrary Pascal program are -listed below (only the relevant parts are shown): -.in +5m -.nf -\fBprogram\fR external_files(output,f); -\fBvar\fR - f : \fBfile of \fIsome-type\fR; - . - . -\fBend\fR. -.in -5m - -EM code generated by Pascal-VU: -.in +5m - . - . - hol 1088,-2147483648,0 ; space belonging to file structures of the program arguments - . - . - . -\&.2 - con 3, -1, 544, 0 \h'80u'; description of external files - lxl 0 - lae .2 - lae 0 \h'146u'; base of hol block, to relocate buffer addresses - lxa 0 - cal $_ini - asp 16 - . - . -.in -5m - -EM code generated by our compiler: -.in +5m - . - . 
-f - bss 540,0,0 \h'100u'; space belonging to file structure of program argument f -output - bss 540,0,0 \h'100u'; space belonging to file structure of standard output - . - . - . -\&.2 - con 0U4, output, f \h'50u'; the absence of standard input is denoted by a null pointer - lxl 0 - lae .2 - loc 3 \h'144u'; denotes the size of the array of pointers to file structures - lxa 0 - cal $_ini - asp 16 - . - . -.in -5m - -.po -The following files in the run time library have been changed: -.in +1m -pc_file.h -hlt.c -ini.c -opn.c -pentry.c -pexit.c -.in -1m -.fi -.bp -.po diff --git a/doc/pascal/syntax.doc b/doc/pascal/syntax.doc deleted file mode 100644 index ba6cfbee7..000000000 --- a/doc/pascal/syntax.doc +++ /dev/null @@ -1,269 +0,0 @@ -.sp 1.5i -.LP -.vs 14 -.nh -.ft B -Appendix A: ISO-PASCAL grammar -.ft R - - -\fBA.1 Lexical tokens\fR - -The syntax describes the formation of lexical tokens from characters and the -separation of these tokens, and therefore does not adhere to the same rules -as the syntax in A.2. - -The lexical tokens used to construct Pascal programs shall be classified into -special-symbols, identifiers, directives, unsigned-numbers, labels and -character-strings. The representation of any letter (upper-case or lower-case, -differences of font, etc) occurring anywhere outside of a character-string -shall be insignificant in that occurrence to the meaning of the program. - -letter = \fBa\fR | \fBb\fR | \fBc\fR | \fBd\fR | \fBe\fR | \fBf\fR | \fBg\fR | \fBh\fR | \fBi\fR | \fBj\fR | \fBk\fR | \fBl\fR | \fBm\fR | \fBn\fR | \fBo\fR | \fBp\fR | \fBq\fR | \fBr\fR | \fBs\fR | \fBt\fR | \fBu\fR | \fBv\fR | \fBw\fR | \fBx\fR | \fBy\fR | \fBz\fR . - -digit = \fB0\fR | \fB1\fR | \fB2\fR | \fB3\fR | \fB4\fR | \fB5\fR | \fB6\fR | \fB7\fR | \fB8\fR | \fB9\fR . - - -The special symbols are tokens having special meanings and shall be used to -delimit the syntactic units of the language. 
- -special-symbol = \fB+\fR | \fB\-\fR | \fB*\fR | \fB/\fR | \fB=\fR | \fB<\fR | \fB>\fR | \fB[\fR | \fB]\fR | \fB.\fR | \fB,\fR | \fB:\fR | \fB;\fR | \fB^\fR | \fB(\fR | \fB)\fR | \fB<>\fR | \fB<=\fR | \fB>=\fR | \fB:=\fR | \fB..\fR | -\h'\w'special-symbol = 'u'word-symbol . - -word-symbol = \fBand\fR | \fBarray\fR | \fBbegin\fR | \fBcase\fR | \fBconst\fR | \fBdiv\fR | \fBdo\fR | \fBdownto\fR | \fBelse\fR | \fBend\fR | \fBfile\fR | \fBfor\fR | \fBfunction\fR | -\h'\w'word-symbol = 'u'\fBgoto\fR | \fBif\fR | \fBin\fR | \fBlabel\fR | \fBmod\fR | \fBnil\fR | \fBnot\fR | \fBof\fR | \fBor\fR | \fBpacked\fR | \fBprocedure\fR | \fBprogram\fR | \fBrecord\fR | -\h'\w'word-symbol = 'u'\fBrepeat\fR | \fBset\fR | \fBthen\fR | \fBto\fR | \fBtype\fR | \fBuntil\fR | \fBvar\fR | \fBwhile\fR | \fBwith\fR . - - -Identifiers may be of any length. All characters of an identifier shall be -significant. No identifier shall have the same spelling as any word-symbol. - -identifier = letter { letter | digit } . - - -A directive shall only occur in a procedure-declaration or function-declaration. -No directive shall have the same spelling as any word-symbol. - -directive = letter {letter | digit} . - - -Numbers are given in decimal notation. - -.nf -unsigned-integer = digit-sequence . -unsigned-real = unsigned-integer \fB.\fR fractional-part [ \fBe\fR scale-factor ] | unsigned-integer \fBe\fR scale-factor . -digit-sequence = digit {digit} . -fractional-part = digit-sequence . -scale-factor = signed-integer . -signed-integer = [sign] unsigned-integer . -sign = \fB+\fR | \fB\-\fR . -.fi - -.bp -Labels shall be digit-sequences and shall be distinguished by their apparent -integral values and shall be in the closed interval 0 to 9999. - -label = digit-sequence . - - -A character-string containing a single string-element shall denote a value of -the required char-type. Each string-character shall denote an implementation- -defined value of the required char-type. 
- -.nf -character-string = \fB'\fR string-element { string-element } \fB'\fR . -string-element = apostrophe-image | string-character . -apostrophe-image = \fB''\fR . -string-character = All 7-bits ASCII characters except linefeed (10), vertical tab (11), and new page (12). -.fi - - -The construct: - - \fB{\fR any-sequence-of-characters-and-separations-of-lines- not-containing-right-brace \fB}\fR - -shall be a comment if the "{" does not occur within a character-string or -within a comment. The substitution of a space for a comment shall not alter -the meaning of a program. - -Comments, spaces (except in character-strings), and the separation of -consecutive lines shall be considered to be token separators. Zero or more -token separators may occur between any two consecutive tokens, or before -the first token of a program text. No separators shall occur within tokens. -.bp -.po -\fBA.2 Grammar\fR - -The non-terminal symbol \fIprogram\fR is the start symbol of the grammar. - -.nf -actual-parameter : expression | variable-access | procedure-identifier | function-identifier . -actual-parameter-list : \fB(\fR actual-parameter { \fB,\fR actual-parameter } \fB)\fR . -adding-operator : \fB+\fR | \fB\-\fR | \fBor\fR . -array-type : \fBarray\fR \fB[\fR index-type { \fB,\fR index-type } \fB]\fR \fBof\fR component-type . -array-variable : variable-access . -assignment-statement : ( variable-access | function-identifier ) \fB:=\fR expression . - -base-type : ordinal-type . -block : label-declaration-part constant-definition-part type-definition-part variable-declaration-part -\h'\w'block : 'u'procedure-and-function-declaration-part statement-part . -Boolean-expression : expression . -bound-identifier : identifier . -buffer-variable : file-variable \fB^\fR . - -case-constant : constant . -case-constant-list : case-constant { \fB,\fR case-constant } . -case-index : expression . -case-list-element : case-constant-list \fB:\fR statement . 
-case-statement : \fBcase\fR case-index \fBof\fR case-list-element { \fB;\fR case-list-element } [ \fB;\fR ] \fBend\fR . -component-type : type-denoter . -component-variable : indexed-variable | field-designator . -compound-statement : \fBbegin\fR statement-sequence \fBend\fR . -conditional-statement : if-statement | case-statement . -conformant-array-parameter-specification : value-conformant-array-specification | -\h'+18.5m'variable-conformant-array-specification . -conformant-array-schema : packed-conformant-array-schema | unpacked-conformant-array-schema . -constant : [ sign ] ( unsigned-number | constant-identifier ) | character-string . -constant-definition : identifier \fB=\fR constant . -constant-definition-part : [ \fBconst\fR constant-definition \fB;\fR { constant-definition \fB;\fR } ] . -constant-identifier : identifier . -control-variable : entire-variable . - -domain-type : type-identifier . - -else-part : \fBelse\fR statement . -empty-statement : . -entire-variable : variable-identifier . -enumerated-type : \fB(\fR identifier-list \fB)\fR . -expression : simple-expression [ relational-operator simple-expression ] . -.bp -.po -factor : variable-access | unsigned-constant | bound-identifier | function-designator | set-constructor | -\h'\w'factor : 'u'\fB(\fR expression \fB)\fR | \fBnot\fR factor . -field-designator : record-variable \fB.\fR field-specifier | field-designator-identifier . -field-designator-identifier : identifier . -field-identifier : identifier . -field-list : [ ( fixed-part [ \fB;\fR variant-part ] | variant-part ) [ \fB;\fR ] ] . -field-specifier : field-identifier . -file-type : \fBfile\fR \fBof\fR component-type . -file-variable : variable-access . -final-value : expression . -fixed-part : record-section { \fB;\fR record-section } . -for-statement : \fBfor\fR control-variable \fB:=\fR initial-value ( \fBto\fR | \fBdownto\fR ) final-value \fBdo\fR statement . 
-formal-parameter-list : \fB(\fR formal-parameter-section { \fB;\fR formal-parameter-section } \fB)\fR . -formal-parameter-section : value-parameter-specification | variable-parameter-specification | -\h'\w'formal-parameter-section : 'u'procedural-parameter-specification | functional-parameter-specification | -\h'\w'formal-parameter-section : 'u'conformant-array-parameter-specification . -function-block : block . -function-declaration : function-heading \fB;\fR directive | function-identification \fB;\fR function-block | -\h'\w'function-declaration : 'u'function-heading \fB;\fR function-block . -function-designator : function-identifier [ actual-parameter-list ] . -function-heading : \fBfunction\fR identifier [ formal-parameter-list ] \fB:\fR result-type . -function-identification : \fBfunction\fR function-identifier . -function-identifier : identifier . -functional-parameter-specification : function-heading . - -goto-statement : \fBgoto\fR label . - -identified-variable : pointer-variable \fB^\fR . -identifier-list : identifier { \fB,\fR identifier } . -if-statement : \fBif\fR Boolean-expression \fBthen\fR statement [ else-part ] . -index-expression : expression . -index-type : ordinal-type . -index-type-specification : identifier \fB..\fR identifier \fB:\fR ordinal-type-identifier . -indexed-variable : array-variable \fB[\fR index-expression { \fB,\fR index-expression } \fB]\fR . -initial-value : expression . - -label : digit-sequence . -label-declaration-part : [ \fBlabel\fR label { \fB,\fR label } \fB;\fR ] . - -member-designator : expression [ \fB..\fR expression ] . -multiplying-operator : \fB*\fR | \fB/\fR | \fBdiv\fR | \fBmod\fR | \fBand\fR . -.bp -.po -new-ordinal-type : enumerated-type | subrange-type . -new-pointer-type : \fB^\fR domain-type . -new-structured-type : [ \fBpacked\fR ] unpacked-structured-type . -new-type : new-ordinal-type | new-structured-type | new-pointer-type . - -ordinal-type : new-ordinal-type | ordinal-type-identifier . 
-ordinal-type-identifier : type-identifier . - -packed-conformant-array-schema : \fBpacked\fR \fBarray\fR \fB[\fR index-type-specification \fB]\fR \fBof\fR type-identifier . -pointer-type-identifier : type-identifier . -pointer-variable : variable-access . -procedural-parameter-specification : procedure-heading . -procedure-and-function-declaration-part : { ( procedure-declaration | function-declaration ) \fB;\fR } . -procedure-block : block . -procedure-declaration : procedure-heading \fB;\fR directive | procedure-identification \fB;\fR procedure-block | -\h'\w'procedure-declaration : 'u'procedure-heading \fB;\fR procedure-block . -procedure-heading : \fBprocedure\fR identifier [ formal-parameter-list ] . -procedure-identification : \fBprocedure \fR procedure-identifier . -procedure-identifier : identifier . -procedure-statement : procedure-identifier ( [ actual-parameter-list ] | read-parameter-list | readln-parameter-list | -\h'\w'procedure-statement : procedure-identifier ( ['u'write-parameter-list | writeln-parameter-list ) . -program : program-heading \fB;\fR program-block \fB.\fR . -program-block : block . -program-heading : \fBprogram\fR identifier [ \fB(\fR program-parameters \fB)\fR ] . -program-parameters : identifier-list . - -read-parameter-list : \fB(\fR [ file-variable \fB,\fR ] variable-access { \fB,\fR variable-access } \fB)\fR . -readln-parameter-list : [ \fB(\fR ( file-variable | variable-access ) { \fB,\fR variable-access } \fB)\fR ] . -record-section : identifier-list \fB:\fR type-denoter . -record-type : \fBrecord\fR field-list \fBend\fR . -record-variable : variable-access . -record-variable-list : record-variable { \fB,\fR record-variable } . -relational-operator : \fB=\fR | \fB<>\fR | \fB<\fR | \fB>\fR | \fB<=\fR | \fB>=\fR | \fBin\fR . -repeat-statement : \fBrepeat\fR statement-sequence \fBuntil\fR Boolean-expression . -repetitive-statement : repeat-statement | while-statement | for-statement . 
-result-type : simple-type-identifier | pointer-type-identifier . - -set-constructor : \fB[\fR [ member-designator { \fB,\fR member-designator } ] \fB]\fR . -set-type : \fBset\fR \fBof\fR base-type . -sign : \fB+\fR | \fB\-\fR . -simple-expression : [ sign ] term { adding-operator term } . -simple-statement : empty-statement | assignment-statement | procedure-statement | goto-statement . -simple-type-identifier : type-identifier . -.bp -.po -statement : [ label \fB:\fR ] ( simple-statement | structured-statement ) . -statement-part : compound-statement . -statement-sequence : statement { \fB;\fR statement } . -structured-statement : compound-statement | conditional-statement | repetitive-statement | with-statement . -subrange-type : constant \fB..\fR constant . - -tag-field : identifier . -tag-type : ordinal-type-identifier . -term : factor { multiplying-operator factor } . -type-definition : identifier \fB=\fR type-denoter . -type-definition-part : [ \fBtype\fR type-definition \fB;\fR { type-definition \fB;\fR } ] . -type-denoter : type-identifier | new-type . -type-identifier : identifier . - -unpacked-conformant-array-schema : \fBarray\fR \fB[\fR index-type-specification { \fB;\fR index-type-specification } \fB]\fR \fBof\fR -\h'\w'unpacked-conformant-array-schema : 'u'( type-identifier | conformant-array-schema ) . -unpacked-structured-type : array-type | record-type | set-type | file-type . -unsigned-constant : unsigned-number | character-string | constant-identifier | \fBnil\fR . -unsigned-number : unsigned-integer | unsigned-real . - -value-conformant-array-specification : identifier-list \fB:\fR conformant-array-schema . -value-parameter-specification : identifier-list \fB:\fR type-identifier . -variable-access : entire-variable | component-variable | identified-variable | buffer-variable . -variable-conformant-array-specification : \fBvar\fR identifier-list \fB:\fR conformant-array-schema . -variable-declaration : identifier-list \fB:\fR type-denoter . 
-variable-declaration-part : [ \fBvar\fR variable-declaration \fB;\fR { variable-declaration \fB;\fR } ] . -variable-identifier : identifier . -variable-parameter-specification : \fBvar\fR identifier-list \fB:\fR type-identifier . -variant : case-constant-list \fB:\fR \fB(\fR field-list \fB)\fR . -variant-part : \fBcase\fR variant-selector \fBof\fR variant { \fB;\fR variant } . -variant-selector : [ tag-field \fB:\fR ] tag-type . - -while-statement : \fBwhile\fR Boolean-expression \fBdo\fR statement . -with-statement : \fBwith\fR record-variable-list \fBdo\fR statement . -write-parameter : expression [ \fB:\fR expression [ \fB:\fR expression ] ] . -write-parameter-list : \fB(\fR [ file-variable \fB,\fR ] write-parameter { \fB,\fR write-parameter } \fB)\fR . -writeln-parameter-list : [ \fB(\fR ( file-variable | write-parameter ) { \fB,\fR write-parameter } \fB)\fR ] . -.fi -.vs -.bp -.po diff --git a/doc/pascal/test.doc b/doc/pascal/test.doc deleted file mode 100644 index 60220a0e9..000000000 --- a/doc/pascal/test.doc +++ /dev/null @@ -1,19 +0,0 @@ -.sp 2 -.NH -Testing the compiler -.nh -.sp -.LP -Although it is practically impossible to prove the correctness of a compiler, -a systematic method of testing the compiler is used to increase the confidence -that it will work satisfactorily in practice. The first step was to see if -the lexical analysis was performed correctly. For this purpose, the routine -LexScan() was used (see also the \-l option). Next we tested the parser -generated by LLgen, to see whether correct Pascal programs were accepted and -garbage was dealed with gracefully. The biggest test involved was the -validation of the semantic analysis. Simultaneously we tested the code -generation. First some small Pascal test programs were translated and -executed. When these programs work correctly, the Pascal validation suite -and a large set of Pascal test programs were compiled to see whether they -behaved in the manner the standard specifies. 
For more details about the -Pascal validation suite, the reader is referred to [PCV]. diff --git a/doc/pascal/titlepg.doc b/doc/pascal/titlepg.doc deleted file mode 100644 index af074c0f9..000000000 --- a/doc/pascal/titlepg.doc +++ /dev/null @@ -1,13 +0,0 @@ -\v'3i' -.ps 36 -The ACK Pascal Compiler -.ps 12 -.sp 30 -.ce 5 -.ft I -There is always something like something that there should not be. -.sp 2 -.ps 10 -For Whom The Bell Tolls -.ft R -Ernest Hemingway diff --git a/doc/pascal/transpem.doc b/doc/pascal/transpem.doc deleted file mode 100644 index ede79369a..000000000 --- a/doc/pascal/transpem.doc +++ /dev/null @@ -1,407 +0,0 @@ -.sp 1.5i -.de CL -.ft R -c\\$1 -.ft 5 - \fIcode statement-\\$1 -.ft 5 - \fBbra *\fRexit_label -.ft 5 -.. -.NH -Translation of Pascal to EM code -.nh -.LP -.sp -A short description of the translation of Pascal constructs to EM code is -given in the following paragraphs. The EM instructions and Pascal terminal -symbols are printed in \fBboldface\fR. A sentence in \fIitalics\fR is a -description of a group of EM (pseudo)instructions. -.sp -.NH 2 -Global Variables -.LP -.sp -For every global variable, a \fBbss\fR block is reserved. To enhance the -readability of the EM-code generated, the variable-identifier is used as -a data label to address the block. -.sp -.NH 2 -Expressions -.LP -.sp -Operands are always evaluated, so the execution of -.br -.ti +3m -\fBif\fR ( p <> nil ) \fBand\fR ( p^.value <> 0 ) \fBthen\fR ..... -.br -might cause a run-time error, if p is equal to nil. -.LP -The left-hand operand of a dyadic operator is almost always evaluated before -the right-hand side. 
Peculiar evaluations exist for the following cases: -.sp -the expression: set1 <= set2, is evaluated as follows : -.nf -- evaluate set2 -- evaluate set1 -- compute set2+set1 -- test set2 and set2+set1 for equality -.fi -.sp -the expression: set1 >= set2, is evaluated as follows : -.nf -- evaluate set1 -- evaluate set2 -- compute set1+set2 -- test set1 and set1+set2 for equality -.fi -.sp -Where allowed, according to the standard, constant integral expressions are -compile-time evaluated while an effort is made to report overflow on target -machine basis. The integral expressions are evaluated in the type \fIarith\fR. -The size of an arith is assumed to be at least the size of the integer type -on the target machine. If the target machine's integer size is less than the -size of an arith, overflow can be detected at compile-time. However, the -following call to the standard procedure new, \fInew(p, 3+5)\fR, is illegal, -because the second parameter is not a constant according to the grammar. -.sp -Constant floating expressions are not compile-time evaluated, because the -precision on the target machine and the precision on the machine on which the -compiler runs could be different. The boolean expression \fI(1.0 + 1.0) = 2.0\fR -could evaluate to false. -.sp -.NH 2 -Statements -.NH 3 -Assignment Statement - -\fRPASCAL : -.ti +3m -\f5(variable-access | function-identifier) \fB:=\f5 expression - -\fREM : -.nf -.in +3m -.ft I -evaluate expression -store in variable-access or function-identifier -.ft R -.in -3m -.fi - -In case of a function-identifier, a hidden temporary variable is used to -keep the function result. -.bp -.NH 3 -Goto Statement - -\fRPASCAL : -.ti +3m -\fBGOTO\f5 label - -\fREM : -.in +3m -Two cases can be distinguished : -.br -- local goto, -.ti +2m -in which a \fBbra\fR is generated. 
- -- non-local goto, -.in +2m -.ll -1i -a goto_descriptor is build, containing the ProgramCounter of the instruction -jumped to and an offset in the target procedure frame which contains the -value of the StackPointer after the jump. The code for the jump itself is to -load the address of the goto_descriptor, followed by a push of the LocalBase -of the target procedure and a \fBcal\fR $_gto. A message is generated to -indicate that a procedure or function contains a statement which is the -target of a non-local goto. -.ll +1i -.in -2m -.in -3m -.sp 2 -.NH 3 -If Statement - -\fRPASCAL : -.in +3m -.ft 5 -\fBIF\f5 boolean-expression \fBTHEN\f5 statement - -.in -3m -\fREM : -.nf -.in +3m - \fIevaluation boolean-expression - \fBzeq \fR*exit_label - \fIcode statement -\fRexit_label -.in -3m -.fi -.sp 2 -\fRPASCAL : -.in +3m -.ft 5 -\fBIF\f5 boolean-expression \fBTHEN\f5 statement-1 \fBELSE\f5 statement-2 - -.in -3m -\fREM : -.nf -.in +3m - \fIevaluation boolean-expression - \fBzeq \fR*else_label - \fIcode statement-1 - \fBbra \fR*exit_label -\fRelse_label - \fIcode statement-2 -\fRexit_label -.in -3m -.fi -.sp 2 -.NH 3 -Repeat Statement - -\fRPASCAL : -.in +3m -.ft 5 -\fBREPEAT\f5 statement-sequence \fBUNTIL\f5 boolean-expression - -.in -3m -\fREM : -.nf -.in +3m -\fRrepeat_label - \fIcode statement-sequence - \fIevaluation boolean-expression - \fBzeq\fR *repeat_label -.in -3m -.fi -.bp -.NH 3 -While Statement - -\fRPASCAL : -.in +3m -.ft 5 -\fBWHILE\f5 boolean-expression \fBDO\f5 statement - -.in -3m -\fREM : -.nf -.in +3m -\fRwhile_label - \fIevaluation boolean-expression - \fBzeq\fR *exit_label - \fIcode statement - \fBbra\fR *while_label -\fRexit_label -.in -3m -.fi -.sp 2 -.NH 3 -Case Statement -.LP -.sp -The case-statement is implemented using the \fBcsa\fR and \fBcsb\fR -instructions. 
- -\fRPASCAL : -.in +3m -\fBCASE\f5 case-expression \fBOF\f5 -.in +5m -case-constant-list-1 \fB:\f5 statement-1 \fB;\f5 -.br -case-constant-list-2 \fB:\f5 statement-2 \fB;\f5 -.br -\&. -.br -\&. -.br -case-constant-list-n \fB:\f5 statement-n [\fB;\f5] -.in -5m -\fBEND\fR -.in -3m -.sp 2 -.LP -.ll -1i -The \fBcsa\fR instruction is used if the range of the case-expression -value is dense, i.e. -.br -.ti +3m -\f5( upperbound \- lowerbound ) / number_of_cases\fR -.br -is less than the constant DENSITY, defined in the file \fIdensity.h\fR. - -If the range is sparse, a \fBcsb\fR instruction is used. - -.ll +1i -\fREM : -.nf -.in +3m - \fIevaluation case-expression - \fBbra\fR *l1 -.CL 1 -.CL 2 - . - . -.CL n -.ft R -\&.case_descriptor -.ft 5 - \fIgeneration case_descriptor -\fRl1 -.ft 5 - \fBlae\fR .case_descriptor -.ft 5 - \fBcsa\fR size of (case-expression) -\fRexit_label -.in -3m -.fi -.bp -.NH 3 -For Statement - -\fRPASCAL : -.in +3m -.ft 5 -\fBFOR\f5 control-variable \fB:=\f5 initial-value (\fBTO\f5 | \fBDOWNTO\f5) final-value \fBDO\f5 statement - -.ft R -.in -3m -The initial-value and final-value are evaluated at the beginning of the loop. -If the values are not constant, they are evaluated once and stored in a -temporary. - -EM : -.nf -.in +3m - \fIload initial-value - \fIload final-value - \fBbgt\fR exit-label (* DOWNTO : \fBblt\fI exit-label\fR *) - \fIload initial-value -\fRl1 - \fIstore in control-variable - \fIcode statement - \fIload control-variable - \fBdup\fI control-variable - \fIload final-value - \fBbeq\fR exit_label - \fBinc\fI control-variable\fR (* DOWNTO : \fBdec\fI control-variable\fR *) - \fBbra *\fRl1 -\fRexit_label -.in -3m -.fi - -Note: testing must be done before incrementing(decrementing) the -control-variable, -.br -\h'\w'Note: 'u'because wraparound could occur, which could lead to an infinite -loop. 
-.sp 2 -.NH 3 -With Statement - -\fRPASCAL : -.ti +3m -\fBWITH\f5 record-variable-list \fBDO\f5 statement - -.ft R -The statement -.ti +3m -\fBWITH\fR r\s-3\d1\u\s0, r\s-3\d2\u\s0, ..., r\s-3\dn\u\s0 \fBDO\f5 statement - -.ft R -is equivalent to -.in +3m -\fBWITH\fR r\s-3\d1\u\s0 \fBDO\fR - \fBWITH\fR r\s-3\d2\u\s0 \fBDO\fR - ... - \fBWITH\fR r\s-3\dn\u\s0 \fBDO\f5 statement - -.ft R -.in -3m -The translation of -.ti +3m -\fBWITH\fR r\s-3\d1\u\s0 \fBDO\f5 statement -.br -.ft R -is -.nf -.in +3m -\fIpush address of r\s-3\d1\u\s0 -\fIstore address in temporary -\fIcode statement -.in -3m -.fi - -.ft R -An occurrence of a field is translated into: -.in +3m -\fIload temporary -.br -\fIadd field-offset -.in -3m -.bp -.NH 2 -Procedure and Function Calls - -.ft R -In general, the call -.ti +5m -p(a\s-3\d1\u\s0, a\s-3\d2\u\s0, ...., a\s-3\dn\u\s0) -.br -is translated into the sequence: - -.in +5m -.nf -\fIevaluate a\s-3\dn\u\s0 -\&. -\&. -\fIevaluate a\s-3\d2\u\s0 -\fIevaluate a\s-3\d1\u\s0 -\fIpush localbase -\fBcal\fR $p -\fIpop parameters -.ft R -.fi -.in -5m - -i.e. the order of evaluation and binding of the actual-parameters is from -right to left. In general, a copy of the actual-parameter is made when the -formal-parameter is a value-parameter. If the formal-parameter is a -variable-parameter, a pointer to the actual-parameter is pushed. - -In case of a function call, a \fBlfr\fR is generated, which pushes the -function result on top of the stack. -.sp 2 -.NH 2 -Register Messages - -.ft R -A register message can be generated to indicate that a local variable is never -referenced indirectly. This implies that a register can be used for a variable. 
-We distinguish the following classes, given in decreasing priority: - -\(bu control-variable and final-value of a for-statement -.br -.ti +5m -to speed up testing, and execution of the body of the for-statement -.sp -\(bu record-variable of a with-statement -.br -.ti +5m -to improve the field selection of a record -.sp -\(bu remaining local variables and parameters -.sp 2 -.NH 2 -Compile-time optimizations - -.ft R -The only optimization that is performed is the evaluation of constant -integral expressions. The optimization of constructs like -.ti +5m -\fBif\f5 false \fBthen\f5 statement\fR, -.br -is left to either the peephole optimizer, or a global optimizer. diff --git a/doc/pascal/vrk.doc b/doc/pascal/vrk.doc deleted file mode 100644 index c5622a5d7..000000000 --- a/doc/pascal/vrk.doc +++ /dev/null @@ -1,23 +0,0 @@ -.TL - - - -The ACK Pascal Compiler -.AU -Aad Geudeke -Frans Hofmeester -.AI -Dept. of Mathematics and Computer Science -Vrije Universiteit -Amsterdam, The Netherlands -.LP -.ps 12 -.sp 24 -.ce 5 -.ft I -There is always something like something that there should not be. -.sp 2 -.ps 10 -For Whom The Bell Tolls -.ft R -Ernest Hemingway diff --git a/doc/pcref.doc b/doc/pcref.doc deleted file mode 100644 index f19bfc72f..000000000 --- a/doc/pcref.doc +++ /dev/null @@ -1,1204 +0,0 @@ -.\" $Id$ -.\" tbl pcref.doc | troff -.ds OF \\fBtest~off:~\\fR -.ds ON \\fBtest~on:~~\\fR -.ds AL \\fBtest~all:~\\fR -.ll 72n -.wh 0 hd -.wh 60 fo -.de hd -'sp 5 -.. -.de fo -'bp -.. -.tr ~ -. TITLE -.de TL -.sp 10 -.ce -\\fB\\$1\\fR -.sp 10 -.. -. AUTHOR -.de AU -.ce -by -.sp 2 -.ce -\\$1 -.. -. OTHER AUTHOR -.de OA -.sp 2 -.ce -(revised) -.sp 2 -.ce -\\$1 -.. -. DATE -.de DA -.sp 1 -.ce -( \\$1 ) -.. -. INSTITUTE -.de VU -.sp 3 -.ce 4 -Vakgroep Informatica -Vrije Universiteit -De Boelelaan 1081 -Amsterdam -.. -. PARAGRAPH -.de PP -.sp -.ti +5 -.. -.nr CH 0 1 -. CHAPTER -.de CH -.nr SH 0 1 -.bp -.in 0 -\\fB\\n+(CH.~\\$1\\fR -.PP -.. -. 
SUBCHAPTER -.de SH -.sp 3 -.in 0 -\\fB\\n(CH.\\n+(SH.~\\$1\\fR -.PP -.. -. INDENT START -.de IS -.sp -.in +5 -.. -. INDENT END -.de IE -.in -5 -.sp -.. -. DOUBLE INDENT START -.de DS -.sp -.in +5 -.ll -5 -.. -. DOUBLE INDENT END -.de DE -.ll +5 -.in -5 -.sp -.. -. EQUATION START -.de EQ -.sp -.nf -.. -. EQUATION END -.de EN -.fi -.sp -.. -. ITEM -.de IT -.sp -.in 0 -\\fBBS~\\$1:\\fR~\\ -.. -.de CS -.br -~-~\\ -.. -.br -.fi -.TL "Amsterdam Compiler Kit-Pascal reference manual" -.AU "Johan W. Stevenson" -.DA "January 4, 1983" -.OA "Hans van Eck" -.DA "May 1, 1989" -.VU -.CH "Introduction" -This document refers to the (1982) BSI standard for Pascal [1]. -Ack-Pascal complies with the requirements of level 1 of BS 6192: 1982, with -the exceptions as listed in this document. -.PP -The standard requires an accompanying document describing the -implementation-defined and implementation-dependent features, -the reaction on errors and the extensions to standard Pascal. -These four items will be treated in the rest of this document, -each in a separate chapter. -The other chapters describe the deviations from the standard and -the list of options recognized by the compiler. -.PP -The Ack-Pascal compiler produces code for an EM machine as defined in [2]. -It is up to the implementor of the EM machine to decide whether errors like -integer overflow, undefined operand and range bound error are recognized or not. -.PP -There does not (yet) exist a hardware EM machine. -Therefore, EM programs must be interpreted, or translated into -instructions for a target machine. -The Ack-Pascal compiler is currently available for use with the VAX, -Motorola MC68020, Motorola MC68000, -PDP-11, and Intel 8086 code-generators. -For the 8086, MC68000, and MC68020, -floating point emulation is used. This is made available with the \fI-fp\fP -option, which must be passed to \fIack\fP[3]. 
-.IE -.CH "Implementation-defined features" -For each implementation-defined feature mentioned in the BSI standard -we give the section number, the quotation from that section and the definition. -First we quote the definition of implementation-defined: -.DS -Possibly differing between processors, but defined for any particular -processor. -.DE -.IT 6.1.7 -Each string-character shall denote an implementation-defined value of the -required char-type. -.IS -All 7-bits ASCII characters except linefeed LF (10) are allowed. -.IE -.IT 6.4.2.2 -The values of type real shall be an implementation-defined subset -of the real numbers denoted as specified by 6.1.5 bu signed real. -.IS -The format of reals is not defined in EM. -Even the size of reals depends on the EM-implementation. -The compiler can be instructed, by the V-option, to use a different -size for real values. -The size of reals is preset by the calling program \fIack\fP -[3] to the proper size. -.IE -.IT 6.4.2.2 -The type char shall be the enumeration of a set of implementation-defined -characters, some possibly without graphic representations. -.IS -The 7-bits ASCII character set is used, where LF (10) denotes the -end-of-line marker on text-files. -.IT 6.4.2.2 -The ordinal numbers of the character values shall be values of integer-type, -that are implementation-defined, and that are determined by mapping -the character values on to consecutive non-negative integer values -starting at zero. -.IS -The normal ASCII ordering is used: ord('0')=48, ord('A')=65, ord('a')=97, etc. -.IE -.IT 6.6.5.2 -The post-assertions imply corresponding activities on the external entities, -if any, to which the file-variables are bound. These activities, and the -point at which they are actually performed, shall be -implementation-defined. -.IS -The reading and writing writing of objects on files is buffered. -This means that when a program terminates abnormally, IO may be -unfinished. Terminal IO is unbuffered. 
-Files are closed whenever they are rewritten or reset, or on -program termination. -.IT 6.7.2.2 -The predefined constant maxint shall be of integer-type and shall denote -an implementation-defined value, that satisfies the following conditions: -.sp 1 -.in +5 -.ti -4 -(a)~All integral values in the closed interval from -maxint to +maxint -shall be values of the integer-type. -.ti -4 -(b)~Any monadic operation performed on an integer value in this interval -shall be correctly performed according to the mathematical rules for -integer arithmetic. -.ti -4 -(c)~Any dyadic integer operation on two integer values in this same interval -shall be correctly performed according to the mathematical rules for -integer arithmetic, provided that the result is also in this interval. -.ti -4 -(d)~Any relational operation on two integer values in this same interval -shall be correctly performed according to the mathematical rules for -integer arithmetic. -.in -5 -.IS -The representation of integers in EM is a \fIn\fP*8-bit word using -two's complement arithmetic. -Where \fIn\fP is called wordsize. -The range of available integers depends on the EM implementation: -For 2-byte machines, the integers range from -32767 to +32767. For 4-byte -machines, the integers range from -2147483647 to 2147483647. -The number -maxint-1 may be used to indicate 'undefined'. -.IE -.IT 6.7.2.2 -The result of the real arithmetic operators and functions shall be -approximations to the corresponding mathematical results. The accuracy of -this approximation shall be implementation-defined -.IS -Since EM doesn't specify floating point format, it is not possible to -specify the accuracy. When the floating point emulation is used, and the -default size of reals is 8 bytes, the accuracy is 11 bits for the exponent, -and 53 bits for the mantissa. This gives an accuracy of about 16 digits, -and exponents ranging from -309 to +307. 
-.IE -.IT 6.9.3.1 -The default TotalWidth values for integer, Boolean and real types -shall be implementation-defined. -.IS -The defaults are: - integer 6 for 2-byte machines, 11 for 4-byte machines - Boolean 5 - real 14 -.IT 6.9.3.4.1 -ExpDigits, the number of digits written in an exponent part of a real, -shall be implementation-defined. -.IS -ExpDigits is defined as 3. This is sufficient for all implementations -currently available. When the representation would need more than 3 -digits, then the string '***' replaces the exponent. -.IT 6.9.3.4.1 -The character written as part of the representation of -a real to indicate the beginning of the exponent part shall be -implementation-defined, either 'E' or 'e'. -.IS -The exponent part starts with 'e'. -.IT 6.9.3.5 -The case of the characters written as representation of the -Boolean values shall be implementation-defined. -.IS -The representations of true and false are 'true' and 'false'. -.IT 6.9.5 -The effect caused by the standard procedure page -on a text file shall be implementation-defined. -.IS -The ASCII character form feed FF (12) is written. -.IT 6.10 -The binding of the variables denoted by the program-parameters -to entities external to the program shall be implementation-defined if -the variable is of a file-type. -.IS -The program parameters must be files and all, except input and output, -must be declared as such in the program block. -.PP -The program parameters input and output, if specified, will correspond -with the UNIX streams 'standard input' and 'standard output'. -.PP -The other program parameters will be mapped to the argument strings -provided by the caller of this program. -The argument strings are supposed to be path names of the files to be -opened or created. -The order of the program parameters determines the mapping: -the first parameter is mapped onto the first argument string etc. -Note that input and output are ignored in this mapping. 
-.PP -The mapping is recalculated each time a program parameter -is opened for reading or writing by a call to the standard procedures -reset or rewrite. -This gives the programmer the opportunity to manipulate the list -of string arguments using the external procedures argc, argv and argshift -available in libpc [6]. -.IT 6.10 -The effect of an explicit use of reset or rewrite -on the standard textfiles input or output shall be implementation-defined. -.IS -The procedures reset and rewrite are no-ops -if applied to input or output. -.CH "Implementation-dependent features" -For each implementation-dependent feature mentioned in the BSI standard, -we give the section number, the quotation from that section and the way -this feature is treated by the Ack-Pascal system. -First we quote the definition of 'implementation-dependent': -.DS -Possibly differing between processors and not necessarily defined for any -particular processor. -.DE -.IT 6.7.2.1 -The order of evaluation of the operands of a dyadic operator -shall be implementation-dependent. -.IS -Operands are always evaluated, so the program part -.EQ - if (p<>nil) and (p^.value<>0) then -.EN -is probably incorrect. -.PP -The left-hand operand of a dyadic operator is almost always evaluated -before the right-hand side. -Some peculiar evaluations exist for the following cases: -.IS -.ti -3 -1.~\ -the modulo operation is performed by a library routine to -check for negative values of the right operand. 
-.sp -.ti -3 -2.~\ -the expression -.EQ - set1 <= set2 -.EN -where set1 and set2 are compatible set types is evaluated in the -following steps: -.IS -.CS -evaluate set2 -.CS -evaluate set1 -.CS -compute set2+set1 -.CS -test set2 and set2+set1 for equality -.IE -.sp -.ti -3 -3.~\ -the expression -.EQ - set1 >= set2 -.EN -where set1 and set2 are compatible set types is evaluated in the following steps: -.IS -.CS -evaluate set1 -.CS -evaluate set2 -.CS -compute set1+set2 -.CS -test set1 and set1+set2 for equality -.IE -.IE -.IT 6.7.3 -The order of evaluation, accessing and binding -of the actual-parameters for functions -shall be implementation-dependent. -.IS -The order of evaluation is from right to left. -.IT 6.8.2.2 -The decision as to the order of accessing the variable and evaluating -the expression in an assignment-statement, shall be -implementation-dependent. -.IS -The expression is evaluated first. -.IT 6.8.2.3 -The order of evaluation and binding of the actual-parameters for procedures -shall be implementation-dependent. -.IS -The same as for functions. -.IT 6.9.5 -The effect of inspecting a text file to which the page -procedure was applied during generation is -implementation-dependent. -.IS -The formfeed character written by page is -treated like a normal character, with ordinal value 12. -.IT 6.10 -The binding of the variables denoted by the program-parameters -to entities external to the program shall be implementation-dependent unless -the variable is of a file-type. -.IS -Only variables of a file-type are allowed as program parameters. -.IE -.CH "Error handling" -There are three classes of errors to be distinguished. -In the first class are the error messages generated by the compiler. -The second class consists of the occasional errors generated by the other -programs involved in the compilation process. 
-Errors of the third class are the errors as defined in the standard by: -.DS -An error is a violation by a program of the requirements of this standard -that a processor is permitted to leave undetected. -.DE -.SH "Compiler errors" -Errors are written on the standard error output. Each line has the form: -.br -\fIfile\fP, line \fInumber\fP: \fIdescription\fP -.br -Every time the compiler detects an error that does not have influence -on the code produced by the compiler or on the syntax decisions, a warning -message is given. -If only warnings are generated, compilation proceeds and probably results -in a correctly compiled program. -.PP -Sometimes the compiler produces several errors for the same line. They are -only shown up to a maximum of 5 errors per line. Warnings are also shown up -to a maximum of 5 per line. -.PP -Extensive treatment of these errors is outside the scope of this manual. -.SH "Runtime errors" -Errors detected at run time cause an error message to be generated on the -diagnostic output stream (UNIX file descriptor 2). -The message consists of the name of the program followed by a message -describing the error, possibly followed by the source line number. -Unless the -L-option is turned on, the compiler generates code to keep track -of which source line causes which EM instructions to be generated. -It depends on the EM implementation whether these LIN instructions -are skipped or executed. -.PP -For each error mentioned in the standard we give the section number, -the quotation from that section and the way it is processed by the -Pascal-compiler or runtime system. -.PP -For detected errors the corresponding message -and trap number are given. -Trap numbers are useful for exception-handling routines. -Normally, each error causes the program to terminate. -By using exception-handling routines one can -ignore errors or perform alternate actions. -Only some of the errors can be ignored -by restarting the failing instruction. -These errors are marked as non-fatal, -all others as fatal.
-A list of errors with trap number between 0 and 63 -(EM errors) can be found in [2]. -Errors with trap number between 64 and 127 (Pascal errors) are listed in [7]. -.IT 6.4.6 -It shall be an error if a value of type T2 must be -assignment-compatible with type T1, while -T1 and T2 are compatible ordinal-types and the value of -type T2 is not in the closed interval specified by T1. -.IS -The compiler distinguishes between array-index expressions and the other -places where assignment-compatibility is required. -.PP -Array subscripting is done using the EM array instructions. -These instructions have three arguments: the array base address, -the index and the address of the array descriptor. -An array descriptor describes one dimension by three values: -the lower bound on the index, the number of elements minus one and the -element-size. -It depends on the EM implementation whether these bounds are checked. Since -most implementations don't, an extra compiler flag is added to force these -checks. -.br -The other places where assignment-compatibility is required are: -.IS -.CS -assignment -.CS -value parameters -.CS -procedures read and readln -.CS -the final value of the for-statement -.IE -For these places the compiler generates an EM range check instruction, except -when the R-option is turned on, or when the range of values of T2 -is enclosed in the range of T1. -If the expression consists of a single variable and if that variable -is of a subrange type, -then the subrange type itself is taken as T2, not its host-type. -Therefore, a range instruction is only generated if T1 is a subrange type -and if the expression is a constant, an expression with two or more -operands, or a single variable with a type not enclosed in T1. -If a constant is assigned, then the EM optimizer removes the range check -instruction, except when the value is out of bounds. -.PP -It depends on the EM implementation whether the range check instruction -is executed or skipped. 
-.IT 6.4.6 -It shall be an error if a value of type T2 must be -assignment-compatible with type T1, while T1 and T2 are compatible -set-types and any member of the value of type T2 -is not in the closed interval specified by the base-type -of the type T1. -.IS -This error is not detected. -.IT 6.5.3.3 -It shall be an error if a component of a variant-part of a variant, -where the selector of the variant-part is not a field, -is accessed unless the variant is active for the entirety of each -reference and access to each component of the variant. -.IS -This error is not detected. -.IT 6.5.4 -It shall be an error if -the pointer-variable of an identified-variable either denotes a -nil-value or is undefined. -.IS -The EM definition does not specify the binary representation of pointer -values, so that it is not possible to choose an otherwise illegal -binary representation for the pointer value NIL. -Rather arbitrarily, the compiler uses the integer value zero to represent NIL. -For all current implementations this does not cause problems. -.PP -The size of pointers depends on the implementation and is -preset in the compiler by \fIack\fP [3]. -The compiler can be instructed, by the V-option, to use -another size for pointer objects. -NIL is represented here by the appropriate number of zero words. -.PP -It depends on the EM implementation whether de-referencing of a pointer -with value NIL causes an error. -.IE -.IT 6.5.4 -It shall be an error to remove the identifying-value of an identified -variable from its pointer-type when a reference to the variable exists. -.IS -When the identified variable is an element of the record-variable-list of -a with-statement, a warning is given at compile-time. Otherwise, this error -is not detected. -.IT 6.5.5 -It shall be an error to alter the value of a file-variable f when a -reference to the buffer-variable f^ exists. -.IS -When f is altered when it is an element of the record-variable-list of a -with-statement, a warning is given.
When a buffer-variable is used as a -variable-parameter, an error is given. This is done at compile-time. -.IT 6.6.5.2 -It shall be an error if -the stated pre-assertion does not hold immediately -prior to any use of the file handling procedures -rewrite, put, reset and get. -.IS -For each of these four operations the pre-assertions -can be reformulated as: -.sp -rewrite(f):~no pre-assertion. -.br -put(f):~~~~~f is opened for writing and f^ is not undefined. -.br -reset(f):~~~f exists. -.br -get(f):~~~~~f is opened for reading and eof(f) is false. -.sp -The following errors are detected for these operations: -.sp -rewrite(f): -.in +10 -.ti -5 -more args expected, trap 64, fatal: -.br -f is a program-parameter and the corresponding -file name is not supplied by the caller of the program. -.ti -5 -rewrite error, trap 101, fatal: -.br -the caller of the program lacks the necessary -access rights to create the file in the file system -or operating system problems like table overflow -prevent creation of the file. -.in -10 -.sp -put(f): -.in +10 -.ti -5 -file not yet open, trap 72, fatal: -.br -reset or rewrite are never applied to the file. -The checks performed by the run time system are not foolproof. -.ti -5 -not writable, trap 96, fatal: -.br -f is opened for reading. -.ti -5 -write error, trap 104, fatal: -.br -probably caused by file system problems. -For instance, the file storage is exhausted. -Because IO is buffered to improve performance, -it might happen that this error occurs if the -file is closed. -Files are closed whenever they are rewritten or reset, or on -program termination. -.in -10 -.sp -reset(f): -.in +10 -.ti -5 -more args expected, trap 64, fatal: -.br -same as for rewrite(f). -.ti -5 -reset error, trap 100, fatal: -.br -f does not exist, or the caller has insufficient access rights, or -operating system tables are exhausted. -.in -10 -.sp -get(f): -.in +10 -.ti -5 -file not yet open, trap 72, fatal: -.br -as for put(f). 
-.ti -5 -not readable, trap 97, fatal: -.br -f is opened for writing. -.ti -5 -end of file, trap 98, fatal: -.br -eof(f) is true just before the call to get(f). -.ti -5 -read error, trap 103, fatal: -.br -unlikely to happen. Probably caused by hardware problems -or by errors elsewhere in the program that destroyed -the file information maintained by the run time system. -.ti -5 -truncated, trap 99, fatal: -.br -the file is not properly formed by an integer -number of file elements. -For instance, the size of a file of integer is odd. -.ti -5 -non-ASCII char read, trap 106, non-fatal: -.br -the character value of the next character-type -file element is out of range (0..127). -Only for text files. -.in -10 -.IT 6.6.5.3 -It shall be an error if a variant of a variant-part within the new -variable becomes active and a different variant of the variant-part is -one of the specified variants. -.IS -This error is not detected. -.IT 6.6.5.3 -It shall be an error to use dispose(q) if the identifying variable has been -allocated using the form new(p,c1,...,cn). -.IS -This error is not detected. However, this error can cause more memory -to be freed than was allocated. -Dispose causes a fatal trap 73 when memory already on the free -list is freed again. -.IT 6.6.5.3 -It shall be an error to use dispose(q,k1,...,km) if the identifying -variable has been allocated using the form new(p,c1,...,cn) and m is not -equal to n. -.IS -This error is not detected. However, this error can cause more memory -to be freed than was allocated. -Dispose causes a fatal trap 73 when memory already on the free -list is freed again. -.IT 6.6.5.3 -It shall be an error if the variants of a variable to be disposed -are different from those specified by the case-constants to dispose. -.IS -This error is not detected. -.IT 6.6.5.3 -It shall be an error if the value of the pointer parameter of dispose has -nil-value or is undefined.
-.IS -The same comments apply as for de-referencing NIL or undefined pointers. -.IT 6.6.5.3 -It shall be an error if a variable created using the second form of new is -accessed by the identified variable of the variable-access of a factor, -of an assignment-statement, or of an actual-parameter. -.IS -This error is not detected. -.IT 6.6.6.2 -It shall be an error if the value of sqr(x) does not exist. -.IS -This error is detected for real-type arguments (real overflow, -trap 4, non-fatal). -.IT 6.6.6.2 -It shall be an error if x in ln(x) is smaller than or equal to 0. -.IS -This error is detected (error in ln, trap 66, non-fatal) -.IT 6.6.6.2 -It shall be an error if x in sqrt(x) is smaller than 0. -.IS -This error is detected (error in sqrt, trap 67, non-fatal) -.sp -In addition to these errors, overflow in the expression exp(x) is -detected (error in exp, trap 65, non-fatal; real overflow, trap 4, non-fatal) -.sp -.IT 6.6.6.3 -It shall be an error if -the integer value of trunc(x) does not exist. -.IS -It depends on the implementations whether this error is detected. -The floating-point emulation detects this error (conversion error, -trap 10, non-fatal). -.IT 6.6.6.3 -It shall be an error if -the integer value of round(x) does not exist. -.IS -It depends on the implementations whether this error is detected. -The floating-point emulation detects this error (conversion error, -trap 10, non-fatal). -.IT 6.6.6.4 -It shall be an error if -the integer value of ord(x) does not exist. -.IS -This error can not occur, because the compiler will not allow -such ordinal types. -.IT 6.6.6.4 -It shall be an error if -the character value of chr(x) does not exist. -.IS -Except when the R-option is off, the compiler generates an EM -range check instruction. The effect of this instruction depends on the -EM implementation. -.IT 6.6.6.4 -It shall be an error if the value of succ(x) does not exist. -.IS -Same comments as for chr(x). 
-.IT 6.6.6.4 -It shall be an error if the value of pred(x) does not exist. -.IS -Same comments as for chr(x). -.IT 6.6.6.5 -It shall be an error if f in eof(f) is undefined. -.IS -This error is detected (file not yet open, trap 72, fatal). -.IT 6.6.6.5 -It shall be an error if -f in eoln(f) is undefined, or if eof(f) is true at that time. -.IS -The following errors may occur: -.IS -file not yet open, trap 72, fatal; -.br -not readable, trap 97, fatal; -.br -end of file, trap 98, fatal. -.IE -.IT 6.7.1 -It shall be an error if a variable-access used as an operand -in an expression is undefined at the time of its use. -.IS -The compiler performs some limited checks to see if identifiers are -used before they are set. Since it can not always be sure (one could, for -instance, jump out of a loop), only a warning is generated. When an -expression contains a function-call, an error occurs if the -function is not assigned at run-time. -.IT 6.7.2.2 -A term of the form x/y shall be an error if y is zero. -.IS -It depends on the EM implementation whether this error is detected. On some -machines, a trap may occur. -.IT 6.7.2.2 -It shall be an error if j is zero in 'i div j'. -.IS -It depends on the EM implementation whether this error is detected. On some -machines, a trap may occur. -.IE -.IT 6.7.2.2 -It shall be an error if -j is zero or negative in i MOD j. -.IS -This error is detected (only positive j in 'i mod j', trap 71, non-fatal). -.IT 6.7.2.2 -It shall be an error if the result of any operation on integer -operands is not performed according to the mathematical -rules for integer arithmetic. -.IS -The reaction depends on the EM implementation. Most implementations, -however, will not notice integer overflow. -.IT 6.8.3.5 -It shall be an error if none of the case-constants is equal to the -value of the case-index upon entry to the case-statement. -.IS -This error is detected (case error, trap 20, fatal).
-.IT 6.9.1 -It shall be an error if the sequence of characters read looking for an -integer does not form a signed-integer as specified in 6.1.5. -.IS -This error is detected (digit expected, trap 105, non-fatal). -.IT 6.9.1 -It shall be an error if the sequence of characters read looking for a -real does not form a signed-number as specified in 6.1.5. -.IS -This error is detected (digit expected, trap 105, non-fatal). -.IT 6.9.1 -When read is applied to f, it shall be an error if the buffer-variable f^ -is undefined or the pre-assertions for get do not hold. -.IS -This error is detected (see get(f)). -.IT 6.9.3 -When write is applied to a textfile f, it shall be an error if f is -undefined or f is opened for reading. -.IS -This error is detected (see put(f)). Furthermore, this error is also -detected when f is not a textfile. -.IT 6.9.3.1 -The values of TotalWidth or FracDigits shall be greater than or equal to -one; it shall be an error if either value is less than one. -.IS -When either value is less than zero, an error (illegal field width, trap -75, non-fatal) occurs. Zero values are allowed, in order to maintain some -compatibility with the old Ack-Pascal compiler. -.IT 6.9.5 -It shall be an error if the pre-assertion required for writeln(f) does not -hold prior to the invocation of page(f). -.IS -This error is detected (see put(f)). -.CH "Extensions to the standard" -.IS -.ti -3 -1.~\ -External routines -.sp -In addition to the required directive 'forward' the Ack-Pascal compiler recognizes -the directive 'extern'. -This directive tells the compiler that the procedure block of this -procedure will not be present in the current program. -The code for the body of this procedure must be included at a later -stage of the compilation process. -.PP -This feature allows one to build libraries containing often used routines. -These routines do not have to be included in all the programs using them.
-Maintenance is much simpler if there is only one library module to be -changed instead of many Pascal programs. -.PP -Another advantage is that these library modules may be written in a different -language, for instance C or the EM assembly language. -This is useful for accessing some specific EM instructions not generated -by the Pascal compiler. Examples are the system call routines and some -floating point conversion routines. -Another motive could be the optimization of some time-critical program parts. -.PP -The use of external routines, however, is dangerous. -The compiler normally checks for the correct number and type of parameters -when a procedure is called and for the result type of functions. -If an external routine is called these checks are not sufficient, -because the compiler can not check whether the procedure heading of the -external routine as given in the Pascal program matches the actual routine -implementation. -It should be the loader's task to check this. -However, the current loaders are not that smart. -Another solution is to check at run time, at least the number of words -for parameters. Some EM implementations check this. -.PP -For those who wish to use the interface between C and Pascal we -give an incomplete list of corresponding formal parameters in C and Pascal. -.sp 1 -.TS -l l. -Pascal C -a:integer int a -a:char int a -a:boolean int a -a:real double a -a:^type type *a -var a:type type *a -procedure a(pars) struct { - void (*a)() ; - char *static_link ; - } -function a(pars):type struct { - type (*a)() ; - char *static_link ; - } -.TE -The Pascal runtime system uses the following algorithm when calling -functions/procedures passed as parameters. -.TS -l l. -if ( static_link ) (*a)(static_link,pars) ; -else (*a)(pars) ; -.TE -.ti -3 -2.~\ -Separate compilation. -.sp -The compiler is able to (separately) compile a collection of declarations, -procedures and functions to form a library.
-The library may be linked with the main program, compiled later. -The syntax of these modules is -.EQ - module = [constant-definition-part] - [type-definition-part] - [var-declaration-part] - [procedure-and-function-declaration-part] -.EN -The compiler accepts a program or a module: -.EQ - unit = program | module -.EN -All variables declared outside a module must be imported -by parameters, even the files input and output. -Access to a variable declared in a module is only possible -using the procedures and functions declared in that same module. -By giving the correct procedure/function heading followed by the -directive 'extern' procedures and functions declared in -other units may be used. -.sp -.ti -3 -3.~\ -Assertions. -.sp -When the s-option is off, Ack-Pascal compiler recognizes an additional -statement, the assertion. Assertions can be used as an aid in debugging -and documentation. The syntax is: -.EQ - assertion = 'assert' Boolean-expression -.EN -An assertion is a simple-statement, so -.EQ - simple-statement = [assignment-statement | - procedure-statement | - goto-statement | - assertion - ] -.EN -An assertion causes an error if the Boolean-expression is false. -That is its only purpose. -It does not change any of the variables, at least it should not. -Therefore, do not use functions with side-effects in the Boolean-expression. -If the a-option is turned on, then assertions are skipped by the -compiler. 'assert' is not a word-symbol (keyword) and may be used as identifier. -However, assignment to a variable and calling of a procedure with that -name will be impossible. -If the s-option is turned on, the compiler will not know a thing about -assertions, so using assertions will then give a parse error. -.sp -.ti -3 -4.~\ -Additional procedures. -.sp -Three additional standard procedures are available: -.IS -.IS -.ti -8 -halt:~~~a call of this procedure is equivalent to jumping to the -end of the program. It is always the last statement executed. 
-The exit status of the program may be supplied -as optional argument. If not, it will be zero. -.ti -8 -release: -.ti -8 -mark:~~~for most applications it is sufficient to use the heap as second stack. -Mark and release are suited for this type of use, more suited than dispose. -mark(p), with p of type pointer, stores the current value of the -heap pointer in p. release(p), with p initialized by a call -of mark(p), restores the heap pointer to its old value. -All the heap objects, created by calls of new between the call of -mark and the call of release, are removed and the space they used -can be reallocated. -Never use mark and release together with dispose! -.sp -.in -10 -.ti -3 -5.~\ -UNIX interfacing. -.sp -If the c-option is turned on, then some special features are available -to simplify an interface with the UNIX environment. -First of all, the compiler allows for a different type -of string constants. -These string constants are delimited by double quotes ('"'). -To put a double quote into these strings, the double quote must be repeated, -like the single quote in normal string constants. -These special string constants are terminated by a zero byte (chr(0)). -The type of these constants is a pointer to a packed array of characters, -with lower bound 1 and unknown upper bound. -.br -Secondly, the compiler predefines a new type identifier 'string' denoting -this just described string type. -.PP -These features are only useful for declaration of -constants and variables of type 'string'. -String objects may not be allocated on the heap and string pointers -may not be de-referenced. -Still these strings are very useful in combination with external routines. -The procedure write is extended to print these zero-terminated -strings correctly. -.sp -.ti -3 -6.~\ -Double length (32 bit) integers. -.sp -If the d-option is turned on, then the additional type 'long' is known -to the compiler. 
-By default, long variables have integer values in the -range -2147483647..+2147483647, but this can be changed with the -V option -(if the backend can support this). -Long constants can not be declared. -Longs can not be used as control-variables. -It is not allowed to form subranges of type long. -All operations allowed on integers are also -allowed on longs and are indicated by the same -operators: '+', '-', '*', '/', 'div', 'mod'. -The procedures read and write have been extended to handle long -arguments correctly. It is possible to read longs from a file of integers -and vice-versa, but only if longs and integers have the same size. -The default width for longs is 11. -The standard procedures 'abs' and 'sqr' have been extended to work -on long arguments. -Conversion from integer to long, long to real, -real to long and long to integer are automatic, like the conversion -from integer to real. -These conversions may cause a -.IS -conversion error, trap 10, non-fatal -.IE -.sp -.ti -3 -7.~\ -Underscore as letter. -.sp -The character '_' may be used in forming identifiers, if the u- or U-option -is turned on. It is forbidden to start identifiers with underscores, since -this may cause name-clashes with run-time routines. -.sp -.ti -3 -8.~\ -Zero field width in write. -.sp -Zero TotalWidth arguments are allowed. No characters are written for -character, string or Boolean type arguments then. A zero FracDigits -argument for fixed-point representation of reals causes the fraction and -the character '.' to be suppressed. -.sp -.ti -3 -9.~\ -Pre-processing. -.sp -If the very first character of a file containing a Pascal -program is the sharp ('#', ASCII 23(hex)) the file is preprocessed -in the same way as C programs. -Lines beginning with a '#' are taken as preprocessor command lines -and not fed to the Pascal compiler proper. 
-C style comments, /*......*/, are removed by the C preprocessor, -thus C comments inside Pascal programs are also removed when they -are fed through the preprocessor. -.CH "Deviations from the standard" -Ack-Pascal deviates from the standard proposal in the following ways: -.IS -.ti -3 -1.~\ -Standard procedures and functions are not allowed as parameters in Ack-Pascal. -The same result can be obtained with negligible loss of performance -by declaring some user routines like: -.EQ - function sine(x:real):real; - begin - sine:=sin(x) - end; -.EN -.sp -.ti -3 -2.~\ -The standard procedures read, readln, write and writeln are implemented as -word-symbols, and can therefore not be redeclared. -.CH "Compiler options" -Some options of the compiler may be controlled by using "{$....}". -Each option consists of a lower case letter followed by +, - or an unsigned -number. -Options are separated by commas. -The following options exist: -.in 8 -.sp -.ti -8 -a~+/-~~~\ -this option switches assertions on and off. -If this option is on, then code is included to test these assertions -at run time. Default +. -.sp -.ti -8 -c~+/-~~~\ -this option, if on, allows the use of C-type string constants -surrounded by double quotes. -Moreover, a new type identifier 'string' is predefined. -Default -. -.sp -.ti -8 -d~+/-~~~\ -this option, if on, allows the use of variables of type 'long'. -Default -. -.sp -.ti -8 -i~~\ -with this flag the setsize for a set of integers can be -manipulated. -The number must be the number of bits per set. -The default value is wordsize-1. -.sp -.ti -8 -l~+/-~~~\ -if + then code is inserted to keep track of the source line number. -When this flag is switched on and off, an incorrect line number may appear -if the error occurs in a part of the program for which this flag is off. -These same line numbers are used for the profile, flow and count options -of the EM interpreter em [5]. -Default +. 
-.sp -.ti -8 -r~+/-~~~\ -if + then code is inserted to check subrange variables against -lower and upper subrange limits. -Default +. -.sp -.ti -8 -s~+/-~~~\ -if + then the compiler will hunt for places in the program -where non-standard features are used, and for each place found -it will generate a warning. Default -. -.sp -.ti -8 -t~+/-~~~\ -if + then each time a procedure is entered, the routine 'procentry' is -called, and each time a procedure exits, the procedure 'procexit' is -called. Both 'procentry' and 'procexit' have a 'string' as parameter. This -means that when a user specifies his or her own procedures, the c-option -must be used. Default procedures are present in the run time library. -Default -. -.sp -.ti -8 -u~+/-~~~\ -if + then the character '_' is treated like a letter, -so that it may be used in identifiers. -Procedure and function identifiers are not allowed to start with an -underscore because they may collide with library routine names. -Default -. -.in 0 -.sp -Some of these flags (c, d, i, s, u, C and U) are only effective when -they appear before the 'program' symbol. The others may be switched -on and off. -.PP -A very powerful debugging tool is the knowledge that inaccessible statements -and useless tests are removed by the EM optimizer. For instance, a -statement like: -.sp -.nf - if debug then - writeln('initialization done'); -.fi -.sp -is completely removed by the optimizer if debug is a constant with -value false. -The first line is removed if debug is a constant with value true. -Of course, if debug is a variable nothing can be removed. -.PP -A disadvantage of Pascal, the lack of preinitialized data, can be -diminished by making use of the possibilities of the EM optimizer. -For instance, initializing an array of reserved words is sometimes -optimized into 3 EM instructions. To maximize this effect -variables must be initialized as much as possible in order of declaration and array entries -in order of decreasing index. 
-.CH "References" -.in +5 -.ti -5 -[1]~~\ -BSI standard BS 6192: 1982 (ISO 7185). -.sp -.ti -5 -[2]~~\ -A.S.Tanenbaum, J.W.Stevenson, Hans van Staveren, E.G.Keizer, -"Description of a machine architecture for use with block structured languages", -Informatica rapport IR-81. -.sp -.ti -5 -[3]~~\ -UNIX manual ack(I). -.sp -.ti -5 -[4]~~\ -UNIX manual ld(I). -.sp -.ti -5 -[5]~~\ -UNIX manual em(I). -.sp -.ti -5 -[6]~~\ -UNIX manual libpc(VII) -.sp -.ti -5 -[7]~~\ -UNIX manual pc_prlib(VII) diff --git a/doc/peep.doc b/doc/peep.doc deleted file mode 100644 index 49c2c6c5f..000000000 --- a/doc/peep.doc +++ /dev/null @@ -1,521 +0,0 @@ -.\" $Id$ -.TL -Internal documentation on the peephole optimizer -.br -from the Amsterdam Compiler Kit -.NH 1 -Introduction -.PP -Part of the Amsterdam Compiler Kit is a program to do -peephole optimization on an EM program. -The optimizer scans the program to match patterns from a table -and if found makes the optimization from the table, -and with the result of the optimization -it tries to find yet another optimization -continuing until no more optimizations are found. -.PP -Furthermore it does some optimizations that can not be called -peephole optimizations for historical reasons, -like branch chaining and the deletion of unreachable code. -.PP -The peephole optimizer consists of three parts -.IP 1) -A driving table -.IP 2) -A program translating the table to internal format -.IP 3) -C code compiled with the table to make the optimizer proper -.PP -In this document the table format, internal format and -data structures in the optimizer will be explained, -plus a hint on what the code does where it might not be obvious. -It is a simple program mostly. -.NH 1 -Table format -.PP -The driving table consists of pattern/replacement pairs, -in principle one per line, -although a line starting with white space is considered -a continuation line for the previous. 
-The general format is: -.DS -optimization : pattern ':' replacement '\en' -.sp -pattern : EMlist optional_boolean_expression -.sp -replacement : EM_plus_operand_list -.DE -Example of a simple one -.DS -loc stl $1==0 : zrl $2 -.DE -There is no real limit for the length of the pattern or the replacement, -the replacement might even be longer than the pattern, -and expressions can be made arbitrarily complicated. -.PP -The expressions in the table are made of the following pieces: -.IP - -Integer constants -.IP - -$\fIn\fP, standing for the operand of the \fIn\fP'th EM -instruction in the pattern, -undefined if that instruction has no operand. -.IP - -w, standing for the wordsize of the code optimized. -.IP - -p, for the pointersize. -.IP - -defined(expr), true if expression is defined -.IP - -samesign(expr,expr), true if expressions have the same sign. -.IP - -sfit(expr,expr), ufit(expr,expr), -true if the first expression fits signed or unsigned in the number -of bits given in the second expression. -.IP - -rotate(expr,expr), -first expression rotated left the number of bits given by the second expression. -.IP - -notreg(expr), -true if the local with the expression as number is not a candidate to put -in a register. -.IP - -rom(\fIn\fP,expr), contents of the rom descriptor at index expr that -is associated with the global label that should be the argument of -the \fIn\fP'th EM instruction. -Undefined if such a thing does not exist. -.PP -The usual arithmetic operators may be used on integer values, -if any operand is undefined the expression is undefined, -except for the defined() function above. -An undefined expression used for its truth value is false. -All arithmetic on local label operands is forbidden, -only things allowed are tests for equality. -Arithmetic on global labels makes sense, -i.e. one can add a global label and a constant, -but not two global labels. -.PP -In the table one can use five additional EM instructions in patterns. 
-These are: -.IP lab -Stands for a local label -.IP LLP -Load Local Pointer, translates into a -.B lol -or into a -.B ldl -depending on the relationship between wordsize and pointersize. -.IP LEP -Load External Pointer, translates into a -.B loe -or into a -.B lde . -.IP SLP -Store Local Pointer, -.B stl -or -.B sdl . -.IP SEP -Store External Pointer, -.B ste -or -.B sde . -.PP -There is only one peephole optimizer, -so the substitutions to be made for the last four instructions -are made at run time before the first optimizations are made. -.NH 1 -Internal format -.PP -The translating program, -.I mktab -converts the table into an array of bytes where all -patterns follow unaligned. -Format of a pattern is: -.IP 1) -One byte for high byte of hash value, -will be explained later on. -.IP 2) -Two bytes for the index of the next pattern in a chain. -.IP 3) -An integer\u*\d, -.FS -* An integer is encoded as a byte when less than 255, -otherwise as a byte containing 255 followed by two -bytes with the real value. -.FE -pattern length. -.IP 4) -The list of pattern opcodes, one per byte. -.IP 5) -An integer expression index, 0 if not used. -.IP 6) -An integer, replacement length. -.IP 7) -A list of pairs consisting of a one byte opcode and an integer -expression index. -.PP -The expressions are kept in an array of triples, -implementing a binary tree. -The -.I mktab -program tries to minimize the number of triples by reusing -duplicates and even reverses the operands of commutative operators -when doing so would spare a triple. -.NH 1 -A tour through the sources -.PP -Now we will walk through the sources and note things of interest. -.NH 2 -The header files -.PP -The header files are the place where data structures and options reside. -.NH 3 -alloc.h -.PP -In the header file alloc.h several defines can be used to select various -kinds of core allocation schemes. 
-This is important on small machines like the PDP-11 since a complete -procedure must be in core at the same space, -and the peephole optimizer should not be the limiting factor in -determining the maximum size of procedures if possible. -Options are: -.IP - -USEMALLOC, standard malloc() and free() are used instead of the own -core allocation package. -Not recommended unless the own package does not work on some bizarre -machine. -.IP - -COREDEBUG, prints large amounts of information about core management. -Not recommended unless the code is changed and it stops working. -.IP - -SEPID, defining this will add an extra procedure that will -go through a lot of work to scrape the last bytes together if the -system won't provide more. -This is not a good idea if memory is scarce and code and data reside -in the same spaces, since the room used by the procedure might well -be more than the room saved. -.IP - -STACKROOM, number of shorts used in stack space. -This is used if memory is scarce and stack space and data space are -different. -On the PDP-11 a UNIX process starts with an 8K stack segment which -cannot be transferred to the data segment. -Under these conditions one can use a lot of the stack space for storage. -.NH 3 -assert.h -.PP -Just defines the assert macro. -When compiled with -DNDEBUG all asserts will be off. -.NH 3 -ext.h -.PP -Gives external definitions of variables used by more than one module. -.NH 3 -line.h -.PP -Defines the structures used to keep instructions, -one structure per line of EM code, -and the structure to keep arguments of pseudos, -one structure per argument. -Both structures essentially contain a pointer to the next, -a type, -and a union containing information depending on the type. -Core is allocated only for the part of the union used. -.PP -The -.I -struct line -.R -has a very compact encoding for small integers, -they are encoded in the type field. -On the PDP-11 this gives a line structure of only 4 bytes for most -instructions. 
-.NH 3 -lookup.h -.PP -Contains definition of the struct used for symbol table management, -global labels and procedure names are kept in one table. -.NH 3 -optim.h -.PP -If one defines the DIAGOPT option in this header file, -for every optimization performed a number is written on stderr. -The number gives the number of the pattern in the table -or one of the four special numbers in this header file. -.NH 3 -param.h -.PP -Contains one settable option, -LONGOFF. -If this is not defined the optimizer can only optimize programs -with wordsize 2 and pointersize 2. -Set this only if it must be run on a Z80 or something pathetic like that. -.PP -Other defines here should not be touched. -.NH 3 -pattern.h -.PP -Contains defines of indices in a pattern, -definition of the expression triples, -definitions of the various expression operators -and definition of the result struct where expression results are put. -.PP -This header file is the main one that is also included by -.I mktab . -.NH 3 -proinf.h -.PP -This one contains definitions -for the local label table structs -and for the struct where all information for one procedure is kept. -This is in one struct so it can be saved easily when recursive -procedures have to be resolved. -.NH 3 -tes.h -.PP -Contains the data structure used by the top element size computation. -.NH 3 -types.h -.PP -Collection of typedefs to be used by almost all modules. -.NH 2 -The C code itself. -.PP -The C code will now be the center of our attention. -We will make a walk through the sources and we will try -to follow the sources in a logical order. -So we will start at -.NH 3 -main.c -.PP -The main.c module contains the main() function. -Here nothing spectacular happens, -only thing of interest is the handling of flags: -.IP -L -This is an instruction to the peephole optimizer to perform -one of its auxiliary functions, the generation of a library module. 
-This makes the peephole optimizer write its output on a temporary file, -and at the end making the real output by first generating a list -of exported symbols and then copying the temporary file behind it. -.IP -n -Disables all optimization. -Only thing the optimizer does now is filling in the blank after the -.I END -pseudo and resolving recursive procedures. -.PP -The place where main() is left is the call to getlines() which brings -us to -.NH 3 -getline.c -.PP -This module reads the EM code and constructs a list of -.I -struct line -.R -records, -linked together backwards, -i.e. the first instruction read is the last in the list. -Pseudos are handled here also, -for most pseudos this just means that a chain of argument records -is linked into the linked line list but some pseudos get special attention: -.IP exc -This pseudo is acted upon right away. -Lines read are shuffled around according to instruction. -.IP mes -Some messages are acted upon. -These are: -.RS -.IP ms_err 8 -The input is drained, just in case it is a pipe. -After that the optimizer exits. -.IP ms_opt -The do not optimize flag is set. -Acts just like -n on the command line. -.IP ms_emx -The word- and pointersize are read, -complain if we are not able to handle this. -.IP ms_reg -We take notice of the offset of this local. -See also comments in the description of peephole.c -.RE -.IP pro -A new procedure starts, if we are already in one save the status, -else process collected input. -Collect information about this procedure and if already in a procedure -call getlines() recursively. -.IP end -Process collected input. -.PP -The phrase "process collected input" is used twice, -which brings us to -.NH 3 -process.c -.PP -This module contains the entry point process() which is called at any -time the collected input must be processed. -It calls a variety of other routines to get the real work done. 
-Routines in this module are in chronological order: -.IP symknown 12 -Marks all symbols seen until now as known, -i.e. it is now known whether their scope is local or global. -This information is used again during output. -.IP symvalue -Runs through the chain of pseudos to give values to data labels. -This needs an extra pass. -It cannot be done during the getlines pass, since an -.B exc -pseudo could destroy things. -Nor can it be done during the backward pass since it is impossible -to do good fragment numbering backward. -.IP checklocs -Checks whether all local labels referenced are defined. -It needs to be sure about this since otherwise the -semi global optimizations made cannot work. -.IP relabel -This routine finds the final destination for each label in the procedure. -Labels followed by unconditional branches or other labels are marked during -the peephole phase and this leads to chains of identical labels. -These chains are followed here, and in the local label table each label -has associated with it its replacement label, after this procedure is run. -Care is taken in this routine to prevent a loop in the program from -causing the optimizer to loop. -.IP cleanlocals -This routine empties the local label table after everything -is processed. -.PP -But before this can all be done, -the backward linked list of instructions first has to be reversed, -so here comes -.NH 3 -backward.c -.PP -The routine backward has a number of functions: -.IP - -It reverses the backward linked list, making two forward linked lists, -one for the instructions and one for the pseudos. -.IP - -It notes the last occurrence of data labels in the backward linked list -and puts it in the global symbol table. -This is of course the first occurrence in the procedure. -This information is needed to decide whether the symbols are global -or local to this module. -.IP - -It decides about the fragment boundaries of data blocks. -Fragments are numbered backwards starting at 3.
-This is done to be able to make the type of an expression -containing a symbol equal to its fragment. -This type can then not clash with the types integer and local label. -.IP - -It allocates a rom buffer to every data label with a rom behind -it, if that rom contains only plain integers at the start. -.PP -The first thing done after process() has called backward() and some -of its own little routines is a call to the real routine, -the one that does the work the program was written for -.NH 3 -peephole.c -.PP -The first routines in peephole.c -implement a linked list for the offsets of local variables -that are candidates for a register implementation. -Several patterns use the notreg() function, -since it is forbidden to combine a load of that variable -with the load of another and -it is not allowed to take the address of that variable. -.PP -The routine peephole hashes the patterns the first time it is called -after which it doesn't do much more than calling optimize. -But first hashpatterns(). -.PP -The patterns are hashed at run time of the optimizer because of -the -.B LLP , -.B LEP , -.B SLP -and -.B SEP -instructions added to the instruction set in this optimizer. -These are first replaced everywhere in the table by the correct -replacement after which the first three instructions of the -pattern are hashed and the pattern is linked into one of the -256 linked lists. -There is a define CHK_HASH in this module that -can be set if the randomness of the hashing -function is not trusted. -.PP -The attention now shifts to optimize(). -This routine calls basicblock() for every piece of code between two labels. -It also notes which labels have another label or a branch behind them -so the relabel() routine from process.c can do something with that. -.PP -Basicblock() keeps making passes over its basic block -until no more optimizations are found. -This might be inefficient if there is a long basicblock with some -deep recursive optimization in one part of it. 
-The entire basic block is then scanned a lot of times just for -that one piece. -The alternative is backing up after making an optimization and running -through the same code again, but that is difficult -in a single linked list. -.PP -It hashes instructions and calls trypat() for every pattern that has -a full hash value match, -i.e. lower byte and upper byte equal. -Longest pattern is tried first. -.PP -Trypat() checks length and opcodes of the pattern. -If correct it fills the iargs[] array with argument values -and calculates the expression. -If that is also correct the work shifts to tryrepl(). -.PP -Tryrepl() generates the list of replacement instructions, -links it into the list and returns true. -Why then the name tryrepl() if it always succeeds? -Well, there is a mechanism in the optimizer, -unused until today that makes it possible to do optimizations that cannot -be described by the table. -It is possible to give a number as a replacement which will cause the -optimizer to call a routine special() to do some work. -This routine might decide not to do an optimization and return false. -.PP -The last routine that is called from process() is putline() -to write the optimized code, bringing us to -.NH 3 -tes.c -.PP -Contains the routines used by the top element size computation phase, -which is run after the peephole-optimisation. -The main routine of tes.c is tes_instr(). This looks at an instruction and -decides the size of the element on top of the stack after the instruction -is executed. When a label is defined or used, the size of the top element -is remembered for later use. When the information is consistent throughout -the procedure, it is passed to the code generator by means of an ms_tes -message. -.NH 3 -putline.c -.PP -The major part of putline.c is the standard set of routines -that makes EM compact code.
-The extra functions performed are: -.IP - -For every occurence of a global symbol it might be necessary to -output a -.B exa , -.B exp , -.B ina -or -.B inp -pseudo instruction. -That task is performed. -.IP - -The -.B lin -instructions are optimized here, -.B lni -instructions added for -.B lin -instructions and superfluous -.B lin -instructions deleted. - diff --git a/doc/proto.make b/doc/proto.make deleted file mode 100644 index ea9511cd6..000000000 --- a/doc/proto.make +++ /dev/null @@ -1,161 +0,0 @@ -# $Id$ - -#PARAMS do not remove this line! - -TBL=tbl -EQN=eqn -PIC=pic -REFER=refer -GRAP=grap - -RESFILES= \ - $(TARGET_HOME)/doc/toolkit.doc \ - $(TARGET_HOME)/doc/install.doc \ - $(TARGET_HOME)/doc/em.doc \ - $(TARGET_HOME)/doc/ack.doc \ - $(TARGET_HOME)/doc/v7bugs.doc \ - $(TARGET_HOME)/doc/peep.doc \ - $(TARGET_HOME)/doc/cg.doc \ - $(TARGET_HOME)/doc/ncg.doc \ - $(TARGET_HOME)/doc/regadd.doc \ - $(TARGET_HOME)/doc/LLgen.doc \ - $(TARGET_HOME)/doc/LLgen_NCER.doc \ - $(TARGET_HOME)/doc/pascal.doc \ - $(TARGET_HOME)/doc/basic.doc \ - $(TARGET_HOME)/doc/crefman.doc \ - $(TARGET_HOME)/doc/ansi_C.doc \ - $(TARGET_HOME)/doc/pcref.doc \ - $(TARGET_HOME)/doc/val.doc \ - $(TARGET_HOME)/doc/6500.doc \ - $(TARGET_HOME)/doc/i80.doc \ - $(TARGET_HOME)/doc/z80.doc \ - $(TARGET_HOME)/doc/m68020.doc \ - $(TARGET_HOME)/doc/sparc.doc \ - $(TARGET_HOME)/doc/top.doc \ - $(TARGET_HOME)/doc/ego.doc \ - $(TARGET_HOME)/doc/occam.doc \ - $(TARGET_HOME)/doc/m2ref.doc \ - $(TARGET_HOME)/doc/ceg.doc \ - $(TARGET_HOME)/doc/nopt.doc \ - $(TARGET_HOME)/doc/int.doc \ - $(TARGET_HOME)/doc/lint.doc \ - $(TARGET_HOME)/doc/install.pr \ - $(TARGET_HOME)/doc/READ_ME \ - $(TARGET_HOME)/doc/Makefile - -install: $(RESFILES) - -$(TARGET_HOME)/doc/toolkit.doc: $(SRC_HOME)/doc/toolkit.doc - cat $(SRC_HOME)/doc/toolkit.doc >$@ - -$(TARGET_HOME)/doc/install.doc: $(SRC_HOME)/doc/install.doc - $(TBL) $(SRC_HOME)/doc/install.doc >$@ - -$(TARGET_HOME)/doc/em.doc: em.X -em.X: - cd em; make "TBL="$(TBL) - 
-$(TARGET_HOME)/doc/ack.doc: $(SRC_HOME)/doc/ack.doc - cat $(SRC_HOME)/doc/ack.doc >$@ - -$(TARGET_HOME)/doc/v7bugs.doc: $(SRC_HOME)/doc/v7bugs.doc - cat $(SRC_HOME)/doc/v7bugs.doc >$@ - -$(TARGET_HOME)/doc/peep.doc: $(SRC_HOME)/doc/peep.doc - cat $(SRC_HOME)/doc/peep.doc >$@ - -$(TARGET_HOME)/doc/cg.doc: $(SRC_HOME)/doc/cg.doc - cat $(SRC_HOME)/doc/cg.doc >$@ - -$(TARGET_HOME)/doc/ncg.doc: $(SRC_HOME)/doc/ncg.doc - $(TBL) $(SRC_HOME)/doc/ncg.doc >$@ - -$(TARGET_HOME)/doc/regadd.doc: $(SRC_HOME)/doc/regadd.doc - cat $(SRC_HOME)/doc/regadd.doc >$@ - -$(TARGET_HOME)/doc/LLgen.doc: LLgen.X -LLgen.X: - cd LLgen; make "EQN="$(EQN) "TBL="$(TBL) "REFER="$(REFER) "GRAP="$(GRAP) - -$(TARGET_HOME)/doc/LLgen_NCER.doc: LLgen_NCER.X -LLgen_NCER.X: - cd LLgen; make "EQN="$(EQN) "TBL="$(TBL) "REFER="$(REFER) "GRAP="$(GRAP) - -$(TARGET_HOME)/doc/basic.doc: $(SRC_HOME)/doc/basic.doc - cat $(SRC_HOME)/doc/basic.doc >$@ - -$(TARGET_HOME)/doc/crefman.doc: $(SRC_HOME)/doc/crefman.doc - $(EQN) $(SRC_HOME)/doc/crefman.doc >$@ - -$(TARGET_HOME)/doc/ansi_C.doc: $(SRC_HOME)/doc/ansi_C.doc - $(TBL) $(SRC_HOME)/doc/ansi_C.doc >$@ - -$(TARGET_HOME)/doc/pcref.doc: $(SRC_HOME)/doc/pcref.doc - $(TBL) $(SRC_HOME)/doc/pcref.doc >$@ - -$(TARGET_HOME)/doc/val.doc: $(SRC_HOME)/doc/val.doc - cat $(SRC_HOME)/doc/val.doc >$@ - -$(TARGET_HOME)/doc/6500.doc: $(SRC_HOME)/doc/6500.doc - $(TBL) $(SRC_HOME)/doc/6500.doc >$@ - -$(TARGET_HOME)/doc/i80.doc: $(SRC_HOME)/doc/i80.doc - cat $(SRC_HOME)/doc/i80.doc >$@ - -$(TARGET_HOME)/doc/z80.doc: $(SRC_HOME)/doc/z80.doc - cat $(SRC_HOME)/doc/z80.doc >$@ - -$(TARGET_HOME)/doc/m68020.doc: $(SRC_HOME)/doc/m68020.doc - $(EQN) $(SRC_HOME)/doc/m68020.doc | $(TBL) >$@ - -$(TARGET_HOME)/doc/sparc.doc: sparc.X -sparc.X: - cd sparc; make "PIC="$(PIC) "TBL="$(TBL) - -$(TARGET_HOME)/doc/pascal.doc: pascal.X -pascal.X: - cd pascal; make "PIC="$(PIC) - -$(TARGET_HOME)/doc/top.doc: top.X -top.X: - cd top; make "EQN="$(EQN) "TBL="$(TBL) "REFER="$(REFER) - 
-$(TARGET_HOME)/doc/ego.doc: ego.X -ego.X: - cd ego; make "REFER="$(REFER) "TBL="$(TBL) - -$(TARGET_HOME)/doc/occam.doc: occam.X -occam.X: - cd occam; make "PIC="$(PIC) "TBL="$(TBL) "EQN="$(EQN) - -$(TARGET_HOME)/doc/m2ref.doc: $(SRC_HOME)/doc/m2ref.doc - cat $(SRC_HOME)/doc/m2ref.doc >$@ - -$(TARGET_HOME)/doc/ceg.doc: ceg.X -ceg.X: - cd ceg; make "PIC="$(PIC) "TBL="$(TBL) "REFER="$(REFER) - -$(TARGET_HOME)/doc/nopt.doc: $(SRC_HOME)/doc/nopt.doc - cat $(SRC_HOME)/doc/nopt.doc >$@ - -$(TARGET_HOME)/doc/int.doc: int.X -int.X: - cd int; make "TBL="$(TBL) - -$(TARGET_HOME)/doc/lint.doc: lint.X -lint.X: - cd lint; make - -$(TARGET_HOME)/doc/install.pr: $(SRC_HOME)/doc/install.pr - cat $(SRC_HOME)/doc/install.pr >$@ - -$(TARGET_HOME)/doc/READ_ME: $(SRC_HOME)/doc/READ_ME - cat $(SRC_HOME)/doc/READ_ME >$@ - -$(TARGET_HOME)/doc/Makefile: $(SRC_HOME)/doc/Makefile - cat $(SRC_HOME)/doc/Makefile >$@ - -cmp: - -clean: - -rm -f *.old Out diff --git a/doc/regadd.doc b/doc/regadd.doc deleted file mode 100644 index b50630b83..000000000 --- a/doc/regadd.doc +++ /dev/null @@ -1,131 +0,0 @@ -.\" $Id$ -.TL -Addition of register variables to an existing table. -.NH 1 -Introduction -.PP -This is a short description of the newest feature in the -table driven code generator for the Amsterdam Compiler Kit. -It describes how to add register variables to an existing table. -This assumes a distribution of October 1983 or later. -It is not clear whether one should read this when starting with -a table for a new machine, -or waiting till the table is well debugged already. -.NH 1 -Modifications to the table itself. -.NH 2 -Register section -.PP -Just before the properties of the register one -of the following can be added: -.IP - 2 -regvar -.IP - -regvar ( pointer ) -.IP - -regvar ( loop ) -.IP - -regvar ( float ) -.LP -All register variables of one type must be of the same size, -and they may have no subregisters. 
-.NH 2 -Codesection -.PP -.IP - 2 -Two pseudo functions are added to the list allowed inside expressions: -.RS -.IP 1) 3 -inreg ( expr ) has as a parameter the offset of a local, -and returns 0,1 or 2: -.RS -.IP 2: 3 -if the variable is in a register. -.IP 1: -if the variable could be in a register but isn't. -.IP 0: -if the variable cannot be in a register. -.RE -.IP 2) -regvar ( expr ) returns the register associated with the variable. -Undefined if it is not in a register. -So regvar ( expr ) is defined if and only if inreg (expr ) == 2. -.RE -.IP - -It is now possible to remove() a register expression, -this is of course needed for a store into a register local. -.IP - -The return out of a procedure may now involve register restores, -so the special word 'return' in the table will invoke a user defined -function. -.NH 1 -Modifications to mach.c -.PP -If register variables are used in a table, the program -.I cgg -will define the word REGVARS during compilation of the sources. -So the following functions described here should be bracketed -by #ifdef REGVARS and #endif. -.IP - 2 -regscore(off,size,typ,freq,totyp) long off; -.br -This function should assign a score to a register variable, -the score should preferably be the estimated number of bytes -gained when it is put in a register. -Off and size are the offset and size of the variable, -typ is the type, that is reg_any, reg_pointer, reg_loop or reg_float. -Freq is the number of times it occurs statically, and totyp -is the type of the register it is planned to go into. -.br -Keep in mind that the gain should be net, that is the cost for -register save/restore sequences and the cost of initialisation -in the case of parameters should already be included. -.IP - -i_regsave() -.br -This function is called at the start of a procedure, just before -register saves are done. -It can be used to initialise some variables if needed. -.IP - -f_regsave() -.br -This function is called at end of the register save sequence. 
-It can be used to do the real saving if multiple register move -instructions are available. -.IP - -regsave(regstr,off,size) char *regstr; long off; -.br -Should either do the real saving or set up a table to have -it done by f_regsave. -Note that initialisation of parameters should also be done, -or planned here. -.IP - -regreturn() -.br -Should restore saved registers and return. -The function result is already in the function return area by now. -.NH 1 -Examples -.PP -Here are some examples out of the PDP 11 table -.DS -lol inreg($1)==2| | | regvar($1) | | - -lil inreg($1)==2| | | {regdef2, regvar($1)} | | - -stl inreg($1)==2| xsource2 | - remove(regvar($1)) - move(%[1],regvar($1)) | | | - -inl inreg($1)==2| | remove(regvar($1)) - "inc %(regvar($1)%)" - setcc(regvar($1)) | | | -.DE -.NH 1 -Afterthoughts. -.PP -At the time of this writing the tables for the PDP 11 and the M68000 and -the VAX are converted, in all cases the two byte wordsize versions. -No big problems have occurred, but experience has shown that it is -necessary to check the table carefully for all patterns with locals in them. -Code may be generated that uses the memoryslot the local is not in. diff --git a/doc/sparc/.distr b/doc/sparc/.distr deleted file mode 100644 index bed3d274f..000000000 --- a/doc/sparc/.distr +++ /dev/null @@ -1,15 +0,0 @@ -1 -2 -3 -4 -5 -A -B -init -intro -note_on_reg_wins -refs -timing -title -proto.make -pics diff --git a/doc/sparc/1 b/doc/sparc/1 deleted file mode 100644 index 79bb4e3b5..000000000 --- a/doc/sparc/1 +++ /dev/null @@ -1,53 +0,0 @@ -.In -.NH -INTRODUCTION -.NH 2 -Why an EM backend for SPARC processors? -.PP -With the introduction of SPARC-based computers like the Sun-4, a -whole new range of fast computers became readily available to the general -public. The power of large mainframes had been captured into a small -desk-top computer at only a fraction of the cost. 
-.PP -In the older days, a new computer used to be very hard to integrate into -the existing environment, but due to standardization in the software world -incompatibility in hardware no longer means incompatibility in software. -Programs that are written for computer A can often be run on computer B -without major modifications. Unfortunately this is not true for all software. -.PP -There will always be programs that rely on the specific -hardware of a certain computer for many different reasons. They -can be categorized as: -.IP - -poorly written programs -.IP - -programs to directly control hardware (device drivers) -.IP - -code that requires efficiency (time-critical I/O drivers) -.IP - -programs to generate code to run on the hardware (compilers) -.LP -This project for instance, the design and implementation of an EM backend -for SPARC processors, comes in the last category. -.PP -We have designed and implemented an algorithm to convert EM programs to code -that will run directly on the SPARC hardware. Henceforth, both the algorithm -and the implementation will be referred to as the EM-to-SPARC backend, -or simply: the backend. -.NH 2 -Why has nobody done this before? -.PP -Since EM was designed around 1981 and even SPARC has been around for some -years now, one may wonder why nobody has ever written an EM to SPARC backend -before. The reason is twofold. In the first place, there are some -non-trivial problems to be solved in the design phase, and secondly, -the SPARC-design combined with the lack of documentation, would surely -cost a lot of blood, sweat and tears. The absence of -clues to any of the design problems, combined with the \(em at first -glance \(em inhuman -SPARC instruction set did not make this a very attractive project. -.PP -On the other hand, these were exactly the reasons which made us take on -this particular project: it would require design skills, as well as some -hard work; a golden combination for a successful project. 
-.bp diff --git a/doc/sparc/2 b/doc/sparc/2 deleted file mode 100644 index df29b8276..000000000 --- a/doc/sparc/2 +++ /dev/null @@ -1,109 +0,0 @@ -.In -.nr H1 1 -.NH -CLOSE-UP LOOK -.NH 2 -What is EM? -.PP -As the abstract of the IR-81 rapport on EM -.[ [ -description of a machine architecture -.]] -says: \*(OQEM is a family -of intermediate languages designed for producing portable compilers.\*(CQ -Because EM is to be used on a wide range of languages and processors, -the instruction set is kept simple enough to allow easy translation to, -or interpretation on, almost any processor. Yet it is also powerful enough -to accommodate easy translation from almost any block-structured language. -.PP -Even though EM was designed in the early 1980s, it -is based on -.\" already shows strong signs of being influenced by -the (then innovative) RISC architecture. All instructions -have 0 or 1 operands, there are no fancy addressing modes as in the -68020's\*(Si move.w a3(_array,d3.w*2), -(sp)\*(So, no explicit registers, -although instructions for higher languages -such as array-operations, multiway branches (case) and -floating point operations are provided. -.PP -To fully understand the discussion in the following chapters, -the reader should at least have some knowledge of EM. -.NH 2 -What is SPARC? -.PP -According to Sun's RISC tutorial: \*(OQSun Microsystems has designed a RISC -architecture, called SPARC, and has implemented that architecture with -the Sun-4 family of supercomputing workstations and servers. SPARC stands -for Scalable Processor ARChitecture, emphasizing its applicability to -large as well as small machines.\*(CQ -.PP -In sharp contrast to EM, SPARC does have -explicit registers (31 integer and 32 floating point, all of which -are 32 bits wide) and -does not support any high level language operations: it does not even have -multiplication or division instructions. 
Because the SPARC design is -very straightforward, all instructions could be hard-coded (no microcode -involved) to -provide extremely high performance. All register-to-register operations -require exactly one clock cycle, and all register-to-memory and -memory-to-register operations require two clock cycles, one to retrieve -the instruction and one to access external memory. At a clock speed of -over 20 MHz this means that well over 10 VAX MIPS can be achieved: -more than 4 times the speed of a 15 MHz 68020 used in the Sun3/50. -.PP -As above, the reader should also have some general knowledge about -the SPARC processor to be able to understand the following chapters. -.NH 2 -What exactly is a (fast) backend? -.PP -To put it in the simplest of ways: a (fast) backend is a set of routines to -translate EM code to code that will run 'on the metal' (for example the SPARC -processor). The distinction between full-fledged backends (code generators) -.[ [ -The table driven code generator -.]] -and fast backends (code expanders) -.[ [ -The Code Expander Generator -.]] -is related to -the compilation-time vs. run-time trade off. Code generators generate -efficient code and code expanders generate code very efficiently. -For details about code expanders see also -.[ [ -The design of very fast portable compilers -.]]. -.PP -The reasons for us to implement a code expander are numerous: Our first reason to -implement a code expander, rather than a code generator was that implementing a -code expander would be hard enough already. Code generators only give -more problems and there were already enough problems to be solved. Secondly, -we knew we would never be able to compete with original SPARC compilers due -to loss of information in the frontends (see also chapter 5). By implementing -a code expander we might be able to outrun the existing compilers on a -completely different terrain: compile speed.
-.PP -The third 'reason' to implement a code expander lies a little deeper and was -not discovered until we had actually started the implementation... It was only -then that we found out that for certain architectures, such as the SPARC, -the idea behind the code-expander is not necessarily inferior to that -behind a code-generator. It seems that for highly orthogonal instruction -sets it is possible to generate near optimal code without using the -code-expander. We have to say, however, that this is only true for our -optimized version of the code-expander. With the original code-expander -it would not have been possible to generate near-optimal code for the -SPARC processor. -.NH 2 -So, what are the main differences between EM and SPARC? -.PP -The main -difference between EM and SPARC is the stack versus register orientation. -The other differences, such as the presence of high level language -operations in EM, can easily be overcome by subroutines, -or small pieces of in-line SPARC code. -The design-part of this project mostly concentrates on -building a bridge between EM's stack and SPARC's registers. -.PP -In the next chapter we will make a list of all our design problems which -will then be discussed in chapter 4. -.bp diff --git a/doc/sparc/3 b/doc/sparc/3 deleted file mode 100644 index b6ddce807..000000000 --- a/doc/sparc/3 +++ /dev/null @@ -1,82 +0,0 @@ -.In -.nr H1 2 -.NH -PROBLEMS -.NH 2 -Maintain SPARC speed -.PP -If we want to generate SPARC code, we should try to generate efficient code -as fast as possible. It would be quite embarrassing to find out that the -same program would run faster on a Motorola 68020 than on a SPARC processor, -when both operate at the same clock frequency. -Looking at some code generated by Sun's C-compiler and optimizing assembler, -we can spot a few remarkable characteristics of the generated SPARC code: -.IP - -There are almost no memory references -.IP - -Parameters to functions are passed through registers. 
-.IP - -Almost all delay slots\(dg -.FS -\(dg For details about delay slots see the SPARC Architecture Manual, chapter 4, pp. 42-48 -.FE -are filled in by the assembler -.LP -If we want to generate efficient code, we should at least try to -reduce the number of memory references and use registers wherever we can. -Since EM is stack-oriented it references its stack for every operation so -this will not be an easy task; a suitable solution will however be given in -the next chapter. -.NH 2 -Increase compilation speed -.PP -Because we will implement a code expander (fast backend) we should keep -a close eye on efficiency; if we cannot beat regular compilers on producing -efficient code we will try to beat them on fast code generation. -The usual trick to achieve fast compilation is to pack the frontend, -optimizer, code-generator and -assembler all into a single large binary to reduce the overhead of -reading and writing temporary files. Unfortunately, due to the -SPARC instruction set, its relocation information is slightly bizarre -and cannot be represented with the present primitives. -This means that it will not be possible to generate the required output -format directly from our backend. -.PP -There are three solutions here: generate assembler code, and let an -existing assembler generate the required object (\fI.o\fR) files, -create our own primitives that can handle the SPARC relocation format, or -do not use any of the addressing modes that require the bizarre relocation. -Because we have enough on our hands already we will -let the existing assembler deal with generating object files. -.NH 2 -Convert stack to register operations -.PP -As we wrote in the previous chapter, for RISC machines a code expander can -produce almost as efficient code as a code generator. The fact that this is -true for stack-oriented RISC processors is rather obvious. The problem we -face, however, is that the SPARC processor is register, instead of -stack oriented. 
In the next chapter we will give a suitable solution to -convert most stack accesses to register accesses. -.NH 2 -Miscellaneous -.PP -Besides performance and \fI.o\fR-compatibility there are some other -peculiarities of the SPARC processor and Sun's C-compiler (henceforth -simply called \fIcc\fR). -.PP -For some reason, the SPARC stack pointer requires alignment -on 8 bytes, so it is impossible to push a 4-byte integer on the stack -and then \*(Sisub 4, %sp\*(So\(dd. -.FS -\(dd For more information about SPARC assembler see the Sun-4 Assembly -Language Reference Manual -.FE -This too will be discussed in the next chapter, where we will take a -more in-depth look into this problem and also discuss a couple of -possible solutions. -.PP -Another thing is that \fIcc\fR usually passes the first six parameters of a -function-call through registers. To be \fI.o\fR-compatible we would have to -pass the first six parameters of each function call through registers as well. -Exactly why this is not feasible will also be discussed in the next chapter. -.bp diff --git a/doc/sparc/4 b/doc/sparc/4 deleted file mode 100644 index 7775e8577..000000000 --- a/doc/sparc/4 +++ /dev/null @@ -1,468 +0,0 @@ -.In -.hw data-structures -.nr H1 3 -.NH -SOLUTIONS -.NH 2 -Maintaining SPARC speed -.PP -In chapter 3 we wrote: -.sp 0.3 -.nf ->If we want to generate efficient code, we should at least try to reduce the number of ->memory references and use registers wherever we can. -.fi -.sp 0.3 -In this chapter we will devise a strategy to swiftly generate acceptable -code by using push-pop optimization. -Note that this is not the push-pop -optimization already available in the EM-kit, since that is only present -in the assembler-to-binary part which we do not use -.[ [ -The Code Expander Generator -.]]. -Our push-pop optimization -works more like the fake-stack described in -.[ [ -The table driven code generator -.]]. 
-.NH 3 -Ad-hoc optimization -.PP -Before getting involved in any optimization let's have a look at some -code generated with a straightforward EM to SPARC conversion of the -C statement: \*(Sif(a[i]);\*(So Note that \*(Si%SP\*(So is an alias -for a general purpose -register and acts as the EM stack pointer. It has nothing to do with -\*(Si%sp\*(So \(em the SPARC stack pointer. -Analogous \*(Si%LB\*(So is EMs local base pointer. -.br -.IP -.HS -.TS -; -l s l s l -l1f6 lf6 l2f6 lf6 l. -EM code SPARC code Comment - -lae _a set _a, %g1 ! load address of external _a - dec 4, %SP - st %g1, [%SP] - -lol -4 set -4, %g1 ! load local -4 (i) - ld [%g1+%LB], %g2 - dec 4, %SP - st %g2, [%SP] - -loc 2 set 2, %g1 ! load constant 2 - dec 4, %SP - st %g1, [%SP] - -sli 4 ld [%SP], %g1 ! pop shift count - ld [%SP+4], %g2 ! pop shiftee - sll %g2, %g1, %g3 - inc 4, %SP - st %g3, [%SP] ! push 4 * i - -ads 4 ld [%SP], %g1 ! add pointer and offset - ld [%SP+4], %g2 - add %g1, %g2, %g3 - inc 4, %SP - st %g3, [%SP] ! push address of _a + (4 * i) - -loi 4 ld [%SP], %g1 ! load indirect 4 bytes - ld [%g1], %g2 - st %g2, [%SP] ! push a[i] -cal _f - ... -.TE -.HS -.LP -Although the code is easy understand, it clearly is far from optimal. -The above code uses approximately 60 clock-cycles\(dg -.FS -\(dg In general each instruction only takes one cycle, -except for \*(Sild\*(So and -\*(Sist\*(So which may both require additional clock cycles. The exact amount -of extra cycles needed depends on the SPARC implementation and memory access -time. Furthermore, the -\*(Siset\*(So pseudo-instruction is a bit tricky. It takes one cycle when -its argument lies between -4096 and 4095, and two cycles otherwise. -.FE -to push an array-element on the stack, -something which a 68020 can do in a single instruction. The SPARC -processor may be fast, but not fast enough to justify the above code. -.PP -The same statement can be translated much more efficiently: -.DS -.TS -; -l2f6 lf6 l. -sll %i0, 2, %g2 ! 
multiply index by 4 -set _a, g3 -ld [%g2+%g3], %g1 ! get contents of a[i] -dec 4, SP -st %g2, [SP] ! push a[i] onto the stack -.TE -.DE -which, instead of 60, uses only 5 clock cycles to retrieve the element -from memory and 5 additional cycles when the result has to be pushed -on the stack. Note that when the result is not a parameter it does not -have to be pushed on the stack. By making efficient use of the SPARC -registers we can fetch \*(Sia[i]\*(So in only 5 cycles! -.NH 3 -Analyzing optimization -.PP -Instead of ad-hoc optimization we will need something more solid. -When one tries to optimize the above code in an ad-hoc manner one will -probably notice the large overhead due to stack access. Almost every EM -instruction requires at least three SPARC instructions: one to carry out -the EM instruction and two to pop and push the result from and onto the -stack. This happens for every instruction, even though the data being pushed -will probably be needed by the next instruction. To optimize this extensive -pushing and popping of data we will use the appropriately named push-pop -optimization. -.PP -The idea behind push-pop optimization is to delay the push operation until -it is almost certain that the data actually has to be pushed. -As is often the case, the data does not have to be pushed, -but will be used as input to another EM instruction. -If we can decide at compile time that this will indeed be -the case we can save the time of first pushing the data and then popping it -back again by temporarily storing the data (possibly only during compilation!) -and using it no sooner than it is actually needed. -.PP -The \*(Sisli 4\*(So instruction, for instance, expects two inputs on top of the -stack: on top a counter and right below that the shiftee (the number -to be shifted). As a result \*(Sisli\*(So -pushes 'shiftee << counter' back to the stack. 
Now consider the following -sequence, which could be the result of the expression \*(Si4 * i\*(So -.DS -.TS -; -l1f6 lf6 l. -lol -4 -loc 2 -sli 4 -.TE -.DE -In the non-optimized situation the \*(Silol\*(So would push -a local variable (whose offset is -4) on the stack. -Then the \*(Siloc\*(So pushes a 2 on the stack and finally \*(Sisli\*(So -retrieves both these numbers to replace then with the result. -On most machines it is not necessary to -push the 2 on the stack, since it can be used in the shift instruction -as an immediately operand. On a SPARC, for instance, one can write -.DS -.TS -; -l2f6 lf6 l. -ld [%LB-4], %g1 ! load local variable into register g1 -sll %g1, 2, %g2 ! perform the shift-left-by-2 -.TE -.DE -where the output of the \*(Silol\*(So, as well as the immediate operand 2 are used -in the shift instruction. As suggested before, all of this can be -achieved with push-pop optimization. -.NH 3 -A mechanism for push-pop optimization -.PP -To implement the above optimization we need some mechanism to -temporarily store information during compilation. -We need to be able to store, compare and retrieve information from the -temporary storage (cache) without any -loss of information. Before describing all the routines used -to implement our cache we will first describe how the cache works. -.PP -Items in the cache are structures containing an external (\*(Sichar *\*(So), -two registers (\*(Sireg_t\*(So) and a constant (\*(Siarith\*(So), -any of which may be 0. -The value of such a structure is the sum of (the values of) -its elements. To put a register in the cache, one has to be allocated either -by calling \*(Sialloc_reg\*(So which returns a free register, by -\*(Siforced_alloc_reg\*(So which allocates a specific register or any -of the other routines available to allocate a register. The keep things -simple, we will not discuss all of the available primitives here. 
-When the register -is then put in the cache by the \*(Sipush_reg\*(So routine, the ownership will -be transferred from the user to the cache. Ownership is important, because -only the owner of a register may (and must!) deallocate it. Registers can be -owned by either an (imaginary) register manager, the cache or the user. -When the user retrieves a register from the stack with \*(Sipop_reg\*(So for -instance, ownership is back to the user. -The user should then call \*(Sifree_reg\*(So -to transfer ownership to the register manager or call \*(Sipush_reg\*(So -to give it back to the cache. -Since the cache behaves itself as a stack we will use the term pop resp. push -to get items from, resp. put items in the cache. -.PP -We shall now present the sets of routines that implement the cache. -.IP \(bu -The routines -.DS -\*(Si -reg_t alloc_reg(void) -reg_t alloc_reg_var(void) -reg_t alloc_float(void) -reg_t alloc_float_var(void) -reg_t alloc_double(void) -reg_t alloc_double_var(void) - -void forced_alloc_reg(reg_t) -void soft_alloc_reg(reg_t) - -void free_reg(reg_t) -void free_double_reg(reg_t) -\*(So -.DE -allocate and deallocate registers. If there are no more register left, -i.e. they are owned by the cache, -one or more registers will be freed by flushing part of the cache -onto the real stack. -The \*(Sialloc_xxx_var\*(So primitives try to allocate a register that -can be used to store local variables. (In the current implementation -only the input and local registers.) If none can be found \*(SiNULL\*(So -is returned. \*(Siforced_alloc_reg\*(So forces the allocation of a certain -register. If it was already in use, its contents are moved to another -register. Finally \*(Sisoft_alloc_reg\*(So provides the possibility to -push a register onto the cache and still keep a copy for later use. -(Used to implement the \*(Sidup 4\*(So for example.) 
-.IP \(bu -The routines -.DS -\*(Si -void push_const(arith) -arith pop_const(void) -\*(So -.DE -push or pop a constant onto or from the stack. Distinction between -constants and other types is made so as not to lose any information; constants -may be used later on as immediate operands, which is not the case -for other types. If \*(Sipop_const\*(So is called, but the element on top of -the cache has either one of the external or register fields non-zero a -fatal error will be reported. -.IP \(bu -The routines -.DS -\*(Si -reg_t pop_reg(void) -reg_t pop_float(void) -reg_t pop_double(void) -reg_t pop_reg_c13(char *n) - -void pop_reg_as(reg_t) - -void push_reg(reg_t) -\*(So -.DE -push or pop a register. These will be used most often since results from one -EM instruction, which are computed in a register, are often used in the next. -When the element on top of the cache is more -than just a register the cache manager -will generate code to compute the sum of its fields and put the result in a -register. This register will then be given to the user. -If the user wants the result in a special register, he should use the -\*(Sipop_reg_as\*(So routine. -The \*(Sipop_reg_c13\*(So gives an optional number (as character string) whose -value can be represented in 13 bits. The constant can then be used as an -offset for the SPARC \*(Sild\*(So and \*(Sist\*(So instructions. -.IP \(bu -The routine -.DS -\*(Si -void push_ext(char *) -\*(So -.DE -pushes an external onto the stack. There is no pop-variant of this one since -there is no use in popping an external. -.IP \(bu -The routines -.DS -\*(Si -void inc_tos(arith n) -void inc_tos_reg(reg_t r) -\*(So -.DE -increment the element on top of the cache by either the constant \*(Sin\*(So -or by a register. The latter is useful for pointer addition when referencing -external memory. -.KS -.IP \(bu -The routine -.DS -\*(Si -int type_of_tos(void) -\*(So -.DE -.KE -returns the type of the element on top of the cache. 
This is a combination -(binary OR) of \*(SiT_ext\*(So, \*(SiT_reg\*(So or \*(SiT_float\*(So, -\*(SiT_reg2\*(So or \*(SiT_float2\*(So, and \*(SiT_cst\*(So, -and tells the -user which of the three fields are non-zero. When the register-fields -represent \*(Si%g0\*(So, it is considered zero. -.IP \(bu -Miscellaneous routines: -.DS -\*(Si -void init_cache(void) -void cache_need(int) -void change_reg(void) -void flush_cache(void) -\*(So -.DE -\*(Siinit_cache\*(So should be called before any -other cache routines, to initialize some internal datastructures. -\*(Sicache_need\*(So is used to tell the cache that a certain number -of register are needed for the next operation. This way the cache can -load them efficiently in one fell swoop. \*(Sichange_reg\*(So is to be -called when the user changes a register of which the cache (possibly) has -co-ownership. Because the contents of registers in the cache are -not allowed to change the user should call \*(Sichange_reg\*(So to -instruct the cache to copy the contents to some other register. -\*(Siflush_cache\*(So writes the cache to the stack and invalidates -the cache. It should be used before branches, -before labels and on other places where the stack has to be valid (i.e. where -every item on the EM-stack should be stored on the real stack, not in some -virtual cache). -.NH 3 -Implementing push-pop optimization in the EM_table -.PP -As indicated above, there is no regular way to represent the described -optimization in the EM_table. The only possible escapes from the EM_table -are function calls, but that is clearly not enough to implement a good -push-pop optimizer. Therefore we will use a modified version of the EM_table -format, where the description of, say, the \*(Silol\*(So instruction might look -like this\(dg: -.FS -\(dg This is not the way the \*(Silol\*(So actually looks in the EM_table; -it only shows how it \fImight\fR look using the forementioned push/pop -primitives. 
-.FE -.DS -\*(Si -reg_t A, B; -const_str_t n; - -alloc_reg(A); -push_reg(LB); -inc_tos($1); -B = pop_reg_c13(n); -"ld [$B+$n], $A"; -push_reg(A); -free_reg(B); -\*(So -.DE -For more details about the exact implementation consult -appendix B which contains some characteristic excerpts from the EM_table. -.NH 2 -Stack management -.PP -When converting EM code to some executable code there is the problem of -maintaining multiple stacks. The usual way to do this is described in -.[ [ -Description of a Machine Architecture -.]] -and is shown in figure \*(SN1. -.KE -.PS -copy "pics/EM_stack.orig" -.PE -.ce 1 -\fIFigure \*(SN1: usual stack management. -.KE -.sp -.LP -This means that the EM stack and the hardware stack (used -for subroutine calls, etc.) are interleaved in memory. On the SPARC, however, -this brings up a large problem: in the former model it is assumed that the -resolution of the stack pointer is a word, but this is not the case on the -SPARC processor. On the SPARC processor the stack-pointer as well as the -frame-pointer have to be aligned on 8-byte boundaries, so one can not simply -push a word on the stack and then lower the stack-pointer by 4 bytes! -.NH 3 -Possible solutions -.PP -A simple idea might be to use a swiss-cheese stack; we could -push a 4-byte word onto the stack and then lower the stack by 8. -Unfortunately, this is not a very solid solution, because -pointer-arithmetic involving pointers to objects on the stack would cause -hard-to-predict anomalies. -.PP -Another try would be not to use the hardware stack at all. As long as we -do not generate subroutine-calls everything will be all right. This -approach, however, also has some disadvantages: first we would not be able -to use any of the existing debuggers such as \fIadb\fR, because they all -assume a regular stack format. Secondly, we would not be able to make use -of the SPARC's register windows to keep local variables. 
Finally, doing all the -administrative work necessary for subroutine calls ourselves instead of -letting the hardware handle it for us, -causes unnecessary procedure-call overhead. -.PP -Yet another alternative would be to emulate the EM-part of the stack, -and to let the hardware handle the subroutine call. Since we will -emulate our own stack, there are no alignment restrictions and because -we will use the hardware procedure call we can still make use of -the register windows. -.NH 3 -Our implementation -.PP -To implement the hybrid stack we need two extra registers: one for the -EM stack pointer (the aforementioned \*(Si%SP\*(So) and one for the -EM local base pointer (\*(Si%LB\*(So). The most elegant solution would be to -put both stacks in different segments, so they would not influence -each other. Unfortunately -.UX -lacks the ability to add segments and -since we will implement our backend under -.UX, -we will have to put -both stacks in the same segment. Exactly how this can be done is shown -in figure \*(SN2. -.DS -.PS -copy "pics/mem_config" -.PE -.ce 1 -\fIFigure \*(SN2: our stack management.\fR -.DE -.sp -During normal procedure execution, the SPARC stack pointer has to point to -a memory location where the operating system can dump the active part of -the register window. The rest of the -register window will be dumped in the (stack) space pre-allocated for that purpose -by following the frame -pointer. When a signal occurs things get even more complicated and -result in figure \*(SN3. -.DS -.PS -copy "pics/signal_stack" -.PE -.ce 1 -\fIFigure \*(SN3: our signal stack.\fR -.DE -.PP -The exact implementation of the stack is shown in figure \*(SN4. -.KF -.PS -copy "pics/EM_stack.ours" -.PE -.ce 1 -\fIFigure \*(SN4: stack overview.\fR -.KE -.NH 2 -Miscellaneous -.PP -As mentioned in the previous chapter, the generated \fI.o\fR-files are -not compatible with Sun's own object format. 
The primary reason for -this is that Sun usually passes the first six parameters of a procedure call -through registers. If we were to do that too, we would always have -to fetch the top six words from the stack into registers, even when -the procedure would not have any parameters at all. Apart from this, -structure-passing is another exception in Sun's object format which -makes it impossible to generate object-compatible code.\(dg -.FS -\(dg Exactly how Sun passes structures as parameters is described in -Appendix D of the SPARC Architecture Manual (Software Considerations) -.FE -.bp diff --git a/doc/sparc/5 b/doc/sparc/5 deleted file mode 100644 index 8fde12270..000000000 --- a/doc/sparc/5 +++ /dev/null @@ -1,153 +0,0 @@ -.In -.nr H1 4 -.NH -FUTURE WORK -.NH 2 -A critique of EM -.PP -In general, EM fits its purpose quite well. Numerous compilers have been -written using EM as their intermediate language and it has even become a -commercial product. A great deal of its success is probably due to its -simplicity. There are no extravagant instructions but it does have all the -necessary functions to write a decent compiler. -.PP -There are, however, a few functions that come rather close to being -extravagant. The \*(Silar\*(So function for example \(em used -to fetch an element from an array \(em does not make it much easier -to write a frontend, but does make it unnecessarily hard to write an -efficient backend. Other instructions for which it is difficult -to generate efficient code are those that permit -dynamic operators, such as the \*(Silos\*(So. Dynamic operators, however, provide -significant extra possibilities and can therefore not be disposed of. -Note that even though the array operations \*(Silar\*(So and \*(Sisar\*(So -provide dynamic operators, they do not add additional power, since -they can easily be replaced with a sequence using the \*(Silos\*(So or -\*(Sists\*(So instructions. 
-.PP -EM code to reference arrays generated by the C frontend can be translated -very efficiently for almost any processor. However the same operation -generated by the Modula-2 frontend (which uses the \*(Silar\*(So), -is much less efficient, although the only difference is that the -latter performs range checking whereas the former does not.\(dg -.FS -\(dg Actually this depends on whether or not explicit range checking is enabled. -This clearly shows that the current code generators are not optimal and -often depend on ad-hoc decisions. -.FE -Since range checking can also be expressed explicitly in -EM (\*(Sirck\*(So) there is no need for any of the array operations -(\*(Siaar\*(So, \*(Silar\*(So and \*(Sisar\*(So). -.PP -Besides efficiency of the array-operations themselves, there still is another -major disadvantage of using these array-operations. In sharp contrast to -all other EM instructions except the \*(Silos\*(So and the \*(Sists\*(So, -they allow dynamic operators, so their effect on the stack-pointer can not -always be -determined at compile-time. This means that efficient caching of the -top-of-stack in registers is almost impossible, -so using these array-operations also affects the -efficiency of the surrounding code. Now that processors are produced with -more and more registers it could be very beneficial to cache the -top-of-stack, so that the memory/register reference ratio decreases -to the benefit of the overall performance. -.PP -As a final critique, we would also like to discuss the semantics of some of -the EM instructions. In -.[ [ -Description of a Machine Architecture -.]] -it is said that -all signed instructions such as the \*(Siadi\*(So, should cause an exception -on overflow. The unsigned operations such as \*(Siadu\*(So, however, -should act as modulo operations and therefore not perform overflow checking. -Since it is very expensive to perform overflow checking in EM, -we would suggest that the backend takes care of this. 
For languages which -do not require overflow checking, a simple message could be generated to -disable overflow checking in backends. This way all backends could be -written to fully comply with the official EM definition without any reduction in -efficiency.\(dd -.FS -\(dd Currently many backends do not implement error checks because they -are too expensive and almost never needed. Some frontends even have -facilities built in to generate EM-code to force these checks. If this -trend continues we will end up with a de-facto and a de-jure standard -both developed by the same people but nonetheless incompatible. -.FE -When such messages are added we would like to suggest -that they can enforce overflow checks on unsigned, as well as signed arithmetic. -.PP -As a conclusion we would like to suggest removal of the array operations from -EM, or at least discontinuation of their usage in frontends. -.NH 2 -\*(OQWanted: Procedure call information\*(CQ -.PP -The advantage of an intermediate language such as EM is that the backend -no longer has to know about any 'quirks' of the 'input'-language. The major -disadvantage, however, is that the backend no longer knows about any 'quirks' -of the 'input'-language... If the SPARC backend ever has to compete -with Sun's own C-compiler for example, removal of the array-operations -will not be enough. The amount of information that is lost during -the translation to EM is too large to ever generate truly efficient SPARC code. -.PP -To write such an efficient backend one needs to know, for example, whether, -when and what type of parameter is being computed, so the result can be stored -in the proper place and scratch registers can be reused. -(On the SPARC processor, for example, it is very beneficial -to pass the first six parameters of a procedure call through -registers instead of using the stack.) -One way to express such things in EM is to insert extra messages in -the EM-code. 
The C statement \*(Sia = f(4, a + b);\*(So for example, -could be translated to the following EM-code: -.DS -.TS -; -l1f6 lf6 l. -lol -4 ! a -lol -8 ! b -mes x, 2 ! next instruction will compute 2nd parameter -adi 4 -mes x, 1 ! next instruction will compute 1st parameter -loc 4 -cal _f ! call function f -lfr 4 -stl -4 ! store result in a -.TE -.DE -For a code expander it is important that the \*(Simes\*(So pseudo -instructions appear \fIbefore\fR -the EM instruction that computes the parameter, because that way the final -computation (the \*(Siadi\*(So and \*(Siloc\*(So in the previous example) -can be translated to machine code that performs the required computation -and also puts the result in the required place. If it is found to be -too difficult for the frontend to insert these \*(Simes\*(So instructions -at the right place the peep-hole optimizer might swap the \*(Simes\*(So and -the instruction that computes the parameter. -.PP -For some architectures, it is also -possible to generate more efficient code for a procedure when it is a -so-called leaf-procedure: a procedure that doesn't call other procedures. -On the SPARC, for example, it is not necessary to rotate the register -window for a call to a leaf procedure and it is also possible to use -the global registers for register variables in leaf procedures. -It will be a little harder to insert useful messages about leaf procedures, -because just as with register messages, they are only useful to the -backend when they appear immediately -after or before the \*(Sipro\*(So pseudo instruction. The frontend, -however, only knows whether a certain procedure is a leaf-procedure or not -when it has already generated the entire procedure in EM. Just as with the -\*(Sipro ? / end n\*(So-dilemma the peep-hole optimizer -.[ [ -Using Peephole Optimization -.]] -might be able to lend a hand -and help us out by delaying EM-code generation until it has reached the -end of the procedure. 
-.PP -As with most optimizations, the main problem is that they have to be -implemented with the \*(Simes\*(So pseudo instruction. -Because the \*(Simes\*(So instruction can have many different meanings -depending on its argument, -it is important that all optimizers recognize and respect them. Addition -of even a single message will require careful inspection of, and maybe even -incorporate small changes to each of the optimizers. -.bp diff --git a/doc/sparc/A b/doc/sparc/A deleted file mode 100644 index 2fc580ab6..000000000 --- a/doc/sparc/A +++ /dev/null @@ -1,184 +0,0 @@ -.In -.SH -A. MEASUREMENTS -.SH -A.1. \*(OQThe bottom line\*(CQ -.PP -Although examples often are most illustrative, the cruel world out there is -usually more interested in everyday performance figures. To satisfy those -people too, we will present a series of measurements on our code expander -taken from (close to) real life situations. These include measurements -of compile and run times of different programs, -compiled with different compilers. -.SH -A.2. Compile time measurements -.PP -Figure A.2.1 shows compile-time measurements for typical C code: -the dhrystone benchmark\(dg -.[ [ -dhrystone -.]]. -.FS -\(dg To be certain that we only tested the compiler and not the quality of -the code in the library, we have added our own version of -\fIstrcmp\fR and \fIstrcpy\fR and have not used the ones present in the -library. -.FE -The numbers represent the duration of each separate pass of the compiler. -The numbers at the end of each bar represent the total duration of the -compilation process. As with all measurements in this chapter, the -quoted time or duration is the sum of user and system time in seconds. 
-.PS -copy "pics/compile_bars" -.PE -.DS -.IP cem: 6 -C to EM frontend -.IP opt: -EM peep-hole optimizer -.IP be: -EM to assembler backend -.IP cpp: -Sun's C preprocessor -.IP ccom: -Sun's C compiler -.IP iropt: -Sun's optimizer -.IP cg: -Sun's code generator -.IP as: -Sun's assembler -.IP ld: -Sun's linker -.ce 1 -\fIFigure A.2.1: compile-time measurements.\fR -.DE -.sp -.PP -A close examination of the first two bars in fig A.2.1 shows that the maximum -achievable compile-time -gain compared to \fIcc\fR is about 50% for medium-sized -programs.\(dd -.FS -\(dd (cpp+ccom+as+ld)/(cem+as+ld) = 1.53 -.FE -For small programs the gain will be less, due to the almost constant -start-up time of each pass in the compilation process. Only a -built-in assembler may increase this number up to -180% in the ideal case that the optimizer, backend and assembler -would run in zero time. Speed-ups of 5 to 10 times as mentioned in -.[ [ -fast portable compilers -.]] -are therefore not possible on the Sun-4 family. This is also due to -Sun's implementation of saving and restoring register windows. With -the current implementation in which only a single window is saved -or restored on a register-window overflow, it is very time consuming -when programs have highly dynamic stack use -due to procedure calls (as is often the case with compilers). -.PP -Although we are currently a little slower than \fIcc\fR, it is hard to -blame this on our backend. Optimizing the backend so that it would run -twice as fast would only reduce the total compilation process by -a mere 14%. -.PP -Finally it is nice to see that our push/pop-optimization, -initially designed to generate faster code, has also increased the -compilation speed. (see also figures A.4.1 and A.4.2.) -.SH -A.3. Run time performance -.PP -Figure A.3.1 shows the run-time performance of different compilers. 
-All results are normalized, where the best available compiler (Sun's -compiler with full optimization) is represented by 1.0 on our scale. -.PS -copy "pics/run-time_bars" -.PE -.ce 1 -\fIFigure A.3.1: run time performance.\fR -.sp 1 -.PP -The fact that our compiler behaves rather poorly compared to Sun's -compiler is due to the fact that the dhrystone benchmark uses -relatively many subroutine calls; all of which have to be 'emulated' -by our backend. -.SH -A.4. Overall performance -.LP -In the next two figures we will show the combined run and compile time -performance of 'our' compiler (the ACK C frontend and our backend) -compared to Sun's C compiler. Figure A.4.1 shows the results from -measurements on the dhrystone benchmark. -.G1 -frame invis left solid bot solid -label left "run time" "(in \(*msec/dhrystone)" -label bot "compile time (in sec)" -coord x 0,21 y 0,610 -ticks left out from 0 to 600 by 200 -ticks bot out from 0 to 20 by 5 -"\(bu" at 3.5, 1000000/1700 -"ack w/o opt" ljust at 3.5 + 1, 1000000/1700 -"\(bu" at 2.8, 1000000/8770 -"ack with opt" below at 2.8 + 0.1, 1000000/8770 -"\(bu" at 16.0, 1000000/10434 -"ack -O4" above at 16.0, 1000000/10434 -"\(bu" at 2.3, 1000000/7270 -"\fIcc\fR" above at 2.3, 1000000/7270 -"\(bu" at 9.0, 1000000/12500 -"\fIcc -O4\fR" above at 9.0, 1000000/12500 -"\(bu" at 5.9, 1000000/15250 -"\fIcc -O\fR" below at 5.9, 1000000/15250 -.G2 -.ce 1 -\fIFigure A.4.1: overall performance on dhrystones. -.sp 1 -.LP -Fortunately for us, dhrystones are not all there is. 
The following -figure shows the same measurements as the previous one, except -this time we took a benchmark that uses no subroutines: an implementation -of Eratosthenes' sieve: -.G1 -frame invis left solid bot solid -label left "run time" "for one run" "(in sec)" left .6 -label bot "compile time (in sec)" -coord x 0,11 y 0,21 -ticks bot out from 0 to 10 by 5 -ticks left out from 0 to 20 by 5 -"\(bu" at 2.5, 17.28 -"ack w/o opt" above at 2.5, 17.28 -"\(bu" at 1.6, 2.93 -"ack with opt" above at 1.6, 2.93 -"\(bu" at 9.4, 2.26 -"ack -O4" above at 9.4, 2.26 -"\(bu" at 1.5, 7.43 -"\fIcc\fR" above at 1.5, 7.43 -"\(bu" at 2.7, 2.02 -"\fIcc -O4\fR" ljust at 1.9, 1.2 -"\(bu" at 2.6, 2.10 -"\fIcc -O\fR" ljust at 3.1,2.5 -.G2 -.ce 1 -\fIFigure A.4.2: overall performance on Eratosthenes' sieve. -.sp 1 -.PP -Although the above figures speak for themselves, a small comment -may be in place. At first it is clear that our compiler is neither -faster than \fIcc\fR, nor produces faster code than \fIcc -O4\fR. It should -also be noted however, that we do produce better code than \fIcc\fR -at only a very small additional cost. -It is also worth noticing that push-pop optimization -increases run-time speed as well as compile speed. -The first seems rather obvious, -since optimized code is -faster code, but the increase in compile speed may come as a surprise. -The main reason is that the \fIas\fR+\fIld\fR time depends largely on the -amount of generated code, which in general -depends on the efficiency of the code. -Push-pop optimization removes a lot of useless instructions which -would otherwise -have found their way through to the assembler and the loader. -Useless instructions inserted in an early stage in the compilation -process will slow down every following stage, so elimination of useless -instructions in an early stage, even when it requires a little computational -overhead, can often be beneficial to the overall compilation speed. 
-.bp diff --git a/doc/sparc/B b/doc/sparc/B deleted file mode 100644 index 04fdadcd6..000000000 --- a/doc/sparc/B +++ /dev/null @@ -1,128 +0,0 @@ -.In -.SH -B. IMPLEMENTATION -.SH -B.1. Excerpts from the non-optimized EM_table -.PP -Even though the non-optimized version of the EM_table is relatively -straight-forward, examples have never hurt anybody. -One of the simplest instructions is the \*(Siloc\*(So, which appears in -our EM_table as follows: -.DS -\f6 -.TA 8 16 24 32 40 48 56 64 -C_loc ==> "set $1, T1"; - "dec 4, SP"; - "st T1, [SP]". -\f1 -.DE -Just as \*(SiSP\*(So is an alias for \*(Si%l0\*(So, \*(SiT1\*(So is -an alias for \*(Si%g1\*(So. -A little more complex is the \*(Siadi\*(So which performs integer -addition. -.DS -\f6 -C_adi ==> "ld [SP], T1"; - "ld [SP+4], T2"; - "add T1, T2, T3"; - "st T3, [SP+4]"; - "inc 4, SP". -\f1 -.DE -We could go on with even more complex instructions, but since that would -not contribute to anything the reader is referred to the implementation -for more details. -.SH -B.2. Excerpts from the optimized EM_table -.PP -The optimized EM_table uses the cache primitives mentioned in chapter 4. -This means that the \*(Siloc\*(So this time appears as -.DS -\f6 -C_loc ==> push_const($1). -\f1 -.DE -The \*(Silol\*(So can now be written as -.DS -\f6 -C_lol ==> push_reg(LB); - inc_tos($1); - push_const(4); - C_los(4). -\f1 -.DE -Due to the law of conservation of misery somebody has to do the dirty work. -In this case, it is the \*(Silos\*(So. To show just a small part of -the implementation of the \*(Silos\*(So: -.DS -\f6 -C_los $1 == 4 ==> - if (type_of_tos() == T_cst) { - arith size; - const_str_t n; - - size= pop_const(); - if (size <= 4) { - reg_t a; - reg_t b; - char *LD; - - switch (size) { - case 1: LD = "ldub"; break; - case 2: LD = "lduh"; break; - case 4: LD = "ld"; break; - default: arg_error("C_los", size); - } - a = pop_reg_c13(n); - b = alloc_reg(); - "$LD [$a+$n], $b"; - push_reg(b); - free_reg(a); - } else ...
-\f1 -.DE -For the full implementation, the reader is again referred to the actual -implementation. Just to show how other instructions are affected -by the optimization we will show the implementation of the \*(Sitge\*(So -instruction: -.DS -\f6 -C_tge ==> { - reg_t a; - reg_t b; - - a = pop_reg(); - b = alloc_reg(); - " tst $a"; - " bge,a 1f"; - " mov 1, $b"; /* delay slot */ - " set 0, $b"; - "1:"; - free_reg(a); - push_reg(b); - }. - -\f1 -.DE -.SH -.bp -CREDITS -.PP -In order of appearance: -.TS -center; -r c l. -Original idea - Dick Grune -Design & implementation - Philip Homburg - - Raymond Michiels -Tutor - Dick Grune -Assistant Tutor - Ceriel Jacobs -Proofreading - Dick Grune - - Hans van Eck -.TE -.SH -REFERENCES -.PP -.[ -$LIST$ -.] diff --git a/doc/sparc/Makefile b/doc/sparc/Makefile deleted file mode 100644 index 6bff68167..000000000 --- a/doc/sparc/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# $Header$ - -REFER=refer -TBL=tbl -TARGET=-Tlp -PIC=pic -GRAP=grap - -../sparc.doc: refs title intro 1 2 3 4 5 A B init - $(REFER) -sA+T '-l\", ' -p refs title intro 1 2 3 4 5 A B | $(GRAP) | $(PIC) | $(TBL) | soelim > $@ diff --git a/doc/sparc/init b/doc/sparc/init deleted file mode 100644 index ead703420..000000000 --- a/doc/sparc/init +++ /dev/null @@ -1,20 +0,0 @@ -.de In -.nr PS 12 -.nr VS 14 -.\" .fp 6 AM -.fp 6 CW -.ds Si \f6\s-1 -.ds So \f1\s+1 -.ds OQ `\h'-1p'` -.ds CQ '\h'-1p'' -.. -.de UX -.ie \\n(UX \s-1UNIX\s0\\$1 -.el \{\ -\s-1UNIX\s0\\$1\(dg -.FS -\(dg \s-1UNIX\s0 is a registered bell of AT&T Trademark Laboratories. -.FE -.nr UX 1 -.\} -.. diff --git a/doc/sparc/intro b/doc/sparc/intro deleted file mode 100644 index b8e21300c..000000000 --- a/doc/sparc/intro +++ /dev/null @@ -1,23 +0,0 @@ -.In -.hw de-vised -.TL -A fast backend for SPARC processors -.AU -Philip Homburg -Raymond Michiels -.AI -Dept.
of Mathematics and Computer Science -Vrije Universiteit -Amsterdam, The Netherlands -.AB -The language EM is an intermediate language for use in compiler -construction. -In this paper we describe the construction of a so-called fast backend -which translates EM code to assembler for SPARC processors. -.br -Our construction deviates strongly from the usual procedure. We have -devised and implemented a virtual stack with which it is possible to -generate very acceptable code without much loss in compile time. -.AE -.PP -.bp diff --git a/doc/sparc/note_on_reg_wins b/doc/sparc/note_on_reg_wins deleted file mode 100644 index c2927c1e1..000000000 --- a/doc/sparc/note_on_reg_wins +++ /dev/null @@ -1,58 +0,0 @@ -When developing a fast compiler for the Sun-4 series we have encountered -rather strange behavior of the Sun kernel. - -The problem is that with lots of nested procedure calls, (as -is often the case in compilers and parsers) the registers fill up which -causes a kernel trap. The kernel will then write out some of the registers -to memory to make room for another window. When returning from the nested -procedure call, just the reverse happens: yet another kernel trap so the -kernel can load the register from memory. - -Unfortunately the kernel only saves or loads a single window (= 16 registers) -on each trap. This means that when calling a procedure recursively it causes -a kernel trap on almost every invocation (except for the first few). - -To illustrate this consider the following little program: - ---------------- little program ------------- -f(i) /* calls itself i times */ -int i; -{ - if (i) - f(i-1); -} - -main(argc, argv) -int argc; -char *argv[]; -{ - - - i = atoi(argv[1]); /* # loops */ - j = atoi(argv[2]); /* depth */ - - while (i--) - f(j); -} ------------- end of little program ----------- - - -The performance decreases abruptly when the depth (j) becomes larger -than 5.
On a SPARC station we got the following results: - - depth run time (in seconds) - - 1 0.5 - 2 0.8 - 3 1.0 - 4 1.4 <- from here on it's +6 seconds for each - 5 7.6 step deeper. - 6 13.9 - 7 19.9 - 8 26.3 - 9 32.9 - -Things would be a lot better when instead of just 1, the kernel would -save or restore 4 windows (= 64 registers = 50% on our SPARC stations). - - -Raymond. diff --git a/doc/sparc/pics/.distr b/doc/sparc/pics/.distr deleted file mode 100644 index 32d6efca8..000000000 --- a/doc/sparc/pics/.distr +++ /dev/null @@ -1,12 +0,0 @@ -EM_stack.orig -EM_stack.ours -compile_bars -mem_config -perf -perf.comp -perf.d -perf.dhry -reg_layout -run-time_bars -run-time_bars.bup -signal_stack diff --git a/doc/sparc/pics/EM_stack.orig b/doc/sparc/pics/EM_stack.orig deleted file mode 100644 index 7cae3f3b7..000000000 --- a/doc/sparc/pics/EM_stack.orig +++ /dev/null @@ -1,34 +0,0 @@ -.PS -.ps -2 -.vs -2 -boxwid = 1.5; -boxht = 0.24 -down; -box "actual parameter n-1"; -box "." "." "." ht 0.6; -box "actual parameter 0"; -move 0.3 -box "return status block"; -{arrow <- right with .w at last box.e; \ -box invis wid 0.3 "LB" } -down -move to 2nd last box.s -move 0.1 -box "local variables" -box "compiler temporaries" -move 0.1 -box "register save block" -move 0.1 -box "dynamic local generators" -move 0.1 -box "operand" -box "operand" -move 0.1 -box "parameter m-1" -box "." "." "." 
ht 0.6; -box "parameter 0" with .n at last box .s -{ arrow <- right with .w at last box.e; \ -box invis wid 0.3 "SP" } -.ps +2 -.vs +2 -.PE diff --git a/doc/sparc/pics/EM_stack.ours b/doc/sparc/pics/EM_stack.ours deleted file mode 100644 index 260f2c66f..000000000 --- a/doc/sparc/pics/EM_stack.ours +++ /dev/null @@ -1,106 +0,0 @@ -.ps 10 -.vs 12 -.PS -boxwid = 1.3 -boxht = 0.25 -down; -box "floating point" "register dump area" ht 0.6 -box "tmp float store" -box "register dump area" ht 0.6 -{ arrow <- right with .w at 3/4 ; \ -box invis wid 0.3 "%fp" } -move .1 -box dotted "gap" -{ arrow <- right with .w at last box.e; \ -box invis wid 0.3 "%LB" } -move .1 -box "locals" -box "actual parameter n-1"; -box "." "." "." ht 0.6; -box "actual parameter 0"; -{ arrow <- right with .w at last box.e; \ -box invis wid 0.3 "%SP" } -move 0.1 -box "large gap" "(>64kb)" ht 1.0 -box "register dump area" ht 0.6 -{ arrow <- right with .w at 3/4 ; \ -box invis wid 0.3 "%sp" } -move 0.2 -box invis "\\s+2just before call\\s0" -move 1 -box dotted "gap" -box invis "0 or 4 bytes" "for stack alignment" with .w at last box.e -box invis height .7 "when gap is 0 bytes," "%fp == %LB" with .n at 2nd last box.s -.PF -.PS -down; -move to 2.4,0 -box "floating point" "register dump area" ht 0.6 -box "tmp float store" -box "register dump area" ht 0.6 -{ arrow <- right with .w at 3/4 ; \ -box invis wid 0.3 "%fp" } -move .1 -box dotted "gap" -{ arrow <- right with .w at last box.e; \ -box invis wid 0.3 "%LB" } -move .1 -box "locals" -box "actual parameter n-1"; -box "." "." "." 
ht 0.6; -box "actual parameter 0"; -{ arrow <- right with .w at last box.e; \ -box invis wid 0.3 "%SP" } -move .1 -box dotted "gap" -move .4 -box "floating point" "register dump area" ht 0.6 -box "tmp float store" -box "register dump area" ht 0.6 -{ arrow <- right with .w at 3/4 ; \ -box invis wid 0.3 "%sp" } -move 0.2 -box invis "\\s+2'during' call\\s0" -.PF -.PS -down; -move to 4.8,0 -box "floating point" "register dump area" ht 0.6 -box "tmp float store" -box "register dump area" ht 0.6 -move .1 -box dotted "gap" -move .1 -box "locals" -box "actual parameter n-1"; -box "." "." "." ht 0.6; -box "actual parameter 0"; -move .1 -box dotted "gap" -move .4 -box "floating point" "register dump area" ht 0.6 -box "tmp float store" -box "register dump area" ht 0.6 -{ arrow <- right with .w at 3/4 ; \ -box invis wid 0.3 "%fp" } -move .1 -box dotted "gap" -{ arrow <- right with .w at last box.e; \ -box invis wid 0.3 "%LB" } -move .1 -box "locals" -box "actual parameter n-1"; -box "." "." "." ht 0.6; -box "actual parameter 0"; -{ arrow <- right with .w at last box.e; \ -box invis wid 0.3 "%SP" } -move 0.1 -box "large gap" "(>64kb)" ht 1.0 -box "register dump area" ht 0.6 -{ arrow <- right with .w at 3/4 ; \ -box invis wid 0.3 "%sp" } -move 0.2 -box invis "\\s+2after call\\s0" -.PF -.ps 12 -.vs 14 diff --git a/doc/sparc/pics/compile_bars b/doc/sparc/pics/compile_bars deleted file mode 100644 index 657a41812..000000000 --- a/doc/sparc/pics/compile_bars +++ /dev/null @@ -1,49 +0,0 @@ -.PS -boxht = 0.5 -boxwid = 1 -moveht = 0.65 -down; -{ -right; -box invis "ACK" "w/o" "opt" -box "cem" "0.7" wid 0.7 -box "opt" "0.4" wid 0.4 -box "be" "1.1" wid 1.1 -box "as" "1.4" wid 1.4 -box "ld" "0.4" wid 0.4 -box invis "4.0" wid 0.5 -} -move -{ -right; -box invis "ACK" "with" "opt" -box "cem" "0.7" wid 0.7 -box "opt" "0.4" wid 0.4 -box "be" "0.6" wid 0.6 -box "as" "0.7" wid 0.7 -box "ld" "0.4" wid 0.4 -box invis "2.8" wid 0.5 -} -move -{ -right; -box invis "\fIcc\fR" -box "cpp" "0.2" wid 0.2 
-box "ccom" "1.0" wid 1.0 -box "as" "0.7" wid 0.7 -box "ld" "0.4" wid 0.4 -box invis "2.3" wid 0.5 -} -move -{ -right; -box invis "\fIcc -O4\fR" -box "cpp" "0.2" wid 0.2 -box "ccom" "1.0" wid 1.0 -box "iropt" "5.0 (not to scale!)" wid 1.5 -box "cg" "0.7" wid 0.7 -box "as" "1.7" wid 1.7 -box "ld" "0.4" wid 0.4 -box invis "9.0" wid 0.5 -} -.PE diff --git a/doc/sparc/pics/mem_config b/doc/sparc/pics/mem_config deleted file mode 100644 index 0ad88184f..000000000 --- a/doc/sparc/pics/mem_config +++ /dev/null @@ -1,34 +0,0 @@ -.PS -boxwid = 1.3 -down -[ -right -[ -down; -box "stack" ht .6 -box "free" ht 1 -box "heap" ht .3 -box "text" ht .5 -] -move 1 -[ -down; -box "\s-4SPARC stack\s+4" ht .2 -box "\s-4EM stack\s+4" ht .1 -box "\s-4SPARC stack\s+4" ht .1 -box "\s-4EM stack\s+4" ht .1 -box "\s-4free\s+4" ht .2 -box "\s-4SPARC stack\s+4" ht .1 -box "free" ht .8 -box "heap" ht .3 -box "text" ht .5 -] -] -move .3 -[ -right -box invis "regular \(UX memory layout" -move 1 -box invis "memory layout for EM" -] -.PF diff --git a/doc/sparc/pics/perf b/doc/sparc/pics/perf deleted file mode 100644 index a48965ea0..000000000 --- a/doc/sparc/pics/perf +++ /dev/null @@ -1,12 +0,0 @@ -.G1 -frame invis left solid bot solid -label left "run time" "(log scale)" left .5 -label bot "compile time (log scale)" -coord x 0.1,10 log x y 1000,20000 log y -ticks left out at 2000,5000,10000,20000 -ticks bot out at 0.1 0.3 1.0 3.0 10 -copy "perf.d" thru X - "\(bu" at $1, $2 - "$3" rjust at $1, $2 -X -.G2 diff --git a/doc/sparc/pics/perf.comp b/doc/sparc/pics/perf.comp deleted file mode 100644 index 761fd0671..000000000 --- a/doc/sparc/pics/perf.comp +++ /dev/null @@ -1,7 +0,0 @@ -in-line in ../A - -2.5 17.28 ack w/o opt -1.6 2.93 ack with opt -9.4 2.26 ack -O4 -1.5 7.43 \fIcc\fR -2.7 2.02 \fIcc -O4\fR diff --git a/doc/sparc/pics/perf.d b/doc/sparc/pics/perf.d deleted file mode 100644 index 9cf4081e8..000000000 --- a/doc/sparc/pics/perf.d +++ /dev/null @@ -1,4 +0,0 @@ -1.0 1700 ack w/o opt -1.9 8000 
ack with opt -1.6 8000 \fIcc\fR -7 18000 \fIcc -O4\fR diff --git a/doc/sparc/pics/perf.dhry b/doc/sparc/pics/perf.dhry deleted file mode 100644 index 8faa4e332..000000000 --- a/doc/sparc/pics/perf.dhry +++ /dev/null @@ -1,7 +0,0 @@ -in-line in ../A - -3.5 1700 ack w/o opt -2.8 8770 ack with opt -16.0 10434 ack -O4 -2.3 7270 \fIcc\fR -9.0 12500 \fIcc -O4\fR diff --git a/doc/sparc/pics/reg_layout b/doc/sparc/pics/reg_layout deleted file mode 100644 index 58ddd92e1..000000000 --- a/doc/sparc/pics/reg_layout +++ /dev/null @@ -1,24 +0,0 @@ -.nr PS 12 -.nr VS 14 -.PP -.TS -allbox; -l l l l -l2f6 l l2f6 l. -g0 0 l0 EM_SP -g1 temporary 1 l1 EM_LB -g2 temporary 2 l2 -g3 temporary 3 l3 reserved -g4 64k..1M l4 reserved -g5 temporary 4 l5 reserved -g6 line number l6 reserved -g7 file name l7 reserved -o0 param 1 i0 -o1 param 2 i1 -o2 param 3 i2 -o3 param 4 i3 -o4 RETL_LD i4 RETL_ST -o5 RETH_LD i5 RETH_ST -sp stack pointer fp frame pointer -o7 xxx i7 return address -.TE diff --git a/doc/sparc/pics/run-time_bars b/doc/sparc/pics/run-time_bars deleted file mode 100644 index cf4d29f93..000000000 --- a/doc/sparc/pics/run-time_bars +++ /dev/null @@ -1,101 +0,0 @@ -.PS -boxht = 0.5 -boxwid = 1 -moveht = 1 -down; -{ -right; -box invis "ACK" "w/o" "opt." -move -[ -down; -boxht = 0.25 -box wid 4.5 -"Sieve" ljust at last box.w + 0.1,-0.02 -"10(!)" ljust at last box.e + 0.1,-0.02 -box wid 4.5 with .nw at last box.sw -"Dhrystones" ljust at last box.w + 0.1,-0.02 -"10(!)" ljust at last box.e + 0.1,-0.02 -] with .w at last box.e -} -move -{ -right; -box invis "ACK" "with" "our" "opt." 
-move -[ -down; -boxht = 0.25 -box wid 1.4 -"Sieve" ljust at last box.w + 0.1,-0.02 -"1.4" ljust at last box.e + 0.1,-0.02 -box wid 1.9 with .nw at last box.sw -"Dhrystones" ljust at last box.w + 0.1,-0.02 -"1.9" ljust at last box.e + 0.1,-0.02 -] with .w at last box.e -} -move -{ -right; -box invis "ACK" "-O4" -move -[ -down; -boxht = 0.25 -box wid 1.1 -"Sieve" ljust at last box.w + 0.1,-0.02 -"1.1" ljust at last box.e + 0.1,-0.02 -box wid 1.6 with .nw at last box.sw -"Dhrystones" ljust at last box.w + 0.1,-0.02 -"1.6" ljust at last box.e + 0.1,-0.02 -] with .w at last box.e -} -move -{ -right; -box invis "Sun's" "compiler" "w/o opt." -move -[ -down; -boxht = 0.25 -box wid 3.7 -"Sieve" ljust at last box.w + 0.1,-0.02 -"3.7" ljust at last box.e + 0.1,-0.02 -box wid 2.2 with .nw at last box.sw -"Dhrystones" ljust at last box.w + 0.1,-0.02 -"2.2" ljust at last box.e + 0.1,-0.02 -] with .w at last box.e -} -move -{ -right; -box invis "Sun's" "compiler" "-O" -move -[ -down; -boxht = 0.25 -box wid 1.1 -"Sieve" ljust at last box.w + 0.1,-0.02 -"1.1" ljust at last box.e + 0.1,-0.02 -box wid 0.8 with .nw at last box.sw -"Dhryst." ljust at last box.w + 0.1,-0.02 -"0.8!" 
ljust at last box.e + 0.1,-0.02 -] with .w at last box.e -} -move -{ -right; -box invis "Sun's" "compiler" "-O4" -move -[ -down; -boxht = 0.25 -box wid 1.0 -"Sieve" ljust at last box.w + 0.1,-0.02 -"1.0" ljust at last box.e + 0.1,-0.02 -box wid 1.0 with .nw at last box.sw -"Dhrystones" ljust at last box.w + 0.1,-0.02 -"1.0" ljust at last box.e + 0.1,-0.02 -] with .w at last box.e -} -.PE diff --git a/doc/sparc/pics/run-time_bars.bup b/doc/sparc/pics/run-time_bars.bup deleted file mode 100644 index 6bb014dad..000000000 --- a/doc/sparc/pics/run-time_bars.bup +++ /dev/null @@ -1,100 +0,0 @@ -.PS -boxht = 0.5 -boxwid = 1 -moveht = 1 -down; -{ -right; -box invis "ACK" "w/o" "opt" -move -[ -down; -boxht = 0.25 -box wid 4.5 -"C (arithmetic)" ljust at last box.w + 0.1,-0.02 -"10(!)" ljust at last box.e + 0.1,-0.02 -box wid 4.5 with .nw at last box.sw -"C (dhrystones)" ljust at last box.w + 0.1,-0.02 -"10(!)" ljust at last box.e + 0.1,-0.02 -box wid 4.5 with .nw at last box.sw -"Modula-2" ljust at last box.w + 0.1,-0.02 -"8(!)" ljust at last box.e + 0.1,-0.02 -] with .w at last box.e -} -move -{ -right; -box invis "ACK" "with" "peep-hole" "opt" -move -[ -down; -boxht = 0.25 -box wid 1.4 -"C (arithmetic)" ljust at last box.w + 0.1,-0.02 -"1.4" ljust at last box.e + 0.1,-0.02 -box wid 1.9 with .nw at last box.sw -"C (dhrystones)" ljust at last box.w + 0.1,-0.02 -"1.9" ljust at last box.e + 0.1,-0.02 -box wid 2.5 with .nw at last box.sw -"Modula-2" ljust at last box.w + 0.1,-0.02 -"2.5" ljust at last box.e + 0.1,-0.02 -] with .w at last box.e -} -move -{ -right; -box invis "ACK" "-O4" -move -[ -down; -boxht = 0.25 -box wid 1.1 -"C (arithmetic)" ljust at last box.w + 0.1,-0.02 -"1.1" ljust at last box.e + 0.1,-0.02 -box wid 1.6 with .nw at last box.sw -"C (dhrystones)" ljust at last box.w + 0.1,-0.02 -"1.6" ljust at last box.e + 0.1,-0.02 -box wid 2.5 with .nw at last box.sw -"Modula-2" ljust at last box.w + 0.1,-0.02 -"2.5" ljust at last box.e + 0.1,-0.02 -] with .w at last 
box.e -} -move -{ -right; -box invis "Sun's" "compiler" "w/o opt." -move -[ -down; -boxht = 0.25 -box wid 3.7 -"C (arithmetic)" ljust at last box.w + 0.1,-0.02 -"3.7" ljust at last box.e + 0.1,-0.02 -box wid 2.2 with .nw at last box.sw -"C (dhrystones)" ljust at last box.w + 0.1,-0.02 -"2.2" ljust at last box.e + 0.1,-0.02 -box wid 1.8 with .nw at last box.sw -"Modula-2" ljust at last box.w + 0.1,-0.02 -"1.8" ljust at last box.e + 0.1,-0.02 -] with .w at last box.e -} -move -{ -right; -box invis "Sun's" "compiler" "-O4" -move -[ -down; -boxht = 0.25 -box wid 1.0 -"C (arith.)" ljust at last box.w + 0.1,-0.02 -"1.0" ljust at last box.e + 0.1,-0.02 -box wid 1.0 with .nw at last box.sw -"C (dhryst.)" ljust at last box.w + 0.1,-0.02 -"1.0" ljust at last box.e + 0.1,-0.02 -box wid 1.0 with .nw at last box.sw -"Modula-2" ljust at last box.w + 0.1,-0.02 -"1.0" ljust at last box.e + 0.1,-0.02 -] with .w at last box.e -} -.PE diff --git a/doc/sparc/pics/signal_stack b/doc/sparc/pics/signal_stack deleted file mode 100644 index 6afe5ad76..000000000 --- a/doc/sparc/pics/signal_stack +++ /dev/null @@ -1,42 +0,0 @@ -.PS -boxwid = 1.3 -down -[ -right -[ -down; -box "\s-4SPARC stack\s+4" ht .2 -box "\s-4EM stack\s+4" ht .1 -box "\s-4SPARC stack\s+4" ht .1 -box "\s-4EM stack\s+4" ht .1 -box "\s-4free\s+4" ht .2 -box "\s-4SPARC stack\s+4" ht .1 -box "free" ht .8 -box "heap" ht .3 -box "text" ht .5 -] -move 1 -[ -down; -box "\s-4SPARC stack\s+4" ht .2 -box "\s-4EM stack\s+4" ht .1 -box "\s-4SPARC stack\s+4" ht .1 -box "\s-4EM stack\s+4" ht .1 -box "\s-4free\s+4" ht .2 -box "\s-4SPARC stack\s+4" ht .1 -box "\s-4EM stack\s+4" ht .1 -box "\s-4free\s+4" ht .2 -box "\s-4SPARC stack\s+4" ht .1 -box "free" ht .4 -box "heap" ht .3 -box "text" ht .5 -] -] -move .3 -[ -right -box invis "before signal" -move 1 -box invis "during (1st) signal" -] -.PF diff --git a/doc/sparc/printP4P b/doc/sparc/printP4P deleted file mode 100644 index e871c2534..000000000 --- a/doc/sparc/printP4P +++ /dev/null @@ 
-1,31 +0,0 @@ -echo $0 -case $1 in -1 ) - CMD="cat" -;; -2 ) - CMD="cat" -;; -3 ) - CMD="cat" -;; -4 ) - CMD="pic | tbl" -;; -5 ) - CMD="tbl" -;; -A ) - CMD="grap | pic" -;; -B ) - CMD="tbl" -;; -esac -echo $0 -if [ $0 = printP4P ] -then - refer -sA+T '-l\", ' -p refs $1 | eval $CMD | troff -ms -Tp4p | dip -Tp4p -Pp4p -else - xtroff -full -geom 665x883+566+0 -command "refer -sA+T '-l\", ' -p refs $1 | $CMD | troff -ms -Tp4p" -fi diff --git a/doc/sparc/proto.make b/doc/sparc/proto.make deleted file mode 100644 index bd3f5834a..000000000 --- a/doc/sparc/proto.make +++ /dev/null @@ -1,32 +0,0 @@ -# $Id$ - -#PARAMS do not remove this line! - -SRC_DIR = $(SRC_HOME)/doc/sparc -TARGET = $(TARGET_HOME)/doc/sparc.doc - -REFER=refer -TBL=tbl -PIC=pic -GRAP=grap - -SRC = $(SRC_DIR)/refs \ - $(SRC_DIR)/init \ - $(SRC_DIR)/title \ - $(SRC_DIR)/intro \ - $(SRC_DIR)/1 \ - $(SRC_DIR)/2 \ - $(SRC_DIR)/3 \ - 4 \ - $(SRC_DIR)/5 \ - A \ - $(SRC_DIR)/B - -$(TARGET): $(SRC) - $(REFER) -sA+T '-l\", ' -p $(SRC) | $(GRAP) | $(PIC) | $(TBL) > $@ - -4: $(SRC_DIR)/4 - sed 's^pics/^$(SRC_DIR)/pics/^' < $(SRC_DIR)/4 > 4 - -A: $(SRC_DIR)/A - sed 's^pics/^$(SRC_DIR)/pics/^' < $(SRC_DIR)/A > A diff --git a/doc/sparc/refs b/doc/sparc/refs deleted file mode 100644 index ba46c3b47..000000000 --- a/doc/sparc/refs +++ /dev/null @@ -1,185 +0,0 @@ -%T The design of very fast portable compilers -%A A.S. Tanenbaum -%A M.F. Kaashoek -%A K.G. Langendoen -%A C.J.H. Jacobs -%J SIGPLAN Notices -%V 24 -%N 11 -%P 125-131 -%D November 1989 - -%T A Programmer-friendly LL(1) Parser Generator -%A D. Grune -%A C.J.H. 
Jacobs -%J Software \- Practice and Experience -%V 18 -%N 1 -%P 29-38 -%D January 1988 - -%T The Code Expander Generator -%A Frans Kaashoek -%A Koen Langendoen -%R IM-9 -%I Vrije Universiteit, Amsterdam -%D November 1987 - -%T The ACK Pascal Compiler -%A Aad Geudeke -%A Frans Hofmeester -%R IM-8 -%I Vrije Universiteit, Amsterdam -%D November 1987 - -%T The EM-interpreter -%A Eddo de Groot -%A Leo van den Berge -%R IM-7 -%I Vrije Universiteit, Amsterdam -%D June 1987 - -%T A set of multi\-process primitives for stack based machines -%A K. Bot -%A E. Scheffer -%R IR-122 -%I Vrije Universiteit, Amsterdam -%D December 1986 - -%T An Occam Compiler -%A K. Bot -%A E. Scheffer -%R IM-6 -%I Vrije Universiteit, Amsterdam -%D December 1986 - -%T Language- and Machine-independent Global Optimization on Intermediate Code -%A H.E. Bal -%A A.S. Tanenbaum -%J Computer Languages -%V 11 -%N 2 -%P 105-121 -%D April 1986 - -%T The ACK Target Optimizer -%A H.E. Bal -%R IR-107 -%D 1985 -%I Vrije Universiteit, Amsterdam - -%T Some Topics in Parser Generation -%A C.J.H. Jacobs -%R IR-105 -%D October 1985 -%I Vrije Universiteit, Amsterdam - -%T The CEM compiler -%A E.H. Baalbergen -%A D. Grune -%A M. Waage -%R IM-4 -%I Vrije Universiteit, Amsterdam -%D 1985 - -%T The Design and Implementation of the EM Global Optimizer -%A H.E. Bal -%I Vrije Universiteit, Amsterdam -%R IR-99 -%D March 1985 - -%T Does anybody out there want to write HALF of a compiler? -%A A.S. Tanenbaum -%A E.G. Keizer -%A H. van Staveren -%J Sigplan Notices -%V 19 -%N 8 -%P 106-108 -%D August 1984 - -%T Amsterdam Compiler Kit documentation -%A A.S. Tanenbaum et. al. -%I Vrije Universiteit, Amsterdam -%R IR-90 -%D June 1984 - -%T A Practical Toolkit for Making Portable Compilers -%A A. S. Tanenbaum -%A H. van Staveren -%A E. G. Keizer -%A J. W. Stevenson -%J Communications of the ACM -%V 26 -%N 9 -%P 654-660 -%D September 1983 - -%T Description of a Machine Architecture for use with Block Structured -Languages -%A A. S. 
Tanenbaum -%A H. van Staveren -%A E. G. Keizer -%A J. W. Stevenson -%R IR-81 -%D August 1983 -%I Vrije Universiteit, Amsterdam - -%T A Unix Toolkit for Making Portable Compilers -%A A.S. Tanenbaum -%A H. van Staveren -%A E.G. Keizer -%A J.W. Stevenson -%J Proceedings USENIX conf. -%C Toronto, Canada -%V 26 -%D July 1983 -%P 255-261 - -%T Using Peephole Optimization on Intermediate Code -%A A.S. Tanenbaum -%A J.M. van Staveren -%A J.W. Stevenson -%J TOPLAS -%V 4 -%N 1 -%P 21-36 -%D January 1982 - -%T EM-1 Compiler -%A A.S. Tanenbaum -%J Pascal News -%D September 1981 -%P 4-38 - -%T A portable compiler for the Proposed ISO Standard Pascal Language -%A A.S. Tanenbaum -%A J.W. Stevenson -%A H. van Staveren -%J Sigplan Notices -%V 15 -%N 10 -%D 1980 - -%T Implications of Structured Programming for Machine Architecture -%A A.S. Tanenbaum -%J CACM -%V 21 -%N 3 -%P 237-246 -%D March 1978 - -%T The table driven code generator from the Amsterdam Compiler Kit (Second -revised edition) -%A H. van Staveren -%I Vrije Universiteit, Amsterdam -%R on-line internal ACK documentation -%D early 1985 - -%T Dhrystone Benchmark: Rationale for Version 2 and Measurement Rules -%A R.P. 
Weicker -%J Sigplan Notices -%V 23 -%N 8 -%D august 1988 -%P 49-62 diff --git a/doc/sparc/timing b/doc/sparc/timing deleted file mode 100644 index 9887db71b..000000000 --- a/doc/sparc/timing +++ /dev/null @@ -1,22 +0,0 @@ - DHRYSTONES V2.0 - - cc cc -O4 cc -O fccO fccCE ack ack -O4 -compile time: - real 4.0 12.0 10.0 6.4 8.0 31.0 - user 1.6 7.3 4.1 1.9 1.8 2.0 9.3 - sys 0.9 2.1 1.8 2.5 1.5 2.0 7.7 - -run time: 7263 16250 15250 4730 3430 8474 10434 -(stones/sec) - - SIEVE - - cc cc -O4 fccO fccCE ack ack -O4 -compile time: - real 2.4 4.4 x 3.3 6.4 17.0 - user 0.8 1.6 x 0.7 0.7 3.2 - sys 0.7 1.0 x 0.8 1.3 6.2 - -run time: 7.43 2.02 x 12.18 2.93 2.26 - -All ack-derived compilers are shell script driven diff --git a/doc/sparc/title b/doc/sparc/title deleted file mode 100644 index f073f45ec..000000000 --- a/doc/sparc/title +++ /dev/null @@ -1,15 +0,0 @@ -.In -.TL -.sp 1.2c -A fast backend for SPARC processors -.AU -Philip Homburg -Raymond Michiels -.AI -Dept. of Mathematics and Computer Science -Vrije Universiteit -Amsterdam, The Netherlands -.PP -.sp 1i -Afstudeerverslag, 20 augustus 1990 -.bp diff --git a/doc/toolkit.doc b/doc/toolkit.doc deleted file mode 100644 index 913594a2b..000000000 --- a/doc/toolkit.doc +++ /dev/null @@ -1,896 +0,0 @@ -.\" $Id$ -.RP -.ND July 1984 -.tr ~ -.ds as * -.TL -A Practical Tool Kit for Making Portable Compilers -.AU -Andrew S. Tanenbaum -Hans van Staveren -E. G. Keizer -Johan W. Stevenson -.AI -Mathematics Dept. -Vrije Universiteit -Amsterdam, The Netherlands -.AB -The Amsterdam Compiler Kit is an integrated collection of programs designed to -simplify the task of producing portable (cross) compilers and interpreters. -For each language to be compiled, a program (called a front end) -must be written to -translate the source program into a common intermediate code. -This intermediate code can be optimized and then either directly interpreted -or translated to the assembly language of the desired target machine. 
-The paper describes the various pieces of the tool kit in some detail, as well -as discussing the overall strategy. -.sp -Keywords: Compiler, Interpreter, Portability, Translator -.sp -CR Categories: 4.12, 4.13, 4.22 -.sp 12 -Author's present addresses: - A.S. Tanenbaum, H. van Staveren, E.G. Keizer: Mathematics - Dept., Vrije Universiteit, Postbus 7161, 1007 MC Amsterdam, - The Netherlands - - J.W. Stevenson: NV Philips, S&I, T&M, Building TQ V5, Eindhoven, - The Netherlands -.AE -.NH 1 -Introduction -.PP -As more and more organizations acquire many micro- and minicomputers, -the need for portable compilers is becoming more and more acute. -The present situation, in which each hardware vendor provides its own -compilers -- each with its own deficiencies and extensions, and none of them -compatible -- leaves much to be desired. -The ideal situation would be an integrated system containing a family -of (cross) compilers, each compiler accepting a standard source language and -producing code for a wide variety of target machines. -Furthermore, the compilers should be compatible, so programs written in -one language can call procedures written in another language. -Finally, the system should be designed so as to make adding new languages -and new machines easy. -Such an integrated system is being built at the Vrije Universiteit. -Its design and implementation is the subject of this article. -.PP -Our compiler building system, which is called the "Amsterdam Compiler Kit" -(ACK), can be thought of as a "tool kit." -It consists of a number of parts that can be combined to form compilers -(and interpreters) with various properties. -The tool kit is based on an idea (UNCOL) that was first suggested in 1960 -[7], but which never really caught on then. -The problem which UNCOL attempts to solve is how to make a compiler for -each of -.I N -languages on -.I M -different machines without having to write -.I N -x -.I M -programs. -.PP -As shown in Fig. 
1, the UNCOL approach is to write -.I N -"front ends," each -of which translates one source language to a common intermediate language, -UNCOL (UNiversal Computer Oriented Language), and -.I M -"back ends," each -of which translates programs in UNCOL to a specific machine language. -Under these conditions, only -.I N -+ -.I M -programs must be written to provide all -.I N -languages on all -.I M -machines, instead of -.I N -x -.I M -programs. -.PP -Various researchers have attempted to design a suitable UNCOL -[2,8], but none of these have become popular. -It is our belief that previous attempts have failed because they have been -too ambitious, that is, they have tried to cover all languages -and all machines using a single UNCOL. -Our approach is more modest: we cater only to algebraic languages -and machines whose memory consists of 8-bit bytes, each with its own address. -Typical languages that could be handled include -Ada, ALGOL 60, ALGOL 68, BASIC, C, FORTRAN, -Modula, Pascal, PL/I, PL/M, PLAIN, and RATFOR, -whereas COBOL, LISP, and SNOBOL would be less efficient. -Examples of machines that could be included are the Intel 8080 and 8086, -Motorola 6800, 6809, and 68000, Zilog Z80 and Z8000, DEC PDP-11 and VAX, -and IBM 370 but not the Burroughs 6700, CDC Cyber, or Univac 1108 (because -they are not byte-oriented). -With these restrictions, we believe the old UNCOL idea can be used as the -basis of a practical compiler-building system. -.KF -.sp 15P -.ce 1 -Fig. 1. The UNCOL model. -.sp -.KE -.NH 1 -An Overview of the Amsterdam Compiler Kit -.PP -The tool kit consists of eight components: -.sp - 1. The preprocessor. - 2. The front ends. - 3. The peephole optimizer. - 4. The global optimizer. - 5. The back end. - 6. The target machine optimizer. - 7. The universal assembler/linker. - 8. The utility package. -.sp -.PP -A fully optimizing compiler, -depicted in Fig. 2, has seven cascaded phases. 
-Conceptually, each component reads an input file and writes a -transformed output file to be used as input to the next component. -In practice, some components may use temporary files to allow multiple -passes over the input or internal intermediate files. -.KF -.sp 12P -.ce 1 -Fig. 2. Structure of the Amsterdam Compiler Kit. -.sp -.KE -.PP -In the following paragraphs we will briefly describe each component. -After this overview, we will look at all of them again in more detail. -A program to be compiled is first fed into the (language independent) -preprocessor, which provides a simple macro facility, -and similar textual facilities. -The preprocessor's output is a legal program in one of the programming -languages supported, whereas the input is a program possibly augmented -with macros, etc. -.PP -This output goes into the appropriate front end, whose job it is to -produce intermediate code. -This intermediate code (our UNCOL) is the machine language for a simple -stack machine called EM (Encoding Machine). -A typical front end might build a parse tree from the input, and then -use the parse tree to generate EM code, which is similar to reverse Polish. -In order to perform this work, the front end has to maintain tables of -declared variables, labels, etc., determine where to place the -data structures in memory, and so on. -.PP -The EM code generated by the front end is fed into the peephole optimizer, -which scans it with a window of a few instructions, replacing certain -inefficient code sequences by better ones. -Such a search is important because EM contains instructions to handle -numerous important special cases efficiently -(e.g., incrementing a variable by 1). -It is our strategy to relieve the front ends of the burden of hunting for -special cases because there are many front ends and only one peephole -optimizer. -By handling the special cases in the peephole optimizer, -the front ends become simpler, easier to write and easier to maintain. 
-.PP -Following the peephole optimizer is a global optimizer [5], which -unlike the peephole optimizer, examines the program as a whole. -It builds a data flow graph to make possible a variety of -global optimizations, -among them, moving invariant code out of loops, avoiding redundant -computations, live/dead analysis and eliminating tail recursion. -Note that the output of the global optimizer is still EM code. -.PP -Next comes the back end, which differs from the front ends in a -fundamental way. -Each front end is a separate program, whereas the back end is a single -program that is driven by a machine dependent driving table. -The driving table for a specific machine tells how the EM code is mapped -onto the machine's assembly language. -Although a simple driving table might just macro expand each EM instruction -into a sequence of target machine instructions, a much more sophisticated -translation strategy is normally used, as described later. -For speed, the back end does not actually read in the driving table at run time. -Instead, the tables are compiled along with the back end in advance, resulting -in one binary program per machine. -.PP -The output of the back end is a program in the assembly language of some -particular machine. -The next component in the pipeline reads this program and performs peephole -optimization on it. -The optimizations performed here involve idiosyncrasies -of the target machine that cannot be performed in the machine-independent -EM-to-EM peephole optimizer. -Typically these optimizations take advantage of special instructions or special -addressing modes. -.PP -The optimized target machine assembly code then goes into the final -component in the pipeline, the universal assembler/linker. -This program assembles the input to object format, extracting routines from -libraries and including them as needed. 
-.PP -The final component of the tool kit is the utility package, which contains -various test programs, interpreters for EM code, -EM libraries, conversion programs, and other aids for the implementer and -user. -.NH 1 -The Preprocessor -.PP -The function of the preprocessor is to extend all the programming languages -by adding certain generally useful facilities to them in a uniform way. -One of these is a simple macro system, in which the user can give names to -character strings. -The names can be used in the program, with the knowledge that they will be -macro expanded prior to being input to the front end. -Macros can be used for named constants, expanding short "procedures" -in line, etc. -.PP -Another useful facility provided by the preprocessor is the ability to -include compile-time libraries. -On large projects, it is common to have all the declarations and definitions -gathered together in a few files that are textually included in the programs -by instructing the preprocessor to read them in, thus fooling the front end -into thinking that they were part of the source program. -.PP -A third feature of the preprocessor is conditional compilation. -The input program can be split up into labeled sections. -By setting flags, some of the sections can be deleted by the preprocessor, -thus allowing a family of slightly different programs to be conveniently stored -on a single file. -.NH 1 -The Front Ends -.PP -A front end is a program that converts input in some source language to a -program in EM. -At present, front ends -exist or are in preparation for Pascal, C, and Plain, and are being considered -for Ada, ALGOL 68, FORTRAN 77, and Modula 2. -Each of the present front ends is independent of all the other ones, -although a general-purpose, table-driven front end is conceivable, provided -one can devise a way to express the semantics of the source language in the -driving tables. 
-The Pascal front end uses a top-down parsing algorithm (recursive descent), -whereas the C and Plain front ends are bottom-up. -.PP -All front ends, independent of the language being compiled, -produce a common intermediate code called EM, which is -the assembly language for a simple stack machine. -The EM machine is based on a memory architecture -containing a stack for local variables, a (static) data area for variables -declared in the outermost block and global to the whole program, and a heap -for dynamic data structures. -In some ways EM resembles P-code [6], but is more general, since it is -intended for a wider class of languages than just Pascal. -.PP -The EM instruction set has been described elsewhere -[9,10,11] -so we will only briefly summarize it here. -Instructions exist to: -.sp - 1. Load a variable or constant of some length onto the stack. - 2. Store the top item on the stack in memory. - 3. Add, subtract, multiply, divide, etc. the top two stack items. - 4. Examine the top one or two stack items and branch conditionally. - 5. Call procedures and return from them. -.sp -.PP -Loads and stores come in several variations, corresponding to the most common -programming language semantics, for example, constants, simple variables, -fields of a record, elements of an array, and so on. -Distinctions are also made between variables local to the current block -(i.e., stack frame), those in the outermost block (static storage), and those -at intermediate lexicographic levels, which are accessed by following the -static chain at run time. -.PP -All arithmetic instructions have a type (integer, unsigned, real, -pointer, or set) and an -operand length, which may either be explicit or may be popped from the stack -at run time. -Monadic branch instructions pop an item from the stack and branch if it is -less than zero, less than or equal to zero, etc. -Dyadic branch instructions pop two items, compare them, and branch accordingly. 
-.PP -In addition to these basic EM instructions, there is a collection of special -purpose instructions (e.g., to increment a local variable), which are typically -produced from the simple ones by the peephole optimizer. -Although the complete EM instruction set contains nearly 150 instructions, -only about 60 of them are really primitive; the rest are simply abbreviations -for commonly occurring EM instruction sequences. -.PP -Of particular interest is the way object sizes are parametrized. -The front ends allow the user to indicate how many bytes an integer, real, etc. -should occupy. -Given this information, the front ends can allocate memory, determining -the placement of variables within the stack frame. -Sizes for primitive types are restricted to 8, 16, 32, 64, etc. bits. -The front ends are also parametrized by the target machine's word length -and address size so they can tell, for example, how many "load" instructions -to generate to move a 32-bit integer. -In the examples used henceforth, -we will assume a 16-bit word size and 16-bit integers. -.PP -Since only byte-addressable target machines are permitted, -it is nearly -always possible to implement any requested sizes on any target machine. -For example, the designer of the back end tables for the Z80 should provide -code for 8-, 16-, and 32-bit arithmetic. -In our view, the Pascal, C, or Plain programmer specifies what lengths -are needed, -without reference to the target machine, -and the back end provides it. -This approach greatly enhances portability. -While it is true that doing all arithmetic using 32-bit integers on the Z80 -will not be terribly fast, we feel that if that is what the programmer needs, -it should be possible to implement it. -.PP -Like all assembly languages, EM has not only machine instructions, but also -pseudoinstructions. -These are used to indicate the start and end of each procedure, allocate -and initialize storage for data, and similar functions. 
-One particularly important pseudoinstruction is the one that is used to -transmit information to the back end for optimization purposes. -It can be used to suggest variables that are good candidates to assign to -registers, delimit the scope of loops, indicate that certain variables -contain a useful value (next operation is a load) or not (next operation is -a store), and various other things. -.NH 1 -The Peephole Optimizer -.PP -The peephole optimizer reads in unoptimized EM programs and writes out -optimized ones. -Both the input and output are expressed in a highly compact code, rather than -in ASCII, to reduce the i/o time, which would otherwise dominate the CPU -time. -The program itself is table driven, and is, by and large, ignorant of the -semantics of EM. -The knowledge of EM is contained in a -language- and machine-independent table consisting of about 400 -pattern-replacement pairs. -We will briefly describe the kinds of optimizations it performs below; -a more complete discussion can be found in [9]. -.PP -Each line in the driving table describes one optimization, consisting of a -pattern part and a replacement part. -The pattern part is a series of one or more EM instructions and a boolean -expression. -The replacement part is a series of EM instructions with operands. -A typical optimization might be: -.sp - LOL LOC ADI STL ($1 = $4) and ($2 = 1) and ($3 = 2) ==> INL $1 -.sp -where the text prior to the ==> symbol is the pattern and the text after it is -the replacement. -LOL loads a local variable onto the stack, LOC loads a constant onto the stack, -ADI is integer addition, and STL is store local. -The pattern specifies that four consecutive EM instructions are present, with -the indicated opcodes, and that furthermore the operand of the first -instruction (denoted by $1) and the fourth instruction (denoted by $4) are the -same, the constant pushed by LOC is 1, and the size of the integers added by -ADI is 2 bytes. 
-(EM instructions have at most one operand, so it is not necessary to specify -the operand number.) -Under these conditions, the four instructions can be replaced by a single INL -(increment local) instruction whose operand is equal to that of LOL. -.PP -Although the optimizations cover a wide range, the main ones -can be roughly divided into the following categories. -\fIConstant folding\fR -is used to evaluate constant expressions, such as 2*3~+~7 at -compile time instead of run time. -\fIStrength reduction\fR -is used to replace one operation, such as multiply, by -another, such as shift. -\fIReordering of expressions\fR -helps in cases like -K/5, which can be better -evaluated as K/-5, because the former requires -a division and a negation, whereas the latter requires only a division. -\fINull instructions\fR -include resetting the stack pointer after a call with 0 parameters, -offsetting zero bytes to access the -first element of a record, or jumping to the next instruction. -\fISpecial instructions\fR -are those like INL, which deal with common special cases -such as adding one to a variable or comparing something to zero. -\fIGroup moves\fR -are useful because a sequence -of consecutive moves can often be replaced with EM code -that allows the back end to generate a loop instead of in line code. -\fIDead code elimination\fR -is a technique for removing unreachable statements, possibly made unreachable -by previous optimizations. -\fIBranch chain compression\fR -can be applied when a branch instruction jumps to another branch instruction. -The first branch can jump directly to the final destination instead of -indirectly. -.PP -The last two optimizations logically belong in the global optimizer but are -in the local optimizer for historical reasons (meaning that the local -optimizer has been the only optimizer for many years and the optimizations were -easy to do there). 
-.NH 1 -The Global Optimizer -.PP -In contrast to the peephole optimizer, which examines the EM code a few lines -at a time through a small window, the global optimizer examines the -program's large scale structure. -Three distinct types of optimizations can be found here: -.sp - 1. Interprocedural optimizations. - 2. Intraprocedural optimizations. - 3. Basic block optimizations. -.sp -We will now look at each of these in turn. -.PP -Interprocedural optimizations are those spanning procedure boundaries. -The most important one is deciding to expand procedures in line, -especially short procedures that occur in loops and pass several parameters. -If it takes more time or memory to pass the parameters than to do the work, -the program can be improved by eliminating the procedure. -The inverse optimization -- discovering long common code sequences and -turning them into a procedure -- is also possible, but much more difficult. -Like much of the global optimizer's work, the decision to make or not make -a certain program transformation is a heuristic one, based on knowledge of -how the back end works, how most target machines are organized, etc. -.PP -The heart of the global optimizer is its analysis of individual -procedures. -To perform this analysis, the optimizer must locate the basic blocks, -instruction sequences which can be entered only at the top and exited -only at the bottom. -It then constructs a data flow graph, with the basic blocks as nodes and -jumps between blocks as arcs. -.PP -From the data flow graph, many important properties of the program can be -discovered and exploited. -Chief among these is the presence of loops, indicated by cycles in the graph. -One important optimization is looking for code that can be moved outside the -loop, either prior to it or subsequent to it. -Such code motion saves execution time, although it does not save memory. -Unrolling loops is also possible and desirable in some cases. 
-.PP -Another area in which global analysis of loops is especially important is -in register allocation. -While it is true that EM does not have any registers to allocate, -the optimizer can easily collect information to allow the -back end to allocate registers wisely. -For example, the global optimizer can collect static frequency-of-use -and live/dead information about variables. -(A variable is dead at some point in the program if its current value is -not needed, i.e., the next reference to it overwrites it rather than -reading it; if the current value will eventually be used, the variable is -live.) -If two variables are never simultaneously live over some interval of code -(e.g., the body of a loop), they can be packed into a single variable, -which, if used often enough, may warrant being assigned to a register. -.PP -Many loops involve arrays: this leads to other optimizations. -If an array is accessed sequentially, with each iteration using the next -higher numbered element, code improvement is often possible. -Typically, a pointer to the bottom element of each array can be set up -prior to the loop. -Within the loop the element is accessed indirectly via the pointer, which is -also incremented by the element size on each iteration. -If the target machine has an autoincrement addressing mode and the pointer -is assigned to a register, an array access can often be done in a single -instruction. -.PP -Other intraprocedural optimizations include removing tail recursion -(last statement is a recursive call to the procedure itself), -topologically sorting the basic blocks to minimize the number of branch -instructions, and common subexpression recognition. -.PP -The third general class of optimizations done by the global optimizer is -improving the structure of a basic block. -For the most part these involve transforming arithmetic or boolean -expressions into forms that are likely to result in better target code. 
-As a simple example, A~+~B*C can be converted to B*C~+~A. -The latter can often -be handled by loading B into a register, multiplying the register by C, and -then adding in A, whereas the former may involve first putting A into a -temporary, depending on the details of the code generation table. -Another example of this kind of basic block optimization is transforming --B~+~A~<~0 into the equivalent, but simpler, A~<~B. -.NH 1 -The Back End -.PP -The back end reads a stream of EM instructions and generates assembly code -for the target machine. -Although the algorithm itself is machine independent, for each target -machine a machine dependent driving table must be supplied. -The driving table effectively defines the mapping of EM code to target code. -.PP -It will be convenient to think of the EM instructions being read as a -stream of tokens. -For didactic purposes, we will concentrate on two kinds of tokens: -those that load something onto the stack, and those that perform some operation -on the top one or two values on the stack. -The back end maintains at compile time a simulated stack whose behavior -mirrors what the stack of a hardware EM machine would do at run time. -If the current input token is a load instruction, a new entry is pushed onto -the simulated stack. -.PP -Consider, as an example, the EM code produced for the statement K~:=~I~+~7. -If K and I are -2-byte local variables, it will normally be LOL I; LOC 7; ADI~2; STL K. -Initially the simulated stack is empty. -After the first token has been read and processed, the simulated stack will -contain a stack token of type MEM with attributes telling that it is a local, -giving its address, etc. -After the second token has been read and processed, the top two tokens on the -simulated stack will be CON (constant) on top and MEM directly underneath it. 
-.PP -At this point the back end reads the ADI~2 token and -looks in the driving table to find a line or lines that define the -action to be taken for ADI~2. -For a typical multiregister machine, instructions will exist to add constants -to registers, but not to memory. -Consequently, the driving table will not contain an entry for ADI~2 with stack -configuration CON, MEM. -.PP -The back end is now faced with the problem of how to get from its -current stack configuration, CON, MEM, which is not listed, to one that is -listed. -The table will normally contain rules (which we call "coercions") -for converting between CON, REG, MEM, and similar tokens. -Therefore the back end attempts to "coerce" the stack into a configuration -that -.I is -present in the table. -A typical coercion rule might tell how to convert a MEM into -a REG, namely by performing the actions of allocating a -register and emitting code to move the memory word to that register. -Having transformed the compile-time stack into a configuration allowed for -ADI~2, the rule can be carried out. -A typical rule -for ADI~2 might have stack configuration REG, MEM -and would emit code to add the MEM to the REG, leaving the stack -with a single REG token instead of the REG and MEM tokens present before the -ADI~2. -.PP -In general, there will be more than one possible coercion path. -Assuming reasonable coercion rules for our example, -we might be able to convert -CON MEM into CON REG by loading the variable I into a register. -Alternatively, we could coerce CON to REG by loading the constant into a register. -The first coercion path does the add by first loading I into a register and -then adding 7 to it. -The second path first loads 7 into a register and then adds I to it. -On machines with a fast LOAD IMMEDIATE instruction for small constants -but no fast ADD IMMEDIATE, or vice -versa, one code sequence will be preferable to the other. -.PP -In fact, we actually have more choices than suggested above. 
-In both coercion paths a register must be allocated. -On many machines, not every register can be used in every operation, so the -choice may be important. -On some machines, for example, the operand of a multiply must be in an odd -register. -To summarize, from any state (i.e., token and stack configuration), a -variety of choices can be made, leading to a variety of different target -code sequences. -.PP -To decide which of the various code sequences to emit, the back end must have -some information about the time and memory cost of each one. -To provide this information, each rule in the driving table, including -coercions, specifies both the time and memory cost of the code emitted when -the rule is applied. -The back end can then simply try each of the legal possibilities (including all -the possible register allocations) to find the cheapest one. -.PP -This situation is similar to that found in a chess or other game-playing -program, in which from any state a finite number of moves can be made. -Just as in a chess program, the back end can look at all the "moves" that can -be made from each state reachable from the original state, and thus find the -sequence that gives the minimum cost to a depth of one. -More generally, the back end can evaluate all paths corresponding to accepting -the next -.I N -input tokens, find the cheapest one, and then make the first move along -that path, precisely the way a chess program would. -.PP -Since the back end is analogous to both a parser and a chess playing program, -some clarifying remarks may be helpful. -First, chess programs and the back end must do some look ahead, whereas the -parser for a well-designed grammar can usually suffice with one input token -because grammars are supposed to be unambiguous. -In contrast, many legal mappings -from a sequence of EM instructions to target code may exist. 
-Second, like a parser but unlike a chess program, the back end has perfect -information -- it does not have to contend with an unpredictable opponent's -moves. -Third, chess programs normally make a static evaluation of the board and -label the -.I nodes -of the tree with the resulting scores. -The back end, in contrast, associates costs with -.I arcs -(moves) rather than nodes (states). -However, the difference is not essential, since it could -also label each node with the cumulative cost from the root to that node. -.PP -As mentioned above, the cost field in the table contains -.I both -the time and memory costs for the code emitted. -It should be clear that the back end could use either one -or some linear combination of them as the scoring function for evaluating moves. -A user can instruct the compiler to optimize for time or for memory or -for, say, 0.3 x time + 0.7 x memory. -Thus the same compiler can provide a wide range of performance options to -the user. -The writer of the back end table can take advantage of this flexibility by -providing several code sequences with different tradeoffs for each EM -instruction (e.g., in line code vs. call to a run time routine). -.PP -In addition to the time-space tradeoffs, by specifying the depth of search -parameter, -.I N , -the user can effectively also trade off compile time vs. object -code quality, for whatever code metric has been chosen. -In summary, by combining the properties of a parser and a game playing program, -it is possible to make a code generator that is table driven, -highly flexible, and has the ability to produce good code from a -stack machine intermediate code. -.NH 1 -The Target Machine Optimizer -.PP -In the model of Fig. 2, the peephole optimizer comes before the global -optimizer. -It may happen that the code produced by the global optimizer can also -be improved by another round of peephole optimization. 
-Conceivably, the system could have been designed to iterate peephole and -global optimizations until no more of either could be performed. -.PP -However, both of these optimizations are done on the machine independent -EM code. -Neither is able to take advantage of the peculiarities and idiosyncrasies with -which most target machines are well endowed. -It is the function of the final -optimizer to do any (peephole) optimizations that still remain. -.PP -The algorithm used here is the same as in the EM peephole optimizer. -In fact, if it were not for the differences between EM syntax, which is -very restricted, and target assembly language syntax, -which is less so, precisely the same program could be used for both. -Nevertheless, the same ideas apply concerning patterns and replacements, so -our discussion of this optimizer will be restricted to one example. -.PP -To see what the target optimizer might do, consider the -PDP-11 instruction sequence sub #2,r0; mov (r0),x. -First 2 is subtracted from register 0, then the word pointed to by it -is moved to x. -The PDP-11 happens to have an addressing mode to perform this sequence in -one instruction: mov -(r0),x. -Although it is conceivable that this instruction could be included in the -back end driving table for the PDP-11, it is awkward to do so because it -can occur in so many contexts. -It is much easier to catch things like this in a separate program. -.NH 1 -The Universal Assembler/Linker -.PP -Although assembly languages for different machines may appear very different -at first glance, they have a surprisingly large intersection. -We have been able to construct an assembler/linker that is almost entirely -independent of the assembly language being processed. -To tailor the program to a specific assembly language, it is necessary to -supply a table giving the list of instructions, the bit patterns required for -each one, and the language syntax. 
-The machine independent part of the assembler/linker is then compiled with the -table to produce an assembler and linker for a particular target machine. -Experience has shown that writing the necessary table for a new machine can be -done in less than a week. -.PP -To enforce a modicum of uniformity, we have chosen to use a common set of -pseudoinstructions for all target machines. -They are used to initialize memory, allocate uninitialized memory, determine the -current segment, and similar functions found in most assemblers. -.PP -The assembler is also a linker. -After assembling a program, it checks to see if there are any -unsatisfied external references. -If so, it begins reading the libraries to find the necessary routines, including -them in the object file as it finds them. -This approach requires libraries to be maintained in assembly language form, -but eliminates the need for inventing a language to express relocatable -object programs in a machine independent way. -It also simplifies the assembler, since producing absolute object code is -easier than producing relocatable object code. -Finally, although assembly language libraries may be somewhat larger than -relocatable object module libraries, the loss in speed due to having more -input may be more than compensated for by not having to pass an intermediate -file between the assembler and linker. -.NH 1 -The Utility Package -.PP -The utility package is a collection of programs designed to aid the -implementers of new front ends or new back ends. -The most useful ones are the test programs. -For example, one test set, EMTEST, systematically checks out a back end by -executing an ever larger subset of the EM instructions. -It starts out by testing LOC, LOL and a few of the other essential instructions. -If these appear to work, it then tries out new instructions one at a time, -adding them to the set of instructions "known" to work as they pass the tests. 
-.PP -Each instruction is tested with a variety of operands chosen from values -where problems can be expected. -For example, on target machines which have 16-bit index registers but only -allow 8-bit displacements, a fundamentally different algorithm may be needed -for accessing -the first few bytes of local variables and those with offsets of thousands. -The test programs have been carefully designed to thoroughly test all relevant -cases. -.PP -In addition to EMTEST, test programs in Pascal, C, and other languages are also -available. -A typical test is: -.sp - i := 9; \fBif\fP i + 250 <> 259 \fBthen\fP error(16); -.sp -Like EMTEST, the other test programs systematically exercise all features of the -language being tested, and do so in a way that makes it possible to pinpoint -errors precisely. -While it has been said that testing can only demonstrate the presence of errors -and not their absence, our experience is that -the test programs have been invaluable in debugging new parts of the system -quickly. -.PP -Other utilities include programs to convert -the highly compact EM code produced by front ends to ASCII and vice versa, -programs to build various internal tables from human writable input formats, -a variety of libraries written in or compiled to EM to make them portable, -an EM assembler, and EM interpreters for various machines. -.PP -Interpreting the EM code instead of translating it to target machine language -is useful for several reasons. -First, the interpreters provide extensive run time diagnostics including -an option to list the original source program (in Pascal, C, etc.) with the -execution frequency or execution time for each source line printed in the -left margin. -Second, since an EM program is typically about one-third the size of a -compiled program, large programs can be executed on small machines. -Third, running the EM code directly makes it easier to pinpoint errors in -the EM output of front ends still being debugged. 
-.NH 1 -Summary and Conclusions -.PP -The Amsterdam Compiler Kit is a tool kit for building -portable (cross) compilers and interpreters. -The main pieces of the kit are the front ends, which convert source programs -to EM code, optimizers, which improve the EM code, and back ends, which convert -the EM code to target assembly language. -The kit is highly modular, so writing one front end -(and its associated runtime routines) -is sufficient to implement -a new language on a dozen or more machines, and writing one back end table -and one universal assembler/linker table is all that is needed to bring up all -the previously implemented languages on a new machine. -In this manner, the contents, and hopefully the usefulness, of the toolkit -will increase in time. -.PP -We believe the principal lesson to be learned from our work is that the old -UNCOL idea is basically a sound way to produce compilers, provided suitable -restrictions are placed on the source languages and target machines. -We also believe that although compilers produced by this technology may not -be equal to the very best handcrafted compilers, -in terms of object code quality, they are certainly -competitive with many existing compilers. -However, when one factors in the cost of producing the compiler, -the possible slight loss in performance may be more than compensated for by the -large decrease in production cost. -As a consequence of our work and similar work by other researchers [1,3,4], -we expect integrated compiler building kits to become increasingly popular -in the near future. -.PP -The toolkit is now available for various computers running the -.UX -operating system. -For information, contact the authors. -.NH 1 -References -.LP -.nr r 0 1 -.in +4 -.ti -4 -\fB~\n+r.\fR Graham, S.L. -Table-Driven Code Generation. -.I "Computer~13" , -8 (August 1980), 25-34. 
-.PP -A discussion of systematic ways to do code generation, -in particular, the idea of having a table with templates that match parts of -the parse tree and convert them into machine instructions. -.sp 2 -.ti -4 -\fB~\n+r.\fR Haddon, B.K., and Waite, W.M. -Experience with the Universal Intermediate Language Janus. -.I "Software Practice & Experience~8" , -5 (Sept.-Oct. 1978), 601-616. -.PP -An intermediate language for use with ALGOL 68, Pascal, etc. is described. -The paper discusses some problems encountered and how they were dealt with. -.sp 2 -.ti -4 -\fB~\n+r.\fR Johnson, S.C. -A Portable Compiler: Theory and Practice. -.I "Ann. ACM Symp. Prin. Prog. Lang." , -Jan. 1978. -.PP -A cogent discussion of the portable C compiler. -Particularly interesting are the author's thoughts on the value of -computer science theory. -.sp 2 -.ti -4 -\fB~\n+r.\fR Leverett, B.W., Cattell, R.G.G, Hobbs, S.O., Newcomer, J.M., -Reiner, A.H., Schatz, B.R., and Wulf, W.A. -An Overview of the Production-Quality Compiler-Compiler Project. -.I Computer~13 , -8 (August 1980), 38-49. -.PP -PQCC is a system for building compilers similar in concept but differing in -details from the Amsterdam Compiler Kit. -The paper describes the intermediate representation used and the code generation -strategy. -.sp 2 -.ti -4 -\fB~\n+r.\fR Lowry, E.S., and Medlock, C.W. -Object Code Optimization. -.I "Commun.~ACM~12", -(Jan. 1969), 13-22. -.PP -A classic paper on global object code optimization. -It covers data flow analysis, common subexpressions, code motion, register -allocation and other techniques. -.sp 2 -.ti -4 -\fB~\n+r.\fR Nori, K.V., Ammann, U., Jensen, K., Nageli, H. -The Pascal P Compiler Implementation Notes. -Eidgen. Tech. Hochschule, Zurich, 1975. -.PP -A description of the original P-code machine, used to transport the Pascal-P -compiler to new computers. -.sp 2 -.ti -4 -\fB~\n+r.\fR Steel, T.B., Jr. UNCOL: the Myth and the Fact. in -.I "Ann. Rev. Auto. Prog." -Goodman, R. 
(ed.), vol 2., (1960), 325-344. -.PP -An introduction to the UNCOL idea by its originator. -.sp 2 -.ti -4 -\fB~\n+r.\fR Steel, T.B., Jr. -A First Version of UNCOL. -.I "Proc. Western Joint Comp. Conf." , -(1961), 371-377. -.PP -The first detailed proposal for an UNCOL. By current standards it is a -primitive language, but it is interesting for its historical perspective. -.sp 2 -.ti -4 -\fB~\n+r.\fR Tanenbaum, A.S., van Staveren, H., and Stevenson, J.W. -Using Peephole Optimization on Intermediate Code. -.I "ACM Trans. Prog. Lang. and Sys. 3" , -1 (Jan. 1982) pp. 21-36. -.PP -A detailed description of a table-driven peephole optimizer. -The driving table provides a list of patterns to match as well as the -replacement text to use for each successful match. -.sp 2 -.ti -4 -\fB\n+r.\fR Tanenbaum, A.S., Stevenson, J.W., Keizer, E.G., and van Staveren, H. -Description of an Experimental Machine Architecture for use with Block -Structured Languages. -Informatica Rapport 81, Vrije Universiteit, Amsterdam, 1983. -.PP -The defining document for EM. -.sp 2 -.ti -4 -\fB\n+r.\fR Tanenbaum, A.S. -Implications of Structured Programming for Machine Architecture. -.I "Comm. ACM~21" , -3 (March 1978), 237-246. -.PP -The background and motivation for the design of EM. -This early version emphasized the idea of interpreting the intermediate -code (then called EM-1) rather than compiling it. 
diff --git a/doc/top/.distr b/doc/top/.distr deleted file mode 100644 index 32161cf4b..000000000 --- a/doc/top/.distr +++ /dev/null @@ -1,3 +0,0 @@ -proto.make -refs.top -top.n diff --git a/doc/top/Makefile b/doc/top/Makefile deleted file mode 100644 index 3884528d3..000000000 --- a/doc/top/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# $Header$ - -REFER=refer -TBL=tbl -TARGET=-Tlp - -../top.doc: top.n refs.top - $(REFER) -sA+T -l4,2 -p refs.top top.n | $(TBL) > $@ diff --git a/doc/top/proto.make b/doc/top/proto.make deleted file mode 100644 index 6107057a5..000000000 --- a/doc/top/proto.make +++ /dev/null @@ -1,11 +0,0 @@ -# $Id$ - -#PARAMS do not remove this line! - -REFER=refer -TBL=tbl - -SRC_DIR = $(SRC_HOME)/doc/top - -$(TARGET_HOME)/doc/top.doc: $(SRC_DIR)/top.n $(SRC_DIR)/refs.top - $(REFER) -sA+T -l4,2 -p $(SRC_DIR)/refs.top $(SRC_DIR)/top.n | $(TBL) > $@ diff --git a/doc/top/refs.top b/doc/top/refs.top deleted file mode 100644 index c4438987a..000000000 --- a/doc/top/refs.top +++ /dev/null @@ -1,84 +0,0 @@ -%T A Practical Toolkit for Making Portable Compilers -%A A.S. Tanenbaum -%A J.M. van Staveren -%A E.G. Keizer -%A J.W. Stevenson -%I Vrije Universiteit, Amsterdam -%R Rapport nr IR-74 -%D October 1981 - -%T A Practical Toolkit for Making Portable Compilers -%A A.S. Tanenbaum -%A J.M. van Staveren -%A E.G. Keizer -%A J.W. Stevenson -%J CACM -%V 26 -%N 9 -%P 654-660 -%D September 1983 - -%T A Unix Toolkit for Making Portable Compilers -%A A.S. Tanenbaum -%A J.M. van Staveren -%A E.G. Keizer -%A J.W. Stevenson -%J Proceedings USENIX conf. -%C Toronto, Canada -%V 26 -%D July 1983 -%P 255-261 - -%T Using Peephole Optimization on Intermediate Code -%A A.S. Tanenbaum -%A J.M. van Staveren -%A J.W. Stevenson -%J TOPLAS -%V 4 -%N 1 -%P 21-36 -%D January 1982 - -%T Amsterdam Compiler Kit documentation -%A A.S. Tanenbaum -%A E.G. Keizer -%A J.M. van Staveren -%A J.W. 
Stevenson -%I Vrije Universiteit, Amsterdam -%R Rapport nr IR-90 -%D June 1984 - -%T Language- and Machine-independant Global Optimization on -Intermediate Code -%A H.E. Bal -%A A.S. Tanenbaum -%I Vrije Universiteit, Amsterdam -%R Rapport IR-98 -%D March 1985 - -%T The Design and Implementation of the EM Global Optimizer -%A H.E. Bal -%I Vrije Universiteit, Amsterdam -%R Rapport IR-99 -%D March 1985 - - -%T The C Programming Language -%A B.W. Kernighan -%A D.M. Ritchie -%I Prentice-Hall, Inc -%C Englewood Cliffs,NJ -%D 1978 - -%T Principles of compiler design -%A A.V. Aho -%A J.D. Ullman -%I Addison-Wesley -%C Reading, Massachusetts -%D 1978 - -%T Some Topics in Parser Generation -%A C.J.H. Jacobs -%R Rapport IR-105 -%D October 1985 -%I Vrije Universiteit, Amsterdam diff --git a/doc/top/top.n b/doc/top/top.n deleted file mode 100644 index 8e0715c47..000000000 --- a/doc/top/top.n +++ /dev/null @@ -1,869 +0,0 @@ -.ND -.tr ~ -.ds <. -.ds <, -.ds >. . -.ds >, , -.ds [. [ -.ds .] ] -.TL -The ACK Target Optimizer -.AU -H.E. Bal -.AI -Vrije Universiteit -Wiskundig Seminarium, Amsterdam -.AB -The Target Optimizer is one of several optimizers that are part of -the Amsterdam Compiler Kit. -It operates directly on assembly code, -rather than on a higher level intermediate code, -as the Peephole Optimizer and Global Optimizer do. -Consequently, the Target Optimizer can do optimizations -that are highly machine-dependent. -.PP -Each target machine has its own Target Optimizer. -New optimizers are generated by the Target Optimizer Generator, -which uses a machine-dependent table as input. -This document contains full information on how to -write such a table for a new machine. -It also discusses the implementation of the -Target Optimizer and its generator. -.AE -.NH 1 -Introduction -.PP -.FS -This work was supported by the -Stichting Technische Wetenschappen (STW) -under grant VWI03.0001. 
-.FE -This document describes the target optimizer component -of the Amsterdam Compiler Kit (ACK) . -.[ -tanenbaum staveren amsterdam toolkit -.] -.[ -tanenbaum staveren cacm -.] -.[ -tanenbaum staveren toronto -.] -Optimization takes place in several parts of ACK compilers, -most notably in the Peephole Optimizer -.[ -staveren peephole toplas -.] -and -the Global Optimizer, -.[ -bal tanenbaum global optimization -.] -.[ -bal implementation global optimizer -.] -which are both language- and machine-independent, -and in the machine-specific code generators. -.[ -documentation amsterdam compiler kit -.] -The target optimizer is the finishing touch in this sequence of -optimizers. -It can be used to capture those optimizations that are hard -to express in the other parts of ACK. -These optimizations will typically be very machine-specific. -.PP -The target optimizer operates on the assembly code of some target machine. -Hence there is one target optimizer per machine. -However, just as for the ACK code generators and assemblers, -a framework has been built that allows easy generation of -target optimizers out of machine-independent parts and a -machine-dependent description table (see figure 1.). -So the major part of the code of a target optimizer is -shared among all target optimizers. -.DS -.ft CW - - - |-------------------------| - | machine-independent | - | code | - | | - |-----------------| |-------------------------| -descrip- |target optimizer | | machine-dependent code | - tion --> |generator | ----> | + tables | -table | | | | - |-----------------| |-------------------------| - - target optimizer -.ft R - - Figure 1: Generation of a target optimizer. - -.DE -.PP -This document focusses on the description of the machine-dependent table. -In chapter 2 we give an informal introduction to the optimization -algorithm and to the definition of the table format.
-Chapters 3 and 4 discuss the implementation of the target optimizer -and the target optimizer generator. -Appendix A gives full information for writing a description table. -.NH 1 -Global structure of the target optimizer -.PP -The target optimizer is based on the well understood model -of a \fIpeephole optimizer\fR. -.[ -aho ullman compiler -.] -It contains a machine-dependent table -of (pattern,replacement) pairs. -Each pattern describes -a sequence of one or more assembler instructions -that can be replaced by zero or more equivalent, yet cheaper, -instructions (the 'replacement'). -The optimizer maintains a \fIwindow\fR that moves over the input. -At any moment, the window contains some contiguous part of the input. -If the instructions in the current window match some pattern -in the table, -they are replaced by the corresponding replacement; -else, the window moves one instruction to the right. -.PP -In the remainder of this section we will give an informal -description of the machine-dependent table. -A more precise definition is given in appendix A. -We will first discuss the restrictions put on the -format of the assembly code. -.NH 2 -Assumptions about the assembly code format -.PP -We assume that a line of assembly code begins with an -instruction \fImnemonic\fR (opcode), -followed by zero or more \fIoperands\fR. -The mnemonic and the first operand must be separated by a special -character (e.g. a space or a tab). -Likewise, the operands must be separated by a special -character (e.g. a comma). -These separators need not be the same for all machines. -.NH 2 -Informal description of the machine-dependent tables -.PP -The major part of the table consists of (pattern,replacement) pairs -called \fIentries\fR. -.PP -A pattern is a list of instruction descriptions. -Each instruction description describes the instruction mnemonic and -the operands. -.PP -A mnemonic is described either by a string constant or by the -keyword ANY. 
-As all entities dealt with by the target optimizer are strings, -string constants do not contain quotes. -A string constant matches only itself. -ANY matches every instruction mnemonic. -.nf - -Examples of mnemonic descriptions: -.ft CW - - add - sub.l - mulw3 - ANY -.ft R -.fi -.PP -An operand can also be described by a string constant. -.nf - -Examples: -.ft CW - - (sp)+ - r5 - -4(r6) - -.ft R -.fi -Alternatively, it can be described by means of a \fIvariable name\fR. -Variables have values which are strings. -They have to be declared in the table before the patterns. -Each such declaration defines the name of a variable and -a \fIrestriction\fR to which its value is subjected. -.nf -Example of variable declarations: -.ft CW - - CONST { VAL[0] == '$' }; - REG { VAL[0] == 'r' && VAL[1] >= '0' && VAL[1] <= '3' && - VAL[2] == '\\0' }; - X { TRUE }; - -.ft R -.fi -The keyword VAL denotes the value of the variable, which is -a null-terminated string. -An operand description given via a variable name matches an -actual operand if the actual operand obeys the associated restriction. -.nf -.ft CW - - CONST matches $1, $-5, $foo etc. - REG matches r0, r1, r2 and r3 - X matches anything -.ft R - -.fi -The restriction (between curly braces) may be any legal "C" -.[ -kernighan ritchie c programming -.] -expression. -It may also contain calls to user-defined procedures. -These procedures must be added to the table after the patterns. -.nf - -Example: -.ft CW - - FERMAT_NUMBER { VAL[0] == '$' && is_fermat_number(&VAL[1]) }; - -.ft R -.fi -An operand can also be described by a mixture of a string constant -and a variable name. -The most general form allowed is: -.nf - - string_constant1 variable_name string_constant2 - -Example: -.ft CW - - (REG)+ matches (r0)+, (r1)+, (r2)+ and (r3)+ - -.ft R -.fi -Any of the three components may be omitted, -so the first two forms are just special cases of the general form. -The name of a variable can not be used as a string constant. 
-In the above context, it is impossible to define an operand that -matches the string "REG". -This limitation is of little consequence, -as the table writer is free to choose the names of variables. -This approach, however, avoids the need for awkward escape sequences. -.PP -A pattern consists of one or more instruction descriptions -(separated by a colon) -followed by an optional constraint. -A pattern "P1 : P2 : .. : Pn C" matches the sequence of -instructions "I1 I2 .. In" if: -.IP (i) 7 -for each i, 1 <= i <= n, Pi matches Ii, as described above; -.IP (ii) -multiple occurrences of the same variable name or of -the keyword ANY stand for the same values throughout the pattern; -.IP (iii) -the optional constraint C is satisfied, i.e. it evaluates to TRUE. -.LP -.nf -The pattern: -.ft CW - - dec REG : move.b CONST,(REG) - -.ft R -matches: -.ft CW - - dec r0 : move.b $4,(r0) - -.ft R -but not: -.ft CW - - dec r0 : move.b $4,(r1) - -.ft R -(as the variable REG matches two different strings). -.fi -If a pattern containing different registers must be described, -extra names for a register should be declared, all sharing -the same restriction. -.nf -Example: -.ft CW - - REG1,REG2 { VAL[0] == 'r' && ..... }; - - addl3 REG1,REG1,REG2 : subl2 REG2,REG1 -.ft R -.fi -.PP -The optional constraint is an auxiliary "C" expression (just like -the parameter restrictions). -The expression may refer to the variables and to ANY. -.nf -Example: -.ft CW - - move REG1,REG2 { REG1[1] == REG2[1] + 1 } - -.ft R -matches -.ft CW - - move r1,r0 - move r2,r1 - move r3,r2 -.ft R -.fi -.PP -The replacement part of a (pattern,replacement) table entry -has the same structure as a pattern, except that: -.IP (i) -it may not contain an additional constraint; -.IP (ii) -it may be empty. -.LP -A replacement may also refer to the values of variables and ANY. -.NH 2 -Examples -.PP -This section contains some realistic examples for -optimization on PDP-11 and Vax assembly code. 
-.NH 3 -Vax examples -.PP -Suppose the table contains the following declarations: -.nf - -.ft CW - X, LOG { TRUE }; - LAB { VAL[0] == 'L' }; /* e.g. L0017 */ - A { no_side_effects(VAL) }; - NUM { is_number(VAL) }; -.ft R - -.fi -The procedure "no_side_effects" checks if its argument -contains any side effects, i.e. auto increment or auto decrement. -The procedure "is_number" checks if its argument contains only digits. -These procedures must be supplied by the table-writer and must be -included in the table. -.PP -.nf -.ft CW -\fIentry:\fP addl3 X,A,A -> addl2 X,A; -.ft R - -.fi -This entry changes a 3-operand instruction into a cheaper 2-operand -instruction. -An optimization like: -.nf -.ft CW - - addl3 r0,(r2)+,(r2)+ -> addl2 r0,(r2)+ - -.ft R -.fi -is illegal, as r2 should be incremented twice. -Hence the second argument is required to -be side-effect free. -.PP -.nf -.ft CW -\fIentry:\fP addw2 $-NUM,X -> subw2 $NUM,X; -.ft R - -.fi -An instruction like "subw2 $5,r0" is cheaper -than "addw2 $-5,r0", -because constants in the range 0 to 63 are represented -very efficiently on the Vax. -.PP -.nf -.ft CW -\fIentry:\fP bitw $NUM,A : jneq LAB - { is_poweroftwo(NUM,LOG) } -> jbs $LOG,A,LAB; - -.ft R -.fi -A "bitw x,y" sets the condition codes to the bitwise "and" of -x and y. -A "jbs n,x,l" branches to l if bit n of x is set. -So, for example, the following transformation is possible: -.nf -.ft CW - - bitw $32,r0 : jneq L0017 -> jbs $5,r0,L0017 - -.ft R -.fi -The user-defined procedure "is_poweroftwo" checks if its first argument is -a power of 2 and, if so, sets its second argument to the logarithm -of the first argument. (Both arguments are strings). -Note that the variable LOG is not used in the pattern itself. -It is assigned a (string) value by "is_poweroftwo" and is used -in the replacement. 
-.NH 3 -PDP-11 examples -.PP -Suppose we have the following declarations: -.nf - -.ft CW - X { TRUE }; - A { no_side_effects(VAL) }; - L1, L2 { VAL[0] == 'I' }; - REG { VAL[0] == 'r' && VAL[1] >= '0' && VAL[1] <= '5' && - VAL[2] == '\\0' }; - -.ft P -.fi -The implementation of "no_side_effects" may of course -differ for the PDP-11 and the Vax. -.PP -.nf -.ft CW -\fIentry:\fP mov REG,A : ANY A,X -> mov REG,A : ANY REG,X ; -.ft R - -.fi -This entry implements register subsumption. -If A and REG hold the same value (which is true after "mov REG,A") -and A is used as source (first) operand, it is cheaper to use REG instead. -.PP -.nf -.ft CW -\fIentry:\fP jeq L1 : jbr L2 : labdef L1 -> jne L2 : labdef L1; -.ft R - -.fi -The "jeq L1" is a "skip over an unconditional jump". "labdef L1" -denotes the definition (i.e. defining occurrence) of label L1. -As the target optimizer has to know what such a definition -looks like, this must be expressed in the table (see Appendix A). -.PP -.nf -.ft CW -\fIentry:\fP add $01,X { carry_dead(REST) } -> inc X; -.ft R - -.fi -On the PDP-11, an add-one is not equivalent to an increment. -The latter does not set the carry-bit of the condition codes, -while the former does. -So a look-ahead is needed to see if the rest of the input uses -the carry-bit before changing the condition codes. -A look-ahead of one instruction is provided by -the target optimizer. -This will normally be sufficient for compiler-generated code. -The keyword REST contains the mnemonic of the first instruction of -the rest of the input. -If this instruction uses the carry-bit (e.g. an adc, subc, bhis) -the transformation is not allowed. -.NH 1 -Implementation of the target optimizer -.PP -The target optimizer reads one input file of assembler instructions, -processes it, and writes the optimized code -to the output file. -So it performs one pass over the input. -.NH 2 -The window mechanism -.PP -The optimizer uses a \fIwindow\fR that moves over the input.
-It repeatedly tries to match the instructions in the window -with the patterns in the table. -If no match is possible, the window moves -one instruction forwards (to the right). -After a successful match the matched instructions are -removed from the window and are replaced by the -replacement part of the table entry. -Furthermore, the window is moved a few instructions -backwards, -as it is possible that instructions that were rejected earlier now do match. -For example, consider the following patterns: -.DS -.ft CW -cmp $0, X -> tst X ; -mov REG,X : tst X -> mov REG,X ; /* redundant test */ -.ft R -.DE -If the input is: -.DS -.ft CW -mov r0,foo : cmp $0,foo -.ft R -.DE -then the first instruction is initially rejected. -However, after the transformation -.DS -.ft CW -cmp $0,foo -> tst foo -.ft R -.DE -the following optimization is possible: -.DS -.ft CW -mov r0,foo : tst foo -> mov r0,foo -.ft R -.DE -.PP -The window is implemented as a \fIqueue\fR. -Matching takes place at the head of the queue. -New instructions are added at the tail. -If the window is moved forwards, the instruction at the head -is not yet written to the output, -as it may be needed later on. -Instead it is added to a second queue, -the \fIbackup queue\fR. -After a successful match, the entire backup queue is -inserted at the front of the window queue, -which effectively implements the shift backwards. -.PP -Both queues have the length of the longest pattern in the table. -If, as a result of a forward window move, -the backup queue gets full, -the instruction at its head is written to the output and removed. -Instructions are read from the input whenever the -window queue contains fewer elements than the length -of the longest pattern.
-.NH 2 -Pattern matching -.PP -Pattern matching is done in three steps: -.IP (i) 7 -find patterns in the table whose instruction mnemonics -match the mnemonics of the instructions in the -current window; -.IP (ii) -check if the operands of the pattern match the operands of the -instructions in the current window; -.IP (iii) -check if the optional constraint is satisfied. -.LP -For step (i) hashing is used. -The mnemonic of the first instruction of the window -is used to determine a list of possible patterns. -Patterns starting with ANY are always tried. -.PP -Matching of operand descriptions against actual operands -takes place as follows. -The general form of an operand description is: -.DS -string_constant1 variable_name string_constant2 -.DE -The actual operand should begin with string_constant1 and end -on string_constant2. -If so, these strings are stripped from it and the remaining string is -matched against the variable. -Matching a string against a variable is -defined as follows: -.IP 1. -initially (before the entire pattern match) -all variables are uninstantiated; -.IP 2. -matching a string against an uninstantiated variable -succeeds if the restriction associated with the variable is -satisfied. -As a side effect, it causes the variable to be instantiated to -the string; -.IP 3. -matching a string against an instantiated variable succeeds -only if the variable was instantiated to the same string. -.LP -Matching an actual mnemonic against the keyword ANY is defined likewise. -.PP -The matching scheme implements the requirement that multiple occurrences -of the same variable name or of the keyword ANY should -stand for the same values throughout the entire pattern -(see section 2.). -.PP -Both the parameter restriction of 2. and the constraint of step (iii) -are checked by executing the "C" expression. -.NH 2 -Data structures -.PP -The most important data structure is the representation -of the input instructions. 
-For every instruction we use two representations: -.IP (i) -the textual representation, -i.e. the exact code as it appeared in the input; -.IP (ii) -a structural representation, -containing the opcode and the operands. -.LP -The opcode of an instruction is determined as soon as it is read. -If the line contains a label definition, the opcode is set -to "labdef", so a label definition is treated like a normal -instruction. -.PP -The operands of an instruction are not determined until -they are needed, i.e. until step (i) of the pattern matching -process has succeeded. -For every instruction we keep track of a \fIstate\fR. -After the opcode has successfully been determined, -the state is OPC_ONLY. -Once the operands have been recognized, the state is set to DONE. -If the opcode or operands can not be determined, -or if the instruction cannot be optimized for any other -reason (see Appendix A), the state is set to JUNK -and any attempt to match it will fail. -.PP -For each table entry we record the following information: -.IP (i) 7 -the length of the pattern (i.e. the number of instruction descriptions) -.IP (ii) -a description of the instructions of the pattern -.IP (iii) -the length of the replacement -.IP (iv) -a description of the instructions of the replacement. -.LP -The description of an instruction consists of: -.IP (i) -the opcode -.IP (ii) -for each operand, a description of the operand. -.LP -The description of an operand of the form: -.DS -string_constant1 variable_name string_constant2 -.DE -contains: -.IP (i) -both string constants -.IP (ii) -the number of the variable. -.LP -Each declared variable is assigned a unique number. -For every variable we maintain: -.IP (i) -its state (instantiated or not instantiated) -.IP (ii) -its current value (a string). -.LP -The restrictions on variables and the constraints are stored -in a switch-statement, -indexed by variable number and entry number respectively. 
-.NH 1 -Implementation of the target optimizer generator -.PP -The target optimizer generator (\fItopgen\fR) -reads a target machine description table and produces -two files: -.IP gen.h: 9 -contains macro definitions for -machine parameters that were changed -in the parameter section of the table (see appendix A) -and for some attributes derived from the table -(longest pattern, number of patterns, number -of variables). -.IP gen.c: -contains the entry description tables, -code for checking the parameter restrictions and constraints -(switch statements) -and the user-defined procedures. -.LP -These two files are compiled together with some machine-independent -files to produce a target optimizer. -.PP -Topgen is implemented using -the LL(1) parser generator system LLgen , -.[ -jacobs topics parser generation -.] -a powerful tool of the Amsterdam Compiler Kit. -This system provides a flexible way of describing the syntax of the tables. -The syntactical description of the table format included -in Appendix A was derived from the LLgen syntax rules. -.PP -The parser uses a simple, hand-written, lexical analyzer (scanner). -The scanner returns a single character in most cases. -The recognition of identifiers is left to the parser, as -this eases the analysis of operand descriptions. -Comments are removed from the input by the scanner, -but white space is passed to the parser, -as it is meaningful in some contexts (it separates the -opcode description from the description of the first operand). -.PP -Topgen maintains two symbol tables, one for variable names and one -for tunable parameters. -The symbol tables are organized as binary trees. -.bp -.NH 1 -References -.[ -$LIST$ -.] -.bp -.SH -Appendix A -.PP -In this appendix we present a complete definition of the target -optimizer description table format. -This appendix is intended for table-writers. -We use syntax rules for the description of the table format. -The following notation is used: -.TS -center; -l l. 
-{ a } zero or more of a -[ a ] zero or one of a -a b a followed by b -a | b a or b -.TE -Terminals are given in quotes, as in ';'. -.PP -The table may contain white space and comment at all reasonable places. -Comments are as in "C", so they begin with /* and end on */. -Identifiers are sequences of letters, digits and the underscore ('_'), -beginning with a letter. -.PP -.DS -.ft CW -table -> {parameter_line} '%%;' {variable_declaration} '%%;' - {entry} '%%;' user_routines. -.ft R -.DE -A table consists of four sections, containing machine-dependent -constants, variable declarations, pattern rules and -user-supplied subroutines. -.PP -.DS -.ft CW -parameter_line -> identifier value ';' . -.ft R -.DE -A parameter line defines some attributes of the target machines -assembly code. -For unspecified parameters default values apply. -The names of the parameters and the corresponding defaults -are shown in table 1. -.TS -center; -l l. -OPC_TERMINATOR ' ' -OP_SEPARATOR ',' -LABEL_STARTER 'I' -LABEL_TERMINATOR ':' -MAXOP 2 -MAXOPLEN 25 -MAX_OPC_LEN 10 -MAXVARLEN 25 -MAXLINELEN 100 -PAREN_OPEN not defined -PAREN_CLOSE not defined -.TE -.ce 1 -table 1: parameter names and defaults -.DE -The OPC_TERMINATOR is the character that separates the instruction -mnemonic from the first operand (if any). -The OP_SEPARATOR separates adjacent operands. -A LABEL_STARTER is the first character of an instruction label. -(Instruction labels are assumed to start with the same character). -The LABEL_TERMINATOR is the last character of a label definition. -It is assumed that this character is not used in an applied -occurrence of the label identifier. -For example, the defining occurrence may be "I0017:" -and the applied occurrence may be "I0017" -as in "jmp I0017". -MAXOP defines the maximum number of operands an instruction can have. -MAXOPLEN is the maximum length (in characters) of an operand. -MAX_OPC_LEN is the maximum length of an instruction opcode. 
-MAXVARLEN is the maximum length of a declared string variable. -As variables may be set by user routines (see "bitw" example for -the Vax) the table-writer must have access to this length and -must be able to change it. -MAXLINELEN denotes the maximum length of a line of assembly code. -PAREN_OPEN and PAREN_CLOSE must be used when the operand separator can also -occur within operands, between parentheses of some kind. In this case, -PAREN_OPEN must be set to a string containing the opening parentheses, and -PAREN_CLOSE must be set to a string containing the closing parentheses. -.PP -If a line of assembly code violates any of the assumptions or -exceeds some limit, -the line is not optimized. -Optimization does, however, proceed with the rest of the input. -.PP -.DS -.ft CW -variable_declaration -> identifier {',' identifier} restriction ';' . - -restriction -> '{' anything '}' . -.ft R -.DE -A variable declaration declares one or more string variables -that may be used in the patterns and in the replacements. -If a variable is used as part of an operand description in -a pattern, the entire pattern can only match if the -restriction evaluates to TRUE. -If the pattern does match, the variable is assigned the matching -part of the actual operand. -Variables that are not used in a pattern are initialized to -null-strings and may be assigned a value in the constraint-part of -the pattern. -.PP -The restriction must be a legal "C" expression. -It may not contain a closing bracket ('}'). -Inside the expression, the name VAL stands for the part of the actual -(matching) operand. -The expression may contain calls to procedures that are defined in the -user-routines section. -.DS -.ft CW -entry -> pattern '->' replacement ';' . - -pattern -> instruction_descr - { ':' instruction_descr } - constraint . - -replacement -> [ instruction_descr { ':' instruction_descr } ] . - -instruction_descr -> opcode - white - [ operand_descr { ',' operand_descr } ] . 
- -constraint -> '{' anything '}' . - -operand_descr -> [ string_constant ] - [ variable_name ] - [ string_constant ] . - -variable_name -> identifier . - -opcode -> anything . -.ft R -.DE -The symbol 'white' stands for white space (space or tab). -An opcode can be any string not containing the special -symbols ';', '{', '}', ':', ',', '->' or white space. -To be recognized, it must begin with a letter. -The opcode should either be a mnemonic of a target machine -instruction or it should be one of the keywords ANY and labdef. -ANY matches any actual opcode. labdef matches only label definitions. -.PP -If an operand description contains an identifier (as defined earlier), -it is checked if the identifier is the name of a declared variable. -This affects the semantics of the matching rules for the operand, -as described in section 2. -An operand may contain at most one such variable name. -.PP -The constraint must be a legal "C" expression, just as the operand restriction. -It may call user-defined procedures and use or change the value of -declared variables. -It may also use the string variable REST, -which contains the mnemonic of the first instruction of the -rest of the input. (REST is a null-string if this mnemonic can -not be determined). -.DS -.ft CW -user_routines -> anything . -.ft R -.DE -The remainder of the table consists of user-defined subroutines. diff --git a/doc/v7bugs.doc b/doc/v7bugs.doc deleted file mode 100644 index bfc113f73..000000000 --- a/doc/v7bugs.doc +++ /dev/null @@ -1,303 +0,0 @@ -.\" $Id$ -.wh 0 hd -.wh 60 fo -.de hd -'sp 5 -.. -.de fo -'bp -.. -.nr e 0 1 -.de ER -.br -.ne 20 -.sp 2 -.in 5n -.ti -5n -ERROR \\n+e: -.. -.de PS -.sp -.nf -.in +5n -.. -.de PE -.sp -.fi -.in -5n -.. -.sp 3 -.ce -UNIX version 7 bugs -.sp 3 -This document describes the UNIX version 7 errors fixed at the -Vrije Universiteit, Amsterdam. -Several of these were discovered at the VU. -Others are quoted from a list of bugs distributed by BellLabs.
-.sp -For each error the differences between the original and modified -source files are given, -as well as a test program. -.ER -C optimizer bug for unsigned comparison -.sp -The following C program caused an IOT trap, while it should not -(compile with 'cc -O prog.c'): -.PS -unsigned i = 0; - -main() { - register j; - - j = -1; - if (i > 40000) - abort(); -} -.PE -BellLabs suggests to make the following patch in c21.c: -.PS -/* modified /usr/src/cmd/c/c21.c */ - -189 if (r==0) { -190 /* next 2 lines replaced as indicated by -191 * Bell Labs bug distribution ( v7optbug ) -192 p->back->back->forw = p->forw; -193 p->forw->back = p->back->back; -194 End of lines changed */ -195 if (p->forw->op==CBR -196 || p->forw->op==SXT -197 || p->forw->op==CFCC) { -198 p->back->forw = p->forw; -199 p->forw->back = p->back; -200 } else { -201 p->back->back->forw = p->forw; -202 p->forw->back = p->back->back; -203 } -204 /* End of new lines */ -205 decref(p->ref); -206 p = p->back->back; -207 nchange++; -208 } else if (r>0) { -.PE -Use the previous program to test before and after the modification. -.ER -The loader fails for large data or text portions -.sp -The loader 'ld' produces a "local symbol botch" error -for the following C program. -.PS -int big1[10000] = { - 1 -}; -int big2[10000] = { - 2 -}; - -main() { - printf("loader is fine\\n"); -} -.PE -We have made the following fix: -.PS -/* original /usr/src/cmd/ld.c */ - -113 struct { -114 int fmagic; -115 int tsize; -116 int dsize; -117 int bsize; -118 int ssize; -119 int entry; -120 int pad; -121 int relflg; -122 } filhdr; - -/* modified /usr/src/cmd/ld.c */ - -113 /* -114 * The original Version 7 loader had problems loading large -115 * text or data portions. -116 * Why not include ??? -117 * then they would be declared unsigned -118 */ -119 struct { -120 int fmagic; -121 unsigned tsize; /* not int !!! */ -122 unsigned dsize; /* not int !!! */ -123 unsigned bsize; /* not int !!! */ -124 unsigned ssize; /* not int !!! 
*/ -125 unsigned entry; /* not int !!! */ -126 unsigned pad; /* not int !!! */ -127 unsigned relflg; /* not int !!! */ -128 } filhdr; -.PE -.ER -Floating point registers -.sp -When a program is swapped to disk if it needs more memory, -then the floating point registers were not saved, so that -it may have different registers when it is restarted. -A small assembly program demonstrates this for the status register. -If the error is not fixed, then the program generates an IOT error. -A "memory fault" is generated if all is fine. -.PS -start: ldfps $7400 -1: stfps r0 - mov r0,-(sp) - cmp r0,$7400 - beq 1b - 4 -.PE -Some digging into the kernel is required to fix it. -The following patch will do: -.PS -/* original /usr/sys/sys/slp.c */ - -563 a2 = malloc(coremap, newsize); -564 if(a2 == NULL) { -565 xswap(p, 1, n); -566 p->p_flag |= SSWAP; -567 qswtch(); -568 /* no return */ -569 } - -/* modified /usr/sys/sys/slp.c */ - -590 a2 = malloc(coremap, newsize); -591 if(a2 == NULL) { -592 #ifdef FPBUG -593 /* -594 * copy floating point register and status, -595 * but only if you must switch processes -596 */ -597 if(u.u_fpsaved == 0) { -598 savfp(&u.u_fps); -599 u.u_fpsaved = 1; -600 } -601 #endif -602 xswap(p, 1, n); -603 p->p_flag |= SSWAP; -604 qswtch(); -605 /* no return */ -606 } -.PE -.ER -Floating point registers. -.sp -A similar problem arises when a process forks. -The child will have random floating point registers as is -demonstrated by the following assembly language program. -The child process will die by an IOT trap and the father prints -the message "child failed". -.PS -exit = 1. -fork = 2. -write = 4. -wait = 7. - -start: ldfps $7400 - sys fork - br child - sys wait - tst r1 - bne bad - stfps r2 - cmp r2,$7400 - beq start - 4 -child: stfps r2 - cmp r2,$7400 - beq ex - 4 -bad: clr r0 - sys write;mess;13. 
-ex: clr r0 - sys exit - - .data -mess: -.PE -The same file slp.c should be patched as follows: -.PS -/* original /usr/sys/sys/slp.c */ - -499 /* -500 * When the resume is executed for the new process, -501 * here's where it will resume. -502 */ -503 if (save(u.u_ssav)) { -504 sureg(); -505 return(1); -506 } -507 a2 = malloc(coremap, n); -508 /* -509 * If there is not enough core for the -510 * new process, swap out the current process to generate the -511 * copy. -512 */ - -/* modified /usr/sys/sys/slp.c */ - -519 /* -520 * When the resume is executed for the new process, -521 * here's where it will resume. -522 */ -523 if (save(u.u_ssav)) { -524 sureg(); -525 return(1); -526 } -527 #ifdef FPBUG -528 /* copy the floating point registers and status to child */ -529 if(u.u_fpsaved == 0) { -530 savfp(&u.u_fps); -531 u.u_fpsaved = 1; -532 } -533 #endif -534 a2 = malloc(coremap, n); -535 /* -536 * If there is not enough core for the -537 * new process, swap out the current process to generate the -538 * copy. -539 */ -.PE -.ER -/usr/src/libc/v6/stat.c -.sp -Some system calls are changed from version 6 to version 7. -A library of system call entries, that make a version 6 UNIX look like -a version 7 system, is provided to run some -useful version 7 utilities, like 'tar', on UNIX-6. -The entry for 'stat' contained two bugs: -the 24-bit file size was incorrectly converted to 32 bits -(sign extension of bit 15) -and the uid/gid fields suffered from sign extension. 
-.sp -Transferring files from version 6 to version 7 using 'tar' -will fail for all files for which -.sp - ( (size & 0100000) != 0 ) -.sp -These two errors are fixed if stat.c is modified as follows: -.PS -/* original /usr/src/libc/v6/stat.c */ - -11 char os_size0; -12 short os_size1; -13 short os_addr[8]; - -49 buf->st_nlink = osbuf.os_nlinks; -50 buf->st_uid = osbuf.os_uid; -51 buf->st_gid = osbuf.os_gid; -52 buf->st_rdev = 0; - -/* modified /usr/src/libc/v6/stat.c */ - -11 char os_size0; -12 unsigned os_size1; -13 short os_addr[8]; - -49 buf->st_nlink = osbuf.os_nlinks; -50 buf->st_uid = osbuf.os_uid & 0377; -51 buf->st_gid = osbuf.os_gid & 0377; -52 buf->st_rdev = 0; -.PE diff --git a/doc/val.doc b/doc/val.doc deleted file mode 100644 index 851a6c371..000000000 --- a/doc/val.doc +++ /dev/null @@ -1,753 +0,0 @@ -.\" $Id$ -.ll 72n -.wh 0 hd -.wh 60 fo -.de hd -'sp 5 -.. -.de fo -'bp -.. -.tr ~ -. PARAGRAPH -.de PP -.sp -.. -. CHAPTER -.de CH -.br -.ne 15 -.sp 3 -.in 0 -\\fB\\$1\\fR -.in 5 -.PP -.. -. SUBCHAPTER -.de SH -.br -.ne 10 -.sp -.in 5 -\\fB\\$1\\fR -.in 10 -.PP -.. -. INDENT START -.de IS -.sp -.in +5 -.. -. INDENT END -.de IE -.in -5 -.sp -.. -. DOUBLE INDENT START -.de DS -.sp -.in +5 -.ll -5 -.. -. DOUBLE INDENT END -.de DE -.ll +5 -.in -5 -.sp -.. -. EQUATION START -.de EQ -.sp -.nf -.. -. EQUATION END -.de EN -.fi -.sp -.. -. TEST -.de TT -.ti -5 -Test~\\$1:~ -.br -.. -. IMPLEMENTATION 1 -.de I1 -.br -Implementation~1: -.. -. IMPLEMENTATION 2 -.de I2 -.br -Implementation~2: -.. -.de CS -.br -~-~\\ -.. -.br -.fi -.sp 5 -.ce -\fBPascal Validation Suite Report\fR -.CH "Pascal processor identification" -The ACK-Pascal compiler produces code for an EM machine -as defined in [1]. -It is up to the implementor of the EM machine whether errors like -integer overflow, undefined operand and range bound error are recognized or not. -Therefore it depends on the EM machine implementation whether these errors -are recognized in Pascal programs or not. 
-The validation suite results of all known implementations are given. -.PP -There does not (yet) exist a hardware EM machine. -Therefore, EM programs must be interpreted, or translated into -instructions for a target machine. -The following implementations currently exist: -.IS -.I1 -an interpreter running on a PDP-11 (using UNIX). -The normal mode of operation for this interpreter is to check -for undefined integers, overflow, range errors etc. -.sp -.I2 -a translator into PDP-11 instructions (using UNIX). -Less checks are performed than in the interpreter, because the translator -is intended to speed up the execution of well-debugged programs. -.IE -.CH "Test Conditions" -Tester: E.G. Keizer -.br -Date: October 1983 -.br -Validation Suite version: 3.0 -.PP -The final test run is made with a slightly -modified validation suite. -.SH "Erroneous programs" -Some test did not conform to the standard proposal of February 1979. -It is this version of the standard proposal that is used -by the authors of the validation suite. -.IS -.TT 6.6.3.7-4 -The semicolon between high and integer on line 17 is replaced -by a colon. -.sp -.TT 6.7.2.2-13 -The div operator on line 14 replaced by mod. -.CH "Conformance tests" -Number of tests passed = 150 -.br -Number of tests failed = 6 -.SH "Details of failed tests" -.IS -.TT 6.1.2-1 -Character sequences starting with the 8 characters 'procedur' -or 'function' are -erroneously classified as the word-symbols 'procedure' and 'function'. -.sp -.TT 6.1.3-2 -Identifiers identical in the first eight characters, but -differing in ninth or higher numbered characters are treated as -identical. -.sp -.TT 6.5.1-1 -ACK-Pascal requires all formal program parameters to be -declared with type \fIfile\fP. -.sp -.TT 6.6.6.5-1 -Gives run-time error eof seen at call to eoln. -A have a hunch that this is a error in the suit. -.sp -.TT 6.6.4.1-1 -Redefining the names of some standard procedures leads to incorrect -behaviour of the runtime system. 
-In this case it crashes without a sensible error message. -.sp -.TT 6.9.3.5.1-1 -This test can not be translated by our compiler because two -non-identical variables are used in the same block with the same first eight -characters. -The test passed after replacement of one of those names. -.IE -.CH "Deviance tests" -Number of deviations correctly detected = 120 -.br -Number of tests not detecting deviations = 20 -.SH "Details of deviations" -The following tests are compiled without a proper error -indication although they do -not conform to the standard. -.IS -.TT 6.1.6-5 -ACK-Pascal allows labels in the range 0..32767. -A warning is produced when testing for deviations from the -standard. -.sp -.TT 6.1.8-5 -A missing space between a number and a word symbol is not -detected. -.sp -.TT 6.2.2-8 -.TT 6.3-6 -.TT 6.4.1-3 -.TT 6.6.1-3 -.TT 6.6.1-4 -Undetected scope error. The scope of an identifier should start at the -beginning of the block in which it is declared. -In the ACK-Pascal compiler the scope starts just after the declaration, -however. -.sp -.TT 6.4.3.3-7 -The values of fields from one variant are accessible from -another variant. -The correlation is exact. -.sp -.TT 6.6.3.3-4 -The passing as a variable parameter of the selector of a -variant part is not detected. -A runtime error is produced because the variant selector is not -initialized. -.sp -.TT 6.8.2.4-2 -.TT 6.8.2.4-3 -.TT 6.8.2.4-4 -.TT 6.8.2.4-5 -.TT 6.8.2.4-6 -The ACK-Pascal compiler does not restrict the places from where -a jump to a label by means of a goto-statement is allowed. -.sp -.TT 6.8.3.9-5 -.TT 6.8.3.9-6 -.TT 6.8.3.9-7 -.TT 6.8.3.9-16 -There are no errors produced for assignments to a variable -in use as control-variable of a for-statement. -.TT 6.8.3.9-8 -.TT 6.8.3.9-9 -Use of a controlled variable after leaving the loop without -intervening initialization is not detected. -.IE -.CH "Error handling" -The results depend on the EM implementation. 
-.sp -Number of errors correctly detected = -.in +5 -.I1 -32 -.I2 -17 -.in -5 -Number of errors not detected = -.in +5 -.I1 -21 -.I2 -36 -.in -5 -Number of errors incorrectly detected = -.in +5 -.I1 -2 -.I2 -2 -.in -5 -.SH "Details of errors not detected" -The following test fails because the ACK-Pascal compiler only -generates a warning that does not prevent to run the tests. -.IS -.TT 6.6.2-8 -A warning is produced if there is no assignment to a function-identifier. -.IE -With this test the ACK-Pascal compiler issues an error message for a legal -construct not directly related to the error to be detected. -.IS -.TT 6.5.5-2 -Program does not compile. -Buffer variable of text file is not allowed as variable -parameter. -.IE -The following errors are not detected at all. -.IS -.TT 6.2.1-11 -.I2 -The use of an undefined integer is not caught as an error. -.sp -.TT 6.4.3.3-10 -.TT 6.4.3.3-11 -.TT 6.4.3.3-12 -.TT 6.4.3.3-13 -The notion of 'current variant' is not implemented, not even if a tagfield -is present. -.sp -.TT 6.4.5-15 -.TT 6.4.6-9 -.TT 6.4.6-10 -.TT 6.4.6-11 -.TT 6.5.3.2-2 -.I2 -Subrange bounds are not checked. -.sp -.TT 6.4.6-12 -.TT 6.4.6-13 -.TT 6.7.2.4-4 -If the base-type of a set is a subrange, then the set elements are not checked -against the bounds of the subrange. -Only the host-type of this subrange-type is relevant for ACK-Pascal. -.sp -.TT 6.5.4-1 -.I2 -Nil pointers are not detected. -.sp -.TT 6.5.4-2 -.I2 -Undefined pointers are not detected. -.sp -.TT 6.5.5-3 -Changing the file position while the window is in use as actual variable -parameter or as an element of the record variable list of a with-statement -is not detected. -.sp -.TT 6.6.2-9 -An undefined function result is not detected, -because it is never used in an expression. -.sp -.TT 6.6.5.3-6 -.TT 6.6.5.3-7 -Disposing a variable while it is in use as actual variable parameter or -as an element of the record variable list of a with-statement is not detected. 
-.sp -.TT 6.6.5.3-8 -.TT 6.6.5.3-9 -.TT 6.6.5.3-10 -It is not detected that a record variable, created with the variant form -of new, is used as an operand in an expression or as the variable in an -assignment or as an actual value parameter. -.sp -.TT 6.6.5.3-11 -Use of a variable that is not reinitialized after a dispose is -not detected. -.sp -.TT 6.6.6.4-4 -.TT 6.6.6.4-5 -.TT 6.6.6.4-7 -.I2 -There are no range checks for pred, succ and chr. -.sp -.TT 6.6.6.5-6 -ACK-Pascal considers a rewrite of a file as a defining -occurence. -.sp -.TT 6.7.2.2-8 -.TT 6.7.2.2-9 -.TT 6.7.2.2-10 -.TT 6.7.2.2-12 -.I2 -Division by 0 or integer overflow is not detected. -.sp -.TT 6.8.3.9-18 -The use of the some control variable in two nested for -statements in not detected. -.sp -.TT 6.8.3.9-19 -Access of a control variable after leaving the loop results in -the final-value, although an error should be produced. -.sp -.TT 6.9.3.2-3 -The program stops with a file not open error. -The rewrite before the write is missing in the program. -.sp -.TT 6.9.3.2-4 -.TT 6.9.3.2-5 -Illegal FracDigits values are not detected. -.CH "Implementation dependence" -Number of tests run = 14 -.br -Number of tests incorrectly handled = 0 -.SH "Details of implementation dependence" -.IS -.TT 6.1.9-5 -Alternate comment delimiters are implemented -.sp -.TT 6.1.9-6 -The equivalent symbols @ for ^, (. for [ and .) for ] are not -implemented. -.sp -.TT 6.4.2.2-10 -Maxint = 32767 -.sp -.TT 6.4.3.4-5 -Only elements with non-negative ordinal value are allowed in sets. -.sp -.TT 6.6.6.1-1 -Standard procedures and functions are not allowed as parameters. 
-.sp -.TT 6.6.6.2-11 -Details of the machine characteristics regarding real numbers: -.IS -.nf -beta = 2 -t = 56 -rnd = 1 -ngrd = 0 -machep = -56 -negep = -56 -iexp = 8 -minexp = -128 -maxexp = 127 -eps = 1.387779e-17 -epsneg = 1.387779e-17 -xmin = 2.938736e-39 -xmax = 1.701412e+38 -.fi -.IE -.sp -.TT 6.7.2.3-3 -.TT 6.7.2.3-4 -All operands of boolean expressions are evaluated. -.sp -.TT 6.8.2.2-1 -.TT 6.8.2.2-2 -The expression in an assignment statement is evaluated -before the variable selection if this involves pointer -dereferencing or array indexing. -.sp -.TT 6.8.2.3-2 -Actual parameters are evaluated in reverse order. -.sp -.TT 6.9.3.2-6 -The default width for integer, Boolean and real are 6, 5 and 13. -.sp -.TT 6.9.3.5.1-2 -The number of digits written in an exponent is 2. -.sp -.TT 6.9.3.6-1 -The representations of true and false are (~true) and (false). -The parenthesis serve to indicate width. -.IE -.CH "Quality measurement" -Number of tests run = 60 -.br -Number of tests handled incorrectly = 1 -.SH "Results of tests" -Several test perform operations on reals on indicate the error -introduced by these operations. -For each of these tests the following two quality measures are extracted: -.sp -.in +5 -maxRE:~~maximum relative error -.br -rmsRE:~~root-mean-square relative error -.in -5 -.sp 2 -.IS -.TT 1.2-1 -.I1 -25 thousand Whetstone instructions per second. -.I2 -169 thousand Whetstone instructions per second. -.sp -.TT 1.2-2 -The value of (TRUEACC-ACC)*2^56/100000 is 1.4 . -This is well within the bounds specified in [3]. -.br -The GAMM measure is: -.I1 -238 microseconds -.I2 -26.3 microseconds. -.sp -.TT 1.2-3 -The number of procedure calls calculated in this test exceeds -the maximum integer value. -The program stops indicating overflow. -.sp -.TT 6.1.3-3 -The number of significant characters for identifiers is 8. -.sp -.TT 6.1.5-8 -There is no maximum to the line length. 
-.sp -.TT 6.1.5-9 -The error message "too many digits" is given for numbers larger -than maxint. -.sp -.TT 6.1.5-10 -.TT 6.1.5-11 -.TT 6.1.5-12 -Normal values are allowed for real constants and variables. -.sp -.TT 6.1.7-14 -A reasonably large number of strings is allowed. -.sp -.TT 6.1.8-6 -No warning is given for possibly unclosed comments. -.sp -.TT 6.2.1-12 -.TT 6.2.1-13 -.TT 6.2.1-14 -.TT 6.2.1-15 -.TT 6.5.1-2 -Large lists of declarations are possible in each block. -.sp -.TT 6.4.3.2-6 -An 'array[integer] of' is not allowed. -.sp -.TT 6.4.3.2-7 -.TT 6.4.3.2-8 -Large values are allowed for arrays and indices. -.sp -.TT 6.4.3.3-14 -Large amounts of case-constant values are allowed in variants. -.sp -.TT 6.4.3.3-15 -Large amounts of record sections can appear in the fixed part of -a record. -.sp -.TT 6.4.3.3-16 -Large amounts of variants are allowed in a record. -.TT 6.4.3.4-4 -Size and speed of Warshall's algorithm depend on the -implementation of EM: -.IS -.I1 -.br -size: 122 bytes -.br -speed: 5.2 seconds -.sp -.I2 -.br -size: 196 bytes -.br -speed: 0.7 seconds -.IE -.TT 6.5.3.2-3 -Deep nesting of array indices is allowed. -.sp -.TT 6.5.3.2-4 -.TT 6.5.3.2-5 -Arrays can have at least 8 dimensions. -.sp -.TT 6.6.1-8 -Deep static nesting of procedure is allowed. -.sp -.TT 6.6.3.1-6 -Large amounts of formal parameters are allowed. -.sp -.TT 6.6.5.3-12 -Dispose is fully implemented. -.sp -.TT 6.6.6.2-6 -Test sqrt(x): no errors. -The error is within acceptable bounds. -.in +5 -maxRE:~~2~**~-55.50 -.br -rmsRE:~~2~**~-57.53 -.in -5 -.sp -.TT 6.6.6.2-7 -Test arctan(x): may cause underflow or overflow errors. -The error is within acceptable bounds. -.in +5 -.br -maxRE:~~2~**~-55.00 -.br -rmsRE:~~2~**~-56.36 -.in -5 -.sp -.TT 6.6.6.2-8 -Test exp(x): may cause underflow or overflow errors. -The error is not within acceptable bounds. -.in +5 -maxRE:~~2~**~-50.03 -.br -rmsRE:~~2~**~-51.03 -.in -5 -.sp -.TT 6.6.6.2-9 -Test sin(x): may cause underflow errors. 
-The error is not within acceptable bounds. -.in +5 -maxRE:~~2~**~-38.20 -.br -rmsRE:~~2~**~-43.68 -.in -5 -.sp -Test cos(x): may cause underflow errors. -The error is not within acceptable bounds. -.in +5 -maxRE:~~2~**~-41.33 -.br -rmsRE:~~2~**~-46.62 -.in -5 -.sp -.TT 6.6.6.2-10 -Test ln(x): -The error is not within acceptable bounds. -.in +5 -maxRE:~~2~**~-54.05 -.br -rmsRE:~~2~**~-55.77 -.in -5 -.sp -.TT 6.7.1-3 -.TT 6.7.1-4 -.TT 6.7.1-5 -Complex nested expressions are allowed. -.sp -.TT 6.7.2.2-14 -Test real division: -The error is within acceptable bounds. -.in +5 -maxRE:~~0 -.br -rmsRE:~~0 -.in -5 -.sp -.TT 6.7.2.2-15 -Operations of reals in the integer range are exact. -.sp -.TT 6.7.3-1 -.TT 6.8.3.2-1 -.TT 6.8.3.4-2 -.TT 6.8.3.5-15 -.TT 6.8.3.7-4 -.TT 6.8.3.8-3 -.TT 6.8.3.9-20 -.TT 6.8.3.10-7 -Static deep nesting of function calls, -compound statements, if statements, case statements, repeat -loops, while loops, for loops and with statements is possible. -.sp -.TT 6.8.3.2-2 -Large amounts of statements are allowed in a compound -statement. -.sp -.TT 6.8.3.5-12 -The compiler requires case constants to be compatible with -the case selector. -.sp -.TT 6.8.3.5-13 -.TT 6.8.3.5-14 -Large case statements are possible. -.sp -.TT 6.9-2 -Recursive IO on the same file is well-behaved. -.sp -.TT 6.9.1-6 -The reading of real values from a text file is done with -sufficient accuracy. -.in +5 -maxRE:~~2~**~-54.61 -.br -rmsRE:~~2~**~-56.32 -.in -5 -.sp -.TT 6.9.1-7 -.TT 6.9.2-2 -.TT 6.9.3-3 -.TT 6.9.4-2 -Read, readln, write and writeln may have large amounts of -parameters. -.sp -.TT 6.9.1-8 -The loss of precision for reals written on a text file and read -back is: -.in +5 -maxRE:~~2~**~-53.95 -.br -rmsRE:~~2~**~-55.90 -.in -5 -.sp -.TT 6.9.3-2 -File IO buffers without trailing marker are correctly flushed. -.sp -.TT 6.9.3.5.2-2 -Reals are written with sufficient accuracy. 
-.in +5 -maxRE:~~0 -.br -rmsRE:~~0 -.in -5 -.IE -.CH "Level 1 conformance tests" -Number of test passed = 4 -.br -Number of tests failed = 1 -.SH "Details of failed tests" -.IS -.TT 6.6.3.7-4 -An expression indicated by parenthesis whose -value is a conformant array is not allowed. -.IE -.CH "Level 1 deviance tests" -Number of deviations correctly detected = 4 -.br -Number of tests not detecting deviations = 0 -.IE -.CH "Level 1 error handling" -The results depend on the EM implementation. -.sp -Number of errors correctly detected = -.in +5 -.I1 -1 -.I2 -0 -.in -5 -Number of errors not detected = -.in +5 -.I1 -0 -.I2 -1 -.in -5 -.SH "Details of errors not detected" -.IS -.TT 6.6.3.7-9 -.I2 -Subrange bounds are not checked. -.IE -.CH "Level 1 quality measurement" -Number of tests run = 1 -.SH "Results of test" -.IS -.TT 6.6.3.7-10 -Large conformant arrays are allowed. -.IE -.CH "Extensions" -Number of tests run = 3 -.SH Details of test failed -.IS -.TT 6.1.9-7 -The alternative relational operators are not allowed. -.sp -.TT 6.1.9-8 -The alternative symbols for colon, semicolon and assignment are -not allowed. -.sp -.TT 6.8.3.5-16 -The otherwise selector in case statements is not allowed. -.IE -.CH "References" -.ti -5 -[1]~~\ -A.S.Tanenbaum, E.G.Keizer, J.W.Stevenson, Hans van Staveren, -"Description of a machine architecture for use with block structured -languages", -Informatica rapport IR-81. -.ti -5 -[2]~~\ -ISO standard proposal ISO/TC97/SC5-N462, dated February 1979. -The same proposal, in slightly modified form, can be found in: -A.M.Addyman e.a., "A draft description of Pascal", -Software, practice and experience, May 1979. -An improved version, received March 1980, -is followed as much as possible for the -current ACK-Pascal. -.ti -5 -[3]~~\ -B. A. Wichman and J du Croz, -A program to calculate the GAMM measure, Computer Journal, -November 1979. 
diff --git a/doc/z80.doc b/doc/z80.doc deleted file mode 100644 index 1b0f18524..000000000 --- a/doc/z80.doc +++ /dev/null @@ -1,80 +0,0 @@ -. \" $Id$ -.ND April 1985 -.TL -THE Z80 BACK END TABLE -.AU -Frans van Haarlem -.NH 1 -INTRODUCTION -.PP -This table was written to make it run, not to make it clever! -The effect is, that the table written for the intel 8080, -which was made very clever runs faster and requires less space!! -So, for anyone to run programs on a z80 machine: -an attempt could be made to make this table as clever as the one for the i80, -or the i80 table could be used, for that can run on every z80 too. -.NH -IMPLEMENTATION -.PP -It will not be possible to run the entire Amsterdam Compiler Kit on a -Z80-based computer system. -One has to write a program on another -system, a system where the compiler kit runs on. -This program may be a mixture of high-level languages, such as -C or Pascal, EM and z80 assembly code. -The program should be compiled using the compiler kit, -producing z80 machine code. -This code should become available to the z80 machine -for example by downloading or -by storing it in ROM (Read Only Memory). -Depending on the characteristics of the particular z80 based system, some -adaptions have to be made: -.IP 1) -In \fIhead_em\fP: the base address, which is the address where the first -z80 instruction will be stored, and the initial value of the -stackpointer are set to 0x1000 and 0x7ffe respectively. -The latter because it could run on a 32K machine as well. -Other systems require other values. -.IP 2) -In \fIhead_em\fP: before calling "__m_a_i_n", the environment -pointer, argument vector and argument count will have to be pushed -onto the stack. -Since this back-end is tested on a system without any knowledge -of these things, dummies are pushed now. -.IP 3) -In \fItail_em\fP: proper routines "putchar" and "getchar" should -be provided. -They should write resp. read a character on/from the monitor. 
-Maybe some conversions will have to be made. -The ones for the Nascom and Hermac z80 micro's are to be found -in the EM-library. -.IP 4) -In \fIhead_em\fP: an application program returns control to the monitor by -jumping to address 0x20. -This may have to be changed on different systems. -For a CPM-machine for example this should be 0x5, to provide a warm boot. -.IP 5) -In \fItail_em\fP: the current version of the z80 back-end has very limited I/O -capabilities, because it was tested on a system that -had no knowledge of files. -So the implementation of the EM-instruction \fImon\fP is very simple; -it can only do the following things: -.DS - Monitor call 1: - Exit - Monitor call 3: - read, always reads from the monitor. - echoes the read character. - ignores file descriptor. - Monitor call 4: - write, always writes on the monitor. - ignores file descriptor. - Monitor call 5: - open file, returns file descriptor -1. - Monitor call 6: - close file, returns error code = 0. - Monitor call 54: - io-control, returns error code = 0. -.DE -If the system should do file-handling the routine ".mon" -should be extended thoroughly. 
diff --git a/emtest/.distr b/emtest/.distr deleted file mode 100644 index 3d60866ba..000000000 --- a/emtest/.distr +++ /dev/null @@ -1,6 +0,0 @@ -Makefile -READ_ME -ok -select.c -test.h -tests diff --git a/emtest/Makefile b/emtest/Makefile deleted file mode 100644 index a3ea72f51..000000000 --- a/emtest/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -ACK=ack -tested: last - set -x ;\ - for i in `awk '{for(i=\$$1;i<=151;i++)print i}' last ` ;\ - do \ - echo $$i; \ - echo $$i >last; \ - select $$i tests > test.e; \ - $(ACK) test.e; \ - a.out \ - : ok; \ - done - rm -f test.e a.out - >tested - -last: tests test.h select - echo 0 >last - -select: select.c - $(CC) -O -o select select.c diff --git a/emtest/READ_ME b/emtest/READ_ME deleted file mode 100644 index 021ae77fd..000000000 --- a/emtest/READ_ME +++ /dev/null @@ -1,136 +0,0 @@ -This directory contains test programs for EM implementations. -The test programs are all part of the file "tests". -Each individual test program looks like: - - TEST 004: test ... - ... ; data declarations etc. - MAIN nlocal - ... ; part of the body of MAIN - PROC - ... ; subroutines used by this test - -The PROC part is optional, so the smallest test program looks like: - - TEST 000: null test - MAIN 0 - -The keywords used by "select", like TEST, MAIN, PROC, HOL, OK and ERRLAB, -all consist of upper case letters and start in column one. -A convention for test numbers is to use 3 digit numbers, possibly left -padded with zero's. - -A program, called "select", is provided to combine a range of tests -into a single test program. -"Select" expects a range as argument, like 0-127, or -127, or 0-. -Tests that have a TEST number in that range are included. -"Select" also expects the file from which the tests should -be selected as an argument. -If no argument is given, or only a range argument, select expects -the tests to slect from on standard input. 
- -To prevent name clashes, some rules must be obeyed: - - data label names, procedure names and instruction label numbers - must be unique over all tests. A good habit is to use the - three digit test number as suffix. - - only keyword of "select" may start with uppercase letters in column - one, to allow for expansion in the future. - - because only a single 'hol' pseudo is allowed, "select" must - generate the 'hol' pseudo. An individual test may request - some 'hol' space by a special HOL line, starting in column one - and followed by a single number, the number of bytes needed. - This number must consists of digits only, no constant symbols, - because "select" must compute the maximum, so before the - preprocessor has replaced the constant symbols by their values. - - a similar problem is caused by the number of bytes of local - storage for 'main'. An individual test may specify the number - of bytes it needs as parameter to the MAIN line. - Again, the number must consist of digits only. - -Test programs print a sequence of integers greater than 1. -This sequence is terminated by the number 1 as soon as an error is detected. -If all tests are performed correctedly the number 0 is printed. - -To allow test programs to print integers without the full machinery of -conversion and i/o routines, the EM instruction 'nop' is used. -Each time this instruction is executed, the current line number as -maintained by the 'lin' instruction must be printed, followed by a -newline, at least during debugging. - -The following abbrevation may be used in test programs: - - OK -> lin n - nop - -Numbers are automatically assigned in order of static appearance. -As soon as an error is detected you must branch to label 1, by instructions -like 'bra *1' and 'zne *1'. -Label 1 is automatically provided in the main routine. -If you jump to label 1 in a subroutine, then that subroutine must -end with ERRLAB, like in: - - PROC - pro $test,0 - ... - bra *1 - ... 
- ret 0 - ERRLAB - end - -An option to "select" is to generate 'fil' instructions whenever a -new test starts. -This is useful if 'nop' prints the 'fil' string as well as the 'lin' number. -This 'f' option is on by default, off if a '-f' flag is given. - -The EM file generated by "select" includes "test.h". -"test.h" may contain definitions of the following symbols: - W2S: the size of double precision integers, if implemented. - FS: the size of single precision floats, if implemented. - F2S: the size of double precision floats, if implemented. -The value of these symbols, if defined, must be the size of the object involved. - -Two other symbols are used: - EM_PSIZE: pointer size - EM_WSIZE: word size -The machine dependent translation program, like 8086 and vax2, give -definitions of these symbols while calling the EM encode program. -Because these size names occur quite often, they may be abbreviated: - WS -> EM_WSIZE - PS -> EM_PSIZE - -Before running the tests in the file "tests", it is wise to test -the necessary basic functions with some simple tests like - - TEST 000: null - MAIN 0 -and - TEST 001: ok - MAIN 0 - OK -and - TEST 998: error - MAIN 0 - bra *1 -and - TEST 999: test lni - MAIN 0 - lin 1 - lni - loe 0 - loc 2 - bne *1 - OK -The first two of these are part of "tests" as well. The last two are -not included in "tests" intensionally, because they would fail. -The last tests fails because it references the ABS block which is -inaccessable after an 'hol' pseudo. -Proceed as follows for each of these basic tests: - - make a file called 'basic' containing the test - - run select: - select basic >basic.e - - compile by - machine basic.e - - and load and run - - where machine should be replaced by the name of program - used to compile EM programs for the current machine. 
diff --git a/emtest/last b/emtest/last deleted file mode 100644 index 573541ac9..000000000 --- a/emtest/last +++ /dev/null @@ -1 +0,0 @@ -0 diff --git a/emtest/ok b/emtest/ok deleted file mode 100755 index 1eca33bb9..000000000 --- a/emtest/ok +++ /dev/null @@ -1,10 +0,0 @@ -trap "" 1 2 - -while read x -do - case $x in - 0) exit 0;; - bad) exit 1;; - esac -done -exit 1 diff --git a/emtest/select.c b/emtest/select.c deleted file mode 100644 index 8d1f9c6a0..000000000 --- a/emtest/select.c +++ /dev/null @@ -1,256 +0,0 @@ -/* $Id$ */ -/* - * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. - * See the copyright notice in the ACK home directory, in the file "Copyright". - * - */ - -#include -#include -#include - -#define LINSIZ 100 - -int sigs[] = { - SIGHUP, - SIGINT, - SIGQUIT, - SIGTERM, - 0 -}; - -char *prog; -char line[LINSIZ]; -int nlocals = 0; -int nhol = 0; -int nerrors = 0; -int oknum = 2; -int fflag = 1; -int low = 0; -int high = 999; - -FILE *file1; -FILE *file2; -FILE *file3; -char name1[] = "/tmp/f1XXXXXX"; -char name2[] = "/tmp/f2XXXXXX"; -char name3[] = "/tmp/f3XXXXXX"; - -char *to3dig(); - -stop() { - unlink(name1); - unlink(name2); - unlink(name3); - exit(nerrors); -} - -main(argc,argv) char **argv; { - register *p; - register char *s; - - prog = *argv++; --argc; - mktemp(name1); - mktemp(name2); - mktemp(name3); - for (p = sigs; *p; p++) - if (signal(*p, stop) == SIG_IGN) - signal(*p, SIG_IGN); - while (argc > 0 && argv[0][0] == '-') { - switch (argv[0][1]) { - case 'f': - fflag ^= 1; - break; - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - high = atoi(&argv[0][1]); - break; - default: - usage(); - break; - } - argc--; - argv++; - } - if (argc > 0 && argv[0][0] >= '0' && argv[0][0] <= '9') { - s = argv[0]; - do - low = low*10 + *s++ - '0'; - while (*s >= '0' && *s <= '9'); - if (*s == 0) - high = low; - else if (*s++ == '-') { - high = atoi(s); - if (high == 0) - high = 
999; - } else - fatal("bad range %s", argv[0]); - argc--; - argv++; - } - if (argc > 1) - usage(); - if (argc == 1 && freopen(argv[0], "r", stdin) == NULL) - fatal("cannot open %s", argv[0]); - if ((file1 = fopen(name1, "w")) == NULL) - fatal("cannot create %s", name1); - if ((file2 = fopen(name2, "w")) == NULL) - fatal("cannot create %s", name2); - if ((file3 = fopen(name3, "w")) == NULL) - fatal("cannot create %s", name3); - if (getline()) - while (select()) - ; - fclose(file1); - fclose(file2); - fclose(file3); - combine(); - stop(); -} - -select() { - register FILE *f; - int i; - - if (sscanf(line, "TEST %d", &i) != 1) - fatal("bad test identification(%s)", line); - if (i < low || i > high) { - while (getline()) - if (line[0] == 'T') - return(1); - return(0); - } - fprintf(file2, "; %s\n", line); - if (fflag) { - char *s = to3dig(i); - fprintf(file1, ".%s\n", s); - fprintf(file1, " con \"tst%s\"\n", s); - fprintf(file2, " fil .%s\n", s); - } - f = file1; - while (getline()) { - switch (line[0]) { - case 'T': - return(1); - case 'M': - if (sscanf(line, "MAIN%d", &i) != 1 || i%4 != 0) - break; - if (i > nlocals) - nlocals = i; - f = file2; - continue; - case 'P': - if (strcmp(line, "PROC") != 0) - break; - f = file3; - continue; - case 'H': - if (f != file1 || - sscanf(line, "HOL%d", &i) != 1 || - i%4 != 0) - break; - if (i > nhol) - nhol = i; - continue; - case 'O': - if (strcmp(line, "OK") != 0) - break; - fprintf(f, " lin %d\n nop\n", oknum++); - continue; - case 'E': - if (f != file3 || strcmp(line, "ERRLAB") != 0) - break; - fprintf(f, "1\n lin 1\n nop\n loc 1\n loc 1\n mon\n"); - continue; - default: - putline(f); - continue; - } - fatal("bad line (%s)", line); - } - return(0); -} - -combine() { - - printf("#define WS EM_WSIZE\n"); - printf("#define PS EM_PSIZE\n"); - printf("#include \"test.h\"\n"); - printf(" mes 2,WS,PS\n"); - printf(" mes 1\n"); - printf(" mes 4,300\n"); - if (nhol) - printf(" hol %d,0,0\n", nhol); - copy(name1); - printf(" exp 
$_m_a_i_n\n"); - printf(" pro $_m_a_i_n,%d\n", nlocals); - printf(" loc 123\n"); - printf(" loc -98\n"); - copy(name2); - printf(" loc -98\n"); - printf(" bne *1\n"); - printf(" loc 123\n"); - printf(" bne *1\n"); - printf(" lin 0\n"); - printf(" nop\n"); - printf(" loc 0\n"); - printf(" ret WS\n"); - printf("1\n"); - printf(" lin 1\n"); - printf(" nop\n"); - printf(" loc 1\n"); - printf(" ret WS\n"); - printf(" end\n"); - copy(name3); -} - -copy(s) char *s; { - if (freopen(s, "r", stdin) == NULL) - fatal("cannot reopen %s", s); - while (getline()) - putline(stdout); -} - -getline() { - register len; - - if (fgets(line, LINSIZ, stdin) == NULL) - return(0); - len = strlen(line); - if (line[len-1] != '\n') - fatal("line too long(%s)", line); - line[len-1] = 0; - return(1); -} - -putline(f) FILE *f; { - fprintf(f, "%s\n", line); -} - -fatal(s, a1, a2, a3, a4) char *s; { - fprintf(stderr, "%s: ", prog); - fprintf(stderr, s, a1, a2, a3, a4); - fprintf(stderr, " (fatal)\n"); - nerrors++; - stop(); -} - -usage() { - fprintf(stderr, "usage: %s -f [[low]-[high]] [testcollection]\n", prog); - nerrors++; - stop(); -} - -char * -to3dig(i) - register int i; -{ - static char buf[4]; - register char *s = buf; - - *s++ = (i % 1000) / 100 + '0'; - *s++ = (i % 100) / 10 + '0'; - *s++ = (i % 10) + '0'; - *s = '\0'; - return buf; -} diff --git a/emtest/test.e b/emtest/test.e deleted file mode 100644 index 7f26cf5f5..000000000 --- a/emtest/test.e +++ /dev/null @@ -1,28 +0,0 @@ -#define WS EM_WSIZE -#define PS EM_PSIZE -#include "test.h" - mes 2,WS,PS - mes 1 - mes 4,300 -.000 - con "tst000" - exp $m_a_i_n - pro $m_a_i_n,0 - loc 123 - loc -98 -; TEST 000: empty - fil .000 - loc -98 - bne *1 - loc 123 - bne *1 - lin 0 - nop - loc 0 - ret WS -1 - lin 1 - nop - loc 1 - ret WS - end diff --git a/emtest/test.h b/emtest/test.h deleted file mode 100644 index e1deeccf8..000000000 --- a/emtest/test.h +++ /dev/null @@ -1,8 +0,0 @@ -/* $Id$ */ -/* - * (c) copyright 1987 by the Vrije Universiteit, 
Amsterdam, The Netherlands. - * See the copyright notice in the ACK home directory, in the file "Copyright". - */ -/*#define W2S 4 /* double word size */ -/*#define FS 4 /* float size */ -/*#define F2S 8 /* double size */ diff --git a/emtest/tests b/emtest/tests deleted file mode 100644 index 68d558a0e..000000000 --- a/emtest/tests +++ /dev/null @@ -1,4597 +0,0 @@ -TEST 000: empty -MAIN 0 -TEST 001: OK -MAIN 0 -OK -TEST 002: test loc, bne -MAIN 0 - loc 0 - loc 0 - bne *1 -OK - loc 1 - loc 1 - bne *1 -OK - loc -1 - loc -1 - bne *1 -OK - loc 127 - loc 127 - bne *1 -OK - loc -127 - loc -127 - bne *1 -OK - loc -128 - loc -128 - bne *1 -OK -#if WS > 1 - loc 255 - loc 255 - bne *1 -OK - loc 256 - loc 256 - bne *1 -OK - loc 257 - loc 257 - bne *1 -OK - loc 32767 - loc 32767 - bne *1 -OK - loc -32767 - loc -32767 - bne *1 -OK - loc -32768 - loc -32768 - bne *1 -OK -#endif /* WS > 1 */ -#if WS > 2 - loc 65535 - loc 65535 - bne *1 -OK - loc 65536 - loc 65536 - bne *1 -OK - loc 65537 - loc 65537 - bne *1 -OK - loc 2147483647 - loc 2147483647 - bne *1 -OK - loc -2147483647 - loc -2147483647 - bne *1 -OK - loc -2147483648 - loc -2147483648 - bne *1 -OK -#endif /* WS > 2 */ -TEST 003: test signed and unsigned loc -MAIN 0 -#if WS == 1 - loc 128 - loc -128 - bne *1 -OK - loc 129 - loc -127 - bne *1 -OK - loc 255 - loc -1 - bne *1 -OK -#endif -#if WS == 2 - loc 32768 - loc -32768 - bne *1 -OK - loc 32769 - loc -32767 - bne *1 -OK - loc 65535 - loc -1 - bne *1 -OK -#endif -#if WS == 4 - loc 2147483648 - loc -2147483648 - bne *1 -OK - loc 2147483649 - loc -2147483647 - bne *1 -OK - loc 4294967295 - loc -1 - bne *1 -OK -#endif -TEST 004: test lol, stl, del, inl, zrl -MAIN 8 - loc 95 - stl -4 - lol -4 - loc 95 - bne *1 -OK - lol 0 - stl -4 - loc 125 - stl -8 - loc 125 - lol -8 - bne *1 -OK - lol 0 - lol -4 - bne *1 -OK - zrl -4 - lol -4 - loc 0 - bne *1 -OK - inl -4 - lol -4 - loc 1 - bne *1 - loc -1 - stl -4 - inl -4 - lol -4 - loc 0 - bne *1 -OK - del -4 - lol -4 - loc -1 - bne *1 - 
loc 1 - stl -4 - del -4 - lol -4 - loc 0 - bne *1 -OK -TEST 005: test loe, ste, zre, ine, dee -HOL 8 -MAIN 0 - loc 95 - ste WS - loe WS - loc 95 - bne *1 -OK - loc 125 - ste 0 - loc 125 - loe 0 - bne *1 -OK - loc 95 - ste 0 - loe WS - loe 0 - bne *1 -OK - zre WS - loe WS - loc 0 - bne *1 -OK - ine WS - loe WS - loc 1 - bne *1 - loc -1 - ste WS - ine WS - loe WS - loc 0 - bne *1 -OK - dee WS - loe WS - loc -1 - bne *1 - loc 1 - ste WS - dee WS - loe WS - loc 0 - bne *1 -OK -TEST 006: test named global data: con, rom, bss -a006 - bss 4,0,0 -b006 - con 35 -c006 - bss 4,0,1 -d006 - bss 4,35,1 -e006 - rom 57 -MAIN 0 - loc 0 - loe c006 - bne *1 -OK - loe b006 - ste a006 - loe d006 - loe a006 - bne *1 -OK - loe e006 - ste b006 - loc 57 - loe b006 - bne *1 -OK -TEST 007: test lal, lil -MAIN 12 - lal -8 -#if WS == PS - stl -4 -#endif -#if WS*2 == PS - sdl -4 -#endif - loc 117 - stl -8 - lil -4 - loc 117 - bne *1 -OK -TEST 008: test sil -MAIN 12 - loc 29 - stl -8 - lal -8 -#if WS == PS - stl -4 -#endif -#if WS*2 == PS - sdl -4 -#endif - loc 110 - sil -4 - lol -8 - loc 110 - bne *1 -OK -TEST 009: test lof -MAIN 20 - lal -16 - loc 120 - stl -12 - lof 4 - loc 120 - bne *1 -OK -TEST 010: test stf -MAIN 20 - loc 180 - lal -16 - stf 8 - lol -8 - loc 180 - bne *1 -OK -TEST 011: test loi WS -MAIN 20 - loc 140 - stl -12 - lal -12 - loi WS - loc 140 - bne *1 -OK -TEST 012: test loi 1, sti 1 -MAIN 20 -#if WS > 1 - loc 519 - lal -10 - sti 1 - lal -10 - loi 1 - loc 7 - bne *1 - loc 129 - lal -10 - sti 1 - lal -10 - loi 1 - loc 9 - ble *1 -OK -#endif -TEST 013: test loi 2*WS -MAIN 20 - loc 77 - stl -12 - loc 119 - stl -12+WS - lal -12 - loi 2*WS - loc 77 - bne *1 - loc 119 - bne *1 -OK -TEST 014: test loi 4*WS -MAIN 20 - loc 150 - stl -16+(3*WS) - loc 152 - stl -16+(2*WS) - loc 154 - stl -16+WS - loc 156 - stl -16 - lal -16 - loi 4*WS - loc 156 - bne *1 - loc 154 - bne *1 - loc 152 - bne *1 - loc 150 - bne *1 -OK -TEST 015: test los WS with 1 -MAIN 20 - loc 7 - lal -10 - sti 1 - lal -10 - 
loc 1 - los WS - loc 7 - bne *1 -OK -TEST 016: test los WS with 2 -MAIN 20 -#if WS == 1 - loc 77 - loc 78 - stl -12 - stl -11 - lal -12 - loc 2 - los WS - loc 78 - bne *1 - loc 77 - bne *1 -#endif -#if WS > 1 - loc 777 - lal -12 - sti 2 - lal -12 - loc 2 - los WS - loc 777 - bne *1 -#endif -OK -TEST 017: test los WS with 4 -MAIN 20 -#if WS == 1 - loc 14 - loc 15 - loc 16 - loc 17 - lal -12 - sti 4 - lal -12 - loc 4 - los WS - loc 17 - bne *1 - loc 16 - bne *1 - loc 15 - bne *1 - loc 14 - bne *1 -#endif -#if WS == 2 - loc 3001 - loc 3002 - stl -12 - stl -10 - lal -12 - loc 4 - los WS - loc 3002 - bne *1 - loc 3001 - bne *1 -#endif -#if WS > 2 - loc 123001 - stl -12 - lal -12 - loc 4 - los WS - loc 123001 - bne *1 -#endif -OK -TEST 018: test ldl -MAIN 20 - loc 77 - stl -12 - loc 123 - stl -12+WS - ldl -12 - loc 77 - bne *1 - loc 123 - bne *1 -OK -TEST 019: test lde -HOL 20 -MAIN 0 - loc 70 - ste 12 - loc 71 - ste 12+WS - lde 12 - loc 70 - bne *1 - loc 71 - bne *1 -OK -TEST 020: test ldf -MAIN 20 - loc 123 - loc 77 - sdl -8 - lal -13 - ldf 5 - loc 77 - bne *1 - loc 123 - bne *1 -OK -TEST 021: test ldf -MAIN 20 - loc 75 - stl -12 - loc 77 - stl -12+WS - lal -20 - ldf 8 - loc 75 - bne *1 - loc 77 - bne *1 -OK -TEST 022: test sdl -MAIN 20 - loc 30 - loc 31 - sdl -12 - lol -12+WS - loc 30 - bne *1 - lol -12 - loc 31 - bne *1 -OK -TEST 023: test sde -HOL 24 -MAIN 0 - loc 40 - loc 41 - sde 16 - loe 16 - loc 41 - bne *1 - loe 16+WS - loc 40 - bne *1 -OK -TEST 024: test sdf -HOL 24 -MAIN 0 - loc 51 - loc 50 - lae 6 - sdf 10 - loe 16 - loc 50 - bne *1 - loc 51 - loe 16+WS - bne *1 -OK -TEST 025: test sti 1 -MAIN 20 - loc 4136 - lal -11 - sti 1 - loc 1034 - lal -10 - sti 1 - lal -10 - loi 1 - loc 10 - bne *1 - lal -11 - loi 1 - loc 40 - bne *1 -OK -TEST 026: test sti 1 and lol -MAIN 20 - loc 257 - stl -12+WS - loc 514 - stl -12-WS - loc 50 - lal -12 - sti 1 -#if WS > 1 - loc -50 - lal -11 - sti 1 -#endif -#if WS > 2 - loc 49 - lal -10 - sti 1 - loc -49 - lal -9 - sti 1 -#endif 
- loc 257 - lol -12+WS - bne *1 - loc 514 - lol -12-WS - bne *1 -OK -TEST 027: test sti 2 -MAIN 20 -#if WS == 1 - ldc 90 -#else - loc 90 -#endif - lal -12 - sti 2 -#if WS != 2 - lal -12 - loi 2 -#else - lol -12 -#endif -#if WS == 1 - ldc 90 -#else - loc 90 -#endif - bne *1 -OK -TEST 028: test sti 4 -MAIN 20 -#if WS == 1 - loc 100 - loc 101 -#endif -#if WS <= 2 - loc 102 -#endif - loc 103 - lal -16 - sti 4 - lol -16 - loc 103 - bne *1 -OK -TEST 029: test sts WS with 1 -MAIN 20 - loc 57 - lal -13 - sti 1 - loc 123 - lal -11 - sti 1 - loc 20 - lal -12 - loc 1 - sts WS - lal -12 - loi 1 - loc 20 - bne *1 - lal -11 - loi 1 - loc 123 - bne *1 - lal -13 - loi 1 - loc 57 - bne *1 -OK -TEST 030: test sts WS with WS -MAIN 20 - loc 210 - lal -12 - loc WS - sts WS - lol -12 - loc 210 - bne *1 -OK -TEST 031: test sts WS with 4 -MAIN 20 -#if WS == 1 - loc 100 - loc 101 -#endif -#if WS <= 2 - loc 102 -#endif - loc 103 - lal -16 - loc 4 - sts WS - lol -16 - loc 103 - bne *1 -OK -TEST 032: test adi WS -MAIN 0 - loc 1 - loc 1 - adi WS - loc 2 - bne *1 -OK - loc 5 - loc -6 - adi WS - loc -1 - bne *1 -OK -#if WS >= 2 - loc -1007 - loc +999 - adi WS - loc -8 - bne *1 -OK - loc -1300 - loc +1405 - adi WS - loc 105 - bne *1 -OK - loc -30000 - loc -20 - adi WS - loc -30020 - bne *1 -OK -#endif -#if WS >= 4 - loc -100007 - loc +99999 - adi WS - loc -8 - bne *1 -OK - loc -1300 - loc +140567 - adi WS - loc 139267 - bne *1 -OK - loc -30000000 - loc -20 - adi WS - loc -30000020 - bne *1 -OK -#endif -TEST 033: test sbi WS -MAIN 0 - loc 1 - loc 1 - sbi WS - loc 0 - bne *1 -OK - loc 5 - loc -6 - sbi WS - loc +11 - bne *1 -OK -#if WS >= 2 - loc -1007 - loc +999 - sbi WS - loc -2006 - bne *1 -OK - loc -1300 - loc +1405 - sbi WS - loc -2705 - bne *1 -OK - loc -30000 - loc -20 - sbi WS - loc -29980 - bne *1 -OK -#endif -#if WS >= 4 - loc -100007 - loc +99999 - sbi WS - loc -200006 - bne *1 -OK - loc -1300 - loc +140567 - sbi WS - loc -141867 - bne *1 -OK - loc -30000000 - loc -20 - sbi WS - loc 
-29999980 - bne *1 -OK -#endif -TEST 034: test mli WS -MAIN 0 - loc 5 - loc 9 - mli WS - loc 45 - bne *1 -OK - loc 0 - loc 10 - mli WS - loc 0 - bne *1 -OK -#if WS > 1 - loc -99 - loc 7 - mli WS - loc -693 - bne *1 -OK - loc -100 - loc -90 - mli WS - loc 9000 - bne *1 -OK -#endif -#if WS > 2 - loc 123456 - loc 200 - mli WS - loc 24691200 - bne *1 -OK -#endif -TEST 035: test dvi WS -MAIN 0 - loc 15 - loc 5 - dvi WS - loc 3 - bne *1 -OK - loc 100 - loc -7 - dvi WS - loc -14 - bne *1 -OK - loc -100 - loc 7 - dvi WS - loc -14 - bne *1 -OK -#if WS >= 2 - loc -1000 - loc -201 - dvi WS - loc 4 - bne *1 -OK -#endif -#if WS >= 4 - loc 1234567 - loc -100 - dvi WS - loc -12345 - bne *1 -OK -#endif -TEST 036: test rmi WS -MAIN 0 - loc 100 - loc 7 - rmi WS - loc 2 - bne *1 -OK - loc -100 - loc 7 - rmi WS - loc -2 - bne *1 -OK - loc 100 - loc -7 - rmi WS - loc 2 - bne *1 -OK - loc -100 - loc -7 - rmi WS - loc -2 - bne *1 -OK -#if WS >= 2 - loc -1000 - loc -201 - rmi WS - loc -196 - bne *1 -OK -#endif -#if WS >= 4 - loc 1234567 - loc -100 - rmi WS - loc 67 - bne *1 -OK -#endif -TEST 037: test ngi WS -MAIN 0 - loc 100 - ngi WS - loc -100 - bne *1 -OK - loc -100 - ngi WS - loc 100 - bne *1 -OK -TEST 038: test sli,slu WS -MAIN 0 - loc 5 - loc 3 - sli WS - loc 40 - bne *1 -OK - loc -2 - loc 4 - sli WS - loc -32 - bne *1 -OK - loc 5 - loc 3 - slu WS - loc 40 - bne *1 -OK - loc -2 - loc 4 - slu WS - loc -32 - bne *1 -OK -TEST 039: test sri,sru WS -MAIN 0 - loc 64 - loc 5 - sri WS - loc 2 - bne *1 -OK - loc -3 - loc 2 - sri WS - loc -1 - bne *1 -OK - loc -16 - loc 3 - sri WS - loc -2 - bne *1 -OK - loc 64 - loc 5 - sru WS - loc 2 - bne *1 -OK - loc -3 - loc 2 - sru WS -#if WS == 1 - loc 63 -#endif -#if WS == 2 - loc 16383 -#endif -#if WS == 4 - loc 1073741823 -#endif - bne *1 -OK -TEST 040: test rol WS -MAIN 0 - loc 1 - loc 3 - rol WS - loc 8 - bne *1 -OK -#if WS > 1 - loc 16384 - loc WS * 8 - 1 - rol WS - loc 8192 - bne *1 -OK - loc -2 - loc WS * 8 - 1 - rol WS -#if WS == 2 - loc 32767 
-#endif -#if WS == 4 - loc 2147483647 -#endif - bne *1 -OK - loc -28671 - loc 0 - rol WS - loc -28671 - bne *1 -OK -#endif -TEST 041: test ror WS -MAIN 0 - loc 4 - loc 5 - ror WS -#if WS == 1 - loc 32 -#endif -#if WS == 2 - loc 8192 -#endif -#if WS == 4 - loc 536870912 -#endif - bne *1 -OK -#if WS == 2 - loc 32767 - loc 15 - ror WS - loc -2 - bne *1 -OK -#endif - loc -28 - loc 0 - ror WS - loc -28 - bne *1 -OK -TEST 042: test inc -MAIN 0 - loc 20 - inc - loc 21 - bne *1 -OK - loc -50 - inc - loc -49 - bne *1 -OK -TEST 043: test dec -MAIN 0 - loc 66 - dec - loc 65 - bne *1 -OK - loc -44 - dec - loc -45 - bne *1 -OK -TEST 044: test adp -MAIN 20 - lal -10 - adp -2 - lal -12 - cmp - zne *1 -OK - loc 519 - lal -11 - sti 1 - lal -12 - adp 1 - loi 1 - loc 7 - bne *1 -OK -TEST 045: test inn WS -MAIN 0 - loc 8 - loc 3 - inn WS - loc 1 - bne *1 -OK - loc 8 - loc 4 - inn WS - loc 0 - bne *1 -OK - loc 2 - loc -5 - inn WS - loc 0 - bne *1 -OK - loc -5 - loc 3 - inn WS - loc 1 - bne *1 -OK -TEST 046: test inn ? -MAIN 0 - loc 8 - loc 3 - loc WS - inn ? - loc 1 - bne *1 -OK - loc 8 - loc 4 - loc WS - inn ? - loc 0 - bne *1 -OK - loc 2 - loc -5 - loc WS - inn ? - loc 0 - bne *1 -OK - loc -5 - loc 3 - loc WS - inn ? - loc 1 - bne *1 -OK -TEST 047: test set -MAIN 0 - loc 3 - set WS - loc 8 - bne *1 -OK - loc 6 - set 2*WS - loc 64 - bne *1 - loc 0 - bne *1 -OK - loc 0 - set 4*WS - loc 1 - bne *1 - loc 0 - bne *1 - loc 0 - bne *1 - loc 0 - bne *1 -OK -TEST 048: test set ? -MAIN 0 - loc 3 - loc WS - set ? - loc 8 - bne *1 -OK - loc 6 - loc 2*WS - set ? - loc 64 - bne *1 - loc 0 - bne *1 -OK - loc 0 - loc 4*WS - set ? - loc 1 - bne *1 - loc 0 - bne *1 - loc 0 - bne *1 - loc 0 - bne *1 -OK -TEST 049: test aar -a049 - rom 5,2,4 -b049 - con 5,2,1 -MAIN 20 - lal -12 - loc 5 - lae a049 - aar WS - lal -12 - cmp - zne *1 -OK - lal -11 - loc 7 - lae b049 - aar WS - lal -9 - cmp - zne *1 -OK -TEST 050: test aar ? 
-a050 - rom 5,2,4 -b050 - con 5,2,1 -MAIN 20 - lal -12 - loc 5 - lae a050 - loc WS - aar ? - lal -12 - cmp - zne *1 -OK - lal -11 - loc 7 - lae b050 - loc WS - aar ? - lal -9 - cmp - zne *1 -OK -TEST 051: test lar -a051 - rom 5,2,2*WS -b051 - con 5,2,1 -MAIN 20 - loc 40 - loc 41 - sdl -16+(2*WS) - lal -16 - loc 6 - lae a051 - lar WS - loc 41 - bne *1 - loc 40 - bne *1 -OK - loc 42 - lal -13 - sti 1 - lal -15 - loc 7 - lae b051 - lar WS - lal -13 - loi 1 - bne *1 -OK -TEST 052: test lar ? -a052 - rom 5,2,2*WS -b052 - con 5,2,1 -MAIN 20 - loc 40 - loc 41 - sdl -16+(2*WS) - lal -16 - loc 6 - lae a052 - loc WS - lar ? - loc 41 - bne *1 - loc 40 - bne *1 -OK - loc 42 - lal -13 - sti 1 - lal -15 - loc 7 - lae b052 - loc WS - lar ? - lal -13 - loi 1 - bne *1 -OK -TEST 053: test sar -a053 - rom 5,2,3*WS -b053 - con 5,2,1 -MAIN 28 : assert WS <= 8 - loc 39 - loc 40 - loc 41 - lal -28 - loc 6 - lae a053 - sar WS - lal -28+(3*WS) - loi 3*WS - loc 41 - bne *1 - loc 40 - bne *1 - loc 39 - bne *1 -OK - loc -2 - lal -11 - loc 7 - lae b053 - sar WS - lal -9 - loi 1 - loc 254 - bne *1 -OK -TEST 054: test sar ? -a054 - rom 5,2,3*WS -b054 - con 5,2,1 -MAIN 28 - loc 39 - loc 40 - loc 41 - lal -28 - loc 6 - lae a054 - loc WS - sar ? - lal -28+(3*WS) - loi 3*WS - loc 41 - bne *1 - loc 40 - bne *1 - loc 39 - bne *1 -OK - loc -2 - lal -11 - loc 7 - lae b054 - loc WS - sar ? 
- lal -9 - loi 1 - loc 254 - bne *1 -OK -TEST 055: test tlt -MAIN 0 - loc 1 - tlt - loc 0 - bne *1 -OK - loc 0 - tlt - loc 0 - bne *1 -OK - loc -3 - tlt - loc 1 - bne *1 -OK -TEST 056: test tle -MAIN 0 - loc 8 - tle - loc 0 - bne *1 -OK - loc 0 - tle - loc 1 - bne *1 -OK - loc -19 - tle - loc 1 - bne *1 -OK -TEST 057: test teq -MAIN 0 - loc 17 - teq - loc 0 - bne *1 -OK - loc 0 - teq - loc 1 - bne *1 -OK - loc -100 - teq - loc 0 - bne *1 -OK -TEST 058: test tne -MAIN 0 - loc 76 - tne - loc 1 - bne *1 -OK - loc 0 - tne - loc 0 - bne *1 -OK - loc -99 - tne - loc 1 - bne *1 -OK -TEST 059: test tge -MAIN 0 - loc 14 - tge - loc 1 - bne *1 -OK - loc 0 - tge - loc 1 - bne *1 -OK - loc -76 - tge - loc 0 - bne *1 -OK -TEST 060: test tgt -MAIN 0 - loc 20 - tgt - loc 1 - bne *1 -OK - loc 0 - tgt - loc 0 - bne *1 -OK - loc -66 - tgt - loc 0 - bne *1 -OK -TEST 061: test cmi WS -MAIN 0 - loc 10 - loc 20 - cmi WS - tlt - loc 1 - bne *1 -OK - loc 20 - loc 10 - cmi WS - tgt - loc 1 - bne *1 -OK - loc 44 - loc 44 - cmi WS - loc 0 - bne *1 -OK -TEST 062: test cmp, adp, cmu -MAIN 20 - lal -5 - lal -2 - cmp - tlt - loc 1 - bne *1 -OK - lal -2 - lal -5 - cmp - tle - loc 0 - bne *1 -OK - lal -2 - lal -2 - cmp - loc 0 - bne *1 -OK - lal -5 - adp 3 - lal -2 - cmp - loc 0 - bne *1 -OK - lal -5 - adp 4 - lal -2 - cmp - tge - loc 1 - bne *1 -OK - loc 6 - loc 6 - cmu WS - loc 0 - bne *1 -OK - loc 17 - loc 27 - cmu WS - tlt - loc 1 - bne *1 -OK -#ifdef W2S - loc 17 - loc 6 - loc 27 - loc 6 - cmu W2S - tlt - loc 1 - bne *1 -OK - loc 6 - loc 27 - loc 6 - loc 17 - cmu W2S - tgt - loc 1 - bne *1 -OK -#endif -TEST 063: test cms, zer -MAIN 0 - loc 6 - loc -10 - loc -125 - loc 30 - loc 6 - loc -10 - loc -125 - loc 30 - loc 4*WS - cms - teq - loc 1 - bne *1 -OK - loc 6 - loc -10 - loc -125 - loc 30 - loc 6 - loc -10 - loc -125 - loc 30 - cms 4*WS - teq - loc 1 - bne *1 -OK - loc 6 - loc -10 - loc -125 - loc 30 - loc 6 - loc -10 - loc -126 - loc 30 - loc 4*WS - cms - tne - loc 1 - bne *1 -OK - loc 6 - 
loc -10 - loc -125 - loc 30 - loc 6 - loc -10 - loc -126 - loc 30 - cms 4*WS - tne - loc 1 - bne *1 -OK - loc 0 - loc 0 - loc 0 - loc 0 - zer 4*WS - cms 4*WS - teq - loc 1 - bne *1 -OK -TEST 064: test blt -MAIN 0 - loc 3 - loc 4 - blt *1064 - bra *1 -1064 - loc 4 - loc 3 - blt *1 - loc -5 - loc -4 - blt *2064 - bra *1 -2064 - loc -4 - loc -5 - blt *1 - loc 4 - loc 4 - blt *1 -OK -TEST 065: test ble -MAIN 0 - loc 3 - loc 4 - ble *1065 - bra *1 -1065 - loc 4 - loc 3 - ble *1 - loc -99 - loc -5 - ble *2065 - bra *1 -2065 - loc -99 - loc -99 - ble *3065 - bra *1 -3065 - loc 100 - loc -100 - ble *1 -OK -TEST 066: test beq -MAIN 0 - loc 3 - loc 3 - beq *1066 - bra *1 -1066 - loc 3 - loc 4 - beq *1 - loc -18 - loc -17 - beq *1 - loc 0 - loc 0 - beq *2066 - bra *1 -2066 -OK -TEST 067: test bne -MAIN 0 - loc 50 - loc 50 - bne *1 - loc 50 - loc 51 - bne *1067 - bra *1 -1067 - loc 0 - loc 0 - bne *1 -OK -TEST 068: test bge -MAIN 0 - loc 100 - loc 101 - bge *1 - loc 101 - loc 100 - bge *1068 - bra *1 -1068 - loc -100 - loc -99 - bge *1 - loc -100 - loc 100 - bge *1 - loc 0 - loc 0 - bge *2068 - bra *1 -2068 - loc 50 - loc 50 - bge *3068 - bra *1 -3068 -OK -TEST 069: test bgt -MAIN 0 - loc 3 - loc 10 - bgt *1 - loc 10 - loc 3 - bgt *1069 - bra *1 -1069 - loc -100 - loc -50 - bgt *1 - loc -100 - loc 50 - bgt *1 - loc 5 - loc 5 - bgt *1 -OK -TEST 070: test zlt -MAIN 0 - loc 4 - zlt *1 - loc -4 - zlt *1070 - bra *1 -1070 - loc 0 - zlt *1 -OK -TEST 071: test zle -MAIN 0 - loc 4 - zle *1 - loc -4 - zlt *1071 - bra *1 -1071 - loc 0 - zle *2071 - bra *1 -2071 -OK -TEST 072: test zeq -MAIN 0 - loc 4 - zeq *1 - loc -4 - zeq *1 - loc 0 - zeq *1072 - bra *1 -1072 -OK -TEST 073: test zne -MAIN 0 - loc 4 - zne *1073 - bra *1 -1073 - loc -4 - zne *2073 - bra *1 -2073 - loc 0 - zne *1 -OK -TEST 074: test zge -MAIN 0 - loc 4 - zge *1074 - bra *1 -1074 - loc -4 - zge *1 - loc 0 - zge *2074 - bra *1 -2074 -OK -TEST 075: test zgt -MAIN 0 - loc 4 - zgt *1075 - bra *1 -1075 - loc -4 - zgt *1 - loc 
0 - zgt *1 -OK -TEST 076: test asp -MAIN 12 - loc 104 - loc 4 - loc 5 - loc 6 - asp 2*WS - stl -4 - loc 104 - bne *1 -OK -TEST 077: test cal -HOL 8 -MAIN 0 - cal $p077 - loe 4 - loc 34 - bne *1 -OK -PROC - pro $p077,0 - loc 34 - ste 4 - ret 0 - end -TEST 078: test cai -HOL 8 -MAIN 0 - lpi $p078 - cai - loe 4 - loc 34 - bne *1 -OK -PROC - pro $p078,0 - loc 34 - ste 4 - ret 0 - end -TEST 079: test ads WS -MAIN 20 - lal -6 - loc -2 - ads WS - lal -8 - cmp - zne *1 -OK - lal -6 - loc 2 - ads WS - lal -4 - cmp - zne *1 -OK -; test adp - lal -5 - adp 1 - lal -4 - cmp - zne *1 -OK -TEST 080: test sbs -HOL 12 -MAIN 20 - lal -4 - lal -6 - sbs WS - loc 2 - bne *1 -OK - lae 7 - lae 10 - sbs WS - loc -3 - bne *1 -OK -TEST 081: test lor -#define LB 0 -#define SP 1 -MAIN 20 - lor LB - lxl 0 - cmp - zne *1 -OK - loc 31 - lor SP - loi WS - bne *1 -OK - lor SP - lal -12 - sti PS - asp -4 - lor SP - lal -8 - sti PS - asp 4 - lal -12 - loi PS - lal -8 - loi PS - sbs WS - loc 4 - bne *1 -OK -TEST 082: test ass WS -#define SP 1 -MAIN 20 - loc 104 - loc 50 - loc 60 - loc 2*WS - ass WS - loc 104 - bne *1 -OK - lor SP - lal -8 - sti PS - loc -24 - ass WS - lor SP - lal -4 - sti PS - lal -8 - loi PS - lal -4 - loi PS - sbs WS - loc 24 - bne *1 - loc 24 - ass WS -OK - lor SP - lal -8 - loi PS - cmp - zne *1 -OK -TEST 083: test blm -HOL 28 -MAIN 32 - loc 61 - ste 12 - loc 0 - stl -4 - lae 12 - lal -4 - blm WS - lol -4 - loc 61 - bne *1 -OK - loc 44 - loc 43 - loc 42 - loc 41 - lal -20 - sti 4*WS - lal -20 - lae 8 - blm 4*WS - loe 8 - loc 41 - bne *1 - loe 8+WS - loc 42 - bne *1 - loe 8+(2*WS) - loc 43 - bne *1 - loe 8+(3*WS) - loc 44 - bne *1 -OK -TEST 084: test bls WS -HOL 28 -MAIN 32 - loc 20 - stl -8 - lal -8 - lae 20 - loc WS - bls WS - loe 20 - loc 20 - bne *1 -OK - loc 55 - stl -8+WS - loc 56 - stl -8 - lal -8 - lae 20 - loc 2*WS - bls WS - loe 20 - loc 56 - bne *1 - loe 20+WS - loc 55 - bne *1 -OK -TEST 085: test dup -MAIN 0 - loc 199 - dup WS - bne *1 -OK - loc 130 - loc 150 - dup 
2*WS - loc 150 - bne *1 - loc 130 - bne *1 - loc 150 - bne *1 - loc 130 - bne *1 -OK -; test dus WS - loc 400 - loc 399 - loc 2*WS - dus WS - loc 399 - bne *1 - loc 400 - bne *1 - loc 399 - bne *1 - loc 400 - bne *1 -OK -TEST 087: test rck -a087 - con 10,14 -MAIN 12 - loc 10 - lae a087 - rck WS - inc - lae a087 - rck WS - inc - lae a087 - rck WS - inc - lae a087 - rck WS - inc - lae a087 - rck WS - stl -4 -OK -TEST 088: test csa -MAIN 20 -a088 - con *1088,4,2,*3088,*2088,*4088 - zrl -8 - loc 4 -5088 - inc - dup WS - lae a088 - csa WS - bra *1 -3088 - bra *1 -2088 - lol -8 - loc 3 - adi WS - stl -8 - bra *5088 -1088 - inl -8 - bra *6088 -4088 - lol -8 - loc 2 - adi WS - stl -8 - bra *5088 -6088 - loc 7 - bne *1 -OK - lol -8 - loc 6 - bne *1 -OK -TEST 089: test csb -MAIN 20 -b089 - rom *1089,3,-1,*3089,0,*2089,1,*4089 - loc -1 - zrl -12 -5089 - inc - dup WS - lae b089 - csb WS - bra *1 -3089 - bra *1 -2089 - lol -12 - loc 3 - adi WS - stl -12 - bra *5089 -1089 - inl -12 - bra *6089 -4089 - lol -12 - loc 2 - adi WS - stl -12 - bra *5089 -6089 - loc 2 - bne *1 -OK - lol -12 - loc 6 - bne *1 -OK -TEST 090: now test loi again, because it is so tricky -MAIN 20 - loc 256 - lal -3 - sti 1 - loc 1 - lal -4 - sti 1 - loc 517 - lal -5 - sti 1 - loc 2 - lal -6 - sti 1 - loc 1030 - lal -8 - sti 2 - loc 4 - lal -9 - sti 1 - lal -3 - loi 1 - loc 0 - bne *1 -OK - lal -3 - adp -1 - loi 1 - loc 1 - bne *1 -OK - lal -3 - adp -2 - loi 1 - loc 5 - bne *1 -OK - lal -3 - adp -3 - loi 1 - loc 2 - bne *1 -OK - lal -4 - adp -4 - loi 2 - loc 1030 - bne *1 -OK - lal -6 - adp -3 - loi 1 - loc 4 - bne *1 -OK -TEST 091: now test sti 1 again -MAIN 12 - loc 3 - loc 2 - loc 1 - loc 7 - loc 8 - loc 10 - lal -4 - sti 1 - lal -4 - adp -1 - sti 1 - lal -4 - adp -2 - sti 1 - lal -4 - adp -3 - sti 1 - lal -4 - adp -4 - sti 1 - lal -4 - adp -5 - sti 1 - lal -4 - loi 1 - loc 10 - bne *1 -OK - lal -6 - loi 1 - loc 7 - bne *1 -OK - lal -8 - loi 1 - loc 2 - bne *1 -OK -TEST 092: test ldc and cii -MAIN 0 
-#ifdef W2S - ldc 0 - bne *1 -OK - ldc 1 - loc 1 - loc WS - loc W2S - cii - cmi W2S - zne *1 -OK - ldc -1 - loc -1 - loc WS - loc W2S - cii - cmi W2S - zne *1 -OK - loc -1 - ldc -1 - loc W2S - loc WS - cii - cmi WS - zne *1 -OK -#if WS == 1 - ldc -127 - loc -127 -#endif -#if WS == 2 - ldc -32767 - loc -32767 -#endif -#if WS == 4 - ldc -2147483647 - loc -2147483647 -#endif - loc WS - loc W2S - cii - cmi W2S - zne *1 -OK - ldc -1 - loc -1 - bne *1 -OK - loc -1 - bne *1 -OK - ldc 1 - zeq *1092 -; low order part on top of stack - loc 0 - bne *1 -OK - bra *2092 -; high order part on top of stack -1092 - loc 1 - bne *1 -OK -2092 -#endif -TEST 093: test cii -MAIN 0 - loc 123 - dup WS - loc WS - dup WS - cii - bne *1 -OK -#ifdef W2S - loc 0 - loc WS - loc W2S - cii - loc 0 - bne *1 - loc 0 - bne *1 -OK -; dynamically determine format of doubles - loc 1 - loc WS - loc W2S - cii - zeq *1093 -; low order part on top of stack - loc 0 - bne *1 -OK - loc -3 - loc WS - loc W2S - cii - loc -3 - bne *1 - loc -1 - bne *1 -OK - bra *2093 -; high order part on top of stack -1093 - loc 1 - bne *1 -OK - loc -3 - loc WS - loc W2S - cii - loc -1 - bne *1 - loc -3 - bne *1 -OK -2093 -#endif -TEST 094: test adi W2S -MAIN 0 -#ifdef W2S - ldc 1 - ldc 1 - adi W2S - ldc 2 - cmi W2S - zne *1 -OK - ldc 5 - ldc -6 - adi W2S - ldc -1 - cmi W2S - zne *1 -OK - ldc -1007 - ldc +999 - adi W2S - ldc -8 - cmi W2S - zne *1 -OK - ldc -1300 - ldc +1405 - adi W2S - ldc 105 - cmi W2S - zne *1 -OK - ldc -30000 - ldc -20 - adi W2S - ldc -30020 - cmi W2S - zne *1 -OK -#if WS >= 2 - ldc -100007 - ldc +99999 - adi W2S - ldc -8 - cmi W2S - zne *1 -OK - ldc -1300 - ldc +140567 - adi W2S - ldc 139267 - cmi W2S - zne *1 -OK - ldc -30000000 - ldc -20 - adi W2S - ldc -30000020 - cmi W2S - zne *1 -OK -#endif -#endif -TEST 095: test sbi W2S -MAIN 0 -#ifdef W2S - ldc 1 - ldc 1 - sbi W2S - ldc 0 - cmi W2S - zne *1 -OK - ldc 5 - ldc -6 - sbi W2S - ldc +11 - cmi W2S - zne *1 -OK - ldc -1007 - ldc +999 - sbi W2S - ldc -2006 - 
cmi W2S - zne *1 -OK - ldc -1300 - ldc +1405 - sbi W2S - ldc -2705 - cmi W2S - zne *1 -OK - ldc -30000 - ldc -20 - sbi W2S - ldc -29980 - cmi W2S - zne *1 -OK -#if WS >= 2 - ldc -100007 - ldc +99999 - sbi W2S - ldc -200006 - cmi W2S - zne *1 -OK - ldc -1300 - ldc +140567 - sbi W2S - ldc -141867 - cmi W2S - zne *1 -OK - ldc -30000000 - ldc -20 - sbi W2S - ldc -29999980 - cmi W2S - zne *1 -OK -#endif -#endif -TEST 096: test mli W2S -MAIN 0 -#ifdef W2S - ldc 5 - ldc 9 - mli W2S - ldc 45 - cmi W2S - zne *1 -OK - ldc 0 - ldc 10 - mli W2S - ldc 0 - cmi W2S - zne *1 -OK - ldc -99 - ldc 7 - mli W2S - ldc -693 - cmi W2S - zne *1 -OK - ldc -100 - ldc -90 - mli W2S - ldc 9000 - cmi W2S - zne *1 -OK -#if WS >= 2 - ldc 123456 - ldc 200 - mli W2S - ldc 24691200 - cmi W2S - zne *1 -OK -#endif -#endif -TEST 097: test dvi W2S -MAIN 0 -#ifdef W2S - ldc 15 - ldc 5 - dvi W2S - ldc 3 - cmi W2S - zne *1 -OK - ldc 100 - ldc -7 - dvi W2S - ldc -14 - cmi W2S - zne *1 -OK - ldc -100 - ldc 7 - dvi W2S - ldc -14 - cmi W2S - zne *1 -OK - ldc -1000 - ldc -201 - dvi W2S - ldc 4 - cmi W2S - zne *1 -OK -#if WS >= 2 - ldc 1234567 - ldc -100 - dvi W2S - ldc -12345 - cmi W2S - zne *1 -OK -#endif -#endif -TEST 098: test rmi W2S -MAIN 0 -#ifdef W2S - ldc 100 - ldc 7 - rmi W2S - ldc 2 - cmi W2S - zne *1 -OK - ldc -100 - ldc 7 - rmi W2S - ldc -2 - cmi W2S - zne *1 -OK - ldc 100 - ldc -7 - rmi W2S - ldc 2 - cmi W2S - zne *1 -OK - ldc -100 - ldc -7 - rmi W2S - ldc -2 - cmi W2S - zne *1 -OK - ldc -1000 - ldc -201 - rmi W2S - ldc -196 - cmi W2S - zne *1 -OK -#if WS >= 4 - ldc 1234567 - ldc -100 - rmi W2S - ldc 67 - cmi W2S - zne *1 -OK -#endif -#endif -TEST 099: test and -MAIN 0 - loc 68 - loc 65 - and WS - loc 64 - bne *1 -OK - loc 17 - loc 34 - loc 3 - loc 36 - and 2*WS - loc 32 - bne *1 - loc 1 - bne *1 -OK - loc 17 - loc 34 - loc 68 - loc -120 - loc 1 - loc 37 - loc 12 - loc -127 - and 4*WS - loc -128 - bne *1 - loc 4 - bne *1 - loc 32 - bne *1 - loc 1 - bne *1 -OK -TEST 100: test ior -MAIN 0 - loc 68 - 
loc 65 - ior WS - loc 69 - bne *1 -OK - loc 17 - loc 34 - loc 3 - loc 36 - ior 2*WS - loc 38 - bne *1 - loc 19 - bne *1 -OK - loc 17 - loc 34 - loc 68 - loc -120 - loc 1 - loc 37 - loc 12 - loc -127 - ior 4*WS - loc -119 - bne *1 - loc 76 - bne *1 - loc 39 - bne *1 - loc 17 - bne *1 -OK -TEST 101: test xor -MAIN 0 - loc 68 - loc 65 - xor WS - loc 5 - bne *1 -OK - loc 17 - loc 34 - loc 3 - loc 36 - xor 2*WS - loc 6 - bne *1 - loc 18 - bne *1 -OK - loc 17 - loc 34 - loc 68 - loc -120 - loc 1 - loc 37 - loc 12 - loc -127 - xor 4*WS - loc 9 - bne *1 - loc 72 - bne *1 - loc 7 - bne *1 - loc 16 - bne *1 -OK -TEST 102: test com -MAIN 0 - loc 68 - com WS - loc -69 - bne *1 -OK - loc 17 - loc 34 - com 2*WS - loc -35 - bne *1 - loc -18 - bne *1 -OK - loc 17 - loc 34 - loc 68 - loc -120 - com 4*WS - loc 119 - bne *1 - loc -69 - bne *1 - loc -35 - bne *1 - loc -18 - bne *1 -OK -TEST 103: test sli,slu W2S -MAIN 0 -#ifdef W2S - ldc 5 - loc 3 - sli W2S - ldc 40 - cmi W2S - zne *1 -OK - ldc -2 - loc 4 - sli W2S - ldc -32 - cmi W2S - zne *1 -OK - ldc 5 - loc 3 - slu W2S - ldc 40 - cmi W2S - zne *1 -OK - ldc -2 - loc 4 - slu W2S - ldc -32 - cmi W2S - zne *1 -OK -#endif -TEST 104: test sri,sru W2S -MAIN 0 -#ifdef W2S - ldc 64 - loc 5 - sri W2S - ldc 2 - cmi W2S - zne *1 -OK - ldc -3 - loc 2 - sri W2S - ldc -1 - cmi W2S - zne *1 -OK - ldc -16 - loc 3 - sri W2S - ldc -2 - cmi W2S - zne *1 -OK - ldc 64 - loc 5 - sru W2S - ldc 2 - cmi W2S - zne *1 -OK - ldc -3 - loc 2 - sru W2S -#if W2S == 2 - ldc 16383 -#endif -#if W2S == 4 - ldc 1073741823 -#endif - cmi W2S - zne *1 -OK -#endif -TEST 105: common test of double arithmetic -MAIN 0 -#ifdef W2S - ldc 1000 - ldc 10 - mli W2S - ldc 4 - dvi W2S - ldc 1500 - adi W2S - ldc 2856 - sbi W2S - ldc 100 - rmi W2S - ldc 44 - cmi W2S - zne *1 -OK -#endif -TEST 106: test cmi W2S -MAIN 0 -#if W2S==2 - ldc 64 - ldc 4 - mli W2S - ldc 63 - ldc 4 - mli W2S - cmi W2S - tge - loc 1 - bne *1 -OK -#endif -#if W2S==4 - ldc 16384 - ldc 4 - mli W2S - ldc 16383 - 
ldc 4 - mli W2S - cmi W2S - tge - loc 1 - bne *1 -OK -#endif -TEST 107: test cii W2S -> WS -MAIN 0 -#ifdef W2S - ldc 100 - loc W2S - loc WS - cii - loc 100 - bne *1 -OK - ldc 5000 - ldc -6 - mli W2S - ldc 1000 - dvi W2S - loc W2S - loc WS - cii - loc -30 - bne *1 -OK -#endif -TEST 108: test cif, cfi, adf FS -MAIN 0 -#ifdef FS - loc 100 - loc WS - loc FS - cif - loc 44 - loc WS - loc FS - cif - adf FS - loc FS - loc WS - cfi - loc 144 - bne *1 -OK - loc 65 - loc WS - loc FS - cif - loc -65 - loc WS - loc FS - cif - adf FS - loc FS - loc WS - cfi - loc 0 - bne *1 -OK -#endif -TEST 109: test cdf, cfd -MAIN 0 -#ifdef FS -#ifdef W2S - loc 55 - loc WS - loc W2S - cii - loc W2S - loc FS - cif - loc 55 - loc WS - loc FS - cif - cmf FS - zne *1 -OK - loc 24 - loc WS - loc FS - cif - loc FS - loc W2S - cfi - loc W2S - loc WS - cii - loc 24 - bne *1 -OK - loc 57 - loc WS - loc FS - cif - loc FS - loc W2S - cfi - loc W2S - loc WS - cii - loc 57 - bne *1 -OK - loc 40 - loc WS - loc W2S - cii - loc W2S - loc FS - cif - loc FS - loc WS - cfi - loc 40 - bne *1 -OK -#endif -#endif -TEST 110: test sbf FS -MAIN 0 -#ifdef FS - loc 100 - loc WS - loc FS - cif - loc 50 - loc WS - loc FS - cif - sbf FS - loc FS - loc WS - cfi - loc 50 - bne *1 -OK - loc 32 - loc WS - loc FS - cif - loc 101 - loc WS - loc FS - cif - sbf FS - loc -69 - loc WS - loc FS - cif - cmf FS - zne *1 -OK -#endif -TEST 111: test mlf FS -MAIN 0 -#ifdef FS - loc 4 - loc WS - loc FS - cif - loc 20 - loc WS - loc FS - cif - mlf FS - loc FS - loc WS - cfi - loc 80 - bne *1 -OK - loc -12 - loc WS - loc FS - cif - loc -9 - loc WS - loc FS - cif - mlf FS - loc FS - loc WS - cfi - loc 108 - bne *1 -OK -#endif -TEST 112: test dvf FS -MAIN 0 -#ifdef FS - loc 45 - loc WS - loc FS - cif - loc 9 - loc WS - loc FS - cif - dvf FS - loc 5 - loc WS - loc FS - cif - cmf FS - zne *1 -OK - loc -60 - loc WS - loc FS - cif - loc 7 - loc WS - loc FS - cif - dvf FS - loc FS - loc WS - cfi - loc -8 - bne *1 -OK -#endif -TEST 113: test 
fractions using FS floating arithmetic -MAIN 0 -#ifdef FS - loc 7 - loc WS - loc FS - cif - loc 2 - loc WS - loc FS - cif - dvf FS - loc FS - loc WS - cfi - loc 3 - bne *1 -OK - loc -7 - loc WS - loc FS - cif - loc 2 - loc WS - loc FS - cif - dvf FS - loc FS - loc WS - cfi - loc -3 - bne *1 -OK - loc 11 - loc WS - loc FS - cif - loc 2 - loc WS - loc FS - cif - dvf FS - loc 5 - loc WS - loc FS - cif - mlf FS - loc 4 - loc WS - loc FS - cif - mlf FS - loc 110 - loc WS - loc FS - cif - cmf FS - zne *1 -OK -#endif -TEST 114: test cif, cfi, adf F2S -MAIN 0 -#ifdef F2S - loc 10 - loc WS - loc F2S - cif - loc 44 - loc WS - loc F2S - cif - adf F2S - loc F2S - loc WS - cfi - loc 54 - bne *1 -OK - loc 65 - loc WS - loc F2S - cif - loc -65 - loc WS - loc F2S - cif - adf F2S - loc F2S - loc WS - cfi - loc 0 - bne *1 -OK -#endif -TEST 115: test cif, cfi W2S F2S -MAIN 0 -#ifdef F2S -#ifdef W2S - loc 55 - loc WS - loc W2S - cii - loc W2S - loc F2S - cif - loc 55 - loc WS - loc F2S - cif - cmf F2S - zne *1 -OK - loc 24 - loc WS - loc F2S - cif - loc F2S - loc W2S - cfi - loc W2S - loc WS - cii - loc 24 - bne *1 -OK - loc 57 - loc WS - loc F2S - cif - loc F2S - loc W2S - cfi - loc W2S - loc WS - cii - loc 57 - bne *1 -OK - loc 41 - loc WS - loc W2S - cii - loc W2S - loc F2S - cif - loc F2S - loc WS - cfi - loc 41 - bne *1 -OK -#endif -#endif -TEST 116: test sbf F2S -MAIN 0 -#ifdef F2S - loc 100 - loc WS - loc F2S - cif - loc 50 - loc WS - loc F2S - cif - sbf F2S - loc F2S - loc WS - cfi - loc 50 - bne *1 -OK - loc 32 - loc WS - loc F2S - cif - loc 101 - loc WS - loc F2S - cif - sbf F2S - loc -69 - loc WS - loc F2S - cif - cmf F2S - zne *1 -OK -#endif -TEST 117: test fmu F2S -MAIN 0 -#ifdef F2S - loc 4 - loc WS - loc F2S - cif - loc 20 - loc WS - loc F2S - cif - mlf F2S - loc F2S - loc WS - cfi - loc 80 - bne *1 -OK - loc -20 - loc WS - loc F2S - cif - loc -6 - loc WS - loc F2S - cif - mlf F2S - loc F2S - loc WS - cfi - loc 120 - bne *1 -OK -#endif -TEST 118: test dvf F2S -MAIN 0 
-#ifdef F2S - loc 45 - loc WS - loc F2S - cif - loc 9 - loc WS - loc F2S - cif - dvf F2S - loc 5 - loc WS - loc F2S - cif - cmf F2S - zne *1 -OK - loc -60 - loc WS - loc F2S - cif - loc 7 - loc WS - loc F2S - cif - dvf F2S - loc F2S - loc WS - cfi - loc -8 - bne *1 -OK -#endif -TEST 119: test fractions using F2S floating arithmetic -MAIN 0 -#ifdef F2S - loc 7 - loc WS - loc F2S - cif - loc 2 - loc WS - loc F2S - cif - dvf F2S - loc F2S - loc WS - cfi - loc 3 - bne *1 -OK - loc -7 - loc WS - loc F2S - cif - loc 2 - loc WS - loc F2S - cif - dvf F2S - loc F2S - loc WS - cfi - loc -3 - bne *1 -OK - loc 11 - loc WS - loc F2S - cif - loc 2 - loc WS - loc F2S - cif - dvf F2S - loc 5 - loc WS - loc F2S - cif - mlf F2S - loc 4 - loc WS - loc F2S - cif - mlf F2S - loc 110 - loc WS - loc F2S - cif - cmf F2S - zne *1 -OK -#endif -TEST 120: test cal -HOL 8 -MAIN 0 - loc 0 - ste 4 - cal $p120 - loe 4 - loc 34 - bne *1 -OK -PROC - pro $p120,0 - loc 34 - ste 4 - ret 0 - end -TEST 121: test cal -MAIN 0 - cal $p121 - lfr WS - loc 7 - bne *1 -OK -PROC - pro $p121,0 - loc 7 - ret WS - end -TEST 122: test cal -MAIN 0 - loc 7 - cal $p122 - asp WS - lfr WS - loc 7 - bne *1 -OK -PROC - pro $p122,0 - lol 0 - ret WS - end -TEST 123: test cal -MAIN 4 - loc 7 - stl -4 - lor 0 - cal $p123 - asp PS - lfr WS - loc 7 - bne *1 -OK - lxl 0 - cal $p123 - asp PS - lfr WS - loc 7 - bne *1 -OK -PROC - pro $p123,0 - lxl 1 - lof -4 - ret WS - end -TEST 124: test cal -MAIN 0 - loc 7 - cal $p124 - asp WS -PROC - pro $p124,0 - lol 0 - loc 7 - bne *1 -OK - ret 0 -ERRLAB - end -TEST 125: test cal -MAIN 4 - loc 10 - stl -WS - loc 90 - lxl 0 - cal $p1125 - asp PS+WS -OK -PROC - pro $p1125,WS - lol PS - loc 90 - bne *1 -OK - loc 11 - stl -WS - loc 21 - loc 91 - lxl 0 - cal $p2125 - asp PS+WS - lfr WS -OK - loc 82 - bne *1 -OK - loc 21 - bne *1 -OK - ret 0 -ERRLAB - end - pro $p2125,0 - lol PS - loc 91 - bne *1 -OK - loc 12 - loc 92 - lxl 0 - cal $p3125 - asp PS+WS - lfr WS+WS -OK - loc 86 - bne *1 - loc 83 - bne 
*1 -OK - loc 12 - bne *1 -OK - lal PS - loi WS - loc 91 - bne *1 -OK - loc 82 - ret WS -ERRLAB - end - pro $p3125,WS - lol PS - loc 92 - bne *1 -OK - loc 13 - stl -WS - lxa 0 - adp PS - loi WS - loc 92 - bne *1 -OK - lxa 1 - adp PS - loi WS - loc 91 - bne *1 -OK - lxa 2 - adp PS - loi WS - loc 90 - bne *1 -OK - lxl 2 - lof -WS - loc 11 - bne *1 -OK - lxl 3 - adp -WS - loi WS - loc 10 - bne *1 -OK - loc 83 - lxl 1 - cal $p4125 - asp PS - lxl 2 - cal $p5125 - asp PS - lxl 3 - cal $p6125 - asp PS - lfr WS - ret WS+WS -ERRLAB - end - pro $p4125,0 - lxa 1 - adp PS - loi WS - loc 91 - bne *1 -OK - ret 0 -ERRLAB - end - pro $p5125,0 - lxa 1 - adp PS - loi WS - loc 90 - bne *1 -OK - ret 0 -ERRLAB - end - pro $p6125,0 - lxl 1 - adp -WS - loi WS - loc 10 - bne *1 -OK - loc 86 - ret WS -ERRLAB - end -TEST 126: test bra -MAIN 0 - bra *0126 - bra *1 -9126 - bra *8126 -0126 - bra *6126 - bra *1 -1126 - bra *5126 - bra *1 -2126 - bra *4126 - bra *1 -3126 - bra *7126 - bra *1 -4126 - bra *3126 - bra *1 -5126 - bra *2126 - bra *1 -6126 - bra *1126 - bra *1 -7126 - bra *9126 - bra *1 -8126 -OK -TEST 127: test ret and lfr -a127 - bss 4,0,0 -MAIN 0 -; return nothing - loc 123 - cal $retw0 - loc 123 - bne *1 -OK -; return single word - cal $retw1 - lfr WS - loc 45 - bne *1 -OK -; return single pointer - cal $retp1 - lfr PS - lae a127 - cmp - zne *1 -OK -; return procedure instance identifier (two pointers) -; this value may not be disturbed by ASP - lxl 0 - cal $retp2 - asp PS - lfr 2*PS - lpi $retp2 - cmp - zne *1 - lxl 0 - cmp - zne *1 -OK -PROC - pro $retw0,0 - ret 0 - end - pro $retw1,0 - loc 45 - ret WS - end - pro $retp1,0 - lae a127 - ret PS - end - pro $retp2,0 - lxl 1 - lpi $retp2 - ret 2*PS - end -TEST 128: test adu WS -MAIN 0 - loc 1 - loc 1 - adu WS - loc 2 - bne *1 -OK -#if WS >= 2 - loc 32767 - loc +999 - adu WS - loc 33766 - bne *1 -OK -#endif -#if WS >= 4 - loc 2147483640 - loc 1111111111 - adu WS - loc 3258594751 - bne *1 -OK -#endif -TEST 129: test sbu WS -MAIN 0 - 
loc 1 - loc 1 - sbu WS - loc 0 - bne *1 -OK -#if WS >= 2 - loc 32767 - loc -100 - sbu WS - loc 32867 - bne *1 -OK -#endif -#if WS >= 4 - loc 2147483647 - loc -100 - sbu WS - loc 2147483747 - bne *1 -OK -#endif -TEST 130: test mlu WS -MAIN 0 - loc 5 - loc 9 - mlu WS - loc 45 - bne *1 -OK - loc 0 - loc 10 - mlu WS - loc 0 - bne *1 -OK -#if WS > 1 - loc 1024 - loc 63 - mlu WS - loc 64512 - bne *1 -OK -#endif -#if WS > 2 - loc 65536 - loc 32768 - mlu WS - loc 2147483648 - bne *1 -OK -#endif -TEST 131: test dvu WS -MAIN 0 - loc 15 - loc 5 - dvu WS - loc 3 - bne *1 -OK -#if WS >= 2 - loc 65530 - loc 100 - dvu WS - loc 655 - bne *1 -OK -#endif -#if WS >= 4 - loc 2447684712 - loc 100 - dvu WS - loc 24476847 - bne *1 -OK -#endif -TEST 132: test rmu WS -MAIN 0 - loc 15 - loc 5 - rmu WS - loc 0 - bne *1 -OK -#if WS >= 2 - loc 65530 - loc 100 - rmu WS - loc 30 - bne *1 -OK -#endif -#if WS >= 4 - loc 2447684712 - loc 100 - rmu WS - loc 12 - bne *1 -OK -#endif -TEST 133: test adu W2S -MAIN 0 -#ifdef W2S - ldc 1 - ldc 1 - adu W2S - ldc 2 - cmu W2S - zne *1 -OK - ldc 32767 - ldc +999 - adu W2S - ldc 33766 - cmu W2S - zne *1 -OK -#if WS >= 2 - ldc 2147483640 - ldc 1111111111 - adu W2S - ldc 3258594751 - cmu W2S - zne *1 -OK -#endif -#endif -TEST 134: test sbu W2S -MAIN 0 -#ifdef W2S - ldc 1 - ldc 1 - sbu W2S - ldc 0 - cmu W2S - zne *1 -OK - ldc 32767 - ldc -100 - sbu W2S - ldc 32867 - cmu W2S - zne *1 -OK -#if WS >= 2 - ldc 2147483647 - ldc -100 - sbu W2S - ldc 2147483747 - cmu W2S - zne *1 -OK -#endif -#endif -TEST 135: test mlu W2S -MAIN 0 -#ifdef W2S - ldc 5 - ldc 9 - mlu W2S - ldc 45 - cmu W2S - zne *1 -OK - ldc 0 - ldc 10 - mlu W2S - ldc 0 - cmu W2S - zne *1 -OK -#if WS > 1 - ldc 1024 - ldc 63 - mlu W2S - ldc 64512 - cmu W2S - zne *1 -OK -#endif -#if WS >= 2 - ldc 65536 - ldc 32768 - mlu W2S - ldc 2147483648 - cmu W2S - zne *1 -OK -#endif -#endif -TEST 136: test dvu W2S -MAIN 0 -#ifdef W2S - ldc 15 - ldc 5 - dvu W2S - ldc 3 - cmu W2S - zne *1 -OK - ldc 65530 - ldc 100 - dvu 
W2S - ldc 655 - cmu W2S - zne *1 -OK -#if WS >= 2 - ldc 2447684712 - ldc 100 - dvu W2S - ldc 24476847 - cmu W2S - zne *1 -OK -#endif -#endif -TEST 137: test rmu W2S -MAIN 0 -#ifdef W2S - ldc 15 - ldc 5 - rmu W2S - ldc 0 - cmu W2S - zne *1 -OK -#if WS >= 2 - ldc 65530 - ldc 100 - rmu W2S - ldc 30 - cmu W2S - zne *1 -OK -#endif -#if WS >= 4 - ldc 2447684712 - ldc 100 - rmu W2S - ldc 12 - cmu W2S - zne *1 -OK -#endif -#endif -TEST 138: test cuu -MAIN 0 -#ifdef W2S - loc 100 - loc WS - loc W2S - cuu - ldc 100 - cmu W2S - zne *1 -OK - ldc 100 - loc W2S - loc WS - cuu - loc 100 - bne *1 -OK -#if WS >= 2 - loc 32768 - loc WS - loc W2S - cuu - ldc 32768 - cmu W2S - zne *1 -OK - ldc 32768 - loc W2S - loc WS - cuu - loc 32768 - bne *1 -OK -#endif -#endif -TEST 139: test gto, dch, lpb -MAIN 0 -.1139 - rom *1139 - lae .1139 - loi PS - lxa 0 - lxl 0 - cal $p139_1 -1139 - asp 3*PS -OK -PROC - pro $p139_1,0 - lal 0 - loi PS - lxl 0 - dch - cmp - zne *1 -OK - lal PS - loi PS - lal 0 - loi PS - lpb - cmp - zne *1 -OK - lal 2*PS - loi PS - lal PS - loi PS - lal 0 - loi PS - cal $p139_2 - asp 3*PS - ret 0 -ERRLAB - end 0 - - pro $p139_2,0 - lal 0 - loi PS - lxl 0 - dch - dch - cmp - zne *1 -OK - lal PS - loi PS - lal 0 - loi PS - lpb - cmp - zne *1 -OK -; now create GTO descriptor - lal 0 - loi PS ; LB - lxl 0 - dch - lpb ; SP - lal 2*PS - loi PS ; PC -.2139 - bss 3*PS,0,0 - lae .2139 - sti 3*PS - gto .2139 -ERRLAB - end 0 -TEST 140: test exg -MAIN 0 - loc 0 - loc 1 - exg WS - loc 0 - bne *1 - loc 1 - bne *1 -OK - loc 0 - loc 1 - loc 2 - loc 3 - exg 2*WS - loc 1 - bne *1 - loc 0 - bne *1 - loc 3 - bne *1 - loc 2 - bne *1 -OK - zer 4*WS - loc 1 - loc 1 - dup 2*WS - exg 4*WS - zer 4*WS - cms 4*WS - zne *1 - loc 1 - loc 1 - dup 2*WS - cms 4*WS - zne *1 -OK -TEST 141: test lim,sim,sig,trp,rtt -MAIN 0 -a140 - con 0 - lim - dup WS - loc 9 - set WS - ior WS - dup WS - sim - lim - bne *1 -OK - lpi $p1_141 - sig - loc 9 - trp - sig - asp PS -OK - sim - lpi $p2_141 - sig - loc 9 - trp - sig - 
asp PS - loe a140 - zeq *1 -OK -PROC - pro $p1_141,0 - bra *1 -ERRLAB - end - - pro $p2_141,0 - loc 1 - ste a140 - rtt - end -TEST 142: test ciu, cui -MAIN 0 - loc 100 - loc WS - loc WS - ciu - loc 100 - bne *1 - loc -1 - loc WS - loc WS - ciu - ; should not cause a trap - asp WS -OK -#ifdef W2S - loc 100 - loc WS - loc W2S - ciu - ldc 100 - cmu W2S - zne *1 - ldc 100 - loc W2S - loc WS - ciu - loc 100 - bne *1 - ldc 100 - loc W2S - loc W2S - ciu - ldc 100 - cmu W2S - zne *1 -OK -#endif - loc 100 - loc WS - loc WS - cui - loc 100 - bne *1 -OK -#ifdef W2S - loc 100 - loc WS - loc W2S - cui - ldc 100 - cmi W2S - zne *1 - ldc 100 - loc W2S - loc WS - cui - loc 100 - bne *1 - ldc 100 - loc W2S - loc W2S - cui - ldc 100 - cmi W2S - zne *1 -OK -#if WS >= 2 - loc 32768 - loc WS - loc W2S - cui - ldc 32768 - cmi W2S - zne *1 -OK -#endif -#endif -TEST 143: test zrf -MAIN 0 -#ifdef FS - loc 0 - loc WS - loc FS - cif - zrf FS - cmf FS - zne *1 -OK -#endif -#ifdef F2S - loc 0 - loc WS - loc F2S - cif - zrf F2S - cmf F2S - zne *1 -OK -#endif -TEST 144: test ngf -MAIN 0 -#ifdef FS - zrf FS - loc 100 - loc WS - loc FS - cif - sbf FS - loc 100 - loc WS - loc FS - cif - ngf FS - cmf FS - zne *1 -OK -#endif -#ifdef F2S - zrf F2S - loc 100 - loc WS - loc F2S - cif - sbf F2S - loc 100 - loc WS - loc F2S - cif - ngf F2S - cmf F2S - zne *1 -OK -#endif -TEST 145: test cuf, cfu WS,FS -MAIN 0 -#ifdef FS - loc 100 - loc WS - loc FS - cuf - loc FS - loc WS - cfu - loc 100 - bne *1 -OK - loc 0 - loc WS - loc FS - cuf - loc FS - loc WS - cfu - loc 0 - bne *1 -OK -#endif -TEST 146: test cuf, cfu W2S,FS -MAIN 0 -#ifdef FS -#ifdef W2S - loc 55 - loc WS - loc W2S - cuu - loc W2S - loc FS - cuf - loc 55 - loc WS - loc FS - cuf - cmf FS - zne *1 -OK - loc 24 - loc WS - loc FS - cuf - loc FS - loc W2S - cfu - loc W2S - loc WS - cuu - loc 24 - bne *1 -OK - loc 57 - loc WS - loc FS - cuf - loc FS - loc W2S - cfu - loc W2S - loc WS - cuu - loc 57 - bne *1 -OK - loc 40 - loc WS - loc W2S - cuu - loc W2S 
- loc FS - cuf - loc FS - loc WS - cfu - loc 40 - bne *1 -OK -#endif -#endif -TEST 147: test cuf, cfu WS,F2S -MAIN 0 -#ifdef F2S - loc 10 - loc WS - loc F2S - cuf - loc F2S - loc WS - cfu - loc 10 - bne *1 -OK - loc 0 - loc WS - loc F2S - cuf - loc F2S - loc WS - cfu - loc 0 - bne *1 -OK -#endif -TEST 148: test cuf, cfu W2S F2S -MAIN 0 -#ifdef F2S -#ifdef W2S - ldc 55 - loc W2S - loc F2S - cuf - loc 55 - loc WS - loc F2S - cuf - cmf F2S - zne *1 -OK - loc 24 - loc WS - loc F2S - cuf - loc F2S - loc W2S - cfu - loc W2S - loc WS - cuu - loc 24 - bne *1 -OK - loc 57 - loc WS - loc F2S - cuf - loc F2S - loc W2S - cfu - loc W2S - loc WS - cuu - loc 57 - bne *1 -OK - ldc 41 - loc W2S - loc F2S - cuf - loc F2S - loc WS - cfu - loc 41 - bne *1 -OK -#endif -#endif -TEST 149: test cff -MAIN 0 -#ifdef FS -#ifdef F2S - loc 0 - loc WS - loc FS - cif - loc FS - loc F2S - cff - dup F2S - zrf F2S - cmf F2S - zne *1 - loc F2S - loc FS - cff - zrf FS - cmf FS - zne *1 -OK - loc 100 - loc WS - loc FS - cif - loc FS - loc F2S - cff - loc 100 - loc WS - loc F2S - cif - cmf F2S - zne *1 -OK - loc 100 - loc WS - loc F2S - cif - loc F2S - loc FS - cff - loc 100 - loc WS - loc FS - cif - cmf FS - zne *1 -OK -#endif -#endif -TEST 150: test fif -MAIN 0 -#ifdef FS - loc 50 - loc WS - loc FS - cif - loc 2 - loc WS - loc FS - cif - fif FS - loc 100 - loc WS - loc FS - cif - cmf FS - zne *1 - zrf FS - cmf FS - zne *1 -OK -#endif -#ifdef F2S - loc 50 - loc WS - loc F2S - cif - loc 2 - loc WS - loc F2S - cif - fif F2S - loc 100 - loc WS - loc F2S - cif - cmf F2S - zne *1 - zrf F2S - cmf F2S - zne *1 -OK -#endif -TEST 151; test fef (needs more work) -MAIN 0 -#ifdef FS - loc 10 - loc WS - loc FS - cif - fef FS - loc 4 - bne *1 - asp FS -OK -#endif -#ifdef F2S - loc 10 - loc WS - loc F2S - cif - fef F2S - loc 4 - bne *1 - asp F2S -OK -#endif diff --git a/etc/.distr b/etc/.distr deleted file mode 100644 index b9c5d3c85..000000000 --- a/etc/.distr +++ /dev/null @@ -1,7 +0,0 @@ -proto.make -em_table 
-new_table -new_table_done -pop_push -traps -ip_spec.t diff --git a/etc/Makefile b/etc/Makefile deleted file mode 100644 index 7007755da..000000000 --- a/etc/Makefile +++ /dev/null @@ -1,32 +0,0 @@ -d=.. -h=$d/h -c=$d/util/data - -FILES= \ -$h/em_spec.h \ -$h/em_pseu.h \ -$h/em_mnem.h \ -$c/em_flag.c \ -$c/em_pseu.c \ -$c/em_mnem.c - -$(FILES): em_table - -mkdir $d/h - -mkdir $d/util - -mkdir $d/util/data - new_table $h $c - -install: $(FILES) - -distr: $(FILES) - touch new_table_done - -opr: - make pr ^ opr -pr: - @pr Makefile em_table new_table pop_push traps - -clean: - -rm -f *.old - -cmp : # do nothing diff --git a/etc/em_table b/etc/em_table deleted file mode 100644 index 38ca26d1a..000000000 --- a/etc/em_table +++ /dev/null @@ -1,175 +0,0 @@ -magic 173 -fmnem 1 -nmnem 149 -fpseu 150 -npseu 30 -filb0 180 -nilb0 60 -fcst0 0 -zcst0 120 -ncst0 240 -fspec 240 -nspec 16 -ilb1 240 -ilb2 241 -dlb1 242 -dlb2 243 -dnam 244 -cst2 245 -cst4 246 -cst8 247 -doff 248 -pnam 249 -scon 250 -icon 251 -ucon 252 -fcon 253 -cend 255 - -bss 0 nvt -con 1 a+ -end 2 n? -exa 3 e -exc 4 nn -exp 5 p -hol 6 nvt -ina 7 e -inp 8 p -mes 9 na* -pro 10 pn? -rom 11 a+ - -aar w- -p-a-p+p -adf w- -a-a+a -adi w- -a-a+a -adp f- -p+p -ads w- -a-p+p -adu w- -a-a+a -and w- -a-a+a -asp f- -a -ass w- -a-x -beq bc -w-w -bge bc -w-w -bgt bc -w-w -ble bc -w-w -blm z- -p-p -bls w- -a-p-p -blt bc -w-w -bne bc -w-w -bra bt 0 -cai -p -p -cal pp 0 -cff -- -w-w-y+x -cfi -- -w-w-y+x -cfu -- -w-w-y+x -cif -- -w-w-y+x -cii -- -w-w-y+x -ciu -- -w-w-y+x -cmf w- -a-a+w -cmi w- -a-a+w -cmp -- -p-p+w -cms w- -a-a+w -cmu w- -a-a+w -com w- -a-a+a -csa wt -p-a -csb wt -p-a -cuf -- -w-w-y+x -cui -- -w-w-y+x -cuu -- -w-w-y+x -dch -- -p+p -dec -- -w+w -dee g- 0 -del l- 0 -dup s- -a+a+a -dus w- -a-x+x+x -dvf w- -a-a+a -dvi w- -a-a+a -dvu w- -a-a+a -exg w- -a-a+a+a -fef w- -a+a+w -fif w- -a-a+a+a -fil g- 0 -gto gt -p-? -inc -- -w+w -ine g- 0 -inl l- 0 -inn w- -w-a+w -ior w- -a-a+a -lae g- +p -lal l- +p -lar w- -p-a-p+? 
-ldc d- +d -lde g- +d -ldf f- -p+d -ldl l- +d -lfr s- +a -lil l- +w -lim -- +w -lin n- 0 -lni -- 0 -loc c- +w -loe g- +w -lof f- -p+w -loi o- -p+a -lol l- +w -lor r- +p -los w- -a-p+x -lpb -- -p+p -lpi p- +p -lxa n- +p -lxl n- +p -mlf w- -a-a+a -mli w- -a-a+a -mlu w- -a-a+a -mon -- -?+? -ngf w- -a+a -ngi w- -a+a -nop -- 0 -rck w- -p-a+a -ret zt -a-? -rmi w- -a-a+a -rmu w- -a-a+a -rol w- -w-a+a -ror w- -w-a+a -rtt -t -? -sar w- -p-a-p-? -sbf w- -a-a+a -sbi w- -a-a+a -sbs w- -p-p+a -sbu w- -a-a+a -sde g- -d -sdf f- -p-d -sdl l- -d -set w- -w+a -sig -- -p-p+p+p -sil l- -w -sim -- -w -sli w- -w-a+a -slu w- -w-a+a -sri w- -w-a+a -sru w- -w-a+a -ste g- -w -stf f- -p-w -sti o- -p-a -stl l- -w -str r- -p -sts w- -a-p-x -teq -- -w+w -tge -- -w+w -tgt -- -w+w -tle -- -w+w -tlt -- -w+w -tne -- -w+w -trp -p -w+? -xor w- -a-a+a -zeq bc -w -zer w- +a -zge bc -w -zgt bc -w -zle bc -w -zlt bc -w -zne bc -w -zre g- 0 -zrf w- +a -zrl l- 0 - diff --git a/etc/ip_spec.t b/etc/ip_spec.t deleted file mode 100644 index 54ca88f13..000000000 --- a/etc/ip_spec.t +++ /dev/null @@ -1,354 +0,0 @@ -aar mwPo 1 34 -adf sP 1 35 -adi mwPo 2 36 -adp 2 38 -adp mPo 2 39 -adp sP 1 41 -adp sN 1 42 -ads mwPo 1 43 -and mwPo 1 44 -asp mwPo 5 45 -asp swP 1 50 -beq 2 51 -beq sP 1 52 -bge sP 1 53 -bgt sP 1 54 -ble sP 1 55 -blm sP 1 56 -blt sP 1 57 -bne sP 1 58 -bra 2 59 -bra sN 2 60 -bra sP 2 62 -cal mPo 28 64 -cal sP 1 92 -cff - 93 -cif - 94 -cii - 95 -cmf sP 1 96 -cmi mwPo 2 97 -cmp - 99 -cms sP 1 100 -csa mwPo 1 101 -csb mwPo 1 102 -dec - 103 -dee sw 1 104 -del swN 1 105 -dup mwPo 1 106 -dvf sP 1 107 -dvi mwPo 1 108 -fil u 109 -inc - 110 -ine w2 111 -ine sw 1 112 -inl mwN 3 113 -inl swN 1 116 -inn sP 1 117 -ior mwPo 1 118 -ior sP 1 119 -lae u 120 -lae sw 7 121 -lal P2 128 -lal N2 129 -lal mP 1 130 -lal mN 1 131 -lal swP 1 132 -lal swN 2 133 -lar mwPo 1 135 -ldc mP 1 136 -lde w2 137 -lde sw 1 138 -ldl mP 1 139 -ldl swN 1 140 -lfr mwPo 2 141 -lfr sP 1 143 -lil swN 1 144 -lil swP 1 145 -lil mwP 2 146 -lin 2 
148 -lin sP 1 149 -lni - 150 -loc 2 151 -loc mP 34 0 -loc mN 1 152 -loc sP 1 153 -loc sN 1 154 -loe w2 155 -loe sw 5 156 -lof 2 161 -lof mwPo 4 162 -lof sP 1 166 -loi 2 167 -loi mPo 1 168 -loi mwPo 4 169 -loi sP 1 173 -lol wP2 174 -lol wN2 175 -lol mwP 4 176 -lol mwN 8 180 -lol swP 1 188 -lol swN 1 189 -lxa mPo 1 190 -lxl mPo 2 191 -mlf sP 1 193 -mli mwPo 2 194 -rck mwPo 1 196 -ret mwP 2 197 -ret sP 1 199 -rmi mwPo 1 200 -sar mwPo 1 201 -sbf sP 1 202 -sbi mwPo 2 203 -sdl swN 1 205 -set sP 1 206 -sil swN 1 207 -sil swP 1 208 -sli mwPo 1 209 -ste w2 210 -ste sw 3 211 -stf 2 214 -stf mwPo 2 215 -stf sP 1 217 -sti mPo 1 218 -sti mwPo 4 219 -sti sP 1 223 -stl wP2 224 -stl wN2 225 -stl mwP 2 226 -stl mwN 5 228 -stl swN 1 233 -teq - 234 -tgt - 235 -tlt - 236 -tne - 237 -zeq 2 238 -zeq sP 2 239 -zer sP 1 241 -zge sP 1 242 -zgt sP 1 243 -zle sP 1 244 -zlt sP 1 245 -zne sP 1 246 -zne sN 1 247 -zre w2 248 -zre sw 1 249 -zrl mwN 2 250 -zrl swN 1 252 -zrl wN2 253 -aar e2 0 -aar e- 1 -adf e2 2 -adf e- 3 -adi e2 4 -adi e- 5 -ads e2 6 -ads e- 7 -adu e2 8 -adu e- 9 -and e2 10 -and e- 11 -asp ew2 12 -ass e2 13 -ass e- 14 -bge e2 15 -bgt e2 16 -ble e2 17 -blm e2 18 -bls e2 19 -bls e- 20 -blt e2 21 -bne e2 22 -cai e- 23 -cal e2 24 -cfi e- 25 -cfu e- 26 -ciu e- 27 -cmf e2 28 -cmf e- 29 -cmi e2 30 -cmi e- 31 -cms e2 32 -cms e- 33 -cmu e2 34 -cmu e- 35 -com e2 36 -com e- 37 -csa e2 38 -csa e- 39 -csb e2 40 -csb e- 41 -cuf e- 42 -cui e- 43 -cuu e- 44 -dee ew2 45 -del ewP2 46 -del ewN2 47 -dup e2 48 -dus e2 49 -dus e- 50 -dvf e2 51 -dvf e- 52 -dvi e2 53 -dvi e- 54 -dvu e2 55 -dvu e- 56 -fef e2 57 -fef e- 58 -fif e2 59 -fif e- 60 -inl ewP2 61 -inl ewN2 62 -inn e2 63 -inn e- 64 -ior e2 65 -ior e- 66 -lar e2 67 -lar e- 68 -ldc e2 69 -ldf e2 70 -ldl ewP2 71 -ldl ewN2 72 -lfr e2 73 -lil ewP2 74 -lil ewN2 75 -lim e- 76 -los e2 77 -los e- 78 -lor esP 1 79 -lpi e2 80 -lxa e2 81 -lxl e2 82 -mlf e2 83 -mlf e- 84 -mli e2 85 -mli e- 86 -mlu e2 87 -mlu e- 88 -mon e- 89 -ngf e2 90 -ngf e- 91 -ngi e2 92 
-ngi e- 93 -nop e- 94 -rck e2 95 -rck e- 96 -ret e2 97 -rmi e2 98 -rmi e- 99 -rmu e2 100 -rmu e- 101 -rol e2 102 -rol e- 103 -ror e2 104 -ror e- 105 -rtt e- 106 -sar e2 107 -sar e- 108 -sbf e2 109 -sbf e- 110 -sbi e2 111 -sbi e- 112 -sbs e2 113 -sbs e- 114 -sbu e2 115 -sbu e- 116 -sde eu 117 -sdf e2 118 -sdl ewP2 119 -sdl ewN2 120 -set e2 121 -set e- 122 -sig e- 123 -sil ewP2 124 -sil ewN2 125 -sim e- 126 -sli e2 127 -sli e- 128 -slu e2 129 -slu e- 130 -sri e2 131 -sri e- 132 -sru e2 133 -sru e- 134 -sti e2 135 -sts e2 136 -sts e- 137 -str esP 1 138 -tge e- 139 -tle e- 140 -trp e- 141 -xor e2 142 -xor e- 143 -zer e2 144 -zer e- 145 -zge e2 146 -zgt e2 147 -zle e2 148 -zlt e2 149 -zne e2 150 -zrf e2 151 -zrf e- 152 -zrl ewP2 153 -dch e- 154 -exg esP 1 155 -exg e2 156 -exg e- 157 -lpb e- 158 -gto eu 159 -ldc 4 0 -lae 4 1 -lal P4 2 -lal N4 3 -lde w4 4 -ldf 4 5 -ldl wP4 6 -ldl wN4 7 -lil wP4 8 -lil wN4 9 -loc 4 10 -loe w4 11 -lof 4 12 -lol wP4 13 -lol wN4 14 -lpi 4 15 -adp 4 16 -asp w4 17 -beq 4 18 -bge 4 19 -bgt 4 20 -ble 4 21 -blm 4 22 -blt 4 23 -bne 4 24 -bra 4 25 -cal 4 26 -dee w4 27 -del wP4 28 -del wN4 29 -fil 4 30 -gto 4 31 -ine w4 32 -inl wP4 33 -inl wN4 34 -lin 4 35 -sde 4 36 -sdf 4 37 -sdl wP4 38 -sdl wN4 39 -sil wP4 40 -sil wN4 41 -ste w4 42 -stf 4 43 -stl wP4 44 -stl wN4 45 -zeq 4 46 -zge 4 47 -zgt 4 48 -zle 4 49 -zlt 4 50 -zne 4 51 -zre w4 52 -zrl wP4 53 -zrl wN4 54 -loi 4 55 -sti 4 56 diff --git a/etc/new_table b/etc/new_table deleted file mode 100755 index accc7d14f..000000000 --- a/etc/new_table +++ /dev/null @@ -1,72 +0,0 @@ -h=${1-.} -d=${2-.} - -set `grep fpseu em_table` -p=$2 -set `grep fmnem em_table` -m=$2 - -ed - em_table <<'A' > X -1,/^$/g/ /s// /gp -A - -ed - em_table <<'A' | awk '{$2=$2+'$p'; print}' > Y -1,/^$/d -1,/^$/g/ /s// /gp -A - -ed - em_table <<'A' | awk '{print $0,'$m'+i++}' > Z -1,/^$/d -1,/^$/d -1,/^$/g/ /s// /gp -A - -i=`wc -l >X -i=`wc -l >X - -ed - X <<'A' > $h/em_spec.h -g/^/s//#define sp_/p -A - -ed - Y <<'A' > $h/em_pseu.h 
-g/ \(.*\) .*/s// \1/ -g/\(.*\) \(.*\)/s//#define ps_\1 \2/p -A - -ed - Z <<'A' > $h/em_mnem.h -g/ .* /s// / -g/\(.*\) \(.*\)/s//#define op_\1 \2/p -A - -( -echo 'char em_pseu[][4] = {' -ed - Y <<'A' -g/\(...\).*/s// "\1",/p -A -echo '};' -) > $d/em_pseu.c - -( -echo 'char em_mnem[][4] = {' -ed - Z <<'A' -g/\(...\).*/s// "\1",/p -A -echo '};' -) > $d/em_mnem.c - -( -echo '#include -char em_flag[] = {' -ed - Z <<'A' | tr abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ -g/^... /s/// -g/ .*/s/// -g/\(.\)\(.\)/s//PAR_\1 | FLO_\2/ -g/-/s//NO/g -g/.*/s// &,/p -A -echo '};' -) > $d/em_flag.c - -rm X Y Z diff --git a/etc/pc_errors b/etc/pc_errors deleted file mode 100644 index 688e4b372..000000000 --- a/etc/pc_errors +++ /dev/null @@ -1,289 +0,0 @@ -non-standard feature used -identifier '%s' declared twice -end of file encountered -bad line directive -unsigned real: digit of fraction expected -unsigned real: digit of exponent expected -unsigned real: too many digits (>72) -unsigned integer: too many digits (>72) -unsigned integer: overflow (>32767) -string constant: must not exceed one line -string constant: at least one character expected -string constant: double quotes not allowed (see c option) -string constant: too long (>72 chars) -bad character -identifier '%s' not declared -location counter overflow: arrays too big -location counter overflow: arrays too big -arraysize too big -variable '%s' never used -variable '%s' never assigned -the files contained in '%s' are not closed automatically -constant expected -constant: only integers and reals may be signed -constant: out of bounds -simple type expected -enumerated type: element identifier expected -enumerated type: ',' or ')' expected -enumerated type: ',' expected -enumerated type: ')' expected -subrange type: type must be scalar, but not real -subrange type: '..' 
expected -subrange type: type of lower and upper bound incompatible -subrange type: lower bound exceeds upper bound -array type: '[' expected -conformant array: low bound identifier expected -conformant array: '..' expected -conformant array: high bound identifier expected -conformant array: ':' expected -conformant array: index type identifier expected -array type: index type not bounded -array type: index separator or ']' expected -array type: index separator expected -array type: ']' expected -array type: 'of' expected -record variant part: tag type identifier expected -record variant part: tag type identifier expected -record variant part: type must be bounded -record variant part: 'of' expected -record variant: type of case label and tag incompatible -record variant: multiple defined case label -record variant: ',' or ':' expected -record variant: ',' expected -record variant: ':' expected -record variant: '(' expected -record variant: ')' expected -record variant part: ';' or end of variant list expected -record variant part: ';' expected -record variant part: end of variant list expected -record variant part: there must be a variant for each tag value -field list: record section expected -record section: field identifier expected -record section: ',' or ':' expected -record section: ',' expected -record section: ':' expected -field list: ';' or end of record section list expected -field list: ';' expected -field list: end of record section list expected -type expected -type: simple and pointer type may not be packed -pointer type: type identifier expected -pointer type: type identifier expected -record type: 'end' expected -set type: 'of' expected -set type: too many elements in set -set type: bad subrange of integer -set of integer: the i option dictates the number of bits (default 16) -set type: base type not bounded -file type: 'of' expected -file type: files within files not allowed -var parameter: type identifier or conformant array expected -var 
parameter: type identifier expected -label declaration: unsigned integer expected -label declaration: label '%i' multiple declared -label declaration: ',' or ';' expected -label declaration: ',' expected -label declaration: ';' expected -const declaration: constant identifier expected -const declaration: '=' expected -const declaration: ';' expected -const declaration: constant identifier or 'type', 'var', 'procedure', 'function' or 'begin' expected -type declaration: type identifier expected -type declaration: '=' expected -type declaration: ';' expected -type declaration: type identifier or 'var', 'procedure', 'function' or 'begin' expected -var declaration: var identifier expected -var declaration: ',' or ':' expected -var declaration: ',' expected -var declaration: ':' expected -var declaration: ';' expected -var declaration: var identifier or 'procedure', 'function' or 'begin' expected -parameter list: 'var','procedure','function' or identifier expected -parameter list: parameter identifier expected -parameter list: ',' or ':' expected -parameter list: ',' expected -parameter list: ':' expected -parameter list: type identifier expected -parameter list: ';' or ')' expected -parameter list: ';' expected -proc/func declaration: proc/func identifier expected -proc/func declaration: previous declaration of '%s' was not forward -proc/func declaration: parameter list expected -parameterlist: ')' expected -func declaration: ':' expected -func declaration: result type identifier expected -func declaration: result type must be scalar, subrange or pointer -proc/func declaration: ';' expected -proc/func declaration: block or directive expected -proc/func declaration: '%s' unknown directive -proc/func declaration: '%s' again forward declared -proc/func declaration: ';' expected -indexed variable: '[' only allowed following array variables -indexed variable: index type not compatible with declaration -indexed variable: ',' or ']' expected -indexed variable: ',' expected 
-assignment: standard function not allowed as destination -assignment: cannot store the function result -assignment: formal parameter function not allowed as destination -assignment: function identifier may not be de-referenced -variable: '[', '.', '^' or end of variable expected -indexed variable: ']' expected -field designator: field identifier expected -field designator: '.' only allowed following record variables -field designator: no field '%s' in this record -referenced variable: '^' not allowed following zero-terminated strings -referenced variable: '^' only allowed following pointer or file variables -variable: var or field identifier expected -call: too many actual parameters supplied -call: proc/func identifier expected -call: standard proc/func may not be used as parameter -call: parameter lists of actual and formal proc/func incompatible -call: type of actual and formal value parameter not compatible -call: array parameter not conformable -call: type of actual and formal variable parameter not similar -call: packed elements not allowed as variable parameter -call: ',' or ')' expected -call: too few actual parameters supplied -read(ln): type must be integer, char or real -write(ln): type must be integer, char, real, string or boolean -write(ln): ':', ',' or ')' expected -write(ln): field width must be integer -write(ln): ':', ',' or ')' expected -write(ln): precision must be integer -write(ln): precision may only be specified for reals -read/write: too few actual parameters supplied -read/write: standard input/output not mentioned in program heading -read/write: ',' or ')' expected -read/write: type of parameter not the same as that of the file elements -read/write: parameter list expected -readln/writeln: standard input/output not mentioned in program heading -readln/writeln: only allowed on text files -new/dispose: C-type strings not allowed here -new/dispose: ',' or ')' expected -new/dispose: too many actual parameters supplied -new/dispose: type of 
tagfield value is incompatible with declaration -call: '(' or end of call expected -standard proc/func: parameter list expected -standard input/output not mentioned in program heading -file variable expected -pointer variable expected -pack: ',' expected -pack: ',' expected -unpack: ',' expected -unpack: ',' expected -standard proc/func: parameter type incompatible with specification -eoln/page: text file variable expected -pack/unpack: array types are incompatible -pack/unpack: only for arrays -abs: integer or real expected -sqr: integer or real expected -ord: type must be scalar or subrange, but not real -pred/succ: type must be scalar or subrange, but not real -trunc/round: real argument required -call: ')' expected -expression: left and right operand are incompatible -set: incompatible elements -set: base type must be bounded or of type integer -set: base type upper bound exceeds maximum set element number -set: element out of range -set: ']' or element list expected -set: '..', ',' or ']' expected -set: ',' or ']' expected -set: ',' expected -factor expected -factor: ')' expected -factor: type of factor must be boolean -set: ']' expected -term: multiplying operator or end of term expected -term: '*' only defined for integers, reals and sets -term: '/' only defined for integers and reals -term: 'div' only defined for integers -term: 'mod' only defined for integers -term: 'and' only defined for booleans -simple expression: only integers and reals may be signed -simple expression: adding operator or end of simple expression expected -simple expression: '+' only defined for integers, reals and sets -simple expression: '-' only defined for integers, reals and sets -simple expression: 'or' only defined for booleans -expression: relational operator or end of expression expected -expression: set expected -expression: left operand of 'in' not compatible with base type of right operand -expression: only '=' and '<>' allowed on pointers -expression: '<' and '>' not 
allowed on sets -expression: comparison of arrays only allowed for strings -expression: comparison of records not allowed -expression: comparison of files not allowed -assignment: ':=' expected -assignment: left and right hand side incompatible -goto statement: unsigned integer expected -goto statement: label '%i' not declared -if statement: type of expression must be boolean -if statement: 'then' expected -if statement: 'else' or end of if statement expected -case statement: type must be scalar or subrange, but not real -case statement: 'of' expected -case statement: incompatible case label -case statement: multiple defined case label -case statement: ',' or ':' expected -case statement: ',' expected -case statement: ':' expected -case statement: ';' or 'end' expected -case statement: ';' expected -case statement: 'end' expected -repeat statement: ';' or 'until' expected -repeat statement: ';' expected -repeat statement: 'until' expected -repeat statement: type of expression must be boolean -while statement: type of expression must be boolean -while statement: 'do' expected -for statement: type of bound and control variable incompatible -for statement: control variable expected -for statement: control variable must be local -for statement: type must be scalar or subrange, but not real -for statement: ':=' expected -for statement: 'to' or 'downto' expected -for statement: upper bound not assignment compatible -for statement: 'do' expected -with statement: record variable expected -with statement: ',' or 'do' expected -with statement: ',' expected -with statement: 'do' expected -assertion: type of expression must be boolean -statement expected -label '%i' not declared -label '%i' multiple defined -statement: ':' expected -unlabeled statement expected -compound statement: ';' or 'end' expected -compound statement: ';' expected -compound statement: 'end' expected -case statement: 'end' expected -body: ';' or 'end' expected -body: ';' expected -body: label '%i' 
declared, but never defined -program parameter '%s' not declared -function '%s' never assigned -block: declaration or body expected -block: 'const', 'type', 'var', 'procedure', 'function' or 'begin' expected -block: 'type', 'var', 'procedure', 'function' or 'begin' expected -block: 'var', 'procedure', 'function' or 'begin' expected -block: 'procedure', 'function' or 'begin' expected -block: unsatisfied forward proc/func declaration(s) -block: 'begin' expected -block: 'end' expected -program heading: 'program' expected -program heading: program identifier expected -program heading: file identifier list expected -program heading: file identifier expected -program heading: ',' or ')' expected -program heading: ',' expected -program heading: maximum number of file arguments exceeded (12) -program heading: ')' expected -program heading: ';' expected -program: '.' expected -'program' expected -module: 'const', 'type', 'var', 'procedure' or 'function' expected -module: 'type', 'var', 'procedure' or 'function' expected -module: 'var', 'procedure' or 'function' expected -module: 'procedure' or 'function' expected -garbage at end of program diff --git a/etc/pc_rt_errors b/etc/pc_rt_errors deleted file mode 100644 index 530604965..000000000 --- a/etc/pc_rt_errors +++ /dev/null @@ -1,107 +0,0 @@ -array bound error -range bound error -set bound error -integer overflow -real overflow -real underflow -divide by 0 -divide by 0.0 -undefined integer -real undefined -conversion error -error 11 -error 12 -error 13 -error 14 -error 15 -stack overflow -heap error -illegal instruction -odd or zero byte count -case error -memory fault -bad pointer -bad program counter -bad external address -bad monitor call -bad line number -error 27 -error 28 -error 29 -error 30 -error 31 -error 32 -error 33 -error 34 -error 35 -error 36 -error 37 -error 38 -error 39 -error 40 -error 41 -error 42 -error 43 -error 44 -error 45 -error 46 -error 47 -error 48 -error 49 -error 50 -error 51 -error 52 -error 53 
-error 54 -error 55 -error 56 -error 57 -error 58 -error 59 -error 60 -error 61 -error 62 -error 63 -more args expected -error in exp -error in ln -error in sqrt -assertion failed -array bound error in pack -array bound error in unpack -only positive j in 'i mod j' -file not yet open -dispose error -error 74 -error 75 -error 76 -error 77 -error 78 -error 79 -error 80 -error 81 -error 82 -error 83 -error 84 -error 85 -error 86 -error 87 -error 88 -error 89 -error 90 -error 91 -error 92 -error 93 -error 94 -error 95 -not writable -not readable -end of file -truncated -reset error -rewrite error -close error -read error -write error -digit expected -non-ASCII char read diff --git a/etc/pop_push b/etc/pop_push deleted file mode 100644 index 478f96789..000000000 --- a/etc/pop_push +++ /dev/null @@ -1,15 +0,0 @@ -description of third column of em_table: - - -: pop item indicated by next character - +: push item indicated by next character - 0: no effect on the stack - -characters describing items: - - w: target machine word (1, 2 or 4) - d: double target machine word (2, 4 or 8) - p: target machine address - a: item with size specified in argument - x: item with size specified by top item of stack - y: item with size specified by second item on stack - ?: one or more items of unknown size diff --git a/etc/proto.make b/etc/proto.make deleted file mode 100644 index d4a34ad33..000000000 --- a/etc/proto.make +++ /dev/null @@ -1,31 +0,0 @@ -# $Id$ - -#PARAMS do not remove this line! 
- -h=$(TARGET_HOME)/h -c=$(SRC_HOME)/util/data - -FILES= \ -$h/em_spec.h \ -$h/em_pseu.h \ -$h/em_mnem.h \ -$c/em_flag.c \ -$c/em_pseu.c \ -$c/em_mnem.c - -$(FILES): $(SRC_DIR)/etc/em_table - cd $(SRC_DIR)/etc ; new_table $h $c - -install: $(FILES) - -distr: $(FILES) - -opr: - make pr ^ opr -pr: - @cd $(SRC_DIR) ; pr proto.make em_table new_table pop_push traps - -clean: - -rm -f *.old - -cmp : # do nothing diff --git a/etc/traps b/etc/traps deleted file mode 100644 index 858cf618a..000000000 --- a/etc/traps +++ /dev/null @@ -1,28 +0,0 @@ -~ Array bound error -~ Range bound error -~ Set bound error -~ Integer overflow -~ Floating overflow -~ Floating underflow -~ Divide by 0 -~ Divide by 0.0 -~ Integer undefined -~ Floating undefined -~ Conversion error - - - - - -* Stack overflow -* Heap overflow -* Illegal instruction -* Illegal odd or zero argument -* Case error -* Addressing non existent memory -* Bad pointer used -* Program counter out of range -* Bad argument of LAE -* Bad monitor call -* Argument of LIN too high -* Bad GTO descriptor diff --git a/fast/.distr b/fast/.distr deleted file mode 100644 index 08ea893ed..000000000 --- a/fast/.distr +++ /dev/null @@ -1,6 +0,0 @@ -Action -driver -f_c.ansi -f_c -f_m2 -f_pc diff --git a/fast/Action b/fast/Action deleted file mode 100644 index 451eb0246..000000000 --- a/fast/Action +++ /dev/null @@ -1,15 +0,0 @@ -name "Fast C compiler" -dir f_c -end -name "Fast ANSI C compiler" -dir f_c.ansi -end -name "Fast Pascal compiler" -dir f_pc -end -name "Fast Modula-2 compiler" -dir f_m2 -end -name "Driver for fast compilers" -dir driver -end diff --git a/fast/driver/.distr b/fast/driver/.distr deleted file mode 100644 index 467369b36..000000000 --- a/fast/driver/.distr +++ /dev/null @@ -1,5 +0,0 @@ -afcc.1 -afm2.1 -afpc.1 -driver.c -proto.make diff --git a/fast/driver/afcc.1 b/fast/driver/afcc.1 deleted file mode 100644 index a95b5b578..000000000 --- a/fast/driver/afcc.1 +++ /dev/null @@ -1,158 +0,0 @@ -.TH AFCC 1 -.SH NAME 
-afcc \- fast ACK compatible C compiler -.SH SYNOPSIS -.B afcc -[ -.B \-c -] -[ -.B \-v -] -[ -.B \-ansi -] -[ -.B \-R -] -[ -.B \-vn -] -[ \fB\-D\fIname\fR ] -[ \fB\-D\fIname\fB=\fIdef\fR ] -[ -.BI \-I pathname -] -[ -.B \-w -] -[ -.B \-g -] -[ -.B \-o -.I outfile -] -[ -.BI \-U name -] -[ -.BI -M compiler -] -.I sourcefile ... -.SH DESCRIPTION -.LP -.I Afcc -is a fast -.B C -compiler. It translates -.B C -programs -into ack(1)-compatible relocatable object modules, and does so in one pass. -Then, if the \fB\-c\fP flag is not given, -.I afcc -offers the object modules to a link-editor, -to create an executable binary. -.LP -.I Afcc -accepts several types of filename arguments. Files with -names ending in -.B .c -are taken to be -.B C -source programs. -They are compiled, and the resulting object module is placed in the current -directory. -The object module is named after its source file, the suffix -.B .o -replacing -.BR .c -in the name of the object. -.LP -Other arguments refer to loader options, -object modules, or object libraries. -Unless the -.B \-c -flag is given, these modules and libraries, together with the results of any -specified compilations, are passed (in the order given) to the -link-editor to produce -an output file named -.IR a.out . -You can specify a name for the executable by using the -.B \-o -option. -.SH OPTIONS -.LP -.IP \fB\-ansi\fP -Use the ANSI C compiler instead of the K&R one. This flag must be first, -and must also be used when linking through -.I afcc -or -.I ack. -.IP \fB\-R\fP -test for more compatibility with Kernighan & Ritchie C [1]. -.IP \fB\-c\fP -.br -Suppress the loading phase of the compilation, and force an object module to -be produced, even if only one program is compiled. -A single object module can be named explicitly using the -.B \-o -option. -.IP \fB\-D\fIname\fR\fB=\fIdef\fR -Define a symbol -.I name -to the -preprocessor, as if by "#define". -.IP \fB\-D\fIname\fR -.br -same as \fB\-D\fIname\fB=1\fR. 
-.IP \fB\-I\fIpathname\fR -.br -Add -.I pathname -to the list of directories in which to search for -.B #include -files with filenames not beginning with slash. -The compiler first searches for -.B #include -files in the directory containing -.I sourcefile, -then in directories in -.B \-I -options, then in the ACK include directory, -and finally, in -.I /usr/include. -.IP "\fB\-o \fIoutput\fR" -Name the final output file -.I output. -.IP \fB\-U\fIname\fR -.br -Remove any initial definition of -.I name. -.IP \fB\-g\fP -.br -Produce symbolic debugging information for grind(1). -.IP \fB\-v\fP -.br -Verbose. Print the commands as they are executed. -.IP \fB\-vn\fP -.br -Verbose, no execute. Only print the commands, do not execute them. -.IP \fB\-w\fP -suppress warning messages. -.IP \fB\-M\fIcompiler\fR -.br -use \fIcompiler\fR as C compiler instead of the default. -.LP -Object modules produced by ack(1) and -.I afcc -can be freely mixed. -.SH "SEE ALSO" -.IP [1] -B.W. Kernighan, D. Ritchie, "\fIThe C programming Language\fP", Prentice-Hall Inc., 1978 -.IP [2] -ack(1) manual page. -.IP [3] -grind(1) manual page. -.SH DIAGNOSTICS -Diagnostics are intended to be self-explanatory. diff --git a/fast/driver/afm2.1 b/fast/driver/afm2.1 deleted file mode 100644 index 431f0f07c..000000000 --- a/fast/driver/afm2.1 +++ /dev/null @@ -1,209 +0,0 @@ -.TH FM2 1 -.SH NAME -afm2 \- fast ACK compatible Modula-2 compiler -.SH SYNOPSIS -.B afm2 -[ -.B \-c -] -[ -.B \-v -] -[ -.B \-vn -] -[ \fB\-D\fIname\fR ] -[ \fB\-D\fIname\fB=\fIdef\fR ] -[ -.BI \-I pathname -] -[ -.BI \-w classes -] -[ -.BI \-W classes -] -[ -.B \-L -] -[ -.B \-o -.I outfile -] -[ -.B \-R -] -[ -.B \-A -] -[ -.B \-3 -] -[ -.B \-_ -] -[ -.B \-g -] -[ -.BI \-U name -] -[ -.BI -M compiler -] -.I sourcefile ... -.SH DESCRIPTION -.LP -.I afm2 -is a fast -.B Modula-2 -compiler. It translates -.B Modula-2 -programs -into ack(1)-compatible relocatable object modules, and does so in one pass. 
-Then, if the \fB\-c\fP flag is not given, -.I afm2 -offers the object modules to a link-editor, -to create an executable binary. -.LP -.I Afm2 -accepts several types of filename arguments. Files with -names ending in -.B .mod -are taken to be -.B Modula-2 -source programs. -They are compiled, and the resulting object module is placed in the current -directory. -The object module is named after its source file, the suffix -.B .o -replacing -.BR .mod -in the name of the object. -A file with suffix -.B .mod -is passed through the C preprocessor if it begins with a '#'. -.PP -Definition modules are not separately compiled. The compiler reads them when -it needs them. -Definition modules are expected to reside in files with names ending -in -.BR .def . -The name of the file in which a definition module is stored must be the same as -the module-name, apart from the extension. -Also, in most Unix systems filenames are only 14 characters long. -So, given an IMPORT declaration for a module called "LongModulName", -the compiler will try to open a file called "LongModulN.def". -The requirement does not hold for implementation or program modules, -but is certainly recommended. -.LP -Other arguments refer to loader options, -object modules, or object libraries. -Unless the -.B \-c -flag is given, these modules and libraries, together with the results of any -specified compilations, are passed (in the order given) to the -link-editor to produce -an output file named -.IR a.out . -You can specify a name for the executable by using the -.B \-o -option. -.SH OPTIONS -.LP -.IP \fB\-c\fP -.br -Suppress the loading phase of the compilation, and force an object module to -be produced, even if only one program is compiled. -A single object module can be named explicitly using the -.B \-o -option. -.IP \fB\-D\fIname\fR\fB=\fIdef\fR -Define a symbol -.I name -to the -preprocessor, as if by "#define". -.IP \fB\-D\fIname\fR -.br -same as \fB\-D\fIname\fB=1\fR. 
-.IP \fB\-I\fIpathname\fR -.br -Add -.I pathname -to the list of directories in which to search for -.B #include -files with filenames not beginning with slash. -The preprocessor first searches for -.B #include -files in the directory containing -.I sourcefile, -then in directories in -.B \-I -options, then in the ACK include directory, -and finally, in -.I /usr/include. -This flag is also passed to the compiler. When the compiler needs a definition -module, it is first searched for in the current directory, then in the -directories given to it by the \fB\-I\fP flag, and then in a default directory, -.I $TARGET_HOME/lib/m2. -.I afm2 -This default directory contains all definition modules of -the runtime system. -.IP "\fB\-o \fIoutput\fR" -Name the final output file -.I output. -.IP \fB\-U\fIname\fR -.br -Remove any initial definition of -.I name. -.IP \fB\-v\fP -.br -Verbose. Print the commands as they are executed. -.IP \fB\-vn\fP -.br -Verbose, no execute. Only print the commands, do not execute them. -.IP \fB\-L\fR -do not generate code to keep track of -the current location in the source code. -.IP \fB\-g\fP -.br -Produce symbolic debugging information for grind(1). -.IP \fB\-w\fR\fIclasses\fR -suppress warning messages whose class is a member of \fIclasses\fR. -Currently, there are three classes: \fBO\fR, indicating old-flashioned use, -\fBW\fR, indicating "ordinary" warnings, and \fBR\fR, indicating -restricted Modula-2. -If no \fIclasses\fR are given, all warnings are suppressed. -By default, warnings in class \fBO\fR and \fBW\fR are given. -.IP \fB\-W\fR\fIclasses\fR -allow for warning messages whose class is a member of \fIclasses\fR. -.IP \fB\-R\fP -.br -disable all range-checks. -.IP \fB\-A\fP -.br -enable extra array bound checks. Unfortunately, the back-end used for this -compiler is a bit sloppy, so extra array bound checks are needed if you want -detection of array bound errors. 
-.IP \fB\-3\fP -.br -Only accept Modula-2 programs that strictly conform to the 3rd Edition of -[1]. -.IP \fB\-_\fP -.br -allow for underscores within identifiers. Identifiers may not start or end -with an underscore, even if this flag is given. -.IP \fB\-M\fIcompiler\fR -.br -use \fIcompiler\fR as Modula-2 compiler instead of the default. -.SH "SEE ALSO" -.IP [1] -N. Wirth, \fIProgramming in Modula-2\fP, 3rd edition, Springer Verlag. -.IP [2] -C.J.H. Jacobs, \fIThe ACK Modula-2 Compiler\fP. -.IP [3] -ack(1) unix manual page. -.IP [4] -grind(1) unix manual page. -.SH DIAGNOSTICS -Diagnostics are intended to be self-explanatory. diff --git a/fast/driver/afpc.1 b/fast/driver/afpc.1 deleted file mode 100644 index f96d2e3e3..000000000 --- a/fast/driver/afpc.1 +++ /dev/null @@ -1,227 +0,0 @@ -.TH AFPC 1 -.SH NAME -afpc \- fast ACK compatible Pascal compiler -.SH SYNOPSIS -.B afpc -[ -.B \-c -] -[ -.B \-v -] -[ -.B \-vn -] -[ \fB\-D\fIname\fR ] -[ \fB\-D\fIname\fB=\fIdef\fR ] -[ -.BI \-I pathname -] -[ -.B \-w -] -[ -.B \-g -] -[ -.B \-L -] -[ -.B \-o -.I outfile -] -[ -.B \-R -] -[ -.B \-A -] -[ -.B \-a -] -[ -.B \-d -] -[ -.BI \-i num -] -[ -.B \-t -] -[ -.B \-C -] -[ -.B \-U+ -] -[ -.B \-u+ -] -[ -.B \-s+ -] -[ -.B \-c+ -] -[ -.BI \-U name -] -[ -.BI -M compiler -] -.I sourcefile ... -.SH DESCRIPTION -.LP -.I afpc -is a fast -.B Pascal -compiler. It translates -.B Pascal -programs -into ack(1)-compatible relocatable object modules, and does so in one pass. -Then, if the \fB\-c\fP flag is not given, -.I afpc -offers the object modules to a link-editor, -to create an executable binary. -.LP -.I Fpc -accepts several types of filename arguments. Files with -names ending in -.B .p -are taken to be -.B Pascal -source programs. -They are compiled, and the resulting object module is placed in the current -directory. -The object module is named after its source file, the suffix -.B .o -replacing -.BR .p -in the name of the object. 
-A file with suffix -.B .p -is passed through the C preprocessor if it begins with a '#'. -.LP -Other arguments refer to loader options, -object modules, or object libraries. -Unless the -.B \-c -flag is given, these modules and libraries, together with the results of any -specified compilations, are passed (in the order given) to the -link-editor to produce -an output file named -.IR a.out . -You can specify a name for the executable by using the -.B \-o -option. -.LP -If a single -.B Pascal -program is compiled and loaded all at once, the object module -file is deleted. -.SH OPTIONS -.LP -.IP \fB\-c\fP -.br -Suppress the loading phase of the compilation, and force an object module to -be produced, even if only one program is compiled. -A single object module can be named explicitly using the -.B \-o -option. -.IP \fB\-D\fIname\fR\fB=\fIdef\fR -Define a symbol -.I name -to the -preprocessor, as if by "#define". -.IP \fB\-D\fIname\fR -.br -same as \fB\-D\fIname\fB=1\fR. -.IP \fB\-I\fIpathname\fR -.br -Add -.I pathname -to the list of directories in which to search for -.B #include -files with filenames not beginning with slash. -The preprocessor first searches for -.B #include -files in the directory containing -.I sourcefile, -then in directories in -.B \-I -options, and finally, in -.I /usr/include. -.IP "\fB\-o \fIoutput\fR" -Name the final output file -.I output. -.IP \fB\-U\fIname\fR -.br -Remove any initial definition of -.I name. -.IP \fB\-v\fP -.br -Verbose. Print the commands as they are executed. -.IP \fB\-vn\fP -.br -Verbose, no execute. Only print the commands, do not execute them. -.IP \fB\-L\fP -.br -do not generate code to keep track of -the current location in the source code. -.IP \fB\-g\fP -.br -Produce symbolic debugging information for grind(1). -.IP \fB\-w\fP -.br -suppress warning messages. -.IP \fB\-d\fP -.br -allow for "long"s. -.IP \fB\-i\fInum\fR -.br -set size for integer sets. By default, the set size is the word size. 
-.IP \fB\-C\fP -.br -distinguish between lower case and upper case. Normally, upper case letters -are considered equal to their lower case counterpart. -.IP \fB\-t\fP -.br -trace calls and exits of procedures and functions. -.IP \fB\-R\fP -.br -disable all range-checks. -.IP \fB\-A\fP -.br -enable extra array bound checks. Unfortunately, the back-end used for this -compiler is a bit sloppy, so extra array bound checks are needed if you want -detection of array bound errors. -.IP \fB\-a\fP -.br -disable assertions. Assertions are skipped instead of evaluated. -.IP "\fB\-U+\fP, \fB\-u+\fP" -.br -allow for underscores within identifiers. Identifiers may not start -with an underscore, even if this flag is given. -.IP \fB-s+\fP -.br -allow only standard -.BR Pascal . -This disables the \fB\-c+\fP, \fB\-d\fR, \fB\-u+\fR, -\fB\-U+\fR and \fB\-C\fR options. -Furthermore, assertions are not recognized at all. -.IP \fB-c+\fP -.br -allow C-like strings. This option is mainly intended for usage with -C-functions. This option will cause the type 'string' to be known. -.IP \fB\-M\fIcompiler\fR -.br -use \fIcompiler\fR as Modula-2 compiler instead of the default. -.SH "SEE ALSO" -.IP [1] -J.W. Stevenson, H. v. Eck, \fIAmsterdam Compiler Kit-Pascal reference manual\fP. -.IP [2] -ack(1) unix manual page. -.IP [3] -grind(1) unix manual page. -.SH DIAGNOSTICS -Diagnostics are intended to be self-explanatory. diff --git a/fast/driver/driver.c b/fast/driver/driver.c deleted file mode 100644 index 3f4388909..000000000 --- a/fast/driver/driver.c +++ /dev/null @@ -1,860 +0,0 @@ -/* fcc/fm2/fpc - Driver for fast ACK compilers. - - Derived from the C compiler driver from Minix. - - Compile this file with - cc -O -I/config -DF?? driver.c - where F?? is either FCC, FPC, or FM2. - Install the resulting binaries in the EM bin directory. - Suggested names: afcc, afm2, and afpc. -*/ - -#if FM2+FPC+FCC > 1 -Something wrong here! 
Only one of FM2, FPC, or FCC must be defined -#endif - -#ifdef sun3 -#define MACHNAME "m68020" -#define SYSNAME "sun3" -#endif - -#ifdef vax4 -#define MACHNAME "vax4" -#define SYSNAME "vax4" -#endif - -#ifdef i386 -#define MACHNAME "i386" -#define SYSNAME "i386" -#endif - -#include -#include -#include -#include -#if __STDC__ -#include -#else -#include -#endif - -/* - Version producing ACK .o files in one pass. -*/ -#define MAXARGC 256 /* maximum number of arguments allowed in a list */ -#define USTR_SIZE 128 /* maximum length of string variable */ - -typedef char USTRING[USTR_SIZE]; - -struct arglist { - int al_argc; - char *al_argv[MAXARGC]; -}; - -#define CPP_NAME "$H/lib.bin/cpp" -#define LD_NAME "$H/lib.bin/em_led" -#define CV_NAME "$H/lib.bin/$S/cv" -#define SHELL "/bin/sh" - -char *CPP; -char *COMP; -char *cc = "cc"; - -int kids = -1; -int ecount = 0; - -struct arglist CPP_FLAGS = { -#ifdef FCC - 7, -#else - 13, -#endif - { - "-D__unix", - "-D_EM_WSIZE=4", - "-D_EM_PSIZE=4", - "-D_EM_SSIZE=2", - "-D_EM_LSIZE=4", - "-D_EM_FSIZE=4", - "-D_EM_DSIZE=8", -#ifndef FCC - "-DEM_WSIZE=4", - "-DEM_PSIZE=4", - "-DEM_SSIZE=2", - "-DEM_LSIZE=4", - "-DEM_FSIZE=4", - "-DEM_DSIZE=8", -#endif - } -}; - -struct arglist LD_HEAD = { - 2, - { - "$H/lib/$S/head_em", -#ifdef FCC - "$H/lib/$S/head_$A" -#endif -#ifdef FM2 - "$H/lib/$S/head_m2" -#endif -#ifdef FPC - "$H/lib/$S/head_pc" -#endif - } -}; - -struct arglist LD_TAIL = { -#if defined(sun3) || defined(i386) - 5, -#else - 4, -#endif - { -#ifdef FCC - "$H/lib/$S/tail_$A", -#endif -#ifdef FM2 - "$H/lib/$S/tail_m2", -#endif -#ifdef FPC - "$H/lib/$S/tail_pc", -#endif -#if defined(sun3) || defined(i386) - "$H/lib/$M/tail_fp", -#endif - "$H/lib/$M/tail_em", - "$H/lib/$S/tail_mon", - "$H/lib/$M/end_em" - } -}; - -struct arglist align = { - 5, { -#ifdef sun3 - "-a0:4", - "-a1:4", - "-a2:0x20000", - "-a3:4", - "-b0:0x2020" -#endif -#ifdef vax4 - "-a0:4", - "-a1:4", - "-a2:0x400", - "-a3:4", - "-b0:0" -#endif -#ifdef i386 - "-a0:4", - 
"-a1:4", - "-a2:4", - "-a3:4", - "-b1:0x1880000" -#endif - } -}; - -struct arglist COMP_FLAGS; - -char *o_FILE = "a.out"; /* default name for executable file */ - -#define remove(str) ((noexec || unlink(str)), (str)[0] = '\0') -#define cleanup(str) (str && str[0] && remove(str)) -#define init(al) ((al)->al_argc = 1) - -char ProgCall[128]; - -struct arglist SRCFILES; -struct arglist LDFILES; - -int RET_CODE = 0; - -struct arglist LD_FLAGS; - -struct arglist CALL_VEC; - -int o_flag = 0; -int c_flag = 0; -int g_flag = 0; -int v_flag = 0; -int O_flag = 0; -int ansi_c = 0; - -#if __STDC__ -char *mkstr(char *, ...); -#else -char *mkstr(); -#endif -char *malloc(); -char *alloc(); -char *extension(); -char *expand_string(); - -USTRING ofile; -USTRING BASE; -USTRING tmp_file; - -int noexec = 0; - -extern char *strcat(), *strcpy(), *mktemp(), *strchr(); - -trapcc(sig) - int sig; -{ - signal(sig, SIG_IGN); - if (kids != -1) kill(kids, sig); - cleanup(ofile); - cleanup(tmp_file); - exit(1); -} - -#ifdef FCC -#define lang_suffix() "c" -#define comp_name() "$H/lib.bin/c_ce" -#define ansi_c_name() "$H/lib.bin/c_ce.ansi" -#endif /* FCC */ - -#ifdef FM2 -#define lang_suffix() "mod" -#define comp_name() "$H/lib.bin/m2_ce" -#endif /* FM2 */ - -#ifdef FPC -#define lang_suffix() "p" -#define comp_name() "$H/lib.bin/pc_ce" -#endif /* FPC */ - - -#ifdef FCC -int -lang_opt(str) - char *str; -{ - switch(str[1]) { - case 'R': - if (! ansi_c) { - append(&COMP_FLAGS, str); - return 1; - } - break; - case '-': /* debug options */ - append(&COMP_FLAGS, str); - return 1; - case 'a': /* -ansi flag */ - if (! strcmp(str, "-ansi")) { - ansi_c = 1; - COMP = expand_string(ansi_c_name()); - return 1; - } - break; - case 'w': /* disable warnings */ - if (! 
ansi_c) { - append(&COMP_FLAGS, str); - return 1; - } - if (str[2]) { - str[1] = '-'; - append(&COMP_FLAGS, &str[1]); - } - else append(&COMP_FLAGS, "-a"); - return 1; - } - return 0; -} -#endif /* FCC */ - -#ifdef FM2 -int -lang_opt(str) - char *str; -{ - switch(str[1]) { - case '-': /* debug options */ - case 'w': /* disable warnings */ - case 'R': /* no runtime checks */ - case 'W': /* add warnings */ - case 'L': /* no line numbers */ - case 'A': /* extra array bound checks */ - case '3': /* only accept 3rd edition Modula-2 */ - append(&COMP_FLAGS, str); - return 1; - case 'I': - append(&COMP_FLAGS, str); - break; /* !!! */ - case 'U': /* underscores in identifiers allowed */ - if (str[2] == '\0') { - append(&COMP_FLAGS, str); - return 1; - } - break; - case 'e': /* local extension for Modula-2 compiler: - procedure constants - */ - str[1] = 'l'; - append(&COMP_FLAGS, str); - return 1; - } - return 0; -} -#endif /* FM2 */ - -#ifdef FPC -int -lang_opt(str) - char *str; -{ - switch(str[1]) { - case '-': /* debug options */ - case 'a': /* enable assertions */ - case 'd': /* allow doubles (longs) */ - case 'i': /* set size of integer sets */ - case 't': /* tracing */ - case 'w': /* disable warnings */ - case 'A': /* extra array bound checks */ - case 'C': /* distinguish between lower case and upper case */ - case 'L': /* no FIL and LIN instructions */ - case 'R': /* no runtime checks */ - append(&COMP_FLAGS, str); - return 1; - case 'u': - case 'U': - /* underscores in identifiers */ - case 's': - /* only compile standard pascal */ - case 'c': - /* C type strings */ - if (str[2] == '+' && str[3] == '\0') { - str[2] = 0; - append(&COMP_FLAGS, str); - return 1; - } - } - return 0; -} -#endif /* FPC */ - -main(argc, argv) - char *argv[]; -{ - char *str; - char **argvec; - int count; - char *ext; - register struct arglist *call = &CALL_VEC; - char *file; - char *ldfile; - char *INCLUDE = 0; - int compile_cnt = 0; - - setbuf(stdout, (char *) 0); - 
basename(*argv++,ProgCall); - - COMP = expand_string(comp_name()); - CPP = expand_string(CPP_NAME); - -#ifdef vax4 - append(&CPP_FLAGS, "-D__vax"); -#endif -#ifdef sun3 - append(&CPP_FLAGS, "-D__sun"); -#endif -#ifdef m68020 - append(&CPP_FLAGS, "-D__mc68020"); - append(&CPP_FLAGS, "-D__mc68000"); -#endif - - if (signal(SIGHUP, SIG_IGN) != SIG_IGN) - signal(SIGHUP, trapcc); - if (signal(SIGINT, SIG_IGN) != SIG_IGN) - signal(SIGINT, trapcc); - if (signal(SIGQUIT, SIG_IGN) != SIG_IGN) - signal(SIGQUIT, trapcc); - while (--argc > 0) { - if (*(str = *argv++) != '-') { - append(&SRCFILES, str); - continue; - } - - if (lang_opt(str)) { - } - else switch (str[1]) { - - case 'c': /* stop after producing .o files */ - c_flag = 1; - break; - case 'D': /* preprocessor #define */ - case 'U': /* preprocessor #undef */ - append(&CPP_FLAGS, str); - break; - case 'I': /* include directory */ - append(&CPP_FLAGS, str); - break; - case 'g': /* debugger support */ - append(&COMP_FLAGS, str); - g_flag = 1; - break; - case 'a': /* -ansi flag */ - if (! strcmp(str, "-ansi")) { - ansi_c = 1; - return 1; - } - break; - case 'o': /* target file */ - if (argc-- >= 0) { - o_flag = 1; - o_FILE = *argv++; - ext = extension(o_FILE); - if (ext != o_FILE && ! 
strcmp(ext, lang_suffix()) - ) { - error("-o would overwrite %s", o_FILE); - } - } - break; - case 'u': /* mark identifier as undefined */ - append(&LD_FLAGS, str); - if (argc-- >= 0) - append(&LD_FLAGS, *argv++); - break; - case 'O': /* use built in peephole optimizer */ - O_flag = 1; - break; - case 'v': /* verbose */ - v_flag++; - if (str[2] == 'n') - noexec = 1; - break; - case 'l': /* library file */ - append(&SRCFILES, str); - break; - case 'M': /* use other compiler (for testing) */ - strcpy(COMP, str+2); - break; - case 's': /* strip */ - if (str[2] == '\0') { - append(&LD_FLAGS, str); - break; - } - /* fall through */ - default: - warning("%s flag ignored", str); - break; - } - } - - if (ecount) exit(1); - - count = SRCFILES.al_argc; - argvec = &(SRCFILES.al_argv[0]); - while (count-- > 0) { - ext = extension(*argvec); - if (*argvec[0] != '-' && - ext != *argvec++ && (! strcmp(ext, lang_suffix()) - )) { - compile_cnt++; - } - } - - if (compile_cnt > 1 && c_flag && o_flag) { - warning("-o flag ignored"); - o_flag = 0; - } - -#ifdef FM2 - INCLUDE = expand_string("-I$H/lib/m2"); -#endif /* FM2 */ -#ifdef FCC - INCLUDE = expand_string(ansi_c ? "-I$H/include/tail_ac" : "-I$H/include/_tail_cc"); - append(&COMP_FLAGS, "-L"); -#endif /* FCC */ - count = SRCFILES.al_argc; - argvec = &(SRCFILES.al_argv[0]); - while (count-- > 0) { - register char *f; - basename(file = *argvec++, BASE); - - ext = extension(file); - - if (file[0] != '-' && - ext != file && (!strcmp(ext, lang_suffix()) - )) { - if (compile_cnt > 1) printf("%s\n", file); - - ldfile = c_flag ? 
ofile : alloc((unsigned)strlen(BASE)+3); - if ( -#ifdef FCC - !strcmp(ext, "s") && -#endif - needsprep(file)) { - strcpy(tmp_file, TMP_DIR); - strcat(tmp_file, "/F_XXXXXX"); - mktemp(tmp_file); - init(call); - append(call, CPP); - concat(call, &CPP_FLAGS); - append(call, INCLUDE); - append(call, file); - if (runvec(call, tmp_file)) { - file = tmp_file; - } - else { - remove(tmp_file); - tmp_file[0] = '\0'; - continue; - } - } - init(call); - if (o_flag && c_flag) { - f = o_FILE; - } - else f = mkstr(ldfile, BASE, ".", "o", (char *)0); - append(call, COMP); -#ifdef FCC - concat(call, &CPP_FLAGS); -#endif - concat(call, &COMP_FLAGS); -#if FM2 || FCC - append(call, INCLUDE); -#endif - append(call, file); - append(call, f); - if (runvec(call, (char *) 0)) { - file = f; - } - else { - remove(f); - continue; - } - cleanup(tmp_file); - tmp_file[0] = '\0'; - } - - else if (file[0] != '-' && - strcmp(ext, "o") && strcmp(ext, "a")) { - warning("file with unknown suffix (%s) passed to the loader", ext); - } - - if (c_flag) - continue; - - append(&LDFILES, file); - } - - /* *.s to a.out */ - if (RET_CODE == 0 && LDFILES.al_argc > 0) { - init(call); - expand(&LD_HEAD); - cc = "cc.2g"; - expand(&LD_TAIL); - append(call, expand_string(LD_NAME)); - concat(call, &align); - append(call, "-o"); - strcpy(tmp_file, TMP_DIR); - strcat(tmp_file, "/F_XXXXXX"); - mktemp(tmp_file); - append(call, tmp_file); - concat(call, &LD_HEAD); - concat(call, &LD_FLAGS); - concat(call, &LDFILES); - if (g_flag) append(call, expand_string("$H/lib/$M/tail_db")); -#ifdef FCC - if (! ansi_c) append(call, expand_string("$H/lib/$S/tail_cc.1s")); -#endif - concat(call, &LD_TAIL); - if (! 
runvec(call, (char *) 0)) { - cleanup(tmp_file); - exit(RET_CODE); - } - init(call); - append(call, expand_string(CV_NAME)); - append(call, tmp_file); - append(call, o_FILE); - runvec(call, (char *) 0); - cleanup(tmp_file); - } - exit(RET_CODE); -} - -needsprep(name) - char *name; -{ - int file; - char fc; - - file = open(name,0); - if (file < 0) return 0; - if (read(file, &fc, 1) != 1) fc = 0; - close(file); - return fc == '#'; -} - -char * -alloc(u) - unsigned u; -{ - char *p = malloc(u); - - if (p == 0) - panic("no space"); - return p; -} - -char * -expand_string(s) - char *s; -{ - char buf[1024]; - register char *p = s; - register char *q = &buf[0]; - int expanded = 0; - - if (!p) return p; - while (*p) { - if (*p == '$') { - p++; - expanded = 1; - switch(*p++) { - case 'A': - if (ansi_c) strcpy(q, "ac"); - else strcpy(q, cc); - break; - case 'H': - strcpy(q, EM_DIR); - break; - case 'M': - strcpy(q, MACHNAME); - break; - case 'S': - strcpy(q, SYSNAME); - break; - default: - panic("internal error"); - break; - } - while (*q) q++; - } - else *q++ = *p++; - } - if (! expanded) return s; - *q++ = '\0'; - p = alloc((unsigned int) (q - buf)); - return strcpy(p, buf); -} - -append(al, arg) - register struct arglist *al; - char *arg; -{ - if (!arg || !*arg) return; - if (al->al_argc >= MAXARGC) - panic("argument list overflow"); - al->al_argv[(al->al_argc)++] = arg; -} - -expand(al) - register struct arglist *al; -{ - register int i = al->al_argc; - register char **p = &(al->al_argv[0]); - - while (i-- > 0) { - *p = expand_string(*p); - p++; - } -} - -concat(al1, al2) - struct arglist *al1, *al2; -{ - register i = al2->al_argc; - register char **p = &(al1->al_argv[al1->al_argc]); - register char **q = &(al2->al_argv[0]); - - if ((al1->al_argc += i) >= MAXARGC) - panic("argument list overflow"); - while (i-- > 0) { - *p++ = *q++; - } -} -#if __STDC__ -/*VARARGS*/ -char * -mkstr(char *dst, ...) 
-{ - va_list ap; - - va_start(ap, dst); - { - register char *p; - register char *q; - - q = dst; - p = va_arg(ap, char *); - - while (p) { - while (*q++ = *p++); - q--; - p = va_arg(ap, char *); - } - } - va_end(ap); - - return dst; -} -#else -/*VARARGS*/ -char * -mkstr(va_alist) - va_dcl -{ - va_list ap; - char *dst; - - va_start(ap); - { - register char *p; - register char *q; - - dst = q = va_arg(ap, char *); - p = va_arg(ap, char *); - - while (p) { - while (*q++ = *p++); - q--; - p = va_arg(ap, char *); - } - } - va_end(ap); - - return dst; -} -#endif -basename(str, dst) - char *str; - register char *dst; -{ - register char *p1 = str; - register char *p2 = p1; - - while (*p1) - if (*p1++ == '/') - p2 = p1; - p1--; - while (*p1 != '.' && p1 >= p2) p1--; - if (p1 >= p2) { - *p1 = '\0'; - while (*dst++ = *p2++); - *p1 = '.'; - } - else - while (*dst++ = *p2++); -} - -char * -extension(fn) - char *fn; -{ - register char *c = fn; - - while (*c++) ; - while (*--c != '.' && c >= fn) { } - if (c++ < fn || !*c) return fn; - return c; -} - -runvec(vec, outp) - struct arglist *vec; - char *outp; -{ - int pid, status; - - if (v_flag) { - pr_vec(vec); - putc('\n', stderr); - } - if ((pid = fork()) == 0) { /* start up the process */ - if (outp) { /* redirect standard output */ - close(1); - if (creat(outp, 0666) != 1) - panic("cannot create output file"); - } - ex_vec(vec); - } - if (pid == -1) - panic("no more processes"); - kids = pid; - wait(&status); - if (status) switch(status & 0177) { - case SIGHUP: - case SIGINT: - case SIGQUIT: - case SIGTERM: - case 0: - break; - default: - error("%s died with signal %d\n", vec->al_argv[1], status&0177); - } - kids = -1; - return status ? 
((RET_CODE = 1), 0) : 1; -} - -/*VARARGS1*/ -error(str, s1, s2) - char *str, *s1, *s2; -{ - fprintf(stderr, "%s: ", ProgCall); - fprintf(stderr, str, s1, s2); - putc('\n', stderr); - ecount++; -} - -/*VARARGS1*/ -warning(str, s1, s2) - char *str, *s1, *s2; -{ - fprintf(stderr, "%s: (warning) ", ProgCall); - fprintf(stderr, str, s1, s2); - putc('\n', stderr); -} - -panic(str) - char *str; -{ - error(str); - trapcc(SIGINT); -} - -pr_vec(vec) - register struct arglist *vec; -{ - register char **ap = &vec->al_argv[1]; - - vec->al_argv[vec->al_argc] = 0; - fprintf(stderr, "%s", *ap); - while (*++ap) { - fprintf(stderr, " %s", *ap); - } -} - -extern int errno; - -ex_vec(vec) - register struct arglist *vec; -{ - if (noexec) - exit(0); - vec->al_argv[vec->al_argc] = 0; - execv(vec->al_argv[1], &(vec->al_argv[1])); - if (errno == ENOEXEC) { /* not an a.out, try it with the SHELL */ - vec->al_argv[0] = SHELL; - execv(SHELL, &(vec->al_argv[0])); - } - if (access(vec->al_argv[1], 1) == 0) { - /* File is executable. */ - error("cannot execute %s", vec->al_argv[1]); - } else { - error("%s is not executable", vec->al_argv[1]); - } - exit(1); -} diff --git a/fast/driver/proto.make b/fast/driver/proto.make deleted file mode 100644 index 1604e87f9..000000000 --- a/fast/driver/proto.make +++ /dev/null @@ -1,57 +0,0 @@ -# $Id$ - -#PARAMS do not remove this line! 
- -SRC_DIR = $(SRC_HOME)/fast/driver -INCLUDES = -I$(TARGET_HOME)/config -CFLAGS = $(COPTIONS) $(INCLUDES) -LINTFLAGS = $(LINTOPTIONS) $(INCLUDES) -LDFLAGS = $(LDOPTIONS) - -all: afcc afm2 afpc - -install: all - cp afcc afm2 afpc $(TARGET_HOME)/bin - if [ $(DO_MACHINE_INDEP) = y ] ; \ - then mk_manpage $(SRC_DIR)/afcc.1 $(TARGET_HOME) ; \ - mk_manpage $(SRC_DIR)/afpc.1 $(TARGET_HOME) ; \ - mk_manpage $(SRC_DIR)/afm2.1 $(TARGET_HOME) ; \ - fi - -cmp: all - -cmp afcc $(TARGET_HOME)/bin/afcc - -cmp afm2 $(TARGET_HOME)/bin/afm2 - -cmp afpc $(TARGET_HOME)/bin/afpc - -pr: - @pr $(SRC_DIR)/proto.make $(SRC_DIR)/driver.c - -opr: - make pr | opr - -clean: - rm -f *.$(SUF) afcc afm2 afpc Out - -lint: - $(LINT) $(LINTFLAGS) -DFCC -D`ack_sys` $(SRC_DIR)/driver.c - -afcc.$(SUF): $(SRC_DIR)/driver.c $(TARGET_HOME)/config/em_path.h - $(CC) $(CFLAGS) -c -DFCC -D$(MACH) -D`ack_sys` $(SRC_DIR)/driver.c - mv driver.$(SUF) afcc.$(SUF) - -afpc.$(SUF): $(SRC_DIR)/driver.c $(TARGET_HOME)/config/em_path.h - $(CC) $(CFLAGS) -c -DFPC -D$(MACH) -D`ack_sys` $(SRC_DIR)/driver.c - mv driver.$(SUF) afpc.$(SUF) - -afm2.$(SUF): $(SRC_DIR)/driver.c $(TARGET_HOME)/config/em_path.h - $(CC) $(CFLAGS) -c -DFM2 -D$(MACH) -D`ack_sys` $(SRC_DIR)/driver.c - mv driver.$(SUF) afm2.$(SUF) - -afcc: afcc.$(SUF) - $(CC) $(LDFLAGS) -o afcc afcc.$(SUF) - -afm2: afm2.$(SUF) - $(CC) $(LDFLAGS) -o afm2 afm2.$(SUF) - -afpc: afpc.$(SUF) - $(CC) $(LDFLAGS) -o afpc afpc.$(SUF) diff --git a/fast/f_c.ansi/.distr b/fast/f_c.ansi/.distr deleted file mode 100644 index ebf6f5c24..000000000 --- a/fast/f_c.ansi/.distr +++ /dev/null @@ -1,3 +0,0 @@ -Parameters -proto.main -proto.make diff --git a/fast/f_c.ansi/Parameters b/fast/f_c.ansi/Parameters deleted file mode 100644 index 239e82c7e..000000000 --- a/fast/f_c.ansi/Parameters +++ /dev/null @@ -1,143 +0,0 @@ -!File: lint.h -/*#define LINT 1 /* if defined, 'lint' is produced */ - - -!File: pathlength.h -#define PATHLENGTH 1024 /* max. 
length of path to file */ - - -!File: errout.h -#define ERROUT STDERR /* file pointer for writing messages */ -#define ERR_SHADOW 5 /* a syntax error overshadows error messages - until ERR_SHADOW symbols have been - accepted without syntax error */ - - -!File: idfsize.h -#define IDFSIZE 64 /* maximum significant length of an identifier */ - - -!File: numsize.h -#define NUMSIZE 256 /* maximum length of a numeric constant */ - - -!File: nparams.h -#define NPARAMS 32 /* maximum number of parameters */ -#define STDC_NPARAMS 31 /* ANSI limit on number of parameters */ - - -!File: ifdepth.h -#define IFDEPTH 256 /* maximum number of nested if-constructions */ - - -!File: density.h -#define DENSITY 3 /* see switch.[ch] for an explanation */ - - -!File: macbuf.h -#define LAPBUF 128 /* initial size of macro replacement buffer */ -#define ARGBUF 128 /* initial size of macro parameter buffer(s) */ - - -!File: strsize.h -#define ISTRSIZE 32 /* minimum number of bytes allocated for - storing a string */ -#define RSTRSIZE 16 /* step size in enlarging the memory for - the storage of a string */ - - -!File: trgt_sizes.h -#define MAXSIZE 8 /* the maximum of the SZ_* constants */ - -/* target machine sizes */ -#define SZ_CHAR 1 -#define SZ_SHORT 2 -#define SZ_WORD 4 -#define SZ_INT 4 -#define SZ_LONG 4 -#define SZ_FLOAT 4 -#define SZ_DOUBLE 8 -#define SZ_LNGDBL 8 /* for now */ -#define SZ_POINTER 4 - -/* target machine alignment requirements */ -#define AL_CHAR 1 -#define AL_SHORT SZ_SHORT -#define AL_WORD SZ_WORD -#define AL_INT SZ_WORD -#define AL_LONG SZ_WORD -#define AL_FLOAT SZ_WORD -#define AL_DOUBLE SZ_WORD -#define AL_LNGDBL SZ_WORD -#define AL_POINTER SZ_WORD -#define AL_STRUCT 1 -#define AL_UNION 1 - - -!File: botch_free.h -/*#define BOTCH_FREE 1 /* when defined, botch freed memory, as a check */ - - -!File: dataflow.h -/*#define DATAFLOW 1 /* produce some compile-time xref */ - - -!File: debug.h -/*#define DEBUG 1 /* perform various self-tests */ -#define NDEBUG 1 /* 
disable assertions */ - - -!File: use_tmp.h -#define PREPEND_SCOPES 1 /* collect exa, exp, ina and inp commands - and if USE_TMP is defined let them - precede the rest of the generated - compact code */ -/*#define USE_TMP 1 /* use C_insertpart, C_endpart mechanism - to generate EM-code in the order needed - for the code-generators. If not defined, - the old-style peephole optimizer is - needed. */ - - -!File: parbufsize.h -#define PARBUFSIZE 1024 - - -!File: textsize.h -#define ITEXTSIZE 32 /* 1st piece of memory for repl. text */ - - -!File: inputtype.h -#define INP_READ_IN_ONE 1 /* read input file in one */ - - -!File: nopp.h -/*#define NOPP 1 /* if NOT defined, use built-int preprocessor */ - - -!File: nobitfield.h -/*#define NOBITFIELD 1 /* if NOT defined, implement bitfields */ - - -!File: spec_arith.h -/* describes internal compiler arithmetics */ -/*#define SPECIAL_ARITHMETICS /* something different from native long */ -#define UNSIGNED_ARITH unsigned arith /* when it is supported */ - - -!File: static.h -#define GSTATIC /* for large global "static" arrays */ - - -!File: nocross.h -#define NOCROSS 1 /* if NOT defined, cross compiler */ - - -!File: regcount.h -/*#define REGCOUNT 1 /* count occurrences for register messages */ - - -!File: dbsymtab.h -#define DBSYMTAB 1 /* ability to produce symbol table for debugger */ - - diff --git a/fast/f_c.ansi/proto.main b/fast/f_c.ansi/proto.main deleted file mode 100644 index 5e113963e..000000000 --- a/fast/f_c.ansi/proto.main +++ /dev/null @@ -1,65 +0,0 @@ -# $Id$ - -# C compilation part. Not to be called directly. -# Instead, it is to be called by the Makefile. -# SRC_DIR, UTIL_HOME, TARGET_HOME, CC, COPTIONS, LINT, LINTOPTIONS, LDOPTIONS, -# CC_AND_MKDEP, SUF, LIBSUF, MACH should be set here. - -#PARAMS do not remove this line! 
- -MDIR = $(TARGET_HOME)/modules -LIBDIR = $(MDIR)/lib -LINTLIBDIR = $(UTIL_HOME)/modules/lib - -MALLOC = $(LIBDIR)/malloc.$(SUF) - -EMLIB = $(LIBDIR)/libem_mesO.$(LIBSUF) \ - $(LIBDIR)/libCEopt.$(LIBSUF) \ - $(TARGET_HOME)/lib.bin/$(MACH)/ce.$(LIBSUF) \ - $(TARGET_HOME)/lib.bin/$(MACH)/back.$(LIBSUF) \ - $(TARGET_HOME)/lib.bin/em_data.$(LIBSUF) \ - $(LIBDIR)/libobject.$(LIBSUF) - -MODLIB = $(LIBDIR)/libinput.$(LIBSUF) \ - $(LIBDIR)/libassert.$(LIBSUF) \ - $(LIBDIR)/liballoc.$(LIBSUF) \ - $(MALLOC) \ - $(LIBDIR)/libflt.$(LIBSUF) \ - $(LIBDIR)/libprint.$(LIBSUF) \ - $(LIBDIR)/libstring.$(LIBSUF) \ - $(LIBDIR)/libsystem.$(LIBSUF) - -LIBS = $(EMLIB) $(MODLIB) - -LINTLIBS = $(LINTLIBDIR)/$(LINTPREF)em_mes.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)emk.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)input.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)assert.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)alloc.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)flt.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)print.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)string.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)system.$(LINTSUF) - -PROFILE = -INCLUDES = -I. -I$(SRC_DIR) -I$(TARGET_HOME)/modules/h -I$(TARGET_HOME)/h -I$(TARGET_HOME)/modules/pkg -CFLAGS = $(PROFILE) $(INCLUDES) $(COPTIONS) -LINTFLAGS = $(INCLUDES) $(LINTOPTIONS) -LDFLAGS = $(PROFILE) $(LDOPTIONS) - -# C_SRC and OBJ should be set here. -#LISTS do not remove this line! - -all: main - -clean: - rm -f *.$(SUF) main - -lint: - $(LINT) $(LINTFLAGS) $(C_SRC) $(LINTLIBS) - -main: $(OBJ) - $(CC) $(LDFLAGS) $(OBJ) $(LIBS) -o main - -# do not remove the next line; it is used for generating dependencies -#DEPENDENCIES diff --git a/fast/f_c.ansi/proto.make b/fast/f_c.ansi/proto.make deleted file mode 100644 index 1d93b77b3..000000000 --- a/fast/f_c.ansi/proto.make +++ /dev/null @@ -1,268 +0,0 @@ -# $Id$ - -# make ANSI C compiler - -#PARAMS do not remove this line! 
- -UTIL_BIN = \ - $(UTIL_HOME)/bin -SRC_DIR = \ - $(SRC_HOME)/lang/cem/cemcom.ansi -FSRC_DIR = \ - $(SRC_HOME)/fast/f_c.ansi - -TABGEN= $(UTIL_BIN)/tabgen -LLGEN = $(UTIL_BIN)/LLgen -LLGENOPTIONS = \ - -v - -SRC_G = $(SRC_DIR)/program.g $(SRC_DIR)/declar.g \ - $(SRC_DIR)/expression.g $(SRC_DIR)/statement.g $(SRC_DIR)/ival.g -GEN_G = tokenfile.g -GFILES= $(GEN_G) $(SRC_G) - -SRC_C = \ - $(SRC_DIR)/Version.c \ - $(SRC_DIR)/LLlex.c \ - $(SRC_DIR)/LLmessage.c \ - $(SRC_DIR)/arith.c \ - $(SRC_DIR)/blocks.c \ - $(SRC_DIR)/ch3.c \ - $(SRC_DIR)/ch3bin.c \ - $(SRC_DIR)/ch3mon.c \ - $(SRC_DIR)/code.c \ - $(SRC_DIR)/conversion.c \ - $(SRC_DIR)/cstoper.c \ - $(SRC_DIR)/dataflow.c \ - $(SRC_DIR)/declarator.c \ - $(SRC_DIR)/decspecs.c \ - $(SRC_DIR)/domacro.c \ - $(SRC_DIR)/dumpidf.c \ - $(SRC_DIR)/error.c \ - $(SRC_DIR)/eval.c \ - $(SRC_DIR)/expr.c \ - $(SRC_DIR)/field.c \ - $(SRC_DIR)/fltcstoper.c \ - $(SRC_DIR)/idf.c \ - $(SRC_DIR)/init.c \ - $(SRC_DIR)/input.c \ - $(SRC_DIR)/l_comment.c \ - $(SRC_DIR)/l_ev_ord.c \ - $(SRC_DIR)/l_lint.c \ - $(SRC_DIR)/l_misc.c \ - $(SRC_DIR)/l_outdef.c \ - $(SRC_DIR)/l_states.c \ - $(SRC_DIR)/label.c \ - $(SRC_DIR)/main.c \ - $(SRC_DIR)/options.c \ - $(SRC_DIR)/pragma.c \ - $(SRC_DIR)/proto.c \ - $(SRC_DIR)/replace.c \ - $(SRC_DIR)/skip.c \ - $(SRC_DIR)/stab.c \ - $(SRC_DIR)/stack.c \ - $(SRC_DIR)/struct.c \ - $(SRC_DIR)/switch.c \ - $(SRC_DIR)/tokenname.c \ - $(SRC_DIR)/type.c \ - $(SRC_DIR)/util.c -GEN_C = tokenfile.c program.c declar.c expression.c statement.c ival.c \ - symbol2str.c char.c Lpars.c next.c -CFILES= $(SRC_C) $(GEN_C) - -SRC_H = \ - $(SRC_DIR)/LLlex.h \ - $(SRC_DIR)/align.h \ - $(SRC_DIR)/arith.h \ - $(SRC_DIR)/assert.h \ - $(SRC_DIR)/atw.h \ - $(SRC_DIR)/class.h \ - $(SRC_DIR)/decspecs.h \ - $(SRC_DIR)/file_info.h \ - $(SRC_DIR)/input.h \ - $(SRC_DIR)/interface.h \ - $(SRC_DIR)/l_class.h \ - $(SRC_DIR)/l_comment.h \ - $(SRC_DIR)/l_em.h \ - $(SRC_DIR)/l_lint.h \ - $(SRC_DIR)/label.h \ - $(SRC_DIR)/level.h \ - $(SRC_DIR)/mes.h 
\ - $(SRC_DIR)/sizes.h \ - $(SRC_DIR)/specials.h \ - $(SRC_DIR)/tokenname.h - -GEN_H = botch_free.h dataflow.h debug.h density.h errout.h \ - idfsize.h ifdepth.h inputtype.h macbuf.h lint.h \ - nobitfield.h nopp.h nocross.h \ - nparams.h numsize.h parbufsize.h pathlength.h Lpars.h \ - strsize.h trgt_sizes.h textsize.h use_tmp.h spec_arith.h static.h \ - regcount.h dbsymtab.h \ - code.h declar.h def.h expr.h field.h estack.h util.h proto.h replace.h \ - idf.h macro.h stmt.h struct.h switch.h type.h l_brace.h l_state.h \ - l_outdef.h stack.h - -HFILES= $(GEN_H) $(SRC_H) - -NEXTFILES = \ - $(SRC_DIR)/code.str \ - $(SRC_DIR)/declar.str \ - $(SRC_DIR)/def.str \ - $(SRC_DIR)/expr.str \ - $(SRC_DIR)/field.str \ - $(SRC_DIR)/estack.str \ - $(SRC_DIR)/util.str \ - $(SRC_DIR)/proto.str \ - $(SRC_DIR)/replace.str \ - $(SRC_DIR)/idf.str \ - $(SRC_DIR)/macro.str \ - $(SRC_DIR)/stack.str \ - $(SRC_DIR)/stmt.str \ - $(SRC_DIR)/struct.str \ - $(SRC_DIR)/switch.str \ - $(SRC_DIR)/type.str \ - $(SRC_DIR)/l_brace.str \ - $(SRC_DIR)/l_state.str \ - $(SRC_DIR)/l_outdef.str - -all: make.main - make -f make.main main - -install: all - cp main $(TARGET_HOME)/lib.bin/c_ce.ansi - -cmp: all - -cmp main $(TARGET_HOME)/lib.bin/c_ce.ansi - -opr: - make pr | opr - -pr: - @pr $(FSRC_DIR)/proto.make $(FSRC_DIR)/proto.main \ - $(FSRC_DIR)/Parameters - -lint: make.main - make -f make.main lint - -Cfiles: hfiles LLfiles $(GEN_C) $(GEN_H) Makefile - echo $(CFILES) | tr ' ' '\012' > Cfiles - echo $(HFILES) | tr ' ' '\012' >> Cfiles - -resolved: Cfiles - CC="$(CC)" UTIL_HOME="$(UTIL_HOME)" do_resolve `cat Cfiles` > Cfiles.new - -if cmp -s Cfiles Cfiles.new ; then rm -f Cfiles.new ; else mv Cfiles.new Cfiles ; fi - touch resolved - -# there is no file called "dependencies"; we want dependencies checked -# every time. This means that make.main is made every time. Oh well ... -# it does not take much time. 
-dependencies: resolved - do_deps `grep '.c$$' Cfiles` - -make.main: dependencies make_macros lists $(FSRC_DIR)/proto.main - rm_deps $(FSRC_DIR)/proto.main | sed -e '/^.PARAMS/r make_macros' -e '/^.LISTS/r lists' > make.main - cat *.dep >> make.main - -make_macros: Makefile - echo 'SRC_DIR=$(SRC_DIR)' > make_macros - echo 'UTIL_HOME=$(UTIL_HOME)' >> make_macros - echo 'TARGET_HOME=$(TARGET_HOME)' >> make_macros - echo 'CC=$(CC)' >> make_macros - echo 'COPTIONS=$(COPTIONS) -DPEEPHOLE' >> make_macros - echo 'LDOPTIONS=$(LDOPTIONS)' >> make_macros - echo 'LINT=$(LINT)' >> make_macros - echo 'LINTSUF=$(LINTSUF)' >> make_macros - echo 'LINTPREF=$(LINTPREF)' >> make_macros - echo 'LINTOPTIONS=$(LINTOPTIONS)' >> make_macros - echo 'SUF=$(SUF)' >> make_macros - echo 'LIBSUF=$(LIBSUF)' >> make_macros - echo 'CC_AND_MKDEP=$(CC_AND_MKDEP)' >> make_macros - echo 'MACH=$(MACH)' >> make_macros - -lists: Cfiles - echo "C_SRC = \\" > lists - echo $(CFILES) >> lists - echo "OBJ = \\" >> lists - echo $(CFILES) | sed -e 's|[^ ]*/||g' -e 's/\.c/.$$(SUF)/g' >> lists - -clean: - -make -f make.main clean - rm -f $(GEN_C) $(GEN_G) $(GEN_H) hfiles LLfiles Cfiles LL.output - rm -f resolved *.dep lists make.main make_macros - -LLfiles: $(GFILES) - $(LLGEN) $(LLGENOPTIONS) $(GFILES) - @touch LLfiles - -hfiles: $(FSRC_DIR)/Parameters $(SRC_DIR)/make.hfiles - $(SRC_DIR)/make.hfiles $(FSRC_DIR)/Parameters - touch hfiles - -tokenfile.g: $(SRC_DIR)/tokenname.c $(SRC_DIR)/make.tokfile - $(SRC_DIR)/make.tokfile <$(SRC_DIR)/tokenname.c >tokenfile.g - -symbol2str.c: $(SRC_DIR)/tokenname.c $(SRC_DIR)/make.tokcase - $(SRC_DIR)/make.tokcase <$(SRC_DIR)/tokenname.c >symbol2str.c - -code.h: $(SRC_DIR)/code.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/code.str > code.h - -declar.h: $(SRC_DIR)/declar.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/declar.str > declar.h - -def.h: $(SRC_DIR)/def.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/def.str > 
def.h - -expr.h: $(SRC_DIR)/expr.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/expr.str > expr.h - -field.h: $(SRC_DIR)/field.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/field.str > field.h - -estack.h: $(SRC_DIR)/estack.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/estack.str > estack.h - -util.h: $(SRC_DIR)/util.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/util.str > util.h - -proto.h: $(SRC_DIR)/proto.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/proto.str > proto.h - -replace.h: $(SRC_DIR)/replace.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/replace.str > replace.h - -idf.h: $(SRC_DIR)/idf.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/idf.str > idf.h - -macro.h: $(SRC_DIR)/macro.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/macro.str > macro.h - -stack.h: $(SRC_DIR)/stack.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/stack.str > stack.h - -stmt.h: $(SRC_DIR)/stmt.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/stmt.str > stmt.h - -struct.h: $(SRC_DIR)/struct.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/struct.str > struct.h - -switch.h: $(SRC_DIR)/switch.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/switch.str > switch.h - -type.h: $(SRC_DIR)/type.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/type.str > type.h - -l_brace.h: $(SRC_DIR)/l_brace.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/l_brace.str > l_brace.h - -l_state.h: $(SRC_DIR)/l_state.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/l_state.str > l_state.h - -l_outdef.h: $(SRC_DIR)/l_outdef.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/l_outdef.str > l_outdef.h - -next.c: $(NEXTFILES) $(SRC_DIR)/make.next - $(SRC_DIR)/make.next $(NEXTFILES) > next.c - -char.c: $(SRC_DIR)/char.tab - $(TABGEN) 
-f$(SRC_DIR)/char.tab >char.c diff --git a/fast/f_c/.distr b/fast/f_c/.distr deleted file mode 100644 index ebf6f5c24..000000000 --- a/fast/f_c/.distr +++ /dev/null @@ -1,3 +0,0 @@ -Parameters -proto.main -proto.make diff --git a/fast/f_c/Parameters b/fast/f_c/Parameters deleted file mode 100644 index 34a4f4564..000000000 --- a/fast/f_c/Parameters +++ /dev/null @@ -1,148 +0,0 @@ -!File: lint.h -/*#define LINT 1 /* if defined, 'lint' is produced */ - - -!File: pathlength.h -#define PATHLENGTH 1024 /* max. length of path to file */ - - -!File: errout.h -#define ERROUT STDERR /* file pointer for writing messages */ -#define ERR_SHADOW 5 /* a syntax error overshadows error messages - until ERR_SHADOW symbols have been - accepted without syntax error */ - - -!File: idfsize.h -#define IDFSIZE 64 /* maximum significant length of an identifier */ - - -!File: numsize.h -#define NUMSIZE 256 /* maximum length of a numeric constant */ - - -!File: nparams.h -#define NPARAMS 32 /* maximum number of parameters of macros */ - - -!File: ifdepth.h -#define IFDEPTH 256 /* maximum number of nested if-constructions */ - - -!File: density.h -#define DENSITY 3 /* see switch.[ch] for an explanation */ - - -!File: lapbuf.h -#define LAPBUF 4096 /* size of macro actual parameter buffer */ - - -!File: strsize.h -#define ISTRSIZE 32 /* minimum number of bytes allocated for - storing a string */ -#define RSTRSIZE 16 /* step size in enlarging the memory for - the storage of a string */ - - -!File: target_sizes.h -#define MAXSIZE 8 /* the maximum of the SZ_* constants */ - -/* target machine sizes */ -#define SZ_CHAR (arith)1 -#define SZ_SHORT (arith)2 -#define SZ_WORD (arith)4 -#define SZ_INT (arith)4 -#define SZ_LONG (arith)4 -#define SZ_FLOAT (arith)4 -#define SZ_DOUBLE (arith)8 -#define SZ_POINTER (arith)4 - -/* target machine alignment requirements */ -#define AL_CHAR 1 -#define AL_SHORT SZ_SHORT -#define AL_WORD SZ_WORD -#define AL_INT SZ_WORD -#define AL_LONG SZ_WORD -#define AL_FLOAT 
SZ_WORD -#define AL_DOUBLE SZ_WORD -#define AL_POINTER SZ_WORD -#define AL_STRUCT 1 -#define AL_UNION 1 - - -!File: botch_free.h -/*#define BOTCH_FREE 1 /* when defined, botch freed memory, as a check */ - - -!File: dataflow.h -/*#define DATAFLOW 1 /* produce some compile-time xref */ - - -!File: debug.h -/*#define DEBUG 1 /* perform various self-tests */ -#define NDEBUG 1 /* disable assertions */ - - -!File: use_tmp.h -#define PREPEND_SCOPES 1 /* collect exa, exp, ina and inp commands - and if USE_TMP is defined let them - precede the rest of the generated - compact code */ -/*#define USE_TMP 1 /* use C_insertpart, C_endpart mechanism - to generate EM-code in the order needed - for the code-generators. If not defined, - the old-style peephole optimizer is - needed. */ - - -!File: parbufsize.h -#define PARBUFSIZE 1024 - - -!File: textsize.h -#define ITEXTSIZE 32 /* 1st piece of memory for repl. text */ -#define RTEXTSIZE 16 /* stepsize for enlarging repl.text */ - - -!File: inputtype.h -#define INP_READ_IN_ONE 1 /* read input file in one */ - - -!File: nopp.h -/*#define NOPP 1 /* if NOT defined, use built-int preprocessor */ - - -!File: nobitfield.h -/*#define NOBITFIELD 1 /* if NOT defined, implement bitfields */ - - -!File: spec_arith.h -/* describes internal compiler arithmetics */ -/*#define SPECIAL_ARITHMETICS /* something different from native long */ - - -!File: static.h -#define GSTATIC /* for large global "static" arrays */ - - -!File: nofloat.h -/*#define NOFLOAT 1 /* if NOT defined, floats are implemented */ - - -!File: noRoption.h -/*#define NOROPTION 1 /* if NOT defined, R option is implemented */ - - -!File: nocross.h -#define NOCROSS 1 /* if NOT defined, cross compiler */ - - -!File: regcount.h -/*#define REGCOUNT 1 /* count occurrences for register messages */ - - -!File: dbsymtab.h -#define DBSYMTAB 1 /* ability to produce symbol table for debugger -*/ - - diff --git a/fast/f_c/proto.main b/fast/f_c/proto.main deleted file mode 100644 index 
5e113963e..000000000 --- a/fast/f_c/proto.main +++ /dev/null @@ -1,65 +0,0 @@ -# $Id$ - -# C compilation part. Not to be called directly. -# Instead, it is to be called by the Makefile. -# SRC_DIR, UTIL_HOME, TARGET_HOME, CC, COPTIONS, LINT, LINTOPTIONS, LDOPTIONS, -# CC_AND_MKDEP, SUF, LIBSUF, MACH should be set here. - -#PARAMS do not remove this line! - -MDIR = $(TARGET_HOME)/modules -LIBDIR = $(MDIR)/lib -LINTLIBDIR = $(UTIL_HOME)/modules/lib - -MALLOC = $(LIBDIR)/malloc.$(SUF) - -EMLIB = $(LIBDIR)/libem_mesO.$(LIBSUF) \ - $(LIBDIR)/libCEopt.$(LIBSUF) \ - $(TARGET_HOME)/lib.bin/$(MACH)/ce.$(LIBSUF) \ - $(TARGET_HOME)/lib.bin/$(MACH)/back.$(LIBSUF) \ - $(TARGET_HOME)/lib.bin/em_data.$(LIBSUF) \ - $(LIBDIR)/libobject.$(LIBSUF) - -MODLIB = $(LIBDIR)/libinput.$(LIBSUF) \ - $(LIBDIR)/libassert.$(LIBSUF) \ - $(LIBDIR)/liballoc.$(LIBSUF) \ - $(MALLOC) \ - $(LIBDIR)/libflt.$(LIBSUF) \ - $(LIBDIR)/libprint.$(LIBSUF) \ - $(LIBDIR)/libstring.$(LIBSUF) \ - $(LIBDIR)/libsystem.$(LIBSUF) - -LIBS = $(EMLIB) $(MODLIB) - -LINTLIBS = $(LINTLIBDIR)/$(LINTPREF)em_mes.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)emk.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)input.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)assert.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)alloc.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)flt.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)print.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)string.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)system.$(LINTSUF) - -PROFILE = -INCLUDES = -I. -I$(SRC_DIR) -I$(TARGET_HOME)/modules/h -I$(TARGET_HOME)/h -I$(TARGET_HOME)/modules/pkg -CFLAGS = $(PROFILE) $(INCLUDES) $(COPTIONS) -LINTFLAGS = $(INCLUDES) $(LINTOPTIONS) -LDFLAGS = $(PROFILE) $(LDOPTIONS) - -# C_SRC and OBJ should be set here. -#LISTS do not remove this line! 
- -all: main - -clean: - rm -f *.$(SUF) main - -lint: - $(LINT) $(LINTFLAGS) $(C_SRC) $(LINTLIBS) - -main: $(OBJ) - $(CC) $(LDFLAGS) $(OBJ) $(LIBS) -o main - -# do not remove the next line; it is used for generating dependencies -#DEPENDENCIES diff --git a/fast/f_c/proto.make b/fast/f_c/proto.make deleted file mode 100644 index f4e53be64..000000000 --- a/fast/f_c/proto.make +++ /dev/null @@ -1,264 +0,0 @@ -# $Id$ - -# make C compiler - -#PARAMS do not remove this line! - -UTIL_BIN = \ - $(UTIL_HOME)/bin -SRC_DIR = \ - $(SRC_HOME)/lang/cem/cemcom -FSRC_DIR = \ - $(SRC_HOME)/fast/f_c - -TABGEN= $(UTIL_BIN)/tabgen -LLGEN = $(UTIL_BIN)/LLgen -LLGENOPTIONS = \ - -v - -SRC_G = $(SRC_DIR)/program.g $(SRC_DIR)/declar.g \ - $(SRC_DIR)/expression.g $(SRC_DIR)/statement.g $(SRC_DIR)/ival.g -GEN_G = tokenfile.g -GFILES= $(GEN_G) $(SRC_G) - -SRC_C = \ - $(SRC_DIR)/Version.c \ - $(SRC_DIR)/LLlex.c \ - $(SRC_DIR)/LLmessage.c \ - $(SRC_DIR)/arith.c \ - $(SRC_DIR)/asm.c \ - $(SRC_DIR)/blocks.c \ - $(SRC_DIR)/ch7.c \ - $(SRC_DIR)/ch7bin.c \ - $(SRC_DIR)/ch7mon.c \ - $(SRC_DIR)/code.c \ - $(SRC_DIR)/conversion.c \ - $(SRC_DIR)/cstoper.c \ - $(SRC_DIR)/dataflow.c \ - $(SRC_DIR)/declarator.c \ - $(SRC_DIR)/decspecs.c \ - $(SRC_DIR)/domacro.c \ - $(SRC_DIR)/dumpidf.c \ - $(SRC_DIR)/error.c \ - $(SRC_DIR)/eval.c \ - $(SRC_DIR)/expr.c \ - $(SRC_DIR)/field.c \ - $(SRC_DIR)/idf.c \ - $(SRC_DIR)/init.c \ - $(SRC_DIR)/input.c \ - $(SRC_DIR)/l_comment.c \ - $(SRC_DIR)/l_ev_ord.c \ - $(SRC_DIR)/l_lint.c \ - $(SRC_DIR)/l_misc.c \ - $(SRC_DIR)/l_outdef.c \ - $(SRC_DIR)/l_states.c \ - $(SRC_DIR)/label.c \ - $(SRC_DIR)/main.c \ - $(SRC_DIR)/options.c \ - $(SRC_DIR)/replace.c \ - $(SRC_DIR)/scan.c \ - $(SRC_DIR)/skip.c \ - $(SRC_DIR)/stack.c \ - $(SRC_DIR)/struct.c \ - $(SRC_DIR)/switch.c \ - $(SRC_DIR)/tokenname.c \ - $(SRC_DIR)/type.c \ - $(SRC_DIR)/util.c \ - $(SRC_DIR)/stab.c - -GEN_C = tokenfile.c program.c declar.c expression.c statement.c ival.c \ - symbol2str.c char.c Lpars.c next.c -CFILES= 
$(SRC_C) $(GEN_C) - -SRC_H = \ - $(SRC_DIR)/LLlex.h \ - $(SRC_DIR)/align.h \ - $(SRC_DIR)/arith.h \ - $(SRC_DIR)/assert.h \ - $(SRC_DIR)/atw.h \ - $(SRC_DIR)/class.h \ - $(SRC_DIR)/decspecs.h \ - $(SRC_DIR)/file_info.h \ - $(SRC_DIR)/input.h \ - $(SRC_DIR)/interface.h \ - $(SRC_DIR)/l_class.h \ - $(SRC_DIR)/l_comment.h \ - $(SRC_DIR)/l_em.h \ - $(SRC_DIR)/l_lint.h \ - $(SRC_DIR)/label.h \ - $(SRC_DIR)/level.h \ - $(SRC_DIR)/mes.h \ - $(SRC_DIR)/sizes.h \ - $(SRC_DIR)/specials.h \ - $(SRC_DIR)/tokenname.h - -GEN_H = botch_free.h dataflow.h debug.h density.h errout.h \ - idfsize.h ifdepth.h inputtype.h lint.h \ - nobitfield.h nopp.h nocross.h \ - nparams.h numsize.h parbufsize.h pathlength.h Lpars.h \ - strsize.h target_sizes.h textsize.h use_tmp.h spec_arith.h static.h \ - regcount.h \ - code.h declar.h decspecs.h def.h expr.h field.h estack.h util.h \ - idf.h macro.h stmt.h struct.h switch.h type.h l_brace.h l_state.h \ - l_outdef.h stack.h lapbuf.h noRoption.h nofloat.h dbsymtab.h - -HFILES= $(GEN_H) $(SRC_H) - -NEXTFILES = \ - $(SRC_DIR)/code.str \ - $(SRC_DIR)/declar.str \ - $(SRC_DIR)/decspecs.str \ - $(SRC_DIR)/def.str \ - $(SRC_DIR)/expr.str \ - $(SRC_DIR)/field.str \ - $(SRC_DIR)/estack.str \ - $(SRC_DIR)/util.str \ - $(SRC_DIR)/idf.str \ - $(SRC_DIR)/macro.str \ - $(SRC_DIR)/stack.str \ - $(SRC_DIR)/stmt.str \ - $(SRC_DIR)/struct.str \ - $(SRC_DIR)/switch.str \ - $(SRC_DIR)/type.str \ - $(SRC_DIR)/l_brace.str \ - $(SRC_DIR)/l_state.str \ - $(SRC_DIR)/l_outdef.str - -all: make.main - make -f make.main main - -install: all - cp main $(TARGET_HOME)/lib.bin/c_ce - -cmp: all - -cmp main $(TARGET_HOME)/lib.bin/c_ce - -opr: - make pr | opr - -pr: - @pr $(FSRC_DIR)/proto.make $(FSRC_DIR)/proto.main \ - $(FSRC_DIR)/Parameters - -lint: make.main - make -f make.main lint - -Cfiles: hfiles LLfiles $(GEN_C) $(GEN_H) Makefile - echo $(CFILES) | tr ' ' '\012' > Cfiles - echo $(HFILES) | tr ' ' '\012' >> Cfiles - -resolved: Cfiles - CC="$(CC)" UTIL_HOME="$(UTIL_HOME)" 
do_resolve `cat Cfiles` > Cfiles.new - -if cmp -s Cfiles Cfiles.new ; then rm -f Cfiles.new ; else mv Cfiles.new Cfiles ; fi - touch resolved - -# there is no file called "dependencies"; we want dependencies checked -# every time. This means that make.main is made every time. Oh well ... -# it does not take much time. -dependencies: resolved - do_deps `grep '.c$$' Cfiles` - -make.main: dependencies make_macros lists $(FSRC_DIR)/proto.main - rm_deps $(FSRC_DIR)/proto.main | sed -e '/^.PARAMS/r make_macros' -e '/^.LISTS/r lists' > make.main - cat *.dep >> make.main - -make_macros: Makefile - echo 'SRC_DIR=$(SRC_DIR)' > make_macros - echo 'UTIL_HOME=$(UTIL_HOME)' >> make_macros - echo 'TARGET_HOME=$(TARGET_HOME)' >> make_macros - echo 'CC=$(CC)' >> make_macros - echo 'COPTIONS=$(COPTIONS) -DPEEPHOLE' >> make_macros - echo 'LDOPTIONS=$(LDOPTIONS)' >> make_macros - echo 'LINT=$(LINT)' >> make_macros - echo 'LINTSUF=$(LINTSUF)' >> make_macros - echo 'LINTPREF=$(LINTPREF)' >> make_macros - echo 'LINTOPTIONS=$(LINTOPTIONS)' >> make_macros - echo 'SUF=$(SUF)' >> make_macros - echo 'LIBSUF=$(LIBSUF)' >> make_macros - echo 'CC_AND_MKDEP=$(CC_AND_MKDEP)' >> make_macros - echo 'MACH=$(MACH)' >> make_macros - -lists: Cfiles - echo "C_SRC = \\" > lists - echo $(CFILES) >> lists - echo "OBJ = \\" >> lists - echo $(CFILES) | sed -e 's|[^ ]*/||g' -e 's/\.c/.$$(SUF)/g' >> lists - -clean: - -make -f make.main clean - rm -f $(GEN_C) $(GEN_G) $(GEN_H) hfiles LLfiles Cfiles LL.output - rm -f resolved *.dep lists make.main make_macros - -LLfiles: $(GFILES) - $(LLGEN) $(LLGENOPTIONS) $(GFILES) - @touch LLfiles - -hfiles: $(FSRC_DIR)/Parameters $(SRC_DIR)/make.hfiles - $(SRC_DIR)/make.hfiles $(FSRC_DIR)/Parameters - touch hfiles - -tokenfile.g: $(SRC_DIR)/tokenname.c $(SRC_DIR)/make.tokfile - $(SRC_DIR)/make.tokfile <$(SRC_DIR)/tokenname.c >tokenfile.g - -symbol2str.c: $(SRC_DIR)/tokenname.c $(SRC_DIR)/make.tokcase - $(SRC_DIR)/make.tokcase <$(SRC_DIR)/tokenname.c >symbol2str.c - -code.h: 
$(SRC_DIR)/code.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/code.str > code.h - -declar.h: $(SRC_DIR)/declar.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/declar.str > declar.h - -def.h: $(SRC_DIR)/def.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/def.str > def.h - -expr.h: $(SRC_DIR)/expr.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/expr.str > expr.h - -field.h: $(SRC_DIR)/field.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/field.str > field.h - -estack.h: $(SRC_DIR)/estack.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/estack.str > estack.h - -util.h: $(SRC_DIR)/util.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/util.str > util.h - -decspecs.h: $(SRC_DIR)/decspecs.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/decspecs.str > decspecs.h - -idf.h: $(SRC_DIR)/idf.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/idf.str > idf.h - -macro.h: $(SRC_DIR)/macro.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/macro.str > macro.h - -stack.h: $(SRC_DIR)/stack.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/stack.str > stack.h - -stmt.h: $(SRC_DIR)/stmt.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/stmt.str > stmt.h - -struct.h: $(SRC_DIR)/struct.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/struct.str > struct.h - -switch.h: $(SRC_DIR)/switch.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/switch.str > switch.h - -type.h: $(SRC_DIR)/type.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/type.str > type.h - -l_brace.h: $(SRC_DIR)/l_brace.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/l_brace.str > l_brace.h - -l_state.h: $(SRC_DIR)/l_state.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/l_state.str > l_state.h - -l_outdef.h: $(SRC_DIR)/l_outdef.str $(SRC_DIR)/make.allocd - 
$(SRC_DIR)/make.allocd < $(SRC_DIR)/l_outdef.str > l_outdef.h - -next.c: $(NEXTFILES) $(SRC_DIR)/make.next - $(SRC_DIR)/make.next $(NEXTFILES) > next.c - -char.c: $(SRC_DIR)/char.tab - $(TABGEN) -f$(SRC_DIR)/char.tab >char.c diff --git a/fast/f_m2/.distr b/fast/f_m2/.distr deleted file mode 100644 index ebf6f5c24..000000000 --- a/fast/f_m2/.distr +++ /dev/null @@ -1,3 +0,0 @@ -Parameters -proto.main -proto.make diff --git a/fast/f_m2/Parameters b/fast/f_m2/Parameters deleted file mode 100644 index 211ca9431..000000000 --- a/fast/f_m2/Parameters +++ /dev/null @@ -1,101 +0,0 @@ -!File: errout.h -#define ERROUT STDERR /* file pointer for writing messages */ -#define ERR_SHADOW 5 /* a syntax error overshadows error messages - until ERR_SHADOW symbols have been - accepted without syntax error */ - - -!File: idfsize.h -#define IDFSIZE 128 /* maximum significant length of an identifier */ - - -!File: numsize.h -#define NUMSIZE 256 /* maximum length of a numeric constant */ - - -!File: strsize.h -#define ISTRSIZE 32 /* minimum number of bytes allocated for - storing a string */ -#define RSTRSIZE 8 /* step size in enlarging the memory for - the storage of a string */ - - -!File: target_sizes.h -#define MAXSIZE 8 /* the maximum of the SZ_* constants */ - -/* target machine sizes */ -#define SZ_CHAR ((arith)1) -#define SZ_SHORT ((arith)2) -#define SZ_WORD ((arith)4) -#define SZ_INT ((arith)4) -#define SZ_LONG ((arith)4) -#define SZ_FLOAT ((arith)4) -#define SZ_DOUBLE ((arith)8) -#define SZ_POINTER ((arith)4) - -/* target machine alignment requirements */ -#define AL_CHAR 1 -#define AL_SHORT ((int)SZ_SHORT) -#define AL_WORD ((int)SZ_WORD) -#define AL_INT ((int)SZ_WORD) -#define AL_LONG ((int)SZ_WORD) -#define AL_FLOAT ((int)SZ_WORD) -#define AL_DOUBLE ((int)SZ_WORD) -#define AL_POINTER ((int)SZ_WORD) -#define AL_STRUCT ((int)SZ_WORD) - - -!File: debugcst.h -/*#define DEBUG 1 /* perform various self-tests */ -#define NDEBUG 1 /* disable assertions */ - -!File: inputtype.h 
-#define INP_READ_IN_ONE 1 /* read input file in one */ - - -!File: density.h -#define DENSITY 3 /* see casestat.C for an explanation */ - - -!File: squeeze.h -/*#define SQUEEZE 1 /* define on "small" machines */ - - -!File: strict3rd.h -/*#define STRICT_3RD_ED 1 /* define on "small" machines, and if you want - a compiler that only implements "3rd edition" - Modula-2 - */ - - -!File: nocross.h -#define NOCROSS 1 /* define when cross-compiler not needed */ - - -!File: nostrict.h -/*#define NOSTRICT 1 /* define when STRICT warnings disabled - (yet another squeezing method) - */ - - -!File: bigresult.h -#define BIG_RESULT_ON_STACK 1 /* define when function results must be - put on the stack; in this case, caller - reserves space for it. When not defined, - callee puts result in global data area and - returns a pointer to it - */ - - -!File: dbsymtab.h -#define DBSYMTAB 1 /* ability to produce symbol table for debugger */ - - -!File: use_insert.h -/*#define USE_INSERT 1 /* use C_insertpart mechanism */ - - -!File: uns_arith.h -#define UNSIGNED_ARITH unsigned arith - - diff --git a/fast/f_m2/proto.main b/fast/f_m2/proto.main deleted file mode 100644 index 5e113963e..000000000 --- a/fast/f_m2/proto.main +++ /dev/null @@ -1,65 +0,0 @@ -# $Id$ - -# C compilation part. Not to be called directly. -# Instead, it is to be called by the Makefile. -# SRC_DIR, UTIL_HOME, TARGET_HOME, CC, COPTIONS, LINT, LINTOPTIONS, LDOPTIONS, -# CC_AND_MKDEP, SUF, LIBSUF, MACH should be set here. - -#PARAMS do not remove this line! 
- -MDIR = $(TARGET_HOME)/modules -LIBDIR = $(MDIR)/lib -LINTLIBDIR = $(UTIL_HOME)/modules/lib - -MALLOC = $(LIBDIR)/malloc.$(SUF) - -EMLIB = $(LIBDIR)/libem_mesO.$(LIBSUF) \ - $(LIBDIR)/libCEopt.$(LIBSUF) \ - $(TARGET_HOME)/lib.bin/$(MACH)/ce.$(LIBSUF) \ - $(TARGET_HOME)/lib.bin/$(MACH)/back.$(LIBSUF) \ - $(TARGET_HOME)/lib.bin/em_data.$(LIBSUF) \ - $(LIBDIR)/libobject.$(LIBSUF) - -MODLIB = $(LIBDIR)/libinput.$(LIBSUF) \ - $(LIBDIR)/libassert.$(LIBSUF) \ - $(LIBDIR)/liballoc.$(LIBSUF) \ - $(MALLOC) \ - $(LIBDIR)/libflt.$(LIBSUF) \ - $(LIBDIR)/libprint.$(LIBSUF) \ - $(LIBDIR)/libstring.$(LIBSUF) \ - $(LIBDIR)/libsystem.$(LIBSUF) - -LIBS = $(EMLIB) $(MODLIB) - -LINTLIBS = $(LINTLIBDIR)/$(LINTPREF)em_mes.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)emk.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)input.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)assert.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)alloc.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)flt.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)print.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)string.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)system.$(LINTSUF) - -PROFILE = -INCLUDES = -I. -I$(SRC_DIR) -I$(TARGET_HOME)/modules/h -I$(TARGET_HOME)/h -I$(TARGET_HOME)/modules/pkg -CFLAGS = $(PROFILE) $(INCLUDES) $(COPTIONS) -LINTFLAGS = $(INCLUDES) $(LINTOPTIONS) -LDFLAGS = $(PROFILE) $(LDOPTIONS) - -# C_SRC and OBJ should be set here. -#LISTS do not remove this line! - -all: main - -clean: - rm -f *.$(SUF) main - -lint: - $(LINT) $(LINTFLAGS) $(C_SRC) $(LINTLIBS) - -main: $(OBJ) - $(CC) $(LDFLAGS) $(OBJ) $(LIBS) -o main - -# do not remove the next line; it is used for generating dependencies -#DEPENDENCIES diff --git a/fast/f_m2/proto.make b/fast/f_m2/proto.make deleted file mode 100644 index 9b689210f..000000000 --- a/fast/f_m2/proto.make +++ /dev/null @@ -1,156 +0,0 @@ -# $Id$ - -# make Modula-2 compiler - -#PARAMS do not remove this line! 
- -UTIL_BIN = \ - $(UTIL_HOME)/bin -SRC_DIR = \ - $(SRC_HOME)/lang/m2/comp -FSRC_DIR = \ - $(SRC_HOME)/fast/f_m2 - -TABGEN= $(UTIL_BIN)/tabgen -LLGEN = $(UTIL_BIN)/LLgen -LLGENOPTIONS = \ - -v - -SRC_G = $(SRC_DIR)/program.g $(SRC_DIR)/declar.g \ - $(SRC_DIR)/expression.g $(SRC_DIR)/statement.g -GEN_G = tokenfile.g -GFILES= $(GEN_G) $(SRC_G) - -SRC_C = $(SRC_DIR)/LLlex.c $(SRC_DIR)/LLmessage.c $(SRC_DIR)/error.c \ - $(SRC_DIR)/main.c $(SRC_DIR)/tokenname.c $(SRC_DIR)/idf.c \ - $(SRC_DIR)/input.c $(SRC_DIR)/type.c $(SRC_DIR)/def.c \ - $(SRC_DIR)/misc.c $(SRC_DIR)/enter.c $(SRC_DIR)/defmodule.c \ - $(SRC_DIR)/typequiv.c $(SRC_DIR)/node.c $(SRC_DIR)/cstoper.c \ - $(SRC_DIR)/chk_expr.c $(SRC_DIR)/options.c $(SRC_DIR)/walk.c \ - $(SRC_DIR)/desig.c $(SRC_DIR)/code.c $(SRC_DIR)/lookup.c \ - $(SRC_DIR)/Version.c $(SRC_DIR)/stab.c -GEN_C = tokenfile.c program.c declar.c expression.c statement.c \ - symbol2str.c char.c Lpars.c casestat.c tmpvar.c scope.c next.c -CFILES= $(SRC_C) $(GEN_C) - -SRC_H = $(SRC_DIR)/LLlex.h $(SRC_DIR)/chk_expr.h $(SRC_DIR)/class.h \ - $(SRC_DIR)/debug.h $(SRC_DIR)/desig.h $(SRC_DIR)/f_info.h \ - $(SRC_DIR)/idf.h $(SRC_DIR)/input.h $(SRC_DIR)/main.h \ - $(SRC_DIR)/misc.h $(SRC_DIR)/scope.h $(SRC_DIR)/standards.h \ - $(SRC_DIR)/tokenname.h $(SRC_DIR)/walk.h $(SRC_DIR)/warning.h \ - $(SRC_DIR)/SYSTEM.h -GEN_H = errout.h idfsize.h numsize.h strsize.h target_sizes.h bigresult.h \ - inputtype.h density.h squeeze.h nocross.h nostrict.h def.h debugcst.h \ - type.h Lpars.h node.h strict3rd.h real.h use_insert.h dbsymtab.h \ - uns_arith.h def.h type.h node.h real.h -HFILES= $(GEN_H) $(SRC_H) - -NEXTFILES = \ - $(SRC_DIR)/def.H $(SRC_DIR)/type.H $(SRC_DIR)/node.H $(SRC_DIR)/real.H \ - $(SRC_DIR)/scope.C $(SRC_DIR)/tmpvar.C $(SRC_DIR)/casestat.C - -all: make.main - make -f make.main main - -install: all - cp main $(TARGET_HOME)/lib.bin/m2_ce - -cmp: all - -cmp main $(TARGET_HOME)/lib.bin/m2_ce - -opr: - make pr | opr - -pr: - @pr $(FSRC_DIR)/proto.make 
$(FSRC_DIR)/proto.main \ - $(FSRC_DIR)/Parameters - -lint: make.main - make -f make.main lint - -Cfiles: hfiles LLfiles $(GEN_C) $(GEN_H) Makefile - echo $(CFILES) | tr ' ' '\012' > Cfiles - echo $(HFILES) | tr ' ' '\012' >> Cfiles - -resolved: Cfiles - CC="$(CC)" UTIL_HOME="$(UTIL_HOME)" do_resolve `cat Cfiles` > Cfiles.new - -if cmp -s Cfiles Cfiles.new ; then rm -f Cfiles.new ; else mv Cfiles.new Cfiles ; fi - touch resolved - -# there is no file called "dependencies"; we want dependencies checked -# every time. This means that make.main is made every time. Oh well ... -# it does not take much time. -dependencies: resolved - do_deps `grep '.c$$' Cfiles` - -make.main: dependencies make_macros lists $(FSRC_DIR)/proto.main - rm_deps $(FSRC_DIR)/proto.main | sed -e '/^.PARAMS/r make_macros' -e '/^.LISTS/r lists' > make.main - cat *.dep >> make.main - -make_macros: Makefile - echo 'SRC_DIR=$(SRC_DIR)' > make_macros - echo 'UTIL_HOME=$(UTIL_HOME)' >> make_macros - echo 'TARGET_HOME=$(TARGET_HOME)' >> make_macros - echo 'CC=$(CC)' >> make_macros - echo 'COPTIONS=$(COPTIONS) -DPEEPHOLE' >> make_macros - echo 'LDOPTIONS=$(LDOPTIONS)' >> make_macros - echo 'LINT=$(LINT)' >> make_macros - echo 'LINTOPTIONS=$(LINTOPTIONS)' >> make_macros - echo 'LINTSUF=$(LINTSUF)' >> make_macros - echo 'LINTPREF=$(LINTPREF)' >> make_macros - echo 'SUF=$(SUF)' >> make_macros - echo 'LIBSUF=$(LIBSUF)' >> make_macros - echo 'CC_AND_MKDEP=$(CC_AND_MKDEP)' >> make_macros - echo 'MACH=$(MACH)' >> make_macros - -lists: Cfiles - echo "C_SRC = \\" > lists - echo $(CFILES) >> lists - echo "OBJ = \\" >> lists - echo $(CFILES) | sed -e 's|[^ ]*/||g' -e 's/\.c/.$$(SUF)/g' >> lists - -clean: - -make -f make.main clean - rm -f $(GEN_C) $(GEN_G) $(GEN_H) hfiles LLfiles Cfiles LL.output - rm -f resolved *.dep lists make.main make_macros - -LLfiles: $(GFILES) - $(LLGEN) $(LLGENOPTIONS) $(GFILES) - @touch LLfiles - -hfiles: $(FSRC_DIR)/Parameters $(SRC_DIR)/make.hfiles - $(SRC_DIR)/make.hfiles 
$(FSRC_DIR)/Parameters - touch hfiles - -tokenfile.g: $(SRC_DIR)/tokenname.c $(SRC_DIR)/make.tokfile - $(SRC_DIR)/make.tokfile <$(SRC_DIR)/tokenname.c >tokenfile.g - -symbol2str.c: $(SRC_DIR)/tokenname.c $(SRC_DIR)/make.tokcase - $(SRC_DIR)/make.tokcase <$(SRC_DIR)/tokenname.c >symbol2str.c - -def.h: $(SRC_DIR)/make.allocd $(SRC_DIR)/def.H - $(SRC_DIR)/make.allocd < $(SRC_DIR)/def.H > def.h - -type.h: $(SRC_DIR)/make.allocd $(SRC_DIR)/type.H - $(SRC_DIR)/make.allocd < $(SRC_DIR)/type.H > type.h - -real.h: $(SRC_DIR)/make.allocd $(SRC_DIR)/real.H - $(SRC_DIR)/make.allocd < $(SRC_DIR)/real.H > real.h - -node.h: $(SRC_DIR)/make.allocd $(SRC_DIR)/node.H - $(SRC_DIR)/make.allocd < $(SRC_DIR)/node.H > node.h - -scope.c: $(SRC_DIR)/make.allocd $(SRC_DIR)/scope.C - $(SRC_DIR)/make.allocd < $(SRC_DIR)/scope.C > scope.c - -tmpvar.c: $(SRC_DIR)/make.allocd $(SRC_DIR)/tmpvar.C - $(SRC_DIR)/make.allocd < $(SRC_DIR)/tmpvar.C > tmpvar.c - -casestat.c: $(SRC_DIR)/make.allocd $(SRC_DIR)/casestat.C - $(SRC_DIR)/make.allocd < $(SRC_DIR)/casestat.C > casestat.c - -next.c: $(NEXTFILES) $(SRC_DIR)/make.next - $(SRC_DIR)/make.next $(NEXTFILES) > next.c - -char.c: $(SRC_DIR)/char.tab - $(TABGEN) -f$(SRC_DIR)/char.tab >char.c diff --git a/fast/f_pc/.distr b/fast/f_pc/.distr deleted file mode 100644 index ebf6f5c24..000000000 --- a/fast/f_pc/.distr +++ /dev/null @@ -1,3 +0,0 @@ -Parameters -proto.main -proto.make diff --git a/fast/f_pc/Parameters b/fast/f_pc/Parameters deleted file mode 100644 index e26506bb5..000000000 --- a/fast/f_pc/Parameters +++ /dev/null @@ -1,62 +0,0 @@ -!File: debugcst.h -/*#define DEBUG 1 /* perform various self-tests */ -#define NDEBUG 1 /* disable assertions */ - - -!File: density.h -#define DENSITY 3 /* to determine, if a csa or csb - instruction must be generated */ - - -!File: errout.h -#define ERROUT STDERR /* file pointer for writing messages */ -#define MAXERR_LINE 5 /* maximum number of error messages given - on the same input line. 
*/ - - -!File: idfsize.h -#define IDFSIZE 128 /* max. significant length of an identifier */ - - -!File: inputtype.h -#define INP_READ_IN_ONE 1 /* read input file in one */ - - -!File: numsize.h -#define NUMSIZE 256 /* maximum length of a numeric constant */ - - -!File: strsize.h -#define ISTRSIZE 32 /* minimum number of bytes allocated for - storing a string */ -#define RSTRSIZE 8 /* step size in enlarging the memory for - the storage of a string */ - - -!File: target_sizes.h -#define MAXSIZE 8 /* the maximum of the SZ_* constants */ - -/* target machine sizes */ -#define SZ_CHAR (arith)1 -#define SZ_WORD (arith)4 -#define SZ_INT (arith)4 -#define SZ_LONG (arith)4 -#define SZ_POINTER (arith)4 -#define SZ_REAL (arith)8 - -/* target machine alignment requirements */ -#define AL_CHAR 1 -#define AL_WORD ((int)SZ_WORD) -#define AL_INT ((int)SZ_WORD) -#define AL_LONG ((int)SZ_WORD) -#define AL_POINTER ((int)SZ_WORD) -#define AL_REAL ((int)SZ_WORD) -#define AL_STRUCT ((int)SZ_WORD) - - -!File: nocross.h -#define NOCROSS 1 /* define when cross compiler not needed */ - - -!File: dbsymtab.h -#define DBSYMTAB 1 /* ability to produce symbol table for debugger */ diff --git a/fast/f_pc/proto.main b/fast/f_pc/proto.main deleted file mode 100644 index 80f41d6c0..000000000 --- a/fast/f_pc/proto.main +++ /dev/null @@ -1,65 +0,0 @@ -# $Id$ - -# C compilation part. Not to be called directly. -# Instead, it is to be called by the Makefile. -# SRC_DIR, UTIL_HOME, TARGET_HOME, CC, COPTIONS, LINT, LINTOPTIONS, LDOPTIONS, -# CC_AND_MKDEP, SUF, LIBSUF, MACH should be set here. - -#PARAMS do not remove this line! 
- -MDIR = $(TARGET_HOME)/modules -LIBDIR = $(MDIR)/lib -LINTLIBDIR = $(UTIL_HOME)/modules/lib - -MALLOC = $(LIBDIR)/malloc.$(SUF) - -EMLIB = $(LIBDIR)/libem_mesO.$(LIBSUF) \ - $(LIBDIR)/libCEopt.$(LIBSUF) \ - $(TARGET_HOME)/lib.bin/$(MACH)/ce.$(LIBSUF) \ - $(TARGET_HOME)/lib.bin/$(MACH)/back.$(LIBSUF) \ - $(TARGET_HOME)/lib.bin/em_data.$(LIBSUF) \ - $(LIBDIR)/libobject.$(LIBSUF) - -MODLIB = $(LIBDIR)/libinput.$(LIBSUF) \ - $(LIBDIR)/libassert.$(LIBSUF) \ - $(LIBDIR)/liballoc.$(LIBSUF) \ - $(MALLOC) \ - $(LIBDIR)/libflt.$(LIBSUF) \ - $(LIBDIR)/libprint.$(LIBSUF) \ - $(LIBDIR)/libstring.$(LIBSUF) \ - $(LIBDIR)/libsystem.$(LIBSUF) - -LIBS = $(EMLIB) $(MODLIB) - -LINTLIBS = $(LINTLIBDIR)/$(LINTPREF)em_mes.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)emk.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)input.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)assert.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)alloc.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)flt.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)print.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)string.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)system.$(LINTSUF) - -PROFILE = -INCLUDES = -I. -I$(SRC_DIR) -I$(TARGET_HOME)/modules/h -I$(TARGET_HOME)/h -I$(TARGET_HOME)/modules/pkg -CFLAGS = $(PROFILE) $(INCLUDES) $(COPTIONS) -DSTATIC=static -LINTFLAGS = $(INCLUDES) $(LINTOPTIONS) -DNORCSID -DSTATIC=static -LDFLAGS = $(PROFILE) $(LDOPTIONS) - -# C_SRC and OBJ should be set here. -#LISTS do not remove this line! - -all: main - -clean: - rm -f *.$(SUF) main - -lint: - $(LINT) $(LINTFLAGS) $(C_SRC) $(LINTLIBS) - -main: $(OBJ) - $(CC) $(LDFLAGS) $(OBJ) $(LIBS) -o main - -# do not remove the next line; it is used for generating dependencies -#DEPENDENCIES diff --git a/fast/f_pc/proto.make b/fast/f_pc/proto.make deleted file mode 100644 index 47d04e764..000000000 --- a/fast/f_pc/proto.make +++ /dev/null @@ -1,157 +0,0 @@ -# $Id$ - -# make Pascal compiler - -#PARAMS do not remove this line! 
- -UTIL_BIN = \ - $(UTIL_HOME)/bin -SRC_DIR = \ - $(SRC_HOME)/lang/pc/comp -FSRC_DIR = \ - $(SRC_HOME)/fast/f_pc - -TABGEN= $(UTIL_BIN)/tabgen -LLGEN = $(UTIL_BIN)/LLgen -LLGENOPTIONS = \ - -v - -SRC_G = $(SRC_DIR)/program.g $(SRC_DIR)/declar.g \ - $(SRC_DIR)/expression.g $(SRC_DIR)/statement.g -GEN_G = tokenfile.g -GFILES= $(GEN_G) $(SRC_G) - -SRC_C = $(SRC_DIR)/LLlex.c $(SRC_DIR)/LLmessage.c $(SRC_DIR)/body.c \ - $(SRC_DIR)/error.c $(SRC_DIR)/label.c $(SRC_DIR)/readwrite.c \ - $(SRC_DIR)/main.c $(SRC_DIR)/tokenname.c $(SRC_DIR)/idf.c \ - $(SRC_DIR)/input.c $(SRC_DIR)/type.c $(SRC_DIR)/def.c \ - $(SRC_DIR)/misc.c $(SRC_DIR)/enter.c $(SRC_DIR)/progs.c \ - $(SRC_DIR)/typequiv.c $(SRC_DIR)/node.c $(SRC_DIR)/cstoper.c \ - $(SRC_DIR)/chk_expr.c $(SRC_DIR)/options.c $(SRC_DIR)/scope.c \ - $(SRC_DIR)/desig.c $(SRC_DIR)/code.c $(SRC_DIR)/lookup.c \ - $(SRC_DIR)/stab.c -GEN_C = tokenfile.c program.c declar.c expression.c statement.c \ - symbol2str.c char.c Lpars.c casestat.c tmpvar.c next.c -CFILES= $(SRC_C) $(GEN_C) - -SRC_H = $(SRC_DIR)/LLlex.h $(SRC_DIR)/chk_expr.h $(SRC_DIR)/class.h \ - $(SRC_DIR)/const.h $(SRC_DIR)/debug.h $(SRC_DIR)/f_info.h \ - $(SRC_DIR)/idf.h $(SRC_DIR)/input.h $(SRC_DIR)/main.h \ - $(SRC_DIR)/misc.h $(SRC_DIR)/required.h $(SRC_DIR)/tokenname.h - -GEN_H = errout.h idfsize.h numsize.h strsize.h target_sizes.h \ - inputtype.h density.h nocross.h def.h debugcst.h \ - type.h Lpars.h node.h dbsymtab.h scope.h desig.h - -HFILES= $(GEN_H) $(SRC_H) - -NEXTFILES = \ - $(SRC_DIR)/def.H $(SRC_DIR)/type.H $(SRC_DIR)/node.H \ - $(SRC_DIR)/scope.H $(SRC_DIR)/desig.H \ - $(SRC_DIR)/tmpvar.C $(SRC_DIR)/casestat.C - -all: make.main - make -f make.main main - -install: all - cp main $(TARGET_HOME)/lib.bin/pc_ce - -cmp: all - -cmp main $(TARGET_HOME)/lib.bin/pc_ce - -opr: - make pr | opr - -pr: - @pr $(FSRC_DIR)/proto.make $(FSRC_DIR)/proto.main \ - $(FSRC_DIR)/Parameters - -lint: make.main - make -f make.main lint - -Cfiles: hfiles LLfiles $(GEN_C) $(GEN_H) 
Makefile - echo $(CFILES) | tr ' ' '\012' > Cfiles - echo $(HFILES) | tr ' ' '\012' >> Cfiles - -resolved: Cfiles - CC="$(CC)" UTIL_HOME="$(UTIL_HOME)" do_resolve `cat Cfiles` > Cfiles.new - -if cmp -s Cfiles Cfiles.new ; then rm -f Cfiles.new ; else mv Cfiles.new Cfiles ; fi - touch resolved - -# there is no file called "dependencies"; we want dependencies checked -# every time. This means that make.main is made every time. Oh well ... -# it does not take much time. -dependencies: resolved - do_deps `grep '.c$$' Cfiles` - -make.main: dependencies make_macros lists $(FSRC_DIR)/proto.main - rm_deps $(FSRC_DIR)/proto.main | sed -e '/^.PARAMS/r make_macros' -e '/^.LISTS/r lists' > make.main - cat *.dep >> make.main - -make_macros: Makefile - echo 'SRC_DIR=$(SRC_DIR)' > make_macros - echo 'UTIL_HOME=$(UTIL_HOME)' >> make_macros - echo 'TARGET_HOME=$(TARGET_HOME)' >> make_macros - echo 'CC=$(CC)' >> make_macros - echo 'COPTIONS=$(COPTIONS) -DPEEPHOLE' >> make_macros - echo 'LDOPTIONS=$(LDOPTIONS)' >> make_macros - echo 'LINT=$(LINT)' >> make_macros - echo 'LINTOPTIONS=$(LINTOPTIONS)' >> make_macros - echo 'LINTSUF=$(LINTSUF)' >> make_macros - echo 'LINTPREF=$(LINTPREF)' >> make_macros - echo 'SUF=$(SUF)' >> make_macros - echo 'LIBSUF=$(LIBSUF)' >> make_macros - echo 'CC_AND_MKDEP=$(CC_AND_MKDEP)' >> make_macros - echo 'MACH=$(MACH)' >> make_macros - -lists: Cfiles - echo "C_SRC = \\" > lists - echo $(CFILES) >> lists - echo "OBJ = \\" >> lists - echo $(CFILES) | sed -e 's|[^ ]*/||g' -e 's/\.c/.$$(SUF)/g' >> lists - -clean: - -make -f make.main clean - rm -f $(GEN_C) $(GEN_G) $(GEN_H) hfiles LLfiles Cfiles LL.output - rm -f resolved *.dep lists make.main make_macros - -LLfiles: $(GFILES) - $(LLGEN) $(LLGENOPTIONS) $(GFILES) - @touch LLfiles - -hfiles: $(FSRC_DIR)/Parameters $(SRC_DIR)/make.hfiles - $(SRC_DIR)/make.hfiles $(FSRC_DIR)/Parameters - touch hfiles - -tokenfile.g: $(SRC_DIR)/tokenname.c $(SRC_DIR)/make.tokfile - $(SRC_DIR)/make.tokfile <$(SRC_DIR)/tokenname.c 
>tokenfile.g - -symbol2str.c: $(SRC_DIR)/tokenname.c $(SRC_DIR)/make.tokcase - $(SRC_DIR)/make.tokcase <$(SRC_DIR)/tokenname.c >symbol2str.c - -def.h: $(SRC_DIR)/make.allocd $(SRC_DIR)/def.H - $(SRC_DIR)/make.allocd < $(SRC_DIR)/def.H > def.h - -type.h: $(SRC_DIR)/make.allocd $(SRC_DIR)/type.H - $(SRC_DIR)/make.allocd < $(SRC_DIR)/type.H > type.h - -scope.h: $(SRC_DIR)/make.allocd $(SRC_DIR)/scope.H - $(SRC_DIR)/make.allocd < $(SRC_DIR)/scope.H > scope.h - -node.h: $(SRC_DIR)/make.allocd $(SRC_DIR)/node.H - $(SRC_DIR)/make.allocd < $(SRC_DIR)/node.H > node.h - -desig.h: $(SRC_DIR)/make.allocd $(SRC_DIR)/desig.H - $(SRC_DIR)/make.allocd < $(SRC_DIR)/desig.H > desig.h - -tmpvar.c: $(SRC_DIR)/make.allocd $(SRC_DIR)/tmpvar.C - $(SRC_DIR)/make.allocd < $(SRC_DIR)/tmpvar.C > tmpvar.c - -casestat.c: $(SRC_DIR)/make.allocd $(SRC_DIR)/casestat.C - $(SRC_DIR)/make.allocd < $(SRC_DIR)/casestat.C > casestat.c - -next.c: $(NEXTFILES) $(SRC_DIR)/make.next - $(SRC_DIR)/make.next $(NEXTFILES) > next.c - -char.c: $(SRC_DIR)/char.tab - $(TABGEN) -f$(SRC_DIR)/char.tab >char.c diff --git a/fcc/.distr b/fcc/.distr deleted file mode 100644 index 04696d0c4..000000000 --- a/fcc/.distr +++ /dev/null @@ -1,3 +0,0 @@ -Action -cemcom -driver diff --git a/fcc/Action b/fcc/Action deleted file mode 100644 index 3f4edc99b..000000000 --- a/fcc/Action +++ /dev/null @@ -1,6 +0,0 @@ -name "Fast cc-compatible C compiler" -dir cemcom -end -name "Driver for fast cc-compatible C compiler" -dir driver -end diff --git a/fcc/cemcom/.distr b/fcc/cemcom/.distr deleted file mode 100644 index 7f00154bd..000000000 --- a/fcc/cemcom/.distr +++ /dev/null @@ -1,4 +0,0 @@ -Parameters.sun3 -Parameters.vax4 -proto.main -proto.make diff --git a/fcc/cemcom/Parameters.sun3 b/fcc/cemcom/Parameters.sun3 deleted file mode 100644 index 9b4540d7e..000000000 --- a/fcc/cemcom/Parameters.sun3 +++ /dev/null @@ -1,148 +0,0 @@ -!File: lint.h -/*#define LINT 1 /* if defined, 'lint' is produced */ - - -!File: pathlength.h -#define 
PATHLENGTH 1024 /* max. length of path to file */ - - -!File: errout.h -#define ERROUT STDERR /* file pointer for writing messages */ -#define ERR_SHADOW 5 /* a syntax error overshadows error messages - until ERR_SHADOW symbols have been - accepted without syntax error */ - - -!File: idfsize.h -#define IDFSIZE 64 /* maximum significant length of an identifier */ - - -!File: numsize.h -#define NUMSIZE 256 /* maximum length of a numeric constant */ - - -!File: nparams.h -#define NPARAMS 32 /* maximum number of parameters of macros */ - - -!File: ifdepth.h -#define IFDEPTH 256 /* maximum number of nested if-constructions */ - - -!File: density.h -#define DENSITY 3 /* see switch.[ch] for an explanation */ - - -!File: lapbuf.h -#define LAPBUF 4096 /* size of macro actual parameter buffer */ - - -!File: strsize.h -#define ISTRSIZE 32 /* minimum number of bytes allocated for - storing a string */ -#define RSTRSIZE 16 /* step size in enlarging the memory for - the storage of a string */ - - -!File: target_sizes.h -#define MAXSIZE 8 /* the maximum of the SZ_* constants */ - -/* target machine sizes */ -#define SZ_CHAR (arith)1 -#define SZ_SHORT (arith)2 -#define SZ_WORD (arith)4 -#define SZ_INT (arith)4 -#define SZ_LONG (arith)4 -#define SZ_FLOAT (arith)4 -#define SZ_DOUBLE (arith)8 -#define SZ_POINTER (arith)4 - -/* target machine alignment requirements */ -#define AL_CHAR 1 -#define AL_SHORT SZ_SHORT -#define AL_WORD 2 -#define AL_INT 2 -#define AL_LONG 2 -#define AL_FLOAT 2 -#define AL_DOUBLE 2 -#define AL_POINTER 2 -#define AL_STRUCT 2 -#define AL_UNION 2 - - -!File: botch_free.h -/*#define BOTCH_FREE 1 /* when defined, botch freed memory, as a check */ - - -!File: dataflow.h -/*#define DATAFLOW 1 /* produce some compile-time xref */ - - -!File: debug.h -/*#define DEBUG 1 /* perform various self-tests */ -#define NDEBUG 1 /* disable assertions */ - - -!File: use_tmp.h -#define PREPEND_SCOPES 1 /* collect exa, exp, ina and inp commands - and if USE_TMP is defined let 
them - precede the rest of the generated - compact code */ -/*#define USE_TMP 1 /* use C_insertpart, C_endpart mechanism - to generate EM-code in the order needed - for the code-generators. If not defined, - the old-style peephole optimizer is - needed. */ - - -!File: parbufsize.h -#define PARBUFSIZE 1024 - - -!File: textsize.h -#define ITEXTSIZE 32 /* 1st piece of memory for repl. text */ -#define RTEXTSIZE 16 /* stepsize for enlarging repl.text */ - - -!File: inputtype.h -#define INP_READ_IN_ONE 1 /* read input file in one */ - - -!File: nopp.h -/*#define NOPP 1 /* if NOT defined, use built-int preprocessor */ - - -!File: nobitfield.h -/*#define NOBITFIELD 1 /* if NOT defined, implement bitfields */ - - -!File: spec_arith.h -/* describes internal compiler arithmetics */ -/*#define SPECIAL_ARITHMETICS /* something different from native long */ - - -!File: static.h -#define GSTATIC /* for large global "static" arrays */ - - -!File: nofloat.h -/*#define NOFLOAT 1 /* if NOT defined, floats are implemented */ - - -!File: noRoption.h -/*#define NOROPTION 1 /* if NOT defined, R option is implemented */ - - -!File: nocross.h -#define NOCROSS 1 /* if NOT defined, cross compiler */ - - -!File: regcount.h -/*#define REGCOUNT 1 /* count occurrences for register messages */ - - -!File: dbsymtab.h -#define DBSYMTAB 1 /* ability to produce symbol table for debugger -*/ - - diff --git a/fcc/cemcom/Parameters.vax4 b/fcc/cemcom/Parameters.vax4 deleted file mode 100644 index 34a4f4564..000000000 --- a/fcc/cemcom/Parameters.vax4 +++ /dev/null @@ -1,148 +0,0 @@ -!File: lint.h -/*#define LINT 1 /* if defined, 'lint' is produced */ - - -!File: pathlength.h -#define PATHLENGTH 1024 /* max. 
length of path to file */ - - -!File: errout.h -#define ERROUT STDERR /* file pointer for writing messages */ -#define ERR_SHADOW 5 /* a syntax error overshadows error messages - until ERR_SHADOW symbols have been - accepted without syntax error */ - - -!File: idfsize.h -#define IDFSIZE 64 /* maximum significant length of an identifier */ - - -!File: numsize.h -#define NUMSIZE 256 /* maximum length of a numeric constant */ - - -!File: nparams.h -#define NPARAMS 32 /* maximum number of parameters of macros */ - - -!File: ifdepth.h -#define IFDEPTH 256 /* maximum number of nested if-constructions */ - - -!File: density.h -#define DENSITY 3 /* see switch.[ch] for an explanation */ - - -!File: lapbuf.h -#define LAPBUF 4096 /* size of macro actual parameter buffer */ - - -!File: strsize.h -#define ISTRSIZE 32 /* minimum number of bytes allocated for - storing a string */ -#define RSTRSIZE 16 /* step size in enlarging the memory for - the storage of a string */ - - -!File: target_sizes.h -#define MAXSIZE 8 /* the maximum of the SZ_* constants */ - -/* target machine sizes */ -#define SZ_CHAR (arith)1 -#define SZ_SHORT (arith)2 -#define SZ_WORD (arith)4 -#define SZ_INT (arith)4 -#define SZ_LONG (arith)4 -#define SZ_FLOAT (arith)4 -#define SZ_DOUBLE (arith)8 -#define SZ_POINTER (arith)4 - -/* target machine alignment requirements */ -#define AL_CHAR 1 -#define AL_SHORT SZ_SHORT -#define AL_WORD SZ_WORD -#define AL_INT SZ_WORD -#define AL_LONG SZ_WORD -#define AL_FLOAT SZ_WORD -#define AL_DOUBLE SZ_WORD -#define AL_POINTER SZ_WORD -#define AL_STRUCT 1 -#define AL_UNION 1 - - -!File: botch_free.h -/*#define BOTCH_FREE 1 /* when defined, botch freed memory, as a check */ - - -!File: dataflow.h -/*#define DATAFLOW 1 /* produce some compile-time xref */ - - -!File: debug.h -/*#define DEBUG 1 /* perform various self-tests */ -#define NDEBUG 1 /* disable assertions */ - - -!File: use_tmp.h -#define PREPEND_SCOPES 1 /* collect exa, exp, ina and inp commands - and if USE_TMP is 
defined let them - precede the rest of the generated - compact code */ -/*#define USE_TMP 1 /* use C_insertpart, C_endpart mechanism - to generate EM-code in the order needed - for the code-generators. If not defined, - the old-style peephole optimizer is - needed. */ - - -!File: parbufsize.h -#define PARBUFSIZE 1024 - - -!File: textsize.h -#define ITEXTSIZE 32 /* 1st piece of memory for repl. text */ -#define RTEXTSIZE 16 /* stepsize for enlarging repl.text */ - - -!File: inputtype.h -#define INP_READ_IN_ONE 1 /* read input file in one */ - - -!File: nopp.h -/*#define NOPP 1 /* if NOT defined, use built-int preprocessor */ - - -!File: nobitfield.h -/*#define NOBITFIELD 1 /* if NOT defined, implement bitfields */ - - -!File: spec_arith.h -/* describes internal compiler arithmetics */ -/*#define SPECIAL_ARITHMETICS /* something different from native long */ - - -!File: static.h -#define GSTATIC /* for large global "static" arrays */ - - -!File: nofloat.h -/*#define NOFLOAT 1 /* if NOT defined, floats are implemented */ - - -!File: noRoption.h -/*#define NOROPTION 1 /* if NOT defined, R option is implemented */ - - -!File: nocross.h -#define NOCROSS 1 /* if NOT defined, cross compiler */ - - -!File: regcount.h -/*#define REGCOUNT 1 /* count occurrences for register messages */ - - -!File: dbsymtab.h -#define DBSYMTAB 1 /* ability to produce symbol table for debugger -*/ - - diff --git a/fcc/cemcom/proto.main b/fcc/cemcom/proto.main deleted file mode 100644 index 3fb82f361..000000000 --- a/fcc/cemcom/proto.main +++ /dev/null @@ -1,64 +0,0 @@ -# $Id$ - -# C compilation part. Not to be called directly. -# Instead, it is to be called by the Makefile. -# SRC_DIR, UTIL_HOME, TARGET_HOME, CC, COPTIONS, LINT, LINTOPTIONS, LDOPTIONS, -# CC_AND_MKDEP, SUF, LIBSUF, CE should be set here. - -#PARAMS do not remove this line! 
- -MDIR = $(TARGET_HOME)/modules -LIBDIR = $(MDIR)/lib -LINTLIBDIR = $(UTIL_HOME)/modules/lib - -MALLOC = $(LIBDIR)/malloc.$(SUF) - -EMLIB = $(LIBDIR)/libem_mesO.$(LIBSUF) \ - $(LIBDIR)/libCEopt.$(LIBSUF) \ - $(CE) \ - $(TARGET_HOME)/lib.bin/em_data.$(LIBSUF) \ - $(LIBDIR)/libobject.$(LIBSUF) - -MODLIB = $(LIBDIR)/libinput.$(LIBSUF) \ - $(LIBDIR)/libassert.$(LIBSUF) \ - $(LIBDIR)/liballoc.$(LIBSUF) \ - $(MALLOC) \ - $(LIBDIR)/libflt.$(LIBSUF) \ - $(LIBDIR)/libprint.$(LIBSUF) \ - $(LIBDIR)/libstring.$(LIBSUF) \ - $(LIBDIR)/libsystem.$(LIBSUF) - -LIBS = $(EMLIB) $(MODLIB) - -LINTLIBS = $(LINTLIBDIR)/$(LINTPREF)em_mes.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)emk.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)input.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)assert.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)alloc.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)flt.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)print.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)string.$(LINTSUF) \ - $(LINTLIBDIR)/$(LINTPREF)system.$(LINTSUF) - -PROFILE = -INCLUDES = -I. -I$(SRC_DIR) -I$(TARGET_HOME)/modules/h -I$(TARGET_HOME)/h -I$(TARGET_HOME)/modules/pkg -CFLAGS = $(PROFILE) $(INCLUDES) $(COPTIONS) -LINTFLAGS = $(INCLUDES) $(LINTOPTIONS) -LDFLAGS = $(PROFILE) $(LDOPTIONS) - -# C_SRC and OBJ should be set here. -#LISTS do not remove this line! - -all: main - -clean: - rm -f *.$(SUF) main - -lint: - $(LINT) $(LINTFLAGS) $(C_SRC) $(LINTLIBS) - -main: $(OBJ) - $(CC) $(LDFLAGS) $(OBJ) $(LIBS) -o main - -# do not remove the next line; it is used for generating dependencies -#DEPENDENCIES diff --git a/fcc/cemcom/proto.make b/fcc/cemcom/proto.make deleted file mode 100644 index 5547298e2..000000000 --- a/fcc/cemcom/proto.make +++ /dev/null @@ -1,275 +0,0 @@ -# $Id$ - -# make C compiler - -#PARAMS do not remove this line! 
- -UTIL_BIN = \ - $(UTIL_HOME)/bin -SRC_DIR = \ - $(SRC_HOME)/lang/cem/cemcom -FSRC_DIR = \ - $(SRC_HOME)/fcc/cemcom - -TABGEN= $(UTIL_BIN)/tabgen -LLGEN = $(UTIL_BIN)/LLgen -LLGENOPTIONS = \ - -v - -SRC_G = $(SRC_DIR)/program.g $(SRC_DIR)/declar.g \ - $(SRC_DIR)/expression.g $(SRC_DIR)/statement.g $(SRC_DIR)/ival.g -GEN_G = tokenfile.g -GFILES= $(GEN_G) $(SRC_G) - -SRC_C = \ - $(SRC_DIR)/Version.c \ - $(SRC_DIR)/LLlex.c \ - $(SRC_DIR)/LLmessage.c \ - $(SRC_DIR)/arith.c \ - $(SRC_DIR)/asm.c \ - $(SRC_DIR)/blocks.c \ - $(SRC_DIR)/ch7.c \ - $(SRC_DIR)/ch7bin.c \ - $(SRC_DIR)/ch7mon.c \ - $(SRC_DIR)/code.c \ - $(SRC_DIR)/conversion.c \ - $(SRC_DIR)/cstoper.c \ - $(SRC_DIR)/dataflow.c \ - $(SRC_DIR)/declarator.c \ - $(SRC_DIR)/decspecs.c \ - $(SRC_DIR)/domacro.c \ - $(SRC_DIR)/dumpidf.c \ - $(SRC_DIR)/error.c \ - $(SRC_DIR)/eval.c \ - $(SRC_DIR)/expr.c \ - $(SRC_DIR)/field.c \ - $(SRC_DIR)/idf.c \ - $(SRC_DIR)/init.c \ - $(SRC_DIR)/input.c \ - $(SRC_DIR)/l_comment.c \ - $(SRC_DIR)/l_ev_ord.c \ - $(SRC_DIR)/l_lint.c \ - $(SRC_DIR)/l_misc.c \ - $(SRC_DIR)/l_outdef.c \ - $(SRC_DIR)/l_states.c \ - $(SRC_DIR)/label.c \ - $(SRC_DIR)/main.c \ - $(SRC_DIR)/options.c \ - $(SRC_DIR)/replace.c \ - $(SRC_DIR)/scan.c \ - $(SRC_DIR)/skip.c \ - $(SRC_DIR)/stack.c \ - $(SRC_DIR)/struct.c \ - $(SRC_DIR)/switch.c \ - $(SRC_DIR)/tokenname.c \ - $(SRC_DIR)/type.c \ - $(SRC_DIR)/util.c \ - $(SRC_DIR)/stab.c - -GEN_C = tokenfile.c program.c declar.c expression.c statement.c ival.c \ - symbol2str.c char.c Lpars.c next.c -CFILES= $(SRC_C) $(GEN_C) - -SRC_H = \ - $(SRC_DIR)/LLlex.h \ - $(SRC_DIR)/align.h \ - $(SRC_DIR)/arith.h \ - $(SRC_DIR)/assert.h \ - $(SRC_DIR)/atw.h \ - $(SRC_DIR)/class.h \ - $(SRC_DIR)/decspecs.h \ - $(SRC_DIR)/file_info.h \ - $(SRC_DIR)/input.h \ - $(SRC_DIR)/interface.h \ - $(SRC_DIR)/l_class.h \ - $(SRC_DIR)/l_comment.h \ - $(SRC_DIR)/l_em.h \ - $(SRC_DIR)/l_lint.h \ - $(SRC_DIR)/label.h \ - $(SRC_DIR)/level.h \ - $(SRC_DIR)/mes.h \ - $(SRC_DIR)/sizes.h \ - 
$(SRC_DIR)/specials.h \ - $(SRC_DIR)/tokenname.h - -GEN_H = botch_free.h dataflow.h debug.h density.h errout.h \ - idfsize.h ifdepth.h inputtype.h lint.h \ - nobitfield.h nopp.h nocross.h \ - nparams.h numsize.h parbufsize.h pathlength.h Lpars.h \ - strsize.h target_sizes.h textsize.h use_tmp.h spec_arith.h static.h \ - regcount.h \ - code.h declar.h decspecs.h def.h expr.h field.h estack.h util.h \ - idf.h macro.h stmt.h struct.h switch.h type.h l_brace.h l_state.h \ - l_outdef.h stack.h lapbuf.h noRoption.h nofloat.h dbsymtab.h - -HFILES= $(GEN_H) $(SRC_H) - -NEXTFILES = \ - $(SRC_DIR)/code.str \ - $(SRC_DIR)/declar.str \ - $(SRC_DIR)/decspecs.str \ - $(SRC_DIR)/def.str \ - $(SRC_DIR)/expr.str \ - $(SRC_DIR)/field.str \ - $(SRC_DIR)/estack.str \ - $(SRC_DIR)/util.str \ - $(SRC_DIR)/idf.str \ - $(SRC_DIR)/macro.str \ - $(SRC_DIR)/stack.str \ - $(SRC_DIR)/stmt.str \ - $(SRC_DIR)/struct.str \ - $(SRC_DIR)/switch.str \ - $(SRC_DIR)/type.str \ - $(SRC_DIR)/l_brace.str \ - $(SRC_DIR)/l_state.str \ - $(SRC_DIR)/l_outdef.str - -all: make.main - make -f make.main main - -install: all - cp main $(TARGET_HOME)/lib.bin/c_cccompat - -cmp: all - -cmp main $(TARGET_HOME)/lib.bin/c_cccompat - -opr: - make pr | opr - -pr: - @pr $(FSRC_DIR)/proto.make $(FSRC_DIR)/proto.main \ - $(FSRC_DIR)/Parameters.vax4 $(FSRC_DIR)/Parameters.sun3 - -lint: make.main - make -f make.main lint - -Cfiles: hfiles LLfiles $(GEN_C) $(GEN_H) Makefile - echo $(CFILES) | tr ' ' '\012' > Cfiles - echo $(HFILES) | tr ' ' '\012' >> Cfiles - -resolved: Cfiles - CC="$(CC)" UTIL_HOME="$(UTIL_HOME)" do_resolve `cat Cfiles` > Cfiles.new - -if cmp -s Cfiles Cfiles.new ; then rm -f Cfiles.new ; else mv Cfiles.new Cfiles ; fi - touch resolved - -# there is no file called "dependencies"; we want dependencies checked -# every time. This means that make.main is made every time. Oh well ... -# it does not take much time. 
-dependencies: resolved - do_deps `grep '.c$$' Cfiles` - -make.main: dependencies make_macros lists $(FSRC_DIR)/proto.main - rm_deps $(FSRC_DIR)/proto.main | sed -e '/^.PARAMS/r make_macros' -e '/^.LISTS/r lists' > make.main - cat *.dep >> make.main - -make_macros: Makefile - echo 'SRC_DIR=$(SRC_DIR)' > make_macros - echo 'UTIL_HOME=$(UTIL_HOME)' >> make_macros - echo 'TARGET_HOME=$(TARGET_HOME)' >> make_macros - echo 'CC=$(CC)' >> make_macros - echo 'COPTIONS=$(COPTIONS) -DPEEPHOLE' >> make_macros - echo 'LDOPTIONS=$(LDOPTIONS)' >> make_macros - echo 'LINT=$(LINT)' >> make_macros - echo 'LINTSUF=$(LINTSUF)' >> make_macros - echo 'LINTPREF=$(LINTPREF)' >> make_macros - echo 'LINTOPTIONS=$(LINTOPTIONS)' >> make_macros - echo 'SUF=$(SUF)' >> make_macros - echo 'LIBSUF=$(LIBSUF)' >> make_macros - echo 'CC_AND_MKDEP=$(CC_AND_MKDEP)' >> make_macros - if [ $(MACH) = m68020 ] ; then \ - echo 'CE=$(TARGET_HOME)/lib.bin/sun3/ce.$(LIBSUF) $(TARGET_HOME)/lib.bin/sun3/back.$(LIBSUF)' >> make_macros ; \ - else \ - echo 'CE=$(TARGET_HOME)/lib.bin/vax4/ce.$(LIBSUF) $(TARGET_HOME)/lib.bin/vax4/back_vax.$(LIBSUF)' >> make_macros ; \ - fi - -lists: Cfiles - echo "C_SRC = \\" > lists - echo $(CFILES) >> lists - echo "OBJ = \\" >> lists - echo $(CFILES) | sed -e 's|[^ ]*/||g' -e 's/\.c/.$$(SUF)/g' >> lists - -clean: - -make -f make.main clean - rm -f $(GEN_C) $(GEN_G) $(GEN_H) hfiles LLfiles Cfiles LL.output - rm -f resolved *.dep lists make.main make_macros - -LLfiles: $(GFILES) - $(LLGEN) $(LLGENOPTIONS) $(GFILES) - @touch LLfiles - -hfiles: Parameters $(SRC_DIR)/make.hfiles - $(SRC_DIR)/make.hfiles Parameters - touch hfiles - -Parameters: $(FSRC_DIR)/Parameters.sun3 $(FSRC_DIR)/Parameters.vax4 - if [ $(MACH) = m68020 ] ; then \ - cp $(FSRC_DIR)/Parameters.sun3 Parameters ; \ - else \ - cp $(FSRC_DIR)/Parameters.vax4 Parameters ; \ - fi - -tokenfile.g: $(SRC_DIR)/tokenname.c $(SRC_DIR)/make.tokfile - $(SRC_DIR)/make.tokfile <$(SRC_DIR)/tokenname.c >tokenfile.g - -symbol2str.c: 
$(SRC_DIR)/tokenname.c $(SRC_DIR)/make.tokcase - $(SRC_DIR)/make.tokcase <$(SRC_DIR)/tokenname.c >symbol2str.c - -code.h: $(SRC_DIR)/code.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/code.str > code.h - -declar.h: $(SRC_DIR)/declar.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/declar.str > declar.h - -def.h: $(SRC_DIR)/def.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/def.str > def.h - -expr.h: $(SRC_DIR)/expr.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/expr.str > expr.h - -field.h: $(SRC_DIR)/field.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/field.str > field.h - -estack.h: $(SRC_DIR)/estack.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/estack.str > estack.h - -util.h: $(SRC_DIR)/util.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/util.str > util.h - -decspecs.h: $(SRC_DIR)/decspecs.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/decspecs.str > decspecs.h - -idf.h: $(SRC_DIR)/idf.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/idf.str > idf.h - -macro.h: $(SRC_DIR)/macro.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/macro.str > macro.h - -stack.h: $(SRC_DIR)/stack.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/stack.str > stack.h - -stmt.h: $(SRC_DIR)/stmt.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/stmt.str > stmt.h - -struct.h: $(SRC_DIR)/struct.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/struct.str > struct.h - -switch.h: $(SRC_DIR)/switch.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/switch.str > switch.h - -type.h: $(SRC_DIR)/type.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/type.str > type.h - -l_brace.h: $(SRC_DIR)/l_brace.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/l_brace.str > l_brace.h - -l_state.h: $(SRC_DIR)/l_state.str $(SRC_DIR)/make.allocd - 
$(SRC_DIR)/make.allocd < $(SRC_DIR)/l_state.str > l_state.h - -l_outdef.h: $(SRC_DIR)/l_outdef.str $(SRC_DIR)/make.allocd - $(SRC_DIR)/make.allocd < $(SRC_DIR)/l_outdef.str > l_outdef.h - -next.c: $(NEXTFILES) $(SRC_DIR)/make.next - $(SRC_DIR)/make.next $(NEXTFILES) > next.c - -char.c: $(SRC_DIR)/char.tab - $(TABGEN) -f$(SRC_DIR)/char.tab >char.c diff --git a/fcc/driver/.distr b/fcc/driver/.distr deleted file mode 100644 index ebc669f71..000000000 --- a/fcc/driver/.distr +++ /dev/null @@ -1,3 +0,0 @@ -fcc.1 -fcc.c -proto.make diff --git a/fcc/driver/fcc.1 b/fcc/driver/fcc.1 deleted file mode 100644 index acd05b758..000000000 --- a/fcc/driver/fcc.1 +++ /dev/null @@ -1,148 +0,0 @@ -.TH FCC 1 -.SH NAME -fcc \- fast CC-compatible C compiler -.SH SYNOPSIS -.B fcc -[ -.B \-c -] -[ -.B \-v -] -[ -.B \-vn -] -[ \fB\-D\fIname\fR ] -[ \fB\-D\fIname\fB=\fIdef\fR ] -[ -.BI \-I pathname -] -[ -.B \-w -] -[ -.B \-o -.I outfile -] -[ -.B \-R -] -[ -.BI \-U name -] -[ -.BI -M compiler -] -.I sourcefile ... -.SH DESCRIPTION -.LP -.I Fcc -is a fast -.B C -compiler. It translates -.B C -programs -into cc(1)-compatible relocatable object modules, and does so in one pass. -Then, if the \fB\-c\fP flag is not given, -.I fcc -offers the object modules to a link-editor, -to create an executable binary. -.LP -.I Fcc -accepts several types of filename arguments. Files with -names ending in -.B .c -are taken to be -.B C -source programs. -They are compiled, and the resulting object module is placed in the current -directory. -The object module is named after its source file, the suffix -.B .o -replacing -.BR .c -in the name of the object. -.LP -Other arguments refer to loader options, -object modules, or object libraries. -Unless the -.B \-c -flag is given, these modules and libraries, together with the results of any -specified compilations, are passed (in the order given) to the -link-editor to produce -an output file named -.IR a.out . 
-You can specify a name for the executable by using the -.B \-o -option. -.SH OPTIONS -.LP -The \fB\-l\fIlib\fR, \fB\-n\fP, \fB\-N\fP, -\fB\-r\fP, \fB\-s\fP, \fB\-S\fP, and \fB\-u\fP options are -passed to the link-editor program. -The \fB\-u\fP option takes an extra argument. -.IP \fB\-c\fP -.br -Suppress the loading phase of the compilation, and force an object module to -be produced, even if only one program is compiled. -A single object module can be named explicitly using the -.B \-o -option. -.IP \fB\-D\fIname\fR\fB=\fIdef\fR -Define a symbol -.I name -to the -preprocessor, as if by "#define". -.IP \fB\-D\fIname\fR -.br -same as \fB\-D\fIname\fB=1\fR. -.IP \fB\-I\fIpathname\fR -.br -Add -.I pathname -to the list of directories in which to search for -.B #include -files with filenames not beginning with slash. -The compiler first searches for -.B #include -files in the directory containing -.I sourcefile, -then in directories in -.B \-I -options, and finally, in -.I /usr/include. -.IP "\fB\-o \fIoutput\fR" -Name the final output file -.I output. -.IP \fB\-U\fIname\fR -.br -Remove any initial definition of -.I name. -.IP \fB\-v\fP -.br -Verbose. Print the commands as they are executed. -.IP \fB\-vn\fP -.br -Verbose, no execute. Only print the commands, do not execute them. -.IP \fB\-w\fP -suppress warning messages. -.IP \fB\-R\fP -.br -test for more compatibility with Kernighan & Ritchie C [1]. -.IP \fB\-M\fIcompiler\fR -.br -use \fIcompiler\fR as C-2 compiler instead of the default. -.LP -Object modules produced by cc(1) and -.I fcc -can be freely mixed, as long as the link-editor is called through -.I fcc. -.SH "SEE ALSO" -.IP [1] -B.W. Kernighan, D. Ritchie, "\fIThe C programming Language\fP", Prentice-Hall Inc., 1978 -.IP [2] -E.H. Baalbergen, "\fIThe ACK CEM compiler\fP". -.IP [3] -cc(1) unix manual page. -.SH DIAGNOSTICS -Diagnostics are intended to be self-explanatory. 
diff --git a/fcc/driver/fcc.c b/fcc/driver/fcc.c deleted file mode 100644 index 101fa3fce..000000000 --- a/fcc/driver/fcc.c +++ /dev/null @@ -1,676 +0,0 @@ -/* fcc - Driver for fast cc-compatible ACK C compiler. - - Derived from the C compiler driver from Minix. - - Compile this file with - cc -O -I/config driver.c - Install the resulting binaries in the EM bin directory. - Suggested name: fcc -*/ - -#ifdef sun3 -#define MACHNAME "m68020" -#define SYSNAME "sun3" -#endif - -#ifdef vax4 -#define MACHNAME "vax4" -#define SYSNAME "vax4" -#endif - -#include -#include -#include -#include -#if __STDC__ -#include -#else -#include -#endif - - -/* - Version producing cc-compatible .o files in one pass. -*/ -#define MAXARGC 256 /* maximum number of arguments allowed in a list */ -#define USTR_SIZE 128 /* maximum length of string variable */ - -typedef char USTRING[USTR_SIZE]; - -struct arglist { - int al_argc; - char *al_argv[MAXARGC]; -}; - -#define CPP_NAME "$H/lib.bin/cpp" -#define LD_NAME "/bin/ld" -#define AS_NAME "/bin/as" -#define SHELL "/bin/sh" - -char *CPP; -char *COMP; - -int kids = -1; -int ecount = 0; - -struct arglist CPP_FLAGS = { - 7, - { - "-Dunix", - "-D_EM_WSIZE=4", - "-D_EM_PSIZE=4", - "-D_EM_SSIZE=2", - "-D_EM_LSIZE=4", - "-D_EM_FSIZE=4", - "-D_EM_DSIZE=8", - } -}; - -struct arglist LD_HEAD = { -#ifdef sun3 - 8, - { - "-dc", - "-dp", - "-e", - "start", - "-X", - "-L/usr/lib/fsoft", - "/usr/lib/crt0.o", - "/usr/lib/Fcrt1.o" - } -#endif -#ifdef vax4 - 2, - { - "-X", - "/lib/crt0.o" - } -#endif -}; - -struct arglist LD_TAIL = { - 2, - { - "$H/lib/$S/tail_ext", - "-lc" - } -}; - -struct arglist LD_FLAGS; - -struct arglist COMP_FLAGS; - -char *o_FILE = "a.out"; /* default name for executable file */ - -#define remove(str) ((noexec || unlink(str)), (str)[0] = '\0') -#define cleanup(str) (str && str[0] && remove(str)) -#define init(al) ((al)->al_argc = 1) - -char ProgCall[128]; - -struct arglist SRCFILES; -struct arglist LDFILES; - -int RET_CODE = 0; - -struct 
arglist CALL_VEC; - -int o_flag = 0; -int c_flag = 0; -int v_flag = 0; -int O_flag = 0; - -#if __STDC__ -char *mkstr(char *, ...); -#else -char *mkstr(); -#endif -char *malloc(); -char *alloc(); -char *extension(); -char *expand_string(); - -USTRING ofile; -USTRING BASE; -USTRING tmp_file; - -int noexec = 0; - -extern char *strcat(), *strcpy(), *mktemp(), *strchr(); - -trapcc(sig) - int sig; -{ - signal(sig, SIG_IGN); - if (kids != -1) kill(kids, sig); - cleanup(ofile); - cleanup(tmp_file); - exit(1); -} - -#define lang_suffix() "c" -#define comp_name() "$H/lib.bin/c_cccompat" - -int -lang_opt(str) - char *str; -{ - switch(str[1]) { - case '-': /* debug options */ - case 'R': /* strict K&R */ - case 'w': /* disable warnings */ - append(&COMP_FLAGS, str); - return 1; - } - return 0; -} - -main(argc, argv) - char *argv[]; -{ - char *str; - char **argvec; - int count; - char *ext; - register struct arglist *call = &CALL_VEC; - char *file; - char *ldfile; - int compile_cnt = 0; - - setbuf(stdout, (char *) 0); - basename(*argv++,ProgCall); - - COMP = expand_string(comp_name()); - CPP = expand_string(CPP_NAME); - -#ifdef vax4 - append(&CPP_FLAGS, "-Dvax"); -#endif -#ifdef sun3 - append(&CPP_FLAGS, "-Dsun"); - append(&CPP_FLAGS, "-Dmc68020"); - append(&CPP_FLAGS, "-Dmc68000"); -#endif - - if (signal(SIGHUP, SIG_IGN) != SIG_IGN) - signal(SIGHUP, trapcc); - if (signal(SIGINT, SIG_IGN) != SIG_IGN) - signal(SIGINT, trapcc); - if (signal(SIGQUIT, SIG_IGN) != SIG_IGN) - signal(SIGQUIT, trapcc); - while (--argc > 0) { - if (*(str = *argv++) != '-') { - append(&SRCFILES, str); - continue; - } - - if (lang_opt(str)) { - } - else switch (str[1]) { - - case 'c': /* stop after producing .o files */ - c_flag = 1; - break; - case 'D': /* preprocessor #define */ - case 'U': /* preprocessor #undef */ - append(&CPP_FLAGS, str); - break; - case 'I': /* include directory */ - append(&CPP_FLAGS, str); - break; - case 'g': /* debugger support */ - append(&COMP_FLAGS, str); - break; - case 
'o': /* target file */ - if (argc-- >= 0) { - o_flag = 1; - o_FILE = *argv++; - ext = extension(o_FILE); - if (ext != o_FILE && ! strcmp(ext, lang_suffix()) - ) { - error("-o would overwrite %s", o_FILE); - } - } - break; - case 'u': /* mark identifier as undefined */ - append(&LD_FLAGS, str); - if (argc-- >= 0) - append(&LD_FLAGS, *argv++); - break; - case 'O': /* use built in peephole optimizer */ - O_flag = 1; - break; - case 'v': /* verbose */ - v_flag++; - if (str[2] == 'n') - noexec = 1; - break; - case 'l': /* library file */ - append(&SRCFILES, str); - break; - case 't': /* -target? */ - if (! strcmp(str, "-target")) { - if (argc-- >= 0) argv++; - break; - } - warning("%s flag ignored", str); - break; - case 'M': /* use other compiler (for testing) */ - strcpy(COMP, str+2); - break; - case 's': /* strip, -sun3? */ - if (! strcmp(str, "-sun3")) { - break; - } - /* fall through */ - case 'n': /* text not read-only */ - case 'N': /* text read-only */ - case 'r': /* relocation produced */ - case 'S': /* strip, but leave locals and globals */ - if (str[2] == '\0') { - append(&LD_FLAGS, str); - break; - } - /* fall through */ - default: - warning("%s flag ignored", str); - break; - } - } - - if (ecount) exit(1); - - count = SRCFILES.al_argc; - argvec = &(SRCFILES.al_argv[0]); - while (count-- > 0) { - ext = extension(*argvec); - if (*argvec[0] != '-' && - ext != *argvec++ && (! strcmp(ext, lang_suffix()) - )) { - compile_cnt++; - } - } - - if (compile_cnt > 1 && c_flag && o_flag) { - warning("-o flag ignored"); - o_flag = 0; - } - - append(&COMP_FLAGS, "-L"); - count = SRCFILES.al_argc; - argvec = &(SRCFILES.al_argv[0]); - while (count-- > 0) { - register char *f; - basename(file = *argvec++, BASE); - - ext = extension(file); - - if (file[0] != '-' && - ext != file && (!strcmp(ext, lang_suffix()) - )) { - if (compile_cnt > 1) printf("%s\n", file); - - ldfile = c_flag ? 
ofile : alloc((unsigned)strlen(BASE)+3); - if ( - !strcmp(ext, "s") && - needsprep(file)) { - strcpy(tmp_file, TMP_DIR); - strcat(tmp_file, "/F_XXXXXX"); - mktemp(tmp_file); - init(call); - append(call, CPP); - concat(call, &CPP_FLAGS); - append(call, file); - if (runvec(call, tmp_file)) { - file = tmp_file; - } - else { - remove(tmp_file); - tmp_file[0] = '\0'; - continue; - } - } - init(call); - if (o_flag && c_flag) { - f = o_FILE; - } - else f = mkstr(ldfile, BASE, ".", "o", (char *)0); - if (strcmp(ext, "s")) { - append(call, COMP); - concat(call, &CPP_FLAGS); - concat(call, &COMP_FLAGS); - append(call, file); - append(call, f); - } - else { - append(call, AS_NAME); - append(call, "-o"); - append(call, f); -#ifdef sun3 - append(call, "-mc68020"); -#endif - append(call, file); - } - if (runvec(call, (char *) 0)) { - file = f; - } - else { - remove(f); - continue; - } - cleanup(tmp_file); - tmp_file[0] = '\0'; - } - - else if (file[0] != '-' && - strcmp(ext, "o") && strcmp(ext, "a")) { - warning("file with unknown suffix (%s) passed to the loader", ext); - } - - if (c_flag) - continue; - - append(&LDFILES, file); - } - - /* *.s to a.out */ - if (RET_CODE == 0 && LDFILES.al_argc > 0) { - init(call); - expand(&LD_HEAD); - expand(&LD_TAIL); - append(call, expand_string(LD_NAME)); - concat(call, &LD_FLAGS); - append(call, "-o"); - append(call, o_FILE); - concat(call, &LD_HEAD); - concat(call, &LDFILES); - concat(call, &LD_TAIL); - if (! 
runvec(call, (char *) 0)) { - exit(RET_CODE); - } - } - exit(RET_CODE); -} - -needsprep(name) - char *name; -{ - int file; - char fc; - - file = open(name,0); - if (file < 0) return 0; - if (read(file, &fc, 1) != 1) fc = 0; - close(file); - return fc == '#'; -} - -char * -alloc(u) - unsigned u; -{ - char *p = malloc(u); - - if (p == 0) - panic("no space"); - return p; -} - -char * -expand_string(s) - char *s; -{ - char buf[1024]; - register char *p = s; - register char *q = &buf[0]; - int expanded = 0; - - if (!p) return p; - while (*p) { - if (*p == '$') { - p++; - expanded = 1; - switch(*p++) { - case 'H': - strcpy(q, EM_DIR); - break; - case 'M': - strcpy(q, MACHNAME); - break; - case 'S': - strcpy(q, SYSNAME); - break; - default: - panic("internal error"); - break; - } - while (*q) q++; - } - else *q++ = *p++; - } - if (! expanded) return s; - *q++ = '\0'; - p = alloc((unsigned int) (q - buf)); - return strcpy(p, buf); -} - -append(al, arg) - register struct arglist *al; - char *arg; -{ - if (!arg || !*arg) return; - if (al->al_argc >= MAXARGC) - panic("argument list overflow"); - al->al_argv[(al->al_argc)++] = arg; -} - -expand(al) - register struct arglist *al; -{ - register int i = al->al_argc; - register char **p = &(al->al_argv[0]); - - while (i-- > 0) { - *p = expand_string(*p); - p++; - } -} - -concat(al1, al2) - struct arglist *al1, *al2; -{ - register i = al2->al_argc; - register char **p = &(al1->al_argv[al1->al_argc]); - register char **q = &(al2->al_argv[0]); - - if ((al1->al_argc += i) >= MAXARGC) - panic("argument list overflow"); - while (i-- > 0) { - *p++ = *q++; - } -} - -#if __STDC__ -/*VARARGS*/ -char * -mkstr(char *dst, ...) 
-{ - va_list ap; - - va_start(ap, dst); - { - register char *p; - register char *q; - - q = dst; - p = va_arg(ap, char *); - - while (p) { - while (*q++ = *p++); - q--; - p = va_arg(ap, char *); - } - } - va_end(ap); - - return dst; -} -#else -/*VARARGS*/ -char * -mkstr(va_alist) - va_dcl -{ - va_list ap; - char *dst; - - va_start(ap); - { - register char *p; - register char *q; - - dst = q = va_arg(ap, char *); - p = va_arg(ap, char *); - - while (p) { - while (*q++ = *p++); - q--; - p = va_arg(ap, char *); - } - } - va_end(ap); - - return dst; -} -#endif - -basename(str, dst) - char *str; - register char *dst; -{ - register char *p1 = str; - register char *p2 = p1; - - while (*p1) - if (*p1++ == '/') - p2 = p1; - p1--; - while (*p1 != '.' && p1 >= p2) p1--; - if (p1 >= p2) { - *p1 = '\0'; - while (*dst++ = *p2++); - *p1 = '.'; - } - else - while (*dst++ = *p2++); -} - -char * -extension(fn) - char *fn; -{ - register char *c = fn; - - while (*c++) ; - while (*--c != '.' && c >= fn) { } - if (c++ < fn || !*c) return fn; - return c; -} - -runvec(vec, outp) - struct arglist *vec; - char *outp; -{ - int pid, status; - - if (v_flag) { - pr_vec(vec); - putc('\n', stderr); - } - if ((pid = fork()) == 0) { /* start up the process */ - if (outp) { /* redirect standard output */ - close(1); - if (creat(outp, 0666) != 1) - panic("cannot create output file"); - } - ex_vec(vec); - } - if (pid == -1) - panic("no more processes"); - kids = pid; - wait(&status); - if (status) switch(status & 0177) { - case SIGHUP: - case SIGINT: - case SIGQUIT: - case SIGTERM: - case 0: - break; - default: - error("%s died with signal %d\n", vec->al_argv[1], status&0177); - } - kids = -1; - return status ? 
((RET_CODE = 1), 0) : 1; -} - -/*VARARGS1*/ -error(str, s1, s2) - char *str, *s1, *s2; -{ - fprintf(stderr, "%s: ", ProgCall); - fprintf(stderr, str, s1, s2); - putc('\n', stderr); - ecount++; -} - -/*VARARGS1*/ -warning(str, s1, s2) - char *str, *s1, *s2; -{ - fprintf(stderr, "%s: (warning) ", ProgCall); - fprintf(stderr, str, s1, s2); - putc('\n', stderr); -} - -panic(str) - char *str; -{ - error(str); - trapcc(SIGINT); -} - -pr_vec(vec) - register struct arglist *vec; -{ - register char **ap = &vec->al_argv[1]; - - vec->al_argv[vec->al_argc] = 0; - fprintf(stderr, "%s", *ap); - while (*++ap) { - fprintf(stderr, " %s", *ap); - } -} - -extern int errno; - -ex_vec(vec) - register struct arglist *vec; -{ - if (noexec) - exit(0); - vec->al_argv[vec->al_argc] = 0; - execv(vec->al_argv[1], &(vec->al_argv[1])); - if (errno == ENOEXEC) { /* not an a.out, try it with the SHELL */ - vec->al_argv[0] = SHELL; - execv(SHELL, &(vec->al_argv[0])); - } - if (access(vec->al_argv[1], 1) == 0) { - /* File is executable. */ - error("cannot execute %s", vec->al_argv[1]); - } else { - error("%s is not executable", vec->al_argv[1]); - } - exit(1); -} diff --git a/fcc/driver/proto.make b/fcc/driver/proto.make deleted file mode 100644 index 46c72a361..000000000 --- a/fcc/driver/proto.make +++ /dev/null @@ -1,38 +0,0 @@ -# $Id$ - -#PARAMS do not remove this line! 
- -SRC_DIR = $(SRC_HOME)/fcc/driver -INCLUDES = -I$(TARGET_HOME)/config -CFLAGS = $(COPTIONS) $(INCLUDES) -LINTFLAGS = $(LINTOPTIONS) $(INCLUDES) -LDFLAGS = $(LDOPTIONS) - -all: fcc - -install: all - cp fcc $(TARGET_HOME)/bin - if [ $(DO_MACHINE_INDEP) = y ] ; \ - then mk_manpage $(SRC_DIR)/fcc.1 $(TARGET_HOME) ; \ - fi - -cmp: all - -cmp fcc $(TARGET_HOME)/bin/fcc - -pr: - @pr $(SRC_DIR)/proto.make $(SRC_DIR)/fcc.c - -opr: - make pr | opr - -clean: - rm -f *.$(SUF) fcc Out - -lint: - $(LINT) $(LINTFLAGS) -D`ack_sys` $(SRC_DIR)/driver.c - -fcc.$(SUF): $(SRC_DIR)/fcc.c $(TARGET_HOME)/config/em_path.h - $(CC) $(CFLAGS) -c -D$(MACH) -D`ack_sys` $(SRC_DIR)/fcc.c - -fcc: fcc.$(SUF) - $(CC) $(LDFLAGS) -o fcc fcc.$(SUF) diff --git a/first/.distr b/first/.distr deleted file mode 100644 index ed9c6a4ef..000000000 --- a/first/.distr +++ /dev/null @@ -1,18 +0,0 @@ -create_dir -cp_dir -em_path.h.src -first -get_answer -get_makepars -get_sys -get_sysvax -install_tail -limit_enquire -limit_impl -lint_params -local.h.src -mk_config -mk_makefile -mk_target -target_comp -util_comp diff --git a/first/cc.xenix.src b/first/cc.xenix.src deleted file mode 100644 index b178c0812..000000000 --- a/first/cc.xenix.src +++ /dev/null @@ -1,70 +0,0 @@ -trap "rm -f x$$.c" 0 1 2 3 15 -EMHOME=/usr/em -CFLAG=0 -TARGET=a.out -while : -do - case $# in - 0) break;; - esac - case $1 in - -I*|-D*|-U*) - PREP=$PREP" "$1 - ;; - -c) CFLAG=1 - ;; - -o) shift - TARGET=$1 - ;; - -F) shift - LFLAG="-F $1" - ;; - -*) FLAGS=$FLAGS" "$1 - ;; - *) ARG=$ARG" "$1 - ;; - esac - shift -done -for i in $ARG -do - case $i in - *.c) - nm=`basename $i .c` - if [ -x $EMHOME/lib/cpp ] - then - cpp=$EMHOME/lib/cpp - cppf=-P - else - cpp=/bin/cc - cppf=-E - fi - if $cpp $cppf $PREP $i > x$$.c && /bin/cc $FLAGS -c x$$.c - then - mv x$$.o $nm.o - LDARG=$LDARG" "$nm.o - else - rm -f x$$.c - exit 1 - fi - rm -f x$$.c - ;; - *.s) - if /bin/cc $FLAGS -c $i - then - LDARG=$LDARG" "`basename $i .s`.o - else exit 1 - fi - ;; - *) 
LDARG=$LDARG" "$i - ;; - esac -done -case $CFLAG in -1) ;; -*) if /bin/cc $FLAGS $LFLAG $LDARG -o $TARGET - then : - else exit 1 - fi - ;; -esac diff --git a/first/ckpath b/first/ckpath deleted file mode 100644 index 57eb47235..000000000 --- a/first/ckpath +++ /dev/null @@ -1,67 +0,0 @@ -rm -f ../bin/x_tpath x_tpath -echo 'Checking out your $PATH; . and $ACK/bin should be in front ...' -echo "echo $$" >../bin/x_tpath -rm -f x_tpath -chmod +x ../bin/x_tpath -case x`(x_tpath) 2>/dev/null` -in -x$$) - STAT=0 ;; -x) - (cd ../bin ; echo Sorry, `pwd` is not in your shell PATH" ($PATH)") - STAT=1 ;; -*) - echo "Sorry, there is something wrong with your PATH ($PATH)" ;; -esac -echo "echo t_$$" > X_Y_Z_ -chmod +x X_Y_Z_ -case x`X_Y_Z_` -in -xt_$$) - ;; -x) - (cd ../bin ; echo Sorry, . is not in your shell PATH" ($PATH)") - STAT=2 ;; -*) - echo "Sorry, there is something wrong with your PATH ($PATH)" ;; -esac -rm -f X_Y_Z_ -case $STAT -in -2) - ;; -*) - hash -r ;; -esac -echo "echo l_$$" >x_tpath -chmod +x x_tpath -case x`(x_tpath) 2>/dev/null` -in -xl_$$) - ;; -x) - (cd ../bin ; echo Sorry, . is not in your shell PATH" ($PATH)") - STAT=2 ;; -x$$) - echo Sorry, . is not in your PATH" ($PATH)" or after the ACK bin directory - STAT=3 ;; -*) - echo "Sorry, there is something wrong with your PATH ($PATH)" - STAT=4 ;; -esac -rm -f ../bin/x_tpath x_tpath -echo "echo 93" > ../bin/cat -chmod +x ../bin/cat -hash -r -case x`cat < /dev/null 2>/dev/null` -in -x93) - rm -f ../bin/cat - ;; -*) - rm -f ../bin/cat - (cd ../bin ; echo Sorry, `pwd` comes too late in your PATH" ($PATH)" ) - STAT=13 - ;; -esac -exit $STAT diff --git a/first/cp_dir b/first/cp_dir deleted file mode 100755 index 3bc1b28a7..000000000 --- a/first/cp_dir +++ /dev/null @@ -1,16 +0,0 @@ -set -e -trap "rm -f /tmp/xx$$" 0 1 2 3 15 -case $2 in -/*) target_dir=$2 - ;; -*) target_dir=`pwd`/$2 - ;; -esac -cd $1 -tar cf /tmp/xx$$ . 
-if [ -d $target_dir ] -then : -else mkdir $target_dir -fi -cd $target_dir -tar xf /tmp/xx$$ diff --git a/first/create_dir b/first/create_dir deleted file mode 100755 index e805fd6e7..000000000 --- a/first/create_dir +++ /dev/null @@ -1,11 +0,0 @@ -if ( cd $1 ) 2>/dev/null -then - : -elif mkdir $1 2>/dev/null -then - : -else - echo $0: could not create directory $1 1>&2 - exit 1 -fi -exit 0 diff --git a/first/did_first b/first/did_first deleted file mode 100755 index f0d920a97..000000000 --- a/first/did_first +++ /dev/null @@ -1,7 +0,0 @@ -if (ack_sys ) >/dev/null 2>&1 -then - exit 0 -else - echo "You need to run 'first' first" - exit 1 -fi diff --git a/first/em_path.h.src b/first/em_path.h.src deleted file mode 100644 index 3da9a8acc..000000000 --- a/first/em_path.h.src +++ /dev/null @@ -1,11 +0,0 @@ -/* $Id$ */ -/* - * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. - * See the copyright notice in the ACK home directory, in the file "Copyright". - */ -/* Intended as a common directory for ALL temporary files */ -#define TMP_DIR "/tmp" - -/* Access to the ACK tree and parts thereof */ -#define EM_DIR "/usr/em" /* The root directory for EM stuff */ -#define ACK_PATH "lib/descr" diff --git a/first/first b/first/first deleted file mode 100755 index 0bd2d5c83..000000000 --- a/first/first +++ /dev/null @@ -1,207 +0,0 @@ -#! /bin/sh -case $0 in -*/first) - FDIR=`expr $0 : '\(.*\)/first'` - ;; -first) FDIR=. - ;; -esac - -if echo 'a\c' | grep 'c' >/dev/null ; then - : We have BSD-like echo command. - E_FLAG='-n' - E_SFX='' -else - : We have SystemV-like echo command. - E_FLAG='' - E_SFX='\c' -fi -export E_FLAG E_SFX - -echo check write-ability of /tmp ... -if ( >/tmp/aaax.$$ ) -then - rm /tmp/aaax.$$ -else - echo /tmp must exist and be writable. - exit 2 -fi -echo "/tmp ok" - -if [ -f macros ] -then - . macros -fi - -: "Now get system name and directories" -. 
$FDIR/get_sys - -: "Take action according to the system used" -BM=1 -OLDACM=$ACM -OLDSYS=$SYS -case $SYSNAME in -vax_bsd4_1a) ACM=vax4 ; SYS=BSD4_1 ; BYTE_ORDER=0123 ; MACH=vax4 ;; -vax_bsd4_2) ACM=vax4 ; SYS=BSD4_2 ; BYTE_ORDER=0123 ; MACH=vax4 ;; -vax_sysV_2) ACM=vax4 ; SYS=SYS_5 ; BYTE_ORDER=0123 ; MACH=vax4 ;; -i386) ACM=i386 ; SYS=SYS_5 ; BYTE_ORDER=0123 ; MACH=i386 ;; -sun3) ACM=sun3 ; SYS=BSD4_2; BYTE_ORDER=3210 ; MACH=m68020 ;; -sun2) ACM=sun2 ; SYS=BSD4_2; BYTE_ORDER=3210 ; MACH=m68k4 ;; -m68_unisoft|m68k2) ACM=m68k2 ; SYS=V7; BYTE_ORDER=3210 ; MACH=m68k2 ;; -m68_sysV_0|mantra) ACM=mantra ; SYS=SYS_5; BYTE_ORDER=3210 ; MACH=m68k4 ;; -m68020) ACM=m68020 ; SYS=SYS_5; BYTE_ORDER=3210 ; MACH=m68020 ;; -sparc) ACM=sparc ; SYS=BSD4_2; BYTE_ORDER=3210 ; MACH=sparc ;; -sparc_solaris) ACM=sparc_solaris ; SYS=SYS_5; BYTE_ORDER=3210 ; MACH=sparc_solaris ;; -i86) ACM=i86 ; SYS=SYS_5; BYTE_ORDER=0123 ; MACH=i86 ;; -xenix3) ACM=xenix3 ; SYS=SYS_5; BYTE_ORDER=0123 ; MACH=i86 ;; -minix) ACM=minix ; SYS=V7; BYTE_ORDER=0123 ; MACH=i86 ;; -pmds) ACM=pmds ; SYS=V7; BYTE_ORDER=3210 ; MACH=m68k2 ;; -pmds4) ACM=pmds4 ; SYS=V7; BYTE_ORDER=3210 ; MACH=m68k4 ;; -minixST) ACM=minixST ; SYS=V7; BYTE_ORDER=3210 ; MACH=m68k2 ;; -m68k4) ACM=m68k4 ; SYS=V7; BYTE_ORDER=3210 ; MACH=m68k4 ;; -*) ACM=XXX ; SYS=XXX ; BYTE_ORDER=XXX ; MACH=XXX ;; -esac - -: do not change the order in MACH_LIST. check limit_enquire first. -MACH_LIST="i86 xenix3 minix i386 6500 6800 6805 6809 i80 em22 em24 em44 m68k2 pmds minixST m68k4 pmds4 sun2 mantra m68020 sun3 sparc sparc_solaris ns pdp s2650 vax4 z80 z8000 arm" - -while : -do - for i in $MACH_LIST - do - if [ $i = "$ACM" ] - then break - fi - done - if [ $i = "$ACM" ] - then break - fi - echo "This installation script has no knowledge about $SYSNAME. -You will have to specify the default machine that you want ACK to -compile for. 
Choices:" - l= - x= - for i in $MACH_LIST - do - l="$l $i" - x=x$x - case $x in - xxxxxxxxxx) echo $l - x= - l= - ;; - esac - done - ACM=$OLDACM - echo $l - echo $E_FLAG "Your choice: [$OLDACM] $E_SFX" - . $FDIR/get_answer - case $ANS in - '') ANS="$ACM";; - esac - ACM="$ANS" -done -while : -do -case $SYS in -V7|BSD4_1|BSD4_2|SYS_5) - break - ;; -*) - SYS=$OLDSYS - echo 'What kind of Unix is the target system running? -Choices: - V7 for Unix V7, BSD 2.* - BSD4_1 for Berkeley 4.1 - BSD4_2 for Berkeley 4.2, 4.3, SunOs 3, SunOs 4 - SYS_5 for Xenix, System III, System V, SunOs 5' - echo $E_FLAG "Your choice (V7|BSD4_1|BSD4_2|SYS_5): [$OLDSYS] $E_SFX" - . $FDIR/get_answer - case $ANS in - '') ANS="$SYS";; - esac - SYS="$ANS" - ;; -esac -done -rm -f local.h -sed -e /ACKM/s/'".*"'/'"'$ACM'"'/ -e /BIGMACH/s/'[01]'/$BM/ -e /SYSTEM/s/'^#[ ]*define[ ]*[a-zA-Z_][a-zA-Z0-9_]*'/"# define $SYS"/ < $FDIR/local.h.src >local.h -case $BYTE_ORDER in -XXX) ;; -*) echo '/* Optional definition of BYTE_ORDER: */' >> local.h - echo "#define BYTE_ORDER 0x$BYTE_ORDER" >> local.h - ;; -esac - -echo "Your default machine to compile for is $ACM" - -echo "Installation of the complete ACK takes a long time. Limiting the -number of languages, runtime libraries, back-ends, and assemblers to be -installed may save a lot of time. If you want to install everything, -answer no to the next question. Otherwise, answer yes, and you will be -prompted for details" -while : -do - echo $E_FLAG "Do you want to limit the installation in any way? (y/n) [$LIMIT] $E_SFX" - . $FDIR/get_answer - case $ANS in - '') ANS="$LIMIT";; - esac - case X$ANS in - Xj*|Xy|X) - LIMIT=y - . $FDIR/limit_enquire - break - ;; - Xn*) . $FDIR/get_sysvax - LIMIT=n - DO_MACHINE_INDEP=y - DO_FAST=n - DISABLE_LANG= - DISABLE_SUP= - if [ $TARGET_HOME = $UTIL_HOME ] - then - case $ACM in - sun3|m68020|i386|vax*) - DO_FAST=y - ;; - esac - fi - break - ;; - *) echo "I do not understand your answer ($ANS). Try again." - ;; - esac -done - -. 
$FDIR/get_makepars - -echo "TARGET_HOME=\"$TARGET_HOME\"; export TARGET_HOME" > macros -echo "UTIL_HOME=\"$UTIL_HOME\"; export UTIL_HOME" >> macros -echo "SRC_HOME=\"$SRC_HOME\"; export SRC_HOME" >> macros -echo "SYSNAME=\"$SYSNAME\"; export SYSNAME" >> macros -echo "ACM=\"$ACM\"; export ACM" >> macros -echo "CURRENT=\"$CURRENT\"; export CURRENT" >> macros -echo "SYS=\"$SYS\"; export SYS" >> macros -echo "LIMIT=\"$LIMIT\"; export LIMIT" >> macros -echo "CONFIG=\"$CONFIG\"; export CONFIG" >> macros -echo "DISABLE_LANG=\"$DISABLE_LANG\"; export DISABLE_LANG" >> macros -echo "DISABLE_SUP=\"$DISABLE_SUP\"; export DISABLE_SUP" >> macros -echo "DO_MACHINE_INDEP=\"$DO_MACHINE_INDEP\"; export DO_MACHINE_INDEP" >> macros -echo "MACH_LIST=\"$MACH_LIST\"; export MACH_LIST" >> macros -echo "SYSVAX=\"$SYSVAX\"; export SYSVAX" >> macros -echo "MACH=\"$MACH\"; export MACH" >> macros -echo "WS=\"$WS\"; export WS" >> macros -echo "PS=\"$PS\"; export PS" >> macros -echo "DO_FAST=\"$DO_FAST\"; export DO_FAST" >> macros - -cat macros $FDIR/install_tail > INSTALL -chmod +x INSTALL - -echo " -A shell-script called 'INSTALL' has been created. Running it -installs ACK. Note that this may take a (very) long time, so run it -in the background, with its output redirected, f.i.: - sh INSTALL > INSTALL.out 2>&1 & -" -exit 0 diff --git a/first/fixlexlib b/first/fixlexlib deleted file mode 100755 index f7c53825c..000000000 --- a/first/fixlexlib +++ /dev/null @@ -1,75 +0,0 @@ -FL=succes -TRIES= -case X$# in -X0) - if (.Xlex) > /dev/null 2>&1 - then - TRY=`.Xlex` - else TRY=-lln - fi - echo "trying to find your lex library ..." 
- cat > x.l <<'EOF' -%% -[A-Z] putchar(yytext[0]+'a'-'A'); -EOF - if lex x.l > /dev/null 2>&1 && cc -c lex.yy.c > /dev/null 2>&1 - then : - else echo "Sorry, your lex does not seem to work" - exit 2 - fi - cat > trylib <<'EOF' -if cc lex.yy.o $1 > /dev/null 2>&1 -then - rm -f lex.yy.* a.out - exit 0 -else - exit 1 -fi -EOF - if sh trylib $TRY - then - LEX=$TRY - else - exec $0 -ll $TRY - fi - ;; -*) if sh trylib $1 - then - LEX=$1 - else - TRIES="$2 and $1" - FL=fail - fi - ;; -esac -case X$FL in -Xfail) echo 'What option do I have to give to cc to get the LEX library?' - echo "I tried " $TRIES "but these don't seem to work." - echo -n 'LEX library option: ' - if read ANSWER - then : - else echo "Sorry, got EOF while reading your answer" - exit 9 - fi - exec $0 $ANSWER "$TRIES" - ;; -Xsucces) - for i in ../util/opt ../util/cgg ../util/ncgg ../lang/occam/comp ../modules/src/em_opt ../util/ceg/as_parser - do - ( cd $i - cp Makefile makefile - ed - makefile << EOF -/^LEXLIB/c -LEXLIB = $LEX -. -w -q -EOF - ) - done - ;; -esac -rm -f x.l trylib lex.yy.* -echo echo "$LEX" > .Xlex -chmod +x .Xlex -echo "apparently, \"cc ... 
$LEX\" works" diff --git a/first/get_answer b/first/get_answer deleted file mode 100755 index b610e24f6..000000000 --- a/first/get_answer +++ /dev/null @@ -1,5 +0,0 @@ -if read ANS -then echo -else echo "Sorry, got EOF when reading your answer" - exit 1 -fi diff --git a/first/get_makepars b/first/get_makepars deleted file mode 100755 index 36da886cd..000000000 --- a/first/get_makepars +++ /dev/null @@ -1,104 +0,0 @@ -know_target=0 -case $SYSNAME in -vax*|i386|sun*|sparc*|m68_sysV_0|m68020|mantra|pmds4|m68k4) - WS=4 ; PS=4 - know_target=1 - ;; -m68_unisoft|m68k2|minixST|pmds) - WS=2 ; PS=4 - know_target=1 - ;; -i86|minix|xenix3) - WS=2 ; PS=2 - know_target=1 - ;; -*) trap "rm -f ws.c ws.o a.out t$$" 0 1 2 3 15 - cat > ws.c <<'EOF' -#include -main() -{ - printf("WS=%d ; PS=%d\n", sizeof(int), sizeof(char *)); - exit(0); -} -EOF - if [ $TARGET_HOME = $UTIL_HOME ] && cc ws.c 2>/dev/null - then - : We can find out ourselves what the word-size and - : the pointer-size of the target machine is. - cc ws.c 2>/dev/null - ./a.out > t$$ - . t$$ - rm -f t$$ a.out ws.[co] - else - : we will have to ask installer. - echo $E_FLAG "Please give the word-size of the target-machine (sizeof(int)) in bytes: [$WS] $E_SFX" - . $FDIR/get_answer - case $ANS in - '') ANS="$WS";; - esac - WS="$ANS" - echo $E_FLAG "Please give the pointer-size of the target-machine (sizeof(char *)) in bytes: [$PS] $E_SFX" - . $FDIR/get_answer - case $ANS in - '') ANS="$PS";; - esac - PS="$ANS" - fi - ;; -esac - -echo "# Paths: -SRC_HOME = $SRC_HOME -TARGET_HOME = $TARGET_HOME -UTIL_HOME = $UTIL_HOME - -# Machine independent part created? 
-DO_MACHINE_INDEP = $DO_MACHINE_INDEP - -# Target machine, only needed for fast compilers -MACH = $MACH -" > make_macros - -if [ $TARGET_HOME = $UTIL_HOME ] -then - if [ -f /bin/ranlib -o -f /usr/bin/ranlib -o -f /usr/ucb/ranlib ] - then - sed "s/^COPTIONS.*\$/COPTIONS=-O -D_EM_WSIZE=$WS -D_EM_PSIZE=$PS -D__${MACH}__/" < $FDIR/target_comp >> make_macros - else - sed -e "s/^COPTIONS.*\$/COPTIONS=-O -D_EM_WSIZE=$WS -D_EM_PSIZE=$PS -D__${MACH}__/" -e "s/^# RANLIB=:/RANLIB=:/" < $FDIR/target_comp >> make_macros - fi - case $ACM in - sun3|sparc) - ed -s make_macros <<'EOF' -/cc-and-mkdep.sun/s/^..// -w -q -EOF - ;; - esac - cat $FDIR/util_comp >> make_macros -else - case $know_target in - 1) sed -e "s/^COPTIONS.*\$/COPTIONS=-O -D_EM_WSIZE=$WS -D_EM_PSIZE=$PS -D__${MACH}__/" -e "/cc-and-mkdep.ack/s/^..//" -e "s/^CC=cc/CC=acc -m$ACM/" -e "s/^# AR=aal/AR=aal/" -e "s/^# RANLIB=:/RANLIB=:/" < $FDIR/target_comp >> make_macros - ;; - *) if [ -f /bin/ranlib -o -f /usr/bin/ranlib -o -f /usr/ucb/ranlib ] - then - sed "s/^COPTIONS.*\$/COPTIONS=-O -D_EM_WSIZE=$WS -D_EM_PSIZE=$PS -D__${MACH}__/" < $FDIR/target_comp >> make_macros - else - sed -e "s/^COPTIONS.*\$/COPTIONS=-O -D_EM_WSIZE=$WS -D_EM_PSIZE=$PS -D__${MACH}__/" -e "s/^# RANLIB=:/RANLIB=:/" < $FDIR/target_comp >> make_macros - fi - ;; - esac - sed "s/^#U/U/" < $FDIR/util_comp >> make_macros -fi - -cat $FDIR/lint_params >> make_macros - -echo "A file called 'make_macros' has been created. This file defines some -'make' variables that parameterize all Makefiles in ACK. You may want -to check it before attempting to actually install ACK." -case $know_target in -0) echo "In fact, this installation script does not know much about -your target machine, so expect some things to be wrong" - ;; -esac diff --git a/first/get_sys b/first/get_sys deleted file mode 100755 index 0ff18596a..000000000 --- a/first/get_sys +++ /dev/null @@ -1,155 +0,0 @@ -rm -f em_path.h -echo "You will now be asked for the root directory of the ACK sources. 
-This directory will not be changed by the installation process. -" -while : -do - echo $E_FLAG "Please give the root of the ACK source tree, -an absolute path: [$SRC_HOME] $E_SFX" - . $FDIR/get_answer - case $ANS in - '') ANS="$SRC_HOME" ;; - esac - SRC_HOME="$ANS" - case $SRC_HOME in - /*) break; - ;; - *) echo "$SRC_HOME is not an absolute path; try again" - ;; - esac -done -echo "You will now be asked for a configuration directory. This is -the directory in which the compilations will take place. The tree that -resides in it will have the same structure as the ACK source tree, but -the directories will usually only contain Makefiles and .o files. -" -while : -do - echo $E_FLAG "Please give the root of the configuration tree, -an absolute path: [$CONFIG] $E_SFX" - . $FDIR/get_answer - case $ANS in - '') ANS="$CONFIG";; - esac - CONFIG="$ANS" - case $CONFIG in - /*) break; - ;; - *) echo "$CONFIG is not an absolute path; try again" - ;; - esac -done -echo "You will now be asked for the root directory of the ACK binaries. After -installation, this directory will have subdirectories bin, lib, lib.bin, -man, h, config, include, modules, doc. -Four of these directories will contain stuff that depends on the machine -for which the ACK binaries are made: bin, modules, config, and lib.bin. The -other sub-directories (lib, man, h, include and doc) will contain -machine-independent stuff. -This information may be useful if you want to use ACK on different platforms -and you have a shared file system. See the installation manual. -" -while : -do - echo $E_FLAG "Please give the root of the ACK binaries, -an absolute path: [$TARGET_HOME] $E_SFX" - . 
$FDIR/get_answer - case $ANS in - '') ANS="$TARGET_HOME";; - esac - TARGET_HOME="$ANS" - case $TARGET_HOME in - /*) break; - ;; - *) echo "$TARGET_HOME is not an absolute path; try again" - ;; - esac -done -sed -e "/^#define[ ]*EM_DIR/s@\".*\"@\"$TARGET_HOME\"@" <$FDIR/em_path.h.src >em_path.h - -echo "You will now be asked for the type of the system that you want -ACK binaries produced for. This is not neccessarily the system you -run this program on. In this case, if you have not done so already, -you will have to install ACK on the current machine first. -" -echo "Give me the type of the system, the current choice is: -vax_bsd4_1a VAX11 with BSD4.1a -vax_bsd4_2 VAX11 with BSD4.2 -vax_sysV_2 VAX11 with System V.2 -i386 Intel 80386 system running Xenix System V -sun3 Sun 3 M68020 workstation -sun2 Sun 2 M68000 workstation -m68_sysV_0 Motorola 68000 with Uniplus System V.0 Unix -m68020 Motorola M68020 VME131 running Unix System V/68 R2V2.1 -sparc SUN SPARC workstation running SunOs 4 -sparc_solaris SUN SPARC workstation running solaris 2 -ANY Neither of the above -" -echo $E_FLAG "system type: [$SYSNAME] $E_SFX" -. $FDIR/get_answer -case $ANS in -'') ANS="$SYSNAME";; -esac -SYSNAME="$ANS" - -while : -do -echo $E_FLAG "Is this the system you are running on? (y/n) [$CURRENT] $E_SFX" -. $FDIR/get_answer -case $ANS in -'') ANS="$CURRENT";; -esac -case X$ANS in -Xj*|Xy*|X) UTIL_HOME=$TARGET_HOME - CURRENT=y - break - ;; -Xn*) CURRENT=n - echo "You will now be asked for the root directory of ACK on the current machine. -This tree will not be changed by the installation process. -" - while : - do - echo $E_FLAG "Please give the root of a runnable ACK tree, -an absolute path: [$UTIL_HOME] $E_SFX" - . $FDIR/get_answer - case $ANS in - '') ANS="$UTIL_HOME" ;; - esac - UTIL_HOME="$ANS" - case $UTIL_HOME in - /*) break; - ;; - *) echo "$UTIL_HOME is not an absolute path; try again" - ;; - esac - done - break - ;; -*) echo "I do not understand your answer ($ANS). Try again." 
- ;; -esac -done - -while : -do -echo "The system to install ACK for is $SYSNAME, -the root of the ACK source tree is $SRC_HOME, -the root of the configuration tree is $CONFIG, -the root of the ACK binary tree to be created is $TARGET_HOME, -and the root of a runnable ACK binary tree is $UTIL_HOME. -If the machine to compile ACK for is the current machine, the last two names -may be identical." -echo $E_FLAG "Are you satisfied with all this? (y/n) $E_SFX" -. $FDIR/get_answer -case X$ANS in -Xj*|Xy*|X) break - ;; -Xn*) echo Ok, I will give you another chance.... - . $0 - break - ;; -*) echo "I do not understand your answer ($ANS). Try again." - ;; -esac -done diff --git a/first/get_sysvax b/first/get_sysvax deleted file mode 100755 index eee8f1271..000000000 --- a/first/get_sysvax +++ /dev/null @@ -1,21 +0,0 @@ -while : -do - echo $E_FLAG "Which system-call library do you want to install for the VAX? -You can choose between - libbsd4_1a for Berkeley Unix 4.1 - libbsd4_2 for Berkeley Unix 4.2 or newer, or Ultrix - libsysV_2 for Unix System V -Your choice: [$SYSVAX] $E_SFX" - . $FDIR/get_answer - case $ANS in - '') ANS="$SYSVAX";; - esac - SYSVAX="$ANS" - case $SYSVAX in - libbsd4_1a|libbsd4_2|libsysV_2) - break - ;; - *) echo "I do not understand your answer ($SYSVAX). 
Try again" - ;; - esac -done diff --git a/first/hash b/first/hash deleted file mode 100755 index e69de29bb..000000000 diff --git a/first/install_tail b/first/install_tail deleted file mode 100644 index 5e644ce8a..000000000 --- a/first/install_tail +++ /dev/null @@ -1,84 +0,0 @@ -set -e - -PATH=::$CONFIG/bin:$UTIL_HOME/bin:/usr/ccs/bin:/usr/ucb:$PATH -export PATH - -$SRC_HOME/first/create_dir $CONFIG -$SRC_HOME/first/create_dir $CONFIG/bin - -echo "PATH=:$CONFIG/bin:$UTIL_HOME/bin:$PATH; export PATH" > $CONFIG/bin/TakeAction -cat $SRC_HOME/TakeAction >> $CONFIG/bin/TakeAction -sed '/^#PARAMS/r make_macros' < $SRC_HOME/first/mk_makefile > $CONFIG/bin/mk_makefile -cp $SRC_HOME/first/create_dir $CONFIG/bin/create_dir -cp $SRC_HOME/first/cp_dir $CONFIG/bin/cp_dir -chmod +x $CONFIG/bin/* - -$SRC_HOME/first/mk_config - -$SRC_HOME/first/mk_target - -$SRC_HOME/first/limit_impl - -case $SYSNAME in -i386) - ed -s $TARGET_HOME/lib/descr/fe << 'ABC' -1,$s/-D{NAME}/-D{NAME} -DNO_PROTOTYPE/ -w -q -ABC - ;; -esac - -: find varargs include file -: if not present use our own - -if test -f /usr/include/varargs.h -then - : -else - cp $SRC_HOME/include/_tail_cc/varargs.h $TARGET_HOME/modules/h -fi - -case X$SYSVAX in -Xvax_sysV_2) - ed -s $TARGET_HOME/lib/vax4/descr << 'ABC' -/CPP_F/s/$/ -D__USG/ -w -q -ABC - ed -s $CONFIG/mach/vax4/Action << 'ABC' -/libbsd4_2/s/libbsd4_2/libsysV_2/ -w -q -ABC - ( cd $CONFIG/mach/vax4 - for i in libcc libcc.ansi - do - ed -s $i/Makefile << 'ABC' -/BFS/s/BFS/UFS/ -w -q -ABC - done - ) - ;; -Xvax_bsd4_2) - ed -s $TARGET_HOME/lib/vax4/descr << 'ABC' -/CPP_F/s/$/ -D__BSD4_2/ -w -q -ABC - ;; -Xvax_bsd4_1a) - ed -s $CONFIG/mach/vax4/Action << 'ABC' -/libbsd4_2/s/libbsd4_2/libbsd4_1a/ -w -q -ABC - ;; -esac - -: and finally installing ... 
-cd $CONFIG -set +e -exec TakeAction diff --git a/first/limit_enquire b/first/limit_enquire deleted file mode 100644 index 8d9173d0b..000000000 --- a/first/limit_enquire +++ /dev/null @@ -1,207 +0,0 @@ -while : -do - echo "The libraries will end up in the machine-independent part of the -ACK binary tree. You may already have them from a previous ACK installation -on a different machine, in particular if you have an NFS file system. -Therefore, it may not be neccessary to install them again. As this part -of the ACK installation takes the most time, you are given the opportunity -to disable installation of the machine-independent part" - echo $E_FLAG \ - "Do you want to install the machine-independent part? (y/n) [$DO_MACHINE_INDEP] $E_SFX" - . $FDIR/get_answer - case $ANS in - '') ANS="$DO_MACHINE_INDEP" ;; - esac - case X$ANS in - Xj*|Xy*|X) DO_MACHINE_INDEP=y - echo "machine-independent part will be installed" - echo - break - ;; - Xn*) DO_MACHINE_INDEP=n - echo "machine-independent part will not be installed" - echo - break - ;; - *) echo "I do not understand your answer ($ANS). Try again." - ;; - esac -done -OLD_DIS_LANG="$DISABLE_LANG" -DISABLE_LANG= -case X$OLD_DIS_LANG in -X) ;; -*) set $OLD_DIS_LANG - ;; -esac -for i in Modula-2 Pascal Occam Basic ANSI-C C Fortran -do - DEF=y - if [ $# != 0 -a X$i = X$1 ] - then - DEF=n - shift - fi - while : - do - echo $E_FLAG "Do you want to install $i? (y/n) [$DEF] $E_SFX" - . $FDIR/get_answer - case $ANS in - '') ANS="$DEF";; - esac - case X$ANS in - Xj*|Xy*) - echo "$i will be installed" - echo - break - ;; - Xn*) DISABLE_LANG=$DISABLE_LANG" $i" - echo "$i will not be installed" - echo - break - ;; - *) echo "I do not understand your answer ($ANS). Try again." 
- ;; - esac - done -done -OLD_DIS_SUP="$DISABLE_SUP" -DISABLE_SUP= -set $MACH_LIST -while test $# != 0 -do - DEF=y - for i in $OLD_DIS_SUP - do - if [ X$i = X$1 ] - then - DEF=n - break - fi - done - while : - do - case $1 in - i86) echo "not installing i86 will disable installation of xenix3 and minix." - ;; - m68k2) echo "not installing m68k2 will disable installation of pmds, minixST, -m68k4, pmds4, sun2, and mantra." - ;; - m68k4) echo "not installing m68k4 will disable installation of pmds4, sun2, and mantra." - ;; - m68020) echo "not installing m68020 will disable installation of sun3." - ;; - esac - echo $E_FLAG "Do you want to install the $1 support? (y/n) [$DEF] $E_SFX" - . $FDIR/get_answer - case $ANS in - '') ANS="$DEF";; - esac - case X$ANS in - Xj*|Xy*) - echo "The $1 support will be installed" - echo - case $1 in - vax4) case $SYSNAME in - vax_bsd4_1a) SYSVAX=libbsd4_1a - ;; - vax_bsd4_2) SYSVAX=libbsd4_2 - ;; - vax_sysV_2) SYSVAX=libsysV_2 - ;; - *) if [ $DO_MACHINE_INDEP = y ] - then - . $FDIR/get_sysvax - fi - ;; - esac - ;; - sparc) shift - ;; - esac - break - ;; - Xn*) DISABLE_SUP=$DISABLE_SUP" $1" - echo "The $1 support will not be installed" - echo - case $1 in - i86) - shift - DISABLE_SUP=$DISABLE_SUP" $1" - shift - DISABLE_SUP=$DISABLE_SUP" $1" - ;; - m68k2) - shift - DISABLE_SUP=$DISABLE_SUP" $1" - shift - DISABLE_SUP=$DISABLE_SUP" $1" - shift - DISABLE_SUP=$DISABLE_SUP" $1" - shift - DISABLE_SUP=$DISABLE_SUP" $1" - shift - DISABLE_SUP=$DISABLE_SUP" $1" - shift - DISABLE_SUP=$DISABLE_SUP" $1" - ;; - m68k4) - shift - DISABLE_SUP=$DISABLE_SUP" $1" - shift - DISABLE_SUP=$DISABLE_SUP" $1" - shift - DISABLE_SUP=$DISABLE_SUP" $1" - ;; - sparc) - shift - DISABLE_SUP=$DISABLE_SUP" $1" - ;; - m68020) - shift - DISABLE_SUP=$DISABLE_SUP" $1" - ;; - esac - break - ;; - *) echo "I do not understand your answer ($ANS). Try again." 
- ;; - esac - done - shift -done - -if [ $TARGET_HOME = $UTIL_HOME ] -then - case $ACM in - sun3|m68020|i386|vax*) - while : - do - echo $E_FLAG "Do you want to install the fast compilers? (y/n) [$DO_FAST] $E_SFX" - . $FDIR/get_answer - case $ANS in - '') ANS="$DO_FAST" ;; - esac - case X$ANS in - Xj*|Xy*) - DO_FAST=y - echo "The fast compilers will be installed" - break - ;; - Xn*) - DO_FAST=n - echo "The fast compilers will not be installed" - break - ;; - *) echo "I do not understand your answer ($ANS). Try again." - ;; - esac - done - ;; - *) - DO_FAST=n - ;; - esac -else - DO_FAST=n -fi diff --git a/first/limit_impl b/first/limit_impl deleted file mode 100755 index 2314ab7a0..000000000 --- a/first/limit_impl +++ /dev/null @@ -1,206 +0,0 @@ -for i in $DISABLE_LANG -do - ed -s $CONFIG/Action <> Action <> Action < $CONFIG/bin/ack_sys -chmod +x $CONFIG/bin/ack_sys - -cd $SRC_HOME -find . -type d -perm -555 -print > $CONFIG/dir_list - -cd $CONFIG -for i in mach/*/libsys -do - rm -rf $i -done - -for i in `cat dir_list` -do - create_dir $i - rm -f $i/No* - if [ -f $i/Makefile ] - then - ( cd $i ; if make clean ; then exit 0 ; else exit 0 ; fi ) > /dev/null 2>&1 - fi - if [ -f $SRC_HOME/$i/proto.make ] - then mk_makefile $SRC_HOME/$i/proto.make > $i/Makefile - fi - if [ -f $SRC_HOME/$i/Action ] - then - cd $SRC_HOME/$i - cp Action* $CONFIG/$i - chmod +w $CONFIG/$i/Action* - cd $CONFIG - fi -done - -cd $CONFIG - -for i in lang/cem/cemcom.ansi lang/cem/cemcom lang/m2/comp -do - cp $SRC_HOME/$i/BigPars $CONFIG/$i/Parameters - chmod +w $CONFIG/$i/Parameters -done -for i in lang/pc/comp lang/cem/cpp.ansi -do - cp $SRC_HOME/$i/Parameters $CONFIG/$i/Parameters - chmod +w $CONFIG/$i/Parameters -done - -cd $CONFIG/mach -for i in * -do - if [ -d $i ] - then - if [ -d $i/as ] - then - cd $i/as - mk_makefile $SRC_HOME/mach/proto/as/proto.make | sed -e "/#MACH_DEFINE/,/^MACH/s/=.*/= $i/" > Makefile - cd ../.. 
- fi - if [ -d $i/top ] - then - cd $i/top - mk_makefile $SRC_HOME/mach/proto/top/proto.make | sed -e "/#MACH_DEFINE/,/^MACH/s/=.*/= $i/" > Makefile - cd ../.. - fi - if [ -d $i/cg ] - then - cd $i/cg - mk_makefile $SRC_HOME/mach/proto/cg/proto.make | sed -e "/#MACH_DEFINE/,/^MACH/s/=.*/= $i/" > Makefile - cd ../.. - fi - if [ -d $i/ncg ] - then - cd $i/ncg - mk_makefile $SRC_HOME/mach/proto/ncg/proto.make | sed -e "/#MACH_DEFINE/,/^MACH/s/=.*/= $i/" > Makefile - if [ -f $SRC_HOME/mach/$i/ncg/table_dir ] - then - ed -s Makefile < Makefile - cd ../.. - fi - done - for j in libbsd4_1a libbsd4_2 libsysV_2 - do - if [ -d $i/$j ] - then - cd $i/$j - mk_makefile $SRC_HOME/mach/proto/libg/proto.libsys | sed -e "/#MACH_PARAMS/r $SRC_HOME/mach/$i/mach_params" -e "s/libsys/$j/g" > Makefile - cd ../.. - fi - done - for j in libcc libcc.ansi libm2 libpc libbc liboc libf77 - do - create_dir $i/$j - rm -f $i/$j/No* - cd $i/$j - mk_makefile $SRC_HOME/mach/proto/libg/proto.$j | sed -e "/#MACH_PARAMS/r $SRC_HOME/mach/$i/mach_params" > Makefile - cd ../.. - done - if [ $i = vax4 ] - then : - elif [ -d $i/libsys ] - then : - else - create_dir $i/libsys - rm -f $i/libsys/No* - cd $i/libsys - mk_makefile $SRC_HOME/mach/proto/libg/proto.sysmon | sed -e "/#MACH_PARAMS/r $SRC_HOME/mach/$i/mach_params" > Makefile - cd ../.. - fi - fi -done diff --git a/first/mk_makefile b/first/mk_makefile deleted file mode 100755 index 97435474b..000000000 --- a/first/mk_makefile +++ /dev/null @@ -1,34 +0,0 @@ -: '$Id$' - -: This shell script inserts make macros after a line -: starting with #PARAMS in "make_proto", and produces the result on -: standard output. - -trap "rm -f /tmp/mk_mak$$" 0 1 2 3 15 -case $# in -1) ;; -*) echo "Usage: $0 " 1>&2 - exit 1 - ;; -esac - -cp $1 /tmp/mk_mak$$ - -ed -s /tmp/mk_mak$$ << 'EOF' -/^#PARAMS/c -#PARAMS do not remove this line! -. 
-w -q -EOF -case `ack_sys` in -sparc_solaris) - ed -s /tmp/mk_mak$$ << 'EOF' -g/^EXTRALIB/s/=/= -lelf/ -w -q -EOF - ;; -esac -cat /tmp/mk_mak$$ -exit 0 diff --git a/first/mk_target b/first/mk_target deleted file mode 100755 index 1bc50252c..000000000 --- a/first/mk_target +++ /dev/null @@ -1,39 +0,0 @@ -set -e - -: machine-dependant stuff - -create_dir $TARGET_HOME -create_dir $TARGET_HOME/config -create_dir $TARGET_HOME/lib.bin -create_dir $TARGET_HOME/modules -create_dir $TARGET_HOME/modules/h -create_dir $TARGET_HOME/bin - -cp local.h em_path.h $TARGET_HOME/config -cp_dir $SRC_HOME/bin $TARGET_HOME/bin -cp $CONFIG/bin/cp_dir $TARGET_HOME/bin/cp_dir -echo "echo $SYSNAME" > $TARGET_HOME/bin/ack_sys -chmod +x $TARGET_HOME/bin/ack_sys - -: machine-independant stuff - -if [ $DO_MACHINE_INDEP = n ] -then - exit 0 -fi - -create_dir $TARGET_HOME/lib -create_dir $TARGET_HOME/etc -create_dir $TARGET_HOME/h -create_dir $TARGET_HOME/include -create_dir $TARGET_HOME/doc - -cp $SRC_HOME/etc/ip_spec.t $TARGET_HOME/etc/ip_spec.t -cp_dir $SRC_HOME/lib $TARGET_HOME/lib -cp_dir $SRC_HOME/h $TARGET_HOME/h -cp_dir $SRC_HOME/include $TARGET_HOME/include - -cd $TARGET_HOME -find . -type f -exec chmod +w {} \; - -exit 0 diff --git a/first/myecho.c b/first/myecho.c deleted file mode 100644 index d74c09a38..000000000 --- a/first/myecho.c +++ /dev/null @@ -1,21 +0,0 @@ -#include - -main(argc, argv) - int argc; - char *argv[]; -{ - int nflag = 0; - - if(argc > 1 && ! strncmp(argv[1], "-n", 2)) { - nflag++; - argc--; - argv++; - } - while (--argc > 0) { - fputs(argv[1], stdout); - argv++; - if (argc > 1) putchar(' '); - } - if (!nflag) putchar('\n'); - exit(0); -} diff --git a/first/target_comp b/first/target_comp deleted file mode 100644 index bd95b42a0..000000000 --- a/first/target_comp +++ /dev/null @@ -1,27 +0,0 @@ -# compiler set for target machine - -CC=cc# # compiler to be used for compiling ACK - -# always passed to $(CC) -c. 
-COPTIONS=-O -D_EM_WSIZE=4 -D_EM_PSIZE=4 - -# passed to $(CC) -c when compiling modules. -LIBOPTIONS=# -# LIBOPTIONS=-LIB -L # when $(CC) is ACK - -CC_AND_MKDEP=cc-and-mkdep.all# # when $(CC) is neither ACK or SUN, -# CC_AND_MKDEP=cc-and-mkdep.ack## when $(CC) is an ACK-derived C compiler, -# CC_AND_MKDEP=cc-and-mkdep.sun## when $(CC) is a SUN C compiler - -LDOPTIONS=# # always passed to $(CC) when linking - -SUF=o# # suffix of files produced with $(CC) -c - -AR=ar# # archiver for Unix format objects -# AR=aal# # archiver for ACK .o format objects -# AR=arch# # archiver for ACK .s format objects - -RANLIB=ranlib# # when ranlib required -# RANLIB=:# # when ranlib not required - -LIBSUF=a# # suffix of object libraries diff --git a/first/util_comp b/first/util_comp deleted file mode 100644 index 377d04664..000000000 --- a/first/util_comp +++ /dev/null @@ -1,23 +0,0 @@ - -# compiler set for producing runnable binaries (possibly using $(UTIL_HOME)). -# This must describe the compiler with which $(UTIL_HOME) has been compiled. -# If $(TARGET_HOME) is identical to $(UTIL_HOME), which usually will be -# the case, this part does not have to be changed. Otherwise (when you are -# cross-compiling ACK), you will have to change this part. Probable -# replacements are given in comments. Maybe the installation script -# has already changed them, but they should be checked to be sure. - -UCC=$(CC) -#UCC=cc# # compiler to be used - -UCOPTIONS=$(COPTIONS) -#UCOPTIONS=-O# # always passed to $(UCC) -c. 
- -ULDOPTIONS=$(LDOPTIONS) -#ULDOPTIONS=# # always passed to $(UCC) when linking - -USUF=$(SUF) -#USUF=o# # suffix of files produced with $(UCC) -c - -ULIBSUF=$(LIBSUF) -#ULIBSUF=a# # suffix of object libraries for $(UCC) diff --git a/h/.distr b/h/.distr deleted file mode 100644 index 1aa187a40..000000000 --- a/h/.distr +++ /dev/null @@ -1,27 +0,0 @@ -Makefile -arch.h -bc_io.h -bc_string.h -as_spec.h -cg_pattern.h -cgg_cg.h -em_abs.h -em_ego.h -em_flag.h -em_mes.h -em_mnem.h -em_pseu.h -em_ptyp.h -em_reg.h -em_spec.h -out.h -stb.h -pc_err.h -pc_file.h -pc_math.h -ranlib.h -ocm_chan.h -ocm_parco.h -ocm_proc.h -m2_traps.h -ip_spec.h diff --git a/h/Makefile b/h/Makefile deleted file mode 100644 index d94a71ac2..000000000 --- a/h/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -install cmp: - -opr: - make pr | opr - -pr: - @pr Makefile *.h - -clean: - -rm -f *.old diff --git a/h/arch.h b/h/arch.h deleted file mode 100644 index 9d6e99cd8..000000000 --- a/h/arch.h +++ /dev/null @@ -1,25 +0,0 @@ -/* $Id$ */ -/* - * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. - * See the copyright notice in the ACK home directory, in the file "Copyright". - */ - -#ifndef __ARCH_H_INCLUDED -#define __ARCH_H_INCLUDED - -#define ARMAG 0177545 -#define AALMAG 0177454 - -struct ar_hdr { - char ar_name[14]; - long ar_date; - char ar_uid; - char ar_gid; - short ar_mode; - long ar_size; -}; - -#define AR_TOTAL 26 -#define AR_SIZE 22 - -#endif /* __ARCH_H_INCLUDED */ diff --git a/h/as_spec.h b/h/as_spec.h deleted file mode 100644 index f6f07c750..000000000 --- a/h/as_spec.h +++ /dev/null @@ -1,6 +0,0 @@ -/* $Id$ */ -/* - * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. - * See the copyright notice in the ACK home directory, in the file "Copyright". 
- */ -#define as_magic (sp_magic|(14<<8)) diff --git a/h/bc_io.h b/h/bc_io.h deleted file mode 100644 index 319afc750..000000000 --- a/h/bc_io.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. - * See the copyright notice in the ACK home directory, in the file "Copyright". - */ -#include - -/* $Id$ */ - -/* BASIC file io definitions */ - -extern FILE *_chanrd; -extern FILE *_chanwr; -extern int _chann; -/* BASIC file descriptor table */ -/* Channel assignment: - -1 terminal IO - 0 data file - 1-15 user files -*/ - -/* FILE MODES:*/ -#define IMODE 1 -#define OMODE 2 -#define RMODE 3 - -typedef struct { - char *fname; - FILE *fd; - int pos; - int mode; - int reclength; - }Filedesc; -extern Filedesc _fdtable[16]; diff --git a/h/bc_string.h b/h/bc_string.h deleted file mode 100644 index 32430a1ee..000000000 --- a/h/bc_string.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. - * See the copyright notice in the ACK home directory, in the file "Copyright". - */ -# - -/* $Id$ */ - -/* Strings are allocated in a fixed string descriptor table -** This mechanism is used to avoid string copying as much as possible -*/ - -typedef struct{ - char *strval; - int strcount; - int strlength; - } String; - -String *_newstr() ; - -#define MAXSTRING 1024 diff --git a/h/cg_pattern.h b/h/cg_pattern.h deleted file mode 100644 index 203c4c4e7..000000000 --- a/h/cg_pattern.h +++ /dev/null @@ -1,161 +0,0 @@ -/* $Id$ */ -/* - * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. - * See the copyright notice in the ACK home directory, in the file "Copyright". 
- */ -/* offsets of interesting fields in EM-pattern */ - -#define PO_HASH 0 -#define PO_NEXT 1 -#define PO_MATCH 3 - -#define ILLHASH 0177777 - -/* Escapes in printstrings */ - -#define PR_TOK '\001' -#define PR_TOKFLD '\002' -#define PR_EMINT '\003' -#define PR_EMSTR '\004' -#define PR_ALLREG '\005' -#define PR_SUBREG '\006' -/* - * In case this list gets longer remember to keep out printable nonprintables - * like \t \n \r and the like. - */ - -/* Commands for codegenerator, in low order 5 bits of byte */ - -#define DO_NEXTEM 0 -#define DO_MATCH 1 -#define DO_XMATCH 2 -#define DO_XXMATCH 3 -#define DO_REMOVE 4 -#define DO_DEALLOCATE 5 -#define DO_REALLOCATE 6 -#define DO_ALLOCATE 7 -#define DO_LOUTPUT 8 -#define DO_ROUTPUT 9 -#define DO_MOVE 10 -#define DO_ERASE 11 -#define DO_TOKREPLACE 12 -#define DO_EMREPLACE 13 -#define DO_COST 14 -#define DO_RETURN 15 -#define DO_COERC 16 -#define DO_PRETURN 17 -#define DO_RREMOVE 18 - -typedef struct instance { - int in_which; -# define IN_COPY 1 -# define IN_RIDENT 2 -# define IN_ALLOC 3 -# define IN_DESCR 4 -# define IN_REGVAR 5 - int in_info[TOKENSIZE+1]; -} inst_t,*inst_p; - -typedef struct { - int c_size; /* index in enode-table */ - int c_time; /* dito */ -} cost_t,*cost_p; - -typedef struct { - int m_set1; /* number of tokenexpr in move: from */ - int m_expr1; /* optional expression */ - int m_set2; /* number of tokenexpr in move: to */ - int m_expr2; /* optional expression */ - int m_cindex; /* code index to really do it */ - cost_t m_cost; /* associated cost */ -} move_t, *move_p; - -typedef struct { - int set_size; - short set_val[SETSIZE]; -} set_t,*set_p; - -struct exprnode { - short ex_operator; - short ex_lnode; - short ex_rnode; -}; -typedef struct exprnode node_t; -typedef struct exprnode *node_p; - -typedef struct { /* to stack coercions */ - int c1_texpno; /* token expression number */ - int c1_expr; /* boolean expression */ - int c1_prop; /* property of register needed */ - int c1_codep; /* code index */ 
- cost_t c1_cost; /* cost involved */ -} c1_t,*c1_p; - -#ifdef MAXSPLIT -typedef struct { /* splitting coercions */ - int c2_texpno; /* token expression number */ - int c2_nsplit; /* split factor */ - int c2_repl[MAXSPLIT]; /* replacement instances */ - int c2_codep; /* code index */ -} c2_t,*c2_p; -#endif /* MAXSPLIT */ - -typedef struct { /* one to one coercions */ - int c3_texpno; /* token expression number */ - int c3_prop; /* property of register needed */ - int c3_repl; /* replacement instance */ - int c3_codep; /* code index */ -} c3_t,*c3_p; - -/* - * contents of .ex_operator - */ - -#define EX_TOKFIELD 0 -#define EX_ARG 1 -#define EX_CON 2 -#define EX_ALLREG 3 -#define EX_SAMESIGN 4 -#define EX_SFIT 5 -#define EX_UFIT 6 -#define EX_ROM 7 -#define EX_NCPEQ 8 -#define EX_SCPEQ 9 -#define EX_RCPEQ 10 -#define EX_NCPNE 11 -#define EX_SCPNE 12 -#define EX_RCPNE 13 -#define EX_NCPGT 14 -#define EX_NCPGE 15 -#define EX_NCPLT 16 -#define EX_NCPLE 17 -#define EX_OR2 18 -#define EX_AND2 19 -#define EX_PLUS 20 -#define EX_CAT 21 -#define EX_MINUS 22 -#define EX_TIMES 23 -#define EX_DIVIDE 24 -#define EX_MOD 25 -#define EX_LSHIFT 26 -#define EX_RSHIFT 27 -#define EX_NOT 28 -#define EX_COMP 29 -#define EX_COST 30 -#define EX_STRING 31 -#define EX_DEFINED 32 -#define EX_SUBREG 33 -#define EX_TOSTRING 34 -#define EX_UMINUS 35 -#define EX_REG 36 -#define EX_LOWW 37 -#define EX_HIGHW 38 -#define EX_INREG 39 -#define EX_REGVAR 40 - - - -#define getint(a,b) \ - if ((a=((*(b)++)&BMASK)) >= 128) {\ - a = ((a-128)<= 128) {\ - a = ((a-128)<=0) */ -#define PAR_F 0004 /* address offset */ -#define PAR_L 0005 /* addressing locals/parameters */ -#define PAR_G 0006 /* addressing globals */ -#define PAR_W 0007 /* size: word multiple, fits word, possibly indirect */ -#define PAR_S 0010 /* size: word multiple */ -#define PAR_Z 0011 /* size: zero or word multiple */ -#define PAR_O 0012 /* size: word multiple or word fraction */ -#define PAR_P 0013 /* procedure name */ -#define PAR_B 0014 
/* branch: instruction label */ -#define PAR_R 0015 /* register number (0,1,2) */ - -/* flow */ -#define FLO_NO 0000 /* straight on */ -#define FLO_C 0020 /* conditional branch */ -#define FLO_P 0040 /* procedure: call and return */ -#define FLO_T 0060 /* terminate: no return */ diff --git a/h/em_mes.h b/h/em_mes.h deleted file mode 100644 index 6278527ac..000000000 --- a/h/em_mes.h +++ /dev/null @@ -1,29 +0,0 @@ -/* $Id$ */ -/* - * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. - * See the copyright notice in the ACK home directory, in the file "Copyright". - */ -/* - * mnemonics for the message numbers in EM - */ - -#define ms_err 0 /* Compilation error occurred, ignore rest of module */ -#define ms_opt 1 /* Disable optimization please */ -#define ms_emx 2 /* Wordsize and pointersize assumed */ -#define ms_reg 3 /* Hint for possible register usage from frontend */ -#define ms_src 4 /* Number of source lines in this module */ -#define ms_flt 5 /* Floating point used */ -#define ms_com 6 /* Comment to be retained in compact code */ -#define ms_ret 7 /* Reserved */ -#define ms_ext 8 /* List of exported symbols from this library module */ -#define ms_par 9 /* Number of bytes of parameters accessed */ -#define ms_ego 10 /* Hint from EM Global Optimizer */ -#define ms_gto 11 /* Dangerous procedure, uses nonlocal goto */ -#define ms_stb 12 /* symbol table entry (for debugger) */ -#define ms_std 13 /* symbol table entry (for debugger) referring to dot */ -#define ms_tes 14 /* Size of the top element at a certain label */ - -/* - * for details about ms_reg, see em_reg.h - * for details about ms_ego, see em_ego.h - */ diff --git a/h/em_ptyp.h b/h/em_ptyp.h deleted file mode 100644 index cb311ae35..000000000 --- a/h/em_ptyp.h +++ /dev/null @@ -1,25 +0,0 @@ -/* $Id$ */ -/* - * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. - * See the copyright notice in the ACK home directory, in the file "Copyright". 
- */ -#define ptyp(x) (1<<(x-sp_fspec)) - -#define cst_ptyp (ptyp(sp_cst2)|ptyp(sp_cst4)) -#define nof_ptyp (ptyp(sp_dlb1)|ptyp(sp_dlb2)|ptyp(sp_doff)) -#define sof_ptyp (ptyp(sp_dnam)|ptyp(sp_doff)) -#define lab_ptyp (ptyp(sp_dlb1)|ptyp(sp_dlb2)|ptyp(sp_dnam)) -#define ico_ptyp (ptyp(sp_icon)) -#define uco_ptyp (ptyp(sp_ucon)) -#define fco_ptyp (ptyp(sp_fcon)) -#define str_ptyp (ptyp(sp_scon)) -#define con_ptyp (str_ptyp|ico_ptyp|uco_ptyp|fco_ptyp) -#define ilb_ptyp (ptyp(sp_ilb1)|ptyp(sp_ilb2)) -#define pro_ptyp (ptyp(sp_pnam)) -#define off_ptyp (ptyp(sp_doff)) -#define end_ptyp (ptyp(sp_cend)) -#define sym_ptyp (lab_ptyp) -#define arg_ptyp (nof_ptyp|cst_ptyp|sof_ptyp) -#define par_ptyp (arg_ptyp|ico_ptyp|uco_ptyp|fco_ptyp|pro_ptyp|ilb_ptyp) -#define val_ptyp (par_ptyp|str_ptyp) -#define any_ptyp (val_ptyp|end_ptyp) diff --git a/h/em_reg.h b/h/em_reg.h deleted file mode 100644 index 3da8c50a1..000000000 --- a/h/em_reg.h +++ /dev/null @@ -1,15 +0,0 @@ -/* $Id$ */ -/* - * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. - * See the copyright notice in the ACK home directory, in the file "Copyright". - */ -/* - * mes ms_reg,offset,size,type,priority - * - * Here are the defines for type - */ - -#define reg_any 0 /* Unspecified type */ -#define reg_loop 1 /* loop control variable */ -#define reg_pointer 2 /* pointer variable */ -#define reg_float 3 /* floating point variable */ diff --git a/h/ip_spec.h b/h/ip_spec.h deleted file mode 100644 index d6c630448..000000000 --- a/h/ip_spec.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. - * See the copyright notice in the ACK home directory, in the file "Copyright". 
- */ -/* Contents of flags used when describing interpreter opcodes */ - -#define RCS_IP "$Id$" - -#define OPTYPE 07 /* type field in flag */ - -#define OPMINI 0 /* m MINI */ -#define OPSHORT 1 /* s SHORT */ -#define OPNO 2 /* - No operand */ -#define OP8 3 /* 1 1-byte signed operand */ -#define OP16 4 /* 2 2-byte signed operand */ -#define OP32 5 /* 4 4-byte signed operand */ -#define OP64 6 /* 8 8-byte signed operand */ -#define OP16U 7 /* u 2-byte unsigned operand */ - -#define OPESC 010 /* e escaped opcode */ -#define OPWORD 020 /* w operand is word multiple */ -#define OPNZ 040 /* o operand starts at 1 ( or wordsize if w-flag) */ - -#define OPRANGE 0300 /* Range of operands: Positive, negative, both */ - -#define OP_BOTH 0000 /* the default */ -#define OP_POS 0100 /* p Positive (>=0) operands only */ -#define OP_NEG 0200 /* n Negative (<0) operands only */ - -struct opform { - char i_opcode ; /* the opcode number */ - char i_flag ; /* the flag byte */ - char i_low ; /* the interpreter first opcode */ - char i_num ; /* the number of shorts/minis (optional) */ -}; - -/* Escape indicators */ - -#define ESC 254 /* To escape group */ -#define ESC_L 255 /* To 32 and 64 bit operands */ diff --git a/h/m2_traps.h b/h/m2_traps.h deleted file mode 100644 index a09190037..000000000 --- a/h/m2_traps.h +++ /dev/null @@ -1,16 +0,0 @@ -/* $Id$ */ -/* - * (c) copyright 1990 by the Vrije Universiteit, Amsterdam, The Netherlands. - * See the copyright notice in the ACK home directory, in the file "Copyright". 
- */ - -/* Modula-2 runtime errors */ - -#define M2_TOOLARGE 64 /* stack of process too large */ -#define M2_TOOMANY 65 /* too many nested traps & handlers */ -#define M2_NORESULT 66 /* no RETURN from procedure function */ -#define M2_UOVFL 67 /* cardinal overflow */ -#define M2_FORCH 68 /* FOR-loop control variable changed */ -#define M2_UUVFL 69 /* cardinal underflow */ -#define M2_INTERNAL 70 /* internal error, should not happen */ -#define M2_UNIXSIG 71 /* unix signal */ diff --git a/h/ocm_chan.h b/h/ocm_chan.h deleted file mode 100644 index d731b9456..000000000 --- a/h/ocm_chan.h +++ /dev/null @@ -1,52 +0,0 @@ -/* $Id$ */ -/* - * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. - * See the copyright notice in the ACK home directory, in the file "Copyright". - */ -/* ocm_chan.h - channel definitions */ -#include -#include "ocm_parco.h" - -typedef union channel { - struct { /* Interprocess channel */ - char _type; /* Channel type, see note */ - char synch; /* State in channel synchronization */ - long val; /* Transmitted value */ - } c; - struct { /* File channel */ - char _type; /* Dummy field, see note */ - char index; /* Index in the file array */ - char flgs; /* Status flags: in use & readahead */ - char preread; /* Possible preread character */ - } f; -} chan; -#define type c._type /* Channel type */ -/* Note: The channel type should not be part of each structure in chan. But - * the C alignment rules would make chan about 50% bigger if we had done it - * the right way. Note that the order of fields in a struct cannot be a problem - * as long as struct c is the largest within the union. 
- */ - -#define C_T_CHAN 0 /* Type of a interprocess channel */ -#define C_T_FILE 1 /* Type of a file channel */ - -#define C_S_FREE 0 /* IP channel is free */ -#define C_S_ANY 1 /* IP channel contains data */ -#define C_S_ACK 2 /* IP channel data is removed */ - -#define C_F_EOF (-1L) /* File channel returns EOF */ -#define C_F_TEXT (-2L) /* File channel becomes line oriented */ -#define C_F_RAW (-3L) /* File channel becomes character oriented */ - -#define C_F_INUSE 0x01 /* File channel is connected to a UNIX file */ -#define C_F_READAHEAD 0x02 /* File channel has a preread character */ - -extern chan file[20]; /* Array of file channels */ -extern FILE *unix_file[20]; /* Pointers to buffered UNIX files */ - -void c_init(); - -void chan_in(), cbyte_in(), c_wa_in(), c_ba_in(); -void chan_out(), c_wa_out(), c_ba_out(); - -int chan_any(); diff --git a/h/ocm_parco.h b/h/ocm_parco.h deleted file mode 100644 index bd6f38237..000000000 --- a/h/ocm_parco.h +++ /dev/null @@ -1,23 +0,0 @@ -/* $Id$ */ -/* - * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. - * See the copyright notice in the ACK home directory, in the file "Copyright". - */ -/* parco.h - Define names for simulation routines - * - * This file is to be included by users of the higher-level routines - * - */ - -void pc_begin(), resumenext(), parend(), resume(), coend(); -int pc_fork(); - -#define nullid ((int *) 0 - (int *) 0) - /* I.e. a 0 of type "pointer difference" */ - -#define parbegin(sbrk) pc_begin(sbrk, nullid) -#define parfork() pc_fork(nullid) -#define cobegin(sbrk, id) pc_begin(sbrk, id) -#define cofork(id) pc_fork(id) - -extern int deadlock; diff --git a/h/ocm_proc.h b/h/ocm_proc.h deleted file mode 100644 index 6281bc24b..000000000 --- a/h/ocm_proc.h +++ /dev/null @@ -1,61 +0,0 @@ -/* $Id$ */ -/* - * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. - * See the copyright notice in the ACK home directory, in the file "Copyright". 
- */ -/* process.h - Define administration types and functions - * - * This file is to be included by implementors of the higher - * level routines - * - */ -#include "ocm_parco.h" - -#ifndef ptrdiff /* This type must be able to hold a pointer difference */ -#if EM_WSIZE