Whitespace changes to make the comments more obvious

[rrq/jonesforth.git] / jonesforth.f
diff --git a/jonesforth.f b/jonesforth.f

index b05b64cba2d0b2012b8f07aca2f5011285e8e8ed..3641fa18f5a77708122aa43b0e2f899c4b68f149 100644 (file)
--- a/jonesforth.f
+++ b/jonesforth.f
@@ -2,7 +2,7 @@
  \      A sometimes minimal FORTH compiler and tutorial for Linux / i386 systems. -*- asm -*-
  \      By Richard W.M. Jones <rich@annexia.org> http://annexia.org/forth
  \      This is PUBLIC DOMAIN (see public domain release statement below).
-\      $Id: jonesforth.f,v 1.5 2007-09-26 22:20:52 rich Exp $
+\      $Id: jonesforth.f,v 1.9 2007-09-28 20:22:41 rich Exp $
  \
  \      The first part of this tutorial is in jonesforth.S.  Get if from http://annexia.org/forth
  \
@@ -100,6 +100,7 @@
  ;
  
  \ A few more character constants defined the same way as above.
+: ';' [ CHAR ; ] LITERAL ;
  : '(' [ CHAR ( ] LITERAL ;
  : ')' [ CHAR ) ] LITERAL ;
  : '"' [ CHAR " ] LITERAL ;
@@ -241,6 +242,16 @@
         ( -- )          means the word has no effect on the stack
  )
  
+( Some more complicated stack examples, showing the stack notation. )
+: NIP ( x y -- y ) SWAP DROP ;         ( discards the item just under the top of stack )
+: TUCK ( x y -- y x y ) SWAP OVER ;    ( SWAP OVER gives y x y without relying on the direction in which ROT rotates )
+: PICK ( x_u ... x_1 x_0 u -- x_u ... x_1 x_0 x_u )
+       1+              ( add one to skip the cell that 'u' itself occupies on the stack )
+       4 *             ( multiply by the word size, giving an offset in bytes )
+       DSP@ +          ( add that offset to the stack pointer )
+       @               ( and fetch the cell found there, which is x_u )
+;
+
  ( With the looping constructs, we can now write SPACES, which writes n spaces to stdout. )
  : SPACES       ( n -- )
         BEGIN
@@ -384,7 +395,7 @@
  : U. U. SPACE ;
  
  ( ? fetches the integer at an address and prints it. )
-: ? @ . ;
+: ? ( addr -- ) @ . ;
  
  ( c a b WITHIN returns true if a <= c and c < b )
  : WITHIN
@@ -422,7 +433,7 @@
  
  (
         S" string" is used in FORTH to define strings.  It leaves the address of the string and
-       its length on the stack, with the address at the top.  The space following S" is the normal
+       its length on the stack (with the length at the top of stack).  The space following S" is the normal
         space between FORTH words and is not a part of the string.
  
         This is tricky to define because it has to do different things depending on whether
@@ -438,7 +449,7 @@
         case we put the string at HERE (but we _don't_ change HERE).  This is meant as a temporary
         location, likely to be overwritten soon after.
  )
-: S" IMMEDIATE         ( -- len addr )
+: S" IMMEDIATE         ( -- addr len )
         STATE @ IF      ( compiling? )
                 ' LITSTRING ,   ( compile LITSTRING )
                 HERE @          ( save the address of the length word on the stack )
@@ -468,6 +479,7 @@
                 DROP            ( drop the final " character )
                 HERE @ -        ( calculate the length )
                 HERE @          ( push the start address )
+               SWAP            ( addr len )
         THEN
  ;
  
@@ -479,8 +491,8 @@
         In immediate mode we just keep reading characters and printing them until we get to
         the next double quote.
  
-       In compile mode we use S" to store the string, then add EMITSTRING afterwards:
-               LITSTRING <string length> <string rounded up to 4 bytes> EMITSTRING
+       In compile mode we use S" to store the string, then add TELL afterwards:
+               LITSTRING <string length> <string rounded up to 4 bytes> TELL
  
         It may be interesting to note the use of [COMPILE] to turn the call to the immediate
         word S" into compilation of that word.  It compiles it into the definition of .",
@@ -490,7 +502,7 @@
  : ." IMMEDIATE         ( -- )
         STATE @ IF      ( compiling? )
                 [COMPILE] S"    ( read the string, and compile LITSTRING, etc. )
-               ' EMITSTRING ,  ( compile the final EMITSTRING )
+               ' TELL ,        ( compile the final TELL )
         ELSE
                 ( In immediate mode, just read characters and print them until we get
                   to the ending double quote. )
@@ -600,7 +612,7 @@
         is the natural size for integers on this machine architecture.  On this 32 bit machine therefore
         CELLS just multiplies the top of stack by 4.
  )
-: CELLS ( n -- n ) 4 * ;
+: CELLS ( n -- n ) 4* ;
  
  (
         So now we can define VARIABLE easily in much the same way as CONSTANT above.  Refer to the
@@ -784,6 +796,9 @@
  
  (
         DUMP is used to dump out the contents of memory, in the 'traditional' hexdump format.
+
+       Notice that the parameters to DUMP (address, length) are compatible with string words
+       such as WORD and S".
  )
  : DUMP         ( addr len -- )
         BASE @ ROT              ( save the current BASE at the bottom of the stack )
@@ -838,6 +853,228 @@
         BASE !                  ( restore saved BASE )
  ;
  
+(
+       CASE...ENDCASE is how we do switch statements in FORTH.  There is no generally
+       agreed syntax for this, so I've gone for the syntax mandated by the ISO standard
+       FORTH (ANS-FORTH).
+
+       ( some value on the stack )
+       CASE
+       test1 OF ... ENDOF
+       test2 OF ... ENDOF
+       testn OF ... ENDOF
+       ... ( default case )
+       ENDCASE
+
+       The CASE statement tests the value on the stack by comparing it for equality with
+       test1, test2, ..., testn and executes the matching piece of code within OF ... ENDOF.
+       If none of the test values match then the default case is executed.  Inside the ... of
+       the default case, the value is still at the top of stack (it is implicitly DROP-ed
+       by ENDCASE).  When ENDOF is executed it jumps after ENDCASE (i.e. there is no "fall-through"
+       and no need for a break statement like in C).
+
+       The default case may be omitted.  In fact the tests may also be omitted so that you
+       just have a default case, although this is probably not very useful.
+
+       An example (assuming that 'q', etc. are words which push the ASCII value of the letter
+       on the stack):
+
+       0 VALUE QUIT
+       0 VALUE SLEEP
+       KEY CASE
+               'q' OF 1 TO QUIT ENDOF
+               's' OF 1 TO SLEEP ENDOF
+               ( default case: )
+               ." Sorry, I didn't understand key <" DUP EMIT ." >, try again." CR
+       ENDCASE
+
+       (In some versions of FORTH, more advanced tests are supported, such as ranges, etc.
+       Other versions of FORTH need you to write OTHERWISE to indicate the default case.
+       As I said above, this FORTH tries to follow the ANS FORTH standard).
+
+       The implementation of CASE...ENDCASE is somewhat non-trivial.  I'm following the
+       implementations from here:
+       http://www.uni-giessen.de/faq/archiv/forthfaq.case_endcase/msg00000.html
+
+       The general plan is to compile the code as a series of IF statements:
+
+       CASE                            (push 0 on the immediate-mode parameter stack)
+       test1 OF ... ENDOF              test1 OVER = IF DROP ... ELSE
+       test2 OF ... ENDOF              test2 OVER = IF DROP ... ELSE
+       testn OF ... ENDOF              testn OVER = IF DROP ... ELSE
+       ... ( default case )            ...
+       ENDCASE                         DROP THEN [THEN [THEN ...]]
+
+       The CASE statement pushes 0 on the immediate-mode parameter stack, and that number
+       is used to count how many THEN statements we need when we get to ENDCASE so that each
+       IF has a matching THEN.  The counting is done implicitly.  If you recall from the
+       implementation above of IF, each IF pushes a code address on the immediate-mode stack,
+       and these addresses are non-zero, so by the time we get to ENDCASE the stack contains
+       some number of non-zeroes, followed by a zero.  The number of non-zeroes is how many
+       times IF has been called, so how many times we need to match it with THEN.
+
+       This code uses [COMPILE] so that we compile calls to IF, ELSE, THEN instead of
+       actually calling them while we're compiling the words below.
+
+       As is the case with all of our control structures, they only work within word
+       definitions, not in immediate mode.
+)
+: CASE IMMEDIATE       ( compile-time: -- 0 )
+       0               ( push 0 to mark the bottom of the stack; ENDCASE stops compiling THENs when it reaches it )
+;
+
+: OF IMMEDIATE         ( compiles: OVER = IF DROP )
+       ' OVER ,        ( compile OVER, which copies the value being tested above the test value )
+       ' = ,           ( compile =, which compares that copy with the test value )
+       [COMPILE] IF    ( compile IF, which leaves a non-zero code address on the immediate-mode stack )
+       ' DROP ,        ( compile DROP, so that the matching branch discards the tested value )
+;
+
+: ENDOF IMMEDIATE      ( compiles: ELSE )
+       [COMPILE] ELSE  ( ENDOF is the same as ELSE: it ends the branch opened by OF and starts the next one )
+;
+
+: ENDCASE IMMEDIATE    ( compiles: DROP THEN [THEN [THEN ...]] )
+       ' DROP ,        ( compile DROP, which discards the tested value after the default case )
+
+       ( keep compiling THEN until we get to our zero marker )
+       BEGIN
+               ?DUP            ( non-zero: a branch is still open; zero: we reached the marker pushed by CASE )
+       WHILE
+               [COMPILE] THEN  ( close one more open branch )
+       REPEAT
+;
+
+(
+       CFA> is the opposite of >CFA.  It takes a codeword and tries to find the matching
+       dictionary definition.
+
+       In this FORTH this is not so easy.  In fact we have to search through the dictionary
+       because we don't have a convenient back-pointer (as is often the case in other versions
+       of FORTH).
+
+       This word returns 0 if it doesn't find a match.
+)
+: CFA>                 ( cfa -- dictionary-entry | 0 )
+       LATEST @        ( start at LATEST dictionary entry | cfa curr )
+       BEGIN
+               DUP 0<>         ( while link pointer is not null )
+       WHILE
+               DUP >CFA        ( cfa curr curr-cfa )
+               2 PICK          ( cfa curr curr-cfa cfa )
+               = IF            ( found a match? | cfa curr )
+                       NIP             ( leave curr dictionary entry on the stack )
+                       EXIT            ( and return from the function )
+               THEN
+               @               ( follow link pointer back | cfa prev )
+       REPEAT
+       2DROP           ( restore stack: drop cfa and the null link pointer )
+       0               ( sorry, nothing found )
+;
+
+(
+       SEE disassembles a FORTH word.
+
+       We search for the dictionary entry of the word, then search again for the next
+       word (effectively, the end of the compiled word).  This results in two pointers:
+
+       +---------+---+---+---+---+------------+------------+------------+------------+
+       | LINK    | 3 | T | E | N | DOCOL      | LIT        | 10         | EXIT       |
+       +---------+---+---+---+---+------------+------------+------------+------------+
+        ^                                                                             ^
+        |                                                                             |
+       Start of word                                                         End of word
+
+       With this information we can have a go at decompiling the word.  We need to
+       recognise "meta-words" like LIT, LITSTRING, BRANCH, etc. and treat those separately.
+)
+: SEE                  ( -- ) ( the word to decompile is obtained via WORD FIND )
+       WORD FIND       ( find the dictionary entry to decompile | NOTE review: the result is used without a not-found check )
+
+       ( Now we search again, looking for the next word in the dictionary.  This gives us
+         the length of the word that we will be decompiling.  (Well, mostly it does). )
+       HERE @          ( address of the end of the last compiled word )
+       LATEST @        ( word last curr )
+       BEGIN
+               2 PICK          ( word last curr word )
+               OVER            ( word last curr word curr )
+               <>              ( word last curr word<>curr? )
+       WHILE                   ( word last curr )
+               NIP             ( word curr )
+               DUP @           ( word curr prev (which becomes: word last curr) )
+       REPEAT
+
+       DROP            ( at this point, the stack is: start-of-word end-of-word )
+       SWAP            ( end-of-word start-of-word )
+
+       ( begin the definition with : NAME [IMMEDIATE] )
+       ':' EMIT SPACE DUP ID. SPACE
+       DUP ?IMMEDIATE IF ." IMMEDIATE " THEN
+
+       >DFA            ( get the data address, ie. points after DOCOL | end-of-word start-of-data )
+
+       ( now we start decompiling until we hit the end of the word )
+       BEGIN           ( end start )
+               2DUP >
+       WHILE
+               DUP @           ( end start codeword )
+
+               CASE
+               ' LIT OF                ( is it LIT ? )
+                       4 + DUP @               ( get next word which is the integer constant )
+                       .                       ( and print it )
+               ENDOF
+               ' LITSTRING OF          ( is it LITSTRING ? )
+                       [ CHAR S ] LITERAL EMIT '"' EMIT SPACE ( print S"<space> )
+                       4 + DUP @               ( get the length word )
+                       SWAP 4 + SWAP           ( end str length | skip the length word so str points at the string itself )
+                       2DUP TELL               ( print the string )
+                       '"' EMIT SPACE          ( finish the string with a final quote )
+                       + ALIGNED               ( end str+len, aligned )
+                       4 -                     ( because we're about to add 4 below )
+               ENDOF
+               ' 0BRANCH OF            ( is it 0BRANCH ? )
+                       ." 0BRANCH ( "
+                       4 + DUP @               ( print the offset )
+                       .
+                       ')' EMIT SPACE
+               ENDOF
+               ' BRANCH OF             ( is it BRANCH ? )
+                       ." BRANCH ( "
+                       4 + DUP @               ( print the offset )
+                       .
+                       ')' EMIT SPACE
+               ENDOF
+               ' ' OF                  ( is it ' (TICK) ? )
+                       [ CHAR ' ] LITERAL EMIT SPACE
+                       4 + DUP @               ( get the next codeword )
+                       CFA>                    ( and force it to be printed as a dictionary entry )
+                       ID. SPACE
+               ENDOF
+               ' EXIT OF               ( is it EXIT? )
+                       ( We expect the last word to be EXIT, and if it is then we don't print it
+                         because EXIT is normally implied by ;.  EXIT can also appear in the middle
+                         of words, and then it needs to be printed. )
+                       2DUP                    ( end start end start )
+                       4 +                     ( end start end start+4 )
+                       <> IF                   ( end start | we're not at the end )
+                               ." EXIT "
+                       THEN
+               ENDOF
+                                       ( default case: )
+                       DUP                     ( in the default case we always need to DUP before using )
+                       CFA>                    ( look up the codeword to get the dictionary entry )
+                       ID. SPACE               ( and print it )
+               ENDCASE
+
+               4 +             ( end start+4 )
+       REPEAT
+
+       ';' EMIT CR
+
+       2DROP           ( restore stack )
+;
+
  ( Finally print the welcome prompt. )
  ." JONESFORTH VERSION " VERSION . CR
  ." OK "