Added headings, fixed CELLS, DOES>, C strings, environment

[rrq/jonesforth.git] / jonesforth.f
diff --git a/jonesforth.f b/jonesforth.f

index 7f2051770ba0a75388f0553eb0cacd2daac139b3..3c62f34dcfce938b80cec795f161b17f2e8ae08c 100644 (file)
--- a/jonesforth.f
+++ b/jonesforth.f
@@ -2,7 +2,7 @@
  \      A sometimes minimal FORTH compiler and tutorial for Linux / i386 systems. -*- asm -*-
  \      By Richard W.M. Jones <rich@annexia.org> http://annexia.org/forth
  \      This is PUBLIC DOMAIN (see public domain release statement below).
  \      A sometimes minimal FORTH compiler and tutorial for Linux / i386 systems. -*- asm -*-
  \      By Richard W.M. Jones <rich@annexia.org> http://annexia.org/forth
  \      This is PUBLIC DOMAIN (see public domain release statement below).
-\      $Id: jonesforth.f,v 1.3 2007-09-25 09:50:54 rich Exp $
+\      $Id: jonesforth.f,v 1.10 2007-09-29 16:06:27 rich Exp $
  \
  \      The first part of this tutorial is in jonesforth.S.  Get if from http://annexia.org/forth
  \
  \
  \      The first part of this tutorial is in jonesforth.S.  Get if from http://annexia.org/forth
  \
@@ -44,6 +44,12 @@
  \
  \      FORTH is case-sensitive.  Use capslock!
  
  \
  \      FORTH is case-sensitive.  Use capslock!
  
+\ The primitive word /MOD (DIVMOD) leaves both the quotient and the remainder on the stack.  (On
+\ i386, the idivl instruction gives both anyway).  Now we can define the / and MOD in terms of /MOD
+\ and a few other primitives.
+: / /MOD SWAP DROP ;
+: MOD /MOD DROP ;
+
  \ Define some character constants
  : '\n'   10 ;
  : 'SPACE' 32 ;
  \ Define some character constants
  : '\n'   10 ;
  : 'SPACE' 32 ;
@@ -59,13 +65,6 @@
  \ : DUP _X ! _X @ _X @ ;
  \ : DROP _X ! ;
  
  \ : DUP _X ! _X @ _X @ ;
  \ : DROP _X ! ;
  
-\ The built-in . (DOT) function doesn't print a space after the number (unlike the real FORTH word).
-\ However this is very easily fixed by redefining . (DOT).  Any built-in word can be redefined.
-: .
-       .               \ this refers back to the previous definition (but see also RECURSE below)
-       SPACE
-;
-
  \ The 2... versions of the standard operators work on pairs of stack entries.  They're not used
  \ very commonly so not really worth writing in assembler.  Here is how they are defined in FORTH.
  : 2DUP OVER OVER ;
  \ The 2... versions of the standard operators work on pairs of stack entries.  They're not used
  \ very commonly so not really worth writing in assembler.  Here is how they are defined in FORTH.
  : 2DUP OVER OVER ;
@@ -75,14 +74,13 @@
  : 2* 2 * ;
  : 2/ 2 / ;
  
  : 2* 2 * ;
  : 2/ 2 / ;
  
-\ Standard words for manipulating BASE.
-: DECIMAL 10 BASE ! ;
-: HEX 16 BASE ! ;
+\ NEGATE leaves the negative of a number on the stack.
+: NEGATE 0 SWAP - ;
  
  \ Standard words for booleans.
  
  \ Standard words for booleans.
-: TRUE 1 ;
+: TRUE  1 ;
  : FALSE 0 ;
  : FALSE 0 ;
-: NOT 0= ;
+: NOT   0= ;
  
  \ LITERAL takes whatever is on the stack and compiles LIT <foo>
  : LITERAL IMMEDIATE
  
  \ LITERAL takes whatever is on the stack and compiles LIT <foo>
  : LITERAL IMMEDIATE
@@ -90,20 +88,26 @@
         ,               \ compile the literal itself (from the stack)
         ;
  
         ,               \ compile the literal itself (from the stack)
         ;
  
-\ Now we can use [ and ] to insert literals which are calculated at compile time.
+\ Now we can use [ and ] to insert literals which are calculated at compile time.  (Recall that
+\ [ and ] are the FORTH words which switch into and out of immediate mode.)
  \ Within definitions, use [ ... ] LITERAL anywhere that '...' is a constant expression which you
  \ would rather only compute once (at compile time, rather than calculating it each time your word runs).
  : ':'
  \ Within definitions, use [ ... ] LITERAL anywhere that '...' is a constant expression which you
  \ would rather only compute once (at compile time, rather than calculating it each time your word runs).
  : ':'
-       [               \ go into immediate mode temporarily
-       CHAR :          \ push the number 58 (ASCII code of colon) on the stack
+       [               \ go into immediate mode (temporarily)
+       CHAR :          \ push the number 58 (ASCII code of colon) on the parameter stack
         ]               \ go back to compile mode
         LITERAL         \ compile LIT 58 as the definition of ':' word
  ;
  
  \ A few more character constants defined the same way as above.
         ]               \ go back to compile mode
         LITERAL         \ compile LIT 58 as the definition of ':' word
  ;
  
  \ A few more character constants defined the same way as above.
+: ';' [ CHAR ; ] LITERAL ;
  : '(' [ CHAR ( ] LITERAL ;
  : ')' [ CHAR ) ] LITERAL ;
  : '"' [ CHAR " ] LITERAL ;
  : '(' [ CHAR ( ] LITERAL ;
  : ')' [ CHAR ) ] LITERAL ;
  : '"' [ CHAR " ] LITERAL ;
+: 'A' [ CHAR A ] LITERAL ;
+: '0' [ CHAR 0 ] LITERAL ;
+: '-' [ CHAR - ] LITERAL ;
+: '.' [ CHAR . ] LITERAL ;
  
  \ While compiling, '[COMPILE] word' compiles 'word' if it would otherwise be IMMEDIATE.
  : [COMPILE] IMMEDIATE
  
  \ While compiling, '[COMPILE] word' compiles 'word' if it would otherwise be IMMEDIATE.
  : [COMPILE] IMMEDIATE
@@ -113,6 +117,20 @@
         ,               \ and compile that
  ;
  
         ,               \ and compile that
  ;
  
+\ RECURSE makes a recursive call to the current word that is being compiled.
+\
+\ Normally while a word is being compiled, it is marked HIDDEN so that references to the
+\ same word within are calls to the previous definition of the word.  However we still have
+\ access to the word which we are currently compiling through the LATEST pointer so we
+\ can use that to compile a recursive call.
+: RECURSE IMMEDIATE
+       LATEST @        \ LATEST points to the word being compiled at the moment
+       >CFA            \ get the codeword
+       ,               \ compile it
+;
+
+\      CONTROL STRUCTURES ----------------------------------------------------------------------
+\
  \ So far we have defined only very simple definitions.  Before we can go further, we really need to
  \ make some control structures, like IF ... THEN and loops.  Luckily we can define arbitrary control
  \ structures directly in FORTH.
  \ So far we have defined only very simple definitions.  Before we can go further, we really need to
  \ make some control structures, like IF ... THEN and loops.  Luckily we can define arbitrary control
  \ structures directly in FORTH.
@@ -196,6 +214,8 @@
         SWAP !          \ and back-fill it in the original location
  ;
  
         SWAP !          \ and back-fill it in the original location
  ;
  
+\      COMMENTS ----------------------------------------------------------------------
+\
  \ FORTH allows ( ... ) as comments within function definitions.  This works by having an IMMEDIATE
  \ word called ( which just drops input characters until it hits the corresponding ).
  : ( IMMEDIATE
  \ FORTH allows ( ... ) as comments within function definitions.  This works by having an IMMEDIATE
  \ word called ( which just drops input characters until it hits the corresponding ).
  : ( IMMEDIATE
@@ -217,6 +237,8 @@
  (
         From now on we can use ( ... ) for comments.
  
  (
         From now on we can use ( ... ) for comments.
  
+       STACK NOTATION ----------------------------------------------------------------------
+
         In FORTH style we can also use ( ... -- ... ) to show the effects that a word has on the
         parameter stack.  For example:
  
         In FORTH style we can also use ( ... -- ... ) to show the effects that a word has on the
         parameter stack.  For example:
  
@@ -226,6 +248,16 @@
         ( -- )          means the word has no effect on the stack
  )
  
         ( -- )          means the word has no effect on the stack
  )
  
+( Some more complicated stack examples, showing the stack notation. )
+: NIP ( x y -- y ) SWAP DROP ;
+: TUCK ( x y -- y x y ) DUP ROT ;
+: PICK ( x_u ... x_1 x_0 u -- x_u ... x_1 x_0 x_u )
+       1+              ( add one because of 'u' on the stack )
+       4 *             ( multiply by the word size )
+       DSP@ +          ( add to the stack pointer )
+       @               ( and fetch )
+;
+
  ( With the looping constructs, we can now write SPACES, which writes n spaces to stdout. )
  : SPACES       ( n -- )
         BEGIN
  ( With the looping constructs, we can now write SPACES, which writes n spaces to stdout. )
  : SPACES       ( n -- )
         BEGIN
@@ -237,6 +269,142 @@
         DROP
  ;
  
         DROP
  ;
  
+( Standard words for manipulating BASE. )
+: DECIMAL ( -- ) 10 BASE ! ;
+: HEX ( -- ) 16 BASE ! ;
+
+(
+       PRINTING NUMBERS ----------------------------------------------------------------------
+
+       The standard FORTH word . (DOT) is very important.  It takes the number at the top
+       of the stack and prints it out.  However first I'm going to implement some lower-level
+       FORTH words:
+
+       U.R     ( u width -- )  which prints an unsigned number, padded to a certain width
+       U.      ( u -- )        which prints an unsigned number
+       .R      ( n width -- )  which prints a signed number, padded to a certain width.
+
+       For example:
+               -123 6 .R
+       will print out these characters:
+               <space> <space> - 1 2 3
+
+       In other words, the number padded left to a certain number of characters.
+
+       The full number is printed even if it is wider than width, and this is what allows us to
+       define the ordinary functions U. and . (we just set width to zero knowing that the full
+       number will be printed anyway).
+
+       Another wrinkle of . and friends is that they obey the current base in the variable BASE.
+       BASE can be anything in the range 2 to 36.
+
+       While we're defining . &c we can also define .S which is a useful debugging tool.  This
+       word prints the current stack (non-destructively) from top to bottom.
+)
+
+( This is the underlying recursive definition of U. )
+: U.           ( u -- )
+       BASE @ /MOD     ( width rem quot )
+       DUP 0<> IF      ( if quotient <> 0 then )
+               RECURSE         ( print the quotient )
+       ELSE
+               DROP            ( drop the zero quotient )
+       THEN
+
+       ( print the remainder )
+       DUP 10 < IF
+               '0'             ( decimal digits 0..9 )
+       ELSE
+               10 -            ( hex and beyond digits A..Z )
+               'A'
+       THEN
+       +
+       EMIT
+;
+
+(
+       FORTH word .S prints the contents of the stack.  It doesn't alter the stack.
+       Very useful for debugging.
+)
+: .S           ( -- )
+       DSP@            ( get current stack pointer )
+       BEGIN
+               DUP S0 @ <
+       WHILE
+               DUP @ U.        ( print the stack element )
+               SPACE
+               4+              ( move up )
+       REPEAT
+       DROP
+;
+
+( This word returns the width (in characters) of an unsigned number in the current base )
+: UWIDTH       ( u -- width )
+       BASE @ /        ( rem quot )
+       DUP 0<> IF      ( if quotient <> 0 then )
+               RECURSE 1+      ( return 1+recursive call )
+       ELSE
+               DROP            ( drop the zero quotient )
+               1               ( return 1 )
+       THEN
+;
+
+: U.R          ( u width -- )
+       SWAP            ( width u )
+       DUP             ( width u u )
+       UWIDTH          ( width u uwidth )
+       -ROT            ( u uwidth width )
+       SWAP -          ( u width-uwidth )
+       ( At this point if the requested width is narrower, we'll have a negative number on the stack.
+         Otherwise the number on the stack is the number of spaces to print.  But SPACES won't print
+         a negative number of spaces anyway, so it's now safe to call SPACES ... )
+       SPACES
+       ( ... and then call the underlying implementation of U. )
+       U.
+;
+
+(
+       .R prints a signed number, padded to a certain width.  We can't just print the sign
+       and call U.R because we want the sign to be next to the number ('-123' instead of '-  123').
+)
+: .R           ( n width -- )
+       SWAP            ( width n )
+       DUP 0< IF
+               NEGATE          ( width u )
+               1               ( save a flag to remember that it was negative | width n 1 )
+               ROT             ( 1 width u )
+               SWAP            ( 1 u width )
+               1-              ( 1 u width-1 )
+       ELSE
+               0               ( width u 0 )
+               ROT             ( 0 width u )
+               SWAP            ( 0 u width )
+       THEN
+       SWAP            ( flag width u )
+       DUP             ( flag width u u )
+       UWIDTH          ( flag width u uwidth )
+       -ROT            ( flag u uwidth width )
+       SWAP -          ( flag u width-uwidth )
+
+       SPACES          ( flag u )
+       SWAP            ( u flag )
+
+       IF                      ( was it negative? print the - character )
+               '-' EMIT
+       THEN
+
+       U.
+;
+
+( Finally we can define word . in terms of .R, with a trailing space. )
+: . 0 .R SPACE ;
+
+( The real U., note the trailing space. )
+: U. U. SPACE ;
+
+( ? fetches the integer at an address and prints it. )
+: ? ( addr -- ) @ . ;
+
  ( c a b WITHIN returns true if a <= c and c < b )
  : WITHIN
         ROT             ( b c a )
  ( c a b WITHIN returns true if a <= c and c < b )
  : WITHIN
         ROT             ( b c a )
@@ -253,18 +421,6 @@
         THEN
  ;
  
         THEN
  ;
  
-( .S prints the contents of the stack.  Very useful for debugging. )
-: .S           ( -- )
-       DSP@            ( get current stack pointer )
-       BEGIN
-               DUP S0 @ <
-       WHILE
-               DUP @ .         ( print the stack element )
-               4+              ( move up )
-       REPEAT
-       DROP
-;
-
  ( DEPTH returns the depth of the stack. )
  : DEPTH                ( -- n )
         S0 @ DSP@ -
  ( DEPTH returns the depth of the stack. )
  : DEPTH                ( -- n )
         S0 @ DSP@ -
@@ -272,8 +428,22 @@
  ;
  
  (
  ;
  
  (
+       ALIGNED takes an address and rounds it up (aligns it) to the next 4 byte boundary.
+)
+: ALIGNED      ( addr -- addr )
+       3 + 3 INVERT AND        ( (addr+3) & ~3 )
+;
+
+(
+       ALIGN aligns the HERE pointer, so the next word appended will be aligned properly.
+)
+: ALIGN HERE @ ALIGNED HERE ! ;
+
+(
+       STRINGS ----------------------------------------------------------------------
+
         S" string" is used in FORTH to define strings.  It leaves the address of the string and
         S" string" is used in FORTH to define strings.  It leaves the address of the string and
-       its length on the stack, with the address at the top.  The space following S" is the normal
+       its length on the stack, (length at the top of stack).  The space following S" is the normal
         space between FORTH words and is not a part of the string.
  
         This is tricky to define because it has to do different things depending on whether
         space between FORTH words and is not a part of the string.
  
         This is tricky to define because it has to do different things depending on whether
@@ -289,7 +459,7 @@
         case we put the string at HERE (but we _don't_ change HERE).  This is meant as a temporary
         location, likely to be overwritten soon after.
  )
         case we put the string at HERE (but we _don't_ change HERE).  This is meant as a temporary
         location, likely to be overwritten soon after.
  )
-: S" IMMEDIATE         ( -- len addr )
+: S" IMMEDIATE         ( -- addr len )
         STATE @ IF      ( compiling? )
                 ' LITSTRING ,   ( compile LITSTRING )
                 HERE @          ( save the address of the length word on the stack )
         STATE @ IF      ( compiling? )
                 ' LITSTRING ,   ( compile LITSTRING )
                 HERE @          ( save the address of the length word on the stack )
@@ -298,7 +468,7 @@
                         KEY             ( get next character of the string )
                         DUP '"' <>
                 WHILE
                         KEY             ( get next character of the string )
                         DUP '"' <>
                 WHILE
-                       HERE @ !b       ( store the character in the compiled image )
+                       HERE @ C!       ( store the character in the compiled image )
                         1 HERE +!       ( increment HERE pointer by 1 byte )
                 REPEAT
                 DROP            ( drop the double quote character at the end )
                         1 HERE +!       ( increment HERE pointer by 1 byte )
                 REPEAT
                 DROP            ( drop the double quote character at the end )
@@ -306,22 +476,20 @@
                 HERE @ SWAP -   ( calculate the length )
                 4-              ( subtract 4 (because we measured from the start of the length word) )
                 SWAP !          ( and back-fill the length location )
                 HERE @ SWAP -   ( calculate the length )
                 4-              ( subtract 4 (because we measured from the start of the length word) )
                 SWAP !          ( and back-fill the length location )
-               HERE @          ( round up to next multiple of 4 bytes for the remaining code )
-               3 +
-               3 INVERT AND
-               HERE !
+               ALIGN           ( round up to next multiple of 4 bytes for the remaining code )
         ELSE            ( immediate mode )
                 HERE @          ( get the start address of the temporary space )
                 BEGIN
                         KEY
                         DUP '"' <>
                 WHILE
         ELSE            ( immediate mode )
                 HERE @          ( get the start address of the temporary space )
                 BEGIN
                         KEY
                         DUP '"' <>
                 WHILE
-                       OVER !b         ( save next character )
+                       OVER C!         ( save next character )
                         1+              ( increment address )
                 REPEAT
                 DROP            ( drop the final " character )
                 HERE @ -        ( calculate the length )
                 HERE @          ( push the start address )
                         1+              ( increment address )
                 REPEAT
                 DROP            ( drop the final " character )
                 HERE @ -        ( calculate the length )
                 HERE @          ( push the start address )
+               SWAP            ( addr len )
         THEN
  ;
  
         THEN
  ;
  
@@ -333,8 +501,8 @@
         In immediate mode we just keep reading characters and printing them until we get to
         the next double quote.
  
         In immediate mode we just keep reading characters and printing them until we get to
         the next double quote.
  
-       In compile mode we use S" to store the string, then add EMITSTRING afterwards:
-               LITSTRING <string length> <string rounded up to 4 bytes> EMITSTRING
+       In compile mode we use S" to store the string, then add TELL afterwards:
+               LITSTRING <string length> <string rounded up to 4 bytes> TELL
  
         It may be interesting to note the use of [COMPILE] to turn the call to the immediate
         word S" into compilation of that word.  It compiles it into the definition of .",
  
         It may be interesting to note the use of [COMPILE] to turn the call to the immediate
         word S" into compilation of that word.  It compiles it into the definition of .",
@@ -344,7 +512,7 @@
  : ." IMMEDIATE         ( -- )
         STATE @ IF      ( compiling? )
                 [COMPILE] S"    ( read the string, and compile LITSTRING, etc. )
  : ." IMMEDIATE         ( -- )
         STATE @ IF      ( compiling? )
                 [COMPILE] S"    ( read the string, and compile LITSTRING, etc. )
-               ' EMITSTRING ,  ( compile the final EMITSTRING )
+               ' TELL ,        ( compile the final TELL )
         ELSE
                 ( In immediate mode, just read characters and print them until we get
                   to the ending double quote. )
         ELSE
                 ( In immediate mode, just read characters and print them until we get
                   to the ending double quote. )
@@ -360,12 +528,14 @@
  ;
  
  (
  ;
  
  (
+       CONSTANTS AND VARIABLES ----------------------------------------------------------------------
+
         In FORTH, global constants and variables are defined like this:
  
         10 CONSTANT TEN         when TEN is executed, it leaves the integer 10 on the stack
         VARIABLE VAR            when VAR is executed, it leaves the address of VAR on the stack
  
         In FORTH, global constants and variables are defined like this:
  
         10 CONSTANT TEN         when TEN is executed, it leaves the integer 10 on the stack
         VARIABLE VAR            when VAR is executed, it leaves the address of VAR on the stack
  
-       Constants can be read by not written, eg:
+       Constants can be read but not written, eg:
  
         TEN . CR                prints 10
  
  
         TEN . CR                prints 10
  
@@ -373,6 +543,7 @@
  
         VAR @                   leaves the value of VAR on the stack
         VAR @ . CR              prints the value of VAR
  
         VAR @                   leaves the value of VAR on the stack
         VAR @ . CR              prints the value of VAR
+       VAR ? CR                same as above, since ? is the same as @ .
  
         and update the variable by doing:
  
  
         and update the variable by doing:
  
@@ -407,6 +578,9 @@
  
         Notice that this word definition is exactly the same as you would have got if you had
         written : TEN 10 ;
  
         Notice that this word definition is exactly the same as you would have got if you had
         written : TEN 10 ;
+
+       Note for people reading the code below: DOCOL is a constant word which we defined in the
+       assembler part which returns the value of the assembler symbol of the same name.
  )
  : CONSTANT
         CREATE          ( make the dictionary entry (the name follows CONSTANT) )
  )
  : CONSTANT
         CREATE          ( make the dictionary entry (the name follows CONSTANT) )
@@ -466,6 +640,8 @@
  ;
  
  (
  ;
  
  (
+       VALUES ----------------------------------------------------------------------
+
         VALUEs are like VARIABLEs but with a simpler syntax.  You would generally use them when you
         want a variable which is read often, and written infrequently.
  
         VALUEs are like VARIABLEs but with a simpler syntax.  You would generally use them when you
         want a variable which is read often, and written infrequently.
  
@@ -551,20 +727,22 @@
  ;
  
  (
  ;
  
  (
+       PRINTING THE DICTIONARY ----------------------------------------------------------------------
+
         ID. takes an address of a dictionary entry and prints the word's name.
  
         For example: LATEST @ ID. would print the name of the last word that was defined.
  )
  : ID.
         4+              ( skip over the link pointer )
         ID. takes an address of a dictionary entry and prints the word's name.
  
         For example: LATEST @ ID. would print the name of the last word that was defined.
  )
  : ID.
         4+              ( skip over the link pointer )
-       DUP @b          ( get the flags/length byte )
+       DUP C@          ( get the flags/length byte )
         F_LENMASK AND   ( mask out the flags - just want the length )
  
         BEGIN
                 DUP 0>          ( length > 0? )
         WHILE
                 SWAP 1+         ( addr len -- len addr+1 )
         F_LENMASK AND   ( mask out the flags - just want the length )
  
         BEGIN
                 DUP 0>          ( length > 0? )
         WHILE
                 SWAP 1+         ( addr len -- len addr+1 )
-               DUP @b          ( len addr -- len addr char | get the next character)
+               DUP C@          ( len addr -- len addr char | get the next character)
                 EMIT            ( len addr char -- len addr | and print it)
                 SWAP 1-         ( len addr -- addr len-1    | subtract one from length )
         REPEAT
                 EMIT            ( len addr char -- len addr | and print it)
                 SWAP 1-         ( len addr -- addr len-1    | subtract one from length )
         REPEAT
@@ -578,12 +756,12 @@
  )
  : ?HIDDEN
         4+              ( skip over the link pointer )
  )
  : ?HIDDEN
         4+              ( skip over the link pointer )
-       @b              ( get the flags/length byte )
+       C@              ( get the flags/length byte )
         F_HIDDEN AND    ( mask the F_HIDDEN flag and return it (as a truth value) )
  ;
  : ?IMMEDIATE
         4+              ( skip over the link pointer )
         F_HIDDEN AND    ( mask the F_HIDDEN flag and return it (as a truth value) )
  ;
  : ?IMMEDIATE
         4+              ( skip over the link pointer )
-       @b              ( get the flags/length byte )
+       C@              ( get the flags/length byte )
         F_IMMED AND     ( mask the F_IMMED flag and return it (as a truth value) )
  ;
  
         F_IMMED AND     ( mask the F_IMMED flag and return it (as a truth value) )
  ;
  
@@ -609,6 +787,8 @@
  ;
  
  (
  ;
  
  (
+       FORGET ----------------------------------------------------------------------
+
         So far we have only allocated words and memory.  FORTH provides a rather primitive method
         to deallocate.
  
         So far we have only allocated words and memory.  FORTH provides a rather primitive method
         to deallocate.
  
@@ -633,20 +813,12 @@
  ;
  
  (
  ;
  
  (
-       RECURSE makes a recursive call to the current word that is being compiled.
+       DUMP ----------------------------------------------------------------------
  
  
-       Normally while a word is being compiled, it is marked HIDDEN so that references to the
-       same word within are calls to the previous definition of the word.  However we still have
-       access to the word which we are currently compiling through the LATEST pointer so we
-       can use that to compile a recursive call.
-)
-: RECURSE IMMEDIATE
-       LATEST @ >CFA   ( LATEST points to the word being compiled at the moment )
-       ,               ( compile it )
-;
-
-(
         DUMP is used to dump out the contents of memory, in the 'traditional' hexdump format.
         DUMP is used to dump out the contents of memory, in the 'traditional' hexdump format.
+
+       Notice that the parameters to DUMP (address, length) are compatible with string words
+       such as WORD and S".
  )
  : DUMP         ( addr len -- )
         BASE @ ROT              ( save the current BASE at the bottom of the stack )
  )
  : DUMP         ( addr len -- )
         BASE @ ROT              ( save the current BASE at the bottom of the stack )
@@ -655,7 +827,7 @@
         BEGIN
                 DUP 0>          ( while len > 0 )
         WHILE
         BEGIN
                 DUP 0>          ( while len > 0 )
         WHILE
-               OVER .          ( print the address )
+               OVER 8 U.R      ( print the address )
                 SPACE
  
                 ( print up to 16 words on this line )
                 SPACE
  
                 ( print up to 16 words on this line )
@@ -665,8 +837,8 @@
                         DUP 0>          ( while linelen > 0 )
                 WHILE
                         SWAP            ( addr len linelen addr )
                         DUP 0>          ( while linelen > 0 )
                 WHILE
                         SWAP            ( addr len linelen addr )
-                       DUP @b          ( addr len linelen addr byte )
-                       . SPACE         ( print the byte )
+                       DUP C@          ( addr len linelen addr byte )
+                       2 .R SPACE      ( print the byte )
                         1+ SWAP 1-      ( addr len linelen addr -- addr len addr+1 linelen-1 )
                 REPEAT
                 2DROP           ( addr len )
                         1+ SWAP 1-      ( addr len linelen addr -- addr len addr+1 linelen-1 )
                 REPEAT
                 2DROP           ( addr len )
@@ -677,11 +849,11 @@
                         DUP 0>          ( while linelen > 0)
                 WHILE
                         SWAP            ( addr len linelen addr )
                         DUP 0>          ( while linelen > 0)
                 WHILE
                         SWAP            ( addr len linelen addr )
-                       DUP @b          ( addr len linelen addr byte )
+                       DUP C@          ( addr len linelen addr byte )
                         DUP 32 128 WITHIN IF    ( 32 <= c < 128? )
                                 EMIT
                         ELSE
                         DUP 32 128 WITHIN IF    ( 32 <= c < 128? )
                                 EMIT
                         ELSE
-                               DROP [ CHAR ? ] LITERAL EMIT
+                               DROP '.' EMIT
                         THEN
                         1+ SWAP 1-      ( addr len linelen addr -- addr len addr+1 linelen-1 )
                 REPEAT
                         THEN
                         1+ SWAP 1-      ( addr len linelen addr -- addr len addr+1 linelen-1 )
                 REPEAT
@@ -701,6 +873,400 @@
         BASE !                  ( restore saved BASE )
  ;
  
         BASE !                  ( restore saved BASE )
  ;
  
+(
+       CASE ----------------------------------------------------------------------
+
+       CASE...ENDCASE is how we do switch statements in FORTH.  There is no generally
+       agreed syntax for this, so I've gone for the syntax mandated by the ISO standard
+       FORTH (ANS-FORTH).
+
+       ( some value on the stack )
+       CASE
+       test1 OF ... ENDOF
+       test2 OF ... ENDOF
+       testn OF ... ENDOF
+       ... ( default case )
+       ENDCASE
+
+       The CASE statement tests the value on the stack by comparing it for equality with
+       test1, test2, ..., testn and executes the matching piece of code within OF ... ENDOF.
+       If none of the test values match then the default case is executed.  Inside the ... of
+       the default case, the value is still at the top of stack (it is implicitly DROP-ed
+       by ENDCASE).  When ENDOF is executed it jumps after ENDCASE (ie. there is no "fall-through"
+       and no need for a break statement like in C).
+
+       The default case may be omitted.  In fact the tests may also be omitted so that you
+       just have a default case, although this is probably not very useful.
+
+       An example (assuming that 'q', etc. are words which push the ASCII value of the letter
+       on the stack):
+
+       0 VALUE QUIT
+       0 VALUE SLEEP
+       KEY CASE
+               'q' OF 1 TO QUIT ENDOF
+               's' OF 1 TO SLEEP ENDOF
+               ( default case: )
+               ." Sorry, I didn't understand key <" DUP EMIT ." >, try again." CR
+       ENDCASE
+
+       (In some versions of FORTH, more advanced tests are supported, such as ranges, etc.
+       Other versions of FORTH need you to write OTHERWISE to indicate the default case.
+       As I said above, this FORTH tries to follow the ANS FORTH standard).
+
+       The implementation of CASE...ENDCASE is somewhat non-trivial.  I'm following the
+       implementations from here:
+       http://www.uni-giessen.de/faq/archiv/forthfaq.case_endcase/msg00000.html
+
+       The general plan is to compile the code as a series of IF statements:
+
+       CASE                            (push 0 on the immediate-mode parameter stack)
+       test1 OF ... ENDOF              test1 OVER = IF DROP ... ELSE
+       test2 OF ... ENDOF              test2 OVER = IF DROP ... ELSE
+       testn OF ... ENDOF              testn OVER = IF DROP ... ELSE
+       ... ( default case )            ...
+       ENDCASE                         DROP THEN [THEN [THEN ...]]
+
+       The CASE statement pushes 0 on the immediate-mode parameter stack, and that number
+       is used to count how many THEN statements we need when we get to ENDCASE so that each
+       IF has a matching THEN.  The counting is done implicitly.  If you recall from the
+       implementation above of IF, each IF pushes a code address on the immediate-mode stack,
+       and these addresses are non-zero, so by the time we get to ENDCASE the stack contains
+       some number of non-zeroes, followed by a zero.  The number of non-zeroes is how many
+       times IF has been called, so how many times we need to match it with THEN.
+
+       This code uses [COMPILE] so that we compile calls to IF, ELSE, THEN instead of
+       actually calling them while we're compiling the words below.
+
+       As is the case with all of our control structures, they only work within word
+       definitions, not in immediate mode.
+)
+: CASE IMMEDIATE
+       0               ( push 0 to mark the bottom of the stack )
+;
+
+: OF IMMEDIATE
+       ' OVER ,        ( compile OVER )
+       ' = ,           ( compile = )
+       [COMPILE] IF    ( compile IF )
+       ' DROP ,        ( compile DROP )
+;
+
+: ENDOF IMMEDIATE
+       [COMPILE] ELSE  ( ENDOF is the same as ELSE )
+;
+
+: ENDCASE IMMEDIATE
+       ' DROP ,        ( compile DROP )
+
+       ( keep compiling THEN until we get to our zero marker )
+       BEGIN
+               ?DUP
+       WHILE
+               [COMPILE] THEN
+       REPEAT
+;
+
+(
+       DECOMPILER ----------------------------------------------------------------------
+
+       CFA> is the opposite of >CFA.  It takes a codeword and tries to find the matching
+       dictionary definition.
+
+       In this FORTH this is not so easy.  In fact we have to search through the dictionary
+       because we don't have a convenient back-pointer (as is often the case in other versions
+       of FORTH).
+
+       This word returns 0 if it doesn't find a match.
+)
+: CFA>
+       LATEST @        ( start at LATEST dictionary entry )
+       BEGIN
+               DUP 0<>         ( while link pointer is not null )
+       WHILE
+               DUP >CFA        ( cfa curr curr-cfa )
+               2 PICK          ( cfa curr curr-cfa cfa )
+               = IF            ( found a match? )
+                       NIP             ( leave curr dictionary entry on the stack )
+                       EXIT            ( and return from the function )
+               THEN
+               @               ( follow link pointer back )
+       REPEAT
+       2DROP           ( restore stack )
+       0               ( sorry, nothing found )
+;
+
+(
+       SEE decompiles a FORTH word.
+
+       We search for the dictionary entry of the word, then search again for the next
+       word (effectively, the end of the compiled word).  This results in two pointers:
+
+       +---------+---+---+---+---+------------+------------+------------+------------+
+       | LINK    | 3 | T | E | N | DOCOL      | LIT        | 10         | EXIT       |
+       +---------+---+---+---+---+------------+------------+------------+------------+
+        ^                                                                             ^
+        |                                                                             |
+       Start of word                                                         End of word
+
+       With this information we can have a go at decompiling the word.  We need to
+       recognise "meta-words" like LIT, LITSTRING, BRANCH, etc. and treat those separately.
+)
+: SEE
+       WORD FIND       ( find the dictionary entry to decompile )
+
+       ( Now we search again, looking for the next word in the dictionary.  This gives us
+         the length of the word that we will be decompiling.  (Well, mostly it does). )
+       HERE @          ( address of the end of the last compiled word )
+       LATEST @        ( word last curr )
+       BEGIN
+               2 PICK          ( word last curr word )
+               OVER            ( word last curr word curr )
+               <>              ( word last curr word<>curr? )
+       WHILE                   ( word last curr )
+               NIP             ( word curr )
+               DUP @           ( word curr prev (which becomes: word last curr) )
+       REPEAT
+
+       DROP            ( at this point, the stack is: start-of-word end-of-word )
+       SWAP            ( end-of-word start-of-word )
+
+       ( begin the definition with : NAME [IMMEDIATE] )
+       ':' EMIT SPACE DUP ID. SPACE
+       DUP ?IMMEDIATE IF ." IMMEDIATE " THEN
+
+       >DFA            ( get the data address, ie. points after DOCOL | end-of-word start-of-data )
+
+       ( now we start decompiling until we hit the end of the word )
+       BEGIN           ( end start )
+               2DUP >
+       WHILE
+               DUP @           ( end start codeword )
+
+               CASE
+               ' LIT OF                ( is it LIT ? )
+                       4 + DUP @               ( get next word which is the integer constant )
+                       .                       ( and print it )
+               ENDOF
+               ' LITSTRING OF          ( is it LITSTRING ? )
+                       [ CHAR S ] LITERAL EMIT '"' EMIT SPACE ( print S"<space> )
+                       4 + DUP @               ( get the length word )
+                       SWAP 4 + SWAP           ( end start+4 length )
+                       2DUP TELL               ( print the string )
+                       '"' EMIT SPACE          ( finish the string with a final quote )
+                       + ALIGNED               ( end start+4+len, aligned )
+                       4 -                     ( because we're about to add 4 below )
+               ENDOF
+               ' 0BRANCH OF            ( is it 0BRANCH ? )
+                       ." 0BRANCH ( "
+                       4 + DUP @               ( print the offset )
+                       .
+                       ')' EMIT SPACE
+               ENDOF
+               ' BRANCH OF             ( is it BRANCH ? )
+                       ." BRANCH ( "
+                       4 + DUP @               ( print the offset )
+                       .
+                       ')' EMIT SPACE
+               ENDOF
+               ' ' OF                  ( is it ' (TICK) ? )
+                       [ CHAR ' ] LITERAL EMIT SPACE
+                       4 + DUP @               ( get the next codeword )
+                       CFA>                    ( and force it to be printed as a dictionary entry )
+                       ID. SPACE
+               ENDOF
+               ' EXIT OF               ( is it EXIT? )
+                       ( We expect the last word to be EXIT, and if it is then we don't print it
+                         because EXIT is normally implied by ;.  EXIT can also appear in the middle
+                         of words, and then it needs to be printed. )
+                       2DUP                    ( end start end start )
+                       4 +                     ( end start end start+4 )
+                       <> IF                   ( end start | we're not at the end )
+                               ." EXIT "
+                       THEN
+               ENDOF
+                                       ( default case: )
+                       DUP                     ( in the default case we always need to DUP before using )
+                       CFA>                    ( look up the codeword to get the dictionary entry )
+                       ID. SPACE               ( and print it )
+               ENDCASE
+
+               4 +             ( end start+4 )
+       REPEAT
+
+       ';' EMIT CR
+
+       2DROP           ( restore stack )
+;
+
+(
+       DOES> ----------------------------------------------------------------------
+
+       CREATE ... DOES> is a tricky construct allowing you to create words which create other words.
+       For example CONSTANT (defined above) is a word which creates words, and it could have been
+       written as follows:
+
+               : CONSTANT CREATE DOCOL , , DOES> @ ;
+
+       Even explaining what DOES> is supposed to do is tricky.  It's possible that the implementation
+       is easier to understand than the explanation.
+
+       If we look at the definition of CONSTANT here, and remember that when it is called the value
+       of the constant is on the stack and the name follows.  So first CREATE makes the header of a
+       new word with the name.  Secondly the codeword is set to DOCOL.  Thirdly , (COMMA) takes the
+       value off the stack and adds it to the definition.  At this point (just before executing DOES>)
+       the word looks like this:
+
+         ________ CREATE _______   _ DOCOL ,_   ____ , ___
+        /                       \ /          \ /          \
+       +---------+---+---+---+---+------------+------------+
+       | LINK    | 3 | T | E | N | DOCOL      | 10         |
+       +---------+---+---+---+---+------------+------------+
+            ^      len              codeword
+           |
+         LATEST
+
+       
+)
+
+
+
+
+: DOES>
+       R> LATEST @ >DFA !
+;
+
+(
+       C STRINGS ----------------------------------------------------------------------
+
+       FORTH strings are represented by a start address and length kept on the stack or in memory.
+
+       Most FORTHs don't handle C strings, but we need them in order to access the process arguments
+       and environment left on the stack by the Linux kernel.
+
+       The main function we need is STRLEN which works out the length of a C string.  DUP STRLEN is
+       a common idiom which 'converts' a C string into a FORTH string.  (For example, DUP STRLEN TELL
+       prints a C string).
+)
+
+( STRLEN returns the length of a C string )
+: STRLEN       ( str -- len )
+       DUP             ( save start address )
+       BEGIN
+               DUP C@ 0<>      ( zero byte found? )
+       WHILE
+               1+
+       REPEAT
+
+       SWAP -          ( calculate the length )
+;
+
+(
+       STRNCMP compares two strings up to a length.  As with C's strncmp it returns 0 if they
+       are equal, or a number > 0 or < 0 indicating their order.
+)
+: STRNCMP      ( str1 str2 len -- eq? )
+       BEGIN
+               ?DUP
+       WHILE
+               ROT             ( len str1 str2 )
+               DUP C@          ( len str1 str2 char2 )
+               2 PICK C@       ( len str1 str2 char2 char1 )
+               OVER            ( len str1 str2 char2 char1 char2 )
+               -               ( len str1 str2 char2 char1-char2 )
+
+               ?DUP IF         ( strings not the same at this position? )
+                       NIP             ( len str1 str2 diff )
+                       ROT             ( len diff str1 str2 )
+                       DROP DROP       ( len diff )
+                       NIP             ( diff )
+                       EXIT
+               THEN
+
+               0= IF           ( characters are equal, but is this the end of the C string? )
+                       DROP DROP DROP
+                       0
+                       EXIT
+               THEN
+
+               1+              ( len str1 str2+1 )
+               ROT             ( str2+1 len str1 )
+               1+ ROT          ( str1+1 str2+1 len )
+               1-              ( str1+1 str2+1 len-1 )
+       REPEAT
+
+       2DROP           ( restore stack )
+       0               ( equal )
+;
+
+(
+       THE ENVIRONMENT ----------------------------------------------------------------------
+
+       Linux makes the process arguments and environment available to us on the stack.
+
+       The top of stack pointer is saved by the early assembler code when we start up in the FORTH
+       variable S0, and starting at this pointer we can read out the command line arguments and the
+       environment.
+
+       Starting at S0, S0 itself points to argc (the number of command line arguments).
+
+       S0+4 points to argv[0], S0+8 points to argv[1] etc up to argv[argc-1].
+
+       argv[argc] is a NULL pointer.
+
+       After that the stack contains environment variables, a set of pointers to strings of the
+       form NAME=VALUE and on until we get to another NULL pointer.
+
+       The first word that we define, ARGC, pushes the number of command line arguments (note that
+       as with C argc, this includes the name of the command).
+)
+: ARGC
+       S0 @ @
+;
+
+(
+       n ARGV gets the nth command line argument.
+
+       For example to print the command name you would do:
+               0 ARGV TELL CR
+)
+: ARGV ( n -- str u )
+       1+ CELLS S0 @ + ( get the address of argv[n] entry )
+       @               ( get the address of the string )
+       DUP STRLEN      ( and get its length / turn it into a FORTH string )
+;
+
+(
+       ENVIRON returns the address of the first environment string.  The list of strings ends
+       with a NULL pointer.
+
+       For example to print the first string in the environment you could do:
+               ENVIRON @ DUP STRLEN TELL
+)
+: ENVIRON      ( -- addr )
+       ARGC            ( number of command line parameters on the stack to skip )
+       2 +             ( skip command line count and NULL pointer after the command line args )
+       CELLS           ( convert to an offset )
+       S0 @ +          ( add to base stack address )
+;
+
+(
+       ANS FORTH ----------------------------------------------------------------------
+
+       From this point we're trying to fill in the missing parts of the ISO standard, commonly
+       referred to as ANS FORTH.
+
+       http://www.taygeta.com/forth/dpans.html
+       http://www.taygeta.com/forth/dpansf.htm (list of words)
+)
+( BL pushes the ASCII character code of space on the stack. )
+: BL 32 ;
+
+( C, writes a byte at the HERE pointer. )
+: C, HERE @ C! 1 HERE +! ;
+
+
+
  ( Finally print the welcome prompt. )
  ." JONESFORTH VERSION " VERSION . CR
  ." OK "
  ( Finally print the welcome prompt. )
  ." JONESFORTH VERSION " VERSION . CR
  ." OK "