1 /* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*-
2 * By Richard W.M. Jones <rich@annexia.org>
4 * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S
7 #include <asm-i386/unistd.h>
9 /* NOTES-------------------------------------------------------------------------------------------------------------------
11 Need to say something about $ before constants.
13 And about je/jne/ja/jb/jbe/etc
26 /* Macros to deal with the return stack. */
28 lea -4(%ebp),%ebp // push reg on to return stack
33 mov (%ebp),\reg // pop top of return stack to reg
37 /* ELF entry point. */
42 mov $return_stack,%ebp // Initialise the return stack.
44 mov $cold_start,%esi // Initialise interpreter.
45 NEXT // Run interpreter!
48 cold_start: // High-level code without a codeword.
51 /* DOCOL - the interpreter! */
55 PUSHRSP %esi // push %esi on to the return stack
56 addl $4,%eax // %eax points to codeword, so make
57 movl %eax,%esi // %esi point to first data word
60 /*----------------------------------------------------------------------
61 * Fixed sized buffers for everything.
65 /* FORTH return stack. */
66 #define RETURN_STACK_SIZE 8192
68 .space RETURN_STACK_SIZE
71 /* Space for user-defined words. */
72 #define USER_DEFS_SIZE 16384
82 /*----------------------------------------------------------------------
83 * Built-in words defined the long way.
88 // Store the chain of links.
91 .macro defcode name, namelen, flags=0, label
98 .byte \flags+\namelen // flags + length byte
99 .ascii "\name" // the name
103 .int code_\label // codeword
107 code_\label : // assembler code follows
110 .macro defword name, namelen, flags=0, label
116 .set link,name_\label
117 .byte \flags+\namelen // flags + length byte
118 .ascii "\name" // the name
122 .int DOCOL // codeword - the interpreter
123 // list of word pointers follow
126 .macro defvar name, namelen, flags=0, label, initial=0
127 defcode \name,\namelen,\flags,\label
136 // Some easy ones, written in assembly for speed
137 defcode "DROP",4,,DROP
138 pop %eax // drop top of stack
142 pop %eax // duplicate top of stack
147 defcode "SWAP",4,,SWAP
148 pop %eax // swap top of stack
154 defcode "OVER",4,,OVER
155 mov 4(%esp),%eax // get the second element of stack
156 push %eax // and push it on top
160 incl (%esp) // increment top of stack
164 decl (%esp) // decrement top of stack
181 push %eax // ignore overflow
189 push %eax // push quotient
197 push %edx // push remainder
200 // COLD must not return (ie. must not call EXIT).
201 defword "COLD",4,,COLD
202 // XXX reinitialisation of the interpreter
203 .int INTERPRETER // call the interpreter loop (never returns)
204 .int LIT,1,SYSEXIT // hmmm, but in case it does, exit(1).
206 defcode "EXIT",4,,EXIT
207 POPRSP %esi // pop return stack into %esi
211 // %esi points to the next command, but in this case it points to the next
212 // literal 32 bit integer. Get that literal into %eax and increment %esi.
213 // On x86, it's a convenient single byte instruction! (cf. NEXT macro)
215 push %eax // push the literal number on to stack
219 defcode "0SKIP",5,,ZSKIP
220 // If the top of stack is zero, skip the next instruction.
224 lodsl // this does the skip
229 pop %ebx // address to store at
230 pop %eax // data to store there
231 mov %eax,(%ebx) // store it
235 pop %ebx // address to fetch
236 mov (%ebx),%eax // fetch it
237 push %eax // push value onto stack
240 // The STATE variable is 0 for execute mode, != 0 for compile mode
241 defvar "STATE",5,,STATE
243 // This points to where compiled words go.
244 defvar "HERE",4,,HERE,user_defs_start
246 // This is the last definition in the dictionary.
247 defvar "LATEST",6,,LATEST,name_SYSEXIT // SYSEXIT must be last in built-in dictionary
249 // _X, _Y and _Z are scratch variables used by standard words.
255 pop %eax // pop parameter stack into %eax
256 PUSHRSP %eax // push it on to the return stack
259 defcode "R>",2,,FROMR
260 POPRSP %eax // pop return stack on to %eax
261 push %eax // and push on to parameter stack
264 defcode "RSP@",4,,RSPFETCH
268 defcode "RSP!",4,,RSPSTORE
272 defcode "RDROP",5,,RDROP
273 lea 4(%ebp),%ebp // pop return stack and throw away
278 push %eax // push return value on stack
290 1: // out of input; use read(2) to fetch more input from stdin
291 xor %ebx,%ebx // 1st param: stdin
292 mov $buffer,%ecx // 2nd param: buffer
294 mov $buffend-buffer,%edx // 3rd param: max length
295 mov $__NR_read,%eax // syscall: read
297 test %eax,%eax // If %eax <= 0, then exit.
299 addl %eax,%ecx // buffer+%eax = bufftop
303 2: // error or out of input: exit
305 mov $__NR_exit,%eax // syscall: exit
308 defcode "EMIT",4,,EMIT
313 mov $1,%ebx // 1st param: stdout
315 // write needs the address of the byte to write
317 mov $2f,%ecx // 2nd param: address
319 mov $1,%edx // 3rd param: nbytes = 1
321 mov $__NR_write,%eax // write syscall
326 2: .space 1 // scratch used by EMIT
328 defcode "WORD",4,,WORD
330 push %ecx // push length
331 push %edi // push base address
335 /* Search for first non-blank character. Also skip \ comments. */
337 call _KEY // get next key, returned in %eax
338 cmpb $'\\',%al // start of a comment?
339 je 3f // if so, skip the comment
341 jbe 1b // if so, keep looking
343 /* Search for the end of the word, storing chars as we go. */
344 mov $5f,%edi // pointer to return buffer
346 stosb // add character to return buffer
347 call _KEY // get next key, returned in %al
348 cmpb $' ',%al // is blank?
349 ja 2b // if not, keep looping
351 /* Return the word (well, the static buffer) and length. */
353 mov %edi,%ecx // return length of the word
354 mov $5f,%edi // return address of the word
357 /* Code to skip \ comments to end of the current line. */
360 cmpb $'\n',%al // end of line yet?
365 // A static buffer where WORD returns. Subsequent calls
366 // overwrite this buffer. Maximum word length is 32 chars.
369 defcode "EMITWORD",8,,EMITWORD
370 mov $1,%ebx // 1st param: stdout
371 pop %ecx // 2nd param: address of string
372 pop %edx // 3rd param: length of string
374 mov $__NR_write,%eax // write syscall
380 pop %eax // Get the number to print into %eax
381 call _DOT // Easier to do this recursively ...
384 mov $10,%ecx // Base 10
388 xor %edx,%edx // %edx:%eax / %ecx -> quotient %eax, remainder %edx
402 // Parse a number from a string on the stack -- almost the opposite of . (DOT)
403 // Note that there is absolutely no error checking. In particular the length of the
404 // string must be >= 1 bytes.
405 defcode "SNUMBER",7,,SNUMBER
415 imull $10,%eax // %eax *= 10
418 subb $'0',%bl // ASCII -> digit
424 defcode "FIND",4,,FIND
425 pop %edi // %edi = address
426 pop %ecx // %ecx = length
432 push %esi // Save %esi so we can use it in string comparison.
434 // Now we start searching backwards through the dictionary for this word.
435 mov var_LATEST,%edx // LATEST points to name header of the latest word in the dictionary
437 test %edx,%edx // NULL pointer? (end of the linked list)
440 // Compare the length expected and the length of the word.
441 // Note that if the F_HIDDEN flag is set on the word, then by a bit of trickery
442 // this won't pick the word (the length will appear to be wrong).
444 movb 4(%edx),%al // %al = flags+length field
445 andb $(F_HIDDEN|0x1f),%al // %al = name length
446 cmpb %cl,%al // Length is the same?
449 // Compare the strings in detail.
450 push %ecx // Save the length
451 push %edi // Save the address (repe cmpsb will move this pointer)
452 lea 5(%edx),%esi // Dictionary string we are checking against.
453 repe cmpsb // Compare the strings.
456 jne 2f // Not the same.
458 // The strings are the same - return the header pointer in %eax
464 mov (%edx),%edx // Move back through the link field to the previous word
465 jmp 1b // .. and loop.
469 xor %eax,%eax // Return zero to indicate not found.
472 defcode ">CFA",4,,TCFA // DEA -> Codeword address
479 add $4,%edi // Skip link pointer.
480 movb (%edi),%al // Load flags+len into %al.
481 inc %edi // Skip flags+len byte.
482 andb $0x1f,%al // Just the length, not the flags.
483 add %eax,%edi // Skip the name.
484 addl $3,%edi // The codeword is 4-byte aligned.
490 .int WORD // Get the following word.
491 .int FIND // Look it up in the dictionary.
492 .int DUP // If not found, skip >CFA (TCFA) instruction.
494 .int TCFA // Convert to a codeword pointer.
500 // Get the word and create a dictionary entry header for it.
501 call _WORD // Returns %ecx = length, %edi = pointer to word.
502 mov %edi,%ebx // %ebx = address of the word
504 movl var_HERE,%edi // %edi is the address of the header
505 movl var_LATEST,%eax // Get link pointer
506 stosl // and store it in the header.
508 mov %cl,%al // Get the length.
509 orb $F_HIDDEN,%al // Set the HIDDEN flag on this entry.
510 stosb // Store the length/flags byte.
512 mov %ebx,%esi // %esi = word
513 rep movsb // Copy the word
515 addl $3,%edi // Align to next 4 byte boundary.
518 movl $DOCOL,%eax // The codeword for user-created words is always DOCOL (the interpreter)
521 // Header built, so now update LATEST and HERE.
522 // We'll be compiling words and putting them HERE.
527 // And go into compile mode by setting STATE to 1.
534 pop %eax // Code pointer to store.
538 movl var_HERE,%edi // HERE
540 movl %edi,var_HERE // Update HERE (incremented)
543 defcode "HIDDEN",6,,HIDDEN
547 movl var_LATEST,%edi // LATEST word.
548 addl $4,%edi // Point to name/flags byte.
549 xorb $F_HIDDEN,(%edi) // Toggle the HIDDEN bit.
552 defcode "IMMEDIATE",9,F_IMMED,IMMEDIATE
556 movl var_LATEST,%edi // LATEST word.
557 addl $4,%edi // Point to name/flags byte.
558 xorb $F_IMMED,(%edi) // Toggle the IMMED bit.
561 defcode ";",1,F_IMMED,SEMICOLON
562 movl $EXIT,%eax // EXIT is the final codeword in compiled words.
563 call _COMMA // Store it.
564 call _HIDDEN // Toggle the HIDDEN flag (unhides the new word).
565 xor %eax,%eax // Set STATE to 0 (back to execute mode).
569 /* This interpreter is pretty simple, but remember that in FORTH you can always override
570 * it later with a more powerful one!
572 defword "INTERPRETER",11,,INTERPRETER
573 .int INTERPRET,RDROP,INTERPRETER
575 defcode "INTERPRET",9,,INTERPRET
576 call _WORD // Returns %ecx = length, %edi = pointer to word.
578 // Is it in the dictionary?
579 call _FIND // Returns %eax = pointer to header or 0 if not found.
580 test %eax,%eax // Found?
583 // In the dictionary. Is it an IMMEDIATE codeword?
584 mov %eax,%edi // %edi = dictionary entry
585 movb 4(%edi),%al // Get name+flags.
586 push %ax // Just save it for now.
587 call _TCFA // Convert dictionary entry (in %edi) to codeword pointer.
589 andb $F_IMMED,%al // Is IMMED flag set?
591 jnz 3f // If IMMED, jump straight to executing.
595 1: // Not in the dictionary (not a word) so assume it's a number.
596 call _SNUMBER // Returns the parsed number in %eax
598 mov $LIT,%eax // The word is LIT
600 2: // Are we compiling or executing?
603 jz 3f // Jump if executing.
605 // Compiling - just append the word to the current dictionary definition.
607 cmp $LIT,%eax // Was it LIT?
609 mov %ebx,%eax // Yes, so LIT is followed by a number.
613 3: // Executing - run it!
614 cmp $LIT,%eax // Literal?
616 // Not a literal, execute it now. This never returns, but the codeword will
617 // eventually call NEXT which will reenter the loop in INTERPRETER.
620 4: // Executing a literal, which means push it on the stack.
624 // NB: SYSEXIT must be the last entry in the built-in dictionary.
625 defcode SYSEXIT,7,,SYSEXIT
630 /*----------------------------------------------------------------------
631 * Input buffer & initial input.
636 // XXX gives 'Warning: unterminated string; newline inserted' messages which you can ignore
638 \\ Define some character constants
643 \\ CR prints a carriage return
646 \\ SPACE prints a space
647 : SPACE 'SPACE' EMIT ;
649 \\ Primitive . (DOT) function doesn't follow with a blank, so redefine it to behave like FORTH.
650 \\ Notice how we can trivially redefine existing functions.
655 \\ DUP, DROP are defined in assembly for speed, but this is how you might define them
656 \\ in FORTH. Notice use of the scratch variables _X and _Y.
657 \\ : DUP _X ! _X @ _X @ ;
662 \\ Finally print the welcome prompt.
663 79 EMIT 75 EMIT 'SPACE' EMIT