1 /* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*-
2 * By Richard W.M. Jones <rich@annexia.org>
4 * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S
7 #include <asm-i386/unistd.h>
9 /* NOTES-------------------------------------------------------------------------------------------------------------------
11 Need to say something about $ before constants.
13 And about je/jne/ja/jb/jbe/etc
26 /* Macros to deal with the return stack. */
28 lea -4(%ebp),%ebp // push reg on to return stack
33 mov (%ebp),\reg // pop top of return stack to reg
37 /* ELF entry point. */
42 mov $return_stack,%ebp // Initialise the return stack.
44 mov $cold_start,%esi // Initialise interpreter.
45 NEXT // Run interpreter!
48 cold_start: // High-level code without a codeword.
51 /* DOCOL - the interpreter! */
55 PUSHRSP %esi // push %esi on to the return stack
56 addl $4,%eax // %eax points to codeword, so make
57 movl %eax,%esi // %esi point to first data word
60 /*----------------------------------------------------------------------
61 * Fixed sized buffers for everything.
65 /* FORTH return stack. */
66 #define RETURN_STACK_SIZE 8192
68 .space RETURN_STACK_SIZE
71 /* Space for user-defined words. */
72 #define USER_DEFS_SIZE 16384
82 /*----------------------------------------------------------------------
83 * Built-in words defined the long way.
88 // Store the chain of links.
91 .macro defcode name, namelen, flags=0, label
98 .byte \flags+\namelen // flags + length byte
99 .ascii "\name" // the name
103 .int code_\label // codeword
107 code_\label : // assembler code follows
110 .macro defword name, namelen, flags=0, label
116 .set link,name_\label
117 .byte \flags+\namelen // flags + length byte
118 .ascii "\name" // the name
122 .int DOCOL // codeword - the interpreter
123 // list of word pointers follow
126 .macro defvar name, namelen, flags=0, label, initial=0
127 defcode \name,\namelen,\flags,\label
136 // Some easy ones, written in assembly for speed
137 defcode "DROP",4,,DROP
138 pop %eax // drop top of stack
142 pop %eax // duplicate top of stack
147 defcode "SWAP",4,,SWAP
148 pop %eax // swap top of stack
154 defcode "OVER",4,,OVER
155 mov 4(%esp),%eax // get the second element of stack
156 push %eax // and push it on top
159 // COLD must not return (ie. must not call EXIT).
160 defword "COLD",4,,COLD
161 // XXX reinitialisation of the interpreter
162 .int INTERPRETER // call the interpreter loop (never returns)
163 .int LIT,1,SYSEXIT // hmmm, but in case it does, exit(1).
165 defcode "EXIT",4,,EXIT
166 POPRSP %esi // pop return stack into %esi
170 // %esi points to the next command, but in this case it points to the next
171 // literal 32 bit integer. Get that literal into %eax and increment %esi.
172 // On x86, it's a convenient single byte instruction! (cf. NEXT macro)
174 push %eax // push the literal number on to stack
178 defcode "0SKIP",5,,ZSKIP
179 // If the top of stack is zero, skip the next instruction.
183 lodsl // this does the skip
188 pop %ebx // address to store at
189 pop %eax // data to store there
190 mov %eax,(%ebx) // store it
194 pop %ebx // address to fetch
195 mov (%ebx),%eax // fetch it
196 push %eax // push value onto stack
199 // The STATE variable is 0 for execute mode, != 0 for compile mode
200 defvar "STATE",5,,STATE
202 // This points to where compiled words go.
203 defvar "HERE",4,,HERE,user_defs_start
205 // This is the last definition in the dictionary.
206 defvar "LATEST",6,,LATEST,name_SYSEXIT // SYSEXIT must be last in built-in dictionary
208 // _X, _Y and _Z are scratch variables used by standard words.
214 pop %eax // pop parameter stack into %eax
215 PUSHRSP %eax // push it on to the return stack
218 defcode "R>",2,,FROMR
219 POPRSP %eax // pop return stack on to %eax
220 push %eax // and push on to parameter stack
223 defcode "RSP@",4,,RSPFETCH
227 defcode "RSP!",4,,RSPSTORE
231 defcode "RDROP",5,,RDROP
232 lea 4(%ebp),%ebp // pop return stack and throw away
237 push %eax // push return value on stack
249 1: // out of input; use read(2) to fetch more input from stdin
250 xor %ebx,%ebx // 1st param: stdin
251 mov $buffer,%ecx // 2nd param: buffer
253 mov $buffend-buffer,%edx // 3rd param: max length
254 mov $__NR_read,%eax // syscall: read
256 test %eax,%eax // If %eax <= 0, then exit.
258 addl %eax,%ecx // buffer+%eax = bufftop
262 2: // error or out of input: exit
264 mov $__NR_exit,%eax // syscall: exit
267 defcode "EMIT",4,,EMIT
272 mov $1,%ebx // 1st param: stdout
274 // write needs the address of the byte to write
276 mov $2f,%ecx // 2nd param: address
278 mov $1,%edx // 3rd param: nbytes = 1
280 mov $__NR_write,%eax // write syscall
285 2: .space 1 // scratch used by EMIT
287 defcode "WORD",4,,WORD
289 push %ecx // push length
290 push %edi // push base address
294 /* Search for first non-blank character. Also skip \ comments. */
296 call _KEY // get next key, returned in %eax
297 cmpb $'\\',%al // start of a comment?
298 je 3f // if so, skip the comment
300 jbe 1b // if so, keep looking
302 /* Search for the end of the word, storing chars as we go. */
303 mov $5f,%edi // pointer to return buffer
305 stosb // add character to return buffer
306 call _KEY // get next key, returned in %al
307 cmpb $' ',%al // is blank?
308 ja 2b // if not, keep looping
310 /* Return the word (well, the static buffer) and length. */
312 mov %edi,%ecx // return length of the word
313 mov $5f,%edi // return address of the word
316 /* Code to skip \ comments to end of the current line. */
319 cmpb $'\n',%al // end of line yet?
324 // A static buffer where WORD returns. Subsequent calls
325 // overwrite this buffer. Maximum word length is 32 chars.
328 defcode "EMITWORD",8,,EMITWORD
329 mov $1,%ebx // 1st param: stdout
330 pop %ecx // 2nd param: address of string
331 pop %edx // 3rd param: length of string
333 mov $__NR_write,%eax // write syscall
339 pop %eax // Get the number to print into %eax
340 call _DOT // Easier to do this recursively ...
343 mov $10,%ecx // Base 10
359 // Parse a number from a string on the stack -- almost the opposite of . (DOT)
360 // Note that there is absolutely no error checking. In particular the length of the
361 // string must be >= 1 bytes.
362 defcode "SNUMBER",7,,SNUMBER
372 imull $10,%eax // %eax *= 10
375 subb $'0',%bl // ASCII -> digit
381 defcode "FIND",4,,FIND
382 pop %edi // %edi = address
383 pop %ecx // %ecx = length
389 push %esi // Save %esi so we can use it in string comparison.
391 // Now we start searching backwards through the dictionary for this word.
392 mov var_LATEST,%edx // LATEST points to name header of the latest word in the dictionary
394 test %edx,%edx // NULL pointer? (end of the linked list)
397 // Compare the length expected and the length of the word.
398 // Note that if the F_HIDDEN flag is set on the word, then by a bit of trickery
399 // this won't pick the word (the length will appear to be wrong).
401 movb 4(%edx),%al // %al = flags+length field
402 andb $(F_HIDDEN|0x1f),%al // %al = name length
403 cmpb %cl,%al // Length is the same?
406 // Compare the strings in detail.
407 push %ecx // Save the length
408 push %edi // Save the address (repe cmpsb will move this pointer)
409 lea 5(%edx),%esi // Dictionary string we are checking against.
410 repe cmpsb // Compare the strings.
413 jne 2f // Not the same.
415 // The strings are the same - return the header pointer in %eax
421 mov (%edx),%edx // Move back through the link field to the previous word
422 jmp 1b // .. and loop.
426 xor %eax,%eax // Return zero to indicate not found.
429 defcode ">CFA",4,,TCFA // DEA -> Codeword address
436 add $4,%edi // Skip link pointer.
437 movb (%edi),%al // Load flags+len into %al.
438 inc %edi // Skip flags+len byte.
439 andb $0x1f,%al // Just the length, not the flags.
440 add %eax,%edi // Skip the name.
441 addl $3,%edi // The codeword is 4-byte aligned.
447 .int WORD // Get the following word.
448 .int FIND // Look it up in the dictionary.
449 .int DUP // If not found, skip >CFA (TCFA) instruction.
451 .int TCFA // Convert to a codeword pointer.
457 // Get the word and create a dictionary entry header for it.
458 call _WORD // Returns %ecx = length, %edi = pointer to word.
459 mov %edi,%ebx // %ebx = address of the word
461 movl var_HERE,%edi // %edi is the address of the header
462 movl var_LATEST,%eax // Get link pointer
463 stosl // and store it in the header.
465 mov %cl,%al // Get the length.
466 orb $F_HIDDEN,%al // Set the HIDDEN flag on this entry.
467 stosb // Store the length/flags byte.
469 mov %ebx,%esi // %esi = word
470 rep movsb // Copy the word
472 addl $3,%edi // Align to next 4 byte boundary.
475 movl $DOCOL,%eax // The codeword for user-created words is always DOCOL (the interpreter)
478 // Header built, so now update LATEST and HERE.
479 // We'll be compiling words and putting them HERE.
484 // And go into compile mode by setting STATE to 1.
491 pop %eax // Code pointer to store.
495 movl var_HERE,%edi // HERE
497 movl %edi,var_HERE // Update HERE (incremented)
500 defcode "HIDDEN",6,,HIDDEN
504 movl var_LATEST,%edi // LATEST word.
505 addl $4,%edi // Point to name/flags byte.
506 xorb $F_HIDDEN,(%edi) // Toggle the HIDDEN bit.
509 defcode "IMMEDIATE",9,F_IMMED,IMMEDIATE
513 movl var_LATEST,%edi // LATEST word.
514 addl $4,%edi // Point to name/flags byte.
515 xorb $F_IMMED,(%edi) // Toggle the IMMED bit.
518 defcode ";",1,F_IMMED,SEMICOLON
519 movl $EXIT,%eax // EXIT is the final codeword in compiled words.
520 call _COMMA // Store it.
521 call _HIDDEN // Toggle the HIDDEN flag (unhides the new word).
522 xor %eax,%eax // Set STATE to 0 (back to execute mode).
526 /* This interpreter is pretty simple, but remember that in FORTH you can always override
527 * it later with a more powerful one!
529 defword "INTERPRETER",11,,INTERPRETER
530 .int INTERPRET,RDROP,INTERPRETER
532 defcode "INTERPRET",9,,INTERPRET
533 call _WORD // Returns %ecx = length, %edi = pointer to word.
535 // Is it in the dictionary?
536 call _FIND // Returns %eax = pointer to header or 0 if not found.
537 test %eax,%eax // Found?
540 // In the dictionary. Is it an IMMEDIATE codeword?
541 mov %eax,%edi // %edi = dictionary entry
542 movb 4(%edi),%al // Get name+flags.
543 push %ax // Just save it for now.
544 call _TCFA // Convert dictionary entry (in %edi) to codeword pointer.
546 andb $F_IMMED,%al // Is IMMED flag set?
548 jnz 3f // If IMMED, jump straight to executing.
552 1: // Not in the dictionary (not a word) so assume it's a number.
553 call _SNUMBER // Returns the parsed number in %eax
555 mov $LIT,%eax // The word is LIT
557 2: // Are we compiling or executing?
560 jz 3f // Jump if executing.
562 // Compiling - just append the word to the current dictionary definition.
564 cmp $LIT,%eax // Was it LIT?
566 mov %ebx,%eax // Yes, so LIT is followed by a number.
570 3: // Executing - run it!
571 cmp $LIT,%eax // Literal?
573 // Not a literal, execute it now. This never returns, but the codeword will
574 // eventually call NEXT which will reenter the loop in INTERPRETER.
577 4: // Executing a literal, which means push it on the stack.
581 // NB: SYSEXIT must be the last entry in the built-in dictionary.
582 defcode SYSEXIT,7,,SYSEXIT
587 /*----------------------------------------------------------------------
588 * Input buffer & initial input.
593 // XXX gives 'Warning: unterminated string; newline inserted' messages which you can ignore
595 \\ Define some character constants
600 \\ CR prints a carriage return
603 \\ SPACE prints a space
604 : SPACE 'SPACE' EMIT ;
606 \\ Primitive . (DOT) function doesn't follow with a blank, so redefine it to behave like FORTH.
607 \\ Notice how we can trivially redefine existing functions.
612 \\ DUP, DROP are defined in assembly for speed, but this is how you might define them
613 \\ in FORTH. Notice use of the scratch variables _X and _Y.
614 \\ : DUP _X ! _X @ _X @ ;
619 \\ Finally print the welcome prompt.
620 79 EMIT 75 EMIT 'SPACE' EMIT