1 /* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*-
2 * By Richard W.M. Jones <rich@annexia.org>
4 * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S
7 #include <asm-i386/unistd.h>
9 /* NOTES-------------------------------------------------------------------------------------------------------------------
11 Need to say something about $ before constants.
13 And about je/jne/ja/jb/jbe/etc
26 /* Macros to deal with the return stack. */
28 lea -4(%ebp),%ebp // push reg on to return stack
33 mov (%ebp),\reg // pop top of return stack to reg
37 /* ELF entry point. */
42 mov $return_stack,%ebp // Initialise the return stack.
44 mov $cold_start,%esi // Initialise interpreter.
45 NEXT // Run interpreter!
48 cold_start: // High-level code without a codeword.
51 /* DOCOL - the interpreter! */
55 PUSHRSP %esi // push %esi on to the return stack
56 addl $4,%eax // %eax points to codeword, so make
57 movl %eax,%esi // %esi point to first data word
60 /*----------------------------------------------------------------------
61 * Fixed sized buffers for everything.
65 /* FORTH return stack. */
66 #define RETURN_STACK_SIZE 8192
68 .space RETURN_STACK_SIZE
71 /* Space for user-defined words. */
72 #define USER_DEFS_SIZE 16384
82 /*----------------------------------------------------------------------
83 * Built-in words defined the long way.
88 // Store the chain of links.
91 .macro defcode name, namelen, flags=0, label
98 .byte \flags+\namelen // flags + length byte
99 .ascii "\name" // the name
103 .int code_\label // codeword
107 code_\label : // assembler code follows
110 .macro defword name, namelen, flags=0, label
116 .set link,name_\label
117 .byte \flags+\namelen // flags + length byte
118 .ascii "\name" // the name
122 .int DOCOL // codeword - the interpreter
123 // list of word pointers follow
126 .macro defvar name, namelen, flags=0, label, initial=0
127 defcode \name,\namelen,\flags,\label
136 /* Some easy ones .... */
138 pop %eax // duplicate top of stack
143 defcode "DROP",4,,DROP
144 pop %eax // drop top of stack
147 defcode "SWAP",4,,SWAP
148 pop %eax // swap top of stack
154 defcode "OVER",4,,OVER
155 mov 4(%esp),%eax // get the second element of stack
156 push %eax // and push it on top
159 // COLD must not return (ie. must not call EXIT).
160 defword "COLD",4,,COLD
161 // XXX reinitialisation of the interpreter
162 .int INTERPRETER // call the interpreter loop (never returns)
163 .int LIT,1,SYSEXIT // hmmm, but in case it does, exit(1).
165 defcode "EXIT",4,,EXIT
166 POPRSP %esi // pop return stack into %esi
170 // %esi points to the next command, but in this case it points to the next
171 // literal 32 bit integer. Get that literal into %eax and increment %esi.
172 // On x86, it's a convenient single byte instruction! (cf. NEXT macro)
174 push %eax // push %eax on to stack
177 defcode "0SKIP",5,,ZSKIP
178 // If the top of stack is zero, skip the next instruction.
182 lodsl // this does the skip
186 pop %ebx // address to store at
187 pop %eax // data to store there
188 mov %eax,(%ebx) // store it
192 pop %ebx // address to fetch
193 mov (%ebx),%eax // fetch it
194 push %eax // push value onto stack
197 // The STATE variable is 0 for execute mode, != 0 for compile mode
198 defvar "STATE",5,,STATE
200 // This points to where compiled words go.
201 defvar "HERE",4,,HERE,user_defs_start
203 // This is the last definition in the dictionary.
204 defvar "LATEST",6,,LATEST,name_SYSEXIT // SYSEXIT must be last in built-in dictionary
207 pop %eax // pop parameter stack into %eax
208 PUSHRSP %eax // push it on to the return stack
211 defcode "R>",2,,FROMR
212 POPRSP %eax // pop return stack on to %eax
213 push %eax // and push on to parameter stack
216 #if 0 /* This definition is wrong. */
218 mov %(ebp),%eax // copy (don't pop) top of return stack to %eax
219 push %eax // and push on to parameter stack
223 defcode "RSP@",4,,RSPFETCH
227 defcode "RSP!",4,,RSPSTORE
231 defcode "RDROP",5,,RDROP
232 lea 4(%ebp),%ebp // pop return stack and throw away
237 push %eax // push return value on stack
249 mov $0,%ebx // out of input, exit (0)
253 defcode "EMIT",4,,EMIT
258 mov $1,%ebx // 1st param: stdout
260 // write needs the address of the byte to write
262 mov $2f,%ecx // 2nd param: address
264 mov $1,%edx // 3rd param: nbytes = 1
266 mov $__NR_write,%eax // write syscall
271 2: .space 1 // scratch used by EMIT
273 defcode "WORD",4,,WORD
275 push %ecx // push length
276 push %edi // push base address
280 /* Search for first non-blank character. Also skip \ comments. */
282 call _KEY // get next key, returned in %eax
283 cmpb $'\\',%al // start of a comment?
284 je 3f // if so, skip the comment
286 jbe 1b // if so, keep looking
288 /* Search for the end of the word, storing chars as we go. */
289 mov $5f,%edi // pointer to return buffer
291 stosb // add character to return buffer
292 call _KEY // get next key, returned in %al
293 cmpb $' ',%al // is blank?
294 ja 2b // if not, keep looping
296 /* Return the word (well, the static buffer) and length. */
298 mov %edi,%ecx // return length of the word
299 mov $5f,%edi // return address of the word
302 /* Code to skip \ comments to end of the current line. */
305 cmpb $'\n',%al // end of line yet?
310 // A static buffer where WORD returns. Subsequent calls
311 // overwrite this buffer. Maximum word length is 32 chars.
314 defcode "EMITWORD",8,,EMITWORD
315 mov $1,%ebx // 1st param: stdout
316 pop %ecx // 2nd param: address of string
317 pop %edx // 3rd param: length of string
319 mov $__NR_write,%eax // write syscall
325 pop %eax // Get the number to print into %eax
326 call _DOT // Easier to do this recursively ...
329 mov $10,%ecx // Base 10
345 // Parse a number from a string on the stack -- almost the opposite of . (DOT)
346 // Note that there is absolutely no error checking. In particular the length of the
347 // string must be >= 1 bytes.
348 defcode "SNUMBER",7,,SNUMBER
358 imull $10,%eax // %eax *= 10
361 subb $'0',%bl // ASCII -> digit
367 defcode "FIND",4,,FIND
368 pop %edi // %edi = address
369 pop %ecx // %ecx = length
375 push %esi // Save %esi so we can use it in string comparison.
377 // Now we start searching backwards through the dictionary for this word.
378 mov var_LATEST,%edx // LATEST points to name header of the latest word in the dictionary
380 test %edx,%edx // NULL pointer? (end of the linked list)
383 // Compare the length expected and the length of the word.
384 // Note that if the F_HIDDEN flag is set on the word, then by a bit of trickery
385 // this won't pick the word (the length will appear to be wrong).
387 movb 4(%edx),%al // %al = flags+length field
388 andb $(F_HIDDEN|0x1f),%al // %al = name length
389 cmpb %cl,%al // Length is the same?
392 // Compare the strings in detail.
393 push %ecx // Save the length
394 push %edi // Save the address (repe cmpsb will move this pointer)
395 lea 5(%edx),%esi // Dictionary string we are checking against.
396 repe cmpsb // Compare the strings.
399 jne 2f // Not the same.
401 // The strings are the same - return the header pointer in %eax
407 mov (%edx),%edx // Move back through the link field to the previous word
408 jmp 1b // .. and loop.
412 xor %eax,%eax // Return zero to indicate not found.
415 defcode ">CFA",4,,TCFA // DEA -> Codeword address
422 add $4,%edi // Skip link pointer.
423 movb (%edi),%al // Load flags+len into %al.
424 inc %edi // Skip flags+len byte.
425 andb $0x1f,%al // Just the length, not the flags.
426 add %eax,%edi // Skip the name.
427 orl $3,%edi // The codeword is 4-byte aligned.
432 .int WORD // Get the following word.
433 .int FIND // Look it up in the dictionary.
434 .int DUP // If not found, skip >CFA (TCFA) instruction.
436 .int TCFA // Convert to a codeword pointer.
441 // Get the word and create a dictionary entry header for it.
442 call _WORD // Returns %ecx = length, %edi = pointer to word.
443 mov %edi,%ebx // %ebx = address of the word
445 movl var_HERE,%edi // %edi is the address of the header
446 movl var_LATEST,%eax // Get link pointer
447 stosl // and store it in the header.
449 mov %cl,%al // Get the length.
450 orb $F_HIDDEN,%al // Set the HIDDEN flag on this entry.
451 stosb // Store the length/flags byte.
453 mov %ebx,%esi // %esi = word
454 rep movsb // Copy the word
456 orl $3,%edi // Align to next 4 byte boundary.
459 movl $DOCOL,%eax // The codeword for user-created words is always DOCOL (the interpreter)
462 // Header built, so now update LATEST and HERE.
463 // We'll be compiling words and putting them HERE.
468 // And go into compile mode by setting STATE to 1.
475 pop %eax // Code pointer to store.
479 movl var_HERE,%edi // HERE
481 movl %edi,var_HERE // Update HERE (incremented)
484 defcode "HIDDEN",6,,HIDDEN
488 movl var_LATEST,%edi // LATEST word.
489 addl $4,%edi // Point to name/flags byte.
490 xorb $F_HIDDEN,(%edi) // Toggle the HIDDEN bit.
493 defcode "IMMEDIATE",9,F_IMMED,IMMEDIATE
497 movl var_LATEST,%edi // LATEST word.
498 addl $4,%edi // Point to name/flags byte.
499 xorb $F_IMMED,(%edi) // Toggle the IMMED bit.
502 defcode ";",1,F_IMMED,SEMICOLON
503 movl $EXIT,%eax // EXIT is the final codeword in compiled words.
504 call _COMMA // Store it.
505 call _HIDDEN // Toggle the HIDDEN flag (unhides the new word).
506 xor %eax,%eax // Set STATE to 0 (back to execute mode).
510 /* This interpreter is pretty simple, but remember that in FORTH you can always override
511 * it later with a more powerful one!
513 defword "INTERPRETER",11,,INTERPRETER
514 .int INTERPRET,RDROP,INTERPRETER
516 defcode "INTERPRET",9,,INTERPRET
517 call _WORD // Returns %ecx = length, %edi = pointer to word.
519 // Is it in the dictionary?
520 call _FIND // Returns %eax = pointer to header or 0 if not found.
521 test %eax,%eax // Found?
524 // In the dictionary. Is it an IMMEDIATE codeword?
525 mov %eax,%edi // %edi = dictionary entry
526 movb 4(%edi),%al // Get name+flags.
527 push %ax // Just save it for now.
528 call _TCFA // Convert dictionary entry (in %edi) to codeword pointer.
530 andb $F_IMMED,%al // Is IMMED flag set?
532 jnz 3f // If IMMED, jump straight to executing.
536 1: // Not in the dictionary (not a word) so assume it's a number.
537 call _SNUMBER // Returns the parsed number in %eax
539 mov $LIT,%eax // The word is LIT
541 2: // Are we compiling or executing?
544 jz 3f // Jump if executing.
546 // Compiling - just append the word to the current dictionary definition.
548 cmp $LIT,%eax // Was it LIT?
550 mov %ebx,%eax // Yes, so LIT is followed by a number.
554 3: // Executing - run it!
555 cmp $LIT,%eax // Literal?
557 // Not a literal, execute it now. This never returns, but the codeword will
558 // eventually call NEXT which will reenter the loop in INTERPRETER.
561 4: // Executing a literal, which means push it on the stack.
565 // NB: SYSEXIT must be the last entry in the built-in dictionary.
566 defcode SYSEXIT,7,,SYSEXIT
571 /*----------------------------------------------------------------------
572 * Input buffer & initial input.
586 : CR '\\n' EMIT ; \n\
588 CR CR '0' EMIT CR CR \n\