/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*- * By Richard W.M. Jones * * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S */ #include /* NOTES---------------------------------------------------------------------- Need to say something about $ before constants. And about je/jne/ja/jb/jbe/etc */ /* NEXT macro. */ .macro NEXT lodsl jmp *(%eax) .endm /* Macros to deal with the return stack. */ .macro PUSHRSP reg lea -4(%ebp),%ebp // push reg on to return stack movl \reg,(%ebp) .endm .macro POPRSP reg mov (%ebp),\reg // pop top of return stack to reg lea 4(%ebp),%ebp .endm /* ELF entry point. */ .text .globl _start _start: cld mov $return_stack,%ebp // Initialise the return stack. mov $cold_start,%esi // Initialise interpreter. NEXT // Run interpreter! .section .rodata cold_start: // High-level code without a codeword. .int COLD /* DOCOL - the interpreter! */ .text .align 4 DOCOL: PUSHRSP %esi // push %esi on to the return stack addl $4,%eax // %eax points to codeword, so make movl %eax,%esi // %esi point to first data word NEXT /*---------------------------------------------------------------------- * Fixed sized buffers for everything. */ .bss /* FORTH return stack. */ #define RETURN_STACK_SIZE 8192 .align 4096 .space RETURN_STACK_SIZE return_stack: /* Space for user-defined words. */ #define USER_DEFS_SIZE 16384 .align 4096 user_defs_start: .space USER_DEFS_SIZE /*---------------------------------------------------------------------- * Built-in words defined the long way. */ #define F_IMMED 0x80 #define F_HIDDEN 0x40 // Store the chain of links. .set link,0 .macro defcode name, namelen, flags=0, label .section .rodata .align 4 .globl name_\label name_\label : .int link // link .set link,name_\label .byte \flags+\namelen // flags + length byte .ascii "\name" // the name .align 4 .globl \label \label : .int code_\label // codeword .text .align 4 .globl code_\label code_\label : // assembler code follows .endm .macro defword name, namelen, flags=0, label .section .rodata .align 4 .globl name_\label name_\label : .int link // link .set link,name_\label .byte \flags+\namelen // flags + length byte .ascii "\name" // the name .align 4 .globl \label \label : .int DOCOL // codeword - the interpreter // list of word pointers follow .endm /* Some easy ones .... */ defcode "DUP",3,,DUP pop %eax // duplicate top of stack push %eax push %eax NEXT defcode "DROP",4,,DROP pop %eax // drop top of stack NEXT defcode "SWAP",4,,SWAP pop %eax // swap top of stack pop %ebx push %eax push %ebx NEXT defcode "OVER",4,,OVER mov 4(%esp),%eax // get the second element of stack push %eax // and push it on top NEXT /* COLD must not return (ie. must not call EXIT). */ defword "COLD",4,,COLD .int LIT,'<',ECHO .int WORD,OVER,OVER,ECHOWORD,LIT,'=',ECHO,FIND,DOT .int LIT,'>',ECHO .int LIT,10,ECHO .int RDROP,COLD /* This prints out each word in the input as \n defword "COLD",4,,COLD .int LIT,'<',ECHO,WORD,ECHOWORD,LIT,'>',ECHO,LIT,10,ECHO,RDROP,COLD */ defcode "EXIT",4,,EXIT POPRSP %esi // pop return stack into %esi NEXT defcode "LIT",3,,LIT // %esi points to the next command, but in this case it points to the next // literal 32 bit integer. Get that literal into %eax and increment %esi. // On x86, it's a convenient single byte instruction! (cf. NEXT macro) lodsl push %eax // push %eax on to stack NEXT defcode "!",1,,STORE pop %ebx // address to store at pop %eax // data to store there mov %eax,(%ebx) // store it NEXT defcode "@",1,,FETCH pop %ebx // address to fetch mov (%ebx),%eax // fetch it push %eax // push value onto stack NEXT defcode "STATE",5,,STATE push $v_state NEXT defcode "HERE",4,,HERE push $v_here NEXT defcode "LATEST",6,,LATEST push $v_latest NEXT defcode ">R",2,,TOR pop %eax // pop parameter stack into %eax PUSHRSP %eax // push it on to the return stack NEXT defcode "R>",2,,FROMR POPRSP %eax // pop return stack on to %eax push %eax // and push on to parameter stack NEXT #if 0 /* This definition is wrong. */ defcode "R",1,,R mov %(ebp),%eax // copy (don't pop) top of return stack to %eax push %eax // and push on to parameter stack NEXT #endif defcode "RSP@",4,,RSPFETCH push %ebp NEXT defcode "RSP!",4,,RSPSTORE pop %ebp NEXT defcode "RDROP",5,,RDROP lea 4(%ebp),%ebp // pop return stack and throw away NEXT defcode "KEY",3,,KEY call _KEY push %eax // push return value on stack NEXT _KEY: mov (currkey),%ebx cmp (bufftop),%ebx jge 1f xor %eax,%eax mov (%ebx),%al inc %ebx mov %ebx,(currkey) ret 1: mov $0,%ebx // out of input, exit (0) mov $__NR_exit,%eax int $0x80 defcode "ECHO",4,,ECHO pop %eax call _ECHO NEXT _ECHO: mov $1,%ebx // 1st param: stdout // write needs the address of the byte to write mov %al,(2f) mov $2f,%ecx // 2nd param: address mov $1,%edx // 3rd param: nbytes = 1 mov $__NR_write,%eax // write syscall int $0x80 ret .bss 2: .space 1 // scratch used by ECHO defcode "WORD",4,,WORD call _WORD push %eax // push length push %ebx // push base address NEXT _WORD: /* Search for first non-blank character. Also skip \ comments. */ 1: call _KEY // get next key, returned in %eax cmpb $'\\',%al // start of a comment? je 3f // if so, skip the comment cmpb $' ',%al jbe 1b // if so, keep looking /* Search for the end of the word, storing chars as we go. */ mov $5f,%edi // pointer to return buffer 2: stosb // add character to return buffer call _KEY // get next key, returned in %al cmpb $' ',%al // is blank? ja 2b // if not, keep looping /* Return the word (well, the static buffer) and length. */ sub $5f,%edi mov %edi,%eax // return length of the word mov $5f,%ebx // return address of the word ret /* Code to skip \ comments to end of the current line. */ 3: call _KEY cmpb $'\n',%al // end of line yet? jne 3b jmp 1b .bss // A static buffer where WORD returns. Subsequent calls // overwrite this buffer. Maximum word length is 32 chars. 5: .space 32 defcode "ECHOWORD",8,,ECHOWORD mov $1,%ebx // 1st param: stdout pop %ecx // 2nd param: address of string pop %edx // 3rd param: length of string mov $__NR_write,%eax // write syscall int $0x80 NEXT defcode ".",1,,DOT pop %eax // Get the number to print into %eax call _DOT // Easier to do this recursively ... NEXT /* if eax >= 10 then print (eax / 10) r = eax mod 10 echo r */ _DOT: mov $10,%ecx // Base 10 cmp %ecx,%eax jb 1f pushl %eax xor %edx,%edx idivl %ecx call _DOT popl %eax 1: aam $10 cbw cwde addl $'0',%eax call _ECHO ret defcode "FIND",4,,FIND pop %edi // %edi = address pop %ecx // %ecx = length push %esi // Save %esi so we can use it in string comparison. // Now we start searching backwards through the dictionary for this word. mov v_latest,%edx // LATEST points to name header of the latest word in the dictionary 1: test %edx,%edx // NULL pointer? (end of the linked list) je 4f xor %eax,%eax movb 4(%edx),%al // %al = flags+length field andb $0x1f,%al // %al = name length cmpb %cl,%al // Length is the same? jne 2f // Compare the strings in detail. push %ecx // Save the length push %edi // Save the address (repe cmpsb will move this pointer) lea 5(%edx),%esi // Dictionary string we are checking against. repe cmpsb // Compare the strings. pop %edi pop %ecx jne 2f // Not the same. // The strings are the same - return the header pointer on the stack. pop %esi push %edx NEXT 2: mov (%edx),%edx // Move back through the link field to the previous word jmp 1b // .. and loop. 4: // Not found. pop %esi xor %eax,%eax // Push zero on to the stack to indicate not found. push %eax NEXT #if 0 defcode ":",1,,COLON call nextword // get next word, the procedure name // The next word is returned in %ebx and has length %ecx bytes. // Save the current value of VOCAB. mov v_vocab,%eax push %eax // Change VOCAB to point to our new word's header (at LATEST). mov v_latest,%edi mov %edi,v_vocab // We'll start by writing the word's header at LATEST; the header // is just length byte, the word itself, link pointer. mov %ecx,(%edi) // Length byte inc %edi mov %ebx,%esi // Copy the string. rep movsb // Round up to the next multiple of 4 so that the link pointer // is aligned. or $3,%edi inc %edi pop %eax // Link pointer, points to old VOCAB. mov %eax,(%edi) add $4,%edi // Write the codeword, which for user-defined words is always a // pointer to the FORTH indirect threaded interpreter. movl $DOCOL,(%edi) add $4,%edi // Finally, update LATEST. As we go along compiling, we'll be // writing compiled codewords to the LATEST pointer (and moving // it along each time). mov %edi,v_latest movl $1,v_state // go into compiling mode ret defcode ";",1,F_IMMED,SEMICOLON // XXX #endif defcode SYSEXIT,7,,SYSEXIT pop %ebx mov $__NR_exit,%eax int $0x80 /*---------------------------------------------------------------------- * Variables containing the interpreter's state. */ .data .align 4 v_state: .int 0 // 0 = immediate, non-zero = compiling v_latest: // XXX should use 'link', but how to join it with name_? .int name_SYSEXIT // last word in the dictionary v_here: .int user_defs_start // pointer to next space for user definition or current compiled def /*---------------------------------------------------------------------- * Input buffer & initial input. */ .data .align 4096 buffer: .ascii " \n\ \\ Define some constants \n\ : '\\n' 10 ; \n\ : ')' 41 ; \n\ : 'space' 32 ; \n\ : '\"' 34 ; \n\ : '-' 45 ; \n\ : '0' 48 ; \n\ \n\ \\ CR command \n\ : CR '\\n' ECHO ; \n\ \n\ ECHO DUP DROP OVER \n\ " _initbufftop: .align 4096 buffend: currkey: .int buffer bufftop: .int _initbufftop