main.asm

   1 format ELF64 executable
   2
   3 ;; The code in this macro is placed at the end of each Forth word. When we are
   4 ;; executing a definition, this code is what causes execution to resume at the
   5 ;; next word in that definition.
   6 macro next {
   7   ;; RSI points to the address of the definition of the next word to execute.
   8   lodsq                   ; Load value at RSI into RAX and increment RSI
   9   ;; Now RAX contains the location of the next word to execute. The first 8
  10   ;; bytes of this word is the address of the codeword, which is what we want
  11   ;; to execute.
  12   jmp qword [rax]         ; Jump to the codeword of the current word
  13 }
  14
  15 ;; pushr and popr work on the return stack, whose location is stored in the
  16 ;; register RBP.
  17 macro pushr x {
  18   sub rbp, 8
  19   mov qword [rbp], x
  20 }
  21 macro popr x {
  22   mov x, [rbp]
  23   add rbp, 8
  24 }
  25
  26 segment readable executable
  27
  28 main:
  29   cld                        ; Clear direction flag so LODSQ does the right thing.
  30   mov rbp, return_stack_top  ; Initialize return stack
  31
  32   mov rsi, program
  33   next
  34
  35 program: dq MAIN
  36
  37 ;; The codeword is the code that will be executed at the beginning of a forth
  38 ;; word. It needs to save the old RSI and update it to point to the next word to
  39 ;; execute.
  40 docol:
  41   pushr rsi            ; Save old value of RSI on return stack; we will continue execution there after we are done executing this word
  42   lea rsi, [rax + 8]   ; RAX currently points to the address of the codeword, so we want to continue at RAX+8
  43   next                 ; Execute word pointed to by RSI
  44
  45 ;; This word is called at the end of a Forth definition. It just needs to
  46 ;; restore the old value of RSI (saved by 'docol') and resume execution.
  47 EXIT_entry:
  48   dq 0
  49   db 4
  50   db 'EXIT'
  51 EXIT:
  52   dq .start
  53 .start:
  54   popr rsi
  55   next
  56
  57 ;; LIT is a special word that reads the next "word pointer" and causes it to be
  58 ;; placed on the stack rather than executed.
  59 LIT_entry:
  60   dq EXIT_entry
  61   db 3
  62   db 'LIT'
  63 LIT:
  64   dq .start
  65 .start:
  66   lodsq
  67   push rax
  68   next
  69
  70 ;; Given a string (a pointer following by a size), return the location of the
  71 ;; dictionary entry for that word. If no such word exists, return 0.
  72 FIND_entry:
  73   dq LIT_entry
  74   db 4
  75   db 'FIND'
  76 FIND:
  77   dq .start
  78 .start:
  79   mov [.rsi], rsi
  80   pop [.search_length]
  81   pop [.search_buffer]
  82
  83   ;; RSI contains the entry we are currently looking at
  84   mov rsi, [latest_entry]       ; Start with the last added word
  85
  86 .loop:
  87   movzx rcx, byte [rsi + 8]     ; Length of word being looked at
  88   cmp rcx, [.search_length]
  89   jne .next    ; If the words don't have the same length, we have the wrong word
  90
  91   ;; Otherwise, we need to compare strings
  92   lea rdx, [rsi + 8 + 1]        ; Location of character being compared in entry
  93   mov rdi, [.search_buffer]     ; Location of character being compared in search buffer
  94 .compare_char:
  95   mov al, [rdx]
  96   mov ah, [rdi]
  97   cmp al, ah
  98   jne .next                     ; They don't match; try again
  99   inc rdx                       ; These characters match; look at the next ones
 100   inc rdi
 101   loop .compare_char
 102
 103   jmp .found                    ; They match! We are done.
 104
 105 .next:
 106   mov rsi, [rsi]                ; Look at the previous entry
 107   cmp rsi, 0
 108   jnz .loop                    ; If there is no previous word, exit and return 0
 109
 110 .found:
 111   push rsi
 112
 113   mov rsi, [.rsi]
 114   next
 115
 116 ;; BRANCH is the fundamental mechanism for branching. BRANCH reads the next word
 117 ;; as a signed integer literal and jumps by that offset.
 118 BRANCH_entry:
 119   dq FIND_entry
 120   db 6
 121   db 'BRANCH'
 122 BRANCH:
 123   dq .start
 124 .start:
 125   add rsi, [rsi] ; [RSI], which is the next word, contains the offset; we add this to the instruction pointer.
 126   next           ; Then, we can just continue execution as normal
 127
 128 ;; 0BRANCH is like BRANCH, but it jumps only if the top of the stack is zero.
 129 ZBRANCH:
 130   dq .start
 131 .start:
 132   ;; Compare top of stack to see if we should branch
 133   pop rax
 134   cmp rax, 0
 135   jnz .dont_branch
 136 .do_branch:
 137   jmp BRANCH.start
 138 .dont_branch:
 139   add rsi, 8     ; We need to skip over the next word, which contains the offset.
 140   next
 141
 142 ;; Expects a character on the stack and prints it to standard output.
 143 EMIT:
 144   dq .start
 145 .start:
 146   pushr rsi
 147   pushr rax
 148   mov rax, 1
 149   mov rdi, 1
 150   lea rsi, [rsp]
 151   mov rdx, 1
 152   syscall
 153   add rsp, 8
 154   popr rax
 155   popr rsi
 156   next
 157
 158 ;; Prints a newline to standard output.
 159 NEWLINE:
 160   dq docol
 161   dq LIT, $A
 162   dq EMIT
 163   dq EXIT
 164
 165 ;; Prints a space to standard output.
 166 SPACE_entry:
 167   dq BRANCH_entry
 168   db 5
 169   db 'SPACE'
 170 SPACE:
 171   dq docol
 172   dq LIT, ' '
 173   dq EMIT
 174   dq EXIT
 175
 176 ;; Read a word from standard input and push it onto the stack as a pointer and a
 177 ;; size. The pointer is valid until the next call to READ_WORD.
 178 READ_WORD:
 179   dq .start
 180 .start:
 181   mov [.rsi], rsi
 182   mov [.rax], rax
 183
 184 .skip_whitespace:
 185   ;; Read characters into .char_buffer until one of them is not whitespace.
 186   mov rax, 0
 187   mov rdi, 0
 188   mov rsi, .char_buffer
 189   mov rdx, 1
 190   syscall
 191
 192   cmp [.char_buffer], ' '
 193   je .skip_whitespace
 194   cmp [.char_buffer], $A
 195   je .skip_whitespace
 196
 197 .alpha:
 198   ;; We got a character that wasn't whitespace. Now read the actual word.
 199   mov [.length], 0
 200
 201 .read_alpha:
 202   mov al, [.char_buffer]
 203   movzx rbx, [.length]
 204   mov rsi, .buffer
 205   add rsi, rbx
 206   mov [rsi], al
 207   inc [.length]
 208
 209   mov rax, 0
 210   mov rdi, 0
 211   mov rsi, .char_buffer
 212   mov rdx, 1
 213   syscall
 214
 215   cmp [.char_buffer], ' '
 216   je .end
 217   cmp [.char_buffer], $A
 218   jne .read_alpha
 219
 220 .end:
 221   push .buffer
 222   movzx rax, [.length]
 223   push rax
 224
 225   mov rsi, [.rsi]
 226   mov rax, [.rax]
 227
 228   next
 229
 230 ;; Takes a string on the stack and replaces it with the decimal number that the
 231 ;; string represents.
 232 PARSE_NUMBER:
 233   dq .start
 234 .start:
 235   pop [.length]                 ; Length
 236   pop rdi                       ; String pointer
 237   mov r8, 0                     ; Result
 238
 239   ;; Add (10^(rcx-1) * parse_char(rdi[length - rcx])) to the accumulated value
 240   ;; for each rcx.
 241   mov rcx, [.length]
 242 .loop:
 243   ;; First, calcuate 10^(rcx - 1)
 244   mov rax, 1
 245
 246   mov r9, rcx
 247   .exp_loop:
 248     dec r9
 249     jz .break
 250     mov rbx, 10
 251     mul rbx
 252     jmp .exp_loop
 253   .break:
 254
 255   ;; Now, rax = 10^(rcx - 1).
 256
 257   ;; We need to calulate the value of the character at rdi[length - rcx].
 258   mov rbx, rdi
 259   add rbx, [.length]
 260   sub rbx, rcx
 261   movzx rbx, byte [rbx]
 262   sub rbx, '0'
 263
 264   ;; Multiply this value by rax to get (10^(rcx-1) * parse_char(rdi[length - rcx])),
 265   ;; then add this to the result.
 266   mul rbx
 267
 268   ;; Add that value to r8
 269   add r8, rax
 270
 271   dec rcx
 272   jnz .loop
 273
 274   push r8
 275
 276   next
 277
 278 READ_NUMBER:
 279   dq docol
 280   dq READ_WORD
 281   dq PARSE_NUMBER
 282   dq EXIT
 283
 284 ;; Takes a string (in the form of a pointer and a length on the stack) and
 285 ;; prints it to standard output.
 286 TELL:
 287   dq .start
 288 .start:
 289   mov rbx, rsi
 290   mov rcx, rax
 291
 292   mov rax, 1
 293   mov rdi, 1
 294   pop rdx     ; Length
 295   pop rsi     ; Buffer
 296   syscall
 297
 298   mov rax, rcx
 299   mov rsi, rbx
 300   next
 301
 302 ;; Exit the program cleanly.
 303 TERMINATE:
 304   dq .start
 305 .start:
 306   mov rax, $3C
 307   mov rdi, 0
 308   syscall
 309
 310 PUSH_HELLO_CHARS:
 311   dq docol
 312   dq LIT, $A
 313   dq LIT, 'o'
 314   dq LIT, 'l'
 315   dq LIT, 'l'
 316   dq LIT, 'e'
 317   dq LIT, 'H'
 318   dq EXIT
 319
 320 PUSH_YOU_TYPED:
 321   dq .start
 322 .start:
 323   push you_typed_string
 324   push you_typed_string.length
 325   next
 326
 327 HELLO_entry:
 328   dq SPACE_entry
 329   db 5
 330   db 'HELLO'
 331 HELLO:
 332   dq docol
 333   dq LIT, 'H', EMIT
 334   dq LIT, 'e', EMIT
 335   dq LIT, 'l', EMIT
 336   dq LIT, 'l', EMIT
 337   dq LIT, 'o', EMIT
 338   dq LIT, '!', EMIT
 339   dq NEWLINE
 340   dq EXIT
 341
 342 ;; .U prints the value on the stack as an unsigned integer in hexadecimal.
 343 DOTU_entry:
 344   dq HELLO_entry
 345   db 2
 346   db '.U'
 347 DOTU:
 348   dq .start
 349 .start:
 350   mov [.length], 0
 351   mov [.printed_length], 1
 352   pop rax                       ; RAX = value to print
 353   push rsi                      ; Save value of RSI
 354
 355   ;; We start by constructing the buffer to print in reverse
 356
 357 .loop:
 358   mov rdx, 0
 359   mov rbx, $10
 360   div rbx                       ; Put remainer in RDX and quotient in RAX
 361
 362   ;; Place the appropriate character in the buffer
 363   mov rsi, .chars
 364   add rsi, rdx
 365   mov bl, [rsi]
 366   mov rdi, .rbuffer
 367   add rdi, [.length]
 368   mov [rdi], bl
 369   inc [.length]
 370
 371   ;; .printed_length is the number of characters that we ulitmately want to
 372   ;; print. If we have printed a non-zero character, then we should update
 373   ;; .printed_length.
 374   cmp bl, '0'
 375   je .skip_updating_real_length
 376   mov rbx, [.length]
 377   mov [.printed_length], rbx
 378 .skip_updating_real_length:
 379
 380   cmp [.length], 16
 381   jle .loop
 382
 383   ;; Flip buffer around, since it is currently reversed
 384   mov rcx, [.printed_length]
 385 .flip:
 386   mov rsi, .rbuffer
 387   add rsi, rcx
 388   dec rsi
 389   mov al, [rsi]
 390
 391   mov rdi, .buffer
 392   add rdi, [.printed_length]
 393   sub rdi, rcx
 394   mov [rdi], al
 395
 396   loop .flip
 397
 398   ;; Print the buffer
 399   mov rax, 1
 400   mov rdi, 1
 401   mov rsi, .buffer
 402   mov rdx, [.printed_length]
 403   syscall
 404
 405   ;; Restore RSI and continue execution
 406   pop rsi
 407   next
 408
 409 MAIN:
 410   dq docol
 411   dq HELLO
 412   dq LIT, SPACE_entry, DOTU, NEWLINE
 413   dq LIT, HELLO_entry, DOTU, NEWLINE
 414   dq LIT, DOTU_entry, DOTU, NEWLINE
 415   dq LIT, SPACE_string, LIT, SPACE_string.length, TELL, SPACE
 416   dq LIT, SPACE_string, LIT, SPACE_string.length, FIND, DOTU, NEWLINE
 417   dq LIT, HELLO_string, LIT, HELLO_string.length, TELL, SPACE
 418   dq LIT, HELLO_string, LIT, HELLO_string.length, FIND, DOTU, NEWLINE
 419   dq LIT, DOTU_string, LIT, DOTU_string.length, TELL, SPACE
 420   dq LIT, DOTU_string, LIT, DOTU_string.length, FIND, DOTU, NEWLINE
 421   dq LIT, HELLA_string, LIT, HELLA_string.length, TELL, SPACE
 422   dq LIT, HELLA_string, LIT, HELLA_string.length, FIND, DOTU, NEWLINE
 423   dq TERMINATE
 424
 425 segment readable writable
 426
 427 latest_entry dq DOTU_entry
 428
 429 SPACE_string db 'SPACE'
 430 .length = $ - SPACE_string
 431 HELLO_string db 'HELLO'
 432 .length = $ - HELLO_string
 433 DOTU_string db '.U'
 434 .length = $ - DOTU_string
 435 HELLA_string db 'HELLA'
 436 .length = $ - HELLA_string
 437
 438
 439 you_typed_string db 'You typed: '
 440 .length = $ - you_typed_string
 441
 442 FIND.search_length dq ?
 443 FIND.search_buffer dq ?
 444 FIND.rsi dq ?
 445
 446 READ_WORD.rsi dq ?
 447 READ_WORD.rax dq ?
 448 READ_WORD.max_size = $FF
 449 READ_WORD.buffer rb READ_WORD.max_size
 450 READ_WORD.length db ?
 451 READ_WORD.char_buffer db ?
 452
 453 DOTU.chars db '0123456789ABCDEF'
 454 DOTU.buffer rq 16               ; 64-bit number has no more than 16 digits in hex
 455 DOTU.rbuffer rq 16
 456 DOTU.length dq ?
 457 DOTU.printed_length dq ?
 458
 459 PARSE_NUMBER.length dq ?
 460
 461 ;; Return stack
 462 rq $2000
 463 return_stack_top: