From 1d56576f49d8fb5d2871a5c7010a9d37e7dbfbf1 Mon Sep 17 00:00:00 2001 From: Jesse McDonald Date: Sun, 11 Oct 2020 02:08:26 -0500 Subject: [PATCH] separate bootstrap (asm) definitions from runtime definitions --- jumpforth.S | 455 ++++++++++++++++++++---------- startup.4th | 779 +++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 897 insertions(+), 337 deletions(-) diff --git a/jumpforth.S b/jumpforth.S index 2b5f7d3..b0142ce 100644 --- a/jumpforth.S +++ b/jumpforth.S @@ -9,7 +9,7 @@ .set BUFFER_SIZE,4096 .set RETURN_STACK_SIZE,8192 -.set DATA_SEGMENT_ALLOC_SIZE,65536 +.set DATA_SEGMENT_INITIAL_SIZE,65536 .set F_IMMED,0x80 .set F_HIDDEN,0x40 @@ -44,9 +44,20 @@ _start: int $0x80 movl %eax,data_C0 movl %eax,data_CP - movl %eax,data_BRK + movl %eax,%ebx + addl $DATA_SEGMENT_INITIAL_SIZE + 4096 - 1,%ebx + andl $-4096,%ebx + movl %ebx,data_BRK + movl $__NR_brk,%eax + int $0x80 + cmpl %eax,(data_BRK) + jne 0f mov $cold_start,%esi NEXT +0: movl $254,%ebx + movl $__NR_exit,%eax + int $0x80 + jmp 0b /* The entry point for threaded FORTH words */ /* Push the return address (%esi) on the return stack */ @@ -104,6 +115,13 @@ DODOES: /* Execute the DOES> code */ NEXT +.macro litstring text:req + .int LITSTRING + .byte (9f - 8f) +8: .ascii "\text" +9: .align 4 +.endm + .macro defname label:req,codeword:req,dataword:req,name="",flags=0 .section .data .align 4 @@ -197,7 +215,7 @@ defconst SYS_WRITE,__NR_write /* defconst SYS_TIME,__NR_time */ /* defconst SYS_CHMOD,__NR_chmod */ /* defconst SYS_LSEEK,__NR_lseek */ -/* defconst SYS_GETPID,__NR_getpid */ +defconst SYS_GETPID,__NR_getpid /* defconst SYS_MOUNT,__NR_mount */ /* defconst SYS_STIME,__NR_stime */ /* defconst SYS_PTRACE,__NR_ptrace */ @@ -326,7 +344,7 @@ defconst SYS_BRK,__NR_brk /* defconst SYS_MADVISE,__NR_madvise */ /* defconst SYS_GETDENTS64,__NR_getdents64 */ /* defconst SYS_FCNTL64,__NR_fcntl64 */ -/* defconst SYS_GETTID,__NR_gettid */ +defconst SYS_GETTID,__NR_gettid /* defconst SYS_READAHEAD,__NR_readahead */ /* defconst SYS_SETXATTR,__NR_setxattr */ /* defconst SYS_LSETXATTR,__NR_lsetxattr */ @@ -361,7 +379,7 @@ defconst SYS_BRK,__NR_brk /* defconst SYS_TIMER_DELETE,__NR_timer_delete */ /* defconst SYS_STATFS64,__NR_statfs64 */ /* defconst SYS_FSTATFS64,__NR_fstatfs64 */ -/* defconst SYS_TGKILL,__NR_tgkill */ +defconst SYS_TGKILL,__NR_tgkill /* defconst SYS_UTIMES,__NR_utimes */ /* defconst SYS_FADVISE64_64,__NR_fadvise64_64 */ /* defconst SYS_MBIND,__NR_mbind */ @@ -507,6 +525,44 @@ defconst SYS_BRK,__NR_brk /* Special dirfd for *at syscalls to resolve path relative to current directory */ /* defconst AT_FDCWD,-100 */ +/* +** errno values +*/ +/* defconst ERRNO_EPERM,1 */ +/* defconst ERRNO_ENOENT,2 */ +/* defconst ERRNO_ESRCH,3 */ +defconst ERRNO_EINTR,4 +/* defconst ERRNO_EIO,5 */ +/* defconst ERRNO_ENXIO,6 */ +/* defconst ERRNO_E2BIG,7 */ +/* defconst ERRNO_ENOEXEC,8 */ +/* defconst ERRNO_EBADF,9 */ +/* defconst ERRNO_ECHILD,10 */ +/* defconst ERRNO_EAGAIN,11 */ +/* defconst ERRNO_ENOMEM,12 */ +/* defconst ERRNO_EACCES,13 */ +/* defconst ERRNO_EFAULT,14 */ +/* defconst ERRNO_ENOTBLK,15 */ +/* defconst ERRNO_EBUSY,16 */ +/* defconst ERRNO_EEXIST,17 */ +/* defconst ERRNO_EXDEV,18 */ +/* defconst ERRNO_ENODEV,19 */ +/* defconst ERRNO_ENOTDIR,20 */ +/* defconst ERRNO_EISDIR,21 */ +/* defconst ERRNO_EINVAL,22 */ +/* defconst ERRNO_ENFILE,23 */ +/* defconst ERRNO_EMFILE,24 */ +/* defconst ERRNO_ENOTTY,25 */ +/* defconst ERRNO_ETXTBSY,26 */ +/* defconst ERRNO_EFBIG,27 */ +/* defconst ERRNO_ENOSPC,28 */ +/* defconst ERRNO_ESPIPE,29 */ +/* defconst ERRNO_EROFS,30 */ +/* defconst ERRNO_EMLINK,31 */ +/* defconst ERRNO_EPIPE,32 */ +/* defconst ERRNO_EDOM,33 */ +/* defconst ERRNO_ERANGE,34 */ + /* ** signal numbers */ @@ -515,7 +571,7 @@ defconst SYS_BRK,__NR_brk /* defconst SIGQUIT,3 */ /* defconst SIGILL,4 */ /* defconst SIGTRAP,5 */ -/* defconst SIGABRT,6 */ +defconst SIGABRT,6 /* defconst SIGIOT,6 */ /* defconst SIGBUS,7 */ /* defconst SIGFPE,8 */ @@ -572,16 +628,26 @@ defvar BUFFTOP,startup_defs_end defvar CP /* "compilation pointer", next free byte in the heap */ defvar BRK /* the (current) end of the heap */ -/* The initial word list containing all the standard FORTH words */ -defvar FORTH_WORDLIST,last_word,"FORTH-WORDLIST" +/* The list of primitive (native code) words provided by this file */ +defvar PRIMITIVE_WORDLIST,last_primitive,"PRIMITIVE-WORDLIST" + +/* The list of basic non-primitive words used to bootstrap the startup.4th file */ +defvar BOOTSTRAP_WORDLIST,last_word,"BOOTSTRAP-WORDLIST" + +/* The word list containing all the standard FORTH words */ +/* Initially it just mirrors the primitive list */ +/* The rest will be populated by the startup.4th script */ +defvar FORTH_WORDLIST,last_primitive,"FORTH-WORDLIST" /* The current compilation word list, initially FORTH-WORDLIST */ defvar CURRENT,data_FORTH_WORDLIST .section .data .align 4 +/* This is a linked list; initial_order points to the head of the list */ +0: .int 0,data_BOOTSTRAP_WORDLIST initial_order: - .int 0,data_FORTH_WORDLIST + .int 0b,data_FORTH_WORDLIST /* Head of the linked list representing the current search order */ defvar CURRENT_ORDER,initial_order,"CURRENT-ORDER" @@ -601,14 +667,12 @@ defcode SWAP /* ( a -- a a ) */ defcode DUP - mov (%esp),%eax - push %eax + pushl (%esp) NEXT /* ( a b -- a b a ) */ defcode OVER - mov 4(%esp),%eax - push %eax + pushl 4(%esp) NEXT /* ( a b -- b ) */ @@ -653,18 +717,14 @@ defcode TWODROP,"2DROP" /* ( a b -- a b a b ) */ defcode TWODUP,"2DUP" - mov (%esp),%ebx - mov 4(%esp),%eax - push %eax - push %ebx + pushl 4(%esp) + pushl 4(%esp) NEXT /* ( a b c d -- a b c d a b ) */ defcode TWOOVER,"2OVER" - mov 8(%esp),%ebx - mov 12(%esp),%eax - push %eax - push %ebx + pushl 12(%esp) + pushl 12(%esp) NEXT /* ( a b c d -- c d a b ) */ @@ -685,6 +745,12 @@ defcode NDROP lea (%esp,%eax,4),%esp NEXT +/* ( xu ... x0 u -- xu ... x0 xu ) */ +defcode PICK + pop %eax + pushl (%esp,%eax,4) + NEXT + /* ( 0 -- 0 ) */ /* ( a -- a a ) */ defcode QDUP,"?DUP" @@ -738,9 +804,9 @@ defcode UMUL,"U*" /* ( n1 n2 -- n1%n2 n1/n2 ) */ defcode DIVMOD,"/MOD" - xor %edx,%edx pop %ebx pop %eax + cltd idivl %ebx push %edx push %eax @@ -756,16 +822,44 @@ defcode UDIVMOD,"U/MOD" push %eax NEXT +.macro defzcmp label,opcode,name="\label",flags=0 +defcode \label,"\name",0,\flags + pop %eax + test %eax,%eax + \opcode %al +.ifdef .Lsetzflag + jmp .Lsetzflag +.else +.Lsetzflag: + movzbl %al,%eax + neg %eax + push %eax + NEXT +.endif +.endm + +defzcmp ZEQU,sete,"0=" +defzcmp ZNEQU,setne,"0<>" +defzcmp ZLT,setl,"0<" +defzcmp ZGT,setg,"0>" +defzcmp ZLE,setle,"0<=" +defzcmp ZGE,setge,"0>=" + .macro defcmp label,opcode,name="\label",flags=0 defcode \label,"\name",0,\flags pop %eax pop %ebx cmp %eax,%ebx \opcode %al +.ifdef .Lsetflag + jmp .Lsetflag +.else +.Lsetflag: movzbl %al,%eax neg %eax push %eax NEXT +.endif .endm /* ( a b -- flag ) ( various comparison operators, e.g. flag=a" push %eax NEXT -/* (R: a -- a ) ( -- a ) */ +/* ( R: a -- a ) ( -- a ) */ defcode RFETCH,"R@" movl (%ebp),%eax push %eax NEXT +/* ( R: xu ... x0 -- xu ... x0 ) ( S: u -- xu ) */ +defcode RPICK + pop %eax + pushl (%ebp,%eax,4) + NEXT + /* ( a b -- ) ( R: -- a b ) */ defcode TWOTOR,"2>R" pop %ebx @@ -918,13 +1018,6 @@ defcode LITSTRING andl $0xfffffffc,%esi NEXT -.macro litstring text:req - .int LITSTRING - .byte (9f - 8f) -8: .ascii "\text" -9: .align 4 -.endm - defcode BRANCH add (%esi),%esi NEXT @@ -1022,17 +1115,39 @@ defcode SYSCALL0 push %eax NEXT -defword HERE - .int CP,FETCH,EXIT +/* No runtime effect, but this code address can be used for debugger breakpoints */ +defcode BREAK + NEXT +/* This marks the start of the bootstrap word list */ +.eqv last_primitive,BREAK +.set link,0 + + .section .data +bootstrap_data_begin: + +defword TYPE + .int LIT,1,NROT,SYS_WRITE,SYSCALL3,DROP,EXIT + +defword EOL + litstring "\n" + .int TYPE,EXIT + +/* Used for any fatal errors that occur during bootstrapping */ +defword BAILOUT + .int BREAK + litstring "Fatal error\n" + .int TYPE +0: .int LIT,254,SYS_EXIT,SYSCALL1,DROP,BRANCH,(0b - .) + +/* Simplified KEY that can only read from the pre-filled input buffer */ +/* The startup.4th should replace this with a more complete version */ +/* If the input ends while this version is still in use the program will terminate */ defword KEY -0: .int CURRKEY,FETCH,DUP,BUFFTOP,FETCH,GE,ZBRANCH,(3f - .) /* ( -- currkey ) */ - .int DROP,BUFFER,LIT,0,OVER,__BUFFER_SIZE,SYS_READ,SYSCALL3 - .int DUP,LIT,0,LE,ZBRANCH,(2f - .) /* ( currkey -- buffer read-result ) */ - .int TWODROP /* ( buffer read-result -- ) */ -1: .int LIT,0,SYS_EXIT,SYSCALL1,DROP,BRANCH,(1b - .) /* ( -- ) */ -2: .int OVER,ADD,BUFFTOP,STORE /* ( buffer read-result -- buffer ) */ -3: .int DUP,ADD1,CURRKEY,STORE,FETCHBYTE,EXIT /* ( currkey -- currkey-C@ ) */ + .int CURRKEY,FETCH,DUP,BUFFTOP,FETCH,GE,ZBRANCH,(1f - .) + litstring "Unexpected end of buffer\n" + .int TYPE,BAILOUT +1: .int DUP,ADD1,CURRKEY,STORE,FETCHBYTE,EXIT /* Puts the most recently read key back in the input buffer */ /* CAUTION: Can only safely be used ONCE after each call to KEY! */ @@ -1040,60 +1155,33 @@ defword PUTBACK .int CURRKEY,FETCH,SUB1,CURRKEY,STORE,EXIT defword ISSPACE,"SPACE?" - .int DUP,LIT,' ',NEQU,ZBRANCH,(0f - .) - .int DUP,LIT,'\t',NEQU,ZBRANCH,(0f - .) - .int DUP,LIT,'\n',NEQU,ZBRANCH,(0f - .) - .int DUP,LIT,'\r',NEQU,ZBRANCH,(0f - .) - .int DUP,LIT,'\v',NEQU,ZBRANCH,(0f - .) - .int DROP,FALSE,EXIT -0: .int DROP,TRUE,EXIT + /* check for space (32) first and return true if input matches */ + .int DUP,LIT,32,EQU,QDUP,ZBRANCH,(0f - .),NIP,EXIT + /* otherwise test for 9...13 inclusive (HT, LF, VT, FF, CR) */ +0: .int LIT,9,SUB,LIT,(13 - 9),ULT,EXIT defword SKIPSPACE -0: .int KEY,ISSPACE,INVERT,ZBRANCH,(0b - .),PUTBACK,EXIT - -defword EMIT - .int SPFETCH,TWODUP,STOREBYTE,LIT,0,SWAP,LIT,1,SYS_WRITE,SYSCALL3,TWODROP,EXIT - -/* ( c-addr u -- ) */ -defword TYPE -0: .int QDUP,ZBRANCH,(1f - .),OVER,FETCHBYTE,EMIT - .int SUB1,SWAP,ADD1,SWAP,BRANCH,(0b - .) -1: .int DROP,EXIT - -defword ABORT -0: .int S0,SPSTORE,QUIT +0: .int KEY,ISSPACE,ZEQU,ZBRANCH,(0b - .),PUTBACK,EXIT +/* Simplified version that can only work within the preallocated data region */ +/* The startup.4th should replace this with a more complete version */ defword ALLOT - .int DUP,LIT,0,LT,ZBRANCH,(0f - .) - .int DUP,C0,FETCH,HERE,SUB,LT,ZBRANCH,(1f - .),BRANCH,(6f - .) -0: .int DUP,HERE,INVERT,UGT,ZBRANCH,(1f - .),BRANCH,(6f - .) -1: .int HERE,ADD,DUP,BRK,FETCH,UGT,ZBRANCH,(3f - .) - .int LIT,DATA_SEGMENT_ALLOC_SIZE-1,TWODUP,ADD,SWAP,INVERT,AND - .int DUP,SYS_BRK,SYSCALL1,OVER,NEQU,ZBRANCH,(2f - .) - .int TWODROP - litstring "Out of memory\n" - .int TYPE,ABORT -2: .int BRK,STORE -3: .int CP,STORE,EXIT -6: litstring "Allocation out of bounds\n" - .int TYPE,ABORT + .int CP,FETCH,ADD,CP,STORE,EXIT defword COMMA,"," - .int HERE,CELL,ALLOT,STORE,EXIT + .int CP,FETCH,CELL,ALLOT,STORE,EXIT defword COMMABYTE,"C," - .int HERE,LIT,1,ALLOT,STOREBYTE,EXIT + .int CP,FETCH,LIT,1,ALLOT,STOREBYTE,EXIT /* ( addr -- a-addr ) Round up to next cell-aligned address */ defword ALIGNED .int LIT,3,ADD,LIT,-4,AND,EXIT +/* ( -- ) Allocate data space up to the next cell-aligned address */ +/* Any bytes skipped over during alignment should be considered uninitialized */ defword ALIGN - .int HERE,ALIGNED,HERE,SUB - .int QDUP,ZBRANCH,(0f - .),LIT,0,COMMABYTE,SUB1 - .int QDUP,ZBRANCH,(0f - .),LIT,0,COMMABYTE,SUB1 - .int ZBRANCH,(0f - .),LIT,0,COMMABYTE -0: .int EXIT + .int CP,FETCH,DUP,ALIGNED,SWAP,SUB,ALLOT,EXIT /* ( c-addr-1 u-1 c-addr-2 u-2 -- flag ) */ defword STREQU,"=S" @@ -1106,38 +1194,44 @@ defword STREQU,"=S" 1: .int TWODROP,DROP,FALSE,EXIT /* FALSE R: */ 2: .int TWODROP,DROP,TRUE,EXIT /* TRUE R: */ -/* ( entry -- cfa-addr ) Address of the codeword field */ +/* ( xt -- cfa-addr ) Address of the codeword field */ defword TCFA,">CFA" .int EXIT -/* ( entry -- dfa-addr ) Address of the dataword field */ +/* ( xt -- dfa-addr ) Address of the dataword field */ defword TDFA,">DFA" .int CELL,ADD,EXIT -/* ( entry -- dfa-addr ) Address of the dataword field */ +/* ( xt -- link-addr ) Address of the dataword field */ defword TLINK,">LINK" .int LIT,8,ADD,EXIT -/* ( entry -- flags-addr ) Address of the flag/length byte */ +/* ( xt -- flags-addr ) Address of the flag/length byte */ defword TFLAGS,">FLAGS" .int LIT,12,ADD,EXIT -/* ( entry -- name-addr name-len ) Address and length of the name field */ +/* ( xt -- name-addr name-len ) Address and length of the name field */ defword TNAME,">NAME" .int TFLAGS,DUP,ADD1,SWAP,FETCHBYTE,__F_LENMASK,AND,EXIT -/* ( entry -- a-addr ) Data-field address (next cell after the name) */ +/* ( xt -- a-addr ) Data-field address (next cell after the name) */ defword TBODY,">BODY" .int TNAME,ADD,ALIGNED,EXIT -/* ( entry -- flag ) Is the F_IMMED flag set? */ +/* ( xt -- flag ) Is the F_IMMED flag set? */ defword ISIMMEDIATE,"IMMEDIATE?" .int LIT,12,ADD,FETCHBYTE,__F_IMMED,AND,LIT,0,NEQU,EXIT -/* ( entry -- flag ) Is the F_HIDDEN flag set? */ +/* ( xt -- flag ) Is the F_HIDDEN flag set? */ defword ISHIDDEN,"HIDDEN?" .int LIT,12,ADD,FETCHBYTE,__F_HIDDEN,AND,LIT,0,NEQU,EXIT +/* ( xt -- flag ) Is the xt a non-primitive bootstrap word? */ +defword ISBOOTSTRAP,"BOOTSTRAP?" + .int DUP,LIT,bootstrap_data_begin,UGE,ZBRANCH,(0f - .) + .int LIT,bootstrap_data_end,ULT,EXIT +0: .int DROP,FALSE,EXIT + /* Convert search order entry address to address of word list identifier field */ defword ORDER_TWID,"ORDER>WID" .int CELL,ADD,EXIT @@ -1171,72 +1265,21 @@ defword FIND /* ( c-addr u -- xt 1 | xt -1 ) */ defword FIND_OR_ABORT,"FIND-OR-ABORT" .int FIND,QDUP,ZBRANCH,(0f - .),EXIT -0: litstring "UNKNOWN WORD: " - .int TYPE,TYPE,LIT,'\n',EMIT,ABORT - -defword QUOTE,"'" - .int WORD,FIND_OR_ABORT,DROP,EXIT - -defword CREATE - .int ALIGN,HERE - .int LIT,DODATA,COMMA - .int LIT,0,COMMA - .int CURRENT,FETCH,FETCH,COMMA - .int LIT,0,COMMABYTE - .int WORD,NIP,DUP,ALLOT,ALIGN - .int OVER,TFLAGS,STOREBYTE - .int HERE,OVER,TDFA,STORE - .int CURRENT,FETCH,STORE,EXIT - -defword COLON,":" - .int CREATE - .int CURRENT,FETCH,FETCH - .int DUP,TFLAGS,DUP,FETCHBYTE,__F_HIDDEN,OR,SWAP,STOREBYTE - .int LIT,DOCOL,OVER,TCFA,STORE - .int HERE,SWAP,TDFA,STORE - .int TRUE,STATE,STORE,EXIT - -defword NONAME,":NONAME" - .int ALIGN,HERE - .int LIT,DOCOL,COMMA - .int HERE,LIT,12,ADD,COMMA - .int CURRENT,FETCH,FETCH,COMMA - .int __F_HIDDEN,COMMABYTE,ALIGN - .int DUP,CURRENT,FETCH,STORE - .int TRUE,STATE,STORE - .int EXIT - -/* ( xt -- ) Clear the F_HIDDEN flag of the word denoted by xt */ -defword _UNHIDE_,"(UNHIDE)",F_HIDDEN - .int TFLAGS,DUP,FETCHBYTE,__F_HIDDEN,INVERT,AND,SWAP,STOREBYTE,EXIT - -defword SEMI,";",F_IMMED - .int LIT,EXIT,COMMA,CURRENT,FETCH,FETCH,_UNHIDE_ - .int FALSE,STATE,STORE,ALIGN,EXIT - -/* ( dfa -- ) Set CFA of latest word to DODOES and set DFA field to address on stack */ -defword _DOES_,"(DOES)",F_HIDDEN - .int CURRENT,FETCH,FETCH,LIT,DODOES,OVER,TCFA,STORE,TDFA,STORE,EXIT - -/* Append " (DOES) EXIT" to the current definition */ -/* where is the next address after the "EXIT" as a literal number */ -/* Stay in compilation mode for the body of the DOES> clause */ -defword DOES,"DOES>",F_IMMED - .int LIT,LIT,COMMA,HERE,LIT,12,ADD,COMMA - .int LIT,_DOES_,COMMA,LIT,EXIT,COMMA,EXIT +0: litstring "Word not found: " + .int TYPE,TYPE,EOL,BAILOUT defword WORD - .int SKIPSPACE,HERE + .int SKIPSPACE,CP,FETCH 0: .int KEY,DUP,ISSPACE,ZBRANCH,(1f - .) - .int DROP,PUTBACK,HERE,OVER,SUB,OVER,CP,STORE,EXIT + .int DROP,PUTBACK,CP,FETCH,OVER,SUB,OVER,CP,STORE,EXIT 1: .int COMMABYTE,BRANCH,(0b - .) defword READSTRING - .int HERE + .int CP,FETCH 0: .int KEY,DUP,LIT,'\\',EQU,ZBRANCH,(1f - .) .int DROP,KEY,BRANCH,(2f - .) 1: .int DUP,LIT,'"',EQU,ZBRANCH,(2f - .) - .int DROP,HERE,OVER,SUB,ALIGN,EXIT + .int DROP,CP,FETCH,OVER,SUB,ALIGN,EXIT 2: .int COMMABYTE,BRANCH,(0b - .) defword PARSENUMBER @@ -1259,12 +1302,12 @@ defword INTERPRET .int SKIPSPACE .int KEY,LIT,'"',EQU,ZBRANCH,(1f - .) .int STATE,FETCH,ZBRANCH,(0f - .) - .int LIT,LITSTRING,COMMA,HERE,LIT,0,COMMABYTE + .int LIT,LITSTRING,COMMA,CP,FETCH,LIT,0,COMMABYTE .int READSTRING,ROT,STOREBYTE,DROP,ALIGN,EXIT /* ELSE */ 0: .int READSTRING,EXIT /* ELSE */ -1: .int PUTBACK,WORD +1: .int PUTBACK,LIT,64,ALLOT,WORD,LIT,-64,ALLOT .int TWODUP,PARSENUMBER,ZBRANCH,(3f - .) .int NROT,TWODROP .int STATE,FETCH,ZBRANCH,(2f - .) @@ -1277,7 +1320,37 @@ defword INTERPRET .int DUP,ISIMMEDIATE,ZBRANCH,(5f - .) 4: .int EXECUTE,EXIT /* ELSE */ -5: .int COMMA,EXIT +5: .int DUP,ISBOOTSTRAP,ZBRANCH,(6f - .) + litstring "Compiled bootstrap word: " + .int TYPE,TNAME,TYPE,EOL,BAILOUT +6: .int COMMA,EXIT + +defword QUIT + .int R0,RSPSTORE +0: .int INTERPRET,BRANCH,(0b - .) + +/* CREATE depends on bootstrap ALIGN, COMMA, WORD, ALLOT, >FLAGS, and >DFA */ +defword CREATE + .int ALIGN,CP,FETCH + .int LIT,DODATA,COMMA + .int LIT,0,COMMA + .int CURRENT,FETCH,FETCH,COMMA + .int LIT,0,COMMABYTE + .int WORD,NIP,DUP,ALLOT,ALIGN + .int OVER,TFLAGS,STOREBYTE + .int CP,FETCH,OVER,TDFA,STORE + .int CURRENT,FETCH,STORE,EXIT + +/* +** These next few words aren't strictly necessary for bootstrapping but +** do make the early parts of the startup.4th file much more readable. +*/ + +defword LBRACKET,"[",F_IMMED + .int FALSE,STATE,STORE,EXIT + +defword RBRACKET,"]",F_IMMED + .int TRUE,STATE,STORE,EXIT defword SLASH,"\\",F_IMMED 0: .int KEY,LIT,10,EQU,ZBRANCH,(0b - .),EXIT @@ -1285,12 +1358,90 @@ defword SLASH,"\\",F_IMMED defword PAREN,"(",F_IMMED 0: .int KEY,LIT,')',EQU,ZBRANCH,(0b - .),EXIT -defword QUIT - .int R0,RSPSTORE -0: .int INTERPRET,BRANCH,(0b - .) +defword COLON,":" + /* Make word & fetch address */ + .int CREATE,CURRENT,FETCH,FETCH + /* Set as hidden */ + .int DUP,TFLAGS,DUP,FETCHBYTE,__F_HIDDEN,OR,SWAP,STOREBYTE + /* Convert to DOCOL codeword */ + .int __DOCOL,SWAP,TCFA,STORE + /* Enter compilation mode */ + .int TRUE,STATE,STORE,EXIT -/* This is the initial value of the FORTH_WORDLIST variable */ -.set last_word,QUIT +defword SEMI,";",F_IMMED + /* Terminate the code with EXIT */ + .int LIT,EXIT,COMMA + /* Fetch the address of the latest definition */ + .int CURRENT,FETCH,FETCH + /* Clear the F_HIDDEN flag */ + .int TFLAGS,DUP,FETCHBYTE,__F_HIDDEN,INVERT,AND,SWAP,STOREBYTE + /* Leave compilation mode */ + .int FALSE,STATE,STORE,EXIT + +defword CONSTANT + .int CREATE,CURRENT,FETCH,FETCH,TDFA,STORE,EXIT + +defword QUOTE,"'" + .int WORD,FIND_OR_ABORT,DROP,EXIT + +defword LITERAL,,F_IMMED + .int LIT,LIT,COMMA,COMMA,EXIT + +defword COMPILE_QUOTE,"[']",F_IMMED + .int QUOTE,LITERAL,EXIT + +defword POSTPONE,,F_IMMED + .int WORD,FIND_OR_ABORT,ZGT,ZBRANCH,(0f - .) + .int LITERAL + /* this would compile bootstrap COMMA into the definition */ + /* .int LITERAL,LIT,COMMA,COMMA,EXIT */ + /* instead, try to use whichever COMPILE, is currently in scope */ + /* and fail if no COMPILE, is available */ + litstring "COMPILE," + .int FIND,ZEQU,ZBRANCH,(0f - .) + litstring "POSTPONE used on non-immediate word without COMPILE,: " + .int TYPE,TWODROP,TNAME,TYPE,EOL,BAILOUT +0: .int DUP,ISBOOTSTRAP,ZBRANCH,(1f - .) + litstring "POSTPONE used on bootstrap word: " + .int TYPE,TNAME,TYPE,EOL,BAILOUT +1: .int COMMA,EXIT + +defword AHEAD,,F_IMMED + .int LIT,BRANCH,COMMA,CP,FETCH,LIT,0,COMMA,EXIT + +defword IF,,F_IMMED + .int LIT,ZBRANCH,COMMA,CP,FETCH,LIT,0,COMMA,EXIT + +defword THEN,,F_IMMED + .int CP,FETCH,OVER,SUB,SWAP,STORE,EXIT + +defword ELSE,,F_IMMED + .int AHEAD,SWAP,THEN,EXIT + +defword BEGIN,,F_IMMED + .int CP,FETCH,EXIT + +defword AGAIN,,F_IMMED + .int LIT,BRANCH,COMMA,CP,FETCH,SUB,COMMA,EXIT + +defword UNTIL,,F_IMMED + .int LIT,ZBRANCH,COMMA,CP,FETCH,SUB,COMMA,EXIT + +defword WHILE,,F_IMMED + .int IF,SWAP,EXIT + +defword REPEAT,,F_IMMED + .int AGAIN,THEN,EXIT + + .section .data +bootstrap_data_end: + +/* +** End of convenience words +*/ + +/* This is the initial value of the BUILTIN-WORDLIST variable */ +.eqv last_word,REPEAT .section .rodata .align 4 diff --git a/startup.4th b/startup.4th index 91b093d..52c1637 100644 --- a/startup.4th +++ b/startup.4th @@ -1,71 +1,158 @@ -\ Read the next word and return the first character -( "name" -- c ) -: CHAR WORD DROP C@ ; - -\ Some common non-word characters -: HT 9 ; \ Horizontal Tab -: LF 10 ; \ Line Feed (newline) -: CR 13 ; \ Carriage Return -: BL 32 ; \ BLank (space) - -\ Emit an implementation-dependent End-of-Line sequence -: EOL LF EMIT ; - -\ Emit a blank (space) character -: SPACE BL EMIT ; - -\ Separate division and modulus operators -: / /MOD NIP ; -: MOD /MOD DROP ; - \ Get and set the current compilation word list -: GET-CURRENT CURRENT @ ; -: SET-CURRENT CURRENT ! ; +: GET-CURRENT ( -- wid ) CURRENT @ ; +: SET-CURRENT ( wid -- ) CURRENT ! ; \ Get the execution token of the most recent word in the compilation word list -: LATEST GET-CURRENT @ ; +\ If the word list is empty the result will be zero +: LATEST ( -- xt | 0 ) GET-CURRENT @ ; + +\ Shorthand for working with cell-aligned addresses +: CELL+ ( addr1 -- addr2 ) CELL + ; +: CELL- ( addr1 -- addr2 ) CELL - ; +: CELLS ( n1 -- n2 ) CELL * ; +: CELLS+ ( addr1 n -- addr2 ) CELL * + ; +: CELLS- ( addr1 n -- addr2 ) CELL * - ; + +\ Round up to the next cell-aligned address +: ALIGNED ( addr -- a-addr ) + [ CELL 1- ] LITERAL + [ CELL NEGATE ] LITERAL AND ; + +\ Field accessors for execution tokens +: >CFA ( xt -- a-addr ) ; +: >DFA ( xt -- a-addr ) CELL+ ; +: >LINK ( xt -- a-addr ) 2 CELLS+ ; +: >FLAGS ( xt -- c-addr ) 3 CELLS+ ; +: >NAME ( xt -- c-addr u ) >FLAGS DUP 1+ SWAP C@ F_LENMASK AND ; +: >BODY ( xt -- a-addr ) >NAME + ALIGNED ; + +: IMMEDIATE? ( xt -- flag ) >FLAGS C@ F_IMMED AND 0<> ; +: HIDDEN? ( xt -- flag ) >FLAGS C@ F_HIDDEN AND 0<> ; \ Set the latest defined word as immediate \ Note that IMMEDIATE is itself an immediate word -: IMMEDIATE LATEST >FLAGS DUP C@ F_IMMED OR SWAP C! ; IMMEDIATE +: IMMEDIATE ( -- ) LATEST >FLAGS DUP C@ F_IMMED OR SWAP C! ; IMMEDIATE \ Switch from compiling to interpreting, or vice-versa -: [ FALSE STATE ! ; IMMEDIATE -: ] TRUE STATE ! ; IMMEDIATE +: [ ( -- ) IMMEDIATE FALSE STATE ! ; +: ] ( -- ) IMMEDIATE TRUE STATE ! ; -\ COMPILE, appends the effect of the execution token on the top of the stack -\ to the current definition. In this implementation it's equivalent to , since -\ definitions are just arrays of execution tokens. -: COMPILE, , ; +\ Separate the division and modulus operators +: / ( n1 n2 -- n1/n2 ) /MOD NIP ; +: MOD ( n1 n2 -- n1%n2 ) /MOD DROP ; -\ Append the execution semantics of the current definition to the current definition. -: RECURSE LATEST COMPILE, ; IMMEDIATE +\ Names for the standard file descriptor numbers +0 CONSTANT STDIN +1 CONSTANT STDOUT +2 CONSTANT STDERR -\ Append the LIT xt and the topmost word on the stack to the current definition. -\ If POSTPONE were already defined then this could simply be written as: -\ -\ : LITERAL POSTPONE LIT , ; IMMEDIATE -\ -\ ... but since it isn't we must manually insert the LIT sequence to obtain the -\ address of LIT as a literal to be compiled into the current definition. -: LITERAL [ ' LIT COMPILE, ' LIT , ] COMPILE, , ; IMMEDIATE +\ Write one character to FD 1 (stdout) +: EMIT ( c -- "c" ) + SP@ 2DUP C! STDOUT SWAP 1 SYS_WRITE SYSCALL3 2DROP ; -\ Read a word and append its compilation semantics to the current definition. -: POSTPONE ( "name" -- ) IMMEDIATE - WORD FIND-OR-ABORT DROP - \ Would be: DUP IMMEDIATE? IF COMPILE, EXIT THEN - DUP IMMEDIATE? 0BRANCH [ HERE 0 , ] COMPILE, EXIT [ HERE OVER - SWAP ! ] - [ ' LITERAL COMPILE, ' COMPILE, ] LITERAL COMPILE, +\ Write a character array to stdout +\ Repeat write syscall until entire string is written +\ Abandon output on any error other than EINTR +: TYPE ( c-addr u -- "ccc" ) + BEGIN + ?DUP + WHILE + 2DUP STDOUT -ROT SYS_WRITE SYSCALL3 + DUP 0<= IF + ERRNO_EINTR NEGATE <> IF + 2DROP EXIT + THEN + ELSE + \ Decrement the array size and increment the + \ address by the number of bytes written + TUCK - -ROT + SWAP + THEN + REPEAT + DROP ; + +\ Define names for the whitespace characters + 8 CONSTANT HT \ Horizontal Tab +10 CONSTANT LF \ Line Feed (newline) +11 CONSTANT VT \ Vertical Tab +12 CONSTANT FF \ Form Feed +13 CONSTANT CR \ Carriage Return +32 CONSTANT BL \ BLank (space) + +\ Test whether the given character is whitespace (HT, LF, VT, FF, CR, or BL) +\ Note that HT, LF, VT, FF, and CR together form the range 9 ... 13 inclusive +: SPACE? ( c -- flag ) + DUP BL = IF DROP TRUE EXIT THEN + 9 - [ 13 9 - ] LITERAL U<= ; + +\ Emit a blank (space) character +: SPACE ( -- "" ) BL EMIT ; + +\ Emit a horizontal tab character +: TAB ( -- "" ) HT EMIT ; + +\ Emit an implementation-dependent End-of-Line sequence +\ Here this is just a LF (Unix convention) but it could be CR (Mac) or CR+LF (DOS) +: EOL ( -- "" ) LF EMIT ; + +\ Terminate the program, successfully +\ This will never return, even if the system call does +: BYE ( -- ) + BEGIN 0 SYS_EXIT SYSCALL1 DROP AGAIN ; + +\ Terminate the program with a fatal error (SIGABRT) +: FATAL-ERROR ( -- ) + BEGIN + \ A full version would also unmask SIGABRT and restore the default handler + \ For now we assume the mask and handler are already at default settings + SYS_GETPID SYSCALL0 SYS_GETTID SYSCALL0 SIGABRT SYS_TGKILL SYSCALL3 DROP + AGAIN ; + +\ Return the next address in the compilation/data area +: HERE ( -- addr ) CP @ ; + +: ALLOT-BOUNDS "Allocation out of bounds!" TYPE EOL FATAL-ERROR ; +: ALLOT-OOM "Out of memory!" TYPE EOL FATAL-ERROR ; + +\ When growing the data area, round the end address up to a multiple of this size +65536 CONSTANT DATA-SEGMENT-ALIGNMENT + +: ALLOT ( n -- ) + DUP 0< IF + DUP C0 HERE - < IF ALLOT-BOUNDS THEN + ELSE + DUP HERE INVERT U> IF ALLOT-BOUNDS THEN + THEN + HERE + DUP BRK @ U> IF + [ DATA-SEGMENT-ALIGNMENT 1- ] LITERAL 2DUP + SWAP INVERT AND + DUP + SYS_BRK SYSCALL1 + OVER <> IF ALLOT-OOM THEN + BRK ! + THEN + CP ! ; -\ Like CHAR but generates a literal at compile-time. -: [CHAR] CHAR POSTPONE LITERAL ; IMMEDIATE +: , HERE CELL ALLOT ! ; -\ Like ' but generates a literal at compile-time. -: ['] ' POSTPONE LITERAL ; IMMEDIATE +: C, HERE 1 ALLOT C! ; -\ Set the F_HIDDEN flag on the named word: HIDE -: HIDE ' >FLAGS DUP C@ F_HIDDEN OR SWAP C! ; +: ALIGN HERE ALIGNED HERE - BEGIN ?DUP WHILE 0 C, 1- REPEAT ; + +\ Append the effect of the token on top of the stack to the current definition. +\ Here it's equivalent to , since words are just arrays of execution tokens. +\ Once COMPILE, has been defined we can use POSTPONE for non-immediate words. +: COMPILE, ( xt -- ) , ; + +\ Append the LIT xt and the topmost word on the stack to the current definition. +: LITERAL ( Compilation: x -- ) ( Runtime: -- x ) IMMEDIATE + POSTPONE LIT , ; + +\ Append the execution semantics of the current definition to the current definition +: RECURSE ( -- ) IMMEDIATE + LATEST COMPILE, ; + +\ Unhide the current definition so it can refer to itself by name +: RECURSIVE ( -- ) IMMEDIATE + LATEST >FLAGS DUP C@ F_HIDDEN INVERT AND SWAP C! ; \ Our first control-flow primitive: IF {ELSE } THEN \ @@ -78,10 +165,202 @@ \ ELSE inserts an unconditional branch (to THEN) and also resolves the \ previous forward reference (from IF). \ -: IF POSTPONE 0BRANCH HERE 0 , ; IMMEDIATE -: AHEAD POSTPONE BRANCH HERE 0 , ; IMMEDIATE -: THEN HERE OVER - SWAP ! ; IMMEDIATE -: ELSE POSTPONE AHEAD SWAP POSTPONE THEN ; IMMEDIATE +: IF ( C: -- orig ) ( Runtime S: flag -- ) IMMEDIATE + POSTPONE 0BRANCH HERE 0 , ; +: AHEAD ( C: -- orig ) IMMEDIATE + POSTPONE BRANCH HERE 0 , ; +: THEN ( C: orig -- ) IMMEDIATE + HERE OVER - SWAP ! ; +: ELSE ( C: orig1 -- orig2 ) IMMEDIATE + POSTPONE AHEAD SWAP POSTPONE THEN ; + +\ Unbounded loop: BEGIN AGAIN +\ BEGIN places the offset of the start of on the stack. +\ AGAIN creates a relative branch back to the start of . +: BEGIN ( C: -- dest ) IMMEDIATE + HERE ; +: AGAIN ( C: dest -- ) IMMEDIATE + POSTPONE BRANCH HERE - , ; + +\ Simple conditional loop: BEGIN UNTIL +\ UNTIL consumes the top of the stack and branches back to BEGIN if the value was zero. +: UNTIL ( C: dest -- ) ( Runtime S: flag -- ) IMMEDIATE + POSTPONE 0BRANCH HERE - , ; + +\ Alternate conditional loop: BEGIN WHILE REPEAT +: WHILE ( C: dest -- orig dest ) ( Runtime S: flag -- ) IMMEDIATE + POSTPONE IF SWAP ; +: REPEAT ( C: orig dest -- ) IMMEDIATE + POSTPONE AGAIN POSTPONE THEN ; + +\ Range loop: DO LOOP +\ DO +LOOP +: UNLOOP POSTPONE 2RDROP ; IMMEDIATE +: DO POSTPONE 2>R POSTPONE BEGIN ; IMMEDIATE +: (+LOOP) ( step limit index -- flag limit index' ) + ROT + 2DUP = -ROT ; +: +LOOP + POSTPONE 2R> POSTPONE (+LOOP) POSTPONE 2>R + POSTPONE UNTIL POSTPONE 2RDROP +; IMMEDIATE +: LOOP 1 POSTPONE LITERAL POSTPONE +LOOP ; IMMEDIATE + +\ Return the current index value from the innermost or next-innermost loop. +\ The loops must be directly nested with no other changes to the return stack +: I 1 RPICK ; +: J 3 RPICK ; + +\ Sequential equality tests: +\ CASE +\ OF ENDOF +\ OF ENDOF +\ ... +\ ENDCASE +\ +\ When equals execute , when equals execute , etc. +\ During compilation the stack holds a list of forward references to the ENDCASE, +\ with the number of references on top. Inside OF ... ENDOF there is additionally +\ a forward reference to the ENDOF (as with IF ... THEN) above the ENDCASE counter. +\ +\ Begin by creating a counter for the number of unresolved ENDOF forward references +: CASE ( C: -- 0 ) IMMEDIATE + 0 ; +\ At runtime compare the values on the top of the stack; branch to ENDOF if unequal +\ Keep the first value for the next OF if unequal, otherwise consume both +: OF ( C: orign ... orig1 n -- orign ... orig1 n orig-of ) IMMEDIATE + POSTPONE OVER POSTPONE = POSTPONE IF POSTPONE DROP ; +\ Create a forward branch to ENDCASE and resolve the one from OF +: ENDOF ( C: orign ... orig1 n orig-of -- orign ... orig1 orig0 n+1 ) IMMEDIATE + POSTPONE AHEAD -ROT POSTPONE THEN 1+ ; +\ Drop the value in case none of the OF...ENDOF clauses matched +\ Resolve all the forward branches from ENDOF to the location after ENDCASE +: ENDCASE ( C: orign ... orig1 n -- ) IMMEDIATE + POSTPONE DROP 0 DO POSTPONE THEN LOOP ; + +\ If the input buffer is empty, refill it from stdin +\ Return the next character from the input buffer +: KEY + CURRKEY @ + DUP BUFFTOP @ >= IF + DROP BUFFER 0 OVER BUFFER_SIZE SYS_READ SYSCALL3 + DUP 0<= IF + 2DROP + BYE + THEN + OVER + BUFFTOP ! + THEN + DUP 1+ CURRKEY ! C@ +; + +\ Puts the most recently read key back in the input buffer +\ CAUTION: Can only safely be used ONCE after each call to KEY! +\ This takes advantage of the fact that the key is still in the buffer +: PUTBACK CURRKEY @ 1- CURRKEY ! ; + +: SKIPSPACE BEGIN KEY SPACE? INVERT UNTIL PUTBACK ; + +\ Skip whitespace; read and return the next word delimited by whitespace +\ The word is stored in contiguous but *unallocated* data space +\ The delimiting whitespace character is left in the input buffer +: WORD ( "ccc" -- c-addr u ) + SKIPSPACE + HERE + BEGIN + KEY DUP SPACE? 0= + WHILE + C, + REPEAT + DROP + PUTBACK + HERE OVER - + OVER CP ! ; + +: CREATE ( "ccc" -- ) + ALIGN HERE + DODATA , 0 , LATEST , + HERE 0 C, WORD + NIP DUP ALLOT ALIGN SWAP C! + HERE OVER >DFA ! + GET-CURRENT ! +; + +\ Called when a word using DOES> is executed (not compiled) to set +\ the runtime behavior of the most recently defined word +: (DOES) ( dfa -- ) LATEST DODOES OVER >CFA ! >DFA ! ; + +\ Append " (DOES) EXIT" to the current definition +\ where is the next address after the "EXIT" as a literal number +\ Stay in compilation mode for the body of the DOES> clause +: DOES> ( -- ) IMMEDIATE + POSTPONE LIT HERE 0 , POSTPONE (DOES) POSTPONE EXIT + HERE SWAP ! ; + +\ Define a named constant +\ Execution: ( value "name" -- ) +\ name Execution: ( -- value ) +\ +\ By default CREATEd words have codeword DODATA which returns the value +\ of the DFA field, so store the constant value there +\ +\ Alternate definition: +\ : CONSTANT : POSTPONE LITERAL POSTPONE ; ; +: CONSTANT CREATE LATEST >DFA ! ; + +\ Define a single-cell named variable which returns its data address when executed. +\ The initial value is formally undefined. This implementation sets it to zero. +\ Execution: ( "name" -- ) +\ name Execution: ( -- a-addr ) +: VARIABLE CREATE 0 , ; + +\ Define a single-cell named value which returns its data (not address) when executed. +\ Named values defined with VALUE can be modified with TO. +\ Execution: ( x "name" -- ) +\ name execution: ( -- value ) +: VALUE CREATE , DOES> @ ; + +\ Define a threaded FORTH word +\ The word is initially hidden so it can refer to a prior word with the same name +\ The definition is terminated with the ; immediate word, which unhides the name +: : ( "ccc" -- ) + CREATE LATEST + DUP >FLAGS DUP C@ F_HIDDEN OR SWAP C! + DOCOL SWAP >CFA ! + POSTPONE ] ; + +\ Like : but the definition has no name +\ The zero-length name still included in the word list so LATEST can refer to it +\ The execution token is left on the stack for use after the definition ends +: :NONAME ( -- ) + ALIGN HERE + DOCOL , + HERE [ 3 CELLS ] LITERAL + , + LATEST , + F_HIDDEN C, ALIGN + DUP GET-CURRENT ! + POSTPONE ] ; + +\ End a definition by appending EXIT, leaving compilation mode, and unhiding the name +\ As an optimization, zero-length names (from :NONAME) are left hidden +: ; ( -- ) IMMEDIATE + POSTPONE EXIT POSTPONE [ + LATEST >FLAGS DUP C@ + DUP F_LENMASK AND IF + \ Length is not zero; clear the F_HIDDEN flag + F_HIDDEN INVERT AND SWAP C! + ELSE + 2DROP + THEN ; + +\ Create a deferred word +\ At present a deferred word is just an ordinary threaded function +\ DEFER! and IS update which word is called by overwriting the threaded code +\ The explicit EXIT is just a placeholder to be overwritten by DEFER! or IS +\ A future version might use a special codeword with the target in the DFA field +: DEFER ( "ccc" -- ) : POSTPONE EXIT POSTPONE ; ; + +\ Fetch and store the target of the deferred word denoted by deferred-xt +: DEFER@ ( deferred-xt -- xt ) >DFA @ @ ; +: DEFER! ( xt deferred-xt -- ) >DFA @ ! ; \ Inline :NONAME-style function literals. "{ }" has the runtime effect \ of placing the execution token for an anonymous function with the runtime @@ -99,9 +378,9 @@ \ Hello \ \ Compilation effect: ( C: -- latest orig state ) -\ Interpreter effect: ( C: -- latest state ) +\ Interpreter effect: ( S: -- latest state ) \ Enters compilation mode if not already compiling -: { +: { ( -- latest {orig} state ) IMMEDIATE LATEST STATE @ DUP IF @@ -109,12 +388,11 @@ SWAP POSTPONE [ THEN - :NONAME -; IMMEDIATE + :NONAME ; -\ ( C: latest {orig} state -- ) -\ Leave compilation mode if (prior) state was 0 -: } +\ Resolve the forward branch over the inner function +\ Leave compilation mode if STATE was 0 before { was executed +: } ( C: latest {orig} state -- ) IMMEDIATE POSTPONE ; SWAP IF -ROT POSTPONE THEN @@ -123,78 +401,46 @@ POSTPONE ] ELSE SWAP GET-CURRENT ! - THEN -; IMMEDIATE + THEN ; -\ Unbounded loop: BEGIN AGAIN -\ BEGIN places the offset of the start of on the stack. -\ AGAIN creates a relative branch back to the start of . -: BEGIN HERE ; IMMEDIATE -: AGAIN POSTPONE BRANCH HERE - , ; IMMEDIATE +\ Read the next word and return the first character +: CHAR ( "name" -- c ) + WORD DROP C@ ; -\ Conditional loop: BEGIN WHILE REPEAT -: WHILE POSTPONE IF SWAP ; IMMEDIATE -: REPEAT POSTPONE AGAIN POSTPONE THEN ; IMMEDIATE +\ Like CHAR but generates a literal at compile-time. +: [CHAR] ( Compilation: "ccc" -- ) ( Runtime: -- c ) IMMEDIATE + CHAR POSTPONE LITERAL ; -\ Alternate conditional loop: BEGIN UNTIL -\ UNTIL consumes the top of the stack and branches back to BEGIN if the value was zero. -: UNTIL POSTPONE 0BRANCH HERE - , ; IMMEDIATE +\ Return -1, 0, or 1 if n is respectively negative, zero, or positive +: SIGN ( n -- -1 | 0 | 1 ) DUP IF 0< 2 * 1+ THEN ; -\ Range loop: DO LOOP -\ DO +LOOP -: UNLOOP POSTPONE 2RDROP ; IMMEDIATE -: DO POSTPONE 2>R POSTPONE BEGIN ; IMMEDIATE -: (+LOOP) ( step limit index -- flag limit index' ) - ROT + 2DUP <> -ROT ; -: +LOOP - POSTPONE 2R> POSTPONE (+LOOP) POSTPONE 2>R POSTPONE UNTIL POSTPONE 2RDROP -; IMMEDIATE -: LOOP 1 POSTPONE LITERAL POSTPONE +LOOP ; IMMEDIATE - -\ Return the current index value from the innermost or next-innermost loop. -: I RSP@ [ CELL ] LITERAL + @ ; -: J RSP@ [ 3 CELL * ] LITERAL + @ ; - -\ Sequential equality tests: -\ CASE -\ OF ENDOF -\ OF ENDOF -\ ... -\ ENDCASE -\ -\ When equals execute , when equals execute , etc. -\ During compilation the stack holds a list of forward references to the ENDCASE, -\ with the number of references on top. Inside OF ... ENDOF there is additionally -\ a forward reference to the ENDOF (as with IF ... THEN) above the ENDCASE counter. -: CASE 0 ; IMMEDIATE -: OF POSTPONE OVER POSTPONE = POSTPONE IF POSTPONE DROP ; IMMEDIATE -: ENDOF POSTPONE AHEAD -ROT POSTPONE THEN 1+ ; IMMEDIATE -: ENDCASE POSTPONE DROP 0 DO POSTPONE THEN LOOP ; IMMEDIATE - -\ Define a named constant. -\ Execution: ( value "name" -- ) -\ name Execution: ( -- value ) -\ Alternate definition: -\ : CONSTANT : POSTPONE LITERAL POSTPONE ; ; -: CONSTANT CREATE LATEST DODATA OVER >CFA ! >DFA ! ; - -\ Define a single-cell named variable which returns its data address when executed. -\ The initial value is formally undefined. This implementation sets it to zero. -\ Execution: ( "name" -- ) -\ name Execution: ( -- a-addr ) -: VARIABLE CREATE 0 , ; - -\ Define a single-cell named value which returns its data (not address) when executed. -\ Named values defined with VALUE can be modified with TO. -\ Execution: ( x "name" -- ) -\ name execution: ( -- value ) -: VALUE CREATE , DOES> @ ; - -\ When compiling, append code to store to the data field area of the named value. -\ When interpreting, store to the data field directly. -\ An ambiguous condition exists if the name was not created with VALUE. -( x "name" -- ) -: TO ' >BODY STATE @ IF POSTPONE LITERAL POSTPONE ! ELSE ! THEN ; IMMEDIATE +\ Return -1, 0, or 1 if the left string is respectively +\ less than, equal to, or greater than the right string +: COMPARE ( c-addr1 u1 c-addr2 u2 -- -1 | 0 | 1 ) + BEGIN + ROT ?DUP IF + ( S: a1 a2 u2 u1 ) + SWAP ?DUP IF + ( S: a1 a2 u1 u2 ) + 2SWAP 2DUP C@ SWAP C@ - DUP IF + >R 4 NDROP R> SIGN EXIT + ELSE + DROP + ( S: u1 u2 a1 a2 ) + 1+ SWAP 1+ 2SWAP 1- SWAP 1- + ( S: a2' a1' u2' u1' ) + SWAP -ROT 2SWAP + ( S: a1' u1' a2' u2' ) + THEN + ELSE + \ Return 1 since first string is longer + DROP 2DROP 1 EXIT + THEN + ELSE + \ If u2 is also zero return 0; else return -1 since first string is shorter + -ROT 2DROP 0<> EXIT + THEN + AGAIN ; \ Display the signed number at the top of the stack : . ( n -- "" ) @@ -203,7 +449,7 @@ DROP "-2147483648" TYPE ELSE \ Emit the - sign and use absolute value if input is negative - DUP 0 < IF + DUP 0< IF [CHAR] - EMIT NEGATE THEN @@ -220,6 +466,10 @@ THEN ; +\ Field accessors for the search order linked list +: ORDER>LINK ( a-addr1 -- a-addr2 ) ; +: ORDER>WID ( a-addr1 -- a-addr2 ) CELL+ ; + \ When the search order is changed previously allocated entries that are not \ currently needed will be retained on this linked list for later reuse. VARIABLE ORDER-FREELIST @@ -238,7 +488,7 @@ VARIABLE ORDER-FREELIST \ Set the current search order : SET-ORDER ( widn ... wid1 n | -n -- ) - DUP 0 < IF DROP FORTH-WORDLIST 1 THEN + DUP 0< IF DROP FORTH-WORDLIST 1 THEN \ Move all the previous search order entries to the free list CURRENT-ORDER @ BEGIN @@ -262,7 +512,7 @@ VARIABLE ORDER-FREELIST DUP ORDER>LINK @ ORDER-FREELIST ! ELSE \ Allocate a new entry from the data area - ALIGN HERE 2 CELL * ALLOT + ALIGN HERE 2 CELLS ALLOT THEN \ Update the tail pointer with the address of this entry DUP ROT ! @@ -281,19 +531,12 @@ VARIABLE ORDER-FREELIST ALIGN HERE 0 , ; -\ Abstract away the internals of the search order implementation -HIDE CURRENT -HIDE CURRENT-ORDER -HIDE ORDER-FREELIST -HIDE ORDER>WID -HIDE ORDER>LINK - \ Make the first list in the search order the current compilation word list : DEFINITIONS ( -- ) GET-ORDER SWAP SET-CURRENT 1- NDROP ; \ Run a function for each word in the given wordlist -\ xt Execution: ( x*i word-xt -- stop-flag x*j ) -: WITH-WORDLIST ( x*i wid xt -- x*j ) +\ xt Execution: ( i*x word-xt -- stop-flag j*x ) +: WITH-WORDLIST ( i*x wid xt -- j*x ) >R @ BEGIN ?DUP @@ -304,24 +547,26 @@ HIDE ORDER>LINK R> >LINK @ THEN REPEAT - RDROP -; + RDROP ; \ Like WITH-WORDLIST but only runs the function for visible (non-hidden) words : WITH-VISIBLE ( x*i wid xt -- x*j ) SWAP { DUP HIDDEN? IF DROP FALSE ELSE SWAP DUP >R EXECUTE R> SWAP THEN } WITH-WORDLIST DROP ; -\ Display the names of each visible word in the given word list -: SHOW-WORDLIST ( wid -- ) { >NAME TYPE SPACE 0 } WITH-VISIBLE EOL ; +\ Display the name of each visible word in the given word list +: SHOW-WORDLIST ( wid -- ) { >NAME TYPE SPACE FALSE } WITH-VISIBLE EOL ; \ Return the number of visible words in the given word list -: COUNT-WORDLIST ( wid -- n ) 0 SWAP { DROP 1+ 0 } WITH-VISIBLE ; +: COUNT-WORDLIST ( wid -- n ) 0 SWAP { DROP 1+ FALSE } WITH-VISIBLE ; -\ Alternative definition of SEARCH-WORDLIST using WITH-VISIBLE (for demonstration) -: SEARCH-WORDLIST' ( c-addr u wid -- c-addr u 0 | xt 1 | xt -1 ) +\ Look up a name in a word list and return the execution token and immediate flag +\ If the name is not found return the name with the status value 0 +\ If the name is an immediate word return the execution token with status -1 +\ Otherwise return the execution token with status 1 +: SEARCH-WORDLIST ( c-addr u wid -- c-addr u 0 | xt 1 | xt -1 ) 0 SWAP { - >R DROP 2DUP R@ >NAME =S IF + >R DROP 2DUP R@ >NAME COMPARE 0= IF 2DROP R> DUP IMMEDIATE? 1 OR TRUE ELSE RDROP 0 FALSE @@ -329,59 +574,223 @@ HIDE ORDER>LINK } WITH-VISIBLE ; \ Search-Order extension words -: ALSO GET-ORDER >R DUP R> 1+ SET-ORDER ; -: FORTH GET-ORDER NIP FORTH-WORDLIST SWAP SET-ORDER ; -: ONLY -1 SET-ORDER ; -: ORDER GET-ORDER - "ORDER:" TYPE BEGIN ?DUP WHILE 1- SWAP SPACE . REPEAT EOL +: ALSO ( -- ) GET-ORDER >R DUP R> 1+ SET-ORDER ; +: FORTH ( -- ) GET-ORDER NIP FORTH-WORDLIST SWAP SET-ORDER ; +: ONLY ( -- ) -1 SET-ORDER ; +: ORDER ( -- ) + "ORDER:" TYPE GET-ORDER BEGIN ?DUP WHILE 1- SWAP SPACE . REPEAT EOL "CURRENT: " TYPE GET-CURRENT . EOL ; : PREVIOUS ( -- ) GET-ORDER NIP 1- SET-ORDER ; +\ Apply SEARCH-WORDLIST to each word list in the current search order +: FIND ( c-addr u -- c-addr u 0 | xt 1 | xt -1 ) + 2>R GET-ORDER + BEGIN + ?DUP + WHILE + 1- SWAP + 2R> ROT SEARCH-WORDLIST + ?DUP IF 2>R NDROP 2R> EXIT THEN + 2>R + REPEAT + 2R> 0 ; + +\ ABORT needs to be deferred so that it can refer to QUIT and INTERPRET +\ The initial target of FATAL-ERROR terminates the program with SIGABRT +DEFER ABORT ( -- ) +' FATAL-ERROR ' ABORT DEFER! + +\ Same as FIND except that unknown words are reported and result in a call to ABORT +: FIND-OR-ABORT ( c-addr u -- xt 1 | xt -1 ) + FIND ?DUP IF EXIT THEN + "UNKNOWN WORD: " TYPE TYPE EOL ABORT ; + +\ Read a word from the input (during runtime) and return its execution token +\ Aborts if the word is not found in the current (runtime) search order list +: ' ( "ccc" -- xt ) WORD FIND-OR-ABORT DROP ; + +\ Like ' but generates a literal at compile-time. +: ['] ( Compilation: "ccc" -- ) ( Runtime: -- xt ) IMMEDIATE + ' POSTPONE LITERAL ; + +\ Read a word and append its compilation semantics to the current definition. +: POSTPONE ( "name" -- ) IMMEDIATE + WORD FIND-OR-ABORT 0< IF + COMPILE, + ELSE + DUP [ ' BOOTSTRAP? COMPILE, ] IF + "POSTPONE used on non-immediate bootstrap word: " TYPE TYPE EOL + [ ' BAILOUT COMPILE, ] + THEN + POSTPONE LITERAL + POSTPONE COMPILE, + THEN ; + +\ Shorthand for { ' DEFER! } or { ['] DEFER! } depending on STATE +\ If used during compilation, capture the name immediately but set target at runtime +: IS ( Compilation: "ccc" -- ) + ( Runtime: xt -- ) + ( Interpreted: xt "ccc" -- ) + ' STATE @ IF POSTPONE LITERAL POSTPONE DEFER! ELSE DEFER! THEN ; IMMEDIATE + +\ When compiling, append code to store to the data field area of the named value. +\ When interpreting, store to the data field directly. +\ An ambiguous condition exists if the name was not created with VALUE. +: TO ( x "name" -- ) IMMEDIATE + ' >BODY STATE @ IF POSTPONE LITERAL POSTPONE ! ELSE ! THEN ; + +\ Hide the named word: HIDE +: HIDE ( "ccc" -- ) + ' >FLAGS DUP C@ F_HIDDEN OR SWAP C! ; + +\ Hide internal utility functions +HIDE ALLOT-BOUNDS +HIDE ALLOT-OOM +HIDE (DOES) + +\ Abstract away the internals of the search order implementation +HIDE CURRENT +HIDE CURRENT-ORDER +HIDE ORDER-FREELIST +HIDE ORDER>WID +HIDE ORDER>LINK + +\ Read a literal character string up to the next double-quote character +\ Unlike WORD the string is stored in contiguous *allocated* data space +\ The delimiting double-quote character is removed from the input buffer +\ Double-quote and backslash characters can be escaped with a backslash +: READSTRING ( "ccc" -- c-addr u ) + HERE + BEGIN + KEY + DUP [CHAR] \ = IF + DROP KEY TRUE + ELSE + DUP [CHAR] " <> + THEN + WHILE + C, + REPEAT + DROP HERE OVER - +; + +: PARSENUMBER ( c-addr u -- n TRUE | c-addr u FALSE ) + DUP 0= IF FALSE EXIT THEN + 2>R 2R@ DROP C@ [CHAR] - = 0 + ( S: neg-flag accum ) ( R: c-addr u ) + OVER IF R@ 1 = IF 2DROP 2R> FALSE EXIT THEN THEN + OVER 2R@ ROT IF 1- SWAP 1+ SWAP THEN + ( S: neg-flag accum c-addr' u' ) ( R: c-addr u ) + BEGIN ?DUP WHILE + OVER -ROT 2>R C@ [CHAR] 0 - + ( S: neg-flag accum digit ) ( R: c-addr u c-addr' u' ) + DUP 9 U> IF DROP 2DROP 2RDROP 2R> FALSE EXIT THEN + SWAP 10 * + 2R> + ( S: neg-flag accum' c-addr' u' ) ( R: c-addr u ) + 1- SWAP 1+ SWAP + REPEAT + ( S: neg-flag accum c-addr' ) ( R: c-addr u ) + 2RDROP DROP SWAP IF NEGATE THEN + TRUE +; + +\ Read a word, number, or string and either execute it or compile it +\ The stack effect depends on the input and the current value of STATE +: INTERPRET ( i*x "ccc" -- j*x ) + SKIPSPACE + KEY + [CHAR] " = IF + STATE @ IF + POSTPONE LITSTRING + HERE 0 C, + READSTRING NIP SWAP C! ALIGN + ELSE + READSTRING + THEN + ELSE + PUTBACK WORD + PARSENUMBER IF + STATE @ IF + POSTPONE LITERAL + THEN + ELSE + FIND-OR-ABORT + \ -1 => immediate word; execute regardless of STATE + \ 1 => read STATE; compile if true, execute if false + 0< IF EXECUTE EXIT THEN + STATE @ IF COMPILE, EXIT THEN + EXECUTE + THEN + THEN ; + +\ Comments; ignore all characters until the next EOL or ) character, respectively +: \ ( "ccc" -- ) IMMEDIATE BEGIN KEY LF = UNTIL ; +: ( ( "ccc" -- ) IMMEDIATE BEGIN KEY [CHAR] ) = UNTIL ; + +\ Empty the return stack and enter interpretation state +: QUIT ( -- ) R0 RSP! FALSE STATE ! BEGIN INTERPRET AGAIN ; + +\ Redefine ABORT as a non-deferred word; update deferred references to point here +\ Empty the data stack and then perform the function of QUIT without any message +' ABORT +HIDE ABORT +: ABORT ( -- ) S0 SP! QUIT ; +' ABORT SWAP DEFER! + +\ Remove the bootstrap word list from the search order +\ Switch to the interpreter defined in this startup file +FORTH-WORDLIST 1 SET-ORDER +DEFINITIONS +QUIT + +\ ***************************************************************************** +\ Bootstrapping is complete +\ From this point on we only execute threaded FORTH words defined in this file +\ ***************************************************************************** + +\ Return the number of words on the data stack +: DEPTH ( -- n ) SP@ S0 SWAP - CELL / ; + \ Display the content of the data stack -: .DS +: .DS ( -- "" ) SP@ S0 CELL - 2DUP > IF 2DROP EXIT THEN DUP @ . BEGIN CELL - 2DUP > IF 2DROP EXIT THEN DUP @ SPACE . - AGAIN -; + AGAIN ; \ Display the content of the return stack -: .RS +: .RS ( -- "" ) + \ Skip the topmost cell, which is the return address for the call to .RS RSP@ CELL + R0 CELL - 2DUP > IF 2DROP EXIT THEN DUP @ . BEGIN CELL - 2DUP > IF 2DROP EXIT THEN DUP @ SPACE . - AGAIN -; + AGAIN ; -\ Display the content of the data and return stacks on separate lines -: TRACE "DS: " TYPE .DS EOL "RS: " TYPE .RS EOL EOL ; - -\ Return the number of words on the data stack -: DEPTH SP@ S0 SWAP - CELL / ; - -\ Attempt to locate a word whose execution token matches the given address +\ Attempt to locate a visible word whose execution token matches the given address \ If found return the word name and TRUE; otherwise just return FALSE : LOOKUP ( addr -- c-addr u TRUE | FALSE ) - LATEST + >R + GET-ORDER BEGIN - ?DUP IF - \ entry address is not zero - 2DUP = IF - \ entry matches given address - NIP >NAME TRUE EXIT - THEN - \ get next entry address - >LINK @ - ELSE - \ entry address is zero; end of list - DROP FALSE EXIT - THEN - AGAIN -; + ?DUP + WHILE + 1- SWAP R@ FALSE ROT + ( S: widn ... wid1 n addr FALSE wid ) ( R: addr ) + { ( addr FALSE xt -- addr FALSE FALSE | c-addr u TRUE TRUE ) + NIP OVER = IF + >NAME TRUE + ELSE + FALSE + THEN DUP } WITH-VISIBLE + ?DUP IF RDROP EXIT THEN + DROP + REPEAT + RDROP FALSE ; \ Display the top of the stack as a word name if possible, or a number otherwise +\ Words with zero-length names (e.g. from :NONAME) are displayed as numbers : .W ( addr -- "" | "" ) DUP LOOKUP IF TYPE DROP ELSE . THEN ; @@ -391,7 +800,7 @@ HIDE ORDER>LINK BEGIN ?DUP WHILE - SWAP DUP @ .W BL EMIT + SWAP DUP @ .W SPACE CELL + SWAP 1- REPEAT DROP