From d94c274b3391994d9442ef145d32dd95504b8bf7 Mon Sep 17 00:00:00 2001 From: Jesse McDonald Date: Thu, 12 Nov 2020 01:32:39 -0600 Subject: [PATCH] clean up & reorganize --- startup.4th | 463 ++++++++++++++++++++++++-------------------- test/characters.4th | 28 +++ test/characters.exp | 15 ++ test/resize.4th | 2 +- 4 files changed, 299 insertions(+), 209 deletions(-) create mode 100644 test/characters.4th create mode 100644 test/characters.exp diff --git a/startup.4th b/startup.4th index c0e86cc..3c99b2a 100644 --- a/startup.4th +++ b/startup.4th @@ -12,10 +12,24 @@ CREATE SYSTEM 0 , CREATE UTILITY 0 , CREATE LINUX 0 , +\ Use this list until we get around to defining the real GET-ORDER +: STARTUP-ORDER ( -- widn ... wid1 n ) + [ BOOTSTRAP-WORDLIST ] LITERAL + [ LINUX ] LITERAL + [ UTILITY ] LITERAL + [ SYSTEM ] LITERAL + [ FORTH ] LITERAL + 5 ; +' STARTUP-ORDER ' BOOTSTRAP-GET-ORDER DEFER! + \ Get and set the current compilation word list : GET-CURRENT ( -- wid ) CURRENT @ ; : SET-CURRENT ( wid -- ) CURRENT ! ; +\ Get the execution token of the most recent word in the compilation word list +\ If the word list is empty the result will be zero +: LATEST ( -- xt | NULL ) GET-CURRENT @ ; + \ Reserved for "invalid address" or "object not present" \ Signifies (the absence of) a memory address, not a number 0 CONSTANT NULL @@ -39,19 +53,6 @@ CREATE LINUX 0 , \ Round up to the next cell-aligned address : ALIGNED ( addr -- a-addr ) CELL ALIGNED-TO ; -\ Return the next address in the compilation/data area -: HERE ( -- addr ) CP @ ; - -\ Use this list until we get around to defining the real GET-ORDER -: STARTUP-ORDER ( -- widn ... wid1 n ) - [ BOOTSTRAP-WORDLIST ] LITERAL - [ LINUX ] LITERAL - [ UTILITY ] LITERAL - [ SYSTEM ] LITERAL - [ FORTH ] LITERAL - 5 ; -' STARTUP-ORDER ' BOOTSTRAP-GET-ORDER DEFER! - UTILITY SET-CURRENT \ Field accessors for execution tokens @@ -62,31 +63,50 @@ UTILITY SET-CURRENT : >FLAGS ( xt -- c-addr ) [ CELL 1+ ] LITERAL - ; : >NAME ( xt -- c-addr u ) >FLAGS DUP C@ F_LENMASK AND TUCK - SWAP ; -' FORTH >BODY SET-CURRENT - -\ Given the xt of a word defined with VOCABULARY, return the word list identifier -\ This allows the use of vocabulary words with SET-ORDER or SEARCH-WORDLIST -: >WORDLIST ( vocabulary-xt -- wid ) >BODY ; - -' SYSTEM >WORDLIST SET-CURRENT - -\ Set the current compilation word list to the given vocabulary -: (DEFINITIONS) ( vocabulary-xt -- ) >WORDLIST SET-CURRENT ; +\ TRUE if any bits set in u are also set in the flag field; FALSE otherwise +: FLAG? ( xt u -- t=set ) SWAP >FLAGS C@ AND 0<> ; \ Set or clear the HIDDEN flag for word with the given execution token : (HIDE) ( xt -- ) >FLAGS DUP C@ F_HIDDEN OR SWAP C! ; : (UNHIDE) ( xt -- ) >FLAGS DUP C@ F_HIDDEN INVERT AND SWAP C! ; -\ Use GET-CURRENT and SET-CURRENT to manipulate the compilation word list -' CURRENT (HIDE) +' CURRENT (HIDE) \ Use GET-CURRENT and SET-CURRENT +' STARTUP-ORDER (HIDE) \ Only used during early startup -\ This is only used during early startup -' STARTUP-ORDER (HIDE) +\ Set the given xt as the most recent word in the compilation word list +\ Since this replaces the head of a linked list it may affect the entire list, +\ not just the most recent word +: LATEST! ( xt -- ) GET-CURRENT ! ; + +FORTH SET-CURRENT + +\ Set the latest defined word as immediate +\ Note that IMMEDIATE is itself an immediate word +: IMMEDIATE LATEST >FLAGS DUP C@ F_IMMED OR SWAP C! ; IMMEDIATE + +\ Switch from compiling to interpreting, or vice-versa +: [ IMMEDIATE FALSE STATE ! ; +: ] IMMEDIATE TRUE STATE ! ; + +\ Just a visual separator, no compilation or runtime effect +: ▪ IMMEDIATE ; + +\ For documentation, or when an xt with no effect is required +: NO-OP IMMEDIATE ; + +\ Given the xt of a word defined with VOCABULARY, return the word list identifier +\ This allows the use of vocabulary words with SET-ORDER or SEARCH-WORDLIST +: >WORDLIST ( vocabulary-xt -- wid ) >BODY ; + +SYSTEM SET-CURRENT + +\ Set the current compilation word list to the given vocabulary +: (DEFINITIONS) ( vocabulary-xt -- ) >WORDLIST SET-CURRENT ; ' FORTH (DEFINITIONS) -: IMMEDIATE? ( xt -- flag ) >FLAGS C@ F_IMMED AND 0<> ; -: HIDDEN? ( xt -- flag ) >FLAGS C@ F_HIDDEN AND 0<> ; +: IMMEDIATE? ( xt -- flag ) F_IMMED FLAG? ; +: HIDDEN? ( xt -- flag ) F_HIDDEN FLAG? ; \ Decrement the array size and increment the address by the same amount : /STRING ( c-addr u1 u2 -- c-addr+u2 u1-u2 ) TUCK - -ROT + SWAP ; @@ -144,7 +164,7 @@ DEFER QUIT \ For use after CATCH; like THROW but doesn't change the string \ Also, if n is EXCP-QUIT then invokes QUIT for special handling (keeps data stack) : RETHROW ( k*x n -- k*x | i*x n ) - ?DUP IF DUP EXCP-QUIT = IF QUIT THEN THROW-UNWIND THEN ; + ?DUP IF ▪ DUP EXCP-QUIT = IF QUIT THEN ▪ THROW-UNWIND ▪ THEN ; \ THROW while storing a string for context : THROW-STRING ( k*x n c-addr u -- k*x | i*x n ) 2>R ?DUP IF 2R> THROWN-STRING 2! THROW-UNWIND ELSE 2RDROP THEN ; @@ -168,44 +188,20 @@ DEFER QUIT : …TODO… "not implemented" FAIL ; -\ Get the execution token of the most recent word in the compilation word list -\ If the word list is empty the result will be zero -: LATEST ( -- xt | NULL ) GET-CURRENT @ ; - -' UTILITY (DEFINITIONS) - -\ Set the given xt as the most recent word in the compilation word list -\ Since this replaces the head of a linked list it may affect the entire list, -\ not just the most recent word -: LATEST! ( xt -- ) GET-CURRENT ! ; - -' FORTH (DEFINITIONS) - -\ Set the latest defined word as immediate -\ Note that IMMEDIATE is itself an immediate word -: IMMEDIATE LATEST >FLAGS DUP C@ F_IMMED OR SWAP C! ; IMMEDIATE - -\ Switch from compiling to interpreting, or vice-versa -: [ IMMEDIATE FALSE STATE ! ; -: ] IMMEDIATE TRUE STATE ! ; - -\ Just a visual separator, no compilation or runtime effect -: ▪ IMMEDIATE ; - \ Returns the least power of two greater than or equal to u1 -: NATURALLY-ALIGNED ( u1 -- u2 ) +: POW2-ALIGNED ( u1 -- u2 ) 1- ▪ DUP U2/ OR ▪ DUP 2 RSHIFT OR ▪ DUP 4 RSHIFT OR ▪ DUP 8 RSHIFT OR ▪ DUP 16 RSHIFT OR ▪ 1+ ; ' UTILITY (DEFINITIONS) -: DEFERRED? ( xt -- ) >CFA @ ▪ DODEFER <> ▪ EXCP-TYPE-MISMATCH AND ▪ THROW ; +: ?DEFERRED ( xt -- ) >CFA @ ▪ DODEFER <> ▪ EXCP-TYPE-MISMATCH AND ▪ THROW ; ' FORTH (DEFINITIONS) \ Fetch and store the target of the deferred word denoted by deferred-xt -: DEFER@ ( deferred-xt -- xt ) DUP DEFERRED? ▪ >DFA @ ; -: DEFER! ( xt deferred-xt -- ) DUP DEFERRED? ▪ >DFA ! ; +: DEFER@ ( deferred-xt -- xt ) DUP ?DEFERRED ▪ >DFA @ ; +: DEFER! ( xt deferred-xt -- ) DUP ?DEFERRED ▪ >DFA ! ; ' LINUX (DEFINITIONS) @@ -673,7 +669,7 @@ __O_TMPFILE O_DIRECTORY OR ⇒ O_TMPFILE : SYSCALL5-RETRY [ ' SYSCALL5 ] LITERAL 5 SYSCALL-RETRY ; : SYSCALL6-RETRY [ ' SYSCALL6 ] LITERAL 6 SYSCALL-RETRY ; -' FORTH (DEFINITIONS) +CREATE OUTPUT-FD STDOUT , \ Write a character array to the given file descriptor \ Repeat write syscall until entire string is written @@ -683,11 +679,18 @@ __O_TMPFILE O_DIRECTORY OR ⇒ O_TMPFILE 2DUP R@ -ROT SYS_WRITE SYSCALL3-RETRY DUP 0<= IF 2DROP RDROP EXIT THEN /STRING REPEAT ▪ DROP RDROP ; -\ Specializations for output to stdout and stderr -: TYPE ( c-addr u -- "ccc" ) STDOUT TYPE-FD ; -: TYPE-ERR ( c-addr u -- stderr: "ccc" ) STDERR TYPE-FD ; +' FORTH (DEFINITIONS) -\ Write one character to FD 1 (stdout) +: >STDOUT STDOUT OUTPUT-FD ! ; +: >STDERR STDERR OUTPUT-FD ! ; + +: SAVE-OUTPUT ( -- u*x u ) OUTPUT-FD @ 1 ; +: RESTORE-OUTPUT ( u*x u -- ) OVER OUTPUT-FD ! NDROP ; + +\ Write a character array to the current output file descriptor +: TYPE ( c-addr u -- "ccc" ) OUTPUT-FD @ TYPE-FD ; + +\ Write one character to the current output file descriptor : EMIT ( c -- "c" ) SP@ 2DUP C! 1 TYPE DROP ; \ Convert from a double-cell signed number to a single-cell signed number @@ -739,45 +742,6 @@ __O_TMPFILE O_DIRECTORY OR ⇒ O_TMPFILE : D<=> ( d1 d2 -- -1 | 0 | 1 ) 2OVER 2OVER D> >R D< R> - ; : DU<=> ( ud1 ud2 -- -1 | 0 | 1 ) 2OVER 2OVER DU> >R DU< R> - ; -\ Define names for the whitespace characters - 8 CONSTANT HT \ Horizontal Tab -10 CONSTANT LF \ Line Feed (newline) -11 CONSTANT VT \ Vertical Tab -12 CONSTANT FF \ Form Feed -13 CONSTANT CR \ Carriage Return -32 CONSTANT BL \ BLank (space) - -\ Test whether the given character is whitespace (HT, LF, VT, FF, CR, or BL) -\ Note that HT, LF, VT, FF, and CR together form the range 9 ... 13 inclusive -: SPACE? ( c -- flag ) - DUP BL = IF DROP TRUE EXIT THEN - 9 - [ 13 9 - ] LITERAL U<= ; - -\ Emit a blank (space) character -: SPACE ( -- "" ) BL EMIT ; - -\ Emit a horizontal tab character -: TAB ( -- "" ) HT EMIT ; - -' UTILITY (DEFINITIONS) - -\ The implementation-dependent End-of-Line string -\ Here this is just a LF (Unix convention) but it could be CR (Mac) or CR+LF (DOS) -: (EOL) ( -- c-addr u ) "\n" ; - -' FORTH (DEFINITIONS) - -\ Emit the implementation-dependent End-of-Line string -: EOL ( -- "" ) (EOL) TYPE ; - -\ Emit n blank (space) characters -: SPACES ( n -- "" ) BEGIN ?DUP WHILE 1- SPACE REPEAT ; - -\ Terminate the program, successfully -\ This will never return, even if the system call does -: BYE ( -- ) - BEGIN 0 SYS_EXIT SYSCALL1 DROP AGAIN ; - ' SYSTEM (DEFINITIONS) \ When growing the data area, round the end address up to a multiple of this size @@ -785,6 +749,9 @@ __O_TMPFILE O_DIRECTORY OR ⇒ O_TMPFILE ' FORTH (DEFINITIONS) +\ Return the next address in the compilation/data area +: HERE ( -- addr ) CP @ ; + \ Allocate n consecutive bytes from the end of the data area \ If necessary use the brk system call to grow the data area \ The value n can be negative to release the most recently allocated space @@ -859,6 +826,63 @@ __O_TMPFILE O_DIRECTORY OR ⇒ O_TMPFILE : NULL= ( addr -- flag ) IMMEDIATE STATE @ IF POSTPONE 0= ELSE 0= THEN ; : NULL<> ( addr -- flag ) IMMEDIATE STATE @ IF POSTPONE 0<> ELSE 0<> THEN ; +\ Define names for the whitespace characters + 9 CONSTANT HT \ Horizontal Tab +10 CONSTANT LF \ Line Feed (newline) +11 CONSTANT VT \ Vertical Tab +12 CONSTANT FF \ Form Feed +13 CONSTANT CR \ Carriage Return +32 CONSTANT BL \ BLank (space) + +\ ( ch -- flag ) Test whether a character belongs to a given ASCII character class +\ Note that HT, LF, VT, FF, and CR together form the range 9 ... 13 inclusive +: ASCII? 127 U<= ; +: BLANK? DUP BL = ?DUP IF NIP ELSE HT = THEN ; +: CNTRL? DUP 32 U< ?DUP IF NIP ELSE 127 = THEN ; +: DIGIT? [[ CHAR 0 ]] [[ CHAR 9 1+ ]] WITHIN ; +: GRAPH? 33 127 WITHIN ; +: LOWER? [[ CHAR a ]] [[ CHAR z 1+ ]] WITHIN ; +: UPPER? [[ CHAR A ]] [[ CHAR Z 1+ ]] WITHIN ; +: PRINT? 32 127 WITHIN ; +: SPACE? DUP BL = ?DUP IF NIP ELSE 9 - [[ 13 9 - ]] U<= THEN ; +: ALPHA? DUP UPPER? ?DUP IF NIP ELSE LOWER? THEN ; +: ALNUM? DUP ALPHA? ?DUP IF NIP ELSE DIGIT? THEN ; +: PUNCT? DUP PRINT? ?0DUP IF DUP SPACE? ?DUP IF NIP + ELSE ALNUM? THEN 0= ELSE NIP THEN ; + +\ Convert a character to lowercase or uppercase, respectively +: TO-LOWER ( ch1 -- ch2 ) DUP UPPER? IF [[ CHAR a CHAR A - ]] + THEN ; +: TO-UPPER ( ch1 -- ch2 ) DUP LOWER? IF [[ CHAR a CHAR A - ]] - THEN ; + +: XDIGIT? ( ch -- t=hex-digit ) + DUP DIGIT? ?DUP IF NIP ELSE TO-UPPER [[ CHAR A ]] [[ CHAR F 1+ ]] WITHIN THEN ; + +\ Emit a blank (space) character +: SPACE ( -- "" ) BL EMIT ; + +\ Emit a horizontal tab character +: TAB ( -- "" ) HT EMIT ; + +' UTILITY (DEFINITIONS) + +\ The implementation-dependent End-of-Line string +\ Here this is just a LF (Unix convention) but it could be CR (Mac) or CR+LF (DOS) +: (EOL) ( -- c-addr u ) "\n" ; + +' FORTH (DEFINITIONS) + +\ Emit the implementation-dependent End-of-Line string +: EOL ( -- "" ) (EOL) TYPE ; + +\ Emit n blank (space) characters +: SPACES ( n -- "" ) + BEGIN ?DUP WHILE DUP " " ROT UMIN DUP -ROT TYPE - REPEAT ; + +\ Terminate the program, successfully +\ This will never return, even if the system call does +: BYE ( -- ) + BEGIN 0 SYS_EXIT SYSCALL1 DROP AGAIN ; + \ Our first control-flow primitive: IF {ELSE } THEN \ \ IF compiles an unresolved conditional branch. @@ -915,24 +939,24 @@ __O_TMPFILE O_DIRECTORY OR ⇒ O_TMPFILE POSTPONE ALWAYS ▪ HERE ; : AGAIN ( C: orig dest -- ) IMMEDIATE POSTPONE BRANCH ▪ HERE - , ▪ POSTPONE THEN ; -: UNTIL ( C: orig dest -- ) ( Runtime: flag -- ) IMMEDIATE +: UNTIL ( C: orig dest -- ; flag -- ) IMMEDIATE POSTPONE 0BRANCH HERE - , POSTPONE THEN ; -: WHILE ( C: orig1 dest -- orig2 dest ) ( Runtime: flag -- ) IMMEDIATE +: WHILE ( C: orig1 dest -- orig2 dest ; flag -- ) IMMEDIATE SWAP POSTPONE AND-IF SWAP ; : REPEAT ( C: orig dest -- ) IMMEDIATE POSTPONE AGAIN ; \ Sequential equality tests: +\ \ CASE \ OF ENDOF \ OF ENDOF -\ ... +\ OF? ENDOF +\ … +\ \ ENDCASE \ \ When equals execute , when equals execute , etc. -\ During compilation the stack holds a list of forward references to the ENDCASE, -\ with the number of references on top. Inside OF ... ENDOF there is additionally -\ a forward reference to the ENDOF (as with IF ... THEN) above the ENDCASE counter. \ \ Begin by creating a placeholder for the unresolved ENDOF forward references : CASE ( C: -- null-orig ) IMMEDIATE @@ -961,12 +985,12 @@ __O_TMPFILE O_DIRECTORY OR ⇒ O_TMPFILE \ ?DO +LOOP CREATE LEAVE-ORIG NULL , : DO ( C: -- outer-stack dest ; limit index -- R: -- limit index ) IMMEDIATE - POSTPONE 2>R LEAVE-ORIG @ - POSTPONE ALWAYS LEAVE-ORIG ! + POSTPONE 2>R + POSTPONE ALWAYS LEAVE-ORIG XCHG POSTPONE BEGIN ; : ?DO ( C: -- outer-stack dest ; limit index -- R: -- limit index ) IMMEDIATE - POSTPONE 2>R LEAVE-ORIG @ - POSTPONE 2R@ POSTPONE <> POSTPONE IF LEAVE-ORIG ! + POSTPONE 2>R + POSTPONE 2R@ POSTPONE <> POSTPONE IF LEAVE-ORIG XCHG POSTPONE BEGIN ; : LEAVE ( C: -- ; -- R: limit index -- ) IMMEDIATE LEAVE-ORIG @ POSTPONE ONWARD-AHEAD LEAVE-ORIG ! ; @@ -974,15 +998,15 @@ CREATE LEAVE-ORIG NULL , POSTPONE 2RDROP ; : +LOOP ( C: outer-stack dest -- ; n -- R: {limit index} -- ) IMMEDIATE POSTPONE RSP@ POSTPONE +! POSTPONE 2R@ POSTPONE = POSTPONE UNTIL - LEAVE-ORIG @ POSTPONE THEN POSTPONE UNLOOP LEAVE-ORIG ! ; + LEAVE-ORIG XCHG POSTPONE THEN POSTPONE UNLOOP ; : LOOP ( C: outer-stack dest -- ; -- R: {limit index} -- ) IMMEDIATE 1 POSTPONE LITERAL POSTPONE +LOOP ; ' LEAVE-ORIG (HIDE) \ Return the current index value from the innermost or next-innermost loop. \ The loops must be directly nested with no other changes to the return stack -: I 1 RPICK ; -: J 3 RPICK ; +: I IMMEDIATE POSTPONE R@ ; +: J IMMEDIATE POSTPONE L0@ ; ' SYSTEM (DEFINITIONS) @@ -1049,13 +1073,13 @@ CREATE PNO-POINTER PNO-BUFFER-END , \ Return the number of words on the data and return stacks, respectively : DEPTH ( -- n ) SP@ S0 SWAP - CELL / ; -: RDEPTH ( -- n ) RSP@ CELL+ R0 SWAP - CELL / ; +: RDEPTH ( -- n ) R0 RSP@ CELL+ - CELL / ; ' SYSTEM (DEFINITIONS) : STARTUP-UNWIND ( k*x n -- i*x ) - "Exception " TYPE-ERR DUP ABS 0 <# (EOL) HOLDS #S ROT SIGN #> TYPE-ERR - THROWN-STRING 2@ DUP IF TYPE-ERR "\n" TYPE-ERR THEN + >STDERR "Exception " TYPE DUP ABS 0 <# (EOL) HOLDS #S ROT SIGN #> TYPE + THROWN-STRING 2@ DUP IF TYPE EOL THEN [ ' BAILOUT COMPILE, ] ; ' STARTUP-UNWIND ' THROW-UNWIND DEFER! @@ -1064,6 +1088,10 @@ CREATE PNO-POINTER PNO-BUFFER-END , CREATE DISPLAY-ITEM-LIMIT 6 , +\ Display u1 in hexadecimal zero-padded to at least u2 digits +: .HEXPAD ( u1 u2 -- "" ) + >R 0 <# 16 #SB [[ CHAR 0 ]] R> #PAD #> TYPE ; + ' FORTH (DEFINITIONS) \ Display the content of the data stack @@ -1084,34 +1112,32 @@ CREATE DISPLAY-ITEM-LIMIT 6 , \ Display a block of memory : DUMP ( addr u -- ) OVER + 16 ALIGNED-TO ▪ SWAP TUCK -16 AND ▪ ?DO - I 0 <# ":" HOLDS 16 #SB [[ CHAR 0 ]] 9 #PAD #> TYPE + I 8 .HEXPAD ":" TYPE I 16 + I ?DO - DUP I C@ 0 <# 16 #SB [[ CHAR 0 ]] 2 #PAD - ROT I = IF [[ CHAR > ]] ELSE BL THEN HOLD BL HOLD #> TYPE - LOOP ▪ EOL + I 3 AND 0= IF SPACE THEN + DUP I = IF "»" TYPE ELSE BL EMIT THEN + I C@ 2 .HEXPAD + LOOP ▪ " >" TYPE + I 16 + I ?DO + I C@ DUP PRINT? IF EMIT ELSE DROP "•" TYPE THEN + LOOP ▪ "<\n" TYPE 16 +LOOP ▪ DROP ; \ Remove trailing whitespace from a string (only affects length) : -TRAILING ( c-addr u1 -- c-addr u2 ) BEGIN DUP WHILE 2DUP 1- + C@ SPACE? WHILE 1- REPEAT ; -\ Convert a character to lowercase or uppercase, respectively -: TO-LOWER ( ch1 -- ch2 ) - DUP [[ CHAR A ]] [[ CHAR Z 1+ ]] WITHIN IF [[ CHAR a CHAR A - ]] + THEN ; -: TO-UPPER ( ch1 -- ch2 ) - DUP [[ CHAR a ]] [[ CHAR z 1+ ]] WITHIN IF [[ CHAR a CHAR A - ]] - THEN ; - \ If ch is an alphanumeric character return the value in range [0, 36) and TRUE \ Otherwise just return FALSE : >DIGIT ( ch -- u TRUE | FALSE ) - DUP [[ CHAR 0 ]] [[ CHAR 9 1+ ]] WITHIN IF [[ CHAR 0 ]] - TRUE EXIT THEN + DUP [[ CHAR 0 ]] [[ CHAR 9 1+ ]] WITHIN IF [[ CHAR 0 ]] - TRUE EXIT THEN DUP [[ CHAR A ]] [[ CHAR Z 1+ ]] WITHIN IF [[ CHAR A 10 - ]] - TRUE EXIT THEN DUP [[ CHAR a ]] [[ CHAR z 1+ ]] WITHIN IF [[ CHAR a 10 - ]] - TRUE EXIT THEN DROP FALSE ; \ Like >DIGIT but only returns TRUE if ch is a valid digit for the given base : >DIGIT-BASE ( ch base -- u TRUE | FALSE ) - SWAP >DIGIT ?0DUP IF SWAP 2DUP U< DUP 0= IF NIP THEN THEN NIP ; + SWAP >DIGIT ?0DUP IF SWAP 2DUP U< DUP 0= AND-IF NIP THEN NIP ; \ Convert a string in the given base to an unsigned double-cell number \ Stop at the end of the string or when the next character is not valid for the base @@ -1138,14 +1164,21 @@ CREATE DISPLAY-ITEM-LIMIT 6 , THEN >NUMBER-BASE ; +: >SIGNED-NUMBER ( c-addr1 u1 -- d c-addr2 u2 ) + 2DUP 2>R + DUP IF OVER C@ [[ CHAR - ]] = DUP >R AND-IF 1/STRING THEN + DUP IF OVER C@ DIGIT? AND-IF + R> 2RDROP >R >NUMBER R> IF 2>R DNEGATE 2R> THEN + ELSE + RDROP 2DROP 0 0 2R> + THEN ; + ' UTILITY (DEFINITIONS) \ Parse a signed number; to succeed the entire input string must be consumed +\ The number may be single-cell (default) or double-cell (with a # suffix) : PARSENUMBER ( c-addr u -- 0 | n 1 | d 2 ) - DUP 0= IF NIP EXIT THEN ▪ OVER C@ [[ CHAR - ]] = >R - R@ IF 1/STRING DUP 0= IF RDROP NIP EXIT THEN THEN - >NUMBER - R> IF 2SWAP DNEGATE 2SWAP THEN + >SIGNED-NUMBER DUP 1 = IF OVER C@ [[ CHAR # ]] = AND-IF 2DROP 2 ELSE NIP 0= IF @@ -1203,7 +1236,7 @@ CREATE EXCEPTION-STACK NULL , : CATCH ( i*x xt -- j*x 0 | i*x n ) \ Save the original RSP to quickly remove the exception frame data on success RSP@ - \ Save the stack poiner but don't include the xt and RSP on the top + \ Save the stack pointer but don't include the xt and RSP on the top SP@ 2 CELLS+ >R \ Save the input source specification SAVE-INPUT N>R ▪ SOURCE 2>R ▪ SOURCE-ID >R @@ -1240,7 +1273,7 @@ DEFER REFILL BEGIN ▪ PARSE-EMPTY? 0= WHILE ▪ PEEK-CHAR SPACE? WHILE ▪ SKIP-CHAR ▪ REPEAT ; : ENSURE-NONEMPTY ( -- f=eof ) - BEGIN PARSE-EMPTY? ?0DUP WHILE REFILL 0= ?DUP UNTIL 0= ; + BEGIN ▪ PARSE-EMPTY? ?0DUP WHILE ▪ REFILL ▪ 0= ?DUP UNTIL ▪ 0= ; ' FORTH (DEFINITIONS) @@ -1336,10 +1369,13 @@ DEFER REFILL : (2CONSTANT) ( x1 x2 -- ) (CREATE) ▪ HERE ▪ 2 CELLS ALLOT ▪ 2! DOES> ( -- x1 x2 ) 2@ ; +\ Same for string constants +: (SCONSTANT) ( c-addr1 u -- ; c-addr2 u ) (:) ▪ POSTPONE SLITERAL ▪ POSTPONE ; ▪ ; + \ Define a named variables which return the address of their data when executed. -\ The initial value is formally undefined. This implementation sets it to NULL. -: (VARIABLE) ( c-addr u -- ; -- a-addr ) (CREATE) NULL , ; -: (2VARIABLE) ( c-addr u -- ; -- a-addr ) (CREATE) [ NULL NULL ] 2LITERAL 2, ; +\ The initial value is formally undefined. This implementation sets it to zero. +: (VARIABLE) ( c-addr u -- ; -- a-addr ) (CREATE) 0 , ; +: (2VARIABLE) ( c-addr u -- ; -- a-addr ) (CREATE) 0# 2, ; \ Define a single-cell named value which returns its data (not address) when executed. \ Named values defined with VALUE can be modified with TO. @@ -1368,6 +1404,7 @@ DEFER REFILL : ALIAS PARSE-NAME (ALIAS) ; : CONSTANT PARSE-NAME (CONSTANT) ; : 2CONSTANT PARSE-NAME (2CONSTANT) ; +: SCONSTANT PARSE-NAME (SCONSTANT) ; : VARIABLE PARSE-NAME (VARIABLE) ; : 2VARIABLE PARSE-NAME (2VARIABLE) ; : VALUE PARSE-NAME (VALUE) ; @@ -1416,19 +1453,17 @@ DEFER REFILL \ Each field word has runtime effect ( struct-addr -- field-addr) \ If field offset is zero then the word is marked as immediate and generates no code : FIELD ( align1 offset1 field-align field-bytes -- align2 offset2 ) - : -ROT NATURALLY-ALIGNED ▪ DUP >R ▪ ALIGNED-TO DUP + : -ROT POW2-ALIGNED ▪ DUP >R ▪ ALIGNED-TO DUP ?DUP IF POSTPONE LITERAL POSTPONE + ELSE POSTPONE IMMEDIATE THEN POSTPONE ; ▪ + SWAP R> UMAX SWAP ; \ Consume the final alignment and offset and define a type descriptor for the struct : ENDSTRUCT ( align offset "name" -- ) - SWAP NATURALLY-ALIGNED TUCK ALIGNED-TO 2CONSTANT ; + SWAP POW2-ALIGNED TUCK ALIGNED-TO 2CONSTANT ; \ Accessors for type descriptors -: %SIZEOF ( align size -- size ) IMMEDIATE - STATE @ IF POSTPONE NIP ELSE NIP THEN ; -: %ALIGNOF ( align size -- align ) IMMEDIATE - STATE @ IF POSTPONE DROP ELSE DROP THEN ; +: %SIZEOF ( align size -- size ) NIP ; +: %ALIGNOF ( align size -- align ) DROP ; \ Inline :NONAME-style function literals. "{ }" has the runtime effect \ of placing the execution token for an anonymous function with the runtime @@ -1493,6 +1528,7 @@ DEFER REFILL 2R@ "[IF]" COMPARE 0= IF 1+ ELSE 2R@ "[THEN]" COMPARE 0<> IF 2R@ "[ELSE]" COMPARE 0<> AND-IF + NO-OP ELSE ?DUP 0= IF 2RDROP EXIT THEN 1- @@ -1619,7 +1655,7 @@ VARIABLE TOTAL DUP -4095 U>= IF EXCP-HEAP-OVERFLOW THROW THEN ; : MMAP-ALLOCATE-ALIGNED ( size -- a-addr ) - NATURALLY-ALIGNED + POW2-ALIGNED DUP 2* MMAP-ALLOCATE SWAP ( S: addr size ) 2DUP ALIGNED-TO @@ -1655,7 +1691,7 @@ ENDSTRUCT MEMBLOCK% MEMBLOCK-DATA-OFFSET + DUP BUDDY-MAX-BYTES U> IF PAGESIZE ALIGNED-TO DUP MMAP-ALLOCATE ELSE - NATURALLY-ALIGNED DUP BUDDY-MIN-BYTES U< IF DROP BUDDY-MIN-BYTES THEN + POW2-ALIGNED DUP BUDDY-MIN-BYTES U< IF DROP BUDDY-MIN-BYTES THEN BUDDY-ORDER-FROM-BYTES DUP [[ ' BUDDY-ALLOCATE ]] CATCH ?DUP IF DUP EXCP-HEAP-OVERFLOW <> IF RETHROW THEN DROP BUDDY-MAX-BYTES MMAP-ALLOCATE-ALIGNED BUDDY-ORDERS 1- BUDDY-FREE @@ -1717,6 +1753,8 @@ ENDSTRUCT MEMBLOCK% : DUPLICATE ( c-addr u -- obj-addr u ) DUP ALLOCATE >R >R 2R@ CMOVE 2R> ; +: BUFFER: ( u-bytes "name" -- ) ALLOCATE CONSTANT ; + ' SYSTEM (DEFINITIONS) \ Execute the closure captured at a-addr @@ -1935,12 +1973,26 @@ ROOT DEFINITIONS ' VOCABULARY ALIAS VOCABULARY ' >WORDLIST ALIAS >WORDLIST +UTILITY DEFINITIONS + +\ TRUE if x is equal to one of the u cells starting at a-addr; FALSE otherwise +: ELEMENT? ( x a-addr u -- flag ) + 0 ?DO 2DUP I CELLS+ @ = ?DUP IF 2DROP UNLOOP EXIT THEN LOOP 2DROP FALSE ; + +: REMOVE-DUPLICATES ( xu ... x1 u -- xu' ... x1' u' ) + DUP ▪ BEGIN ▪ DUP WHILE + DUP 1+ PICK ▪ SP@ 3 CELLS+ ▪ 2 PICK 1- ▪ ELEMENT? IF + DUP 1+ ROLL DROP + SWAP 1- SWAP + THEN ▪ 1- + REPEAT ▪ DROP ; + FORTH DEFINITIONS \ Apply SEARCH-WORDLIST to each word list in the current search order : FIND ( c-addr u -- c-addr u 0 | xt 1 | xt -1 ) FIND-HOOK ?DUP IF EXIT THEN - 2>R GET-ORDER + 2>R GET-ORDER REMOVE-DUPLICATES BEGIN ?DUP WHILE @@ -1998,13 +2050,6 @@ FORTH DEFINITIONS : :FINALIZE ( "name" -- ) : LATEST DUP >NAME FIND-OR-THROW DROP DUP (HIDE) DEFER! ; -\ Produce the size or alignment of the subsequent type descriptor as a literal -\ The type-name should be a word like CELL% returning an alignment and a size -: SIZEOF ( "" -- size ) IMMEDIATE - ' EXECUTE %SIZEOF STATE @ IF POSTPONE LITERAL THEN ; -: ALIGNOF ( "" -- size ) IMMEDIATE - ' EXECUTE %ALIGNOF STATE @ IF POSTPONE LITERAL THEN ; - \ Produce the offset of the subsequent field name as a literal \ The field-name should be a word which adds a field offset to a given address : OFFSETOF ( "" -- ; -- offset ) IMMEDIATE @@ -2029,7 +2074,7 @@ SYSTEM DEFINITIONS \ The size of this buffer will determine the maximum line length 4096 CONSTANT TERMINAL-BUFFER-BYTES -TERMINAL-BUFFER-BYTES ALLOCATE CONSTANT TERMINAL-BUFFER +TERMINAL-BUFFER-BYTES BUFFER: TERMINAL-BUFFER \ If we read more than one line then these will refer to the rest of the data 2VARIABLE TIB-LEFTOVER @@ -2156,11 +2201,7 @@ SYSTEM DEFINITIONS PEEK-CHAR [[ CHAR " ]] = IF SKIP-CHAR READSTRING - STATE @ IF - POSTPONE LITSTRING - 255 UMIN DUP C, - HERE SWAP DUP ALLOT CMOVE ALIGN - THEN + STATE @ IF POSTPONE SLITERAL THEN ELSE PARSE-NAME 2>R 2R@ PARSENUMBER ?DUP IF @@ -2213,7 +2254,7 @@ SYSTEM DEFINITIONS FORTH DEFINITIONS : TTY? ( fd -- flag ) - IOCTL_TCGETS termios% %ALLOCA SYS_IOCTL SYSCALL3 UNALLOCA 0= ; + IOCTL_TCGETS termios% %ALLOCA SYS_IOCTL SYSCALL3-RETRY UNALLOCA 0= ; STDIN TTY? CONSTANT INTERACTIVE? @@ -2261,35 +2302,36 @@ UTILITY DEFINITIONS DUP REPORTER IF NIP 2@ DROP EXECUTE ELSE - DROP ▪ "Uncaught exception: " TYPE-ERR - DUP ▪ ABS 0 <# #S ROT SIGN #> ▪ TYPE-ERR EOL ; + DROP ▪ "Uncaught exception: " TYPE + DUP ▪ ABS 0 <# #S ROT SIGN #> ▪ TYPE EOL ; THEN ; SYSTEM DEFINITIONS { ( no message for ABORT ) } EXCP-ABORT REPORTER! -{ THROWN-STRING 2@ TYPE-ERR "\n" TYPE-ERR } EXCP-FAIL REPORTER! -{ "Stack overflow\n" TYPE-ERR } EXCP-STACK-OVERFLOW REPORTER! -{ "Stack underflow\n" TYPE-ERR } EXCP-STACK-UNDERFLOW REPORTER! -{ "Return stack overflow\n" TYPE-ERR } EXCP-RETURN-OVERFLOW REPORTER! -{ "Return stack underflow\n" TYPE-ERR } EXCP-RETURN-UNDERFLOW REPORTER! -{ "Dictionary overflow\n" TYPE-ERR } EXCP-DICTIONARY-OVERFLOW REPORTER! -{ "Invalid memory address\n" TYPE-ERR } EXCP-INVALID-ADDRESS REPORTER! -{ "Argument type mismatch\n" TYPE-ERR } EXCP-TYPE-MISMATCH REPORTER! -{ "Undefined word: " TYPE-ERR THROWN-STRING 2@ TYPE-ERR "\n" TYPE-ERR } +{ THROWN-STRING 2@ TYPE EOL } EXCP-FAIL REPORTER! +{ "Stack overflow\n" TYPE } EXCP-STACK-OVERFLOW REPORTER! +{ "Stack underflow\n" TYPE } EXCP-STACK-UNDERFLOW REPORTER! +{ "Return stack overflow\n" TYPE } EXCP-RETURN-OVERFLOW REPORTER! +{ "Return stack underflow\n" TYPE } EXCP-RETURN-UNDERFLOW REPORTER! +{ "Dictionary overflow\n" TYPE } EXCP-DICTIONARY-OVERFLOW REPORTER! +{ "Invalid memory address\n" TYPE } EXCP-INVALID-ADDRESS REPORTER! +{ "Argument type mismatch\n" TYPE } EXCP-TYPE-MISMATCH REPORTER! +{ "Undefined word: " TYPE THROWN-STRING 2@ TYPE EOL } EXCP-UNDEFINED-WORD REPORTER! -{ "Pictured numeric output string overflow\n" TYPE-ERR } +{ "Pictured numeric output string overflow\n" TYPE } EXCP-PNO-OVERFLOW REPORTER! -{ "Invalid numeric argument\n" TYPE-ERR } EXCP-BAD-NUMERIC-ARGUMENT REPORTER! -{ "Non-existent file\n" TYPE-ERR } EXCP-NON-EXISTENT-FILE REPORTER! -{ "File I/O exception\n" TYPE-ERR } EXCP-FILE-IO REPORTER! -{ "Unexpected end of file\n" TYPE-ERR } EXCP-UNEXPECTED-EOF REPORTER! -{ "Quit\n" TYPE-ERR } EXCP-QUIT REPORTER! +{ "Invalid numeric argument\n" TYPE } EXCP-BAD-NUMERIC-ARGUMENT REPORTER! +{ "Non-existent file\n" TYPE } EXCP-NON-EXISTENT-FILE REPORTER! +{ "File I/O exception\n" TYPE } EXCP-FILE-IO REPORTER! +{ "Unexpected end of file\n" TYPE } EXCP-UNEXPECTED-EOF REPORTER! +{ "Quit\n" TYPE } EXCP-QUIT REPORTER! -{ "Out of memory\n" TYPE-ERR } EXCP-HEAP-OVERFLOW REPORTER! -{ "Uninitialized deferred word\n" TYPE-ERR } EXCP-DEFER-UNINITIALIZED REPORTER! +{ "Out of memory\n" TYPE } EXCP-HEAP-OVERFLOW REPORTER! +{ "Uninitialized deferred word\n" TYPE } EXCP-DEFER-UNINITIALIZED REPORTER! -: DEFAULT-UNWIND ( i*x n -- ) REPORT S0 SP! QUIT ; +: DEFAULT-UNWIND ( i*x n -- ) + R0 RSP! >R S0 SP! R> >STDERR REPORT >STDOUT QUIT ; ' DEFAULT-UNWIND IS THROW-UNWIND \ Switch to the interpreter defined in this startup file @@ -2347,6 +2389,30 @@ REVERT { LOCAL-LOOKUP ?DUP 0= IF DEFERS FIND-HOOK THEN } IS FIND-HOOK +FORTH DEFINITIONS + +: LOCALS| ( "name1…namen|" -- ; xn … x1 -- ) IMMEDIATE + LOCAL-NAMES ▪ NULL TO LOCAL-NAMES + 0 BEGIN + PARSE-NAME + 2DUP "|" COMPARE 0<> + WHILE + DUP LOCAL% %ALLOCATE + LOCAL-NAMES OVER LOCAL>LINK ! + 2DUP LOCAL>LENGTH ! + >R R@ LOCAL>NAME-ADDR SWAP CMOVE + R> TO LOCAL-NAMES + 1+ + REPEAT ▪ 2DROP ▪ POSTPONE LITERAL POSTPONE N>R ; + +: UNLOCALS IMMEDIATE POSTPONE NRDROP ; + +: ENDLOCALS IMMEDIATE + LOCAL-NAMES BEGIN ?DUP WHILE DUP LOCAL>LINK @ SWAP FREE REPEAT + TO LOCAL-NAMES ▪ POSTPONE UNLOCALS ; + +SYSTEM DEFINITIONS + \ The environment consists of an array of pointers to "name=value" C strings \ immediately following the NULL-terminated array of argument pointers ARGV ARGC 1+ CELLS+ CONSTANT ENVIRON @@ -2372,26 +2438,6 @@ FORTH DEFINITIONS : ARGV ( u -- c-addr u ) DUP ARGC U>= IF DROP 0 0 ELSE ARGV SWAP CELLS+ @ CSTRING THEN ; -: LOCALS| ( "name1…namen|" -- ; xn … x1 -- ) IMMEDIATE - LOCAL-NAMES ▪ NULL TO LOCAL-NAMES - 0 BEGIN - PARSE-NAME - 2DUP "|" COMPARE 0<> - WHILE - DUP LOCAL% %ALLOCATE - LOCAL-NAMES OVER LOCAL>LINK ! - 2DUP LOCAL>LENGTH ! - >R R@ LOCAL>NAME-ADDR SWAP CMOVE - R> TO LOCAL-NAMES - 1+ - REPEAT ▪ 2DROP ▪ POSTPONE LITERAL POSTPONE N>R ; - -: UNLOCALS IMMEDIATE POSTPONE NRDROP ; - -: ENDLOCALS IMMEDIATE - LOCAL-NAMES BEGIN ?DUP WHILE DUP LOCAL>LINK @ SWAP FREE REPEAT - TO LOCAL-NAMES ▪ POSTPONE UNLOCALS ; - UTILITY DEFINITIONS : (TRACE) ( xt -- ) >NAME TYPE SPACE .S ; @@ -2437,7 +2483,6 @@ FORTH DEFINITIONS UTILITY DEFINITIONS \ Display a string in escaped (double-quoted) format, without the delimiters -: CONTROL-CHAR? ( ch -- flag ) DUP 32 U< SWAP 127 = OR ; : TYPE-ESCAPED ( c-addr u -- "" ) 0 ?DO DUP C@ CASE @@ -2446,7 +2491,7 @@ UTILITY DEFINITIONS [[ CHAR " ]] OF "\\\"" ENDOF [[ CHAR \ ]] OF "\\\\" ENDOF OTHERWISE - 0 <# OVER CONTROL-CHAR? IF 16 #B 16 #B "\\x" HOLDS ELSE OVER HOLD THEN #> + 0 <# OVER CNTRL? IF 16 #B 16 #B "\\x" HOLDS ELSE OVER HOLD THEN #> ENDOF ENDCASE ▪ TYPE ▪ 1+ LOOP ▪ DROP ; @@ -2820,12 +2865,12 @@ O_RDWR CONSTANT R/W ( -- fam ) file CLEAR-LEFTOVER 0# file FILE>POSITION 2! [ NULL 0 ] 2LITERAL file FILE>SOURCE 2! - open-how SIZEOF open_how% 0 FILL + open-how [[ open_how% %SIZEOF ]] 0 FILL 0 fam open-how open_how>flags 2! fam [[ O_CREAT __O_TMPFILE OR ]] AND IF 0 0666 open-how open_how>mode 2! THEN - AT_FDCWD ▪ name ▪ open-how ▪ SIZEOF open_how% + AT_FDCWD ▪ name ▪ open-how ▪ [[ open_how% %SIZEOF ]] SYS_OPENAT2 SYSCALL4-RETRY name FREE DUP ERRNO_ENOENT <> IF ▪ DUP ERRNO_ENOTDIR <> AND-IF ▪ ELSE @@ -2862,14 +2907,14 @@ O_RDWR CONSTANT R/W ( -- fam ) 0<> IF EXCP-FILE-IO THROW THEN ; : FILE-STATUS ( c-addr u -- stat64-addr ) - STAT64-RESULT SIZEOF stat64% 0 FILL + STAT64-RESULT [[ stat64% %SIZEOF ]] 0 FILL AT_FDCWD -ROT MAKE-CSTRING DUP >R STAT64-RESULT 0 SYS_FSTATAT64 SYSCALL4-RETRY R> FREE ▪ 0<> IF EXCP-FILE-IO THROW THEN STAT64-RESULT ; : (FILE-STATUS) ( fileid -- stat64-addr ) - STAT64-RESULT SIZEOF stat64% 0 FILL + STAT64-RESULT [[ stat64% %SIZEOF ]] 0 FILL FD>FILE FILE>FD @ EMPTY-CSTRING STAT64-RESULT AT_EMPTY_PATH SYS_FSTATAT64 SYSCALL4-RETRY 0<> IF EXCP-FILE-IO THROW THEN @@ -3005,3 +3050,5 @@ SYSTEM DEFINITIONS THEN [ INTERACTIVE? ] [IF] BANNER [THEN] } EXECUTE + +( vim:set syntax=jumpforth sw=3 ts=8 et fo-=j: ) diff --git a/test/characters.4th b/test/characters.4th new file mode 100644 index 0000000..89267f7 --- /dev/null +++ b/test/characters.4th @@ -0,0 +1,28 @@ +ALSO UTILITY + +: TSTR "019AFGZafgz[{.:}] \0\a\b\t\n\v\f\r\e\"\'\\\x1f\x7f▪" ; + +: CHECK ( xt -- ) + DUP >NAME TYPE ": " TYPE + "\"" TYPE ▪ TSTR 0 DO + DUP I + C@ + 2 PICK EXECUTE + IF DUP I + 1 TYPE-ESCAPED THEN + LOOP "\"\n" TYPE ; + +: ANY? DROP TRUE ; + +' ANY? CHECK +' ASCII? CHECK +' BLANK? CHECK +' CNTRL? CHECK +' DIGIT? CHECK +' GRAPH? CHECK +' LOWER? CHECK +' UPPER? CHECK +' PRINT? CHECK +' SPACE? CHECK +' ALPHA? CHECK +' ALNUM? CHECK +' PUNCT? CHECK +' XDIGIT? CHECK diff --git a/test/characters.exp b/test/characters.exp new file mode 100644 index 0000000..f35ef59 --- /dev/null +++ b/test/characters.exp @@ -0,0 +1,15 @@ +ANY?: "019AFGZafgz[{.:}] \x00\a\b\t\n\v\f\r\e\"'\\\x1F\x7F▪" +ASCII?: "019AFGZafgz[{.:}] \x00\a\b\t\n\v\f\r\e\"'\\\x1F\x7F" +BLANK?: " \t" +CNTRL?: "\x00\a\b\t\n\v\f\r\e\x1F\x7F" +DIGIT?: "019" +GRAPH?: "019AFGZafgz[{.:}]\"'\\" +LOWER?: "afgz" +UPPER?: "AFGZ" +PRINT?: "019AFGZafgz[{.:}] \"'\\" +SPACE?: " \t\n\v\f\r" +ALPHA?: "AFGZafgz" +ALNUM?: "019AFGZafgz" +PUNCT?: "[{.:}]\"'\\" +XDIGIT?: "019AFaf" +exit-code: 0 diff --git a/test/resize.4th b/test/resize.4th index 14cfe94..f3e2153 100644 --- a/test/resize.4th +++ b/test/resize.4th @@ -5,7 +5,7 @@ ALSO UTILITY PREVIOUS ALSO SYSTEM -256 KB SIZEOF MEMBLOCK% - CONSTANT 256-KB-BLOCK +256 KB MEMBLOCK% %SIZEOF - CONSTANT 256-KB-BLOCK PREVIOUS : TEST