From 295398fee9ff0989293ec86f43d86f0cf48bff2f Mon Sep 17 00:00:00 2001 From: Alex Wood Date: Tue, 30 Jul 2024 09:04:38 -0400 Subject: [PATCH 01/10] FASLs: initialize all array elements at once separate setf-array instructions are pointlessly wasteful --- FASL.md | 1 + compile-file/cmpltv.lisp | 22 ++++++++++++---------- compile-file/encode.lisp | 15 ++++++++------- loadltv.lisp | 20 +++++++++++--------- 4 files changed, 32 insertions(+), 26 deletions(-) diff --git a/FASL.md b/FASL.md index d6f0573..00a87b9 100644 --- a/FASL.md +++ b/FASL.md @@ -24,6 +24,7 @@ After the last instruction is executed, the FASL has been fully loaded. Any rema * `listify-rest-args` pushes to the stack again. `bind-optional-args` and `parse-key-args` do as well. (This makes compilation a little easier and cleaner.) * `parse-key-args` puts aokp in the low bit instead of the high bit, to simplify the long instruction. +* `setf-row-major-aref` is replaced with `initialize-array`, which initializes the entire array at once - saves some space and some repeated accesses to grab array ## 0.14 diff --git a/compile-file/cmpltv.lisp b/compile-file/cmpltv.lisp index f2d7af9..5a80a01 100644 --- a/compile-file/cmpltv.lisp +++ b/compile-file/cmpltv.lisp @@ -54,11 +54,12 @@ (%packing-info :initarg :packing-info :reader packing-info) (%element-type-info :initarg :element-type-info :reader element-type-info))) -;; row-major. -(defclass setf-aref (effect) - ((%array :initarg :array :reader setf-aref-array :type array-creator) - (%index :initarg :index :reader setf-aref-index :type (integer 0)) - (%value :initarg :value :reader setf-aref-value :type creator))) +;;; Initialize contents of a general array. This is a separate instruction +;;; because such arrays may contain themselves. +(defclass initialize-array (effect) + ((%array :initarg :array :reader initialized-array :type array-creator) + ;; A list of creators as long as the array's total size. + (%values :initarg :values :reader array-values :type list))) (defclass hash-table-creator (vcreator) (;; used in disltv @@ -413,11 +414,12 @@ (eql (second element-type-info) +other-uaet+)) ;; (we have to separate initialization here in case the array ;; contains itself. packed arrays can't contain themselves) - (loop for i below (array-total-size value) - do (add-instruction - (make-instance 'setf-aref - :array arr :index i - :value (ensure-constant (row-major-aref value i)))))) + (add-instruction + (make-instance 'initialize-array + :array arr + :values (loop for i below (array-total-size value) + for e = (row-major-aref value i) + collect (ensure-constant e))))) arr)) (defmethod add-constant ((value hash-table)) diff --git a/compile-file/encode.lisp b/compile-file/encode.lisp index 9db4f6a..d7758e4 100644 --- a/compile-file/encode.lisp +++ b/compile-file/encode.lisp @@ -32,7 +32,7 @@ (rplaca 70 ind1 ind2) ; (setf (car [ind1]) [ind2]) (rplacd 71 ind1 ind2) (make-array 74 sind rank . dims) - (setf-row-major-aref 75 arrayind rmindex valueind) + (initialize-array 75 arrayind . valueinds) (make-hash-table 76 sind test count) ((setf gethash) 77 htind keyind valueind) (make-sb64 78 sind sb64) @@ -247,14 +247,15 @@ ((equal packing-type '(signed-byte 64)) (dump (write-b64 elem stream))) ;; TODO: Signed bytes - ((equal packing-type 't)) ; handled by setf-aref instructions + ((equal packing-type 't)) ; handled by initialize-array instruction (t (error "BUG: Unknown packing-type ~s" packing-type)))))) -(defmethod encode ((inst setf-aref) stream) - (write-mnemonic 'setf-row-major-aref stream) - (write-index (setf-aref-array inst) stream) - (write-b16 (setf-aref-index inst) stream) - (write-index (setf-aref-value inst) stream)) +(defmethod encode ((inst initialize-array) stream) + (write-mnemonic 'initialize-array stream) + (write-index (initialized-array inst) stream) + ;; length is implicit from the array being initialized + (loop for c in (array-values inst) + do (write-index c stream))) (defmethod encode ((inst hash-table-creator) stream) (let* ((ht (prototype inst)) diff --git a/loadltv.lisp b/loadltv.lisp index 64523c3..27f8df0 100644 --- a/loadltv.lisp +++ b/loadltv.lisp @@ -15,7 +15,7 @@ (rplaca 70 ind1 ind2) ; (setf (car [ind1]) [ind2]) (rplacd 71 ind1 ind2) (make-array 74 sind rank . dims) - (setf-row-major-aref 75 arrayind rmindex valueind) + (initialize-array 75 arrayind . valueinds) (make-hash-table 76 sind test count) (setf-gethash 77 htind keyind valueind) (make-sb64 78 sind sb64) @@ -342,7 +342,7 @@ Did not initialize constants~{ #~d~}" `(loop for i below (array-total-size array) for elem = ,form do (setf (row-major-aref array i) elem)))) - (cond ((eql etcode +other-uaet+)) ; handled via setf-aref + (cond ((eql etcode +other-uaet+)) ; handled via initialize-array ((equal packing-type 'nil)) ((equal packing-type 'base-char) (undump (code-char (read-byte stream)))) @@ -381,15 +381,17 @@ Did not initialize constants~{ #~d~}" (undump (read-sb32 stream))) ((equal packing-type '(signed-byte 64)) (undump (read-sb64 stream))) - ((equal packing-type 't)) ; setf-aref takes care of it + ((equal packing-type 't)) ; initialize-array takes care of it (t (error "BUG: Unknown packing-type ~s" packing-type)))))) -(defmethod %load-instruction ((mnemonic (eql 'setf-row-major-aref)) stream) - (let ((index (read-index stream)) (aindex (read-ub16 stream)) - (value (read-index stream))) - (dbgprint " ((setf row-major-aref) ~d ~d ~d" index aindex value) - (setf (row-major-aref (constant index) aindex) - (constant value)))) +(defmethod %load-instruction ((mnemonic (eql 'initialize-array)) stream) + (let ((index (read-index stream))) + (dbgprint " (initialize-array ~d)" index) + (loop with array = (constant index) + for i below (array-total-size array) + for vindex = (read-index stream) + for v = (constant vindex) + do (setf (row-major-aref array i) v)))) (defmethod %load-instruction ((mnemonic (eql 'make-hash-table)) stream) (dbgprint " (make-hash-table)") From 391d383e66fc6992057dfe47ccb70deb0299393c Mon Sep 17 00:00:00 2001 From: Alex Wood Date: Tue, 30 Jul 2024 09:05:13 -0400 Subject: [PATCH 02/10] progn for with-compilation-results --- compile/compilation-unit.lisp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compile/compilation-unit.lisp b/compile/compilation-unit.lisp index 6bd0bc7..04b07b8 100644 --- a/compile/compilation-unit.lisp +++ b/compile/compilation-unit.lisp @@ -39,7 +39,7 @@ If the compiler encounters an unresolvable problem it can of course fail immedia (lambda (e) (signal e) (setq ,warningsp t ,failurep t)))) - (values ,@body ,warningsp ,failurep))))) + (values (progn ,@body) ,warningsp ,failurep))))) (defvar *in-compilation-unit* nil) From 0e259b8036eeef4ff98361cc0a2066590d74d62a Mon Sep 17 00:00:00 2001 From: Alex Wood Date: Tue, 30 Jul 2024 12:05:36 -0400 Subject: [PATCH 03/10] Test externalization of hash tables --- test/fasl/externalize.lisp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/fasl/externalize.lisp b/test/fasl/externalize.lisp index 67dc2d9..d1cbf47 100644 --- a/test/fasl/externalize.lisp +++ b/test/fasl/externalize.lisp @@ -87,6 +87,10 @@ 4/3 -3/2 7.6s0 3.2f-4 4.1d8 5.6l23 #c(3.2 4.7) #c(-8 3/2) #\Newline #\7 dribble #:make-load-form #:nonexist #.*package* (machine-instance . 223) #1=(#1# (#1# . #1#) . #1#) + #.(make-hash-table) #.(make-hash-table :test #'equal) + #.(let ((ht (make-hash-table))) + (setf (gethash 4 ht) "booze" (gethash 'hello ht) 'world) + ht) #3=(#2=(#3# #2#) #1# . #2#) "Hello, world!" #*11010110101 #0a17 #4=#0a#4# #5=#2a((#5# #4#) (#3# #5#)) "畫蛇添足" #.(make-array '(3 2) :element-type 'standard-char From c5d489117e0deee0b94b24192e16948f165f8e14 Mon Sep 17 00:00:00 2001 From: Alex Wood Date: Tue, 30 Jul 2024 12:10:22 -0400 Subject: [PATCH 04/10] FASLs: initialize hash tables in one instruction again, having a bunch of setf-gethash is pointlessly wasteful --- compile-file/cmpltv.lisp | 42 +++++++++++++++++++++++++--------------- compile-file/encode.lisp | 14 ++++++++------ loadltv.lisp | 20 ++++++++++--------- 3 files changed, 45 insertions(+), 31 deletions(-) diff --git a/compile-file/cmpltv.lisp b/compile-file/cmpltv.lisp index 5a80a01..1aa40eb 100644 --- a/compile-file/cmpltv.lisp +++ b/compile-file/cmpltv.lisp @@ -67,11 +67,17 @@ (%count :initarg :count :reader hash-table-creator-count :type (integer 0)))) -(defclass setf-gethash (effect) - ((%hash-table :initarg :hash-table :reader setf-gethash-hash-table - :type hash-table-creator) - (%key :initarg :key :reader setf-gethash-key :type creator) - (%value :initarg :value :reader setf-gethash-value :type creator))) +;;; Initialize contents of a hash table. Separate instruction because +;;; circular references are possible. +(defclass initialize-hash-table (effect) + ((%table :initarg :table :reader initialized-table :type hash-table-creator) + ;; We have to store the count ourselves, since the hash table size may + ;; not be identical to the number of elements. + (%count :initarg :count :reader initialized-table-count + :type (unsigned-byte 32)) + ;; An alist of all the keys and values in the table. + ;; The keys and values are creators. + (%alist :initarg :alist :reader alist :type list))) (defclass symbol-creator (vcreator) (;; Is there actually a point to trying to coalesce symbol names? @@ -423,17 +429,21 @@ arr)) (defmethod add-constant ((value hash-table)) - (let ((ht (add-creator - value - (make-instance 'hash-table-creator :prototype value - :test (hash-table-test value) - :count (hash-table-count value))))) - (maphash (lambda (k v) - (add-instruction - (make-instance 'setf-gethash - :hash-table ht - :key (ensure-constant k) :value (ensure-constant v)))) - value) + (let* ((count (hash-table-count value)) + (ht (add-creator + value + (make-instance 'hash-table-creator :prototype value + :test (hash-table-test value) + :count count))) + (alist nil)) + (unless (zerop count) ; empty hash table, so nothing to initialize + (maphash (lambda (k v) + (let ((ck (ensure-constant k)) (cv (ensure-constant v))) + (push (cons ck cv) alist))) + value) + (add-instruction + (make-instance 'initialize-hash-table + :table ht :count count :alist alist))) ht)) (defmethod add-constant ((value symbol)) diff --git a/compile-file/encode.lisp b/compile-file/encode.lisp index d7758e4..6d24947 100644 --- a/compile-file/encode.lisp +++ b/compile-file/encode.lisp @@ -34,7 +34,7 @@ (make-array 74 sind rank . dims) (initialize-array 75 arrayind . valueinds) (make-hash-table 76 sind test count) - ((setf gethash) 77 htind keyind valueind) + (initialize-hash-table 77 htind keyind valueind) (make-sb64 78 sind sb64) (find-package 79 sind nameind) (make-bignum 80 sind size . words) @@ -281,11 +281,13 @@ (write-byte testcode stream) (write-b16 count stream))) -(defmethod encode ((inst setf-gethash) stream) - (write-mnemonic '(setf gethash) stream) - (write-index (setf-gethash-hash-table inst) stream) - (write-index (setf-gethash-key inst) stream) - (write-index (setf-gethash-value inst) stream)) +(defmethod encode ((inst initialize-hash-table) stream) + (write-mnemonic 'initialize-hash-table stream) + (write-index (initialized-table inst) stream) + (write-b32 (initialized-table-count inst) stream) + (loop for (k . v) in (alist inst) + do (write-index k stream) + (write-index v stream))) (defmethod encode ((inst singleton-creator) stream) (ecase (prototype inst) diff --git a/loadltv.lisp b/loadltv.lisp index 27f8df0..b40bff6 100644 --- a/loadltv.lisp +++ b/loadltv.lisp @@ -17,7 +17,7 @@ (make-array 74 sind rank . dims) (initialize-array 75 arrayind . valueinds) (make-hash-table 76 sind test count) - (setf-gethash 77 htind keyind valueind) + (initialize-hash-table 77 htind keyind valueind) (make-sb64 78 sind sb64) (find-package 79 sind nameind) (make-bignum 80 sind size . words) ; size is signed @@ -335,7 +335,7 @@ Did not initialize constants~{ #~d~}" (rank (read-byte stream)) (dimensions (loop repeat rank collect (read-ub16 stream))) (array (make-array dimensions :element-type element-type))) - (dbgprint " (make-array ~d ~x ~d)" element-typei packing-code rank) + (dbgprint " (make-array ~d ~x ~d)" element-type packing-code rank) (dbgprint " dimensions ~a" dimensions) (setf (next-constant) array) (macrolet ((undump (form) @@ -405,12 +405,14 @@ Did not initialize constants~{ #~d~}" (dbgprint " test = ~a, count = ~d" test count) (setf (next-constant) (make-hash-table :test test :size count)))) -(defmethod %load-instruction ((mnemonic (eql 'setf-gethash)) stream) - (let ((htind (read-index stream)) - (keyind (read-index stream)) (valind (read-index stream))) - (dbgprint " ((setf gethash) ~d ~d ~d)" htind keyind valind) - (setf (gethash (constant keyind) (constant htind)) - (constant valind)))) +(defmethod %load-instruction ((mnemonic (eql 'initialize-hash-table)) stream) + (let ((htind (read-index stream)) (count (read-ub32 stream))) + (dbgprint " (initialize-hash-table ~d ~d)" htind count) + (let ((table (constant htind))) + (loop repeat count + do (let ((k (constant (read-index stream))) + (v (constant (read-index stream)))) + (setf (gethash k table) v)))))) (defmethod %load-instruction ((mnemonic (eql 'make-sb64)) stream) (let ((sb64 (read-sb64 stream))) @@ -521,7 +523,7 @@ Did not initialize constants~{ #~d~}" (defun decode-element-type (code stream) (cond ((eql code +other-uaet+) (constant (read-index stream))) ((first (find code +array-packing-infos+ :key #'second))) - (error "BUG: Unknown array element type code ~x" code))) + (t (error "BUG: Unknown array element type code ~x" code)))) (defmethod %load-instruction ((mnemonic (eql 'make-bytecode-function)) stream) (let ((entry-point (read-ub32 stream)) From 85ca089ae084fd8bf2af48e8700a6eb8a15ffb7b Mon Sep 17 00:00:00 2001 From: Alex Wood Date: Tue, 30 Jul 2024 13:43:31 -0400 Subject: [PATCH 05/10] Provide initial element for externalization tests Externalizing an uninitialized array is probably technically undefined behavior, since we read the array elements. --- test/fasl/externalize.lisp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/fasl/externalize.lisp b/test/fasl/externalize.lisp index d1cbf47..a9c23ba 100644 --- a/test/fasl/externalize.lisp +++ b/test/fasl/externalize.lisp @@ -95,10 +95,11 @@ #0a17 #4=#0a#4# #5=#2a((#5# #4#) (#3# #5#)) "畫蛇添足" #.(make-array '(3 2) :element-type 'standard-char :initial-contents '((#\f #\7) (#\Space #\\) (#\. #\%))) - #.(make-array '(3 3 3) :adjustable t) + #.(make-array '(3 3 3) :adjustable t :initial-element '#:horn) ;; This constant crashes SBCL - see SBCL bug 2038233 #-sbcl #.(make-array 2 :displaced-to '#5#) - #.(make-array 7 :element-type '(unsigned-byte 3) :fill-pointer 2) + #.(make-array 7 :element-type '(unsigned-byte 3) :fill-pointer 2 + :initial-element 2) #(2 #:blue #'standard-object "who?" #2a((0 1) (1 0))))) (5am:test externalize-objects From e05b7baa18e1031290c1820854c787ffb3adab93 Mon Sep 17 00:00:00 2001 From: Alex Wood Date: Fri, 2 Aug 2024 12:54:25 -0400 Subject: [PATCH 06/10] Fix machine description for fdesignator --- machine.lisp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine.lisp b/machine.lisp index c144944..1453f29 100644 --- a/machine.lisp +++ b/machine.lisp @@ -142,7 +142,7 @@ (push 56) (pop 57) (dup 58) - (fdesignator 59) + (fdesignator 59 ((constant-arg 1)) ((constant-arg 2))) (called-fdefinition 60 ((constant-arg 1)) ((constant-arg 2))) (protect 61 ((constant-arg 1)) ((constant-arg 2))) (cleanup 62) From 0e1f5af8bac5150c32374800032c74e059b9964c Mon Sep 17 00:00:00 2001 From: Alex Wood Date: Fri, 2 Aug 2024 12:54:36 -0400 Subject: [PATCH 07/10] Fix NLX from inside progv we have to do an unbind. --- compile/compile.lisp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compile/compile.lisp b/compile/compile.lisp index 388335e..3f17383 100644 --- a/compile/compile.lisp +++ b/compile/compile.lisp @@ -1374,7 +1374,7 @@ (compile-form symbols env (new-context context :receiving 1)) (compile-form values env (new-context context :receiving 1)) (assemble context m:progv (env-index context)) - (compile-progn body env context) + (compile-progn body env (new-context context :dynenv '(:special))) (emit-unbind context 1))) (defmethod compile-special ((op (eql 'unwind-protect)) From a8fa4e19da1908e7f3bc2e67f50f66765be4c96a Mon Sep 17 00:00:00 2001 From: Alex Wood Date: Tue, 6 Aug 2024 08:58:36 -0400 Subject: [PATCH 08/10] FASL: Simpler strings Most character arrays are strings so this makes things simpler. Possibly this should be generalized to other arrays? Dunno yet. --- FASL.md | 1 + compile-file/cmpltv.lisp | 36 +++++++++++++++++++++++ compile-file/encode.lisp | 17 +++++++++++ loadltv.lisp | 63 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 117 insertions(+) diff --git a/FASL.md b/FASL.md index 00a87b9..ef98d1a 100644 --- a/FASL.md +++ b/FASL.md @@ -25,6 +25,7 @@ After the last instruction is executed, the FASL has been fully loaded. Any rema * `listify-rest-args` pushes to the stack again. `bind-optional-args` and `parse-key-args` do as well. (This makes compilation a little easier and cleaner.) * `parse-key-args` puts aokp in the low bit instead of the high bit, to simplify the long instruction. * `setf-row-major-aref` is replaced with `initialize-array`, which initializes the entire array at once - saves some space and some repeated accesses to grab array +* new FASL instructions: `base-string` and `utf8-string` for easy cases of character array. ## 0.14 diff --git a/compile-file/cmpltv.lisp b/compile-file/cmpltv.lisp index 1aa40eb..5d1a27c 100644 --- a/compile-file/cmpltv.lisp +++ b/compile-file/cmpltv.lisp @@ -61,6 +61,12 @@ ;; A list of creators as long as the array's total size. (%values :initarg :values :reader array-values :type list))) +;;; Special cases of array-creator, since they're very very common +;;; for e.g. symbol names. +(defclass base-string-creator (vcreator) ()) +(defclass utf8-string-creator (vcreator) + ((%nbytes :initarg :nbytes :reader nbytes :type (unsigned-byte 16)))) + (defclass hash-table-creator (vcreator) (;; used in disltv (%test :initarg :test :reader hash-table-creator-test :type symbol) @@ -428,6 +434,36 @@ collect (ensure-constant e))))) arr)) +(defun utf8-length (string) + (loop for c across string + for cpoint = (char-code c) + sum (cond ((< cpoint #x80) 1) + ((< cpoint #x800) 2) + ((< cpoint #x10000) 3) + ((< cpoint #x110000) 4) + #-sbcl ; whines about deleted code + (t (error "Codepoint #x~x for ~:c too big" cpoint c))))) + +(defmethod add-constant ((value string)) + (case (array-element-type value) + (base-char (let ((L (length value))) + (if (< L #.(ash 1 16)) + ;; FIXME: Check that characters are all ASCII? + (add-creator + value + (make-instance 'base-string-creator + :prototype value)) + (call-next-method)))) + (character (let ((L (utf8-length value))) + (if (< L #.(ash 1 16)) + (add-creator + value + (make-instance 'utf8-string-creator + :nbytes L + :prototype value)) + (call-next-method)))) + (otherwise (call-next-method)))) + (defmethod add-constant ((value hash-table)) (let* ((count (hash-table-count value)) (ht (add-creator diff --git a/compile-file/encode.lisp b/compile-file/encode.lisp index 6d24947..aa04212 100644 --- a/compile-file/encode.lisp +++ b/compile-file/encode.lisp @@ -31,6 +31,8 @@ (cons 69 sind) (rplaca 70 ind1 ind2) ; (setf (car [ind1]) [ind2]) (rplacd 71 ind1 ind2) + (base-string 72 size . data) + (utf8-string 73 nbytes . data) (make-array 74 sind rank . dims) (initialize-array 75 arrayind . valueinds) (make-hash-table 76 sind test count) @@ -257,6 +259,21 @@ (loop for c in (array-values inst) do (write-index c stream))) +(defmethod encode ((inst base-string-creator) stream) + (write-mnemonic 'base-string stream) + (write-b16 (length (prototype inst)) stream) + (loop for c across (prototype inst) + for code = (char-code c) + do (write-byte code stream))) + +;;; Here we encode the number of bytes rather than the number of chars. +;;; This is smarter, since it means the I/O can be batched. We should +;;; do it for general arrays as well. +(defmethod encode ((inst utf8-string-creator) stream) + (write-mnemonic 'utf8-string stream) + (write-b16 (nbytes inst) stream) + (write-utf8 (prototype inst) stream)) + (defmethod encode ((inst hash-table-creator) stream) (let* ((ht (prototype inst)) ;; TODO: Custom hash-table tests. diff --git a/loadltv.lisp b/loadltv.lisp index b40bff6..00520af 100644 --- a/loadltv.lisp +++ b/loadltv.lisp @@ -14,6 +14,8 @@ (cons 69 sind) (rplaca 70 ind1 ind2) ; (setf (car [ind1]) [ind2]) (rplacd 71 ind1 ind2) + (base-string 72 size . data) + (utf8-string 73 nbytes . data) (make-array 74 sind rank . dims) (initialize-array 75 arrayind . valueinds) (make-hash-table 76 sind test count) @@ -228,6 +230,67 @@ Did not initialize constants~{ #~d~}" (dbgprint " (rplacd ~d ~d)" cons value) (setf (cdr (constant cons)) (constant value)))) +(defmethod %load-instruction ((mnemonic (eql 'base-string)) stream) + (let ((len (read-ub16 stream))) + (dbgprint " (base-string ~d)" len) + ;; Read everything in one go. + (let ((bytes (make-array len :element-type '(unsigned-byte 8)))) + (read-sequence bytes stream) + (setf (next-constant) (map 'base-string #'code-char bytes))))) + +;;; FIXME: Kinda duplicate code with the stream reader make-array uses. +;;; We could use babel for this but that's a whole lot of dependency +;;; for one pretty simple function. +(defun utf8-octets-to-string (bytes) + (declare (type (simple-array (unsigned-byte 8) (*)))) + (coerce + (loop with len = (length bytes) + with i = 0 + for codepoint + = (let ((byte0 (aref bytes i))) + (cond + ((= #b00000000 (mask-field (byte 1 7) byte0)) ; one byte + byte0) + ((= #b11000000 (mask-field (byte 3 5) byte0)) ; two bytes + (let ((byte1 (aref bytes (incf i)))) + (unless (continuation-byte-p byte1) + (illegal-utf8-continuation byte0 byte1)) + (logior (ash (ldb (byte 5 0) byte0) 6) + (ldb (byte 6 0) byte1)))) + ((= #b11100000 (mask-field (byte 4 4) byte0)) ; three + (let ((byte1 (aref bytes (incf i))) + (byte2 (aref bytes (incf i)))) + (unless (and (continuation-byte-p byte1) + (continuation-byte-p byte2)) + (illegal-utf8-continuation byte0 byte1 byte2)) + (logior (ash (ldb (byte 4 0) byte0) 12) + (ash (ldb (byte 6 0) byte1) 6) + (ldb (byte 6 0) byte2)))) + ((= #b11110000 (mask-field (byte 5 3) byte0)) ; four + (let ((byte1 (aref bytes (incf i))) + (byte2 (aref bytes (incf i))) + (byte3 (aref bytes (incf i)))) + (unless (and (continuation-byte-p byte1) + (continuation-byte-p byte2) + (continuation-byte-p byte3)) + (illegal-utf8-continuation byte0 byte1 byte2 byte3)) + (logior (ash (ldb (byte 3 0) byte0) 18) + (ash (ldb (byte 6 0) byte1) 12) + (ash (ldb (byte 6 0) byte2) 6) + (ldb (byte 6 0) byte3)))) + (t (illegal-utf8-head byte0)))) + do (incf i) + collect (code-char codepoint) + until (>= i len)) + '(simple-array character (*)))) + +(defmethod %load-instruction ((mnemonic (eql 'utf8-string)) stream) + (let ((len (read-ub16 stream))) + (dbgprint " (utf8-string ~d)" len) + (let ((bytes (make-array len :element-type '(unsigned-byte 8)))) + (read-sequence bytes stream) + (setf (next-constant) (utf8-octets-to-string bytes))))) + (defmacro read-sub-byte (array stream nbits) (let ((perbyte (floor 8 nbits)) (a (gensym "ARRAY")) (s (gensym "STREAM"))) From 7da5da347bcbf1fb96551d563afb1936f1e68ba9 Mon Sep 17 00:00:00 2001 From: Alex Wood Date: Tue, 6 Aug 2024 09:00:35 -0400 Subject: [PATCH 09/10] FASL: Instructions to modify fcells Not used yet, but these can be used to communicate defuns without calling a #'(setf fdefinition) function. As you might guess from the *primitive* thing, I'm seeing if I can't set things up to minimize what's necessary in the "primitive" loading system. --- FASL.md | 1 + compile-file/cmpltv.lisp | 53 ++++++++++++++++++++++++++-------------- compile-file/encode.lisp | 9 +++++++ 3 files changed, 45 insertions(+), 18 deletions(-) diff --git a/FASL.md b/FASL.md index ef98d1a..e18c182 100644 --- a/FASL.md +++ b/FASL.md @@ -26,6 +26,7 @@ After the last instruction is executed, the FASL has been fully loaded. Any rema * `parse-key-args` puts aokp in the low bit instead of the high bit, to simplify the long instruction. * `setf-row-major-aref` is replaced with `initialize-array`, which initializes the entire array at once - saves some space and some repeated accesses to grab array * new FASL instructions: `base-string` and `utf8-string` for easy cases of character array. +* new FASL instructions: `fcell-set` for simple defuns. ## 0.14 diff --git a/compile-file/cmpltv.lisp b/compile-file/cmpltv.lisp index 5d1a27c..29fcb81 100644 --- a/compile-file/cmpltv.lisp +++ b/compile-file/cmpltv.lisp @@ -138,6 +138,11 @@ (defclass fcell-lookup (creator) ((%name :initarg :name :reader name :type creator))) +;;; Set what's in an fcell. +(defclass fcell-set (effect) + ((%fcell :initarg :fcell :reader fcell :type creator) + (%value :initarg :value :reader value :type creator))) + ;;; Look up the "cell" for special variable binding. This is used by the ;;; SPECIAL-BIND, SYMBOL-VALUE, and SYMBOL-VALUE-SET VM instructions ;;; as a lookup key for the binding, as well as for establishing new @@ -230,6 +235,12 @@ ;;; +;;; If this is true, symbols are avoided when possible, and attributes +;;; are not dumped. Experimental for use with chalybeate. +(defvar *primitive* nil) + +;;; + ;;; Return true iff the value is similar to the existing creator. (defgeneric similarp (creator value) (:method (creator value) (declare (ignore creator value)) nil)) @@ -732,27 +743,33 @@ ;; Something to consider: Any of these, but most likely the lambda list, ;; could contain unexternalizable data. In this case we should find a way ;; to gracefully and silently not dump the attribute. - (when (cmp:cfunction-name value) - (add-instruction (make-instance 'name-attr - :object inst - :objname (ensure-constant - (cmp:cfunction-name value))))) - (when (cmp:cfunction-doc value) - (add-instruction (make-instance 'docstring-attr - :object inst - :docstring (ensure-constant - (cmp:cfunction-doc value))))) - (when (cmp:cfunction-lambda-list-p value) - (add-instruction (make-instance 'lambda-list-attr - :function inst - :lambda-list (ensure-constant - (cmp:cfunction-lambda-list value))))) + (unless *primitive* + (when (cmp:cfunction-name value) + (add-instruction (make-instance 'name-attr + :object inst + :objname (ensure-constant + (cmp:cfunction-name value))))) + (when (cmp:cfunction-doc value) + (add-instruction (make-instance 'docstring-attr + :object inst + :docstring (ensure-constant + (cmp:cfunction-doc value))))) + (when (cmp:cfunction-lambda-list-p value) + (add-instruction (make-instance 'lambda-list-attr + :function inst + :lambda-list (ensure-constant + (cmp:cfunction-lambda-list value)))))) inst)) (defclass bytemodule-creator (vcreator) ((%cmodule :initarg :cmodule :reader bytemodule-cmodule) (%lispcode :initform nil :initarg :lispcode :reader bytemodule-lispcode))) +(defmethod print-object ((object bytemodule-creator) stream) + (print-unreadable-object (object stream :type t) + (format stream "~d" (index object))) + object) + (defclass setf-literals (effect) ((%module :initarg :module :reader setf-literals-module :type creator) ;; The literals are not practically coalesceable and are always a T vector, @@ -781,9 +798,9 @@ (defun ensure-fcell (name) (or (find-fcell name) - (add-fcell name - (make-instance 'fcell-lookup - :name (ensure-constant name))))) + (add-fcell name + (make-instance 'fcell-lookup + :name (ensure-constant name))))) (defmethod ensure-module-literal ((info cmp:fdefinition-info)) (ensure-fcell (cmp:fdefinition-info-name info))) diff --git a/compile-file/encode.lisp b/compile-file/encode.lisp index aa04212..501ca83 100644 --- a/compile-file/encode.lisp +++ b/compile-file/encode.lisp @@ -57,6 +57,9 @@ (find-class 98 sind cnind) (init-object-array 99 ub64) (environment 100) + (fcell-set 101 nameind) + (vcell-set 102 nameind) + (ccell-set 103 nameind) (attribute 255 name nbytes . data))) ;; how many bytes are needed to represent an index? @@ -184,6 +187,7 @@ ;; The following is deleted as unreachable on e.g. SBCL because ;; it knows that char-code doesn't go this high. ;; Don't worry about it. + #-sbcl (t ; not allowed by RFC3629 (error "Code point #x~x for character ~:c is out of range for UTF-8" cpoint char))))) @@ -380,6 +384,11 @@ (write-mnemonic 'fcell stream) (write-index (name inst) stream)) +(defmethod encode ((inst fcell-set) stream) + (write-mnemonic 'fcell-set stream) + (write-index (fcell inst) stream) + (write-index (value inst) stream)) + (defmethod encode ((inst vcell-lookup) stream) (write-mnemonic 'vcell stream) (write-index (name inst) stream)) From 1580075b6fa77dde922baeb8a8473e52e01c99da Mon Sep 17 00:00:00 2001 From: Alex Wood Date: Tue, 6 Aug 2024 09:09:14 -0400 Subject: [PATCH 10/10] FASL: unify rplaca and rplacd --- FASL.md | 1 + compile-file/cmpltv.lisp | 17 +++++++---------- compile-file/encode.lisp | 15 +++++---------- loadltv.lisp | 19 ++++++++----------- 4 files changed, 21 insertions(+), 31 deletions(-) diff --git a/FASL.md b/FASL.md index e18c182..e067cb6 100644 --- a/FASL.md +++ b/FASL.md @@ -25,6 +25,7 @@ After the last instruction is executed, the FASL has been fully loaded. Any rema * `listify-rest-args` pushes to the stack again. `bind-optional-args` and `parse-key-args` do as well. (This makes compilation a little easier and cleaner.) * `parse-key-args` puts aokp in the low bit instead of the high bit, to simplify the long instruction. * `setf-row-major-aref` is replaced with `initialize-array`, which initializes the entire array at once - saves some space and some repeated accesses to grab array +* `rplaca` and `rplacd` instructions are unified into `initialize-cons`: simpler and a byte shorter. * new FASL instructions: `base-string` and `utf8-string` for easy cases of character array. * new FASL instructions: `fcell-set` for simple defuns. diff --git a/compile-file/cmpltv.lisp b/compile-file/cmpltv.lisp index 29fcb81..55ec504 100644 --- a/compile-file/cmpltv.lisp +++ b/compile-file/cmpltv.lisp @@ -39,13 +39,10 @@ ;;; coalescence is still really possible. (defclass cons-creator (vcreator) ()) -(defclass rplaca-init (effect) +(defclass initialize-cons (effect) ((%cons :initarg :cons :reader rplac-cons :type cons-creator) - (%value :initarg :value :reader rplac-value :type creator))) - -(defclass rplacd-init (effect) - ((%cons :initarg :cons :reader rplac-cons :type cons-creator) - (%value :initarg :value :reader rplac-value :type creator))) + (%car :initarg :car :reader rplac-car :type creator) + (%cdr :initarg :cdr :reader rplac-cdr :type creator))) ;;; dimensions and element-type are encoded with the array since ;;; they shouldn't really need to be coalesced. @@ -353,10 +350,10 @@ (defmethod add-constant ((value cons)) (let ((cons (add-creator value (make-instance 'cons-creator :prototype value)))) - (add-instruction (make-instance 'rplaca-init - :cons cons :value (ensure-constant (car value)))) - (add-instruction (make-instance 'rplacd-init - :cons cons :value (ensure-constant (cdr value)))) + (add-instruction (make-instance 'initialize-cons + :cons cons + :car (ensure-constant (car value)) + :cdr (ensure-constant (cdr value)))) cons)) ;;; Arrays are encoded with a code describing how elements are packed. diff --git a/compile-file/encode.lisp b/compile-file/encode.lisp index 501ca83..d4e4d83 100644 --- a/compile-file/encode.lisp +++ b/compile-file/encode.lisp @@ -29,8 +29,7 @@ (ratio 67) (complex 68) (cons 69 sind) - (rplaca 70 ind1 ind2) ; (setf (car [ind1]) [ind2]) - (rplacd 71 ind1 ind2) + (initialize-cons 70 consind carind cdrind) (base-string 72 size . data) (utf8-string 73 nbytes . data) (make-array 74 sind rank . dims) @@ -117,15 +116,11 @@ (defmethod encode ((inst cons-creator) stream) (write-mnemonic 'cons stream)) -(defmethod encode ((inst rplaca-init) stream) - (write-mnemonic 'rplaca stream) +(defmethod encode ((inst initialize-cons) stream) + (write-mnemonic 'initialize-cons stream) (write-index (rplac-cons inst) stream) - (write-index (rplac-value inst) stream)) - -(defmethod encode ((inst rplacd-init) stream) - (write-mnemonic 'rplacd stream) - (write-index (rplac-cons inst) stream) - (write-index (rplac-value inst) stream)) + (write-index (rplac-car inst) stream) + (write-index (rplac-cdr inst) stream)) (defun write-dimensions (dimensions stream) (let ((rank (length dimensions))) diff --git a/loadltv.lisp b/loadltv.lisp index 00520af..1dfeb77 100644 --- a/loadltv.lisp +++ b/loadltv.lisp @@ -12,8 +12,7 @@ (ratio 67) (complex 68) (cons 69 sind) - (rplaca 70 ind1 ind2) ; (setf (car [ind1]) [ind2]) - (rplacd 71 ind1 ind2) + (initialize-cons 70 consind carind cdrind) (base-string 72 size . data) (utf8-string 73 nbytes . data) (make-array 74 sind rank . dims) @@ -220,15 +219,13 @@ Did not initialize constants~{ #~d~}" (dbgprint " (cons)") (setf (next-constant) (cons nil nil))) -(defmethod %load-instruction ((mnemonic (eql 'rplaca)) stream) - (let ((cons (read-index stream)) (value (read-index stream))) - (dbgprint " (rplaca ~d ~d)" cons value) - (setf (car (constant cons)) (constant value)))) - -(defmethod %load-instruction ((mnemonic (eql 'rplacd)) stream) - (let ((cons (read-index stream)) (value (read-index stream))) - (dbgprint " (rplacd ~d ~d)" cons value) - (setf (cdr (constant cons)) (constant value)))) +(defmethod %load-instruction ((mnemonic (eql 'initialize-cons)) stream) + (let ((cons (read-index stream)) + (car (read-index stream)) (cdr (read-index stream))) + (dbgprint " (rplaca ~d ~d ~d)" cons car cdr) + (let ((cons (constant cons))) + (setf (car cons) (constant car) + (cdr cons) (constant cdr))))) (defmethod %load-instruction ((mnemonic (eql 'base-string)) stream) (let ((len (read-ub16 stream)))