From 40beae67490cd2f03c597c5f13a6c2a63b574fcb Mon Sep 17 00:00:00 2001 From: Tim Nelson Date: Thu, 29 Feb 2024 17:43:49 -0500 Subject: [PATCH] Fix: better handling for multiple-tests (#245) --- forge/send-to-kodkod.rkt | 5 + forge/sigs-structs.rkt | 46 ++++-- forge/sigs.rkt | 137 ++++++++++-------- forge/tests/error/bsl_multiple_failures.frg | 2 +- .../multiple-positive-examples-failing.frg | 1 + forge/tests/error/multiple_test_failures.frg | 1 + ...d_tree_underconstraint_multiple_errors.frg | 1 + forge/tests/error/unstated_bounds.frg | 11 ++ .../forge-core/other/test_behavior_first.rkt | 42 ++++++ .../forge-core/other/test_behavior_last.rkt | 44 ++++++ 10 files changed, 223 insertions(+), 67 deletions(-) create mode 100644 forge/tests/error/unstated_bounds.frg create mode 100644 forge/tests/forge-core/other/test_behavior_first.rkt create mode 100644 forge/tests/forge-core/other/test_behavior_last.rkt diff --git a/forge/send-to-kodkod.rkt b/forge/send-to-kodkod.rkt index 66a6c988..afcd9f63 100644 --- a/forge/send-to-kodkod.rkt +++ b/forge/send-to-kodkod.rkt @@ -343,8 +343,13 @@ ; Print solve (define (get-next-model [mode ""]) + ; If the solver isn't running at all, error: (unless (is-running?) (raise-user-error "KodKod server is not running.")) + ; If the solver is running, but this specific run ID is closed, user error + (when (is-run-closed? run-name) + (raise-user-error (format "Run ~a has been closed." run-name))) + (pardinus-print (pardinus:solve run-name mode)) (define result (translate-from-kodkod-cli 'run diff --git a/forge/sigs-structs.rkt b/forge/sigs-structs.rkt index 9b14dbcd..de68a087 100644 --- a/forge/sigs-structs.rkt +++ b/forge/sigs-structs.rkt @@ -169,6 +169,7 @@ [run_sterling (or/c string? symbol?)] [sterling_port nonnegative-integer?] [engine_verbosity nonnegative-integer?] + [test_keep symbol?] ) #:transparent) (struct/contract State ( @@ -209,7 +210,7 @@ [name symbol?] [command syntax?] [run-spec Run-spec?] - [result tree:node?] ; TODO: specify + [result tree:node?] [server-ports Server-ports?] [atoms (listof (or/c symbol? number?))] [kodkod-currents Kodkod-current?] @@ -224,7 +225,8 @@ (define DEFAULT-SIG-SCOPE (Range 0 4)) ; an engine_verbosity of 1 logs SEVERE level in the Java engine; ; this will send back info about crashes, but shouldn't spam (and possibly overfill) stderr. -(define DEFAULT-OPTIONS (Options 'surface 'SAT4J 'pardinus 20 0 0 1 5 'default 'close-noretarget 'fast 0 'off 'on 0 1)) +(define DEFAULT-OPTIONS (Options 'surface 'SAT4J 'pardinus 20 0 0 1 5 'default + 'close-noretarget 'fast 0 'off 'on 0 1 'first)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;; Constants ;;;;;;; @@ -255,7 +257,11 @@ 'local_necessity Options-local_necessity 'run_sterling Options-run_sterling 'sterling_port Options-sterling_port - 'engine_verbosity Options-engine_verbosity)) + 'engine_verbosity Options-engine_verbosity + 'test_keep Options-test_keep)) + +(define (oneof-pred lst) + (lambda (x) (member x lst))) (define option-types (hash 'eval-language symbol? @@ -268,14 +274,14 @@ 'min_tracelength exact-positive-integer? 'max_tracelength exact-positive-integer? 'problem_type symbol? - 'target_mode (lambda (x) - (member x '(close_noretarget far_noretarget close_retarget far_retarget hamming_cover))) + 'target_mode (oneof-pred '(close_noretarget far_noretarget close_retarget far_retarget hamming_cover)) 'core_minimization symbol? 'skolem_depth exact-integer? 'local_necessity symbol? 'run_sterling (lambda (x) (or (symbol? x) (string? x))) ; allow for custom visualizer path 'sterling_port exact-nonnegative-integer? - 'engine_verbosity exact-nonnegative-integer?)) + 'engine_verbosity exact-nonnegative-integer? + 'test_keep (oneof-pred '(first last)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;; Initial State ;;;;;;; @@ -564,24 +570,46 @@ Returns whether the given run resulted in sat or unsat, respectively. (assert-is-running run) (Server-ports-stderr (Run-server-ports run))) +;;;;;;;;;;;;;;;;;;;;;;; +; Per-run closed status + +; Keep track of which runs have been closed via close-run +(define closed-run-names (box (list))) +; Allows other modules to let this layer know a run is closed; this box +; is referenced by the instance generator for each run. +(define (add-closed-run-name! name) + (set-box! closed-run-names (cons name (unbox closed-run-names)))) +(define (is-run-closed? name-or-run) + (define (truthify x) (if x #t #f)) + (truthify + (cond [(Run? name-or-run) + (member (Run-name name-or-run) (unbox closed-run-names))] + [else + (member name-or-run (unbox closed-run-names))]))) + ; close-run :: Run -> void (define (close-run run) (assert-is-running run) (when (>= (get-verbosity) VERBOSITY_HIGH) - (printf "Clearing run ~a. Keeping engine process active...~n" (Run-name run))) + (printf "Clearing run ~a. Keeping engine process active...~n" (Run-name run))) + ; Cut off this Run's ability to query the solver, since it's about to be closed + ; This state is referenced in the instance-generator thunk + (add-closed-run-name! (Run-name run)) + ; Since we're using a single process now, send it instructions to clear this run (pardinus:cmd [(get-stdin run)] - (pardinus:clear (Run-name run)))) ; is-running :: Run -> Boolean +; This reports whether the _solver server_ is running; +; *NOT* whether an individual run is still open. (define (is-running? run) ((Server-ports-is-running? (Run-server-ports run)))) (define (assert-is-running run) (unless (is-running? run) - (raise-user-error "KodKod server is not running."))) + (raise-user-error "KodKod/Pardinus solver process is not running."))) (require (for-syntax syntax/srcloc)) ; for these macros diff --git a/forge/sigs.rkt b/forge/sigs.rkt index 045c4c0d..f7d73312 100644 --- a/forge/sigs.rkt +++ b/forge/sigs.rkt @@ -1,7 +1,7 @@ #lang racket/base (require (only-in racket/function thunk) - (only-in racket/list first rest empty empty? flatten remove-duplicates) + (only-in racket/list first rest empty empty? flatten remove-duplicates last) (only-in racket/pretty pretty-print) (prefix-in @ (only-in racket/base display max min - +)) (prefix-in @ racket/set) @@ -188,6 +188,8 @@ (define (state-set-option state option value #:original-path [original-path #f]) (define options (State-options state)) + (unless (hash-ref option-types option #f) + (raise-user-error (format "No such option: ~a" option))) (unless ((hash-ref option-types option) value) (raise-user-error (format "Setting option ~a requires ~a; received ~a" option (hash-ref option-types option) value))) @@ -258,7 +260,10 @@ [sterling_port value])] [(equal? option 'engine_verbosity) (struct-copy Options options - [engine_verbosity value])])) + [engine_verbosity value])] + [(equal? option 'test_keep) + (struct-copy Options options + [test_keep value])])) (struct-copy State state [options new-options])) @@ -586,40 +591,32 @@ [(member 'expected '(sat unsat)) #,(syntax/loc stx (run name args ...)) (define first-instance (tree:get-value (Run-result name))) - (unless (equal? (if (Sat? first-instance) 'sat 'unsat) 'expected) - ;(when (> (get-verbosity) 0) - ; (printf "Unexpected result found, with statistics and metadata:~n") - ; (pretty-print first-instance)) - ;(display name) ;; Display in Sterling since the test failed. - (report-test-failure - #:name 'name - #:msg (format "Failed test ~a. Expected ~a, got ~a.~a" - 'name 'expected (if (Sat? first-instance) 'sat 'unsat) - (if (Sat? first-instance) - (format " Found instance ~a" first-instance) - (if (Unsat-core first-instance) - (format " Core: ~a" (Unsat-core first-instance)) - ""))) - #:context loc - #:instance first-instance - #:run name)) - (close-run name)] + (if (not (equal? (if (Sat? first-instance) 'sat 'unsat) 'expected)) + (report-test-failure + #:name 'name + #:msg (format "Failed test ~a. Expected ~a, got ~a.~a" + 'name 'expected (if (Sat? first-instance) 'sat 'unsat) + (if (Sat? first-instance) + (format " Found instance ~a" first-instance) + (if (Unsat-core first-instance) + (format " Core: ~a" (Unsat-core first-instance)) + ""))) + #:context loc + #:instance first-instance + #:run name) + (close-run name))] [(equal? 'expected 'theorem) #,(syntax/loc stx (check name args ...)) (define first-instance (tree:get-value (Run-result name))) - (when (Sat? first-instance) - ;(when (> (get-verbosity) 0) - ; (printf "Instance found, with statistics and metadata:~n") - ; (pretty-print first-instance)) - ;(display name) ;; Display in sterling since the test failed. - (report-test-failure #:name 'name - #:msg (format "Theorem ~a failed. Found instance:~n~a" - 'name first-instance) - #:context loc - #:instance first-instance - #:run name)) - (close-run name)] + (if (Sat? first-instance) + (report-test-failure #:name 'name + #:msg (format "Theorem ~a failed. Found instance:~n~a" + 'name first-instance) + #:context loc + #:instance first-instance + #:run name) + (close-run name))] [else (raise-forge-error #:msg (format "Illegal argument to test. Received ~a, expected sat, unsat, or theorem." @@ -638,21 +635,27 @@ #:context #,(build-source-location stx))) #,(syntax/loc stx (run name #:preds [pred] #:bounds [bounds ...])) (define first-instance (tree:get-value (Run-result name))) - (when (Unsat? first-instance) - #,(syntax/loc stx (run double-check-name #:preds [] #:bounds [bounds ...])) - (define double-check-instance (tree:get-value (Run-result double-check-name))) - (if (Sat? double-check-instance) - (report-test-failure #:name 'name #:msg (format "Invalid example '~a'; the instance specified does not satisfy the given predicate." 'name) - #:context #,(build-source-location stx) - #:instance first-instance - #:run name) - (report-test-failure #:name 'name #:msg (format (string-append "Invalid example '~a'; the instance specified is impossible. " + (cond + [(Unsat? first-instance) + ; Run a second check to see if {} would have also failed, meaning this example + ; violates the sig/field declarations. + #,(syntax/loc stx (run double-check-name #:preds [] #:bounds [bounds ...])) + (define double-check-instance (tree:get-value (Run-result double-check-name))) + (close-run double-check-name) ;; always close the double-check run immediately + + (if (Sat? double-check-instance) + (report-test-failure #:name 'name #:msg (format "Invalid example '~a'; the instance specified does not satisfy the given predicate." 'name) + #:context #,(build-source-location stx) + #:instance first-instance + #:run name) + (report-test-failure #:name 'name #:msg (format (string-append "Invalid example '~a'; the instance specified is impossible. " "This means that the specified bounds conflict with each other " - "or with the sig/relation definitions.") - 'name) - #:context #,(build-source-location stx) - #:instance first-instance - #:run name)))))))])) + "or with the sig/field definitions.") + 'name) + #:context #,(build-source-location stx) + #:instance first-instance + #:run name))] + [else (close-run name)])))))])) ; Checks that some predicates are always true. ; (check name @@ -702,7 +705,7 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; make-model-generator :: Stream -> (-> model) -; Creates a thunk which generates a new model on each call. +; Creates a thunk which generates a new instance on each call. (define (make-model-generator model-lazy-tree [mode 'next]) (thunk (define ret (tree:get-value model-lazy-tree)) @@ -981,28 +984,44 @@ ;; Infrastructure for handling multiple test failures / multiple runs ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Struct to hold test-failure information for eventual reporting (struct test-failure (name msg context instance run)) - -(define delayed-test-failures null) - +; Mutable value to store list of test-failure structs +(define delayed-test-failures null) +; Called to clear the mutable list (define (reset-test-failures!) (set! delayed-test-failures null)) -(define delay-test-failure-reporting? (make-parameter #t)) ; Record (or report, depending on the value of the delay-test-failure-reporting? ; parameter) a test failure. (define (report-test-failure #:name name #:msg msg #:context context #:instance [instance #f] #:run run) - (cond [(delay-test-failure-reporting?) - (printf "Test ~a failed. Continuing to run, and will report details at the end.~n" name) + ; Default is to not delay, but options may affect this. + (cond [(not (equal? (get-option run 'test_keep) 'first)) + (unless (equal? (get-verbosity) 0) + (printf "Test ~a failed. Continuing to run and will report details at the end.~n" name)) + ; close previous failure run, since we are keeping only the final failure for Sterling + (unless (empty? delayed-test-failures) + (close-run (test-failure-run (first delayed-test-failures)))) + ; then add this failure to the queue (set! delayed-test-failures (cons (test-failure name msg context instance run) delayed-test-failures))] + [else + ; Raise a Forge error and stop execution; show Sterling if enabled. + (when (>= (get-verbosity) 1) + (printf "Test ~a failed. Opening Sterling (if able) and stopping.~n" name)) + (true-display run) (raise-forge-error #:msg msg #:context context)])) +; To be run at the very end of the Forge execution; reports test failures and opens +; only one failure (as only one solver will be kept open, at the moment) (define (output-all-test-failures) ; In order, for each failure: (define failures (remove-duplicates (reverse delayed-test-failures))) - (unless (empty? failures) - (printf "~nSome tests failed. Reporting failures in order.~n~n" )) + (unless (or (< (get-verbosity) 1) (empty? failures)) + (printf "~nSome tests failed. Reporting failures in order:~n~n" )) + + (define last-failure (if (empty? failures) #f (last failures))) + (for ([failure failures]) (define-values (name msg context instance run) (values (test-failure-name failure) (test-failure-msg failure) @@ -1012,12 +1031,16 @@ ; Print the error (don't raise an actual exception) (define sterling-or-instance (if (equal? (get-option run 'run_sterling) 'off) (format "Sterling disabled, so reporting raw instance data:~n~a" instance) - "Running Sterling to show instance (or unsat) generated...")) + (format "Running Sterling to show instance generated, if any.~n~a" + (if (equal? failure last-failure) + "Solver is active; evaluator and next are available." + "For all but the final test failure, the solver was closed to save memory; evaluator and next are unavailable.")))) (raise-forge-error #:msg (format "~a ~a~n~n" msg sterling-or-instance) #:context context #:raise? #f) ; Display in Sterling (if run_sterling is enabled) - (true-display run)) + (when (equal? failure last-failure) + (true-display run))) ; Return to empty-failure-list state. (reset-test-failures!)) diff --git a/forge/tests/error/bsl_multiple_failures.frg b/forge/tests/error/bsl_multiple_failures.frg index b7f214ac..cf2e5059 100644 --- a/forge/tests/error/bsl_multiple_failures.frg +++ b/forge/tests/error/bsl_multiple_failures.frg @@ -1,6 +1,6 @@ #lang forge/bsl option run_sterling off - +option test_keep last option verbose 0 sig Node { diff --git a/forge/tests/error/multiple-positive-examples-failing.frg b/forge/tests/error/multiple-positive-examples-failing.frg index a5abb2a8..bf327ad2 100644 --- a/forge/tests/error/multiple-positive-examples-failing.frg +++ b/forge/tests/error/multiple-positive-examples-failing.frg @@ -1,5 +1,6 @@ #lang forge option run_sterling off +option test_keep last option verbose 0 -- Regression test to confirm that the "double-check" done in example tests diff --git a/forge/tests/error/multiple_test_failures.frg b/forge/tests/error/multiple_test_failures.frg index 62dfc52f..b4fe517c 100644 --- a/forge/tests/error/multiple_test_failures.frg +++ b/forge/tests/error/multiple_test_failures.frg @@ -1,6 +1,7 @@ #lang forge option run_sterling off +option test_keep last option verbose 0 diff --git a/forge/tests/error/properties_undirected_tree_underconstraint_multiple_errors.frg b/forge/tests/error/properties_undirected_tree_underconstraint_multiple_errors.frg index 3dfc1cf0..240dddaf 100644 --- a/forge/tests/error/properties_undirected_tree_underconstraint_multiple_errors.frg +++ b/forge/tests/error/properties_undirected_tree_underconstraint_multiple_errors.frg @@ -1,5 +1,6 @@ #lang forge option run_sterling off +option test_keep last option verbose 0 sig Node {edges: set Node} diff --git a/forge/tests/error/unstated_bounds.frg b/forge/tests/error/unstated_bounds.frg new file mode 100644 index 00000000..1356b6dd --- /dev/null +++ b/forge/tests/error/unstated_bounds.frg @@ -0,0 +1,11 @@ +#lang forge + +// Detect when sufficient bounds have not been provided. + +sig Match {} +sig B, C extends Match {} + +test expect { + should_error: {} for exactly 4 B, 3 C is sat +} + diff --git a/forge/tests/forge-core/other/test_behavior_first.rkt b/forge/tests/forge-core/other/test_behavior_first.rkt new file mode 100644 index 00000000..bd68ba92 --- /dev/null +++ b/forge/tests/forge-core/other/test_behavior_first.rkt @@ -0,0 +1,42 @@ +#lang forge/core + +; Confirm that test runs are closed or kept open as expected. +; (This is related to multiple_runs.rkt, but has a different focus.) + +(require (only-in rackunit check-eq? check-not-eq? check-exn check-not-exn check-true)) +(set-option! 'verbose 0) +(set-option! 'run_sterling 'off) + +; 'first is the default, so no need to run this line: +;(set-option! 'test_keep 'first) + +(sig A) +(sig B) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Default option: 'test_keep is 'first +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; This test should fail and produce an exception immediately in 'first mode: +(check-exn #rx"Failed test sat-run-A." + (lambda () (test sat-run-A + #:preds [(some A) (no B)] + #:expect unsat))) + +; This is not bound to an accessible identifier, but it _is_ kept in the state: +(define local-sat-run-A (hash-ref (forge:State-runmap forge:curr-state) 'sat-run-A)) +; The test run should remain open, so that Sterling's evaluator is available, etc. +; DO NOT USE is-running?; that is for the solver process as a whole. +(check-true (forge:is-sat? local-sat-run-A)) +(check-true (not (forge:is-run-closed? local-sat-run-A))) + +;;;;;;;;;;;;;;;;;;;;; + +; This test should pass, and its run should be closed immediately. +(check-not-exn + (lambda () (test sat-run-B + #:preds [(some A) (no B)] + #:expect sat))) +(define local-sat-run-B (hash-ref (forge:State-runmap forge:curr-state) 'sat-run-B)) +(check-true (forge:is-sat? local-sat-run-B)) +(check-true (forge:is-run-closed? local-sat-run-B)) diff --git a/forge/tests/forge-core/other/test_behavior_last.rkt b/forge/tests/forge-core/other/test_behavior_last.rkt new file mode 100644 index 00000000..ee593c08 --- /dev/null +++ b/forge/tests/forge-core/other/test_behavior_last.rkt @@ -0,0 +1,44 @@ +#lang forge/core + +; Confirm that test runs are closed or kept open as expected. +; (This is related to multiple_runs.rkt, but has a different focus.) + +(require (only-in rackunit check-eq? check-not-eq? check-exn check-not-exn check-true)) +(set-option! 'verbose 0) +(set-option! 'run_sterling 'off) + +; Not the default: +(set-option! 'test_keep 'last) + +(sig A) +(sig B) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Modified option: 'test_keep is 'last +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; This test should fail, but not immediately produce an exception: +(check-not-exn + (lambda () (test sat-run-A + #:preds [(some A) (no B)] + #:expect unsat))) +; Same with this test. +(check-not-exn + (lambda () (test sat-run-B + #:preds [(some A) (no B)] + #:expect unsat))) + +; This is not bound to an accessible identifier, but it _is_ kept in the state: +(define local-sat-run-A (hash-ref (forge:State-runmap forge:curr-state) 'sat-run-A)) +(define local-sat-run-B (hash-ref (forge:State-runmap forge:curr-state) 'sat-run-B)) +; Under the 'last option, the first run should have been closed +; DO NOT USE is-running?; that is for the solver process as a whole. +; Under the 'last option, the second run should remain open (since it is the last failure) +(check-true (forge:is-sat? local-sat-run-A)) +(check-true (forge:is-run-closed? local-sat-run-A)) +(check-true (forge:is-sat? local-sat-run-B)) +(check-true (not (forge:is-run-closed? local-sat-run-B))) + +; This is, however, not calling the failure-reporter that the Forge surface languages +; invoke at the end of their execution -- since this is forge/core. +; (output-all-test-failures)