Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: better handling for multiple-tests #245

Merged
merged 6 commits into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions forge/send-to-kodkod.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -343,8 +343,13 @@

; Print solve
(define (get-next-model [mode ""])
; If the solver isn't running at all, error:
(unless (is-running?)
(raise-user-error "KodKod server is not running."))
; If the solver is running, but this specific run ID is closed, user error
(when (is-run-closed? run-name)
(raise-user-error (format "Run ~a has been closed." run-name)))

(pardinus-print (pardinus:solve run-name mode))
(define result (translate-from-kodkod-cli
'run
Expand Down
46 changes: 37 additions & 9 deletions forge/sigs-structs.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@
[run_sterling (or/c string? symbol?)]
[sterling_port nonnegative-integer?]
[engine_verbosity nonnegative-integer?]
[test_keep symbol?]
) #:transparent)

(struct/contract State (
Expand Down Expand Up @@ -209,7 +210,7 @@
[name symbol?]
[command syntax?]
[run-spec Run-spec?]
[result tree:node?] ; TODO: specify
[result tree:node?]
[server-ports Server-ports?]
[atoms (listof (or/c symbol? number?))]
[kodkod-currents Kodkod-current?]
Expand All @@ -224,7 +225,8 @@
(define DEFAULT-SIG-SCOPE (Range 0 4))
; an engine_verbosity of 1 logs SEVERE level in the Java engine;
; this will send back info about crashes, but shouldn't spam (and possibly overfill) stderr.
(define DEFAULT-OPTIONS (Options 'surface 'SAT4J 'pardinus 20 0 0 1 5 'default 'close-noretarget 'fast 0 'off 'on 0 1))
(define DEFAULT-OPTIONS (Options 'surface 'SAT4J 'pardinus 20 0 0 1 5 'default
'close-noretarget 'fast 0 'off 'on 0 1 'first))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;; Constants ;;;;;;;
Expand Down Expand Up @@ -255,7 +257,11 @@
'local_necessity Options-local_necessity
'run_sterling Options-run_sterling
'sterling_port Options-sterling_port
'engine_verbosity Options-engine_verbosity))
'engine_verbosity Options-engine_verbosity
'test_keep Options-test_keep))

(define (oneof-pred lst)
(lambda (x) (member x lst)))

(define option-types
(hash 'eval-language symbol?
Expand All @@ -268,14 +274,14 @@
'min_tracelength exact-positive-integer?
'max_tracelength exact-positive-integer?
'problem_type symbol?
'target_mode (lambda (x)
(member x '(close_noretarget far_noretarget close_retarget far_retarget hamming_cover)))
'target_mode (oneof-pred '(close_noretarget far_noretarget close_retarget far_retarget hamming_cover))
'core_minimization symbol?
'skolem_depth exact-integer?
'local_necessity symbol?
'run_sterling (lambda (x) (or (symbol? x) (string? x))) ; allow for custom visualizer path
'sterling_port exact-nonnegative-integer?
'engine_verbosity exact-nonnegative-integer?))
'engine_verbosity exact-nonnegative-integer?
'test_keep (oneof-pred '(first last))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;; Initial State ;;;;;;;
Expand Down Expand Up @@ -564,24 +570,46 @@ Returns whether the given run resulted in sat or unsat, respectively.
(assert-is-running run)
(Server-ports-stderr (Run-server-ports run)))

;;;;;;;;;;;;;;;;;;;;;;;
; Per-run closed status

; Keep track of which runs have been closed via close-run
(define closed-run-names (box (list)))
; Allows other modules to let this layer know a run is closed; this box
; is referenced by the instance generator for each run.
(define (add-closed-run-name! name)
(set-box! closed-run-names (cons name (unbox closed-run-names))))
(define (is-run-closed? name-or-run)
(define (truthify x) (if x #t #f))
(truthify
(cond [(Run? name-or-run)
(member (Run-name name-or-run) (unbox closed-run-names))]
[else
(member name-or-run (unbox closed-run-names))])))

; close-run :: Run -> void
(define (close-run run)
(assert-is-running run)
(when (>= (get-verbosity) VERBOSITY_HIGH)
(printf "Clearing run ~a. Keeping engine process active...~n" (Run-name run)))
(printf "Clearing run ~a. Keeping engine process active...~n" (Run-name run)))
; Cut off this Run's ability to query the solver, since it's about to be closed
; This state is referenced in the instance-generator thunk
(add-closed-run-name! (Run-name run))

; Since we're using a single process now, send it instructions to clear this run
(pardinus:cmd
[(get-stdin run)]

(pardinus:clear (Run-name run))))

; is-running :: Run -> Boolean
; This reports whether the _solver server_ is running;
; *NOT* whether an individual run is still open.
(define (is-running? run)
((Server-ports-is-running? (Run-server-ports run))))

(define (assert-is-running run)
(unless (is-running? run)
(raise-user-error "KodKod server is not running.")))
(raise-user-error "KodKod/Pardinus solver process is not running.")))

(require (for-syntax syntax/srcloc)) ; for these macros

Expand Down
135 changes: 79 additions & 56 deletions forge/sigs.rkt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#lang racket/base

(require (only-in racket/function thunk)
(only-in racket/list first rest empty empty? flatten remove-duplicates)
(only-in racket/list first rest empty empty? flatten remove-duplicates last)
(only-in racket/pretty pretty-print)
(prefix-in @ (only-in racket/base display max min - +))
(prefix-in @ racket/set)
Expand Down Expand Up @@ -188,6 +188,8 @@
(define (state-set-option state option value #:original-path [original-path #f])
(define options (State-options state))

(unless (hash-ref option-types option #f)
(raise-user-error (format "No such option: ~a" option)))
(unless ((hash-ref option-types option) value)
(raise-user-error (format "Setting option ~a requires ~a; received ~a"
option (hash-ref option-types option) value)))
Expand Down Expand Up @@ -258,7 +260,10 @@
[sterling_port value])]
[(equal? option 'engine_verbosity)
(struct-copy Options options
[engine_verbosity value])]))
[engine_verbosity value])]
[(equal? option 'test_keep)
(struct-copy Options options
[test_keep value])]))

(struct-copy State state
[options new-options]))
Expand Down Expand Up @@ -586,40 +591,32 @@
[(member 'expected '(sat unsat))
#,(syntax/loc stx (run name args ...))
(define first-instance (tree:get-value (Run-result name)))
(unless (equal? (if (Sat? first-instance) 'sat 'unsat) 'expected)
;(when (> (get-verbosity) 0)
; (printf "Unexpected result found, with statistics and metadata:~n")
; (pretty-print first-instance))
;(display name) ;; Display in Sterling since the test failed.
(report-test-failure
#:name 'name
#:msg (format "Failed test ~a. Expected ~a, got ~a.~a"
'name 'expected (if (Sat? first-instance) 'sat 'unsat)
(if (Sat? first-instance)
(format " Found instance ~a" first-instance)
(if (Unsat-core first-instance)
(format " Core: ~a" (Unsat-core first-instance))
"")))
#:context loc
#:instance first-instance
#:run name))
(close-run name)]
(if (not (equal? (if (Sat? first-instance) 'sat 'unsat) 'expected))
(report-test-failure
#:name 'name
#:msg (format "Failed test ~a. Expected ~a, got ~a.~a"
'name 'expected (if (Sat? first-instance) 'sat 'unsat)
(if (Sat? first-instance)
(format " Found instance ~a" first-instance)
(if (Unsat-core first-instance)
(format " Core: ~a" (Unsat-core first-instance))
"")))
#:context loc
#:instance first-instance
#:run name)
(close-run name))]

[(equal? 'expected 'theorem)
#,(syntax/loc stx (check name args ...))
(define first-instance (tree:get-value (Run-result name)))
(when (Sat? first-instance)
;(when (> (get-verbosity) 0)
; (printf "Instance found, with statistics and metadata:~n")
; (pretty-print first-instance))
;(display name) ;; Display in sterling since the test failed.
(report-test-failure #:name 'name
#:msg (format "Theorem ~a failed. Found instance:~n~a"
'name first-instance)
#:context loc
#:instance first-instance
#:run name))
(close-run name)]
(if (Sat? first-instance)
(report-test-failure #:name 'name
#:msg (format "Theorem ~a failed. Found instance:~n~a"
'name first-instance)
#:context loc
#:instance first-instance
#:run name)
(close-run name))]

[else (raise-forge-error
#:msg (format "Illegal argument to test. Received ~a, expected sat, unsat, or theorem."
Expand All @@ -638,21 +635,27 @@
#:context #,(build-source-location stx)))
#,(syntax/loc stx (run name #:preds [pred] #:bounds [bounds ...]))
(define first-instance (tree:get-value (Run-result name)))
(when (Unsat? first-instance)
#,(syntax/loc stx (run double-check-name #:preds [] #:bounds [bounds ...]))
(define double-check-instance (tree:get-value (Run-result double-check-name)))
(if (Sat? double-check-instance)
(report-test-failure #:name 'name #:msg (format "Invalid example '~a'; the instance specified does not satisfy the given predicate." 'name)
#:context #,(build-source-location stx)
#:instance first-instance
#:run name)
(report-test-failure #:name 'name #:msg (format (string-append "Invalid example '~a'; the instance specified is impossible. "
(cond
[(Unsat? first-instance)
; Run a second check to see if {} would have also failed, meaning this example
; violates the sig/field declarations.
#,(syntax/loc stx (run double-check-name #:preds [] #:bounds [bounds ...]))
(define double-check-instance (tree:get-value (Run-result double-check-name)))
(close-run double-check-instance) ;; always close the double-check run immediately

(if (Sat? double-check-instance)
(report-test-failure #:name 'name #:msg (format "Invalid example '~a'; the instance specified does not satisfy the given predicate." 'name)
#:context #,(build-source-location stx)
#:instance first-instance
#:run name)
(report-test-failure #:name 'name #:msg (format (string-append "Invalid example '~a'; the instance specified is impossible. "
"This means that the specified bounds conflict with each other "
"or with the sig/relation definitions.")
'name)
#:context #,(build-source-location stx)
#:instance first-instance
#:run name)))))))]))
"or with the sig/field definitions.")
'name)
#:context #,(build-source-location stx)
#:instance first-instance
#:run name))]
[else (close-run name)])))))]))

; Checks that some predicates are always true.
; (check name
Expand Down Expand Up @@ -702,7 +705,7 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; make-model-generator :: Stream<model> -> (-> model)
; Creates a thunk which generates a new model on each call.
; Creates a thunk which generates a new instance on each call.
(define (make-model-generator model-lazy-tree [mode 'next])
(thunk
(define ret (tree:get-value model-lazy-tree))
Expand Down Expand Up @@ -981,28 +984,44 @@
;; Infrastructure for handling multiple test failures / multiple runs
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Struct to hold test-failure information for eventual reporting
(struct test-failure (name msg context instance run))

(define delayed-test-failures null)

; Mutable value to store list of test-failure structs
(define delayed-test-failures null)
; Called to clear the mutable list
(define (reset-test-failures!) (set! delayed-test-failures null))
(define delay-test-failure-reporting? (make-parameter #t))

; Record (or report, depending on the value of the delay-test-failure-reporting?
; parameter) a test failure.
(define (report-test-failure #:name name #:msg msg #:context context #:instance [instance #f] #:run run)
(cond [(delay-test-failure-reporting?)
(printf "Test ~a failed. Continuing to run, and will report details at the end.~n" name)
; Default is to not delay, but options may affect this.
(cond [(not (equal? (get-option run 'test_keep) 'first))
(unless (equal? (get-verbosity) 0)
(printf "Test ~a failed. Continuing to run and will report details at the end.~n" name))
; close previous failure run, since we are keeping only the final failure for Sterling
(unless (empty? delayed-test-failures)
(close-run (test-failure-run (first delayed-test-failures))))
; then add this failure to the queue
(set! delayed-test-failures (cons (test-failure name msg context instance run)
delayed-test-failures))]

[else
; Raise a Forge error and stop execution; show Sterling if enabled.
(when (>= (get-verbosity) 1)
(printf "Test ~a failed. Opening Sterling (if able) and stopping.~n" name))
(true-display run)
(raise-forge-error #:msg msg #:context context)]))

; To be run at the very end of the Forge execution; reports test failures and opens
; only one failure (as only one solver will be kept open, at the moment)
(define (output-all-test-failures)
; In order, for each failure:
(define failures (remove-duplicates (reverse delayed-test-failures)))
(unless (empty? failures)
(printf "~nSome tests failed. Reporting failures in order.~n~n" ))
(printf "~nSome tests failed. Reporting failures in order:~n~n" ))

(define last-failure (if (empty? failures) #f (last failures)))

(for ([failure failures])
(define-values (name msg context instance run)
(values (test-failure-name failure) (test-failure-msg failure)
Expand All @@ -1012,12 +1031,16 @@
; Print the error (don't raise an actual exception)
(define sterling-or-instance (if (equal? (get-option run 'run_sterling) 'off)
(format "Sterling disabled, so reporting raw instance data:~n~a" instance)
"Running Sterling to show instance (or unsat) generated..."))
(format "Running Sterling to show instance generated, if any.~n~a"
(if (equal? failure last-failure)
"Solver is active; evaluator and next are available."
"For all but the final test failure, the solver was closed to save memory; evaluator and next are unavailable."))))
(raise-forge-error #:msg (format "~a ~a~n~n" msg sterling-or-instance)
#:context context
#:raise? #f)
; Display in Sterling (if run_sterling is enabled)
(true-display run))
(when (equal? failure last-failure)
(true-display run)))

; Return to empty-failure-list state.
(reset-test-failures!))
Expand Down
42 changes: 42 additions & 0 deletions forge/tests/forge-core/other/test_behavior_first.rkt
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#lang forge/core

; Confirm that test runs are closed or kept open as expected.
; (This is related to multiple_runs.rkt, but has a different focus.)

(require (only-in rackunit check-eq? check-not-eq? check-exn check-not-exn check-true))
(set-option! 'verbose 0)
(set-option! 'run_sterling 'off)

; 'first is the default, so no need to run this line:
;(set-option! 'test_keep 'first)

(sig A)
(sig B)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Default option: 'test_keep is 'first
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; This test should fail and produce an exception immediately in 'first mode:
(check-exn #rx"Failed test sat-run-A."
(lambda () (test sat-run-A
#:preds [(some A) (no B)]
#:expect unsat)))

; This is not bound to an accessible identifier, but it _is_ kept in the state:
(define local-sat-run-A (hash-ref (forge:State-runmap forge:curr-state) 'sat-run-A))
; The test run should remain open, so that Sterling's evaluator is available, etc.
; DO NOT USE is-running?; that is for the solver process as a whole.
(check-true (forge:is-sat? local-sat-run-A))
(check-true (not (forge:is-run-closed? local-sat-run-A)))

;;;;;;;;;;;;;;;;;;;;;

; This test should pass, and its run should be closed immediately.
(check-not-exn
(lambda () (test sat-run-B
#:preds [(some A) (no B)]
#:expect sat)))
(define local-sat-run-B (hash-ref (forge:State-runmap forge:curr-state) 'sat-run-B))
(check-true (forge:is-sat? local-sat-run-B))
(check-true (forge:is-run-closed? local-sat-run-B))
Loading
Loading