Skip to content

Commit

Permalink
Fix: better handling for multiple-tests (#245)
Browse files Browse the repository at this point in the history
  • Loading branch information
tnelson authored Feb 29, 2024
1 parent e314f80 commit 40beae6
Show file tree
Hide file tree
Showing 10 changed files with 223 additions and 67 deletions.
5 changes: 5 additions & 0 deletions forge/send-to-kodkod.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -343,8 +343,13 @@

; Print solve
(define (get-next-model [mode ""])
; If the solver isn't running at all, error:
(unless (is-running?)
(raise-user-error "KodKod server is not running."))
; If the solver is running, but this specific run ID is closed, user error
(when (is-run-closed? run-name)
(raise-user-error (format "Run ~a has been closed." run-name)))

(pardinus-print (pardinus:solve run-name mode))
(define result (translate-from-kodkod-cli
'run
Expand Down
46 changes: 37 additions & 9 deletions forge/sigs-structs.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@
[run_sterling (or/c string? symbol?)]
[sterling_port nonnegative-integer?]
[engine_verbosity nonnegative-integer?]
[test_keep symbol?]
) #:transparent)

(struct/contract State (
Expand Down Expand Up @@ -209,7 +210,7 @@
[name symbol?]
[command syntax?]
[run-spec Run-spec?]
[result tree:node?] ; TODO: specify
[result tree:node?]
[server-ports Server-ports?]
[atoms (listof (or/c symbol? number?))]
[kodkod-currents Kodkod-current?]
Expand All @@ -224,7 +225,8 @@
(define DEFAULT-SIG-SCOPE (Range 0 4))
; an engine_verbosity of 1 logs SEVERE level in the Java engine;
; this will send back info about crashes, but shouldn't spam (and possibly overfill) stderr.
(define DEFAULT-OPTIONS (Options 'surface 'SAT4J 'pardinus 20 0 0 1 5 'default 'close-noretarget 'fast 0 'off 'on 0 1))
(define DEFAULT-OPTIONS (Options 'surface 'SAT4J 'pardinus 20 0 0 1 5 'default
'close-noretarget 'fast 0 'off 'on 0 1 'first))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;; Constants ;;;;;;;
Expand Down Expand Up @@ -255,7 +257,11 @@
'local_necessity Options-local_necessity
'run_sterling Options-run_sterling
'sterling_port Options-sterling_port
'engine_verbosity Options-engine_verbosity))
'engine_verbosity Options-engine_verbosity
'test_keep Options-test_keep))

(define (oneof-pred lst)
(lambda (x) (member x lst)))

(define option-types
(hash 'eval-language symbol?
Expand All @@ -268,14 +274,14 @@
'min_tracelength exact-positive-integer?
'max_tracelength exact-positive-integer?
'problem_type symbol?
'target_mode (lambda (x)
(member x '(close_noretarget far_noretarget close_retarget far_retarget hamming_cover)))
'target_mode (oneof-pred '(close_noretarget far_noretarget close_retarget far_retarget hamming_cover))
'core_minimization symbol?
'skolem_depth exact-integer?
'local_necessity symbol?
'run_sterling (lambda (x) (or (symbol? x) (string? x))) ; allow for custom visualizer path
'sterling_port exact-nonnegative-integer?
'engine_verbosity exact-nonnegative-integer?))
'engine_verbosity exact-nonnegative-integer?
'test_keep (oneof-pred '(first last))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;; Initial State ;;;;;;;
Expand Down Expand Up @@ -564,24 +570,46 @@ Returns whether the given run resulted in sat or unsat, respectively.
(assert-is-running run)
(Server-ports-stderr (Run-server-ports run)))

;;;;;;;;;;;;;;;;;;;;;;;
; Per-run closed status

; Keep track of which runs have been closed via close-run
(define closed-run-names (box (list)))
; Allows other modules to let this layer know a run is closed; this box
; is referenced by the instance generator for each run.
(define (add-closed-run-name! name)
(set-box! closed-run-names (cons name (unbox closed-run-names))))
(define (is-run-closed? name-or-run)
(define (truthify x) (if x #t #f))
(truthify
(cond [(Run? name-or-run)
(member (Run-name name-or-run) (unbox closed-run-names))]
[else
(member name-or-run (unbox closed-run-names))])))

; close-run :: Run -> void
(define (close-run run)
(assert-is-running run)
(when (>= (get-verbosity) VERBOSITY_HIGH)
(printf "Clearing run ~a. Keeping engine process active...~n" (Run-name run)))
(printf "Clearing run ~a. Keeping engine process active...~n" (Run-name run)))
; Cut off this Run's ability to query the solver, since it's about to be closed
; This state is referenced in the instance-generator thunk
(add-closed-run-name! (Run-name run))

; Since we're using a single process now, send it instructions to clear this run
(pardinus:cmd
[(get-stdin run)]

(pardinus:clear (Run-name run))))

; is-running :: Run -> Boolean
; This reports whether the _solver server_ is running;
; *NOT* whether an individual run is still open.
(define (is-running? run)
((Server-ports-is-running? (Run-server-ports run))))

(define (assert-is-running run)
(unless (is-running? run)
(raise-user-error "KodKod server is not running.")))
(raise-user-error "KodKod/Pardinus solver process is not running.")))

(require (for-syntax syntax/srcloc)) ; for these macros

Expand Down
137 changes: 80 additions & 57 deletions forge/sigs.rkt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#lang racket/base

(require (only-in racket/function thunk)
(only-in racket/list first rest empty empty? flatten remove-duplicates)
(only-in racket/list first rest empty empty? flatten remove-duplicates last)
(only-in racket/pretty pretty-print)
(prefix-in @ (only-in racket/base display max min - +))
(prefix-in @ racket/set)
Expand Down Expand Up @@ -188,6 +188,8 @@
(define (state-set-option state option value #:original-path [original-path #f])
(define options (State-options state))

(unless (hash-ref option-types option #f)
(raise-user-error (format "No such option: ~a" option)))
(unless ((hash-ref option-types option) value)
(raise-user-error (format "Setting option ~a requires ~a; received ~a"
option (hash-ref option-types option) value)))
Expand Down Expand Up @@ -258,7 +260,10 @@
[sterling_port value])]
[(equal? option 'engine_verbosity)
(struct-copy Options options
[engine_verbosity value])]))
[engine_verbosity value])]
[(equal? option 'test_keep)
(struct-copy Options options
[test_keep value])]))

(struct-copy State state
[options new-options]))
Expand Down Expand Up @@ -586,40 +591,32 @@
[(member 'expected '(sat unsat))
#,(syntax/loc stx (run name args ...))
(define first-instance (tree:get-value (Run-result name)))
(unless (equal? (if (Sat? first-instance) 'sat 'unsat) 'expected)
;(when (> (get-verbosity) 0)
; (printf "Unexpected result found, with statistics and metadata:~n")
; (pretty-print first-instance))
;(display name) ;; Display in Sterling since the test failed.
(report-test-failure
#:name 'name
#:msg (format "Failed test ~a. Expected ~a, got ~a.~a"
'name 'expected (if (Sat? first-instance) 'sat 'unsat)
(if (Sat? first-instance)
(format " Found instance ~a" first-instance)
(if (Unsat-core first-instance)
(format " Core: ~a" (Unsat-core first-instance))
"")))
#:context loc
#:instance first-instance
#:run name))
(close-run name)]
(if (not (equal? (if (Sat? first-instance) 'sat 'unsat) 'expected))
(report-test-failure
#:name 'name
#:msg (format "Failed test ~a. Expected ~a, got ~a.~a"
'name 'expected (if (Sat? first-instance) 'sat 'unsat)
(if (Sat? first-instance)
(format " Found instance ~a" first-instance)
(if (Unsat-core first-instance)
(format " Core: ~a" (Unsat-core first-instance))
"")))
#:context loc
#:instance first-instance
#:run name)
(close-run name))]

[(equal? 'expected 'theorem)
#,(syntax/loc stx (check name args ...))
(define first-instance (tree:get-value (Run-result name)))
(when (Sat? first-instance)
;(when (> (get-verbosity) 0)
; (printf "Instance found, with statistics and metadata:~n")
; (pretty-print first-instance))
;(display name) ;; Display in sterling since the test failed.
(report-test-failure #:name 'name
#:msg (format "Theorem ~a failed. Found instance:~n~a"
'name first-instance)
#:context loc
#:instance first-instance
#:run name))
(close-run name)]
(if (Sat? first-instance)
(report-test-failure #:name 'name
#:msg (format "Theorem ~a failed. Found instance:~n~a"
'name first-instance)
#:context loc
#:instance first-instance
#:run name)
(close-run name))]

[else (raise-forge-error
#:msg (format "Illegal argument to test. Received ~a, expected sat, unsat, or theorem."
Expand All @@ -638,21 +635,27 @@
#:context #,(build-source-location stx)))
#,(syntax/loc stx (run name #:preds [pred] #:bounds [bounds ...]))
(define first-instance (tree:get-value (Run-result name)))
(when (Unsat? first-instance)
#,(syntax/loc stx (run double-check-name #:preds [] #:bounds [bounds ...]))
(define double-check-instance (tree:get-value (Run-result double-check-name)))
(if (Sat? double-check-instance)
(report-test-failure #:name 'name #:msg (format "Invalid example '~a'; the instance specified does not satisfy the given predicate." 'name)
#:context #,(build-source-location stx)
#:instance first-instance
#:run name)
(report-test-failure #:name 'name #:msg (format (string-append "Invalid example '~a'; the instance specified is impossible. "
(cond
[(Unsat? first-instance)
; Run a second check to see if {} would have also failed, meaning this example
; violates the sig/field declarations.
#,(syntax/loc stx (run double-check-name #:preds [] #:bounds [bounds ...]))
(define double-check-instance (tree:get-value (Run-result double-check-name)))
(close-run double-check-name) ;; always close the double-check run immediately

(if (Sat? double-check-instance)
(report-test-failure #:name 'name #:msg (format "Invalid example '~a'; the instance specified does not satisfy the given predicate." 'name)
#:context #,(build-source-location stx)
#:instance first-instance
#:run name)
(report-test-failure #:name 'name #:msg (format (string-append "Invalid example '~a'; the instance specified is impossible. "
"This means that the specified bounds conflict with each other "
"or with the sig/relation definitions.")
'name)
#:context #,(build-source-location stx)
#:instance first-instance
#:run name)))))))]))
"or with the sig/field definitions.")
'name)
#:context #,(build-source-location stx)
#:instance first-instance
#:run name))]
[else (close-run name)])))))]))

; Checks that some predicates are always true.
; (check name
Expand Down Expand Up @@ -702,7 +705,7 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; make-model-generator :: Stream<model> -> (-> model)
; Creates a thunk which generates a new model on each call.
; Creates a thunk which generates a new instance on each call.
(define (make-model-generator model-lazy-tree [mode 'next])
(thunk
(define ret (tree:get-value model-lazy-tree))
Expand Down Expand Up @@ -981,28 +984,44 @@
;; Infrastructure for handling multiple test failures / multiple runs
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Struct to hold test-failure information for eventual reporting
(struct test-failure (name msg context instance run))

(define delayed-test-failures null)

; Mutable value to store list of test-failure structs
(define delayed-test-failures null)
; Called to clear the mutable list
(define (reset-test-failures!) (set! delayed-test-failures null))
(define delay-test-failure-reporting? (make-parameter #t))

; Record (or report, depending on the value of the delay-test-failure-reporting?
; parameter) a test failure.
(define (report-test-failure #:name name #:msg msg #:context context #:instance [instance #f] #:run run)
(cond [(delay-test-failure-reporting?)
(printf "Test ~a failed. Continuing to run, and will report details at the end.~n" name)
; Default is to not delay, but options may affect this.
(cond [(not (equal? (get-option run 'test_keep) 'first))
(unless (equal? (get-verbosity) 0)
(printf "Test ~a failed. Continuing to run and will report details at the end.~n" name))
; close previous failure run, since we are keeping only the final failure for Sterling
(unless (empty? delayed-test-failures)
(close-run (test-failure-run (first delayed-test-failures))))
; then add this failure to the queue
(set! delayed-test-failures (cons (test-failure name msg context instance run)
delayed-test-failures))]

[else
; Raise a Forge error and stop execution; show Sterling if enabled.
(when (>= (get-verbosity) 1)
(printf "Test ~a failed. Opening Sterling (if able) and stopping.~n" name))
(true-display run)
(raise-forge-error #:msg msg #:context context)]))

; To be run at the very end of the Forge execution; reports test failures and opens
; only one failure (as only one solver will be kept open, at the moment)
(define (output-all-test-failures)
; In order, for each failure:
(define failures (remove-duplicates (reverse delayed-test-failures)))
(unless (empty? failures)
(printf "~nSome tests failed. Reporting failures in order.~n~n" ))
(unless (or (< (get-verbosity) 1) (empty? failures))
(printf "~nSome tests failed. Reporting failures in order:~n~n" ))

(define last-failure (if (empty? failures) #f (last failures)))

(for ([failure failures])
(define-values (name msg context instance run)
(values (test-failure-name failure) (test-failure-msg failure)
Expand All @@ -1012,12 +1031,16 @@
; Print the error (don't raise an actual exception)
(define sterling-or-instance (if (equal? (get-option run 'run_sterling) 'off)
(format "Sterling disabled, so reporting raw instance data:~n~a" instance)
"Running Sterling to show instance (or unsat) generated..."))
(format "Running Sterling to show instance generated, if any.~n~a"
(if (equal? failure last-failure)
"Solver is active; evaluator and next are available."
"For all but the final test failure, the solver was closed to save memory; evaluator and next are unavailable."))))
(raise-forge-error #:msg (format "~a ~a~n~n" msg sterling-or-instance)
#:context context
#:raise? #f)
; Display in Sterling (if run_sterling is enabled)
(true-display run))
(when (equal? failure last-failure)
(true-display run)))

; Return to empty-failure-list state.
(reset-test-failures!))
Expand Down
2 changes: 1 addition & 1 deletion forge/tests/error/bsl_multiple_failures.frg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#lang forge/bsl
option run_sterling off

option test_keep last
option verbose 0

sig Node {
Expand Down
1 change: 1 addition & 0 deletions forge/tests/error/multiple-positive-examples-failing.frg
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#lang forge
option run_sterling off
option test_keep last
option verbose 0

-- Regression test to confirm that the "double-check" done in example tests
Expand Down
1 change: 1 addition & 0 deletions forge/tests/error/multiple_test_failures.frg
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#lang forge

option run_sterling off
option test_keep last
option verbose 0


Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#lang forge
option run_sterling off
option test_keep last
option verbose 0

sig Node {edges: set Node}
Expand Down
11 changes: 11 additions & 0 deletions forge/tests/error/unstated_bounds.frg
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#lang forge

// Detect when sufficient bounds have not been provided.

sig Match {}
sig B, C extends Match {}

test expect {
should_error: {} for exactly 4 B, 3 C is sat
}

Loading

0 comments on commit 40beae6

Please sign in to comment.