From 8f0c1aa8ebe6e6fb66626f5a380105b480d40faf Mon Sep 17 00:00:00 2001 From: Niko Strijbol Date: Thu, 10 Aug 2023 14:54:13 +0200 Subject: [PATCH] Rename evaluator to oracle --- tested/configs.py | 2 +- tested/dsl/schema.json | 16 +- tested/dsl/translate_parser.py | 30 ++-- tested/evaluators/__init__.py | 110 ------------ tested/features.py | 6 +- tested/judge/core.py | 4 +- tested/judge/evaluation.py | 10 +- tested/judge/programmed.py | 46 ++--- tested/languages/c/generators.py | 2 +- tested/languages/config.py | 8 +- tested/languages/csharp/config.py | 2 +- tested/languages/generation.py | 12 +- tested/languages/java/config.py | 2 +- tested/languages/javascript/generators.py | 2 +- tested/languages/kotlin/config.py | 2 +- tested/languages/preparation.py | 38 ++-- tested/languages/python/generators.py | 2 +- tested/oracles/__init__.py | 86 +++++++++ tested/{evaluators => oracles}/common.py | 51 +++--- tested/{evaluators => oracles}/exception.py | 19 +- tested/{evaluators => oracles}/exitcode.py | 10 +- tested/{evaluators => oracles}/ignored.py | 15 +- tested/{evaluators => oracles}/nothing.py | 25 +-- tested/{evaluators => oracles}/programmed.py | 49 +++-- tested/{evaluators => oracles}/specific.py | 26 +-- tested/{evaluators => oracles}/text.py | 42 +++-- tested/{evaluators => oracles}/value.py | 32 ++-- tested/testsuite.py | 167 +++++++++++------- tests/test_dsl_yaml.py | 64 +++---- tests/{test_evaluators.py => test_oracles.py} | 114 ++++++------ tests/test_suite.py | 79 +++++++++ 31 files changed, 565 insertions(+), 508 deletions(-) delete mode 100644 tested/evaluators/__init__.py create mode 100644 tested/oracles/__init__.py rename tested/{evaluators => oracles}/common.py (65%) rename tested/{evaluators => oracles}/exception.py (91%) rename tested/{evaluators => oracles}/exitcode.py (85%) rename tested/{evaluators => oracles}/ignored.py (62%) rename tested/{evaluators => oracles}/nothing.py (59%) rename tested/{evaluators => oracles}/programmed.py (83%) rename tested/{evaluators => oracles}/specific.py (75%) rename tested/{evaluators => oracles}/text.py (81%) rename tested/{evaluators => oracles}/value.py (93%) rename tests/{test_evaluators.py => test_oracles.py} (83%) create mode 100644 tests/test_suite.py diff --git a/tested/configs.py b/tested/configs.py index 2e619a6f..fd84ef9b 100644 --- a/tested/configs.py +++ b/tested/configs.py @@ -56,7 +56,7 @@ class Options: """ optimized: bool = True """ - If the custom Python evaluator should be optimized or not. + If the Python oracles should be optimized or not. 
""" compiler_optimizations: bool = False """ diff --git a/tested/dsl/schema.json b/tested/dsl/schema.json index cbde7921..87965ff8 100644 --- a/tested/dsl/schema.json +++ b/tested/dsl/schema.json @@ -248,7 +248,7 @@ "oneOf" : [ { "properties" : { - "evaluator" : { + "oracle" : { "type" : "string", "enum" : [ "builtin" @@ -258,15 +258,15 @@ }, { "required" : [ - "evaluator", + "oracle", "language", "file" ], "properties" : { - "evaluator" : { + "oracle" : { "type" : "string", "enum" : [ - "custom" + "custom_check" ] }, "language" : { @@ -314,7 +314,7 @@ "oneOf" : [ { "properties" : { - "evaluator" : { + "oracle" : { "type" : "string", "enum" : [ "builtin" @@ -324,15 +324,15 @@ }, { "required" : [ - "evaluator", + "oracle", "language", "file" ], "properties" : { - "evaluator" : { + "oracle" : { "type" : "string", "enum" : [ - "custom" + "custom_check" ] }, "language" : { diff --git a/tested/dsl/translate_parser.py b/tested/dsl/translate_parser.py index 1c217c9b..c15f9242 100644 --- a/tested/dsl/translate_parser.py +++ b/tested/dsl/translate_parser.py @@ -27,16 +27,16 @@ ) from tested.testsuite import ( Context, + CustomCheckOracle, EmptyChannel, EvaluationFunction, ExceptionOutputChannel, ExitCodeOutputChannel, ExpectedException, FileUrl, - GenericTextEvaluator, + GenericTextOracle, MainInput, Output, - ProgrammedEvaluator, Suite, Tab, Testcase, @@ -147,8 +147,8 @@ def _convert_file(link_file: YamlDict) -> FileUrl: return FileUrl(name=link_file["name"], url=link_file["url"]) -def _convert_programmed_evaluator(stream: dict) -> ProgrammedEvaluator: - return ProgrammedEvaluator( +def _convert_custom_check_oracle(stream: dict) -> CustomCheckOracle: + return CustomCheckOracle( language=stream["language"], function=EvaluationFunction( file=stream["file"], name=stream.get("name", "evaluate") @@ -165,23 +165,21 @@ def _convert_text_output_channel( if isinstance(stream, str): data = stream config = config.get(config_name, {}) - return TextOutputChannel( - data=data, evaluator=GenericTextEvaluator(options=config) - ) + return TextOutputChannel(data=data, oracle=GenericTextOracle(options=config)) else: assert isinstance(stream, dict) data = str(stream["data"]) - if "evaluator" not in stream or stream["evaluator"] == "builtin": + if "oracle" not in stream or stream["oracle"] == "builtin": existing_config = config.get(config_name, {}) config = _deepen_config_level(stream, existing_config) return TextOutputChannel( - data=data, evaluator=GenericTextEvaluator(options=config) + data=data, oracle=GenericTextOracle(options=config) ) - elif stream["evaluator"] == "custom": + elif stream["oracle"] == "custom_check": return TextOutputChannel( - data=data, evaluator=_convert_programmed_evaluator(stream) + data=data, oracle=_convert_custom_check_oracle(stream) ) - raise TypeError(f"Unknown text evaluator type: {stream['evaluator']}") + raise TypeError(f"Unknown text oracle type: {stream['oracle']}") def _convert_advanced_value_output_channel(stream: YamlObject) -> ValueOutputChannel: @@ -194,14 +192,14 @@ def _convert_advanced_value_output_channel(stream: YamlObject) -> ValueOutputCha assert isinstance(stream["value"], str) value = parse_string(stream["value"], is_return=True) assert isinstance(value, Value) - if "evaluator" not in stream or stream["evaluator"] == "builtin": + if "oracle" not in stream or stream["oracle"] == "builtin": return ValueOutputChannel(value=value) - elif stream["evaluator"] == "custom": + elif stream["oracle"] == "custom_check": return ValueOutputChannel( value=value, - 
evaluator=_convert_programmed_evaluator(stream), + oracle=_convert_custom_check_oracle(stream), ) - raise TypeError(f"Unknown value evaluator type: {stream['evaluator']}") + raise TypeError(f"Unknown value oracle type: {stream['oracle']}") def _convert_testcase(testcase: YamlDict, previous_config: dict) -> Testcase: diff --git a/tested/evaluators/__init__.py b/tested/evaluators/__init__.py deleted file mode 100644 index cb9dfb28..00000000 --- a/tested/evaluators/__init__.py +++ /dev/null @@ -1,110 +0,0 @@ -""" -Evaluators actually compare values to determine the result of a test. - -## Implementing an evaluator - -An evaluator is just a function that receives some configuration parameters and -returns a result. - -The following parameters are passed to the function: - -- RawEvaluator configs, consisting of: - - The global configuration for the run of TESTed - - The configuration for the evaluator instance - - The judge instance -- The output channel from the test suite. -- The raw actual output. -- The maximum time for the evaluation. Simple evaluators can ignore this, but more - advanced ones need more time. - -For example, such a function looks like this: - - def evaluate_text(configs, channel, actual): - pass -""" -import functools -from pathlib import Path -from typing import Callable, Optional, Union - -from tested.configs import Bundle -from tested.dodona import Status -from tested.evaluators.common import Evaluator, RawEvaluator, _curry_evaluator -from tested.testsuite import ( - EmptyChannel, - ExceptionBuiltin, - ExitCodeOutputChannel, - GenericExceptionEvaluator, - GenericTextEvaluator, - GenericValueEvaluator, - IgnoredChannel, - NormalOutputChannel, - ProgrammedEvaluator, - SpecialOutputChannel, - SpecificEvaluator, - TextBuiltin, - ValueBuiltin, -) - - -def get_evaluator( - bundle: Bundle, - context_dir: Path, - output: Union[NormalOutputChannel, SpecialOutputChannel], - unexpected_status: Status = Status.WRONG, -) -> Evaluator: - """ - Get the evaluator for a given output channel. - """ - from ..evaluators import ( - exception, - exitcode, - ignored, - nothing, - programmed, - specific, - text, - value, - ) - - currier: Callable[[RawEvaluator, Optional[dict]], Evaluator] = functools.partial( - _curry_evaluator, bundle, context_dir - ) - - # Handle channel states. - if output == EmptyChannel.NONE: - evaluator = functools.partial( - nothing.evaluate, unexpected_status=unexpected_status - ) - return currier(evaluator) - if output == IgnoredChannel.IGNORED: - return currier(ignored.evaluate) - if isinstance(output, ExitCodeOutputChannel): - return currier(exitcode.evaluate) - - assert hasattr(output, "evaluator") - - # Handle actual evaluators. 
- evaluator = output.evaluator - - # Handle built-in text evaluators - if isinstance(evaluator, GenericTextEvaluator): - if evaluator.name == TextBuiltin.TEXT: - return currier(text.evaluate_text, evaluator.options) - elif evaluator.name == TextBuiltin.FILE: - return currier(text.evaluate_file, evaluator.options) - raise AssertionError("Unknown built-in text evaluator") - # Handle built-in value evaluators - elif isinstance(evaluator, GenericValueEvaluator): - assert evaluator.name == ValueBuiltin.VALUE - return currier(value.evaluate, evaluator.options) - # Handle built-in exception evaluators - elif isinstance(evaluator, GenericExceptionEvaluator): - assert evaluator.name == ExceptionBuiltin.EXCEPTION - return currier(exception.evaluate, evaluator.options) - # Handle programmed evaluators - elif isinstance(evaluator, ProgrammedEvaluator): - return currier(programmed.evaluate) - elif isinstance(evaluator, SpecificEvaluator): - return currier(specific.evaluate) - else: - raise AssertionError(f"Unknown evaluator type: {type(evaluator)}") diff --git a/tested/features.py b/tested/features.py index 0356cce2..a4b7e4df 100644 --- a/tested/features.py +++ b/tested/features.py @@ -148,16 +148,16 @@ def is_supported(language: "Language") -> bool: _logger.warning(f"Test suite requires unsupported type {t}") return False - # Check language-specific evaluators + # Check language-specific oracles for tab in language.config.suite.tabs: assert tab.contexts is not None for context in tab.contexts: for testcase in context.testcases: - languages = testcase.output.get_specific_eval_languages() + languages = testcase.output.get_specific_oracle_languages() if languages is not None: if language.config.dodona.programming_language not in languages: _logger.warning( - f"Specific evaluators are available only in " + f"Language-specific oracles are available only in " f"{languages}!" ) return False diff --git a/tested/judge/core.py b/tested/judge/core.py index b3e52646..82c2f133 100644 --- a/tested/judge/core.py +++ b/tested/judge/core.py @@ -334,10 +334,10 @@ def _generate_files( execution_unit=unit, execution_name=exec_name, ) - # Copy evaluators to the directory. + # Copy functions to the directory. for evaluator in evaluators: source = Path(bundle.config.resources) / evaluator - _logger.debug("Copying evaluator from %s to %s", source, common_dir) + _logger.debug("Copying oracle from %s to %s", source, common_dir) shutil.copy2(source, common_dir) dependencies.extend(evaluators) dependencies.append(generated) diff --git a/tested/judge/evaluation.py b/tested/judge/evaluation.py index 74173923..7ab890ea 100644 --- a/tested/judge/evaluation.py +++ b/tested/judge/evaluation.py @@ -25,7 +25,6 @@ StatusMessage, Update, ) -from tested.evaluators import get_evaluator from tested.internationalization import get_i18n_string from tested.judge.collector import OutputManager, TestcaseCollector from tested.judge.execution import ContextResult @@ -34,6 +33,7 @@ generate_statement, get_readable_input, ) +from tested.oracles import get_oracle from tested.testsuite import ( Context, ExceptionOutput, @@ -103,10 +103,10 @@ def _evaluate_channel( :return: True if successful, otherwise False. """ - evaluator = get_evaluator( + evaluator = get_oracle( bundle, context_directory, output, unexpected_status=unexpected_status ) - # Run the evaluator. + # Run the oracle. 
evaluation_result = evaluator(output, actual if actual else "") status = evaluation_result.result @@ -260,7 +260,7 @@ def evaluate_context_results( inlined_files = inlined_files.union(seen) t_col = TestcaseCollector(StartTestcase(description=readable_input)) - # Get the evaluators + # Get the functions output = testcase.output # Get the values produced by the execution. If there are no values, @@ -420,7 +420,7 @@ def should_show(test: OutputChannel, channel: Channel) -> bool: def guess_expected_value(bundle: Bundle, test: OutputChannel) -> str: """ Try and get the expected value for an output channel. In some cases, such as - a programmed or language specific evaluator, there will be no expected value + a programmed or language specific oracle, there will be no expected value available in the test suite. In that case, we use an empty string. :param bundle: Configuration bundle. diff --git a/tested/judge/programmed.py b/tested/judge/programmed.py index 7d7ea9d2..c5167dd0 100644 --- a/tested/judge/programmed.py +++ b/tested/judge/programmed.py @@ -25,7 +25,7 @@ generate_statement, ) from tested.serialisation import BooleanEvalResult, EvalResult, Value -from tested.testsuite import ProgrammedEvaluator +from tested.testsuite import CustomCheckOracle from tested.utils import get_identifier _logger = logging.getLogger(__name__) @@ -33,13 +33,13 @@ def evaluate_programmed( bundle: Bundle, - evaluator: ProgrammedEvaluator, + evaluator: CustomCheckOracle, expected: Value, actual: Value, ) -> Union[BaseExecutionResult, EvalResult]: """ Run the custom evaluation. Concerning structure and execution, the custom - evaluator is very similar to the execution of the whole evaluation. It a + oracle is very similar to the execution of the whole evaluation. It a mini-evaluation if you will. """ @@ -54,7 +54,7 @@ def evaluate_programmed( def _evaluate_others( bundle: Bundle, - evaluator: ProgrammedEvaluator, + evaluator: CustomCheckOracle, expected: Value, actual: Value, ) -> BaseExecutionResult: @@ -64,18 +64,18 @@ def _evaluate_others( """ _logger.debug("Doing evaluation in non-Python mode.") - # Create a directory for this evaluator. If one exists, delete it first. + # Create a directory for this oracle. If one exists, delete it first. evaluator_dir_name = evaluator.function.file.stem custom_directory_name = f"{get_identifier()}_{evaluator_dir_name}" - custom_path = Path(bundle.config.workdir, "evaluators", custom_directory_name) + custom_path = Path(bundle.config.workdir, "functions", custom_directory_name) if custom_path.exists(): - _logger.debug("Removing existing directory for custom evaluator.") + _logger.debug("Removing existing directory for custom oracle.") shutil.rmtree(custom_path, ignore_errors=True) custom_path.mkdir(parents=True) _logger.info("Will do custom evaluation in %s", custom_path) - # Create a configs bundle for the language of the evaluator. + # Create a configs bundle for the language of the oracle. 
eval_bundle = create_bundle( bundle.config, bundle.out, bundle.suite, evaluator.language ) @@ -97,7 +97,7 @@ def _evaluate_others( memory=False, ) - # Copy the evaluator + # Copy the oracle origin_path = Path(bundle.config.resources, evaluator.function.file) _logger.debug("Copying %s to %s", origin_path, custom_path) shutil.copy2(origin_path, custom_path) @@ -106,11 +106,11 @@ def _evaluate_others( dependencies = eval_bundle.lang_config.initial_dependencies() origin = eval_bundle.lang_config.path_to_dependencies() copy_from_paths_to_path(origin, dependencies, custom_path) - # Include the actual evaluator in the dependencies. + # Include the actual oracle in the dependencies. dependencies.append(evaluator.function.file.name) - # Generate the evaluator. - _logger.debug("Generating custom evaluator.") + # Generate the oracle. + _logger.debug("Generating custom oracle.") evaluator_name = generate_custom_evaluator( eval_bundle, destination=custom_path, @@ -119,17 +119,17 @@ def _evaluate_others( actual_value=actual, ) dependencies.append(evaluator_name) - _logger.debug("Generated evaluator executor %s", evaluator_name) + _logger.debug("Generated oracle executor %s", evaluator_name) # Do compilation for those configs that require it. command, files = eval_bundle.lang_config.compilation(dependencies) - _logger.debug("Compiling custom evaluator with command %s", command) + _logger.debug("Compiling custom oracle with command %s", command) result = run_command(custom_path, None, command) if result and result.stderr: raise ValueError("Error while compiling specific test case:" + result.stderr) files = filter_files(files, custom_path) - # Execute the custom evaluator. + # Execute the custom oracle. evaluator_name = Path(evaluator_name).stem files = eval_bundle.lang_config.filter_dependencies(files, evaluator_name) @@ -190,7 +190,7 @@ class _EvaluationResult: def _evaluate_python( bundle: Bundle, - evaluator: ProgrammedEvaluator, + evaluator: CustomCheckOracle, expected: Value, actual: Value, ) -> EvalResult: @@ -201,30 +201,30 @@ def _evaluate_python( assert evaluator.language == "python" _logger.debug("Doing evaluation in Python mode.") - # Create a configs bundle for the language of the evaluator. + # Create a configs bundle for the language of the oracle. eval_bundle = create_bundle( bundle.config, bundle.out, bundle.suite, evaluator.language ) - # Path to the evaluator. + # Path to the oracle. origin_path = Path(bundle.config.resources, evaluator.function.file) - # Read evaluator to file. + # Read oracle to file. with open(origin_path, "r") as file: evaluator_code = file.read() # We must provide the globals from the "evaluation_utils" to the code. # Begin by defining the module. utils = types.ModuleType("evaluation_utils") - utils.__dict__["EvaluationResult"] = _EvaluationResult + utils.__dict__["OracleResult"] = _EvaluationResult utils.__dict__["Message"] = ExtendedMessage # The context in which to execute. global_env = {"__tested_test__": utils} exec("import sys\n" "sys.modules['evaluation_utils'] = __tested_test__", global_env) - # Make the evaluator available. + # Make the oracle available. exec(evaluator_code, global_env) - # Call the evaluator. + # Call the oracle. 
literal_expected = generate_statement(eval_bundle, expected) literal_actual = generate_statement(eval_bundle, actual) arguments = custom_evaluator_arguments(evaluator) @@ -265,7 +265,7 @@ def _evaluate_python( result_ = cast(_EvaluationResult | None, global_env["__tested_test__result"]) - # If the result is None, the evaluator is broken. + # If the result is None, the oracle is broken. if result_ is None: messages.append( ExtendedMessage( diff --git a/tested/languages/c/generators.py b/tested/languages/c/generators.py index f12fc035..ebc9fd02 100644 --- a/tested/languages/c/generators.py +++ b/tested/languages/c/generators.py @@ -201,7 +201,7 @@ def convert_execution_unit(pu: PreparedExecutionUnit) -> str: #include "{pu.submission_name}.c" """ - # Import evaluators + # Import functions for name in pu.evaluator_names: result += f'#include "{name}.c"\n' diff --git a/tested/languages/config.py b/tested/languages/config.py index 6e0a819e..303c63af 100644 --- a/tested/languages/config.py +++ b/tested/languages/config.py @@ -289,13 +289,13 @@ def modify_solution(self, solution: Path): def modify_specific_evaluator(self, evaluator: Path): """ - An opportunity to modify the language specific evaluator. By default, - this does nothing. If you modify the evaluator, you must overwrite the - contents of the evaluator in-place. + An opportunity to modify the language specific oracle. By default, + this does nothing. If you modify the oracle, you must overwrite the + contents of the oracle in-place. This callback is called before any compilation. - :param evaluator: Path to the evaluator and path for the modified evaluator. + :param evaluator: Path to the oracle and path for the modified oracle. """ pass diff --git a/tested/languages/csharp/config.py b/tested/languages/csharp/config.py index f24dd606..bb68bf88 100644 --- a/tested/languages/csharp/config.py +++ b/tested/languages/csharp/config.py @@ -34,7 +34,7 @@ class CSharp(Language): def initial_dependencies(self) -> List[str]: - return ["dotnet.csproj", "Values.cs", "EvaluationResult.cs"] + return ["dotnet.csproj", "Values.cs", "OracleResult.cs"] def needs_selector(self): return True diff --git a/tested/languages/generation.py b/tested/languages/generation.py index cad7b85b..85ba3e37 100644 --- a/tested/languages/generation.py +++ b/tested/languages/generation.py @@ -43,9 +43,9 @@ ) from tested.testsuite import ( Context, + CustomCheckOracle, FileUrl, MainInput, - ProgrammedEvaluator, Testcase, TextData, ) @@ -276,7 +276,7 @@ def generate_execution( :param execution_name: The name of the execution module. :return: The name of the generated file in the given destination and a set - of evaluator names that will also be needed. + of oracle names that will also be needed. """ prepared_execution = prepare_execution_unit( bundle, destination, execution_name, execution_unit @@ -319,23 +319,23 @@ def generate_selector( return selector_filename -def custom_evaluator_arguments(evaluator: ProgrammedEvaluator) -> Value: +def custom_evaluator_arguments(evaluator: CustomCheckOracle) -> Value: return SequenceType(type=BasicSequenceTypes.SEQUENCE, data=evaluator.arguments) def generate_custom_evaluator( bundle: Bundle, destination: Path, - evaluator: ProgrammedEvaluator, + evaluator: CustomCheckOracle, expected_value: Value, actual_value: Value, ) -> str: """ - Generate the code for running a programmed evaluator. + Generate the code for running a programmed oracle. :param bundle: The configuration bundle. 
:param destination: The folder where the code should be generated. - :param evaluator: The evaluator data from the test suite. + :param evaluator: The oracle data from the test suite. :param expected_value: The preprocessed expected value. :param actual_value: The preprocessed actual value. diff --git a/tested/languages/java/config.py b/tested/languages/java/config.py index 243cefd8..9b35b433 100644 --- a/tested/languages/java/config.py +++ b/tested/languages/java/config.py @@ -28,7 +28,7 @@ class Java(Language): def initial_dependencies(self) -> List[str]: - return ["Values.java", "EvaluationResult.java"] + return ["Values.java", "OracleResult.java"] def needs_selector(self): return True diff --git a/tested/languages/javascript/generators.py b/tested/languages/javascript/generators.py index 73c17086..c7498714 100644 --- a/tested/languages/javascript/generators.py +++ b/tested/languages/javascript/generators.py @@ -188,7 +188,7 @@ def convert_execution_unit(pu: PreparedExecutionUnit) -> str: const values = require("./values.js"); """ - # Import the language specific evaluators we will need. + # Import the language specific functions we will need. for name in pu.evaluator_names: result += f'const {name} = require("./{name}.js");\n' diff --git a/tested/languages/kotlin/config.py b/tested/languages/kotlin/config.py index fa97497c..62fcb323 100644 --- a/tested/languages/kotlin/config.py +++ b/tested/languages/kotlin/config.py @@ -37,7 +37,7 @@ def get_executable(name): class Kotlin(Language): def initial_dependencies(self) -> List[str]: - return ["Values.kt", "EvaluationResult.kt"] + return ["Values.kt", "OracleResult.kt"] def needs_selector(self): return True diff --git a/tested/languages/preparation.py b/tested/languages/preparation.py index de44905b..76192608 100644 --- a/tested/languages/preparation.py +++ b/tested/languages/preparation.py @@ -35,11 +35,11 @@ from tested.testsuite import ( Context, EmptyChannel, - EvaluatorOutputChannel, ExceptionOutput, IgnoredChannel, + LanguageSpecificOracle, MainInput, - SpecificEvaluator, + OracleOutputChannel, Testcase, TextData, ValueOutput, @@ -157,7 +157,7 @@ class PreparedExecutionUnit: testcase_separator_secret: str "Secret for use in the testcase separator." evaluator_names: Set[str] - "The names of the language-specific evaluators we will need." + "The names of the language-specific functions we will need." def prepare_argument( @@ -245,13 +245,13 @@ def _create_handling_function( Create a function to handle the result of a return value or an exception. There are two possibilities: - - There is a language-specific evaluator. In that case, we wrap the value in - a function call to the evaluator, and then send off the result. An example of + - There is a language-specific oracle. In that case, we wrap the value in + a function call to the oracle, and then send off the result. An example of the result: send_evaluated(evaluate(value)) - - There is no language-specific evaluator. In that case, we just send off the + - There is no language-specific oracle. In that case, we just send off the value directly. An example of the result: send_value(value) @@ -260,21 +260,21 @@ def _create_handling_function( :param send_evaluated: The name of the function that will handle sending the result of an evaluation. :param send_value: The name of the function that will handle sending the value. - :param output: The evaluator. - :return: A tuple containing the call and the name of the evaluator if present. + :param output: The oracle. 
+ :return: A tuple containing the call and the name of the oracle if present. """ lang_config = bundle.lang_config - if isinstance(output, EvaluatorOutputChannel) and isinstance( - output.evaluator, SpecificEvaluator + if isinstance(output, OracleOutputChannel) and isinstance( + output.oracle, LanguageSpecificOracle ): - evaluator = output.evaluator.for_language(bundle.config.programming_language) + evaluator = output.oracle.for_language(bundle.config.programming_language) evaluator_name = conventionalize_namespace(lang_config, evaluator.file.stem) else: evaluator_name = None def generator(expression: Expression) -> Statement: - if isinstance(output, EvaluatorOutputChannel) and isinstance( - output.evaluator, SpecificEvaluator + if isinstance(output, OracleOutputChannel) and isinstance( + output.oracle, LanguageSpecificOracle ): arguments = [ PreparedFunctionCall( @@ -312,9 +312,9 @@ def _create_exception_function( :param bundle: The configuration bundle. :param testcase: The testcase to create the function for. - :return: The function and optionally the name of the evaluator file. + :return: The function and optionally the name of the oracle file. """ - # If we have a regular testcase, handle special evaluators. + # If we have a regular testcase, handle special functions. exception_channel = testcase.output.exception return _create_handling_function( @@ -334,8 +334,8 @@ def prepare_testcase( :param bundle: The configuration bundle. :param testcase: The testcase to prepare. - :return: Arguments containing the preparation results and the evaluator name or - None if no language-specific evaluator is needed. + :return: Arguments containing the preparation results and the oracle name or + None if no language-specific oracle is needed. """ names = [] @@ -413,7 +413,7 @@ def prepare_context( :param context: The context to prepare :return: The prepared context arguments and a set - of evaluator names. + of oracle names. """ language = bundle.config.programming_language resources = bundle.config.resources @@ -473,7 +473,7 @@ def prepare_execution_unit( :param execution_name: The name of the execution module. :return: The name of the generated file in the given destination and a set - of evaluator names that will also be needed. + of oracle names that will also be needed. """ evaluator_names = set() contexts = [] diff --git a/tested/languages/python/generators.py b/tested/languages/python/generators.py index baafb9d9..f391b933 100644 --- a/tested/languages/python/generators.py +++ b/tested/languages/python/generators.py @@ -144,7 +144,7 @@ def convert_execution_unit(pu: PreparedExecutionUnit) -> str: from decimal import Decimal """ - # Import the language specific evaluators we will need. + # Import the language specific functions we will need. 
for name in pu.evaluator_names: result += f"import {name}\n" diff --git a/tested/oracles/__init__.py b/tested/oracles/__init__.py new file mode 100644 index 00000000..c777d7cc --- /dev/null +++ b/tested/oracles/__init__.py @@ -0,0 +1,86 @@ +import functools +from pathlib import Path +from typing import Callable, Optional, Union + +from tested.configs import Bundle +from tested.dodona import Status +from tested.oracles.common import Oracle, RawOracle, _curry_oracle +from tested.testsuite import ( + CustomCheckOracle, + EmptyChannel, + ExceptionBuiltin, + ExitCodeOutputChannel, + GenericExceptionOracle, + GenericTextOracle, + GenericValueOracle, + IgnoredChannel, + LanguageSpecificOracle, + NormalOutputChannel, + SpecialOutputChannel, + TextBuiltin, + ValueBuiltin, +) + + +def get_oracle( + bundle: Bundle, + context_dir: Path, + output: Union[NormalOutputChannel, SpecialOutputChannel], + unexpected_status: Status = Status.WRONG, +) -> Oracle: + """ + Get the oracle for a given output channel. + """ + from ..oracles import ( + exception, + exitcode, + ignored, + nothing, + programmed, + specific, + text, + value, + ) + + currier: Callable[[RawOracle, Optional[dict]], Oracle] = functools.partial( + _curry_oracle, bundle, context_dir + ) + + # Handle channel states. + if output == EmptyChannel.NONE: + oracle = functools.partial( + nothing.evaluate, unexpected_status=unexpected_status + ) + return currier(oracle) + if output == IgnoredChannel.IGNORED: + return currier(ignored.evaluate) + if isinstance(output, ExitCodeOutputChannel): + return currier(exitcode.evaluate) + + assert hasattr(output, "oracle") + + # Handle actual functions. + oracle = output.oracle + + # Handle built-in text functions + if isinstance(oracle, GenericTextOracle): + if oracle.name == TextBuiltin.TEXT: + return currier(text.evaluate_text, oracle.options) + elif oracle.name == TextBuiltin.FILE: + return currier(text.evaluate_file, oracle.options) + raise AssertionError("Unknown built-in text oracle") + # Handle built-in value functions + elif isinstance(oracle, GenericValueOracle): + assert oracle.name == ValueBuiltin.VALUE + return currier(value.evaluate, oracle.options) + # Handle built-in exception functions + elif isinstance(oracle, GenericExceptionOracle): + assert oracle.name == ExceptionBuiltin.EXCEPTION + return currier(exception.evaluate, oracle.options) + # Handle programmed functions + elif isinstance(oracle, CustomCheckOracle): + return currier(programmed.evaluate) + elif isinstance(oracle, LanguageSpecificOracle): + return currier(specific.evaluate) + else: + raise AssertionError(f"Unknown oracle type: {type(oracle)}") diff --git a/tested/evaluators/common.py b/tested/oracles/common.py similarity index 65% rename from tested/evaluators/common.py rename to tested/oracles/common.py index 895e66fb..039809eb 100644 --- a/tested/evaluators/common.py +++ b/tested/oracles/common.py @@ -1,20 +1,20 @@ """ -Evaluators actually compare values to determine the result of a test. +Oracles compare values to determine the result of a test. -## Implementing an evaluator +## Implementing oracles -An evaluator is just a function that receives some configuration parameters and +An oracle is just a function that receives some configuration parameters and returns a result. 
The following parameters are passed to the function: -- RawEvaluator configs, consisting of: +- RawOracle configs, consisting of: - The global configuration for the run of TESTed - - The configuration for the evaluator instance + - The configuration for the oracle instance - The judge instance - The output channel from the test suite. - The raw actual output. -- The maximum time for the evaluation. Simple evaluators can ignore this, but more +- The maximum time for the oracle. Simple oracles can ignore this, but more advanced ones need more time. For example, such a function looks like this: @@ -37,45 +37,40 @@ def evaluate_text(configs, channel, actual): @dataclass -class EvaluationResult: - """Provides the result of an evaluation for a specific output channel.""" - - result: StatusMessage # The result of the evaluation. - readable_expected: str - """ - A human-friendly version of what the channel should have been. - """ - readable_actual: str +class OracleResult: """ - A human-friendly version (on a best-efforts basis) of what the channel is. + Represents the result of applying an oracle to evaluate some result. """ + + result: StatusMessage # The result of the evaluation. + readable_expected: str # A human-friendly version of what the channel should have been. + readable_actual: str # A human-friendly version (on a best-efforts basis) of what the channel is. messages: List[Message] = field(default_factory=list) - is_multiline_string: bool = False - """ - Indicates if the evaluation result is a multiline string - """ + is_multiline_string: bool = ( + False # Indicates if the evaluation result is a multiline string. + ) -class EvaluatorConfig(NamedTuple): +class OracleConfig(NamedTuple): bundle: Bundle options: Dict[str, Any] context_dir: Path -RawEvaluator = Callable[[EvaluatorConfig, OutputChannel, str], EvaluationResult] +RawOracle = Callable[[OracleConfig, OutputChannel, str], OracleResult] -Evaluator = Callable[[OutputChannel, str], EvaluationResult] +Oracle = Callable[[OutputChannel, str], OracleResult] -def _curry_evaluator( +def _curry_oracle( bundle: Bundle, context_dir: Path, - function: RawEvaluator, + function: RawOracle, options: Optional[dict] = None, -) -> Evaluator: +) -> Oracle: if options is None: options = dict() - config = EvaluatorConfig(bundle, options, context_dir) + config = OracleConfig(bundle, options, context_dir) # noinspection PyTypeChecker return functools.partial(function, config) @@ -93,7 +88,7 @@ def try_outputs( def cleanup_specific_programmed( - config: EvaluatorConfig, channel: NormalOutputChannel, actual: EvalResult + config: OracleConfig, channel: NormalOutputChannel, actual: EvalResult ) -> EvalResult: if isinstance(channel, ExceptionOutputChannel): lang_config = config.bundle.lang_config diff --git a/tested/evaluators/exception.py b/tested/oracles/exception.py similarity index 91% rename from tested/evaluators/exception.py rename to tested/oracles/exception.py index ae1348b7..f78d071f 100644 --- a/tested/evaluators/exception.py +++ b/tested/oracles/exception.py @@ -1,15 +1,12 @@ -""" -Exception evaluator. 
-""" import logging from typing import Optional, Tuple from pydantic import BaseModel from tested.dodona import ExtendedMessage, Permission, Status, StatusMessage -from tested.evaluators.common import EvaluationResult, EvaluatorConfig from tested.internationalization import get_i18n_string from tested.languages.utils import convert_stacktrace_to_clickable_feedback +from tested.oracles.common import OracleConfig, OracleResult from tested.serialisation import ExceptionValue from tested.testsuite import ExceptionOutputChannel, OutputChannel @@ -20,14 +17,14 @@ class _ExceptionValue(BaseModel): __root__: ExceptionValue -def try_as_exception(config: EvaluatorConfig, value: str) -> ExceptionValue: +def try_as_exception(config: OracleConfig, value: str) -> ExceptionValue: actual = _ExceptionValue.parse_raw(value).__root__ actual.stacktrace = config.bundle.lang_config.cleanup_stacktrace(actual.stacktrace) return actual def try_as_readable_exception( - config: EvaluatorConfig, value: str + config: OracleConfig, value: str ) -> Tuple[Optional[str], Optional[ExtendedMessage]]: try: actual = _ExceptionValue.parse_raw(value).__root__ @@ -45,8 +42,8 @@ def try_as_readable_exception( def evaluate( - config: EvaluatorConfig, channel: OutputChannel, actual_str: str -) -> EvaluationResult: + config: OracleConfig, channel: OutputChannel, actual_str: str +) -> OracleResult: """ Evaluate an exception. @@ -64,7 +61,7 @@ def evaluate( readable_expected = expected.readable(language) if not actual_str: - return EvaluationResult( + return OracleResult( result=StatusMessage(enum=Status.WRONG), readable_expected=readable_expected, readable_actual="", @@ -82,7 +79,7 @@ def evaluate( permission=Permission.STAFF, ) student_message = get_i18n_string("evaluators.exception.student") - return EvaluationResult( + return OracleResult( result=StatusMessage( enum=Status.INTERNAL_ERROR, human=get_i18n_string("evaluators.exception.status"), @@ -124,7 +121,7 @@ def evaluate( get_i18n_string(message, actual_type=(actual.type or actual.message)) ) - return EvaluationResult( + return OracleResult( result=StatusMessage(enum=status), readable_expected=readable_expected, readable_actual=actual.readable( diff --git a/tested/evaluators/exitcode.py b/tested/oracles/exitcode.py similarity index 85% rename from tested/evaluators/exitcode.py rename to tested/oracles/exitcode.py index 65ed5d97..cc137c89 100644 --- a/tested/evaluators/exitcode.py +++ b/tested/oracles/exitcode.py @@ -2,8 +2,8 @@ from typing import Optional from tested.dodona import Status, StatusMessage -from tested.evaluators.common import EvaluationResult, EvaluatorConfig from tested.internationalization import get_i18n_string +from tested.oracles.common import OracleConfig, OracleResult from tested.testsuite import ExitCodeOutputChannel, OutputChannel logger = logging.getLogger(__name__) @@ -16,14 +16,12 @@ def _as_int(value: str) -> Optional[int]: return None -def evaluate( - _config: EvaluatorConfig, channel: OutputChannel, value: str -) -> EvaluationResult: +def evaluate(_config: OracleConfig, channel: OutputChannel, value: str) -> OracleResult: assert isinstance(channel, ExitCodeOutputChannel) exit_code = _as_int(value) if exit_code is None: - return EvaluationResult( + return OracleResult( result=StatusMessage( enum=Status.WRONG, human=get_i18n_string( @@ -46,7 +44,7 @@ def evaluate( else: status = StatusMessage(enum=Status.CORRECT) - return EvaluationResult( + return OracleResult( result=status, readable_expected=str(expected_exit_code), readable_actual=str(exit_code), 
diff --git a/tested/evaluators/ignored.py b/tested/oracles/ignored.py similarity index 62% rename from tested/evaluators/ignored.py rename to tested/oracles/ignored.py index 0ce35a59..e1d46def 100644 --- a/tested/evaluators/ignored.py +++ b/tested/oracles/ignored.py @@ -1,18 +1,13 @@ -""" -RawEvaluator for ignored channels. -""" import functools from tested.dodona import Status, StatusMessage -from tested.evaluators.common import EvaluationResult, EvaluatorConfig, try_outputs -from tested.evaluators.exception import try_as_readable_exception -from tested.evaluators.value import try_as_readable_value +from tested.oracles.common import OracleConfig, OracleResult, try_outputs +from tested.oracles.exception import try_as_readable_exception +from tested.oracles.value import try_as_readable_value from tested.testsuite import IgnoredChannel, OutputChannel -def evaluate( - config: EvaluatorConfig, channel: OutputChannel, actual: str -) -> EvaluationResult: +def evaluate(config: OracleConfig, channel: OutputChannel, actual: str) -> OracleResult: assert isinstance(channel, IgnoredChannel) # If there is something in the channel, try parsing it as @@ -24,7 +19,7 @@ def evaluate( actual, msg = try_outputs(actual, parsers) messages = [msg] if msg else [] - return EvaluationResult( + return OracleResult( result=StatusMessage(enum=Status.CORRECT), readable_expected="", readable_actual=actual, diff --git a/tested/evaluators/nothing.py b/tested/oracles/nothing.py similarity index 59% rename from tested/evaluators/nothing.py rename to tested/oracles/nothing.py index 8545d132..e7743976 100644 --- a/tested/evaluators/nothing.py +++ b/tested/oracles/nothing.py @@ -1,22 +1,19 @@ -""" -RawEvaluator for channels without output. -""" import functools from tested.dodona import Status, StatusMessage -from tested.evaluators.common import EvaluationResult, EvaluatorConfig, try_outputs -from tested.evaluators.exception import try_as_readable_exception -from tested.evaluators.value import try_as_readable_value from tested.internationalization import get_i18n_string +from tested.oracles.common import OracleConfig, OracleResult, try_outputs +from tested.oracles.exception import try_as_readable_exception +from tested.oracles.value import try_as_readable_value from tested.testsuite import EmptyChannel, OutputChannel def evaluate( - config: EvaluatorConfig, + config: OracleConfig, channel: OutputChannel, actual: str, unexpected_status: Status = Status.WRONG, -) -> EvaluationResult: +) -> OracleResult: assert isinstance(channel, EmptyChannel) messages = [] @@ -28,20 +25,14 @@ def evaluate( actual, msg = try_outputs(actual, parsers) if msg: messages.append(msg) + error = "runtime" if unexpected_status == Status.RUNTIME_ERROR else "unexpected" result = StatusMessage( enum=unexpected_status, - human=get_i18n_string( - "evaluators.nothing." - + ( - "runtime" - if unexpected_status == Status.RUNTIME_ERROR - else "unexpected" - ) - ), + human=get_i18n_string(f"evaluators.nothing.{error}"), ) else: result = StatusMessage(enum=Status.CORRECT) - return EvaluationResult( + return OracleResult( result=result, readable_expected="", readable_actual=actual, messages=messages ) diff --git a/tested/evaluators/programmed.py b/tested/oracles/programmed.py similarity index 83% rename from tested/evaluators/programmed.py rename to tested/oracles/programmed.py index 119fac7e..1e0be6f5 100644 --- a/tested/evaluators/programmed.py +++ b/tested/oracles/programmed.py @@ -1,23 +1,20 @@ -""" -Programmed evaluator. 
-""" import logging import traceback from typing import List, Optional from tested.datatypes import BasicStringTypes from tested.dodona import ExtendedMessage, Message, Permission, Status, StatusMessage -from tested.evaluators.common import ( - EvaluationResult, - EvaluatorConfig, - cleanup_specific_programmed, -) -from tested.evaluators.value import get_values from tested.internationalization import get_i18n_string from tested.judge.programmed import evaluate_programmed from tested.judge.utils import BaseExecutionResult +from tested.oracles.common import ( + OracleConfig, + OracleResult, + cleanup_specific_programmed, +) +from tested.oracles.value import get_values from tested.serialisation import BooleanEvalResult, EvalResult, StringType, Value -from tested.testsuite import EvaluatorOutputChannel, OutputChannel, ProgrammedEvaluator +from tested.testsuite import CustomCheckOracle, OracleOutputChannel, OutputChannel _logger = logging.getLogger(__name__) @@ -36,31 +33,31 @@ def _try_specific(value_: str) -> EvalResult: def evaluate( - config: EvaluatorConfig, channel: OutputChannel, actual_str: str -) -> EvaluationResult: + config: OracleConfig, channel: OutputChannel, actual_str: str +) -> OracleResult: """ - Evaluate using a programmed evaluator. This evaluator is unique, in that it is - also responsible for running the evaluator (all other evaluators don't do that). + Evaluate using a programmed oracle. This oracle is unique, in that it is + also responsible for running the oracle (all other functions don't do that). """ - assert isinstance(channel, EvaluatorOutputChannel) - assert isinstance(channel.evaluator, ProgrammedEvaluator) + assert isinstance(channel, OracleOutputChannel) + assert isinstance(channel.oracle, CustomCheckOracle) - _logger.debug(f"Programmed evaluator for output {actual_str}") + _logger.debug(f"Programmed oracle for output {actual_str}") # Convert the expected item to a Value, which is then passed to the - # evaluator for evaluation. + # oracle for evaluation. # This is slightly tricky, since the actual value must also be converted # to a value, and we are not yet sure what the actual value is exactly result = get_values(config.bundle, channel, actual_str or "") # TODO: why is this? - if isinstance(result, EvaluationResult): + if isinstance(result, OracleResult): return result else: expected, readable_expected, actual, readable_actual = result # If there is no actual result, stop early. 
if actual is None: - return EvaluationResult( + return OracleResult( result=StatusMessage(enum=Status.WRONG), readable_expected=readable_expected, readable_actual=readable_actual, @@ -72,19 +69,19 @@ def evaluate( f"actual: {actual}" ) result = evaluate_programmed( - config.bundle, evaluator=channel.evaluator, expected=expected, actual=actual + config.bundle, evaluator=channel.oracle, expected=expected, actual=actual ) if isinstance(result, BaseExecutionResult): if result.timeout: - return EvaluationResult( + return OracleResult( result=StatusMessage(enum=Status.TIME_LIMIT_EXCEEDED), readable_expected=readable_expected, readable_actual=readable_actual, messages=[result.stdout, result.stderr], ) if result.memory: - return EvaluationResult( + return OracleResult( result=StatusMessage(enum=Status.MEMORY_LIMIT_EXCEEDED), readable_expected=readable_expected, readable_actual=readable_actual, @@ -94,7 +91,7 @@ def evaluate( if not result.stdout: stdout = ExtendedMessage(description=result.stdout, format="text") stderr = ExtendedMessage(description=result.stderr, format="text") - return EvaluationResult( + return OracleResult( result=StatusMessage(enum=Status.INTERNAL_ERROR), readable_expected=readable_expected, readable_actual=readable_actual, @@ -136,7 +133,7 @@ def evaluate( permission=Permission.STAFF, ), ] - return EvaluationResult( + return OracleResult( result=StatusMessage(enum=Status.INTERNAL_ERROR), readable_expected=readable_expected, readable_actual=readable_actual, @@ -169,7 +166,7 @@ def evaluate( ), ) - return EvaluationResult( + return OracleResult( result=result_status, readable_expected=cleaned.readable_expected or "", readable_actual=cleaned.readable_actual or "", diff --git a/tested/evaluators/specific.py b/tested/oracles/specific.py similarity index 75% rename from tested/evaluators/specific.py rename to tested/oracles/specific.py index 9495f909..1e947c3a 100644 --- a/tested/evaluators/specific.py +++ b/tested/oracles/specific.py @@ -3,28 +3,28 @@ """ from tested.dodona import ExtendedMessage, Permission, Status, StatusMessage -from tested.evaluators.common import ( - EvaluationResult, - EvaluatorConfig, +from tested.internationalization import get_i18n_string +from tested.oracles.common import ( + OracleConfig, + OracleResult, cleanup_specific_programmed, ) -from tested.internationalization import get_i18n_string from tested.serialisation import BooleanEvalResult -from tested.testsuite import EvaluatorOutputChannel, OutputChannel, SpecificEvaluator +from tested.testsuite import LanguageSpecificOracle, OracleOutputChannel, OutputChannel def evaluate( - config: EvaluatorConfig, channel: OutputChannel, actual_str: str -) -> EvaluationResult: + config: OracleConfig, channel: OutputChannel, actual_str: str +) -> OracleResult: """ - Compare the result of a specific evaluator. This evaluator has no options. + Compare the result of a specific oracle. This oracle has no options. """ - assert isinstance(channel, EvaluatorOutputChannel) - assert isinstance(channel.evaluator, SpecificEvaluator) + assert isinstance(channel, OracleOutputChannel) + assert isinstance(channel.oracle, LanguageSpecificOracle) # Special support for no values to have a better error message. 
if actual_str == "": - return EvaluationResult( + return OracleResult( result=StatusMessage( enum=Status.WRONG, human=get_i18n_string("evaluators.specific.missing.status"), @@ -45,7 +45,7 @@ def evaluate( permission=Permission.STAFF, ) student_message = get_i18n_string("evaluators.specific.student.default") - return EvaluationResult( + return OracleResult( result=StatusMessage( enum=Status.INTERNAL_ERROR, human=get_i18n_string("evaluators.specific.status"), @@ -57,7 +57,7 @@ def evaluate( actual = cleanup_specific_programmed(config, channel, actual) - return EvaluationResult( + return OracleResult( result=StatusMessage(enum=actual.result), readable_expected=actual.readable_expected or "", readable_actual=actual.readable_actual or "", diff --git a/tested/evaluators/text.py b/tested/oracles/text.py similarity index 81% rename from tested/evaluators/text.py rename to tested/oracles/text.py index 61527b15..0e62c831 100644 --- a/tested/evaluators/text.py +++ b/tested/oracles/text.py @@ -5,8 +5,8 @@ from typing import Any, Dict, Optional from tested.dodona import Status, StatusMessage -from tested.evaluators.common import EvaluationResult, EvaluatorConfig from tested.internationalization import get_i18n_string +from tested.oracles.common import OracleConfig, OracleResult from tested.testsuite import FileOutputChannel, OutputChannel, TextOutputChannel @@ -17,7 +17,7 @@ def _is_number(string: str) -> Optional[float]: return None -def _text_options(config: EvaluatorConfig) -> dict: +def _text_options(config: OracleConfig) -> dict: defaults = { # Options for textual comparison "ignoreWhitespace": True, @@ -31,17 +31,15 @@ def _text_options(config: EvaluatorConfig) -> dict: return defaults -def _file_defaults(config: EvaluatorConfig) -> dict: +def _file_defaults(config: OracleConfig) -> dict: defaults = {"mode": "exact"} defaults.update(config.options) - if defaults["mode"] not in {"exact", "lines", "values"}: - raise ValueError(f"Unknown mode for file evaluator: {defaults['mode']}") + if defaults["mode"] not in ("exact", "lines", "values"): + raise ValueError(f"Unknown mode for file oracle: {defaults['mode']}") return defaults -def compare_text( - options: Dict[str, Any], expected: str, actual: str -) -> EvaluationResult: +def compare_text(options: Dict[str, Any], expected: str, actual: str) -> OracleResult: # Temporary variables that may modified by the evaluation options, # Don't modify the actual values, otherwise there maybe confusion with the # solution submitted by the student @@ -69,7 +67,7 @@ def compare_text( else: result = actual_eval == expected_eval - return EvaluationResult( + return OracleResult( result=StatusMessage(enum=Status.CORRECT if result else Status.WRONG), readable_expected=str(expected), readable_actual=str(actual), @@ -77,10 +75,10 @@ def compare_text( def evaluate_text( - config: EvaluatorConfig, channel: OutputChannel, actual: str -) -> EvaluationResult: + config: OracleConfig, channel: OutputChannel, actual: str +) -> OracleResult: """ - The base evaluator, used to compare two strings. As this evaluator is + The base oracle, used to compare two strings. As this oracle is intended for evaluating stdout, it supports various options to make life easier: @@ -101,21 +99,21 @@ def evaluate_text( def evaluate_file( - config: EvaluatorConfig, channel: OutputChannel, actual: str -) -> EvaluationResult: + config: OracleConfig, channel: OutputChannel, actual: str +) -> OracleResult: """ - Evaluate the contents of two files. 
The file evaluator supports one option, - ``mode``, used to define in which mode the evaluator should operate: + Evaluate the contents of two files. The file oracle supports one option, + ``mode``, used to define in which mode the oracle should operate: 1. ``full``: The complete contents are passed to the :class:`TextEvaluator`. 2. ``line``: The file is split by lines and each line is compared to the corresponding line with the :class:`TextEvaluator`. The lines are compared without newlines. - Since the text evaluator is used behind the scenes, this evaluator also supports - all parameters of that evaluator. + Since the text oracle is used behind the scenes, this oracle also supports + all parameters of that oracle. - When no mode is passed, the evaluator will default to ``full``. + When no mode is passed, the oracle will default to ``full``. """ assert isinstance(channel, FileOutputChannel) options = _text_options(config) @@ -125,7 +123,7 @@ def evaluate_file( message = get_i18n_string( "evaluators.text.file.unexpected.message", actual=actual ) - return EvaluationResult( + return OracleResult( result=StatusMessage( enum=Status.WRONG, human=get_i18n_string("evaluators.text.file.unexpected.status"), @@ -149,7 +147,7 @@ def evaluate_file( with open(str(actual_path), "r") as file: actual = file.read() except FileNotFoundError: - return EvaluationResult( + return OracleResult( result=StatusMessage( enum=Status.RUNTIME_ERROR, human=get_i18n_string("evaluators.text.file.not-found"), @@ -169,7 +167,7 @@ def evaluate_file( for expected_line, actual_line in zip(expected_lines, actual_lines): r = compare_text(options, expected_line, actual_line) correct = correct and r.result.enum == Status.CORRECT - return EvaluationResult( + return OracleResult( result=StatusMessage(enum=Status.CORRECT if correct else Status.WRONG), readable_expected=expected, readable_actual=actual, diff --git a/tested/evaluators/value.py b/tested/oracles/value.py similarity index 93% rename from tested/evaluators/value.py rename to tested/oracles/value.py index f2e3929b..23d4eacf 100644 --- a/tested/evaluators/value.py +++ b/tested/oracles/value.py @@ -1,5 +1,5 @@ """ -Value evaluator. +Value oracle. 
""" import logging from typing import Optional, Tuple, Union, cast @@ -13,10 +13,10 @@ SimpleTypes, ) from tested.dodona import ExtendedMessage, Permission, Status, StatusMessage -from tested.evaluators.common import EvaluationResult, EvaluatorConfig from tested.features import TypeSupport, fallback_type_support_map from tested.internationalization import get_i18n_string from tested.languages.generation import generate_statement +from tested.oracles.common import OracleConfig, OracleResult from tested.serialisation import ( ObjectKeyValuePair, ObjectType, @@ -28,7 +28,7 @@ to_python_comparable, ) from tested.testsuite import ( - EvaluatorOutputChannel, + OracleOutputChannel, OutputChannel, TextOutputChannel, ValueOutputChannel, @@ -50,8 +50,8 @@ def try_as_readable_value( def get_values( - bundle: Bundle, output_channel: EvaluatorOutputChannel, actual_str: str -) -> Union[EvaluationResult, Tuple[Value, str, Optional[Value], str]]: + bundle: Bundle, output_channel: OracleOutputChannel, actual_str: str +) -> Union[OracleResult, Tuple[Value, str, Optional[Value], str]]: if isinstance(output_channel, TextOutputChannel): expected = output_channel.get_data_as_string(bundle.config.resources) expected_value = StringType(type=BasicStringTypes.TEXT, data=expected) @@ -78,7 +78,7 @@ def get_values( description=raw_message, format="text", permission=Permission.STAFF ) student = "An error occurred while collecting the return value. Contact staff for more information." - return EvaluationResult( + return OracleResult( result=StatusMessage(enum=Status.INTERNAL_ERROR, human=student), readable_expected=readable_expected, readable_actual=actual_str, @@ -233,19 +233,19 @@ def _check_data_type( def evaluate( - config: EvaluatorConfig, channel: OutputChannel, actual_str: str -) -> EvaluationResult: + config: OracleConfig, channel: OutputChannel, actual_str: str +) -> OracleResult: """ - Evaluate two values. The values must match exact. Currently, this evaluator + Evaluate two values. The values must match exact. Currently, this oracle has no options, but it might receive them in the future (e.g. options on how to deal with strings or floats). """ assert isinstance(channel, ValueOutputChannel) - # Try parsing the value as an EvaluationResult. - # This is the result of a custom evaluator. + # Try parsing the value as an OracleResult. + # This is the result of a custom oracle. try: - evaluation_result = EvaluationResult.__pydantic_model__.parse_raw(actual_str) # type: ignore + evaluation_result = OracleResult.__pydantic_model__.parse_raw(actual_str) # type: ignore except (TypeError, ValueError): pass else: @@ -253,7 +253,7 @@ def evaluate( # Try parsing the value as an actual Value. result = get_values(config.bundle, channel, actual_str) - if isinstance(result, EvaluationResult): + if isinstance(result, OracleResult): return result else: expected, readable_expected, actual, readable_actual = result @@ -269,9 +269,9 @@ def evaluate( # If the channel value is not None, but actual is, error. 
if actual is None: - return EvaluationResult( + return OracleResult( result=StatusMessage( - enum=Status.WRONG, human=get_i18n_string("evaluators.value.missing") + enum=Status.WRONG, human=get_i18n_string("functions.value.missing") ), readable_expected=readable_expected, readable_actual=readable_actual, @@ -301,7 +301,7 @@ def evaluate( ) ) - return EvaluationResult( + return OracleResult( result=StatusMessage( human=type_status, enum=Status.CORRECT if correct else Status.WRONG ), diff --git a/tested/testsuite.py b/tested/testsuite.py index b46fea54..ea47525c 100644 --- a/tested/testsuite.py +++ b/tested/testsuite.py @@ -38,7 +38,7 @@ @unique class TextBuiltin(StrEnum): - """Textual built in evaluators.""" + """Textual built-in functions.""" TEXT = auto() FILE = auto() @@ -46,26 +46,26 @@ class TextBuiltin(StrEnum): @unique class ValueBuiltin(StrEnum): - """Built in evaluators for values.""" + """Built-in functions for values.""" VALUE = auto() @unique class ExceptionBuiltin(StrEnum): - """Built in evaluators for exceptions.""" + """Built-in functions for exceptions.""" EXCEPTION = auto() @dataclass -class BaseBuiltinEvaluator: +class BaseBuiltinOracle: """ - A built-in evaluator in TESTed. Some basic evaluators are available, as + A built-in oracle in TESTed. Some basic functions are available, as enumerated by :class:`Builtin`. These are useful for things like comparing text, files or values. - This is the recommended and default evaluator, since it is the least amount + This is the recommended and default oracle, since it is the least amount of work and the most language independent. """ @@ -74,17 +74,17 @@ class BaseBuiltinEvaluator: @dataclass -class GenericTextEvaluator(BaseBuiltinEvaluator): +class GenericTextOracle(BaseBuiltinOracle): name: TextBuiltin = TextBuiltin.TEXT @dataclass -class GenericValueEvaluator(BaseBuiltinEvaluator): +class GenericValueOracle(BaseBuiltinOracle): name: ValueBuiltin = ValueBuiltin.VALUE @dataclass -class GenericExceptionEvaluator(BaseBuiltinEvaluator): +class GenericExceptionOracle(BaseBuiltinOracle): name: ExceptionBuiltin = ExceptionBuiltin.EXCEPTION @@ -100,55 +100,66 @@ class EvaluationFunction: @dataclass -class ProgrammedEvaluator: +class CustomCheckOracle: """ - Evaluate the responses with custom code. This is still a language-independent - method; the evaluator is run as part of the judge and receives its values from - that judge. This type is useful, for example, when doing exercises on sequence - alignments. + Evaluate the result with a custom check function. + + This oracle enables doing custom checks while still being programming-language + independent. The oracle is run through the judge infrastructure to translate + values between different programming languages. + + Although most programming languages are supported, we recommend using Python, + as TESTed can then apply specific optimisations, meaning it will be faster than + other languages. + + Some examples of intended use of this oracle are sequence alignment checking, + or evaluating non-deterministic return values. """ language: str function: EvaluationFunction arguments: List[Value] = field(default_factory=list) - type: Literal["programmed"] = "programmed" + type: Literal["programmed", "custom_check"] = "custom_check" @dataclass -class SpecificEvaluator: +class LanguageSpecificOracle: """ - Provide language-specific code that will be run in the same environment as the - user's code. 
While this is very powerful and allows you to test language-specific - constructs, there are a few caveats: + Evaluate the result with a custom check function written in a specific programming + language. Every programming language needs its own check function. + + While this is very powerful and allows you to test language-specific constructs, + there are a few caveats: 1. The code is run alongside the user code. This means the user can potentially take control of the code. - 2. This will limit the context_number of language an exercise is available in, - since you need to provide tests for all configs you want to support. + 2. This will limit the number of programming languages an exercise is available + in, since you need to provide tests for all configs you want to support. 3. It is a lot of work. You need to return the correct values, since the judge needs to understand what the result was. The code you must write should be a function that accepts the result of a user - expression. Note: this type of evaluator is only supported when using function - calls. If you want to evaluate_text stdout, you should use the custom evaluator + expression. Note: this type of oracle is only supported when using function + calls. If you want to evaluate stdout, you should use the custom check oracle instead. """ - evaluators: Dict[str, EvaluationFunction] + functions: Dict[str, EvaluationFunction] type: Literal["specific"] = "specific" def for_language(self, language: str) -> EvaluationFunction: - return self.evaluators[language] - - # noinspection PyMethodParameters - @validator("evaluators") - def validate_evaluator(cls, v): - """There should be at least one evaluator.""" + return self.functions[language] - if len(v.keys()) == 0: - raise ValueError("At least one specific evaluator is required.") + @root_validator(pre=True) + def compatability_migrator(cls, values: dict) -> dict: + if "evaluators" in values: + values["functions"] = values.pop("evaluators") + return values - return v + @validator("functions") + def validate_functions(cls, value: dict) -> dict: + assert len(value.keys()), "At least one check function is required." + return value @unique @@ -194,11 +205,17 @@ def get_used_features(self) -> FeatureSet: class TextOutputChannel(TextData): """Describes the output for textual channels.""" - evaluator: Union[GenericTextEvaluator, ProgrammedEvaluator] = field( - default_factory=GenericTextEvaluator + oracle: GenericTextOracle | CustomCheckOracle = field( + default_factory=GenericTextOracle ) show_expected: bool = True + @root_validator(pre=True) + def compatability_migrator(cls, values: dict) -> dict: + if "evaluator" in values: + values["oracle"] = values.pop("evaluator") + return values + @dataclass class FileOutputChannel(WithFeatures): @@ -206,8 +223,8 @@ class FileOutputChannel(WithFeatures): expected_path: str # Path to the file to compare to. 
actual_path: str # Path to the generated file (by the user code) - evaluator: Union[GenericTextEvaluator, ProgrammedEvaluator] = field( - default_factory=lambda: GenericTextEvaluator(name=TextBuiltin.FILE) + oracle: GenericTextOracle | CustomCheckOracle = field( + default_factory=lambda: GenericTextOracle(name=TextBuiltin.FILE) ) show_expected: bool = True @@ -219,15 +236,21 @@ def get_data_as_string(self, resources: Path) -> str: with open(file_path, "r") as file: return file.read() + @root_validator(pre=True) + def compatability_migrator(cls, values: dict) -> dict: + if "evaluator" in values: + values["oracle"] = values.pop("evaluator") + return values + @dataclass class ValueOutputChannel(WithFeatures): """Handles return values of function calls.""" value: Optional[Value] = None - evaluator: Union[ - GenericValueEvaluator, ProgrammedEvaluator, SpecificEvaluator - ] = field(default_factory=GenericValueEvaluator) + oracle: GenericValueOracle | CustomCheckOracle | LanguageSpecificOracle = field( + default_factory=GenericValueOracle + ) show_expected: bool = True def get_used_features(self) -> FeatureSet: @@ -235,13 +258,17 @@ def get_used_features(self) -> FeatureSet: return self.value.get_used_features() return NOTHING - # noinspection PyMethodParameters - @root_validator - def value_requirements(cls, values): + @validator("oracle") + def value_requirements(cls, oracle, values: dict): value = values.get("value") - evaluator = values.get("evaluator") - if isinstance(evaluator, GenericValueEvaluator) and not value: - raise ValueError("The generic evaluator needs an channel value.") + if isinstance(oracle, GenericValueOracle) and not value: + raise ValueError("When using the built-in oracle, a value is required.") + return oracle + + @root_validator(pre=True) + def compatability_migrator(cls, values: dict) -> dict: + if "evaluator" in values: + values["oracle"] = values.pop("evaluator") return values @@ -294,8 +321,8 @@ class ExceptionOutputChannel(WithFeatures): """Handles exceptions caused by the submission.""" exception: Optional[ExpectedException] = None - evaluator: Union[GenericExceptionEvaluator, SpecificEvaluator] = field( - default_factory=GenericExceptionEvaluator + oracle: GenericExceptionOracle | LanguageSpecificOracle = field( + default_factory=GenericExceptionOracle ) show_expected: bool = True @@ -304,13 +331,19 @@ def get_used_features(self) -> FeatureSet: return self.exception.get_used_features() return NOTHING - # noinspection PyMethodParameters - @root_validator - def value_requirements(cls, values): + @validator("oracle") + def value_requirements(cls, oracle, values: dict): exception = values.get("exception") - evaluator = values.get("evaluator") - if isinstance(evaluator, GenericExceptionEvaluator) and not exception: - raise ValueError("The generic evaluator needs a channel exception.") + if isinstance(oracle, GenericExceptionOracle) and not exception: + raise ValueError( + "When using the built-in oracle, an exception message is required." 
+ ) + return oracle + + @root_validator(pre=True) + def compatability_migrator(cls, values: dict) -> dict: + if "evaluator" in values: + values["oracle"] = values.pop("evaluator") return values @@ -346,11 +379,11 @@ def get_used_features(self) -> FeatureSet: SpecialOutputChannel = EmptyChannel | IgnoredChannel -EvaluatorOutputChannel = Union[ +OracleOutputChannel = Union[ TextOutputChannel, FileOutputChannel, ValueOutputChannel, ExceptionOutputChannel ] -NormalOutputChannel = EvaluatorOutputChannel | ExitCodeOutputChannel +NormalOutputChannel = OracleOutputChannel | ExitCodeOutputChannel OutputChannel = NormalOutputChannel | SpecialOutputChannel @@ -383,22 +416,22 @@ def get_used_features(self) -> FeatureSet: ] ) - def get_specific_eval_languages(self) -> Optional[Set[str]]: + def get_specific_oracle_languages(self) -> Optional[Set[str]]: """ - Get the languages supported by this output if language-specific evaluators + Get the languages supported by this output if language-specific oracles are used. If none are used, None is returned, otherwise a set of languages. """ languages = None if isinstance(self.exception, ExceptionOutputChannel): - if isinstance(self.exception.evaluator, SpecificEvaluator): - languages = set(self.exception.evaluator.evaluators.keys()) + if isinstance(self.exception.oracle, LanguageSpecificOracle): + languages = set(self.exception.oracle.functions.keys()) elif ( self.exception.exception is not None and self.exception.exception.types ): languages = set(self.exception.exception.types.keys()) if isinstance(self.result, ValueOutputChannel): - if isinstance(self.result.evaluator, SpecificEvaluator): - langs = set(self.result.evaluator.evaluators.keys()) + if isinstance(self.result.oracle, LanguageSpecificOracle): + langs = set(self.result.oracle.functions.keys()) if languages is not None: languages &= langs else: @@ -603,22 +636,22 @@ def unique_evaluation_functions(cls, contexts: List[Context]) -> List[Context]: for testcase in context.testcases: output = testcase.output if isinstance(output.result, ValueOutputChannel) and isinstance( - output.result.evaluator, SpecificEvaluator + output.result.oracle, LanguageSpecificOracle ): # noinspection PyTypeChecker for ( language, function, - ) in output.result.evaluator.evaluators.items(): + ) in output.result.oracle.functions.items(): eval_functions[language].append(function) if isinstance(output.exception, ExceptionOutputChannel) and isinstance( - output.exception.evaluator, SpecificEvaluator + output.exception.oracle, LanguageSpecificOracle ): # noinspection PyTypeChecker for ( language, function, - ) in output.exception.evaluator.evaluators.items(): + ) in output.exception.oracle.functions.items(): eval_functions[language].append(function) # Check within each language that the functions are unique over the @@ -636,7 +669,7 @@ def unique_evaluation_functions(cls, contexts: List[Context]) -> List[Context]: for function, file in function_file.items(): if len(file) > 1: raise ValueError( - f"Evaluator function names must be unique within the same " + f"Oracle function names must be unique within the same " f"run. 
{function} was used in multiple files: {file}" ) diff --git a/tests/test_dsl_yaml.py b/tests/test_dsl_yaml.py index e51e3299..c01a242e 100644 --- a/tests/test_dsl_yaml.py +++ b/tests/test_dsl_yaml.py @@ -20,9 +20,9 @@ StringType, ) from tested.testsuite import ( - GenericTextEvaluator, - GenericValueEvaluator, - ProgrammedEvaluator, + CustomCheckOracle, + GenericTextOracle, + GenericValueOracle, TextOutputChannel, ValueOutputChannel, parse_test_suite, @@ -205,7 +205,7 @@ def test_parse_ctx_with_config(): stdout = tc0.output.stdout assert stdout.data == "3.34" - options = stdout.evaluator.options + options = stdout.oracle.options assert len(options) == 3 assert options["tryFloatingPoint"] assert options["applyRounding"] @@ -213,7 +213,7 @@ def test_parse_ctx_with_config(): stdout = tc1.output.stdout assert stdout.data == "3.337" - options = stdout.evaluator.options + options = stdout.oracle.options assert len(options) == 3 assert options["tryFloatingPoint"] assert options["applyRounding"] @@ -221,7 +221,7 @@ def test_parse_ctx_with_config(): stdout = tc2.output.stdout assert stdout.data == "3.3" - options = stdout.evaluator.options + options = stdout.oracle.options assert len(options) == 3 assert options["tryFloatingPoint"] assert options["applyRounding"] @@ -229,7 +229,7 @@ def test_parse_ctx_with_config(): stderr = tc3.output.stderr assert stderr.data == " Fail " - options = stderr.evaluator.options + options = stderr.oracle.options assert len(options) == 2 assert not options["caseInsensitive"] assert options["ignoreWhitespace"] @@ -267,14 +267,14 @@ def test_statements(): assert len(tests0) == 2 assert isinstance(tests0[0].input, Assignment) assert tests0[0].output.stdout.data == "New safe" - assert tests0[0].output.stdout.evaluator.options["ignoreWhitespace"] + assert tests0[0].output.stdout.oracle.options["ignoreWhitespace"] assert isinstance(tests0[1].input, FunctionCall) assert tests0[1].output.result.value.data == "Ignore whitespace" assert len(tests1) == 2 assert isinstance(tests1[0].input, Assignment) assert tests1[0].output.stdout.data == "New safe" - assert not tests1[0].output.stdout.evaluator.options["ignoreWhitespace"] + assert not tests1[0].output.stdout.oracle.options["ignoreWhitespace"] assert isinstance(tests1[1].input, FunctionCall) assert tests1[1].output.result.value.data == 5 assert tests1[1].output.result.value.type == AdvancedNumericTypes.U_INT_8 @@ -304,7 +304,7 @@ def test_statement_and_main(): assert tc.input.main_call assert tc.input.arguments == ["-a", "5", "7"] assert tc.output.stdout.data == "12" - assert tc.output.stdout.evaluator.options["tryFloatingPoint"] + assert tc.output.stdout.oracle.options["tryFloatingPoint"] test = ctx.testcases[1] assert isinstance(test.input, FunctionCall) assert test.output.result.value.data == 12 @@ -419,8 +419,8 @@ def test_global_config_trickles_down(): json_str = translate_to_test_suite(yaml_str) suite = parse_test_suite(json_str) stdout = suite.tabs[0].contexts[0].testcases[0].output.stdout - assert isinstance(stdout.evaluator, GenericTextEvaluator) - config = stdout.evaluator.options + assert isinstance(stdout.oracle, GenericTextOracle) + config = stdout.oracle.options assert config["applyRounding"] assert config["roundTo"] == 63 assert config["tryFloatingPoint"] @@ -529,7 +529,7 @@ def test_text_built_in_checks_implied(): test = testcases[0] assert isinstance(test.input, FunctionCall) assert isinstance(test.output.stdout, TextOutputChannel) - assert isinstance(test.output.stdout.evaluator, GenericTextEvaluator) + assert 
isinstance(test.output.stdout.oracle, GenericTextOracle) assert test.output.stdout.data == "hallo" @@ -541,7 +541,7 @@ def test_text_built_in_checks_explicit(): - statement: 'test()' stdout: data: "hallo" - evaluator: "builtin" + oracle: "builtin" """ json_str = translate_to_test_suite(yaml_str) suite = parse_test_suite(json_str) @@ -553,7 +553,7 @@ def test_text_built_in_checks_explicit(): test = testcases[0] assert isinstance(test.input, FunctionCall) assert isinstance(test.output.stdout, TextOutputChannel) - assert isinstance(test.output.stdout.evaluator, GenericTextEvaluator) + assert isinstance(test.output.stdout.oracle, GenericTextOracle) assert test.output.stdout.data == "hallo" @@ -565,7 +565,7 @@ def test_text_custom_checks_correct(): - statement: 'test()' stdout: data: "hallo" - evaluator: "custom" + oracle: "custom_check" language: "python" file: "test.py" name: "evaluate_test" @@ -581,13 +581,13 @@ def test_text_custom_checks_correct(): test = testcases[0] assert isinstance(test.input, FunctionCall) assert isinstance(test.output.stdout, TextOutputChannel) - assert isinstance(test.output.stdout.evaluator, ProgrammedEvaluator) + assert isinstance(test.output.stdout.oracle, CustomCheckOracle) assert test.output.stdout.data == "hallo" - evaluator = test.output.stdout.evaluator - assert evaluator.language == "python" - assert evaluator.function.name == "evaluate_test" - assert evaluator.function.file == Path("test.py") - assert evaluator.arguments == [ + oracle = test.output.stdout.oracle + assert oracle.language == "python" + assert oracle.function.name == "evaluate_test" + assert oracle.function.file == Path("test.py") + assert oracle.arguments == [ StringType(type=BasicStringTypes.TEXT, data="yes"), NumberType(type=BasicNumericTypes.INTEGER, data=5), SequenceType( @@ -619,7 +619,7 @@ def test_value_built_in_checks_implied(): test = testcases[0] assert isinstance(test.input, FunctionCall) assert isinstance(test.output.result, ValueOutputChannel) - assert isinstance(test.output.result.evaluator, GenericValueEvaluator) + assert isinstance(test.output.result.oracle, GenericValueOracle) assert test.output.result.value == StringType( type=BasicStringTypes.TEXT, data="hallo" ) @@ -633,7 +633,7 @@ def test_value_built_in_checks_explicit(): - statement: 'test()' return_raw: value: "'hallo'" - evaluator: "builtin" + oracle: "builtin" """ json_str = translate_to_test_suite(yaml_str) suite = parse_test_suite(json_str) @@ -645,7 +645,7 @@ def test_value_built_in_checks_explicit(): test = testcases[0] assert isinstance(test.input, FunctionCall) assert isinstance(test.output.result, ValueOutputChannel) - assert isinstance(test.output.result.evaluator, GenericValueEvaluator) + assert isinstance(test.output.result.oracle, GenericValueOracle) assert test.output.result.value == StringType( type=BasicStringTypes.TEXT, data="hallo" ) @@ -659,7 +659,7 @@ def test_value_custom_checks_correct(): - statement: 'test()' return_raw: value: "'hallo'" - evaluator: "custom" + oracle: "custom_check" language: "python" file: "test.py" name: "evaluate_test" @@ -675,15 +675,15 @@ def test_value_custom_checks_correct(): test = testcases[0] assert isinstance(test.input, FunctionCall) assert isinstance(test.output.result, ValueOutputChannel) - assert isinstance(test.output.result.evaluator, ProgrammedEvaluator) + assert isinstance(test.output.result.oracle, CustomCheckOracle) assert test.output.result.value == StringType( type=BasicStringTypes.TEXT, data="hallo" ) - evaluator = test.output.result.evaluator - assert 
evaluator.language == "python" - assert evaluator.function.name == "evaluate_test" - assert evaluator.function.file == Path("test.py") - assert evaluator.arguments == [ + oracle = test.output.result.oracle + assert oracle.language == "python" + assert oracle.function.name == "evaluate_test" + assert oracle.function.file == Path("test.py") + assert oracle.arguments == [ StringType(type=BasicStringTypes.TEXT, data="yes"), NumberType(type=BasicNumericTypes.INTEGER, data=5), SequenceType( diff --git a/tests/test_evaluators.py b/tests/test_oracles.py similarity index 83% rename from tests/test_evaluators.py rename to tests/test_oracles.py index 041bfd3c..8841394b 100644 --- a/tests/test_evaluators.py +++ b/tests/test_oracles.py @@ -9,10 +9,10 @@ from tested.configs import create_bundle from tested.datatypes import BasicObjectTypes, BasicSequenceTypes, BasicStringTypes from tested.dodona import Status -from tested.evaluators.common import EvaluationResult, EvaluatorConfig -from tested.evaluators.exception import evaluate as evaluate_exception -from tested.evaluators.text import evaluate_file, evaluate_text -from tested.evaluators.value import evaluate as evaluate_value +from tested.oracles.common import OracleConfig, OracleResult +from tested.oracles.exception import evaluate as evaluate_exception +from tested.oracles.text import evaluate_file, evaluate_text +from tested.oracles.value import evaluate as evaluate_value from tested.serialisation import ( ExceptionValue, ObjectKeyValuePair, @@ -31,19 +31,19 @@ from tests.manual_utils import configuration -def evaluator_config( +def oracle_config( tmp_path: Path, pytestconfig, options=None, language="python" -) -> EvaluatorConfig: +) -> OracleConfig: if options is None: options = dict() conf = configuration(pytestconfig, "", language, tmp_path) plan = Suite() bundle = create_bundle(conf, sys.stdout, plan) - return EvaluatorConfig(bundle=bundle, options=options, context_dir=tmp_path) + return OracleConfig(bundle=bundle, options=options, context_dir=tmp_path) -def test_text_evaluator(tmp_path: Path, pytestconfig): - config = evaluator_config(tmp_path, pytestconfig, {"ignoreWhitespace": False}) +def test_text_oracle(tmp_path: Path, pytestconfig): + config = oracle_config(tmp_path, pytestconfig, {"ignoreWhitespace": False}) channel = TextOutputChannel(data="expected") result = evaluate_text(config, channel, "expected") assert result.result.enum == Status.CORRECT @@ -56,8 +56,8 @@ def test_text_evaluator(tmp_path: Path, pytestconfig): assert result.readable_actual == "nothing" -def test_text_evaluator_whitespace(tmp_path: Path, pytestconfig): - config = evaluator_config(tmp_path, pytestconfig, {"ignoreWhitespace": True}) +def test_text_oracle_whitespace(tmp_path: Path, pytestconfig): + config = oracle_config(tmp_path, pytestconfig, {"ignoreWhitespace": True}) channel = TextOutputChannel(data="expected") result = evaluate_text(config, channel, "expected ") assert result.result.enum == Status.CORRECT @@ -70,8 +70,8 @@ def test_text_evaluator_whitespace(tmp_path: Path, pytestconfig): assert result.readable_actual == "nothing" -def test_text_evaluator_case_sensitive(tmp_path: Path, pytestconfig): - config = evaluator_config(tmp_path, pytestconfig, {"caseInsensitive": True}) +def test_text_oracle_case_sensitive(tmp_path: Path, pytestconfig): + config = oracle_config(tmp_path, pytestconfig, {"caseInsensitive": True}) channel = TextOutputChannel(data="expected") result = evaluate_text(config, channel, "Expected") assert result.result.enum == Status.CORRECT @@ 
-84,8 +84,8 @@ def test_text_evaluator_case_sensitive(tmp_path: Path, pytestconfig): assert result.readable_actual == "nothing" -def test_text_evaluator_combination(tmp_path: Path, pytestconfig): - config = evaluator_config( +def test_text_oracle_combination(tmp_path: Path, pytestconfig): + config = oracle_config( tmp_path, pytestconfig, {"caseInsensitive": True, "ignoreWhitespace": True} ) channel = TextOutputChannel(data="expected") @@ -100,8 +100,8 @@ def test_text_evaluator_combination(tmp_path: Path, pytestconfig): assert result.readable_actual == "nothing" -def test_text_evaluator_rounding(tmp_path: Path, pytestconfig): - config = evaluator_config( +def test_text_oracle_rounding(tmp_path: Path, pytestconfig): + config = oracle_config( tmp_path, pytestconfig, {"tryFloatingPoint": True, "applyRounding": True} ) channel = TextOutputChannel(data="1.333") @@ -116,8 +116,8 @@ def test_text_evaluator_rounding(tmp_path: Path, pytestconfig): assert result.readable_actual == "1.5" -def test_text_evaluator_round_to(tmp_path: Path, pytestconfig): - config = evaluator_config( +def test_text_oracle_round_to(tmp_path: Path, pytestconfig): + config = oracle_config( tmp_path, pytestconfig, {"tryFloatingPoint": True, "applyRounding": True, "roundTo": 1}, @@ -134,9 +134,9 @@ def test_text_evaluator_round_to(tmp_path: Path, pytestconfig): assert result.readable_actual == "1.5" -def test_file_evaluator_full_wrong(tmp_path: Path, pytestconfig, mocker): - config = evaluator_config(tmp_path, pytestconfig, {"mode": "full"}) - s = mocker.spy(tested.evaluators.text, name="compare_text") +def test_file_oracle_full_wrong(tmp_path: Path, pytestconfig, mocker): + config = oracle_config(tmp_path, pytestconfig, {"mode": "full"}) + s = mocker.spy(tested.oracles.text, name="compare_text") mock_files = [ mocker.mock_open(read_data=content).return_value for content in ["expected\nexpected", "actual\nactual"] @@ -154,9 +154,9 @@ def test_file_evaluator_full_wrong(tmp_path: Path, pytestconfig, mocker): assert result.readable_actual == "actual\nactual" -def test_file_evaluator_full_correct(tmp_path: Path, pytestconfig, mocker): - config = evaluator_config(tmp_path, pytestconfig, {"mode": "full"}) - s = mocker.spy(tested.evaluators.text, name="compare_text") +def test_file_oracle_full_correct(tmp_path: Path, pytestconfig, mocker): + config = oracle_config(tmp_path, pytestconfig, {"mode": "full"}) + s = mocker.spy(tested.oracles.text, name="compare_text") mock_files = [ mocker.mock_open(read_data=content).return_value for content in ["expected\nexpected", "expected\nexpected"] @@ -174,11 +174,11 @@ def test_file_evaluator_full_correct(tmp_path: Path, pytestconfig, mocker): assert result.readable_actual == "expected\nexpected" -def test_file_evaluator_line_wrong(tmp_path: Path, pytestconfig, mocker): - config = evaluator_config( +def test_file_oracle_line_wrong(tmp_path: Path, pytestconfig, mocker): + config = oracle_config( tmp_path, pytestconfig, {"mode": "line", "stripNewlines": True} ) - s = mocker.spy(tested.evaluators.text, name="compare_text") + s = mocker.spy(tested.oracles.text, name="compare_text") mock_files = [ mocker.mock_open(read_data=content).return_value for content in ["expected\nexpected2", "actual\nactual2"] @@ -198,11 +198,11 @@ def test_file_evaluator_line_wrong(tmp_path: Path, pytestconfig, mocker): assert result.readable_actual == "actual\nactual2" -def test_file_evaluator_line_correct(tmp_path: Path, pytestconfig, mocker): - config = evaluator_config( +def test_file_oracle_line_correct(tmp_path: Path, 
pytestconfig, mocker): + config = oracle_config( tmp_path, pytestconfig, {"mode": "line", "stripNewlines": True} ) - s = mocker.spy(tested.evaluators.text, name="compare_text") + s = mocker.spy(tested.oracles.text, name="compare_text") mock_files = [ mocker.mock_open(read_data=content).return_value for content in ["expected\nexpected2", "expected\nexpected2"] @@ -222,11 +222,11 @@ def test_file_evaluator_line_correct(tmp_path: Path, pytestconfig, mocker): assert result.readable_actual == "expected\nexpected2" -def test_file_evaluator_strip_lines_correct(tmp_path: Path, pytestconfig, mocker): - config = evaluator_config( +def test_file_oracle_strip_lines_correct(tmp_path: Path, pytestconfig, mocker): + config = oracle_config( tmp_path, pytestconfig, {"mode": "line", "stripNewlines": True} ) - s = mocker.spy(tested.evaluators.text, name="compare_text") + s = mocker.spy(tested.oracles.text, name="compare_text") mock_files = [ mocker.mock_open(read_data=content).return_value for content in ["expected\nexpected2\n", "expected\nexpected2"] @@ -246,11 +246,11 @@ def test_file_evaluator_strip_lines_correct(tmp_path: Path, pytestconfig, mocker assert result.readable_actual == "expected\nexpected2" -def test_file_evaluator_dont_strip_lines_correct(tmp_path: Path, pytestconfig, mocker): - config = evaluator_config( +def test_file_oracle_dont_strip_lines_correct(tmp_path: Path, pytestconfig, mocker): + config = oracle_config( tmp_path, pytestconfig, {"mode": "line", "stripNewlines": False} ) - s = mocker.spy(tested.evaluators.text, name="compare_text") + s = mocker.spy(tested.oracles.text, name="compare_text") mock_files = [ mocker.mock_open(read_data=content).return_value for content in ["expected\nexpected2\n", "expected\nexpected2"] @@ -270,8 +270,8 @@ def test_file_evaluator_dont_strip_lines_correct(tmp_path: Path, pytestconfig, m assert result.readable_actual == "expected\nexpected2" -def test_exception_evaluator_only_messages_correct(tmp_path: Path, pytestconfig): - config = evaluator_config(tmp_path, pytestconfig) +def test_exception_oracle_only_messages_correct(tmp_path: Path, pytestconfig): + config = oracle_config(tmp_path, pytestconfig) channel = ExceptionOutputChannel(exception=ExpectedException(message="Test error")) actual_value = json.dumps( ExceptionValue(message="Test error", type="ZeroDivisionError"), @@ -283,8 +283,8 @@ def test_exception_evaluator_only_messages_correct(tmp_path: Path, pytestconfig) assert result.readable_actual == "ZeroDivisionError: Test error" -def test_exception_evaluator_only_messages_wrong(tmp_path: Path, pytestconfig): - config = evaluator_config(tmp_path, pytestconfig) +def test_exception_oracle_only_messages_wrong(tmp_path: Path, pytestconfig): + config = oracle_config(tmp_path, pytestconfig) channel = ExceptionOutputChannel(exception=ExpectedException(message="Test error")) actual_value = json.dumps( ExceptionValue(message="Pief poef", type="ZeroDivisionError"), @@ -296,7 +296,7 @@ def test_exception_evaluator_only_messages_wrong(tmp_path: Path, pytestconfig): assert result.readable_actual == "Pief poef" -def test_exception_evaluator_correct_message_wrong_type(tmp_path: Path, pytestconfig): +def test_exception_oracle_correct_message_wrong_type(tmp_path: Path, pytestconfig): channel = ExceptionOutputChannel( exception=ExpectedException( message="Test error", @@ -309,21 +309,21 @@ def test_exception_evaluator_correct_message_wrong_type(tmp_path: Path, pytestco ) # Test for Python - config = evaluator_config(tmp_path, pytestconfig, language="python") + config 
= oracle_config(tmp_path, pytestconfig, language="python") result = evaluate_exception(config, channel, actual_value) assert result.result.enum == Status.WRONG assert result.readable_expected == "PiefError: Test error" assert result.readable_actual == "ZeroDivisionError: Test error" # Test for JavaScript - config = evaluator_config(tmp_path, pytestconfig, language="javascript") + config = oracle_config(tmp_path, pytestconfig, language="javascript") result = evaluate_exception(config, channel, actual_value) assert result.result.enum == Status.WRONG assert result.readable_expected == "PafError: Test error" assert result.readable_actual == "ZeroDivisionError: Test error" -def test_exception_evaluator_wrong_message_correct_type(tmp_path: Path, pytestconfig): +def test_exception_oracle_wrong_message_correct_type(tmp_path: Path, pytestconfig): channel = ExceptionOutputChannel( exception=ExpectedException( message="Test error", @@ -332,7 +332,7 @@ def test_exception_evaluator_wrong_message_correct_type(tmp_path: Path, pytestco ) # Test for Python - config = evaluator_config(tmp_path, pytestconfig, language="python") + config = oracle_config(tmp_path, pytestconfig, language="python") actual_value = json.dumps( ExceptionValue(message="Test errors", type="PiefError"), default=pydantic_encoder, @@ -343,7 +343,7 @@ def test_exception_evaluator_wrong_message_correct_type(tmp_path: Path, pytestco assert result.readable_actual == "PiefError: Test errors" # Test for JavaScript - config = evaluator_config(tmp_path, pytestconfig, language="javascript") + config = oracle_config(tmp_path, pytestconfig, language="javascript") actual_value = json.dumps( ExceptionValue(message="Test errors", type="PafError"), default=pydantic_encoder ) @@ -353,7 +353,7 @@ def test_exception_evaluator_wrong_message_correct_type(tmp_path: Path, pytestco assert result.readable_actual == "PafError: Test errors" -def test_exception_evaluator_correct_type_and_message(tmp_path: Path, pytestconfig): +def test_exception_oracle_correct_type_and_message(tmp_path: Path, pytestconfig): channel = ExceptionOutputChannel( exception=ExpectedException( message="Test error", @@ -362,7 +362,7 @@ def test_exception_evaluator_correct_type_and_message(tmp_path: Path, pytestconf ) # Test for Python - config = evaluator_config(tmp_path, pytestconfig, language="python") + config = oracle_config(tmp_path, pytestconfig, language="python") actual_value = json.dumps( ExceptionValue(message="Test error", type="PiefError"), default=pydantic_encoder ) @@ -372,7 +372,7 @@ def test_exception_evaluator_correct_type_and_message(tmp_path: Path, pytestconf assert result.readable_actual == "PiefError: Test error" # Test for JavaScript - config = evaluator_config(tmp_path, pytestconfig, language="javascript") + config = oracle_config(tmp_path, pytestconfig, language="javascript") actual_value = json.dumps( ExceptionValue(message="Test error", type="PafError"), default=pydantic_encoder ) @@ -390,7 +390,7 @@ def test_value_string_as_text_is_detected(tmp_path: Path, pytestconfig): StringType(type=BasicStringTypes.TEXT, data="multi\nline\nstring"), default=pydantic_encoder, ) - config = evaluator_config(tmp_path, pytestconfig, language="python") + config = oracle_config(tmp_path, pytestconfig, language="python") result = evaluate_value(config, channel, actual_value) assert result.result.enum == Status.CORRECT assert result.readable_expected == "multi\nline\nstring" @@ -405,7 +405,7 @@ def test_value_string_as_text_is_not_detected_if_disabled(tmp_path: Path, pytest 
StringType(type=BasicStringTypes.TEXT, data="multi\nline\nstring"), default=pydantic_encoder, ) - config = evaluator_config( + config = oracle_config( tmp_path, pytestconfig, language="python", options={"stringsAsText": False} ) result = evaluate_value(config, channel, actual_value) @@ -424,7 +424,7 @@ def test_value_string_as_text_is_not_detected_if_not_multiline( StringType(type=BasicStringTypes.TEXT, data="multi\nline\nstring"), default=pydantic_encoder, ) - config = evaluator_config( + config = oracle_config( tmp_path, pytestconfig, language="python", options={"stringsAsText": False} ) result = evaluate_value(config, channel, actual_value) @@ -437,7 +437,7 @@ def test_value_string_as_text_is_detected_when_no_actual(tmp_path: Path, pytestc channel = ValueOutputChannel( value=StringType(type=BasicStringTypes.TEXT, data="multi\nline\nstring") ) - config = evaluator_config(tmp_path, pytestconfig, language="python") + config = oracle_config(tmp_path, pytestconfig, language="python") result = evaluate_value(config, channel, "") assert result.result.enum == Status.WRONG assert result.readable_expected == "multi\nline\nstring" @@ -486,7 +486,7 @@ def test_nested_sets_type_check_works_if_correct(tmp_path: Path, pytestconfig): ], ) channel = ValueOutputChannel(value=expected_value) - config = evaluator_config(tmp_path, pytestconfig, language="python") + config = oracle_config(tmp_path, pytestconfig, language="python") result = evaluate_value( config, channel, @@ -532,7 +532,7 @@ def test_too_many_sequence_values_dont_crash(tmp_path: Path, pytestconfig): ], ) channel = ValueOutputChannel(value=expected_value) - config = evaluator_config(tmp_path, pytestconfig, language="python") + config = oracle_config(tmp_path, pytestconfig, language="python") result = evaluate_value( config, channel, @@ -568,7 +568,7 @@ def test_too_many_object_values_dont_crash(tmp_path: Path, pytestconfig): ], ) channel = ValueOutputChannel(value=expected_value) - config = evaluator_config(tmp_path, pytestconfig, language="python") + config = oracle_config(tmp_path, pytestconfig, language="python") result = evaluate_value( config, channel, diff --git a/tests/test_suite.py b/tests/test_suite.py new file mode 100644 index 00000000..221d7767 --- /dev/null +++ b/tests/test_suite.py @@ -0,0 +1,79 @@ +from tested.testsuite import ( + CustomCheckOracle, + FileOutputChannel, + TextOutputChannel, + ValueOutputChannel, +) + + +def test_text_output_is_compatible_oracle(): + old_structure = { + "evaluator": { + "language": "python", + "function": {"file": "evaluate.py"}, + "type": "custom_check", + }, + "data": "example", + } + + # noinspection PyUnresolvedReferences + result = TextOutputChannel.__pydantic_model__.parse_obj(old_structure) + + assert isinstance(result.oracle, CustomCheckOracle) + assert result.oracle.language == "python" + assert result.oracle.function.file.name == "evaluate.py" + + +def test_file_output_is_compatible_oracle(): + old_structure = { + "evaluator": { + "language": "python", + "function": {"file": "evaluate.py"}, + "type": "custom_check", + }, + "expected_path": "one.py", + "actual_path": "two.py", + } + + # noinspection PyUnresolvedReferences + result = FileOutputChannel.__pydantic_model__.parse_obj(old_structure) + + assert isinstance(result.oracle, CustomCheckOracle) + assert result.oracle.language == "python" + assert result.oracle.function.file.name == "evaluate.py" + + +def test_value_output_is_compatible_oracle(): + old_structure = { + "evaluator": { + "language": "python", + "function": {"file": 
"evaluate.py"}, + "type": "custom_check", + }, + "value": {"type": "text", "data": "yes"}, + } + + # noinspection PyUnresolvedReferences + result = ValueOutputChannel.__pydantic_model__.parse_obj(old_structure) + + assert isinstance(result.oracle, CustomCheckOracle) + assert result.oracle.language == "python" + assert result.oracle.function.file.name == "evaluate.py" + + +def test_exception_output_is_compatible_oracle(): + old_structure = { + "evaluator": { + "language": "python", + "function": {"file": "evaluate.py"}, + "type": "custom_check", + }, + "exception": {"message": "Yes", "types": {"python": "yes"}}, + } + + # noinspection PyUnresolvedReferences + result = ValueOutputChannel.__pydantic_model__.parse_obj(old_structure) + + assert isinstance(result.oracle, CustomCheckOracle) + assert result.oracle.language == "python" + assert result.oracle.function.file.name == "evaluate.py"