diff --git a/llm_cooperation/experiments/dictator.py b/llm_cooperation/experiments/dictator.py
index 4d7336d..c399219 100644
--- a/llm_cooperation/experiments/dictator.py
+++ b/llm_cooperation/experiments/dictator.py
@@ -45,7 +45,11 @@
     round_instructions,
     run_and_record_experiment,
 )
-from llm_cooperation.experiments.dilemma import CONDITION_LABEL, Label
+from llm_cooperation.experiments.dilemma import (
+    CONDITION_LABEL,
+    CONDITION_LABELS_REVERSED,
+    Label,
+)
 from llm_cooperation.gametypes.oneshot import OneShotResults, run_experiment
 
 TOTAL_SHARE = 4
@@ -115,12 +119,14 @@ def description(self, participant_condition: Participant) -> str:
     def donation(self) -> float:
         return float(self.value.value)
 
-    @property
-    def payoff_ego(self) -> float:
+    def payoff_ego(self, participant_condition: Participant) -> float:
+        if participant_condition[CONDITION_LABELS_REVERSED]:
+            return self.donation
         return TOTAL_SHARE - self.donation
 
-    @property
-    def payoff_allo(self) -> float:
+    def payoff_allo(self, participant_condition: Participant) -> float:
+        if participant_condition[CONDITION_LABELS_REVERSED]:
+            return TOTAL_SHARE - self.donation
         return self.donation
 
@@ -132,14 +138,13 @@ def payoff_allo(self) -> float:
 
 
 all_dictator_choices = [DictatorChoice(c) for c in DictatorEnum]
 
-
 DICTATOR_ATTRIBUTES: Grid = {
     CONDITION_CHAIN_OF_THOUGHT: [True, False],
     CONDITION_LABEL: all_values(Label),
     CONDITION_CASE: all_values(Case),
     CONDITION_PRONOUN: all_values(Pronoun),
     CONDITION_DEFECT_FIRST: [True, False],
-    # CONDITION_LABELS_REVERSED: [True, False],
+    CONDITION_LABELS_REVERSED: [True, False],
 }
 
@@ -147,19 +152,19 @@ def inverted(mappings: Dict[DictatorEnum, str]) -> Dict[str, DictatorEnum]:
     return {value: key for key, value in mappings.items()}
 
 
-def payout_ego(choice: DictatorChoice) -> str:
-    return amount_as_str(choice.payoff_ego)
+def payout_ego(participant: Participant, choice: DictatorChoice) -> str:
+    return amount_as_str(choice.payoff_ego(participant))
 
 
-def payout_allo(choice: DictatorChoice) -> str:
-    return amount_as_str(choice.payoff_allo)
+def payout_allo(participant: Participant, choice: DictatorChoice) -> str:
+    return amount_as_str(choice.payoff_allo(participant))
 
 
 def describe_payoffs(participant: Participant, choice: DictatorChoice) -> str:
     description: str = choice.description(participant)
     return f"""
-    If you choose '{description}, then you will earn {payout_ego(choice)}
-and your partner will also earn {payout_allo(choice)}.
+    If you choose '{description}, then you will earn {payout_ego(participant, choice)}
+and your partner will also earn {payout_allo(participant, choice)}.
""" @@ -216,12 +221,12 @@ def extract_choice_dictator( raise ValueError(f"Cannot determine choice from {completion}") -def payoffs_dictator(player1: DictatorChoice) -> float: - return player1.payoff_ego +def payoffs_dictator(participant: Participant, player1: DictatorChoice) -> float: + return player1.payoff_ego(participant) -def compute_freq_dictator(history: DictatorChoice) -> float: - return history.donation / TOTAL_SHARE +def compute_freq_dictator(participant: Participant, history: DictatorChoice) -> float: + return history.payoff_allo(participant) / TOTAL_SHARE @lru_cache diff --git a/llm_cooperation/experiments/principalagent.py b/llm_cooperation/experiments/principalagent.py index c0594a3..5d34046 100644 --- a/llm_cooperation/experiments/principalagent.py +++ b/llm_cooperation/experiments/principalagent.py @@ -121,11 +121,11 @@ def extract_choice_pa(participant: Participant, completion: Completion) -> PACho raise ValueError(f"Cannot determine choice from {completion}") -def payoffs_pa(__choice__: PAChoice) -> float: +def payoffs_pa(__participant__: Participant, __choice__: PAChoice) -> float: return np.nan -def compute_freq_pa(__choice__: PAChoice) -> float: +def compute_freq_pa(__participant__: Participant, __choice__: PAChoice) -> float: return np.nan diff --git a/llm_cooperation/gametypes/oneshot.py b/llm_cooperation/gametypes/oneshot.py index faac505..26cdb5e 100644 --- a/llm_cooperation/gametypes/oneshot.py +++ b/llm_cooperation/gametypes/oneshot.py @@ -91,21 +91,21 @@ def play_game( def compute_scores( conversation: List[Completion], - payoffs: Callable[[CT], float], + payoffs: Callable[[Participant, CT], float], extract_choice: Callable[[Participant, Completion], CT], participant_condition: Participant, ) -> Tuple[float, CT]: ai_choice = extract_choice(participant_condition, conversation[1]) logger.debug("ai_choice = %s", ai_choice) - score = payoffs(ai_choice) + score = payoffs(participant_condition, ai_choice) return score, ai_choice def analyse( conversation: List[Completion], - payoffs: Callable[[CT], float], + payoffs: Callable[[Participant, CT], float], extract_choice: Callable[[Participant, Completion], CT], - compute_freq: Callable[[CT], float], + compute_freq: Callable[[Participant, CT], float], participant_condition: Participant, ) -> Tuple[float, float, Optional[CT], List[str]]: try: @@ -113,7 +113,7 @@ def analyse( score, ai_choice = compute_scores( list(conversation), payoffs, extract_choice, participant_condition ) - freq = compute_freq(ai_choice) + freq = compute_freq(participant_condition, ai_choice) return score, freq, ai_choice, history except ValueError as e: logger.error("ValueError while running sample: %s", e) @@ -124,9 +124,9 @@ def generate_replications( participant: Participant, num_replications: int, generate_instruction_prompt: PromptGenerator, - payoffs: Callable[[CT], float], + payoffs: Callable[[Participant, CT], float], extract_choice: Callable[[Participant, Completion], CT], - compute_freq: Callable[[CT], float], + compute_freq: Callable[[Participant, CT], float], model_setup: ModelSetup, ) -> Iterable[Tuple[float, float, Optional[CT], List[str]]]: # pylint: disable=R0801 @@ -143,9 +143,9 @@ def run_experiment( participants: Iterable[Participant], num_replications: int, generate_instruction_prompt: PromptGenerator, - payoffs: Callable[[CT], float], + payoffs: Callable[[Participant, CT], float], extract_choice: Callable[[Participant, Completion], CT], - compute_freq: Callable[[CT], float], + compute_freq: Callable[[Participant, CT], float], 
     model_setup: ModelSetup,
 ) -> OneShotResults[CT]:
     return OneShotResults(
diff --git a/tests/conftest.py b/tests/conftest.py
index 25bc4c0..41c35f9 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -121,3 +121,8 @@ def with_defect_first(base_condition: Participant) -> Participant:
 @pytest.fixture
 def with_gender_neutral_pronoun(base_condition: Participant) -> Participant:
     return modify_condition(base_condition, CONDITION_PRONOUN, Pronoun.THEY.value)
+
+
+@pytest.fixture
+def with_labels_reversed(base_condition: Participant) -> Participant:
+    return modify_condition(base_condition, CONDITION_LABELS_REVERSED, True)
diff --git a/tests/test_dictator.py b/tests/test_dictator.py
index f0cc1f3..31afc66 100644
--- a/tests/test_dictator.py
+++ b/tests/test_dictator.py
@@ -57,26 +57,26 @@
 @pytest.mark.parametrize(
-    "enum, expected_description, expected_payoff_ego, expected_payoff_allo",
+    "condition, enum, expected_description, expected_payoff_ego, expected_payoff_allo",
     [
-        (DictatorEnum.BLACK, "black", 4.0, 0.0),
-        (DictatorEnum.BROWN, "brown", 3.0, 1.0),
-        (DictatorEnum.GREEN, "green", 2.0, 2.0),
-        (DictatorEnum.BLUE, "blue", 1.0, 3.0),
-        (DictatorEnum.WHITE, "white", 0.0, 4.0),
+        (lazy_fixture("base_condition"), DictatorEnum.BLACK, "black", 4.0, 0.0),
+        (lazy_fixture("base_condition"), DictatorEnum.BROWN, "brown", 3.0, 1.0),
+        (lazy_fixture("base_condition"), DictatorEnum.GREEN, "green", 2.0, 2.0),
+        (lazy_fixture("base_condition"), DictatorEnum.BLUE, "blue", 1.0, 3.0),
+        (lazy_fixture("base_condition"), DictatorEnum.WHITE, "white", 0.0, 4.0),
     ],
 )
 def test_dictator_choice(
+    condition: Participant,
     enum: DictatorEnum,
     expected_description: str,
     expected_payoff_ego: float,
     expected_payoff_allo,
 ):
     choice = DictatorChoice(enum)
-    condition: Participant = Participant(dict())
     assert expected_description in choice.description(condition).lower()
-    assert choice.payoff_ego == expected_payoff_ego
-    assert choice.payoff_allo == expected_payoff_allo
+    assert choice.payoff_ego(condition) == expected_payoff_ego
+    assert choice.payoff_allo(condition) == expected_payoff_allo
 
 
 @pytest.mark.parametrize(
@@ -109,27 +109,55 @@ def test_extract_choice_dictator(
 @pytest.mark.parametrize(
-    "test_choice, expected_payoff",
-    [(BLACK, 4), (BROWN, 3), (GREEN, 2), (BLUE, 1), (WHITE, 0)],
+    "condition, test_choice, expected_payoff",
+    [
+        (lazy_fixture("base_condition"), BLACK, 4),
+        (lazy_fixture("base_condition"), BROWN, 3),
+        (lazy_fixture("base_condition"), GREEN, 2),
+        (lazy_fixture("base_condition"), BLUE, 1),
+        (lazy_fixture("base_condition"), WHITE, 0),
+        (lazy_fixture("with_labels_reversed"), BLACK, 0),
+        (lazy_fixture("with_labels_reversed"), BROWN, 1),
+        (lazy_fixture("with_labels_reversed"), GREEN, 2),
+        (lazy_fixture("with_labels_reversed"), BLUE, 3),
+        (lazy_fixture("with_labels_reversed"), WHITE, 4),
+    ],
 )
-def test_payoffs_dictator(test_choice: DictatorChoice, expected_payoff):
-    result = payoffs_dictator(test_choice)
+def test_payoffs_dictator(
+    condition: Participant, test_choice: DictatorChoice, expected_payoff
+):
+    result = payoffs_dictator(condition, test_choice)
     assert result == expected_payoff
 
 
 @pytest.mark.parametrize(
-    "test_choice, expected_payoff",
-    [(BLACK, 0), (BROWN, 1), (GREEN, 2), (BLUE, 3), (WHITE, 4)],
+    "condition, test_choice, expected_payoff",
+    [
+        (lazy_fixture("base_condition"), BLACK, 0),
+        (lazy_fixture("base_condition"), BROWN, 1),
+        (lazy_fixture("base_condition"), GREEN, 2),
+        (lazy_fixture("base_condition"), BLUE, 3),
+        (lazy_fixture("base_condition"), WHITE, 4),
+        (lazy_fixture("with_labels_reversed"), BLACK, 4),
+        (lazy_fixture("with_labels_reversed"), BROWN, 3),
+        (lazy_fixture("with_labels_reversed"), GREEN, 2),
+        (lazy_fixture("with_labels_reversed"), BLUE, 1),
+        (lazy_fixture("with_labels_reversed"), WHITE, 0),
+    ],
 )
-def test_payoff_allo(test_choice: DictatorChoice, expected_payoff):
-    result = test_choice.payoff_allo
+def test_payoff_allo(
+    condition: Participant, test_choice: DictatorChoice, expected_payoff
+):
+    result = test_choice.payoff_allo(condition)
     assert result == expected_payoff
 
 
 @pytest.mark.parametrize("test_choice", all_dictator_choices)
-def test_compute_freq_dictator(test_choice: DictatorChoice):
-    result = compute_freq_dictator(test_choice)
-    assert result == test_choice.payoff_allo / TOTAL_SHARE
+def test_compute_freq_dictator(
+    base_condition: Participant, test_choice: DictatorChoice
+):
+    result = compute_freq_dictator(base_condition, test_choice)
+    assert result == test_choice.payoff_allo(base_condition) / TOTAL_SHARE
 
 
 @pytest.mark.parametrize(
@@ -173,9 +201,23 @@ def test_choice_menu(condition: Participant):
     assert re.search(rf"{white}.*{black}", result)
 
 
-def test_payout_ego():
-    assert payout_ego(BLACK) == "$4.00"
+@pytest.mark.parametrize(
+    "condition, expected",
+    [
+        (lazy_fixture("base_condition"), "$4.00"),
+        (lazy_fixture("with_labels_reversed"), "$0.00"),
+    ],
+)
+def test_payout_ego(condition: Participant, expected: str):
+    assert payout_ego(condition, BLACK) == expected
 
 
-def test_payout_allo():
-    assert payout_allo(BLACK) == "$0.00"
+@pytest.mark.parametrize(
+    "condition, expected",
+    [
+        (lazy_fixture("base_condition"), "$0.00"),
+        (lazy_fixture("with_labels_reversed"), "$4.00"),
+    ],
+)
+def test_payout_allo(condition: Participant, expected: str):
+    assert payout_allo(condition, BLACK) == expected
diff --git a/tests/test_oneshot.py b/tests/test_oneshot.py
index daf7d7a..b3abcd4 100644
--- a/tests/test_oneshot.py
+++ b/tests/test_oneshot.py
@@ -97,7 +97,7 @@ def test_compute_scores(base_condition: Participant):
     mock_choice = Mock(spec=Choice)
     result = compute_scores(
         conversation=[user_message("prompt"), user_message("answer")],
-        payoffs=lambda _: mock_payoff,
+        payoffs=lambda __condition__, __choice__: mock_payoff,
         extract_choice=lambda __condition__, __completion__: mock_choice,
         participant_condition=base_condition,
     )
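
The net effect of the CONDITION_LABELS_REVERSED change is easiest to see in isolation. The sketch below is not part of the patch: TOTAL_SHARE, the donation values, and the reversal rule are taken from the diff above, while the standalone function names and framing are purely illustrative.

    # Minimal standalone sketch of the reversed-labels payoff rule (assumed
    # equivalent to DictatorChoice.payoff_ego / payoff_allo in the patch).
    TOTAL_SHARE = 4.0

    def payoff_ego(donation: float, labels_reversed: bool) -> float:
        # Normally the dictator keeps TOTAL_SHARE minus the donation; with
        # labels reversed, the same label now denotes the opposite split.
        return donation if labels_reversed else TOTAL_SHARE - donation

    def payoff_allo(donation: float, labels_reversed: bool) -> float:
        # The partner always receives the complement of the dictator's share.
        return TOTAL_SHARE - payoff_ego(donation, labels_reversed)

    # BROWN donates 1.0: (ego, allo) is (3.0, 1.0) in the base condition and
    # (1.0, 3.0) with labels reversed, matching the parametrised test cases.
    assert (payoff_ego(1.0, False), payoff_allo(1.0, False)) == (3.0, 1.0)
    assert (payoff_ego(1.0, True), payoff_allo(1.0, True)) == (1.0, 3.0)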