Skip to content

Commit

Permalink
Only doing LLM evaluation on phase 2 and sandbox (#144)
Browse files: browse the repository at this point in the history
  • Loading branch information
gsproston-scottlogic authored Aug 15, 2023
1 parent e892bec commit d902b9a
Showing 1 changed file with 14 additions and 10 deletions.
24 changes: 14 additions & 10 deletions backend/src/openai.js
Original file line number Diff line number Diff line change
Expand Up @@ -234,18 +234,22 @@ async function chatGptSendMessage(message, session, currentPhase) {
let wonPhase = false;

// evaluate the message for prompt injection
const evalPrompt = await queryPromptEvaluationModel(message);
if (evalPrompt.isMalicious) {
defenceInfo.triggeredDefences.push("LLM_EVALUATION");
if (isDefenceActive("LLM_EVALUATION", session.defences)) {
console.debug("LLM evalutation defence active.");
defenceInfo.blocked = true;
const evalResponse =
"Message blocked by the malicious prompt evaluator." +
evalPrompt.reason;
return { reply: evalResponse, defenceInfo: defenceInfo };
// to speed up replies, only do this on phases where defences are active
if (currentPhase >= 2) {
const evalPrompt = await queryPromptEvaluationModel(message);
if (evalPrompt.isMalicious) {
defenceInfo.triggeredDefences.push("LLM_EVALUATION");
if (isDefenceActive("LLM_EVALUATION", session.defences)) {
console.debug("LLM evalutation defence active.");
defenceInfo.blocked = true;
const evalResponse =
"Message blocked by the malicious prompt evaluator." +
evalPrompt.reason;
return { reply: evalResponse, defenceInfo: defenceInfo };
}
}
}

// add user message to chat
session.chatHistory.push({ role: "user", content: message });

Expand Down

0 comments on commit d902b9a

Please sign in to comment.