Skip to content

Commit

Permalink
Merge pull request #87 from aiverify-foundation/dev_main
Browse files Browse the repository at this point in the history
[Sprint 21] Fix
  • Loading branch information
imda-jacksonboey authored Jan 16, 2025
2 parents 5175e8d + aebef94 commit 1a5f589
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 23 deletions.
67 changes: 54 additions & 13 deletions app/benchmarking/report/components/rawScoresTable.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,46 @@ export function RawRecipeMetricsScoresTable({
</thead>
<tbody ref={tbodyRef}>
{resultPromptData.map((promptData, idx) => {
let stringifiedMetrics = '';
try {
stringifiedMetrics = JSON.stringify(
promptData.metrics,
null,
2
let stringifiedMetrics: string | React.ReactNode = '';
let tooLong = false;
for (const metric of promptData.metrics) {
if ('grading_criteria' in metric) {
for (const [_, value] of Object.entries(metric)) {
if (
typeof value === 'object' &&
'individual_scores' in value
) {
if (
(value.individual_scores.successful &&
value.individual_scores.successful.length > 20) ||
(value.individual_scores.unsuccessful &&
value.individual_scores.unsuccessful.length > 20)
) {
tooLong = true;
break;
}
}
}
}
}
if (tooLong) {
stringifiedMetrics = (
<>
Metric result is too long to display. <br />
Click on &quot;Download Details Scoring JSON&quot; <br />
below to view raw scores.
</>
);
} catch (error) {
console.log(error);
} else {
try {
stringifiedMetrics = JSON.stringify(
promptData.metrics,
null,
2
);
} catch (error) {
console.log(error);
}
}
return (
<tr
Expand All @@ -69,22 +100,32 @@ export function RawRecipeMetricsScoresTable({
idx > 0 && idx % rowCount === 0 ? 'break-before-page' : ''
}>
<td className="py-3 px-6 border-r border-moongray-700 align-top">
{promptData.dataset_id}
<div className="break-all max-w-[100px]">
{promptData.dataset_id}
</div>
</td>
<td className="py-3 px-6 border-r border-moongray-700 align-top">
{promptData.prompt_template_id}
</td>
<td className="py-3 px-6 border-r border-moongray-700 align-top">
<td className="py-3 px-6 border-r border-moongray-700 align-top ">
{recipe.metrics.map((metricName, idx) => {
const name =
idx < promptData.metrics.length - 1
? `${metricName}, `
: metricName;
return <span key={metricName}>{name}</span>;
return (
<span
key={metricName}
className="break-all max-w-[100px]">
{name}
</span>
);
})}
</td>
<td className="py-3 px-6">
<pre>{stringifiedMetrics}</pre>
<td className="py-3 px-6 truncate max-w-[450px]">
<pre className="whitespace-pre-wrap break-words">
{stringifiedMetrics}
</pre>
</td>
</tr>
);
Expand Down
33 changes: 23 additions & 10 deletions app/benchmarking/report/types/benchmarkReportTypes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,18 +49,30 @@ type RecipePromptData = {
duration: number;
};

type Metric = {
/** One prompt record: the prompt, its predicted value, and its target, all held as strings. */
type MetricPromptAndScore = {
prompt: string;
predicted_value: string;
target: string;
};

/**
 * Per-prompt score breakdown: prompt records grouped under `successful` and
 * `unsuccessful`, nested beneath an `individual_scores` property.
 */
type IndividualScore = {
individual_scores: {
// Both lists are optional; either one may be absent.
successful?: MetricPromptAndScore[];
unsuccessful?: MetricPromptAndScore[];
};
};

type GradingCriteria = {
accuracy?: number;
attack_success_rate?: number;
toxicity_rate?: number;
refusal_rate?: number;
safe?: number;
unsafe?: number;
unknown?: number;
grading_criteria: {
accuracy?: number;
attack_success_rate?: number;
toxicity_rate?: number;
refusal_rate?: number;
};
};

/**
 * A metric entry: always carries `grading_criteria`, and may carry any number
 * of additional string-keyed properties whose values are a number, an
 * `IndividualScore`, or a `GradingCriteria`.
 *
 * NOTE(review): `GradingCriteria` as declared in this file appears to wrap a
 * `grading_criteria` property itself, so this type reads as
 * `metric.grading_criteria.grading_criteria` — confirm the double nesting is
 * intended.
 */
type Metric = {
grading_criteria: GradingCriteria;
} & {
// Open-ended extra keys; every property (including the one above) must fit this value union.
[key: string]: IndividualScore | number | GradingCriteria;
};

type RougeScore = {
Expand Down Expand Up @@ -111,4 +123,5 @@ export type {
GradingScale,
GradingColors,
CookbookCategoryLabels,
IndividualScore,
};

0 comments on commit 1a5f589

Please sign in to comment.