From 09827b997dfff3843a0596769943fa1bbc94303f Mon Sep 17 00:00:00 2001
From: imda-amdlahir <aminurrashid_mohamed_lahir@imda.gov.sg>
Date: Tue, 14 Jan 2025 12:27:27 +0800
Subject: [PATCH] fix report raw metrics rendering

---
 .../report/components/rawScoresTable.tsx      | 67 +++++++++++++++----
 .../report/types/benchmarkReportTypes.ts      | 33 ++++++---
 2 files changed, 77 insertions(+), 23 deletions(-)
diff --git a/app/benchmarking/report/components/rawScoresTable.tsx b/app/benchmarking/report/components/rawScoresTable.tsx
index e54dd252..57f2ed7b 100644
--- a/app/benchmarking/report/components/rawScoresTable.tsx
+++ b/app/benchmarking/report/components/rawScoresTable.tsx
@@ -52,15 +52,46 @@ export function RawRecipeMetricsScoresTable({
           </thead>
           <tbody ref={tbodyRef}>
             {resultPromptData.map((promptData, idx) => {
-              let stringifiedMetrics = '';
-              try {
-                stringifiedMetrics = JSON.stringify(
-                  promptData.metrics,
-                  null,
-                  2
+              let stringifiedMetrics: React.ReactNode = '';
+              // A result is too long to print inline when any metric holds
+              // an individual_scores list with more than 20 entries.
+              // `some` stops scanning at the first such list.
+              const tooLong = promptData.metrics.some(
+                (metric) =>
+                  'grading_criteria' in metric &&
+                  Object.values(metric).some((value) => {
+                    // typeof null is 'object', and `in` throws on null.
+                    if (typeof value !== 'object' || value === null) {
+                      return false;
+                    }
+                    if (!('individual_scores' in value)) return false;
+                    const { successful, unsuccessful } =
+                      value.individual_scores;
+                    // Array.isArray also covers a missing (optional) list.
+                    return (
+                      (Array.isArray(successful) && successful.length > 20) ||
+                      (Array.isArray(unsuccessful) && unsuccessful.length > 20)
+                    );
+                  })
+              );
+              if (tooLong) {
+                stringifiedMetrics = (
+                  <>
+                    Metric result is too long to display. <br />
+                    Click on &quot;Download Details Scoring JSON&quot; <br />
+                    below to view raw scores.
+                  </>
                 );
-              } catch (error) {
-                console.log(error);
+              } else {
+                try {
+                  stringifiedMetrics = JSON.stringify(
+                    promptData.metrics,
+                    null,
+                    2
+                  );
+                } catch (error) {
+                  console.log(error);
+                }
               }
               return (
                 <tr
@@ -69,22 +100,32 @@ export function RawRecipeMetricsScoresTable({
                     idx > 0 && idx % rowCount === 0 ? 'break-before-page' : ''
                   }>
                   <td className="py-3 px-6 border-r border-moongray-700 align-top">
-                    {promptData.dataset_id}
+                    <div className="break-all max-w-[100px]">
+                      {promptData.dataset_id}
+                    </div>
                   </td>
                   <td className="py-3 px-6 border-r border-moongray-700 align-top">
                     {promptData.prompt_template_id}
                   </td>
-                  <td className="py-3 px-6 border-r border-moongray-700 align-top">
+                  <td className="py-3 px-6 border-r border-moongray-700 align-top ">
                     {recipe.metrics.map((metricName, idx) => {
                       const name =
                         idx < promptData.metrics.length - 1
                           ? `${metricName}, `
                           : metricName;
-                      return <span key={metricName}>{name}</span>;
+                      return (
+                        <span
+                          key={metricName}
+                          className="break-all max-w-[100px]">
+                          {name}
+                        </span>
+                      );
                     })}
                   </td>
-                  <td className="py-3 px-6">
-                    <pre>{stringifiedMetrics}</pre>
+                  <td className="py-3 px-6 truncate max-w-[450px]">
+                    <pre className="whitespace-pre-wrap break-words">
+                      {stringifiedMetrics}
+                    </pre>
                   </td>
                 </tr>
               );
diff --git a/app/benchmarking/report/types/benchmarkReportTypes.ts b/app/benchmarking/report/types/benchmarkReportTypes.ts
index e0344ec1..c295c5de 100644
--- a/app/benchmarking/report/types/benchmarkReportTypes.ts
+++ b/app/benchmarking/report/types/benchmarkReportTypes.ts
@@ -49,18 +49,30 @@ type RecipePromptData = {
   duration: number;
 };
 
-type Metric = {
+type MetricPromptAndScore = { // element type of the individual_scores lists
+  prompt: string;
+  predicted_value: string;
+  target: string;
+};
+
+type IndividualScore = { // wraps the two per-outcome entry lists
+  individual_scores: {
+    successful?: MetricPromptAndScore[]; // optional: may be absent
+    unsuccessful?: MetricPromptAndScore[]; // optional: may be absent
+  };
+};
+
+type GradingCriteria = { // all-optional numbers; type of Metric.grading_criteria
   accuracy?: number;
+  attack_success_rate?: number;
+  toxicity_rate?: number;
   refusal_rate?: number;
-  safe?: number;
-  unsafe?: number;
-  unknown?: number;
-  grading_criteria: {
-    accuracy?: number;
-    attack_success_rate?: number;
-    toxicity_rate?: number;
-    refusal_rate?: number;
-  };
+};
+
+type Metric = { // grading_criteria is required; every other key is open-ended
+  grading_criteria: GradingCriteria;
+} & {
+  [key: string]: IndividualScore | number | GradingCriteria;
 };
 
 type RougeScore = {
@@ -111,4 +123,5 @@ export type {
   GradingScale,
   GradingColors,
   CookbookCategoryLabels,
+  IndividualScore,
 };