feat: add upload receipt image to fill up and proceed transactions (#91)

uigywnkiub · Jan 1, 2025 · 457fda3 · 457fda3
1 parent b2cb830
commit 457fda3
Show file tree

Hide file tree

Showing 7 changed files with 413 additions and 146 deletions.
diff --git a/app/lib/actions.ts b/app/lib/actions.ts
@@ -17,7 +17,11 @@ import { ROUTE } from '@/config/constants/routes'
 
 import TransactionModel from '@/app/lib/models/transaction.model'
 
-import { CompletionAIModel, ExpenseTipsAIModel } from './ai'
+import {
+  CompletionAIModel,
+  ExpenseTipsAIModel,
+  UploadReceiptAIModel,
+} from './ai'
 import {
   capitalizeFirstLetter,
   getCategoryItemNames,
@@ -689,3 +693,33 @@ export async function getExpenseTipsAI(categories: string[]): Promise<string> {
   }
 }
 export const getCachedExpenseTipsAI = cache(getExpenseTipsAI)
+
+/**
+ * Sends an image to `UploadReceiptAIModel` together with a fixed
+ * receipt-analysis prompt and returns the model's reply as text.
+ *
+ * The blob's bytes are base64-encoded and passed inline, tagged with the
+ * blob's own `type` as the MIME type. The reply is returned as-is (trimmed);
+ * it is not parsed or validated here.
+ *
+ * @param file - Image blob to analyze.
+ * @returns The trimmed response text produced by the model.
+ * @throws Error with message `File blob is required.` when `file` is falsy.
+ *   Errors raised while reading the blob or calling the model propagate to
+ *   the caller unchanged.
+ */
+export async function getAnalyzedReceiptAI(file: Blob): Promise<string> {
+  if (!file) {
+    throw new Error('File blob is required.')
+  }
+
+  const prompt =
+    'Analyze the provided image of a receipt and return valid information about each item on the receipt. Stop analyzing before the general summary amount. If this is not the receipt image, return an empty array.'
+
+  // Binary input travels as base64 text alongside its MIME type.
+  const imagePart = {
+    inlineData: {
+      data: Buffer.from(await file.arrayBuffer()).toString('base64'),
+      mimeType: file.type,
+    },
+  }
+
+  // No try/catch on purpose: a handler that only rethrows adds nothing, and
+  // failures already reach the caller untouched.
+  const content = await UploadReceiptAIModel.generateContent([
+    prompt,
+    imagePart,
+  ])
+
+  return content.response.text().trim()
+}
diff --git a/app/lib/ai.ts b/app/lib/ai.ts
@@ -51,7 +51,6 @@ export const CompletionAIModel = genAI.getGenerativeModel({
   model: process.env.GEMINI_MODEL,
   // Docs https://ai.google.dev/api/generate-content#v1beta.GenerationConfig
   generationConfig: {
-    candidateCount: 1,
     stopSequences: ['\n'],
     // A token is equivalent to about 4 characters for Gemini models. 100 tokens are about 60-80 English words.
     maxOutputTokens: 10,
@@ -63,7 +62,6 @@ export const CompletionAIModel = genAI.getGenerativeModel({
 export const ExpenseTipsAIModel = genAI.getGenerativeModel({
   model: process.env.GEMINI_RICHER_MODEL,
   generationConfig: {
-    candidateCount: 1,
     temperature: 2,
     // Docs https://ai.google.dev/gemini-api/docs/json-mode?lang=node
     responseMimeType: 'application/json',
@@ -89,3 +87,27 @@ export const ExpenseTipsAIModel = genAI.getGenerativeModel({
   },
   safetySettings,
 })
+
+/**
+ * Generative model handle used for receipt-image analysis.
+ *
+ * Uses the model named by `process.env.GEMINI_RICHER_MODEL` and requests JSON
+ * output constrained to an array of `{ description: string, amount: number }`
+ * objects, with both fields required.
+ */
+export const UploadReceiptAIModel = genAI.getGenerativeModel({
+  model: process.env.GEMINI_RICHER_MODEL,
+  generationConfig: {
+    // JSON mode reference: https://ai.google.dev/gemini-api/docs/json-mode?lang=node
+    responseMimeType: 'application/json',
+    responseSchema: {
+      // Top level is a list; each entry is one object of the shape below.
+      type: SchemaType.ARRAY,
+      items: {
+        type: SchemaType.OBJECT,
+        properties: {
+          description: { type: SchemaType.STRING },
+          amount: { type: SchemaType.NUMBER },
+        },
+        required: ['description', 'amount'],
+      },
+    },
+  },
+  safetySettings,
+})
diff --git a/app/lib/types.ts b/app/lib/types.ts
@@ -186,3 +186,8 @@ export type TExpenseAdvice = {
   tip: string
   savings: string
 }
+
+/**
+ * A described amount: a `description` paired with a numeric `amount`.
+ *
+ * NOTE(review): the field names match the item schema declared for
+ * `UploadReceiptAIModel` in `app/lib/ai.ts`; presumably this types one parsed
+ * entry of that model's output — confirm at the call site.
+ */
+export type TReceipt = {
+  // Reuses the transaction's description type rather than redeclaring it.
+  description: TTransaction['description']
+  amount: number
+}