From fd2a653970801b2bd7bb1e17559b95e420d762f0 Mon Sep 17 00:00:00 2001 From: Manuel Sanchez Date: Sun, 21 Apr 2024 22:50:22 +0200 Subject: [PATCH] Update VideoAnalyzer --- infra/main.tf | 8 +++++ infra/modules/cv/main.tf | 7 +++++ infra/modules/cv/outputs.tf | 0 infra/modules/cv/variables.tf | 3 ++ infra/modules/st/main.tf | 6 ++++ infra/variables.tf | 4 +++ .../Controllers/VideoAnalyzerController.cs | 29 +++++++++++-------- .../Views/VideoAnalyzer/VideoAnalyzer.cshtml | 4 ++- src/AIHub/appsettings.template.json | 4 +-- src/AIHub/wwwroot/js/ui/videoanalyzer.js | 24 ++++++++++----- 10 files changed, 67 insertions(+), 22 deletions(-) create mode 100644 infra/modules/cv/main.tf create mode 100644 infra/modules/cv/outputs.tf create mode 100644 infra/modules/cv/variables.tf diff --git a/infra/main.tf b/infra/main.tf index c39e4f1..9c40d05 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -24,6 +24,7 @@ locals { ca_prep_docs_name = "${var.ca_prep_docs_name}${local.name_sufix}" ca_aihub_name = "${var.ca_aihub_name}${local.name_sufix}" func_name = "plugin${local.sufix}" + cv_name = "${var.cv_name}${local.name_sufix}" } resource "azurerm_resource_group" "rg" { @@ -245,3 +246,10 @@ module "plugin" { openai_model = module.openai.gpt_deployment_name openai_endpoint = module.openai.openai_endpoint } + +module "cv" { + source = "./modules/cv" + location = azurerm_resource_group.rg.location + resource_group_name = azurerm_resource_group.rg.name + cv_name = local.cv_name +} diff --git a/infra/modules/cv/main.tf b/infra/modules/cv/main.tf new file mode 100644 index 0000000..a4d3d94 --- /dev/null +++ b/infra/modules/cv/main.tf @@ -0,0 +1,7 @@ +resource "azurerm_cognitive_account" "cv" { + name = var.cv_name + location = var.location + resource_group_name = var.resource_group_name + kind = "ComputerVision" + sku_name = "S1" +} \ No newline at end of file diff --git a/infra/modules/cv/outputs.tf b/infra/modules/cv/outputs.tf new file mode 100644 index 0000000..e69de29 diff --git 
a/infra/modules/cv/variables.tf b/infra/modules/cv/variables.tf new file mode 100644 index 0000000..3fdf9dc --- /dev/null +++ b/infra/modules/cv/variables.tf @@ -0,0 +1,3 @@ +variable "resource_group_name" {} +variable "location" {} +variable "cv_name" {} \ No newline at end of file diff --git a/infra/modules/st/main.tf b/infra/modules/st/main.tf index 0f69b88..9ce2d0b 100644 --- a/infra/modules/st/main.tf +++ b/infra/modules/st/main.tf @@ -41,6 +41,12 @@ resource "azurerm_storage_container" "image-analyzer" { storage_account_name = azurerm_storage_account.sa.name } +resource "azurerm_storage_container" "video-analyzer" { + name = "video-analyzer" + container_access_type = "private" + storage_account_name = azurerm_storage_account.sa.name +} + resource "azurerm_storage_container" "image-moderator" { name = "image-moderator" container_access_type = "private" diff --git a/infra/variables.tf b/infra/variables.tf index 563df2f..fdd3034 100644 --- a/infra/variables.tf +++ b/infra/variables.tf @@ -86,6 +86,10 @@ variable "ca_aihub_name" { default = "ca-aihub" } +variable "cv_name" { + default = "cv-aihub" +} + variable "ca_chat_image" { default = "ghcr.io/azure/activate-genai/aihub-chat:1.0.0-preview.0" } diff --git a/src/AIHub/Controllers/VideoAnalyzerController.cs b/src/AIHub/Controllers/VideoAnalyzerController.cs index 75855e9..25c0622 100644 --- a/src/AIHub/Controllers/VideoAnalyzerController.cs +++ b/src/AIHub/Controllers/VideoAnalyzerController.cs @@ -26,6 +26,8 @@ public class VideoAnalyzerController : Controller private VideoAnalyzerModel model; private HttpClient httpClient; + + public VideoAnalyzerController(IConfiguration config, IHttpClientFactory clientFactory) { AOAIendpoint = config.GetValue("VideoAnalyzer:OpenAIEndpoint") ?? 
throw new ArgumentNullException("OpenAIEndpoint"); @@ -117,9 +119,6 @@ public async Task DenseCaptionVideo(string video_url, string prom string VISION_API_ENDPOINT = $"{Visionendpoint}computervision"; string VISION_API_KEY = VisionKey; string VIDEO_INDEX_NAME = Regex.Replace(video_url.Split("/").Last().Split(".").First().GetHashCode().ToString(), "[^a-zA-Z0-9]", ""); - - - string VIDEO_FILE_SAS_URL = video_url + sasUri.Query; // Step 1: Create an Index @@ -149,6 +148,7 @@ public async Task DenseCaptionVideo(string video_url, string prom } var payload = new { + model = "gpt-4-vision-preview", dataSources = new[] { new @@ -157,8 +157,8 @@ public async Task DenseCaptionVideo(string video_url, string prom parameters = new { computerVisionBaseUrl = VISION_API_ENDPOINT, - computerVisionApiKey = VisionKey, - indexName = Regex.Replace(video_url.Split("/").Last().Split(".").First().GetHashCode().ToString(), "[^a-zA-Z0-9]", ""), + computerVisionApiKey = VISION_API_KEY, + indexName = VIDEO_INDEX_NAME, videoUrls = new[] { VIDEO_FILE_SAS_URL } } } @@ -180,26 +180,31 @@ public async Task DenseCaptionVideo(string video_url, string prom role = "user", content = new object[] { - new acvDocumentIdWrapper() {AcvDocumentId = VIDEO_DOCUMENT_ID}, - prompt + new { + type = "acv_document_id", + acv_document_id = VIDEO_DOCUMENT_ID + }, + new { + type = "text", + text = prompt + } }, } }, temperature = 0.7, top_p = 0.95, - max_tokens = 800 + max_tokens = 4096 }; try { var chatResponse = await httpClient.PostAsync(GPT4V_ENDPOINT, new StringContent(JsonSerializer.Serialize(payload), Encoding.UTF8, "application/json")); chatResponse.EnsureSuccessStatusCode(); - var responseContent = JsonSerializer.Deserialize(await chatResponse.Content.ReadAsStringAsync()); + var responseContent = JsonSerializer.Deserialize(await chatResponse.Content.ReadAsStringAsync()); Console.WriteLine(responseContent); - - model.Message = responseContent; //responseContent!.choices[0].message.content; + + model.Message = 
responseContent?["choices"]?[0]?["message"]?["content"]?.ToString(); model.Video = VIDEO_FILE_SAS_URL; - } catch { diff --git a/src/AIHub/Views/VideoAnalyzer/VideoAnalyzer.cshtml b/src/AIHub/Views/VideoAnalyzer/VideoAnalyzer.cshtml index b53e614..80a5f05 100644 --- a/src/AIHub/Views/VideoAnalyzer/VideoAnalyzer.cshtml +++ b/src/AIHub/Views/VideoAnalyzer/VideoAnalyzer.cshtml @@ -102,7 +102,9 @@
- +
diff --git a/src/AIHub/appsettings.template.json b/src/AIHub/appsettings.template.json index 24657a0..8d6c353 100644 --- a/src/AIHub/appsettings.template.json +++ b/src/AIHub/appsettings.template.json @@ -37,9 +37,9 @@ "VideoAnalyzer": { "OpenAIEndpoint": "", "GPT4Vision": "openai/deployments//extensions/chat/completions?api-version=2023-07-01-preview", - "VisionAPI": "https://.cognitiveservices.azure.com/", - "VisionKey": "", "OpenAISubscriptionKey": "", + "VisionEndpoint": "https://.cognitiveservices.azure.com/", + "VisionSubscriptionKey": "", "ContainerName": "video-analyzer", "DeploymentName": "gpt4-vision" }, diff --git a/src/AIHub/wwwroot/js/ui/videoanalyzer.js b/src/AIHub/wwwroot/js/ui/videoanalyzer.js index 54a16c6..fb0ef33 100644 --- a/src/AIHub/wwwroot/js/ui/videoanalyzer.js +++ b/src/AIHub/wwwroot/js/ui/videoanalyzer.js @@ -1,17 +1,17 @@ -Dropzone.options.dropaiimage = { - paramName: "videoFile", +Dropzone.options.dropaiimage = { + paramName: "videoFile", maxFilesize: 200, // MB createImageThumbnails:true, previewsContainer: "#file-previews", accept: function(file, done) { done(); }, - init: function() { + init: function() { var myDropzone = this; - myDropzone.on("sending", function(file) { + myDropzone.on("sending", function(file) { $("#loader").removeClass("d-none"); }); - myDropzone.on("complete", + myDropzone.on("complete", function(file) { $("#loader").addClass("d-none"); }); @@ -19,12 +19,22 @@ Dropzone.options.dropaiimage = { success: function (file, response) { if (typeof response === "object") { $("#show-message-result").text(response.message); - $("#bigpic").attr('src', response.video); + + var isrc = document.createElement("source"); + isrc.setAttribute("src", response.video); + isrc.setAttribute("type", "video/mp4"); + document.getElementById("bigpic").appendChild(isrc); + } else { try { var parsedResponse = JSON.parse(response); $("#show-message-result").val(parsedResponse.message); - $("#bigpic").attr('src',parsedResponse.video); + + var 
isrc = document.createElement("source"); + isrc.setAttribute("src", parsedResponse.video); + isrc.setAttribute("type", "video/mp4"); + document.getElementById("bigpic").appendChild(isrc); + } catch (e) { console.error("Error parsing the response:", e); }