diff --git a/eventbridge-bedrock-s3-aoss/.gitignore b/eventbridge-bedrock-s3-aoss/.gitignore new file mode 100644 index 000000000..37833f8be --- /dev/null +++ b/eventbridge-bedrock-s3-aoss/.gitignore @@ -0,0 +1,10 @@ +*.swp +package-lock.json +__pycache__ +.pytest_cache +.venv +*.egg-info + +# CDK asset staging directory +.cdk.staging +cdk.out diff --git a/eventbridge-bedrock-s3-aoss/README.md b/eventbridge-bedrock-s3-aoss/README.md new file mode 100644 index 000000000..d161132fc --- /dev/null +++ b/eventbridge-bedrock-s3-aoss/README.md @@ -0,0 +1,196 @@ +# Automatically sync your data with your Amazon Bedrock knowledge base using Amazon EventBridge Scheduler +![architecture](architecture/architecture.png) + +This pattern demonstrates an approach to automatically syncing the data sources associated with [Amazon Bedrock Knowledge Bases](https://aws.amazon.com/bedrock/knowledge-bases/). Knowledge Bases help you take advantage of [Retrieval Augmented Generation](https://aws.amazon.com/what-is/retrieval-augmented-generation/) (RAG), a popular technique that involves drawing information from a data store to augment the responses generated by Large Language Models (LLMs). When you set up a knowledge base with your data sources, your application can query the knowledge base to return information to answer the query, either with direct quotations from sources or with natural responses generated from the query results. + +After you create your knowledge base, you ingest your data sources into it so that they're indexed and can be queried. Additionally, each time you add, modify, or remove files from a data source, you must sync the data source so that it is re-indexed into the knowledge base. Syncing is incremental, so Bedrock only processes the documents that were added, modified, or deleted since the last sync. + +At the time of writing, Knowledge Bases has no native feature to periodically sync the data sources associated with a knowledge base, so customers who need to refresh their data sources periodically to keep their knowledge base up to date have to rely on a bespoke solution. This pattern shows one way to implement such a solution, using [Amazon EventBridge Scheduler](https://docs.aws.amazon.com/scheduler/latest/UserGuide/what-is-scheduler.html). + +EventBridge Scheduler simplifies scheduling tasks by providing a centralized, serverless service that reliably executes schedules and invokes targets across various AWS services. In this particular pattern, we configure an EventBridge schedule that runs periodically (using a schedule expression). As part of the schedule creation, we configure a target. A target is an API operation that EventBridge Scheduler invokes on your behalf whenever the schedule runs. In our case, the target API is the [`StartIngestionJob`](https://docs.aws.amazon.com/bedrock/latest/APIReference/API_agent_StartIngestionJob.html) operation on the Bedrock Agents API.
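+ +As an illustration of how the schedule and its universal target fit together, the following AWS CLI call is a rough equivalent of what the CDK stack provisions. This is a minimal sketch only: the role ARN, knowledge base ID, and data source ID are placeholders, and the deployed schedule also sets a schedule group and other properties. + +``` +aws scheduler create-schedule --name BedrockKBDataSourceSyncSchedule --schedule-expression "rate(5 minutes)" --flexible-time-window Mode=OFF --target '{"Arn": "arn:aws:scheduler:::aws-sdk:bedrockagent:startIngestionJob", "RoleArn": "<scheduler-role-arn>", "Input": "{\"KnowledgeBaseId\": \"<knowledge-base-id>\", \"DataSourceId\": \"<data-source-id>\"}"}' +```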
+ +Learn more about this pattern at Serverless Land Patterns: https://serverlessland.com/patterns/eventbridge-bedrock-s3-aoss + +> [!Important] +>This application uses various AWS services and there are costs associated with these services after the Free Tier usage - please see the [AWS Pricing page](https://aws.amazon.com/pricing/) for details. You are responsible for any AWS costs incurred. No warranty is implied in this example. + +## Requirements + +* [Create an AWS account](https://portal.aws.amazon.com/gp/aws/developer/registration/index.html) if you do not already have one and log in. The IAM user that you use must have sufficient permissions to make necessary AWS service calls and manage AWS resources. +* [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) installed and configured +* [Git installed](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) +* [Node and NPM](https://nodejs.org/en/download/) installed +* [AWS Cloud Development Kit](https://docs.aws.amazon.com/cdk/latest/guide/cli.html) (AWS CDK) installed + +> [!Important] +> This pattern uses Knowledge Bases and the Titan Text Embeddings V2 model. See [Supported regions and models for Amazon Bedrock knowledge bases](https://docs.aws.amazon.com/bedrock/latest/userguide/knowledge-base-supported.html) to select a region where Knowledge Bases is supported. + +## Enable Model Access in Bedrock console +Knowledge bases use a foundation model to embed your data sources in a vector store. Before creating a knowledge base and selecting an embeddings model for it, you must request access to the model. If you try to use the model (with the API or console) before you have requested access to it, you receive an error message. For more information, see [Model access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html). + +1. In the AWS console, select the region from which you want to access Bedrock. + + ![Region Selection](images/region-selection.png) + +2. Find **Amazon Bedrock** by searching in the AWS console. + + ![Bedrock Search](images/bedrock-search.png) + +3. Expand the side menu. + + ![Bedrock Expand Menu](images/bedrock-menu-expand.png) + +4. From the side menu, select **Model access**. + + ![Model Access](images/model-access-link.png) + +5. Depending on your view, select the **Enable specific models** button or the **Modify Model Access** button. + + ![Model Access View](images/model-access-view.png) + + +6. Use the checkboxes to select the models you wish to enable. Review the applicable EULAs as needed. Click **Next** to go to the Review screen and then **Submit** to enable the required models in your account. By default, this pattern only needs Titan Text Embeddings V2 (model ID: _amazon.titan-embed-text-v2:0_).
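+ +Once access is granted, you can optionally confirm that the embeddings model is invokable in your chosen region before deploying. The following AWS CLI v2 call is a minimal sanity check (the output file name is arbitrary); an `AccessDeniedException` indicates that model access has not been enabled yet: + +``` +aws bedrock-runtime invoke-model --model-id amazon.titan-embed-text-v2:0 --cli-binary-format raw-in-base64-out --body '{"inputText": "test"}' /tmp/titan-embedding.json +```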
+ +## Deployment Instructions + +1. Create a new directory, navigate to that directory in a terminal and clone the GitHub repository: + ``` + git clone https://github.com/aws-samples/serverless-patterns + ``` +2. Change directory to the pattern directory: + ``` + cd serverless-patterns/eventbridge-bedrock-s3-aoss + ``` +3. Create a Python virtual environment: + ``` + python3 -m venv .venv + ``` +4. Activate the virtual environment: + + ``` + source .venv/bin/activate + ``` +5. Install the required Python dependencies: + ``` + pip install -r requirements.txt + ``` +6. Install the dependencies to be used in the Lambda layer: + + ``` + pip install --target layers/python -r layers/requirements.txt + ``` + +7. Run the command below to bootstrap your account. CDK needs this before it can deploy: + ``` + cdk bootstrap + ``` +8. See the list of the IDs of the stacks in the AWS CDK application: + ``` + cdk list + ``` + +9. Review the CloudFormation template CDK generates for a stack using the following AWS CDK CLI command: + +> [!NOTE] +> Substitute `<stack_id>` with one of the IDs from the output of the `cdk list` command + ``` + cdk synth <stack_id> + ``` + +10. From the command line, use AWS CDK to deploy the AWS resources. + + ``` + cdk deploy --all + ``` + Enter `y` if prompted `Do you wish to deploy these changes (y/n)?` + +> [!NOTE] +> You can optionally change the `collection_name`, `index_name`, `knowledge_base_name`, and `kb_s3_datasource_name` parameters in `cdk.context.json`. The parameters are used to name the OpenSearch Serverless collection, index, the knowledge base, and the associated S3 data source, respectively. + +## How it works +Upon deployment, the CDK stacks create a Bedrock knowledge base configured with an S3 bucket as its data source and an OpenSearch Serverless collection to store the vector data. A data source repository contains files or content with information that can be retrieved when your knowledge base is queried. The stacks also include an EventBridge schedule that is configured to run every 5 minutes and invoke the `StartIngestionJob` operation on the Bedrock Agents API. Bedrock supports a monitoring system to help you understand the execution of data ingestion jobs: the stacks create the necessary CloudWatch log group and CloudWatch delivery resources, and you can gain visibility into the ingestion of your knowledge base resources with this logging system. Additionally, Bedrock is integrated with AWS CloudTrail, a service that provides a record of actions taken by a user, role, or an AWS service in Bedrock. CloudTrail captures all API calls for Bedrock as events. + + +## Testing + +### Verify the EventBridge schedule is ENABLED +The EventBridge schedule should be enabled by default when the stack creation is complete. You can verify this by running the command below. The expected output is the text `ENABLED`, which means that the schedule is enabled and ready to run at the next scheduled time. + +``` +aws scheduler get-schedule --name BedrockKBDataSourceSyncSchedule --group BedrockKBSyncScheduleGroup --query 'State' --output text +``` +### Upload Document(s) to S3 Bucket +Upload a sample PDF document to the S3 bucket that is configured as the knowledge base data source. You can provide your own or use one of the PDFs provided in the `examples` folder. You can find the bucket name in the Outputs section of the CDK command output of the `BedrockKBStack`. +> [!NOTE] +> Substitute the value from `BedrockKBStack.bucketname` found in the Outputs section of the `cdk deploy` command output of the `BedrockKBStack` + +``` +aws s3 cp examples/2022-Shareholder-Letter.pdf s3://<BedrockKBStack.bucketname> +``` + + +> [!Important] +> Wait for the next scheduled run before running the commands below. By default, this stack configures the schedule to run every 5 minutes. You can find the schedule rate by running the command below. The expected output is `rate(5 minutes)` +``` +aws scheduler get-schedule --name BedrockKBDataSourceSyncSchedule --group BedrockKBSyncScheduleGroup --query 'ScheduleExpression' --output text +```
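+ +If you would rather not wait for the next scheduled run, you can also trigger a sync manually with the same API operation that the schedule invokes. This is an optional shortcut; as with the other commands, substitute the IDs from the `cdk deploy` Outputs: + +``` +aws bedrock-agent start-ingestion-job --knowledge-base-id <BedrockKBStack.knowledgebaseid> --data-source-id <BedrockKBStack.datasourceid> +```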
+ +### View CloudTrail log for StartIngestionJob +1. In the CloudTrail console, click on Event history. Event history provides a viewable, searchable, downloadable, and immutable record of the past 90 days of management events. +![CloudTrail Event History](images/cloudtrail-eventhistory.png) + +2. Filter using the event name `StartIngestionJob` as well as by date and time (for example, the last 20 minutes). +![StartIngestionJob Event](images/startingestionjob-event.png) + +3. In the Event Record, notice that `sessionContext.sessionIssuer.userName` mentions `EventBridgeSchedulerRole`, the role that was created by the CDK stack and assigned to the EventBridge schedule. Also, the `userAgent` indicates `AmazonEventBridgeScheduler` as the agent through which the request was made. + +### Tail the CloudWatch Logs to look for Sync Events +The CDK creates resources to enable logging for a knowledge base using the CloudWatch constructs. +See [Knowledge bases logging](https://docs.aws.amazon.com/bedrock/latest/userguide/knowledge-bases-logging.html) for more information. +The following command tails the CloudWatch log to view knowledge base events as they are logged. + +> [!NOTE] +> Substitute the `BedrockKBStack.knowledgebaseid` found in the CDK Output section of the `cdk deploy` command output of the `BedrockKBStack` + +``` +aws logs tail --follow --since 20m BedrockKnowledgeBase-<BedrockKBStack.knowledgebaseid> +``` + +The command should output CloudWatch log entries for the various stages of the ingestion process (such as INGESTION_JOB_STARTED, CRAWLING_COMPLETED, EMBEDDING_STARTED, and so on). The final log statement for a given ingestion job ID should be the entry that indicates the COMPLETED status of the job, as in the screenshot below. The log entry also outputs resource stats, including the number of documents ingested into the knowledge base. + +Sample Output + +![cloudwatch-log](images/cloudwatch-log.png) + +### View Ingestion Job timestamp and status +You can also use the following command to check the status of ingestion job(s). The command outputs the most recent ingestion job. + +> [!NOTE] +> Substitute the `BedrockKBStack.knowledgebaseid` and `BedrockKBStack.datasourceid` found in the Outputs section of the `cdk deploy` command output of the `BedrockKBStack` + +``` +aws bedrock-agent list-ingestion-jobs --knowledge-base-id <BedrockKBStack.knowledgebaseid> --data-source-id <BedrockKBStack.datasourceid> --query 'reverse(sort_by(ingestionJobSummaries,&startedAt))[:1].{startedAt:startedAt, updatedAt:updatedAt,ingestionJobId:ingestionJobId,status:status}' +``` +Sample Output + +![list-ingestion-jobs-output](images/list-ingestion-jobs-output.png) + +## Cleanup + +1. Run the command below in the `eventbridge-bedrock-s3-aoss` directory to delete the AWS resources created by this sample stack. + ```bash + cdk destroy --all + ``` + +## Extra Resources +* [Bedrock API Reference](https://docs.aws.amazon.com/bedrock/latest/APIReference/welcome.html) +* [Sync to ingest your data sources into the knowledge base](https://docs.aws.amazon.com/bedrock/latest/userguide/knowledge-base-ingest.html) +* [What is Amazon EventBridge Scheduler?](https://docs.aws.amazon.com/scheduler/latest/UserGuide/what-is-scheduler.html) +* [Using universal targets with EventBridge Scheduler](https://docs.aws.amazon.com/scheduler/latest/UserGuide/managing-targets-universal.html) + +---- +Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ +SPDX-License-Identifier: MIT-0 \ No newline at end of file diff --git a/eventbridge-bedrock-s3-aoss/app.py b/eventbridge-bedrock-s3-aoss/app.py new file mode 100644 index 000000000..a9bced6f3 --- /dev/null +++ b/eventbridge-bedrock-s3-aoss/app.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 +import os +import aws_cdk as cdk +from stacks.bedrock_knowledgebase_stack import BedrockKnowledgebaseStack +from stacks.opensearch_serverless_stack import OpenSearchServerlessStack +from stacks.ingestion_job_resources_stack import IngestionJobResourcesStack +from stacks.bedrock_service_role_stack import BedrockServiceRoleStack + + +app = cdk.App() + +bedrock_sr_ap_stack = BedrockServiceRoleStack(app, + "BedrockServiceRoleStack", +) + +opensearch_serverless_stack = OpenSearchServerlessStack(app, "AOSSStack", + bedrock_kb_service_role_arn = bedrock_sr_ap_stack.bedrock_kb_service_role_arn +) + +bedrock_kb_stack = BedrockKnowledgebaseStack(app, + "BedrockKBStack", + cfn_aoss_collection_arn = opensearch_serverless_stack.cfn_aoss_collection_arn, + index_name = opensearch_serverless_stack.index_name, + bedrock_kb_service_role_arn = bedrock_sr_ap_stack.bedrock_kb_service_role_arn +) +ingestion_job_resources_stack = IngestionJobResourcesStack(app, + "SchedulerStack", + knowledge_base_id=bedrock_kb_stack.knowledge_base_id, + data_source_id=bedrock_kb_stack.knowledgebase_datasource_id +) + +app.synth() diff --git a/eventbridge-bedrock-s3-aoss/architecture/architecture.png b/eventbridge-bedrock-s3-aoss/architecture/architecture.png new file mode 100644 index 000000000..2da270d91 Binary files /dev/null and b/eventbridge-bedrock-s3-aoss/architecture/architecture.png differ diff --git a/eventbridge-bedrock-s3-aoss/cdk.context.json b/eventbridge-bedrock-s3-aoss/cdk.context.json new file mode 100644 index 000000000..e85d868fb --- /dev/null +++ b/eventbridge-bedrock-s3-aoss/cdk.context.json @@ -0,0 +1,16 @@ +{ + "opensearch_serverless_params": { + "collection_name": "bedrock-kb", + "index_name": "bedrock-kb-index" + }, + "bedrock_knowledgebase_params": { + "knowledge_base_name": "rag-knowledge-base", + "kb_s3_datasource_name":"kb-s3-datasource", + "embedding_model_id": "amazon.titan-embed-text-v2:0", + "vector_index_metadata_field":"text-metadata", + "vector_index_text_field":"text", + "vector_index_vector_field":"vector", + "kb_cw_log_group_name_prefix":"BedrockKnowledgeBase", + "bedrock_kb_log_delivery_source":"bedrock_kb_log_delivery_source" + } +} \ No newline at end of file diff --git a/eventbridge-bedrock-s3-aoss/cdk.json b/eventbridge-bedrock-s3-aoss/cdk.json new file mode 100644 index 000000000..c9710ade4 --- /dev/null +++ b/eventbridge-bedrock-s3-aoss/cdk.json @@ -0,0 +1,52 @@ +{ + "app": "python3 app.py", + "watch": { + "include": [ + "**" + ], + "exclude": [ + "README.md", + "cdk*.json", + "requirements*.txt", + "source.bat", + "**/__init__.py", + "python/__pycache__", + "tests" + ] + }, + "context": { + "@aws-cdk/aws-lambda:recognizeLayerVersion": true, + "@aws-cdk/core:checkSecretUsage": true, + "@aws-cdk/core:target-partitions": [ + "aws", + "aws-cn" + ], + "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true, + "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true, + "@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true, + "@aws-cdk/aws-iam:minimizePolicies": true, + "@aws-cdk/core:validateSnapshotRemovalPolicy": true, + "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true, + "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true, + 
"@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true, + "@aws-cdk/aws-apigateway:disableCloudWatchRole": true, + "@aws-cdk/core:enablePartitionLiterals": true, + "@aws-cdk/aws-events:eventsTargetQueueSameAccount": true, + "@aws-cdk/aws-iam:standardizedServicePrincipals": true, + "@aws-cdk/aws-ecs:disableExplicitDeploymentControllerForCircuitBreaker": true, + "@aws-cdk/aws-iam:importedRoleStackSafeDefaultPolicyName": true, + "@aws-cdk/aws-s3:serverAccessLogsUseBucketPolicy": true, + "@aws-cdk/aws-route53-patters:useCertificate": true, + "@aws-cdk/customresources:installLatestAwsSdkDefault": false, + "@aws-cdk/aws-rds:databaseProxyUniqueResourceName": true, + "@aws-cdk/aws-codedeploy:removeAlarmsFromDeploymentGroup": true, + "@aws-cdk/aws-apigateway:authorizerChangeDeploymentLogicalId": true, + "@aws-cdk/aws-ec2:launchTemplateDefaultUserData": true, + "@aws-cdk/aws-secretsmanager:useAttachedSecretResourcePolicyForSecretTargetAttachments": true, + "@aws-cdk/aws-redshift:columnId": true, + "@aws-cdk/aws-stepfunctions-tasks:enableEmrServicePolicyV2": true, + "@aws-cdk/aws-ec2:restrictDefaultSecurityGroup": true, + "@aws-cdk/aws-apigateway:requestValidatorUniqueId": true, + "@aws-cdk/aws-kms:aliasNameRef": true + } +} diff --git a/eventbridge-bedrock-s3-aoss/eventbridge-bedrock-s3-aoss.json b/eventbridge-bedrock-s3-aoss/eventbridge-bedrock-s3-aoss.json new file mode 100644 index 000000000..2d51cfcbe --- /dev/null +++ b/eventbridge-bedrock-s3-aoss/eventbridge-bedrock-s3-aoss.json @@ -0,0 +1,92 @@ +{ + "title": "Automatic sync of Amazon Bedrock Knowledge Base using EventBridge Scheduler", + "description": "Use Amazon EventBridge Scheduler to trigger an Ingestion Job periodically to keep the Knowledge Bases vector store up-to-date", + "language": "Python", + "level": "200", + "framework": "CDK", + "introBox": { + "headline": "How it works", + "text": [ + "This sample project demonstrates how to use an EventBridge rule to periodically sync a Bedrock Knowledge Base data source to Amazon OpenSearch Serverless.", + "The sample project uses an EventBridge Scheduler that runs on a regular interval. The scheduler has a universal target configured to trigger the StartIngestionJob API on Amazon Bedrock Agent service. The StartIngestionJob runs a job to sync the data source to the Knowledge Base" + ] + }, + "gitHub": { + "template": { + "repoURL": "https://github.com/aws-samples/serverless-patterns/tree/main/eventbridge-bedrock-s3-aoss", + "templateURL": "serverless-patterns/eventbridge-bedrock-s3-aoss", + "projectFolder": "eventbridge-bedrock-s3-aoss", + "templateFile": "app.py" + } + }, + "resources": { + "bullets": [ + { + "text": "Sync to ingest your data sources into the knowledge base", + "link": "https://docs.aws.amazon.com/bedrock/latest/userguide/knowledge-base-ingest.html" + }, + { + "text": "Knowledge bases for Amazon Bedrock", + "link": "https://docs.aws.amazon.com/bedrock/latest/userguide/knowledge-base.html" + }, + { + "text": "What is Amazon EventBridge Scheduler?", + "link": "https://docs.aws.amazon.com/scheduler/latest/UserGuide/what-is-scheduler.html" + } + ] + }, + "deploy": { + "text": [ + "cdk deploy --all" + ] + }, + "testing": { + "text": [ + "See the GitHub repo for detailed testing instructions." + ] + }, + "cleanup": { + "text": [ + "Delete the stack: cdk destroy." 
+ ] + }, + "authors": [ + { + "name": "Raja Vaidyanathan", + "image": "https://avatars.githubusercontent.com/u/125882914?v=4", + "bio": "Solutions Architect @ AWS", + "linkedin": "rajav", + "twitter": "rajav" + } + ], + "patternArch": { + "icon1": { + "x": 20, + "y": 50, + "service": "eventbridge-scheduler", + "label": "Amazon EventBridge Scheduler" + }, + "icon2": { + "x": 50, + "y": 50, + "service": "bedrock", + "label": "Amazon Bedrock" + }, + "icon3": { + "x": 80, + "y": 50, + "service": "opensearch", + "label": "Amazon OpenSearch" + }, + "line1": { + "from": "icon1", + "to": "icon2", + "label": "Ingestion job" + }, + "line2": { + "from": "icon2", + "to": "icon3", + "label": "vectors" + } + } +} diff --git a/eventbridge-bedrock-s3-aoss/example-pattern.json b/eventbridge-bedrock-s3-aoss/example-pattern.json new file mode 100644 index 000000000..77b4f1fe2 --- /dev/null +++ b/eventbridge-bedrock-s3-aoss/example-pattern.json @@ -0,0 +1,62 @@ +{ + "title": "Automatic sync of Amazon Bedrock Knowledge Base using EventBridge Scheduler", + "description": "Use Amazon EventBridge Scheduler to trigger an Ingestion Job periodically to keep the Knowledge Bases vector store up-to-date", + "language": "Python", + "level": "200", + "framework": "CDK", + "introBox": { + "headline": "How it works", + "text": [ + "This sample project demonstrates how to use an EventBridge Scheduler schedule to periodically sync a Bedrock Knowledge Base data source to Amazon OpenSearch Serverless.", + "The sample project uses an EventBridge Scheduler schedule that runs on a regular interval. The schedule has a universal target configured to invoke the StartIngestionJob API on the Amazon Bedrock Agents service. StartIngestionJob runs a job that syncs the data source to the Knowledge Base." + ] + }, + "gitHub": { + "template": { + "repoURL": "https://github.com/aws-samples/serverless-patterns/tree/main/eventbridge-bedrock-s3-aoss", + "templateURL": "serverless-patterns/eventbridge-bedrock-s3-aoss", + "projectFolder": "eventbridge-bedrock-s3-aoss", + "templateFile": "app.py" + } + }, + "resources": { + "bullets": [ + { + "text": "Sync to ingest your data sources into the knowledge base", + "link": "https://docs.aws.amazon.com/bedrock/latest/userguide/knowledge-base-ingest.html" + }, + { + "text": "Knowledge bases for Amazon Bedrock", + "link": "https://docs.aws.amazon.com/bedrock/latest/userguide/knowledge-base.html" + }, + { + "text": "What is Amazon EventBridge Scheduler?", + "link": "https://docs.aws.amazon.com/scheduler/latest/UserGuide/what-is-scheduler.html" + } + ] + }, + "deploy": { + "text": [ + "cdk deploy --all" + ] + }, + "testing": { + "text": [ + "See the GitHub repo for detailed testing instructions." + ] + }, + "cleanup": { + "text": [ + "Delete the stacks: cdk destroy --all."
+ ] + }, + "authors": [ + { + "name": "Raja Vaidyanathan", + "image": "https://avatars.githubusercontent.com/u/125882914?v=4", + "bio": "Solutions Architect @ AWS", + "linkedin": "rajav", + "twitter": "rajav" + } + ] +} diff --git a/eventbridge-bedrock-s3-aoss/examples/2022-Shareholder-Letter.pdf b/eventbridge-bedrock-s3-aoss/examples/2022-Shareholder-Letter.pdf new file mode 100644 index 000000000..5819ce875 Binary files /dev/null and b/eventbridge-bedrock-s3-aoss/examples/2022-Shareholder-Letter.pdf differ diff --git a/eventbridge-bedrock-s3-aoss/functions/index_creation.py b/eventbridge-bedrock-s3-aoss/functions/index_creation.py new file mode 100644 index 000000000..b148ed34c --- /dev/null +++ b/eventbridge-bedrock-s3-aoss/functions/index_creation.py @@ -0,0 +1,177 @@ +import boto3 +from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth + +def on_event(event, context): + """ + This function is the entry point for the Lambda function. + It receives an event and a context object, and based on the request type + in the event, it calls the appropriate function to handle the request. + """ + print(event) + request_type = event['RequestType'] + if request_type == 'Create': return on_create(event) + if request_type == 'Update': return on_update(event) + if request_type == 'Delete': return on_delete(event) + raise Exception("Invalid request type: %s" % request_type) + +def on_create(event): + """ + This function is called when a new resource is being created. + It prints the resource properties, calls the create_or_update_index function + to create or update the index, and returns the response from that function. + """ + props = event["ResourceProperties"] + print("create new resource with props %s" % props) + response = create_or_update_index(event) + print(response) + return { + "Data": { + "response": response + } + } + +def on_update(event): + """ + This function is called when an existing resource is being updated. + It prints the resource properties, calls the create_or_update_index function + to create or update the index, and returns the response from that function. + """ + props = event["ResourceProperties"] + print("update resource with props %s" % props) + response = create_or_update_index(event) + print(response) + return { + "Data": { + "response": response + } + } + +def on_delete(event): + """ + This function is called when a resource is being deleted. + It returns the physical resource ID of the resource being deleted. + """ + physical_id = event["PhysicalResourceId"] + return {'PhysicalResourceId': physical_id} + +def is_complete(event, context): + """ + This function checks if the resource is in a stable state based on the request type. + It returns a dictionary indicating whether the resource is complete or not. + """ + physical_id = event["PhysicalResourceId"] + request_type = event["RequestType"] + + # check if resource is stable based on request_type + # is_ready = ... + + return {'IsComplete': True} + +def removeHttpsPrefix(endpoint): + """ + This function removes the "https://" prefix from a given endpoint string, + if present, and returns the modified string. + """ + if endpoint.startswith("https://"): + return endpoint[8:] + return endpoint + +def get_aoss_host(resource_properties): + """ + This function retrieves the Amazon OpenSearch Serverless (AOSS) host from the + resource properties. It raises an exception if the AOSSHost property is not provided.
+ """ + if "AOSSHost" not in resource_properties: + raise Exception("AOSSHost not provided from resource properties") + return removeHttpsPrefix(resource_properties["AOSSHost"]) + +def get_aoss_client(host): + """ + This function creates and returns an Amazon OpenSearch Service (AOSS) client + using the provided host. It authenticates the client using AWS credentials + and the AWS Signature Version 4 signer. + """ + auth = AWSV4SignerAuth( + boto3.Session().get_credentials(), + boto3.session.Session().region_name, + "aoss" + ) + # create an opensearch client and use the request-signer + return OpenSearch( + hosts=[{'host': host, 'port': 443}], + http_auth=auth, + use_ssl=True, + verify_certs=True, + connection_class=RequestsHttpConnection + ) + +def get_aoss_index_name(resource_properties): + """ + This function retrieves the Amazon OpenSearch Service (AOSS) index name from the + resource properties. It raises an exception if the AOSSIndexName property is not provided. + """ + if "AOSSIndexName" not in resource_properties: + raise Exception("AOSSIndexName not provided from resource properties") + return resource_properties["AOSSIndexName"] + +def create_aoss_index(index_name, aos_client): + """ + This function creates an index in the Amazon OpenSearch Service (AOSS) using the + provided index name and client. It configures the index settings and mappings + for vector search and returns the response from the index creation operation. + """ + index_body = { + "settings": { + "index.knn": True + }, + "mappings": { + "properties": { + "vector": { + "type": "knn_vector", + "dimension": 1024, + "method": { + "name": "hnsw", + "space_type": "l2", + "engine": "faiss", + "parameters": { + "ef_construction": 512, + "m": 16 + } + } + }, + "text": { + "type": "text" + }, + "id": { + "type": "text" + }, + "text-metadata": { + "type": "text" + }, + "x-amz-bedrock-kb-source-uri": { + "type": "text" + } + } + } + } + response = aos_client.indices.create(index=index_name, body=index_body) + print(f"Created index {index_name}") + return response + +def create_or_update_index(event): + """ + This function creates or updates an index in the Amazon OpenSearch Service (AOSS). + It retrieves the AOSS host and index name from the resource properties, creates + an AOSS client, and checks if the index exists. If the index doesn't exist, + it creates a new index using the create_aoss_index function. It returns the + response from the index creation or update operation. 
+ """ + resource_properties = event['ResourceProperties'] + aoss_host = get_aoss_host(resource_properties) + aos_client = get_aoss_client(aoss_host) + index_name = get_aoss_index_name(resource_properties) + response = None + if not aos_client.indices.exists(index=index_name): + response = create_aoss_index(index_name=index_name, aos_client=aos_client) + return response diff --git a/eventbridge-bedrock-s3-aoss/functions/sync_bedrock_kb.py b/eventbridge-bedrock-s3-aoss/functions/sync_bedrock_kb.py new file mode 100644 index 000000000..432dbb93c --- /dev/null +++ b/eventbridge-bedrock-s3-aoss/functions/sync_bedrock_kb.py @@ -0,0 +1,26 @@ +import json +import os +import boto3 + +def lambda_handler(event, context): + + bedrock_agent_client = boto3.client('bedrock-agent') + + # Specify the Knowledge Base ID + knowledge_base_id = os.environ['KNOWLEDGE_BASE_ID'] + data_source_id = os.environ['DATA_SOURCE_ID'] + + response = bedrock_agent_client.start_ingestion_job( + dataSourceId=data_source_id, + description=f"Scheduled Knowledge Base Sync - {event['time']}", + knowledgeBaseId=knowledge_base_id + ) + message = f"Ingestion job with ID: {response['ingestionJob']['ingestionJobId']} started at {response['ingestionJob']['startedAt'] } with current status:{response['ingestionJob']['status']}" + # Print the sync job ID + print(message) + return { + 'statusCode': 200, + 'body': json.dumps({ + 'result': message + }) + } \ No newline at end of file diff --git a/eventbridge-bedrock-s3-aoss/images/bedrock-menu-expand.png b/eventbridge-bedrock-s3-aoss/images/bedrock-menu-expand.png new file mode 100644 index 000000000..6727346e2 Binary files /dev/null and b/eventbridge-bedrock-s3-aoss/images/bedrock-menu-expand.png differ diff --git a/eventbridge-bedrock-s3-aoss/images/bedrock-search.png b/eventbridge-bedrock-s3-aoss/images/bedrock-search.png new file mode 100644 index 000000000..f5c290319 Binary files /dev/null and b/eventbridge-bedrock-s3-aoss/images/bedrock-search.png differ diff --git a/eventbridge-bedrock-s3-aoss/images/cloudtrail-eventhistory.png b/eventbridge-bedrock-s3-aoss/images/cloudtrail-eventhistory.png new file mode 100644 index 000000000..2e3845e80 Binary files /dev/null and b/eventbridge-bedrock-s3-aoss/images/cloudtrail-eventhistory.png differ diff --git a/eventbridge-bedrock-s3-aoss/images/cloudwatch-log.png b/eventbridge-bedrock-s3-aoss/images/cloudwatch-log.png new file mode 100644 index 000000000..b8f5c8c7f Binary files /dev/null and b/eventbridge-bedrock-s3-aoss/images/cloudwatch-log.png differ diff --git a/eventbridge-bedrock-s3-aoss/images/edit-model-access.png b/eventbridge-bedrock-s3-aoss/images/edit-model-access.png new file mode 100644 index 000000000..81340cdb3 Binary files /dev/null and b/eventbridge-bedrock-s3-aoss/images/edit-model-access.png differ diff --git a/eventbridge-bedrock-s3-aoss/images/list-ingestion-jobs-output.png b/eventbridge-bedrock-s3-aoss/images/list-ingestion-jobs-output.png new file mode 100644 index 000000000..d57dc1afe Binary files /dev/null and b/eventbridge-bedrock-s3-aoss/images/list-ingestion-jobs-output.png differ diff --git a/eventbridge-bedrock-s3-aoss/images/model-access-link.png b/eventbridge-bedrock-s3-aoss/images/model-access-link.png new file mode 100644 index 000000000..a476fe9ad Binary files /dev/null and b/eventbridge-bedrock-s3-aoss/images/model-access-link.png differ diff --git a/eventbridge-bedrock-s3-aoss/images/model-access-view-2.png b/eventbridge-bedrock-s3-aoss/images/model-access-view-2.png new file mode 100644 index 
000000000..92a5c1bf3 Binary files /dev/null and b/eventbridge-bedrock-s3-aoss/images/model-access-view-2.png differ diff --git a/eventbridge-bedrock-s3-aoss/images/model-access-view.png b/eventbridge-bedrock-s3-aoss/images/model-access-view.png new file mode 100644 index 000000000..5363495b2 Binary files /dev/null and b/eventbridge-bedrock-s3-aoss/images/model-access-view.png differ diff --git a/eventbridge-bedrock-s3-aoss/images/region-selection.png b/eventbridge-bedrock-s3-aoss/images/region-selection.png new file mode 100644 index 000000000..2c542baad Binary files /dev/null and b/eventbridge-bedrock-s3-aoss/images/region-selection.png differ diff --git a/eventbridge-bedrock-s3-aoss/images/startingestionjob-event.png b/eventbridge-bedrock-s3-aoss/images/startingestionjob-event.png new file mode 100644 index 000000000..10a9dcf12 Binary files /dev/null and b/eventbridge-bedrock-s3-aoss/images/startingestionjob-event.png differ diff --git a/eventbridge-bedrock-s3-aoss/images/submit.png b/eventbridge-bedrock-s3-aoss/images/submit.png new file mode 100644 index 000000000..fa44e49f9 Binary files /dev/null and b/eventbridge-bedrock-s3-aoss/images/submit.png differ diff --git a/eventbridge-bedrock-s3-aoss/layers/requirements.txt b/eventbridge-bedrock-s3-aoss/layers/requirements.txt new file mode 100644 index 000000000..63c4ec34e --- /dev/null +++ b/eventbridge-bedrock-s3-aoss/layers/requirements.txt @@ -0,0 +1 @@ +opensearch-py diff --git a/eventbridge-bedrock-s3-aoss/requirements.txt b/eventbridge-bedrock-s3-aoss/requirements.txt new file mode 100644 index 000000000..504b5d2dc --- /dev/null +++ b/eventbridge-bedrock-s3-aoss/requirements.txt @@ -0,0 +1,5 @@ +aws-cdk-lib==2.148.0 +constructs>=10.0.0,<11.0.0 +opensearch-py +crhelper +boto3 diff --git a/eventbridge-bedrock-s3-aoss/source.bat b/eventbridge-bedrock-s3-aoss/source.bat new file mode 100644 index 000000000..9e1a83442 --- /dev/null +++ b/eventbridge-bedrock-s3-aoss/source.bat @@ -0,0 +1,13 @@ +@echo off + +rem The sole purpose of this script is to make the command +rem +rem source .venv/bin/activate +rem +rem (which activates a Python virtualenv on Linux or Mac OS X) work on Windows. +rem On Windows, this command just runs this batch file (the argument is ignored). +rem +rem Now we don't need to document a Windows command for activating a virtualenv. 
+ +echo Executing .venv\Scripts\activate.bat for you +.venv\Scripts\activate.bat diff --git a/eventbridge-bedrock-s3-aoss/stacks/__init__.py b/eventbridge-bedrock-s3-aoss/stacks/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/eventbridge-bedrock-s3-aoss/stacks/bedrock_knowledgebase_stack.py b/eventbridge-bedrock-s3-aoss/stacks/bedrock_knowledgebase_stack.py new file mode 100644 index 000000000..dceb225a5 --- /dev/null +++ b/eventbridge-bedrock-s3-aoss/stacks/bedrock_knowledgebase_stack.py @@ -0,0 +1,135 @@ +from aws_cdk import ( + CfnOutput, + Stack, + aws_bedrock as bedrock, + aws_s3 as s3, + aws_iam as iam, + aws_logs as logs, +) +from constructs import Construct + +class BedrockKnowledgebaseStack(Stack): + def __init__(self, scope: Construct, construct_id: str, + cfn_aoss_collection_arn, + index_name, + bedrock_kb_service_role_arn, + **kwargs) -> None: + super().__init__(scope, construct_id, **kwargs) + + # Set up variables for use in the constructs below + BEDROCK_KNOWLEDGEBASE_PARAMS = self.node.try_get_context('bedrock_knowledgebase_params') + embedding_model_id = BEDROCK_KNOWLEDGEBASE_PARAMS['embedding_model_id'] + embedding_model_arn = f"arn:aws:bedrock:{self.region}::foundation-model/{embedding_model_id}" + metadata_field = BEDROCK_KNOWLEDGEBASE_PARAMS['vector_index_metadata_field'] + text_field = BEDROCK_KNOWLEDGEBASE_PARAMS['vector_index_text_field'] + vector_field = BEDROCK_KNOWLEDGEBASE_PARAMS['vector_index_vector_field'] + knowledge_base_name = BEDROCK_KNOWLEDGEBASE_PARAMS['knowledge_base_name'] + kb_s3_datasource_name = BEDROCK_KNOWLEDGEBASE_PARAMS['kb_s3_datasource_name'] + kb_cw_log_group_name_prefix = BEDROCK_KNOWLEDGEBASE_PARAMS['kb_cw_log_group_name_prefix'] + bedrock_kb_log_delivery_source = BEDROCK_KNOWLEDGEBASE_PARAMS['bedrock_kb_log_delivery_source'] + + # Create an S3 bucket to store the data files needed for the RAG knowledge base + knowledgebase_datasource_bucket = s3.Bucket( + self, + "KBDataSourceS3Bucket", + public_read_access=False + ) + + # Create the Bedrock knowledge base with the OpenSearch Serverless collection as its vector store + bedrock_knowledgebase = bedrock.CfnKnowledgeBase(self, "BedrockKB", + name=knowledge_base_name, + knowledge_base_configuration=bedrock.CfnKnowledgeBase.KnowledgeBaseConfigurationProperty( + type="VECTOR", + vector_knowledge_base_configuration=bedrock.CfnKnowledgeBase.VectorKnowledgeBaseConfigurationProperty( + embedding_model_arn=embedding_model_arn + ) + ), + role_arn=bedrock_kb_service_role_arn, + storage_configuration=bedrock.CfnKnowledgeBase.StorageConfigurationProperty( + type="OPENSEARCH_SERVERLESS", + opensearch_serverless_configuration=bedrock.CfnKnowledgeBase.OpenSearchServerlessConfigurationProperty( + collection_arn=cfn_aoss_collection_arn, + vector_index_name=index_name, + field_mapping=bedrock.CfnKnowledgeBase.OpenSearchServerlessFieldMappingProperty( + metadata_field=metadata_field, + text_field=text_field, + vector_field=vector_field + ) + ) + ), + description="RAG Knowledge Base for Amazon Bedrock" + ) + + + knowledge_base_id = bedrock_knowledgebase.attr_knowledge_base_id + # Add a KB data source with an S3 data source configuration + knowledgebase_datasource = bedrock.CfnDataSource(self, "BedrockKBDataSource", + name=kb_s3_datasource_name, + description="Bedrock Knowledgebase DataSource Configuration", + data_source_configuration=bedrock.CfnDataSource.DataSourceConfigurationProperty(
s3_configuration=bedrock.CfnDataSource.S3DataSourceConfigurationProperty( + bucket_arn=knowledgebase_datasource_bucket.bucket_arn, + ), + type="S3" + ), + vector_ingestion_configuration=bedrock.CfnDataSource.VectorIngestionConfigurationProperty( + chunking_configuration=bedrock.CfnDataSource.ChunkingConfigurationProperty( + chunking_strategy="FIXED_SIZE", + fixed_size_chunking_configuration=bedrock.CfnDataSource.FixedSizeChunkingConfigurationProperty( + max_tokens=1024, + overlap_percentage=30 + ) + ) + ), + knowledge_base_id=knowledge_base_id + ) + + log_group = logs.LogGroup(self, + "BedrockKBLogGroup", + log_group_name=f"{kb_cw_log_group_name_prefix}-{knowledge_base_id}", + retention=logs.RetentionDays.TWO_WEEKS + ) + log_group.add_to_resource_policy( + iam.PolicyStatement( + sid="AWSLogDeliveryWriteBedrockKB20240719", + effect=iam.Effect.ALLOW, + principals=[iam.ServicePrincipal('delivery.logs.amazonaws.com')], + actions=["logs:CreateLogStream", "logs:PutLogEvents"], + resources=[log_group.log_group_arn], + conditions={ + "StringEquals": { + "aws:SourceAccount": self.account + }, + "ArnLike": { + "aws:SourceArn": f"arn:aws:logs:{self.region}:{self.account}:delivery-source:{bedrock_kb_log_delivery_source}" + } + } + ) + ) + cfn_delivery_destination = logs.CfnDeliveryDestination(self, "BedrockKBDeliveryDestination", + name="BedrockKBDeliveryDestination", + destination_resource_arn=log_group.log_group_arn + ) + cfn_delivery_source = logs.CfnDeliverySource(self, "BedrockKBDeliverySource", + name=bedrock_kb_log_delivery_source, + log_type="APPLICATION_LOGS", + resource_arn=f"arn:aws:bedrock:{self.region}:{self.account}:knowledge-base/{knowledge_base_id}" + ) + + cfn_delivery = logs.CfnDelivery(self, "BedrockKBDelivery", + delivery_destination_arn=cfn_delivery_destination.attr_arn, + delivery_source_name=cfn_delivery_source.name, + ) + cfn_delivery.node.add_dependency(cfn_delivery_destination) + cfn_delivery.node.add_dependency(cfn_delivery_source) + + self.knowledge_base_id = bedrock_knowledgebase.attr_knowledge_base_id + self.knowledgebase_datasource_id = knowledgebase_datasource.attr_data_source_id + self.bucket_name = knowledgebase_datasource_bucket.bucket_name + CfnOutput(self, "knowledge_base_id", value=self.knowledge_base_id, export_name="knowledgeBaseId") + CfnOutput(self, "data_source_id", value=self.knowledgebase_datasource_id, export_name="DataSourceId") + CfnOutput(self, "bucket_name", value=self.bucket_name, export_name="DataSourceBucketName") \ No newline at end of file diff --git a/eventbridge-bedrock-s3-aoss/stacks/bedrock_service_role_stack.py b/eventbridge-bedrock-s3-aoss/stacks/bedrock_service_role_stack.py new file mode 100644 index 000000000..ffa6592db --- /dev/null +++ b/eventbridge-bedrock-s3-aoss/stacks/bedrock_service_role_stack.py @@ -0,0 +1,58 @@ +from aws_cdk import ( + Stack, + aws_iam as iam, +) +from constructs import Construct + +class BedrockServiceRoleStack(Stack): + def __init__(self, scope: Construct, construct_id: str, + **kwargs) -> None: + super().__init__(scope, construct_id, **kwargs) + + # Create an IAM service role that Bedrock assumes on behalf of the knowledge base + bedrock_kb_service_role = iam.Role(self, "BedrockKBServiceRole", + role_name="BedrockKBServiceRole", + managed_policies=[ + iam.ManagedPolicy.from_aws_managed_policy_name("AmazonS3ReadOnlyAccess"), + ], + inline_policies={ + "StartIngestionJob": iam.PolicyDocument( + statements=[ + iam.PolicyStatement( + actions=["bedrock:StartIngestionJob"], + resources=["*"]
+ ) + ] + ), + "EmbeddingModelAccess": iam.PolicyDocument( + statements=[ + iam.PolicyStatement( + actions=["bedrock:InvokeModel"], + resources=[f"arn:aws:bedrock:{self.region}::foundation-model/*"] + ) + ] + ), + "OpenSearchServerlessAccess": iam.PolicyDocument( + statements=[ + iam.PolicyStatement( + actions=["aoss:APIAccessAll"], + resources=[f"arn:aws:aoss:{self.region}:{self.account}:collection/*"] + ) + ] + ) + }, + assumed_by=iam.ServicePrincipal("bedrock.amazonaws.com", + conditions={ + "StringEquals": { + "aws:SourceAccount": self.account + }, + "ArnLike": { + "aws:SourceArn": f"arn:aws:bedrock:{self.region}:{self.account}:knowledge-base/*" + } + } + ) + ) + self.bedrock_kb_service_role_arn = bedrock_kb_service_role.role_arn + diff --git a/eventbridge-bedrock-s3-aoss/stacks/ingestion_job_resources_stack.py b/eventbridge-bedrock-s3-aoss/stacks/ingestion_job_resources_stack.py new file mode 100644 index 000000000..e229513c5 --- /dev/null +++ b/eventbridge-bedrock-s3-aoss/stacks/ingestion_job_resources_stack.py @@ -0,0 +1,59 @@ +from aws_cdk import ( + Stack, + aws_iam as iam, + aws_scheduler as scheduler, +) +from constructs import Construct + +class IngestionJobResourcesStack(Stack): + + def __init__(self, scope: Construct, construct_id: str, + knowledge_base_id, + data_source_id, **kwargs) -> None: + super().__init__(scope, construct_id, **kwargs) + + # Create an IAM role that EventBridge Scheduler assumes to call StartIngestionJob + eventbridge_scheduler_role = iam.Role(self, "EventBridgeSchedulerRole", + role_name="EventBridgeSchedulerRole", + inline_policies={ + "BedrockKBSyncPolicy": iam.PolicyDocument( + statements=[ + iam.PolicyStatement( + actions=["bedrock:StartIngestionJob"], + resources=["*"] + ) + ] + ) + }, + assumed_by=iam.ServicePrincipal("scheduler.amazonaws.com", + conditions={ + "StringEquals": { + "aws:SourceAccount": self.account + } + } + ) + ) + + cfn_schedule_group = scheduler.CfnScheduleGroup(self, + "BedrockKBSyncScheduleGroup", + name="BedrockKBSyncScheduleGroup") + cfn_schedule = scheduler.CfnSchedule(self, "BedrockKBDataSourceSyncSchedule", + name="BedrockKBDataSourceSyncSchedule", + description="Schedule to Sync Bedrock Knowledge Base Data Source Periodically", + group_name=cfn_schedule_group.name, + flexible_time_window=scheduler.CfnSchedule.FlexibleTimeWindowProperty( + mode="OFF" + ), + schedule_expression="rate(5 minutes)", + schedule_expression_timezone="UTC+01:00", + target=scheduler.CfnSchedule.TargetProperty( + arn="arn:aws:scheduler:::aws-sdk:bedrockagent:startIngestionJob", + role_arn=eventbridge_scheduler_role.role_arn, + input="{\"KnowledgeBaseId\":\""+knowledge_base_id+"\",\"DataSourceId\":\""+data_source_id+"\"}" + ) + ) diff --git a/eventbridge-bedrock-s3-aoss/stacks/opensearch_serverless_stack.py b/eventbridge-bedrock-s3-aoss/stacks/opensearch_serverless_stack.py new file mode 100644 index 000000000..d81e6bf52 --- /dev/null +++ b/eventbridge-bedrock-s3-aoss/stacks/opensearch_serverless_stack.py @@ -0,0 +1,183 @@ +import json +from aws_cdk import ( + Duration, + Stack, + aws_lambda as _lambda, + aws_iam as iam, + aws_opensearchserverless as aoss, + custom_resources, + CustomResource +) +from constructs import Construct + +class OpenSearchServerlessStack(Stack): + def __init__(self, scope: Construct, construct_id: str, + bedrock_kb_service_role_arn, + **kwargs) -> None: + + super().__init__(scope, construct_id, **kwargs) + + # Retrieve the context
parameters used when creating the resources + OPENSEARCH_SERVERLESS_PARAMS = self.node.try_get_context('opensearch_serverless_params') + collection_name = OPENSEARCH_SERVERLESS_PARAMS['collection_name'] + index_name = OPENSEARCH_SERVERLESS_PARAMS['index_name'] + + # Role for the index-creation Lambda + create_aoss_index_lambda_role = iam.Role(self, "create-index-lambda-role", + assumed_by=iam.ServicePrincipal("lambda.amazonaws.com"), + description="Role for the CreateAOSSIndex Lambda", + managed_policies=[ + iam.ManagedPolicy.from_aws_managed_policy_name("service-role/AWSLambdaBasicExecutionRole") + ], + inline_policies={ + "aoss-policy": iam.PolicyDocument( + statements=[ + iam.PolicyStatement( + actions=["aoss:*"], + resources=["*"] + ) + ] + ) + } + ) + + # Create the necessary network policy, encryption policy, and data access policy for the OpenSearch Serverless collection + network_policy = json.dumps([{ + "Description":f"Public access for {collection_name} collection", + "Rules":[{ + "ResourceType":"dashboard", + "Resource":[ + f"collection/{collection_name}" + ]}, + { + "ResourceType":"collection", + "Resource":[ + f"collection/{collection_name}" + ] + }], + "AllowFromPublic":True + }], indent=2) + encryption_policy = json.dumps({ + "Rules":[ + { + "ResourceType":"collection", + "Resource":[ + f"collection/{collection_name}" + ] + } + ], + "AWSOwnedKey":True + }, indent=2) + data_access_policy = json.dumps([{ + "Rules": [{ + "Resource": [ + f"collection/{collection_name}" + ], + "Permission": [ + "aoss:CreateCollectionItems", + "aoss:DeleteCollectionItems", + "aoss:UpdateCollectionItems", + "aoss:DescribeCollectionItems" + ], + "ResourceType": "collection" + }, + { + "Resource": [ + f"index/{collection_name}/*" + ], + "Permission": [ + "aoss:CreateIndex", + "aoss:DeleteIndex", + "aoss:UpdateIndex", + "aoss:DescribeIndex", + "aoss:ReadDocument", + "aoss:WriteDocument" + ], + "ResourceType": "index" + }], + "Principal": [ + create_aoss_index_lambda_role.role_arn, + bedrock_kb_service_role_arn + ] + }], indent=2) + + # Create the CFN resources for the security and access policies + cfn_data_access_policy = aoss.CfnAccessPolicy(self, + "BedrockKBDataAccessPolicy", + name=f"{collection_name}-ap", + description="Access policy for Admin and Create Index Function", + policy=data_access_policy, + type="data" + ) + cfn_network_access_policy = aoss.CfnSecurityPolicy(self, + "BedrockKBDataSecurityPolicy", + name=f"{collection_name}-np", + description="Security policy for RAG Knowledge Base", + policy=network_policy, + type="network" + ) + cfn_encryption_policy = aoss.CfnSecurityPolicy(self, + "BedrockKBDataEncryptionPolicy", + name=f"{collection_name}-ep", + description="Encryption policy for RAG Knowledge Base", + policy=encryption_policy, + type="encryption" + ) + + # Create the AOSS collection + cfn_aoss_collection = aoss.CfnCollection(self, + "BedrockKBDataCollection", + name=collection_name, + type="VECTORSEARCH", + description="Collection for Bedrock Knowledge Base" + ) + + # Create the Lambda layer holding the function dependencies + layer = _lambda.LayerVersion(self, 'lambda_layer', + description='Dependencies for the lambda functions', + code=_lambda.Code.from_asset('layers/'), + compatible_runtimes=[ + _lambda.Runtime.PYTHON_3_11 + ], + ) + + # Define the Lambda function that creates the vector index in the OpenSearch Serverless collection + create_aoss_index_function = _lambda.Function( + self, 'CreateAOSSIndex',
runtime=_lambda.Runtime.PYTHON_3_11, + code=_lambda.Code.from_asset('functions'), + handler='index_creation.on_event', + timeout=Duration.seconds(30), + role=iam.Role.from_role_arn(self, "LambdaRole", create_aoss_index_lambda_role.role_arn), + layers=[layer] + ) + create_aoss_index_function.node.add_dependency(layer) + + # Create the custom resource provider backed by the Lambda function + cr_provider = custom_resources.Provider( + self, 'CreateAOSSIndexCustomResourceProvider', + on_event_handler=create_aoss_index_function, + provider_function_name="create-aoss-index-provider" + ) + + # Create the custom resource + create_aoss_index_custom_resource = CustomResource( + self, 'CreateAOSSIndexCustomResource', + service_token=cr_provider.service_token, + properties={ + "AOSSIndexName": index_name, + "AOSSHost": cfn_aoss_collection.attr_collection_endpoint + } + ) + + # Add resource dependencies + cfn_aoss_collection.add_dependency(cfn_data_access_policy) + cfn_aoss_collection.add_dependency(cfn_network_access_policy) + cfn_aoss_collection.add_dependency(cfn_encryption_policy) + create_aoss_index_custom_resource.node.add_dependency(cfn_aoss_collection) + + self.index_name = index_name + self.collection_name = collection_name + self.cfn_aoss_collection_arn = cfn_aoss_collection.attr_arn