Skip to content

Commit

Permalink
Add azure blob storage support (#2919)
Browse files Browse the repository at this point in the history
Co-authored-by: jyong <jyong@dify.ai>
  • Loading branch information
JohnJyong and JohnJyong authored Mar 20, 2024
1 parent 9a3d572 commit 86e474f
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 3 deletions.
7 changes: 6 additions & 1 deletion api/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,19 @@ DB_DATABASE=dify

# Storage configuration
# use for store upload files, private keys...
# storage type: local, s3
# storage type: local, s3, azure-blob
STORAGE_TYPE=local
STORAGE_LOCAL_PATH=storage
S3_ENDPOINT=https://your-bucket-name.storage.s3.clooudflare.com
S3_BUCKET_NAME=your-bucket-name
S3_ACCESS_KEY=your-access-key
S3_SECRET_KEY=your-secret-key
S3_REGION=your-region
# Azure Blob Storage configuration
AZURE_BLOB_ACCOUNT_NAME=your-account-name
AZURE_BLOB_ACCOUNT_KEY=your-account-key
AZURE_BLOB_CONTAINER_NAME=yout-container-name
AZURE_BLOB_ACCOUNT_URL=https://<your_account_name>.blob.core.windows.net

# CORS configuration
WEB_API_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
Expand Down
4 changes: 4 additions & 0 deletions api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,10 @@ def __init__(self):
self.S3_ACCESS_KEY = get_env('S3_ACCESS_KEY')
self.S3_SECRET_KEY = get_env('S3_SECRET_KEY')
self.S3_REGION = get_env('S3_REGION')
self.AZURE_BLOB_ACCOUNT_NAME = get_env('AZURE_BLOB_ACCOUNT_NAME')
self.AZURE_BLOB_ACCOUNT_KEY = get_env('AZURE_BLOB_ACCOUNT_KEY')
self.AZURE_BLOB_CONTAINER_NAME = get_env('AZURE_BLOB_CONTAINER_NAME')
self.AZURE_BLOB_ACCOUNT_URL = get_env('AZURE_BLOB_ACCOUNT_URL')

# ------------------------
# Vector Store Configurations.
Expand Down
34 changes: 34 additions & 0 deletions api/extensions/ext_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
import shutil
from collections.abc import Generator
from contextlib import closing
from datetime import datetime, timedelta
from typing import Union

import boto3
from azure.storage.blob import AccountSasPermissions, BlobServiceClient, ResourceTypes, generate_account_sas
from botocore.exceptions import ClientError
from flask import Flask

Expand All @@ -27,6 +29,18 @@ def init_app(self, app: Flask):
endpoint_url=app.config.get('S3_ENDPOINT'),
region_name=app.config.get('S3_REGION')
)
elif self.storage_type == 'azure-blob':
self.bucket_name = app.config.get('AZURE_BLOB_CONTAINER_NAME')
sas_token = generate_account_sas(
account_name=app.config.get('AZURE_BLOB_ACCOUNT_NAME'),
account_key=app.config.get('AZURE_BLOB_ACCOUNT_KEY'),
resource_types=ResourceTypes(service=True, container=True, object=True),
permission=AccountSasPermissions(read=True, write=True, delete=True, list=True, add=True, create=True),
expiry=datetime.utcnow() + timedelta(hours=1)
)
self.client = BlobServiceClient(account_url=app.config.get('AZURE_BLOB_ACCOUNT_URL'),
credential=sas_token)

else:
self.folder = app.config.get('STORAGE_LOCAL_PATH')
if not os.path.isabs(self.folder):
Expand All @@ -35,6 +49,9 @@ def init_app(self, app: Flask):
def save(self, filename, data):
if self.storage_type == 's3':
self.client.put_object(Bucket=self.bucket_name, Key=filename, Body=data)
elif self.storage_type == 'azure-blob':
blob_container = self.client.get_container_client(container=self.bucket_name)
blob_container.upload_blob(filename, data)
else:
if not self.folder or self.folder.endswith('/'):
filename = self.folder + filename
Expand Down Expand Up @@ -63,6 +80,10 @@ def load_once(self, filename: str) -> bytes:
raise FileNotFoundError("File not found")
else:
raise
elif self.storage_type == 'azure-blob':
blob = self.client.get_container_client(container=self.bucket_name)
blob = blob.get_blob_client(blob=filename)
data = blob.download_blob().readall()
else:
if not self.folder or self.folder.endswith('/'):
filename = self.folder + filename
Expand Down Expand Up @@ -90,6 +111,11 @@ def generate(filename: str = filename) -> Generator:
raise FileNotFoundError("File not found")
else:
raise
elif self.storage_type == 'azure-blob':
blob = self.client.get_blob_client(container=self.bucket_name, blob=filename)
with closing(blob.download_blob()) as blob_stream:
while chunk := blob_stream.readall(4096):
yield chunk
else:
if not self.folder or self.folder.endswith('/'):
filename = self.folder + filename
Expand All @@ -109,6 +135,11 @@ def download(self, filename, target_filepath):
if self.storage_type == 's3':
with closing(self.client) as client:
client.download_file(self.bucket_name, filename, target_filepath)
elif self.storage_type == 'azure-blob':
blob = self.client.get_blob_client(container=self.bucket_name, blob=filename)
with open(target_filepath, "wb") as my_blob:
blob_data = blob.download_blob()
blob_data.readinto(my_blob)
else:
if not self.folder or self.folder.endswith('/'):
filename = self.folder + filename
Expand All @@ -128,6 +159,9 @@ def exists(self, filename):
return True
except:
return False
elif self.storage_type == 'azure-blob':
blob = self.client.get_blob_client(container=self.bucket_name, blob=filename)
return blob.exists()
else:
if not self.folder or self.folder.endswith('/'):
filename = self.folder + filename
Expand Down
2 changes: 2 additions & 0 deletions api/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,5 @@ arxiv==2.1.0
yarl~=1.9.4
twilio==9.0.0
qrcode~=7.4.2
azure-storage-blob==12.9.0
azure-identity==1.15.0
20 changes: 18 additions & 2 deletions docker/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ services:
# If you want to enable cross-origin support,
# you must use the HTTPS protocol and set the configuration to `SameSite=None, Secure=true, HttpOnly=true`.
#
# The type of storage to use for storing user files. Supported values are `local` and `s3`, Default: `local`
# The type of storage to use for storing user files. Supported values are `local` and `s3` and `azure-blob`, Default: `local`
STORAGE_TYPE: local
# The path to the local storage directory, the directory relative the root path of API service codes or absolute path. Default: `storage` or `/home/john/storage`.
# only available when STORAGE_TYPE is `local`.
Expand All @@ -81,6 +81,11 @@ services:
S3_ACCESS_KEY: 'ak-difyai'
S3_SECRET_KEY: 'sk-difyai'
S3_REGION: 'us-east-1'
# The Azure Blob storage configurations, only available when STORAGE_TYPE is `azure-blob`.
AZURE_BLOB_ACCOUNT_NAME: 'difyai'
AZURE_BLOB_ACCOUNT_KEY: 'difyai'
AZURE_BLOB_CONTAINER_NAME: 'difyai-container'
AZURE_BLOB_ACCOUNT_URL: 'https://<your_account_name>.blob.core.windows.net'
# The type of vector store to use. Supported values are `weaviate`, `qdrant`, `milvus`.
VECTOR_STORE: weaviate
# The Weaviate endpoint URL. Only available when VECTOR_STORE is `weaviate`.
Expand Down Expand Up @@ -164,9 +169,20 @@ services:
REDIS_USE_SSL: 'false'
# The configurations of celery broker.
CELERY_BROKER_URL: redis://:difyai123456@redis:6379/1
# The type of storage to use for storing user files. Supported values are `local` and `s3`, Default: `local`
# The type of storage to use for storing user files. Supported values are `local` and `s3` and `azure-blob`, Default: `local`
STORAGE_TYPE: local
STORAGE_LOCAL_PATH: storage
# The S3 storage configurations, only available when STORAGE_TYPE is `s3`.
S3_ENDPOINT: 'https://xxx.r2.cloudflarestorage.com'
S3_BUCKET_NAME: 'difyai'
S3_ACCESS_KEY: 'ak-difyai'
S3_SECRET_KEY: 'sk-difyai'
S3_REGION: 'us-east-1'
# The Azure Blob storage configurations, only available when STORAGE_TYPE is `azure-blob`.
AZURE_BLOB_ACCOUNT_NAME: 'difyai'
AZURE_BLOB_ACCOUNT_KEY: 'difyai'
AZURE_BLOB_CONTAINER_NAME: 'difyai-container'
AZURE_BLOB_ACCOUNT_URL: 'https://<your_account_name>.blob.core.windows.net'
# The type of vector store to use. Supported values are `weaviate`, `qdrant`, `milvus`.
VECTOR_STORE: weaviate
# The Weaviate endpoint URL. Only available when VECTOR_STORE is `weaviate`.
Expand Down

0 comments on commit 86e474f

Please sign in to comment.