Skip to content

Commit

Permalink
Merge pull request #14 from OpenSesame/feature/api_delete
Browse files Browse the repository at this point in the history
Feature/api delete
  • Loading branch information
os-rss authored Aug 14, 2023
2 parents 86b9ff4 + 058babe commit 99d5fcf
Show file tree
Hide file tree
Showing 23 changed files with 442 additions and 69 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Manifest.toml
/docs/Manifest.toml
debug_out
envs*
sysimage/

# VS Code
.vscode/*
Expand Down
1 change: 1 addition & 0 deletions ext/OpenSearchExt/OpenSearchExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ function single_query(
)

full_query = Dict(
:size => query.top_k,
:query => Dict(
:bool => Dict(
:must => [knn_query]
Expand Down
47 changes: 45 additions & 2 deletions resources/dev-openai/.well-known/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ paths:
post:
summary: Query
description: Accepts search query objects array each with query and optional filter. Break down complex questions into sub-questions. Refine results by criteria, e.g. time / source, don't do this often. Split queries if ResponseTooLargeError occurs.
operationId: query_query_post
operationId: query_post
requestBody:
content:
application/json:
Expand Down Expand Up @@ -41,6 +41,7 @@ paths:
application/json:
schema:
$ref: "#/components/schemas/UpsertRequest"
required: true
responses:
"200":
description: Successful Response
Expand All @@ -57,7 +58,26 @@ paths:
security:
- HTTPBearer: []
# /upsert-file:
# /delete:
/delete:
delete:
summary: Delete
description: Delete one or more documents
operationId: delete_docs
requestBody:
content:
application/json:
schema:
$ref: "#/components/schemas/DeleteRequest"
required: true
responses:
"200":
description: Successful Response
content:
application/json:
schema:
$ref: "#/components/schemas/DeleteResponse"
security:
- HTTPBearer: []
components:
schemas:
DocumentChunkMetadata:
Expand Down Expand Up @@ -299,6 +319,29 @@ components:
type: array
items:
type: string

DeleteRequest:
type: object
properties:
ids:
type: array
items:
type: string
filter:
type: array
items:
$ref: "#/components/schemas/DocumentMetadataFilter"
delete_all:
type: boolean
default: false
DeleteResponse:
type: object
required:
- success
properties:
success:
type: boolean

securitySchemes:
HTTPBearer:
type: http
Expand Down
36 changes: 28 additions & 8 deletions src/datastore/datastore.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ using ..AppServer
using Mocking
using DebugDataWriter

include("datastor_common.jl")
include("datastore_common.jl")
include("factory.jl")

STORAGE = nothing
const STORAGE = Ref{AbstractStorage}()

"""
Takes in a list of documents and inserts them into the database.
Expand All @@ -17,10 +17,10 @@ then inserts the new ones.
Return a list of document ids.
"""
function upsert(documents::AbstractVector{Document}; chunk_token_size=0)::UpsertResponse
@assert !isnothing(STORAGE) "Storage is not initialized"
@assert isassigned(STORAGE) "Storage is not initialized"

delete(
STORAGE,
STORAGE[],
filter=map(
document -> DocumentMetadataFilter(document_id=document.id),
documents
Expand All @@ -29,14 +29,14 @@ function upsert(documents::AbstractVector{Document}; chunk_token_size=0)::Upsert

chunks = AppServer.get_document_chunks(documents, chunk_token_size)

return upsert(STORAGE, chunks)
return upsert(STORAGE[], chunks)
end

"""
Takes in a list of queries and filters and returns a list of query results with matching document chunks and scores.
"""
function query(queries::AbstractVector{Query})::Vector{QueryResult}
@assert !isnothing(STORAGE) "Storage is not initialized"
@assert isassigned(STORAGE) "Storage is not initialized"

# get a list of just the query texts from the Query list
query_texts = [query.query for query in queries]
Expand All @@ -55,10 +55,30 @@ function query(queries::AbstractVector{Query})::Vector{QueryResult}

@debug_output get_debug_id("datastore") "storage query" queries_with_embeddings

return query(STORAGE, queries_with_embeddings)
return query(STORAGE[], queries_with_embeddings)
end

"""
Removes documents by ids, by filter, or all at once.
Multiple parameters can be used at once.
Returns whether the operation was successful.
"""
function delete(;
    ids::Union{Vector{<:AbstractString}, Nothing}=nothing,
    filter::Union{Vector{DocumentMetadataFilter}, Nothing}=nothing,
    delete_all::Bool=false
)::Bool
    # Keywords:
    #   ids        - document ids to remove; each one becomes a
    #                `DocumentMetadataFilter(document_id=id)`
    #   filter     - metadata filters to remove by
    #   delete_all - when true, everything is removed and `ids`/`filter` are ignored
    #
    # `ids` and `filter` may be given together: their filters are concatenated and
    # handed to the storage in a single call.
    #
    # Throws `AssertionError` when the storage is not initialized and `ArgumentError`
    # when `delete_all` is false and neither `ids` nor `filter` is given.
    @assert isassigned(STORAGE) "Storage is not initialized"

    # The `delete_all` keyword shadows the function of the same name, hence the
    # module-qualified call.
    delete_all && return DataStore.delete_all(STORAGE[])

    if isnothing(ids) && isnothing(filter)
        throw(ArgumentError("One of `ids`, `filter` or `delete_all` is required"))
    end

    # Build an explicitly typed vector so that the storage method, which dispatches on
    # `Vector{DocumentMetadataFilter}`, is matched even when the inputs are empty.
    doc_filter = DocumentMetadataFilter[]
    isnothing(filter) || append!(doc_filter, filter)
    if !isnothing(ids)
        for id in ids
            push!(doc_filter, DocumentMetadataFilter(document_id=id))
        end
    end

    return delete(STORAGE[]; filter=doc_filter)
end

function __init__()
global STORAGE = get_datastore()
global STORAGE[] = get_datastore()
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ using ..GptPluginServer

abstract type AbstractStorage end

include("teststorage.jl")

"""
Takes in a list of list of document chunks and inserts them into the database.
Expand Down Expand Up @@ -45,6 +43,6 @@ Removes everything in the datastore
Returns whether the operation was successful.
"""
function delete_all(storage::AbstractStorage)
function delete_all(storage::AbstractStorage)::Bool
    # Fallback for storages that do not provide their own `delete_all` method:
    # it always raises, so the declared `Bool` is never actually returned from here.
    error("The method 'delete_all' is not implemented for $(typeof(storage))")
end
21 changes: 14 additions & 7 deletions src/datastore/factory.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,19 @@
using Pkg
datastore_env() = get(ENV, "DATASTORE", "")

global DATASTORE_MODULE = let
const DATASTORE_MODULE = Ref{Module}()

begin
datastore = datastore_env() |> uppercase

@info string("Requested datastore: ", datastore_env())

# workaround to detect running in test mode
if isnothing(Pkg.project().name)
datastore = "TEST"
end

if isequal(datastore, "ELASTICSEARCH")
global DATASTORE_MODULE[] = if isequal(datastore, "ELASTICSEARCH")
@info "Pluging ElasticsearchClientExt"
include("../../ext/ElasticsearchClientExt/ElasticsearchClientExt.jl")

Expand All @@ -21,15 +25,18 @@ global DATASTORE_MODULE = let

OpenSearchExt
elseif isequal(datastore, "TEST")
@info "Dummy storage for the logic check only"
include("teststorage.jl")

TestStorageExt
else
nothing
error("DATASTORE environment variable must be non empty and valid")
end
end
using Pkg

function get_datastore()::Union{AbstractStorage,Nothing}
@info string("Requested datastore: ", datastore_env())
isnothing(DATASTORE_MODULE) && error("DATASTORE environment variable must be non empty and valid")
global DATASTORE_MODULE
isassigned(DATASTORE_MODULE) || error("DATASTORE environment variable must be non empty and valid")

DATASTORE_MODULE.create_storage()
DATASTORE_MODULE[].create_storage()
end
60 changes: 58 additions & 2 deletions src/datastore/teststorage.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,63 @@
# Test storage module for logic check only
module TestStorageExt
using ..DataStore
using ..DataStore: AbstractStorage

using ...AppServer:
DocumentChunk,
QueryWithEmbedding, QueryResult,
DocumentMetadataFilter, DocumentChunkWithScore,
UpsertResponse

mutable struct TestStorage <: AbstractStorage
data::Any
data::Vector{String}

TestStorage() = new([])
end
create_storage() = TestStorage(nothing)
create_storage() = TestStorage()

"""
Test-storage upsert: records the document ids (the keys of `chunks`) and nothing else.
The chunk contents themselves are not kept.

Returns an `UpsertResponse` carrying the ids that were appended.
"""
function DataStore.upsert(
    storage::TestStorage,
    chunks::Dict{String, <:AbstractVector{DocumentChunk}},
)::UpsertResponse
    document_ids = String[id for id in keys(chunks)]
    append!(storage.data, document_ids)

    return UpsertResponse(ids = document_ids)
end

"""
Test-storage query: a query matches when its text is exactly equal to a stored
document id. No embeddings are evaluated here.

Returns an empty list when nothing is stored; otherwise one `QueryResult` per query,
holding a single chunk with score `1` on a match and no chunks otherwise.
"""
function DataStore.query(
    storage::TestStorage,
    queries::AbstractVector{QueryWithEmbedding},
)::Vector{QueryResult}
    # nothing stored yet: short-circuit with an empty result list
    if isempty(storage.data)
        return []
    end

    # exact-match lookup of the query text among the stored ids
    is_stored(text) = any(isequal(text), storage.data)

    return [
        QueryResult(
            query = q.query,
            results = is_stored(q.query) ?
                [DocumentChunkWithScore(id = q.query, score = 1)] : [],
        ) for q in queries
    ]
end

"""
Test-storage delete: removes every stored id that equals the `document_id` of one of
the given filters. Other filter fields are not looked at.

Returns `true` when at least one stored id was removed, `false` otherwise.
"""
function DataStore.delete(
    storage::TestStorage;
    filter::Vector{DocumentMetadataFilter},
)::Bool
    ids = getfield.(filter, :document_id)
    initial_length = length(storage.data)

    # keep only the ids that are NOT requested for deletion
    filter!(i -> !(i in ids), storage.data)

    return initial_length != length(storage.data)
end

"""
Test-storage delete_all: drops every stored document id and always reports success.
"""
DataStore.delete_all(storage::TestStorage)::Bool = (empty!(storage.data); true)

end
4 changes: 4 additions & 0 deletions src/generated/.openapi-generator/FILES
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
README.md
docs/DefaultApi.md
docs/DeleteRequest.md
docs/DeleteResponse.md
docs/Document.md
docs/DocumentChunk.md
docs/DocumentChunkMetadata.md
Expand All @@ -20,6 +22,8 @@ docs/ValidationError.md
src/GptPluginServer.jl
src/apis/api_DefaultApi.jl
src/modelincludes.jl
src/models/model_DeleteRequest.jl
src/models/model_DeleteResponse.jl
src/models/model_Document.jl
src/models/model_DocumentChunk.jl
src/models/model_DocumentChunkMetadata.jl
Expand Down
5 changes: 4 additions & 1 deletion src/generated/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,16 @@ The following server methods must be implemented:

Class | Method | HTTP request | Description
------------ | ------------- | ------------- | -------------
*DefaultApi* | [**query_query_post**](docs/DefaultApi.md#query_query_post) | **POST** /query | Query
*DefaultApi* | [**delete_docs**](docs/DefaultApi.md#delete_docs) | **DELETE** /delete | Delete
*DefaultApi* | [**query_post**](docs/DefaultApi.md#query_post) | **POST** /query | Query
*DefaultApi* | [**upsert_post**](docs/DefaultApi.md#upsert_post) | **POST** /upsert | Data upload



## Models

- [DeleteRequest](docs/DeleteRequest.md)
- [DeleteResponse](docs/DeleteResponse.md)
- [Document](docs/Document.md)
- [DocumentChunk](docs/DocumentChunk.md)
- [DocumentChunkMetadata](docs/DocumentChunkMetadata.md)
Expand Down
45 changes: 35 additions & 10 deletions src/generated/docs/DefaultApi.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,42 @@ All URIs are relative to *http://localhost*

Method | HTTP request | Description
------------- | ------------- | -------------
[**query_query_post**](DefaultApi.md#query_query_post) | **POST** /query | Query
[**delete_docs**](DefaultApi.md#delete_docs) | **DELETE** /delete | Delete
[**query_post**](DefaultApi.md#query_post) | **POST** /query | Query
[**upsert_post**](DefaultApi.md#upsert_post) | **POST** /upsert | Data upload


# **query_query_post**
> query_query_post(req::HTTP.Request, query_request::QueryRequest;) -> QueryResponse
# **delete_docs**
> delete_docs(req::HTTP.Request, delete_request::DeleteRequest;) -> DeleteResponse
Delete

Delete one or more documents

### Required Parameters

Name | Type | Description | Notes
------------- | ------------- | ------------- | -------------
**req** | **HTTP.Request** | The HTTP Request object |
**delete_request** | [**DeleteRequest**](DeleteRequest.md)| |

### Return type

[**DeleteResponse**](DeleteResponse.md)

### Authorization

[HTTPBearer](../README.md#HTTPBearer)

### HTTP request headers

- **Content-Type**: application/json
- **Accept**: application/json

[[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md)

# **query_post**
> query_post(req::HTTP.Request, query_request::QueryRequest;) -> QueryResponse
Query

Expand Down Expand Up @@ -38,7 +68,7 @@ Name | Type | Description | Notes
[[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md)

# **upsert_post**
> upsert_post(req::HTTP.Request; upsert_request=nothing,) -> UpsertResponse
> upsert_post(req::HTTP.Request, upsert_request::UpsertRequest;) -> UpsertResponse
Data upload

Expand All @@ -49,12 +79,7 @@ Upload JSON document description
Name | Type | Description | Notes
------------- | ------------- | ------------- | -------------
**req** | **HTTP.Request** | The HTTP Request object |

### Optional Parameters

Name | Type | Description | Notes
------------- | ------------- | ------------- | -------------
**upsert_request** | [**UpsertRequest**](UpsertRequest.md)| |
**upsert_request** | [**UpsertRequest**](UpsertRequest.md)| |

### Return type

Expand Down
Loading

0 comments on commit 99d5fcf

Please sign in to comment.