Skip to content

Commit

Permalink
Fix vector index (TuGraph-family#671)
Browse files Browse the repository at this point in the history
* update

* update

* update

* fix cpplint

* fix lint

* add vector index doc

* fix test case error
  • Loading branch information
ljcui authored Sep 23, 2024
1 parent 1a665ef commit 0579709
Show file tree
Hide file tree
Showing 30 changed files with 576 additions and 584 deletions.
78 changes: 78 additions & 0 deletions docs/zh-CN/source/8.query/3.vector_index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Vector index
## 创建向量索引
如下json定义了一个点类型,名字是`person`, 里面有个字段是`embedding`,类型是`FLOAT_VECTOR`,用来存储向量数据。
目前向量索引只能创建在点的属性上。

```json
{
"label": "person",
"primary": "id",
"type": "VERTEX",
"properties": [{
"name": "id",
"type": "INT32",
"optional": false
}, {
"name": "age",
"type": "INT32",
"optional": false
}, {
"name": "embedding",
"type": "FLOAT_VECTOR",
"optional": false
}]
}

```
把上面这个json序列化成字符串,作为参数传入,建议使用驱动的参数化特性,避免自己拼接语句。
```
CALL db.createVertexLabelByJson($json_data)
```
为`embedding`字段添加向量索引。第三个参数是个map,可以在其中设置向量索引的配置参数;如下例中,`dimension`把向量维度设置为4。
```
CALL db.addVertexVectorIndex('person','embedding', {dimension: 4});
```

再定义一个边类型,用来测试。如下json定义了一个边类型,名字是`like`。
```json
{
"label": "like",
"type": "EDGE",
"constraints": [
["person", "person"]
],
"properties": []
}
```
把上面这个json序列化成字符串,作为参数传入。
```
CALL db.createEdgeLabelByJson($json_data)
```

写入几条测试数据
```
CREATE (n1:person {id:1, age:10, embedding: [1.0,1.0,1.0,1.0]})
CREATE (n2:person {id:2, age:20, embedding: [2.0,2.0,2.0,2.0]})
CREATE (n3:person {id:3, age:30, embedding: [3.0,3.0,3.0,3.0]})
CREATE (n1)-[r:like]->(n2),
(n2)-[r:like]->(n3),
(n3)-[r:like]->(n1);
```
## 向量查询

根据向量搜索出点,第四个参数是个map,里面可以指定一些向量搜索的参数。
```
CALL db.vertexVectorIndexQuery('person','embedding', [1.0,2.0,3.0,4.0], {top_k:2, hnsw_ef_search:10})
yield node return node
```
根据向量搜索出点,只返回`age`小于30的点。
```
CALL db.vertexVectorIndexQuery('person','embedding',[1.0,2.0,3.0,4.0], {top_k:2, hnsw_ef_search:10})
yield node where node.age < 30 return node
```
根据向量搜索出点,筛选出`age`小于30的点,然后再查询这些点的一度邻居。
```
CALL db.vertexVectorIndexQuery('person','embedding',[1.0,2.0,3.0,4.0], {top_k:2, hnsw_ef_search:10})
yield node where node.age < 30 with node as p
match(p)-[r]->(m) return m
```
2 changes: 1 addition & 1 deletion docs/zh-CN/source/development_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ CALL db.deleteIndex('node1', 'field1')
"detach_property": true,
"constraints": [
["node1", "node2"]
]
],
"properties": [{
"name": "id",
"type": "INT32",
Expand Down
15 changes: 5 additions & 10 deletions include/lgraph/lgraph_db.h
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ class GraphDB {
* level.
* @exception InputError Thrown if label:field does not exist, or not
* indexable.
*
* @param is_vertex True if the label is a vertex label, false if it is an edge label.
* @param label The label.
* @param field The field.
* @param is_unique True if the field content is unique for each vertex.
Expand All @@ -470,10 +470,9 @@ class GraphDB {
*
* @returns True if it succeeds, false if the index already exists.
*/
bool AddVectorIndex(const std::string& label, const std::string& field,
bool AddVectorIndex(bool is_vertex, const std::string& label, const std::string& field,
const std::string& index_type, int vec_dimension,
const std::string& distance_type, std::vector<int>& index_spec,
IndexType type);
const std::string& distance_type, std::vector<int>& index_spec);

/**
* @brief Check if this vertex_label:field is indexed.
Expand Down Expand Up @@ -544,17 +543,13 @@ class GraphDB {
* @exception WriteNotAllowed Thrown when called on a GraphDB with read-only access level.
* @exception InputError Thrown if label or field does not exist.
*
* @param is_vertex True if the label is a vertex label, false if it is an edge label.
* @param label The label.
* @param field The field.
* @param index_type Type of the index
* @param vec_dimension Dimension of the vector
* @param distance_type Type of the distance
*
* @returns True if it succeeds, false if the index does not exist.
*/
bool DeleteVectorIndex(const std::string& label, const std::string& field,
const std::string& index_type, int vec_dimension,
const std::string& distance_type);
bool DeleteVectorIndex(bool is_vertex, const std::string& label, const std::string& field);

/**
* @brief Get graph description
Expand Down
3 changes: 2 additions & 1 deletion include/lgraph/lgraph_exceptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ X(CypherParameterTypeError, "Cypher parameter type error.") \
X(ReachMaximumEid, "Edge eid exceeds the limit.") \
X(ReachMaximumCompositeIndexField, "The size of composite index fields exceeds the limit.") \
X(PluginDisabled, "Plugin disabled!") \
X(BoltDataException, "Bolt data exception")
X(BoltDataException, "Bolt data exception") \
X(VectorIndexException, "Vector index exception")

enum class ErrorCode {
#define X(code, msg) code,
Expand Down
10 changes: 10 additions & 0 deletions include/lgraph/lgraph_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -1290,6 +1290,16 @@ struct CompositeIndexSpec {
CompositeIndexType type;
};

/**
 * @brief Specification of a vector index defined on a label:field pair.
 *
 * All integer members default to 0 so that a default-constructed spec never
 * exposes indeterminate values before its fields are assigned.
 */
struct VectorIndexSpec {
    /** @brief Label on which the index is defined. */
    std::string label;
    /** @brief Field on which the index is defined. */
    std::string field;
    /** @brief Type of the index. */
    std::string index_type;
    /** @brief Dimension of the vector. */
    int dimension = 0;
    /** @brief Type of the distance. */
    std::string distance_type;
    /**
     * @brief First index-specific build parameter.
     * NOTE(review): presumably the HNSW "M" parameter ("hnsm" looks like a
     * misspelling of "hnsw"); the name is kept for interface compatibility.
     */
    int hnsm_m = 0;
    /**
     * @brief Second index-specific build parameter.
     * NOTE(review): presumably the HNSW "ef_construction" parameter; confirm.
     */
    int hnsm_ef_construction = 0;
};

struct EdgeUid {
EdgeUid() : src(0), dst(0), lid(0), tid(0), eid(0) {}
EdgeUid(int64_t s, int64_t d, uint16_t l, int64_t t, int64_t e)
Expand Down
1 change: 1 addition & 0 deletions src/core/data_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ typedef lgraph_api::IndexType IndexType;
typedef lgraph_api::IndexSpec IndexSpec;
typedef lgraph_api::CompositeIndexType CompositeIndexType;
typedef lgraph_api::CompositeIndexSpec CompositeIndexSpec;
typedef lgraph_api::VectorIndexSpec VectorIndexSpec;
typedef lgraph_api::FieldSpec FieldSpec;
typedef lgraph_api::EdgeUid EdgeUid;
typedef lgraph_api::Date Date;
Expand Down
2 changes: 1 addition & 1 deletion src/core/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ static const char* const EDGE_FULLTEXT_INDEX = "edge_fulltext";
static const char* const VERTEX_INDEX = "vertex_index";
static const char* const COMPOSITE_INDEX = "composite_index";
static const char* const EDGE_INDEX = "edge_index";
static const char* const VECTOR_INDEX = "vector_index";
static const char* const VERTEX_VECTOR_INDEX = "vertex_vector_index";
static const char* const USER_TABLE_NAME = "_user_table_";
static const char* const ROLE_TABLE_NAME = "_role_table_";
static const char* const GRAPH_CONFIG_TABLE_NAME = "_graph_config_table_";
Expand Down
24 changes: 3 additions & 21 deletions src/core/field_extractor.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class FieldExtractor {
// fulltext index
bool fulltext_indexed_ = false;
// vector index
std::unique_ptr<VectorIndex> vector_index_;
std::shared_ptr<VectorIndex> vector_index_;

public:
FieldExtractor() : null_bit_off_(0), vertex_index_(nullptr),
Expand All @@ -74,16 +74,7 @@ class FieldExtractor {
vertex_index_.reset(rhs.vertex_index_ ? new VertexIndex(*rhs.vertex_index_) : nullptr);
edge_index_.reset(rhs.edge_index_ ? new EdgeIndex(*rhs.edge_index_) : nullptr);
fulltext_indexed_ = rhs.fulltext_indexed_;
if (rhs.vector_index_ != nullptr) {
if (rhs.vector_index_->GetIndexType() == "HNSW") {
vector_index_.reset(new HNSW(
dynamic_cast<HNSW&>(*rhs.vector_index_)));
} else {
vector_index_.reset(nullptr);
}
} else {
vector_index_.reset(nullptr);
}
vector_index_ = rhs.vector_index_;
}

FieldExtractor& operator=(const FieldExtractor& rhs) {
Expand All @@ -97,16 +88,7 @@ class FieldExtractor {
vertex_index_.reset(rhs.vertex_index_ ? new VertexIndex(*rhs.vertex_index_) : nullptr);
edge_index_.reset(rhs.edge_index_ ? new EdgeIndex(*rhs.edge_index_) : nullptr);
fulltext_indexed_ = rhs.fulltext_indexed_;
if (rhs.vector_index_ != nullptr) {
if (rhs.vector_index_->GetIndexType() == "HNSW") {
vector_index_.reset(new HNSW(
dynamic_cast<HNSW&>(*rhs.vector_index_)));
} else {
vector_index_.reset(nullptr);
}
} else {
vector_index_.reset(nullptr);
}
vector_index_ = rhs.vector_index_;
return *this;
}

Expand Down
110 changes: 38 additions & 72 deletions src/core/index_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ IndexManager::IndexManager(KvTransaction& txn, SchemaManager* v_schema_manager,
size_t c_index_len = strlen(_detail::COMPOSITE_INDEX);
size_t v_ft_index_len = strlen(_detail::VERTEX_FULLTEXT_INDEX);
size_t e_ft_index_len = strlen(_detail::EDGE_FULLTEXT_INDEX);
size_t vector_index_len = strlen(_detail::VECTOR_INDEX);
size_t vector_index_len = strlen(_detail::VERTEX_VECTOR_INDEX);
auto it = index_list_table_->GetIterator(txn);
for (it->GotoFirstKey(); it->IsValid(); it->Next()) {
std::string index_name = it->GetKey().AsString();
Expand Down Expand Up @@ -95,46 +95,19 @@ IndexManager::IndexManager(KvTransaction& txn, SchemaManager* v_schema_manager,
schema->SetCompositeIndex(idx.field_names, index.release());
} else if (index_name.size() > vector_index_len &&
index_name.substr(index_name.size() - vector_index_len) ==
_detail::VECTOR_INDEX) {
_detail::IndexEntry idx = LoadIndex(it->GetValue());
FMA_DBG_CHECK_EQ(idx.table_name, it->GetKey().AsString());
_detail::VERTEX_VECTOR_INDEX) {
_detail::VectorIndexEntry idx = LoadVectorIndex(it->GetValue());
Schema* schema = v_schema_manager->GetSchema(idx.label);
FMA_DBG_ASSERT(schema);
std::vector<std::string> vector_index;
std::regex re(R"(_@lgraph@_|vector_index)");
auto words_begin = std::sregex_token_iterator(index_name.begin(),
index_name.end(), re, -1,
std::regex_constants::match_not_bol |
std::regex_constants::match_not_eol);
auto words_end = std::sregex_token_iterator();
for (std::sregex_token_iterator i = words_begin; i != words_end; ++i) {
if (!i->str().empty()) {
vector_index.emplace_back(i->str());
}
}
auto label = vector_index[0];
auto field = vector_index[1];
auto index_type = vector_index[2];
auto distance_type = vector_index[4];
int vec_dimension = std::stoi(vector_index[3]);
std::vector<int> index_spec;
std::regex pattern("-?[0-9]+\\.?[0-9]*");
std::sregex_iterator begin_it(vector_index[5].begin(),
vector_index[5].end(), pattern), end_it;
while (begin_it != end_it) {
std::smatch match = *begin_it;
index_spec.push_back(std::stof(match.str()));
++begin_it;
}
FMA_DBG_ASSERT(index_type == "HNSW");
FMA_DBG_ASSERT(schema->DetachProperty());
LOG_INFO() << FMA_FMT("start building vertex index for {}:{} in detached model",
label, field);
idx.label, idx.field);
const _detail::FieldExtractor* extractor = schema->GetFieldExtractor(idx.field);
FMA_DBG_ASSERT(extractor);
std::unique_ptr<VectorIndex> vsag_index;
vsag_index.reset(dynamic_cast<lgraph::VectorIndex*> (
new HNSW(label, field, distance_type, index_type, vec_dimension, index_spec)));
new HNSW(idx.label, idx.field, idx.distance_type, idx.index_type,
idx.dimension, {idx.hnsm_m, idx.hnsm_ef_construction})));
uint64_t count = 0;
std::vector<std::vector<float>> floatvector;
std::vector<int64_t> vids;
Expand All @@ -154,16 +127,9 @@ IndexManager::IndexManager(KvTransaction& txn, SchemaManager* v_schema_manager,
vsag_index->Add(floatvector, vids, count);
kv_iter.reset();
LOG_DEBUG() << "index count: " << count;

std::unique_ptr<VertexIndex> vertex_index;
vertex_index = std::make_unique<VertexIndex>(
nullptr, extractor->Type(), IndexType::NonuniqueIndex);
vertex_index->SetReady();
schema->MarkVertexIndexed(extractor->GetFieldId(), vertex_index.release());
schema->MarkVectorIndexed(extractor->GetFieldId(), vsag_index.release());

LOG_INFO() << FMA_FMT("end building vector index for {}:{} in detached model",
label, field);
idx.label, idx.field);
} else {
LOG_ERROR() << "Unknown index type: " << index_name;
}
Expand Down Expand Up @@ -211,29 +177,25 @@ bool IndexManager::AddVertexIndex(KvTransaction& txn, const std::string& label,
bool IndexManager::AddVectorIndex(KvTransaction& txn, const std::string& label,
const std::string& field, const std::string& index_type,
int vec_dimension, const std::string& distance_type,
std::vector<int>& index_spec, FieldType dt, IndexType type,
std::unique_ptr<VertexIndex>& index,
std::vector<int>& index_spec,
std::unique_ptr<VectorIndex>& vector_index) {
_detail::IndexEntry idx;
_detail::VectorIndexEntry idx;
idx.label = label;
idx.field = field;
idx.table_name = GetVectorIndexTableName(label, field, index_type,
vec_dimension, distance_type, index_spec);
idx.type = type;

auto it = index_list_table_->GetIterator(txn, Value::ConstRef(idx.table_name));
idx.index_type = index_type;
idx.dimension = vec_dimension;
idx.distance_type = distance_type;
idx.hnsm_m = index_spec[0];
idx.hnsm_ef_construction = index_spec[1];
auto table_name = GetVertexVectorIndexTableName(label, field);
auto it = index_list_table_->GetIterator(txn, Value::ConstRef(table_name));
if (it->IsValid()) return false; // already exist

Value idxv;
StoreIndex(idx, idxv);
it->AddKeyValue(Value::ConstRef(idx.table_name), idxv);

index = std::make_unique<VertexIndex>(nullptr, dt, type); // no need to creates index table

if (index_type == "HNSW") {
vector_index.reset(dynamic_cast<lgraph::VectorIndex*> (new HNSW(label, field, distance_type,
index_type, vec_dimension, index_spec)));
}
StoreVectorIndex(idx, idxv);
it->AddKeyValue(Value::ConstRef(table_name), idxv);
vector_index = std::make_unique<HNSW>(label, field, distance_type,
index_type, vec_dimension, index_spec);
return true;
}

Expand Down Expand Up @@ -330,31 +292,35 @@ bool IndexManager::DeleteVertexCompositeIndex(lgraph::KvTransaction& txn,
}

bool IndexManager::DeleteVectorIndex(KvTransaction& txn, const std::string& label,
const std::string& field, const std::string& index_type,
int vec_dimension, const std::string& distance_type) {
std::string closest_table_name = label + _detail::NAME_SEPARATOR + field +
_detail::NAME_SEPARATOR + index_type + _detail::NAME_SEPARATOR +
std::to_string(vec_dimension) + _detail::NAME_SEPARATOR;
auto table_name = (index_list_table_->GetClosestIterator(txn,
Value::ConstRef(closest_table_name))->GetKey()).AsString();
// delete the entry from index list table
const std::string& field) {
auto table_name = GetVertexVectorIndexTableName(label, field);
if (!index_list_table_->DeleteKey(txn, Value::ConstRef(table_name)))
return false; // does not exist
// now delete the index table
return true;
}

bool IndexManager::GetVectorIndexListTableName(KvTransaction& txn,
std::vector<std::string>& table_name) {
std::vector<VectorIndexSpec> IndexManager::ListVectorIndex(KvTransaction& txn) {
std::vector<VectorIndexSpec> ret;
auto it = index_list_table_->GetIterator(txn);
for (it->GotoFirstKey(); it->IsValid(); it->Next()) {
auto key = it->GetKey();
auto val = it->GetValue();
auto name = key.AsString();
auto find = name.find(_detail::VECTOR_INDEX);
auto find = name.find(_detail::VERTEX_VECTOR_INDEX);
if (find != std::string::npos) {
table_name.emplace_back(name);
auto vi = LoadVectorIndex(val);
VectorIndexSpec vs;
vs.label = vi.label;
vs.field = vi.field;
vs.index_type = vi.index_type;
vs.dimension = vi.dimension;
vs.distance_type = vi.distance_type;
vs.hnsm_m = vi.hnsm_m;
vs.hnsm_ef_construction = vi.hnsm_ef_construction;
ret.emplace_back(vs);
}
}
return !table_name.empty();
return ret;
}

} // namespace lgraph
Loading

0 comments on commit 0579709

Please sign in to comment.