Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Trace v3 schema migration #447

Merged
merged 3 commits into from
Nov 10, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
357 changes: 357 additions & 0 deletions cmd/signozschemamigrator/schema_migrator/traces_migrations.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,360 @@
package schemamigrator

// TracesMigrations is the list of trace schema migration records. It is
// currently empty; entries are staged in TracesMigrations_Staging and moved
// here once they are ready to deploy.
var TracesMigrations = []SchemaMigrationRecord{}

// move them to TracesMigrations once it's ready to deploy
var TracesMigrations_Staging = []SchemaMigrationRecord{
{
MigrationID: 1000,
UpItems: []Operation{
CreateTableOperation{
Database: "signoz_traces",
Table: "signoz_index_v3",
Columns: []Column{
{Name: "ts_bucket_start", Type: ColumnTypeUInt64, Codec: "DoubleDelta, LZ4"},
nityanandagohain marked this conversation as resolved.
Show resolved Hide resolved
{Name: "resource_fingerprint", Type: ColumnTypeString, Codec: "ZSTD(1)"},
{Name: "timestamp", Type: DateTime64ColumnType{Precision: 9}, Codec: "DoubleDelta, LZ4"},
{Name: "id", Type: FixedStringColumnType{Length: 27}, Codec: "ZSTD"},
nityanandagohain marked this conversation as resolved.
Show resolved Hide resolved
{Name: "traceID", Type: FixedStringColumnType{Length: 32}, Codec: "ZSTD(1)"},
{Name: "spanID", Type: ColumnTypeString, Codec: "ZSTD(1)"},
{Name: "traceState", Type: ColumnTypeString, Codec: "ZSTD(1)"},
{Name: "parentSpanID", Type: ColumnTypeString, Codec: "ZSTD(1)"},
{Name: "flags", Type: ColumnTypeUInt32, Codec: "T64, ZSTD(1)"},
{Name: "name", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "kind", Type: ColumnTypeInt8, Codec: "T64, ZSTD(1)"},
{Name: "spanKind", Type: ColumnTypeString, Codec: "ZSTD(1)"},
{Name: "durationNano", Type: ColumnTypeUInt64, Codec: "T64, ZSTD(1)"},
{Name: "statusCode", Type: ColumnTypeInt16, Codec: "T64, ZSTD(1)"},
{Name: "statusMessage", Type: ColumnTypeString, Codec: "ZSTD(1)"},
{Name: "statusCodeString", Type: ColumnTypeString, Codec: "ZSTD(1)"},
nityanandagohain marked this conversation as resolved.
Show resolved Hide resolved

{Name: "attributes_string", Type: MapColumnType{LowCardinalityColumnType{ColumnTypeString}, ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "attributes_number", Type: MapColumnType{LowCardinalityColumnType{ColumnTypeString}, ColumnTypeFloat64}, Codec: "ZSTD(1)"},
{Name: "attributes_bool", Type: MapColumnType{LowCardinalityColumnType{ColumnTypeString}, ColumnTypeBool}, Codec: "ZSTD(1)"},
{Name: "resources_string", Type: MapColumnType{LowCardinalityColumnType{ColumnTypeString}, ColumnTypeString}, Codec: "ZSTD(1)"},

{Name: "events", Type: ArrayColumnType{ColumnTypeString}, Codec: "ZSTD(2)"},
srikanthccv marked this conversation as resolved.
Show resolved Hide resolved

{Name: "serviceName", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "references", Type: ColumnTypeString, Codec: "ZSTD(1)"},
nityanandagohain marked this conversation as resolved.
Show resolved Hide resolved

// custom composite columns
{Name: "responseStatusCode", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "externalHttpUrl", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "httpUrl", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "externalHttpMethod", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "httpMethod", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "httpHost", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "dbName", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "dbOperation", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "hasError", Type: ColumnTypeBool, Codec: "T64, ZSTD(1)"},
{Name: "isRemote", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},

// attribute cols
{Name: "httpRoute", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "msgSystem", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "msgOperation", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "dbSystem", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "rpcSystem", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "rpcService", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "rpcMethod", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "peerService", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},

// alias for attribute cols
{Name: "attribute_string_http$$route", Type: LowCardinalityColumnType{ColumnTypeString}, Alias: "httpRoute"},
{Name: "attribute_string_messaging$$system", Type: LowCardinalityColumnType{ColumnTypeString}, Alias: "msgSystem"},
{Name: "attribute_string_messaging$$operation", Type: LowCardinalityColumnType{ColumnTypeString}, Alias: "msgOperation"},
{Name: "attribute_string_db$$system", Type: LowCardinalityColumnType{ColumnTypeString}, Alias: "dbSystem"},
{Name: "attribute_string_rpc$$system", Type: LowCardinalityColumnType{ColumnTypeString}, Alias: "rpcSystem"},
{Name: "attribute_string_rpc$$service", Type: LowCardinalityColumnType{ColumnTypeString}, Alias: "rpcService"},
{Name: "attribute_string_rpc$$method", Type: LowCardinalityColumnType{ColumnTypeString}, Alias: "rpcMethod"},
{Name: "attribute_string_peer$$service", Type: LowCardinalityColumnType{ColumnTypeString}, Alias: "peerService"},
},
Indexes: []Index{
{Name: "idx_id", Expression: "id", Type: "minmax", Granularity: 1},
{Name: "idx_traceID", Expression: "traceID", Type: "tokenbf_v1(10000, 5,0)", Granularity: 1},
{Name: "idx_spanID", Expression: "spanID", Type: "tokenbf_v1(5000, 5,0)", Granularity: 1},
{Name: "idx_duration", Expression: "durationNano", Type: "minmax", Granularity: 1},
{Name: "idx_name", Expression: "name", Type: "ngrambf_v1(4, 5000, 2, 0)", Granularity: 1},
{Name: "idx_kind", Expression: "kind", Type: "minmax", Granularity: 4},
{Name: "idx_httpRoute", Expression: "httpRoute", Type: "bloom_filter", Granularity: 4},
{Name: "idx_httpUrl", Expression: "httpUrl", Type: "bloom_filter", Granularity: 4},
{Name: "idx_httpHost", Expression: "httpHost", Type: "bloom_filter", Granularity: 4},
{Name: "idx_httpMethod", Expression: "httpMethod", Type: "bloom_filter", Granularity: 4},
{Name: "idx_timestamp", Expression: "timestamp", Type: "minmax", Granularity: 1},
{Name: "idx_rpcMethod", Expression: "rpcMethod", Type: "bloom_filter", Granularity: 4},
{Name: "idx_responseStatusCode", Expression: "responseStatusCode", Type: "set(0)", Granularity: 1},
{Name: "idx_statusCodeString", Expression: "statusCodeString", Type: "set(3)", Granularity: 4},
{Name: "idx_spanKind", Expression: "spanKind", Type: "set(5)", Granularity: 4},
{Name: "attributes_string_idx_key", Expression: "mapKeys(attributes_string)", Type: "tokenbf_v1(1024, 2, 0)", Granularity: 1},
{Name: "attributes_string_idx_val", Expression: "mapValues(attributes_string)", Type: "ngrambf_v1(4, 5000, 2, 0)", Granularity: 1},
{Name: "attributes_number_idx_key", Expression: "mapKeys(attributes_number)", Type: "tokenbf_v1(1024, 2, 0)", Granularity: 1},
{Name: "attributes_number_idx_val", Expression: "mapValues(attributes_number)", Type: "bloom_filter", Granularity: 1},
{Name: "attributes_bool_idx_key", Expression: "mapKeys(attributes_bool)", Type: "tokenbf_v1(1024, 2, 0)", Granularity: 1},
{Name: "resources_string_idx_key", Expression: "mapKeys(resources_string)", Type: "tokenbf_v1(1024, 2, 0)", Granularity: 1},
{Name: "resources_string_idx_val", Expression: "mapValues(resources_string)", Type: "ngrambf_v1(4, 5000, 2, 0)", Granularity: 1},
},
Engine: MergeTree{
PartitionBy: "toDate(timestamp)",
OrderBy: "(ts_bucket_start, resource_fingerprint, hasError, name, timestamp, id)",
TTL: "toDateTime(timestamp) + toIntervalSecond(1296000)",
Settings: TableSettings{
{Name: "index_granularity", Value: "8192"},
{Name: "ttl_only_drop_parts", Value: "1"},
},
},
},
CreateTableOperation{
Database: "signoz_traces",
Table: "distributed_signoz_index_v3",
Columns: []Column{
{Name: "ts_bucket_start", Type: ColumnTypeUInt64, Codec: "DoubleDelta, LZ4"},
{Name: "resource_fingerprint", Type: ColumnTypeString, Codec: "ZSTD(1)"},
{Name: "timestamp", Type: DateTime64ColumnType{Precision: 9}, Codec: "DoubleDelta, LZ4"},
{Name: "id", Type: FixedStringColumnType{Length: 27}, Codec: "ZSTD"},
{Name: "traceID", Type: FixedStringColumnType{Length: 32}, Codec: "ZSTD(1)"},
{Name: "spanID", Type: ColumnTypeString, Codec: "ZSTD(1)"},
{Name: "traceState", Type: ColumnTypeString, Codec: "ZSTD(1)"},
{Name: "parentSpanID", Type: ColumnTypeString, Codec: "ZSTD(1)"},
{Name: "flags", Type: ColumnTypeUInt32, Codec: "T64, ZSTD(1)"},
{Name: "name", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "kind", Type: ColumnTypeInt8, Codec: "T64, ZSTD(1)"},
{Name: "spanKind", Type: ColumnTypeString, Codec: "ZSTD(1)"},
{Name: "durationNano", Type: ColumnTypeUInt64, Codec: "T64, ZSTD(1)"},
{Name: "statusCode", Type: ColumnTypeInt16, Codec: "T64, ZSTD(1)"},
{Name: "statusMessage", Type: ColumnTypeString, Codec: "ZSTD(1)"},
{Name: "statusCodeString", Type: ColumnTypeString, Codec: "ZSTD(1)"},

{Name: "attributes_string", Type: MapColumnType{LowCardinalityColumnType{ColumnTypeString}, ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "attributes_number", Type: MapColumnType{LowCardinalityColumnType{ColumnTypeString}, ColumnTypeFloat64}, Codec: "ZSTD(1)"},
{Name: "attributes_bool", Type: MapColumnType{LowCardinalityColumnType{ColumnTypeString}, ColumnTypeBool}, Codec: "ZSTD(1)"},
{Name: "resources_string", Type: MapColumnType{LowCardinalityColumnType{ColumnTypeString}, ColumnTypeString}, Codec: "ZSTD(1)"},

{Name: "events", Type: ArrayColumnType{ColumnTypeString}, Codec: "ZSTD(2)"},

{Name: "serviceName", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "references", Type: ColumnTypeString, Codec: "ZSTD(1)"},

// custom columns
{Name: "responseStatusCode", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "externalHttpUrl", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "httpUrl", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "externalHttpMethod", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "httpMethod", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "httpHost", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "dbName", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "dbOperation", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "hasError", Type: ColumnTypeBool, Codec: "T64, ZSTD(1)"},
{Name: "isRemote", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},

// attribute cols
{Name: "httpRoute", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "msgSystem", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "msgOperation", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "dbSystem", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "rpcSystem", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "rpcService", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "rpcMethod", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},
{Name: "peerService", Type: LowCardinalityColumnType{ColumnTypeString}, Codec: "ZSTD(1)"},

// alias for attribute cols
{Name: "attribute_string_http$$route", Type: LowCardinalityColumnType{ColumnTypeString}, Alias: "httpRoute"},
{Name: "attribute_string_messaging$$system", Type: LowCardinalityColumnType{ColumnTypeString}, Alias: "msgSystem"},
{Name: "attribute_string_messaging$$operation", Type: LowCardinalityColumnType{ColumnTypeString}, Alias: "msgOperation"},
{Name: "attribute_string_db$$system", Type: LowCardinalityColumnType{ColumnTypeString}, Alias: "dbSystem"},
{Name: "attribute_string_rpc$$system", Type: LowCardinalityColumnType{ColumnTypeString}, Alias: "rpcSystem"},
{Name: "attribute_string_rpc$$service", Type: LowCardinalityColumnType{ColumnTypeString}, Alias: "rpcService"},
{Name: "attribute_string_rpc$$method", Type: LowCardinalityColumnType{ColumnTypeString}, Alias: "rpcMethod"},
{Name: "attribute_string_peer$$service", Type: LowCardinalityColumnType{ColumnTypeString}, Alias: "peerService"},
nityanandagohain marked this conversation as resolved.
Show resolved Hide resolved
},
Engine: Distributed{
Database: "signoz_traces",
Table: "signoz_index_v3",
ShardingKey: "cityHash64(traceID)",
},
},
CreateTableOperation{
Database: "signoz_traces",
Table: "traces_v3_resource",
Columns: []Column{
{Name: "labels", Type: ColumnTypeString, Codec: "ZSTD(5)"},
{Name: "fingerprint", Type: ColumnTypeString, Codec: "ZSTD(1)"},
{Name: "seen_at_ts_bucket_start", Type: ColumnTypeInt64, Codec: "Delta(8), ZSTD(1)"},
},
Indexes: []Index{
{Name: "idx_labels", Expression: "lower(labels)", Type: "ngrambf_v1(4, 1024, 3, 0)", Granularity: 1},
{Name: "idx_labels_v1", Expression: "labels", Type: "ngrambf_v1(4, 1024, 3, 0)", Granularity: 1},
},
Engine: ReplacingMergeTree{
MergeTree: MergeTree{
PartitionBy: "toDate(seen_at_ts_bucket_start / 1000)",
OrderBy: "(labels, fingerprint, seen_at_ts_bucket_start)",
TTL: "toDateTime(seen_at_ts_bucket_start) + INTERVAL 1296000 SECOND + INTERVAL 1800 SECOND DELETE",
Settings: TableSettings{
{Name: "ttl_only_drop_parts", Value: "1"},
{Name: "index_granularity", Value: "8192"},
},
},
},
},
CreateTableOperation{
Database: "signoz_traces",
Table: "distributed_traces_v3_resource",
Columns: []Column{
{Name: "labels", Type: ColumnTypeString, Codec: "ZSTD(5)"},
{Name: "fingerprint", Type: ColumnTypeString, Codec: "ZSTD(1)"},
{Name: "seen_at_ts_bucket_start", Type: ColumnTypeInt64, Codec: "Delta(8), ZSTD(1)"},
},
Engine: Distributed{
Database: "signoz_traces",
Table: "traces_v3_resource",
ShardingKey: "cityHash64(labels, fingerprint)",
},
},
CreateTableOperation{
Database: "signoz_traces",
Table: "trace_summary",
Columns: []Column{
{Name: "traceID", Type: ColumnTypeString, Codec: "ZSTD(1)"},
{Name: "first_reported",
Type: SimpleAggregateFunction{
FunctionName: "min",
Arguments: []ColumnType{DateTime64ColumnType{Precision: 9}},
},
Codec: "ZSTD(1)"},
{Name: "last_reported",
nityanandagohain marked this conversation as resolved.
Show resolved Hide resolved
Type: SimpleAggregateFunction{
FunctionName: "max",
Arguments: []ColumnType{DateTime64ColumnType{Precision: 9}},
},
Codec: "ZSTD(1)"},
{Name: "num_spans",
Type: SimpleAggregateFunction{
FunctionName: "sum",
Arguments: []ColumnType{ColumnTypeUInt64},
},
Codec: "ZSTD(1)"},
},
Engine: AggregatingMergeTree{
MergeTree: MergeTree{
PartitionBy: "toDate(first_reported)",
OrderBy: "(traceID)",
TTL: "toDateTime(first_reported) + toIntervalSecond(1296000)",
Settings: TableSettings{
{Name: "index_granularity", Value: "8192"},
{Name: "ttl_only_drop_parts", Value: "1"},
},
},
},
},
CreateTableOperation{
Database: "signoz_traces",
Table: "distributed_trace_summary",
Columns: []Column{
{Name: "traceID", Type: ColumnTypeString, Codec: "ZSTD(1)"},
{Name: "first_reported",
Type: SimpleAggregateFunction{
FunctionName: "min",
Arguments: []ColumnType{DateTime64ColumnType{Precision: 9}},
},
Codec: "ZSTD(1)"},
{Name: "last_reported",
Type: SimpleAggregateFunction{
FunctionName: "max",
Arguments: []ColumnType{DateTime64ColumnType{Precision: 9}},
},
Codec: "ZSTD(1)"},
{Name: "num_spans",
Type: SimpleAggregateFunction{
FunctionName: "sum",
Arguments: []ColumnType{ColumnTypeUInt64},
},
Codec: "ZSTD(1)"},
},
Engine: Distributed{
Database: "signoz_traces",
Table: "trace_summary",
ShardingKey: "cityHash64(traceID)",
},
},
CreateMaterializedViewOperation{
Database: "signoz_traces",
ViewName: "trace_summary_mv",
DestTable: "trace_summary",
Query: `SELECT
traceID,
minSimpleState(timestamp) AS first_reported,
maxSimpleState(timestamp) AS last_reported,
sumSimpleState(toUInt64(1)) AS num_spans
nityanandagohain marked this conversation as resolved.
Show resolved Hide resolved
FROM signoz_traces.signoz_index_v3
GROUP BY traceID;`,
},
ModifyQueryMaterializedViewOperation{
Database: "signoz_traces",
ViewName: "root_operations",
Query: `SELECT DISTINCT
name,
serviceName
FROM signoz_traces.signoz_index_v3
WHERE parentSpanID = ''`,
},
ModifyQueryMaterializedViewOperation{
Database: "signoz_traces",
ViewName: "sub_root_operations",
Query: `SELECT DISTINCT
name,
serviceName
FROM signoz_traces.signoz_index_v3 AS A, signoz_traces.signoz_index_v3 AS B
WHERE (A.serviceName != B.serviceName) AND (A.parentSpanID = B.spanID)`,
},
},
DownItems: []Operation{
DropTableOperation{
Database: "signoz_traces",
Table: "signoz_index_v3",
},
DropTableOperation{
Database: "signoz_traces",
Table: "distributed_signoz_index_v3",
},
DropTableOperation{
Database: "signoz_traces",
Table: "traces_v3_resource",
},
DropTableOperation{
Database: "signoz_traces",
Table: "distributed_traces_v3_resource",
},
DropTableOperation{
Database: "signoz_traces",
Table: "trace_summary",
},
DropTableOperation{
Database: "signoz_traces",
Table: "distributed_trace_summary",
},
DropTableOperation{
Database: "signoz_traces",
Table: "trace_summary_mv",
},
ModifyQueryMaterializedViewOperation{
Database: "signoz_traces",
ViewName: "root_operations",
Query: `SELECT DISTINCT
name,
serviceName
FROM signoz_traces.signoz_index_v2
WHERE parentSpanID = ''`,
},
ModifyQueryMaterializedViewOperation{
Database: "signoz_traces",
ViewName: "sub_root_operations",
Query: `SELECT DISTINCT
name,
serviceName
FROM signoz_traces.signoz_index_v2 AS A, signoz_traces.signoz_index_v2 AS B
WHERE (A.serviceName != B.serviceName) AND (A.parentSpanID = B.spanID)`,
},
},
},
}