From 8fca034b463990b9fe0d8181ed54d36f3e7133f6 Mon Sep 17 00:00:00 2001 From: Raj Date: Mon, 16 Sep 2024 11:22:56 +0530 Subject: [PATCH 01/16] feat: init dedicated stanza operator registry for signozlogspipelineprocessor --- .../signozlogspipelineprocessor/config.go | 15 +++++-- .../config_test.go | 9 ++-- .../signozlogspipelineprocessor/factory.go | 9 ++-- .../factory_test.go | 12 ++--- .../signozlogspipelineprocessor/processor.go | 2 +- .../stanza/README.md | 1 + .../stanza/adapter/config.go | 9 ++++ .../stanza/operator/config.go | 44 +++++++++++++++++++ .../stanza/operator/registry.go | 5 +++ 9 files changed, 87 insertions(+), 19 deletions(-) create mode 100644 processor/signozlogspipelineprocessor/stanza/README.md create mode 100644 processor/signozlogspipelineprocessor/stanza/adapter/config.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/config.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/registry.go diff --git a/processor/signozlogspipelineprocessor/config.go b/processor/signozlogspipelineprocessor/config.go index d7819703..4b6048ba 100644 --- a/processor/signozlogspipelineprocessor/config.go +++ b/processor/signozlogspipelineprocessor/config.go @@ -4,13 +4,13 @@ package signozlogspipelineprocessor import ( "errors" + signozlogspipelinestanzaadapter "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/adapter" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" "go.opentelemetry.io/collector/component" - - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/adapter" ) type Config struct { - adapter.BaseConfig `mapstructure:",squash"` + signozlogspipelinestanzaadapter.BaseConfig `mapstructure:",squash"` } var _ component.Config = (*Config)(nil) @@ -21,3 +21,12 @@ func (cfg *Config) Validate() error { } return nil } + +func (cfg *Config) OperatorConfigs() []operator.Config { + ops := []operator.Config{} + + for _, op := 
range cfg.BaseConfig.Operators { + ops = append(ops, operator.Config(op)) + } + return ops +} diff --git a/processor/signozlogspipelineprocessor/config_test.go b/processor/signozlogspipelineprocessor/config_test.go index 29c6cda6..99c7bba2 100644 --- a/processor/signozlogspipelineprocessor/config_test.go +++ b/processor/signozlogspipelineprocessor/config_test.go @@ -10,11 +10,12 @@ import ( "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/confmap/confmaptest" - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/adapter" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/parser/regex" + + signozlogspipelinestanzaadapter "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/adapter" + signozlogspipelinestanzaoperator "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator" ) func TestLoadConfig(t *testing.T) { @@ -24,8 +25,8 @@ func TestLoadConfig(t *testing.T) { cfg := factory.CreateDefaultConfig() assert.NoError(t, component.UnmarshalConfig(cm, cfg)) assert.Equal(t, &Config{ - BaseConfig: adapter.BaseConfig{ - Operators: []operator.Config{ + BaseConfig: signozlogspipelinestanzaadapter.BaseConfig{ + Operators: []signozlogspipelinestanzaoperator.Config{ { Builder: func() *regex.Config { cfg := regex.NewConfig() diff --git a/processor/signozlogspipelineprocessor/factory.go b/processor/signozlogspipelineprocessor/factory.go index 1c0ca21d..abe5805b 100644 --- a/processor/signozlogspipelineprocessor/factory.go +++ b/processor/signozlogspipelineprocessor/factory.go @@ -7,13 +7,12 @@ import ( "errors" "fmt" + signozlogspipelinestanzaadapter 
"github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/adapter" + signozlogspipelinestanzaoperator "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/consumer" "go.opentelemetry.io/collector/processor" "go.opentelemetry.io/collector/processor/processorhelper" - - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/adapter" - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" ) func NewFactory() processor.Factory { @@ -26,8 +25,8 @@ func NewFactory() processor.Factory { // Note: This isn't a valid configuration (no operators would lead to no work being done) func createDefaultConfig() component.Config { return &Config{ - BaseConfig: adapter.BaseConfig{ - Operators: []operator.Config{}, + BaseConfig: signozlogspipelinestanzaadapter.BaseConfig{ + Operators: []signozlogspipelinestanzaoperator.Config{}, }, } } diff --git a/processor/signozlogspipelineprocessor/factory_test.go b/processor/signozlogspipelineprocessor/factory_test.go index 4e62ea0b..ed20fe90 100644 --- a/processor/signozlogspipelineprocessor/factory_test.go +++ b/processor/signozlogspipelineprocessor/factory_test.go @@ -5,14 +5,14 @@ import ( "context" "testing" + signozlogspipelinestanzaadapter "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/adapter" + signozlogspipelinestanzaoperator "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator" "github.com/stretchr/testify/assert" "go.opentelemetry.io/collector/component/componenttest" "go.opentelemetry.io/collector/consumer/consumertest" "go.opentelemetry.io/collector/processor/processortest" - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/adapter" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" - 
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/parser/regex" ) @@ -27,8 +27,8 @@ func TestCreateDefaultConfig(t *testing.T) { func TestCreateProcessor(t *testing.T) { factory := NewFactory() cfg := &Config{ - BaseConfig: adapter.BaseConfig{ - Operators: []operator.Config{ + BaseConfig: signozlogspipelinestanzaadapter.BaseConfig{ + Operators: []signozlogspipelinestanzaoperator.Config{ { Builder: func() *regex.Config { cfg := regex.NewConfig() @@ -57,8 +57,8 @@ func TestCreateProcessor(t *testing.T) { func TestInvalidOperators(t *testing.T) { factory := NewFactory() cfg := &Config{ - BaseConfig: adapter.BaseConfig{ - Operators: []operator.Config{ + BaseConfig: signozlogspipelinestanzaadapter.BaseConfig{ + Operators: []signozlogspipelinestanzaoperator.Config{ { // invalid due to missing regex Builder: regex.NewConfig(), diff --git a/processor/signozlogspipelineprocessor/processor.go b/processor/signozlogspipelineprocessor/processor.go index 056ce2f5..a0375474 100644 --- a/processor/signozlogspipelineprocessor/processor.go +++ b/processor/signozlogspipelineprocessor/processor.go @@ -28,7 +28,7 @@ func newLogsPipelineProcessor( } stanzaPipeline, err := pipeline.Config{ - Operators: processorConfig.BaseConfig.Operators, + Operators: processorConfig.OperatorConfigs(), DefaultOutput: sink, }.Build(telemetrySettings) if err != nil { diff --git a/processor/signozlogspipelineprocessor/stanza/README.md b/processor/signozlogspipelineprocessor/stanza/README.md new file mode 100644 index 00000000..d6913649 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/README.md @@ -0,0 +1 @@ +# Copy of stanza components used for powering signozlogspipelineprocessor \ No newline at end of file diff --git a/processor/signozlogspipelineprocessor/stanza/adapter/config.go 
b/processor/signozlogspipelineprocessor/stanza/adapter/config.go new file mode 100644 index 00000000..814498cc --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/adapter/config.go @@ -0,0 +1,9 @@ +package signozlogspipelinestanzaadapter + +import ( + signozlogspipelinestanzaoperator "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator" +) + +type BaseConfig struct { + Operators []signozlogspipelinestanzaoperator.Config `mapstructure:"operators"` +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/config.go b/processor/signozlogspipelineprocessor/stanza/operator/config.go new file mode 100644 index 00000000..81a31d69 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/config.go @@ -0,0 +1,44 @@ +package signozlogspipelinestanzaoperator + +import ( + "fmt" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + "go.opentelemetry.io/collector/confmap" +) + +// Config is the configuration of an operator +type Config struct { + operator.Builder +} + +// NewConfig wraps the builder interface in a concrete struct +func NewConfig(b operator.Builder) Config { + return Config{Builder: b} +} + +func (c *Config) Unmarshal(component *confmap.Conf) error { + if !component.IsSet("type") { + return fmt.Errorf("missing required field 'type'") + } + + typeInterface := component.Get("type") + + typeString, ok := typeInterface.(string) + if !ok { + return fmt.Errorf("non-string type %T for field 'type'", typeInterface) + } + + builderFunc, ok := SignozStanzaOperatorsRegistry.Lookup(typeString) + if !ok { + return fmt.Errorf("unsupported type '%s'", typeString) + } + + builder := builderFunc() + if err := component.Unmarshal(builder, confmap.WithIgnoreUnused()); err != nil { + return fmt.Errorf("unmarshal to %s: %w", typeString, err) + } + + c.Builder = builder + return nil +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/registry.go 
b/processor/signozlogspipelineprocessor/stanza/operator/registry.go new file mode 100644 index 00000000..197d6ec4 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/registry.go @@ -0,0 +1,5 @@ +package signozlogspipelinestanzaoperator + +import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + +var SignozStanzaOperatorsRegistry = operator.NewRegistry() From 176844e0e9fa92bcc0bb16ec7818ee542e9c114f Mon Sep 17 00:00:00 2001 From: Raj Date: Mon, 16 Sep 2024 11:40:14 +0530 Subject: [PATCH 02/16] feat: bring in acopy of the stanza operator and get test passing --- .../stanza/adapter/config.go | 1 + .../stanza/adapter/register_ops.go | 6 + .../stanza/operator/config.go | 4 + .../stanza/operator/operators/copy/config.go | 61 ++++ .../operator/operators/copy/config_test.go | 74 +++++ .../operator/operators/copy/package_test.go | 13 + .../operators/copy/testdata/config.yaml | 24 ++ .../operator/operators/copy/transformer.go | 32 ++ .../operators/copy/transformer_test.go | 307 ++++++++++++++++++ .../stanza/operator/registry.go | 13 + 10 files changed, 535 insertions(+) create mode 100644 processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/copy/config.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/copy/config_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/copy/package_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/copy/testdata/config.yaml create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/copy/transformer.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/copy/transformer_test.go diff --git a/processor/signozlogspipelineprocessor/stanza/adapter/config.go b/processor/signozlogspipelineprocessor/stanza/adapter/config.go index 
814498cc..4598a372 100644 --- a/processor/signozlogspipelineprocessor/stanza/adapter/config.go +++ b/processor/signozlogspipelineprocessor/stanza/adapter/config.go @@ -5,5 +5,6 @@ import ( ) type BaseConfig struct { + // Using our own version of Config allows using a dedicated registry of stanza ops for logs pipelines. Operators []signozlogspipelinestanzaoperator.Config `mapstructure:"operators"` } diff --git a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go new file mode 100644 index 00000000..27658227 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go @@ -0,0 +1,6 @@ +// Register copies of stanza operators dedicated to signoz logs pipelines +package signozlogspipelinestanzaadapter + +import ( + _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/copy" +) diff --git a/processor/signozlogspipelineprocessor/stanza/operator/config.go b/processor/signozlogspipelineprocessor/stanza/operator/config.go index 81a31d69..5a60a954 100644 --- a/processor/signozlogspipelineprocessor/stanza/operator/config.go +++ b/processor/signozlogspipelineprocessor/stanza/operator/config.go @@ -1,3 +1,7 @@ +// Mostly Brought in as is from opentelemetry-collector-contrib +// Maintaining our own copy/version of Config allows us to use our own +// registry of stanza operators in Config.Unmarshal + package signozlogspipelinestanzaoperator import ( diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/copy/config.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/copy/config.go new file mode 100644 index 00000000..3fa82fa7 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/copy/config.go @@ -0,0 +1,61 @@ +// Brought in as is from opentelemetry-collector-contrib + +package copy + +import ( + "fmt" + + "go.opentelemetry.io/collector/component" 
+ + signozlogspipelinestanzaoperator "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +const operatorType = "copy" + +func init() { + signozlogspipelinestanzaoperator.Register(operatorType, func() operator.Builder { return NewConfig() }) +} + +// NewConfig creates a new copy operator config with default values +func NewConfig() *Config { + return NewConfigWithID(operatorType) +} + +// NewConfigWithID creates a new copy operator config with default values +func NewConfigWithID(operatorID string) *Config { + return &Config{ + TransformerConfig: helper.NewTransformerConfig(operatorID, operatorType), + } +} + +// Config is the configuration of a copy operator +type Config struct { + helper.TransformerConfig `mapstructure:",squash"` + From entry.Field `mapstructure:"from"` + To entry.Field `mapstructure:"to"` +} + +// Build will build a copy operator from the supplied configuration +func (c Config) Build(set component.TelemetrySettings) (operator.Operator, error) { + transformerOperator, err := c.TransformerConfig.Build(set) + if err != nil { + return nil, err + } + + if c.From == entry.NewNilField() { + return nil, fmt.Errorf("copy: missing from field") + } + + if c.To == entry.NewNilField() { + return nil, fmt.Errorf("copy: missing to field") + } + + return &Transformer{ + TransformerOperator: transformerOperator, + From: c.From, + To: c.To, + }, nil +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/copy/config_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/copy/config_test.go new file mode 100644 index 00000000..efec79aa --- /dev/null +++ 
b/processor/signozlogspipelineprocessor/stanza/operator/operators/copy/config_test.go @@ -0,0 +1,74 @@ +// Brought in as is from opentelemetry-collector-contrib +package copy + +import ( + "path/filepath" + "testing" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/operatortest" +) + +// test unmarshalling of values into config struct +func TestUnmarshal(t *testing.T) { + operatortest.ConfigUnmarshalTests{ + DefaultConfig: NewConfig(), + TestsFile: filepath.Join(".", "testdata", "config.yaml"), + Tests: []operatortest.ConfigUnmarshalTest{ + { + Name: "body_to_body", + Expect: func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("key") + cfg.To = entry.NewBodyField("key2") + return cfg + }(), + }, + { + Name: "body_to_attribute", + Expect: func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("key") + cfg.To = entry.NewAttributeField("key2") + return cfg + }(), + }, + { + Name: "attribute_to_resource", + Expect: func() *Config { + cfg := NewConfig() + cfg.From = entry.NewAttributeField("key") + cfg.To = entry.NewResourceField("key2") + return cfg + }(), + }, + { + Name: "attribute_to_body", + Expect: func() *Config { + cfg := NewConfig() + cfg.From = entry.NewAttributeField("key") + cfg.To = entry.NewBodyField("key2") + return cfg + }(), + }, + { + Name: "attribute_to_nested_attribute", + Expect: func() *Config { + cfg := NewConfig() + cfg.From = entry.NewAttributeField("key") + cfg.To = entry.NewAttributeField("one", "two", "three") + return cfg + }(), + }, + { + Name: "resource_to_nested_resource", + Expect: func() *Config { + cfg := NewConfig() + cfg.From = entry.NewResourceField("key") + cfg.To = entry.NewResourceField("one", "two", "three") + return cfg + }(), + }, + }, + }.Run(t) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/copy/package_test.go 
b/processor/signozlogspipelineprocessor/stanza/operator/operators/copy/package_test.go new file mode 100644 index 00000000..10685bb1 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/copy/package_test.go @@ -0,0 +1,13 @@ +// Brought in as is from opentelemetry-collector-contrib + +package copy + +import ( + "testing" + + "go.uber.org/goleak" +) + +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/copy/testdata/config.yaml b/processor/signozlogspipelineprocessor/stanza/operator/operators/copy/testdata/config.yaml new file mode 100644 index 00000000..70b22e9b --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/copy/testdata/config.yaml @@ -0,0 +1,24 @@ +attribute_to_body: + type: copy + from: attributes.key + to: body.key2 +attribute_to_nested_attribute: + type: copy + from: attributes.key + to: attributes.one.two.three +attribute_to_resource: + type: copy + from: attributes.key + to: resource.key2 +body_to_attribute: + type: copy + from: body.key + to: attributes.key2 +body_to_body: + type: copy + from: body.key + to: body.key2 +resource_to_nested_resource: + type: copy + from: resource.key + to: resource.one.two.three diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/copy/transformer.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/copy/transformer.go new file mode 100644 index 00000000..db856d97 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/copy/transformer.go @@ -0,0 +1,32 @@ +// Brought in as is from opentelemetry-collector-contrib + +package copy + +import ( + "context" + "fmt" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +// Transformer copies a value from one field and creates a new field with that 
value +type Transformer struct { + helper.TransformerOperator + From entry.Field + To entry.Field +} + +// Process will process an entry with a copy transformation. +func (t *Transformer) Process(ctx context.Context, entry *entry.Entry) error { + return t.ProcessWith(ctx, entry, t.Transform) +} + +// Transform will apply the copy operation to an entry +func (t *Transformer) Transform(e *entry.Entry) error { + val, exist := t.From.Get(e) + if !exist { + return fmt.Errorf("copy: from field does not exist in this entry: %s", t.From.String()) + } + return t.To.Set(e, val) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/copy/transformer_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/copy/transformer_test.go new file mode 100644 index 00000000..6ee55b32 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/copy/transformer_test.go @@ -0,0 +1,307 @@ +// Brought in as is from opentelemetry-collector-contrib +package copy + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component/componenttest" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/testutil" +) + +type testCase struct { + name string + expectErr bool + op *Config + input func() *entry.Entry + output func() *entry.Entry +} + +// Test building and processing a Config +func TestBuildAndProcess(t *testing.T) { + now := time.Now() + newTestEntry := func() *entry.Entry { + e := entry.New() + e.ObservedTimestamp = now + e.Timestamp = time.Unix(1586632809, 0) + e.Body = map[string]any{ + "key": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + } + return e + } + + cases := []testCase{ + { + "body_to_body", + false, + func() *Config { + cfg := NewConfig() + 
cfg.From = entry.NewBodyField("key") + cfg.To = entry.NewBodyField("key2") + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + "key2": "val", + } + return e + }, + }, + { + "nested_to_body", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("nested", "nestedkey") + cfg.To = entry.NewBodyField("key2") + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + "key2": "nestedval", + } + return e + }, + }, + { + "body_to_nested", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("key") + cfg.To = entry.NewBodyField("nested", "key2") + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + "key2": "val", + }, + } + return e + }, + }, + { + "body_to_attribute", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("key") + cfg.To = entry.NewAttributeField("key2") + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + } + e.Attributes = map[string]any{"key2": "val"} + return e + }, + }, + { + "body_to_nested_attribute", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField() + cfg.To = entry.NewAttributeField("one", "two") + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Attributes = map[string]any{ + "one": map[string]any{ + "two": map[string]any{ + "key": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + }, + }, + } + return e + }, + }, + { + "body_to_nested_resource", + false, + 
func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField() + cfg.To = entry.NewResourceField("one", "two") + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Resource = map[string]any{ + "one": map[string]any{ + "two": map[string]any{ + "key": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + }, + }, + } + return e + }, + }, + { + "attribute_to_body", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewAttributeField("key") + cfg.To = entry.NewBodyField("key2") + return cfg + }(), + func() *entry.Entry { + e := newTestEntry() + e.Attributes = map[string]any{"key": "val"} + return e + }, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + "key2": "val", + } + e.Attributes = map[string]any{"key": "val"} + return e + }, + }, + { + "attribute_to_resource", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewAttributeField("key") + cfg.To = entry.NewResourceField("key2") + return cfg + }(), + func() *entry.Entry { + e := newTestEntry() + e.Attributes = map[string]any{"key": "val"} + return e + }, + func() *entry.Entry { + e := newTestEntry() + e.Attributes = map[string]any{"key": "val"} + e.Resource = map[string]any{"key2": "val"} + return e + }, + }, + { + "overwrite", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("key") + cfg.To = entry.NewBodyField("nested") + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + "nested": "val", + } + return e + }, + }, + { + "invalid_copy_to_attribute_root", + true, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("key") + cfg.To = entry.NewAttributeField() + return cfg + }(), + newTestEntry, + nil, + }, + { + "invalid_copy_to_resource_root", + true, + func() *Config { + cfg := NewConfig() + cfg.From = 
entry.NewBodyField("key") + cfg.To = entry.NewResourceField() + return cfg + }(), + newTestEntry, + nil, + }, + { + "invalid_key", + true, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewAttributeField("nonexistentkey") + cfg.To = entry.NewResourceField("key2") + return cfg + }(), + newTestEntry, + nil, + }, + } + + for _, tc := range cases { + t.Run("BuildAndProcess/"+tc.name, func(t *testing.T) { + cfg := tc.op + cfg.OutputIDs = []string{"fake"} + cfg.OnError = "drop" + set := componenttest.NewNopTelemetrySettings() + op, err := cfg.Build(set) + require.NoError(t, err) + + cp := op.(*Transformer) + fake := testutil.NewFakeOutput(t) + require.NoError(t, cp.SetOutputs([]operator.Operator{fake})) + val := tc.input() + err = cp.Process(context.Background(), val) + if tc.expectErr { + require.Error(t, err) + } else { + require.NoError(t, err) + fake.ExpectEntry(t, tc.output()) + } + }) + } +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/registry.go b/processor/signozlogspipelineprocessor/stanza/operator/registry.go index 197d6ec4..edfce117 100644 --- a/processor/signozlogspipelineprocessor/stanza/operator/registry.go +++ b/processor/signozlogspipelineprocessor/stanza/operator/registry.go @@ -1,5 +1,18 @@ +// Stanza operators registry dedicated to Signoz logs pipelines + package signozlogspipelinestanzaoperator import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" var SignozStanzaOperatorsRegistry = operator.NewRegistry() + +// Register will register an operator in the SigNoz logs pipelines registry +func Register(operatorType string, newBuilder func() operator.Builder) { + SignozStanzaOperatorsRegistry.Register(operatorType, newBuilder) +} + +// Lookup looks up a given operator type. Its second return value will +// be false if no builder is registered for that type. 
+func Lookup(configType string) (func() operator.Builder, bool) { + return SignozStanzaOperatorsRegistry.Lookup(configType) +} From dcf145d140b73a5ffbbca4a43b824b6cec43a503 Mon Sep 17 00:00:00 2001 From: Raj Date: Mon, 16 Sep 2024 13:57:44 +0530 Subject: [PATCH 03/16] feat: bring in add operator and bring in operatortest to get tests passing --- .../stanza/adapter/register_ops.go | 1 + .../stanza/operator/operators/add/config.go | 68 ++++ .../operator/operators/add/config_test.go | 144 +++++++ .../operator/operators/add/package_test.go | 13 + .../operators/add/testdata/config.yaml | 58 +++ .../operator/operators/add/transformer.go | 50 +++ .../operators/add/transformer_test.go | 360 ++++++++++++++++++ .../operator/operators/copy/config_test.go | 2 +- .../stanza/operator/operatortest/confmap.go | 67 ++++ 9 files changed, 762 insertions(+), 1 deletion(-) create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/add/config.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/add/config_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/add/package_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/add/testdata/config.yaml create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/add/transformer.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/add/transformer_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operatortest/confmap.go diff --git a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go index 27658227..40f54452 100644 --- a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go +++ b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go @@ -2,5 +2,6 @@ package signozlogspipelinestanzaadapter import ( + _ 
"github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/add" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/copy" ) diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/add/config.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/add/config.go new file mode 100644 index 00000000..f4fe6310 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/add/config.go @@ -0,0 +1,68 @@ +// Brought in as is from opentelemetry-collector-contrib + +package add + +import ( + "fmt" + "strings" + + "go.opentelemetry.io/collector/component" + + signozlogspipelinestanzaoperator "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +const operatorType = "add" + +func init() { + signozlogspipelinestanzaoperator.Register(operatorType, func() operator.Builder { return NewConfig() }) +} + +// NewConfig creates a new add operator config with default values +func NewConfig() *Config { + return NewConfigWithID(operatorType) +} + +// NewConfigWithID creates a new add operator config with default values +func NewConfigWithID(operatorID string) *Config { + return &Config{ + TransformerConfig: helper.NewTransformerConfig(operatorID, operatorType), + } +} + +// Config is the configuration of an add operator +type Config struct { + helper.TransformerConfig `mapstructure:",squash"` + Field entry.Field `mapstructure:"field"` + Value any `mapstructure:"value,omitempty"` +} + +// Build will build an add operator from the supplied configuration +func (c Config) Build(set component.TelemetrySettings) (operator.Operator, error) { + 
transformerOperator, err := c.TransformerConfig.Build(set) + if err != nil { + return nil, err + } + + addOperator := &Transformer{ + TransformerOperator: transformerOperator, + Field: c.Field, + } + strVal, ok := c.Value.(string) + if !ok || !isExpr(strVal) { + addOperator.Value = c.Value + return addOperator, nil + } + exprStr := strings.TrimPrefix(strVal, "EXPR(") + exprStr = strings.TrimSuffix(exprStr, ")") + + compiled, err := helper.ExprCompile(exprStr) + if err != nil { + return nil, fmt.Errorf("failed to compile expression '%s': %w", exprStr, err) + } + + addOperator.program = compiled + return addOperator, nil +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/add/config_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/add/config_test.go new file mode 100644 index 00000000..429888e6 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/add/config_test.go @@ -0,0 +1,144 @@ +// Brought in as is from opentelemetry-collector-contrib + +package add + +import ( + "path/filepath" + "testing" + + "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operatortest" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" +) + +func TestUnmarshal(t *testing.T) { + operatortest.ConfigUnmarshalTests{ + DefaultConfig: NewConfig(), + TestsFile: filepath.Join(".", "testdata", "config.yaml"), + Tests: []operatortest.ConfigUnmarshalTest{ + { + Name: "add_value", + Expect: func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewBodyField("new") + cfg.Value = "randomMessage" + return cfg + }(), + }, + { + Name: "add_expr", + Expect: func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewBodyField("new") + cfg.Value = `EXPR(body.key + "_suffix")` + return cfg + }(), + }, + { + Name: "add_nest", + Expect: func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewBodyField("new") + cfg.Value = map[string]any{ + "nest": 
map[string]any{"key": "val"}, + } + return cfg + }(), + }, + { + Name: "add_attribute", + Expect: func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewAttributeField("new") + cfg.Value = "newVal" + return cfg + }(), + }, + { + Name: "add_nested_attribute", + Expect: func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewAttributeField("one", "two") + cfg.Value = "newVal" + return cfg + }(), + }, + { + Name: "add_nested_obj_attribute", + Expect: func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewAttributeField("one", "two") + cfg.Value = map[string]any{ + "nest": map[string]any{"key": "val"}, + } + return cfg + }(), + }, + { + Name: "add_resource", + Expect: func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewResourceField("new") + cfg.Value = "newVal" + return cfg + }(), + }, + { + Name: "add_nested_resource", + Expect: func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewResourceField("one", "two") + cfg.Value = "newVal" + return cfg + }(), + }, + { + Name: "add_nested_obj_resource", + Expect: func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewResourceField("one", "two") + cfg.Value = map[string]any{ + "nest": map[string]any{"key": "val"}, + } + return cfg + }(), + }, + { + Name: "add_resource_expr", + Expect: func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewResourceField("new") + cfg.Value = `EXPR(body.key + "_suffix")` + return cfg + }(), + }, + { + Name: "add_array_to_body", + Expect: func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewBodyField("new") + cfg.Value = []any{1, 2, 3, 4} + return cfg + }(), + }, + { + Name: "add_array_to_attributes", + Expect: func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewAttributeField("new") + cfg.Value = []any{1, 2, 3, 4} + return cfg + }(), + }, + + { + Name: "add_array_to_resource", + Expect: func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewResourceField("new") + cfg.Value = []any{1, 2, 3, 4} + return cfg + }(), + }, + }, + 
}.Run(t) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/add/package_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/add/package_test.go new file mode 100644 index 00000000..3999b75d --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/add/package_test.go @@ -0,0 +1,13 @@ +// Brought in as is from opentelemetry-collector-contrib + +package add + +import ( + "testing" + + "go.uber.org/goleak" +) + +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/add/testdata/config.yaml b/processor/signozlogspipelineprocessor/stanza/operator/operators/add/testdata/config.yaml new file mode 100644 index 00000000..a9a12893 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/add/testdata/config.yaml @@ -0,0 +1,58 @@ +add_array_to_attributes: + type: add + field: attributes.new + value: [1,2,3,4] +add_array_to_body: + type: add + field: body.new + value: [1,2,3,4] +add_array_to_resource: + type: add + field: resource.new + value: [1,2,3,4] +add_attribute: + type: add + field: attributes.new + value: newVal +add_expr: + type: add + field: body.new + value: EXPR(body.key + "_suffix") +add_nest: + type: add + field: body.new + value: + nest: + key: val +add_nested_attribute: + type: add + field: attributes.one.two + value: newVal +add_nested_obj_attribute: + type: add + field: attributes.one.two + value: + nest: + key: val +add_nested_obj_resource: + type: add + field: resource.one.two + value: + nest: + key: val +add_nested_resource: + type: add + field: resource.one.two + value: newVal +add_resource: + type: add + field: resource.new + value: newVal +add_resource_expr: + type: add + field: resource.new + value: EXPR(body.key + "_suffix") +add_value: + type: add + field: body.new + value: randomMessage diff --git 
a/processor/signozlogspipelineprocessor/stanza/operator/operators/add/transformer.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/add/transformer.go new file mode 100644 index 00000000..4160219a --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/add/transformer.go @@ -0,0 +1,50 @@ +// Brought in as is from opentelemetry-collector-contrib + +package add + +import ( + "context" + "fmt" + "strings" + + "github.com/expr-lang/expr/vm" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +// Transformer is an operator that adds a string value or an expression value +type Transformer struct { + helper.TransformerOperator + + Field entry.Field + Value any + program *vm.Program +} + +// Process will process an entry with a add transformation. +func (t *Transformer) Process(ctx context.Context, entry *entry.Entry) error { + return t.ProcessWith(ctx, entry, t.Transform) +} + +// Transform will apply the add operations to an entry +func (t *Transformer) Transform(e *entry.Entry) error { + if t.Value != nil { + return e.Set(t.Field, t.Value) + } + if t.program != nil { + env := helper.GetExprEnv(e) + defer helper.PutExprEnv(env) + + result, err := vm.Run(t.program, env) + if err != nil { + return fmt.Errorf("evaluate value_expr: %w", err) + } + return e.Set(t.Field, result) + } + return fmt.Errorf("add: missing required field 'value'") +} + +func isExpr(str string) bool { + return strings.HasPrefix(str, "EXPR(") && strings.HasSuffix(str, ")") +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/add/transformer_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/add/transformer_test.go new file mode 100644 index 00000000..af8d87c3 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/add/transformer_test.go @@ -0,0 +1,360 @@ +// 
Brought in as is from opentelemetry-collector-contrib +package add + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component/componenttest" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/testutil" +) + +type testCase struct { + name string + op *Config + input func() *entry.Entry + output func() *entry.Entry + expectErr bool +} + +func TestProcessAndBuild(t *testing.T) { + t.Setenv("TEST_EXPR_STRING_ENV", "val") + now := time.Now() + newTestEntry := func() *entry.Entry { + e := entry.New() + e.ObservedTimestamp = now + e.Timestamp = time.Unix(1586632809, 0) + e.Body = map[string]any{ + "key": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + } + return e + } + + cases := []testCase{ + { + "add_value", + func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewBodyField("new") + cfg.Value = "randomMessage" + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body.(map[string]any)["new"] = "randomMessage" + return e + }, + false, + }, + { + "add_expr", + func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewBodyField("new") + cfg.Value = `EXPR(body.key + "_suffix")` + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body.(map[string]any)["new"] = "val_suffix" + return e + }, + false, + }, + { + "add_nest", + func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewBodyField("new") + cfg.Value = map[any]any{ + "nest": map[any]any{ + "key": "val", + }, + } + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + "new": map[any]any{ + "nest": map[any]any{ + "key": "val", + }, + }, + } 
+ return e + }, + false, + }, + { + "add_attribute", + func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewAttributeField("new") + cfg.Value = "some.attribute" + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Attributes = map[string]any{"new": "some.attribute"} + return e + }, + false, + }, + { + "add_resource", + func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewResourceField("new") + cfg.Value = "some.resource" + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Resource = map[string]any{"new": "some.resource"} + return e + }, + false, + }, + { + "add_resource_expr", + func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewResourceField("new") + cfg.Value = `EXPR(body.key + "_suffix")` + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Resource = map[string]any{"new": "val_suffix"} + return e + }, + false, + }, + { + "add_int_to_body", + func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewBodyField("new") + cfg.Value = 1 + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + "new": 1, + } + return e + }, + false, + }, + { + "add_array_to_body", + func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewBodyField("new") + cfg.Value = []int{1, 2, 3, 4} + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + "new": []int{1, 2, 3, 4}, + } + return e + }, + false, + }, + { + "overwrite", + func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewBodyField("key") + cfg.Value = []int{1, 2, 3, 4} + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": []int{1, 2, 3, 4}, + "nested": map[string]any{ + 
"nestedkey": "nestedval", + }, + } + return e + }, + false, + }, + { + "add_int_to_resource", + func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewResourceField("new") + cfg.Value = 1 + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Resource = map[string]any{ + "new": 1, + } + return e + }, + false, + }, + { + "add_int_to_attributes", + func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewAttributeField("new") + cfg.Value = 1 + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Attributes = map[string]any{ + "new": 1, + } + return e + }, + false, + }, + { + "add_nested_to_attributes", + func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewAttributeField("one", "two") + cfg.Value = map[string]any{ + "new": 1, + } + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Attributes = map[string]any{ + "one": map[string]any{ + "two": map[string]any{ + "new": 1, + }, + }, + } + return e + }, + false, + }, + { + "add_nested_to_resource", + func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewResourceField("one", "two") + cfg.Value = map[string]any{ + "new": 1, + } + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Resource = map[string]any{ + "one": map[string]any{ + "two": map[string]any{ + "new": 1, + }, + }, + } + return e + }, + false, + }, + { + "add_expr", + func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewAttributeField("fookey") + cfg.Value = "EXPR('foo_' + body.key)" + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Attributes = map[string]any{ + "fookey": "foo_val", + } + return e + }, + false, + }, + { + "add_expr_env", + func() *Config { + cfg := NewConfig() + cfg.Field = entry.NewAttributeField("fookey") + cfg.Value = "EXPR('foo_' + env('TEST_EXPR_STRING_ENV'))" + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + 
e.Attributes = map[string]any{ + "fookey": "foo_val", + } + return e + }, + false, + }, + } + for _, tc := range cases { + t.Run("BuildandProcess/"+tc.name, func(t *testing.T) { + cfg := tc.op + cfg.OutputIDs = []string{"fake"} + cfg.OnError = "drop" + set := componenttest.NewNopTelemetrySettings() + op, err := cfg.Build(set) + require.NoError(t, err) + + add := op.(*Transformer) + fake := testutil.NewFakeOutput(t) + require.NoError(t, add.SetOutputs([]operator.Operator{fake})) + val := tc.input() + err = add.Process(context.Background(), val) + if tc.expectErr { + require.Error(t, err) + } else { + require.NoError(t, err) + fake.ExpectEntry(t, tc.output()) + } + }) + } +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/copy/config_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/copy/config_test.go index efec79aa..c57295d2 100644 --- a/processor/signozlogspipelineprocessor/stanza/operator/operators/copy/config_test.go +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/copy/config_test.go @@ -5,8 +5,8 @@ import ( "path/filepath" "testing" + "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operatortest" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/operatortest" ) // test unmarshalling of values into config struct diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operatortest/confmap.go b/processor/signozlogspipelineprocessor/stanza/operator/operatortest/confmap.go new file mode 100644 index 00000000..8a2c3c4e --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operatortest/confmap.go @@ -0,0 +1,67 @@ +// Brought in as is from opentelemetry-collector-contrib with anyOpConfig implementation updated to use operator config in logspipelineprocessor +package operatortest + +import ( + "fmt" + "testing" + + 
"github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/confmap" + "go.opentelemetry.io/collector/confmap/confmaptest" + + signozlogspipelinestanzaoperator "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" +) + +// ConfigUnmarshalTest is used for testing golden configs +type ConfigUnmarshalTests struct { + DefaultConfig operator.Builder + TestsFile string + Tests []ConfigUnmarshalTest +} + +// ConfigUnmarshalTest is used for testing golden configs +type ConfigUnmarshalTest struct { + Name string + Expect any + ExpectErr bool +} + +// Run Unmarshals yaml files and compares them against the expected. +func (c ConfigUnmarshalTests) Run(t *testing.T) { + testConfMaps, err := confmaptest.LoadConf(c.TestsFile) + require.NoError(t, err) + + for _, tc := range c.Tests { + t.Run(tc.Name, func(t *testing.T) { + testConfMap, err := testConfMaps.Sub(tc.Name) + require.NoError(t, err) + require.NotZero(t, len(testConfMap.AllKeys()), fmt.Sprintf("config not found: '%s'", tc.Name)) + + cfg := newAnyOpConfig(c.DefaultConfig) + err = component.UnmarshalConfig(testConfMap, cfg) + + if tc.ExpectErr { + require.Error(t, err) + } else { + require.NoError(t, err) + require.Equal(t, tc.Expect, cfg.Operator.Builder) + } + }) + } +} + +type anyOpConfig struct { + Operator signozlogspipelinestanzaoperator.Config `mapstructure:"operator"` +} + +func newAnyOpConfig(opCfg operator.Builder) *anyOpConfig { + return &anyOpConfig{ + Operator: signozlogspipelinestanzaoperator.Config{Builder: opCfg}, + } +} + +func (a *anyOpConfig) Unmarshal(component *confmap.Conf) error { + return a.Operator.Unmarshal(component) +} From 9da5f860c0b14f7ca2b05f0bc767282da7bfa4dd Mon Sep 17 00:00:00 2001 From: Raj Date: Mon, 16 Sep 2024 14:02:35 +0530 Subject: [PATCH 04/16] feat: bring in move operator --- 
.../stanza/adapter/register_ops.go | 1 + .../stanza/operator/operators/move/config.go | 57 ++ .../operator/operators/move/config_test.go | 155 ++++++ .../operator/operators/move/package_test.go | 13 + .../operators/move/testdata/config.yaml | 60 ++ .../operator/operators/move/transformer.go | 32 ++ .../operators/move/transformer_test.go | 522 ++++++++++++++++++ 7 files changed, 840 insertions(+) create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/move/config.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/move/config_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/move/package_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/move/testdata/config.yaml create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/move/transformer.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/move/transformer_test.go diff --git a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go index 40f54452..330f4877 100644 --- a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go +++ b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go @@ -4,4 +4,5 @@ package signozlogspipelinestanzaadapter import ( _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/add" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/copy" + _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/move" ) diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/move/config.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/move/config.go new file mode 100644 index 00000000..ed4eca83 --- /dev/null 
+++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/move/config.go @@ -0,0 +1,57 @@ +// Brought in as is from opentelemetry-collector-contrib + +package move + +import ( + "fmt" + + "go.opentelemetry.io/collector/component" + + signozlogspipelinestanzaoperator "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +const operatorType = "move" + +func init() { + signozlogspipelinestanzaoperator.Register(operatorType, func() operator.Builder { return NewConfig() }) +} + +// NewConfig creates a new move operator config with default values +func NewConfig() *Config { + return NewConfigWithID(operatorType) +} + +// NewConfigWithID creates a new move operator config with default values +func NewConfigWithID(operatorID string) *Config { + return &Config{ + TransformerConfig: helper.NewTransformerConfig(operatorID, operatorType), + } +} + +// Config is the configuration of a move operator +type Config struct { + helper.TransformerConfig `mapstructure:",squash"` + From entry.Field `mapstructure:"from"` + To entry.Field `mapstructure:"to"` +} + +// Build will build a Move operator from the supplied configuration +func (c Config) Build(set component.TelemetrySettings) (operator.Operator, error) { + transformerOperator, err := c.TransformerConfig.Build(set) + if err != nil { + return nil, err + } + + if c.To == entry.NewNilField() || c.From == entry.NewNilField() { + return nil, fmt.Errorf("move: missing to or from field") + } + + return &Transformer{ + TransformerOperator: transformerOperator, + From: c.From, + To: c.To, + }, nil +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/move/config_test.go 
b/processor/signozlogspipelineprocessor/stanza/operator/operators/move/config_test.go new file mode 100644 index 00000000..06368086 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/move/config_test.go @@ -0,0 +1,155 @@ +// Brought in as is from opentelemetry-collector-contrib +package move + +import ( + "path/filepath" + "testing" + + "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operatortest" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" +) + +// test unmarshalling of values into config struct +func TestUnmarshal(t *testing.T) { + operatortest.ConfigUnmarshalTests{ + DefaultConfig: NewConfig(), + TestsFile: filepath.Join(".", "testdata", "config.yaml"), + Tests: []operatortest.ConfigUnmarshalTest{ + { + Name: "move_body_to_body", + Expect: func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("key") + cfg.To = entry.NewBodyField("new") + return cfg + }(), + }, + { + Name: "move_body_to_attribute", + Expect: func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("key") + cfg.To = entry.NewAttributeField("new") + return cfg + }(), + }, + { + Name: "move_attribute_to_body", + Expect: func() *Config { + cfg := NewConfig() + cfg.From = entry.NewAttributeField("new") + cfg.To = entry.NewBodyField("new") + return cfg + }(), + }, + { + Name: "move_attribute_to_resource", + Expect: func() *Config { + cfg := NewConfig() + cfg.From = entry.NewAttributeField("new") + cfg.To = entry.NewResourceField("new") + return cfg + }(), + }, + { + Name: "move_bracketed_attribute_to_resource", + Expect: func() *Config { + cfg := NewConfig() + cfg.From = entry.NewAttributeField("dotted.field.name") + cfg.To = entry.NewResourceField("new") + return cfg + }(), + }, + { + Name: "move_resource_to_attribute", + Expect: func() *Config { + cfg := NewConfig() + cfg.From = entry.NewResourceField("new") + cfg.To = entry.NewAttributeField("new") + return 
cfg + }(), + }, + { + Name: "move_nested", + Expect: func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("nested") + cfg.To = entry.NewBodyField("NewNested") + return cfg + }(), + }, + { + Name: "move_from_nested_object", + Expect: func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("nested", "nestedkey") + cfg.To = entry.NewBodyField("unnestedkey") + return cfg + }(), + }, + { + Name: "move_to_nested_object", + Expect: func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("newnestedkey") + cfg.To = entry.NewBodyField("nested", "newnestedkey") + return cfg + }(), + }, + { + Name: "move_double_nested_object", + Expect: func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("nested", "nested2") + cfg.To = entry.NewBodyField("nested2") + return cfg + }(), + }, + { + Name: "move_nested_to_resource", + Expect: func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("nested") + cfg.To = entry.NewResourceField("NewNested") + return cfg + }(), + }, + { + Name: "move_nested_to_attribute", + Expect: func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("nested") + cfg.To = entry.NewAttributeField("NewNested") + return cfg + }(), + }, + { + Name: "move_nested_body_to_nested_attribute", + Expect: func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("one", "two") + cfg.To = entry.NewAttributeField("three", "four") + return cfg + }(), + }, + { + Name: "move_nested_body_to_nested_resource", + Expect: func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("one", "two") + cfg.To = entry.NewResourceField("three", "four") + return cfg + }(), + }, + { + Name: "replace_body", + Expect: func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("nested") + cfg.To = entry.NewBodyField() + return cfg + }(), + }, + }, + }.Run(t) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/move/package_test.go 
b/processor/signozlogspipelineprocessor/stanza/operator/operators/move/package_test.go new file mode 100644 index 00000000..65abd1f9 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/move/package_test.go @@ -0,0 +1,13 @@ +// Brought in as is from opentelemetry-collector-contrib + +package move + +import ( + "testing" + + "go.uber.org/goleak" +) + +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/move/testdata/config.yaml b/processor/signozlogspipelineprocessor/stanza/operator/operators/move/testdata/config.yaml new file mode 100644 index 00000000..d9ca66a1 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/move/testdata/config.yaml @@ -0,0 +1,60 @@ +move_attribute_to_body: + type: move + from: attributes.new + to: body.new +move_attribute_to_resource: + type: move + from: attributes.new + to: resource.new +move_body_to_attribute: + type: move + from: body.key + to: attributes.new +move_body_to_body: + type: move + from: body.key + to: body.new +move_bracketed_attribute_to_resource: + type: move + from: attributes["dotted.field.name"] + to: resource.new +move_double_nested_object: + type: move + from: body.nested.nested2 + to: body.nested2 +move_from_nested_object: + type: move + from: body.nested.nestedkey + to: body.unnestedkey +move_nested: + type: move + from: body.nested + to: body.NewNested +move_nested_body_to_nested_attribute: + type: move + from: body.one.two + to: attributes.three.four +move_nested_body_to_nested_resource: + type: move + from: body.one.two + to: resource.three.four +move_nested_to_attribute: + type: move + from: body.nested + to: attributes.NewNested +move_nested_to_resource: + type: move + from: body.nested + to: resource.NewNested +move_resource_to_attribute: + type: move + from: resource.new + to: attributes.new +move_to_nested_object: + type: move + from: body.newnestedkey + to: 
body.nested.newnestedkey +replace_body: + type: move + from: body.nested + to: body diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/move/transformer.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/move/transformer.go new file mode 100644 index 00000000..c5de26a3 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/move/transformer.go @@ -0,0 +1,32 @@ +// Brought in as is from opentelemetry-collector-contrib + +package move + +import ( + "context" + "fmt" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +// Transformer is an operator that moves a field's value to a new field +type Transformer struct { + helper.TransformerOperator + From entry.Field + To entry.Field +} + +// Process will process an entry with a move transformation. +func (t *Transformer) Process(ctx context.Context, entry *entry.Entry) error { + return t.ProcessWith(ctx, entry, t.Transform) +} + +// Transform will apply the move operation to an entry +func (t *Transformer) Transform(e *entry.Entry) error { + val, exist := t.From.Delete(e) + if !exist { + return fmt.Errorf("move: field does not exist: %s", t.From.String()) + } + return t.To.Set(e, val) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/move/transformer_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/move/transformer_test.go new file mode 100644 index 00000000..59d1e4ba --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/move/transformer_test.go @@ -0,0 +1,522 @@ +// Brought in as is from opentelemetry-collector-contrib + +package move + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component/componenttest" + + 
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/testutil" +) + +type processTestCase struct { + name string + expectErr bool + op *Config + input func() *entry.Entry + output func() *entry.Entry +} + +func TestProcessAndBuild(t *testing.T) { + now := time.Now() + newTestEntry := func() *entry.Entry { + e := entry.New() + e.ObservedTimestamp = now + e.Timestamp = time.Unix(1586632809, 0) + e.Body = map[string]any{ + "key": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + } + return e + } + + cases := []processTestCase{ + { + "MoveBodyToBody", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("key") + cfg.To = entry.NewBodyField("new") + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "new": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + } + return e + }, + }, + { + "MoveBodyToAttribute", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("key") + cfg.To = entry.NewAttributeField("new") + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + } + e.Attributes = map[string]any{"new": "val"} + return e + }, + }, + { + "MoveAttributeToBody", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewAttributeField("new") + cfg.To = entry.NewBodyField("new") + return cfg + }(), + func() *entry.Entry { + e := newTestEntry() + e.Attributes = map[string]any{"new": "val"} + return e + }, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + "new": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + } + e.Attributes = map[string]any{} + return e + }, + }, 
+ { + "MoveAttributeToResource", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewAttributeField("new") + cfg.To = entry.NewResourceField("new") + return cfg + }(), + func() *entry.Entry { + e := newTestEntry() + e.Attributes = map[string]any{"new": "val"} + return e + }, + func() *entry.Entry { + e := newTestEntry() + e.Resource = map[string]any{"new": "val"} + e.Attributes = map[string]any{} + return e + }, + }, + { + "MoveBracketedAttributeToResource", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewAttributeField("dotted.field.name") + cfg.To = entry.NewResourceField("new") + return cfg + }(), + func() *entry.Entry { + e := newTestEntry() + e.Attributes = map[string]any{"dotted.field.name": "val"} + return e + }, + func() *entry.Entry { + e := newTestEntry() + e.Resource = map[string]any{"new": "val"} + e.Attributes = map[string]any{} + return e + }, + }, + { + "MoveBracketedAttributeToBracketedResource", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewAttributeField("dotted.field.name") + cfg.To = entry.NewResourceField("dotted.field.name") + return cfg + }(), + func() *entry.Entry { + e := newTestEntry() + e.Attributes = map[string]any{"dotted.field.name": "val"} + return e + }, + func() *entry.Entry { + e := newTestEntry() + e.Resource = map[string]any{"dotted.field.name": "val"} + e.Attributes = map[string]any{} + return e + }, + }, + { + "MoveAttributeToBracketedResource", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewAttributeField("new") + cfg.To = entry.NewResourceField("dotted.field.name") + return cfg + }(), + func() *entry.Entry { + e := newTestEntry() + e.Attributes = map[string]any{"new": "val"} + return e + }, + func() *entry.Entry { + e := newTestEntry() + e.Resource = map[string]any{"dotted.field.name": "val"} + e.Attributes = map[string]any{} + return e + }, + }, + { + "MoveResourceToAttribute", + false, + func() *Config { + cfg := NewConfig() + 
cfg.From = entry.NewResourceField("new") + cfg.To = entry.NewAttributeField("new") + return cfg + }(), + func() *entry.Entry { + e := newTestEntry() + e.Resource = map[string]any{"new": "val"} + return e + }, + func() *entry.Entry { + e := newTestEntry() + e.Resource = map[string]any{} + e.Attributes = map[string]any{"new": "val"} + return e + }, + }, + { + "MoveNest", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("nested") + cfg.To = entry.NewBodyField("NewNested") + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + "NewNested": map[string]any{ + "nestedkey": "nestedval", + }, + } + return e + }, + }, + { + "MoveFromNestedObj", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("nested", "nestedkey") + cfg.To = entry.NewBodyField("unnestedkey") + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + "nested": map[string]any{}, + "unnestedkey": "nestedval", + } + return e + }, + }, + { + "MoveToNestedObj", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("newnestedkey") + cfg.To = entry.NewBodyField("nested", "newnestedkey") + + return cfg + }(), + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + "newnestedkey": "nestedval", + } + return e + }, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + "newnestedkey": "nestedval", + }, + } + return e + }, + }, + { + "MoveDoubleNestedObj", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("nested", "nested2") + cfg.To = entry.NewBodyField("nested2") + return cfg + }(), + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + 
"nested": map[string]any{ + "nestedkey": "nestedval", + "nested2": map[string]any{ + "nestedkey": "nestedval", + }, + }, + } + return e + }, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + "nested2": map[string]any{ + "nestedkey": "nestedval", + }, + } + return e + }, + }, + { + "MoveNestToResource", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("nested") + cfg.To = entry.NewResourceField("NewNested") + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + } + e.Resource = map[string]any{ + "NewNested": map[string]any{ + "nestedkey": "nestedval", + }, + } + return e + }, + }, + { + "MoveNestToAttribute", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("nested") + cfg.To = entry.NewAttributeField("NewNested") + + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + } + e.Attributes = map[string]any{ + "NewNested": map[string]any{ + "nestedkey": "nestedval", + }, + } + return e + }, + }, + { + "MoveNestedBodyStringToNestedAttribute", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("nested", "nestedkey") + cfg.To = entry.NewAttributeField("one", "two", "three") + + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + "nested": map[string]any{}, + } + e.Attributes = map[string]any{ + "one": map[string]any{ + "two": map[string]any{ + "three": "nestedval", + }, + }, + } + return e + }, + }, + { + "MoveAttributeTodBody", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewAttributeField("one", "two", "three") + cfg.To = entry.NewBodyField() + + return cfg + }(), + func() *entry.Entry { + e := newTestEntry() + e.Attributes = map[string]any{ + 
"one": map[string]any{ + "two": map[string]any{ + "three": "nestedval", + }, + }, + } + return e + }, + func() *entry.Entry { + e := newTestEntry() + e.Body = "nestedval" + e.Attributes = map[string]any{ + "one": map[string]any{ + "two": map[string]any{}, + }, + } + return e + }, + }, + { + "ReplaceBodyObj", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("wrapper") + cfg.To = entry.NewBodyField() + return cfg + }(), + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "wrapper": map[string]any{ + "key": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + }, + } + return e + }, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + } + return e + }, + }, + { + "ReplaceBodyString", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("key") + cfg.To = entry.NewBodyField() + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body = "val" + return e + }, + }, + { + "MergeObjToBody", + false, + func() *Config { + cfg := NewConfig() + cfg.From = entry.NewBodyField("nested") + cfg.To = entry.NewBodyField() + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + "nestedkey": "nestedval", + } + return e + }, + }, + } + for _, tc := range cases { + t.Run("BuildandProcess/"+tc.name, func(t *testing.T) { + cfg := tc.op + cfg.OutputIDs = []string{"fake"} + cfg.OnError = "drop" + set := componenttest.NewNopTelemetrySettings() + op, err := cfg.Build(set) + require.NoError(t, err) + + move := op.(*Transformer) + fake := testutil.NewFakeOutput(t) + require.NoError(t, move.SetOutputs([]operator.Operator{fake})) + val := tc.input() + err = move.Process(context.Background(), val) + if tc.expectErr { + require.Error(t, err) + } else { + require.NoError(t, err) + fake.ExpectEntry(t, 
tc.output()) + } + }) + } +} From 7c2ace9faf0404a905144fbfdc0ce5866969c035 Mon Sep 17 00:00:00 2001 From: Raj Date: Mon, 16 Sep 2024 14:11:37 +0530 Subject: [PATCH 05/16] feat: bring in remove operator --- .../stanza/adapter/register_ops.go | 1 + .../operator/operators/remove/config.go | 56 ++++ .../operator/operators/remove/config_test.go | 107 +++++++ .../operator/operators/remove/package_test.go | 13 + .../operators/remove/rootable_field.go | 72 +++++ .../operators/remove/testdata/config.yaml | 27 ++ .../operator/operators/remove/transformer.go | 40 +++ .../operators/remove/transformer_test.go | 272 ++++++++++++++++++ 8 files changed, 588 insertions(+) create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/remove/config.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/remove/config_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/remove/package_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/remove/rootable_field.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/remove/testdata/config.yaml create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/remove/transformer.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/remove/transformer_test.go diff --git a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go index 330f4877..5750334e 100644 --- a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go +++ b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go @@ -5,4 +5,5 @@ import ( _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/add" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/copy" _ 
"github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/move" + _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/remove" ) diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/remove/config.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/remove/config.go new file mode 100644 index 00000000..2738adb6 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/remove/config.go @@ -0,0 +1,56 @@ +// Brought in as is from opentelemetry-collector-contrib + +package remove + +import ( + "fmt" + + "go.opentelemetry.io/collector/component" + + signozlogspipelinestanzaoperator "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +const operatorType = "remove" + +func init() { + signozlogspipelinestanzaoperator.Register(operatorType, func() operator.Builder { return NewConfig() }) +} + +// NewConfig creates a new remove operator config with default values +func NewConfig() *Config { + return NewConfigWithID(operatorType) +} + +// NewConfigWithID creates a new remove operator config with default values +func NewConfigWithID(operatorID string) *Config { + return &Config{ + TransformerConfig: helper.NewTransformerConfig(operatorID, operatorType), + } +} + +// Config is the configuration of a remove operator +type Config struct { + helper.TransformerConfig `mapstructure:",squash"` + + Field rootableField `mapstructure:"field"` +} + +// Build will build a Remove operator from the supplied configuration +func (c Config) Build(set component.TelemetrySettings) (operator.Operator, error) { + transformerOperator, err := 
c.TransformerConfig.Build(set) + if err != nil { + return nil, err + } + + if c.Field.Field == entry.NewNilField() { + return nil, fmt.Errorf("remove: field is empty") + } + + return &Transformer{ + TransformerOperator: transformerOperator, + Field: c.Field, + }, nil +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/remove/config_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/remove/config_test.go new file mode 100644 index 00000000..c8c8d1d8 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/remove/config_test.go @@ -0,0 +1,107 @@ +// Brought in as is from opentelemetry-collector-contrib +package remove + +import ( + "path/filepath" + "testing" + + "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operatortest" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" +) + +// test unmarshalling of values into config struct +func TestUnmarshal(t *testing.T) { + operatortest.ConfigUnmarshalTests{ + DefaultConfig: NewConfig(), + TestsFile: filepath.Join(".", "testdata", "config.yaml"), + Tests: []operatortest.ConfigUnmarshalTest{ + { + Name: "remove_body", + Expect: func() *Config { + cfg := NewConfig() + cfg.Field = newBodyField("nested") + return cfg + }(), + }, + { + Name: "remove_single_attribute", + Expect: func() *Config { + cfg := NewConfig() + cfg.Field = newAttributeField("key") + return cfg + }(), + }, + { + Name: "remove_single_resource", + Expect: func() *Config { + cfg := NewConfig() + cfg.Field = newResourceField("key") + return cfg + }(), + }, + { + Name: "remove_entire_resource", + Expect: func() *Config { + cfg := NewConfig() + cfg.Field.allResource = true + return cfg + }(), + }, + { + Name: "remove_entire_body", + Expect: func() *Config { + cfg := NewConfig() + cfg.Field.Field = entry.NewBodyField() + return cfg + }(), + }, + { + Name: "remove_entire_attributes", + Expect: func() *Config { + 
cfg := NewConfig() + cfg.Field.allAttributes = true + return cfg + }(), + }, + { + Name: "remove_nested_body", + Expect: func() *Config { + cfg := NewConfig() + cfg.Field = newBodyField("one", "two") + return cfg + }(), + }, + { + Name: "remove_nested_attribute", + Expect: func() *Config { + cfg := NewConfig() + cfg.Field = newAttributeField("one", "two") + return cfg + }(), + }, + { + Name: "remove_nested_resource", + Expect: func() *Config { + cfg := NewConfig() + cfg.Field = newResourceField("one", "two") + return cfg + }(), + }, + }, + }.Run(t) +} + +func newBodyField(keys ...string) rootableField { + field := entry.NewBodyField(keys...) + return rootableField{Field: field} +} + +func newResourceField(keys ...string) rootableField { + field := entry.NewResourceField(keys...) + return rootableField{Field: field} +} + +func newAttributeField(keys ...string) rootableField { + field := entry.NewAttributeField(keys...) + return rootableField{Field: field} +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/remove/package_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/remove/package_test.go new file mode 100644 index 00000000..3e146786 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/remove/package_test.go @@ -0,0 +1,13 @@ +// Brought in as is from opentelemetry-collector-contrib + +package remove + +import ( + "testing" + + "go.uber.org/goleak" +) + +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/remove/rootable_field.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/remove/rootable_field.go new file mode 100644 index 00000000..443442c1 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/remove/rootable_field.go @@ -0,0 +1,72 @@ +// Brought in as is from opentelemetry-collector-contrib +package remove + +import ( + "encoding/json" + + 
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" +) + +// RootableField represents a potential field on an entry. +// It differs from a normal Field in that it allows users to +// specify `resource` or `attributes` with the intention +// of referring to "all" fields within those groups. +// It is used to get, set, and delete values at this field. +// It is deserialized from JSON dot notation. +type rootableField struct { + entry.Field + allResource bool + allAttributes bool +} + +// UnmarshalJSON will unmarshal a field from JSON +func (f *rootableField) UnmarshalJSON(raw []byte) error { + var s string + err := json.Unmarshal(raw, &s) + if err != nil { + return err + } + return f.unmarshalCheckString(s) +} + +// UnmarshalYAML will unmarshal a field from YAML +func (f *rootableField) UnmarshalYAML(unmarshal func(any) error) error { + var s string + err := unmarshal(&s) + if err != nil { + return err + } + return f.unmarshalCheckString(s) +} + +// UnmarshalText will unmarshal a field from text +func (f *rootableField) UnmarshalText(text []byte) error { + return f.unmarshalCheckString(string(text)) +} + +func (f *rootableField) unmarshalCheckString(s string) error { + if s == entry.ResourcePrefix { + *f = rootableField{allResource: true} + return nil + } + + if s == entry.AttributesPrefix { + *f = rootableField{allAttributes: true} + return nil + } + + field, err := entry.NewField(s) + if err != nil { + return err + } + *f = rootableField{Field: field} + return nil +} + +// Get gets the value of the field if the flags for 'allAttributes' or 'allResource' isn't set +func (f *rootableField) Get(entry *entry.Entry) (any, bool) { + if f.allAttributes || f.allResource { + return nil, false + } + return f.Field.Get(entry) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/remove/testdata/config.yaml b/processor/signozlogspipelineprocessor/stanza/operator/operators/remove/testdata/config.yaml new file mode 100644 index 
00000000..90ceaa3a --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/remove/testdata/config.yaml @@ -0,0 +1,27 @@ +remove_body: + type: remove + field: body.nested +remove_entire_attributes: + type: remove + field: attributes +remove_entire_body: + type: remove + field: body +remove_entire_resource: + type: remove + field: resource +remove_nested_attribute: + type: remove + field: attributes.one.two +remove_nested_body: + type: remove + field: body.one.two +remove_nested_resource: + type: remove + field: resource.one.two +remove_single_attribute: + type: remove + field: attributes.key +remove_single_resource: + type: remove + field: resource.key diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/remove/transformer.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/remove/transformer.go new file mode 100644 index 00000000..37d0f130 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/remove/transformer.go @@ -0,0 +1,40 @@ +// Brought in as is from opentelemetry-collector-contrib +package remove + +import ( + "context" + "fmt" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +// Transformer is an operator that deletes a field +type Transformer struct { + helper.TransformerOperator + Field rootableField +} + +// Process will process an entry with a remove transformation. 
+func (t *Transformer) Process(ctx context.Context, entry *entry.Entry) error { + return t.ProcessWith(ctx, entry, t.Transform) +} + +// Transform will apply the remove operation to an entry +func (t *Transformer) Transform(entry *entry.Entry) error { + if t.Field.allAttributes { + entry.Attributes = nil + return nil + } + + if t.Field.allResource { + entry.Resource = nil + return nil + } + + _, exist := entry.Delete(t.Field.Field) + if !exist { + return fmt.Errorf("remove: field does not exist: %s", t.Field.Field.String()) + } + return nil +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/remove/transformer_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/remove/transformer_test.go new file mode 100644 index 00000000..f57e48ed --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/remove/transformer_test.go @@ -0,0 +1,272 @@ +// Brought in as is from opentelemetry-collector-contrib +package remove + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component/componenttest" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/testutil" +) + +type testCase struct { + name string + op *Config + input func() *entry.Entry + output func() *entry.Entry + expectErr bool +} + +// Test building and processing a given remove config +func TestProcessAndBuild(t *testing.T) { + now := time.Now() + newTestEntry := func() *entry.Entry { + e := entry.New() + e.ObservedTimestamp = now + e.Timestamp = time.Unix(1586632809, 0) + e.Body = map[string]any{ + "key": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + } + return e + } + + cases := []testCase{ + { + "remove_one", + func() *Config { + cfg := NewConfig() + cfg.Field = 
newBodyField("key") + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + } + return e + }, + false, + }, + { + "remove_nestedkey", + func() *Config { + cfg := NewConfig() + cfg.Field = newBodyField("nested", "nestedkey") + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + "nested": map[string]any{}, + } + return e + }, + false, + }, + { + "remove_nested_attribute", + func() *Config { + cfg := NewConfig() + cfg.Field = newAttributeField("nested", "nestedkey") + return cfg + }(), + func() *entry.Entry { + e := newTestEntry() + e.Attributes = map[string]any{ + "key": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + } + return e + }, + func() *entry.Entry { + e := newTestEntry() + e.Attributes = map[string]any{ + "key": "val", + "nested": map[string]any{}, + } + return e + }, + false, + }, + { + "remove_nested_resource", + func() *Config { + cfg := NewConfig() + cfg.Field = newResourceField("nested", "nestedkey") + return cfg + }(), + func() *entry.Entry { + e := newTestEntry() + e.Resource = map[string]any{ + "key": "val", + "nested": map[string]any{ + "nestedkey": "nestedval", + }, + } + return e + }, + func() *entry.Entry { + e := newTestEntry() + e.Resource = map[string]any{ + "key": "val", + "nested": map[string]any{}, + } + return e + }, + false, + }, + { + "remove_obj", + func() *Config { + cfg := NewConfig() + cfg.Field = newBodyField("nested") + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body = map[string]any{ + "key": "val", + } + return e + }, + false, + }, + { + "remove_single_attribute", + func() *Config { + cfg := NewConfig() + cfg.Field = newAttributeField("key") + return cfg + }(), + func() *entry.Entry { + e := newTestEntry() + e.Attributes = map[string]any{ + "key": "val", + } + return e + }, + 
func() *entry.Entry { + e := newTestEntry() + e.Attributes = map[string]any{} + return e + }, + false, + }, + { + "remove_single_resource", + func() *Config { + cfg := NewConfig() + cfg.Field = newResourceField("key") + return cfg + }(), + func() *entry.Entry { + e := newTestEntry() + e.Resource = map[string]any{ + "key": "val", + } + return e + }, + func() *entry.Entry { + e := newTestEntry() + e.Resource = map[string]any{} + return e + }, + false, + }, + { + "remove_body", + func() *Config { + cfg := NewConfig() + cfg.Field.Field = entry.NewBodyField() + return cfg + }(), + newTestEntry, + func() *entry.Entry { + e := newTestEntry() + e.Body = nil + return e + }, + false, + }, + { + "remove_resource", + func() *Config { + cfg := NewConfig() + cfg.Field.allResource = true + return cfg + }(), + func() *entry.Entry { + e := newTestEntry() + e.Resource = map[string]any{ + "key": "val", + } + return e + }, + func() *entry.Entry { + e := newTestEntry() + e.Resource = nil + return e + }, + false, + }, + { + "remove_attributes", + func() *Config { + cfg := NewConfig() + cfg.Field.allAttributes = true + return cfg + }(), + func() *entry.Entry { + e := newTestEntry() + e.Attributes = map[string]any{ + "key": "val", + } + return e + }, + func() *entry.Entry { + e := newTestEntry() + e.Attributes = nil + return e + }, + false, + }, + } + for _, tc := range cases { + t.Run("BuildandProcess/"+tc.name, func(t *testing.T) { + cfg := tc.op + cfg.OutputIDs = []string{"fake"} + cfg.OnError = "drop" + set := componenttest.NewNopTelemetrySettings() + op, err := cfg.Build(set) + require.NoError(t, err) + + remove := op.(*Transformer) + fake := testutil.NewFakeOutput(t) + require.NoError(t, remove.SetOutputs([]operator.Operator{fake})) + val := tc.input() + err = remove.Process(context.Background(), val) + if tc.expectErr { + require.Error(t, err) + } else { + require.NoError(t, err) + fake.ExpectEntry(t, tc.output()) + } + }) + } +} From 3e12830ab9c7cbb06c5d676a667d95b6089305c7 Mon 
Sep 17 00:00:00 2001 From: Raj Date: Mon, 16 Sep 2024 14:18:10 +0530 Subject: [PATCH 06/16] feat: bring in regex operator --- .../stanza/adapter/register_ops.go | 1 + .../stanza/operator/operators/regex/cache.go | 215 +++++++++++ .../operator/operators/regex/cache_test.go | 271 ++++++++++++++ .../stanza/operator/operators/regex/config.go | 91 +++++ .../operator/operators/regex/config_test.go | 132 +++++++ .../operator/operators/regex/package_test.go | 13 + .../stanza/operator/operators/regex/parser.go | 62 ++++ .../operator/operators/regex/parser_test.go | 345 ++++++++++++++++++ .../operators/regex/testdata/config.yaml | 47 +++ 9 files changed, 1177 insertions(+) create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/regex/cache.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/regex/cache_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/regex/config.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/regex/config_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/regex/package_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/regex/parser.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/regex/parser_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/regex/testdata/config.yaml diff --git a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go index 5750334e..23d6bea4 100644 --- a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go +++ b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go @@ -5,5 +5,6 @@ import ( _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/add" _ 
"github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/copy" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/move" + _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/regex" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/remove" ) diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/cache.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/cache.go new file mode 100644 index 00000000..054df47a --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/cache.go @@ -0,0 +1,215 @@ +// Brought in as is from opentelemetry-collector-contrib + +package regex + +import ( + "math" + "sync" + "sync/atomic" + "time" +) + +// cache allows operators to cache a value and look it up later +type cache interface { + get(key string) any + add(key string, data any) bool + copy() map[string]any + maxSize() uint16 + stop() +} + +// newMemoryCache takes a cache size and a limiter interval and +// returns a new memory backed cache +func newMemoryCache(maxSize uint16, interval uint64) *memoryCache { + // start throttling when cache turnover is above 100% + limit := uint64(maxSize) + 1 + + return &memoryCache{ + cache: make(map[string]any), + keys: make(chan string, maxSize), + limiter: newStartedAtomicLimiter(limit, interval), + } +} + +// memoryCache is an in memory cache of items with a pre defined +// max size. Memory's underlying storage is a map[string]item +// and does not perform any manipulation of the data. Memory +// is designed to be as fast as possible while being thread safe. +// When the cache is full, new items will evict the oldest +// item using a FIFO style queue. 
+type memoryCache struct {
+	// cache holds the key / value pairs of cached items.
+	cache map[string]any
+
+	// keys is a FIFO queue of the cached keys. Its capacity is the
+	// maximum cache size; when it is full, the oldest key is read
+	// from the channel and deleted from the cache map.
+	keys chan string
+
+	// mutex guards cache and keys. Readers take the read lock,
+	// writers take the exclusive lock.
+	mutex sync.RWMutex
+
+	// limiter rate limits insertions once evictions become frequent.
+	limiter limiter
+}
+
+var _ cache = (&memoryCache{})
+
+// get returns the cached entry for key, or nil if it does not exist.
+func (m *memoryCache) get(key string) any {
+	// Read and unlock as fast as possible
+	m.mutex.RLock()
+	data := m.cache[key]
+	m.mutex.RUnlock()
+
+	return data
+}
+
+// add inserts an item into the cache and reports whether it was stored.
+// It returns false without storing anything when the limiter is
+// throttled or when the cache has no capacity. If the cache is full,
+// the oldest item is evicted and the limiter is notified. A key that is
+// already cached has its value replaced in place.
+func (m *memoryCache) add(key string, data any) bool {
+	if m.limiter.throttled() {
+		return false
+	}
+
+	m.mutex.Lock()
+	defer m.mutex.Unlock()
+
+	// A zero capacity queue can never hold a key; receiving from it
+	// below would block forever while holding the lock.
+	if cap(m.keys) == 0 {
+		return false
+	}
+
+	// The key is already queued: overwrite the value only. Queueing it a
+	// second time would make a later eviction delete a live entry and
+	// let len(m.keys) drift away from len(m.cache).
+	if _, ok := m.cache[key]; ok {
+		m.cache[key] = data
+		return true
+	}
+
+	if len(m.keys) == cap(m.keys) {
+		// Pop the oldest key from the channel
+		// and remove it from the cache
+		delete(m.cache, <-m.keys)
+
+		// notify the rate limiter that an entry
+		// was evicted
+		m.limiter.increment()
+	}
+
+	// Write the cached entry and add the key
+	// to the channel
+	m.cache[key] = data
+	m.keys <- key
+	return true
+}
+
+// copy returns a snapshot of the cache as a new map. The copy is
+// shallow: the map itself is new, but the values are the same values
+// that are stored in the cache.
+func (m *memoryCache) copy() map[string]any {
+	cp := make(map[string]any, cap(m.keys))
+
+	// Copying only reads the cache, so the read lock is sufficient and
+	// does not block concurrent get calls.
+	m.mutex.RLock()
+	defer m.mutex.RUnlock()
+
+	for k, v := range m.cache {
+		cp[k] = v
+	}
+	return cp
+}
+
+// maxSize returns the max size of the cache
+func (m *memoryCache) maxSize() uint16 {
+	return uint16(cap(m.keys))
+}
+
+// stop stops the cache's limiter.
+func (m *memoryCache) stop() {
+	m.limiter.stop()
+}
+
+// limiter provides rate limiting methods for
+// the cache
+type limiter interface {
+	init()
+	increment()
+	currentCount() uint64
+	limit() uint64
+	resetInterval() time.Duration
+	throttled() bool
+	stop()
+}
+
+// newStartedAtomicLimiter returns a started atomicLimiter. max is the
+// count at which the limiter starts throttling and interval is the
+// reset period in seconds; an interval of 0 selects the default of 5.
+func newStartedAtomicLimiter(max uint64, interval uint64) *atomicLimiter {
+	if interval == 0 {
+		interval = 5
+	}
+
+	a := &atomicLimiter{
+		count:    &atomic.Uint64{},
+		max:      max,
+		interval: time.Second * time.Duration(interval),
+		done:     make(chan struct{}),
+	}
+
+	a.init()
+	return a
+}
+
+// atomicLimiter enables rate limiting using an atomic
+// counter. When count is >= max, throttled will return
+// true. The count is reduced on an interval.
+type atomicLimiter struct {
+	count    *atomic.Uint64
+	max      uint64
+	interval time.Duration
+	start    sync.Once
+	done     chan struct{}
+}
+
+var _ limiter = &atomicLimiter{count: &atomic.Uint64{}}
+
+// init starts the goroutine that periodically lowers the counter. The
+// goroutine is started at most once, however often init is called, and
+// returns when the done channel is closed.
+func (l *atomicLimiter) init() {
+	// start the reset go routine once
+	l.start.Do(func() {
+		// During every interval period, reduce the counter by 10% of
+		// max. The step is computed as a non-negative value: converting
+		// a negative float to uint64 is implementation-dependent, so it
+		// must not be used to encode a subtraction. The step is at least
+		// 1 so that limiters with a small max still recover.
+		step := uint64(math.Round(0.10 * float64(l.max)))
+		if step == 0 {
+			step = 1
+		}
+
+		go func() {
+			ticker := time.NewTicker(l.interval)
+			defer ticker.Stop()
+			for {
+				select {
+				case <-l.done:
+					return
+				case <-ticker.C:
+					l.decrement(step)
+				}
+			}
+		}()
+	})
+}
+
+// decrement lowers the counter by step without going below zero. An
+// unsigned counter that wrapped around would read as a huge count and
+// leave the limiter throttled.
+func (l *atomicLimiter) decrement(step uint64) {
+	for {
+		cur := l.count.Load()
+		if cur == 0 {
+			return
+		}
+
+		next := uint64(0)
+		if cur > step {
+			next = cur - step
+		}
+
+		// Retry if the counter changed between the load and the swap.
+		if l.count.CompareAndSwap(cur, next) {
+			return
+		}
+	}
+}
+
+// increment increments the atomic counter. It does nothing when the
+// counter is exactly at max.
+func (l *atomicLimiter) increment() {
+	if l.count.Load() == l.max {
+		return
+	}
+	l.count.Add(1)
+}
+
+// Returns true if the cache is currently throttled, meaning a high
+// number of evictions have recently occurred due to the cache being
+// full. When the cache is constantly locked, reads and writes are
+// blocked, causing the regex parser to be slower than if it was
+// not caching at all.
+func (l *atomicLimiter) throttled() bool { + return l.currentCount() >= l.max +} + +func (l *atomicLimiter) currentCount() uint64 { + return l.count.Load() +} + +func (l *atomicLimiter) limit() uint64 { + return l.max +} + +func (l *atomicLimiter) resetInterval() time.Duration { + return l.interval +} + +func (l *atomicLimiter) stop() { + close(l.done) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/cache_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/cache_test.go new file mode 100644 index 00000000..77e5238b --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/cache_test.go @@ -0,0 +1,271 @@ +// Brought in as is from opentelemetry-collector-contrib + +package regex + +import ( + "strconv" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestNewMemoryCache(t *testing.T) { + cases := []struct { + name string + maxSize uint16 + expect *memoryCache + expectSize int + }{ + { + "size-50", + 50, + &memoryCache{ + cache: make(map[string]any), + keys: make(chan string, 50), + }, + 50, + }, + } + + for _, tc := range cases { + output := newMemoryCache(tc.maxSize, 0) + defer output.stop() + require.Equal(t, tc.expect.cache, output.cache) + require.Len(t, output.cache, 0, "new memory should always be empty") + require.Len(t, output.keys, 0, "new memory should always be empty") + require.Equal(t, tc.expectSize, cap(output.keys), "keys channel should have cap of expected size") + } +} + +func TestMemory(t *testing.T) { + cases := []struct { + name string + cache *memoryCache + input map[string]any + expect *memoryCache + }{ + { + "basic", + func() *memoryCache { + return newMemoryCache(3, 0) + }(), + map[string]any{ + "key": "value", + "map-value": map[string]string{ + "x": "y", + "dev": "stanza", + }, + }, + &memoryCache{ + cache: map[string]any{ + "key": "value", + "map-value": map[string]string{ + "x": "y", + "dev": "stanza", 
+ }, + }, + }, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + defer tc.cache.stop() + for key, value := range tc.input { + tc.cache.add(key, value) + out := tc.cache.get(key) + require.NotNil(t, out, "expected to get value from cache immediately after adding it") + require.Equal(t, value, out, "expected value to equal the value that was added to the cache") + } + + require.Equal(t, len(tc.expect.cache), len(tc.cache.cache)) + + for expectKey, expectItem := range tc.expect.cache { + actual := tc.cache.get(expectKey) + require.NotNil(t, actual) + require.Equal(t, expectItem, actual) + } + }) + } +} + +// A full cache should replace the oldest element with the new element +func TestCleanupLast(t *testing.T) { + maxSize := 10 + + m := newMemoryCache(uint16(maxSize), 0) + defer m.stop() + + // Add to cache until it is full + for i := 0; i <= cap(m.keys); i++ { + str := strconv.Itoa(i) + m.add(str, i) + } + + // make sure the cache looks the way we expect + expectCache := map[string]any{ + "1": 1, // oldest key, will be removed when 11 is added + "2": 2, + "3": 3, + "4": 4, + "5": 5, + "6": 6, + "7": 7, + "8": 8, + "9": 9, + "10": 10, // youngest key, will be removed when 20 is added + } + require.Equal(t, expectCache, m.cache) + require.Len(t, m.cache, maxSize) + require.Len(t, m.keys, maxSize) + + // for every additional key, the oldest should be removed + // 1, 2, 3 and so on. 
+ for i := 11; i <= 20; i++ { + str := strconv.Itoa(i) + m.add(str, i) + + removedKey := strconv.Itoa(i - 10) + x := m.get(removedKey) + require.Nil(t, x, "expected key %s to have been removed", removedKey) + require.Len(t, m.cache, maxSize) + } + + // All entries should have been replaced by now + expectCache = map[string]any{ + "11": 11, + "12": 12, + "13": 13, + "14": 14, + "15": 15, + "16": 16, + "17": 17, + "18": 18, + "19": 19, + "20": 20, + } + require.Equal(t, expectCache, m.cache) + require.Len(t, m.cache, maxSize) +} + +func TestNewStartedAtomicLimiter(t *testing.T) { + cases := []struct { + name string + max uint64 + interval uint64 + }{ + { + "default", + 0, + 0, + }, + { + "max", + 30, + 0, + }, + { + "interval", + 0, + 3, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + l := newStartedAtomicLimiter(tc.max, tc.interval) + require.Equal(t, tc.max, l.max) + defer l.stop() + if tc.interval == 0 { + // default + tc.interval = 5 + } + require.Equal(t, float64(tc.interval), l.interval.Seconds()) + require.Equal(t, uint64(0), l.currentCount()) + }) + } +} + +// Start a limiter with a max of 3 and ensure throttling begins +func TestLimiter(t *testing.T) { + max := uint64(3) + + l := newStartedAtomicLimiter(max, 120) + require.NotNil(t, l) + require.Equal(t, max, l.max) + defer l.stop() + + require.False(t, l.throttled(), "new limiter should not be throttling") + require.Equal(t, uint64(0), l.currentCount()) + + var i uint64 + for i = 1; i < max; i++ { + l.increment() + require.Equal(t, i, l.currentCount()) + require.False(t, l.throttled()) + } + + l.increment() + require.True(t, l.throttled()) +} + +func TestThrottledLimiter(t *testing.T) { + max := uint64(3) + + // Limiter with a count higher than the max, which will force + // it to be throttled by default. 
Also note that the init method + // has not been called yet, so the reset go routine is not running + count := &atomic.Uint64{} + count.Add(max + 1) + l := atomicLimiter{ + max: max, + count: count, + interval: 1, + done: make(chan struct{}), + } + + require.True(t, l.throttled()) + + // Test the reset go routine by calling init() and waiting + // for it to reset the counter. The limiter will no longer + // be in a throttled state and the count will be reset. + l.init() + defer l.stop() + wait := 2 * l.interval + time.Sleep(time.Second * wait) + require.False(t, l.throttled()) + require.Equal(t, uint64(0), l.currentCount()) +} + +func TestThrottledCache(t *testing.T) { + c := newMemoryCache(3, 120) + defer c.stop() + require.False(t, c.limiter.throttled()) + require.Equal(t, 4, int(c.limiter.limit()), "expected limit be cache size + 1") + require.Equal(t, float64(120), c.limiter.resetInterval().Seconds(), "expected reset interval to be 120 seconds") + + // fill the cache and cause 100% evictions + for i := 1; i <= 6; i++ { + key := strconv.Itoa(i) + value := i + c.add(key, value) + require.False(t, c.limiter.throttled()) + } + + // limiter is incremented after cache is full. a cache of size 3 + // with 6 additions will cause the limiter to be set to 3. 
+ require.Equal(t, 3, int(c.limiter.currentCount()), "expected limit count to be 3 after 6 additions to the cache") + + // 7th addition will be throttled because the cache + // has already reached 100% eviction rate + c.add("7", "should be limited") + require.True(t, c.limiter.throttled()) + + // 8th addition will skip adding to the cache + // because the 7th addition enabled the limiter + result := c.add("8", "add miss") + require.True(t, c.limiter.throttled()) + require.False(t, result, "expected add to return false when cache writes are throttled") +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/config.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/config.go new file mode 100644 index 00000000..be92e20b --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/config.go @@ -0,0 +1,91 @@ +// Brought in as is from opentelemetry-collector-contrib + +package regex + +import ( + "fmt" + "regexp" + + "go.opentelemetry.io/collector/component" + "go.uber.org/zap" + + signozlogspipelinestanzaoperator "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/errors" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +const operatorType = "regex_parser" + +func init() { + signozlogspipelinestanzaoperator.Register(operatorType, func() operator.Builder { return NewConfig() }) +} + +// NewConfig creates a new regex parser config with default values +func NewConfig() *Config { + return NewConfigWithID(operatorType) +} + +// NewConfigWithID creates a new regex parser config with default values +func NewConfigWithID(operatorID string) *Config { + return &Config{ + ParserConfig: helper.NewParserConfig(operatorID, operatorType), + } +} + +// Config is 
the configuration of a regex parser operator. +type Config struct { + helper.ParserConfig `mapstructure:",squash"` + + Regex string `mapstructure:"regex"` + + Cache struct { + Size uint16 `mapstructure:"size"` + } `mapstructure:"cache"` +} + +// Build will build a regex parser operator. +func (c Config) Build(set component.TelemetrySettings) (operator.Operator, error) { + parserOperator, err := c.ParserConfig.Build(set) + if err != nil { + return nil, err + } + + if c.Regex == "" { + return nil, fmt.Errorf("missing required field 'regex'") + } + + r, err := regexp.Compile(c.Regex) + if err != nil { + return nil, fmt.Errorf("compiling regex: %w", err) + } + + namedCaptureGroups := 0 + for _, groupName := range r.SubexpNames() { + if groupName != "" { + namedCaptureGroups++ + } + } + if namedCaptureGroups == 0 { + return nil, errors.NewError( + "no named capture groups in regex pattern", + "use named capture groups like '^(?P<my_key>.*)$' to specify the key name for the parsed field", + ) + } + + op := &Parser{ + ParserOperator: parserOperator, + regexp: r, + } + + if c.Cache.Size > 0 { + op.cache = newMemoryCache(c.Cache.Size, 0) + set.Logger.Debug( + "configured memory cache", + zap.String("operator_id", op.ID()), + zap.Uint16("size", op.cache.maxSize()), + ) + } + + return op, nil +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/config_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/config_test.go new file mode 100644 index 00000000..c430eaee --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/config_test.go @@ -0,0 +1,132 @@ +// Brought in as is from opentelemetry-collector-contrib +package regex + +import ( + "path/filepath" + "testing" + + "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operatortest" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" +
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +func TestParserGoldenConfig(t *testing.T) { + operatortest.ConfigUnmarshalTests{ + DefaultConfig: NewConfig(), + TestsFile: filepath.Join(".", "testdata", "config.yaml"), + Tests: []operatortest.ConfigUnmarshalTest{ + { + Name: "default", + Expect: NewConfig(), + }, + { + Name: "cache", + Expect: func() *Config { + cfg := NewConfig() + cfg.Cache.Size = 50 + return cfg + }(), + }, + { + Name: "parse_from_simple", + Expect: func() *Config { + cfg := NewConfig() + cfg.ParseFrom = entry.NewBodyField("from") + return cfg + }(), + }, + { + Name: "parse_to_simple", + Expect: func() *Config { + cfg := NewConfig() + cfg.ParseTo = entry.RootableField{Field: entry.NewBodyField("log")} + return cfg + }(), + }, + { + Name: "on_error_drop", + Expect: func() *Config { + cfg := NewConfig() + cfg.OnError = "drop" + return cfg + }(), + }, + { + Name: "timestamp", + Expect: func() *Config { + cfg := NewConfig() + parseField := entry.NewBodyField("timestamp_field") + newTime := helper.TimeParser{ + LayoutType: "strptime", + Layout: "%Y-%m-%d", + ParseFrom: &parseField, + } + cfg.TimeParser = &newTime + return cfg + }(), + }, + { + Name: "severity", + Expect: func() *Config { + cfg := NewConfig() + parseField := entry.NewBodyField("severity_field") + severityParser := helper.NewSeverityConfig() + severityParser.ParseFrom = &parseField + mapping := map[string]any{ + "critical": "5xx", + "error": "4xx", + "info": "3xx", + "debug": "2xx", + } + severityParser.Mapping = mapping + cfg.SeverityConfig = &severityParser + return cfg + }(), + }, + { + Name: "regex", + Expect: func() *Config { + cfg := NewConfig() + cfg.Regex = "^Host=(?P[^,]+), Type=(?P.*)$" + return cfg + }(), + }, + { + Name: "scope_name", + Expect: func() *Config { + cfg := NewConfig() + cfg.Regex = "^Host=(?P[^,]+), Logger=(?P.*)$" + parseField := entry.NewBodyField("logger_name_field") + loggerNameParser := 
helper.NewScopeNameParser() + loggerNameParser.ParseFrom = parseField + cfg.ScopeNameParser = &loggerNameParser + return cfg + }(), + }, + { + Name: "parse_to_attributes", + Expect: func() *Config { + p := NewConfig() + p.ParseTo = entry.RootableField{Field: entry.NewAttributeField()} + return p + }(), + }, + { + Name: "parse_to_body", + Expect: func() *Config { + p := NewConfig() + p.ParseTo = entry.RootableField{Field: entry.NewBodyField()} + return p + }(), + }, + { + Name: "parse_to_resource", + Expect: func() *Config { + p := NewConfig() + p.ParseTo = entry.RootableField{Field: entry.NewResourceField()} + return p + }(), + }, + }, + }.Run(t) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/package_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/package_test.go new file mode 100644 index 00000000..c137ebbf --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/package_test.go @@ -0,0 +1,13 @@ +// Brought in as is from opentelemetry-collector-contrib + +package regex + +import ( + "testing" + + "go.uber.org/goleak" +) + +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/parser.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/parser.go new file mode 100644 index 00000000..84983f71 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/parser.go @@ -0,0 +1,62 @@ +// Brought in as is from opentelemetry-collector-contrib + +package regex + +import ( + "context" + "fmt" + "regexp" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +// Parser is an operator that parses regex in an entry. 
+type Parser struct { + helper.ParserOperator + regexp *regexp.Regexp + cache cache +} + +func (p *Parser) Stop() error { + if p.cache != nil { + p.cache.stop() + } + return nil +} + +// Process will parse an entry for regex. +func (p *Parser) Process(ctx context.Context, entry *entry.Entry) error { + return p.ParserOperator.ProcessWith(ctx, entry, p.parse) +} + +// parse will parse a value using the supplied regex. +func (p *Parser) parse(value any) (any, error) { + var raw string + switch m := value.(type) { + case string: + raw = m + default: + return nil, fmt.Errorf("type '%T' cannot be parsed as regex", value) + } + return p.match(raw) +} + +func (p *Parser) match(value string) (any, error) { + if p.cache != nil { + if x := p.cache.get(value); x != nil { + return x, nil + } + } + + parsedValues, err := helper.MatchValues(value, p.regexp) + if err != nil { + return nil, err + } + + if p.cache != nil { + p.cache.add(value, parsedValues) + } + + return parsedValues, nil +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/parser_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/parser_test.go new file mode 100644 index 00000000..96c96fd4 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/parser_test.go @@ -0,0 +1,345 @@ +// Brought in as is from opentelemetry-collector-contrib + +package regex + +import ( + "context" + "fmt" + "math/rand" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component/componenttest" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/testutil" +) + +func newTestParser(t *testing.T, regex string, cacheSize uint16) *Parser { + cfg := NewConfigWithID("test") + cfg.Regex = regex + if cacheSize > 0 { + 
cfg.Cache.Size = cacheSize + } + set := componenttest.NewNopTelemetrySettings() + op, err := cfg.Build(set) + require.NoError(t, err) + return op.(*Parser) +} + +func TestParserBuildFailure(t *testing.T) { + cfg := NewConfigWithID("test") + cfg.OnError = "invalid_on_error" + set := componenttest.NewNopTelemetrySettings() + _, err := cfg.Build(set) + require.Error(t, err) + require.Contains(t, err.Error(), "invalid `on_error` field") +} + +func TestParserByteFailure(t *testing.T) { + parser := newTestParser(t, "^(?P<key>test)", 0) + _, err := parser.parse([]byte("invalid")) + require.Error(t, err) + require.Contains(t, err.Error(), "type '[]uint8' cannot be parsed as regex") +} + +func TestParserStringFailure(t *testing.T) { + parser := newTestParser(t, "^(?P<key>test)", 0) + _, err := parser.parse("invalid") + require.Error(t, err) + require.Contains(t, err.Error(), "regex pattern does not match") +} + +func TestParserInvalidType(t *testing.T) { + parser := newTestParser(t, "^(?P<key>test)", 0) + _, err := parser.parse([]int{}) + require.Error(t, err) + require.Contains(t, err.Error(), "type '[]int' cannot be parsed as regex") +} + +func TestParserCache(t *testing.T) { + parser := newTestParser(t, "^(?P<key>cache)", 200) + defer func() { + require.NoError(t, parser.Stop()) + }() + _, err := parser.parse([]int{}) + require.Error(t, err) + require.Contains(t, err.Error(), "type '[]int' cannot be parsed as regex") + require.NotNil(t, parser.cache, "expected cache to be configured") + require.Equal(t, parser.cache.maxSize(), uint16(200)) +} + +func TestParserRegex(t *testing.T) { + cases := []struct { + name string + configure func(*Config) + input *entry.Entry + expected *entry.Entry + }{ + { + "RootString", + func(p *Config) { + p.Regex = "a=(?P<a>.*)" + }, + &entry.Entry{ + Body: "a=b", + }, + &entry.Entry{ + Body: "a=b", + Attributes: map[string]any{ + "a": "b", + }, + }, + }, + { + "MemeoryCache", + func(p *Config) { + p.Regex = "a=(?P<a>.*)" + p.Cache.Size = 100 + }, + &entry.Entry{ +
Body: "a=b", + }, + &entry.Entry{ + Body: "a=b", + Attributes: map[string]any{ + "a": "b", + }, + }, + }, + { + "K8sFileCache", + func(p *Config) { + p.Regex = `^(?P<pod_name>[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?P<namespace>[^_]+)_(?P<container_name>.+)-(?P<container_id>[a-z0-9]{64})\.log$` + p.Cache.Size = 100 + }, + &entry.Entry{ + Body: "coredns-5644d7b6d9-mzngq_kube-system_coredns-901f7510281180a402936c92f5bc0f3557f5a21ccb5a4591c5bf98f3ddbffdd6.log", + }, + &entry.Entry{ + Body: "coredns-5644d7b6d9-mzngq_kube-system_coredns-901f7510281180a402936c92f5bc0f3557f5a21ccb5a4591c5bf98f3ddbffdd6.log", + Attributes: map[string]any{ + "container_id": "901f7510281180a402936c92f5bc0f3557f5a21ccb5a4591c5bf98f3ddbffdd6", + "container_name": "coredns", + "namespace": "kube-system", + "pod_name": "coredns-5644d7b6d9-mzngq", + }, + }, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + cfg := NewConfigWithID("test") + cfg.OutputIDs = []string{"fake"} + tc.configure(cfg) + + set := componenttest.NewNopTelemetrySettings() + op, err := cfg.Build(set) + require.NoError(t, err) + + defer func() { + require.NoError(t, op.Stop()) + }() + + fake := testutil.NewFakeOutput(t) + require.NoError(t, op.SetOutputs([]operator.Operator{fake})) + + ots := time.Now() + tc.input.ObservedTimestamp = ots + tc.expected.ObservedTimestamp = ots + + err = op.Process(context.Background(), tc.input) + require.NoError(t, err) + + fake.ExpectEntry(t, tc.expected) + }) + } +} + +func TestBuildParserRegex(t *testing.T) { + newBasicParser := func() *Config { + cfg := NewConfigWithID("test") + cfg.OutputIDs = []string{"test"} + cfg.Regex = "(?P<all>.*)" + return cfg + } + + t.Run("BasicConfig", func(t *testing.T) { + c := newBasicParser() + set := componenttest.NewNopTelemetrySettings() + _, err := c.Build(set) + require.NoError(t, err) + }) + + t.Run("MissingRegexField", func(t *testing.T) { + c := newBasicParser() + c.Regex = "" + set := componenttest.NewNopTelemetrySettings() + _, err := c.Build(set) +
require.Error(t, err) + }) + + t.Run("InvalidRegexField", func(t *testing.T) { + c := newBasicParser() + c.Regex = "())()" + set := componenttest.NewNopTelemetrySettings() + _, err := c.Build(set) + require.Error(t, err) + }) + + t.Run("NoNamedGroups", func(t *testing.T) { + c := newBasicParser() + c.Regex = ".*" + set := componenttest.NewNopTelemetrySettings() + _, err := c.Build(set) + require.Error(t, err) + require.Contains(t, err.Error(), "no named capture groups") + }) + + t.Run("NoNamedGroups", func(t *testing.T) { + c := newBasicParser() + c.Regex = "(.*)" + set := componenttest.NewNopTelemetrySettings() + _, err := c.Build(set) + require.Error(t, err) + require.Contains(t, err.Error(), "no named capture groups") + }) +} + +// return 100 unique file names, example: +// dafplsjfbcxoeff-5644d7b6d9-mzngq_kube-system_coredns-901f7510281180a402936c92f5bc0f3557f5a21ccb5a4591c5bf98f3ddbffdd6.log +// rswxpldnjobcsnv-5644d7b6d9-mzngq_kube-system_coredns-901f7510281180a402936c92f5bc0f3557f5a21ccb5a4591c5bf98f3ddbffdd6.log +// lgtemapezqleqyh-5644d7b6d9-mzngq_kube-system_coredns-901f7510281180a402936c92f5bc0f3557f5a21ccb5a4591c5bf98f3ddbffdd6.log +func benchParseInput() (patterns []string) { + const letterBytes = "abcdefghijklmnopqrstuvwxyz" + for i := 1; i <= 100; i++ { + b := make([]byte, 15) + for i := range b { + b[i] = letterBytes[rand.Intn(len(letterBytes))] + } + randomStr := string(b) + p := fmt.Sprintf("%s-5644d7b6d9-mzngq_kube-system_coredns-901f7510281180a402936c92f5bc0f3557f5a21ccb5a4591c5bf98f3ddbffdd6.log", randomStr) + patterns = append(patterns, p) + } + return patterns +} + +// Regex used to parse a kubernetes container log file name, which contains the +// pod name, namespace, container name, container. 
+const benchParsePattern = `^(?P<pod_name>[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?P<namespace>[^_]+)_(?P<container_name>.+)-(?P<container_id>[a-z0-9]{64})\.log$` + +var benchParsePatterns = benchParseInput() + +func newTestBenchParser(t *testing.T, cacheSize uint16) *Parser { + cfg := NewConfigWithID("bench") + cfg.Regex = benchParsePattern + cfg.Cache.Size = cacheSize + + set := componenttest.NewNopTelemetrySettings() + op, err := cfg.Build(set) + require.NoError(t, err) + return op.(*Parser) +} + +func benchmarkParseThreaded(b *testing.B, parser *Parser, input []string) { + wg := sync.WaitGroup{} + + for _, i := range input { + wg.Add(1) + + go func(i string) { + if _, err := parser.match(i); err != nil { + b.Error(err) + } + wg.Done() + }(i) + } + + wg.Wait() +} + +func benchmarkParse(b *testing.B, parser *Parser, input []string) { + for _, i := range input { + if _, err := parser.match(i); err != nil { + b.Error(err) + } + } +} + +// No cache +func BenchmarkParseNoCache(b *testing.B) { + parser := newTestBenchParser(&testing.T{}, 0) + for n := 0; n < b.N; n++ { + benchmarkParseThreaded(b, parser, benchParsePatterns) + } +} + +// Memory cache at capacity +func BenchmarkParseWithMemoryCache(b *testing.B) { + parser := newTestBenchParser(&testing.T{}, 100) + for n := 0; n < b.N; n++ { + benchmarkParseThreaded(b, parser, benchParsePatterns) + } +} + +// Memory cache over capacity by one +func BenchmarkParseWithMemoryCacheFullByOne(b *testing.B) { + parser := newTestBenchParser(&testing.T{}, 99) + for n := 0; n < b.N; n++ { + benchmarkParseThreaded(b, parser, benchParsePatterns) + } +} + +// Memory cache over capacity by 10 +func BenchmarkParseWithMemoryCacheFullBy10(b *testing.B) { + parser := newTestBenchParser(&testing.T{}, 90) + for n := 0; n < b.N; n++ { + benchmarkParseThreaded(b, parser, benchParsePatterns) + } +} + +// Memory cache over capacity by 50 +func BenchmarkParseWithMemoryCacheFullBy50(b *testing.B) { + parser := newTestBenchParser(&testing.T{}, 50) + for n := 0; n < b.N;
n++ { + benchmarkParseThreaded(b, parser, benchParsePatterns) + } +} + +// Memory cache over capacity by 90 +func BenchmarkParseWithMemoryCacheFullBy90(b *testing.B) { + parser := newTestBenchParser(&testing.T{}, 10) + for n := 0; n < b.N; n++ { + benchmarkParseThreaded(b, parser, benchParsePatterns) + } +} + +// Memory cache over capacity by 99 +func BenchmarkParseWithMemoryCacheFullBy99(b *testing.B) { + parser := newTestBenchParser(&testing.T{}, 1) + for n := 0; n < b.N; n++ { + benchmarkParseThreaded(b, parser, benchParsePatterns) + } +} + +// No cache one file +func BenchmarkParseNoCacheOneFile(b *testing.B) { + parser := newTestBenchParser(&testing.T{}, 0) + for n := 0; n < b.N; n++ { + pattern := []string{benchParsePatterns[0]} + benchmarkParse(b, parser, pattern) + } +} + +// Memory cache one file +func BenchmarkParseWithMemoryCacheOneFile(b *testing.B) { + parser := newTestBenchParser(&testing.T{}, 100) + for n := 0; n < b.N; n++ { + pattern := []string{benchParsePatterns[0]} + benchmarkParse(b, parser, pattern) + } +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/testdata/config.yaml b/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/testdata/config.yaml new file mode 100644 index 00000000..1c3a81cd --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/regex/testdata/config.yaml @@ -0,0 +1,47 @@ +cache: + type: regex_parser + cache: + size: 50 +default: + type: regex_parser +on_error_drop: + type: regex_parser + on_error: "drop" +parse_from_simple: + type: regex_parser + parse_from: "body.from" +parse_to_attributes: + type: regex_parser + parse_to: attributes +parse_to_body: + type: regex_parser + parse_to: body +parse_to_resource: + type: regex_parser + parse_to: resource +parse_to_simple: + type: regex_parser + parse_to: "body.log" +regex: + type: regex_parser + regex: '^Host=(?P[^,]+), Type=(?P.*)$' +scope_name: + type: regex_parser + regex: '^Host=(?P[^,]+), 
Logger=(?P.*)$' + scope_name: + parse_from: body.logger_name_field +severity: + type: regex_parser + severity: + parse_from: body.severity_field + mapping: + critical: 5xx + error: 4xx + info: 3xx + debug: 2xx +timestamp: + type: regex_parser + timestamp: + parse_from: body.timestamp_field + layout_type: strptime + layout: '%Y-%m-%d' From 69ad2e3188b8126449f2b89ee2206ee368c989c8 Mon Sep 17 00:00:00 2001 From: Raj Date: Mon, 16 Sep 2024 15:27:16 +0530 Subject: [PATCH 07/16] feat: bring in JSON operator --- .../stanza/adapter/register_ops.go | 1 + .../stanza/operator/operators/json/config.go | 48 +++++ .../operator/operators/json/config_test.go | 114 ++++++++++++ .../operator/operators/json/package_test.go | 13 ++ .../stanza/operator/operators/json/parser.go | 39 +++++ .../operator/operators/json/parser_test.go | 165 ++++++++++++++++++ .../operators/json/testdata/config.yaml | 39 +++++ 7 files changed, 419 insertions(+) create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/json/config.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/json/config_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/json/package_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/json/parser.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/json/parser_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/json/testdata/config.yaml diff --git a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go index 23d6bea4..8246e087 100644 --- a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go +++ b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go @@ -4,6 +4,7 @@ package signozlogspipelinestanzaadapter import ( _ 
"github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/add" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/copy" + _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/json" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/move" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/regex" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/remove" diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/json/config.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/json/config.go new file mode 100644 index 00000000..5792f598 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/json/config.go @@ -0,0 +1,48 @@ +// Brought in as is from opentelemetry-collector-contrib + +package json + +import ( + jsoniter "github.com/json-iterator/go" + "go.opentelemetry.io/collector/component" + + signozlogspipelinestanzaoperator "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +const operatorType = "json_parser" + +func init() { + signozlogspipelinestanzaoperator.Register(operatorType, func() operator.Builder { return NewConfig() }) +} + +// NewConfig creates a new JSON parser config with default values +func NewConfig() *Config { + return NewConfigWithID(operatorType) +} + +// NewConfigWithID creates a new JSON parser config with default values +func NewConfigWithID(operatorID string) *Config { + return &Config{ + ParserConfig: helper.NewParserConfig(operatorID, 
operatorType), + } +} + +// Config is the configuration of a JSON parser operator. +type Config struct { + helper.ParserConfig `mapstructure:",squash"` +} + +// Build will build a JSON parser operator. +func (c Config) Build(set component.TelemetrySettings) (operator.Operator, error) { + parserOperator, err := c.ParserConfig.Build(set) + if err != nil { + return nil, err + } + + return &Parser{ + ParserOperator: parserOperator, + json: jsoniter.ConfigFastest, + }, nil +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/json/config_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/json/config_test.go new file mode 100644 index 00000000..3a0267f1 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/json/config_test.go @@ -0,0 +1,114 @@ +// Brought in as is from opentelemetry-collector-contrib +package json + +import ( + "path/filepath" + "testing" + + "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operatortest" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +func TestConfig(t *testing.T) { + operatortest.ConfigUnmarshalTests{ + DefaultConfig: NewConfig(), + TestsFile: filepath.Join(".", "testdata", "config.yaml"), + Tests: []operatortest.ConfigUnmarshalTest{ + { + Name: "default", + Expect: NewConfig(), + }, + { + Name: "on_error_drop", + Expect: func() *Config { + cfg := NewConfig() + cfg.OnError = "drop" + return cfg + }(), + }, + { + Name: "parse_from_simple", + Expect: func() *Config { + cfg := NewConfig() + cfg.ParseFrom = entry.NewBodyField("from") + return cfg + }(), + }, + { + Name: "parse_to_simple", + Expect: func() *Config { + cfg := NewConfig() + cfg.ParseTo = entry.RootableField{Field: entry.NewBodyField("log")} + return cfg + }(), + }, + { + Name: "timestamp", + Expect: func() *Config { + cfg := 
NewConfig() + parseField := entry.NewBodyField("timestamp_field") + newTime := helper.TimeParser{ + LayoutType: "strptime", + Layout: "%Y-%m-%d", + ParseFrom: &parseField, + } + cfg.TimeParser = &newTime + return cfg + }(), + }, + { + Name: "severity", + Expect: func() *Config { + cfg := NewConfig() + parseField := entry.NewBodyField("severity_field") + severityParser := helper.NewSeverityConfig() + severityParser.ParseFrom = &parseField + mapping := map[string]any{ + "critical": "5xx", + "error": "4xx", + "info": "3xx", + "debug": "2xx", + } + severityParser.Mapping = mapping + cfg.SeverityConfig = &severityParser + return cfg + }(), + }, + { + Name: "scope_name", + Expect: func() *Config { + cfg := NewConfig() + loggerNameParser := helper.NewScopeNameParser() + loggerNameParser.ParseFrom = entry.NewBodyField("logger_name_field") + cfg.ScopeNameParser = &loggerNameParser + return cfg + }(), + }, + { + Name: "parse_to_attributes", + Expect: func() *Config { + p := NewConfig() + p.ParseTo = entry.RootableField{Field: entry.NewAttributeField()} + return p + }(), + }, + { + Name: "parse_to_body", + Expect: func() *Config { + p := NewConfig() + p.ParseTo = entry.RootableField{Field: entry.NewBodyField()} + return p + }(), + }, + { + Name: "parse_to_resource", + Expect: func() *Config { + p := NewConfig() + p.ParseTo = entry.RootableField{Field: entry.NewResourceField()} + return p + }(), + }, + }, + }.Run(t) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/json/package_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/json/package_test.go new file mode 100644 index 00000000..d0db963c --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/json/package_test.go @@ -0,0 +1,13 @@ +// Brought in as is from opentelemetry-collector-contrib + +package json + +import ( + "testing" + + "go.uber.org/goleak" +) + +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} diff --git 
a/processor/signozlogspipelineprocessor/stanza/operator/operators/json/parser.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/json/parser.go new file mode 100644 index 00000000..e0b234ee --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/json/parser.go @@ -0,0 +1,39 @@ +// Brought in as is from opentelemetry-collector-contrib + +package json + +import ( + "context" + "fmt" + + jsoniter "github.com/json-iterator/go" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +// Parser is an operator that parses JSON. +type Parser struct { + helper.ParserOperator + json jsoniter.API +} + +// Process will parse an entry for JSON. +func (p *Parser) Process(ctx context.Context, entry *entry.Entry) error { + return p.ParserOperator.ProcessWith(ctx, entry, p.parse) +} + +// parse will parse a value as JSON. +func (p *Parser) parse(value any) (any, error) { + var parsedValue map[string]any + switch m := value.(type) { + case string: + err := p.json.UnmarshalFromString(m, &parsedValue) + if err != nil { + return nil, err + } + default: + return nil, fmt.Errorf("type %T cannot be parsed as JSON", value) + } + return parsedValue, nil +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/json/parser_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/json/parser_test.go new file mode 100644 index 00000000..6d30ea9b --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/json/parser_test.go @@ -0,0 +1,165 @@ +// Brought in as is from opentelemetry-collector-contrib + +package json + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component/componenttest" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + 
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/testutil" +) + +func newTestParser(t *testing.T) *Parser { + config := NewConfigWithID("test") + set := componenttest.NewNopTelemetrySettings() + op, err := config.Build(set) + require.NoError(t, err) + return op.(*Parser) +} + +func TestConfigBuild(t *testing.T) { + config := NewConfigWithID("test") + set := componenttest.NewNopTelemetrySettings() + op, err := config.Build(set) + require.NoError(t, err) + require.IsType(t, &Parser{}, op) +} + +func TestConfigBuildFailure(t *testing.T) { + config := NewConfigWithID("test") + config.OnError = "invalid_on_error" + set := componenttest.NewNopTelemetrySettings() + _, err := config.Build(set) + require.Error(t, err) + require.Contains(t, err.Error(), "invalid `on_error` field") +} + +func TestParserStringFailure(t *testing.T) { + parser := newTestParser(t) + _, err := parser.parse("invalid") + require.Error(t, err) + require.Contains(t, err.Error(), "error found in #1 byte") +} + +func TestParserByteFailure(t *testing.T) { + parser := newTestParser(t) + _, err := parser.parse([]byte("invalid")) + require.Error(t, err) + require.Contains(t, err.Error(), "type []uint8 cannot be parsed as JSON") +} + +func TestParserInvalidType(t *testing.T) { + parser := newTestParser(t) + _, err := parser.parse([]int{}) + require.Error(t, err) + require.Contains(t, err.Error(), "type []int cannot be parsed as JSON") +} + +func TestJSONImplementations(t *testing.T) { + require.Implements(t, (*operator.Operator)(nil), new(Parser)) +} + +func TestParser(t *testing.T) { + cases := []struct { + name string + configure func(*Config) + input *entry.Entry + expect *entry.Entry + }{ + { + "simple", + func(_ *Config) {}, + &entry.Entry{ + Body: `{}`, + }, + &entry.Entry{ + Attributes: map[string]any{}, + Body: 
`{}`, + }, + }, + { + "nested", + func(_ *Config) {}, + &entry.Entry{ + Body: `{"superkey":"superval"}`, + }, + &entry.Entry{ + Attributes: map[string]any{ + "superkey": "superval", + }, + Body: `{"superkey":"superval"}`, + }, + }, + { + "with_timestamp", + func(p *Config) { + parseFrom := entry.NewAttributeField("timestamp") + p.TimeParser = &helper.TimeParser{ + ParseFrom: &parseFrom, + LayoutType: "epoch", + Layout: "s", + } + }, + &entry.Entry{ + Body: `{"superkey":"superval","timestamp":1136214245}`, + }, + &entry.Entry{ + Attributes: map[string]any{ + "superkey": "superval", + "timestamp": float64(1136214245), + }, + Body: `{"superkey":"superval","timestamp":1136214245}`, + Timestamp: time.Unix(1136214245, 0), + }, + }, + { + "with_scope", + func(p *Config) { + p.ScopeNameParser = &helper.ScopeNameParser{ + ParseFrom: entry.NewAttributeField("logger_name"), + } + }, + &entry.Entry{ + Body: `{"superkey":"superval","logger_name":"logger"}`, + }, + &entry.Entry{ + Attributes: map[string]any{ + "superkey": "superval", + "logger_name": "logger", + }, + Body: `{"superkey":"superval","logger_name":"logger"}`, + ScopeName: "logger", + }, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + cfg := NewConfigWithID("test") + cfg.OutputIDs = []string{"fake"} + tc.configure(cfg) + + set := componenttest.NewNopTelemetrySettings() + op, err := cfg.Build(set) + require.NoError(t, err) + + fake := testutil.NewFakeOutput(t) + require.NoError(t, op.SetOutputs([]operator.Operator{fake})) + + ots := time.Now() + tc.input.ObservedTimestamp = ots + tc.expect.ObservedTimestamp = ots + + err = op.Process(context.Background(), tc.input) + require.NoError(t, err) + fake.ExpectEntry(t, tc.expect) + }) + } +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/json/testdata/config.yaml b/processor/signozlogspipelineprocessor/stanza/operator/operators/json/testdata/config.yaml new file mode 100644 index 00000000..4de9e105 --- /dev/null 
+++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/json/testdata/config.yaml @@ -0,0 +1,39 @@ +default: + type: json_parser +on_error_drop: + type: json_parser + on_error: drop +parse_from_simple: + type: json_parser + parse_from: body.from +parse_to_attributes: + type: json_parser + parse_to: attributes +parse_to_body: + type: json_parser + parse_to: body +parse_to_resource: + type: json_parser + parse_to: resource +parse_to_simple: + type: json_parser + parse_to: body.log +scope_name: + type: json_parser + scope_name: + parse_from: body.logger_name_field +severity: + type: json_parser + severity: + parse_from: body.severity_field + mapping: + critical: 5xx + error: 4xx + info: 3xx + debug: 2xx +timestamp: + type: json_parser + timestamp: + parse_from: body.timestamp_field + layout_type: strptime + layout: '%Y-%m-%d' From b215bbd2b68e0a1027db3f04e9780d0034854191 Mon Sep 17 00:00:00 2001 From: Raj Date: Mon, 16 Sep 2024 15:32:19 +0530 Subject: [PATCH 08/16] feat: bring in time parser operator --- .../stanza/adapter/register_ops.go | 1 + .../stanza/operator/operators/time/config.go | 58 ++ .../operator/operators/time/config_test.go | 53 ++ .../operator/operators/time/package_test.go | 13 + .../stanza/operator/operators/time/parser.go | 21 + .../operator/operators/time/parser_test.go | 540 ++++++++++++++++++ .../operators/time/testdata/config.yaml | 21 + 7 files changed, 707 insertions(+) create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/time/config.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/time/config_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/time/package_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/time/parser.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/time/parser_test.go create mode 100644 
processor/signozlogspipelineprocessor/stanza/operator/operators/time/testdata/config.yaml

diff --git a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go
index 8246e087..9010bcb9 100644
--- a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go
+++ b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go
@@ -8,4 +8,5 @@ import (
 	_ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/move"
 	_ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/regex"
 	_ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/remove"
+	_ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/time"
 )
diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/time/config.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/time/config.go
new file mode 100644
index 00000000..8174c6ac
--- /dev/null
+++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/time/config.go
@@ -0,0 +1,66 @@
+// Adapted from opentelemetry-collector-contrib
+
+package time
+
+import (
+	"go.opentelemetry.io/collector/component"
+	"go.opentelemetry.io/collector/confmap"
+
+	signozlogspipelinestanzaoperator "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator"
+	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator"
+	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper"
+)
+
+// operatorType is the type name the time parser is registered under.
+const operatorType = "time_parser"
+
+// init registers the time parser builder with the pipeline processor's
+// stanza operator registry.
+func init() {
+	signozlogspipelinestanzaoperator.Register(operatorType, func() operator.Builder { return NewConfig() })
+}
+
+// NewConfig returns a time parser config with default values whose operator
+// ID is the operator type itself.
+func NewConfig() *Config {
+	return NewConfigWithID(operatorType)
+}
+
+// NewConfigWithID returns a time parser config with default values and the
+// given operator ID.
+func NewConfigWithID(operatorID string) *Config {
+	cfg := new(Config)
+	cfg.TransformerConfig = helper.NewTransformerConfig(operatorID, operatorType)
+	cfg.TimeParser = helper.NewTimeParser()
+	return cfg
+}
+
+// Config is the configuration of a time parser operator. It embeds the
+// transformer config and the time parser settings, both squashed.
+type Config struct {
+	helper.TransformerConfig `mapstructure:",squash"`
+	helper.TimeParser        `mapstructure:",omitempty,squash"`
+}
+
+// Unmarshal decodes conf into c, passing confmap.WithIgnoreUnused so that
+// keys left unused by the decoding do not cause an error.
+func (c *Config) Unmarshal(conf *confmap.Conf) error {
+	return conf.Unmarshal(c, confmap.WithIgnoreUnused())
+}
+
+// Build creates a time parser operator from the config. The transformer
+// config is built first and the time parser settings are validated next; the
+// first error encountered is returned.
+func (c Config) Build(set component.TelemetrySettings) (operator.Operator, error) {
+	base, err := c.TransformerConfig.Build(set)
+	if err != nil {
+		return nil, err
+	}
+
+	if err = c.TimeParser.Validate(); err != nil {
+		return nil, err
+	}
+
+	parser := &Parser{TransformerOperator: base, TimeParser: c.TimeParser}
+	return parser, nil
+}
diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/time/config_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/time/config_test.go
new file mode 100644
index 00000000..8cd80e42
--- /dev/null
+++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/time/config_test.go
@@ -0,0 +1,53 @@
+// Brought in as is from opentelemetry-collector-contrib
+package time
+
+import (
+	"path/filepath"
+	"testing"
+
+	"github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operatortest"
+	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry"
+)
+
+func TestUnmarshal(t *testing.T) {
+	operatortest.ConfigUnmarshalTests{
+		DefaultConfig: NewConfig(),
+		TestsFile:     filepath.Join(".", "testdata", "config.yaml"),
+		Tests: []operatortest.ConfigUnmarshalTest{
+			{
+				Name:   "default",
+				Expect: NewConfig(),
+			},
+			{
+				Name: "on_error_drop",
+				Expect: func()
*Config { + cfg := NewConfig() + cfg.OnError = "drop" + return cfg + }(), + }, + { + Name: "parse_strptime", + Expect: func() *Config { + cfg := NewConfig() + from := entry.NewBodyField("from") + cfg.ParseFrom = &from + cfg.LayoutType = "strptime" + cfg.Layout = "%Y-%m-%d" + return cfg + }(), + }, + { + Name: "parse_gotime", + Expect: func() *Config { + cfg := NewConfig() + from := entry.NewBodyField("from") + cfg.ParseFrom = &from + cfg.LayoutType = "gotime" + cfg.Layout = "2006-01-02" + return cfg + }(), + }, + }, + }.Run(t) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/time/package_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/time/package_test.go new file mode 100644 index 00000000..833b764c --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/time/package_test.go @@ -0,0 +1,13 @@ +// Brought in as is from opentelemetry-collector-contrib + +package time + +import ( + "testing" + + "go.uber.org/goleak" +) + +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/time/parser.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/time/parser.go new file mode 100644 index 00000000..a6609e08 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/time/parser.go @@ -0,0 +1,21 @@ +// Brought in as is from opentelemetry-collector-contrib + +package time + +import ( + "context" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +// Parser is an operator that parses time from a field to an entry. +type Parser struct { + helper.TransformerOperator + helper.TimeParser +} + +// Process will parse time from an entry. 
+func (p *Parser) Process(ctx context.Context, entry *entry.Entry) error { + return p.ProcessWith(ctx, entry, p.TimeParser.Parse) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/time/parser_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/time/parser_test.go new file mode 100644 index 00000000..140b8f30 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/time/parser_test.go @@ -0,0 +1,540 @@ +// Brought in as is from opentelemetry-collector-contrib + +package time + +import ( + "context" + "math" + "testing" + "time" + + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component/componenttest" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/testutil" +) + +func TestIsZero(t *testing.T) { + require.True(t, (&helper.TimeParser{}).IsZero()) + require.False(t, (&helper.TimeParser{Layout: "strptime"}).IsZero()) +} + +func TestBuild(t *testing.T) { + testCases := []struct { + name string + input func() (*Config, error) + expectErr bool + }{ + { + "empty", + func() (*Config, error) { + return &Config{}, nil + }, + true, + }, + { + "basic", + func() (*Config, error) { + cfg := NewConfigWithID("test_id") + parseFrom, err := entry.NewField("body.app_time") + if err != nil { + return cfg, err + } + cfg.ParseFrom = &parseFrom + cfg.LayoutType = "gotime" + cfg.Layout = "Mon Jan 2 15:04:05 MST 2006" + return cfg, nil + }, + false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + cfg, err := tc.input() + require.NoError(t, err, "expected nil error when running test cases input func") + set := 
componenttest.NewNopTelemetrySettings() + op, err := cfg.Build(set) + if tc.expectErr { + require.Error(t, err, "expected error while building time_parser operator") + return + } + require.NoError(t, err, "did not expect error while building time_parser operator") + require.NotNil(t, op, "expected Build to return an operator") + }) + } +} + +func TestProcess(t *testing.T) { + now := time.Now() + + testCases := []struct { + name string + config func() (*Config, error) + input *entry.Entry + expect *entry.Entry + }{ + { + name: "promote", + config: func() (*Config, error) { + cfg := NewConfigWithID("test_id") + parseFrom, err := entry.NewField("body.app_time") + if err != nil { + return nil, err + } + cfg.ParseFrom = &parseFrom + cfg.LayoutType = "gotime" + cfg.Layout = "Mon Jan 2 15:04:05 MST 2006" + return cfg, nil + }, + input: func() *entry.Entry { + e := entry.New() + e.ObservedTimestamp = now + e.Body = map[string]any{ + "app_time": "Mon Jan 2 15:04:05 UTC 2006", + } + return e + }(), + expect: &entry.Entry{ + ObservedTimestamp: now, + Timestamp: time.Date(2006, time.January, 2, 15, 4, 5, 0, time.UTC), + Body: map[string]any{ + "app_time": "Mon Jan 2 15:04:05 UTC 2006", + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + cfg, err := tc.config() + if err != nil { + require.NoError(t, err) + return + } + set := componenttest.NewNopTelemetrySettings() + op, err := cfg.Build(set) + if err != nil { + require.NoError(t, err) + return + } + + require.True(t, op.CanOutput(), "expected test operator CanOutput to return true") + + err = op.Process(context.Background(), tc.input) + require.NoError(t, err) + require.Equal(t, tc.expect, tc.input) + }) + } +} + +func TestTimeParser(t *testing.T) { + // Mountain Standard Time + mst, err := time.LoadLocation("MST") + require.NoError(t, err) + + // Hawaiian Standard Time + hst, err := time.LoadLocation("HST") + require.NoError(t, err) + + testCases := []struct { + name string + sample 
any + expected time.Time + gotimeLayout string + strptimeLayout string + }{ + { + name: "unix-utc", + sample: "Mon Jan 2 15:04:05 UTC 2006", + expected: time.Date(2006, time.January, 2, 15, 4, 5, 0, time.UTC), + gotimeLayout: "Mon Jan 2 15:04:05 MST 2006", + strptimeLayout: "%a %b %e %H:%M:%S %Z %Y", + }, + { + name: "unix-mst", + sample: "Mon Jan 2 15:04:05 MST 2006", + expected: time.Date(2006, time.January, 2, 15, 4, 5, 0, mst), + gotimeLayout: "Mon Jan 2 15:04:05 MST 2006", + strptimeLayout: "%a %b %e %H:%M:%S %Z %Y", + }, + { + name: "unix-hst", + sample: "Mon Jan 2 15:04:05 HST 2006", + expected: time.Date(2006, time.January, 2, 15, 4, 5, 0, hst), + gotimeLayout: "Mon Jan 2 15:04:05 MST 2006", + strptimeLayout: "%a %b %e %H:%M:%S %Z %Y", + }, + { + name: "almost-unix", + sample: "Mon Jan 02 15:04:05 MST 2006", + expected: time.Date(2006, time.January, 2, 15, 4, 5, 0, mst), + gotimeLayout: "Mon Jan 02 15:04:05 MST 2006", + strptimeLayout: "%a %b %d %H:%M:%S %Z %Y", + }, + + { + name: "opendistro", + sample: "2020-06-09T15:39:58", + expected: time.Date(2020, time.June, 9, 15, 39, 58, 0, time.Local), + gotimeLayout: "2006-01-02T15:04:05", + strptimeLayout: "%Y-%m-%dT%H:%M:%S", + }, + { + name: "postgres", + sample: "2019-11-05 10:38:35.118 HST", + expected: time.Date(2019, time.November, 5, 10, 38, 35, 118*1000*1000, hst), + gotimeLayout: "2006-01-02 15:04:05.999 MST", + strptimeLayout: "%Y-%m-%d %H:%M:%S.%L %Z", + }, + { + name: "ibm-mq", + sample: "3/4/2018 11:52:29", + expected: time.Date(2018, time.March, 4, 11, 52, 29, 0, time.Local), + gotimeLayout: "1/2/2006 15:04:05", + strptimeLayout: "%q/%g/%Y %H:%M:%S", + }, + { + name: "cassandra", + sample: "2019-11-27T09:34:32.901-1000", + expected: time.Date(2019, time.November, 27, 9, 34, 32, 901*1000*1000, hst), + gotimeLayout: "2006-01-02T15:04:05.999-0700", + strptimeLayout: "%Y-%m-%dT%H:%M:%S.%L%z", + }, + { + name: "oracle", + sample: "2019-10-15T10:42:01.900436-10:00", + expected: time.Date(2019, 
time.October, 15, 10, 42, 01, 900436*1000, hst), + gotimeLayout: "2006-01-02T15:04:05.999999-07:00", + strptimeLayout: "%Y-%m-%dT%H:%M:%S.%f%j", + }, + { + name: "oracle-listener", + sample: "22-JUL-2019 15:16:13", + expected: time.Date(2019, time.July, 22, 15, 16, 13, 0, time.Local), + gotimeLayout: "02-Jan-2006 15:04:05", + strptimeLayout: "%d-%b-%Y %H:%M:%S", + }, + { + name: "k8s", + sample: "2019-03-08T18:41:12.152531115Z", + expected: time.Date(2019, time.March, 8, 18, 41, 12, 152531115, time.UTC), + gotimeLayout: "2006-01-02T15:04:05.999999999Z", + strptimeLayout: "%Y-%m-%dT%H:%M:%S.%sZ", + }, + { + name: "jetty", + sample: "05/Aug/2019:20:38:46 +0000", + expected: time.Date(2019, time.August, 5, 20, 38, 46, 0, time.UTC), + gotimeLayout: "02/Jan/2006:15:04:05 -0700", + strptimeLayout: "%d/%b/%Y:%H:%M:%S %z", + }, + { + name: "esxi", + sample: "2020-12-16T21:43:28.391Z", + expected: time.Date(2020, 12, 16, 21, 43, 28, 391*1000*1000, time.UTC), + gotimeLayout: "2006-01-02T15:04:05.999Z", + strptimeLayout: "%Y-%m-%dT%H:%M:%S.%LZ", + }, + } + + rootField := entry.NewBodyField() + someField := entry.NewBodyField("some_field") + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + gotimeRootCfg := parseTimeTestConfig(helper.GotimeKey, tc.gotimeLayout, rootField) + t.Run("gotime-root", runTimeParseTest(t, gotimeRootCfg, makeTestEntry(t, rootField, tc.sample), false, false, tc.expected)) + + gotimeNonRootCfg := parseTimeTestConfig(helper.GotimeKey, tc.gotimeLayout, someField) + t.Run("gotime-non-root", runTimeParseTest(t, gotimeNonRootCfg, makeTestEntry(t, someField, tc.sample), false, false, tc.expected)) + + strptimeRootCfg := parseTimeTestConfig(helper.StrptimeKey, tc.strptimeLayout, rootField) + t.Run("strptime-root", runTimeParseTest(t, strptimeRootCfg, makeTestEntry(t, rootField, tc.sample), false, false, tc.expected)) + + strptimeNonRootCfg := parseTimeTestConfig(helper.StrptimeKey, tc.strptimeLayout, someField) + 
t.Run("strptime-non-root", runTimeParseTest(t, strptimeNonRootCfg, makeTestEntry(t, someField, tc.sample), false, false, tc.expected)) + }) + } +} + +func TestTimeEpochs(t *testing.T) { + testCases := []struct { + name string + sample any + layout string + expected time.Time + maxLoss time.Duration + }{ + { + name: "s-default-string", + sample: "1136214245", + layout: "s", + expected: time.Unix(1136214245, 0), + }, + { + name: "s-default-bytes", + sample: []byte("1136214245"), + layout: "s", + expected: time.Unix(1136214245, 0), + }, + { + name: "s-default-int", + sample: 1136214245, + layout: "s", + expected: time.Unix(1136214245, 0), + }, + { + name: "s-default-float", + sample: 1136214245.0, + layout: "s", + expected: time.Unix(1136214245, 0), + }, + { + name: "ms-default-string", + sample: "1136214245123", + layout: "ms", + expected: time.Unix(1136214245, 123000000), + }, + { + name: "ms-default-int", + sample: 1136214245123, + layout: "ms", + expected: time.Unix(1136214245, 123000000), + }, + { + name: "ms-default-float", + sample: 1136214245123.0, + layout: "ms", + expected: time.Unix(1136214245, 123000000), + }, + { + name: "us-default-string", + sample: "1136214245123456", + layout: "us", + expected: time.Unix(1136214245, 123456000), + }, + { + name: "us-default-int", + sample: 1136214245123456, + layout: "us", + expected: time.Unix(1136214245, 123456000), + }, + { + name: "us-default-float", + sample: 1136214245123456.0, + layout: "us", + expected: time.Unix(1136214245, 123456000), + }, + { + name: "ns-default-string", + sample: "1136214245123456789", + layout: "ns", + expected: time.Unix(1136214245, 123456789), + }, + { + name: "ns-default-int", + sample: 1136214245123456789, + layout: "ns", + expected: time.Unix(1136214245, 123456789), + }, + { + name: "ns-default-float", + sample: 1136214245123456789.0, + layout: "ns", + expected: time.Unix(1136214245, 123456789), + maxLoss: time.Nanosecond * 100, + }, + { + name: "s.ms-default-string", + sample: 
"1136214245.123", + layout: "s.ms", + expected: time.Unix(1136214245, 123000000), + }, + { + name: "s.ms-default-int", + sample: 1136214245, + layout: "s.ms", + expected: time.Unix(1136214245, 0), // drops subseconds + maxLoss: time.Nanosecond * 100, + }, + { + name: "s.ms-default-float", + sample: 1136214245.123, + layout: "s.ms", + expected: time.Unix(1136214245, 123000000), + }, + { + name: "s.us-default-string", + sample: "1136214245.123456", + layout: "s.us", + expected: time.Unix(1136214245, 123456000), + }, + { + name: "s.us-default-int", + sample: 1136214245, + layout: "s.us", + expected: time.Unix(1136214245, 0), // drops subseconds + maxLoss: time.Nanosecond * 100, + }, + { + name: "s.us-default-float", + sample: 1136214245.123456, + layout: "s.us", + expected: time.Unix(1136214245, 123456000), + }, + { + name: "s.ns-default-string", + sample: "1136214245.123456789", + layout: "s.ns", + expected: time.Unix(1136214245, 123456789), + }, + { + name: "s.ns-default-int", + sample: 1136214245, + layout: "s.ns", + expected: time.Unix(1136214245, 0), // drops subseconds + maxLoss: time.Nanosecond * 100, + }, + { + name: "s.ns-default-float", + sample: 1136214245.123456789, + layout: "s.ns", + expected: time.Unix(1136214245, 123456789), + maxLoss: time.Nanosecond * 100, + }, + } + + rootField := entry.NewBodyField() + someField := entry.NewBodyField("some_field") + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + rootCfg := parseTimeTestConfig(helper.EpochKey, tc.layout, rootField) + t.Run("epoch-root", runLossyTimeParseTest(t, rootCfg, makeTestEntry(t, rootField, tc.sample), false, false, tc.expected, tc.maxLoss)) + + nonRootCfg := parseTimeTestConfig(helper.EpochKey, tc.layout, someField) + t.Run("epoch-non-root", runLossyTimeParseTest(t, nonRootCfg, makeTestEntry(t, someField, tc.sample), false, false, tc.expected, tc.maxLoss)) + }) + } +} + +func TestTimeErrors(t *testing.T) { + testCases := []struct { + name string + sample any + 
layoutType string + layout string + buildErr bool + parseErr bool + }{ + { + name: "bad-layout-type", + layoutType: "fake", + buildErr: true, + }, + { + name: "bad-strptime-directive", + layoutType: "strptime", + layout: "%1", + buildErr: true, + }, + { + name: "bad-epoch-layout", + layoutType: "epoch", + layout: "years", + buildErr: true, + }, + { + name: "bad-native-value", + layoutType: "native", + sample: 1, + parseErr: true, + }, + { + name: "bad-gotime-value", + layoutType: "gotime", + layout: time.Kitchen, + sample: 1, + parseErr: true, + }, + { + name: "bad-epoch-value", + layoutType: "epoch", + layout: "s", + sample: "not-a-number", + parseErr: true, + }, + } + + rootField := entry.NewBodyField() + someField := entry.NewBodyField("some_field") + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + rootCfg := parseTimeTestConfig(tc.layoutType, tc.layout, rootField) + t.Run("err-root", runTimeParseTest(t, rootCfg, makeTestEntry(t, rootField, tc.sample), tc.buildErr, tc.parseErr, time.Now())) + + nonRootCfg := parseTimeTestConfig(tc.layoutType, tc.layout, someField) + t.Run("err-non-root", runTimeParseTest(t, nonRootCfg, makeTestEntry(t, someField, tc.sample), tc.buildErr, tc.parseErr, time.Now())) + }) + } +} + +func makeTestEntry(t *testing.T, field entry.Field, value any) *entry.Entry { + e := entry.New() + require.NoError(t, e.Set(field, value)) + return e +} + +func runTimeParseTest(t *testing.T, cfg *Config, ent *entry.Entry, buildErr bool, parseErr bool, expected time.Time) func(*testing.T) { + return runLossyTimeParseTest(t, cfg, ent, buildErr, parseErr, expected, time.Duration(0)) +} + +func runLossyTimeParseTest(_ *testing.T, cfg *Config, ent *entry.Entry, buildErr bool, parseErr bool, expected time.Time, maxLoss time.Duration) func(*testing.T) { + return func(t *testing.T) { + set := componenttest.NewNopTelemetrySettings() + op, err := cfg.Build(set) + if buildErr { + require.Error(t, err, "expected error when configuring 
operator") + return + } + require.NoError(t, err) + + mockOutput := &testutil.Operator{} + resultChan := make(chan *entry.Entry, 1) + mockOutput.On("Process", mock.Anything, mock.Anything).Run(func(args mock.Arguments) { + resultChan <- args.Get(1).(*entry.Entry) + }).Return(nil) + + timeParser := op.(*Parser) + timeParser.OutputOperators = []operator.Operator{mockOutput} + + ots := ent.ObservedTimestamp + err = timeParser.Parse(ent) + if parseErr { + require.Error(t, err, "expected error when configuring operator") + return + } + require.NoError(t, err) + require.Equal(t, ots, ent.ObservedTimestamp, "time parsing should not change observed timestamp") + + diff := time.Duration(math.Abs(float64(expected.Sub(ent.Timestamp)))) + require.True(t, diff <= maxLoss) + } +} + +func parseTimeTestConfig(layoutType, layout string, parseFrom entry.Field) *Config { + cfg := NewConfigWithID("test_operator_id") + cfg.OutputIDs = []string{"output1"} + cfg.TimeParser = helper.TimeParser{ + LayoutType: layoutType, + Layout: layout, + ParseFrom: &parseFrom, + } + return cfg +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/time/testdata/config.yaml b/processor/signozlogspipelineprocessor/stanza/operator/operators/time/testdata/config.yaml new file mode 100644 index 00000000..ca2efaac --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/time/testdata/config.yaml @@ -0,0 +1,21 @@ +default: + type: time_parser +on_error_drop: + type: time_parser + on_error: drop +parse_strptime: + type: time_parser + parse_from: body.from + layout_type: strptime + layout: '%Y-%m-%d' +parse_gotime: + type: time_parser + parse_from: body.from + layout_type: gotime + layout: '2006-01-02' +no_nested: + type: time_parser + timestamp: + parse_from: body.timestamp_field + layout_type: strptime + layout: '%Y-%m-%d' From d0181ea331cf2c71c1091eb6a4d9c19628442447 Mon Sep 17 00:00:00 2001 From: Raj Date: Mon, 16 Sep 2024 15:35:22 +0530 Subject: [PATCH 
09/16] feat: bring in severity parser operator --- .../stanza/adapter/register_ops.go | 1 + .../operator/operators/severity/config.go | 54 ++++ .../operators/severity/config_test.go | 50 ++++ .../operators/severity/package_test.go | 14 + .../operator/operators/severity/parser.go | 21 ++ .../operators/severity/parser_test.go | 268 ++++++++++++++++++ .../operators/severity/testdata/config.yaml | 21 ++ 7 files changed, 429 insertions(+) create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/severity/config.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/severity/config_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/severity/package_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/severity/parser.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/severity/parser_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/severity/testdata/config.yaml diff --git a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go index 9010bcb9..69dfe015 100644 --- a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go +++ b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go @@ -8,5 +8,6 @@ import ( _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/move" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/regex" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/remove" + _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/severity" _ 
"github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/time" ) diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/severity/config.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/severity/config.go new file mode 100644 index 00000000..b4e5b213 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/severity/config.go @@ -0,0 +1,54 @@ +// Brought in as is from opentelemetry-collector-contrib + +package severity + +import ( + "go.opentelemetry.io/collector/component" + + signozlogspipelinestanzaoperator "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +const operatorType = "severity_parser" + +func init() { + signozlogspipelinestanzaoperator.Register(operatorType, func() operator.Builder { return NewConfig() }) +} + +// NewConfig creates a new severity parser config with default values +func NewConfig() *Config { + return NewConfigWithID(operatorType) +} + +// NewConfigWithID creates a new severity parser config with default values +func NewConfigWithID(operatorID string) *Config { + return &Config{ + TransformerConfig: helper.NewTransformerConfig(operatorID, operatorType), + SeverityConfig: helper.NewSeverityConfig(), + } +} + +// Config is the configuration of a severity parser operator. +type Config struct { + helper.TransformerConfig `mapstructure:",squash"` + helper.SeverityConfig `mapstructure:",omitempty,squash"` +} + +// Build will build a severity parser operator. 
+func (c Config) Build(set component.TelemetrySettings) (operator.Operator, error) {
+	base, err := c.TransformerConfig.Build(set)
+	if err != nil {
+		return nil, err
+	}
+	// The severity helper is built only after the transformer config succeeded.
+	sevParser, err := c.SeverityConfig.Build(set)
+	if err != nil {
+		return nil, err
+	}
+
+	parser := new(Parser)
+	parser.TransformerOperator = base
+	parser.SeverityParser = sevParser
+	return parser, nil
+}
diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/severity/config_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/severity/config_test.go
new file mode 100644
index 00000000..888312bc
--- /dev/null
+++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/severity/config_test.go
@@ -0,0 +1,50 @@
+// Brought in as is from opentelemetry-collector-contrib
+package severity
+
+import (
+	"path/filepath"
+	"testing"
+
+	"github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operatortest"
+	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry"
+)
+
+func TestUnmarshal(t *testing.T) {
+	operatortest.ConfigUnmarshalTests{
+		DefaultConfig: NewConfig(),
+		TestsFile:     filepath.Join(".", "testdata", "config.yaml"),
+		Tests: []operatortest.ConfigUnmarshalTest{
+			{
+				Name:   "default",
+				Expect: NewConfig(),
+			},
+			{
+				Name: "on_error_drop",
+				Expect: func() *Config {
+					cfg := NewConfig()
+					cfg.OnError = "drop"
+					return cfg
+				}(),
+			},
+			{
+				Name: "parse_from_simple",
+				Expect: func() *Config {
+					cfg := NewConfig()
+					from := entry.NewBodyField("from")
+					cfg.ParseFrom = &from
+					return cfg
+				}(),
+			},
+			{
+				Name: "parse_with_preset",
+				Expect: func() *Config {
+					cfg := NewConfig()
+					from := entry.NewBodyField("from")
+					cfg.ParseFrom = &from
+					cfg.Preset = "http"
+					return cfg
+				}(),
+			},
+		},
+	}.Run(t)
+}
diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/severity/package_test.go
b/processor/signozlogspipelineprocessor/stanza/operator/operators/severity/package_test.go new file mode 100644 index 00000000..51a2fb84 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/severity/package_test.go @@ -0,0 +1,14 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package severity + +import ( + "testing" + + "go.uber.org/goleak" +) + +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/severity/parser.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/severity/parser.go new file mode 100644 index 00000000..133f7f8e --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/severity/parser.go @@ -0,0 +1,21 @@ +// Brought in as is from opentelemetry-collector-contrib + +package severity + +import ( + "context" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +// Parser is an operator that parses severity from a field to an entry. +type Parser struct { + helper.TransformerOperator + helper.SeverityParser +} + +// Process will parse severity from an entry. 
+func (p *Parser) Process(ctx context.Context, entry *entry.Entry) error { + return p.ProcessWith(ctx, entry, p.Parse) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/severity/parser_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/severity/parser_test.go new file mode 100644 index 00000000..b61cadef --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/severity/parser_test.go @@ -0,0 +1,268 @@ +// Brought in as is from opentelemetry-collector-contrib + +package severity + +import ( + "testing" + + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component/componenttest" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/testutil" +) + +type severityTestCase struct { + name string + sample any + mappingSet string + mapping map[string]any + buildErr bool + parseErr bool + expected entry.Severity +} + +func TestSeverityParser(t *testing.T) { + allTheThingsMap := map[string]any{ + "info": "3xx", + "error3": "4xx", + "debug4": "5xx", + "trace2": []any{ + "ttttttracer", + []byte{100, 100, 100}, + map[string]any{"min": 1111, "max": 1234}, + }, + "fatal2": "", + } + + testCases := []severityTestCase{ + { + name: "unknown", + sample: "blah", + mapping: nil, + expected: entry.Default, + }, + { + name: "error", + sample: "error", + mapping: nil, + expected: entry.Error, + }, + { + name: "error-capitalized", + sample: "Error", + mapping: nil, + expected: entry.Error, + }, + { + name: "error-all-caps", + sample: "ERROR", + mapping: nil, + expected: entry.Error, + }, + { + name: "custom-string", + sample: "NOOOOOOO", + mapping: map[string]any{"error": "NOOOOOOO"}, + 
expected: entry.Error, + }, + { + name: "custom-string-caps-key", + sample: "NOOOOOOO", + mapping: map[string]any{"ErRoR": "NOOOOOOO"}, + expected: entry.Error, + }, + { + name: "custom-int", + sample: 1234, + mapping: map[string]any{"error": 1234}, + expected: entry.Error, + }, + { + name: "mixed-list-string", + sample: "ThiS Is BaD", + mapping: map[string]any{"error": []any{"NOOOOOOO", "this is bad", 1234}}, + expected: entry.Error, + }, + { + name: "mixed-list-int", + sample: 1234, + mapping: map[string]any{"error": []any{"NOOOOOOO", "this is bad", 1234}}, + expected: entry.Error, + }, + { + name: "in-range", + sample: 123, + mapping: map[string]any{"error": map[string]any{"min": 120, "max": 125}}, + expected: entry.Error, + }, + { + name: "in-range-min", + sample: 120, + mapping: map[string]any{"error": map[string]any{"min": 120, "max": 125}}, + expected: entry.Error, + }, + { + name: "in-range-max", + sample: 125, + mapping: map[string]any{"error": map[string]any{"min": 120, "max": 125}}, + expected: entry.Error, + }, + { + name: "out-of-range-min-minus", + sample: 119, + mapping: map[string]any{"error": map[string]any{"min": 120, "max": 125}}, + expected: entry.Default, + }, + { + name: "out-of-range-max-plus", + sample: 126, + mapping: map[string]any{"error": map[string]any{"min": 120, "max": 125}}, + expected: entry.Default, + }, + { + name: "range-out-of-order", + sample: 123, + mapping: map[string]any{"error": map[string]any{"min": 125, "max": 120}}, + expected: entry.Error, + }, + { + name: "Http2xx-hit", + sample: 201, + mapping: map[string]any{"error": "2xx"}, + expected: entry.Error, + }, + { + name: "Http2xx-miss", + sample: 301, + mapping: map[string]any{"error": "2xx"}, + expected: entry.Default, + }, + { + name: "Http3xx-hit", + sample: 301, + mapping: map[string]any{"error": "3xx"}, + expected: entry.Error, + }, + { + name: "Http4xx-hit", + sample: "404", + mapping: map[string]any{"error": "4xx"}, + expected: entry.Error, + }, + { + name: 
"Http5xx-hit", + sample: 555, + mapping: map[string]any{"error": "5xx"}, + expected: entry.Error, + }, + { + name: "Http-All", + sample: "301", + mapping: map[string]any{"debug": "2xx", "info": "3xx", "error": "4xx", "warn": "5xx"}, + expected: entry.Info, + }, + { + name: "all-the-things-midrange", + sample: 1234, + mapping: allTheThingsMap, + expected: entry.Trace2, + }, + { + name: "all-the-things-bytes", + sample: []byte{100, 100, 100}, + mapping: allTheThingsMap, + expected: entry.Trace2, + }, + { + name: "all-the-things-empty", + sample: "", + mapping: allTheThingsMap, + expected: entry.Fatal2, + }, + { + name: "all-the-things-3xx", + sample: "399", + mapping: allTheThingsMap, + expected: entry.Info, + }, + { + name: "all-the-things-miss", + sample: "miss", + mapping: allTheThingsMap, + expected: entry.Default, + }, + { + name: "base-mapping-none", + sample: "error", + mappingSet: "none", + mapping: nil, + expected: entry.Default, // not error + }, + } + + rootField := entry.NewBodyField() + someField := entry.NewBodyField("some_field") + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + rootCfg := parseSeverityTestConfig(rootField, tc.mappingSet, tc.mapping) + rootEntry := makeTestEntry(t, rootField, tc.sample) + t.Run("root", runSeverityParseTest(rootCfg, rootEntry, tc.buildErr, tc.parseErr, tc.expected)) + + nonRootCfg := parseSeverityTestConfig(someField, tc.mappingSet, tc.mapping) + nonRootEntry := makeTestEntry(t, someField, tc.sample) + t.Run("non-root", runSeverityParseTest(nonRootCfg, nonRootEntry, tc.buildErr, tc.parseErr, tc.expected)) + }) + } +} + +func runSeverityParseTest(cfg *Config, ent *entry.Entry, buildErr bool, parseErr bool, expected entry.Severity) func(*testing.T) { + return func(t *testing.T) { + set := componenttest.NewNopTelemetrySettings() + op, err := cfg.Build(set) + if buildErr { + require.Error(t, err, "expected error when configuring operator") + return + } + require.NoError(t, err, "unexpected error 
when configuring operator") + + mockOutput := &testutil.Operator{} + resultChan := make(chan *entry.Entry, 1) + mockOutput.On("Process", mock.Anything, mock.Anything).Run(func(args mock.Arguments) { + resultChan <- args.Get(1).(*entry.Entry) + }).Return(nil) + + severityParser := op.(*Parser) + severityParser.OutputOperators = []operator.Operator{mockOutput} + + err = severityParser.Parse(ent) + if parseErr { + require.Error(t, err, "expected error when parsing sample") + return + } + require.NoError(t, err) + + require.Equal(t, expected, ent.Severity) + } +} + +func parseSeverityTestConfig(parseFrom entry.Field, preset string, mapping map[string]any) *Config { + cfg := NewConfigWithID("test_operator_id") + cfg.OutputIDs = []string{"output1"} + cfg.SeverityConfig = helper.SeverityConfig{ + ParseFrom: &parseFrom, + Preset: preset, + Mapping: mapping, + } + return cfg +} + +func makeTestEntry(t *testing.T, field entry.Field, value any) *entry.Entry { + e := entry.New() + require.NoError(t, e.Set(field, value)) + return e +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/severity/testdata/config.yaml b/processor/signozlogspipelineprocessor/stanza/operator/operators/severity/testdata/config.yaml new file mode 100644 index 00000000..2cc4fb85 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/severity/testdata/config.yaml @@ -0,0 +1,21 @@ +default: + type: severity_parser +on_error_drop: + type: severity_parser + on_error: drop +parse_from_simple: + type: severity_parser + parse_from: body.from +parse_with_preset: + type: severity_parser + parse_from: body.from + preset: http +no_nested: + type: severity_parser + severity: + parse_from: body.severity_field + mapping: + critical: 5xx + error: 4xx + info: 3xx + debug: 2xx From a30e743000594a38242986047732c96d33650c63 Mon Sep 17 00:00:00 2001 From: Raj Date: Mon, 16 Sep 2024 15:39:44 +0530 Subject: [PATCH 10/16] feat: bring in trace parser operator --- 
.../stanza/adapter/register_ops.go | 1 + .../operators/severity/package_test.go | 3 +- .../stanza/operator/operators/trace/config.go | 53 ++++ .../operator/operators/trace/config_test.go | 68 ++++ .../operator/operators/trace/package_test.go | 13 + .../stanza/operator/operators/trace/parser.go | 21 ++ .../operator/operators/trace/parser_test.go | 290 ++++++++++++++++++ .../operators/trace/testdata/config.yaml | 17 + 8 files changed, 464 insertions(+), 2 deletions(-) create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/trace/config.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/trace/config_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/trace/package_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/trace/parser.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/trace/parser_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/trace/testdata/config.yaml diff --git a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go index 69dfe015..08f74eae 100644 --- a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go +++ b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go @@ -10,4 +10,5 @@ import ( _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/remove" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/severity" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/time" + _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/trace" ) diff --git 
a/processor/signozlogspipelineprocessor/stanza/operator/operators/severity/package_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/severity/package_test.go index 51a2fb84..8ff1e4db 100644 --- a/processor/signozlogspipelineprocessor/stanza/operator/operators/severity/package_test.go +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/severity/package_test.go @@ -1,5 +1,4 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 +// Brought in as is from opentelemetry-collector-contrib package severity diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/trace/config.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/trace/config.go new file mode 100644 index 00000000..9baff777 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/trace/config.go @@ -0,0 +1,53 @@ +// Brought in as is from opentelemetry-collector-contrib + +package trace + +import ( + "go.opentelemetry.io/collector/component" + + signozlogspipelinestanzaoperator "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +const operatorType = "trace_parser" + +func init() { + signozlogspipelinestanzaoperator.Register(operatorType, func() operator.Builder { return NewConfig() }) +} + +// NewConfig creates a new trace parser config with default values +func NewConfig() *Config { + return NewConfigWithID(operatorType) +} + +// NewConfigWithID creates a new trace parser config with default values +func NewConfigWithID(operatorID string) *Config { + return &Config{ + TransformerConfig: helper.NewTransformerConfig(operatorID, operatorType), + TraceParser: helper.NewTraceParser(), + } +} + +// Config is the configuration of a trace parser operator. 
+type Config struct { + helper.TransformerConfig `mapstructure:",squash"` + helper.TraceParser `mapstructure:",omitempty,squash"` +} + +// Build will build a trace parser operator. +func (c Config) Build(set component.TelemetrySettings) (operator.Operator, error) { + transformerOperator, err := c.TransformerConfig.Build(set) + if err != nil { + return nil, err + } + + if err := c.TraceParser.Validate(); err != nil { + return nil, err + } + + return &Parser{ + TransformerOperator: transformerOperator, + TraceParser: c.TraceParser, + }, nil +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/trace/config_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/trace/config_test.go new file mode 100644 index 00000000..c96d9e99 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/trace/config_test.go @@ -0,0 +1,68 @@ +// Brought in as is from opentelemetry-collector-contrib +package trace + +import ( + "path/filepath" + "testing" + + "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operatortest" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +func TestConfig(t *testing.T) { + operatortest.ConfigUnmarshalTests{ + DefaultConfig: NewConfig(), + TestsFile: filepath.Join(".", "testdata", "config.yaml"), + Tests: []operatortest.ConfigUnmarshalTest{ + { + Name: "default", + Expect: NewConfig(), + }, + { + Name: "on_error_drop", + Expect: func() *Config { + cfg := NewConfig() + cfg.OnError = "drop" + return cfg + }(), + }, + { + Name: "spanid", + Expect: func() *Config { + parseFrom := entry.NewBodyField("app_span_id") + cfg := helper.SpanIDConfig{} + cfg.ParseFrom = &parseFrom + + c := NewConfig() + c.SpanID = &cfg + return c + }(), + }, + { + Name: "traceid", + Expect: func() *Config { + parseFrom := 
entry.NewBodyField("app_trace_id") + cfg := helper.TraceIDConfig{} + cfg.ParseFrom = &parseFrom + + c := NewConfig() + c.TraceID = &cfg + return c + }(), + }, + { + Name: "trace_flags", + Expect: func() *Config { + parseFrom := entry.NewBodyField("app_trace_flags_id") + cfg := helper.TraceFlagsConfig{} + cfg.ParseFrom = &parseFrom + + c := NewConfig() + c.TraceFlags = &cfg + return c + }(), + }, + }, + }.Run(t) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/trace/package_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/trace/package_test.go new file mode 100644 index 00000000..13746055 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/trace/package_test.go @@ -0,0 +1,13 @@ +// Brought in as is from opentelemetry-collector-contrib + +package trace + +import ( + "testing" + + "go.uber.org/goleak" +) + +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/trace/parser.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/trace/parser.go new file mode 100644 index 00000000..07d180e7 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/trace/parser.go @@ -0,0 +1,21 @@ +// Brought in as is from opentelemetry-collector-contrib + +package trace + +import ( + "context" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +// Parser is an operator that parses traces from fields to an entry. +type Parser struct { + helper.TransformerOperator + helper.TraceParser +} + +// Process will parse traces from an entry. 
+func (p *Parser) Process(ctx context.Context, entry *entry.Entry) error { + return p.ProcessWith(ctx, entry, p.Parse) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/trace/parser_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/trace/parser_test.go new file mode 100644 index 00000000..460ae41b --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/trace/parser_test.go @@ -0,0 +1,290 @@ +// Brought in as is from opentelemetry-collector-contrib + +package trace + +import ( + "context" + "encoding/hex" + "testing" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component/componenttest" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" +) + +func TestDefaultParser(t *testing.T) { + traceParserConfig := NewConfig() + set := componenttest.NewNopTelemetrySettings() + _, err := traceParserConfig.Build(set) + require.NoError(t, err) +} + +func TestBuild(t *testing.T) { + testCases := []struct { + name string + input func() (*Config, error) + expectErr bool + }{ + { + "empty", + func() (*Config, error) { + return &Config{}, nil + }, + true, + }, + { + "default", + func() (*Config, error) { + cfg := NewConfigWithID("test_id") + return cfg, nil + }, + false, + }, + { + "spanid", + func() (*Config, error) { + parseFrom := entry.NewBodyField("app_span_id") + cfg := NewConfigWithID("test_id") + cfg.SpanID.ParseFrom = &parseFrom + return cfg, nil + }, + false, + }, + { + "traceid", + func() (*Config, error) { + parseFrom := entry.NewBodyField("app_trace_id") + cfg := NewConfigWithID("test_id") + cfg.TraceID.ParseFrom = &parseFrom + return cfg, nil + }, + false, + }, + { + "trace-flags", + func() (*Config, error) { + parseFrom := entry.NewBodyField("trace-flags-field") + cfg := NewConfigWithID("test_id") + cfg.TraceFlags.ParseFrom = &parseFrom + return cfg, 
nil + }, + false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + cfg, err := tc.input() + require.NoError(t, err, "expected nil error when running test cases input func") + set := componenttest.NewNopTelemetrySettings() + op, err := cfg.Build(set) + if tc.expectErr { + require.Error(t, err, "expected error while building trace_parser operator") + return + } + require.NoError(t, err, "did not expect error while building trace_parser operator") + require.NotNil(t, op, "expected Build to return an operator") + }) + } +} + +func TestProcess(t *testing.T) { + testSpanIDBytes, _ := hex.DecodeString("480140f3d770a5ae32f0a22b6a812cff") + testTraceIDBytes, _ := hex.DecodeString("92c3792d54ba94f3") + testTraceFlagsBytes, _ := hex.DecodeString("01") + + cases := []struct { + name string + op func() (operator.Operator, error) + input *entry.Entry + expect *entry.Entry + }{ + { + "no-op", + func() (operator.Operator, error) { + cfg := NewConfigWithID("test_id") + set := componenttest.NewNopTelemetrySettings() + return cfg.Build(set) + }, + &entry.Entry{ + Body: "https://google.com:443/path?user=dev", + }, + &entry.Entry{ + Body: "https://google.com:443/path?user=dev", + }, + }, + { + "all", + func() (operator.Operator, error) { + cfg := NewConfigWithID("test_id") + spanFrom := entry.NewBodyField("app_span_id") + traceFrom := entry.NewBodyField("app_trace_id") + flagsFrom := entry.NewBodyField("trace_flags_field") + cfg.SpanID.ParseFrom = &spanFrom + cfg.TraceID.ParseFrom = &traceFrom + cfg.TraceFlags.ParseFrom = &flagsFrom + set := componenttest.NewNopTelemetrySettings() + return cfg.Build(set) + }, + &entry.Entry{ + Body: map[string]any{ + "app_span_id": "480140f3d770a5ae32f0a22b6a812cff", + "app_trace_id": "92c3792d54ba94f3", + "trace_flags_field": "01", + }, + }, + &entry.Entry{ + SpanID: testSpanIDBytes, + TraceID: testTraceIDBytes, + TraceFlags: testTraceFlagsBytes, + Body: map[string]any{ + "app_span_id": 
"480140f3d770a5ae32f0a22b6a812cff", + "app_trace_id": "92c3792d54ba94f3", + "trace_flags_field": "01", + }, + }, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + op, err := tc.op() + require.NoError(t, err, "did not expect operator function to return an error, this is a bug with the test case") + + err = op.Process(context.Background(), tc.input) + require.NoError(t, err) + require.Equal(t, tc.expect, tc.input) + }) + } +} + +func TestTraceParserParse(t *testing.T) { + cases := []struct { + name string + inputRecord map[string]any + expectedRecord map[string]any + expectErr bool + traceID string + spanID string + traceFlags string + }{ + { + "AllFields", + map[string]any{ + "trace_id": "480140f3d770a5ae32f0a22b6a812cff", + "span_id": "92c3792d54ba94f3", + "trace_flags": "01", + }, + map[string]any{ + "trace_id": "480140f3d770a5ae32f0a22b6a812cff", + "span_id": "92c3792d54ba94f3", + "trace_flags": "01", + }, + false, + "480140f3d770a5ae32f0a22b6a812cff", + "92c3792d54ba94f3", + "01", + }, + { + "WrongFields", + map[string]any{ + "traceId": "480140f3d770a5ae32f0a22b6a812cff", + "traceFlags": "01", + "spanId": "92c3792d54ba94f3", + }, + map[string]any{ + "traceId": "480140f3d770a5ae32f0a22b6a812cff", + "spanId": "92c3792d54ba94f3", + "traceFlags": "01", + }, + false, + "", + "", + "", + }, + { + "OnlyTraceId", + map[string]any{ + "trace_id": "480140f3d770a5ae32f0a22b6a812cff", + }, + map[string]any{ + "trace_id": "480140f3d770a5ae32f0a22b6a812cff", + }, + false, + "480140f3d770a5ae32f0a22b6a812cff", + "", + "", + }, + { + "WrongTraceIdFormat", + map[string]any{ + "trace_id": "foo_bar", + "span_id": "92c3792d54ba94f3", + "trace_flags": "01", + }, + map[string]any{ + "trace_id": "foo_bar", + "span_id": "92c3792d54ba94f3", + "trace_flags": "01", + }, + true, + "", + "92c3792d54ba94f3", + "01", + }, + { + "WrongTraceFlagFormat", + map[string]any{ + "trace_id": "480140f3d770a5ae32f0a22b6a812cff", + "span_id": "92c3792d54ba94f3", + 
"trace_flags": "foo_bar", + }, + map[string]any{ + "trace_id": "480140f3d770a5ae32f0a22b6a812cff", + "span_id": "92c3792d54ba94f3", + "trace_flags": "foo_bar", + }, + true, + "480140f3d770a5ae32f0a22b6a812cff", + "92c3792d54ba94f3", + "", + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + traceParserConfig := NewConfigWithID("") + set := componenttest.NewNopTelemetrySettings() + _, _ = traceParserConfig.Build(set) + e := entry.New() + e.Body = tc.inputRecord + err := traceParserConfig.Parse(e) + if tc.expectErr { + require.Error(t, err) + } else { + require.NoError(t, err) + } + require.Equal(t, tc.expectedRecord, e.Body) + traceID, _ := hex.DecodeString(tc.traceID) + if len(tc.traceID) == 0 { + require.Nil(t, e.TraceID) + } else { + require.Equal(t, traceID, e.TraceID) + } + spanID, _ := hex.DecodeString(tc.spanID) + if len(tc.spanID) == 0 { + require.Nil(t, e.SpanID) + } else { + require.Equal(t, spanID, e.SpanID) + } + traceFlags, _ := hex.DecodeString(tc.traceFlags) + if len(tc.traceFlags) == 0 { + require.Nil(t, e.TraceFlags) + } else { + require.Equal(t, traceFlags, e.TraceFlags) + } + }) + } +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/trace/testdata/config.yaml b/processor/signozlogspipelineprocessor/stanza/operator/operators/trace/testdata/config.yaml new file mode 100644 index 00000000..25e1aceb --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/trace/testdata/config.yaml @@ -0,0 +1,17 @@ +default: + type: trace_parser +on_error_drop: + type: trace_parser + on_error: drop +spanid: + type: trace_parser + span_id: + parse_from: body.app_span_id +trace_flags: + type: trace_parser + trace_flags: + parse_from: body.app_trace_flags_id +traceid: + type: trace_parser + trace_id: + parse_from: body.app_trace_id From c44c3aa5fdf282801fe600c3aa15c47d45cb0ead Mon Sep 17 00:00:00 2001 From: Raj Date: Mon, 16 Sep 2024 15:43:29 +0530 Subject: [PATCH 11/16] feat: register 
grok parser with signoz stanza op registry --- pkg/parser/grok/grok.go | 2 ++ .../signozlogspipelineprocessor/stanza/adapter/register_ops.go | 1 + 2 files changed, 3 insertions(+) diff --git a/pkg/parser/grok/grok.go b/pkg/parser/grok/grok.go index f09162cd..92aaa5cd 100644 --- a/pkg/parser/grok/grok.go +++ b/pkg/parser/grok/grok.go @@ -4,6 +4,7 @@ import ( "context" "fmt" + signozlogspipelinestanzaoperator "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" @@ -15,6 +16,7 @@ const operatorType = "grok_parser" func init() { operator.Register(operatorType, func() operator.Builder { return NewConfig() }) + signozlogspipelinestanzaoperator.Register(operatorType, func() operator.Builder { return NewConfig() }) } // NewConfig creates a new grok parser config with default values diff --git a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go index 08f74eae..0ef124b9 100644 --- a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go +++ b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go @@ -2,6 +2,7 @@ package signozlogspipelinestanzaadapter import ( + _ "github.com/SigNoz/signoz-otel-collector/pkg/parser/grok" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/add" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/copy" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/json" From e4454792495ef289834fe3d2fa7aeef54f69caac Mon Sep 17 00:00:00 2001 From: Raj Date: Mon, 16 Sep 2024 15:48:18 +0530 Subject: 
[PATCH 12/16] feat: bring in noop operator --- .../stanza/adapter/register_ops.go | 1 + .../stanza/operator/operators/noop/config.go | 46 +++++++++++++++++++ .../operator/operators/noop/config_test.go | 27 +++++++++++ .../operator/operators/noop/package_test.go | 13 ++++++ .../operator/operators/noop/transformer.go | 21 +++++++++ .../operators/noop/transformer_test.go | 39 ++++++++++++++++ 6 files changed, 147 insertions(+) create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/noop/config.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/noop/config_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/noop/package_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/noop/transformer.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/noop/transformer_test.go diff --git a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go index 0ef124b9..e32020eb 100644 --- a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go +++ b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go @@ -7,6 +7,7 @@ import ( _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/copy" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/json" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/move" + _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/noop" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/regex" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/remove" _ 
"github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/severity" diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/noop/config.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/noop/config.go new file mode 100644 index 00000000..2f254391 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/noop/config.go @@ -0,0 +1,46 @@ +// Brought in as is from opentelemetry-collector-contrib + +package noop + +import ( + "go.opentelemetry.io/collector/component" + + signozlogspipelinestanzaoperator "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +const operatorType = "noop" + +func init() { + signozlogspipelinestanzaoperator.Register(operatorType, func() operator.Builder { return NewConfig() }) +} + +// NewConfig creates a new noop operator config with default values +func NewConfig() *Config { + return NewConfigWithID(operatorType) +} + +// NewConfigWithID creates a new noop operator config with default values +func NewConfigWithID(operatorID string) *Config { + return &Config{ + TransformerConfig: helper.NewTransformerConfig(operatorID, operatorType), + } +} + +// Config is the configuration of a noop operator. +type Config struct { + helper.TransformerConfig `mapstructure:",squash"` +} + +// Build will build a noop operator. 
+func (c Config) Build(set component.TelemetrySettings) (operator.Operator, error) { + transformerOperator, err := c.TransformerConfig.Build(set) + if err != nil { + return nil, err + } + + return &Transformer{ + TransformerOperator: transformerOperator, + }, nil +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/noop/config_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/noop/config_test.go new file mode 100644 index 00000000..a3abbecb --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/noop/config_test.go @@ -0,0 +1,27 @@ +// Brought in as is from opentelemetry-collector-contrib + +package noop + +import ( + "testing" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component/componenttest" +) + +func TestBuildValid(t *testing.T) { + cfg := NewConfigWithID("test") + set := componenttest.NewNopTelemetrySettings() + op, err := cfg.Build(set) + require.NoError(t, err) + require.IsType(t, &Transformer{}, op) +} + +func TestBuildInvalid(t *testing.T) { + cfg := NewConfigWithID("test") + set := componenttest.NewNopTelemetrySettings() + set.Logger = nil + _, err := cfg.Build(set) + require.Error(t, err) + require.Contains(t, err.Error(), "build context is missing a logger") +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/noop/package_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/noop/package_test.go new file mode 100644 index 00000000..838a0a5e --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/noop/package_test.go @@ -0,0 +1,13 @@ +// Brought in as is from opentelemetry-collector-contrib + +package noop + +import ( + "testing" + + "go.uber.org/goleak" +) + +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/noop/transformer.go 
b/processor/signozlogspipelineprocessor/stanza/operator/operators/noop/transformer.go new file mode 100644 index 00000000..78054f72 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/noop/transformer.go @@ -0,0 +1,21 @@ +// Brought in as is from opentelemetry-collector-contrib + +package noop + +import ( + "context" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +// Transformer is an operator that performs no operations on an entry. +type Transformer struct { + helper.TransformerOperator +} + +// Process will forward the entry to the next output without any alterations. +func (t *Transformer) Process(ctx context.Context, entry *entry.Entry) error { + t.Write(ctx, entry) + return nil +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/noop/transformer_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/noop/transformer_test.go new file mode 100644 index 00000000..3392b793 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/noop/transformer_test.go @@ -0,0 +1,39 @@ +// Brought in as is from opentelemetry-collector-contrib + +package noop + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component/componenttest" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/testutil" +) + +func TestProcess(t *testing.T) { + cfg := NewConfigWithID("test") + cfg.OutputIDs = []string{"fake"} + set := componenttest.NewNopTelemetrySettings() + op, err := cfg.Build(set) + require.NoError(t, err) + + fake := testutil.NewFakeOutput(t) + require.NoError(t, op.SetOutputs([]operator.Operator{fake})) + + entry := 
entry.New() + entry.AddAttribute("label", "value") + entry.AddResourceKey("resource", "value") + entry.TraceID = []byte{0x01} + entry.SpanID = []byte{0x01} + entry.TraceFlags = []byte{0x01} + + expected := entry.Copy() + err = op.Process(context.Background(), entry) + require.NoError(t, err) + + fake.ExpectEntry(t, expected) +} From 1eb51e548ee1d1d79feeba1d98f79650b532cb43 Mon Sep 17 00:00:00 2001 From: Raj Date: Mon, 16 Sep 2024 15:50:53 +0530 Subject: [PATCH 13/16] feat: bring in router operator --- .../stanza/adapter/register_ops.go | 1 + .../operator/operators/router/config.go | 86 +++++++ .../operator/operators/router/config_test.go | 90 +++++++ .../operator/operators/router/package_test.go | 13 ++ .../operators/router/testdata/config.yaml | 29 +++ .../operator/operators/router/transformer.go | 125 ++++++++++ .../operators/router/transformer_test.go | 220 ++++++++++++++++++ 7 files changed, 564 insertions(+) create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/router/config.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/router/config_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/router/package_test.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/router/testdata/config.yaml create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/router/transformer.go create mode 100644 processor/signozlogspipelineprocessor/stanza/operator/operators/router/transformer_test.go diff --git a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go index e32020eb..a607ca77 100644 --- a/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go +++ b/processor/signozlogspipelineprocessor/stanza/adapter/register_ops.go @@ -10,6 +10,7 @@ import ( _ 
"github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/noop" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/regex" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/remove" + _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/router" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/severity" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/time" _ "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/trace" diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/router/config.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/router/config.go new file mode 100644 index 00000000..0be31c30 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/router/config.go @@ -0,0 +1,86 @@ +// Brought in as is from opentelemetry-collector-contrib + +package router + +import ( + "fmt" + + "go.opentelemetry.io/collector/component" + + signozlogspipelinestanzaoperator "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +const operatorType = "router" + +func init() { + signozlogspipelinestanzaoperator.Register(operatorType, func() operator.Builder { return NewConfig() }) +} + +// NewConfig config creates a new router operator config with default values +func NewConfig() *Config { + return NewConfigWithID(operatorType) +} + +// NewConfigWithID config creates a new router operator config with default values +func 
NewConfigWithID(operatorID string) *Config { + return &Config{ + BasicConfig: helper.NewBasicConfig(operatorID, operatorType), + } +} + +// Config is the configuration of a router operator +type Config struct { + helper.BasicConfig `mapstructure:",squash"` + Routes []*RouteConfig `mapstructure:"routes"` + Default []string `mapstructure:"default"` +} + +// RouteConfig is the configuration of a route on a router operator +type RouteConfig struct { + helper.AttributerConfig `mapstructure:",squash"` + Expression string `mapstructure:"expr"` + OutputIDs []string `mapstructure:"output"` +} + +// Build will build a router operator from the supplied configuration +func (c Config) Build(set component.TelemetrySettings) (operator.Operator, error) { + basicOperator, err := c.BasicConfig.Build(set) + if err != nil { + return nil, err + } + + if c.Default != nil { + defaultRoute := &RouteConfig{ + Expression: "true", + OutputIDs: c.Default, + } + c.Routes = append(c.Routes, defaultRoute) + } + + routes := make([]*Route, 0, len(c.Routes)) + for _, routeConfig := range c.Routes { + compiled, err := helper.ExprCompileBool(routeConfig.Expression) + if err != nil { + return nil, fmt.Errorf("failed to compile expression '%s': %w", routeConfig.Expression, err) + } + + attributer, err := routeConfig.AttributerConfig.Build() + if err != nil { + return nil, fmt.Errorf("failed to build attributer for route '%s': %w", routeConfig.Expression, err) + } + + route := Route{ + Attributer: attributer, + Expression: compiled, + OutputIDs: routeConfig.OutputIDs, + } + routes = append(routes, &route) + } + + return &Transformer{ + BasicOperator: basicOperator, + routes: routes, + }, nil +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/router/config_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/router/config_test.go new file mode 100644 index 00000000..253d91ab --- /dev/null +++ 
b/processor/signozlogspipelineprocessor/stanza/operator/operators/router/config_test.go @@ -0,0 +1,90 @@ +// Brought in as is from opentelemetry-collector-contrib +package router + +import ( + "path/filepath" + "testing" + + "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operatortest" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +func TestRouterGoldenConfig(t *testing.T) { + operatortest.ConfigUnmarshalTests{ + DefaultConfig: NewConfig(), + TestsFile: filepath.Join(".", "testdata", "config.yaml"), + Tests: []operatortest.ConfigUnmarshalTest{ + { + Name: "default", + Expect: NewConfig(), + }, + { + Name: "routes_one", + Expect: func() *Config { + cfg := NewConfig() + newRoute := &RouteConfig{ + Expression: `body.format == "json"`, + OutputIDs: []string{"my_json_parser"}, + } + cfg.Routes = append(cfg.Routes, newRoute) + return cfg + }(), + }, + { + Name: "routes_multi", + Expect: func() *Config { + cfg := NewConfig() + newRoute := []*RouteConfig{ + { + Expression: `body.format == "json"`, + OutputIDs: []string{"my_json_parser"}, + }, + { + Expression: `body.format == "json"2`, + OutputIDs: []string{"my_json_parser2"}, + }, + { + Expression: `body.format == "json"3`, + OutputIDs: []string{"my_json_parser3"}, + }, + } + cfg.Routes = newRoute + return cfg + }(), + }, + { + Name: "routes_attributes", + Expect: func() *Config { + cfg := NewConfig() + + attVal := helper.NewAttributerConfig() + attVal.Attributes = map[string]helper.ExprStringConfig{ + "key1": "val1", + } + + cfg.Routes = []*RouteConfig{ + { + Expression: `body.format == "json"`, + OutputIDs: []string{"my_json_parser"}, + AttributerConfig: attVal, + }, + } + return cfg + }(), + }, + { + Name: "routes_default", + Expect: func() *Config { + cfg := NewConfig() + newRoute := &RouteConfig{ + Expression: `body.format == "json"`, + OutputIDs: []string{"my_json_parser"}, + } + cfg.Routes = append(cfg.Routes, newRoute) + 
cfg.Default = append(cfg.Default, "catchall") + return cfg + }(), + }, + }, + }.Run(t) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/router/package_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/router/package_test.go new file mode 100644 index 00000000..425296fd --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/router/package_test.go @@ -0,0 +1,13 @@ +// Brought in as is from opentelemetry-collector-contrib + +package router + +import ( + "testing" + + "go.uber.org/goleak" +) + +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/router/testdata/config.yaml b/processor/signozlogspipelineprocessor/stanza/operator/operators/router/testdata/config.yaml new file mode 100644 index 00000000..b2ebea21 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/router/testdata/config.yaml @@ -0,0 +1,29 @@ +default: + type: router +routes_attributes: + type: router + routes: + - output: my_json_parser + expr: 'body.format == "json"' + attributes: + key1: val1 +routes_default: + type: router + routes: + - output: my_json_parser + expr: 'body.format == "json"' + default: catchall +routes_multi: + type: router + routes: + - output: my_json_parser + expr: 'body.format == "json"' + - output: my_json_parser2 + expr: 'body.format == "json"2' + - output: my_json_parser3 + expr: 'body.format == "json"3' +routes_one: + type: router + routes: + - output: my_json_parser + expr: 'body.format == "json"' diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/router/transformer.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/router/transformer.go new file mode 100644 index 00000000..509d7210 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/router/transformer.go @@ -0,0 +1,125 @@ +// Brought in as is from 
opentelemetry-collector-contrib + +package router + +import ( + "context" + "fmt" + + "github.com/expr-lang/expr/vm" + "go.uber.org/zap" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" +) + +// Transformer is an operator that routes entries based on matching expressions +type Transformer struct { + helper.BasicOperator + routes []*Route +} + +// Route is a route on a router operator +type Route struct { + helper.Attributer + Expression *vm.Program + OutputIDs []string + OutputOperators []operator.Operator +} + +// CanProcess will always return true for a router operator +func (t *Transformer) CanProcess() bool { + return true +} + +// Process will route incoming entries based on matching expressions +func (t *Transformer) Process(ctx context.Context, entry *entry.Entry) error { + env := helper.GetExprEnv(entry) + defer helper.PutExprEnv(env) + + for _, route := range t.routes { + matches, err := vm.Run(route.Expression, env) + if err != nil { + t.Logger().Warn("Running expression returned an error", zap.Error(err)) + continue + } + + // we compile the expression with "AsBool", so this should be safe + if matches.(bool) { + if err := route.Attribute(entry); err != nil { + t.Logger().Error("Failed to label entry", zap.Error(err)) + return err + } + + for _, output := range route.OutputOperators { + _ = output.Process(ctx, entry) + } + break + } + } + + return nil +} + +// CanOutput will always return true for a router operator +func (t *Transformer) CanOutput() bool { + return true +} + +// Outputs will return all connected operators. +func (t *Transformer) Outputs() []operator.Operator { + outputs := make([]operator.Operator, 0, len(t.routes)) + for _, route := range t.routes { + outputs = append(outputs, route.OutputOperators...) 
+ } + return outputs +} + +// GetOutputIDs will return all connected operators. +func (t *Transformer) GetOutputIDs() []string { + outputs := make([]string, 0, len(t.routes)) + for _, route := range t.routes { + outputs = append(outputs, route.OutputIDs...) + } + return outputs +} + +// SetOutputs will set the outputs of the router operator. +func (t *Transformer) SetOutputs(operators []operator.Operator) error { + for _, route := range t.routes { + outputOperators, err := t.findOperators(operators, route.OutputIDs) + if err != nil { + return fmt.Errorf("failed to set outputs on route: %w", err) + } + route.OutputOperators = outputOperators + } + + return nil +} + +// SetOutputIDs will do nothing. +func (t *Transformer) SetOutputIDs(_ []string) {} + +// findOperators will find a subset of operators from a collection. +func (t *Transformer) findOperators(operators []operator.Operator, operatorIDs []string) ([]operator.Operator, error) { + result := make([]operator.Operator, len(operatorIDs)) + for i, operatorID := range operatorIDs { + operator, err := t.findOperator(operators, operatorID) + if err != nil { + return nil, err + } + result[i] = operator + } + return result, nil +} + +// findOperator will find an operator from a collection. 
+func (t *Transformer) findOperator(operators []operator.Operator, operatorID string) (operator.Operator, error) { + for _, operator := range operators { + if operator.ID() == operatorID { + return operator, nil + } + } + return nil, fmt.Errorf("operator %s does not exist", operatorID) +} diff --git a/processor/signozlogspipelineprocessor/stanza/operator/operators/router/transformer_test.go b/processor/signozlogspipelineprocessor/stanza/operator/operators/router/transformer_test.go new file mode 100644 index 00000000..c81195a2 --- /dev/null +++ b/processor/signozlogspipelineprocessor/stanza/operator/operators/router/transformer_test.go @@ -0,0 +1,220 @@ +// Brought in as is from opentelemetry-collector-contrib + +package router + +import ( + "context" + "testing" + + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component/componenttest" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/testutil" +) + +func TestTransformer(t *testing.T) { + t.Setenv("TEST_ROUTER_OPERATOR_ENV", "foo") + + basicConfig := func() *Config { + return &Config{ + BasicConfig: helper.BasicConfig{ + OperatorID: "test_operator_id", + OperatorType: "router", + }, + } + } + + cases := []struct { + name string + input *entry.Entry + routes []*RouteConfig + defaultOutput []string + expectedCounts map[string]int + expectedAttributes map[string]any + }{ + { + "DefaultRoute", + entry.New(), + []*RouteConfig{ + { + helper.NewAttributerConfig(), + "true", + []string{"output1"}, + }, + }, + nil, + map[string]int{"output1": 1}, + nil, + }, + { + "NoMatch", + entry.New(), + []*RouteConfig{ + { + helper.NewAttributerConfig(), + `false`, + []string{"output1"}, + }, + }, + nil, 
+ map[string]int{}, + nil, + }, + { + "SimpleMatch", + &entry.Entry{ + Body: map[string]any{ + "message": "test_message", + }, + }, + []*RouteConfig{ + { + helper.NewAttributerConfig(), + `body.message == "non_match"`, + []string{"output1"}, + }, + { + helper.NewAttributerConfig(), + `body.message == "test_message"`, + []string{"output2"}, + }, + }, + nil, + map[string]int{"output2": 1}, + nil, + }, + { + "MatchWithAttribute", + &entry.Entry{ + Body: map[string]any{ + "message": "test_message", + }, + }, + []*RouteConfig{ + { + helper.NewAttributerConfig(), + `body.message == "non_match"`, + []string{"output1"}, + }, + { + helper.AttributerConfig{ + Attributes: map[string]helper.ExprStringConfig{ + "label-key": "label-value", + }, + }, + `body.message == "test_message"`, + []string{"output2"}, + }, + }, + nil, + map[string]int{"output2": 1}, + map[string]any{ + "label-key": "label-value", + }, + }, + { + "MatchEnv", + &entry.Entry{ + Body: map[string]any{ + "message": "test_message", + }, + }, + []*RouteConfig{ + { + helper.NewAttributerConfig(), + `env("TEST_ROUTER_OPERATOR_ENV") == "foo"`, + []string{"output1"}, + }, + { + helper.NewAttributerConfig(), + `true`, + []string{"output2"}, + }, + }, + nil, + map[string]int{"output1": 1}, + nil, + }, + { + "UseDefault", + &entry.Entry{ + Body: map[string]any{ + "message": "test_message", + }, + }, + []*RouteConfig{ + { + helper.NewAttributerConfig(), + `false`, + []string{"output1"}, + }, + }, + []string{"output2"}, + map[string]int{"output2": 1}, + nil, + }, + { + "MatchBeforeDefault", + &entry.Entry{ + Body: map[string]any{ + "message": "test_message", + }, + }, + []*RouteConfig{ + { + helper.NewAttributerConfig(), + `true`, + []string{"output1"}, + }, + }, + []string{"output2"}, + map[string]int{"output1": 1}, + nil, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + cfg := basicConfig() + cfg.Routes = tc.routes + cfg.Default = tc.defaultOutput + + set := 
componenttest.NewNopTelemetrySettings() + op, err := cfg.Build(set) + require.NoError(t, err) + + results := map[string]int{} + var attributes map[string]any + + mock1 := testutil.NewMockOperator("output1") + mock1.On("Process", mock.Anything, mock.Anything).Return(nil).Run(func(args mock.Arguments) { + results["output1"]++ + if entry, ok := args[1].(*entry.Entry); ok { + attributes = entry.Attributes + } + }) + + mock2 := testutil.NewMockOperator("output2") + mock2.On("Process", mock.Anything, mock.Anything).Return(nil).Run(func(args mock.Arguments) { + results["output2"]++ + if entry, ok := args[1].(*entry.Entry); ok { + attributes = entry.Attributes + } + }) + + routerOperator := op.(*Transformer) + err = routerOperator.SetOutputs([]operator.Operator{mock1, mock2}) + require.NoError(t, err) + + err = routerOperator.Process(context.Background(), tc.input) + require.NoError(t, err) + + require.Equal(t, tc.expectedCounts, results) + require.Equal(t, tc.expectedAttributes, attributes) + }) + } +} From 14f02f8bfa32db94145cff07aeeaecb88a7f1f36 Mon Sep 17 00:00:00 2001 From: Raj Date: Mon, 16 Sep 2024 19:20:37 +0530 Subject: [PATCH 14/16] chore: minor cleanup to get all tests passing --- processor/signozlogspipelineprocessor/config_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processor/signozlogspipelineprocessor/config_test.go b/processor/signozlogspipelineprocessor/config_test.go index 99c7bba2..93f7caeb 100644 --- a/processor/signozlogspipelineprocessor/config_test.go +++ b/processor/signozlogspipelineprocessor/config_test.go @@ -12,10 +12,10 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper" - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/parser/regex" signozlogspipelinestanzaadapter "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/adapter" 
signozlogspipelinestanzaoperator "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator" + "github.com/SigNoz/signoz-otel-collector/processor/signozlogspipelineprocessor/stanza/operator/operators/regex" ) func TestLoadConfig(t *testing.T) { From 2e5731c52dc1096ad8420394f1059e1ef63cb378 Mon Sep 17 00:00:00 2001 From: Raj Date: Mon, 16 Sep 2024 19:20:51 +0530 Subject: [PATCH 15/16] chore: run go mod tidy --- go.mod | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index e7e4fea2..21ff0908 100644 --- a/go.mod +++ b/go.mod @@ -20,6 +20,7 @@ require ( github.com/gorilla/mux v1.8.1 github.com/gorilla/websocket v1.5.0 github.com/hashicorp/golang-lru v1.0.2 + github.com/jellydator/ttlcache/v3 v3.2.0 github.com/knadh/koanf v1.5.0 github.com/lightstep/go-expohisto v1.0.0 github.com/oklog/ulid v1.3.1 @@ -168,6 +169,7 @@ require ( go.opentelemetry.io/collector/semconv v0.102.0 go.opentelemetry.io/otel/trace v1.27.0 go.uber.org/atomic v1.11.0 + go.uber.org/goleak v1.3.0 go.uber.org/multierr v1.11.0 go.uber.org/zap v1.27.0 golang.org/x/text v0.16.0 @@ -312,7 +314,6 @@ require ( github.com/imdario/mergo v0.3.16 // indirect github.com/ionos-cloud/sdk-go/v6 v6.1.11 // indirect github.com/jcmturner/goidentity/v6 v6.0.1 // indirect - github.com/jellydator/ttlcache/v3 v3.2.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/jpillora/backoff v1.0.0 // indirect github.com/karrick/godirwalk v1.17.0 // indirect From 628d9bc8bc22a35d8362b4cc2381efb03a65f837 Mon Sep 17 00:00:00 2001 From: Raj Date: Mon, 16 Sep 2024 19:34:37 +0530 Subject: [PATCH 16/16] chore: minor cleanup --- processor/signozlogspipelineprocessor/stanza/operator/config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processor/signozlogspipelineprocessor/stanza/operator/config.go b/processor/signozlogspipelineprocessor/stanza/operator/config.go index 5a60a954..d8d2f4f7 100644 --- 
a/processor/signozlogspipelineprocessor/stanza/operator/config.go +++ b/processor/signozlogspipelineprocessor/stanza/operator/config.go @@ -1,6 +1,6 @@ // Mostly Brought in as is from opentelemetry-collector-contrib // Maintaining our own copy/version of Config allows us to use our own -// registry of stanza operators in Config.Unmarshal +// registry of stanza operators used in Config.Unmarshal in this file package signozlogspipelinestanzaoperator