diff --git a/transformer/auto.go b/transformer/auto.go index b96b03be..6aa58fd7 100644 --- a/transformer/auto.go +++ b/transformer/auto.go @@ -81,6 +81,12 @@ func init() { Alloc: func() interface{} { return &Replace{} }, Help: "Uses a regular expression to replace the content of a metadata key, storing it to either a different metadata key, or overwriting the original.", }) + Auto.Add(skogul.Module{ + Name: "replacedata", + Aliases: []string{}, + Alloc: func() interface{} { return &ReplaceData{} }, + Help: "Uses a regular expression to replace the content of a data key, storing it to either a different data key, or overwriting the original.", + }) Auto.Add(skogul.Module{ Name: "switch", Aliases: []string{}, @@ -122,4 +128,11 @@ func init() { Help: "Ban values from nested structure using a path e.g. Path looking like this foo.bar.1 has a structure looking like this { foo: { bar: { 1: hello } } }. The last element in the path will get removed from the tree in this case 1: hello, you will end up having a tree looking like this { foo: { bar: {} } }.", AutoMake: false, }) + Auto.Add(skogul.Module{ + Name: "banfield", + Aliases: []string{}, + Alloc: func() interface{} { return &BanField{} }, + Help: "Remove single fields in a metric based on a regular expression criteria", + AutoMake: false, + }) } diff --git a/transformer/ban_field.go b/transformer/ban_field.go new file mode 100644 index 00000000..628f9b19 --- /dev/null +++ b/transformer/ban_field.go @@ -0,0 +1,75 @@ +package transformer + +import ( + "fmt" + "github.com/telenornms/skogul" + "regexp" + "sync" +) + +type BanField struct { + SourceData string `doc:"Data field to ban"` + RegexpData string `doc:"Regex to match value of source-data field"` + regexpData *regexp.Regexp + SourceMetadata string `doc:"Metadata field to ban"` + RegexpMetadata string `doc:"Regex to match value of source-metadata field"` + regexpMetadata *regexp.Regexp + errData error + errMetadata error + init sync.Once +} + +func (b *BanField) 
Transform(c *skogul.Container) error { + b.init.Do(func() { + b.regexpData, b.errData = regexp.Compile(b.RegexpData) + b.regexpMetadata, b.errMetadata = regexp.Compile(b.RegexpMetadata) + }) + + if b.errData != nil { + return fmt.Errorf("unable to compile regexp `%s': %w", b.RegexpData, b.errData) + } + if b.errMetadata != nil { + return fmt.Errorf("unable to compile regexp `%s': %w", b.RegexpMetadata, b.errMetadata) + } + + for _, metric := range c.Metrics { + if b.SourceData != "" { + if str, ok := metric.Data[b.SourceData]; ok { + if b.regexpData.Match([]byte(str.(string))) { + delete(metric.Data, b.SourceData) + } + } + } + if b.SourceMetadata != "" { + if str, ok := metric.Metadata[b.SourceMetadata]; ok { + if b.regexpMetadata.Match([]byte(str.(string))) { + delete(metric.Metadata, b.SourceMetadata) + } + } + } + } + + return nil +} + +func (b *BanField) Verify() error { + if b.SourceData != "" && b.RegexpData == "" { + return fmt.Errorf("regexpdata field has to have a value when sourcedata is provided") + } + if b.SourceMetadata != "" && b.RegexpMetadata == "" { + return fmt.Errorf("regexpmetadata field has to have a value when sourcemetadata is provided") + } + + var err error + + _, err = regexp.Compile(b.RegexpData) + if err != nil { + return fmt.Errorf("failed to compile regexp for regexpdata field %v %v", b.RegexpData, err) + } + + _, err = regexp.Compile(b.RegexpMetadata) + if err != nil { + return fmt.Errorf("failed to compile regexp for regexpmetadata field %v %v", b.RegexpMetadata, err) + } + return nil +} diff --git a/transformer/ban_field_test.go b/transformer/ban_field_test.go new file mode 100644 index 00000000..815db4f2 --- /dev/null +++ b/transformer/ban_field_test.go @@ -0,0 +1,41 @@ +package transformer_test + +import ( + "testing" + + "github.com/telenornms/skogul" + "github.com/telenornms/skogul/transformer" +) + +func TestBanField(t *testing.T) { + metric := skogul.Metric{} + metric.Metadata = make(map[string]interface{}) + 
metric.Metadata["foofoo"] = "barBAR" + metric.Data = make(map[string]interface{}) + metric.Data["foo"] = "BAR" + metric.Data["baz"] = "foobar" + c := skogul.Container{} + c.Metrics = []*skogul.Metric{&metric} + + ban := transformer.BanField{ + SourceData: "foo", + RegexpData: "BAR", + SourceMetadata: "foofoo", + RegexpMetadata: "barBAR", + } + + t.Logf("Container before transform:\n%v", c) + err := ban.Transform(&c) + if err != nil { + t.Errorf("ban_field returned non-nil err: %v", err) + } + + t.Logf("Container after transform:\n%v", c) + + if _, ok := c.Metrics[0].Metadata["foofoo"]; ok { + t.Fatal("ban_field transformer failed to ban key-value pair") + } + if _, ok := c.Metrics[0].Data["foo"]; ok { + t.Fatal("ban_field transformer failed to ban key-value pair") + } +} diff --git a/transformer/cast.go b/transformer/cast.go index beeb4741..730bf861 100644 --- a/transformer/cast.go +++ b/transformer/cast.go @@ -35,20 +35,24 @@ import ( ) type Cast struct { - MetadataStrings []string `doc:"List of metadatafields that should be strings"` - MetadataInts []string `doc:"List of metadatafields that should be integers"` - MetadataFloats []string `doc:"List of metadatafields that should be 64-bit floats"` - MetadataFlatFloats []string `doc:"List of metadatafields that are floats which should be expressed as plain, non-exponential numbers in text. E.g.: Large serial numbers will be written as plain numbers, not 1.1231215e+10. If the field is a non-float, it will be left as is."` - MetadataIpToDec []string `doc:"List of metadatafields containing IP addresses that should be decimals"` - MetadataJson []string `doc:"List of fields that will be json-decoded. 
E.g.: Original value is encoded as text string, but contains json."` - MetadataTopJson string `doc:"Metadata-field containing text-encoded JSON which will replace all other metadata after being decoded."` - DataStrings []string `doc:"List of datafields that should be strings"` - DataInts []string `doc:"List of datafields that should be integers"` - DataFloats []string `doc:"List of datafields that should be 64-bit floats"` - DataFlatFloats []string `doc:"List of metadatafields that are floats which should be expressed as plain, non-exponential numbers in text. E.g.: Large serial numbers will be written as plain numbers, not 1.1231215e+10. If the field is a non-float, it will be left as is."` - DataIpToDec []string `doc:"List of datafields containing IP addresses that should be decimals"` - DataJson []string `doc:"List of fields that will be json-decoded. E.g.: Original value is encoded as text string, but contains json."` - DataTopJson string `doc:"Data-field containing text-encoded JSON which will replace all other data after being decoded."` + MetadataStrings []string `doc:"List of metadatafields that should be strings"` + MetadataInts []string `doc:"List of metadatafields that should be integers"` + MetadataFloats []string `doc:"List of metadatafields that should be 64-bit floats"` + MetadataFlatFloats []string `doc:"List of metadatafields that are floats which should be expressed as plain, non-exponential numbers in text. E.g.: Large serial numbers will be written as plain numbers, not 1.1231215e+10. If the field is a non-float, it will be left as is."` + MetadataIpToDec []string `doc:"List of metadatafields containing IP addresses that should be decimals"` + MetadataJson []string `doc:"List of fields that will be json-decoded. 
E.g.: Original value is encoded as text string, but contains json."` + MetadataTopJson string `doc:"Metadata-field containing text-encoded JSON which will replace all other metadata after being decoded."` + DataStrings []string `doc:"List of datafields that should be strings"` + DataInts []string `doc:"List of datafields that should be integers"` + DataFloats []string `doc:"List of datafields that should be 64-bit floats"` + DataFlatFloats []string `doc:"List of metadatafields that are floats which should be expressed as plain, non-exponential numbers in text. E.g.: Large serial numbers will be written as plain numbers, not 1.1231215e+10. If the field is a non-float, it will be left as is."` + DataIpToDec []string `doc:"List of datafields containing IP addresses that should be decimals"` + DataJson []string `doc:"List of fields that will be json-decoded. E.g.: Original value is encoded as text string, but contains json."` + DataTopJson string `doc:"Data-field containing text-encoded JSON which will replace all other data after being decoded."` + DataBlobsToStrings []string `doc:"List of datafields containing blob (byte-array) values that should be strings"` + DataStringsToBlobs []string `doc:"List of datafields containing a string that should be blobs (byte-arrays)"` + MetadataBlobsToStrings []string `doc:"List of metadatafields containing blob (byte-array) values that should be strings"` + MetadataStringsToBlobs []string `doc:"List of metadatafields containing a string that should be blobs (byte-arrays)"` } // Transform enforces the Cast rules @@ -95,6 +99,24 @@ func (cast *Cast) Transform(c *skogul.Container) error { } } + for _, value := range cast.DataBlobsToStrings { + if c.Metrics[mi].Data[value] != nil { + _, ok := c.Metrics[mi].Data[value].(string) + if ok { + continue + } + c.Metrics[mi].Data[value] = fmt.Sprintf("%s", c.Metrics[mi].Data[value]) + } + } + for _, value := range cast.DataStringsToBlobs { + if c.Metrics[mi].Data[value] != nil { + cpy := 
fmt.Sprintf("%s", c.Metrics[mi].Data[value]) + delete(c.Metrics[mi].Data, value) + c.Metrics[mi].Data[value] = make([]byte, len(cpy)) + c.Metrics[mi].Data[value] = []byte(cpy) + } + } + for _, value := range cast.MetadataJson { if c.Metrics[mi].Metadata[value] != nil { tmp1, ok := c.Metrics[mi].Metadata[value].(string) @@ -232,6 +254,24 @@ func (cast *Cast) Transform(c *skogul.Container) error { c.Metrics[mi].Metadata[value] = cast.Inet6Aton(s) } } + + for _, value := range cast.MetadataBlobsToStrings { + if c.Metrics[mi].Metadata[value] != nil { + _, ok := c.Metrics[mi].Metadata[value].(string) + if ok { + continue + } + c.Metrics[mi].Metadata[value] = fmt.Sprintf("%s", c.Metrics[mi].Metadata[value]) + } + } + for _, value := range cast.MetadataStringsToBlobs { + if c.Metrics[mi].Metadata[value] != nil { + cpy := fmt.Sprintf("%s", c.Metrics[mi].Metadata[value]) + delete(c.Metrics[mi].Metadata, value) + c.Metrics[mi].Metadata[value] = make([]byte, len(cpy)) + c.Metrics[mi].Metadata[value] = []byte(cpy) + } + } } return nil } diff --git a/transformer/edit.go b/transformer/edit.go index 0ce9695a..72e16d94 100644 --- a/transformer/edit.go +++ b/transformer/edit.go @@ -107,3 +107,79 @@ func (replace *Replace) Verify() error { skogul.Assert(regex != nil) return nil } + +// ReplaceData is a copy of the Replace transformer but works on the Data field +// Replace executes a regular expression replacement of metric data. +type ReplaceData struct { + Source string `doc:"Data key to read from."` + Destination string `doc:"Data key to write to. Defaults to overwriting the source-key if left blank. Destination key will always be overwritten, e.g., even if the source key is missing, the key located at the destination will be removed."` + Regex string `doc:"Regular expression to match."` + Replacement string `doc:"Replacement text. Can also use $1, $2, etc to reference sub-matches. 
Defaults to empty string - remove matching items."` + regex *regexp.Regexp + once sync.Once + err error +} + +// Transform executes the regular expression replacement +func (replace *ReplaceData) Transform(c *skogul.Container) error { + replace.once.Do(func() { + if replace.Destination == "" { + replace.Destination = replace.Source + } + replace.regex, replace.err = regexp.Compile(replace.Regex) + }) + // Verify() should catch this, so there's no reasonable way this + // should happen. But in the off chance that a regex compiles on + // the first attempt but not the second.... (e.g.: some serious + // bugs). It will also catch our own bugs, if, for some reason, we + // manage to botch up Verify() under some corner case. + skogul.Assert(replace.err == nil) + + for mi := range c.Metrics { + if c.Metrics[mi].Data == nil { + continue + } + if c.Metrics[mi].Data[replace.Source] == nil { + delete(c.Metrics[mi].Data, replace.Destination) + continue + } + // FIXME: This should be a type cast to allow working with + // both text strings (as per now) and []byte strings. + // Similar to what is done in the ban transformer. + str, ok := c.Metrics[mi].Data[replace.Source].(string) + if !ok { + // FIXME: What to do? It's tempting to copy the + // key, but that could mean multiple references to + // the same memory, which can create unexpected + // behavior if other transformers want to modify + // just one of the headers. + repLog.WithField("source", replace.Source).Printf("Unable to transform non-string field %s with content %v", replace.Source, c.Metrics[mi].Data[replace.Source]) + // This is to conform with the documentation and + // ensure that this isn't exploited by providing a + // bogus Source-field only to be able to provide a + // custom destination field. 
+ delete(c.Metrics[mi].Data, replace.Destination) + continue + } + c.Metrics[mi].Data[replace.Destination] = string(replace.regex.ReplaceAll([]byte(str), []byte(replace.Replacement))) + } + return nil +} + +// Verify checks that the required variables are set and that the regular +// expression compiles +func (replace *ReplaceData) Verify() error { + if replace.Source == "" { + return skogul.MissingArgument("Source") + } + if replace.Regex == "" { + return skogul.MissingArgument("Regex") + } + regex, err := regexp.Compile(replace.Regex) + + if err != nil { + return fmt.Errorf("replace transformer regex `%s' didn't compile: %w", replace.Regex, err) + } + skogul.Assert(regex != nil) + return nil +}