Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add banfield transformer #309

Merged
merged 4 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions transformer/auto.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,12 @@ func init() {
Alloc: func() interface{} { return &Replace{} },
Help: "Uses a regular expression to replace the content of a metadata key, storing it to either a different metadata key, or overwriting the original.",
})
Auto.Add(skogul.Module{
Name: "replacedata",
Aliases: []string{},
Alloc: func() interface{} { return &ReplaceData{} },
Help: "Uses a regular expression to replace the content of a data key, storing it to either a different data key, or overwriting the original.",
})
Auto.Add(skogul.Module{
Name: "switch",
Aliases: []string{},
Expand Down Expand Up @@ -122,4 +128,11 @@ func init() {
Help: "Ban values from nested structure using a path e.g. Path looking like this foo.bar.1 has a structure looking like this { foo: { bar: { 1: hello } } }. The last element in the path will get removed from the tree in this case 1: hello, you will end up having a tree looking like this { foo: { bar: {} } }.",
AutoMake: false,
})
Auto.Add(skogul.Module{
Name: "banfield",
Aliases: []string{},
Alloc: func() interface{} { return &BanField{} },
Help: "Remove single fields in a metric based on a regular expression criteria",
AutoMake: false,
})
}
75 changes: 75 additions & 0 deletions transformer/ban_field.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package transformer

import (
"fmt"
"github.com/telenornms/skogul"
"regexp"
"sync"
)

// BanField removes individual fields from a metric. A data and/or a
// metadata field can be configured; the field is deleted from the metric
// when its current value matches the corresponding regular expression.
type BanField struct {
	SourceData     string `doc:"Data field to ban"`
	RegexpData     string `doc:"Regex to match value of source-data field"`
	regexpData     *regexp.Regexp // compiled from RegexpData on first Transform
	SourceMetadata string `doc:"Metadata field to ban"`
	RegexpMetadata string `doc:"Regex to match value of source-metadata field"`
	regexpMetadata *regexp.Regexp // compiled from RegexpMetadata on first Transform
	errData        error          // compile error for RegexpData, re-checked on every Transform
	errMetadata    error          // compile error for RegexpMetadata, re-checked on every Transform
	init           sync.Once      // guards the one-time regexp compilation
}

// Transform deletes the configured data/metadata field from each metric
// whose current value matches the corresponding regular expression.
//
// The regular expressions are compiled once, on the first invocation; a
// compilation failure is reported as an error on that and every later call.
func (b *BanField) Transform(c *skogul.Container) error {
	b.init.Do(func() {
		b.regexpData, b.errData = regexp.Compile(b.RegexpData)
		b.regexpMetadata, b.errMetadata = regexp.Compile(b.RegexpMetadata)
	})

	if b.errData != nil {
		return fmt.Errorf("unable to compile regexp `%s': %w", b.RegexpData, b.errData)
	}
	if b.errMetadata != nil {
		return fmt.Errorf("unable to compile regexp `%s': %w", b.RegexpMetadata, b.errMetadata)
	}

	for _, metric := range c.Metrics {
		if b.SourceData != "" {
			if v, ok := metric.Data[b.SourceData]; ok {
				// A bare v.(string) assertion would panic on
				// non-string values (numbers, maps, ...), so
				// convert defensively and skip unmatchable types.
				if s, ok := banFieldString(v); ok && b.regexpData.MatchString(s) {
					delete(metric.Data, b.SourceData)
				}
			}
		}
		if b.SourceMetadata != "" {
			if v, ok := metric.Metadata[b.SourceMetadata]; ok {
				if s, ok := banFieldString(v); ok && b.regexpMetadata.MatchString(s) {
					delete(metric.Metadata, b.SourceMetadata)
				}
			}
		}
	}

	return nil
}

// banFieldString extracts a string from a field value, accepting both
// string and []byte representations. ok is false for any other type,
// meaning the field cannot be matched and is left alone.
func banFieldString(v interface{}) (string, bool) {
	switch s := v.(type) {
	case string:
		return s, true
	case []byte:
		return string(s), true
	default:
		return "", false
	}
}

// Verify validates the configuration: a regular expression must accompany
// each configured source field, and both expressions must compile.
func (b *BanField) Verify() error {
	if b.RegexpData == "" && b.SourceData != "" {
		return fmt.Errorf("regexpdata field has to have a value when sourcedata is provided")
	}
	if b.RegexpMetadata == "" && b.SourceMetadata != "" {
		return fmt.Errorf("regexpmetadata field has to have a value when sourcemetadata is provided")
	}

	if _, err := regexp.Compile(b.RegexpData); err != nil {
		return fmt.Errorf("failed to compile regexp for regexpdata field %v %v", b.RegexpData, err)
	}
	if _, err := regexp.Compile(b.RegexpMetadata); err != nil {
		return fmt.Errorf("failed to compile regexp for regexpmetadata field %v %v", b.RegexpMetadata, err)
	}
	return nil
}
41 changes: 41 additions & 0 deletions transformer/ban_field_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package transformer_test

import (
"testing"

"github.com/telenornms/skogul"
"github.com/telenornms/skogul/transformer"
)

// TestBanField checks that the banfield transformer removes a matching
// data field and a matching metadata field from a metric.
func TestBanField(t *testing.T) {
	metric := skogul.Metric{
		Metadata: map[string]interface{}{"foofoo": "barBAR"},
		Data:     map[string]interface{}{"foo": "BAR", "baz": "foobar"},
	}
	c := skogul.Container{Metrics: []*skogul.Metric{&metric}}

	ban := transformer.BanField{
		SourceData:     "foo",
		RegexpData:     "BAR",
		SourceMetadata: "foofoo",
		RegexpMetadata: "barBAR",
	}

	t.Logf("Container before transform:\n%v", c)
	if err := ban.Transform(&c); err != nil {
		t.Errorf("ban_field returned non-nil err: %v", err)
	}

	t.Logf("Container after transform:\n%v", c)

	if _, ok := c.Metrics[0].Metadata["foofoo"]; ok {
		t.Fatal("ban_field transformer failed to ban key-value pair")
	}
	if _, ok := c.Metrics[0].Data["foo"]; ok {
		t.Fatal("ban_field transformer failed to ban key-value pair")
	}
}
68 changes: 54 additions & 14 deletions transformer/cast.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,24 @@ import (
)

// Cast defines type conversions applied to named metadata and data fields
// of each metric. Each list names the fields to convert; fields absent
// from a metric are left untouched.
//
// NOTE: the scraped diff showed both the pre- and post-change field lists,
// which would declare every field twice; this is the post-change struct.
type Cast struct {
	MetadataStrings        []string `doc:"List of metadatafields that should be strings"`
	MetadataInts           []string `doc:"List of metadatafields that should be integers"`
	MetadataFloats         []string `doc:"List of metadatafields that should be 64-bit floats"`
	MetadataFlatFloats     []string `doc:"List of metadatafields that are floats which should be expressed as plain, non-exponential numbers in text. E.g.: Large serial numbers will be written as plain numbers, not 1.1231215e+10. If the field is a non-float, it will be left as is."`
	MetadataIpToDec        []string `doc:"List of metadatafields containing IP addresses that should be decimals"`
	MetadataJson           []string `doc:"List of fields that will be json-decoded. E.g.: Original value is encoded as text string, but contains json."`
	MetadataTopJson        string   `doc:"Metadata-field containing text-encoded JSON which will replace all other metadata after being decoded."`
	DataStrings            []string `doc:"List of datafields that should be strings"`
	DataInts               []string `doc:"List of datafields that should be integers"`
	DataFloats             []string `doc:"List of datafields that should be 64-bit floats"`
	DataFlatFloats         []string `doc:"List of datafields that are floats which should be expressed as plain, non-exponential numbers in text. E.g.: Large serial numbers will be written as plain numbers, not 1.1231215e+10. If the field is a non-float, it will be left as is."`
	DataIpToDec            []string `doc:"List of datafields containing IP addresses that should be decimals"`
	DataJson               []string `doc:"List of fields that will be json-decoded. E.g.: Original value is encoded as text string, but contains json."`
	DataTopJson            string   `doc:"Data-field containing text-encoded JSON which will replace all other data after being decoded."`
	DataBlobsToStrings     []string `doc:"List of datafields containing blob (byte-array) values that should be strings"`
	DataStringsToBlobs     []string `doc:"List of datafields containing a string that should be blobs (byte-arrays)"`
	MetadataBlobsToStrings []string `doc:"List of metadatafields containing blob (byte-array) values that should be strings"`
	MetadataStringsToBlobs []string `doc:"List of metadatafields containing a string that should be blobs (byte-arrays)"`
}

// Transform enforces the Cast rules
Expand Down Expand Up @@ -95,6 +99,24 @@ func (cast *Cast) Transform(c *skogul.Container) error {

}
}
for _, value := range cast.DataBlobsToStrings {
if c.Metrics[mi].Data[value] != nil {
_, ok := c.Metrics[mi].Data[value].(string)
if ok {
continue
}
c.Metrics[mi].Data[value] = fmt.Sprintf("%s", c.Metrics[mi].Data[value])
}
}
for _, value := range cast.DataStringsToBlobs {
if c.Metrics[mi].Data[value] != nil {
cpy := fmt.Sprintf("%s", c.Metrics[mi].Data[value])
delete(c.Metrics[mi].Data, value)
c.Metrics[mi].Data[value] = make([]byte, len(cpy))
c.Metrics[mi].Data[value] = []byte(cpy)
}
}

for _, value := range cast.MetadataJson {
if c.Metrics[mi].Metadata[value] != nil {
tmp1, ok := c.Metrics[mi].Metadata[value].(string)
Expand Down Expand Up @@ -232,6 +254,24 @@ func (cast *Cast) Transform(c *skogul.Container) error {
c.Metrics[mi].Metadata[value] = cast.Inet6Aton(s)
}
}

for _, value := range cast.MetadataBlobsToStrings {
if c.Metrics[mi].Metadata[value] != nil {
_, ok := c.Metrics[mi].Metadata[value].(string)
if ok {
continue
}
c.Metrics[mi].Metadata[value] = fmt.Sprintf("%s", c.Metrics[mi].Metadata[value])
}
}
for _, value := range cast.MetadataStringsToBlobs {
if c.Metrics[mi].Metadata[value] != nil {
cpy := fmt.Sprintf("%s", c.Metrics[mi].Metadata[value])
delete(c.Metrics[mi].Metadata, value)
c.Metrics[mi].Metadata[value] = make([]byte, len(cpy))
c.Metrics[mi].Metadata[value] = []byte(cpy)
}
}
}
return nil
}
Expand Down
76 changes: 76 additions & 0 deletions transformer/edit.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,79 @@ func (replace *Replace) Verify() error {
skogul.Assert(regex != nil)
return nil
}

// ReplaceData is a copy of the Replace transformer, but operates on the
// Data fields of a metric instead of the Metadata fields.
//
// ReplaceData executes a regular expression replacement of metric data.
type ReplaceData struct {
	Source      string `doc:"Data key to read from."`
	Destination string `doc:"Data key to write to. Defaults to overwriting the source-key if left blank. Destination key will always be overwritten, e.g., even if the source key is missing, the key located at the destination will be removed."`
	Regex       string `doc:"Regular expression to match."`
	Replacement string `doc:"Replacement text. Can also use $1, $2, etc to reference sub-matches. Defaults to empty string - remove matching items."`
	regex       *regexp.Regexp // compiled from Regex on first Transform
	once        sync.Once      // guards one-time compilation and Destination defaulting
	err         error          // result of the one-time compilation
}

// Transform executes the regular expression replacement on the Source
// data field of every metric, writing the result to Destination.
func (replace *ReplaceData) Transform(c *skogul.Container) error {
	replace.once.Do(func() {
		if replace.Destination == "" {
			replace.Destination = replace.Source
		}
		replace.regex, replace.err = regexp.Compile(replace.Regex)
	})
	// Verify() should catch a broken regex, so there's no reasonable
	// way this can trigger. But in the off chance that a regex compiles
	// on the first attempt but not the second (e.g.: some serious bugs),
	// or if we botch up Verify() in some corner case, fail hard here.
	skogul.Assert(replace.err == nil)

	for _, metric := range c.Metrics {
		if metric.Data == nil {
			continue
		}
		value := metric.Data[replace.Source]
		if value == nil {
			delete(metric.Data, replace.Destination)
			continue
		}
		// FIXME: This should be a type cast to allow working with
		// both text strings (as per now) and []byte strings.
		// Similar to what is done in the ban transformer.
		str, ok := value.(string)
		if !ok {
			// FIXME: What to do? It's tempting to copy the
			// key, but that could mean multiple references to
			// the same memory, which can create unexpected
			// behavior if other transformers want to modify
			// just one of the headers.
			repLog.WithField("source", replace.Source).Printf("Unable to transform non-string field %s with content %v", replace.Source, value)
			// This is to confirm with the documentation and
			// ensure that this isn't exploited by providing a
			// bogus Source-field only to be able to provide a
			// custom destination field.
			delete(metric.Data, replace.Destination)
			continue
		}
		metric.Data[replace.Destination] = replace.regex.ReplaceAllString(str, replace.Replacement)
	}
	return nil
}

// Verify checks that the required configuration variables are set and
// that the regular expression compiles.
func (replace *ReplaceData) Verify() error {
	switch {
	case replace.Source == "":
		return skogul.MissingArgument("Source")
	case replace.Regex == "":
		return skogul.MissingArgument("Regex")
	}

	compiled, err := regexp.Compile(replace.Regex)
	if err != nil {
		return fmt.Errorf("replace transformer regex `%s' didn't compile: %w", replace.Regex, err)
	}
	skogul.Assert(compiled != nil)
	return nil
}
Loading