diff --git a/README.md b/README.md index e24ab077..be82ffef 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@
- + - +
@@ -76,6 +76,7 @@ - **[Transformers](./docs/transformers.md)** + - Detect [Newly Observed Domains](docs/transformers/transform_newdomaintracker.md) - [Rewrite](docs/transformers/transform_rewrite.md) DNS messages or custom [Relabeling](docs/transformers/transform_relabeling.md) for JSON output - Add additionnal [Tags](docs/transformers/transform_atags.md) in DNS messages - Traffic [Filtering](docs/transformers/transform_trafficfiltering.md) and [Reducer](docs/transformers/transform_trafficreducer.md) diff --git a/docs/_examples/use-case-31.yml b/docs/_examples/use-case-31.yml new file mode 100644 index 00000000..d5256ba1 --- /dev/null +++ b/docs/_examples/use-case-31.yml @@ -0,0 +1,33 @@ +global: + trace: + verbose: true + +pipelines: + - name: tap + dnstap: + listen-ip: 0.0.0.0 + listen-port: 6000 + transforms: + normalize: + qname-lowercase: true + qname-replace-nonprintable: true + routing-policy: + forward: [ detect_new_domain ] + dropped: [ ] + + - name: detect_new_domain + dnsmessage: + matching: + include: + dnstap.operation: "CLIENT_QUERY" + transforms: + new-domain-tracker: + ttl: 3600 + cache-size: 1000 + routing-policy: + forward: [ console ] + dropped: [ ] + + - name: console + stdout: + mode: text \ No newline at end of file diff --git a/docs/collectors/collector_dnsmessage.md b/docs/collectors/collector_dnsmessage.md index d2319c8e..54c30ec3 100644 --- a/docs/collectors/collector_dnsmessage.md +++ b/docs/collectors/collector_dnsmessage.md @@ -71,6 +71,6 @@ Finally a complete full example: atags: tags: [ "TXT:apple", "TXT:google" ] routing-policy: - dropped: [ outputfile ] + forward: [ outputfile ] default: [ console ] ``` \ No newline at end of file diff --git a/docs/examples.md b/docs/examples.md index ccda96b2..18e1694a 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -7,6 +7,7 @@ You will find below some examples of configurations to manage your DNS logs. - [x] [Advanced example with DNSmessage collector](./_examples/use-case-24.yml) - [x] [How can I log only slow responses and errors?"](./_examples/use-case-25.yml) - [x] [Filter DNStap messages where the response ip address is 0.0.0.0](./_examples/use-case-26.yml) + - [x] [Detect Newly Observed Domains](./_examples/use-case-31.yml) - **Capture DNS traffic from incoming DNSTap streams** - [x] [Read from UNIX DNSTap socket and forward it to TLS stream](./_examples/use-case-5.yml) diff --git a/docs/transformers.md b/docs/transformers.md index c6d3f46e..663fb61a 100644 --- a/docs/transformers.md +++ b/docs/transformers.md @@ -27,4 +27,5 @@ Transformers processing is currently in this order : | [Traffic Prediction](transformers/transform_trafficprediction.md) | Features to train machine learning models | | [Additionnal Tags](transformers/transform_atags.md) | Add additionnal tags | | [JSON relabeling](transformers/transform_relabeling.md) | JSON relabeling to rename or remove keys | -| [DNS message rewrite](transformers/transform_rewrite.md) | Rewrite value for DNS messages structure | +| [DNS message rewrite](transformers/transform_rewrite.md) | Rewrite value for DNS messages structure | +| [Newly Observed Domains](transformers/transform_newdomaintracker.md) | Detect Newly Observed Domains | diff --git a/docs/transformers/transform_newdomaintracker.md b/docs/transformers/transform_newdomaintracker.md new file mode 100644 index 00000000..b31ffdf2 --- /dev/null +++ b/docs/transformers/transform_newdomaintracker.md @@ -0,0 +1,72 @@ +# Transformer: New Domain Tracker Transformer + +The **New Domain Tracker** transformer identifies domains that are newly observed within a configurable time window. It is particularly useful for detecting potentially malicious or suspicious domains in DNS traffic, such as those used for phishing, malware, or botnets. + +## Features + +- **Configurable Time Window**: Define how long a domain is considered new. +- **LRU-based Memory Management**: Ensures efficient memory usage with a finite cache size. +- **Persistence**: Optionally save the domain cache to disk for continuity after restarts. +- **Whitelist Support**: Exclude specific domains or patterns from detection. + +## How It Works + +1. When a DNS query is processed, the transformer checks if the queried domain exists in its cache. +2. If the domain is not in the cache or has not been seen within the specified TTL, it is marked as newly observed. +3. The domain is added to the cache with a timestamp of when it was last seen. +4. Whitelisted domains are ignored and never marked as new. + +## Configuration: + +* `ttl` (integer) + > time window in seconds (e.g., 1 hour) + +* `cache-size` (integer) + > Maximum number of domains to track + +* `white-domains-file` (string) + > path file to domain white list, domains list can be a partial domain name with regexp expression + + +```yaml +transforms: + new-domain-tracker: + ttl: 3600 + cache-size: 100000 + white-domains-file: "" + persistence-file: "" +``` + +## Cache + +The New Domain Tracker uses an **LRU Cache** to manage memory consumption efficiently. You can configure the maximum number of domains stored in the cache using the max_size parameter. Once the cache reaches its maximum size, the least recently used entries will be removed to make room for new ones. +The LRU Cache ensures finite memory usage but may cause some domains to be forgotten if the cache size is too small. + + +## Whitelist + +Example of configuration to load a whitelist of domains to ignore. + +```yaml +transforms: + new-domain-tracker: + white-domains-file: /tmp/whitelist_domain.txt +``` + +Example of content for the file `/tmp/whitelist_domain.txt` + +``` +(mail|wwww).google.com +github.com +``` + +## Persistence + +To ensure continuity across application restarts, you can enable the persistence feature by specifying a file path (persistence). +The transformer will save the domain cache to this file and reload it on startup. + +```yaml +transforms: + new-domain-tracker: + persistence-file: /tmp/nod-state.json +``` \ No newline at end of file diff --git a/go.mod b/go.mod index 0b227028..c6e4b3f9 100644 --- a/go.mod +++ b/go.mod @@ -25,6 +25,7 @@ require ( github.com/google/uuid v1.6.0 github.com/grafana/dskit v0.0.0-20240905221822-931a021fb06b github.com/grafana/loki/v3 v3.2.1 + github.com/hashicorp/golang-lru v0.6.0 github.com/hashicorp/golang-lru/v2 v2.0.7 github.com/hpcloud/tail v1.0.0 github.com/influxdata/influxdb-client-go v1.4.0 @@ -92,7 +93,6 @@ require ( github.com/hashicorp/go-rootcerts v1.0.2 // indirect github.com/hashicorp/go-sockaddr v1.0.6 // indirect github.com/hashicorp/go-uuid v1.0.3 // indirect - github.com/hashicorp/golang-lru v0.6.0 // indirect github.com/hashicorp/memberlist v0.5.0 // indirect github.com/hashicorp/serf v0.10.1 // indirect github.com/huandu/xstrings v1.3.3 // indirect diff --git a/pkgconfig/transformers.go b/pkgconfig/transformers.go index 81ce79c8..a6c83dc3 100644 --- a/pkgconfig/transformers.go +++ b/pkgconfig/transformers.go @@ -95,6 +95,13 @@ type ConfigTransformers struct { Enable bool `yaml:"enable" default:"false"` Identifiers map[string]interface{} `yaml:"identifiers,flow"` } `yaml:"rewrite"` + NewDomainTracker struct { + Enable bool `yaml:"enable" default:"false"` + TTL int `yaml:"ttl" default:"3600"` + CacheSize int `yaml:"cache-size" default:"100000"` + WhiteDomainsFile string `yaml:"white-domains-file" default:""` + PersistenceFile string `yaml:"persistence-file" default:""` + } `yaml:"new-domain-tracker"` } func (c *ConfigTransformers) SetDefault() { diff --git a/tests/testsdata/newdomain_whitelist_regex.txt b/tests/testsdata/newdomain_whitelist_regex.txt new file mode 100644 index 00000000..0196c3c2 --- /dev/null +++ b/tests/testsdata/newdomain_whitelist_regex.txt @@ -0,0 +1,2 @@ +.*\.google\.com +github\.com \ No newline at end of file diff --git a/transformers/newdomaintracker.go b/transformers/newdomaintracker.go new file mode 100644 index 00000000..6e2e80f7 --- /dev/null +++ b/transformers/newdomaintracker.go @@ -0,0 +1,204 @@ +package transformers + +import ( + "bufio" + "encoding/json" + "errors" + "fmt" + "os" + "regexp" + "strings" + "time" + + "github.com/dmachard/go-dnscollector/dnsutils" + "github.com/dmachard/go-dnscollector/pkgconfig" + "github.com/dmachard/go-logger" + "github.com/hashicorp/golang-lru/v2/expirable" +) + +type NewDomainTracker struct { + ttl time.Duration // Time window to consider a domain as "new" + cache *expirable.LRU[string, struct{}] // Expirable LRU Cache + whitelist map[string]*regexp.Regexp // Whitelisted domains + persistencePath string + logInfo func(msg string, v ...interface{}) + logError func(msg string, v ...interface{}) +} + +func NewNewDomainTracker(ttl time.Duration, maxSize int, whitelist map[string]*regexp.Regexp, persistencePath string, logInfo, logError func(msg string, v ...interface{})) (*NewDomainTracker, error) { + + if ttl <= 0 { + return nil, fmt.Errorf("invalid TTL value: %v", ttl) + } + + cache := expirable.NewLRU[string, struct{}](maxSize, nil, ttl) + + tracker := &NewDomainTracker{ + ttl: ttl, + cache: cache, + whitelist: whitelist, + persistencePath: persistencePath, + logInfo: logInfo, + logError: logError, + } + // Load cache state from disk if persistence is enabled + if persistencePath != "" { + if err := tracker.loadCacheFromDisk(); err != nil { + return nil, fmt.Errorf("failed to load cache state: %w", err) + } + } + + return tracker, nil +} + +func (ndt *NewDomainTracker) isWhitelisted(domain string) bool { + for _, d := range ndt.whitelist { + if d.MatchString(domain) { + return true + } + } + return false +} + +func (ndt *NewDomainTracker) IsNewDomain(domain string) bool { + // Check if the domain is whitelisted + if ndt.isWhitelisted(domain) { + return false + } + + // Check if the domain exists in the cache + if _, exists := ndt.cache.Get(domain); exists { + // Domain was recently seen, not new + return false + } + + // Otherwise, mark the domain as new + ndt.cache.Add(domain, struct{}{}) + return true +} + +func (ndt *NewDomainTracker) SaveCacheToDisk() error { + keys := ndt.cache.Keys() + data, err := json.Marshal(keys) + if err != nil { + return err + } + + return os.WriteFile(ndt.persistencePath, data, 0644) +} + +// loadCacheFromDisk loads the cache state from a file +func (ndt *NewDomainTracker) loadCacheFromDisk() error { + if ndt.persistencePath == "" { + return errors.New("persistence filepath not set") + } + + data, err := os.ReadFile(ndt.persistencePath) + if err != nil { + if os.IsNotExist(err) { + return nil // File does not exist, no previous state to load + } + return err + } + + var keys []string + if err := json.Unmarshal(data, &keys); err != nil { + return err + } + + for _, key := range keys { + ndt.cache.Add(key, struct{}{}) + } + + return nil +} + +// NewDomainTransform is the Transformer for DNS messages +type NewDomainTrackerTransform struct { + GenericTransformer + domainTracker *NewDomainTracker + listDomainsRegex map[string]*regexp.Regexp +} + +// NewNewDomainTransform creates a new instance of the transformer +func NewNewDomainTrackerTransform(config *pkgconfig.ConfigTransformers, logger *logger.Logger, name string, instance int, nextWorkers []chan dnsutils.DNSMessage) *NewDomainTrackerTransform { + t := &NewDomainTrackerTransform{GenericTransformer: NewTransformer(config, logger, "new-domain-tracker", name, instance, nextWorkers)} + t.listDomainsRegex = make(map[string]*regexp.Regexp) + return t +} + +// ReloadConfig reloads the configuration +func (t *NewDomainTrackerTransform) ReloadConfig(config *pkgconfig.ConfigTransformers) { + t.GenericTransformer.ReloadConfig(config) + ttl := time.Duration(config.NewDomainTracker.TTL) * time.Second + t.domainTracker.ttl = ttl + t.LogInfo("new-domain-transformer configuration reloaded") +} + +func (t *NewDomainTrackerTransform) GetTransforms() ([]Subtransform, error) { + subtransforms := []Subtransform{} + if t.config.NewDomainTracker.Enable { + // init whitelist + if err := t.LoadWhiteDomainsList(); err != nil { + return nil, err + } + + // Initialize the domain tracker + ttl := time.Duration(t.config.NewDomainTracker.TTL) * time.Second + maxSize := t.config.NewDomainTracker.CacheSize + tracker, err := NewNewDomainTracker(ttl, maxSize, t.listDomainsRegex, t.config.NewDomainTracker.PersistenceFile, t.LogInfo, t.LogError) + if err != nil { + return nil, err + } + t.domainTracker = tracker + + subtransforms = append(subtransforms, Subtransform{name: "new-domain-tracker:detect", processFunc: t.trackNewDomain}) + } + return subtransforms, nil +} + +func (t *NewDomainTrackerTransform) LoadWhiteDomainsList() error { + // before to start, reset all maps + for key := range t.listDomainsRegex { + delete(t.listDomainsRegex, key) + } + + if len(t.config.NewDomainTracker.WhiteDomainsFile) > 0 { + file, err := os.Open(t.config.NewDomainTracker.WhiteDomainsFile) + if err != nil { + return fmt.Errorf("unable to open regex list file: %w", err) + } else { + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + domain := strings.ToLower(scanner.Text()) + t.listDomainsRegex[domain] = regexp.MustCompile(domain) + } + t.LogInfo("loaded with %d domains in the whitelist", len(t.listDomainsRegex)) + } + } + return nil +} + +// Process processes DNS messages and detects newly observed domains +func (t *NewDomainTrackerTransform) trackNewDomain(dm *dnsutils.DNSMessage) (int, error) { + // Log a warning if the cache is full (before adding the new domain) + if t.domainTracker.cache.Len() == t.config.NewDomainTracker.CacheSize { + return ReturnError, fmt.Errorf("LRU cache is full. Consider increasing cache-size to avoid frequent evictions") + } + + // Check if the domain is newly observed + if t.domainTracker.IsNewDomain(dm.DNS.Qname) { + return ReturnKeep, nil + } + return ReturnDrop, nil +} + +func (t *NewDomainTrackerTransform) Reset() { + if len(t.domainTracker.persistencePath) != 0 { + if err := t.domainTracker.SaveCacheToDisk(); err != nil { + t.LogError("failed to save cache state: %v", err) + } + t.LogInfo("cache content saved on disk with success") + } +} diff --git a/transformers/newdomaintracker_test.go b/transformers/newdomaintracker_test.go new file mode 100644 index 00000000..f63ade2e --- /dev/null +++ b/transformers/newdomaintracker_test.go @@ -0,0 +1,115 @@ +package transformers + +import ( + "testing" + "time" + + "github.com/dmachard/go-dnscollector/dnsutils" + "github.com/dmachard/go-dnscollector/pkgconfig" + "github.com/dmachard/go-logger" +) + +func TestNewDomainTracker_IsNew(t *testing.T) { + // config + config := pkgconfig.GetFakeConfigTransformers() + config.NewDomainTracker.Enable = true + config.NewDomainTracker.TTL = 2 + config.NewDomainTracker.CacheSize = 10 + + outChans := []chan dnsutils.DNSMessage{} + + // init subproccesor + tracker := NewNewDomainTrackerTransform(config, logger.New(false), "test", 0, outChans) + + // init transforms + _, err := tracker.GetTransforms() + if err != nil { + t.Error("fail to init transform", err) + } + + // first send + dm := dnsutils.GetFakeDNSMessage() + if result, _ := tracker.trackNewDomain(&dm); result != ReturnKeep { + t.Errorf("1. this domain should be new!!") + } + if result, _ := tracker.trackNewDomain(&dm); result != ReturnDrop { + t.Errorf("2. this domain should NOT be new!!") + } + + // wait ttl for expiration + time.Sleep(3 * time.Second) + + // recheck + if result, _ := tracker.trackNewDomain(&dm); result != ReturnKeep { + t.Errorf("3. this domain should be new!!") + } +} + +func TestNewDomainTracker_Whitelist(t *testing.T) { + // config + config := pkgconfig.GetFakeConfigTransformers() + config.NewDomainTracker.Enable = true + config.NewDomainTracker.TTL = 2 + config.NewDomainTracker.CacheSize = 10 + config.NewDomainTracker.WhiteDomainsFile = "../tests/testsdata/newdomain_whitelist_regex.txt" + + // init subproccesor + outChans := []chan dnsutils.DNSMessage{} + tracker := NewNewDomainTrackerTransform(config, logger.New(false), "test", 0, outChans) + _, err := tracker.GetTransforms() + if err != nil { + t.Error("fail to init transform", err) + } + + // first test, check domain in whilist + dm := dnsutils.GetFakeDNSMessage() + dm.DNS.Qname = testURL1 + if result, _ := tracker.trackNewDomain(&dm); result != ReturnDrop { + t.Errorf("2. this domain should NOT be new!!") + } + + // second test, check domain in whilist + dm = dnsutils.GetFakeDNSMessage() + if result, _ := tracker.trackNewDomain(&dm); result != ReturnKeep { + t.Errorf("2. this domain should be new!!") + } +} + +func TestNewDomainTracker_LRUCacheFull(t *testing.T) { + // config + config := pkgconfig.GetFakeConfigTransformers() + config.NewDomainTracker.Enable = true + config.NewDomainTracker.TTL = 2 + config.NewDomainTracker.CacheSize = 1 + + outChans := []chan dnsutils.DNSMessage{} + + // init subproccesor + tracker := NewNewDomainTrackerTransform(config, logger.New(false), "test", 0, outChans) + + // init transforms + _, err := tracker.GetTransforms() + if err != nil { + t.Error("fail to init transform", err) + } + + // Send the first domain + dm := dnsutils.GetFakeDNSMessage() + if result, _ := tracker.trackNewDomain(&dm); result != ReturnKeep { + t.Errorf("This domain should be new!") + } + + // Send the same domain again (should return an error because cache is full) + result, _ := tracker.trackNewDomain(&dm) + if result != ReturnError { + t.Errorf("Cache full check failed, expected ReturnError") + } + + // Wait for TTL expiration + time.Sleep(4 * time.Second) + + // Retry the domain after TTL expiration (should be considered new again) + if result, _ := tracker.trackNewDomain(&dm); result != ReturnKeep { + t.Errorf("recheck, this domain should be new!!") + } +} diff --git a/transformers/transformers.go b/transformers/transformers.go index 915c4886..808bcc67 100644 --- a/transformers/transformers.go +++ b/transformers/transformers.go @@ -9,8 +9,9 @@ import ( ) var ( - ReturnKeep = 1 - ReturnDrop = 2 + ReturnError = 0 + ReturnKeep = 1 + ReturnDrop = 2 ) type Subtransform struct { @@ -63,8 +64,9 @@ type Transforms struct { name string instance int - availableTransforms []TransformEntry - activeTransforms []func(dm *dnsutils.DNSMessage) (int, error) + availableTransforms []TransformEntry + activeTransforms []TransformEntry + activeProcessTransforms []func(dm *dnsutils.DNSMessage) (int, error) } func NewTransforms(config *pkgconfig.ConfigTransformers, logger *logger.Logger, name string, nextWorkers []chan dnsutils.DNSMessage, instance int) Transforms { @@ -84,6 +86,7 @@ func NewTransforms(config *pkgconfig.ConfigTransformers, logger *logger.Logger, d.availableTransforms = append(d.availableTransforms, TransformEntry{NewLatencyTransform(config, logger, name, instance, nextWorkers)}) d.availableTransforms = append(d.availableTransforms, TransformEntry{NewDNSGeoIPTransform(config, logger, name, instance, nextWorkers)}) d.availableTransforms = append(d.availableTransforms, TransformEntry{NewRewriteTransform(config, logger, name, instance, nextWorkers)}) + d.availableTransforms = append(d.availableTransforms, TransformEntry{NewNewDomainTrackerTransform(config, logger, name, instance, nextWorkers)}) d.Prepare() return d @@ -101,17 +104,22 @@ func (p *Transforms) ReloadConfig(config *pkgconfig.ConfigTransformers) { func (p *Transforms) Prepare() error { // clean the slice + p.activeProcessTransforms = p.activeProcessTransforms[:0] p.activeTransforms = p.activeTransforms[:0] tranformsList := []string{} for _, transform := range p.availableTransforms { subtransforms, err := transform.GetTransforms() if err != nil { - p.LogError("error on init subtransforms:", err) + p.LogError("error on init subtransforms: %v", err) continue } + if len(subtransforms) > 0 { + p.activeTransforms = append(p.activeTransforms, transform) + } for _, subtransform := range subtransforms { - p.activeTransforms = append(p.activeTransforms, subtransform.processFunc) + p.activeProcessTransforms = append(p.activeProcessTransforms, subtransform.processFunc) + tranformsList = append(tranformsList, subtransform.name) } } @@ -123,7 +131,7 @@ func (p *Transforms) Prepare() error { } func (p *Transforms) Reset() { - for _, transform := range p.availableTransforms { + for _, transform := range p.activeTransforms { transform.Reset() } } @@ -138,7 +146,7 @@ func (p *Transforms) LogError(msg string, v ...interface{}) { } func (p *Transforms) ProcessMessage(dm *dnsutils.DNSMessage) (int, error) { - for _, transform := range p.activeTransforms { + for _, transform := range p.activeProcessTransforms { if result, err := transform(dm); err != nil { return ReturnKeep, fmt.Errorf("error on transform processing: %v", err.Error()) } else if result == ReturnDrop { diff --git a/workers/dnsmessage.go b/workers/dnsmessage.go index d338e052..2641573f 100644 --- a/workers/dnsmessage.go +++ b/workers/dnsmessage.go @@ -182,6 +182,7 @@ func (w *DNSMessage) StartCollect() { for { select { case <-w.OnStop(): + subprocessors.Reset() return // save the new config