From 5ab06563957dd8b08f32e90f16f049c4c2228fcf Mon Sep 17 00:00:00 2001 From: ayoubelqadi Date: Thu, 16 Jan 2025 09:18:50 +0100 Subject: [PATCH 1/8] moderator plugin --- examples/test_prompt_moderation.sh | 222 ++++++++++++++++++++++++ pkg/plugins/manager.go | 5 + pkg/plugins/prompt_moderation/plugin.go | 219 +++++++++++++++++++++++ 3 files changed, 446 insertions(+) create mode 100755 examples/test_prompt_moderation.sh create mode 100644 pkg/plugins/prompt_moderation/plugin.go diff --git a/examples/test_prompt_moderation.sh b/examples/test_prompt_moderation.sh new file mode 100755 index 0000000..92c7153 --- /dev/null +++ b/examples/test_prompt_moderation.sh @@ -0,0 +1,222 @@ +#!/bin/bash + +# Colors for output +GREEN='\033[0;32m' +RED='\033[0;31m' +NC='\033[0m' + +# Use environment variables or defaults +ADMIN_URL=${ADMIN_URL:-"http://localhost:8080/api/v1"} +PROXY_URL=${PROXY_URL:-"http://localhost:8081"} +BASE_DOMAIN=${BASE_DOMAIN:-"example.com"} +SUBDOMAIN="prompt-mod-$(date +%s)" + +echo -e "${GREEN}Testing Prompt Moderation${NC}\n" + +# 1. Create a gateway with prompt moderation plugin +echo -e "${GREEN}1. Creating gateway with prompt moderation plugin...${NC}" +GATEWAY_RESPONSE=$(curl -s -X POST "$ADMIN_URL/gateways" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "Prompt Moderation Gateway", + "subdomain": "'$SUBDOMAIN'", + "required_plugins": [ + { + "name": "prompt_moderation", + "enabled": true, + "stage": "pre_request", + "priority": 1, + "settings": { + "similarity_threshold": 0.5, + "keywords": [ + "hack", + "exploit", + "vulnerability" + ], + "regex": [ + "password.*dump", + "sql.*injection", + "CVE-\\d{4}-\\d{4,7}" + ], + "actions": { + "type": "block", + "message": "Content blocked due to prohibited content: %s" + } + } + } + ] +}') + +# Extract gateway details +GATEWAY_ID=$(echo $GATEWAY_RESPONSE | jq -r '.id') +SUBDOMAIN=$(echo $GATEWAY_RESPONSE | jq -r '.subdomain') + +if [ "$GATEWAY_ID" == "null" ] || [ -z "$GATEWAY_ID" ]; then + echo -e "${RED}Failed to create gateway. Response: $GATEWAY_RESPONSE${NC}" + exit 1 +fi + +echo "Gateway created with ID: $GATEWAY_ID" + +# Create API key +echo -e "\n${GREEN}2. Creating API key...${NC}" +API_KEY_RESPONSE=$(curl -s -X POST "$ADMIN_URL/gateways/$GATEWAY_ID/keys" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "Test Key", + "expires_at": "2026-01-01T00:00:00Z" +}') + +API_KEY=$(echo $API_KEY_RESPONSE | jq -r '.key') + +if [ "$API_KEY" == "null" ] || [ -z "$API_KEY" ]; then + echo -e "${RED}Failed to create API key. Response: $API_KEY_RESPONSE${NC}" + exit 1 +fi + +echo "API Key created: $API_KEY" + +# Create upstream +echo -e "\n${GREEN}3. Creating upstream...${NC}" +UPSTREAM_RESPONSE=$(curl -s -X POST "$ADMIN_URL/gateways/$GATEWAY_ID/upstreams" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "httpbin-upstream-'$(date +%s)'", + "algorithm": "round-robin", + "targets": [{ + "host": "httpbin.org", + "port": 443, + "protocol": "https", + "path": "/post", + "weight": 100, + "priority": 1 + }], + "health_checks": { + "passive": true, + "threshold": 3, + "interval": 60 + } +}') + +UPSTREAM_ID=$(echo $UPSTREAM_RESPONSE | jq -r '.id') + +if [ "$UPSTREAM_ID" == "null" ] || [ -z "$UPSTREAM_ID" ]; then + echo -e "${RED}Failed to create upstream. Response: $UPSTREAM_RESPONSE${NC}" + exit 1 +fi + +echo "Upstream created with ID: $UPSTREAM_ID" + +# Create service +echo -e "\n${GREEN}4. Creating service...${NC}" +SERVICE_RESPONSE=$(curl -s -X POST "$ADMIN_URL/gateways/$GATEWAY_ID/services" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "httpbin-service-'$(date +%s)'", + "type": "upstream", + "description": "HTTPBin test service", + "upstream_id": "'$UPSTREAM_ID'", + "strip_path": true +}') + +SERVICE_ID=$(echo $SERVICE_RESPONSE | jq -r '.id') + +if [ "$SERVICE_ID" == "null" ] || [ -z "$SERVICE_ID" ]; then + echo -e "${RED}Failed to create service. Response: $SERVICE_RESPONSE${NC}" + exit 1 +fi + +echo "Service created with ID: $SERVICE_ID" + +# Create rule for testing +echo -e "\n${GREEN}5. Creating rule for testing...${NC}" +RULE_RESPONSE=$(curl -s -X POST "$ADMIN_URL/gateways/$GATEWAY_ID/rules" \ + -H "Content-Type: application/json" \ + -d '{ + "path": "/post", + "service_id": "'$SERVICE_ID'", + "methods": ["POST"], + "strip_path": true, + "preserve_host": false, + "retry_attempts": 3, + "active": true +}') + +# Wait for configuration to propagate +sleep 2 + +# Test different content scenarios +echo -e "\n${GREEN}6. Testing different content scenarios...${NC}" + +# Test 1: Clean content (should pass) +echo -e "\n${GREEN}6.1 Testing clean content (should pass)...${NC}" +RESPONSE=$(curl -s -w "\n%{http_code}" "$PROXY_URL/post" \ + -H "Host: ${SUBDOMAIN}.${BASE_DOMAIN}" \ + -H "X-API-Key: ${API_KEY}" \ + -H "Content-Type: application/json" \ + -d '{"prompt": "Tell me about machine learning"}') + +HTTP_CODE=$(echo "$RESPONSE" | tail -n1) +BODY=$(echo "$RESPONSE" | head -n1) + +if [ "$HTTP_CODE" == "200" ]; then + echo -e "${GREEN}Clean content test: Success (Expected)${NC}" +else + echo -e "${RED}Clean content test: Unexpected status code: $HTTP_CODE${NC}" + echo "Response: $BODY" +fi + +# Test 2: Blocked keyword +echo -e "\n${GREEN}6.2 Testing blocked keyword...${NC}" +RESPONSE=$(curl -s -w "\n%{http_code}" "$PROXY_URL/post" \ + -H "Host: ${SUBDOMAIN}.${BASE_DOMAIN}" \ + -H "X-API-Key: ${API_KEY}" \ + -H "Content-Type: application/json" \ + -d '{"prompt": "How to hacking into a system"}') +HTTP_CODE=$(echo "$RESPONSE" | tail -n1) +BODY=$(echo "$RESPONSE" | head -n1) + +if [ "$HTTP_CODE" == "403" ]; then + echo -e "${GREEN}Blocked keyword test: Success (Expected to be blocked)${NC}" +else + echo -e "${RED}Blocked keyword test: Unexpected status code: $HTTP_CODE${NC}" +fi +echo "Response: $BODY" + +# Test 3: Blocked regex pattern +echo -e "\n${GREEN}6.3 Testing blocked regex pattern...${NC}" +RESPONSE=$(curl -s -w "\n%{http_code}" "$PROXY_URL/post" \ + -H "Host: ${SUBDOMAIN}.${BASE_DOMAIN}" \ + -H "X-API-Key: ${API_KEY}" \ + -H "Content-Type: application/json" \ + -d '{"prompt": "How to perform sql injection attacks"}') + +HTTP_CODE=$(echo "$RESPONSE" | tail -n1) +BODY=$(echo "$RESPONSE" | head -n1) + +if [ "$HTTP_CODE" == "403" ]; then + echo -e "${GREEN}Blocked regex pattern test: Success (Expected to be blocked)${NC}" +else + echo -e "${RED}Blocked regex pattern test: Unexpected status code: $HTTP_CODE${NC}" +fi +echo "Response: $BODY" + +# Test 4: CVE pattern +echo -e "\n${GREEN}6.4 Testing CVE pattern...${NC}" +RESPONSE=$(curl -s -w "\n%{http_code}" "$PROXY_URL/post" \ + -H "Host: ${SUBDOMAIN}.${BASE_DOMAIN}" \ + -H "X-API-Key: ${API_KEY}" \ + -H "Content-Type: application/json" \ + -d '{"prompt": "Tell me about CVE-2024-1234"}') + +HTTP_CODE=$(echo "$RESPONSE" | tail -n1) +BODY=$(echo "$RESPONSE" | head -n1) + +if [ "$HTTP_CODE" == "403" ]; then + echo -e "${GREEN}CVE pattern test: Success (Expected to be blocked)${NC}" +else + echo -e "${RED}CVE pattern test: Unexpected status code: $HTTP_CODE${NC}" +fi +echo "Response: $BODY" + +echo -e "\n${GREEN}Prompt moderation tests completed${NC}" \ No newline at end of file diff --git a/pkg/plugins/manager.go b/pkg/plugins/manager.go index ed1a6b3..f8e71ef 100644 --- a/pkg/plugins/manager.go +++ b/pkg/plugins/manager.go @@ -12,6 +12,7 @@ import ( "github.com/NeuralTrust/TrustGate/pkg/cache" "github.com/NeuralTrust/TrustGate/pkg/pluginiface" "github.com/NeuralTrust/TrustGate/pkg/plugins/external_api" + "github.com/NeuralTrust/TrustGate/pkg/plugins/prompt_moderation" "github.com/NeuralTrust/TrustGate/pkg/plugins/rate_limiter" "github.com/NeuralTrust/TrustGate/pkg/plugins/token_rate_limiter" "github.com/NeuralTrust/TrustGate/pkg/types" @@ -62,6 +63,10 @@ func InitializePlugins(cache *cache.Cache, logger *logrus.Logger) { if err := manager.RegisterPlugin(token_rate_limiter.NewTokenRateLimiterPlugin(logger, cache.Client())); err != nil { logger.WithError(err).Error("Failed to register token rate limiter plugin") } + + if err := manager.RegisterPlugin(prompt_moderation.NewPromptModerationPlugin(logger)); err != nil { + logger.WithError(err).Error("Failed to register prompt moderation plugin") + } } // ValidatePlugin validates a plugin configuration diff --git a/pkg/plugins/prompt_moderation/plugin.go b/pkg/plugins/prompt_moderation/plugin.go new file mode 100644 index 0000000..76c8662 --- /dev/null +++ b/pkg/plugins/prompt_moderation/plugin.go @@ -0,0 +1,219 @@ +package prompt_moderation + +import ( + "context" + "fmt" + "regexp" + "strings" + + "github.com/mitchellh/mapstructure" + "github.com/sirupsen/logrus" + + "github.com/NeuralTrust/TrustGate/pkg/pluginiface" + "github.com/NeuralTrust/TrustGate/pkg/types" +) + +const ( + PluginName = "prompt_moderation" + // Similarity threshold (0-1), where 1 means exact match + SimilarityThreshold = 0.8 +) + +type PromptModerationPlugin struct { + logger *logrus.Logger + keywords []string + regexRules []*regexp.Regexp +} + +type Config struct { + Keywords []string `mapstructure:"keywords"` + Regex []string `mapstructure:"regex"` + Actions struct { + Type string `mapstructure:"type"` + Message string `mapstructure:"message"` + } `mapstructure:"actions"` + SimilarityThreshold float64 `mapstructure:"similarity_threshold"` +} + +// levenshteinDistance calculates the minimum number of single-character edits required to change one word into another +func levenshteinDistance(s1, s2 string) int { + s1 = strings.ToLower(s1) + s2 = strings.ToLower(s2) + + if len(s1) == 0 { + return len(s2) + } + if len(s2) == 0 { + return len(s1) + } + + matrix := make([][]int, len(s1)+1) + for i := range matrix { + matrix[i] = make([]int, len(s2)+1) + matrix[i][0] = i + } + for j := range matrix[0] { + matrix[0][j] = j + } + + for i := 1; i <= len(s1); i++ { + for j := 1; j <= len(s2); j++ { + cost := 1 + if s1[i-1] == s2[j-1] { + cost = 0 + } + matrix[i][j] = min( + matrix[i-1][j]+1, + matrix[i][j-1]+1, + matrix[i-1][j-1]+cost, + ) + } + } + return matrix[len(s1)][len(s2)] +} + +// min returns the minimum of three integers +func min(a, b, c int) int { + if a < b { + if a < c { + return a + } + return c + } + if b < c { + return b + } + return c +} + +// calculateSimilarity returns a similarity score between 0 and 1 +func calculateSimilarity(s1, s2 string) float64 { + distance := levenshteinDistance(s1, s2) + maxLen := float64(max(len(s1), len(s2))) + if maxLen == 0 { + return 1.0 + } + return 1.0 - float64(distance)/maxLen +} + +// max returns the maximum of two integers +func max(a, b int) int { + if a > b { + return a + } + return b +} + +// findSimilarKeyword checks if any word in the text is similar to the blocked keywords +func (p *PromptModerationPlugin) findSimilarKeyword(text string, threshold float64) (string, string, bool) { + words := strings.Fields(text) + for _, word := range words { + for _, keyword := range p.keywords { + similarity := calculateSimilarity(word, keyword) + if similarity >= threshold { + return word, keyword, true + } + } + } + return "", "", false +} + +func NewPromptModerationPlugin(logger *logrus.Logger) pluginiface.Plugin { + return &PromptModerationPlugin{ + logger: logger, + keywords: make([]string, 0), + regexRules: make([]*regexp.Regexp, 0), + } +} + +func (p *PromptModerationPlugin) Name() string { + return PluginName +} + +func (p *PromptModerationPlugin) Stages() []types.Stage { + return []types.Stage{types.PreRequest} +} + +func (p *PromptModerationPlugin) AllowedStages() []types.Stage { + return []types.Stage{types.PreRequest} +} + +type PromptModerationValidator struct{} + +func (v *PromptModerationValidator) ValidateConfig(config types.PluginConfig) error { + var cfg Config + if err := mapstructure.Decode(config.Settings, &cfg); err != nil { + return fmt.Errorf("failed to decode config: %v", err) + } + + // Validate keywords + if len(cfg.Keywords) == 0 && len(cfg.Regex) == 0 { + return fmt.Errorf("at least one keyword or regex pattern must be specified") + } + + // Validate regex patterns + for _, pattern := range cfg.Regex { + if _, err := regexp.Compile(pattern); err != nil { + return fmt.Errorf("invalid regex pattern '%s': %v", pattern, err) + } + } + + // Validate actions + if cfg.Actions.Type == "" { + return fmt.Errorf("action type must be specified") + } + + return nil +} + +func (p *PromptModerationPlugin) Execute(ctx context.Context, cfg types.PluginConfig, req *types.RequestContext, resp *types.ResponseContext) (*types.PluginResponse, error) { + var config Config + if err := mapstructure.Decode(cfg.Settings, &config); err != nil { + return nil, fmt.Errorf("failed to decode config: %v", err) + } + + threshold := config.SimilarityThreshold + if threshold == 0 { + threshold = SimilarityThreshold + } + + // Initialize or update rules + p.keywords = config.Keywords + p.regexRules = make([]*regexp.Regexp, len(config.Regex)) + for i, pattern := range config.Regex { + regex, err := regexp.Compile(pattern) + if err != nil { + return nil, fmt.Errorf("failed to compile regex pattern '%s': %v", pattern, err) + } + p.regexRules[i] = regex + } + + // Check request body for keywords and patterns + content := string(req.Body) + + // Check for similar keywords + if foundWord, keyword, found := p.findSimilarKeyword(content, threshold); found { + return nil, &types.PluginError{ + StatusCode: 403, + Message: fmt.Sprintf(config.Actions.Message+" (similar to '%s')", foundWord, keyword), + Err: fmt.Errorf("word '%s' is similar to blocked keyword '%s'", foundWord, keyword), + } + } + + // Check regex patterns + for _, regex := range p.regexRules { + if regex.MatchString(content) { + return nil, &types.PluginError{ + StatusCode: 403, + Message: fmt.Sprintf(config.Actions.Message, regex.String()), + Err: fmt.Errorf("regex pattern %s found in request body", regex.String()), + } + } + } + + // No matches found, allow the request to proceed + return &types.PluginResponse{ + StatusCode: 200, + Message: "Request allowed", + }, nil +} From 6436ff7973eead2d71df7a552381ce7a39ee09e9 Mon Sep 17 00:00:00 2001 From: ayoubelqadi Date: Thu, 16 Jan 2025 14:04:39 +0100 Subject: [PATCH 2/8] Added data masking plugin and test --- examples/test_data_masking.sh | 195 +++++++++++++++++++++++ pkg/plugins/data_masking/plugin.go | 241 +++++++++++++++++++++++++++++ 2 files changed, 436 insertions(+) create mode 100755 examples/test_data_masking.sh create mode 100644 pkg/plugins/data_masking/plugin.go diff --git a/examples/test_data_masking.sh b/examples/test_data_masking.sh new file mode 100755 index 0000000..df07393 --- /dev/null +++ b/examples/test_data_masking.sh @@ -0,0 +1,195 @@ +#!/bin/bash + +# Colors for output +GREEN='\033[0;32m' +RED='\033[0;31m' +NC='\033[0m' + +# Use environment variables or defaults +ADMIN_URL=${ADMIN_URL:-"http://localhost:8080/api/v1"} +PROXY_URL=${PROXY_URL:-"http://localhost:8081"} +BASE_DOMAIN=${BASE_DOMAIN:-"example.com"} +SUBDOMAIN="datamask-$(date +%s)" + +echo -e "${GREEN}Testing Data Masking Plugin${NC}\n" + +# 1. Create a gateway with data masking plugin +echo -e "${GREEN}1. Creating gateway with data masking plugin...${NC}" +GATEWAY_RESPONSE=$(curl -s -X POST "$ADMIN_URL/gateways" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "Data Masking Gateway", + "subdomain": "'$SUBDOMAIN'", + "required_plugins": [ + { + "name": "data_masking", + "enabled": true, + "stage": "pre_response", + "priority": 1, + "settings": { + "rules": [ + { + "pattern": "credit_card", + "type": "keyword", + "mask_with": "****", + "preserve_len": true + }, + { + "pattern": "\\b\\d{4}[-\\s]?\\d{4}[-\\s]?\\d{4}[-\\s]?\\d{4}\\b", + "type": "regex", + "mask_with": "X", + "preserve_len": true + }, + { + "pattern": "\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}\\b", + "type": "regex", + "mask_with": "[MASKED_EMAIL]", + "preserve_len": false + } + ] + } + } + ] +}') + +# Extract gateway details +GATEWAY_ID=$(echo $GATEWAY_RESPONSE | jq -r '.id') +SUBDOMAIN=$(echo $GATEWAY_RESPONSE | jq -r '.subdomain') + +if [ "$GATEWAY_ID" == "null" ] || [ -z "$GATEWAY_ID" ]; then + echo -e "${RED}Failed to create gateway. Response: $GATEWAY_RESPONSE${NC}" + exit 1 +fi + +echo "Gateway created with ID: $GATEWAY_ID" + +# Create API key +echo -e "\n${GREEN}2. Creating API key...${NC}" +API_KEY_RESPONSE=$(curl -s -X POST "$ADMIN_URL/gateways/$GATEWAY_ID/keys" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "Test Key", + "expires_at": "2026-01-01T00:00:00Z" +}') + +API_KEY=$(echo $API_KEY_RESPONSE | jq -r '.key') + +if [ "$API_KEY" == "null" ] || [ -z "$API_KEY" ]; then + echo -e "${RED}Failed to create API key. Response: $API_KEY_RESPONSE${NC}" + exit 1 +fi + +echo "API Key created: $API_KEY" + +# Create upstream +echo -e "\n${GREEN}3. Creating upstream...${NC}" +UPSTREAM_RESPONSE=$(curl -s -X POST "$ADMIN_URL/gateways/$GATEWAY_ID/upstreams" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "echo-upstream-'$(date +%s)'", + "algorithm": "round-robin", + "targets": [{ + "host": "postman-echo.com", + "port": 443, + "protocol": "https", + "weight": 100, + "priority": 1 + }], + "health_checks": { + "passive": true, + "threshold": 3, + "interval": 60 + } +}') + +UPSTREAM_ID=$(echo $UPSTREAM_RESPONSE | jq -r '.id') + +if [ "$UPSTREAM_ID" == "null" ] || [ -z "$UPSTREAM_ID" ]; then + echo -e "${RED}Failed to create upstream. Response: $UPSTREAM_RESPONSE${NC}" + exit 1 +fi + +echo "Upstream created with ID: $UPSTREAM_ID" + +# Create service +echo -e "\n${GREEN}4. Creating service...${NC}" +SERVICE_RESPONSE=$(curl -s -X POST "$ADMIN_URL/gateways/$GATEWAY_ID/services" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "echo-service-'$(date +%s)'", + "type": "upstream", + "description": "Echo test service", + "upstream_id": "'$UPSTREAM_ID'" +}') + +SERVICE_ID=$(echo $SERVICE_RESPONSE | jq -r '.id') + +if [ "$SERVICE_ID" == "null" ] || [ -z "$SERVICE_ID" ]; then + echo -e "${RED}Failed to create service. Response: $SERVICE_RESPONSE${NC}" + exit 1 +fi + +echo "Service created with ID: $SERVICE_ID" + +# Create rule for testing +echo -e "\n${GREEN}5. Creating rule for testing...${NC}" +RULE_RESPONSE=$(curl -s -X POST "$ADMIN_URL/gateways/$GATEWAY_ID/rules" \ + -H "Content-Type: application/json" \ + -d '{ + "path": "/post", + "service_id": "'$SERVICE_ID'", + "methods": ["POST"], + "strip_path": false, + "active": true +}') + +# Wait for configuration to propagate +sleep 2 + +# Test data masking +echo -e "\n${GREEN}6. Testing data masking...${NC}" + +# Test keyword masking +echo -e "\n${GREEN}6.1 Testing keyword masking...${NC}" +RESPONSE=$(curl -s -w "\n%{http_code}" "$PROXY_URL/post" \ + -H "Host: ${SUBDOMAIN}.${BASE_DOMAIN}" \ + -H "X-API-Key: ${API_KEY}" \ + -H "Content-Type: application/json" \ + -d '{ + "message": "My credit_card number is 4111-2222-3333-4444", + "email": "test@example.com", + "notes": "This is a test message" + }') + +HTTP_CODE=$(echo "$RESPONSE" | tail -n1) +BODY=$(echo "$RESPONSE" | head -n1) + +if [ "$HTTP_CODE" == "200" ]; then + echo -e "${GREEN}Request successful${NC}" + echo "Response body:" + echo "$BODY" | jq '.' + + # Check if masking worked + if echo "$BODY" | grep -q "credit_card"; then + echo -e "${RED}WARNING: Keyword 'credit_card' was not masked${NC}" + else + echo -e "${GREEN}Keyword masking successful${NC}" + fi + + if echo "$BODY" | grep -q "4111-2222-3333-4444"; then + echo -e "${RED}WARNING: Credit card number was not masked${NC}" + else + echo -e "${GREEN}Credit card number masking successful${NC}" + fi + + if echo "$BODY" | grep -q "test@example.com"; then + echo -e "${RED}WARNING: Email was not masked${NC}" + else + echo -e "${GREEN}Email masking successful${NC}" + fi +else + echo -e "${RED}Request failed with status code: $HTTP_CODE${NC}" + echo "Response: $BODY" +fi + +echo -e "\n${GREEN}Data masking tests completed${NC}" \ No newline at end of file diff --git a/pkg/plugins/data_masking/plugin.go b/pkg/plugins/data_masking/plugin.go new file mode 100644 index 0000000..5bbbf8f --- /dev/null +++ b/pkg/plugins/data_masking/plugin.go @@ -0,0 +1,241 @@ +package data_masking + +import ( + "context" + "fmt" + "regexp" + "strings" + + "github.com/mitchellh/mapstructure" + "github.com/sirupsen/logrus" + + "github.com/NeuralTrust/TrustGate/pkg/pluginiface" + "github.com/NeuralTrust/TrustGate/pkg/types" +) + +const ( + PluginName = "data_masking" + DefaultMaskChar = "*" + SimilarityThreshold = 0.8 +) + +type DataMaskingPlugin struct { + logger *logrus.Logger + keywords map[string]string // map of keyword to mask value + regexRules map[string]*regexp.Regexp // map of regex pattern to mask value +} + +type Config struct { + Rules []Rule `mapstructure:"rules"` + SimilarityThreshold float64 `mapstructure:"similarity_threshold"` +} + +type Rule struct { + Pattern string `mapstructure:"pattern"` // Keyword or regex pattern + Type string `mapstructure:"type"` // "keyword" or "regex" + MaskWith string `mapstructure:"mask_with"` // Character or string to mask with + PreserveLen bool `mapstructure:"preserve_len"` // Whether to preserve the length of masked content +} + +// levenshteinDistance calculates the minimum number of single-character edits required to change one word into another +func levenshteinDistance(s1, s2 string) int { + s1 = strings.ToLower(s1) + s2 = strings.ToLower(s2) + + if len(s1) == 0 { + return len(s2) + } + if len(s2) == 0 { + return len(s1) + } + + matrix := make([][]int, len(s1)+1) + for i := range matrix { + matrix[i] = make([]int, len(s2)+1) + matrix[i][0] = i + } + for j := range matrix[0] { + matrix[0][j] = j + } + + for i := 1; i <= len(s1); i++ { + for j := 1; j <= len(s2); j++ { + cost := 1 + if s1[i-1] == s2[j-1] { + cost = 0 + } + matrix[i][j] = min( + matrix[i-1][j]+1, + matrix[i][j-1]+1, + matrix[i-1][j-1]+cost, + ) + } + } + return matrix[len(s1)][len(s2)] +} + +// min returns the minimum of three integers +func min(a, b, c int) int { + if a < b { + if a < c { + return a + } + return c + } + if b < c { + return b + } + return c +} + +// calculateSimilarity returns a similarity score between 0 and 1 +func calculateSimilarity(s1, s2 string) float64 { + distance := levenshteinDistance(s1, s2) + maxLen := float64(max(len(s1), len(s2))) + if maxLen == 0 { + return 1.0 + } + return 1.0 - float64(distance)/maxLen +} + +// max returns the maximum of two integers +func max(a, b int) int { + if a > b { + return a + } + return b +} + +// findSimilarKeyword checks if any word in the text is similar to the keywords +func (p *DataMaskingPlugin) findSimilarKeyword(text string, threshold float64) (string, string, string, bool) { + words := strings.Fields(text) + for _, word := range words { + for keyword, maskWith := range p.keywords { + similarity := calculateSimilarity(word, keyword) + if similarity >= threshold { + return word, keyword, maskWith, true + } + } + } + return "", "", "", false +} + +func NewDataMaskingPlugin(logger *logrus.Logger) pluginiface.Plugin { + return &DataMaskingPlugin{ + logger: logger, + keywords: make(map[string]string), + regexRules: make(map[string]*regexp.Regexp), + } +} + +func (p *DataMaskingPlugin) Name() string { + return PluginName +} + +func (p *DataMaskingPlugin) Stages() []types.Stage { + return []types.Stage{types.PreResponse} +} + +func (p *DataMaskingPlugin) AllowedStages() []types.Stage { + return []types.Stage{types.PreResponse} +} + +type DataMaskingValidator struct{} + +func (v *DataMaskingValidator) ValidateConfig(config types.PluginConfig) error { + var cfg Config + if err := mapstructure.Decode(config.Settings, &cfg); err != nil { + return fmt.Errorf("failed to decode config: %v", err) + } + + if len(cfg.Rules) == 0 { + return fmt.Errorf("at least one masking rule must be specified") + } + + for _, rule := range cfg.Rules { + if rule.Type != "keyword" && rule.Type != "regex" { + return fmt.Errorf("invalid rule type '%s': must be 'keyword' or 'regex'", rule.Type) + } + + if rule.Type == "regex" { + if _, err := regexp.Compile(rule.Pattern); err != nil { + return fmt.Errorf("invalid regex pattern '%s': %v", rule.Pattern, err) + } + } + + if rule.MaskWith == "" { + return fmt.Errorf("mask_with value must be specified for each rule") + } + } + + return nil +} + +func (p *DataMaskingPlugin) maskContent(content string, pattern string, maskWith string, preserveLen bool) string { + if preserveLen { + mask := strings.Repeat(maskWith[0:1], len(content)) + return mask + } + return maskWith +} + +func (p *DataMaskingPlugin) Execute(ctx context.Context, cfg types.PluginConfig, req *types.RequestContext, resp *types.ResponseContext) (*types.PluginResponse, error) { + var config Config + if err := mapstructure.Decode(cfg.Settings, &config); err != nil { + return nil, fmt.Errorf("failed to decode config: %v", err) + } + + threshold := config.SimilarityThreshold + if threshold == 0 { + threshold = SimilarityThreshold + } + + // Initialize rules + p.keywords = make(map[string]string) + p.regexRules = make(map[string]*regexp.Regexp) + + for _, rule := range config.Rules { + maskValue := rule.MaskWith + if maskValue == "" { + maskValue = DefaultMaskChar + } + + if rule.Type == "keyword" { + p.keywords[rule.Pattern] = maskValue + } else if rule.Type == "regex" { + regex, err := regexp.Compile(rule.Pattern) + if err != nil { + return nil, fmt.Errorf("failed to compile regex pattern '%s': %v", rule.Pattern, err) + } + p.regexRules[maskValue] = regex + } + } + + // Get response content + content := string(resp.Body) + maskedContent := content + + // Apply fuzzy keyword masking + for { + foundWord, keyword, maskWith, found := p.findSimilarKeyword(maskedContent, threshold) + if !found { + break + } + maskedContent = strings.ReplaceAll(maskedContent, foundWord, p.maskContent(foundWord, keyword, maskWith, true)) + } + + // Apply regex masking + for maskWith, regex := range p.regexRules { + maskedContent = regex.ReplaceAllStringFunc(maskedContent, func(match string) string { + return p.maskContent(match, regex.String(), maskWith, true) + }) + } + + // Update response with masked content + resp.Body = []byte(maskedContent) + + return &types.PluginResponse{ + StatusCode: 200, + Message: "Content masked successfully", + }, nil +} From 4d610545a77d5e640e1988dd24450d57bc823941 Mon Sep 17 00:00:00 2001 From: ayoubelqadi Date: Fri, 17 Jan 2025 15:30:07 +0100 Subject: [PATCH 3/8] pre-defined masked entities --- examples/test_data_masking.sh | 148 +++++++++++++++++++------ pkg/plugins/data_masking/plugin.go | 168 +++++++++++++++++++++++++---- 2 files changed, 262 insertions(+), 54 deletions(-) diff --git a/examples/test_data_masking.sh b/examples/test_data_masking.sh index df07393..5e99d34 100755 --- a/examples/test_data_masking.sh +++ b/examples/test_data_masking.sh @@ -27,24 +27,51 @@ GATEWAY_RESPONSE=$(curl -s -X POST "$ADMIN_URL/gateways" \ "stage": "pre_response", "priority": 1, "settings": { - "rules": [ + "similarity_threshold": 0.8, + "predefined_entities": [ { - "pattern": "credit_card", - "type": "keyword", - "mask_with": "****", - "preserve_len": true + "entity": "credit_card", + "enabled": true, + "mask_with": "[MASKED_CC]", + "preserve_len": false }, { - "pattern": "\\b\\d{4}[-\\s]?\\d{4}[-\\s]?\\d{4}[-\\s]?\\d{4}\\b", - "type": "regex", - "mask_with": "X", - "preserve_len": true + "entity": "email", + "enabled": true, + "mask_with": "[MASKED_EMAIL]", + "preserve_len": false }, { - "pattern": "\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}\\b", - "type": "regex", - "mask_with": "[MASKED_EMAIL]", + "entity": "iban", + "enabled": true, + "mask_with": "[MASKED_IBAN]", + "preserve_len": false + }, + { + "entity": "swift_bic", + "enabled": true, + "mask_with": "[MASKED_BIC]", "preserve_len": false + }, + { + "entity": "crypto_wallet", + "enabled": true, + "mask_with": "[MASKED_WALLET]", + "preserve_len": false + }, + { + "entity": "tax_id", + "enabled": true, + "mask_with": "[MASKED_TAX_ID]", + "preserve_len": true + } + ], + "rules": [ + { + "pattern": "secret_key", + "type": "keyword", + "mask_with": "****", + "preserve_len": true } ] } @@ -149,16 +176,21 @@ sleep 2 # Test data masking echo -e "\n${GREEN}6. Testing data masking...${NC}" -# Test keyword masking -echo -e "\n${GREEN}6.1 Testing keyword masking...${NC}" +# Test all masking patterns +echo -e "\n${GREEN}6.1 Testing all masking patterns...${NC}" RESPONSE=$(curl -s -w "\n%{http_code}" "$PROXY_URL/post" \ -H "Host: ${SUBDOMAIN}.${BASE_DOMAIN}" \ -H "X-API-Key: ${API_KEY}" \ -H "Content-Type: application/json" \ -d '{ - "message": "My credit_card number is 4111-2222-3333-4444", + "credit_card": "4111-2222-3333-4444", "email": "test@example.com", - "notes": "This is a test message" + "iban": "DE89370400440532013000", + "swift_bic": "DEUTDEFF500", + "crypto_wallet": "1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa", + "tax_id": "12-3456789", + "secret_key": "this_is_secret", + "similar_secrets": "secret_keys_here" }') HTTP_CODE=$(echo "$RESPONSE" | tail -n1) @@ -169,24 +201,76 @@ if [ "$HTTP_CODE" == "200" ]; then echo "Response body:" echo "$BODY" | jq '.' - # Check if masking worked - if echo "$BODY" | grep -q "credit_card"; then - echo -e "${RED}WARNING: Keyword 'credit_card' was not masked${NC}" - else - echo -e "${GREEN}Keyword masking successful${NC}" - fi + # Check each pattern + PATTERNS=( + "4111-2222-3333-4444" + "test@example.com" + "DE89370400440532013000" + "DEUTDEFF500" + "1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa" + "12-3456789" + "this_is_secret" + "secret_keys_here" + ) + + for pattern in "${PATTERNS[@]}"; do + if echo "$BODY" | grep -q "$pattern"; then + echo -e "${RED}WARNING: Pattern '$pattern' was not masked${NC}" + else + echo -e "${GREEN}Successfully masked: $pattern${NC}" + fi + done + + # Verify masked values are present + MASKS=( + "[MASKED_CC]" + "[MASKED_EMAIL]" + "[MASKED_IBAN]" + "[MASKED_BIC]" + "[MASKED_WALLET]" + "[MASKED_TAX_ID]" + ) - if echo "$BODY" | grep -q "4111-2222-3333-4444"; then - echo -e "${RED}WARNING: Credit card number was not masked${NC}" - else - echo -e "${GREEN}Credit card number masking successful${NC}" - fi + for mask in "${MASKS[@]}"; do + if echo "$BODY" | grep -q "$mask"; then + echo -e "${GREEN}Found expected mask: $mask${NC}" + else + echo -e "${RED}WARNING: Expected mask '$mask' not found${NC}" + fi + done +else + echo -e "${RED}Request failed with status code: $HTTP_CODE${NC}" + echo "Response: $BODY" +fi + +# Test fuzzy matching +echo -e "\n${GREEN}6.2 Testing fuzzy matching...${NC}" +RESPONSE=$(curl -s -w "\n%{http_code}" "$PROXY_URL/post" \ + -H "Host: ${SUBDOMAIN}.${BASE_DOMAIN}" \ + -H "X-API-Key: ${API_KEY}" \ + -H "Content-Type: application/json" \ + -d '{ + "message": "my sekret_key and secret-key should be masked", + "notes": "Testing fuzzy matching" + }') + +HTTP_CODE=$(echo "$RESPONSE" | tail -n1) +BODY=$(echo "$RESPONSE" | head -n1) + +if [ "$HTTP_CODE" == "200" ]; then + echo -e "${GREEN}Request successful${NC}" + echo "Response body:" + echo "$BODY" | jq '.' - if echo "$BODY" | grep -q "test@example.com"; then - echo -e "${RED}WARNING: Email was not masked${NC}" - else - echo -e "${GREEN}Email masking successful${NC}" - fi + # Check fuzzy matches + FUZZY_TERMS=("sekret_key" "secret-key") + for term in "${FUZZY_TERMS[@]}"; do + if echo "$BODY" | grep -q "$term"; then + echo -e "${RED}WARNING: Similar term '$term' was not masked${NC}" + else + echo -e "${GREEN}Fuzzy masking successful for '$term'${NC}" + fi + done else echo -e "${RED}Request failed with status code: $HTTP_CODE${NC}" echo "Response: $BODY" diff --git a/pkg/plugins/data_masking/plugin.go b/pkg/plugins/data_masking/plugin.go index 5bbbf8f..54f7dc7 100644 --- a/pkg/plugins/data_masking/plugin.go +++ b/pkg/plugins/data_masking/plugin.go @@ -19,6 +19,62 @@ const ( SimilarityThreshold = 0.8 ) +// PredefinedEntity represents a pre-defined entity type to mask +type PredefinedEntity string + +const ( + CreditCard PredefinedEntity = "credit_card" + Email PredefinedEntity = "email" + PhoneNumber PredefinedEntity = "phone_number" + SSN PredefinedEntity = "ssn" + IPAddress PredefinedEntity = "ip_address" + BankAccount PredefinedEntity = "bank_account" + Password PredefinedEntity = "password" + APIKey PredefinedEntity = "api_key" + AccessToken PredefinedEntity = "access_token" + IBAN PredefinedEntity = "iban" + SwiftBIC PredefinedEntity = "swift_bic" + CryptoWallet PredefinedEntity = "crypto_wallet" + TaxID PredefinedEntity = "tax_id" + RoutingNumber PredefinedEntity = "routing_number" +) + +// predefinedEntityPatterns maps entity types to their regex patterns +var predefinedEntityPatterns = map[PredefinedEntity]string{ + CreditCard: `\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b`, + Email: `\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b`, + PhoneNumber: `\b\+?[\d\s-]{10,}\b`, + SSN: `\b\d{3}[-\s]?\d{2}[-\s]?\d{4}\b`, + IPAddress: `\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b`, + BankAccount: `\b\d{8,17}\b`, + Password: `(?i)password[\s]*[=:]\s*\S+`, + APIKey: `(?i)(api[_-]?key|access[_-]?key)[\s]*[=:]\s*\S+`, + AccessToken: `(?i)(access[_-]?token|bearer)[\s]*[=:]\s*\S+`, + IBAN: `\b[A-Z]{2}\d{2}[A-Z0-9]{4,34}\b`, + SwiftBIC: `\b[A-Z]{6}[A-Z0-9]{2}([A-Z0-9]{3})?\b`, + CryptoWallet: `\b(bc1|[13])[a-zA-HJ-NP-Z0-9]{25,39}\b|0x[a-fA-F0-9]{40}\b`, + TaxID: `\b\d{2}[-\s]?\d{7}\b`, + RoutingNumber: `\b\d{9}\b`, +} + +// defaultEntityMasks defines default masking for pre-defined entities +var defaultEntityMasks = map[PredefinedEntity]string{ + CreditCard: "[MASKED_CC]", + Email: "[MASKED_EMAIL]", + PhoneNumber: "[MASKED_PHONE]", + SSN: "[MASKED_SSN]", + IPAddress: "[MASKED_IP]", + BankAccount: "[MASKED_ACCOUNT]", + Password: "[MASKED_PASSWORD]", + APIKey: "[MASKED_API_KEY]", + AccessToken: "[MASKED_TOKEN]", + IBAN: "[MASKED_IBAN]", + SwiftBIC: "[MASKED_BIC]", + CryptoWallet: "[MASKED_WALLET]", + TaxID: "[MASKED_TAX_ID]", + RoutingNumber: "[MASKED_ROUTING]", +} + type DataMaskingPlugin struct { logger *logrus.Logger keywords map[string]string // map of keyword to mask value @@ -26,8 +82,16 @@ type DataMaskingPlugin struct { } type Config struct { - Rules []Rule `mapstructure:"rules"` - SimilarityThreshold float64 `mapstructure:"similarity_threshold"` + Rules []Rule `mapstructure:"rules"` + SimilarityThreshold float64 `mapstructure:"similarity_threshold"` + PredefinedEntities []EntityConfig `mapstructure:"predefined_entities"` +} + +type EntityConfig struct { + Entity string `mapstructure:"entity"` // Pre-defined entity type + Enabled bool `mapstructure:"enabled"` // Whether to enable this entity + MaskWith string `mapstructure:"mask_with"` // Optional custom mask + PreserveLen bool `mapstructure:"preserve_len"` // Whether to preserve length } type Rule struct { @@ -133,11 +197,11 @@ func (p *DataMaskingPlugin) Name() string { } func (p *DataMaskingPlugin) Stages() []types.Stage { - return []types.Stage{types.PreResponse} + return []types.Stage{types.PreRequest, types.PreResponse} } func (p *DataMaskingPlugin) AllowedStages() []types.Stage { - return []types.Stage{types.PreResponse} + return []types.Stage{types.PreRequest, types.PreResponse} } type DataMaskingValidator struct{} @@ -148,10 +212,11 @@ func (v *DataMaskingValidator) ValidateConfig(config types.PluginConfig) error { return fmt.Errorf("failed to decode config: %v", err) } - if len(cfg.Rules) == 0 { - return fmt.Errorf("at least one masking rule must be specified") + if len(cfg.Rules) == 0 && len(cfg.PredefinedEntities) == 0 { + return fmt.Errorf("at least one rule or predefined entity must be specified") } + // Validate custom rules for _, rule := range cfg.Rules { if rule.Type != "keyword" && rule.Type != "regex" { return fmt.Errorf("invalid rule type '%s': must be 'keyword' or 'regex'", rule.Type) @@ -168,6 +233,13 @@ func (v *DataMaskingValidator) ValidateConfig(config types.PluginConfig) error { } } + // Validate predefined entities + for _, entity := range cfg.PredefinedEntities { + if _, exists := predefinedEntityPatterns[PredefinedEntity(entity.Entity)]; !exists { + return fmt.Errorf("invalid predefined entity type: %s", entity.Entity) + } + } + return nil } @@ -194,6 +266,7 @@ func (p *DataMaskingPlugin) Execute(ctx context.Context, cfg types.PluginConfig, p.keywords = make(map[string]string) p.regexRules = make(map[string]*regexp.Regexp) + // Add custom rules for _, rule := range config.Rules { maskValue := rule.MaskWith if maskValue == "" { @@ -211,28 +284,79 @@ func (p *DataMaskingPlugin) Execute(ctx context.Context, cfg types.PluginConfig, } } - // Get response content - content := string(resp.Body) - maskedContent := content + // Add predefined entity rules + for _, entity := range config.PredefinedEntities { + if !entity.Enabled { + continue + } + + entityType := PredefinedEntity(entity.Entity) + pattern, exists := predefinedEntityPatterns[entityType] + if !exists { + continue + } + + maskValue := entity.MaskWith + if maskValue == "" { + maskValue = defaultEntityMasks[entityType] + } - // Apply fuzzy keyword masking - for { - foundWord, keyword, maskWith, found := p.findSimilarKeyword(maskedContent, threshold) - if !found { - break + regex, err := regexp.Compile(pattern) + if err != nil { + return nil, fmt.Errorf("failed to compile predefined pattern for entity %s: %v", entity.Entity, err) } - maskedContent = strings.ReplaceAll(maskedContent, foundWord, p.maskContent(foundWord, keyword, maskWith, true)) + p.regexRules[maskValue] = regex } - // Apply regex masking - for maskWith, regex := range p.regexRules { - maskedContent = regex.ReplaceAllStringFunc(maskedContent, func(match string) string { - return p.maskContent(match, regex.String(), maskWith, true) - }) + // Process request body if in PreRequest stage + if req != nil && len(req.Body) > 0 { + content := string(req.Body) + maskedContent := content + + // Apply fuzzy keyword masking + for { + foundWord, keyword, maskWith, found := p.findSimilarKeyword(maskedContent, threshold) + if !found { + break + } + maskedContent = strings.ReplaceAll(maskedContent, foundWord, p.maskContent(foundWord, keyword, maskWith, true)) + } + + // Apply regex masking + for maskWith, regex := range p.regexRules { + maskedContent = regex.ReplaceAllStringFunc(maskedContent, func(match string) string { + return p.maskContent(match, regex.String(), maskWith, true) + }) + } + + // Update request with masked content + req.Body = []byte(maskedContent) } - // Update response with masked content - resp.Body = []byte(maskedContent) + // Process response body if in PreResponse stage + if resp != nil && len(resp.Body) > 0 { + content := string(resp.Body) + maskedContent := content + + // Apply fuzzy keyword masking + for { + foundWord, keyword, maskWith, found := p.findSimilarKeyword(maskedContent, threshold) + if !found { + break + } + maskedContent = strings.ReplaceAll(maskedContent, foundWord, p.maskContent(foundWord, keyword, maskWith, true)) + } + + // Apply regex masking + for maskWith, regex := range p.regexRules { + maskedContent = regex.ReplaceAllStringFunc(maskedContent, func(match string) string { + return p.maskContent(match, regex.String(), maskWith, true) + }) + } + + // Update response with masked content + resp.Body = []byte(maskedContent) + } return &types.PluginResponse{ StatusCode: 200, From 2fd91cc518de0a0249465990874c5054e9091498 Mon Sep 17 00:00:00 2001 From: Victor Date: Mon, 20 Jan 2025 09:36:11 +0100 Subject: [PATCH 4/8] add plugin to manager --- examples/test_data_masking.sh | 3 ++- pkg/plugins/manager.go | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/examples/test_data_masking.sh b/examples/test_data_masking.sh index 5e99d34..5f632f8 100755 --- a/examples/test_data_masking.sh +++ b/examples/test_data_masking.sh @@ -116,9 +116,10 @@ UPSTREAM_RESPONSE=$(curl -s -X POST "$ADMIN_URL/gateways/$GATEWAY_ID/upstreams" "name": "echo-upstream-'$(date +%s)'", "algorithm": "round-robin", "targets": [{ - "host": "postman-echo.com", + "host": "httpbin.org", "port": 443, "protocol": "https", + "path": "/post", "weight": 100, "priority": 1 }], diff --git a/pkg/plugins/manager.go b/pkg/plugins/manager.go index f8e71ef..1a3297e 100644 --- a/pkg/plugins/manager.go +++ b/pkg/plugins/manager.go @@ -11,6 +11,7 @@ import ( "github.com/NeuralTrust/TrustGate/pkg/cache" "github.com/NeuralTrust/TrustGate/pkg/pluginiface" + "github.com/NeuralTrust/TrustGate/pkg/plugins/data_masking" "github.com/NeuralTrust/TrustGate/pkg/plugins/external_api" "github.com/NeuralTrust/TrustGate/pkg/plugins/prompt_moderation" "github.com/NeuralTrust/TrustGate/pkg/plugins/rate_limiter" @@ -67,6 +68,10 @@ func InitializePlugins(cache *cache.Cache, logger *logrus.Logger) { if err := manager.RegisterPlugin(prompt_moderation.NewPromptModerationPlugin(logger)); err != nil { logger.WithError(err).Error("Failed to register prompt moderation plugin") } + + if err := manager.RegisterPlugin(data_masking.NewDataMaskingPlugin(logger)); err != nil { + logger.WithError(err).Error("Failed to register data masking plugin") + } } // ValidatePlugin validates a plugin configuration From a08c228e3afc825a70a5b005a8720c70b24fca7e Mon Sep 17 00:00:00 2001 From: ayoubelqadi Date: Mon, 20 Jan 2025 10:25:41 +0100 Subject: [PATCH 5/8] data masking fix --- examples/test_data_masking.sh | 32 +++++++++++++++++++----------- pkg/plugins/data_masking/plugin.go | 6 ++++++ 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/examples/test_data_masking.sh b/examples/test_data_masking.sh index 5f632f8..7b55214 100755 --- a/examples/test_data_masking.sh +++ b/examples/test_data_masking.sh @@ -64,6 +64,18 @@ GATEWAY_RESPONSE=$(curl -s -X POST "$ADMIN_URL/gateways" \ "enabled": true, "mask_with": "[MASKED_TAX_ID]", "preserve_len": true + }, + { + "entity": "key_pattern", + "enabled": true, + "mask_with": "[MASKED_KEY]", + "preserve_len": false + }, + { + "entity": "mask_pattern", + "enabled": true, + "mask_with": "[MASKED_VALUE]", + "preserve_len": false } ], "rules": [ @@ -179,7 +191,7 @@ echo -e "\n${GREEN}6. Testing data masking...${NC}" # Test all masking patterns echo -e "\n${GREEN}6.1 Testing all masking patterns...${NC}" -RESPONSE=$(curl -s -w "\n%{http_code}" "$PROXY_URL/post" \ +RESPONSE=$(curl -s -w "\nSTATUS_CODE:%{http_code}" "$PROXY_URL/post" \ -H "Host: ${SUBDOMAIN}.${BASE_DOMAIN}" \ -H "X-API-Key: ${API_KEY}" \ -H "Content-Type: application/json" \ @@ -194,13 +206,11 @@ RESPONSE=$(curl -s -w "\n%{http_code}" "$PROXY_URL/post" \ "similar_secrets": "secret_keys_here" }') -HTTP_CODE=$(echo "$RESPONSE" | tail -n1) -BODY=$(echo "$RESPONSE" | head -n1) +HTTP_CODE=$(echo "$RESPONSE" | grep "STATUS_CODE:" | cut -d':' -f2) +BODY=$(echo "$RESPONSE" | sed '$d') if [ "$HTTP_CODE" == "200" ]; then echo -e "${GREEN}Request successful${NC}" - echo "Response body:" - echo "$BODY" | jq '.' # Check each pattern PATTERNS=( @@ -230,6 +240,8 @@ if [ "$HTTP_CODE" == "200" ]; then "[MASKED_BIC]" "[MASKED_WALLET]" "[MASKED_TAX_ID]" + "[MASKED_KEY]" + "[MASKED_VALUE]" ) for mask in "${MASKS[@]}"; do @@ -241,12 +253,11 @@ if [ "$HTTP_CODE" == "200" ]; then done else echo -e "${RED}Request failed with status code: $HTTP_CODE${NC}" - echo "Response: $BODY" fi # Test fuzzy matching echo -e "\n${GREEN}6.2 Testing fuzzy matching...${NC}" -RESPONSE=$(curl -s -w "\n%{http_code}" "$PROXY_URL/post" \ +RESPONSE=$(curl -s -w "\nSTATUS_CODE:%{http_code}" "$PROXY_URL/post" \ -H "Host: ${SUBDOMAIN}.${BASE_DOMAIN}" \ -H "X-API-Key: ${API_KEY}" \ -H "Content-Type: application/json" \ @@ -255,13 +266,11 @@ RESPONSE=$(curl -s -w "\n%{http_code}" "$PROXY_URL/post" \ "notes": "Testing fuzzy matching" }') -HTTP_CODE=$(echo "$RESPONSE" | tail -n1) -BODY=$(echo "$RESPONSE" | head -n1) +HTTP_CODE=$(echo "$RESPONSE" | grep "STATUS_CODE:" | cut -d':' -f2) +BODY=$(echo "$RESPONSE" | sed '$d') if [ "$HTTP_CODE" == "200" ]; then echo -e "${GREEN}Request successful${NC}" - echo "Response body:" - echo "$BODY" | jq '.' # Check fuzzy matches FUZZY_TERMS=("sekret_key" "secret-key") @@ -274,7 +283,6 @@ if [ "$HTTP_CODE" == "200" ]; then done else echo -e "${RED}Request failed with status code: $HTTP_CODE${NC}" - echo "Response: $BODY" fi echo -e "\n${GREEN}Data masking tests completed${NC}" \ No newline at end of file diff --git a/pkg/plugins/data_masking/plugin.go b/pkg/plugins/data_masking/plugin.go index 54f7dc7..1d9e2b1 100644 --- a/pkg/plugins/data_masking/plugin.go +++ b/pkg/plugins/data_masking/plugin.go @@ -37,6 +37,8 @@ const ( CryptoWallet PredefinedEntity = "crypto_wallet" TaxID PredefinedEntity = "tax_id" RoutingNumber PredefinedEntity = "routing_number" + KeyPattern PredefinedEntity = "key_pattern" + MaskPattern PredefinedEntity = "mask_pattern" ) // predefinedEntityPatterns maps entity types to their regex patterns @@ -55,6 +57,8 @@ var predefinedEntityPatterns = map[PredefinedEntity]string{ CryptoWallet: `\b(bc1|[13])[a-zA-HJ-NP-Z0-9]{25,39}\b|0x[a-fA-F0-9]{40}\b`, TaxID: `\b\d{2}[-\s]?\d{7}\b`, RoutingNumber: `\b\d{9}\b`, + KeyPattern: `(?i).*key.*`, + MaskPattern: `(?i).*mask.*`, } // defaultEntityMasks defines default masking for pre-defined entities @@ -73,6 +77,8 @@ var defaultEntityMasks = map[PredefinedEntity]string{ CryptoWallet: "[MASKED_WALLET]", TaxID: "[MASKED_TAX_ID]", RoutingNumber: "[MASKED_ROUTING]", + KeyPattern: "[MASKED_KEY]", + MaskPattern: "[MASKED_VALUE]", } type DataMaskingPlugin struct { From 9d51822530f4df4626e7c022a9fa3eea3e24b6a5 Mon Sep 17 00:00:00 2001 From: ayoubelqadi Date: Mon, 20 Jan 2025 10:49:05 +0100 Subject: [PATCH 6/8] pre request data masking --- examples/test_data_masking.sh | 67 +++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/examples/test_data_masking.sh b/examples/test_data_masking.sh index 7b55214..2576514 100755 --- a/examples/test_data_masking.sh +++ b/examples/test_data_masking.sh @@ -21,6 +21,73 @@ GATEWAY_RESPONSE=$(curl -s -X POST "$ADMIN_URL/gateways" \ "name": "Data Masking Gateway", "subdomain": "'$SUBDOMAIN'", "required_plugins": [ + { + "name": "data_masking", + "enabled": true, + "stage": "pre_request", + "priority": 1, + "settings": { + "similarity_threshold": 0.8, + "predefined_entities": [ + { + "entity": "credit_card", + "enabled": true, + "mask_with": "[MASKED_CC]", + "preserve_len": false + }, + { + "entity": "email", + "enabled": true, + "mask_with": "[MASKED_EMAIL]", + "preserve_len": false + }, + { + "entity": "iban", + "enabled": true, + "mask_with": "[MASKED_IBAN]", + "preserve_len": false + }, + { + "entity": "swift_bic", + "enabled": true, + "mask_with": "[MASKED_BIC]", + "preserve_len": false + }, + { + "entity": "crypto_wallet", + "enabled": true, + "mask_with": "[MASKED_WALLET]", + "preserve_len": false + }, + { + "entity": "tax_id", + "enabled": true, + "mask_with": "[MASKED_TAX_ID]", + "preserve_len": true + }, + { + "entity": "key_pattern", + "enabled": true, + "mask_with": "[MASKED_KEY]", + "preserve_len": false + }, + { + "entity": "mask_pattern", + "enabled": true, + "mask_with": "[MASKED_VALUE]", + "preserve_len": false + } + ], + "rules": [ + { + "pattern": "secret_key", + "type": "keyword", + "mask_with": "****", + "preserve_len": true + } + ] + } + }, { "name": "data_masking", "enabled": true, From 0b7ca0cefe66c42bcef99faecc05d403c9b095c8 Mon Sep 17 00:00:00 2001 From: ayoubelqadi Date: Mon, 20 Jan 2025 16:02:08 +0100 Subject: [PATCH 7/8] fix --- examples/test_data_masking.sh | 12 +- pkg/plugins/data_masking/plugin.go | 212 ++++++++++++++++++++--------- 2 files changed, 159 insertions(+), 65 deletions(-) diff --git a/examples/test_data_masking.sh b/examples/test_data_masking.sh index 2576514..eac03a4 100755 --- a/examples/test_data_masking.sh +++ b/examples/test_data_masking.sh @@ -273,9 +273,12 @@ RESPONSE=$(curl -s -w "\nSTATUS_CODE:%{http_code}" "$PROXY_URL/post" \ "similar_secrets": "secret_keys_here" }') -HTTP_CODE=$(echo "$RESPONSE" | grep "STATUS_CODE:" | cut -d':' -f2) +# Extract body and status code from response BODY=$(echo "$RESPONSE" | sed '$d') + +HTTP_CODE=$(echo "$RESPONSE" | grep "STATUS_CODE:" | cut -d':' -f2) + if [ "$HTTP_CODE" == "200" ]; then echo -e "${GREEN}Request successful${NC}" @@ -290,7 +293,7 @@ if [ "$HTTP_CODE" == "200" ]; then "this_is_secret" "secret_keys_here" ) - + for pattern in "${PATTERNS[@]}"; do if echo "$BODY" | grep -q "$pattern"; then echo -e "${RED}WARNING: Pattern '$pattern' was not masked${NC}" @@ -333,9 +336,12 @@ RESPONSE=$(curl -s -w "\nSTATUS_CODE:%{http_code}" "$PROXY_URL/post" \ "notes": "Testing fuzzy matching" }') -HTTP_CODE=$(echo "$RESPONSE" | grep "STATUS_CODE:" | cut -d':' -f2) +# Extract body and status code from response BODY=$(echo "$RESPONSE" | sed '$d') + +HTTP_CODE=$(echo "$RESPONSE" | grep "STATUS_CODE:" | cut -d':' -f2) + if [ "$HTTP_CODE" == "200" ]; then echo -e "${GREEN}Request successful${NC}" diff --git a/pkg/plugins/data_masking/plugin.go b/pkg/plugins/data_masking/plugin.go index 1d9e2b1..7c0974f 100644 --- a/pkg/plugins/data_masking/plugin.go +++ b/pkg/plugins/data_masking/plugin.go @@ -2,6 +2,7 @@ package data_masking import ( "context" + "encoding/json" "fmt" "regexp" "strings" @@ -37,8 +38,6 @@ const ( CryptoWallet PredefinedEntity = "crypto_wallet" TaxID PredefinedEntity = "tax_id" RoutingNumber PredefinedEntity = "routing_number" - KeyPattern PredefinedEntity = "key_pattern" - MaskPattern PredefinedEntity = "mask_pattern" ) // predefinedEntityPatterns maps entity types to their regex patterns @@ -57,8 +56,6 @@ var predefinedEntityPatterns = map[PredefinedEntity]string{ CryptoWallet: `\b(bc1|[13])[a-zA-HJ-NP-Z0-9]{25,39}\b|0x[a-fA-F0-9]{40}\b`, TaxID: `\b\d{2}[-\s]?\d{7}\b`, RoutingNumber: `\b\d{9}\b`, - KeyPattern: `(?i).*key.*`, - MaskPattern: `(?i).*mask.*`, } // defaultEntityMasks defines default masking for pre-defined entities @@ -77,8 +74,6 @@ var defaultEntityMasks = map[PredefinedEntity]string{ CryptoWallet: "[MASKED_WALLET]", TaxID: "[MASKED_TAX_ID]", RoutingNumber: "[MASKED_ROUTING]", - KeyPattern: "[MASKED_KEY]", - MaskPattern: "[MASKED_VALUE]", } type DataMaskingPlugin struct { @@ -272,24 +267,6 @@ func (p *DataMaskingPlugin) Execute(ctx context.Context, cfg types.PluginConfig, p.keywords = make(map[string]string) p.regexRules = make(map[string]*regexp.Regexp) - // Add custom rules - for _, rule := range config.Rules { - maskValue := rule.MaskWith - if maskValue == "" { - maskValue = DefaultMaskChar - } - - if rule.Type == "keyword" { - p.keywords[rule.Pattern] = maskValue - } else if rule.Type == "regex" { - regex, err := regexp.Compile(rule.Pattern) - if err != nil { - return nil, fmt.Errorf("failed to compile regex pattern '%s': %v", rule.Pattern, err) - } - p.regexRules[maskValue] = regex - } - } - // Add predefined entity rules for _, entity := range config.PredefinedEntities { if !entity.Enabled { @@ -311,57 +288,65 @@ func (p *DataMaskingPlugin) Execute(ctx context.Context, cfg types.PluginConfig, if err != nil { return nil, fmt.Errorf("failed to compile predefined pattern for entity %s: %v", entity.Entity, err) } - p.regexRules[maskValue] = regex + p.regexRules[pattern] = regex + p.keywords[pattern] = maskValue } - // Process request body if in PreRequest stage - if req != nil && len(req.Body) > 0 { - content := string(req.Body) - maskedContent := content + // Add custom rules + for _, rule := range config.Rules { + maskValue := rule.MaskWith + if maskValue == "" { + maskValue = DefaultMaskChar + } - // Apply fuzzy keyword masking - for { - foundWord, keyword, maskWith, found := p.findSimilarKeyword(maskedContent, threshold) - if !found { - break + if rule.Type == "keyword" { + p.keywords[rule.Pattern] = maskValue + } else if rule.Type == "regex" { + regex, err := regexp.Compile(rule.Pattern) + if err != nil { + return nil, fmt.Errorf("failed to compile regex pattern '%s': %v", rule.Pattern, err) } - maskedContent = strings.ReplaceAll(maskedContent, foundWord, p.maskContent(foundWord, keyword, maskWith, true)) + p.regexRules[rule.Pattern] = regex + p.keywords[rule.Pattern] = maskValue } + } - // Apply regex masking - for maskWith, regex := range p.regexRules { - maskedContent = regex.ReplaceAllStringFunc(maskedContent, func(match string) string { - return p.maskContent(match, regex.String(), maskWith, true) - }) + // Process request body if in PreRequest stage + if req != nil && len(req.Body) > 0 { + var jsonData interface{} + if err := json.Unmarshal(req.Body, &jsonData); err == nil { + // If it's valid JSON, process it as JSON + maskedData := p.maskJSONData(jsonData, threshold) + maskedJSON, err := json.Marshal(maskedData) + if err != nil { + return nil, fmt.Errorf("failed to marshal masked JSON: %v", err) + } + req.Body = maskedJSON + } else { + // If it's not JSON, process it as plain text + content := string(req.Body) + maskedContent := p.maskPlainText(content, threshold) + req.Body = []byte(maskedContent) } - - // Update request with masked content - req.Body = []byte(maskedContent) } // Process response body if in PreResponse stage if resp != nil && len(resp.Body) > 0 { - content := string(resp.Body) - maskedContent := content - - // Apply fuzzy keyword masking - for { - foundWord, keyword, maskWith, found := p.findSimilarKeyword(maskedContent, threshold) - if !found { - break + var jsonData interface{} + if err := json.Unmarshal(resp.Body, &jsonData); err == nil { + // If it's valid JSON, process it as JSON + maskedData := p.maskJSONData(jsonData, threshold) + maskedJSON, err := json.Marshal(maskedData) + if err != nil { + return nil, fmt.Errorf("failed to marshal masked JSON: %v", err) } - maskedContent = strings.ReplaceAll(maskedContent, foundWord, p.maskContent(foundWord, keyword, maskWith, true)) - } - - // Apply regex masking - for maskWith, regex := range p.regexRules { - maskedContent = regex.ReplaceAllStringFunc(maskedContent, func(match string) string { - return p.maskContent(match, regex.String(), maskWith, true) - }) + resp.Body = maskedJSON + } else { + // If it's not JSON, process it as plain text + content := string(resp.Body) + maskedContent := p.maskPlainText(content, threshold) + resp.Body = []byte(maskedContent) } - - // Update response with masked content - resp.Body = []byte(maskedContent) } return &types.PluginResponse{ @@ -369,3 +354,106 @@ func (p *DataMaskingPlugin) Execute(ctx context.Context, cfg types.PluginConfig, Message: "Content masked successfully", }, nil } + +// maskPlainText processes plain text content and applies masking rules +func (p *DataMaskingPlugin) maskPlainText(content string, threshold float64) string { + maskedContent := content + + // Split content into words to handle fuzzy matching + words := strings.Fields(content) + for i, word := range words { + // Check for fuzzy matches first + for keyword, maskWith := range p.keywords { + if _, isRegex := p.regexRules[keyword]; !isRegex { + similarity := calculateSimilarity(word, keyword) + if similarity >= threshold { + words[i] = maskWith + break + } + } + } + } + maskedContent = strings.Join(words, " ") + + // Apply regex masking after fuzzy matching + for pattern, regex := range p.regexRules { + // Try to identify the entity type from the pattern + for entityType, entityPattern := range predefinedEntityPatterns { + if pattern == entityPattern { + maskedContent = regex.ReplaceAllString(maskedContent, defaultEntityMasks[entityType]) + break + } + } + } + + return maskedContent +} + +// maskJSONData recursively processes JSON data and masks sensitive information +func (p *DataMaskingPlugin) maskJSONData(data interface{}, threshold float64) interface{} { + switch v := data.(type) { + case map[string]interface{}: + result := make(map[string]interface{}) + for key, value := range v { + switch val := value.(type) { + case string: + maskedValue := val + // Split the string into words for fuzzy matching + words := strings.Fields(val) + needsMasking := false + + // Check for fuzzy matches first + for i, word := range words { + for keyword, maskWith := range p.keywords { + if _, isRegex := p.regexRules[keyword]; !isRegex { + similarity := calculateSimilarity(word, keyword) + if similarity >= threshold { + words[i] = maskWith + needsMasking = true + break + } + } + } + } + + if needsMasking { + maskedValue = strings.Join(words, " ") + } + + // Check for predefined entities if no fuzzy match was found + if maskedValue == val { + for pattern, regex := range p.regexRules { + if regex.MatchString(val) { + // Find the corresponding entity type + for entityType, entityPattern := range predefinedEntityPatterns { + if pattern == entityPattern { + maskedValue = defaultEntityMasks[entityType] + break + } + } + } + } + } + + // Check for sensitive keywords in the key name + if maskedValue == val && (strings.Contains(strings.ToLower(key), "secret") || strings.Contains(strings.ToLower(key), "key")) { + maskedValue = "[MASKED_KEY]" + } + result[key] = maskedValue + default: + result[key] = p.maskJSONData(value, threshold) + } + } + return result + case []interface{}: + result := make([]interface{}, len(v)) + for i, value := range v { + result[i] = p.maskJSONData(value, threshold) + } + return result + case string: + return p.maskPlainText(v, threshold) + default: + return v + } +} From e006ad07f4ab88a0102a46239d4385afe8d8f123 Mon Sep 17 00:00:00 2001 From: ayoubelqadi Date: Mon, 20 Jan 2025 16:26:48 +0100 Subject: [PATCH 8/8] fix not used func --- examples/test_data_masking.sh | 2 -- pkg/plugins/data_masking/plugin.go | 39 ++++++++++++++++-------------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/examples/test_data_masking.sh b/examples/test_data_masking.sh index eac03a4..64f0cac 100755 --- a/examples/test_data_masking.sh +++ b/examples/test_data_masking.sh @@ -275,8 +275,6 @@ RESPONSE=$(curl -s -w "\nSTATUS_CODE:%{http_code}" "$PROXY_URL/post" \ # Extract body and status code from response BODY=$(echo "$RESPONSE" | sed '$d') - - HTTP_CODE=$(echo "$RESPONSE" | grep "STATUS_CODE:" | cut -d':' -f2) if [ "$HTTP_CODE" == "200" ]; then diff --git a/pkg/plugins/data_masking/plugin.go b/pkg/plugins/data_masking/plugin.go index 7c0974f..e3dccc3 100644 --- a/pkg/plugins/data_masking/plugin.go +++ b/pkg/plugins/data_masking/plugin.go @@ -359,33 +359,36 @@ func (p *DataMaskingPlugin) Execute(ctx context.Context, cfg types.PluginConfig, func (p *DataMaskingPlugin) maskPlainText(content string, threshold float64) string { maskedContent := content - // Split content into words to handle fuzzy matching - words := strings.Fields(content) - for i, word := range words { - // Check for fuzzy matches first - for keyword, maskWith := range p.keywords { - if _, isRegex := p.regexRules[keyword]; !isRegex { - similarity := calculateSimilarity(word, keyword) - if similarity >= threshold { - words[i] = maskWith + // Apply regex masking first + for pattern, regex := range p.regexRules { + matches := regex.FindAllString(maskedContent, -1) + for _, match := range matches { + // Try to identify the entity type from the pattern + for entityType, entityPattern := range predefinedEntityPatterns { + if pattern == entityPattern { + maskedContent = strings.ReplaceAll(maskedContent, match, defaultEntityMasks[entityType]) break } } } } - maskedContent = strings.Join(words, " ") - // Apply regex masking after fuzzy matching - for pattern, regex := range p.regexRules { - // Try to identify the entity type from the pattern - for entityType, entityPattern := range predefinedEntityPatterns { - if pattern == entityPattern { - maskedContent = regex.ReplaceAllString(maskedContent, defaultEntityMasks[entityType]) - break - } + // Split content into words to handle fuzzy matching + words := strings.Fields(maskedContent) + modified := false + for i, word := range words { + // Check for fuzzy matches using findSimilarKeyword + if origWord, keyword, maskWith, found := p.findSimilarKeyword(word, threshold); found { + // Use maskContent to apply the masking + words[i] = p.maskContent(origWord, keyword, maskWith, true) + modified = true } } + if modified { + maskedContent = strings.Join(words, " ") + } + return maskedContent }