Skip to content

Commit

Permalink
Merge pull request #36 from nixys/feat/35
Browse files Browse the repository at this point in the history
feat(#35): Add regex capturing groups for column data types within the filters
  • Loading branch information
borisershov authored Aug 8, 2024
2 parents 1f9588e + c1432b9 commit 08d2d5d
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 86 deletions.
16 changes: 11 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ nxs-data-anonymizer is a tool for anonymizing **PostgreSQL** and **MySQL/MariaDB
- MySQL/MariaDB/Percona (5.7/8.0/8.1/all versions)
- Flexible data faking based on:
- Go templates and [Sprig template’s library](https://masterminds.github.io/sprig/) like [Helm](https://helm.sh/docs/chart_template_guide/functions_and_pipelines/). You may also use values of other columns for same row to build more flexible rules
- External commands you may execute to create table field value2
- External commands you may execute to create table field values
- Security enforcement rules
- Link cells across the database to generate the same values
- Stream data processing. It means that you can use the tool through a pipe in the command line and redirect a dump from the source DB directly to the destination DB with the required transformations
Expand Down Expand Up @@ -304,19 +304,25 @@ Additional filter functions:
- `isNull`: compare a field value with `NULL`

You may also use the following data in templates:
- Current table name. Statement: `{{ .TableName }}`
- Current column name. Statement: `{{ .CurColumnName }}`
- Values of other columns in the rules for same row (with values before substitutions). Statement: `{{ .Values.COLUMN_NAME }}` (e.g.: `{{ .Values.username }}`)
- Global variables. Statement: `{{ .Variables.VARIABLE_NAME }}` (e.g.: `{{ .Variables.password }}`)
- Raw column data type. Statement: `{{ .ColumnTypeRaw }}`
- Regex capturing groups for the column data type. This variable is a two-dimensional array, so you need to use `range` or `index` to access a specific element. Statement: `{{ index .ColumnTypeGroups 0 0 }}`. See [Types](#types-settings) for details

**Command**

To anonymize database fields you may use commands (scripts or binaries) with any logic you need. The command concept has the following properties:
- The command's `stdout` will be used as a new value for the anonymized field
- The command must return a zero exit code, otherwise nxs-data-anonymizer will fail with an error (in this case `stderr` will be used as the error text)
- Environment variables with the row data are available within the command:
- `ENVVARTABLE`: contains a name of the filtered table
- `ENVVARCURCOLUMN`: contains the current column name
- `ENVVARGLOBAL_{VARIABLE_NAME}`: contains value for specified global variable
- `ENVVARTABLE`: contains a name of the current table
- `ENVVARCURCOLUMN`: contains the current column name
- `ENVVARCOLUMN_{COLUMN_NAME}`: contains values (before substitutions) for all columns for the current row
- `ENVVARGLOBAL_{VARIABLE_NAME}`: contains value for specified global variable
- `ENVVARCOLUMNTYPERAW`: contains raw column data type
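- `ENVVARCOLUMNTYPEGROUP_{GROUP_NUM}_{SUBGROUPNUM}`: contains the regex capturing groups for the column data type (subgroups are numbered from 0; the whole match of each group is additionally available as `ENVVARCOLUMNTYPEGROUP_{GROUP_NUM}`). See [Types](#types-settings) for details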

##### Security settings

Expand Down Expand Up @@ -410,7 +416,7 @@ _Values to masquerade a columns in accordance with the types see below._

| Option | Type | Required | Default value | Description |
|--- | :---: | :---: | :---: |--- |
| `regex` | String | Yes | - | Regular expression. Will be checked for match for column data type (in `CREATE TABLE` section) |
| `regex` | String | Yes | - | Regular expression. It is matched against the column data type (in the `CREATE TABLE` section). You can use capturing groups within the regex; they are available as additional variable data in the filters (see [Columns](#columns-settings) for details). This helps to create more flexible rules that generate cell values in accordance with the data type's features |
| `rule` | [Columns](#columns-settings) | Yes | - | The rule will be applied to columns whose data types match the specified regular expression |

#### Example
Expand Down
12 changes: 3 additions & 9 deletions modules/anonymizers/mysql/mysql.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,22 +109,16 @@ var typeKeys = map[string]columnType{

func userCtxInit(s InitOpts) (*userCtx, error) {

trc := []relfilter.TypeRuleOpts{}
trd := []relfilter.TypeRuleOpts{}
if s.Security.ColumnsPolicy == misc.SecurityPolicyColumnsRandomize {
trc = s.Rules.TypeRuleCustom
trd = typeRuleDefault
}

f, err := relfilter.Init(
relfilter.InitOpts{
Variables: s.Variables,
Link: s.Link,
TableRules: s.Rules.TableRules,
DefaultRules: s.Rules.DefaultRules,
ExceptionColumns: s.Rules.ExceptionColumns,
TypeRuleCustom: trc,
TypeRuleDefault: trd,
TypeRuleCustom: s.Rules.TypeRuleCustom,
TypeRuleDefault: typeRuleDefault,
ColumnsPolicy: s.Security.ColumnsPolicy,
},
)
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion modules/anonymizers/mysql/security_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ var typeRuleDefault = []relfilter.TypeRuleOpts{

// String
{
Selector: "(?i)^char",
Selector: "(?i)^char\\((\\d+)\\)|^char ",
Rule: relfilter.ColumnRuleOpts{
Type: misc.ValueTypeTemplate,
Value: securityTypeString,
Expand Down
12 changes: 3 additions & 9 deletions modules/anonymizers/pgsql/pgsql.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,22 +53,16 @@ type securityCtx struct {

func userCtxInit(s InitOpts) (*userCtx, error) {

trc := []relfilter.TypeRuleOpts{}
trd := []relfilter.TypeRuleOpts{}
if s.Security.ColumnsPolicy == misc.SecurityPolicyColumnsRandomize {
trc = s.Rules.TypeRuleCustom
trd = typeRuleDefault
}

f, err := relfilter.Init(
relfilter.InitOpts{
Variables: s.Variables,
Link: s.Link,
TableRules: s.Rules.TableRules,
DefaultRules: s.Rules.DefaultRules,
ExceptionColumns: s.Rules.ExceptionColumns,
TypeRuleCustom: trc,
TypeRuleDefault: trd,
TypeRuleCustom: s.Rules.TypeRuleCustom,
TypeRuleDefault: typeRuleDefault,
ColumnsPolicy: s.Security.ColumnsPolicy,
},
)
if err != nil {
Expand Down
45 changes: 40 additions & 5 deletions modules/filters/relfilter/column.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,22 @@
package relfilter

import "fmt"

// columns is the collection of columns registered for the current data set.
type columns struct {
// cc holds the columns in the order they were appended by add
cc []*column
// m looks a column up by a string key (presumably the column name;
// confirm against the part of add that fills the map)
m map[string]*column
}

type column struct {
n string
rawType string
n string
t columnTypes
}

// columnTypes carries the type-related data collected for a column at the
// moment it is added.
type columnTypes struct {
// raw is the raw column data type string as passed to add
raw string
// groups holds the regex submatches passed to add for the raw type;
// nil when no type rule matched the column
groups [][]string
// r points to the rule of the matched type rule; nil when no type rule
// matched the column
r *ColumnRuleOpts
// env holds the prebuilt "NAME=value" environment entries describing the
// raw type and its groups
env []string
}

func columnsInit() columns {
Expand All @@ -17,11 +26,37 @@ func columnsInit() columns {
}
}

func (c *columns) add(name string, rt string) {
func (c *columns) add(name string, rt string, pts [][]string, r *ColumnRuleOpts) {

env := []string{fmt.Sprintf("%s=%s", envVarColumnTypeRAW, rt)}

if pts != nil {
for i, g := range pts {
for j, sg := range g {

if j == 0 {
env = append(
env,
fmt.Sprintf("%s%d=%s", envVarColumnTypeGroupPrefix, i, sg),
)
} else {
env = append(
env,
fmt.Sprintf("%s%d_%d=%s", envVarColumnTypeGroupPrefix, i, j-1, sg),
)
}
}
}
}

v := column{
n: name,
rawType: rt,
n: name,
t: columnTypes{
raw: rt,
groups: pts,
r: r,
env: env,
},
}

c.cc = append(c.cc, &v)
Expand Down
117 changes: 60 additions & 57 deletions modules/filters/relfilter/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ type InitOpts struct {
DefaultRules map[string]ColumnRuleOpts
ExceptionColumns []string

ColumnsPolicy misc.SecurityPolicyColumnsType

TypeRuleCustom []TypeRuleOpts
TypeRuleDefault []TypeRuleOpts
}
Expand Down Expand Up @@ -63,6 +65,8 @@ type rules struct {
defaultRules map[string]ColumnRuleOpts
exceptionColumns map[string]any

columnsPolicy misc.SecurityPolicyColumnsType

typeRuleCustom []typeRule
typeRuleDefault []typeRule

Expand Down Expand Up @@ -108,10 +112,12 @@ type execFilterOpts struct {
const uniqueAttempts = 5

const (
envVarGlobalPrefix = "ENVVARGLOBAL_"
envVarTable = "ENVVARTABLE"
envVarColumnPrefix = "ENVVARCOLUMN_"
envVarCurColumn = "ENVVARCURCOLUMN"
envVarGlobalPrefix = "ENVVARGLOBAL_"
envVarTable = "ENVVARTABLE"
envVarColumnPrefix = "ENVVARCOLUMN_"
envVarCurColumn = "ENVVARCURCOLUMN"
envVarColumnTypeRAW = "ENVVARCOLUMNTYPERAW"
envVarColumnTypeGroupPrefix = "ENVVARCOLUMNTYPEGROUP_"
)

type applyRule struct {
Expand Down Expand Up @@ -219,6 +225,7 @@ func Init(opts InitOpts) (*Filter, error) {
exceptionColumns: excpts,
typeRuleCustom: trc,
typeRuleDefault: trd,
columnsPolicy: opts.ColumnsPolicy,
},
}, nil
}
Expand Down Expand Up @@ -247,7 +254,26 @@ func (filter *Filter) TableRulesLookup(name string) map[string]ColumnRuleOpts {

// ColumnAdd adds a new column into the current data set.
//
// The raw column type `rt` is checked against the custom type rules first and
// then against the default type rules. The first rule whose regex yields at
// least one match supplies both the submatches and the rule that are stored
// with the column. When no rule matches, the column is stored with nil
// groups and a nil rule.
func (filter *Filter) ColumnAdd(name string, rt string) {
filter.tableData.columns.add(name, rt)

//var rl *ColumnRuleOpts

// Custom type rules take precedence over the default ones
for _, r := range filter.rules.typeRuleCustom {
gd := r.Rgx.FindAllStringSubmatch(rt, -1)
if len(gd) > 0 {
filter.tableData.columns.add(name, rt, gd, &r.Rule)
return
}
}

// Fall back to the default type rules
for _, r := range filter.rules.typeRuleDefault {
gd := r.Rgx.FindAllStringSubmatch(rt, -1)
if len(gd) > 0 {
filter.tableData.columns.add(name, rt, gd, &r.Rule)
return
}
}

// No type rule matched: register the column without groups or a rule
filter.tableData.columns.add(name, rt, nil, nil)
}

func (filter *Filter) ColumnGetName(index int) string {
Expand Down Expand Up @@ -346,47 +372,21 @@ func (filter *Filter) Apply() error {
continue
}

// Check custom type rule for column
if b := func() bool {
for _, r := range filter.rules.typeRuleCustom {
if r.Rgx.Match([]byte(c.rawType)) {
rls = append(
rls,
applyRule{
c: c,
i: i,
cr: r.Rule,
},
)
return true
}
}
return false
}(); b {
continue
}
// Other rules if required

// Check default type rule for column
if b := func() bool {
for _, r := range filter.rules.typeRuleDefault {
if r.Rgx.Match([]byte(c.rawType)) {
rls = append(
rls,
applyRule{
c: c,
i: i,
cr: r.Rule,
},
)
return true
}
// Default rules for types
if filter.rules.columnsPolicy == misc.SecurityPolicyColumnsRandomize {
if c.t.r != nil {
rls = append(
rls,
applyRule{
c: c,
i: i,
cr: *c.t.r,
},
)
}
return false
}(); b {
continue
}

// Other rules if required
}

// Apply rules
Expand Down Expand Up @@ -454,28 +454,31 @@ func (filter *Filter) applyRules(tname string, rls []applyRule) error {
} else {

type tplData struct {
TableName string
Values map[string]string
Variables map[string]string
TableName string
CurColumnName string
Values map[string]string
Variables map[string]string
ColumnTypeRaw string
ColumnTypeGroups [][]string
}

td := tplData{
TableName: tname,
Values: valOld,
Variables: filter.rules.variables,
TableName: tname,
CurColumnName: r.c.n,
Values: valOld,
Variables: filter.rules.variables,
ColumnTypeRaw: r.c.t.raw,
ColumnTypeGroups: r.c.t.groups,
}

tdenv := []string{
tde := []string{
fmt.Sprintf("%s=%s", envVarTable, tname),
}

// Create tmp env variables with current column name
tde := append(
tdenv,
fmt.Sprintf("%s=%s", envVarCurColumn, r.c.n),
)
}

tdenv = append(tdenv, valEnvGlob...)
tde = append(tde, valEnvOld...)
tde = append(tde, valEnvGlob...)
tde = append(tde, r.c.t.env...)

v, err = filter.applyColumnFilter(r.c.n, r.cr, td, tde)
if err != nil {
Expand Down

0 comments on commit 08d2d5d

Please sign in to comment.