Skip to content

Commit

Permalink
feat(#3): Add column types for PgSQL
Browse files Browse the repository at this point in the history
  • Loading branch information
borisershov committed May 29, 2024
1 parent fa35272 commit 48853c6
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 20 deletions.
71 changes: 59 additions & 12 deletions modules/anonymizers/pgsql/dh.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,53 @@ import (
"github.com/nixys/nxs-data-anonymizer/modules/filters/relfilter"
)

// dhCreateTableName is a data handler that records the name of the table
// whose `CREATE TABLE` statement is being read.
//
// The deferred bytes, stripped of surrounding whitespace, are taken as the
// table name and a pointer to that name is stored in the user context field
// `tn`, where dhCreateTableDesc picks it up later.
//
// usrCtx must hold a *userCtx. The handler returns deferred with token
// appended and never returns an error.
func dhCreateTableName(usrCtx any, deferred, token []byte) ([]byte, error) {
	state := usrCtx.(*userCtx)

	name := string(bytes.TrimSpace(deferred))
	state.tn = &name

	out := append(deferred, token...)
	return out, nil
}

// dhCreateTableDesc is a data handler that parses the body of a
// `CREATE TABLE` statement (the deferred bytes, one definition per line) and
// remembers the type of every column for the table whose name was stored by
// dhCreateTableName.
//
// For each non-empty line the trailing comma is dropped, the first element is
// taken as the column name and the first word after it as the type keyword,
// which is mapped with columnType. Lines without a type keyword are stored
// with relfilter.ColumnTypeNone.
//
// Column names are stored without their surrounding double quotes, because
// dhFieldName strips the quotes before looking the column up in the same map;
// storing the quoted form would make every quoted column miss its type.
//
// usrCtx must hold a *userCtx. If no table name is pending (`tn` is nil) the
// parsed columns are discarded instead of dereferencing a nil pointer. The
// handler returns deferred with token appended and never returns an error.
func dhCreateTableDesc(usrCtx any, deferred, token []byte) ([]byte, error) {

	uctx := usrCtx.(*userCtx)

	clmns := make(map[string]relfilter.ColumnType)

	for _, line := range bytes.Split(deferred, []byte{'\n'}) {

		line = bytes.TrimSuffix(bytes.TrimSpace(line), []byte{','})
		if len(line) == 0 {
			continue
		}

		name, rest := dhColumnNameSplit(line)

		// bytes.Fields tolerates any amount of whitespace between the
		// column name and its type
		words := bytes.Fields(rest)

		// If the column type is not specified within the dump
		if len(words) == 0 {
			clmns[name] = relfilter.ColumnTypeNone
			continue
		}

		clmns[name] = columnType(string(words[0]))
	}

	// Only store the description when a table name has been captured
	if uctx.tn != nil {
		uctx.tables[*uctx.tn] = clmns
		uctx.tn = nil
	}

	return append(deferred, token...), nil
}

// dhColumnNameSplit splits one column definition line into the column name
// (without surrounding double quotes) and the rest of the line.
//
// A name that starts with a double quote extends to the next double quote, so
// quoted names containing spaces stay in one piece. Doubled quotes inside an
// identifier are not unescaped. Any other name ends at the first space.
func dhColumnNameSplit(line []byte) (string, []byte) {

	if len(line) > 0 && line[0] == '"' {
		if end := bytes.IndexByte(line[1:], '"'); end >= 0 {
			// `end` is relative to line[1:], so the closing quote
			// sits at line[end+1]
			return string(line[1 : end+1]), line[end+2:]
		}
	}

	name, rest, _ := bytes.Cut(line, []byte{' '})

	return string(bytes.Trim(name, "\"")), rest
}

// dhTableName is a data handler that takes the deferred bytes, stripped of
// surrounding whitespace, as a table name and passes it to the filter's
// TableCreate. It returns deferred with token appended and a nil error.
//
// NOTE(review): this span comes from a rendered diff and appears to show the
// pre-change lines (usrCtx asserted to *relfilter.Filter) and the post-change
// lines (usrCtx asserted to *userCtx) back to back — confirm which pair is
// the live one before relying on it.
func dhTableName(usrCtx any, deferred, token []byte) ([]byte, error) {

tname := bytes.TrimSpace(deferred)

filter := usrCtx.(*relfilter.Filter)
filter.TableCreate(string(tname))
uctx := usrCtx.(*userCtx)
uctx.filter.TableCreate(string(tname))

return append(deferred, token...), nil
}
Expand All @@ -21,41 +62,47 @@ func dhFieldName(usrCtx any, deferred, token []byte) ([]byte, error) {

fname := bytes.Trim(bytes.TrimSpace(deferred), "\"")

filter := usrCtx.(*relfilter.Filter)
filter.ColumnAdd(string(fname), relfilter.ColumnTypeNone)
uctx := usrCtx.(*userCtx)

t, b := uctx.tables[uctx.filter.TableNameGet()][string(fname)]
if b == false {
t = relfilter.ColumnTypeNone
}

uctx.filter.ColumnAdd(string(fname), t)

return append(deferred, token...), nil
}

// dhValue is a data handler that adds one value to the filter: nil when the
// deferred bytes are exactly `\N`, otherwise the deferred bytes themselves.
// It returns an empty slice and a nil error.
//
// NOTE(review): `\N` is presumably the NULL marker of the PostgreSQL COPY
// text format — confirm.
// NOTE(review): this span comes from a rendered diff and appears to show the
// pre-change `filter` lines and the post-change `uctx.filter` lines back to
// back — confirm which set is the live one.
func dhValue(usrCtx any, deferred, token []byte) ([]byte, error) {

filter := usrCtx.(*relfilter.Filter)
uctx := usrCtx.(*userCtx)

if bytes.Compare(deferred, []byte("\\N")) == 0 {
filter.ValueAdd(nil)
uctx.filter.ValueAdd(nil)
} else {
filter.ValueAdd(deferred)
uctx.filter.ValueAdd(deferred)
}

return []byte{}, nil
}

// dhValueEnd is a data handler for the last value of a row. Like dhValue it
// adds nil to the filter when the deferred bytes are exactly `\N` and the
// deferred bytes otherwise; it then calls Apply on the filter and returns the
// bytes produced by rowDataGen. If Apply fails, an empty slice and the error
// are returned.
//
// NOTE(review): this span comes from a rendered diff and appears to show the
// pre-change `filter` lines and the post-change `uctx.filter` lines back to
// back (including two consecutive `if err := ...` lines and two `return`
// lines) — confirm which set is the live one.
func dhValueEnd(usrCtx any, deferred, token []byte) ([]byte, error) {

filter := usrCtx.(*relfilter.Filter)
uctx := usrCtx.(*userCtx)

if bytes.Compare(deferred, []byte("\\N")) == 0 {
filter.ValueAdd(nil)
uctx.filter.ValueAdd(nil)
} else {
filter.ValueAdd(deferred)
uctx.filter.ValueAdd(deferred)
}

// Apply filter for row
if err := filter.Apply(); err != nil {
if err := uctx.filter.Apply(); err != nil {
return []byte{}, err
}

return rowDataGen(filter), nil
return rowDataGen(uctx.filter), nil
}

func rowDataGen(filter *relfilter.Filter) []byte {
Expand Down
81 changes: 79 additions & 2 deletions modules/anonymizers/pgsql/pgsql.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,40 @@ import (
fsm "github.com/nixys/nxs-go-fsm"
)

func Init(ctx context.Context, r io.Reader, rules relfilter.Rules) io.Reader {
// InitSettings contains the settings accepted by Init.
type InitSettings struct {
// Rules is passed to relfilter.Init when the user context is created.
Rules relfilter.Rules
}

// userCtx is the value set as UserCtx of the FSM description in Init; the
// data handlers type-assert their usrCtx argument to *userCtx.
type userCtx struct {
// filter is the relfilter filter created from the init settings rules.
filter *relfilter.Filter

// tn points to the name of the table whose `CREATE TABLE` statement is
// being parsed: set by dhCreateTableName, reset to nil by dhCreateTableDesc.
tn *string
// tables maps a table name to its columns (column name -> column type),
// filled by dhCreateTableDesc and read by dhFieldName.
tables map[string]map[string]relfilter.ColumnType
}

// typeKeys maps a column type keyword to the relfilter column type.
// columnType looks keys up here; dhCreateTableDesc passes it the second
// space-separated element of a column definition line. Keywords that are
// not listed resolve to relfilter.ColumnTypeNone in columnType.
var typeKeys = map[string]relfilter.ColumnType{

// Strings
"character": relfilter.ColumnTypeString,

// Numeric
"integer": relfilter.ColumnTypeNum,
}

// userCtxInit builds the user context for the FSM: a filter created from the
// rules in s and an empty table description map ready for writes.
func userCtxInit(s InitSettings) *userCtx {
	uctx := new(userCtx)

	uctx.filter = relfilter.Init(s.Rules)
	uctx.tables = map[string]map[string]relfilter.ColumnType{}

	return uctx
}

func Init(ctx context.Context, r io.Reader, s InitSettings) io.Reader {

return fsm.Init(
r,
fsm.Description{
Ctx: ctx,
UserCtx: relfilter.Init(rules),
UserCtx: userCtxInit(s),
InitState: stateCopySearch,
States: map[fsm.StateName]fsm.State{

Expand All @@ -32,8 +59,50 @@ func Init(ctx context.Context, r io.Reader, rules relfilter.Rules) io.Reader {
},
DataHandler: nil,
},
{
Name: stateCreateTableName,
Switch: fsm.Switch{
Trigger: []byte("CREATE TABLE"),
Delimiters: fsm.Delimiters{
L: []byte{'\n'},
R: []byte{' '},
},
},
DataHandler: nil,
},
},
},

stateCreateTableName: {
NextStates: []fsm.NextState{
{
Name: stateCreateTableTail,
Switch: fsm.Switch{
Trigger: []byte("("),
Delimiters: fsm.Delimiters{
R: []byte{'\n'},
},
},
DataHandler: dhCreateTableName,
},
},
},

stateCreateTableTail: {
NextStates: []fsm.NextState{
{
Name: stateCopySearch,
Switch: fsm.Switch{
Trigger: []byte(");"),
Delimiters: fsm.Delimiters{
R: []byte{'\n'},
},
},
DataHandler: dhCreateTableDesc,
},
},
},

stateTableName: {
NextStates: []fsm.NextState{
{
Expand Down Expand Up @@ -108,3 +177,11 @@ func Init(ctx context.Context, r io.Reader, rules relfilter.Rules) io.Reader {
},
)
}

// columnType returns the relfilter column type registered in typeKeys for
// the given type keyword, or relfilter.ColumnTypeNone when the keyword is
// not known.
func columnType(key string) relfilter.ColumnType {
	if ct, ok := typeKeys[key]; ok {
		return ct
	}
	return relfilter.ColumnTypeNone
}
12 changes: 7 additions & 5 deletions modules/anonymizers/pgsql/states.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@ package pgsql_anonymize
import fsm "github.com/nixys/nxs-go-fsm"

// Names of the FSM states used as keys of the States map in Init.
//
// NOTE(review): this span comes from a rendered diff and appears to list the
// pre-change declarations followed by the re-aligned post-change ones, so
// several names show up twice — confirm against the real file.
// NOTE(review): "creat table name" / "creat table tail" look like a
// misspelling of "create"; the strings are runtime values, so check for
// users of the literal text before renaming.
var (
stateCopySearch = fsm.StateName("copy search")
stateTableName = fsm.StateName("table name")
stateFieldName = fsm.StateName("field name")
stateCopyTail = fsm.StateName("copy tail")
stateTableValues = fsm.StateName("table values")
stateCreateTableName = fsm.StateName("creat table name")
stateCreateTableTail = fsm.StateName("creat table tail")
stateCopySearch = fsm.StateName("copy search")
stateTableName = fsm.StateName("table name")
stateFieldName = fsm.StateName("field name")
stateCopyTail = fsm.StateName("copy tail")
stateTableValues = fsm.StateName("table values")
)
8 changes: 7 additions & 1 deletion routines/anonymizer/anonymizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,13 @@ func anonymize(st anonymizeSettings) error {
},
)
case ctx.DBTypePgSQL:
ar = pgsql_anonymize.Init(st.c, st.pr, st.rs)
ar = pgsql_anonymize.Init(
st.c,
st.pr,
pgsql_anonymize.InitSettings{
Rules: st.rs,
},
)
default:

st.l.WithFields(logrus.Fields{
Expand Down

0 comments on commit 48853c6

Please sign in to comment.