Skip to content

Commit

Permalink
add and export MongoDB command string obfuscation func for python che…
Browse files Browse the repository at this point in the history
…cks (#25569)

* add and export MongoDB command string obfuscation func for python checks

* fix typo

* ObfuscateMongoDBString returns no error

* add default mongo obfuscate settings

* fix test

* fix unit test

* add release note

* Update mongodb-command-obfuscation-in-python-checks-454afd8726166736.yaml

Co-authored-by: May Lee <may.lee@datadoghq.com>

* only parse tuple as obfuscate_mongodb_string does not have kwargs

---------

Co-authored-by: May Lee <may.lee@datadoghq.com>
  • Loading branch information
lu-zhengda and maycmlee authored May 16, 2024
1 parent 49c10b7 commit 0d05c69
Show file tree
Hide file tree
Showing 15 changed files with 203 additions and 0 deletions.
46 changes: 46 additions & 0 deletions pkg/collector/python/datadog_agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,9 @@ func lazyInitObfuscator() *obfuscate.Obfuscator {
if !cfg.SQLExecPlanNormalize.Enabled {
cfg.SQLExecPlanNormalize = defaultSQLPlanNormalizeSettings
}
if !cfg.Mongo.Enabled {
cfg.Mongo = defaultMongoObfuscateSettings
}
obfuscator = obfuscate.NewObfuscator(cfg)
})
return obfuscator
Expand Down Expand Up @@ -526,7 +529,50 @@ var defaultSQLPlanObfuscateSettings = obfuscate.JSONConfig{
ObfuscateSQLValues: defaultSQLPlanNormalizeSettings.ObfuscateSQLValues,
}

// defaultMongoObfuscateSettings are the default JSON obfuscator settings for obfuscating mongodb commands.
// lazyInitObfuscator falls back to these settings when the configured Mongo
// obfuscation is not enabled.
// NOTE(review): KeepValues presumably lists the command keys whose values are
// left un-obfuscated — confirm against obfuscate.JSONConfig.
var defaultMongoObfuscateSettings = obfuscate.JSONConfig{
Enabled: true,
KeepValues: []string{
"find",
"sort",
"projection",
"skip",
"batchSize",
"$db",
"getMore",
"collection",
"delete",
"findAndModify",
"insert",
"ordered",
"update",
"aggregate",
"comment",
},
}

// getProcessStartTime reports the agent process start time, in seconds since
// the Unix epoch, as a float64 so it can be handed to the C callback.
//
//export getProcessStartTime
func getProcessStartTime() float64 {
	startedAtUnix := config.StartTime.Unix()
	return float64(startedAtUnix)
}

// ObfuscateMongoDBString obfuscates the MongoDB query.
//
// cmd is the C string holding the MongoDB command. On success the obfuscated
// command is returned as a C string created with TrackedCString. On failure
// nil is returned and *errResult is set to a C string (also created with
// TrackedCString) describing the problem: either the command was empty, or the
// obfuscator returned an empty string. In every case the returned memory is
// freed by the caller.
//
//export ObfuscateMongoDBString
func ObfuscateMongoDBString(cmd *C.char, errResult **C.char) *C.char {
	// Convert the C string once and reuse the Go copy for both the emptiness
	// check and the obfuscation call.
	command := C.GoString(cmd)
	if command == "" {
		// memory will be freed by caller
		*errResult = TrackedCString("Empty MongoDB command")
		return nil
	}

	obfuscatedMongoDBString := lazyInitObfuscator().ObfuscateMongoDBString(command)
	if obfuscatedMongoDBString == "" {
		// memory will be freed by caller
		*errResult = TrackedCString("Failed to obfuscate MongoDB command")
		return nil
	}

	// memory will be freed by caller
	return TrackedCString(obfuscatedMongoDBString)
}
2 changes: 2 additions & 0 deletions pkg/collector/python/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ bool TracemallocEnabled();
char* ObfuscateSQL(char *, char *, char **);
char* ObfuscateSQLExecPlan(char *, bool, char **);
double getProcessStartTime();
char* ObfuscateMongoDBString(char *, char **);
void initDatadogAgentModule(rtloader_t *rtloader) {
set_get_clustername_cb(rtloader, GetClusterName);
Expand All @@ -107,6 +108,7 @@ void initDatadogAgentModule(rtloader_t *rtloader) {
set_obfuscate_sql_cb(rtloader, ObfuscateSQL);
set_obfuscate_sql_exec_plan_cb(rtloader, ObfuscateSQLExecPlan);
set_get_process_start_time_cb(rtloader, getProcessStartTime);
set_obfuscate_mongodb_string_cb(rtloader, ObfuscateMongoDBString);
}
//
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Each section from every release note is combined when the
# CHANGELOG.rst is rendered. So the text needs to be worded so that
# it does not depend on any information only available in another
# section. This may mean repeating some details, but each section
# must be readable independently of the other.
#
# Each section note must be formatted as reStructuredText.
---
features:
- |
Expose the Agent's MongoDB command string obfuscation to Python checks using the new `datadog_agent.obfuscate_mongodb_string` method.
55 changes: 55 additions & 0 deletions rtloader/common/builtins/datadog_agent.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ static cb_read_persistent_cache_t cb_read_persistent_cache = NULL;
static cb_obfuscate_sql_t cb_obfuscate_sql = NULL;
static cb_obfuscate_sql_exec_plan_t cb_obfuscate_sql_exec_plan = NULL;
static cb_get_process_start_time_t cb_get_process_start_time = NULL;
static cb_obfuscate_mongodb_string_t cb_obfuscate_mongodb_string = NULL;

// forward declarations
static PyObject *get_clustername(PyObject *self, PyObject *args);
Expand All @@ -39,6 +40,7 @@ static PyObject *read_persistent_cache(PyObject *self, PyObject *args);
static PyObject *obfuscate_sql(PyObject *self, PyObject *args, PyObject *kwargs);
static PyObject *obfuscate_sql_exec_plan(PyObject *self, PyObject *args, PyObject *kwargs);
static PyObject *get_process_start_time(PyObject *self, PyObject *args, PyObject *kwargs);
static PyObject *obfuscate_mongodb_string(PyObject *self, PyObject *args, PyObject *kwargs);

static PyMethodDef methods[] = {
{ "get_clustername", get_clustername, METH_NOARGS, "Get the cluster name." },
Expand All @@ -55,6 +57,7 @@ static PyMethodDef methods[] = {
{ "obfuscate_sql", (PyCFunction)obfuscate_sql, METH_VARARGS|METH_KEYWORDS, "Obfuscate & normalize a SQL string." },
{ "obfuscate_sql_exec_plan", (PyCFunction)obfuscate_sql_exec_plan, METH_VARARGS|METH_KEYWORDS, "Obfuscate & normalize a SQL Execution Plan." },
{ "get_process_start_time", (PyCFunction)get_process_start_time, METH_NOARGS, "Get agent process startup time, in seconds since the epoch." },
{ "obfuscate_mongodb_string", (PyCFunction)obfuscate_mongodb_string, METH_VARARGS|METH_KEYWORDS, "Obfuscate & normalize a MongoDB command string." },
{ NULL, NULL } // guards
};

Expand Down Expand Up @@ -139,6 +142,11 @@ void _set_get_process_start_time_cb(cb_get_process_start_time_t cb) {
cb_get_process_start_time = cb;
}

// Stores the callback that `obfuscate_mongodb_string` invokes to obfuscate a MongoDB command string.
void _set_obfuscate_mongodb_string_cb(cb_obfuscate_mongodb_string_t callback) {
    cb_obfuscate_mongodb_string = callback;
}


/*! \fn PyObject *get_version(PyObject *self, PyObject *args)
\brief This function implements the `datadog-agent.get_version` method, collecting
Expand Down Expand Up @@ -791,3 +799,50 @@ static PyObject *get_process_start_time(PyObject *self, PyObject *args, PyObject

return retval;
}

/*! \fn PyObject *obfuscate_mongodb_string(PyObject *self, PyObject *args, PyObject *kwargs)
    \brief This function implements the `datadog_agent.obfuscate_mongodb_string` method, obfuscating
    the provided MongoDB command string.
    \param self A PyObject* pointer to the `datadog_agent` module.
    \param args A PyObject* pointer to a tuple containing the MongoDB command string to obfuscate.
    \param kwargs A PyObject* pointer to a map of key value pairs; it is not read by this function.
    \return A PyObject* pointer to the obfuscated command string, `None` if the callback has not
    been set, or NULL with a Python exception set on failure.

    This function is callable as the `datadog_agent.obfuscate_mongodb_string` Python method and
    uses the `cb_obfuscate_mongodb_string()` callback to obfuscate the command in the agent
    with CGO. A `RuntimeError` is raised when the callback reports an error message or when it
    returns neither a result nor an error message.
*/
static PyObject *obfuscate_mongodb_string(PyObject *self, PyObject *args, PyObject *kwargs)
{
    // without a registered callback there is nothing to call into
    if (cb_obfuscate_mongodb_string == NULL) {
        Py_RETURN_NONE;
    }

    PyGILState_STATE gil = PyGILState_Ensure();

    // the command string is the single positional argument
    char *command = NULL;
    if (!PyArg_ParseTuple(args, "s", &command)) {
        PyGILState_Release(gil);
        return NULL;
    }

    char *error_message = NULL;
    char *obfuscated = cb_obfuscate_mongodb_string(command, &error_message);

    PyObject *result = NULL;
    if (error_message == NULL && obfuscated != NULL) {
        result = PyStringFromCString(obfuscated);
    } else {
        // an error message from the callback takes precedence; a null response without one
        // should never happen, so the go code is misbehaving
        const char *reason = (error_message != NULL)
            ? error_message
            : "internal error: empty cb_obfuscate_mongodb_string response";
        PyErr_SetString(PyExc_RuntimeError, reason);
    }

    // both strings were handed over by the callback and are released here
    cgo_free(error_message);
    cgo_free(obfuscated);
    PyGILState_Release(gil);
    return result;
}
1 change: 1 addition & 0 deletions rtloader/common/builtins/datadog_agent.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ void _set_read_persistent_cache_cb(cb_read_persistent_cache_t);
void _set_obfuscate_sql_cb(cb_obfuscate_sql_t);
void _set_obfuscate_sql_exec_plan_cb(cb_obfuscate_sql_exec_plan_t);
void _set_get_process_start_time_cb(cb_get_process_start_time_t);
void _set_obfuscate_mongodb_string_cb(cb_obfuscate_mongodb_string_t);

PyObject *_public_headers(PyObject *self, PyObject *args, PyObject *kwargs);

Expand Down
11 changes: 11 additions & 0 deletions rtloader/include/datadog_agent_rtloader.h
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,17 @@ DATADOG_AGENT_RTLOADER_API void init_pymem_stats(rtloader_t *);
*/
DATADOG_AGENT_RTLOADER_API void get_pymem_stats(rtloader_t *, pymem_stats_t *);

/*! \fn void set_obfuscate_mongodb_string_cb(rtloader_t *, cb_obfuscate_mongodb_string_t)
\brief Sets a callback to be used by rtloader to obfuscate a MongoDB command string
on behalf of the `datadog_agent.obfuscate_mongodb_string` Python method.
\param rtloader_t A rtloader_t * pointer to the RtLoader instance.
\param object A function pointer with cb_obfuscate_mongodb_string_t prototype to the callback
function.
The callback is expected to be provided by the rtloader caller - in go-context: CGO.
*/
DATADOG_AGENT_RTLOADER_API void set_obfuscate_mongodb_string_cb(rtloader_t *, cb_obfuscate_mongodb_string_t);

#ifdef __cplusplus
}
#endif
Expand Down
9 changes: 9 additions & 0 deletions rtloader/include/rtloader.h
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,15 @@ class RtLoader
{
}

//! setObfuscateMongoDBStringCb member.
/*!
\param A cb_obfuscate_mongodb_string_t function pointer to the CGO callback.
This allows us to set the relevant CGO callback that will allow obfuscating
MongoDB command strings. Pure virtual: each concrete loader provides it.
*/
virtual void setObfuscateMongoDBStringCb(cb_obfuscate_mongodb_string_t) = 0;

protected:
//! _allocateInternalErrorDiagnoses member.
/*!
Expand Down
2 changes: 2 additions & 0 deletions rtloader/include/rtloader_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ typedef char *(*cb_obfuscate_sql_t)(char *, char *, char **);
typedef char *(*cb_obfuscate_sql_exec_plan_t)(char *, bool, char **);
// ()
typedef double (*cb_get_process_start_time_t)(void);
// (cmd, error_message) -> obfuscated command; error_message is an out-parameter
typedef char *(*cb_obfuscate_mongodb_string_t)(char *, char **);

// _util
// (argv, env, stdout, stderr, ret_code, exception)
Expand Down
5 changes: 5 additions & 0 deletions rtloader/rtloader/api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,11 @@ void set_get_process_start_time_cb(rtloader_t *rtloader, cb_get_process_start_ti
AS_TYPE(RtLoader, rtloader)->setGetProcessStartTimeCb(cb);
}

// Forwards the MongoDB obfuscation callback to the RtLoader instance behind the opaque handle.
void set_obfuscate_mongodb_string_cb(rtloader_t *rtloader, cb_obfuscate_mongodb_string_t cb)
{
    RtLoader *loader = AS_TYPE(RtLoader, rtloader);
    loader->setObfuscateMongoDBStringCb(cb);
}

/*
* _util API
*/
Expand Down
16 changes: 16 additions & 0 deletions rtloader/test/datadog_agent/datadog_agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ extern char* readPersistentCache(char*);
extern char* obfuscateSQL(char*, char*, char**);
extern char* obfuscateSQLExecPlan(char*, bool, char**);
extern double getProcessStartTime();
extern char* obfuscateMongoDBString(char*, char**);
static void initDatadogAgentTests(rtloader_t *rtloader) {
Expand All @@ -57,6 +58,7 @@ static void initDatadogAgentTests(rtloader_t *rtloader) {
set_obfuscate_sql_cb(rtloader, obfuscateSQL);
set_obfuscate_sql_exec_plan_cb(rtloader, obfuscateSQLExecPlan);
set_get_process_start_time_cb(rtloader, getProcessStartTime);
set_obfuscate_mongodb_string_cb(rtloader, obfuscateMongoDBString);
}
*/
import "C"
Expand Down Expand Up @@ -327,3 +329,17 @@ var processStartTime = float64(time.Now().Unix())
func getProcessStartTime() float64 {
return processStartTime
}

// obfuscateMongoDBString is the test stand-in for the agent's MongoDB
// obfuscation callback: it echoes back the single command it knows about and
// reports an error through errResult for anything else.
//
//export obfuscateMongoDBString
func obfuscateMongoDBString(cmd *C.char, errResult **C.char) *C.char {
	command := C.GoString(cmd)

	// the only supported command is returned unchanged
	if command == "{\"find\": \"customer\"}" {
		return (*C.char)(helpers.TrackedCString("{\"find\": \"customer\"}"))
	}

	// everything else is an error; an empty command gets its own message
	failure := "unknown test case"
	if command == "" {
		failure = "Empty MongoDB command"
	}
	*errResult = (*C.char)(helpers.TrackedCString(failure))
	return nil
}
33 changes: 33 additions & 0 deletions rtloader/test/datadog_agent/datadog_agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -652,3 +652,36 @@ func TestProcessStartTime(t *testing.T) {
// Check for leaks
helpers.AssertMemoryUsage(t)
}

// TestObfuscateMongoDBString calls `datadog_agent.obfuscate_mongodb_string`
// from a Python snippet for each case, compares the output of `run` with the
// expected string, and finally asserts the memory usage to check for leaks.
func TestObfuscateMongoDBString(t *testing.T) {
// Reset memory counters
helpers.ResetMemoryStats()

// args is pasted verbatim into the Python call, so it must be a valid
// Python expression (here a single-quoted string literal)
cases := []struct {
args string
expected string
}{
{
"'{\"find\": \"customer\"}'",
"{\"find\": \"customer\"}",
},
}

for _, testCase := range cases {
// the snippet writes str(result) to tmpfile
// NOTE(review): presumably `run` returns that file's content — confirm
code := fmt.Sprintf(`
result = datadog_agent.obfuscate_mongodb_string(%s)
with open(r'%s', 'w') as f:
f.write(str(result))
`, testCase.args, tmpfile.Name())
out, err := run(code)
if err != nil {
t.Fatal(err)
}
if out != testCase.expected {
t.Fatalf("args: (%s) expected: '%s', found: '%s'", testCase.args, testCase.expected, out)
}
}

// Check for leaks
helpers.AssertMemoryUsage(t)
}
5 changes: 5 additions & 0 deletions rtloader/three/three.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -985,6 +985,11 @@ void Three::setGetProcessStartTimeCb(cb_get_process_start_time_t cb)
_set_get_process_start_time_cb(cb);
}

// Registers the CGO callback that the `datadog_agent` builtin uses to obfuscate MongoDB command strings.
void Three::setObfuscateMongoDBStringCb(cb_obfuscate_mongodb_string_t callback)
{
    _set_obfuscate_mongodb_string_cb(callback);
}

// Python Helpers

// get_integration_list return a list of every datadog's wheels installed.
Expand Down
1 change: 1 addition & 0 deletions rtloader/three/three.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ class Three : public RtLoader
void setObfuscateSqlCb(cb_obfuscate_sql_t);
void setObfuscateSqlExecPlanCb(cb_obfuscate_sql_exec_plan_t);
void setGetProcessStartTimeCb(cb_get_process_start_time_t);
void setObfuscateMongoDBStringCb(cb_obfuscate_mongodb_string_t);

void initPymemStats();
void getPymemStats(pymem_stats_t &);
Expand Down
5 changes: 5 additions & 0 deletions rtloader/two/two.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -983,6 +983,11 @@ void Two::setGetProcessStartTimeCb(cb_get_process_start_time_t cb)
_set_get_process_start_time_cb(cb);
}

// Registers the CGO callback that the `datadog_agent` builtin uses to obfuscate MongoDB command strings.
void Two::setObfuscateMongoDBStringCb(cb_obfuscate_mongodb_string_t callback)
{
    _set_obfuscate_mongodb_string_cb(callback);
}

// Python Helpers

// get_integration_list return a list of every datadog's wheels installed.
Expand Down
1 change: 1 addition & 0 deletions rtloader/two/two.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ class Two : public RtLoader
void setObfuscateSqlCb(cb_obfuscate_sql_t);
void setObfuscateSqlExecPlanCb(cb_obfuscate_sql_exec_plan_t);
void setGetProcessStartTimeCb(cb_get_process_start_time_t);
void setObfuscateMongoDBStringCb(cb_obfuscate_mongodb_string_t);

// _util API
virtual void setSubprocessOutputCb(cb_get_subprocess_output_t);
Expand Down

0 comments on commit 0d05c69

Please sign in to comment.