diff --git a/pkg/collector/python/datadog_agent.go b/pkg/collector/python/datadog_agent.go index 4ef8aef8725bb..d157ab904002a 100644 --- a/pkg/collector/python/datadog_agent.go +++ b/pkg/collector/python/datadog_agent.go @@ -227,6 +227,9 @@ func lazyInitObfuscator() *obfuscate.Obfuscator { if !cfg.SQLExecPlanNormalize.Enabled { cfg.SQLExecPlanNormalize = defaultSQLPlanNormalizeSettings } + if !cfg.Mongo.Enabled { + cfg.Mongo = defaultMongoObfuscateSettings + } obfuscator = obfuscate.NewObfuscator(cfg) }) return obfuscator @@ -526,7 +529,50 @@ var defaultSQLPlanObfuscateSettings = obfuscate.JSONConfig{ ObfuscateSQLValues: defaultSQLPlanNormalizeSettings.ObfuscateSQLValues, } +// defaultMongoObfuscateSettings are the default JSON obfuscator settings for obfuscating mongodb commands +var defaultMongoObfuscateSettings = obfuscate.JSONConfig{ + Enabled: true, + KeepValues: []string{ + "find", + "sort", + "projection", + "skip", + "batchSize", + "$db", + "getMore", + "collection", + "delete", + "findAndModify", + "insert", + "ordered", + "update", + "aggregate", + "comment", + }, +} + //export getProcessStartTime func getProcessStartTime() float64 { return float64(config.StartTime.Unix()) } + +// ObfuscateMongoDBString obfuscates the MongoDB query +// +//export ObfuscateMongoDBString +func ObfuscateMongoDBString(cmd *C.char, errResult **C.char) *C.char { + if C.GoString(cmd) == "" { + // memory will be freed by caller + *errResult = TrackedCString("Empty MongoDB command") + return nil + } + obfuscatedMongoDBString := lazyInitObfuscator().ObfuscateMongoDBString( + C.GoString(cmd), + ) + if obfuscatedMongoDBString == "" { + // memory will be freed by caller + *errResult = TrackedCString("Failed to obfuscate MongoDB command") + return nil + } + // memory will be freed by caller + return TrackedCString(obfuscatedMongoDBString) +} diff --git a/pkg/collector/python/init.go b/pkg/collector/python/init.go index 3e05a2e610104..6c74bd61bb95a 100644 --- 
a/pkg/collector/python/init.go +++ b/pkg/collector/python/init.go @@ -92,6 +92,7 @@ bool TracemallocEnabled(); char* ObfuscateSQL(char *, char *, char **); char* ObfuscateSQLExecPlan(char *, bool, char **); double getProcessStartTime(); +char* ObfuscateMongoDBString(char *, char **); void initDatadogAgentModule(rtloader_t *rtloader) { set_get_clustername_cb(rtloader, GetClusterName); @@ -107,6 +108,7 @@ void initDatadogAgentModule(rtloader_t *rtloader) { set_obfuscate_sql_cb(rtloader, ObfuscateSQL); set_obfuscate_sql_exec_plan_cb(rtloader, ObfuscateSQLExecPlan); set_get_process_start_time_cb(rtloader, getProcessStartTime); + set_obfuscate_mongodb_string_cb(rtloader, ObfuscateMongoDBString); } // diff --git a/releasenotes/notes/mongodb-command-obfuscation-in-python-checks-454afd8726166736.yaml b/releasenotes/notes/mongodb-command-obfuscation-in-python-checks-454afd8726166736.yaml new file mode 100644 index 0000000000000..f70c9709e483e --- /dev/null +++ b/releasenotes/notes/mongodb-command-obfuscation-in-python-checks-454afd8726166736.yaml @@ -0,0 +1,11 @@ +# Each section from every release note are combined when the +# CHANGELOG.rst is rendered. So the text needs to be worded so that +# it does not depend on any information only available in another +# section. This may mean repeating some details, but each section +# must be readable independently of the other. +# +# Each section note must be formatted as reStructuredText. +--- +features: + - | + Expose the Agent's MongoDB command string obfuscation to Python checks using the new `datadog_agent.obfuscate_mongodb_string` method. 
diff --git a/rtloader/common/builtins/datadog_agent.c b/rtloader/common/builtins/datadog_agent.c index f8731d773b7ca..165d962134755 100644 --- a/rtloader/common/builtins/datadog_agent.c +++ b/rtloader/common/builtins/datadog_agent.c @@ -23,6 +23,7 @@ static cb_read_persistent_cache_t cb_read_persistent_cache = NULL; static cb_obfuscate_sql_t cb_obfuscate_sql = NULL; static cb_obfuscate_sql_exec_plan_t cb_obfuscate_sql_exec_plan = NULL; static cb_get_process_start_time_t cb_get_process_start_time = NULL; +static cb_obfuscate_mongodb_string_t cb_obfuscate_mongodb_string = NULL; // forward declarations static PyObject *get_clustername(PyObject *self, PyObject *args); @@ -39,6 +40,7 @@ static PyObject *read_persistent_cache(PyObject *self, PyObject *args); static PyObject *obfuscate_sql(PyObject *self, PyObject *args, PyObject *kwargs); static PyObject *obfuscate_sql_exec_plan(PyObject *self, PyObject *args, PyObject *kwargs); static PyObject *get_process_start_time(PyObject *self, PyObject *args, PyObject *kwargs); +static PyObject *obfuscate_mongodb_string(PyObject *self, PyObject *args, PyObject *kwargs); static PyMethodDef methods[] = { { "get_clustername", get_clustername, METH_NOARGS, "Get the cluster name." }, @@ -55,6 +57,7 @@ static PyMethodDef methods[] = { { "obfuscate_sql", (PyCFunction)obfuscate_sql, METH_VARARGS|METH_KEYWORDS, "Obfuscate & normalize a SQL string." }, { "obfuscate_sql_exec_plan", (PyCFunction)obfuscate_sql_exec_plan, METH_VARARGS|METH_KEYWORDS, "Obfuscate & normalize a SQL Execution Plan." }, { "get_process_start_time", (PyCFunction)get_process_start_time, METH_NOARGS, "Get agent process startup time, in seconds since the epoch." }, + { "obfuscate_mongodb_string", (PyCFunction)obfuscate_mongodb_string, METH_VARARGS|METH_KEYWORDS, "Obfuscate & normalize a MongoDB command string." 
}, { NULL, NULL } // guards }; @@ -139,6 +142,11 @@ void _set_get_process_start_time_cb(cb_get_process_start_time_t cb) { cb_get_process_start_time = cb; } +void _set_obfuscate_mongodb_string_cb(cb_obfuscate_mongodb_string_t cb) { + cb_obfuscate_mongodb_string = cb; + +} + /*! \fn PyObject *get_version(PyObject *self, PyObject *args) \brief This function implements the `datadog-agent.get_version` method, collecting @@ -791,3 +799,50 @@ static PyObject *get_process_start_time(PyObject *self, PyObject *args, PyObject return retval; } + +/*! \fn PyObject *obfuscate_mongodb_string(PyObject *self, PyObject *args, PyObject *kwargs) + \brief This function implements the `datadog_agent.obfuscate_mongodb_string` method, obfuscating + the provided MongoDB command string. + \param self A PyObject* pointer to the `datadog_agent` module. + \param args A PyObject* pointer to a tuple containing the MongoDB command string to obfuscate. + \param kwargs A PyObject* pointer to a map of keyword arguments (currently unused). + \return A PyObject* pointer to the obfuscated command string, or NULL with a RuntimeError set on failure. + + This function is callable as the `datadog_agent.obfuscate_mongodb_string` Python method and + uses the `cb_obfuscate_mongodb_string()` callback to obfuscate the command in the agent + with CGO. If the callback has not been set `None` will be returned. +*/ +static PyObject *obfuscate_mongodb_string(PyObject *self, PyObject *args, PyObject *kwargs) +{ + // callback must be set + if (cb_obfuscate_mongodb_string == NULL) { + Py_RETURN_NONE; + } + + PyGILState_STATE gstate = PyGILState_Ensure(); + + char *cmd = NULL; + if (!PyArg_ParseTuple(args, "s", &cmd)) { + PyGILState_Release(gstate); + return NULL; + } + + char *obfCmd = NULL; + char *error_message = NULL; + obfCmd = cb_obfuscate_mongodb_string(cmd, &error_message); + + PyObject *retval = NULL; + if (error_message != NULL) { + PyErr_SetString(PyExc_RuntimeError, error_message); + } else if (obfCmd == NULL) { + // no error message and a null response. 
this should never happen so the go code is misbehaving + PyErr_SetString(PyExc_RuntimeError, "internal error: empty cb_obfuscate_mongodb_string response"); + } else { + retval = PyStringFromCString(obfCmd); + } + + cgo_free(error_message); + cgo_free(obfCmd); + PyGILState_Release(gstate); + return retval; +} diff --git a/rtloader/common/builtins/datadog_agent.h b/rtloader/common/builtins/datadog_agent.h index cfc799ea5323a..b51f1a43f35e6 100644 --- a/rtloader/common/builtins/datadog_agent.h +++ b/rtloader/common/builtins/datadog_agent.h @@ -149,6 +149,7 @@ void _set_read_persistent_cache_cb(cb_read_persistent_cache_t); void _set_obfuscate_sql_cb(cb_obfuscate_sql_t); void _set_obfuscate_sql_exec_plan_cb(cb_obfuscate_sql_exec_plan_t); void _set_get_process_start_time_cb(cb_get_process_start_time_t); +void _set_obfuscate_mongodb_string_cb(cb_obfuscate_mongodb_string_t); PyObject *_public_headers(PyObject *self, PyObject *args, PyObject *kwargs); diff --git a/rtloader/include/datadog_agent_rtloader.h b/rtloader/include/datadog_agent_rtloader.h index b621d1470496a..7887a5ef19935 100644 --- a/rtloader/include/datadog_agent_rtloader.h +++ b/rtloader/include/datadog_agent_rtloader.h @@ -643,6 +643,17 @@ DATADOG_AGENT_RTLOADER_API void init_pymem_stats(rtloader_t *); */ DATADOG_AGENT_RTLOADER_API void get_pymem_stats(rtloader_t *, pymem_stats_t *); +/*! \fn void set_obfuscate_mongodb_string_cb(rtloader_t *, cb_obfuscate_mongodb_string_t) + \brief Sets a callback to be used by rtloader to allow obfuscating a MongoDB + command string. + \param rtloader_t A rtloader_t * pointer to the RtLoader instance. + \param object A function pointer with cb_obfuscate_mongodb_string_t prototype to the callback + function. + + The callback is expected to be provided by the rtloader caller - in go-context: CGO. 
+*/ +DATADOG_AGENT_RTLOADER_API void set_obfuscate_mongodb_string_cb(rtloader_t *, cb_obfuscate_mongodb_string_t); + #ifdef __cplusplus } #endif diff --git a/rtloader/include/rtloader.h b/rtloader/include/rtloader.h index 97567cd8f4ba6..51a9ad6e66b95 100644 --- a/rtloader/include/rtloader.h +++ b/rtloader/include/rtloader.h @@ -469,6 +469,15 @@ class RtLoader { } + //! setObfuscateMongoDBStringCb member. + /*! + \param A cb_obfuscate_mongodb_string_t function pointer to the CGO callback. + + This allows us to set the relevant CGO callback that will allow obfuscating MongoDB + command strings. + */ + virtual void setObfuscateMongoDBStringCb(cb_obfuscate_mongodb_string_t) = 0; + protected: //! _allocateInternalErrorDiagnoses member. /*! diff --git a/rtloader/include/rtloader_types.h b/rtloader/include/rtloader_types.h index 3f51959b596ba..edf72d53cf82c 100644 --- a/rtloader/include/rtloader_types.h +++ b/rtloader/include/rtloader_types.h @@ -137,6 +137,8 @@ typedef char *(*cb_obfuscate_sql_t)(char *, char *, char **); typedef char *(*cb_obfuscate_sql_exec_plan_t)(char *, bool, char **); // () typedef double (*cb_get_process_start_time_t)(void); +// (cmd, error_message) +typedef char *(*cb_obfuscate_mongodb_string_t)(char *, char **); // _util // (argv, env, stdout, stderr, ret_code, exception) diff --git a/rtloader/rtloader/api.cpp b/rtloader/rtloader/api.cpp index d6e3118e34b67..71beaf2e16c16 100644 --- a/rtloader/rtloader/api.cpp +++ b/rtloader/rtloader/api.cpp @@ -572,6 +572,11 @@ void set_get_process_start_time_cb(rtloader_t *rtloader, cb_get_process_start_ti AS_TYPE(RtLoader, rtloader)->setGetProcessStartTimeCb(cb); } +void set_obfuscate_mongodb_string_cb(rtloader_t *rtloader, cb_obfuscate_mongodb_string_t cb) +{ + AS_TYPE(RtLoader, rtloader)->setObfuscateMongoDBStringCb(cb); +} + /* * _util API */ diff --git a/rtloader/test/datadog_agent/datadog_agent.go b/rtloader/test/datadog_agent/datadog_agent.go index 9cad1d1ece6f3..58c7260598f4b 100644 --- 
a/rtloader/test/datadog_agent/datadog_agent.go +++ b/rtloader/test/datadog_agent/datadog_agent.go @@ -39,6 +39,7 @@ extern char* readPersistentCache(char*); extern char* obfuscateSQL(char*, char*, char**); extern char* obfuscateSQLExecPlan(char*, bool, char**); extern double getProcessStartTime(); +extern char* obfuscateMongoDBString(char*, char**); static void initDatadogAgentTests(rtloader_t *rtloader) { @@ -57,6 +58,7 @@ static void initDatadogAgentTests(rtloader_t *rtloader) { set_obfuscate_sql_cb(rtloader, obfuscateSQL); set_obfuscate_sql_exec_plan_cb(rtloader, obfuscateSQLExecPlan); set_get_process_start_time_cb(rtloader, getProcessStartTime); + set_obfuscate_mongodb_string_cb(rtloader, obfuscateMongoDBString); } */ import "C" @@ -327,3 +329,17 @@ var processStartTime = float64(time.Now().Unix()) func getProcessStartTime() float64 { return processStartTime } + +//export obfuscateMongoDBString +func obfuscateMongoDBString(cmd *C.char, errResult **C.char) *C.char { + switch C.GoString(cmd) { + case "{\"find\": \"customer\"}": + return (*C.char)(helpers.TrackedCString("{\"find\": \"customer\"}")) + case "": + *errResult = (*C.char)(helpers.TrackedCString("Empty MongoDB command")) + return nil + default: + *errResult = (*C.char)(helpers.TrackedCString("unknown test case")) + return nil + } +} diff --git a/rtloader/test/datadog_agent/datadog_agent_test.go b/rtloader/test/datadog_agent/datadog_agent_test.go index 06a00e0617ff7..6cacc39696ef6 100644 --- a/rtloader/test/datadog_agent/datadog_agent_test.go +++ b/rtloader/test/datadog_agent/datadog_agent_test.go @@ -652,3 +652,36 @@ func TestProcessStartTime(t *testing.T) { // Check for leaks helpers.AssertMemoryUsage(t) } + +func TestObfuscateMongoDBString(t *testing.T) { + // Reset memory counters + helpers.ResetMemoryStats() + + cases := []struct { + args string + expected string + }{ + { + "'{\"find\": \"customer\"}'", + "{\"find\": \"customer\"}", + }, + } + + for _, testCase := range cases { + code := 
fmt.Sprintf(` + result = datadog_agent.obfuscate_mongodb_string(%s) + with open(r'%s', 'w') as f: + f.write(str(result)) + `, testCase.args, tmpfile.Name()) + out, err := run(code) + if err != nil { + t.Fatal(err) + } + if out != testCase.expected { + t.Fatalf("args: (%s) expected: '%s', found: '%s'", testCase.args, testCase.expected, out) + } + } + + // Check for leaks + helpers.AssertMemoryUsage(t) +} diff --git a/rtloader/three/three.cpp b/rtloader/three/three.cpp index 0c5c4650e0ac4..de46ac22d4d2b 100644 --- a/rtloader/three/three.cpp +++ b/rtloader/three/three.cpp @@ -985,6 +985,11 @@ void Three::setGetProcessStartTimeCb(cb_get_process_start_time_t cb) _set_get_process_start_time_cb(cb); } +void Three::setObfuscateMongoDBStringCb(cb_obfuscate_mongodb_string_t cb) +{ + _set_obfuscate_mongodb_string_cb(cb); +} + // Python Helpers // get_integration_list return a list of every datadog's wheels installed. diff --git a/rtloader/three/three.h b/rtloader/three/three.h index 9f344d0513473..4c37747e6ff38 100644 --- a/rtloader/three/three.h +++ b/rtloader/three/three.h @@ -111,6 +111,7 @@ class Three : public RtLoader void setObfuscateSqlCb(cb_obfuscate_sql_t); void setObfuscateSqlExecPlanCb(cb_obfuscate_sql_exec_plan_t); void setGetProcessStartTimeCb(cb_get_process_start_time_t); + void setObfuscateMongoDBStringCb(cb_obfuscate_mongodb_string_t); void initPymemStats(); void getPymemStats(pymem_stats_t &); diff --git a/rtloader/two/two.cpp b/rtloader/two/two.cpp index 9f2a55bf4c881..38ab2bb5fdd99 100644 --- a/rtloader/two/two.cpp +++ b/rtloader/two/two.cpp @@ -983,6 +983,11 @@ void Two::setGetProcessStartTimeCb(cb_get_process_start_time_t cb) _set_get_process_start_time_cb(cb); } +void Two::setObfuscateMongoDBStringCb(cb_obfuscate_mongodb_string_t cb) +{ + _set_obfuscate_mongodb_string_cb(cb); +} + // Python Helpers // get_integration_list return a list of every datadog's wheels installed. 
diff --git a/rtloader/two/two.h b/rtloader/two/two.h index 44b9ec7ab6df2..9896f7b48a71f 100644 --- a/rtloader/two/two.h +++ b/rtloader/two/two.h @@ -109,6 +109,7 @@ class Two : public RtLoader void setObfuscateSqlCb(cb_obfuscate_sql_t); void setObfuscateSqlExecPlanCb(cb_obfuscate_sql_exec_plan_t); void setGetProcessStartTimeCb(cb_get_process_start_time_t); + void setObfuscateMongoDBStringCb(cb_obfuscate_mongodb_string_t); // _util API virtual void setSubprocessOutputCb(cb_get_subprocess_output_t);