From d70871154ea3821cf740551176e4dd5531e0572a Mon Sep 17 00:00:00 2001
From: dongzhancai1
Date: Wed, 8 Jan 2025 19:11:50 +0800
Subject: [PATCH 1/2] fix(json_utils): fix function find_json_objects

---
 dbgpt/util/json_utils.py            | 16 ++++----
 dbgpt/util/tests/test_json_utils.py | 63 +++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+), 7 deletions(-)
 create mode 100644 dbgpt/util/tests/test_json_utils.py

diff --git a/dbgpt/util/json_utils.py b/dbgpt/util/json_utils.py
index f5e41f431..4ba081c1b 100644
--- a/dbgpt/util/json_utils.py
+++ b/dbgpt/util/json_utils.py
@@ -48,6 +48,7 @@ def find_json_objects(text):
     escape_character = False
     stack = []
     start_index = -1
+    modified_text = list(text)  # Convert text to a list for easy modification
 
     for i, char in enumerate(text):
         # Handle escape characters
@@ -59,12 +60,12 @@ def find_json_objects(text):
         if char == '"' and not escape_character:
             inside_string = not inside_string
 
-        if not inside_string and char == "\n":
-            continue
-        if inside_string and char == "\n":
-            char = "\\n"
-        if inside_string and char == "\t":
-            char = "\\t"
+        # Replace newline and tab characters inside strings
+        if inside_string:
+            if char == "\n":
+                modified_text[i] = "\\n"
+            elif char == "\t":
+                modified_text[i] = "\\t"
 
         # Handle opening brackets
         if char in "{[" and not inside_string:
@@ -78,7 +79,8 @@ def find_json_objects(text):
                 if not stack:
                     end_index = i + 1
                     try:
-                        json_obj = json.loads(text[start_index:end_index])
+                        json_str = "".join(modified_text[start_index:end_index])
+                        json_obj = json.loads(json_str)
                         json_objects.append(json_obj)
                     except json.JSONDecodeError:
                         pass
diff --git a/dbgpt/util/tests/test_json_utils.py b/dbgpt/util/tests/test_json_utils.py
new file mode 100644
index 000000000..ea1252681
--- /dev/null
+++ b/dbgpt/util/tests/test_json_utils.py
@@ -0,0 +1,63 @@
+import pytest
+
+from dbgpt.util.json_utils import find_json_objects
+
+# 定义参数化测试数据
+test_data = [
+    (
+        """
+        ```json
+
+        {
+            "serial_number": "1",
+            "agent": "CodeOptimizer",
+            "content": "```json
+select * 
+from table
+where column = 'value'
+``` optimize the code above.",
+            "rely": ""
+        }
+        ```
+        """,
+        [
+            {
+                "serial_number": "1",
+                "agent": "CodeOptimizer",
+                "content": "```json\nselect * \nfrom table\nwhere column = 'value'\n``` optimize the code above.",
+                "rely": "",
+            }
+        ],
+        "Test case with nested code block",
+    ),
+    (
+        """
+        {
+            "key": "value"
+        }
+        """,
+        [{"key": "value"}],
+        "Test case with simple JSON",
+    ),
+    (
+        """
+        {
+            "key1": "value1"
+        }
+        {
+            "key2": "value2"
+        }
+        """,
+        [{"key1": "value1"}, {"key2": "value2"}],
+        "Test case with multiple JSON objects",
+    ),
+    ("", [], "Test case with empty input"),
+    ("This is not a JSON string", [], "Test case with non-JSON input"),
+]
+
+@pytest.mark.parametrize("text, expected, description", test_data)
+def test_find_json_objects(text, expected, description):
+    result = find_json_objects(text)
+    assert (
+        result == expected
+    ), f"Test failed: {description}\nExpected: {expected}\nGot: {result}"

From 26eaa661c15a811db35f14705f66c35aec86318d Mon Sep 17 00:00:00 2001
From: aries_ckt <916701291@qq.com>
Date: Thu, 9 Jan 2025 18:54:16 +0800
Subject: [PATCH 2/2] chore:fmt

---
 dbgpt/util/tests/test_json_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/dbgpt/util/tests/test_json_utils.py b/dbgpt/util/tests/test_json_utils.py
index ea1252681..414290a62 100644
--- a/dbgpt/util/tests/test_json_utils.py
+++ b/dbgpt/util/tests/test_json_utils.py
@@ -55,6 +55,7 @@
     ("This is not a JSON string", [], "Test case with non-JSON input"),
 ]
 
+
 @pytest.mark.parametrize("text, expected, description", test_data)
 def test_find_json_objects(text, expected, description):
     result = find_json_objects(text)