Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(json_utils): fix function find_json_objects #2289

Merged
merged 2 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions dbgpt/util/json_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def find_json_objects(text):
escape_character = False
stack = []
start_index = -1
modified_text = list(text) # Convert text to a list for easy modification

for i, char in enumerate(text):
# Handle escape characters
Expand All @@ -59,12 +60,12 @@ def find_json_objects(text):
if char == '"' and not escape_character:
inside_string = not inside_string

if not inside_string and char == "\n":
continue
if inside_string and char == "\n":
char = "\\n"
if inside_string and char == "\t":
char = "\\t"
# Replace newline and tab characters inside strings
if inside_string:
if char == "\n":
modified_text[i] = "\\n"
elif char == "\t":
modified_text[i] = "\\t"

# Handle opening brackets
if char in "{[" and not inside_string:
Expand All @@ -78,7 +79,8 @@ def find_json_objects(text):
if not stack:
end_index = i + 1
try:
json_obj = json.loads(text[start_index:end_index])
json_str = "".join(modified_text[start_index:end_index])
json_obj = json.loads(json_str)
json_objects.append(json_obj)
except json.JSONDecodeError:
pass
Expand Down
64 changes: 64 additions & 0 deletions dbgpt/util/tests/test_json_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import pytest

from dbgpt.util.json_utils import find_json_objects

# Parametrized test data for find_json_objects.
# Each entry is a (text, expected, description) tuple: the raw input text,
# the list of parsed JSON objects expected back, and a human-readable label
# that is included in the assertion message when a case fails.
test_data = [
# Case: a JSON object wrapped in a markdown fence whose "content" value
# itself spans several physical lines (raw newlines inside a JSON string).
# NOTE(review): the expected "content" below contains a space after
# "select *" before the newline; confirm the input line keeps that trailing
# space, since editors and formatters commonly strip trailing whitespace.
(
"""
```json

{
"serial_number": "1",
"agent": "CodeOptimizer",
"content": "```json
select *
from table
where column = 'value'
``` optimize the code above.",
"rely": ""
}
```
""",
[
{
"serial_number": "1",
"agent": "CodeOptimizer",
"content": "```json\nselect * \nfrom table\nwhere column = 'value'\n``` optimize the code above.",
"rely": "",
}
],
"Test case with nested code block",
),
# Case: a single, well-formed JSON object surrounded by whitespace.
(
"""
{
"key": "value"
}
""",
[{"key": "value"}],
"Test case with simple JSON",
),
# Case: two top-level JSON objects back to back in the same text.
(
"""
{
"key1": "value1"
}
{
"key2": "value2"
}
""",
[{"key1": "value1"}, {"key2": "value2"}],
"Test case with multiple JSON objects",
),
# Cases: inputs that contain no JSON at all are expected to yield an empty list.
("", [], "Test case with empty input"),
("This is not a JSON string", [], "Test case with non-JSON input"),
]


@pytest.mark.parametrize("text, expected, description", test_data)
def test_find_json_objects(text, expected, description):
    """Check that find_json_objects(text) returns exactly ``expected``.

    ``description`` labels the case and is only used to build the failure
    message, together with the expected and actual values.
    """
    parsed = find_json_objects(text)
    # The message is only rendered when the comparison fails.
    assert parsed == expected, (
        f"Test failed: {description}\nExpected: {expected}\nGot: {parsed}"
    )
Loading