Add tests for error codes with local API server #3131

Open · wants to merge 6 commits into base: main
2 changes: 1 addition & 1 deletion gpt4all-chat/.flake8
@@ -2,4 +2,4 @@
[flake8]
exclude = .*,__pycache__
max-line-length = 120
extend-ignore = B001,C408,D,DAR,E221,E303,E722,E741,E800,N801,N806,P101,S101,S324,S404,S406,S410,S603,WPS100,WPS110,WPS111,WPS113,WPS114,WPS115,WPS120,WPS2,WPS300,WPS301,WPS304,WPS305,WPS306,WPS309,WPS316,WPS317,WPS318,WPS319,WPS322,WPS323,WPS326,WPS329,WPS330,WPS332,WPS336,WPS337,WPS347,WPS360,WPS361,WPS414,WPS420,WPS421,WPS429,WPS430,WPS431,WPS432,WPS433,WPS437,WPS440,WPS440,WPS441,WPS442,WPS457,WPS458,WPS460,WPS462,WPS463,WPS473,WPS501,WPS504,WPS505,WPS508,WPS509,WPS510,WPS515,WPS516,WPS519,WPS529,WPS531,WPS602,WPS604,WPS605,WPS608,WPS609,WPS613,WPS615
extend-ignore = B001,C408,D,DAR,E221,E303,E722,E741,E800,N801,N806,P101,S101,S324,S404,S406,S410,S603,WPS100,WPS110,WPS111,WPS113,WPS114,WPS115,WPS120,WPS2,WPS300,WPS301,WPS304,WPS305,WPS306,WPS309,WPS316,WPS317,WPS318,WPS319,WPS322,WPS323,WPS326,WPS329,WPS330,WPS332,WPS336,WPS337,WPS347,WPS360,WPS361,WPS407,WPS414,WPS420,WPS421,WPS429,WPS430,WPS431,WPS432,WPS433,WPS437,WPS440,WPS440,WPS441,WPS442,WPS457,WPS458,WPS460,WPS462,WPS463,WPS473,WPS501,WPS504,WPS505,WPS508,WPS509,WPS510,WPS515,WPS516,WPS519,WPS520,WPS529,WPS531,WPS602,WPS604,WPS605,WPS608,WPS609,WPS613,WPS615
17 changes: 15 additions & 2 deletions gpt4all-chat/tests/CMakeLists.txt
@@ -9,13 +9,26 @@ FetchContent_Declare(
)
FetchContent_MakeAvailable(googletest)

# Llama-3.2-1B model
set(TEST_MODEL "Llama-3.2-1B-Instruct-Q4_0.gguf")
set(TEST_MODEL_MD5 "48ff0243978606fdba19d899b77802fc")
set(TEST_MODEL_PATH "${CMAKE_BINARY_DIR}/resources/${TEST_MODEL}")
set(TEST_MODEL_URL "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/${TEST_MODEL}")
message(STATUS "Downloading test model from ${TEST_MODEL_URL} ...")
file(DOWNLOAD
"${TEST_MODEL_URL}"
"${TEST_MODEL_PATH}"
EXPECTED_HASH "MD5=${TEST_MODEL_MD5}"
)
Comment on lines +18 to +22 (Member):

This is going to be downloaded by default for anyone building GPT4All with Python installed, at configure time. If we expect this to be shared by the C++ and Python tests, I would prefer if it was downloaded as the first step of the check target, before it runs ctest.

message(STATUS "Test model downloaded to ${TEST_MODEL_PATH}")

configure_file(python/config.py.in "${CMAKE_CURRENT_SOURCE_DIR}/python/config.py")

add_test(NAME ChatPythonTests
COMMAND ${Python3_EXECUTABLE} -m pytest --color=yes "${CMAKE_CURRENT_SOURCE_DIR}/python"
COMMAND ${Python3_EXECUTABLE} -m pytest -s --color=yes "${CMAKE_CURRENT_SOURCE_DIR}/python"
Comment (Member):

I can imagine wanting to add -s via the PYTEST_ADDOPTS env var at the console if you are trying to debug a test that e.g. succeeds when it should fail, but why should the default be to disable capturing of output? This is going to make successful test runs quite verbose.

)
set_tests_properties(ChatPythonTests PROPERTIES
ENVIRONMENT "CHAT_EXECUTABLE=${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/chat"
ENVIRONMENT "CHAT_EXECUTABLE=${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/chat;TEST_MODEL_PATH=${TEST_MODEL_PATH}"
TIMEOUT 60
)

212 changes: 186 additions & 26 deletions gpt4all-chat/tests/python/test_server_api.py
@@ -1,10 +1,12 @@
import os
import shutil
import signal
import subprocess
import sys
import tempfile
import textwrap
from pathlib import Path
from requests.exceptions import HTTPError
from subprocess import CalledProcessError
from typing import Any, Iterator

@@ -23,50 +25,81 @@ def __init__(self) -> None:
def get(self, path: str, *, wait: bool = False) -> Any:
return self._request('GET', path, wait)

def _request(self, method: str, path: str, wait: bool) -> Any:
def post(self, path: str, data: dict[str, Any] | None, *, wait: bool = False) -> Any:
return self._request('POST', path, wait, data)

def _request(self, method: str, path: str, wait: bool, data: dict[str, Any] | None = None) -> Any:
if wait:
retry = Retry(total=None, connect=10, read=False, status=0, other=0, backoff_factor=.01)
else:
retry = Retry(total=False)
self.http_adapter.max_retries = retry # type: ignore[attr-defined]

resp = self.session.request(method, f'http://localhost:4891/v1/{path}')
resp = self.session.request(method, f'http://localhost:4891/v1/{path}', json=data)
resp.raise_for_status()
return resp.json()


request = Requestor()


def create_chat_server_config(tmpdir: Path, model_copied: bool = False) -> dict[str, str]:
xdg_confdir = tmpdir / 'config'
app_confdir = xdg_confdir / 'nomic.ai'
app_confdir.mkdir(parents=True)
with open(app_confdir / 'GPT4All.ini', 'w') as conf:
conf.write(textwrap.dedent(f"""\
[General]
serverChat=true

[download]
lastVersionStarted={config.APP_VERSION}

[network]
isActive=false
usageStatsActive=false
"""))

if model_copied:
app_data_dir = tmpdir / 'share' / 'nomic.ai' / 'GPT4All'
app_data_dir.mkdir(parents=True)
local_file_path_env = os.getenv('TEST_MODEL_PATH')
if local_file_path_env:
local_file_path = Path(local_file_path_env)
if local_file_path.exists():
shutil.copy(local_file_path, app_data_dir / local_file_path.name)
else:
pytest.fail(f'Model file specified in TEST_MODEL_PATH does not exist: {local_file_path}')
else:
pytest.fail('Environment variable TEST_MODEL_PATH is not set')
Comment on lines +66 to +74 (Member):

It's better to ask for forgiveness than for permission (in Python, at least). This could be simply:

Suggested change
local_file_path_env = os.getenv('TEST_MODEL_PATH')
if local_file_path_env:
local_file_path = Path(local_file_path_env)
if local_file_path.exists():
shutil.copy(local_file_path, app_data_dir / local_file_path.name)
else:
pytest.fail(f'Model file specified in TEST_MODEL_PATH does not exist: {local_file_path}')
else:
pytest.fail('Environment variable TEST_MODEL_PATH is not set')
local_file_path = Path(os.environ['TEST_MODEL_PATH'])
shutil.copy(local_file_path, app_data_dir / local_file_path.name)

And an informative KeyError or FileNotFoundError will be raised if there is a problem. This also clarifies that this is not something being tested (pass/fail), but an expectation about the environment the test itself runs in.


return dict(
os.environ,
XDG_CACHE_HOME=str(tmpdir / 'cache'),
XDG_DATA_HOME=str(tmpdir / 'share'),
XDG_CONFIG_HOME=str(xdg_confdir),
APPIMAGE=str(tmpdir), # hack to bypass SingleApplication
)


@pytest.fixture
def chat_server_config() -> Iterator[dict[str, str]]:
if os.name != 'posix' or sys.platform == 'darwin':
pytest.skip('Need non-Apple Unix to use alternate config path')

with tempfile.TemporaryDirectory(prefix='gpt4all-test') as td:
tmpdir = Path(td)
xdg_confdir = tmpdir / 'config'
app_confdir = xdg_confdir / 'nomic.ai'
app_confdir.mkdir(parents=True)
with open(app_confdir / 'GPT4All.ini', 'w') as conf:
conf.write(textwrap.dedent(f"""\
[General]
serverChat=true

[download]
lastVersionStarted={config.APP_VERSION}

[network]
isActive=false
usageStatsActive=false
"""))
yield dict(
os.environ,
XDG_CACHE_HOME=str(tmpdir / 'cache'),
XDG_DATA_HOME=str(tmpdir / 'share'),
XDG_CONFIG_HOME=str(xdg_confdir),
APPIMAGE=str(tmpdir), # hack to bypass SingleApplication
)
yield create_chat_server_config(tmpdir, model_copied=False)


@pytest.fixture
def chat_server_with_model_config() -> Iterator[dict[str, str]]:
if os.name != 'posix' or sys.platform == 'darwin':
pytest.skip('Need non-Apple Unix to use alternate config path')

with tempfile.TemporaryDirectory(prefix='gpt4all-test') as td:
tmpdir = Path(td)
yield create_chat_server_config(tmpdir, model_copied=True)


@pytest.fixture
@@ -83,5 +116,132 @@ def chat_server(chat_server_config: dict[str, str]) -> Iterator[None]:
raise CalledProcessError(retcode, process.args)


def test_list_models_empty(chat_server: None) -> None:
assert request.get('models', wait=True) == {'object': 'list', 'data': []}
@pytest.fixture
def chat_server_with_model(chat_server_with_model_config: dict[str, str]) -> Iterator[None]:
chat_executable = Path(os.environ['CHAT_EXECUTABLE']).absolute()
with subprocess.Popen(chat_executable, env=chat_server_with_model_config) as process:
try:
yield
except:
process.kill()
raise
process.send_signal(signal.SIGINT)
if retcode := process.wait():
raise CalledProcessError(retcode, process.args)
Comment on lines +119 to +130 (Member):

There is no need for the code duplication between chat_server_config/chat_server_with_model_config and chat_server/chat_server_with_model.

There really only need to be two fixtures: chat_server and chat_server_with_model. Each can be a single-line function that calls some common function like def prepare_chat_server(with_model: bool = False). You can always return a generator, or use yield from if it is called inside of a with block.
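
A minimal sketch of that deduplication, reusing the create_chat_server_config helper and the imports already in this file; prepare_chat_server is the reviewer's suggested name, and the rest simply mirrors the existing fixtures:

def prepare_chat_server(with_model: bool = False) -> Iterator[None]:
    # Shared body for both fixtures: build a throwaway config, start the chat
    # executable, and shut it down cleanly when the test finishes.
    if os.name != 'posix' or sys.platform == 'darwin':
        pytest.skip('Need non-Apple Unix to use alternate config path')

    with tempfile.TemporaryDirectory(prefix='gpt4all-test') as td:
        config_env = create_chat_server_config(Path(td), model_copied=with_model)
        chat_executable = Path(os.environ['CHAT_EXECUTABLE']).absolute()
        with subprocess.Popen(chat_executable, env=config_env) as process:
            try:
                yield
            except:
                process.kill()
                raise
            process.send_signal(signal.SIGINT)
            if retcode := process.wait():
                raise CalledProcessError(retcode, process.args)


@pytest.fixture
def chat_server() -> Iterator[None]:
    yield from prepare_chat_server()


@pytest.fixture
def chat_server_with_model() -> Iterator[None]:
    yield from prepare_chat_server(with_model=True)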



def test_with_models_empty(chat_server: None) -> None:
# non-sense endpoint
with pytest.raises(HTTPError) as excinfo:
request.get('foobarbaz', wait=True)
assert excinfo.value.response.status_code == 404
Comment on lines +135 to +137 (Member):

For checks like this I would prefer to pass an argument like raise=False that disables the raise_for_status call, and returns a resp.status_code, resp.json() tuple (typed as tuple[int, Any]).

Also, it wouldn't hurt to test the response body, even for non-200 responses.
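
A sketch of what that could look like on the Requestor class from this diff. The flag is spelled check here because raise is a Python keyword and cannot be used as a parameter name; only the error-path behavior is new, and it assumes error responses also carry a JSON body:

class Requestor:
    # __init__ unchanged from the diff above

    def get(self, path: str, *, wait: bool = False, check: bool = True) -> Any:
        return self._request('GET', path, wait, check=check)

    def post(self, path: str, data: dict[str, Any] | None, *, wait: bool = False, check: bool = True) -> Any:
        return self._request('POST', path, wait, data, check=check)

    def _request(self, method: str, path: str, wait: bool,
                 data: dict[str, Any] | None = None, check: bool = True) -> Any:
        if wait:
            retry = Retry(total=None, connect=10, read=False, status=0, other=0, backoff_factor=.01)
        else:
            retry = Retry(total=False)
        self.http_adapter.max_retries = retry  # type: ignore[attr-defined]

        resp = self.session.request(method, f'http://localhost:4891/v1/{path}', json=data)
        if check:
            resp.raise_for_status()
            return resp.json()
        # With check=False, skip raise_for_status and return (status_code, body)
        # so a test can assert on both, even for non-200 responses.
        return resp.status_code, resp.json()

A 404 check would then read:

status, body = request.get('foobarbaz', wait=True, check=False)
assert status == 404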


# empty model list
response = request.get('models', wait=True)
Comment (Member):

wait=True was intended for only the first request in a test, as the server is still starting. Subsequent requests should not need to retry.
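
Put differently, the intended usage is roughly this (illustrative only):

# First request of a test: the app may still be starting, so retry until it answers.
response = request.get('models', wait=True)
assert response == {'object': 'list', 'data': []}

# Later requests in the same test: the server is already up, so no retries are needed.
response = request.get('models/foo')
assert response == {}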

assert response == {'object': 'list', 'data': []}

# empty model info
response = request.get('models/foo', wait=True)
assert response == {}

# POST for model list
with pytest.raises(HTTPError) as excinfo:
response = request.post('models', data=None, wait=True)
assert excinfo.value.response.status_code == 405

# POST for model info
with pytest.raises(HTTPError) as excinfo:
response = request.post('models/foo', data=None, wait=True)
assert excinfo.value.response.status_code == 405

# GET for completions
with pytest.raises(HTTPError) as excinfo:
response = request.get('completions', wait=True)
assert excinfo.value.response.status_code == 405

# GET for chat completions
with pytest.raises(HTTPError) as excinfo:
response = request.get('chat/completions', wait=True)
assert excinfo.value.response.status_code == 405


EXPECTED_MODEL_INFO = {
'created': 0,
'id': 'Llama 3.2 1B Instruct',
'object': 'model',
'owned_by': 'humanity',
'parent': None,
'permissions': [
{
'allow_create_engine': False,
'allow_fine_tuning': False,
'allow_logprobs': False,
'allow_sampling': False,
'allow_search_indices': False,
'allow_view': True,
'created': 0,
'group': None,
'id': 'placeholder',
'is_blocking': False,
'object': 'model_permission',
'organization': '*',
},
],
'root': 'Llama 3.2 1B Instruct',
}

EXPECTED_COMPLETIONS_RESPONSE = {
'choices': [
{
'finish_reason': 'stop',
'index': 0,
'logprobs': None,
'references': None,
'text': ' jumps over the lazy dog.',
},
],
'id': 'placeholder',
'model': 'Llama 3.2 1B Instruct',
'object': 'text_completion',
'usage': {
'completion_tokens': 6,
'prompt_tokens': 5,
'total_tokens': 11,
},
}


def test_with_models(chat_server_with_model: None) -> None:
response = request.get('models', wait=True)
assert response == {
'data': [EXPECTED_MODEL_INFO],
'object': 'list',
}

# Test the specific model endpoint
response = request.get('models/Llama 3.2 1B Instruct', wait=True)
assert response == EXPECTED_MODEL_INFO

# Test the completions endpoint
with pytest.raises(HTTPError) as excinfo:
request.post('completions', data=None, wait=True)
assert excinfo.value.response.status_code == 400

data = {
'model': 'Llama 3.2 1B Instruct',
'prompt': 'The quick brown fox',
'temperature': 0,
}

response = request.post('completions', data=data, wait=True)
assert 'choices' in response
assert response['choices'][0]['text'] == ' jumps over the lazy dog.'
Comment on lines +237 to +238 (Member):

We can be more strict here, at least by checking the number of choices and the keys in a choice:

Suggested change
assert 'choices' in response
assert response['choices'][0]['text'] == ' jumps over the lazy dog.'
assert len(response['choices']) == 1
assert response['choices'][0].keys() == {'text', 'index', 'logprobs', 'finish_reason'}
assert response['choices'][0]['text'] == ' jumps over the lazy dog.'

assert 'created' in response
response.pop('created') # Remove the dynamic field for comparison
assert response == EXPECTED_COMPLETIONS_RESPONSE

data['temperature'] = 0.5

pytest.xfail('This causes an assertion failure in the app. See https://github.com/nomic-ai/gpt4all/issues/3133')
with pytest.raises(HTTPError) as excinfo:
response = request.post('completions', data=data, wait=True)
Comment on lines +245 to +247 (Member):

I believe this marks the entire test as an expected failure, not just this part. Checks that are expected to fail should be split into their own test functions.
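
A sketch of that split, using a pytest.mark.xfail marker so only this check is treated as an expected failure; the test name is illustrative and the body mirrors the xfail'd tail of test_with_models in this diff:

@pytest.mark.xfail(reason='Causes an assertion failure in the app. See https://github.com/nomic-ai/gpt4all/issues/3133')
def test_completions_nonzero_temperature(chat_server_with_model: None) -> None:
    # A non-zero temperature currently trips an assertion in the app instead of
    # producing an HTTP error response; keep this isolated so the rest of the
    # completions checks still run and report normally.
    data = {
        'model': 'Llama 3.2 1B Instruct',
        'prompt': 'The quick brown fox',
        'temperature': 0.5,
    }
    with pytest.raises(HTTPError):
        request.post('completions', data=data, wait=True)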