From 29b5b8ddd9272e5eb97778ba0a6a8b38b46326c7 Mon Sep 17 00:00:00 2001
From: Richard Edgar
Date: Wed, 20 Mar 2024 17:44:46 -0400
Subject: [PATCH] Add Llama to model test matrix (#703)

Working to get Llama models into the test matrix.

---------

Co-authored-by: Harsha-Nori
---
 .github/workflows/unit_tests.yml |  5 ++++-
 tests/conftest.py                | 15 +++++++++++----
 tests/library/test_gen.py        | 12 +++---------
 tests/utils.py                   | 14 +++++++++++++-
 4 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
index 2c7f0c063..14018c7e1 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@@ -14,7 +14,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, windows-latest, macos-latest]
         python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
-        model: ["gpt2cpu", "phi2cpu"]
+        model: ["gpt2cpu", "phi2cpu", "hfllama7b"]
     runs-on: ${{ matrix.os }}
     steps:
@@ -30,6 +30,9 @@ jobs:
           pip install pytest
           pip install -e .[test]
           if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+      - name: Install model-specific dependencies
+        run: |
+          pip install llama-cpp-python
       - name: Run tests (except server)
         run: |
           pytest --cov=guidance --cov-report=xml --cov-report=term-missing --selected_model ${{ matrix.model }} -m "not server" ./tests/
diff --git a/tests/conftest.py b/tests/conftest.py
index 56c7beee9..ef949db35 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -9,6 +9,10 @@
     "phi2cpu": dict(
         name="transformers:microsoft/phi-2", kwargs={"trust_remote_code": True}
     ),
+    "hfllama7b": dict(
+        name="huggingface_hubllama:TheBloke/Llama-2-7B-GGUF:llama-2-7b.Q5_K_M.gguf",
+        kwargs={"verbose": True},
+    ),
     "gpt2gpu": dict(name="transformers:gpt2", kwargs={"device_map": "cuda:0"}),
     "phi2gpu": dict(
         name="transformers:microsoft/phi-2",
@@ -28,7 +32,12 @@ def pytest_addoption(parser):
 
 
 @pytest.fixture(scope="session")
-def selected_model(pytestconfig) -> models.Model:
+def selected_model_name(pytestconfig) -> str:
+    return pytestconfig.getoption("selected_model")
+
+
+@pytest.fixture(scope="session")
+def selected_model(selected_model_name: str) -> models.Model:
     """Get a concrete model for tests
 
     This fixture is for tests which are supposed
@@ -41,9 +50,7 @@ def selected_model(pytestconfig) -> models.Model:
     controlled by the '--selected_model' command
     line argument to pytest.
""" - model_key = pytestconfig.getoption("selected_model") - - model_info = AVAILABLE_MODELS[model_key] + model_info = AVAILABLE_MODELS[selected_model_name] model = get_model(model_info["name"], **(model_info["kwargs"])) return model diff --git a/tests/library/test_gen.py b/tests/library/test_gen.py index 09032d216..fdd719291 100644 --- a/tests/library/test_gen.py +++ b/tests/library/test_gen.py @@ -52,10 +52,6 @@ def test_unicode(selected_model): lm + f'Step {i}:' + gen('steps', list_append=True, stop=['\nStep', '\n\n', '\nAnswer'], temperature=0.7, max_tokens=20) + '\n' def test_unicode2(selected_model): - # Does not work with Phi2 - model_type = type(selected_model.engine.model_obj).__name__ - if model_type == "PhiForCausalLM": - pytest.xfail("See https://github.com/guidance-ai/guidance/issues/681") lm = selected_model prompt = 'Janet’s ducks lay 16 eggs per day' lm += prompt + gen(max_tokens=10) @@ -163,11 +159,9 @@ def test_various_regexes(selected_model: models.Model, prompt: str, pattern: str # note we can't just test any regex pattern like this, we need them to have finished in less than 40 tokens assert re.match(pattern, lm2["test"], re.DOTALL) is not None -def test_long_prompt(selected_model): - # Does not work with Phi2 - model_type = type(selected_model.engine.model_obj).__name__ - if model_type == "PhiForCausalLM": - pytest.xfail("See https://github.com/guidance-ai/guidance/issues/681") +def test_long_prompt(selected_model, selected_model_name): + if selected_model_name == "hfllama7b": + pytest.xfail("Insufficient context window in model") lm = selected_model prompt = '''Question: Legoland has 5 kangaroos for each koala. If Legoland has 180 kangaroos, how many koalas and kangaroos are there altogether? Let's think step by step, and then write the answer: diff --git a/tests/utils.py b/tests/utils.py index cde04e1c5..c01349647 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,8 +1,9 @@ -import json import os from typing import Any +from huggingface_hub import hf_hub_download + import pytest import guidance @@ -18,6 +19,11 @@ def get_model(model_name, caching=False, **kwargs): return get_transformers_model(model_name[13:], caching, **kwargs) elif model_name.startswith("llama_cpp:"): return get_llama_cpp_model(model_name[10:], caching, **kwargs) + elif model_name.startswith("huggingface_hubllama"): + name_parts = model_name.split(":") + return get_llama_hugging_face_model( + repo_id=name_parts[1], filename=name_parts[2], **kwargs + ) def get_openai_model(model_name, caching=False, **kwargs): @@ -35,6 +41,12 @@ def get_openai_model(model_name, caching=False, **kwargs): return lm +def get_llama_hugging_face_model(repo_id: str, filename: str, **kwargs): + downloaded_file = hf_hub_download(repo_id=repo_id, filename=filename) + lm = guidance.models.LlamaCpp(downloaded_file, **kwargs) + return lm + + transformers_model_cache = {}