Commit

wip

AlpinDale committed Oct 4, 2024
1 parent 5cc9b99 commit f4f3556
Showing 11 changed files with 588 additions and 56 deletions.
Empty file added tests/engine/__init__.py
24 changes: 24 additions & 0 deletions tests/engine/test_args.py
@@ -0,0 +1,24 @@
import pytest

from aphrodite.common.utils import FlexibleArgumentParser
from aphrodite.engine.args_tools import EngineArgs


@pytest.mark.parametrize(("arg", "expected"), [
    (None, None),
    ("image=16", {
        "image": 16
    }),
    ("image=16,video=2", {
        "image": 16,
        "video": 2
    }),
])
def test_limit_mm_per_prompt_parser(arg, expected):
    parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
    if arg is None:
        args = parser.parse_args([])
    else:
        args = parser.parse_args(["--limit-mm-per-prompt", arg])

    assert args.limit_mm_per_prompt == expected
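
The parser under test turns a comma-separated list of key=value pairs into a dict. As a rough standalone sketch of the semantics the test expects (the helper name parse_limit_mm_per_prompt is hypothetical and this is not the actual FlexibleArgumentParser/EngineArgs code):

# Minimal sketch of the expected "--limit-mm-per-prompt" semantics; the real
# aphrodite implementation may differ. `parse_limit_mm_per_prompt` is a
# hypothetical helper, not part of the aphrodite API.
from typing import Dict, Optional


def parse_limit_mm_per_prompt(value: Optional[str]) -> Optional[Dict[str, int]]:
    # None (flag not passed) stays None, mirroring the first parametrize case.
    if value is None:
        return None
    limits: Dict[str, int] = {}
    for item in value.split(","):
        modality, count = item.split("=")
        limits[modality.strip()] = int(count)
    return limits


assert parse_limit_mm_per_prompt(None) is None
assert parse_limit_mm_per_prompt("image=16") == {"image": 16}
assert parse_limit_mm_per_prompt("image=16,video=2") == {"image": 16, "video": 2}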
34 changes: 34 additions & 0 deletions tests/engine/test_computed_prefix_block.py
@@ -0,0 +1,34 @@
import pytest

from aphrodite.common.sampling_params import SamplingParams
from aphrodite.engine.aphrodite_engine import AphroditeEngine
from aphrodite.engine.args_tools import EngineArgs


@pytest.mark.parametrize("model", ["facebook/opt-125m"])
@pytest.mark.parametrize("block_size", [16])
def test_computed_prefix_blocks(model: str, block_size: int):
    # This test checks if we are able to run the engine to completion
    # without triggering asserts.
    # We are in a scenario where all blocks from the second request's prompt
    # are full and already computed when the second request arrives.
    prompt = (
        "You are a helpful assistant. How do I build a car from cardboard and "
        "paper clips? Is there an easy to follow video tutorial available "
        "online for free?")
    prompt2 = (
        " Please recommend to me some resources where I can learn not only to "
        "handle technical difficulties of building a car, but also "
        "decoration.")

    engine_args = EngineArgs(model=model,
                             block_size=block_size,
                             enable_prefix_caching=True)

    engine = AphroditeEngine.from_engine_args(engine_args)
    sampling_params = SamplingParams()

    engine.add_request("0", prompt + prompt2, sampling_params)
    engine.step()
    engine.add_request("1", prompt, sampling_params)
    engine.step()
34 changes: 34 additions & 0 deletions tests/engine/test_computed_prefix_blocks.py
@@ -0,0 +1,34 @@
import pytest

from aphrodite.common.sampling_params import SamplingParams
from aphrodite.engine.aphrodite_engine import AphroditeEngine
from aphrodite.engine.args_tools import EngineArgs


@pytest.mark.parametrize("model", ["facebook/opt-125m"])
@pytest.mark.parametrize("block_size", [16])
def test_computed_prefix_blocks(model: str, block_size: int):
    # This test checks if we are able to run the engine to completion
    # without triggering asserts.
    # We are in a scenario where all blocks from the second request's prompt
    # are full and already computed when the second request arrives.
    prompt = (
        "You are a helpful assistant. How do I build a car from cardboard and "
        "paper clips? Is there an easy to follow video tutorial available "
        "online for free?")
    prompt2 = (
        " Please recommend to me some resources where I can learn not only to "
        "handle technical difficulties of building a car, but also "
        "decoration.")

    engine_args = EngineArgs(model=model,
                             block_size=block_size,
                             enable_prefix_caching=True)

    engine = AphroditeEngine.from_engine_args(engine_args)
    sampling_params = SamplingParams()

    engine.add_request("0", prompt + prompt2, sampling_params)
    engine.step()
    engine.add_request("1", prompt, sampling_params)
    engine.step()
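
For context on the comment in this test: in vllm-style prefix caching, only blocks that the prompt fills completely are hashed and eligible for reuse. A small illustrative calculation (the token count is made up, not the real tokenization of these prompts):

# Illustrative block arithmetic for the prefix-caching scenario above; the
# token count is a hypothetical example, not the real tokenizer output.
block_size = 16
prompt_tokens = 48            # assumed length of `prompt` after tokenization

# Only completely filled blocks are cached and can be reused later.
full_blocks = prompt_tokens // block_size      # 3 full blocks
partial_tokens = prompt_tokens % block_size    # 0 -> every block is full

# Request "0" computes `prompt + prompt2`, so by the time request "1"
# (just `prompt`) arrives, all of its blocks are already computed.
print(full_blocks, partial_tokens)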
90 changes: 90 additions & 0 deletions tests/engine/test_custom_executor.py
@@ -0,0 +1,90 @@
import asyncio
import os

import pytest

from aphrodite.common.sampling_params import SamplingParams
from aphrodite.engine.args_tools import AsyncEngineArgs, EngineArgs
from aphrodite.engine.async_aphrodite import AphroditeEngine, AsyncAphrodite
from aphrodite.executor.gpu_executor import GPUExecutor, GPUExecutorAsync


class Mock:
    ...


class CustomGPUExecutor(GPUExecutor):

    def execute_model(self, *args, **kwargs):
        # Drop a marker file to show that this was run.
        with open(".marker", "w"):
            ...
        return super().execute_model(*args, **kwargs)


class CustomGPUExecutorAsync(GPUExecutorAsync):

    async def execute_model_async(self, *args, **kwargs):
        with open(".marker", "w"):
            ...
        return await super().execute_model_async(*args, **kwargs)


@pytest.mark.parametrize("model", ["facebook/opt-125m"])
def test_custom_executor_type_checking(model):
    with pytest.raises(ValueError):
        engine_args = EngineArgs(model=model,
                                 distributed_executor_backend=Mock)
        AphroditeEngine.from_engine_args(engine_args)
    with pytest.raises(ValueError):
        engine_args = AsyncEngineArgs(model=model,
                                      distributed_executor_backend=Mock)
        AsyncAphrodite.from_engine_args(engine_args)
    with pytest.raises(TypeError):
        engine_args = AsyncEngineArgs(
            model=model, distributed_executor_backend=CustomGPUExecutor)
        AsyncAphrodite.from_engine_args(engine_args)


@pytest.mark.parametrize("model", ["facebook/opt-125m"])
def test_custom_executor(model, tmpdir):
    cwd = os.path.abspath(".")
    os.chdir(tmpdir)
    try:
        assert not os.path.exists(".marker")

        engine_args = EngineArgs(
            model=model, distributed_executor_backend=CustomGPUExecutor)
        engine = AphroditeEngine.from_engine_args(engine_args)
        sampling_params = SamplingParams(max_tokens=1)

        engine.add_request("0", "foo", sampling_params)
        engine.step()

        assert os.path.exists(".marker")
    finally:
        os.chdir(cwd)


@pytest.mark.parametrize("model", ["facebook/opt-125m"])
def test_custom_executor_async(model, tmpdir):
    cwd = os.path.abspath(".")
    os.chdir(tmpdir)
    try:
        assert not os.path.exists(".marker")

        engine_args = AsyncEngineArgs(
            model=model, distributed_executor_backend=CustomGPUExecutorAsync)
        engine = AsyncAphrodite.from_engine_args(engine_args)
        sampling_params = SamplingParams(max_tokens=1)

        async def t():
            stream = await engine.add_request("0", "foo", sampling_params)
            async for x in stream:
                ...

        asyncio.run(t())

        assert os.path.exists(".marker")
    finally:
        os.chdir(cwd)
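
The three pytest.raises cases suggest the engines validate the distributed_executor_backend class up front. A loose sketch of a check consistent with those errors (the _validate_backend helper is hypothetical; the real checks inside AphroditeEngine and AsyncAphrodite may differ):

# Hypothetical validation sketch matching the errors the test expects; not
# the actual aphrodite implementation.
from aphrodite.executor.gpu_executor import GPUExecutor, GPUExecutorAsync


def _validate_backend(backend, *, require_async: bool) -> None:
    # Arbitrary classes (like `Mock` above) are rejected with ValueError.
    if not (isinstance(backend, type) and issubclass(backend, GPUExecutor)):
        raise ValueError(
            f"{backend!r} is not a valid distributed executor backend.")
    # The async engine also needs an async-capable executor, so passing a
    # synchronous executor subclass raises TypeError instead.
    if require_async and not issubclass(backend, GPUExecutorAsync):
        raise TypeError(
            f"{backend!r} must support async execution for AsyncAphrodite.")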
32 changes: 32 additions & 0 deletions tests/engine/test_detokenization.py
@@ -0,0 +1,32 @@
import pytest

from aphrodite.common.sampling_params import SamplingParams
from aphrodite.endpoints.llm import LLM


@pytest.mark.parametrize("model", ["facebook/opt-125m"])
def test_detokenize(model: str):
    # This test checks if the engine generates completions both with and
    # without optional detokenization, that detokenization includes text
    # and no-detokenization doesn't, and that both completions have the same
    # token_ids.
    prompt = (
        "You are a helpful assistant. How do I build a car from cardboard and "
        "paper clips? Is there an easy to follow video tutorial available "
        "online for free?")

    llm = LLM(model=model)
    sampling_params = SamplingParams(max_tokens=10,
                                     temperature=0.0,
                                     detokenize=False)

    outputs_no_detokenization = llm.generate(prompt,
                                             sampling_params)[0].outputs[0]
    sampling_params.detokenize = True
    outputs_with_detokenization = llm.generate(prompt,
                                               sampling_params)[0].outputs[0]

    assert outputs_no_detokenization.text == ''
    assert outputs_with_detokenization.text != ''
    assert outputs_no_detokenization.token_ids == \
        outputs_with_detokenization.token_ids
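
Skipping detokenization is handy when a caller only needs token IDs. A short usage sketch along the lines of the test above, assuming the same LLM/SamplingParams API:

# Usage sketch based on the test above; assumes the same aphrodite API.
from aphrodite.common.sampling_params import SamplingParams
from aphrodite.endpoints.llm import LLM

llm = LLM(model="facebook/opt-125m")

# With detokenize=False the engine skips detokenization, so the output
# carries token IDs but an empty `text` field.
params = SamplingParams(max_tokens=10, temperature=0.0, detokenize=False)
output = llm.generate("Hello, my name is", params)[0].outputs[0]

token_ids = output.token_ids   # populated either way
text = output.text             # empty string when detokenize=False
print(token_ids, repr(text))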
56 changes: 0 additions & 56 deletions tests/engine/test_detokenize.py

This file was deleted.

