Patch summary (text before the first "diff --git" header is ignored by git apply / patch).

core/llm/openai_client.py: rate_limit_sleep() now treats a missing hour, minute, or
second capture group as 0 instead of passing None to int(), and sums the parts
into total_seconds.
tests/llm/test_openai.py: adds a parametrized test for partial durations such as
"1h1s" and "1m", and for an empty reset header.

NOTE(review): line breaks and indentation were restored from a single-line paste.
Blank context lines were placed so each hunk matches its @@ line counts; confirm
the context-line indentation against the target files before applying.

diff --git a/core/llm/openai_client.py b/core/llm/openai_client.py
index a58f92d66..feddaf581 100644
--- a/core/llm/openai_client.py
+++ b/core/llm/openai_client.py
@@ -104,13 +104,16 @@ def rate_limit_sleep(self, err: RateLimitError) -> Optional[datetime.timedelta]:
             match = re.search(time_regex, headers["x-ratelimit-reset-requests"])
 
         if match:
-            seconds = int(match.group(1)) * 3600 + int(match.group(2)) * 60 + int(match.group(3))
+            hours = int(match.group(1)) if match.group(1) else 0
+            minutes = int(match.group(2)) if match.group(2) else 0
+            seconds = int(match.group(3)) if match.group(3) else 0
+            total_seconds = hours * 3600 + minutes * 60 + seconds
         else:
             # Not sure how this would happen, we would have to get a RateLimitError,
             # but nothing (or invalid entry) in the `reset` field. Using a sane default.
-            seconds = 5
+            total_seconds = 5
 
-        return datetime.timedelta(seconds=seconds)
+        return datetime.timedelta(seconds=total_seconds)
 
 
 __all__ = ["OpenAIClient"]
diff --git a/tests/llm/test_openai.py b/tests/llm/test_openai.py
index 50f4381c4..c57e54146 100644
--- a/tests/llm/test_openai.py
+++ b/tests/llm/test_openai.py
@@ -115,3 +115,26 @@ async def test_openai_parser_fails(mock_AsyncOpenAI):
 
     assert response is None
     assert req_log.status == "error"
+
+
+@pytest.mark.parametrize(
+    ("remaining_tokens", "reset_tokens", "reset_requests", "expected"),
+    [
+        (0, "1h1m1s", "", 3661),
+        (0, "1h1s", "", 3601),
+        (0, "1m", "", 60),
+        (0, "", "1h1m1s", 0),
+        (1, "", "1h1m1s", 3661),
+    ],
+)
+@patch("core.llm.openai_client.AsyncOpenAI")
+def test_openai_rate_limit_parser(mock_AsyncOpenAI, remaining_tokens, reset_tokens, reset_requests, expected):
+    headers = {
+        "x-ratelimit-remaining-tokens": remaining_tokens,
+        "x-ratelimit-reset-tokens": reset_tokens,
+        "x-ratelimit-reset-requests": reset_requests,
+    }
+    err = MagicMock(response=MagicMock(headers=headers))
+
+    llm = OpenAIClient(LLMConfig(model="gpt-4"))
+    assert int(llm.rate_limit_sleep(err).total_seconds()) == expected