Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: adding python side retry mechanism #3354

Merged
merged 21 commits into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
044075d
feat: adding python retry/reconnect mechanism
germa89 Aug 9, 2024
657a967
Merge branch 'main' into feat/adding-python-side-retry-mechanism
germa89 Aug 9, 2024
1ce2786
feat: connecting only one time per retry
germa89 Aug 9, 2024
cb0c42c
fix: avoid running commands when the instance has exited and using ex…
germa89 Aug 9, 2024
8ca9ecf
feat: check if MAPDL has died properly during processes kill
germa89 Aug 9, 2024
ba2895e
fix: looping
germa89 Aug 9, 2024
a8af85a
refactor: error message
germa89 Aug 9, 2024
33ac06c
chore: adding changelog file 3354.miscellaneous.md
pyansys-ci-bot Aug 14, 2024
6a495ea
chore: adding changelog file 3354.dependencies.md
pyansys-ci-bot Aug 14, 2024
77bb43a
Merge branch 'main' into feat/adding-python-side-retry-mechanism
germa89 Aug 19, 2024
2437899
chore: adding changelog file 3354.miscellaneous.md
pyansys-ci-bot Aug 19, 2024
661cc47
Merge branch 'main' into feat/adding-python-side-retry-mechanism
germa89 Aug 20, 2024
90e7990
fix: test
germa89 Aug 21, 2024
e653593
Merge branch 'main' into feat/adding-python-side-retry-mechanism
germa89 Aug 21, 2024
93bde1a
chore: Merge branch 'main' into feat/adding-python-side-retry-mechanism
germa89 Aug 21, 2024
f4ac723
chore: Merge branch 'feat/adding-python-side-retry-mechanism' of http…
germa89 Aug 21, 2024
1719c3e
fix: wrong name in variable.
germa89 Aug 21, 2024
6289513
Update src/ansys/mapdl/core/errors.py
germa89 Aug 22, 2024
201e911
fix: not exiting MAPDL when faking exiting MAPDL in tests.
germa89 Aug 26, 2024
03b453b
Update the image cache
germa89 Aug 26, 2024
90793b3
Merge branch 'main' into feat/adding-python-side-retry-mechanism
germa89 Aug 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/changelog.d/3354.miscellaneous.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
feat: adding python side retry mechanism
119 changes: 90 additions & 29 deletions src/ansys/mapdl/core/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from functools import wraps
import signal
import threading
from time import sleep
from typing import Callable, Optional

import grpc
Expand Down Expand Up @@ -306,26 +307,73 @@
old_handler = signal.signal(signal.SIGINT, handler)

# Capture gRPC exceptions
try:
out = func(*args, **kwargs)
except grpc.RpcError as error:
# Custom errors
if error.code() == grpc.StatusCode.RESOURCE_EXHAUSTED:
if "Received message larger than max" in error.details():
try:
lim_ = int(error.details().split("(")[1].split("vs")[0])
except IndexError:
lim_ = int(512 * 1024**2)

raise MapdlgRPCError(
f"RESOURCE_EXHAUSTED: {error.details()}. "
"You can try to increase the gRPC message length size using 'PYMAPDL_MAX_MESSAGE_LENGTH'"
" environment variable. For instance:\n\n"
f"$ export PYMAPDL_MAX_MESSAGE_LENGTH={lim_}"
n_attempts = 3
initial_backoff = 0.05
multiplier_backoff = 3

i_attemps = 0

while True:
try:
out = func(*args, **kwargs)

germa89 marked this conversation as resolved.
Show resolved Hide resolved
# Exit while-loop if success
break

except grpc.RpcError as error:

mapdl = retrieve_mapdl_from_args(args)

i_attemps += 1
if i_attemps <= n_attempts:

wait = (
initial_backoff * multiplier_backoff**i_attemps
) # Exponential backoff
sleep(wait)

# reconnect
mapdl._log.debug(
f"Re-connection attempt {i_attemps} after waiting {wait:0.3f} seconds"
)

connected = mapdl._connect(timeout=wait)

# Retry again
continue

# Custom errors
reason = ""
suggestion = ""
germa89 marked this conversation as resolved.
Show resolved Hide resolved

if error.code() == grpc.StatusCode.RESOURCE_EXHAUSTED:
if "Received message larger than max" in error.details():
try:
lim_ = int(error.details().split("(")[1].split("vs")[0])
except IndexError:
lim_ = int(512 * 1024**2)

Check warning on line 354 in src/ansys/mapdl/core/errors.py

View check run for this annotation

Codecov / codecov/patch

src/ansys/mapdl/core/errors.py#L353-L354

Added lines #L353 - L354 were not covered by tests

raise MapdlgRPCError(
f"RESOURCE_EXHAUSTED: {error.details()}. "
"You can try to increase the gRPC message length size using 'PYMAPDL_MAX_MESSAGE_LENGTH'"
" environment variable. For instance:\n\n"
f"$ export PYMAPDL_MAX_MESSAGE_LENGTH={lim_}"
)

if error.code() == grpc.StatusCode.UNAVAILABLE:
# Very likely the MAPDL server has died.
suggestion = (
" MAPDL *might* have died because it executed a not-allowed command or ran out of memory.\n"
" Check the MAPDL command output for more details.\n"
" Open an issue on GitHub if you need assistance: "
"https://github.com/ansys/pymapdl/issues"
)

# Generic error
handle_generic_grpc_error(error, func, args, kwargs)
# Generic error
handle_generic_grpc_error(error, func, args, kwargs, reason, suggestion)

# Break
break

Check warning on line 376 in src/ansys/mapdl/core/errors.py

View check run for this annotation

Codecov / codecov/patch

src/ansys/mapdl/core/errors.py#L376

Added line #L376 was not covered by tests
germa89 marked this conversation as resolved.
Show resolved Hide resolved

# No exceptions
if threading.current_thread().__class__.__name__ == "_MainThread":
Expand All @@ -344,15 +392,26 @@
return wrapper


def handle_generic_grpc_error(error, func, args, kwargs):
"""Handle non-custom gRPC errors"""

def retrieve_mapdl_from_args(args):
    """Return the MAPDL instance associated with a wrapped call.

    Parameters
    ----------
    args : tuple
        Positional arguments the wrapped function was called with. The
        first item is expected to be either an instance of a class named
        ``MapdlGrpc`` or an object holding one in its ``_mapdl`` attribute.

    Returns
    -------
    object
        ``args[0]`` when its class is named ``MapdlGrpc``; otherwise
        ``args[0]._mapdl``.

    Raises
    ------
    TypeError
        If ``args`` is empty, or if its first item is neither a
        ``MapdlGrpc`` instance nor an object with a ``_mapdl`` attribute.
        Previously these cases surfaced as an ``IndexError`` or an
        ``UnboundLocalError`` with no explanation.
    """
    if not args:
        raise TypeError(
            "Cannot retrieve the MAPDL instance: no positional arguments were given."
        )

    first = args[0]

    # can't use isinstance here due to circular imports
    class_name = getattr(getattr(first, "__class__", None), "__name__", "")
    if class_name == "MapdlGrpc":
        return first

    # A private sentinel distinguishes "attribute missing" from a ``_mapdl``
    # attribute that is legitimately set to ``None``.
    missing = object()
    mapdl = getattr(first, "_mapdl", missing)
    if mapdl is missing:
        raise TypeError(
            "Cannot retrieve the MAPDL instance: the first positional argument "
            f"(of type '{class_name}') is not a 'MapdlGrpc' instance and has no "
            "'_mapdl' attribute."
        )

    return mapdl


def handle_generic_grpc_error(error, func, args, kwargs, reason="", suggestion=""):
"""Handle non-custom gRPC errors"""

mapdl = retrieve_mapdl_from_args(args)

# trying to get "cmd" argument:
cmd = args[1] if len(args) >= 2 else ""
cmd = kwargs.get("cmd", cmd)
Expand All @@ -364,28 +423,30 @@
else:
msg_ = f"calling:{caller}\nwith the following arguments:\n args: {args}\n kwargs: {kwargs}"

if class_name == "MapdlGrpc":
mapdl = args[0]
elif hasattr(args[0], "_mapdl"):
mapdl = args[0]._mapdl
if reason:
reason = f"Possible reason:\n{reason}\n"

Check warning on line 427 in src/ansys/mapdl/core/errors.py

View check run for this annotation

Codecov / codecov/patch

src/ansys/mapdl/core/errors.py#L427

Added line #L427 was not covered by tests

if suggestion:
suggestion = f"Suggestions:\n{suggestion}\n"

msg = (
f"Error:\nMAPDL server connection terminated unexpectedly while {msg_}\n"
f"{reason}"
f"{suggestion}"
"Error:\n"
f" {error.details()}\n"
f"Full error:\n{error}"
)

# MAPDL gRPC is unavailable.
if error.code() == grpc.StatusCode.UNAVAILABLE:
raise MapdlExitedError(msg)

# Generic error
# Test if MAPDL is alive or not.
if mapdl.is_alive:
raise MapdlRuntimeError(msg)

else:
# Making sure we do not keep executing gRPC calls.
mapdl._exited = True

# Must close unfinished processes
mapdl._close_process()
raise MapdlExitedError(msg)
Expand Down
12 changes: 9 additions & 3 deletions src/ansys/mapdl/core/mapdl_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
from ansys.mapdl.core.errors import (
ComponentNoData,
MapdlCommandIgnoredError,
MapdlExitedError,
MapdlFileNotFoundError,
MapdlInvalidRoutineError,
MapdlRuntimeError,
Expand Down Expand Up @@ -434,7 +435,7 @@

>>> mapdl.solution.converged
"""
if self._exited: # pragma: no cover
if self.exited: # pragma: no cover
germa89 marked this conversation as resolved.
Show resolved Hide resolved
raise MapdlRuntimeError("MAPDL exited.")
return self._componentmanager

Expand Down Expand Up @@ -844,7 +845,7 @@
array([1.07512979e-04, 8.59137773e-05, 5.70690047e-05, ...,
5.70333124e-05, 8.58600402e-05, 1.07445726e-04])
"""
if self._exited:
if self.exited:
raise MapdlRuntimeError(
"MAPDL exited.\n\nCan only postprocess a live " "MAPDL instance."
)
Expand Down Expand Up @@ -963,7 +964,7 @@

>>> mapdl.solution.converged
"""
if self._exited:
if self.exited:
raise MapdlRuntimeError("MAPDL exited.")
return self._solution

Expand Down Expand Up @@ -2110,6 +2111,11 @@
>>> mapdl.prep7()

"""
if self.exited:
raise MapdlExitedError(

Check warning on line 2115 in src/ansys/mapdl/core/mapdl_core.py

View check run for this annotation

Codecov / codecov/patch

src/ansys/mapdl/core/mapdl_core.py#L2115

Added line #L2115 was not covered by tests
f"The MAPDL instance has been exited before running the command: {command}"
)

# check if multiline
if "\n" in command or "\r" in command:
raise ValueError("Use ``input_strings`` for multi-line commands")
Expand Down
11 changes: 8 additions & 3 deletions src/ansys/mapdl/core/mapdl_grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,9 +534,9 @@ def _multi_connect(self, n_attempts=5, timeout=15):
attempt_timeout = int(timeout / n_attempts)

max_time = time.time() + timeout
i = 0
i = 1
while time.time() < max_time and i <= n_attempts:
self._log.debug("Connection attempt %d", i + 1)
self._log.debug("Connection attempt %d", i)
connected = self._connect(timeout=attempt_timeout)
i += 1
if connected:
Expand Down Expand Up @@ -564,7 +564,7 @@ def _multi_connect(self, n_attempts=5, timeout=15):
else ""
)
raise MapdlConnectionError(
msg + f"The MAPDL process has died{pid_msg}."
msg + f" The MAPDL process has died{pid_msg}."
)

self._exited = False
Expand Down Expand Up @@ -1194,6 +1194,11 @@ def _close_process(self, timeout=2): # pragma: no cover
# Killing child processes
self._kill_child_processes(timeout=timeout)

if self.is_alive:
raise MapdlRuntimeError("MAPDL could not be exited.")
else:
self._exited = True

def _cache_pids(self):
"""Store the process IDs used when launching MAPDL.

Expand Down
21 changes: 17 additions & 4 deletions tests/test_grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
MapdlExitedError,
MapdlgRPCError,
MapdlRuntimeError,
protect_grpc,
)
from ansys.mapdl.core.mapdl_grpc import MAX_MESSAGE_LENGTH, MapdlGrpc
from ansys.mapdl.core.misc import random_string
Expand Down Expand Up @@ -639,14 +640,26 @@ def code(self):
def details(self):
return self._message

def _raise_error_code(args, **kwargs):
@protect_grpc
def _raise_error_code(*args, **kwargs):
raise UnavailableError()

monkeypatch.setattr(mapdl._stub, "SendCommand", _raise_error_code)
monkeypatch.setattr(mapdl, "prep7", _raise_error_code)

with pytest.raises(
MapdlExitedError, match="MAPDL server connection terminated unexpectedly while"
MapdlRuntimeError, match="MAPDL server connection terminated unexpectedly while"
):
mapdl.prep7()
mapdl.prep7(
mapdl
) # pass mapdl explicitly so that `_raise_error_code` is called like a method (instance as first argument).

assert mapdl.is_alive

# faking exiting MAPDL
mapdl._exited = True
with pytest.raises(
MapdlExitedError, match="MAPDL server connection terminated unexpectedly while"
):
mapdl.prep7(mapdl)

mapdl._exited = False
Loading