Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: adding python side retry mechanism #3354

Merged
merged 21 commits into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
044075d
feat: adding python retry/reconnect mechanism
germa89 Aug 9, 2024
657a967
Merge branch 'main' into feat/adding-python-side-retry-mechanism
germa89 Aug 9, 2024
1ce2786
feat: connecting only one time per retry
germa89 Aug 9, 2024
cb0c42c
fix: avoid running commands when the instance has exited and using ex…
germa89 Aug 9, 2024
8ca9ecf
feat: check if MAPDL has died properly during processes kill
germa89 Aug 9, 2024
ba2895e
fix: looping
germa89 Aug 9, 2024
a8af85a
refactor: error message
germa89 Aug 9, 2024
33ac06c
chore: adding changelog file 3354.miscellaneous.md
pyansys-ci-bot Aug 14, 2024
6a495ea
chore: adding changelog file 3354.dependencies.md
pyansys-ci-bot Aug 14, 2024
77bb43a
Merge branch 'main' into feat/adding-python-side-retry-mechanism
germa89 Aug 19, 2024
2437899
chore: adding changelog file 3354.miscellaneous.md
pyansys-ci-bot Aug 19, 2024
661cc47
Merge branch 'main' into feat/adding-python-side-retry-mechanism
germa89 Aug 20, 2024
90e7990
fix: test
germa89 Aug 21, 2024
e653593
Merge branch 'main' into feat/adding-python-side-retry-mechanism
germa89 Aug 21, 2024
93bde1a
chore: Merge branch 'main' into feat/adding-python-side-retry-mechanism
germa89 Aug 21, 2024
f4ac723
chore: Merge branch 'feat/adding-python-side-retry-mechanism' of http…
germa89 Aug 21, 2024
1719c3e
fix: wrong name in variable.
germa89 Aug 21, 2024
6289513
Update src/ansys/mapdl/core/errors.py
germa89 Aug 22, 2024
201e911
fix: not exiting MAPDL when faking exiting MAPDL in tests.
germa89 Aug 26, 2024
03b453b
Update the image cache
germa89 Aug 26, 2024
90793b3
Merge branch 'main' into feat/adding-python-side-retry-mechanism
germa89 Aug 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/changelog.d/3354.miscellaneous.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
feat: adding python side retry mechanism
119 changes: 90 additions & 29 deletions src/ansys/mapdl/core/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from functools import wraps
import signal
import threading
from time import sleep
from typing import Callable, Optional

import grpc
Expand Down Expand Up @@ -306,26 +307,73 @@ def wrapper(*args, **kwargs):
old_handler = signal.signal(signal.SIGINT, handler)

# Capture gRPC exceptions
try:
out = func(*args, **kwargs)
except grpc.RpcError as error:
# Custom errors
if error.code() == grpc.StatusCode.RESOURCE_EXHAUSTED:
if "Received message larger than max" in error.details():
try:
lim_ = int(error.details().split("(")[1].split("vs")[0])
except IndexError:
lim_ = int(512 * 1024**2)

raise MapdlgRPCError(
f"RESOURCE_EXHAUSTED: {error.details()}. "
"You can try to increase the gRPC message length size using 'PYMAPDL_MAX_MESSAGE_LENGTH'"
" environment variable. For instance:\n\n"
f"$ export PYMAPDL_MAX_MESSAGE_LENGTH={lim_}"
n_attempts = 3
initial_backoff = 0.05
multiplier_backoff = 3

i_attemps = 0

while True:
try:
out = func(*args, **kwargs)

germa89 marked this conversation as resolved.
Show resolved Hide resolved
# Exit while-loop if success
break

except grpc.RpcError as error:

mapdl = retrieve_mapdl_from_args(args)

i_attemps += 1
if i_attemps <= n_attempts:

wait = (
initial_backoff * multiplier_backoff**i_attemps
) # Exponential backoff
sleep(wait)

# reconnect
mapdl._log.debug(
f"Re-connection attempt {i_attemps} after waiting {wait:0.3f} seconds"
)

connected = mapdl._connect(timeout=wait)

# Retry again
continue

# Custom errors
reason = ""
suggestion = ""
germa89 marked this conversation as resolved.
Show resolved Hide resolved

if error.code() == grpc.StatusCode.RESOURCE_EXHAUSTED:
if "Received message larger than max" in error.details():
try:
lim_ = int(error.details().split("(")[1].split("vs")[0])
except IndexError:
lim_ = int(512 * 1024**2)

raise MapdlgRPCError(
f"RESOURCE_EXHAUSTED: {error.details()}. "
"You can try to increase the gRPC message length size using 'PYMAPDL_MAX_MESSAGE_LENGTH'"
" environment variable. For instance:\n\n"
f"$ export PYMAPDL_MAX_MESSAGE_LENGTH={lim_}"
)

if error.code() == grpc.StatusCode.UNAVAILABLE:
# Very likely the MAPDL server has died.
suggestion = (
" MAPDL *might* have died because it executed a not-allowed command or ran out of memory.\n"
" Check the MAPDL command output for more details.\n"
" Open an issue on GitHub if you need assistance: "
"https://github.com/ansys/pymapdl/issues"
)

# Generic error
handle_generic_grpc_error(error, func, args, kwargs)
# Generic error
handle_generic_grpc_error(error, func, args, kwargs, reason, suggestion)

# Break
break
germa89 marked this conversation as resolved.
Show resolved Hide resolved

# No exceptions
if threading.current_thread().__class__.__name__ == "_MainThread":
Expand All @@ -344,15 +392,26 @@ def wrapper(*args, **kwargs):
return wrapper


def handle_generic_grpc_error(error, func, args, kwargs):
"""Handle non-custom gRPC errors"""

def retrieve_mapdl_from_args(args):
    """Return the MAPDL instance behind the positional arguments of a call.

    The first positional argument is inspected: if its class is named
    ``MapdlGrpc`` it is returned as is; otherwise, if it exposes a
    ``_mapdl`` attribute, that attribute is returned.

    Parameters
    ----------
    args : tuple
        Positional arguments of the wrapped call. The first item is
        expected to be the bound instance (``self``).

    Returns
    -------
    object
        ``args[0]`` when its class name is ``MapdlGrpc``, else
        ``args[0]._mapdl``.

    Raises
    ------
    RuntimeError
        If ``args`` is empty, or if its first item is neither a
        ``MapdlGrpc`` instance nor an object with a ``_mapdl`` attribute.
        Previously these cases surfaced as an unguarded ``IndexError`` or
        an ``UnboundLocalError`` on the ``return`` statement.
    """
    try:
        owner = args[0]
    except IndexError:
        raise RuntimeError(
            "Could not retrieve the MAPDL instance: "
            "the call has no positional arguments."
        ) from None

    # can't use isinstance here due to circular imports
    if owner.__class__.__name__ == "MapdlGrpc":
        return owner

    if hasattr(owner, "_mapdl"):
        return owner._mapdl

    # Fail with an explicit message instead of leaving the result unbound.
    raise RuntimeError(
        "Could not retrieve the MAPDL instance: expected a 'MapdlGrpc' "
        "instance or an object with a '_mapdl' attribute as first "
        f"argument, but got an object of type '{type(owner).__name__}'."
    )


def handle_generic_grpc_error(error, func, args, kwargs, reason="", suggestion=""):
"""Handle non-custom gRPC errors"""

mapdl = retrieve_mapdl_from_args(args)

# trying to get "cmd" argument:
cmd = args[1] if len(args) >= 2 else ""
cmd = kwargs.get("cmd", cmd)
Expand All @@ -364,28 +423,30 @@ def handle_generic_grpc_error(error, func, args, kwargs):
else:
msg_ = f"calling:{caller}\nwith the following arguments:\n args: {args}\n kwargs: {kwargs}"

if class_name == "MapdlGrpc":
mapdl = args[0]
elif hasattr(args[0], "_mapdl"):
mapdl = args[0]._mapdl
if reason:
reason = f"Possible reason:\n{reason}\n"

if suggestion:
suggestion = f"Suggestions:\n{suggestion}\n"

msg = (
f"Error:\nMAPDL server connection terminated unexpectedly while {msg_}\n"
f"{reason}"
f"{suggestion}"
"Error:\n"
f" {error.details()}\n"
f"Full error:\n{error}"
)

# MAPDL gRPC is unavailable.
if error.code() == grpc.StatusCode.UNAVAILABLE:
raise MapdlExitedError(msg)

# Generic error
# Test if MAPDL is alive or not.
if mapdl.is_alive:
raise MapdlRuntimeError(msg)

else:
# Making sure we do not keep executing gRPC calls.
mapdl._exited = True

# Must close unfinished processes
mapdl._close_process()
raise MapdlExitedError(msg)
Expand Down
12 changes: 9 additions & 3 deletions src/ansys/mapdl/core/mapdl_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
from ansys.mapdl.core.errors import (
ComponentNoData,
MapdlCommandIgnoredError,
MapdlExitedError,
MapdlFileNotFoundError,
MapdlInvalidRoutineError,
MapdlRuntimeError,
Expand Down Expand Up @@ -434,7 +435,7 @@ def components(self) -> "ComponentManager":

>>> mapdl.solution.converged
"""
if self._exited: # pragma: no cover
if self.exited: # pragma: no cover
germa89 marked this conversation as resolved.
Show resolved Hide resolved
raise MapdlRuntimeError("MAPDL exited.")
return self._componentmanager

Expand Down Expand Up @@ -844,7 +845,7 @@ def post_processing(self) -> "PostProcessing":
array([1.07512979e-04, 8.59137773e-05, 5.70690047e-05, ...,
5.70333124e-05, 8.58600402e-05, 1.07445726e-04])
"""
if self._exited:
if self.exited:
raise MapdlRuntimeError(
"MAPDL exited.\n\nCan only postprocess a live " "MAPDL instance."
)
Expand Down Expand Up @@ -963,7 +964,7 @@ def solution(self) -> "Solution":

>>> mapdl.solution.converged
"""
if self._exited:
if self.exited:
raise MapdlRuntimeError("MAPDL exited.")
return self._solution

Expand Down Expand Up @@ -2110,6 +2111,11 @@ def run(
>>> mapdl.prep7()

"""
if self.exited:
raise MapdlExitedError(
f"The MAPDL instance has been exited before running the command: {cmd}"
)

# check if multiline
if "\n" in command or "\r" in command:
raise ValueError("Use ``input_strings`` for multi-line commands")
Expand Down
11 changes: 8 additions & 3 deletions src/ansys/mapdl/core/mapdl_grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,9 +534,9 @@ def _multi_connect(self, n_attempts=5, timeout=15):
attempt_timeout = int(timeout / n_attempts)

max_time = time.time() + timeout
i = 0
i = 1
while time.time() < max_time and i <= n_attempts:
self._log.debug("Connection attempt %d", i + 1)
self._log.debug("Connection attempt %d", i)
connected = self._connect(timeout=attempt_timeout)
i += 1
if connected:
Expand Down Expand Up @@ -564,7 +564,7 @@ def _multi_connect(self, n_attempts=5, timeout=15):
else ""
)
raise MapdlConnectionError(
msg + f"The MAPDL process has died{pid_msg}."
msg + f" The MAPDL process has died{pid_msg}."
)

self._exited = False
Expand Down Expand Up @@ -1194,6 +1194,11 @@ def _close_process(self, timeout=2): # pragma: no cover
# Killing child processes
self._kill_child_processes(timeout=timeout)

if self.is_alive:
raise MapdlRuntimeError("MAPDL could not be exited.")
else:
self._exited = True

def _cache_pids(self):
"""Store the process IDs used when launching MAPDL.

Expand Down
Loading