Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix recursive set bug #141

Merged
merged 2 commits into from
Dec 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 34 additions & 10 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 26 additions & 4 deletions softioc/extension.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,26 @@ static PyObject *db_put_field(PyObject *self, PyObject *args)
if (dbNameToAddr(name, &dbAddr))
return PyErr_Format(
PyExc_RuntimeError, "dbNameToAddr failed for %s", name);
if (dbPutField(&dbAddr, dbrType, pbuffer, length))

long put_result;
/* There are two important locks to consider at this point: The Global
* Interpreter Lock (GIL) and the EPICS record lock. A deadlock is possible if
* this thread holds the GIL and wants the record lock (which happens inside
* dbPutField), and there exists another EPICS thread that has the record lock
* and wants to call Python (which requires the GIL).
* This can occur if this code is called as part of an asynchronous on_update
* callback.
* Therefore, we must ensure we relinquish the GIL while we perform this
* EPICS call, to avoid potential deadlocks.
* See https://github.com/dls-controls/pythonSoftIOC/issues/119. */
Py_BEGIN_ALLOW_THREADS
put_result = dbPutField(&dbAddr, dbrType, pbuffer, length);
Py_END_ALLOW_THREADS
if (put_result)
return PyErr_Format(
PyExc_RuntimeError, "dbPutField failed for %s", name);
Py_RETURN_NONE;
else
Py_RETURN_NONE;
}

static PyObject *db_get_field(PyObject *self, PyObject *args)
Expand All @@ -127,11 +143,17 @@ static PyObject *db_get_field(PyObject *self, PyObject *args)
return PyErr_Format(
PyExc_RuntimeError, "dbNameToAddr failed for %s", name);

long get_result;
long options = 0;
if (dbGetField(&dbAddr, dbrType, pbuffer, &options, &length, NULL))
/* See reasoning for Python macros in long comment in db_put_field. */
Py_BEGIN_ALLOW_THREADS
get_result = dbGetField(&dbAddr, dbrType, pbuffer, &options, &length, NULL);
Py_END_ALLOW_THREADS
if (get_result)
return PyErr_Format(
PyExc_RuntimeError, "dbGetField failed for %s", name);
Py_RETURN_NONE;
else
Py_RETURN_NONE;
}

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
Expand Down
115 changes: 113 additions & 2 deletions tests/test_records.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import asyncio
import multiprocessing
import subprocess
import sys
import numpy
import os
import pytest
Expand Down Expand Up @@ -347,7 +348,6 @@ def test_record_wrapper_str():
# If we never receive R it probably means an assert failed
select_and_recv(parent_conn, "R")


def validate_fixture_names(params):
"""Provide nice names for the out_records fixture in TestValidate class"""
return params[0].__name__
Expand Down Expand Up @@ -1104,3 +1104,114 @@ async def test_set_too_long_value(self):
log(f"PARENT: Join completed with exitcode {process.exitcode}")
if process.exitcode is None:
pytest.fail("Process did not terminate")


class TestRecursiveSet:
"""Tests related to recursive set() calls. See original issue here:
https://github.com/dls-controls/pythonSoftIOC/issues/119"""

recursive_record_name = "RecursiveLongOut"

def recursive_set_func(self, device_name, conn):
from cothread import Event

def useless_callback(value):
log("CHILD: In callback ", value)
useless_pv.set(0)
log("CHILD: Exiting callback")

def go_away(*args):
log("CHILD: received exit signal ", args)
event.Signal()

builder.SetDeviceName(device_name)


useless_pv = builder.aOut(
self.recursive_record_name,
initial_value=0,
on_update=useless_callback
)
event = Event()
builder.Action("GO_AWAY", on_update = go_away)

builder.LoadDatabase()
softioc.iocInit()

conn.send("R") # "Ready"
log("CHILD: Sent R over Connection to Parent")

log("CHILD: About to wait")
event.Wait()
log("CHILD: Exiting")

@requires_cothread
@pytest.mark.asyncio
async def test_recursive_set(self):
"""Test that recursive sets do not cause a deadlock"""
ctx = get_multiprocessing_context()
parent_conn, child_conn = ctx.Pipe()

device_name = create_random_prefix()

process = ctx.Process(
target=self.recursive_set_func,
args=(device_name, child_conn),
)

process.start()

log("PARENT: Child started, waiting for R command")

from aioca import caput, camonitor

try:
# Wait for message that IOC has started
select_and_recv(parent_conn, "R")
log("PARENT: received R command")

record = device_name + ":" + self.recursive_record_name

log(f"PARENT: monitoring {record}")
queue = asyncio.Queue()
monitor = camonitor(record, queue.put, all_updates=True)

log("PARENT: Beginning first wait")

# Expected initial state
new_val = await asyncio.wait_for(queue.get(), TIMEOUT)
log(f"PARENT: initial new_val: {new_val}")
assert new_val == 0

# Try a series of caput calls, to maximise chance to trigger
# the deadlock
i = 1
while i < 500:
log(f"PARENT: begin loop with i={i}")
await caput(record, i)
new_val = await asyncio.wait_for(queue.get(), 1)
assert new_val == i
new_val = await asyncio.wait_for(queue.get(), 1)
assert new_val == 0 # .set() should reset value
i += 1

# Signal the IOC to cleanly shut down
await caput(device_name + ":" + "GO_AWAY", 1)

except asyncio.TimeoutError as e:
raise asyncio.TimeoutError(
f"IOC did not send data back - loop froze on iteration {i} "
"- it has probably hung/deadlocked."
) from e

finally:
monitor.close()
# Clear the cache before stopping the IOC stops
# "channel disconnected" error messages
aioca_cleanup()

process.join(timeout=TIMEOUT)
log(f"PARENT: Join completed with exitcode {process.exitcode}")
if process.exitcode is None:
process.terminate()
pytest.fail("Process did not finish cleanly, terminating")
Loading