Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pass server->client command over a separate socket pair #762

Merged
merged 15 commits into from
Sep 15, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 17 additions & 14 deletions docs/vfio-user.rst
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,8 @@ client/server connection. Commands in the client-to-server direction are
handled on the main communication socket which the client connects to, and
replies to these commands are passed on the same socket. Commands sent in the
other direction from the server to the client as well as their corresponding
replies, are exchanged on a separate socket, which is set up during negotiation
(AF_UNIX servers just pass the file descriptor).
replies can optionally be passed across a separate socket, which is set up
during negotiation (AF_UNIX servers just pass the file descriptor).

Using separate sockets for each command channel avoids introducing an
artificial point of synchronization between the channels. This simplifies
Expand Down Expand Up @@ -524,15 +524,16 @@ Capabilities:
| | | then migration is not supported by the |
| | | sender. |
+--------------------+---------+-----------------------------------------------+
| reverse_cmd_socket | boolean | Indicates whether the client is capable of |
| | | using a separate socket as the channel for |
| | | server-to-client commands. If specified and |
| | | the server supports it, it will pass a file |
| | | descriptor along with its reply. This is |
| | | enabled by request only for the benefit of |
| | | clients that implement older drafts of this |
| | | specification which did not include |
| | | independent sockets per channel. |
| twin_socket | boolean | Indicates whether the client wants to use a |
| | | separate channel for server-to-client |
| | | commands. If specified and the server |
| | | supports it, it will include the file |
| | | descriptor for the client end of a separate |
| | | socket pair along with its reply. Some server |
| | | implementations may not support this, but it |
| | | is strongly recommended for servers which do |
| | | send server-to-client commands to implement |
| | | twin-socket support. |
+--------------------+---------+-----------------------------------------------+

The migration capability contains the following name/value pairs:
Expand All @@ -548,9 +549,11 @@ Reply
^^^^^

The same message format is used in the server's reply with the semantics
described above. In case the client specified ``reverse_cmd_socket`` in its
capabilities, the server may pass a file descriptor to use for the
server-to-client command channel.
described above. In case the client set ``twin_socket`` to true in its
capabilities, the server may include a file descriptor to use for the
server-to-client command channel in the reply. The index of the file descriptor
in the ancillary data of the reply is given by the ``twin_socket`` capability
field in the reply.

``VFIO_USER_DMA_MAP``
---------------------
Expand Down
2 changes: 1 addition & 1 deletion include/libvfio-user.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ extern "C" {
#endif

#define LIB_VFIO_USER_MAJOR 0
#define LIB_VFIO_USER_MINOR 1
#define LIB_VFIO_USER_MINOR 2

/* DMA addresses cannot be directly de-referenced. */
typedef void *vfu_dma_addr_t;
Expand Down
50 changes: 24 additions & 26 deletions lib/tran.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
* "migration": {
* "pgsize": 4096
* }
* "reverse_cmd_socket": true,
* "twin_socket": true,
* }
* }
*
Expand All @@ -66,7 +66,7 @@
int
tran_parse_version_json(const char *json_str, int *client_max_fdsp,
size_t *client_max_data_xfer_sizep, size_t *pgsizep,
bool *reverse_cmd_socket_supportedp)
bool *twin_socket_supportedp)
{
struct json_object *jo_caps = NULL;
struct json_object *jo_top = NULL;
Expand Down Expand Up @@ -132,13 +132,13 @@ tran_parse_version_json(const char *json_str, int *client_max_fdsp,
}
}

if (json_object_object_get_ex(jo_caps, "reverse_cmd_socket", &jo)) {
if (json_object_object_get_ex(jo_caps, "twin_socket", &jo)) {
if (json_object_get_type(jo) != json_type_boolean) {
goto out;
}

errno = 0;
*reverse_cmd_socket_supportedp = json_object_get_boolean(jo);
*twin_socket_supportedp = json_object_get_boolean(jo);

if (errno != 0) {
goto out;
Expand All @@ -159,7 +159,7 @@ tran_parse_version_json(const char *json_str, int *client_max_fdsp,
static int
recv_version(vfu_ctx_t *vfu_ctx, uint16_t *msg_idp,
struct vfio_user_version **versionp,
bool *reverse_cmd_socket_supportedp)
bool *twin_socket_supportedp)
{
struct vfio_user_version *cversion = NULL;
vfu_msg_t msg = { { 0 } };
Expand Down Expand Up @@ -224,7 +224,7 @@ recv_version(vfu_ctx_t *vfu_ctx, uint16_t *msg_idp,

ret = tran_parse_version_json(json_str, &vfu_ctx->client_max_fds,
&vfu_ctx->client_max_data_xfer_size,
&pgsize, reverse_cmd_socket_supportedp);
&pgsize, twin_socket_supportedp);

if (ret < 0) {
/* No client-supplied strings in the log for release build. */
Expand Down Expand Up @@ -284,14 +284,13 @@ recv_version(vfu_ctx_t *vfu_ctx, uint16_t *msg_idp,

static int
send_version(vfu_ctx_t *vfu_ctx, uint16_t msg_id,
struct vfio_user_version *cversion, int reverse_cmd_socket_fd)
struct vfio_user_version *cversion, int client_cmd_socket_fd)
{
struct vfio_user_version sversion = { 0 };
struct iovec iovecs[2] = { { 0 } };
char server_caps[1024];
vfu_msg_t msg = { { 0 } };
int slen;
int ret;

if (vfu_ctx->migration == NULL) {
slen = snprintf(server_caps, sizeof(server_caps),
Expand Down Expand Up @@ -330,55 +329,54 @@ send_version(vfu_ctx_t *vfu_ctx, uint16_t msg_id,
msg.hdr.msg_id = msg_id;
msg.out_iovecs = iovecs;
mnissler-rivos marked this conversation as resolved.
Show resolved Hide resolved
msg.nr_out_iovecs = 2;
if (reverse_cmd_socket_fd != -1) {
msg.out.fds = &reverse_cmd_socket_fd;
if (client_cmd_socket_fd != -1) {
msg.out.fds = &client_cmd_socket_fd;
msg.out.nr_fds = 1;
}

ret = vfu_ctx->tran->reply(vfu_ctx, &msg, 0);

return ret;
return vfu_ctx->tran->reply(vfu_ctx, &msg, 0);
}

int
tran_negotiate(vfu_ctx_t *vfu_ctx, int *reverse_cmd_socket_fdp)
tran_negotiate(vfu_ctx_t *vfu_ctx, int *client_cmd_socket_fdp)
{
struct vfio_user_version *client_version = NULL;
int reverse_cmd_socket_fds[2] = { -1, -1 };
bool reverse_cmd_socket_supported = false;
int client_cmd_socket_fds[2] = { -1, -1 };
bool twin_socket_supported = false;
uint16_t msg_id = 0x0bad;
int ret;

ret = recv_version(vfu_ctx, &msg_id, &client_version,
&reverse_cmd_socket_supported);
&twin_socket_supported);

if (ret < 0) {
vfu_log(vfu_ctx, LOG_ERR, "failed to recv version: %m");
return ret;
}

if (reverse_cmd_socket_supported && reverse_cmd_socket_fdp != NULL &&
if (twin_socket_supported && client_cmd_socket_fdp != NULL &&
vfu_ctx->client_max_fds > 0) {
if (socketpair(AF_UNIX, SOCK_STREAM, 0, reverse_cmd_socket_fds) == -1) {
if (socketpair(AF_UNIX, SOCK_STREAM, 0, client_cmd_socket_fds) == -1) {
vfu_log(vfu_ctx, LOG_ERR, "failed to create cmd socket: %m");
return -1;
mnissler-rivos marked this conversation as resolved.
Show resolved Hide resolved
}
}

ret = send_version(vfu_ctx, msg_id, client_version,
reverse_cmd_socket_fds[0]);
client_cmd_socket_fds[0]);

free(client_version);

/*
* The remote end of the reverse cmd socket pair is no longer needed, the
* local one only if successful.
* The remote end of the client command socket pair is no longer needed.
* The local end is kept only if passed to the caller on successful return.
*/
close_safely(&reverse_cmd_socket_fds[0]);
close_safely(&client_cmd_socket_fds[0]);
if (ret < 0) {
vfu_log(vfu_ctx, LOG_ERR, "failed to send version: %m");
close_safely(&reverse_cmd_socket_fds[1]);
} else if (reverse_cmd_socket_fdp != NULL) {
*reverse_cmd_socket_fdp = reverse_cmd_socket_fds[1];
close_safely(&client_cmd_socket_fds[1]);
} else if (client_cmd_socket_fdp != NULL) {
*client_cmd_socket_fdp = client_cmd_socket_fds[1];
}

return ret;
Expand Down
4 changes: 2 additions & 2 deletions lib/tran.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,10 @@ struct transport_ops {
int
tran_parse_version_json(const char *json_str, int *client_max_fdsp,
size_t *client_max_data_xfer_sizep, size_t *pgsizep,
bool *reverse_cmd_socket_supportedp);
bool *twin_socket_supportedp);

int
tran_negotiate(vfu_ctx_t *vfu_ctx, int *reverse_cmd_socket_fdp);
tran_negotiate(vfu_ctx_t *vfu_ctx, int *client_cmd_socket_fdp);

#endif /* LIB_VFIO_USER_TRAN_H */

Expand Down
14 changes: 7 additions & 7 deletions lib/tran_sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
typedef struct {
int listen_fd;
int conn_fd;
int reverse_cmd_socket_fd;
int client_cmd_socket_fd;
} tran_sock_t;

int
Expand Down Expand Up @@ -381,7 +381,7 @@ tran_sock_init(vfu_ctx_t *vfu_ctx)

ts->listen_fd = -1;
ts->conn_fd = -1;
ts->reverse_cmd_socket_fd = -1;
ts->client_cmd_socket_fd = -1;

if ((ts->listen_fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
ret = errno;
Expand Down Expand Up @@ -466,7 +466,7 @@ tran_sock_attach(vfu_ctx_t *vfu_ctx)
return -1;
}

ret = tran_negotiate(vfu_ctx, &ts->reverse_cmd_socket_fd);
ret = tran_negotiate(vfu_ctx, &ts->client_cmd_socket_fd);
if (ret < 0) {
close_safely(&ts->conn_fd);
return -1;
Expand Down Expand Up @@ -624,9 +624,9 @@ tran_sock_send_msg(vfu_ctx_t *vfu_ctx, uint16_t msg_id,

ts = vfu_ctx->tran_data;

/* NB. use the reverse command socket file descriptor if available. */
fd = ts->reverse_cmd_socket_fd != -1 ? ts->reverse_cmd_socket_fd
: ts->conn_fd;
/* NB. use the client command socket file descriptor if available. */
fd = ts->client_cmd_socket_fd != -1 ? ts->client_cmd_socket_fd
: ts->conn_fd;
return tran_sock_msg(fd, msg_id, cmd, send_data, send_len, hdr, recv_data,
recv_len);
}
Expand All @@ -642,7 +642,7 @@ tran_sock_detach(vfu_ctx_t *vfu_ctx)

if (ts != NULL) {
close_safely(&ts->conn_fd);
close_safely(&ts->reverse_cmd_socket_fd);
close_safely(&ts->client_cmd_socket_fd);
}
}

Expand Down
54 changes: 32 additions & 22 deletions test/py/libvfio_user.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#

from types import SimpleNamespace
import collections.abc
import ctypes as c
import array
import errno
Expand Down Expand Up @@ -482,6 +483,7 @@ class vfio_user_dma_unmap(Structure):


class vfio_user_dma_region_access(Structure):
mnissler-rivos marked this conversation as resolved.
Show resolved Hide resolved
"""Payload for VFIO_USER_DMA_READ and VFIO_USER_DMA_WRITE."""
_pack_ = 1
_fields_ = [
("addr", c.c_uint64),
Expand Down Expand Up @@ -702,38 +704,46 @@ class Client:

def __init__(self, sock=None):
self.sock = sock
self.reverse_cmd_sock = None
self.client_cmd_socket = None

def connect(self, ctx,
max_data_xfer_size=VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE,
reverse_cmd_socket=False):
def connect(self, ctx, capabilities={}):
self.sock = connect_sock()

json = f'''
{{
"capabilities": {{
"max_data_xfer_size": {max_data_xfer_size},
"max_msg_fds": 8,
"reverse_cmd_socket": {str(reverse_cmd_socket).lower()}
}}
}}
'''
effective_caps = {
"capabilities": {
"max_data_xfer_size": VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE,
"max_msg_fds": 8,
"twin_socket": False,
},
}

def update(target, overrides):
for k, v in overrides.items():
if isinstance(v, collections.abc.Mapping):
target[k] = target.get(k, {})
update(target[k], v)
else:
target[k] = v

update(effective_caps, capabilities)
caps_json = json.dumps(effective_caps)

# struct vfio_user_version
payload = struct.pack("HH%dsc" % len(json), LIBVFIO_USER_MAJOR,
LIBVFIO_USER_MINOR, json.encode(), b'\0')
payload = struct.pack("HH%dsc" % len(caps_json), LIBVFIO_USER_MAJOR,
LIBVFIO_USER_MINOR, caps_json.encode(), b'\0')
hdr = vfio_user_header(VFIO_USER_VERSION, size=len(payload))
self.sock.send(hdr + payload)
vfu_attach_ctx(ctx, expect=0)
fds, payload = get_reply_fds(self.sock, expect=0)
self.reverse_cmd_sock = socket.socket(fileno=fds[0]) if fds else None
self.client_cmd_socket = socket.socket(fileno=fds[0]) if fds else None
return self.sock

def disconnect(self, ctx):
self.sock.close()
self.sock = None
if self.reverse_cmd_sock is not None:
self.reverse_cmd_sock.close()
self.reverse_cmd_sock = None
if self.client_cmd_socket is not None:
self.client_cmd_socket.close()
self.client_cmd_socket = None

# notice client closed connection
vfu_run_ctx(ctx, errno.ENOTCONN)
Expand Down Expand Up @@ -795,7 +805,7 @@ def msg(ctx, sock, cmd, payload=bytearray(), expect=0, fds=None,
return get_reply(sock, expect=expect)


def get_msg_fds(sock, expect_type, expect=0):
def get_msg_fds(sock, expect_msg_type, expect_errno=0):
"""
Receives a message from a socket and pulls the returned file descriptors
out of the message.
Expand All @@ -805,7 +815,7 @@ def get_msg_fds(sock, expect_type, expect=0):
socket.CMSG_LEN(64 * fds.itemsize))
(msg_id, cmd, msg_size, msg_flags, errno) = struct.unpack("HHIII",
data[0:16])
assert errno == expect
assert errno == expect_errno

cmsg_level, cmsg_type, packed_fd = ancillary[0] if len(ancillary) != 0 \
else (0, 0, [])
Expand All @@ -814,7 +824,7 @@ def get_msg_fds(sock, expect_type, expect=0):
[unpacked_fd] = struct.unpack_from("i", packed_fd, offset=i)
unpacked_fds.append(unpacked_fd)
assert len(packed_fd)/4 == len(unpacked_fds)
assert (msg_flags & 0xf) == expect_type
assert (msg_flags & 0xf) == expect_msg_type
return (unpacked_fds, msg_id, cmd, data[16:])


Expand Down
Loading
Loading