Skip to content

Commit

Permalink
unix: support SO_REUSEPORT with load balancing for TCP
Browse files Browse the repository at this point in the history
  • Loading branch information
panjf2000 authored May 21, 2024
1 parent c8d4a87 commit d2d92b7
Show file tree
Hide file tree
Showing 8 changed files with 334 additions and 8 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -644,6 +644,7 @@ if(LIBUV_BUILD_TESTS)
test/test-tcp-oob.c
test/test-tcp-open.c
test/test-tcp-read-stop.c
test/test-tcp-reuseport.c
test/test-tcp-read-stop-start.c
test/test-tcp-rst.c
test/test-tcp-shutdown-after-write.c
Expand Down
1 change: 1 addition & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ test_run_tests_SOURCES = test/blackhole-server.c \
test/test-tcp-flags.c \
test/test-tcp-open.c \
test/test-tcp-read-stop.c \
test/test-tcp-reuseport.c \
test/test-tcp-read-stop-start.c \
test/test-tcp-rst.c \
test/test-tcp-shutdown-after-write.c \
Expand Down
53 changes: 46 additions & 7 deletions docs/src/tcp.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,28 @@ Data types
TCP handle type.

.. c:type:: uv_tcp_flags
Flags used in :c:func:`uv_tcp_bind`.

::

enum uv_tcp_flags {
/* Used with uv_tcp_bind, when an IPv6 address is used. */
UV_TCP_IPV6ONLY = 1,

/* Enable SO_REUSEPORT socket option when binding the handle.
* This allows completely duplicate bindings by multiple processes
* or threads if they all set SO_REUSEPORT before binding the port.
* Incoming connections are distributed across the participating
* listener sockets.
*
* This flag is available only on Linux 3.9+, DragonFlyBSD 3.6+,
* FreeBSD 12.0+, Solaris 11.4, and AIX 7.2.5+ for now.
*/
UV_TCP_REUSEPORT = 2,
};


Public members
^^^^^^^^^^^^^^
Expand Down Expand Up @@ -81,16 +103,33 @@ API
.. c:function:: int uv_tcp_bind(uv_tcp_t* handle, const struct sockaddr* addr, unsigned int flags)
Bind the handle to an address and port. `addr` should point to an
initialized ``struct sockaddr_in`` or ``struct sockaddr_in6``.
Bind the handle to an address and port.
When the port is already taken, you can expect to see an ``UV_EADDRINUSE``
error from :c:func:`uv_listen` or :c:func:`uv_tcp_connect`. That is,
a successful call to this function does not guarantee that the call
to :c:func:`uv_listen` or :c:func:`uv_tcp_connect` will succeed as well.
error from :c:func:`uv_listen` or :c:func:`uv_tcp_connect` unless you specify
``UV_TCP_REUSEPORT`` in `flags` for all the binding sockets. That is, a successful
call to this function does not guarantee that the call to :c:func:`uv_listen` or
:c:func:`uv_tcp_connect` will succeed as well.
`flags` can contain ``UV_TCP_IPV6ONLY``, in which case dual-stack support
is disabled and only IPv6 is used.
:param handle: TCP handle. It should have been initialized with :c:func:`uv_tcp_init`.
:param addr: Address to bind to. It should point to an initialized ``struct sockaddr_in``
or ``struct sockaddr_in6``.
:param flags: Flags that control the behavior of binding the socket.
``UV_TCP_IPV6ONLY`` can be contained in `flags` to disable dual-stack
support and only use IPv6.
``UV_TCP_REUSEPORT`` can be contained in `flags` to enable the socket option
`SO_REUSEPORT` with the capability of load balancing that distribute incoming
connections across all listening sockets in multiple processes or threads.
:returns: 0 on success, or an error code < 0 on failure.
.. versionchanged:: 1.49.0 added the ``UV_TCP_REUSEPORT`` flag.
.. note::
``UV_TCP_REUSEPORT`` flag is available only on Linux 3.9+, DragonFlyBSD 3.6+,
FreeBSD 12.0+, Solaris 11.4, and AIX 7.2.5+ at the moment.
.. c:function:: int uv_tcp_getsockname(const uv_tcp_t* handle, struct sockaddr* name, int* namelen)
Expand Down
13 changes: 12 additions & 1 deletion include/uv.h
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,18 @@ UV_EXTERN int uv_tcp_simultaneous_accepts(uv_tcp_t* handle, int enable);

enum uv_tcp_flags {
/* Used with uv_tcp_bind, when an IPv6 address is used. */
UV_TCP_IPV6ONLY = 1
UV_TCP_IPV6ONLY = 1,

/* Enable SO_REUSEPORT socket option when binding the handle.
* This allows completely duplicate bindings by multiple processes
* or threads if they all set SO_REUSEPORT before binding the port.
* Incoming connections are distributed across the participating
* listener sockets.
*
* This flag is available only on Linux 3.9+, DragonFlyBSD 3.6+,
* FreeBSD 12.0+, Solaris 11.4, and AIX 7.2.5+ for now.
*/
UV_TCP_REUSEPORT = 2,
};

UV_EXTERN int uv_tcp_bind(uv_tcp_t* handle,
Expand Down
50 changes: 50 additions & 0 deletions src/unix/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,50 @@ int uv_tcp_init(uv_loop_t* loop, uv_tcp_t* tcp) {
}


static int uv__tcp_reuseport(int fd) {
int on = 1;
#if defined(__FreeBSD__) && __FreeBSD__ >= 12 && defined(SO_REUSEPORT_LB)
/* FreeBSD 12 introduced a new socket option named SO_REUSEPORT_LB
* with the capability of load balancing, it's the substitution of
* the SO_REUSEPORTs on Linux and DragonFlyBSD. */
if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT_LB, &on, sizeof(on)))
return UV__ERR(errno);
#elif (defined(__linux__) || \
defined(_AIX73) || \
(defined(__DragonFly__) && __DragonFly_version >= 300600) || \
(defined(__sun) && defined(SO_FLOW_NAME))) && \
defined(SO_REUSEPORT)
/* On Linux 3.9+, the SO_REUSEPORT implementation distributes connections
* evenly across all of the threads (or processes) that are blocked in
* accept() on the same port.
*
* DragonFlyBSD 3.6.0 extended SO_REUSEPORT to distribute workload to
* available sockets, which made it the equivalent of Linux's SO_REUSEPORT.
*
* AIX 7.2.5 added the feature that would add the capability to distribute
* incoming connections across all listening ports for SO_REUSEPORT.
*
* Solaris 11 supported SO_REUSEPORT, but it's implemented only for
* binding to the same address and port, without load balancing.
* Solaris 11.4 extended SO_REUSEPORT with the capability of load balancing.
* Since it's impossible to detect the Solaris 11.4 version via OS macros,
* so we check the presence of the socket option SO_FLOW_NAME that was first
* introduced to Solaris 11.4. */
if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on)))
return UV__ERR(errno);
#else
(void) (fd);
(void) (on);
/* SO_REUSEPORTs do not have the capability of load balancing on platforms
* other than those mentioned above. The semantics are completely different,
* therefore we shouldn't enable it, but fail this operation to indicate that
* UV_TCP_REUSEPORT is not supported on these platforms. */
return UV_ENOTSUP;
#endif

return 0;
}

int uv__tcp_bind(uv_tcp_t* tcp,
const struct sockaddr* addr,
unsigned int addrlen,
Expand All @@ -167,6 +211,12 @@ int uv__tcp_bind(uv_tcp_t* tcp,
if (setsockopt(tcp->io_watcher.fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)))
return UV__ERR(errno);

if (flags & UV_TCP_REUSEPORT) {
err = uv__tcp_reuseport(tcp->io_watcher.fd);
if (err)
return err;
}

#ifndef __OpenBSD__
#ifdef IPV6_V6ONLY
if (addr->sa_family == AF_INET6) {
Expand Down
6 changes: 6 additions & 0 deletions src/win/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,12 @@ static int uv__tcp_try_bind(uv_tcp_t* handle,
DWORD err;
int r;

/* There is no SO_REUSEPORT on Windows, Windows only knows SO_REUSEADDR.
* so we just return an error directly when UV_TCP_REUSEPORT is requested
* for binding the socket. */
if (flags & UV_TCP_REUSEPORT)
return ERROR_NOT_SUPPORTED;

if (handle->socket == INVALID_SOCKET) {
SOCKET sock;

Expand Down
3 changes: 3 additions & 0 deletions test/test-list.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ TEST_DECLARE (tcp_write_to_half_open_connection)
TEST_DECLARE (tcp_unexpected_read)
TEST_DECLARE (tcp_read_stop)
TEST_DECLARE (tcp_read_stop_start)
TEST_DECLARE (tcp_reuseport)
TEST_DECLARE (tcp_rst)
TEST_DECLARE (tcp_bind6_error_addrinuse)
TEST_DECLARE (tcp_bind6_error_addrnotavail)
Expand Down Expand Up @@ -765,6 +766,8 @@ TASK_LIST_START

TEST_ENTRY (tcp_read_stop_start)

TEST_ENTRY (tcp_reuseport)

TEST_ENTRY (tcp_rst)
TEST_HELPER (tcp_rst, tcp4_echo_server)

Expand Down
Loading

0 comments on commit d2d92b7

Please sign in to comment.