Skip to content

Commit

Permalink
Expose TTL (on handshake) to applications (#4602)
Browse files Browse the repository at this point in the history
* modify statistics

* add winuser hoplimit-ttl support

* rename to HandshakeTTL, change connection.c

* add datapath test to assert that hoplimit is > 0

* add winkernel hoplimit

* add code to set TTL

* fix spacing

* implement raw socket parsing and add stubs for freeBSD and linux

* comment out unused variable to get rid of build warnings

* add epoll impl

* remove invalid asserts, add missing flag

* type cast to int

* set epoll hoplimit instead of ip_ttl

* IP_HOPLIMIT does not exist on Linux

* gonna lean on the CI to see if we are crashing the socket init unit tests

* comment out ipv6

* does IP_RECVTTL socket option exist for ipv6 on linux?

* IPV6_HOPLIMIT the way to go?

* stash; don't push

* try IPV6_RECVHOPLIMIT

* test code modifications

* update datapath to be os version aware

* add version checking for WS2022, bubble up info for the tests

* fix typo for epoll, add winkernel checks

* comment out printf to get winkernel to build

* update comment and remove enabled features

* increase timeout for netperf due to recent changes
  • Loading branch information
ProjectsByJackHe authored Nov 8, 2024
1 parent a275d74 commit c761886
Show file tree
Hide file tree
Showing 13 changed files with 270 additions and 14 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/netperf.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ jobs:
runs-on: windows-latest
steps:
- name: Run NetPerf Workflow
timeout-minutes: 90
timeout-minutes: 120
shell: pwsh
run: |
$url = "https://raw.githubusercontent.com/microsoft/netperf/main/run-workflow.ps1"
Expand Down
7 changes: 7 additions & 0 deletions src/core/connection.c
Original file line number Diff line number Diff line change
Expand Up @@ -5638,6 +5638,9 @@ QuicConnRecvDatagrams(

if (!IsDeferred) {
Connection->Stats.Recv.TotalBytes += Packet->BufferLength;
if (Connection->Stats.Handshake.HandshakeHopLimitTTL == 0) {
Connection->Stats.Handshake.HandshakeHopLimitTTL = Packet->HopLimitTTL;
}
QuicConnLogInFlowStats(Connection);

if (!CurrentPath->IsPeerValidated) {
Expand Down Expand Up @@ -6823,6 +6826,10 @@ QuicConnGetV2Statistics(
Stats->SendEcnCongestionCount = Connection->Stats.Send.EcnCongestionCount;
}

if (STATISTICS_HAS_FIELD(*StatsLength, HandshakeHopLimitTTL)) {
Stats->HandshakeHopLimitTTL = Connection->Stats.Handshake.HandshakeHopLimitTTL;
}

*StatsLength = CXPLAT_MIN(*StatsLength, sizeof(QUIC_STATISTICS_V2));

return QUIC_STATUS_SUCCESS;
Expand Down
1 change: 1 addition & 0 deletions src/core/connection.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ typedef struct QUIC_CONN_STATS {
uint32_t ClientFlight1Bytes; // Sum of TLS payloads
uint32_t ServerFlight1Bytes; // Sum of TLS payloads
uint32_t ClientFlight2Bytes; // Sum of TLS payloads
uint8_t HandshakeHopLimitTTL; // TTL value in the initial packet of the handshake.
} Handshake;

struct {
Expand Down
2 changes: 2 additions & 0 deletions src/inc/msquic.h
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,8 @@ typedef struct QUIC_STATISTICS_V2 {

uint32_t SendEcnCongestionCount; // Number of congestion events caused by ECN.

uint8_t HandshakeHopLimitTTL; // The TTL value in the initial packet of the handshake.

// N.B. New fields must be appended to end

} QUIC_STATISTICS_V2;
Expand Down
6 changes: 6 additions & 0 deletions src/inc/quic_datapath.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,11 @@ typedef struct CXPLAT_RECV_DATA {
//
uint8_t TypeOfService;

//
// TTL (IPv4) or Hop Limit (IPv6) field of the IP header of the received packet.
// The connection records the first non-zero value in its handshake statistics.
//
uint8_t HopLimitTTL;

//
// Flags.
//
Expand Down Expand Up @@ -438,6 +443,7 @@ CxPlatDataPathUpdateConfig(
#define CXPLAT_DATAPATH_FEATURE_PORT_RESERVATIONS 0x0010
#define CXPLAT_DATAPATH_FEATURE_TCP 0x0020
#define CXPLAT_DATAPATH_FEATURE_RAW 0x0040
#define CXPLAT_DATAPATH_FEATURE_TTL 0x0080

//
// Queries the currently supported features of the datapath.
Expand Down
73 changes: 70 additions & 3 deletions src/platform/datapath_epoll.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,9 @@ typedef struct CXPLAT_SEND_DATA {
} CXPLAT_SEND_DATA;

typedef struct CXPLAT_RECV_MSG_CONTROL_BUFFER {
char Data[CMSG_SPACE(sizeof(struct in6_pktinfo)) +
2 * CMSG_SPACE(sizeof(int))];
char Data[CMSG_SPACE(sizeof(struct in6_pktinfo)) + // IP_PKTINFO
2 * CMSG_SPACE(sizeof(int)) // TOS
+ CMSG_SPACE(sizeof(int))]; // IP_TTL
} CXPLAT_RECV_MSG_CONTROL_BUFFER;

#ifdef DEBUG
Expand Down Expand Up @@ -344,6 +345,10 @@ CxPlatDataPathCalculateFeatureSupport(
}

Datapath->Features |= CXPLAT_DATAPATH_FEATURE_TCP;
//
// TTL should always be available / enabled on Linux.
//
Datapath->Features |= CXPLAT_DATAPATH_FEATURE_TTL;
}

void
Expand Down Expand Up @@ -853,6 +858,52 @@ CxPlatSocketContextInitialize(
goto Exit;
}

//
// TTL should always be available / enabled on Linux.
//

//
// On Linux, IP_HOPLIMIT does not exist, so we use IP_RECVTTL (IPv4) and IPV6_RECVHOPLIMIT (IPv6) instead.
//
Option = TRUE;
Result =
setsockopt(
SocketContext->SocketFd,
IPPROTO_IP,
IP_RECVTTL,
(const void*)&Option,
sizeof(Option));
if (Result == SOCKET_ERROR) {
Status = errno;
QuicTraceEvent(
DatapathErrorStatus,
"[data][%p] ERROR, %u, %s.",
Binding,
Status,
"setsockopt(IP_RECVTTL) failed");
goto Exit;
}

Option = TRUE;
Result =
setsockopt(
SocketContext->SocketFd,
IPPROTO_IPV6,
IPV6_RECVHOPLIMIT,
(const void*)&Option,
sizeof(Option));
if (Result == SOCKET_ERROR) {
Status = errno;
QuicTraceEvent(
DatapathErrorStatus,
"[data][%p] ERROR, %u, %s.",
Binding,
Status,
"setsockopt(IPV6_RECVHOPLIMIT) failed");
goto Exit;
}


#ifdef UDP_GRO
if (SocketContext->DatapathPartition->Datapath->Features & CXPLAT_DATAPATH_FEATURE_RECV_COALESCING) {
Option = TRUE;
Expand Down Expand Up @@ -1782,8 +1833,9 @@ CxPlatSocketContextRecvComplete(
BytesTransferred += RecvMsgHdr[CurrentMessage].msg_len;

uint8_t TOS = 0;
int HopLimitTTL = 0;
uint16_t SegmentLength = 0;
BOOLEAN FoundLocalAddr = FALSE, FoundTOS = FALSE;
BOOLEAN FoundLocalAddr = FALSE, FoundTOS = FALSE, FoundTTL = FALSE;
QUIC_ADDR* LocalAddr = &IoBlock->Route.LocalAddress;
QUIC_ADDR* RemoteAddr = &IoBlock->Route.RemoteAddress;
CxPlatConvertFromMappedV6(RemoteAddr, RemoteAddr);
Expand All @@ -1808,6 +1860,11 @@ CxPlatSocketContextRecvComplete(
CXPLAT_DBG_ASSERT_CMSG(CMsg, uint8_t);
TOS = *(uint8_t*)CMSG_DATA(CMsg);
FoundTOS = TRUE;
} else if (CMsg->cmsg_type == IPV6_HOPLIMIT) {
HopLimitTTL = *CMSG_DATA(CMsg);
CXPLAT_DBG_ASSERT(HopLimitTTL < 256);
CXPLAT_DBG_ASSERT(HopLimitTTL > 0);
FoundTTL = TRUE;
} else {
CXPLAT_DBG_ASSERT(FALSE);
}
Expand All @@ -1816,6 +1873,11 @@ CxPlatSocketContextRecvComplete(
CXPLAT_DBG_ASSERT_CMSG(CMsg, uint8_t);
TOS = *(uint8_t*)CMSG_DATA(CMsg);
FoundTOS = TRUE;
} else if (CMsg->cmsg_type == IP_TTL) {
HopLimitTTL = *CMSG_DATA(CMsg);
CXPLAT_DBG_ASSERT(HopLimitTTL < 256);
CXPLAT_DBG_ASSERT(HopLimitTTL > 0);
FoundTTL = TRUE;
} else {
CXPLAT_DBG_ASSERT(FALSE);
}
Expand All @@ -1833,6 +1895,10 @@ CxPlatSocketContextRecvComplete(

CXPLAT_FRE_ASSERT(FoundLocalAddr);
CXPLAT_FRE_ASSERT(FoundTOS);
//
// TTL should always be available/enabled on Linux.
//
CXPLAT_FRE_ASSERT(FoundTTL);

QuicTraceEvent(
DatapathRecv,
Expand Down Expand Up @@ -1872,6 +1938,7 @@ CxPlatSocketContextRecvComplete(
}
RecvData->PartitionIndex = SocketContext->DatapathPartition->PartitionIndex;
RecvData->TypeOfService = TOS;
RecvData->HopLimitTTL = (uint8_t)HopLimitTTL;
RecvData->Allocated = TRUE;
RecvData->Route->DatapathType = RecvData->DatapathType = CXPLAT_DATAPATH_TYPE_USER;
RecvData->QueuedOnConnection = FALSE;
Expand Down
4 changes: 4 additions & 0 deletions src/platform/datapath_kqueue.c
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,9 @@ CxPlatDataPathGetSupportedFeatures(
_In_ CXPLAT_DATAPATH* Datapath
)
{
//
// Intentionally not enabling CXPLAT_DATAPATH_FEATURE_TTL on macOS for now.
//
return Datapath->Features;
}

Expand Down Expand Up @@ -1127,6 +1130,7 @@ CxPlatSocketContextRecvComplete(

RecvPacket->Route->Queue = SocketContext;
RecvPacket->TypeOfService = 0;
RecvPacket->HopLimitTTL = 0; // TODO: We are not supporting this on MacOS (yet) unless there's a business need.

struct cmsghdr *CMsg;
for (CMsg = CMSG_FIRSTHDR(&SocketContext->RecvMsgHdr);
Expand Down
5 changes: 4 additions & 1 deletion src/platform/datapath_raw.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,10 @@ RawDataPathGetSupportedFeatures(
)
{
UNREFERENCED_PARAMETER(Datapath);
return CXPLAT_DATAPATH_FEATURE_RAW;
//
// TTL should always be available / enabled for XDP.
//
return CXPLAT_DATAPATH_FEATURE_RAW | CXPLAT_DATAPATH_FEATURE_TTL;
}

_IRQL_requires_max_(DISPATCH_LEVEL)
Expand Down
2 changes: 2 additions & 0 deletions src/platform/datapath_raw_socket.c
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ CxPlatDpRawParseIPv4(
}

Packet->TypeOfService = IP->EcnField;
Packet->HopLimitTTL = IP->TimeToLive;
Packet->Route->RemoteAddress.Ipv4.sin_family = AF_INET;
CxPlatCopyMemory(&Packet->Route->RemoteAddress.Ipv4.sin_addr, IP->Source, sizeof(IP->Source));
Packet->Route->LocalAddress.Ipv4.sin_family = AF_INET;
Expand Down Expand Up @@ -366,6 +367,7 @@ CxPlatDpRawParseIPv6(
VersionClassEcnFlow.Value = CxPlatByteSwapUint32(IP->VersionClassEcnFlow);

Packet->TypeOfService = (uint8_t)VersionClassEcnFlow.EcnField;
Packet->HopLimitTTL = IP->HopLimit;
Packet->Route->RemoteAddress.Ipv6.sin6_family = AF_INET6;
CxPlatCopyMemory(&Packet->Route->RemoteAddress.Ipv6.sin6_addr, IP->Source, sizeof(IP->Source));
Packet->Route->LocalAddress.Ipv6.sin6_family = AF_INET6;
Expand Down
70 changes: 70 additions & 0 deletions src/platform/datapath_winkernel.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
--*/

#include "platform_internal.h"

#ifdef QUIC_CLOG
#include "datapath_winkernel.c.clog.h"
#endif
Expand Down Expand Up @@ -765,6 +766,25 @@ CxPlatDataPathQuerySockoptSupport(

} while (FALSE);

do {
RTL_OSVERSIONINFOW osInfo;
RtlZeroMemory(&osInfo, sizeof(osInfo));
osInfo.dwOSVersionInfoSize = sizeof(osInfo);
NTSTATUS status = RtlGetVersion(&osInfo);
if (NT_SUCCESS(status)) {
DWORD BuildNumber = osInfo.dwBuildNumber;
//
// A USO/URO bug blocks TTL feature support on Windows Server 2022 (build 20348).
//
if (BuildNumber == 20348) {
break;
}
} else {
break;
}
Datapath->Features |= CXPLAT_DATAPATH_FEATURE_TTL;
} while (FALSE);

Error:

if (UdpSocket != NULL) {
Expand Down Expand Up @@ -1672,6 +1692,46 @@ CxPlatSocketCreateUdp(
goto Error;
}

if (Datapath->Features & CXPLAT_DATAPATH_FEATURE_TTL) {
Option = TRUE;
Status =
CxPlatDataPathSetControlSocket(
Binding,
WskSetOption,
IP_HOPLIMIT,
IPPROTO_IP,
sizeof(Option),
&Option);
if (QUIC_FAILED(Status)) {
QuicTraceEvent(
DatapathErrorStatus,
"[data][%p] ERROR, %u, %s.",
Binding,
Status,
"Set IP_HOPLIMIT");
goto Error;
}

Option = TRUE;
Status =
CxPlatDataPathSetControlSocket(
Binding,
WskSetOption,
IPV6_HOPLIMIT,
IPPROTO_IPV6,
sizeof(Option),
&Option);
if (QUIC_FAILED(Status)) {
QuicTraceEvent(
DatapathErrorStatus,
"[data][%p] ERROR, %u, %s.",
Binding,
Status,
"Set IPV6_HOPLIMIT");
goto Error;
}
}

if (Datapath->Features & CXPLAT_DATAPATH_FEATURE_RECV_COALESCING) {
Option = MAX_URO_PAYLOAD_LENGTH;
Status =
Expand Down Expand Up @@ -2200,6 +2260,7 @@ CxPlatDataPathSocketReceive(
SOCKADDR_INET RemoteAddr;
UINT16 MessageLength = 0;
INT ECN = 0;
INT HopLimitTTL = 0;

//
// Parse the ancillary data for all the per datagram information that we
Expand Down Expand Up @@ -2231,6 +2292,10 @@ CxPlatDataPathSocketReceive(
} else if (CMsg->cmsg_type == IPV6_ECN) {
ECN = *(PINT)WSA_CMSG_DATA(CMsg);
CXPLAT_DBG_ASSERT(ECN < UINT8_MAX);
} else if (CMsg->cmsg_type == IPV6_HOPLIMIT) {
HopLimitTTL = *(PINT)WSA_CMSG_DATA(CMsg);
CXPLAT_DBG_ASSERT(HopLimitTTL < 256);
CXPLAT_DBG_ASSERT(HopLimitTTL > 0);
}
} else if (CMsg->cmsg_level == IPPROTO_IP) {
if (CMsg->cmsg_type == IP_PKTINFO) {
Expand All @@ -2250,6 +2315,10 @@ CxPlatDataPathSocketReceive(
} else if (CMsg->cmsg_type == IP_ECN) {
ECN = *(PINT)WSA_CMSG_DATA(CMsg);
CXPLAT_DBG_ASSERT(ECN < UINT8_MAX);
} else if (CMsg->cmsg_type == IP_TTL) {
HopLimitTTL = *(PINT)WSA_CMSG_DATA(CMsg);
CXPLAT_DBG_ASSERT(HopLimitTTL < 256);
CXPLAT_DBG_ASSERT(HopLimitTTL > 0);
}
} else if (CMsg->cmsg_level == IPPROTO_UDP) {
if (CMsg->cmsg_type == UDP_COALESCED_INFO) {
Expand Down Expand Up @@ -2416,6 +2485,7 @@ CxPlatDataPathSocketReceive(
Datagram->Data.Next = NULL;
Datagram->Data.PartitionIndex = (uint16_t)(CurProcNumber % Binding->Datapath->ProcCount);
Datagram->Data.TypeOfService = (uint8_t)ECN;
Datagram->Data.HopLimitTTL = (uint8_t)HopLimitTTL;
Datagram->Data.Allocated = TRUE;
Datagram->Data.QueuedOnConnection = FALSE;

Expand Down
Loading

0 comments on commit c761886

Please sign in to comment.