Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose TTL (on handshake) to applications #4602

Merged
merged 28 commits into from
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
3eea3d0
modify statistics
ProjectsByJackHe Oct 4, 2024
2037673
add winuser hoplimit-ttl support
ProjectsByJackHe Oct 7, 2024
e3357a0
rename to HandshakeTTL, change connection.c
ProjectsByJackHe Oct 8, 2024
4ee3e03
add datapath test to assert hoplimit be > 0
ProjectsByJackHe Oct 8, 2024
89bf07a
add winkernel hoplimit
ProjectsByJackHe Oct 8, 2024
0f3dd0f
add code to set TTL
ProjectsByJackHe Oct 8, 2024
ecd56a7
fix spacing
ProjectsByJackHe Oct 9, 2024
7c08057
implement raw socket parsing and add stubs for freeBSD and linux
ProjectsByJackHe Oct 15, 2024
88fd13e
comment out unused variable to get rid of build warnings
ProjectsByJackHe Oct 15, 2024
f061498
add epoll impl
ProjectsByJackHe Oct 16, 2024
576daf7
remove invalid asserts, add missing flag
ProjectsByJackHe Oct 16, 2024
57e9af3
type cast to int
ProjectsByJackHe Oct 16, 2024
a277d70
set epoll hoplimit instead of ip_ttl
ProjectsByJackHe Oct 16, 2024
3fa6aac
ip hoplimit no exist on linux
ProjectsByJackHe Oct 16, 2024
c79d192
gonna lean on the CI to see if we are crashing the socket init unit t…
ProjectsByJackHe Oct 28, 2024
0195cca
comment out ipv6
ProjectsByJackHe Oct 28, 2024
d2db352
does IP_RECVTTL socket option exist for ipv6 on linux?
ProjectsByJackHe Oct 28, 2024
3535b53
IPV6_HOPLIMIT the way to go?
ProjectsByJackHe Oct 29, 2024
800c484
stash; don't push
ProjectsByJackHe Oct 30, 2024
a642e97
try IPV6_RECVHOPLIMIT
ProjectsByJackHe Oct 31, 2024
f871e08
test code modifications
ProjectsByJackHe Nov 1, 2024
4e3f94e
update datapath to be os version aware
ProjectsByJackHe Nov 1, 2024
0c44652
add version checking for WS2022, bubble up info for the tests
ProjectsByJackHe Nov 1, 2024
aee8df6
fix typo for epoll, add winkernel checks
ProjectsByJackHe Nov 1, 2024
b248ef0
comment out printf to get winkernel to build
ProjectsByJackHe Nov 1, 2024
a0dffdb
update comment and remove enabled features
ProjectsByJackHe Nov 4, 2024
9e6d751
increase timeout for netperf due to recent changes
ProjectsByJackHe Nov 5, 2024
80f57ff
Merge remote-tracking branch 'origin/main' into jackhe/add-ttl-to-sta…
ProjectsByJackHe Nov 5, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/netperf.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ jobs:
runs-on: windows-latest
steps:
- name: Run NetPerf Workflow
timeout-minutes: 90
timeout-minutes: 120
shell: pwsh
run: |
$url = "https://raw.githubusercontent.com/microsoft/netperf/main/run-workflow.ps1"
Expand Down
7 changes: 7 additions & 0 deletions src/core/connection.c
Original file line number Diff line number Diff line change
Expand Up @@ -5638,6 +5638,9 @@ QuicConnRecvDatagrams(

if (!IsDeferred) {
Connection->Stats.Recv.TotalBytes += Packet->BufferLength;
if (Connection->Stats.Handshake.HandshakeHopLimitTTL == 0) {
Connection->Stats.Handshake.HandshakeHopLimitTTL = Packet->HopLimitTTL;
ProjectsByJackHe marked this conversation as resolved.
Show resolved Hide resolved
}
QuicConnLogInFlowStats(Connection);

if (!CurrentPath->IsPeerValidated) {
Expand Down Expand Up @@ -6823,6 +6826,10 @@ QuicConnGetV2Statistics(
Stats->SendEcnCongestionCount = Connection->Stats.Send.EcnCongestionCount;
}

if (STATISTICS_HAS_FIELD(*StatsLength, HandshakeHopLimitTTL)) {
Stats->HandshakeHopLimitTTL = Connection->Stats.Handshake.HandshakeHopLimitTTL;
}

*StatsLength = CXPLAT_MIN(*StatsLength, sizeof(QUIC_STATISTICS_V2));

return QUIC_STATUS_SUCCESS;
Expand Down
1 change: 1 addition & 0 deletions src/core/connection.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ typedef struct QUIC_CONN_STATS {
uint32_t ClientFlight1Bytes; // Sum of TLS payloads
uint32_t ServerFlight1Bytes; // Sum of TLS payloads
uint32_t ClientFlight2Bytes; // Sum of TLS payloads
uint8_t HandshakeHopLimitTTL; // TTL value in the initial packet of the handshake.
} Handshake;

struct {
Expand Down
2 changes: 2 additions & 0 deletions src/inc/msquic.h
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,8 @@ typedef struct QUIC_STATISTICS_V2 {

uint32_t SendEcnCongestionCount; // Number of congestion events caused by ECN.

uint8_t HandshakeHopLimitTTL; // The TTL value in the initial packet of the handshake.

// N.B. New fields must be appended to end

} QUIC_STATISTICS_V2;
Expand Down
6 changes: 6 additions & 0 deletions src/inc/quic_datapath.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,11 @@ typedef struct CXPLAT_RECV_DATA {
//
uint8_t TypeOfService;

//
// TTL Hoplimit field of the IP header of the received packet on handshake.
//
uint8_t HopLimitTTL;
ProjectsByJackHe marked this conversation as resolved.
Show resolved Hide resolved

//
// Flags.
//
Expand Down Expand Up @@ -438,6 +443,7 @@ CxPlatDataPathUpdateConfig(
#define CXPLAT_DATAPATH_FEATURE_PORT_RESERVATIONS 0x0010
#define CXPLAT_DATAPATH_FEATURE_TCP 0x0020
#define CXPLAT_DATAPATH_FEATURE_RAW 0x0040
#define CXPLAT_DATAPATH_FEATURE_TTL 0x0080

//
// Queries the currently supported features of the datapath.
Expand Down
73 changes: 70 additions & 3 deletions src/platform/datapath_epoll.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,9 @@ typedef struct CXPLAT_SEND_DATA {
} CXPLAT_SEND_DATA;

typedef struct CXPLAT_RECV_MSG_CONTROL_BUFFER {
char Data[CMSG_SPACE(sizeof(struct in6_pktinfo)) +
2 * CMSG_SPACE(sizeof(int))];
char Data[CMSG_SPACE(sizeof(struct in6_pktinfo)) + // IP_PKTINFO
2 * CMSG_SPACE(sizeof(int)) // TOS
+ CMSG_SPACE(sizeof(int))]; // IP_TTL
Comment on lines +202 to +204
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit, this should have just been 3 * CMSG_SPACE(sizeof(int))

} CXPLAT_RECV_MSG_CONTROL_BUFFER;

#ifdef DEBUG
Expand Down Expand Up @@ -344,6 +345,10 @@ CxPlatDataPathCalculateFeatureSupport(
}

Datapath->Features |= CXPLAT_DATAPATH_FEATURE_TCP;
//
// TTL should always be available / enabled on Linux.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unnecessary

//
Datapath->Features |= CXPLAT_DATAPATH_FEATURE_TTL;
}

void
Expand Down Expand Up @@ -853,6 +858,52 @@ CxPlatSocketContextInitialize(
goto Exit;
}

//
// TTL should always be available / enabled on Linux.
//

//
// On Linux, IP_HOPLIMIT does not exist. So we will use IP_RECVTTL, IPV6_RECVHOPLIMIT instead.
//
Comment on lines +861 to +867
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comments unnecessary. We don't have it for all the others.

Option = TRUE;
Result =
setsockopt(
SocketContext->SocketFd,
IPPROTO_IP,
IP_RECVTTL,
(const void*)&Option,
sizeof(Option));
if (Result == SOCKET_ERROR) {
Status = errno;
QuicTraceEvent(
DatapathErrorStatus,
"[data][%p] ERROR, %u, %s.",
Binding,
Status,
"setsockopt(IP_RECVTTL) failed");
goto Exit;
}

Option = TRUE;
Result =
setsockopt(
SocketContext->SocketFd,
IPPROTO_IPV6,
IPV6_RECVHOPLIMIT,
(const void*)&Option,
sizeof(Option));
if (Result == SOCKET_ERROR) {
Status = errno;
QuicTraceEvent(
DatapathErrorStatus,
"[data][%p] ERROR, %u, %s.",
Binding,
Status,
"setsockopt(IPV6_RECVHOPLIMIT) failed");
goto Exit;
}


#ifdef UDP_GRO
if (SocketContext->DatapathPartition->Datapath->Features & CXPLAT_DATAPATH_FEATURE_RECV_COALESCING) {
Option = TRUE;
Expand Down Expand Up @@ -1782,8 +1833,9 @@ CxPlatSocketContextRecvComplete(
BytesTransferred += RecvMsgHdr[CurrentMessage].msg_len;

uint8_t TOS = 0;
int HopLimitTTL = 0;
uint16_t SegmentLength = 0;
BOOLEAN FoundLocalAddr = FALSE, FoundTOS = FALSE;
BOOLEAN FoundLocalAddr = FALSE, FoundTOS = FALSE, FoundTTL = FALSE;
QUIC_ADDR* LocalAddr = &IoBlock->Route.LocalAddress;
QUIC_ADDR* RemoteAddr = &IoBlock->Route.RemoteAddress;
CxPlatConvertFromMappedV6(RemoteAddr, RemoteAddr);
Expand All @@ -1808,6 +1860,11 @@ CxPlatSocketContextRecvComplete(
CXPLAT_DBG_ASSERT_CMSG(CMsg, uint8_t);
TOS = *(uint8_t*)CMSG_DATA(CMsg);
FoundTOS = TRUE;
} else if (CMsg->cmsg_type == IPV6_HOPLIMIT) {
HopLimitTTL = *CMSG_DATA(CMsg);
CXPLAT_DBG_ASSERT(HopLimitTTL < 256);
CXPLAT_DBG_ASSERT(HopLimitTTL > 0);
FoundTTL = TRUE;
} else {
CXPLAT_DBG_ASSERT(FALSE);
}
Expand All @@ -1816,6 +1873,11 @@ CxPlatSocketContextRecvComplete(
CXPLAT_DBG_ASSERT_CMSG(CMsg, uint8_t);
TOS = *(uint8_t*)CMSG_DATA(CMsg);
FoundTOS = TRUE;
} else if (CMsg->cmsg_type == IP_TTL) {
HopLimitTTL = *CMSG_DATA(CMsg);
CXPLAT_DBG_ASSERT(HopLimitTTL < 256);
CXPLAT_DBG_ASSERT(HopLimitTTL > 0);
ProjectsByJackHe marked this conversation as resolved.
Show resolved Hide resolved
FoundTTL = TRUE;
} else {
CXPLAT_DBG_ASSERT(FALSE);
}
Expand All @@ -1833,6 +1895,10 @@ CxPlatSocketContextRecvComplete(

CXPLAT_FRE_ASSERT(FoundLocalAddr);
CXPLAT_FRE_ASSERT(FoundTOS);
//
// TTL should always be available/enabled on Linux.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comment not really needed

//
CXPLAT_FRE_ASSERT(FoundTTL);

QuicTraceEvent(
DatapathRecv,
Expand Down Expand Up @@ -1872,6 +1938,7 @@ CxPlatSocketContextRecvComplete(
}
RecvData->PartitionIndex = SocketContext->DatapathPartition->PartitionIndex;
RecvData->TypeOfService = TOS;
RecvData->HopLimitTTL = (uint8_t)HopLimitTTL;
RecvData->Allocated = TRUE;
RecvData->Route->DatapathType = RecvData->DatapathType = CXPLAT_DATAPATH_TYPE_USER;
RecvData->QueuedOnConnection = FALSE;
Expand Down
4 changes: 4 additions & 0 deletions src/platform/datapath_kqueue.c
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,9 @@ CxPlatDataPathGetSupportedFeatures(
_In_ CXPLAT_DATAPATH* Datapath
)
{
//
// Intentionally not enabling Feature_TTL on MacOS for now.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comment not needed

//
return Datapath->Features;
}

Expand Down Expand Up @@ -1127,6 +1130,7 @@ CxPlatSocketContextRecvComplete(

RecvPacket->Route->Queue = SocketContext;
RecvPacket->TypeOfService = 0;
RecvPacket->HopLimitTTL = 0; // TODO: We are not supporting this on MacOS (yet) unless there's a business need.

struct cmsghdr *CMsg;
for (CMsg = CMSG_FIRSTHDR(&SocketContext->RecvMsgHdr);
Expand Down
5 changes: 4 additions & 1 deletion src/platform/datapath_raw.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,10 @@ RawDataPathGetSupportedFeatures(
)
{
UNREFERENCED_PARAMETER(Datapath);
return CXPLAT_DATAPATH_FEATURE_RAW;
//
// TTL should always be available / enabled for XDP.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comment not really needed

//
return CXPLAT_DATAPATH_FEATURE_RAW | CXPLAT_DATAPATH_FEATURE_TTL;
}

_IRQL_requires_max_(DISPATCH_LEVEL)
Expand Down
2 changes: 2 additions & 0 deletions src/platform/datapath_raw_socket.c
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ CxPlatDpRawParseIPv4(
}

Packet->TypeOfService = IP->EcnField;
Packet->HopLimitTTL = IP->TimeToLive;
Packet->Route->RemoteAddress.Ipv4.sin_family = AF_INET;
CxPlatCopyMemory(&Packet->Route->RemoteAddress.Ipv4.sin_addr, IP->Source, sizeof(IP->Source));
Packet->Route->LocalAddress.Ipv4.sin_family = AF_INET;
Expand Down Expand Up @@ -366,6 +367,7 @@ CxPlatDpRawParseIPv6(
VersionClassEcnFlow.Value = CxPlatByteSwapUint32(IP->VersionClassEcnFlow);

Packet->TypeOfService = (uint8_t)VersionClassEcnFlow.EcnField;
Packet->HopLimitTTL = IP->HopLimit;
Packet->Route->RemoteAddress.Ipv6.sin6_family = AF_INET6;
CxPlatCopyMemory(&Packet->Route->RemoteAddress.Ipv6.sin6_addr, IP->Source, sizeof(IP->Source));
Packet->Route->LocalAddress.Ipv6.sin6_family = AF_INET6;
Expand Down
70 changes: 70 additions & 0 deletions src/platform/datapath_winkernel.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
--*/

#include "platform_internal.h"

#ifdef QUIC_CLOG
#include "datapath_winkernel.c.clog.h"
#endif
Expand Down Expand Up @@ -765,6 +766,25 @@ CxPlatDataPathQuerySockoptSupport(

} while (FALSE);

do {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO, a do/while loop here is overkill. You could have just had the if statement, and only added the feature flag if the status was success and build not equal to 20348.

RTL_OSVERSIONINFOW osInfo;
RtlZeroMemory(&osInfo, sizeof(osInfo));
osInfo.dwOSVersionInfoSize = sizeof(osInfo);
NTSTATUS status = RtlGetVersion(&osInfo);
if (NT_SUCCESS(status)) {
DWORD BuildNumber = osInfo.dwBuildNumber;
//
// Some USO/URO bug blocks TTL feature support on Windows Server 2022.
//
if (BuildNumber == 20348) {
break;
}
} else {
break;
}
Datapath->Features |= CXPLAT_DATAPATH_FEATURE_TTL;
} while (FALSE);

Error:

if (UdpSocket != NULL) {
Expand Down Expand Up @@ -1672,6 +1692,46 @@ CxPlatSocketCreateUdp(
goto Error;
}

if (Datapath->Features & CXPLAT_DATAPATH_FEATURE_TTL) {
Option = TRUE;
Status =
CxPlatDataPathSetControlSocket(
Binding,
WskSetOption,
IP_HOPLIMIT,
IPPROTO_IP,
sizeof(Option),
&Option);
if (QUIC_FAILED(Status)) {
QuicTraceEvent(
DatapathErrorStatus,
"[data][%p] ERROR, %u, %s.",
Binding,
Status,
"Set IP_HOPLIMIT");
goto Error;
}

Option = TRUE;
Status =
CxPlatDataPathSetControlSocket(
Binding,
WskSetOption,
IPV6_HOPLIMIT,
IPPROTO_IPV6,
sizeof(Option),
&Option);
if (QUIC_FAILED(Status)) {
QuicTraceEvent(
DatapathErrorStatus,
"[data][%p] ERROR, %u, %s.",
Binding,
Status,
"Set IPV6_HOPLIMIT");
goto Error;
}
}

if (Datapath->Features & CXPLAT_DATAPATH_FEATURE_RECV_COALESCING) {
Option = MAX_URO_PAYLOAD_LENGTH;
Status =
Expand Down Expand Up @@ -2200,6 +2260,7 @@ CxPlatDataPathSocketReceive(
SOCKADDR_INET RemoteAddr;
UINT16 MessageLength = 0;
INT ECN = 0;
INT HopLimitTTL = 0;

//
// Parse the ancillary data for all the per datagram information that we
Expand Down Expand Up @@ -2231,6 +2292,10 @@ CxPlatDataPathSocketReceive(
} else if (CMsg->cmsg_type == IPV6_ECN) {
ECN = *(PINT)WSA_CMSG_DATA(CMsg);
CXPLAT_DBG_ASSERT(ECN < UINT8_MAX);
} else if (CMsg->cmsg_type == IPV6_HOPLIMIT) {
HopLimitTTL = *(PINT)WSA_CMSG_DATA(CMsg);
CXPLAT_DBG_ASSERT(HopLimitTTL < 256);
CXPLAT_DBG_ASSERT(HopLimitTTL > 0);
}
} else if (CMsg->cmsg_level == IPPROTO_IP) {
if (CMsg->cmsg_type == IP_PKTINFO) {
Expand All @@ -2250,6 +2315,10 @@ CxPlatDataPathSocketReceive(
} else if (CMsg->cmsg_type == IP_ECN) {
ECN = *(PINT)WSA_CMSG_DATA(CMsg);
CXPLAT_DBG_ASSERT(ECN < UINT8_MAX);
} else if (CMsg->cmsg_type == IP_TTL) {
HopLimitTTL = *(PINT)WSA_CMSG_DATA(CMsg);
CXPLAT_DBG_ASSERT(HopLimitTTL < 256);
CXPLAT_DBG_ASSERT(HopLimitTTL > 0);
}
} else if (CMsg->cmsg_level == IPPROTO_UDP) {
if (CMsg->cmsg_type == UDP_COALESCED_INFO) {
Expand Down Expand Up @@ -2416,6 +2485,7 @@ CxPlatDataPathSocketReceive(
Datagram->Data.Next = NULL;
Datagram->Data.PartitionIndex = (uint16_t)(CurProcNumber % Binding->Datapath->ProcCount);
Datagram->Data.TypeOfService = (uint8_t)ECN;
Datagram->Data.HopLimitTTL = (uint8_t)HopLimitTTL;
Datagram->Data.Allocated = TRUE;
Datagram->Data.QueuedOnConnection = FALSE;

Expand Down
Loading
Loading