Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose TTL (on handshake) to applications #4602

Merged
merged 28 commits into from
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
3eea3d0
modify statistics
ProjectsByJackHe Oct 4, 2024
2037673
add winuser hoplimit-ttl support
ProjectsByJackHe Oct 7, 2024
e3357a0
rename to HandshakeTTL, change connection.c
ProjectsByJackHe Oct 8, 2024
4ee3e03
add datapath test to assert hoplimit be > 0
ProjectsByJackHe Oct 8, 2024
89bf07a
add winkernel hoplimit
ProjectsByJackHe Oct 8, 2024
0f3dd0f
add code to set TTL
ProjectsByJackHe Oct 8, 2024
ecd56a7
fix spacing
ProjectsByJackHe Oct 9, 2024
7c08057
implement raw socket parsing and add stubs for freeBSD and linux
ProjectsByJackHe Oct 15, 2024
88fd13e
comment out unused variable to get rid of build warnings
ProjectsByJackHe Oct 15, 2024
f061498
add epoll impl
ProjectsByJackHe Oct 16, 2024
576daf7
remove invalid asserts, add missing flag
ProjectsByJackHe Oct 16, 2024
57e9af3
type cast to int
ProjectsByJackHe Oct 16, 2024
a277d70
set epoll hoplimit instead of ip_ttl
ProjectsByJackHe Oct 16, 2024
3fa6aac
ip hoplimit no exist on linux
ProjectsByJackHe Oct 16, 2024
c79d192
gonna lean on the CI to see if we are crashing the socket init unit t…
ProjectsByJackHe Oct 28, 2024
0195cca
comment out ipv6
ProjectsByJackHe Oct 28, 2024
d2db352
does IP_RECVTTL socket option exist for ipv6 on linux?
ProjectsByJackHe Oct 28, 2024
3535b53
IPV6_HOPLIMIT the way to go?
ProjectsByJackHe Oct 29, 2024
800c484
stash; don't push
ProjectsByJackHe Oct 30, 2024
a642e97
try IPV6_RECVHOPLIMIT
ProjectsByJackHe Oct 31, 2024
f871e08
test code modifications
ProjectsByJackHe Nov 1, 2024
4e3f94e
update datapath to be os version aware
ProjectsByJackHe Nov 1, 2024
0c44652
add version checking for WS2022, bubble up info for the tests
ProjectsByJackHe Nov 1, 2024
aee8df6
fix typo for epoll, add winkernel checks
ProjectsByJackHe Nov 1, 2024
b248ef0
comment out printf to get winkernel to build
ProjectsByJackHe Nov 1, 2024
a0dffdb
update comment and remove enabled features
ProjectsByJackHe Nov 4, 2024
9e6d751
increase timeout for netperf due to recent changes
ProjectsByJackHe Nov 5, 2024
80f57ff
Merge remote-tracking branch 'origin/main' into jackhe/add-ttl-to-sta…
ProjectsByJackHe Nov 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/core/connection.c
Original file line number Diff line number Diff line change
Expand Up @@ -5637,6 +5637,9 @@ QuicConnRecvDatagrams(

if (!IsDeferred) {
Connection->Stats.Recv.TotalBytes += Packet->BufferLength;
if (Connection->Stats.Handshake.HandshakeHopLimitTTL == 0) {
Connection->Stats.Handshake.HandshakeHopLimitTTL = Packet->HopLimitTTL;
ProjectsByJackHe marked this conversation as resolved.
Show resolved Hide resolved
}
QuicConnLogInFlowStats(Connection);

if (!CurrentPath->IsPeerValidated) {
Expand Down Expand Up @@ -6822,6 +6825,10 @@ QuicConnGetV2Statistics(
Stats->SendEcnCongestionCount = Connection->Stats.Send.EcnCongestionCount;
}

if (STATISTICS_HAS_FIELD(*StatsLength, HandshakeHopLimitTTL)) {
Stats->HandshakeHopLimitTTL = Connection->Stats.Handshake.HandshakeHopLimitTTL;
}

*StatsLength = CXPLAT_MIN(*StatsLength, sizeof(QUIC_STATISTICS_V2));

return QUIC_STATUS_SUCCESS;
Expand Down
1 change: 1 addition & 0 deletions src/core/connection.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ typedef struct QUIC_CONN_STATS {
uint32_t ClientFlight1Bytes; // Sum of TLS payloads
uint32_t ServerFlight1Bytes; // Sum of TLS payloads
uint32_t ClientFlight2Bytes; // Sum of TLS payloads
uint8_t HandshakeHopLimitTTL; // TTL value in the initial packet of the handshake.
} Handshake;

struct {
Expand Down
2 changes: 2 additions & 0 deletions src/inc/msquic.h
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,8 @@ typedef struct QUIC_STATISTICS_V2 {

uint32_t SendEcnCongestionCount; // Number of congestion events caused by ECN.

uint8_t HandshakeHopLimitTTL; // The TTL value in the initial packet of the handshake.

// N.B. New fields must be appended to end

} QUIC_STATISTICS_V2;
Expand Down
5 changes: 5 additions & 0 deletions src/inc/quic_datapath.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,11 @@ typedef struct CXPLAT_RECV_DATA {
//
uint8_t TypeOfService;

//
// TTL (IPv4) or Hop Limit (IPv6) field from the IP header of the received packet.
//
uint8_t HopLimitTTL;
ProjectsByJackHe marked this conversation as resolved.
Show resolved Hide resolved

//
// Flags.
//
Expand Down
61 changes: 58 additions & 3 deletions src/platform/datapath_epoll.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,9 @@ typedef struct CXPLAT_SEND_DATA {
} CXPLAT_SEND_DATA;

typedef struct CXPLAT_RECV_MSG_CONTROL_BUFFER {
char Data[CMSG_SPACE(sizeof(struct in6_pktinfo)) +
2 * CMSG_SPACE(sizeof(int))];
char Data[CMSG_SPACE(sizeof(struct in6_pktinfo)) + // IP_PKTINFO
2 * CMSG_SPACE(sizeof(int)) // TOS
+ CMSG_SPACE(sizeof(int))]; // IP_TTL
Comment on lines +202 to +204
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit, this should have just been 3 * CMSG_SPACE(sizeof(int))

} CXPLAT_RECV_MSG_CONTROL_BUFFER;

#ifdef DEBUG
Expand Down Expand Up @@ -853,6 +854,46 @@ CxPlatSocketContextInitialize(
goto Exit;
}

// On Linux, IP_TTL is used instead of IP_HOPLIMIT on Windows.
Option = TRUE;
Result =
setsockopt(
SocketContext->SocketFd,
IPPROTO_IP,
IP_TTL,
(const void*)&Option,
sizeof(Option));
if (Result == SOCKET_ERROR) {
Status = errno;
QuicTraceEvent(
DatapathErrorStatus,
"[data][%p] ERROR, %u, %s.",
Binding,
Status,
"setsockopt(IP_TTL) failed");
goto Exit;
}

// On Linux, IPV6_UNICAST_HOPS is used instead of IPV6_HOPLIMIT on Windows.
Option = TRUE;
Result =
setsockopt(
SocketContext->SocketFd,
IPPROTO_IPV6,
IPV6_UNICAST_HOPS,
(const void*)&Option,
sizeof(Option));
if (Result == SOCKET_ERROR) {
Status = errno;
QuicTraceEvent(
DatapathErrorStatus,
"[data][%p] ERROR, %u, %s.",
Binding,
Status,
"setsockopt(IPV6_UNICAST_HOPS) failed");
goto Exit;
}

#ifdef UDP_GRO
if (SocketContext->DatapathPartition->Datapath->Features & CXPLAT_DATAPATH_FEATURE_RECV_COALESCING) {
Option = TRUE;
Expand Down Expand Up @@ -1782,8 +1823,9 @@ CxPlatSocketContextRecvComplete(
BytesTransferred += RecvMsgHdr[CurrentMessage].msg_len;

uint8_t TOS = 0;
uint8_t HopLimitTTL = 0;
uint16_t SegmentLength = 0;
BOOLEAN FoundLocalAddr = FALSE, FoundTOS = FALSE;
BOOLEAN FoundLocalAddr = FALSE, FoundTOS = FALSE, FoundTTL = FALSE;
QUIC_ADDR* LocalAddr = &IoBlock->Route.LocalAddress;
QUIC_ADDR* RemoteAddr = &IoBlock->Route.RemoteAddress;
CxPlatConvertFromMappedV6(RemoteAddr, RemoteAddr);
Expand All @@ -1808,6 +1850,12 @@ CxPlatSocketContextRecvComplete(
CXPLAT_DBG_ASSERT_CMSG(CMsg, uint8_t);
TOS = *(uint8_t*)CMSG_DATA(CMsg);
FoundTOS = TRUE;
} else if (CMsg->cmsg_type == IPV6_HOPLIMIT) {
CXPLAT_DBG_ASSERT_CMSG(CMsg, uint8_t);
HopLimitTTL = *(uint8_t*)CMSG_DATA(CMsg);
CXPLAT_DBG_ASSERT(HopLimitTTL < 256);
CXPLAT_DBG_ASSERT(HopLimitTTL > 0);
FoundTTL = TRUE;
} else {
CXPLAT_DBG_ASSERT(FALSE);
}
Expand All @@ -1816,6 +1864,11 @@ CxPlatSocketContextRecvComplete(
CXPLAT_DBG_ASSERT_CMSG(CMsg, uint8_t);
TOS = *(uint8_t*)CMSG_DATA(CMsg);
FoundTOS = TRUE;
} else if (CMsg->cmsg_type == IP_TTL) {
CXPLAT_DBG_ASSERT_CMSG(CMsg, uint8_t);
ProjectsByJackHe marked this conversation as resolved.
Show resolved Hide resolved
HopLimitTTL = *(uint8_t*)CMSG_DATA(CMsg);
CXPLAT_DBG_ASSERT(HopLimitTTL < 256);
CXPLAT_DBG_ASSERT(HopLimitTTL > 0);
ProjectsByJackHe marked this conversation as resolved.
Show resolved Hide resolved
} else {
CXPLAT_DBG_ASSERT(FALSE);
}
Expand All @@ -1833,6 +1886,7 @@ CxPlatSocketContextRecvComplete(

CXPLAT_FRE_ASSERT(FoundLocalAddr);
CXPLAT_FRE_ASSERT(FoundTOS);
CXPLAT_FRE_ASSERT(FoundTTL);

QuicTraceEvent(
DatapathRecv,
Expand Down Expand Up @@ -1872,6 +1926,7 @@ CxPlatSocketContextRecvComplete(
}
RecvData->PartitionIndex = SocketContext->DatapathPartition->PartitionIndex;
RecvData->TypeOfService = TOS;
RecvData->HopLimitTTL = HopLimitTTL;
RecvData->Allocated = TRUE;
RecvData->Route->DatapathType = RecvData->DatapathType = CXPLAT_DATAPATH_TYPE_USER;
RecvData->QueuedOnConnection = FALSE;
Expand Down
1 change: 1 addition & 0 deletions src/platform/datapath_kqueue.c
Original file line number Diff line number Diff line change
Expand Up @@ -1127,6 +1127,7 @@ CxPlatSocketContextRecvComplete(

RecvPacket->Route->Queue = SocketContext;
RecvPacket->TypeOfService = 0;
RecvPacket->HopLimitTTL = 0; // TODO: Check to see if it's even possible to get this information on macOS and FreeBSD. If so, implement it. Otherwise, document that it's not supported.
ProjectsByJackHe marked this conversation as resolved.
Show resolved Hide resolved

struct cmsghdr *CMsg;
for (CMsg = CMSG_FIRSTHDR(&SocketContext->RecvMsgHdr);
Expand Down
2 changes: 2 additions & 0 deletions src/platform/datapath_raw_socket.c
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ CxPlatDpRawParseIPv4(
}

Packet->TypeOfService = IP->EcnField;
Packet->HopLimitTTL = IP->TimeToLive;
Packet->Route->RemoteAddress.Ipv4.sin_family = AF_INET;
CxPlatCopyMemory(&Packet->Route->RemoteAddress.Ipv4.sin_addr, IP->Source, sizeof(IP->Source));
Packet->Route->LocalAddress.Ipv4.sin_family = AF_INET;
Expand Down Expand Up @@ -366,6 +367,7 @@ CxPlatDpRawParseIPv6(
VersionClassEcnFlow.Value = CxPlatByteSwapUint32(IP->VersionClassEcnFlow);

Packet->TypeOfService = (uint8_t)VersionClassEcnFlow.EcnField;
Packet->HopLimitTTL = IP->HopLimit;
Packet->Route->RemoteAddress.Ipv6.sin6_family = AF_INET6;
CxPlatCopyMemory(&Packet->Route->RemoteAddress.Ipv6.sin6_addr, IP->Source, sizeof(IP->Source));
Packet->Route->LocalAddress.Ipv6.sin6_family = AF_INET6;
Expand Down
48 changes: 48 additions & 0 deletions src/platform/datapath_winkernel.c
Original file line number Diff line number Diff line change
Expand Up @@ -1672,6 +1672,44 @@ CxPlatSocketCreateUdp(
goto Error;
}

Option = TRUE;
Status =
CxPlatDataPathSetControlSocket(
Binding,
WskSetOption,
IP_HOPLIMIT,
IPPROTO_IP,
sizeof(Option),
&Option);
if (QUIC_FAILED(Status)) {
QuicTraceEvent(
DatapathErrorStatus,
"[data][%p] ERROR, %u, %s.",
Binding,
Status,
"Set IP_HOPLIMIT");
goto Error;
}

Option = TRUE;
Status =
CxPlatDataPathSetControlSocket(
Binding,
WskSetOption,
IPV6_HOPLIMIT,
IPPROTO_IPV6,
sizeof(Option),
&Option);
if (QUIC_FAILED(Status)) {
QuicTraceEvent(
DatapathErrorStatus,
"[data][%p] ERROR, %u, %s.",
Binding,
Status,
"Set IPV6_HOPLIMIT");
goto Error;
}

if (Datapath->Features & CXPLAT_DATAPATH_FEATURE_RECV_COALESCING) {
Option = MAX_URO_PAYLOAD_LENGTH;
Status =
Expand Down Expand Up @@ -2200,6 +2238,7 @@ CxPlatDataPathSocketReceive(
SOCKADDR_INET RemoteAddr;
UINT16 MessageLength = 0;
INT ECN = 0;
INT HopLimitTTL = 0;

//
// Parse the ancillary data for all the per datagram information that we
Expand Down Expand Up @@ -2231,6 +2270,10 @@ CxPlatDataPathSocketReceive(
} else if (CMsg->cmsg_type == IPV6_ECN) {
ECN = *(PINT)WSA_CMSG_DATA(CMsg);
CXPLAT_DBG_ASSERT(ECN < UINT8_MAX);
} else if (CMsg->cmsg_type == IPV6_HOPLIMIT) {
HopLimitTTL = *(PINT)WSA_CMSG_DATA(CMsg);
CXPLAT_DBG_ASSERT(HopLimitTTL < 256);
CXPLAT_DBG_ASSERT(HopLimitTTL > 0);
}
} else if (CMsg->cmsg_level == IPPROTO_IP) {
if (CMsg->cmsg_type == IP_PKTINFO) {
Expand All @@ -2250,6 +2293,10 @@ CxPlatDataPathSocketReceive(
} else if (CMsg->cmsg_type == IP_ECN) {
ECN = *(PINT)WSA_CMSG_DATA(CMsg);
CXPLAT_DBG_ASSERT(ECN < UINT8_MAX);
} else if (CMsg->cmsg_type == IP_TTL) {
HopLimitTTL = *(PINT)WSA_CMSG_DATA(CMsg);
CXPLAT_DBG_ASSERT(HopLimitTTL < 256);
CXPLAT_DBG_ASSERT(HopLimitTTL > 0);
}
} else if (CMsg->cmsg_level == IPPROTO_UDP) {
if (CMsg->cmsg_type == UDP_COALESCED_INFO) {
Expand Down Expand Up @@ -2416,6 +2463,7 @@ CxPlatDataPathSocketReceive(
Datagram->Data.Next = NULL;
Datagram->Data.PartitionIndex = (uint16_t)(CurProcNumber % Binding->Datapath->ProcCount);
Datagram->Data.TypeOfService = (uint8_t)ECN;
Datagram->Data.HopLimitTTL = (uint8_t)HopLimitTTL;
Datagram->Data.Allocated = TRUE;
Datagram->Data.QueuedOnConnection = FALSE;

Expand Down
54 changes: 52 additions & 2 deletions src/platform/datapath_winuser.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,8 @@ typedef struct DATAPATH_RX_IO_BLOCK {
RIO_CMSG_BASE_SIZE +
WSA_CMSG_SPACE(sizeof(IN6_PKTINFO)) + // IP_PKTINFO
WSA_CMSG_SPACE(sizeof(DWORD)) + // UDP_COALESCED_INFO
WSA_CMSG_SPACE(sizeof(INT)) // IP_ECN
WSA_CMSG_SPACE(sizeof(INT)) + // IP_ECN
WSA_CMSG_SPACE(sizeof(INT)) // IP_HOP_LIMIT
];

} DATAPATH_RX_IO_BLOCK;
Expand Down Expand Up @@ -1403,6 +1404,46 @@ SocketCreateUdp(
goto Error;
}

Option = TRUE;
Result =
setsockopt(
SocketProc->Socket,
IPPROTO_IP,
IP_HOPLIMIT,
(char*)&Option,
sizeof(Option));
if (Result == SOCKET_ERROR) {
int WsaError = WSAGetLastError();
QuicTraceEvent(
DatapathErrorStatus,
"[data][%p] ERROR, %u, %s.",
Socket,
WsaError,
"Set IP_HOPLIMIT");
Status = HRESULT_FROM_WIN32(WsaError);
goto Error;
}

Option = TRUE;
Result =
setsockopt(
SocketProc->Socket,
IPPROTO_IPV6,
IPV6_HOPLIMIT,
(char*)&Option,
sizeof(Option));
if (Result == SOCKET_ERROR) {
int WsaError = WSAGetLastError();
QuicTraceEvent(
DatapathErrorStatus,
"[data][%p] ERROR, %u, %s.",
Socket,
WsaError,
"Set IPV6_HOPLIMIT");
Status = HRESULT_FROM_WIN32(WsaError);
goto Error;
}

//
// The socket is shared by multiple endpoints, so increase the receive
// buffer size.
Expand Down Expand Up @@ -3103,7 +3144,7 @@ CxPlatDataPathUdpRecvComplete(
ULONG MessageCount = 0;
BOOLEAN IsCoalesced = FALSE;
INT ECN = 0;

INT HopLimitTTL = 0;
if (SocketProc->Parent->UseRio) {
PRIO_CMSG_BUFFER RioRcvMsg = (PRIO_CMSG_BUFFER)IoBlock->ControlBuf;
IoBlock->WsaMsgHdr.Control.buf = IoBlock->ControlBuf + RIO_CMSG_BASE_SIZE;
Expand All @@ -3126,6 +3167,10 @@ CxPlatDataPathUdpRecvComplete(
} else if (CMsg->cmsg_type == IPV6_ECN) {
ECN = *(PINT)WSA_CMSG_DATA(CMsg);
CXPLAT_DBG_ASSERT(ECN < UINT8_MAX);
} else if (CMsg->cmsg_type == IPV6_HOPLIMIT) {
HopLimitTTL = *(PINT)WSA_CMSG_DATA(CMsg);
CXPLAT_DBG_ASSERT(HopLimitTTL < 256);
CXPLAT_DBG_ASSERT(HopLimitTTL > 0);
}
} else if (CMsg->cmsg_level == IPPROTO_IP) {
if (CMsg->cmsg_type == IP_PKTINFO) {
Expand All @@ -3138,6 +3183,10 @@ CxPlatDataPathUdpRecvComplete(
} else if (CMsg->cmsg_type == IP_ECN) {
ECN = *(PINT)WSA_CMSG_DATA(CMsg);
CXPLAT_DBG_ASSERT(ECN < UINT8_MAX);
} else if (CMsg->cmsg_type == IP_TTL) {
HopLimitTTL = *(PINT)WSA_CMSG_DATA(CMsg);
CXPLAT_DBG_ASSERT(HopLimitTTL < 256);
CXPLAT_DBG_ASSERT(HopLimitTTL > 0);
}
} else if (CMsg->cmsg_level == IPPROTO_UDP) {
if (CMsg->cmsg_type == UDP_COALESCED_INFO) {
Expand Down Expand Up @@ -3195,6 +3244,7 @@ CxPlatDataPathUdpRecvComplete(
Datagram->PartitionIndex =
SocketProc->DatapathProc->PartitionIndex % SocketProc->DatapathProc->Datapath->PartitionCount;
Datagram->TypeOfService = (uint8_t)ECN;
Datagram->HopLimitTTL = (uint8_t) HopLimitTTL;
Datagram->Allocated = TRUE;
Datagram->Route->DatapathType = Datagram->DatapathType = CXPLAT_DATAPATH_TYPE_USER;
Datagram->QueuedOnConnection = FALSE;
Expand Down
1 change: 1 addition & 0 deletions src/platform/unittest/DataPathTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ struct DataPathTest : public ::testing::TestWithParam<int32_t>
while (RecvData != NULL) {
ASSERT_EQ(RecvData->BufferLength, ExpectedDataSize);
ASSERT_EQ(0, memcmp(RecvData->Buffer, ExpectedData, ExpectedDataSize));
ASSERT_TRUE(RecvData->HopLimitTTL > 0);

if (RecvData->Route->LocalAddress.Ipv4.sin_port == RecvContext->DestinationAddress.Ipv4.sin_port) {

Expand Down
3 changes: 3 additions & 0 deletions src/test/lib/HandshakeTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,9 @@ QuicTestConnect(
}
TEST_TRUE(Client.GetIsConnected());

// After handshake, check and see if we have cached the TTL of the handshake packet.
TEST_TRUE(Client.GetStatistics().HandshakeHopLimitTTL > 0);

TEST_NOT_EQUAL(nullptr, Server);
Server->SetSslKeyLogFilePath();
if (!Server->WaitForConnectionComplete()) {
Expand Down
Loading