udp,gre: log actionable PMTU hint on EMSGSIZE

When a send fails with EMSGSIZE, log the packet size and suggest
reducing the application payload. Rate-limited to one message per
peer per 5 seconds. Both simple-profile and main-profile send paths
use the same helper.
This commit is contained in:
Sergio Ammirata
2026-05-24 03:31:39 -04:00
parent 0b8d3d849a
commit 00ee7c4027
4 changed files with 58 additions and 4 deletions
+2 -2
View File
@@ -198,8 +198,8 @@ ssize_t _librist_proto_gre_send_data(struct rist_peer *p, uint8_t payload_type,
}
if (RIST_UNLIKELY(errorcode)) {
struct rist_common_ctx *ctx = get_cctx(p);
rist_log_priv(ctx, RIST_LOG_ERROR, "Send failed: errno=%d, reason=%s, ret=%d, socket=%d, retries=%d\n", errorcode, strerror(errorcode), ret, p->sd, retries);
_librist_log_send_error(p, errorcode, (size_t)(hdr_len + payload_len),
"main-profile sendmsg");
} else if (RIST_UNLIKELY(retries > RIST_MAX_SEND_RETRIES / 5)) {
struct rist_common_ctx *ctx = get_cctx(p);
rist_log_priv(ctx, RIST_LOG_WARN, "Send Succeded after retries=%d, ret=%d, socket=%d\n", retries, ret, p->sd);
+5
View File
@@ -639,6 +639,11 @@ struct rist_peer {
uint8_t mac_addr[6];
bool send_first_connection_event;
/* Rate-limit for EMSGSIZE/PMTU-too-large send errors so a flow of
* oversized packets doesn't drown the log. Holds the last log time
* in NTP ticks. */
uint64_t last_pmtu_error_log;
uint64_t log_repeat_timer;
uint8_t data[SIZEOF_GRE_KEEPALIVE];
+6
View File
@@ -42,5 +42,11 @@ RIST_PRIV int rist_set_url(struct rist_peer *peer);
RIST_PRIV void rist_create_socket(struct rist_peer *peer);
RIST_PRIV size_t rist_get_sender_retry_queue_size(struct rist_sender *ctx);
/* Log a send failure for peer p. When sock_errno is EMSGSIZE (WSAEMSGSIZE
 * on Windows builds) a PMTU-specific hint is emitted, and only that hint
 * is rate-limited per peer; any other error produces the generic failure
 * line on every call, with no rate limit.
 *
 * p          peer the failed send belongs to.
 * sock_errno errno (or WSAGetLastError on Windows) captured immediately
 *            after the failing send call.
 * attempted  the size we tried to send, in bytes.
 * origin     short label naming the calling send path; it is printed
 *            verbatim in the log line. */
RIST_PRIV void _librist_log_send_error(struct rist_peer *p, int sock_errno,
size_t attempted, const char *origin);
#endif
+45 -2
View File
@@ -28,6 +28,44 @@
#include <assert.h>
#include <fcntl.h>
/* Report a failed send for peer p.
 *
 * p          peer whose send failed. Its sd field is printed in the generic
 *            message; its last_pmtu_error_log field is read, and rewritten
 *            whenever a PMTU message is actually emitted.
 * sock_errno error code of the failed send, compared against EMSGSIZE
 *            (WSAEMSGSIZE when built with _WIN32).
 * attempted  size in bytes of the datagram that was being sent.
 * origin     short label for the calling send path, printed verbatim.
 *
 * A "message too large" error gets a long, deliberately actionable hint so
 * that an operator can fix it by lowering the application packet size
 * (e.g. risttunnel -m) instead of chasing ICMP filtering. That hint is
 * throttled per peer so a stream of oversized packets does not drown the
 * log. Every other error gets a one-line generic message and is NOT
 * throttled.
 *
 * NOTE(review): the throttle window is 5 * 65536 * 1000 ticks of
 * timestampNTP_u64(). Confirm that 65536 * 1000 ticks equal one second on
 * that clock; if it advances 2^32 ticks per second the window is far
 * shorter than the 5s the message promises. */
void _librist_log_send_error(struct rist_peer *p, int sock_errno,
		size_t attempted, const char *origin)
{
	struct rist_common_ctx *cctx = get_cctx(p);
	const uint64_t stamp = timestampNTP_u64();
	const uint64_t quiet_window = (uint64_t)5 * 65536 * 1000;
#ifdef _WIN32
	const bool too_big = (sock_errno == WSAEMSGSIZE);
#else
	const bool too_big = (sock_errno == EMSGSIZE);
#endif

	/* Anything other than "message too large": plain line, no throttling. */
	if (!too_big) {
		rist_log_priv(cctx, RIST_LOG_ERROR,
			"Send failed via %s: errno=%d, size=%zu, socket=%d\n",
			origin, sock_errno, attempted, p->sd);
		return;
	}

	/* A stored value of 0 means "never logged yet", so the first PMTU
	 * error on a peer always gets through. */
	const uint64_t previous = p->last_pmtu_error_log;
	if (previous != 0 && (stamp - previous) < quiet_window) {
		return;
	}

	p->last_pmtu_error_log = stamp;
	rist_log_priv(cctx, RIST_LOG_ERROR,
		"PMTU exceeded sending %zu-byte datagram via %s (errno=%d). The path "
		"MTU to this peer is smaller than our RIST packet size and the "
		"don't-fragment bit is set, so the kernel refused to fragment. "
		"Lower the application packet size (e.g. risttunnel -m, or upstream "
		"MPEG-TS UDP size) so that payload + ~40 bytes of headers fits in the "
		"path MTU. Further PMTU errors on this peer will be silenced for 5s.\n",
		attempted, origin, sock_errno);
}
size_t rist_send_seq_rtcp(struct rist_peer *p, uint16_t seq_rtp, uint8_t payload_type, uint8_t *payload, size_t payload_len, uint64_t source_time, uint16_t src_port, uint16_t dst_port, bool retry, uint16_t ts_null_bytes)
{
struct rist_common_ctx *ctx = get_cctx(p);
@@ -120,14 +158,19 @@ size_t rist_send_seq_rtcp(struct rist_peer *p, uint16_t seq_rtp, uint8_t payload
} while (errorcode == EAGAIN && retries < RIST_MAX_SEND_RETRIES);
if (RIST_UNLIKELY(retries > (RIST_MAX_SEND_RETRIES / 5)))
rist_log_priv(ctx, RIST_LOG_WARN, "UDP Pacing Send Succeded after retries=%d, ret=%d, socket=%d\n", retries, ret, p->sd);
if (RIST_UNLIKELY(ret < 0))
_librist_log_send_error(p, errorcode, len, "simple-profile sendto");
}
else
ret = _librist_proto_gre_send_data(p, payload_type, proto_type, data, len, src_port, dst_port, p->rist_gre_version);
out:
if (RIST_UNLIKELY(ret <= 0)) {
if (RIST_UNLIKELY(ret <= 0 && ctx->profile == RIST_PROFILE_SIMPLE && errorcode == 0)) {
/* Generic safety net for ret == 0 or ret < 0 without errorcode
* captured (out-of-band failure paths). PMTU-aware send errors
* have already been logged above. */
rist_log_priv(ctx, RIST_LOG_ERROR, "\tSend failed: errno=%d, ret=%d, socket=%d\n", errno, ret, p->sd);
} else {
} else if (ret > 0) {
p->stats_sender_instant.sent++;
if (ts_null_bytes)
p->stats_sender_instant.ts_null++;