udp,gre: log actionable PMTU hint on EMSGSIZE
When a send fails with EMSGSIZE, log the packet size and suggest reducing the application payload. Rate-limited to one message per peer per 5 seconds. Both simple-profile and main-profile send paths use the same helper.
This commit is contained in:
+2
-2
@@ -198,8 +198,8 @@ ssize_t _librist_proto_gre_send_data(struct rist_peer *p, uint8_t payload_type,
|
||||
}
|
||||
|
||||
if (RIST_UNLIKELY(errorcode)) {
|
||||
struct rist_common_ctx *ctx = get_cctx(p);
|
||||
rist_log_priv(ctx, RIST_LOG_ERROR, "Send failed: errno=%d, reason=%s, ret=%d, socket=%d, retries=%d\n", errorcode, strerror(errorcode), ret, p->sd, retries);
|
||||
_librist_log_send_error(p, errorcode, (size_t)(hdr_len + payload_len),
|
||||
"main-profile sendmsg");
|
||||
} else if (RIST_UNLIKELY(retries > RIST_MAX_SEND_RETRIES / 5)) {
|
||||
struct rist_common_ctx *ctx = get_cctx(p);
|
||||
rist_log_priv(ctx, RIST_LOG_WARN, "Send Succeded after retries=%d, ret=%d, socket=%d\n", retries, ret, p->sd);
|
||||
|
||||
@@ -639,6 +639,11 @@ struct rist_peer {
|
||||
uint8_t mac_addr[6];
|
||||
bool send_first_connection_event;
|
||||
|
||||
/* Rate-limit for EMSGSIZE/PMTU-too-large send errors so a flow of
|
||||
* oversized packets doesn't drown the log. Holds the last log time
|
||||
* in NTP ticks. */
|
||||
uint64_t last_pmtu_error_log;
|
||||
|
||||
uint64_t log_repeat_timer;
|
||||
|
||||
uint8_t data[SIZEOF_GRE_KEEPALIVE];
|
||||
|
||||
@@ -42,5 +42,11 @@ RIST_PRIV int rist_set_url(struct rist_peer *peer);
|
||||
RIST_PRIV void rist_create_socket(struct rist_peer *peer);
|
||||
RIST_PRIV size_t rist_get_sender_retry_queue_size(struct rist_sender *ctx);
|
||||
|
||||
/* Log a send failure. Emits a PMTU-specific hint when errno is
 * EMSGSIZE/WSAEMSGSIZE; only that hint is rate-limited per peer. Any other
 * error produces the generic failure line, which is not rate-limited.
 * attempted is the size we tried to send; sock_errno is errno (or
 * WSAGetLastError on Windows) captured immediately after the failing send
 * call. */
|
||||
RIST_PRIV void _librist_log_send_error(struct rist_peer *p, int sock_errno,
|
||||
size_t attempted, const char *origin);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -28,6 +28,44 @@
|
||||
#include <assert.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
/* EMSGSIZE-aware send-failure logger, rate-limited to one line per peer
|
||||
* every 5 seconds to avoid drowning the log when the application is
|
||||
* pushing a stream of oversized packets. The hint on the PMTU case is
|
||||
* deliberately explicit and actionable: an operator who sees this once
|
||||
* should be able to fix it by lowering the application packet size
|
||||
* (e.g. risttunnel -m), without having to chase ICMP filtering. */
|
||||
void _librist_log_send_error(struct rist_peer *p, int sock_errno,
|
||||
size_t attempted, const char *origin)
|
||||
{
|
||||
struct rist_common_ctx *ctx = get_cctx(p);
|
||||
uint64_t now = timestampNTP_u64();
|
||||
bool is_pmtu;
|
||||
#ifdef _WIN32
|
||||
is_pmtu = (sock_errno == WSAEMSGSIZE);
|
||||
#else
|
||||
is_pmtu = (sock_errno == EMSGSIZE);
|
||||
#endif
|
||||
if (is_pmtu) {
|
||||
const uint64_t five_seconds = (uint64_t)5 * 65536 * 1000;
|
||||
if (p->last_pmtu_error_log != 0 &&
|
||||
now - p->last_pmtu_error_log < five_seconds)
|
||||
return;
|
||||
p->last_pmtu_error_log = now;
|
||||
rist_log_priv(ctx, RIST_LOG_ERROR,
|
||||
"PMTU exceeded sending %zu-byte datagram via %s (errno=%d). The path "
|
||||
"MTU to this peer is smaller than our RIST packet size and the "
|
||||
"don't-fragment bit is set, so the kernel refused to fragment. "
|
||||
"Lower the application packet size (e.g. risttunnel -m, or upstream "
|
||||
"MPEG-TS UDP size) so that payload + ~40 bytes of headers fits in the "
|
||||
"path MTU. Further PMTU errors on this peer will be silenced for 5s.\n",
|
||||
attempted, origin, sock_errno);
|
||||
} else {
|
||||
rist_log_priv(ctx, RIST_LOG_ERROR,
|
||||
"Send failed via %s: errno=%d, size=%zu, socket=%d\n",
|
||||
origin, sock_errno, attempted, p->sd);
|
||||
}
|
||||
}
|
||||
|
||||
size_t rist_send_seq_rtcp(struct rist_peer *p, uint16_t seq_rtp, uint8_t payload_type, uint8_t *payload, size_t payload_len, uint64_t source_time, uint16_t src_port, uint16_t dst_port, bool retry, uint16_t ts_null_bytes)
|
||||
{
|
||||
struct rist_common_ctx *ctx = get_cctx(p);
|
||||
@@ -120,14 +158,19 @@ size_t rist_send_seq_rtcp(struct rist_peer *p, uint16_t seq_rtp, uint8_t payload
|
||||
} while (errorcode == EAGAIN && retries < RIST_MAX_SEND_RETRIES);
|
||||
if (RIST_UNLIKELY(retries > (RIST_MAX_SEND_RETRIES / 5)))
|
||||
rist_log_priv(ctx, RIST_LOG_WARN, "UDP Pacing Send Succeded after retries=%d, ret=%d, socket=%d\n", retries, ret, p->sd);
|
||||
if (RIST_UNLIKELY(ret < 0))
|
||||
_librist_log_send_error(p, errorcode, len, "simple-profile sendto");
|
||||
}
|
||||
else
|
||||
ret = _librist_proto_gre_send_data(p, payload_type, proto_type, data, len, src_port, dst_port, p->rist_gre_version);
|
||||
|
||||
out:
|
||||
if (RIST_UNLIKELY(ret <= 0)) {
|
||||
if (RIST_UNLIKELY(ret <= 0 && ctx->profile == RIST_PROFILE_SIMPLE && errorcode == 0)) {
|
||||
/* Generic safety net for ret == 0 or ret < 0 without errorcode
|
||||
* captured (out-of-band failure paths). PMTU-aware send errors
|
||||
* have already been logged above. */
|
||||
rist_log_priv(ctx, RIST_LOG_ERROR, "\tSend failed: errno=%d, ret=%d, socket=%d\n", errno, ret, p->sd);
|
||||
} else {
|
||||
} else if (ret > 0) {
|
||||
p->stats_sender_instant.sent++;
|
||||
if (ts_null_bytes)
|
||||
p->stats_sender_instant.ts_null++;
|
||||
|
||||
Reference in New Issue
Block a user