tcp: dont drop MTU reduction indications

ICMP messages generated in the output path when the frame length is bigger than the MTU are actually lost, because the socket is owned by the user (doing the xmit).

One example is ipgre_tunnel_xmit() calling icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));

We had a similar case fixed in commit a34a101e1e6 (ipv6: disable GSO on sockets hitting dst_allfrag).

The problem with that fix is that it relied on retransmit timers, so short TCP sessions paid too high a price in added latency.

This patch uses the tcp_release_cb() infrastructure so that MTU reduction messages (ICMP messages) are not lost, and no extra delay is added to TCP transmits.

Reported-by: Maciej Żenczykowski <[email protected]>
Diagnosed-by: Neal Cardwell <[email protected]>
Signed-off-by: Eric Dumazet <[email protected]>
Cc: Nandita Dukkipati <[email protected]>
Cc: Tom Herbert <[email protected]>
Cc: Tore Anderson <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
author: Eric Dumazet <[email protected]> 2012-07-23 07:48:52 +0000
committer: David S. Miller <[email protected]> 2012-07-23 07:58:46 +0000
commit: 563d34d05786263893ba4a1042eb9b9374127cf5 (patch)
tree: e9ce502c1f32bea966c81d5597d0a29eb4b9d244 /net/ipv6/tcp_ipv6.c
parent: bnx2x: Add new 57840 device IDs (diff)
download: kernel-563d34d05786263893ba4a1042eb9b9374127cf5.tar.gz
kernel-563d34d05786263893ba4a1042eb9b9374127cf5.zip
1 files changed, 24 insertions, 16 deletions
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0302ec3fecfc..f49476e2d884 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -315,6 +315,23 @@ failure:
 	return err;
 }
 
+static void tcp_v6_mtu_reduced(struct sock *sk)
+{
+	struct dst_entry *dst;
+
+	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+		return;
+
+	dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
+	if (!dst)
+		return;
+
+	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
+		tcp_sync_mss(sk, dst_mtu(dst));
+		tcp_simple_retransmit(sk);
+	}
+}
+
 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		u8 type, u8 code, int offset, __be32 info)
 {
@@ -342,7 +359,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	}
 
 	bh_lock_sock(sk);
-	if (sock_owned_by_user(sk))
+	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
 		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
 
 	if (sk->sk_state == TCP_CLOSE)
@@ -371,21 +388,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	}
 
 	if (type == ICMPV6_PKT_TOOBIG) {
-		struct dst_entry *dst;
-
-		if (sock_owned_by_user(sk))
-			goto out;
-		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
-			goto out;
-
-		dst = inet6_csk_update_pmtu(sk, ntohl(info));
-		if (!dst)
-			goto out;
-
-		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
-			tcp_sync_mss(sk, dst_mtu(dst));
-			tcp_simple_retransmit(sk);
-		}
+		tp->mtu_info = ntohl(info);
+		if (!sock_owned_by_user(sk))
+			tcp_v6_mtu_reduced(sk);
+		else
+			set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags);
 		goto out;
 	}
 
@@ -1949,6 +1956,7 @@ struct proto tcpv6_prot = {
 	.sendpage		= tcp_sendpage,
 	.backlog_rcv		= tcp_v6_do_rcv,
 	.release_cb		= tcp_release_cb,
+	.mtu_reduced		= tcp_v6_mtu_reduced,
 	.hash			= tcp_v6_hash,
 	.unhash			= inet_unhash,
 	.get_port		= inet_csk_get_port,
author	Eric Dumazet <[email protected]>	2012-07-23 07:48:52 +0000
committer	David S. Miller <[email protected]>	2012-07-23 07:58:46 +0000
commit	563d34d05786263893ba4a1042eb9b9374127cf5 (patch)
tree	e9ce502c1f32bea966c81d5597d0a29eb4b9d244 /net/ipv6/tcp_ipv6.c
parent	bnx2x: Add new 57840 device IDs (diff)
download	kernel-563d34d05786263893ba4a1042eb9b9374127cf5.tar.gz kernel-563d34d05786263893ba4a1042eb9b9374127cf5.zip