author     Mike Pagano <mpagano@gentoo.org>   2014-12-16 12:29:50 -0500
committer  Mike Pagano <mpagano@gentoo.org>   2014-12-16 12:29:50 -0500
commit     b40e4b7205dd73330cf29bf39590327f973a473b (patch)
tree       4bcd3cee6d065cdec08a54659e5be171b79b5437
parent     Update multipath patch (diff)
Updating multipath tcp patch (3.16)
-rw-r--r--  0000_README                                                                                                |   2
-rw-r--r--  5010_multipath-tcp-v3.16-ac0ec67aa8bb.patch (renamed from 5010_multipath-tcp-v3.16-075df3a63833.patch)     | 250
2 files changed, 139 insertions, 113 deletions
diff --git a/0000_README b/0000_README
index 8719a11e..7122ab12 100644
--- a/0000_README
+++ b/0000_README
@@ -118,7 +118,7 @@ Patch: 5003_BFQ-3-block-add-Early-Queue-Merge-EQM-v7r6-for-3.16.0.patch
From: http://algo.ing.unimo.it/people/paolo/disk_sched/
Desc: BFQ v7r6 patch 3 for 3.16: Early Queue Merge (EQM)
-Patch: 5010_multipath-tcp-v3.16-075df3a63833.patch
+Patch: 5010_multipath-tcp-v3.16-ac0ec67aa8bb.patch
From: http://multipath-tcp.org/
Desc: Patch for simultaneous use of several IP-addresses/interfaces in TCP for better resource utilization, better throughput and smoother reaction to failures.
diff --git a/5010_multipath-tcp-v3.16-075df3a63833.patch b/5010_multipath-tcp-v3.16-ac0ec67aa8bb.patch
index 7520b4a9..2858f5b8 100644
--- a/5010_multipath-tcp-v3.16-075df3a63833.patch
+++ b/5010_multipath-tcp-v3.16-ac0ec67aa8bb.patch
@@ -1991,7 +1991,7 @@ index 156350745700..0e23cae8861f 100644
struct timewait_sock_ops;
struct inet_hashinfo;
diff --git a/include/net/tcp.h b/include/net/tcp.h
-index 7286db80e8b8..ff92e74cd684 100644
+index 7286db80e8b8..2130c1c7fe6e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -177,6 +177,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
@@ -2030,7 +2030,7 @@ index 7286db80e8b8..ff92e74cd684 100644
extern struct inet_timewait_death_row tcp_death_row;
/* sysctl variables for tcp */
-@@ -344,6 +366,107 @@ extern struct proto tcp_prot;
+@@ -344,6 +366,108 @@ extern struct proto tcp_prot;
#define TCP_ADD_STATS_USER(net, field, val) SNMP_ADD_STATS_USER((net)->mib.tcp_statistics, field, val)
#define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)
@@ -2040,6 +2040,7 @@ index 7286db80e8b8..ff92e74cd684 100644
+
+struct mptcp_options_received;
+
++void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited);
+void tcp_enter_quickack_mode(struct sock *sk);
+int tcp_close_state(struct sock *sk);
+void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
@@ -2138,7 +2139,7 @@ index 7286db80e8b8..ff92e74cd684 100644
void tcp_tasklet_init(void);
void tcp_v4_err(struct sk_buff *skb, u32);
-@@ -440,6 +563,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+@@ -440,6 +564,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len, int nonblock, int flags, int *addr_len);
void tcp_parse_options(const struct sk_buff *skb,
struct tcp_options_received *opt_rx,
@@ -2146,7 +2147,7 @@ index 7286db80e8b8..ff92e74cd684 100644
int estab, struct tcp_fastopen_cookie *foc);
const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
-@@ -493,14 +617,8 @@ static inline u32 tcp_cookie_time(void)
+@@ -493,14 +618,8 @@ static inline u32 tcp_cookie_time(void)
u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
u16 *mssp);
@@ -2163,7 +2164,7 @@ index 7286db80e8b8..ff92e74cd684 100644
#endif
__u32 cookie_init_timestamp(struct request_sock *req);
-@@ -516,13 +634,6 @@ u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
+@@ -516,13 +635,6 @@ u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
const struct tcphdr *th, u16 *mssp);
__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb,
__u16 *mss);
@@ -2177,7 +2178,7 @@ index 7286db80e8b8..ff92e74cd684 100644
#endif
/* tcp_output.c */
-@@ -551,10 +662,17 @@ void tcp_send_delayed_ack(struct sock *sk);
+@@ -551,10 +663,17 @@ void tcp_send_delayed_ack(struct sock *sk);
void tcp_send_loss_probe(struct sock *sk);
bool tcp_schedule_loss_probe(struct sock *sk);
@@ -2195,7 +2196,7 @@ index 7286db80e8b8..ff92e74cd684 100644
/* tcp_timer.c */
void tcp_init_xmit_timers(struct sock *);
-@@ -703,14 +821,27 @@ void tcp_send_window_probe(struct sock *sk);
+@@ -703,14 +822,27 @@ void tcp_send_window_probe(struct sock *sk);
*/
struct tcp_skb_cb {
union {
@@ -2226,7 +2227,7 @@ index 7286db80e8b8..ff92e74cd684 100644
__u8 tcp_flags; /* TCP header flags. (tcp[13]) */
__u8 sacked; /* State flags for SACK/FACK. */
-@@ -1075,7 +1206,8 @@ u32 tcp_default_init_rwnd(u32 mss);
+@@ -1075,7 +1207,8 @@ u32 tcp_default_init_rwnd(u32 mss);
/* Determine a window scaling and initial window to offer. */
void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd,
__u32 *window_clamp, int wscale_ok,
@@ -2236,7 +2237,7 @@ index 7286db80e8b8..ff92e74cd684 100644
static inline int tcp_win_from_space(int space)
{
-@@ -1084,15 +1216,34 @@ static inline int tcp_win_from_space(int space)
+@@ -1084,6 +1217,19 @@ static inline int tcp_win_from_space(int space)
space - (space>>sysctl_tcp_adv_win_scale);
}
@@ -2256,22 +2257,7 @@ index 7286db80e8b8..ff92e74cd684 100644
/* Note: caller must be prepared to deal with negative returns */
static inline int tcp_space(const struct sock *sk)
{
-+ if (mptcp(tcp_sk(sk)))
-+ sk = tcp_sk(sk)->meta_sk;
-+
- return tcp_win_from_space(sk->sk_rcvbuf -
- atomic_read(&sk->sk_rmem_alloc));
- }
-
- static inline int tcp_full_space(const struct sock *sk)
- {
-+ if (mptcp(tcp_sk(sk)))
-+ sk = tcp_sk(sk)->meta_sk;
-+
- return tcp_win_from_space(sk->sk_rcvbuf);
- }
-
-@@ -1115,6 +1266,8 @@ static inline void tcp_openreq_init(struct request_sock *req,
+@@ -1115,6 +1261,8 @@ static inline void tcp_openreq_init(struct request_sock *req,
ireq->wscale_ok = rx_opt->wscale_ok;
ireq->acked = 0;
ireq->ecn_ok = 0;
@@ -2280,7 +2266,7 @@ index 7286db80e8b8..ff92e74cd684 100644
ireq->ir_rmt_port = tcp_hdr(skb)->source;
ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
}
-@@ -1585,6 +1738,11 @@ int tcp4_proc_init(void);
+@@ -1585,6 +1733,11 @@ int tcp4_proc_init(void);
void tcp4_proc_exit(void);
#endif
@@ -2292,7 +2278,7 @@ index 7286db80e8b8..ff92e74cd684 100644
/* TCP af-specific functions */
struct tcp_sock_af_ops {
#ifdef CONFIG_TCP_MD5SIG
-@@ -1601,7 +1759,32 @@ struct tcp_sock_af_ops {
+@@ -1601,7 +1754,33 @@ struct tcp_sock_af_ops {
#endif
};
@@ -2317,6 +2303,7 @@ index 7286db80e8b8..ff92e74cd684 100644
+ void (*time_wait)(struct sock *sk, int state, int timeo);
+ void (*cleanup_rbuf)(struct sock *sk, int copied);
+ void (*init_congestion_control)(struct sock *sk);
++ void (*cwnd_validate)(struct sock *sk, bool is_cwnd_limited);
+};
+extern const struct tcp_sock_ops tcp_specific;
+
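The hunk above introduces struct tcp_sock_ops, a per-connection table of function pointers: plain TCP installs tcp_specific, while MPTCP substitutes its own handlers, so shared code paths dispatch through tp->ops instead of calling helpers such as tcp_send_ack() directly. A minimal standalone sketch of that dispatch pattern follows (illustrative only, not patch code; the conn/conn_ops names are invented for the example):

#include <stdio.h>

struct conn;

/* Function-pointer table, analogous to tcp_sock_ops. */
struct conn_ops {
	void (*send_ack)(struct conn *c);
};

struct conn {
	const struct conn_ops *ops;
	const char *name;
};

static void plain_send_ack(struct conn *c)
{
	printf("%s: plain ACK\n", c->name);
}

static void mptcp_send_ack(struct conn *c)
{
	printf("%s: ACK via meta-socket\n", c->name);
}

/* One table per flavor, analogous to tcp_specific vs. the MPTCP ops. */
static const struct conn_ops plain_ops = { .send_ack = plain_send_ack };
static const struct conn_ops mptcp_ops = { .send_ack = mptcp_send_ack };

int main(void)
{
	struct conn a = { &plain_ops, "tcp" };
	struct conn b = { &mptcp_ops, "mptcp" };

	a.ops->send_ack(&a);	/* dispatches to plain_send_ack */
	b.ops->send_ack(&b);	/* dispatches to mptcp_send_ack */
	return 0;
}

The added cwnd_validate member fits the same scheme: tcp_write_xmit() calls through the table, so MPTCP can validate the congestion window on the meta-socket instead of the subflow.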
@@ -2325,7 +2312,7 @@ index 7286db80e8b8..ff92e74cd684 100644
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *(*md5_lookup) (struct sock *sk,
struct request_sock *req);
-@@ -1611,8 +1794,39 @@ struct tcp_request_sock_ops {
+@@ -1611,8 +1790,39 @@ struct tcp_request_sock_ops {
const struct request_sock *req,
const struct sk_buff *skb);
#endif
@@ -2572,20 +2559,20 @@ index 4db3c2a1679c..04cb17d4b0ce 100644
goto drop;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
-index 05c57f0fcabe..811286a6aa9c 100644
+index 05c57f0fcabe..a1ba825c6acd 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -556,6 +556,38 @@ config TCP_CONG_ILLINOIS
For further details see:
http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
-+config TCP_CONG_COUPLED
-+ tristate "MPTCP COUPLED CONGESTION CONTROL"
++config TCP_CONG_LIA
++ tristate "MPTCP Linked Increase"
+ depends on MPTCP
+ default n
+ ---help---
-+ MultiPath TCP Coupled Congestion Control
-+ To enable it, just put 'coupled' in tcp_congestion_control
++ MultiPath TCP Linked Increase Congestion Control
++ To enable it, just put 'lia' in tcp_congestion_control
+
+config TCP_CONG_OLIA
+ tristate "MPTCP Opportunistic Linked Increase"
@@ -2618,8 +2605,8 @@ index 05c57f0fcabe..811286a6aa9c 100644
config DEFAULT_WESTWOOD
bool "Westwood" if TCP_CONG_WESTWOOD=y
-+ config DEFAULT_COUPLED
-+ bool "Coupled" if TCP_CONG_COUPLED=y
++ config DEFAULT_LIA
++ bool "Lia" if TCP_CONG_LIA=y
+
+ config DEFAULT_OLIA
+ bool "Olia" if TCP_CONG_OLIA=y
@@ -2637,7 +2624,7 @@ index 05c57f0fcabe..811286a6aa9c 100644
default "vegas" if DEFAULT_VEGAS
default "westwood" if DEFAULT_WESTWOOD
default "veno" if DEFAULT_VENO
-+ default "coupled" if DEFAULT_COUPLED
++ default "lia" if DEFAULT_LIA
+ default "wvegas" if DEFAULT_WVEGAS
+ default "balia" if DEFAULT_BALIA
default "reno" if DEFAULT_RENO
@@ -2815,7 +2802,7 @@ index c86624b36a62..0ff3fe004d62 100644
ireq->rcv_wscale = rcv_wscale;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
-index 9d2118e5fbc7..2cb89f886d45 100644
+index 9d2118e5fbc7..cb59aef70d26 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -271,6 +271,7 @@
@@ -2826,7 +2813,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
#include <net/tcp.h>
#include <net/xfrm.h>
#include <net/ip.h>
-@@ -371,6 +372,24 @@ static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
+@@ -371,6 +372,25 @@ static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
return period;
}
@@ -2846,12 +2833,13 @@ index 9d2118e5fbc7..2cb89f886d45 100644
+ .retransmit_timer = tcp_retransmit_timer,
+ .time_wait = tcp_time_wait,
+ .cleanup_rbuf = tcp_cleanup_rbuf,
++ .cwnd_validate = tcp_cwnd_validate,
+};
+
/* Address-family independent initialization for a tcp_sock.
*
* NOTE: A lot of things set to zero explicitly by call to
-@@ -419,6 +438,8 @@ void tcp_init_sock(struct sock *sk)
+@@ -419,6 +439,8 @@ void tcp_init_sock(struct sock *sk)
sk->sk_sndbuf = sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
@@ -2860,7 +2848,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
local_bh_disable();
sock_update_memcg(sk);
sk_sockets_allocated_inc(sk);
-@@ -726,6 +747,14 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
+@@ -726,6 +748,14 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
int ret;
sock_rps_record_flow(sk);
@@ -2875,7 +2863,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
/*
* We can't seek on a socket input
*/
-@@ -821,8 +850,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
+@@ -821,8 +851,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
return NULL;
}
@@ -2885,7 +2873,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
{
struct tcp_sock *tp = tcp_sk(sk);
u32 xmit_size_goal, old_size_goal;
-@@ -872,8 +900,13 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
+@@ -872,8 +901,13 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
{
int mss_now;
@@ -2901,7 +2889,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
return mss_now;
}
-@@ -892,11 +925,32 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
+@@ -892,11 +926,32 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
* is fully established.
*/
if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
@@ -2935,7 +2923,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
mss_now = tcp_send_mss(sk, &size_goal, flags);
-@@ -1001,8 +1055,9 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset,
+@@ -1001,8 +1056,9 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset,
{
ssize_t res;
@@ -2947,7 +2935,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
return sock_no_sendpage(sk->sk_socket, page, offset, size,
flags);
-@@ -1018,6 +1073,9 @@ static inline int select_size(const struct sock *sk, bool sg)
+@@ -1018,6 +1074,9 @@ static inline int select_size(const struct sock *sk, bool sg)
const struct tcp_sock *tp = tcp_sk(sk);
int tmp = tp->mss_cache;
@@ -2957,7 +2945,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
if (sg) {
if (sk_can_gso(sk)) {
/* Small frames wont use a full page:
-@@ -1100,11 +1158,18 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+@@ -1100,11 +1159,18 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
* is fully established.
*/
if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
@@ -2977,7 +2965,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
if (unlikely(tp->repair)) {
if (tp->repair_queue == TCP_RECV_QUEUE) {
copied = tcp_send_rcvq(sk, msg, size);
-@@ -1132,7 +1197,10 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+@@ -1132,7 +1198,10 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto out_err;
@@ -2989,7 +2977,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
while (--iovlen >= 0) {
size_t seglen = iov->iov_len;
-@@ -1183,8 +1251,15 @@ new_segment:
+@@ -1183,8 +1252,15 @@ new_segment:
/*
* Check whether we can use HW checksum.
@@ -3006,7 +2994,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
skb->ip_summed = CHECKSUM_PARTIAL;
skb_entail(sk, skb);
-@@ -1422,7 +1497,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
+@@ -1422,7 +1498,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
/* Optimize, __tcp_select_window() is not cheap. */
if (2*rcv_window_now <= tp->window_clamp) {
@@ -3015,7 +3003,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
/* Send ACK now, if this read freed lots of space
* in our buffer. Certainly, new_window is new window.
-@@ -1587,7 +1662,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
+@@ -1587,7 +1663,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
/* Clean up data we have read: This will do ACK frames. */
if (copied > 0) {
tcp_recv_skb(sk, seq, &offset);
@@ -3024,7 +3012,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
}
return copied;
}
-@@ -1623,6 +1698,14 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+@@ -1623,6 +1699,14 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
lock_sock(sk);
@@ -3039,7 +3027,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
err = -ENOTCONN;
if (sk->sk_state == TCP_LISTEN)
goto out;
-@@ -1761,7 +1844,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+@@ -1761,7 +1845,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
}
}
@@ -3048,7 +3036,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
/* Install new reader */
-@@ -1813,7 +1896,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+@@ -1813,7 +1897,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (tp->rcv_wnd == 0 &&
!skb_queue_empty(&sk->sk_async_wait_queue)) {
tcp_service_net_dma(sk, true);
@@ -3057,7 +3045,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
} else
dma_async_issue_pending(tp->ucopy.dma_chan);
}
-@@ -1993,7 +2076,7 @@ skip_copy:
+@@ -1993,7 +2077,7 @@ skip_copy:
*/
/* Clean up data we have read: This will do ACK frames. */
@@ -3066,7 +3054,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
release_sock(sk);
return copied;
-@@ -2070,7 +2153,7 @@ static const unsigned char new_state[16] = {
+@@ -2070,7 +2154,7 @@ static const unsigned char new_state[16] = {
/* TCP_CLOSING */ TCP_CLOSING,
};
@@ -3075,7 +3063,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
{
int next = (int)new_state[sk->sk_state];
int ns = next & TCP_STATE_MASK;
-@@ -2100,7 +2183,7 @@ void tcp_shutdown(struct sock *sk, int how)
+@@ -2100,7 +2184,7 @@ void tcp_shutdown(struct sock *sk, int how)
TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
/* Clear out any half completed packets. FIN if needed. */
if (tcp_close_state(sk))
@@ -3084,7 +3072,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
}
}
EXPORT_SYMBOL(tcp_shutdown);
-@@ -2125,6 +2208,11 @@ void tcp_close(struct sock *sk, long timeout)
+@@ -2125,6 +2209,11 @@ void tcp_close(struct sock *sk, long timeout)
int data_was_unread = 0;
int state;
@@ -3096,7 +3084,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
lock_sock(sk);
sk->sk_shutdown = SHUTDOWN_MASK;
-@@ -2167,7 +2255,7 @@ void tcp_close(struct sock *sk, long timeout)
+@@ -2167,7 +2256,7 @@ void tcp_close(struct sock *sk, long timeout)
/* Unread data was tossed, zap the connection. */
NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
tcp_set_state(sk, TCP_CLOSE);
@@ -3105,7 +3093,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0);
-@@ -2247,7 +2335,7 @@ adjudge_to_death:
+@@ -2247,7 +2336,7 @@ adjudge_to_death:
struct tcp_sock *tp = tcp_sk(sk);
if (tp->linger2 < 0) {
tcp_set_state(sk, TCP_CLOSE);
@@ -3114,7 +3102,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPABORTONLINGER);
} else {
-@@ -2257,7 +2345,8 @@ adjudge_to_death:
+@@ -2257,7 +2346,8 @@ adjudge_to_death:
inet_csk_reset_keepalive_timer(sk,
tmo - TCP_TIMEWAIT_LEN);
} else {
@@ -3124,7 +3112,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
goto out;
}
}
-@@ -2266,7 +2355,7 @@ adjudge_to_death:
+@@ -2266,7 +2356,7 @@ adjudge_to_death:
sk_mem_reclaim(sk);
if (tcp_check_oom(sk, 0)) {
tcp_set_state(sk, TCP_CLOSE);
@@ -3133,7 +3121,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPABORTONMEMORY);
}
-@@ -2291,15 +2380,6 @@ out:
+@@ -2291,15 +2381,6 @@ out:
}
EXPORT_SYMBOL(tcp_close);
@@ -3149,7 +3137,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
int tcp_disconnect(struct sock *sk, int flags)
{
struct inet_sock *inet = inet_sk(sk);
-@@ -2322,7 +2402,7 @@ int tcp_disconnect(struct sock *sk, int flags)
+@@ -2322,7 +2403,7 @@ int tcp_disconnect(struct sock *sk, int flags)
/* The last check adjusts for discrepancy of Linux wrt. RFC
* states
*/
@@ -3158,7 +3146,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
sk->sk_err = ECONNRESET;
} else if (old_state == TCP_SYN_SENT)
sk->sk_err = ECONNRESET;
-@@ -2340,6 +2420,13 @@ int tcp_disconnect(struct sock *sk, int flags)
+@@ -2340,6 +2421,13 @@ int tcp_disconnect(struct sock *sk, int flags)
if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
inet_reset_saddr(sk);
@@ -3172,7 +3160,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
sk->sk_shutdown = 0;
sock_reset_flag(sk, SOCK_DONE);
tp->srtt_us = 0;
-@@ -2632,6 +2719,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
+@@ -2632,6 +2720,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
case TCP_DEFER_ACCEPT:
@@ -3185,7 +3173,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
/* Translate value in seconds to number of retransmits */
icsk->icsk_accept_queue.rskq_defer_accept =
secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
-@@ -2659,7 +2752,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
+@@ -2659,7 +2753,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
inet_csk_ack_scheduled(sk)) {
icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
@@ -3194,7 +3182,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
if (!(val & 1))
icsk->icsk_ack.pingpong = 1;
}
-@@ -2699,6 +2792,18 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
+@@ -2699,6 +2793,18 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
tp->notsent_lowat = val;
sk->sk_write_space(sk);
break;
@@ -3213,7 +3201,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
default:
err = -ENOPROTOOPT;
break;
-@@ -2931,6 +3036,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
+@@ -2931,6 +3037,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
case TCP_NOTSENT_LOWAT:
val = tp->notsent_lowat;
break;
@@ -3225,7 +3213,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644
default:
return -ENOPROTOOPT;
}
-@@ -3120,8 +3230,11 @@ void tcp_done(struct sock *sk)
+@@ -3120,8 +3231,11 @@ void tcp_done(struct sock *sk)
if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
@@ -3299,7 +3287,7 @@ index 9771563ab564..5c230d96c4c1 100644
WARN_ON(req->sk == NULL);
return true;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
-index 40639c288dc2..3273bb69f387 100644
+index 40639c288dc2..71033189797d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -74,6 +74,9 @@
@@ -3391,7 +3379,7 @@ index 40639c288dc2..3273bb69f387 100644
- if (tp->rcv_ssthresh < tp->window_clamp &&
- (int)tp->rcv_ssthresh < tcp_space(sk) &&
+ if (meta_tp->rcv_ssthresh < meta_tp->window_clamp &&
-+ (int)meta_tp->rcv_ssthresh < tcp_space(sk) &&
++ (int)meta_tp->rcv_ssthresh < tcp_space(meta_sk) &&
!sk_under_memory_pressure(sk)) {
int incr;
@@ -5203,7 +5191,7 @@ index e68e0d4af6c9..ae6946857dff 100644
return ret;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
-index 179b51e6bda3..efd31b6c5784 100644
+index 179b51e6bda3..267d5f7eb303 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -36,6 +36,12 @@
@@ -5559,6 +5547,15 @@ index 179b51e6bda3..efd31b6c5784 100644
/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
* As additional protections, we do not touch cwnd in retransmission phases,
+@@ -1402,7 +1448,7 @@ static void tcp_cwnd_application_limited(struct sock *sk)
+ tp->snd_cwnd_stamp = tcp_time_stamp;
+ }
+
+-static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
++void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+
@@ -1446,8 +1492,8 @@ static bool tcp_minshall_check(const struct tcp_sock *tp)
* But we can avoid doing the divide again given we already have
* skb_pcount = skb->len / mss_now
@@ -5680,7 +5677,17 @@ index 179b51e6bda3..efd31b6c5784 100644
/* Do MTU probing. */
result = tcp_mtu_probe(sk);
if (!result) {
-@@ -2099,7 +2150,8 @@ void tcp_send_loss_probe(struct sock *sk)
+@@ -2004,7 +2055,8 @@ repair:
+ /* Send one loss probe per tail loss episode. */
+ if (push_one != 2)
+ tcp_schedule_loss_probe(sk);
+- tcp_cwnd_validate(sk, is_cwnd_limited);
++ if (tp->ops->cwnd_validate)
++ tp->ops->cwnd_validate(sk, is_cwnd_limited);
+ return false;
+ }
+ return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
+@@ -2099,7 +2151,8 @@ void tcp_send_loss_probe(struct sock *sk)
int err = -1;
if (tcp_send_head(sk) != NULL) {
@@ -5690,7 +5697,7 @@ index 179b51e6bda3..efd31b6c5784 100644
goto rearm_timer;
}
-@@ -2159,8 +2211,8 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
+@@ -2159,8 +2212,8 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
if (unlikely(sk->sk_state == TCP_CLOSE))
return;
@@ -5701,7 +5708,7 @@ index 179b51e6bda3..efd31b6c5784 100644
tcp_check_probe_timer(sk);
}
-@@ -2173,7 +2225,8 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
+@@ -2173,7 +2226,8 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
BUG_ON(!skb || skb->len < mss_now);
@@ -5711,7 +5718,7 @@ index 179b51e6bda3..efd31b6c5784 100644
}
/* This function returns the amount that we can raise the
-@@ -2386,6 +2439,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
+@@ -2386,6 +2440,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
return;
@@ -5722,7 +5729,7 @@ index 179b51e6bda3..efd31b6c5784 100644
tcp_for_write_queue_from_safe(skb, tmp, sk) {
if (!tcp_can_collapse(sk, skb))
break;
-@@ -2843,7 +2900,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
+@@ -2843,7 +2901,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
th->window = htons(min(req->rcv_wnd, 65535U));
@@ -5731,7 +5738,7 @@ index 179b51e6bda3..efd31b6c5784 100644
th->doff = (tcp_header_size >> 2);
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS);
-@@ -2897,13 +2954,13 @@ static void tcp_connect_init(struct sock *sk)
+@@ -2897,13 +2955,13 @@ static void tcp_connect_init(struct sock *sk)
(tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
tp->window_clamp = tcp_full_space(sk);
@@ -5752,7 +5759,7 @@ index 179b51e6bda3..efd31b6c5784 100644
tp->rx_opt.rcv_wscale = rcv_wscale;
tp->rcv_ssthresh = tp->rcv_wnd;
-@@ -2927,6 +2984,36 @@ static void tcp_connect_init(struct sock *sk)
+@@ -2927,6 +2985,36 @@ static void tcp_connect_init(struct sock *sk)
inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
inet_csk(sk)->icsk_retransmits = 0;
tcp_clear_retrans(tp);
@@ -5789,7 +5796,7 @@ index 179b51e6bda3..efd31b6c5784 100644
}
static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
-@@ -3176,6 +3263,7 @@ void tcp_send_ack(struct sock *sk)
+@@ -3176,6 +3264,7 @@ void tcp_send_ack(struct sock *sk)
TCP_SKB_CB(buff)->when = tcp_time_stamp;
tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
}
@@ -5797,7 +5804,7 @@ index 179b51e6bda3..efd31b6c5784 100644
/* This routine sends a packet with an out of date sequence
* number. It assumes the other end will try to ack it.
-@@ -3188,7 +3276,7 @@ void tcp_send_ack(struct sock *sk)
+@@ -3188,7 +3277,7 @@ void tcp_send_ack(struct sock *sk)
* one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
* out-of-date with SND.UNA-1 to probe window.
*/
@@ -5806,7 +5813,7 @@ index 179b51e6bda3..efd31b6c5784 100644
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
-@@ -3270,7 +3358,7 @@ void tcp_send_probe0(struct sock *sk)
+@@ -3270,7 +3359,7 @@ void tcp_send_probe0(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
int err;
@@ -5815,7 +5822,7 @@ index 179b51e6bda3..efd31b6c5784 100644
if (tp->packets_out || !tcp_send_head(sk)) {
/* Cancel probe timer, if it is not required. */
-@@ -3301,3 +3389,18 @@ void tcp_send_probe0(struct sock *sk)
+@@ -3301,3 +3390,18 @@ void tcp_send_probe0(struct sock *sk)
TCP_RTO_MAX);
}
}
@@ -7099,7 +7106,7 @@ index 000000000000..cdfc03adabf8
+
diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
new file mode 100644
-index 000000000000..2feb3e873206
+index 000000000000..5c70e7cca3b3
--- /dev/null
+++ b/net/mptcp/Makefile
@@ -0,0 +1,21 @@
@@ -7113,7 +7120,7 @@ index 000000000000..2feb3e873206
+mptcp-y := mptcp_ctrl.o mptcp_ipv4.o mptcp_ofo_queue.o mptcp_pm.o \
+ mptcp_output.o mptcp_input.o mptcp_sched.o
+
-+obj-$(CONFIG_TCP_CONG_COUPLED) += mptcp_coupled.o
++obj-$(CONFIG_TCP_CONG_LIA) += mptcp_coupled.o
+obj-$(CONFIG_TCP_CONG_OLIA) += mptcp_olia.o
+obj-$(CONFIG_TCP_CONG_WVEGAS) += mptcp_wvegas.o
+obj-$(CONFIG_TCP_CONG_BALIA) += mptcp_balia.o
@@ -7126,7 +7133,7 @@ index 000000000000..2feb3e873206
+
diff --git a/net/mptcp/mptcp_balia.c b/net/mptcp/mptcp_balia.c
new file mode 100644
-index 000000000000..5cc224d80b01
+index 000000000000..565cb75e2cea
--- /dev/null
+++ b/net/mptcp/mptcp_balia.c
@@ -0,0 +1,267 @@
@@ -7156,8 +7163,9 @@ index 000000000000..5cc224d80b01
+ * if max_rate > 2^rate_scale_limit
+ */
+
-+static int rate_scale_limit = 30;
-+static int scale_num = 10;
++static int rate_scale_limit = 25;
++static int alpha_scale = 10;
++static int scale_num = 5;
+
+struct mptcp_balia {
+ u64 ai;
@@ -7210,7 +7218,6 @@ index 000000000000..5cc224d80b01
+ const struct tcp_sock *tp = tcp_sk(sk);
+ const struct mptcp_cb *mpcb = tp->mpcb;
+ const struct sock *sub_sk;
-+ int can_send = 0;
+ u64 max_rate = 0, rate = 0, sum_rate = 0;
+ u64 alpha = 0, ai = 0, md = 0;
+ int num_scale_down = 0;
@@ -7230,27 +7237,24 @@ index 000000000000..5cc224d80b01
+ if (!mptcp_balia_sk_can_send(sub_sk))
+ continue;
+
-+ can_send++;
-+
+ tmp = div_u64((u64)tp->mss_cache * sub_tp->snd_cwnd
+ * (USEC_PER_SEC << 3), sub_tp->srtt_us);
+ sum_rate += tmp;
+
++ if (tp == sub_tp)
++ rate = tmp;
++
+ if (tmp >= max_rate)
+ max_rate = tmp;
+ }
+
-+ /* No subflow is able to send - we don't care anymore */
-+ if (unlikely(!can_send))
++ /* At least, the current subflow should be able to send */
++ if (unlikely(!rate))
+ goto exit;
+
-+ rate = div_u64((u64)tp->mss_cache * tp->snd_cwnd *
-+ (USEC_PER_SEC << 3), tp->srtt_us);
+ alpha = div64_u64(max_rate, rate);
+
-+ /* Scale down max_rate from B/s to KB/s, MB/s, or GB/s
-+ * if max_rate is too high (i.e., >2^30)
-+ */
++ /* Scale down max_rate if it is too high (e.g., >2^25) */
+ while (max_rate > mptcp_balia_scale(1, rate_scale_limit)) {
+ max_rate >>= scale_num;
+ num_scale_down++;
@@ -7262,6 +7266,9 @@ index 000000000000..5cc224d80b01
+ struct tcp_sock *sub_tp = tcp_sk(sub_sk);
+ u64 tmp;
+
++ if (!mptcp_balia_sk_can_send(sub_sk))
++ continue;
++
+ tmp = div_u64((u64)tp->mss_cache * sub_tp->snd_cwnd
+ * (USEC_PER_SEC << 3), sub_tp->srtt_us);
+ tmp >>= (scale_num * num_scale_down);
@@ -7283,9 +7290,9 @@ index 000000000000..5cc224d80b01
+ if (unlikely(!ai))
+ ai = tp->snd_cwnd;
+
-+ md = ((tp->snd_cwnd >> 1) * min(mptcp_balia_scale(alpha, scale_num),
-+ mptcp_balia_scale(3, scale_num) >> 1))
-+ >> scale_num;
++ md = ((tp->snd_cwnd >> 1) * min(mptcp_balia_scale(alpha, alpha_scale),
++ mptcp_balia_scale(3, alpha_scale) >> 1))
++ >> alpha_scale;
+
+exit:
+ mptcp_set_ai(sk, ai);
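The hunks above rework BALIA's fixed-point arithmetic: rates are scaled down in steps of 2^5 (scale_num) once they exceed 2^25 (rate_scale_limit), while the ratio alpha = max_rate / rate is handled in a separate Q10 format (alpha_scale), and the multiplicative decrease becomes md = (cwnd / 2) * min(alpha, 3/2). A standalone sketch of that decrease computation follows (illustrative only, not patch code):

#include <stdint.h>
#include <stdio.h>

#define ALPHA_SCALE 10	/* Q10 fixed point, mirroring alpha_scale */

static uint64_t balia_scale(uint64_t val, int scale)
{
	return val << scale;
}

/* md = (cwnd / 2) * min(alpha, 3/2), with alpha = max_rate / rate. */
static uint32_t balia_md(uint32_t cwnd, uint64_t max_rate, uint64_t rate)
{
	uint64_t alpha = max_rate / rate;		/* integer ratio, >= 1 */
	uint64_t a_q10 = balia_scale(alpha, ALPHA_SCALE);
	uint64_t cap = balia_scale(3, ALPHA_SCALE) >> 1;	/* 1.5 in Q10 */

	if (a_q10 > cap)
		a_q10 = cap;			/* decrease is capped at 1.5x */
	return (uint32_t)(((uint64_t)(cwnd >> 1) * a_q10) >> ALPHA_SCALE);
}

int main(void)
{
	/* A subflow at half the best subflow's rate: alpha = 2, capped to
	 * 1.5, so a cwnd of 20 yields a decrease of (20/2) * 1.5 = 15. */
	printf("md = %u\n", balia_md(20, 2000, 1000));
	return 0;
}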
@@ -16520,10 +16527,10 @@ index 000000000000..53f5c43bb488
+MODULE_VERSION("0.1");
diff --git a/net/mptcp/mptcp_output.c b/net/mptcp/mptcp_output.c
new file mode 100644
-index 000000000000..400ea254c078
+index 000000000000..e2a6a6d6522d
--- /dev/null
+++ b/net/mptcp/mptcp_output.c
-@@ -0,0 +1,1743 @@
+@@ -0,0 +1,1758 @@
+/*
+ * MPTCP implementation - Sending side
+ *
@@ -17181,11 +17188,9 @@ index 000000000000..400ea254c078
+ struct sock *subsk = NULL;
+ struct mptcp_cb *mpcb = meta_tp->mpcb;
+ struct sk_buff *skb;
-+ unsigned int sent_pkts;
+ int reinject = 0;
+ unsigned int sublimit;
-+
-+ sent_pkts = 0;
++ __u32 path_mask = 0;
+
+ while ((skb = mpcb->sched_ops->next_segment(meta_sk, &reinject, &subsk,
+ &sublimit))) {
@@ -17266,6 +17271,7 @@ index 000000000000..400ea254c078
+ * always push on the subflow
+ */
+ __tcp_push_pending_frames(subsk, mss_now, TCP_NAGLE_PUSH);
++ path_mask |= mptcp_pi_to_flag(subtp->mptcp->path_index);
+ TCP_SKB_CB(skb)->when = tcp_time_stamp;
+
+ if (!reinject) {
@@ -17276,7 +17282,6 @@ index 000000000000..400ea254c078
+ }
+
+ tcp_minshall_update(meta_tp, mss_now, skb);
-+ sent_pkts += tcp_skb_pcount(skb);
+
+ if (reinject > 0) {
+ __skb_unlink(skb, &mpcb->reinject_queue);
@@ -17287,6 +17292,22 @@ index 000000000000..400ea254c078
+ break;
+ }
+
++ mptcp_for_each_sk(mpcb, subsk) {
++ subtp = tcp_sk(subsk);
++
++ if (!(path_mask & mptcp_pi_to_flag(subtp->mptcp->path_index)))
++ continue;
++
++ /* We have pushed data on this subflow. We ignore the call to
++ * cwnd_validate in tcp_write_xmit as is_cwnd_limited will never
++ * be true (we never push more than what the cwnd can accept).
++ * We need to ensure that we call tcp_cwnd_validate with
++ * is_cwnd_limited set to true if we have filled the cwnd.
++ */
++ tcp_cwnd_validate(subsk, tcp_packets_in_flight(subtp) >=
++ subtp->snd_cwnd);
++ }
++
+ return !meta_tp->packets_out && tcp_send_head(meta_sk);
+}
+
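The new loop above records which subflows carried data in a path_mask bitmap and then calls tcp_cwnd_validate() once per flagged subflow, because, as the comment explains, the per-subflow call inside tcp_write_xmit() would never see is_cwnd_limited set on this path. A standalone sketch of the bitmap bookkeeping follows (illustrative only, not patch code; it assumes mptcp_pi_to_flag() maps path index i to bit 1 << (i - 1)):

#include <stdint.h>
#include <stdio.h>

/* Assumed mapping: path indices start at 1, one bit per subflow. */
static uint32_t pi_to_flag(int path_index)
{
	return 1U << (path_index - 1);
}

int main(void)
{
	uint32_t path_mask = 0;
	int pushed[] = { 1, 3 };	/* subflows that sent data this round */
	int pi, i;

	for (i = 0; i < 2; i++)
		path_mask |= pi_to_flag(pushed[i]);

	/* Revisit exactly the flagged subflows, as the hunk does before
	 * calling tcp_cwnd_validate() on each of them. */
	for (pi = 1; pi <= 4; pi++)
		if (path_mask & pi_to_flag(pi))
			printf("validate cwnd on subflow %d\n", pi);
	return 0;
}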
@@ -17299,6 +17320,7 @@ index 000000000000..400ea254c078
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk), *meta_tp = mptcp_meta_tp(tp);
++ struct sock *meta_sk = mptcp_meta_sk(sk);
+ int mss, free_space, full_space, window;
+
+ /* MSS for the peer's data. Previous versions used mss_clamp
@@ -17308,9 +17330,9 @@ index 000000000000..400ea254c078
+ * fluctuations. --SAW 1998/11/1
+ */
+ mss = icsk->icsk_ack.rcv_mss;
-+ free_space = tcp_space(sk);
++ free_space = tcp_space(meta_sk);
+ full_space = min_t(int, meta_tp->window_clamp,
-+ tcp_full_space(sk));
++ tcp_full_space(meta_sk));
+
+ if (mss > full_space)
+ mss = full_space;
@@ -18751,10 +18773,10 @@ index 000000000000..93278f684069
+MODULE_VERSION("0.89");
diff --git a/net/mptcp/mptcp_sched.c b/net/mptcp/mptcp_sched.c
new file mode 100644
-index 000000000000..6c7ff4eceac1
+index 000000000000..4a578821f50e
--- /dev/null
+++ b/net/mptcp/mptcp_sched.c
-@@ -0,0 +1,493 @@
+@@ -0,0 +1,497 @@
+/* MPTCP Scheduler module selector. Highly inspired by tcp_cong.c */
+
+#include <linux/module.h>
@@ -18979,8 +19001,12 @@ index 000000000000..6c7ff4eceac1
+ if (tp_it != tp &&
+ TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) {
+ if (tp->srtt_us < tp_it->srtt_us && inet_csk((struct sock *)tp_it)->icsk_ca_state == TCP_CA_Open) {
++ u32 prior_cwnd = tp_it->snd_cwnd;
++
+ tp_it->snd_cwnd = max(tp_it->snd_cwnd >> 1U, 1U);
-+ if (tp_it->snd_ssthresh != TCP_INFINITE_SSTHRESH)
++
++ /* If in slow start, do not reduce the ssthresh */
++ if (prior_cwnd >= tp_it->snd_ssthresh)
+ tp_it->snd_ssthresh = max(tp_it->snd_ssthresh >> 1U, 2U);
+
+ dsp->last_rbuf_opti = tcp_time_stamp;