/*
 * COPYRIGHT:   See COPYING in the top level directory
 * PROJECT:     ReactOS TCP/IP protocol driver
 * FILE:        transport/tcp/tcp_output.c
 * PURPOSE:     Transmission Control Protocol
 * PROGRAMMERS: Casper S. Hornstrup (chorns@users.sourceforge.net)
 * REVISIONS:
 *   CSH 15-01-2003 Imported from linux kernel 2.4.20
 */

/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system. INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 *
 * Changes:	Pedro Roque	:	Retransmit queue handled by TCP.
 *				:	Fragmentation on mtu decrease
 *				:	Segment collapse on retransmit
 *
 *		Linus Torvalds	:	send_delayed_ack
 *		David S. Miller	:	Charge memory using the right skb
 *					during syn/ack processing.
 *		David S. Miller :	Output engine completely rewritten.
 *		Andrea Arcangeli:	SYNACK carry ts_recent in tsecr.
 *		Cacophonix Gaul :	draft-minshall-nagle-01
 *		J Hadi Salim	:	ECN support
 */
#include <linux/compiler.h>
#include <linux/smp_lock.h>

/* People can turn this off for buggy TCP's found in printers etc. */
int sysctl_tcp_retrans_collapse = 1;
void update_send_head(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb)
{
    tp->send_head = skb->next;
    if (tp->send_head == (struct sk_buff *) &sk->write_queue)
        tp->send_head = NULL;
    tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
    if (tp->packets_out++ == 0)
        tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
}
/* SND.NXT, if window was not shrunk.
 * If window has been shrunk, what should we make? It is not clear at all.
 * Using SND.UNA we will fail to open window, SND.NXT is out of window. :-(
 * Anything in between SND.UNA...SND.UNA+SND.WND also can be already
 * invalid. OK, let's make this for now:
 */
static __inline__ __u32 tcp_acceptable_seq(struct sock *sk, struct tcp_opt *tp)
{
    if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt))
        return tp->snd_nxt;
    else
        return tp->snd_una+tp->snd_wnd;
}
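
/* For reference: the before()/after() helpers used above compare 32-bit
 * sequence numbers modulo 2^32, so the comparison stays correct across
 * wraparound. A minimal standalone sketch of that arithmetic (hypothetical
 * demo, not part of this file):
 */
#if 0
#include <stdio.h>

typedef unsigned int u32;
typedef signed int s32;

static int seq_before(u32 seq1, u32 seq2)
{
    /* True if seq1 precedes seq2 in modulo-2^32 sequence space. */
    return (s32)(seq1 - seq2) < 0;
}

int main(void)
{
    /* 0xFFFFFFF0 precedes 0x00000010 despite numeric wraparound. */
    printf("%d\n", seq_before(0xFFFFFFF0u, 0x00000010u)); /* prints 1 */
    return 0;
}
#endif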
/* Calculate mss to advertise in SYN segment.
 * RFC1122, RFC1063, draft-ietf-tcpimpl-pmtud-01 state that:
 *
 * 1. It is independent of path mtu.
 * 2. Ideally, it is maximal possible segment size i.e. 65535-40.
 * 3. For IPv4 it is reasonable to calculate it from maximal MTU of
 *    attached devices, because some buggy hosts are confused by
 *    large MTUs.
 * 4. We do not make 3, we advertise MSS, calculated from first
 *    hop device mtu, but allow raising it to ip_rt_min_advmss.
 *    This may be overridden via information stored in routing table.
 * 5. Value 65535 for MSS is valid in IPv6 and means "as large as possible,
 *    probably even Jumbo".
 */
static __u16 tcp_advertise_mss(struct sock *sk)
{
    struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
    struct dst_entry *dst = __sk_dst_get(sk);
    int mss = tp->advmss;

    if (dst && dst->advmss < mss) {
        mss = dst->advmss;
        tp->advmss = mss;
    }

    return (__u16)mss;
}
/* RFC2861. Reset CWND after idle period longer than RTO to "restart window".
 * This is the first part of cwnd validation mechanism.
 */
static void tcp_cwnd_restart(struct tcp_opt *tp)
{
    s32 delta = tcp_time_stamp - tp->lsndtime;
    u32 restart_cwnd = tcp_init_cwnd(tp);
    u32 cwnd = tp->snd_cwnd;

    tp->snd_ssthresh = tcp_current_ssthresh(tp);
    restart_cwnd = min(restart_cwnd, cwnd);

    while ((delta -= tp->rto) > 0 && cwnd > restart_cwnd)
        cwnd >>= 1;
    tp->snd_cwnd = max(cwnd, restart_cwnd);
    tp->snd_cwnd_stamp = tcp_time_stamp;
    tp->snd_cwnd_used = 0;
}
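
/* A minimal sketch of the idle-restart decay above (hypothetical demo, not
 * part of this file): cwnd is halved once per RTO of idle time, but never
 * drops below the restart window.
 */
#if 0
#include <stdio.h>

static unsigned int cwnd_after_idle(unsigned int cwnd, unsigned int restart_cwnd,
                                    int idle, int rto)
{
    while ((idle -= rto) > 0 && cwnd > restart_cwnd)
        cwnd >>= 1;    /* halve once per elapsed RTO */
    return cwnd > restart_cwnd ? cwnd : restart_cwnd;
}

int main(void)
{
    /* 3.5 RTOs idle: cwnd 40 -> 20 -> 10 -> 5, floored at restart=2. */
    printf("%u\n", cwnd_after_idle(40, 2, 700, 200)); /* prints 5 */
    return 0;
}
#endif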
static __inline__ void tcp_event_data_sent(struct tcp_opt *tp, struct sk_buff *skb)
{
    u32 now = tcp_time_stamp;

    if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto)
        tcp_cwnd_restart(tp);

    tp->lsndtime = now;

    /* If it is a reply for ato after last received
     * packet, enter pingpong mode.
     */
    if ((u32)(now - tp->ack.lrcvtime) < tp->ack.ato)
        tp->ack.pingpong = 1;
}
static __inline__ void tcp_event_ack_sent(struct sock *sk)
{
    struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

    tcp_dec_quickack_mode(tp);
    tcp_clear_xmit_timer(sk, TCP_TIME_DACK);
}
/* Choose a new window to advertise, update state in tcp_opt for the
 * socket, and return result with RFC1323 scaling applied. The return
 * value can be stuffed directly into th->window for an outgoing
 * frame.
 */
static __inline__ u16 tcp_select_window(struct sock *sk)
{
    struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
    u32 cur_win = tcp_receive_window(tp);
    u32 new_win = __tcp_select_window(sk);

    /* Never shrink the offered window */
    if (new_win < cur_win) {
        /* Danger Will Robinson!
         * Don't update rcv_wup/rcv_wnd here or else
         * we will not be able to advertise a zero
         * window in time.  --DaveM
         *
         * Relax Will Robinson.
         */
        new_win = cur_win;
    }
    tp->rcv_wnd = new_win;
    tp->rcv_wup = tp->rcv_nxt;

    /* RFC1323 scaling applied */
    new_win >>= tp->rcv_wscale;

    /* If we advertise zero window, disable fast path. */
    if (new_win == 0)
        tp->pred_flags = 0;

    return new_win;
}
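
/* A standalone sketch of the RFC1323 shift applied above (hypothetical
 * demo, not part of this file): the 16-bit header field carries
 * window >> wscale, and the peer undoes it with <<.
 */
#if 0
#include <stdio.h>

int main(void)
{
    unsigned int rcv_wnd = 262144;                /* 256 KB receive window */
    unsigned int wscale = 3;                      /* negotiated in the SYN exchange */
    unsigned short th_window = rcv_wnd >> wscale; /* on-wire value */

    /* Peer reconstructs the window; granularity is 1 << wscale bytes. */
    printf("wire=%u effective=%u\n", th_window,
           (unsigned int)th_window << wscale);    /* wire=32768 effective=262144 */
    return 0;
}
#endif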
/* This routine actually transmits TCP packets queued in by
 * tcp_do_sendmsg(). This is used by both the initial
 * transmission and possible later retransmissions.
 * All SKB's seen here are completely headerless. It is our
 * job to build the TCP header, and pass the packet down to
 * IP so it can do the same plus pass the packet off to the
 * device.
 *
 * We are working here with either a clone of the original
 * SKB, or a fresh unique copy made by the retransmit engine.
 */
int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
{
    if (skb != NULL) {
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
        int tcp_header_size = tp->tcp_header_len;
        struct tcphdr *th;
        int sysctl_flags;
        int err;

#define SYSCTL_FLAG_TSTAMPS 0x1
#define SYSCTL_FLAG_WSCALE  0x2
#define SYSCTL_FLAG_SACK    0x4
        sysctl_flags = 0;
        if (tcb->flags & TCPCB_FLAG_SYN) {
            tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
            if (sysctl_tcp_timestamps) {
                tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
                sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
            }
            if (sysctl_tcp_window_scaling) {
                tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
                sysctl_flags |= SYSCTL_FLAG_WSCALE;
            }
            if (sysctl_tcp_sack) {
                sysctl_flags |= SYSCTL_FLAG_SACK;
                if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
                    tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
            }
        } else if (tp->eff_sacks) {
            /* A SACK is 2 pad bytes, a 2 byte header, plus
             * 2 32-bit sequence numbers for each SACK block.
             */
            tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
                                (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
        }
        th = (struct tcphdr *) skb_push(skb, tcp_header_size);
        skb->h.th = th;
        skb_set_owner_w(skb, sk);

        /* Build TCP header and checksum it. */
        th->source = sk->sport;
        th->dest = sk->dport;
        th->seq = htonl(tcb->seq);
        th->ack_seq = htonl(tp->rcv_nxt);
        *(((__u16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | tcb->flags);
        if (tcb->flags & TCPCB_FLAG_SYN) {
            /* RFC1323: The window in SYN & SYN/ACK segments
             * is never scaled.
             */
            th->window = htons(tp->rcv_wnd);
        } else {
            th->window = htons(tcp_select_window(sk));
        }
        th->check = 0;
        th->urg_ptr = 0;

        if (tp->urg_mode &&
            between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF)) {
            th->urg_ptr = htons(tp->snd_up-tcb->seq);
            th->urg = 1;
        }

        if (tcb->flags & TCPCB_FLAG_SYN) {
            tcp_syn_build_options((__u32 *)(th + 1),
                                  tcp_advertise_mss(sk),
                                  (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
                                  (sysctl_flags & SYSCTL_FLAG_SACK),
                                  (sysctl_flags & SYSCTL_FLAG_WSCALE),
                                  tp->rcv_wscale,
                                  tcb->when,
                                  tp->ts_recent);
        } else {
            tcp_build_and_update_options((__u32 *)(th + 1),
                                         tp, tcb->when);
        }
        TCP_ECN_send(sk, tp, skb, tcp_header_size);

        tp->af_specific->send_check(sk, th, skb->len, skb);

        if (tcb->flags & TCPCB_FLAG_ACK)
            tcp_event_ack_sent(sk);

        if (skb->len != tcp_header_size)
            tcp_event_data_sent(tp, skb);

        TCP_INC_STATS(TcpOutSegs);
        err = tp->af_specific->queue_xmit(skb);
        if (err <= 0)
            return err;

        tcp_enter_cwr(tp);

        /* NET_XMIT_CN is special. It does not guarantee,
         * that this packet is lost. It tells that device
         * is about to start to drop packets or already
         * drops some packets of the same priority and
         * invokes us to send less aggressively.
         */
        return err == NET_XMIT_CN ? 0 : err;
    }
    return -ENOBUFS;

#undef SYSCTL_FLAG_TSTAMPS
#undef SYSCTL_FLAG_WSCALE
#undef SYSCTL_FLAG_SACK
}
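
/* A standalone sketch of the header-word trick used in tcp_transmit_skb()
 * above (hypothetical demo, not part of this file): the 13th and 14th
 * header bytes pack the 4-bit data offset (in 32-bit words) together with
 * the flag bits.
 */
#if 0
#include <stdio.h>

int main(void)
{
    unsigned int tcp_header_size = 32;  /* 20-byte header + 12 bytes of options */
    unsigned int flags = 0x02 | 0x10;   /* SYN | ACK bits of the flag byte */
    unsigned short word = ((tcp_header_size >> 2) << 12) | flags;

    /* doff = 32/4 = 8 words; the flags live in the low byte. */
    printf("doff=%u flags=0x%02x\n", word >> 12, word & 0xff); /* doff=8 flags=0x12 */
    return 0;
}
#endif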
/* This is the main buffer sending routine. We queue the buffer
 * and decide whether to queue or transmit now.
 *
 * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
 * otherwise socket can stall.
 */
void tcp_send_skb(struct sock *sk, struct sk_buff *skb, int force_queue, unsigned cur_mss)
{
    struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

    /* Advance write_seq and place onto the write_queue. */
    tp->write_seq = TCP_SKB_CB(skb)->end_seq;
    __skb_queue_tail(&sk->write_queue, skb);
    tcp_charge_skb(sk, skb);

    if (!force_queue && tp->send_head == NULL && tcp_snd_test(tp, skb, cur_mss, tp->nonagle)) {
        /* Send it out now. */
        TCP_SKB_CB(skb)->when = tcp_time_stamp;
        if (tcp_transmit_skb(sk, skb_clone(skb, sk->allocation)) == 0) {
            tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
            tcp_minshall_update(tp, cur_mss, skb);
            if (tp->packets_out++ == 0)
                tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
            return;
        }
    }

    /* Queue it, remembering where we must start sending. */
    if (tp->send_head == NULL)
        tp->send_head = skb;
}
/* Send _single_ skb sitting at the send head. This function requires
 * true push pending frames to setup probe timer etc.
 */
void tcp_push_one(struct sock *sk, unsigned cur_mss)
{
    struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
    struct sk_buff *skb = tp->send_head;

    if (tcp_snd_test(tp, skb, cur_mss, 1)) {
        /* Send it out now. */
        TCP_SKB_CB(skb)->when = tcp_time_stamp;
        if (tcp_transmit_skb(sk, skb_clone(skb, sk->allocation)) == 0) {
            tp->send_head = NULL;
            tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
            if (tp->packets_out++ == 0)
                tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
        }
    }
}
/* Split fragmented skb to two parts at length len. */
static void skb_split(struct sk_buff *skb, struct sk_buff *skb1, u32 len)
{
    int i;
    int pos = skb->len - skb->data_len;

    if (len < pos) {
        /* Split line is inside header. */
        memcpy(skb_put(skb1, pos-len), skb->data + len, pos-len);

        /* And move data appendix as is. */
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
            skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];

        skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
        skb_shinfo(skb)->nr_frags = 0;

        skb1->data_len = skb->data_len;
        skb1->len += skb1->data_len;
        skb->data_len = 0;
        skb->len = len;
        skb->tail = skb->data+len;
    } else {
        int k = 0;
        int nfrags = skb_shinfo(skb)->nr_frags;

        /* Second chunk has no header, nothing to copy. */

        skb_shinfo(skb)->nr_frags = 0;
        skb1->len = skb1->data_len = skb->len - len;
        skb->len = len;
        skb->data_len = len - pos;

        for (i=0; i<nfrags; i++) {
            int size = skb_shinfo(skb)->frags[i].size;
            if (pos + size > len) {
                skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];

                if (pos < len) {
                    /* Split frag.
                     * We have two variants in this case:
                     * 1. Move all the frag to the second
                     *    part, if it is possible. F.e.
                     *    this approach is mandatory for TUX,
                     *    where splitting is expensive.
                     * 2. Split accurately. This is what we do.
                     */
                    get_page(skb_shinfo(skb)->frags[i].page);
                    skb_shinfo(skb1)->frags[0].page_offset += (len-pos);
                    skb_shinfo(skb1)->frags[0].size -= (len-pos);
                    skb_shinfo(skb)->frags[i].size = len-pos;
                    skb_shinfo(skb)->nr_frags++;
                }
                k++;
            } else
                skb_shinfo(skb)->nr_frags++;
            pos += size;
        }
        skb_shinfo(skb1)->nr_frags = k;
    }
}
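
/* A minimal sketch of the split logic above on a flat buffer (hypothetical
 * demo, not part of this file): everything past offset len moves to the
 * second buffer, and the first is truncated to len bytes.
 */
#if 0
#include <stdio.h>
#include <string.h>

int main(void)
{
    char pkt[16] = "HEADERpayloadXX";
    char tail[16];
    unsigned int len = 6;   /* split point: keep 6 bytes in the first part */
    unsigned int total = 15;

    memcpy(tail, pkt + len, total - len);   /* move the tail as-is */
    tail[total - len] = '\0';
    pkt[len] = '\0';                        /* truncate the head to len */

    printf("first=\"%s\" second=\"%s\"\n", pkt, tail); /* HEADER / payloadXX */
    return 0;
}
#endif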
/* Function to create two new TCP segments. Shrinks the given segment
 * to the specified size and appends a new segment with the rest of the
 * packet to the list. This won't be called frequently, I hope.
 * Remember, these are still headerless SKBs at this point.
 */
static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
{
    struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
    struct sk_buff *buff;
    int nsize = skb->len - len;
    u16 flags;

    if (skb_cloned(skb) &&
        skb_is_nonlinear(skb) &&
        pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
        return -ENOMEM;

    /* Get a new skb... force flag on. */
    buff = tcp_alloc_skb(sk, nsize, GFP_ATOMIC);
    if (buff == NULL)
        return -ENOMEM; /* We'll just try again later. */
    tcp_charge_skb(sk, buff);

    /* Correct the sequence numbers. */
    TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
    TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
    TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;

    /* PSH and FIN should only be set in the second packet. */
    flags = TCP_SKB_CB(skb)->flags;
    TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
    TCP_SKB_CB(buff)->flags = flags;
    TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_EVER_RETRANS|TCPCB_AT_TAIL);
    if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) {
        tp->lost_out++;
        tp->left_out++;
    }
    TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;

    if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) {
        /* Copy and checksum data tail into the new buffer. */
        buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize),
                                               nsize, 0);
        skb_trim(skb, len);
        skb->csum = csum_block_sub(skb->csum, buff->csum, len);
    } else {
        skb->ip_summed = CHECKSUM_HW;
        skb_split(skb, buff, len);
    }

    buff->ip_summed = skb->ip_summed;

    /* Looks stupid, but our code really uses when of
     * skbs, which it never sent before. --ANK
     */
    TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;

    /* Link BUFF into the send queue. */
    __skb_append(skb, buff);

    return 0;
}
/* This function synchronizes snd mss to current pmtu/exthdr set.

   tp->user_mss is mss set by user by TCP_MAXSEG. It does NOT account
   for TCP options, but includes only bare TCP header.

   tp->mss_clamp is mss negotiated at connection setup.
   It is minimum of user_mss and mss received with SYN.
   It also does not include TCP options.

   tp->pmtu_cookie is last pmtu, seen by this function.

   tp->mss_cache is current effective sending mss, including
   all tcp options except for SACKs. It is evaluated,
   taking into account current pmtu, but never exceeds
   tp->mss_clamp.

   NOTE1. rfc1122 clearly states that advertised MSS
   DOES NOT include either tcp or ip options.

   NOTE2. tp->pmtu_cookie and tp->mss_cache are READ ONLY outside
   this function.			--ANK (980731)
 */
int tcp_sync_mss(struct sock *sk, u32 pmtu)
{
    struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
    int mss_now;

    /* Calculate base mss without TCP options:
       It is MMS_S - sizeof(tcphdr) of rfc1122
     */
    mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr);

    /* Clamp it (mss_clamp does not include tcp options) */
    if (mss_now > tp->mss_clamp)
        mss_now = tp->mss_clamp;

    /* Now subtract optional transport overhead */
    mss_now -= tp->ext_header_len;

    /* Then reserve room for full set of TCP options and 8 bytes of data */
    if (mss_now < 48)
        mss_now = 48;

    /* Now subtract TCP options size, not including SACKs */
    mss_now -= tp->tcp_header_len - sizeof(struct tcphdr);

    /* Bound mss with half of window */
    if (tp->max_window && mss_now > (tp->max_window>>1))
        mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len);

    /* And store cached results */
    tp->pmtu_cookie = pmtu;
    tp->mss_cache = mss_now;

    return mss_now;
}
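
/* A standalone sketch of the MSS arithmetic above for IPv4 without options
 * (hypothetical demo, not part of this file): an Ethernet path MTU of 1500
 * minus 20 bytes of IP header minus 20 bytes of TCP header gives 1460.
 */
#if 0
#include <stdio.h>

int main(void)
{
    unsigned int pmtu = 1500;           /* typical Ethernet path MTU */
    unsigned int net_header_len = 20;   /* IPv4 header, no options */
    unsigned int tcphdr_len = 20;       /* bare TCP header */
    unsigned int mss_clamp = 65535;     /* nothing negotiated lower */
    unsigned int mss_now = pmtu - net_header_len - tcphdr_len;

    if (mss_now > mss_clamp)
        mss_now = mss_clamp;
    printf("mss_now=%u\n", mss_now);    /* prints mss_now=1460 */
    return 0;
}
#endif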
/* This routine writes packets to the network. It advances the
 * send_head. This happens as incoming acks open up the remote
 * window for us.
 *
 * Returns 1, if no segments are in flight and we have queued segments, but
 * cannot send anything now because of SWS or another problem.
 */
int tcp_write_xmit(struct sock *sk, int nonagle)
{
    struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
    unsigned int mss_now;

    /* If we are closed, the bytes will have to remain here.
     * In time closedown will finish, we empty the write queue and all
     * will be happy.
     */
    if (sk->state != TCP_CLOSE) {
        struct sk_buff *skb;
        int sent_pkts = 0;

        /* Account for SACKS, we may need to fragment due to this.
         * It is just like the real MSS changing on us midstream.
         * We also handle things correctly when the user adds some
         * IP options mid-stream. Silly to do, but cover it.
         */
        mss_now = tcp_current_mss(sk);

        while ((skb = tp->send_head) &&
               tcp_snd_test(tp, skb, mss_now, tcp_skb_is_last(sk, skb) ? nonagle : 1)) {
            if (skb->len > mss_now) {
                if (tcp_fragment(sk, skb, mss_now))
                    break;
            }

            TCP_SKB_CB(skb)->when = tcp_time_stamp;
            if (tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)))
                break;
            /* Advance the send_head. This one is sent out. */
            update_send_head(sk, tp, skb);
            tcp_minshall_update(tp, mss_now, skb);
            sent_pkts = 1;
        }

        if (sent_pkts) {
            tcp_cwnd_validate(sk, tp);
            return 0;
        }

        return !tp->packets_out && tp->send_head;
    }
    return 0;
}
/* This function returns the amount that we can raise the
 * usable window based on the following constraints
 *
 * 1. The window can never be shrunk once it is offered (RFC 793)
 * 2. We limit memory per socket
 *
 * RFC 1122:
 * "the suggested [SWS] avoidance algorithm for the receiver is to keep
 *  RECV.NEXT + RCV.WIN fixed until:
 *  RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
 *
 * i.e. don't raise the right edge of the window until you can raise
 * it at least MSS bytes.
 *
 * Unfortunately, the recommended algorithm breaks header prediction,
 * since header prediction assumes th->window stays fixed.
 *
 * Strictly speaking, keeping th->window fixed violates the receiver
 * side SWS prevention criteria. The problem is that under this rule
 * a stream of single byte packets will cause the right side of the
 * window to always advance by a single byte.
 *
 * Of course, if the sender implements sender side SWS prevention
 * then this will not be a problem.
 *
 * BSD seems to make the following compromise:
 *
 *	If the free space is less than the 1/4 of the maximum
 *	space available and the free space is less than 1/2 mss,
 *	then set the window to 0.
 *	[ Actually, bsd uses MSS and 1/4 of maximal _window_ ]
 *	Otherwise, just prevent the window from shrinking
 *	and from being larger than the largest representable value.
 *
 * This prevents incremental opening of the window in the regime
 * where TCP is limited by the speed of the reader side taking
 * data out of the TCP receive queue. It does nothing about
 * those cases where the window is constrained on the sender side
 * because the pipeline is full.
 *
 * BSD also seems to "accidentally" limit itself to windows that are a
 * multiple of MSS, at least until the free space gets quite small.
 * This would appear to be a side effect of the mbuf implementation.
 * Combining these two algorithms results in the observed behavior
 * of having a fixed window size at almost all times.
 *
 * Below we obtain similar behavior by forcing the offered window to
 * a multiple of the mss when it is feasible to do so.
 *
 * Note, we don't "adjust" for TIMESTAMP or SACK option bytes.
 * Regular options like TIMESTAMP are taken into account.
 */
u32 __tcp_select_window(struct sock *sk)
{
    struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
    /* MSS for the peer's data. Previous versions used mss_clamp
     * here. I don't know if the value based on our guesses
     * of peer's MSS is better for the performance. It's more correct
     * but may be worse for the performance because of rcv_mss
     * fluctuations. --SAW 1998/11/1
     */
    int mss = tp->ack.rcv_mss;
    int free_space = tcp_space(sk);
    int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
    int window;

    if (mss > full_space)
        mss = full_space;

    if (free_space < full_space/2) {
        tp->ack.quick = 0;

        if (tcp_memory_pressure)
            tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss);

        if (free_space < mss)
            return 0;
    }

    if (free_space > tp->rcv_ssthresh)
        free_space = tp->rcv_ssthresh;

    /* Get the largest window that is a nice multiple of mss.
     * Window clamp already applied above.
     * If our current window offering is within 1 mss of the
     * free space we just keep it. This prevents the divide
     * and multiply from happening most of the time.
     * We also don't do any window rounding when the free space
     * is too small.
     */
    window = tp->rcv_wnd;
    if (window <= free_space - mss || window > free_space)
        window = (free_space/mss)*mss;

    return window;
}
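
/* A standalone sketch of the MSS-rounding rule above (hypothetical demo,
 * not part of this file): the offered window is kept if it is within one
 * MSS of the free space, otherwise it is rounded down to an MSS multiple.
 */
#if 0
#include <stdio.h>

static unsigned int round_window(unsigned int window, unsigned int free_space,
                                 unsigned int mss)
{
    if (window <= free_space - mss || window > free_space)
        window = (free_space / mss) * mss;
    return window;
}

int main(void)
{
    /* 10000 bytes free, 1460-byte MSS: 8192 is stale, re-round to 8760. */
    printf("%u\n", round_window(8192, 10000, 1460));    /* prints 8760 */
    /* 9500 is within one MSS of 10000, so it is kept as-is. */
    printf("%u\n", round_window(9500, 10000, 1460));    /* prints 9500 */
    return 0;
}
#endif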
/* Attempt to collapse two adjacent SKB's during retransmission. */
static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now)
{
    struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
    struct sk_buff *next_skb = skb->next;

    /* The first test we must make is that neither of these two
     * SKB's are still referenced by someone else.
     */
    if (!skb_cloned(skb) && !skb_cloned(next_skb)) {
        int skb_size = skb->len, next_skb_size = next_skb->len;
        u16 flags = TCP_SKB_CB(skb)->flags;

        /* Also punt if next skb has been SACK'd. */
        if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
            return;

        /* Next skb is out of window. */
        if (after(TCP_SKB_CB(next_skb)->end_seq, tp->snd_una+tp->snd_wnd))
            return;

        /* Punt if not enough space exists in the first SKB for
         * the data in the second, or the total combined payload
         * would exceed the MSS.
         */
        if ((next_skb_size > skb_tailroom(skb)) ||
            ((skb_size + next_skb_size) > mss_now))
            return;

        /* Ok. We will be able to collapse the packet. */
        __skb_unlink(next_skb, next_skb->list);

        if (next_skb->ip_summed == CHECKSUM_HW)
            skb->ip_summed = CHECKSUM_HW;

        if (skb->ip_summed != CHECKSUM_HW) {
            memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
            skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
        }

        /* Update sequence range on original skb. */
        TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;

        /* Merge over control information. */
        flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */
        TCP_SKB_CB(skb)->flags = flags;

        /* All done, get rid of second SKB and account for it so
         * packet counting does not break.
         */
        TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL);
        if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS)
            tp->retrans_out--;
        if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) {
            tp->lost_out--;
            tp->left_out--;
        }
        /* Reno case is special. Sigh... */
        if (!tp->sack_ok && tp->sacked_out) {
            tp->sacked_out--;
            tp->left_out--;
        }

        /* Not quite right: it can be > snd.fack, but
         * it is better to underestimate fackets.
         */
        if (tp->fackets_out)
            tp->fackets_out--;
        tcp_free_skb(sk, next_skb);
        tp->packets_out--;
    }
}
/* Do a simple retransmit without using the backoff mechanisms in
 * tcp_timer. This is used for path mtu discovery.
 * The socket is already locked here.
 */
void tcp_simple_retransmit(struct sock *sk)
{
    struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
    struct sk_buff *skb;
    unsigned int mss = tcp_current_mss(sk);
    int lost = 0;

    for_retrans_queue(skb, sk, tp) {
        if (skb->len > mss &&
            !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
            if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
                TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
                tp->retrans_out--;
            }
            if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) {
                TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
                tp->lost_out++;
                lost = 1;
            }
        }
    }

    if (!lost)
        return;

    tcp_sync_left_out(tp);

    /* Don't muck with the congestion window here.
     * Reason is that we do not increase amount of _data_
     * in network, but units changed and effective
     * cwnd/ssthresh really reduced now.
     */
    if (tp->ca_state != TCP_CA_Loss) {
        tp->high_seq = tp->snd_nxt;
        tp->snd_ssthresh = tcp_current_ssthresh(tp);
        tp->prior_ssthresh = 0;
        tp->undo_marker = 0;
        tp->ca_state = TCP_CA_Loss;
    }
    tcp_xmit_retransmit_queue(sk);
}
/* This retransmits one SKB. Policy decisions and retransmit queue
 * state updates are done by the caller. Returns non-zero if an
 * error occurred which prevented the send.
 */
int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
{
    struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
    unsigned int cur_mss = tcp_current_mss(sk);
    int err;

    /* Do not send more than we queued. 1/4 is reserved for possible
     * copying overhead: fragmentation, tunneling, mangling etc.
     */
    if (atomic_read(&sk->wmem_alloc) > min(sk->wmem_queued+(sk->wmem_queued>>2),sk->sndbuf))
        return -EAGAIN;

    /* If receiver has shrunk his window, and skb is out of
     * new window, do not retransmit it. The exception is the
     * case, when window is shrunk to zero. In this case
     * our retransmit serves as a zero window probe.
     */
    if (!before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)
        && TCP_SKB_CB(skb)->seq != tp->snd_una)
        return -EAGAIN;

    if (skb->len > cur_mss) {
        if (tcp_fragment(sk, skb, cur_mss))
            return -ENOMEM; /* We'll try again later. */

        /* New SKB created, account for it. */
        tp->packets_out++;
    }

    /* Collapse two adjacent packets if worthwhile and we can. */
    if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
        (skb->len < (cur_mss >> 1)) &&
        (skb->next != tp->send_head) &&
        (skb->next != (struct sk_buff *)&sk->write_queue) &&
        (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(skb->next)->nr_frags == 0) &&
        (sysctl_tcp_retrans_collapse != 0))
        tcp_retrans_try_collapse(sk, skb, cur_mss);

    if (tp->af_specific->rebuild_header(sk))
        return -EHOSTUNREACH; /* Routing failure or similar. */

    /* Some Solaris stacks overoptimize and ignore the FIN on a
     * retransmit when old data is attached. So strip it off
     * since it is cheap to do so and saves bytes on the network.
     */
    if (skb->len > 0 &&
        (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
        tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
        if (!pskb_trim(skb, 0)) {
            TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
            skb->ip_summed = CHECKSUM_NONE;
            skb->csum = 0;
        }
    }

    /* Make a copy, if the first transmission SKB clone we made
     * is still in somebody's hands, else make a clone.
     */
    TCP_SKB_CB(skb)->when = tcp_time_stamp;

    err = tcp_transmit_skb(sk, (skb_cloned(skb) ?
                                pskb_copy(skb, GFP_ATOMIC):
                                skb_clone(skb, GFP_ATOMIC)));

    if (err == 0) {
        /* Update global TCP statistics. */
        TCP_INC_STATS(TcpRetransSegs);

#if FASTRETRANS_DEBUG > 0
        if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
            if (net_ratelimit())
                printk(KERN_DEBUG "retrans_out leaked.\n");
        }
#endif
        TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
        tp->retrans_out++;

        /* Save stamp of the first retransmit. */
        if (!tp->retrans_stamp)
            tp->retrans_stamp = TCP_SKB_CB(skb)->when;

        tp->undo_retrans++;

        /* snd_nxt is stored to detect loss of retransmitted segment,
         * see tcp_input.c tcp_sacktag_write_queue().
         */
        TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
    }
    return err;
}
/* This gets called after a retransmit timeout, and the initially
 * retransmitted data is acknowledged. It tries to continue
 * resending the rest of the retransmit queue, until either
 * we've sent it all or the congestion window limit is reached.
 * If doing SACK, the first ACK which comes back for a timeout
 * based retransmit packet might feed us FACK information again.
 * If so, we use it to avoid unnecessary retransmissions.
 */
void tcp_xmit_retransmit_queue(struct sock *sk)
{
    struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
    struct sk_buff *skb;
    int packet_cnt = tp->lost_out;

    /* First pass: retransmit lost packets. */
    if (packet_cnt) {
        for_retrans_queue(skb, sk, tp) {
            __u8 sacked = TCP_SKB_CB(skb)->sacked;

            if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
                return;

            if (sacked&TCPCB_LOST) {
                if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
                    if (tcp_retransmit_skb(sk, skb))
                        return;
                    if (tp->ca_state != TCP_CA_Loss)
                        NET_INC_STATS_BH(TCPFastRetrans);
                    else
                        NET_INC_STATS_BH(TCPSlowStartRetrans);

                    if (skb == skb_peek(&sk->write_queue))
                        tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
                }

                if (--packet_cnt <= 0)
                    break;
            }
        }
    }

    /* OK, demanded retransmission is finished. */

    /* Forward retransmissions are possible only during Recovery. */
    if (tp->ca_state != TCP_CA_Recovery)
        return;

    /* No forward retransmissions in Reno are possible. */
    if (!tp->sack_ok)
        return;

    /* Yeah, we have to make a difficult choice between forward transmission
     * and retransmission... Both ways have their merits...
     *
     * For now we do not retransmit anything, while we have some new
     * segments to send.
     */
    if (tcp_may_send_now(sk, tp))
        return;

    packet_cnt = 0;

    for_retrans_queue(skb, sk, tp) {
        if (++packet_cnt > tp->fackets_out)
            break;

        if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
            break;

        if (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS)
            continue;

        /* Ok, retransmit it. */
        if (tcp_retransmit_skb(sk, skb))
            break;

        if (skb == skb_peek(&sk->write_queue))
            tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);

        NET_INC_STATS_BH(TCPForwardRetrans);
    }
}
/* Send a fin. The caller locks the socket for us. This cannot be
 * allowed to fail queueing a FIN frame under any circumstances.
 */
void tcp_send_fin(struct sock *sk)
{
    struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
    struct sk_buff *skb = skb_peek_tail(&sk->write_queue);
    unsigned int mss_now;

    /* Optimization, tack on the FIN if we have a queue of
     * unsent frames. But be careful about outgoing SACKS
     * and IP options.
     */
    mss_now = tcp_current_mss(sk);

    if (tp->send_head != NULL) {
        TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
        TCP_SKB_CB(skb)->end_seq++;
        tp->write_seq++;
    } else {
        /* Socket is locked, keep trying until memory is available. */
        for (;;) {
            skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL);
            if (skb)
                break;
            yield();
        }

        /* Reserve space for headers and prepare control bits. */
        skb_reserve(skb, MAX_TCP_HEADER);
        skb->csum = 0;
        TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
        TCP_SKB_CB(skb)->sacked = 0;

        /* FIN eats a sequence byte, write_seq advanced by tcp_send_skb(). */
        TCP_SKB_CB(skb)->seq = tp->write_seq;
        TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
        tcp_send_skb(sk, skb, 1, mss_now);
    }
    __tcp_push_pending_frames(sk, tp, mss_now, 1);
}
/* We get here when a process closes a file descriptor (either due to
 * an explicit close() or as a byproduct of exit()'ing) and there
 * was unread data in the receive queue. This behavior is recommended
 * by draft-ietf-tcpimpl-prob-03.txt section 3.10. -DaveM
 */
void tcp_send_active_reset(struct sock *sk, int priority)
{
    struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
    struct sk_buff *skb;

    /* NOTE: No TCP options attached and we never retransmit this. */
    skb = alloc_skb(MAX_TCP_HEADER, priority);
    if (!skb) {
        NET_INC_STATS(TCPAbortFailed);
        return;
    }

    /* Reserve space for headers and prepare control bits. */
    skb_reserve(skb, MAX_TCP_HEADER);
    skb->csum = 0;
    TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
    TCP_SKB_CB(skb)->sacked = 0;

    /* Send it off. */
    TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
    TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
    TCP_SKB_CB(skb)->when = tcp_time_stamp;
    if (tcp_transmit_skb(sk, skb))
        NET_INC_STATS(TCPAbortFailed);
}
/* WARNING: This routine must only be called when we have already sent
 * a SYN packet that crossed the incoming SYN that caused this routine
 * to get called. If this assumption fails then the initial rcv_wnd
 * and rcv_wscale values will not be correct.
 */
int tcp_send_synack(struct sock *sk)
{
    struct sk_buff* skb;

    skb = skb_peek(&sk->write_queue);
    if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) {
        printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
        return -EFAULT;
    }
    if (!(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_ACK)) {
        if (skb_cloned(skb)) {
            struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
            if (nskb == NULL)
                return -ENOMEM;
            __skb_unlink(skb, &sk->write_queue);
            __skb_queue_head(&sk->write_queue, nskb);
            tcp_free_skb(sk, skb);
            tcp_charge_skb(sk, nskb);
            skb = nskb;
        }

        TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK;
        TCP_ECN_send_synack(&sk->tp_pinfo.af_tcp, skb);
    }
    TCP_SKB_CB(skb)->when = tcp_time_stamp;
    return tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
}
/*
 * Prepare a SYN-ACK.
 */
struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
                                 struct open_request *req)
{
    struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
    struct tcphdr *th;
    int tcp_header_size;
    struct sk_buff *skb;

    skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
    if (skb == NULL)
        return NULL;

    /* Reserve space for headers. */
    skb_reserve(skb, MAX_TCP_HEADER);

    skb->dst = dst_clone(dst);

    tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS +
                       (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
                       (req->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
                       /* SACK_PERM is in the place of NOP NOP of TS */
                       ((req->sack_ok && !req->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));
    skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size);

    memset(th, 0, sizeof(struct tcphdr));
    th->syn = 1;
    th->ack = 1;
    TCP_ECN_make_synack(req, th);
    th->source = sk->sport;
    th->dest = req->rmt_port;
    TCP_SKB_CB(skb)->seq = req->snt_isn;
    TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
    th->seq = htonl(TCP_SKB_CB(skb)->seq);
    th->ack_seq = htonl(req->rcv_isn + 1);
    if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
        __u8 rcv_wscale;
        /* Set this up on the first call only */
        req->window_clamp = tp->window_clamp ? : dst->window;
        /* tcp_full_space because it is guaranteed to be the first packet */
        tcp_select_initial_window(tcp_full_space(sk),
                                  dst->advmss - (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
                                  &req->rcv_wnd,
                                  &req->window_clamp,
                                  req->wscale_ok,
                                  &rcv_wscale);
        req->rcv_wscale = rcv_wscale;
    }

    /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
    th->window = htons(req->rcv_wnd);

    TCP_SKB_CB(skb)->when = tcp_time_stamp;
    tcp_syn_build_options((__u32 *)(th + 1), dst->advmss, req->tstamp_ok,
                          req->sack_ok, req->wscale_ok, req->rcv_wscale,
                          TCP_SKB_CB(skb)->when,
                          req->ts_recent);

    skb->csum = 0;
    th->doff = (tcp_header_size >> 2);
    TCP_INC_STATS(TcpOutSegs);
    return skb;
}
/*
 * Do all connect socket setups that can be done AF independent.
 */
static inline void tcp_connect_init(struct sock *sk)
{
    struct dst_entry *dst = __sk_dst_get(sk);
    struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

    /* We'll fix this up when we get a response from the other end.
     * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
     */
    tp->tcp_header_len = sizeof(struct tcphdr) +
        (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);

    /* If user gave his TCP_MAXSEG, record it to clamp */
    if (tp->user_mss)
        tp->mss_clamp = tp->user_mss;
    tp->max_window = 0;
    tcp_sync_mss(sk, dst->pmtu);

    if (!tp->window_clamp)
        tp->window_clamp = dst->window;
    tp->advmss = dst->advmss;
    tcp_initialize_rcv_mss(sk);

    tcp_select_initial_window(tcp_full_space(sk),
                              tp->advmss - (tp->ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
                              &tp->rcv_wnd,
                              &tp->window_clamp,
                              sysctl_tcp_window_scaling,
                              &tp->rcv_wscale);

    tp->rcv_ssthresh = tp->rcv_wnd;

    sk->err = 0;
    sk->done = 0;
    tp->snd_wnd = 0;
    tcp_init_wl(tp, tp->write_seq, 0);
    tp->snd_una = tp->write_seq;
    tp->snd_sml = tp->write_seq;
    tp->rcv_nxt = 0;
    tp->rcv_wup = 0;
    tp->copied_seq = 0;

    tp->rto = TCP_TIMEOUT_INIT;
    tp->retransmits = 0;
    tcp_clear_retrans(tp);
}
/*
 * Build a SYN and send it off.
 */
int tcp_connect(struct sock *sk)
{
    struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
    struct sk_buff *buff;

    tcp_connect_init(sk);

    buff = alloc_skb(MAX_TCP_HEADER + 15, sk->allocation);
    if (unlikely(buff == NULL))
        return -ENOBUFS;

    /* Reserve space for headers. */
    skb_reserve(buff, MAX_TCP_HEADER);

    TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
    TCP_ECN_send_syn(tp, buff);
    TCP_SKB_CB(buff)->sacked = 0;
    buff->csum = 0;
    TCP_SKB_CB(buff)->seq = tp->write_seq++;
    TCP_SKB_CB(buff)->end_seq = tp->write_seq;
    tp->snd_nxt = tp->write_seq;
    tp->pushed_seq = tp->write_seq;

    /* Send it off. */
    TCP_SKB_CB(buff)->when = tcp_time_stamp;
    tp->retrans_stamp = TCP_SKB_CB(buff)->when;
    __skb_queue_tail(&sk->write_queue, buff);
    tcp_charge_skb(sk, buff);
    tp->packets_out++;
    tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL));
    TCP_INC_STATS(TcpActiveOpens);

    /* Timer for repeating the SYN until an answer. */
    tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
    return 0;
}
/* Send out a delayed ack, the caller does the policy checking
 * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check()
 * for details.
 */
void tcp_send_delayed_ack(struct sock *sk)
{
    struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
    int ato = tp->ack.ato;
    unsigned long timeout;

    if (ato > TCP_DELACK_MIN) {
        int max_ato = HZ/2;

        if (tp->ack.pingpong || (tp->ack.pending&TCP_ACK_PUSHED))
            max_ato = TCP_DELACK_MAX;

        /* Slow path, intersegment interval is "high". */

        /* If some rtt estimate is known, use it to bound delayed ack.
         * Do not use tp->rto here, use results of rtt measurements
         * directly.
         */
        if (tp->srtt) {
            int rtt = max(tp->srtt>>3, TCP_DELACK_MIN);

            if (rtt < max_ato)
                max_ato = rtt;
        }

        ato = min(ato, max_ato);
    }

    /* Stay within the limit we were given */
    timeout = jiffies + ato;

    /* Use new timeout only if there wasn't an older one earlier. */
    if (tp->ack.pending&TCP_ACK_TIMER) {
        /* If delack timer was blocked or is about to expire,
         * send ACK now.
         */
        if (tp->ack.blocked || time_before_eq(tp->ack.timeout, jiffies+(ato>>2))) {
            tcp_send_ack(sk);
            return;
        }

        if (!time_before(timeout, tp->ack.timeout))
            timeout = tp->ack.timeout;
    }
    tp->ack.pending |= TCP_ACK_SCHED|TCP_ACK_TIMER;
    tp->ack.timeout = timeout;
    if (!mod_timer(&tp->delack_timer, timeout))
        sock_hold(sk);
}
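
/* A minimal sketch of the ack-timeout bounding above (hypothetical demo,
 * not part of this file): ato starts from the estimated interarrival time
 * and is capped by a ceiling derived from the measured rtt.
 */
#if 0
#include <stdio.h>

#define HZ             100
#define DELACK_MIN     (HZ / 25)   /* 40 ms at HZ=100 */

int main(void)
{
    int ato = 30;          /* estimated packet interarrival, in ticks */
    int max_ato = HZ / 2;  /* default ceiling: 500 ms */
    int srtt8 = 96;        /* srtt stored <<3, i.e. 12 ticks smoothed rtt */

    if (ato > DELACK_MIN) {
        int rtt = srtt8 >> 3;

        if (rtt < DELACK_MIN)
            rtt = DELACK_MIN;
        if (rtt < max_ato)
            max_ato = rtt;    /* never delay much past one rtt */
        if (ato > max_ato)
            ato = max_ato;
    }
    printf("ato=%d ticks\n", ato);    /* prints ato=12 */
    return 0;
}
#endif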
/* This routine sends an ack and also updates the window. */
void tcp_send_ack(struct sock *sk)
{
    /* If we have been reset, we may not send again. */
    if (sk->state != TCP_CLOSE) {
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct sk_buff *buff;

        /* We are not putting this on the write queue, so
         * tcp_transmit_skb() will set the ownership to this
         * sock.
         */
        buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
        if (buff == NULL) {
            tcp_schedule_ack(tp);
            tp->ack.ato = TCP_ATO_MIN;
            tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX);
            return;
        }

        /* Reserve space for headers and prepare control bits. */
        skb_reserve(buff, MAX_TCP_HEADER);
        buff->csum = 0;
        TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
        TCP_SKB_CB(buff)->sacked = 0;

        /* Send it off, this clears delayed acks for us. */
        TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
        TCP_SKB_CB(buff)->when = tcp_time_stamp;
        tcp_transmit_skb(sk, buff);
    }
}
/* This routine sends a packet with an out of date sequence
 * number. It assumes the other end will try to ack it.
 *
 * Question: what should we do in urgent mode?
 * 4.4BSD forces sending single byte of data. We cannot send
 * out of window data, because we have SND.NXT==SND.MAX...
 *
 * Current solution: to send TWO zero-length segments in urgent mode:
 * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
 * out-of-date with SND.UNA-1 to probe window.
 */
static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
{
    struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
    struct sk_buff *skb;

    /* We don't queue it, tcp_transmit_skb() sets ownership. */
    skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
    if (skb == NULL)
        return -1;

    /* Reserve space for headers and set control bits. */
    skb_reserve(skb, MAX_TCP_HEADER);
    skb->csum = 0;
    TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
    TCP_SKB_CB(skb)->sacked = urgent;

    /* Use a previous sequence. This should cause the other
     * end to send an ack. Don't queue or clone SKB, just
     * send it.
     */
    TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1;
    TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
    TCP_SKB_CB(skb)->when = tcp_time_stamp;
    return tcp_transmit_skb(sk, skb);
}
int tcp_write_wakeup(struct sock *sk)
{
    if (sk->state != TCP_CLOSE) {
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct sk_buff *skb;

        if ((skb = tp->send_head) != NULL &&
            before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) {
            int err;
            int mss = tcp_current_mss(sk);
            int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq;

            if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
                tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;

            /* We are probing the opening of a window
             * but the window size is != 0; this must have
             * been a result of sender-side SWS avoidance.
             */
            if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
                skb->len > mss) {
                seg_size = min(seg_size, mss);
                TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
                if (tcp_fragment(sk, skb, seg_size))
                    return -1;
            }
            TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
            TCP_SKB_CB(skb)->when = tcp_time_stamp;
            err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
            if (!err)
                update_send_head(sk, tp, skb);
            return err;
        } else {
            if (tp->urg_mode &&
                between(tp->snd_up, tp->snd_una+1, tp->snd_una+0xFFFF))
                tcp_xmit_probe_skb(sk, TCPCB_URG);
            return tcp_xmit_probe_skb(sk, 0);
        }
    }
    return -1;
}
/* A window probe timeout has occurred. If window is not closed send
 * a partial packet else a zero probe.
 */
void tcp_send_probe0(struct sock *sk)
{
    struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
    int err;

    err = tcp_write_wakeup(sk);

    if (tp->packets_out || !tp->send_head) {
        /* Cancel probe timer, if it is not required. */
        tp->probes_out = 0;
        tp->backoff = 0;
        return;
    }

    if (err <= 0) {
        tp->backoff++;
        tp->probes_out++;
        tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0,
                             min(tp->rto << tp->backoff, TCP_RTO_MAX));
    } else {
        /* If packet was not sent due to local congestion,
         * do not backoff and do not remember probes_out.
         * Let local senders fight for local resources.
         *
         * Still, use the accumulated backoff.
         */
        if (!tp->probes_out)
            tp->probes_out = 1;
        tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0,
                             min(tp->rto << tp->backoff, TCP_RESOURCE_PROBE_INTERVAL));
    }
}
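
/* A standalone sketch of the probe backoff above (hypothetical demo, not
 * part of this file): each failed probe doubles the timeout, clamped to a
 * fixed maximum.
 */
#if 0
#include <stdio.h>

#define RTO_MAX 12000    /* clamp, in ticks (120 s at HZ=100) */

int main(void)
{
    unsigned int rto = 300;    /* base retransmission timeout, in ticks */
    unsigned int backoff;

    for (backoff = 0; backoff < 8; backoff++) {
        unsigned int timeout = rto << backoff;

        if (timeout > RTO_MAX)
            timeout = RTO_MAX;    /* min(rto << backoff, RTO_MAX) */
        printf("probe %u: timeout=%u\n", backoff, timeout);
    }
    return 0;    /* timeouts: 300 600 1200 2400 4800 9600 12000 12000 */
}
#endif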