2 * COPYRIGHT: See COPYING in the top level directory
3 * PROJECT: ReactOS TCP/IP protocol driver
4 * FILE: transport/tcp/tcp_ipv4.c
5 * PURPOSE: Transmission Control Protocol
6 * PROGRAMMERS: Casper S. Hornstrup (chorns@users.sourceforge.net)
8 * CSH 15-01-2003 Imported from linux kernel 2.4.20
12 * INET An implementation of the TCP/IP protocol suite for the LINUX
13 * operating system. INET is implemented using the BSD Socket
14 * interface as the means of communication with the user level.
16 * Implementation of the Transmission Control Protocol(TCP).
20 * IPv4 specific functions
25 * linux/ipv4/tcp_input.c
26 * linux/ipv4/tcp_output.c
28 * See tcp.c for author information
30 * This program is free software; you can redistribute it and/or
31 * modify it under the terms of the GNU General Public License
32 * as published by the Free Software Foundation; either version
33 * 2 of the License, or (at your option) any later version.
38 * David S. Miller : New socket lookup architecture.
39 * This code is dedicated to John Dyson.
40 * David S. Miller : Change semantics of established hash,
41 * half is devoted to TIME_WAIT sockets
42 * and the rest go in the other half.
43 * Andi Kleen : Add support for syncookies and fixed
44 * some bugs: ip options weren't passed to
45 * the TCP layer, missed a check for an ACK bit.
46 * Andi Kleen : Implemented fast path mtu discovery.
47 * Fixed many serious bugs in the
48 * open_request handling and moved
49 * most of it into the af independent code.
50 * Added tail drop and some other bugfixes.
51 * Added new listen semantics.
52 * Mike McLagan : Routing by source
53 * Juan Jose Ciarlante: ip_dynaddr bits
54 * Andi Kleen: various fixes.
55 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
56 * Andi Kleen : Fix new listen.
57 * Andi Kleen : Fix accept error reporting.
61 #include <linux/config.h>
62 #include <linux/types.h>
63 #include <linux/fcntl.h>
64 #include <linux/random.h>
65 #include <linux/cache.h>
66 #include <linux/init.h>
71 #include <net/inet_common.h>
73 #include <linux/inet.h>
74 #include <linux/stddef.h>
75 #include <linux/ipsec.h>
81 extern int sysctl_ip_dynaddr;
82 extern int sysctl_ip_default_ttl;
83 int sysctl_tcp_tw_reuse = 0;
85 /* Check TCP sequence numbers in ICMP packets. */
86 #define ICMP_MIN_LENGTH 8
88 /* Socket used for sending RSTs */
90 static struct inode tcp_inode;
91 static struct socket *tcp_socket=&tcp_inode.u.socket_i;
94 void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
98 * ALL members must be initialised to prevent gcc-2.7.2.3 miscompilation
101 struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = {
106 __tcp_listening_hash: { NULL, },
107 __tcp_lhash_lock: RW_LOCK_UNLOCKED,
108 __tcp_lhash_users: ATOMIC_INIT(0),
110 __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.__tcp_lhash_wait),
111 __tcp_portalloc_lock: SPIN_LOCK_UNLOCKED
116 * This array holds the first and last local port number.
117 * For high-usage systems, use sysctl to change this to
120 int sysctl_local_port_range[2] = { 1024, 4999 };
121 int tcp_port_rover = (1024 - 1);
123 static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport,
124 __u32 faddr, __u16 fport)
126 int h = ((laddr ^ lport) ^ (faddr ^ fport));
129 return h & (tcp_ehash_size - 1);
132 static __inline__ int tcp_sk_hashfn(struct sock *sk)
134 __u32 laddr = sk->rcv_saddr;
135 __u16 lport = sk->num;
136 __u32 faddr = sk->daddr;
137 __u16 fport = sk->dport;
139 return tcp_hashfn(laddr, lport, faddr, fport);
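/* For illustration, a minimal stand-alone model of this kind of 4-tuple
 * folding hash. The intermediate mixing shifts and the table size below are
 * assumptions, not taken from this excerpt; the table size must be a power
 * of two for the final mask to work.
 */
#if 0	/* stand-alone sketch, not compiled into the driver */
#include <stdio.h>

typedef unsigned int   u32_t;
typedef unsigned short u16_t;

static int example_ehash_size = 512;	/* hypothetical, power of two */

static int example_hashfn(u32_t laddr, u16_t lport, u32_t faddr, u16_t fport)
{
	int h = ((laddr ^ lport) ^ (faddr ^ fport));
	h ^= h >> 16;			/* fold high bits down (assumed mixing) */
	h ^= h >> 8;
	return h & (example_ehash_size - 1);
}

int main(void)
{
	/* 10.0.0.1:1025 -> 10.0.0.2:80, addresses given as host-order words */
	printf("bucket %d\n", example_hashfn(0x0a000001, 1025, 0x0a000002, 80));
	return 0;
}
#endif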
142 /* Allocate and initialize a new TCP local port bind bucket.
143 * The bindhash mutex for snum's hash chain must be held here.
145 struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
149 struct tcp_bind_bucket *tb;
151 tb = kmem_cache_alloc(tcp_bucket_cachep, SLAB_ATOMIC);
156 if((tb->next = head->chain) != NULL)
157 tb->next->pprev = &tb->next;
159 tb->pprev = &head->chain;
167 /* Caller must disable local BH processing. */
168 static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child)
171 struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(child->num)];
172 struct tcp_bind_bucket *tb;
174 spin_lock(&head->lock);
175 tb = (struct tcp_bind_bucket *)sk->prev;
176 if ((child->bind_next = tb->owners) != NULL)
177 tb->owners->bind_pprev = &child->bind_next;
179 child->bind_pprev = &tb->owners;
180 child->prev = (struct sock *) tb;
181 spin_unlock(&head->lock);
185 __inline__ void tcp_inherit_port(struct sock *sk, struct sock *child)
189 __tcp_inherit_port(sk, child);
194 static inline void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, unsigned short snum)
198 if ((sk->bind_next = tb->owners) != NULL)
199 tb->owners->bind_pprev = &sk->bind_next;
201 sk->bind_pprev = &tb->owners;
202 sk->prev = (struct sock *) tb;
206 static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb)
209 struct sock *sk2 = tb->owners;
210 int sk_reuse = sk->reuse;
212 for( ; sk2 != NULL; sk2 = sk2->bind_next) {
215 sk->bound_dev_if == sk2->bound_dev_if) {
218 sk2->state == TCP_LISTEN) {
219 if (!sk2->rcv_saddr ||
221 (sk2->rcv_saddr == sk->rcv_saddr))
232 /* Obtain a reference to a local port for the given sock,
233 * if snum is zero it means select any available local port.
235 static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
238 struct tcp_bind_hashbucket *head;
239 struct tcp_bind_bucket *tb;
244 int low = sysctl_local_port_range[0];
245 int high = sysctl_local_port_range[1];
246 int remaining = (high - low) + 1;
249 spin_lock(&tcp_portalloc_lock);
250 rover = tcp_port_rover;
252 if ((rover < low) || (rover > high))
254 head = &tcp_bhash[tcp_bhashfn(rover)];
255 spin_lock(&head->lock);
256 for (tb = head->chain; tb; tb = tb->next)
257 if (tb->port == rover)
261 spin_unlock(&head->lock);
262 } while (--remaining > 0);
263 tcp_port_rover = rover;
264 spin_unlock(&tcp_portalloc_lock);
266 /* Exhausted local port range during search? */
271 /* OK, here is the one we will use. HEAD is
272 * non-NULL and we hold its mutex.
277 head = &tcp_bhash[tcp_bhashfn(snum)];
278 spin_lock(&head->lock);
279 for (tb = head->chain; tb != NULL; tb = tb->next)
280 if (tb->port == snum)
283 if (tb != NULL && tb->owners != NULL) {
286 if (tb->fastreuse > 0 && sk->reuse != 0 && sk->state != TCP_LISTEN) {
290 if (tcp_bind_conflict(sk, tb))
296 (tb = tcp_bucket_create(head, snum)) == NULL)
298 if (tb->owners == NULL) {
299 if (sk->reuse && sk->state != TCP_LISTEN)
303 } else if (tb->fastreuse &&
304 ((sk->reuse == 0) || (sk->state == TCP_LISTEN)))
307 if (sk->prev == NULL)
308 tcp_bind_hash(sk, tb, snum);
309 BUG_TRAP(sk->prev == (struct sock *) tb);
313 spin_unlock(&head->lock);
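/* The search above can be pictured with a small stand-alone model: walk the
 * configured range once, starting at the rover, and take the first port whose
 * bucket is free. The port_in_use[] array is a hypothetical stand-in for the
 * tcp_bhash bookkeeping.
 */
#if 0	/* stand-alone sketch of the rover-based ephemeral port search */
#include <stdio.h>

#define LOW   1024
#define HIGH  4999

static int port_in_use[HIGH + 1];	/* hypothetical stand-in for the bind hash */
static int rover = LOW - 1;

static int pick_local_port(void)
{
	int remaining = (HIGH - LOW) + 1;

	do {
		rover++;
		if (rover < LOW || rover > HIGH)
			rover = LOW;
		if (!port_in_use[rover]) {
			port_in_use[rover] = 1;
			return rover;		/* found a free port */
		}
	} while (--remaining > 0);

	return -1;				/* local port range exhausted */
}

int main(void)
{
	printf("allocated port %d\n", pick_local_port());
	return 0;
}
#endif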
322 /* Get rid of any references to a local port held by the
325 __inline__ void __tcp_put_port(struct sock *sk)
328 struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(sk->num)];
329 struct tcp_bind_bucket *tb;
331 spin_lock(&head->lock);
332 tb = (struct tcp_bind_bucket *) sk->prev;
334 sk->bind_next->bind_pprev = sk->bind_pprev;
335 *(sk->bind_pprev) = sk->bind_next;
338 if (tb->owners == NULL) {
340 tb->next->pprev = tb->pprev;
341 *(tb->pprev) = tb->next;
342 kmem_cache_free(tcp_bucket_cachep, tb);
344 spin_unlock(&head->lock);
348 void tcp_put_port(struct sock *sk)
357 /* This lock without WQ_FLAG_EXCLUSIVE is fine on UP but can be very bad on SMP.
358 * When several writers sleep and a reader wakes them up, all but one
359 * immediately hit the write lock and grab all the CPUs. Exclusive sleep solves
360 * this, _but_ remember, it adds useless work on UP machines (a wake up on each
361 * exclusive lock release). It should really be ifdefed.
364 void tcp_listen_wlock(void)
367 write_lock(&tcp_lhash_lock);
369 if (atomic_read(&tcp_lhash_users)) {
370 DECLARE_WAITQUEUE(wait, current);
372 add_wait_queue_exclusive(&tcp_lhash_wait, &wait);
374 set_current_state(TASK_UNINTERRUPTIBLE);
375 if (atomic_read(&tcp_lhash_users) == 0)
377 write_unlock_bh(&tcp_lhash_lock);
379 write_lock_bh(&tcp_lhash_lock);
382 __set_current_state(TASK_RUNNING);
383 remove_wait_queue(&tcp_lhash_wait, &wait);
388 static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible)
394 BUG_TRAP(sk->pprev==NULL);
395 if(listen_possible && sk->state == TCP_LISTEN) {
396 skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
397 lock = &tcp_lhash_lock;
400 skp = &tcp_ehash[(sk->hashent = tcp_sk_hashfn(sk))].chain;
401 lock = &tcp_ehash[sk->hashent].lock;
404 if((sk->next = *skp) != NULL)
405 (*skp)->pprev = &sk->next;
408 sock_prot_inc_use(sk->prot);
410 if (listen_possible && sk->state == TCP_LISTEN)
411 wake_up(&tcp_lhash_wait);
415 static void tcp_v4_hash(struct sock *sk)
418 if (sk->state != TCP_CLOSE) {
420 __tcp_v4_hash(sk, 1);
426 void tcp_unhash(struct sock *sk)
434 if (sk->state == TCP_LISTEN) {
437 lock = &tcp_lhash_lock;
439 struct tcp_ehash_bucket *head = &tcp_ehash[sk->hashent];
441 write_lock_bh(&head->lock);
446 sk->next->pprev = sk->pprev;
447 *sk->pprev = sk->next;
449 sock_prot_dec_use(sk->prot);
451 write_unlock_bh(lock);
454 if (sk->state == TCP_LISTEN)
455 wake_up(&tcp_lhash_wait);
459 /* Don't inline this cruft. There are some nice properties to
460 * exploit here. The BSD API does not allow a listening TCP
461 * to specify the remote port nor the remote address for the
462 * connection. So always assume those are both wildcarded
463 * during the search since they can never be otherwise.
465 static struct sock *__tcp_v4_lookup_listener(struct sock *sk, u32 daddr, unsigned short hnum, int dif)
468 struct sock *result = NULL;
472 for(; sk; sk = sk->next) {
473 if(sk->num == hnum) {
474 __u32 rcv_saddr = sk->rcv_saddr;
478 if (rcv_saddr != daddr)
482 if (sk->bound_dev_if) {
483 if (sk->bound_dev_if != dif)
489 if (score > hiscore) {
501 /* Optimize the common listener case. */
502 __inline__ struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum, int dif)
507 read_lock(&tcp_lhash_lock);
508 sk = tcp_listening_hash[tcp_lhashfn(hnum)];
510 if (sk->num == hnum &&
512 (!sk->rcv_saddr || sk->rcv_saddr == daddr) &&
515 sk = __tcp_v4_lookup_listener(sk, daddr, hnum, dif);
521 read_unlock(&tcp_lhash_lock);
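/* A stand-alone sketch of the "most specific listener wins" scoring described
 * above: an exact local address and an exact bound device each beat a
 * wildcard. The listener table and field names here are hypothetical
 * simplifications of struct sock.
 */
#if 0	/* stand-alone sketch of listener scoring */
#include <stdio.h>

struct listener {
	unsigned int   rcv_saddr;	/* 0 means wildcard address */
	int            bound_dev_if;	/* 0 means any interface */
	unsigned short num;		/* local port, host order */
};

static struct listener *best_listener(struct listener *tab, int n,
				      unsigned int daddr, unsigned short hnum, int dif)
{
	struct listener *result = NULL;
	int hiscore = 0, i;

	for (i = 0; i < n; i++) {
		struct listener *l = &tab[i];
		int score = 1;

		if (l->num != hnum)
			continue;
		if (l->rcv_saddr) {
			if (l->rcv_saddr != daddr)
				continue;
			score++;		/* exact local address */
		}
		if (l->bound_dev_if) {
			if (l->bound_dev_if != dif)
				continue;
			score++;		/* exact device */
		}
		if (score > hiscore) {
			hiscore = score;
			result = l;
		}
	}
	return result;
}

int main(void)
{
	struct listener tab[] = {
		{ 0,          0, 80 },		/* wildcard listener */
		{ 0x0a000001, 0, 80 },		/* bound to 10.0.0.1 */
	};
	struct listener *l = best_listener(tab, 2, 0x0a000001, 80, 1);
	printf("picked listener %d\n", (int)(l - tab));	/* prints 1 */
	return 0;
}
#endif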
528 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
529 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
531 * Local BH must be disabled here.
534 static inline struct sock *__tcp_v4_lookup_established(u32 saddr, u16 sport,
535 u32 daddr, u16 hnum, int dif)
538 struct tcp_ehash_bucket *head;
539 TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
540 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
544 /* Optimize here for direct hit, only listening connections can
545 * have wildcards anyway.
547 hash = tcp_hashfn(daddr, hnum, saddr, sport);
548 head = &tcp_ehash[hash];
549 read_lock(&head->lock);
550 for(sk = head->chain; sk; sk = sk->next) {
551 if(TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif))
552 goto hit; /* You sunk my battleship! */
555 /* Must check for a TIME_WAIT'er before going to listener hash. */
556 for(sk = (head + tcp_ehash_size)->chain; sk; sk = sk->next)
557 if(TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif))
559 read_unlock(&head->lock);
565 read_unlock(&head->lock);
572 static inline struct sock *__tcp_v4_lookup(u32 saddr, u16 sport,
573 u32 daddr, u16 hnum, int dif)
578 sk = __tcp_v4_lookup_established(saddr, sport, daddr, hnum, dif);
583 return tcp_v4_lookup_listener(daddr, hnum, dif);
589 __inline__ struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
595 sk = __tcp_v4_lookup(saddr, sport, daddr, ntohs(dport), dif);
604 static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
607 return secure_tcp_sequence_number(skb->nh.iph->daddr,
616 /* called with local bh disabled */
617 static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
618 struct tcp_tw_bucket **twp)
621 u32 daddr = sk->rcv_saddr;
622 u32 saddr = sk->daddr;
623 int dif = sk->bound_dev_if;
624 TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
625 __u32 ports = TCP_COMBINED_PORTS(sk->dport, lport);
626 int hash = tcp_hashfn(daddr, lport, saddr, sk->dport);
627 struct tcp_ehash_bucket *head = &tcp_ehash[hash];
628 struct sock *sk2, **skp;
629 struct tcp_tw_bucket *tw;
631 write_lock(&head->lock);
633 /* Check TIME-WAIT sockets first. */
634 for(skp = &(head + tcp_ehash_size)->chain; (sk2=*skp) != NULL;
636 tw = (struct tcp_tw_bucket*)sk2;
638 if(TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
639 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
641 /* With PAWS, it is safe from the viewpoint
642 of data integrity. Even without PAWS it
643 is safe provided sequence spaces do not
644 overlap i.e. at data rates <= 80Mbit/sec.
646 Actually, the idea is close to VJ's one,
647 only the timestamp cache is held not per host,
648 but per port pair, and the TW bucket is used
651 If the TW bucket has already been destroyed we
652 fall back to VJ's scheme and use the initial
653 timestamp retrieved from the peer table.
655 if (tw->ts_recent_stamp &&
656 (!twp || (sysctl_tcp_tw_reuse &&
657 xtime.tv_sec - tw->ts_recent_stamp > 1))) {
658 if ((tp->write_seq = tw->snd_nxt+65535+2) == 0)
660 tp->ts_recent = tw->ts_recent;
661 tp->ts_recent_stamp = tw->ts_recent_stamp;
671 /* And established part... */
672 for(skp = &head->chain; (sk2=*skp)!=NULL; skp = &sk2->next) {
673 if(TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif))
678 /* Must record num and sport now. Otherwise we will see
679 * a socket with a funny identity in the hash table. */
681 sk->sport = htons(lport);
682 BUG_TRAP(sk->pprev==NULL);
683 if ((sk->next = *skp) != NULL)
684 (*skp)->pprev = &sk->next;
689 sock_prot_inc_use(sk->prot);
690 write_unlock(&head->lock);
694 NET_INC_STATS_BH(TimeWaitRecycled);
696 /* Silly. Should hash-dance instead... */
697 tcp_tw_deschedule(tw);
698 tcp_timewait_kill(tw);
699 NET_INC_STATS_BH(TimeWaitRecycled);
707 write_unlock(&head->lock);
708 return -EADDRNOTAVAIL;
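/* The reuse test above amounts to: step on a TIME-WAIT bucket only if it
 * recorded timestamps and is at least a second old (when sysctl_tcp_tw_reuse
 * allows it), then continue the sequence space well past its last send.
 * A simplified stand-alone sketch, with hypothetical field names and the
 * !twp branch omitted.
 */
#if 0	/* stand-alone sketch of the TIME-WAIT reuse decision */
#include <stdio.h>
#include <time.h>

struct tw_state {
	long         ts_recent_stamp;	/* wall-clock second of the last seen timestamp */
	unsigned int snd_nxt;		/* next sequence the old connection would send */
};

static int tw_reuse = 1;		/* stand-in for sysctl_tcp_tw_reuse */

static int may_reuse(const struct tw_state *tw, long now,
		     unsigned int *new_write_seq)
{
	if (tw->ts_recent_stamp &&
	    tw_reuse && now - tw->ts_recent_stamp > 1) {
		/* jump far enough ahead that old duplicates cannot be confused */
		*new_write_seq = tw->snd_nxt + 65535 + 2;
		if (*new_write_seq == 0)
			*new_write_seq = 1;
		return 1;
	}
	return 0;
}

int main(void)
{
	struct tw_state tw = { time(NULL) - 5, 1000u };
	unsigned int seq;

	if (may_reuse(&tw, time(NULL), &seq))
		printf("reuse ok, new write_seq=%u\n", seq);
	return 0;
}
#endif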
715 * Bind a port for a connect operation and hash it.
717 static int tcp_v4_hash_connect(struct sock *sk)
720 unsigned short snum = sk->num;
721 struct tcp_bind_hashbucket *head;
722 struct tcp_bind_bucket *tb;
726 int low = sysctl_local_port_range[0];
727 int high = sysctl_local_port_range[1];
728 int remaining = (high - low) + 1;
729 struct tcp_tw_bucket *tw = NULL;
733 /* TODO. Actually it is not such a bad idea to remove
734 * tcp_portalloc_lock before the next submission to Linus.
735 * As soon as we touch this place at all it is time to think.
737 * Right now it protects a single _advisory_ variable, tcp_port_rover,
738 * hence it is mostly useless.
739 * The code will work nicely if we just delete it, but
740 * I am afraid that in the contended case it will work no better or
741 * even worse: another cpu will just hit the same bucket
743 * So some cpu salt could remove both the contention and the
744 * memory pingpong. Any ideas how to do this in a nice way?
746 spin_lock(&tcp_portalloc_lock);
747 rover = tcp_port_rover;
751 if ((rover < low) || (rover > high))
753 head = &tcp_bhash[tcp_bhashfn(rover)];
754 spin_lock(&head->lock);
756 /* Does not bother with rcv_saddr checks,
757 * because the established check is already
760 for (tb = head->chain; tb; tb = tb->next) {
761 if (tb->port == rover) {
762 BUG_TRAP(tb->owners != NULL);
763 if (tb->fastreuse >= 0)
765 if (!__tcp_v4_check_established(sk, rover, &tw))
771 tb = tcp_bucket_create(head, rover);
773 spin_unlock(&head->lock);
780 spin_unlock(&head->lock);
781 } while (--remaining > 0);
782 tcp_port_rover = rover;
783 spin_unlock(&tcp_portalloc_lock);
787 return -EADDRNOTAVAIL;
790 /* All locks still held and bhs disabled */
791 tcp_port_rover = rover;
792 spin_unlock(&tcp_portalloc_lock);
794 tcp_bind_hash(sk, tb, rover);
796 sk->sport = htons(rover);
797 __tcp_v4_hash(sk, 0);
799 spin_unlock(&head->lock);
802 tcp_tw_deschedule(tw);
803 tcp_timewait_kill(tw);
811 head = &tcp_bhash[tcp_bhashfn(snum)];
812 tb = (struct tcp_bind_bucket *)sk->prev;
813 spin_lock_bh(&head->lock);
814 if (tb->owners == sk && sk->bind_next == NULL) {
815 __tcp_v4_hash(sk, 0);
816 spin_unlock_bh(&head->lock);
820 spin_unlock(&head->lock);
821 /* No definite answer... Walk the established hash table */
822 ret = __tcp_v4_check_established(sk, snum, NULL);
831 /* This will initiate an outgoing connection. */
832 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
835 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
836 struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
842 if (addr_len < sizeof(struct sockaddr_in))
845 if (usin->sin_family != AF_INET)
846 return(-EAFNOSUPPORT);
848 nexthop = daddr = usin->sin_addr.s_addr;
849 if (sk->protinfo.af_inet.opt && sk->protinfo.af_inet.opt->srr) {
852 nexthop = sk->protinfo.af_inet.opt->faddr;
855 tmp = ip_route_connect(&rt, nexthop, sk->saddr,
856 RT_CONN_FLAGS(sk), sk->bound_dev_if);
860 if (rt->rt_flags&(RTCF_MULTICAST|RTCF_BROADCAST)) {
865 __sk_dst_set(sk, &rt->u.dst);
866 sk->route_caps = rt->u.dst.dev->features;
868 if (!sk->protinfo.af_inet.opt || !sk->protinfo.af_inet.opt->srr)
872 sk->saddr = rt->rt_src;
873 sk->rcv_saddr = sk->saddr;
875 if (tp->ts_recent_stamp && sk->daddr != daddr) {
876 /* Reset inherited state */
878 tp->ts_recent_stamp = 0;
882 if (sysctl_tcp_tw_recycle &&
883 !tp->ts_recent_stamp &&
884 rt->rt_dst == daddr) {
885 struct inet_peer *peer = rt_get_peer(rt);
887 /* VJ's idea. We save the last timestamp seen from
888 * the destination in the peer table when entering TIME-WAIT state,
889 * and initialize ts_recent from it when trying a new connection.
892 if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
893 tp->ts_recent_stamp = peer->tcp_ts_stamp;
894 tp->ts_recent = peer->tcp_ts;
898 sk->dport = usin->sin_port;
901 tp->ext_header_len = 0;
902 if (sk->protinfo.af_inet.opt)
903 tp->ext_header_len = sk->protinfo.af_inet.opt->optlen;
907 /* Socket identity is still unknown (sport may be zero).
908 * However we set the state to SYN-SENT and, without releasing the socket
909 * lock, select a source port, enter ourselves into the hash tables and
910 * complete initialization after this.
912 tcp_set_state(sk, TCP_SYN_SENT);
913 err = tcp_v4_hash_connect(sk);
918 tp->write_seq = secure_tcp_sequence_number(sk->saddr, sk->daddr,
919 sk->sport, usin->sin_port);
921 sk->protinfo.af_inet.id = tp->write_seq^jiffies;
923 err = tcp_connect(sk);
930 tcp_set_state(sk, TCP_CLOSE);
940 static __inline__ int tcp_v4_iif(struct sk_buff *skb)
943 return ((struct rtable*)skb->dst)->rt_iif;
949 static __inline__ unsigned tcp_v4_synq_hash(u32 raddr, u16 rport)
952 unsigned h = raddr ^ rport;
955 return h&(TCP_SYNQ_HSIZE-1);
961 static struct open_request *tcp_v4_search_req(struct tcp_opt *tp,
962 struct open_request ***prevp,
964 __u32 raddr, __u32 laddr)
967 struct tcp_listen_opt *lopt = tp->listen_opt;
968 struct open_request *req, **prev;
970 for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport)];
971 (req = *prev) != NULL;
972 prev = &req->dl_next) {
973 if (req->rmt_port == rport &&
974 req->af.v4_req.rmt_addr == raddr &&
975 req->af.v4_req.loc_addr == laddr &&
976 TCP_INET_FAMILY(req->class->family)) {
977 BUG_TRAP(req->sk == NULL);
989 static void tcp_v4_synq_add(struct sock *sk, struct open_request *req)
992 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
993 struct tcp_listen_opt *lopt = tp->listen_opt;
994 unsigned h = tcp_v4_synq_hash(req->af.v4_req.rmt_addr, req->rmt_port);
996 req->expires = jiffies + TCP_TIMEOUT_INIT;
999 req->dl_next = lopt->syn_table[h];
1001 write_lock(&tp->syn_wait_lock);
1002 lopt->syn_table[h] = req;
1003 write_unlock(&tp->syn_wait_lock);
1011 * This routine does path mtu discovery as defined in RFC1191.
1013 static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *ip, unsigned mtu)
1016 struct dst_entry *dst;
1017 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
1019 /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
1020 * sent out by Linux are always < 576 bytes so they should go through
1023 if (sk->state == TCP_LISTEN)
1026 /* We don't check in the dst entry whether pmtu discovery is forbidden
1027 * on this route. We just assume that no packet-too-big packets
1028 * are sent back when pmtu discovery is not active.
1029 * There is a small race when the user changes this flag in the
1030 * route, but I think that's acceptable.
1032 if ((dst = __sk_dst_check(sk, 0)) == NULL)
1035 ip_rt_update_pmtu(dst, mtu);
1037 /* Something is about to go wrong... Remember the soft error
1038 * in case this connection will not be able to recover.
1040 if (mtu < dst->pmtu && ip_dont_fragment(sk, dst))
1041 sk->err_soft = EMSGSIZE;
1043 if (sk->protinfo.af_inet.pmtudisc != IP_PMTUDISC_DONT &&
1044 tp->pmtu_cookie > dst->pmtu) {
1045 tcp_sync_mss(sk, dst->pmtu);
1047 /* Resend the TCP packet because it's
1048 * clear that the old packet has been
1049 * dropped. This is the new "fast" path mtu
1052 tcp_simple_retransmit(sk);
1053 } /* else let the usual retransmit timer handle it */
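/* Reduced to a stand-alone model (the names and the 40-byte header overhead
 * are illustrative assumptions): shrink the cached MSS only when the reported
 * MTU is below what we have been assuming, otherwise leave the segment to the
 * usual retransmit timer.
 */
#if 0	/* stand-alone sketch of reacting to an ICMP FRAG_NEEDED report */
#include <stdio.h>

#define HEADER_OVERHEAD 40	/* IPv4 + TCP headers, no options, for the sketch */

struct conn {
	unsigned int pmtu_cookie;	/* path MTU we have been assuming */
	unsigned int mss;
};

static void pmtu_report(struct conn *c, unsigned int reported_mtu)
{
	if (reported_mtu >= c->pmtu_cookie)
		return;			/* stale or useless report: ignore */

	c->pmtu_cookie = reported_mtu;
	c->mss = reported_mtu - HEADER_OVERHEAD;
	/* ...and the caller would retransmit the dropped segment immediately */
}

int main(void)
{
	struct conn c = { 1500, 1460 };

	pmtu_report(&c, 576);
	printf("new mss %u\n", c.mss);	/* 536 */
	return 0;
}
#endif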
1058 * This routine is called by the ICMP module when it gets some
1059 * sort of error condition. If err < 0 then the socket should
1060 * be closed and the error returned to the user. If err > 0
1061 * it's just the icmp type << 8 | icmp code. After adjustment
1062 * header points to the first 8 bytes of the tcp header. We need
1063 * to find the appropriate port.
1065 * The locking strategy used here is very "optimistic". When
1066 * someone else accesses the socket the ICMP is just dropped
1067 * and for some paths there is no check at all.
1068 * A more general error queue to queue errors for later handling
1069 * is probably better.
1073 void tcp_v4_err(struct sk_buff *skb, u32 info)
1076 struct iphdr *iph = (struct iphdr*)skb->data;
1077 struct tcphdr *th = (struct tcphdr*)(skb->data+(iph->ihl<<2));
1079 int type = skb->h.icmph->type;
1080 int code = skb->h.icmph->code;
1085 if (skb->len < (iph->ihl << 2) + 8) {
1086 ICMP_INC_STATS_BH(IcmpInErrors);
1090 sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source, tcp_v4_iif(skb));
1092 ICMP_INC_STATS_BH(IcmpInErrors);
1095 if (sk->state == TCP_TIME_WAIT) {
1096 tcp_tw_put((struct tcp_tw_bucket*)sk);
1101 /* If too many ICMPs get dropped on busy
1102 * servers this needs to be solved differently.
1104 if (sk->lock.users != 0)
1105 NET_INC_STATS_BH(LockDroppedIcmps);
1107 if (sk->state == TCP_CLOSE)
1110 tp = &sk->tp_pinfo.af_tcp;
1111 seq = ntohl(th->seq);
1112 if (sk->state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) {
1113 NET_INC_STATS(OutOfWindowIcmps);
1118 case ICMP_SOURCE_QUENCH:
1119 /* This is deprecated, but if someone generated it,
1120 * we have no reason to ignore it.
1122 if (sk->lock.users == 0)
1125 case ICMP_PARAMETERPROB:
1128 case ICMP_DEST_UNREACH:
1129 if (code > NR_ICMP_UNREACH)
1132 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
1133 if (sk->lock.users == 0)
1134 do_pmtu_discovery(sk, iph, info);
1138 err = icmp_err_convert[code].errno;
1140 case ICMP_TIME_EXCEEDED:
1147 switch (sk->state) {
1148 struct open_request *req, **prev;
1150 if (sk->lock.users != 0)
1153 req = tcp_v4_search_req(tp, &prev,
1155 iph->daddr, iph->saddr);
1159 /* ICMPs are not backlogged, hence we cannot get
1160 an established socket here.
1162 BUG_TRAP(req->sk == NULL);
1164 if (seq != req->snt_isn) {
1165 NET_INC_STATS_BH(OutOfWindowIcmps);
1170 * Still in SYN_RECV, just remove it silently.
1171 * There is no good way to pass the error to the newly
1172 * created socket, and POSIX does not want network
1173 * errors returned from accept().
1175 tcp_synq_drop(sk, req, prev);
1179 case TCP_SYN_RECV: /* Cannot happen.
1180 It can, e.g., if SYNs crossed.
1182 if (sk->lock.users == 0) {
1183 TCP_INC_STATS_BH(TcpAttemptFails);
1186 sk->error_report(sk);
1195 /* If we've already connected we will keep trying
1196 * until we time out, or the user gives up.
1198 * rfc1122 4.2.3.9 allows us to consider as hard errors
1199 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
1200 * but it is obsoleted by pmtu discovery).
1202 * Note that in the modern internet, where routing is unreliable
1203 * and broken firewalls sit in every dark corner, sending random
1204 * errors ordered by their masters, even these two messages finally lose
1205 * their original sense (even Linux sends invalid PORT_UNREACHs)
1207 * Now we are in compliance with RFCs.
1211 if (sk->lock.users == 0 && sk->protinfo.af_inet.recverr) {
1213 sk->error_report(sk);
1214 } else { /* Only an error on timeout */
1224 /* This routine computes an IPv4 TCP checksum. */
1225 void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
1226 struct sk_buff *skb)
1229 if (skb->ip_summed == CHECKSUM_HW) {
1230 th->check = ~tcp_v4_check(th, len, sk->saddr, sk->daddr, 0);
1231 skb->csum = offsetof(struct tcphdr, check);
1233 th->check = tcp_v4_check(th, len, sk->saddr, sk->daddr,
1234 csum_partial((char *)th, th->doff<<2, skb->csum));
1240 * This routine will send an RST to the other tcp.
1242 * Someone asks: why do I NEVER use socket parameters (TOS, TTL, etc.)
1244 * Answer: if a packet caused an RST, it is not for a socket
1245 * existing in our system; if it is matched to a socket,
1246 * it is just a duplicate segment or a bug in the other side's TCP.
1247 * So we build the reply based only on the parameters
1248 * that arrived with the segment.
1249 * Exception: precedence violation. We do not implement it in any case.
1252 static void tcp_v4_send_reset(struct sk_buff *skb)
1255 struct tcphdr *th = skb->h.th;
1257 struct ip_reply_arg arg;
1259 /* Never send a reset in response to a reset. */
1263 if (((struct rtable*)skb->dst)->rt_type != RTN_LOCAL)
1266 /* Swap the send and the receive. */
1267 memset(&rth, 0, sizeof(struct tcphdr));
1268 rth.dest = th->source;
1269 rth.source = th->dest;
1270 rth.doff = sizeof(struct tcphdr)/4;
1274 rth.seq = th->ack_seq;
1277 rth.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1278 + skb->len - (th->doff<<2));
1281 memset(&arg, 0, sizeof arg);
1282 arg.iov[0].iov_base = (unsigned char *)&rth;
1283 arg.iov[0].iov_len = sizeof rth;
1284 arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
1285 skb->nh.iph->saddr, /*XXX*/
1286 sizeof(struct tcphdr),
1290 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
1292 tcp_socket->sk->protinfo.af_inet.ttl = sysctl_ip_default_ttl;
1293 ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);
1295 TCP_INC_STATS_BH(TcpOutSegs);
1296 TCP_INC_STATS_BH(TcpOutRsts);
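/* The field choice above follows RFC 793: if the offending segment carried an
 * ACK, the reset's sequence number is taken from that ack field; otherwise the
 * reset ACKs everything the segment occupied. A stand-alone sketch:
 */
#if 0	/* stand-alone sketch of choosing RST seq/ack per RFC 793 */
#include <stdio.h>

struct seg {
	unsigned int seq, ack_seq;
	int syn, fin, ack;
	unsigned int payload_len;
};

static void build_rst(const struct seg *in, struct seg *rst)
{
	rst->syn = rst->fin = 0;
	if (in->ack) {
		rst->seq = in->ack_seq;
		rst->ack = 0;
	} else {
		rst->seq = 0;
		rst->ack = 1;
		rst->ack_seq = in->seq + in->syn + in->fin + in->payload_len;
	}
}

int main(void)
{
	struct seg in = { 1000, 0, 1, 0, 0, 0 };	/* a stray SYN */
	struct seg rst;

	build_rst(&in, &rst);
	printf("RST seq=%u ack=%u\n", rst.seq, rst.ack ? rst.ack_seq : 0);
	return 0;
}
#endif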
1300 /* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
1301 outside of socket context, is certainly ugly. What can I do?
1304 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1307 struct tcphdr *th = skb->h.th;
1312 struct ip_reply_arg arg;
1314 memset(&rep.th, 0, sizeof(struct tcphdr));
1315 memset(&arg, 0, sizeof arg);
1317 arg.iov[0].iov_base = (unsigned char *)&rep;
1318 arg.iov[0].iov_len = sizeof(rep.th);
1321 rep.tsopt[0] = htonl((TCPOPT_NOP << 24) |
1322 (TCPOPT_NOP << 16) |
1323 (TCPOPT_TIMESTAMP << 8) |
1325 rep.tsopt[1] = htonl(tcp_time_stamp);
1326 rep.tsopt[2] = htonl(ts);
1327 arg.iov[0].iov_len = sizeof(rep);
1330 /* Swap the send and the receive. */
1331 rep.th.dest = th->source;
1332 rep.th.source = th->dest;
1333 rep.th.doff = arg.iov[0].iov_len/4;
1334 rep.th.seq = htonl(seq);
1335 rep.th.ack_seq = htonl(ack);
1337 rep.th.window = htons(win);
1339 arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
1340 skb->nh.iph->saddr, /*XXX*/
1344 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
1346 ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
1348 TCP_INC_STATS_BH(TcpOutSegs);
1352 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
1355 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1357 tcp_v4_send_ack(skb, tw->snd_nxt, tw->rcv_nxt,
1358 tw->rcv_wnd>>tw->rcv_wscale, tw->ts_recent);
1364 static void tcp_v4_or_send_ack(struct sk_buff *skb, struct open_request *req)
1367 tcp_v4_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd,
1372 static struct dst_entry* tcp_v4_route_req(struct sock *sk, struct open_request *req)
1376 struct ip_options *opt;
1378 opt = req->af.v4_req.opt;
1379 if(ip_route_output(&rt, ((opt && opt->srr) ?
1381 req->af.v4_req.rmt_addr),
1382 req->af.v4_req.loc_addr,
1383 RT_CONN_FLAGS(sk), sk->bound_dev_if)) {
1384 IP_INC_STATS_BH(IpOutNoRoutes);
1387 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
1389 IP_INC_STATS_BH(IpOutNoRoutes);
1399 * Send a SYN-ACK after having received a SYN.
1400 * This still operates on an open_request only, not on a big
1403 static int tcp_v4_send_synack(struct sock *sk, struct open_request *req,
1404 struct dst_entry *dst)
1408 struct sk_buff * skb;
1410 /* First, grab a route. */
1412 (dst = tcp_v4_route_req(sk, req)) == NULL)
1415 skb = tcp_make_synack(sk, dst, req);
1418 struct tcphdr *th = skb->h.th;
1420 th->check = tcp_v4_check(th, skb->len,
1421 req->af.v4_req.loc_addr, req->af.v4_req.rmt_addr,
1422 csum_partial((char *)th, skb->len, skb->csum));
1424 err = ip_build_and_send_pkt(skb, sk, req->af.v4_req.loc_addr,
1425 req->af.v4_req.rmt_addr, req->af.v4_req.opt);
1426 if (err == NET_XMIT_CN)
1439 * IPv4 open_request destructor.
1441 static void tcp_v4_or_free(struct open_request *req)
1444 if (req->af.v4_req.opt)
1445 kfree(req->af.v4_req.opt);
1449 static inline void syn_flood_warning(struct sk_buff *skb)
1452 static unsigned long warntime;
1454 if (jiffies - warntime > HZ*60) {
1457 "possible SYN flooding on port %d. Sending cookies.\n",
1458 ntohs(skb->h.th->dest));
1464 * Save and compile IPv4 options into the open_request if needed.
1466 static inline struct ip_options *
1467 tcp_v4_save_options(struct sock *sk, struct sk_buff *skb)
1470 struct ip_options *opt = &(IPCB(skb)->opt);
1471 struct ip_options *dopt = NULL;
1473 if (opt && opt->optlen) {
1474 int opt_size = optlength(opt);
1475 dopt = kmalloc(opt_size, GFP_ATOMIC);
1477 if (ip_options_echo(dopt, skb)) {
1490 * Maximum number of SYN_RECV sockets in the queue per LISTEN socket.
1491 * One SYN_RECV socket costs about 80 bytes on a 32-bit machine.
1492 * It would be better to replace it with a global counter for all sockets,
1493 * but then some measure against one socket starving all other sockets
1496 * It was 128 by default. Experiments with real servers show that
1497 * this is absolutely not enough even at 100 conn/sec. 256 cures most
1498 * of the problems. This value is adjusted to 128 for very small machines
1499 * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
1500 * Increasing it further requires changing the hash table size.
1502 int sysctl_max_syn_backlog = 256;
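/* The sizing rule described above, as a stand-alone sketch; the thresholds
 * come from the comment, the memory figure is a hypothetical input, and the
 * adjustment itself is presumably done at initialization, outside this
 * excerpt.
 */
#if 0	/* stand-alone sketch of scaling the SYN backlog with machine memory */
#include <stdio.h>

static int scale_syn_backlog(unsigned long ram_mb)
{
	if (ram_mb <= 32)
		return 128;	/* very small machines */
	if (ram_mb >= 256)
		return 1024;	/* normal or better */
	return 256;		/* the compiled-in default */
}

int main(void)
{
	printf("%d\n", scale_syn_backlog(128));	/* 256 */
	return 0;
}
#endif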
1505 struct or_calltable or_ipv4 = {
1514 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1518 struct open_request *req;
1519 __u32 saddr = skb->nh.iph->saddr;
1520 __u32 daddr = skb->nh.iph->daddr;
1521 __u32 isn = TCP_SKB_CB(skb)->when;
1522 struct dst_entry *dst = NULL;
1523 #ifdef CONFIG_SYN_COOKIES
1524 int want_cookie = 0;
1526 #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1529 /* Never answer to SYNs sent to broadcast or multicast */
1530 if (((struct rtable *)skb->dst)->rt_flags &
1531 (RTCF_BROADCAST|RTCF_MULTICAST))
1534 /* TW buckets are converted to open requests without
1535 * limitation; they conserve resources and the peer is
1536 * evidently a real one.
1538 if (tcp_synq_is_full(sk) && !isn) {
1539 #ifdef CONFIG_SYN_COOKIES
1540 if (sysctl_tcp_syncookies) {
1547 /* The accept backlog is full. If we have already queued enough
1548 * warm entries in the syn queue, drop the request. It is better than
1549 * clogging the syn queue with openreqs with exponentially increasing
1552 if (tcp_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1555 req = tcp_openreq_alloc();
1559 tcp_clear_options(&tp);
1561 tp.user_mss = sk->tp_pinfo.af_tcp.user_mss;
1563 tcp_parse_options(skb, &tp, 0);
1566 tcp_clear_options(&tp);
1570 if (tp.saw_tstamp && tp.rcv_tsval == 0) {
1571 /* Some OSes (unknown ones, but I see them on a web server that
1572 * contains information interesting only for Windows
1573 * users) do not send their stamp in the SYN. It is the easy case:
1574 * We simply do not advertise TS support.
1579 tp.tstamp_ok = tp.saw_tstamp;
1581 tcp_openreq_init(req, &tp, skb);
1583 req->af.v4_req.loc_addr = daddr;
1584 req->af.v4_req.rmt_addr = saddr;
1585 req->af.v4_req.opt = tcp_v4_save_options(sk, skb);
1586 req->class = &or_ipv4;
1588 TCP_ECN_create_request(req, skb->h.th);
1591 #ifdef CONFIG_SYN_COOKIES
1592 syn_flood_warning(skb);
1594 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1595 } else if (isn == 0) {
1596 struct inet_peer *peer = NULL;
1598 /* VJ's idea. We save the last timestamp seen
1599 * from the destination in the peer table when entering
1600 * TIME-WAIT state, and check against it before
1601 * accepting a new connection request.
1603 * If "isn" is not zero, this request hit an alive
1604 * timewait bucket, so all the necessary checks
1605 * are made in the function processing the timewait state.
1607 if (tp.saw_tstamp &&
1608 sysctl_tcp_tw_recycle &&
1609 (dst = tcp_v4_route_req(sk, req)) != NULL &&
1610 (peer = rt_get_peer((struct rtable*)dst)) != NULL &&
1611 peer->v4daddr == saddr) {
1612 if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
1613 (s32)(peer->tcp_ts - req->ts_recent) > TCP_PAWS_WINDOW) {
1614 NET_INC_STATS_BH(PAWSPassiveRejected);
1619 /* Kill the following clause, if you dislike this way. */
1620 else if (!sysctl_tcp_syncookies &&
1621 (sysctl_max_syn_backlog - tcp_synq_len(sk)
1622 < (sysctl_max_syn_backlog>>2)) &&
1623 (!peer || !peer->tcp_ts_stamp) &&
1624 (!dst || !dst->rtt)) {
1625 /* Without syncookies the last quarter of the
1626 * backlog is reserved for destinations proven to be alive.
1627 * It means that we continue to communicate
1628 * with destinations already remembered
1629 * by the moment of the synflood.
1631 NETDEBUG(if (net_ratelimit()) \
1632 printk(KERN_DEBUG "TCP: drop open request from %u.%u.%u.%u/%u\n", \
1633 NIPQUAD(saddr), ntohs(skb->h.th->source)));
1638 isn = tcp_v4_init_sequence(sk, skb);
1642 if (tcp_v4_send_synack(sk, req, dst))
1646 tcp_openreq_free(req);
1648 tcp_v4_synq_add(sk, req);
1653 tcp_openreq_free(req);
1655 TCP_INC_STATS_BH(TcpAttemptFails);
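/* A stand-alone sketch of the PAWS test used in tcp_v4_conn_request above:
 * reject the SYN if the peer's remembered timestamp is still fresh and
 * strictly newer than the one the SYN carries. The constants are illustrative
 * stand-ins for TCP_PAWS_MSL and TCP_PAWS_WINDOW.
 */
#if 0	/* stand-alone sketch of the PAWS check against a remembered peer */
#include <stdio.h>
#include <time.h>

#define PAWS_MSL	60	/* seconds a remembered stamp stays trustworthy (illustrative) */
#define PAWS_WINDOW	1	/* allowed timestamp slack (illustrative) */

static int paws_reject(long now, long peer_ts_stamp,
		       unsigned int peer_ts, unsigned int syn_ts)
{
	return now < peer_ts_stamp + PAWS_MSL &&
	       (int)(peer_ts - syn_ts) > PAWS_WINDOW;
}

int main(void)
{
	long now = time(NULL);

	/* peer was heard from 10s ago with timestamp 5000; the SYN carries 4000 */
	printf("%s\n", paws_reject(now, now - 10, 5000, 4000) ? "reject" : "accept");
	return 0;
}
#endif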
1664 * The three way handshake has completed - we got a valid ACK -
1665 * now create the new socket.
1667 struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1668 struct open_request *req,
1669 struct dst_entry *dst)
1672 struct tcp_opt *newtp;
1675 if (tcp_acceptq_is_full(sk))
1679 (dst = tcp_v4_route_req(sk, req)) == NULL)
1682 newsk = tcp_create_openreq_child(sk, req, skb);
1686 newsk->dst_cache = dst;
1687 newsk->route_caps = dst->dev->features;
1689 newtp = &(newsk->tp_pinfo.af_tcp);
1690 newsk->daddr = req->af.v4_req.rmt_addr;
1691 newsk->saddr = req->af.v4_req.loc_addr;
1692 newsk->rcv_saddr = req->af.v4_req.loc_addr;
1693 newsk->protinfo.af_inet.opt = req->af.v4_req.opt;
1694 req->af.v4_req.opt = NULL;
1695 newsk->protinfo.af_inet.mc_index = tcp_v4_iif(skb);
1696 newsk->protinfo.af_inet.mc_ttl = skb->nh.iph->ttl;
1697 newtp->ext_header_len = 0;
1698 if (newsk->protinfo.af_inet.opt)
1699 newtp->ext_header_len = newsk->protinfo.af_inet.opt->optlen;
1700 newsk->protinfo.af_inet.id = newtp->write_seq^jiffies;
1702 tcp_sync_mss(newsk, dst->pmtu);
1703 newtp->advmss = dst->advmss;
1704 tcp_initialize_rcv_mss(newsk);
1706 __tcp_v4_hash(newsk, 0);
1707 __tcp_inherit_port(sk, newsk);
1712 NET_INC_STATS_BH(ListenOverflows);
1714 NET_INC_STATS_BH(ListenDrops);
1722 static struct sock *tcp_v4_hnd_req(struct sock *sk,struct sk_buff *skb)
1725 struct open_request *req, **prev;
1726 struct tcphdr *th = skb->h.th;
1727 struct iphdr *iph = skb->nh.iph;
1728 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
1731 /* Find possible connection requests. */
1732 req = tcp_v4_search_req(tp, &prev,
1734 iph->saddr, iph->daddr);
1736 return tcp_check_req(sk, skb, req, prev);
1738 nsk = __tcp_v4_lookup_established(skb->nh.iph->saddr,
1745 if (nsk->state != TCP_TIME_WAIT) {
1749 tcp_tw_put((struct tcp_tw_bucket*)nsk);
1753 #ifdef CONFIG_SYN_COOKIES
1754 if (!th->rst && !th->syn && th->ack)
1755 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1763 static int tcp_v4_checksum_init(struct sk_buff *skb)
1766 if (skb->ip_summed == CHECKSUM_HW) {
1767 skb->ip_summed = CHECKSUM_UNNECESSARY;
1768 if (!tcp_v4_check(skb->h.th,skb->len,skb->nh.iph->saddr,
1769 skb->nh.iph->daddr,skb->csum))
1772 NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "hw tcp v4 csum failed\n"));
1773 skb->ip_summed = CHECKSUM_NONE;
1775 if (skb->len <= 76) {
1776 if (tcp_v4_check(skb->h.th,skb->len,skb->nh.iph->saddr,
1778 skb_checksum(skb, 0, skb->len, 0)))
1780 skb->ip_summed = CHECKSUM_UNNECESSARY;
1782 skb->csum = ~tcp_v4_check(skb->h.th,skb->len,skb->nh.iph->saddr,
1783 skb->nh.iph->daddr,0);
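/* For reference, the value being checked above is the standard Internet
 * one's-complement checksum over a pseudo-header (source and destination
 * addresses, zero, protocol 6, TCP length) followed by the TCP segment with
 * its checksum field zeroed. A stand-alone sketch:
 */
#if 0	/* stand-alone sketch of the TCP/IPv4 pseudo-header checksum */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

static uint16_t csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

static uint16_t tcp_checksum(uint32_t saddr, uint32_t daddr,
			     const uint8_t *seg, size_t len)
{
	uint32_t sum = 0;
	size_t i;

	/* pseudo-header: addresses, zero + protocol (6), TCP length */
	sum += (saddr >> 16) + (saddr & 0xffff);
	sum += (daddr >> 16) + (daddr & 0xffff);
	sum += 6;
	sum += (uint32_t)len;

	for (i = 0; i + 1 < len; i += 2)
		sum += ((uint32_t)seg[i] << 8) | seg[i + 1];
	if (len & 1)
		sum += (uint32_t)seg[len - 1] << 8;	/* pad the odd trailing byte */

	return csum_fold(sum);
}

int main(void)
{
	uint8_t seg[20] = { 0 };	/* a bare TCP header, checksum field zeroed */

	printf("0x%04x\n", tcp_checksum(0x0a000001, 0x0a000002, seg, sizeof seg));
	return 0;
}
#endif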
1792 /* The socket must have its spinlock held when we get
1795 * We have a potential double-lock case here, so even when
1796 * doing backlog processing we use the BH locking scheme.
1797 * This is because we cannot sleep with the original spinlock
1800 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1803 #ifdef CONFIG_FILTER
1804 struct sk_filter *filter = sk->filter;
1805 if (filter && sk_filter(skb, filter))
1807 #endif /* CONFIG_FILTER */
1809 IP_INC_STATS_BH(IpInDelivers);
1811 if (sk->state == TCP_ESTABLISHED) { /* Fast path */
1812 TCP_CHECK_TIMER(sk);
1813 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1815 TCP_CHECK_TIMER(sk);
1819 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1822 if (sk->state == TCP_LISTEN) {
1823 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1828 if (tcp_child_process(sk, nsk, skb))
1834 TCP_CHECK_TIMER(sk);
1835 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1837 TCP_CHECK_TIMER(sk);
1841 tcp_v4_send_reset(skb);
1844 /* Be careful here. If this function gets more complicated and
1845 * gcc suffers from register pressure on the x86, sk (in %ebx)
1846 * might be destroyed here. This current version compiles correctly,
1847 * but you have been warned.
1852 TCP_INC_STATS_BH(TcpInErrs);
1863 int tcp_v4_rcv(struct sk_buff *skb)
1870 if (skb->pkt_type!=PACKET_HOST)
1873 /* Count it even if it's bad */
1874 TCP_INC_STATS_BH(TcpInSegs);
1876 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1881 if (th->doff < sizeof(struct tcphdr)/4)
1883 if (!pskb_may_pull(skb, th->doff*4))
1886 /* An explanation is required here, I think.
1887 * Packet length and doff are validated by header prediction,
1888 * provided the case of th->doff==0 is eliminated.
1889 * So, we defer the checks. */
1890 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1891 tcp_v4_checksum_init(skb) < 0))
1895 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1896 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1897 skb->len - th->doff*4);
1898 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1899 TCP_SKB_CB(skb)->when = 0;
1900 TCP_SKB_CB(skb)->flags = skb->nh.iph->tos;
1901 TCP_SKB_CB(skb)->sacked = 0;
1903 sk = __tcp_v4_lookup(skb->nh.iph->saddr, th->source,
1904 skb->nh.iph->daddr, ntohs(th->dest), tcp_v4_iif(skb));
1910 if(!ipsec_sk_policy(sk,skb))
1911 goto discard_and_relse;
1913 if (sk->state == TCP_TIME_WAIT)
1920 if (!sk->lock.users) {
1921 if (!tcp_prequeue(sk, skb))
1922 ret = tcp_v4_do_rcv(sk, skb);
1924 sk_add_backlog(sk, skb);
1932 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1934 TCP_INC_STATS_BH(TcpInErrs);
1936 tcp_v4_send_reset(skb);
1940 /* Discard frame. */
1949 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1950 TCP_INC_STATS_BH(TcpInErrs);
1951 goto discard_and_relse;
1953 switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1954 skb, th, skb->len)) {
1959 sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr, ntohs(th->dest), tcp_v4_iif(skb));
1961 tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1962 tcp_timewait_kill((struct tcp_tw_bucket *)sk);
1963 tcp_tw_put((struct tcp_tw_bucket *)sk);
1967 /* Fall through to ACK */
1970 tcp_v4_timewait_ack(sk, skb);
1974 case TCP_TW_SUCCESS:;
1980 /* With per-bucket locks this operation is not atomic, so
1981 * this version is no worse.
1983 static void __tcp_v4_rehash(struct sock *sk)
1986 sk->prot->unhash(sk);
1991 static int tcp_v4_reselect_saddr(struct sock *sk)
1996 __u32 old_saddr = sk->saddr;
1998 __u32 daddr = sk->daddr;
2000 if(sk->protinfo.af_inet.opt && sk->protinfo.af_inet.opt->srr)
2001 daddr = sk->protinfo.af_inet.opt->faddr;
2003 /* Query new route. */
2004 err = ip_route_connect(&rt, daddr, 0,
2005 RT_TOS(sk->protinfo.af_inet.tos)|sk->localroute,
2010 __sk_dst_set(sk, &rt->u.dst);
2011 sk->route_caps = rt->u.dst.dev->features;
2013 new_saddr = rt->rt_src;
2015 if (new_saddr == old_saddr)
2018 if (sysctl_ip_dynaddr > 1) {
2019 printk(KERN_INFO "tcp_v4_rebuild_header(): shifting sk->saddr "
2020 "from %d.%d.%d.%d to %d.%d.%d.%d\n",
2022 NIPQUAD(new_saddr));
2025 sk->saddr = new_saddr;
2026 sk->rcv_saddr = new_saddr;
2028 /* XXX The only ugly spot where we need to
2029 * XXX really change the socket's identity after
2030 * XXX it has entered the hashes. -DaveM
2032 * Besides that, it does not check for connection
2033 * uniqueness. Wait for trouble.
2035 __tcp_v4_rehash(sk);
2042 int tcp_v4_rebuild_header(struct sock *sk)
2045 struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
2049 /* Route is OK, nothing to do. */
2055 if(sk->protinfo.af_inet.opt && sk->protinfo.af_inet.opt->srr)
2056 daddr = sk->protinfo.af_inet.opt->faddr;
2058 err = ip_route_output(&rt, daddr, sk->saddr,
2059 RT_CONN_FLAGS(sk), sk->bound_dev_if);
2061 __sk_dst_set(sk, &rt->u.dst);
2062 sk->route_caps = rt->u.dst.dev->features;
2066 /* Routing failed... */
2069 if (!sysctl_ip_dynaddr ||
2070 sk->state != TCP_SYN_SENT ||
2071 (sk->userlocks & SOCK_BINDADDR_LOCK) ||
2072 (err = tcp_v4_reselect_saddr(sk)) != 0)
2081 static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
2084 struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
2086 sin->sin_family = AF_INET;
2087 sin->sin_addr.s_addr = sk->daddr;
2088 sin->sin_port = sk->dport;
2092 /* VJ's idea. Save the last timestamp seen from this destination
2093 * and hold it at least for the normal timewait interval, to use for duplicate
2094 * segment detection in subsequent connections, before they enter synchronized
2098 int tcp_v4_remember_stamp(struct sock *sk)
2101 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
2102 struct rtable *rt = (struct rtable*)__sk_dst_get(sk);
2103 struct inet_peer *peer = NULL;
2106 if (rt == NULL || rt->rt_dst != sk->daddr) {
2107 peer = inet_getpeer(sk->daddr, 1);
2110 if (rt->peer == NULL)
2111 rt_bind_peer(rt, 1);
2116 if ((s32)(peer->tcp_ts - tp->ts_recent) <= 0 ||
2117 (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
2118 peer->tcp_ts_stamp <= tp->ts_recent_stamp)) {
2119 peer->tcp_ts_stamp = tp->ts_recent_stamp;
2120 peer->tcp_ts = tp->ts_recent;
2133 int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw)
2136 struct inet_peer *peer = NULL;
2138 peer = inet_getpeer(tw->daddr, 1);
2141 if ((s32)(peer->tcp_ts - tw->ts_recent) <= 0 ||
2142 (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
2143 peer->tcp_ts_stamp <= tw->ts_recent_stamp)) {
2144 peer->tcp_ts_stamp = tw->ts_recent_stamp;
2145 peer->tcp_ts = tw->ts_recent;
2158 struct tcp_func ipv4_specific = {
2161 tcp_v4_rebuild_header,
2162 tcp_v4_conn_request,
2163 tcp_v4_syn_recv_sock,
2164 tcp_v4_remember_stamp,
2165 sizeof(struct iphdr),
2170 sizeof(struct sockaddr_in)
2174 /* NOTE: A lot of things are set to zero explicitly by the call to
2175 * sk_alloc(), so they need not be done here.
2177 static int tcp_v4_init_sock(struct sock *sk)
2180 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
2182 skb_queue_head_init(&tp->out_of_order_queue);
2183 tcp_init_xmit_timers(sk);
2184 tcp_prequeue_init(tp);
2186 tp->rto = TCP_TIMEOUT_INIT;
2187 tp->mdev = TCP_TIMEOUT_INIT;
2189 /* So many TCP implementations out there (incorrectly) count the
2190 * initial SYN frame in their delayed-ACK and congestion control
2191 * algorithms that we must have the following bandaid to talk
2192 * efficiently to them. -DaveM
2196 /* See draft-stevens-tcpca-spec-01 for discussion of the
2197 * initialization of these values.
2199 tp->snd_ssthresh = 0x7fffffff; /* Infinity */
2200 tp->snd_cwnd_clamp = ~0;
2201 tp->mss_cache = 536;
2203 tp->reordering = sysctl_tcp_reordering;
2205 sk->state = TCP_CLOSE;
2207 sk->write_space = tcp_write_space;
2208 sk->use_write_queue = 1;
2210 sk->tp_pinfo.af_tcp.af_specific = &ipv4_specific;
2212 sk->sndbuf = sysctl_tcp_wmem[1];
2213 sk->rcvbuf = sysctl_tcp_rmem[1];
2215 atomic_inc(&tcp_sockets_allocated);
2223 static int tcp_v4_destroy_sock(struct sock *sk)
2226 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
2228 tcp_clear_xmit_timers(sk);
2230 /* Clean up the write buffer. */
2231 tcp_writequeue_purge(sk);
2233 /* Cleans up our, hopefully empty, out_of_order_queue. */
2234 __skb_queue_purge(&tp->out_of_order_queue);
2236 /* Clean the prequeue; it really must be empty */
2237 __skb_queue_purge(&tp->ucopy.prequeue);
2239 /* Clean up a referenced TCP bind bucket. */
2240 if(sk->prev != NULL)
2243 /* If sendmsg cached page exists, toss it. */
2244 if (tp->sndmsg_page != NULL)
2245 __free_page(tp->sndmsg_page);
2247 atomic_dec(&tcp_sockets_allocated);
2255 /* Proc filesystem TCP sock list dumping. */
2256 static void get_openreq(struct sock *sk, struct open_request *req, char *tmpbuf, int i, int uid)
2259 int ttd = req->expires - jiffies;
2261 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2262 " %02X %08X:%08X %02X:%08X %08X %5d %8d %u %d %p",
2264 req->af.v4_req.loc_addr,
2266 req->af.v4_req.rmt_addr,
2267 ntohs(req->rmt_port),
2269 0,0, /* could print option size, but that is af dependent. */
2270 1, /* timers active (only the expire timer) */
2274 0, /* non standard timer */
2275 0, /* open_requests have no inode */
2276 atomic_read(&sk->refcnt),
2282 static void get_tcp_sock(struct sock *sp, char *tmpbuf, int i)
2285 unsigned int dest, src;
2288 unsigned long timer_expires;
2289 struct tcp_opt *tp = &sp->tp_pinfo.af_tcp;
2292 src = sp->rcv_saddr;
2293 destp = ntohs(sp->dport);
2294 srcp = ntohs(sp->sport);
2295 if (tp->pending == TCP_TIME_RETRANS) {
2297 timer_expires = tp->timeout;
2298 } else if (tp->pending == TCP_TIME_PROBE0) {
2300 timer_expires = tp->timeout;
2301 } else if (timer_pending(&sp->timer)) {
2303 timer_expires = sp->timer.expires;
2306 timer_expires = jiffies;
2309 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2310 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d",
2311 i, src, srcp, dest, destp, sp->state,
2312 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2313 timer_active, timer_expires-jiffies,
2318 atomic_read(&sp->refcnt), sp,
2319 tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
2320 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
2325 static void get_timewait_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i)
2328 unsigned int dest, src;
2330 int ttd = tw->ttd - jiffies;
2336 src = tw->rcv_saddr;
2337 destp = ntohs(tw->dport);
2338 srcp = ntohs(tw->sport);
2340 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2341 " %02X %08X:%08X %02X:%08X %08X %5d %8d %d %d %p",
2342 i, src, srcp, dest, destp, tw->substate, 0, 0,
2344 atomic_read(&tw->refcnt), tw);
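/* Each row built above encodes address:port pairs as fixed-width hex
 * ("%08X:%04X"); the address is printed as the raw 32-bit value it is stored
 * as (network byte order), the port in host order. A stand-alone sketch of
 * producing and reading back one such pair:
 */
#if 0	/* stand-alone sketch of the %08X:%04X address:port encoding */
#include <stdio.h>

int main(void)
{
	unsigned int   addr = 0x0100007f;	/* 127.0.0.1 as it sits in memory on little-endian */
	unsigned short port = 80;
	char buf[16];
	unsigned int a;
	unsigned int p;

	snprintf(buf, sizeof buf, "%08X:%04X", addr, port);
	printf("%s\n", buf);			/* "0100007F:0050" */

	sscanf(buf, "%08X:%04X", &a, &p);
	printf("addr=%08X port=%u\n", a, p);
	return 0;
}
#endif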
2350 int tcp_get_info(char *buffer, char **start, off_t offset, int length)
2353 int len = 0, num = 0, i;
2354 off_t begin, pos = 0;
2355 char tmpbuf[TMPSZ+1];
2358 len += sprintf(buffer, "%-*s\n", TMPSZ-1,
2359 " sl local_address rem_address st tx_queue "
2360 "rx_queue tr tm->when retrnsmt uid timeout inode");
2364 /* First, walk listening socket table. */
2366 for(i = 0; i < TCP_LHTABLE_SIZE; i++) {
2368 struct tcp_listen_opt *lopt;
2371 for (sk = tcp_listening_hash[i]; sk; sk = sk->next, num++) {
2372 struct open_request *req;
2374 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
2376 if (!TCP_INET_FAMILY(sk->family))
2380 if (pos >= offset) {
2381 get_tcp_sock(sk, tmpbuf, num);
2382 len += sprintf(buffer+len, "%-*s\n", TMPSZ-1, tmpbuf);
2383 if (pos >= offset + length) {
2384 tcp_listen_unlock();
2390 uid = sock_i_uid(sk);
2391 read_lock_bh(&tp->syn_wait_lock);
2392 lopt = tp->listen_opt;
2393 if (lopt && lopt->qlen != 0) {
2394 for (k=0; k<TCP_SYNQ_HSIZE; k++) {
2395 for (req = lopt->syn_table[k]; req; req = req->dl_next, num++) {
2396 if (!TCP_INET_FAMILY(req->class->family))
2402 get_openreq(sk, req, tmpbuf, num, uid);
2403 len += sprintf(buffer+len, "%-*s\n", TMPSZ-1, tmpbuf);
2404 if (pos >= offset + length) {
2405 read_unlock_bh(&tp->syn_wait_lock);
2406 tcp_listen_unlock();
2412 read_unlock_bh(&tp->syn_wait_lock);
2414 /* Completed requests are in the normal socket hash table */
2417 tcp_listen_unlock();
2421 /* Next, walk established hash chain. */
2422 for (i = 0; i < tcp_ehash_size; i++) {
2423 struct tcp_ehash_bucket *head = &tcp_ehash[i];
2425 struct tcp_tw_bucket *tw;
2427 read_lock(&head->lock);
2428 for(sk = head->chain; sk; sk = sk->next, num++) {
2429 if (!TCP_INET_FAMILY(sk->family))
2434 get_tcp_sock(sk, tmpbuf, num);
2435 len += sprintf(buffer+len, "%-*s\n", TMPSZ-1, tmpbuf);
2436 if (pos >= offset + length) {
2437 read_unlock(&head->lock);
2441 for (tw = (struct tcp_tw_bucket *)tcp_ehash[i+tcp_ehash_size].chain;
2443 tw = (struct tcp_tw_bucket *)tw->next, num++) {
2444 if (!TCP_INET_FAMILY(tw->family))
2449 get_timewait_sock(tw, tmpbuf, num);
2450 len += sprintf(buffer+len, "%-*s\n", TMPSZ-1, tmpbuf);
2451 if (pos >= offset + length) {
2452 read_unlock(&head->lock);
2456 read_unlock(&head->lock);
2463 begin = len - (pos - offset);
2464 *start = buffer + begin;
2474 struct proto tcp_prot = {
2477 connect: tcp_v4_connect,
2478 disconnect: tcp_disconnect,
2481 init: tcp_v4_init_sock,
2482 destroy: tcp_v4_destroy_sock,
2483 shutdown: tcp_shutdown,
2484 setsockopt: tcp_setsockopt,
2485 getsockopt: tcp_getsockopt,
2486 sendmsg: tcp_sendmsg,
2487 recvmsg: tcp_recvmsg,
2488 backlog_rcv: tcp_v4_do_rcv,
2491 get_port: tcp_v4_get_port,
2496 void tcp_v4_init(struct net_proto_family *ops)
2501 tcp_inode.i_mode = S_IFSOCK;
2502 tcp_inode.i_sock = 1;
2503 tcp_inode.i_uid = 0;
2504 tcp_inode.i_gid = 0;
2505 init_waitqueue_head(&tcp_inode.i_wait);
2506 init_waitqueue_head(&tcp_inode.u.socket_i.wait);
2508 tcp_socket->inode = &tcp_inode;
2509 tcp_socket->state = SS_UNCONNECTED;
2510 tcp_socket->type=SOCK_RAW;
2512 if ((err=ops->create(tcp_socket, IPPROTO_TCP))<0)
2513 panic("Failed to create the TCP control socket.\n");
2514 tcp_socket->sk->allocation=GFP_ATOMIC;
2515 tcp_socket->sk->protinfo.af_inet.ttl = MAXTTL;
2517 /* Unhash it so that IP input processing does not even
2518 * see it, we do not wish this socket to see incoming
2521 tcp_socket->sk->prot->unhash(tcp_socket->sk);